prrte-3.0.13/0000775000175000017500000000000015145263360013123 5ustar alastairalastairprrte-3.0.13/Makefile.prte-rules0000664000175000017500000000227215145263240016664 0ustar alastairalastair# -*- makefile -*-
# Copyright (c) 2008-2022 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
# Copyright (c) 2020 Intel, Inc. All rights reserved.
# Copyright (c) 2023 Nanook Consulting. All rights reserved.
# Copyright (c) 2023 Jeffrey M. Squyres. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#

# A little verbosity magic; "make" will show the terse output. "make
# V=1" will show the actual commands used (just like the other
# Automake-generated compilation/linker rules).
#
# Each PRTE_V_<X> expands to prte__v_<X>_$(V). When V is empty, that
# resolves to prte__v_<X>_, which in turn selects
# prte__v_<X>_$(AM_DEFAULT_VERBOSITY). Only the "_0" variants are
# defined here, so any other value expands to nothing and make echoes
# the real command.
#
# The variable references are parenthesized on purpose: make reads
# "$AM_DEFAULT_VERBOSITY" as "$(A)" followed by literal text, which
# would never pick up the default verbosity.
V=0

PRTE_V_SPHINX_HTML = $(prte__v_SPHINX_HTML_$(V))
prte__v_SPHINX_HTML_ = $(prte__v_SPHINX_HTML_$(AM_DEFAULT_VERBOSITY))
prte__v_SPHINX_HTML_0 = @echo " GENERATE HTML docs";

PRTE_V_SPHINX_MAN = $(prte__v_SPHINX_MAN_$(V))
prte__v_SPHINX_MAN_ = $(prte__v_SPHINX_MAN_$(AM_DEFAULT_VERBOSITY))
prte__v_SPHINX_MAN_0 = @echo " GENERATE man pages";

PRTE_V_TXT = $(prte__v_TXT_$(V))
prte__v_TXT_ = $(prte__v_TXT_$(AM_DEFAULT_VERBOSITY))
prte__v_TXT_0 = @echo " GENERATE text files";

PRTE_V_LN_S = $(prte__v_LN_S_$(V))
prte__v_LN_S_ = $(prte__v_LN_S_$(AM_DEFAULT_VERBOSITY))
prte__v_LN_S_0 = @echo " LN_S " `basename $@`;
prrte-3.0.13/.mailmap0000664000175000017500000001062015145263240014540 0ustar alastairalastair# This file exists to help consolidate names and email addresses
# (e.g., when people accidentally commit with an incorrect or local
# email address). Two common use cases:
#
# 1. Consolidate multiple email addresses from a single person.
# Example: one commit from John Smith is from
# and another is from
# , and a third is from
# . But they're all from
# the same John Smith person.
#
# 2. Consolidate misspellings / alternate names from a single person.
# Example: one commit is from "John Smith" and another is from # "John Smith, CONTRACTOR", and third is from "RocketMan 9982". But # these are all really the same person, who can be listed once in # AUTHORS as "John Smith". # # The format of this file is documented in git-shortlog(1). Specifically, # a line like this: # # Proper Name # # means that when git sees "commit@email.xx" it will display # "Proper Name " instead in certain circumstances. Those # circumstances include: # # - git shortlog # - git blame # - git log --format=tformat:"%aN <%aE>" (and similar) # Jeff Squyres Jeff Squyres --quiet <--quiet> Jeff Squyres George Bosilca Howard Pritchard Howard Pritchard Andrew Friedley Devendar Bureddy Edgar Gabriel Edgar Gabriel Gilles Gouaillardet Matias A Cabral Matias A Cabral Pavel Shamis Pavel Shamis Pavel Shamis Todd Kordenbrock Yohann Burette Yohann Burette MPI Team (bot) MPI Team (bot) MPI Team (bot) Yossi Itigin Josh Hursey Josh Hursey Adrian Reber Elena Elkina Elena Elkina Igor Ivanov Igor Ivanov Mangala Jyothi Bhaskar Mangala Jyothi Bhaskar Ralph Castain Ralph Castain Rolf vandeVaart Karol Mroz Nadezhda Kogteva Thananon Patinyasakdikul Nysal Jan K A Nysal Jan K A Zhi Ming Wang Annapurna Dasari L. R. Rajeshnarayanan Aurelien Bouteiller Aurelien Bouteiller Alex Mikheev Thomas Naughton Geoffrey Paulsen Anandhi S Jayakumar Mohan Gandhi prrte-3.0.13/.github/0000775000175000017500000000000015145263240014460 5ustar alastairalastairprrte-3.0.13/.github/issue_template.md0000664000175000017500000000173715145263240020035 0ustar alastairalastairThank you for taking the time to submit an issue! ## Background information ### What version of the PMIx Reference Server are you using? (e.g., v1.0, v2.1, git master @ hash, etc.) ##### What version of PMIx are you using? (e.g., v1.2.5, v2.0.3, v2.1.0, git branch name and hash, etc.) 
### Please describe the system on which you are running * Operating system/version: * Computer hardware: * Network type: ----------------------------- ## Details of the problem Please describe, in detail, the problem that you are having, including the behavior you expect to see, the actual behavior that you are seeing, steps to reproduce the problem, etc. It is most helpful if you can attach a small program that a developer can use to reproduce your problem. **Note**: If you include verbatim output (or a code block), please use a [GitHub Markdown](https://help.github.com/articles/creating-and-highlighting-code-blocks/) code block like below: ```shell shell$ prun -np 2 ./hello_world ``` prrte-3.0.13/.github/CONTRIBUTING.md0000664000175000017500000000423515145263240016715 0ustar alastairalastair## How to contribute to the PMIx Reference Server General information about contributing to the PMIx project can be found at the [Contributing to PMIx webpage](https://pmix.org/code/contribute/). The instructions below are specifically for opening issues and pull requests against the PMIx Reference Server. #### **Did you find a bug?** * **Ensure the bug was not already reported** by searching on GitHub under [Issues](https://github.com/pmix/pmix-reference-server/issues). * If you're unable to find an open issue addressing the problem, [open a new one](https://github.com/pmix/pmix-reference-server/issues/new). * For more detailed information on submitting a bug report and creating an issue, visit our [FAQ](https://pmix.org/support/faq). #### **Did you write a patch that fixes a bug?** * Open a new GitHub pull request with the patch. * Ensure the PR description clearly describes the problem and solution. If there is an existing GitHub issue open describing this bug, please include it in the description so we can close it. 
* Before submitting, please read the [Contributing to the PMIx Project FAQ](https://pmix.org/support/faq/contributing) and the [SubmittingPullRequests](https://pmix.org/support/faq/submitting-pull-requests) web pages. In particular, note that all git commits contributed to PMIx (including the reference server) require a `Signed-off-by` line. #### **Do you intend to add a new feature or change an existing one?** * Suggest your change on the [devel mailing list](https://groups.google.com/forum/#!forum/pmix) and start writing code. * Do not open an issue on GitHub until you have collected positive feedback about the change. GitHub issues are primarily intended for bug reports and fixes. #### **Do you have questions about the source code?** * We are working to provide more online info about the PMIx code. A paper describing the [multi-component architecture](https://www.open-mpi.org/papers/ics-2004/ics-2004.pdf) of Open MPI (also used in PMIx and the reference server) may also be helpful. The [devel mailing list](https://groups.google.com/forum/#!forum/pmix) is a good place to post questions about the source code as well. 
Thanks The PMIx Team prrte-3.0.13/.github/workflows/0000775000175000017500000000000015145263240016515 5ustar alastairalastairprrte-3.0.13/.github/workflows/prte_mpi4py.yaml0000664000175000017500000001107315145263240021657 0ustar alastairalastairname: mpi4py on: pull_request: workflow_dispatch: inputs: repository: description: 'mpi4py repository' default: 'mpi4py/mpi4py' required: false type: string ref: description: 'mpi4py branch/tag/SHA' default: 'master' required: false type: string permissions: contents: read jobs: test: runs-on: ubuntu-latest timeout-minutes: 30 env: MPI4PY_TEST_SPAWN: true steps: - name: Configure hostname run: echo 127.0.0.1 `hostname` | sudo tee -a /etc/hosts > /dev/null if: ${{ runner.os == 'Linux' || runner.os == 'macOS' }} - name: Install dependencies run: | sudo apt-get update sudo apt-get install -y --no-install-recommends software-properties-common libhwloc-dev libevent-dev - name: Git clone OpenPMIx uses: actions/checkout@v4 with: submodules: recursive repository: openpmix/openpmix path: openpmix/v5.0 ref: v5.0 - name: Build OpenPMIx run: | cd openpmix/v5.0 ./autogen.pl ./configure --prefix=$RUNNER_TEMP/pmixinstall make -j $(nproc) make install - name: Git clone PRRTE uses: actions/checkout@v4 with: submodules: recursive clean: false - name: Build PRRTE run: | ./autogen.pl ./configure \ --prefix=$RUNNER_TEMP/prteinstall \ --with-pmix=$RUNNER_TEMP/pmixinstall \ --disable-sphinx make -j $(nproc) make install - name: Checkout Open MPI uses: actions/checkout@v4 with: submodules: recursive repository: open-mpi/ompi path: mpi-build ref: main clean: false - name: Bootstrap Open MPI working-directory: mpi-build run: ./autogen.pl - name: Configure Open MPI working-directory: mpi-build run: | ./configure \ --disable-dependency-tracking \ --enable-debug \ --enable-mem-debug \ --disable-sphinx \ --disable-mpi-fortran \ --disable-oshmem \ --with-pmix=$RUNNER_TEMP/pmixinstall \ --with-prrte=$RUNNER_TEMP/prteinstall \ --prefix=$RUNNER_TEMP/openmpi 
- name: Build Open MPI working-directory: mpi-build run: | make -j $(nproc) install - name: Add Open MPI to PATH run: echo $RUNNER_TEMP/openmpi/bin >> $GITHUB_PATH - name: Tweak MPI default parameters run: | # Tweak MPI mca_params="$HOME/.openmpi/mca-params.conf" mkdir -p "$(dirname "$mca_params")" echo mpi_param_check = true >> "$mca_params" echo mpi_show_handle_leaks = true >> "$mca_params" mca_params="$HOME/.prte/mca-params.conf" mkdir -p "$(dirname "$mca_params")" echo rmaps_default_mapping_policy = :oversubscribe >> "$mca_params" - name: Show MPI run: ompi_info - name: Show MPICC run: mpicc -show - name: Setup Python uses: actions/setup-python@v5 with: python-version: 3 architecture: x64 - name: Install Python packages (build) run: python -m pip install --upgrade setuptools pip wheel - name: Install Python packages (test) run: python -m pip install --upgrade numpy cffi pyyaml - name: Checkout mpi4py uses: actions/checkout@v4 with: repository: ${{ inputs.repository || 'mpi4py/mpi4py' }} ref: ${{ inputs.ref }} - name: Install mpi4py run: python -m pip install . 
env: CFLAGS: "-O0" - name: Test mpi4py (singleton) run: python test/main.py -v -x test_doc if: ${{ true }} timeout-minutes: 5 - name: Test mpi4py (np=1) run: mpiexec -n 1 python test/main.py -v -x test_doc if: ${{ true }} timeout-minutes: 5 - name: Test mpi4py (np=2) run: mpiexec -n 2 python test/main.py -v -f -x test_doc if: ${{ true }} timeout-minutes: 5 - name: Test mpi4py (np=3) run: mpiexec -n 3 python test/main.py -v -f -x test_doc if: ${{ true }} timeout-minutes: 5 - name: Test mpi4py (np=4) run: mpiexec -n 4 python test/main.py -v -f -x test_doc if: ${{ true }} timeout-minutes: 10 - name: Test mpi4py (np=5) run: mpiexec -n 5 python test/main.py -v -f -x test_doc if: ${{ true }} timeout-minutes: 10 - name: Test mpi4py.run run: python demo/test-run/test_run.py -v if: ${{ true }} timeout-minutes: 5 prrte-3.0.13/.github/workflows/build-ompi-external.yaml0000664000175000017500000000527515145263240023273 0ustar alastairalastairname: OMPI External on: [pull_request] permissions: contents: read jobs: test: runs-on: ubuntu-latest timeout-minutes: 30 steps: - name: Configure hostname run: echo 127.0.0.1 `hostname` | sudo tee -a /etc/hosts > /dev/null if: ${{ runner.os == 'Linux' || runner.os == 'macOS' }} - name: Install dependencies run: | sudo apt-get update sudo apt install -y --no-install-recommends wget software-properties-common hwloc libhwloc-dev libevent-2.1-7 libevent-dev - name: Git clone PMIx uses: actions/checkout@v3 with: submodules: recursive repository: openpmix/openpmix path: openpmix/v5.0 ref: v5.0 - name: Build PMIx run: | cd openpmix/v5.0 ./autogen.pl ./configure --prefix=$RUNNER_TEMP/pmixinstall make -j make install - name: Git clone PRRTE uses: actions/checkout@v3 with: submodules: recursive path: prrte clean: false - name: Build PRRTE run: | cd prrte ./autogen.pl ./configure --prefix=$RUNNER_TEMP/prteinstall --with-pmix=$RUNNER_TEMP/pmixinstall --enable-devel-check make -j make install - name: Checkout Open MPI uses: actions/checkout@v4 with: 
submodules: recursive repository: open-mpi/ompi path: ompi/v5.0.x ref: v5.0.x clean: false - name: Bootstrap Open MPI run: | cd ompi/v5.0.x ./autogen.pl ./configure \ --disable-dependency-tracking \ --enable-debug \ --enable-mem-debug \ --disable-sphinx \ --disable-mpi-fortran \ --disable-oshmem \ --prefix=$RUNNER_TEMP/openmpi \ --with-libevent=external \ --with-hwloc=external \ --with-pmix=$RUNNER_TEMP/pmixinstall \ --with-prrte=$RUNNER_TEMP/prteinstall make -j $(nproc) install - name: Add Open MPI to PATH run: echo $RUNNER_TEMP/openmpi/bin >> $GITHUB_PATH - name: Tweak MPI default parameters run: | # Tweak MPI mca_params="$HOME/.openmpi/mca-params.conf" mkdir -p "$(dirname "$mca_params")" echo mpi_param_check = true >> "$mca_params" echo mpi_show_handle_leaks = true >> "$mca_params" mca_params="$HOME/.prte/mca-params.conf" mkdir -p "$(dirname "$mca_params")" echo rmaps_default_mapping_policy = :oversubscribe >> "$mca_params" - name: Simple test run: | cd ompi/v5.0.x/examples make hello_c mpirun -n 1 ./hello_c if: ${{ true }} timeout-minutes: 5 prrte-3.0.13/.github/workflows/dvm.yaml0000664000175000017500000000360115145263240020167 0ustar alastairalastairname: DVM on: [pull_request] permissions: contents: read jobs: pub-lookup: runs-on: ubuntu-22.04 steps: - name: Install dependencies run: | sudo apt update sudo apt install -y --no-install-recommends wget software-properties-common hwloc libhwloc-dev libevent-2.1-7 libevent-dev - uses: actions/checkout@v4 with: submodules: recursive - name: Git clone PMIx uses: actions/checkout@v3 with: submodules: recursive repository: openpmix/openpmix path: openpmix/v5.0 ref: v5.0 - name: Build PMIx run: | cd openpmix/v5.0 ./autogen.pl ./configure --prefix=$RUNNER_TEMP/pmixinstall make -j make install - name: Git clone PRRTE uses: actions/checkout@v3 with: submodules: recursive clean: false - name: Build PRRTE run: | ./autogen.pl ./configure --prefix=$RUNNER_TEMP/prteinstall --with-pmix=$RUNNER_TEMP/pmixinstall 
--enable-devel-check make -j make install - name: Tweak PRRTE run: | # Tweak PRRTE mca_params="$HOME/.prte/mca-params.conf" mkdir -p "$(dirname "$mca_params")" echo rmaps_default_mapping_policy = :oversubscribe >> "$mca_params" - name: Run simple test run: | export PATH=$RUNNER_TEMP/prteinstall/bin:${PATH} export LD_LIBRARY_PATH=$RUNNER_TEMP/prteinstall/lib:${LD_LIBRARY_PATH} prterun -n 4 --pset foo ./openpmix/v5.0/examples/hello if: ${{ true }} timeout-minutes: 5 - name: Run pub-lookup test1 run: | export PATH=$RUNNER_TEMP/prteinstall/bin:${PATH} export LD_LIBRARY_PATH=$RUNNER_TEMP/prteinstall/lib:${LD_LIBRARY_PATH} prterun -n 3 ./openpmix/v5.0/examples/pub if: ${{ true }} timeout-minutes: 5 prrte-3.0.13/.github/workflows/pr-target.yaml0000664000175000017500000000171315145263240021310 0ustar alastairalastairname: PR Checks CI # We're using pull_request_target here instead of just pull_request so that the # action runs in the context of the base of the pull request, rather than in the # context of the merge commit. 
For more detail about the differences, see: # https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#pull_request_target on: pull_request_target: # We don't need this to be run on all types of PR behavior # See https://docs.github.com/en/actions/reference/events-that-trigger-workflows#pull_request types: - opened - synchronize - edited permissions: {} # none jobs: check: permissions: pull-requests: write name: Check Commits runs-on: ubuntu-latest steps: - name: Pull Request Commit Checker uses: open-mpi/pr-git-commit-checker@v1.0.1 with: token: "${{ secrets.GITHUB_TOKEN}}" cherry-pick-required: true prrte-3.0.13/.github/workflows/builds.yaml0000664000175000017500000001737015145263240020673 0ustar alastairalastairname: Build tests on: [pull_request] permissions: contents: read jobs: macos: runs-on: macos-latest strategy: matrix: path: ['non-vpath', 'vpath'] sphinx: ['no-sphinx', 'sphinx'] steps: - name: Install dependencies run: brew install libevent hwloc autoconf automake libtool - name: Git clone OpenPMIx uses: actions/checkout@v3 with: submodules: recursive repository: openpmix/openpmix path: openpmix/v5.0 ref: v5.0 - name: Build v5.0 run: | cd openpmix/v5.0 # Homebrew doesn't install Libevent's (or libev's) header or # library files into a default search location. Shrug. So # use pkg-config to get the location and explicitly pass it to # configure. libevent_cppflags=$(pkg-config libevent --cflags) libevent_ldflags=$(pkg-config libevent --libs | perl -pe 's/^.*(-L[^ ]+).*$/\1/') ./autogen.pl ./configure --prefix=$RUNNER_TEMP/pmixinstall \ CPPFLAGS=$libevent_cppflags \ LDFLAGS=$libevent_ldflags make -j make install - name: Git clone PRRTE uses: actions/checkout@v3 with: submodules: recursive clean: false - name: Build PRRTE run: | ./autogen.pl sphinx= if test "${{ matrix.sphinx }}" = sphinx; then # The macos Github Action environment gets angry at us if # we try to pip install into the global environment. 
So # make a virtual environment and install sphinx into that. python -m venv venv . ./venv/bin/activate pip3 install -r docs/requirements.txt sphinx=--enable-sphinx fi c=./configure if test "${{ matrix.path }}" = vpath; then mkdir build cd build c=../configure fi # Homebrew doesn't install Libevent's (or libev's) header or # library files into a default search location. Shrug. So # use pkg-config to get the location and explicitly pass it to # configure. libevent_cppflags=$(pkg-config libevent --cflags) libevent_ldflags=$(pkg-config libevent --libs | perl -pe 's/^.*(-L[^ ]+).*$/\1/') $c --prefix=$RUNNER_TEMP/prteinstall --with-pmix=$RUNNER_TEMP/pmixinstall $sphinx \ --enable-devel-check \ --enable-testbuild-launchers \ CPPFLAGS=$libevent_cppflags \ LDFLAGS=$libevent_ldflags make -j make install - name: Build examples run: | pushd examples export PATH=$RUNNER_TEMP/prteinstall/bin:$RUNNER_TEMP/pmixinstall/bin:${PATH} make popd - name: Test sanity check run: | export PATH=$RUNNER_TEMP/prteinstall/bin:$RUNNER_TEMP/pmixinstall/bin:${PATH} export LD_LIBRARY_PATH=$RUNNER_TEMP/prteinstall/lib:${LD_LIBRARY_PATH} prterun --map-by ppr:1:core examples/hello ubuntu: runs-on: ubuntu-latest strategy: matrix: path: ['non-vpath', 'vpath'] sphinx: ['no-sphinx', 'sphinx'] steps: - name: Install dependencies run: | sudo apt-get update sudo apt-get install -y --no-install-recommends software-properties-common libhwloc-dev libevent-dev - name: Git clone OpenPMIx uses: actions/checkout@v3 with: submodules: recursive repository: openpmix/openpmix path: openpmix/v5.0 ref: v5.0 - name: Build OpenPMIx run: | cd openpmix/v5.0 ./autogen.pl ./configure --prefix=$RUNNER_TEMP/pmixinstall make -j make install - name: Git clone PRRTE uses: actions/checkout@v3 with: submodules: recursive clean: false - name: Build PRRTE run: | ./autogen.pl sphinx= if test "${{ matrix.sphinx }}" = sphinx; then pip3 install -r docs/requirements.txt sphinx=--enable-sphinx fi c=./configure if test "${{ matrix.path }}" = 
vpath; then mkdir build cd build c=../configure fi $c --prefix=$RUNNER_TEMP/prteinstall --with-pmix=$RUNNER_TEMP/pmixinstall $sphinx \ --enable-devel-check \ --enable-testbuild-launchers make -j make install - name: Build examples run: | pushd examples export PATH=$RUNNER_TEMP/prteinstall/bin:$RUNNER_TEMP/pmixinstall/bin:${PATH} export LD_LIBRARY_PATH=$RUNNER_TEMP/prteinstall/lib:${LD_LIBRARY_PATH} make popd - name: Test sanity check run: | export PATH=$RUNNER_TEMP/prteinstall/bin:$RUNNER_TEMP/pmixinstall/bin:${PATH} export LD_LIBRARY_PATH=$RUNNER_TEMP/prteinstall/lib:${LD_LIBRARY_PATH} prterun --map-by ppr:1:core examples/hello prterun --map-by ppr:1:core examples/legacy ubuntuClang: runs-on: ubuntu-latest steps: - name: Install dependencies run: | sudo apt-get update sudo apt-get install -y --no-install-recommends software-properties-common libhwloc-dev libevent-dev clang - name: Git clone OpenPMIx uses: actions/checkout@v3 with: submodules: recursive repository: openpmix/openpmix path: openpmix/v5.0 ref: v5.0 - name: Build OpenPMIx run: | cd openpmix/v5.0 ./autogen.pl CC=clang ./configure --prefix=$RUNNER_TEMP/pmixinstall make -j make install - name: Git clone PRRTE uses: actions/checkout@v3 with: submodules: recursive clean: false - name: Build PRRTE run: | ./autogen.pl pip3 install -r docs/requirements.txt sphinx=--enable-sphinx c=./configure CC=clang $c --prefix=$RUNNER_TEMP/prteinstall --with-pmix=$RUNNER_TEMP/pmixinstall $sphinx \ --enable-devel-check \ --enable-testbuild-launchers make -j make install - name: Build examples run: | pushd examples export PATH=$RUNNER_TEMP/prteinstall/bin:$RUNNER_TEMP/pmixinstall/bin:${PATH} export LD_LIBRARY_PATH=$RUNNER_TEMP/prteinstall/lib:${LD_LIBRARY_PATH} make popd - name: Test sanity check run: | export PATH=$RUNNER_TEMP/prteinstall/bin:$RUNNER_TEMP/pmixinstall/bin:${PATH} export LD_LIBRARY_PATH=$RUNNER_TEMP/prteinstall/lib:${LD_LIBRARY_PATH} prterun --map-by ppr:1:core examples/hello prterun --map-by ppr:1:core 
examples/legacy distcheck: runs-on: ubuntu-latest steps: - name: Install dependencies run: | sudo apt-get update sudo apt-get install -y --no-install-recommends software-properties-common libhwloc-dev libevent-dev python3 python3-pip - name: Git clone OpenPMIx uses: actions/checkout@v3 with: submodules: recursive repository: openpmix/openpmix path: openpmix/v5.0 ref: v5.0 - name: Build OpenPMIx run: | cd openpmix/v5.0 ./autogen.pl ./configure --prefix=$RUNNER_TEMP/pmixinstall make -j make install - name: Git clone PRRTE uses: actions/checkout@v3 with: submodules: recursive clean: false - name: Distcheck run: | pip install -r docs/requirements.txt ./autogen.pl ./configure --prefix=$RUNNER_TEMP/prteinstall --with-pmix=$RUNNER_TEMP/pmixinstall --enable-sphinx \ --enable-devel-check \ --enable-testbuild-launchers make distcheck AM_DISTCHECK_MAKEFLAGS=-j AM_DISTCHECK_CONFIGURE_FLAGS="--with-pmix=$RUNNER_TEMP/pmixinstall" prrte-3.0.13/.github/workflows/build-ompi.yaml0000664000175000017500000000425615145263240021451 0ustar alastairalastairname: OMPI Internal on: [pull_request] permissions: contents: read jobs: test: runs-on: ubuntu-latest timeout-minutes: 30 steps: - name: Configure hostname run: echo 127.0.0.1 `hostname` | sudo tee -a /etc/hosts > /dev/null if: ${{ runner.os == 'Linux' || runner.os == 'macOS' }} - name: Install dependencies run: | sudo apt-get update sudo apt-get install -y --no-install-recommends software-properties-common - name: Checkout Open MPI uses: actions/checkout@v4 with: submodules: recursive repository: open-mpi/ompi ref: v5.0.x - name: Tweak OMPI run: | rm -r 3rd-party/prrte 3rd-party/openpmix - name: Git clone PMIx uses: actions/checkout@v4 with: submodules: recursive repository: openpmix/openpmix path: 3rd-party/openpmix ref: v5.0 clean: false - name: Git clone PRRTE uses: actions/checkout@v4 with: submodules: recursive path: 3rd-party/prrte clean: false - name: Bootstrap Open MPI run: ./autogen.pl - name: Configure Open MPI run: | 
./configure \ --disable-dependency-tracking \ --enable-debug \ --enable-mem-debug \ --disable-sphinx \ --disable-mpi-fortran \ --disable-oshmem \ --prefix=$RUNNER_TEMP/openmpi - name: Build Open MPI run: | make -j $(nproc) install - name: Add Open MPI to PATH run: echo $RUNNER_TEMP/openmpi/bin >> $GITHUB_PATH - name: Tweak MPI default parameters run: | # Tweak MPI mca_params="$HOME/.openmpi/mca-params.conf" mkdir -p "$(dirname "$mca_params")" echo mpi_param_check = true >> "$mca_params" echo mpi_show_handle_leaks = true >> "$mca_params" mca_params="$HOME/.prte/mca-params.conf" mkdir -p "$(dirname "$mca_params")" echo rmaps_default_mapping_policy = :oversubscribe >> "$mca_params" - name: Simple test run: | cd examples make hello_c mpirun -n 1 ./hello_c if: ${{ true }} timeout-minutes: 5 prrte-3.0.13/.github/workflows/builds-ancient.yaml0000664000175000017500000000617115145263240022307 0ustar alastairalastairname: Build with PMIx v4.2 on: [pull_request] jobs: ubuntu4: runs-on: ubuntu-latest steps: - name: Install dependencies run: | sudo apt-get update sudo apt-get install -y --no-install-recommends software-properties-common libhwloc-dev libevent-dev - name: Git clone OpenPMIx uses: actions/checkout@v3 with: submodules: recursive repository: openpmix/openpmix path: openpmix/v4 ref: v4.2 - name: Build OpenPMIx run: | cd openpmix/v4 ./autogen.pl ./configure --prefix=$RUNNER_TEMP/pmixinstall make -j make install cp examples/.libs/hello $RUNNER_TEMP/pmixinstall/bin - name: Git clone PRRTE uses: actions/checkout@v3 with: submodules: recursive clean: false - name: Build PRRTE run: | ./autogen.pl ./configure --prefix=$RUNNER_TEMP/prteinstall --with-pmix=$RUNNER_TEMP/pmixinstall --enable-devel-check make -j make install - name: Tweak PRRTE run: | # Tweak PRRTE mca_params="$HOME/.prte/mca-params.conf" mkdir -p "$(dirname "$mca_params")" echo rmaps_default_mapping_policy = :oversubscribe >> "$mca_params" - name: Run simple test run: | export 
PATH=$RUNNER_TEMP/prteinstall/bin:${PATH} export LD_LIBRARY_PATH=$RUNNER_TEMP/prteinstall/lib:${LD_LIBRARY_PATH} prterun -n 4 $RUNNER_TEMP/pmixinstall/bin/hello if: ${{ true }} timeout-minutes: 5 ubuntuClang4: runs-on: ubuntu-latest steps: - name: Install dependencies run: | sudo apt-get update sudo apt-get install -y --no-install-recommends software-properties-common libhwloc-dev libevent-dev clang - name: Git clone OpenPMIx uses: actions/checkout@v3 with: submodules: recursive repository: openpmix/openpmix path: openpmix/v4 ref: v4.2 - name: Build OpenPMIx run: | cd openpmix/v4 ./autogen.pl CC=clang ./configure --prefix=$RUNNER_TEMP/pmixinstall make -j make install cp examples/.libs/hello $RUNNER_TEMP/pmixinstall/bin - name: Git clone PRRTE uses: actions/checkout@v3 with: submodules: recursive clean: false - name: Build PRRTE run: | ./autogen.pl pip3 install -r docs/requirements.txt CC=clang ./configure --prefix=$RUNNER_TEMP/prteinstall --with-pmix=$RUNNER_TEMP/pmixinstall $sphinx --enable-devel-check make -j make install - name: Tweak PRRTE run: | # Tweak PRRTE mca_params="$HOME/.prte/mca-params.conf" mkdir -p "$(dirname "$mca_params")" echo rmaps_default_mapping_policy = :oversubscribe >> "$mca_params" - name: Run simple test run: | export PATH=$RUNNER_TEMP/prteinstall/bin:${PATH} export LD_LIBRARY_PATH=$RUNNER_TEMP/prteinstall/lib:${LD_LIBRARY_PATH} prterun -n 4 $RUNNER_TEMP/pmixinstall/bin/hello if: ${{ true }} timeout-minutes: 5 prrte-3.0.13/README.md0000664000175000017500000000146315145263240014403 0ustar alastairalastair# PMIx Reference RunTime Environment (PRRTE) PRRTE is [the PMIx Reference RunTime Environment](https://github.com/openpmix/prrte). ## Official documentation The PRRTE documentation can be viewed in the following ways: 1. Online at https://docs.prrte.org/ 1. In self-contained form (i.e., suitable for local viewing without an internet connection) in official distribution tarballs, under `docs/_build/html/index.html`. 
## Building the documentation locally The source code for PRRTE's docs can be found in the PRRTE Git repository under the `docs` folder. Developers who clone the PRRTE Git repository will not have the HTML documentation and man pages by default; they must be built. Instructions for how to build the PRRTE documentation can be found here: https://docs.prrte.org/en/latest/developers/sphinx.html prrte-3.0.13/include/0000775000175000017500000000000015145263240014543 5ustar alastairalastairprrte-3.0.13/include/prte.h0000664000175000017500000000122515145263240015666 0ustar alastairalastair/* * Copyright (c) 2016 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2018 IBM Corporation. All rights reserved. * Copyright (c) 2018 Intel, Inc. All rights reserved. * Copyright (c) 2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2021 Nanook Consulting All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #ifndef PRTE_H #define PRTE_H #include "prte_version.h" /** * Main body of prte functionality */ int prte(int argc, char *argv[]); #endif prrte-3.0.13/include/prte_version.h.in0000664000175000017500000000137415145263240020045 0ustar alastairalastair/* * Copyright (c) 2016 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2018 IBM Corporation. All rights reserved. * Copyright (c) 2018 Intel, Inc. All rights reserved. * Copyright (c) 2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2021 Nanook Consulting All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #ifndef PRTE_VERSION_H #define PRTE_VERSION_H /* define PRTE version */ #define PRTE_VERSION_MAJOR @prtemajor@ #define PRTE_VERSION_MINOR @prteminor@ #define PRTE_VERSION_RELEASE @prterelease@ #define PRTE_NUMERIC_VERSION @prtenumeric@ #endif prrte-3.0.13/include/Makefile.am0000664000175000017500000000042715145263240016602 0ustar alastairalastair# # Copyright (c) 2015-2020 Intel, Inc. All rights reserved. # # Copyright (c) 2021 Nanook Consulting All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow # # $HEADER$ # include_HEADERS = \ prte.h nodist_include_HEADERS = \ prte_version.h prrte-3.0.13/contrib/0000775000175000017500000000000015145263240014560 5ustar alastairalastairprrte-3.0.13/contrib/platform/0000775000175000017500000000000015145263240016404 5ustar alastairalastairprrte-3.0.13/contrib/platform/snl/0000775000175000017500000000000015145263240017200 5ustar alastairalastairprrte-3.0.13/contrib/platform/snl/portals4-orte0000664000175000017500000000043015145263240021637 0ustar alastairalastairwith_memory_manager=no enable_mca_no_build=bml,btl,coll-hierarch,coll-sm,common-sm,mpool,pml-bfo,pml-csum,pml-dr,pml-ob1,osc-rdma,rcache,vprotocol enable_contrib_no_build=libnbc enable_heterogeneous=no enable_mem_debug=no enable_mem_profile=no with_verbs=no with_gm=no with_mx=no prrte-3.0.13/contrib/platform/snl/portals4-m50000664000175000017500000000102115145263240021204 0ustar alastairalastairenable_static=yes enable_shared=no with_threads=no enable_dlopen=no with_memory_manager=no enable_mca_no_build=carto,compress,crs,maffinity,paffinity,debugger,notifier,bml,btl,coll-hierarch,coll-sm,common-sm,mpool,pml-bfo,pml-csum,pml-dr,pml-ob1,osc-rdma,rcache,vprotocol,btl enable_contrib_no_build=libnbc with_rte_support=no enable_heterogeneous=no enable_pty_support=no enable_mem_debug=no enable_mem_profile=no with_openib=no with_gm=no with_mx=no enable_binaries=no 
enable_script_wrapper_compilers=yes enable_mpi_fortran=no prrte-3.0.13/contrib/platform/snl/kitten0000664000175000017500000000433415145263240020425 0ustar alastairalastairenable_static=yes enable_shared=no enable_dlopen=no enable_mca_no_build=if,compress,crs,pml-bfo,pml-v,vprotocol,btl-tcp enable_contrib_no_build= enable_heterogeneous=no enable_pty_support=no enable_mem_debug=no enable_mem_profile=no enable_binaries=no enable_script_wrapper_compilers=yes with_prte=no enable_event_rtsig=no enable_event_select=no enable_event_poll=no enable_event_devpoll=no enable_event_kqueue=no enable_event_epoll=no enable_event_evport=no ac_cv_func_cpuset_setaffinity=${ac_cv_func_cpuset_setaffinity=no} ac_cv_func_dlopen=${ac_cv_func_dlopen=no} ac_cv_func_dlsym=${ac_cv_func_dlsym=no} ac_cv_func_execve=${ac_cv_func_execve=no} ac_cv_func_fork=${ac_cv_func_fork=no} ac_cv_func_gethrtime=${ac_cv_func_gethrtime=no} ac_cv_func_getpagesize=${ac_cv_func_getpagesize=no} ac_cv_func_getpwuid=${ac_cv_func_getpwuid=no} ac_cv_func_htonl=${ac_cv_func_htonl=yes} ac_cv_func_isatty=${ac_cv_func_isatty=no} ac_cv_func_mkfifo=${ac_cv_func_mkfifo=no} ac_cv_func_mmap=${ac_cv_func_mmap=no} ac_cv_func_openat=${ac_cv_func_openat=no} ac_cv_func_openpty=${ac_cv_func_openpty=no} ac_cv_func_pipe=${ac_cv_func_pipe=no} ac_cv_func_ptsname=${ac_cv_func_ptsname=no} ac_cv_func_sched_setaffinity=${ac_cv_func_sched_setaffinity=no} ac_cv_func_sched_yield=${ac_cv_func_sched_yield=no} ac_cv_func_setpgid=${ac_cv_func_setpgid=no} ac_cv_func_setsid=${ac_cv_func_setsid=no} ac_cv_func_shl_load=${ac_cv_func_shl_load=no} ac_cv_func_shmget=${ac_cv_func_shmget=no} ac_cv_func_socket=${ac_cv_func_socket=no} ac_cv_func_socketpair=${ac_cv_func_socketpair=no} ac_cv_func_strsignal=${ac_cv_func_strsignal=yes} ac_cv_func_sysconf=${ac_cv_func_sysconf=no} ac_cv_func_syslog=${ac_cv_func_syslog=no} ac_cv_func_tcgetpgrp=${ac_cv_func_tcgetpgrp=no} ac_cv_func_time_base_to_time=${ac_cv_func_time_base_to_time=no} 
ac_cv_func_uname=${ac_cv_func_uname=no} ac_cv_func_usleep=${ac_cv_func_usleep=no} ac_cv_func_vm_read_overwrite=${ac_cv_func_vm_read_overwrite=no} ac_cv_func_waitpid=${ac_cv_func_waitpid=no} if test "with_verbs" != "no" ; then enable_mca_direct=pml-ob1 enable_mca_no_built="$enable_mca_no_build,btl-sm" elif test "with_portals4" != "no" ; then enable_mca_no_build="$enable_mca_no_build,pml-ob1,btl,bml,mpool,rcache" enable_mca_direct=pml-cm,mtl-portals4 fi prrte-3.0.13/contrib/platform/cray_xt_cnl_romio0000664000175000017500000001214515145263240022044 0ustar alastairalastairenable_mem_debug=no enable_mem_profile=no enable_debug=no enable_debug_symbols=no enable_io_romio=yes enable_static=yes enable_shared=no with_threads=no enable_pretty_print_stacktrace=no enable_dlopen=no with_portals_config=cnl_modex with_memory_manager=none enable_mca_no_build=maffinity-first_use,maffinity-libnuma,paffinity-linux,ess-cnos,pml-dr,filem-rsh,grpcomm-cnos,rmgr-cnos,rml-cnos enable_heterogeneous=no enable_pty_support=no enable_binaries=yes ompi_cv_f77_sizeof_LOGICAL=${ompi_cv_f77_sizeof_LOGICAL=4} ompi_cv_f77_alignment_LOGICAL=${ompi_cv_f77_alignment_LOGICAL=4} ompi_cv_f77_sizeof_INTEGER=${ompi_cv_f77_sizeof_INTEGER=4} ompi_cv_f77_alignment_INTEGER=${ompi_cv_f77_alignment_INTEGER=4} ompi_cv_f77_sizeof_INTEGERp1=${ompi_cv_f77_sizeof_INTEGERp1=1} ompi_cv_f77_alignment_INTEGERp1=${ompi_cv_f77_alignment_INTEGERp1=1} ompi_cv_f77_sizeof_INTEGERp2=${ompi_cv_f77_sizeof_INTEGERp2=2} ompi_cv_f77_alignment_INTEGERp2=${ompi_cv_f77_alignment_INTEGERp2=2} ompi_cv_f77_sizeof_INTEGERp4=${ompi_cv_f77_sizeof_INTEGERp4=4} ompi_cv_f77_alignment_INTEGERp4=${ompi_cv_f77_alignment_INTEGERp4=4} ompi_cv_f77_sizeof_INTEGERp8=${ompi_cv_f77_sizeof_INTEGERp8=8} ompi_cv_f77_alignment_INTEGERp8=${ompi_cv_f77_alignment_INTEGERp8=8} ompi_cv_f77_sizeof_INTEGERp16=${ompi_cv_f77_sizeof_INTEGERp16=16} ompi_cv_f77_alignment_INTEGERp16=${ompi_cv_f77_alignment_INTEGERp16=8} ompi_cv_f77_sizeof_REAL=${ompi_cv_f77_sizeof_REAL=4} 
ompi_cv_f77_alignment_REAL=${ompi_cv_f77_alignment_REAL=4} ompi_cv_f77_sizeof_REALp2=${ompi_cv_f77_sizeof_REALp2=2} ompi_cv_f77_alignment_REALp2=${ompi_cv_f77_alignment_REALp2=2} ompi_cv_f77_sizeof_REALp4=${ompi_cv_f77_sizeof_REALp4=4} ompi_cv_f77_alignment_REALp4=${ompi_cv_f77_alignment_REALp4=4} ompi_cv_f77_sizeof_REALp8=${ompi_cv_f77_sizeof_REALp8=8} ompi_cv_f77_alignment_REALp8=${ompi_cv_f77_alignment_REALp8=8} ompi_cv_f77_sizeof_REALp16=${ompi_cv_f77_sizeof_REALp16=16} ompi_cv_f77_alignment_REALp16=${ompi_cv_f77_alignment_REALp16=8} ompi_cv_f77_sizeof_DOUBLE_PRECISION=${ompi_cv_f77_sizeof_DOUBLE_PRECISION=8} ompi_cv_f77_alignment_DOUBLE_PRECISION=${ompi_cv_f77_alignment_DOUBLE_PRECISION=8} ompi_cv_f77_sizeof_COMPLEX=${ompi_cv_f77_sizeof_COMPLEX=8} ompi_cv_f77_alignment_COMPLEX=${ompi_cv_f77_alignment_COMPLEX=4} ompi_cv_f77_sizeof_COMPLEXp8=${ompi_cv_f77_sizeof_COMPLEXp8=8} ompi_cv_f77_alignment_COMPLEXp8=${ompi_cv_f77_alignment_COMPLEXp8=4} ompi_cv_f77_sizeof_COMPLEXp16=${ompi_cv_f77_sizeof_COMPLEXp16=16} ompi_cv_f77_alignment_COMPLEXp16=${ompi_cv_f77_alignment_COMPLEXp16=8} ompi_cv_f77_sizeof_COMPLEXp32=${ompi_cv_f77_sizeof_COMPLEXp32=32} ompi_cv_f77_alignment_COMPLEXp32=${ompi_cv_f77_alignment_COMPLEXp32=8} ompi_cv_f77_true_value=${ompi_cv_f77_true_value=-1} ompi_cv_f90_sizeof_LOGICAL=${ompi_cv_f90_sizeof_LOGICAL=4} ompi_cv_f90_alignment_LOGICAL=${ompi_cv_f90_alignment_LOGICAL=4} ompi_cv_f90_sizeof_INTEGER=${ompi_cv_f90_sizeof_INTEGER=4} ompi_cv_f90_alignment_INTEGER=${ompi_cv_f90_alignment_INTEGER=4} ompi_cv_f90_sizeof_INTEGERp1=${ompi_cv_f90_sizeof_INTEGERp1=1} ompi_cv_f90_alignment_INTEGERp1=${ompi_cv_f90_alignment_INTEGERp1=1} ompi_cv_f90_sizeof_INTEGERp2=${ompi_cv_f90_sizeof_INTEGERp2=2} ompi_cv_f90_alignment_INTEGERp2=${ompi_cv_f90_alignment_INTEGERp2=2} ompi_cv_f90_sizeof_INTEGERp4=${ompi_cv_f90_sizeof_INTEGERp4=4} ompi_cv_f90_alignment_INTEGERp4=${ompi_cv_f90_alignment_INTEGERp4=4} ompi_cv_f90_sizeof_INTEGERp8=${ompi_cv_f90_sizeof_INTEGERp8=8} 
ompi_cv_f90_alignment_INTEGERp8=${ompi_cv_f90_alignment_INTEGERp8=8} ompi_cv_f90_sizeof_INTEGERp16=${ompi_cv_f90_sizeof_INTEGERp16=16} ompi_cv_f90_alignment_INTEGERp16=${ompi_cv_f90_alignment_INTEGERp16=8} ompi_cv_f90_sizeof_REAL=${ompi_cv_f90_sizeof_REAL=4} ompi_cv_f90_alignment_REAL=${ompi_cv_f90_alignment_REAL=4} ompi_cv_f90_sizeof_REALp2=${ompi_cv_f90_sizeof_REALp2=2} ompi_cv_f90_alignment_REALp2=${ompi_cv_f90_alignment_REALp2=2} ompi_cv_f90_sizeof_REALp4=${ompi_cv_f90_sizeof_REALp4=4} ompi_cv_f90_alignment_REALp4=${ompi_cv_f90_alignment_REALp4=4} ompi_cv_f90_sizeof_REALp8=${ompi_cv_f90_sizeof_REALp8=8} ompi_cv_f90_alignment_REALp8=${ompi_cv_f90_alignment_REALp8=8} ompi_cv_f90_sizeof_REALp16=${ompi_cv_f90_sizeof_REALp16=16} ompi_cv_f90_alignment_REALp16=${ompi_cv_f90_alignment_REALp16=8} ompi_cv_f90_sizeof_DOUBLE_PRECISION=${ompi_cv_f90_sizeof_DOUBLE_PRECISION=8} ompi_cv_f90_alignment_DOUBLE_PRECISION=${ompi_cv_f90_alignment_DOUBLE_PRECISION=8} ompi_cv_f90_sizeof_COMPLEX=${ompi_cv_f90_sizeof_COMPLEX=8} ompi_cv_f90_alignment_COMPLEX=${ompi_cv_f90_alignment_COMPLEX=4} ompi_cv_f90_sizeof_COMPLEXp8=${ompi_cv_f90_sizeof_COMPLEXp8=8} ompi_cv_f90_alignment_COMPLEXp8=${ompi_cv_f90_alignment_COMPLEXp8=4} ompi_cv_f90_sizeof_COMPLEXp16=${ompi_cv_f90_sizeof_COMPLEXp16=16} ompi_cv_f90_alignment_COMPLEXp16=${ompi_cv_f90_alignment_COMPLEXp16=8} ompi_cv_f90_sizeof_COMPLEXp32=${ompi_cv_f90_sizeof_COMPLEXp32=32} ompi_cv_f90_alignment_COMPLEXp32=${ompi_cv_f90_alignment_COMPLEXp32=8} ompi_cv_f90_true_value=${ompi_cv_f90_true_value=-1} ompi_cv_f90_sizeof_DOUBLE_COMPLEX=${ompi_cv_f90_sizeof_DOUBLE_COMPLEX=16} ompi_cv_f90_alignment_DOUBLE_COMPLEX=${ompi_cv_f90_alignment_DOUBLE_COMPLEX=8} ompi_cv_f90_int_kind_9=${ompi_cv_f90_int_kind_9=4} ompi_cv_f90_int_kind_18=${ompi_cv_f90_int_kind_18=8} prrte-3.0.13/contrib/platform/embedded/0000775000175000017500000000000015145263240020135 5ustar 
alastairalastairprrte-3.0.13/contrib/platform/embedded/gen_embedded.sh0000775000175000017500000000060015145263240023052 0ustar alastairalastair#! /usr/bin/env bash # if (( $# < 2 )) ; then echo "usage ./contrib/platform/embedded/gen_embedded.sh prefix debug|optimized" exit 1 fi prefix=$1 shift 1 platform=$1 ./autogen.pl -no-ompi ./configure --prefix="${prefix}" --with-platform=contrib/platform/embedded/"${platform}" make clean > /dev/null make -j2 all > /dev/null make -j2 install > /dev/null # All done exit 0 prrte-3.0.13/contrib/platform/embedded/debug0000664000175000017500000000132415145263240021146 0ustar alastairalastairenable_dlopen=no enable_mem_debug=yes enable_memchecker=no enable_mem_profile=no enable_debug_symbols=yes enable_binaries=yes enable_heterogeneous=no enable_picky=yes enable_debug=yes enable_shared=yes enable_static=yes enable_ipv6=no enable_mpi_fortran=no enable_mpi_cxx=no enable_mpi_cxx_seek=no enable_cxx_exceptions=no enable_ft_thread=no enable_per_user_config_files=no enable_mca_no_build=crs,carto,maffinity,paffinity,pstat,filem,grpcomm-basic,grpcomm-hier,rmaps-rank_file,rmaps-seq,rmaps-topo,routed-binomial,routed-linear,routed-radix,routed-slave,snapc enable_contrib_no_build=libnbc with_devel_headers=yes with_alps=no with_ftb=no with_sge=no with_xgrid=no with_slurm=no with_tm=no with_lsf=no with_threads=posix prrte-3.0.13/contrib/platform/embedded/build_embedded.sh0000775000175000017500000000055415145263240023410 0ustar alastairalastair#! 
/usr/bin/env bash # if (( $# < 2 )) ; then echo "usage ./contrib/platform/embedded/build_embedded.sh prefix debug|optimized" exit 1 fi prefix=$1 shift 1 platform=$1 ./configure --prefix="${prefix}" --with-platform=contrib/platform/embedded/"${platform}" make clean > /dev/null make -j2 all > /dev/null make -j2 install > /dev/null # All done exit 0 prrte-3.0.13/contrib/platform/embedded/optimized0000664000175000017500000000132015145263240022060 0ustar alastairalastairenable_dlopen=no enable_mem_debug=no enable_memchecker=no enable_mem_profile=no enable_debug_symbols=no enable_binaries=yes enable_heterogeneous=no enable_picky=yes enable_debug=no enable_shared=yes enable_static=no enable_ipv6=no enable_mpi_fortran=no enable_mpi_cxx=no enable_mpi_cxx_seek=no enable_cxx_exceptions=no enable_ft_thread=no enable_per_user_config_files=no enable_mca_no_build=crs,carto,maffinity,paffinity,pstat,filem,grpcomm-basic,grpcomm-hier,rmaps-rank_file,rmaps-seq,rmaps-topo,routed-binomial,routed-linear,routed-radix,routed-slave,snapc enable_contrib_no_build=libnbc with_devel_headers=yes with_alps=no with_ftb=no with_sge=no with_xgrid=no with_slurm=no with_tm=no with_lsf=no with_threads=posix prrte-3.0.13/contrib/platform/ibm/0000775000175000017500000000000015145263240017153 5ustar alastairalastairprrte-3.0.13/contrib/platform/ibm/debug-power7-gcc0000664000175000017500000000111115145263240022131 0ustar alastairalastairenable_mem_debug=yes enable_debug_symbols=yes enable_debug=yes enable_mem_profile=no enable_contrib_no_build=libnbc enable_ft_thread=no with_verbs=/usr CXXFLAGS="-m64 -mcpu=power7 -mtune=power7 -O0 -g3 -ggdb" CCASFLAGS="-m64 -mcpu=power7 -mtune=power7 -O0 -g3 -ggdb" FCFLAGS="-m64 -mcpu=power7 -mtune=power7 -O0 -g3 -ggdb" CFLAGS="-m64 -mcpu=power7 -mtune=power7 -O0 -g3 -ggdb" with_wrapper_cflags="-m64 -mcpu=power7 -mtune=power7 -O0 -g3 -ggdb" with_wrapper_cxxflags="-m64 -mcpu=power7 -mtune=power7 -O0 -g3 -ggdb" with_wrapper_fcflags="-m64 -mcpu=power7 -mtune=power7 -O0 -g3 
-ggdb" prrte-3.0.13/contrib/platform/ibm/debug-power6-gcc0000664000175000017500000000111115145263240022130 0ustar alastairalastairenable_mem_debug=yes enable_debug_symbols=yes enable_debug=yes enable_mem_profile=no enable_contrib_no_build=libnbc enable_ft_thread=no with_verbs=/usr CXXFLAGS="-m64 -mcpu=power6 -mtune=power6 -O0 -g3 -ggdb" CCASFLAGS="-m64 -mcpu=power6 -mtune=power6 -O0 -g3 -ggdb" FCFLAGS="-m64 -mcpu=power6 -mtune=power6 -O0 -g3 -ggdb" CFLAGS="-m64 -mcpu=power6 -mtune=power6 -O0 -g3 -ggdb" with_wrapper_cflags="-m64 -mcpu=power6 -mtune=power6 -O0 -g3 -ggdb" with_wrapper_cxxflags="-m64 -mcpu=power6 -mtune=power6 -O0 -g3 -ggdb" with_wrapper_fcflags="-m64 -mcpu=power6 -mtune=power6 -O0 -g3 -ggdb" prrte-3.0.13/contrib/platform/ibm/optimized-power7-gcc0000664000175000017500000000077715145263240023070 0ustar alastairalastairenable_mem_debug=no enable_mem_profile=no enable_debug=no enable_contrib_no_build=libnbc enable_ft_thread=no with_verbs=/usr enable_shared=yes enable_static=no CXXFLAGS="-m64 -mcpu=power7 -mtune=power7 -O3" CCASFLAGS="-m64 -mcpu=power7 -mtune=power7 -O3" FCFLAGS="-m64 -mcpu=power7 -mtune=power7 -O3" CFLAGS="-m64 -mcpu=power7 -mtune=power7 -O3" with_wrapper_cflags="-m64 -mcpu=power7 -mtune=power7" with_wrapper_cxxflags="-m64 -mcpu=power7 -mtune=power7" with_wrapper_fcflags="-m64 -mcpu=power7 -mtune=power7" prrte-3.0.13/contrib/platform/ibm/debug-ppc32-gcc0000664000175000017500000000116315145263240021644 0ustar alastairalastairenable_mem_debug=yes enable_debug_symbols=yes enable_debug=yes enable_mem_profile=no enable_contrib_no_build=libnbc enable_ft_thread=no with_verbs=/usr CXXFLAGS="-m32 -mcpu=powerpc64 -mtune=powerpc64 -O0 -g3 -ggdb" CCASFLAGS="-m32 -mcpu=powerpc64 -mtune=powerpc64 -O0 -g3 -ggdb" FCFLAGS="-m32 -mcpu=powerpc64 -mtune=powerpc64 -O0 -g3 -ggdb" CFLAGS="-m32 -mcpu=powerpc64 -mtune=powerpc64 -O0 -g3 -ggdb" with_wrapper_cflags="-m32 -mcpu=powerpc64 -mtune=powerpc64 -O0 -g3 -ggdb" with_wrapper_cxxflags="-m32 -mcpu=powerpc64 
-mtune=powerpc64 -O0 -g3 -ggdb" with_wrapper_fcflags="-m32 -mcpu=powerpc64 -mtune=powerpc64 -O0 -g3 -ggdb" prrte-3.0.13/contrib/platform/ibm/optimized-ppc64-gcc0000664000175000017500000000105115145263240022563 0ustar alastairalastairenable_mem_debug=no enable_mem_profile=no enable_debug=no enable_contrib_no_build=libnbc enable_ft_thread=no with_verbs=/usr enable_shared=yes enable_static=no CXXFLAGS="-m64 -mcpu=powerpc64 -mtune=powerpc64 -O3" CCASFLAGS="-m64 -mcpu=powerpc64 -mtune=powerpc64 -O3" FCFLAGS="-m64 -mcpu=powerpc64 -mtune=powerpc64 -O3" CFLAGS="-m64 -mcpu=powerpc64 -mtune=powerpc64 -O3" with_wrapper_cflags="-m64 -mcpu=powerpc64 -mtune=powerpc64" with_wrapper_cxxflags="-m64 -mcpu=powerpc64 -mtune=powerpc64" with_wrapper_fcflags="-m64 -mcpu=powerpc64 -mtune=powerpc64" prrte-3.0.13/contrib/platform/ibm/debug-ppc64-gcc0000664000175000017500000000116315145263240021651 0ustar alastairalastairenable_mem_debug=yes enable_debug_symbols=yes enable_debug=yes enable_mem_profile=no enable_contrib_no_build=libnbc enable_ft_thread=no with_verbs=/usr CXXFLAGS="-m64 -mcpu=powerpc64 -mtune=powerpc64 -O0 -g3 -ggdb" CCASFLAGS="-m64 -mcpu=powerpc64 -mtune=powerpc64 -O0 -g3 -ggdb" FCFLAGS="-m64 -mcpu=powerpc64 -mtune=powerpc64 -O0 -g3 -ggdb" CFLAGS="-m64 -mcpu=powerpc64 -mtune=powerpc64 -O0 -g3 -ggdb" with_wrapper_cflags="-m64 -mcpu=powerpc64 -mtune=powerpc64 -O0 -g3 -ggdb" with_wrapper_cxxflags="-m64 -mcpu=powerpc64 -mtune=powerpc64 -O0 -g3 -ggdb" with_wrapper_fcflags="-m64 -mcpu=powerpc64 -mtune=powerpc64 -O0 -g3 -ggdb" prrte-3.0.13/contrib/platform/ibm/optimized-power6-gcc0000664000175000017500000000077715145263240023067 0ustar alastairalastairenable_mem_debug=no enable_mem_profile=no enable_debug=no enable_contrib_no_build=libnbc enable_ft_thread=no with_verbs=/usr enable_shared=yes enable_static=no CXXFLAGS="-m64 -mcpu=power6 -mtune=power6 -O3" CCASFLAGS="-m64 -mcpu=power6 -mtune=power6 -O3" FCFLAGS="-m64 -mcpu=power6 -mtune=power6 -O3" CFLAGS="-m64 -mcpu=power6 
-mtune=power6 -O3" with_wrapper_cflags="-m64 -mcpu=power6 -mtune=power6" with_wrapper_cxxflags="-m64 -mcpu=power6 -mtune=power6" with_wrapper_fcflags="-m64 -mcpu=power6 -mtune=power6" prrte-3.0.13/contrib/platform/ibm/optimized-ppc32-gcc0000664000175000017500000000105115145263240022556 0ustar alastairalastairenable_mem_debug=no enable_mem_profile=no enable_debug=no enable_contrib_no_build=libnbc enable_ft_thread=no with_verbs=/usr enable_shared=yes enable_static=no CXXFLAGS="-m32 -mcpu=powerpc64 -mtune=powerpc64 -O3" CCASFLAGS="-m32 -mcpu=powerpc64 -mtune=powerpc64 -O3" FCFLAGS="-m32 -mcpu=powerpc64 -mtune=powerpc64 -O3" CFLAGS="-m32 -mcpu=powerpc64 -mtune=powerpc64 -O3" with_wrapper_cflags="-m32 -mcpu=powerpc64 -mtune=powerpc64" with_wrapper_cxxflags="-m32 -mcpu=powerpc64 -mtune=powerpc64" with_wrapper_fcflags="-m32 -mcpu=powerpc64 -mtune=powerpc64" prrte-3.0.13/contrib/platform/clang/0000775000175000017500000000000015145263240017470 5ustar alastairalastairprrte-3.0.13/contrib/platform/clang/align0000664000175000017500000000040515145263240020504 0ustar alastairalastairCC=clang CXX=clang++ CFLAGS="-g -O1 -fsanitize=alignment" CXXFLAGS="-O1 -g -fsanitize=alignment" CCASFLAGS="-O1 -g -fsanitize=alignment" with_wrapper_cflags="-O1 -g -fsanitize=alignment" with_wrapper_cxxflags="-O1 -g -fsanitize=alignment" enable_mpi_fortran=no prrte-3.0.13/contrib/platform/utk/0000775000175000017500000000000015145263240017207 5ustar alastairalastairprrte-3.0.13/contrib/platform/utk/cray_xc30_darter0000664000175000017500000000220415145263240022264 0ustar alastairalastair# Copyright (c) 2014 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. 
# Debug options enable_mem_debug=no enable_mem_profile=no enable_debug=no enable_debug_symbols=no #enable_pretty_print_stacktrace=no with_valgrind=no enable_contrib_no_build= # Extensions and language bindings enable_io_romio=yes enable_oshmem=no enable_mpi_cxx=no enable_mpi_cxx_seek=no enable_cxx_exceptions=no enable_binaries=yes # Components to load/ignore with_alps=yes with_tm=no with_slurm=no with_xpmem=yes with_verbs=no enable_mca_no_build=crs,filem,routed-linear,snapc,pml-example,pml-cm,ess-cnos,grpcomm-cnos,plm-rsh,btl-tcp,oob-ud,ras-simulator,mpool-fake,maffinity-first_use,maffinity-libnuma,paffinity-linux enable_mca_static=btl:ugni,btl:self,btl:vader,pml:ob1,coll:ml #enable_mca_direct=pml-ob1 with_threads=yes enable_heterogeneous=no with_memory_manager=linux #enable_ipv6=no #enable_prte_static_ports=no #enable_pty_support=no # Setup for static build on Cray enable_static=yes enable_shared=no enable_dlopen=no enable_getpwuid=no enable_hwloc_pci=no prrte-3.0.13/contrib/platform/intel/0000775000175000017500000000000015145263240017517 5ustar alastairalastairprrte-3.0.13/contrib/platform/intel/bend/0000775000175000017500000000000015145263240020427 5ustar alastairalastairprrte-3.0.13/contrib/platform/intel/bend/ext.conf0000664000175000017500000000570615145263240022106 0ustar alastairalastair# # Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. # Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2006-2020 Cisco Systems, Inc. All rights reserved # Copyright (c) 2016-2019 Intel, Inc. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow # # $HEADER$ # # This is the default system-wide MCA parameters defaults file. # Specifically, the MCA parameter "mca_param_files" defaults to a # value of # "$HOME/.openmpi/mca-params.conf:$sysconf/openmpi-mca-params.conf" # (this file is the latter of the two). So if the default value of # mca_param_files is not changed, this file is used to set system-wide # MCA parameters. This file can therefore be used to set system-wide # default MCA parameters for all users. Of course, users can override # these values if they want, but this file is an excellent location # for setting system-specific MCA parameters for those users who don't # know / care enough to investigate the proper values for them. # Note that this file is only applicable where it is visible (in a # filesystem sense). Specifically, MPI processes each read this file # during their startup to determine what default values for MCA # parameters should be used. mpirun does not bundle up the values in # this file from the node where it was run and send them to all nodes; # the default value decisions are effectively distributed. Hence, # these values are only applicable on nodes that "see" this file. If # $sysconf is a directory on a local disk, it is likely that changes # to this file will need to be propagated to other nodes. If $sysconf # is a directory that is shared via a networked filesystem, changes to # this file will be visible to all nodes that share this $sysconf. # The format is straightforward: one per line, mca_param_name = # rvalue. Quoting is ignored (so if you use quotes or escape # characters, they'll be included as part of the value). For example: # Disable run-time MPI parameter checking # mpi_param_check = 0 # Note that the value "~/" will be expanded to the current user's home # directory. 
For example: # Change component loading path # component_path = /usr/local/lib/openmpi:~/my_openmpi_components # See "ompi_info --param all all" for a full listing of Open MPI MCA # parameters available and their default values. # # Basic behavior to smooth startup mca_base_component_show_load_errors = 1 prte_abort_timeout = 10 hwloc_base_mem_bind_failure_action = silent prrte-3.0.13/contrib/platform/intel/bend/linux.conf0000664000175000017500000000574115145263240022444 0ustar alastairalastair# # Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. # Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2006-2020 Cisco Systems, Inc. All rights reserved # Copyright (c) 2018-2019 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow # # $HEADER$ # # This is the default system-wide MCA parameters defaults file. # Specifically, the MCA parameter "mca_param_files" defaults to a # value of # "$HOME/.openmpi/mca-params.conf:$sysconf/openmpi-mca-params.conf" # (this file is the latter of the two). So if the default value of # mca_param_files is not changed, this file is used to set system-wide # MCA parameters. This file can therefore be used to set system-wide # default MCA parameters for all users. Of course, users can override # these values if they want, but this file is an excellent location # for setting system-specific MCA parameters for those users who don't # know / care enough to investigate the proper values for them. # Note that this file is only applicable where it is visible (in a # filesystem sense). 
Specifically, MPI processes each read this file # during their startup to determine what default values for MCA # parameters should be used. mpirun does not bundle up the values in # this file from the node where it was run and send them to all nodes; # the default value decisions are effectively distributed. Hence, # these values are only applicable on nodes that "see" this file. If # $sysconf is a directory on a local disk, it is likely that changes # to this file will need to be propagated to other nodes. If $sysconf # is a directory that is shared via a networked filesystem, changes to # this file will be visible to all nodes that share this $sysconf. # The format is straightforward: one per line, mca_param_name = # rvalue. Quoting is ignored (so if you use quotes or escape # characters, they'll be included as part of the value). For example: # Disable run-time MPI parameter checking # mpi_param_check = 0 # Note that the value "~/" will be expanded to the current user's home # directory. For example: # Change component loading path # component_path = /usr/local/lib/openmpi:~/my_openmpi_components # See "ompi_info --param all all" for a full listing of Open MPI MCA # parameters available and their default values. # # Basic behavior to smooth startup mca_base_component_show_load_errors = 1 prte_abort_timeout = 10 hwloc_base_mem_bind_failure_action = silent oob=^ud btl=self,vader,tcp prrte-3.0.13/contrib/platform/intel/bend/ubuntu.conf0000664000175000017500000000577315145263240022634 0ustar alastairalastair# # Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. # Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. 
# Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2006-2020 Cisco Systems, Inc. All rights reserved # Copyright (c) 2018-2019 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow # # $HEADER$ # # This is the default system-wide MCA parameters defaults file. # Specifically, the MCA parameter "mca_param_files" defaults to a # value of # "$HOME/.openmpi/mca-params.conf:$sysconf/openmpi-mca-params.conf" # (this file is the latter of the two). So if the default value of # mca_param_files is not changed, this file is used to set system-wide # MCA parameters. This file can therefore be used to set system-wide # default MCA parameters for all users. Of course, users can override # these values if they want, but this file is an excellent location # for setting system-specific MCA parameters for those users who don't # know / care enough to investigate the proper values for them. # Note that this file is only applicable where it is visible (in a # filesystem sense). Specifically, MPI processes each read this file # during their startup to determine what default values for MCA # parameters should be used. mpirun does not bundle up the values in # this file from the node where it was run and send them to all nodes; # the default value decisions are effectively distributed. Hence, # these values are only applicable on nodes that "see" this file. If # $sysconf is a directory on a local disk, it is likely that changes # to this file will need to be propagated to other nodes. If $sysconf # is a directory that is shared via a networked filesystem, changes to # this file will be visible to all nodes that share this $sysconf. # The format is straightforward: one per line, mca_param_name = # rvalue. Quoting is ignored (so if you use quotes or escape # characters, they'll be included as part of the value). 
For example: # Disable run-time MPI parameter checking # mpi_param_check = 0 # Note that the value "~/" will be expanded to the current user's home # directory. For example: # Change component loading path # component_path = /usr/local/lib/openmpi:~/my_openmpi_components # See "ompi_info --param all all" for a full listing of Open MPI MCA # parameters available and their default values. # # Basic behavior to smooth startup mca_base_component_show_load_errors = 1 prte_abort_timeout = 10 hwloc_base_mem_bind_failure_action = silent oob=^ud btl=self,vader,tcp btl_tcp_if_include=enp0s3 prrte-3.0.13/contrib/platform/intel/bend/mac.conf0000664000175000017500000000570615145263240022046 0ustar alastairalastair# # Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. # Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2006-2020 Cisco Systems, Inc. All rights reserved # Copyright (c) 2019 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow # # $HEADER$ # # This is the default system-wide MCA parameters defaults file. # Specifically, the MCA parameter "mca_param_files" defaults to a # value of # "$HOME/.openmpi/mca-params.conf:$sysconf/openmpi-mca-params.conf" # (this file is the latter of the two). So if the default value of # mca_param_files is not changed, this file is used to set system-wide # MCA parameters. This file can therefore be used to set system-wide # default MCA parameters for all users. 
Of course, users can override # these values if they want, but this file is an excellent location # for setting system-specific MCA parameters for those users who don't # know / care enough to investigate the proper values for them. # Note that this file is only applicable where it is visible (in a # filesystem sense). Specifically, MPI processes each read this file # during their startup to determine what default values for MCA # parameters should be used. mpirun does not bundle up the values in # this file from the node where it was run and send them to all nodes; # the default value decisions are effectively distributed. Hence, # these values are only applicable on nodes that "see" this file. If # $sysconf is a directory on a local disk, it is likely that changes # to this file will need to be propagated to other nodes. If $sysconf # is a directory that is shared via a networked filesystem, changes to # this file will be visible to all nodes that share this $sysconf. # The format is straightforward: one per line, mca_param_name = # rvalue. Quoting is ignored (so if you use quotes or escape # characters, they'll be included as part of the value). For example: # Disable run-time MPI parameter checking # mpi_param_check = 0 # Note that the value "~/" will be expanded to the current user's home # directory. For example: # Change component loading path # component_path = /usr/local/lib/openmpi:~/my_openmpi_components # See "ompi_info --param all all" for a full listing of Open MPI MCA # parameters available and their default values. # # Basic behavior to smooth startup mca_base_component_show_load_errors = 1 prte_abort_timeout = 10 hwloc_base_mem_bind_failure_action = silent prrte-3.0.13/contrib/platform/intel/bend/linux-optimized.conf0000664000175000017500000000631515145263240024444 0ustar alastairalastair# # Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. 
# Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. # Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2006-2020 Cisco Systems, Inc. All rights reserved # Copyright (c) 2019 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow # # $HEADER$ # # This is the default system-wide MCA parameters defaults file. # Specifically, the MCA parameter "mca_param_files" defaults to a # value of # "$HOME/.openmpi/mca-params.conf:$sysconf/openmpi-mca-params.conf" # (this file is the latter of the two). So if the default value of # mca_param_files is not changed, this file is used to set system-wide # MCA parameters. This file can therefore be used to set system-wide # default MCA parameters for all users. Of course, users can override # these values if they want, but this file is an excellent location # for setting system-specific MCA parameters for those users who don't # know / care enough to investigate the proper values for them. # Note that this file is only applicable where it is visible (in a # filesystem sense). Specifically, MPI processes each read this file # during their startup to determine what default values for MCA # parameters should be used. mpirun does not bundle up the values in # this file from the node where it was run and send them to all nodes; # the default value decisions are effectively distributed. Hence, # these values are only applicable on nodes that "see" this file. If # $sysconf is a directory on a local disk, it is likely that changes # to this file will need to be propagated to other nodes. If $sysconf # is a directory that is shared via a networked filesystem, changes to # this file will be visible to all nodes that share this $sysconf. 
# The format is straightforward: one per line, mca_param_name = # rvalue. Quoting is ignored (so if you use quotes or escape # characters, they'll be included as part of the value). For example: # Disable run-time MPI parameter checking # mpi_param_check = 0 # Note that the value "~/" will be expanded to the current user's home # directory. For example: # Change component loading path # component_path = /usr/local/lib/openmpi:~/my_openmpi_components # See "ompi_info --param all all" for a full listing of Open MPI MCA # parameters available and their default values. # # Basic behavior to smooth startup mca_base_component_show_load_errors = 1 prte_abort_timeout = 10 hwloc_base_mem_bind_failure_action = silent ## Protect the shared file systems ## Add the interface for out-of-band communication ## and set it up oob_tcp_peer_retries = 120 #oob_tcp_connect_timeout=600 ## Define the MPI interconnects btl = sm,tcp,self ## Setup shared memory btl_sm_free_list_max = 768 prrte-3.0.13/contrib/platform/intel/bend/gadget.conf0000664000175000017500000000573415145263240022542 0ustar alastairalastair# # Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. # Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2006-2020 Cisco Systems, Inc. All rights reserved # Copyright (c) 2017-2019 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow # # $HEADER$ # # This is the default system-wide MCA parameters defaults file. 
# Specifically, the MCA parameter "mca_param_files" defaults to a # value of # "$HOME/.openmpi/mca-params.conf:$sysconf/openmpi-mca-params.conf" # (this file is the latter of the two). So if the default value of # mca_param_files is not changed, this file is used to set system-wide # MCA parameters. This file can therefore be used to set system-wide # default MCA parameters for all users. Of course, users can override # these values if they want, but this file is an excellent location # for setting system-specific MCA parameters for those users who don't # know / care enough to investigate the proper values for them. # Note that this file is only applicable where it is visible (in a # filesystem sense). Specifically, MPI processes each read this file # during their startup to determine what default values for MCA # parameters should be used. mpirun does not bundle up the values in # this file from the node where it was run and send them to all nodes; # the default value decisions are effectively distributed. Hence, # these values are only applicable on nodes that "see" this file. If # $sysconf is a directory on a local disk, it is likely that changes # to this file will need to be propagated to other nodes. If $sysconf # is a directory that is shared via a networked filesystem, changes to # this file will be visible to all nodes that share this $sysconf. # The format is straightforward: one per line, mca_param_name = # rvalue. Quoting is ignored (so if you use quotes or escape # characters, they'll be included as part of the value). For example: # Disable run-time MPI parameter checking # mpi_param_check = 0 # Note that the value "~/" will be expanded to the current user's home # directory. For example: # Change component loading path # component_path = /usr/local/lib/openmpi:~/my_openmpi_components # See "ompi_info --param all all" for a full listing of Open MPI MCA # parameters available and their default values. 
# # Basic behavior to smooth startup mca_base_component_show_load_errors = 1 prte_abort_timeout = 10 hwloc_base_mem_bind_failure_action = silent btl_ugni_rcache=grdma prrte-3.0.13/contrib/platform/intel/bend/linux0000664000175000017500000000130215145263240021505 0ustar alastairalastairenable_prterun_prefix_by_default=yes enable_mpi_thread_multiple=no enable_mem_debug=no enable_mem_profile=no enable_debug_symbols=yes enable_binaries=yes enable_heterogeneous=no enable_picky=yes enable_debug=yes enable_shared=yes enable_static=no enable_memchecker=no enable_ipv6=no enable_mpi_fortran=no enable_mpi_cxx=no enable_mpi_cxx_seek=no enable_cxx_exceptions=no enable_mpi_java=no enable_io_romio=no enable_contrib_no_build=libnbc enable_install_libpmix=yes with_memory_manager=no with_tm=no with_psm=no with_psm2=no with_devel_headers=yes with_libfabric=no with_portals=no with_valgrind=no if [ -n "$SLURMHOME" ] ; then with_slurm=$SLURMHOME with_pmi=$SLURMHOME else with_slurm=no fi prrte-3.0.13/contrib/platform/intel/bend/ext0000664000175000017500000000134315145263240021153 0ustar alastairalastairenable_prterun_prefix_by_default=yes enable_mpi_thread_multiple=no enable_mem_debug=no enable_mem_profile=no enable_debug_symbols=yes enable_binaries=yes enable_heterogeneous=no enable_picky=yes enable_debug=yes enable_shared=yes enable_static=no enable_memchecker=no enable_ipv6=no enable_mpi_fortran=yes enable_mpi_cxx=no enable_mpi_cxx_seek=no enable_cxx_exceptions=no enable_mpi_java=no enable_io_romio=no enable_contrib_no_build=libnbc with_memory_manager=no with_tm=no with_devel_headers=yes with_portals=no with_valgrind=no if [ -n "$SLURMHOME" ] ; then with_slurm=$SLURMHOME with_pmi=$SLURMHOME else with_slurm=no fi with_libevent=/home/common/local with_hwloc=/home/common/local with_pmix=/home/common/pmix/build/ompi prrte-3.0.13/contrib/platform/intel/bend/pi0000664000175000017500000000055715145263240020771 0ustar alastairalastairenable_prterun_prefix_by_default=yes 
enable_mpi_thread_multiple=no enable_mem_debug=no enable_mem_profile=no enable_debug_symbols=yes enable_binaries=yes enable_heterogeneous=no enable_picky=yes enable_debug=yes enable_shared=yes enable_static=no enable_memchecker=no enable_ipv6=no enable_install_libpmix=yes with_memory_manager=no with_tm=no with_devel_headers=yes prrte-3.0.13/contrib/platform/intel/bend/mac0000664000175000017500000000031315145263240021107 0ustar alastairalastairenable_sensors=no enable_debug_symbols=yes enable_binaries=yes enable_picky=no enable_debug=yes enable_shared=yes enable_static=no enable_ipv6=no with_devel_headers=yes enable_prte_prefix_by_default=yes prrte-3.0.13/contrib/platform/intel/bend/mac-optimized0000664000175000017500000000124615145263240023117 0ustar alastairalastairenable_opal_multi_threads=yes enable_prte_progress_threads=yes enable_mem_debug=no enable_mem_profile=no enable_debug_symbols=no enable_binaries=yes enable_heterogeneous=no enable_picky=no enable_debug=no enable_shared=yes enable_static=no enable_io_romio=no enable_ipv6=no enable_mpi_fortran=no enable_mpi_cxx=no enable_mpi_cxx_seek=no enable_mpi_java=yes enable_memchecker=no enable_mca_no_build=crs,memchecker,snapc,rml-ftrm,filem-rsh enable_contrib_no_build=libnbc with_memory_manager=no with_devel_headers=yes with_xgrid=no with_slurm=no with_jdk_bindir=/usr/bin with_jdk_headers=/System/Library/Frameworks/JavaVM.framework/Versions/Current/Headers with_mpi_param_check=no prrte-3.0.13/contrib/platform/intel/bend/mac-optimized.conf0000664000175000017500000000634115145263240024044 0ustar alastairalastair# # Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. # Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. 
# Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2006-2020 Cisco Systems, Inc. All rights reserved # Copyright (c) 2019 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow # # $HEADER$ # # This is the default system-wide MCA parameters defaults file. # Specifically, the MCA parameter "mca_param_files" defaults to a # value of # "$HOME/.openmpi/mca-params.conf:$sysconf/openmpi-mca-params.conf" # (this file is the latter of the two). So if the default value of # mca_param_files is not changed, this file is used to set system-wide # MCA parameters. This file can therefore be used to set system-wide # default MCA parameters for all users. Of course, users can override # these values if they want, but this file is an excellent location # for setting system-specific MCA parameters for those users who don't # know / care enough to investigate the proper values for them. # Note that this file is only applicable where it is visible (in a # filesystem sense). Specifically, MPI processes each read this file # during their startup to determine what default values for MCA # parameters should be used. mpirun does not bundle up the values in # this file from the node where it was run and send them to all nodes; # the default value decisions are effectively distributed. Hence, # these values are only applicable on nodes that "see" this file. If # $sysconf is a directory on a local disk, it is likely that changes # to this file will need to be propagated to other nodes. If $sysconf # is a directory that is shared via a networked filesystem, changes to # this file will be visible to all nodes that share this $sysconf. # The format is straightforward: one per line, mca_param_name = # rvalue. Quoting is ignored (so if you use quotes or escape # characters, they'll be included as part of the value). 
For example: # Disable run-time MPI parameter checking # mpi_param_check = 0 # Note that the value "~/" will be expanded to the current user's home # directory. For example: # Change component loading path # component_path = /usr/local/lib/openmpi:~/my_openmpi_components # See "ompi_info --param all all" for a full listing of Open MPI MCA # parameters available and their default values. # # Basic behavior to smooth startup mca_base_component_show_load_errors = 0 mpi_param_check = 0 prte_abort_timeout = 10 hwloc_base_mem_bind_failure_action = silent ## Protect the shared file systems ## Add the interface for out-of-band communication ## and set it up oob_tcp_peer_retries = 120 #oob_tcp_connect_timeout=600 ## Define the MPI interconnects btl = sm,tcp,self ## Setup shared memory btl_sm_free_list_max = 768 prrte-3.0.13/contrib/platform/intel/bend/ubuntu0000664000175000017500000000130215145263240021670 0ustar alastairalastairenable_prterun_prefix_by_default=yes enable_mpi_thread_multiple=no enable_mem_debug=no enable_mem_profile=no enable_debug_symbols=yes enable_binaries=yes enable_heterogeneous=no enable_picky=yes enable_debug=yes enable_shared=yes enable_static=no enable_memchecker=no enable_ipv6=no enable_mpi_fortran=no enable_mpi_cxx=no enable_mpi_cxx_seek=no enable_cxx_exceptions=no enable_mpi_java=no enable_io_romio=no enable_contrib_no_build=libnbc enable_install_libpmix=yes with_memory_manager=no with_tm=no with_psm=no with_psm2=no with_devel_headers=yes with_libfabric=no with_portals=no with_valgrind=no if [ -n "$SLURMHOME" ] ; then with_slurm=$SLURMHOME with_pmi=$SLURMHOME else with_slurm=no fi prrte-3.0.13/contrib/platform/intel/bend/pi.conf0000664000175000017500000000570615145263240021716 0ustar alastairalastair# # Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. 
# Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. # Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2006-2020 Cisco Systems, Inc. All rights reserved # Copyright (c) 2018-2019 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow # # $HEADER$ # # This is the default system-wide MCA parameters defaults file. # Specifically, the MCA parameter "mca_param_files" defaults to a # value of # "$HOME/.openmpi/mca-params.conf:$sysconf/openmpi-mca-params.conf" # (this file is the latter of the two). So if the default value of # mca_param_files is not changed, this file is used to set system-wide # MCA parameters. This file can therefore be used to set system-wide # default MCA parameters for all users. Of course, users can override # these values if they want, but this file is an excellent location # for setting system-specific MCA parameters for those users who don't # know / care enough to investigate the proper values for them. # Note that this file is only applicable where it is visible (in a # filesystem sense). Specifically, MPI processes each read this file # during their startup to determine what default values for MCA # parameters should be used. mpirun does not bundle up the values in # this file from the node where it was run and send them to all nodes; # the default value decisions are effectively distributed. Hence, # these values are only applicable on nodes that "see" this file. If # $sysconf is a directory on a local disk, it is likely that changes # to this file will need to be propagated to other nodes. If $sysconf # is a directory that is shared via a networked filesystem, changes to # this file will be visible to all nodes that share this $sysconf. 
# The format is straightforward: one per line, mca_param_name = # rvalue. Quoting is ignored (so if you use quotes or escape # characters, they'll be included as part of the value). For example: # Disable run-time MPI parameter checking # mpi_param_check = 0 # Note that the value "~/" will be expanded to the current user's home # directory. For example: # Change component loading path # component_path = /usr/local/lib/openmpi:~/my_openmpi_components # See "ompi_info --param all all" for a full listing of Open MPI MCA # parameters available and their default values. # # Basic behavior to smooth startup mca_base_component_show_load_errors = 1 prte_abort_timeout = 10 hwloc_base_mem_bind_failure_action = silent prrte-3.0.13/contrib/platform/intel/bend/linux-optimized0000664000175000017500000000131015145263240023506 0ustar alastairalastairenable_opal_multi_threads=yes enable_prte_progress_threads=yes enable_ft_thread=no enable_mem_debug=no enable_mem_profile=no enable_debug_symbols=no enable_binaries=yes enable_heterogeneous=no enable_picky=no enable_debug=no enable_shared=yes enable_static=no enable_memchecker=no enable_ipv6=no enable_mpi_fortran=no enable_mpi_cxx=no enable_mpi_cxx_seek=no enable_cxx_exceptions=no enable_mpi_java=yes enable_io_romio=no enable_mca_no_build=crs,memchecker,snapc,rml-ftrm,filem-rsh enable_contrib_no_build=libnbc with_memory_manager=no with_tm=no with_devel_headers=yes with_portals=no with_valgrind=no with_mpi_param_check=no if [ -n "$SLURMHOME" ] ; then with_slurm=$SLURMHOME with_pmi=$SLURMHOME fi prrte-3.0.13/contrib/platform/intel/bend/gadget-optimized0000664000175000017500000000105715145263240023612 0ustar alastairalastairenable_prterun_prefix_by_default=yes enable_mpi_thread_multiple=no enable_mem_debug=no enable_mem_profile=no enable_debug_symbols=no enable_binaries=yes enable_heterogeneous=no enable_picky=yes enable_debug=no enable_shared=no enable_static=yes enable_memchecker=no enable_ipv6=no enable_mpi_fortran=no enable_mpi_cxx=no 
enable_mpi_cxx_seek=no enable_cxx_exceptions=no enable_oshmem=no enable_mpi_java=no enable_io_romio=no enable_contrib_no_build=libnbc with_memory_manager=no with_tm=no with_verbs=no with_devel_headers=yes with_portals=no with_valgrind=no prrte-3.0.13/contrib/platform/intel/bend/gadget0000664000175000017500000000122515145263240021605 0ustar alastairalastairenable_prterun_prefix_by_default=yes enable_mpi_thread_multiple=no enable_mem_debug=no enable_mem_profile=no enable_debug_symbols=yes enable_binaries=yes enable_heterogeneous=no enable_picky=yes enable_debug=yes enable_shared=yes enable_static=no enable_memchecker=no enable_ipv6=no enable_mpi_fortran=no enable_mpi_cxx=no enable_mpi_cxx_seek=no enable_cxx_exceptions=no enable_oshmem=no enable_mpi_java=no enable_io_romio=no enable_builtin_atomics=no enable_contrib_no_build=libnbc enable_mca_no_build=btl-tcp,btl-sm,rcache-udreg enable_mca_direct=pml-ob1 with_memory_manager=no with_tm=no with_verbs=no with_devel_headers=yes with_portals=no with_valgrind=no prrte-3.0.13/contrib/platform/intel/bend/gadget-optimized.conf0000664000175000017500000000573415145263240024544 0ustar alastairalastair# # Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. # Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2006-2020 Cisco Systems, Inc. All rights reserved # Copyright (c) 2017-2019 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow # # $HEADER$ # # This is the default system-wide MCA parameters defaults file. 
# Specifically, the MCA parameter "mca_param_files" defaults to a # value of # "$HOME/.openmpi/mca-params.conf:$sysconf/openmpi-mca-params.conf" # (this file is the latter of the two). So if the default value of # mca_param_files is not changed, this file is used to set system-wide # MCA parameters. This file can therefore be used to set system-wide # default MCA parameters for all users. Of course, users can override # these values if they want, but this file is an excellent location # for setting system-specific MCA parameters for those users who don't # know / care enough to investigate the proper values for them. # Note that this file is only applicable where it is visible (in a # filesystem sense). Specifically, MPI processes each read this file # during their startup to determine what default values for MCA # parameters should be used. mpirun does not bundle up the values in # this file from the node where it was run and send them to all nodes; # the default value decisions are effectively distributed. Hence, # these values are only applicable on nodes that "see" this file. If # $sysconf is a directory on a local disk, it is likely that changes # to this file will need to be propagated to other nodes. If $sysconf # is a directory that is shared via a networked filesystem, changes to # this file will be visible to all nodes that share this $sysconf. # The format is straightforward: one per line, mca_param_name = # rvalue. Quoting is ignored (so if you use quotes or escape # characters, they'll be included as part of the value). For example: # Disable run-time MPI parameter checking # mpi_param_check = 0 # Note that the value "~/" will be expanded to the current user's home # directory. For example: # Change component loading path # component_path = /usr/local/lib/openmpi:~/my_openmpi_components # See "ompi_info --param all all" for a full listing of Open MPI MCA # parameters available and their default values. 
# # Basic behavior to smooth startup mca_base_component_show_load_errors = 1 prte_abort_timeout = 10 hwloc_base_mem_bind_failure_action = silent btl_ugni_rcache=grdma prrte-3.0.13/contrib/platform/ornl/0000775000175000017500000000000015145263240017356 5ustar alastairalastairprrte-3.0.13/contrib/platform/ornl/ornl_configure.pgi0000664000175000017500000000527115145263240023077 0ustar alastairalastair#!/bin/bash echo "" echo "If you have not loaded the xt-catamount module or your version" echo "of automake is below 1.10.1, it is in your best interest to" echo "type now. Note that loading the automake module on" echo "jaguar.ccs.ornl.gov/jaguarpf.ccs.ornl.gov will meet the" echo "latter requirement." read ANS # Change the following for install path. Note: VER appends to path. SRC_DIR=.. INSTALL_ROOT=/tmp/work/keh/ompi-pgi-ken VER=standard BUILD=pgi SVER=cnl`echo "$XTOS_VERSION" | cut -c1-3` CVER=${BUILD}`pgcc -V | awk '/pgcc/{print $2}'|sed 's/-/./'` SW_BLDDIR=${INSTALL_ROOT}/${VER}/${SVER}_${CVER} PLATFORM=ornl/cray_xt_cnl_romio_debug XTOSVER=`echo "$XTOS_VERSION" | cut -c1,3` # Set header file paths (structure of libsci changed at version 10.3.4). if [ -d ${LIBSCI_BASE_DIR}/${BUILD}/include ] then SCI_INC=-I${LIBSCI_BASE_DIR}/${BUILD}/include SCI_LIB=-L${LIBSCI_BASE_DIR}/${BUILD}/lib else SCI_INC=-I${LIBSCI_BASE_DIR}/${BUILD}/snos64/include SCI_LIB=-L${LIBSCI_BASE_DIR}/${BUILD}/snos64/lib fi if [ ! 
-d ${CATAMOUNT_DIR} ] ; then echo "Please load xt-catamount module" echo " module load xt-catamount fi CAT_INC=-I${CATAMOUNT_DIR}/catamount/linux/include PCT_INC=-I${PE_DIR}/include PCT_LIB=-L${PE_DIR}/lib/snos64 MPT_LIB=-L${MPT_DIR}/util/lib MPT_SO=-R${MPT_DIR}/util/lib SER_LIB=-L${SE_DIR}/lib/snos64 BASE_LPATHS="${MPT_LIB} ${PCT_LIB} ${SER_LIB}" BASE_LIBS="-lrt -lpct -lalpslli -lalpsutil -lportals -lpthread -lm" MAKE_OPTS="-j 4 orted_LDFLAGS=-all-static" ID=x86_64-cray-linux-gnu JD=x86_64-unknown-linux-gnu umask 02 rm -f build.log config.log # Run the aclocal/autoconf/automake hierarchy. # cd $SRC_DIR # ./autogen.pl 2>&1 | tee -a build.log # cd $OLDPWD # Do the configure. $SRC_DIR/configure \ NM=/usr/bin/nm \ CC=pgcc \ CXX=pgCC \ F77=pgf77 \ FC=pgf90 \ CPPFLAGS="${CAT_INC} ${PCT_INC}" \ LDFLAGS="${BASE_LPATHS} ${MPT_SO}" \ LIBS="${BASE_LIBS}" \ --host=${ID} \ --build=${JD} \ --disable-mpi-cxx \ --disable-mpi-f77 \ --disable-mpi-f90 \ --disable-heterogeneous \ --without-tm \ --with-alps \ --with-threads \ --with-platform=${SRC_DIR}/contrib/platform/${PLATFORM} \ --with-io-romio-flags="host_alias=${ID} build_alias=${JD} \ --enable-ltdl-convenience --no-recursion" \ --with-wrapper-cflags="${SCI_INC}" \ --with-wrapper-ldflags="-Bstatic ${SCI_LIB} ${BASE_LPATHS}" \ --with-wrapper-libs="-lsci_quadcore" \ --prefix=${SW_BLDDIR} 2>&1 | tee -a build.log # Build. gmake ${MAKE_OPTS} all 2>&1 | tee -a build.log # Install. gmake ${MAKE_OPTS} install 2>&1 | tee -a build.log prrte-3.0.13/contrib/platform/ornl/ornl_configure_self_contained0000664000175000017500000001161415145263240025354 0ustar alastairalastair#!/bin/sh # # Self-contained configure script, that does not rely # on cross-compilation, aka no need for a platforms-file. # # If the below env flags are not set, initialize to default (gcc, std dir on Jaguar)... 
# This file works stand-alone, but is also integrated into the NCCS swtools environment # # Compilation should be done as VPATH if [ -d .svn -o -f AUTHORS ] ; then echo WARNING: Should not compile in source directory echo Please create a directory and adapt SRCDIR in this script return fi FLAG=0 [[ -z ${XTOS_VERSION} ]] && echo "XTOS_VERSION is not set. Please load the xt-os module" && FLAG=1 [[ -z ${PE_DIR} ]] && echo "PE_DIR is not set. Please load the xt-pe module" && FLAG=1 [[ -z ${SE_DIR} ]] && echo "SE_DIR is not set. Please load the xt-service module" && FLAG=1 [[ -z ${MPT_DIR} ]] && echo "MPT_DIR is not set. Please load the xt-mpt module" && FLAG=1 [[ -z ${PE_ENV} ]] && echo "PE_ENV is not set. Please load the PrgEnv module" && FLAG=1 if [ $FLAG -eq 1 ] ; then echo -n "One of the modules was not properly loaded (might want to CTRL-C)." sleep 1 ; echo -n "." sleep 1 ; echo -n "." sleep 1 ; echo -n "." sleep 1 ; echo "." fi ################################################################### # If the env flags are not set, initialize to default... SRCDIR=${SRCDIR:-..} COMPILER=${COMPILER:-gnu} INSTALL_ROOT=${INSTALL_ROOT:-/sw/xt5/ompi} VERSION=${VERSION:-"`${SRCDIR}/config/opal_get_version.sh ${SRCDIR}/VERSION`"} SVER=${SVER:-"cnl`echo "${XTOS_VERSION}" | cut -c1-3`"} CVER=${CVER:-"${COMPILER}`gcc --version | awk '/gcc/{print $3}'`"} # The following is all derived PREFIX=${PREFIX:-${INSTALL_ROOT}/${VERSION}/${SVER}_${CVER}} ################################################################### case "$COMPILER" in "cray" ) COMPILER_FLAGS="CC=craycc CXX=crayc++ F77=crayftn FC=crayftn" CFLAGS="-O2 -h cpu=istanbul -h negmsgs ${CFLAGS}" CXXFLAGS="-O2 -h cpu=istanbul -h negmsgs ${CXXFLAGS}" LDFLAGS="${LDFLAGS}" # Static compilation is the default, there is not even a flag... 
WRAPPER_LDFLAGS="" ;; "gnu" ) # No need to specify compiler CFLAGS="-O2 -fomit-frame-pointer ${CFLAGS}" CXXFLAGS="-O2 -fomit-frame-pointer ${CXXFLAGS}" LDFLAGS="${LDFLAGS}" # REQUIRED WRAPPER_LDFLAGS="-static" ;; "intel" ) COMPILER_FLAGS="CC=icc CXX=icpc F77=ifort FC=ifort" CFLAGS="-O2 ${CFLAGS}" CXXFLAGS="-O2 ${CXXFLAGS}" FFLAGS="-O2 ${FFLAGS}" FCFLAGS="-O2 ${FCFLAGS}" LDFLAGS="${LDFLAGS}" # REQUIRED WRAPPER_LDFLAGS="-static" ;; "pathscale" ) COMPILER_FLAGS="CC=pathcc CXX=pathCC F77=pathf90 FC=pathf90" CFLAGS="-O2 -TARG:processor=barcelona -TARG:sse2=on -TARG:sse3=on ${CFLAGS}" CXXFLAGS="-O2 -TARG:processor=barcelona -TARG:sse2=on -TARG:sse3=on ${CXXFLAGS}" # REQUIRED LDFLAGS="${LDFLAGS}" WRAPPER_LDFLAGS="-static" ;; "pgi" ) COMPILER_FLAGS="CC=pgcc CXX=pgCC F77=pgf77 FC=pgf90" CFLAGS="-O2 -tp istanbul-64 ${CFLAGS}" CXXFLAGS="-O2 -tp istanbul-64 ${CXXFLAGS}" # REQUIRED FFLAGS="-Mnomain $PGI/linux86-64/default/lib/pgfmain.o ${FFLAGS}" FCFLAGS="-Mnomain $PGI/linux86-64/default/lib/pgfmain.o ${FCFLAGS}" LDFLAGS="-L$PGI/linux86-64/default/lib ${LDFLAGS}" WRAPPER_LDFLAGS="-Bstatic" ;; * ) echo "Error: Unknown Compiler: $COMPILER" return ;; esac # If further packages should be disabled, insert # --disable-mpi-cxx --disable-io-romio \ $SRCDIR/configure \ --prefix=$PREFIX \ --enable-static --disable-shared --disable-dlopen --disable-pretty-print-stacktrace --disable-pty-support \ --with-threads --with-memory-manager=none \ --without-tm --with-alps --with-portals --with-portals-config=xt3-modex \ --enable-mca-no-build=maffinity-first_use,maffinity-libnuma,ess-cnos,ess-slurm,ess-slurmd,filem-rsh,grpcomm-cnos,plm-rsh,plm-rshd,plm-slurm,pml-dr,btl-tcp,notifier-twitter,notifier-smtp,pml-csum,pml-v,vprotocol-pessimist,dpm-prte \ --with-wrapper-ldflags="-L${PE_DIR}/lib/snos64 -L${SE_DIR}/lib/snos64 -L/opt/xt-mpt/default/lib/snos64/ ${WRAPPER_LDFLAGS}" \ --with-wrapper-libs="-lpct -lalpslli -lalpsutil -lportals -lpthread -lm" \ ${COMPILER_FLAGS} \ CPPFLAGS="-DNDEBUG 
-I${PE_DIR}/include ${CPPFLAGS}" \ CFLAGS="${CFLAGS}" \ CXXFLAGS="${CXXFLAGS}" \ FFLAGS="-I${PE_DIR}/include ${FFLAGS}" \ FCFLAGS="-I${PE_DIR}/include ${FCFLAGS}" \ LDFLAGS="-L${PE_DIR}/lib/snos64 -L${SE_DIR}/lib/snos64 -L/opt/xt-mpt/default/lib/snos64/ ${LDFLAGS}" \ LIBS="${LIBS} -lpct -lalpslli -lalpsutil -lportals -lpthread -lrt -lm" $* | tee build.log # # To build orted static, use the libtool-flag -all-static # make -s -j4 orted_LDFLAGS=-all-static all | tee -a build.log make -s orted_LDFLAGS=-all-static install | tee -a install.log prrte-3.0.13/contrib/platform/ornl/cray_xt_cnl_romio0000664000175000017500000001152215145263240023014 0ustar alastairalastairenable_mem_debug=no enable_mem_profile=no enable_debug=no enable_debug_symbols=no enable_io_romio=yes enable_static=yes enable_shared=no with_threads=yes enable_pretty_print_stacktrace=no enable_dlopen=no with_portals_config=cnl_modex with_memory_manager=none enable_mca_no_build=maffinity-first_use,maffinity-libnuma,paffinity-linux,ess-cnos,pml-dr,filem-rsh,grpcomm-cnos,rmgr-cnos,rml-cnos enable_heterogeneous=no enable_pty_support=no enable_binaries=yes ompi_cv_f77_sizeof_LOGICAL=${ompi_cv_f77_sizeof_LOGICAL=4} ompi_cv_f77_alignment_LOGICAL=${ompi_cv_f77_alignment_LOGICAL=4} ompi_cv_f77_sizeof_INTEGER=${ompi_cv_f77_sizeof_INTEGER=4} ompi_cv_f77_alignment_INTEGER=${ompi_cv_f77_alignment_INTEGER=4} ompi_cv_f77_sizeof_INTEGERp1=${ompi_cv_f77_sizeof_INTEGERp1=1} ompi_cv_f77_alignment_INTEGERp1=${ompi_cv_f77_alignment_INTEGERp1=1} ompi_cv_f77_sizeof_INTEGERp2=${ompi_cv_f77_sizeof_INTEGERp2=2} ompi_cv_f77_alignment_INTEGERp2=${ompi_cv_f77_alignment_INTEGERp2=2} ompi_cv_f77_sizeof_INTEGERp4=${ompi_cv_f77_sizeof_INTEGERp4=4} ompi_cv_f77_alignment_INTEGERp4=${ompi_cv_f77_alignment_INTEGERp4=4} ompi_cv_f77_sizeof_INTEGERp8=${ompi_cv_f77_sizeof_INTEGERp8=8} ompi_cv_f77_alignment_INTEGERp8=${ompi_cv_f77_alignment_INTEGERp8=8} ompi_cv_f77_sizeof_INTEGERp16=${ompi_cv_f77_sizeof_INTEGERp16=16} 
ompi_cv_f77_alignment_INTEGERp16=${ompi_cv_f77_alignment_INTEGERp16=8} ompi_cv_f77_sizeof_REAL=${ompi_cv_f77_sizeof_REAL=4} ompi_cv_f77_alignment_REAL=${ompi_cv_f77_alignment_REAL=4} ompi_cv_f77_sizeof_REALp2=${ompi_cv_f77_sizeof_REALp2=2} ompi_cv_f77_alignment_REALp2=${ompi_cv_f77_alignment_REALp2=2} ompi_cv_f77_sizeof_REALp4=${ompi_cv_f77_sizeof_REALp4=4} ompi_cv_f77_alignment_REALp4=${ompi_cv_f77_alignment_REALp4=4} ompi_cv_f77_sizeof_REALp8=${ompi_cv_f77_sizeof_REALp8=8} ompi_cv_f77_alignment_REALp8=${ompi_cv_f77_alignment_REALp8=8} ompi_cv_f77_sizeof_REALp16=${ompi_cv_f77_sizeof_REALp16=16} ompi_cv_f77_alignment_REALp16=${ompi_cv_f77_alignment_REALp16=8} ompi_cv_f77_sizeof_DOUBLE_PRECISION=${ompi_cv_f77_sizeof_DOUBLE_PRECISION=8} ompi_cv_f77_alignment_DOUBLE_PRECISION=${ompi_cv_f77_alignment_DOUBLE_PRECISION=8} ompi_cv_f77_sizeof_COMPLEX=${ompi_cv_f77_sizeof_COMPLEX=8} ompi_cv_f77_alignment_COMPLEX=${ompi_cv_f77_alignment_COMPLEX=4} ompi_cv_f77_sizeof_COMPLEXp8=${ompi_cv_f77_sizeof_COMPLEXp8=8} ompi_cv_f77_alignment_COMPLEXp8=${ompi_cv_f77_alignment_COMPLEXp8=4} ompi_cv_f77_sizeof_COMPLEXp16=${ompi_cv_f77_sizeof_COMPLEXp16=16} ompi_cv_f77_alignment_COMPLEXp16=${ompi_cv_f77_alignment_COMPLEXp16=8} ompi_cv_f77_true_value=${ompi_cv_f77_true_value=0} ompi_cv_f90_sizeof_LOGICAL=${ompi_cv_f90_sizeof_LOGICAL=4} ompi_cv_f90_alignment_LOGICAL=${ompi_cv_f90_alignment_LOGICAL=4} ompi_cv_f90_sizeof_INTEGER=${ompi_cv_f90_sizeof_INTEGER=4} ompi_cv_f90_alignment_INTEGER=${ompi_cv_f90_alignment_INTEGER=4} ompi_cv_f90_sizeof_INTEGERp1=${ompi_cv_f90_sizeof_INTEGERp1=1} ompi_cv_f90_alignment_INTEGERp1=${ompi_cv_f90_alignment_INTEGERp1=1} ompi_cv_f90_sizeof_INTEGERp2=${ompi_cv_f90_sizeof_INTEGERp2=2} ompi_cv_f90_alignment_INTEGERp2=${ompi_cv_f90_alignment_INTEGERp2=2} ompi_cv_f90_sizeof_INTEGERp4=${ompi_cv_f90_sizeof_INTEGERp4=4} ompi_cv_f90_alignment_INTEGERp4=${ompi_cv_f90_alignment_INTEGERp4=4} ompi_cv_f90_sizeof_INTEGERp8=${ompi_cv_f90_sizeof_INTEGERp8=8} 
ompi_cv_f90_alignment_INTEGERp8=${ompi_cv_f90_alignment_INTEGERp8=8} ompi_cv_f90_sizeof_INTEGERp16=${ompi_cv_f90_sizeof_INTEGERp16=16} ompi_cv_f90_alignment_INTEGERp16=${ompi_cv_f90_alignment_INTEGERp16=8} ompi_cv_f90_sizeof_REAL=${ompi_cv_f90_sizeof_REAL=4} ompi_cv_f90_alignment_REAL=${ompi_cv_f90_alignment_REAL=4} ompi_cv_f90_sizeof_REALp2=${ompi_cv_f90_sizeof_REALp2=2} ompi_cv_f90_alignment_REALp2=${ompi_cv_f90_alignment_REALp2=2} ompi_cv_f90_sizeof_REALp4=${ompi_cv_f90_sizeof_REALp4=4} ompi_cv_f90_alignment_REALp4=${ompi_cv_f90_alignment_REALp4=4} ompi_cv_f90_sizeof_REALp8=${ompi_cv_f90_sizeof_REALp8=8} ompi_cv_f90_alignment_REALp8=${ompi_cv_f90_alignment_REALp8=8} ompi_cv_f90_sizeof_REALp16=${ompi_cv_f90_sizeof_REALp16=16} ompi_cv_f90_alignment_REALp16=${ompi_cv_f90_alignment_REALp16=8} ompi_cv_f90_sizeof_DOUBLE_PRECISION=${ompi_cv_f90_sizeof_DOUBLE_PRECISION=8} ompi_cv_f90_alignment_DOUBLE_PRECISION=${ompi_cv_f90_alignment_DOUBLE_PRECISION=8} ompi_cv_f90_sizeof_COMPLEX=${ompi_cv_f90_sizeof_COMPLEX=8} ompi_cv_f90_alignment_COMPLEX=${ompi_cv_f90_alignment_COMPLEX=4} ompi_cv_f90_sizeof_COMPLEXp8=${ompi_cv_f90_sizeof_COMPLEXp8=8} ompi_cv_f90_alignment_COMPLEXp8=${ompi_cv_f90_alignment_COMPLEXp8=4} ompi_cv_f90_sizeof_COMPLEXp16=${ompi_cv_f90_sizeof_COMPLEXp16=16} ompi_cv_f90_alignment_COMPLEXp16=${ompi_cv_f90_alignment_COMPLEXp16=8} ompi_cv_f90_true_value=${ompi_cv_f90_true_value=0} ompi_cv_f90_sizeof_DOUBLE_COMPLEX=${ompi_cv_f90_sizeof_DOUBLE_COMPLEX=16} ompi_cv_f90_alignment_DOUBLE_COMPLEX=${ompi_cv_f90_alignment_DOUBLE_COMPLEX=8} ompi_cv_f90_int_kind_9=${ompi_cv_f90_int_kind_9=4} ompi_cv_f90_int_kind_18=${ompi_cv_f90_int_kind_18=8} prrte-3.0.13/contrib/platform/ornl/cray_xt_cnl_romio_debug0000664000175000017500000001152615145263240024166 0ustar alastairalastairenable_mem_debug=no enable_mem_profile=yes enable_debug=yes enable_debug_symbols=yes enable_io_romio=yes enable_static=yes enable_shared=no with_threads=yes enable_pretty_print_stacktrace=yes 
enable_dlopen=no with_portals_config=cnl_modex with_memory_manager=none enable_mca_no_build=maffinity-first_use,maffinity-libnuma,paffinity-linux,ess-cnos,pml-dr,filem-rsh,grpcomm-cnos,rmgr-cnos,rml-cnos enable_heterogeneous=no enable_pty_support=no enable_binaries=yes ompi_cv_f77_sizeof_LOGICAL=${ompi_cv_f77_sizeof_LOGICAL=4} ompi_cv_f77_alignment_LOGICAL=${ompi_cv_f77_alignment_LOGICAL=4} ompi_cv_f77_sizeof_INTEGER=${ompi_cv_f77_sizeof_INTEGER=4} ompi_cv_f77_alignment_INTEGER=${ompi_cv_f77_alignment_INTEGER=4} ompi_cv_f77_sizeof_INTEGERp1=${ompi_cv_f77_sizeof_INTEGERp1=1} ompi_cv_f77_alignment_INTEGERp1=${ompi_cv_f77_alignment_INTEGERp1=1} ompi_cv_f77_sizeof_INTEGERp2=${ompi_cv_f77_sizeof_INTEGERp2=2} ompi_cv_f77_alignment_INTEGERp2=${ompi_cv_f77_alignment_INTEGERp2=2} ompi_cv_f77_sizeof_INTEGERp4=${ompi_cv_f77_sizeof_INTEGERp4=4} ompi_cv_f77_alignment_INTEGERp4=${ompi_cv_f77_alignment_INTEGERp4=4} ompi_cv_f77_sizeof_INTEGERp8=${ompi_cv_f77_sizeof_INTEGERp8=8} ompi_cv_f77_alignment_INTEGERp8=${ompi_cv_f77_alignment_INTEGERp8=8} ompi_cv_f77_sizeof_INTEGERp16=${ompi_cv_f77_sizeof_INTEGERp16=16} ompi_cv_f77_alignment_INTEGERp16=${ompi_cv_f77_alignment_INTEGERp16=8} ompi_cv_f77_sizeof_REAL=${ompi_cv_f77_sizeof_REAL=4} ompi_cv_f77_alignment_REAL=${ompi_cv_f77_alignment_REAL=4} ompi_cv_f77_sizeof_REALp2=${ompi_cv_f77_sizeof_REALp2=2} ompi_cv_f77_alignment_REALp2=${ompi_cv_f77_alignment_REALp2=2} ompi_cv_f77_sizeof_REALp4=${ompi_cv_f77_sizeof_REALp4=4} ompi_cv_f77_alignment_REALp4=${ompi_cv_f77_alignment_REALp4=4} ompi_cv_f77_sizeof_REALp8=${ompi_cv_f77_sizeof_REALp8=8} ompi_cv_f77_alignment_REALp8=${ompi_cv_f77_alignment_REALp8=8} ompi_cv_f77_sizeof_REALp16=${ompi_cv_f77_sizeof_REALp16=16} ompi_cv_f77_alignment_REALp16=${ompi_cv_f77_alignment_REALp16=8} ompi_cv_f77_sizeof_DOUBLE_PRECISION=${ompi_cv_f77_sizeof_DOUBLE_PRECISION=8} ompi_cv_f77_alignment_DOUBLE_PRECISION=${ompi_cv_f77_alignment_DOUBLE_PRECISION=8} ompi_cv_f77_sizeof_COMPLEX=${ompi_cv_f77_sizeof_COMPLEX=8} 
ompi_cv_f77_alignment_COMPLEX=${ompi_cv_f77_alignment_COMPLEX=4} ompi_cv_f77_sizeof_COMPLEXp8=${ompi_cv_f77_sizeof_COMPLEXp8=8} ompi_cv_f77_alignment_COMPLEXp8=${ompi_cv_f77_alignment_COMPLEXp8=4} ompi_cv_f77_sizeof_COMPLEXp16=${ompi_cv_f77_sizeof_COMPLEXp16=16} ompi_cv_f77_alignment_COMPLEXp16=${ompi_cv_f77_alignment_COMPLEXp16=8} ompi_cv_f77_true_value=${ompi_cv_f77_true_value=0} ompi_cv_f90_sizeof_LOGICAL=${ompi_cv_f90_sizeof_LOGICAL=4} ompi_cv_f90_alignment_LOGICAL=${ompi_cv_f90_alignment_LOGICAL=4} ompi_cv_f90_sizeof_INTEGER=${ompi_cv_f90_sizeof_INTEGER=4} ompi_cv_f90_alignment_INTEGER=${ompi_cv_f90_alignment_INTEGER=4} ompi_cv_f90_sizeof_INTEGERp1=${ompi_cv_f90_sizeof_INTEGERp1=1} ompi_cv_f90_alignment_INTEGERp1=${ompi_cv_f90_alignment_INTEGERp1=1} ompi_cv_f90_sizeof_INTEGERp2=${ompi_cv_f90_sizeof_INTEGERp2=2} ompi_cv_f90_alignment_INTEGERp2=${ompi_cv_f90_alignment_INTEGERp2=2} ompi_cv_f90_sizeof_INTEGERp4=${ompi_cv_f90_sizeof_INTEGERp4=4} ompi_cv_f90_alignment_INTEGERp4=${ompi_cv_f90_alignment_INTEGERp4=4} ompi_cv_f90_sizeof_INTEGERp8=${ompi_cv_f90_sizeof_INTEGERp8=8} ompi_cv_f90_alignment_INTEGERp8=${ompi_cv_f90_alignment_INTEGERp8=8} ompi_cv_f90_sizeof_INTEGERp16=${ompi_cv_f90_sizeof_INTEGERp16=16} ompi_cv_f90_alignment_INTEGERp16=${ompi_cv_f90_alignment_INTEGERp16=8} ompi_cv_f90_sizeof_REAL=${ompi_cv_f90_sizeof_REAL=4} ompi_cv_f90_alignment_REAL=${ompi_cv_f90_alignment_REAL=4} ompi_cv_f90_sizeof_REALp2=${ompi_cv_f90_sizeof_REALp2=2} ompi_cv_f90_alignment_REALp2=${ompi_cv_f90_alignment_REALp2=2} ompi_cv_f90_sizeof_REALp4=${ompi_cv_f90_sizeof_REALp4=4} ompi_cv_f90_alignment_REALp4=${ompi_cv_f90_alignment_REALp4=4} ompi_cv_f90_sizeof_REALp8=${ompi_cv_f90_sizeof_REALp8=8} ompi_cv_f90_alignment_REALp8=${ompi_cv_f90_alignment_REALp8=8} ompi_cv_f90_sizeof_REALp16=${ompi_cv_f90_sizeof_REALp16=16} ompi_cv_f90_alignment_REALp16=${ompi_cv_f90_alignment_REALp16=8} ompi_cv_f90_sizeof_DOUBLE_PRECISION=${ompi_cv_f90_sizeof_DOUBLE_PRECISION=8} 
ompi_cv_f90_alignment_DOUBLE_PRECISION=${ompi_cv_f90_alignment_DOUBLE_PRECISION=8} ompi_cv_f90_sizeof_COMPLEX=${ompi_cv_f90_sizeof_COMPLEX=8} ompi_cv_f90_alignment_COMPLEX=${ompi_cv_f90_alignment_COMPLEX=4} ompi_cv_f90_sizeof_COMPLEXp8=${ompi_cv_f90_sizeof_COMPLEXp8=8} ompi_cv_f90_alignment_COMPLEXp8=${ompi_cv_f90_alignment_COMPLEXp8=4} ompi_cv_f90_sizeof_COMPLEXp16=${ompi_cv_f90_sizeof_COMPLEXp16=16} ompi_cv_f90_alignment_COMPLEXp16=${ompi_cv_f90_alignment_COMPLEXp16=8} ompi_cv_f90_true_value=${ompi_cv_f90_true_value=0} ompi_cv_f90_sizeof_DOUBLE_COMPLEX=${ompi_cv_f90_sizeof_DOUBLE_COMPLEX=16} ompi_cv_f90_alignment_DOUBLE_COMPLEX=${ompi_cv_f90_alignment_DOUBLE_COMPLEX=8} ompi_cv_f90_int_kind_9=${ompi_cv_f90_int_kind_9=4} ompi_cv_f90_int_kind_18=${ompi_cv_f90_int_kind_18=8} prrte-3.0.13/contrib/platform/ornl/ornl_configure.gnu0000664000175000017500000000324515145263240023110 0ustar alastairalastair#!/bin/bash # change the following for install path, note # that VER is appended to the path. 
VER="trunk" SW_INSTALL_ROOT=/tmp/work/gshipman/ompi/install PLATFORM=ornl/cray_xt_cnl_romio PRTED_MAKEFILE=prte/tools/orted/Makefile if test -z "`grep "orted_LDFLAGS =.*-all-static" ${PRTED_MAKEFILE}`"; then echo "WARNING: patching ${PRTED_MAKEFILE} to build it static" sed -i 's/orted_LDFLAGS =/orted_LDFLAGS = -all-static/g' ${PRTED_MAKEFILE} fi ./configure \ NM=/usr/bin/nm \ CC=gcc \ CXX=g++ \ CFLAGS="-I/opt/xt-pe/default/include/ -I/opt/xt-catamount/default/catamount/linux/include/ " \ CPPFLAGS=-I/opt/xt-pe/default/include/ \ FCFLAGS=-I/opt/xt-pe/default/include/ \ FFLAGS=-I/opt/xt-pe/default/include/ \ LDFLAGS="-L/opt/xt-service/default/lib/snos64 -L/opt/xt-pe/default/cnos/linux/64/lib -L/opt/xt-mpt/default/lib/snos64" \ LIBS="-lpct -lalpslli -lalpsutil -lportals -lpthread" \ --with-wrapper-cflags="-Wmissing-prototypes -I/tmp/work/gshipman/ompi/install/trunk/include" \ --with-wrapper-ldflags="-Wmissing-prototypes -lnsl -lutil -lpct -lalpslli -lalpsutil -lportals -lpthread -lm -L/opt/xt-service/default/lib/snos64 -L/opt/xt-pe/default/cnos/linux/64/lib -L/opt/xt-mpt/default/lib/snos64"\ --build=x86_64-unknown-linux-gnu \ --host=x86_64-cray-linux-gnu \ --disable-mpi-f77\ --disable-mpi-f90\ --without-tm \ --with-platform=./contrib/platform/${PLATFORM} \ --with-io-romio-flags="build_alias=x86_64-unknown-linux-gnu \ host_alias=x86_64-cray-linux-gnu \ --enable-ltdl-convenience --no-recursion" \ --with-alps=yes \ --prefix="$SW_INSTALL_ROOT/$VER" | tee build.log #gmake all install | tee -a build.log #chmod -R go+rx $SW_INSTALL_ROOT/$VER-$CMP prrte-3.0.13/contrib/platform/mellanox/0000775000175000017500000000000015145263240020223 5ustar alastairalastairprrte-3.0.13/contrib/platform/mellanox/optimized.conf0000664000175000017500000000646115145263240023105 0ustar alastairalastair# # Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. 
# Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. # Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2006-2020 Cisco Systems, Inc. All rights reserved # Copyright (c) 2017-2019 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow # # $HEADER$ # # This is the default system-wide MCA parameters defaults file. # Specifically, the MCA parameter "mca_param_files" defaults to a # value of # "$HOME/.openmpi/mca-params.conf:$sysconf/openmpi-mca-params.conf" # (this file is the latter of the two). So if the default value of # mca_param_files is not changed, this file is used to set system-wide # MCA parameters. This file can therefore be used to set system-wide # default MCA parameters for all users. Of course, users can override # these values if they want, but this file is an excellent location # for setting system-specific MCA parameters for those users who don't # know / care enough to investigate the proper values for them. # Note that this file is only applicable where it is visible (in a # filesystem sense). Specifically, MPI processes each read this file # during their startup to determine what default values for MCA # parameters should be used. mpirun does not bundle up the values in # this file from the node where it was run and send them to all nodes; # the default value decisions are effectively distributed. Hence, # these values are only applicable on nodes that "see" this file. If # $sysconf is a directory on a local disk, it is likely that changes # to this file will need to be propagated to other nodes. If $sysconf # is a directory that is shared via a networked filesystem, changes to # this file will be visible to all nodes that share this $sysconf. 
# The format is straightforward: one per line, mca_param_name = # rvalue. Quoting is ignored (so if you use quotes or escape # characters, they'll be included as part of the value). For example: # Disable run-time MPI parameter checking # mpi_param_check = 0 # Note that the value "~/" will be expanded to the current user's home # directory. For example: # Change component loading path # component_path = /usr/local/lib/openmpi:~/my_openmpi_components # See "ompi_info --param all all" for a full listing of Open MPI MCA # parameters available and their default values. #rmaps_base_mapping_policy = dist:auto coll = ^ml hwloc_base_binding_policy = core btl = self # Basic behavior to smooth startup mca_base_component_show_load_errors = 0 prte_abort_timeout = 10 opal_set_max_sys_limits = 1 # # Define timeout for daemons to report back during launch prte_startup_timeout = 1000 oob_tcp_peer_retries = 1000 oob_tcp_disable_family = IPv6 oob_tcp_listen_mode = listen_thread oob_tcp_sndbuf = 32768 oob_tcp_rcvbuf = 32768 opal_event_include=epoll bml_r2_show_unreach_errors = 0 prrte-3.0.13/contrib/platform/mellanox/optimized0000664000175000017500000000257315145263240022161 0ustar alastairalastairenable_mca_no_build=coll-ml,btl-uct enable_debug_symbols=yes enable_prterun_prefix_by_default=yes with_verbs=no with_devel_headers=yes enable_oshmem=yes enable_oshmem_fortran=yes disable_wrapper_rpath=yes mellanox_autodetect=${mellanox_autodetect:="no"} mellanox_debug=${mellanox_debug:="no"} mellanox_threads=${mellanox_threads:="no"} if [ "$mellanox_threads" == "yes" ]; then enable_mpi_thread_multiple=yes enable_opal_multi_threads=yes fi if [ "$mellanox_autodetect" == "yes" ]; then ucx_dir=${ucx_dir:="$(pkg-config --variable=prefix ucx)"} if [ -d $ucx_dir ]; then with_ucx=$ucx_dir fi hcoll_dir=${hcoll_dir:="$(pkg-config --variable=prefix hcoll)"} if [ -d $hcoll_dir ]; then with_hcoll=$hcoll_dir fi slurm_dir=${slurm_dir:="/usr"} if [ -f $slurm_dir/include/slurm/slurm.h ]; then 
with_slurm=$slurm_dir with_pmi=$slurm_dir fi fi if [ "$mellanox_debug" == "yes" ]; then enable_debug=yes enable_memchecker=yes with_valgrind=yes CXXFLAGS="-O0 -g" CCASFLAGS="-O0 -g" FCFLAGS="-O0 -g" CFLAGS="-O0 -g" else enable_debug=no enable_mem_debug=no enable_mem_profile=no enable_memchecker=no enable_picky=no enable_heterogeneous=no enable_ft_thread=no with_mpi_param_check=no CXXFLAGS="-O3 -g" CCASFLAGS="-O3 -g" FCFLAGS="-O3 -g" CFLAGS="-O3 -g" fi prrte-3.0.13/contrib/platform/ps30000664000175000017500000000111215145263240017027 0ustar alastairalastairenable_io_romio=no enable_static=yes enable_shared=no with_threads=no enable_pretty_print_stacktrace=no enable_dlopen=no with_memory_manager=none enable_mca_no_build=maffinity,paffinity,timer,allocator-basic,rcache-vma,plm-gridengine,plm-slurm,ras-slurm,ras-gridengine,btl-sm,coll-hierarch,coll-sm,common-sm,mpool-sm,pml-dr,pml-cm,mpool-rdma,osc-rdma,ess-slurm,backtrace-darwin,memory-darwin,memory-malloc_hook,memory_ptmalloc2 enable_heterogeneous=no enable_pty_support=no enable_mem_debug=no enable_mem_profile=no enable_debug_symbols=no enable_binaries=yes enable_mca_direct=pml-ob1 prrte-3.0.13/contrib/platform/optimized0000664000175000017500000000007215145263240020332 0ustar alastairalastairenable_mem_debug=no enable_mem_profile=no enable_debug=no prrte-3.0.13/contrib/search_replace.pl0000775000175000017500000000410615145263240020061 0ustar alastairalastair#!/usr/bin/perl # # Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. # Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow # # $HEADER$ # use File::Find; if (scalar(@ARGV) != 2) { print "Usage: search_replace.pl search_string replace_string\n"; exit 1; } $search_string = @ARGV[0]; $replace_string = @ARGV[1]; print "search: $search_string\n"; print "replace: $replace_string\n"; sub replace { # don't process directories or links, and dont' recurse down # "special" directories if ( -l $_ ) { return; } if ( -d $_ ) { if ((/\.svn/) || (/\.deps/) || (/\.libs/) || (/\.hg/) || (/\.git/) || (/autom4te\.cache/)) { $File::Find::prune = true; } return; } # $File::Find::name is the path relative to the starting point. # $_ contains the file's basename. The code automatically changes # to the processed directory, so we want to open / close $_. $process_file = $_; print "--> $File::Find::name\n"; my $replace = 0; open(INFILE, $process_file) || die "Could not open " . $File::Find::name . ": $!\n"; open(OUTFILE, "> " . $process_file . ".tmp") || die "Could not open " . $File::Find::name . ".tmp: $!\n"; while () { $replace += s/$search_string/$replace_string/g; print OUTFILE $_; } close(OUTFILE); close(INFILE); if ($replace) { rename($process_file . ".tmp", $process_file); } else { unlink($process_file . ".tmp"); } } find(\&replace, "."); prrte-3.0.13/contrib/check-owner.pl0000775000175000017500000001051415145263240017326 0ustar alastairalastair#!/usr/bin/env perl # # Copyright (c) 2015-2020 Cisco Systems, Inc. All rights reserved # Copyright (c) 2015 Los Alamos National Security, LLC. All rights reserved. # Copyright (c) 2019 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Simple script to traverse the OMPI source tree that looks for # owner.txt files, and generates _mca_owner.md files. 
# use strict; use Cwd; use File::Find; use Getopt::Long; use Data::Dumper; my $num_warnings = 0; my $num_errors = 0; ########################################################################### my $VERBOSE = 0; my $HELP = 0; GetOptions( "help|h" => \$HELP, "verbose|v" => \$VERBOSE, ) or die "unable to parse options, aborted"; if ($HELP) { print <Purity(1)->Indent(1); my $s = $d->Dump; print $s; } sub isTopDir { my ($d) = @_; # master if (-f "$d/Makefile.prte-rules") { return 1; } # v1.8 if (-f "$d/Makefile.man-page-rules") { return 1; } return 0; } ########################################################################### # Find the top-level OMPI source tree dir my $start = cwd(); my $top = $start; while (!isTopDir($top)) { chdir(".."); $top = cwd(); die "Can't find top-level Open MPI directory" if ($top eq "/"); } chdir($start); ########################################################################### my @owner_files; # Helper: Search for all owner files sub match_files { # Don't process sym links return if (-l $_); # Don't recurse down "special" directories if (-d $_ && ((/^\.deps$/) || (/^\.libs$/) || (/^\.svn$/) || (/^\.hg$/) || (/^\.git$/))) { $File::Find::prune = 1; return; } # $File::Find::name is the path relative to the starting point. # $_ contains the file's basename. The code automatically changes # to the processed directory, so we want to open / close $_. 
verbose("--> $File::Find::name\n"); my $relative = $File::Find::name; $relative =~ s/^$top//; $relative =~ s/^\///; my $short = $_; if ($short =~ "owner.txt") { push(@owner_files, { full => $File::Find::name, short => $short, relative => $relative, }); verbose(" Found owner file: $short\n"); } } # Find all owner files print "Searching for owner files...\n"; my $startrel = $start; if ($top ne $start) { $startrel =~ s/^$top//; $startrel =~ s/^\///; } find(\&match_files, "."); ########################################################################### # Index all help files my $help_topics; my $help_file_refs; print "Indexing owner files (from entire source tree)...\n"; my $old_lib = ""; my $filename; foreach my $info (@owner_files) { verbose("Indexing owner: $info->{full}\n"); open(OWNERFILE, $info->{full}) || die "Could not open file $info->{full}\n"; my $owner="unknown"; my $status="unknown"; my $label="unknown"; while () { next if /^#/; chop; if (/^owner/) { ($label,$owner) = split(/:/); $owner =~ s/\s//ge; # get rid of white space, may need something better } if (/^status/) { ($label,$status) = split(/:/); $status =~ s/\s//ge; } } my @components = split(/\//,$info->{full}); shift(@components); my $lib = shift(@components); if ($lib ne $old_lib) { $filename = $lib."_mca_owner.md"; printf("hey, found a new lib %s filename %s\n",$lib,$filename); open(MDFILE, ">$filename") || die "Could not open file $filename\n"; printf(MDFILE "| Framework | Component | Owner | Status |\n"); printf(MDFILE "| --- | --- | --- | --- |\n"); $old_lib = $lib; } shift(@components); my $frame = shift(@components); my $comp = shift(@components); printf("For lib %s framework %s component %s owner %s status %s\n", $lib, $frame, $comp, $owner, $status); printf(MDFILE "| $frame | $comp | $owner | $status |\n"); close (OWNERFILE); } prrte-3.0.13/contrib/purge-tab-indents.pl0000775000175000017500000001124715145263240020455 0ustar alastairalastair#!/usr/bin/perl -w use strict; use Cwd; use 
File::Basename; use Text::Tabs; use Getopt::Long; # Set to true if the script should merely check for up-to-date copyrights. # Will exit with status 111 if there are out of date copyrights which this # script can correct. my $CHECK_ONLY = 0; # used by $CHECK_ONLY logic for bookeeping my $would_replace = 0; # Set to true to suppress most informational messages. Only out of date files # will be printed. my $QUIET = 0; # Set to true if we just want to see the help message my $HELP = 0; # Set to true if we want to strip blank lines from all files my $ALL = 0; GetOptions( "help" => \$HELP, "quiet" => \$QUIET, "check-only" => \$CHECK_ONLY, "all" => \$ALL, ) or die "unable to parse options, stopped"; if ($HELP) { print <; close(FILE); my @expanded_lines = expand(@lines_with_tabs); open(TEMP, ">temp.txt"); print TEMP @expanded_lines; close(TEMP); system("mv temp.txt $f"); chmod($mode, $f); } # Returns a list of file names (relative to pwd) which the VCS considers to be modified. sub find_modified_files { my @files = (); # Number of path entries to remove from ${top}-relative paths. # (--show-cdup either returns the empty string or sequence of "../" # entries, always ending in a "/") my $n_strip = scalar(split(m!/!, scalar(`git rev-parse --show-cdup`))) - 1; # "." 
restricts scope, but does not get us relative path names my $cmd = "git status -z --porcelain --untracked-files=no ."; my $lines = `$cmd`; # From git-status(1): # X Y Meaning # ------------------------------------------------- # [MD] not updated # M [ MD] updated in index # A [ MD] added to index # D [ M] deleted from index # R [ MD] renamed in index # C [ MD] copied in index # [MARC] index and work tree matches # [ MARC] M work tree changed since index # [ MARC] D deleted in work tree # ------------------------------------------------- # D D unmerged, both deleted # A U unmerged, added by us # U D unmerged, deleted by them # U A unmerged, added by them # D U unmerged, deleted by us # A A unmerged, both added # U U unmerged, both modified # ------------------------------------------------- # ? ? untracked # ------------------------------------------------- my $s1 = ""; my $s2 = ""; my $fullname = ""; foreach my $line (split /\x{00}/, $lines) { my $keep = 0; unless (($s1, $s2, $fullname) = $line =~ m/^(.)(.) (.*)$/) { next; } if ($ALL) { $keep = 1; } else { # ignore all merge cases next if ($s1 eq "D" and $s2 eq "D"); next if ($s1 eq "A" and $s2 eq "A"); next if ($s1 eq "U" or $s2 eq "U"); # only update for actually added/modified cases, no copies, # renames, etc. $keep = 1 if ($s1 eq "M" or $s2 eq "M"); $keep = 1 if ($s1 eq "A"); } if ($keep) { my $relname = $fullname; $relname =~ s!^([^/]*/){$n_strip}!!g; push @files, $relname if (-f $relname); } } return @files; } exit 0; prrte-3.0.13/contrib/generate_file_list.pl0000775000175000017500000000211115145263240020737 0ustar alastairalastair#!/usr/bin/perl # # Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. 
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow # # $HEADER$ # if (scalar(@ARGV) != 1) { print "Usage: generate_file_list \n"; exit(3); } $file_name = @ARGV[0]; open(FILE,"$file_name") || print "File count not be opened\n"; open(TEMP,"> file_list") || print "Could not open file for writing\n"; while () { if (/Index/) { s/^Index:\s*//g; print TEMP; } } close(TEMP); close($file_name); prrte-3.0.13/contrib/scaling/0000775000175000017500000000000015145263240016200 5ustar alastairalastairprrte-3.0.13/contrib/scaling/Makefile.include0000664000175000017500000000152615145263240021266 0ustar alastairalastair# -*- makefile -*- # # Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved. # Copyright (c) 2019 Intel, Inc. All rights reserved. # Copyright (c) 2020 Cisco Systems, Inc. All rights reserved # $COPYRIGHT$ # # Additional copyrights may follow # # $HEADER$ # # Note that this file does not stand on its own. It is included by a # higher-level Makefile so that Automake features such as "make dist" # work properly (and include all the relevant files in this directory # in the distribution tarball). # If you are looking for the file that builds these examples, look at # "Makefile" in this same directory (it is *NOT* generated by # Automake). 
EXTRA_DIST += \ contrib/scaling/Makefile \ contrib/scaling/mpi_barrier.c \ contrib/scaling/mpi_no_op.c \ contrib/scaling/prte_no_op.c \ scaling.pl prrte-3.0.13/contrib/scaling/orte_no_op.c0000664000175000017500000000071515145263240020512 0ustar alastairalastair/* -*- C -*- * * $HEADER$ * * The most basic of applications */ #include #include "prte/constants.h" #include "prte/runtime/runtime.h" int main(int argc, char* argv[]) { if (PRTE_SUCCESS != prte_init(&argc, &argv, PRTE_PROC_NON_MPI)) { fprintf(stderr, "Failed prte_init\n"); exit(1); } if (PRTE_SUCCESS != prte_finalize()) { fprintf(stderr, "Failed prte_finalize\n"); exit(1); } return 0; } prrte-3.0.13/contrib/scaling/mpi_no_op.c0000664000175000017500000000032315145263240020321 0ustar alastairalastair/* -*- C -*- * * $HEADER$ * * The most basic of MPI applications */ #include #include "mpi.h" int main(int argc, char* argv[]) { MPI_Init(&argc, &argv); MPI_Finalize(); return 0; } prrte-3.0.13/contrib/scaling/mpi_memprobe.c0000664000175000017500000001645015145263240021025 0ustar alastairalastair/* -*- C -*- * * $HEADER$ * * The most basic of MPI applications */ #include "prte_config.h" #include #include "mpi.h" #include "opal/mca/pmix/pmix.h" #include "opal/util/argv.h" #include "opal/util/pmix_printf.h" #include "prte/runtime/runtime.h" #include "prte/util/proc_info.h" #include "prte/util/name_fns.h" #include "prte/runtime/prte_globals.h" #include "prte/mca/errmgr/errmgr.h" static int rank, size; static volatile bool wait_for_release = true; #define MEMPROBE_RELEASE 12345 static void _release_fn(int status, const opal_process_name_t *source, opal_list_t *info, opal_list_t *results, opal_pmix_notification_complete_fn_t cbfunc, void *cbdata) { /* must let the notifier know we are done */ if (NULL != cbfunc) { cbfunc(OPAL_ERR_HANDLERS_COMPLETE, NULL, NULL, NULL, cbdata); } /* flag that the debugger is complete so we can exit */ wait_for_release = false; } static void _register_fn(int status, size_t evhandler_ref, 
void *cbdata) { volatile int *active = (volatile int*)cbdata; if (0 != status) { fprintf(stderr, "Client EVENT HANDLER REGISTRATION FAILED WITH STATUS %d, ref=%lu\n", status, (unsigned long)evhandler_ref); } *active = status; } static void qcbfunc(int status, opal_list_t *info, void *cbdata, opal_pmix_release_cbfunc_t release_fn, void *release_cbdata) { opal_list_t *results = (opal_list_t*)cbdata; opal_value_t *kv; if (NULL != info) { while (NULL != (kv = (opal_value_t*)opal_list_remove_first(info))) { opal_list_append(results, &kv->super); } } if (NULL != release_fn) { release_fn(release_cbdata); } wait_for_release = false; } static void notifycbfunc(int status, void *cbdata) { volatile int *active = (volatile int*)cbdata; *active = status; } static void sample(void) { opal_value_t *kv, *ival; opal_pmix_query_t *q; opal_list_t query, response, *lt; volatile int active; char **answer = NULL, *tmp, *msg; OBJ_CONSTRUCT(&query, opal_list_t); OBJ_CONSTRUCT(&response, opal_list_t); q = OBJ_NEW(opal_pmix_query_t); opal_list_append(&query, &q->super); opal_argv_append_nosize(&q->keys, OPAL_PMIX_QUERY_MEMORY_USAGE); /* qualify that we just want local avg, min/max values reported */ kv = OBJ_NEW(opal_value_t); kv->key = strdup(OPAL_PMIX_QUERY_LOCAL_ONLY); kv->type = OPAL_BOOL; kv->data.flag = true; opal_list_append(&q->qualifiers, &kv->super); kv = OBJ_NEW(opal_value_t); kv->key = strdup(OPAL_PMIX_QUERY_REPORT_AVG); kv->type = OPAL_BOOL; kv->data.flag = true; opal_list_append(&q->qualifiers, &kv->super); kv = OBJ_NEW(opal_value_t); kv->key = strdup(OPAL_PMIX_QUERY_REPORT_MINMAX); kv->type = OPAL_BOOL; kv->data.flag = true; opal_list_append(&q->qualifiers, &kv->super); /* issue the request */ wait_for_release = true; opal_pmix.query(&query, qcbfunc, (void*)&response); /* wait for the query to complete */ while (wait_for_release) { usleep(10); } wait_for_release = true; /* log my own results as a single string so the output * doesn't get garbled on the other end */ 
opal_asprintf(&tmp, "Data for node %s", prte_process_info.nodename); opal_argv_append_nosize(&answer, tmp); free(tmp); OPAL_LIST_FOREACH(kv, &response, opal_value_t) { lt = (opal_list_t*)kv->data.ptr; if (NULL != lt) { OPAL_LIST_FOREACH(ival, lt, opal_value_t) { if (0 == strcmp(ival->key, OPAL_PMIX_DAEMON_MEMORY)) { opal_asprintf(&tmp, "\tDaemon: %f", ival->data.fval); opal_argv_append_nosize(&answer, tmp); free(tmp); } else if (0 == strcmp(ival->key, OPAL_PMIX_CLIENT_AVG_MEMORY)) { opal_asprintf(&tmp, "\tClient: %f", ival->data.fval); opal_argv_append_nosize(&answer, tmp); free(tmp); } else { fprintf(stderr, "\tUnknown key: %s", ival->key); } } } } opal_argv_append_nosize(&answer, "\n"); OPAL_LIST_DESTRUCT(&response); /* construct the log output */ OBJ_CONSTRUCT(&response, opal_list_t); kv = OBJ_NEW(opal_value_t); kv->key = strdup(OPAL_PMIX_LOG_STDOUT); kv->type = OPAL_STRING; kv->data.string = opal_argv_join(answer, '\n'); opal_list_append(&response, &kv->super); opal_argv_free(answer); active = -1; opal_pmix.log(&response, notifycbfunc, (void*)&active); while (-1 == active) { usleep(10); } OPAL_LIST_DESTRUCT(&response); if (0 == rank) { /* send the notification to release the other procs */ wait_for_release = true; OBJ_CONSTRUCT(&response, opal_list_t); kv = OBJ_NEW(opal_value_t); kv->key = strdup(OPAL_PMIX_EVENT_NON_DEFAULT); kv->type = OPAL_BOOL; kv->data.flag = true; opal_list_append(&response, &kv->super); active = -1; if (OPAL_SUCCESS != opal_pmix.notify_event(MEMPROBE_RELEASE, NULL, OPAL_PMIX_RANGE_GLOBAL, &response, NULL, NULL)) { fprintf(stderr, "Notify event failed\n"); exit(1); } } else { /* now wait for notification */ while (wait_for_release) { usleep(10); } } } int main(int argc, char* argv[]) { opal_list_t *codes; opal_value_t *kv; volatile int active; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); if (0 == rank) { fprintf(stderr, "Sampling memory usage after MPI_Init\n"); } /* everyone registers 
their event handler */ codes = OBJ_NEW(opal_list_t); kv = OBJ_NEW(opal_value_t); kv->key = strdup("errorcode"); kv->type = OPAL_INT; kv->data.integer = MEMPROBE_RELEASE; opal_list_append(codes, &kv->super); active = -1; opal_pmix.register_evhandler(codes, NULL, _release_fn, _register_fn, (void*)&active); while (-1 == active) { usleep(10); } /* if I am the local leader (i.e., local_rank=0), then I ask * my daemon to report the local memory usage, and send it * to rank=0 */ if (0 == prte_process_info.my_local_rank) { sample(); } else { /* now wait for notification */ while (wait_for_release) { usleep(10); } } wait_for_release = true; /* perform a barrier so some communication will occur, thus * requiring exchange of endpoint info */ MPI_Barrier(MPI_COMM_WORLD); if (0 == rank) { fprintf(stderr, "\n\nSampling memory usage after MPI_Barrier\n"); } if (0 == prte_process_info.my_local_rank) { if (0 != rank) { /* wait a little */ usleep(1000); } sample(); } else { /* wait again while memory is sampled */ while (wait_for_release) { usleep(10); } } MPI_Finalize(); return 0; } prrte-3.0.13/contrib/scaling/scaling.pl0000775000175000017500000003210015145263240020154 0ustar alastairalastair#!/usr/bin/env perl # # Copyright (c) 2012 Los Alamos National Security, Inc. # All rights reserved. # Copyright (c) 2015-2016 Intel, Inc. All rights reserved. # Copyright (c) 2017-2018 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. 
use strict;
use Getopt::Long;

# globals
my $num_nodes = 2;
my $my_arg;
my $reps = 5;
my $usedvm = 0;
my $usesrun = 0;
my $usempirun = 0;
my $useaprun = 0;
my $myapp;
my $runall = 1;
my $rawoutput = 0;
my $myresults = "myresults";
my $ppn = 1;
my $npmin = 1;
my @csvrow;
my $multiplier = 1;

# Executables to time and the MCA options used with each of them:
# $options[N] is applied to $tests[N].
my @tests = qw(/bin/true ./prte_no_op ./mpi_no_op ./mpi_no_op ./mpi_no_op);
my @options = ("",
               "",
               "",
               "-mca mpi_add_procs_cutoff 0 -mca pmix_base_async_modex 1 -mca pmix_base_collect_data 0",
               "-mca mpi_add_procs_cutoff 0 -mca pmix_base_async_modex 1 -mca async_mpi_init 1 -mca async_mpi_finalize 1 -mca pmix_base_collect_data 0");

# Known launchers and their default options:
# $starteroptionlist[N] belongs to $starterlist[N].
my @starterlist = qw(mpirun prun srun aprun);
my @starteroptionlist = (" --novm --timeout 600",
                         " --system-server-only",
                         " --distribution=cyclic",
                         "");

# Set to true if the script should merely print the cmds
# it would run, but don't run them
my $SHOWME = 0;
# Set to true to suppress most informational messages.
my $QUIET = 0;
# Set to true if we just want to see the help message
my $HELP = 0;

GetOptions(
    "help" => \$HELP,
    "quiet" => \$QUIET,
    "showme" => \$SHOWME,
    "reps=s" => \$reps,
    "dvm" => \$usedvm,
    "srun" => \$usesrun,
    "aprun" => \$useaprun,
    "mpirun" => \$usempirun,
    "myapp=s" => \$myapp,
    "results=s" => \$myresults,
    "rawout" => \$rawoutput,
    "ppn=s" => \$ppn,
    "multiplier=s" => \$multiplier,
    "npmin=s" => \$npmin,
) or die "unable to parse options, stopped";

if ($HELP) {
    print "$0 [options]

--help | -h          This help message
--quiet | -q         Only output critical messages to stdout
--showme             Show the actual commands without executing them
--reps=s             Number of times to run each test (for statistics)
--mpirun             Use mpirun (or its equivalent prterun)
--dvm                Use prte-dvm to execute the test
--srun               Use srun (if available) to execute the test
--aprun              Use aprun (if available) to execute the test
--myapp=s            In addition to the standard tests, run this specific application (including any args)
--results=file       File where results are to be stored in comma-separated value format
--rawout             Provide raw timing output to the file
--ppn=n              Run n procs/node
--multiplier=n       Run n daemons/node (only for DVM and mpirun)
--npmin=n            Minimal number of nodes
";
    exit(0);
}

my $n = 1;
my $cmd;
my $starter;
my $test;
my $output;
my @lines;
my $line;
my @results;
my $res;
my $idx;
my $option;
my $havedvm = 0;
my @starters;
my @starteroptions;
my $pid;

# if they explicitly requested specific starters, then
# only use those
if ($useaprun || $usempirun || $usesrun || $usedvm) {
    $runall = 0
}

# if they didn't specify something, then set all starters to requested
if ($runall) {
    $useaprun = 1;
    $usempirun = 1;
    $usesrun = 1;
    $usedvm = 1;
}

# see which starters are available
my @path = split(":", $ENV{PATH});
my $exists = 0;
my $opt;
$idx=0;
foreach $starter (@starterlist) {
    $exists = 0;
    foreach my $path (@path) {
        if ( -x "$path/$starter") {
            $exists = 1;
            last;
        }
    }
    if ($exists) {
        # @starters and @starteroptions are filled in lock-step so that the
        # same index addresses a launcher and its option string.
        if ($usedvm && $starter eq "prun") {
            push @starters, $starter;
            $opt = $starteroptionlist[$idx] . " --npernode " . $ppn;
            push @starteroptions, $opt;
        } elsif ($usempirun && $starter eq "mpirun") {
            push @starters, $starter;
            $opt = $starteroptionlist[$idx] . " --npernode " . $ppn;
            # numeric comparison: the multiplier is a count, not a string
            if ($multiplier > 1) {
                $opt = $opt . " --mca rtc ^hwloc --mca ras_base_multiplier " . $multiplier;
            }
            push @starteroptions, $opt;
        } elsif ($useaprun && $starter eq "aprun") {
            push @starters, $starter;
            $opt = $starteroptionlist[$idx] . " -N " . $ppn;
            push @starteroptions, $opt;
        } elsif ($usesrun && $starter eq "srun") {
            push @starters, $starter;
            $opt = $starteroptionlist[$idx] . " --ntasks-per-node " .
$ppn;
            push @starteroptions, $opt;
        }
    }
    $idx = $idx + 1;
}

# bozo check
if (scalar @starters == 0) {
    print "No available starters\n";
    exit;
}

# if they gave us an app, add it to the list of tests
if ($myapp) {
    push @tests, $myapp;
}

if ($myresults) {
    # open the results file; "or" (not "||") so that a failed open
    # actually reaches the die
    open(FILE, ">", $myresults) or die "file could not be opened: $!";
}

# determine the number of nodes - doesn't
# matter which starter we use
$cmd = "mpirun --pernode hostname";
$output = `$cmd`;
@lines = split(/\n/, $output);
$num_nodes = $#lines + 1;

# get the local date and time
my ($sec,$min,$hour,$day,$month,$yr19,@rest) = localtime(time);
# localtime() months are 0-based and years are counted from 1900; build the
# stamp once so stdout and the results file show the same date
my $datestr = sprintf("%d-%d-%d %02d:%02d:%02d",
                      $day, $month + 1, $yr19 + 1900, $hour, $min, $sec);

my $pstarts = join(", ", @starters);

# start by printing out the resulting configuration
print "\n--------------------------------------------------\n";
print "\nTest configuration:\n";
print "\tDate:\t" . $datestr . "\n";
print "\tNum nodes:\t" . $num_nodes . "\n";
print "\tStarters:\t" . $pstarts . "\n";
print "\n--------------------------------------------------\n";

# and tag the output file as well
if ($myresults) {
    print FILE "Test configuration:\n";
    print FILE "Date:\t" . $datestr . "\n";
    print FILE "Num nodes:\t" . $num_nodes . "\n";
    print FILE "Starters:\t" . $pstarts . "\n";
}

my $index = 0;

# Run the command held in the global $cmd $reps times, pull the "real" or
# "elapsed" wall-clock figure out of each run's output, and (when a results
# file is open) append the collected figures as one CSV row.
# Reads:   $cmd, $reps, $n, $myresults, $rawoutput
# Updates: $output, @lines, @results, $idx, @csvrow
sub runcmd()
{
    my $rc;
    for (1..$reps) {
        $output = `$cmd`;
        # Check the error code of the command; if the error code is alright
        # just add a 0 in front of the number to neutraly mark the success;
        # If the code is not correct, add a ! in front of the number to mark
        # it invalid.
        if ($? == 0) {
            $rc = "0";
        } else {
            $rc = "!";
        }
        if ($myresults && $rawoutput) {
            print FILE $n . " " . $output . " $rc\n";
        }
        @lines = split(/\n/, $output);
        foreach $line (@lines) {
            if (0 <= index($line, "real") || 0 <= index($line, "elapsed")) {
                # we know that at least one item of interest is
                # in this line, so let's look for it - start
                # by getting rid of any leading whitespace
                $line =~ s/^\s+//;
                @results = split (/ +/,$line);
                $idx = 0;
                foreach $res (@results) {
                    # we are only interested in the real or elapsed time;
                    # "real" is tried first, then "elapsed"
                    my $strloc = index($res, "real");
                    if ($strloc < 0) {
                        $strloc = index($res, "elapsed");
                    }
                    if (0 <= $strloc) {
                        # some systems put the number in front of
                        # this word, and some append the word to
                        # the number - consider both cases
                        # NOTE(review): join() over a single element returns
                        # that element unchanged, so $rc never ends up in the
                        # CSV value - confirm whether a prefix was intended.
                        if (0 == $strloc) {
                            if (0 == $idx) {
                                # it must be in the next location
                                push @csvrow,join $rc,$results[1];
                            } else {
                                # it must be in the prior location
                                push @csvrow,join $rc,$results[$idx-1];
                            }
                        } else {
                            # take the portion of the string up to the tag
                            push @csvrow,join $rc,substr($res, 0, $strloc);
                        }
                    }
                    $idx = $idx + 1;
                }
            }
        }
    }
    # we have now completed all the reps, so log the results
    if ($myresults) {
        print FILE join(",", @csvrow) . "\n";
        # clear the output
        @csvrow = ();
    }
    print "\n";
}

foreach $starter (@starters) {
    my $dvmout;
    print "STARTER: $starter\n";
    # if we are going to use the dvm, then we
    if ($starter eq "prun") {
        my $dvm = "prte-dvm --system-server";
        # numeric comparison: the multiplier is a count, not a string
        if ($multiplier > 1) {
            $dvm = $dvm . " --mca rtc ^hwloc --mca ras_base_multiplier " .
$multiplier;
        }
        # need to start it
        print "##DVM: Launching $dvm\n";
        if ($myresults) {
            print FILE "\n\n$dvm\n";
        }
        if (!$SHOWME) {
            # open() on a command pipe returns the child's pid
            $havedvm = open($dvmout, $dvm."|") or die "##DVM: Spawn error $!\n";
            print "##DVM: pid=$havedvm\n";
            # Wait that the dvm reports that it is ready
            my $waitready = <$dvmout>;
            if($waitready =~ /DVM ready/i) {
                print "##DVM: $waitready\n";
            } else {
                die "##DVM: error: $waitready\n";
            }
        }
    } else {
        if ($myresults) {
            print FILE "\n\n";
        }
    }

    if ($myresults) {
        print FILE "$starter $starteroptions[$index]\n\n";
    }

    my $testnum = 0;
    foreach $test (@tests) {
        $option = $options[$testnum];
        # aprun and srun take MCA parameters as environment settings, so
        # rewrite the "-mca name value" pairs into the launcher's own syntax
        if ($starter eq "aprun") {
            $option =~ s/-mca\s+(\S+)\s+(\S+)/-e OMPI_MCA_$1=$2/g;
        }
        if ($starter eq "srun") {
            $option =~ s/-mca\s+(\S+)\s+(\S+)\s*/OMPI_MCA_$1=$2,/g;
            $option =~ s/\s*(OMPI_MCA\S+)/ --export=$1ALL/g;
        }
        if (-e $test) {
            if ($myresults) {
                print FILE "#nodes,$test,$option\n";
            }
            if (!$SHOWME) {
                # pre-position the executable
                $cmd = $starter . $starteroptions[$index] . " $test 2>&1";
                my $error;
                $error = `$cmd`;
                # backticks return the command's output; its exit status
                # is reported in $?
                if (0 != $?) {
                    if ($myresults) {
                        print FILE "Command $cmd returned error $error\n";
                        $testnum = $testnum + 1;
                        next;
                    }
                }
            }
            # sweep the node count from $npmin upward, doubling each step
            $n = $npmin;
            while ($n <= $num_nodes) {
                push @csvrow,$n;
                if ($starter eq "prun" or $starter eq "mpirun" or $starter eq "aprun") {
                    my $np = $n * $ppn;
                    $cmd = "time " . $starter . " " . $starteroptions[$index] . " $option -n $np $test 2>&1";
                } else {
                    $cmd = "time " . $starter . " " . $starteroptions[$index] . " $option -N $n $test 2>&1";
                }
                print $cmd . "\n";
                if (!$SHOWME) {
                    runcmd();
                }
                $n = 2 * $n;
            }
            # NOTE(review): "!=" binds tighter than "&", so this evaluates
            # (0 != $num_nodes) & $n - confirm the intended condition.
            if (0 != $num_nodes & $n) {
                $cmd = "time " . $starter . " " . $starteroptions[$index] . " $option $test 2>&1";
                print $cmd . "\n";
                if (!$SHOWME) {
                    runcmd();
                }
            }
            print "\n--------------------------------------------------\n";
        } else {
            print "Test " . $test .
" was not found - test skipped\n"; print "\n--------------------------------------------------\n"; } $testnum = $testnum + 1; if ($starter eq "srun" or $starter eq "aprun") { if ($testnum ge 3) { last; } } } if ($havedvm) { if (!$SHOWME) { $cmd = "prun --system-server-only --terminate"; system($cmd); waitpid($havedvm, 0); } $havedvm = 0; } $index = $index + 1; } if ($myresults) { close(FILE); } prrte-3.0.13/contrib/scaling/Makefile0000664000175000017500000000046015145263240017640 0ustar alastairalastairPROGS = prte_no_op mpi_no_op mpi_memprobe all: $(PROGS) CFLAGS = -O prte_no_op: prte_no_op.c prtecc -o prte_no_op prte_no_op.c mpi_no_op: mpi_no_op.c mpicc -o mpi_no_op mpi_no_op.c mpi_memprobe: mpi_memprobe.c mpicc -o mpi_memprobe mpi_memprobe.c -lopen-pal -lopen-rte clean: rm -f $(PROGS) *~ prrte-3.0.13/contrib/header_replacement.sh0000775000175000017500000001367515145263240020742 0ustar alastairalastair#!/bin/sh # # Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. # Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. # # prte_show_help_replacement.sh Script to detect occurences of # #include "prte/util/show_help.h", where actually either # 1. #include "opal/util/output.h" # 2. #include "prte/mca/rml/rml_types.h" # were required. # # Some grep/sed mojo may be of interest to others... 
# # # Function adds into FILE a HEADER as the first #include # # Checks for #if to do the right thing, but does not handle # single-line or (even harder) multi-line comments # function add_header_first() { file=$1 # File to add header to header=$2 # E.g. opal/util/output.h for #include "opal/util/output.h" line=`grep -n "#include " $file | cut -f1 -d':' | head -n1` # check if this is a header wrapped in #ifdef HAVE_LALALA_H, if so, add before #if or #ifdef prev_line=$(($line - 1)) if [ $prev_line = -1 -o $prev_line = 0 ] ; then prev_line=1 fi head -n $prev_line $file | tail -n1 | grep -q "#if" \ && sed -i -e "${prev_line}s:#if.*:#include \"$header\"\n\n\0:" $file \ || sed -i -e "${line}s:#include.*:#include \"$header\"\n\0:" $file } function add_header() { file=$1 # File to add header to header=$2 # E.g. opal/util/output.h for #include "opal/util/output.h" after_header_pattern=$3 # Add after occurences of pattern, e.g. opal/util line=`grep -n "#include \"$after_header_pattern" $file | cut -f1 -d':' | head -n1` if [ $# -gt 3 -a "x$line" = "x" ] ; then after_header_pattern=$4 # If above pattern is not found, try more generic, e.g. opal/ line=`grep -n "#include \"$after_header_pattern" $file | cut -f1 -d':' | head -n1` # If we have a final even more general pattern to search for... if [ $# -eq 5 -a "x$line" = "x" ] ; then after_header_pattern=$5 # If above pattern is not found, try even more generic, e.g. opal/ line=`grep -n "#include \"$after_header_pattern" $file | cut -f1 -d':' | head -n1` fi # If still not found, go for plain '#include "' if [ "x$line" = "x" ] ; then echo Can neither find pattern $3 nor pattern $4 in file $file -- will include after the first include line=`grep -n "#include \"" $file | cut -f1 -d':' | head -n1` if [ "x$line" = "x" ] ; then echo REAL ERROR -- NO INCLUDES AT ALL. 
INCLUDE MANUALLY return fi # check if this is a header wrapped in #ifdef HAVE_LALALA_H, if so, add after endif next_line=$(($line + 1)) head -n $next_line $file | tail -n1 | grep -q \#endif \ && sed -i -e "${next_line}s:#endif.*:\0\n\n#include \"$header\":" $file \ || sed -i -e "${line}s:#include.*:\0\n#include \"$header\":" $file return fi fi # check if this is a header wrapped in #ifdef HAVE_LALALA_H, if so, add after endif next_line=$(($line + 1)) head -n $next_line $file | tail -n1 | grep -q \#endif \ && sed -i -e "${next_line}s:#endif.*:\0\n\n#include \"$header\":" $file \ || sed -i -e "${line}s:#include \"$after_header_pattern.*:\0\n#include \"$header\":" $file } function del_header() { file=$1 header=`echo $2 | sed 's/\//\\\\\//g'` line=`grep -n "#include \"$2" $file | cut -f1 -d':' | head -n1` if [ "x$line" = "x" ] ; then echo Can not find pattern $header file $file -- will not delete return fi # Remove the header including any characters at end of the line MULTI_LINE COMMENTS...? sed -i -e "/#include \"$header\".*/d" $file } SEARCH_HEADER=show_help.h # Search for all source files with show_help.h in it. for i in `find . 
-type f '(' -name '*.[cChysSfF]' -o -iname '*.cc' -o -name '*.cpp' -o -name '*.[fF]77' -o -name '*.[fF]90' ')' | sort | xargs grep -n $SEARCH_HEADER | cut -f1 -d':' | sort | uniq` ; do # Now we do know that we have prte/util/show_help.h found_prte_show_help_h=1 need_prte_show_help_h=0 found_opal_util_output_h=0 need_opal_util_output_h=0 found_prte_mca_rml_rml_types_h=0 need_prte_mca_rml_rml_types_h=0 grep -q prte_show_help $i && need_prte_show_help_h=1 grep -q opal\/util\/output.h $i && found_opal_util_output_h=1 grep -q opal_output $i && need_opal_util_output_h=1 grep -q prte\/mca\/rml\/rml_types.h $i && found_prte_mca_rml_rml_types_h=1 grep -q -E '(prte_rml_tag_t|PRTE_RML_)' $i && need_prte_mca_rml_rml_types_h=1 if [ $need_opal_util_output_h -eq 1 -a $found_opal_util_output_h -eq 0 ] ; then echo -e $i \\t Found opal_output in file, but not include opal/util/output.h add_header $i opal/util/output.h opal/util/ opal/class/ opal/ fi if [ $need_prte_mca_rml_rml_types_h -eq 1 -a $found_prte_mca_rml_rml_types_h -eq 0 ] ; then echo -e $i \\t Found prte_rml_tag_t or PRTE_RML_ in file, but no include prte/mca/rml/rml_types.h add_header $i prte/mca/rml/rml_types.h prte/mca/rml/ prte/mca/ prte/ fi if [ $need_prte_show_help_h -eq 0 ] ; then echo -e $i \\t Found prte_rml_tag_t or PRTE_RML_ in file, but no include prte/mca/rml/rml_types.h del_header $i prte/util/show_help.h fi done prrte-3.0.13/contrib/find_occurence.pl0000775000175000017500000000240615145263240020070 0ustar alastairalastair#!/usr/bin/perl # # Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. # Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. 
# All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow # # $HEADER$ # if (scalar(@ARGV) != 2) { print "Usage: #find_occurence \n"; exit(3); } $search_string = @ARGV[0]; $source_path = @ARGV[1]; open (SOURCE_FILES, "find $source_path -name *.c |") || print "could not open the pipe\n"; while () { #open the file and delete the occurence $file_name = $_; open (FILE, "$file_name") || print "Could not open $file_name for reading\n"; while () { if (/$search_string/) { print $file_name; } } close(FILE); } close(SOURCE_FILES); prrte-3.0.13/contrib/annual-maintenance/0000775000175000017500000000000015145263240020316 5ustar alastairalastairprrte-3.0.13/contrib/annual-maintenance/find-committers-from-last-year.pl0000775000175000017500000000577215145263240026635 0ustar alastairalastair#!/usr/bin/env perl use strict; use Data::Dumper; ########################################################################## # Read in the AUTHORS file die "Run this script at the top of an OMPI SVN tree" if (! -r "AUTHORS"); print "Reading AUTHORS file...\n"; open(AUTHORS, "AUTHORS") || die "Can't open AUTHORS file"; my $in = 0; my $authors; my $count = 0; while () { chomp; if ($in) { if (length($_) == 0) { $in = 0; next; } # There's probably a good regexp that will sort this better, # but I'm a little too lazy at the moment. :-( So just use # fixed widths for the name and affiliation fields, and then # strip off trailing whitespace. 
m/(\S+)\s+/;
        my $username = $1;
        # fixed-width columns: name starts at offset 14, affiliation at 42
        my $name = substr($_, 14, 27);
        my $affiliation = substr($_, 42);
        $name =~ s/(\s+)$//;
        $affiliation =~ s/(\s+)$//;

        $authors->{$username} = {
            username => $username,
            name => $name,
            affiliation => $affiliation,
            active => 0,
        };
        ++$count;
    } else {
        # the dashed separator line marks the start of the author table
        $in = 1
            if (/^------.+ .+ .+-----$/);
    }
}
close(AUTHORS);

print "Read $count authors from AUTHORS file\n";

##########################################################################

# Read committers from SVN log over the past year

my $trunk = "https://svn.open-mpi.org/svn/ompi/trunk";

my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) =
    localtime(time);
$year += 1900;
my $today = sprintf("%04d-%02d-%02d", $year, $mon+1, $mday);
my $year_ago = sprintf("%04d-%02d-%02d", $year-1, $mon+1, $mday);

$count = 0;
my $committers;
print "Reading SVN log to find committers over past year...\n";
open(SVN, "svn log -v -r '{$today}:{$year_ago}' $trunk|")
    || die "Can't open svn log";
# read the log line by line from the SVN pipe
while (<SVN>) {
    # header lines look like "rNNN | user | YYYY-MM-DD HH:MM:SS ..."
    if (m/^(r\d+) \| (\S+) \| (\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d)/) {
        print "Indexed $1 ($3)\n";
        if (!exists($committers->{$2})) {
            $committers->{$2} = 1;
            ++$count;
        }
        if (!exists($authors->{$2})) {
            print "WARNING: committer $2 is not in AUTHORS!\n";
        } else {
            $authors->{$2}->{active} = 1;
        }
    }
}
close(SVN);

print "Found $count SVN committers over the last year\n";

##########################################################################

# Print list of committers over the past year

print "
AUTHORS with commits in the past year:
--------------------------------------\n";
foreach my $username (sort(keys(%{$authors}))) {
    if ($authors->{$username}->{active}) {
        print "$username -> $authors->{$username}->{name}, $authors->{$username}->{affiliation}\n";
    }
}

print "
AUTHORS with NO commits in the past year:
-----------------------------------------\n";
foreach my $username (sort(keys(%{$authors}))) {
    if (!$authors->{$username}->{active}) {
        print "$username -> $authors->{$username}->{name},
$authors->{$username}->{affiliation}\n"; } } prrte-3.0.13/contrib/annual-maintenance/convert-to-git.txt0000664000175000017500000000503415145263240023742 0ustar alastairalastairFrom Dave: For fun, here's the Git version of the "AUTHORS with commits in the past year" part of your script: ----8<---- savbu-usnic-a ~/g/ompi-svn-mirror git:master ❮❮❮ git log --all --since='1 year ago' --pretty=tformat:'%ae' | sort | uniq -c 39 adrian@open-mpi-git-mirror.example.com 3 alex@open-mpi-git-mirror.example.com 5 alinas@open-mpi-git-mirror.example.com 9 amikheev@open-mpi-git-mirror.example.com 90 bosilca@open-mpi-git-mirror.example.com 22 brbarret@open-mpi-git-mirror.example.com 5 devendar@open-mpi-git-mirror.example.com 60 dgoodell@open-mpi-git-mirror.example.com 8 edgar@open-mpi-git-mirror.example.com 48 ggouaillardet@open-mpi-git-mirror.example.com 4 hadi@open-mpi-git-mirror.example.com 295 hjelmn@open-mpi-git-mirror.example.com 4 hpcchris@open-mpi-git-mirror.example.com 2 hppritcha@open-mpi-git-mirror.example.com 28 jladd@open-mpi-git-mirror.example.com 2 jroman@open-mpi-git-mirror.example.com 697 jsquyres@open-mpi-git-mirror.example.com 11 jurenz@open-mpi-git-mirror.example.com 3 manjugv@open-mpi-git-mirror.example.com 209 miked@open-mpi-git-mirror.example.com 24 mpiteam@open-mpi-git-mirror.example.com 4 naughtont@open-mpi-git-mirror.example.com 50 osvegis@open-mpi-git-mirror.example.com 2 pasha@open-mpi-git-mirror.example.com 5 regrant@open-mpi-git-mirror.example.com 6 rfaucett@open-mpi-git-mirror.example.com 1516 rhc@open-mpi-git-mirror.example.com 79 rolfv@open-mpi-git-mirror.example.com 2 swise@open-mpi-git-mirror.example.com 7 vasily@open-mpi-git-mirror.example.com 1 vvenkatesan@open-mpi-git-mirror.example.com 10 yosefe@open-mpi-git-mirror.example.com ----8<---- And the "NO commits": ----8<---- savbu-usnic-a ~/g/ompi-svn-mirror git:master ❯❯❯ git log --all --since='1 year ago' --pretty=tformat:'%ae' | sort | uniq > /tmp/active ; git log --pretty=tformat:'%ae' --all | 
sort | uniq > /tmp/all ; diff -u /tmp/all /tmp/active | grep '^-[^-]' -abbyz@open-mpi-git-mirror.example.com -adi@open-mpi-git-mirror.example.com -adkulkar@open-mpi-git-mirror.example.com -afriedle@open-mpi-git-mirror.example.com -alekseys@open-mpi-git-mirror.example.com -alexma@open-mpi-git-mirror.example.com -angskun@open-mpi-git-mirror.example.com -Anya@open-mpi-git-mirror.example.com -apant@open-mpi-git-mirror.example.com -bbenton@open-mpi-git-mirror.example.com -bouteill@open-mpi-git-mirror.example.com -casswell@open-mpi-git-mirror.example.com -coti@open-mpi-git-mirror.example.com [...] ----8<---- prrte-3.0.13/contrib/fix_indent.pl0000775000175000017500000000326415145263240017254 0ustar alastairalastair#!/usr/bin/perl # # Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. # Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2016 Intel, Inc. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow # # $HEADER$ # #To keep brian happy use Text::Tabs; if (scalar(@ARGV) != 1) { print "We need a source tree path\n"; exit(3); } $source_path = @ARGV[0]; open(HEADERS, "find $source_path -name *.h |"); while() { open(TEMP, ">temp.txt"); $file_name = $_; print $file_name; open(FILE, "$file_name"); my @lines_with_tabs = ; close(FILE); my @expanded_lines = expand(@lines_with_tabs); print TEMP join("\n",@expanded_lines),"\n"; close(TEMP); system("mv temp.txt $file_name"); } close(HEADERS); open(SOURCES, "find $source_path -name *.c |"); while() { open(TEMP, ">temp.txt"); $file_name = $_; print $file_name; open(FILE, "$file_name"); my @lines_with_tabs = ; close(FILE); my @expanded_lines = expand(@lines_with_tabs); print TEMP join("\n",@expanded_lines),"\n"; close(TEMP); system("mv temp.txt $file_name"); } close(SOURCES); prrte-3.0.13/contrib/search_compare.pl0000775000175000017500000002055215145263240020077 0ustar alastairalastair#!/usr/bin/perl # # Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. # Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2013-2015 Intel, Inc. All rights reserved. 
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#

use File::Find;
use File::Basename;
use File::Compare;
use File::Copy;
use File::Path;
use Getopt::Long;
use Text::Diff;

my $src_arg;
my $tgt_arg;
my $src_dir;
my $target_dir;
my @src_tree = ();
my @tgt_tree = ();
my $flag;
my $help_arg = 0;
my $diff_file = "";
my $diff_arg;
my $update_arg;
my $modified_arg;
my $repo_type;
my $cmd;

# File::Find callback.  Appends the full path of every file worth comparing
# to @src_tree (when $flag == 0) or @tgt_tree (otherwise), skipping links,
# build by-products, generated files and executables.
sub construct {
    # don't process directories or links, and don't recurse down
    # "special" directories
    if ( -l $_ ) { return; }
    if ( -d $_ ) {
        if ((/\.deps/) ||
            (/\.libs/) ||
            (/\.git/) ||
            (/\.dSYM/) ||
            ($_ eq "autom4te.cache") ||
            ($_ eq "libltdl")) {
            $File::Find::prune = 1;
        }
        return;
    }

    # $File::Find::name is the path relative to the starting point.
    # $_ contains the file's basename.  The code automatically changes
    # to the processed directory, so we want to add the full pathname.

    # ignore some obvious files we don't care about
    if (($_ =~ /\.dirstamp$/i) ||
        ($_ =~ /\.DS_Store$/i) ||
        ($_ =~ /\.lo$/i) ||
        ($_ =~ /\.la$/i) ||
        ($_ =~ /\.o$/i) ||
        ($_ =~ /\.\d$/i)) {
        $File::Find::prune = 1;
        return;
    }
    if (($_ =~ /\.pc$/i) ||
        ($_ eq "config.h") ||
        ($_ eq "opal_config.h") ||
        ($_ eq "opalcc-wrapper-data.txt") ||
        ($_ eq "opalc++-wrapper-data.txt")) {
        $File::Find::prune = 1;
        return;
    }
    # autoconf stamp files (stamp-h, stamp-h1, ...); a string "eq" against
    # "stamp-h$" could never match a real file name
    if ($_ =~ /^stamp-h\d*$/) {
        $File::Find::prune = 1;
        return;
    }
    if (($_ eq "Makefile") ||
        ($_ eq "Makefile.in") ||
        ($_ eq "config.log") ||
        ($_ eq "config.status") ||
        ($_ eq "static-components.h")) {
        $File::Find::prune = 1;
        return;
    }

    # ignore executables
    if (-x $File::Find::name) {
        $File::Find::prune = 1;
        return;
    }

    if ($flag == 0) {
        push(@src_tree, $File::Find::name);
    } else {
        push(@tgt_tree, $File::Find::name);
    }
}

# Command line parameters

my $ok = Getopt::Long::GetOptions("help|h" => \$help_arg,
                                  "src=s" => \$src_arg,
                                  "tgt=s" => \$tgt_arg,
                                  "diff=s" => \$diff_arg,
                                  "update" => \$update_arg,
                                  "update-modified" => \$modified_arg
    );

if (!$ok || $help_arg) {
    print "Invalid command line argument.\n\n"
if (!$ok);
    print "Options:
  --diff | -diff       Output diff of changed files to specified file
  --src | -src         Head of source directory
  --tgt | -tgt         Head of target directory
  --update | -update   Apply changes to update target
  --update-modified    Only update modified files (do not add/delete files)\n";
    exit($ok ? 0 : 1);
}

if (!$src_arg || !$tgt_arg) {
    print "Missing src or tgt directory\n";
    exit(1);
}

$src_dir = File::Spec->rel2abs($src_arg);
$target_dir = File::Spec->rel2abs($tgt_arg);

my @srcpth = ();
my @newpth = ();
my $spath;
my $npath;
# if we are updating, then identify the
# leading elements of the src_dir path that
# must be replaced when pointing to the
# target location for a file copy
if ($update_arg || $modified_arg) {
    my $s;
    my $t;
    my @srcpath = File::Spec->splitdir($src_dir);
    my @tgtpath = File::Spec->splitdir($target_dir);
    # find the place where they first differ; stop as well once both
    # paths are used up so that identical directories cannot loop forever
    my $found = 0;
    while ($found == 0 && (@srcpath || @tgtpath)) {
        $s = shift(@srcpath);
        $t = shift(@tgtpath);
        push(@srcpth, $s);
        push(@newpth, $t);
        if ($s ne $t) {
            $found = 1;
        }
    }
    # if either path has been exhausted, then we are done
    if (0 != scalar(@srcpath) && 0 != scalar(@tgtpath)) {
        # find the place where they re-converge - this
        # might be nowhere, e.g., if they provided the
        # top of two different source trees
    }
    $spath = join("/", @srcpth);
    $npath = join("/", @newpth);
    print "Source: " . $spath . " New: " . $npath . "\n";
}

if ($diff_arg) {
    $diff_file = File::Spec->rel2abs($diff_arg);
    unlink ($diff_file);
    open(MYFILE, ">", $diff_file)
        or die "Cannot open diff file $diff_file: $!\n";
}

my $len_src_dir = length($src_dir);
my $len_tgt_dir = length($target_dir);

# construct a tree of all files in the source directory tree
$flag = 0;
find(\&construct, $src_dir);

# construct a tree of all files in the target directory tree
$flag = 1;
find(\&construct, $target_dir);

print "size of src_tree: " . @src_tree . ".\n";
print "size of tgt_tree: " . @tgt_tree . ".\n";

# print a list of files in the source tree that need to be added to the target
my $found;
my $src_file;
my $tgt_file;
my @modified = ();
my @src_pared = ();
my $i;
foreach $src (@src_tree) {
    # strip the leading elements of the path that was given to us
    $src_file = substr($src, $len_src_dir);
    $found = 0;
    $i = -1;
    foreach $tgt (@tgt_tree) {
        $i = $i + 1;
        $tgt_file = substr($tgt, $len_tgt_dir);
        if ($src_file eq $tgt_file) {
            # printf "Matched: " . $src_file . " " . $tgt_file . "\n";
            # file has been found - ignore it
            $found = 1;
            if (compare($src, $tgt) != 0) {
                if ($diff_arg) {
                    my $diff = diff $tgt, $src, { STYLE => "Unified" };
                    print MYFILE $diff . "\n";
                    push(@modified, $src);
                } elsif ($update_arg || $modified_arg) {
                    print "Updating $src to $tgt\n";
                    copy("$src", "$tgt") or die "Copy failed: src=$src tgt=$tgt\n";
                } else {
                    push(@modified, $src);
                }
            }
            # remove this file from the target tree as it has been found
            # splice @tgt_tree, $i, 1;
            # relative paths are unique, so stop scanning once matched
            last;
        }
    }
    if (!$modified_arg && $found == 0) {
        if ($update_arg) {
            my $targetpath = $src;
            # swap the leading source prefix for the target prefix; quote
            # the prefix so path characters are not read as regex syntax
            $targetpath =~ s/^\Q$spath\E/$npath/;
            my $tpath = dirname($targetpath);
            if (! -d $tpath) {
                my $dirs = eval { mkpath($tpath) };
                if (!$dirs) {
                    print "Failed to create path $tpath\n";
                    exit;
                }
                print "Adding $tpath to repo\n";
                # system() runs /bin/sh, which need not provide pushd or ">&"
                $cmd = "cd $target_dir && git add $tpath > /dev/null 2>&1";
                system($cmd);
            }
            print "Adding $src to repo\n";
            copy("$src", "$targetpath") or die "Update failed: src=$src tgt=$targetpath\n";
            $cmd = "cd $target_dir && git add $targetpath > /dev/null 2>&1";
            system($cmd);
        } else {
            print "Add: " . $src .
"\n"; } } else { push(@src_pared, $src); } } print "\n"; # print a list of files in the target tree that need to be deleted if (!$modified_arg) { foreach $tgt (@tgt_tree) { $found = 0; $tgt_file = substr($tgt, $len_tgt_dir); foreach $src (@src_pared) { $src_file = substr($src, $len_src_dir); if ($src_file eq $tgt_file) { # file has been found - ignore it $found = 1; break; } } if ($found == 0) { if ($update_arg) { print "Removing $tgt_file from repo\n"; $cmd = "pushd $target_dir >& /dev/null; git rm .$tgt_file >& /dev/null; popd >& /dev/null"; system($cmd); } else { print "Delete: " . $tgt . "\n"; } } } } print "\n"; # print a list of files that have been modified foreach $tgt (@modified) { print "Modified: " . $tgt . "\n"; } if ($diff_arg) { close(MYFILE); } prrte-3.0.13/contrib/dist/0000775000175000017500000000000015145263240015523 5ustar alastairalastairprrte-3.0.13/contrib/dist/find-copyrights.pl0000775000175000017500000001067715145263240021207 0ustar alastairalastair#!/usr/bin/env perl # # Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. # use strict; use Data::Dumper; use File::Find; # Find all files in the tree (except LICENSE) and look for all # copyright notices. Build a consolidated list and print it out for a # human to check and include in the LICENSE file. my $copyrights; # Ensure that we're in the top of an SVN or hg directory. die "Must be in root of OMPI tree" if (! -d ".git"); # Find all interesting files (skip the top-level LICENSE file) my @files; &File::Find::find( sub { push(@files, $File::Find::name) if ($_ ne "." && $_ ne ".." && !($_ eq "LICENSE" && $File::Find::dir eq ".") && $_ !~ /~$/ && $_ !~ /\.bak$/ && $_ !~ /\.orig$/ && -f $_ && ! -l $_ && $File::Find::dir !~ /\.git/ && $File::Find::dir !~ /\.libs/ && $File::Find::dir !~ /\.deps/); }, "."); print "Found $#files files. 
Checking each one...\n";

# Go through each of them and look for copyrights
my $copyrights;
my $core;

# Record one copyright notice in
# $copyrights->{$core}->{$org}->{$year}->{$file}, counting repeats.
#   $org  - holder text as captured from the notice; a trailing
#           "All rights reserved." is removed before it is used as a key
#   $year - a single year, or a "YYYY-YYYY" range that is expanded so that
#           every year in the range is recorded individually
#   $file - path of the file the notice was found in
# Reads the global $core to select the core / non-core bucket.
sub save {
    my ($org, $year, $file) = @_;

    # Remove leading and trailing spaces
    $org =~ s/^\s*(\S[\S\s]+?)\s*$/$1/;

    # Remove any of "All rights reserved." at the end
    $org =~ s/\.$//;
    $org =~ s/ *reserved//i;
    $org =~ s/ *rights//i;
    $org =~ s/ *all//i;
    $org =~ s/\.$//;

    # Save a range of years
    if ($year =~ m/([0-9]{4})-([0-9]{4})/) {
        # copy the captures: the recursive calls below run regexes too
        my ($first, $final) = ($1, $2);
        for (my $y = $first; $y <= $final; ++$y) {
            save($org, $y, $file);
        }
        return;
    }

    # Save a single year
    if (!exists($copyrights->{$core}->{$org}->{$year}->{$file})) {
        $copyrights->{$core}->{$org}->{$year}->{$file} = 1;
    } else {
        ++$copyrights->{$core}->{$org}->{$year}->{$file}
    }
}

foreach my $f (@files) {
    # Is this core OMPI or non-core?
    $core = 1;
    $core = 0
        if ($f =~ /ompi\/contrib\/[a-zA-Z0-9]+\// ||
            $f =~ /opal\/event/ ||
            $f =~ /ompi\/mca\/io\/romio\/romio/);

    # Scan the file for copyrights; "or" (not "||") so that a failed
    # open actually reaches the die
    open(FILE, "<", $f) or die "Can't open file: $f: $!";
    my $year;
    my $current;
    while (<FILE>) {
        my $line = $_;
        # End of all copyrights in this file
        if ($line =~ /\$COPYRIGHT\$/) {
            save($current, $year, $f)
                if (defined($current));
            last;
        }
        # Beginning of a new copyright
        elsif ($line =~ m/Copyright \(c\) ([0-9\-]+) (.+)$/) {
            # Save the last copyright
            save($current, $year, $f)
                if (defined($current));
            $year = $1;
            $current = $2;
        }
        # Beginning of something else
        elsif (defined($current) &&
               ($line =~ /\s*\*\s*$/ ||
                $line =~ /^\s*$/ ||
                $line =~ /^\s*\#\s*$/)) {
            save($current, $year, $f);
            $current = undef;
        }
    }
    close(FILE);
}

# Check for duplicate copyrights in the same file
foreach my $c (qw/1 0/) {
    foreach my $org (sort(keys(%{$copyrights->{$c}}))) {
        foreach my $year (sort(keys(%{$copyrights->{$c}->{$org}}))) {
            foreach my $f (keys(%{$copyrights->{$c}->{$org}->{$year}})) {
                if ($copyrights->{$c}->{$org}->{$year}->{$f} > 1) {
                    print "WARNING: repeated copyright in $f:\n    $org ($year)\n";
                }
            }
        }
    }
}

# Check for weird copyright years
my ($sec,$min,$hour,$mday,$mon,$year_now,$wday,$yday,$isdst) =
localtime(time); $year_now += 1970; foreach my $c (qw/1 0/) { foreach my $org (sort(keys(%{$copyrights->{$c}}))) { foreach my $year (sort(keys(%{$copyrights->{$c}->{$org}}))) { if ($year < 1996 || $year > $year_now) { print "WARNING: Suspicious copyright year ($org:$year):\n"; foreach my $f (keys(%{$copyrights->{$c}->{$org}->{$year}})) { print " $f\n"; } } } } } # Print out what we found print "Found copyrights:\n"; foreach my $c (qw/1 0/) { print "========= Core: $c\n"; foreach my $org (sort(keys(%{$copyrights->{$c}}))) { print "$org: " . join(",", sort(keys(%{$copyrights->{$c}->{$org}}))) . "\n"; } } open OUT, ">out.txt" || die "can't open out"; my $d = new Data::Dumper([$copyrights]); $d->Purity(1)->Indent(1); my $s = $d->Dump; print OUT $s; close(OUT); print "Done!\n"; prrte-3.0.13/contrib/dist/linux/0000775000175000017500000000000015145263240016662 5ustar alastairalastairprrte-3.0.13/contrib/dist/linux/prrte.spec0000664000175000017500000007212615145263240020702 0ustar alastairalastair# # Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. # Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2006-2020 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2013 Mellanox Technologies, Inc. # All rights reserved. # Copyright (c) 2015 Research Organization for Information Science # and Technology (RIST). All rights reserved. # Copyright (c) 2017-2022 Intel, Inc. All rights reserved. # Copyright (c) 2022 Nanook Consulting. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow # # $HEADER$ # ############################################################################ # # Copyright (c) 2003, The Regents of the University of California, through # Lawrence Berkeley National Laboratory (subject to receipt of any # required approvals from the U.S. Dept. of Energy). All rights reserved. # # Initially written by: # Greg Kurtzer, # ############################################################################ ############################################################################# # # Configuration Options # # Options that can be passed in via rpmbuild's --define option. Note # that --define takes *1* argument: a multi-token string where the first # token is the name of the variable to define, and all remaining tokens # are the value. For example: # # shell$ rpmbuild ... --define 'install_in_opt 1' ... # # Or (a multi-token example): # # shell$ rpmbuild ... \ # --define 'configure_options CFLAGS=-g --with-pmix=/usr/local/pmix' ... # ############################################################################# # Define this if you want to make this SRPM build in # /opt/NAME/VERSION-RELEASE instead of the default /usr/. # type: bool (0/1) %{!?install_in_opt: %define install_in_opt 0} # Define this if you want this RPM to install environment setup # shell scripts. # type: bool (0/1) %{!?install_shell_scripts: %define install_shell_scripts 0} # type: string (root path to install shell scripts) %{!?shell_scripts_path: %define shell_scripts_path %{_bindir}} # type: string (base name of the shell scripts) %{!?shell_scripts_basename: %define shell_scripts_basename mpivars} # Define this to 1 if you want this RPM to install a modulefile. 
# type: bool (0/1) %{!?install_modulefile: %define install_modulefile 0} # type: string (root path to install modulefiles) %{!?modulefile_path: %define modulefile_path /usr/share/Modules/modulefiles} # type: string (subdir to install modulefile) %{!?modulefile_subdir: %define modulefile_subdir %{name}} # type: string (name of modulefile) %{!?modulefile_name: %define modulefile_name %{version}} # The name of the modules RPM. Can vary from system to system. # RHEL6 calls it "environment-modules". # type: string (name of modules RPM) %{!?modules_rpm_name: %define modules_rpm_name environment-modules} # Should we use the mpi-selector functionality? # type: bool (0/1) %{!?use_mpi_selector: %define use_mpi_selector 0} # The name of the mpi-selector RPM. Can vary from system to system. # type: string (name of mpi-selector RPM) %{!?mpi_selector_rpm_name: %define mpi_selector_rpm_name mpi-selector} # The location of the mpi-selector executable (can be a relative path # name if "mpi-selector" can be found in the path) # type: string (path to mpi-selector executable) %{!?mpi_selector: %define mpi_selector mpi-selector} # Should we build a debuginfo RPM or not? # type: bool (0/1) %{!?build_debuginfo_rpm: %define build_debuginfo_rpm 0} # Should we build an all-in-one RPM, or several sub-package RPMs? # type: bool (0/1) %{!?build_all_in_one_rpm: %define build_all_in_one_rpm 1} # Should we use the default "check_files" RPM step (i.e., check for # unpackaged files)? It is discouraged to disable this, but some # installers need it (e.g., older versions of OFED, because they # installed lots of other stuff in the BUILD_ROOT before Open MPI/SHMEM). # type: bool (0/1) %{!?use_check_files: %define use_check_files 1} # By default, RPM supplies a bunch of optimization flags, some of # which may not work with non-gcc compilers.
We attempt to weed some # of these out (below), but sometimes it's better to just ignore them # altogether (e.g., PGI 6.2 will warn about unknown compiler flags, # but PGI 7.0 will error -- and RPM_OPT_FLAGS contains a lot of flags # that PGI 7.0 does not understand). The default is to use the flags, # but you can set this variable to 0, indicating that RPM_OPT_FLAGS # should be erased (in which case you probably want to supply your own # optimization flags!). # type: bool (0/1) %{!?use_default_rpm_opt_flags: %define use_default_rpm_opt_flags 1} # Some compilers can be installed via tarball or RPM (e.g., Intel, # PGI). If they're installed via RPM, then rpmbuild's auto-dependency # generation stuff will work fine. But if they're installed via # tarball, then rpmbuild's auto-dependency generation stuff will # break; complaining that it can't find a bunch of compiler .so files. # So provide an option to turn this stuff off. # type: bool (0/1) %{!?disable_auto_requires: %define disable_auto_requires 0} # On some platforms, Open MPI/SHMEM just flat-out doesn't work with # -D_FORTIFY_SOURCE (e.g., some users have reported that there are # problems on ia64 platforms). In this case, just turn it off # (meaning: this specfile will strip out that flag from the # OS-provided compiler flags). We already strip out _FORTIFY_SOURCE # for non-GCC compilers; setting this option to 0 will *always* strip # it out, even if you're using GCC. # type: bool (0/1) %{!?allow_fortify_source: %define allow_fortify_source 1} # Select md5 packing algorithm, so that src.rpm created on one distro can be read on another.
%global _binary_filedigest_algorithm 1 %global _source_filedigest_algorithm 1 ############################################################################# # # Configuration Logic # ############################################################################# %if %{install_in_opt} %define _prefix /opt/%{name}/%{version} %define _sysconfdir /opt/%{name}/%{version}/etc %define _libdir /opt/%{name}/%{version}/lib %define _includedir /opt/%{name}/%{version}/include %endif %if !%{build_debuginfo_rpm} %define debug_package %{nil} %endif %if %(test "%{_prefix}" = "/usr" && echo 1 || echo 0) %global _sysconfdir /etc %else %global _sysconfdir %{_prefix}/etc %endif # Is the sysconfdir under the prefix directory? This affects # whether we list the sysconfdir separately in the files sections, # below. %define sysconfdir_in_prefix %(test "`echo %{_sysconfdir} | grep %{_prefix}`" = "" && echo 0 || echo 1) %{!?_pkgdatadir: %define _pkgdatadir %{_datadir}/prrte} %if !%{use_check_files} %define __check_files %{nil} %endif %{!?configure_options: %define configure_options %{nil}} %if !%{use_default_rpm_opt_flags} %define optflags "" %endif ############################################################################# # # Preamble Section # ############################################################################# Summary: PMIx Reference RunTime Environment (PRRTE) Name: %{?_name:%{_name}}%{!?_name:prrte} Version: $VERSION Release: 1%{?dist} License: BSD Group: Development/Libraries Source: %{name}-%{version}.tar.$EXTENSION Packager: %{?_packager:%{_packager}}%{!?_packager:%{_vendor}} Vendor: %{?_vendorinfo:%{_vendorinfo}}%{!?_vendorinfo:%{_vendor}} Distribution: %{?_distribution:%{_distribution}}%{!?_distribution:%{_vendor}} Prefix: %{_prefix} Provides: prrte = %{version} BuildRoot: /var/tmp/%{name}-%{version}-%{release}-root BuildRequires: gcc BuildRequires: flex BuildRequires: libevent-devel BuildRequires: pmix >= 4.2.0 BuildRequires: hwloc-devel %if %{disable_auto_requires} 
AutoReq: no %endif %if %{install_modulefile} Requires: %{modules_rpm_name} %endif %description PRRTE is the PMIx Reference Run Time Environment. The project is formally referred to in documentation by "PRRTE", and the GitHub repository is "prrte". However, we have found that most users do not like typing the two consecutive "r"s in the name. Hence, all of the internal API symbols, environment variables, MCA frameworks, and CLI executables all use the abbreviated "prte" (one "r", not two) for convenience. This RPM contains all the tools necessary to compile, link, and run the PRRTE system. %if !%{build_all_in_one_rpm} ############################################################################# # # Preamble Section (runtime) # ############################################################################# %package runtime Summary: Tools and plugin modules for running PRRTE Group: Development/Libraries Provides: prrte = %{version} Provides: prrte-runtime = %{version} %if %{disable_auto_requires} AutoReq: no %endif %if %{install_modulefile} Requires: %{modules_rpm_name} %endif %description runtime PRRTE is the PMIx Reference Run Time Environment. The project is formally referred to in documentation by "PRRTE", and the GitHub repository is "prrte". However, we have found that most users do not like typing the two consecutive "r"s in the name. Hence, all of the internal API symbols, environment variables, MCA frameworks, and CLI executables all use the abbreviated "prte" (one "r", not two) for convenience. This subpackage provides general tools (prte, prun, prterun, etc.) and the Module Component Architecture (MCA) base and plugins necessary for running the PRRTE system. 
%endif ############################################################################# # # Preamble Section (devel) # ############################################################################# %package devel Summary: Development tools and header files for PRRTE Group: Development/Libraries %if %{disable_auto_requires} AutoReq: no %endif Provides: prrte-devel = %{version} Requires: %{name}-runtime %description devel PRRTE is the PMIx Reference Run Time Environment. The project is formally referred to in documentation by "PRRTE", and the GitHub repository is "prrte". However, we have found that most users do not like typing the two consecutive "r"s in the name. Hence, all of the internal API symbols, environment variables, MCA frameworks, and CLI executables all use the abbreviated "prte" (one "r", not two) for convenience. This subpackage provides the development files for PRRTE, such as wrapper compilers and header files for development of PRRTE plugins. ############################################################################# # # Prepatory Section # ############################################################################# %prep # Unbelievably, some versions of RPM do not first delete the previous # installation root (e.g., it may have been left over from a prior # failed build). This can lead to Badness later if there's files in # there that are not meant to be packaged. rm -rf $RPM_BUILD_ROOT %setup -q -n %{name}-%{version} ############################################################################# # # Build Section # ############################################################################# %build # rpmbuild processes seem to be geared towards the GNU compilers -- # they pass in some flags that will only work with gcc. So if we're # trying to build with some other compiler, the process will choke. 
# This is *not* something the user can override with a well-placed # --define on the rpmbuild command line, unless they find and override # all "global" CFLAGS kinds of RPM macros (every distro names them # differently). For example, non-gcc compilers cannot use # FORTIFY_SOURCE (at least, not as of 6 Oct 2006). We can really only # examine the basename of the compiler, so search for it in a few # places. %if %{allow_fortify_source} using_gcc=1 if test "$CC" != ""; then # Do horrible things to get the basename of just the compiler, # particularly in the case of multiword values for $CC eval "set $CC" if test "`basename $1`" != "gcc"; then using_gcc=0 fi fi if test "$using_gcc" = "1"; then # Do wretched things to find a CC=* token eval "set -- %{configure_options}" compiler= while test "$1" != "" -a "$compiler" = ""; do case "$1" in CC=*) compiler=`echo $1 | cut -d= -f2-` ;; esac shift done # Now that we *might* have the compiler name, do a best-faith # effort to see if it's gcc. Blah! if test "$compiler" != ""; then if test "`basename $compiler`" != "gcc"; then using_gcc=0 fi fi fi %else # If we're not allowing _FORTIFY_SOURCE, then just set using_gcc to 0 and # the logic below will strip _FORTIFY_SOURCE out if it's present. using_gcc=0 %endif # If we're not using the default RPM_OPT_FLAGS, then wipe them clean # (the "optflags" macro has already been wiped clean, above). %if !%{use_default_rpm_opt_flags} RPM_OPT_FLAGS= export RPM_OPT_FLAGS %endif # If we're not GCC, strip out any GCC-specific arguments in the # RPM_OPT_FLAGS before potentially propagating them everywhere. if test "$using_gcc" = 0; then # Non-gcc compilers cannot handle FORTIFY_SOURCE (at least, not as # of Oct 2006) RPM_OPT_FLAGS="`echo $RPM_OPT_FLAGS | sed -e 's@-D_FORTIFY_SOURCE[=0-9]*@@'`" # Non-gcc compilers will generate warnings for several flags # placed in RPM_OPT_FLAGS by RHEL5, but -mtune=generic will cause # an error for icc 9.1.
RPM_OPT_FLAGS="`echo $RPM_OPT_FLAGS | sed -e 's@-mtune=generic@@'`"
fi

CFLAGS="%{?cflags:%{cflags}}%{!?cflags:$RPM_OPT_FLAGS}"
export CFLAGS

%configure %{configure_options}
%{__make} %{?mflags}


#############################################################################
#
# Install Section
#
#############################################################################
%install
%{__make} install DESTDIR=$RPM_BUILD_ROOT %{?mflags_install}

# We've had cases of config.log being left in the installation tree.
# We don't need that in an RPM.
find $RPM_BUILD_ROOT -name config.log -exec rm -f {} \;

# First, the [optional] modulefile

%if %{install_modulefile}
%{__mkdir_p} $RPM_BUILD_ROOT/%{modulefile_path}/%{modulefile_subdir}/
cat <<EOF >$RPM_BUILD_ROOT/%{modulefile_path}/%{modulefile_subdir}/%{modulefile_name}
#%Module

# NOTE: This is an automatically-generated file!  (generated by the
# PRRTE RPM).  Any changes made here will be lost a) if the RPM is
# uninstalled, or b) if the RPM is upgraded or uninstalled.

proc ModulesHelp { } {
   puts stderr "This module adds PRRTE v%{version} to various paths"
}

module-whatis   "Sets up PRRTE v%{version} in your environment"

prepend-path PATH "%{_prefix}/bin/"
prepend-path LD_LIBRARY_PATH %{_libdir}
EOF
%endif
# End of modulefile if

# Next, the [optional] shell scripts.  Each generated script is written
# from its own here-document, and each here-document must be closed by
# its own EOF line: without it, the first one swallows the second "cat"
# and every build command that follows it in this section.

%if %{install_shell_scripts}
%{__mkdir_p} $RPM_BUILD_ROOT/%{shell_scripts_path}
cat <<EOF > $RPM_BUILD_ROOT/%{shell_scripts_path}/%{shell_scripts_basename}.sh
# NOTE: This is an automatically-generated file!  (generated by the
# PRRTE RPM).  Any changes made here will be lost if the RPM is
# uninstalled or upgraded.

# PATH
if test -z "\`echo \$PATH | grep %{_bindir}\`"; then
    PATH=%{_bindir}:\${PATH}
    export PATH
fi

# LD_LIBRARY_PATH
if test -z "\`echo \$LD_LIBRARY_PATH | grep %{_libdir}\`"; then
    LD_LIBRARY_PATH=%{_libdir}\${LD_LIBRARY_PATH:+:}\${LD_LIBRARY_PATH}
    export LD_LIBRARY_PATH
fi
EOF
cat <<EOF > $RPM_BUILD_ROOT/%{shell_scripts_path}/%{shell_scripts_basename}.csh
# NOTE: This is an automatically-generated file!  (generated by the
# PRRTE RPM).  Any changes made here will be lost if the RPM is
# uninstalled or upgraded.

# path
if ("" == "\`echo \$path | grep %{_bindir}\`") then
    set path=(%{_bindir} \$path)
endif

# LD_LIBRARY_PATH
if ("1" == "\$?LD_LIBRARY_PATH") then
    if ("\$LD_LIBRARY_PATH" !~ *%{_libdir}*) then
        setenv LD_LIBRARY_PATH %{_libdir}:\${LD_LIBRARY_PATH}
    endif
else
    setenv LD_LIBRARY_PATH %{_libdir}
endif
EOF
%endif
# End of shell_scripts if

%if !%{build_all_in_one_rpm}

# Build lists of files that are specific to each package that are not
# easily identifiable by a single directory (e.g., the different
# libraries).  In a somewhat lame move, we can't just pipe everything
# together because if the user, for example, did --disable-shared
# --enable-static, the "grep" for .so files will not find anything and
# therefore return a non-zero exit status.  This will cause RPM to
# barf.  So be super lame and dump the egrep through /bin/true -- this
# always gives a 0 exit status.

# First, find all the files
rm -f all.files runtime.files remaining.files devel.files docs.files
find $RPM_BUILD_ROOT -type f -o -type l | \
   sed -e "s@$RPM_BUILD_ROOT@@" \
   > all.files | /bin/true

# Runtime files.  This should generally be library files and some
# executables (no man pages, no doc files, no header files).  Do *not*
# include wrapper compilers.
cat all.files | egrep '/lib/|/lib64/|/lib32/|/bin/|/etc/|/help-' > tmp.files | /bin/true
# Snip out a bunch of executables (e.g., wrapper compilers, pkgconfig
# files, .la and .a files)
egrep -vi 'pcc|pkgconfig|\.la$|\.a$' tmp.files > runtime.files | /bin/true
rm -f tmp.files

# Now take the runtime files out of all.files so that we don't get
# duplicates.
grep -v -f runtime.files all.files > remaining.files

# Devel files, potentially including VT files.  Basically -- just
# exclude the man pages and doc files.
cat remaining.files | \
   egrep -v '/man/|/doc/' \
   > devel.files | /bin/true

# Now take those files out of remaining.files so that we don't get
# duplicates.
grep -v -f devel.files remaining.files > docs.files

#################################################

# Now that we have a final list of files for each of the runtime and
# devel RPMs, snip even a few more files out of those lists
# because for directories that are wholly in only one RPM, we just
# list that directory in the file lists below, and RPM will pick up
# all files in that tree.  We therefore don't want to list any files
# in those trees in our *.files file lists.  Additionally, the man
# pages may get compressed by rpmbuild after this "install" step, so we
# might not even have their final filenames, anyway.

# runtime sub package
%if !%{sysconfdir_in_prefix}
grep -v %{_sysconfdir} runtime.files > tmp.files
mv tmp.files runtime.files
%endif
grep -v %{_pkgdatadir} runtime.files > tmp.files
mv tmp.files runtime.files

# devel sub package
grep -v %{_includedir} devel.files > tmp.files
mv tmp.files devel.files

%endif
# End of build_all_in_one_rpm

#############################################################################
#
# Clean Section
#
#############################################################################
%clean
# We may be in the directory that we're about to remove, so cd out of
# there before we remove it
cd /tmp

# Remove installed driver after rpm build finished
rm -rf $RPM_BUILD_DIR/%{name}-%{version}

test "x$RPM_BUILD_ROOT" != "x" && rm -rf $RPM_BUILD_ROOT

#############################################################################
#
# Post Install Section
#
#############################################################################

#############################################################################
#
# Pre Uninstall Section
#
#############################################################################

#############################################################################
#
# Files Section
#
#############################################################################

%if %{build_all_in_one_rpm}

#
# All in one RPM
#
# Easy; just list the prefix and then specifically call out the doc
# files.
#

%files
%defattr(-, root, root, -)
%if %(test "%{_prefix}" = "/usr" && echo 1 || echo 0)
%{_bindir}/*
%{_libdir}/*
%{_datadir}
%{_includedir}
%else
%{_prefix}
%endif
# If the sysconfdir is not under the prefix, then list it explicitly.
%if !%{sysconfdir_in_prefix}
%{_sysconfdir}
%endif
# If %%{install_in_opt}, then we're installing PRRTE to
# /opt/prrte/<version>.  But be sure to also explicitly mention
# /opt/prrte so that it can be removed by RPM when everything under
# there is also removed.
%if %{install_in_opt} %dir /opt/%{name} %endif # If we're installing the modulefile, get that, too %if %{install_modulefile} %{modulefile_path} %endif # If we're installing the shell scripts, get those, too %if %{install_shell_scripts} %{shell_scripts_path}/%{shell_scripts_basename}.sh %{shell_scripts_path}/%{shell_scripts_basename}.csh %endif %doc README.md LICENSE %else # # Sub-package RPMs # # Harder than all-in-one. We list the directories specifically so # that if the RPM creates directories when it is installed, we will # remove them when the RPM is uninstalled. We also have to use # specific file lists. # %files runtime -f runtime.files %defattr(-, root, root, -) %if %(test "%{_prefix}" = "/usr" && echo 1 || echo 0) %{_bindir}/* %{_libdir}/* %{_datadir} %else %{_prefix} %endif # If the sysconfdir is not under the prefix, then list it explicitly. %if !%{sysconfdir_in_prefix} %{_sysconfdir} %endif # If %%{install_in_opt}, then we're installing PRRTE to # /opt/prrte/<version>. But be sure to also explicitly mention # /opt/prrte so that it can be removed by RPM when everything under # there is also removed. Also list /opt/prrte/<version>/share so # that it can be removed as well. %if %{install_in_opt} %dir /opt/%{name} %dir /opt/%{name}/%{version}/share %endif # If we're installing the modulefile, get that, too %if %{install_modulefile} %{modulefile_path} %endif # If we're installing the shell scripts, get those, too %if %{install_shell_scripts} %{shell_scripts_path}/%{shell_scripts_basename}.sh %{shell_scripts_path}/%{shell_scripts_basename}.csh %endif %doc README.md LICENSE %{_pkgdatadir} %files devel -f devel.files %defattr(-, root, root, -) %{_includedir} %endif ############################################################################# # # Changelog # ############################################################################# %changelog * Wed Aug 10 2022 Ralph Castain - Major cleanup of cruft from prior history that does not pertain to PRRTE.
Cleanup from bad global search/replace of "prrte" with "prte" * Thu Apr 7 2022 Adam Goldman - Several minor fixes: added _includedir to build_all_in_one_rpm, escape macro in comment, and use %{name} instead of hard-coded value * Tue Mar 28 2017 Jeff Squyres - Reverting a decision from a prior changelog entry: if install_in_opt==1, then even put the modulefile under /opt. * Thu Nov 12 2015 Gilles Gouaillardet - Revamp packaging when prefix is /usr * Tue Jan 20 2015 Bert Wesarg - Remove VampirTrace wrapper from package. * Mon Jul 07 2014 Jeff Squyres - Several minor fixes from Oliver Lahaye: fix dates in changelog, added %{?dist} tag to the Release field, and added some Provides fields in case %{name} is overridden. * Mon Jun 24 2013 Igor Ivanov - Add Open SHMEM parallel programming library as part of Open MPI * Tue Dec 11 2012 Jeff Squyres - Re-release 1.6.0-1.6.3 SRPMs (with new SRPM Release numbers) with patch for VampirTrace's configure script to make it install the private "libtool" script in the right location (the script is used to build user VT applications). - Update the regexps/methodology used to generate the lists of files in the multi-RPM sub-packages; it's been broken for a little while. - No longer explicitly list the bin dir executables in the multi-RPM sub-packages - Per https://svn.open-mpi.org/trac/ompi/ticket/3382, remove all files named "config.log" from the install tree so that we can use this spec file to re-release all OMPI v1.6.x SRPMs. * Wed Jun 27 2012 Jeff Squyres - Remove the "ofed" and "munge_build_into_install" options, because OFED no longer distributes MPI implementations. Yay! * Mon Jun 04 2012 Jeff Squyres - Didn't change the specfile, but changed the script that generates the SRPM to force the use of MD5 checksums (vs. SHA1 checksums) so that the SRPM is friendly to older versions of RPM, such as that on RHEL 5.x. * Fri Feb 17 2012 Jeff Squyres - Removed OSCAR defines. 
- If use_mpi_selector==1, then also set install_shell_scripts to 1. - Change modules default RPM name and modulefiles path to the defaults on RHEL6. * Mon Dec 14 2009 Jeff Squyres - Add missing executables to specfile (ompi-server, etc.) - Fix: pull in VT files when building multiple RPMs (reported by Jim Kusznir). - Add allow_fortify_source option to let users selectively disable _FORTIFY_SOURCE processing on platforms where it just doesn't work (even with gcc; also reported by Jim Kusznir). * Tue Sep 8 2009 Jeff Squyres - Change shell_scripts_basename to not include version number to accomodate what mpi-selector expects. * Mon Feb 4 2008 Jeff Squyres - OFED 1.3 has a much better installer; remove all the leave_build_root kludge nastyness. W00t! * Fri Jan 18 2008 Jeff Squyres - Remove the hard-coded "prte" name from two Requires statements and use %{name} instead (FWIW, %{_name} caused rpmbuild to barf). * Wed Jan 2 2008 Jeff Squyres - Remove duplicate %{_sysconfdir} in the % files sections when building the sub-packages. - When building the sub-packages, ensure that devel.files also picks up the F90 module. - Hard-code the directory name "prte" into _pkglibdir (vs. using %{name}) because the OMPI code base has it hard-coded as well. Thanks to Jim Kusznir for noticing the problem. * Tue Dec 4 2007 Jeff Squyres - Added define option for disabling the use of rpmbuild's auto-dependency generation stuff. This is necessary for some compilers that allow themselves to be installed via tarball (not RPM), such as the Portland Group compiler. * Thu Jul 12 2007 Jeff Squyres - Change default doc location when using install_in_opt. Thanks to Alex Tumanov for pointing this out and to Doug Ledford for suggestions where to put docdir in this case. * Thu May 3 2007 Jeff Squyres - Ensure to move out of $RPM_BUILD_ROOT before deleting it in % clean. 
- Remove a debugging "echo" that somehow got left in there * Thu Apr 12 2007 Jeff Squyres - Ensure that _pkglibdir is always defined, suggested by Greg Kurtzer. * Wed Apr 4 2007 Jeff Squyres - Fix several mistakes in the generated profile.d scripts - Fix several bugs with identifying non-GNU compilers, stripping of FORTIFY_SOURCE, -mtune, etc. * Fri Feb 9 2007 Jeff Squyres - Revamp to make profile.d scripts more general: default to making the shell script files be %{_bindir}/mpivars.{sh|csh} - Add %{munge_build_into_install} option for OFED 1.2 installer on SLES - Change shell script files and modulefile to *pre*pend all the OMPI paths - Make shell script and modulefile installation indepdendent of %{install_in_opt} (they're really separate issues) - Add more "ofed" shortcut qualifiers - Slightly better test for basename CC in the fortify source section - Fix some problems in the csh shell script * Fri Oct 6 2006 Jeff Squyres - Remove LANL section; they don't want it - Add some help for OFED building - Remove some outdated "rm -f" lines for executables that we no longer ship * Wed Apr 26 2006 Jeff Squyres - Revamp files listings to ensure that rpm -e will remove directories if rpm -i created them. - Simplify options for making modulefiles and profile.d scripts. - Add oscar define. - Ensure to remove the previous installation root during prep. - Cleanup the modulefile specification and installation; also ensure that the profile.d scripts get installed if selected. - Ensure to list sysconfdir in the files list if it's outside of the prefix. * Thu Mar 30 2006 Jeff Squyres - Lots of bit rot updates - Reorganize and rename the subpackages - Add / formalize a variety of rpmbuild --define options - Comment out the docs subpackage for the moment (until we have some documentation -- coming in v1.1!) 
* Tue May 03 2005 Jeff Squyres - Added some defines for LANL defaults - Added more defines for granulatirty of installation location for modulefile - Differentiate between installing in /opt and whether we want to install environment script files - Filled in files for man and mca-general subpackages * Thu Apr 07 2005 Greg Kurtzer - Added opt building - Added profile.d/modulefile logic and creation - Minor cleanups * Fri Apr 01 2005 Greg Kurtzer - Added comments - Split package into subpackages - Cleaned things up a bit - Sold the code to Microsoft, and now I am retiring. Thanks guys! * Wed Mar 23 2005 Mezzanine - Specfile auto-generated by Mezzanine prrte-3.0.13/contrib/dist/linux/buildrpm.sh0000775000175000017500000002710515145263240021044 0ustar alastairalastair#!/bin/bash -f # # Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. # Copyright (c) 2006-2016 Cisco Systems, Inc. All rights reserved. # # # External script parameters # The folowing parameters could be used to affect script behaviour. # Please, do NOT set the same settings with parameters and config vars. # # -b # If you specify this option, only the all-in-one binary RPM will # be built. By default, only the source RPM (SRPM) is built. Other # parameters that affect the all-in-one binary RPM will be ignored # unless this option is specified. # # -n name # This option will change the name of the produced RPM to the "name". # It is useful to use with "-o" and "-m" options if you want to have # multiple PRRTE versions installed simultaneously in the same # enviroment. Requires use of option "-b". # # -o # With this option the install path of the binary RPM will be changed # to /opt/_NAME_/_VERSION_. Requires use of option "-b". # # -m # This option causes the RPM to also install modulefiles # to the location specified in the specfile. Requires use of option "-b". # # -i # Also build a debuginfo RPM. 
By default, the debuginfo RPM is not built. # Requires use of option "-b". # # -f lf_location # Include support for Libfabric. "lf_location" is Libfabric install # path. Requires use of option "-b". # # -t tm_location # Include support for Torque/PBS Pro. "tm_location" is path of the # Torque/PBS Pro header files. Requires use of option "-b". # # -d # Build with debugging support. By default, # the RPM is built without debugging support. # # -c parameter # Add custom configure parameter. # # -r parameter # Add custom RPM build parameter. # # -s # If specified, the script will try to unpack the prrte.spec # file from the tarball specified on the command line. By default, # the script will look for the specfile in the current directory. # # -h # Prints script usage information. # # # General config vars # The following vars can be set from outside and will affect script behave: # prefix,rpmbuild_options,configure_options,build_srpm,build_single,build_multiple,rpmtopdir # specfile="prrte.spec" prefix=${prefix:-"/opt/prrte"} rpmbuild_options=${rpmbuild_options:-"--define 'mflags -j4' --define '_source_filedigest_algorithm md5' --define '_binary_filedigest_algorithm md5'"} configure_options=${configure_options:-""} unpack_spec=0 # Helpful when debugging #rpmbuild_options="--define 'mflags -j4' --define 'install_in_opt 1' --define 'cflags -g' --define 'install_modulefile 1' --define 'modules_rpm_name dhcp'" #configure_options="--disable-mpi-f77 --without-io-romio --disable-mpi-f90" # Some distro's will attempt to force using bizarre, custom compiler # names (e.g., i386-redhat-linux-gnu-gcc). So hardwire them to use # "normal" names. #export CC=gcc #export CXX=g++ #export F77=f77 #export FC= # Note that this script can build one or all of the following RPMs: # SRPM, all-in-one, multiple. 
# If you want to build the SRPM, put "yes" here
build_srpm=${build_srpm:-"yes"}
# If you want to build the "all in one RPM", put "yes" here
build_single=${build_single:-"no"}
# If you want to build the "multiple" RPMs, put "yes" here
build_multiple=${build_multiple:-"no"}

#########################################################################
# You should not need to change anything below this line
#########################################################################

#
# save original parameters (replayed verbatim if the script re-runs
# itself through sudo)
#
orig_param="$@"

#
# usage information
#
# The synopsis lists exactly the flags accepted by the getopts loop below.
usage="Usage: $0 [-b][-o][-m][-i][-d][-s][-h] [-n name][-f lf_location][-t tm_location]
       [-c parameter][-r parameter] tarball

  -b
             build all-in-one binary RPM only (required for all other flags to work)
             {default: build only SRPM}

  -n name
             name of the resulting RPM package set to name. Requires -b flag.
             {default: prrte}

  -o
             install in /opt/_NAME_/_VERSION_. Requires -b flag.
             {default: install in /usr}

  -m
             install modulefiles during RPM installation. Requires -b flag.
             {default: modulefiles will NOT be installed}

  -i
             build debuginfo RPM. Requires -b flag.
             {default: do NOT build debuginfo RPM}

  -f lf_location
             include Libfabric support from lf_location. Requires -b flag.
             {default: try to build with Libfabric support}

  -t tm_location
             include Torque/PBS Pro support from tm_location. Requires -b flag.
             {default: try to build with Torque/PBS Pro}

  -d
             build with Debugging support
             {default: without debugging support}

  -s
             try to unpack prrte.spec file from tarball
             {default: search for prrte.spec in current directory}

  -c parameter
             add custom configure parameter

  -r parameter
             add custom RPM build parameter

  -h
             print this message and exit

  tarball
             path to PRRTE source tarball
"

#
# parse args
#
# NOTE(review): -f only records its argument in libfabric_path; no other
# visible part of this script reads that variable, so the flag currently
# looks like a no-op -- confirm and wire it into configure_options if a
# Libfabric configure switch is intended.
libfabric_path=""

while getopts bn:omif:t:dc:r:sh flag; do
  case "$flag" in
      b) build_srpm="no"
         build_single="yes"
         ;;
      n) rpmbuild_options="$rpmbuild_options --define '_name $OPTARG'"
         ;;
      o) rpmbuild_options="$rpmbuild_options --define 'install_in_opt 1'"
         configure_options="$configure_options --enable-prte-prefix-by-default"
         ;;
      m) rpmbuild_options="$rpmbuild_options --define 'install_modulefile 1'"
         ;;
      i) rpmbuild_options="$rpmbuild_options --define 'build_debuginfo_rpm 1'"
         ;;
      f) libfabric_path="$OPTARG"
         ;;
      t) configure_options="$configure_options --with-tm=$OPTARG"
         ;;
      d) configure_options="$configure_options --enable-debug"
         ;;
      c) configure_options="$configure_options $OPTARG"
         ;;
      # -r extends the rpmbuild options; it must not touch configure_options
      r) rpmbuild_options="$rpmbuild_options $OPTARG"
         ;;
      s) unpack_spec="1"
         ;;
      h) echo "$usage" 1>&2
         exit 0
         ;;
      # getopts has already printed a diagnostic for the bad option
      *) echo "$usage" 1>&2
         exit 1
         ;;
  esac
done
shift $(( OPTIND - 1 ));

#
# get the tarball name
#
tarball="$1"
if test "$tarball" = ""; then
    echo "$usage"
    exit 1
fi
if test ! -f "$tarball"; then
    echo "Can't find $tarball"
    exit 1
fi
echo "--> Found tarball: $tarball"

#
# get the extension from the tarball (gz or bz2)
#
extension=$(echo "$tarball" | grep -E '\.bz2')
if test -n "$extension"; then
    extension=bz2
else
    extension=gz
fi

#
# Get the version number: second dash-separated field of the file name,
# minus the ".tar.<extension>" suffix
#
first=$(basename "$tarball" | cut -d- -f2)
version=$(echo "$first" | sed -e 's/\.tar\.'"$extension"'//')
unset first
echo "--> Found PRRTE version: $version"

#
# Try to unpack spec file from tarball
#
if test $unpack_spec -eq 1; then
    tar -xf "$tarball" --wildcards --no-anchored 'prrte.spec' --strip=4
fi

#
# do we have the spec files?
#
if test !
-r "$specfile"; then
    echo "can't find $specfile"
    exit 1
fi
echo "--> Found specfile: $specfile"

#
# Find where the top RPM-building directory is
#
rpmtopdir=${rpmtopdir:-$HOME/RPMBUILD}
if test "$rpmtopdir" != ""; then
    rpmbuild_options="$rpmbuild_options --define '_topdir $rpmtopdir'"
    if test ! -d "$rpmtopdir"; then
        mkdir -p "$rpmtopdir"
        mkdir -p "$rpmtopdir/BUILD"
        mkdir -p "$rpmtopdir/RPMS"
        mkdir -p "$rpmtopdir/RPMS/i386"
        mkdir -p "$rpmtopdir/RPMS/i586"
        mkdir -p "$rpmtopdir/RPMS/i686"
        mkdir -p "$rpmtopdir/RPMS/noarch"
        mkdir -p "$rpmtopdir/RPMS/athlon"
        mkdir -p "$rpmtopdir/SOURCES"
        mkdir -p "$rpmtopdir/SPECS"
        mkdir -p "$rpmtopdir/SRPMS"
    fi
    need_root=0
elif test -d /usr/src/RPM; then
    need_root=1
    rpmtopdir="/usr/src/RPM"
elif test -d /usr/src/packages; then
    need_root=1
    rpmtopdir="/usr/src/packages"
else
    need_root=1
    rpmtopdir="/usr/src/redhat"
fi
echo "--> Found RPM top dir: $rpmtopdir"

#
# If we're not root, try to sudo
#
if test "$need_root" = "1" -a "`whoami`" != "root"; then
    echo "--> Trying to sudo: \"$0 $orig_param\""
    echo "------------------------------------------------------------"
    sudo -u root sh -c "$0 $orig_param"
    sudo_status=$?
    echo "------------------------------------------------------------"
    echo "--> sudo finished"
    # Hand back the status of the privileged run instead of always
    # reporting success
    exit $sudo_status
fi

#
# make sure we have write access to the directories we need
#
if test ! -w "$rpmtopdir/SOURCES" ; then
    echo "Problem creating rpms: You do not have a $rpmtopdir directory"
    echo "tree or you do not have write access to the $rpmtopdir directory"
    echo "tree. Please remedy and try again."
    exit 1
fi
echo "--> Have write access to $rpmtopdir/SOURCES"

#
# move the tarball file to the rpm directory
#
cp "$tarball" "$rpmtopdir/SOURCES"

#
# Print out the compilers
#
cat <<EOF
--> Hard-wired for compilers:
    CC = $CC
    CXX = $CXX
    F77 = $F77
    FC = $FC
EOF

#
# what command should we use?
# RH 8.0 changed from using "rpm -ba" to "rpmbuild -ba". ARRGGH!!!
#
# Redirect stdout first, then point stderr at it, so both are discarded.
if which rpmbuild >/dev/null 2>&1; then
    rpm_cmd="rpmbuild"
else
    rpm_cmd="rpm"
fi

#
# from the specfile: fill in the $VERSION and $EXTENSION placeholders
#
specdest="$rpmtopdir/SPECS/prrte-$version.spec"
sed -e 's/\$VERSION/'"$version"'/g' \
    -e 's/\$EXTENSION/'"$extension"'/g' \
    "$specfile" > "$specdest"
echo "--> Created destination specfile: $specdest"
release=`grep -E -i release: "$specdest" | cut -d' ' -f2`

#
# Setup compiler string
#
if test "$CC" != ""; then
    configure_options="$configure_options CC=$CC"
fi
if test "$CXX" != ""; then
    configure_options="$configure_options CXX=$CXX"
fi
if test "$F77" != ""; then
    configure_options="$configure_options F77=$F77"
fi
if test "$FC" != ""; then
    configure_options="$configure_options FC=$FC"
fi

#
# Make the SRPM
#
# Each stage assembles its command line as a string and runs it through
# eval so the single-quoted --define arguments are re-parsed as one word.
if test "$build_srpm" = "yes"; then
    echo "--> Building the PRRTE SRPM"
    rpmbuild_options="$rpmbuild_options --define 'dist %{nil}'"
    cmd="$rpm_cmd $rpmbuild_options -bs $specdest"
    echo "--> $cmd"
    eval "$cmd"

    if test $? != 0; then
        echo "*** FAILURE BUILDING SRPM!"
        echo "Aborting"
        exit 1
    fi
    echo "--> Done building the SRPM"
fi

#
# Make the single RPM
#
if test "$build_single" = "yes"; then
    echo "--> Building the single PRRTE RPM"
    cmd="$rpm_cmd -bb $rpmbuild_options --define 'build_all_in_one_rpm 1'"
    if test "$configure_options" != ""; then
        cmd="$cmd --define 'configure_options $configure_options'"
    fi
    cmd="$cmd $specdest"
    echo "--> $cmd"
    eval "$cmd"

    if test $? != 0; then
        echo "*** FAILURE BUILDING SINGLE RPM!"
        echo "Aborting"
        exit 1
    fi
    echo "--> Done building the single RPM"
fi

#
# Make the multi RPM
#
if test "$build_multiple" = "yes"; then
    echo "--> Building the multiple PRRTE RPM"
    cmd="$rpm_cmd -bb $rpmbuild_options --define 'build_all_in_one_rpm 0'"
    if test "$configure_options" != ""; then
        cmd="$cmd --define 'configure_options $configure_options'"
    fi
    cmd="$cmd $specdest"
    echo "--> $cmd"
    eval "$cmd"

    if test $? != 0; then
        echo "*** FAILURE BUILDING MULTIPLE RPM!"
echo "Aborting" exit 1 fi echo "--> Done building the multiple RPM" fi # # Done # cat <86/ The sources rpms are located in: $rpmtopdir/SRPMS/ The spec files are located in: $rpmtopdir/SPECS/ ------------------------------------------------------------------------------ EOF prrte-3.0.13/contrib/dist/linux/README0000664000175000017500000000754715145263240017557 0ustar alastairalastairCopyright (c) 2004-2006 The Trustees of Indiana University and Indiana University Research and Technology Corporation. All rights reserved. Copyright (c) 2004-2006 The University of Tennessee and The University of Tennessee Research Foundation. All rights reserved. Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, University of Stuttgart. All rights reserved. Copyright (c) 2004-2006 The Regents of the University of California. All rights reserved. Copyright (c) 2006-2016 Cisco Systems, Inc. All rights reserved. Copyright (c) 2016 Intel, Inc. All rights reserved. Copyright (c) 2022 Nanook Consulting. All rights reserved. $COPYRIGHT$ Additional copyrights may follow $HEADER$ =========================================================================== Note that you probably want to download the latest release of the SRPM for any given PRRTE version. The SRPM release number is the version after the dash in the SRPM filename. For example, "prrte-3.0.0-2.src.rpm" is the 2nd release of the SRPM for PRRTE v3.0.0. Subsequent releases of SRPMs typically contain bug fixes for the RPM packaging, but not PRRTE itself. The buildrpm.sh script takes a single mandatory argument -- a filename pointing to a PRRTE tarball (may be either .gz or .bz2). It will create one or more RPMs from this tarball: 1. Source RPM 2. "All in one" RPM, where all of PRRTE is put into a single RPM. 3. "Multiple" RPM, where PRRTE is split into several sub-package RPMs: - prrte-runtime - prrte-devel The following arguments could be used to affect script behaviour. 
Please, do NOT set the same settings with parameters and config vars. -b If you specify this option, only the all-in-one binary RPM will be built. By default, only the source RPM (SRPM) is built. Other parameters that affect the all-in-one binary RPM will be ignored unless this option is specified. -n name This option will change the name of the produced RPM to the "name". It is useful to use with "-o" and "-m" options if you want to have multiple PRRTE versions installed simultaneously in the same environment. Requires use of option "-b". -o With this option the install path of the binary RPM will be changed to /opt/_NAME_/_VERSION_. Requires use of option "-b". -m This option causes the RPM to also install modulefiles to the location specified in the specfile. Requires use of option "-b". -i Also build a debuginfo RPM. By default, the debuginfo RPM is not built. Requires use of option "-b". -f lf_location Include support for Libfabric. "lf_location" is Libfabric install path. Requires use of option "-b". -t tm_location Include support for Torque/PBS Pro. "tm_location" is path of the Torque/PBS Pro header files. Requires use of option "-b". -d Build with debugging support. By default, the RPM is built without debugging support. -c parameter Add custom configure parameter. -r parameter Add custom RPM build parameter. -s If specified, the script will try to unpack the prrte.spec file from the tarball specified on the command line. By default, the script will look for the specfile in the current directory. -h Prints script usage information. Target architecture is currently hard-coded in the beginning of the buildrpm.sh script. Alternatively, you can build directly from the prrte.spec spec file or the SRPM. Many options can be passed to the building process via rpmbuild's --define option (there are older versions of rpmbuild that do not seem to handle --define'd values properly in all cases, but we generally don't care about those old versions of rpmbuild...). 
The available options are described in the comments in the beginning of the spec file in this directory. prrte-3.0.13/contrib/dist/make-authors.pl0000775000175000017500000001076315145263240020472 0ustar alastairalastair#!/usr/bin/env perl
#
# Copyright (c) 2008-2016 Cisco Systems, Inc.  All rights reserved.
# Copyright (c) 2017      Amazon.com, Inc. or its affiliates.
#                         All Rights reserved.
#
# Builds an AUTHORS file from the output of "git log": one entry per
# author name, listing every email address that name committed with.
#

use strict;

use Data::Dumper;
use Getopt::Long;
use Cwd;

# Ensure that we're in the root of a writeable Git clone
my $in_git_clone = 1;
my $skip_ok = 0;
my $quiet = 0;
my $srcdir = ".";
my $destdir = getcwd();

GetOptions("skip-ok" => \$skip_ok,
           "quiet" => \$quiet,
           "srcdir=s" => \$srcdir,
           "destdir=s" => \$destdir)
    or die("Error in command line arguments\n");

# we still work with git old enough to not have the -C option, and the
# --git-dir option screws up .mailmap, so just jump into the source
# directory and make life easier.
chdir($srcdir)
    or die("Can't chdir to $srcdir: $!\n");

if (! -d ".git") {
    if ($skip_ok == 0) {
        print STDERR "I don't seem to be in a git repo :(\n";
        exit(1);
    } else {
        # called from make dist, just exit quietly (for case where
        # user runs "make dist" from a dist tarball)
        exit(0);
    }
}

######################################################################

my $people;

######################################################################

# Run git log to get a list of committers

open (GIT, "git log --no-merges --format=tformat:'%aN <%aE>'|") || die "Can't run 'git log'.";
# Read one "Name <email>" line per commit from the git pipe
while (<GIT>) {
    chomp;
    # Skip anything that is not in "Name <email>" form, so $1/$2 are
    # only used when this line actually matched
    next unless m/^\s*(.+)\s+<(.+)>\s*$/;

    my $email = lc($2);

    # special case from the SVN migration
    if ($email eq 'no-author@open-mpi.org') {
        next;
    }

    # skip the mpi bot...
if ($email eq 'mpiteam@open-mpi.org') { next; } if (!exists($people->{$1})) { # The person doesn't exist, so save a new entry $people->{$1} = { name => $1, emails => { $email => 1, } }; if ($quiet == 0) { print STDOUT "Found Git committer: $1 <$email>\n"; } } else { # The person already exists, so just add (or overwrite) this # email address $people->{$1}->{emails}->{$email} = 1; } } close(GIT); if (scalar(keys(%{$people})) == 0) { print STDERR "Found no author entries, assuming git broke. Aborting!\n"; exit(1); } ###################################################################### # Output a new AUTHORS file open (AUTHORS, ">$destdir/AUTHORS") || die "Can't write to AUTHORS file"; my $header = <<'END_HEADER'; Open MPI Authors ================ The following cumulative list contains the names and email addresses of all individuals who have committed code to the Open MPI repository (either directly or through a third party, such as through a Github.com pull request). Note that these email addresses are not guaranteed to be current; they are simply a unique indicator of the individual who committed them. 
END_HEADER print AUTHORS $header; my $email_dups; my @sorted_people = sort(keys(%{$people})); foreach my $p (@sorted_people) { print AUTHORS "$p\n"; foreach my $e (sort(keys(%{$people->{$p}->{emails}}))) { # Sanity check: make sure this email address does not show up # with any other person/name my $dup; foreach my $p2 (@sorted_people) { next if ($p eq $p2); foreach my $e2 (keys(%{$people->{$p2}->{emails}})) { if ($e eq $e2) { $dup = $p2; # Record this so that we can warn about it if ($p le $p2) { $email_dups->{$p} = $p2; } else { $email_dups->{$p2} = $p; } last; } } last if (defined($dup)); } print AUTHORS " $e"; print AUTHORS " (**** DUPLICATE EMAIL ADDRESS WITH $dup ***)" if (defined($dup)); print AUTHORS "\n"; } } close(AUTHORS); print STDOUT "New AUTHORS file written.\n"; ###################################################################### # Output any relevant warnings my $warned = 0; my @k = sort(keys(%{$email_dups})); if ($#k >= 0) { $warned = 1; print STDERR "\n*** WARNING: The following people had the same email address:\n"; foreach my $p (@k) { print STDERR "*** $p, $email_dups->{$p}\n"; } } if ($warned) { print STDERR " ******************************************************************************* *** YOU SHOULD EDIT THE .mailmap FILE TO RESOLVE THESE WARNINGS! *******************************************************************************\n"; } exit($warned); prrte-3.0.13/contrib/dist/make_tarball0000777000175000017500000000000015145263240023443 2make_dist_tarballustar alastairalastairprrte-3.0.13/contrib/dist/make_dist_tarball0000775000175000017500000003166415145263240021124 0ustar alastairalastair#!/bin/sh # # Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. 
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2008-2020 Cisco Systems, Inc. All rights reserved # Copyright (c) 2016-2020 Intel, Inc. All rights reserved. # Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow # # $HEADER$ # # # Version of auto tools that we want # M4_TARGET_VERSION=1.4.17 AM_TARGET_VERSION=1.15 AC_TARGET_VERSION=2.69 LT_TARGET_VERSION=2.4.6 FLEX_TARGET_VERSION=2.5.35 # # When running "make distcheck", use these parallelization flags. Can # significantly decrease the time required for "make distcheck" because # that target includes multiple builds of the entire code base. # DISTCHECK_MAKE_FLAGS=-j4 ######################################################################### # # Check command line flags # # Default to requiring *exact* versions if we're making distribution # tarballs; but higher-than-expected versions are ok for # non-distribution tarballs. autogen_args= distdir=".." greekonly=0 nogreek=0 dirty_ok=0 gnu_version_ignore=0 dist_target=distcheck distcheck_flags="AM_MAKEFLAGS=$DISTCHECK_MAKE_FLAGS" git_update=1 if test "`basename $0`" = "make_tarball"; then dist_target=dist distcheck_flags="AM_MAKEFLAGS=-j32" highok=1 dirty_ok=1 greekonly=1 else highok=0 fi while test "$1" != ""; do case $1 in --greekonly) greekonly=1 ;; --no-greek) nogreek=1 ;; --highok) highok=1 ;; --autogen-args) autogen_args=$2; shift ;; --config-args=*) config_args="${i#*=}"; shift ;; --distdir) distdir=$2; shift ;; --dirtyok) dirty_ok=1 ;; --verok) gnu_version_ignore=1;; --no-git-update) git_update=0;; *) cat <&1 && touch success) | tee auto.out if test ! -f success; then echo "Autogen failed. Aborting" exit 1 fi # # Configure # echo "*** Running configure..." 
rm -f success (./configure $config_args 2>&1 && touch success) | tee config.out if test ! -f success; then echo "Configure failed. Aborting" exit 1 fi # # Remove all generated *_lex.c files so that we ensure to invoke # flex from here in this script (to ensure that we're using a good # version of flex, and not picking up random *_lex.c files that # happened to be in the tree already). # echo "*** Removing old generated flex files..." find . -name \*_lex.c -exec chmod ug+rw {} \; -exec rm -f {} \; -print # # make tarball # echo "*** Running make $dist_target..." save_LD=$LD_LIBRARY_PATH LD_LIBRARY_PATH= rm -f success (make $distcheck_flags $dist_target 2>&1 && touch success) | tee dist.out if test ! -f success; then echo "Make $dist_target failed. Aborting" exit 1 fi rm -f success LD_LIBRARY_PATH=$save_LD # # move # echo "*** Moving tarballs..." mv prrte-* $distdir echo "*** All done" } ######################################################################### # main ######################################################################### start=`date` echo "*** Start time: $start" echo "*** Checking tools versions..." if test "$gnu_version_ignore" = "0"; then check_gnu_version m4 $M4_TARGET_VERSION check_gnu_version automake $AM_TARGET_VERSION check_gnu_version autoconf $AC_TARGET_VERSION check_gnu_version libtool $LT_TARGET_VERSION # Windows needs a recent version of flex; old versions don't generate # Windows-friendly *_lex.c files. check_gnu_version flex $FLEX_TARGET_VERSION fi # # Verify that we're in a top PRTE dir # echo "*** Checking to ensure in top-level PMIx Reference RunTime Environment directory..." if test -f VERSION -a -f configure.ac -a -f config/prte_get_version.m4 ; then happy=1 else echo "Do not appear to be in an PMIx Reference RunTime Environment top directory. Abort!" exit 1 fi if test $git_update -eq 1; then # # Get the latest # echo "*** Git pull to get the latest..." git pull --rebase if test $? -ne 0; then echo "*** Git pull failed. 
Cannot continue."
        exit 1
    fi
fi

#
# Ensure we have a clean repo
#
if test $dirty_ok -eq 0; then
    echo "*** Checking if source tree is dirty..."
    dirty=0
    # "git status --porcelain" prints one line per changed path, whether
    # the change is staged or not; untracked files are left out so they
    # do not count as dirt.  Nothing is written into the source tree.
    if test -n "`git status --porcelain --untracked-files=no`"; then
        dirty=1
    fi
    if test $dirty -eq 1; then
        echo " Source tree is dirty. Cannot continue."
        exit 1
    fi
fi

#
# Get our repo_rev
#
echo "*** Removing old VERSION file..."
rm -f VERSION
echo "*** Restoring pristine VERSION file..."
git checkout VERSION
echo "*** Getting git version..."
repo_rev=`git describe --tags --always`
echo " Repo rev number: $repo_rev"

# Sanity checks
if test "$repo_rev" = ""; then
    echo "Somehow the repo rev number is empty. Abort!"
    exit 1
elif test "`echo $repo_rev | grep ' '`" != ""; then
    echo "Somehow the repo rev number has a space in it -- bad!"
    exit 1
fi

#
# Set final values in VERSION
#
echo "*** Removing version numbers from VERSION..."
version_files=VERSION
release_date=`date '+%b %d, %Y'`
echo " Release date: $release_date"
for file in $version_files; do
    echo " - $file"
    # Stamp the repo revision and release date, and blank tarball_version
    sed -e "s/^repo_rev=.*/repo_rev=$repo_rev/" \
        -e "s/^date=.*/date=\"$release_date\"/" \
        -e "s/^tarball_version=.*/tarball_version=/" \
        "$file" > "$file.new"
    cp -f "$file.new" "$file"
    rm "$file.new"
done

#
# Make 2 tarballs:
#
# - one with the greek
# - one without the greek
#
# unless the user specifically said --greekonly, then only make the
# greek tarball.  Making both tarballs at once allows us to guarantee
# to have two tarballs -- one greek and one not -- that have exactly
# the same git revision (as opposed to, for example, running this
# script to make a greek tarball, then running it again to make a
# non-greek tarball -- there is a race condition that someone could
# commit in the meantime and change the git revision in the 2nd
# tarball)
#

# First, if !
--no-greek, make greek tarball if test $nogreek -eq 0; then echo "*** Making greek tarball" make_tarball fi # Now if ! --greekonly, make the non-greek tarball if test $greekonly -eq 0; then echo "*** REMOVING ALL GREEK FROM VERSION NUMBERS!!" for file in $version_files; do echo " - $file" sed -e 's/^greek=.*/greek=/' $file > $file.new cp -f $file.new $file rm $file.new done echo "Making non-greek tarball" make_tarball fi # Put the VERSION file back the way it was rm -f VERSION git checkout VERSION echo " " echo "*** Start time: $start" echo "*** Finish time: `date`" prrte-3.0.13/contrib/headers.txt0000664000175000017500000000000015145263240016722 0ustar alastairalastairprrte-3.0.13/contrib/check_unnecessary_headers.sh0000775000175000017500000007670615145263240022326 0ustar alastairalastair#!/bin/sh # # Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. # Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. # # # Some grep/sed mojo may be of interest to others... # Find certain unnecessary headers, and remove, if not used... # function del_header() { file=$1 header=`echo $2 | sed 's/\//\\\\\//g'` line=`grep -n "#include \"$2" $file | cut -f1 -d':' | head -n1` if [ "x$line" = "x" ] ; then echo Can not find pattern $header file $file -- will not delete return fi # Remove the header including any characters at end of the line MULTI_LINE COMMENTS...? 
sed -i -e "/#include \"$header\".*/d" $file } # # In Subdirectory DIR, search for headers provided in array SEARCH_HEADER # SEARCH_HEADER must initilized with a[0]="header_file.h typdefs macros functions" # # If any of the typedefs, macros or functions show up, then # including the header is valid required, otherwise del_header # # # XXX Dont ask, why it does NOT take the SEARCH_HEADER as parameter -- seems not to work with arrays # function delete_unnessary_header() { DIR=$1 i=0 while [ "x${SEARCH_HEADER[i]}" != "x" ] ; do HEADER=`echo ${SEARCH_HEADER[i]} | cut -f 1 -d' '` HEADER_BASENAME=`basename $HEADER` for file in `find $DIR -type f '(' -name '*.[cChysSfF]' -o \ -iname '*.cc' -o -name '*.cpp' -o \ -iname '*.f77' -o -iname '*.f90' ')' | sort | xargs grep -n $HEADER | cut -f1 -d':' | sort | uniq` ; do need_h=0 ARGS=`echo ${SEARCH_HEADER[i]} | cut -f2- -d' '` for arg in $ARGS ; do # echo $HEADER $file $arg # If the poor argument is part of the header's name, how to find it? Well, need to play with grep! if test "x${arg}.h" = "x${HEADER_BASENAME}" ; then grep $arg $file | grep -qv $HEADER && need_h=1 else grep -q $arg $file && need_h=1 fi [ $need_h -eq 1 ] && break done if [ $need_h -eq 0 ] ; then echo -e Delete $HEADER from \\t $file # as we did not find any of $ARGS del_header $file $HEADER fi done i=$(( i + 1 )) done } # # First element of the array is the header file in question, all following # strings are functions, types, macros that are being checked for... 
# # WARNING: variables should NOT be separated by MORE than 1 space # SEARCH_HEADER[0]="opal/align.h OPAL_ALIGN_PTR OPAL_ALIGN" SEARCH_HEADER[1]="opal/class/opal_atomic_lifo.h opal_atomic_lifo_t opal_atomic_lifo_is_empty opal_atomic_lifo_push opal_atomic_lifo_pop" SEARCH_HEADER[2]="opal/class/opal_bitmap.h opal_bitmap_t opal_bitmap_set_max_size opal_bitmap_init opal_bitmap_set_bit opal_bitmap_clear_bit opal_bitmap_is_set_bi opal_bitmap_find_and_set_first_unset_bit opal_bitmap_clear_all_bits opal_bitmap_set_all_bits opal_bitmap_size opal_bitmap_copy" SEARCH_HEADER[3]="opal/class/opal_free_list.h opal_free_list_t opal_free_list_item_t opal_free_list_init opal_free_list_grow OPAL_FREE_LIST_GET OPAL_FREE_LIST_WAIT OPAL_FREE_LIST_RETURN" SEARCH_HEADER[4]="opal/class/opal_graph.h opal_graph_vertex_t opal_graph_edge_t opal_adjacency_list_t opal_graph_t opal_graph_copy_vertex_data opal_graph_free_vertex_data opal_graph_alloc_vertex_data vertex_distance_from_t opal_graph_add_vertex opal_graph_remove_vertex opal_graph_add_edge opal_graph_remove_edge opal_graph_adjacent opal_graph_get_order opal_graph_get_size opal_graph_find_vertex opal_graph_get_graph_vertices opal_graph_get_adjacent_vertices opal_graph_duplicate opal_graph_spf opal_graph_dijkstra opal_graph_print" SEARCH_HEADER[5]="opal/class/opal_hash_table.h opal_hash_table_t opal_hash_table_init opal_hash_table_get_size opal_hash_table_remove_all opal_hash_table_get_value_uint32 opal_hash_table_set_value_uint32 opal_hash_table_remove_value_uint32 opal_hash_table_get_value_uint64 opal_hash_table_set_value_uint64 opal_hash_table_remove_value_uint64 opal_hash_table_get_value_ptr opal_hash_table_set_value_ptr opal_hash_table_remove_value_ptr opal_hash_table_get_first_key_uint32 opal_hash_table_get_next_key_uint32 opal_hash_table_get_first_key_uint64 opal_hash_table_get_next_key_uint64" SEARCH_HEADER[6]="opal/class/opal_list.h opal_list_t opal_list_item_t opal_list_get_next opal_list_get_prev opal_list_is_empty 
opal_list_get_first opal_list_get_last opal_list_get_begin opal_list_get_end opal_list_get_size opal_list_remove_item opal_list_append opal_list_prepend opal_list_remove_first opal_list_remove_last opal_list_insert_pos opal_list_insert opal_list_join opal_list_splice opal_list_sort opal_list_item_compare_fn_t" SEARCH_HEADER[7]="opal/class/opal_object.h opal_object_t opal_class_t opal_construct_t opal_destruct_t OPAL_OBJ_STATIC_INIT OBJ_CLASS OBJ_CLASS_INSTANCE OBJ_CLASS_DECLARATION OBJ_NEW OBJ_RETAIN OBJ_RELEASE OBJ_CONSTRUCT OBJ_DESTRUCT opal_class_initialize opal_class_finalize opal_obj_run_constructors opal_obj_run_destructors opal_obj_new opal_obj_update" SEARCH_HEADER[8]="opal/class/opal_pointer_array.h opal_pointer_array_t opal_pointer_array_init opal_pointer_array_add opal_pointer_array_set_item opal_pointer_array_get_item opal_pointer_array_get_size opal_pointer_array_set_size opal_pointer_array_test_and_set_item opal_pointer_array_remove_all" SEARCH_HEADER[9]="opal/class/opal_value_array.h opal_value_array_t opal_value_array_init opal_value_array_reserve opal_value_array_get_size opal_value_array_set_size OPAL_VALUE_ARRAY_GET_ITEM opal_value_array_get_item OPAL_VALUE_ARRAY_SET_ITEM opal_value_array_set_item opal_value_array_append_item opal_value_array_remove_item OPAL_VALUE_ARRAY_GET_BASE" SEARCH_HEADER[10]="opal/constants.h OPAL_SUCCESS OPAL_ERROR OPAL_ERR_ OPAL_EXISTS" SEARCH_HEADER[11]="opal/dss/dss.h opal_dss_open opal_dss_close opal_dss_t opal_dss" SEARCH_HEADER[12]="opal/dss/dss_internal.h OPAL_DSS_DEFAULT_INITIAL_SIZE OPAL_DSS_DEFAULT_THRESHOLD_SIZE DSS_TYPE_SIZE_T DSS_TYPE_BOOL DSS_TYPE_INT DSS_TYPE_UINT DSS_TYPE_PID_T UNPACK_SIZE_MISMATCH opal_dss_type_info_t opal_dss_initialized opal_dss_debug opal_dss_verbose opal_dss_initial_size opal_dss_threshold_size opal_dss_types opal_dss_num_reg_types opal_dss_set opal_dss_get opal_dss_pack opal_dss_unpack opal_dss_copy opal_dss_compare opal_dss_print opal_dss_dump opal_dss_size opal_dss_peek 
opal_dss_unload opal_dss_load opal_dss_copy_payload opal_dss_register opal_dss_release opal_dss_lookup_data_type opal_dss_dump_data_types opal_dss_pack_buffer opal_dss_unpack_buffer opal_dss_std_copy opal_dss_compare_ opal_dss_std_size opal_dss_size_ opal_dss_print_ opal_dss_std_release opal_dss_std_obj_release opal_dss_release_byte_object opal_dss_buffer_extend opal_dss_too_small opal_dss_find_type opal_dss_store_data_type opal_dss_get_data_type" SEARCH_HEADER[13]="opal/dss/dss_types.h OPAL_DATA_TYPE_T OPAL_DSS_ID_MAX OPAL_DSS_ID_INVALID OPAL_UNDEF OPAL_BYTE OPAL_BOOL OPAL_STRING OPAL_SIZE OPAL_PID OPAL_INT OPAL_UINT OPAL_BYTE_OBJECT OPAL_DATA_TYPE OPAL_NULL OPAL_DATA_VALUE OPAL_PSTAT OPAL_DSS_ID_DYNAMIC OPAL_VALUE1_GREATER OPAL_VALUE2_GREATER OPAL_EQUAL opal_dss_value_t OPAL_DATA_VALUE_EMPTY OPAL_PSTAT_MAX_STRING_LEN opal_pstats_t OPAL_DSS_STRUCTURED OPAL_DSS_UNSTRUCTURED OPAL_DSS_BUFFER_NON_DESC OPAL_DSS_BUFFER_FULLY_DESC OPAL_DSS_BUFFER_TYPE_HTON OPAL_DSS_BUFFER_TYPE_NTOH opal_buffer_t" SEARCH_HEADER[14]="opal/hash_string.h OPAL_HASH_STR" SEARCH_HEADER[15]="opal/mca/crs.h OPAL_CRS_ opal_crs" SEARCH_HEADER[16]="opal/opal_socket_errno.h opal_socket_errno" SEARCH_HEADER[17]="opal/prefetch.h OPAL_LIKELY OPAL_UNLIKELY OPAL_PREFETCH" SEARCH_HEADER[18]="opal/util/sys_limits.h opal_sys_limits opal_util_init_sys_limits" SEARCH_HEADER[19]="opal/runtime/opal_progress.h opal_progress" SEARCH_HEADER[20]="opal_stdint.h" # "int8_t int16_t int32_t int64_t intptr_t SIZE_MAX PRId PRIi PRIo PRIu PRIx PRIX PRIsize_t" SEARCH_HEADER[21]="opal/threads/condition.h opal_condition_t opal_condition_wait opal_condition_timedwait opal_condition_signal opal_condition_broadcast" SEARCH_HEADER[22]="opal/threads/mutex.h opal_uses_threads opal_mutex_check_locks opal_mutex_t opal_mutex_trylock opal_mutex_lock opal_mutex_unlock opal_mutex_atomic_trylock opal_mutex_atomic_lock opal_mutex_atomic_unlock opal_using_threads OPAL_THREAD_LOCK OPAL_THREAD_TRYLOCK OPAL_THREAD_UNLOCK 
OPAL_THREAD_SCOPED_LOCK OPAL_THREAD_ADD OPAL_HAVE_ATOMIC_CMPSET" SEARCH_HEADER[23]="opal/threads/threads.h opal_thread_fn_t opal_thread_t opal_thread_start opal_thread_join opal_thread_self_compare opal_thread_get_self" SEARCH_HEADER[24]="opal/types.h ompi_ptr_t ompi_iov_base_ptr_t opal_socklen_t hton64 ntoh64 ompi_ptr_ptol ompi_ptr_ltop opal_swap_bytes2 opal_swap_bytes4 opal_swap_bytes8" SEARCH_HEADER[25]="opal/util/arch.h OPAL_ARCH_ opal_arch_compute_local_id opal_arch_checkmask opal_arch_isbigendian opal_arch_ldisintel opal_arch_setmask" SEARCH_HEADER[26]="opal/util/argv.h opal_argv_append opal_argv_append_nosize opal_argv_append_unique_nosize opal_argv_free opal_argv_split opal_argv_split_with_empty opal_argv_count opal_argv_join opal_argv_join_range opal_argv_len opal_argv_copy opal_argv_delete opal_argv_insert" SEARCH_HEADER[27]="opal/util/basename.h opal_basename opal_dirname" SEARCH_HEADER[28]="opal/util/bit_ops.h opal_hibit opal_cube_dim" SEARCH_HEADER[29]="opal/util/cmd_line.h opal_cmd_line_t OPAL_CMD_LINE_TYPE_ opal_cmd_line_type_t opal_cmd_line_init_t opal_cmd_line_create opal_cmd_line_make_opt_mca opal_cmd_line_make_opt opal_cmd_line_make_opt3 opal_cmd_line_parse opal_cmd_line_get_usage_msg opal_cmd_line_is_taken opal_cmd_line_get_argc opal_cmd_line_get_argv opal_cmd_line_get_ninsts opal_cmd_line_get_param opal_cmd_line_get_tail" SEARCH_HEADER[30]="opal/util/convert.h opal_size2int" SEARCH_HEADER[31]="opal/util/daemon_init.h opal_daemon_init" SEARCH_HEADER[32]="opal/util/error.h opal_perror opal_strerror opal_strerror_r opal_err2str_fn_t opal_error_register" SEARCH_HEADER[33]="opal/util/if.h IF_NAMESIZE opal_ifnametoaddr opal_ifaddrtoname opal_ifnametoindex opal_ifnametokindex opal_ifindextokindex opal_ifcount opal_ifbegin opal_ifnext opal_ifindextoname opal_ifkindextoname opal_ifindextoaddr opal_ifindextomask opal_ifindextoflags opal_ifislocal opal_iffinalize" SEARCH_HEADER[34]="opal/util/net.h opal_net_init opal_net_finalize opal_net_prefix2netmask 
opal_net_islocalhost opal_net_samenetwork opal_net_addr_isipv4public opal_net_get_hostname opal_net_get_port" SEARCH_HEADER[35]="opal/util/opal_environ.h opal_environ_merge opal_setenv opal_unsetenv opal_home_directory opal_tmp_directory environ" SEARCH_HEADER[36]="opal/util/opal_getcwd.h opal_getcwd" SEARCH_HEADER[37]="opal/util/os_dirpath.h opal_os_dirpath_create opal_os_dirpath_is_empty opal_os_dirpath_access opal_os_dirpath_destroy" SEARCH_HEADER[38]="opal/util/pmix_os_path.h opal_os_path opal_make_filename_os_friendly" SEARCH_HEADER[39]="opal/util/output.h opal_output_stream_t opal_output_init opal_output_finalize opal_output_open opal_output_reopen opal_output_switch opal_output_reopen_all opal_output_close opal_output opal_output_verbose opal_output_vverbose opal_output_string opal_output_vstring opal_output_set_verbosity opal_output_get_verbosity opal_output_set_output_file_info OPAL_OUTPUT OPAL_OUTPUT_VERBOSE" SEARCH_HEADER[40]="opal/util/path.h opal_path_find opal_path_findv opal_path_is_absolute opal_find_absolute_path opal_path_access" SEARCH_HEADER[41]="opal/util/pmix_printf.h snprintf vsnprintf" SEARCH_HEADER[42]="opal/util/show_help.h opal_show_help_init opal_show_help_finalize opal_show_help opal_show_vhelp opal_show_help_string opal_show_help_finish_parsing" SEARCH_HEADER[43]="opal/util/strncpy.h opal_strncpy" SEARCH_HEADER[44]="" delete_unnessary_header . 
#################################### SEARCH_HEADER[0]="prte/mca/errmgr/errmgr.h PRTE_ERROR_NAME PRTE_ERROR_LOG prte_errmgr_base_log prte_errmgr" SEARCH_HEADER[1]="prte/mca/ess/ess.h prte_ess" SEARCH_HEADER[2]="prte/mca/filem/filem.h prte_filem PRTE_FILEM_TYPE_ PRTE_FILEM_MOVE_ prte_filem_base_process_set_1_0_0_t prte_filem_base_process_set_t prte_filem_base_file_set_1_0_0_t prte_filem_base_file_set_t prte_filem_base_request_1_0_0_t prte_filem_base_request_t prte_filem_base_component_2_0_0_t prte_filem_base_component_t prte_filem_base_module_1_0_0_t prte_filem_base_module_t" SEARCH_HEADER[3]="prte/mca/grpcomm/grpcomm.h prte_grpcomm" SEARCH_HEADER[4]="prte/mca/iof/iof.h prte_iof" SEARCH_HEADER[5]="prte/mca/iof/iof_types.h prte_iof_tag_t PRTE_IOF_" SEARCH_HEADER[6]="prte/mca/notifier/notifier.h PRTE_NOTIFIER_MAX_BUF PRTE_NOTIFIER_INFRA PRTE_NOTIFIER_WARNING prte_notifier" SEARCH_HEADER[7]="prte/mca/odls/base/base.h prte_odls_base_open prte_odls_base_t prte_odls_base prte_base_default_waitpid_fired" SEARCH_HEADER[8]="prte/mca/odls/odls.h prte_odls_base_module_1_3_0_t prte_odls_base_module_t prte_odls_base_component_2_0_0_t prte_odls_base_component_t prte_odls" SEARCH_HEADER[9]="prte/mca/odls/odls_types.h prte_daemon_cmd_flag_t PRTE_DAEMON_ prte_odls_child_t prte_odls_job_t" SEARCH_HEADER[10]="prte/mca/oob/oob.h mca_oob_1_0_0_t mca_oob_t mca_oob_base_component_2_0_0_t mca_oob_base_component_t mca_oob" SEARCH_HEADER[11]="prte/mca/plm/plm.h prte_plm" SEARCH_HEADER[12]="prte/mca/plm/plm_types.h prte_exit_code_t PRTE_EXIT_CODE_T prte_proc_state_t PRTE_PROC_STATE_ prte_job_state_t PRTE_JOB_STATE_ PRTE_JOB_NEVER_LAUNCHED prte_node_state_t PRTE_NODE_STATE_ prte_plm_cmd_flag_t PRTE_PLM_CMD PRTE_PLM_LAUNCH_JOB_CMD PRTE_PLM_UPDATE_PROC_STATE PRTE_PLM_HEARTBEAT_CMD" ### SEARCH_HEADER[13]="prte/mca/rmaps/rmaps.h prte_rmaps" SEARCH_HEADER[14]="prte/mca/rmaps/rmaps_types.h PRTE_RMAPS_ prte_job_map_t" ### SEARCH_HEADER[15]="prte/mca/rml/base/rml_contact.h 
prte_rml_base_get_contact_info prte_rml_base_update_contact_info prte_rml_base_parse_uris" SEARCH_HEADER[16]="prte/mca/rml/rml.h prte_rml" SEARCH_HEADER[17]="prte/mca/rml/rml_types.h PRTE_RML_TAG_ prte_rml_tag_t prte_rml_cmd_flag_t PRTE_RML_CMD PRTE_RML_UPDATE_CMD PRTE_RML_NON_PERSISTENT PRTE_RML_PEEK PRTE_RML_TRUNC PRTE_RML_ALLOC PRTE_RML_PERSISTENT PRTE_RML_FLAG_RECURSIVE_CALLBACK prte_rml_exception_t PRTE_RML_PEER_UNREACH PRTE_RML_PEER_DISCONNECTED" ### SEARCH_HEADER[18]="prte/runtime/prte_data_server.h prte_data_server_init prte_data_server_finalize prte_data_server_cmd_t PRTE_DATA_SERVER_" SEARCH_HEADER[19]="prte/runtime/prte_globals.h prte_debug_verbosity prte_prohibited_session_dirs prte_xml_output prte_help_want_aggregate PRTE_NAME_WILDCARD prte_name_wildcard PRTE_NAME_INVALID prte_name_invalid PRTE_PROC_MY_NAME PRTE_PROC_MY_HNP PRTE_PROC_MY_DAEMON prte_in_parallel_debugger PRTE_GLOBAL_ARRAY_BLOCK_SIZE PRTE_GLOBAL_ARRAY_MAX_SIZE PRTE_ERROR_DEFAULT_EXIT_CODE PRTE_UPDATE_EXIT_STATUS PRTE_COMPUTE_TIME_DIFF prte_app_context_t prte_node_t prte_job_controls_t PRTE_JOB_CONTROL prte_job_t prte_proc_t prte_attr_t prte_nid_t prte_pmap_t prte_jmap_t prte_get_job_data_object prte_timing prte_debug_daemons_flag prte_debug_daemons_file_flag prte_leave_session_attached prte_do_not_launch prted_spin_flag prte_static_ports prte_contiguous_nodes prte_keep_fqdn_hostnames prte_show_resolved_nodenames prted_debug_failure prte_homogeneous_nodes prte_hetero_apps prte_never_launched prte_devel_level_output prte_launch_environ prte_hnp_is_allocated prte_allocation_required prte_launch_agent prted_cmd_line prte_debugger_daemon prte_enable_debug_cospawn_while_running prte_debugger_check_rate prte_exit orteds_exit prte_exit_status prte_abnormal_term_ordered prte_routing_is_enabled prte_dvm_abort_ordered prte_heartbeat_rate prte_startup_timeout prte_timeout_usec_per_proc prte_max_timeout prte_default_hostfile prte_tree_launch_cmd prte_job_data prte_node_pool prte_clean_output 
prte_send_profile prte_nidmap prte_jobmap prte_local_children prte_local_jobdata prte_forward_job_control prte_tag_output prte_tag_output prte_timestamp_output prte_output_filename prte_xterm prte_rsh_agent" SEARCH_HEADER[20]="prte/runtime/runtime.h prte_version_string prte_initialized prte_finalizing prte_debug_output prte_debug_flag PRTE_NON_TOOL PRTE_TOOL prte_init prte_register_params prte_finalize" ### SEARCH_HEADER[21]="prte/types.h prte_std_cntr_t PRTE_STD_CNTR_ prte_local_rank_t prte_node_rank_t PRTE_LOCAL_RANK PRTE_NODE_RANK PRTE_LOCAL_RANK_MAX PRTE_NODE_RANK_MAX PRTE_LOCAL_RANK_INVALID PRTE_NODE_RANK_INVALID prte_jobid_t PRTE_JOBID_ prte_vpid_t PRTE_VPID_ PRTE_PROCESS_NAME_HTON PRTE_PROCESS_NAME_NTOH PRTE_NAME_ARGS PRTE_JOBID_INVALID PRTE_VPID_INVALID PRTE_JOBID_WILDCARD PRTE_VPID_WILDCARD prte_process_name_t prte_iov_base_ptr_t PRTE_STD_CNTR PRTE_NAME PRTE_VPID PRTE_JOBID PRTE_NODE_STATE PRTE_PROC_STATE PRTE_JOB_STATE PRTE_EXIT_CODE PRTE_VALUE PRTE_APP_CONTEXT PRTE_NODE_DESC PRTE_SLOT_DESC PRTE_JOB PRTE_NODE PRTE_PROC PRTE_JOB_MAP PRTE_RML_TAG PRTE_DAEMON_CMD PRTE_GRPCOMM_MODE PRTE_IOF_TAG" ### SEARCH_HEADER[22]="prte/util/hnp_contact.h prte_hnp_contact_t prte_write_hnp_contact_file prte_read_hnp_contact_file prte_list_local_hnps" ### SEARCH_HEADER[23]="prte/util/name_fns.h PRTE_NS_CMP_ prte_ns_cmp_bitmask_t prte_util_print_name_args PRTE_NAME_PRINT prte_util_print_jobids PRTE_JOBID_PRINT prte_util_print_vpids PRTE_VPID_PRINT prte_util_print_job_family PRTE_JOB_FAMILY_PRINT prte_util_print_local_jobid PRTE_LOCAL_JOBID_PRINT PRTE_JOB_FAMILY PRTE_HNP_NAME_FROM_JOB PRTE_LOCAL_JOBID PRTE_CONSTRUCT_LOCAL_JOBID PRTE_PROC_IS_DAEMON prte_namelist_t prte_util_convert_ prte_util_create_process_name prte_util_compare_name_fields prte_util_hash_name" ### SEARCH_HEADER[24]="prte/util/prte_wait.h prte_trigger_event_t prte_wait_enable prte_wait_disable prte_waitpid prte_wait_cb prte_wait_event PRTE_PROGRESSED_WAIT prte_trigger_event prte_message_event_t 
PRTE_MESSAGE_EVENT_DELAY PRTE_MESSAGE_EVENT prte_notify_event_t PRTE_DETECT_TIMEOUT PRTE_TIMER_EVENT prte_wait_init prte_wait_kill prte_wait_finalize" ### SEARCH_HEADER[25]="prte/util/parse_options.h prte_util_parse_range_options" ### SEARCH_HEADER[26]="prte/util/proc_info.h PRTE_MAX_HOSTNAME_SIZE prte_proc_info_t prte_process_info prte_proc_info prte_proc_info_finalize" ### SEARCH_HEADER[27]="prte/util/session_dir.h prte_session_dir" ### SEARCH_HEADER[28]="prte/util/show_help.h prte_show_help_init prte_show_help_finalize prte_show_help prte_show_help_recv" ### SEARCH_HEADER[29]="" delete_unnessary_header . #################################### SEARCH_HEADER[0]="ompi/attribute/attribute.h ATTR_HASH_SIZE OMPI_KEYVAL_PREDEFINED OMPI_KEYVAL_F77 ompi_attribute_type_t ompi_mpi1_fortran_copy_attr_function ompi_mpi1_fortran_delete_attr_function ompi_mpi2_fortran_copy_attr_function ompi_mpi2_fortran_delete_attr_function MPI_Comm_internal_copy_attr_function MPI_Type_internal_copy_attr_function MPI_Win_internal_copy_attr_function ompi_attribute_keyval_destructor_fn_t ompi_attribute_fn_ptr_union_t ompi_attribute_fortran_ptr_t ompi_attribute_keyval_t ompi_attr_hash_init ompi_attr_init ompi_attr_finalize ompi_attr_create_keyval ompi_attr_free_keyval ompi_attr_set_c ompi_attr_set_fortran_mpi1 ompi_attr_set_fortran_mpi2 ompi_attr_get_c ompi_attr_get_fortran_mpi1 ompi_attr_get_fortran_mpi2 ompi_attr_delete ompi_attr_copy_all ompi_attr_delete_all ompi_attr_create_predefined ompi_attr_free_predefined" SEARCH_HEADER[1]="ompi/class/ompi_free_list.h ompi_free_list_item_init_fn_t ompi_free_list_t ompi_free_list_item_t ompi_free_list_init_ex ompi_free_list_init ompi_free_list_init_ex_new ompi_free_list_init_new ompi_free_list_grow ompi_free_list_resize ompi_free_list_pos_t OMPI_FREE_LIST_POS_BEGINNING ompi_free_list_parse OMPI_FREE_LIST_GET OMPI_FREE_LIST_WAIT __ompi_free_list_wait OMPI_FREE_LIST_RETURN" SEARCH_HEADER[2]="ompi/class/ompi_rb_tree.h ompi_rb_tree_nodecolor_t 
ompi_rb_tree_node_t ompi_rb_tree_comp_fn_t ompi_rb_tree_t ompi_rb_tree_condition_fn_t ompi_rb_tree_action_fn_t ompi_rb_tree_construct ompi_rb_tree_destruct ompi_rb_tree_init ompi_rb_tree_insert ompi_rb_tree_find_with ompi_rb_tree_find ompi_rb_tree_delete ompi_rb_tree_destroy ompi_rb_tree_traverse ompi_rb_tree_size" SEARCH_HEADER[3]="ompi/class/ompi_seq_tracker.h ompi_seq_tracker_range_t ompi_seq_tracker_t ompi_seq_tracker_check_duplicate ompi_seq_tracker_insert ompi_seq_tracker_copy" SEARCH_HEADER[4]="ompi/communicator/communicator.h MPI_Comm MPI_COMM_WORLD ompi_communicator_t OMPI_COMM_INTER OMPI_COMM_CART OMPI_COMM_GRAPH OMPI_COMM_NAMEISSET OMPI_COMM_ISFREED OMPI_COMM_INTRINSIC OMPI_COMM_DYNAMIC OMPI_COMM_INVALID OMPI_COMM_PML_ADDED OMPI_COMM_IS_ OMPI_COMM_SET_ OMPI_COMM_ALLGATHER_TAG OMPI_COMM_BARRIER_TAG OMPI_COMM_ALLREDUCE_TAG OMPI_COMM_CID_ OMPI_COMM_BLOCK_ ompi_predefined_communicator_t ompi_mpi_comm_parent ompi_mpi_comm_world ompi_mpi_comm_self ompi_mpi_comm_null ompi_comm_invalid ompi_comm_rank ompi_comm_size ompi_comm_remote_size ompi_comm_get_cid ompi_comm_lookup ompi_comm_peer_lookup ompi_comm_peer_invalid ompi_comm_init ompi_comm_link_function ompi_comm_group ompi_comm_create ompi_topo_create ompi_comm_split ompi_comm_dup ompi_comm_compare ompi_comm_free ompi_comm_allocate ompi_comm_nextcid ompi_comm_finalize ompi_comm_set ompi_comm_get_rprocs ompi_comm_overlapping_groups ompi_comm_determine_first ompi_comm_activate ompi_comm_dump ompi_comm_set_name ompi_comm_reg_init ompi_comm_reg_finalize ompi_comm_num_dyncomm ompi_mpi_cxx_comm_errhandler_invoke" SEARCH_HEADER[5]="ompi/datatype/convertor.h OMPI_COMM_INTER OMPI_COMM_CART OMPI_COMM_GRAPH OMPI_COMM_NAMEISSET OMPI_COMM_ISFREED OMPI_COMM_INTRINSIC OMPI_COMM_DYNAMIC OMPI_COMM_INVALID OMPI_COMM_PML_ADDED OMPI_COMM_IS_ OMPI_COMM_SET_ OMPI_COMM_ALLGATHER_TAG OMPI_COMM_BARRIER_TAG OMPI_COMM_ALLREDUCE_TAG OMPI_COMM_CID_ OMPI_COMM_BLOCK_ ompi_predefined_communicator_t ompi_mpi_comm_parent ompi_mpi_comm_null 
ompi_comm_invalid ompi_comm_rank ompi_comm_size ompi_comm_remote_size ompi_comm_get_cid ompi_comm_lookup ompi_comm_peer_lookup ompi_comm_peer_invalid ompi_comm_init ompi_comm_link_function ompi_comm_group ompi_comm_create ompi_topo_create ompi_comm_split ompi_comm_dup ompi_comm_compare ompi_comm_free ompi_comm_allocate ompi_comm_nextcid ompi_comm_finalize ompi_comm_set ompi_comm_get_rprocs ompi_comm_overlapping_groups ompi_comm_determine_first ompi_comm_activate ompi_comm_dump ompi_comm_set_name ompi_comm_reg_init ompi_comm_reg_finalize ompi_comm_num_dync CONVERTOR_DATATYPE_MASK CONVERTOR_SEND_CONVERSION CONVERTOR_RECV CONVERTOR_SEND CONVERTOR_HOMOGENEOUS CONVERTOR_NO_OP CONVERTOR_WITH_CHECKSUM CONVERTOR_TYPE_MASK CONVERTOR_STATE_START CONVERTOR_STATE_COMPLETE CONVERTOR_STATE_ALLOC CONVERTOR_COMPLETED ompi_convertor_t ompi_convertor_master_t dt_stack_t DT_STATIC_STACK_SIZE ompi_convertor_get_checksum ompi_convertor_pack ompi_convertor_unpack ompi_convertor_create ompi_convertor_cleanup ompi_convertor_need_buffers ompi_convertor_get_packed_size ompi_convertor_get_unpacked_size ompi_convertor_get_current_pointer ompi_convertor_prepare_for_send ompi_convertor_copy_and_prepare_for_send ompi_convertor_prepare_for_recv ompi_convertor_copy_and_prepare_for_recv ompi_convertor_raw ompi_convertor_set_position_nocheck ompi_convertor_set_position ompi_convertor_personalize ompi_convertor_clone ompi_convertor_clone_with_position ompi_convertor_dump ompi_ddt_dump_stack ompi_convertor_generic_simple_position MPI_Datatype" SEARCH_HEADER[6]="ompi/datatype/datatype.h MPI_Datatype DT_MAX_PREDEFINED DT_FLAG_ MAX_DT_COMPONENT_COUNT opal_ddt_count_t dt_type_desc_t ompi_datatype_t ompi_predefined_datatype_t ompi_ddt_init ompi_ddt_finalize ompi_ddt_create_ ompi_ddt_duplicate ompi_ddt_is_predefined ompi_ddt_create_from_packed_description" SEARCH_HEADER[7]="ompi/datatype/datatype_internal.h DDT_DUMP_STACK DT_ ddt_elem_id_description ddt_elem_desc ddt_elem_desc_t ddt_loop_desc 
ddt_loop_desc_t ddt_endloop_desc ddt_endloop_desc_t dt_elem_desc CREATE_LOOP_START CREATE_LOOP_END CREATE_ELEM ompi_complex_float_t ompi_complex_double_t ompi_complex_long_double_t ompi_ddt_basicDatatypes BASIC_DDT_FROM_ELEM ompi_ddt_default_convertors_init ompi_ddt_default_convertors_fini SAVE_STACK PUSH_STACK ompi_ddt_safeguard_pointer_debug_breakpoint OMPI_DDT_SAFEGUARD_POINTER GET_FIRST_NON_LOOP UPDATE_INTERNAL_COUNTERS ompi_ddt_print_args" SEARCH_HEADER[8]="ompi/errhandler/errhandler.h OMPI_ERRHANDLER_LANG_ ompi_errhandler_lang_t OMPI_ERRHANDLER_TYPE_ ompi_errhandler_type_t ompi_errhandler_t ompi_predefined_errhandler_t ompi_mpi_errhandler_null OMPI_ERRHANDLER_CHECK OMPI_ERRHANDLER_RETURN ompi_errhandler_init ompi_errhandler_finalize OMPI_ERRHANDLER_INVOKE ompi_errhandler_invoke ompi_errhandler_request_invoke ompi_errhandler_create ompi_errhandler_is_intrinsic ompi_errhandler_fortran_handler_fn_t OMPI_ERR_INIT_FINALIZE MPI_Errhandler" SEARCH_HEADER[9]="ompi/errhandler/errhandler_predefined.h ompi_mpi_errors_are_fatal_ ompi_mpi_errors_return_ ompi_mpi_errors_throw_exceptions" ### SEARCH_HEADER[10]="ompi/file/file.h OMPI_FILE_ISCLOSED OMPI_FILE_HIDDEN ompi_file_t ompi_predefined_file_t ompi_mpi_file_null ompi_file_f_to_c_table ompi_file_init ompi_file_open ompi_file_set_name ompi_file_close ompi_file_finalize ompi_file_invalid MPI_File MPI_FILE_NULL ompi_mpi_cxx_file_errhandler_invoke" # THE LAST ONE WAS FOR THE CXX INTERFACE SEARCH_HEADER[11]="ompi/group/group.h ompi_group_sporadic_list_t ompi_group_sporadic_data_t ompi_group_strided_data_t ompi_group_bitmap_data_t ompi_group_t ompi_predefined_group_t OMPI_GROUP_ ompi_group_f_to_c_table ompi_mpi_group_null ompi_group_allocate ompi_group_increment_proc_count ompi_group_decrement_proc_count ompi_group_size ompi_group_rank ompi_set_group_rank ompi_group_translate_ranks ompi_group_free ompi_group_get_proc_ptr ompi_group_calc_ ompi_group_peer_lookup ompi_group_div_ceil MPI_Group" SEARCH_HEADER[12]="ompi/info/info.h 
MPI_Info ompi_info_t ompi_predefined_info_t ompi_info_f_to_c_table ompi_info_entry_t ompi_mpi_info_null ompi_info_init ompi_info_finalize ompi_info_dup ompi_info_set ompi_info_free ompi_info_get_bool ompi_info_get ompi_info_delete ompi_info_get_valuelen ompi_info_get_nthkey ompi_info_value_to_bool ompi_info_value_to_int ompi_info_is_freed" ### SEARCH_HEADER[13]="ompi/mca/allocator/allocator.h mca_allocator_base_module_t mca_allocator_base_output mca_allocator_base_component_t" ### SEARCH_HEADER[14]="ompi/mca/bml/bml.h mca_bml_base_btl_t mca_bml_base_btl_array_t mca_bml_base_btl_array_get_size mca_bml_base_btl_array_set_size mca_bml_base_btl_array_insert mca_bml_base_btl_array_remove mca_bml_base_btl_array_get_index mca_bml_base_btl_array_get_next mca_bml_base_btl_array_find mca_bml_base_endpoint_t mca_bml_base_alloc mca_bml_base_free mca_bml_base_send mca_bml_base_send_status mca_bml_base_sendi mca_bml_base_put mca_bml_base_get mca_bml_base_prepare_src mca_bml_base_prepare_dst mca_bml_base_component_t mca_bml_base_module_t" ### SEARCH_HEADER[15]="ompi/mca/btl/btl.h mca_btl_base_tag_t MCA_BTL_AM_FRAMEWORK_MASK MCA_BTL_TAG_ MCA_BTL_FLAGS_ MCA_BTL_EXCLUSIVITY_ MCA_BTL_ERROR_FLAGS_FATAL mca_btl_base_segment_t mca_btl_base_descriptor_t MCA_BTL_DES_ mca_btl_base_header_t MCA_BTL_BASE_HEADER_HTON MCA_BTL_BASE_HEADER_NTOH mca_btl_base_component_t mca_btl_base_module_t" ### SEARCH_HEADER[16]="ompi/mca/coll/coll.h mca_coll_base_component_ mca_coll_base_module_ mca_coll_base_comm_coll_t" ### SEARCH_HEADER[17]="ompi/mca/dpm/dpm.h OMPI_RML_TAG_ OMPI_COMM_JOIN_TAG ompi_dpm ompi_dpm_base_component_t" ### SEARCH_HEADER[18]="ompi/mca/mpool/mpool.h mca_mpool_base_registration_t mca_mpool_base_component_t mca_mpool_base_module_t mca_mpool_base_alloc mca_mpool_base_free mca_mpool_base_tree_node_compare mca_mpool_base_insert mca_mpool_base_remove" ### SEARCH_HEADER[19]="ompi/mca/pml/pml.h mca_pml_base_send_mode_t OMPI_ANY_TAG OMPI_ANY_SOURCE OMPI_PROC_NULL mca_pml_base_component_t 
mca_pml_base_module_t MCA_PML_CALL mca_pml" ### SEARCH_HEADER[20]="ompi/mca/topo/topo.h mca_topo_base_module_ mca_topo_base_component_2_0_0_t mca_topo_base_component_t mca_topo_base_comm_1_0_0_t mca_topo_base_comm_t mca_topo_base_module_t" ### SEARCH_HEADER[21]="ompi/op/op.h OMPI_OP_FLAGS_ ompi_op_f_to_c_table ompi_op_t ompi_predefined_op_t ompi_op_ddt_map ompi_mpi_op_null ompi_mpi_op_max ompi_mpi_op_min ompi_mpi_op_sum ompi_mpi_op_prod ompi_mpi_op_land ompi_mpi_op_band ompi_mpi_op_lor ompi_mpi_op_bor ompi_mpi_op_lxor ompi_mpi_op_bxor ompi_mpi_op_maxloc ompi_mpi_op_minloc ompi_mpi_op_replace ompi_op_init ompi_op_finalize ompi_op_create_user ompi_op_set_cxx_callback ompi_op_is_intrinsic ompi_op_is_commute ompi_op_is_float_assoc ompi_op_is_valid ompi_op_reduce ompi_3buff_op_reduce ompi_op_fortran_handler_fn_t MPI_Op" ### SEARCH_HEADER[22]="ompi/proc/proc.h ompi_proc_t ompi_proc_local_proc ompi_proc_init ompi_proc_set_arch ompi_proc_finalize ompi_proc_world ompi_proc_all ompi_proc_self ompi_proc_local ompi_proc_find ompi_proc_pack ompi_proc_unpack ompi_proc_refresh" ### SEARCH_HEADER[23]="ompi/request/request.h ompi_request_t ompi_request_type_t OMPI_REQUEST_ ompi_request_state_t ompi_mpi_object_t ompi_predefined_request_t OMPI_REQUEST_INIT OMPI_REQUEST_FINI ompi_request_fns_t ompi_request_f_to_c_table ompi_request_waiting ompi_request_completed ompi_request_poll ompi_request_lock ompi_request_cond ompi_request_null ompi_request_empty ompi_status_empty ompi_request_functions ompi_request_init ompi_request_persistent_proc_null_free ompi_request_finalize ompi_request_cancel ompi_request_free ompi_request_test ompi_request_wait ompi_request_wait_completion ompi_request_complete" ### SEARCH_HEADER[24]="ompi/runtime/ompi_module_exchange.h ompi_modex_send ompi_modex_send_string ompi_modex_recv ompi_modex_recv_string" ### SEARCH_HEADER[25]="ompi/runtime/params.h ompi_mpi_param_check ompi_debug_show_handle_leaks ompi_debug_show_mpi_alloc_mem_leaks ompi_debug_no_free_handles 
ompi_mpi_show_mca_params ompi_mpi_show_mca_params_file ompi_mpi_paffinity_alone ompi_mpi_keep_peer_hostnames ompi_mpi_abort_print_stack ompi_mpi_abort_delay ompi_mpi_leave_pinned ompi_mpi_leave_pinned_pipeline ompi_have_sparse_group_storage ompi_use_sparse_group_storage ompi_mpi_register_params ompi_show_all_mca_params MPI_PARAM_CHECK" ### SEARCH_HEADER[26]="ompi/win/win.h OMPI_WIN_FREED OMPI_WIN_INVALID OMPI_WIN_NO_LOCKS OMPI_WIN_ACCESS_EPOCH OMPI_WIN_EXPOSE_EPOCH OMPI_WIN_FENCE OMPI_WIN_POSTED OMPI_WIN_STARTED OMPI_WIN_LOCK_ACCESS ompi_mpi_windows ompi_win_t ompi_predefined_win_t ompi_mpi_win_null ompi_win_init ompi_win_finalize ompi_win_create ompi_win_free ompi_win_set_name ompi_win_get_name ompi_win_group ompi_win_invalid ompi_win_peer_invalid ompi_win_rank ompi_win_allow_locks ompi_win_get_mode ompi_win_set_mode ompi_win_append_mode ompi_win_remove_mode ompi_win_access_epoch ompi_win_exposure_epoch ompi_win_comm_allowed MPI_Win" SEARCH_HEADER[27]="" delete_unnessary_header . echo "PLEASE RUN the following lines in ompi/mpi/c; then run this script again (please header_replacement.sh for add_header)" echo "# cd ompi/mpi/c" echo "# for i in *.c ; do grep -q '#include \"ompi/errhandler/errhandler.h\"' $$i || add_header $$i ompi/errhandler/errhandler.h ompi/mpi/c/bindings.h ; done" echo "# for i in *.c ; do grep -q '#include \"ompi/communicator/communicator.h\"' $$i || add_header $$i ompi/communicator/communicator.h ompi/mpi/c/bindings.h ; done" echo "# for i in *.c ; do grep -q '#include \"ompi/runtime/params.h\"' $$i || add_header $$i ompi/runtime/params.h ompi/mpi/c/bindings.h ; done" # Finally erase a header that has been introduced for the STCI replacement # and is not necessary, if no occurences of #include "prte..." reside... SEARCH_HEADER[0]="rte.h prte" SEARCH_HEADER[1]="" delete_unnessary_header . 
prrte-3.0.13/contrib/code_counter.pl0000775000175000017500000001152215145263240017572 0ustar alastairalastair
#!/usr/bin/env perl
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
#                         University Research and Technology
#                         Corporation.  All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
#                         of Tennessee Research Foundation.  All rights
#                         reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
#                         University of Stuttgart.  All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
#                         All rights reserved.
# Copyright (c) 2006-2020 Cisco Systems, Inc.  All rights reserved
# Copyright (c) 2019      Intel, Inc.  All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Primitive script to give approximate code counts in the Open MPI tree
#
# Usage: run from anywhere inside the source tree.  The script walks up
# the directory hierarchy until every directory in @code_dirs (and
# @doc_dirs) exists in the current directory, then counts newline
# characters in every file that is not filtered out by the skip lists.
#

use strict;
use File::Find;
use File::stat;
use Cwd;

# Setup some directories

my $verbose = 0;
my @skip_dirs;
my @code_dirs = ( "ompi", "prte", "opal", "test" );
my @doc_dirs = ( );
my @meta_dirs = ( ".svn", ".deps", ".libs", "libltdl" );
my @skip_files = ( "Makefile.in", "Makefile", ".ompi_built" , "config.cache", "libtool", "depcomp", "aclocal.m4", "install-sh", "missing", "mkinstalldirs", "compile", "config.sub", "config.guess", "config.log", "config.status", "TAGS", ".", "configure", "ltmain.sh", "ChangeLog");
my @skip_patterns = ( ".o\$", ".lo\$", ".out\$", "autom4te", ".in\$", ".bak\$", "~\$", ".gz\$", "^stamp-", "^.#", "^#.+#\$", "dynamic-mca" );
my $loc = 0;

# Primitive check to find the top OMPI dir: keep moving to the parent
# directory until all expected top-level directories are present.

my @tlds = (@code_dirs, @doc_dirs);

my $good = 0;
do {
    $good = 1;
    foreach my $tld (@tlds) {
        if (! -d $tld) {
            $good = 0;
            last;
        }
    }

    if (!$good) {
        chdir("..");
        my $dir = getcwd();
        if ($dir eq "/") {
            print("Unable to find Open MPI top dir; aborting\n");
            exit(1);
        }
    }
} while ($good != 1);

# Found top dir

print("Found top Open MPI directory: " . getcwd() . "\n");

# Now count up code

my %files_found;
my %dirs_found;

# File::Find callback.  Called once per directory entry with $_ set to
# the entry's basename and the cwd set to its parent directory.
#  - prunes directories listed in @skip_dirs,
#  - ignores files listed in @skip_files or matching @skip_patterns,
#  - for every remaining regular entry, records it in %files_found and
#    adds its newline count to the global $loc,
#  - records the containing directory in %dirs_found.
# Returns 0 when the entry was skipped, 1 otherwise.
sub wanted {
    # Setup

    my $file = $_;
    my @dirnames = split('/', $File::Find::dir);
    my $dir = $dirnames[$#dirnames];

    # Is the file a dir?

    my $is_dir = (-d $file);

    # Do we want this dir?

    foreach my $skip (@skip_dirs) {
        if ($skip eq $dir ||
            ($is_dir && $skip eq $file)) {
            print("Skipping dir: $File::Find::dir / $file\n")
                if ($verbose);
            $File::Find::prune = 1;
            return 0;
        }
    }

    # Do we want this file?

    foreach my $skip (@skip_files) {
        return 0
            if ($skip eq $file);
    }

    # Match the basename explicitly instead of relying on the implicit $_.
    foreach my $pattern (@skip_patterns) {
        if ($file =~ /$pattern/) {
            print("Skipping file pattern: $File::Find::dir/$file\n")
                if ($verbose);
            $File::Find::prune = 1
                if ($is_dir);
            return 0;
        }
    }

    if ($is_dir) {
        print ("Found dir: $File::Find::dir/$file\n")
            if ($verbose);
    } else {
        $files_found{$File::Find::name} = 1;
        print ("Found file: $File::Find::name\n")
            if ($verbose);

        # Count the \n's

        my $sb = stat($file)
            or die "Can't stat $File::Find::name -- $!";

        # Use low-precedence "or": with "open FILE, $file || die" the
        # "||" binds to $file, so a failed open was silently ignored.
        open(my $fh, '<', $file)
            or die "Can't open $File::Find::name -- $!";
        my $data = '';
        my $size = $sb->size;
        sysread($fh, $data, $size);
        close($fh);

        # tr/// in counting mode returns the number of newlines without
        # modifying $data.
        my $local_loc = ($data =~ tr/\n//);
        $loc += $local_loc;
        print "Loc: $File::Find::name / $size / $local_loc / $loc\n"
            if ($verbose);
    }
    $dirs_found{$File::Find::dir} = 1;
    1;
}

# Look for code (only performed when documentation directories are
# configured; otherwise this pass would be identical to the total pass)

$loc = 0;
%files_found = ();
%dirs_found = ();
@skip_dirs = @meta_dirs;
if ($#doc_dirs >= 0) {
    push(@skip_dirs, @doc_dirs);

    print("Searching for code files...\n");
    find(\&wanted, ".");

    my @files = keys(%files_found);
    my @dirs = keys(%dirs_found);

    # Report element counts; $#array is the last index (count - 1).
    print ("Found files " . scalar(@files) . "\n");
    print ("Found dirs " . scalar(@dirs) . "\n");
    print ("Lines of code: $loc\n");
}

# Total files

%files_found = ();
%dirs_found = ();
$loc = 0;
@skip_dirs = @meta_dirs;

print("Searching for all files...\n");
find(\&wanted, ".");

my @files = keys(%files_found);
my @dirs = keys(%dirs_found);

# Report element counts; $#array is the last index (count - 1).
print ("Found files " . scalar(@files) . "\n");
print ("Found dirs " . scalar(@dirs) . "\n");
print ("Lines of code: $loc\n");
prrte-3.0.13/contrib/docker/0000775000175000017500000000000015145263240016027 5ustar alastairalastairprrte-3.0.13/contrib/docker/Dockerfile.centos8.ssh0000664000175000017500000001711115145263240022200 0ustar alastairalastair# Copyright (c) 2020 Intel, Inc. All rights reserved. # Copyright (c) 2020 IBM Corporation. All rights reserved.
#
# Base Build box for PRRTE
# Requires:
#  - Basic compile tooling and runtime support
#  - libevent - retrieve v2.1.11-stable from web
#  - hwloc - retrieve v2.2.0 from web
#  - curl
#  - libjansson - retrieve v2.13.1 from web
#  - PMIx - cloned from 'master' branch
#  - PRRTE - cloned from 'master' branch
#
FROM centos:8

MAINTAINER Ralph Castain

# ------------------------------------------------------------
# Install required packages
# ------------------------------------------------------------
RUN dnf -y update && \
    dnf -y install epel-release && \
    dnf repolist && \
    dnf -y install \
        systemd openssh-server openssh-clients \
        gcc gdb strace \
        binutils less wget which sudo \
        perl perl-Data-Dumper numactl \
        autoconf automake libtool flex bison \
        iproute net-tools make git \
        libnl3 gtk2 atk cairo tcl tcsh tk pciutils lsof ethtool bc file \
        valgrind curl curl-devel && \
    dnf clean all

# ------------------------------------------------------------
# Define support libraries
# - hwloc
# - libevent
# - libjansson
# ------------------------------------------------------------
RUN mkdir -p /opt/hpc/local/build
RUN mkdir -p /opt/hpc/rndvz

# Each install prefix is a build ARG (overridable with --build-arg) that is
# also exported as ENV so later RUN steps and the final image can see it.
ARG LIBEVENT_INSTALL_PATH=/opt/hpc/local/libevent
ENV LIBEVENT_INSTALL_PATH=$LIBEVENT_INSTALL_PATH
ARG HWLOC_INSTALL_PATH=/opt/hpc/local/hwloc
ENV HWLOC_INSTALL_PATH=$HWLOC_INSTALL_PATH
ARG LIBJANSSON_INSTALL_PATH=/opt/hpc/local/libjansson
ENV LIBJANSSON_INSTALL_PATH=$LIBJANSSON_INSTALL_PATH

RUN cd /opt/hpc/local/build && \
    wget https://github.com/libevent/libevent/releases/download/release-2.1.11-stable/libevent-2.1.11-stable.tar.gz && \
    tar xf libevent-2.1.11-stable.tar.gz && \
    cd libevent-2.1.11-stable && \
    ./configure --prefix=${LIBEVENT_INSTALL_PATH} > /dev/null && \
    make > /dev/null && \
    make install > /dev/null

RUN cd /opt/hpc/local/build && \
    wget https://download.open-mpi.org/release/hwloc/v2.2/hwloc-2.2.0.tar.gz && \
    tar xf hwloc-2.2.0.tar.gz && \
    cd hwloc-2.2.0 && \
    ./configure --prefix=${HWLOC_INSTALL_PATH} > /dev/null && \
    make > /dev/null && \
    make install > /dev/null && \
    cd .. && \
    rm -rf /opt/hpc/local/src /opt/hpc/local/build/*

RUN cd /opt/hpc/local/build && \
    wget https://digip.org/jansson/releases/jansson-2.13.1.tar.gz && \
    tar xf jansson-2.13.1.tar.gz && \
    cd jansson-2.13.1 && \
    ./configure --prefix=${LIBJANSSON_INSTALL_PATH} > /dev/null && \
    make > /dev/null && \
    make install > /dev/null && \
    cd .. && \
    rm -rf /opt/hpc/local/build/*

# Shared libraries live under <prefix>/lib; the hwloc entry previously
# pointed at <prefix>/bin, which holds only the command-line tools.
ENV LD_LIBRARY_PATH="$HWLOC_INSTALL_PATH/lib:$LIBEVENT_INSTALL_PATH/lib:$LIBJANSSON_INSTALL_PATH/lib:${LD_LIBRARY_PATH}"

# ------------------------------------------------------------
# PMIx Install
# ------------------------------------------------------------
ENV PMIX_ROOT=/opt/hpc/local/pmix
ENV LD_LIBRARY_PATH="$PMIX_ROOT/lib:${LD_LIBRARY_PATH}"

RUN cd /opt/hpc/local/build && \
    git clone -q -b master https://github.com/openpmix/openpmix.git && \
    cd openpmix && \
    ./autogen.pl > /dev/null && \
    ./configure --prefix=${PMIX_ROOT} \
                --with-hwloc=${HWLOC_INSTALL_PATH} \
                --with-libevent=${LIBEVENT_INSTALL_PATH} \
                --with-curl \
                --with-jansson=${LIBJANSSON_INSTALL_PATH} > /dev/null && \
    make -j 10 > /dev/null && \
    make -j 10 install > /dev/null && \
    cd .. && rm -rf /opt/hpc/local/build/*

# ------------------------------------------------------------
# PRRTE Install
# ------------------------------------------------------------
ENV PRRTE_ROOT=/opt/hpc/local/prrte
ENV PATH="$PRRTE_ROOT/bin:${PATH}"
ENV LD_LIBRARY_PATH="$PRRTE_ROOT/lib:${LD_LIBRARY_PATH}"

RUN cd /opt/hpc/local/build && \
    git clone -q -b master https://github.com/openpmix/prrte.git && \
    cd prrte && \
    ./autogen.pl > /dev/null && \
    ./configure --prefix=${PRRTE_ROOT} \
                --with-hwloc=${HWLOC_INSTALL_PATH} \
                --with-libevent=${LIBEVENT_INSTALL_PATH} \
                --with-pmix=${PMIX_ROOT} > /dev/null && \
    make -j 10 > /dev/null && \
    make -j 10 install > /dev/null && \
    rm -rf /opt/hpc/local/build/*

# ------------------------------------------------------------
# Fixup the ssh login
# ------------------------------------------------------------
# Generate host keys and make the ssh client non-interactive (quiet, no
# host-key prompts, no known_hosts persistence).
RUN ssh-keygen -t rsa -f /etc/ssh/ssh_host_rsa_key -N "" && \
    ssh-keygen -t ecdsa -f /etc/ssh/ssh_host_ecdsa_key -N "" && \
    ssh-keygen -t ed25519 -f /etc/ssh/ssh_host_ed25519_key -N "" && \
    echo " LogLevel ERROR" >> /etc/ssh/ssh_config && \
    echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config && \
    echo " UserKnownHostsFile=/dev/null" >> /etc/ssh/ssh_config

# ------------------------------------------------------------
# Adjust default ulimit for core files
# ------------------------------------------------------------
RUN echo '* hard core -1' >> /etc/security/limits.conf && \
    echo '* soft core -1' >> /etc/security/limits.conf && \
    echo 'ulimit -c unlimited' >> /root/.bashrc

# ------------------------------------------------------------
# Create a user account
# ------------------------------------------------------------
RUN groupadd -r prteuser && useradd --no-log-init -r -m -b /home -g prteuser -G wheel prteuser

USER prteuser
# Create a passphrase-less key pair and authorize it for the same account.
RUN cd /home/prteuser && \
    ssh-keygen -t rsa -N "" -f ~/.ssh/id_rsa && chmod og+rX . && \
    cd .ssh && cat id_rsa.pub > authorized_keys && chmod 644 authorized_keys && \
    exit

# ------------------------------------------------------------
# Give the user passwordless sudo powers
# ------------------------------------------------------------
USER root
RUN echo "prteuser ALL = NOPASSWD: ALL" >> /etc/sudoers

# ------------------------------------------------------------
# Adjust the default environment
# ------------------------------------------------------------
USER root

ENV PRRTE_MCA_prrte_default_hostfile=/opt/hpc/etc/hostfile.txt
ENV PATH=$PATH:/opt/hpc/local/hwloc/bin

# Need to do this so that the 'prteuser' can have them too, not just root
#
# Unescaped $VARS are expanded now, at build time, and written as literal
# paths; escaped \$VARS are written verbatim and expanded when /etc/bashrc
# is sourced.  The install-prefix variables are expanded at build time
# because they are Dockerfile ENV values and are not guaranteed to exist
# in a login shell (the jansson entry was previously escaped and would
# have resolved to "/lib" there).
#
# NOTE(review): PMIX_ROOT / PRRTE_ROOT are written as /opt/hpc/external/...
# while this image installs into /opt/hpc/local/... -- presumably the
# external tree is expected to be mounted at run time; confirm.
RUN echo "export PMIX_ROOT=/opt/hpc/external/pmix" >> /etc/bashrc && \
    echo "export PRRTE_ROOT=/opt/hpc/external/prrte" >> /etc/bashrc && \
    echo "export PATH=\$PRRTE_ROOT/bin:\$PATH" >> /etc/bashrc && \
    echo "export LD_LIBRARY_PATH=\$PMIX_ROOT/lib:\$LD_LIBRARY_PATH" >> /etc/bashrc && \
    echo "export LD_LIBRARY_PATH=$HWLOC_INSTALL_PATH/lib:$LIBEVENT_INSTALL_PATH/lib:\$LD_LIBRARY_PATH" >> /etc/bashrc && \
    echo "export LD_LIBRARY_PATH=$LIBJANSSON_INSTALL_PATH/lib:\$LD_LIBRARY_PATH" >> /etc/bashrc && \
    echo "export LD_LIBRARY_PATH=\$PRRTE_ROOT/lib:\$LD_LIBRARY_PATH" >> /etc/bashrc && \
    echo "export PRRTE_MCA_prrte_default_hostfile=$PRRTE_MCA_prrte_default_hostfile" >> /etc/bashrc && \
    echo "ulimit -c unlimited" >> /etc/bashrc && \
    echo "alias pd=pushd" >> /etc/bashrc

# ------------------------------------------------------------
# Kick off the ssh daemon
# ------------------------------------------------------------
EXPOSE 22
# NOTE(review): Docker honors only the last CMD in a Dockerfile, so this
# CMD is overridden by any CMD that appears later in the file.
CMD ["/usr/sbin/sshd", "-D"]

# ------------------------------------------------------------
# Tickle ptrace scope for Mac stacktrace
# ------------------------------------------------------------
# NOTE(review): exec-form CMD does not run a shell, so ">" is passed to
# echo as a plain argument rather than acting as a redirect; this CMD is
# also subject to the "last CMD wins" rule above.
CMD ["echo", "0", ">", "/proc/sys/kernel/yama/ptrace_scope"]

# ------------------------------------------------------------
# Kick off the PRRTE daemon
# 
------------------------------------------------------------
USER prteuser
CMD ["prte"]
prrte-3.0.13/contrib/docker/Dockerfile.centos7.ssh0000664000175000017500000001712015145263240022177 0ustar alastairalastair
# Copyright (c) 2020 Intel, Inc. All rights reserved.
# Copyright (c) 2020 IBM Corporation. All rights reserved.
#
# Base Build box for PRRTE
# Requires:
# - Basic compile tooling and runtime support
# - libevent - retrieve v2.1.11-stable from web
# - hwloc - retrieve v2.2.0 from web
# - curl
# - libjansson - retrieve v2.13.1 from web
# - PMIx - cloned from 'master' branch
# - PRRTE - cloned from 'master' branch
#
FROM centos:7

MAINTAINER Ralph Castain

# ------------------------------------------------------------
# Install required packages
# ------------------------------------------------------------
RUN yum -y update && \
    yum -y install epel-release && \
    yum repolist && \
    yum -y install \
        systemd openssh-server openssh-clients \
        gcc gdb strace \
        binutils less wget which sudo \
        perl perl-Data-Dumper numactl \
        autoconf automake libtool flex bison \
        iproute net-tools make git pandoc \
        libnl3 gtk2 atk cairo tcl tcsh tk pciutils lsof ethtool bc file \
        valgrind curl curl-devel && \
    yum clean all

# ------------------------------------------------------------
# Define support libraries
# - hwloc
# - libevent
# - libjansson
#
# Each library is built from a release tarball under
# /opt/hpc/local/build and installed into its own prefix. The
# install prefixes are ARGs so they can be overridden at build
# time, and are re-exported as ENV so later steps can use them.
# ------------------------------------------------------------
RUN mkdir -p /opt/hpc/local/build
RUN mkdir -p /opt/hpc/rndvz

ARG LIBEVENT_INSTALL_PATH=/opt/hpc/local/libevent
ENV LIBEVENT_INSTALL_PATH=$LIBEVENT_INSTALL_PATH
ARG HWLOC_INSTALL_PATH=/opt/hpc/local/hwloc
ENV HWLOC_INSTALL_PATH=$HWLOC_INSTALL_PATH
ARG LIBJANSSON_INSTALL_PATH=/opt/hpc/local/libjansson
ENV LIBJANSSON_INSTALL_PATH=$LIBJANSSON_INSTALL_PATH

# libevent: the build tree is left in place here; the hwloc step below
# wipes /opt/hpc/local/build/* once it has finished.
RUN cd /opt/hpc/local/build && \
    wget https://github.com/libevent/libevent/releases/download/release-2.1.11-stable/libevent-2.1.11-stable.tar.gz && \
    tar xf libevent-2.1.11-stable.tar.gz && \
    cd libevent-2.1.11-stable && \
    ./configure --prefix=${LIBEVENT_INSTALL_PATH} > /dev/null && \
    make > /dev/null && \
    make install > /dev/null

# hwloc
RUN cd /opt/hpc/local/build && \
    wget https://download.open-mpi.org/release/hwloc/v2.2/hwloc-2.2.0.tar.gz && \
    tar xf hwloc-2.2.0.tar.gz && \
    cd hwloc-2.2.0 && \
    ./configure --prefix=${HWLOC_INSTALL_PATH} > /dev/null && \
    make > /dev/null && \
    make install > /dev/null && \
    cd .. && \
    rm -rf /opt/hpc/local/src /opt/hpc/local/build/*

# libjansson
RUN cd /opt/hpc/local/build && \
    wget https://digip.org/jansson/releases/jansson-2.13.1.tar.gz && \
    tar xf jansson-2.13.1.tar.gz && \
    cd jansson-2.13.1 && \
    ./configure --prefix=${LIBJANSSON_INSTALL_PATH} > /dev/null && \
    make > /dev/null && \
    make install > /dev/null && \
    cd .. && \
    rm -rf /opt/hpc/local/build/*

# The hwloc entry must name the library directory (lib), not bin: the
# dynamic loader only searches the listed directories for shared objects.
ENV LD_LIBRARY_PATH="$HWLOC_INSTALL_PATH/lib:$LIBEVENT_INSTALL_PATH/lib:$LIBJANSSON_INSTALL_PATH/lib:${LD_LIBRARY_PATH}"

# ------------------------------------------------------------
# PMIx Install
# Built from the 'master' branch against the libraries above.
# ------------------------------------------------------------
ENV PMIX_ROOT=/opt/hpc/local/pmix
ENV LD_LIBRARY_PATH="$PMIX_ROOT/lib:${LD_LIBRARY_PATH}"

RUN cd /opt/hpc/local/build && \
    git clone -q -b master https://github.com/openpmix/openpmix.git && \
    cd openpmix && \
    ./autogen.pl > /dev/null && \
    ./configure --prefix=${PMIX_ROOT} \
                --with-hwloc=${HWLOC_INSTALL_PATH} \
                --with-libevent=${LIBEVENT_INSTALL_PATH} \
                --with-curl \
                --with-jansson=${LIBJANSSON_INSTALL_PATH} > /dev/null && \
    make -j 10 > /dev/null && \
    make -j 10 install > /dev/null && \
    cd .. \
&& rm -rf /opt/hpc/local/build/*

# ------------------------------------------------------------
# PRRTE Install
# Built from the 'master' branch against the PMIx, hwloc and
# libevent installs configured earlier in this file.
# ------------------------------------------------------------
ENV PRRTE_ROOT=/opt/hpc/local/prrte
ENV PATH="$PRRTE_ROOT/bin:${PATH}"
ENV LD_LIBRARY_PATH="$PRRTE_ROOT/lib:${LD_LIBRARY_PATH}"

RUN cd /opt/hpc/local/build && \
    git clone -q -b master https://github.com/openpmix/prrte.git && \
    cd prrte && \
    ./autogen.pl > /dev/null && \
    ./configure --prefix=${PRRTE_ROOT} \
                --with-hwloc=${HWLOC_INSTALL_PATH} \
                --with-libevent=${LIBEVENT_INSTALL_PATH} \
                --with-pmix=${PMIX_ROOT} > /dev/null && \
    make -j 10 > /dev/null && \
    make -j 10 install > /dev/null && \
    rm -rf /opt/hpc/local/build/*

# ------------------------------------------------------------
# Fixup the ssh login
# Generate the sshd host keys and make the ssh client quiet and
# non-interactive (no host-key prompts, nothing recorded).
# ------------------------------------------------------------
RUN ssh-keygen -t rsa -f /etc/ssh/ssh_host_rsa_key -N "" && \
    ssh-keygen -t ecdsa -f /etc/ssh/ssh_host_ecdsa_key -N "" && \
    ssh-keygen -t ed25519 -f /etc/ssh/ssh_host_ed25519_key -N "" && \
    echo " LogLevel ERROR" >> /etc/ssh/ssh_config && \
    echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config && \
    echo " UserKnownHostsFile=/dev/null" >> /etc/ssh/ssh_config

# ------------------------------------------------------------
# Adjust default ulimit for core files
# ------------------------------------------------------------
RUN echo '* hard core -1' >> /etc/security/limits.conf && \
    echo '* soft core -1' >> /etc/security/limits.conf && \
    echo 'ulimit -c unlimited' >> /root/.bashrc

# ------------------------------------------------------------
# Create a user account
# The user's own public key is authorized so that it can ssh
# between containers built from this image without a password.
# ------------------------------------------------------------
RUN groupadd -r prteuser && useradd --no-log-init -r -m -b /home -g prteuser -G wheel prteuser
USER prteuser
RUN cd /home/prteuser && \
    ssh-keygen -t rsa -N "" -f ~/.ssh/id_rsa && chmod og+rX . && \
    cd .ssh && cat id_rsa.pub > authorized_keys && chmod 644 authorized_keys

# ------------------------------------------------------------
# Give the user passwordless sudo powers
# ------------------------------------------------------------
USER root
RUN echo "prteuser ALL = NOPASSWD: ALL" >> /etc/sudoers

# ------------------------------------------------------------
# Adjust the default environment
# ------------------------------------------------------------
ENV PRRTE_MCA_prrte_default_hostfile=/opt/hpc/etc/hostfile.txt
ENV PATH=$PATH:/opt/hpc/local/hwloc/bin

# Need to do this so that the 'prteuser' can have them too, not just root.
# Unescaped variables are expanded now, at build time; escaped ones (\$VAR)
# are written literally and expanded when /etc/bashrc is sourced.
# LIBJANSSON_INSTALL_PATH is expanded at build time like the hwloc/libevent
# paths, because nothing in /etc/bashrc defines it.
# NOTE(review): PMIX_ROOT/PRRTE_ROOT are written as /opt/hpc/external/...,
# while this image installs PRRTE under /opt/hpc/local (see ENV PRRTE_ROOT
# above) -- presumably intended for externally mounted builds; confirm.
RUN echo "export PMIX_ROOT=/opt/hpc/external/pmix" >> /etc/bashrc && \
    echo "export PRRTE_ROOT=/opt/hpc/external/prrte" >> /etc/bashrc && \
    echo "export PATH=\$PRRTE_ROOT/bin:\$PATH" >> /etc/bashrc && \
    echo "export LD_LIBRARY_PATH=\$PMIX_ROOT/lib:\$LD_LIBRARY_PATH" >> /etc/bashrc && \
    echo "export LD_LIBRARY_PATH=$HWLOC_INSTALL_PATH/lib:$LIBEVENT_INSTALL_PATH/lib:\$LD_LIBRARY_PATH" >> /etc/bashrc && \
    echo "export LD_LIBRARY_PATH=$LIBJANSSON_INSTALL_PATH/lib:\$LD_LIBRARY_PATH" >> /etc/bashrc && \
    echo "export LD_LIBRARY_PATH=\$PRRTE_ROOT/lib:\$LD_LIBRARY_PATH" >> /etc/bashrc && \
    echo "export PRRTE_MCA_prrte_default_hostfile=$PRRTE_MCA_prrte_default_hostfile" >> /etc/bashrc && \
    echo "ulimit -c unlimited" >> /etc/bashrc && \
    echo "alias pd=pushd" >> /etc/bashrc

# ------------------------------------------------------------
# Container start-up: ptrace scope, ssh daemon, PRRTE daemon
#
# A Dockerfile honours only its last CMD, and an exec-form CMD is not run
# through a shell, so the former three CMD instructions (sshd, an
# "echo 0 > .../ptrace_scope" and prte) only ever started prte. All three
# steps are therefore run from one shell-form CMD:
#  - tickle ptrace scope for Mac stacktrace; best effort, the write is
#    allowed to fail and its error output is discarded
#  - start sshd through the passwordless sudo granted above; without -D
#    it detaches into the background
#  - exec prte so that it replaces the shell as the container's main process
# ------------------------------------------------------------
EXPOSE 22
USER prteuser
CMD sudo sh -c 'echo 0 > /proc/sys/kernel/yama/ptrace_scope' 2> /dev/null ; \
    sudo /usr/sbin/sshd ; \
    exec prte
prrte-3.0.13/contrib/docker/prte.service0000664000175000017500000000022215145263240020357 0ustar alastairalastair
[Unit]
Description=PRRTE persistent service.

[Service]
Type=simple
ExecStart=/opt/hpc/local/prrte/bin/prte

[Install]
WantedBy=multi-user.target
prrte-3.0.13/contrib/docker/startdaemon.sh0000775000175000017500000000573215145263240020716 0ustar alastairalastair
#!/bin/bash
#
# Copyright (c) 2020 Intel, Inc. All rights reserved.
# Copyright (c) 2020 IBM Corporation. All rights reserved.
#

# Default values
#
IMAGE_NAME=prrte/leap15:latest
OVERLAY_NETWORK=prte-net
RNDVZ_DIR=
COMMON_PREFIX=$USER"-"
SHUTDOWN_FILE=$PWD/tmp/shutdown-`hostname -s`.sh
DRYRUN=0

#
# Argument parsing
#
while [[ $# -gt 0 ]] ; do
    case $1 in
        "-h" | "--help")
            printf "Usage: %s [option]
    -i | --image NAME    Name of the container image (Required)
    -r | --rndvz DIR     Full path to the 'rendezvous' directory
    -d | --dryrun        Dry run. Do not actually start anything.
-h | --help Print this help message\n" \
                `basename $0`
            exit 0
            ;;
        "-i" | "--image" | "-img")
            shift
            IMAGE_NAME=$1
            ;;
        # Accept the short form too: the usage text advertises "-r | --rndvz"
        "-r" | "--rndvz")
            shift
            RNDVZ_DIR=$1
            ;;
        "-d" | "--dryrun")
            DRYRUN=1
            ;;
        *)
            # Quote the argument so an option containing spaces is
            # reported as a single word
            printf "Unknown option: %s\n" "$1"
            exit 1
            ;;
    esac
    shift
done

if [ "x$IMAGE_NAME" == "x" ] ; then
    echo "Error: --image must be specified"
    exit 1
fi

#
# Spin up the container
#
ALL_CONTAINERS=()

# Start one detached container named "<prefix>node00" on the overlay
# network and record its short id in ALL_CONTAINERS.
# In dry-run mode the docker command is only printed.
# Exits the script if "docker run" fails.
startup_container()
{
    C_ID=0
    C_HOSTNAME=`printf "%s%s%02d" $COMMON_PREFIX "node" $C_ID`
    if [ 0 != $DRYRUN ] ; then
        echo ""
        echo "Starting: $C_HOSTNAME"
        echo "---------------------"
    else
        echo "Starting: $C_HOSTNAME"
    fi

    # Add other volume mounts here
    _OTHER_ARGS=""
    if [ "x" != "x$RNDVZ_DIR" ] ; then
        _OTHER_ARGS+=" -v $RNDVZ_DIR:/opt/hpc/rndvz"
    fi

    CMD="docker run --rm \
        --cap-add=SYS_NICE --cap-add=SYS_PTRACE --security-opt seccomp=unconfined \
        $_OTHER_ARGS \
        --network $OVERLAY_NETWORK \
        -h $C_HOSTNAME --name $C_HOSTNAME \
        --detach $IMAGE_NAME"
    echo $CMD

    if [ 0 != $DRYRUN ] ; then
        return
    fi

    # The command's output (the container id on success) is captured so
    # that it can be shown on failure or recorded on success
    C_FULL_ID=`$CMD`
    RTN=$?
    if [ 0 != $RTN ] ; then
        echo "Error: Failed to create $C_HOSTNAME"
        echo $C_FULL_ID
        exit 1
    fi

    # Keep only the first 12 characters of the id
    C_SHORT_ID=`echo $C_FULL_ID | cut -c -12`
    ALL_CONTAINERS+=($C_SHORT_ID)
}

mkdir -p tmp

# Create network
CMD="docker network create --driver overlay --attachable $OVERLAY_NETWORK"
if [ 0 == $DRYRUN ] ; then
    echo "Establish network: $OVERLAY_NETWORK"
    # The exit status is not checked here; RTN only holds the command output
    RTN=`$CMD`
else
    echo ""
    echo "Establish network: $OVERLAY_NETWORK"
    echo "---------------------"
    echo $CMD
fi

startup_container

if [ 0 != $DRYRUN ] ; then
    exit 0
fi

#
# Create a shutdown file to help when we cleanup
# (the path is quoted because it is derived from $PWD)
rm -f "$SHUTDOWN_FILE"
touch "$SHUTDOWN_FILE"
chmod +x "$SHUTDOWN_FILE"
for cid in "${ALL_CONTAINERS[@]}" ; do
    echo "docker stop $cid" >> "$SHUTDOWN_FILE"
done

# Dry runs have already exited above, so only the first branch is reachable
CMD="docker network rm $OVERLAY_NETWORK"
if [ 0 == $DRYRUN ] ; then
    echo $CMD >> "$SHUTDOWN_FILE"
else
    echo ""
    echo "Remove network: $OVERLAY_NETWORK"
    echo "---------------------"
    echo $CMD
fi
prrte-3.0.13/contrib/docker/Dockerfile.leap15.ssh0000664000175000017500000001710615145263240021710 0ustar
alastairalastair
# Copyright (c) 2020 Intel, Inc. All rights reserved.
# Copyright (c) 2020 IBM Corporation. All rights reserved.
#
# Base Build box for PRRTE
# Requires:
# - Basic compile tooling and runtime support
# - libevent - retrieve v2.1.11-stable from web
# - hwloc - retrieve v2.2.0 from web
# - curl
# - libjansson - retrieve v2.13.1 from web
# - PMIx - cloned from 'master' branch
# - PRRTE - cloned from 'master' branch
#
#
FROM opensuse/leap

MAINTAINER Ralph Castain

# ------------------------------------------------------------
# Install required packages
# ------------------------------------------------------------
RUN zypper --non-interactive refresh && \
    zypper --non-interactive update && \
    zypper --non-interactive install \
        openssh \
        gcc gdb strace \
        binutils less wget which sudo \
        perl numactl gzip \
        autoconf automake libtool flex bison \
        iproute net-tools make git pandoc \
        atk cairo tcl tcsh tk pciutils lsof ethtool bc file \
        valgrind curl curl-devel && \
    zypper --non-interactive clean

# ------------------------------------------------------------
# Define support libraries
# - hwloc
# - libevent
# - libjansson
#
# Each library is built from a release tarball under
# /opt/hpc/local/build and installed into its own prefix. The
# install prefixes are ARGs so they can be overridden at build
# time, and are re-exported as ENV so later steps can use them.
# ------------------------------------------------------------
RUN mkdir -p /opt/hpc/local/build
RUN mkdir -p /opt/hpc/rndvz

ARG LIBEVENT_INSTALL_PATH=/opt/hpc/local/libevent
ENV LIBEVENT_INSTALL_PATH=$LIBEVENT_INSTALL_PATH
ARG HWLOC_INSTALL_PATH=/opt/hpc/local/hwloc
ENV HWLOC_INSTALL_PATH=$HWLOC_INSTALL_PATH
ARG LIBJANSSON_INSTALL_PATH=/opt/hpc/local/libjansson
ENV LIBJANSSON_INSTALL_PATH=$LIBJANSSON_INSTALL_PATH

# libevent: the build tree is left in place here; the hwloc step below
# wipes /opt/hpc/local/build/* once it has finished.
RUN cd /opt/hpc/local/build && \
    wget https://github.com/libevent/libevent/releases/download/release-2.1.11-stable/libevent-2.1.11-stable.tar.gz && \
    ls && \
    tar xf libevent-2.1.11-stable.tar.gz && \
    cd libevent-2.1.11-stable && \
    ./configure --prefix=${LIBEVENT_INSTALL_PATH} > /dev/null && \
    make > /dev/null && \
    make install > /dev/null

# hwloc
RUN cd /opt/hpc/local/build && \
    wget https://download.open-mpi.org/release/hwloc/v2.2/hwloc-2.2.0.tar.gz && \
    tar xf hwloc-2.2.0.tar.gz && \
    cd hwloc-2.2.0 && \
    ./configure --prefix=${HWLOC_INSTALL_PATH} > /dev/null && \
    make > /dev/null && \
    make install > /dev/null && \
    cd .. && \
    rm -rf /opt/hpc/local/build/*

# libjansson
RUN cd /opt/hpc/local/build && \
    wget https://digip.org/jansson/releases/jansson-2.13.1.tar.gz && \
    tar xf jansson-2.13.1.tar.gz && \
    cd jansson-2.13.1 && \
    ./configure --prefix=${LIBJANSSON_INSTALL_PATH} > /dev/null && \
    make > /dev/null && \
    make install > /dev/null && \
    cd .. && \
    rm -rf /opt/hpc/local/build/*

# The hwloc entry must name the library directory (lib), not bin: the
# dynamic loader only searches the listed directories for shared objects.
ENV LD_LIBRARY_PATH="$HWLOC_INSTALL_PATH/lib:$LIBEVENT_INSTALL_PATH/lib:$LIBJANSSON_INSTALL_PATH/lib:${LD_LIBRARY_PATH}"

# -----------------------------
# ------------------------------------------------------------
# PMIx Install
# Built from the 'master' branch against the libraries above.
# ------------------------------------------------------------
ENV PMIX_ROOT=/opt/hpc/local/pmix
ENV LD_LIBRARY_PATH="$PMIX_ROOT/lib:${LD_LIBRARY_PATH}"

RUN cd /opt/hpc/local/build && \
    git clone -q -b master https://github.com/openpmix/openpmix.git && \
    cd openpmix && \
    ./autogen.pl > /dev/null && \
    ./configure --prefix=${PMIX_ROOT} \
                --with-hwloc=${HWLOC_INSTALL_PATH} \
                --with-libevent=${LIBEVENT_INSTALL_PATH} \
                --with-curl \
                --with-jansson=${LIBJANSSON_INSTALL_PATH} > /dev/null && \
    make -j 10 > /dev/null && \
    make -j 10 install > /dev/null && \
    cd .. \
&& rm -rf /opt/hpc/local/build/*

# ------------------------------------------------------------
# PRRTE Install
# Built from the 'master' branch against the PMIx, hwloc and
# libevent installs configured earlier in this file.
# ------------------------------------------------------------
ENV PRRTE_ROOT=/opt/hpc/local/prrte
ENV PATH="$PRRTE_ROOT/bin:${PATH}"
ENV LD_LIBRARY_PATH="$PRRTE_ROOT/lib:${LD_LIBRARY_PATH}"

RUN cd /opt/hpc/local/build && \
    git clone -q -b master https://github.com/openpmix/prrte.git && \
    cd prrte && \
    ./autogen.pl > /dev/null && \
    ./configure --prefix=${PRRTE_ROOT} \
                --with-hwloc=${HWLOC_INSTALL_PATH} \
                --with-libevent=${LIBEVENT_INSTALL_PATH} \
                --with-pmix=${PMIX_ROOT} > /dev/null && \
    make -j 10 > /dev/null && \
    make -j 10 install > /dev/null && \
    rm -rf /opt/hpc/local/build/*

# ------------------------------------------------------------
# Fixup the ssh login
# Generate the sshd host keys and make the ssh client quiet and
# non-interactive (no host-key prompts, nothing recorded).
# ------------------------------------------------------------
RUN ssh-keygen -t rsa -f /etc/ssh/ssh_host_rsa_key -N "" && \
    ssh-keygen -t ecdsa -f /etc/ssh/ssh_host_ecdsa_key -N "" && \
    ssh-keygen -t ed25519 -f /etc/ssh/ssh_host_ed25519_key -N "" && \
    echo " LogLevel ERROR" >> /etc/ssh/ssh_config && \
    echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config && \
    echo " UserKnownHostsFile=/dev/null" >> /etc/ssh/ssh_config

# ------------------------------------------------------------
# Adjust default ulimit for core files
# ------------------------------------------------------------
RUN echo '* hard core -1' >> /etc/security/limits.conf && \
    echo '* soft core -1' >> /etc/security/limits.conf && \
    echo 'ulimit -c unlimited' >> /root/.bashrc

# ------------------------------------------------------------
# Create a user account
# The user's own public key is authorized so that it can ssh
# between containers built from this image without a password.
# ------------------------------------------------------------
RUN groupadd -r prteuser && useradd -r -m -b /home -g prteuser prteuser
USER prteuser
RUN cd /home/prteuser && \
    ssh-keygen -t rsa -N "" -f ~/.ssh/id_rsa && chmod og+rX . && \
    cd .ssh && cat id_rsa.pub > authorized_keys && chmod 644 authorized_keys

# ------------------------------------------------------------
# Give the user passwordless sudo powers
# ------------------------------------------------------------
USER root
RUN echo "prteuser ALL = NOPASSWD: ALL" >> /etc/sudoers

# ------------------------------------------------------------
# Adjust the default environment
# ------------------------------------------------------------
ENV PRRTE_MCA_prrte_default_hostfile=/opt/hpc/etc/hostfile.txt
ENV PATH=$PATH:/opt/hpc/local/hwloc/bin

# Need to do this so that the 'prteuser' can have them too, not just root.
# Unescaped variables are expanded now, at build time; escaped ones (\$VAR)
# are written literally and expanded when /etc/bashrc is sourced.
# LIBJANSSON_INSTALL_PATH is expanded at build time like the hwloc/libevent
# paths, because nothing in /etc/bashrc defines it.
# NOTE(review): PMIX_ROOT/PRRTE_ROOT are written as /opt/hpc/install/...,
# while this image installs PRRTE under /opt/hpc/local (see ENV PRRTE_ROOT
# above) -- presumably intended for externally mounted builds; confirm.
# NOTE(review): confirm that bash on this base image actually sources
# /etc/bashrc (openSUSE presumably uses /etc/bash.bashrc instead).
RUN echo "export PMIX_ROOT=/opt/hpc/install/pmix" >> /etc/bashrc && \
    echo "export PRRTE_ROOT=/opt/hpc/install/prrte" >> /etc/bashrc && \
    echo "export PATH=\$PRRTE_ROOT/bin:\$PATH" >> /etc/bashrc && \
    echo "export LD_LIBRARY_PATH=\$PMIX_ROOT/lib:\$LD_LIBRARY_PATH" >> /etc/bashrc && \
    echo "export LD_LIBRARY_PATH=$HWLOC_INSTALL_PATH/lib:$LIBEVENT_INSTALL_PATH/lib:\$LD_LIBRARY_PATH" >> /etc/bashrc && \
    echo "export LD_LIBRARY_PATH=$LIBJANSSON_INSTALL_PATH/lib:\$LD_LIBRARY_PATH" >> /etc/bashrc && \
    echo "export LD_LIBRARY_PATH=\$PRRTE_ROOT/lib:\$LD_LIBRARY_PATH" >> /etc/bashrc && \
    echo "export PRRTE_MCA_prrte_default_hostfile=$PRRTE_MCA_prrte_default_hostfile" >> /etc/bashrc && \
    echo "ulimit -c unlimited" >> /etc/bashrc && \
    echo "alias pd=pushd" >> /etc/bashrc

# ------------------------------------------------------------
# Container start-up: ptrace scope, ssh daemon, PRRTE daemon
#
# A Dockerfile honours only its last CMD, and an exec-form CMD is not run
# through a shell, so the former three CMD instructions (sshd, an
# "echo 0 > .../ptrace_scope" and prte) only ever started prte. All three
# steps are therefore run from one shell-form CMD:
#  - tickle ptrace scope for Mac stacktrace; best effort, the write is
#    allowed to fail and its error output is discarded
#  - start sshd through the passwordless sudo granted above; without -D
#    it detaches into the background
#  - exec prte so that it replaces the shell as the container's main process
# ------------------------------------------------------------
EXPOSE 22
USER prteuser
CMD sudo sh -c 'echo 0 > /proc/sys/kernel/yama/ptrace_scope' 2> /dev/null ; \
    sudo /usr/sbin/sshd ; \
    exec prte
prrte-3.0.13/contrib/docker/Makefile0000664000175000017500000000067715145263240017501 0ustar alastairalastair
# Copyright (c) 2020 Intel, Inc. All rights reserved.
# Copyright (c) 2020 IBM Corporation. All rights reserved.

all: leap15 centos7 centos8

leap15: Dockerfile.leap15.ssh
	docker build -t prrte/leap15:latest -f Dockerfile.leap15.ssh .

centos7: Dockerfile.centos7.ssh
	docker build -t prrte/centos7:latest -f Dockerfile.centos7.ssh .

centos8: Dockerfile.centos8.ssh
	docker build -t prrte/centos8:latest -f Dockerfile.centos8.ssh .
prrte-3.0.13/contrib/docker/drop-in.sh0000775000175000017500000000040115145263240017731 0ustar alastairalastair
#!/bin/bash
#
# Copyright (c) 2020 Intel, Inc. All rights reserved.
# Copyright (c) 2020 IBM Corporation. All rights reserved.

docker exec -it -u prteuser -w /home/prteuser/ --env COLUMNS=`tput cols` --env LINES=`tput lines` $USER-node00 bash
prrte-3.0.13/contrib/update-my-copyright.pl0000775000175000017500000003053715145263240021043 0ustar alastairalastair
#!/usr/bin/env perl
#
# Copyright (c) 2010-2014 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2016-2019 Intel, Inc. All rights reserved.
# Copyright (c) 2017 IBM Corporation. All rights reserved.
# $COPYRIGHT$
#
# Short version:
#
# This script automates the tedious task of updating copyright notices
# in the tops of OMPI/PRTE/OPAL source files before committing back to
# the repository. Set the environment variable
# OMPI_COPYRIGHT_SEARCH_NAME to a short (case-insensitive) name that
# indicates your copyright line (e.g., "cisco"), and set the env
# variable OMPI_COPYRIGHT_FORMAL_NAME with your organization's formal
# name and copyright statement (e.g., "Cisco Systems, Inc. All rights
# reserved.") before running the script.

# More details:
#
# This is a simple script to traverse the tree looking for added and
# changed files (via "svn st ."
or "hg st .", depending on what meta # directory is found in this tree). Note that the search starts in # the current directory -- not the top-level directory. # # All added and changed files are examined. If the special # "$COPYRIGHT$" token is found, then lines above that token are # examined to find the "search" copyright name. # # - If the search name is found, that line is examined to see if the # current year is in the copyright year range. If it is not, the line # is modified to include the current year. # - If the search name is not found, a new line is created in the # copyright block of the file using the formal name and the current # year. # # NOTE: this script currently doesn't handle multi-line copyright # statements, such as: # # Copyright (c) 2010 University of Blabbityblah and the Trustees of # Schblitbittyboo. All rights reserved. # # Someone could certainly extend this script to do so, if they cared # (my organizations' copyright fits on a single line, so I wasn't # motivated to handle the multi-line case :-) ). # use strict; use Cwd; use Getopt::Long; # Set to true if the script should merely check for up-to-date copyrights. # Will exit with status 111 if there are out of date copyrights which this # script can correct. my $CHECK_ONLY = 0; # used by $CHECK_ONLY logic for bookeeping my $would_replace = 0; # Set to true to suppress most informational messages. Only out of date files # will be printed. my $QUIET = 0; # Set to true if we just want to see the help message my $HELP = 0; # Defaults my $my_search_name = "Cisco"; my $my_formal_name = "Cisco Systems, Inc. 
All rights reserved."; my $my_manual_list = ""; # Protected directories my @protected = qw( opal\\/mca\\/pmix\\/pmix.+?\\/pmix\\/ opal\\/mca\\/hwloc\\/hwloc.+?\\/hwloc\\/ opal\\/mca\\/event\\/libevent.+?\\/libevent\\/ contrib\\/update-my-copyright.pl ); # Override the defaults if some values are set in the environment $my_search_name = $ENV{OMPI_COPYRIGHT_SEARCH_NAME} if (defined($ENV{OMPI_COPYRIGHT_SEARCH_NAME})); $my_formal_name = $ENV{OMPI_COPYRIGHT_FORMAL_NAME} if (defined($ENV{OMPI_COPYRIGHT_FORMAL_NAME})); $my_manual_list = $ENV{OMPI_COPYRIGHT_MANUAL_LIST} if (defined($ENV{OMPI_COPYRIGHT_MANUAL_LIST})); GetOptions( "help" => \$HELP, "quiet" => \$QUIET, "check-only" => \$CHECK_ONLY, "search-name=s" => \$my_search_name, "formal-name=s" => \$my_formal_name, "manual-list=s" => \$my_manual_list, ) or die "unable to parse options, stopped"; if ($HELP) { print < Copyright search name: $my_search_name\n"; quiet_print "==> Copyright formal name: $my_formal_name\n"; # Get the year my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime; $year += 1900; quiet_print "==> This year: $year\n"; # Find the top-level source tree dir in a git repo my $start = cwd(); my $top = $start; while (! -d "$top/.git") { chdir(".."); $top = cwd(); die "Can't find top-level repository directory" if ($top eq "/"); } chdir($start); quiet_print "==> Top-level repository dir: $top\n"; quiet_print "==> Current directory: $start\n"; # Select VCS used to obtain modification info. Choose in increasing priority # order (last hit wins). 
my $vcs; $vcs = "git" if (-d "$top/.git"); $vcs = "hg" if (-d "$top/.hg"); $vcs = "svn" if (-d "$top/.svn"); $vcs = "manual" if ("$my_manual_list" ne ""); my @files = find_modified_files($vcs); if ($#files < 0) { quiet_print "No added / changed files -- nothing to do\n"; exit(0); } # Examine each of the files and see if they need an updated copyright foreach my $f (@files) { # ignore embedded copies of external codes as we shouldn't # be overwriting their copyrights - if someone actually # modified any of those files, they can manually update # the copyright my $ignore = 0; foreach my $p (@protected) { if (eval("\$f =~ /$p/")) { quiet_print "Ignoring protected file $f\n"; $ignore = 1; last; } } if (1 == $ignore) { next; } quiet_print "Processing added/changed file: $f\n"; open(FILE, $f) || die "Can't open file: $f"; # Read in the file, and look for the "$COPYRIGHT$" token; that's # the end of the copyright block that we're allowed to edit. Do # not edit any copyright notices that may appear below that. my $i = 0; my $found_copyright = 0; my $found_me = 0; my @lines; my $my_line_index; my $token_line_index; while () { push(@lines, $_); if (!$found_copyright && $_ =~ /\$COPYRIGHT\$/) { $token_line_index = $i; $found_copyright = 1; } if (!$found_me && !defined($token_line_index) && $_ =~ /$my_search_name/i) { $my_line_index = $i; $found_me = 1; } ++$i; } close(FILE); # If there was not copyright token, don't do anything if (!defined($token_line_index)) { quiet_print "==> WARNING: Did not find the \$COPYRIGHT\$ token!\n"; quiet_print " File left unchanged\n"; next; } # Figure out the line prefix $lines[$token_line_index] =~ m/^(.+)\$COPYRIGHT\$/; my $prefix = $1; # Now act on it if (!defined($my_line_index)) { quiet_print "--- My copyright line not found; adding:\n"; my $str = "${prefix}Copyright (c) $year $my_formal_name\n"; quiet_print " $str"; $lines[$token_line_index] = $str . 
$lines[$token_line_index]; } else { quiet_print "--- Found existing copyright line:\n"; quiet_print " $lines[$my_line_index]"; $lines[$my_line_index] =~ m/([\d+\-]+)/; my $years = $1; die "Could not find years in copyright line!" if (!defined($years)); # If it's a range, separate them out my $first_year; my $last_year; if ($years =~ /\-/) { $years =~ m/(\d+)\s*-\s*(\d+)/; $first_year = $1; $last_year = $2; } else { $first_year = $last_year = $years; } # Sanity check die "Copyright looks like it extends before 1990...?" if ($first_year < 1990); die "Copyright in the future...?" if ($last_year > $year); # Do we need to do anything? if ($year > $last_year) { $lines[$my_line_index] = "${prefix}Copyright (c) $first_year-$year $my_formal_name\n"; quiet_print " Updated to:\n"; quiet_print " $lines[$my_line_index]"; } else { quiet_print " This year already included in copyright; not changing file\n"; next; } } # If we got this far, we want to write out a new file my $newf = "$f.new-copyright"; unlink($newf); open(FILE, ">$newf") || die "Can't open file: $newf"; print FILE join('', @lines); close(FILE); if ($CHECK_ONLY) { # intentional "loud" print to be more useful in a pre-commit hook print "==> '$f' has a stale/missing copyright\n"; unlink($newf); ++$would_replace; } else { # Now replace the old one unlink($f); rename($newf, $f); } } if ($CHECK_ONLY and $would_replace) { exit(111); } #------------------------------------------------------------------------------- # Takes two arguments, the top level directory and the VCS method. Returns a # list of file names (relative to pwd) which the VCS considers to be modified. sub find_modified_files { my $vcs = shift; my @files = (); if ($vcs eq "git") { # Number of path entries to remove from ${top}-relative paths. # (--show-cdup either returns the empty string or sequence of "../" # entries, always ending in a "/") my $n_strip = scalar(split(m!/!, scalar(`git rev-parse --show-cdup`))) - 1; # "." 
restricts scope, but does not get us relative path names my $cmd = "git status -z --porcelain --untracked-files=no ."; quiet_print "==> Running: \"$cmd\"\n"; my $lines = `$cmd`; # From git-status(1): # X Y Meaning # ------------------------------------------------- # [MD] not updated # M [ MD] updated in index # A [ MD] added to index # D [ M] deleted from index # R [ MD] renamed in index # C [ MD] copied in index # [MARC] index and work tree matches # [ MARC] M work tree changed since index # [ MARC] D deleted in work tree # ------------------------------------------------- # D D unmerged, both deleted # A U unmerged, added by us # U D unmerged, deleted by them # U A unmerged, added by them # D U unmerged, deleted by us # A A unmerged, both added # U U unmerged, both modified # ------------------------------------------------- # ? ? untracked # ------------------------------------------------- foreach my $line (split /\x{00}/, $lines) { my $keep = 0; my ($s1, $s2, $fullname) = $line =~ m/^(.)(.) (.*)$/; # ignore all merge cases next if ($s1 eq "D" and $s2 eq "D"); next if ($s1 eq "A" and $s2 eq "A"); next if ($s1 eq "U" or $s2 eq "U"); # only update for actually added/modified cases, no copies, # renames, etc. $keep = 1 if ($s1 eq "M" or $s2 eq "M"); $keep = 1 if ($s1 eq "A"); if ($keep) { my $relname = $fullname; $relname =~ s!^([^/]*/){$n_strip}!!g; push @files, $relname if (-f $relname); } } } elsif ($vcs eq "hg" or $vcs eq "svn") { my $cmd = "$vcs st ."; # Run the command, parsing the output. Make a list of files that are # added or modified. 
quiet_print "==> Running: \"$cmd\"\n"; open(CMD, "$cmd|") || die "Can't run command"; while () { chomp; if ($_ =~ /^M/ || $_ =~ /^A/) { my @tokens = split(/\s+/, $_); # Handle output of both forms: # M filenameA # A + filenameB my $filename = $tokens[1]; $filename = $tokens[2] if ($tokens[1] =~ /\+/); # Don't bother saving directory names push(@files, $filename) if (-f $filename); } } close(CMD); } elsif ($vcs eq "manual") { @files = split(/\n/, `cat $my_manual_list`); } else { die "unknown VCS '$vcs', stopped"; } return @files; } prrte-3.0.13/contrib/test_headers_in_ompi.pl0000775000175000017500000001160515145263240021307 0ustar alastairalastair#!/usr/bin/perl # # Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. # Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow # # $HEADER$ # #this is the perl scripty foo which does the following tasks # 1. Extract the #include <*.h> files which are present in both header and source files # 2. Do some basic formatting # 3. Check if these included files are present on the platform which has been given if (scalar(@ARGV) != 2) { print "Usage: ./depend.pl \n"; exit(3); } $includes_file = "headers.txt"; $return = &get_header_files($includes_file); $test_file = "test_headers.txt"; $return = &parse_header_files($includes_file, $test_file); $source_tree = @ARGV[0]; $CC = @ARGV[1]; $result_file = "results.txt"; $return = &test_for_headers($test_file, $result_file, $CC); # this file is used to extract which header files are included in a particular # source file. 
Kind of a neat implementation sub get_header_files { local($dump_file) = @_; open(C_FILES, "find $source_tree -name \*.c |") || print "could not find source files\n"; open(H_FILES, "find $source_tree -name \*.c |") || print "could not find header files\n"; open(DUMP, "> $dump_file") || print "Could not open $dump_file\n"; while () { $file_h = $_; print DUMP "Processing $file_h"; open(FILE_H, "$file_h") || print "could not open file for reading\n"; #grep for the pattern which we want while () { if (/#include ) { $file_h = $_; print DUMP "Processing $file_h"; open(FILE_H, "$file_h") || print "could not open file for reading\n"; #grep for the pattern which we want while () { if (/#include $test_file") || print "Could not open $test_file for reading\n"; while () { if (/#include temp.txt") || print "Could not open temp.txt for writing\n"; while(
) { #remove leading white spaces s/^\s*//; #remove anything after <*.h> s/>{1,1}.*\n/>\n/; #remove anything before #include s/^.*#include/#include/; print TEMP $_; } close(HEADER); close(TEMP); #remove duplicate occurences of the file system("sort temp.txt | uniq > $test_file"); return 0; } #this suroutine is used to test if a particular header is present or absent in a particular language sub test_for_headers { local($test_file, $result_file, $CC) = @_; local($temp) = "temp.c"; print "CC = $CC\n"; open(HEADER, "$test_file") || print "Could not open $test_file for reading\n"; open(RESULTS, "> $result_file") || print "Could not open $result_file for writing\n"; while(
) { print $_; #create the file for compilation chomp $_; $string = " $_ /*this is the include file to be tested for*/ int main(int argc, char **argv) { return 0; } "; open(TEMP, "> $temp") || print "Could not open $temp for writing\n"; print TEMP $string; close(TEMP); $compiled = system("$CC $temp"); if ($compiled == 0) { print "$_ is present\n"; } else { print RESULTS "$_\n"; } system("rm -Rf $temp"); system("rm -Rf temp.*"); } close(HEADER); close(RESULTS); return 0; } prrte-3.0.13/contrib/coverity/0000775000175000017500000000000015145263240016424 5ustar alastairalastairprrte-3.0.13/contrib/coverity/coverity-model.c0000664000175000017500000000015215145263240021530 0ustar alastairalastairvoid opal_btl_usnic_util_abort(const char *msg, const char *file, int line) { __coverity_panic__(); } prrte-3.0.13/contrib/cleanperms0000775000175000017500000000117315145263240016641 0ustar alastairalastair#!/usr/bin/bash find . -type f -name "*.c" -perm /u+x -print -exec chmod -x {} \; find . -type f -name Makefile.am -perm /u+x -print -exec chmod -x {} \; find . -type f -name "*.h" -perm /u+x -print -exec chmod -x {} \; find . -type f -name Makefile.include -perm /u+x -print -exec chmod -x {} \; find . -type f -name Makefile -perm /u+x -print -exec chmod -x {} \; find . -type f -name "*.m4" -perm /u+x -print -exec chmod -x {} \; find . -type f -name "*.ac" -perm /u+x -print -exec chmod -x {} \; find . -type f -name "*.txt" -perm /u+x -print -exec chmod -x {} \; find . -type f -name "*.l" -perm /u+x -print -exec chmod -x {} \; prrte-3.0.13/contrib/gen_stats.pl0000775000175000017500000002144215145263240017112 0ustar alastairalastair#!/usr/bin/perl # Currently, we implement only the directory option # The following features have been requested for # # 1. Get the name of the directory and produce a # statistic for all the files which have been touched # in all the subdirectories starting from that # directory # # 2. 
Since gcov spits out a statistic for all the header # files included by all the source files, we need to # aggregate them somehow into a single file. This might # have to be done manually by going through all the lines # which have been executed in that file since a header # file may have multiple inclusions meaning that they # might have multiple .gcov files in different directories # # 3. Support command line parsing of arguments # # 4. Support giving a listing of all the files which have # been covered less than a certain percentage, say for # eg., all files which have been covered less that 5% # use strict; use Switch; use File::Basename; #global variables my $index = 0; my $num_args = scalar(@ARGV); my $percentage = 0.0; my $ret1 = 0; my $ret2 = 0; my $dir_list_given = 0; my $req_list_given = 0; $ret1 = open(DIR_FILE, "dir_list.txt"); $ret2 = open(REQ_FILE, "req_list.txt"); if ($ret1 < 0 || $ret2 < 0) { print "ERROR: opening dir_list.txt or req_list.txt\n"; exit(3); } # check the error condition if (0 == ($num_args % 2)) { } else { print "ERROR: the number of arguments has to be even\n"; exit(3); } # process the arguments while($num_args > 0) { switch ($ARGV[$index]) { case "-d" { print DIR_FILE $ARGV[$index+1]; $index += 2; $num_args -= 2; $dir_list_given = 1; } case "-f" { my $filename = `find . 
-name $ARGV[$index+1]`; print REQ_FILE $filename; $index += 2; $num_args -= 2; $req_list_given = 1; } case "-p" { $percentage = $ARGV[$index]; $index += 2; $num_args -= 2; } else { print "ERROR: Incorrect command line option\n"; exit(3); } } } close(DIR_FILE); close(REQ_FILE); # when nothing is specified, everything defaults to just "src" if (0 == $dir_list_given && 0 == $req_list_given) { print "WARN: No directory or file has been requested\ndefaulting to src directory\n"; system("echo src > dir_list.txt"); $dir_list_given = 1; } #Now, we have both the requested file list and the directory list #So simply call the procedure generate_stats with the required arguments if (1 == $dir_list_given) { get_file_list("./dir_list.txt", "touched_files.txt", "untouched_files.txt"); generate_stats("touched_files.txt", # file_list "coverage_stats.txt",# generic coverage numbers "percent_stats.txt", # files below a certain % $percentage, # percentage below which we report 1); # 1 to report } if (1 == $req_list_given) { generate_stats("req_list.txt", # file_list "req_stats.txt",# generic coverage "", # no percentage coverage required 0, # not required 0); # 0 to not report percentages } sub get_file_list { my ($input, $touched, $untouched) = @_; my $ret = open(DIRFILES, "< $input"); if ($ret < 0) { print "ERROR: could not open directory listing\n"; exit(3); } while() { chomp(); my $c_files = `find $_ -name \"*.c\"`; my $cc_files = `find $_ -name \"*.cc\"`; $cc_files =~ s/\.cc//g; $c_files =~ s/\.c//g; $c_files = $c_files . 
$cc_files; my @C_FILES = split(/\n/, $c_files); my $da_files = `find $_ -name \"*.da\" -o -name \"*.gcda\"`; $da_files =~ s/\.gcda//g; $da_files =~ s/\.da//g; my @DA_FILES = split(/\n/, $da_files); open (TEMP1, "> temp1"); open (TEMP2, "> temp2"); print TEMP1 $c_files; print TEMP2 $da_files; close(TEMP1); close(TEMP2); # Now do the manual diff open(TEMP1, "< temp1"); open(UNTOUCHED_FILES, ">> $untouched"); open(TOUCHED_FILES, ">> $touched"); while() { my $c_file = $_; my $found = 0; my $file_name; my $dir_name; my $file_ext; my $search_file; ($file_name, $dir_name, $file_ext) = fileparse($c_file, ('\.c') ); # For our libtool build, every .gcda and .gcno file lands in the .libs dir. $search_file = $dir_name . ".libs/" . $file_name; open(TEMP2, "< temp2"); while() { if ($search_file eq $_) { $found = 1; } } close(TEMP2); if ($found == 0) { print UNTOUCHED_FILES $c_file; } else { chomp($c_file); $c_file = $c_file . ".c\n"; print TOUCHED_FILES $c_file; } } close(UNTOUCHED_FILES); close(TOUCHED_FILES); close(TEMP1); system("rm temp1 temp2"); } close(DIRFILES); #Now to print the stats of all the files which are touched system("sort $touched -o temp; uniq temp $touched"); system("sort $untouched -o temp; uniq temp $untouched"); } # This is the function which generates the statistics and dumps it out # to a file. 
Details are pretty straightforward at this point sub generate_stats { my ($input_file, $coverage_file, $percent_file, $percentage, $calculate) = @_; my $k = 0; my $l = 0; open (INPUT, "< $input_file"); open (COVERAGE, "> $coverage_file"); if ($calculate == 1) { open (PERCENT, "> percent_coverage.txt"); } print COVERAGE "#Index Directory Filename Usage(%)\n"; print COVERAGE "#======================================================================================================\n"; if ($calculate == 1) { print PERCENT "#Index Directory Filename Usage(%)\n"; print PERCENT "#======================================================================================================\n"; } my $average = 0.0; my $num_files = `wc -l touched_files.txt`; print "num_files:", $num_files; while () { #generate the gcov file for this particular file #1. Get the directory name and filename seperately #2. Invoke gcov on the file #3. Print the statistic onto a file chomp(); my $full_name = $_; my $dir_name; my $file_name; my $file_ext; my $file_gcda; my $found_file; ($file_name, $dir_name, $file_ext) = fileparse($full_name, ('\.c') ); $file_gcda = $file_name . ".gcda"; open(RESULT, "cd $dir_name; gcov $file_gcda -o .libs 2> /dev/null | "); while () { if (/Creating/) { $found_file = 0; } else { # print "check: ", $_; # Do not check including the file_extension; might be .c or .cc or .C if (/^File '$file_name/) { # print "Found File:\n", $_; $found_file = 1; } #Now we are doing the right line. 
Search for this file if (/^Lines/ && $found_file == 1) { # print "Found Lines:\n", $_; s/([\s,0-9]*\.[0-9]+\%)\.*/$1/; my $val = $1; $average += $val; $k++; my $print_string = sprintf("%4d %40s %40s %3.2f\n", $k, $dir_name, $file_name, $val); if ($calculate == 1) { if ($val <= $percentage) { $l++; my $zero_string = sprintf("%4d %40s %40s %3.2f\n", $l, $dir_name, $file_name, $val); print PERCENT $zero_string; } } print COVERAGE $print_string; # Need to detect the next round $found_file = 0; } } } close(RESULT); } if ($num_files != 0){ print COVERAGE "==============================================================\n"; print COVERAGE "Average coverage was: ", $average/$num_files, " \n"; print COVERAGE "==============================================================\n"; } close(INPUT); close(COVERAGE); if ($calculate == 1) { close(PERCENT); } } prrte-3.0.13/contrib/libadd_mca_comp_update.py0000664000175000017500000001676415145263240021567 0ustar alastairalastair#!/usr/bin/env python # Copyright (c) 2017 IBM Corporation. All rights reserved. # Copyright (c) 2017-2020 Intel, Inc. All rights reserved. # Copyright (c) 2020 Cisco Systems, Inc. 
All rights reserved # $COPYRIGHT$ # import glob, os, re, shutil projects= { 'prte' : ["$(top_builddir)/src/libprrte.la"], } no_anchor_file = [] missing_files = [] skipped_files = [] partly_files = [] updated_files = [] # # Check of all of the libadd fields are accounted for in the LIBADD # Return a list indicating which are missing (positional) # def check_libadd(content, libadd_field, project): global projects libadd_list = projects[project] libadd_missing = [True] * len(libadd_list) on_libadd = False for line in content: # First libadd line if re.search( r"^\s*"+libadd_field, line): # If line continuation, then keep searching after this point if line[-2] == '\\': on_libadd = True for idx, lib in enumerate(libadd_list): if True == libadd_missing[idx]: if 0 <= line.find(lib): libadd_missing[idx] = False # Line continuation elif True == on_libadd: for idx, lib in enumerate(libadd_list): if True == libadd_missing[idx]: if 0 <= line.find(lib): libadd_missing[idx] = False # No more line continuations, so stop processing if line[-2] != '\\': on_libadd = False break return libadd_missing # # Update all of the Makefile.am's with the proper LIBADD additions # def update_makefile_ams(): global projects global no_anchor_file global missing_files global skipped_files global partly_files global updated_files for project, libadd_list in projects.items(): libadd_str = " \\\n\t".join(libadd_list) print("="*40) print("Project: "+project) print("LIBADD:\n"+libadd_str) print("="*40) # # Walk the directory structure # for root, dirs, files in os.walk(project+"/mca"): parts = root.split("/") if len(parts) != 4: continue if parts[-1] == ".libs" or parts[-1] == ".deps" or parts[-1] == "base": continue if parts[2] == "common": continue print("Processing: "+root) # # Find Makefile.am # make_filename = os.path.join(root, "Makefile.am") if False == os.path.isfile( make_filename ): missing_files.append("Missing: "+make_filename) print(" ---> Error: "+make_filename+" is not present in this 
directory") continue # # Stearching for: mca_FRAMEWORK_COMPONENT_la_{LIBADD|LDFLAGS} # First scan file to see if it has an LIBADD / LDFLAGS # libadd_field = "mca_"+parts[2]+"_"+parts[3]+"_la_LIBADD" ldflags_field = "mca_"+parts[2]+"_"+parts[3]+"_la_LDFLAGS" has_ldflags = False has_libadd = False r_fd = open(make_filename, 'r') orig_content = r_fd.readlines() r_fd.close() libadd_missing = [] for line in orig_content: if re.search( r"^\s*"+ldflags_field, line): has_ldflags = True elif re.search( r"^\s*"+libadd_field, line): has_libadd = True if True == has_libadd: libadd_missing = check_libadd(orig_content, libadd_field, project) # # Sanity Check: Was there an anchor field. # If not skip, we might need to manually update or it might be a # static component. # if False == has_ldflags and False == has_libadd: no_anchor_file.append("No anchor ("+ldflags_field+"): "+make_filename) print(" ---> Error: Makefile.am does not contain necessary anchor") continue # # Sanity Check: This file does not need to be updated. 
# if True == has_libadd and all(False == v for v in libadd_missing): skipped_files.append("Skip: "+make_filename) print(" Skip: Already updated Makefile.am") continue # # Now go though and create a new version of the Makefile.am # r_fd = open(make_filename, 'r') w_fd = open(make_filename+".mod", 'w') num_libadds=0 for line in r_fd: # LDFLAGS anchor if re.search( r"^\s*"+ldflags_field, line): w_fd.write(line) # If there is no LIBADD, then put it after the LDFLAGS if False == has_libadd: w_fd.write(libadd_field+" = "+libadd_str+"\n") # Existing LIBADD field to extend elif 0 == num_libadds and re.search( r"^\s*"+libadd_field, line): parts = line.partition("=") num_libadds += 1 if parts[0][-1] == '+': w_fd.write(libadd_field+" += ") else: w_fd.write(libadd_field+" = ") # If all libs are missing, then add the full string # Otherwise only add the missing items if all(True == v for v in libadd_missing): w_fd.write(libadd_str) # Only add a continuation if there is something to continue if 0 != len(parts[2].strip()): w_fd.write(" \\") w_fd.write("\n") else: partly_files.append("Partly updated: "+make_filename) for idx, lib in enumerate(libadd_list): if True == libadd_missing[idx]: w_fd.write(lib+" \\\n") # Original content (unless it's just a line continuation) if 0 != len(parts[2].strip()) and parts[2].strip() != "\\": w_fd.write("\t"+parts[2].lstrip()) # Non matching line, just echo else: w_fd.write(line) r_fd.close() w_fd.close() # # Replace the original with the updated version # shutil.move(make_filename+".mod", make_filename) updated_files.append(make_filename) if __name__ == "__main__": update_makefile_ams() print("") print("="*40); print("{:>3} : Files skipped".format(len(skipped_files))) print("="*40); print("="*40); print("{:>3} : Files updated, but had some libs already in place.".format(len(partly_files))) print("="*40); for fn in partly_files: print(fn) print("="*40); print("{:>3} : Files fully updated".format(len(updated_files))) print("="*40); for fn in 
updated_files: print(fn) print("="*40); print("{:>3} : Missing Makefile.am".format(len(missing_files))) print("="*40); for err in missing_files: print(err) print("="*40); print("{:>3} : Missing Anchor for parsing (might be static-only components)".format(len(no_anchor_file))) print("="*40); for err in no_anchor_file: print(err) prrte-3.0.13/contrib/Makefile.am0000664000175000017500000000266315145263240016623 0ustar alastairalastair# # Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. # Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2009-2020 Cisco Systems, Inc. All rights reserved # Copyright (c) 2010 IBM Corporation. All rights reserved. # Copyright (c) 2010-2011 Oak Ridge National Labs. All rights reserved. # Copyright (c) 2013-2018 Los Alamos National Security, Inc. All rights # reserved. # Copyright (c) 2013-2019 Intel, Inc. All rights reserved. # Copyright (c) 2017 Amazon.com, Inc. or its affiliates. # All Rights reserved. # Copyright (c) 2022 Nanook Consulting. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow # # $HEADER$ # amca_paramdir = $(AMCA_PARAM_SETS_DIR) dist_amca_param_DATA = amca-param-sets/example.conf EXTRA_DIST = \ dist/make_dist_tarball \ dist/make-authors.pl \ dist/linux/prrte.spec \ platform/optimized prrte-3.0.13/contrib/fix_headers.pl0000775000175000017500000001064415145263240017406 0ustar alastairalastair#!/usr/bin/perl # # Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. 
# Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. # Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow # # $HEADER$ # if (scalar(@ARGV) != 1) { print "Usage: fix_headers.pl \n"; exit(3); } $header_file = @ARGV[0]; $temp_file = "/tmp/temp.c"; open(HEADERS, "$header_file") || print "Could not open $header_file\n"; open(MOD_FILES, "> modified_files.txt") || print "Could not open modified.txt\n"; while () { #open all the c files #check if this header is present #if it is present, then substitute it with the protection $header_string = $_; chomp($header_string); $protection = $_; $protection =~ s/\./_/; $protection =~ s/\//_/; $protection =~ s/#include//; $protection =~ tr/a-z/A-z/; $protection =~ s/\s//; $protection = "HAVE_" . $protection; print $protection; $string_to_replace = "#ifdef $protection$_#endif\n"; print $string_to_replace; open(C_FILES, "find . 
-name *.c |") || print "find failed\n"; while () { $c_file = $_; if (not /mca/) { open(C_FILE, "$c_file") || print "Open failed on $c_file\n"; chomp($protection); #ensure that this protection has not been already put in place $protected = 0; $written_to_file = 0; while () { if (/$protection/) { $protected = 1; } } close (C_FILE); if ($protected == 0) { #this file is not yet protected open(C_FILE, "$c_file") || print "Open failed on $c_file\n"; open(TEMP, "> $temp_file") || print "Open failed on temp.c \n"; while () { if (/$header_string/) { print TEMP $string_to_replace; print "Replacing defintion ---- $c_file"; if ($written_to_file == 0) { print MOD_FILES $c_file; $written_to_file = 1; } } else { print TEMP $_; } } close (TEMP_C); system("cp $temp_file $c_file"); } } } close (C_FILES); #Now to do the same for header files open(H_FILES, "find . -name *.h |") || print "find failed\n"; while () { $h_file = $_; if (not /mca/) { open(H_FILE, "$h_file") || print "Open failed on $h_file\n"; chomp($protection); #ensure that this protection has not been already put in place $protected = 0; $written_to_file = 0; while () { if (/$protection/) { $protected = 1; } } close (H_FILE); if ($protected == 0) { #this file is not yet protected open(H_FILE, "$h_file") || print "Open failed on $h_file\n"; open(TEMP, "> $temp_file") || print "Open failed on temp.c \n"; while () { if (/$header_string/) { print TEMP $string_to_replace; print "Replacing defintion ---- $h_file"; if ($written_to_file == 0) { print MOD_FILES $h_file; $written_to_file = 1; } } else { print TEMP $_; } } close (TEMP_C); system("cp $temp_file $h_file"); } } } close (H_FILES); } close(HEADERS); close(MOD_FILES); system("rm -f $temp_file"); prrte-3.0.13/contrib/check-help-strings.pl0000775000175000017500000002023415145263240020613 0ustar alastairalastair#!/usr/bin/env perl # # Copyright (c) 2014-2020 Cisco Systems, Inc. All rights reserved # Copyright (c) 2019 Intel, Inc. All rights reserved. 
# $COPYRIGHT$ # # Simple script to check all the opal_show_help (and prte_show_help) # strings against what is found in help files. # use strict; use Cwd; use File::Find; use Getopt::Long; use Data::Dumper; my $num_warnings = 0; my $num_errors = 0; ########################################################################### my $VERBOSE = 0; my $HELP = 0; GetOptions( "help|h" => \$HELP, "verbose|v" => \$VERBOSE, ) or die "unable to parse options, aborted"; if ($HELP) { print <Purity(1)->Indent(1); my $s = $d->Dump; print $s; } sub isTopDir { my ($d) = @_; # trunk if (-f "$d/Makefile.prte-rules") { return 1; } # v1.8 if (-f "$d/Makefile.man-page-rules") { return 1; } return 0; } ########################################################################### # Find the top-level OMPI source tree dir my $start = cwd(); my $top = $start; while (!isTopDir($top)) { chdir(".."); $top = cwd(); die "Can't find top-level Open MPI directory" if ($top eq "/"); } chdir($start); ########################################################################### my @source_files; my @help_files; # Helper: Search for all source and help files sub match_files { # Don't process sym links return if (-l $_); # Don't recurse down "special" directories if (-d $_ && ((/^\.deps$/) || (/^\.libs$/) || (/^\.svn$/) || (/^\.hg$/) || (/^\.git$/))) { $File::Find::prune = 1; return; } # $File::Find::name is the path relative to the starting point. # $_ contains the file's basename. The code automatically changes # to the processed directory, so we want to open / close $_. 
verbose("--> $File::Find::name\n"); my $relative = $File::Find::name; $relative =~ s/^$top//; $relative =~ s/^\///; my $short = $_; if ($short =~ /^help-.*\.txt$/) { push(@help_files, { full => $File::Find::name, short => $short, relative => $relative, }); verbose(" Found help file: $short\n"); } if ($short =~ /\.c$/ || $short =~ /\.h$/ || $short =~ /\.cc$/) { push(@source_files, { full => $File::Find::name, short => $short, relative => $relative, }); verbose(" Found source file: $short\n"); } } # Find all source and help files print "Searching for source and help files...\n"; my $startrel = $start; if ($top ne $start) { $startrel =~ s/^$top//; $startrel =~ s/^\///; } find(\&match_files, "."); ########################################################################### # Index all help files my $help_topics; my $help_file_refs; print "Indexing help files (from entire source tree)...\n"; foreach my $info (@help_files) { verbose("Indexing help: $info->{full}\n"); # Check for short name collision if (exists($help_topics->{$info->{short}})) { # Found a collision! Find the original's full name. 
my $collide_relative = "unknown"; foreach my $i (@help_files) { if ($i->{short} eq $info->{short}) { $collide_relative = $i->{relative}; last; } } # Print error message print "*** ERROR: Help file name collision: File 1: $info->{relative} File 2: $collide_relative\n"; ++$num_errors; } # Read in file, find all of its topics my $num_topics = 0; open(FH, $info->{full}) || die "Can't open $info->{full}"; while () { if (m/^\s*\[(.+?)\]\s*$/) { my $topic = $1; verbose(" Topic: $topic\n"); $help_topics->{$info->{short}}->{topic}->{$topic} = 0; $help_topics->{$info->{short}}->{full} = $info->{full}; ++$num_topics; } } close(FH); if (0 == $num_topics) { print "*** WARNING: Empty help file (no topics) Help file: $info->{full}\n"; ++$num_warnings; } } ########################################################################### # Search source files for calls to opal_show_help and (o)rte_show_help if ($start eq $top) { print "Searching source files (from entire source tree)...\n"; } else { print "Searching source files (under $startrel)...\n"; } # Helper: for a given filename/topic, see if it exists sub check_file_topic { my $info = shift; my $file = shift; my $topic = shift; verbose("Found $info->{short}: $file / $topic\n"); # Do we have a help file for this? if (!exists($help_topics->{$file})) { print "*** ERROR: Source-referenced help file does not exist Source file: $info->{relative} Help file referenced: $file\n"; ++$num_errors; } # Do we have a topic in that help file for this? elsif (!exists($help_topics->{$file}->{topic}->{$topic})) { print "*** ERROR: Source-referenced help topic does not exist Source file: $info->{relative} Help file referenced: $file which is: $help_topics->{$file}->{full} Help topic referenced: $topic\n"; ++$num_errors; } # Yes, we do have a topic in that help file for this. # Increase its ref count. else { ++$help_topics->{$file}->{topic}->{$topic}; } } # Helper: search source file for a regexps matching a help filename # and topic. 
sub check_name { my $info = shift, my $name = shift; my $sep = shift; my $src = shift; while ($src =~ m/$name\s*$sep\s*"(.+?)"\s*,\s*"(.+?)"/) { my $file = $1; my $topic = $2; check_file_topic($info, $file, $topic); # Don't find this one again $src =~ s/$name\s*$sep\s*"(.+?)"\s*,\s*"(.+?)"/SHOW_HELP_REPLACED/; } return $src; } # Check to ensure helpfile/topic combos exist foreach my $info (@source_files) { verbose("Searching source: $info->{full}\n"); # If this source file is not in the target area, then skip it next if ($info->{relative} != /^$startrel/); my $src; open(FH, $info->{full}) || die "Can't open $info->{full}"; while () { # Eliminate newlines, just for regexp simplicity later chomp; $src .= $_; } close(FH); # Find calls to opal_show_help() $src = check_name($info, "opal_show_help", "\\(", $src); # Find calls to opal_show_help_string() $src = check_name($info, "opal_show_help_string", "\\(", $src); # Find calls to rte_show_help() (and also prte_show_help()) $src = check_name($info, "rte_show_help", "\\(", $src); # Find special tokens from comments $src = check_name($info, "SHOW_HELP", ":", $src); } ########################################################################### # Check that all indexed help strings were referenced print "Checking for stale help messages / files...\n"; foreach my $file (sort(keys(%{$help_topics}))) { my $num_used = 0; foreach my $topic (sort(keys(%{$help_topics->{$file}->{topic}}))) { if (0 == $help_topics->{$file}->{topic}->{$topic}) { print "*** WARNING: Possibly unused help topic Help file: $help_topics->{$file}->{full} Help topic: $topic\n"; ++$num_warnings; } else { ++$num_used; } } # Were no topics used in this file at all? 
if (0 == $num_used) { print "*** WARNING: Possibly unused help file (no topics used from this file) Help file: $help_topics->{$file}->{full}\n"; ++$num_warnings; } } ########################################################################### # All done if (0 == $num_errors && 0 == $num_warnings) { print "+++ All seems good!\n"; exit(0); } else { print "Total number of warnings: $num_warnings Total number of errors: $num_errors\n"; exit(1); } prrte-3.0.13/contrib/amca-param-sets/0000775000175000017500000000000015145263240017533 5ustar alastairalastairprrte-3.0.13/contrib/amca-param-sets/example.conf0000664000175000017500000000013615145263240022035 0ustar alastairalastair# # This is an example file illustrating how to setup an Aggregate MCA parameters # file. # # prrte-3.0.13/contrib/states/0000775000175000017500000000000015145263240016063 5ustar alastairalastairprrte-3.0.13/contrib/states/timediffs.pl0000775000175000017500000000765015145263240020405 0ustar alastairalastair#!/usr/bin/env perl # Fri Jun 05 2020 15:06:24PM EDT Thomas Naughton # # Compare and show time differences based on log output when running # with '--prtemca prrte_state_base_verbose 1' # # Input: List of time stamps (first line is considered start/first) # Output: Time offsets for each step (relative to the first and predecessor) # # Usage: # ./timediffs.pl TIME_LOG # --or-- # cat TIME_LOG | ./timediffs.pl - # # Example: # # Ignore comments, and # # and get only the HNP (0th jobID "[9239,0]") # ./statechop.pl logfile-ex1.txt\ # | grep -v '^#' \ # | grep '9239,0],0' \ # > TIME_LOG # # ./timediffs.pl TIME_LOG # # --or-- # # Avoid logfile and use STDIN # ./statechop.pl logfile-ex1.txt \ # | grep -v '^#' \ # | grep '9239,0],0' \ # | ./timediffs.pl - # # ChangeLog: # - v0.2 add support to show event name with timing info # - v0.1 Initial script #### use strict; my $VERSION="v0.2"; # DEBUG (show JobID) (set to 1 to enable) my $DBG_SHOW_JOBID=0; # Trim whitespace from front/rear sub trim($) { my $str = 
shift; $str =~ s/^\s*//; # Trim leading space $str =~ s/\s*$//; # Trim trailing space return ($str); } #### # Process line of input into separate record fields, # returning array with two values: [0]time, [1]evtname, [2]jobid # # Example input: # [batch4:39389]; [[8240,0],0]; [1585138484.185501]; PENDING ALLOCATION # # Which is: Node; JobID; TimeStamp; EventName #### sub get_evt_info($) { my $str = shift; my @fields = split(/;/, $str); my @arr; $arr[0] = get_value($fields[2]); $arr[1] = trim($fields[3]); if ($DBG_SHOW_JOBID) { $arr[2] = trim($fields[1]); } else { $arr[2] = ""; } return (@arr); } sub get_value($) { my $val = shift; $val = trim($val); $val =~ s/^\[//; # Trim leading bracket $val =~ s/\]$//; # Trim trailing bracket return ($val); } sub get_value_diff($$) { my $val = shift; # event time my $base = shift; # base time (to compare against) my $diff = 0.0; $diff = $base - $val; return ($diff); } # # MAIN # my $file = $ARGV[0]; print "DBG: FILE: $file\n"; if ("$file" =~ /^-$/) { # Read from stdin #print "DBG: GET STDIN\n"; } elsif (! 
-f "$file") { print "ERROR: Failed to read input file '$file'\n"; exit(1); } my @data; my @input; if ("$file" =~ /^-$/) { # Read from stdin #print "# File: STDIN\n"; @input= ; chomp(@input); } else { #print "# File: $file\n"; open(FH, "<$file") or die "Error: failed to open '$file'\n"; @input = ; chomp(@input); close(FH); } # Skip all comments from input foreach my $i (@input) { next if $i =~ /^\s*#/; push @data, $i; } # Node JobID TimeStamp EventName # [batch4:39389]; [[8240,0],0]; [1585138484.185501]; PENDING ALLOCATION my $base = shift(@data); # Get first line of data and treat as "base" time my ($base_time, $base_evtname, $jobid) = get_evt_info($base); print "####################################################\n"; print "# EvtTime -- actual event time\n"; print "# BaseDiff -- difference from initial base time\n"; print "# RelDiff -- difference from previous event time\n"; print "####################################################\n"; print "# EvtTime \t BaseDiff \t RelDiff \t EvtName\n"; print "#------------ ------------ ------------ -------------\n"; printf " %0.6f \t n/a \t n/a \t %s \t %s\n", $base_time, $base_evtname, $jobid; my $prev_time = $base_time; foreach my $line (@data) { my ($evt_time, $evt_name, $jobid) = get_evt_info($line); my $evt_diff = get_value_diff($evt_time, $base_time); my $relative_diff = get_value_diff($evt_time, $prev_time); $prev_time = $evt_time; printf " %0.6f \t %0.6f \t %0.6f \t %s \t %s\n", $evt_time, $evt_diff, $relative_diff, $evt_name, $jobid; } prrte-3.0.13/contrib/states/README.md0000664000175000017500000000576215145263240017354 0ustar alastairalastairState Timestamps ---------------- A couple scripts for chopping out the "state" lines for JOB and PROC state-machine transitions. Then you can pass that into the timediff script to get idea of time between the events. The scripts make several assumptions so your mileage may vary. 
We use the `PENDING ALLOCATION` as the basis for time and compare other events to this origin (BaseDiff) and also to the preceeding state (RelDiff). This obviously assumes the states apppear in order in time. There are comments at the top of the files and a couple example log files for playing with things. This assumes you have enabled verbose state output for PRRTE, e.g.,`--prtemca prte_state_base_verbose 1`. Examples ------- - Basic example running `sleep 5` to show delay in RUNNING to TERMINATE, for a single shot `prterun` case ``` shell:$ prterun \ --prtemca prte_state_base_verbose 1 \ --np 2 \ sleep 5 >& LOG.txt shell:$ ./statechop.pl LOG.txt elk:$ ./statechop.pl LOG.txt | ./timediffs.pl - DBG: FILE: - #################################################### # EvtTime -- actual event time # BaseDiff -- difference from initial base time # RelDiff -- difference from previous event time #################################################### # EvtTime BaseDiff RelDiff EvtName #------------ ------------ ------------ ------------- 1591385117.048860 n/a n/a PENDING ALLOCATION 1591385117.049572 -0.000712 -0.000712 ALLOCATION COMPLETE 1591385117.049609 -0.000749 -0.000037 PENDING DAEMON LAUNCH 1591385117.049720 -0.000860 -0.000111 ALL DAEMONS REPORTED 1591385117.049764 -0.000904 -0.000044 VM READY 1591385117.049806 -0.000946 -0.000042 PENDING INIT 1591385117.049850 -0.000990 -0.000044 INIT_COMPLETE 1591385117.049880 -0.001020 -0.000030 ALLOCATION COMPLETE 1591385117.049913 -0.001053 -0.000033 PENDING DAEMON LAUNCH 1591385117.049945 -0.001085 -0.000032 ALL DAEMONS REPORTED 1591385117.049977 -0.001117 -0.000032 VM READY 1591385117.050012 -0.001152 -0.000035 PENDING MAPPING 1591385117.050123 -0.001263 -0.000111 MAP COMPLETE 1591385117.050164 -0.001304 -0.000041 PENDING FINAL SYSTEM PREP 1591385117.050197 -0.001337 -0.000033 PENDING APP LAUNCH 1591385117.050661 -0.001801 -0.000464 SENDING LAUNCH MSG 1591385117.058262 -0.009402 -0.007601 RUNNING 1591385122.059255 -5.010395 
-5.000993 NORMALLY TERMINATED 1591385122.060270 -5.011410 -0.001015 DAEMONS TERMINATED shell:$ - Using past logfile with multiple `prun` jobs so need to trim lines of interest down a bit using `grep` before passing into timediff script ``` shell:$ ./statechop.pl example-output2.txt \ | grep -v '^#' \ | grep '40915,0],0' \ | ./timediffs.pl - ``` prrte-3.0.13/contrib/states/example-output.txt0000664000175000017500000001233615145263240021622 0ustar alastairalastairelk:$ prterun --np 1 hostname node0 elk:$ prterun --np 1 --prtemca prte_state_base_verbose 1 hostname [node0:22783] [[44906,0],0] [1591381342.861267] ACTIVATE JOB [44906,0] STATE PENDING ALLOCATION AT prte.c:978 [node0:22783] [[44906,0],0] [1591381342.861337] ACTIVATING JOB [44906,0] STATE PENDING ALLOCATION PRI 4 [node0:22783] [[44906,0],0] [1591381342.862020] ACTIVATE JOB [44906,0] STATE ALLOCATION COMPLETE AT base/ras_base_allocate.c:522 [node0:22783] [[44906,0],0] [1591381342.862054] ACTIVATING JOB [44906,0] STATE ALLOCATION COMPLETE PRI 4 [node0:22783] [[44906,0],0] [1591381342.862083] ACTIVATE JOB [44906,0] STATE PENDING DAEMON LAUNCH AT base/plm_base_launch_support.c:206 [node0:22783] [[44906,0],0] [1591381342.862108] ACTIVATING JOB [44906,0] STATE PENDING DAEMON LAUNCH PRI 4 [node0:22783] [[44906,0],0] [1591381342.862213] ACTIVATE JOB [44906,0] STATE ALL DAEMONS REPORTED AT plm_rsh_module.c:1062 [node0:22783] [[44906,0],0] [1591381342.862241] ACTIVATING JOB [44906,0] STATE ALL DAEMONS REPORTED PRI 4 [node0:22783] [[44906,0],0] [1591381342.862273] ACTIVATE JOB [44906,0] STATE VM READY AT base/plm_base_launch_support.c:177 [node0:22783] [[44906,0],0] [1591381342.862308] ACTIVATING JOB [44906,0] STATE VM READY PRI 4 [node0:22783] [[44906,0],0] [1591381342.862345] ACTIVATE JOB [INVALID] STATE PENDING INIT AT plm_rsh_module.c:908 [node0:22783] [[44906,0],0] [1591381342.862367] ACTIVATING JOB [INVALID] STATE PENDING INIT PRI 4 [node0:22783] [[44906,0],0] [1591381342.862419] ACTIVATE JOB [44906,1] STATE 
INIT_COMPLETE AT base/plm_base_launch_support.c:326 [node0:22783] [[44906,0],0] [1591381342.862442] ACTIVATING JOB [44906,1] STATE INIT_COMPLETE PRI 4 [node0:22783] [[44906,0],0] [1591381342.862477] ACTIVATE JOB [44906,1] STATE ALLOCATION COMPLETE AT state_dvm.c:249 [node0:22783] [[44906,0],0] [1591381342.862506] ACTIVATING JOB [44906,1] STATE ALLOCATION COMPLETE PRI 4 [node0:22783] [[44906,0],0] [1591381342.862528] ACTIVATE JOB [44906,1] STATE PENDING DAEMON LAUNCH AT base/plm_base_launch_support.c:206 [node0:22783] [[44906,0],0] [1591381342.862547] ACTIVATING JOB [44906,1] STATE PENDING DAEMON LAUNCH PRI 4 [node0:22783] [[44906,0],0] [1591381342.862585] ACTIVATE JOB [44906,1] STATE ALL DAEMONS REPORTED AT plm_rsh_module.c:1062 [node0:22783] [[44906,0],0] [1591381342.862603] ACTIVATING JOB [44906,1] STATE ALL DAEMONS REPORTED PRI 4 [node0:22783] [[44906,0],0] [1591381342.862628] ACTIVATE JOB [44906,1] STATE VM READY AT base/plm_base_launch_support.c:177 [node0:22783] [[44906,0],0] [1591381342.862675] ACTIVATING JOB [44906,1] STATE VM READY PRI 4 [node0:22783] [[44906,0],0] [1591381342.862707] ACTIVATE JOB [44906,1] STATE PENDING MAPPING AT state_dvm.c:232 [node0:22783] [[44906,0],0] [1591381342.862728] ACTIVATING JOB [44906,1] STATE PENDING MAPPING PRI 4 [node0:22783] [[44906,0],0] [1591381342.862839] ACTIVATE JOB [44906,1] STATE MAP COMPLETE AT base/rmaps_base_map_job.c:601 [node0:22783] [[44906,0],0] [1591381342.862870] ACTIVATING JOB [44906,1] STATE MAP COMPLETE PRI 4 [node0:22783] [[44906,0],0] [1591381342.862897] ACTIVATE JOB [44906,1] STATE PENDING FINAL SYSTEM PREP AT base/plm_base_launch_support.c:264 [node0:22783] [[44906,0],0] [1591381342.862916] ACTIVATING JOB [44906,1] STATE PENDING FINAL SYSTEM PREP PRI 4 [node0:22783] [[44906,0],0] [1591381342.862945] ACTIVATE JOB [44906,1] STATE PENDING APP LAUNCH AT base/plm_base_launch_support.c:439 [node0:22783] [[44906,0],0] [1591381342.862967] ACTIVATING JOB [44906,1] STATE PENDING APP LAUNCH PRI 4 
[node0:22783] [[44906,0],0] [1591381342.863427] ACTIVATE JOB [44906,1] STATE SENDING LAUNCH MSG AT base/odls_base_default_fns.c:157 [node0:22783] [[44906,0],0] [1591381342.863486] ACTIVATING JOB [44906,1] STATE SENDING LAUNCH MSG PRI 4 [node0:22783] [[44906,0],0] [1591381342.868199] ACTIVATE PROC [[44906,1],0] STATE RUNNING AT base/odls_base_default_fns.c:1039 [node0:22783] [[44906,0],0] [1591381342.868273] ACTIVATING PROC [[44906,1],0] STATE RUNNING PRI 4 [node0:22783] [[44906,0],0] [1591381342.868338] ACTIVATE JOB [44906,1] STATE RUNNING AT base/state_base_fns.c:653 [node0:22783] [[44906,0],0] [1591381342.868362] ACTIVATING JOB [44906,1] STATE RUNNING PRI 4 node0 [node0:22783] [[44906,0],0] [1591381342.869128] ACTIVATE PROC [[44906,1],0] STATE IOF COMPLETE AT iof_hnp_read.c:335 [node0:22783] [[44906,0],0] [1591381342.869164] ACTIVATING PROC [[44906,1],0] STATE IOF COMPLETE PRI 4 [node0:22783] [[44906,0],0] [1591381342.869302] ACTIVATE PROC [[44906,1],0] STATE WAITPID FIRED AT base/odls_base_default_fns.c:1718 [node0:22783] [[44906,0],0] [1591381342.869327] ACTIVATING PROC [[44906,1],0] STATE WAITPID FIRED PRI 4 [node0:22783] [[44906,0],0] [1591381342.869353] ACTIVATE PROC [[44906,1],0] STATE NORMALLY TERMINATED AT base/state_base_fns.c:684 [node0:22783] [[44906,0],0] [1591381342.869372] ACTIVATING PROC [[44906,1],0] STATE NORMALLY TERMINATED PRI 4 [node0:22783] [[44906,0],0] [1591381342.869722] ACTIVATE JOB [44906,1] STATE NORMALLY TERMINATED AT base/state_base_fns.c:757 [node0:22783] [[44906,0],0] [1591381342.869758] ACTIVATING JOB [44906,1] STATE NORMALLY TERMINATED PRI 4 [node0:22783] [[44906,0],0] [1591381342.870645] ACTIVATE JOB NULL STATE DAEMONS TERMINATED AT prted/prted_comm.c:470 [node0:22783] [[44906,0],0] [1591381342.870680] ACTIVATING JOB NULL STATE DAEMONS TERMINATED PRI 4 elk:$ prrte-3.0.13/contrib/states/example-output2.txt0000664000175000017500000025362215145263240021711 0ustar alastairalastair# TJN-BEGIN: Wed Mar 25 14:47:18 EDT 2020 
##################################### # PWD: /gpfs/alpine/proj-shared/stf010/naughton/summit/ompix/scaling/tests ##################################### # Run-Hostname: batch2 ##################################### # LSB_JOBID: 994949 ##################################### # LS_JOBID: ##################################### # BEGIN-module-list: Currently Loaded Modules: 1) hsi/5.0.2.p5 3) DefApps 5) cuda/10.1.243 7) openmpi/master 2) lsf-tools/2.0 4) gcc/6.4.0 6) ucx/1.7.0 # END-module-list: ################################################### ## ## NOTE -- REMAINING INFO IS FROM COMPUTE NODE ## (i.e., via aprun commands) ## ################################################### ##################################### ##################################### # RUN-MPIRUN-TESTS ##################################### # DBG: TTL_NODES=33 # DBG: TTL_COMPUTE_NODES=32 #################### # STAGING-RUN-BEGIN # STAGE-CMD: mpirun --prtemca prrte_state_base_verbose 1 --prtemca plm_rsh_num_concurrent 32 --prtemca routed direct --prtemca rmaps_base_no_schedule_local 1 --np 32 --map-by ppr:1:node:NOLOCAL --hostfile /gpfs/alpine/proj-shared/stf010/naughton/summit/ompix/scaling/tests/fqdn-hostfile.81788 -x PATH=/sw/sources/lsf-tools/2.0/summit/bin:/sw/summit/ums/ompix/DEVELOP/gcc/6.4.0/install/openmpi-br-master/bin:/sw/summit/ums/ompix/DEVELOP/gcc/6.4.0/install/ucx-1.7.0/bin:/sw/summit/cuda/10.1.243/bin:/sw/summit/gcc/6.4.0/bin:/opt/ibm/spectrumcomputing/lsf/10.1.0.9/linux3.10-glibc2.17-ppc64le-csm/bin:/usr/bin:/usr/sbin:/sw/sources/hpss/bin:/opt/ibm/spectrumcomputing/lsf/10.1.0.9/linux3.10-glibc2.17-ppc64le-csm/etc:/opt/ibm/csm/bin:/usr/local/bin:/usr/local/sbin:/opt/ibm/flightlog/bin:/opt/ibutils/bin:/opt/ibm/spectrum_mpi/jsm_pmix/bin:/opt/puppetlabs/bin:/usr/lpp/mmfs/bin -x 
LD_LIBRARY_PATH=/sw/summit/ums/ompix/DEVELOP/gcc/6.4.0/install/openmpi-br-master/lib:/sw/summit/ums/ompix/DEVELOP/gcc/6.4.0/install/ucx-1.7.0/lib:/sw/summit/cuda/10.1.243/lib64:/sw/summit/gcc/6.4.0/lib64:/opt/ibm/spectrumcomputing/lsf/10.1.0.9/linux3.10-glibc2.17-ppc64le-csm/lib /gpfs/alpine/proj-shared/stf010/naughton/summit/ompix/scaling/tests/hello_mpi [batch2:82271] [[40915,0],0] [1585162039.032522] ACTIVATE JOB [40915,0] STATE PENDING ALLOCATION AT ../../../../../../../../source/openmpi-br-master/prrte/src/tools/prte/prte.c:455 [batch2:82271] [[40915,0],0] [1585162039.033920] ACTIVATE JOB [40915,0] STATE ALLOCATION COMPLETE AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/ras/base/ras_base_allocate.c:530 [batch2:82271] [[40915,0],0] [1585162039.033933] ACTIVATE JOB [40915,0] STATE PENDING DAEMON LAUNCH AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_launch_support.c:200 [batch2:82271] [[40915,0],0] [1585162040.015379] ACTIVATE JOB [40915,0] STATE ALL DAEMONS REPORTED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_launch_support.c:1587 [batch2:82271] [[40915,0],0] [1585162040.015427] ACTIVATE JOB [40915,0] STATE VM READY AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_launch_support.c:179 [batch2:82271] [[40915,0],0] [1585162040.026719] ACTIVATE JOB [INVALID] STATE PENDING INIT AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/rsh/plm_rsh_module.c:907 [batch2:82271] [[40915,0],0] [1585162040.027924] ACTIVATE JOB [40915,1] STATE INIT_COMPLETE AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_launch_support.c:320 [batch2:82271] [[40915,0],0] [1585162040.027933] ACTIVATE JOB [40915,1] STATE PENDING ALLOCATION AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/dvm/state_dvm.c:243 [batch2:82271] [[40915,0],0] [1585162040.027939] ACTIVATE JOB [40915,1] STATE 
ALLOCATION COMPLETE AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/ras/base/ras_base_allocate.c:530 [batch2:82271] [[40915,0],0] [1585162040.027945] ACTIVATE JOB [40915,1] STATE PENDING DAEMON LAUNCH AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_launch_support.c:200 [batch2:82271] [[40915,0],0] [1585162040.027956] ACTIVATE JOB [40915,1] STATE ALL DAEMONS REPORTED AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/rsh/plm_rsh_module.c:1061 [batch2:82271] [[40915,0],0] [1585162040.027975] ACTIVATE JOB [40915,1] STATE VM READY AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_launch_support.c:179 [batch2:82271] [[40915,0],0] [1585162040.027981] ACTIVATE JOB [40915,1] STATE PENDING MAPPING AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/dvm/state_dvm.c:231 [batch2:82271] [[40915,0],0] [1585162040.041606] ACTIVATE JOB [40915,1] STATE MAP COMPLETE AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/rmaps/base/rmaps_base_map_job.c:514 [batch2:82271] [[40915,0],0] [1585162040.041619] ACTIVATE JOB [40915,1] STATE PENDING FINAL SYSTEM PREP AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_launch_support.c:258 [batch2:82271] [[40915,0],0] [1585162040.041633] ACTIVATE JOB [40915,1] STATE PENDING APP LAUNCH AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_launch_support.c:433 [batch2:82271] [[40915,0],0] [1585162040.042307] ACTIVATE JOB [40915,1] STATE SENDING LAUNCH MSG AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:157 [b06n15:147995] [[40915,0],1] [1585162040.076256] ACTIVATE PROC [[40915,1],0] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1038 [batch2:82271] [[40915,0],0] [1585162040.076452] ACTIVATE PROC [[40915,1],0] STATE RUNNING AT 
../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [b06n15:147995] [[40915,0],1] [1585162040.076324] ACTIVATE JOB [40915,1] STATE LOCAL LAUNCH COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:314 [b06n16:125939] [[40915,0],2] [1585162040.076541] ACTIVATE PROC [[40915,1],1] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1038 [batch2:82271] [[40915,0],0] [1585162040.076707] ACTIVATE PROC [[40915,1],1] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [b06n16:125939] [[40915,0],2] [1585162040.076602] ACTIVATE JOB [40915,1] STATE LOCAL LAUNCH COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:314 [batch2:82271] [[40915,0],0] [1585162040.079657] ACTIVATE PROC [[40915,1],2] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [b06n17:106679] [[40915,0],3] [1585162040.079509] ACTIVATE PROC [[40915,1],2] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1038 [b06n17:106679] [[40915,0],3] [1585162040.079567] ACTIVATE JOB [40915,1] STATE LOCAL LAUNCH COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:314 [b06n18:128515] [[40915,0],4] [1585162040.080275] ACTIVATE PROC [[40915,1],3] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1038 [batch2:82271] [[40915,0],0] [1585162040.080461] ACTIVATE PROC [[40915,1],3] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [b06n18:128515] [[40915,0],4] [1585162040.080346] ACTIVATE JOB [40915,1] STATE LOCAL LAUNCH COMPLETE AT 
../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:314 [b17n15:141994] [[40915,0],6] [1585162040.094837] ACTIVATE PROC [[40915,1],5] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1038 [batch2:82271] [[40915,0],0] [1585162040.095020] ACTIVATE PROC [[40915,1],5] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [b30n12:23915] [[40915,0],10] [1585162040.094890] ACTIVATE PROC [[40915,1],9] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1038 [batch2:82271] [[40915,0],0] [1585162040.095067] ACTIVATE PROC [[40915,1],9] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [b17n15:141994] [[40915,0],6] [1585162040.094906] ACTIVATE JOB [40915,1] STATE LOCAL LAUNCH COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:314 [b30n12:23915] [[40915,0],10] [1585162040.094955] ACTIVATE JOB [40915,1] STATE LOCAL LAUNCH COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:314 [b07n01:139664] [[40915,0],5] [1585162040.095005] ACTIVATE PROC [[40915,1],4] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1038 [batch2:82271] [[40915,0],0] [1585162040.095172] ACTIVATE PROC [[40915,1],4] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [b07n01:139664] [[40915,0],5] [1585162040.095054] ACTIVATE JOB [40915,1] STATE LOCAL LAUNCH COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:314 [h27n10:130337] [[40915,0],28] [1585162040.095673] ACTIVATE PROC [[40915,1],27] STATE RUNNING AT 
../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1038 [batch2:82271] [[40915,0],0] [1585162040.095849] ACTIVATE PROC [[40915,1],27] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [b30n11:49033] [[40915,0],9] [1585162040.095728] ACTIVATE PROC [[40915,1],8] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1038 [h27n10:130337] [[40915,0],28] [1585162040.095734] ACTIVATE JOB [40915,1] STATE LOCAL LAUNCH COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:314 [batch2:82271] [[40915,0],0] [1585162040.095924] ACTIVATE PROC [[40915,1],8] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [h27n07:103492] [[40915,0],25] [1585162040.095819] ACTIVATE PROC [[40915,1],24] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1038 [batch2:82271] [[40915,0],0] [1585162040.095985] ACTIVATE PROC [[40915,1],24] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [b30n11:49033] [[40915,0],9] [1585162040.095801] ACTIVATE JOB [40915,1] STATE LOCAL LAUNCH COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:314 [h27n07:103492] [[40915,0],25] [1585162040.095884] ACTIVATE JOB [40915,1] STATE LOCAL LAUNCH COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:314 [h27n06:87678] [[40915,0],24] [1585162040.095996] ACTIVATE PROC [[40915,1],23] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1038 [h27n09:105965] [[40915,0],27] [1585162040.096015] ACTIVATE PROC [[40915,1],26] STATE RUNNING AT 
../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1038 [batch2:82271] [[40915,0],0] [1585162040.096168] ACTIVATE PROC [[40915,1],23] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [batch2:82271] [[40915,0],0] [1585162040.096180] ACTIVATE PROC [[40915,1],26] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [h27n06:87678] [[40915,0],24] [1585162040.096058] ACTIVATE JOB [40915,1] STATE LOCAL LAUNCH COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:314 [h27n09:105965] [[40915,0],27] [1585162040.096084] ACTIVATE JOB [40915,1] STATE LOCAL LAUNCH COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:314 [h27n05:127708] [[40915,0],23] [1585162040.096152] ACTIVATE PROC [[40915,1],22] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1038 [batch2:82271] [[40915,0],0] [1585162040.096308] ACTIVATE PROC [[40915,1],22] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [h27n05:127708] [[40915,0],23] [1585162040.096219] ACTIVATE JOB [40915,1] STATE LOCAL LAUNCH COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:314 [b30n09:40885] [[40915,0],7] [1585162040.096850] ACTIVATE PROC [[40915,1],6] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1038 [batch2:82271] [[40915,0],0] [1585162040.097021] ACTIVATE PROC [[40915,1],6] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [b30n09:40885] [[40915,0],7] [1585162040.096917] ACTIVATE JOB [40915,1] STATE LOCAL LAUNCH COMPLETE AT 
../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:314 [h27n11:133026] [[40915,0],29] [1585162040.099912] ACTIVATE PROC [[40915,1],28] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1038 [batch2:82271] [[40915,0],0] [1585162040.100074] ACTIVATE PROC [[40915,1],28] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [h27n11:133026] [[40915,0],29] [1585162040.099980] ACTIVATE JOB [40915,1] STATE LOCAL LAUNCH COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:314 [h27n08:104866] [[40915,0],26] [1585162040.100494] ACTIVATE PROC [[40915,1],25] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1038 [batch2:82271] [[40915,0],0] [1585162040.100667] ACTIVATE PROC [[40915,1],25] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [h27n08:104866] [[40915,0],26] [1585162040.100561] ACTIVATE JOB [40915,1] STATE LOCAL LAUNCH COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:314 [h28n04:16897] [[40915,0],31] [1585162040.100686] ACTIVATE PROC [[40915,1],30] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1038 [batch2:82271] [[40915,0],0] [1585162040.100855] ACTIVATE PROC [[40915,1],30] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [h28n06:158971] [[40915,0],32] [1585162040.100728] ACTIVATE PROC [[40915,1],31] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1038 [batch2:82271] [[40915,0],0] [1585162040.100888] ACTIVATE PROC [[40915,1],31] STATE RUNNING AT 
../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [h28n04:16897] [[40915,0],31] [1585162040.100745] ACTIVATE JOB [40915,1] STATE LOCAL LAUNCH COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:314 [h28n06:158971] [[40915,0],32] [1585162040.100788] ACTIVATE JOB [40915,1] STATE LOCAL LAUNCH COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:314 [h27n12:122423] [[40915,0],30] [1585162040.100837] ACTIVATE PROC [[40915,1],29] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1038 [batch2:82271] [[40915,0],0] [1585162040.100993] ACTIVATE PROC [[40915,1],29] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [h27n12:122423] [[40915,0],30] [1585162040.100897] ACTIVATE JOB [40915,1] STATE LOCAL LAUNCH COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:314 [b31n01:160528] [[40915,0],17] [1585162040.103550] ACTIVATE PROC [[40915,1],16] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1038 [batch2:82271] [[40915,0],0] [1585162040.103706] ACTIVATE PROC [[40915,1],16] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [b31n01:160528] [[40915,0],17] [1585162040.103612] ACTIVATE JOB [40915,1] STATE LOCAL LAUNCH COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:314 [b30n14:20639] [[40915,0],12] [1585162040.105257] ACTIVATE PROC [[40915,1],11] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1038 [batch2:82271] [[40915,0],0] [1585162040.105414] ACTIVATE PROC [[40915,1],11] STATE RUNNING AT 
../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [b30n14:20639] [[40915,0],12] [1585162040.105314] ACTIVATE JOB [40915,1] STATE LOCAL LAUNCH COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:314 [f23n16:23315] [[40915,0],21] [1585162040.105376] ACTIVATE PROC [[40915,1],20] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1038 [b31n02:171328] [[40915,0],18] [1585162040.105408] ACTIVATE PROC [[40915,1],17] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1038 [batch2:82271] [[40915,0],0] [1585162040.105596] ACTIVATE PROC [[40915,1],20] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [batch2:82271] [[40915,0],0] [1585162040.105604] ACTIVATE PROC [[40915,1],17] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [f23n16:23315] [[40915,0],21] [1585162040.105458] ACTIVATE JOB [40915,1] STATE LOCAL LAUNCH COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:314 [b31n04:127470] [[40915,0],20] [1585162040.105510] ACTIVATE PROC [[40915,1],19] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1038 [b31n02:171328] [[40915,0],18] [1585162040.105485] ACTIVATE JOB [40915,1] STATE LOCAL LAUNCH COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:314 [batch2:82271] [[40915,0],0] [1585162040.105689] ACTIVATE PROC [[40915,1],19] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [b30n10:94844] [[40915,0],8] [1585162040.105586] ACTIVATE PROC [[40915,1],7] STATE RUNNING AT 
../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1038 [batch2:82271] [[40915,0],0] [1585162040.105732] ACTIVATE PROC [[40915,1],7] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [b31n04:127470] [[40915,0],20] [1585162040.105578] ACTIVATE JOB [40915,1] STATE LOCAL LAUNCH COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:314 [b30n16:38673] [[40915,0],14] [1585162040.105649] ACTIVATE PROC [[40915,1],13] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1038 [batch2:82271] [[40915,0],0] [1585162040.105809] ACTIVATE PROC [[40915,1],13] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [b30n10:94844] [[40915,0],8] [1585162040.105643] ACTIVATE JOB [40915,1] STATE LOCAL LAUNCH COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:314 [b30n17:18592] [[40915,0],15] [1585162040.105715] ACTIVATE PROC [[40915,1],14] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1038 [b30n18:65526] [[40915,0],16] [1585162040.105747] ACTIVATE PROC [[40915,1],15] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1038 [batch2:82271] [[40915,0],0] [1585162040.105875] ACTIVATE PROC [[40915,1],14] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [batch2:82271] [[40915,0],0] [1585162040.105911] ACTIVATE PROC [[40915,1],15] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [b30n16:38673] [[40915,0],14] [1585162040.105718] ACTIVATE JOB [40915,1] STATE LOCAL LAUNCH COMPLETE AT 
../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:314 [b30n17:18592] [[40915,0],15] [1585162040.105773] ACTIVATE JOB [40915,1] STATE LOCAL LAUNCH COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:314 [b30n18:65526] [[40915,0],16] [1585162040.105820] ACTIVATE JOB [40915,1] STATE LOCAL LAUNCH COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:314 [batch2:82271] [[40915,0],0] [1585162040.107358] ACTIVATE PROC [[40915,1],0] STATE SYNC REGISTERED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:452 [b06n15:147995] [[40915,0],1] [1585162040.107254] ACTIVATE PROC [[40915,1],0] STATE SYNC REGISTERED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:92 [b06n16:125939] [[40915,0],2] [1585162040.107434] ACTIVATE PROC [[40915,1],1] STATE SYNC REGISTERED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:92 [batch2:82271] [[40915,0],0] [1585162040.107533] ACTIVATE PROC [[40915,1],1] STATE SYNC REGISTERED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:452 [batch2:82271] [[40915,0],0] [1585162040.111616] ACTIVATE PROC [[40915,1],2] STATE SYNC REGISTERED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:452 [b06n17:106679] [[40915,0],3] [1585162040.111509] ACTIVATE PROC [[40915,1],2] STATE SYNC REGISTERED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:92 [batch2:82271] [[40915,0],0] [1585162040.113698] ACTIVATE PROC [[40915,1],3] STATE SYNC REGISTERED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:452 [b06n18:128515] [[40915,0],4] [1585162040.113611] ACTIVATE PROC [[40915,1],3] STATE SYNC REGISTERED AT 
../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:92 [batch2:82271] [[40915,0],0] [1585162040.124128] ACTIVATE PROC [[40915,1],23] STATE SYNC REGISTERED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:452 [h27n06:87678] [[40915,0],24] [1585162040.124026] ACTIVATE PROC [[40915,1],23] STATE SYNC REGISTERED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:92 [h27n07:103492] [[40915,0],25] [1585162040.124332] ACTIVATE PROC [[40915,1],24] STATE SYNC REGISTERED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:92 [batch2:82271] [[40915,0],0] [1585162040.124431] ACTIVATE PROC [[40915,1],24] STATE SYNC REGISTERED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:452 [b31n03:167060] [[40915,0],19] [1585162040.126205] ACTIVATE PROC [[40915,1],18] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1038 [batch2:82271] [[40915,0],0] [1585162040.126386] ACTIVATE PROC [[40915,1],18] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [batch2:82271] [[40915,0],0] [1585162040.126483] ACTIVATE PROC [[40915,1],22] STATE SYNC REGISTERED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:452 [h27n05:127708] [[40915,0],23] [1585162040.126365] ACTIVATE PROC [[40915,1],22] STATE SYNC REGISTERED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:92 [b31n03:167060] [[40915,0],19] [1585162040.126277] ACTIVATE JOB [40915,1] STATE LOCAL LAUNCH COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:314 [b07n01:139664] [[40915,0],5] [1585162040.126485] ACTIVATE PROC [[40915,1],4] STATE SYNC REGISTERED AT 
../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:92 [batch2:82271] [[40915,0],0] [1585162040.126579] ACTIVATE PROC [[40915,1],4] STATE SYNC REGISTERED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:452 [h27n10:130337] [[40915,0],28] [1585162040.127417] ACTIVATE PROC [[40915,1],27] STATE SYNC REGISTERED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:92 [batch2:82271] [[40915,0],0] [1585162040.127526] ACTIVATE PROC [[40915,1],27] STATE SYNC REGISTERED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:452 [h27n09:105965] [[40915,0],27] [1585162040.128117] ACTIVATE PROC [[40915,1],26] STATE SYNC REGISTERED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:92 [batch2:82271] [[40915,0],0] [1585162040.128211] ACTIVATE PROC [[40915,1],26] STATE SYNC REGISTERED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:452 [batch2:82271] [[40915,0],0] [1585162040.132004] ACTIVATE PROC [[40915,1],28] STATE SYNC REGISTERED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:452 [h27n11:133026] [[40915,0],29] [1585162040.131915] ACTIVATE PROC [[40915,1],28] STATE SYNC REGISTERED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:92 [batch2:82271] [[40915,0],0] [1585162040.132111] ACTIVATE PROC [[40915,1],30] STATE SYNC REGISTERED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:452 [h28n04:16897] [[40915,0],31] [1585162040.132010] ACTIVATE PROC [[40915,1],30] STATE SYNC REGISTERED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:92 [batch2:82271] [[40915,0],0] [1585162040.132558] ACTIVATE PROC [[40915,1],31] STATE SYNC REGISTERED AT 
../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:452 [h28n06:158971] [[40915,0],32] [1585162040.132466] ACTIVATE PROC [[40915,1],31] STATE SYNC REGISTERED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:92 [batch2:82271] [[40915,0],0] [1585162040.132846] ACTIVATE PROC [[40915,1],29] STATE SYNC REGISTERED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:452 [h27n12:122423] [[40915,0],30] [1585162040.132761] ACTIVATE PROC [[40915,1],29] STATE SYNC REGISTERED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:92 [batch2:82271] [[40915,0],0] [1585162040.133356] ACTIVATE PROC [[40915,1],25] STATE SYNC REGISTERED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:452 [h27n08:104866] [[40915,0],26] [1585162040.133241] ACTIVATE PROC [[40915,1],25] STATE SYNC REGISTERED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:92 [batch2:82271] [[40915,0],0] [1585162040.145324] ACTIVATE PROC [[40915,1],5] STATE SYNC REGISTERED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:452 [b17n15:141994] [[40915,0],6] [1585162040.145218] ACTIVATE PROC [[40915,1],5] STATE SYNC REGISTERED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:92 [b30n12:23915] [[40915,0],10] [1585162040.145478] ACTIVATE PROC [[40915,1],9] STATE SYNC REGISTERED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:92 [batch2:82271] [[40915,0],0] [1585162040.145572] ACTIVATE PROC [[40915,1],9] STATE SYNC REGISTERED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:452 [b30n11:49033] [[40915,0],9] [1585162040.146506] ACTIVATE PROC [[40915,1],8] STATE SYNC REGISTERED AT 
../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:92 [batch2:82271] [[40915,0],0] [1585162040.146603] ACTIVATE PROC [[40915,1],8] STATE SYNC REGISTERED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:452 [batch2:82271] [[40915,0],0] [1585162040.151815] ACTIVATE PROC [[40915,1],20] STATE SYNC REGISTERED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:452 [f23n16:23315] [[40915,0],21] [1585162040.151713] ACTIVATE PROC [[40915,1],20] STATE SYNC REGISTERED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:92 [batch2:82271] [[40915,0],0] [1585162040.153289] ACTIVATE PROC [[40915,1],16] STATE SYNC REGISTERED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:452 [b31n01:160528] [[40915,0],17] [1585162040.153187] ACTIVATE PROC [[40915,1],16] STATE SYNC REGISTERED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:92 [batch2:82271] [[40915,0],0] [1585162040.154399] ACTIVATE PROC [[40915,1],17] STATE SYNC REGISTERED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:452 [b31n02:171328] [[40915,0],18] [1585162040.154302] ACTIVATE PROC [[40915,1],17] STATE SYNC REGISTERED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:92 [b30n18:65526] [[40915,0],16] [1585162040.154712] ACTIVATE PROC [[40915,1],15] STATE SYNC REGISTERED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:92 [batch2:82271] [[40915,0],0] [1585162040.154804] ACTIVATE PROC [[40915,1],15] STATE SYNC REGISTERED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:452 [b30n16:38673] [[40915,0],14] [1585162040.154958] ACTIVATE PROC [[40915,1],13] STATE SYNC REGISTERED AT 
../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:92 [batch2:82271] [[40915,0],0] [1585162040.155060] ACTIVATE PROC [[40915,1],13] STATE SYNC REGISTERED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:452 [batch2:82271] [[40915,0],0] [1585162040.156042] ACTIVATE PROC [[40915,1],7] STATE SYNC REGISTERED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:452 [b30n10:94844] [[40915,0],8] [1585162040.155940] ACTIVATE PROC [[40915,1],7] STATE SYNC REGISTERED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:92 [g30n11:31396] [[40915,0],22] [1585162040.165055] ACTIVATE PROC [[40915,1],21] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1038 [batch2:82271] [[40915,0],0] [1585162040.165209] ACTIVATE PROC [[40915,1],21] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [g30n11:31396] [[40915,0],22] [1585162040.165115] ACTIVATE JOB [40915,1] STATE LOCAL LAUNCH COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:314 [batch2:82271] [[40915,0],0] [1585162040.175407] ACTIVATE PROC [[40915,1],18] STATE SYNC REGISTERED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:452 [b31n03:167060] [[40915,0],19] [1585162040.175309] ACTIVATE PROC [[40915,1],18] STATE SYNC REGISTERED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:92 [batch2:82271] [[40915,0],0] [1585162040.213413] ACTIVATE PROC [[40915,1],21] STATE SYNC REGISTERED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:452 [g30n11:31396] [[40915,0],22] [1585162040.213328] ACTIVATE PROC [[40915,1],21] STATE SYNC REGISTERED AT 
../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:92 [batch2:82271] [[40915,0],0] [1585162040.235678] ACTIVATE PROC [[40915,1],19] STATE SYNC REGISTERED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:452 [b31n04:127470] [[40915,0],20] [1585162040.235574] ACTIVATE PROC [[40915,1],19] STATE SYNC REGISTERED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:92 [batch2:82271] [[40915,0],0] [1585162040.236164] ACTIVATE PROC [[40915,1],6] STATE SYNC REGISTERED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:452 [b30n09:40885] [[40915,0],7] [1585162040.236050] ACTIVATE PROC [[40915,1],6] STATE SYNC REGISTERED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:92 [b30n13:107044] [[40915,0],11] [1585162040.347683] ACTIVATE PROC [[40915,1],10] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1038 [batch2:82271] [[40915,0],0] [1585162040.347836] ACTIVATE PROC [[40915,1],10] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [b30n13:107044] [[40915,0],11] [1585162040.347738] ACTIVATE JOB [40915,1] STATE LOCAL LAUNCH COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:314 [b30n15:39290] [[40915,0],13] [1585162040.367716] ACTIVATE PROC [[40915,1],12] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1038 [batch2:82271] [[40915,0],0] [1585162040.367898] ACTIVATE PROC [[40915,1],12] STATE RUNNING AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [batch2:82271] [[40915,0],0] [1585162040.367908] ACTIVATE JOB [40915,1] STATE RUNNING AT 
../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/base/state_base_fns.c:660 [b30n15:39290] [[40915,0],13] [1585162040.367791] ACTIVATE JOB [40915,1] STATE LOCAL LAUNCH COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:314 [b30n17:18592] [[40915,0],15] [1585162040.435087] ACTIVATE PROC [[40915,1],14] STATE SYNC REGISTERED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:92 [batch2:82271] [[40915,0],0] [1585162040.435192] ACTIVATE PROC [[40915,1],14] STATE SYNC REGISTERED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:452 [batch2:82271] [[40915,0],0] [1585162040.443533] ACTIVATE PROC [[40915,1],10] STATE SYNC REGISTERED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:452 [b30n13:107044] [[40915,0],11] [1585162040.443424] ACTIVATE PROC [[40915,1],10] STATE SYNC REGISTERED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:92 [batch2:82271] [[40915,0],0] [1585162040.464734] ACTIVATE PROC [[40915,1],12] STATE SYNC REGISTERED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:452 [b30n15:39290] [[40915,0],13] [1585162040.464617] ACTIVATE PROC [[40915,1],12] STATE SYNC REGISTERED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:92 [batch2:82271] [[40915,0],0] [1585162040.601615] ACTIVATE PROC [[40915,1],11] STATE SYNC REGISTERED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:452 [b30n14:20639] [[40915,0],12] [1585162040.601511] ACTIVATE PROC [[40915,1],11] STATE SYNC REGISTERED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:92 [batch2:82271] [[40915,0],0] [1585162040.601625] ACTIVATE JOB [40915,1] STATE SYNC REGISTERED AT 
../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/base/state_base_fns.c:669 (b06n15: 0) I am rank: 0 of 32 (host=b06n15) (b30n10: 7) I am rank: 7 of 32 (host=b30n10) (b30n17: 14) I am rank: 14 of 32 (host=b30n17) (b06n18: 3) I am rank: 3 of 32 (host=b06n18) (b06n17: 2) I am rank: 2 of 32 (host=b06n17) (h28n06: 31) I am rank: 31 of 32 (host=h28n06) (b31n03: 18) I am rank: 18 of 32 (host=b31n03) (b30n15: 12) I am rank: 12 of 32 (host=b30n15) (b06n16: 1) I am rank: 1 of 32 (host=b06n16) (b31n01: 16) I am rank: 16 of 32 (host=b31n01) (b31n04: 19) I am rank: 19 of 32 (host=b31n04) (b30n09: 6) I am rank: 6 of 32 (host=b30n09) (b17n15: 5) I am rank: 5 of 32 (host=b17n15) (b30n13: 10) I am rank: 10 of 32 (host=b30n13) (b30n12: 9) I am rank: 9 of 32 (host=b30n12) (b07n01: 4) I am rank: 4 of 32 (host=b07n01) (b30n18: 15) I am rank: 15 of 32 (host=b30n18) (b30n16: 13) I am rank: 13 of 32 (host=b30n16) (h27n05: 22) I am rank: 22 of 32 (host=h27n05) (h27n09: 26) I am rank: 26 of 32 (host=h27n09) (h27n08: 25) I am rank: 25 of 32 (host=h27n08) (h27n10: 27) I am rank: 27 of 32 (host=h27n10) (b31n02: 17) I am rank: 17 of 32 (host=b31n02) (b30n14: 11) I am rank: 11 of 32 (host=b30n14) (f23n16: 20) I am rank: 20 of 32 (host=f23n16) (h27n06: 23) I am rank: 23 of 32 (host=h27n06) (h27n12: 29) I am rank: 29 of 32 (host=h27n12) (b30n11: 8) I am rank: 8 of 32 (host=b30n11) (h27n07: 24) I am rank: 24 of 32 (host=h27n07) (h28n04: 30) I am rank: 30 of 32 (host=h28n04) (h27n11: 28) I am rank: 28 of 32 (host=h27n11) (g30n11: 21) I am rank: 21 of 32 (host=g30n11) [b30n16:38673] [[40915,0],14] [1585162042.074889] ACTIVATE PROC [[40915,1],13] STATE IOF COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/iof/prted/iof_prted_read.c:162 [b30n16:38673] [[40915,0],14] [1585162042.075035] ACTIVATE PROC [[40915,1],13] STATE WAITPID FIRED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1716 [b30n16:38673] 
[[40915,0],14] [1585162042.075045] ACTIVATE PROC [[40915,1],13] STATE NORMALLY TERMINATED AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:389 [b30n18:65526] [[40915,0],16] [1585162042.075171] ACTIVATE PROC [[40915,1],15] STATE IOF COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/iof/prted/iof_prted_read.c:162 [b30n18:65526] [[40915,0],16] [1585162042.075333] ACTIVATE PROC [[40915,1],15] STATE WAITPID FIRED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1716 [b30n18:65526] [[40915,0],16] [1585162042.075346] ACTIVATE PROC [[40915,1],15] STATE NORMALLY TERMINATED AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:389 [b31n03:167060] [[40915,0],19] [1585162042.075411] ACTIVATE PROC [[40915,1],18] STATE IOF COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/iof/prted/iof_prted_read.c:162 [b31n03:167060] [[40915,0],19] [1585162042.075549] ACTIVATE PROC [[40915,1],18] STATE WAITPID FIRED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1716 [b31n03:167060] [[40915,0],19] [1585162042.075559] ACTIVATE PROC [[40915,1],18] STATE NORMALLY TERMINATED AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:389 [b30n17:18592] [[40915,0],15] [1585162042.075753] ACTIVATE PROC [[40915,1],14] STATE IOF COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/iof/prted/iof_prted_read.c:162 [h28n04:16897] [[40915,0],31] [1585162042.075732] ACTIVATE PROC [[40915,1],30] STATE IOF COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/iof/prted/iof_prted_read.c:162 [b30n17:18592] [[40915,0],15] [1585162042.075891] ACTIVATE PROC [[40915,1],14] STATE WAITPID FIRED AT 
../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1716 [h28n04:16897] [[40915,0],31] [1585162042.075870] ACTIVATE PROC [[40915,1],30] STATE WAITPID FIRED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1716 [h28n04:16897] [[40915,0],31] [1585162042.075883] ACTIVATE PROC [[40915,1],30] STATE NORMALLY TERMINATED AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:389 [b30n17:18592] [[40915,0],15] [1585162042.075902] ACTIVATE PROC [[40915,1],14] STATE NORMALLY TERMINATED AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:389 [b30n14:20639] [[40915,0],12] [1585162042.076033] ACTIVATE PROC [[40915,1],11] STATE IOF COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/iof/prted/iof_prted_read.c:162 [batch2:82271] [[40915,0],0] [1585162042.076186] ACTIVATE PROC [[40915,1],13] STATE NORMALLY TERMINATED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [b30n14:20639] [[40915,0],12] [1585162042.076161] ACTIVATE PROC [[40915,1],11] STATE WAITPID FIRED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1716 [b30n14:20639] [[40915,0],12] [1585162042.076172] ACTIVATE PROC [[40915,1],11] STATE NORMALLY TERMINATED AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:389 [batch2:82271] [[40915,0],0] [1585162042.076464] ACTIVATE PROC [[40915,1],15] STATE NORMALLY TERMINATED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [batch2:82271] [[40915,0],0] [1585162042.076600] ACTIVATE PROC [[40915,1],18] STATE NORMALLY TERMINATED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [g30n11:31396] [[40915,0],22] [1585162042.076531] ACTIVATE PROC 
[[40915,1],21] STATE IOF COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/iof/prted/iof_prted_read.c:162 [g30n11:31396] [[40915,0],22] [1585162042.076699] ACTIVATE PROC [[40915,1],21] STATE WAITPID FIRED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1716 [g30n11:31396] [[40915,0],22] [1585162042.076712] ACTIVATE PROC [[40915,1],21] STATE NORMALLY TERMINATED AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:389 [batch2:82271] [[40915,0],0] [1585162042.076949] ACTIVATE PROC [[40915,1],30] STATE NORMALLY TERMINATED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [batch2:82271] [[40915,0],0] [1585162042.077114] ACTIVATE PROC [[40915,1],11] STATE NORMALLY TERMINATED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [h27n08:104866] [[40915,0],26] [1585162042.077019] ACTIVATE PROC [[40915,1],25] STATE IOF COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/iof/prted/iof_prted_read.c:162 [b06n17:106679] [[40915,0],3] [1585162042.077088] ACTIVATE PROC [[40915,1],2] STATE IOF COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/iof/prted/iof_prted_read.c:162 [h28n06:158971] [[40915,0],32] [1585162042.077111] ACTIVATE PROC [[40915,1],31] STATE IOF COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/iof/prted/iof_prted_read.c:162 [h27n12:122423] [[40915,0],30] [1585162042.077166] ACTIVATE PROC [[40915,1],29] STATE IOF COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/iof/prted/iof_prted_read.c:162 [h27n08:104866] [[40915,0],26] [1585162042.077168] ACTIVATE PROC [[40915,1],25] STATE WAITPID FIRED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1716 [h27n08:104866] [[40915,0],26] [1585162042.077181] 
ACTIVATE PROC [[40915,1],25] STATE NORMALLY TERMINATED AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:389 [b30n15:39290] [[40915,0],13] [1585162042.077169] ACTIVATE PROC [[40915,1],12] STATE IOF COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/iof/prted/iof_prted_read.c:162 [b06n17:106679] [[40915,0],3] [1585162042.077232] ACTIVATE PROC [[40915,1],2] STATE WAITPID FIRED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1716 [h28n06:158971] [[40915,0],32] [1585162042.077240] ACTIVATE PROC [[40915,1],31] STATE WAITPID FIRED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1716 [h28n06:158971] [[40915,0],32] [1585162042.077251] ACTIVATE PROC [[40915,1],31] STATE NORMALLY TERMINATED AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:389 [b06n17:106679] [[40915,0],3] [1585162042.077243] ACTIVATE PROC [[40915,1],2] STATE NORMALLY TERMINATED AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:389 [b31n01:160528] [[40915,0],17] [1585162042.077266] ACTIVATE PROC [[40915,1],16] STATE IOF COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/iof/prted/iof_prted_read.c:162 [b30n09:40885] [[40915,0],7] [1585162042.077237] ACTIVATE PROC [[40915,1],6] STATE IOF COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/iof/prted/iof_prted_read.c:162 [batch2:82271] [[40915,0],0] [1585162042.077413] ACTIVATE PROC [[40915,1],14] STATE NORMALLY TERMINATED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [b06n16:125939] [[40915,0],2] [1585162042.077301] ACTIVATE PROC [[40915,1],1] STATE IOF COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/iof/prted/iof_prted_read.c:162 [h27n12:122423] [[40915,0],30] 
[1585162042.077317] ACTIVATE PROC [[40915,1],29] STATE WAITPID FIRED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1716 [h27n12:122423] [[40915,0],30] [1585162042.077328] ACTIVATE PROC [[40915,1],29] STATE NORMALLY TERMINATED AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:389 [b30n15:39290] [[40915,0],13] [1585162042.077306] ACTIVATE PROC [[40915,1],12] STATE WAITPID FIRED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1716 [b30n15:39290] [[40915,0],13] [1585162042.077321] ACTIVATE PROC [[40915,1],12] STATE NORMALLY TERMINATED AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:389 [b30n09:40885] [[40915,0],7] [1585162042.077380] ACTIVATE PROC [[40915,1],6] STATE WAITPID FIRED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1716 [b30n09:40885] [[40915,0],7] [1585162042.077393] ACTIVATE PROC [[40915,1],6] STATE NORMALLY TERMINATED AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:389 [b31n01:160528] [[40915,0],17] [1585162042.077414] ACTIVATE PROC [[40915,1],16] STATE WAITPID FIRED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1716 [b06n16:125939] [[40915,0],2] [1585162042.077446] ACTIVATE PROC [[40915,1],1] STATE WAITPID FIRED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1716 [h27n07:103492] [[40915,0],25] [1585162042.077448] ACTIVATE PROC [[40915,1],24] STATE IOF COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/iof/prted/iof_prted_read.c:162 [b31n01:160528] [[40915,0],17] [1585162042.077425] ACTIVATE PROC [[40915,1],16] STATE NORMALLY TERMINATED AT 
../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:389 [b06n16:125939] [[40915,0],2] [1585162042.077459] ACTIVATE PROC [[40915,1],1] STATE NORMALLY TERMINATED AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:389 [b30n12:23915] [[40915,0],10] [1585162042.077515] ACTIVATE PROC [[40915,1],9] STATE IOF COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/iof/prted/iof_prted_read.c:162 [h27n06:87678] [[40915,0],24] [1585162042.077512] ACTIVATE PROC [[40915,1],23] STATE IOF COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/iof/prted/iof_prted_read.c:162 [h27n10:130337] [[40915,0],28] [1585162042.077525] ACTIVATE PROC [[40915,1],27] STATE IOF COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/iof/prted/iof_prted_read.c:162 [h27n07:103492] [[40915,0],25] [1585162042.077572] ACTIVATE PROC [[40915,1],24] STATE WAITPID FIRED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1716 [batch2:82271] [[40915,0],0] [1585162042.077743] ACTIVATE PROC [[40915,1],21] STATE NORMALLY TERMINATED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [h27n10:130337] [[40915,0],28] [1585162042.077655] ACTIVATE PROC [[40915,1],27] STATE WAITPID FIRED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1716 [h27n10:130337] [[40915,0],28] [1585162042.077666] ACTIVATE PROC [[40915,1],27] STATE NORMALLY TERMINATED AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:389 [h27n06:87678] [[40915,0],24] [1585162042.077657] ACTIVATE PROC [[40915,1],23] STATE WAITPID FIRED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1716 [h27n07:103492] [[40915,0],25] [1585162042.077583] ACTIVATE PROC [[40915,1],24] STATE 
NORMALLY TERMINATED AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:389 [h27n11:133026] [[40915,0],29] [1585162042.077625] ACTIVATE PROC [[40915,1],28] STATE IOF COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/iof/prted/iof_prted_read.c:162 [b30n12:23915] [[40915,0],10] [1585162042.077669] ACTIVATE PROC [[40915,1],9] STATE WAITPID FIRED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1716 [b30n12:23915] [[40915,0],10] [1585162042.077680] ACTIVATE PROC [[40915,1],9] STATE NORMALLY TERMINATED AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:389 [b07n01:139664] [[40915,0],5] [1585162042.077728] ACTIVATE PROC [[40915,1],4] STATE IOF COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/iof/prted/iof_prted_read.c:162 [h27n06:87678] [[40915,0],24] [1585162042.077668] ACTIVATE PROC [[40915,1],23] STATE NORMALLY TERMINATED AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:389 [h27n11:133026] [[40915,0],29] [1585162042.077778] ACTIVATE PROC [[40915,1],28] STATE WAITPID FIRED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1716 [b07n01:139664] [[40915,0],5] [1585162042.077868] ACTIVATE PROC [[40915,1],4] STATE WAITPID FIRED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1716 [h27n11:133026] [[40915,0],29] [1585162042.077790] ACTIVATE PROC [[40915,1],28] STATE NORMALLY TERMINATED AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:389 [b07n01:139664] [[40915,0],5] [1585162042.077881] ACTIVATE PROC [[40915,1],4] STATE NORMALLY TERMINATED AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:389 [b06n18:128515] [[40915,0],4] [1585162042.077981] ACTIVATE 
PROC [[40915,1],3] STATE IOF COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/iof/prted/iof_prted_read.c:162 [batch2:82271] [[40915,0],0] [1585162042.078212] ACTIVATE PROC [[40915,1],31] STATE NORMALLY TERMINATED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [batch2:82271] [[40915,0],0] [1585162042.078236] ACTIVATE PROC [[40915,1],25] STATE NORMALLY TERMINATED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [b06n15:147995] [[40915,0],1] [1585162042.078128] ACTIVATE PROC [[40915,1],0] STATE IOF COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/iof/prted/iof_prted_read.c:162 [b30n13:107044] [[40915,0],11] [1585162042.078126] ACTIVATE PROC [[40915,1],10] STATE IOF COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/iof/prted/iof_prted_read.c:162 [f23n16:23315] [[40915,0],21] [1585162042.078170] ACTIVATE PROC [[40915,1],20] STATE IOF COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/iof/prted/iof_prted_read.c:162 [b06n15:147995] [[40915,0],1] [1585162042.078266] ACTIVATE PROC [[40915,1],0] STATE WAITPID FIRED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1716 [b30n13:107044] [[40915,0],11] [1585162042.078271] ACTIVATE PROC [[40915,1],10] STATE WAITPID FIRED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1716 [batch2:82271] [[40915,0],0] [1585162042.078408] ACTIVATE PROC [[40915,1],2] STATE NORMALLY TERMINATED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [h27n09:105965] [[40915,0],27] [1585162042.078271] ACTIVATE PROC [[40915,1],26] STATE IOF COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/iof/prted/iof_prted_read.c:162 [batch2:82271] [[40915,0],0] [1585162042.078429] 
ACTIVATE PROC [[40915,1],6] STATE NORMALLY TERMINATED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [b06n15:147995] [[40915,0],1] [1585162042.078280] ACTIVATE PROC [[40915,1],0] STATE NORMALLY TERMINATED AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:389 [batch2:82271] [[40915,0],0] [1585162042.078475] ACTIVATE PROC [[40915,1],29] STATE NORMALLY TERMINATED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [b30n13:107044] [[40915,0],11] [1585162042.078284] ACTIVATE PROC [[40915,1],10] STATE NORMALLY TERMINATED AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:389 [b31n04:127470] [[40915,0],20] [1585162042.078276] ACTIVATE PROC [[40915,1],19] STATE IOF COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/iof/prted/iof_prted_read.c:162 [f23n16:23315] [[40915,0],21] [1585162042.078316] ACTIVATE PROC [[40915,1],20] STATE WAITPID FIRED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1716 [f23n16:23315] [[40915,0],21] [1585162042.078327] ACTIVATE PROC [[40915,1],20] STATE NORMALLY TERMINATED AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:389 [batch2:82271] [[40915,0],0] [1585162042.078507] ACTIVATE PROC [[40915,1],16] STATE NORMALLY TERMINATED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [h27n09:105965] [[40915,0],27] [1585162042.078411] ACTIVATE PROC [[40915,1],26] STATE WAITPID FIRED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1716 [h27n09:105965] [[40915,0],27] [1585162042.078422] ACTIVATE PROC [[40915,1],26] STATE NORMALLY TERMINATED AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:389 [b31n04:127470] 
[[40915,0],20] [1585162042.078434] ACTIVATE PROC [[40915,1],19] STATE WAITPID FIRED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1716 [b31n04:127470] [[40915,0],20] [1585162042.078447] ACTIVATE PROC [[40915,1],19] STATE NORMALLY TERMINATED AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:389 [h27n05:127708] [[40915,0],23] [1585162042.078443] ACTIVATE PROC [[40915,1],22] STATE IOF COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/iof/prted/iof_prted_read.c:162 [batch2:82271] [[40915,0],0] [1585162042.078585] ACTIVATE PROC [[40915,1],1] STATE NORMALLY TERMINATED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [batch2:82271] [[40915,0],0] [1585162042.078654] ACTIVATE PROC [[40915,1],24] STATE NORMALLY TERMINATED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [batch2:82271] [[40915,0],0] [1585162042.078662] ACTIVATE PROC [[40915,1],27] STATE NORMALLY TERMINATED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [b31n02:171328] [[40915,0],18] [1585162042.078546] ACTIVATE PROC [[40915,1],17] STATE IOF COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/iof/prted/iof_prted_read.c:162 [h27n05:127708] [[40915,0],23] [1585162042.078601] ACTIVATE PROC [[40915,1],22] STATE WAITPID FIRED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1716 [h27n05:127708] [[40915,0],23] [1585162042.078612] ACTIVATE PROC [[40915,1],22] STATE NORMALLY TERMINATED AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:389 [batch2:82271] [[40915,0],0] [1585162042.078737] ACTIVATE PROC [[40915,1],9] STATE NORMALLY TERMINATED AT 
../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [batch2:82271] [[40915,0],0] [1585162042.078748] ACTIVATE PROC [[40915,1],23] STATE NORMALLY TERMINATED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [b31n02:171328] [[40915,0],18] [1585162042.078678] ACTIVATE PROC [[40915,1],17] STATE WAITPID FIRED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1716 [b31n02:171328] [[40915,0],18] [1585162042.078689] ACTIVATE PROC [[40915,1],17] STATE NORMALLY TERMINATED AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:389 [batch2:82271] [[40915,0],0] [1585162042.078790] ACTIVATE PROC [[40915,1],28] STATE NORMALLY TERMINATED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [batch2:82271] [[40915,0],0] [1585162042.078837] ACTIVATE PROC [[40915,1],12] STATE NORMALLY TERMINATED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [batch2:82271] [[40915,0],0] [1585162042.078904] ACTIVATE PROC [[40915,1],4] STATE NORMALLY TERMINATED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [b30n11:49033] [[40915,0],9] [1585162042.078930] ACTIVATE PROC [[40915,1],8] STATE IOF COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/iof/prted/iof_prted_read.c:162 [b30n10:94844] [[40915,0],8] [1585162042.079015] ACTIVATE PROC [[40915,1],7] STATE IOF COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/iof/prted/iof_prted_read.c:162 [b06n18:128515] [[40915,0],4] [1585162042.079076] ACTIVATE PROC [[40915,1],3] STATE WAITPID FIRED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1716 [b30n11:49033] [[40915,0],9] [1585162042.079056] ACTIVATE PROC [[40915,1],8] STATE WAITPID 
FIRED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1716 [b30n11:49033] [[40915,0],9] [1585162042.079069] ACTIVATE PROC [[40915,1],8] STATE NORMALLY TERMINATED AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:389 [b30n10:94844] [[40915,0],8] [1585162042.079150] ACTIVATE PROC [[40915,1],7] STATE WAITPID FIRED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1716 [b06n18:128515] [[40915,0],4] [1585162042.079089] ACTIVATE PROC [[40915,1],3] STATE NORMALLY TERMINATED AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:389 [batch2:82271] [[40915,0],0] [1585162042.079258] ACTIVATE PROC [[40915,1],0] STATE NORMALLY TERMINATED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [batch2:82271] [[40915,0],0] [1585162042.079311] ACTIVATE PROC [[40915,1],10] STATE NORMALLY TERMINATED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [b30n10:94844] [[40915,0],8] [1585162042.079161] ACTIVATE PROC [[40915,1],7] STATE NORMALLY TERMINATED AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:389 [b17n15:141994] [[40915,0],6] [1585162042.079219] ACTIVATE PROC [[40915,1],5] STATE IOF COMPLETE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/iof/prted/iof_prted_read.c:162 [batch2:82271] [[40915,0],0] [1585162042.079366] ACTIVATE PROC [[40915,1],20] STATE NORMALLY TERMINATED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [b17n15:141994] [[40915,0],6] [1585162042.079368] ACTIVATE PROC [[40915,1],5] STATE WAITPID FIRED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/odls/base/odls_base_default_fns.c:1716 [b17n15:141994] [[40915,0],6] [1585162042.079379] ACTIVATE PROC 
[[40915,1],5] STATE NORMALLY TERMINATED AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/prted/state_prted.c:389 [batch2:82271] [[40915,0],0] [1585162042.079513] ACTIVATE PROC [[40915,1],26] STATE NORMALLY TERMINATED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [batch2:82271] [[40915,0],0] [1585162042.079520] ACTIVATE PROC [[40915,1],19] STATE NORMALLY TERMINATED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [batch2:82271] [[40915,0],0] [1585162042.079747] ACTIVATE PROC [[40915,1],22] STATE NORMALLY TERMINATED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [batch2:82271] [[40915,0],0] [1585162042.079754] ACTIVATE PROC [[40915,1],17] STATE NORMALLY TERMINATED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [batch2:82271] [[40915,0],0] [1585162042.080122] ACTIVATE PROC [[40915,1],8] STATE NORMALLY TERMINATED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [batch2:82271] [[40915,0],0] [1585162042.080185] ACTIVATE PROC [[40915,1],3] STATE NORMALLY TERMINATED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [batch2:82271] [[40915,0],0] [1585162042.080340] ACTIVATE PROC [[40915,1],7] STATE NORMALLY TERMINATED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [batch2:82271] [[40915,0],0] [1585162042.080468] ACTIVATE PROC [[40915,1],5] STATE NORMALLY TERMINATED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/plm/base/plm_base_receive.c:413 [batch2:82271] [[40915,0],0] [1585162042.080477] ACTIVATE JOB [40915,1] STATE NORMALLY TERMINATED AT ../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/base/state_base_fns.c:761 [batch2:82271] [[40915,0],0] [1585162042.080840] 
ACTIVATE JOB [40915,1] STATE NOTIFY COMPLETED AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/dvm/state_dvm.c:633 [batch2:82271] [[40915,0],0] [1585162042.080874] ACTIVATE JOB [40915,1] STATE NOTIFIED AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/state/dvm/state_dvm.c:840 [batch2:82271] [[40915,0],0] [1585162042.080974] ACTIVATE JOB [40915,1] STATE NORMALLY TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:451 [b06n15:147995] [[40915,0],1] [1585162042.081192] ACTIVATE JOB [40915,1] STATE NORMALLY TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:451 [b06n16:125939] [[40915,0],2] [1585162042.081212] ACTIVATE JOB [40915,1] STATE NORMALLY TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:451 [b06n18:128515] [[40915,0],4] [1585162042.081230] ACTIVATE JOB [40915,1] STATE NORMALLY TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:451 [b17n15:141994] [[40915,0],6] [1585162042.081240] ACTIVATE JOB [40915,1] STATE NORMALLY TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:451 [b30n14:20639] [[40915,0],12] [1585162042.081256] ACTIVATE JOB [40915,1] STATE NORMALLY TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:451 [b30n10:94844] [[40915,0],8] [1585162042.081255] ACTIVATE JOB [40915,1] STATE NORMALLY TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:451 [b30n11:49033] [[40915,0],9] [1585162042.081260] ACTIVATE JOB [40915,1] STATE NORMALLY TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:451 [b30n17:18592] [[40915,0],15] [1585162042.081275] ACTIVATE JOB [40915,1] STATE NORMALLY TERMINATED AT 
../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:451 [b30n13:107044] [[40915,0],11] [1585162042.081254] ACTIVATE JOB [40915,1] STATE NORMALLY TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:451 [b31n02:171328] [[40915,0],18] [1585162042.081285] ACTIVATE JOB [40915,1] STATE NORMALLY TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:451 [b30n09:40885] [[40915,0],7] [1585162042.081245] ACTIVATE JOB [40915,1] STATE NORMALLY TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:451 [b30n12:23915] [[40915,0],10] [1585162042.081253] ACTIVATE JOB [40915,1] STATE NORMALLY TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:451 [b31n01:160528] [[40915,0],17] [1585162042.081284] ACTIVATE JOB [40915,1] STATE NORMALLY TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:451 [b30n15:39290] [[40915,0],13] [1585162042.081271] ACTIVATE JOB [40915,1] STATE NORMALLY TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:451 [b31n03:167060] [[40915,0],19] [1585162042.081297] ACTIVATE JOB [40915,1] STATE NORMALLY TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:451 [b07n01:139664] [[40915,0],5] [1585162042.081227] ACTIVATE JOB [40915,1] STATE NORMALLY TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:451 [h27n07:103492] [[40915,0],25] [1585162042.081327] ACTIVATE JOB [40915,1] STATE NORMALLY TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:451 [b30n18:65526] [[40915,0],16] [1585162042.081287] ACTIVATE JOB [40915,1] STATE NORMALLY TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:451 [b31n04:127470] 
[[40915,0],20] [1585162042.081294] ACTIVATE JOB [40915,1] STATE NORMALLY TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:451 [h27n05:127708] [[40915,0],23] [1585162042.081321] ACTIVATE JOB [40915,1] STATE NORMALLY TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:451 [h27n06:87678] [[40915,0],24] [1585162042.081333] ACTIVATE JOB [40915,1] STATE NORMALLY TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:451 [b30n16:38673] [[40915,0],14] [1585162042.081287] ACTIVATE JOB [40915,1] STATE NORMALLY TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:451 [f23n16:23315] [[40915,0],21] [1585162042.081319] ACTIVATE JOB [40915,1] STATE NORMALLY TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:451 [h28n04:16897] [[40915,0],31] [1585162042.081354] ACTIVATE JOB [40915,1] STATE NORMALLY TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:451 [b06n17:106679] [[40915,0],3] [1585162042.081224] ACTIVATE JOB [40915,1] STATE NORMALLY TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:451 [h27n11:133026] [[40915,0],29] [1585162042.081351] ACTIVATE JOB [40915,1] STATE NORMALLY TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:451 [h27n10:130337] [[40915,0],28] [1585162042.081335] ACTIVATE JOB [40915,1] STATE NORMALLY TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:451 [h27n08:104866] [[40915,0],26] [1585162042.081330] ACTIVATE JOB [40915,1] STATE NORMALLY TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:451 [h27n12:122423] [[40915,0],30] [1585162042.081362] ACTIVATE JOB [40915,1] STATE NORMALLY TERMINATED AT 
../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:451 [h27n09:105965] [[40915,0],27] [1585162042.081349] ACTIVATE JOB [40915,1] STATE NORMALLY TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:451 [g30n11:31396] [[40915,0],22] [1585162042.081321] ACTIVATE JOB [40915,1] STATE NORMALLY TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:451 [h28n06:158971] [[40915,0],32] [1585162042.081359] ACTIVATE JOB [40915,1] STATE NORMALLY TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/pmix/pmix_server_gen.c:451 [b06n15:147995] [[40915,0],1] [1585162042.123249] ACTIVATE JOB NULL STATE DAEMONS TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/prted_comm.c:549 [b06n16:125939] [[40915,0],2] [1585162042.123262] ACTIVATE JOB NULL STATE DAEMONS TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/prted_comm.c:549 [b30n11:49033] [[40915,0],9] [1585162042.123305] ACTIVATE JOB NULL STATE DAEMONS TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/prted_comm.c:549 [b17n15:141994] [[40915,0],6] [1585162042.123288] ACTIVATE JOB NULL STATE DAEMONS TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/prted_comm.c:549 [b06n18:128515] [[40915,0],4] [1585162042.123278] ACTIVATE JOB NULL STATE DAEMONS TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/prted_comm.c:549 [b30n13:107044] [[40915,0],11] [1585162042.123307] ACTIVATE JOB NULL STATE DAEMONS TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/prted_comm.c:549 [b30n17:18592] [[40915,0],15] [1585162042.123331] ACTIVATE JOB NULL STATE DAEMONS TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/prted_comm.c:549 [b30n16:38673] [[40915,0],14] [1585162042.123336] ACTIVATE JOB NULL STATE DAEMONS TERMINATED AT 
../../../../../../source/openmpi-br-master/prrte/src/prted/prted_comm.c:549 [b30n10:94844] [[40915,0],8] [1585162042.123305] ACTIVATE JOB NULL STATE DAEMONS TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/prted_comm.c:549 [b30n12:23915] [[40915,0],10] [1585162042.123313] ACTIVATE JOB NULL STATE DAEMONS TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/prted_comm.c:549 [b30n15:39290] [[40915,0],13] [1585162042.123316] ACTIVATE JOB NULL STATE DAEMONS TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/prted_comm.c:549 [b07n01:139664] [[40915,0],5] [1585162042.123274] ACTIVATE JOB NULL STATE DAEMONS TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/prted_comm.c:549 [b31n03:167060] [[40915,0],19] [1585162042.123347] ACTIVATE JOB NULL STATE DAEMONS TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/prted_comm.c:549 [b31n02:171328] [[40915,0],18] [1585162042.123351] ACTIVATE JOB NULL STATE DAEMONS TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/prted_comm.c:549 [b30n18:65526] [[40915,0],16] [1585162042.123337] ACTIVATE JOB NULL STATE DAEMONS TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/prted_comm.c:549 [b30n14:20639] [[40915,0],12] [1585162042.123320] ACTIVATE JOB NULL STATE DAEMONS TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/prted_comm.c:549 [b31n04:127470] [[40915,0],20] [1585162042.123350] ACTIVATE JOB NULL STATE DAEMONS TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/prted_comm.c:549 [b31n01:160528] [[40915,0],17] [1585162042.123348] ACTIVATE JOB NULL STATE DAEMONS TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/prted_comm.c:549 [h27n07:103492] [[40915,0],25] [1585162042.123387] ACTIVATE JOB NULL STATE DAEMONS TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/prted_comm.c:549 [f23n16:23315] 
[[40915,0],21] [1585162042.123374] ACTIVATE JOB NULL STATE DAEMONS TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/prted_comm.c:549 [h27n10:130337] [[40915,0],28] [1585162042.123408] ACTIVATE JOB NULL STATE DAEMONS TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/prted_comm.c:549 [b06n17:106679] [[40915,0],3] [1585162042.123273] ACTIVATE JOB NULL STATE DAEMONS TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/prted_comm.c:549 [h27n05:127708] [[40915,0],23] [1585162042.123386] ACTIVATE JOB NULL STATE DAEMONS TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/prted_comm.c:549 [h27n06:87678] [[40915,0],24] [1585162042.123392] ACTIVATE JOB NULL STATE DAEMONS TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/prted_comm.c:549 [h27n12:122423] [[40915,0],30] [1585162042.123407] ACTIVATE JOB NULL STATE DAEMONS TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/prted_comm.c:549 [g30n11:31396] [[40915,0],22] [1585162042.123382] ACTIVATE JOB NULL STATE DAEMONS TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/prted_comm.c:549 [h27n08:104866] [[40915,0],26] [1585162042.123402] ACTIVATE JOB NULL STATE DAEMONS TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/prted_comm.c:549 [h27n09:105965] [[40915,0],27] [1585162042.123406] ACTIVATE JOB NULL STATE DAEMONS TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/prted_comm.c:549 [h27n11:133026] [[40915,0],29] [1585162042.123411] ACTIVATE JOB NULL STATE DAEMONS TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/prted_comm.c:549 [h28n04:16897] [[40915,0],31] [1585162042.123427] ACTIVATE JOB NULL STATE DAEMONS TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/prted_comm.c:549 [h28n06:158971] [[40915,0],32] [1585162042.123434] ACTIVATE JOB NULL STATE DAEMONS TERMINATED AT 
../../../../../../source/openmpi-br-master/prrte/src/prted/prted_comm.c:549 [b30n09:40885] [[40915,0],7] [1585162042.123292] ACTIVATE JOB NULL STATE DAEMONS TERMINATED AT ../../../../../../source/openmpi-br-master/prrte/src/prted/prted_comm.c:549 [batch2:82271] [[40915,0],0] [1585162042.124946] ACTIVATE PROC [[40915,0],1] STATE COMMUNICATION FAILURE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/oob/tcp/oob_tcp_component.c:1022 [batch2:82271] [[40915,0],0] [1585162042.125064] ACTIVATE PROC [[40915,0],12] STATE COMMUNICATION FAILURE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/oob/tcp/oob_tcp_component.c:1022 [batch2:82271] [[40915,0],0] [1585162042.125084] ACTIVATE PROC [[40915,0],7] STATE COMMUNICATION FAILURE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/oob/tcp/oob_tcp_component.c:1022 [batch2:82271] [[40915,0],0] [1585162042.125102] ACTIVATE PROC [[40915,0],11] STATE COMMUNICATION FAILURE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/oob/tcp/oob_tcp_component.c:1022 [batch2:82271] [[40915,0],0] [1585162042.125143] ACTIVATE PROC [[40915,0],5] STATE COMMUNICATION FAILURE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/oob/tcp/oob_tcp_component.c:1022 [batch2:82271] [[40915,0],0] [1585162042.125218] ACTIVATE PROC [[40915,0],28] STATE COMMUNICATION FAILURE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/oob/tcp/oob_tcp_component.c:1022 [batch2:82271] [[40915,0],0] [1585162042.125236] ACTIVATE PROC [[40915,0],19] STATE COMMUNICATION FAILURE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/oob/tcp/oob_tcp_component.c:1022 [batch2:82271] [[40915,0],0] [1585162042.125241] ACTIVATE PROC [[40915,0],20] STATE COMMUNICATION FAILURE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/oob/tcp/oob_tcp_component.c:1022 [batch2:82271] [[40915,0],0] [1585162042.125247] ACTIVATE PROC [[40915,0],13] STATE 
COMMUNICATION FAILURE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/oob/tcp/oob_tcp_component.c:1022 [batch2:82271] [[40915,0],0] [1585162042.125253] ACTIVATE PROC [[40915,0],18] STATE COMMUNICATION FAILURE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/oob/tcp/oob_tcp_component.c:1022 [batch2:82271] [[40915,0],0] [1585162042.125387] ACTIVATE PROC [[40915,0],32] STATE COMMUNICATION FAILURE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/oob/tcp/oob_tcp_component.c:1022 [batch2:82271] [[40915,0],0] [1585162042.125394] ACTIVATE PROC [[40915,0],10] STATE COMMUNICATION FAILURE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/oob/tcp/oob_tcp_component.c:1022 [batch2:82271] [[40915,0],0] [1585162042.125400] ACTIVATE PROC [[40915,0],2] STATE COMMUNICATION FAILURE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/oob/tcp/oob_tcp_component.c:1022 [batch2:82271] [[40915,0],0] [1585162042.125405] ACTIVATE PROC [[40915,0],25] STATE COMMUNICATION FAILURE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/oob/tcp/oob_tcp_component.c:1022 [batch2:82271] [[40915,0],0] [1585162042.125426] ACTIVATE PROC [[40915,0],26] STATE COMMUNICATION FAILURE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/oob/tcp/oob_tcp_component.c:1022 [batch2:82271] [[40915,0],0] [1585162042.125431] ACTIVATE PROC [[40915,0],21] STATE COMMUNICATION FAILURE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/oob/tcp/oob_tcp_component.c:1022 [batch2:82271] [[40915,0],0] [1585162042.125444] ACTIVATE PROC [[40915,0],9] STATE COMMUNICATION FAILURE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/oob/tcp/oob_tcp_component.c:1022 [batch2:82271] [[40915,0],0] [1585162042.125449] ACTIVATE PROC [[40915,0],16] STATE COMMUNICATION FAILURE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/oob/tcp/oob_tcp_component.c:1022 
[batch2:82271] [[40915,0],0] [1585162042.125455] ACTIVATE PROC [[40915,0],31] STATE COMMUNICATION FAILURE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/oob/tcp/oob_tcp_component.c:1022 [batch2:82271] [[40915,0],0] [1585162042.125460] ACTIVATE PROC [[40915,0],4] STATE COMMUNICATION FAILURE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/oob/tcp/oob_tcp_component.c:1022 [batch2:82271] [[40915,0],0] [1585162042.125466] ACTIVATE PROC [[40915,0],29] STATE COMMUNICATION FAILURE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/oob/tcp/oob_tcp_component.c:1022 [batch2:82271] [[40915,0],0] [1585162042.125574] ACTIVATE PROC [[40915,0],22] STATE COMMUNICATION FAILURE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/oob/tcp/oob_tcp_component.c:1022 [batch2:82271] [[40915,0],0] [1585162042.125581] ACTIVATE PROC [[40915,0],15] STATE COMMUNICATION FAILURE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/oob/tcp/oob_tcp_component.c:1022 [batch2:82271] [[40915,0],0] [1585162042.125586] ACTIVATE PROC [[40915,0],14] STATE COMMUNICATION FAILURE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/oob/tcp/oob_tcp_component.c:1022 [batch2:82271] [[40915,0],0] [1585162042.125591] ACTIVATE PROC [[40915,0],8] STATE COMMUNICATION FAILURE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/oob/tcp/oob_tcp_component.c:1022 [batch2:82271] [[40915,0],0] [1585162042.125597] ACTIVATE PROC [[40915,0],27] STATE COMMUNICATION FAILURE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/oob/tcp/oob_tcp_component.c:1022 [batch2:82271] [[40915,0],0] [1585162042.125602] ACTIVATE PROC [[40915,0],6] STATE COMMUNICATION FAILURE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/oob/tcp/oob_tcp_component.c:1022 [batch2:82271] [[40915,0],0] [1585162042.125608] ACTIVATE PROC [[40915,0],23] STATE COMMUNICATION FAILURE AT 
../../../../../../../../../source/openmpi-br-master/prrte/src/mca/oob/tcp/oob_tcp_component.c:1022 [batch2:82271] [[40915,0],0] [1585162042.125664] ACTIVATE PROC [[40915,0],17] STATE COMMUNICATION FAILURE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/oob/tcp/oob_tcp_component.c:1022 [batch2:82271] [[40915,0],0] [1585162042.125679] ACTIVATE PROC [[40915,0],3] STATE COMMUNICATION FAILURE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/oob/tcp/oob_tcp_component.c:1022 [batch2:82271] [[40915,0],0] [1585162042.125692] ACTIVATE PROC [[40915,0],24] STATE COMMUNICATION FAILURE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/oob/tcp/oob_tcp_component.c:1022 [batch2:82271] [[40915,0],0] [1585162042.125705] ACTIVATE PROC [[40915,0],30] STATE COMMUNICATION FAILURE AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/oob/tcp/oob_tcp_component.c:1022 [batch2:82271] [[40915,0],0] [1585162042.125719] ACTIVATE JOB NULL STATE DAEMONS TERMINATED AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/errmgr/dvm/errmgr_dvm.c:297 [batch2:82271] [[40915,0],0] [1585162042.125734] ACTIVATE JOB NULL STATE DAEMONS TERMINATED AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/errmgr/dvm/errmgr_dvm.c:297 [batch2:82271] [[40915,0],0] [1585162042.125756] ACTIVATE JOB NULL STATE DAEMONS TERMINATED AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/errmgr/dvm/errmgr_dvm.c:297 [batch2:82271] [[40915,0],0] [1585162042.125767] ACTIVATE JOB NULL STATE DAEMONS TERMINATED AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/errmgr/dvm/errmgr_dvm.c:297 [batch2:82271] [[40915,0],0] [1585162042.125778] ACTIVATE JOB NULL STATE DAEMONS TERMINATED AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/errmgr/dvm/errmgr_dvm.c:297 [batch2:82271] [[40915,0],0] [1585162042.125789] ACTIVATE JOB NULL STATE DAEMONS TERMINATED AT 
../../../../../../../../../source/openmpi-br-master/prrte/src/mca/errmgr/dvm/errmgr_dvm.c:297 [batch2:82271] [[40915,0],0] [1585162042.125863] ACTIVATE JOB NULL STATE DAEMONS TERMINATED AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/errmgr/dvm/errmgr_dvm.c:297 [batch2:82271] [[40915,0],0] [1585162042.125908] ACTIVATE JOB NULL STATE DAEMONS TERMINATED AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/errmgr/dvm/errmgr_dvm.c:297 [batch2:82271] [[40915,0],0] [1585162042.125943] ACTIVATE JOB NULL STATE DAEMONS TERMINATED AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/errmgr/dvm/errmgr_dvm.c:297 [batch2:82271] [[40915,0],0] [1585162042.125956] ACTIVATE JOB NULL STATE DAEMONS TERMINATED AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/errmgr/dvm/errmgr_dvm.c:297 [batch2:82271] [[40915,0],0] [1585162042.125968] ACTIVATE JOB NULL STATE DAEMONS TERMINATED AT ../../../../../../../../../source/openmpi-br-master/prrte/src/mca/errmgr/dvm/errmgr_dvm.c:297 # SLEEP 2 #################### # RESULTS-DBG: RC:0 NNODES:32 PES:32 NPERNODE:1 TIME:4.499 ##################################### # Results Dir: /gpfs/alpine/proj-shared/stf010/naughton/summit/ompix/scaling/tests ##################################### # TJN-END: Wed Mar 25 14:47:25 EDT 2020 ------------------------------------------------------------ Sender: LSF System Subject: Job 994949: in cluster Done Job was submitted from host by user in cluster at Wed Mar 25 14:46:49 2020 Job was executed on host(s) <1*batch2>, in queue , as user in cluster at Wed Mar 25 14:47:01 2020 <42*b06n15> <42*b06n16> <42*b06n17> <42*b06n18> <42*b07n01> <42*b17n15> <42*b30n09> <42*b30n10> <42*b30n11> <42*b30n12> <42*b30n13> <42*b30n14> <42*b30n15> <42*b30n16> <42*b30n17> <42*b30n18> <42*b31n01> <42*b31n02> <42*b31n03> <42*b31n04> <42*f23n16> <42*g30n11> <42*h27n05> <42*h27n06> <42*h27n07> <42*h27n08> <42*h27n09> <42*h27n10> <42*h27n11> <42*h27n12> <42*h28n04> 
<42*h28n06> was used as the home directory. was used as the working directory. Started at Wed Mar 25 14:47:01 2020 Terminated at Wed Mar 25 14:47:25 2020 Results reported at Wed Mar 25 14:47:25 2020 The output (if any) is above this job summary. prrte-3.0.13/contrib/states/statechop.pl0000775000175000017500000000242515145263240020420 0ustar alastairalastair#!/usr/bin/env perl # Fri Jun 05 2020 15:06:24PM EDT Thomas Naughton # # Process logs from PRRTE # with '--prtemca prrte_state_base_verbose 1' # enabled and the TJN timing patch applied # # Input: Logfile from a testrun (`mpirun ... a.out >& LOG`) # Output: Column oriented output with semicolon (`;`) separator # # Usage: # prterun --prtemca prrte_state_base_verbose 1 -np 1 ./a.out >& LOG # ./statechop.pl LOG > STATE_LOG.txt # #### use strict; my $file = $ARGV[0]; if (! -f "$file") { print "ERROR: Failed to read input file '$file'\n"; exit(1); } print "# File: $file\n"; open(FH, "<$file") or die "Error: failed to open '$file'\n"; my @data = ; chomp(@data); close(FH); print "# Node; Job; Timestamp; JobState\n"; foreach my $line (@data) { next unless $line =~ /.* STATE .*/; next unless $line =~ /.* JOB .*/; # TJN: ignore the activating (reaching) msgs for now next if $line =~ /.* ACTIVATING .*/; my ($a, $b) = split("STATE", $line); #a=([node0:31036] [[9239,0],0] [1585072455.946404] ACTIVATE JOB NULL ) my ($node, $job, $timestamp, $other) = split(/ /, $a); my ($b1, $b2) = split(" AT ", $b); $b1 =~ s/^ //; #print " a=($a) b=($b1)\n"; print " $node; $job; $timestamp; $b1\n"; } prrte-3.0.13/contrib/whitespace-purge.sh0000775000175000017500000000146515145263240020401 0ustar alastairalastair#!/bin/bash # # Copyright (c) 2015-2016 Intel, Inc. All rights reserved. # Copyright (c) 2015 Los Alamos National Security, LLC. All rights # reserved # Copyright (c) 2015 Cisco Systems, Inc. 
# $COPYRIGHT$ # # Additional copyrights may follow # # $HEADER$ # for file in $(git ls-files) ; do # check for the mime-type and do not follow symbolic links. this # will cause file to print application/x-symlink for the mime-type # allowing us to only have to check if the type is application to # skip sym links, pdfs, etc. If any other file types should be # skipped add the check here. type=$(file -b --mime-type -h $file) if test ${type::4} == "text" ; then # Eliminate whitespace at the end of lines perl -pi -e 's/\s*$/\n/' $file fi done prrte-3.0.13/contrib/find_offenders.pl0000775000175000017500000000512215145263240020073 0ustar alastairalastair#!/usr/bin/perl # # Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. # Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow # # $HEADER$ # if (scalar(@ARGV) != 2) { print "Usage: find_offenders.pl eg.,: running from top level source tree #contrib/find_offenders.pl contrib/results.txt .\n"; exit(3); } $source_tree = @ARGV[1]; $header_file_list = @ARGV[0]; #first construct the danger list open(FILE_LIST, "$header_file_list") || print "Could not open results.txt\n"; open(DANGER_FILES, "> contrib/headers.txt") || print "Could not open headers.txt\n"; while () { #check if this file is a file in the source tree chomp($_); $file_name = $_; open(FILE, "find . 
-name $file_name |") || print "find failed\n"; while() { #file is found print DANGER_FILES "#include <$file_name>\n"; } close (FILE); } close (DANGER_FILES); close (FILE_LIST); open(DANGER_FILES, "contrib/headers.txt") || print "Could not open headers.txt\n"; open(OFFENSIVE, "> contrib/offenders.list") || print "Could not open offenders list\n"; while () { $header = $_; chomp($header); print; open(C_FILES, "find $source_tree -name *.c |") || print "Could not complete find command\n"; while () { $c_file = $_; open(C_FILE, "$c_file") || print "Could not open $_\n"; while () { if (/$header/) { print OFFENSIVE $header ." --> ". $c_file ; } } close (C_FILE); } close (C_FILES); open(H_FILES, "find . -name *.h |") || print "Could not complete find command\n"; while () { $h_file = $_; open(H_FILE, "$h_file") || print "Could not open $_\n"; while () { if (/$header/) { print OFFENSIVE $header ." --> ". $h_file ; } } close (H_FILE); } close (H_FILES); } close (DANGER_FILES); close (OFFENSIVE); prrte-3.0.13/contrib/purge-trailing-blank-lines.pl0000775000175000017500000001132115145263240022244 0ustar alastairalastair#!/usr/bin/perl -w use strict; use Cwd; use Getopt::Long; # Set to true if the script should merely check for up-to-date copyrights. # Will exit with status 111 if there are out of date copyrights which this # script can correct. my $CHECK_ONLY = 0; # used by $CHECK_ONLY logic for bookeeping my $would_replace = 0; # Set to true to suppress most informational messages. Only out of date files # will be printed. 
my $QUIET = 0; # Set to true if we just want to see the help message my $HELP = 0; # Set to true if we want to strip blank lines from all files my $ALL = 0; GetOptions( "help" => \$HELP, "quiet" => \$QUIET, "check-only" => \$CHECK_ONLY, "all" => \$ALL, ) or die "unable to parse options, stopped"; if ($HELP) { print < Working file: $f\n"; # check file size my $filesize = -s $f; if (0 == $filesize) { next; } open $fh, "+<$f" or die "$!"; binmode $fh; # Just in case my $size = 4096; my ($cur_pos, $buf); seek $fh, -$size, 2; while (1) { $cur_pos = tell $fh; read $fh, $buf, $size; last if $buf =~ m/\S/s; seek $fh, -$size*2, 1; } $buf =~ m/(\s+)$/s; $cur_pos += $-[0] || 0; truncate $fh, ++$cur_pos if $cur_pos; close $fh; } # Returns a list of file names (relative to pwd) which the VCS considers to be modified. sub find_modified_files { my @files = (); # Number of path entries to remove from ${top}-relative paths. # (--show-cdup either returns the empty string or sequence of "../" # entries, always ending in a "/") my $n_strip = scalar(split(m!/!, scalar(`git rev-parse --show-cdup`))) - 1; # "." restricts scope, but does not get us relative path names my $cmd = "git status -z --porcelain --untracked-files=no ."; my $lines = `$cmd`; # From git-status(1): # X Y Meaning # ------------------------------------------------- # [MD] not updated # M [ MD] updated in index # A [ MD] added to index # D [ M] deleted from index # R [ MD] renamed in index # C [ MD] copied in index # [MARC] index and work tree matches # [ MARC] M work tree changed since index # [ MARC] D deleted in work tree # ------------------------------------------------- # D D unmerged, both deleted # A U unmerged, added by us # U D unmerged, deleted by them # U A unmerged, added by them # D U unmerged, deleted by us # A A unmerged, both added # U U unmerged, both modified # ------------------------------------------------- # ? ? 
untracked # ------------------------------------------------- my $s1 = ""; my $s2 = ""; my $fullname = ""; foreach my $line (split /\x{00}/, $lines) { my $keep = 0; unless (($s1, $s2, $fullname) = $line =~ m/^(.)(.) (.*)$/) { next; } # skip opal_ignore files next if ($fullname =~ "opal_ignore"); if ($ALL) { $keep = 1; } else { # ignore all merge cases next if ($s1 eq "D" and $s2 eq "D"); next if ($s1 eq "A" and $s2 eq "A"); next if ($s1 eq "U" or $s2 eq "U"); # only update for actually added/modified cases, no copies, # renames, etc. $keep = 1 if ($s1 eq "M" or $s2 eq "M"); $keep = 1 if ($s1 eq "A"); } if ($keep) { my $relname = $fullname; $relname =~ s!^([^/]*/){$n_strip}!!g; push @files, $relname if (-f $relname); } } return @files; } exit 0; prrte-3.0.13/test/0000775000175000017500000000000015145263240014077 5ustar alastairalastairprrte-3.0.13/test/get-nofence.c0000664000175000017500000001441215145263240016437 0ustar alastairalastair#include #include #include static pmix_proc_t allproc = {}; static pmix_proc_t myproc = {}; static bool mywait = false; static bool refresh = false; static bool timeout = false; #define ERR(msg, ...) 
\ do { \ time_t tm = time(NULL); \ char *stm = ctime(&tm); \ stm[strlen(stm) - 1] = 0; \ fprintf(stderr, "%s ERROR: %s:%d " msg "\n", stm, __FILE__, __LINE__, ##__VA_ARGS__); \ exit(1); \ } while (0); int pmi_set_string(const char *key, void *data, size_t size) { int rc; pmix_value_t value; PMIX_VALUE_CONSTRUCT(&value); value.type = PMIX_BYTE_OBJECT; value.data.bo.bytes = data; value.data.bo.size = size; if (PMIX_SUCCESS != (rc = PMIx_Put(PMIX_GLOBAL, key, &value))) { ERR("Client ns %s rank %d: PMIx_Put failed: %s\n", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); } if (PMIX_SUCCESS != (rc = PMIx_Commit())) { ERR("Client ns %s rank %d: PMIx_Commit failed: %s\n", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); } /* protect the data */ value.data.bo.bytes = NULL; value.data.bo.size = 0; PMIX_VALUE_DESTRUCT(&value); printf("%s:%d PMIx_Put on %s\n", myproc.nspace, myproc.rank, key); return 0; } int pmi_get_string(uint32_t peer_rank, const char *key, void **data_out, size_t *data_size_out) { int rc; pmix_proc_t proc; pmix_value_t *pvalue; pmix_info_t info; PMIX_LOAD_PROCID(&proc, myproc.nspace, peer_rank); if (refresh) { PMIX_INFO_LOAD(&info, PMIX_GET_REFRESH_CACHE, &refresh, PMIX_BOOL); rc = PMIx_Get(&proc, key, &info, 1, &pvalue); PMIX_INFO_DESTRUCT(&info); } else if (timeout) { rc = 2; PMIX_INFO_LOAD(&info, PMIX_TIMEOUT, &rc, PMIX_INT); rc = PMIx_Get(&proc, key, &info, 1, &pvalue); PMIX_INFO_DESTRUCT(&info); } else { rc = PMIx_Get(&proc, key, NULL, 0, &pvalue); } if (PMIX_SUCCESS != rc) { ERR("Client ns %s rank %d: PMIx_Get on rank %u %s: %s\n", myproc.nspace, myproc.rank, peer_rank, key, PMIx_Error_string(rc)); } if (pvalue->type != PMIX_BYTE_OBJECT) { ERR("Client ns %s rank %d: PMIx_Get %s: got wrong data type\n", myproc.nspace, myproc.rank, key); } *data_out = pvalue->data.bo.bytes; *data_size_out = pvalue->data.bo.size; /* protect the data */ pvalue->data.bo.bytes = NULL; pvalue->data.bo.size = 0; PMIX_VALUE_RELEASE(pvalue); PMIX_PROC_DESTRUCT(&proc); 
printf("%s:%d PMIx_get %s returned %zi bytes\n", myproc.nspace, myproc.rank, key, data_size_out[0]); return 0; } int pmix_exchange(bool flag) { int rc; pmix_info_t info; fprintf(stderr, "Execute fence\n"); PMIX_INFO_CONSTRUCT(&info); PMIX_INFO_LOAD(&info, PMIX_COLLECT_DATA, &flag, PMIX_BOOL); rc = PMIx_Fence(&allproc, 1, &info, 1); if (PMIX_SUCCESS != rc) { fprintf(stderr, "Client ns %s rank %d: PMIx_Fence_nb failed: %d\n", myproc.nspace, myproc.rank, rc); exit(1); } PMIX_INFO_DESTRUCT(&info); return 0; } int main(int argc, char *argv[]) { char data[256] = {}; char *data_out; size_t size_out; int rc; pmix_value_t *pvalue; /* check the args */ if (1 < argc) { if (2 < argc || 0 == strncmp(argv[1], "-h", 2) || 0 == strncmp(argv[1], "--h", 3)) { fprintf(stderr, "Usage:\n"); fprintf(stderr, "\t--wait Test PMIX_GET_WAIT_FOR_KEY\n"); fprintf(stderr, "\t--refresh Test PMIX_GET_REFRESH_CACHE\n"); fprintf(stderr, "\t--timeout Test PMIX_GET_WAIT_FOR_KEY, but timeout\n"); exit(0); } if (0 == strncmp(argv[1], "--w", 3)) { mywait = true; } else if (0 == strncmp(argv[1], "--r", 3)) { refresh = true; } else if (0 == strncmp(argv[1], "--t", 3)) { timeout = true; } else { fprintf(stderr, "Invalid cmd line option: %s\n", argv[1]); fprintf(stderr, "Usage:\n"); fprintf(stderr, "\t--wait Test PMIX_GET_WAIT_FOR_KEY\n"); fprintf(stderr, "\t--refresh Test PMIX_GET_REFRESH_CACHE\n"); fprintf(stderr, "\t--timeout Test PMIX_GET_WAIT_FOR_KEY, but timeout\n"); exit(1); } } if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) { ERR("PMIx_Init failed"); exit(1); } if (myproc.rank == 0) { printf("PMIx initialized\n"); } /* job-related info is found in our nspace, assigned to the * wildcard rank as it doesn't relate to a specific rank. 
Setup * a name to retrieve such values */ PMIX_LOAD_PROCID(&allproc, myproc.nspace, PMIX_RANK_WILDCARD); /* get the number of procs in our job */ if (PMIX_SUCCESS != (rc = PMIx_Get(&allproc, PMIX_JOB_SIZE, NULL, 0, &pvalue))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Get job size failed: %d\n", myproc.nspace, myproc.rank, rc); exit(1); } uint32_t nprocs = pvalue->data.uint32; PMIX_VALUE_RELEASE(pvalue); /* the below two lines break the subsequent PMIx_Get query on a key set later */ snprintf(data, 256, "FIRST TIME rank %d", myproc.rank); pmi_set_string("test-key-1", data, 256); if (timeout) { sleep(10); } else if (mywait) { sleep(2); } if (0 == myproc.rank) { pmi_get_string(1, "test-key-1", (void **) &data_out, &size_out); } else { pmi_get_string(0, "test-key-1", (void **) &data_out, &size_out); } printf("%d: obtained data \"%s\"\n", myproc.rank, data_out); if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) { ERR("Client ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace, myproc.rank, rc); } if (myproc.rank == 0) printf("PMIx finalized\n"); exit(0); } prrte-3.0.13/test/iostress.c0000664000175000017500000000455715145263240016131 0ustar alastairalastair/* * Copyright (c) 2022 Nanook Consulting. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ * */ #define _GNU_SOURCE #include #include #include #include #include #include #include static pmix_proc_t myproc; static char buffer[1024]; int main(int argc, char **argv) { pmix_status_t rc; pid_t pid; char hostname[1024]; int numbytes; int n = 0; pid = getpid(); gethostname(hostname, 1024); /* init us - note that the call to "init" includes the return of * any job-related info provided by the RM. 
This includes the * location of all procs in our job */ if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Init failed: %d\n", myproc.nspace, myproc.rank, rc); exit(1); } fprintf(stderr, "[%s:%d:%lu]: Running on node %s\n", myproc.nspace, myproc.rank, (unsigned long) pid, hostname); if (0 == myproc.rank) { /* we are going to read stdin and just throw it * away, taking a break between chunks */ while (1) { memset(buffer, 0, 1024); numbytes = read(STDIN_FILENO, buffer, 1024); if (0 > numbytes) { if (EAGAIN == errno || EINTR == errno) { /* retry */ continue; } /* unrecoverable error */ fprintf(stderr, "[%s:%d:%lu]: Unrecoverable read error\n", myproc.nspace, myproc.rank, (unsigned long) pid); break; } else if (0 == numbytes) { fprintf(stderr, "Read complete\n"); break; } if (0 == n % 1000) { fprintf(stderr, "[%s:%d:%lu]: Read chunk %d with %d bytes\n", myproc.nspace, myproc.rank, (unsigned long) pid, n, numbytes); } ++n; } } else { /* we are going to sleep for a time based * on our rank, and then finalize */ sleep(2 * myproc.rank); } rc = PMIx_Finalize(NULL, 0); if (PMIX_SUCCESS != rc) { fprintf(stderr, "Client ns %s rank %d: PMIx_Finalize failed: %d\n", myproc.nspace, myproc.rank, rc); } } prrte-3.0.13/test/cmspawn.c0000664000175000017500000001160715145263240015720 0ustar alastairalastair#include #include #include #include #include #include int main(int argc, char *argv[]) { pmix_status_t rc; int size, n; pid_t pid; pmix_proc_t myproc; pmix_proc_t proc, parent; pmix_app_t app; pmix_proc_t peers[2]; char hostname[1024]; pmix_value_t *val = NULL; pmix_nspace_t nspace; pid = getpid(); gethostname(hostname, 1024); fprintf(stderr, "PID %d alive\n", (int)pid); rc = PMIx_Init(&myproc, NULL, 0); if (PMIX_SUCCESS != rc) { fprintf(stderr, "Client: PMIx_Init failed: %s\n", PMIx_Error_string(rc)); exit(1); } fprintf(stderr, "Client ns %s rank %d: PMIx_Init complete\n", myproc.nspace, myproc.rank); PMIX_LOAD_PROCID(&proc, 
myproc.nspace, PMIX_RANK_WILDCARD); rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val); if (PMIX_SUCCESS != rc) { fprintf(stderr, "Client ns %s rank %d: PMIx_Get job size failed: %s\n", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); goto done; } PMIX_VALUE_GET_NUMBER(rc, val, size, int); if (PMIX_SUCCESS != rc) { fprintf(stderr, "Client ns %s rank %d: get size number failed: %s\n", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); goto done; } PMIX_VALUE_RELEASE(val); printf("[%s:%u pid %ld] of %d starting up on node %s!\n", myproc.nspace, myproc.rank, (long) pid, size, hostname); rc = PMIx_Get(&myproc, PMIX_PARENT_ID, NULL, 0, &val); /* If we don't find it, then we're the parent */ if (PMIX_SUCCESS != rc) { if (0 == myproc.rank) { pid = getpid(); printf("Parent [%s pid %ld] about to spawn!\n", hostname, (long) pid); PMIX_APP_CONSTRUCT(&app); app.cmd = strdup(argv[0]); PMIX_ARGV_APPEND(rc, app.argv, argv[0]); app.maxprocs = 3; rc = PMIx_Spawn(NULL, 0, &app, 1, nspace); if (PMIX_SUCCESS != rc) { printf("Child failed to spawn\n"); return rc; } printf("Parent done with spawn\n"); /* post a piece of information our children should get */ PMIX_LOAD_PROCID(&peers[0], myproc.nspace, 0); PMIX_LOAD_PROCID(&peers[1], nspace, PMIX_RANK_WILDCARD); for (n=0; n < 10; n++) { /* connect to the children */ printf("%s.%u: Connecting to children - signature %s %s\n", myproc.nspace, myproc.rank, peers[0].nspace, peers[1].nspace); rc = PMIx_Connect(peers, 2, NULL, 0); if (PMIX_SUCCESS != rc) { printf("Connect to children failed!\n"); } printf("%s.%u: Connect complete!\n", myproc.nspace, myproc.rank); printf("%s.%u: Disconnecting from children\n", myproc.nspace, myproc.rank); rc = PMIx_Disconnect(peers, 2, NULL, 0); if (PMIX_SUCCESS != rc) { printf("Disonnect from children failed!\n"); } printf("%s.%u: Disconnect complete!\n", myproc.nspace, myproc.rank); } } PMIX_LOAD_PROCID(&peers[0], myproc.nspace, PMIX_RANK_WILDCARD); PMIx_Fence(&peers[0], 1, NULL, 0); } /* Otherwise, we're 
the child */ else { printf("Hello from the child %s.%u of %d on host %s pid %ld\n", myproc.nspace, myproc.rank, size, hostname, (long)pid); PMIX_LOAD_PROCID(&peers[0], val->data.proc->nspace, 0); PMIX_LOAD_PROCID(&peers[1], myproc.nspace, PMIX_RANK_WILDCARD); PMIX_VALUE_RELEASE(val); /* post some info our parent and peers should get */ /* connect to our parent */ if (1 == myproc.rank) { sleep(2); printf("\n\n\n"); } for (n=0; n < 10; n++) { printf("%s.%u: Connecting to parent - signature %s %s\n", myproc.nspace, myproc.rank, peers[0].nspace, peers[1].nspace); rc = PMIx_Connect(peers, 2, NULL, 0); if (PMIX_SUCCESS != rc) { printf("%s.%u: Connect to parent failed!\n", myproc.nspace, myproc.rank); } printf("%s.%u: Connect complete!\n", myproc.nspace, myproc.rank); printf("%s.%u: Disconnecting from parent\n", myproc.nspace, myproc.rank); if (2 == myproc.rank) { sleep(2); printf("\n\n\n"); } rc = PMIx_Disconnect(peers, 2, NULL, 0); if (PMIX_SUCCESS != rc) { printf("Disonnect from parent failed!\n"); } printf("%s.%u: Disconnect complete!\n", myproc.nspace, myproc.rank); } } done: PMIx_Finalize(NULL, 0); fprintf(stderr, "%d: exiting\n", pid); return 0; } prrte-3.0.13/test/cycle_forever.sh0000775000175000017500000000021515145263240017263 0ustar alastairalastair#!/bin/bash i=0 for (( ; ; )) do prun -n 1 hostname > /dev/null ((i++)) if [ $i -eq 1000 ] then echo Executed $i times i=0; fi done prrte-3.0.13/test/attachtest/0000775000175000017500000000000015145263240016243 5ustar alastairalastairprrte-3.0.13/test/attachtest/tool.c0000664000175000017500000000104415145263240017363 0ustar alastairalastair#include #include #include int main(int argc, char **argv) { pmix_proc_t proc; pmix_status_t rc = PMIX_ERROR; rc = PMIx_tool_init(&proc, NULL, 0); if (rc != PMIX_SUCCESS) { fprintf(stderr, "PMIx_tool_init failed: %s\n", PMIx_Error_string(rc)); return EXIT_FAILURE; } rc = PMIx_tool_finalize(); if (rc != PMIX_SUCCESS) { fprintf(stderr, "PMIx_tool_finalize failed: %s\n", 
PMIx_Error_string(rc)); return EXIT_FAILURE; } return EXIT_SUCCESS; } prrte-3.0.13/test/attachtest/app.c0000664000175000017500000000123715145263240017172 0ustar alastairalastair#include #include #include int main(int argc, char **argv) { int pause = 0; if (argc > 1) { pause = atoi(argv[1]); } pmix_proc_t proc; pmix_status_t rc = PMIX_ERROR; rc = PMIx_Init(&proc, NULL, 0); if (rc != PMIX_SUCCESS) { fprintf(stderr, "PMIx_Init failed: %s\n", PMIx_Error_string(rc)); return EXIT_FAILURE; } printf("Hello\n"); sleep(pause); printf("Bye\n"); rc = PMIx_Finalize(NULL, 0); if (rc != PMIX_SUCCESS) { fprintf(stderr, "PMIx_Finalize failed: %s\n", PMIx_Error_string(rc)); return EXIT_FAILURE; } return EXIT_SUCCESS; } prrte-3.0.13/test/mpi/0000775000175000017500000000000015145263240014664 5ustar alastairalastairprrte-3.0.13/test/mpi/spawn_multiple.c0000664000175000017500000000313415145263240020074 0ustar alastairalastair#include #include #include #include int main(int argc, char* argv[]) { int msg; MPI_Comm parent, child; int rank, size; char hostname[1024]; pid_t pid; int i; char *cmds[2]; char *argv0[] = { "foo", NULL }; char *argv1[] = { "bar", NULL }; char **spawn_argv[2]; int maxprocs[] = { 2, 2 }; MPI_Info info[] = { MPI_INFO_NULL, MPI_INFO_NULL }; cmds[1] = cmds[0] = argv[0]; spawn_argv[0] = argv0; spawn_argv[1] = argv1; MPI_Init(NULL, NULL); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); MPI_Comm_get_parent(&parent); /* If we get COMM_NULL back, then we're the parent */ if (MPI_COMM_NULL == parent) { pid = getpid(); printf("Parent [pid %ld] about to spawn!\n", (long)pid); MPI_Comm_spawn_multiple(2, cmds, spawn_argv, maxprocs, info, 0, MPI_COMM_WORLD, &child, MPI_ERRCODES_IGNORE); printf("Parent done with spawn\n"); MPI_Comm_disconnect(&child); printf("Parent disconnected\n"); } /* Otherwise, we're the child */ else { MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); gethostname(hostname, sizeof(hostname)); pid = 
getpid(); printf("Hello from the child %d of %d on host %s pid %ld: argv[1] = %s\n", rank, size, hostname, (long)pid, argv[1]); MPI_Comm_disconnect(&parent); printf("Child %d disconnected\n", rank); } MPI_Finalize(); return 0; } prrte-3.0.13/test/qspawn.c0000664000175000017500000000624215145263240015560 0ustar alastairalastair#include #include #include #include #include #include int main(int argc, char *argv[]) { pmix_status_t rc; int size; pid_t pid; pmix_proc_t myproc; pmix_proc_t proc, parent; pmix_app_t app; pmix_proc_t peers[2]; char hostname[1024]; pmix_value_t *val = NULL; pmix_nspace_t nspace; pid = getpid(); gethostname(hostname, 1024); rc = PMIx_Init(&myproc, NULL, 0); if (PMIX_SUCCESS != rc) { fprintf(stderr, "Client: PMIx_Init failed: %s\n", PMIx_Error_string(rc)); exit(1); } PMIX_LOAD_PROCID(&proc, myproc.nspace, PMIX_RANK_WILDCARD); rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val); if (PMIX_SUCCESS != rc) { fprintf(stderr, "Client ns %s rank %d: PMIx_Get job size failed: %s\n", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); goto done; } PMIX_VALUE_GET_NUMBER(rc, val, size, int); if (PMIX_SUCCESS != rc) { fprintf(stderr, "Client ns %s rank %d: get size number failed: %s\n", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); goto done; } PMIX_VALUE_RELEASE(val); rc = PMIx_Get(&myproc, PMIX_PARENT_ID, NULL, 0, &val); /* If we don't find it, then we're the parent */ if (PMIX_SUCCESS != rc) { if (0 == myproc.rank) { pid = getpid(); PMIX_APP_CONSTRUCT(&app); app.cmd = strdup(argv[0]); PMIX_ARGV_APPEND(rc, app.argv, argv[0]); app.maxprocs = 3; rc = PMIx_Spawn(NULL, 0, &app, 1, nspace); if (PMIX_SUCCESS != rc) { printf("Child failed to spawn\n"); return rc; } /* post a piece of information our children should get */ PMIX_LOAD_PROCID(&peers[0], myproc.nspace, 0); PMIX_LOAD_PROCID(&peers[1], nspace, PMIX_RANK_WILDCARD); /* connect to the children */ rc = PMIx_Connect(peers, 2, NULL, 0); if (PMIX_SUCCESS != rc) { printf("Connect to children failed!\n"); 
} rc = PMIx_Disconnect(peers, 2, NULL, 0); if (PMIX_SUCCESS != rc) { printf("Disonnect from children failed!\n"); } } PMIX_LOAD_PROCID(&peers[0], myproc.nspace, PMIX_RANK_WILDCARD); PMIx_Fence(&peers[0], 1, NULL, 0); } /* Otherwise, we're the child */ else { PMIX_LOAD_PROCID(&peers[0], val->data.proc->nspace, 0); PMIX_LOAD_PROCID(&peers[1], myproc.nspace, PMIX_RANK_WILDCARD); PMIX_VALUE_RELEASE(val); /* connect to our parent */ if (1 == myproc.rank) { sleep(1); } rc = PMIx_Connect(peers, 2, NULL, 0); if (PMIX_SUCCESS != rc) { printf("%s.%u: Connect to parent failed!\n", myproc.nspace, myproc.rank); } if (2 == myproc.rank) { sleep(1); } rc = PMIx_Disconnect(peers, 2, NULL, 0); if (PMIX_SUCCESS != rc) { printf("Disonnect from parent failed!\n"); } } done: PMIx_Finalize(NULL, 0); return 0; } prrte-3.0.13/test/spawn_timeout.c0000664000175000017500000000640615145263240017147 0ustar alastairalastair/* * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2021-2025 Nanook Consulting All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ * */ #include #define _GNU_SOURCE #include #include #include #include #include #include "examples.h" #include static pmix_proc_t myproc; int main(int argc, char **argv) { int rc, exitcode; pmix_value_t value; pmix_value_t *val = &value; char nsp2[PMIX_MAX_NSLEN + 1]; pmix_app_t *app; char hostname[1024], dir[1024]; if (0 > gethostname(hostname, sizeof(hostname))) { exit(1); } if (NULL == getcwd(dir, 1024)) { exit(1); } /* init us */ if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Init failed: %d\n", myproc.nspace, myproc.rank, rc); exit(rc); } fprintf(stderr, "Client ns %s rank %d: Running\n", myproc.nspace, myproc.rank); rc = PMIx_Get(&myproc, PMIX_PARENT_ID, NULL, 0, &val); if (PMIX_SUCCESS != rc || NULL == val) { // we are the parent PMIX_APP_CREATE(app, 1); if (0 > asprintf(&app->cmd, "%s/%s", dir, argv[0])) { exitcode = 1; goto done; } app->maxprocs = 1; PMIX_ARGV_APPEND(rc, app->argv, app->cmd); fprintf(stderr, "Client ns %s rank %d: calling PMIx_Spawn\n", myproc.nspace, myproc.rank); if (PMIX_SUCCESS != (rc = PMIx_Spawn(NULL, 0, app, 1, nsp2))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Spawn failed: %s(%d)\n", myproc.nspace, myproc.rank, PMIx_Error_string(rc), rc); exitcode = rc; goto done; } else { fprintf(stderr, "Spawn success.\n"); } PMIX_APP_FREE(app, 1); } sleep(15); done: rc = PMIx_Finalize(NULL, 0); if (PMIX_SUCCESS != rc) { fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace, myproc.rank, rc); } fflush(stderr); printf("exit\n"); return (exitcode); } prrte-3.0.13/test/spawn_multiple.c0000664000175000017500000000255015145263240017310 0ustar alastairalastair #include #include #include #include int main(int argc, char* argv[]) { pmix_proc_t myproc; char hostname[1024]; pid_t pid; pmix_value_t *val = NULL; pmix_app_t apps[2]; pmix_nspace_t nspace; pmix_status_t rc; pid = getpid(); gethostname(hostname, 
1024); PMIx_Init(&myproc, NULL, 0); rc = PMIx_Get(&myproc, PMIX_PARENT_ID, NULL, 0, &val); /* If we don't find it, then we're the parent */ if (PMIX_SUCCESS != rc) { printf("Parent [pid %ld] about to spawn!\n", (long)pid); PMIX_APP_CONSTRUCT(&apps[0]); apps[0].cmd = strdup(argv[0]); PMIX_ARGV_APPEND(rc, apps[0].argv, "This is job 1"); apps[0].maxprocs = 2; PMIX_APP_CONSTRUCT(&apps[1]); apps[1].cmd = strdup(argv[0]); PMIX_ARGV_APPEND(rc, apps[1].argv, "This is job 2"); apps[1].maxprocs = 2; rc = PMIx_Spawn(NULL, 0, apps, 2, nspace); if (PMIX_SUCCESS != rc) { printf("Child failed to spawn\n"); return rc; } printf("Parent done with spawn\n"); } /* Otherwise, we're the child */ else { printf("Hello from the child %s.%u on host %s pid %ld argv[1] = %s\n", myproc.nspace, myproc.rank, hostname, (long)pid, argv[1]); } PMIx_Finalize(NULL, 0); return 0; } prrte-3.0.13/test/loop_spawn.c0000664000175000017500000001521215145263240016425 0ustar alastairalastair/*file .c : spawned the file Exe*/ #include #include #include #include #include #include #include #include #include #include "test.h" #define EXE_TEST "./loop_child" static bool verbose = false; static void regcbfunc(pmix_status_t status, size_t ref, void *cbdata) { mylock_t *lock = (mylock_t *) cbdata; DEBUG_WAKEUP_THREAD(lock); } static void evhandler(size_t evhdlr_registration_id, pmix_status_t status, const pmix_proc_t *source, pmix_info_t info[], size_t ninfo, pmix_info_t *results, size_t nresults, pmix_event_notification_cbfunc_fn_t cbfunc, void *cbdata) { mylock_t *lock = NULL; pmix_status_t jobstatus = 0; pmix_nspace_t jobid = {0}; size_t n; char *msg = NULL; /* we should always have info returned to us - if not, there is * nothing we can do */ if (NULL != info) { for (n = 0; n < ninfo; n++) { if (0 == strncmp(info[n].key, PMIX_JOB_TERM_STATUS, PMIX_MAX_KEYLEN)) { jobstatus = info[n].value.data.status; } else if (0 == strncmp(info[n].key, PMIX_EVENT_AFFECTED_PROC, PMIX_MAX_KEYLEN)) { PMIX_LOAD_NSPACE(jobid, 
info[n].value.data.proc->nspace); } else if (0 == strncmp(info[n].key, PMIX_EVENT_RETURN_OBJECT, PMIX_MAX_KEYLEN)) { lock = (mylock_t *) info[n].value.data.ptr; } else if (0 == strncmp(info[n].key, PMIX_EVENT_TEXT_MESSAGE, PMIX_MAX_KEYLEN)) { msg = info[n].value.data.string; } } if (verbose) { fprintf(stdout, "JOB %s COMPLETED WITH STATUS %d MSG %s\n", jobid, jobstatus, (NULL == msg) ? "NONE" : msg); } } if (NULL != lock) { /* save the status */ lock->status = jobstatus; if (NULL != msg) { lock->msg = strdup(msg); } /* release the lock */ DEBUG_WAKEUP_THREAD(lock); } /* we _always_ have to execute the evhandler callback or * else the event progress engine will hang */ if (NULL != cbfunc) { cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata); } } int main(int argc, char **argv) { int iter, itermax = 100; bool sync = false; pmix_status_t rc; pmix_proc_t myproc; pmix_proc_t procs[2]; pmix_app_t app; pmix_nspace_t nspace; pmix_proc_t pname; mylock_t lock, rellock; pmix_info_t iptr[3]; static struct option myoptions[] = { {"iters", required_argument, NULL, 'i'}, {"sync", no_argument, NULL, 's'}, {"verbose", no_argument, NULL, 'v'}, {"help", no_argument, NULL, 'h'}, {"report", required_argument, NULL, 'r'}, {0, 0, 0, 0} }; int option_index; int opt; int freq = 100; while ((opt = getopt_long(argc, argv, "hsvi:r:", myoptions, &option_index)) != -1) { switch (opt) { case 'i': itermax = strtol(optarg, NULL, 10); break; case 's': sync = true; break; case 'v': verbose = true; break; case 'r': freq = strtol(optarg, NULL, 10); break; case 'h': fprintf(stderr, "Usage: %s\n Options:\n" " [-i N] [number of iterations]\n" " [-s] [Sync mode - wait for termination before spawning next child]\n" " [-v] [Verbose]\n" " [-r N] [Report progress every N iterations]\n", argv[0]); exit(1); default: fprintf(stderr, "Usage: %s\n Options:\n" " [-i N] [number of iterations]\n" " [-s] [Sync mode - wait for termination before spawning next child]\n" " [-v] [Verbose]\n" " [-r N] [Report 
progress every N iterations]\n", argv[0]); exit(1); } } if (verbose) { printf("parent*******************************\n"); printf("parent: Launching Child*\n"); } rc = PMIx_Init(&myproc, NULL, 0); if (PMIX_SUCCESS != rc) { fprintf(stderr, "Failed to init\n"); exit(1); } PMIX_APP_CONSTRUCT(&app); app.cmd = strdup(EXE_TEST); PMIX_ARGV_APPEND(rc, app.argv, EXE_TEST); if (verbose) { PMIX_ARGV_APPEND(rc, app.argv, "--verbose"); } app.maxprocs = 1; PMIX_XFER_PROCID(&procs[0], &myproc); for (iter = 0; iter < itermax; ++iter) { rc = PMIx_Spawn(NULL, 0, &app, 1, nspace); if (PMIX_SUCCESS != rc) { fprintf(stderr, "Failed to spawn iteration %d: %s\n", iter, PMIx_Error_string(rc)); goto done; } if (sync) { DEBUG_CONSTRUCT_LOCK(&rellock); rc = PMIX_EVENT_JOB_END; /* give the handler a name */ PMIX_INFO_LOAD(&iptr[0], PMIX_EVENT_HDLR_NAME, "JOB_TERMINATION_EVENT", PMIX_STRING); /* specify we only want to be notified when our * child job terminates */ PMIX_LOAD_PROCID(&pname, nspace, PMIX_RANK_WILDCARD); PMIX_INFO_LOAD(&iptr[1], PMIX_EVENT_AFFECTED_PROC, &pname, PMIX_PROC); /* request that they return our lock object */ PMIX_INFO_LOAD(&iptr[2], PMIX_EVENT_RETURN_OBJECT, &rellock, PMIX_POINTER); /* do the registration */ DEBUG_CONSTRUCT_LOCK(&lock); PMIx_Register_event_handler(&rc, 1, iptr, 3, evhandler, regcbfunc, &lock); DEBUG_WAIT_THREAD(&lock); DEBUG_DESTRUCT_LOCK(&lock); PMIX_INFO_DESTRUCT(&iptr[0]); PMIX_INFO_DESTRUCT(&iptr[1]); PMIX_INFO_DESTRUCT(&iptr[2]); } PMIX_LOAD_PROCID(&procs[1], nspace, 0); rc = PMIx_Connect(procs, 2, NULL, 0); if (PMIX_SUCCESS != rc) { fprintf(stderr, "[%s.%u]: Failed to connect\n", myproc.nspace, myproc.rank); exit(1); } PMIx_Disconnect(procs, 2, NULL, 0); if (sync) { DEBUG_WAIT_THREAD(&rellock); DEBUG_DESTRUCT_LOCK(&rellock); } if (!verbose && 0 == (iter % freq)) { fprintf(stderr, "Completed iteration %d\n", iter); } } done: PMIx_Finalize(NULL, 0); if (verbose) { printf("parent: End .\n" ); } return 0; } 
prrte-3.0.13/test/connect.c0000664000175000017500000000605115145263240015676 0ustar alastairalastair/* -*- C -*- * * $HEADER$ * * Test of connect */ #include #include "pmix.h" int main(int argc, char* argv[]) { pmix_status_t rc; pmix_proc_t myproc; pmix_proc_t wildcard; pmix_proc_t remote; pmix_value_t value; pmix_value_t *returnval; pmix_info_t info; rc = PMIx_Init(&myproc, NULL, 0); if (PMIX_SUCCESS != rc) { fprintf(stderr, "Failed to init\n"); exit(1); } printf("Hello from rank %u\n", myproc.rank); /* put some remote values */ PMIX_VALUE_CONSTRUCT(&value); value.type = PMIX_STRING; value.data.string="12345"; if (PMIX_SUCCESS != (rc = PMIx_Put(PMIX_GLOBAL, "spastic-global", &value))) { fprintf(stderr, "%u: Global put failed\n", myproc.rank); exit(1); } value.data.string = "67890"; if (PMIX_SUCCESS != (rc = PMIx_Put(PMIX_REMOTE, "spastic-remote", &value))) { fprintf(stderr, "%u: Remote put failed\n", myproc.rank); exit(1); } value.data.string = "abcdef"; if (PMIX_SUCCESS != (rc = PMIx_Put(PMIX_LOCAL, "spastic-local", &value))) { fprintf(stderr, "%u: Local put failed\n", myproc.rank); exit(1); } /* commit them */ rc = PMIx_Commit(); if (PMIX_SUCCESS != rc) { fprintf(stderr, "%u: Failed to commit\n", myproc.rank); exit(1); } printf("%u: Connecting\n", myproc.rank); PMIX_LOAD_PROCID(&wildcard, myproc.nspace, PMIX_RANK_WILDCARD); rc = PMIx_Connect(&wildcard, 1, NULL, 0); if (PMIX_SUCCESS != rc) { fprintf(stderr, "Failed to connect\n"); exit(1); } printf("%u: Connect succeeded!\n", myproc.rank); if (0 != myproc.rank && 1 != myproc.rank) { goto done; } /* try to get a remote value */ PMIX_LOAD_NSPACE(remote.nspace, myproc.nspace); if (0 == myproc.rank) { remote.rank = 1; } else { remote.rank = 0; } printf("%u: Attempt to get global value\n", myproc.rank); PMIX_INFO_LOAD(&info, PMIX_IMMEDIATE, NULL, PMIX_BOOL); rc = PMIx_Get(&remote, "spastic-global", &info, 1, &returnval); if (PMIX_SUCCESS != rc) { fprintf(stderr, "%u: Unable to retrieve global data from %u\n", 
myproc.rank, remote.rank); goto done; } printf("%u: Global value for rank %u obtained\n", myproc.rank, remote.rank); printf("%u: Attempt to get remote value\n", myproc.rank); rc = PMIx_Get(&remote, "spastic-remote", &info, 1, &returnval); if (PMIX_SUCCESS != rc) { fprintf(stderr, "%u: Unable to retrieve remote data from %u\n", myproc.rank, remote.rank); goto done; } printf("%u: Remote value for rank %u obtained\n", myproc.rank, remote.rank); remote.rank = 0; rc = PMIx_Get(&remote, PMIX_GROUP_CONTEXT_ID, &info, 1, &returnval); if (PMIX_SUCCESS != rc) { fprintf(stderr, "%u: Unable to retrieve context ID from %u\n", myproc.rank, remote.rank); goto done; } printf("%u: Context ID %lu obtained\n", myproc.rank, (unsigned long)returnval->data.uint32); done: PMIx_Finalize(NULL, 0); return 0; } prrte-3.0.13/test/clichk.c0000664000175000017500000000040015145263240015472 0ustar alastairalastair#define _GNU_SOURCE #include #include #include #include #include int main(int argc, char **argv) { char *tmp = PMIx_Argv_join(argv, ' '); fprintf(stderr, "CMDLINE: %s\n", tmp); return 0; } prrte-3.0.13/test/loop_child.c0000664000175000017500000000313215145263240016356 0ustar alastairalastair#include #include #include #include #include #include #include #include static bool verbose = false; int main( int argc, char **argv ) { pmix_status_t rc; pmix_proc_t myproc; pmix_proc_t procs[2]; pmix_value_t *returnval; pmix_info_t info; static struct option myoptions[] = {{"verbose", no_argument, NULL, 'v'}, {0, 0, 0, 0}}; int option_index; int opt; while ((opt = getopt_long(argc, argv, "v", myoptions, &option_index)) != -1) { switch (opt) { case 'v': verbose = true; break; } } rc = PMIx_Init(&myproc, NULL, 0); if (PMIX_SUCCESS != rc) { fprintf(stderr, "Failed to init\n"); exit(1); } PMIX_INFO_LOAD(&info, PMIX_IMMEDIATE, NULL, PMIX_BOOL); rc = PMIx_Get(&myproc, PMIX_PARENT_ID, &info, 1, &returnval); if (PMIX_SUCCESS != rc) { fprintf(stderr, "[%s.%u]: Unable to retrieve parent\n", myproc.nspace, 
myproc.rank); goto done; } PMIX_XFER_PROCID(&procs[0], returnval->data.proc); PMIX_VALUE_RELEASE(returnval); PMIX_XFER_PROCID(&procs[1], &myproc); rc = PMIx_Connect(procs, 2, NULL, 0); if (PMIX_SUCCESS != rc) { fprintf(stderr, "[%s.%u]: Failed to connect\n", myproc.nspace, myproc.rank); exit(1); } PMIx_Disconnect(procs, 2, NULL, 0); done: PMIx_Finalize(NULL, 0); if (verbose) { printf("[%s.%u]: exiting\n", myproc.nspace, myproc.rank); } return 0; } prrte-3.0.13/test/runloop.bash0000775000175000017500000000107415145263240016441 0ustar alastairalastair#!/bin/bash for i in {1..100} ; do rm -rf ~/tmp/prte* if [ $(expr $i % 2) == "0" ]; then echo "================================" echo "================================ Iteration $i" echo "================================" fi gtimeout -k 27 25 prterun -n 1 ./qspawn #timeout -k 133 130 mpiexec -n 1 python test/runtests.py -v -i test_spawn RTN=$? if [[ $RTN != 0 ]] ; then echo "=-=-=-=->> Error: Failed with $RTN" exit 1 fi if [ $(expr $i % 2) == "0" ]; then echo "" fi done exit 0 prrte-3.0.13/test/hello.c0000664000175000017500000000715715145263240015360 0ustar alastairalastair/* * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. 
* Copyright (c) 2021 Nanook Consulting. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ * */ #define _GNU_SOURCE #include #include #include #include #include static pmix_proc_t myproc; int main(int argc, char **argv) { pmix_status_t rc; pid_t pid; char hostname[1024]; pmix_value_t *val; uint16_t localrank; size_t n; pmix_info_t optional; pid = getpid(); gethostname(hostname, 1024); /* init us - note that the call to "init" includes the return of * any job-related info provided by the RM. This includes any * debugger flag instructing us to stop-in-init. If such a directive * is included, then the process will be stopped in this call until * the "debugger release" notification arrives */ rc = PMIx_Init(&myproc, NULL, 0); if (PMIX_SUCCESS != rc && PMIX_ERR_UNREACH != rc) { fprintf(stderr, "Client ns %s rank %d: PMIx_Init failed: %s\n", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); exit(0); } /* get our local rank */ if (PMIX_SUCCESS != (rc = PMIx_Get(&myproc, PMIX_LOCAL_RANK, NULL, 0, &val))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Get local rank failed: %s\n", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); goto done; } localrank = val->data.uint16; PMIX_VALUE_RELEASE(val); fprintf(stderr, "Client ns %s rank %d pid %lu: Running on host %s localrank %d\n", myproc.nspace, myproc.rank, (unsigned long)pid, hostname , (int)localrank); pmix_proc_t wild; PMIX_LOAD_PROCID(&wild, myproc.nspace, PMIX_RANK_WILDCARD); rc = PMIx_Get(&wild, PMIX_LOCAL_PEERS, NULL, 0, &val); if (PMIX_SUCCESS != rc) { fprintf(stderr, "Unable to get local peers\n"); } else { fprintf(stderr, "%s:%u - local peers %s\n", myproc.nspace, myproc.rank, val->data.string); } PMIX_LOAD_PROCID(&wild, myproc.nspace, PMIX_RANK_WILDCARD); PMIX_INFO_LOAD(&optional, PMIX_OPTIONAL, NULL, PMIX_BOOL); rc = PMIx_Get(&wild, PMIX_NODE_OVERSUBSCRIBED, &optional, 1, &val); if (PMIX_SUCCESS != rc) { fprintf(stderr, "Not oversubscribed\n"); } else { fprintf(stderr, "%s:%u - 
oversubscribed\n", myproc.nspace, myproc.rank); } done: /* finalize us */ if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) { fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %s\n", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); } fflush(stderr); return(0); } prrte-3.0.13/test/simple_spawn.c0000664000175000017500000001144415145263240016750 0ustar alastairalastair#include #include #include #include #include #include int main(int argc, char *argv[]) { pmix_status_t rc; int size; pid_t pid; pmix_proc_t myproc; pmix_proc_t proc, parent; pmix_app_t app; pmix_proc_t peers[2]; char hostname[1024]; pmix_value_t *val = NULL; pmix_nspace_t nspace; char *env; pid = getpid(); gethostname(hostname, 1024); fprintf(stderr, "PID %d alive\n", (int)pid); rc = PMIx_Init(&myproc, NULL, 0); if (PMIX_SUCCESS != rc) { fprintf(stderr, "Client: PMIx_Init failed: %s\n", PMIx_Error_string(rc)); exit(1); } fprintf(stderr, "Client ns %s rank %d: PMIx_Init complete\n", myproc.nspace, myproc.rank); PMIX_LOAD_PROCID(&proc, myproc.nspace, PMIX_RANK_WILDCARD); rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val); if (PMIX_SUCCESS != rc) { fprintf(stderr, "Client ns %s rank %d: PMIx_Get job size failed: %s\n", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); goto done; } PMIX_VALUE_GET_NUMBER(rc, val, size, int); if (PMIX_SUCCESS != rc) { fprintf(stderr, "Client ns %s rank %d: get size number failed: %s\n", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); goto done; } PMIX_VALUE_RELEASE(val); printf("[%s:%u pid %ld] of %d starting up on node %s!\n", myproc.nspace, myproc.rank, (long) pid, size, hostname); rc = PMIx_Get(&myproc, PMIX_PARENT_ID, NULL, 0, &val); /* If we don't find it, then we're the parent */ if (PMIX_SUCCESS != rc) { if (0 == myproc.rank) { pid = getpid(); printf("Parent [%s pid %ld] about to spawn!\n", hostname, (long) pid); PMIX_APP_CONSTRUCT(&app); app.cmd = strdup(argv[0]); PMIX_ARGV_APPEND(rc, app.argv, argv[0]); app.maxprocs = 3; rc = PMIx_Spawn(NULL, 0, 
&app, 1, nspace); if (PMIX_SUCCESS != rc) { printf("Child failed to spawn\n"); return rc; } printf("Parent done with spawn\n"); /* post a piece of information our children should get */ PMIX_LOAD_PROCID(&peers[0], myproc.nspace, 0); PMIX_LOAD_PROCID(&peers[1], nspace, PMIX_RANK_WILDCARD); /* connect to the children */ printf("%s.%u: Connecting to children - signature %s %s\n", myproc.nspace, myproc.rank, peers[0].nspace, peers[1].nspace); rc = PMIx_Connect(peers, 2, NULL, 0); if (PMIX_SUCCESS != rc) { printf("Connect to children failed!\n"); } printf("%s.%u: Connect complete!\n", myproc.nspace, myproc.rank); printf("%s.%u: Disconnecting from children\n", myproc.nspace, myproc.rank); rc = PMIx_Disconnect(peers, 2, NULL, 0); if (PMIX_SUCCESS != rc) { printf("Disonnect from children failed!\n"); } printf("%s.%u: Disconnect complete!\n", myproc.nspace, myproc.rank); } PMIX_LOAD_PROCID(&peers[0], myproc.nspace, PMIX_RANK_WILDCARD); PMIx_Fence(&peers[0], 1, NULL, 0); } /* Otherwise, we're the child */ else { printf("Hello from the child %s.%u of %d on host %s pid %ld\n", myproc.nspace, myproc.rank, size, hostname, (long)pid); PMIX_LOAD_PROCID(&peers[0], val->data.proc->nspace, 0); PMIX_LOAD_PROCID(&peers[1], myproc.nspace, PMIX_RANK_WILDCARD); PMIX_VALUE_RELEASE(val); /* post some info our parent and peers should get */ /* connect to our parent */ if (1 == myproc.rank) { sleep(2); printf("\n\n\n"); } printf("%s.%u: Connecting to parent - signature %s %s\n", myproc.nspace, myproc.rank, peers[0].nspace, peers[1].nspace); rc = PMIx_Connect(peers, 2, NULL, 0); if (PMIX_SUCCESS != rc) { printf("%s.%u: Connect to parent failed!\n", myproc.nspace, myproc.rank); } printf("%s.%u: Connect complete!\n", myproc.nspace, myproc.rank); printf("%s.%u: Disconnecting from parent\n", myproc.nspace, myproc.rank); if (2 == myproc.rank) { sleep(2); printf("\n\n\n"); } rc = PMIx_Disconnect(peers, 2, NULL, 0); if (PMIX_SUCCESS != rc) { printf("Disonnect from parent failed!\n"); } printf("%s.%u: 
Disconnect complete!\n", myproc.nspace, myproc.rank); } env = getenv("foo"); printf("%s.%u: FOO %s\n", myproc.nspace, myproc.rank, (NULL == env) ? "NULL" : env); done: PMIx_Finalize(NULL, 0); fprintf(stderr, "%d: exiting\n", pid); return 0; } prrte-3.0.13/test/reinit.c0000664000175000017500000000434515145263240015543 0ustar alastairalastair/* * Copyright (c) 2020 Triad National Security, LLC. * All rights reserved. * * Copyright (c) 2021 Nanook Consulting. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ /* Test of rank position identification inside group */ #include "pmix.h" #include #include /* #include #include #include #include */ pmix_proc_t this_proc; int main(int argc, char *argv[]) { pmix_value_t *val; size_t ninfo = 0; pmix_proc_t job_proc; int count = 0; /* initialization */ try_again: fprintf(stderr, "initializing pmix\n"); PMIx_Init(&this_proc, NULL, ninfo); fprintf(stderr, "initialized pmix\n"); fprintf(stderr, "calling pmix fence\n"); PMIx_Fence(NULL, 0, NULL, 0); fprintf(stderr, "called pmix fence\n"); /* Handles everything that needs to happen after PMIx_Init() */ job_proc = this_proc; job_proc.rank = PMIX_RANK_WILDCARD; // Note that PMIX_RANK_WILDCARD == -2 fprintf(stderr, "getting job size\n"); PMIx_Get(&job_proc, PMIX_JOB_SIZE, NULL, 0, &val); /* After using PMIx_Get to get a value, we need to compare it our validation parameters we've passed as an argument; this is the main purpose of pmixt_validate_predefined(). 
*/ fprintf(stderr, "getting universesize\n"); PMIx_Get(&job_proc, PMIX_UNIV_SIZE, NULL, 0, &val); fprintf(stderr, "getting local size\n"); PMIx_Get(&job_proc, PMIX_LOCAL_SIZE, NULL, 0, &val); fprintf(stderr, "getting local rank\n"); PMIx_Get(&this_proc, PMIX_LOCAL_RANK, NULL, 0, &val); fprintf(stderr, "getting local nodid\n"); PMIx_Get(&this_proc, PMIX_NODEID, NULL, 0, &val); fprintf(stderr, "getting local peers\n"); PMIx_Get(&job_proc, PMIX_LOCAL_PEERS, NULL, 0, &val); fprintf(stderr, "getting hostname\n"); PMIx_Get(&this_proc, PMIX_HOSTNAME, NULL, 0, &val); fprintf(stderr, "getting my rank\n"); PMIx_Get(&job_proc, PMIX_RANK, NULL, 0, &val); /* finalize */ fprintf(stderr, "finalizing pmix\n"); PMIx_Finalize(NULL, 0); fprintf(stderr, "finalized pmix\n"); count++; if (count < 2) { goto try_again; } exit(0); } prrte-3.0.13/test/double-get.c0000664000175000017500000001374215145263240016301 0ustar alastairalastair#include #include #include static pmix_proc_t allproc = {}; static pmix_proc_t myproc = {}; static bool mywait = false; static bool refresh = false; static bool timeout = false; int pmi_set_string(const char *key, void *data, size_t size) { int rc; pmix_value_t value; PMIX_VALUE_CONSTRUCT(&value); value.type = PMIX_BYTE_OBJECT; value.data.bo.bytes = data; value.data.bo.size = size; if (PMIX_SUCCESS != (rc = PMIx_Put(PMIX_GLOBAL, key, &value))) { fprintf(stderr, "ERROR: Client ns %s rank %d: PMIx_Put failed: %s\n", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); } if (PMIX_SUCCESS != (rc = PMIx_Commit())) { fprintf(stderr, "ERROR: Client ns %s rank %d: PMIx_Commit failed: %s\n", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); } /* protect the data */ value.data.bo.bytes = NULL; value.data.bo.size = 0; PMIX_VALUE_DESTRUCT(&value); printf("%s:%d PMIx_Put on %s\n", myproc.nspace, myproc.rank, key); return 0; } int pmi_get_string(uint32_t peer_rank, const char *key, void **data_out, size_t *data_size_out) { int rc; pmix_proc_t proc; pmix_value_t *pvalue; 
pmix_info_t info; PMIX_LOAD_PROCID(&proc, myproc.nspace, peer_rank); if (refresh) { PMIX_INFO_LOAD(&info, PMIX_GET_REFRESH_CACHE, &refresh, PMIX_BOOL); rc = PMIx_Get(&proc, key, &info, 1, &pvalue); PMIX_INFO_DESTRUCT(&info); } else if (timeout) { rc = 2; PMIX_INFO_LOAD(&info, PMIX_TIMEOUT, &rc, PMIX_INT); rc = PMIx_Get(&proc, key, &info, 1, &pvalue); PMIX_INFO_DESTRUCT(&info); } else { rc = PMIx_Get(&proc, key, NULL, 0, &pvalue); } if (PMIX_SUCCESS != rc) { fprintf(stderr, "ERROR: Client ns %s rank %d: PMIx_Get on rank %u %s: %s\n", myproc.nspace, myproc.rank, peer_rank, key, PMIx_Error_string(rc)); } if (pvalue->type != PMIX_BYTE_OBJECT) { fprintf(stderr, "ERROR: Client ns %s rank %d: PMIx_Get %s: got wrong data type\n", myproc.nspace, myproc.rank, key); } *data_out = pvalue->data.bo.bytes; *data_size_out = pvalue->data.bo.size; /* protect the data */ pvalue->data.bo.bytes = NULL; pvalue->data.bo.size = 0; PMIX_VALUE_RELEASE(pvalue); PMIX_PROC_DESTRUCT(&proc); printf("%s:%d PMIx_get %s returned %zi bytes\n", myproc.nspace, myproc.rank, key, data_size_out[0]); return 0; } int pmix_exchange(bool flag) { int rc; pmix_info_t info; fprintf(stderr, "Execute fence\n"); PMIX_INFO_CONSTRUCT(&info); PMIX_INFO_LOAD(&info, PMIX_COLLECT_DATA, &flag, PMIX_BOOL); rc = PMIx_Fence(&allproc, 1, &info, 1); if (PMIX_SUCCESS != rc) { fprintf(stderr, "Client ns %s rank %d: PMIx_Fence_nb failed: %d\n", myproc.nspace, myproc.rank, rc); exit(1); } PMIX_INFO_DESTRUCT(&info); return 0; } int main(int argc, char *argv[]) { char data[256] = {}; char *data_out; size_t size_out; int rc; pmix_value_t *pvalue; /* check the args */ if (1 < argc) { if (2 < argc || 0 == strncmp(argv[1], "-h", 2) || 0 == strncmp(argv[1], "--h", 3)) { fprintf(stderr, "Usage:\n"); fprintf(stderr, "\t--wait Test PMIX_GET_WAIT_FOR_KEY\n"); fprintf(stderr, "\t--refresh Test PMIX_GET_REFRESH_CACHE\n"); fprintf(stderr, "\t--timeout Test PMIX_GET_WAIT_FOR_KEY, but timeout\n"); exit(0); } if (0 == strncmp(argv[1], "--w", 3)) 
{ mywait = true; } else if (0 == strncmp(argv[1], "--r", 3)) { refresh = true; } else if (0 == strncmp(argv[1], "--t", 3)) { timeout = true; } else { fprintf(stderr, "Invalid cmd line option: %s\n", argv[1]); fprintf(stderr, "Usage:\n"); fprintf(stderr, "\t--wait Test PMIX_GET_WAIT_FOR_KEY\n"); fprintf(stderr, "\t--refresh Test PMIX_GET_REFRESH_CACHE\n"); fprintf(stderr, "\t--timeout Test PMIX_GET_WAIT_FOR_KEY, but timeout\n"); exit(1); } } if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) { fprintf(stderr, "ERROR: PMIx_Init failed"); exit(1); } if (myproc.rank == 0) { printf("PMIx initialized\n"); } /* job-related info is found in our nspace, assigned to the * wildcard rank as it doesn't relate to a specific rank. Setup * a name to retrieve such values */ PMIX_LOAD_PROCID(&allproc, myproc.nspace, PMIX_RANK_WILDCARD); /* get the number of procs in our job */ if (PMIX_SUCCESS != (rc = PMIx_Get(&allproc, PMIX_JOB_SIZE, NULL, 0, &pvalue))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Get job size failed: %d\n", myproc.nspace, myproc.rank, rc); exit(1); } uint32_t nprocs = pvalue->data.uint32; PMIX_VALUE_RELEASE(pvalue); /* the below two lines break the subsequent PMIx_Get query on a key set later */ snprintf(data, 256, "FIRST TIME rank %d", myproc.rank); pnmi_set_string("test-key-1", data, 256); pmix_exchange(true); if (1 == myproc.rank) { if (timeout) { sleep(10); } else if (mywait) { sleep(2); } } snprintf(data, 256, "SECOND TIME rank %d", myproc.rank); if (0 == myproc.rank) { pmi_set_string("test-key-2", data, 256); } else { pmi_set_string("test-key-3", data, 256); } if (0 == myproc.rank) { pmi_get_string(1, "test-key-3", (void **) &data_out, &size_out); } else { pmi_get_string(0, "test-key-2", (void **) &data_out, &size_out); } printf("%d: obtained data \"%s\"\n", myproc.rank, data_out); if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) { fprintf(stderr, "ERROR: Client ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace, myproc.rank, rc); } if 
(myproc.rank == 0) printf("PMIx finalized\n"); exit(0); } prrte-3.0.13/test/filegen.c0000664000175000017500000000146115145263240015656 0ustar alastairalastair/* * Copyright (c) 2022 Nanook Consulting. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ * */ #define _GNU_SOURCE #include #include #include #include #include #include #include int main(int argc, char **argv) { int n, limit, rc; char buffer[1024]; if (argc < 2) { fprintf(stderr, "Usage: %s \n", argv[0]); exit(1); } limit = strtol(argv[1], NULL, 10); memset(buffer, 'a', 1024); buffer[1023] = '\n'; for (n=0; n < limit; n++) { rc = write(STDOUT_FILENO, buffer, 1024); if (0 > rc) { fprintf(stderr, "Write failed: %d\n", rc); exit(1); } } exit(0); } prrte-3.0.13/test/Makefile0000664000175000017500000000333615145263240015544 0ustar alastairalastair# # Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. # Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2006-2007 Sun Microsystems, Inc. All rights reserved. # Copyright (c) 2011-2020 Cisco Systems, Inc. All rights reserved # Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. # Copyright (c) 2013 Mellanox Technologies, Inc. All rights reserved. # Copyright (c) 2016-2020 Intel, Inc. All rights reserved. # Copyright (c) 2021-2025 Nanook Consulting All rights reserved. # Copyright (c) 2023 Triad National Security, LLC. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow # # $HEADER$ # # Use the PRTE-provided wrapper compiler CC = pcc # Using -g is not necessary, but it is helpful for example programs, # especially if users want to examine them with debuggers. CFLAGS = -g # Example programs to build TESTS = \ double-get \ get-nofence \ get-immediate \ attachtest/app \ attachtest/tool \ abort \ simple_spawn \ hello \ connect \ loop_child \ loop_spawn \ reinit \ cmspawn \ qspawn \ iostress \ filegen \ clichk \ chkfs \ spawn_timeout all: $(TESTS) # The usual "clean" target clean: rm -f $(TESTS) *~ *.o prrte-3.0.13/test/runtest.bash0000775000175000017500000000033715145263240016450 0ustar alastairalastair for ((i=0; i < 1; i++)); do prte --no-ready-msg --report-uri foo$i.txt & prun --map-by ppr:1:node --dvm-uri file:foo$i.txt --wait-to-connect 2 /Users/rhc/pmix/prrte2/examples/client2 pterm --dvm-uri file:foo$i.txt done prrte-3.0.13/test/abort.c0000664000175000017500000000305415145263240015354 0ustar alastairalastair/* -*- C -*- * * $HEADER$ * * The most basic of applications */ #include #include #include #include "pmix.h" int main(int argc, char *argv[]) { int size; int errcode; pmix_proc_t myproc; pmix_status_t rc; pmix_proc_t proc; pmix_value_t *val = NULL; if (1 < argc) { errcode = strtol(argv[1], NULL, 10); } else { errcode = 2; } rc = PMIx_Init(&myproc, NULL, 0); if (PMIX_SUCCESS != rc) { fprintf(stderr, "Client: PMIx_Init failed: %s\n", PMIx_Error_string(rc)); exit(errcode); } PMIX_LOAD_PROCID(&proc, myproc.nspace, PMIX_RANK_WILDCARD); rc = PMIx_Get(&proc, PMIX_JOB_SIZE, NULL, 0, &val); if (PMIX_SUCCESS != rc) { fprintf(stderr, "Client ns %s rank %d: PMIx_Get job size failed: %s\n", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); goto done; } PMIX_VALUE_GET_NUMBER(rc, val, size, int); if (PMIX_SUCCESS != rc) { fprintf(stderr, "Client ns %s rank %d: get size number failed: %s\n", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); goto done; } PMIX_VALUE_RELEASE(val); 
printf("Hello, World, I am %d of %d\n", myproc.rank, size); if (1 == size) { PMIx_Abort(errcode, "Aborting", NULL, 0); } else { if (1 == myproc.rank) { PMIx_Abort(errcode, "Aborting", NULL, 0); } else { errcode = 0; sleep(99999999); } } done: PMIx_Finalize(NULL, 0); return errcode; } prrte-3.0.13/test/test.h0000664000175000017500000001045715145263240015236 0ustar alastairalastair/* * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved. * Copyright (c) 2021 Nanook Consulting. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ * */ #define _GNU_SOURCE #include #include #include #include #include #include typedef struct { pthread_mutex_t mutex; pthread_cond_t cond; volatile bool active; pmix_status_t status; int count; char *msg; size_t evhandler_ref; } mylock_t; #define DEBUG_CONSTRUCT_LOCK(l) \ do { \ pthread_mutex_init(&(l)->mutex, NULL); \ pthread_cond_init(&(l)->cond, NULL); \ (l)->active = true; \ (l)->status = PMIX_SUCCESS; \ (l)->count = 0; \ (l)->evhandler_ref = 0; \ } while (0) #define DEBUG_DESTRUCT_LOCK(l) \ do { \ pthread_mutex_destroy(&(l)->mutex); \ pthread_cond_destroy(&(l)->cond); \ } while (0) #define DEBUG_WAIT_THREAD(lck) \ do { \ pthread_mutex_lock(&(lck)->mutex); \ while ((lck)->active) { \ pthread_cond_wait(&(lck)->cond, &(lck)->mutex); \ } \ pthread_mutex_unlock(&(lck)->mutex); \ } while (0) #define DEBUG_WAKEUP_THREAD(lck) \ do { \ pthread_mutex_lock(&(lck)->mutex); \ (lck)->active = false; \ pthread_cond_broadcast(&(lck)->cond); \ pthread_mutex_unlock(&(lck)->mutex); \ } while (0) /* define a structure for collecting returned * info from a query */ typedef struct { mylock_t lock; pmix_info_t *info; size_t ninfo; } myquery_data_t; #define DEBUG_CONSTRUCT_MYQUERY(q) \ do { \ DEBUG_CONSTRUCT_LOCK(&((q)->lock)); \ (q)->info = NULL; \ (q)->ninfo = 0; \ } while (0) #define DEBUG_DESTRUCT_MYQUERY(q) \ do { \ DEBUG_DESTRUCT_LOCK(&((q)->lock)); \ if (NULL != (q)->info) { \ PMIX_INFO_FREE((q)->info, (q)->ninfo); \ } \ } while (0) /* define a structure for releasing when a given * nspace terminates */ typedef struct { mylock_t lock; char *nspace; int exit_code; bool exit_code_given; } myrel_t; #define DEBUG_CONSTRUCT_MYREL(r) \ do { \ DEBUG_CONSTRUCT_LOCK(&((r)->lock)); \ (r)->nspace = NULL; \ (r)->exit_code = 0; \ (r)->exit_code_given = false; \ } while (0) #define DEBUG_DESTRUCT_MYREL(r) \ do { \ DEBUG_DESTRUCT_LOCK(&((r)->lock)); \ if (NULL != (r)->nspace) { \ free((r)->nspace); \ } \ } while (0) 
prrte-3.0.13/test/get-immediate.c0000664000175000017500000001233515145263240016762 0ustar alastairalastair#include #include #include static pmix_proc_t allproc = {}; static pmix_proc_t myproc = {}; static bool immediate = false; static bool shouldfind = false; int pmi_set_string(const char *key, void *data, size_t size) { int rc; pmix_value_t value; PMIX_VALUE_CONSTRUCT(&value); value.type = PMIX_BYTE_OBJECT; value.data.bo.bytes = data; value.data.bo.size = size; if (PMIX_SUCCESS != (rc = PMIx_Put(PMIX_GLOBAL, key, &value))) { fprintf(stderr, "ERROR: Client ns %s rank %d: PMIx_Put failed: %s\n", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); } if (PMIX_SUCCESS != (rc = PMIx_Commit())) { fprintf(stderr, "ERROR: Client ns %s rank %d: PMIx_Commit failed: %s\n", myproc.nspace, myproc.rank, PMIx_Error_string(rc)); } /* protect the data */ value.data.bo.bytes = NULL; value.data.bo.size = 0; PMIX_VALUE_DESTRUCT(&value); printf("%s:%d PMIx_Put on %s\n", myproc.nspace, myproc.rank, key); return 0; } int pmi_get_string(uint32_t peer_rank, const char *key, void **data_out, size_t *data_size_out) { int rc; pmix_proc_t proc; pmix_value_t *pvalue; pmix_info_t info; PMIX_LOAD_PROCID(&proc, myproc.nspace, peer_rank); if (immediate) { PMIX_INFO_LOAD(&info, PMIX_IMMEDIATE, NULL, PMIX_BOOL); } else { PMIX_INFO_LOAD(&info, PMIX_OPTIONAL, NULL, PMIX_BOOL); } rc = PMIx_Get(&proc, key, &info, 1, &pvalue); PMIX_INFO_DESTRUCT(&info); if (immediate) { /* both ranks should find data */ shouldfind = true; } else { /* if it is optional, then neither rank will find it */ shouldfind = false; } if (shouldfind) { if (PMIX_SUCCESS != rc) { fprintf(stderr, "ERROR: Client ns %s rank %d: PMIx_Get on rank %u did not find %s: %s\n", myproc.nspace, myproc.rank, peer_rank, key, PMIx_Error_string(rc)); return rc; } if (pvalue->type != PMIX_BYTE_OBJECT) { fprintf(stderr, "ERROR: Client ns %s rank %d: PMIx_Get %s: got wrong data type\n", myproc.nspace, myproc.rank, key); return rc; } *data_out = 
pvalue->data.bo.bytes; *data_size_out = pvalue->data.bo.size; /* protect the data */ pvalue->data.bo.bytes = NULL; pvalue->data.bo.size = 0; PMIX_VALUE_RELEASE(pvalue); PMIX_PROC_DESTRUCT(&proc); printf("%s:%d PMIx_get %s returned %zi bytes\n", myproc.nspace, myproc.rank, key, data_size_out[0]); } else { if (PMIX_SUCCESS == rc) { fprintf(stderr, "ERROR: Client ns %s rank %d: PMIx_Get on rank %u found %s: %s\n", myproc.nspace, myproc.rank, peer_rank, key, PMIx_Error_string(rc)); } *data_out = strdup("NOT-FOUND"); *data_size_out = strlen(*data_out); } return 0; } int main(int argc, char *argv[]) { char data[256] = {}; char *data_out; size_t size_out; int rc; pmix_value_t *pvalue; /* check the args */ if (1 < argc) { if (2 < argc || 0 == strncmp(argv[1], "-h", 2) || 0 == strncmp(argv[1], "--h", 3)) { fprintf(stderr, "Usage:\n"); fprintf(stderr, "\t--immediate Test PMIX_IMMEDIATE\n"); exit(0); } if (0 == strncmp(argv[1], "--i", 3)) { immediate = true; } else { fprintf(stderr, "Invalid cmd line option: %s\n", argv[1]); fprintf(stderr, "Usage:\n"); fprintf(stderr, "\t--immediate Test PMIX_IMMEDIATE\n"); exit(1); } } if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) { fprintf(stderr, "ERROR: PMIx_Init failed"); exit(1); } if (myproc.rank == 0) { printf("PMIx initialized\n"); } /* job-related info is found in our nspace, assigned to the * wildcard rank as it doesn't relate to a specific rank. 
Setup * a name to retrieve such values */ PMIX_LOAD_PROCID(&allproc, myproc.nspace, PMIX_RANK_WILDCARD); /* get the number of procs in our job */ if (PMIX_SUCCESS != (rc = PMIx_Get(&allproc, PMIX_JOB_SIZE, NULL, 0, &pvalue))) { fprintf(stderr, "Client ns %s rank %d: PMIx_Get job size failed: %d\n", myproc.nspace, myproc.rank, rc); exit(1); } uint32_t nprocs = pvalue->data.uint32; PMIX_VALUE_RELEASE(pvalue); /* each proc pushes some data */ if (immediate || 0 == myproc.rank) { if (0 == myproc.rank) { pmi_set_string("test-key-1", data, 256); } else { pmi_set_string("test-key-2", data, 256); } } sleep(2); if (0 == myproc.rank) { rc = pmi_get_string(1, "test-key-2", (void **) &data_out, &size_out); } else { rc = pmi_get_string(0, "test-key-1", (void **) &data_out, &size_out); } if (0 == rc) { printf("%d: obtained data \"%s\"\n", myproc.rank, data_out); } if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) { fprintf(stderr, "ERROR: Client ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace, myproc.rank, rc); } if (myproc.rank == 0) printf("PMIx finalized\n"); exit(0); } prrte-3.0.13/src/0000775000175000017500000000000015145263240013707 5ustar alastairalastairprrte-3.0.13/src/etc/0000775000175000017500000000000015145263240014462 5ustar alastairalastairprrte-3.0.13/src/etc/Makefile.am0000664000175000017500000001244115145263240016520 0ustar alastairalastair# # Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. # Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2008-2023 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2019-2020 Intel, Inc. All rights reserved. 
# Copyright (c) 2021-2023 Nanook Consulting. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow # # $HEADER$ # prte_file_from_platform = @PRTE_PARAM_FROM_PLATFORM@ prte_mca_param_file = @PRTE_DEFAULT_MCA_PARAM_CONF@ prte_config_file = @PRTE_DEFAULT_CONFIG@ EXTRA_DIST = prte-mca-params.conf prte-default-hostfile prte.conf # Note that prte-mca-params.conf and prte.conf are # "special" -- we do not list them here. prte_install_hook_files = prte-default-hostfile # Steal a little trickery from a generated Makefile to only install # files if they do not already exist at the target. Be sure to read # thread starting here # (https://www.open-mpi.org/community/lists/devel/2008/06/4080.php) for # details why the mkdir is in install-data-local. install-data-local: @ $(MKDIR_P) $(DESTDIR)$(sysconfdir); \ if test "$(prte_file_from_platform)" = "yes"; then \ $(INSTALL_DATA) $(prte_mca_param_file) $(DESTDIR)$(sysconfdir)/prte-mca-params.conf; \ $(INSTALL_DATA) $(prte_config_file) $(DESTDIR)$(sysconfdir)/prte.conf; \ else \ if test -f $(DESTDIR)$(sysconfdir)/prte-mca-params.conf; then \ echo "******************************* WARNING ************************************"; \ echo "*** Not installing new prte-mca-params.conf over existing file in:"; \ echo "*** $(DESTDIR)$(sysconfdir)/prte-mca-params.conf"; \ echo "******************************* WARNING ************************************"; \ else \ echo installing prte-mca-params.conf to $(DESTDIR)$(sysconfdir)/; \ $(INSTALL_DATA) $(srcdir)/prte-mca-params.conf $(DESTDIR)$(sysconfdir)/; \ fi; \ if test -f $(DESTDIR)$(sysconfdir)/prte.conf; then \ echo "******************************* WARNING ************************************"; \ echo "*** Not installing new prte.conf over existing file in:"; \ echo "*** $(DESTDIR)$(sysconfdir)/prte.conf"; \ echo "******************************* WARNING ************************************"; \ else \ echo installing prte.conf to $(DESTDIR)$(sysconfdir)/; \ $(INSTALL_DATA) 
$(srcdir)/prte.conf $(DESTDIR)$(sysconfdir)/; \ fi; \ fi @ for file in $(prte_install_hook_files); do \ if test -f $(DESTDIR)$(sysconfdir)/$$file; then \ echo "******************************* WARNING ************************************"; \ echo "*** Not installing new $$file over existing file in:"; \ echo "*** $(DESTDIR)$(sysconfdir)/$$file"; \ echo "******************************* WARNING ************************************"; \ else \ echo installing $$file to $(DESTDIR)$(sysconfdir)/; \ $(INSTALL_DATA) $(srcdir)/$$file $(DESTDIR)$(sysconfdir)/; \ fi; \ done # Only remove if exactly the same as what in our tree # NOTE TO READER: Bourne shell if ... fi evaluates the body if # the return of the evaluated command is 0 (as opposed to non-zero # as used by everyone else) uninstall-local: @ p="$(prte_install_hook_files)"; \ for file in $$p; do \ if test -f "$(DESTDIR)$(sysconfdir)/$$file"; then \ if test -f "$$file"; then d=; else d="$(srcdir)/"; fi; \ if diff "$(DESTDIR)$(sysconfdir)/$$file" "$$d$$file" > /dev/null 2>&1 ; then \ echo "rm -f $(DESTDIR)$(sysconfdir)/$$file" ; \ rm -f "$(DESTDIR)$(sysconfdir)/$$file" ; \ fi ; \ fi ; \ done ; \ if test "$(prte_file_from_platform)" = "yes"; then \ if test -f $(DESTDIR)$(sysconfdir)/prte-mca-params.conf; then \ if diff "$(DESTDIR)$(sysconfdir)/prte-mca-params.conf" "$(prte_mca_param_file)" > /dev/null 2>&1 ; then \ echo "rm -f $(DESTDIR)$(sysconfdir)/prte-mca-params.conf" ; \ rm -f "$(DESTDIR)$(sysconfdir)/prte-mca-params.conf" ; \ fi ; \ fi ; \ if test -f $(DESTDIR)$(sysconfdir)/prte.conf; then \ if diff "$(DESTDIR)$(sysconfdir)/prte.conf" "$(prte_config_file)" > /dev/null 2>&1 ; then \ echo "rm -f $(DESTDIR)$(sysconfdir)/prte.conf" ; \ rm -f "$(DESTDIR)$(sysconfdir)/prte.conf" ; \ fi ; \ fi ; \ else \ if test -f $(DESTDIR)$(sysconfdir)/prte-mca-params.conf; then \ if diff "$(DESTDIR)$(sysconfdir)/prte-mca-params.conf" "$(srcdir)/prte-mca-params.conf"> /dev/null 2>&1 ; then \ echo "rm -f 
$(DESTDIR)$(sysconfdir)/prte-mca-params.conf" ; \ rm -f "$(DESTDIR)$(sysconfdir)/prte-mca-params.conf" ; \ fi ; \ fi ; \ if test -f $(DESTDIR)$(sysconfdir)/prte.conf; then \ if diff "$(DESTDIR)$(sysconfdir)/prte.conf" "$(srcdir)/prte.conf"> /dev/null 2>&1 ; then \ echo "rm -f $(DESTDIR)$(sysconfdir)/prte.conf" ; \ rm -f "$(DESTDIR)$(sysconfdir)/prte.conf" ; \ fi ; \ fi ; \ fi ; prrte-3.0.13/src/etc/prte-default-hostfile0000664000175000017500000000327015145263240020616 0ustar alastairalastair# # Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. # Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2019-2020 Intel, Inc. All rights reserved. # Copyright (c) 2020 Cisco Systems, Inc. All rights reserved # $COPYRIGHT$ # # Additional copyrights may follow # # $HEADER$ # # This is the default hostfile for PRTE. Notice that it does not # contain any hosts (not even localhost). This file should only # contain hosts if a system administrator wants users to always have # the same set of default hosts, and is not using a batch scheduler # (such as SLURM, PBS, etc.). # # Note that this file is *not* used when running in "managed" # environments (e.g., running in a job under a job scheduler, such as # SLURM or PBS / Torque). # # If you are primarily interested in running PRTE on one node, you # should *not* simply list "localhost" in here. 
A localhost-only node list is # created by the RAS component named "localhost" if no other RAS # components were able to find any hosts to run on (this behavior can # be disabled by excluding the localhost RAS component by specifying # the value "^localhost" [without the quotes] to the "ras" MCA # parameter). prrte-3.0.13/src/etc/prte-mca-params.conf0000664000175000017500000000501215145263240020320 0ustar alastairalastair# # Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. # Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2006-2020 Cisco Systems, Inc. All rights reserved # Copyright (c) 2018-2020 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow # # $HEADER$ # # This is the default system-wide MCA parameters defaults file. # Specifically, the MCA parameter "mca_param_files" defaults to a # value of # "$HOME/.prte/mca-params.conf:$sysconf/prte-mca-params.conf" # (this file is the latter of the two). So if the default value of # mca_param_files is not changed, this file is used to set system-wide # MCA parameters. This file can therefore be used to set system-wide # default MCA parameters for all users. Of course, users can override # these values if they want, but this file is an excellent location # for setting system-specific MCA parameters for those users who don't # know / care enough to investigate the proper values for them. # Note that this file is only applicable where it is visible (in a # filesystem sense) by PRTE daemons. Hence, # these values are only applicable on nodes that "see" this file. 
If # $sysconf is a directory on a local disk, it is likely that changes # to this file will need to be propagated to other nodes. If $sysconf # is a directory that is shared via a networked filesystem, changes to # this file will be visible to all nodes that share this $sysconf. # The format is straightforward: one per line, mca_param_name = # rvalue. Quoting is ignored (so if you use quotes or escape # characters, they'll be included as part of the value). For example: # Select a particular routing component # routed = binomial # Note that the value "~/" will be expanded to the current user's home # directory. For example: # Change component loading path # mca_base_component_path = /usr/local/lib/prte:~/my_prte_components # See "prte_info --param all all --level 9" for a full listing of # PRTE MCA parameters available and their default values. prrte-3.0.13/src/etc/prte.conf0000664000175000017500000000612215145263240016304 0ustar alastairalastair# # Copyright (c) 2023 Nanook Consulting. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow # # $HEADER$ # # The PMIx Reference RunTime Environment (PRRTE) can be instantiated # as a Distributed Virtual Machine (DVM) in two ways. First, the # `prte` command can be executed at a shell prompt. This will discover # the available resources (either from hostfile or as allocated by a # resource manager) and start a PRRTE shepherd daemon (`prted`) on each # of the indicated nodes. # # The other method, however, is to bootstrap the DVM at time of cluster # startup. Bootstrapping PRRTE allows the DVM to serve as the system-level # runtime, providing a full-service PMIx environment to sessions under # its purview. Integration to an appropriately enabled scheduler can # provide a full workload managed environment for users. # # Establishing the DVM using the bootstrap method requires that a PRRTE # configuration file be created and made available on every node of the # cluster at node startup. 
The configuration file provides necessary # information for establishing the communication infrastructure between # the DVM controller and the compute node daemons. It also provides a # means for easily defining DVM behavior for options such as logging, # system-level prolog and epilog scripts for each session, and other # PRRTE features. # # The configuration file can be manually created or can be created using # the PRRTE configuration tool ``/etc/prte-configurator.html`` # running in the browser of your choice. Manual creation can best be done # by editing the example configuration file (``/src/etc/prte.conf``). # This file contains all the supported configuration options, with all # entries commented out. Simply uncomment the options of interest and # set them to appropriate values. The file will be installed into the # final ``/etc`` when ``make install`` is performed. # # The configuration tool also contains all the supported options in an # easy-to-use form. Once you have filled out the desired entries, the # "submit" button will show the resulting configuration file on the # browser window - a simple "copy/paste" operation into your target # configuration file will yield the final result. # # Configuration Options # --------------------- # # The following options are supported by this version of PRRTE. # While we make every effort to maintain compatibility with prior versions, # we recommend that you check options when installing new versions to # see what may have changed and/or been added. We also recommend that # you use the prte-configurator.html for the version you are using to # ensure that it is fully compatible. 
# # Bootstrap Options # ----------------- #ClusterName=cluster #DVMControllerHost=host0 #DVMControllerPort=7817 #PRTEDPort=7818 #DVMNodes= # # Operational Options # ------------------- #DVMTempDir=/tmp #SessionTmpDir=/tmp # # Logging Options # --------------- #ControllerLogJobState=undefined #ControllerLogProcState=undefined #ControllerLogPath= #PRTEDLogJobState=undefined #PRTEDLogProcState=undefined #PRTEDLogPath= prrte-3.0.13/src/tools/0000775000175000017500000000000015145263240015047 5ustar alastairalastairprrte-3.0.13/src/tools/prte/0000775000175000017500000000000015145263240016021 5ustar alastairalastairprrte-3.0.13/src/tools/prte/prte.c0000664000175000017500000016713415145263240017153 0ustar alastairalastair/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2007-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2013-2020 Intel, Inc. All rights reserved. * Copyright (c) 2015-2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2020 Geoffroy Vallee. All rights reserved. * Copyright (c) 2020 IBM Corporation. All rights reserved. * Copyright (c) 2021-2026 Nanook Consulting All rights reserved. * Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All Rights * reserved. * Copyright (c) 2022-2023 Triad National Security, LLC. 
All rights * reserved. * Copyright (c) 2023 Jeffrey M. Squyres. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #include "prte_config.h" #include "src/include/constants.h" #include "src/include/version.h" #include #include #include #ifdef HAVE_STRINGS_H # include #endif /* HAVE_STRINGS_H */ #ifdef HAVE_UNISTD_H # include #endif #ifdef HAVE_SYS_PARAM_H # include #endif #include #include #include #ifdef HAVE_SYS_TYPES_H # include #endif /* HAVE_SYS_TYPES_H */ #ifdef HAVE_SYS_WAIT_H # include #endif /* HAVE_SYS_WAIT_H */ #ifdef HAVE_SYS_TIME_H # include #endif /* HAVE_SYS_TIME_H */ #include #ifdef HAVE_SYS_STAT_H # include #endif #ifdef HAVE_POLL_H # include #endif #include "src/event/event-internal.h" #include "src/mca/base/pmix_base.h" #include "src/mca/prteinstalldirs/prteinstalldirs.h" #include "src/pmix/pmix-internal.h" #include "src/threads/pmix_mutex.h" #include "src/util/pmix_argv.h" #include "src/util/pmix_basename.h" #include "src/util/prte_cmd_line.h" #include "src/util/daemon_init.h" #include "src/util/pmix_fd.h" #include "src/util/pmix_os_dirpath.h" #include "src/util/pmix_os_path.h" #include "src/util/pmix_output.h" #include "src/util/pmix_path.h" #include "src/util/pmix_printf.h" #include "src/util/pmix_environ.h" #include "src/util/pmix_getcwd.h" #include "src/util/pmix_show_help.h" #include "src/util/pmix_string_copy.h" #include "src/util/session_dir.h" #include "src/class/pmix_pointer_array.h" #include "src/runtime/prte_progress_threads.h" #include "src/mca/errmgr/errmgr.h" #include "src/mca/ess/base/base.h" #include "src/mca/odls/odls.h" #include "src/mca/plm/plm.h" #include "src/mca/rmaps/base/base.h" #include "src/rml/rml.h" #include "src/mca/schizo/base/base.h" #include "src/mca/state/base/base.h" #include "src/runtime/prte_globals.h" #include "src/runtime/prte_wait.h" #include "src/runtime/runtime.h" #include "include/prte.h" #include "src/prted/pmix/pmix_server.h" #include 
"src/prted/pmix/pmix_server_internal.h" #include "src/prted/prted.h" typedef struct { prte_pmix_lock_t lock; pmix_status_t status; pmix_info_t *info; size_t ninfo; } mylock_t; static pmix_nspace_t spawnednspace; static pmix_proc_t myproc; static bool signals_set = false; static bool forcibly_die = false; static prte_event_t term_handler; static prte_event_t epipe_handler; static int term_pipe[2]; static pmix_mutex_t prun_abort_inprogress_lock = PMIX_MUTEX_STATIC_INIT; static prte_event_t *forward_signals_events = NULL; static char *mypidfile = NULL; static bool verbose = false; static bool want_prefix_by_default = (bool) PRTE_WANT_PRTE_PREFIX_BY_DEFAULT; static void abort_signal_callback(int signal); static void clean_abort(int fd, short flags, void *arg); static void signal_forward_callback(int fd, short args, void *cbdata); static void epipe_signal_callback(int fd, short args, void *cbdata); static int prep_singleton(const char *name); static bool keepalive = false; static void opcbfunc(pmix_status_t status, void *cbdata) { prte_pmix_lock_t *lock = (prte_pmix_lock_t *) cbdata; PRTE_HIDE_UNUSED_PARAMS(status); PMIX_ACQUIRE_OBJECT(lock); PRTE_PMIX_WAKEUP_THREAD(lock); } static void spcbfunc(pmix_status_t status, char nspace[], void *cbdata) { prte_pmix_lock_t *lock = (prte_pmix_lock_t *) cbdata; PMIX_ACQUIRE_OBJECT(lock); lock->status = status; if (PMIX_SUCCESS == status) { lock->msg = strdup(nspace); } PRTE_PMIX_WAKEUP_THREAD(lock); } static void parent_died_fn(size_t evhdlr_registration_id, pmix_status_t status, const pmix_proc_t *source, pmix_info_t info[], size_t ninfo, pmix_info_t results[], size_t nresults, pmix_event_notification_cbfunc_fn_t cbfunc, void *cbdata) { pmix_server_req_t *cd; PRTE_HIDE_UNUSED_PARAMS(evhdlr_registration_id, status, source, info, ninfo, results, nresults); // allow the pmix event base to continue cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata); // shift this into our event base cd = PMIX_NEW(pmix_server_req_t); 
prte_event_set(prte_event_base, &(cd->ev), -1, PRTE_EV_WRITE, clean_abort, cd); prte_event_active(&(cd->ev), PRTE_EV_WRITE, 1); } static void evhandler_reg_callbk(pmix_status_t status, size_t evhandler_ref, void *cbdata) { mylock_t *lock = (mylock_t *) cbdata; PRTE_HIDE_UNUSED_PARAMS(evhandler_ref); lock->status = status; PRTE_PMIX_WAKEUP_THREAD(&lock->lock); } static int wait_pipe[2]; static int wait_dvm(pid_t pid) { char reply; int rc; int status; close(wait_pipe[1]); do { rc = read(wait_pipe[0], &reply, 1); } while (0 > rc && EINTR == errno); if (1 == rc && 'K' == reply) { return 0; } else if (0 == rc) { waitpid(pid, &status, 0); if (WIFEXITED(status)) { return WEXITSTATUS(status); } } return 255; } static void setup_sighandler(int signal, prte_event_t *ev, prte_event_cbfunc_t cbfunc) { prte_event_signal_set(prte_event_base, ev, signal, cbfunc, ev); prte_event_signal_add(ev, NULL); } static void shutdown_callback(int fd, short flags, void *arg) { prte_timer_t *tm = (prte_timer_t *) arg; prte_job_t *jdata; PRTE_HIDE_UNUSED_PARAMS(fd, flags); if (NULL != tm) { /* release the timer */ PMIX_RELEASE(tm); } /* if we were ordered to abort, do so */ pmix_output(0, "%s is executing clean abnormal termination", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)); /* do -not- call finalize as this will send a message to the HNP * indicating clean termination! 
Instead, just forcibly cleanup * the local session_dir tree and exit */ prte_odls.kill_local_procs(NULL); // mark that we are finalizing so the session directory will cleanup prte_finalizing = true; jdata = prte_get_job_data_object(PRTE_PROC_MY_NAME->nspace); PMIX_RELEASE(jdata); exit(PRTE_ERROR_DEFAULT_EXIT_CODE); } #if PMIX_NUMERIC_VERSION < 0x00040205 static char *pmix_getline(FILE *fp) { char *ret, *buff; char input[1024]; ret = fgets(input, 1024, fp); if (NULL != ret) { input[strlen(input) - 1] = '\0'; /* remove newline */ buff = strdup(input); return buff; } return NULL; } #endif int main(int argc, char *argv[]) { int rc = 1, i; char *param, *tpath, *cptr; prte_pmix_lock_t lock; pmix_list_t apps, jobdata; prte_pmix_app_t *app; pmix_info_t *iptr, *iptr2, info; pmix_status_t ret; size_t n, ninfo, param_len; pmix_app_t *papps; size_t napps; mylock_t mylock; char **pargv, **split; int pargc; prte_job_t *jdata; prte_app_context_t *dapp; bool proxyrun = false, first; void *jinfo; pmix_proc_t pname; pmix_value_t *val; pmix_data_array_t darray; char **hostfiles = NULL; char **hosts = NULL; prte_schizo_base_module_t *schizo; prte_ess_base_signal_t *sig; pmix_status_t code; char *personality; pmix_cli_result_t results; pmix_cli_item_t *opt; FILE *fp; prte_info_item_t *iprteinfo; /* init the globals */ PMIX_CONSTRUCT(&apps, pmix_list_t); PMIX_CONSTRUCT(&jobdata, pmix_list_t); if (NULL == (param = getenv("PRTE_BASENAME"))) { prte_tool_basename = pmix_basename(argv[0]); } else { prte_tool_basename = strdup(param); } if (0 == strcmp(prte_tool_basename, "prterun")) { prte_tool_actual = "prterun"; } else { prte_tool_actual = "prte"; } pargc = argc; pargv = PMIx_Argv_copy(argv); /* save a pristine copy of the environment for launch purposes. * This MUST be done so that we can pass it to any local procs we * spawn - otherwise, those local procs will get a bunch of * params only relevant to PRRTE. 
Skip all PMIx and PRRTE params * as those are only targeting us */ prte_launch_environ = NULL; for (i=0; NULL != environ[i]; i++) { if (0 != strncmp(environ[i], "PMIX_", 5) && 0 != strncmp(environ[i], "PRTE_", 5)) { PMIX_ARGV_APPEND_NOSIZE_COMPAT(&prte_launch_environ, environ[i]); } } /* do a minimal setup of key infrastructure, including * parsing the install-level and user-level PRRTE param * files */ rc = prte_init_minimum(); if (PRTE_SUCCESS != rc) { return rc; } /* because we have to use the schizo framework and init our hostname * prior to parsing the incoming argv for cmd line options, do a hacky * search to support passing of impacted options (e.g., verbosity for schizo) */ rc = prte_schizo_base_parse_prte(pargc, 0, pargv, NULL); if (PRTE_SUCCESS != rc) { return rc; } rc = prte_schizo_base_parse_pmix(pargc, 0, pargv, NULL); if (PRTE_SUCCESS != rc) { return rc; } /* init the tiny part of PRTE we initially need */ prte_init_util(PRTE_PROC_MASTER); /** setup callbacks for abort signals - from this point * forward, we need to abort in a manner that allows us * to cleanup. However, we cannot directly use libevent * to trap these signals as otherwise we cannot respond * to them if we are stuck in an event! 
So instead use * the basic POSIX trap functions to handle the signal, * and then let that signal handler do some magic to * avoid the hang * * NOTE: posix traps don't allow us to do anything major * in them, so use a pipe tied to a libevent event to * reach a "safe" place where the termination event can * be created */ if (0 != (rc = pipe(term_pipe))) { exit(1); } /* setup an event to attempt normal termination on signal */ rc = prte_event_base_open(); if (PRTE_SUCCESS != rc) { fprintf(stderr, "Unable to initialize event library\n"); exit(1); } prte_event_set(prte_event_base, &term_handler, term_pipe[0], PRTE_EV_READ, clean_abort, NULL); prte_event_add(&term_handler, NULL); /* Set both ends of this pipe to be close-on-exec so that no children inherit it */ if (pmix_fd_set_cloexec(term_pipe[0]) != PRTE_SUCCESS || pmix_fd_set_cloexec(term_pipe[1]) != PRTE_SUCCESS) { fprintf(stderr, "unable to set the pipe to CLOEXEC\n"); prte_progress_thread_finalize(NULL); exit(1); } /* setup callback for SIGPIPE */ setup_sighandler(SIGPIPE, &epipe_handler, epipe_signal_callback); /* point the signal trap to a function that will activate that event */ signal(SIGTERM, abort_signal_callback); signal(SIGINT, abort_signal_callback); signal(SIGHUP, abort_signal_callback); /* open the SCHIZO framework */ rc = pmix_mca_base_framework_open(&prte_schizo_base_framework, PMIX_MCA_BASE_OPEN_DEFAULT); if (PRTE_SUCCESS != rc) { PRTE_ERROR_LOG(rc); return rc; } if (PRTE_SUCCESS != (rc = prte_schizo_base_select())) { PRTE_ERROR_LOG(rc); return rc; } /* look for any personality specification */ personality = NULL; bool rankby_found = false; bool bindto_found = false; for (i = 0; NULL != pargv[i]; i++) { if (0 == strcmp(pargv[i], "--personality")) { personality = pargv[i + 1]; continue; } if (0 == strcmp(pargv[i], "--map-by")) { free(pargv[i]); pargv[i] = strdup("--mapby"); continue; } if (0 == strcmp(pargv[i], "--rank-by") || 0 == strcmp(pargv[i], "--rankby")) { if (rankby_found) { 
pmix_show_help("help-schizo-base.txt", "multi-instances", true, pargv[i]); return PRTE_ERR_BAD_PARAM; } rankby_found = true; if (0 == strcmp(pargv[i], "--rank-by")) { free(pargv[i]); pargv[i] = strdup("--rankby"); } continue; } if (0 == strcmp(pargv[i], "--bind-to") || 0 == strcmp(pargv[i], "--bindto")) { if (bindto_found) { pmix_show_help("help-schizo-base.txt", "multi-instances", true, "bind-to"); return PRTE_ERR_BAD_PARAM; } bindto_found = true; if (0 == strcmp(pargv[i], "--bind-to")) { free(pargv[i]); pargv[i] = strdup("--bindto"); } continue; } if (0 == strcmp(pargv[i], "--runtime-options")) { free(pargv[i]); pargv[i] = strdup("--rtos"); continue; } } /* detect if we are running as a proxy and select the active * schizo module for this tool */ schizo = prte_schizo_base_detect_proxy(personality); if (NULL == schizo) { pmix_show_help("help-schizo-base.txt", "no-proxy", true, prte_tool_basename, personality); return 1; } if (0 != strcmp(schizo->name, "prte")) { proxyrun = true; } else { /* if we are using the "prte" personality, but we * are not actually running as "prte" or are actively * testing the proxy capability , then we are acting * as a proxy */ if (0 != strcmp(prte_tool_basename, "prte") || prte_schizo_base.test_proxy_launch) { proxyrun = true; } } if (NULL == personality) { personality = schizo->name; } /* ensure we don't confuse any downstream PRRTE tools on * choice of proxy since some environments forward their envars */ unsetenv("PRTE_MCA_schizo_proxy"); /* Register all global MCA Params */ if (PRTE_SUCCESS != (rc = prte_register_params())) { if (PRTE_ERR_SILENT != rc) { pmix_show_help("help-prte-runtime", "prte_init:startup:internal-failure", true, "prte register params", PRTE_ERROR_NAME(rc), rc); } return 1; } /* parse the input argv to get values, including everyone's MCA params */ PMIX_CONSTRUCT(&results, pmix_cli_result_t); // check for special case of executable immediately following tool if (proxyrun && pargc > 1 && '-' != pargv[1][0]) { 
results.tail = PMIx_Argv_copy(&pargv[1]); } else { rc = schizo->parse_cli(pargv, &results, PMIX_CLI_WARN); if (PRTE_SUCCESS != rc) { PMIX_DESTRUCT(&results); if (PRTE_OPERATION_SUCCEEDED == rc) { return PRTE_SUCCESS; } if (PRTE_ERR_SILENT != rc) { fprintf(stderr, "%s: command line error (%s)\n", prte_tool_basename, prte_strerror(rc)); } return rc; } } // check if they asked for XML output from us opt = pmix_cmd_line_get_param(&results, PRTE_CLI_OUTPUT); if (NULL != opt) { split = PMIX_ARGV_SPLIT_COMPAT(opt->values[0], ','); for (n = 0; NULL != split[n]; n++) { if (PMIX_CHECK_CLI_OPTION(split[n], PRTE_CLI_XML)) { prte_xml_output = true; break; } } PMIX_ARGV_FREE_COMPAT(split); } /* Did the user specify a default hostfile? */ opt = pmix_cmd_line_get_param(&results, PRTE_CLI_DEFAULT_HOSTFILE); if (NULL != opt) { char cwd[PRTE_PATH_MAX]; if (PRTE_SUCCESS != (rc = pmix_getcwd(cwd, sizeof(cwd)))) { pmix_show_help("help-prun.txt", "prun:init-failure", true, "get the cwd", rc); return 1; } // can only be one value if (1 < PMIx_Argv_count(opt->values)) { // report the error and abort param = PMIx_Argv_join(opt->values, ','); pmix_show_help("help-prterun.txt", "multiple-default-hostfiles", true, param); return 1; } if (!pmix_path_is_absolute(opt->values[0])) { param = pmix_os_path(false, cwd, opt->values[0], NULL); } else { param = opt->values[0]; } if (NULL != prte_default_hostfile) { free(prte_default_hostfile); } prte_default_hostfile = strdup(param); prte_default_hostfile_given = true; } /* check if we are running as root - if we are, then only allow * us to proceed if the allow-run-as-root flag was given. 
Otherwise, * exit with a giant warning message */ if (0 == geteuid()) { schizo->allow_run_as_root(&results); // will exit us if not allowed } // check for an appfile opt = pmix_cmd_line_get_param(&results, PRTE_CLI_APPFILE); if (NULL != opt) { // parse the file and add its context to the argv array fp = fopen(opt->values[0], "r"); if (NULL == fp) { pmix_show_help("help-prun", "appfile-failure", true, opt->values[0]); return 1; } first = true; while (NULL != (param = pmix_getline(fp))) { if (!first) { // add a colon delimiter PMIX_ARGV_APPEND_NOSIZE_COMPAT(&pargv, ":"); ++pargc; } // break the line down into parts split = PMIX_ARGV_SPLIT_COMPAT(param, ' '); for (n=0; NULL != split[n]; n++) { PMIX_ARGV_APPEND_NOSIZE_COMPAT(&pargv, split[n]); ++pargc; } PMIX_ARGV_FREE_COMPAT(split); first = false; } fclose(fp); } /* decide if we are to use a persistent DVM, or act alone */ opt = pmix_cmd_line_get_param(&results, PRTE_CLI_DVM); if (proxyrun && (NULL != opt || NULL != getenv("PRTEPROXY_USE_DVM"))) { /* use a persistent DVM - act like prun */ if (NULL != opt && NULL != opt->values && NULL != opt->values[0]) { /* they provided a directive on how to find the DVM */ if (0 == strncasecmp(opt->values[0], "file:", 5)) { /* change the key to match what prun expects */ free(opt->key); opt->key = strdup(PRTE_CLI_DVM_URI); } else if (0 == strncasecmp(opt->values[0], "uri:", 4)) { free(opt->key); opt->key = strdup(PRTE_CLI_DVM_URI); /* must remove the "uri:" prefix */ cptr = strdup(&opt->values[0][4]); free(opt->values[0]); opt->values[0] = cptr; } else if (0 == strncasecmp(opt->values[0], "pid:", 4)) { free(opt->key); opt->key = strdup(PRTE_CLI_PID); /* must remove the "pid:" prefix */ cptr = strdup(&opt->values[0][4]); free(opt->values[0]); opt->values[0] = cptr; } else if (0 == strncasecmp(opt->values[0], "ns:", 3)) { free(opt->key); opt->key = strdup(PRTE_CLI_NAMESPACE); /* must remove the "ns:" prefix */ cptr = strdup(&opt->values[0][3]); free(opt->values[0]); opt->values[0] = 
cptr; } else if (0 == strncasecmp(opt->values[0], "system", 6)) { /* direct to search for a system server */ free(opt->key); opt->key = strdup(PRTE_CLI_SYS_SERVER_ONLY); } else if (0 == strncasecmp(opt->values[0], "system-first", 6)) { /* direct to search for a system server first, and then * take the first available DVM */ free(opt->key); opt->key = strdup(PRTE_CLI_NAMESPACE); } else if (0 != strncasecmp(opt->values[0], "search", 6)) { /* "search" would mean to look for first available DVM, * so we wouldn't have to adjust anything as the opt * key is already set to PRTE_CLI_DVM, which will be * ignored so that the PMIx_tool_init in prun_common * will conduct its standard server search. * However, if this is not "search", then this is an * unknown option and must be reported to the user as * an error */ pmix_show_help("help-prun.txt", "bad-dvm-option", true, opt->values[0], prte_tool_basename); return 1; } } rc = prun_common(&results, schizo, argc, argv); exit(rc); } /** RUN INDEPENDENTLY */ /* if we were given a keepalive pipe, set up to monitor it now */ opt = pmix_cmd_line_get_param(&results, PRTE_CLI_KEEPALIVE); if (NULL != opt) { keepalive = true; PMIX_SETENV_COMPAT("PMIX_KEEPALIVE_PIPE", opt->values[0], true, &environ); } /* check for debug options */ if (pmix_cmd_line_is_taken(&results, PRTE_CLI_DEBUG)) { prte_debug_flag = true; } if (pmix_cmd_line_is_taken(&results, PRTE_CLI_DEBUG_DAEMONS)) { prte_debug_daemons_flag = true; } if (pmix_cmd_line_is_taken(&results, PRTE_CLI_DEBUG_DAEMONS_FILE)) { prte_debug_daemons_file_flag = true; } if (pmix_cmd_line_is_taken(&results, PRTE_CLI_LEAVE_SESSION_ATTACHED)) { prte_leave_session_attached = true; } // check for hetero nodes if (pmix_cmd_line_is_taken(&results, PRTE_CLI_HETERO_NODES)) { prte_hetero_nodes = true; } /* detach from controlling terminal * otherwise, remain attached so output can get to us */ if (pmix_cmd_line_is_taken(&results, PRTE_CLI_DAEMONIZE)) { if (0 > pipe(wait_pipe)) { return PRTE_ERROR; } 
prte_state_base.parent_fd = wait_pipe[1]; prte_daemon_init_callback(NULL, wait_dvm); close(wait_pipe[0]); } else { #if defined(HAVE_SETSID) /* see if we were directed to separate from current session */ if (pmix_cmd_line_is_taken(&results, PRTE_CLI_SET_SID)) { setsid(); } #endif } if (pmix_cmd_line_is_taken(&results, PRTE_CLI_NO_READY_MSG)) { prte_state_base.ready_msg = false; } if (pmix_cmd_line_is_taken(&results, PRTE_CLI_SYSTEM_SERVER)) { /* we should act as system-level PMIx server */ PMIX_SETENV_COMPAT("PRTE_MCA_pmix_system_server", "1", true, &environ); } /* always act as session-level PMIx server */ PMIX_SETENV_COMPAT("PRTE_MCA_pmix_session_server", "1", true, &environ); /* if we were asked to report a uri, set the MCA param to do so */ opt = pmix_cmd_line_get_param(&results, PRTE_CLI_REPORT_URI); if (NULL != opt) { prte_pmix_server_globals.report_uri = strdup(opt->values[0]); } /* if we were given a launch agent, set the MCA param for it */ opt = pmix_cmd_line_get_param(&results, PRTE_CLI_LAUNCH_AGENT); if (NULL != opt) { setenv("PRTE_MCA_prte_launch_agent", opt->values[0], true); // cmd line overrides all } /* if we are supporting a singleton, cache its ID * so it can get picked up and registered by server init */ opt = pmix_cmd_line_get_param(&results, PRTE_CLI_SINGLETON); if (NULL != opt) { prte_pmix_server_globals.singleton = strdup(opt->values[0]); } /* default to a persistent DVM */ prte_persistent = true; /* if we are told to daemonize, then we cannot have apps */ if (!pmix_cmd_line_is_taken(&results, PRTE_CLI_DAEMONIZE)) { /* see if they want to run an application - let's parse * the cmd line to get it */ rc = prte_parse_locals(schizo, &apps, pargv, &hostfiles, &hosts, &jobdata); // not-found => no app given if (PRTE_SUCCESS != rc && PRTE_ERR_NOT_FOUND != rc) { PRTE_UPDATE_EXIT_STATUS(rc); goto DONE; } /* did they provide an app? 
*/ if (PMIX_SUCCESS != rc || 0 == pmix_list_get_size(&apps)) { if (proxyrun) { pmix_show_help("help-prun.txt", "prun:executable-not-specified", true, prte_tool_basename, prte_tool_basename); PRTE_UPDATE_EXIT_STATUS(rc); goto DONE; } /* nope - just need to wait for instructions */ } else { /* they did provide an app - this is only allowed * when running as a proxy! */ if (!proxyrun) { pmix_show_help("help-prun.txt", "prun:executable-incorrectly-given", true, prte_tool_basename, prte_tool_basename); PRTE_UPDATE_EXIT_STATUS(rc); goto DONE; } /* mark that we are not a persistent DVM */ prte_persistent = false; } } /* setup PRTE infrastructure */ if (PRTE_SUCCESS != (ret = prte_init(&pargc, &pargv, PRTE_PROC_MASTER))) { PRTE_ERROR_LOG(ret); // ensure we cleanup any session dir we might have dropped prte_finalizing = true; prte_job_session_dir_finalize(NULL); return ret; } /* get my proc ID */ ret = PMIx_Get(NULL, PMIX_PROCID, NULL, 0, &val); if (PMIX_SUCCESS != ret) { PMIX_ERROR_LOG(ret); PRTE_UPDATE_EXIT_STATUS(PRTE_ERR_FATAL); goto DONE; } memcpy(&myproc, val->data.proc, sizeof(pmix_proc_t)); PMIX_VALUE_RELEASE(val); /* setup callbacks for signals we should forward */ opt = pmix_cmd_line_get_param(&results, PRTE_CLI_FWD_SIGNALS); if (NULL != opt) { param = opt->values[0]; } else { param = NULL; } if (PMIX_SUCCESS != (rc = prte_ess_base_setup_signals(param))) { PRTE_UPDATE_EXIT_STATUS(PRTE_ERR_FATAL); goto DONE; } if (0 < (i = pmix_list_get_size(&prte_ess_base_signals))) { forward_signals_events = (prte_event_t *) malloc(sizeof(prte_event_t) * i); if (NULL == forward_signals_events) { ret = PRTE_ERR_OUT_OF_RESOURCE; PRTE_UPDATE_EXIT_STATUS(PRTE_ERR_FATAL); goto DONE; } i = 0; PMIX_LIST_FOREACH(sig, &prte_ess_base_signals, prte_ess_base_signal_t) { setup_sighandler(sig->signal, forward_signals_events + i, signal_forward_callback); ++i; } } signals_set = true; /* if we are supporting a singleton, add it to our jobs */ if (NULL != prte_pmix_server_globals.singleton) { rc 
= prep_singleton(prte_pmix_server_globals.singleton); if (PRTE_SUCCESS != ret) { PRTE_UPDATE_EXIT_STATUS(PRTE_ERR_FATAL); goto DONE; } } /* setup the keepalive event registration */ PRTE_PMIX_CONSTRUCT_LOCK(&mylock.lock); code = PMIX_ERR_JOB_TERMINATED; PMIX_LOAD_PROCID(&pname, "PMIX_KEEPALIVE_PIPE", PMIX_RANK_UNDEF); PMIX_INFO_LOAD(&info, PMIX_EVENT_AFFECTED_PROC, &pname, PMIX_PROC); PMIx_Register_event_handler(&code, 1, &info, 1, parent_died_fn, evhandler_reg_callbk, (void *) &mylock); PRTE_PMIX_WAIT_THREAD(&mylock.lock); PMIX_INFO_DESTRUCT(&info); PRTE_PMIX_DESTRUCT_LOCK(&mylock.lock); /* check for launch directives in case we were launched by a * tool wanting to direct our operation - this needs to be * done prior to starting the DVM as it may include instructions * on the daemon executable, the fork/exec agent to be used by * the daemons, or other directives impacting the DVM itself. */ PMIX_LOAD_PROCID(&pname, myproc.nspace, PMIX_RANK_WILDCARD); PMIX_INFO_LOAD(&info, PMIX_OPTIONAL, NULL, PMIX_BOOL); /* Have to cycle over directives we support*/ ret = PMIx_Get(&pname, PMIX_FORKEXEC_AGENT, &info, 1, &val); PMIX_INFO_DESTRUCT(&info); if (PMIX_SUCCESS == ret) { /* set our fork/exec agent */ PMIX_VALUE_RELEASE(val); } /* start the DVM */ /* get the daemon job object - was created by ess/hnp component */ if (NULL == (jdata = prte_get_job_data_object(PRTE_PROC_MY_NAME->nspace))) { pmix_show_help("help-prun.txt", "bad-job-object", true, prte_tool_basename); PRTE_UPDATE_EXIT_STATUS(PRTE_ERR_FATAL); goto DONE; } /* ess/hnp also should have created a daemon "app" */ if (NULL == (dapp = (prte_app_context_t *) pmix_pointer_array_get_item(jdata->apps, 0))) { pmix_show_help("help-prun.txt", "bad-app-object", true, prte_tool_basename); PRTE_UPDATE_EXIT_STATUS(PRTE_ERR_FATAL); goto DONE; } /* check if we were given multiple prefixes on the cmd line, * and if so, are any of them different from each other. 
The * app parser puts every prefix it finds, regardless of app_context, * onto the jobdata list */ param = NULL; PMIX_LIST_FOREACH(iprteinfo, &jobdata, prte_info_item_t) { if (PMIx_Check_key(iprteinfo->info.key, "PRTE_PREFIX")) { if (NULL == param) { param = strdup(iprteinfo->info.value.data.string); } else if (0 != strcmp(param, iprteinfo->info.value.data.string)) { // we have non-matching prefixes pmix_show_help("help-plm-base.txt", "multiple-prefixes", true, prte_tool_basename, PRTE_CLI_PREFIX, PRTE_CLI_PREFIX, "PRRTE", PRTE_CLI_PREFIX, param, iprteinfo->info.value.data.string); PRTE_UPDATE_EXIT_STATUS(PRTE_ERR_FATAL); goto DONE; } } } /* Did the user specify a prefix, or want prefix by default? We * already checked for any prefixes on the cmd line, so we can * just use that result */ if (NULL != param || want_prefix_by_default) { if (NULL == param) { /* --enable-prte-prefix-default was given at time of configure. * NOTE: if the PRTE_PREFIX envar was set, then the installdirs * framework would have picked that value up during init and * populated the prte_install_dirs struct accordingly, so we * do not need to explicitly check the environment here */ param = strdup(prte_install_dirs.prefix); } /* "Parse" the param, aka remove superfluous path_sep. */ param_len = strlen(param); while (0 == strcmp(PRTE_PATH_SEP, &(param[param_len - 1]))) { param[param_len - 1] = '\0'; param_len--; if (0 == param_len) { /* We get here if we removed all PATH_SEP's and end up with an empty string. In this case, the prefix is just a single PATH_SEP. */ strncpy(param, PRTE_PATH_SEP, sizeof(param) - 1); break; } } } else if (NULL != (cptr = getenv("PRTE_PREFIX"))) { /* need to cover the case where "want prefix by default" is not * given, but PRTE_PREFIX was added to the environment prior * to actually invoking prte */ param = strdup(cptr); /* "Parse" the param, aka remove superfluous path_sep. 
*/ param_len = strlen(param); while (0 == strcmp(PRTE_PATH_SEP, &(param[param_len - 1]))) { param[param_len - 1] = '\0'; param_len--; if (0 == param_len) { /* We get here if we removed all PATH_SEP's and end up with an empty string. In this case, the prefix is just a single PATH_SEP. */ strncpy(param, PRTE_PATH_SEP, sizeof(param) - 1); break; } } } else { /* Check if called with fully-qualified path to prte. (Note: Put this second so can override with --prefix (above). */ tpath = NULL; if ('/' == argv[0][0]) { char *tmp_basename = NULL; tpath = pmix_dirname(argv[0]); if (NULL != tpath) { /* Quick sanity check to ensure we got something/bin/ and that the installation tree is at least more or less what we expect it to be */ tmp_basename = pmix_basename(tpath); if (0 == strcmp("bin", tmp_basename)) { char *tmp = tpath; tpath = pmix_dirname(tmp); free(tmp); } else { free(tpath); tpath = NULL; } free(tmp_basename); } if (NULL != tpath) { param = tpath; } } } if (NULL != param) { // add the directive to the daemon job object if (prte_get_attribute(&jdata->attributes, PRTE_JOB_PREFIX, (void **) &cptr, PMIX_STRING)) { // already have a prefix directory entry - see if they are the same if (0 != strcmp(cptr, param)) { pmix_show_help("help-plm-base.txt", "multiple-prrte-prefixes", true, prte_tool_basename, prte_tool_basename, prte_tool_basename, param, cptr); free(param); free(cptr); PRTE_UPDATE_EXIT_STATUS(PRTE_ERR_FATAL); goto DONE; } free(cptr); } prte_set_attribute(&jdata->attributes, PRTE_JOB_PREFIX, PRTE_ATTR_GLOBAL, param, PMIX_STRING); free(param); } /* do the same for PMIx prefixes for the daemons. Note that we don't * have a prefix-by-default setting here because that only applies * to PRRTE itself. If the PMIx library used by PRRTE was configured * with that option, we have no way of knowing about it, so there is * nothing we can do. 
Likewise, the use of an absolute path for PRRTE * doesn't say anything about the location of the PMIx library */ param = NULL; PMIX_LIST_FOREACH(iprteinfo, &jobdata, prte_info_item_t) { if (PMIx_Check_key(iprteinfo->info.key, PMIX_PREFIX)) { if (NULL == param) { param = strdup(iprteinfo->info.value.data.string); } else if (0 != strcmp(param, iprteinfo->info.value.data.string)) { // we have non-matching prefixes pmix_show_help("help-plm-base.txt", "multiple-prefixes", true, prte_tool_basename, PRTE_CLI_PMIX_PREFIX, PRTE_CLI_PMIX_PREFIX, "PMIx", PRTE_CLI_PMIX_PREFIX, param, iprteinfo->info.value.data.string); PRTE_UPDATE_EXIT_STATUS(PRTE_ERR_FATAL); goto DONE; } } } /* Did the user specify a prefix? We checked for any prefixes on the * cmd line above, so we can just use that result */ if (NULL != param) { /* "Parse" the param, aka remove superfluous path_sep. */ param_len = strlen(param); while (0 == strcmp(PRTE_PATH_SEP, &(param[param_len - 1]))) { param[param_len - 1] = '\0'; param_len--; if (0 == param_len) { /* We get here if we removed all PATH_SEP's and end up with an empty string. In this case, the prefix is just a single PATH_SEP. */ strncpy(param, PRTE_PATH_SEP, sizeof(param) - 1); break; } } } else if (NULL != (cptr = getenv("PMIX_PREFIX"))) { param = strdup(cptr); /* "Parse" the param, aka remove superfluous path_sep. */ param_len = strlen(param); while (0 == strcmp(PRTE_PATH_SEP, &(param[param_len - 1]))) { param[param_len - 1] = '\0'; param_len--; if (0 == param_len) { /* We get here if we removed all PATH_SEP's and end up with an empty string. In this case, the prefix is just a single PATH_SEP. 
*/ strncpy(param, PRTE_PATH_SEP, sizeof(param) - 1); break; } } } if (NULL != param) { // add the directive to the daemon job object if (prte_get_attribute(&jdata->attributes, PRTE_JOB_PMIX_PREFIX, (void **) &cptr, PMIX_STRING)) { // already have a prefix directory entry - see if they are the same if (0 != strcmp(cptr, param)) { pmix_show_help("help-plm-base.txt", "multiple-pmix-prefixes", true, prte_tool_basename, prte_tool_basename, param, cptr); free(param); free(cptr); PRTE_UPDATE_EXIT_STATUS(PRTE_ERR_FATAL); goto DONE; } free(cptr); } prte_set_attribute(&jdata->attributes, PRTE_JOB_PMIX_PREFIX, PRTE_ATTR_GLOBAL, param, PMIX_STRING); free(param); } /* apply any provided runtime options to the DVM itself */ opt = pmix_cmd_line_get_param(&results, PRTE_CLI_RTOS); if (NULL != opt) { rc = prte_state_base_set_runtime_options(jdata, opt->values[0]); } else { rc = prte_state_base_set_runtime_options(jdata, prte_schizo_base.default_runtime_options); } if (PRTE_SUCCESS != rc) { PRTE_UPDATE_EXIT_STATUS(PRTE_ERR_FATAL); goto DONE; } /* check a couple of display options for the DVM itself */ opt = pmix_cmd_line_get_param(&results, PRTE_CLI_DISPLAY); if (NULL != opt) { char **targv; char *tptr; int m; for (n=0; NULL != opt->values[n]; n++) { targv = PMIX_ARGV_SPLIT_COMPAT(opt->values[n], ','); for (i=0; NULL != targv[i]; i++) { if (PMIX_CHECK_CLI_OPTION(targv[i], PRTE_CLI_ALLOC)) { prte_set_attribute(&jdata->attributes, PRTE_JOB_DISPLAY_ALLOC, PRTE_ATTR_GLOBAL, NULL, PMIX_BOOL); break; } else if (PMIX_CHECK_CLI_OPTION(targv[i], PRTE_CLI_PARSEABLE) || PMIX_CHECK_CLI_OPTION(targv[i], PRTE_CLI_PARSABLE)) { prte_set_attribute(&jdata->attributes, PRTE_JOB_DISPLAY_PARSEABLE_OUTPUT, PRTE_ATTR_GLOBAL, NULL, PMIX_BOOL); } } PMIX_ARGV_FREE_COMPAT(targv); /* check for qualifiers */ tptr = strchr(opt->values[n], ':'); if (NULL != tptr) { ++tptr; targv = PMIX_ARGV_SPLIT_COMPAT(tptr, ':'); /* check qualifiers */ for (m=0; NULL != targv[m]; m++) { if (PMIX_CHECK_CLI_OPTION(targv[m], 
PRTE_CLI_PARSEABLE) || PMIX_CHECK_CLI_OPTION(targv[m], PRTE_CLI_PARSABLE)) { prte_set_attribute(&jdata->attributes, PRTE_JOB_DISPLAY_PARSEABLE_OUTPUT, PRTE_ATTR_GLOBAL, NULL, PMIX_BOOL); break; } } PMIX_ARGV_FREE_COMPAT(targv); } } } else if (NULL != prte_schizo_base.default_display_options) { char **targv; char *tptr; int m; targv = PMIX_ARGV_SPLIT_COMPAT(prte_schizo_base.default_display_options, ','); for (i=0; NULL != targv[i]; i++) { if (PMIX_CHECK_CLI_OPTION(targv[i], PRTE_CLI_ALLOC)) { prte_set_attribute(&jdata->attributes, PRTE_JOB_DISPLAY_ALLOC, PRTE_ATTR_GLOBAL, NULL, PMIX_BOOL); } else if (PMIX_CHECK_CLI_OPTION(targv[i], PRTE_CLI_PARSEABLE) || PMIX_CHECK_CLI_OPTION(targv[i], PRTE_CLI_PARSABLE)) { prte_set_attribute(&jdata->attributes, PRTE_JOB_DISPLAY_PARSEABLE_OUTPUT, PRTE_ATTR_GLOBAL, NULL, PMIX_BOOL); } } PMIX_ARGV_FREE_COMPAT(targv); /* check for qualifiers */ tptr = strchr(prte_schizo_base.default_display_options, ':'); if (NULL != tptr) { ++tptr; targv = PMIX_ARGV_SPLIT_COMPAT(tptr, ':'); /* check qualifiers */ for (m=0; NULL != targv[m]; m++) { if (PMIX_CHECK_CLI_OPTION(targv[m], PRTE_CLI_PARSEABLE) || PMIX_CHECK_CLI_OPTION(targv[m], PRTE_CLI_PARSABLE)) { prte_set_attribute(&jdata->attributes, PRTE_JOB_DISPLAY_PARSEABLE_OUTPUT, PRTE_ATTR_GLOBAL, NULL, PMIX_BOOL); break; } } PMIX_ARGV_FREE_COMPAT(targv); } } /* setup to listen for commands sent specifically to me, even though I would probably * be the one sending them! 
Unfortunately, since I am a participating daemon, * there are times I need to send a command to "all daemons", and that means *I* have * to receive it too */ PRTE_RML_RECV(PRTE_NAME_WILDCARD, PRTE_RML_TAG_DAEMON, PRTE_RML_PERSISTENT, prte_daemon_recv, NULL); /* setup to capture job-level info */ PMIX_INFO_LIST_START(jinfo); /* see if we ourselves were spawned by someone */ PMIX_LOAD_PROCID(&pname, myproc.nspace, PMIX_RANK_WILDCARD); ret = PMIx_Get(&pname, PMIX_PARENT_ID, NULL, 0, &val); if (PMIX_SUCCESS == ret) { PMIX_LOAD_PROCID(&prte_process_info.my_parent, val->data.proc->nspace, val->data.proc->rank); PMIX_VALUE_RELEASE(val); PMIX_INFO_LIST_ADD(ret, jinfo, PMIX_REQUESTOR_IS_TOOL, NULL, PMIX_BOOL); /* indicate that we are launching on behalf of a parent */ PMIX_INFO_LIST_ADD(ret, jinfo, PMIX_PARENT_ID, &prte_process_info.my_parent, PMIX_PROC); } else { PMIX_LOAD_PROCID(&prte_process_info.my_parent, prte_process_info.myproc.nspace, prte_process_info.myproc.rank); } /* add any hostfile directives to the daemon job */ if (prte_persistent) { opt = pmix_cmd_line_get_param(&results, PRTE_CLI_HOSTFILE); if (NULL != opt) { tpath = PMIX_ARGV_JOIN_COMPAT(opt->values, ','); prte_set_attribute(&dapp->attributes, PRTE_APP_HOSTFILE, PRTE_ATTR_GLOBAL, tpath, PMIX_STRING); free(tpath); } /* Did the user specify any hosts? 
*/ opt = pmix_cmd_line_get_param(&results, PRTE_CLI_HOST); if (NULL != opt) { char *tval; tval = PMIX_ARGV_JOIN_COMPAT(opt->values, ','); prte_set_attribute(&dapp->attributes, PRTE_APP_DASH_HOST, PRTE_ATTR_GLOBAL, tval, PMIX_STRING); free(tval); } } else { /* the directives might be in the app(s) */ if (NULL != hostfiles) { char *tval; tval = PMIX_ARGV_JOIN_COMPAT(hostfiles, ','); prte_set_attribute(&dapp->attributes, PRTE_APP_HOSTFILE, PRTE_ATTR_GLOBAL, tval, PMIX_STRING); free(tval); PMIX_ARGV_FREE_COMPAT(hostfiles); } if (NULL != hosts) { char *tval; tval = PMIX_ARGV_JOIN_COMPAT(hosts, ','); prte_set_attribute(&dapp->attributes, PRTE_APP_DASH_HOST, PRTE_ATTR_GLOBAL, tval, PMIX_STRING); free(tval); PMIX_ARGV_FREE_COMPAT(hosts); } } /* spawn the DVM - we skip the initial steps as this * isn't a user-level application */ PRTE_ACTIVATE_JOB_STATE(jdata, PRTE_JOB_STATE_ALLOCATE); /* we need to loop the event library until the DVM is alive */ while (prte_event_base_active && !prte_dvm_ready) { prte_event_loop(prte_event_base, PRTE_EVLOOP_ONCE); } /* check if something went wrong with setting up the dvm, bail out */ if (!prte_dvm_ready) { PRTE_UPDATE_EXIT_STATUS(PRTE_ERR_FATAL); goto DONE; } // see if we are to suicide if (PMIX_RANK_INVALID != prted_debug_failure) { /* are we the specified vpid? */ if (PRTE_PROC_MY_NAME->rank == prted_debug_failure || prted_debug_failure == PMIX_RANK_WILDCARD) { /* if the user specified we delay, then setup a timer * and have it kill us */ if (0 < prted_debug_failure_delay) { PRTE_TIMER_EVENT(prted_debug_failure_delay, 0, shutdown_callback); } else { pmix_output(0, "%s is executing clean abnormal termination", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)); /* do -not- call finalize as this will send a message to the HNP * indicating clean termination! 
Instead, just forcibly cleanup * the local session_dir tree and exit */ jdata = prte_get_job_data_object(PRTE_PROC_MY_NAME->nspace); PMIX_RELEASE(jdata); /* return with non-zero status */ ret = PRTE_ERROR_DEFAULT_EXIT_CODE; goto DONE; } } } opt = pmix_cmd_line_get_param(&results, PRTE_CLI_REPORT_PID); if (NULL != opt) { /* if the string is a "-", then output to stdout */ if (0 == strcmp(opt->values[0], "-")) { fprintf(stdout, "%lu\n", (unsigned long) getpid()); } else if (0 == strcmp(opt->values[0], "+")) { /* output to stderr */ fprintf(stderr, "%lu\n", (unsigned long) getpid()); } else { char *leftover; int outpipe; /* see if it is an integer pipe */ leftover = NULL; outpipe = strtol(opt->values[0], &leftover, 10); if (NULL == leftover || 0 == strlen(leftover)) { /* stitch together the var names and URI */ pmix_asprintf(&leftover, "%lu", (unsigned long) getpid()); /* output to the pipe */ rc = pmix_fd_write(outpipe, strlen(leftover) + 1, leftover); free(leftover); close(outpipe); } else { /* must be a file */ fp = fopen(opt->values[0], "w"); if (NULL == fp) { pmix_output(0, "Impossible to open the file %s in write mode\n", opt->values[0]); PRTE_UPDATE_EXIT_STATUS(1); goto DONE; } /* output my PID */ fprintf(fp, "%lu\n", (unsigned long) getpid()); fclose(fp); mypidfile = strdup(opt->values[0]); } } } if (prte_persistent) { PMIX_INFO_LIST_RELEASE(jinfo); goto proceed; } /***** CHECK FOR LAUNCH DIRECTIVES - ADD THEM TO JOB INFO IF FOUND ****/ PMIX_LOAD_PROCID(&pname, myproc.nspace, PMIX_RANK_WILDCARD); PMIX_INFO_LOAD(&info, PMIX_OPTIONAL, NULL, PMIX_BOOL); ret = PMIx_Get(&pname, PMIX_LAUNCH_DIRECTIVES, &info, 1, &val); PMIX_INFO_DESTRUCT(&info); if (PMIX_SUCCESS == ret) { iptr = (pmix_info_t *) val->data.darray->array; ninfo = val->data.darray->size; for (n = 0; n < ninfo; n++) { PMIX_INFO_LIST_XFER(ret, jinfo, &iptr[n]); } PMIX_VALUE_RELEASE(val); } ret = prte_prun_parse_common_cli(jinfo, &results, schizo, &apps); if (PRTE_SUCCESS != ret) { goto DONE; } /* convert 
the job info into an array */ PMIX_INFO_LIST_CONVERT(ret, jinfo, &darray); if (PMIX_ERR_EMPTY == ret) { iptr = NULL; ninfo = 0; } else if (PMIX_SUCCESS != ret) { PMIX_ERROR_LOG(ret); PRTE_UPDATE_EXIT_STATUS(rc); goto DONE; } else { iptr = (pmix_info_t *) darray.array; ninfo = darray.size; } PMIX_INFO_LIST_RELEASE(jinfo); /* convert the apps to an array */ napps = pmix_list_get_size(&apps); PMIX_APP_CREATE(papps, napps); n = 0; PMIX_LIST_FOREACH(app, &apps, prte_pmix_app_t) { papps[n].cmd = strdup(app->app.cmd); papps[n].argv = PMIX_ARGV_COPY_COMPAT(app->app.argv); papps[n].env = PMIX_ARGV_COPY_COMPAT(app->app.env); papps[n].cwd = strdup(app->app.cwd); papps[n].maxprocs = app->app.maxprocs; PMIX_INFO_LIST_CONVERT(ret, app->info, &darray); if (PMIX_SUCCESS != ret) { if (PMIX_ERR_EMPTY == ret) { papps[n].info = NULL; papps[n].ninfo = 0; } else { PMIX_ERROR_LOG(ret); PRTE_UPDATE_EXIT_STATUS(rc); goto DONE; } } else { papps[n].info = (pmix_info_t *) darray.array; papps[n].ninfo = darray.size; } ++n; } if (verbose) { pmix_output(0, "Spawning job"); } /* let the PMIx server handle it for us so that all the job infos * get properly recorded - e.g., forwarding IOF */ PRTE_PMIX_CONSTRUCT_LOCK(&lock); ret = PMIx_Spawn_nb(iptr, ninfo, papps, napps, spcbfunc, &lock); if (PRTE_SUCCESS != ret) { pmix_output(0, "PMIx_Spawn failed (%d): %s", ret, PMIx_Error_string(ret)); rc = ret; PRTE_UPDATE_EXIT_STATUS(rc); goto DONE; } /* we have to cycle the event library here so we can process * the spawn request */ while (prte_event_base_active && lock.active) { prte_event_loop(prte_event_base, PRTE_EVLOOP_ONCE); } PMIX_ACQUIRE_OBJECT(&lock.lock); if (PMIX_SUCCESS != lock.status) { PRTE_UPDATE_EXIT_STATUS(lock.status); goto DONE; } PMIX_LOAD_NSPACE(spawnednspace, lock.msg); PRTE_PMIX_DESTRUCT_LOCK(&lock); if (verbose) { pmix_output(0, "JOB %s EXECUTING", PRTE_JOBID_PRINT(spawnednspace)); } /* check what user wants us to do with stdin */ PMIX_LOAD_NSPACE(pname.nspace, spawnednspace); opt = 
pmix_cmd_line_get_param(&results, PRTE_CLI_STDIN); if (NULL != opt) { if (0 == strcmp(opt->values[0], "all")) { pname.rank = PMIX_RANK_WILDCARD; } else if (0 == strcmp(opt->values[0], "none")) { pname.rank = PMIX_RANK_INVALID; } else { pname.rank = strtoul(opt->values[0], NULL, 10); } } else { pname.rank = 0; } if (PMIX_RANK_INVALID != pname.rank) { PMIX_INFO_CREATE(iptr2, 1); PMIX_INFO_LOAD(&iptr2[0], PMIX_IOF_PUSH_STDIN, NULL, PMIX_BOOL); PRTE_PMIX_CONSTRUCT_LOCK(&lock); ret = PMIx_IOF_push(&pname, 1, NULL, iptr2, 1, opcbfunc, &lock); if (PMIX_SUCCESS != ret && PMIX_OPERATION_SUCCEEDED != ret) { pmix_output(0, "IOF push of stdin failed: %s", PMIx_Error_string(ret)); } else if (PMIX_SUCCESS == ret) { PRTE_PMIX_WAIT_THREAD(&lock); } PRTE_PMIX_DESTRUCT_LOCK(&lock); PMIX_INFO_FREE(iptr2, 1); } proceed: /* loop the event lib until an exit event is detected */ while (prte_event_base_active) { prte_event_loop(prte_event_base, PRTE_EVLOOP_ONCE); } PMIX_ACQUIRE_OBJECT(prte_event_base_active); /* close the push of our stdin */ PMIX_INFO_LOAD(&info, PMIX_IOF_COMPLETE, NULL, PMIX_BOOL); PRTE_PMIX_CONSTRUCT_LOCK(&lock); ret = PMIx_IOF_push(NULL, 0, NULL, &info, 1, opcbfunc, &lock); if (PMIX_SUCCESS != ret && PMIX_OPERATION_SUCCEEDED != ret) { pmix_output(0, "IOF close of stdin failed: %s", PMIx_Error_string(ret)); } else if (PMIX_SUCCESS == ret) { PRTE_PMIX_WAIT_THREAD(&lock); } PRTE_PMIX_DESTRUCT_LOCK(&lock); PMIX_INFO_DESTRUCT(&info); DONE: /* cleanup and leave */ prte_finalize(); if (NULL != mypidfile) { unlink(mypidfile); } if (prte_debug_flag) { fprintf(stderr, "exiting with status %d\n", prte_exit_status); } exit(prte_exit_status); } static void clean_abort(int fd, short flags, void *arg) { PRTE_HIDE_UNUSED_PARAMS(fd, flags); if (keepalive && NULL == arg) { // ignore this return; } /* if we have already ordered this once, don't keep * doing it to avoid race conditions */ if (pmix_mutex_trylock(&prun_abort_inprogress_lock)) { /* returns 1 if already locked */ if 
(forcibly_die) { /* exit with a non-zero status */ exit(1); } fprintf(stderr, "%s: abort is already in progress...hit ctrl-c again to forcibly terminate\n\n", prte_tool_basename); forcibly_die = true; /* reset the event */ prte_event_add(&term_handler, NULL); return; } fflush(stderr); /* ensure we exit with a non-zero status */ PRTE_UPDATE_EXIT_STATUS(PRTE_ERROR_DEFAULT_EXIT_CODE); /* ensure that the forwarding of stdin stops */ prte_dvm_abort_ordered = true; /* tell us to be quiet - hey, the user killed us with a ctrl-c, * so need to tell them that! */ prte_execute_quiet = true; prte_abnormal_term_ordered = true; /* We are in an event handler; the job completed procedure will delete the signal handler that is currently running (which is a Bad Thing), so we can't call it directly. Instead, we have to exit this handler and setup to call job_completed() after this. */ prte_plm.terminate_orteds(); if (NULL != arg) { PMIX_RELEASE(arg); } } static bool first = true; static bool second = true; static void surekill(void) { prte_proc_t *child; int n; pid_t pid; /* we don't know how far we got, so be careful here */ if (NULL != prte_local_children) { for (n=0; n < prte_local_children->size; n++) { child = (prte_proc_t*)pmix_pointer_array_get_item(prte_local_children, n); if (NULL != child && 0 < child->pid) { pid = child->pid; #if HAVE_SETPGID { pid_t pgrp; pgrp = getpgid(pid); if (-1 != pgrp) { /* target the lead process of the process * group so we ensure that the signal is * seen by all members of that group. This * ensures that the signal is seen by any * child processes our child may have * started */ pid = -pgrp; } } #endif kill(pid, SIGKILL); } } } } /* * Attempt to terminate the job and wait for callback indicating * the job has been aborted. 
*/ static void abort_signal_callback(int fd) { uint8_t foo = 1; char *msg = "Abort is in progress...hit ctrl-c again to forcibly terminate\n\n"; PRTE_HIDE_UNUSED_PARAMS(fd); /* if this is the first time thru, just get * the current time */ if (first) { first = false; /* tell the event lib to attempt to abnormally terminate */ if (-1 == write(term_pipe[1], &foo, 1)) { exit(1); } } else if (second) { if (-1 == write(2, (void *) msg, strlen(msg))) { exit(1); } fflush(stderr); second = false; } else { surekill(); // ensure we attempt to kill everything pmix_os_dirpath_destroy(prte_process_info.top_session_dir, true, NULL); exit(1); } } static int prep_singleton(const char *name) { char *ptr, *p1; prte_job_t *jdata; prte_node_t *node; prte_proc_t *proc; int rc; pmix_rank_t rank; prte_app_context_t *app; char cwd[PRTE_PATH_MAX]; ptr = strdup(name); p1 = strrchr(ptr, '.'); *p1 = '\0'; ++p1; rank = strtoul(p1, NULL, 10); jdata = PMIX_NEW(prte_job_t); PMIX_LOAD_NSPACE(jdata->nspace, ptr); free(ptr); rc = prte_set_job_data_object(jdata); if (PRTE_SUCCESS != rc) { PRTE_UPDATE_EXIT_STATUS(PRTE_ERR_FATAL); PMIX_RELEASE(jdata); return PRTE_ERR_FATAL; } /* must have an app */ app = PMIX_NEW(prte_app_context_t); app->app = strdup(jdata->nspace); app->num_procs = 1; PMIX_ARGV_APPEND_NOSIZE_COMPAT(&app->argv, app->app); pmix_getcwd(cwd, sizeof(cwd)); app->cwd = strdup(cwd); pmix_pointer_array_set_item(jdata->apps, 0, app); jdata->num_apps = 1; /* add a map */ jdata->map = PMIX_NEW(prte_job_map_t); /* add our node to the map since the singleton must * be here */ node = (prte_node_t *) pmix_pointer_array_get_item(prte_node_pool, PRTE_PROC_MY_NAME->rank); PMIX_RETAIN(node); pmix_pointer_array_add(jdata->map->nodes, node); ++(jdata->map->num_nodes); /* create a proc for the singleton */ proc = PMIX_NEW(prte_proc_t); PMIX_LOAD_PROCID(&proc->name, jdata->nspace, rank); proc->parent = PRTE_PROC_MY_NAME->rank; proc->app_idx = 0; proc->app_rank = rank; proc->local_rank = 0; proc->node_rank = 
0; proc->state = PRTE_PROC_STATE_RUNNING; /* link it to the app */ PMIX_RETAIN(proc); pmix_pointer_array_set_item(&app->procs, rank, proc); app->first_rank = rank; /* link it to the node */ PMIX_RETAIN(node); proc->node = node; /* add it to the job */ pmix_pointer_array_set_item(jdata->procs, rank, proc); jdata->num_procs = 1; jdata->num_local_procs = 1; /* add it to the node */ PMIX_RETAIN(proc); pmix_pointer_array_add(node->procs, proc); node->num_procs = 1; node->slots_inuse = 1; // register the info with our PMIx server rc = prte_pmix_server_register_nspace(jdata); return rc; } static void signal_forward_callback(int signum, short args, void *cbdata) { pmix_status_t rc; pmix_proc_t proc; pmix_info_t info; PRTE_HIDE_UNUSED_PARAMS(args, cbdata); if (verbose) { fprintf(stderr, "%s: Forwarding signal %d to job\n", prte_tool_basename, signum); } /* send the signal out to the processes */ PMIX_LOAD_PROCID(&proc, spawnednspace, PMIX_RANK_WILDCARD); PMIX_INFO_LOAD(&info, PMIX_JOB_CTRL_SIGNAL, &signum, PMIX_INT); rc = PMIx_Job_control(&proc, 1, &info, 1, NULL, NULL); if (PMIX_SUCCESS != rc && PMIX_OPERATION_SUCCEEDED != rc) { fprintf(stderr, "Signal %d could not be sent to job %s (returned %s)", signum, spawnednspace, PMIx_Error_string(rc)); } } /** * Deal with sigpipe errors */ static int sigpipe_error_count = 0; static void epipe_signal_callback(int fd, short args, void *cbdata) { PRTE_HIDE_UNUSED_PARAMS(fd, args, cbdata); sigpipe_error_count++; if (10 < sigpipe_error_count) { /* time to abort */ pmix_output(0, "%s: SIGPIPE detected - aborting", prte_tool_basename); clean_abort(0, 0, NULL); } return; } prrte-3.0.13/src/tools/prte/Makefile.am0000664000175000017500000000430215145263240020054 0ustar alastairalastair# # Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. 
All rights # reserved. # Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2008-2020 Cisco Systems, Inc. All rights reserved # Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. # Copyright (c) 2015-2020 Intel, Inc. All rights reserved. # Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow # # $HEADER$ # AM_LDFLAGS = $(prte_hwloc_LDFLAGS) $(prte_libevent_LDFLAGS) $(prte_pmix_LDFLAGS) AM_CFLAGS = \ -DPRTE_CONFIGURE_USER="\"@PRTE_CONFIGURE_USER@\"" \ -DPRTE_CONFIGURE_HOST="\"@PRTE_CONFIGURE_HOST@\"" \ -DPRTE_CONFIGURE_DATE="\"@PRTE_CONFIGURE_DATE@\"" \ -DPRTE_BUILD_USER="\"$$USER\"" \ -DPRTE_BUILD_HOST="\"$${HOSTNAME:-`(hostname || uname -n) | sed 1q`}\"" \ -DPRTE_BUILD_DATE="\"`$(top_srcdir)/config/getdate.sh`\"" \ -DPRTE_BUILD_CFLAGS="\"@CFLAGS@\"" \ -DPRTE_BUILD_CPPFLAGS="\"@CPPFLAGS@\"" \ -DPRTE_BUILD_LDFLAGS="\"@LDFLAGS@\"" \ -DPRTE_BUILD_LIBS="\"@LIBS@\"" \ -DPRTE_CC_ABSOLUTE="\"@PRTE_CC_ABSOLUTE@\"" \ -DPRTE_GREEK_VERSION="\"@PRTE_GREEK_VERSION@\"" \ -DPRTE_REPO_REV="\"@PRTE_REPO_REV@\"" \ -DPMIX_RELEASE_DATE="\"@PMIX_RELEASE_DATE@\"" bin_PROGRAMS = prte prte_SOURCES = \ prte.c prte_LDADD = \ $(prte_libevent_LIBS) \ $(prte_hwloc_LIBS) \ $(prte_pmix_LIBS) \ $(top_builddir)/src/libprrte.la install-exec-hook: (cd $(DESTDIR)$(bindir); rm -f prterun$(EXEEXT); $(LN_S) prte$(EXEEXT) prterun$(EXEEXT)) uninstall-local: rm -f $(DESTDIR)$(bindir)/prterun$(EXEEXT) prrte-3.0.13/src/tools/prun/0000775000175000017500000000000015145263240016033 5ustar alastairalastairprrte-3.0.13/src/tools/prun/main.c0000664000175000017500000000270615145263240017130 0ustar alastairalastair/*************************************************************************** * * * PRTE: PMIx Reference RunTime Environment (PRTE) * * * * 
/*
 * Entry point of the prun tool: all of the work is done by prun(),
 * whose result becomes the process exit status.
 */
int main(int argc, char *argv[])
{
    const int status = prun(argc, argv);

    return status;
}
#ifndef PRUN_H
#define PRUN_H

#include "prte_config.h"

BEGIN_C_DECLS

/**
 * Main body of prun functionality
 *
 * @param argc  number of entries in argv, as handed to main()
 * @param argv  command line of the tool, as handed to main()
 *
 * @return an error code when an early setup step fails. Once setup
 *         has succeeded the implementation leaves through exit(), so
 *         callers should not rely on this function returning.
 */
int prun(int argc, char *argv[]);

END_C_DECLS

#endif /* PRUN_H */
* $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #include "prte_config.h" #include "src/include/constants.h" #include "src/include/version.h" #include #include #include #ifdef HAVE_STRINGS_H # include #endif /* HAVE_STRINGS_H */ #ifdef HAVE_UNISTD_H # include #endif #ifdef HAVE_SYS_PARAM_H # include #endif #include #include #include #ifdef HAVE_SYS_TYPES_H # include #endif /* HAVE_SYS_TYPES_H */ #ifdef HAVE_SYS_WAIT_H # include #endif /* HAVE_SYS_WAIT_H */ #ifdef HAVE_SYS_TIME_H # include #endif /* HAVE_SYS_TIME_H */ #include #ifdef HAVE_SYS_STAT_H # include #endif #ifdef HAVE_POLL_H # include #endif #include "src/event/event-internal.h" #include "src/mca/base/pmix_base.h" #include "src/mca/prteinstalldirs/prteinstalldirs.h" #include "src/pmix/pmix-internal.h" #include "src/threads/pmix_mutex.h" #include "src/util/pmix_argv.h" #include "src/util/pmix_basename.h" #include "src/util/prte_cmd_line.h" #include "src/util/pmix_fd.h" #include "src/util/pmix_os_path.h" #include "src/util/pmix_output.h" #include "src/util/pmix_path.h" #include "src/util/pmix_printf.h" #include "src/util/pmix_environ.h" #include "src/util/pmix_show_help.h" #include "src/util/pmix_string_copy.h" #include "src/class/pmix_pointer_array.h" #include "src/runtime/prte_progress_threads.h" #include "prun.h" #include "src/mca/errmgr/errmgr.h" #include "src/mca/ess/base/base.h" #include "src/mca/schizo/base/base.h" #include "src/mca/state/state.h" #include "src/prted/prted.h" #include "src/runtime/prte_globals.h" #include "src/runtime/runtime.h" #include "src/prted/pmix/pmix_server.h" #include "src/prted/pmix/pmix_server_internal.h" typedef struct { prte_pmix_lock_t lock; pmix_info_t *info; size_t ninfo; } mylock_t; int prun(int argc, char *argv[]) { int rc = 1, i; pmix_list_t apps; char **pargv; int pargc; prte_schizo_base_module_t *schizo; char hostname[PRTE_PATH_MAX]; char *personality; pmix_cli_result_t results; pmix_cli_item_t *opt; FILE *fp; char *mypidfile = NULL; bool first; 
char **split; char *param; int n; /* init the globals */ PMIX_CONSTRUCT(&apps, pmix_list_t); prte_tool_basename = pmix_basename(argv[0]); prte_tool_actual = "prun"; pargc = argc; pargv = pmix_argv_copy_strip(argv); // strip any quoted arguments gethostname(hostname, sizeof(hostname)); rc = prte_init_minimum(); if (PRTE_SUCCESS != rc) { return rc; } /* because we have to use the schizo framework and init our hostname * prior to parsing the incoming argv for cmd line options, do a hacky * search to support passing of impacted options (e.g., verbosity for schizo) */ rc = prte_schizo_base_parse_prte(pargc, 0, pargv, NULL); if (PRTE_SUCCESS != rc) { return rc; } rc = prte_schizo_base_parse_pmix(pargc, 0, pargv, NULL); if (PRTE_SUCCESS != rc) { return rc; } /* init the tiny part of PRTE we use */ prte_init_util(PRTE_PROC_TYPE_NONE); /* setup an event base */ rc = prte_event_base_open(); if (PRTE_SUCCESS != rc) { fprintf(stderr, "Unable to initialize event library\n"); exit(1); } /* open the SCHIZO framework */ rc = pmix_mca_base_framework_open(&prte_schizo_base_framework, PMIX_MCA_BASE_OPEN_DEFAULT); if (PRTE_SUCCESS != rc) { PRTE_ERROR_LOG(rc); return rc; } if (PRTE_SUCCESS != (rc = prte_schizo_base_select())) { PRTE_ERROR_LOG(rc); return rc; } /* look for any personality specification and do a quick sanity check */ personality = NULL; bool rankby_found = false; bool bindto_found = false; for (i = 0; NULL != pargv[i]; i++) { if (0 == strcmp(pargv[i], "--personality")) { personality = pargv[i + 1]; continue; } if (0 == strcmp(pargv[i], "--map-by")) { free(pargv[i]); pargv[i] = strdup("--mapby"); continue; } if (0 == strcmp(pargv[i], "--rank-by") || 0 == strcmp(pargv[i], "--rankby")) { if (rankby_found) { pmix_show_help("help-schizo-base.txt", "multi-instances", true, pargv[i]); return PRTE_ERR_BAD_PARAM; } rankby_found = true; if (0 == strcmp(pargv[i], "--rank-by")) { free(pargv[i]); pargv[i] = strdup("--rankby"); } continue; } if (0 == strcmp(pargv[i], "--bind-to") || 0 
== strcmp(pargv[i], "--bindto")) { if (bindto_found) { pmix_show_help("help-schizo-base.txt", "multi-instances", true, "bind-to"); return PRTE_ERR_BAD_PARAM; } bindto_found = true; if (0 == strcmp(pargv[i], "--bind-to")) { free(pargv[i]); pargv[i] = strdup("--bindto"); } continue; } if (0 == strcmp(pargv[i], "--runtime-options")) { free(pargv[i]); pargv[i] = strdup("--rtos"); continue; } } /* detect if we are running as a proxy and select the active * schizo module for this tool */ schizo = prte_schizo_base_detect_proxy(personality); if (NULL == schizo) { pmix_show_help("help-schizo-base.txt", "no-proxy", true, prte_tool_basename, personality); return 1; } if (NULL == personality) { personality = schizo->name; } /* Register all global MCA Params */ if (PRTE_SUCCESS != (rc = prte_register_params())) { if (PRTE_ERR_SILENT != rc) { pmix_show_help("help-prte-runtime", "prte_init:startup:internal-failure", true, "prte register params", PRTE_ERROR_NAME(rc), rc); } return 1; } /* parse the input argv to get values, including everyone's MCA params */ PMIX_CONSTRUCT(&results, pmix_cli_result_t); // check for special case of executable immediately following tool if ('-' != pargv[1][0]) { results.tail = PMIx_Argv_copy(&pargv[1]); } else { rc = schizo->parse_cli(pargv, &results, PMIX_CLI_WARN); if (PRTE_SUCCESS != rc) { PMIX_DESTRUCT(&results); if (PRTE_OPERATION_SUCCEEDED == rc) { return PRTE_SUCCESS; } if (PRTE_ERR_SILENT != rc) { fprintf(stderr, "%s: command line error (%s)\n", prte_tool_basename, prte_strerror(rc)); } return rc; } } /* check if we are running as root - if we are, then only allow * us to proceed if the allow-run-as-root flag was given. 
Otherwise, * exit with a giant warning message */ if (0 == geteuid()) { schizo->allow_run_as_root(&results); // will exit us if not allowed } opt = pmix_cmd_line_get_param(&results, PRTE_CLI_REPORT_PID); if (NULL != opt) { /* if the string is a "-", then output to stdout */ if (0 == strcmp(opt->values[0], "-")) { fprintf(stdout, "%lu\n", (unsigned long) getpid()); } else if (0 == strcmp(opt->values[0], "+")) { /* output to stderr */ fprintf(stderr, "%lu\n", (unsigned long) getpid()); } else { char *leftover; int outpipe; /* see if it is an integer pipe */ leftover = NULL; outpipe = strtol(opt->values[0], &leftover, 10); if (NULL == leftover || 0 == strlen(leftover)) { /* stitch together the var names and URI */ pmix_asprintf(&leftover, "%lu", (unsigned long) getpid()); /* output to the pipe */ pmix_fd_write(outpipe, strlen(leftover) + 1, leftover); free(leftover); close(outpipe); } else { /* must be a file */ fp = fopen(opt->values[0], "w"); if (NULL == fp) { pmix_output(0, "Impossible to open the file %s in write mode\n", opt->values[0]); PRTE_UPDATE_EXIT_STATUS(1); goto DONE; } /* output my PID */ fprintf(fp, "%lu\n", (unsigned long) getpid()); fclose(fp); mypidfile = strdup(opt->values[0]); } } } /* if we were asked to report a uri, set the MCA param to do so */ opt = pmix_cmd_line_get_param(&results, PRTE_CLI_REPORT_URI); if (NULL != opt) { prte_pmix_server_globals.report_uri = strdup(opt->values[0]); } // check for an appfile opt = pmix_cmd_line_get_param(&results, PRTE_CLI_APPFILE); if (NULL != opt) { // parse the file and add its context to the argv array fp = fopen(opt->values[0], "r"); if (NULL == fp) { pmix_show_help("help-prun.txt", "appfile-failure", true, opt->values[0]); if (NULL != mypidfile) { free(mypidfile); } return 1; } first = true; while (NULL != (param = pmix_getline(fp))) { if (!first) { // add a colon delimiter PMIX_ARGV_APPEND_NOSIZE_COMPAT(&pargv, ":"); ++pargc; } // break the line down into parts split = PMIX_ARGV_SPLIT_COMPAT(param, ' 
'); for (n=0; NULL != split[n]; n++) { PMIX_ARGV_APPEND_NOSIZE_COMPAT(&pargv, split[n]); ++pargc; } PMIX_ARGV_FREE_COMPAT(split); first = false; } fclose(fp); } // open the ess framework so it can init the signal forwarding // list - we don't actually need the components rc = pmix_mca_base_framework_open(&prte_ess_base_framework, PMIX_MCA_BASE_OPEN_DEFAULT); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto DONE; } rc = prun_common(&results, schizo, pargc, pargv); DONE: // cleanup and leave if (NULL != mypidfile) { unlink(mypidfile); } (void) pmix_mca_base_framework_close(&prte_ess_base_framework); exit(prte_exit_status); } prrte-3.0.13/src/tools/prun/Makefile.am0000664000175000017500000000430715145263240020073 0ustar alastairalastair# # Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. # Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2008-2020 Cisco Systems, Inc. All rights reserved # Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. # Copyright (c) 2015-2020 Intel, Inc. All rights reserved. # Copyright (c) 2019 Research Organization for Information Science # and Technology (RIST). All rights reserved. # Copyright (c) 2021-2023 Nanook Consulting. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow # # $HEADER$ # AM_LDFLAGS = $(prte_hwloc_LDFLAGS) $(prte_libevent_LDFLAGS) $(prte_pmix_LDFLAGS) AM_CFLAGS = \ -DPRTE_CONFIGURE_USER="\"@PRTE_CONFIGURE_USER@\"" \ -DPRTE_CONFIGURE_HOST="\"@PRTE_CONFIGURE_HOST@\"" \ -DPRTE_CONFIGURE_DATE="\"@PRTE_CONFIGURE_DATE@\"" \ -DPRTE_BUILD_USER="\"$$USER\"" \ -DPRTE_BUILD_HOST="\"$${HOSTNAME:-`(hostname || uname -n) | sed 1q`}\"" \ -DPRTE_BUILD_DATE="\"`$(top_srcdir)/config/getdate.sh`\"" \ -DPRTE_BUILD_CFLAGS="\"@CFLAGS@\"" \ -DPRTE_BUILD_CPPFLAGS="\"@CPPFLAGS@\"" \ -DPRTE_BUILD_LDFLAGS="\"@LDFLAGS@\"" \ -DPRTE_BUILD_LIBS="\"@LIBS@\"" \ -DPRTE_CC_ABSOLUTE="\"@PRTE_CC_ABSOLUTE@\"" \ -DPRTE_GREEK_VERSION="\"@PRTE_GREEK_VERSION@\"" \ -DPRTE_REPO_REV="\"@PRTE_REPO_REV@\"" \ -DPMIX_RELEASE_DATE="\"@PMIX_RELEASE_DATE@\"" bin_PROGRAMS = prun prun_SOURCES = \ main.c \ prun.c \ prun.h prun_LDADD = \ $(prte_libevent_LIBS) \ $(prte_hwloc_LIBS) \ $(prte_pmix_LIBS) \ $(top_builddir)/src/libprrte.la prrte-3.0.13/src/tools/pcc/0000775000175000017500000000000015145263240015614 5ustar alastairalastairprrte-3.0.13/src/tools/pcc/Makefile.am0000664000175000017500000000230515145263240017650 0ustar alastairalastair# # Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. # Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2006-2020 Cisco Systems, Inc. All rights reserved # Copyright (c) 2014-2020 Intel, Inc. All rights reserved. # Copyright (c) 2014-2019 Research Organization for Information Science # and Technology (RIST). All rights reserved. 
# Copyright (c) 2021-2025 Nanook Consulting All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow # # $HEADER$ # if PRTE_HAVE_PMIXCC install-exec-hook: (cd $(DESTDIR)$(bindir); rm -f pcc; $(LN_S) $(PMIXCC_PATH)$(EXEEXT) pcc) uninstall-local: rm -f $(DESTDIR)$(bindir)/pcc$(EXEEXT) endif prrte-3.0.13/src/tools/pterm/0000775000175000017500000000000015145263240016176 5ustar alastairalastairprrte-3.0.13/src/tools/pterm/pterm.c0000664000175000017500000004522715145263240017503 0ustar alastairalastair/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2008 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2007-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2013-2020 Intel, Inc. All rights reserved. * Copyright (c) 2015-2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2020 Geoffroy Vallee. All rights reserved. * Copyright (c) 2020 IBM Corporation. All rights reserved. * Copyright (c) 2021-2024 Nanook Consulting All rights reserved. * Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All Rights * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #include "prte_config.h" #include "src/include/constants.h" #include "src/include/version.h" #include #include #include #ifdef HAVE_STRINGS_H # include #endif /* HAVE_STRINGS_H */ #ifdef HAVE_UNISTD_H # include #endif #ifdef HAVE_SYS_PARAM_H # include #endif #include #include #include #ifdef HAVE_SYS_TYPES_H # include #endif /* HAVE_SYS_TYPES_H */ #ifdef HAVE_SYS_WAIT_H # include #endif /* HAVE_SYS_WAIT_H */ #ifdef HAVE_SYS_TIME_H # include #endif /* HAVE_SYS_TIME_H */ #include #ifdef HAVE_SYS_STAT_H # include #endif #ifdef HAVE_POLL_H # include #endif #include "src/event/event-internal.h" #include "src/mca/base/pmix_base.h" #include "src/mca/prteinstalldirs/prteinstalldirs.h" #include "src/pmix/pmix-internal.h" #include "src/threads/pmix_mutex.h" #include "src/util/pmix_argv.h" #include "src/util/pmix_basename.h" #include "src/util/prte_cmd_line.h" #include "src/util/pmix_fd.h" #include "src/util/pmix_output.h" #include "src/util/pmix_printf.h" #include "src/util/pmix_environ.h" #include "src/util/pmix_getcwd.h" #include "src/util/pmix_show_help.h" #include "src/class/pmix_pointer_array.h" #include "src/runtime/prte_progress_threads.h" #include "src/util/pmix_os_path.h" #include "src/util/pmix_path.h" #include "src/mca/errmgr/errmgr.h" #include "src/mca/schizo/base/base.h" #include "src/mca/state/state.h" #include "src/runtime/prte_globals.h" #include "src/runtime/runtime.h" typedef struct { prte_pmix_lock_t lock; pmix_info_t *info; size_t ninfo; } mylock_t; static pmix_list_t job_info; static pmix_nspace_t myjobid = {0}; static pmix_proc_t myproc; static bool forcibly_die = false; static prte_event_t term_handler; static int term_pipe[2]; static pmix_mutex_t prun_abort_inprogress_lock = PMIX_MUTEX_STATIC_INIT; static prte_event_base_t *myevbase = NULL; static bool proxyrun = false; static bool verbose = false; static void abort_signal_callback(int signal); static void clean_abort(int fd, short 
flags, void *arg); static void infocb(pmix_status_t status, pmix_info_t *info, size_t ninfo, void *cbdata, pmix_release_cbfunc_t release_fn, void *release_cbdata) { prte_pmix_lock_t *lock = (prte_pmix_lock_t *) cbdata; PRTE_HIDE_UNUSED_PARAMS(info, ninfo); #if PMIX_VERSION_MAJOR == 3 && PMIX_VERSION_MINOR == 0 && PMIX_VERSION_RELEASE < 3 /* The callback should likely not have been called * see the comment below */ if (PMIX_ERR_COMM_FAILURE == status) { return; } #else PRTE_HIDE_UNUSED_PARAMS(status); #endif PMIX_ACQUIRE_OBJECT(lock); if (verbose) { pmix_output(0, "PTERM: INFOCB"); } if (NULL != release_fn) { release_fn(release_cbdata); } PRTE_PMIX_WAKEUP_THREAD(lock); } static void regcbfunc(pmix_status_t status, size_t ref, void *cbdata) { prte_pmix_lock_t *lock = (prte_pmix_lock_t *) cbdata; PRTE_HIDE_UNUSED_PARAMS(status, ref); PMIX_ACQUIRE_OBJECT(lock); PRTE_PMIX_WAKEUP_THREAD(lock); } static void evhandler(size_t evhdlr_registration_id, pmix_status_t status, const pmix_proc_t *source, pmix_info_t info[], size_t ninfo, pmix_info_t *results, size_t nresults, pmix_event_notification_cbfunc_fn_t cbfunc, void *cbdata) { prte_pmix_lock_t *lock = NULL; int jobstatus = 0; pmix_nspace_t jobid = {0}; size_t n; char *msg = NULL; PRTE_HIDE_UNUSED_PARAMS(evhdlr_registration_id, source, results, nresults); if (verbose) { pmix_output(0, "PRUN: EVHANDLER WITH STATUS %s(%d)", PMIx_Error_string(status), status); } /* we should always have info returned to us - if not, there is * nothing we can do */ if (NULL != info) { for (n = 0; n < ninfo; n++) { if (0 == strncmp(info[n].key, PMIX_JOB_TERM_STATUS, PMIX_MAX_KEYLEN)) { jobstatus = prte_pmix_convert_status(info[n].value.data.status); } else if (0 == strncmp(info[n].key, PMIX_EVENT_AFFECTED_PROC, PMIX_MAX_KEYLEN)) { PMIX_LOAD_NSPACE(jobid, info[n].value.data.proc->nspace); } else if (0 == strncmp(info[n].key, PMIX_EVENT_RETURN_OBJECT, PMIX_MAX_KEYLEN)) { lock = (prte_pmix_lock_t *) info[n].value.data.ptr; } else if (0 == 
strncmp(info[n].key, PMIX_EVENT_TEXT_MESSAGE, PMIX_MAX_KEYLEN)) { msg = info[n].value.data.string; } } if (verbose && PMIX_CHECK_NSPACE(jobid, myjobid)) { pmix_output(0, "JOB %s COMPLETED WITH STATUS %d", PRTE_JOBID_PRINT(jobid), jobstatus); } } if (NULL != lock) { /* save the status */ lock->status = jobstatus; if (NULL != msg) { lock->msg = strdup(msg); } /* release the lock */ PRTE_PMIX_WAKEUP_THREAD(lock); } /* we _always_ have to execute the evhandler callback or * else the event progress engine will hang */ if (NULL != cbfunc) { cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata); } } int main(int argc, char *argv[]) { int rc = PRTE_ERR_FATAL, i; prte_pmix_lock_t lock, rellock; pmix_info_t info, *iptr; pmix_status_t ret; bool flag; size_t ninfo; uint32_t ui32; char *param, *ptr; pid_t pid; void *tinfo; pmix_data_array_t darray; char hostname[PRTE_PATH_MAX]; char *personality; pmix_rank_t rank; pmix_cli_result_t results; pmix_cli_item_t *opt; prte_schizo_base_module_t *schizo; /* init the globals */ PMIX_CONSTRUCT(&job_info, pmix_list_t); prte_tool_basename = pmix_basename(argv[0]); prte_tool_actual = "pterm"; gethostname(hostname, sizeof(hostname)); PMIX_CONSTRUCT(&results, pmix_cli_result_t); rc = prte_init_minimum(); if (PRTE_SUCCESS != rc) { return rc; } /* we always need the prrte and pmix params */ rc = prte_schizo_base_parse_prte(argc, 0, argv, NULL); if (PRTE_SUCCESS != rc) { return rc; } rc = prte_schizo_base_parse_pmix(argc, 0, argv, NULL); if (PRTE_SUCCESS != rc) { return rc; } /* init the tiny part of PRTE we use */ prte_init_util(PRTE_PROC_MASTER); /* open the SCHIZO framework */ rc = pmix_mca_base_framework_open(&prte_schizo_base_framework, PMIX_MCA_BASE_OPEN_DEFAULT); if (PRTE_SUCCESS != rc) { PRTE_ERROR_LOG(rc); return rc; } if (PRTE_SUCCESS != (rc = prte_schizo_base_select())) { PRTE_ERROR_LOG(rc); return rc; } /* look for any personality specification */ personality = NULL; for (i = 0; NULL != argv[i]; i++) { if (0 == 
strcmp(argv[i], "--personality")) { personality = argv[i + 1]; break; } } /* detect if we are running as a proxy and select the active * schizo module for this tool */ schizo = prte_schizo_base_detect_proxy(personality); if (NULL == schizo) { pmix_show_help("help-schizo-base.txt", "no-proxy", true, prte_tool_basename, personality); return 1; } /* Register all global MCA Params */ if (PRTE_SUCCESS != (rc = prte_register_params())) { if (PRTE_ERR_SILENT != rc) { pmix_show_help("help-prte-runtime", "prte_init:startup:internal-failure", true, "prte register params", PRTE_ERROR_NAME(rc), rc); } return 1; } rc = schizo->parse_cli(argv, &results, PMIX_CLI_WARN); if (PRTE_SUCCESS != rc) { PMIX_DESTRUCT(&results); if (PRTE_OPERATION_SUCCEEDED == rc) { return PRTE_SUCCESS; } if (PRTE_ERR_SILENT != rc) { fprintf(stderr, "%s: command line error (%s)\n", prte_tool_basename, prte_strerror(rc)); } else { rc = PRTE_SUCCESS; } return rc; } // we do NOT accept arguments other than our own if (NULL != results.tail) { param = PMIX_ARGV_JOIN_COMPAT(results.tail, ' '); if (0 != strcmp(param, argv[0])) { ptr = pmix_show_help_string("help-pterm.txt", "no-args", false, prte_tool_basename, param, prte_tool_basename); if (NULL != ptr) { printf("%s", ptr); free(ptr); } return -1; } free(param); } /* setup options */ PMIX_INFO_LIST_START(tinfo); /* tell PMIx what our name should be */ pmix_asprintf(¶m, "%s.%s.%lu", prte_tool_basename, hostname, (unsigned long)getpid()); PMIX_INFO_LIST_ADD(rc, tinfo, PMIX_TOOL_NSPACE, param, PMIX_STRING); free(param); rank = 0; PMIX_INFO_LIST_ADD(rc, tinfo, PMIX_TOOL_RANK, &rank, PMIX_PROC_RANK); if (pmix_cmd_line_is_taken(&results, "system-server-first")) { PMIX_INFO_LIST_ADD(rc, tinfo, PMIX_CONNECT_SYSTEM_FIRST, NULL, PMIX_BOOL); } else if (pmix_cmd_line_is_taken(&results, "system-server-only")) { PMIX_INFO_LIST_ADD(rc, tinfo, PMIX_CONNECT_TO_SYSTEM, NULL, PMIX_BOOL); } opt = pmix_cmd_line_get_param(&results, "wait-to-connect"); if (NULL != opt) { ui32 = 
strtol(opt->values[0], NULL, 10); PMIX_INFO_LIST_ADD(rc, tinfo, PMIX_CONNECT_RETRY_DELAY, &ui32, PMIX_UINT32); } opt = pmix_cmd_line_get_param(&results, "num-connect-retries"); if (NULL != opt) { ui32 = strtol(opt->values[0], NULL, 10); PMIX_INFO_LIST_ADD(rc, tinfo, PMIX_CONNECT_MAX_RETRIES, &ui32, PMIX_UINT32); } opt = pmix_cmd_line_get_param(&results, "pid"); if (NULL != opt) { /* see if it is an integer value */ char *leftover; leftover = NULL; pid = strtol(opt->values[0], &leftover, 10); if (NULL == leftover || 0 == strlen(leftover)) { /* it is an integer */ PMIX_INFO_LIST_ADD(rc, tinfo, PMIX_SERVER_PIDINFO, &pid, PMIX_PID); } else if (0 == strncasecmp(opt->values[0], "file", 4)) { FILE *fp; /* step over the file: prefix */ param = strchr(opt->values[0], ':'); if (NULL == param) { /* malformed input */ pmix_show_help("help-prun.txt", "bad-option-input", true, prte_tool_basename, "--pid", opt->values[0], "file:path"); return PRTE_ERR_BAD_PARAM; } ++param; fp = fopen(param, "r"); if (NULL == fp) { pmix_show_help("help-prun.txt", "file-open-error", true, prte_tool_basename, "--pid", opt->values[0], param); return PRTE_ERR_BAD_PARAM; } rc = fscanf(fp, "%lu", (unsigned long *) &pid); if (1 != rc) { /* if we were unable to obtain the single conversion we * require, then error out */ pmix_show_help("help-prun.txt", "bad-file", true, prte_tool_basename, "--pid", opt->values[0], param); return PRTE_ERR_BAD_PARAM; } fclose(fp); PMIX_INFO_LIST_ADD(rc, tinfo, PMIX_SERVER_PIDINFO, &pid, PMIX_PID); } } /* if they specified the URI, then pass it along */ opt = pmix_cmd_line_get_param(&results, "dvm-uri"); if (NULL != opt) { PMIX_INFO_LIST_ADD(rc, tinfo, PMIX_SERVER_URI, opt->values[0], PMIX_STRING); } /* convert to array of info */ PMIX_INFO_LIST_CONVERT(rc, tinfo, &darray); iptr = (pmix_info_t *) darray.array; ninfo = darray.size; PMIX_INFO_LIST_RELEASE(tinfo); /** setup callbacks for abort signals - from this point * forward, we need to abort in a manner that allows us * to 
cleanup. However, we cannot directly use libevent * to trap these signals as otherwise we cannot respond * to them if we are stuck in an event! So instead use * the basic POSIX trap functions to handle the signal, * and then let that signal handler do some magic to * avoid the hang * * NOTE: posix traps don't allow us to do anything major * in them, so use a pipe tied to a libevent event to * reach a "safe" place where the termination event can * be created */ if (0 != (rc = pipe(term_pipe))) { exit(1); } /* setup an event to attempt normal termination on signal */ myevbase = prte_progress_thread_init(NULL); prte_event_set(myevbase, &term_handler, term_pipe[0], PRTE_EV_READ, clean_abort, NULL); prte_event_add(&term_handler, NULL); /* Set both ends of this pipe to be close-on-exec so that no children inherit it */ if (pmix_fd_set_cloexec(term_pipe[0]) != PRTE_SUCCESS || pmix_fd_set_cloexec(term_pipe[1]) != PRTE_SUCCESS) { fprintf(stderr, "unable to set the pipe to CLOEXEC\n"); prte_progress_thread_finalize(NULL); exit(1); } /* point the signal trap to a function that will activate that event */ signal(SIGTERM, abort_signal_callback); signal(SIGINT, abort_signal_callback); signal(SIGHUP, abort_signal_callback); /* now initialize PMIx - we have to indicate we are a launcher so that we * will provide rendezvous points for tools to connect to us */ if (PMIX_SUCCESS != (ret = PMIx_tool_init(&myproc, iptr, ninfo))) { fprintf(stderr, "%s failed to initialize, likely due to no DVM being available\n", prte_tool_basename); exit(1); } PMIX_INFO_FREE(iptr, ninfo); /* setup a lock to track the connection */ PRTE_PMIX_CONSTRUCT_LOCK(&rellock); /* register to trap connection loss */ pmix_status_t code[2] = {PMIX_ERR_UNREACH, PMIX_ERR_LOST_CONNECTION}; PRTE_PMIX_CONSTRUCT_LOCK(&lock); PMIX_INFO_LOAD(&info, PMIX_EVENT_RETURN_OBJECT, &rellock, PMIX_POINTER); PMIx_Register_event_handler(code, 2, &info, 1, evhandler, regcbfunc, &lock); PRTE_PMIX_WAIT_THREAD(&lock); 
PRTE_PMIX_DESTRUCT_LOCK(&lock); flag = true; PMIX_INFO_LOAD(&info, PMIX_JOB_CTRL_TERMINATE, &flag, PMIX_BOOL); if (!proxyrun) { fprintf(stderr, "TERMINATING DVM..."); } PRTE_PMIX_CONSTRUCT_LOCK(&lock); rc = PMIx_Job_control_nb(NULL, 0, &info, 1, infocb, (void *) &lock); if (PMIX_SUCCESS == rc) { #if PMIX_VERSION_MAJOR == 3 && PMIX_VERSION_MINOR == 0 && PMIX_VERSION_RELEASE < 3 /* There is a bug in PMIx 3.0.0 up to 3.0.2 that causes the callback never * being called when the server terminates. The callback might be eventually * called though then the connection to the server closes with * status PMIX_ERR_COMM_FAILURE */ poll(NULL, 0, 1000); infocb(PMIX_SUCCESS, NULL, 0, (void *) &lock, NULL, NULL); #endif PRTE_PMIX_WAIT_THREAD(&lock); PRTE_PMIX_DESTRUCT_LOCK(&lock); /* wait for connection to depart */ PRTE_PMIX_WAIT_THREAD(&rellock); PRTE_PMIX_DESTRUCT_LOCK(&rellock); } else { PRTE_PMIX_WAIT_THREAD(&lock); PRTE_PMIX_DESTRUCT_LOCK(&rellock); } /* wait for the connection to go away */ fprintf(stderr, "DONE\n"); #if PMIX_VERSION_MAJOR == 3 && PMIX_VERSION_MINOR == 0 && PMIX_VERSION_RELEASE < 3 return rc; #endif /* cleanup and leave */ ret = PMIx_tool_finalize(); if (PRTE_SUCCESS == rc && PMIX_SUCCESS != ret) { rc = ret; } return rc; } static void clean_abort(int fd, short flags, void *arg) { PRTE_HIDE_UNUSED_PARAMS(fd, flags, arg); /* if we have already ordered this once, don't keep * doing it to avoid race conditions */ if (pmix_mutex_trylock(&prun_abort_inprogress_lock)) { /* returns 1 if already locked */ if (forcibly_die) { /* exit with a non-zero status */ exit(1); } fprintf(stderr, "prun: abort is already in progress...hit ctrl-c again to forcibly terminate\n\n"); forcibly_die = true; /* reset the event */ prte_event_add(&term_handler, NULL); PMIx_tool_finalize(); return; } } static struct timeval current, last = {0, 0}; static bool first = true; /* * Attempt to terminate the job and wait for callback indicating * the job has been abprted. 
*/ static void abort_signal_callback(int fd) { uint8_t foo = 1; char *msg = "Abort is in progress...hit ctrl-c again within 5 seconds to forcibly terminate\n\n"; PRTE_HIDE_UNUSED_PARAMS(fd); /* if this is the first time thru, just get * the current time */ if (first) { first = false; gettimeofday(¤t, NULL); } else { /* get the current time */ gettimeofday(¤t, NULL); /* if this is within 5 seconds of the * last time we were called, then just * exit - we are probably stuck */ if ((current.tv_sec - last.tv_sec) < 5) { exit(1); } if (-1 == write(1, (void *) msg, strlen(msg))) { exit(1); } } /* save the time */ last.tv_sec = current.tv_sec; /* tell the event lib to attempt to abnormally terminate */ if (-1 == write(term_pipe[1], &foo, 1)) { exit(1); } } prrte-3.0.13/src/tools/pterm/Makefile.am0000664000175000017500000000254415145263240020237 0ustar alastairalastair# # Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. # Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2008-2020 Cisco Systems, Inc. All rights reserved # Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. # Copyright (c) 2015-2020 Intel, Inc. All rights reserved. # Copyright (c) 2019 Research Organization for Information Science # and Technology (RIST). All rights reserved. # Copyright (c) 2021-2023 Nanook Consulting. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow # # $HEADER$ # AM_LDFLAGS = $(prte_hwloc_LDFLAGS) $(prte_libevent_LDFLAGS) $(prte_pmix_LDFLAGS) bin_PROGRAMS = pterm pterm_SOURCES = \ pterm.c pterm_LDADD = \ $(prte_libevent_LIBS) \ $(prte_hwloc_LIBS) \ $(prte_pmix_LIBS) \ $(top_builddir)/src/libprrte.la prrte-3.0.13/src/tools/Makefile.am0000664000175000017500000000247615145263240017114 0ustar alastairalastair# -*- makefile -*- # # Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. # Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2006-2008 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights # reserved. # Copyright (c) 2014-2020 Intel, Inc. All rights reserved. # Copyright (c) 2023 Nanook Consulting. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow # # $HEADER$ # # This makefile.am does not stand on its own - it is included from # src/Makefile.am SUBDIRS += \ tools/prted \ tools/prun \ tools/pcc \ tools/prte_info \ tools/prte \ tools/pterm DIST_SUBDIRS += \ tools/prted \ tools/prun \ tools/pcc \ tools/prte_info \ tools/prte \ tools/pterm prrte-3.0.13/src/tools/prte_info/0000775000175000017500000000000015145263240017034 5ustar alastairalastairprrte-3.0.13/src/tools/prte_info/output.c0000664000175000017500000001362215145263240020544 0ustar alastairalastair/* * Copyright (c) 2004-2009 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. 
* Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2010-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2018-2019 Intel, Inc. All rights reserved. * Copyright (c) 2021-2023 Nanook Consulting. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #include "prte_config.h" #include #include #ifdef HAVE_UNISTD_H # include #endif #include #ifdef HAVE_TERMIOS_H # include #endif #ifdef HAVE_SYS_IOCTL_H # include #endif #include #include "src/tools/prte_info/pinfo.h" #include "src/util/pmix_show_help.h" #define PRTE_max(a, b) (((a) > (b)) ? (a) : (b)) /* * Private variables - set some reasonable screen size defaults */ static int centerpoint = 24; static int screen_width = 78; /* * Prints the passed integer in a pretty or parsable format. 
*/ void prte_info_out(const char *pretty_message, const char *plain_message, const char *value) { size_t i, len, max_value_width; char *spaces = NULL; char *filler = NULL; char *pos, *v, savev, *v_to_free; #ifdef HAVE_ISATTY /* If we have isatty(), if this is not a tty, then disable * wrapping for grep-friendly behavior */ if (0 == isatty(STDOUT_FILENO)) { screen_width = INT_MAX; } #endif #ifdef TIOCGWINSZ if (screen_width < INT_MAX) { struct winsize size; if (ioctl(STDOUT_FILENO, TIOCGWINSZ, (char *) &size) >= 0) { screen_width = size.ws_col; } } #endif /* Strip leading and trailing whitespace from the string value */ v = v_to_free = strdup(value); len = strlen(v); if (isspace(v[0])) { char *newv; i = 0; while (isspace(v[i]) && i < len) { ++i; } newv = strdup(v + i); free(v_to_free); v_to_free = v = newv; len = strlen(v); } if (len > 0 && isspace(v[len - 1])) { i = len - 1; /* Note that i is size_t (unsigned), so we can't check for i >= 0. But we don't need to, because if the value was all whitespace, stripping whitespace from the left (above) would have resulted in an empty string, and we wouldn't have gotten into this block. 
*/ while (isspace(v[i]) && i > 0) { --i; } v[i] = '\0'; } if (prte_info_pretty && NULL != pretty_message) { if (centerpoint > (int) strlen(pretty_message)) { pmix_asprintf(&spaces, "%*s", centerpoint - (int) strlen(pretty_message), " "); } else { spaces = strdup(""); #if PRTE_ENABLE_DEBUG if (centerpoint < (int) strlen(pretty_message)) { pmix_show_help("help-prte-info.txt", "developer warning: field too long", false, pretty_message, centerpoint); } #endif } max_value_width = screen_width - strlen(spaces) - strlen(pretty_message) - 2; if (0 < strlen(pretty_message)) { pmix_asprintf(&filler, "%s%s: ", spaces, pretty_message); } else { pmix_asprintf(&filler, "%s ", spaces); } free(spaces); spaces = NULL; while (true) { if (strlen(v) < max_value_width) { printf("%s%s\n", filler, v); break; } else { pmix_asprintf(&spaces, "%*s", centerpoint + 2, " "); /* Work backwards to find the first space before * max_value_width */ savev = v[max_value_width]; v[max_value_width] = '\0'; pos = (char *) strrchr(v, (int) ' '); v[max_value_width] = savev; if (NULL == pos) { /* No space found < max_value_width. Look for the first * space after max_value_width. */ pos = strchr(&v[max_value_width], ' '); if (NULL == pos) { /* There's just no spaces. So just print it and be done. 
*/ printf("%s%s\n", filler, v); break; } else { *pos = '\0'; printf("%s%s\n", filler, v); v = pos + 1; } } else { *pos = '\0'; printf("%s%s\n", filler, v); v = pos + 1; } /* Reset for the next iteration */ free(filler); filler = strdup(spaces); free(spaces); spaces = NULL; } } if (NULL != filler) { free(filler); } if (NULL != spaces) { free(spaces); } } else { if (NULL != plain_message && 0 < strlen(plain_message)) { printf("%s:%s\n", plain_message, value); } else { printf(" %s\n", value); } } if (NULL != v_to_free) { free(v_to_free); } } void prte_info_out_int(const char *pretty_message, const char *plain_message, int value) { char *valstr; pmix_asprintf(&valstr, "%d", (int) value); prte_info_out(pretty_message, plain_message, valstr); free(valstr); } prrte-3.0.13/src/tools/prte_info/version.c0000664000175000017500000002567715145263240020706 0ustar alastairalastair/* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2010-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2018-2020 Intel, Inc. All rights reserved. * Copyright (c) 2021 IBM Corporation. All rights reserved. * Copyright (c) 2021-2025 Nanook Consulting All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #include "prte_config.h" #include #include #include "pmix.h" #include "src/include/version.h" #include "src/mca/base/pmix_base.h" #include "src/util/pmix_argv.h" #include "src/util/pmix_printf.h" #include "src/util/name_fns.h" #include "src/tools/prte_info/pinfo.h" /* * Public variables */ const char *prte_info_ver_full = "full"; const char *prte_info_ver_major = "major"; const char *prte_info_ver_minor = "minor"; const char *prte_info_ver_release = "release"; const char *prte_info_ver_greek = "greek"; const char *prte_info_ver_repo = "repo"; /* * Private variables */ static const char *prte_info_ver_all = "all"; static const char *prte_info_ver_mca = "mca"; static const char *prte_info_ver_type = "type"; static const char *prte_info_ver_component = "component"; /* * Private functions */ static void show_mca_version(const pmix_mca_base_component_t *component, const char *scope, const char *ver_type); void prte_info_show_prte_version(const char *scope) { char *tmp, *tmp2; pmix_asprintf(&tmp, "%s:version:full", prte_info_type_prte); tmp2 = prte_util_make_version_string(scope, PRTE_MAJOR_VERSION, PRTE_MINOR_VERSION, PRTE_RELEASE_VERSION, PRTE_GREEK_VERSION, NULL); prte_info_out("PRTE", tmp, tmp2); free(tmp); free(tmp2); pmix_asprintf(&tmp, "%s:version:repo", prte_info_type_prte); prte_info_out("PRTE repo revision", tmp, PRTE_REPO_REV); free(tmp); pmix_asprintf(&tmp, "%s:version:release_date", prte_info_type_prte); prte_info_out("PRTE release date", tmp, PRTE_RELEASE_DATE); free(tmp); prte_info_out("PMIx", "pmix:version:full", PMIx_Get_version()); } /* * do_version * * Determines the version information related to the prte components * being used. * Accepts: * - want_all: True if all components' info is required. 
* - cmd_line: The constructed command line argument */ void prte_info_do_version(bool want_all) { char *arg1, *scope, **tmp; char *pos = NULL; int j; pmix_cli_item_t *opt; prte_info_components_open(); if (want_all) { prte_info_show_prte_version(prte_info_ver_full); for (j = 0; j < mca_types.size; ++j) { if (NULL == (pos = (char *) pmix_pointer_array_get_item(&mca_types, j))) { continue; } prte_info_show_component_version(pos, prte_info_component_all, prte_info_ver_full, prte_info_type_all); } } else { opt = pmix_cmd_line_get_param(&prte_info_cmd_line, "show-version"); if (NULL != opt) { tmp = PMIX_ARGV_SPLIT_COMPAT(opt->values[0], ':'); arg1 = tmp[0]; if (NULL == tmp[1]) { scope = (char*)prte_info_ver_all; } else { if (NULL != tmp[2]) { pos = tmp[1]; scope = tmp[2]; } else { pos = tmp[1]; scope = (char*)prte_info_ver_all; } } /* Version of PRTE */ if (0 == strcmp(prte_info_type_prte, arg1)) { prte_info_show_prte_version(scope); } /* Specific type and component */ else if (NULL != pos) { prte_info_show_component_version(arg1, pos, scope, prte_info_ver_all); } /* All components of a specific type */ else { prte_info_show_component_version(arg1, prte_info_component_all, scope, prte_info_ver_all); } PMIX_ARGV_FREE_COMPAT(tmp); } } } /* * Show all the components of a specific type/component combo (component may be * a wildcard) */ void prte_info_show_component_version(const char *type_name, const char *component_name, const char *scope, const char *ver_type) { bool want_all_components = false; bool found; pmix_list_item_t *item; pmix_mca_base_component_list_item_t *cli; const pmix_mca_base_component_t *component; pmix_list_t *components; int j; char *pos; prte_info_component_map_t *map; /* see if all components wanted */ if (0 == strcmp(prte_info_type_all, component_name)) { want_all_components = true; } /* Check to see if the type is valid */ for (found = false, j = 0; j < mca_types.size; ++j) { if (NULL == (pos = (char *) pmix_pointer_array_get_item(&mca_types, j))) { 
continue; } if (0 == strcmp(pos, type_name)) { found = true; break; } } if (!found) { exit(1); } /* Now that we have a valid type, find the right component list */ components = NULL; for (j = 0; j < prte_component_map.size; j++) { map = (prte_info_component_map_t*) pmix_pointer_array_get_item(&prte_component_map, j); if (NULL == map) { continue; } if (0 == strcmp(type_name, map->type)) { /* found it! */ components = map->components; break; } } if (NULL != components) { if (pmix_list_get_size(components) > 0) { for (item = pmix_list_get_first(components); pmix_list_get_end(components) != item; item = pmix_list_get_next(item)) { cli = (pmix_mca_base_component_list_item_t *) item; component = cli->cli_component; if (want_all_components || 0 == strcmp(component->pmix_mca_component_name, component_name)) { show_mca_version(component, scope, ver_type); } } } } else { /* there are no components, but we still show their type */ pmix_asprintf(&pos, "MCA %s", type_name); prte_info_out(pos, NULL, " no components"); free(pos); } } /* * Given a component, display its relevant version(s) */ static void show_mca_version(const pmix_mca_base_component_t *component, const char *scope, const char *ver_type) { bool printed; bool want_mca = false; bool want_type = false; bool want_component = false; char *message, *content; char *mca_version; char *api_version; char *component_version; char *tmp; if (0 == strcmp(ver_type, prte_info_ver_all) || 0 == strcmp(ver_type, prte_info_ver_mca)) { want_mca = true; } if (0 == strcmp(ver_type, prte_info_ver_all) || 0 == strcmp(ver_type, prte_info_ver_type)) { want_type = true; } if (0 == strcmp(ver_type, prte_info_ver_all) || 0 == strcmp(ver_type, prte_info_ver_component)) { want_component = true; } mca_version = prte_util_make_version_string(scope, component->pmix_mca_major_version, component->pmix_mca_minor_version, component->pmix_mca_release_version, "", ""); api_version = prte_util_make_version_string(scope, 
component->pmix_mca_type_major_version, component->pmix_mca_type_minor_version, component->pmix_mca_type_release_version, "", ""); component_version = prte_util_make_version_string(scope, component->pmix_mca_component_major_version, component->pmix_mca_component_minor_version, component->pmix_mca_component_release_version, "", ""); if (prte_info_pretty) { pmix_asprintf(&message, "MCA %s", component->pmix_mca_type_name); printed = false; pmix_asprintf(&content, "%s (", component->pmix_mca_component_name); if (want_mca) { pmix_asprintf(&tmp, "%sMCA v%s", content, mca_version); free(content); content = tmp; printed = true; } if (want_type) { if (printed) { pmix_asprintf(&tmp, "%s, ", content); free(content); content = tmp; } pmix_asprintf(&tmp, "%sAPI v%s", content, api_version); free(content); content = tmp; printed = true; } if (want_component) { if (printed) { pmix_asprintf(&tmp, "%s, ", content); free(content); content = tmp; } pmix_asprintf(&tmp, "%sComponent v%s", content, component_version); free(content); content = tmp; printed = true; } if (NULL != content) { pmix_asprintf(&tmp, "%s)", content); free(content); } else { pmix_asprintf(&tmp, ")"); } prte_info_out(message, NULL, tmp); free(message); free(tmp); } else { pmix_asprintf(&message, "mca:%s:%s:version", component->pmix_mca_type_name, component->pmix_mca_component_name); if (want_mca) { pmix_asprintf(&tmp, "mca:%s", mca_version); prte_info_out(NULL, message, tmp); free(tmp); } if (want_type) { pmix_asprintf(&tmp, "api:%s", api_version); prte_info_out(NULL, message, tmp); free(tmp); } if (want_component) { pmix_asprintf(&tmp, "component:%s", component_version); prte_info_out(NULL, message, tmp); free(tmp); } free(message); } free(mca_version); free(api_version); free(component_version); } prrte-3.0.13/src/tools/prte_info/param.c0000664000175000017500000004112415145263240020302 0ustar alastairalastair/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2009 The Trustees of 
Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2018-2020 Intel, Inc. All rights reserved. * Copyright (c) 2018 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2018 Intel, Inc. All rights reserved. * Copyright (c) 2021-2023 Nanook Consulting. All rights reserved. * Copyright (c) 2021 FUJITSU LIMITED. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #include "prte_config.h" #include #include #ifdef HAVE_UNISTD_H # include #endif #ifdef HAVE_SYS_PARAM_H # include #endif #ifdef HAVE_NETDB_H # include #endif #include "src/class/pmix_pointer_array.h" #include "src/class/pmix_value_array.h" #include "src/include/constants.h" #include "src/include/prte_portable_platform.h" #include "src/include/version.h" #include "src/mca/prteinstalldirs/prteinstalldirs.h" #include "src/util/pmix_argv.h" #include "src/util/pmix_printf.h" #include "src/util/pmix_show_help.h" #include "src/tools/prte_info/pinfo.h" /* * Public variables */ const char *prte_info_component_all = "all"; const char *prte_info_param_all = "all"; const char *prte_info_path_prefix = "prefix"; const char *prte_info_path_bindir = "bindir"; const char *prte_info_path_libdir = "libdir"; const char *prte_info_path_incdir = "incdir"; const char *prte_info_path_mandir = "mandir"; const char *prte_info_path_pkglibdir = "pkglibdir"; const char *prte_info_path_sysconfdir = "sysconfdir"; const char *prte_info_path_exec_prefix = "exec_prefix"; const char *prte_info_path_sbindir = "sbindir"; const char *prte_info_path_libexecdir = "libexecdir"; const char *prte_info_path_datarootdir = "datarootdir"; const char *prte_info_path_datadir = "datadir"; const char *prte_info_path_sharedstatedir = "sharedstatedir"; const char *prte_info_path_localstatedir = "localstatedir"; const char *prte_info_path_infodir = "infodir"; const char *prte_info_path_pkgdatadir = "pkgdatadir"; const char *prte_info_path_pkgincludedir = "pkgincludedir"; void prte_info_do_params(bool want_all_in, bool want_internal) { char *type, *str; char **args = NULL, **tmp; bool found; int i, j; bool want_all = false; pmix_cli_item_t *opt; prte_info_components_open(); opt = pmix_cmd_line_get_param(&prte_info_cmd_line, "param"); if (want_all_in) { want_all = true; } else { /* See if the special param "all" was given to --param; that * superceeds 
any individual type */ if (NULL != opt) { /* split the arguments at the colon */ args = PMIX_ARGV_SPLIT_COMPAT(opt->values[0], ':'); if (0 == strcmp(args[0], "all")) { want_all = true; } } } /* Show the params */ if (want_all) { for (i = 0; i < mca_types.size; ++i) { if (NULL == (type = (char *) pmix_pointer_array_get_item(&mca_types, i))) { continue; } prte_info_show_mca_params(type, prte_info_component_all, want_internal); } } else { if (NULL != opt && NULL != args) { type = args[0]; if (NULL != args[1]) { tmp = PMIX_ARGV_SPLIT_COMPAT(args[1], ','); for (j=0; NULL != tmp[j]; j++) { for (found = false, i = 0; i < mca_types.size; ++i) { str = (char *) pmix_pointer_array_get_item(&mca_types, i); if (NULL == str) { continue; } if (0 == strcmp(str, type)) { found = true; break; } } if (!found) { pmix_show_help("help-prte-info.txt", "not-found", true, type); exit(1); } prte_info_show_mca_params(type, tmp[j], want_internal); } PMIX_ARGV_FREE_COMPAT(tmp); } else { prte_info_show_mca_params(type, "*", want_internal); } } } if (NULL != args) { PMIX_ARGV_FREE_COMPAT(args); } } static void prte_info_show_mca_group_params(const pmix_mca_base_var_group_t *group, bool want_internal) { const pmix_mca_base_var_t *var; const int *variables; int ret, i, j, count; const int *groups; char **strings; variables = PMIX_VALUE_ARRAY_GET_BASE(&group->group_vars, const int); count = pmix_value_array_get_size((pmix_value_array_t *) &group->group_vars); for (i = 0; i < count; ++i) { ret = pmix_mca_base_var_get(variables[i], &var); if (PRTE_SUCCESS != ret) { continue; } ret = pmix_mca_base_var_dump(variables[i], &strings, !prte_info_pretty ? 
PMIX_MCA_BASE_VAR_DUMP_PARSABLE : PMIX_MCA_BASE_VAR_DUMP_READABLE); if (PRTE_SUCCESS != ret) { continue; } for (j = 0; strings[j]; ++j) { if (0 == j && prte_info_pretty) { char *message; pmix_asprintf(&message, "MCA %s", group->group_framework); prte_info_out(message, message, strings[j]); free(message); } else { prte_info_out("", "", strings[j]); } free(strings[j]); } free(strings); } groups = PMIX_VALUE_ARRAY_GET_BASE(&group->group_subgroups, const int); count = pmix_value_array_get_size((pmix_value_array_t *) &group->group_subgroups); for (i = 0; i < count; ++i) { ret = pmix_mca_base_var_group_get(groups[i], &group); if (PRTE_SUCCESS != ret) { continue; } prte_info_show_mca_group_params(group, want_internal); } } void prte_info_show_mca_params(const char *type, const char *component, bool want_internal) { const pmix_mca_base_var_group_t *group; int ret; if (0 == strcmp(component, "all")) { ret = pmix_mca_base_var_group_find("*", type, NULL); if (0 > ret) { return; } (void) pmix_mca_base_var_group_get(ret, &group); prte_info_show_mca_group_params(group, want_internal); } else { ret = pmix_mca_base_var_group_find("*", type, component); if (0 > ret) { return; } (void) pmix_mca_base_var_group_get(ret, &group); prte_info_show_mca_group_params(group, want_internal); } } void prte_info_do_path(bool want_all) { int i; char *scope; pmix_cli_item_t *opt; /* Check bozo case */ opt = pmix_cmd_line_get_param(&prte_info_cmd_line, "path"); if (NULL != opt) { for (i=0; NULL != opt->values[i]; i++) { scope = opt->values[i]; if (0 == strcmp("all", scope)) { want_all = true; break; } } } if (want_all) { prte_info_show_path(prte_info_path_prefix, prte_install_dirs.prefix); prte_info_show_path(prte_info_path_exec_prefix, prte_install_dirs.exec_prefix); prte_info_show_path(prte_info_path_bindir, prte_install_dirs.bindir); prte_info_show_path(prte_info_path_sbindir, prte_install_dirs.sbindir); prte_info_show_path(prte_info_path_libdir, prte_install_dirs.libdir); 
prte_info_show_path(prte_info_path_incdir, prte_install_dirs.includedir); prte_info_show_path(prte_info_path_mandir, prte_install_dirs.mandir); prte_info_show_path(prte_info_path_pkglibdir, prte_install_dirs.prtelibdir); prte_info_show_path(prte_info_path_libexecdir, prte_install_dirs.libexecdir); prte_info_show_path(prte_info_path_datarootdir, prte_install_dirs.datarootdir); prte_info_show_path(prte_info_path_datadir, prte_install_dirs.datadir); prte_info_show_path(prte_info_path_sysconfdir, prte_install_dirs.sysconfdir); prte_info_show_path(prte_info_path_sharedstatedir, prte_install_dirs.sharedstatedir); prte_info_show_path(prte_info_path_localstatedir, prte_install_dirs.localstatedir); prte_info_show_path(prte_info_path_infodir, prte_install_dirs.infodir); prte_info_show_path(prte_info_path_pkgdatadir, prte_install_dirs.prtedatadir); prte_info_show_path(prte_info_path_pkglibdir, prte_install_dirs.prtelibdir); prte_info_show_path(prte_info_path_pkgincludedir, prte_install_dirs.prteincludedir); } else { if (NULL != opt) { for (i=0; NULL != opt->values[i]; i++) { scope = opt->values[i]; if (0 == strcmp(prte_info_path_prefix, scope)) { prte_info_show_path(prte_info_path_prefix, prte_install_dirs.prefix); } else if (0 == strcmp(prte_info_path_bindir, scope)) { prte_info_show_path(prte_info_path_bindir, prte_install_dirs.bindir); } else if (0 == strcmp(prte_info_path_libdir, scope)) { prte_info_show_path(prte_info_path_libdir, prte_install_dirs.libdir); } else if (0 == strcmp(prte_info_path_incdir, scope)) { prte_info_show_path(prte_info_path_incdir, prte_install_dirs.includedir); } else if (0 == strcmp(prte_info_path_mandir, scope)) { prte_info_show_path(prte_info_path_mandir, prte_install_dirs.mandir); } else if (0 == strcmp(prte_info_path_pkglibdir, scope)) { prte_info_show_path(prte_info_path_pkglibdir, prte_install_dirs.prtelibdir); } else if (0 == strcmp(prte_info_path_sysconfdir, scope)) { prte_info_show_path(prte_info_path_sysconfdir, 
prte_install_dirs.sysconfdir); } else if (0 == strcmp(prte_info_path_exec_prefix, scope)) { prte_info_show_path(prte_info_path_exec_prefix, prte_install_dirs.exec_prefix); } else if (0 == strcmp(prte_info_path_sbindir, scope)) { prte_info_show_path(prte_info_path_sbindir, prte_install_dirs.sbindir); } else if (0 == strcmp(prte_info_path_libexecdir, scope)) { prte_info_show_path(prte_info_path_libexecdir, prte_install_dirs.libexecdir); } else if (0 == strcmp(prte_info_path_datarootdir, scope)) { prte_info_show_path(prte_info_path_datarootdir, prte_install_dirs.datarootdir); } else if (0 == strcmp(prte_info_path_datadir, scope)) { prte_info_show_path(prte_info_path_datadir, prte_install_dirs.datadir); } else if (0 == strcmp(prte_info_path_sharedstatedir, scope)) { prte_info_show_path(prte_info_path_sharedstatedir, prte_install_dirs.sharedstatedir); } else if (0 == strcmp(prte_info_path_localstatedir, scope)) { prte_info_show_path(prte_info_path_localstatedir, prte_install_dirs.localstatedir); } else if (0 == strcmp(prte_info_path_infodir, scope)) { prte_info_show_path(prte_info_path_infodir, prte_install_dirs.infodir); } else if (0 == strcmp(prte_info_path_pkgdatadir, scope)) { prte_info_show_path(prte_info_path_pkgdatadir, prte_install_dirs.prtedatadir); } else if (0 == strcmp(prte_info_path_pkgincludedir, scope)) { prte_info_show_path(prte_info_path_pkgincludedir, prte_install_dirs.prteincludedir); } else { pmix_show_help("help-prte-info.txt", "usage", true, "USAGE"); exit(1); } } } } } void prte_info_show_path(const char *type, const char *value) { char *pretty, *path; pretty = strdup(type); pretty[0] = toupper(pretty[0]); pmix_asprintf(&path, "path:%s", type); prte_info_out(pretty, path, value); free(pretty); free(path); } void prte_info_do_arch(void) { prte_info_out("Configured architecture", "config:arch", PRTE_ARCH); } void prte_info_do_hostname(void) { prte_info_out("Configure host", "config:host", PRTE_CONFIGURE_HOST); } /* * do_config * Accepts: * - 
want_all: boolean flag; TRUE -> display all options * FALSE -> display selected options * * This function displays all the options with which the current * installation of prte was configured. There are many options here * that are carried forward from PRTE-7 and are not mca parameters * in PRTE-10. I have to dig through the invalid options and replace * them with PRTE-10 options. */ void prte_info_do_config(bool want_all) { char *debug; char *have_dl; char *prun_prefix_by_default; char *symbol_visibility; char *manpages; /* setup the strings that don't require allocations*/ debug = PRTE_ENABLE_DEBUG ? "yes" : "no"; have_dl = PRTE_HAVE_DL_SUPPORT ? "yes" : "no"; prun_prefix_by_default = PRTE_WANT_PRTE_PREFIX_BY_DEFAULT ? "yes" : "no"; symbol_visibility = PRTE_C_HAVE_VISIBILITY ? "yes" : "no"; manpages = "yes"; /* output values */ prte_info_out("Configured by", "config:user", PRTE_CONFIGURE_USER); prte_info_out("Configured on", "config:timestamp", PRTE_CONFIGURE_DATE); prte_info_out("Configure host", "config:host", PRTE_CONFIGURE_HOST); prte_info_out("Configure command line", "config:cli", PRTE_CONFIGURE_CLI); prte_info_out("Built by", "build:user", PRTE_BUILD_USER); prte_info_out("Built on", "build:timestamp", PRTE_BUILD_DATE); prte_info_out("Built host", "build:host", PRTE_BUILD_HOST); prte_info_out("C compiler", "compiler:c:command", PRTE_CC); prte_info_out("C compiler absolute", "compiler:c:absolute", PRTE_CC_ABSOLUTE); prte_info_out("C compiler family name", "compiler:c:familyname", PLATFORM_STRINGIFY(PLATFORM_COMPILER_FAMILYNAME)); prte_info_out("C compiler version", "compiler:c:version", PLATFORM_STRINGIFY(PLATFORM_COMPILER_VERSION_STR)); if (want_all) { prte_info_out_int("C char size", "compiler:c:sizeof:char", sizeof(char)); prte_info_out_int("C bool size", "compiler:c:sizeof:bool", sizeof(bool)); prte_info_out_int("C short size", "compiler:c:sizeof:short", sizeof(short)); prte_info_out_int("C int size", "compiler:c:sizeof:int", sizeof(int)); 
prte_info_out_int("C long size", "compiler:c:sizeof:long", sizeof(long)); prte_info_out_int("C float size", "compiler:c:sizeof:float", sizeof(float)); prte_info_out_int("C double size", "compiler:c:sizeof:double", sizeof(double)); prte_info_out_int("C pointer size", "compiler:c:sizeof:pointer", sizeof(void *)); prte_info_out("C bool align", "compiler:c:align:bool", "skipped"); prte_info_out_int("C int align", "compiler:c:align:int", ALIGNOF_INT); prte_info_out_int("C double align", "compiler:c:align:double", ALIGNOF_DOUBLE); } prte_info_out("Thread support", "option:threads", "posix"); if (want_all) { prte_info_out("Build CFLAGS", "option:build:cflags", PRTE_BUILD_CFLAGS); prte_info_out("Build LDFLAGS", "option:build:ldflags", PRTE_BUILD_LDFLAGS); prte_info_out("Build LIBS", "option:build:libs", PRTE_BUILD_LIBS); } prte_info_out("Internal debug support", "option:debug", debug); prte_info_out("dl support", "option:dlopen", have_dl); prte_info_out("prun default --prefix", "prun:prefix_by_default", prun_prefix_by_default); prte_info_out("Symbol vis. support", "options:visibility", symbol_visibility); prte_info_out("Manpages built", "options:man-pages", manpages); } prrte-3.0.13/src/tools/prte_info/components.c0000664000175000017500000001364615145263240021377 0ustar alastairalastair/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2010-2012 Los Alamos National Security, LLC. 
* All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2018-2020 Intel, Inc. All rights reserved. * Copyright (c) 2021-2023 Nanook Consulting. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #include "prte_config.h" #include #include #include "src/runtime/runtime.h" #include "src/class/pmix_list.h" #include "src/class/pmix_pointer_array.h" #include "src/include/constants.h" #include "src/util/pmix_argv.h" #include "src/util/pmix_cmd_line.h" #include "src/util/error.h" #include "src/util/pmix_output.h" #include "src/util/pmix_printf.h" #include "src/util/pmix_show_help.h" #include "src/include/prte_frameworks.h" #include "src/mca/prteinstalldirs/prteinstalldirs.h" #include "src/mca/base/pmix_mca_base_component_repository.h" #include "src/tools/prte_info/pinfo.h" /* * Public variables */ static void component_map_construct(prte_info_component_map_t *map) { map->type = NULL; } static void component_map_destruct(prte_info_component_map_t *map) { if (NULL != map->type) { free(map->type); } /* the type close functions will release the * list of components */ } PMIX_CLASS_INSTANCE(prte_info_component_map_t, pmix_list_item_t, component_map_construct, component_map_destruct); pmix_pointer_array_t prte_component_map = PMIX_POINTER_ARRAY_STATIC_INIT; /* * Private variables */ static bool opened_components = false; static int info_register_framework(pmix_mca_base_framework_t *framework, pmix_pointer_array_t *component_map) { prte_info_component_map_t *map; int rc; rc = pmix_mca_base_framework_register(framework, PMIX_MCA_BASE_REGISTER_ALL); if (PMIX_SUCCESS != rc && PMIX_ERR_BAD_PARAM != rc) { return rc; } if (NULL != component_map) { map = PMIX_NEW(prte_info_component_map_t); map->type = strdup(framework->framework_name); map->components = &framework->framework_components; map->failed_components = &framework->framework_failed_components; 
pmix_pointer_array_add(component_map, map); } return rc; } static int register_project_frameworks(const char *project_name, pmix_mca_base_framework_t **frameworks, pmix_pointer_array_t *component_map) { int i, rc = PRTE_SUCCESS; for (i = 0; NULL != frameworks[i]; i++) { if (PMIX_SUCCESS != (rc = info_register_framework(frameworks[i], component_map))) { if (PMIX_ERR_BAD_PARAM == rc) { fprintf(stderr, "\nA \"bad parameter\" error was encountered when opening the %s %s " "framework\n", project_name, frameworks[i]->framework_name); fprintf(stderr, "The output received from that framework includes the following " "parameters:\n\n"); } else if (PMIX_ERR_NOT_AVAILABLE != rc) { fprintf(stderr, "%s_info_register: %s failed\n", project_name, frameworks[i]->framework_name); rc = PRTE_ERROR; } else { continue; } break; } } return rc; } static int register_framework_params(pmix_pointer_array_t *component_map) { int rc; /* Register mca/base parameters */ if (PMIX_SUCCESS != pmix_mca_base_open(NULL)) { pmix_show_help("help-prte_info.txt", "lib-call-fail", true, "mca_base_open", __FILE__, __LINE__); return PRTE_ERROR; } /* Register the PRTE layer's MCA parameters */ if (PRTE_SUCCESS != (rc = prte_register_params())) { fprintf(stderr, "prte_info_register: prte_register_params failed\n"); return rc; } return register_project_frameworks("prte", prte_frameworks, component_map); } void prte_info_components_open(void) { if (opened_components) { return; } opened_components = true; /* init the map */ PMIX_CONSTRUCT(&prte_component_map, pmix_pointer_array_t); pmix_pointer_array_init(&prte_component_map, 256, INT_MAX, 128); register_framework_params(&prte_component_map); } /* * Not to be confused with prte_info_close_components. 
*/ void prte_info_components_close(void) { int i; prte_info_component_map_t *map; if (!opened_components) { return; } for (i = 0; NULL != prte_frameworks[i]; i++) { (void) pmix_mca_base_framework_close(prte_frameworks[i]); } for (i = 0; i < prte_component_map.size; i++) { if (NULL != (map = (prte_info_component_map_t *) pmix_pointer_array_get_item(&prte_component_map, i))) { PMIX_RELEASE(map); } } PMIX_DESTRUCT(&prte_component_map); opened_components = false; } prrte-3.0.13/src/tools/prte_info/prte_info.c0000664000175000017500000002056515145263240021175 0ustar alastairalastair/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2010-2016 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2014-2020 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2021-2024 Nanook Consulting All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #include "prte_config.h" #include #include #include #ifdef HAVE_UNISTD_H # include #endif #ifdef HAVE_NETDB_H # include #endif #ifdef HAVE_SYS_PARAM_H # include #endif #include #include #include "src/class/pmix_object.h" #include "src/class/pmix_pointer_array.h" #include "src/mca/base/pmix_base.h" #include "src/mca/errmgr/errmgr.h" #include "src/mca/prteinstalldirs/prteinstalldirs.h" #include "src/mca/schizo/base/base.h" #include "src/prted/pmix/pmix_server.h" #include "src/util/pmix_argv.h" #include "src/util/pmix_basename.h" #include "src/util/pmix_cmd_line.h" #include "src/util/error.h" #include "src/util/pmix_path.h" #include "src/util/proc_info.h" #include "src/util/pmix_show_help.h" #include "constants.h" #include "src/include/prte_frameworks.h" #include "src/include/version.h" #include "src/runtime/prte_locks.h" #include "src/tools/prte_info/pinfo.h" /* * Public variables */ bool prte_info_pretty = true; pmix_cli_result_t prte_info_cmd_line = PMIX_CLI_RESULT_STATIC_INIT; const char *prte_info_type_all = "all"; const char *prte_info_type_prte = "prte"; const char *prte_info_type_base = "base"; pmix_pointer_array_t mca_types = PMIX_POINTER_ARRAY_STATIC_INIT; int main(int argc, char *argv[]) { int ret = 0; bool acted = false; bool want_all = false; int i; char *str; char *ptr; char *personality; prte_schizo_base_module_t *schizo; PRTE_HIDE_UNUSED_PARAMS(argc); /* protect against problems if someone passes us thru a pipe * and then abnormally terminates the pipe early */ signal(SIGPIPE, SIG_IGN); prte_tool_basename = pmix_basename(argv[0]); prte_tool_actual = "prte_info"; /* Initialize the argv parsing stuff */ if (PRTE_SUCCESS != (ret = prte_init_util(PRTE_PROC_MASTER))) { pmix_show_help("help-prte-info.txt", "lib-call-fail", true, "prte_init_util", __FILE__, __LINE__, NULL); exit(ret); } /* open the SCHIZO framework */ ret = pmix_mca_base_framework_open(&prte_schizo_base_framework, 
PMIX_MCA_BASE_OPEN_DEFAULT); if (PRTE_SUCCESS != ret) { PRTE_ERROR_LOG(ret); return ret; } if (PRTE_SUCCESS != (ret = prte_schizo_base_select())) { PRTE_ERROR_LOG(ret); return ret; } /* look for any personality specification */ personality = NULL; for (i = 0; NULL != argv[i]; i++) { if (0 == strcmp(argv[i], "--personality")) { personality = argv[i + 1]; break; } } /* detect if we are running as a proxy and select the active * schizo module for this tool */ schizo = prte_schizo_base_detect_proxy(personality); if (NULL == schizo) { pmix_show_help("help-schizo-base.txt", "no-proxy", true, prte_tool_basename, personality); return 1; } if (NULL == personality) { personality = schizo->name; } /* Register all global MCA Params */ if (PRTE_SUCCESS != (ret = prte_register_params())) { if (PRTE_ERR_SILENT != ret) { pmix_show_help("help-prte-runtime", "prte_init:startup:internal-failure", true, "prte register params", PRTE_ERROR_NAME(ret), ret); } return 1; } /* parse the input argv to get values, including everyone's MCA params */ PMIX_CONSTRUCT(&prte_info_cmd_line, pmix_cli_result_t); ret = schizo->parse_cli(argv, &prte_info_cmd_line, PMIX_CLI_SILENT); if (PRTE_SUCCESS != ret) { PMIX_DESTRUCT(&prte_info_cmd_line); if (PRTE_OPERATION_SUCCEEDED == ret) { return PRTE_SUCCESS; } if (PRTE_ERR_SILENT != ret) { fprintf(stderr, "%s: command line error (%s)\n", prte_tool_basename, prte_strerror(ret)); } return ret; } // we do NOT accept arguments other than our own if (NULL != prte_info_cmd_line.tail) { str = PMIX_ARGV_JOIN_COMPAT(prte_info_cmd_line.tail, ' '); if (0 != strcmp(str, argv[0])) { ptr = pmix_show_help_string("help-pterm.txt", "no-args", false, prte_tool_basename, str, prte_tool_basename); free(str); if (NULL != ptr) { printf("%s", ptr); free(ptr); } return -1; } free(str); } /* setup the mca_types array */ PMIX_CONSTRUCT(&mca_types, pmix_pointer_array_t); pmix_pointer_array_init(&mca_types, 256, INT_MAX, 128); /* add a type for prte itself */ 
pmix_pointer_array_add(&mca_types, "mca"); pmix_pointer_array_add(&mca_types, "prte"); /* add a type for hwloc */ pmix_pointer_array_add(&mca_types, "hwloc"); /* let the pmix server register params */ pmix_server_register_params(); /* add those in */ pmix_pointer_array_add(&mca_types, "pmix"); /* add the rml and routed types since they are no * longer in a framework */ pmix_pointer_array_add(&mca_types, "rml"); pmix_pointer_array_add(&mca_types, "routed"); /* push all the types found by autogen */ for (i = 0; NULL != prte_frameworks[i]; i++) { pmix_pointer_array_add(&mca_types, prte_frameworks[i]->framework_name); } /* Execute the desired action(s) */ want_all = pmix_cmd_line_is_taken(&prte_info_cmd_line, "all"); if (want_all) { prte_info_do_version(want_all); acted = true; } else if (pmix_cmd_line_is_taken(&prte_info_cmd_line, "show-version")) { prte_info_do_version(false); acted = true; } if (want_all || pmix_cmd_line_is_taken(&prte_info_cmd_line, "path")) { prte_info_do_path(want_all); acted = true; } if (want_all || pmix_cmd_line_is_taken(&prte_info_cmd_line, "arch")) { prte_info_do_arch(); acted = true; } if (want_all || pmix_cmd_line_is_taken(&prte_info_cmd_line, "hostname")) { prte_info_do_hostname(); acted = true; } if (want_all || pmix_cmd_line_is_taken(&prte_info_cmd_line, "config")) { prte_info_do_config(true); acted = true; } if (want_all || pmix_cmd_line_is_taken(&prte_info_cmd_line, "param")) { prte_info_do_params(want_all, pmix_cmd_line_is_taken(&prte_info_cmd_line, "internal")); acted = true; } /* If no command line args are specified, show default set */ if (!acted) { prte_info_show_prte_version(prte_info_ver_full); prte_info_show_path(prte_info_path_prefix, prte_install_dirs.prefix); prte_info_do_arch(); prte_info_do_hostname(); prte_info_do_config(false); prte_info_components_open(); for (i = 0; i < mca_types.size; ++i) { if (NULL == (str = (char *) pmix_pointer_array_get_item(&mca_types, i))) { continue; } prte_info_show_component_version(str, 
prte_info_component_all, prte_info_ver_full, prte_info_type_all); } } /* All done */ prte_info_components_close(); PMIX_DESTRUCT(&mca_types); pmix_mca_base_close(); return 0; } prrte-3.0.13/src/tools/prte_info/Makefile.am0000664000175000017500000000443315145263240021074 0ustar alastairalastair# # Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. # Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010-2020 Cisco Systems, Inc. All rights reserved # Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. # Copyright (c) 2016-2020 Intel, Inc. All rights reserved. # Copyright (c) 2017 Research Organization for Information Science # and Technology (RIST). All rights reserved. # Copyright (c) 2021-2023 Nanook Consulting. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow # # $HEADER$ # AM_CFLAGS = \ -DPRTE_CONFIGURE_USER="\"@PRTE_CONFIGURE_USER@\"" \ -DPRTE_CONFIGURE_HOST="\"@PRTE_CONFIGURE_HOST@\"" \ -DPRTE_CONFIGURE_DATE="\"@PRTE_CONFIGURE_DATE@\"" \ -DPRTE_BUILD_USER="\"$$USER\"" \ -DPRTE_BUILD_HOST="\"$${HOSTNAME:-`(hostname || uname -n) | sed 1q`}\"" \ -DPRTE_BUILD_DATE="\"`$(top_srcdir)/config/getdate.sh`\"" \ -DPRTE_BUILD_CFLAGS="\"@CFLAGS@\"" \ -DPRTE_BUILD_CPPFLAGS="\"@CPPFLAGS@\"" \ -DPRTE_BUILD_LDFLAGS="\"@LDFLAGS@\"" \ -DPRTE_BUILD_LIBS="\"@LIBS@\"" \ -DPRTE_CC_ABSOLUTE="\"@PRTE_CC_ABSOLUTE@\"" \ -DPRTE_GREEK_VERSION="\"@PRTE_GREEK_VERSION@\"" \ -DPRTE_REPO_REV="\"@PRTE_REPO_REV@\"" \ -DPMIX_RELEASE_DATE="\"@PMIX_RELEASE_DATE@\"" AM_LDFLAGS = $(prte_hwloc_LDFLAGS) $(prte_libevent_LDFLAGS) $(prte_pmix_LDFLAGS) bin_PROGRAMS = prte_info prte_info_SOURCES = \ pinfo.h \ prte_info.c \ output.c \ param.c \ components.c \ version.c prte_info_LDADD = \ $(prte_libevent_LIBS) \ $(prte_hwloc_LIBS) \ $(prte_pmix_LIBS) \ $(top_builddir)/src/libprrte.la prrte-3.0.13/src/tools/prte_info/pinfo.h0000664000175000017500000001007715145263240020325 0ustar alastairalastair/* * Copyright (c) 2004-2009 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. * Copyright (c) 2018 Intel, Inc. All rights reserved. * Copyright (c) 2021-2022 Nanook Consulting. 
All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #ifndef PRTE_INFO_TOOL_H #define PRTE_INFO_TOOL_H #include "prte_config.h" #include "src/class/pmix_list.h" #include "src/class/pmix_pointer_array.h" #include "src/mca/mca.h" #include "src/util/pmix_cmd_line.h" #include "src/util/pmix_printf.h" BEGIN_C_DECLS /* * Globals */ extern bool prte_info_pretty; extern pmix_cli_result_t prte_info_cmd_line; extern const char *prte_info_type_all; extern const char *prte_info_type_prte; extern const char *prte_info_type_base; extern pmix_pointer_array_t mca_types; /* * Version-related strings and functions */ extern const char *prte_info_ver_full; extern const char *prte_info_ver_major; extern const char *prte_info_ver_minor; extern const char *prte_info_ver_release; extern const char *prte_info_ver_greek; extern const char *prte_info_ver_svn; void prte_info_do_version(bool want_all); void prte_info_show_prte_version(const char *scope); void prte_info_show_component_version(const char *type_name, const char *component_name, const char *scope, const char *ver_type); /* * Parameter/configuration-related functions */ extern const char *prte_info_component_all; extern const char *prte_info_param_all; extern const char *prte_info_path_prefix; extern const char *prte_info_path_bindir; extern const char *prte_info_path_libdir; extern const char *prte_info_path_incdir; extern const char *prte_info_path_mandir; extern const char *prte_info_path_pkglibdir; extern const char *prte_info_path_sysconfdir; extern const char *prte_info_path_exec_prefix; extern const char *prte_info_path_sbindir; extern const char *prte_info_path_libexecdir; extern const char *prte_info_path_datarootdir; extern const char *prte_info_path_datadir; extern const char *prte_info_path_sharedstatedir; extern const char *prte_info_path_localstatedir; extern const char *prte_info_path_infodir; extern const char *prte_info_path_pkgdatadir; extern const char 
*prte_info_path_pkgincludedir;

void prte_info_do_params(bool want_all, bool want_internal);
void prte_info_show_mca_params(const char *type, const char *component, bool want_internal);

void prte_info_do_path(bool want_all);
void prte_info_show_path(const char *type, const char *value);

void prte_info_do_arch(void);
void prte_info_do_hostname(void);
void prte_info_do_config(bool want_all);
/* NOTE(review): this repeats the prototype already declared in the
 * version-related section of this header. */
void prte_info_show_prte_version(const char *scope);

/*
 * Output-related functions
 */
void prte_info_out(const char *pretty_message, const char *plain_message, const char *value);
void prte_info_out_int(const char *pretty_message, const char *plain_message, int value);

/*
 * Component-related functions
 */

/*
 * One entry of prte_component_map: ties a framework name to that
 * framework's component lists.
 */
typedef struct {
    /* base class, see the PMIX_CLASS_DECLARATION below */
    pmix_list_item_t super;
    /* framework name; the entry owns this string and its destructor frees it */
    char *type;
    /* points at the framework's own component list (not owned by the entry) */
    pmix_list_t *components;
    /* points at the framework's own list of failed components (not owned) */
    pmix_list_t *failed_components;
} prte_info_component_map_t;
PRTE_EXPORT PMIX_CLASS_DECLARATION(prte_info_component_map_t);

/* array of prte_info_component_map_t entries filled by prte_info_components_open() */
extern pmix_pointer_array_t prte_component_map;

void prte_info_components_open(void);
void prte_info_components_close(void);

END_C_DECLS

#endif /* PRTE_INFO_TOOL_H */
prrte-3.0.13/src/tools/prted/0000775000175000017500000000000015145263240016165 5ustar alastairalastairprrte-3.0.13/src/tools/prted/prted.c0000664000175000017500000007753515145263240017470 0ustar alastairalastair
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
 * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2011 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2007-2020 Cisco Systems, Inc.  All rights reserved
 * Copyright (c) 2007-2016 Los Alamos National Security, LLC.  All rights
 *                         reserved.
* Copyright (c) 2009 Institut National de Recherche en Informatique * et Automatique. All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2013-2020 Intel, Inc. All rights reserved. * Copyright (c) 2015-2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2021-2026 Nanook Consulting All rights reserved. * Copyright (c) 2022 Triad National Security, LLC. All rights * reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #include "prte_config.h" #include "constants.h" #include #include #include #ifdef HAVE_UNISTD_H # include #endif #ifdef HAVE_NETDB_H # include #endif #ifdef HAVE_SYS_PARAM_H # include #endif #include #include #include #ifdef HAVE_SYS_TIME_H # include #endif /* HAVE_SYS_TIME_H */ #ifdef HAVE_SYS_STAT_H # include #endif #ifdef HAVE_SYS_WAIT_H # include #endif #include #include "src/event/event-internal.h" #include "src/hwloc/hwloc-internal.h" #include "src/mca/base/pmix_base.h" #include "src/mca/base/pmix_mca_base_var.h" #include "src/pmix/pmix-internal.h" #include "src/util/pmix_argv.h" #include "src/util/pmix_basename.h" #include "src/util/prte_cmd_line.h" #include "src/util/daemon_init.h" #include "src/util/pmix_fd.h" #include "src/util/pmix_if.h" #include "src/util/pmix_net.h" #include "src/util/pmix_os_path.h" #include "src/util/pmix_output.h" #include "src/util/pmix_printf.h" #include "src/util/pmix_environ.h" #include "src/rml/rml_contact.h" #include "src/threads/pmix_threads.h" #include "src/util/name_fns.h" #include "src/util/nidmap.h" #include "src/util/pmix_parse_options.h" #include "src/util/proc_info.h" #include "src/util/session_dir.h" #include "src/util/pmix_show_help.h" #include "src/mca/errmgr/errmgr.h" #include "src/mca/ess/base/base.h" #include "src/mca/grpcomm/base/base.h" #include "src/mca/grpcomm/grpcomm.h" #include "src/mca/odls/base/base.h" #include "src/mca/plm/plm.h" #include 
/* Pipe shared between the original (foreground) process and the daemonized
 * child: the child writes a single 'K' byte once it is up, or simply closes
 * its end (by exiting) if startup failed. */
static int wait_pipe[2];

/**
 * Wait in the original process for the daemonized child to report in.
 *
 * Closes the write end of wait_pipe, then blocks reading one byte from the
 * read end (restarting the read when interrupted by a signal).
 *
 * @param pid  process id of the daemonized child
 *
 * @return 0 if the child sent the 'K' acknowledgement; the child's exit code
 *         if the pipe reached EOF and the child exited normally; 255 in every
 *         other case (read error, unexpected byte, child killed by a signal,
 *         or the child could not be reaped).
 */
static int wait_dvm(pid_t pid)
{
    char reply;
    ssize_t nread;
    pid_t reaped;
    int status = 0;

    close(wait_pipe[1]);
    do {
        nread = read(wait_pipe[0], &reply, 1);
    } while (0 > nread && EINTR == errno);

    if (1 == nread && 'K' == reply) {
        return 0;
    }

    if (0 == nread) {
        /* EOF without an acknowledgement: the child went away, so harvest
         * its exit code. Only trust `status` if waitpid really reaped it. */
        do {
            reaped = waitpid(pid, &status, 0);
        } while (0 > reaped && EINTR == errno);
        if (reaped == pid && WIFEXITED(status)) {
            return WEXITSTATUS(status);
        }
    }
    return 255;
}
WEXITSTATUS(status); } } return 255; } int main(int argc, char *argv[]) { int ret = 0; int i; pmix_data_buffer_t *buffer; pmix_value_t val; pmix_proc_t proc; pmix_status_t prc; myxfer_t xfer; pmix_data_buffer_t pbuf, *wbuf; pmix_byte_object_t pbo; int8_t flag; uint8_t naliases, ni; char **nonlocal = NULL, *personality; int n; pmix_value_t *vptr; char **pargv; int pargc; prte_schizo_base_module_t *schizo; pmix_cli_item_t *opt; prte_job_t *jdata; char *umask_str = getenv("PRTE_DAEMON_UMASK_VALUE"); if (NULL != umask_str) { char *endptr; long mask = strtol(umask_str, &endptr, 8); if ((!(0 == mask && (EINVAL == errno || ERANGE == errno))) && (*endptr == '\0')) { umask(mask); } } /* ensure we aren't misdirected on choice of proxy since * some environments forward their envars */ unsetenv("PRTE_MCA_schizo_proxy"); /* initialize the globals */ PMIX_DATA_BUFFER_CREATE(bucket); prte_tool_basename = pmix_basename(argv[0]); prte_tool_actual = "prted"; pargc = argc; pargv = pmix_argv_copy_strip(argv); // strip any quoted arguments /* save a pristine copy of the environment for launch purposes. * This MUST be done so that we can pass it to any local procs we * spawn - otherwise, those local procs will get a bunch of * params only relevant to PRRTE. 
Skip all PMIx and PRRTE params * as those are only targeting us */ prte_launch_environ = NULL; for (i=0; NULL != environ[i]; i++) { if (0 != strncmp(environ[i], "PMIX_", 5) && 0 != strncmp(environ[i], "PRTE_", 5)) { PMIX_ARGV_APPEND_NOSIZE_COMPAT(&prte_launch_environ, environ[i]); } } ret = prte_init_minimum(); if (PRTE_SUCCESS != ret) { return ret; } /* we always need the prrte and pmix params */ ret = prte_schizo_base_parse_prte(pargc, 0, pargv, NULL); if (PRTE_SUCCESS != ret) { return ret; } ret = prte_schizo_base_parse_pmix(pargc, 0, pargv, NULL); if (PRTE_SUCCESS != ret) { return ret; } /* init the tiny part of PRTE we initially use */ prte_init_util(PRTE_PROC_DAEMON); /* open the SCHIZO framework */ ret = pmix_mca_base_framework_open(&prte_schizo_base_framework, PMIX_MCA_BASE_OPEN_DEFAULT); if (PRTE_SUCCESS != ret) { PRTE_ERROR_LOG(ret); return ret; } if (PRTE_SUCCESS != (ret = prte_schizo_base_select())) { PRTE_ERROR_LOG(ret); return ret; } /* look for any personality specification */ personality = NULL; for (i = 0; NULL != pargv[i]; i++) { if (0 == strcmp(pargv[i], "--personality")) { personality = pargv[i + 1]; break; } } /* get our schizo module */ schizo = prte_schizo_base_detect_proxy(personality); if (NULL == schizo) { pmix_show_help("help-schizo-base.txt", "no-proxy", true, prte_tool_basename, personality); return 1; } /* parse the CLI to load the MCA params */ PMIX_CONSTRUCT(&results, pmix_cli_result_t); ret = schizo->parse_cli(pargv, &results, PMIX_CLI_SILENT); if (PRTE_SUCCESS != ret) { if (PRTE_OPERATION_SUCCEEDED == ret) { return PRTE_SUCCESS; } if (PRTE_ERR_SILENT != ret) { fprintf(stderr, "%s: command line error (%s)\n", prte_tool_basename, prte_strerror(ret)); } return ret; } /* Register all global MCA Params */ if (PRTE_SUCCESS != (ret = prte_register_params())) { if (PRTE_ERR_SILENT != ret) { pmix_show_help("help-prte-runtime", "prte_init:startup:internal-failure", true, "prte register params", PRTE_ERROR_NAME(ret), ret); } return 1; } /* 
check if we are running as root - if we are, then only allow * us to proceed if the allow-run-as-root flag was given. Otherwise, * exit with a giant warning message */ if (0 == geteuid()) { schizo->allow_run_as_root(&results); // will exit us if not allowed } /* check for debug options */ if (pmix_cmd_line_is_taken(&results, PRTE_CLI_DEBUG)) { prte_debug_flag = true; } if (pmix_cmd_line_is_taken(&results, PRTE_CLI_DEBUG_DAEMONS)) { prte_debug_daemons_flag = true; } if (pmix_cmd_line_is_taken(&results, PRTE_CLI_DEBUG_DAEMONS_FILE)) { prte_debug_daemons_file_flag = true; } if (pmix_cmd_line_is_taken(&results, PRTE_CLI_LEAVE_SESSION_ATTACHED)) { prte_leave_session_attached = true; } // check for hetero nodes if (pmix_cmd_line_is_taken(&results, PRTE_CLI_HETERO_NODES)) { prte_hetero_nodes = true; } /* if prte_daemon_debug is set, let someone know we are alive right * away just in case we have a problem along the way */ if (prte_debug_daemons_flag) { fprintf(stderr, "Daemon was launched on %s - beginning to initialize\n", prte_process_info.nodename); } /* detach from controlling terminal * otherwise, remain attached so output can get to us */ if (!prte_leave_session_attached && !prte_debug_daemons_flag) { if (0 > pipe(wait_pipe)) { return PRTE_ERROR; } prte_state_base.parent_fd = wait_pipe[1]; prte_daemon_init_callback(NULL, wait_dvm); close(wait_pipe[0]); } else { // the daemon_init_callback fn already setsid, so don't // do it twice! 
#if defined(HAVE_SETSID) /* see if we were directed to separate from current session */ if (pmix_cmd_line_is_taken(&results, PRTE_CLI_SET_SID)) { setsid(); } #endif } /* ensure we silence any compression warnings */ PMIX_SETENV_COMPAT("PMIX_MCA_compress_base_silence_warning", "1", true, &environ); /* check for bootstrap operation */ if (pmix_cmd_line_is_taken(&results, PRTE_CLI_BOOTSTRAP)) { /* fill in our procID and other information * from the configuration file */ ret = prte_ess_base_bootstrap(); if (PRTE_SUCCESS != ret) { return ret; } } if (PRTE_SUCCESS != (ret = prte_init(&argc, &argv, PRTE_PROC_DAEMON))) { PRTE_ERROR_LOG(ret); return ret; } /* bind ourselves if so directed */ if (NULL != prte_daemon_cores) { char **cores = NULL, *tmp; hwloc_obj_t pu; hwloc_cpuset_t ours, res; int core; /* could be a collection of comma-delimited ranges, so * use our handy utility to parse it */ pmix_util_parse_range_options(prte_daemon_cores, &cores); if (NULL != cores) { ours = hwloc_bitmap_alloc(); hwloc_bitmap_zero(ours); res = hwloc_bitmap_alloc(); for (i = 0; NULL != cores[i]; i++) { core = strtoul(cores[i], NULL, 10); if (NULL == (pu = prte_hwloc_base_get_pu(prte_hwloc_topology, false, core))) { /* the message will now come out locally */ pmix_show_help("help-prted.txt", "orted:cannot-bind", true, prte_process_info.nodename, prte_daemon_cores); ret = PRTE_ERR_NOT_SUPPORTED; hwloc_bitmap_free(ours); hwloc_bitmap_free(res); goto DONE; } hwloc_bitmap_or(res, ours, pu->cpuset); hwloc_bitmap_copy(ours, res); } /* if the result is all zeros, then don't bind */ if (!hwloc_bitmap_iszero(ours)) { (void) hwloc_set_cpubind(prte_hwloc_topology, ours, 0); if (prte_debug_daemons_flag) { tmp = prte_hwloc_base_cset2str(ours, false, false, prte_hwloc_topology); pmix_output(0, "Daemon %s is bound to cores %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), tmp); free(tmp); } } /* cleanup */ hwloc_bitmap_free(ours); hwloc_bitmap_free(res); PMIX_ARGV_FREE_COMPAT(cores); } } if (PMIX_RANK_INVALID != 
prted_debug_failure) { /* are we the specified vpid? */ if (PRTE_PROC_MY_NAME->rank == prted_debug_failure || prted_debug_failure == PMIX_RANK_WILDCARD) { /* if the user specified we delay, then setup a timer * and have it kill us */ if (0 < prted_debug_failure_delay) { PRTE_TIMER_EVENT(prted_debug_failure_delay, 0, shutdown_callback); } else { pmix_output(0, "%s is executing clean abnormal termination", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)); /* do -not- call finalize as this will send a message to the HNP * indicating clean termination! Instead, just forcibly cleanup * the local session_dir tree and exit */ jdata = prte_get_job_data_object(PRTE_PROC_MY_NAME->nspace); PMIX_RELEASE(jdata); /* return with non-zero status */ ret = PRTE_ERROR_DEFAULT_EXIT_CODE; goto DONE; } } } /* setup the primary daemon command receive function */ PRTE_RML_RECV(PRTE_NAME_WILDCARD, PRTE_RML_TAG_DAEMON, PRTE_RML_PERSISTENT, prte_daemon_recv, NULL); /* output a message indicating we are alive, our name, and our pid * for debugging purposes */ if (prte_debug_flag) { fprintf(stderr, "Daemon %s checking in as pid %ld on host %s\n", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), (long) prte_process_info.pid, prte_process_info.nodename); } /* add the DVM master's URI to our info */ PMIX_VALUE_LOAD(&val, prte_process_info.my_hnp_uri, PMIX_STRING); PMIX_LOAD_NSPACE(proc.nspace, prte_process_info.myproc.nspace); proc.rank = PRTE_PROC_MY_HNP->rank; prc = PMIx_Store_internal(&proc, PMIX_PROC_URI, &val); if (PMIX_SUCCESS != prc) { PMIX_ERROR_LOG(prc); PMIX_VALUE_DESTRUCT(&val); ret = PRTE_ERROR; goto DONE; } PMIX_VALUE_DESTRUCT(&val); /* If I have a parent, then save his contact info so * any messages we send can flow thru him. 
*/ prte_parent_uri = NULL; (void) pmix_mca_base_var_register("prte", "prte", NULL, "parent_uri", "URI for the parent if tree launch is enabled.", PMIX_MCA_BASE_VAR_TYPE_STRING, &prte_parent_uri); if (NULL != prte_parent_uri) { /* set the contact info into our local database */ ret = prte_rml_parse_uris(prte_parent_uri, PRTE_PROC_MY_PARENT, NULL); if (PRTE_SUCCESS != ret) { PRTE_ERROR_LOG(ret); goto DONE; } if (PRTE_PROC_MY_PARENT->rank != PRTE_PROC_MY_HNP->rank) { PMIX_VALUE_LOAD(&val, prte_parent_uri, PMIX_STRING); PMIX_LOAD_NSPACE(proc.nspace, prte_process_info.myproc.nspace); proc.rank = PRTE_PROC_MY_PARENT->rank; if (PMIX_SUCCESS != (prc = PMIx_Store_internal(&proc, PMIX_PROC_URI, &val))) { PMIX_ERROR_LOG(prc); PMIX_VALUE_DESTRUCT(&val); ret = PRTE_ERROR; goto DONE; } PMIX_VALUE_DESTRUCT(&val); } } /* setup the rollup callback */ PRTE_RML_RECV(PRTE_NAME_WILDCARD, PRTE_RML_TAG_PRTED_CALLBACK, PRTE_RML_PERSISTENT, rollup, NULL); if (prte_static_ports || NULL != prte_parent_uri) { /* since we will be waiting for any children to send us * their rollup info before sending to our parent, save * a little time in the launch phase by "warming up" the * connection to our parent while we wait for our children */ PMIX_DATA_BUFFER_CREATE(wbuf); // zero-byte message PRTE_RML_RECV(PRTE_PROC_MY_PARENT, PRTE_RML_TAG_NODE_REGEX_REPORT, PRTE_RML_PERSISTENT, node_regex_report, &node_regex_waiting); node_regex_waiting = true; PRTE_RML_SEND(ret, PRTE_PROC_MY_PARENT->rank, wbuf, PRTE_RML_TAG_WARMUP_CONNECTION); if (PRTE_SUCCESS != ret) { PRTE_ERROR_LOG(ret); PMIX_DATA_BUFFER_RELEASE(wbuf); goto DONE; } } /* send the information to the prted report-back point - this function * will process the data, but also counts the number of * prteds that reported back so the launch procedure can continue. 
* We need to do this at the last possible second as the HNP * can turn right around and begin issuing orders to us */ PMIX_DATA_BUFFER_CREATE(buffer); // zero-byte message /* insert our name for rollup purposes */ prc = PMIx_Data_pack(NULL, buffer, PRTE_PROC_MY_NAME, 1, PMIX_PROC); if (PMIX_SUCCESS != prc) { PMIX_ERROR_LOG(prc); PMIX_DATA_BUFFER_RELEASE(buffer); goto DONE; } /* get any connection info we may have pushed */ prc = PMIx_Get(&prte_process_info.myproc, PMIX_PROC_URI, NULL, 0, &vptr); if (PMIX_SUCCESS != prc) { PMIX_ERROR_LOG(prc); PMIX_DATA_BUFFER_RELEASE(buffer); goto DONE; } prc = PMIx_Data_pack(NULL, buffer, &vptr->data.string, 1, PMIX_STRING); if (PMIX_SUCCESS != prc) { PMIX_ERROR_LOG(prc); ret = PRTE_ERROR; PMIX_DATA_BUFFER_RELEASE(buffer); goto DONE; } /* include our node name */ prc = PMIx_Data_pack(NULL, buffer, &prte_process_info.nodename, 1, PMIX_STRING); if (PMIX_SUCCESS != prc) { PMIX_ERROR_LOG(prc); PMIX_DATA_BUFFER_RELEASE(buffer); goto DONE; } /* include any non-loopback aliases for this node */ for (n = 0; NULL != prte_process_info.aliases[n]; n++) { if (0 != strcmp(prte_process_info.aliases[n], "localhost") && 0 != strcmp(prte_process_info.aliases[n], "127.0.0.1") && 0 != strcmp(prte_process_info.aliases[n], prte_process_info.nodename)) { PMIX_ARGV_APPEND_NOSIZE_COMPAT(&nonlocal, prte_process_info.aliases[n]); } } naliases = PMIX_ARGV_COUNT_COMPAT(nonlocal); prc = PMIx_Data_pack(NULL, buffer, &naliases, 1, PMIX_UINT8); if (PMIX_SUCCESS != prc) { PMIX_ERROR_LOG(prc); PMIX_DATA_BUFFER_RELEASE(buffer); PMIX_ARGV_FREE_COMPAT(nonlocal); goto DONE; } for (ni = 0; ni < naliases; ni++) { prc = PMIx_Data_pack(NULL, buffer, &nonlocal[ni], 1, PMIX_STRING); if (PMIX_SUCCESS != prc) { PMIX_ERROR_LOG(prc); PMIX_DATA_BUFFER_RELEASE(buffer); PMIX_ARGV_FREE_COMPAT(nonlocal); goto DONE; } } PMIX_ARGV_FREE_COMPAT(nonlocal); prc = PMIx_Data_pack(NULL, buffer, &prte_topo_signature, 1, PMIX_STRING); if (PMIX_SUCCESS != prc) { PMIX_ERROR_LOG(prc); 
PMIX_DATA_BUFFER_RELEASE(buffer); goto DONE; } /* if we are rank=1 or designated as having hetero node, then send our * topology back - otherwise, prte will request it if necessary */ if (1 == PRTE_PROC_MY_NAME->rank || prte_hetero_nodes) { pmix_data_buffer_t data; pmix_topology_t ptopo; bool compressed; /* setup an intermediate buffer */ PMIX_DATA_BUFFER_CONSTRUCT(&data); ptopo.source = "hwloc"; ptopo.topology = prte_hwloc_topology; prc = PMIx_Data_pack(NULL, &data, &ptopo, 1, PMIX_TOPO); if (PMIX_SUCCESS != prc) { PMIX_ERROR_LOG(prc); PMIX_DATA_BUFFER_RELEASE(buffer); PMIX_DATA_BUFFER_DESTRUCT(&data); goto DONE; } if (PMIx_Data_compress((uint8_t *) data.base_ptr, data.bytes_used, (uint8_t **) &pbo.bytes, &pbo.size)) { /* the data was compressed - mark that we compressed it */ compressed = true; } else { compressed = false; pbo.bytes = data.base_ptr; pbo.size = data.bytes_used; data.base_ptr = NULL; data.bytes_used = 0; } PMIX_DATA_BUFFER_DESTRUCT(&data); prc = PMIx_Data_pack(NULL, buffer, &compressed, 1, PMIX_BOOL); if (PMIX_SUCCESS != prc) { PMIX_ERROR_LOG(prc); PMIX_DATA_BUFFER_RELEASE(buffer); PMIX_BYTE_OBJECT_DESTRUCT(&pbo); goto DONE; } /* pack the data */ prc = PMIx_Data_pack(NULL, buffer, &pbo, 1, PMIX_BYTE_OBJECT); if (PMIX_SUCCESS != prc) { PMIX_ERROR_LOG(prc); PMIX_DATA_BUFFER_RELEASE(buffer); PMIX_BYTE_OBJECT_DESTRUCT(&pbo); goto DONE; } PMIX_BYTE_OBJECT_DESTRUCT(&pbo); } /* collect our network inventory */ memset(&xfer, 0, sizeof(myxfer_t)); PRTE_PMIX_CONSTRUCT_LOCK(&xfer.lock); if (PMIX_SUCCESS != (prc = PMIx_server_collect_inventory(NULL, 0, infocbfunc, &xfer))) { PMIX_ERROR_LOG(prc); ret = PRTE_ERR_NOT_SUPPORTED; goto DONE; } PRTE_PMIX_WAIT_THREAD(&xfer.lock); if (NULL != xfer.info) { /* pack a flag indicating that the inventory is included */ flag = 1; prc = PMIx_Data_pack(NULL, buffer, &flag, 1, PMIX_INT8); if (PMIX_SUCCESS != prc) { PMIX_ERROR_LOG(prc); PMIX_DATA_BUFFER_RELEASE(buffer); goto DONE; } PMIX_DATA_BUFFER_CONSTRUCT(&pbuf); if 
(PMIX_SUCCESS != (prc = PMIx_Data_pack(NULL, &pbuf, &xfer.ninfo, 1, PMIX_SIZE))) { PMIX_ERROR_LOG(prc); ret = PRTE_ERROR; PMIX_DATA_BUFFER_RELEASE(buffer); PMIX_DATA_BUFFER_DESTRUCT(&pbuf); goto DONE; } if (PMIX_SUCCESS != (prc = PMIx_Data_pack(NULL, &pbuf, xfer.info, xfer.ninfo, PMIX_INFO))) { PMIX_ERROR_LOG(prc); ret = PRTE_ERROR; PMIX_DATA_BUFFER_RELEASE(buffer); PMIX_DATA_BUFFER_DESTRUCT(&pbuf); goto DONE; } prc = PMIx_Data_unload(&pbuf, &pbo); if (PMIX_SUCCESS != prc) { PMIX_ERROR_LOG(prc); PMIX_DATA_BUFFER_RELEASE(buffer); PMIX_DATA_BUFFER_DESTRUCT(&pbuf); goto DONE; } prc = PMIx_Data_pack(NULL, buffer, &pbo, 1, PMIX_BYTE_OBJECT); if (PMIX_SUCCESS != prc) { PMIX_ERROR_LOG(prc); PMIX_DATA_BUFFER_RELEASE(buffer); PMIX_DATA_BUFFER_DESTRUCT(&pbuf); goto DONE; } PMIX_DATA_BUFFER_DESTRUCT(&pbuf); } else { /* pack a flag indicating no inventory was provided */ flag = 0; prc = PMIx_Data_pack(NULL, buffer, &flag, 1, PMIX_INT8); if (PMIX_SUCCESS != prc) { PMIX_ERROR_LOG(prc); PMIX_DATA_BUFFER_RELEASE(buffer); goto DONE; } } if (pmix_cmd_line_is_taken(&results, PRTE_CLI_TREE_SPAWN)) { /* if we are tree-spawning, start by sending it to ourselves */ PRTE_RML_SEND(ret, PRTE_PROC_MY_NAME->rank, buffer, PRTE_RML_TAG_PRTED_CALLBACK); if (PRTE_SUCCESS != ret) { PRTE_ERROR_LOG(ret); PMIX_DATA_BUFFER_RELEASE(buffer); goto DONE; } } else { /* send it to the HNP */ PRTE_RML_SEND(ret, PRTE_PROC_MY_HNP->rank, buffer, PRTE_RML_TAG_PRTED_CALLBACK); if (PRTE_SUCCESS != ret) { PRTE_ERROR_LOG(ret); PMIX_DATA_BUFFER_RELEASE(buffer); goto DONE; } } /* if we are tree-spawning, then we need to capture the MCA params * from our cmd line so we can pass them along to the daemons we spawn - * otherwise, only the first layer of daemons will ever see them */ if (pmix_cmd_line_is_taken(&results, PRTE_CLI_TREE_SPAWN)) { int k; bool ignore; char *no_keep[] = { "prte_hnp_uri", "prte_ess_jobid", "prte_ess_vpid", "prte_ess_num_procs", "prte_parent_uri", "mca_base_env_list", NULL }; opt = 
pmix_cmd_line_get_param(&results, PRTE_CLI_PRTEMCA); if (NULL != opt) { // cycle across found values for (i=0; NULL != opt->values[i]; i++) { char *t = strchr(opt->values[i], '='); *t = '\0'; ++t; ignore = false; /* see if this is something we cannot pass along */ for (k = 0; NULL != no_keep[k]; k++) { if (0 == strcmp(no_keep[k], opt->values[i])) { ignore = true; break; } } if (!ignore) { PMIX_ARGV_APPEND_NOSIZE_COMPAT(&prted_cmd_line, "--"PRTE_CLI_PRTEMCA); PMIX_ARGV_APPEND_NOSIZE_COMPAT(&prted_cmd_line, opt->values[i]); PMIX_ARGV_APPEND_NOSIZE_COMPAT(&prted_cmd_line, t); } --t; *t = '='; } } opt = pmix_cmd_line_get_param(&results, PRTE_CLI_PMIXMCA); if (NULL != opt) { // cycle across found values - we always pass PMIx values for (i=0; NULL != opt->values[i]; i++) { char *t = strchr(opt->values[i], '='); *t = '\0'; ++t; PMIX_ARGV_APPEND_NOSIZE_COMPAT(&prted_cmd_line, "--"PRTE_CLI_PMIXMCA); PMIX_ARGV_APPEND_NOSIZE_COMPAT(&prted_cmd_line, opt->values[i]); PMIX_ARGV_APPEND_NOSIZE_COMPAT(&prted_cmd_line, t); --t; *t = '='; } } } if (prte_debug_flag) { pmix_output(0, "%s prted: up and running - waiting for commands!", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)); } ret = PRTE_SUCCESS; /* loop the event lib until an exit event is detected */ while (prte_event_base_active) { prte_event_loop(prte_event_base, PRTE_EVLOOP_ONCE); } PMIX_ACQUIRE_OBJECT(prte_event_base_active); /* ensure all local procs are dead */ prte_odls.kill_local_procs(NULL); DONE: /* update the exit status, in case it wasn't done */ PRTE_UPDATE_EXIT_STATUS(ret); /* cleanup and leave */ prte_finalize(); /* cleanup the process info */ prte_proc_info_finalize(); if (prte_debug_flag) { fprintf(stderr, "exiting with status %d\n", prte_exit_status); } exit(prte_exit_status); } static void shutdown_callback(int fd, short flags, void *arg) { prte_timer_t *tm = (prte_timer_t *) arg; prte_job_t *jdata; PRTE_HIDE_UNUSED_PARAMS(fd, flags); if (NULL != tm) { /* release the timer */ PMIX_RELEASE(tm); } /* if we were ordered 
to abort, do so */ pmix_output(0, "%s is executing clean abnormal termination", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)); /* do -not- call finalize as this will send a message to the HNP * indicating clean termination! Instead, just forcibly cleanup * the local session_dir tree and exit */ prte_odls.kill_local_procs(NULL); // mark that we are finalizing so the session directory will cleanup prte_finalizing = true; jdata = prte_get_job_data_object(PRTE_PROC_MY_NAME->nspace); PMIX_RELEASE(jdata); exit(PRTE_ERROR_DEFAULT_EXIT_CODE); } static void rollup(int status, pmix_proc_t *sender, pmix_data_buffer_t *buffer, prte_rml_tag_t tag, void *cbdata) { pmix_proc_t child; int32_t cnt; pmix_value_t val; pmix_proc_t proc; pmix_status_t prc; PRTE_HIDE_UNUSED_PARAMS(status, tag, cbdata); ncollected++; /* if the sender is ourselves, then we save that buffer * so we can insert it at the beginning */ if (PMIX_CHECK_PROCID(sender, PRTE_PROC_MY_NAME)) { PMIX_DATA_BUFFER_CREATE(mybucket); prc = PMIx_Data_copy_payload(mybucket, buffer); if (PMIX_SUCCESS != prc) { PMIX_ERROR_LOG(prc); goto report; } } else { /* xfer the contents of the rollup to our bucket */ prc = PMIx_Data_copy_payload(bucket, buffer); if (PMIX_SUCCESS != prc) { PMIX_ERROR_LOG(prc); goto report; } /* the first entry in the bucket will be from our * direct child - harvest it for connection info */ cnt = 1; prc = PMIx_Data_unpack(NULL, buffer, &child, &cnt, PMIX_PROC); if (PMIX_SUCCESS != prc) { PMIX_ERROR_LOG(prc); goto report; } PMIX_LOAD_PROCID(&proc, prte_process_info.myproc.nspace, sender->rank); PMIX_VALUE_CONSTRUCT(&val); cnt = 1; prc = PMIx_Data_unpack(&proc, buffer, (void *) &val.data.string, &cnt, PMIX_STRING); if (PMIX_SUCCESS != prc) { PMIX_ERROR_LOG(prc); goto report; } prc = PMIx_Store_internal(&proc, PMIX_PROC_URI, &val); if (PMIX_SUCCESS != prc) { PMIX_ERROR_LOG(prc); PMIX_VALUE_DESTRUCT(&val); goto report; } PMIX_VALUE_DESTRUCT(&val); } report: report_prted(); } static void report_prted(void) { int nreqd, 
ret; /* get the number of children */ nreqd = pmix_list_get_size(&prte_rml_base.children) + 1; if (nreqd == ncollected && NULL != mybucket && !node_regex_waiting) { /* add the collection of our children's buckets to ours */ ret = PMIx_Data_copy_payload(mybucket, bucket); if (PMIX_SUCCESS != ret) { PMIX_ERROR_LOG(ret); } PMIX_DATA_BUFFER_RELEASE(bucket); /* relay this on to our parent */ PRTE_RML_SEND(ret, PRTE_PROC_MY_PARENT->rank, mybucket, PRTE_RML_TAG_PRTED_CALLBACK); if (PRTE_SUCCESS != ret) { PRTE_ERROR_LOG(ret); PMIX_DATA_BUFFER_RELEASE(mybucket); } } } static void node_regex_report(int status, pmix_proc_t *sender, pmix_data_buffer_t *buffer, prte_rml_tag_t tag, void *cbdata) { int rc; bool *active = (bool *) cbdata; PRTE_HIDE_UNUSED_PARAMS(status, sender, tag); /* extract the node info if needed, and update the routing tree */ if (PRTE_SUCCESS != (rc = prte_util_decode_nidmap(buffer))) { PRTE_ERROR_LOG(rc); return; } *active = false; /* now launch any child daemons of ours */ prte_plm.remote_spawn(); report_prted(); } prrte-3.0.13/src/tools/prted/Makefile.am0000664000175000017500000000444515145263240020230 0ustar alastairalastair# # Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. # Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2007-2020 Cisco Systems, Inc. All rights reserved # Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. # Copyright (c) 2016-2020 Intel, Inc. All rights reserved. # Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow # # $HEADER$ # AM_LDFLAGS = $(prte_hwloc_LDFLAGS) $(prte_libevent_LDFLAGS) $(prte_pmix_LDFLAGS) AM_CFLAGS = \ -DPRTE_CONFIGURE_USER="\"@PRTE_CONFIGURE_USER@\"" \ -DPRTE_CONFIGURE_HOST="\"@PRTE_CONFIGURE_HOST@\"" \ -DPRTE_CONFIGURE_DATE="\"@PRTE_CONFIGURE_DATE@\"" \ -DPRTE_BUILD_USER="\"$$USER\"" \ -DPRTE_BUILD_HOST="\"$${HOSTNAME:-`(hostname || uname -n) | sed 1q`}\"" \ -DPRTE_BUILD_DATE="\"`$(top_srcdir)/config/getdate.sh`\"" \ -DPRTE_BUILD_CFLAGS="\"@CFLAGS@\"" \ -DPRTE_BUILD_CPPFLAGS="\"@CPPFLAGS@\"" \ -DPRTE_BUILD_LDFLAGS="\"@LDFLAGS@\"" \ -DPRTE_BUILD_LIBS="\"@LIBS@\"" \ -DPRTE_CC_ABSOLUTE="\"@PRTE_CC_ABSOLUTE@\"" \ -DPRTE_GREEK_VERSION="\"@PRTE_GREEK_VERSION@\"" \ -DPRTE_REPO_REV="\"@PRTE_REPO_REV@\"" \ -DPMIX_RELEASE_DATE="\"@PMIX_RELEASE_DATE@\"" bin_PROGRAMS = prted prted_SOURCES = prted.c # the following empty prted_LDFLAGS is used # so that the prted can be compiled statically # by simply changing the value of this from # nothing to -all-static in the Makefile.in # nice for systems that don't have all the shared # libraries on the computes prted_LDFLAGS = prted_LDADD = \ $(prte_libevent_LIBS) \ $(prte_hwloc_LIBS) \ $(prte_pmix_LIBS) \ $(top_builddir)/src/libprrte.la prrte-3.0.13/src/include/0000775000175000017500000000000015145263240015332 5ustar alastairalastairprrte-3.0.13/src/include/prte_portable_platform.h0000664000175000017500000000073115145263240022252 0ustar alastairalastair/* * Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All Rights * reserved. * Copyright (c) 2021-2024 Nanook Consulting All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ * * Wrapper around GASNet's gasnet_portable_platform.h to avoid * compiler warnings */ #ifndef PRTE_PORTABLE_PLATFORM_H #define PRTE_PORTABLE_PLATFORM_H 1 #include "src/include/prte_portable_platform_real.h" #endif prrte-3.0.13/src/include/prte_stdatomic.h0000664000175000017500000000355115145263240020530 0ustar alastairalastair/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2018 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2019 Intel, Inc. All rights reserved. * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2021-2023 Nanook Consulting. All rights reserved. * Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All Rights * reserved. * Copyright (c) 2023 Triad National Security, LLC. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #if !defined(PRTE_STDATOMIC_H) # define PRTE_STDATOMIC_H # include "prte_stdint.h" # include # if PRTE_ATOMIC_C11 #ifdef HAVE_STDATOMIC_H # include #endif typedef atomic_int prte_atomic_int_t; typedef atomic_long prte_atomic_long_t; typedef _Atomic bool prte_atomic_bool_t; typedef _Atomic int32_t prte_atomic_int32_t; typedef _Atomic uint32_t prte_atomic_uint32_t; typedef _Atomic int64_t prte_atomic_int64_t; typedef _Atomic uint64_t prte_atomic_uint64_t; typedef _Atomic size_t prte_atomic_size_t; typedef _Atomic ssize_t prte_atomic_ssize_t; typedef _Atomic intptr_t prte_atomic_intptr_t; typedef _Atomic uintptr_t prte_atomic_uintptr_t; # else typedef volatile int prte_atomic_int_t; typedef volatile long prte_atomic_long_t; typedef volatile bool prte_atomic_bool_t; typedef volatile int32_t prte_atomic_int32_t; typedef volatile uint32_t prte_atomic_uint32_t; typedef volatile int64_t prte_atomic_int64_t; typedef volatile uint64_t prte_atomic_uint64_t; typedef volatile size_t prte_atomic_size_t; typedef volatile ssize_t prte_atomic_ssize_t; typedef 
#ifndef PRTE_ALIGN_H
#define PRTE_ALIGN_H

/* uintptr_t is used by the pointer variants below */
#include <stdint.h>

/*
 * Alignment helpers. In every macro `a` (or `s`) is the alignment and is
 * expected to be a power of two; `t` is the integer type in which the mask is
 * computed (or, for the _PTR variants, the pointer type of the result).
 * Arguments may be evaluated more than once - do not pass expressions with
 * side effects.
 */

/* round x down to a multiple of a */
#define PRTE_DOWN_ALIGN(x, a, t) ((x) & ~(((t)(a) - 1)))

/* round pointer x down to a multiple of a, yielding a pointer of type t.
 * The argument is parenthesized before the cast so that expressions such as
 * `p + 1` are converted as a whole rather than as `((uintptr_t) p) + 1`. */
#define PRTE_DOWN_ALIGN_PTR(x, a, t) ((t) PRTE_DOWN_ALIGN((uintptr_t)(x), a, uintptr_t))

/* round x up to a multiple of a */
#define PRTE_ALIGN(x, a, t) (((x) + ((t)(a) - 1)) & ~(((t)(a) - 1)))

/* round pointer x up to a multiple of a, yielding a pointer of type t */
#define PRTE_ALIGN_PTR(x, a, t) ((t) PRTE_ALIGN((uintptr_t)(x), a, uintptr_t))

/* number of bytes needed to advance x to the next multiple of s (0 if aligned) */
#define PRTE_ALIGN_PAD_AMOUNT(x, s) ((~((uintptr_t)(x)) + 1) & ((uintptr_t)(s) - 1))

#endif /* PRTE_ALIGN_H */
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015-2019 Intel, Inc. All rights reserved. * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2021 Nanook Consulting. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #ifndef PRTE_CONSTANTS_H #define PRTE_CONSTANTS_H #include "prte_config.h" #include "constants.h" BEGIN_C_DECLS #define PRTE_ERR_BASE 0 #define PRTE_ERR_SPLIT 100 enum { /* Error codes inherited from PRTE. Still enum values so that we get the nice debugger help. */ PRTE_SUCCESS = (PRTE_ERR_BASE), PRTE_ERROR = (PRTE_ERR_BASE - 1), PRTE_ERR_OUT_OF_RESOURCE = (PRTE_ERR_BASE - 2), /* fatal error */ PRTE_ERR_TEMP_OUT_OF_RESOURCE = (PRTE_ERR_BASE - 3), /* try again later */ PRTE_ERR_RESOURCE_BUSY = (PRTE_ERR_BASE - 4), PRTE_ERR_BAD_PARAM = (PRTE_ERR_BASE - 5), /* equivalent to MPI_ERR_ARG error code */ PRTE_ERR_FATAL = (PRTE_ERR_BASE - 6), PRTE_ERR_NOT_IMPLEMENTED = (PRTE_ERR_BASE - 7), PRTE_ERR_NOT_SUPPORTED = (PRTE_ERR_BASE - 8), PRTE_ERR_INTERRUPTED = (PRTE_ERR_BASE - 9), PRTE_ERR_WOULD_BLOCK = (PRTE_ERR_BASE - 10), PRTE_ERR_IN_ERRNO = (PRTE_ERR_BASE - 11), PRTE_ERR_UNREACH = (PRTE_ERR_BASE - 12), PRTE_ERR_NOT_FOUND = (PRTE_ERR_BASE - 13), PRTE_EXISTS = (PRTE_ERR_BASE - 14), /* indicates that the specified object already exists */ PRTE_ERR_TIMEOUT = (PRTE_ERR_BASE - 15), PRTE_ERR_NOT_AVAILABLE = (PRTE_ERR_BASE - 16), PRTE_ERR_PERM = (PRTE_ERR_BASE - 17), /* no permission */ PRTE_ERR_VALUE_OUT_OF_BOUNDS = (PRTE_ERR_BASE - 18), PRTE_ERR_FILE_READ_FAILURE = (PRTE_ERR_BASE - 19), PRTE_ERR_FILE_WRITE_FAILURE = (PRTE_ERR_BASE - 20), PRTE_ERR_FILE_OPEN_FAILURE = (PRTE_ERR_BASE - 21), PRTE_ERR_PACK_MISMATCH = 
(PRTE_ERR_BASE - 22), PRTE_ERR_PACK_FAILURE = (PRTE_ERR_BASE - 23), PRTE_ERR_UNPACK_FAILURE = (PRTE_ERR_BASE - 24), PRTE_ERR_UNPACK_INADEQUATE_SPACE = (PRTE_ERR_BASE - 25), PRTE_ERR_UNPACK_READ_PAST_END_OF_BUFFER = (PRTE_ERR_BASE - 26), PRTE_ERR_TYPE_MISMATCH = (PRTE_ERR_BASE - 27), PRTE_ERR_OPERATION_UNSUPPORTED = (PRTE_ERR_BASE - 28), PRTE_ERR_UNKNOWN_DATA_TYPE = (PRTE_ERR_BASE - 29), PRTE_ERR_BUFFER = (PRTE_ERR_BASE - 30), PRTE_ERR_DATA_TYPE_REDEF = (PRTE_ERR_BASE - 31), PRTE_ERR_DATA_OVERWRITE_ATTEMPT = (PRTE_ERR_BASE - 32), PRTE_ERR_MODULE_NOT_FOUND = (PRTE_ERR_BASE - 33), PRTE_ERR_TOPO_SLOT_LIST_NOT_SUPPORTED = (PRTE_ERR_BASE - 34), PRTE_ERR_TOPO_SOCKET_NOT_SUPPORTED = (PRTE_ERR_BASE - 35), PRTE_ERR_TOPO_CORE_NOT_SUPPORTED = (PRTE_ERR_BASE - 36), PRTE_ERR_NOT_ENOUGH_SOCKETS = (PRTE_ERR_BASE - 37), PRTE_ERR_NOT_ENOUGH_CORES = (PRTE_ERR_BASE - 38), PRTE_ERR_INVALID_PHYS_CPU = (PRTE_ERR_BASE - 39), PRTE_ERR_MULTIPLE_AFFINITIES = (PRTE_ERR_BASE - 40), PRTE_ERR_SLOT_LIST_RANGE = (PRTE_ERR_BASE - 41), PRTE_ERR_NETWORK_NOT_PARSEABLE = (PRTE_ERR_BASE - 42), PRTE_ERR_SILENT = (PRTE_ERR_BASE - 43), PRTE_ERR_NOT_INITIALIZED = (PRTE_ERR_BASE - 44), PRTE_ERR_NOT_BOUND = (PRTE_ERR_BASE - 45), PRTE_ERR_TAKE_NEXT_OPTION = (PRTE_ERR_BASE - 46), PRTE_ERR_PROC_ENTRY_NOT_FOUND = (PRTE_ERR_BASE - 47), PRTE_ERR_DATA_VALUE_NOT_FOUND = (PRTE_ERR_BASE - 48), PRTE_ERR_CONNECTION_FAILED = (PRTE_ERR_BASE - 49), PRTE_ERR_AUTHENTICATION_FAILED = (PRTE_ERR_BASE - 50), PRTE_ERR_COMM_FAILURE = (PRTE_ERR_BASE - 51), PRTE_ERR_SERVER_NOT_AVAIL = (PRTE_ERR_BASE - 52), PRTE_ERR_IN_PROCESS = (PRTE_ERR_BASE - 53), /* PMIx equivalents for notification support */ PRTE_ERR_DEBUGGER_RELEASE = (PRTE_ERR_BASE - 54), PRTE_ERR_HANDLERS_COMPLETE = (PRTE_ERR_BASE - 55), PRTE_ERR_PARTIAL_SUCCESS = (PRTE_ERR_BASE - 56), PRTE_ERR_PROC_ABORTED = (PRTE_ERR_BASE - 57), PRTE_ERR_PROC_REQUESTED_ABORT = (PRTE_ERR_BASE - 58), PRTE_ERR_PROC_ABORTING = (PRTE_ERR_BASE - 59), PRTE_ERR_NODE_DOWN = (PRTE_ERR_BASE - 60), 
PRTE_ERR_NODE_OFFLINE = (PRTE_ERR_BASE - 61), PRTE_ERR_JOB_TERMINATED = (PRTE_ERR_BASE - 62), PRTE_ERR_PROC_RESTART = (PRTE_ERR_BASE - 63), PRTE_ERR_PROC_CHECKPOINT = (PRTE_ERR_BASE - 64), PRTE_ERR_PROC_MIGRATE = (PRTE_ERR_BASE - 65), PRTE_ERR_EVENT_REGISTRATION = (PRTE_ERR_BASE - 66), PRTE_ERR_HEARTBEAT_ALERT = (PRTE_ERR_BASE - 67), PRTE_ERR_FILE_ALERT = (PRTE_ERR_BASE - 68), PRTE_ERR_MODEL_DECLARED = (PRTE_ERR_BASE - 69), PRTE_PMIX_LAUNCH_DIRECTIVE = (PRTE_ERR_BASE - 70), PRTE_PMIX_LAUNCHER_READY = (PRTE_ERR_BASE - 71), PRTE_OPERATION_SUCCEEDED = (PRTE_ERR_BASE - 72), /* error codes specific to PRTE - don't forget to update src/util/error_strings.c when adding new error codes!! Otherwise, the error reporting system will potentially crash, or at the least not be able to report the new error correctly. */ PRTE_ERR_RECV_LESS_THAN_POSTED = (PRTE_ERR_SPLIT - 1), PRTE_ERR_RECV_MORE_THAN_POSTED = (PRTE_ERR_SPLIT - 2), PRTE_ERR_NO_MATCH_YET = (PRTE_ERR_SPLIT - 3), PRTE_ERR_REQUEST = (PRTE_ERR_SPLIT - 4), PRTE_ERR_NO_CONNECTION_ALLOWED = (PRTE_ERR_SPLIT - 5), PRTE_ERR_CONNECTION_REFUSED = (PRTE_ERR_SPLIT - 6), PRTE_ERR_COMPARE_FAILURE = (PRTE_ERR_SPLIT - 9), PRTE_ERR_COPY_FAILURE = (PRTE_ERR_SPLIT - 10), PRTE_ERR_PROC_STATE_MISSING = (PRTE_ERR_SPLIT - 11), PRTE_ERR_PROC_EXIT_STATUS_MISSING = (PRTE_ERR_SPLIT - 12), PRTE_ERR_INDETERMINATE_STATE_INFO = (PRTE_ERR_SPLIT - 13), PRTE_ERR_NODE_FULLY_USED = (PRTE_ERR_SPLIT - 14), PRTE_ERR_INVALID_NUM_PROCS = (PRTE_ERR_SPLIT - 15), PRTE_ERR_ADDRESSEE_UNKNOWN = (PRTE_ERR_SPLIT - 16), PRTE_ERR_SYS_LIMITS_PIPES = (PRTE_ERR_SPLIT - 17), PRTE_ERR_PIPE_SETUP_FAILURE = (PRTE_ERR_SPLIT - 18), PRTE_ERR_SYS_LIMITS_CHILDREN = (PRTE_ERR_SPLIT - 19), PRTE_ERR_FAILED_GET_TERM_ATTRS = (PRTE_ERR_SPLIT - 20), PRTE_ERR_WDIR_NOT_FOUND = (PRTE_ERR_SPLIT - 21), PRTE_ERR_EXE_NOT_FOUND = (PRTE_ERR_SPLIT - 22), PRTE_ERR_PIPE_READ_FAILURE = (PRTE_ERR_SPLIT - 23), PRTE_ERR_EXE_NOT_ACCESSIBLE = (PRTE_ERR_SPLIT - 24), PRTE_ERR_FAILED_TO_START = 
(PRTE_ERR_SPLIT - 25), PRTE_ERR_FILE_NOT_EXECUTABLE = (PRTE_ERR_SPLIT - 26), PRTE_ERR_HNP_COULD_NOT_START = (PRTE_ERR_SPLIT - 27), PRTE_ERR_SYS_LIMITS_SOCKETS = (PRTE_ERR_SPLIT - 28), PRTE_ERR_SOCKET_NOT_AVAILABLE = (PRTE_ERR_SPLIT - 29), PRTE_ERR_SYSTEM_WILL_BOOTSTRAP = (PRTE_ERR_SPLIT - 30), PRTE_ERR_RESTART_LIMIT_EXCEEDED = (PRTE_ERR_SPLIT - 31), PRTE_ERR_INVALID_NODE_RANK = (PRTE_ERR_SPLIT - 32), PRTE_ERR_INVALID_LOCAL_RANK = (PRTE_ERR_SPLIT - 33), PRTE_ERR_UNRECOVERABLE = (PRTE_ERR_SPLIT - 34), PRTE_ERR_MEM_LIMIT_EXCEEDED = (PRTE_ERR_SPLIT - 35), PRTE_ERR_HEARTBEAT_LOST = (PRTE_ERR_SPLIT - 36), PRTE_ERR_PROC_STALLED = (PRTE_ERR_SPLIT - 37), PRTE_ERR_NO_APP_SPECIFIED = (PRTE_ERR_SPLIT - 38), PRTE_ERR_NO_EXE_SPECIFIED = (PRTE_ERR_SPLIT - 39), PRTE_ERR_COMM_DISABLED = (PRTE_ERR_SPLIT - 40), PRTE_ERR_FAILED_TO_MAP = (PRTE_ERR_SPLIT - 41), PRTE_ERR_SENSOR_LIMIT_EXCEEDED = (PRTE_ERR_SPLIT - 42), PRTE_ERR_ALLOCATION_PENDING = (PRTE_ERR_SPLIT - 43), PRTE_ERR_NO_PATH_TO_TARGET = (PRTE_ERR_SPLIT - 44), PRTE_ERR_OP_IN_PROGRESS = (PRTE_ERR_SPLIT - 45), PRTE_ERR_OPEN_CONDUIT_FAIL = (PRTE_ERR_SPLIT - 46), PRTE_ERR_DUPLICATE_MSG = (PRTE_ERR_SPLIT - 47), PRTE_ERR_OUT_OF_ORDER_MSG = (PRTE_ERR_SPLIT - 48), PRTE_ERR_FORCE_SELECT = (PRTE_ERR_SPLIT - 49), PRTE_ERR_JOB_CANCELLED = (PRTE_ERR_SPLIT - 50), PRTE_ERR_CONDUIT_SEND_FAIL = (PRTE_ERR_SPLIT - 51) }; #define PRTE_ERR_MAX (PRTE_ERR_SPLIT - 100) END_C_DECLS #endif /* PRTE_CONSTANTS_H */ prrte-3.0.13/src/include/prte_config_bottom.h0000664000175000017500000003556315145263240021402 0ustar alastairalastair/* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2010 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2009-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2013 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015-2020 Intel, Inc. All rights reserved. * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ * * This file is included at the bottom of prte_config.h, and is * therefore a) after all the #define's that were output from * configure, and b) included in most/all files in PRTE. * * Since this file is *only* ever included by prte_config.h, and * prte_config.h already has #ifndef/#endif protection, there is no * need to #ifndef/#endif protection here. */ #ifndef PRTE_CONFIG_H # error "prte_config_bottom.h should only be included from prte_config.h" #endif /* * If we build a static library, Visual C define the _LIB symbol. In the * case of a shared library _USERDLL get defined. * * OMPI_BUILDING and _LIB define how prte_config.h * handles configuring all of PRTE's "compatibility" code. Both * constants will always be defined by the end of prte_config.h. * * OMPI_BUILDING affects how much compatibility code is included by * prte_config.h. It will always be 1 or 0. The user can set the * value before including either mpi.h or prte_config.h and it will be * respected. If prte_config.h is included before mpi.h, it will * default to 1. If mpi.h is included before prte_config.h, it will * default to 0. */ #ifndef OMPI_BUILDING # define OMPI_BUILDING 1 #endif /* * Flex is trying to include the unistd.h file. As there is no configure * option or this, the flex generated files will try to include the file * even on platforms without unistd.h. 
Therefore, if we * know this file is not available, we can prevent flex from including it. */ #ifndef HAVE_UNISTD_H # define YY_NO_UNISTD_H #endif /*********************************************************************** * * code that should be in ompi_config_bottom.h regardless of build * status * **********************************************************************/ /* * BEGIN_C_DECLS should be used at the beginning of your declarations, * so that C++ compilers don't mangle their names. Use END_C_DECLS at * the end of C declarations. */ #undef BEGIN_C_DECLS #undef END_C_DECLS #if defined(c_plusplus) || defined(__cplusplus) # define BEGIN_C_DECLS extern "C" { # define END_C_DECLS } #else # define BEGIN_C_DECLS /* empty */ # define END_C_DECLS /* empty */ #endif /** * The attribute definition should be included before any potential * usage. */ #if PRTE_HAVE_ATTRIBUTE_ALIGNED # define __prte_attribute_aligned__(a) __attribute__((__aligned__(a))) # define __prte_attribute_aligned_max__ __attribute__((__aligned__)) #else # define __prte_attribute_aligned__(a) # define __prte_attribute_aligned_max__ #endif #if PRTE_HAVE_ATTRIBUTE_ALWAYS_INLINE # define __prte_attribute_always_inline__ __attribute__((__always_inline__)) #else # define __prte_attribute_always_inline__ #endif #if PRTE_HAVE_ATTRIBUTE_COLD # define __prte_attribute_cold__ __attribute__((__cold__)) #else # define __prte_attribute_cold__ #endif #if PRTE_HAVE_ATTRIBUTE_CONST # define __prte_attribute_const__ __attribute__((__const__)) #else # define __prte_attribute_const__ #endif #if PRTE_HAVE_ATTRIBUTE_DEPRECATED # define __prte_attribute_deprecated__ __attribute__((__deprecated__)) #else # define __prte_attribute_deprecated__ #endif #if PRTE_HAVE_ATTRIBUTE_FORMAT # define __prte_attribute_format__(a, b, c) __attribute__((__format__(a, b, c))) #else # define __prte_attribute_format__(a, b, c) #endif /* Use this __atribute__ on function-ptr declarations, only */ #if PRTE_HAVE_ATTRIBUTE_FORMAT_FUNCPTR # define 
__prte_attribute_format_funcptr__(a, b, c) __attribute__((__format__(a, b, c))) #else # define __prte_attribute_format_funcptr__(a, b, c) #endif #if PRTE_HAVE_ATTRIBUTE_HOT # define __prte_attribute_hot__ __attribute__((__hot__)) #else # define __prte_attribute_hot__ #endif #if PRTE_HAVE_ATTRIBUTE_MALLOC # define __prte_attribute_malloc__ __attribute__((__malloc__)) #else # define __prte_attribute_malloc__ #endif #if PRTE_HAVE_ATTRIBUTE_MAY_ALIAS # define __prte_attribute_may_alias__ __attribute__((__may_alias__)) #else # define __prte_attribute_may_alias__ #endif #if PRTE_HAVE_ATTRIBUTE_NO_INSTRUMENT_FUNCTION # define __prte_attribute_no_instrument_function__ __attribute__((__no_instrument_function__)) #else # define __prte_attribute_no_instrument_function__ #endif #if PRTE_HAVE_ATTRIBUTE_NOINLINE # define __prte_attribute_noinline__ __attribute__((__noinline__)) #else # define __prte_attribute_noinline__ #endif #if PRTE_HAVE_ATTRIBUTE_NONNULL # define __prte_attribute_nonnull__(a) __attribute__((__nonnull__(a))) # define __prte_attribute_nonnull_all__ __attribute__((__nonnull__)) #else # define __prte_attribute_nonnull__(a) # define __prte_attribute_nonnull_all__ #endif #if PRTE_HAVE_ATTRIBUTE_NORETURN # define __prte_attribute_noreturn__ __attribute__((__noreturn__)) #else # define __prte_attribute_noreturn__ #endif /* Use this __atribute__ on function-ptr declarations, only */ #if PRTE_HAVE_ATTRIBUTE_NORETURN_FUNCPTR # define __prte_attribute_noreturn_funcptr__ __attribute__((__noreturn__)) #else # define __prte_attribute_noreturn_funcptr__ #endif #if PRTE_HAVE_ATTRIBUTE_PACKED # define __prte_attribute_packed__ __attribute__((__packed__)) #else # define __prte_attribute_packed__ #endif #if PRTE_HAVE_ATTRIBUTE_PURE # define __prte_attribute_pure__ __attribute__((__pure__)) #else # define __prte_attribute_pure__ #endif #if PRTE_HAVE_ATTRIBUTE_SENTINEL # define __prte_attribute_sentinel__ __attribute__((__sentinel__)) #else # define __prte_attribute_sentinel__ 
#endif #if PRTE_HAVE_ATTRIBUTE_UNUSED # define __prte_attribute_unused__ __attribute__((__unused__)) #else # define __prte_attribute_unused__ #endif #if PRTE_HAVE_ATTRIBUTE_VISIBILITY # define __prte_attribute_visibility__(a) __attribute__((__visibility__(a))) #else # define __prte_attribute_visibility__(a) #endif #if PRTE_HAVE_ATTRIBUTE_WARN_UNUSED_RESULT # define __prte_attribute_warn_unused_result__ __attribute__((__warn_unused_result__)) #else # define __prte_attribute_warn_unused_result__ #endif #if PRTE_HAVE_ATTRIBUTE_WEAK_ALIAS # define __prte_attribute_weak_alias__(a) __attribute__((__weak__, __alias__(a))) #else # define __prte_attribute_weak_alias__(a) #endif #if PRTE_HAVE_ATTRIBUTE_DESTRUCTOR # define __prte_attribute_destructor__ __attribute__((__destructor__)) #else # define __prte_attribute_destructor__ #endif #if PRTE_HAVE_ATTRIBUTE_OPTNONE # define __prte_attribute_optnone__ __attribute__((__optnone__)) #else # define __prte_attribute_optnone__ #endif #if PRTE_HAVE_ATTRIBUTE_EXTENSION # define __prte_attribute_extension__ __extension__ #else # define __prte_attribute_extension__ #endif #if PRTE_C_HAVE_VISIBILITY # define PRTE_EXPORT __prte_attribute_visibility__("default") # define PRTE_MODULE_EXPORT __prte_attribute_visibility__("default") #else # define PRTE_EXPORT # define PRTE_MODULE_EXPORT #endif #if !defined(__STDC_LIMIT_MACROS) && (defined(c_plusplus) || defined(__cplusplus)) /* When using a C++ compiler, the max / min value #defines for std types are only included if __STDC_LIMIT_MACROS is set before including stdint.h */ # define __STDC_LIMIT_MACROS #endif #include "prte_config.h" #include "prte_stdint.h" /*********************************************************************** * * Code that is only for when building PRTE or utilities that are * using the internals of PRTE. 
It should not be included when * building MPI applications * **********************************************************************/ #if OMPI_BUILDING /* * Maximum size of a filename path. */ # include # ifdef HAVE_SYS_PARAM_H # include # endif # if defined(PATH_MAX) # define PRTE_PATH_MAX (PATH_MAX + 1) # elif defined(_POSIX_PATH_MAX) # define PRTE_PATH_MAX (_POSIX_PATH_MAX + 1) # else # define PRTE_PATH_MAX 256 # endif /* * Set the compile-time path-separator on this system and variable separator */ # define PRTE_PATH_SEP "/" # define PRTE_ENV_SEP ':' # if defined(MAXHOSTNAMELEN) # define PRTE_MAXHOSTNAMELEN (MAXHOSTNAMELEN + 1) # elif defined(HOST_NAME_MAX) # define PRTE_MAXHOSTNAMELEN (HOST_NAME_MAX + 1) # else /* SUSv2 guarantees that "Host names are limited to 255 bytes". */ # define PRTE_MAXHOSTNAMELEN (255 + 1) # endif # define PRTE_DEBUG_ZERO(obj) /* * printf functions for portability (only when building PRTE) */ # if !defined(HAVE_VASPRINTF) || !defined(HAVE_VSNPRINTF) # include # include # endif # if !defined(HAVE_ASPRINTF) || !defined(HAVE_SNPRINTF) || !defined(HAVE_VASPRINTF) \ || !defined(HAVE_VSNPRINTF) # include "src/util/pmix_printf.h" # endif /* Each missing libc printf variant is mapped onto the pmix_ replacement of the same name, brought in by the src/util/pmix_printf.h include above. */ # ifndef HAVE_ASPRINTF # define asprintf pmix_asprintf # endif # ifndef HAVE_SNPRINTF # define snprintf pmix_snprintf # endif # ifndef HAVE_VASPRINTF # define vasprintf pmix_vasprintf # endif # ifndef HAVE_VSNPRINTF /* use the pmix_ name like the three fallbacks above */ # define vsnprintf pmix_vsnprintf # endif /* * On some homogenous big-iron machines (Sandia's Red Storm), there * are no htonl and friends. If that's the case, provide stubs. I * would hope we never find a platform that doesn't have these macros * and would want to talk to the outside world... On other platforms * we fail to detect them correctly.
*/ # if !defined(HAVE_UNIX_BYTESWAP) static inline uint32_t htonl(uint32_t hostvar) { return hostvar; } static inline uint32_t ntohl(uint32_t netvar) { return netvar; } static inline uint16_t htons(uint16_t hostvar) { return hostvar; } static inline uint16_t ntohs(uint16_t netvar) { return netvar; } # endif /* * Define __func__-preprocessor directive if the compiler does not * already define it. Define it to __FILE__ so that we at least have * a clue where the developer is trying to indicate where the error is * coming from (assuming that __func__ is typically used for * printf-style debugging). */ # if defined(HAVE_DECL___FUNC__) && !HAVE_DECL___FUNC__ # define __func__ __FILE__ # endif # define IOVBASE_TYPE void /* ensure the bool type is defined as it is used everywhere */ # include /** * If we generate our own bool type, we need a special way to cast the result * in such a way to keep the compilers silent. */ # define PRTE_INT_TO_BOOL(VALUE) (bool) (VALUE) /** * Top level define to check 2 things: a) if we want ipv6 support, and * b) the underlying system supports ipv6. Having one #define for * this makes it simpler to check throughout the code base. 
*/ # if PRTE_ENABLE_IPV6 && defined(HAVE_STRUCT_SOCKADDR_IN6) # define PRTE_ENABLE_IPV6 1 # else /* drop any earlier value first: redefining a macro with a different replacement list (1 -> 0) is not allowed without an #undef */ # undef PRTE_ENABLE_IPV6 # define PRTE_ENABLE_IPV6 0 # endif # if !defined(HAVE_STRUCT_SOCKADDR_STORAGE) && defined(HAVE_STRUCT_SOCKADDR_IN) # define sockaddr_storage sockaddr # define ss_family sa_family # endif /* Compatibility structure so that we don't have to have as many #if checks in the code base */ # if !defined(HAVE_STRUCT_SOCKADDR_IN6) && defined(HAVE_STRUCT_SOCKADDR_IN) # define sockaddr_in6 sockaddr_in # define sin6_len sin_len # define sin6_family sin_family # define sin6_port sin_port # define sin6_addr sin_addr # endif # if !HAVE_DECL_AF_UNSPEC # define AF_UNSPEC 0 # endif # if !HAVE_DECL_PF_UNSPEC # define PF_UNSPEC 0 # endif # if !HAVE_DECL_AF_INET6 # define AF_INET6 AF_UNSPEC # endif # if !HAVE_DECL_PF_INET6 # define PF_INET6 PF_UNSPEC # endif # if defined(__APPLE__) && defined(HAVE_INTTYPES_H) /* Prior to Mac OS X 10.3, the length modifier "ll" wasn't supported, but "q" was for long long. This isn't ANSI C and causes a warning when using PRI?64 macros. We don't support versions prior to OS X 10.3, so we don't need such backward compatibility. Instead, redefine the macros to be "ll", which is ANSI C and doesn't cause a compiler warning. */ # include # if defined(__PRI_64_LENGTH_MODIFIER__) # undef __PRI_64_LENGTH_MODIFIER__ # define __PRI_64_LENGTH_MODIFIER__ "ll" # endif # if defined(__SCN_64_LENGTH_MODIFIER__) # undef __SCN_64_LENGTH_MODIFIER__ # define __SCN_64_LENGTH_MODIFIER__ "ll" # endif # endif # ifdef MCS_VXWORKS /* VXWorks puts some common functions in oddly named headers. Rather than update all the places the functions are used, which would be a maintenance disaster, just update here... */ # ifdef HAVE_IOLIB_H /* pipe(), ioctl() */ # include # endif # ifdef HAVE_SOCKLIB_H /* socket() */ # include # endif # ifdef HAVE_HOSTLIB_H /* gethostname() */ # include # endif # endif /* If we're in C++, then just undefine restrict and then define it to nothing.
"restrict" is not part of the C++ language, and we don't have a corresponding AC_CXX_RESTRICT to figure out what the C++ compiler supports. */ # if defined(c_plusplus) || defined(__cplusplus) # undef restrict # define restrict # endif #else /* For a similar reason to what is listed in prte_config_top.h, we want to protect others from the autoconf/automake-generated PACKAGE_ macros in prte_config.h. We can't put these undef's directly in prte_config.h because they'll be turned into #define's by autoconf. So put them here in case anyone else includes OMPI/PRTE's config.h files. */ # undef PACKAGE_BUGREPORT # undef PACKAGE_NAME # undef PACKAGE_STRING # undef PACKAGE_TARNAME # undef PACKAGE_VERSION # undef PACKAGE_URL # undef HAVE_CONFIG_H #endif /* OMPI_BUILDING */ prrte-3.0.13/src/include/prefetch.h0000664000175000017500000000222115145263240017300 0ustar alastairalastair/* * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * Copyright (c) 2019 Intel, Inc. All rights reserved. * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved.
* $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ /** @file * * Compiler-specific prefetch functions * * A small set of prefetch / prediction interfaces for using compiler * directives to improve memory prefetching and branch prediction */ #ifndef PRTE_PREFETCH_H #define PRTE_PREFETCH_H #include "prte_config.h" /* C code */ #if PRTE_C_HAVE_BUILTIN_EXPECT # define PMIX_LIKELY(expression) __builtin_expect(!!(expression), 1) # define PMIX_UNLIKELY(expression) __builtin_expect(!!(expression), 0) #else # define PMIX_LIKELY(expression) (expression) # define PMIX_UNLIKELY(expression) (expression) #endif #if PRTE_C_HAVE_BUILTIN_PREFETCH # define PRTE_PREFETCH(address, rw, locality) __builtin_prefetch(address, rw, locality) #else # define PRTE_PREFETCH(address, rw, locality) #endif #endif prrte-3.0.13/src/include/prte_config_top.h0000664000175000017500000000262215145263240020666 0ustar alastairalastair/* * Copyright (c) 2011-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2019-2020 Intel, Inc. All rights reserved. * Copyright (c) 2021 Nanook Consulting. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ * * This file is included at the top of prte_config.h, and is * therefore a) before all the #define's that were output from * configure, and b) included in most/all files in PRTE. * * Since this file is *only* ever included by prte_config.h, and * prte_config.h already has #ifndef/#endif protection, there is no * need to #ifndef/#endif protection here. */ #ifndef PRTE_CONFIG_H # error "prte_config_top.h should only be included from prte_config.h" #endif /* The only purpose of this file is to undef the PACKAGE_ macros that are put in by autoconf/automake projects. Specifically, if you include a .h file from another project that defines these macros (e.g., gmp.h) and then include OMPI/PRTE's config.h, you'll get a preprocessor conflict. So put these undef's here to protect us from other package's PACKAGE_ macros. 
Note that we can't put them directly in prte_config.h (e.g., via AH_TOP) because they will be turned into #define's by autoconf. */ #undef PACKAGE_BUGREPORT #undef PACKAGE_NAME #undef PACKAGE_STRING #undef PACKAGE_TARNAME #undef PACKAGE_VERSION #undef PACKAGE_URL #undef HAVE_CONFIG_H prrte-3.0.13/src/include/types.h0000664000175000017500000001340015145263240016645 0ustar alastairalastair/* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2014-2020 Intel, Inc. All rights reserved. * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ /** @file */ #ifndef PRTE_TYPES_H #define PRTE_TYPES_H #include "prte_config.h" #include #ifdef HAVE_SYS_TYPES_H # include #endif #ifdef HAVE_SYS_SOCKET_H # include #endif #ifdef HAVE_SYS_SELECT_H # include #endif #ifdef HAVE_NETINET_IN_H # include #endif #ifdef HAVE_ARPA_INET_H # include #endif #if PRTE_ENABLE_DEBUG # include "src/util/pmix_output.h" #endif /** * Supported datatypes for messaging and storage operations. */ /** rank on node, used for both local and node rank. 
We * don't send these around on their own, so don't create * dedicated type support for them - we are defining them * here solely for readability in the code and so we have * one place where any future changes can be made */ typedef uint16_t prte_local_rank_t; typedef uint16_t prte_node_rank_t; #define PRTE_LOCAL_RANK PMIX_UINT16 #define PRTE_NODE_RANK PMIX_UINT16 /* the _MAX values are parenthesized so they keep their value when used inside a larger expression; UINT16_MAX itself is the _INVALID marker */ #define PRTE_LOCAL_RANK_MAX (UINT16_MAX - 1) #define PRTE_NODE_RANK_MAX (UINT16_MAX - 1) #define PRTE_LOCAL_RANK_INVALID UINT16_MAX #define PRTE_NODE_RANK_INVALID UINT16_MAX /* index for app_contexts */ typedef uint32_t prte_app_idx_t; #define PRTE_APP_IDX PMIX_UINT32 #define PRTE_APP_IDX_MAX UINT32_MAX /* * portable assignment of pointer to int */ typedef union { uint64_t lval; uint32_t ival; void *pval; struct { uint32_t uval; uint32_t lval; } sval; } prte_ptr_t; /* * handle differences in iovec */ #if defined(__APPLE__) || defined(__WINDOWS__) typedef char *prte_iov_base_ptr_t; # define PRTE_IOVBASE char #else # define PRTE_IOVBASE void typedef void *prte_iov_base_ptr_t; #endif /* * handle differences in socklen_t */ #if defined(HAVE_SOCKLEN_T) typedef socklen_t prte_socklen_t; #else typedef int prte_socklen_t; #endif /* * Convert a 64 bit value to network byte order. */ static inline uint64_t prte_hton64(uint64_t val) __prte_attribute_const__; static inline uint64_t prte_hton64(uint64_t val) { #ifdef HAVE_UNIX_BYTESWAP union { uint64_t ll; uint32_t l[2]; } w, r; /* platform already in network byte order? */ if (htonl(1) == 1L) return val; w.ll = val; r.l[0] = htonl(w.l[1]); r.l[1] = htonl(w.l[0]); return r.ll; #else return val; #endif } /* * Convert a 64 bit value from network to host byte order. */ static inline uint64_t prte_ntoh64(uint64_t val) __prte_attribute_const__; static inline uint64_t prte_ntoh64(uint64_t val) { #ifdef HAVE_UNIX_BYTESWAP union { uint64_t ll; uint32_t l[2]; } w, r; /* platform already in network byte order?
*/ if (htonl(1) == 1L) return val; w.ll = val; r.l[0] = ntohl(w.l[1]); r.l[1] = ntohl(w.l[0]); return r.ll; #else return val; #endif } /** * Convert between a local representation of pointer and a 64 bits value. */ static inline uint64_t prte_ptr_ptol(void *ptr) __prte_attribute_const__; static inline uint64_t prte_ptr_ptol(void *ptr) { return (uint64_t)(uintptr_t) ptr; } static inline void *prte_ptr_ltop(uint64_t value) __prte_attribute_const__; static inline void *prte_ptr_ltop(uint64_t value) { #if SIZEOF_VOID_P == 4 && PRTE_ENABLE_DEBUG if (value > ((1ULL << 32) - 1ULL)) { pmix_output(0, "Warning: truncating value in prte_ptr_ltop"); } #endif return (void *) (uintptr_t) value; } #if defined(WORDS_BIGENDIAN) || !defined(HAVE_UNIX_BYTESWAP) static inline uint16_t prte_swap_bytes2(uint16_t val) __prte_attribute_const__; static inline uint16_t prte_swap_bytes2(uint16_t val) { union { uint16_t bigval; uint8_t arrayval[2]; } w, r; w.bigval = val; r.arrayval[0] = w.arrayval[1]; r.arrayval[1] = w.arrayval[0]; return r.bigval; } static inline uint32_t prte_swap_bytes4(uint32_t val) __prte_attribute_const__; static inline uint32_t prte_swap_bytes4(uint32_t val) { union { uint32_t bigval; uint8_t arrayval[4]; } w, r; w.bigval = val; r.arrayval[0] = w.arrayval[3]; r.arrayval[1] = w.arrayval[2]; r.arrayval[2] = w.arrayval[1]; r.arrayval[3] = w.arrayval[0]; return r.bigval; } static inline uint64_t prte_swap_bytes8(uint64_t val) __prte_attribute_const__; static inline uint64_t prte_swap_bytes8(uint64_t val) { union { uint64_t bigval; uint8_t arrayval[8]; } w, r; w.bigval = val; r.arrayval[0] = w.arrayval[7]; r.arrayval[1] = w.arrayval[6]; r.arrayval[2] = w.arrayval[5]; r.arrayval[3] = w.arrayval[4]; r.arrayval[4] = w.arrayval[3]; r.arrayval[5] = w.arrayval[2]; r.arrayval[6] = w.arrayval[1]; r.arrayval[7] = w.arrayval[0]; return r.bigval; } #else # define prte_swap_bytes2 htons # define prte_swap_bytes4 htonl # define prte_swap_bytes8 prte_hton64 #endif /* WORDS_BIGENDIAN || 
!HAVE_UNIX_BYTESWAP */ #endif prrte-3.0.13/src/include/prte_portable_platform_real.h0000664000175000017500000012631015145263240023257 0ustar alastairalastair/* $Source: bitbucket.org:berkeleylab/gasnet.git/other/portable_platform.h $ * Description: Portable platform detection header * Copyright 2006, Dan Bonachea * Copyright 2018, The Regents of the University of California * Terms of Use: In ADDITION to the license information in license.txt, * anyone redistributing this header agrees not to change any part of this notice, or * the version handshake in the header versioning section below. * Furthermore, redistributed copies of any portion of this header must * not appear within files named "portable_platform.h" or "gasnet_portable_platform.h", * unless it is embedded within a complete copy of the GASNet distribution. * These restrictions are designed to prevent conflicts for end users * who compose multiple projects using the PLATFORM_ namespace. * * The canonical version of this header is hosted in the GASNet project at: * https://bitbucket.org/berkeleylab/gasnet * * Developers who clone this header into their own project are HIGHLY encouraged to * contribute any improvements (especially addition of new platforms) back to the * canonical version, for the benefit of the community. * Contributions and bug reports should be directed to: * https://gasnet-bugs.lbl.gov or gasnet-staff@lbl.gov */ /* ------------------------------------------------------------------------------------ */ /* Header versioning: DO NOT CHANGE ANYTHING IN THIS SECTION * The license terms for this header prohibit modifying this section in ANY way. Clones should continue to advertise a PLATFORM_HEADER_VERSION equal to the canonical version they cloned, and should not modify the handshake logic which ensures the highest canonical header version is used. 
*/ /* Publish and enforce version number for the public interface to this header */ /* YOU ARE NOT PERMITTED TO CHANGE THIS SECTION WITHOUT DIRECT APPROVAL FROM DAN BONACHEA */ #if !defined(_PORTABLE_PLATFORM_H) || !defined(PLATFORM_HEADER_VERSION) \ || _PORTABLE_PLATFORM_H != PLATFORM_HEADER_VERSION \ || PLATFORM_HEADER_VERSION < 22 #undef PLATFORM_HEADER_VERSION #define PLATFORM_HEADER_VERSION 22 #undef _PORTABLE_PLATFORM_H #define _PORTABLE_PLATFORM_H PLATFORM_HEADER_VERSION /* End Header versioning handshake */ /* ------------------------------------------------------------------------------------ */ /* make sure that previously-included older/broken clones of this header do not pollute our namespace */ #undef PLATFORM_COMPILER_FAMILYNAME #undef PLATFORM_COMPILER_FAMILYID #undef PLATFORM_COMPILER_ID #undef PLATFORM_COMPILER_VERSION #undef PLATFORM_COMPILER_VERSION_STR #undef PLATFORM_COMPILER_VERSION_INT #undef PLATFORM_COMPILER_IDSTR #undef PLATFORM_COMPILER_VERSION_GT #undef PLATFORM_COMPILER_VERSION_GE #undef PLATFORM_COMPILER_VERSION_EQ #undef PLATFORM_COMPILER_VERSION_LE #undef PLATFORM_COMPILER_VERSION_LT #undef PLATFORM_COMPILER_C_LANGLVL #undef PLATFORM_COMPILER_CXX_LANGLVL #undef PLATFORM_COMPILER_INTEL #undef PLATFORM_COMPILER_INTEL_C #undef PLATFORM_COMPILER_INTEL_CXX #undef PLATFORM_COMPILER_PATHSCALE #undef PLATFORM_COMPILER_PATHSCALE_C #undef PLATFORM_COMPILER_PATHSCALE_CXX #undef PLATFORM_COMPILER_PGI #undef PLATFORM_COMPILER_PGI_C #undef PLATFORM_COMPILER_PGI_CXX #undef PLATFORM_COMPILER_XLC #undef PLATFORM_COMPILER_XLC_C #undef PLATFORM_COMPILER_XLC_CXX #undef PLATFORM_COMPILER_COMPAQ #undef PLATFORM_COMPILER_COMPAQ_C #undef PLATFORM_COMPILER_COMPAQ_CXX #undef PLATFORM_COMPILER_SUN #undef PLATFORM_COMPILER_SUN_C #undef PLATFORM_COMPILER_SUN_CXX #undef PLATFORM_COMPILER_HP #undef PLATFORM_COMPILER_HP_C #undef PLATFORM_COMPILER_HP_CXX #undef PLATFORM_COMPILER_SGI #undef PLATFORM_COMPILER_SGI_C #undef PLATFORM_COMPILER_SGI_CXX #undef 
PLATFORM_COMPILER_CRAY #undef PLATFORM_COMPILER_CRAY_C #undef PLATFORM_COMPILER_CRAY_CXX #undef PLATFORM_COMPILER_KAI #undef PLATFORM_COMPILER_KAI_C #undef PLATFORM_COMPILER_KAI_CXX #undef PLATFORM_COMPILER_MTA #undef PLATFORM_COMPILER_MTA_C #undef PLATFORM_COMPILER_MTA_CXX #undef PLATFORM_COMPILER_NECSX #undef PLATFORM_COMPILER_NECSX_C #undef PLATFORM_COMPILER_NECSX_CXX #undef PLATFORM_COMPILER_MICROSOFT #undef PLATFORM_COMPILER_MICROSOFT_C #undef PLATFORM_COMPILER_MICROSOFT_CXX #undef PLATFORM_COMPILER_TINY #undef PLATFORM_COMPILER_TINY_C #undef PLATFORM_COMPILER_TINY_CXX #undef PLATFORM_COMPILER_LCC #undef PLATFORM_COMPILER_LCC_C #undef PLATFORM_COMPILER_LCC_CXX #undef PLATFORM_COMPILER_OPEN64 #undef PLATFORM_COMPILER_OPEN64_C #undef PLATFORM_COMPILER_OPEN64_CXX #undef PLATFORM_COMPILER_PCC #undef PLATFORM_COMPILER_PCC_C #undef PLATFORM_COMPILER_PCC_CXX #undef PLATFORM_COMPILER_CLANG #undef PLATFORM_COMPILER_CLANG_C #undef PLATFORM_COMPILER_CLANG_CXX #undef PLATFORM_COMPILER_NVHPC #undef PLATFORM_COMPILER_NVHPC_C #undef PLATFORM_COMPILER_NVHPC_CXX #undef PLATFORM_COMPILER_GNU #undef PLATFORM_COMPILER_GNU_C #undef PLATFORM_COMPILER_GNU_CXX #undef PLATFORM_COMPILER_UNKNOWN #undef PLATFORM_OS_FAMILYNAME #undef PLATFORM_OS_SUBFAMILYNAME #undef PLATFORM_OS_CATAMOUNT #undef PLATFORM_OS_BGP #undef PLATFORM_OS_BGQ #undef PLATFORM_OS_K42 #undef PLATFORM_OS_UCLINUX #undef PLATFORM_OS_LINUX #undef PLATFORM_OS_CNL #undef PLATFORM_OS_SUBFAMILY_CNL #undef PLATFORM_OS_WSL #undef PLATFORM_OS_SUBFAMILY_WSL #undef PLATFORM_OS_BLRTS #undef PLATFORM_OS_CYGWIN #undef PLATFORM_OS_MSWINDOWS #undef PLATFORM_OS_AIX #undef PLATFORM_OS_TRU64 #undef PLATFORM_OS_FREEBSD #undef PLATFORM_OS_NETBSD #undef PLATFORM_OS_OPENBSD #undef PLATFORM_OS_SOLARIS #undef PLATFORM_OS_DARWIN #undef PLATFORM_OS_IRIX #undef PLATFORM_OS_HPUX #undef PLATFORM_OS_UNICOS #undef PLATFORM_OS_MTA #undef PLATFORM_OS_SUPERUX #undef PLATFORM_OS_UNKNOWN #undef PLATFORM_ARCH_FAMILYNAME #undef PLATFORM_ARCH_32 #undef 
_PLATFORM_ARCH_32 #undef PLATFORM_ARCH_64 #undef _PLATFORM_ARCH_64 #undef PLATFORM_ARCH_BIG_ENDIAN #undef _PLATFORM_ARCH_BIG_ENDIAN #undef PLATFORM_ARCH_LITTLE_ENDIAN #undef _PLATFORM_ARCH_LITTLE_ENDIAN #undef PLATFORM_ARCH_POWERPC #undef PLATFORM_ARCH_MIC #undef PLATFORM_ARCH_X86_64 #undef PLATFORM_ARCH_IA64 #undef PLATFORM_ARCH_X86 #undef PLATFORM_ARCH_ALPHA #undef PLATFORM_ARCH_MIPS #undef PLATFORM_ARCH_SPARC #undef PLATFORM_ARCH_PARISC #undef PLATFORM_ARCH_CRAYX1 #undef PLATFORM_ARCH_CRAYT3E #undef PLATFORM_ARCH_MTA #undef PLATFORM_ARCH_NECSX #undef PLATFORM_ARCH_MICROBLAZE #undef PLATFORM_ARCH_ARM #undef PLATFORM_ARCH_AARCH64 #undef PLATFORM_ARCH_TILE #undef PLATFORM_ARCH_S390 #undef PLATFORM_ARCH_RISCV #undef PLATFORM_ARCH_UNKNOWN /* prevent known old/broken versions of this header from loading */ #undef OMPI_PORTABLE_PLATFORM_H #define OMPI_PORTABLE_PLATFORM_H #undef OPAL_PORTABLE_PLATFORM_H #define OPAL_PORTABLE_PLATFORM_H /* ------------------------------------------------------------------------------------ */ /* most of this file was written based on information in vendor documents, system headers, and inspecting verbose compiler output. 
Another useful source of information: http://predef.sourceforge.net/ */ /* ------------------------------------------------------------------------------------ */ /* helpers: two-level stringification, so that a macro argument is expanded before '#' turns it into a string literal */ #undef _PLATFORM_STRINGIFY_HELPER #define _PLATFORM_STRINGIFY_HELPER(x) #x #undef PLATFORM_STRINGIFY #define PLATFORM_STRINGIFY(x) _PLATFORM_STRINGIFY_HELPER(x) /* ------------------------------------------------------------------------------------ */ /* Compiler detection */ /* PLATFORM_COMPILER_<family>: defined to 1 if compiler is a given family, undef otherwise PLATFORM_COMPILER_<family>_C PLATFORM_COMPILER_<family>_CXX defined to 1 if compiler is a given family, and is the C or C++ compiler, respectively PLATFORM_COMPILER_FAMILYNAME: unquoted token which provides the compiler family name PLATFORM_COMPILER_FAMILYID: defined to a positive integral value which is unique to a given compiler family or zero if the compiler is unrecognized PLATFORM_COMPILER_ID: same as PLATFORM_COMPILER_FAMILYID, except C and C++ compilers are differentiated PLATFORM_COMPILER_VERSION: defined to an integral expression which is guaranteed to be monotonically non-decreasing with increasing compiler versions. Will be zero for unrecognized compilers. The exact encoding of compiler version tuples into this constant may occasionally change when this header is upgraded, so code should use the (in)equality macros below to check against particular compiler versions, instead of embedding an encoded constant.
PLATFORM_COMPILER_VERSION_STR: A string representation of the compiler version, which may contain additional info PLATFORM_COMPILER_VERSION_[GT,GE,EQ,LE,LT](maj,min,pat): evaluate to non-zero iff the compiler version in use is respectively greater-than, greater-or-equal, equal, less-or-equal, less-than the provided version components PLATFORM_COMPILER_IDSTR: a string which uniquely identifies recognized compilers PLATFORM_COMPILER_C_LANGLVL and PLATFORM_COMPILER_CXX_LANGLVL: (in PLATFORM_HEADER_VERSION >= 5) defined to a positive integral value corresponding to the C or C++ (respectively) language standard to which the current compiler advertises conformance. Otherwise undef (in particular at most one of these is defined in a given compilation). */ #if defined(__INTEL_COMPILER) #define PLATFORM_COMPILER_INTEL 1 #define PLATFORM_COMPILER_FAMILYNAME INTEL #define PLATFORM_COMPILER_FAMILYID 2 #ifdef __cplusplus #define PLATFORM_COMPILER_INTEL_CXX 1 #else #define PLATFORM_COMPILER_INTEL_C 1 #endif /* Intel compiler version "patch number" * ------------------------------------- * Intel compiler versioning is unfortunately complicated by behavioral changes. * Versions prior to Intel 14.0.0 (Sept 2013) lacked a preprocessor symbol to supply the "update" number. * Version 14.0.0 and later supply a __INTEL_COMPILER_UPDATE symbol, but sadly several releases of Version 19 * report the wrong value in this field (bug 3876). * For now, the "patch" field of the PLATFORM_COMPILER_VERSION for Intel is the release package BUILD DATE, * in the same decimal YYYYMMDD format as __INTEL_COMPILER_BUILD_DATE, as this is the only indicator that has * remained reliably stable/correct across versions. * So for example to check for icc --version "19.0.1.144 20181018" or later, pass: * PLATFORM_COMPILER_VERSION_GE(19, 0, 20181018) * NOTE 1: this build-date is unfortunately OS-DEPENDENT, sometimes differing by several days or weeks * between the Linux and OSX releases. 
For a complete mapping, see: * https://software.intel.com/en-us/articles/intel-compiler-and-composer-update-version-numbers-to-compiler-version-number-mapping * NOTE 2: some of the build-date entries in the table linked above have been observed to be incorrect, * so when possible it's safest to verify the build-date from `icc --version` on both Linux and macOS. */ #undef _PLATFORM_INTEL_COMPILER_BUILD_DATE #undef _PLATFORM_COMPILER_INTEL_MIN_BUILDDATE #define _PLATFORM_COMPILER_INTEL_MIN_BUILDDATE 19900000 /* year 1990: corresponds roughly to Intel v4.5 (1992) */ /* MIN_BUILDDATE is used to normalize build dates to a bit-saving range for the encoding * Intel officially supports the current release and two prior (currently back to 2016) * Our 1990 floor corresponds to Intel v4.x that only worked on MS-DOS and predates both Linux and BSD-based macOS */ #ifdef __INTEL_COMPILER_BUILD_DATE #define _PLATFORM_INTEL_COMPILER_BUILD_DATE __INTEL_COMPILER_BUILD_DATE #else #define _PLATFORM_INTEL_COMPILER_BUILD_DATE _PLATFORM_COMPILER_INTEL_MIN_BUILDDATE #endif /* Intel patch number is a decimal build date: YYYYMMDD - do NOT pass the "update number" */ #define PLATFORM_COMPILER_VERSION_INT(maj,min,pat) \ (((((maj) * 100) + (min)) << 19) | \ ((pat) < _PLATFORM_COMPILER_INTEL_MIN_BUILDDATE ? \ 0 : ((pat)-_PLATFORM_COMPILER_INTEL_MIN_BUILDDATE))) #undef _PLATFORM__INTEL_COMPILER #if __INTEL_COMPILER == 9999 /* Seen in 20110811 release of 12.1.0 - overflows VERSION_INT() */ #define _PLATFORM__INTEL_COMPILER 1201 #else #define _PLATFORM__INTEL_COMPILER __INTEL_COMPILER #endif #define PLATFORM_COMPILER_VERSION \ PLATFORM_COMPILER_VERSION_INT(_PLATFORM__INTEL_COMPILER/100, _PLATFORM__INTEL_COMPILER%100, _PLATFORM_INTEL_COMPILER_BUILD_DATE) #define PLATFORM_COMPILER_VERSION_STR \ PLATFORM_STRINGIFY(_PLATFORM__INTEL_COMPILER) "." 
PLATFORM_STRINGIFY(_PLATFORM_INTEL_COMPILER_BUILD_DATE) #elif defined(__PATHSCALE__) #define PLATFORM_COMPILER_PATHSCALE 1 #define PLATFORM_COMPILER_FAMILYNAME PATHSCALE #define PLATFORM_COMPILER_FAMILYID 3 #ifdef __cplusplus #define PLATFORM_COMPILER_PATHSCALE_CXX 1 #else #define PLATFORM_COMPILER_PATHSCALE_C 1 #endif #define PLATFORM_COMPILER_VERSION \ PLATFORM_COMPILER_VERSION_INT(__PATHCC__,__PATHCC_MINOR__,__PATHCC_PATCHLEVEL__+0) #define PLATFORM_COMPILER_VERSION_STR __PATHSCALE__ #elif defined(__NVCOMPILER) /* Must occur prior to PGI and CLANG */ #define PLATFORM_COMPILER_NVHPC 1 #define PLATFORM_COMPILER_FAMILYNAME NVHPC #define PLATFORM_COMPILER_FAMILYID 20 #ifdef __cplusplus #define PLATFORM_COMPILER_NVHPC_CXX 1 #else #define PLATFORM_COMPILER_NVHPC_C 1 #endif #define PLATFORM_COMPILER_VERSION \ PLATFORM_COMPILER_VERSION_INT(__NVCOMPILER_MAJOR__,__NVCOMPILER_MINOR__,__NVCOMPILER_PATCHLEVEL__) #define PLATFORM_COMPILER_VERSION_STR \ PLATFORM_STRINGIFY(__NVCOMPILER_MAJOR__) "." PLATFORM_STRINGIFY(__NVCOMPILER_MINOR__) "-" PLATFORM_STRINGIFY(__NVCOMPILER_PATCHLEVEL__) #elif defined(__PGI) #define PLATFORM_COMPILER_PGI 1 #define PLATFORM_COMPILER_FAMILYNAME PGI #define PLATFORM_COMPILER_FAMILYID 4 #ifdef __cplusplus #define PLATFORM_COMPILER_PGI_CXX 1 #else #define PLATFORM_COMPILER_PGI_C 1 #endif #if __PGIC__ == 99 /* bug 2230: PGI versioning was broken for some platforms in 7.0 no way to know exact version, but provide something slightly more accurate */ #define PLATFORM_COMPILER_VERSION 0x070000 #define PLATFORM_COMPILER_VERSION_STR "7.?-?" #elif defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__) #define PLATFORM_COMPILER_VERSION \ PLATFORM_COMPILER_VERSION_INT(__PGIC__,__PGIC_MINOR__,__PGIC_PATCHLEVEL__) #define PLATFORM_COMPILER_VERSION_STR \ PLATFORM_STRINGIFY(__PGIC__) "." 
PLATFORM_STRINGIFY(__PGIC_MINOR__) "-" PLATFORM_STRINGIFY(__PGIC_PATCHLEVEL__) #else /* PGI before 6.1-4 lacks any version ID preprocessor macros - so use this filthy hack */ #ifdef PLATFORM_PGI_IS_ANCIENT /* Include below might fail for ancient versions lacking this header, but testing shows it works back to at least 5.1-3 (Nov 2003), and based on docs probably back to 3.2 (Sep 2000) */ #define PLATFORM_COMPILER_VERSION 0 #elif defined(__x86_64__) /* bug 1753 - 64-bit omp.h upgrade happened in <6.0-8,6.1-1] */ #include "omp.h" #if defined(_PGOMP_H) /* 6.1.1 or newer */ #define PLATFORM_COMPILER_VERSION 0x060101 #define PLATFORM_COMPILER_VERSION_STR ">=6.1-1" #else /* 6.0.8 or older */ #define PLATFORM_COMPILER_VERSION 0 #define PLATFORM_COMPILER_VERSION_STR "<=6.0-8" #endif #else /* 32-bit omp.h upgrade happened in <5.2-4,6.0-8] */ #include "omp.h" #if defined(_PGOMP_H) /* 6.0-8 or newer */ #define PLATFORM_COMPILER_VERSION 0x060008 #define PLATFORM_COMPILER_VERSION_STR ">=6.0-8" #else /* 5.2-4 or older */ #define PLATFORM_COMPILER_VERSION 0 #define PLATFORM_COMPILER_VERSION_STR "<=5.2-4" #endif #endif #endif #elif defined(__xlC__) || defined(__ibmxl__) #define PLATFORM_COMPILER_XLC 1 #define PLATFORM_COMPILER_FAMILYNAME XLC #define PLATFORM_COMPILER_FAMILYID 5 #ifdef __cplusplus #define PLATFORM_COMPILER_XLC_CXX 1 #else #define PLATFORM_COMPILER_XLC_C 1 #endif #ifdef __ibmxl_version__ #define PLATFORM_COMPILER_VERSION \ (__ibmxl_version__ << 24 | __ibmxl_release__ << 16 | \ __ibmxl_modification__ << 8 | __ibmxl_ptf_fix_level__) #define PLATFORM_COMPILER_VERSION_STR \ PLATFORM_STRINGIFY(__ibmxl_version__) "." PLATFORM_STRINGIFY(__ibmxl_release__) "." PLATFORM_STRINGIFY(__ibmxl_modification__) "."
PLATFORM_STRINGIFY(__ibmxl_ptf_fix_level__) #else #ifdef __xlC_ver__ #define PLATFORM_COMPILER_VERSION (__xlC__ << 16 | __xlC_ver__) #else #define PLATFORM_COMPILER_VERSION (__xlC__ << 16) #endif #ifdef __xlc__ #define PLATFORM_COMPILER_VERSION_STR __xlc__ #else #define PLATFORM_COMPILER_VERSION_STR PLATFORM_STRINGIFY(__xlC__) #endif #endif #define PLATFORM_COMPILER_VERSION_INT(maj,min,pat) \ ( ((maj) << 24) | ((min) << 16) | ((pat) << 8) ) #elif defined(__DECC) || defined(__DECCXX) #define PLATFORM_COMPILER_COMPAQ 1 #define PLATFORM_COMPILER_FAMILYNAME COMPAQ #define PLATFORM_COMPILER_FAMILYID 6 #ifdef __cplusplus #define PLATFORM_COMPILER_COMPAQ_CXX 1 #else #define PLATFORM_COMPILER_COMPAQ_C 1 #endif #if defined(__DECC_VER) #define PLATFORM_COMPILER_VERSION __DECC_VER #elif defined(__DECCXX_VER) #define PLATFORM_COMPILER_VERSION __DECCXX_VER #endif #define PLATFORM_COMPILER_VERSION_INT(maj,min,pat) \ ( ((maj) * 10000000) + ((min) * 100000) + (90000) + (pat) ) /* 90000 = official ver, 80000 = customer special ver, 60000 = field test ver */ #elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) #define PLATFORM_COMPILER_SUN 1 #define PLATFORM_COMPILER_FAMILYNAME SUN #define PLATFORM_COMPILER_FAMILYID 7 #ifdef __cplusplus #define PLATFORM_COMPILER_SUN_CXX 1 #else #define PLATFORM_COMPILER_SUN_C 1 #endif #if defined(__SUNPRO_C) && __SUNPRO_C > 0 #define PLATFORM_COMPILER_VERSION __SUNPRO_C #elif defined(__SUNPRO_CC) && __SUNPRO_CC > 0 #define PLATFORM_COMPILER_VERSION __SUNPRO_CC #endif /* Sun version numbers look like hex but are actually a sloppy concatenation of decimal version numbers * leading to weird discontinuities in the version space, luckily it remains monotonic (so far) */ #define PLATFORM_COMPILER_VERSION_INT(maj,min,pat) ( \ (min) < 10 ? 
\ ( ((maj) << 8) | ((min) << 4) | (pat) ) : \ ( ((maj) << 12) | (((min)/10) << 8) | (((min)%10) << 4) | (pat) ) ) #elif defined(__HP_cc) || defined(__HP_aCC) #define PLATFORM_COMPILER_HP 1 #define PLATFORM_COMPILER_FAMILYNAME HP #define PLATFORM_COMPILER_FAMILYID 8 #ifdef __cplusplus #define PLATFORM_COMPILER_HP_CXX 1 #else #define PLATFORM_COMPILER_HP_C 1 #endif #if defined(__HP_cc) && __HP_cc > 0 #define PLATFORM_COMPILER_VERSION __HP_cc #elif defined(__HP_aCC) && __HP_aCC > 0 #define PLATFORM_COMPILER_VERSION __HP_aCC #endif #define PLATFORM_COMPILER_VERSION_INT(maj,min,pat) \ ( ((maj) << 16) | ((min) << 8) | (pat) ) #elif defined(_SGI_COMPILER_VERSION) || \ (defined(_COMPILER_VERSION) && defined(__sgi) && !defined(__GNUC__)) /* 7.3.0 and earlier lack _SGI_COMPILER_VERSION */ #define PLATFORM_COMPILER_SGI 1 #define PLATFORM_COMPILER_FAMILYNAME SGI #define PLATFORM_COMPILER_FAMILYID 9 #ifdef __cplusplus #define PLATFORM_COMPILER_SGI_CXX 1 #else #define PLATFORM_COMPILER_SGI_C 1 #endif #if defined(_SGI_COMPILER_VERSION) && _SGI_COMPILER_VERSION > 0 #define PLATFORM_COMPILER_VERSION _SGI_COMPILER_VERSION #elif defined(_COMPILER_VERSION) && _COMPILER_VERSION > 0 #define PLATFORM_COMPILER_VERSION _COMPILER_VERSION #endif #define PLATFORM_COMPILER_VERSION_INT(maj,min,pat) \ ( ((maj) << 8) | ((min) << 4) | (pat) ) #elif defined(_CRAYC) #define PLATFORM_COMPILER_CRAY 1 #define PLATFORM_COMPILER_FAMILYNAME CRAY #define PLATFORM_COMPILER_FAMILYID 10 #ifdef __cplusplus #define PLATFORM_COMPILER_CRAY_CXX 1 #else #define PLATFORM_COMPILER_CRAY_C 1 #endif #if defined(_RELEASE_MAJOR) && defined(_RELEASE_MINOR) /* XE, XK, XC */ #define PLATFORM_COMPILER_VERSION \ PLATFORM_COMPILER_VERSION_INT(_RELEASE_MAJOR,_RELEASE_MINOR,0) #elif defined(_RELEASE) && defined(_RELEASE_MINOR) /* X1 and XT */ #define PLATFORM_COMPILER_VERSION \ PLATFORM_COMPILER_VERSION_INT(_RELEASE,_RELEASE_MINOR,0) #elif defined(_RELEASE) /* T3E */ #define PLATFORM_COMPILER_VERSION \ 
PLATFORM_COMPILER_VERSION_INT(_RELEASE,0,0) #endif #ifdef _RELEASE_STRING /* X1 and XT, XK, XC */ #define PLATFORM_COMPILER_VERSION_STR _RELEASE_STRING #endif #elif defined(__KCC) #define PLATFORM_COMPILER_KAI 1 #define PLATFORM_COMPILER_FAMILYNAME KAI #define PLATFORM_COMPILER_FAMILYID 11 #ifdef __cplusplus #define PLATFORM_COMPILER_KAI_CXX 1 #else #define PLATFORM_COMPILER_KAI_C 1 #endif #elif defined(__MTA__) #define PLATFORM_COMPILER_MTA 1 #define PLATFORM_COMPILER_FAMILYNAME MTA #define PLATFORM_COMPILER_FAMILYID 12 #ifdef __cplusplus #define PLATFORM_COMPILER_MTA_CXX 1 #else #define PLATFORM_COMPILER_MTA_C 1 #endif #elif defined(_SX) #define PLATFORM_COMPILER_NECSX 1 #define PLATFORM_COMPILER_FAMILYNAME NECSX #define PLATFORM_COMPILER_FAMILYID 13 #ifdef __cplusplus #define PLATFORM_COMPILER_NECSX_CXX 1 #else #define PLATFORM_COMPILER_NECSX_C 1 #endif #elif defined(_MSC_VER) #define PLATFORM_COMPILER_MICROSOFT 1 #define PLATFORM_COMPILER_FAMILYNAME MICROSOFT #define PLATFORM_COMPILER_FAMILYID 14 #ifdef __cplusplus #define PLATFORM_COMPILER_MICROSOFT_CXX 1 #else #define PLATFORM_COMPILER_MICROSOFT_C 1 #endif #define PLATFORM_COMPILER_VERSION _MSC_VER #elif defined(__TINYC__) #define PLATFORM_COMPILER_TINY 1 #define PLATFORM_COMPILER_FAMILYNAME TINY #define PLATFORM_COMPILER_FAMILYID 15 #ifdef __cplusplus #define PLATFORM_COMPILER_TINY_CXX 1 #else #define PLATFORM_COMPILER_TINY_C 1 #endif #elif defined(__LCC__) #define PLATFORM_COMPILER_LCC 1 #define PLATFORM_COMPILER_FAMILYNAME LCC #define PLATFORM_COMPILER_FAMILYID 16 #ifdef __cplusplus #define PLATFORM_COMPILER_LCC_CXX 1 #else #define PLATFORM_COMPILER_LCC_C 1 #endif #elif defined(__OPENCC__) #define PLATFORM_COMPILER_OPEN64 1 #define PLATFORM_COMPILER_FAMILYNAME OPEN64 #define PLATFORM_COMPILER_FAMILYID 17 #ifdef __cplusplus #define PLATFORM_COMPILER_OPEN64_CXX 1 #else #define PLATFORM_COMPILER_OPEN64_C 1 #endif /* Note: can't use __OPENCC_PATCHLEVEL__ because it is sometimes non-integer (eg 3.2). 
Adding a cast would not result in a preprocessor constant expression. */ #define PLATFORM_COMPILER_VERSION \ PLATFORM_COMPILER_VERSION_INT(__OPENCC__,__OPENCC_MINOR__,0) #define PLATFORM_COMPILER_VERSION_STR __OPEN64__ #elif defined(__PCC__) #define PLATFORM_COMPILER_PCC 1 #define PLATFORM_COMPILER_FAMILYNAME PCC #define PLATFORM_COMPILER_FAMILYID 18 #ifdef __cplusplus #define PLATFORM_COMPILER_PCC_CXX 1 #else #define PLATFORM_COMPILER_PCC_C 1 #endif #define PLATFORM_COMPILER_VERSION \ PLATFORM_COMPILER_VERSION_INT(__PCC__,__PCC_MINOR__,__PCC_MINORMINOR__) #define PLATFORM_COMPILER_VERSION_STR \ PLATFORM_STRINGIFY(__PCC__) "." PLATFORM_STRINGIFY(__PCC_MINOR__) "." PLATFORM_STRINGIFY(__PCC_MINORMINOR__) #elif defined(__clang__) #define PLATFORM_COMPILER_CLANG 1 #define PLATFORM_COMPILER_FAMILYNAME CLANG #define PLATFORM_COMPILER_FAMILYID 19 #ifdef __cplusplus #define PLATFORM_COMPILER_CLANG_CXX 1 #else #define PLATFORM_COMPILER_CLANG_C 1 #endif #ifdef __clang_version__ /* clang 2.7 (gcc 4.2.1 compliant) and earlier lacked specific version identification */ #define PLATFORM_COMPILER_VERSION \ PLATFORM_COMPILER_VERSION_INT(__clang_major__,__clang_minor__,__clang_patchlevel__) #define PLATFORM_COMPILER_VERSION_STR __clang_version__ #endif /* NOTE: PLATFORM_COMPILER_FAMILYID "20" is allocated to NVHPC, appearing earlier */ #else /* unknown compiler */ #define PLATFORM_COMPILER_UNKNOWN 1 #endif /* this stanza comes last, because many vendor compilers lie and claim to be GNU C for compatibility reasons and/or because they share a frontend */ #undef _PLATFORM_COMPILER_GNU_VERSION_STR #undef __PLATFORM_COMPILER_GNU_VERSION_STR #if defined(__GNUC__) #undef PLATFORM_COMPILER_UNKNOWN #ifndef PLATFORM_COMPILER_FAMILYID #define PLATFORM_COMPILER_GNU 1 #define PLATFORM_COMPILER_FAMILYNAME GNU #define PLATFORM_COMPILER_FAMILYID 1 #ifdef __cplusplus #define PLATFORM_COMPILER_GNU_CXX 1 #else #define PLATFORM_COMPILER_GNU_C 1 #endif #if defined(__GNUC_MINOR__) &&
defined(__GNUC_PATCHLEVEL__) #define PLATFORM_COMPILER_VERSION \ PLATFORM_COMPILER_VERSION_INT(__GNUC__,__GNUC_MINOR__,__GNUC_PATCHLEVEL__) #elif defined(__GNUC_MINOR__) /* older versions of egcs lack __GNUC_PATCHLEVEL__ */ #define PLATFORM_COMPILER_VERSION \ PLATFORM_COMPILER_VERSION_INT(__GNUC__,__GNUC_MINOR__,0) #else #define PLATFORM_COMPILER_VERSION \ PLATFORM_COMPILER_VERSION_INT(__GNUC__,0,0) #endif #define PLATFORM_COMPILER_VERSION_STR __PLATFORM_COMPILER_GNU_VERSION_STR #else #define _PLATFORM_COMPILER_GNU_VERSION_STR __PLATFORM_COMPILER_GNU_VERSION_STR #endif /* gather any advertised GNU version number info, even for non-gcc compilers */ #if defined(__GNUC_MINOR__) && defined(__GNUC_PATCHLEVEL__) #define __PLATFORM_COMPILER_GNU_VERSION_STR \ PLATFORM_STRINGIFY(__GNUC__) "." PLATFORM_STRINGIFY(__GNUC_MINOR__) "." PLATFORM_STRINGIFY(__GNUC_PATCHLEVEL__) #elif defined(__GNUC_MINOR__) #define __PLATFORM_COMPILER_GNU_VERSION_STR \ PLATFORM_STRINGIFY(__GNUC__) "." PLATFORM_STRINGIFY(__GNUC_MINOR__) ".?" #else #define __PLATFORM_COMPILER_GNU_VERSION_STR \ PLATFORM_STRINGIFY(__GNUC__) ".?.?" 
#endif #elif defined(PLATFORM_COMPILER_UNKNOWN) /* unknown compiler */ #define PLATFORM_COMPILER_FAMILYNAME UNKNOWN #define PLATFORM_COMPILER_FAMILYID 0 #endif /* defaulting */ #ifndef PLATFORM_COMPILER_VERSION #define PLATFORM_COMPILER_VERSION 0 /* don't know */ #endif #ifndef PLATFORM_COMPILER_VERSION_STR #define PLATFORM_COMPILER_VERSION_STR PLATFORM_STRINGIFY(PLATFORM_COMPILER_VERSION) #endif #ifndef PLATFORM_COMPILER_VERSION_INT #define PLATFORM_COMPILER_VERSION_INT(maj,min,pat) \ (((maj) << 16) | ((min) << 8) | (pat)) #endif /* version check macros: each expands to a single parenthesized expression, so a use such as !PLATFORM_COMPILER_VERSION_GE(maj,min,pat) or one combined with other operators in an #if condition is not misparsed by operator precedence */ #define PLATFORM_COMPILER_VERSION_GT(maj,min,pat) \ ((PLATFORM_COMPILER_VERSION) > PLATFORM_COMPILER_VERSION_INT(maj,min,pat)) #define PLATFORM_COMPILER_VERSION_GE(maj,min,pat) \ ((PLATFORM_COMPILER_VERSION) >= PLATFORM_COMPILER_VERSION_INT(maj,min,pat)) #define PLATFORM_COMPILER_VERSION_EQ(maj,min,pat) \ ((PLATFORM_COMPILER_VERSION) == PLATFORM_COMPILER_VERSION_INT(maj,min,pat)) #define PLATFORM_COMPILER_VERSION_LE(maj,min,pat) \ ((PLATFORM_COMPILER_VERSION) <= PLATFORM_COMPILER_VERSION_INT(maj,min,pat)) #define PLATFORM_COMPILER_VERSION_LT(maj,min,pat) \ ((PLATFORM_COMPILER_VERSION) < PLATFORM_COMPILER_VERSION_INT(maj,min,pat)) /* misc feature detection */ #ifdef __cplusplus #define PLATFORM_COMPILER_ID (10000+PLATFORM_COMPILER_FAMILYID) #else #define PLATFORM_COMPILER_ID PLATFORM_COMPILER_FAMILYID #endif /* default language spec conformance detection */ #if !defined(PLATFORM_COMPILER_C_LANGLVL) && !defined(PLATFORM_COMPILER_CXX_LANGLVL) #if defined(__cplusplus) && (__cplusplus > 0) /* C++98 or newer */ #define PLATFORM_COMPILER_CXX_LANGLVL __cplusplus #elif defined(__STDC_VERSION__) && (__STDC_VERSION__ > 0) /* C95 or newer */ #define PLATFORM_COMPILER_C_LANGLVL __STDC_VERSION__ #elif defined(__STDC__) && !defined(__cplusplus) && !defined(__STDC_VERSION__) /* C89/C90 */ #define PLATFORM_COMPILER_C_LANGLVL 199000L #else /* unknown - leave both undef */ #endif #endif #undef _PLATFORM_COMPILER_STD_STDC #ifdef __STDC__ #define
_PLATFORM_COMPILER_STD_STDC "__STDC__" #else #define _PLATFORM_COMPILER_STD_STDC "-" #endif #undef _PLATFORM_COMPILER_STD_STDC_VERSION #ifdef __STDC_VERSION__ #define _PLATFORM_COMPILER_STD_STDC_VERSION ",__STDC_VERSION__=" PLATFORM_STRINGIFY(__STDC_VERSION__) #else #define _PLATFORM_COMPILER_STD_STDC_VERSION #endif #undef _PLATFORM_COMPILER_STD_STDC_EXT #ifdef __STDC_EXT__ #define _PLATFORM_COMPILER_STD_STDC_EXT ",__STDC_EXT__=" PLATFORM_STRINGIFY(__STDC_EXT__) #else #define _PLATFORM_COMPILER_STD_STDC_EXT #endif #undef _PLATFORM_COMPILER_STD_CPLUSPLUS #ifdef __cplusplus #define _PLATFORM_COMPILER_STD_CPLUSPLUS ",__cplusplus=" PLATFORM_STRINGIFY(__cplusplus) #else #define _PLATFORM_COMPILER_STD_CPLUSPLUS #endif #undef _PLATFORM_COMPILER_MISC_VERSION_STR #ifndef _PLATFORM_COMPILER_MISC_VERSION_STR #ifdef __VERSION__ #define _PLATFORM_COMPILER_MISC_VERSION_STR "|misc:" __VERSION__ #else #define _PLATFORM_COMPILER_MISC_VERSION_STR #endif #endif #undef _PLATFORM_COMPILER_GNU_VERSION_STR_HELP #ifdef _PLATFORM_COMPILER_GNU_VERSION_STR #define _PLATFORM_COMPILER_GNU_VERSION_STR_HELP "|GNU:" _PLATFORM_COMPILER_GNU_VERSION_STR #else #define _PLATFORM_COMPILER_GNU_VERSION_STR_HELP #endif #define PLATFORM_COMPILER_IDSTR \ "|COMPILER_FAMILY:" \ PLATFORM_STRINGIFY(PLATFORM_COMPILER_FAMILYNAME) \ "|COMPILER_VERSION:" PLATFORM_COMPILER_VERSION_STR \ "|COMPILER_FAMILYID:" \ PLATFORM_STRINGIFY(PLATFORM_COMPILER_FAMILYID) \ _PLATFORM_COMPILER_GNU_VERSION_STR_HELP \ "|STD:" _PLATFORM_COMPILER_STD_STDC \ _PLATFORM_COMPILER_STD_STDC_VERSION \ _PLATFORM_COMPILER_STD_STDC_EXT \ _PLATFORM_COMPILER_STD_CPLUSPLUS \ _PLATFORM_COMPILER_MISC_VERSION_STR \ "|" /* ------------------------------------------------------------------------------------ */ /* OS detection */ /* PLATFORM_OS_<family>: defined to a positive value if OS belongs to a given family, undef otherwise PLATFORM_OS_FAMILYNAME: unquoted token which provides the OS family name Some systems also define a subfamily: PLATFORM_OS_SUBFAMILY_<subfamily>:
positive value or undef PLATFORM_OS_SUBFAMILYNAME: unquoted token for subfamily name or undef */ #if defined(__LIBCATAMOUNT__) || defined(__QK_USER__) #define PLATFORM_OS_CATAMOUNT 1 #define PLATFORM_OS_FAMILYNAME CATAMOUNT #elif defined(GASNETI_ARCH_BGP) || defined(__bgp__) #define PLATFORM_OS_BGP 1 #define PLATFORM_OS_FAMILYNAME BGP #elif defined(GASNETI_ARCH_BGQ) || defined(__bgq__) #define PLATFORM_OS_BGQ 1 #define PLATFORM_OS_FAMILYNAME BGQ #elif defined(__K42) #define PLATFORM_OS_K42 1 #define PLATFORM_OS_FAMILYNAME K42 #elif defined(__uClinux__) #define PLATFORM_OS_UCLINUX 1 #define PLATFORM_OS_FAMILYNAME UCLINUX #elif defined(__linux) || defined(__linux__) || defined(__gnu_linux__) #define PLATFORM_OS_LINUX 1 #define PLATFORM_OS_FAMILYNAME LINUX #if defined(GASNETI_ARCH_WSL) #define PLATFORM_OS_SUBFAMILY_WSL 1 #define PLATFORM_OS_SUBFAMILYNAME WSL #elif defined(__CRAYXT_COMPUTE_LINUX_TARGET) /* NOTE: As of 2022-07 this is ONLY defined for the Cray cc/CC wrappers, and not the raw PrgEnv compilers */ #define PLATFORM_OS_SUBFAMILY_CNL 1 #define PLATFORM_OS_SUBFAMILYNAME CNL #endif #elif defined(__blrts) || defined(__blrts__) || defined(__gnu_blrts__) #define PLATFORM_OS_BLRTS 1 #define PLATFORM_OS_FAMILYNAME BLRTS #elif defined(__CYGWIN__) #define PLATFORM_OS_CYGWIN 1 #define PLATFORM_OS_FAMILYNAME CYGWIN #elif defined(_WIN32) #define PLATFORM_OS_MSWINDOWS 1 #define PLATFORM_OS_FAMILYNAME MSWINDOWS #elif defined(_AIX) #define PLATFORM_OS_AIX 1 #define PLATFORM_OS_FAMILYNAME AIX #elif defined(__osf__) || defined(__digital__) #define PLATFORM_OS_TRU64 1 #define PLATFORM_OS_FAMILYNAME TRU64 #elif defined(__FreeBSD) || defined(__FreeBSD__) #define PLATFORM_OS_FREEBSD 1 #define PLATFORM_OS_FAMILYNAME FREEBSD #elif defined(__NetBSD) || defined(__NetBSD__) #define PLATFORM_OS_NETBSD 1 #define PLATFORM_OS_FAMILYNAME NETBSD #elif defined(__OpenBSD__) #define PLATFORM_OS_OPENBSD 1 #define PLATFORM_OS_FAMILYNAME OPENBSD #elif defined(__sun) || defined(__sun__) #define 
PLATFORM_OS_SOLARIS 1 #define PLATFORM_OS_FAMILYNAME SOLARIS #elif (defined(__APPLE__) && defined(__MACH__)) || \ defined(__osx86__) /* PGI on OSX */ #define PLATFORM_OS_DARWIN 1 #define PLATFORM_OS_FAMILYNAME DARWIN #elif defined(__sgi) || defined(__sgi__) #define PLATFORM_OS_IRIX 1 #define PLATFORM_OS_FAMILYNAME IRIX #elif defined(__hpux) || defined(__hpux__) #define PLATFORM_OS_HPUX 1 #define PLATFORM_OS_FAMILYNAME HPUX #elif defined(_CRAY) || defined(_UNICOSMP) #define PLATFORM_OS_UNICOS 1 #define PLATFORM_OS_FAMILYNAME UNICOS #elif defined(__MTA__) #define PLATFORM_OS_MTA 1 #define PLATFORM_OS_FAMILYNAME MTA #elif defined(_SX) #define PLATFORM_OS_SUPERUX 1 #define PLATFORM_OS_FAMILYNAME SUPERUX #else #define PLATFORM_OS_UNKNOWN 1 #define PLATFORM_OS_FAMILYNAME UNKNOWN #endif /* ------------------------------------------------------------------------------------ */ /* Architecture detection */ /* PLATFORM_ARCH_<family>: defined to positive value if CPU belongs to a given family, undef otherwise PLATFORM_ARCH_FAMILYNAME: unquoted token which provides the CPU family name PLATFORM_ARCH_32 - 32-bit pointers PLATFORM_ARCH_64 - 64-bit pointers PLATFORM_ARCH_BIG_ENDIAN - big-endian word order PLATFORM_ARCH_LITTLE_ENDIAN - little-endian word order defined to positive value if CPU is known to have the indicated property, undef otherwise */ #if defined(__ppc64) || defined(__ppc64__) || \ defined(__PPC64) || defined(__PPC64__) || \ defined(__powerpc64) || defined(__powerpc64__) #define PLATFORM_ARCH_POWERPC 1 #define PLATFORM_ARCH_FAMILYNAME POWERPC #define _PLATFORM_ARCH_64 1 #define _PLATFORM_ARCH_BIG_ENDIAN 1 #elif defined(_POWER) || \ defined(__PPC) || defined(__PPC__) || \ defined(__powerpc) || defined(__powerpc__) || \ defined(__ppc) || defined(__ppc__) || \ defined(__POWERPC__) #define PLATFORM_ARCH_POWERPC 1 #define PLATFORM_ARCH_FAMILYNAME POWERPC #define _PLATFORM_ARCH_32 1 #define _PLATFORM_ARCH_BIG_ENDIAN 1 #elif defined(_ARCH_PPC) || defined(_ARCH_PPC64) #define
PLATFORM_ARCH_POWERPC 1 #define PLATFORM_ARCH_FAMILYNAME POWERPC #define _PLATFORM_ARCH_BIG_ENDIAN 1 #elif defined(__KNC__) || defined(__MIC__) #define PLATFORM_ARCH_MIC 1 #define PLATFORM_ARCH_FAMILYNAME MIC #define _PLATFORM_ARCH_64 1 #define _PLATFORM_ARCH_LITTLE_ENDIAN 1 #elif defined(__x86_64) || defined(__x86_64__) || \ defined(__athlon) || defined(__athlon__) || \ defined(__amd64) || defined(__amd64__) #define PLATFORM_ARCH_X86_64 1 #define PLATFORM_ARCH_FAMILYNAME X86_64 #define _PLATFORM_ARCH_64 1 #define _PLATFORM_ARCH_LITTLE_ENDIAN 1 #elif defined(__ia64__) || defined(__ia64) #define PLATFORM_ARCH_IA64 1 #define PLATFORM_ARCH_FAMILYNAME IA64 #define _PLATFORM_ARCH_64 1 #if defined(PLATFORM_OS_LINUX) || defined(PLATFORM_OS_FREEBSD) #define _PLATFORM_ARCH_LITTLE_ENDIAN 1 #elif defined(PLATFORM_OS_HPUX) #define _PLATFORM_ARCH_BIG_ENDIAN 1 #else /* Unknown. Hope one of the other mechanisms can sort it out. */ #endif #elif defined(__i386__) || defined(__i386) || \ defined(__i486__) || defined(__i486) || \ defined(__i586__) || defined(__i586) || \ defined(__i686__) || defined(__i686) || \ defined(__pentiumpro) || defined(__pentiumpro__) || \ defined(_M_IX86) #define PLATFORM_ARCH_X86 1 #define PLATFORM_ARCH_FAMILYNAME X86 #define _PLATFORM_ARCH_32 1 #define _PLATFORM_ARCH_LITTLE_ENDIAN 1 #elif defined(__alpha) || defined(__alpha__) #define PLATFORM_ARCH_ALPHA 1 #define PLATFORM_ARCH_FAMILYNAME ALPHA #define _PLATFORM_ARCH_64 1 #define _PLATFORM_ARCH_LITTLE_ENDIAN 1 #elif defined(_mips) || defined(__mips) || defined(__mips__) || \ defined(__host_mips) || defined(__host_mips__) || \ defined(_MIPS_ARCH) || defined(__R4000) #define PLATFORM_ARCH_MIPS 1 #define PLATFORM_ARCH_FAMILYNAME MIPS #ifdef _MIPSEL /* MIPS cores support both little and big endian modes */ /* SiCortex */ #define _PLATFORM_ARCH_LITTLE_ENDIAN 1 #else /* IRIX */ #define _PLATFORM_ARCH_BIG_ENDIAN 1 #endif #ifdef _MIPS_SZPTR #if _MIPS_SZPTR == 32 #define _PLATFORM_ARCH_32 1 #elif _MIPS_SZPTR == 64 
#define _PLATFORM_ARCH_64 1 #endif #endif #elif defined(__sparc) || defined(__sparc__) || \ defined(__sparclet__) || defined(__sparclite__) || \ defined(__sparcv8) || defined(__sparcv9) #define PLATFORM_ARCH_SPARC 1 #define PLATFORM_ARCH_FAMILYNAME SPARC #define _PLATFORM_ARCH_BIG_ENDIAN 1 #elif defined(__hppa) || defined(__hppa__) || \ defined(__parisc) || defined(__parisc__) || \ defined(_PA_RISC1_1) || defined(_PA_RISC2_0) #define PLATFORM_ARCH_PARISC 1 #define PLATFORM_ARCH_FAMILYNAME PARISC #define _PLATFORM_ARCH_BIG_ENDIAN 1 #elif defined(__crayx1) #define PLATFORM_ARCH_CRAYX1 1 #define PLATFORM_ARCH_FAMILYNAME CRAYX1 #define _PLATFORM_ARCH_BIG_ENDIAN 1 #define _PLATFORM_ARCH_64 1 #elif defined(_CRAYT3E) #define PLATFORM_ARCH_CRAYT3E 1 #define PLATFORM_ARCH_FAMILYNAME CRAYT3E #define _PLATFORM_ARCH_BIG_ENDIAN 1 #define _PLATFORM_ARCH_64 1 #elif defined(__MTA__) #define PLATFORM_ARCH_MTA 1 #define PLATFORM_ARCH_FAMILYNAME MTA #elif defined(_SX) #define PLATFORM_ARCH_NECSX 1 #define PLATFORM_ARCH_FAMILYNAME NECSX #elif defined(__MICROBLAZE__) #define PLATFORM_ARCH_MICROBLAZE 1 #define PLATFORM_ARCH_FAMILYNAME MICROBLAZE #define _PLATFORM_ARCH_BIG_ENDIAN 1 #define _PLATFORM_ARCH_32 1 #elif defined(__arm__) #define PLATFORM_ARCH_ARM 1 #define PLATFORM_ARCH_FAMILYNAME ARM #define _PLATFORM_ARCH_32 1 #if defined(__ARMEB__) #define _PLATFORM_ARCH_BIG_ENDIAN 1 #elif defined(__ARMEL__) #define _PLATFORM_ARCH_LITTLE_ENDIAN 1 #endif #elif defined(__aarch64__) #define PLATFORM_ARCH_AARCH64 1 #define PLATFORM_ARCH_FAMILYNAME AARCH64 #if defined(__AARCH64EB__) #define _PLATFORM_ARCH_BIG_ENDIAN 1 #elif defined(__AARCH64EL__) #define _PLATFORM_ARCH_LITTLE_ENDIAN 1 #endif #elif defined(__tile__) #define PLATFORM_ARCH_TILE 1 #define PLATFORM_ARCH_FAMILYNAME TILE #define _PLATFORM_ARCH_LITTLE_ENDIAN 1 #if defined(__tilegx__) #define _PLATFORM_ARCH_64 1 #else #define _PLATFORM_ARCH_32 1 #endif #elif defined(__s390__) #define PLATFORM_ARCH_S390 1 #define PLATFORM_ARCH_FAMILYNAME 
S390 #define _PLATFORM_ARCH_BIG_ENDIAN 1 #if defined(__s390x__) #define _PLATFORM_ARCH_64 1 #else #define _PLATFORM_ARCH_32 1 #endif #elif defined(__riscv) #define PLATFORM_ARCH_RISCV 1 #define PLATFORM_ARCH_FAMILYNAME RISCV #define _PLATFORM_ARCH_LITTLE_ENDIAN 1 #if __riscv_xlen == 32 #define _PLATFORM_ARCH_32 1 #else /* (__riscv_xlen == 64) || (__riscv_xlen == 128) */ #define _PLATFORM_ARCH_64 1 #endif #else /* unknown CPU */ #define PLATFORM_ARCH_UNKNOWN 1 #define PLATFORM_ARCH_FAMILYNAME UNKNOWN #endif /* generic chip properties */ #if defined(PLATFORM_ARCH_BIG_ENDIAN) || defined(PLATFORM_ARCH_LITTLE_ENDIAN) #error internal error in endianness configuration #endif /* PLATFORM_ARCH_{BIG,LITTLE}_ENDIAN: first detect common preprocessor defines then default to any arch-specific value provided */ #if defined(__BIG_ENDIAN__) || defined(WORDS_BIGENDIAN) || \ ( __BYTE_ORDER__ > 0 && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ ) #define PLATFORM_ARCH_BIG_ENDIAN 1 #elif defined(__LITTLE_ENDIAN__) || defined(WORDS_LITTLEENDIAN) || \ ( __BYTE_ORDER__ > 0 && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ ) #define PLATFORM_ARCH_LITTLE_ENDIAN 1 #elif defined(_PLATFORM_ARCH_BIG_ENDIAN) #define PLATFORM_ARCH_BIG_ENDIAN 1 #elif defined(_PLATFORM_ARCH_LITTLE_ENDIAN) #define PLATFORM_ARCH_LITTLE_ENDIAN 1 #endif #undef _PLATFORM_ARCH_BIG_ENDIAN #undef _PLATFORM_ARCH_LITTLE_ENDIAN #if defined(PLATFORM_ARCH_BIG_ENDIAN) && defined(PLATFORM_ARCH_LITTLE_ENDIAN) #error conflicting endianness information #endif /* PLATFORM_ARCH_{32,64}: first trust SIZEOF_VOID_P, which is most likely to be accurate next, detect common 32/64 preprocessor defines finally default to any arch-specific value provided */ #if defined(PLATFORM_ARCH_64) || defined(PLATFORM_ARCH_32) #error internal error in bit width configuration #endif #if SIZEOF_VOID_P == 8 #define PLATFORM_ARCH_64 1 #elif SIZEOF_VOID_P == 4 #define PLATFORM_ARCH_32 1 #elif defined(_LP64) || defined(__LP64__) || \ defined(__arch64__) || 
defined(__64BIT__) || \ __INTPTR_MAX__ > 2147483647 #define PLATFORM_ARCH_64 1 #elif defined(_ILP32) || defined(__ILP32__) || \ defined(__arch32__) || defined(__32BIT__) || \ __INTPTR_MAX__ == 2147483647 #define PLATFORM_ARCH_32 1 #elif defined(_PLATFORM_ARCH_64) #define PLATFORM_ARCH_64 1 #elif defined(_PLATFORM_ARCH_32) #define PLATFORM_ARCH_32 1 #endif #undef _PLATFORM_ARCH_64 #undef _PLATFORM_ARCH_32 #if defined(PLATFORM_ARCH_64) && defined(PLATFORM_ARCH_32) #error conflicting bit width information #elif !defined(PLATFORM_ARCH_64) && !defined(PLATFORM_ARCH_32) #error missing bit width information #endif /* ------------------------------------------------------------------------------------ */ /* handy test code that can be parsed after preprocess or executed to show platform results */ #ifdef PLATFORM_SHOW #include #include const char * COMPILER_FAMILYNAME = PLATFORM_STRINGIFY(PLATFORM_COMPILER_FAMILYNAME) , * COMPILER_FAMILYID = PLATFORM_STRINGIFY(PLATFORM_COMPILER_FAMILYID) , * COMPILER_VERSION_STR = PLATFORM_COMPILER_VERSION_STR , * COMPILER_IDSTR = PLATFORM_COMPILER_IDSTR , * OS_FAMILYNAME = PLATFORM_STRINGIFY(PLATFORM_OS_FAMILYNAME) , * ARCH_FAMILYNAME = PLATFORM_STRINGIFY(PLATFORM_ARCH_FAMILYNAME) ; int main(void) { #define PLATFORM_DISP(x) printf("PLATFORM_"#x"=%s\n",x) #define PLATFORM_DISPI(x) printf("PLATFORM_"#x"=%li\n",(long int)PLATFORM_##x) #define PLATFORM_DISPX(x) printf("PLATFORM_"#x"=0x%lx\n",(long int)PLATFORM_##x) PLATFORM_DISP(COMPILER_FAMILYNAME); PLATFORM_DISP(COMPILER_FAMILYID); PLATFORM_DISPI(COMPILER_ID); PLATFORM_DISPX(COMPILER_VERSION); PLATFORM_DISP(COMPILER_VERSION_STR); PLATFORM_DISP(COMPILER_IDSTR); #ifdef PLATFORM_COMPILER_C_LANGLVL PLATFORM_DISPI(COMPILER_C_LANGLVL); #elif defined(PLATFORM_COMPILER_CXX_LANGLVL) PLATFORM_DISPI(COMPILER_CXX_LANGLVL); #else printf("WARNING: Missing PLATFORM_COMPILER_C(XX)_LANGLVL!"); #endif PLATFORM_DISP(OS_FAMILYNAME); #ifdef PLATFORM_OS_SUBFAMILYNAME { const char * OS_SUBFAMILYNAME = 
PLATFORM_STRINGIFY(PLATFORM_OS_SUBFAMILYNAME); PLATFORM_DISP(OS_SUBFAMILYNAME); } #endif PLATFORM_DISP(ARCH_FAMILYNAME); #ifdef PLATFORM_ARCH_32 PLATFORM_DISPI(ARCH_32); assert(sizeof(void *) == 4); #else PLATFORM_DISPI(ARCH_64); assert(sizeof(void *) == 8); #endif { int x = 0x00FF; unsigned char *p = (unsigned char *)&x; #ifdef PLATFORM_ARCH_BIG_ENDIAN PLATFORM_DISPI(ARCH_BIG_ENDIAN); assert(*p == 0); #else PLATFORM_DISPI(ARCH_LITTLE_ENDIAN); assert(*p == 0xFF); #endif } return 0; } #endif /* ------------------------------------------------------------------------------------ */ #endif prrte-3.0.13/src/include/hash_string.h0000664000175000017500000000474415145263240020025 0ustar alastairalastair/* * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2019 Intel, Inc. All rights reserved. * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2021 Nanook Consulting. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ /** @file * * Simple macros to quickly compute a hash value from a string. 
* */ #ifndef PRTE_HASH_STRING_H #define PRTE_HASH_STRING_H /** * Compute the hash value and the string length simultaneously * * @param str (IN) The string which will be parsed (char*) * @param hash (OUT) Where the hash value will be stored (uint32_t) * @param length (OUT) The computed length of the string (uint32_t) */ #define PRTE_HASH_STRLEN(str, hash, length) \ do { \ register const char *_str = (str); \ register uint32_t _hash = 0; \ register uint32_t _len = 0; \ \ while (*_str) { \ _len++; \ _hash += *_str++; \ _hash += (_hash << 10); \ _hash ^= (_hash >> 6); \ } \ \ _hash += (_hash << 3); \ _hash ^= (_hash >> 11); \ (hash) = (_hash + (_hash << 15)); \ (length) = _len; \ } while (0) /** * Compute the hash value * * @param str (IN) The string which will be parsed (char*) * @param hash (OUT) Where the hash value will be stored (uint32_t) */ #define PRTE_HASH_STR(str, hash) \ do { \ register const char *_str = (str); \ register uint32_t _hash = 0; \ \ while (*_str) { \ _hash += *_str++; \ _hash += (_hash << 10); \ _hash ^= (_hash >> 6); \ } \ \ _hash += (_hash << 3); \ _hash ^= (_hash >> 11); \ (hash) = (_hash + (_hash << 15)); \ } while (0) #endif /* PRTE_HASH_STRING_H */ prrte-3.0.13/src/include/Makefile.am0000664000175000017500000000313115145263240017364 0ustar alastairalastair# -*- makefile -*- # # Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. # Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010-2020 Cisco Systems, Inc. All rights reserved # Copyright (c) 2014-2020 Intel, Inc. All rights reserved. # Copyright (c) 2021-2023 Nanook Consulting. 
All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow # # $HEADER$ # # This makefile.am does not stand on its own - it is included from src/Makefile.am headers += \ include/constants.h \ include/types.h \ include/prte_frameworks.h \ include/hash_string.h \ include/prefetch.h \ include/prte_config_top.h \ include/prte_config_bottom.h \ include/prte_portable_platform.h \ include/prte_portable_platform_real.h \ include/prte_stdint.h \ include/prte_stdatomic.h \ include/prte_socket_errno.h \ include/align.h nodist_headers += \ include/prte_config.h \ include/version.h libprrte_la_SOURCES += \ $(headers) \ include/prte_frameworks.c prrte-3.0.13/src/include/prte_socket_errno.h0000664000175000017500000000210415145263240021227 0ustar alastairalastair/* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2019 Intel, Inc. All rights reserved. * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2021 Nanook Consulting. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #ifndef PRTE_GET_SOCKET_ERROR_H #define PRTE_GET_SOCKET_ERROR_H #include "constants.h" #include #define prte_socket_errno errno #endif /* PRTE_GET_ERROR_H */ prrte-3.0.13/src/include/version.h.in0000664000175000017500000000274215145263240017602 0ustar alastairalastair/* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. 
* Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2019 Intel, Inc. All rights reserved. * Copyright (c) 2022 Nanook Consulting. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ * * This file should be included by any file that needs full * version information for the PRTE project */ #ifndef PRTE_VERSIONS_H #define PRTE_VERSIONS_H #define PRTE_MAJOR_VERSION @PRTE_MAJOR_VERSION@ #define PRTE_MINOR_VERSION @PRTE_MINOR_VERSION@ #define PRTE_GREEK_VERSION "@PRTE_GREEK_VERSION@" #define PRTE_WANT_REPO_REV @PRTE_WANT_REPO_REV@ #define PRTE_REPO_REV "@PRTE_REPO_REV@" #ifdef PRTE_VERSION /* If we included version.h, we want the real version, not the stripped (no-r number) verstion */ #undef PRTE_VERSION #endif #define PRTE_VERSION "@PRTE_VERSION@" #endif prrte-3.0.13/src/include/prte_stdint.h0000664000175000017500000000660415145263240020050 0ustar alastairalastair/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. 
* Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2018-2019 Intel, Inc. All rights reserved. * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2021 Nanook Consulting. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ * * This file includes the C99 stdint.h file if available, and otherwise * defines fixed-width types according to the SIZEOF information * gathered by configure. */ #ifndef PRTE_STDINT_H #define PRTE_STDINT_H 1 #include "prte_config.h" /* * Include what we can and define what is missing. */ #include #include #ifdef HAVE_SYS_TYPES_H # include #endif /* 128-bit */ #ifdef HAVE_INT128_T typedef int128_t prte_int128_t; typedef uint128_t prte_uint128_t; # define HAVE_PRTE_INT128_T 1 #elif defined(HAVE___INT128) /* suppress warning about __int128 type */ # pragma GCC diagnostic push /* Clang won't quietly accept "-pedantic", but GCC versions older than ~4.8 * won't quietly accept "-Wpedanic". The whole "#pragma GCC diagnostic ..." * facility only was added to GCC as of version 4.6. 
*/ # if defined(__clang__) || (defined(__GNUC__) && __GNUC__ >= 6) # pragma GCC diagnostic ignored "-Wpedantic" # else # pragma GCC diagnostic ignored "-pedantic" # endif typedef __int128 prte_int128_t; typedef unsigned __int128 prte_uint128_t; # pragma GCC diagnostic pop # define HAVE_PRTE_INT128_T 1 #else # define HAVE_PRTE_INT128_T 0 #endif /* Pointers */ #if SIZEOF_VOID_P == SIZEOF_INT # ifndef HAVE_INTPTR_T typedef signed int intptr_t; # endif # ifndef HAVE_UINTPTR_T typedef unsigned int uintptr_t; # endif #elif SIZEOF_VOID_P == SIZEOF_LONG # ifndef HAVE_INTPTR_T typedef signed long intptr_t; # endif # ifndef HAVE_UINTPTR_T typedef unsigned long uintptr_t; # endif #elif SIZEOF_VOID_P == SIZEOF_LONG_LONG # ifndef HAVE_INTPTR_T typedef signed long long intptr_t; # endif # ifndef HAVE_UINTPTR_T typedef unsigned long long uintptr_t; # endif #else # error Failed to define pointer-sized integer types #endif /* inttypes.h printf specifiers */ #include #ifndef PRIsize_t # if defined(ACCEPT_C99) # define PRIsize_t "zu" # elif SIZEOF_SIZE_T == SIZEOF_LONG # define PRIsize_t "lu" # elif SIZEOF_SIZE_T == SIZEOF_LONG_LONG # define PRIsize_t "llu" # else # define PRIsize_t "u" # endif #endif #endif /* PRTE_STDINT_H */ prrte-3.0.13/src/hwloc/0000775000175000017500000000000015145263240015023 5ustar alastairalastairprrte-3.0.13/src/hwloc/hwloc-internal.h0000664000175000017500000003764615145263240020142 0ustar alastairalastair/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2011-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2013-2020 Intel, Inc. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * * Copyright (c) 2021-2026 Nanook Consulting All rights reserved. * Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow */ #ifndef PRTE_MCA_HWLOC_H #define PRTE_MCA_HWLOC_H #include "prte_config.h" #ifdef HAVE_SYS_TYPES_H # include #endif #ifdef HAVE_SYS_TIME_H # include #endif #include #include #include #if HWLOC_API_VERSION >= 0x20000 # include #endif #if HWLOC_API_VERSION < 0x10b00 #define HWLOC_OBJ_NUMANODE HWLOC_OBJ_NODE #define HWLOC_OBJ_PACKAGE HWLOC_OBJ_SOCKET #endif #if HWLOC_API_VERSION < 0x10a00 static inline hwloc_obj_t hwloc_get_numanode_obj_by_os_index(hwloc_topology_t topology, unsigned os_index) { hwloc_obj_t obj = NULL; while ((obj = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_NUMANODE, obj)) != NULL) if (obj->os_index == os_index) return obj; return NULL; } #endif #include "src/class/pmix_list.h" #include "src/class/pmix_value_array.h" BEGIN_C_DECLS /* ******************************************************************** */ /* Although we cannot bind if --without-hwloc is set, * we do still need to know some basic locality data * like on_node and not_on_node. 
So ensure that we * always have access to that much info by including * the definitions here, outside the if-have-hwloc test */ typedef uint16_t prte_hwloc_locality_t; #define PRTE_HWLOC_LOCALITY_T PRTE_UINT16 /** Process locality definitions */ enum { PRTE_PROC_LOCALITY_UNKNOWN = 0x0000, PRTE_PROC_NON_LOCAL = 0x8000, PRTE_PROC_ON_CLUSTER = 0x0001, PRTE_PROC_ON_CU = 0x0002, PRTE_PROC_ON_HOST = 0x0004, PRTE_PROC_ON_NODE = 0x000c, // same host PRTE_PROC_ON_PACKAGE = 0x0020, PRTE_PROC_ON_NUMA = 0x0040, PRTE_PROC_ON_L3CACHE = 0x0080, PRTE_PROC_ON_L2CACHE = 0x0100, PRTE_PROC_ON_L1CACHE = 0x0200, PRTE_PROC_ON_CORE = 0x0400, PRTE_PROC_ON_HWTHREAD = 0x0800, PRTE_PROC_ALL_LOCAL = 0x0fff, }; /** Process locality macros */ #define PRTE_PROC_ON_LOCAL_CLUSTER(n) (!!((n) &PRTE_PROC_ON_CLUSTER)) #define PRTE_PROC_ON_LOCAL_CU(n) (!!((n) &PRTE_PROC_ON_CU)) #define PRTE_PROC_ON_LOCAL_HOST(n) (!!((n) &PRTE_PROC_ON_HOST)) #define PRTE_PROC_ON_LOCAL_NODE(n) (!!((n) &PRTE_PROC_ON_LOCAL_HOST(n))) #define PRTE_PROC_ON_LOCAL_PACKAGE(n) (!!((n) &PRTE_PROC_ON_PACKAGE)) #define PRTE_PROC_ON_LOCAL_NUMA(n) (!!((n) &PRTE_PROC_ON_NUMA)) #define PRTE_PROC_ON_LOCAL_L3CACHE(n) (!!((n) &PRTE_PROC_ON_L3CACHE)) #define PRTE_PROC_ON_LOCAL_L2CACHE(n) (!!((n) &PRTE_PROC_ON_L2CACHE)) #define PRTE_PROC_ON_LOCAL_L1CACHE(n) (!!((n) &PRTE_PROC_ON_L1CACHE)) #define PRTE_PROC_ON_LOCAL_CORE(n) (!!((n) &PRTE_PROC_ON_CORE)) #define PRTE_PROC_ON_LOCAL_HWTHREAD(n) (!!((n) &PRTE_PROC_ON_HWTHREAD)) /* ******************************************************************** */ /** * Struct used to describe a section of memory (starting address * and length). This is really the same thing as an iovec, but * we include a separate type for it for at least 2 reasons: * * 1. Some OS's iovec definitions are exceedingly lame (e.g., * Solaris 9 has the length argument as an int, instead of a * size_t). * * 2. We reserve the right to expand/change this struct in the * future. 
*/ typedef struct { /** Starting address of segment */ void *mbs_start_addr; /** Length of segment */ size_t mbs_len; } prte_hwloc_base_memory_segment_t; /* define binding policies */ typedef uint16_t prte_binding_policy_t; #define PRTE_BINDING_POLICY PRTE_UINT16 /* binding directives */ #define PRTE_BIND_IF_SUPPORTED 0x1000 #define PRTE_BIND_ALLOW_OVERLOAD 0x2000 #define PRTE_BIND_GIVEN 0x4000 // overload policy was given #define PRTE_BIND_OVERLOAD_GIVEN 0x0100 /* binding policies - any changes in these * values must be reflected in prte/mca/rmaps/rmaps.h */ #define PRTE_BIND_TO_NONE 1 #define PRTE_BIND_TO_PACKAGE 2 #define PRTE_BIND_TO_NUMA 3 #define PRTE_BIND_TO_L3CACHE 4 #define PRTE_BIND_TO_L2CACHE 5 #define PRTE_BIND_TO_L1CACHE 6 #define PRTE_BIND_TO_CORE 7 #define PRTE_BIND_TO_HWTHREAD 8 #define PRTE_GET_BINDING_POLICY(pol) ((pol) &0x00ff) #define PRTE_SET_BINDING_POLICY(target, pol) \ (target) = (pol) | (((target) & 0xff00) | PRTE_BIND_GIVEN) #define PRTE_SET_DEFAULT_BINDING_POLICY(target, pol) \ do { \ if (!PRTE_BINDING_POLICY_IS_SET((target))) { \ (target) = (pol) | (((target) & 0xff00) | PRTE_BIND_IF_SUPPORTED); \ } \ } while (0); /* check if policy is set */ #define PRTE_BINDING_POLICY_IS_SET(pol) ((pol) &0x4000) /* macro to detect if binding was qualified */ #define PRTE_BINDING_REQUIRED(n) (!(PRTE_BIND_IF_SUPPORTED & (n))) /* macro to detect if binding is forced */ #define PRTE_BIND_OVERLOAD_ALLOWED(n) (PRTE_BIND_ALLOW_OVERLOAD & (n)) #define PRTE_BIND_OVERLOAD_SET(n) (PRTE_BIND_OVERLOAD_GIVEN & (n)) /* some global values */ PRTE_EXPORT extern hwloc_topology_t prte_hwloc_topology; PRTE_EXPORT extern prte_binding_policy_t prte_hwloc_default_binding_policy; PRTE_EXPORT extern hwloc_obj_type_t prte_hwloc_levels[]; PRTE_EXPORT extern char *prte_hwloc_default_cpu_list; PRTE_EXPORT extern bool prte_hwloc_default_use_hwthread_cpus; #if HWLOC_API_VERSION < 0x20000 # define HWLOC_OBJ_L3CACHE HWLOC_OBJ_CACHE # define HWLOC_OBJ_L2CACHE HWLOC_OBJ_CACHE # define 
HWLOC_OBJ_L1CACHE HWLOC_OBJ_CACHE # if HWLOC_API_VERSION < 0x10a00 # define HWLOC_OBJ_PACKAGE HWLOC_OBJ_SOCKET # endif # define HAVE_DECL_HWLOC_OBJ_OSDEV_COPROC 0 # define HAVE_HWLOC_TOPOLOGY_DUP 0 #else # define HAVE_DECL_HWLOC_OBJ_OSDEV_COPROC 1 # define HAVE_HWLOC_TOPOLOGY_DUP 1 #endif /** * Debugging output stream */ PRTE_EXPORT extern int prte_hwloc_base_output; PRTE_EXPORT extern bool prte_hwloc_base_inited; /* we always must have some minimal locality support */ #define PRTE_HWLOC_PRINT_MAX_SIZE 50 #define PRTE_HWLOC_PRINT_NUM_BUFS 16 typedef struct { char *buffers[PRTE_HWLOC_PRINT_NUM_BUFS]; int cntr; } prte_hwloc_print_buffers_t; prte_hwloc_print_buffers_t *prte_hwloc_get_print_buffer(void); extern char *prte_hwloc_print_null; PRTE_EXPORT char *prte_hwloc_base_print_locality(prte_hwloc_locality_t locality); PRTE_EXPORT extern char *prte_hwloc_base_topo_file; PRTE_EXPORT extern bool prte_hwloc_synthetic_topo; /* convenience macro for debugging */ #define PRTE_HWLOC_SHOW_BINDING(n, v, t) \ do { \ char tmp1[1024]; \ hwloc_cpuset_t bind; \ bind = prte_hwloc_alloc(); \ if (hwloc_get_cpubind(t, bind, HWLOC_CPUBIND_PROCESS) < 0) { \ pmix_output_verbose(n, v, "CANNOT DETERMINE BINDING AT %s:%d", __FILE__, __LINE__); \ } else { \ prte_hwloc_base_cset2mapstr(tmp1, sizeof(tmp1), t, bind); \ pmix_output_verbose(n, v, "BINDINGS AT %s:%d: %s", __FILE__, __LINE__, tmp1); \ } \ hwloc_bitmap_free(bind); \ } while (0); #if HWLOC_API_VERSION < 0x20000 # define PRTE_HWLOC_MAKE_OBJ_CACHE(level, obj, cache_level) \ do { \ obj = HWLOC_OBJ_CACHE; \ cache_level = level; \ } while (0) #else # define PRTE_HWLOC_MAKE_OBJ_CACHE(level, obj, cache_level) \ do { \ obj = HWLOC_OBJ_L##level##CACHE; \ cache_level = 0; \ } while (0) #endif PRTE_EXPORT prte_hwloc_locality_t prte_hwloc_base_get_relative_locality(hwloc_topology_t topo, char *cpuset1, char *cpuset2); PRTE_EXPORT int prte_hwloc_base_set_default_binding(void *jdata, void *options); PRTE_EXPORT int 
prte_hwloc_base_set_binding_policy(void *jdata, char *spec);

/* List item (embeds pmix_list_item_t) carrying an index and a distance value.
 * NOTE(review): the exact meaning of dist_from_closed is not visible in this
 * header -- confirm against the code that fills it in. */
struct prte_rmaps_numa_node_t {
    pmix_list_item_t super;
    int index;
    float dist_from_closed;
};
typedef struct prte_rmaps_numa_node_t prte_rmaps_numa_node_t;
PMIX_CLASS_DECLARATION(prte_rmaps_numa_node_t);

/**
 * Enum for what memory allocation policy we want for user allocations.
 * MAP = memory allocation policy.
 */
typedef enum { PRTE_HWLOC_BASE_MAP_NONE, PRTE_HWLOC_BASE_MAP_LOCAL_ONLY } prte_hwloc_base_map_t;

/**
 * Global reflecting the MAP (set by MCA param).
 */
PRTE_EXPORT extern prte_hwloc_base_map_t prte_hwloc_base_map;

/**
 * Enum for what to do if the hwloc framework tries to bind memory
 * and fails. MBFA = memory bind failure action.
 */
typedef enum {
    PRTE_HWLOC_BASE_MBFA_SILENT,
    PRTE_HWLOC_BASE_MBFA_WARN,
    PRTE_HWLOC_BASE_MBFA_ERROR
} prte_hwloc_base_mbfa_t;

/**
 * Global reflecting the MBFA (set by MCA param).
 */
PRTE_EXPORT extern prte_hwloc_base_mbfa_t prte_hwloc_base_mbfa;

/**
 * Discover / load the hwloc topology (i.e., call hwloc_topology_init() and
 * hwloc_topology_load()).
 */
PRTE_EXPORT int prte_hwloc_base_get_topology(void);
PRTE_EXPORT hwloc_cpuset_t prte_hwloc_base_setup_summary(hwloc_topology_t topo);

/**
 * Set the hwloc topology to that from the given topo file
 */
PRTE_EXPORT int prte_hwloc_base_set_topology(char *topofile);

PRTE_EXPORT hwloc_cpuset_t prte_hwloc_base_generate_cpuset(hwloc_topology_t topo,
                                                           bool use_hwthread_cpus, char *cpulist);

PRTE_EXPORT hwloc_cpuset_t prte_hwloc_base_filter_cpus(hwloc_topology_t topo);

/**
 * NOTE(review): this comment used to read "Free the hwloc topology.", which
 * does not describe any of the declarations that follow; judging by their
 * names they count, fetch and index topology objects of a given type --
 * confirm against the implementation.
 */
PRTE_EXPORT unsigned int prte_hwloc_base_get_nbobjs_by_type(hwloc_topology_t topo,
                                                            hwloc_obj_type_t target,
                                                            unsigned cache_level);

PRTE_EXPORT hwloc_obj_t prte_hwloc_base_get_obj_by_type(hwloc_topology_t topo,
                                                        hwloc_obj_type_t target,
                                                        unsigned cache_level,
                                                        unsigned int instance);
PRTE_EXPORT unsigned int prte_hwloc_base_get_obj_idx(hwloc_topology_t topo, hwloc_obj_t obj);

/**
 * Get the number of pu's under a given hwloc object.
 */
PRTE_EXPORT unsigned int prte_hwloc_base_get_npus(hwloc_topology_t topo, bool use_hwthread_cpus,
                                                  hwloc_cpuset_t envelope, hwloc_obj_t target);
PRTE_EXPORT char *prte_hwloc_base_print_binding(prte_binding_policy_t binding);

/**
 * Determine if there is a single cpu in a bitmap.
 */
PRTE_EXPORT bool prte_hwloc_base_single_cpu(hwloc_cpuset_t cpuset);

/**
 * Provide a utility to parse a slot list against the local
 * cpus of given type, and produce a cpuset for the described binding
 */
PRTE_EXPORT int prte_hwloc_base_cpu_list_parse(const char *slot_str, hwloc_topology_t topo,
                                               bool use_hwthread_cpus, hwloc_cpuset_t cpumask);

PRTE_EXPORT char *prte_hwloc_base_find_coprocessors(hwloc_topology_t topo);
PRTE_EXPORT char *prte_hwloc_base_check_on_coprocessor(void);

/**
 * Report a bind failure using the normal mechanisms if a component
 * fails to bind memory -- according to the value of the
 * hwloc_base_bind_failure_action MCA parameter.
 */
PRTE_EXPORT int prte_hwloc_base_report_bind_failure(const char *file, int line, const char *msg,
                                                    int rc);

/**
 * This function sets the process-wide memory affinity policy
 * according to prte_hwloc_base_map and prte_hwloc_base_mbfa. It needs
 * to be a separate, standalone function (as opposed to being done
 * during prte_hwloc_base_open()) because prte_hwloc_topology is not
 * loaded by prte_hwloc_base_open(). Hence, an upper layer needs to
 * invoke this function after prte_hwloc_topology has been loaded.
 */
PRTE_EXPORT int prte_hwloc_base_set_process_membind_policy(void);

PRTE_EXPORT int prte_hwloc_base_membind(prte_hwloc_base_memory_segment_t *segs, size_t count,
                                        int node_id);

PRTE_EXPORT int prte_hwloc_base_node_name_to_id(char *node_name, int *id);

PRTE_EXPORT int prte_hwloc_base_memory_set(prte_hwloc_base_memory_segment_t *segments,
                                           size_t num_segments);

/**
 * Make a prettyprint string for a hwloc_cpuset_t (e.g., "package
 * 2[core 3]").
 */
PRTE_EXPORT char *prte_hwloc_base_cset2str(hwloc_const_cpuset_t cpuset,
                                           bool use_hwthread_cpus,
                                           bool physical,
                                           hwloc_topology_t topo);

PRTE_EXPORT void prte_hwloc_get_binding_info(hwloc_const_cpuset_t cpuset,
                                             bool use_hwthread_cpus,
                                             hwloc_topology_t topo,
                                             int *pkgnum,
                                             char *cores, int sz);

/* get the hwloc object that corresponds to the given processor id and type */
PRTE_EXPORT hwloc_obj_t prte_hwloc_base_get_pu(hwloc_topology_t topo, bool use_hwthread_cpus,
                                               int lid);

/* get the topology "signature" so we can check for differences - caller
 * is responsible for freeing the returned string */
PRTE_EXPORT char *prte_hwloc_base_get_topo_signature(hwloc_topology_t topo);

/* get a string describing the locality of a given process */
PRTE_EXPORT char *prte_hwloc_base_get_locality_string(hwloc_topology_t topo, char *bitmap);

/* extract a location from the locality string */
PRTE_EXPORT char *prte_hwloc_base_get_location(char *locality, hwloc_obj_type_t type,
                                               unsigned index);

PRTE_EXPORT prte_hwloc_locality_t prte_hwloc_compute_relative_locality(char *loc1, char *loc2);

PRTE_EXPORT int prte_hwloc_base_topology_export_xmlbuffer(hwloc_topology_t topology,
                                                          char **xmlpath, int *buflen);

PRTE_EXPORT int prte_hwloc_base_topology_set_flags(hwloc_topology_t topology, unsigned long flags,
                                                   bool io);

PRTE_EXPORT int prte_hwloc_base_open(void);
PRTE_EXPORT void prte_hwloc_base_close(void);
PRTE_EXPORT int prte_hwloc_base_register(void);

PRTE_EXPORT int prte_hwloc_print(char **output, char *prefix, hwloc_topology_t src);

PRTE_EXPORT void prte_hwloc_build_map(hwloc_topology_t topo,
                                      hwloc_cpuset_t avail,
                                      bool use_hwthread_cpus,
                                      hwloc_bitmap_t coreset);

PRTE_EXPORT bool prte_hwloc_base_core_cpus(hwloc_topology_t topo);

END_C_DECLS

#endif /* PRTE_MCA_HWLOC_H */
prrte-3.0.13/src/hwloc/hwloc_base_maffinity.c0000664000175000017500000001137515145263240021352 0ustar alastairalastair/*
 * Copyright (c) 2011-2020 Cisco Systems, Inc. All rights reserved
 * Copyright (c) 2016-2019 Intel, Inc.
All rights reserved. * Copyright (c) 2021 Nanook Consulting. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #include "prte_config.h" #include "src/include/constants.h" #include "src/hwloc/hwloc-internal.h" /* * Don't use show_help() here (or print any error message at all). * Let the upper layer output a relevant message, because doing so may * be complicated (e.g., this might be called from the PRTE ODLS, * which has to do some extra steps to get error messages to be * displayed). */ int prte_hwloc_base_set_process_membind_policy(void) { int rc = 0, flags; hwloc_membind_policy_t policy; hwloc_cpuset_t cpuset; /* Make sure prte_hwloc_topology has been set by the time we've been called */ if (PRTE_SUCCESS != prte_hwloc_base_get_topology()) { return PRTE_ERR_BAD_PARAM; } /* Set the default memory allocation policy according to MCA param */ switch (prte_hwloc_base_map) { case PRTE_HWLOC_BASE_MAP_LOCAL_ONLY: policy = HWLOC_MEMBIND_BIND; flags = HWLOC_MEMBIND_STRICT; break; case PRTE_HWLOC_BASE_MAP_NONE: default: policy = HWLOC_MEMBIND_DEFAULT; flags = 0; break; } cpuset = hwloc_bitmap_alloc(); if (NULL == cpuset) { rc = PRTE_ERR_OUT_OF_RESOURCE; } else { int e; hwloc_get_cpubind(prte_hwloc_topology, cpuset, 0); rc = hwloc_set_membind(prte_hwloc_topology, cpuset, policy, flags); e = errno; hwloc_bitmap_free(cpuset); /* See if hwloc was able to do it. If hwloc failed due to ENOSYS, but the base_map == NONE, then it's not really an error. */ if (0 != rc && ENOSYS == e && PRTE_HWLOC_BASE_MAP_NONE == prte_hwloc_base_map) { rc = 0; } } return (0 == rc) ? 
PRTE_SUCCESS : PRTE_ERROR; } int prte_hwloc_base_memory_set(prte_hwloc_base_memory_segment_t *segments, size_t num_segments) { int rc = PRTE_SUCCESS; char *msg = NULL; size_t i; hwloc_cpuset_t cpuset = NULL; /* bozo check */ if (PRTE_SUCCESS != prte_hwloc_base_get_topology()) { msg = "hwloc_set_area_membind() failure - topology not available"; return prte_hwloc_base_report_bind_failure(__FILE__, __LINE__, msg, rc); } /* This module won't be used unless the process is already processor-bound. So find out where we're processor bound, and bind our memory there, too. */ cpuset = hwloc_bitmap_alloc(); if (NULL == cpuset) { rc = PRTE_ERR_OUT_OF_RESOURCE; msg = "hwloc_bitmap_alloc() failure"; goto out; } hwloc_get_cpubind(prte_hwloc_topology, cpuset, 0); for (i = 0; i < num_segments; ++i) { if (0 != hwloc_set_area_membind(prte_hwloc_topology, segments[i].mbs_start_addr, segments[i].mbs_len, cpuset, HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_STRICT)) { rc = PRTE_ERROR; msg = "hwloc_set_area_membind() failure"; goto out; } } out: if (NULL != cpuset) { hwloc_bitmap_free(cpuset); } if (PRTE_SUCCESS != rc) { return prte_hwloc_base_report_bind_failure(__FILE__, __LINE__, msg, rc); } return PRTE_SUCCESS; } int prte_hwloc_base_node_name_to_id(char *node_name, int *id) { /* GLB: fix me */ *id = atoi(node_name + 3); return PRTE_SUCCESS; } int prte_hwloc_base_membind(prte_hwloc_base_memory_segment_t *segs, size_t count, int node_id) { size_t i; int rc = PRTE_SUCCESS; char *msg = NULL; hwloc_cpuset_t cpuset = NULL; /* bozo check */ if (PRTE_SUCCESS != prte_hwloc_base_get_topology()) { msg = "hwloc_set_area_membind() failure - topology not available"; return prte_hwloc_base_report_bind_failure(__FILE__, __LINE__, msg, rc); } cpuset = hwloc_bitmap_alloc(); if (NULL == cpuset) { rc = PRTE_ERR_OUT_OF_RESOURCE; msg = "hwloc_bitmap_alloc() failure"; goto out; } hwloc_bitmap_set(cpuset, node_id); for (i = 0; i < count; i++) { if (0 != hwloc_set_area_membind(prte_hwloc_topology, 
segs[i].mbs_start_addr, segs[i].mbs_len, cpuset, HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_STRICT)) { rc = PRTE_ERROR; msg = "hwloc_set_area_membind() failure"; goto out; } } out: if (NULL != cpuset) { hwloc_bitmap_free(cpuset); } if (PRTE_SUCCESS != rc) { return prte_hwloc_base_report_bind_failure(__FILE__, __LINE__, msg, rc); } return PRTE_SUCCESS; } prrte-3.0.13/src/hwloc/Makefile.am0000664000175000017500000000117715145263240017065 0ustar alastairalastair# # Copyright (c) 2011-2020 Cisco Systems, Inc. All rights reserved # Copyright (c) 2015-2020 Intel, Inc. All rights reserved. # Copyright (c) 2018 Research Organization for Information Science # and Technology (RIST). All rights reserved. # Copyright (c) 2021 Nanook Consulting. All rights reserved. # Copyright (c) 2023 Jeffrey M. Squyres. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow # # $HEADER$ # headers += \ hwloc/hwloc-internal.h libprrte_la_SOURCES += \ hwloc/hwloc.c \ hwloc/hwloc_base_util.c \ hwloc/hwloc_base_maffinity.c prrte-3.0.13/src/hwloc/hwloc.c0000664000175000017500000007401315145263240016310 0ustar alastairalastair/* * Copyright (c) 2011-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2013-2020 Intel, Inc. All rights reserved. * Copyright (c) 2016-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2021-2026 Nanook Consulting All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #include "prte_config.h" #include "src/hwloc/hwloc-internal.h" #include "src/include/constants.h" #include "src/mca/base/pmix_base.h" #include "src/mca/mca.h" #include "src/mca/rmaps/base/base.h" #include "src/mca/schizo/schizo.h" #include "src/runtime/prte_globals.h" #include "src/threads/pmix_tsd.h" #include "src/util/pmix_argv.h" #include "src/util/pmix_output.h" #include "src/util/pmix_show_help.h" #include "src/util/prte_cmd_line.h" /* * Globals */ bool prte_hwloc_base_inited = false; hwloc_topology_t prte_hwloc_topology = NULL; hwloc_cpuset_t prte_hwloc_my_cpuset = NULL; prte_hwloc_base_map_t prte_hwloc_base_map = PRTE_HWLOC_BASE_MAP_NONE; prte_hwloc_base_mbfa_t prte_hwloc_base_mbfa = PRTE_HWLOC_BASE_MBFA_WARN; prte_binding_policy_t prte_hwloc_default_binding_policy = 0; char *prte_hwloc_default_cpu_list = NULL; char *prte_hwloc_base_topo_file = NULL; int prte_hwloc_base_output = -1; bool prte_hwloc_default_use_hwthread_cpus = false; bool prte_hwloc_synthetic_topo = false; hwloc_obj_type_t prte_hwloc_levels[] = { HWLOC_OBJ_MACHINE, HWLOC_OBJ_NUMANODE, HWLOC_OBJ_PACKAGE, HWLOC_OBJ_L3CACHE, HWLOC_OBJ_L2CACHE, HWLOC_OBJ_L1CACHE, HWLOC_OBJ_CORE, HWLOC_OBJ_PU }; static char *prte_hwloc_base_binding_policy = NULL; static int verbosity = 0; static char *default_cpu_list = NULL; static bool bind_to_core = false; static bool bind_to_socket = false; static char *enum_values = NULL; int prte_hwloc_base_register(void) { int ret; char *ptr; /* debug output */ ret = pmix_mca_base_var_register("prte", "hwloc", "base", "verbose", "Debug verbosity", PMIX_MCA_BASE_VAR_TYPE_INT, &verbosity); pmix_mca_base_var_register_synonym(ret, "opal", "hwloc", "base", "verbose", PMIX_MCA_BASE_VAR_SYN_FLAG_DEPRECATED); if (0 < verbosity) { prte_hwloc_base_output = pmix_output_open(NULL); pmix_output_set_verbosity(prte_hwloc_base_output, verbosity); } /* handle some deprecated options */ prte_hwloc_default_use_hwthread_cpus = 
false; (void) pmix_mca_base_var_register("prte", "hwloc", "base", "use_hwthreads_as_cpus", "Use hardware threads as independent cpus", PMIX_MCA_BASE_VAR_TYPE_BOOL, &prte_hwloc_default_use_hwthread_cpus); (void) pmix_mca_base_var_register("prte", "hwloc", "base", "bind_to_core", "Bind processes to cores", PMIX_MCA_BASE_VAR_TYPE_BOOL, &bind_to_core); (void) pmix_mca_base_var_register("prte", "hwloc", "base", "bind_to_socket", "Bind processes to sockets", PMIX_MCA_BASE_VAR_TYPE_BOOL, &bind_to_socket); /* hwloc_base_mbind_policy */ prte_hwloc_base_map = PRTE_HWLOC_BASE_MAP_NONE; ret = pmix_mca_base_var_register("prte", "hwloc", "default", "mem_alloc_policy", "Default general memory allocations placement policy (this is not memory binding). " "\"none\" means that no memory policy is applied. \"local_only\" means that a process' " "memory allocations will be restricted to its local NUMA domain. " "If using direct launch, this policy will not be in effect until after PMIx_Init. " "Note that operating system paging policies are unaffected by this setting. For " "example, if \"local_only\" is used and local NUMA domain memory is exhausted, a new " "memory allocation may cause paging.", PMIX_MCA_BASE_VAR_TYPE_STRING, &enum_values); if (0 > ret) { return ret; } if (NULL != enum_values) { if (0 == strncasecmp(enum_values, "none", strlen("none"))) { prte_hwloc_base_map = PRTE_HWLOC_BASE_MAP_NONE; } else if (0 == strncasecmp(enum_values, "local_only", strlen("local_only"))) { prte_hwloc_base_map = PRTE_HWLOC_BASE_MAP_LOCAL_ONLY; } else { pmix_show_help("help-prte-hwloc-base.txt", "invalid binding_policy", true, enum_values); return PRTE_ERR_SILENT; } } /* hwloc_base_bind_failure_action */ enum_values = NULL; prte_hwloc_base_mbfa = PRTE_HWLOC_BASE_MBFA_WARN; ret = pmix_mca_base_var_register("prte", "hwloc", "default", "mem_bind_failure_action", "What PRTE will do if it explicitly tries to bind memory to a specific NUMA " "location, and fails. 
Note that this is a different case than the general " "allocation policy described by mem_alloc_policy. A value of \"silent\" " "means that PRTE will proceed without comment. A value of \"warn\" means that " "PRTE will warn the first time this happens, but allow the job to continue " "(possibly with degraded performance). A value of \"error\" means that PRTE " "will abort the job if this happens.", PMIX_MCA_BASE_VAR_TYPE_STRING, &enum_values); if (0 > ret) { return ret; } if (NULL != enum_values) { if (0 == strncasecmp(enum_values, "silent", strlen("silent"))) { prte_hwloc_base_mbfa = PRTE_HWLOC_BASE_MBFA_SILENT; } else if (0 == strncasecmp(enum_values, "warn", strlen("warn"))) { prte_hwloc_base_mbfa = PRTE_HWLOC_BASE_MBFA_WARN; } else if (0 == strncasecmp(enum_values, "error", strlen("error"))) { prte_hwloc_base_mbfa = PRTE_HWLOC_BASE_MBFA_ERROR; } else { pmix_show_help("help-prte-hwloc-base.txt", "invalid binding_policy", true, enum_values); return PRTE_ERR_SILENT; } } /* NOTE: for future developers and readers of this code, the binding policies are strictly * limited to none, hwthread, core, l1cache, l2cache, l3cache, package, and numa * * The default binding policy can be modified by any combination of the following: * * overload-allowed - multiple processes can be bound to the same PU (core or HWT) * * if-supported - perform the binding if it is supported by the OS, but do not * generate an error if it cannot be done */ prte_hwloc_base_binding_policy = NULL; ret = pmix_mca_base_var_register("prte", NULL, NULL, "bindto", "Default policy for binding processes. Allowed values: none, hwthread, core, l1cache, " "l2cache, " "l3cache, numa, package, (\"none\" is the default when oversubscribed, \"core\" is " "the default otherwise). Allowed " "colon-delimited qualifiers: " "overload-allowed, if-supported, limit. 
For more details, see \"prterun --help bind-to\"" "The full directive need not be provided — " "only enough characters are required to uniquely identify the " "directive. Directive values are case insensitive", PMIX_MCA_BASE_VAR_TYPE_STRING, &prte_hwloc_base_binding_policy); (void) pmix_mca_base_var_register_synonym(ret, "prte", NULL, NULL, "bind_to", PMIX_MCA_BASE_VAR_SYN_FLAG_DEPRECATED); (void) pmix_mca_base_var_register_synonym(ret, "prte", "hwloc", "default", "binding_policy", PMIX_MCA_BASE_VAR_SYN_FLAG_DEPRECATED); if (NULL == prte_hwloc_base_binding_policy) { if (bind_to_core) { prte_hwloc_base_binding_policy = "core"; } else if (bind_to_socket) { prte_hwloc_base_binding_policy = "package"; } } /* Allow specification of a default CPU list - a comma-delimited list of cpu ranges that * are the default PUs for this DVM. CPUs are to be specified as LOGICAL indices. If a * cpuset is provided, then all process placements and bindings will be constrained to the * identified CPUs. IN ESSENCE, THIS IS A USER-DEFINED "SOFT" CGROUP. * * Example: if the default binding policy is "core", then each process will be bound to the * first unused core underneath the topological object upon which it has been mapped. In other * words, if two processes are mapped to a given package, then the first process will be bound * to core0 of that package, and the second process will be bound to core1. * * If the cpuset specified that only cores 10, 12, and 14 were to be used, then the first * process would be bound to core10 and the second process would be bound to core12. * * If the default binding policy had been set to "package", and if cores 10, 12, and 14 are all * on the same package, then both processes would be bound to cores 10, 12, and 14. Note that * they would have been bound to all PUs on the package if the cpuset had not been given. 
* * If cores 10 and 12 are on package0, and core14 is on package1, then if the first process is * mapped to package0 and we are using a binding policy of "package", the first process would be * bound to core10 and core12. If the second process were mapped to package1, then it would be * bound only to core14 as that is the only PU in the cpuset that lies in package1. */ default_cpu_list = NULL; ret = pmix_mca_base_var_register("prte", "hwloc", "default", "cpu_list", "Comma-separated list of ranges specifying logical cpus to be used by the DVM. " "Supported modifier:HWTCPUS (ranges specified in hwthreads) or CORECPUS " "(default: ranges specified in cores)", PMIX_MCA_BASE_VAR_TYPE_STRING, &default_cpu_list); if (NULL != default_cpu_list) { if (NULL != (ptr = strrchr(default_cpu_list, ':'))) { *ptr = '\0'; prte_hwloc_default_cpu_list = strdup(default_cpu_list); ++ptr; if (0 == strcasecmp(ptr, "HWTCPUS")) { prte_hwloc_default_use_hwthread_cpus = true; } else if (0 == strcasecmp(ptr, "CORECPUS")) { prte_hwloc_default_use_hwthread_cpus = false; } else { pmix_show_help("help-prte-hwloc-base.txt", "bad-processor-type", true, default_cpu_list, ptr); return PRTE_ERR_BAD_PARAM; } } else { prte_hwloc_default_cpu_list = strdup(default_cpu_list); } } prte_hwloc_base_topo_file = NULL; ret = pmix_mca_base_var_register("prte", "hwloc", "use", "topo_file", "Read local topology from file instead of directly sensing it", PMIX_MCA_BASE_VAR_TYPE_STRING, &prte_hwloc_base_topo_file); (void) pmix_mca_base_var_register_synonym(ret, "prte", "ras", "simulator", "topo_files", PMIX_MCA_BASE_VAR_SYN_FLAG_DEPRECATED); (void) pmix_mca_base_var_register_synonym(ret, "prte", "hwloc", "base", "use_topo_file", PMIX_MCA_BASE_VAR_SYN_FLAG_DEPRECATED); /* register parameters */ return PRTE_SUCCESS; } int prte_hwloc_base_open(void) { int rc; if (prte_hwloc_base_inited) { return PRTE_SUCCESS; } prte_hwloc_base_inited = true; /* check the provided default binding policy for correctness - specifically want 
to ensure * there are no disallowed qualifiers and setup the global param */ if (PRTE_SUCCESS != (rc = prte_hwloc_base_set_binding_policy(NULL, prte_hwloc_base_binding_policy))) { return rc; } return PRTE_SUCCESS; } void prte_hwloc_base_close(void) { if (!prte_hwloc_base_inited) { return; } if (NULL != prte_hwloc_default_cpu_list) { free(prte_hwloc_default_cpu_list); } /* destroy the topology */ if (NULL != prte_hwloc_topology) { hwloc_topology_destroy(prte_hwloc_topology); prte_hwloc_topology = NULL; } /* All done */ prte_hwloc_base_inited = false; } int prte_hwloc_base_set_default_binding(void *jd, void *opt) { prte_job_t *jdata = (prte_job_t*)jd; prte_rmaps_options_t *options = (prte_rmaps_options_t*)opt; prte_mapping_policy_t mpol; if (prte_get_attribute(&jdata->attributes, PRTE_JOB_PES_PER_PROC, NULL, PMIX_UINT16)) { /* bind to cpus */ if (options->use_hwthreads || prte_rmaps_base.require_hwtcpus) { /* if we are using hwthread cpus, then bind to those */ pmix_output_verbose(options->verbosity, options->stream, "setdefaultbinding[%d] binding not given - using byhwthread", __LINE__); PRTE_SET_DEFAULT_BINDING_POLICY(jdata->map->binding, PRTE_BIND_TO_HWTHREAD); } else { /* bind to core */ pmix_output_verbose(options->verbosity, options->stream, "setdefaultbinding[%d] binding not given - using bycore", __LINE__); PRTE_SET_DEFAULT_BINDING_POLICY(jdata->map->binding, PRTE_BIND_TO_CORE); } } else if (PRTE_FLAG_TEST(jdata, PRTE_JOB_FLAG_TOOL)) { /* tools are never bound */ PRTE_SET_BINDING_POLICY(jdata->map->binding, PRTE_BIND_TO_NONE); } else { /* if we are mapping by some object, then we default * to binding to that object */ mpol = PRTE_GET_MAPPING_POLICY(jdata->map->mapping); if (PRTE_MAPPING_BYHWTHREAD == mpol) { pmix_output_verbose(options->verbosity, options->stream, "setdefaultbinding[%d] binding not given - using byhwthread", __LINE__); PRTE_SET_DEFAULT_BINDING_POLICY(jdata->map->binding, PRTE_BIND_TO_HWTHREAD); } else if (PRTE_MAPPING_BYCORE == mpol) { 
pmix_output_verbose(options->verbosity, options->stream, "setdefaultbinding[%d] binding not given - using bycore", __LINE__); PRTE_SET_DEFAULT_BINDING_POLICY(jdata->map->binding, PRTE_BIND_TO_CORE); } else if (PRTE_MAPPING_BYL1CACHE == mpol) { pmix_output_verbose(options->verbosity, options->stream, "setdefaultbinding[%d] binding not given - using byL1", __LINE__); PRTE_SET_DEFAULT_BINDING_POLICY(jdata->map->binding, PRTE_BIND_TO_L1CACHE); } else if (PRTE_MAPPING_BYL2CACHE == mpol) { pmix_output_verbose(options->verbosity, options->stream, "setdefaultbinding[%d] binding not given - using byL2", __LINE__); PRTE_SET_DEFAULT_BINDING_POLICY(jdata->map->binding, PRTE_BIND_TO_L2CACHE); } else if (PRTE_MAPPING_BYL3CACHE == mpol) { pmix_output_verbose(options->verbosity, options->stream, "setdefaultbinding[%d] binding not given - using byL3", __LINE__); PRTE_SET_DEFAULT_BINDING_POLICY(jdata->map->binding, PRTE_BIND_TO_L3CACHE); } else if (PRTE_MAPPING_BYNUMA == mpol) { pmix_output_verbose(options->verbosity, options->stream, "setdefaultbinding[%d] binding not given - using bynuma", __LINE__); PRTE_SET_DEFAULT_BINDING_POLICY(jdata->map->binding, PRTE_BIND_TO_NUMA); } else if (PRTE_MAPPING_BYPACKAGE == mpol) { pmix_output_verbose(options->verbosity, options->stream, "setdefaultbinding[%d] binding not given - using bypackage", __LINE__); PRTE_SET_DEFAULT_BINDING_POLICY(jdata->map->binding, PRTE_BIND_TO_PACKAGE); } else if (PRTE_MAPPING_PELIST == mpol) { if (options->use_hwthreads) { /* if we are using hwthread cpus, then bind to those */ pmix_output_verbose(options->verbosity, options->stream, "setdefaultbinding[%d] binding not given - using byhwthread for pe-list", __LINE__); PRTE_SET_DEFAULT_BINDING_POLICY(jdata->map->binding, PRTE_BIND_TO_HWTHREAD); } else { /* otherwise bind to core */ pmix_output_verbose(options->verbosity, options->stream, "setdefaultbinding[%d] binding not given - using bycore for pe-list", __LINE__); 
PRTE_SET_DEFAULT_BINDING_POLICY(jdata->map->binding, PRTE_BIND_TO_CORE); } } else if (PRTE_MAPPING_PPR == mpol) { if (HWLOC_OBJ_MACHINE == options->maptype) { if (options->nprocs <= 2) { /* we are mapping by node or some other non-object method */ if (options->use_hwthreads || prte_rmaps_base.require_hwtcpus) { /* if we are using hwthread cpus, then bind to those */ pmix_output_verbose(options->verbosity, options->stream, "setdefaultbinding[%d] binding not given - using byhwthread", __LINE__); PRTE_SET_DEFAULT_BINDING_POLICY(jdata->map->binding, PRTE_BIND_TO_HWTHREAD); } else { /* otherwise bind to core */ pmix_output_verbose(options->verbosity, options->stream, "setdefaultbinding[%d] binding not given - using bycore", __LINE__); PRTE_SET_DEFAULT_BINDING_POLICY(jdata->map->binding, PRTE_BIND_TO_CORE); } } else { pmix_output_verbose(options->verbosity, options->stream, "setdefaultbinding[%d] binding not given - using bynuma", __LINE__); PRTE_SET_DEFAULT_BINDING_POLICY(jdata->map->binding, PRTE_BIND_TO_NUMA); } } else if (HWLOC_OBJ_PACKAGE == options->maptype) { PRTE_SET_DEFAULT_BINDING_POLICY(jdata->map->binding, PRTE_BIND_TO_PACKAGE); } else if (HWLOC_OBJ_NUMANODE== options->maptype) { PRTE_SET_DEFAULT_BINDING_POLICY(jdata->map->binding, PRTE_BIND_TO_NUMA); #if HWLOC_API_VERSION < 0x20000 } else if (HWLOC_OBJ_CACHE == options->maptype) { if (1 == options->cmaplvl) { PRTE_SET_DEFAULT_BINDING_POLICY(jdata->map->binding, PRTE_BIND_TO_L1CACHE); } else if (2 == options->cmaplvl) { PRTE_SET_DEFAULT_BINDING_POLICY(jdata->map->binding, PRTE_BIND_TO_L2CACHE); } else if (3 == options->cmaplvl) { PRTE_SET_DEFAULT_BINDING_POLICY(jdata->map->binding, PRTE_BIND_TO_L3CACHE); } #else } else if (HWLOC_OBJ_L1CACHE == options->maptype) { PRTE_SET_DEFAULT_BINDING_POLICY(jdata->map->binding, PRTE_BIND_TO_L1CACHE); } else if (HWLOC_OBJ_L2CACHE == options->maptype) { PRTE_SET_DEFAULT_BINDING_POLICY(jdata->map->binding, PRTE_BIND_TO_L2CACHE); } else if (HWLOC_OBJ_L3CACHE == 
options->maptype) { PRTE_SET_DEFAULT_BINDING_POLICY(jdata->map->binding, PRTE_BIND_TO_L3CACHE); #endif } else if (HWLOC_OBJ_CORE == options->maptype) { PRTE_SET_DEFAULT_BINDING_POLICY(jdata->map->binding, PRTE_BIND_TO_CORE); } else if (HWLOC_OBJ_PU == options->maptype) { PRTE_SET_DEFAULT_BINDING_POLICY(jdata->map->binding, PRTE_BIND_TO_HWTHREAD); } } else { if (options->nprocs <= 2) { /* we are mapping by node or some other non-object method */ if (options->use_hwthreads || prte_rmaps_base.require_hwtcpus) { /* if we are using hwthread cpus, then bind to those */ pmix_output_verbose(options->verbosity, options->stream, "setdefaultbinding[%d] binding not given - using byhwthread", __LINE__); PRTE_SET_DEFAULT_BINDING_POLICY(jdata->map->binding, PRTE_BIND_TO_HWTHREAD); } else { /* otherwise bind to core */ pmix_output_verbose(options->verbosity, options->stream, "setdefaultbinding[%d] binding not given - using bycore", __LINE__); PRTE_SET_DEFAULT_BINDING_POLICY(jdata->map->binding, PRTE_BIND_TO_CORE); } } else { pmix_output_verbose(options->verbosity, options->stream, "setdefaultbinding[%d] binding not given - using bynuma", __LINE__); PRTE_SET_DEFAULT_BINDING_POLICY(jdata->map->binding, PRTE_BIND_TO_NUMA); } } } /* they might have set the overload-allowed flag while wanting PRRTE * to set the default binding - don't override it */ if (!PRTE_BIND_OVERLOAD_SET(jdata->map->binding)) { if (PRTE_BIND_OVERLOAD_ALLOWED(prte_hwloc_default_binding_policy)) { jdata->map->binding |= PRTE_BIND_ALLOW_OVERLOAD; } } return PRTE_SUCCESS; } static bool fns_init = false; static pmix_tsd_key_t print_tsd_key; char *prte_hwloc_print_null = "NULL"; static void buffer_cleanup(void *value) { int i; prte_hwloc_print_buffers_t *ptr; if (NULL != value) { ptr = (prte_hwloc_print_buffers_t *) value; for (i = 0; i < PRTE_HWLOC_PRINT_NUM_BUFS; i++) { free(ptr->buffers[i]); } free(ptr); } } prte_hwloc_print_buffers_t *prte_hwloc_get_print_buffer(void) { prte_hwloc_print_buffers_t *ptr; int ret, i; 
if (!fns_init) { /* setup the print_args function */ if (PRTE_SUCCESS != (ret = pmix_tsd_key_create(&print_tsd_key, buffer_cleanup))) { return NULL; } fns_init = true; } ret = pmix_tsd_getspecific(print_tsd_key, (void **) &ptr); if (PRTE_SUCCESS != ret) return NULL; if (NULL == ptr) { ptr = (prte_hwloc_print_buffers_t *) malloc(sizeof(prte_hwloc_print_buffers_t)); for (i = 0; i < PRTE_HWLOC_PRINT_NUM_BUFS; i++) { ptr->buffers[i] = (char *) malloc((PRTE_HWLOC_PRINT_MAX_SIZE + 1) * sizeof(char)); } ptr->cntr = 0; ret = pmix_tsd_setspecific(print_tsd_key, (void *) ptr); } return (prte_hwloc_print_buffers_t *) ptr; } char *prte_hwloc_base_print_locality(prte_hwloc_locality_t locality) { prte_hwloc_print_buffers_t *ptr; int idx; ptr = prte_hwloc_get_print_buffer(); if (NULL == ptr) { return prte_hwloc_print_null; } /* cycle around the ring */ if (PRTE_HWLOC_PRINT_NUM_BUFS == ptr->cntr) { ptr->cntr = 0; } idx = 0; if (PRTE_PROC_ON_LOCAL_CLUSTER(locality)) { ptr->buffers[ptr->cntr][idx++] = 'C'; ptr->buffers[ptr->cntr][idx++] = 'L'; ptr->buffers[ptr->cntr][idx++] = ':'; } if (PRTE_PROC_ON_LOCAL_CU(locality)) { ptr->buffers[ptr->cntr][idx++] = 'C'; ptr->buffers[ptr->cntr][idx++] = 'U'; ptr->buffers[ptr->cntr][idx++] = ':'; } if (PRTE_PROC_ON_LOCAL_NODE(locality)) { ptr->buffers[ptr->cntr][idx++] = 'N'; ptr->buffers[ptr->cntr][idx++] = ':'; } if (PRTE_PROC_ON_LOCAL_PACKAGE(locality)) { ptr->buffers[ptr->cntr][idx++] = 'S'; ptr->buffers[ptr->cntr][idx++] = ':'; } if (PRTE_PROC_ON_LOCAL_NUMA(locality)) { ptr->buffers[ptr->cntr][idx++] = 'N'; ptr->buffers[ptr->cntr][idx++] = 'M'; ptr->buffers[ptr->cntr][idx++] = ':'; } if (PRTE_PROC_ON_LOCAL_L3CACHE(locality)) { ptr->buffers[ptr->cntr][idx++] = 'L'; ptr->buffers[ptr->cntr][idx++] = '3'; ptr->buffers[ptr->cntr][idx++] = ':'; } if (PRTE_PROC_ON_LOCAL_L2CACHE(locality)) { ptr->buffers[ptr->cntr][idx++] = 'L'; ptr->buffers[ptr->cntr][idx++] = '2'; ptr->buffers[ptr->cntr][idx++] = ':'; } if (PRTE_PROC_ON_LOCAL_L1CACHE(locality)) { 
ptr->buffers[ptr->cntr][idx++] = 'L'; ptr->buffers[ptr->cntr][idx++] = '1'; ptr->buffers[ptr->cntr][idx++] = ':'; } if (PRTE_PROC_ON_LOCAL_CORE(locality)) { ptr->buffers[ptr->cntr][idx++] = 'C'; ptr->buffers[ptr->cntr][idx++] = ':'; } if (PRTE_PROC_ON_LOCAL_HWTHREAD(locality)) { ptr->buffers[ptr->cntr][idx++] = 'H'; ptr->buffers[ptr->cntr][idx++] = 'w'; ptr->buffers[ptr->cntr][idx++] = 't'; ptr->buffers[ptr->cntr][idx++] = ':'; } if (0 < idx) { ptr->buffers[ptr->cntr][idx - 1] = '\0'; } else if (PRTE_PROC_NON_LOCAL & locality) { ptr->buffers[ptr->cntr][idx++] = 'N'; ptr->buffers[ptr->cntr][idx++] = 'O'; ptr->buffers[ptr->cntr][idx++] = 'N'; ptr->buffers[ptr->cntr][idx++] = '\0'; } else { /* must be an unknown locality */ ptr->buffers[ptr->cntr][idx++] = 'U'; ptr->buffers[ptr->cntr][idx++] = 'N'; ptr->buffers[ptr->cntr][idx++] = 'K'; ptr->buffers[ptr->cntr][idx++] = '\0'; } return ptr->buffers[ptr->cntr]; } int prte_hwloc_base_set_binding_policy(void *jdat, char *spec) { int i; prte_binding_policy_t tmp; char **quals, *myspec, *ptr; prte_job_t *jdata = (prte_job_t *) jdat; /* set default */ tmp = 0; /* binding specification */ if (NULL == spec) { return PRTE_SUCCESS; } myspec = strdup(spec); // protect the input /* check for qualifiers */ ptr = strchr(myspec, ':'); if (NULL != ptr) { *ptr = '\0'; ++ptr; quals = PMIX_ARGV_SPLIT_COMPAT(ptr, ':'); for (i = 0; NULL != quals[i]; i++) { if (PMIX_CHECK_CLI_OPTION(quals[i], PRTE_CLI_IF_SUPP)) { tmp |= PRTE_BIND_IF_SUPPORTED; } else if (PMIX_CHECK_CLI_OPTION(quals[i], PRTE_CLI_OVERLOAD)) { tmp |= (PRTE_BIND_ALLOW_OVERLOAD | PRTE_BIND_OVERLOAD_GIVEN); } else if (PMIX_CHECK_CLI_OPTION(quals[i], PRTE_CLI_NOOVERLOAD)) { tmp = (tmp & ~PRTE_BIND_ALLOW_OVERLOAD); tmp |= PRTE_BIND_OVERLOAD_GIVEN; } else if (PMIX_CHECK_CLI_OPTION(quals[i], PRTE_CLI_REPORT)) { if (NULL == jdata) { pmix_show_help("help-prte-rmaps-base.txt", "unsupported-default-modifier", true, "binding policy", quals[i]); free(myspec); return PRTE_ERR_SILENT; } 
prte_set_attribute(&jdata->attributes, PRTE_JOB_REPORT_BINDINGS, PRTE_ATTR_GLOBAL, NULL, PMIX_BOOL); } else { /* unknown option */ pmix_show_help("help-prte-hwloc-base.txt", "unrecognized-modifier", true, spec); PMIX_ARGV_FREE_COMPAT(quals); free(myspec); return PRTE_ERR_BAD_PARAM; } } PMIX_ARGV_FREE_COMPAT(quals); } if (PMIX_CHECK_CLI_OPTION(myspec, PRTE_CLI_NONE)) { PRTE_SET_BINDING_POLICY(tmp, PRTE_BIND_TO_NONE); } else if (PMIX_CHECK_CLI_OPTION(myspec, PRTE_CLI_HWT)) { PRTE_SET_BINDING_POLICY(tmp, PRTE_BIND_TO_HWTHREAD); } else if (PMIX_CHECK_CLI_OPTION(myspec, PRTE_CLI_CORE)) { if (prte_rmaps_base.require_hwtcpus) { PRTE_SET_BINDING_POLICY(tmp, PRTE_BIND_TO_HWTHREAD); } else { PRTE_SET_BINDING_POLICY(tmp, PRTE_BIND_TO_CORE); } } else if (PMIX_CHECK_CLI_OPTION(myspec, PRTE_CLI_L1CACHE)) { PRTE_SET_BINDING_POLICY(tmp, PRTE_BIND_TO_L1CACHE); } else if (PMIX_CHECK_CLI_OPTION(myspec, PRTE_CLI_L2CACHE)) { PRTE_SET_BINDING_POLICY(tmp, PRTE_BIND_TO_L2CACHE); } else if (PMIX_CHECK_CLI_OPTION(myspec, PRTE_CLI_L3CACHE)) { PRTE_SET_BINDING_POLICY(tmp, PRTE_BIND_TO_L3CACHE); } else if (PMIX_CHECK_CLI_OPTION(myspec, PRTE_CLI_NUMA)) { PRTE_SET_BINDING_POLICY(tmp, PRTE_BIND_TO_NUMA); } else if (PMIX_CHECK_CLI_OPTION(myspec, PRTE_CLI_PACKAGE)) { PRTE_SET_BINDING_POLICY(tmp, PRTE_BIND_TO_PACKAGE); } else { pmix_show_help("help-prte-hwloc-base.txt", "invalid binding_policy", true, "binding", spec); free(myspec); return PRTE_ERR_BAD_PARAM; } free(myspec); if (NULL == jdata) { prte_hwloc_default_binding_policy = tmp; } else { if (NULL == jdata->map) { PRTE_ERROR_LOG(PRTE_ERR_BAD_PARAM); return PRTE_ERR_BAD_PARAM; } jdata->map->binding = tmp; } return PRTE_SUCCESS; } prrte-3.0.13/src/hwloc/hwloc_base_util.c0000664000175000017500000021464015145263240020341 0ustar alastairalastair/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. 
All rights reserved. * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2012-2017 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2013-2020 Intel, Inc. All rights reserved. * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (C) 2018 Mellanox Technologies, Ltd. * All rights reserved. * Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved. * Copyright (c) 2019-2020 IBM Corporation. All rights reserved. * Copyright (c) 2021-2026 Nanook Consulting All rights reserved. * Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #define PRTE_HWLOC_WANT_SHMEM 1 #include "prte_config.h" #ifdef HAVE_SYS_TYPES_H # include #endif #ifdef HAVE_UNISTD_H # include #endif #ifdef HAVE_ENDIAN_H # include #endif #ifdef HAVE_SYS_STAT_H # include #endif #if HAVE_FCNTL_H # include #endif #include "src/include/constants.h" #include "src/pmix/pmix-internal.h" #include "src/runtime/prte_globals.h" #include "src/threads/pmix_tsd.h" #include "src/util/pmix_argv.h" #include "src/util/pmix_os_dirpath.h" #include "src/util/pmix_output.h" #include "src/util/pmix_printf.h" #include "src/util/proc_info.h" #include "src/util/pmix_show_help.h" #include "src/hwloc/hwloc-internal.h" bool prte_hwloc_base_core_cpus(hwloc_topology_t topo) { hwloc_obj_t obj; hwloc_obj_t pu; obj = hwloc_get_obj_by_type(topo, HWLOC_OBJ_CORE, 0); if (NULL == obj) { return false; } /* see if the cpuset of a core match that of a PU */ pu = hwloc_get_obj_by_type(topo, HWLOC_OBJ_PU, 0); /* if the two are equal, then we really don't have * cores in this topology */ if (hwloc_bitmap_isequal(obj->cpuset, pu->cpuset)) { return false; } /* we do have cores */ return true; } /* * Provide the hwloc object that corresponds to the given * processor id of the given type. Remember: "processor" here [usually] means "core" -- * except that on some platforms, hwloc won't find any cores; it'll * only find PUs (!). On such platforms, then do the same calculation * but with PUs instead of COREs. */ hwloc_obj_t prte_hwloc_base_get_pu(hwloc_topology_t topo, bool use_hwthread_cpus, int lid) { hwloc_obj_type_t obj_type = HWLOC_OBJ_CORE; hwloc_obj_t obj; /* hwloc isn't able to find cores on all platforms. Example: PPC64 running RHEL 5.4 (linux kernel 2.6.18) only reports NUMA nodes and PU's. Fine. 
However, note that hwloc_get_obj_by_type() will return NULL in 2 (effectively) different cases: - no objects of the requested type were found - the Nth object of the requested type was not found So first we have to see if we can find *any* cores by looking for the 0th core. If we find it, then try to find the Nth core. Otherwise, try to find the Nth PU. */ if (use_hwthread_cpus || !prte_hwloc_base_core_cpus(topo)) { obj_type = HWLOC_OBJ_PU; } pmix_output_verbose(5, prte_hwloc_base_output, "Searching for %d LOGICAL PU", lid); /* Now do the actual lookup. */ obj = hwloc_get_obj_by_type(topo, obj_type, lid); pmix_output_verbose(5, prte_hwloc_base_output, "logical cpu %d %s found", lid, (NULL == obj) ? "not" : "is"); /* Found the right core (or PU). Return the object */ return obj; } hwloc_cpuset_t prte_hwloc_base_generate_cpuset(hwloc_topology_t topo, bool use_hwthread_cpus, char *cpulist) { hwloc_cpuset_t avail = NULL, pucpus, res; char **ranges = NULL, **range = NULL; int idx, cpu, start, end; hwloc_obj_t pu; /* find the specified logical cpus */ ranges = PMIX_ARGV_SPLIT_COMPAT(cpulist, ','); avail = hwloc_bitmap_alloc(); hwloc_bitmap_zero(avail); res = hwloc_bitmap_alloc(); pucpus = hwloc_bitmap_alloc(); for (idx = 0; idx < PMIX_ARGV_COUNT_COMPAT(ranges); idx++) { range = PMIX_ARGV_SPLIT_COMPAT(ranges[idx], '-'); switch (PMIX_ARGV_COUNT_COMPAT(range)) { case 1: /* only one cpu given - get that object */ cpu = strtoul(range[0], NULL, 10); if (NULL != (pu = prte_hwloc_base_get_pu(topo, use_hwthread_cpus, cpu))) { #if HWLOC_API_VERSION < 0x20000 hwloc_bitmap_and(pucpus, pu->online_cpuset, pu->allowed_cpuset); #else hwloc_bitmap_and(pucpus, pu->cpuset, hwloc_topology_get_allowed_cpuset(topo)); #endif hwloc_bitmap_or(res, avail, pucpus); hwloc_bitmap_copy(avail, res); } break; case 2: /* range given */ start = strtoul(range[0], NULL, 10); end = strtoul(range[1], NULL, 10); for (cpu = start; cpu <= end; cpu++) { if (NULL != (pu = prte_hwloc_base_get_pu(topo, 
use_hwthread_cpus, cpu))) { #if HWLOC_API_VERSION < 0x20000 hwloc_bitmap_and(pucpus, pu->online_cpuset, pu->allowed_cpuset); #else hwloc_bitmap_and(pucpus, pu->cpuset, hwloc_topology_get_allowed_cpuset(topo)); #endif hwloc_bitmap_or(res, avail, pucpus); hwloc_bitmap_copy(avail, res); } } break; default: break; } PMIX_ARGV_FREE_COMPAT(range); } if (NULL != ranges) { PMIX_ARGV_FREE_COMPAT(ranges); } hwloc_bitmap_free(res); hwloc_bitmap_free(pucpus); return avail; } hwloc_cpuset_t prte_hwloc_base_setup_summary(hwloc_topology_t topo) { hwloc_cpuset_t avail = NULL; avail = hwloc_bitmap_alloc(); /* get the root available cpuset */ #if HWLOC_API_VERSION < 0x20000 hwloc_obj_t root; root = hwloc_get_root_obj(topo); if (NULL == root->online_cpuset && NULL == root->allowed_cpuset) { /* we are hosed */ return NULL; } if (NULL == root->online_cpuset) { hwloc_bitmap_copy(avail, root->allowed_cpuset); } else if (NULL == root->allowed_cpuset) { hwloc_bitmap_copy(avail, root->online_cpuset); } else { hwloc_bitmap_and(avail, root->online_cpuset, root->allowed_cpuset); } #else hwloc_bitmap_copy(avail, hwloc_topology_get_allowed_cpuset(topo)); #endif return avail; } /* determine the node-level available cpuset based on * online vs allowed vs user-specified cpus */ hwloc_cpuset_t prte_hwloc_base_filter_cpus(hwloc_topology_t topo) { hwloc_cpuset_t avail = NULL; /* process any specified default cpu set against this topology */ if (NULL == prte_hwloc_default_cpu_list) { PMIX_OUTPUT_VERBOSE((5, prte_hwloc_base_output, "hwloc:base: no cpus specified - using root available cpuset")); avail = prte_hwloc_base_setup_summary(topo); } else { PMIX_OUTPUT_VERBOSE((5, prte_hwloc_base_output, "hwloc:base: filtering cpuset")); avail = prte_hwloc_base_generate_cpuset(topo, prte_hwloc_default_use_hwthread_cpus, prte_hwloc_default_cpu_list); } return avail; } static void fill_cache_line_size(void) { bool found = false; unsigned size = 0, cache_level = 2, i = 0; hwloc_obj_type_t cache_object = 
HWLOC_OBJ_L2CACHE; hwloc_obj_t obj; /* Look for the smallest L2 cache size */ size = 4096; while (cache_level > 0 && !found) { i = 0; while (1) { obj = prte_hwloc_base_get_obj_by_type(prte_hwloc_topology, cache_object, cache_level, i); if (NULL == obj) { --cache_level; cache_object = HWLOC_OBJ_L1CACHE; break; } else { if (NULL != obj->attr && obj->attr->cache.linesize > 0 && size > obj->attr->cache.linesize) { size = obj->attr->cache.linesize; found = true; } } ++i; } } /* If we found an L2 cache size in the hwloc data, save it in prte_cache_line_size. Otherwise, we'll leave whatever default was set in prte_init.c */ if (found) { prte_cache_line_size = (int) size; } } int prte_hwloc_base_get_topology(void) { int rc; pmix_output_verbose(2, prte_hwloc_base_output, "hwloc:base:get_topology"); /* see if we already have it */ if (NULL != prte_hwloc_topology) { return PRTE_SUCCESS; } if (NULL == prte_hwloc_base_topo_file) { pmix_output_verbose(1, prte_hwloc_base_output, "hwloc:base discovering topology"); if (0 != hwloc_topology_init(&prte_hwloc_topology) || 0 != prte_hwloc_base_topology_set_flags(prte_hwloc_topology, HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM, true) || 0 != hwloc_topology_load(prte_hwloc_topology)) { PRTE_ERROR_LOG(PRTE_ERR_NOT_SUPPORTED); return PRTE_ERR_NOT_SUPPORTED; } } else { pmix_output_verbose(1, prte_hwloc_base_output, "hwloc:base loading topology from file %s", prte_hwloc_base_topo_file); if (PRTE_SUCCESS != (rc = prte_hwloc_base_set_topology(prte_hwloc_base_topo_file))) { return rc; } prte_hwloc_synthetic_topo = true; } /* fill prte_cache_line_size global with the smallest L1 cache line size */ fill_cache_line_size(); return PRTE_SUCCESS; } int prte_hwloc_base_set_topology(char *topofile) { hwloc_obj_t obj; unsigned j, k; int rc; struct hwloc_topology_support *support; PMIX_OUTPUT_VERBOSE((5, prte_hwloc_base_output, "hwloc:base:set_topology %s", topofile)); if (NULL != prte_hwloc_topology) { hwloc_topology_destroy(prte_hwloc_topology); } if (0 != 
hwloc_topology_init(&prte_hwloc_topology)) { return PRTE_ERR_NOT_SUPPORTED; } if (0 != hwloc_topology_set_xml(prte_hwloc_topology, topofile)) { hwloc_topology_destroy(prte_hwloc_topology); PMIX_OUTPUT_VERBOSE((5, prte_hwloc_base_output, "hwloc:base:set_topology bad topo file")); return PRTE_ERR_NOT_SUPPORTED; } /* since we are loading this from an external source, we have to * explicitly set a flag so hwloc sets things up correctly */ rc = prte_hwloc_base_topology_set_flags(prte_hwloc_topology, 0, true); if (0 != rc) { hwloc_topology_destroy(prte_hwloc_topology); return PRTE_ERR_NOT_SUPPORTED; } /* unfortunately, early hwloc does not include support info in its * xml output :-(( We default to assuming it is present as * systems that use this option are likely to provide * binding support */ support = (struct hwloc_topology_support *) hwloc_topology_get_support(prte_hwloc_topology); support->cpubind->set_thisproc_cpubind = true; support->membind->set_thisproc_membind = true; if (0 != hwloc_topology_load(prte_hwloc_topology)) { hwloc_topology_destroy(prte_hwloc_topology); PMIX_OUTPUT_VERBOSE((5, prte_hwloc_base_output, "hwloc:base:set_topology failed to load")); return PRTE_ERR_NOT_SUPPORTED; } /* remove the hostname from the topology. Unfortunately, hwloc * decided to add the source hostname to the "topology", thus * rendering it unusable as a pure topological description. So * we remove that information here. 
*/ obj = hwloc_get_root_obj(prte_hwloc_topology); for (k = 0; k < obj->infos_count; k++) { if (NULL == obj->infos || NULL == obj->infos[k].name || NULL == obj->infos[k].value) { continue; } if (0 == strncmp(obj->infos[k].name, "HostName", strlen("HostName"))) { free(obj->infos[k].name); free(obj->infos[k].value); /* left justify the array */ for (j = k; j < obj->infos_count - 1; j++) { obj->infos[j] = obj->infos[j + 1]; } obj->infos[obj->infos_count - 1].name = NULL; obj->infos[obj->infos_count - 1].value = NULL; obj->infos_count--; break; } } /* fill prte_cache_line_size global with the smallest L1 cache line size */ fill_cache_line_size(); /* all done */ return PRTE_SUCCESS; } int prte_hwloc_base_report_bind_failure(const char *file, int line, const char *msg, int rc) { static int already_reported = 0; if (!already_reported && PRTE_HWLOC_BASE_MBFA_SILENT != prte_hwloc_base_mbfa) { pmix_show_help( "help-prte-hwloc-base.txt", "mbind failure", true, prte_process_info.nodename, getpid(), file, line, msg, (PRTE_HWLOC_BASE_MBFA_WARN == prte_hwloc_base_mbfa) ? "Warning -- your job will continue, but possibly with degraded performance" : "ERROR -- your job may abort or behave erraticly"); already_reported = 1; return rc; } return PRTE_SUCCESS; } /* determine if there is a single cpu in a bitmap */ bool prte_hwloc_base_single_cpu(hwloc_cpuset_t cpuset) { int i; bool one = false; /* count the number of bits that are set - there is * one bit for each available pu. We could just * subtract the first and last indices, but there * may be "holes" in the bitmap corresponding to * offline or unallowed cpus - so we have to * search for them. 
Return false if we anything * other than one */ for (i = hwloc_bitmap_first(cpuset); i <= hwloc_bitmap_last(cpuset); i++) { if (hwloc_bitmap_isset(cpuset, i)) { if (one) { return false; } one = true; } } return one; } /* get the number of pu's under a given hwloc object */ unsigned int prte_hwloc_base_get_npus(hwloc_topology_t topo, bool use_hwthread_cpus, hwloc_cpuset_t envelope, hwloc_obj_t obj) { unsigned int cnt = 0; hwloc_cpuset_t avail; if (NULL == obj->cpuset) { return 0; } if (NULL == envelope) { avail = hwloc_bitmap_dup(obj->cpuset); } else { avail = hwloc_bitmap_alloc(); hwloc_bitmap_and(avail, obj->cpuset, envelope); } if (!use_hwthread_cpus) { /* if we are treating cores as cpus, then we really * want to know how many cores are in this object. * hwloc sets a bit for each "pu", so we can't just * count bits in this case as there may be more than * one hwthread/core. Instead, find the number of cores * in the system */ cnt = hwloc_get_nbobjs_inside_cpuset_by_type(topo, avail, HWLOC_OBJ_CORE); } else { /* count the number of bits that are set - there is * one bit for each available pu. We could just * subtract the first and last indices, but there * may be "holes" in the bitmap corresponding to * offline or unallowed cpus - so we count them with * the bitmap "weight" (a.k.a. 
population count) function */ cnt = hwloc_bitmap_weight(avail); } hwloc_bitmap_free(avail); return cnt; } unsigned int prte_hwloc_base_get_obj_idx(hwloc_topology_t topo, hwloc_obj_t obj) { unsigned cache_level = 0; hwloc_obj_t ptr; unsigned int nobjs, i; PMIX_OUTPUT_VERBOSE((5, prte_hwloc_base_output, "hwloc:base:get_idx")); #if HWLOC_API_VERSION < 0x20000 /* determine the number of objects of this type */ if (HWLOC_OBJ_CACHE == obj->type) { cache_level = obj->attr->cache.depth; } #endif nobjs = prte_hwloc_base_get_nbobjs_by_type(topo, obj->type, cache_level); PMIX_OUTPUT_VERBOSE((5, prte_hwloc_base_output, "hwloc:base:get_idx found %u objects of type %s:%u", nobjs, hwloc_obj_type_string(obj->type), cache_level)); /* find this object */ for (i = 0; i < nobjs; i++) { ptr = prte_hwloc_base_get_obj_by_type(topo, obj->type, cache_level, i); if (ptr == obj) { return i; } } /* if we get here, it wasn't found */ pmix_show_help("help-prte-hwloc-base.txt", "obj-idx-failed", true, hwloc_obj_type_string(obj->type), cache_level); return UINT_MAX; } #if HWLOC_API_VERSION < 0x20000 /* hwloc treats cache objects as special * cases. Instead of having a unique type for each cache level, * there is a single cache object type, and the level is encoded * in an attribute union. 
So looking for cache objects involves * a multi-step test :-( */ static hwloc_obj_t df_search(hwloc_topology_t topo, hwloc_obj_t start, hwloc_obj_type_t target, unsigned cache_level, unsigned int nobj, unsigned int *num_objs) { int search_depth; PRTE_HIDE_UNUSED_PARAMS(start); search_depth = hwloc_get_type_depth(topo, target); if (HWLOC_TYPE_DEPTH_MULTIPLE == search_depth) { /* either v1.x Cache, or Groups */ if (cache_level != HWLOC_OBJ_CACHE) { return NULL; } search_depth = hwloc_get_cache_type_depth(topo, cache_level, (hwloc_obj_cache_type_t) -1); } if (HWLOC_TYPE_DEPTH_UNKNOWN == search_depth) { return NULL; } if (num_objs) { *num_objs = hwloc_get_nbobjs_by_depth(topo, search_depth); } return hwloc_get_obj_by_depth(topo, search_depth, nobj); } #endif unsigned int prte_hwloc_base_get_nbobjs_by_type(hwloc_topology_t topo, hwloc_obj_type_t target, unsigned cache_level) { int rc; #if HWLOC_API_VERSION >= 0x20000 PRTE_HIDE_UNUSED_PARAMS(cache_level); #endif /* bozo check */ if (NULL == topo) { PMIX_OUTPUT_VERBOSE((5, prte_hwloc_base_output, "hwloc:base:get_nbobjs NULL topology")); return 0; } #if HWLOC_API_VERSION >= 0x20000 if (0 > (rc = hwloc_get_nbobjs_by_type(topo, target))) { pmix_output(0, "UNKNOWN HWLOC ERROR"); return 0; } return rc; #else unsigned int num_objs; hwloc_obj_t obj; /* we can just use the hwloc accessor to get it, * unless it is a CACHE as these are treated as special cases */ if (HWLOC_OBJ_CACHE != target) { /* we should not get an error back, but just in case... 
*/ if (0 > (rc = hwloc_get_nbobjs_by_type(topo, target))) { pmix_output(0, "UNKNOWN HWLOC ERROR"); return 0; } return rc; } /* for everything else, we have to do some work */ num_objs = 0; obj = hwloc_get_root_obj(topo); df_search(topo, obj, target, cache_level, 0, &num_objs); PMIX_OUTPUT_VERBOSE((5, prte_hwloc_base_output, "hwloc:base:get_nbojbs computed data %u of %s:%u", num_objs, hwloc_obj_type_string(target), cache_level)); return num_objs; #endif } /* as above, only return the Nth instance of the specified object * type from inside the topology */ hwloc_obj_t prte_hwloc_base_get_obj_by_type(hwloc_topology_t topo, hwloc_obj_type_t target, unsigned cache_level, unsigned int instance) { #if HWLOC_API_VERSION >= 0x20000 PRTE_HIDE_UNUSED_PARAMS(cache_level); #endif /* bozo check */ if (NULL == topo) { return NULL; } #if HWLOC_API_VERSION >= 0x20000 return hwloc_get_obj_by_type(topo, target, instance); #else hwloc_obj_t obj; /* we can just use the hwloc accessor to get it, unless it is a CACHE * as these are treated as special cases */ if (HWLOC_OBJ_CACHE != target) { return hwloc_get_obj_by_type(topo, target, instance); } /* for everything else, we have to do some work */ obj = hwloc_get_root_obj(topo); return df_search(topo, obj, target, cache_level, instance, NULL); #endif } /* The current slot_list notation only goes to the core level - i.e., the location * is specified as package:core. Thus, the code below assumes that all locations * are to be parsed under that notation. 
*/ static int package_to_cpu_set(char *cpus, hwloc_topology_t topo, hwloc_bitmap_t cpumask) { char **range; int range_cnt; int lower_range, upper_range; int package_id; hwloc_obj_t obj; if ('*' == cpus[0]) { /* requesting cpumask for ALL packages */ obj = hwloc_get_root_obj(topo); /* set to all available processors - essentially, * this specification equates to unbound */ hwloc_bitmap_or(cpumask, cpumask, obj->cpuset); return PRTE_SUCCESS; } range = PMIX_ARGV_SPLIT_COMPAT(cpus, '-'); range_cnt = PMIX_ARGV_COUNT_COMPAT(range); switch (range_cnt) { case 1: /* no range was present, so just one package given */ package_id = atoi(range[0]); obj = prte_hwloc_base_get_obj_by_type(topo, HWLOC_OBJ_PACKAGE, 0, package_id); /* get the available cpus for this package */ hwloc_bitmap_or(cpumask, cpumask, obj->cpuset); break; case 2: /* range of packages was given */ lower_range = atoi(range[0]); upper_range = atoi(range[1]); /* cycle across the range of packages */ for (package_id = lower_range; package_id <= upper_range; package_id++) { obj = prte_hwloc_base_get_obj_by_type(topo, HWLOC_OBJ_PACKAGE, 0, package_id); /* set the available cpus for this package bits in the bitmask */ hwloc_bitmap_or(cpumask, cpumask, obj->cpuset); } break; default: PMIX_ARGV_FREE_COMPAT(range); return PRTE_ERROR; } PMIX_ARGV_FREE_COMPAT(range); return PRTE_SUCCESS; } static int package_core_to_cpu_set(char *package_core_list, hwloc_topology_t topo, hwloc_bitmap_t cpumask) { int rc = PRTE_SUCCESS, i, j; char **package_core, *corestr; char **range, **list; int range_cnt; int lower_range, upper_range; int package_id, core_id; hwloc_obj_t package, core; hwloc_obj_type_t obj_type = HWLOC_OBJ_CORE; unsigned int npus; bool hwthreadcpus = false; package_core = PMIX_ARGV_SPLIT_COMPAT(package_core_list, ':'); package_id = atoi(package_core[0]); /* get the object for this package id */ package = prte_hwloc_base_get_obj_by_type(topo, HWLOC_OBJ_PACKAGE, 0, package_id); if (NULL == package) { 
PMIX_ARGV_FREE_COMPAT(package_core); return PRTE_ERR_NOT_FOUND; } /* as described in comment near top of file, hwloc isn't able * to find cores on all platforms. Adjust the type here if * required */ if (NULL == hwloc_get_obj_by_type(topo, HWLOC_OBJ_CORE, 0)) { obj_type = HWLOC_OBJ_PU; hwthreadcpus = true; } npus = prte_hwloc_base_get_npus(topo, hwthreadcpus, NULL, package); npus = npus * package_id; for (i = 1; NULL != package_core[i]; i++) { if ('C' == package_core[i][0] || 'c' == package_core[i][0]) { corestr = &package_core[i][1]; } else { corestr = package_core[i]; } if ('*' == corestr[0]) { /* set to all cpus on this package */ hwloc_bitmap_or(cpumask, cpumask, package->cpuset); /* we are done - already assigned all cores! */ rc = PRTE_SUCCESS; break; } else { range = PMIX_ARGV_SPLIT_COMPAT(corestr, '-'); range_cnt = PMIX_ARGV_COUNT_COMPAT(range); /* see if a range was set or not */ switch (range_cnt) { case 1: /* only one core, or a list of cores, specified */ list = PMIX_ARGV_SPLIT_COMPAT(range[0], ','); for (j = 0; NULL != list[j]; j++) { /* get the indexed core from this package */ core_id = atoi(list[j]) + npus; /* get that object */ core = prte_hwloc_base_get_obj_by_type(topo, obj_type, 0, core_id); if (NULL == core) { rc = PRTE_ERR_NOT_FOUND; break; } /* get the cpus */ hwloc_bitmap_or(cpumask, cpumask, core->cpuset); } PMIX_ARGV_FREE_COMPAT(list); break; case 2: /* range of core id's was given */ pmix_output_verbose(5, prte_hwloc_base_output, "range of cores given: start %s stop %s", range[0], range[1]); lower_range = atoi(range[0]); upper_range = atoi(range[1]); for (j = lower_range; j <= upper_range; j++) { /* get the indexed core from this package */ core_id = j + npus; /* get that object */ core = prte_hwloc_base_get_obj_by_type(topo, obj_type, 0, core_id); if (NULL == core) { rc = PRTE_ERR_NOT_FOUND; break; } /* get the cpus add them into the result */ hwloc_bitmap_or(cpumask, cpumask, core->cpuset); } break; default: 
PMIX_ARGV_FREE_COMPAT(range); PMIX_ARGV_FREE_COMPAT(package_core); return PRTE_ERROR; } PMIX_ARGV_FREE_COMPAT(range); } } PMIX_ARGV_FREE_COMPAT(package_core); return rc; } int prte_hwloc_base_cpu_list_parse(const char *slot_str, hwloc_topology_t topo, bool use_hwthread_cpus, hwloc_cpuset_t cpumask) { char **item, **rngs, *lst; int rc, i, j, k; hwloc_obj_t pu; char **range, **list; size_t range_cnt; int core_id, lower_range, upper_range; /* bozo checks */ if (NULL == prte_hwloc_topology) { return PRTE_ERR_NOT_SUPPORTED; } if (NULL == slot_str || 0 == strlen(slot_str)) { return PRTE_ERR_BAD_PARAM; } pmix_output_verbose(5, prte_hwloc_base_output, "slot assignment: slot_list == %s", slot_str); /* split at ';' */ item = PMIX_ARGV_SPLIT_COMPAT(slot_str, ';'); /* start with a clean mask */ hwloc_bitmap_zero(cpumask); /* loop across the items and accumulate the mask */ for (i = 0; NULL != item[i]; i++) { pmix_output_verbose(5, prte_hwloc_base_output, "working assignment %s", item[i]); /* if they specified "package" by starting with an S/s, * or if they use package:core notation, then parse the * package/core info */ if ('P' == item[i][0] || 'p' == item[i][0] || 'S' == item[i][0] || 's' == item[i][0] || // backward compatibility NULL != strchr(item[i], ':')) { /* specified a package */ if (NULL == strchr(item[i], ':')) { /* binding just to the package level, though * it could specify multiple packages * Skip the P and look for ranges */ rngs = PMIX_ARGV_SPLIT_COMPAT(&item[i][1], ','); for (j = 0; NULL != rngs[j]; j++) { if (PRTE_SUCCESS != (rc = package_to_cpu_set(rngs[j], topo, cpumask))) { PMIX_ARGV_FREE_COMPAT(rngs); PMIX_ARGV_FREE_COMPAT(item); return rc; } } PMIX_ARGV_FREE_COMPAT(rngs); } else { if ('P' == item[i][0] || 'p' == item[i][0] || 'S' == item[i][0] || 's' == item[i][0]) { lst = &item[i][1]; } else { lst = item[i]; } if (PRTE_SUCCESS != (rc = package_core_to_cpu_set(lst, topo, cpumask))) { PMIX_ARGV_FREE_COMPAT(item); return rc; } } } else { rngs = 
PMIX_ARGV_SPLIT_COMPAT(item[i], ','); for (k = 0; NULL != rngs[k]; k++) { /* just a core specification - see if one or a range was given */ range = PMIX_ARGV_SPLIT_COMPAT(rngs[k], '-'); range_cnt = PMIX_ARGV_COUNT_COMPAT(range); /* see if a range was set or not */ switch (range_cnt) { case 1: /* only one core, or a list of cores, specified */ list = PMIX_ARGV_SPLIT_COMPAT(range[0], ','); for (j = 0; NULL != list[j]; j++) { core_id = atoi(list[j]); /* find the specified available cpu */ if (NULL == (pu = prte_hwloc_base_get_pu(topo, use_hwthread_cpus, core_id))) { PMIX_ARGV_FREE_COMPAT(range); PMIX_ARGV_FREE_COMPAT(item); PMIX_ARGV_FREE_COMPAT(rngs); PMIX_ARGV_FREE_COMPAT(list); return PRTE_ERR_NOT_FOUND; } /* get the cpus for that object and set them in the massk*/ hwloc_bitmap_or(cpumask, cpumask, pu->cpuset); } PMIX_ARGV_FREE_COMPAT(list); break; case 2: /* range of core id's was given */ lower_range = atoi(range[0]); upper_range = atoi(range[1]); for (core_id = lower_range; core_id <= upper_range; core_id++) { /* find the specified logical available cpu */ if (NULL == (pu = prte_hwloc_base_get_pu(topo, use_hwthread_cpus, core_id))) { PMIX_ARGV_FREE_COMPAT(range); PMIX_ARGV_FREE_COMPAT(item); PMIX_ARGV_FREE_COMPAT(rngs); return PRTE_ERR_NOT_FOUND; } /* get the cpus for that object and set them in the mask*/ hwloc_bitmap_or(cpumask, cpumask, pu->cpuset); } break; default: PMIX_ARGV_FREE_COMPAT(range); PMIX_ARGV_FREE_COMPAT(item); PMIX_ARGV_FREE_COMPAT(rngs); return PRTE_ERROR; } PMIX_ARGV_FREE_COMPAT(range); } PMIX_ARGV_FREE_COMPAT(rngs); } } PMIX_ARGV_FREE_COMPAT(item); return PRTE_SUCCESS; } static void prte_hwloc_base_get_relative_locality_by_depth(hwloc_topology_t topo, unsigned d, hwloc_cpuset_t loc1, hwloc_cpuset_t loc2, prte_hwloc_locality_t *locality, bool *shared) { unsigned width, w; hwloc_obj_t obj; int sect1, sect2; /* get the width of the topology at this depth */ width = hwloc_get_nbobjs_by_depth(topo, d); /* scan all objects at this depth to see if 
* our locations overlap with them */ for (w = 0; w < width; w++) { /* get the object at this depth/index */ obj = hwloc_get_obj_by_depth(topo, d, w); /* see if our locations intersect with the cpuset for this obj */ sect1 = hwloc_bitmap_intersects(obj->cpuset, loc1); sect2 = hwloc_bitmap_intersects(obj->cpuset, loc2); /* if both intersect, then we share this level */ if (sect1 && sect2) { *shared = true; switch (obj->type) { case HWLOC_OBJ_PACKAGE: *locality |= PRTE_PROC_ON_PACKAGE; break; case HWLOC_OBJ_NUMANODE: *locality |= PRTE_PROC_ON_NUMA; break; #if HWLOC_API_VERSION < 0x20000 case HWLOC_OBJ_CACHE: if (3 == obj->attr->cache.depth) { *locality |= PRTE_PROC_ON_L3CACHE; } else if (2 == obj->attr->cache.depth) { *locality |= PRTE_PROC_ON_L2CACHE; } else { *locality |= PRTE_PROC_ON_L1CACHE; } break; #else case HWLOC_OBJ_L3CACHE: *locality |= PRTE_PROC_ON_L3CACHE; break; case HWLOC_OBJ_L2CACHE: *locality |= PRTE_PROC_ON_L2CACHE; break; case HWLOC_OBJ_L1CACHE: *locality |= PRTE_PROC_ON_L1CACHE; break; #endif case HWLOC_OBJ_CORE: *locality |= PRTE_PROC_ON_CORE; break; case HWLOC_OBJ_PU: *locality |= PRTE_PROC_ON_HWTHREAD; break; default: /* just ignore it */ break; } break; } /* otherwise, we don't share this * object - but we still might share another object * on this level, so we have to keep searching */ } } prte_hwloc_locality_t prte_hwloc_base_get_relative_locality(hwloc_topology_t topo, char *cpuset1, char *cpuset2) { prte_hwloc_locality_t locality; hwloc_cpuset_t loc1, loc2; unsigned depth, d; bool shared; hwloc_obj_type_t type; /* start with what we know - they share a node on a cluster * NOTE: we may alter that latter part as hwloc's ability to * sense multi-cu, multi-cluster systems grows */ locality = PRTE_PROC_ON_NODE | PRTE_PROC_ON_HOST | PRTE_PROC_ON_CU | PRTE_PROC_ON_CLUSTER; /* if either cpuset is NULL, then that isn't bound */ if (NULL == cpuset1 || NULL == cpuset2) { return locality; } /* get the max depth of the topology */ depth = 
hwloc_topology_get_depth(topo); /* convert the strings to cpusets */ loc1 = hwloc_bitmap_alloc(); hwloc_bitmap_list_sscanf(loc1, cpuset1); loc2 = hwloc_bitmap_alloc(); hwloc_bitmap_list_sscanf(loc2, cpuset2); /* start at the first depth below the top machine level */ for (d = 1; d < depth; d++) { shared = false; /* get the object type at this depth */ type = hwloc_get_depth_type(topo, d); /* if it isn't one of interest, then ignore it */ if (HWLOC_OBJ_NUMANODE != type && HWLOC_OBJ_PACKAGE != type && #if HWLOC_API_VERSION < 0x20000 HWLOC_OBJ_CACHE != type && #else HWLOC_OBJ_L3CACHE != type && HWLOC_OBJ_L2CACHE != type && HWLOC_OBJ_L1CACHE != type && #endif HWLOC_OBJ_CORE != type && HWLOC_OBJ_PU != type) { continue; } prte_hwloc_base_get_relative_locality_by_depth(topo, d, loc1, loc2, &locality, &shared); /* if we spanned the entire width without finding * a point of intersection, then no need to go * deeper */ if (!shared) { break; } } #if HWLOC_API_VERSION >= 0x20000 prte_hwloc_base_get_relative_locality_by_depth(topo, (unsigned) HWLOC_TYPE_DEPTH_NUMANODE, loc1, loc2, &locality, &shared); #endif pmix_output_verbose(5, prte_hwloc_base_output, "locality: %s", prte_hwloc_base_print_locality(locality)); hwloc_bitmap_free(loc1); hwloc_bitmap_free(loc2); return locality; } /* searches the given topology for coprocessor objects and returns * their serial numbers as a comma-delimited string, or NULL * if no coprocessors are found */ char *prte_hwloc_base_find_coprocessors(hwloc_topology_t topo) { #if HAVE_DECL_HWLOC_OBJ_OSDEV_COPROC hwloc_obj_t osdev; unsigned i; char **cps = NULL; #endif char *cpstring = NULL; int depth; /* coprocessors are recorded under OS_DEVICEs, so first * see if we have any of those */ if (HWLOC_TYPE_DEPTH_UNKNOWN == (depth = hwloc_get_type_depth(topo, HWLOC_OBJ_OS_DEVICE))) { PMIX_OUTPUT_VERBOSE( (5, prte_hwloc_base_output, "hwloc:base:find_coprocessors: NONE FOUND IN TOPO")); return NULL; } #if HAVE_DECL_HWLOC_OBJ_OSDEV_COPROC /* check the device 
objects for coprocessors */ osdev = hwloc_get_obj_by_depth(topo, depth, 0); while (NULL != osdev) { if (HWLOC_OBJ_OSDEV_COPROC == osdev->attr->osdev.type) { /* got one! find and save its serial number */ for (i = 0; i < osdev->infos_count; i++) { if (0 == strncmp(osdev->infos[i].name, "MICSerialNumber", strlen("MICSerialNumber"))) { PMIX_OUTPUT_VERBOSE((5, prte_hwloc_base_output, "hwloc:base:find_coprocessors: coprocessor %s found", osdev->infos[i].value)); PMIX_ARGV_APPEND_NOSIZE_COMPAT(&cps, osdev->infos[i].value); } } } osdev = osdev->next_cousin; } if (NULL != cps) { cpstring = PMIX_ARGV_JOIN_COMPAT(cps, ','); PMIX_ARGV_FREE_COMPAT(cps); } PMIX_OUTPUT_VERBOSE((5, prte_hwloc_base_output, "hwloc:base:find_coprocessors: hosting coprocessors %s", (NULL == cpstring) ? "NONE" : cpstring)); #else PMIX_OUTPUT_VERBOSE((5, prte_hwloc_base_output, "hwloc:base:find_coprocessors: the version of hwloc that PRTE was built " "against (v%d.%d.%d) does not support detecting coprocessors", (HWLOC_API_VERSION >> 16) && 0xFF, (HWLOC_API_VERSION >> 8) & 0xFF, HWLOC_API_VERSION && 0xFF)); #endif return cpstring; } #define PRTE_HWLOC_MAX_ELOG_LINE 1024 static char *hwloc_getline(FILE *fp) { char *ret, *buff; char input[PRTE_HWLOC_MAX_ELOG_LINE]; ret = fgets(input, PRTE_HWLOC_MAX_ELOG_LINE, fp); if (NULL != ret) { input[strlen(input) - 1] = '\0'; /* remove newline */ buff = strdup(input); return buff; } return NULL; } /* checks local environment to determine if this process * is on a coprocessor - if so, it returns the serial number * as a string, or NULL if it isn't on a coprocessor */ char *prte_hwloc_base_check_on_coprocessor(void) { /* this support currently is limited to Intel Phi processors * but will hopefully be extended as we get better, more * generalized ways of identifying coprocessors */ FILE *fp; char *t, *cptr, *e, *cp = NULL; if (NULL == (fp = fopen("/proc/elog", "r"))) { /* nothing we can do */ return NULL; } /* look for the line containing the serial number of this * 
card - usually the first line in the file */ while (NULL != (cptr = hwloc_getline(fp))) { if (NULL != (t = strstr(cptr, "Card"))) { /* we want the string right after this - delimited by * a colon at the end */ t += 5; // move past "Card " if (NULL == (e = strchr(t, ':'))) { /* not what we were expecting */ free(cptr); continue; } *e = '\0'; cp = strdup(t); free(cptr); break; } free(cptr); } fclose(fp); PMIX_OUTPUT_VERBOSE((5, prte_hwloc_base_output, "hwloc:base:check_coprocessor: on coprocessor %s", (NULL == cp) ? "NONE" : cp)); return cp; } char *prte_hwloc_base_print_binding(prte_binding_policy_t binding) { char *ret, *bind; prte_hwloc_print_buffers_t *ptr; switch (PRTE_GET_BINDING_POLICY(binding)) { case PRTE_BIND_TO_NONE: bind = "NONE"; break; case PRTE_BIND_TO_PACKAGE: bind = "PACKAGE"; break; case PRTE_BIND_TO_NUMA: bind = "NUMA"; break; case PRTE_BIND_TO_L3CACHE: bind = "L3CACHE"; break; case PRTE_BIND_TO_L2CACHE: bind = "L2CACHE"; break; case PRTE_BIND_TO_L1CACHE: bind = "L1CACHE"; break; case PRTE_BIND_TO_CORE: bind = "CORE"; break; case PRTE_BIND_TO_HWTHREAD: bind = "HWTHREAD"; break; default: bind = "UNKNOWN"; } ptr = prte_hwloc_get_print_buffer(); if (NULL == ptr) { return prte_hwloc_print_null; } /* cycle around the ring */ if (PRTE_HWLOC_PRINT_NUM_BUFS == ptr->cntr) { ptr->cntr = 0; } if (!PRTE_BINDING_REQUIRED(binding) && PRTE_BIND_OVERLOAD_ALLOWED(binding)) { snprintf(ptr->buffers[ptr->cntr], PRTE_HWLOC_PRINT_MAX_SIZE, "%s:IF-SUPPORTED:OVERLOAD-ALLOWED", bind); } else if (PRTE_BIND_OVERLOAD_ALLOWED(binding)) { snprintf(ptr->buffers[ptr->cntr], PRTE_HWLOC_PRINT_MAX_SIZE, "%s:OVERLOAD-ALLOWED", bind); } else if (!PRTE_BINDING_REQUIRED(binding)) { snprintf(ptr->buffers[ptr->cntr], PRTE_HWLOC_PRINT_MAX_SIZE, "%s:IF-SUPPORTED", bind); } else { snprintf(ptr->buffers[ptr->cntr], PRTE_HWLOC_PRINT_MAX_SIZE, "%s", bind); } ret = ptr->buffers[ptr->cntr]; ptr->cntr++; return ret; } void prte_hwloc_build_map(hwloc_topology_t topo, hwloc_cpuset_t avail, bool 
use_hwthread_cpus, hwloc_bitmap_t coreset) { unsigned k, obj_index, core_index; hwloc_obj_t pu, core; /* the bits in the cpuset _always_ represent hwthreads, so * we have to manually determine which core each bit is under * so we can report the cpus in terms of "cores" */ /* start with the first set bit */ hwloc_bitmap_zero(coreset); k = hwloc_bitmap_first(avail); obj_index = 0; while (k != (unsigned) -1) { if (use_hwthread_cpus) { /* mark this thread as occupied */ hwloc_bitmap_set(coreset, k); } else { /* Go upward and find the core this PU belongs to */ pu = hwloc_get_obj_inside_cpuset_by_type(topo, avail, HWLOC_OBJ_PU, obj_index); core = pu; while (NULL != core && core->type != HWLOC_OBJ_CORE) { core = core->parent; } core_index = 0; if (NULL != core) { core_index = core->logical_index; } /* mark everything since the last place as * being empty */ hwloc_bitmap_set(coreset, core_index); } /* move to the next set bit */ k = hwloc_bitmap_next(avail, k); ++obj_index; } } /* formatting core/hwt binding information as xml elements */ static int bitmap_list_snprintf_exp(char *__hwloc_restrict buf, size_t buflen, const struct hwloc_bitmap_s *__hwloc_restrict set, char *type) { int ret = 0; char *tmp = buf; #if HWLOC_API_VERSION >= 0x20000 int prev = -1; ssize_t size = buflen; int res = -1; /* mark the end in case we do nothing later */ if (buflen > 0) { tmp[0] = '\0'; } while (1) { int begin, end; begin = hwloc_bitmap_next(set, prev); if (begin == -1) { break; } end = hwloc_bitmap_next_unset(set, begin); if (end == begin + 1) { res = snprintf(tmp, size, "%*c<%s>%d\n", 20, ' ', type, begin, type); } else if (end == -1) { res = snprintf(tmp, size, "%*c<%s>%d\n", 20, ' ', type, begin, type); } else { for (int i = begin; i <= end - 1; i++) { res = snprintf(tmp, size, "%*c<%s>%d\n", 20, ' ', type, i, type); if (i != (end - 1)) { tmp += res; } } } if (res < 0) { return -1; } ret += res; if (res >= size) { res = size > 0 ? 
(int)size - 1 : 0; } tmp += res; size -= res; if (end == -1) { break; } else { prev = end - 1; } } #else PRTE_HIDE_UNUSED_PARAMS(set, type); if (buflen > 0) { tmp[0] = '\0'; } ret = PRTE_ERR_NOT_SUPPORTED; #endif return ret; } /* * Output is undefined if a rank is bound to more than 1 package */ void prte_hwloc_get_binding_info(hwloc_const_cpuset_t cpuset, bool use_hwthread_cpus, hwloc_topology_t topo, int *pkgnum, char *cores, int sz) { int n, npkgs, npus, ncores; hwloc_cpuset_t avail, coreset = NULL; hwloc_obj_t pkg; bool bits_as_cores = false; /* if the cpuset is all zero, then something is wrong */ if (hwloc_bitmap_iszero(cpuset)) { snprintf(cores, sz, "\n%*c\n", 20, ' '); } /* if the cpuset includes all available cpus, and * the available cpus were not externally constrained, * then we are unbound */ avail = prte_hwloc_base_filter_cpus(topo); if (hwloc_bitmap_isequal(cpuset, avail) && hwloc_bitmap_isfull(avail)) { snprintf(cores, sz, "\n%*c\n", 20, ' '); } hwloc_bitmap_free(avail); /* get the number of packages in the topology */ npkgs = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_PACKAGE); avail = hwloc_bitmap_alloc(); npus = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_PU); ncores = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_CORE); if (npus == ncores && !use_hwthread_cpus) { /* the bits in this bitmap represent cores */ bits_as_cores = true; } if (!use_hwthread_cpus && !bits_as_cores) { coreset = hwloc_bitmap_alloc(); } /* binding happens within a package and not across packages */ for (n = 0; n < npkgs; n++) { pkg = hwloc_get_obj_by_type(topo, HWLOC_OBJ_PACKAGE, n); /* see if we have any here */ hwloc_bitmap_and(avail, cpuset, pkg->cpuset); if (hwloc_bitmap_iszero(avail)) { continue; } if (bits_as_cores) { /* can just use the hwloc fn directly */ bitmap_list_snprintf_exp(cores, sz, avail, "core"); } else if (use_hwthread_cpus) { /* can just use the hwloc fn directly */ bitmap_list_snprintf_exp(cores, sz, avail, "hwt"); } else { prte_hwloc_build_map(topo, avail, 
use_hwthread_cpus | bits_as_cores, coreset); /* now print out the string */ bitmap_list_snprintf_exp(cores, sz, coreset, "core"); } *pkgnum = n; } hwloc_bitmap_free(avail); if (NULL != coreset) { hwloc_bitmap_free(coreset); } } static int compare_unsigned(const void *a, const void *b) { return (*(unsigned *)a - *(unsigned *)b); } /* generate a logical string output of a hwloc_cpuset_t */ static int build_map(char *answer, size_t size, hwloc_const_cpuset_t bitmap, bool physical, char *prefix, hwloc_topology_t topo) { unsigned indices[2048], id; int nsites = 0, n, start, end, idx; hwloc_obj_t pu; char tmp[128]; bool inrange, first, unique; unsigned val; for (id = hwloc_bitmap_first(bitmap); id != (unsigned)-1; id = hwloc_bitmap_next(bitmap, id)) { // the id's are for threads, but we want cores pu = hwloc_get_pu_obj_by_os_index(topo, id); // go upward to find the core that contains this pu while (NULL != pu && pu->type != HWLOC_OBJ_CORE) { pu = pu->parent; } if (NULL == pu) { pmix_show_help("help-prte-hwloc-base.txt", "pu-not-found", true, id); return PRTE_ERR_SILENT; } if (physical) { // record the physical site val = pu->os_index; } else { // record the logical site val = pu->logical_index; } // add it uniquely to the array of indices - it could be a duplicate unique = true; for (n=0; n < nsites; n++) { if (indices[n] == val) { unique = false; break; } } if (unique) { indices[nsites] = val; ++nsites; if (2048 == nsites) { pmix_show_help("help-prte-hwloc-base.txt", "too-many-sites", true); return PRTE_ERR_SILENT; } } } /* this should never happen as it would mean that the bitmap was * empty, which is something we checked before calling this function */ if (0 == nsites) { return PRTE_ERR_NOT_FOUND; } if (1 == nsites) { // only bound to one location - most common case snprintf(answer, size, "%s%u", prefix, indices[0]); return PRTE_SUCCESS; } // sort them qsort(indices, nsites, sizeof(unsigned), compare_unsigned); // parse through and look for ranges start = indices[0]; 
end = indices[0]; inrange = false; first = true; // prep the answer snprintf(answer, size, "%s", prefix); idx = strlen(prefix); for (n=1; n < nsites; n++) { // see if we are in a range if (1 == (indices[n]-end)) { inrange = true; end = indices[n]; continue; } // we are not in a range, or we are // at the end of a range if (inrange) { // we are at the end of the range if (start == end) { if (first) { snprintf(tmp, 128, "%u", start); first = false; } else { snprintf(tmp, 128, ",%u", start); } memcpy(&answer[idx], tmp, strlen(tmp)); idx += strlen(tmp); } else { if (first) { snprintf(tmp, 128, "%u-%u", start, end); first = false; } else { snprintf(tmp, 128, ",%u-%u", start, end); } memcpy(&answer[idx], tmp, strlen(tmp)); idx += strlen(tmp); } // mark the end of the range inrange = false; start = indices[n]; end = indices[n]; } else { if (first) { snprintf(tmp, 128, "%u", start); first = false; } else { snprintf(tmp, 128, ",%u", start); } memcpy(&answer[idx], tmp, strlen(tmp)); idx += strlen(tmp); inrange = false; start = indices[n]; end = indices[n]; } } // see if we have a dangling entry if (start == end) { if (first) { snprintf(tmp, 128, "%u", start); } else { snprintf(tmp, 128, ",%u", start); } memcpy(&answer[idx], tmp, strlen(tmp)); snprintf(tmp, 128, "%u", start); } else { if (first) { snprintf(tmp, 128, "%u-%u", start, end); first = false; } else { snprintf(tmp, 128, ",%u-%u", start, end); } memcpy(&answer[idx], tmp, strlen(tmp)); idx += strlen(tmp); } return PRTE_SUCCESS; } /* * Make a prettyprint string for a hwloc_cpuset_t */ char *prte_hwloc_base_cset2str(hwloc_const_cpuset_t cpuset, bool use_hwthread_cpus, bool physical, hwloc_topology_t topo) { int n, npkgs, npus, ncores; char tmp[2048], ans[4096]; hwloc_cpuset_t avail, coreset = NULL; char **output = NULL, *result; hwloc_obj_t pkg; bool bits_as_cores = false; int complete; char *prefix; /* if the cpuset is all zero, then something is wrong */ if (hwloc_bitmap_iszero(cpuset)) { return strdup("EMPTY 
CPUSET"); } /* if the cpuset includes all available cpus, and * the available cpus were not externally constrained, * then we are unbound */ avail = prte_hwloc_base_filter_cpus(topo); if (hwloc_bitmap_isequal(cpuset, avail) && hwloc_bitmap_isfull(avail)) { return strdup("UNBOUND"); } hwloc_bitmap_free(avail); /* get the number of packages in the topology */ npkgs = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_PACKAGE); avail = hwloc_bitmap_alloc(); npus = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_PU); ncores = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_CORE); if (npus == ncores && !use_hwthread_cpus) { /* the bits in this bitmap represent cores */ bits_as_cores = true; } if (!use_hwthread_cpus && !bits_as_cores) { coreset = hwloc_bitmap_alloc(); } if (bits_as_cores || !use_hwthread_cpus) { if (physical) { prefix = "core:P"; } else { prefix = "core:L"; } } else { if (physical) { prefix = "hwt:P"; } else { prefix = "hwt:L"; } } for (n = 0; n < npkgs; n++) { memset(tmp, 0, sizeof(tmp)); memset(ans, 0, sizeof(ans)); pkg = hwloc_get_obj_by_type(topo, HWLOC_OBJ_PACKAGE, n); /* see if we have any here */ hwloc_bitmap_and(avail, cpuset, pkg->cpuset); if (hwloc_bitmap_iszero(avail)) { continue; } if (bits_as_cores) { /* can just use the hwloc fn directly */ hwloc_bitmap_list_snprintf(tmp, 2048, avail); snprintf(ans, 4096, "package[%d][%s%s]", n, prefix, tmp); } else if (use_hwthread_cpus) { /* can just use the hwloc fn directly */ hwloc_bitmap_list_snprintf(tmp, 2048, avail); snprintf(ans, 4096, "package[%d][%s%s]", n, prefix, tmp); } else { // build the map for this cpuset complete = build_map(tmp, 2048, avail, physical, prefix, topo); if (PRTE_SUCCESS == complete) { if (physical) { snprintf(ans, 4096, "package[%d][%s]", n, tmp); } else { snprintf(ans, 4096, "package[%d][%s]", n, tmp); } } else { PMIX_ARGV_FREE_COMPAT(output); return NULL; } } PMIX_ARGV_APPEND_NOSIZE_COMPAT(&output, ans); } if (NULL != output) { result = PMIX_ARGV_JOIN_COMPAT(output, ' '); 
PMIX_ARGV_FREE_COMPAT(output); } else { result = NULL; } hwloc_bitmap_free(avail); if (NULL != coreset) { hwloc_bitmap_free(coreset); } return result; } char *prte_hwloc_base_get_topo_signature(hwloc_topology_t topo) { char *sig = NULL, *arch = NULL, *endian; hwloc_obj_t obj; unsigned i; char buffer[4096]; int rc; obj = hwloc_get_root_obj(topo); rc = hwloc_topology_export_synthetic(topo, buffer, 4096, HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_ATTRS); if (-1 == rc) { snprintf(buffer, 4096, "NON-SYMMETRIC[%u]", prte_process_info.myproc.rank); } /* get the root object so we can add the processor architecture */ obj = hwloc_get_root_obj(topo); for (i = 0; i < obj->infos_count; i++) { if (0 == strcmp(obj->infos[i].name, "Architecture")) { arch = obj->infos[i].value; break; } } if (NULL == arch) { arch = "unknown"; } #ifdef __BYTE_ORDER # if __BYTE_ORDER == __LITTLE_ENDIAN endian = "le"; # else endian = "be"; # endif #else endian = "unknown"; #endif // form the final signature pmix_asprintf(&sig, "%s:%s:%s", buffer, arch, endian); return sig; } static int prte_hwloc_base_get_locality_string_by_depth(hwloc_topology_t topo, int d, hwloc_cpuset_t cpuset, hwloc_cpuset_t result) { hwloc_obj_t obj; unsigned width, w; /* get the width of the topology at this depth */ width = hwloc_get_nbobjs_by_depth(topo, d); if (0 == width) { return -1; } /* scan all objects at this depth to see if * the location overlaps with them */ for (w = 0; w < width; w++) { /* get the object at this depth/index */ obj = hwloc_get_obj_by_depth(topo, d, w); /* see if the location intersects with it */ if (hwloc_bitmap_intersects(obj->cpuset, cpuset)) { hwloc_bitmap_set(result, w); } } return 0; } char *prte_hwloc_base_get_locality_string(hwloc_topology_t topo, char *bitmap) { char *locality = NULL, *tmp, *t2; unsigned depth, d; hwloc_cpuset_t cpuset, result; hwloc_obj_type_t type; /* if this proc is not bound, then there is no locality. 
We * know it isn't bound if the cpuset is NULL, or if it is * all 1's */ if (NULL == bitmap) { return NULL; } cpuset = hwloc_bitmap_alloc(); hwloc_bitmap_list_sscanf(cpuset, bitmap); if (hwloc_bitmap_isfull(cpuset)) { hwloc_bitmap_free(cpuset); return NULL; } /* we are going to use a bitmap to save the results so * that we can use a hwloc utility to print them */ result = hwloc_bitmap_alloc(); /* get the max depth of the topology */ depth = hwloc_topology_get_depth(topo); /* start at the first depth below the top machine level */ for (d = 1; d < depth; d++) { /* get the object type at this depth */ type = hwloc_get_depth_type(topo, d); /* if it isn't one of interest, then ignore it */ if (HWLOC_OBJ_NUMANODE != type && HWLOC_OBJ_PACKAGE != type && #if HWLOC_API_VERSION < 0x20000 HWLOC_OBJ_CACHE != type && #else HWLOC_OBJ_L1CACHE != type && HWLOC_OBJ_L2CACHE != type && HWLOC_OBJ_L3CACHE != type && #endif HWLOC_OBJ_CORE != type && HWLOC_OBJ_PU != type) { continue; } if (prte_hwloc_base_get_locality_string_by_depth(topo, d, cpuset, result) < 0) { continue; } /* it should be impossible, but allow for the possibility * that we came up empty at this depth */ if (!hwloc_bitmap_iszero(result)) { hwloc_bitmap_list_asprintf(&tmp, result); switch (type) { case HWLOC_OBJ_NUMANODE: pmix_asprintf(&t2, "%sNM%s:", (NULL == locality) ? "" : locality, tmp); if (NULL != locality) { free(locality); } locality = t2; break; case HWLOC_OBJ_PACKAGE: pmix_asprintf(&t2, "%sSK%s:", (NULL == locality) ? "" : locality, tmp); if (NULL != locality) { free(locality); } locality = t2; break; #if HWLOC_API_VERSION < 0x20000 case HWLOC_OBJ_CACHE: { unsigned cachedepth = hwloc_get_obj_by_depth(topo, d, 0)->attr->cache.depth; if (3 == cachedepth) { pmix_asprintf(&t2, "%sL3%s:", (NULL == locality) ? "" : locality, tmp); if (NULL != locality) { free(locality); } locality = t2; break; } else if (2 == cachedepth) { pmix_asprintf(&t2, "%sL2%s:", (NULL == locality) ? 
"" : locality, tmp); if (NULL != locality) { free(locality); } locality = t2; break; } else { pmix_asprintf(&t2, "%sL1%s:", (NULL == locality) ? "" : locality, tmp); if (NULL != locality) { free(locality); } locality = t2; break; } } break; #else case HWLOC_OBJ_L3CACHE: pmix_asprintf(&t2, "%sL3%s:", (NULL == locality) ? "" : locality, tmp); if (NULL != locality) { free(locality); } locality = t2; break; case HWLOC_OBJ_L2CACHE: pmix_asprintf(&t2, "%sL2%s:", (NULL == locality) ? "" : locality, tmp); if (NULL != locality) { free(locality); } locality = t2; break; case HWLOC_OBJ_L1CACHE: pmix_asprintf(&t2, "%sL1%s:", (NULL == locality) ? "" : locality, tmp); if (NULL != locality) { free(locality); } locality = t2; break; #endif case HWLOC_OBJ_CORE: pmix_asprintf(&t2, "%sCR%s:", (NULL == locality) ? "" : locality, tmp); if (NULL != locality) { free(locality); } locality = t2; break; case HWLOC_OBJ_PU: pmix_asprintf(&t2, "%sHT%s:", (NULL == locality) ? "" : locality, tmp); if (NULL != locality) { free(locality); } locality = t2; break; default: /* just ignore it */ break; } free(tmp); } hwloc_bitmap_zero(result); } #if HWLOC_API_VERSION >= 0x20000 if (prte_hwloc_base_get_locality_string_by_depth(topo, HWLOC_TYPE_DEPTH_NUMANODE, cpuset, result) == 0) { /* it should be impossible, but allow for the possibility * that we came up empty at this depth */ if (!hwloc_bitmap_iszero(result)) { hwloc_bitmap_list_asprintf(&tmp, result); pmix_asprintf(&t2, "%sNM%s:", (NULL == locality) ? 
"" : locality, tmp); if (NULL != locality) { free(locality); } locality = t2; free(tmp); } hwloc_bitmap_zero(result); } #endif hwloc_bitmap_free(result); hwloc_bitmap_free(cpuset); /* remove the trailing colon */ if (NULL != locality) { locality[strlen(locality) - 1] = '\0'; } return locality; } char *prte_hwloc_base_get_location(char *locality, hwloc_obj_type_t type, unsigned index) { char **loc; char *srch, *ans = NULL; size_t n; #if HWLOC_API_VERSION >= 0x20000 PRTE_HIDE_UNUSED_PARAMS(index); #endif if (NULL == locality) { return NULL; } switch (type) { case HWLOC_OBJ_NUMANODE: srch = "NM"; break; case HWLOC_OBJ_PACKAGE: srch = "SK"; break; #if HWLOC_API_VERSION < 0x20000 case HWLOC_OBJ_CACHE: if (3 == index) { srch = "L3"; } else if (2 == index) { srch = "L2"; } else { srch = "L1"; } break; #else case HWLOC_OBJ_L3CACHE: srch = "L3"; break; case HWLOC_OBJ_L2CACHE: srch = "L2"; break; case HWLOC_OBJ_L1CACHE: srch = "L1"; break; #endif case HWLOC_OBJ_CORE: srch = "CR"; break; case HWLOC_OBJ_PU: srch = "HT"; break; default: return NULL; } loc = PMIX_ARGV_SPLIT_COMPAT(locality, ':'); for (n = 0; NULL != loc[n]; n++) { if (0 == strncmp(loc[n], srch, 2)) { ans = strdup(&loc[n][2]); break; } } PMIX_ARGV_FREE_COMPAT(loc); return ans; } prte_hwloc_locality_t prte_hwloc_compute_relative_locality(char *loc1, char *loc2) { prte_hwloc_locality_t locality; char **set1, **set2; hwloc_bitmap_t bit1, bit2; size_t n1, n2; /* start with what we know - they share a node on a cluster * NOTE: we may alter that latter part as hwloc's ability to * sense multi-cu, multi-cluster systems grows */ locality = PRTE_PROC_ON_NODE | PRTE_PROC_ON_HOST | PRTE_PROC_ON_CU | PRTE_PROC_ON_CLUSTER; /* if either location is NULL, then that isn't bound */ if (NULL == loc1 || NULL == loc2) { return locality; } set1 = PMIX_ARGV_SPLIT_COMPAT(loc1, ':'); set2 = PMIX_ARGV_SPLIT_COMPAT(loc2, ':'); bit1 = hwloc_bitmap_alloc(); bit2 = hwloc_bitmap_alloc(); /* check each matching type */ for (n1 = 0; NULL != 
set1[n1]; n1++) { /* convert the location into bitmap */ hwloc_bitmap_list_sscanf(bit1, &set1[n1][2]); /* find the matching type in set2 */ for (n2 = 0; NULL != set2[n2]; n2++) { if (0 == strncmp(set1[n1], set2[n2], 2)) { /* convert the location into bitmap */ hwloc_bitmap_list_sscanf(bit2, &set2[n2][2]); /* see if they intersect */ if (hwloc_bitmap_intersects(bit1, bit2)) { /* set the corresponding locality bit */ if (0 == strncmp(set1[n1], "SK", 2)) { locality |= PRTE_PROC_ON_PACKAGE; } else if (0 == strncmp(set1[n1], "NM", 2)) { locality |= PRTE_PROC_ON_NUMA; } else if (0 == strncmp(set1[n1], "L3", 2)) { locality |= PRTE_PROC_ON_L3CACHE; } else if (0 == strncmp(set1[n1], "L2", 2)) { locality |= PRTE_PROC_ON_L2CACHE; } else if (0 == strncmp(set1[n1], "L1", 2)) { locality |= PRTE_PROC_ON_L1CACHE; } else if (0 == strncmp(set1[n1], "CR", 2)) { locality |= PRTE_PROC_ON_CORE; } else if (0 == strncmp(set1[n1], "HT", 2)) { locality |= PRTE_PROC_ON_HWTHREAD; } else { /* should never happen */ pmix_output(0, "UNRECOGNIZED LOCALITY %s", set1[n1]); } } break; } } } PMIX_ARGV_FREE_COMPAT(set1); PMIX_ARGV_FREE_COMPAT(set2); hwloc_bitmap_free(bit1); hwloc_bitmap_free(bit2); return locality; } int prte_hwloc_base_topology_export_xmlbuffer(hwloc_topology_t topology, char **xmlpath, int *buflen) { #if HWLOC_API_VERSION < 0x00020000 return hwloc_topology_export_xmlbuffer(topology, xmlpath, buflen); #else return hwloc_topology_export_xmlbuffer(topology, xmlpath, buflen, 0); #endif } int prte_hwloc_base_topology_set_flags(hwloc_topology_t topology, unsigned long flags, bool io) { if (io) { #if HWLOC_API_VERSION < 0x00020000 flags |= HWLOC_TOPOLOGY_FLAG_IO_DEVICES; #else int ret = hwloc_topology_set_io_types_filter(topology, HWLOC_TYPE_FILTER_KEEP_IMPORTANT); if (0 != ret) { return ret; } #endif } // Blacklist the "gl" component due to potential conflicts. 
// See "https://github.com/open-mpi/ompi/issues/10025" for // an explanation #ifdef HWLOC_VERSION_MAJOR #if HWLOC_VERSION_MAJOR > 2 hwloc_topology_set_components(topology, HWLOC_TOPOLOGY_COMPONENTS_FLAG_BLACKLIST, "gl"); #elif HWLOC_VERSION_MAJOR == 2 && HWLOC_VERSION_MINOR >= 1 hwloc_topology_set_components(topology, HWLOC_TOPOLOGY_COMPONENTS_FLAG_BLACKLIST, "gl"); #endif #endif return hwloc_topology_set_flags(topology, flags); } #define PRTE_HWLOC_MAX_STRING 2048 static void print_hwloc_obj(char **output, char *prefix, hwloc_topology_t topo, hwloc_obj_t obj) { hwloc_obj_t obj2; char string[1024], *tmp, *tmp2, *pfx; unsigned i; struct hwloc_topology_support *support; /* print the object type */ hwloc_obj_type_snprintf(string, 1024, obj, 1); pmix_asprintf(&pfx, "\n%s\t", (NULL == prefix) ? "" : prefix); pmix_asprintf(&tmp, "%sType: %s Number of child objects: %u%sName=%s", (NULL == prefix) ? "" : prefix, string, obj->arity, pfx, (NULL == obj->name) ? "NULL" : obj->name); if (0 < hwloc_obj_attr_snprintf(string, 1024, obj, pfx, 1)) { /* print the attributes */ pmix_asprintf(&tmp2, "%s%s%s", tmp, pfx, string); free(tmp); tmp = tmp2; } /* print the cpusets - apparently, some new HWLOC types don't * have cpusets, so protect ourselves here */ if (NULL != obj->cpuset) { hwloc_bitmap_snprintf(string, PRTE_HWLOC_MAX_STRING, obj->cpuset); pmix_asprintf(&tmp2, "%s%sCpuset: %s", tmp, pfx, string); free(tmp); tmp = tmp2; } if (HWLOC_OBJ_MACHINE == obj->type) { /* root level object - add support values */ support = (struct hwloc_topology_support *) hwloc_topology_get_support(topo); pmix_asprintf(&tmp2, "%s%sBind CPU proc: %s%sBind CPU thread: %s", tmp, pfx, (support->cpubind->set_thisproc_cpubind) ? "TRUE" : "FALSE", pfx, (support->cpubind->set_thisthread_cpubind) ? "TRUE" : "FALSE"); free(tmp); tmp = tmp2; pmix_asprintf(&tmp2, "%s%sBind MEM proc: %s%sBind MEM thread: %s", tmp, pfx, (support->membind->set_thisproc_membind) ? 
"TRUE" : "FALSE", pfx, (support->membind->set_thisthread_membind) ? "TRUE" : "FALSE"); free(tmp); tmp = tmp2; } pmix_asprintf(&tmp2, "%s%s\n", (NULL == *output) ? "" : *output, tmp); free(tmp); free(pfx); pmix_asprintf(&pfx, "%s\t", (NULL == prefix) ? "" : prefix); for (i = 0; i < obj->arity; i++) { obj2 = obj->children[i]; /* print the object */ print_hwloc_obj(&tmp2, pfx, topo, obj2); } free(pfx); if (NULL != *output) { free(*output); } *output = tmp2; } int prte_hwloc_print(char **output, char *prefix, hwloc_topology_t src) { hwloc_obj_t obj; char *tmp = NULL; /* get root object */ obj = hwloc_get_root_obj(src); /* print it */ print_hwloc_obj(&tmp, prefix, src, obj); *output = tmp; return PRTE_SUCCESS; } prrte-3.0.13/src/util/0000775000175000017500000000000015145263240014664 5ustar alastairalastairprrte-3.0.13/src/util/bipartite_graph.c0000664000175000017500000006655115145263240020211 0ustar alastairalastair/* * Copyright (c) 2014-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2017 Amazon.com, Inc. or its affiliates. All Rights * reserved. * Copyright (c) 2019 Intel, Inc. All rights reserved. * Copyright (c) 2021-2025 Nanook Consulting All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #include "prte_config.h" #include #include #include "constants.h" #include "prte_stdint.h" #include "src/class/pmix_list.h" #include "src/class/pmix_pointer_array.h" #include "src/runtime/prte_globals.h" #include "src/util/error.h" #include "src/util/pmix_output.h" #include "src/util/bipartite_graph.h" #include "src/util/bipartite_graph_internal.h" #ifndef container_of # define container_of(ptr, type, member) ((type *) (((char *) (ptr)) - offsetof(type, member))) #endif #define GRAPH_DEBUG 0 #if GRAPH_DEBUG # define GRAPH_DEBUG_OUT(args) printf(args) #else # define GRAPH_DEBUG_OUT(args) \ do { \ } while (0) #endif #define MAX_COST INT64_MAX #ifndef MAX # define MAX(a, b) ((a) > (b) ? 
(a) : (b)) #endif #ifndef MIN # define MIN(a, b) ((a) < (b) ? (a) : (b)) #endif #define f(i, j) flow[n * i + j] /* ensure that (a+b<=max) */ static inline void check_add64_overflow(int64_t a, int64_t b) { #if PRTE_ENABLE_DEBUG assert(!((b > 0) && (a > (INT64_MAX - b))) && !((b < 0) && (a < (INT64_MIN - b)))); #else PRTE_HIDE_UNUSED_PARAMS(a, b); #endif } static void edge_constructor(prte_bp_graph_edge_t *e) { PMIX_CONSTRUCT(&e->outbound_li, pmix_list_item_t); PMIX_CONSTRUCT(&e->inbound_li, pmix_list_item_t); } static void edge_destructor(prte_bp_graph_edge_t *e) { PMIX_DESTRUCT(&e->outbound_li); PMIX_DESTRUCT(&e->inbound_li); } PMIX_CLASS_DECLARATION(prte_bp_graph_edge_t); PMIX_CLASS_INSTANCE(prte_bp_graph_edge_t, pmix_object_t, edge_constructor, edge_destructor); #if GRAPH_DEBUG static void dump_vec(const char *name, int *vec, int n) __prte_attribute_unused__; static void dump_vec(const char *name, int *vec, int n) { int i; fprintf(stderr, "%s={", name); for (i = 0; i < n; ++i) { fprintf(stderr, "[%d]=%2d, ", i, vec[i]); } fprintf(stderr, "}\n"); } static void dump_vec64(const char *name, int64_t *vec, int n) __prte_attribute_unused__; static void dump_vec64(const char *name, int64_t *vec, int n) { int i; fprintf(stderr, "%s={", name); for (i = 0; i < n; ++i) { fprintf(stderr, "[%d]=%2" PRIi64 ", ", i, vec[i]); } fprintf(stderr, "}\n"); } static void dump_flow(int *flow, int n) __prte_attribute_unused__; static void dump_flow(int *flow, int n) { int u, v; fprintf(stderr, "flow={\n"); for (u = 0; u < n; ++u) { fprintf(stderr, "u=%d| ", u); for (v = 0; v < n; ++v) { fprintf(stderr, "%2d,", f(u, v)); } fprintf(stderr, "\n"); } fprintf(stderr, "}\n"); } #endif static int get_capacity(prte_bp_graph_t *g, int source, int target) { prte_bp_graph_edge_t *e; CHECK_VERTEX_RANGE(g, source); CHECK_VERTEX_RANGE(g, target); FOREACH_OUT_EDGE(g, source, e, 0) { assert(e->source == source); if (e->target == target) { return e->capacity; } } return 0; } static int 
set_capacity(prte_bp_graph_t *g, int source, int target, int cap) { prte_bp_graph_edge_t *e; CHECK_VERTEX_RANGE(g, source); CHECK_VERTEX_RANGE(g, target); FOREACH_OUT_EDGE(g, source, e, PRTE_ERR_NOT_FOUND) { assert(e->source == source); if (e->target == target) { e->capacity = cap; return PRTE_SUCCESS; } } return PRTE_ERR_NOT_FOUND; } static void free_vertex(prte_bp_graph_t *g, prte_bp_graph_vertex_t *v) { if (NULL != v) { if (NULL != g->v_data_cleanup_fn && NULL != v->v_data) { g->v_data_cleanup_fn(v->v_data); } free(v); } } int prte_bp_graph_create(prte_bp_graph_cleanup_fn_t v_data_cleanup_fn, prte_bp_graph_cleanup_fn_t e_data_cleanup_fn, prte_bp_graph_t **g_out) { int err; prte_bp_graph_t *g = NULL; if (NULL == g_out) { return PRTE_ERR_BAD_PARAM; } *g_out = NULL; g = calloc(1, sizeof(*g)); if (NULL == g) { PRTE_ERROR_LOG(PRTE_ERR_OUT_OF_RESOURCE); err = PRTE_ERR_OUT_OF_RESOURCE; goto out_free_g; } g->source_idx = -1; g->sink_idx = -1; g->v_data_cleanup_fn = v_data_cleanup_fn; g->e_data_cleanup_fn = e_data_cleanup_fn; /* now that we essentially have an empty graph, add vertices to it */ PMIX_CONSTRUCT(&g->vertices, pmix_pointer_array_t); err = pmix_pointer_array_init(&g->vertices, 0, INT_MAX, 32); if (PRTE_SUCCESS != err) { goto out_free_g; } *g_out = g; return PRTE_SUCCESS; out_free_g: free(g); return err; } int prte_bp_graph_free(prte_bp_graph_t *g) { int i; prte_bp_graph_edge_t *e, *next; prte_bp_graph_vertex_t *v; /* remove all edges from all out_edges lists */ for (i = 0; i < NUM_VERTICES(g); ++i) { v = V_ID_TO_PTR(g, i); LIST_FOREACH_SAFE_CONTAINED(e, next, &v->out_edges, prte_bp_graph_edge_t, outbound_li) { pmix_list_remove_item(&v->out_edges, &e->outbound_li); PMIX_RELEASE(e); } } /* now remove from all in_edges lists and free the edge */ for (i = 0; i < NUM_VERTICES(g); ++i) { v = V_ID_TO_PTR(g, i); LIST_FOREACH_SAFE_CONTAINED(e, next, &v->in_edges, prte_bp_graph_edge_t, inbound_li) { pmix_list_remove_item(&v->in_edges, &e->inbound_li); if (NULL != 
g->e_data_cleanup_fn && NULL != e->e_data) { g->e_data_cleanup_fn(e->e_data); } PMIX_RELEASE(e); } free_vertex(g, V_ID_TO_PTR(g, i)); pmix_pointer_array_set_item(&g->vertices, i, NULL); } g->num_vertices = 0; PMIX_DESTRUCT(&g->vertices); free(g); return PRTE_SUCCESS; } int prte_bp_graph_clone(const prte_bp_graph_t *g, bool copy_user_data, prte_bp_graph_t **g_clone_out) { int err; int i; int index; prte_bp_graph_t *gx; prte_bp_graph_edge_t *e; if (NULL == g_clone_out) { return PRTE_ERR_BAD_PARAM; } *g_clone_out = NULL; if (copy_user_data) { pmix_output(0, "[%s:%d:%s] user data copy requested but not yet supported", __FILE__, __LINE__, __func__); abort(); return PRTE_ERR_FATAL; } gx = NULL; err = prte_bp_graph_create(NULL, NULL, &gx); if (PRTE_SUCCESS != err) { return err; } assert(NULL != gx); /* reconstruct all vertices */ for (i = 0; i < NUM_VERTICES(g); ++i) { err = prte_bp_graph_add_vertex(gx, NULL, &index); if (PRTE_SUCCESS != err) { goto out_free_gx; } assert(index == i); } /* now reconstruct all the edges (iterate by source vertex only to avoid * double-adding) */ for (i = 0; i < NUM_VERTICES(g); ++i) { prte_bp_graph_vertex_t *_v; _v = V_ID_TO_PTR(g, i); if (NULL == _v) { err = PRTE_ERR_NOT_FOUND; goto out_free_gx; } LIST_FOREACH_CONTAINED(e, &(_v->out_edges), prte_bp_graph_edge_t, outbound_li) { assert(i == e->source); err = prte_bp_graph_add_edge(gx, e->source, e->target, e->cost, e->capacity, NULL); if (PRTE_SUCCESS != err) { goto out_free_gx; } } } *g_clone_out = gx; return PRTE_SUCCESS; out_free_gx: /* we don't reach in and manipulate gx's state directly, so it should be * safe to use the standard free function */ prte_bp_graph_free(gx); return err; } int prte_bp_graph_indegree(const prte_bp_graph_t *g, int vertex) { prte_bp_graph_vertex_t *v; v = V_ID_TO_PTR(g, vertex); if (NULL == v) { PRTE_ERROR_LOG(PRTE_ERR_NOT_FOUND); return PRTE_ERR_NOT_FOUND; } return pmix_list_get_size(&v->in_edges); } int prte_bp_graph_outdegree(const prte_bp_graph_t *g, int 
vertex)
{
    prte_bp_graph_vertex_t *v;

    v = V_ID_TO_PTR(g, vertex);
    if (NULL == v) {
        /* mirror prte_bp_graph_indegree instead of dereferencing NULL */
        PRTE_ERROR_LOG(PRTE_ERR_NOT_FOUND);
        return PRTE_ERR_NOT_FOUND;
    }
    return pmix_list_get_size(&v->out_edges);
}

/* Add a directed edge from vertex "from" to vertex "to" with the given
 * cost, capacity and user data. The edge is reference counted: one
 * reference is held by the out_edges list of "from", one by the in_edges
 * list of "to".
 *
 * Returns PRTE_SUCCESS, PRTE_ERR_BAD_PARAM for an out-of-range vertex, a
 * cost equal to MAX_COST or a negative capacity, PRTE_EXISTS if the edge
 * is already present, PRTE_ERR_NOT_FOUND if a vertex cannot be looked up
 * and PRTE_ERR_OUT_OF_RESOURCE if the edge cannot be allocated. */
int prte_bp_graph_add_edge(prte_bp_graph_t *g, int from, int to, int64_t cost, int capacity,
                           void *e_data)
{
    prte_bp_graph_edge_t *e;
    prte_bp_graph_vertex_t *v_from, *v_to;

    if (from < 0 || from >= NUM_VERTICES(g)) {
        return PRTE_ERR_BAD_PARAM;
    }
    if (to < 0 || to >= NUM_VERTICES(g)) {
        return PRTE_ERR_BAD_PARAM;
    }
    if (cost == MAX_COST) {
        return PRTE_ERR_BAD_PARAM;
    }
    if (capacity < 0) {
        /* negative cost is fine, but negative capacity is not currently
         * handled appropriately */
        return PRTE_ERR_BAD_PARAM;
    }
    FOREACH_OUT_EDGE(g, from, e, PRTE_ERR_NOT_FOUND)
    {
        assert(e->source == from);
        if (e->target == to) {
            return PRTE_EXISTS;
        }
    }

    /* resolve both endpoints before allocating so that a failed lookup
     * cannot leak the new edge */
    v_from = V_ID_TO_PTR(g, from);
    v_to = V_ID_TO_PTR(g, to);
    if (NULL == v_from || NULL == v_to) {
        PRTE_ERROR_LOG(PRTE_ERR_NOT_FOUND);
        return PRTE_ERR_NOT_FOUND;
    }

    /* this reference is owned by the out_edges list */
    e = PMIX_NEW(prte_bp_graph_edge_t);
    if (NULL == e) {
        PRTE_ERROR_LOG(PRTE_ERR_OUT_OF_RESOURCE);
        return PRTE_ERR_OUT_OF_RESOURCE;
    }

    e->source = from;
    e->target = to;
    e->cost = cost;
    e->capacity = capacity;
    e->e_data = e_data;

    pmix_list_append(&v_from->out_edges, &e->outbound_li);

    PMIX_RETAIN(e); /* ref owned by in_edges list */
    pmix_list_append(&v_to->in_edges, &e->inbound_li);

    return PRTE_SUCCESS;
}

int prte_bp_graph_add_vertex(prte_bp_graph_t *g, void *v_data, int *index_out)
{
    prte_bp_graph_vertex_t *v;

    v = calloc(1, sizeof(*v));
    if (NULL == v) {
        PRTE_ERROR_LOG(PRTE_ERR_OUT_OF_RESOURCE);
        return PRTE_ERR_OUT_OF_RESOURCE;
    }

    /* add to the ptr array early to simplify cleanup in the incredibly rare
     * chance that adding fails */
    v->v_index = pmix_pointer_array_add(&g->vertices, v);
    if (-1 == v->v_index) {
        free(v);
        PRTE_ERROR_LOG(PRTE_ERR_OUT_OF_RESOURCE);
        return PRTE_ERR_OUT_OF_RESOURCE;
    }
    assert(v->v_index == g->num_vertices);

    ++g->num_vertices;

    v->v_data = v_data;
    PMIX_CONSTRUCT(&v->out_edges, pmix_list_t);
    PMIX_CONSTRUCT(&v->in_edges,
pmix_list_t); if (NULL != index_out) { *index_out = v->v_index; } return PRTE_SUCCESS; } int prte_bp_graph_order(const prte_bp_graph_t *g) { return NUM_VERTICES(g); } /** * shrink a flow matrix for old_n vertices to one works for new_n * * Takes a matrix stored in a one-dimensional array of size (old_n*old_n) and * "truncates" it into a dense array of size (new_n*new_n) that only contain * the flow values for the first new_n vertices. E.g., it turns this array * (old_n=5, new_n=3): * * 1 2 3 4 5 * 6 7 8 9 10 * 11 12 13 14 15 * 16 17 18 19 20 * 21 22 23 24 25 * * into this array; * * 1 2 3 * 6 7 8 * 11 12 13 */ static void shrink_flow_matrix(int *flow, int old_n, int new_n) { int u, v; assert(old_n > new_n); for (u = 0; u < new_n; ++u) { for (v = 0; v < new_n; ++v) { flow[new_n * u + v] = flow[old_n * u + v]; } } } /** * Compute the so-called "bottleneck" capacity value for a path "pred" through * graph "gx". */ static int bottleneck_path(prte_bp_graph_t *gx, int n, int *pred) { int u, v; int min; PRTE_HIDE_UNUSED_PARAMS(n); min = INT_MAX; FOREACH_UV_ON_PATH(pred, gx->source_idx, gx->sink_idx, u, v) { int cap_f_uv = get_capacity(gx, u, v); min = MIN(min, cap_f_uv); } return min; } /** * This routine implements the Bellman-Ford shortest paths algorithm, slightly * specialized for our forumlation of flow networks: * http://en.wikipedia.org/wiki/Bellman%E2%80%93Ford_algorithm * * Specifically, it attempts to find the shortest path from "source" to * "target". It returns true if such a path was found, false otherwise. Any * found path is returned in "pred" as a predecessor chain (i.e., pred[sink] * is the start of the path and pred[pred[sink]] is its predecessor, etc.). * * The contents of "pred" are only valid if this routine returns true. 
*/ bool prte_bp_graph_bellman_ford(prte_bp_graph_t *gx, int source, int target, int *pred) { int64_t *dist; int i; int n; int u, v; bool found_target = false; if (NULL == gx) { PRTE_ERROR_LOG(PRTE_ERR_BAD_PARAM); return false; } if (NULL == pred) { PRTE_ERROR_LOG(PRTE_ERR_BAD_PARAM); return false; } if (source < 0 || source >= NUM_VERTICES(gx)) { return PRTE_ERR_BAD_PARAM; } if (target < 0 || target >= NUM_VERTICES(gx)) { return PRTE_ERR_BAD_PARAM; } /* initialize */ n = prte_bp_graph_order(gx); dist = malloc(n * sizeof(*dist)); if (NULL == dist) { PRTE_ERROR_LOG(PRTE_ERR_OUT_OF_RESOURCE); goto out; } for (i = 0; i < n; ++i) { dist[i] = MAX_COST; pred[i] = -1; } dist[source] = 0; /* relax repeatedly */ for (i = 1; i < NUM_VERTICES(gx); ++i) { bool relaxed = false; #if GRAPH_DEBUG dump_vec("pred", pred, NUM_VERTICES(gx)); dump_vec64("dist", dist, NUM_VERTICES(gx)); #endif for (u = 0; u < NUM_VERTICES(gx); ++u) { prte_bp_graph_edge_t *e_ptr; FOREACH_OUT_EDGE(gx, u, e_ptr, false) { v = e_ptr->target; /* make sure to only construct paths from edges that actually have * non-zero capacity */ if (e_ptr->capacity > 0 && dist[u] != MAX_COST) { /* avoid signed overflow for "infinity" */ check_add64_overflow(dist[u], e_ptr->cost); if ((dist[u] + e_ptr->cost) < dist[v]) { dist[v] = dist[u] + e_ptr->cost; pred[v] = u; relaxed = true; } } } } /* optimization: stop if an outer iteration did not succeed in * changing any dist/pred values (already at optimum) */ if (!relaxed) { GRAPH_DEBUG_OUT(("relaxed==false, breaking out")); break; } } /* check for negative-cost cycles */ for (u = 0; u < NUM_VERTICES(gx); ++u) { prte_bp_graph_edge_t *e_ptr; prte_bp_graph_vertex_t *_v; _v = V_ID_TO_PTR(gx, u); if (NULL == _v) { goto out; } LIST_FOREACH_CONTAINED(e_ptr, &(_v->out_edges), prte_bp_graph_edge_t, outbound_li) { v = e_ptr->target; if (e_ptr->capacity > 0 && dist[u] != MAX_COST && /* avoid signed overflow */ (dist[u] + e_ptr->cost) < dist[v]) { pmix_output(0, "[%s:%d:%s] negative-weight 
cycle detected", __FILE__, __LINE__, __func__); abort(); goto out; } } } if (dist[target] != MAX_COST) { found_target = true; } out: #if GRAPH_DEBUG dump_vec("pred", pred, NUM_VERTICES(gx)); #endif assert(pred[source] == -1); free(dist); GRAPH_DEBUG_OUT(("bellman_ford: found_target=%s", found_target ? "true" : "false")); return found_target; } /** * Transform the given connected, bipartite, acyclic digraph into a flow * network (i.e., add a source and a sink, with the source connected to vertex * set V1 and the sink connected to vertex set V2). This also creates * residual edges suitable for augmenting-path algorithms. All "source" nodes * in the original graph are considered to have an output of 1 and "sink" * nodes can take an input of 1. The result is that "forward" edges are all * created with capacity=1, "backward" (residual) edges are created with * capacity=0. * * After this routine, all capacities are "residual capacities" ($c_f$ in the * literature). * * Initial flow throughout the network is assumed to be 0 at all edges. * * The graph will be left in an undefined state if an error occurs (though * freeing it should still be safe). */ int prte_bp_graph_bipartite_to_flow(prte_bp_graph_t *g) { int err; int order; int u, v; int num_left, num_right; /* grab size before adding extra vertices */ order = prte_bp_graph_order(g); err = prte_bp_graph_add_vertex(g, NULL, &g->source_idx); if (PRTE_SUCCESS != err) { return err; } err = prte_bp_graph_add_vertex(g, NULL, &g->sink_idx); if (PRTE_SUCCESS != err) { return err; } /* The networks we are interested in are bipartite and have edges only * from one partition to the other partition (none vice versa). We * visualize this conventionally with all of the source vertices on the * left-hand side of an imaginary rendering of the graph and the target * vertices on the right-hand side of the rendering. The direction * "forward" is considered to be moving from left to right. 
*/ num_left = 0; num_right = 0; for (u = 0; u < order; ++u) { int inbound = prte_bp_graph_indegree(g, u); int outbound = prte_bp_graph_outdegree(g, u); if (inbound > 0 && outbound > 0) { pmix_output(0, "[%s:%d:%s] graph is not (unidirectionally) bipartite", __FILE__, __LINE__, __func__); abort(); } else if (inbound > 0) { /* "right" side of the graph, create edges to the sink */ ++num_right; err = prte_bp_graph_add_edge(g, u, g->sink_idx, 0, /* no cost */ /*capacity=*/1, /*e_data=*/NULL); if (PRTE_SUCCESS != err) { GRAPH_DEBUG_OUT(("add_edge failed")); return err; } } else if (outbound > 0) { /* "left" side of the graph, create edges to the source */ ++num_left; err = prte_bp_graph_add_edge(g, g->source_idx, u, 0, /* no cost */ /*capacity=*/1, /*e_data=*/NULL); if (PRTE_SUCCESS != err) { GRAPH_DEBUG_OUT(("add_edge failed")); return err; } } } /* it doesn't make sense to extend this graph with a source and sink * unless */ if (num_right == 0 || num_left == 0) { return PRTE_ERR_BAD_PARAM; } /* now run through and create "residual" edges as well (i.e., create edges * in the reverse direction with 0 initial flow and a residual capacity of * $c_f(u,v)=c(u,v)-f(u,v)$). Residual edges can exist where no edges * exist in the original graph. */ order = prte_bp_graph_order(g); /* need residuals for newly created source/sink edges too */ for (u = 0; u < order; ++u) { prte_bp_graph_edge_t *e_ptr; FOREACH_OUT_EDGE(g, u, e_ptr, PRTE_ERR_NOT_FOUND) { v = e_ptr->target; /* (u,v) exists, add (v,u) if not already present. Cost is * negative for these edges because "giving back" flow pays us * back any cost already incurred. */ err = prte_bp_graph_add_edge(g, v, u, -e_ptr->cost, /*capacity=*/0, /*e_data=*/NULL); if (PRTE_SUCCESS != err && PRTE_EXISTS != err) { return err; } } } return PRTE_SUCCESS; } /** * Implements the "Successive Shortest Path" algorithm for computing the * minimum cost flow problem. This is a generalized version of the * Ford-Fulkerson algorithm. 
There are two major changes from F-F: * 1. In addition to capacities and flows, this algorithm pays attention to * costs for traversing an edge. This particular function leaves the * caller's costs alone but sets its own capacities. * 2. Shortest paths are computed using the cost metric. * * The algorithm's sketch looks like: * 1 Transform network G by adding source and sink, create residual edges * 2 Initial flow x is zero * 3 while ( Gx contains a path from s to t ) do * 4 Find any shortest path P from s to t * 5 Augment current flow x along P * 6 update Gx * * This function mutates the given graph (adding vertices and edges, changing * capacties, etc.), so callers may wish to clone the graph before calling * this routine. * * The result is an array of (u,v) vertex pairs, where (u,v) is an edge in the * original graph which has non-zero flow. * * Returns OMPI error codes like PRTE_SUCCESS/PRTE_ERR_OUT_OF_RESOURCE. * * This version of the algorithm has a theoretical upper bound on its running * time of O(|V|^2 * |E| * f), where f is essentially the maximum flow in the * graph. In our case, f=min(|V1|,|V2|), where V1 and V2 are the two * constituent sets of the bipartite graph. * * This algorithm's performance could probably be improved by modifying it to * use vertex potentials and Dijkstra's Algorithm instead of Bellman-Ford. * Normally vertex potentials are needed in order to use Dijkstra's safely, * but our graphs are constrained enough that this may not be necessary. * Switching to Dijkstra's implemented with a heap should yield a reduced * upper bound of O(|V| * |E| * f * log(|V|)). Let's consider this a future * enhancement for the time being, since it's not obvious at this point that * the faster running time will be worth the additional implementation * complexity. 
*/ static int min_cost_flow_ssp(prte_bp_graph_t *gx, int **flow_out) { int err = PRTE_SUCCESS; int n; int *pred = NULL; int *flow = NULL; int u, v; int c; GRAPH_DEBUG_OUT(("begin min_cost_flow_ssp()")); if (NULL == flow_out) { return PRTE_ERR_BAD_PARAM; } *flow_out = NULL; n = prte_bp_graph_order(gx); pred = malloc(n * sizeof(*pred)); if (NULL == pred) { PRTE_ERROR_LOG(PRTE_ERR_OUT_OF_RESOURCE); err = PRTE_ERR_OUT_OF_RESOURCE; goto out_error; } /* "flow" is a 2d matrix of current flow values, all initialized to zero */ flow = calloc(n * n, sizeof(*flow)); if (NULL == flow) { PRTE_ERROR_LOG(PRTE_ERR_OUT_OF_RESOURCE); err = PRTE_ERR_OUT_OF_RESOURCE; goto out_error; } /* loop as long as paths exist from source to sink */ while (prte_bp_graph_bellman_ford(gx, gx->source_idx, gx->sink_idx, pred)) { int cap_f_path; /* find any shortest path P from s to t (already present in pred) */ GRAPH_DEBUG_OUT(("start outer iteration of SSP algorithm")); #if GRAPH_DEBUG dump_vec("pred", pred, NUM_VERTICES(gx)); dump_flow(flow, n); #endif cap_f_path = bottleneck_path(gx, n, pred); /* augment current flow along P */ FOREACH_UV_ON_PATH(pred, gx->source_idx, gx->sink_idx, u, v) { assert(u == pred[v]); f(u, v) = f(u, v) + cap_f_path; /* "forward" edge */ f(v, u) = f(v, u) - cap_f_path; /* residual network edge */ assert(f(u, v) == -f(v, u)); /* skew symmetry invariant */ /* update Gx as we go along: decrease capacity by this new * augmenting flow */ c = get_capacity(gx, u, v) - cap_f_path; assert(c >= 0); err = set_capacity(gx, u, v, c); if (PRTE_SUCCESS != err) { pmix_output(0, "[%s:%d:%s] unable to set capacity, missing edge?", __FILE__, __LINE__, __func__); abort(); } c = get_capacity(gx, v, u) + cap_f_path; assert(c >= 0); err = set_capacity(gx, v, u, c); if (PRTE_SUCCESS != err) { pmix_output(0, "[%s:%d:%s] unable to set capacity, missing edge?", __FILE__, __LINE__, __func__); abort(); } } } out: *flow_out = flow; free(pred); return err; out_error: free(*flow_out); 
GRAPH_DEBUG_OUT(("returning error %d", err)); goto out; } int prte_bp_graph_solve_bipartite_assignment(const prte_bp_graph_t *g, int *num_match_edges_out, int **match_edges_out) { int err; int i; int u, v; int n; int *flow = NULL; prte_bp_graph_t *gx = NULL; if (NULL == match_edges_out || NULL == num_match_edges_out) { return PRTE_ERR_BAD_PARAM; } *num_match_edges_out = 0; *match_edges_out = NULL; /* don't perturb the caller's data structure */ err = prte_bp_graph_clone(g, false, &gx); if (PRTE_SUCCESS != err) { GRAPH_DEBUG_OUT(("prte_bp_graph_clone failed")); goto out; } /* Transform gx into a residual flow network with capacities, a source, a * sink, and residual edges. We track the actual flow separately in the * "flow" matrix. Initial capacity for every forward edge is 1. Initial * capacity for every backward (residual) edge is 0. * * For the remainder of this routine (and the ssp routine) the capacities * refer to residual capacities ($c_f$) not capacities in the original * graph. For convenience we adjust all residual capacities as we go * along rather than recomputing them from the flow and capacities in the * original graph. This allows many other graph operations to have no * direct knowledge of the flow matrix. */ err = prte_bp_graph_bipartite_to_flow(gx); if (PRTE_SUCCESS != err) { GRAPH_DEBUG_OUT(("bipartite_to_flow failed")); PRTE_ERROR_LOG(err); return err; } /* Use the SSP algorithm to compute the min-cost flow over this network. * Edges with non-zero flow in the result should be part of the matching. * * Note that the flow array returned is sized for gx, not for g. Index * accordingly later on. 
*/ err = min_cost_flow_ssp(gx, &flow); if (PRTE_SUCCESS != err) { GRAPH_DEBUG_OUT(("min_cost_flow_ssp failed")); return err; } assert(NULL != flow); /* don't care about new edges in gx, only old edges in g */ n = prte_bp_graph_order(g); #if GRAPH_DEBUG dump_flow(flow, NUM_VERTICES(gx)); #endif shrink_flow_matrix(flow, prte_bp_graph_order(gx), n); #if GRAPH_DEBUG dump_flow(flow, n); #endif for (u = 0; u < n; ++u) { for (v = 0; v < n; ++v) { if (f(u, v) > 0) { ++(*num_match_edges_out); } } } if (0 == *num_match_edges_out) { /* avoid attempting to allocate a zero-byte buffer */ goto out; } *match_edges_out = malloc(*num_match_edges_out * 2 * sizeof(int)); if (NULL == *match_edges_out) { *num_match_edges_out = 0; PRTE_ERROR_LOG(PRTE_ERR_OUT_OF_RESOURCE); err = PRTE_ERR_OUT_OF_RESOURCE; goto out; } i = 0; for (u = 0; u < n; ++u) { for (v = 0; v < n; ++v) { /* flow exists on this edge so include this edge in the matching */ if (f(u, v) > 0) { (*match_edges_out)[i++] = u; (*match_edges_out)[i++] = v; } } } out: free(flow); prte_bp_graph_free(gx); return err; } prrte-3.0.13/src/util/sys_limits.c0000664000175000017500000002135515145263240017235 0ustar alastairalastair/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. 
* Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2020 NVIDIA Corporation. All rights reserved. * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #include "prte_config.h" #include #include #ifdef HAVE_SYS_TYPES_H # include #endif #ifdef HAVE_SYS_TIME_H # include #endif #ifdef HAVE_SYS_RESOURCE_H # include #endif #ifdef HAVE_UNISTD_H # include #endif #include "constants.h" #include "src/runtime/prte_globals.h" #include "src/util/pmix_argv.h" #include "src/util/pmix_output.h" #include "src/util/pmix_show_help.h" #include "src/util/sys_limits.h" /* * Create and initialize storage for the system limits */ PRTE_EXPORT prte_sys_limits_t prte_sys_limits = { /* initialized = */ false, /* num_files = */ -1, /* num_procs = */ -1, /* file_size = */ 0}; static int prte_setlimit(int resource, char *value, rlim_t *out) { struct rlimit rlim, rlim_set; rlim_t maxlim; rlim.rlim_cur = 0; if (0 == strcmp(value, "max")) { maxlim = (rlim_t) -1; } else if (0 == strncmp(value, "unlimited", strlen(value))) { maxlim = RLIM_INFINITY; } else { maxlim = strtol(value, NULL, 10); } if (0 <= getrlimit(resource, &rlim)) { if (rlim.rlim_max < maxlim) { rlim_set.rlim_cur = rlim.rlim_max; rlim_set.rlim_max = rlim.rlim_max; } else { rlim_set.rlim_cur = maxlim; rlim_set.rlim_max = maxlim; } if (0 <= setrlimit(resource, &rlim_set)) { rlim.rlim_cur = rlim_set.rlim_cur; } else if (RLIM_INFINITY == maxlim) { /* if unlimited wasn't allowed, try to set * to max allowed */ rlim_set.rlim_cur = rlim.rlim_max; rlim_set.rlim_max = rlim.rlim_max; if (0 <= setrlimit(resource, &rlim_set)) { rlim.rlim_cur = rlim_set.rlim_cur; } else { return PRTE_ERROR; } } else { return PRTE_ERROR; } } else { return PRTE_ERROR; } *out = rlim.rlim_cur; return PRTE_SUCCESS; } int prte_util_init_sys_limits(char **errmsg) { char **lims, **lim = NULL, *setlim; int i, rc = 
PRTE_ERROR; rlim_t value; /* if limits were not given, then nothing to do */ if (NULL == prte_set_max_sys_limits) { return PRTE_SUCCESS; } /* parse the requested limits to set */ lims = PMIX_ARGV_SPLIT_COMPAT(prte_set_max_sys_limits, ','); if (NULL == lims) { return PRTE_ERR_OUT_OF_RESOURCE; } /* each limit is expressed as a "param:value" pair */ for (i = 0; NULL != lims[i]; i++) { lim = PMIX_ARGV_SPLIT_COMPAT(lims[i], ':'); if (1 == PMIX_ARGV_COUNT_COMPAT(lim)) { setlim = "max"; } else { setlim = lim[1]; } /* for historical reasons, a value of "1" means * that we set the limits on #files, #children, * and max file size */ if (0 == strcmp(lim[0], "1")) { #if HAVE_DECL_RLIMIT_NOFILE if (PRTE_SUCCESS != prte_setlimit(RLIMIT_NOFILE, "max", &value)) { *errmsg = pmix_show_help_string("help-prte-util.txt", "sys-limit-failed", true, "openfiles", "max"); goto out; } prte_sys_limits.num_files = value; #endif #if HAVE_DECL_RLIMIT_NPROC if (PRTE_SUCCESS != prte_setlimit(RLIMIT_NPROC, "max", &value)) { *errmsg = pmix_show_help_string("help-prte-util.txt", "sys-limit-failed", true, "maxchildren", "max"); goto out; } prte_sys_limits.num_procs = value; #endif #if HAVE_DECL_RLIMIT_FSIZE if (PRTE_SUCCESS != prte_setlimit(RLIMIT_FSIZE, "max", &value)) { *errmsg = pmix_show_help_string("help-prte-util.txt", "sys-limit-failed", true, "filesize", "max"); goto out; } prte_sys_limits.file_size = value; #endif break; } else if (0 == strcmp(lim[0], "0")) { /* user didn't want anything set */ break; } /* process them separately */ if (0 == strcmp(lim[0], "core")) { #if HAVE_DECL_RLIMIT_CORE if (PRTE_SUCCESS != prte_setlimit(RLIMIT_CORE, setlim, &value)) { *errmsg = pmix_show_help_string("help-prte-util.txt", "sys-limit-failed", true, "openfiles", setlim); goto out; } #endif } else if (0 == strcmp(lim[0], "filesize")) { #if HAVE_DECL_RLIMIT_FSIZE if (PRTE_SUCCESS != prte_setlimit(RLIMIT_FSIZE, setlim, &value)) { *errmsg = pmix_show_help_string("help-prte-util.txt", "sys-limit-failed", true, 
"filesize", setlim); goto out; } prte_sys_limits.file_size = value; #endif } else if (0 == strcmp(lim[0], "maxmem")) { #if HAVE_DECL_RLIMIT_AS if (PRTE_SUCCESS != prte_setlimit(RLIMIT_AS, setlim, &value)) { *errmsg = pmix_show_help_string("help-prte-util.txt", "sys-limit-failed", true, "maxmem", setlim); goto out; } #endif } else if (0 == strcmp(lim[0], "openfiles")) { #if HAVE_DECL_RLIMIT_NOFILE if (PRTE_SUCCESS != prte_setlimit(RLIMIT_NOFILE, setlim, &value)) { *errmsg = pmix_show_help_string("help-prte-util.txt", "sys-limit-failed", true, "openfiles", setlim); goto out; } prte_sys_limits.num_files = value; #endif } else if (0 == strcmp(lim[0], "stacksize")) { #if HAVE_DECL_RLIMIT_STACK if (PRTE_SUCCESS != prte_setlimit(RLIMIT_STACK, setlim, &value)) { *errmsg = pmix_show_help_string("help-prte-util.txt", "sys-limit-failed", true, "stacksize", setlim); goto out; } #endif } else if (0 == strcmp(lim[0], "maxchildren")) { #if HAVE_DECL_RLIMIT_NPROC if (PRTE_SUCCESS != prte_setlimit(RLIMIT_NPROC, setlim, &value)) { *errmsg = pmix_show_help_string("help-prte-util.txt", "sys-limit-failed", true, "maxchildren", setlim); goto out; } prte_sys_limits.num_procs = value; #endif } else { *errmsg = pmix_show_help_string("help-prte-util.txt", "sys-limit-unrecognized", true, lim[0], setlim); goto out; } PMIX_ARGV_FREE_COMPAT(lim); lim = NULL; } /* indicate we initialized the limits structure */ prte_sys_limits.initialized = true; rc = PRTE_SUCCESS; out: PMIX_ARGV_FREE_COMPAT(lims); if (NULL != lim) { PMIX_ARGV_FREE_COMPAT(lim); } return rc; } int prte_getpagesize(void) { static int page_size = -1; if (page_size != -1) { // testing in a loop showed sysconf() took ~5 usec vs ~0.3 usec with it cached return page_size; } #ifdef HAVE_GETPAGESIZE return page_size = getpagesize(); #elif defined(_SC_PAGESIZE) return page_size = sysconf(_SC_PAGESIZE); #elif defined(_SC_PAGE_SIZE) return page_size = sysconf(_SC_PAGE_SIZE); #else return page_size = 65536; /* safer to overestimate than 
under */ #endif } prrte-3.0.13/src/util/error.h0000664000175000017500000000446515145263240016177 0ustar alastairalastair/* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2017 FUJITSU LIMITED. All rights reserved. * Copyright (c) 2019-2020 Intel, Inc. All rights reserved. * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #ifndef PRTE_UTIL_ERROR_H #define PRTE_UTIL_ERROR_H #include "prte_config.h" #include "src/util/pmix_output.h" BEGIN_C_DECLS #define PRTE_ERROR_LOG(r) \ do { \ if (PRTE_ERR_SILENT != (r)) { \ pmix_output(0, "PRTE ERROR: %s in file %s at line %d", prte_strerror((r)), __FILE__, \ __LINE__); \ } \ } while (0) /** * Return string for given error message * * Accepts an error number argument \c errnum and returns a pointer to * the corresponding message string. The result is returned in a * static buffer that should not be released with free(). * * If errnum is \c PRTE_ERR_IN_ERRNO, the system strerror is called * with an argument of the current value of \c errno and the resulting * string is returned. * * If the errnum is not a known value, the returned value may be * overwritten by subsequent calls to prte_strerror. 
*/ PRTE_EXPORT const char *prte_strerror(int errnum); END_C_DECLS #endif /* PRTE_UTIL_ERROR_H */ prrte-3.0.13/src/util/malloc.c0000664000175000017500000001230715145263240016302 0ustar alastairalastair/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2008 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2018 Triad National Security, LLC. All rights * reserved. * Copyright (c) 2019 Intel, Inc. All rights reserved. * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2021-2025 Nanook Consulting All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #include "prte_config.h" #include #include "src/runtime/prte_globals.h" #include "src/util/malloc.h" #include "src/util/pmix_output.h" /* * Undefine "malloc" and "free" */ #if defined(malloc) # undef malloc #endif #if defined(calloc) # undef calloc #endif #if defined(free) # undef free #endif #if defined(realloc) # undef realloc #endif /* * Public variables */ int prte_malloc_debug_level = PRTE_MALLOC_DEBUG_LEVEL; int prte_malloc_output = -1; /* * Private variables */ #if PRTE_ENABLE_DEBUG static pmix_output_stream_t malloc_stream; #endif #if PRTE_ENABLE_DEBUG /* * Finalize the malloc debug interface */ void prte_malloc_finalize(void) { if (-1 != prte_malloc_output) { pmix_output_close(prte_malloc_output); prte_malloc_output = -1; PMIX_DESTRUCT(&malloc_stream); } } /* * Initialize the malloc debug interface */ void prte_malloc_init(void) { PMIX_CONSTRUCT(&malloc_stream, pmix_output_stream_t); malloc_stream.lds_is_debugging = true; malloc_stream.lds_verbose_level = 5; malloc_stream.lds_prefix = "malloc debug: "; malloc_stream.lds_want_stderr = true; prte_malloc_output = pmix_output_open(&malloc_stream); } #else void prte_malloc_init(void) { } void prte_malloc_finalize(void) { } #endif /* PRTE_ENABLE_DEBUG */ /* * Debug version of malloc */ void *prte_malloc(size_t size, const char *file, int line) { void *addr; #if PRTE_ENABLE_DEBUG if (prte_malloc_debug_level > 1) { if (size <= 0) { pmix_output(prte_malloc_output, "Request for %ld bytes (%s, %d)", (long) size, file, line); } } #else PRTE_HIDE_UNUSED_PARAMS(file, line); #endif /* PRTE_ENABLE_DEBUG */ addr = malloc(size); #if PRTE_ENABLE_DEBUG if (prte_malloc_debug_level > 0) { if (NULL == addr) { pmix_output(prte_malloc_output, "Request for %ld bytes failed (%s, %d)", (long) size, file, line); } } #endif /* PRTE_ENABLE_DEBUG */ return addr; } /* * Debug version of calloc */ void *prte_calloc(size_t nmembers, size_t size, const char *file, int line) { 
void *addr; #if PRTE_ENABLE_DEBUG if (prte_malloc_debug_level > 1) { if (size <= 0) { pmix_output(prte_malloc_output, "Request for %ld zeroed elements of size %ld (%s, %d)", (long) nmembers, (long) size, file, line); } } #else PRTE_HIDE_UNUSED_PARAMS(file, line); #endif /* PRTE_ENABLE_DEBUG */ addr = calloc(nmembers, size); #if PRTE_ENABLE_DEBUG if (prte_malloc_debug_level > 0) { if (NULL == addr) { pmix_output(prte_malloc_output, "Request for %ld zeroed elements of size %ld failed (%s, %d)", (long) nmembers, (long) size, file, line); } } #endif /* PRTE_ENABLE_DEBUG */ return addr; } /* * Debug version of realloc */ void *prte_realloc(void *ptr, size_t size, const char *file, int line) { void *addr; #if PRTE_ENABLE_DEBUG if (prte_malloc_debug_level > 1) { if (size <= 0) { if (NULL == ptr) { pmix_output(prte_malloc_output, "Realloc NULL for %ld bytes (%s, %d)", (long) size, file, line); } else { pmix_output(prte_malloc_output, "Realloc %p for %ld bytes (%s, %d)", ptr, (long) size, file, line); } } } #else PRTE_HIDE_UNUSED_PARAMS(file, line); #endif /* PRTE_ENABLE_DEBUG */ addr = realloc(ptr, size); #if PRTE_ENABLE_DEBUG if (prte_malloc_debug_level > 0) { if (NULL == addr) { pmix_output(prte_malloc_output, "Realloc %p for %ld bytes failed (%s, %d)", ptr, (long) size, file, line); } } #endif /* PRTE_ENABLE_DEBUG */ return addr; } /* * Debug version of free */ void prte_free(void *addr, const char *file, int line) { PRTE_HIDE_UNUSED_PARAMS(file, line); free(addr); } void prte_malloc_debug(int level) { #if PRTE_ENABLE_DEBUG prte_malloc_debug_level = level; #else PRTE_HIDE_UNUSED_PARAMS(level); #endif /* PRTE_ENABLE_DEBUG */ } prrte-3.0.13/src/util/malloc.h0000664000175000017500000001137715145263240016315 0ustar alastairalastair/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. 
* Copyright (c) 2004-2008 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2018 Triad National Security, LLC. All rights * reserved. * Copyright (c) 2019 Intel, Inc. All rights reserved. * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2021 Nanook Consulting. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ /** @file */ #ifndef PRTE_MALLOC_H #define PRTE_MALLOC_H #include #include /* * THIS FILE CANNOT INCLUDE ANY OTHER PRTE HEADER FILES!!! * * It is included via . Hence, it should not * include ANY other files, nor should it include "prte_config.h". * */ /* * Set PRTE_MALLOC_DEBUG_LEVEL to * 0 for no checking * 1 for basic error checking * 2 for more error checking */ #ifndef PRTE_MALLOC_DEBUG_LEVEL # define PRTE_MALLOC_DEBUG_LEVEL 2 #endif BEGIN_C_DECLS /** * Initialize malloc debug output. * * This function is invoked to set up a dedicated output stream for * malloc debug functions. It does \em not (currently) do anything * other than that (i.e., no internal accounting for tracking * malloc/free statements, etc.). * * It is invoked as part of prte_init(). Although this function is * not \em necessary for PRTE_MALLOC() and PRTE_FREE(), it is strongly * recommended because no output messages -- regardless of the * malloc debug level set by prte_malloc_debug() -- will be displayed * unless this function is invoked first. */ PRTE_EXPORT void prte_malloc_init(void); PRTE_EXPORT void prte_malloc_finalize(void); /** * \internal * * Back-end error-checking malloc function for PRTE (you should use * the normal malloc() instead of this function).
* * @param size The number of bytes to allocate * @param file Typically the __FILE__ macro * @param line Typically the __LINE__ macro * * This function is only used when --enable-mem-debug was specified to * configure (or by default if you're building in a SVN checkout). */ PRTE_EXPORT void *prte_malloc(size_t size, const char *file, int line) __prte_attribute_malloc__ __prte_attribute_warn_unused_result__; /** * \internal * * Back-end error-checking calloc function for PRTE (you should use * the normal calloc() instead of this function). * * @param nmembers Number of elements to malloc * @param size Size of each elements * @param file Typically the __FILE__ macro * @param line Typically the __LINE__ macro * * This function is only used when --enable-mem-debug was specified to * configure (or by default if you're building in a SVN checkout). */ PRTE_EXPORT void *prte_calloc(size_t nmembers, size_t size, const char *file, int line) __prte_attribute_malloc__ __prte_attribute_warn_unused_result__; /** * \internal * * Back-end error-checking realloc function for PRTE (you should use * the normal realloc() instead of this function). * * @param ptr Pointer to reallocate * @param size The number of bytes to allocate * @param file Typically the __FILE__ macro * @param line Typically the __LINE__ macro * * This function is only used when --enable-mem-debug was specified to * configure (or by default if you're building in a SVN checkout). */ PRTE_EXPORT void *prte_realloc(void *ptr, size_t size, const char *file, int line) __prte_attribute_malloc__ __prte_attribute_warn_unused_result__; /** * \internal * * Back-end error-checking free function for PRTE (you should use * free() instead of this function). * * @param addr Address on the heap to free() * @param file Typically the __FILE__ macro * @param line Typically the __LINE__ macro * * This function is only used when --enable-mem-debug was specified * to configure (or by default if you're building in a SVN * checkout). 
*/ PRTE_EXPORT void prte_free(void *addr, const char *file, int line) __prte_attribute_nonnull__(1); /** * Used to set the debug level for malloc debug. * * @param level The level of debugging (0 = none, 1 = some, 2 = more) * * This value defaults to the PRTE_MALLOC_DEBUG_LEVEL. */ PRTE_EXPORT void prte_malloc_debug(int level); END_C_DECLS #endif /* PRTE_MALLOC_H */ prrte-3.0.13/src/util/qsort.h0000664000175000017500000000210015145263240016176 0ustar alastairalastair/* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2019 Intel, Inc. All rights reserved. * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2021 Nanook Consulting. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #ifndef PRTE_QSORT_H #define PRTE_QSORT_H BEGIN_C_DECLS void prte_qsort(void *a, size_t n, size_t es, int (*cmp)(const void *, const void *)); END_C_DECLS #endif prrte-3.0.13/src/util/bit_ops.h0000664000175000017500000001100615145263240016472 0ustar alastairalastair/* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2011 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* Copyright (c) 2019 Intel, Inc. All rights reserved. * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #ifndef PRTE_BIT_OPS_H #define PRTE_BIT_OPS_H #include "prefetch.h" /** * Calculates the highest bit in an integer * * @param value The integer value to examine * @param start Position to start looking * * @returns pos Position of highest-set integer or -1 if none are set. * * Look at the integer "value" starting at position "start", and move * to the right. Return the index of the highest bit that is set to * 1. * * WARNING: *NO* error checking is performed. This is meant to be a * fast inline function. * Using __builtin_clz (count-leading-zeros) uses 3 cycles instead * of 17 cycles (on average value, with start=32) * compared to the loop-version (on Intel Nehalem -- with icc-12.1.0 -O2). */ static inline int prte_hibit(int value, int start) { unsigned int mask; #if PRTE_C_HAVE_BUILTIN_CLZ /* Only look at the part that the caller wanted looking at */ mask = value & ((1 << start) - 1); if (PMIX_UNLIKELY(0 == mask)) { return -1; } start = (8 * sizeof(int) - 1) - __builtin_clz(mask); #else --start; mask = 1 << start; for (; start >= 0; --start, mask >>= 1) { if (value & mask) { break; } } #endif return start; } /** * Returns the cube dimension of a given value. * * @param value The integer value to examine * * @returns cubedim The smallest cube dimension containing that value * * Look at the integer "value" and calculate the smallest power of two * dimension that contains that value. * * WARNING: *NO* error checking is performed. This is meant to be a * fast inline function. * Using __builtin_clz (count-leading-zeros) uses 3 cycles instead of 50 cycles * compared to the loop-version (on Intel Nehalem -- with icc-12.1.0 -O2). 
*/ static inline int prte_cube_dim(int value) { int dim, size; #if PRTE_C_HAVE_BUILTIN_CLZ if (PMIX_UNLIKELY(1 >= value)) { return 0; } size = 8 * sizeof(int); dim = size - __builtin_clz(value - 1); #else for (dim = 0, size = 1; size < value; ++dim, size <<= 1) /* empty */ ; #endif return dim; } /** * @brief Returns next power-of-two of the given value. * * @param value The integer value to return power of 2 * * @returns The next power of two * * WARNING: *NO* error checking is performed. This is meant to be a * fast inline function. * Using __builtin_clz (count-leading-zeros) uses 4 cycles instead of 77 * compared to the loop-version (on Intel Nehalem -- with icc-12.1.0 -O2). */ static inline int prte_next_poweroftwo(int value) { int power2; #if PRTE_C_HAVE_BUILTIN_CLZ if (PMIX_UNLIKELY(0 == value)) { return 1; } power2 = 1 << (8 * sizeof(int) - __builtin_clz(value)); #else for (power2 = 1; value > 0; value >>= 1, power2 <<= 1) /* empty */ ; #endif return power2; } /** * @brief Returns next power-of-two of the given value (and the value itselve if already * power-of-two). * * @param value The integer value to return power of 2 * * @returns The next power of two (inclusive) * * WARNING: *NO* error checking is performed. This is meant to be a * fast inline function. * Using __builtin_clz (count-leading-zeros) uses 4 cycles instead of 56 * compared to the loop-version (on Intel Nehalem -- with icc-12.1.0 -O2). */ static inline int prte_next_poweroftwo_inclusive(int value) { int power2; #if PRTE_C_HAVE_BUILTIN_CLZ if (PMIX_UNLIKELY(1 >= value)) { return 1; } power2 = 1 << (8 * sizeof(int) - __builtin_clz(value - 1)); #else for (power2 = 1; power2 < value; power2 <<= 1) /* empty */ ; #endif return power2; } #endif /* PRTE_BIT_OPS_H */ prrte-3.0.13/src/util/parse_options.h0000664000175000017500000000242315145263240017723 0ustar alastairalastair/* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. 
All rights reserved. * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2019 Intel, Inc. All rights reserved. * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2022 Nanook Consulting. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ /** @file: * */ #ifndef _PRTE_PARSE_OPTIONS_H_ #define _PRTE_PARSE_OPTIONS_H_ #include "prte_config.h" BEGIN_C_DECLS PRTE_EXPORT void pmix_util_parse_range_options(char *input, char ***output); PRTE_EXPORT void prte_util_get_ranges(char *inp, char ***startpts, char ***endpts); END_C_DECLS #endif prrte-3.0.13/src/util/sys_limits.h0000664000175000017500000000350115145263240017233 0ustar alastairalastair/* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2021 Nanook Consulting. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #ifndef PRTE_SYS_LIMITS_H #define PRTE_SYS_LIMITS_H #include "prte_config.h" #ifdef HAVE_SYS_TYPES_H # include #endif BEGIN_C_DECLS /* define a structure to hold the various limits we find * so that users can neatly access them */ typedef struct prte_sys_limits_t { bool initialized; int num_files; int num_procs; size_t file_size; } prte_sys_limits_t; /* since we only want to do this once, we will store the * values in the following locations - provide access here */ PRTE_EXPORT extern prte_sys_limits_t prte_sys_limits; /* Get the system resource limits and, if requested, set * them to the specified limit */ PRTE_EXPORT int prte_util_init_sys_limits(char **errmsg); /** * Get pagesize */ PRTE_EXPORT int prte_getpagesize(void); END_C_DECLS #endif /* PRTE_STRNCPY_H */ prrte-3.0.13/src/util/error_strings.c0000664000175000017500000002106715145263240017740 0ustar alastairalastair/* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2010-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2011-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2014-2020 Intel, Inc. All rights reserved. * Copyright (c) 2021 Nanook Consulting. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */

/** @file **/

#include "prte_config.h"
#include "constants.h"

/* NOTE(review): the header names appear to have been stripped from the three
 * system #include directives below; restore them from the upstream file. */
#include
#ifdef HAVE_SYS_SIGNAL_H
#    include
#else
#    include
#endif

#include "src/mca/plm/plm_types.h"
#include "src/runtime/prte_globals.h"
#include "src/util/error_strings.h"

/*
 * Map a job state to a fixed, human-readable description.
 *
 * The returned pointer refers to a string literal and must not be modified
 * or freed.  States without a case below yield "UNKNOWN STATE!".
 */
const char *prte_job_state_to_str(prte_job_state_t state)
{
    switch (state) {
    case PRTE_JOB_STATE_UNDEF:
        return "UNDEFINED";
    case PRTE_JOB_STATE_INIT:
        return "PENDING INIT";
    case PRTE_JOB_STATE_INIT_COMPLETE:
        return "INIT_COMPLETE";
    case PRTE_JOB_STATE_ALLOCATE:
        return "PENDING ALLOCATION";
    case PRTE_JOB_STATE_ALLOCATION_COMPLETE:
        return "ALLOCATION COMPLETE";
    case PRTE_JOB_STATE_MAP:
        return "PENDING MAPPING";
    case PRTE_JOB_STATE_MAP_COMPLETE:
        return "MAP COMPLETE";
    case PRTE_JOB_STATE_SYSTEM_PREP:
        return "PENDING FINAL SYSTEM PREP";
    case PRTE_JOB_STATE_LAUNCH_DAEMONS:
        return "PENDING DAEMON LAUNCH";
    case PRTE_JOB_STATE_DAEMONS_LAUNCHED:
        return "DAEMONS LAUNCHED";
    case PRTE_JOB_STATE_DAEMONS_REPORTED:
        return "ALL DAEMONS REPORTED";
    case PRTE_JOB_STATE_VM_READY:
        return "VM READY";
    case PRTE_JOB_STATE_LAUNCH_APPS:
        return "PENDING APP LAUNCH";
    case PRTE_JOB_STATE_SEND_LAUNCH_MSG:
        return "SENDING LAUNCH MSG";
    case PRTE_JOB_STATE_RUNNING:
        return "RUNNING";
    case PRTE_JOB_STATE_SUSPENDED:
        return "SUSPENDED";
    case PRTE_JOB_STATE_REGISTERED:
        return "SYNC REGISTERED";
    case PRTE_JOB_STATE_LOCAL_LAUNCH_COMPLETE:
        return "LOCAL LAUNCH COMPLETE";
    case PRTE_JOB_STATE_READY_FOR_DEBUG:
        return "READY FOR DEBUG";
    case PRTE_JOB_STATE_STARTED:
        return "JOB STARTED";
    case PRTE_JOB_STATE_UNTERMINATED:
        return "UNTERMINATED";
    case PRTE_JOB_STATE_TERMINATED:
        return "NORMALLY TERMINATED";
    case PRTE_JOB_STATE_NOTIFY_COMPLETED:
        return "NOTIFY COMPLETED";
    case PRTE_JOB_STATE_NOTIFIED:
        return "NOTIFIED";
    case PRTE_JOB_STATE_ALL_JOBS_COMPLETE:
        return "ALL JOBS COMPLETE";
    /* the description marks this value as a boundary rather than a real
     * state; the cases after it describe failures */
    case PRTE_JOB_STATE_ERROR:
        return "ARTIFICIAL BOUNDARY - ERROR";
    case PRTE_JOB_STATE_KILLED_BY_CMD:
        return "KILLED BY INTERNAL COMMAND";
    case PRTE_JOB_STATE_ABORTED:
        return "ABORTED";
    case PRTE_JOB_STATE_FAILED_TO_START:
        return "FAILED TO START";
    case PRTE_JOB_STATE_ABORTED_BY_SIG:
        return "ABORTED BY SIGNAL";
    case PRTE_JOB_STATE_ABORTED_WO_SYNC:
        return "TERMINATED WITHOUT SYNC";
    case PRTE_JOB_STATE_COMM_FAILED:
        return "COMMUNICATION FAILURE";
    case PRTE_JOB_STATE_SENSOR_BOUND_EXCEEDED:
        return "SENSOR BOUND EXCEEDED";
    case PRTE_JOB_STATE_CALLED_ABORT:
        return "PROC CALLED ABORT";
    case PRTE_JOB_STATE_HEARTBEAT_FAILED:
        return "HEARTBEAT FAILED";
    case PRTE_JOB_STATE_NEVER_LAUNCHED:
        return "NEVER LAUNCHED";
    case PRTE_JOB_STATE_ABORT_ORDERED:
        return "ABORT IN PROGRESS";
    case PRTE_JOB_STATE_NON_ZERO_TERM:
        return "AT LEAST ONE PROCESS EXITED WITH NON-ZERO STATUS";
    case PRTE_JOB_STATE_FAILED_TO_LAUNCH:
        return "FAILED TO LAUNCH";
    case PRTE_JOB_STATE_FORCED_EXIT:
        return "FORCED EXIT";
    case PRTE_JOB_STATE_DAEMONS_TERMINATED:
        return "DAEMONS TERMINATED";
    case PRTE_JOB_STATE_SILENT_ABORT:
        return "ERROR REPORTED ELSEWHERE";
    case PRTE_JOB_STATE_REPORT_PROGRESS:
        return "REPORT PROGRESS";
    case PRTE_JOB_STATE_ALLOC_FAILED:
        return "ALLOCATION FAILED";
    case PRTE_JOB_STATE_MAP_FAILED:
        return "MAP FAILED";
    case PRTE_JOB_STATE_CANNOT_LAUNCH:
        return "CANNOT LAUNCH";
    case PRTE_JOB_STATE_FILES_POSN_FAILED:
        return "FILE PREPOSITION FAILED";
    case PRTE_JOB_STATE_FT_CHECKPOINT:
        return "FAULT TOLERANCE CHECKPOINT";
    case PRTE_JOB_STATE_FT_CONTINUE:
        return "FAULT TOLERANCE CONTINUE";
    case PRTE_JOB_STATE_FT_RESTART:
        return "FAULT TOLERANCE RESTART";
    case PRTE_JOB_STATE_ANY:
        return "ANY";
    default:
        return "UNKNOWN STATE!";
    }
}

/*
 * Map an application-context state to a fixed, human-readable description.
 *
 * The returned pointer refers to a string literal and must not be modified
 * or freed.  States without a case below yield "UNKNOWN STATE!".
 */
const char *prte_app_ctx_state_to_str(prte_app_state_t state)
{
    switch (state) {
    case PRTE_APP_STATE_UNDEF:
        return "UNDEFINED";
    case PRTE_APP_STATE_INIT:
        return "PENDING INIT";
    case PRTE_APP_STATE_ALL_MAPPED:
        return "ALL MAPPED";
    case PRTE_APP_STATE_RUNNING:
        return "RUNNING";
    case PRTE_APP_STATE_COMPLETED:
        return "COMPLETED";
    default:
        return "UNKNOWN STATE!";
    }
}

const char
*prte_proc_state_to_str(prte_proc_state_t state) { switch (state) { case PRTE_PROC_STATE_UNDEF: return "UNDEFINED"; case PRTE_PROC_STATE_INIT: return "INITIALIZED"; case PRTE_PROC_STATE_RESTART: return "RESTARTING"; case PRTE_PROC_STATE_TERMINATE: return "MARKED FOR TERMINATION"; case PRTE_PROC_STATE_RUNNING: return "RUNNING"; case PRTE_PROC_STATE_REGISTERED: return "SYNC REGISTERED"; case PRTE_PROC_STATE_IOF_COMPLETE: return "IOF COMPLETE"; case PRTE_PROC_STATE_WAITPID_FIRED: return "WAITPID FIRED"; case PRTE_PROC_STATE_MODEX_READY: return "MODEX READY"; case PRTE_PROC_STATE_READY_FOR_DEBUG: return "READY FOR DEBUG"; case PRTE_PROC_STATE_UNTERMINATED: return "UNTERMINATED"; case PRTE_PROC_STATE_TERMINATED: return "NORMALLY TERMINATED"; case PRTE_PROC_STATE_ERROR: return "ARTIFICIAL BOUNDARY - ERROR"; case PRTE_PROC_STATE_KILLED_BY_CMD: return "KILLED BY INTERNAL COMMAND"; case PRTE_PROC_STATE_ABORTED: return "ABORTED"; case PRTE_PROC_STATE_FAILED_TO_START: return "FAILED TO START"; case PRTE_PROC_STATE_ABORTED_BY_SIG: return "ABORTED BY SIGNAL"; case PRTE_PROC_STATE_TERM_WO_SYNC: return "TERMINATED WITHOUT SYNC"; case PRTE_PROC_STATE_COMM_FAILED: return "COMMUNICATION FAILURE"; case PRTE_PROC_STATE_SENSOR_BOUND_EXCEEDED: return "SENSOR BOUND EXCEEDED"; case PRTE_PROC_STATE_CALLED_ABORT: return "CALLED ABORT"; case PRTE_PROC_STATE_HEARTBEAT_FAILED: return "HEARTBEAT FAILED"; case PRTE_PROC_STATE_MIGRATING: return "MIGRATING"; case PRTE_PROC_STATE_CANNOT_RESTART: return "CANNOT BE RESTARTED"; case PRTE_PROC_STATE_TERM_NON_ZERO: return "EXITED WITH NON-ZERO STATUS"; case PRTE_PROC_STATE_FAILED_TO_LAUNCH: return "FAILED TO LAUNCH"; case PRTE_PROC_STATE_UNABLE_TO_SEND_MSG: return "UNABLE TO SEND MSG"; case PRTE_PROC_STATE_LIFELINE_LOST: return "LIFELINE LOST"; case PRTE_PROC_STATE_NO_PATH_TO_TARGET: return "NO PATH TO TARGET"; case PRTE_PROC_STATE_FAILED_TO_CONNECT: return "FAILED TO CONNECT"; case PRTE_PROC_STATE_PEER_UNKNOWN: return "PEER UNKNOWN"; case 
PRTE_PROC_STATE_ANY: return "ANY"; default: return "UNKNOWN STATE!"; } } const char *prte_node_state_to_str(prte_node_state_t state) { switch (state) { case PRTE_NODE_STATE_UNDEF: return "UNDEF"; case PRTE_NODE_STATE_UNKNOWN: return "UNKNOWN"; case PRTE_NODE_STATE_DOWN: return "DOWN"; case PRTE_NODE_STATE_UP: return "UP"; case PRTE_NODE_STATE_REBOOT: return "REBOOT"; case PRTE_NODE_STATE_DO_NOT_USE: return "DO_NOT_USE"; case PRTE_NODE_STATE_NOT_INCLUDED: return "NOT_INCLUDED"; case PRTE_NODE_STATE_ADDED: return "ADDED"; default: return "UNKNOWN STATE!"; } } prrte-3.0.13/src/util/nidmap.c0000664000175000017500000003202415145263240016301 0ustar alastairalastair/* * Copyright (c) 2016-2020 Intel, Inc. All rights reserved. * Copyright (c) 2018-2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2020 Triad National Security, LLC. All rights * reserved. * Copyright (c) 2021-2025 Nanook Consulting All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ * */ #include "prte_config.h" #include "types.h" #ifdef HAVE_UNISTD_H # include #endif #include #include "src/util/pmix_argv.h" #include "src/mca/errmgr/errmgr.h" #include "src/mca/rmaps/base/base.h" #include "src/rml/rml.h" #include "src/pmix/pmix-internal.h" #include "src/runtime/prte_globals.h" #include "src/util/nidmap.h" int prte_util_nidmap_create(pmix_pointer_array_t *pool, pmix_data_buffer_t *buffer) { char *raw = NULL; pmix_rank_t *vpids = NULL; uint8_t u8; int n, m, ndaemons, nbytes; bool compressed; char **names = NULL; char **aliases = NULL, **als; prte_node_t *nptr; pmix_byte_object_t bo; size_t sz; pmix_status_t rc; /* pack a flag indicating if the HNP was included in the allocation */ if (prte_hnp_is_allocated) { u8 = 1; } else { u8 = 0; } rc = PMIx_Data_pack(PRTE_PROC_MY_NAME, buffer, &u8, 1, PMIX_UINT8); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return rc; } /* pack a flag indicating if we are in a managed allocation */ if (prte_managed_allocation) { u8 = 1; } else { u8 = 0; } rc = PMIx_Data_pack(PRTE_PROC_MY_NAME, buffer, &u8, 1, PMIX_UINT8); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return rc; } /* daemon vpids start from 0 and increase linearly by one * up to the number of nodes in the system. The vpid is * a 32-bit value. We don't know how many of the nodes * in the system have daemons - we may not be using them * all just yet. However, even the largest systems won't * have more than a million nodes for quite some time, * so for now we'll just allocate enough space to hold * them all. 
Someone can optimize this further later */ nbytes = prte_process_info.num_daemons * sizeof(pmix_rank_t); vpids = (pmix_rank_t *) malloc(nbytes); ndaemons = 0; for (n = 0; n < pool->size; n++) { if (NULL == (nptr = (prte_node_t *) pmix_pointer_array_get_item(pool, n))) { continue; } if (NULL == nptr->daemon) { continue; } /* add the hostname to the argv */ PMIX_ARGV_APPEND_NOSIZE_COMPAT(&names, nptr->name); als = NULL; if (NULL != nptr->aliases) { for (m=0; NULL != nptr->aliases[m]; m++) { // skip any localhost entries if (0 == strcmp(nptr->aliases[m], "localhost") || 0 == strcmp(nptr->aliases[m], "127.0.0.1")) { continue; } PMIX_ARGV_APPEND_NOSIZE_COMPAT(&als, nptr->aliases[m]); } raw = PMIX_ARGV_JOIN_COMPAT(als, ','); PMIX_ARGV_FREE_COMPAT(als); PMIX_ARGV_APPEND_NOSIZE_COMPAT(&aliases, raw); free(raw); } else { PMIX_ARGV_APPEND_NOSIZE_COMPAT(&aliases, "PRTENONE"); } /* store the vpid */ vpids[ndaemons] = nptr->daemon->name.rank; ++ndaemons; } /* little protection */ if (NULL == names || NULL == aliases) { PRTE_ERROR_LOG(PRTE_ERR_NOT_FOUND); free(vpids); return PRTE_ERR_NOT_FOUND; } /* construct the string of node names for compression */ raw = PMIX_ARGV_JOIN_COMPAT(names, ','); PMIX_ARGV_FREE_COMPAT(names); if (PMIx_Data_compress((uint8_t *) raw, strlen(raw) + 1, (uint8_t **) &bo.bytes, &sz)) { /* mark that this was compressed */ compressed = true; bo.size = sz; free(raw); } else { /* mark that this was not compressed */ compressed = false; bo.bytes = (char *) raw; bo.size = strlen(raw) + 1; } /* indicate compression */ rc = PMIx_Data_pack(PRTE_PROC_MY_NAME, buffer, &compressed, 1, PMIX_BOOL); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); free(bo.bytes); free(vpids); return rc; } /* add the object */ rc = PMIx_Data_pack(PRTE_PROC_MY_NAME, buffer, &bo, 1, PMIX_BYTE_OBJECT); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); free(bo.bytes); free(vpids); return rc; } free(bo.bytes); /* construct the string of aliases for compression */ raw = PMIX_ARGV_JOIN_COMPAT(aliases, 
';'); PMIX_ARGV_FREE_COMPAT(aliases); if (PMIx_Data_compress((uint8_t *) raw, strlen(raw) + 1, (uint8_t **) &bo.bytes, &sz)) { /* mark that this was compressed */ compressed = true; bo.size = sz; free(raw); } else { /* mark that this was not compressed */ compressed = false; bo.bytes = (char *) raw; bo.size = strlen(raw) + 1; } /* indicate compression */ rc = PMIx_Data_pack(PRTE_PROC_MY_NAME, buffer, &compressed, 1, PMIX_BOOL); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); free(bo.bytes); free(vpids); return rc; } /* add the object */ rc = PMIx_Data_pack(PRTE_PROC_MY_NAME, buffer, &bo, 1, PMIX_BYTE_OBJECT); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); free(bo.bytes); free(vpids); return rc; } free(bo.bytes); /* compress the vpids */ if (PMIx_Data_compress((uint8_t *) vpids, nbytes, (uint8_t **) &bo.bytes, &sz)) { /* mark that this was compressed */ compressed = true; bo.size = sz; free(vpids); } else { /* mark that this was not compressed */ compressed = false; bo.bytes = (char *) vpids; bo.size = ndaemons * sizeof(pmix_rank_t); } /* indicate compression */ rc = PMIx_Data_pack(PRTE_PROC_MY_NAME, buffer, &compressed, 1, PMIX_BOOL); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); free(bo.bytes); return rc; } /* add the object */ rc = PMIx_Data_pack(PRTE_PROC_MY_NAME, buffer, &bo, 1, PMIX_BYTE_OBJECT); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); free(bo.bytes); return rc; } free(bo.bytes); return rc; } int prte_util_decode_nidmap(pmix_data_buffer_t *buf) { uint8_t u8; pmix_rank_t *vpid = NULL; int cnt, n; bool compressed; size_t sz; pmix_byte_object_t pbo; char *raw = NULL, **names = NULL, **aliases = NULL; prte_node_t *nd; prte_job_t *daemons; prte_proc_t *proc; prte_topology_t *t = NULL; pmix_status_t rc; /* unpack the flag indicating if HNP is in allocation */ cnt = 1; rc = PMIx_Data_unpack(PRTE_PROC_MY_NAME, buf, &u8, &cnt, PMIX_UINT8); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto cleanup; } if (1 == u8) { prte_hnp_is_allocated = true; } else { 
prte_hnp_is_allocated = false; } /* unpack the flag indicating if we are in managed allocation */ cnt = 1; rc = PMIx_Data_unpack(PRTE_PROC_MY_NAME, buf, &u8, &cnt, PMIX_UINT8); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto cleanup; } if (1 == u8) { prte_managed_allocation = true; } else { prte_managed_allocation = false; } /* unpack compression flag for node names */ cnt = 1; rc = PMIx_Data_unpack(PRTE_PROC_MY_NAME, buf, &compressed, &cnt, PMIX_BOOL); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto cleanup; } /* unpack the nodename object */ cnt = 1; rc = PMIx_Data_unpack(PRTE_PROC_MY_NAME, buf, &pbo, &cnt, PMIX_BYTE_OBJECT); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto cleanup; } /* if compressed, decompress */ if (compressed) { if (!PMIx_Data_decompress((uint8_t *) pbo.bytes, pbo.size, (uint8_t **) &raw, &sz)) { PRTE_ERROR_LOG(PRTE_ERROR); PMIX_BYTE_OBJECT_DESTRUCT(&pbo); rc = PRTE_ERROR; goto cleanup; } } else { raw = (char *) pbo.bytes; pbo.bytes = NULL; // protect the data pbo.size = 0; } PMIX_BYTE_OBJECT_DESTRUCT(&pbo); names = PMIX_ARGV_SPLIT_COMPAT(raw, ','); free(raw); /* unpack compression flag for node aliases */ cnt = 1; rc = PMIx_Data_unpack(PRTE_PROC_MY_NAME, buf, &compressed, &cnt, PMIX_BOOL); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto cleanup; } /* unpack the aliases object */ cnt = 1; rc = PMIx_Data_unpack(PRTE_PROC_MY_NAME, buf, &pbo, &cnt, PMIX_BYTE_OBJECT); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto cleanup; } /* if compressed, decompress */ if (compressed) { if (!PMIx_Data_decompress((uint8_t *) pbo.bytes, pbo.size, (uint8_t **) &raw, &sz)) { PRTE_ERROR_LOG(PRTE_ERROR); PMIX_BYTE_OBJECT_DESTRUCT(&pbo); rc = PRTE_ERROR; goto cleanup; } } else { raw = (char *) pbo.bytes; pbo.bytes = NULL; // protect the data pbo.size = 0; } PMIX_BYTE_OBJECT_DESTRUCT(&pbo); aliases = PMIX_ARGV_SPLIT_COMPAT(raw, ';'); free(raw); /* unpack compression flag for daemon vpids */ cnt = 1; rc = PMIx_Data_unpack(PRTE_PROC_MY_NAME, buf, 
&compressed, &cnt, PMIX_BOOL); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto cleanup; } /* unpack the vpid object */ cnt = 1; rc = PMIx_Data_unpack(PRTE_PROC_MY_NAME, buf, &pbo, &cnt, PMIX_BYTE_OBJECT); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto cleanup; } /* if compressed, decompress */ if (compressed) { if (!PMIx_Data_decompress((uint8_t *) pbo.bytes, pbo.size, (uint8_t **) &vpid, &sz)) { PRTE_ERROR_LOG(PRTE_ERROR); PMIX_BYTE_OBJECT_DESTRUCT(&pbo); rc = PRTE_ERROR; goto cleanup; } } else { vpid = (pmix_rank_t *) pbo.bytes; sz = pbo.size; pbo.bytes = NULL; pbo.size = 0; } PMIX_BYTE_OBJECT_DESTRUCT(&pbo); /* if we are the HNP, we don't need any of this stuff */ if (PRTE_PROC_IS_MASTER) { rc = PRTE_SUCCESS; goto cleanup; } /* get the daemon job object */ daemons = prte_get_job_data_object(PRTE_PROC_MY_NAME->nspace); /* get our topology */ t = (prte_topology_t *) pmix_pointer_array_get_item(prte_node_topologies, 0); if (NULL == t) { /* should never happen */ PRTE_ERROR_LOG(PRTE_ERR_NOT_FOUND); rc = PRTE_ERR_NOT_FOUND; goto cleanup; } /* create the node pool array - this will include * _all_ nodes known to the allocation */ for (n = 0; NULL != names[n]; n++) { /* do we already have this node? 
*/ nd = (prte_node_t*)pmix_pointer_array_get_item(prte_node_pool, n); if (NULL != nd) { /* check the name */ if (0 != strcmp(nd->name, names[n])) { free(nd->name); nd->name = strdup(names[n]); } if (0 != strcmp(aliases[n], "PRTENONE")) { if (NULL != nd->aliases) { PMIX_ARGV_FREE_COMPAT(nd->aliases); } nd->aliases = PMIX_ARGV_SPLIT_COMPAT(aliases[n], ','); } continue; } /* add this name to the pool */ nd = PMIX_NEW(prte_node_t); nd->name = strdup(names[n]); nd->index = n; pmix_pointer_array_set_item(prte_node_pool, n, nd); /* add any aliases */ if (0 != strcmp(aliases[n], "PRTENONE")) { nd->aliases = PMIX_ARGV_SPLIT_COMPAT(aliases[n], ','); } /* set the topology - always default to homogeneous * as that is the most common scenario */ nd->topology = t; /* record the daemon on it */ proc = (prte_proc_t *) pmix_pointer_array_get_item(daemons->procs, vpid[n]); if (NULL == proc) { proc = PMIX_NEW(prte_proc_t); PMIX_LOAD_PROCID(&proc->name, PRTE_PROC_MY_NAME->nspace, vpid[n]); proc->state = PRTE_PROC_STATE_RUNNING; PRTE_FLAG_SET(proc, PRTE_PROC_FLAG_ALIVE); daemons->num_procs++; pmix_pointer_array_set_item(daemons->procs, proc->name.rank, proc); } PMIX_RETAIN(nd); proc->node = nd; PMIX_RETAIN(proc); nd->daemon = proc; } /* update num procs */ if (prte_process_info.num_daemons != daemons->num_procs) { prte_process_info.num_daemons = daemons->num_procs; /* update the routing tree */ prte_rml_compute_routing_tree(); } cleanup: if (NULL != vpid) { free(vpid); } if (NULL != names) { PMIX_ARGV_FREE_COMPAT(names); } return rc; } prrte-3.0.13/src/util/ethtool.c0000664000175000017500000000430715145263240016512 0ustar alastairalastair/* * Copyright (c) 2016 Karol Mroz. All rights reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2019 Intel, Inc. All rights reserved. * Copyright (c) 2021-2022 Nanook Consulting. 
All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #include "prte_config.h" #include #include #ifdef HAVE_UNISTD_H # include #endif #ifdef HAVE_SYS_TYPES_H # include #endif #ifdef HAVE_SYS_SOCKET_H # include #endif #ifdef HAVE_NET_IF_H # include #endif #ifdef HAVE_LINUX_ETHTOOL_H # include #endif #ifdef HAVE_SYS_IOCTL_H # include #endif #ifdef HAVE_LINUX_SOCKIOS_H # include #endif #include "src/runtime/prte_globals.h" #include "src/util/ethtool.h" #include "src/util/pmix_if.h" #include "src/util/pmix_string_copy.h" /* * Obtain an appropriate bandwidth for the interface if_name. On Linux, we * get this via an ioctl(). Elsewhere or in the error case, we return the * speed as 0. */ unsigned int prte_ethtool_get_speed(const char *if_name) { unsigned int speed = 0; #if defined(HAVE_DECL_SIOCETHTOOL) && defined(HAVE_STRUCT_IFREQ) && defined(HAVE_STRUCT_ETHTOOL_CMD) int sockfd; struct ifreq ifr; struct ethtool_cmd edata = { .cmd = ETHTOOL_GSET, }; sockfd = socket(PF_INET, SOCK_DGRAM, 0); if (sockfd < 0) { return 0; } memset(&ifr, 0, sizeof(struct ifreq)); pmix_string_copy(ifr.ifr_name, if_name, PMIX_IF_NAMESIZE); ifr.ifr_data = (char *) &edata; if (ioctl(sockfd, SIOCETHTOOL, &ifr) < 0) { goto out; } # if HAVE_DECL_ETHTOOL_CMD_SPEED speed = ethtool_cmd_speed(&edata); # elif defined(HAVE_STRUCT_ETHTOOL_CMD_SPEED_HI) speed = (edata.speed_hi << 16) | edata.speed; # else speed = edata.speed; # endif if (UINT_MAX == speed) { speed = 0; } out: close(sockfd); #else PRTE_HIDE_UNUSED_PARAMS(if_name); #endif return speed; } prrte-3.0.13/src/util/name_fns.c0000664000175000017500000003416515145263240016627 0ustar alastairalastair/* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016-2020 Intel, Inc. All rights reserved. * Copyright (c) 2018-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #include "prte_config.h" #include "constants.h" #include "types.h" #include #include #include "src/threads/pmix_tsd.h" #include "src/util/pmix_printf.h" #include "src/util/pmix_string_copy.h" #include "src/mca/errmgr/errmgr.h" #include "src/util/name_fns.h" #define PRTE_PRINT_NAME_ARGS_MAX_SIZE 1024 #define PRTE_PRINT_NAME_ARG_NUM_BUFS 16 /* constructor - used to initialize namelist instance */ static void prte_namelist_construct(prte_namelist_t *list) { PMIX_LOAD_PROCID(&list->name, NULL, PMIX_RANK_INVALID); } /* define instance of prte_class_t */ PMIX_CLASS_INSTANCE(prte_namelist_t, /* type name */ pmix_list_item_t, /* parent "class" name */ prte_namelist_construct, /* constructor */ NULL); /* destructor */ static bool fns_init = false; static pmix_tsd_key_t print_args_tsd_key; char *prte_print_args_null = "NULL"; typedef struct { char *buffers[PRTE_PRINT_NAME_ARG_NUM_BUFS]; int cntr; } prte_print_args_buffers_t; static void buffer_cleanup(void *value) { int i; prte_print_args_buffers_t *ptr; if (NULL != value) { ptr = (prte_print_args_buffers_t *) value; for (i = 0; i < PRTE_PRINT_NAME_ARG_NUM_BUFS; i++) { free(ptr->buffers[i]); } free(ptr); } } static prte_print_args_buffers_t *get_print_name_buffer(void) { prte_print_args_buffers_t *ptr; int ret, i; if (!fns_init) { /* setup the print_args function */ if 
(PRTE_SUCCESS != (ret = pmix_tsd_key_create(&print_args_tsd_key, buffer_cleanup))) { PRTE_ERROR_LOG(ret); return NULL; } fns_init = true; } ret = pmix_tsd_getspecific(print_args_tsd_key, (void **) &ptr); if (PRTE_SUCCESS != ret) return NULL; if (NULL == ptr) { ptr = (prte_print_args_buffers_t *) malloc(sizeof(prte_print_args_buffers_t)); for (i = 0; i < PRTE_PRINT_NAME_ARG_NUM_BUFS; i++) { ptr->buffers[i] = (char *) malloc((PRTE_PRINT_NAME_ARGS_MAX_SIZE + 1) * sizeof(char)); } ptr->cntr = 0; ret = pmix_tsd_setspecific(print_args_tsd_key, (void *) ptr); } return (prte_print_args_buffers_t *) ptr; } char *prte_util_print_name_args(const pmix_proc_t *name) { prte_print_args_buffers_t *ptr; char *job, *vpid; /* protect against NULL names */ if (NULL == name) { /* get the next buffer */ ptr = get_print_name_buffer(); if (NULL == ptr) { PRTE_ERROR_LOG(PRTE_ERR_OUT_OF_RESOURCE); return prte_print_args_null; } /* cycle around the ring */ if (PRTE_PRINT_NAME_ARG_NUM_BUFS == ptr->cntr) { ptr->cntr = 0; } snprintf(ptr->buffers[ptr->cntr++], PRTE_PRINT_NAME_ARGS_MAX_SIZE, "%s", "[NO-NAME]"); return ptr->buffers[ptr->cntr - 1]; } /* get the jobid, vpid strings first - this will protect us from * stepping on each other's buffer. 
This also guarantees * that the print_args function has been initialized, so * we don't need to duplicate that here */ job = prte_util_print_jobids(name->nspace); vpid = prte_util_print_vpids(name->rank); /* get the next buffer */ ptr = get_print_name_buffer(); if (NULL == ptr) { PRTE_ERROR_LOG(PRTE_ERR_OUT_OF_RESOURCE); return prte_print_args_null; } /* cycle around the ring */ if (PRTE_PRINT_NAME_ARG_NUM_BUFS == ptr->cntr) { ptr->cntr = 0; } snprintf(ptr->buffers[ptr->cntr++], PRTE_PRINT_NAME_ARGS_MAX_SIZE, "[%s,%s]", job, vpid); return ptr->buffers[ptr->cntr - 1]; } char *prte_util_print_jobids(const pmix_nspace_t job) { prte_print_args_buffers_t *ptr; ptr = get_print_name_buffer(); if (NULL == ptr) { PRTE_ERROR_LOG(PRTE_ERR_OUT_OF_RESOURCE); return prte_print_args_null; } /* cycle around the ring */ if (PRTE_PRINT_NAME_ARG_NUM_BUFS == ptr->cntr) { ptr->cntr = 0; } if (0 == strlen(job)) { snprintf(ptr->buffers[ptr->cntr++], PRTE_PRINT_NAME_ARGS_MAX_SIZE, "%s", "[INVALID]"); } else { snprintf(ptr->buffers[ptr->cntr++], PRTE_PRINT_NAME_ARGS_MAX_SIZE, "%s", job); } return ptr->buffers[ptr->cntr - 1]; } char *prte_util_print_job_family(const pmix_nspace_t job) { prte_print_args_buffers_t *ptr; char *cptr; ptr = get_print_name_buffer(); if (NULL == ptr) { PRTE_ERROR_LOG(PRTE_ERR_OUT_OF_RESOURCE); return prte_print_args_null; } /* cycle around the ring */ if (PRTE_PRINT_NAME_ARG_NUM_BUFS == ptr->cntr) { ptr->cntr = 0; } /* see if the job is invalid */ if (PMIX_NSPACE_INVALID(job)) { snprintf(ptr->buffers[ptr->cntr++], PRTE_PRINT_NAME_ARGS_MAX_SIZE, "%s", "[INVALID]"); } else { /* find the '@' sign delimiting the job family */ cptr = strrchr(job, '@'); if (NULL == cptr) { /* this isn't a PRRTE job */ snprintf(ptr->buffers[ptr->cntr++], PRTE_PRINT_NAME_ARGS_MAX_SIZE, "%s", job); } else { *cptr = '\0'; snprintf(ptr->buffers[ptr->cntr++], PRTE_PRINT_NAME_ARGS_MAX_SIZE, "%s", job); *cptr = '@'; } } return ptr->buffers[ptr->cntr - 1]; } char 
*prte_util_print_local_jobid(const pmix_nspace_t job) { prte_print_args_buffers_t *ptr; char *cptr; ptr = get_print_name_buffer(); if (NULL == ptr) { PRTE_ERROR_LOG(PRTE_ERR_OUT_OF_RESOURCE); return prte_print_args_null; } /* cycle around the ring */ if (PRTE_PRINT_NAME_ARG_NUM_BUFS == ptr->cntr) { ptr->cntr = 0; } /* see if the job is invalid */ if (PMIX_NSPACE_INVALID(job)) { snprintf(ptr->buffers[ptr->cntr++], PRTE_PRINT_NAME_ARGS_MAX_SIZE, "%s", "[INVALID]"); } else { /* find the '@' sign delimiting the job family */ cptr = strrchr(job, '@'); if (NULL == cptr) { /* this isn't a PRRTE job */ snprintf(ptr->buffers[ptr->cntr++], PRTE_PRINT_NAME_ARGS_MAX_SIZE, "%s", job); } else { ++cptr; snprintf(ptr->buffers[ptr->cntr++], PRTE_PRINT_NAME_ARGS_MAX_SIZE, "%s", cptr); } } return ptr->buffers[ptr->cntr - 1]; } char *prte_util_print_vpids(const pmix_rank_t vpid) { prte_print_args_buffers_t *ptr; ptr = get_print_name_buffer(); if (NULL == ptr) { PRTE_ERROR_LOG(PRTE_ERR_OUT_OF_RESOURCE); return prte_print_args_null; } /* cycle around the ring */ if (PRTE_PRINT_NAME_ARG_NUM_BUFS == ptr->cntr) { ptr->cntr = 0; } if (PMIX_RANK_INVALID == vpid) { snprintf(ptr->buffers[ptr->cntr++], PRTE_PRINT_NAME_ARGS_MAX_SIZE, "%s", "INVALID"); } else if (PMIX_RANK_WILDCARD == vpid) { snprintf(ptr->buffers[ptr->cntr++], PRTE_PRINT_NAME_ARGS_MAX_SIZE, "%s", "WILDCARD"); } else if (PMIX_RANK_LOCAL_NODE == vpid) { snprintf(ptr->buffers[ptr->cntr++], PRTE_PRINT_NAME_ARGS_MAX_SIZE, "%s", "LOCALNODE"); } else if (PMIX_RANK_LOCAL_PEERS == vpid) { snprintf(ptr->buffers[ptr->cntr++], PRTE_PRINT_NAME_ARGS_MAX_SIZE, "%s", "LOCALPEERS"); } else if (PMIX_RANK_UNDEF == vpid) { snprintf(ptr->buffers[ptr->cntr++], PRTE_PRINT_NAME_ARGS_MAX_SIZE, "%s", "UNDEFINED"); } else { snprintf(ptr->buffers[ptr->cntr++], PRTE_PRINT_NAME_ARGS_MAX_SIZE, "%u", vpid); } return ptr->buffers[ptr->cntr - 1]; } /*** STRING FUNCTIONS ***/ int prte_util_convert_vpid_to_string(char **vpid_string, const pmix_rank_t vpid) { /* 
check for wildcard value - handle appropriately */ if (PMIX_RANK_WILDCARD == vpid) { *vpid_string = strdup("WILDCARD"); } else if (PMIX_RANK_INVALID == vpid) { *vpid_string = strdup("INVALID"); } else if (PMIX_RANK_LOCAL_NODE == vpid) { *vpid_string = strdup("LOCALNODE"); } else if (PMIX_RANK_LOCAL_PEERS == vpid) { *vpid_string = strdup("LOCALPEERS"); } else if (PMIX_RANK_UNDEF == vpid) { *vpid_string = strdup("UNDEFINED"); } else { if (0 > pmix_asprintf(vpid_string, "%u", vpid)) { PRTE_ERROR_LOG(PRTE_ERR_OUT_OF_RESOURCE); return PRTE_ERR_OUT_OF_RESOURCE; } } return PRTE_SUCCESS; } int prte_util_convert_string_to_process_name(pmix_proc_t *name, const char *name_string) { char *p; /* check for NULL string - error */ if (NULL == name_string) { PRTE_ERROR_LOG(PRTE_ERR_BAD_PARAM); return PRTE_ERR_BAD_PARAM; } p = strrchr(name_string, '.'); /** get last field -> vpid */ /* check for error */ if (NULL == p) { PRTE_ERROR_LOG(PRTE_ERR_BAD_PARAM); return PRTE_ERR_BAD_PARAM; } *p = '\0'; PMIX_LOAD_NSPACE(name->nspace, name_string); *p = '.'; ++p; name->rank = strtoul(p, NULL, 10); return PRTE_SUCCESS; } int prte_util_convert_process_name_to_string(char **name_string, const pmix_proc_t *name) { char *job, *rank; if (NULL == name) { /* got an error */ PRTE_ERROR_LOG(PRTE_ERR_BAD_PARAM); return PRTE_ERR_BAD_PARAM; } job = prte_util_print_jobids(name->nspace); rank = prte_util_print_vpids(name->rank); pmix_asprintf(name_string, "%s.%s", job, rank); return PRTE_SUCCESS; } /**** COMPARE NAME FIELDS ****/ int prte_util_compare_name_fields(prte_ns_cmp_bitmask_t fields, const pmix_proc_t *name1, const pmix_proc_t *name2) { /* handle the NULL pointer case */ if (NULL == name1 && NULL == name2) { return PRTE_EQUAL; } else if (NULL == name1) { return PRTE_VALUE2_GREATER; } else if (NULL == name2) { return PRTE_VALUE1_GREATER; } /* in this comparison function, we check for exact equalities. 
* In the case of wildcards, we check to ensure that the fields * actually match those values - thus, a "wildcard" in this * function does not actually stand for a wildcard value, but * rather a specific value - UNLESS the CMP_WILD bitmask value * is set */ /* check job id */ if (PRTE_NS_CMP_JOBID & fields) { if (PRTE_NS_CMP_WILD & fields && (0 == strlen(name1->nspace) || 0 == strlen(name2->nspace))) { goto check_vpid; } if (strlen(name1->nspace) < strlen(name2->nspace)) { return PRTE_VALUE2_GREATER; } else if (strlen(name1->nspace) > strlen(name2->nspace)) { return PRTE_VALUE1_GREATER; } } /* get here if jobid's are equal, or not being checked * now check vpid */ check_vpid: if (PRTE_NS_CMP_VPID & fields) { if (PRTE_NS_CMP_WILD & fields && (PMIX_RANK_WILDCARD == name1->rank || PMIX_RANK_WILDCARD == name2->rank)) { return PRTE_EQUAL; } if (name1->rank < name2->rank) { return PRTE_VALUE2_GREATER; } else if (name1->rank > name2->rank) { return PRTE_VALUE1_GREATER; } } /* only way to get here is if all fields are being checked and are equal, * or jobid not checked, but vpid equal, * only vpid being checked, and equal * return that fact */ return PRTE_EQUAL; } char *prte_pretty_print_timing(int64_t secs, int64_t usecs) { unsigned long minutes, seconds; float fsecs; char *timestring; seconds = secs + (usecs / 1000000l); minutes = seconds / 60l; seconds = seconds % 60l; if (0 == minutes && 0 == seconds) { fsecs = ((float) (secs) *1000000.0 + (float) usecs) / 1000.0; pmix_asprintf(×tring, "%8.2f millisecs", fsecs); } else { pmix_asprintf(×tring, "%3lu:%02lu min:sec", minutes, seconds); } return timestring; } char *prte_util_make_version_string(const char *scope, int major, int minor, int release, const char *greek, const char *repo) { char *str = NULL, *tmp; char temp[BUFSIZ]; temp[BUFSIZ - 1] = '\0'; if (0 == strcmp(scope, "full") || 0 == strcmp(scope, "all")) { snprintf(temp, BUFSIZ - 1, "%d.%d", major, minor); str = strdup(temp); if (release >= 0) { snprintf(temp, 
BUFSIZ - 1, ".%d", release); pmix_asprintf(&tmp, "%s%s", str, temp); free(str); str = tmp; } if (NULL != greek) { pmix_asprintf(&tmp, "%s%s", str, greek); free(str); str = tmp; } if (NULL != repo) { pmix_asprintf(&tmp, "%s%s", str, repo); free(str); str = tmp; } } else if (0 == strcmp(scope, "major")) { snprintf(temp, BUFSIZ - 1, "%d", major); } else if (0 == strcmp(scope, "minor")) { snprintf(temp, BUFSIZ - 1, "%d", minor); } else if (0 == strcmp(scope, "release")) { snprintf(temp, BUFSIZ - 1, "%d", release); } else if (0 == strcmp(scope, "greek")) { str = strdup(greek); } else if (0 == strcmp(scope, "repo")) { str = strdup(repo); } if (NULL == str) { str = strdup(temp); } return str; } prrte-3.0.13/src/util/attr.h0000664000175000017500000006727315145263240016026 0ustar alastairalastair/* * Copyright (c) 2014-2020 Intel, Inc. All rights reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2021-2025 Nanook Consulting All rights reserved. * Copyright (c) 2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
 * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #ifndef PRTE_ATTRS_H #define PRTE_ATTRS_H #include "prte_config.h" #include "types.h" /*** FLAG FOR SETTING ATTRIBUTES - INDICATES IF THE *** ATTRIBUTE IS TO BE SHARED WITH REMOTE PROCS OR NOT */ #define PRTE_ATTR_LOCAL true // for local use only #define PRTE_ATTR_GLOBAL false // include when sending this object /* define the minimum value of the PRTE keys just in * case someone someday puts a layer underneath us */ #define PRTE_ATTR_KEY_BASE 0 /*** ATTRIBUTE FLAGS - never sent anywhere ***/ typedef uint8_t prte_app_context_flags_t; #define PRTE_APP_FLAG_USED_ON_NODE 0x01 // is being used on the local node #define PRTE_APP_FLAG_TOOL 0x02 // this app describes daemons to be co-launched // with the application procs in the other apps // and does not count against allocation #define PRTE_APP_FLAG_COMPUTED 0x04 // num procs for this app were computed and not // given by the user /* APP_CONTEXT ATTRIBUTE KEYS */ #define PRTE_APP_HOSTFILE 1 // string - hostfile #define PRTE_APP_ADD_HOSTFILE 2 // string - hostfile to be added #define PRTE_APP_DASH_HOST 3 // string - hosts specified with -host option #define PRTE_APP_ADD_HOST 4 // string - hosts to be added #define PRTE_APP_USER_CWD 5 // bool - user specified cwd #define PRTE_APP_SSNDIR_CWD 6 // bool - use session dir as cwd #define PRTE_APP_PRELOAD_BIN 7 // bool - move binaries to remote nodes prior to exec #define PRTE_APP_PRELOAD_FILES 8 // string - files to be moved to remote nodes prior to exec #define PRTE_APP_SSTORE_LOAD 9 // string #define PRTE_APP_RECOV_DEF 10 // bool - whether or not a recovery policy was defined #define PRTE_APP_MAX_RESTARTS 11 // int32 - max number of times a process can be restarted #define PRTE_APP_MIN_NODES 12 // int64 - min number of nodes required #define PRTE_APP_MANDATORY 13 // bool - flag if nodes requested in -host are "mandatory" vs "optional" #define PRTE_APP_MAX_PPN 14 // uint32 - maximum number of procs/node for
// this app
#define PRTE_APP_PMIX_PREFIX   15 // string - PMIX_PREFIX value for application procs
#define PRTE_APP_NO_CACHEDIR   16 // bool - flag that a cache dir is not to be specified for a Singularity container
#define PRTE_APP_SET_ENVAR     17 // prte_envar_t - set the given envar to the specified value
#define PRTE_APP_UNSET_ENVAR   18 // string - name of envar to unset, if present
#define PRTE_APP_PREPEND_ENVAR 19 // prte_envar_t - prepend the specified value to the given envar
#define PRTE_APP_APPEND_ENVAR  20 // prte_envar_t - append the specified value to the given envar
#define PRTE_APP_ADD_ENVAR     21 // prte_envar_t - add envar, do not override pre-existing one
#define PRTE_APP_PSET_NAME     23 // string - user-assigned name for the process
                                  // set containing the given process

/* Upper bound of the app-context key range; the node keys start here */
#define PRTE_APP_MAX_KEY 100

/*** NODE FLAGS - never sent anywhere ***/
typedef uint8_t prte_node_flags_t;
#define PRTE_NODE_FLAG_DAEMON_LAUNCHED 0x01 // whether or not the daemon on this node has been launched
#define PRTE_NODE_FLAG_LOC_VERIFIED    0x02 // whether or not the location has been verified - used for
                                            // environments where the daemon's final destination is uncertain
#define PRTE_NODE_FLAG_OVERSUBSCRIBED  0x04 // whether or not this node is oversubscribed
#define PRTE_NODE_FLAG_MAPPED          0x08 // whether we have been added to the current map
#define PRTE_NODE_FLAG_SLOTS_GIVEN     0x10 // the number of slots was specified - used only in non-managed environments
#define PRTE_NODE_NON_USABLE           0x20 // the node is hosting a tool and is NOT to be used for jobs

/*** NODE ATTRIBUTE KEYS - never sent anywhere ***/
#define PRTE_NODE_START_KEY PRTE_APP_MAX_KEY

#define PRTE_NODE_USERNAME  (PRTE_NODE_START_KEY + 1)
#define PRTE_NODE_LAUNCH_ID (PRTE_NODE_START_KEY + 2) // int32 - Launch id needed by some systems to launch a proc on this node
#define PRTE_NODE_HOSTID    (PRTE_NODE_START_KEY + 3) // pmix_rank_t - if this "node" is a coprocessor being hosted on a different node, then
                                                      // we need to know the id of our "host"
// to help any procs on us to determine locality
#define PRTE_NODE_SERIAL_NUMBER (PRTE_NODE_START_KEY + 5) // string - serial number: used if node is a coprocessor
#define PRTE_NODE_PORT          (PRTE_NODE_START_KEY + 6) // int32 - Alternate port to be passed to plm
#define PRTE_NODE_ADD_SLOTS     (PRTE_NODE_START_KEY + 7) // bool - slots are being added to existing node

/* Upper bound of the node key range; the job keys start here */
#define PRTE_NODE_MAX_KEY (PRTE_NODE_START_KEY + 100)

/*** JOB FLAGS - included in prte_job_t transmissions ***/
typedef uint16_t prte_job_flags_t;
#define PRTE_JOB_FLAGS_T PRTE_UINT16

#define PRTE_JOB_FLAG_UPDATED         0x0001 // job has been updated and needs to be included in the pidmap message
#define PRTE_JOB_FLAG_RESTARTED       0x0004 // some procs in this job are being restarted
#define PRTE_JOB_FLAG_ABORTED         0x0008 // did this job abort?
#define PRTE_JOB_FLAG_FORWARD_OUTPUT  0x0020 // forward output from the apps
#define PRTE_JOB_FLAG_DO_NOT_MONITOR  0x0040 // do not monitor apps for termination
#define PRTE_JOB_FLAG_FORWARD_COMM    0x0080 //
#define PRTE_JOB_FLAG_RESTART         0x0200 //
#define PRTE_JOB_FLAG_PROCS_MIGRATING 0x0400 // some procs in job are migrating from one node to another
#define PRTE_JOB_FLAG_OVERSUBSCRIBED  0x0800 // at least one node in the job is oversubscribed
#define PRTE_JOB_FLAG_TOOL            0x1000 // job is a tool and doesn't count against allocations
#define PRTE_JOB_FLAG_LAUNCHER        0x2000 // job is also a launcher
#define PRTE_JOB_FLAG_ERR_REPORTED    0x4000 // error report for job has been output

/*** JOB ATTRIBUTE KEYS ***/
#define PRTE_JOB_START_KEY PRTE_NODE_MAX_KEY

#define PRTE_JOB_LAUNCH_MSG_SENT      (PRTE_JOB_START_KEY + 1) // timeval - time launch message was sent
#define PRTE_JOB_LAUNCH_MSG_RECVD     (PRTE_JOB_START_KEY + 2) // timeval - time launch message was recvd
#define PRTE_JOB_MAX_LAUNCH_MSG_RECVD (PRTE_JOB_START_KEY + 3) // timeval - max time for launch msg to be received
#define PRTE_JOB_CKPT_STATE           (PRTE_JOB_START_KEY + 5) // size_t - ckpt state
#define PRTE_JOB_SNAPSHOT_REF         (PRTE_JOB_START_KEY + 6) //
// string - snapshot reference
#define PRTE_JOB_SNAPSHOT_LOC        (PRTE_JOB_START_KEY + 7)  // string - snapshot location
#define PRTE_JOB_SNAPC_INIT_BAR      (PRTE_JOB_START_KEY + 8)  // prte_grpcomm_coll_id_t - collective id
#define PRTE_JOB_SNAPC_FINI_BAR      (PRTE_JOB_START_KEY + 9)  // prte_grpcomm_coll_id_t - collective id
#define PRTE_JOB_NUM_NONZERO_EXIT    (PRTE_JOB_START_KEY + 10) // int32 - number of procs with non-zero exit codes
#define PRTE_SPAWN_TIMEOUT_EVENT     (PRTE_JOB_START_KEY + 11) // prte_ptr (prte_timer_t*) - timer event for failure detect/response
                                                               // if fails to launch
#define PRTE_JOB_ABORTED_PROC        (PRTE_JOB_START_KEY + 12) // prte_ptr (prte_proc_t*) - proc that caused abort to happen
#define PRTE_JOB_MAPPER              (PRTE_JOB_START_KEY + 13) // bool - job consists of MapReduce mappers
#define PRTE_JOB_REDUCER             (PRTE_JOB_START_KEY + 14) // bool - job consists of MapReduce reducers
#define PRTE_JOB_COMBINER            (PRTE_JOB_START_KEY + 15) // bool - job consists of MapReduce combiners
#define PRTE_JOB_INDEX_ARGV          (PRTE_JOB_START_KEY + 16) // bool - automatically index argvs
#define PRTE_JOB_NO_VM               (PRTE_JOB_START_KEY + 17) // bool - do not use VM launch
#define PRTE_JOB_SPIN_FOR_DEBUG      (PRTE_JOB_START_KEY + 18) // bool - the prted's are to spin while waiting for debugger
#define PRTE_JOB_CONTINUOUS          (PRTE_JOB_START_KEY + 19) // bool - job consists of continuously operating apps
#define PRTE_JOB_RECOVER_DEFINED     (PRTE_JOB_START_KEY + 20) // bool - recovery policy has been defined
#define PRTE_JOB_NON_PRTE_JOB        (PRTE_JOB_START_KEY + 22) // bool - non-prte job
#define PRTE_JOB_STDOUT_TARGET       (PRTE_JOB_START_KEY + 23) // pmix_nspace_t - job that is to receive the stdout (on its
                                                               // stdin) from this one
#define PRTE_JOB_POWER               (PRTE_JOB_START_KEY + 24) // string - power setting for nodes in job
#define PRTE_JOB_MAX_FREQ            (PRTE_JOB_START_KEY + 25) // string - max freq setting for nodes in job
#define PRTE_JOB_MIN_FREQ            (PRTE_JOB_START_KEY + 26) // string - min freq setting for nodes in job
#define PRTE_JOB_GOVERNOR            (PRTE_JOB_START_KEY + 27) // string - governor used for nodes in job
#define PRTE_JOB_FAIL_NOTIFIED       (PRTE_JOB_START_KEY + 28) // bool - abnormal term of proc within job has been reported
#define PRTE_JOB_TERM_NOTIFIED       (PRTE_JOB_START_KEY + 29) // bool - normal term of job has been reported
#define PRTE_JOB_PEER_MODX_ID        (PRTE_JOB_START_KEY + 30) // prte_grpcomm_coll_id_t - collective id
#define PRTE_JOB_INIT_BAR_ID         (PRTE_JOB_START_KEY + 31) // prte_grpcomm_coll_id_t - collective id
#define PRTE_JOB_FINI_BAR_ID         (PRTE_JOB_START_KEY + 32) // prte_grpcomm_coll_id_t - collective id
#define PRTE_JOB_FWDIO_TO_TOOL       (PRTE_JOB_START_KEY + 33) // Forward IO for this job to the tool requesting its spawn
#define PRTE_JOB_LAUNCHED_DAEMONS    (PRTE_JOB_START_KEY + 35) // bool - Job caused new daemons to be spawned
#define PRTE_JOB_REPORT_BINDINGS     (PRTE_JOB_START_KEY + 36) // bool - Report process bindings
#define PRTE_JOB_CPUSET              (PRTE_JOB_START_KEY + 37) // string - "soft" cgroup envelope for the job
#define PRTE_JOB_NOTIFICATIONS       (PRTE_JOB_START_KEY + 38) // string - comma-separated list of desired notifications+methods
#define PRTE_JOB_ROOM_NUM            (PRTE_JOB_START_KEY + 39) // int - number of remote request's hotel room
#define PRTE_JOB_LAUNCH_PROXY        (PRTE_JOB_START_KEY + 40) // pmix_proc_t - name of spawn requestor
#define PRTE_JOB_NSPACE_REGISTERED   (PRTE_JOB_START_KEY + 41) // bool - job has been registered with embedded PMIx server
#define PRTE_JOB_FIXED_DVM           (PRTE_JOB_START_KEY + 42) // bool - do not change the size of the DVM for this job
#define PRTE_JOB_DVM_JOB             (PRTE_JOB_START_KEY + 43) // bool - job is using a DVM
#define PRTE_JOB_CANCELLED           (PRTE_JOB_START_KEY + 44) // bool - job was cancelled
#define PRTE_JOB_OUTPUT_TO_FILE      (PRTE_JOB_START_KEY + 45) // string - path to use as basename of files to which
                                                               // stdout/err is to be directed
#define PRTE_JOB_MERGE_STDERR_STDOUT (PRTE_JOB_START_KEY + 46) // bool - merge stderr into stdout stream
#define PRTE_JOB_TAG_OUTPUT          (PRTE_JOB_START_KEY + 47) // bool - tag stdout/stderr
#define PRTE_JOB_TIMESTAMP_OUTPUT    (PRTE_JOB_START_KEY + 48) // bool - timestamp stdout/stderr
#define PRTE_JOB_MULTI_DAEMON_SIM    (PRTE_JOB_START_KEY + 49) // bool - multiple daemons/node to simulate large cluster
#define PRTE_JOB_NOTIFY_COMPLETION   (PRTE_JOB_START_KEY + 50) // bool - notify parent proc when spawned job terminates
#define PRTE_JOB_TRANSPORT_KEY       (PRTE_JOB_START_KEY + 51) // string - transport keys assigned to this job
#define PRTE_JOB_INFO_CACHE          (PRTE_JOB_START_KEY + 52) // pmix_list_t - list of prte_value_t to be included in job_info
#define PRTE_JOB_SILENT_TERMINATION  (PRTE_JOB_START_KEY + 54) // bool - do not generate an event notification when job
                                                               // normally terminates
#define PRTE_JOB_SET_ENVAR           (PRTE_JOB_START_KEY + 55) // prte_envar_t - set the given envar to the specified value
#define PRTE_JOB_UNSET_ENVAR         (PRTE_JOB_START_KEY + 56) // string - name of envar to unset, if present
#define PRTE_JOB_PREPEND_ENVAR       (PRTE_JOB_START_KEY + 57) // prte_envar_t - prepend the specified value to the given envar
#define PRTE_JOB_APPEND_ENVAR        (PRTE_JOB_START_KEY + 58) // prte_envar_t - append the specified value to the given envar
#define PRTE_JOB_ADD_ENVAR           (PRTE_JOB_START_KEY + 59) // prte_envar_t - add envar, do not override pre-existing one
#define PRTE_JOB_APP_SETUP_DATA      (PRTE_JOB_START_KEY + 60) // pmix_byte_object_t - blob containing app setup data
#define PRTE_JOB_OUTPUT_TO_DIRECTORY (PRTE_JOB_START_KEY + 61) // string - path of directory to which stdout/err is to be directed
#define PRTE_JOB_STOP_ON_EXEC        (PRTE_JOB_START_KEY + 62) // bool - stop on first instruction for debugger attach
#define PRTE_JOB_SPAWN_NOTIFIED      (PRTE_JOB_START_KEY + 63) // bool - process requesting a spawn operation has been notified of result
#define PRTE_JOB_DISPLAY_MAP         (PRTE_JOB_START_KEY + 64) // bool - display job map
#define PRTE_JOB_DISPLAY_DEVEL_MAP   (PRTE_JOB_START_KEY + 65) // bool - display devel
// level job map
#define PRTE_JOB_DISPLAY_TOPO        (PRTE_JOB_START_KEY + 66) // bool - display topology with job map
// 67 was removed option diffable map
#define PRTE_JOB_DISPLAY_ALLOC       (PRTE_JOB_START_KEY + 68) // bool - display allocation
#define PRTE_JOB_DO_NOT_LAUNCH       (PRTE_JOB_START_KEY + 69) // bool - do not launch job
#define PRTE_JOB_XML_OUTPUT          (PRTE_JOB_START_KEY + 70) // bool - print in xml format
#define PRTE_JOB_TIMEOUT             (PRTE_JOB_START_KEY + 71) // int32 - number of seconds job can run before terminating it as timed out
#define PRTE_JOB_STACKTRACES         (PRTE_JOB_START_KEY + 72) // bool - include process stack traces in timeout report
#define PRTE_JOB_REPORT_STATE        (PRTE_JOB_START_KEY + 73) // bool - include process state in timeout report
#define PRTE_JOB_TIMEOUT_EVENT       (PRTE_JOB_START_KEY + 74) // prte_ptr (prte_timer_t*) - timer event for job timeout
#define PRTE_JOB_TRACE_TIMEOUT_EVENT (PRTE_JOB_START_KEY + 75) // prte_ptr (prte_timer_t*) - timer event for stacktrace collection
#define PRTE_JOB_INHERIT             (PRTE_JOB_START_KEY + 76) // bool - job inherits parent's mapping/ranking/binding policies
#define PRTE_JOB_PES_PER_PROC        (PRTE_JOB_START_KEY + 77) // uint16_t - number of cpus to be assigned to each process
#define PRTE_JOB_DIST_DEVICE         (PRTE_JOB_START_KEY + 78) // char* - device to use for dist mapping
#define PRTE_JOB_HWT_CPUS            (PRTE_JOB_START_KEY + 79) // bool - job requests hwthread cpus
#define PRTE_JOB_CORE_CPUS           (PRTE_JOB_START_KEY + 80) // bool - job requests core cpus
#define PRTE_JOB_PPR                 (PRTE_JOB_START_KEY + 81) // char* - string specifying the procs-per-resource pattern
#define PRTE_JOB_NOINHERIT           (PRTE_JOB_START_KEY + 82) // bool - do NOT inherit parent's mapping/ranking/binding policies
#define PRTE_JOB_FILE                (PRTE_JOB_START_KEY + 83) // char* - file to use for sequential or rankfile mapping
#define PRTE_JOB_DO_NOT_RESOLVE      (PRTE_JOB_START_KEY + 84) // bool - do not resolve nodes
#define PRTE_JOB_DEBUG_TARGET        (PRTE_JOB_START_KEY + 85) // pmix_proc_t -
// application proc to co-locate daemons with
#define PRTE_JOB_DEBUG_DAEMONS_PER_NODE (PRTE_JOB_START_KEY + 86)  // uint16_t - Number of debug daemons per node
#define PRTE_JOB_DEBUG_DAEMONS_PER_PROC (PRTE_JOB_START_KEY + 87)  // uint16_t - Number of debug daemons per application proc
#define PRTE_JOB_STOP_IN_INIT           (PRTE_JOB_START_KEY + 88)  // bool - stop in PMIx_Init
#define PRTE_JOB_STOP_IN_APP            (PRTE_JOB_START_KEY + 89)  // bool - stop at app-determined location
#define PRTE_JOB_ENVARS_HARVESTED       (PRTE_JOB_START_KEY + 90)  // envars have already been harvested
#define PRTE_JOB_OUTPUT_NOCOPY          (PRTE_JOB_START_KEY + 91)  // bool - do not copy output to stdout/err
#define PRTE_JOB_RANK_OUTPUT            (PRTE_JOB_START_KEY + 92)  // bool - tag stdout/stderr with rank
#define PRTE_SPAWN_TIMEOUT              (PRTE_JOB_START_KEY + 93)  // int32 - number of seconds to spawn before terminating it as timed out
#define PRTE_JOB_RAW_OUTPUT             (PRTE_JOB_START_KEY + 94)  // bool - do not buffer output
#define PRTE_JOB_EXEC_AGENT             (PRTE_JOB_START_KEY + 95)  // char* - string specifying the cmd to use when exec'ing the local proc
#define PRTE_JOB_NOAGG_HELP             (PRTE_JOB_START_KEY + 96)  // bool - do not aggregate show_help messages
#define PRTE_JOB_COLOCATE_PROCS         (PRTE_JOB_START_KEY + 97)  // pmix_data_array_t - colocate this job's procs with the given ones
#define PRTE_JOB_COLOCATE_NPERPROC      (PRTE_JOB_START_KEY + 98)  // uint16_t - number of procs to colocate at each proc
#define PRTE_JOB_COLOCATE_NPERNODE      (PRTE_JOB_START_KEY + 99)  // uint16_t - number of procs to colocate on node of each proc
#define PRTE_JOB_TAG_OUTPUT_DETAILED    (PRTE_JOB_START_KEY + 100) // bool - include [hostname:pid] in output stream tag
#define PRTE_JOB_TAG_OUTPUT_FULLNAME    (PRTE_JOB_START_KEY + 101) // bool - use full namespace in output stream tag
#define PRTE_JOB_ERROR_NONZERO_EXIT     (PRTE_JOB_START_KEY + 102) // bool - mark it as an error if a proc exits with non-zero status
#define PRTE_JOB_CONTROLS               (PRTE_JOB_START_KEY + 103) // char* - Directives
// controlling job behavior
#define PRTE_JOB_SHOW_PROGRESS            (PRTE_JOB_START_KEY + 104) // bool - show launch progress of this job
#define PRTE_JOB_RECOVERABLE              (PRTE_JOB_START_KEY + 105) // bool - job processes can be recovered, do not terminate upon
                                                                     // process failure
#define PRTE_JOB_NOTIFY_ERRORS            (PRTE_JOB_START_KEY + 106) // bool - provide PMIx events on errors
#define PRTE_JOB_AUTORESTART              (PRTE_JOB_START_KEY + 107) // bool - automatically restart failed processes
#define PRTE_JOB_OUTPUT_PROCTABLE         (PRTE_JOB_START_KEY + 108) // char* - string specifying where the output is to go, with a '-'
                                                                     // indicating stdout, '+' indicating stderr, else path
#define PRTE_JOB_DISPLAY_PROCESSORS       (PRTE_JOB_START_KEY + 109) // char* - string displaying nodes whose avail CPUs
                                                                     // are to be displayed
#define PRTE_JOB_DISPLAY_PARSEABLE_OUTPUT (PRTE_JOB_START_KEY + 110) // bool - display output in machine parsable format
#define PRTE_JOB_EXTEND_DVM               (PRTE_JOB_START_KEY + 111) // bool - DVM is being extended
#define PRTE_JOB_CHILD_SEP                (PRTE_JOB_START_KEY + 116) // bool - child job is to be considered independent
                                                                     // from its parent, do not terminate if
                                                                     // parent dies first
#define PRTE_JOB_GPU_SUPPORT              (PRTE_JOB_START_KEY + 117) // bool - enable/disable GPU support in app
#define PRTE_JOB_PREFIX                   (PRTE_JOB_START_KEY + 118) // string - PRTE_PREFIX for daemons
#define PRTE_JOB_PMIX_PREFIX              (PRTE_JOB_START_KEY + 119) // string - PMIX_PREFIX for daemons
#define PRTE_JOB_FWD_ENVIRONMENT          (PRTE_JOB_START_KEY + 120) // bool - forward local environment to procs in this job
#define PRTE_JOB_REPORT_PHYSICAL_CPUS     (PRTE_JOB_START_KEY + 121) // bool - report using physical (vs logical) cpu IDs
#define PRTE_JOB_ALLOC_DISPLAYED          (PRTE_JOB_START_KEY + 122) // bool - allocation has been displayed

/* Upper bound of the job key range */
#define PRTE_JOB_MAX_KEY (PRTE_JOB_START_KEY + 200)

/*** PROC FLAGS - never sent anywhere ***/
typedef uint16_t prte_proc_flags_t;
#define PRTE_PROC_FLAG_ALIVE 0x0001 // proc has been launched and has not yet terminated
#define PRTE_PROC_FLAG_ABORT         0x0002 // proc called abort
#define PRTE_PROC_FLAG_UPDATED       0x0004 // proc has been updated and needs to be included in the next pidmap message
#define PRTE_PROC_FLAG_LOCAL         0x0008 // indicate that this proc is local
#define PRTE_PROC_FLAG_REPORTED      0x0010 // indicate proc has reported in
#define PRTE_PROC_FLAG_REG           0x0020 // proc has registered
#define PRTE_PROC_FLAG_HAS_DEREG     0x0040 // proc has deregistered
#define PRTE_PROC_FLAG_AS_MPI        0x0080 // proc is MPI process
#define PRTE_PROC_FLAG_IOF_COMPLETE  0x0100 // IOF has completed
#define PRTE_PROC_FLAG_WAITPID       0x0200 // waitpid fired
#define PRTE_PROC_FLAG_RECORDED      0x0400 // termination has been recorded
#define PRTE_PROC_FLAG_DATA_IN_SM    0x0800 // modex data has been stored in the local shared memory region
#define PRTE_PROC_FLAG_DATA_RECVD    0x1000 // modex data for this proc has been received
#define PRTE_PROC_FLAG_SM_ACCESS     0x2000 // indicate if process can read modex data from shared memory region
#define PRTE_PROC_FLAG_TERM_REPORTED 0x4000 // proc termination has been reported

/*** PROCESS ATTRIBUTE KEYS ***/
#define PRTE_PROC_START_KEY PRTE_JOB_MAX_KEY

#define PRTE_PROC_NOBARRIER    (PRTE_PROC_START_KEY + 1)  // bool - indicates proc should not barrier in prte_init
#define PRTE_PROC_PRIOR_NODE   (PRTE_PROC_START_KEY + 5)  // void* - pointer to prte_node_t where this proc last executed
#define PRTE_PROC_NRESTARTS    (PRTE_PROC_START_KEY + 6)  // int32 - number of times this process has been restarted
#define PRTE_PROC_RESTART_TIME (PRTE_PROC_START_KEY + 7)  // timeval - time of last restart
#define PRTE_PROC_FAST_FAILS   (PRTE_PROC_START_KEY + 8)  // int32 - number of failures in "fast" window
#define PRTE_PROC_CKPT_STATE   (PRTE_PROC_START_KEY + 9)  // size_t - ckpt state
#define PRTE_PROC_SNAPSHOT_REF (PRTE_PROC_START_KEY + 10) // string - snapshot reference
#define PRTE_PROC_SNAPSHOT_LOC (PRTE_PROC_START_KEY + 11) // string - snapshot location
#define PRTE_PROC_NODENAME     (PRTE_PROC_START_KEY + 12) //
string - node where proc is located, used only by tools #define PRTE_PROC_CGROUP (PRTE_PROC_START_KEY + 13) // string - name of cgroup this proc shall be assigned to #define PRTE_PROC_NBEATS (PRTE_PROC_START_KEY + 14) // int32 - number of heartbeats in current window #define PRTE_PROC_MAX_KEY (PRTE_PROC_START_KEY + 100) /*** RML ATTRIBUTE keys ***/ #define PRTE_RML_START_KEY PRTE_PROC_MAX_KEY #define PRTE_RML_TRANSPORT_TYPE (PRTE_RML_START_KEY + 1) // string - null terminated string containing transport type #define PRTE_RML_PROTOCOL_TYPE (PRTE_RML_START_KEY + 2) // string - protocol type (e.g., as returned by fi_info) #define PRTE_RML_CONDUIT_ID (PRTE_RML_START_KEY + 3) // prte_rml_conduit_t - conduit_id for this transport #define PRTE_RML_INCLUDE_COMP_ATTRIB (PRTE_RML_START_KEY + 4) // string - comma delimited list of RML component names to be considered #define PRTE_RML_EXCLUDE_COMP_ATTRIB (PRTE_RML_START_KEY + 5) // string - comma delimited list of RML component names to be excluded #define PRTE_RML_TRANSPORT_ATTRIB (PRTE_RML_START_KEY + 6) // string - comma delimited list of transport types to be considered // (e.g., "fabric,ethernet") #define PRTE_RML_QUALIFIER_ATTRIB (PRTE_RML_START_KEY + 7) // string - comma delimited list of qualifiers (e.g., routed=direct,bandwidth=xxx) #define PRTE_RML_PROVIDER_ATTRIB (PRTE_RML_START_KEY + 8) // string - comma delimited list of provider names to be considered #define PRTE_RML_PROTOCOL_ATTRIB (PRTE_RML_START_KEY + 9) // string - comma delimited list of protocols to be considered (e.g., tcp,udp) #define PRTE_RML_ROUTED_ATTRIB (PRTE_RML_START_KEY + 10) // string - comma delimited list of routed modules to be considered #define PRTE_RML_MAX_KEY (PRTE_RML_START_KEY + 100) #define PRTE_ATTR_KEY_MAX PRTE_RML_MAX_KEY /*** FLAG OPS ***/ #define PRTE_FLAG_SET(p, f) ((p)->flags |= (f)) #define PRTE_FLAG_UNSET(p, f) ((p)->flags &= ~(f)) #define PRTE_FLAG_TEST(p, f) ((p)->flags & (f)) PRTE_EXPORT const char 
*prte_attr_key_to_str(prte_attribute_key_t key); /* Retrieve the named attribute from a list */ PRTE_EXPORT bool prte_get_attribute(pmix_list_t *attributes, prte_attribute_key_t key, void **data, pmix_data_type_t type); /* Set the named attribute in a list, overwriting any prior entry */ PRTE_EXPORT int prte_set_attribute(pmix_list_t *attributes, prte_attribute_key_t key, bool local, void *data, pmix_data_type_t type); /* Remove the named attribute from a list */ PRTE_EXPORT void prte_remove_attribute(pmix_list_t *attributes, prte_attribute_key_t key); PRTE_EXPORT prte_attribute_t *prte_fetch_attribute(pmix_list_t *attributes, prte_attribute_t *prev, prte_attribute_key_t key); PRTE_EXPORT int prte_prepend_attribute(pmix_list_t *attributes, prte_attribute_key_t key, bool local, void *data, pmix_data_type_t type); PRTE_EXPORT int prte_attr_load(prte_attribute_t *kv, void *data, pmix_data_type_t type); PRTE_EXPORT int prte_attr_unload(prte_attribute_t *kv, void **data, pmix_data_type_t type); PRTE_EXPORT char *prte_attr_print_list(pmix_list_t *attributes); /* * Register a handler for converting attr keys to strings * * Handlers will be invoked by prte_attr_key_to_str to return the appropriate value. 
*/ typedef char *(*prte_attr2str_fn_t)(prte_attribute_key_t key); PRTE_EXPORT int prte_attr_register(const char *project, prte_attribute_key_t key_base, prte_attribute_key_t key_max, prte_attr2str_fn_t converter); /** FOR DIAGNOSTIC PURPOSES **/ #define PRTE_SHOW_ATTRS(a) \ do { \ char *_output = prte_attr_print_list((a)); \ fprintf(stderr, "[%s:%s:%d]\n%s\n", __FILE__, __func__, __LINE__, _output); \ free(_output); \ } while (0) #endif // forward declarations struct prte_proc_t; struct prte_node_t; struct prte_app_context_t; struct prte_job_t; PRTE_EXPORT char* prte_print_proc_flags(struct prte_proc_t *p); PRTE_EXPORT char* prte_print_node_flags(struct prte_node_t *p); PRTE_EXPORT char* prte_print_app_flags(struct prte_app_context_t *p); PRTE_EXPORT char* prte_print_job_flags(struct prte_job_t *p); prrte-3.0.13/src/util/prte_cmd_line.h0000664000175000017500000003101215145263240017636 0ustar alastairalastair/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2015-2020 Intel, Inc. All rights reserved. * Copyright (c) 2016-2017 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2017-2022 IBM Corporation. All rights reserved. * Copyright (c) 2021-2026 Nanook Consulting All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #ifndef PRTE_CMD_LINE_H #define PRTE_CMD_LINE_H #include "prte_config.h" #ifdef HAVE_UNISTD_H # include #endif #include #include #include #include #include "src/class/pmix_list.h" #include "src/class/pmix_object.h" #include "src/util/pmix_argv.h" BEGIN_C_DECLS /* define the command line options that PRRTE internally understands. * It is the responsibility of each schizo component to translate its * command line inputs to these definitions. The definitions are provided * to help avoid errors due to typos - i.e., where the schizo component * interprets its CLI but assigns it to an erroneous string */ // NAME STRING ARGUMENT // Basic options #define PRTE_CLI_HELP "help" // optional #define PRTE_CLI_VERSION "version" // none #define PRTE_CLI_VERBOSE "verbose" // number of instances => verbosity level #define PRTE_CLI_PARSEABLE "parseable" // none #define PRTE_CLI_PARSABLE "parsable" // none #define PRTE_CLI_PERSONALITY "personality" // required // MCA parameter options #define PRTE_CLI_PRTEMCA "prtemca" // requires TWO #define PRTE_CLI_PMIXMCA "pmixmca" // requires TWO #define PRTE_CLI_TUNE "tune" // required // DVM options #define PRTE_CLI_NO_READY_MSG "no-ready-msg" // none #define PRTE_CLI_DAEMONIZE "daemonize" // none #define PRTE_CLI_SYSTEM_SERVER "system-server" // none #define PRTE_CLI_SET_SID "set-sid" // none #define PRTE_CLI_REPORT_PID "report-pid" // required #define PRTE_CLI_REPORT_URI "report-uri" // required #define PRTE_CLI_TEST_SUICIDE "test-suicide" // none #define PRTE_CLI_DEFAULT_HOSTFILE "default-hostfile" // required #define PRTE_CLI_SINGLETON "singleton" // required #define PRTE_CLI_KEEPALIVE "keepalive" // required #define PRTE_CLI_LAUNCH_AGENT "launch-agent" // required #define PRTE_CLI_MAX_VM_SIZE "max-vm-size" // required #define PRTE_CLI_DEBUG "debug" // none #define PRTE_CLI_DEBUG_DAEMONS "debug-daemons" // none #define PRTE_CLI_DEBUG_DAEMONS_FILE "debug-daemons-file" // none 
#define PRTE_CLI_LEAVE_SESSION_ATTACHED "leave-session-attached"    // none
#define PRTE_CLI_TMPDIR                 "tmpdir"                    // required
#define PRTE_CLI_PREFIX                 "prefix"                    // required
#define PRTE_CLI_PMIX_PREFIX            "pmix-prefix"               // required
#define PRTE_CLI_APP_PREFIX             "app-pmix-prefix"           // required
#define PRTE_CLI_NOPREFIX               "noprefix"                  // none
#define PRTE_CLI_NO_APP_PREFIX          "no-app-prefix"             // none
#define PRTE_CLI_FWD_SIGNALS            "forward-signals"           // required
#define PRTE_CLI_RUN_AS_ROOT            "allow-run-as-root"         // none
#define PRTE_CLI_STREAM_BUF             "stream-buffering"          // required
#define PRTE_CLI_BOOTSTRAP              "bootstrap"                 // none
#define PRTE_CLI_HETERO_NODES           "hetero-nodes"              // none

// Application options
#define PRTE_CLI_NP                     "np"                        // required
#define PRTE_CLI_NPERNODE               "N"                         // required
#define PRTE_CLI_APPFILE                "app"                       // required
#define PRTE_CLI_FWD_ENVAR              "x"                         // required
#define PRTE_CLI_FWD_ENVIRON            "fwd-environment"           // optional
#define PRTE_CLI_HOSTFILE               "hostfile"                  // required
#define PRTE_CLI_ADDHOSTFILE            "add-hostfile"              // required
#define PRTE_CLI_HOST                   "host"                      // required
#define PRTE_CLI_ADDHOST                "add-host"                  // required
#define PRTE_CLI_PATH                   "path"                      // required
#define PRTE_CLI_PSET                   "pset"                      // required
#define PRTE_CLI_PRELOAD_FILES          "preload-files"             // required
#define PRTE_CLI_PRELOAD_BIN            "preload-binary"            // none
#define PRTE_CLI_STDIN                  "stdin"                     // required
#define PRTE_CLI_OUTPUT                 "output"                    // required
#define PRTE_CLI_WDIR                   "wdir"                      // required
#define PRTE_CLI_SET_CWD_SESSION        "set-cwd-to-session-dir"    // none
#define PRTE_CLI_ENABLE_RECOVERY        "enable-recovery"           // none
#define PRTE_CLI_DISABLE_RECOVERY       "disable-recovery"          // none
#define PRTE_CLI_MEM_ALLOC_KIND         "memory-alloc-kinds"        // required
#define PRTE_CLI_GPU_SUPPORT            "gpu-support"               // required

// Placement options
#define PRTE_CLI_MAPBY                  "mapby"                     // required
#define PRTE_CLI_RANKBY                 "rankby"                    // required
#define PRTE_CLI_BINDTO                 "bindto"                    // required

// Runtime options
#define PRTE_CLI_RTOS                   "rtos"                      // required

// Debug options
#define PRTE_CLI_DO_NOT_LAUNCH          "do-not-launch"
// none #define PRTE_CLI_DISPLAY "display" // required #define PRTE_CLI_XTERM "xterm" // none #define PRTE_CLI_DO_NOT_AGG_HELP "no-aggregate-help" // none // Tool connection options #define PRTE_CLI_SYS_SERVER_FIRST "system-server-first" // none #define PRTE_CLI_SYS_SERVER_ONLY "system-server-only" // none #define PRTE_CLI_DO_NOT_CONNECT "do-not-connect" // none #define PRTE_CLI_WAIT_TO_CONNECT "wait-to-connect" // required #define PRTE_CLI_NUM_CONNECT_RETRIES "num-connect-retries" // required #define PRTE_CLI_PID "pid" // required #define PRTE_CLI_NAMESPACE "namespace" // required #define PRTE_CLI_DVM_URI "dvm-uri" // required #define PRTE_CLI_DVM "dvm" // optional // Daemon-specific CLI options #define PRTE_CLI_PUBSUB_SERVER "prte-server" // required #define PRTE_CLI_CONTROLLER_URI "dvm-master-uri" // required #define PRTE_CLI_PARENT_URI "parent-uri" // required #define PRTE_CLI_TREE_SPAWN "tree-spawn" // required #define PRTE_CLI_PLM "plm" // required /* define accepted synonyms - these must be defined on the schizo component's * command line in order to be accepted, but PRRTE will automatically translate * them to their accepted synonym */ #define PRTE_CLI_MACHINEFILE "machinefile" // synonym for "hostfile" #define PRTE_CLI_WD "wd" // synonym for "wdir /* define the command line directives PRRTE recognizes */ // Placement directives #define PRTE_CLI_SLOT "slot" #define PRTE_CLI_HWT "hwthread" #define PRTE_CLI_CORE "core" #define PRTE_CLI_L1CACHE "l1cache" #define PRTE_CLI_L2CACHE "l2cache" #define PRTE_CLI_L3CACHE "l3cache" #define PRTE_CLI_NUMA "numa" #define PRTE_CLI_PACKAGE "package" #define PRTE_CLI_NODE "node" #define PRTE_CLI_SEQ "seq" #define PRTE_CLI_DIST "dist" #define PRTE_CLI_PPR "ppr" #define PRTE_CLI_RANKFILE "rankfile" #define PRTE_CLI_NONE "none" #define PRTE_CLI_HWTCPUS "hwtcpus" #define PRTE_CLI_PELIST "pe-list=" // Ranking directives // PRTE_CLI_SLOT, PRTE_CLI_NODE, PRTE_CLI_SPAN reused here #define PRTE_CLI_FILL "fill" #define PRTE_CLI_OBJ 
"object" // Output directives #define PRTE_CLI_TAG "tag" #define PRTE_CLI_TAG_DET "tag-detailed" #define PRTE_CLI_TAG_FULL "tag-fullname" #define PRTE_CLI_RANK "rank" #define PRTE_CLI_TIMESTAMP "timestamp" #define PRTE_CLI_XML "xml" #define PRTE_CLI_MERGE_ERROUT "merge-stderr-to-stdout" #define PRTE_CLI_DIR "directory" #define PRTE_CLI_FILE "filename" // Display directives #define PRTE_CLI_ALLOC "allocation" #define PRTE_CLI_MAP "map" #define PRTE_CLI_BIND "bind" #define PRTE_CLI_MAPDEV "map-devel" #define PRTE_CLI_TOPO "topo=" #define PRTE_CLI_CPUS "cpus=" // Runtime directives #define PRTE_CLI_ERROR_NZ "error-nonzero-status" // optional arg #define PRTE_CLI_NOLAUNCH "donotlaunch" // no arg #define PRTE_CLI_SHOW_PROGRESS "show-progress" // optional arg #define PRTE_CLI_RECOVERABLE "recoverable" // optional arg #define PRTE_CLI_AUTORESTART "autorestart" // optional arg #define PRTE_CLI_CONTINUOUS "continuous" // optional arg #define PRTE_CLI_MAX_RESTARTS "max-restarts" // reqd arg #define PRTE_CLI_EXEC_AGENT "exec-agent" // reqd arg #define PRTE_CLI_DEFAULT_EXEC_AGENT "default-exec-agent" // no arg #define PRTE_CLI_STOP_ON_EXEC "stop-on-exec" // optional arg #define PRTE_CLI_STOP_IN_INIT "stop-in-init" // optional arg #define PRTE_CLI_STOP_IN_APP "stop-in-app" // optional arg #define PRTE_CLI_TIMEOUT "timeout" // reqd arg #define PRTE_CLI_SPAWN_TIMEOUT "spawn-timeout" // reqd arg #define PRTE_CLI_REPORT_STATE "report-state-on-timeout" // optional arg #define PRTE_CLI_STACK_TRACES "get-stack-traces" // optional arg #define PRTE_CLI_REPORT_CHILD_SEP "report-child-jobs-separately" // optional arg #define PRTE_CLI_AGG_HELP "aggregate-help" // optional arg #define PRTE_CLI_NOTIFY_ERRORS "notifyerrors" // optional flag #define PRTE_CLI_OUTPUT_PROCTABLE "output-proctable" // optional arg /* define the command line qualifiers PRRTE recognizes */ // Placement qualifiers #define PRTE_CLI_PE "pe=" #define PRTE_CLI_SPAN "span" #define PRTE_CLI_OVERSUB "oversubscribe" #define 
PRTE_CLI_NOOVER "nooversubscribe" #define PRTE_CLI_NOLOCAL "nolocal" // PRTE_CLI_HWTCPUS reused here #define PRTE_CLI_CORECPUS "corecpus" #define PRTE_CLI_DEVICE "device=" #define PRTE_CLI_INHERIT "inherit" #define PRTE_CLI_NOINHERIT "noinherit" #define PRTE_CLI_QDIR "dir=" #define PRTE_CLI_QFILE "file=" #define PRTE_CLI_OVERLOAD "overload-allowed" #define PRTE_CLI_NOOVERLOAD "no-overload" #define PRTE_CLI_IF_SUPP "if-supported" #define PRTE_CLI_ORDERED "ordered" #define PRTE_CLI_REPORT "report" #define PRTE_CLI_DISPALLOC "displayalloc" // PRTE_CLI_DISPLAY reused here #define PRTE_CLI_DISPDEV "displaydevel" // Output qualifiers #define PRTE_CLI_NOCOPY "nocopy" #define PRTE_CLI_RAW "raw" #define PRTE_CLI_PATTERN "pattern" END_C_DECLS #endif /* PRTE_CMD_LINE_H */ prrte-3.0.13/src/util/session_dir.c0000664000175000017500000002362315145263240017357 0ustar alastairalastair/* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2014-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015-2020 Intel, Inc. All rights reserved. * Copyright (c) 2021-2024 Nanook Consulting. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ * */ #include "prte_config.h" #include "constants.h" #include #ifdef HAVE_PWD_H # include #endif #include #include #include #ifdef HAVE_SYS_PARAM_H # include #endif /* HAVE_SYS_PARAM_H */ #ifdef HAVE_SYS_TYPES_H # include #endif /* HAVE_SYS_TYPES_H */ #include #ifdef HAVE_UNISTD_H # include #endif /* HAVE_UNISTD_H */ #include #ifdef HAVE_DIRENT_H # include #endif /* HAVE_DIRENT_H */ #ifdef HAVE_PWD_H # include #endif /* HAVE_PWD_H */ #include "src/util/pmix_argv.h" #include "src/util/pmix_basename.h" #include "src/util/pmix_os_dirpath.h" #include "src/util/pmix_os_path.h" #include "src/util/pmix_output.h" #include "src/util/pmix_path.h" #include "src/util/pmix_printf.h" #include "src/util/pmix_environ.h" #include "src/util/name_fns.h" #include "src/util/proc_info.h" #include "src/util/pmix_show_help.h" #include "src/mca/errmgr/errmgr.h" #include "src/mca/ras/base/base.h" #include "src/runtime/prte_globals.h" #include "src/runtime/runtime.h" #include "src/util/session_dir.h" /******************************* * Local function Declarations *******************************/ static bool _check_file(const char *root, const char *path); static bool setup_base_complete = false; #define PRTE_PRINTF_FIX_STRING(a) ((NULL == a) ? 
"(null)" : a) /**************************** * Funcationality ****************************/ /* * Check and create the directory requested */ static int _create_dir(char *directory) { mode_t my_mode = S_IRWXU; /* I'm looking for full rights */ int ret; /* attempt to create it */ ret = pmix_os_dirpath_create(directory, my_mode); if (PMIX_ERR_EXISTS == ret) { // existence is good enough ret = PMIX_SUCCESS; } else if (PMIX_SUCCESS != ret) { PMIX_ERROR_LOG(ret); } ret = prte_pmix_convert_status(ret); return ret; } static int _setup_top_session_dir(void) { int rc = PRTE_SUCCESS; /* get the effective uid */ pid_t pid = getpid(); /* construct the top_session_dir if we need */ if (NULL == prte_process_info.top_session_dir) { if (NULL == prte_process_info.nodename || NULL == prte_process_info.tmpdir_base) { /* we can't setup top session dir */ rc = PRTE_ERR_BAD_PARAM; goto exit; } if (prte_process_info.shared_fs) { /* if it is a shared fs, then we need to include our nodename * to avoid collisions */ if (0 > pmix_asprintf(&prte_process_info.top_session_dir, "%s/%s.%s.%lu", prte_process_info.tmpdir_base, prte_process_info.sessdir_prefix, prte_process_info.nodename, (unsigned long)pid)) { prte_process_info.top_session_dir = NULL; rc = PRTE_ERR_OUT_OF_RESOURCE; goto exit; } } else { /* if not a shared fs, then we can just use our pid as that will be unique */ if (0 > pmix_asprintf(&prte_process_info.top_session_dir, "%s/%s.%lu", prte_process_info.tmpdir_base, prte_process_info.sessdir_prefix, (unsigned long)pid)) { prte_process_info.top_session_dir = NULL; rc = PRTE_ERR_OUT_OF_RESOURCE; goto exit; } } } rc = _create_dir(prte_process_info.top_session_dir); exit: if (PRTE_SUCCESS != rc) { PRTE_ERROR_LOG(rc); } return rc; } static int _setup_job_session_dir(prte_job_t *jdata) { int rc = PRTE_SUCCESS; if (NULL == jdata->session_dir) { if (0 > pmix_asprintf(&jdata->session_dir, "%s/%s", prte_process_info.top_session_dir, PRTE_LOCAL_JOBID_PRINT(jdata->nspace))) { return 
PRTE_ERR_OUT_OF_RESOURCE; } rc = _create_dir(jdata->session_dir); } return rc; } static int _setup_proc_session_dir(prte_job_t *jdata, pmix_proc_t *p) { int rc; char *tmp; if (0 > pmix_asprintf(&tmp, "%s/%s", jdata->session_dir, PMIX_RANK_PRINT(p->rank))) { return PRTE_ERR_OUT_OF_RESOURCE; } rc = _create_dir(tmp); free(tmp); return rc; } static int setup_base(void) { int rc; // only do this once if (setup_base_complete) { return PRTE_SUCCESS; } setup_base_complete = true; /* Ensure that system info is set */ prte_proc_info(); /* BEFORE doing anything else, check to see if this prefix is * allowed by the system */ if (NULL != prte_prohibited_session_dirs || NULL != prte_process_info.tmpdir_base) { char **list; int i, len; /* break the string into tokens - it should be * separated by ',' */ list = PMIX_ARGV_SPLIT_COMPAT(prte_prohibited_session_dirs, ','); len = PMIX_ARGV_COUNT_COMPAT(list); /* cycle through the list */ for (i = 0; i < len; i++) { /* check if prefix matches */ if (0 == strncmp(prte_process_info.tmpdir_base, list[i], strlen(list[i]))) { /* this is a prohibited location */ pmix_show_help("help-prte-runtime.txt", "prte:session:dir:prohibited", true, prte_process_info.tmpdir_base, prte_prohibited_session_dirs); PMIX_ARGV_FREE_COMPAT(list); return PRTE_ERR_FATAL; } } PMIX_ARGV_FREE_COMPAT(list); /* done with this */ } rc = _setup_top_session_dir(); return rc; } /* * Construct the session directory and create it if necessary */ int prte_session_dir(pmix_proc_t *proc) { int rc = PRTE_SUCCESS; prte_job_t *jdata; /* * Get the session directory full name */ if (PRTE_SUCCESS != (rc = setup_base())) { if (PRTE_ERR_FATAL == rc) { /* this indicates we should abort quietly */ rc = PRTE_ERR_SILENT; } goto cleanup; } /* setup job and proc session directories */ jdata = prte_get_job_data_object(proc->nspace); if (NULL == jdata) { PRTE_ERROR_LOG(PRTE_ERR_NOT_FOUND); return PRTE_ERR_NOT_FOUND; } if (PRTE_SUCCESS != (rc = _setup_job_session_dir(jdata))) { 
PRTE_ERROR_LOG(rc); return rc; } if (PMIX_RANK_IS_VALID(proc->rank)) { if (PRTE_SUCCESS != (rc = _setup_proc_session_dir(jdata, proc))) { PRTE_ERROR_LOG(rc); return rc; } } if (prte_debug_flag) { pmix_output(0, "jobdir: %s", PRTE_PRINTF_FIX_STRING(jdata->session_dir)); pmix_output(0, "top: %s", PRTE_PRINTF_FIX_STRING(prte_process_info.top_session_dir)); pmix_output(0, "tmp: %s", PRTE_PRINTF_FIX_STRING(prte_process_info.tmpdir_base)); } cleanup: return rc; } void prte_job_session_dir_finalize(prte_job_t *jdata) { if (prte_process_info.rm_session_dirs) { /* RM will clean them up for us */ return; } /* special case - if a daemon is colocated with mpirun, * then we let mpirun do the rest to avoid a race * condition. this scenario always results in the rank=1 * daemon colocated with mpirun */ if (prte_ras_base.launch_orted_on_hn && PRTE_PROC_IS_DAEMON && 1 == PRTE_PROC_MY_NAME->rank) { return; } /* if this is the DVM job, then we destroy the top-level * session directory, but only if we are finalizing */ if (NULL == jdata || PMIX_CHECK_NSPACE(PRTE_PROC_MY_NAME->nspace, jdata->nspace)) { if (prte_finalizing) { if (NULL != prte_process_info.top_session_dir) { pmix_os_dirpath_destroy(prte_process_info.top_session_dir, true, _check_file); rmdir(prte_process_info.top_session_dir); free(prte_process_info.top_session_dir); prte_process_info.top_session_dir = NULL; } } return; } if (NULL == jdata || NULL == jdata->session_dir) { return; } pmix_os_dirpath_destroy(jdata->session_dir, true, _check_file); /* if the job-level session dir is now empty, remove it */ rmdir(jdata->session_dir); free(jdata->session_dir); jdata->session_dir = NULL; return; } static bool _check_file(const char *root, const char *path) { struct stat st; char *fullpath; /* * Keep: * - non-zero files starting with "output-" */ if (0 == strncmp(path, "output-", strlen("output-"))) { memset(&st, 0, sizeof(struct stat)); fullpath = pmix_os_path(false, root, path, NULL); stat(fullpath, &st); free(fullpath); if (0 
/*
 * Convert a long to its decimal string representation.
 *
 * Returns a newly allocated, NUL-terminated string that the caller must
 * free(), or NULL if allocation or formatting fails.
 */
char *prte_ltostr(long num)
{
    /* waste a little bit of space, but always have a big enough buffer:
     * one byte per bit is far more than the decimal digits of a long */
    int buflen = sizeof(long) * 8;
    char *buf = NULL;
    int ret = 0;

    buf = (char *) malloc(buflen);
    if (NULL == buf) {
        return NULL;
    }

    ret = snprintf(buf, buflen, "%ld", num);
    if (ret < 0) {
        free(buf);
        return NULL;
    }

    return buf;
}

/*
 * Convert a double to its "%f" string representation.
 *
 * A fixed-size buffer is not sufficient here: "%f" prints every integral
 * digit, so large magnitudes need hundreds of characters. The required
 * length is therefore measured first and the buffer sized to match.
 *
 * Returns a newly allocated, NUL-terminated string that the caller must
 * free(), or NULL if allocation or formatting fails.
 */
char *prte_dtostr(double num)
{
    char *buf = NULL;
    size_t buflen;
    int ret = 0;

    /* ask snprintf how many characters the full value requires */
    ret = snprintf(NULL, 0, "%f", num);
    if (ret < 0) {
        return NULL;
    }
    buflen = (size_t) ret + 1; /* room for the terminating NUL */

    buf = (char *) malloc(buflen);
    if (NULL == buf) {
        return NULL;
    }

    ret = snprintf(buf, buflen, "%f", num);
    if (ret < 0) {
        free(buf);
        return NULL;
    }

    return buf;
}
* Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2019 Intel, Inc. All rights reserved. * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2021 Nanook Consulting. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ /** @file **/ #ifndef PRTE_DAEMON_INIT_H #define PRTE_DAEMON_INIT_H #include "prte_config.h" BEGIN_C_DECLS /* * Turn a process into a daemon. * * This function converts a process into a daemon in an orderly manner. It first forks a child * process, then the parent exits. The child continues on to become a session leader, reset the file * mode creation mask, and changes working directories to the one specified. * * @param working_dir Pointer to a character string containing the desired working directory. * Providing a value of NULL will cause the function to leave the program in the current working * directory. * @param parent_fn The function to execute in the parent before exiting * a value of NULL will cause the parent to simply exit(0). * * @retval PRTE_SUCCESS Indicates that the conversion was successful * @retval PRTE_ERROR Indicates that the conversion was not successful - a fork could not be * completed. 
*/ PRTE_EXPORT int prte_daemon_init_callback(char *working_dir, int (*parent_fn)(pid_t child)); END_C_DECLS static inline int prte_daemon_init(char *working_dir) { return prte_daemon_init_callback(working_dir, NULL); } END_C_DECLS #endif /* PRTE_DAEMON_INIT_H */ prrte-3.0.13/src/util/crc.h0000664000175000017500000001223315145263240015605 0ustar alastairalastair/* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009 IBM Corporation. All rights reserved. * Copyright (c) 2009 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2019 Intel, Inc. All rights reserved. * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2021 Nanook Consulting. All rights reserved. 
/*
 * Compute a 16-bit ones'-complement checksum over a buffer.
 *
 * The buffer is summed as native-endian 16-bit words; a trailing odd
 * byte, if any, is added by its byte value. Carries out of the low 16
 * bits are folded back in until the sum fits in 16 bits.
 *
 * The buffer passed to this function is assumed to be 16-bit aligned
 *
 * A 64-bit accumulator is used so that the carries cannot be lost on
 * large buffers: a 32-bit accumulator wraps once more than 65537 words
 * (about 128 KiB) of 0xFFFF have been added. Results for buffers that
 * never wrapped the 32-bit sum are unchanged.
 *
 * @param source  Start of the (16-bit aligned) buffer
 * @param csumlen Length of the buffer in bytes
 * @return The folded 16-bit sum (0 for an empty buffer)
 */
static inline uint16_t prte_csum16(const void *source, size_t csumlen)
{
    const uint16_t *src = (const uint16_t *) source;
    uint64_t csum = 0;

    while (csumlen > 1) {
        csum += *src++;
        csumlen -= 2;
    }

    /* Add leftover byte, if any */
    if (csumlen > 0) {
        csum += *((const unsigned char *) src);
    }

    /* Fold the wide checksum down to 16 bits */
    while (csum >> 16) {
        csum = (csum & 0xFFFF) + (csum >> 16);
    }

    return (uint16_t) csum;
}
* Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2019-2020 Intel, Inc. All rights reserved. * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ /** @file: * * Populates global structure with system-specific information. * * Notes: add limits.h, compute size of integer and other types via sizeof(type)*CHAR_BIT * */ #ifndef _PRTE_NAME_FNS_H_ #define _PRTE_NAME_FNS_H_ #include "prte_config.h" #ifdef HAVE_STDINT_h # include #endif #include "types.h" #include "src/class/pmix_list.h" #include "src/pmix/pmix-internal.h" BEGIN_C_DECLS typedef uint8_t prte_ns_cmp_bitmask_t; /**< Bit mask for comparing process names */ #define PRTE_NS_CMP_NONE 0x00 #define PRTE_NS_CMP_JOBID 0x02 #define PRTE_NS_CMP_VPID 0x04 #define PRTE_NS_CMP_ALL 0x0f #define PRTE_NS_CMP_WILD 0x10 /* useful define to print name args in output messages */ PRTE_EXPORT char *prte_util_print_name_args(const pmix_proc_t *name); #define PRTE_NAME_PRINT(n) prte_util_print_name_args(n) PRTE_EXPORT char *prte_util_print_jobids(const pmix_nspace_t job); #define PRTE_JOBID_PRINT(n) prte_util_print_jobids(n) PRTE_EXPORT char *prte_util_print_vpids(const pmix_rank_t vpid); #define PRTE_VPID_PRINT(n) prte_util_print_vpids(n) PRTE_EXPORT char *prte_util_print_job_family(const pmix_nspace_t job); #define PRTE_JOB_FAMILY_PRINT(n) prte_util_print_job_family(n) PRTE_EXPORT char *prte_util_print_local_jobid(const pmix_nspace_t job); #define PRTE_LOCAL_JOBID_PRINT(n) prte_util_print_local_jobid(n) PRTE_EXPORT char *prte_pretty_print_timing(int64_t secs, int64_t usecs); #define PRTE_LOCAL_JOBID(n) prte_util_get_local_jobid(n) __prte_attribute_always_inline__ static inline int prte_util_get_local_jobid(pmix_nspace_t n) { char *ptr; int lid; ptr = strrchr(n, '@'); if (NULL == ptr) { /* this isn't a PRRTE 
job */ return -1; } ++ptr; lid = strtoul(ptr, NULL, 10); return lid; } /* List of names for general use */ struct prte_namelist_t { pmix_list_item_t super; /**< Allows this item to be placed on a list */ pmix_proc_t name; /**< Name of a process */ }; typedef struct prte_namelist_t prte_namelist_t; PRTE_EXPORT PMIX_CLASS_DECLARATION(prte_namelist_t); PRTE_EXPORT int prte_util_convert_vpid_to_string(char **vpid_string, const pmix_rank_t vpid); PRTE_EXPORT int prte_util_convert_string_to_process_name(pmix_proc_t *name, const char *name_string); PRTE_EXPORT int prte_util_convert_process_name_to_string(char **name_string, const pmix_proc_t *name); PRTE_EXPORT int prte_util_compare_name_fields(prte_ns_cmp_bitmask_t fields, const pmix_proc_t *name1, const pmix_proc_t *name2); PRTE_EXPORT char *prte_util_make_version_string(const char *scope, int major, int minor, int release, const char *greek, const char *repo); END_C_DECLS #endif prrte-3.0.13/src/util/hostfile/0000775000175000017500000000000015145263240016501 5ustar alastairalastairprrte-3.0.13/src/util/hostfile/hostfile.c0000664000175000017500000011714415145263240020472 0ustar alastairalastair/* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2008 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2011-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2013-2020 Intel, Inc. All rights reserved. * Copyright (c) 2015-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* Copyright (c) 2016 IBM Corporation. All rights reserved. * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #include "prte_config.h" #include "constants.h" #ifdef HAVE_UNISTD_H # include #endif #include #include #include #include "src/class/pmix_list.h" #include "src/mca/base/pmix_base.h" #include "src/mca/mca.h" #include "src/mca/prteinstalldirs/prteinstalldirs.h" #include "src/util/pmix_argv.h" #include "src/util/pmix_if.h" #include "src/util/pmix_net.h" #include "src/util/pmix_output.h" #include "src/mca/errmgr/errmgr.h" #include "src/mca/ras/base/base.h" #include "src/runtime/prte_globals.h" #include "src/util/name_fns.h" #include "src/util/proc_info.h" #include "src/util/pmix_show_help.h" #include "src/util/hostfile/hostfile.h" #include "src/util/hostfile/hostfile_lex.h" static const char *cur_hostfile_name = NULL; static void hostfile_parse_error(int token) { switch (token) { case PRTE_HOSTFILE_STRING: pmix_show_help("help-hostfile.txt", "parse_error_string", true, cur_hostfile_name, prte_util_hostfile_line, token, prte_util_hostfile_value.sval); break; case PRTE_HOSTFILE_IPV4: case PRTE_HOSTFILE_IPV6: case PRTE_HOSTFILE_INT: pmix_show_help("help-hostfile.txt", "parse_error_int", true, cur_hostfile_name, prte_util_hostfile_line, token, prte_util_hostfile_value.ival); break; default: pmix_show_help("help-hostfile.txt", "parse_error", true, cur_hostfile_name, prte_util_hostfile_line, token); break; } } /** * Return the integer following an = (actually may only return positive ints) */ static int hostfile_parse_int(void) { if (PRTE_HOSTFILE_EQUAL != prte_util_hostfile_lex()) return -1; if (PRTE_HOSTFILE_INT != prte_util_hostfile_lex()) return -1; return prte_util_hostfile_value.ival; } /** * Return the string following an = (option to a keyword) */ static char *hostfile_parse_string(void) { int rc; if (PRTE_HOSTFILE_EQUAL != prte_util_hostfile_lex()) { return NULL; } rc = 
prte_util_hostfile_lex(); if (PRTE_HOSTFILE_STRING != rc) { return NULL; } return strdup(prte_util_hostfile_value.sval); } static int hostfile_parse_line(int token, pmix_list_t *updates, pmix_list_t *exclude, bool keep_all) { int rc; prte_node_t *node; bool got_max = false; char *value; char **argv; char *node_name = NULL; char *username = NULL; int cnt; int number_of_slots = 0; char buff[64]; char *alias = NULL; if (PRTE_HOSTFILE_STRING == token || PRTE_HOSTFILE_HOSTNAME == token || PRTE_HOSTFILE_INT == token || PRTE_HOSTFILE_IPV4 == token || PRTE_HOSTFILE_IPV6 == token) { if (PRTE_HOSTFILE_INT == token) { snprintf(buff, 64, "%d", prte_util_hostfile_value.ival); value = buff; } else { value = prte_util_hostfile_value.sval; } argv = PMIX_ARGV_SPLIT_COMPAT(value, '@'); cnt = PMIX_ARGV_COUNT_COMPAT(argv); if (1 == cnt) { node_name = strdup(argv[0]); } else if (2 == cnt) { username = strdup(argv[0]); node_name = strdup(argv[1]); } else { pmix_output(0, "WARNING: Unhandled user@host-combination\n"); /* XXX */ } PMIX_ARGV_FREE_COMPAT(argv); if (!prte_keep_fqdn_hostnames) { // Strip off the FQDN if present, ignore IP addresses if (!pmix_net_isaddr(node_name)) { char *ptr; alias = strdup(node_name); if (NULL != (ptr = strchr(node_name, '.'))) { *ptr = '\0'; } } } /* if the first letter of the name is '^', then this is a node * to be excluded. 
Remove the ^ character so the nodename is * usable, and put it on the exclude list */ if ('^' == node_name[0]) { int i, len; len = strlen(node_name); for (i = 1; i < len; i++) { node_name[i - 1] = node_name[i]; } node_name[len - 1] = '\0'; /* truncate */ PMIX_OUTPUT_VERBOSE((3, prte_ras_base_framework.framework_output, "%s hostfile: node %s is being excluded", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), node_name)); /* see if this is another name for us */ if (prte_check_host_is_local(node_name)) { /* Nodename has been allocated, that is for sure */ free(node_name); node_name = strdup(prte_process_info.nodename); } /* Do we need to make a new node object? First check to see if it's already in the exclude list */ node = prte_node_match(exclude, node_name); if (NULL == node) { node = PMIX_NEW(prte_node_t); if (prte_keep_fqdn_hostnames || NULL == alias) { node->name = strdup(node_name); } else { node->name = strdup(alias); node->rawname = strdup(node_name); } if (NULL != username) { prte_set_attribute(&node->attributes, PRTE_NODE_USERNAME, PRTE_ATTR_LOCAL, username, PMIX_STRING); } if (NULL != alias && 0 != strcmp(alias, node->name)) { // new node object, so alias must be unique PMIX_ARGV_APPEND_NOSIZE_COMPAT(&node->aliases, alias); } pmix_list_append(exclude, &node->super); } else { /* the node name may not match the prior entry, so ensure we * keep it if necessary */ if (0 != strcmp(node_name, node->name)) { PMIX_ARGV_APPEND_UNIQUE_COMPAT(&node->aliases, node_name); } free(node_name); if (NULL != alias && 0 != strcmp(alias, node->name)) { PMIX_ARGV_APPEND_UNIQUE_COMPAT(&node->aliases, alias); } } if (NULL != alias) { free(alias); } return PRTE_SUCCESS; } /* this is not a node to be excluded, so we need to process it and * add it to the "include" list. */ PMIX_OUTPUT_VERBOSE((3, prte_ras_base_framework.framework_output, "%s hostfile: node %s is being included - keep all is %s alias %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), node_name, keep_all ? 
"TRUE" : "FALSE", (NULL == alias) ? "NULL" : alias)); /* see if this is another name for us */ if (prte_check_host_is_local(node_name)) { /* Nodename has been allocated, that is for sure */ free(node_name); node_name = strdup(prte_process_info.nodename); } /* Do we need to make a new node object? */ if (keep_all || NULL == (node = prte_node_match(updates, node_name))) { node = PMIX_NEW(prte_node_t); if (prte_keep_fqdn_hostnames || NULL == alias) { node->name = strdup(node_name); } else { node->name = strdup(node_name); node->rawname = strdup(alias); } free(node_name); node->slots = 1; if (NULL != username) { prte_set_attribute(&node->attributes, PRTE_NODE_USERNAME, PRTE_ATTR_LOCAL, username, PMIX_STRING); } if (NULL != alias && 0 != strcmp(alias, node->name)) { // new node object, so alias must be unique PMIX_ARGV_APPEND_NOSIZE_COMPAT(&node->aliases, alias); } pmix_list_append(updates, &node->super); } else { /* this node was already found once - add a slot and mark slots as "given" */ node->slots++; PRTE_FLAG_SET(node, PRTE_NODE_FLAG_SLOTS_GIVEN); /* the node name may not match the prior entry, so ensure we * keep it if necessary */ if (0 != strcmp(node_name, node->name)) { PMIX_ARGV_APPEND_UNIQUE_COMPAT(&node->aliases, node_name); } free(node_name); if (NULL != alias && 0 != strcmp(alias, node->name)) { PMIX_ARGV_APPEND_UNIQUE_COMPAT(&node->aliases, alias); } } } else if (PRTE_HOSTFILE_RELATIVE == token) { /* store this for later processing */ node = PMIX_NEW(prte_node_t); // Strip off the FQDN if present, ignore IP addresses if (!pmix_net_isaddr(prte_util_hostfile_value.sval)) { char *ptr; alias = strdup(prte_util_hostfile_value.sval); if (NULL != (ptr = strchr(alias, '.'))) { *ptr = '\0'; } else { free(alias); alias = NULL; } } if (prte_keep_fqdn_hostnames || NULL == alias) { node->name = strdup(prte_util_hostfile_value.sval); } else { node->name = strdup(alias); node->rawname = strdup(prte_util_hostfile_value.sval); } if (NULL != alias && 0 != strcmp(alias, 
node->name)) { // new node object, so alias must be unique PMIX_ARGV_APPEND_NOSIZE_COMPAT(&node->aliases, alias); free(alias); } pmix_list_append(updates, &node->super); } else if (PRTE_HOSTFILE_RANK == token) { /* we can ignore the rank, but we need to extract the node name. we * first need to shift over to the other side of the equal sign as * this is where the node name will be */ while (!prte_util_hostfile_done && PRTE_HOSTFILE_EQUAL != token) { token = prte_util_hostfile_lex(); } if (prte_util_hostfile_done) { /* bad syntax somewhere */ return PRTE_ERROR; } /* next position should be the node name */ token = prte_util_hostfile_lex(); if (PRTE_HOSTFILE_INT == token) { snprintf(buff, 64, "%d", prte_util_hostfile_value.ival); value = buff; } else { value = prte_util_hostfile_value.sval; } argv = PMIX_ARGV_SPLIT_COMPAT(value, '@'); cnt = PMIX_ARGV_COUNT_COMPAT(argv); if (1 == cnt) { node_name = strdup(argv[0]); } else if (2 == cnt) { username = strdup(argv[0]); node_name = strdup(argv[1]); } else { pmix_output(0, "WARNING: Unhandled user@host-combination\n"); /* XXX */ } PMIX_ARGV_FREE_COMPAT(argv); // Strip off the FQDN if present, ignore IP addresses if (!prte_keep_fqdn_hostnames && !pmix_net_isaddr(node_name)) { char *ptr; alias = strdup(node_name); if (NULL != (ptr = strchr(alias, '.'))) { *ptr = '\0'; } } /* Do we need to make a new node object? 
*/ if (NULL == (node = prte_node_match(updates, node_name))) { node = PMIX_NEW(prte_node_t); node->name = strdup(node_name); node->slots = 1; if (NULL != username) { prte_set_attribute(&node->attributes, PRTE_NODE_USERNAME, PRTE_ATTR_LOCAL, username, PMIX_STRING); } pmix_list_append(updates, &node->super); } else { /* add a slot */ node->slots++; /* the node name may not match the prior entry, so ensure we * keep it if necessary */ if (0 != strcmp(node_name, node->name)) { PMIX_ARGV_APPEND_UNIQUE_COMPAT(&node->aliases, node_name); } } if (NULL != alias) { PMIX_ARGV_APPEND_UNIQUE_COMPAT(&node->aliases, alias); free(alias); node->rawname = strdup(node_name); } PMIX_OUTPUT_VERBOSE((1, prte_ras_base_framework.framework_output, "%s hostfile: node %s slots %d nodes-given %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), node->name, node->slots, PRTE_FLAG_TEST(node, PRTE_NODE_FLAG_SLOTS_GIVEN) ? "TRUE" : "FALSE")); /* mark the slots as "given" since we take them as being the * number specified via the rankfile */ PRTE_FLAG_SET(node, PRTE_NODE_FLAG_SLOTS_GIVEN); /* skip to end of line */ while (!prte_util_hostfile_done && PRTE_HOSTFILE_NEWLINE != token) { token = prte_util_hostfile_lex(); } free(node_name); return PRTE_SUCCESS; } else { hostfile_parse_error(token); return PRTE_ERROR; } free(username); while (!prte_util_hostfile_done) { token = prte_util_hostfile_lex(); switch (token) { case PRTE_HOSTFILE_DONE: goto done; case PRTE_HOSTFILE_NEWLINE: goto done; case PRTE_HOSTFILE_USERNAME: username = hostfile_parse_string(); if (NULL != username) { prte_set_attribute(&node->attributes, PRTE_NODE_USERNAME, PRTE_ATTR_LOCAL, username, PMIX_STRING); free(username); } break; case PRTE_HOSTFILE_PORT: rc = hostfile_parse_int(); if (rc < 0) { pmix_show_help("help-hostfile.txt", "port", true, cur_hostfile_name, rc); return PRTE_ERROR; } prte_set_attribute(&node->attributes, PRTE_NODE_PORT, PRTE_ATTR_LOCAL, &rc, PMIX_INT); break; case PRTE_HOSTFILE_COUNT: case PRTE_HOSTFILE_CPU: case 
PRTE_HOSTFILE_SLOTS: rc = hostfile_parse_int(); if (rc < 0) { pmix_show_help("help-hostfile.txt", "slots", true, cur_hostfile_name, rc); pmix_list_remove_item(updates, &node->super); PMIX_RELEASE(node); return PRTE_ERROR; } if (PRTE_FLAG_TEST(node, PRTE_NODE_FLAG_SLOTS_GIVEN)) { /* multiple definitions were given for the * slot count - this is not allowed */ pmix_show_help("help-hostfile.txt", "slots-given", true, cur_hostfile_name, node->name); pmix_list_remove_item(updates, &node->super); PMIX_RELEASE(node); return PRTE_ERROR; } node->slots = rc; PRTE_FLAG_SET(node, PRTE_NODE_FLAG_SLOTS_GIVEN); /* Ensure that slots_max >= slots */ if (node->slots_max != 0 && node->slots_max < node->slots) { node->slots_max = node->slots; } break; case PRTE_HOSTFILE_SLOTS_MAX: rc = hostfile_parse_int(); if (rc < 0) { pmix_show_help("help-hostfile.txt", "max_slots", true, cur_hostfile_name, ((size_t) rc)); pmix_list_remove_item(updates, &node->super); PMIX_RELEASE(node); return PRTE_ERROR; } /* Only take this update if it puts us >= node_slots */ if (rc >= node->slots) { if (node->slots_max != rc) { node->slots_max = rc; got_max = true; } } else { pmix_show_help("help-hostfile.txt", "max_slots_lt", true, cur_hostfile_name, node->slots, rc); PRTE_ERROR_LOG(PRTE_ERR_BAD_PARAM); pmix_list_remove_item(updates, &node->super); PMIX_RELEASE(node); return PRTE_ERROR; } break; case PRTE_HOSTFILE_STRING: case PRTE_HOSTFILE_INT: /* just ignore it */ break; default: hostfile_parse_error(token); pmix_list_remove_item(updates, &node->super); PMIX_RELEASE(node); return PRTE_ERROR; } if (number_of_slots > node->slots) { PRTE_ERROR_LOG(PRTE_ERR_BAD_PARAM); pmix_list_remove_item(updates, &node->super); PMIX_RELEASE(node); return PRTE_ERROR; } } done: if (got_max && !PRTE_FLAG_TEST(node, PRTE_NODE_FLAG_SLOTS_GIVEN)) { node->slots = node->slots_max; PRTE_FLAG_SET(node, PRTE_NODE_FLAG_SLOTS_GIVEN); } return PRTE_SUCCESS; } /** * Parse the specified file into a node list. 
*/ static int hostfile_parse(const char *hostfile, pmix_list_t *updates, pmix_list_t *exclude, bool keep_all) { int token; int rc = PRTE_SUCCESS; cur_hostfile_name = hostfile; prte_util_hostfile_done = false; prte_util_hostfile_in = fopen(hostfile, "r"); if (NULL == prte_util_hostfile_in) { if (NULL == prte_default_hostfile || 0 != strcmp(prte_default_hostfile, hostfile)) { /* not the default hostfile, so not finding it * is an error */ pmix_show_help("help-hostfile.txt", "no-hostfile", true, hostfile); rc = PRTE_ERR_SILENT; goto unlock; } /* if this is the default hostfile and it was given, * then it's an error */ if (prte_default_hostfile_given) { pmix_show_help("help-hostfile.txt", "no-hostfile", true, hostfile); rc = PRTE_ERR_NOT_FOUND; goto unlock; } /* otherwise, not finding it is okay */ rc = PRTE_SUCCESS; goto unlock; } while (!prte_util_hostfile_done) { token = prte_util_hostfile_lex(); switch (token) { case PRTE_HOSTFILE_DONE: prte_util_hostfile_done = true; break; case PRTE_HOSTFILE_NEWLINE: break; /* * This looks odd, since we have several forms of host-definitions: * hostname just plain as it is, being a PRTE_HOSTFILE_STRING * IP4s and user@IPv4s * hostname.domain and user@hostname.domain */ case PRTE_HOSTFILE_STRING: case PRTE_HOSTFILE_INT: case PRTE_HOSTFILE_HOSTNAME: case PRTE_HOSTFILE_IPV4: case PRTE_HOSTFILE_IPV6: case PRTE_HOSTFILE_RELATIVE: case PRTE_HOSTFILE_RANK: rc = hostfile_parse_line(token, updates, exclude, keep_all); if (PRTE_SUCCESS != rc) { goto unlock; } break; default: hostfile_parse_error(token); goto unlock; } } fclose(prte_util_hostfile_in); prte_util_hostfile_in = NULL; prte_util_hostfile_lex_destroy(); unlock: cur_hostfile_name = NULL; return rc; } /** * Parse the provided hostfile and add the nodes to the list. 
*/ int prte_util_add_hostfile_nodes(pmix_list_t *nodes, char *hostfile) { pmix_list_t exclude, adds; pmix_list_item_t *item; int rc, i; prte_node_t *nd, *node; bool found; PMIX_OUTPUT_VERBOSE((1, prte_ras_base_framework.framework_output, "%s hostfile: checking hostfile %s for nodes", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), hostfile)); PMIX_CONSTRUCT(&exclude, pmix_list_t); PMIX_CONSTRUCT(&adds, pmix_list_t); /* parse the hostfile and add any new contents to the list */ if (PRTE_SUCCESS != (rc = hostfile_parse(hostfile, &adds, &exclude, false))) { goto cleanup; } /* check for any relative node directives */ PMIX_LIST_FOREACH(node, &adds, prte_node_t) { if ('+' == node->name[0]) { pmix_show_help("help-hostfile.txt", "hostfile:relative-syntax", true, node->name); rc = PRTE_ERR_SILENT; goto cleanup; } } /* remove from the list of nodes those that are in the exclude list */ while (NULL != (item = pmix_list_remove_first(&exclude))) { nd = (prte_node_t *) item; /* check for matches on nodes */ PMIX_LIST_FOREACH(node, &adds, prte_node_t) { if (prte_nptr_match(nd, node)) { /* match - remove it */ pmix_list_remove_item(&adds, &node->super); PMIX_RELEASE(node); break; } } PMIX_RELEASE(item); } /* transfer across all unique nodes */ while (NULL != (item = pmix_list_remove_first(&adds))) { nd = (prte_node_t *) item; found = false; PMIX_LIST_FOREACH(node, nodes, prte_node_t) { if (prte_nptr_match(nd, node)) { found = true; break; } } if (found) { /* add this node name as alias */ PMIX_ARGV_APPEND_UNIQUE_COMPAT(&node->aliases, nd->name); /* ensure all other aliases are also transferred */ if (NULL != nd->aliases) { for (i=0; NULL != nd->aliases[i]; i++) { PMIX_ARGV_APPEND_UNIQUE_COMPAT(&node->aliases, nd->aliases[i]); } } PMIX_RELEASE(item); } else { pmix_list_append(nodes, &nd->super); PMIX_OUTPUT_VERBOSE((1, prte_ras_base_framework.framework_output, "%s hostfile: adding node %s slots %d", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), nd->name, nd->slots)); } } cleanup: 
PMIX_LIST_DESTRUCT(&exclude); PMIX_LIST_DESTRUCT(&adds); return rc; } /* Parse the provided hostfile and filter the nodes that are * on the input list, removing those that * are not found in the hostfile */ int prte_util_filter_hostfile_nodes(pmix_list_t *nodes, char *hostfile, bool remove) { pmix_list_t newnodes, exclude; pmix_list_item_t *item1, *item2, *next, *item3; prte_node_t *node_from_list, *node_from_file, *node_from_pool, *node3; int rc = PRTE_SUCCESS; char *cptr; int num_empty, nodeidx; bool want_all_empty = false; pmix_list_t keep; bool found; PMIX_OUTPUT_VERBOSE((1, prte_ras_base_framework.framework_output, "%s hostfile: filtering nodes through hostfile %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), hostfile)); /* parse the hostfile and create local list of findings */ PMIX_CONSTRUCT(&newnodes, pmix_list_t); PMIX_CONSTRUCT(&exclude, pmix_list_t); if (PRTE_SUCCESS != (rc = hostfile_parse(hostfile, &newnodes, &exclude, false))) { PMIX_DESTRUCT(&newnodes); PMIX_DESTRUCT(&exclude); return rc; } /* if the hostfile was empty, then treat it as a no-op filter */ if (0 == pmix_list_get_size(&newnodes)) { PMIX_DESTRUCT(&newnodes); PMIX_DESTRUCT(&exclude); /* indicate that the hostfile was empty */ return PRTE_ERR_TAKE_NEXT_OPTION; } /* remove from the list of newnodes those that are in the exclude list * since we could have added duplicate names above due to the */ while (NULL != (item1 = pmix_list_remove_first(&exclude))) { node_from_file = (prte_node_t *) item1; /* check for matches on nodes */ for (item2 = pmix_list_get_first(&newnodes); item2 != pmix_list_get_end(&newnodes); item2 = pmix_list_get_next(item2)) { prte_node_t *node = (prte_node_t *) item2; if (prte_nptr_match(node_from_file, node)) { /* match - remove it */ pmix_list_remove_item(&newnodes, item2); PMIX_RELEASE(item2); break; } } PMIX_RELEASE(item1); } /* now check our nodes and keep or mark those that match. 
We can * destruct our hostfile list as we go since this won't be needed */ PMIX_CONSTRUCT(&keep, pmix_list_t); while (NULL != (item2 = pmix_list_remove_first(&newnodes))) { node_from_file = (prte_node_t *) item2; next = pmix_list_get_next(item2); /* see if this is a relative node syntax */ if ('+' == node_from_file->name[0]) { /* see if we specified empty nodes */ if ('e' == node_from_file->name[1] || 'E' == node_from_file->name[1]) { /* request for empty nodes - do they want * all of them? */ if (NULL != (cptr = strchr(node_from_file->name, ':'))) { /* the colon indicates a specific # are requested */ cptr++; /* step past : */ num_empty = strtol(cptr, NULL, 10); } else { /* want them all - set num_empty to max */ num_empty = INT_MAX; want_all_empty = true; } /* search the list of nodes provided to us and find those * that are empty */ item1 = pmix_list_get_first(nodes); while (0 < num_empty && item1 != pmix_list_get_end(nodes)) { node_from_list = (prte_node_t *) item1; next = pmix_list_get_next(item1); /* keep our place */ if (0 == node_from_list->slots_inuse) { /* check to see if this node is explicitly called * out later - if so, don't use it here */ for (item3 = pmix_list_get_first(&newnodes); item3 != pmix_list_get_end(&newnodes); item3 = pmix_list_get_next(item3)) { node3 = (prte_node_t *) item3; if (prte_nptr_match(node3, node_from_list)) { /* match - don't use it */ goto skipnode; } } if (remove) { /* remove item from list */ pmix_list_remove_item(nodes, item1); /* xfer to keep list */ pmix_list_append(&keep, item1); } else { /* mark as included */ PRTE_FLAG_SET(node_from_list, PRTE_NODE_FLAG_MAPPED); } --num_empty; } skipnode: item1 = next; } /* did they get everything they wanted? 
*/ if (!want_all_empty && 0 < num_empty) { pmix_show_help("help-hostfile.txt", "hostfile:not-enough-empty", true, num_empty); rc = PRTE_ERR_SILENT; goto cleanup; } } else if ('n' == node_from_file->name[1] || 'N' == node_from_file->name[1]) { /* they want a specific relative node #, so * look it up on global pool */ nodeidx = strtol(&node_from_file->name[2], NULL, 10); node_from_pool = (prte_node_t *) pmix_pointer_array_get_item(prte_node_pool, nodeidx); if (NULL == node_from_pool) { /* this is an error */ pmix_show_help("help-hostfile.txt", "hostfile:relative-node-not-found", true, nodeidx, node_from_file->name); rc = PRTE_ERR_SILENT; goto cleanup; } /* search the list of nodes provided to us and find it */ for (item1 = pmix_list_get_first(nodes); item1 != pmix_list_get_end(nodes); item1 = pmix_list_get_next(item1)) { node_from_list = (prte_node_t *) item1; if (prte_nptr_match(node_from_pool, node_from_list)) { if (remove) { /* match - remove item from list */ pmix_list_remove_item(nodes, item1); /* xfer to keep list */ pmix_list_append(&keep, item1); } else { /* mark as included */ PRTE_FLAG_SET(node_from_list, PRTE_NODE_FLAG_MAPPED); } break; } } } else { /* invalid relative node syntax */ pmix_show_help("help-hostfile.txt", "hostfile:invalid-relative-node-syntax", true, node_from_file->name); rc = PRTE_ERR_SILENT; goto cleanup; } } else { /* we are looking for a specific node on the list * search the provided list of nodes to see if this * one is found */ found = false; for (item1 = pmix_list_get_first(nodes); item1 != pmix_list_get_end(nodes); item1 = pmix_list_get_next(item1)) { node_from_list = (prte_node_t *) item1; /* we have converted all aliases for ourself * to our own detected nodename */ if (prte_nptr_match(node_from_file, node_from_list)) { /* if the slot count here is less than the * total slots avail on this node, set it * to the specified count - this allows people * to subdivide an allocation */ if (PRTE_FLAG_TEST(node_from_file, 
PRTE_NODE_FLAG_SLOTS_GIVEN) && node_from_file->slots < node_from_list->slots) { node_from_list->slots = node_from_file->slots; } if (remove) { /* remove the node from the list */ pmix_list_remove_item(nodes, item1); /* xfer it to keep list */ pmix_list_append(&keep, item1); } else { /* mark as included */ PRTE_FLAG_SET(node_from_list, PRTE_NODE_FLAG_MAPPED); } found = true; break; } } /* if the host in the newnode list wasn't found, * then that is an error we need to report to the * user and abort */ if (!found) { pmix_show_help("help-hostfile.txt", "hostfile:extra-node-not-found", true, hostfile, node_from_file->name); rc = PRTE_ERR_SILENT; goto cleanup; } } /* cleanup the newnode list */ PMIX_RELEASE(item2); } /* if we still have entries on our hostfile list, then * there were requested hosts that were not in our allocation. * This is an error - report it to the user and return an error */ if (0 != pmix_list_get_size(&newnodes)) { pmix_show_help("help-hostfile.txt", "not-all-mapped-alloc", true, hostfile); while (NULL != (item1 = pmix_list_remove_first(&newnodes))) { PMIX_RELEASE(item1); } PMIX_DESTRUCT(&newnodes); return PRTE_ERR_SILENT; } if (!remove) { /* all done */ PMIX_DESTRUCT(&newnodes); return PRTE_SUCCESS; } /* clear the rest of the nodes list */ while (NULL != (item1 = pmix_list_remove_first(nodes))) { PMIX_RELEASE(item1); } /* the nodes list has been cleared - rebuild it in order */ while (NULL != (item1 = pmix_list_remove_first(&keep))) { pmix_list_append(nodes, item1); } cleanup: PMIX_DESTRUCT(&newnodes); return rc; } int prte_util_get_ordered_host_list(pmix_list_t *nodes, char *hostfile) { pmix_list_t exclude; pmix_list_item_t *item, *itm, *item2, *item1; char *cptr; int num_empty, i, nodeidx, startempty = 0; bool want_all_empty = false; prte_node_t *node_from_pool, *newnode; int rc; PMIX_OUTPUT_VERBOSE((1, prte_ras_base_framework.framework_output, "%s hostfile: creating ordered list of hosts from hostfile %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), 
hostfile)); PMIX_CONSTRUCT(&exclude, pmix_list_t); /* parse the hostfile and add the contents to the list, keeping duplicates */ if (PRTE_SUCCESS != (rc = hostfile_parse(hostfile, nodes, &exclude, true))) { goto cleanup; } /* parse the nodes to process any relative node directives */ item2 = pmix_list_get_first(nodes); while (item2 != pmix_list_get_end(nodes)) { prte_node_t *node = (prte_node_t *) item2; /* save the next location in case this one gets removed */ item1 = pmix_list_get_next(item2); if ('+' != node->name[0]) { item2 = item1; continue; } /* see if we specified empty nodes */ if ('e' == node->name[1] || 'E' == node->name[1]) { /* request for empty nodes - do they want * all of them? */ if (NULL != (cptr = strchr(node->name, ':'))) { /* the colon indicates a specific # are requested */ cptr++; /* step past : */ num_empty = strtol(cptr, NULL, 10); } else { /* want them all - set num_empty to max */ num_empty = INT_MAX; want_all_empty = true; } /* insert empty nodes into newnodes list in place of the current item. * since item1 is the next item, we insert in front of it */ if (!prte_hnp_is_allocated && 0 == startempty) { startempty = 1; } for (i = startempty; 0 < num_empty && i < prte_node_pool->size; i++) { node_from_pool = (prte_node_t *) pmix_pointer_array_get_item(prte_node_pool, i); if (NULL == node_from_pool) { continue; } if (0 == node_from_pool->slots_inuse) { newnode = PMIX_NEW(prte_node_t); newnode->name = strdup(node_from_pool->name); /* if the slot count here is less than the * total slots avail on this node, set it * to the specified count - this allows people * to subdivide an allocation */ if (node->slots < node_from_pool->slots) { newnode->slots = node->slots; } else { newnode->slots = node_from_pool->slots; } pmix_list_insert_pos(nodes, item1, &newnode->super); /* track number added */ --num_empty; } } /* bookmark where we stopped in case they ask for more */ startempty = i; /* did they get everything they wanted? 
*/ if (!want_all_empty && 0 < num_empty) { pmix_show_help("help-hostfile.txt", "hostfile:not-enough-empty", true, num_empty); rc = PRTE_ERR_SILENT; goto cleanup; } /* since we have expanded the provided node, remove * it from list */ pmix_list_remove_item(nodes, item2); PMIX_RELEASE(item2); } else if ('n' == node->name[1] || 'N' == node->name[1]) { /* they want a specific relative node #, so * look it up on global pool */ nodeidx = strtol(&node->name[2], NULL, 10); /* if the HNP is not allocated, then we need to * adjust the index as the node pool is offset * by one */ if (!prte_hnp_is_allocated) { nodeidx++; } /* see if that location is filled */ node_from_pool = (prte_node_t *) pmix_pointer_array_get_item(prte_node_pool, nodeidx); if (NULL == node_from_pool) { /* this is an error */ pmix_show_help("help-hostfile.txt", "hostfile:relative-node-not-found", true, nodeidx, node->name); rc = PRTE_ERR_SILENT; goto cleanup; } /* create the node object */ newnode = PMIX_NEW(prte_node_t); newnode->name = strdup(node_from_pool->name); /* if the slot count here is less than the * total slots avail on this node, set it * to the specified count - this allows people * to subdivide an allocation */ if (node->slots < node_from_pool->slots) { newnode->slots = node->slots; } else { newnode->slots = node_from_pool->slots; } /* insert it before item1 */ pmix_list_insert_pos(nodes, item1, &newnode->super); /* since we have expanded the provided node, remove * it from list */ pmix_list_remove_item(nodes, item2); PMIX_RELEASE(item2); } else { /* invalid relative node syntax */ pmix_show_help("help-hostfile.txt", "hostfile:invalid-relative-node-syntax", true, node->name); rc = PRTE_ERR_SILENT; goto cleanup; } /* move to next */ item2 = item1; } /* remove from the list of nodes those that are in the exclude list */ while (NULL != (item = pmix_list_remove_first(&exclude))) { prte_node_t *exnode = (prte_node_t *) item; /* check for matches on nodes */ for (itm = pmix_list_get_first(nodes); 
itm != pmix_list_get_end(nodes); itm = pmix_list_get_next(itm)) { prte_node_t *node = (prte_node_t *) itm; if (prte_nptr_match(exnode, node)) { /* match - remove it */ pmix_list_remove_item(nodes, itm); PMIX_RELEASE(itm); /* have to cycle through the entire list as we could * have duplicates */ } } PMIX_RELEASE(item); } cleanup: PMIX_DESTRUCT(&exclude); return rc; } prrte-3.0.13/src/util/hostfile/Makefile.am0000664000175000017500000000263215145263240020540 0ustar alastairalastair# # Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. # Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2016 Research Organization for Information Science # and Technology (RIST). All rights reserved. # Copyright (c) 2019-2020 Intel, Inc. All rights reserved. # Copyright (c) 2020 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2022-2024 Nanook Consulting All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow # # $HEADER$ # AM_LFLAGS = -Pprte_util_hostfile_ LEX_OUTPUT_ROOT = lex.prte_util_hostfile_ # we do NOT want picky compilers down here CFLAGS = $(PRTE_CFLAGS_BEFORE_PICKY) noinst_LTLIBRARIES = libprrteutilhostfile.la # Source code files libprrteutilhostfile_la_SOURCES = \ hostfile_lex.h \ hostfile_lex.l \ hostfile.h \ hostfile.c maintainer-clean-local: rm -f hostfile_lex.c prrte-3.0.13/src/util/hostfile/hostfile_lex.l0000664000175000017500000001617115145263240021351 0ustar alastairalastair%option nounput %option noinput %{ /* -*- C -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. 
All rights reserved. * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2015-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #include "prte_config.h" #include #ifdef HAVE_UNISTD_H #include #endif #include "src/util/hostfile/hostfile_lex.h" /* * local functions */ BEGIN_C_DECLS int prte_util_hostfile_wrap(void); END_C_DECLS int prte_util_hostfile_wrap(void) { prte_util_hostfile_done = true; return 1; } /* * global variables */ int prte_util_hostfile_line=1; prte_hostfile_value_t prte_util_hostfile_value = {0}; bool prte_util_hostfile_done = false; #ifndef YY_BREAK #define YY_BREAK #endif %} WHITE [\f\t\v ] %x comment %% {WHITE}*\n { prte_util_hostfile_line++; return PRTE_HOSTFILE_NEWLINE; } #.*\n { prte_util_hostfile_line++; return PRTE_HOSTFILE_NEWLINE; } "//".*\n { prte_util_hostfile_line++; return PRTE_HOSTFILE_NEWLINE; } "/*" { BEGIN(comment); return PRTE_HOSTFILE_NEWLINE; } [^*\n]* { break; } /* Eat up non '*'s */ "*"+[^*/\n]* { break; } /* Eat '*'s not followed by a '/' */ \n { prte_util_hostfile_line++; return PRTE_HOSTFILE_NEWLINE; } "*"+"/" { BEGIN(INITIAL); /* Done with Block Comment */ return PRTE_HOSTFILE_NEWLINE; } \"[^\"]*\" { prte_util_hostfile_value.sval = yytext; return PRTE_HOSTFILE_QUOTED_STRING; } {WHITE}+ { break; } /* whitespace */ "=" { return PRTE_HOSTFILE_EQUAL; } cpu { prte_util_hostfile_value.sval = yytext; return PRTE_HOSTFILE_CPU; } count { prte_util_hostfile_value.sval = yytext; return 
PRTE_HOSTFILE_COUNT; } slots { prte_util_hostfile_value.sval = yytext; return PRTE_HOSTFILE_SLOTS; } "slots-max" { prte_util_hostfile_value.sval = yytext; return PRTE_HOSTFILE_SLOTS_MAX; } slots_max { prte_util_hostfile_value.sval = yytext; return PRTE_HOSTFILE_SLOTS_MAX; } "max-slots" { prte_util_hostfile_value.sval = yytext; return PRTE_HOSTFILE_SLOTS_MAX; } max_slots { prte_util_hostfile_value.sval = yytext; return PRTE_HOSTFILE_SLOTS_MAX; } "cpu-max" { prte_util_hostfile_value.sval = yytext; return PRTE_HOSTFILE_SLOTS_MAX; } cpu_max { prte_util_hostfile_value.sval = yytext; return PRTE_HOSTFILE_SLOTS_MAX; } "max-cpu" { prte_util_hostfile_value.sval = yytext; return PRTE_HOSTFILE_SLOTS_MAX; } max_cpu { prte_util_hostfile_value.sval = yytext; return PRTE_HOSTFILE_SLOTS_MAX; } "count-max" { prte_util_hostfile_value.sval = yytext; return PRTE_HOSTFILE_SLOTS_MAX; } count_max { prte_util_hostfile_value.sval = yytext; return PRTE_HOSTFILE_SLOTS_MAX; } "max-count" { prte_util_hostfile_value.sval = yytext; return PRTE_HOSTFILE_SLOTS_MAX; } max_count { prte_util_hostfile_value.sval = yytext; return PRTE_HOSTFILE_SLOTS_MAX; } rank { prte_util_hostfile_value.sval = yytext; return PRTE_HOSTFILE_RANK; } slot { prte_util_hostfile_value.sval = yytext; return PRTE_HOSTFILE_SLOT; } username { prte_util_hostfile_value.sval = yytext; return PRTE_HOSTFILE_USERNAME; } "user-name" { prte_util_hostfile_value.sval = yytext; return PRTE_HOSTFILE_USERNAME; } "user_name" { prte_util_hostfile_value.sval = yytext; return PRTE_HOSTFILE_USERNAME; } port { prte_util_hostfile_value.sval = yytext; return PRTE_HOSTFILE_PORT; } boards { prte_util_hostfile_value.sval = yytext; return PRTE_HOSTFILE_BOARDS; } sockets { prte_util_hostfile_value.sval = yytext; return PRTE_HOSTFILE_SOCKETS_PER_BOARD; } sockets_per_board { prte_util_hostfile_value.sval = yytext; return PRTE_HOSTFILE_SOCKETS_PER_BOARD; } "sockets-per-board" { prte_util_hostfile_value.sval = yytext; return PRTE_HOSTFILE_SOCKETS_PER_BOARD; 
} cores { prte_util_hostfile_value.sval = yytext; return PRTE_HOSTFILE_CORES_PER_SOCKET; } cores_per_socket { prte_util_hostfile_value.sval = yytext; return PRTE_HOSTFILE_CORES_PER_SOCKET; } "cores-per-socket" { prte_util_hostfile_value.sval = yytext; return PRTE_HOSTFILE_CORES_PER_SOCKET; } \+n[0-9]+ { prte_util_hostfile_value.sval = yytext; return PRTE_HOSTFILE_RELATIVE; } \+[eE][\:][0-9]+ { prte_util_hostfile_value.sval = yytext; return PRTE_HOSTFILE_RELATIVE; } \+[eE] { prte_util_hostfile_value.sval = yytext; return PRTE_HOSTFILE_RELATIVE; } [0-9]+ { prte_util_hostfile_value.ival = atol(yytext); return PRTE_HOSTFILE_INT; } %{ /* First detect hosts as standard Strings (but without ".") * then username@IPv4 or IPV4, then username@IPv6 or IPv6, * followed by username@hostname or hostname */ %} [A-Za-z0-9_\-,:*@]* { prte_util_hostfile_value.sval = yytext; return PRTE_HOSTFILE_STRING; } ([A-Za-z0-9][A-Za-z0-9_\-]*"@")?([0-9]{1,3}"."){3}[0-9]{1,3} { prte_util_hostfile_value.sval = yytext; return PRTE_HOSTFILE_IPV4; } ([A-Za-z0-9][A-Za-z0-9_\-]*"@")?([A-Fa-f0-9]{0,4}":")+[":"]*([A-Fa-f0-9]{0,4}":")+[A-Fa-f0-9]{1,4} { prte_util_hostfile_value.sval = yytext; return PRTE_HOSTFILE_IPV6; } (\^?[A-Za-z0-9][A-Za-z0-9_\-]*"@")?[A-Za-z0-9][A-Za-z0-9_\-\.]* { prte_util_hostfile_value.sval = yytext; return PRTE_HOSTFILE_HOSTNAME; } . { prte_util_hostfile_value.sval = yytext; return PRTE_HOSTFILE_ERROR; } %% prrte-3.0.13/src/util/hostfile/hostfile.h0000664000175000017500000000256415145263240020476 0ustar alastairalastair/* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2019 Intel, Inc. All rights reserved. * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ /** * @file * * Resource Discovery (Hostfile) */ #ifndef PRTE_UTIL_HOSTFILE_H #define PRTE_UTIL_HOSTFILE_H #include "prte_config.h" #include "src/class/pmix_list.h" BEGIN_C_DECLS PRTE_EXPORT int prte_util_add_hostfile_nodes(pmix_list_t *nodes, char *hostfile); PRTE_EXPORT int prte_util_filter_hostfile_nodes(pmix_list_t *nodes, char *hostfile, bool remove); PRTE_EXPORT int prte_util_get_ordered_host_list(pmix_list_t *nodes, char *hostfile); END_C_DECLS #endif prrte-3.0.13/src/util/hostfile/hostfile_lex.h0000664000175000017500000000545015145263240021343 0ustar alastairalastair/* -*- C -*- * * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2016-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2011-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2017-2019 Intel, Inc. All rights reserved. * Copyright (c) 2021 Nanook Consulting. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #ifndef PRTE_UTIL_HOSTFILE_LEX_H_ #define PRTE_UTIL_HOSTFILE_LEX_H_ #include "prte_config.h" #ifdef malloc # undef malloc #endif #ifdef realloc # undef realloc #endif #ifdef free # undef free #endif #include typedef union { int ival; char *sval; } prte_hostfile_value_t; extern int prte_util_hostfile_lex(void); extern FILE *prte_util_hostfile_in; extern int prte_util_hostfile_line; extern bool prte_util_hostfile_done; extern prte_hostfile_value_t prte_util_hostfile_value; extern int prte_util_hostfile_lex_destroy(void); /* * Make lex-generated files not issue compiler warnings */ #define YY_STACK_USED 0 #define YY_ALWAYS_INTERACTIVE 0 #define YY_NEVER_INTERACTIVE 0 #define YY_MAIN 0 #define YY_NO_UNPUT 1 #define YY_SKIP_YYWRAP 1 #define PRTE_HOSTFILE_DONE 0 #define PRTE_HOSTFILE_ERROR 1 #define PRTE_HOSTFILE_QUOTED_STRING 2 #define PRTE_HOSTFILE_EQUAL 3 #define PRTE_HOSTFILE_INT 4 #define PRTE_HOSTFILE_STRING 5 #define PRTE_HOSTFILE_CPU 6 #define PRTE_HOSTFILE_COUNT 7 #define PRTE_HOSTFILE_SLOTS 8 #define PRTE_HOSTFILE_SLOTS_MAX 9 #define PRTE_HOSTFILE_USERNAME 10 #define PRTE_HOSTFILE_IPV4 11 #define PRTE_HOSTFILE_HOSTNAME 12 #define PRTE_HOSTFILE_NEWLINE 13 #define PRTE_HOSTFILE_IPV6 14 #define PRTE_HOSTFILE_SLOT 15 #define PRTE_HOSTFILE_RELATIVE 16 #define PRTE_HOSTFILE_BOARDS 17 #define PRTE_HOSTFILE_SOCKETS_PER_BOARD 18 #define PRTE_HOSTFILE_CORES_PER_SOCKET 19 /* ensure we can handle a rank_file input */ #define PRTE_HOSTFILE_RANK 20 #define PRTE_HOSTFILE_PORT 21 #endif prrte-3.0.13/src/util/attr.c0000664000175000017500000010627615145263240016016 0ustar alastairalastair/* * Copyright (c) 2014-2020 Intel, Inc. All rights reserved. * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2018-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2021-2025 Nanook Consulting All rights reserved. 
* Copyright (c) 2021 The University of Tennessee and The University
 * of Tennessee Research Foundation. All rights
 * reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */

#include "prte_config.h"
#include "constants.h"
#include "types.h"

#include "src/pmix/pmix-internal.h"
#include "src/util/pmix_argv.h"
#include "src/util/pmix_output.h"
#include "src/util/pmix_printf.h"
#include "src/util/pmix_string_copy.h"

#include "src/mca/errmgr/errmgr.h"
#include "src/runtime/prte_globals.h"
#include "src/util/attr.h"

/* capacity of the converter registry and of each project-name buffer */
#define MAX_CONVERTERS 5
#define MAX_CONVERTER_PROJECT_LEN 10

/* One registered key-to-string converter: it claims the open key range
 * (key_base, key_max) on behalf of "project". init is non-zero once the
 * slot has been filled by prte_attr_register(). */
typedef struct {
    int init;
    char project[MAX_CONVERTER_PROJECT_LEN];
    prte_attribute_key_t key_base;
    prte_attribute_key_t key_max;
    prte_attr2str_fn_t converter;
} prte_attr_converter_t;

/* all default to NULL */
static prte_attr_converter_t converters[MAX_CONVERTERS];

/**
 * Look up the first attribute on the list whose key matches.
 *
 * @param attributes list of prte_attribute_t to search
 * @param key        attribute key to find
 * @param data       if non-NULL, handed to prte_attr_unload() to receive
 *                   the stored value; if NULL, only presence is tested
 * @param type       type the caller expects the attribute to hold
 *
 * @return true if an attribute with the key and the expected type is on
 *         the list; false if the key is absent or its stored type differs
 *         (the mismatch is logged). Note that a failure inside
 *         prte_attr_unload() is only logged - true is still returned.
 */
bool prte_get_attribute(pmix_list_t *attributes, prte_attribute_key_t key, void **data,
                        pmix_data_type_t type)
{
    prte_attribute_t *kv;
    int rc;

    PMIX_LIST_FOREACH(kv, attributes, prte_attribute_t)
    {
        if (key == kv->key) {
            if (kv->data.type != type) {
                PRTE_ERROR_LOG(PRTE_ERR_TYPE_MISMATCH);
                pmix_output(0, "KV %s TYPE %s", PMIx_Data_type_string(kv->data.type),
                            PMIx_Data_type_string(type));
                return false;
            }
            if (NULL != data) {
                if (PRTE_SUCCESS != (rc = prte_attr_unload(kv, data, type))) {
                    PRTE_ERROR_LOG(rc);
                }
            }
            return true;
        }
    }
    /* not found */
    return false;
}

/**
 * Set an attribute, updating the existing entry for the key if there is
 * one and appending a new entry otherwise.
 *
 * For an existing PMIX_BOOL attribute, a value of false removes the entry
 * from the list (a NULL data pointer is treated as true). Returns
 * PRTE_ERR_TYPE_MISMATCH if the key exists with a different type,
 * otherwise the result of prte_attr_load().
 */
int prte_set_attribute(pmix_list_t *attributes, prte_attribute_key_t key, bool local, void *data,
                       pmix_data_type_t type)
{
    prte_attribute_t *kv;
    bool *bl, bltrue = true;
    int rc;

    PMIX_LIST_FOREACH(kv, attributes, prte_attribute_t)
    {
        if (key == kv->key) {
            if (kv->data.type != type) {
                return PRTE_ERR_TYPE_MISMATCH;
            }
            if (PMIX_BOOL == type) {
                /* presence of a boolean attribute means "true", so a
                 * false value is expressed by deleting the entry */
                if (NULL == data) {
                    bl = &bltrue;
                } else {
                    bl = (bool*)data;
                }
                if (false == *bl) {
                    pmix_list_remove_item(attributes, &kv->super);
                    PMIX_RELEASE(kv);
                    return PRTE_SUCCESS;
                }
            }
            if (PRTE_SUCCESS != (rc = prte_attr_load(kv, data, type))) {
PRTE_ERROR_LOG(rc); } return rc; } } /* not found - add it */ kv = PMIX_NEW(prte_attribute_t); kv->key = key; kv->local = local; if (PRTE_SUCCESS != (rc = prte_attr_load(kv, data, type))) { PMIX_RELEASE(kv); return rc; } pmix_list_append(attributes, &kv->super); return PRTE_SUCCESS; } prte_attribute_t *prte_fetch_attribute(pmix_list_t *attributes, prte_attribute_t *prev, prte_attribute_key_t key) { prte_attribute_t *kv, *end, *next; /* if prev is NULL, then find the first attr on the list * that matches the key */ if (NULL == prev) { PMIX_LIST_FOREACH(kv, attributes, prte_attribute_t) { if (key == kv->key) { return kv; } } /* if we get, then the key isn't on the list */ return NULL; } /* if we are at the end of the list, then nothing to do */ end = (prte_attribute_t *) pmix_list_get_end(attributes); if (prev == end || end == (prte_attribute_t *) pmix_list_get_next(&prev->super) || NULL == pmix_list_get_next(&prev->super)) { return NULL; } /* starting with the next item on the list, search * for the next attr with the matching key */ next = (prte_attribute_t *) pmix_list_get_next(&prev->super); while (NULL != next) { if (next->key == key) { return next; } next = (prte_attribute_t *) pmix_list_get_next(&next->super); } /* if we get here, then no matching key was found */ return NULL; } int prte_prepend_attribute(pmix_list_t *attributes, prte_attribute_key_t key, bool local, void *data, pmix_data_type_t type) { prte_attribute_t *kv; int rc; kv = PMIX_NEW(prte_attribute_t); kv->key = key; kv->local = local; if (PRTE_SUCCESS != (rc = prte_attr_load(kv, data, type))) { PMIX_RELEASE(kv); return rc; } pmix_list_prepend(attributes, &kv->super); return PRTE_SUCCESS; } void prte_remove_attribute(pmix_list_t *attributes, prte_attribute_key_t key) { prte_attribute_t *kv; PMIX_LIST_FOREACH(kv, attributes, prte_attribute_t) { if (key == kv->key) { pmix_list_remove_item(attributes, &kv->super); PMIX_RELEASE(kv); return; } } } int prte_attr_register(const char *project, 
prte_attribute_key_t key_base, prte_attribute_key_t key_max,
                       prte_attr2str_fn_t converter)
{
    int i;

    /* claim the first slot that has not been initialized yet */
    for (i = 0; i < MAX_CONVERTERS; ++i) {
        if (0 == converters[i].init) {
            converters[i].init = 1;
            pmix_string_copy(converters[i].project, project, MAX_CONVERTER_PROJECT_LEN);
            /* force termination in case the project name was truncated */
            converters[i].project[MAX_CONVERTER_PROJECT_LEN - 1] = '\0';
            converters[i].key_base = key_base;
            converters[i].key_max = key_max;
            converters[i].converter = converter;
            return PRTE_SUCCESS;
        }
    }

    return PRTE_ERR_OUT_OF_RESOURCE;
}

/**
 * Build a newline-separated string holding the printable name of every
 * attribute key on the list, in list order.
 *
 * @return the joined string, or NULL if the list is empty.
 *         NOTE(review): the result presumably is heap-allocated by the
 *         argv join helper and must be freed by the caller - confirm.
 */
char *prte_attr_print_list(pmix_list_t *attributes)
{
    char *out1, **cache = NULL;
    prte_attribute_t *attr;

    PMIX_LIST_FOREACH(attr, attributes, prte_attribute_t)
    {
        PMIX_ARGV_APPEND_NOSIZE_COMPAT(&cache, prte_attr_key_to_str(attr->key));
    }
    if (NULL != cache) {
        out1 = PMIX_ARGV_JOIN_COMPAT(cache, '\n');
        PMIX_ARGV_FREE_COMPAT(cache);
    } else {
        out1 = NULL;
    }
    return out1;
}

/* scratch buffer for the "UNKNOWN-KEY: <n>" fallback; it is shared by all
 * callers and overwritten on every unknown-key lookup */
static char unknownkey[180] = {0};

/**
 * Translate an attribute key into a printable name.
 *
 * Keys strictly between PRTE_ATTR_KEY_BASE and PRTE_ATTR_KEY_MAX are
 * resolved by the table below; other keys are offered to the registered
 * converters. Unrecognized keys are formatted into the static unknownkey
 * buffer, so that particular result is only valid until the next
 * unknown-key lookup.
 */
const char *prte_attr_key_to_str(prte_attribute_key_t key)
{
    int i;

    if (PRTE_ATTR_KEY_BASE < key && key < PRTE_ATTR_KEY_MAX) {
        /* belongs to PRTE, so we handle it */
        switch (key) {
        case PRTE_APP_HOSTFILE:
            return "APP-HOSTFILE";
        case PRTE_APP_ADD_HOSTFILE:
            return "APP-ADD-HOSTFILE";
        case PRTE_APP_DASH_HOST:
            return "APP-DASH-HOST";
        case PRTE_APP_ADD_HOST:
            return "APP-ADD-HOST";
        case PRTE_APP_USER_CWD:
            return "APP-USER-CWD";
        case PRTE_APP_SSNDIR_CWD:
            return "APP-USE-SESSION-DIR-AS-CWD";
        case PRTE_APP_PRELOAD_BIN:
            return "APP-PRELOAD-BIN";
        case PRTE_APP_PRELOAD_FILES:
            return "APP-PRELOAD-FILES";
        case PRTE_APP_SSTORE_LOAD:
            return "APP-SSTORE-LOAD";
        case PRTE_APP_RECOV_DEF:
            return "APP-RECOVERY-DEFINED";
        case PRTE_APP_MAX_RESTARTS:
            return "APP-MAX-RESTARTS";
        case PRTE_APP_MIN_NODES:
            return "APP-MIN-NODES";
        case PRTE_APP_MANDATORY:
            return "APP-NODES-MANDATORY";
        case PRTE_APP_MAX_PPN:
            return "APP-MAX-PPN";
        case PRTE_APP_PMIX_PREFIX:
            return "APP-PMIX-PREFIX";
        case PRTE_APP_NO_CACHEDIR:
            return "PRTE_APP_NO_CACHEDIR";
        case PRTE_APP_SET_ENVAR:
            return "PRTE_APP_SET_ENVAR";
        case
PRTE_APP_UNSET_ENVAR: return "PRTE_APP_UNSET_ENVAR"; case PRTE_APP_PREPEND_ENVAR: return "PRTE_APP_PREPEND_ENVAR"; case PRTE_APP_APPEND_ENVAR: return "PRTE_APP_APPEND_ENVAR"; case PRTE_APP_ADD_ENVAR: return "PRTE_APP_ADD_ENVAR"; case PRTE_APP_PSET_NAME: return "PRTE_APP_PSET_NAME"; case PRTE_NODE_USERNAME: return "NODE-USERNAME"; case PRTE_NODE_PORT: return "NODE-PORT"; case PRTE_NODE_LAUNCH_ID: return "NODE-LAUNCHID"; case PRTE_NODE_HOSTID: return "NODE-HOSTID"; case PRTE_NODE_SERIAL_NUMBER: return "NODE-SERIAL-NUM"; case PRTE_NODE_ADD_SLOTS: return "NODE-ADD-SLOTS"; case PRTE_JOB_LAUNCH_MSG_SENT: return "JOB-LAUNCH-MSG-SENT"; case PRTE_JOB_LAUNCH_MSG_RECVD: return "JOB-LAUNCH-MSG-RECVD"; case PRTE_JOB_MAX_LAUNCH_MSG_RECVD: return "JOB-MAX-LAUNCH-MSG-RECVD"; case PRTE_JOB_CKPT_STATE: return "JOB-CKPT-STATE"; case PRTE_JOB_SNAPSHOT_REF: return "JOB-SNAPSHOT-REF"; case PRTE_JOB_SNAPSHOT_LOC: return "JOB-SNAPSHOT-LOC"; case PRTE_JOB_SNAPC_INIT_BAR: return "JOB-SNAPC-INIT-BARRIER-ID"; case PRTE_JOB_SNAPC_FINI_BAR: return "JOB-SNAPC-FINI-BARRIER-ID"; case PRTE_JOB_NUM_NONZERO_EXIT: return "JOB-NUM-NONZERO-EXIT"; case PRTE_SPAWN_TIMEOUT_EVENT: return "SPAWN-TIMEOUT-EVENT"; case PRTE_JOB_ABORTED_PROC: return "JOB-ABORTED-PROC"; case PRTE_JOB_MAPPER: return "JOB-MAPPER"; case PRTE_JOB_REDUCER: return "JOB-REDUCER"; case PRTE_JOB_COMBINER: return "JOB-COMBINER"; case PRTE_JOB_INDEX_ARGV: return "JOB-INDEX-ARGV"; case PRTE_JOB_NO_VM: return "JOB-NO-VM"; case PRTE_JOB_SPIN_FOR_DEBUG: return "JOB-SPIN-FOR-DEBUG"; case PRTE_JOB_CONTINUOUS: return "JOB-CONTINUOUS"; case PRTE_JOB_RECOVER_DEFINED: return "JOB-RECOVERY-DEFINED"; case PRTE_JOB_NON_PRTE_JOB: return "JOB-NON-PRTE-JOB"; case PRTE_JOB_STDOUT_TARGET: return "JOB-STDOUT-TARGET"; case PRTE_JOB_POWER: return "JOB-POWER"; case PRTE_JOB_MAX_FREQ: return "JOB-MAX_FREQ"; case PRTE_JOB_MIN_FREQ: return "JOB-MIN_FREQ"; case PRTE_JOB_GOVERNOR: return "JOB-FREQ-GOVERNOR"; case PRTE_JOB_FAIL_NOTIFIED: return "JOB-FAIL-NOTIFIED"; 
case PRTE_JOB_TERM_NOTIFIED:
            return "JOB-TERM-NOTIFIED";
        case PRTE_JOB_PEER_MODX_ID:
            return "JOB-PEER-MODX-ID";
        case PRTE_JOB_INIT_BAR_ID:
            return "JOB-INIT-BAR-ID";
        case PRTE_JOB_FINI_BAR_ID:
            return "JOB-FINI-BAR-ID";
        case PRTE_JOB_FWDIO_TO_TOOL:
            return "JOB-FWD-IO-TO-TOOL";
        case PRTE_JOB_LAUNCHED_DAEMONS:
            return "JOB-LAUNCHED-DAEMONS";
        case PRTE_JOB_REPORT_BINDINGS:
            return "JOB-REPORT-BINDINGS";
        case PRTE_JOB_CPUSET:
            return "JOB-CPUSET";
        case PRTE_JOB_NOTIFICATIONS:
            return "JOB-NOTIFICATIONS";
        case PRTE_JOB_ROOM_NUM:
            return "JOB-ROOM-NUM";
        case PRTE_JOB_LAUNCH_PROXY:
            return "JOB-LAUNCH-PROXY";
        case PRTE_JOB_NSPACE_REGISTERED:
            return "JOB-NSPACE-REGISTERED";
        case PRTE_JOB_FIXED_DVM:
            return "PRTE-JOB-FIXED-DVM";
        case PRTE_JOB_DVM_JOB:
            return "PRTE-JOB-DVM-JOB";
        case PRTE_JOB_CANCELLED:
            return "PRTE-JOB-CANCELLED";
        case PRTE_JOB_OUTPUT_TO_FILE:
            return "PRTE-JOB-OUTPUT-TO-FILE";
        case PRTE_JOB_MERGE_STDERR_STDOUT:
            return "PRTE-JOB-MERGE-STDERR-STDOUT";
        case PRTE_JOB_TAG_OUTPUT:
            return "PRTE-JOB-TAG-OUTPUT";
        case PRTE_JOB_RANK_OUTPUT:
            return "PRTE-JOB-RANK-OUTPUT";
        case PRTE_JOB_TIMESTAMP_OUTPUT:
            return "PRTE-JOB-TIMESTAMP-OUTPUT";
        case PRTE_JOB_MULTI_DAEMON_SIM:
            return "PRTE_JOB_MULTI_DAEMON_SIM";
        case PRTE_JOB_NOTIFY_COMPLETION:
            return "PRTE_JOB_NOTIFY_COMPLETION";
        case PRTE_JOB_TRANSPORT_KEY:
            return "PRTE_JOB_TRANSPORT_KEY";
        case PRTE_JOB_INFO_CACHE:
            return "PRTE_JOB_INFO_CACHE";
        case PRTE_JOB_SILENT_TERMINATION:
            return "PRTE_JOB_SILENT_TERMINATION";
        case PRTE_JOB_SET_ENVAR:
            return "PRTE_JOB_SET_ENVAR";
        case PRTE_JOB_UNSET_ENVAR:
            return "PRTE_JOB_UNSET_ENVAR";
        case PRTE_JOB_PREPEND_ENVAR:
            return "PRTE_JOB_PREPEND_ENVAR";
        case PRTE_JOB_APPEND_ENVAR:
            return "PRTE_JOB_APPEND_ENVAR";
        case PRTE_JOB_ADD_ENVAR:
            /* must name the JOB key: the APP-level key has its own case
             * and the two have to stay distinguishable in printed output */
            return "PRTE_JOB_ADD_ENVAR";
        case PRTE_JOB_APP_SETUP_DATA:
            return "PRTE_JOB_APP_SETUP_DATA";
        case PRTE_JOB_OUTPUT_TO_DIRECTORY:
            return "PRTE_JOB_OUTPUT_TO_DIRECTORY";
        case PRTE_JOB_STOP_ON_EXEC:
            return "JOB_STOP_ON_EXEC";
        case PRTE_JOB_SPAWN_NOTIFIED:
            return
"JOB_SPAWN_NOTIFIED"; case PRTE_JOB_DISPLAY_MAP: return "DISPLAY_JOB_MAP"; case PRTE_JOB_DISPLAY_DEVEL_MAP: return "DISPLAY_DEVEL_JOB_MAP"; case PRTE_JOB_DISPLAY_TOPO: return "DISPLAY_TOPOLOGY"; case PRTE_JOB_DISPLAY_ALLOC: return "DISPLAY_ALLOCATION"; case PRTE_JOB_DO_NOT_LAUNCH: return "DO_NOT_LAUNCH"; case PRTE_JOB_XML_OUTPUT: return "XML_OUTPUT"; case PRTE_JOB_TIMEOUT: return "JOB_TIMEOUT"; case PRTE_JOB_STACKTRACES: return "JOB_STACKTRACES"; case PRTE_JOB_REPORT_STATE: return "JOB_REPORT_STATE"; case PRTE_JOB_TIMEOUT_EVENT: return "JOB_TIMEOUT_EVENT"; case PRTE_JOB_TRACE_TIMEOUT_EVENT: return "JOB_TRACE_TIMEOUT_EVENT"; case PRTE_JOB_INHERIT: return "JOB_INHERIT"; case PRTE_JOB_PES_PER_PROC: return "JOB_PES_PER_PROC"; case PRTE_JOB_DIST_DEVICE: return "JOB_DIST_DEVICE"; case PRTE_JOB_HWT_CPUS: return "JOB_HWT_CPUS"; case PRTE_JOB_CORE_CPUS: return "JOB_CORE_CPUS"; case PRTE_JOB_PPR: return "JOB_PPR"; case PRTE_JOB_NOINHERIT: return "JOB_NOINHERIT"; case PRTE_JOB_FILE: return "JOB-FILE"; case PRTE_JOB_DO_NOT_RESOLVE: return "DO-NOT-RESOLVE"; case PRTE_JOB_DEBUG_TARGET: return "DEBUG-TARGET"; case PRTE_JOB_DEBUG_DAEMONS_PER_NODE: return "DEBUG-DAEMONS-PER-NODE"; case PRTE_JOB_DEBUG_DAEMONS_PER_PROC: return "DEBUG-DAEMONS-PER-PROC"; case PRTE_JOB_STOP_IN_INIT: return "STOP-IN-INIT"; case PRTE_JOB_STOP_IN_APP: return "STOP-IN-APP"; case PRTE_JOB_ENVARS_HARVESTED: return "ENVARS-HARVESTED"; case PRTE_JOB_OUTPUT_NOCOPY: return "DO-NOT-COPY-OUTPUT"; case PRTE_SPAWN_TIMEOUT: return "SPAWN-TIMEOUT"; case PRTE_JOB_RAW_OUTPUT: return "DO-NOT-BUFFER-OUTPUT"; case PRTE_JOB_EXEC_AGENT: return "EXEC-AGENT"; case PRTE_JOB_NOAGG_HELP: return "DO-NOT-AGGREGATE-HELP"; case PRTE_JOB_COLOCATE_PROCS: return "COLOCATE PROCS"; case PRTE_JOB_COLOCATE_NPERPROC: return "NUM PROCS TO COLOCATE PER PROC"; case PRTE_JOB_COLOCATE_NPERNODE: return "NUM PROCS TO COLOCATE PER NODE"; case PRTE_JOB_TAG_OUTPUT_DETAILED: return "DETAILED OUTPUT TAG"; case PRTE_JOB_TAG_OUTPUT_FULLNAME: return "FULL 
NSPACE IN OUTPUT TAG"; case PRTE_JOB_ERROR_NONZERO_EXIT: return "ERROR IF NONZERO EXIT"; case PRTE_JOB_CONTROLS: return "JOB CONTROLS"; case PRTE_JOB_SHOW_PROGRESS: return "SHOW LAUNCH PROGRESS"; case PRTE_JOB_RECOVERABLE: return "JOB IS RECOVERABLE"; case PRTE_JOB_NOTIFY_ERRORS: return "NOTIFY ERRORS"; case PRTE_JOB_AUTORESTART: return "AUTORESTART"; case PRTE_JOB_OUTPUT_PROCTABLE: return "OUTPUT PROCTABLE"; case PRTE_JOB_DISPLAY_PROCESSORS: return "DISPLAY PROCESSORS"; case PRTE_JOB_DISPLAY_PARSEABLE_OUTPUT: return "DISPLAY PARSEABLE OUTPUT"; case PRTE_JOB_EXTEND_DVM: return "EXTEND DVM"; case PRTE_JOB_CHILD_SEP: return "CHILD SEP"; case PRTE_JOB_GPU_SUPPORT: return "GPU SUPPORT"; case PRTE_JOB_PREFIX: return "PRTE-JOB-PREFIX"; case PRTE_JOB_PMIX_PREFIX: return "PRTE-JOB-PMIX-PREFIX"; case PRTE_JOB_FWD_ENVIRONMENT: return "FWD ENVIRONMENT"; case PRTE_JOB_REPORT_PHYSICAL_CPUS: return "REPORT PHYSICAL CPUS"; case PRTE_JOB_ALLOC_DISPLAYED: return "ALLOCATION DISPLAYED"; case PRTE_PROC_NOBARRIER: return "PROC-NOBARRIER"; case PRTE_PROC_PRIOR_NODE: return "PROC-PRIOR-NODE"; case PRTE_PROC_NRESTARTS: return "PROC-NUM-RESTARTS"; case PRTE_PROC_RESTART_TIME: return "PROC-RESTART-TIME"; case PRTE_PROC_FAST_FAILS: return "PROC-FAST-FAILS"; case PRTE_PROC_CKPT_STATE: return "PROC-CKPT-STATE"; case PRTE_PROC_SNAPSHOT_REF: return "PROC-SNAPHOT-REF"; case PRTE_PROC_SNAPSHOT_LOC: return "PROC-SNAPSHOT-LOC"; case PRTE_PROC_NODENAME: return "PROC-NODENAME"; case PRTE_PROC_CGROUP: return "PROC-CGROUP"; case PRTE_PROC_NBEATS: return "PROC-NBEATS"; case PRTE_RML_TRANSPORT_TYPE: return "RML-TRANSPORT-TYPE"; case PRTE_RML_PROTOCOL_TYPE: return "RML-PROTOCOL-TYPE"; case PRTE_RML_CONDUIT_ID: return "RML-CONDUIT-ID"; case PRTE_RML_INCLUDE_COMP_ATTRIB: return "RML-INCLUDE"; case PRTE_RML_EXCLUDE_COMP_ATTRIB: return "RML-EXCLUDE"; case PRTE_RML_TRANSPORT_ATTRIB: return "RML-TRANSPORT"; case PRTE_RML_QUALIFIER_ATTRIB: return "RML-QUALIFIER"; case PRTE_RML_PROVIDER_ATTRIB: return 
"RML-DESIRED-PROVIDERS"; case PRTE_RML_PROTOCOL_ATTRIB: return "RML-DESIRED-PROTOCOLS"; case PRTE_RML_ROUTED_ATTRIB: return "RML-DESIRED-ROUTED-MODULES"; default: pmix_snprintf(unknownkey, 180, "UNKNOWN-KEY: %d", key); return unknownkey; } } /* see if one of the converters can handle it */ for (i = 0; i < MAX_CONVERTERS; ++i) { if (0 != converters[i].init) { if (converters[i].key_base < key && key < converters[i].key_max) { return converters[i].converter(key); } } } /* get here if nobody know what to do */ pmix_snprintf(unknownkey, 180, "UNKNOWN-KEY: %d", key); return unknownkey; } int prte_attr_load(prte_attribute_t *kv, void *data, pmix_data_type_t type) { pmix_byte_object_t *boptr; struct timeval *tv; pmix_envar_t *envar; pmix_status_t rc; kv->data.type = type; if (NULL == data) { /* if the type is BOOL, then the user wanted to * use the presence of the attribute to indicate * "true" - so let's mark it that way just in * case a subsequent test looks for the value */ if (PMIX_BOOL == type) { kv->data.data.flag = true; } else { /* otherwise, check to see if this type has storage * that is already allocated, and free it if so */ if (PMIX_STRING == type && NULL != kv->data.data.string) { free(kv->data.data.string); } else if (PMIX_BYTE_OBJECT == type && NULL != kv->data.data.bo.bytes) { free(kv->data.data.bo.bytes); } else if (PMIX_PROC_NSPACE == type && NULL != kv->data.data.proc) { PMIX_PROC_FREE(kv->data.data.proc, 1); } else if (PMIX_PROC == type && NULL != kv->data.data.proc) { PMIX_PROC_FREE(kv->data.data.proc, 1); } else if (PMIX_ENVAR == type) { if (NULL != kv->data.data.envar.envar) { free(kv->data.data.envar.envar); } if (NULL != kv->data.data.envar.value) { free(kv->data.data.envar.value); } } /* just set the fields to zero */ memset(&kv->data.data, 0, sizeof(kv->data.data)); } return PRTE_SUCCESS; } switch (type) { case PMIX_BOOL: kv->data.data.flag = *(bool *) (data); break; case PMIX_BYTE: kv->data.data.byte = *(uint8_t *) (data); break; case 
PMIX_STRING: if (NULL != kv->data.data.string) { free(kv->data.data.string); } if (NULL != data) { kv->data.data.string = strdup((const char *) data); } else { kv->data.data.string = NULL; } break; case PMIX_SIZE: kv->data.data.size = *(size_t *) (data); break; case PMIX_PID: kv->data.data.pid = *(pid_t *) (data); break; case PMIX_INT: kv->data.data.integer = *(int *) (data); break; case PMIX_INT8: kv->data.data.int8 = *(int8_t *) (data); break; case PMIX_INT16: kv->data.data.int16 = *(int16_t *) (data); break; case PMIX_INT32: kv->data.data.int32 = *(int32_t *) (data); break; case PMIX_INT64: kv->data.data.int64 = *(int64_t *) (data); break; case PMIX_UINT: kv->data.data.uint = *(unsigned int *) (data); break; case PMIX_UINT8: kv->data.data.uint8 = *(uint8_t *) (data); break; case PMIX_UINT16: kv->data.data.uint16 = *(uint16_t *) (data); break; case PMIX_UINT32: kv->data.data.uint32 = *(uint32_t *) data; break; case PMIX_UINT64: kv->data.data.uint64 = *(uint64_t *) (data); break; case PMIX_BYTE_OBJECT: if (NULL != kv->data.data.bo.bytes) { free(kv->data.data.bo.bytes); } boptr = (pmix_byte_object_t *) data; if (NULL != boptr && NULL != boptr->bytes && 0 < boptr->size) { kv->data.data.bo.bytes = (char *) malloc(boptr->size); memcpy(kv->data.data.bo.bytes, boptr->bytes, boptr->size); kv->data.data.bo.size = boptr->size; } else { kv->data.data.bo.bytes = NULL; kv->data.data.bo.size = 0; } break; case PMIX_FLOAT: kv->data.data.fval = *(float *) (data); break; case PMIX_TIMEVAL: tv = (struct timeval *) data; kv->data.data.tv.tv_sec = tv->tv_sec; kv->data.data.tv.tv_usec = tv->tv_usec; break; case PMIX_POINTER: kv->data.data.ptr = data; break; case PMIX_PROC_RANK: kv->data.data.rank = *(pmix_rank_t *) data; break; case PMIX_PROC_NSPACE: if (NULL == kv->data.data.proc) { PMIX_PROC_CREATE(kv->data.data.proc, 1); if (NULL == kv->data.data.proc) { return PRTE_ERR_OUT_OF_RESOURCE; } } PMIX_LOAD_NSPACE(kv->data.data.proc->nspace, (char *) data); break; case PMIX_PROC: if 
(NULL == kv->data.data.proc) { PMIX_PROC_CREATE(kv->data.data.proc, 1); if (NULL == kv->data.data.proc) { return PRTE_ERR_OUT_OF_RESOURCE; } } PMIX_XFER_PROCID(kv->data.data.proc, (pmix_proc_t *) data); break; case PMIX_ENVAR: envar = (pmix_envar_t *) data; if (NULL != envar) { if (NULL != kv->data.data.envar.envar) { free(kv->data.data.envar.envar); } if (NULL != kv->data.data.envar.value) { free(kv->data.data.envar.value); } if (NULL != envar->envar) { kv->data.data.envar.envar = strdup(envar->envar); } if (NULL != envar->value) { kv->data.data.envar.value = strdup(envar->value); } kv->data.data.envar.separator = envar->separator; } break; case PMIX_DATA_ARRAY: rc = PMIx_Data_copy((void**)&kv->data.data.darray, data, PMIX_DATA_ARRAY); return rc; break; default: PRTE_ERROR_LOG(PRTE_ERR_NOT_SUPPORTED); return PRTE_ERR_NOT_SUPPORTED; } return PRTE_SUCCESS; } int prte_attr_unload(prte_attribute_t *kv, void **data, pmix_data_type_t type) { pmix_byte_object_t *boptr; pmix_envar_t *envar; pmix_data_array_t *darray; pmix_status_t rc; pmix_data_type_t pointers[] = { PMIX_STRING, PMIX_BYTE_OBJECT, PMIX_POINTER, PMIX_PROC_NSPACE, PMIX_PROC, PMIX_ENVAR, PMIX_DATA_ARRAY, PMIX_UNDEF}; int n; bool found = false; if (type != kv->data.type) { return PRTE_ERR_TYPE_MISMATCH; } if (NULL == data) { PRTE_ERROR_LOG(PRTE_ERR_BAD_PARAM); return PRTE_ERR_BAD_PARAM; } /* if they didn't give us a storage address * and the data type isn't one where we can * create storage, then this is an error */ for (n = 0; PMIX_UNDEF != pointers[n]; n++) { if (type == pointers[n]) { found = true; break; } } if (!found && NULL == *data) { PRTE_ERROR_LOG(PRTE_ERR_BAD_PARAM); return PRTE_ERR_BAD_PARAM; } switch (type) { case PMIX_BOOL: memcpy(*data, &kv->data.data.flag, sizeof(bool)); break; case PMIX_BYTE: memcpy(*data, &kv->data.data.byte, sizeof(uint8_t)); break; case PMIX_STRING: if (NULL != kv->data.data.string) { *data = strdup(kv->data.data.string); } else { *data = NULL; } break; case PMIX_SIZE: 
memcpy(*data, &kv->data.data.size, sizeof(size_t)); break; case PMIX_PID: memcpy(*data, &kv->data.data.pid, sizeof(pid_t)); break; case PMIX_INT: memcpy(*data, &kv->data.data.integer, sizeof(int)); break; case PMIX_INT8: memcpy(*data, &kv->data.data.int8, sizeof(int8_t)); break; case PMIX_INT16: memcpy(*data, &kv->data.data.int16, sizeof(int16_t)); break; case PMIX_INT32: memcpy(*data, &kv->data.data.int32, sizeof(int32_t)); break; case PMIX_INT64: memcpy(*data, &kv->data.data.int64, sizeof(int64_t)); break; case PMIX_UINT: memcpy(*data, &kv->data.data.uint, sizeof(unsigned int)); break; case PMIX_UINT8: memcpy(*data, &kv->data.data.uint8, 1); break; case PMIX_UINT16: memcpy(*data, &kv->data.data.uint16, 2); break; case PMIX_UINT32: memcpy(*data, &kv->data.data.uint32, 4); break; case PMIX_UINT64: memcpy(*data, &kv->data.data.uint64, 8); break; case PMIX_BYTE_OBJECT: boptr = (pmix_byte_object_t *) malloc(sizeof(pmix_byte_object_t)); if (NULL == boptr) { return PRTE_ERR_OUT_OF_RESOURCE; } if (NULL != kv->data.data.bo.bytes && 0 < kv->data.data.bo.size) { boptr->bytes = (char *) malloc(kv->data.data.bo.size); memcpy(boptr->bytes, kv->data.data.bo.bytes, kv->data.data.bo.size); boptr->size = kv->data.data.bo.size; } else { boptr->bytes = NULL; boptr->size = 0; } *data = boptr; break; case PMIX_FLOAT: memcpy(*data, &kv->data.data.fval, sizeof(float)); break; case PMIX_TIMEVAL: memcpy(*data, &kv->data.data.tv, sizeof(struct timeval)); break; case PMIX_POINTER: *data = kv->data.data.ptr; break; case PMIX_PROC_RANK: memcpy(*data, &kv->data.data.rank, sizeof(pmix_rank_t)); break; case PMIX_PROC_NSPACE: PMIX_PROC_CREATE(*data, 1); if (NULL == *data) { return PRTE_ERR_OUT_OF_RESOURCE; } memcpy(*data, kv->data.data.proc->nspace, sizeof(pmix_nspace_t)); break; case PMIX_PROC: PMIX_PROC_CREATE(*data, 1); if (NULL == *data) { return PRTE_ERR_OUT_OF_RESOURCE; } memcpy(*data, kv->data.data.proc, sizeof(pmix_proc_t)); break; case PMIX_ENVAR: PMIX_ENVAR_CREATE(envar, 1); if (NULL == 
envar) { return PRTE_ERR_OUT_OF_RESOURCE; } if (NULL != kv->data.data.envar.envar) { envar->envar = strdup(kv->data.data.envar.envar); } if (NULL != kv->data.data.envar.value) { envar->value = strdup(kv->data.data.envar.value); } envar->separator = kv->data.data.envar.separator; *data = envar; break; case PMIX_DATA_ARRAY: rc = PMIx_Data_copy((void**)&darray, kv->data.data.darray, PMIX_DATA_ARRAY); if (PMIX_SUCCESS != rc) { *data = NULL; return prte_pmix_convert_status(rc); } *data = darray; break; default: PRTE_ERROR_LOG(PRTE_ERR_NOT_SUPPORTED); return PRTE_ERR_NOT_SUPPORTED; } return PRTE_SUCCESS; } char* prte_print_proc_flags(struct prte_proc_t *ptr) { prte_proc_t *p = (prte_proc_t*)ptr; char **tmp = NULL; char *ans; // start with the proc name PMIX_ARGV_APPEND_NOSIZE_COMPAT(&tmp, PRTE_NAME_PRINT(&p->name)); PMIX_ARGV_APPEND_NOSIZE_COMPAT(&tmp, ": "); if (PRTE_FLAG_TEST(p, PRTE_PROC_FLAG_ALIVE)) { PMIX_ARGV_APPEND_NOSIZE_COMPAT(&tmp, "ALIVE"); } if (PRTE_FLAG_TEST(p, PRTE_PROC_FLAG_ABORT)) { PMIX_ARGV_APPEND_NOSIZE_COMPAT(&tmp, "ABORT"); } if (PRTE_FLAG_TEST(p, PRTE_PROC_FLAG_UPDATED)) { PMIX_ARGV_APPEND_NOSIZE_COMPAT(&tmp, "UPDATED"); } if (PRTE_FLAG_TEST(p, PRTE_PROC_FLAG_LOCAL)) { PMIX_ARGV_APPEND_NOSIZE_COMPAT(&tmp, "LOCAL"); } if (PRTE_FLAG_TEST(p, PRTE_PROC_FLAG_REPORTED)) { PMIX_ARGV_APPEND_NOSIZE_COMPAT(&tmp, "REPORTED"); } if (PRTE_FLAG_TEST(p, PRTE_PROC_FLAG_REG)) { PMIX_ARGV_APPEND_NOSIZE_COMPAT(&tmp, "REGISTERED"); } if (PRTE_FLAG_TEST(p, PRTE_PROC_FLAG_HAS_DEREG)) { PMIX_ARGV_APPEND_NOSIZE_COMPAT(&tmp, "DEREGISTERED"); } if (PRTE_FLAG_TEST(p, PRTE_PROC_FLAG_AS_MPI)) { PMIX_ARGV_APPEND_NOSIZE_COMPAT(&tmp, "MPI"); } if (PRTE_FLAG_TEST(p, PRTE_PROC_FLAG_IOF_COMPLETE)) { PMIX_ARGV_APPEND_NOSIZE_COMPAT(&tmp, "IOF-COMPLETE"); } if (PRTE_FLAG_TEST(p, PRTE_PROC_FLAG_WAITPID)) { PMIX_ARGV_APPEND_NOSIZE_COMPAT(&tmp, "WAITPID"); } if (PRTE_FLAG_TEST(p, PRTE_PROC_FLAG_RECORDED)) { PMIX_ARGV_APPEND_NOSIZE_COMPAT(&tmp, "RECORDED"); } if (PRTE_FLAG_TEST(p, 
PRTE_PROC_FLAG_DATA_IN_SM)) {
        PMIX_ARGV_APPEND_NOSIZE_COMPAT(&tmp, "DATA-IN-SM");
    }
    if (PRTE_FLAG_TEST(p, PRTE_PROC_FLAG_DATA_RECVD)) {
        PMIX_ARGV_APPEND_NOSIZE_COMPAT(&tmp, "DATA-RECVD");
    }
    if (PRTE_FLAG_TEST(p, PRTE_PROC_FLAG_SM_ACCESS)) {
        PMIX_ARGV_APPEND_NOSIZE_COMPAT(&tmp, "SM-ACCESS");
    }
    if (PRTE_FLAG_TEST(p, PRTE_PROC_FLAG_TERM_REPORTED)) {
        PMIX_ARGV_APPEND_NOSIZE_COMPAT(&tmp, "TERMINATED");
    }
    /* every entry, including the name and the ": " marker, is joined
     * with '|' */
    ans = PMIX_ARGV_JOIN_COMPAT(tmp, '|');
    PMIX_ARGV_FREE_COMPAT(tmp);
    return ans;
}

/**
 * Build a printable summary of the flags set on a node.
 *
 * The result is the node name, the literal ": " and one label per set
 * flag, all joined with '|' (the name and ": " are list entries too, so
 * they are '|'-separated as well).
 *
 * NOTE(review): the returned string presumably is heap-allocated by the
 * argv join helper and owned by the caller - confirm.
 */
char* prte_print_node_flags(struct prte_node_t *ptr)
{
    prte_node_t *p = (prte_node_t*)ptr;
    char **tmp = NULL;
    char *ans;

    // start with the node name
    PMIX_ARGV_APPEND_NOSIZE_COMPAT(&tmp, p->name);
    PMIX_ARGV_APPEND_NOSIZE_COMPAT(&tmp, ": ");

    if (PRTE_FLAG_TEST(p, PRTE_NODE_FLAG_DAEMON_LAUNCHED)) {
        PMIX_ARGV_APPEND_NOSIZE_COMPAT(&tmp, "DAEMON-LAUNCHED");
    }
    if (PRTE_FLAG_TEST(p, PRTE_NODE_FLAG_LOC_VERIFIED)) {
        PMIX_ARGV_APPEND_NOSIZE_COMPAT(&tmp, "LOCATION");
    }
    if (PRTE_FLAG_TEST(p, PRTE_NODE_FLAG_OVERSUBSCRIBED)) {
        PMIX_ARGV_APPEND_NOSIZE_COMPAT(&tmp, "OVERSUBSCRIBED");
    }
    if (PRTE_FLAG_TEST(p, PRTE_NODE_FLAG_MAPPED)) {
        PMIX_ARGV_APPEND_NOSIZE_COMPAT(&tmp, "MAPPED");
    }
    if (PRTE_FLAG_TEST(p, PRTE_NODE_FLAG_SLOTS_GIVEN)) {
        PMIX_ARGV_APPEND_NOSIZE_COMPAT(&tmp, "SLOTS-GIVEN");
    }
    if (PRTE_FLAG_TEST(p, PRTE_NODE_NON_USABLE)) {
        PMIX_ARGV_APPEND_NOSIZE_COMPAT(&tmp, "NONUSABLE");
    }
    ans = PMIX_ARGV_JOIN_COMPAT(tmp, '|');
    PMIX_ARGV_FREE_COMPAT(tmp);
    return ans;
}

/**
 * Build a printable summary of the flags set on an app context, in the
 * same '|'-joined layout as the node variant: the app command, ": ",
 * then one label per set flag.
 */
char* prte_print_app_flags(struct prte_app_context_t *ptr)
{
    prte_app_context_t *p = (prte_app_context_t*)ptr;
    char **tmp = NULL;
    char *ans;

    // start with the app command
    PMIX_ARGV_APPEND_NOSIZE_COMPAT(&tmp, p->app);
    PMIX_ARGV_APPEND_NOSIZE_COMPAT(&tmp, ": ");

    if (PRTE_FLAG_TEST(p, PRTE_APP_FLAG_USED_ON_NODE)) {
        PMIX_ARGV_APPEND_NOSIZE_COMPAT(&tmp, "USED-LOCAL-NODE");
    }
    if (PRTE_FLAG_TEST(p, PRTE_APP_FLAG_TOOL)) {
        PMIX_ARGV_APPEND_NOSIZE_COMPAT(&tmp, "TOOL");
    }
    if (PRTE_FLAG_TEST(p, PRTE_APP_FLAG_COMPUTED)) {
        PMIX_ARGV_APPEND_NOSIZE_COMPAT(&tmp,
"NPROCS-COMPUTED"); } ans = PMIX_ARGV_JOIN_COMPAT(tmp, '|'); PMIX_ARGV_FREE_COMPAT(tmp); return ans; } char* prte_print_job_flags(struct prte_job_t *ptr) { prte_job_t *p = (prte_job_t*)ptr; char **tmp = NULL; char *ans; // start with the job name PMIX_ARGV_APPEND_NOSIZE_COMPAT(&tmp, PRTE_JOBID_PRINT(p->nspace)); PMIX_ARGV_APPEND_NOSIZE_COMPAT(&tmp, ": "); if (PRTE_FLAG_TEST(p, PRTE_JOB_FLAG_UPDATED)) { PMIX_ARGV_APPEND_NOSIZE_COMPAT(&tmp, "UPDATED"); } if (PRTE_FLAG_TEST(p, PRTE_JOB_FLAG_RESTARTED)) { PMIX_ARGV_APPEND_NOSIZE_COMPAT(&tmp, "RESTARTED"); } if (PRTE_FLAG_TEST(p, PRTE_JOB_FLAG_ABORTED)) { PMIX_ARGV_APPEND_NOSIZE_COMPAT(&tmp, "ABORTED"); } if (PRTE_FLAG_TEST(p, PRTE_JOB_FLAG_FORWARD_OUTPUT)) { PMIX_ARGV_APPEND_NOSIZE_COMPAT(&tmp, "FORWARD-OUTPUT"); } if (PRTE_FLAG_TEST(p, PRTE_JOB_FLAG_DO_NOT_MONITOR)) { PMIX_ARGV_APPEND_NOSIZE_COMPAT(&tmp, "DO-NOT-MONITOR"); } if (PRTE_FLAG_TEST(p, PRTE_JOB_FLAG_FORWARD_COMM)) { PMIX_ARGV_APPEND_NOSIZE_COMPAT(&tmp, "FWD-COM"); } if (PRTE_FLAG_TEST(p, PRTE_JOB_FLAG_RESTART)) { PMIX_ARGV_APPEND_NOSIZE_COMPAT(&tmp, "RESTART"); } if (PRTE_FLAG_TEST(p, PRTE_JOB_FLAG_PROCS_MIGRATING)) { PMIX_ARGV_APPEND_NOSIZE_COMPAT(&tmp, "MIGRATING"); } if (PRTE_FLAG_TEST(p, PRTE_JOB_FLAG_OVERSUBSCRIBED)) { PMIX_ARGV_APPEND_NOSIZE_COMPAT(&tmp, "OVERSUBSCRIBED"); } if (PRTE_FLAG_TEST(p, PRTE_JOB_FLAG_TOOL)) { PMIX_ARGV_APPEND_NOSIZE_COMPAT(&tmp, "TOOL"); } if (PRTE_FLAG_TEST(p, PRTE_JOB_FLAG_LAUNCHER)) { PMIX_ARGV_APPEND_NOSIZE_COMPAT(&tmp, "LAUNCHER"); } if (PRTE_FLAG_TEST(p, PRTE_JOB_FLAG_ERR_REPORTED)) { PMIX_ARGV_APPEND_NOSIZE_COMPAT(&tmp, "ERROR-REPORTED"); } ans = PMIX_ARGV_JOIN_COMPAT(tmp, '|'); PMIX_ARGV_FREE_COMPAT(tmp); return ans; } prrte-3.0.13/src/util/bipartite_graph.h0000664000175000017500000001276515145263240020214 0ustar alastairalastair/* * Copyright (c) 2014-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2017 Amazon.com, Inc. or its affiliates. All Rights * reserved. * Copyright (c) 2019 Intel, Inc. 
All rights reserved. * Copyright (c) 2021 Nanook Consulting. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ /* Implements an adjacency-list-based weighted directed graph (digraph), * focused on supporting bipartite digraphs and flow-network problems. * * Note that some operations might be more efficient if this structure were * converted to use an adjacency matrix instead of an adjacency list. OTOH * that complicates other pieces of the implementation (specifically, adding * and removing edges). */ #ifndef PRTE_BP_GRAPH_H #define PRTE_BP_GRAPH_H struct prte_bp_graph_vertex_t; struct prte_bp_graph_edge_t; struct prte_bp_graph_t; typedef struct prte_bp_graph_vertex_t prte_bp_graph_vertex_t; typedef struct prte_bp_graph_edge_t prte_bp_graph_edge_t; typedef struct prte_bp_graph_t prte_bp_graph_t; /** * callback function pointer type for cleaning up user data associated with a * vertex or edge */ typedef void (*prte_bp_graph_cleanup_fn_t)(void *user_data); /** * create a new empty graph * * Any new vertices will have NULL user data associated. * * @param[in] v_data_cleanup_fn cleanup function to use for vertex user data * @param[in] e_data_cleanup_fn cleanup function to use for edge user data * @param[out] g_out the created graph * * @returns PRTE_SUCCESS or an OMPI error code */ int prte_bp_graph_create(prte_bp_graph_cleanup_fn_t v_data_cleanup_fn, prte_bp_graph_cleanup_fn_t e_data_cleanup_fn, prte_bp_graph_t **g_out); /** * free the given graph * * Any user data associated with vertices or edges in the graph will have * the given edge/vertex cleanup callback invoked in some arbitrary order. * * @returns PRTE_SUCCESS or an OMPI error code */ int prte_bp_graph_free(prte_bp_graph_t *g); /** * clone (deep copy) the given graph * * Note that copy_user_data==true is not currently supported (requires the * addition of a copy callback for user data). 
* * @param[in] g the graph to clone * @param[in] copy_user_data if true, copy vertex/edge user data to the new * graph * @param[out] g_clone_out the resulting cloned graph * @returns PRTE_SUCCESS or a PRTE error code */ int prte_bp_graph_clone(const prte_bp_graph_t *g, bool copy_user_data, prte_bp_graph_t **g_clone_out); /** * return the number of edges for which this vertex is a destination * * @param[in] g the graph to query * @param[in] vertex the vertex id to query * @returns the number of edges for which this vertex is a destination */ int prte_bp_graph_indegree(const prte_bp_graph_t *g, int vertex); /** * return the number of edges for which this vertex is a source * * @param[in] g the graph to query * @param[in] vertex the vertex id to query * @returns the number of edges for which this vertex is a source */ int prte_bp_graph_outdegree(const prte_bp_graph_t *g, int vertex); /** * add an edge to the given graph * * @param[in] from source vertex ID * @param[in] to target vertex ID * @param[in] cost cost value for this edge (lower is better) * @param[in] capacity maximum flow transmissible on this edge * @param[in] e_data caller data to associate with this edge, useful for * debugging or minimizing state shared across components * * @returns PRTE_SUCCESS or a PRTE error code */ int prte_bp_graph_add_edge(prte_bp_graph_t *g, int from, int to, int64_t cost, int capacity, void *e_data); /** * add a vertex to the given graph * * @param[in] g graph to manipulate * @param[in] v_data data to associate with the new vertex * @param[out] index_out integer index of the new vertex. May be NULL.
* * @returns PRTE_SUCCESS or an OMPI error code */ int prte_bp_graph_add_vertex(prte_bp_graph_t *g, void *v_data, int *index_out); /** * compute the order of a graph (number of vertices) * * @param[in] g the graph to query */ int prte_bp_graph_order(const prte_bp_graph_t *g); /** * This function solves the "assignment problem": * http://en.wikipedia.org/wiki/Assignment_problem * * The goal is to find a maximum cardinality, minimum cost matching in a * weighted bipartite graph. Maximum cardinality takes priority over minimum * cost. * * Capacities in the given graph are ignored (assumed to be 1 at the start). * It is also assumed that the graph only contains edges from one vertex set * to the other and that no edges exist in the reverse direction ("forward" * edges only). * * The algorithm(s) used will be deterministic. That is, given the exact same * graph, two calls to this routine will result in the same matching result. * * @param[in] g an acyclic bipartite directed graph for * which a matching is sought * @param[out] num_match_edges_out number edges found in the matching * @param[out] match_edges_out an array of (u,v) vertex pairs indicating * which edges are in the matching * * @returns PRTE_SUCCESS or an OMPI error code */ int prte_bp_graph_solve_bipartite_assignment(const prte_bp_graph_t *g, int *num_match_edges_out, int **match_edges_out); #endif /* PRTE_BP_GRAPH_H */ prrte-3.0.13/src/util/numtostr.h0000664000175000017500000000314315145263240016731 0ustar alastairalastair/* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. 
* Copyright (c) 2019 Intel, Inc. All rights reserved. * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2021 Nanook Consulting. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ /** * @file */ #ifndef PRTE_NUMTOSTR_UTIL #define PRTE_NUMTOSTR_UTIL #include "prte_config.h" /** * Convert a long integer to a char* string. The returned buffer is * allocated by calling malloc() and must be freed by the caller. * * @param num (IN) Input number * @return String containing number (NULL on failure) */ PRTE_EXPORT char *prte_ltostr(long num); /** * Convert a double to a char* string. The returned buffer is allocated * by calling malloc() and must be freed by the caller. * * @param num (IN) Input number * @return String containing number (NULL on failure) */ PRTE_EXPORT char *prte_dtostr(double num); #endif /* PRTE_NUMTOSTR_UTIL */ prrte-3.0.13/src/util/Makefile.am0000664000175000017500000000507015145263240016722 0ustar alastairalastair# # Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. # Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2007-2020 Cisco Systems, Inc. All rights reserved # Copyright (c) 2013 NVIDIA Corporation. All rights reserved. # Copyright (c) 2013-2020 Intel, Inc. All rights reserved. # Copyright (c) 2016 Los Alamos National Security, LLC. All rights # reserved. # Copyright (c) 2016 Research Organization for Information Science # and Technology (RIST). All rights reserved. # Copyright (c) 2016-2017 IBM Corporation. All rights reserved. # Copyright (c) 2021-2022 Nanook Consulting. 
All rights reserved. # Copyright (c) 2023 Jeffrey M. Squyres. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow # # $HEADER$ # SUBDIRS = hostfile noinst_LTLIBRARIES = libprrteutil.la AM_CPPFLAGS = $(LTDLINCL) # Source code files headers = \ attr.h \ bipartite_graph.h \ bipartite_graph_internal.h \ bit_ops.h \ prte_cmd_line.h \ crc.h \ daemon_init.h \ dash_host/dash_host.h \ error_strings.h \ ethtool.h \ error.h \ malloc.h \ name_fns.h \ nidmap.h \ numtostr.h \ proc_info.h \ session_dir.h \ stacktrace.h \ sys_limits.h \ uri.h libprrteutil_la_SOURCES = \ $(headers) \ attr.c \ bipartite_graph.c \ crc.c \ daemon_init.c \ dash_host/dash_host.c \ error_strings.c \ ethtool.c \ error.c \ malloc.c \ name_fns.c \ nidmap.c \ numtostr.c \ proc_info.c \ session_dir.c \ stacktrace.c \ sys_limits.c \ uri.c libprrteutil_la_LIBADD = \ hostfile/libprrteutilhostfile.la libprrteutil_la_DEPENDENCIES = \ hostfile/libprrteutilhostfile.la # Conditionally install the header files if WANT_INSTALL_HEADERS prtedir = $(prteincludedir)/$(subdir) prte_HEADERS = $(headers) endif prrte-3.0.13/src/util/error.c0000664000175000017500000003320515145263240016164 0ustar alastairalastair/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2017 FUJITSU LIMITED. 
All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. * Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved. * Copyright (c) 2019 Intel, Inc. All rights reserved. * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #include "prte_config.h" #include #include #include #include #include #include "constants.h" #include "src/runtime/prte_globals.h" #include "src/util/error.h" #include "src/util/pmix_printf.h" #include "src/util/proc_info.h" #include "src/util/pmix_string_copy.h" const char *prte_strerror(int errnum) { const char *retval; switch (errnum) { case PRTE_SUCCESS: retval = "Success"; break; case PRTE_ERROR: retval = "Error"; break; case PRTE_ERR_OUT_OF_RESOURCE: retval = "Out of resource"; break; case PRTE_ERR_TEMP_OUT_OF_RESOURCE: retval = "Temporarily out of resource"; break; case PRTE_ERR_RESOURCE_BUSY: retval = "Resource busy"; break; case PRTE_ERR_BAD_PARAM: retval = "Bad parameter"; break; case PRTE_ERR_FATAL: retval = "Fatal"; break; case PRTE_ERR_NOT_IMPLEMENTED: retval = "Not implemented"; break; case PRTE_ERR_NOT_SUPPORTED: retval = "Not supported"; break; case PRTE_ERR_INTERRUPTED: retval = "Interrupted"; break; case PRTE_ERR_WOULD_BLOCK: retval = "Would block"; break; case PRTE_ERR_IN_ERRNO: retval = "In errno"; break; case PRTE_ERR_UNREACH: retval = "Unreachable"; break; case PRTE_ERR_NOT_FOUND: retval = "Not found"; break; case PRTE_EXISTS: retval = "Exists"; break; case PRTE_ERR_TIMEOUT: retval = "Timeout"; break; case PRTE_ERR_NOT_AVAILABLE: retval = "Not available"; break; case PRTE_ERR_PERM: retval = "No permission"; break; case PRTE_ERR_VALUE_OUT_OF_BOUNDS: retval = "Value out of bounds"; break; case PRTE_ERR_FILE_READ_FAILURE: retval = "File read failure"; break; case PRTE_ERR_FILE_WRITE_FAILURE: retval = "File write failure"; break; case 
PRTE_ERR_FILE_OPEN_FAILURE: retval = "File open failure"; break; case PRTE_ERR_PACK_MISMATCH: retval = "Pack data mismatch"; break; case PRTE_ERR_PACK_FAILURE: retval = "Data pack failed"; break; case PRTE_ERR_UNPACK_FAILURE: retval = "Data unpack failed"; break; case PRTE_ERR_UNPACK_INADEQUATE_SPACE: retval = "Data unpack had inadequate space"; break; case PRTE_ERR_UNPACK_READ_PAST_END_OF_BUFFER: retval = "Data unpack would read past end of buffer"; break; case PRTE_ERR_OPERATION_UNSUPPORTED: retval = "Requested operation is not supported on referenced data type"; break; case PRTE_ERR_UNKNOWN_DATA_TYPE: retval = "Unknown data type"; break; case PRTE_ERR_BUFFER: retval = "Buffer type (described vs non-described) mismatch - operation not allowed"; break; case PRTE_ERR_DATA_TYPE_REDEF: retval = "Attempt to redefine an existing data type"; break; case PRTE_ERR_DATA_OVERWRITE_ATTEMPT: retval = "Attempt to overwrite a data value"; break; case PRTE_ERR_MODULE_NOT_FOUND: retval = "Framework requires at least one active module, but none found"; break; case PRTE_ERR_TOPO_SLOT_LIST_NOT_SUPPORTED: retval = "OS topology does not support slot_list process affinity"; break; case PRTE_ERR_TOPO_SOCKET_NOT_SUPPORTED: retval = "Could not obtain socket topology information"; break; case PRTE_ERR_TOPO_CORE_NOT_SUPPORTED: retval = "Could not obtain core topology information"; break; case PRTE_ERR_NOT_ENOUGH_SOCKETS: retval = "Not enough sockets to meet request"; break; case PRTE_ERR_NOT_ENOUGH_CORES: retval = "Not enough cores to meet request"; break; case PRTE_ERR_INVALID_PHYS_CPU: retval = "Invalid physical cpu number returned"; break; case PRTE_ERR_MULTIPLE_AFFINITIES: retval = "Multiple methods for assigning process affinity were specified"; break; case PRTE_ERR_SLOT_LIST_RANGE: retval = "Provided slot_list range is invalid"; break; case PRTE_ERR_NETWORK_NOT_PARSEABLE: retval = "Provided network specification is not parseable"; break; case PRTE_ERR_NOT_INITIALIZED: retval = "Not 
initialized"; break; case PRTE_ERR_NOT_BOUND: retval = "Not bound"; break; case PRTE_ERR_PROC_ENTRY_NOT_FOUND: retval = "Database entry not found"; break; case PRTE_ERR_DATA_VALUE_NOT_FOUND: retval = "Data for specified key not found"; break; case PRTE_ERR_CONNECTION_FAILED: retval = "Connection failed"; break; case PRTE_ERR_AUTHENTICATION_FAILED: retval = "Authentication failed"; break; case PRTE_ERR_COMM_FAILURE: retval = "Comm failure"; break; case PRTE_ERR_SERVER_NOT_AVAIL: retval = "Server not available"; break; case PRTE_ERR_IN_PROCESS: retval = "Operation in process"; break; case PRTE_ERR_DEBUGGER_RELEASE: retval = "Release debugger"; break; case PRTE_ERR_HANDLERS_COMPLETE: retval = "Event handlers complete"; break; case PRTE_ERR_PARTIAL_SUCCESS: retval = "Partial success"; break; case PRTE_ERR_PROC_ABORTED: retval = "Process abnormally terminated"; break; case PRTE_ERR_PROC_REQUESTED_ABORT: retval = "Process requested abort"; break; case PRTE_ERR_PROC_ABORTING: retval = "Process is aborting"; break; case PRTE_ERR_NODE_DOWN: retval = "Node has gone down"; break; case PRTE_ERR_NODE_OFFLINE: retval = "Node has gone offline"; break; case PRTE_ERR_JOB_TERMINATED: retval = "Job terminated"; break; case PRTE_ERR_PROC_RESTART: retval = "Process restarted"; break; case PRTE_ERR_PROC_CHECKPOINT: retval = "Process checkpoint"; break; case PRTE_ERR_PROC_MIGRATE: retval = "Process migrate"; break; case PRTE_ERR_EVENT_REGISTRATION: retval = "Event registration"; break; case PRTE_ERR_HEARTBEAT_ALERT: retval = "Heartbeat not received"; break; case PRTE_ERR_FILE_ALERT: retval = "File alert - proc may have stalled"; break; case PRTE_ERR_RECV_LESS_THAN_POSTED: retval = "Receive was less than posted size"; break; case PRTE_ERR_RECV_MORE_THAN_POSTED: retval = "Receive was greater than posted size"; break; case PRTE_ERR_NO_MATCH_YET: retval = "No match for receive posted"; break; case PRTE_ERR_REQUEST: retval = "Request error"; break; case PRTE_ERR_NO_CONNECTION_ALLOWED: retval 
= "No connection allowed"; break; case PRTE_ERR_CONNECTION_REFUSED: retval = "Connection refused"; break; case PRTE_ERR_TYPE_MISMATCH: retval = "Type mismatch"; break; case PRTE_ERR_COMPARE_FAILURE: retval = "Data comparison failure"; break; case PRTE_ERR_COPY_FAILURE: retval = "Data copy failure"; break; case PRTE_ERR_PROC_STATE_MISSING: retval = "The process state information is missing on the registry"; break; case PRTE_ERR_PROC_EXIT_STATUS_MISSING: retval = "The process exit status is missing on the registry"; break; case PRTE_ERR_INDETERMINATE_STATE_INFO: retval = "Request for state returned multiple responses"; break; case PRTE_ERR_NODE_FULLY_USED: retval = "All the slots on a given node have been used"; break; case PRTE_ERR_INVALID_NUM_PROCS: retval = "Multiple applications were specified, but at least one failed to specify the " "number of processes to run"; break; case PRTE_ERR_SILENT: if (prte_report_silent_errors) { retval = "Silent error"; } else { retval = ""; } break; case PRTE_ERR_ADDRESSEE_UNKNOWN: retval = "A message is attempting to be sent to a process whose contact information is " "unknown"; break; case PRTE_ERR_SYS_LIMITS_PIPES: retval = "The system limit on number of pipes a process can open was reached"; break; case PRTE_ERR_PIPE_SETUP_FAILURE: retval = "A pipe could not be setup between a daemon and one of its local processes"; break; case PRTE_ERR_SYS_LIMITS_CHILDREN: retval = "The system limit on number of children a process can have was reached"; break; case PRTE_ERR_FAILED_GET_TERM_ATTRS: retval = "The I/O forwarding system was unable to get the attributes of your terminal"; break; case PRTE_ERR_WDIR_NOT_FOUND: retval = "The specified working directory could not be found"; break; case PRTE_ERR_EXE_NOT_FOUND: retval = "The specified executable could not be found"; break; case PRTE_ERR_PIPE_READ_FAILURE: retval = "A pipe could not be read"; break; case PRTE_ERR_EXE_NOT_ACCESSIBLE: retval = "The specified executable could not be executed"; 
break; case PRTE_ERR_FAILED_TO_START: retval = "The specified application failed to start"; break; case PRTE_ERR_FILE_NOT_EXECUTABLE: retval = "A system-required executable either could not be found or was not executable by " "this user"; break; case PRTE_ERR_HNP_COULD_NOT_START: retval = "Unable to start a daemon on the local node"; break; case PRTE_ERR_SYS_LIMITS_SOCKETS: retval = "The system limit on number of network connections a process can open was reached"; break; case PRTE_ERR_SOCKET_NOT_AVAILABLE: retval = "Unable to open a TCP socket for out-of-band communications"; break; case PRTE_ERR_SYSTEM_WILL_BOOTSTRAP: retval = "System will determine resources during bootstrap of daemons"; break; case PRTE_ERR_RESTART_LIMIT_EXCEEDED: retval = "Limit on number of process restarts was exceeded"; break; case PRTE_ERR_INVALID_NODE_RANK: retval = "Invalid node rank"; break; case PRTE_ERR_INVALID_LOCAL_RANK: retval = "Invalid local rank"; break; case PRTE_ERR_UNRECOVERABLE: retval = "Unrecoverable error"; break; case PRTE_ERR_MEM_LIMIT_EXCEEDED: retval = "Memory limit exceeded"; break; case PRTE_ERR_HEARTBEAT_LOST: retval = "Heartbeat lost"; break; case PRTE_ERR_PROC_STALLED: retval = "Proc appears to be stalled"; break; case PRTE_ERR_NO_APP_SPECIFIED: retval = "No application specified"; break; case PRTE_ERR_NO_EXE_SPECIFIED: retval = "No executable specified"; break; case PRTE_ERR_COMM_DISABLED: retval = "Communications have been disabled"; break; case PRTE_ERR_FAILED_TO_MAP: retval = "Unable to map job"; break; case PRTE_ERR_TAKE_NEXT_OPTION: if (prte_report_silent_errors) { retval = "Next option"; } else { retval = ""; } break; case PRTE_ERR_SENSOR_LIMIT_EXCEEDED: retval = "Sensor limit exceeded"; break; case PRTE_ERR_ALLOCATION_PENDING: retval = "Allocation pending"; break; case PRTE_ERR_NO_PATH_TO_TARGET: retval = "No OOB path to target"; break; case PRTE_ERR_OP_IN_PROGRESS: retval = "Operation in progress"; break; case PRTE_ERR_OPEN_CONDUIT_FAIL: retval = "Open 
messaging conduit failed"; break; case PRTE_ERR_OUT_OF_ORDER_MSG: retval = "Out of order message"; break; case PRTE_ERR_FORCE_SELECT: retval = "Force select"; break; case PRTE_ERR_JOB_CANCELLED: retval = "Job cancelled"; break; case PRTE_ERR_CONDUIT_SEND_FAIL: retval = " Transport Conduit returned send error"; break; default: retval = "Unknown error"; } return retval; } prrte-3.0.13/src/util/parse_options.c0000664000175000017500000001101215145263240017710 0ustar alastairalastair/* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2008 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2008-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2019 Intel, Inc. All rights reserved. * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #include "prte_config.h" #include "constants.h" #include #include #include #ifdef HAVE_UNISTD_H # include #endif #ifdef HAVE_SYS_TYPES_H # include #endif #include "src/util/pmix_argv.h" #include "src/util/pmix_output.h" #include "src/runtime/prte_globals.h" #include "src/util/pmix_parse_options.h" void pmix_util_parse_range_options(char *inp, char ***output) { char **r1 = NULL, **r2 = NULL; int i, vint; int start, end, n; char nstr[32]; char *input, *bang; bool bang_option = false; /* protect against null input */ if (NULL == inp) { return; } /* protect the provided input */ input = strdup(inp); /* check for the special '!' operator */ if (NULL != (bang = strchr(input, '!'))) { bang_option = true; *bang = '\0'; } /* split on commas */ r1 = PMIX_ARGV_SPLIT_COMPAT(input, ','); /* for each resulting element, check for range */ for (i = 0; i < PMIX_ARGV_COUNT_COMPAT(r1); i++) { r2 = PMIX_ARGV_SPLIT_COMPAT(r1[i], '-'); if (1 < PMIX_ARGV_COUNT_COMPAT(r2)) { /* given range - get start and end */ start = strtol(r2[0], NULL, 10); end = strtol(r2[1], NULL, 10); } else { /* check for wildcard - have to do this here because * the -1 would have been caught in the split */ vint = strtol(r1[i], NULL, 10); if (-1 == vint) { PMIX_ARGV_FREE_COMPAT(*output); *output = NULL; PMIX_ARGV_APPEND_NOSIZE_COMPAT(output, "-1"); PMIX_ARGV_FREE_COMPAT(r2); goto cleanup; } start = strtol(r2[0], NULL, 10); end = start; } for (n = start; n <= end; n++) { snprintf(nstr, 32, "%d", n); PMIX_ARGV_APPEND_NOSIZE_COMPAT(output, nstr); } PMIX_ARGV_FREE_COMPAT(r2); } cleanup: if (bang_option) { PMIX_ARGV_APPEND_NOSIZE_COMPAT(output, "BANG"); } free(input); PMIX_ARGV_FREE_COMPAT(r1); } void prte_util_get_ranges(char *inp, char ***startpts, char ***endpts) { char **r1 = NULL, **r2 = NULL; int i; char *input; /* protect against null input */ if (NULL == inp) { return; } /* protect the provided input */ input = strdup(inp); /* split on commas 
*/ r1 = PMIX_ARGV_SPLIT_COMPAT(input, ','); /* for each resulting element, check for range */ for (i = 0; i < PMIX_ARGV_COUNT_COMPAT(r1); i++) { r2 = PMIX_ARGV_SPLIT_COMPAT(r1[i], '-'); if (2 == PMIX_ARGV_COUNT_COMPAT(r2)) { /* given range - get start and end */ PMIX_ARGV_APPEND_NOSIZE_COMPAT(startpts, r2[0]); PMIX_ARGV_APPEND_NOSIZE_COMPAT(endpts, r2[1]); } else if (1 == PMIX_ARGV_COUNT_COMPAT(r2)) { /* only one value provided, so it is both the start * and the end */ PMIX_ARGV_APPEND_NOSIZE_COMPAT(startpts, r2[0]); PMIX_ARGV_APPEND_NOSIZE_COMPAT(endpts, r2[0]); } else { /* no idea how to parse this */ pmix_output(0, "%s Unknown parse error on string: %s(%s)", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), inp, r1[i]); } PMIX_ARGV_FREE_COMPAT(r2); } free(input); PMIX_ARGV_FREE_COMPAT(r1); } prrte-3.0.13/src/util/session_dir.h0000664000175000017500000000402515145263240017357 0ustar alastairalastair/* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2016-2020 Intel, Inc. All rights reserved. * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2021-2024 Nanook Consulting. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ /** @file: * * Find and/or create PRTE session directory. * */ #ifndef PRTE_SESSION_DIR_H_HAS_BEEN_INCLUDED #define PRTE_SESSION_DIR_H_HAS_BEEN_INCLUDED #include "prte_config.h" #include "types.h" #include "src/runtime/prte_globals.h" BEGIN_C_DECLS /** @param proc Pointer to a process name for which the session * dir name is desired. 
/*
 * File descriptor to be used by the backtrace framework if prte_backtrace_print
 * is passed NULL for its FILE file pointer.
 */
extern int prte_stacktrace_output_fileno;
*/ PRTE_EXPORT void prte_stackframe_output(int stream); /** * Return the current stack trace (not including the call to this * function) as a string (which must be freed by the caller). */ PRTE_EXPORT char *prte_stackframe_output_string(void); /** * Here we register the prte_show_stackframe function for signals * passed to OpenMPI by the mpi_signal-parameter passed to mpirun * by the user. * * @returnvalue PRTE_SUCCESS * @returnvalue PRTE_ERR_BAD_PARAM if the value in the signal-list * is not a valid signal-number * */ PRTE_EXPORT int prte_util_register_stackhandlers(void); #endif /* PRTE_STACKTRACE_H */ prrte-3.0.13/src/util/bipartite_graph_internal.h0000664000175000017500000001300415145263240022073 0ustar alastairalastair/* * Copyright (c) 2014-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2017 Amazon.com, Inc. or its affiliates. All Rights * reserved. * Copyright (c) 2019 Intel, Inc. All rights reserved. * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ /* * This file defines a number of internal structures to the BP graph * code which need to be exposed only for unit testing. This file * should not be included in code that uses the BP graph interface. */ #ifndef BIPARTITE_GRAPH_INTERNAL #define BIPARTITE_GRAPH_INTERNAL 1 struct prte_bp_graph_edge_t { pmix_object_t super; pmix_list_item_t outbound_li; pmix_list_item_t inbound_li; /** source of this edge */ int source; /** v_index of target of this edge */ int target; /** cost (weight) of this edge */ int64_t cost; /** * (flow-network) capacity of this edge. Zero-capacity edges essentially do * not exist and will be ignored by most of the algorithms implemented here. 
*/ int capacity; /** any other information associated with this edge */ void *e_data; }; struct prte_bp_graph_vertex_t { /** index in the graph's array of vertices */ int v_index; /** any other information associated with the vertex */ void *v_data; /** linked list of edges for which this vertex is a source */ pmix_list_t out_edges; /** linked list of edges for which this vertex is a target */ pmix_list_t in_edges; }; struct prte_bp_graph_t { /** number of vertices currently in this graph */ int num_vertices; /** vertices in this graph (with number of set elements == num_vertices) */ pmix_pointer_array_t vertices; /** index of the source vertex, or -1 if not present */ int source_idx; /** index of the sink vertex, or -1 if not present */ int sink_idx; /** user callback to clean up the v_data */ prte_bp_graph_cleanup_fn_t v_data_cleanup_fn; /** user callback to clean up the e_data */ prte_bp_graph_cleanup_fn_t e_data_cleanup_fn; }; #define LIST_FOREACH_CONTAINED(item, list, type, member) \ for (item = container_of((list)->pmix_list_sentinel.pmix_list_next, type, member); \ &item->member != &(list)->pmix_list_sentinel; \ item = container_of(((pmix_list_item_t *) (&item->member))->pmix_list_next, type, \ member)) #define LIST_FOREACH_SAFE_CONTAINED(item, next, list, type, member) \ for (item = container_of((list)->pmix_list_sentinel.pmix_list_next, type, member), \ next = container_of(((pmix_list_item_t *) (&item->member))->pmix_list_next, type, member); \ &item->member != &(list)->pmix_list_sentinel; item = next, \ next = container_of(((pmix_list_item_t *) (&item->member))->pmix_list_next, type, member)) #define NUM_VERTICES(g) (g->num_vertices) #define CHECK_VERTEX_RANGE(g, v) \ do { \ if ((v) < 0 || (v) >= NUM_VERTICES(g)) { \ return PRTE_ERR_BAD_PARAM; \ } \ } while (0) /* cast away any constness of &g->vertices b/c the pmix_pointer_array API is * not const-correct */ #define V_ID_TO_PTR(g, v_id) \ ((prte_bp_graph_vertex_t *) 
pmix_pointer_array_get_item((pmix_pointer_array_t *) &g->vertices, \ v_id)) #define FOREACH_OUT_EDGE(g, v_id, e_ptr, _err) \ prte_bp_graph_vertex_t *_v; \ _v = V_ID_TO_PTR((g), (v_id)); \ if (NULL == _v) { \ return (_err); \ } \ LIST_FOREACH_CONTAINED(e_ptr, &(_v->out_edges), prte_bp_graph_edge_t, outbound_li) #define FOREACH_IN_EDGE(g, v_id, e_ptr, _err) \ prte_bp_graph_vertex_t *_v; \ _v = V_ID_TO_PTR((g), (v_id)); \ if (NULL == _v) { \ return (_err); \ } \ LIST_FOREACH_CONTAINED(e_ptr, &(_v->in_edges), prte_bp_graph_edge_t, inbound_li) /* Iterate over (u,v) edge pairs along the given path, where path is defined * by the predecessor array "pred". Stops when a -1 predecessor is * encountered. Note: because it is a *predecessor* array, the traversal * starts at the sink and progresses towards the source. */ #define FOREACH_UV_ON_PATH(pred, source, sink, u, v) \ for (u = pred[sink], v = sink; u != -1; v = u, u = pred[u]) bool prte_bp_graph_bellman_ford(prte_bp_graph_t *gx, int source, int target, int *pred); int prte_bp_graph_bipartite_to_flow(prte_bp_graph_t *g); #endif prrte-3.0.13/src/util/proc_info.h0000664000175000017500000001160015145263240017011 0ustar alastairalastair/* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2012 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2013-2020 Intel, Inc. All rights reserved. * Copyright (c) 2017-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2021-2025 Nanook Consulting All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ /** @file: * * Populates global structure with process-specific information. * * */ #ifndef _PRTE_PROC_INFO_H_ #define _PRTE_PROC_INFO_H_ #include "prte_config.h" #include #ifdef HAVE_SYS_TYPES_H # include #endif #include "types.h" #include "src/hwloc/hwloc-internal.h" #include BEGIN_C_DECLS typedef uint8_t prte_proc_type_t; #define PRTE_PROC_TYPE_NONE 0x0000 #define PRTE_PROC_DAEMON 0x0002 #define PRTE_PROC_MASTER 0x0004 #define PRTE_PROC_IS_DAEMON (PRTE_PROC_DAEMON & prte_process_info.proc_type) #define PRTE_PROC_IS_MASTER (PRTE_PROC_MASTER & prte_process_info.proc_type) /** * Process information structure * * The prte_proc_info() function fills the pid field and obtains the * process name, storing that information in the global structure. The * structure also holds path names to the universe, job, and process * session directories, and to the stdin, stdout, and stderr temp * files - however, these are all initialized elsewhere. 
typedef struct prte_process_info_t {
    pmix_proc_t myproc;
    pmix_proc_t my_hnp;         /**< Name of my hnp */
    char *my_hnp_uri;           /**< Contact info for my hnp */
    pmix_proc_t my_parent;      /**< Name of my parent (or my HNP if no parent was specified) */
    pid_t hnp_pid;              /**< hnp pid - used if singleton */
    pmix_rank_t num_daemons;    /**< number of daemons in system */
    int num_nodes;              /**< number of nodes in the job */
    char *nodename;             /**< string name for this node */
    char **aliases;             /**< aliases for this node */
    uid_t uid;                  /**< Real user ID */
    uid_t euid;                 /**< Effective user ID */
    gid_t gid;                  /**< Real group ID */
    gid_t egid;                 /**< Effective group ID */
    pid_t pid;                  /**< Local process ID for this process */
    prte_proc_type_t proc_type; /**< Type of process */
    uint16_t my_port;           /**< TCP port for out-of-band comm */
    /* The session directory is a path rooted at a prefix, where the prefix
     * can either be provided by the user via the
     * --tmpdir command-line flag, the use of one of several
     * environmental variables, or else a default location.
     * NOTE(review): the exact path template that followed "has the form"
     * was lost from this copy of the comment -- restore it from upstream.
     */
    char *tmpdir_base;     /**< Base directory of the session dir tree */
    char *sessdir_prefix;  /**< starting string for top session dir name */
    char *top_session_dir; /**< Top-most directory of the session tree */
    bool rm_session_dirs;  /**< Session directories will be cleaned up by RM */

    char *cpuset;          /**< String-representation of bitmap where we are bound */
    bool shared_fs;        // whether the tmpdir is on a shared file system
} prte_process_info_t;
PRTE_EXPORT extern prte_process_info_t prte_process_info;

/**
 * \internal
 *
 * Global structure to store a wide range of information about the
 * process.  prte_proc_info populates a global variable with
 * information about the process being executed.  This function should
 * be called only once, from prte_rte_init().
 *
 * @param None.
 *
 * @retval PRTE_SUCCESS Successfully initialized the various fields.
 * @retval OMPI_ERROR Failed to initialize one or more fields.
 *
 * NOTE(review): "OMPI_ERROR" looks like a leftover name; confirm the
 * actual failure code returned by the implementation.
 */
PRTE_EXPORT int prte_proc_info(void);

/* NOTE(review): presumably releases what prte_proc_info() set up -- confirm
 * against the implementation. */
PRTE_EXPORT int prte_proc_info_finalize(void);

/* NOTE(review): presumably fills in the node-name related fields of
 * prte_process_info -- confirm against the implementation. */
PRTE_EXPORT void prte_setup_hostname(void);

/* NOTE(review): by its name, reports whether the given host name refers to
 * the local node -- confirm the matching rules against the implementation. */
PRTE_EXPORT bool prte_check_host_is_local(const char *name);

END_C_DECLS

#endif
* $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #include "prte_config.h" #ifdef HAVE_SYS_TYPES_H # include #endif #include #include #ifdef HAVE_UNISTD_H # include #endif #include #include "constants.h" #include "src/util/daemon_init.h" int prte_daemon_init_callback(char *working_dir, int (*parent_fn)(pid_t)) { #if defined(HAVE_FORK) pid_t pid; int fd; if ((pid = fork()) < 0) { return PRTE_ERROR; } else if (pid != 0) { /* parent goes bye-bye */ int rc = 0; if (NULL != parent_fn) { rc = parent_fn(pid); } exit(rc); } /* child continues */ # if defined(HAVE_SETSID) setsid(); /* become session leader - doing this confuses Cray aprun in some cases */ # endif if (NULL != working_dir) { if (-1 == chdir(working_dir)) { /* change working directory */ return PRTE_ERR_FATAL; } } /* connect input to /dev/null */ fd = open("/dev/null", O_RDONLY); if (0 > fd) { return PRTE_ERR_FATAL; } dup2(fd, STDIN_FILENO); if (fd != STDIN_FILENO) { close(fd); } /* connect outputs to /dev/null */ fd = open("/dev/null", O_RDWR | O_CREAT | O_TRUNC, 0666); if (fd >= 0) { dup2(fd, STDOUT_FILENO); dup2(fd, STDERR_FILENO); /* just to be safe, make sure we aren't trying * to close stdout or stderr! since we dup'd both * of them to the same fd, we can't just close it * since one of the two would still be open and * someone could attempt to use it. */ if (fd != STDOUT_FILENO && fd != STDERR_FILENO) { close(fd); } } else { return PRTE_ERR_FATAL; } return PRTE_SUCCESS; #else /* HAVE_FORK */ return PRTE_ERR_NOT_SUPPORTED; #endif } prrte-3.0.13/src/util/crc.c0000664000175000017500000013723315145263240015610 0ustar alastairalastair/* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
/*
 * Alignment helpers.  PRTE_CRC_WORD_MASK_ selects the low address bits that
 * must be zero for an address to be treated as "unsigned long aligned".
 * ALIGNOF_LONG is not defined in this file (presumably it comes from the
 * configure-generated header - confirm); when it is neither 8 nor 4 the
 * 0xFFFF mask is used.
 */
#if (ALIGNOF_LONG == 8)
#    define PRTE_CRC_WORD_MASK_ 0x7
#elif (ALIGNOF_LONG == 4)
#    define PRTE_CRC_WORD_MASK_ 0x3
#else
#    define PRTE_CRC_WORD_MASK_ 0xFFFF
#endif

#define WORDALIGNED(v) (((intptr_t) v & PRTE_CRC_WORD_MASK_) ? false : true)
#define INTALIGNED(v)  (((intptr_t) v & 3) ? false : true)

/*
 * this version of bcopy_csum() looks a little too long, but it
 * handles cumulative checksumming for arbitrary lengths and address
 * alignments as best as it can; the contents of lastPartialLong and
 * lastPartialLength are updated to reflect the last partial word's
 * value and length (in bytes) -- this should allow proper handling of
 * checksumming contiguous or noncontiguous buffers via multiple calls
 * of bcopy_csum() - Mitch
 *
 * Copies copylen bytes from source to destination while summing the data
 * as native unsigned long words.  When csumlen exceeds copylen, the extra
 * (csumlen - copylen) source bytes are summed without being copied.
 *
 * The return value is this call's contribution to a running checksum:
 * every complete word adds its value, and a partial word carried in through
 * lastPartialLong / lastPartialLength adds only the difference between its
 * new and its previous value.  A trailing fragment shorter than a word is
 * added zero-padded and remembered in the two state variables.
 *
 * source            bytes to read
 * destination       where the first copylen bytes are written
 * copylen           number of bytes to copy
 * csumlen           number of bytes to checksum
 * lastPartialLong   in/out: value of the carried partial word
 * lastPartialLength in/out: number of valid bytes in that word
 */
unsigned long prte_bcopy_csum_partial(const void *source, void *destination, size_t copylen,
                                      size_t csumlen, unsigned long *lastPartialLong,
                                      size_t *lastPartialLength)
{
    unsigned long *src = (unsigned long *) source;
    unsigned long *dest = (unsigned long *) destination;
    unsigned long csum = 0;
    size_t csumlenresidue;
    unsigned long i, temp;

    csumlenresidue = (csumlen > copylen) ? (csumlen - copylen) : 0;
    temp = *lastPartialLong;

    if (WORDALIGNED(source) && WORDALIGNED(dest)) {
        if (*lastPartialLength) {
            /* do we have enough data to fill out the partial word? */
            if (copylen >= (sizeof(unsigned long) - *lastPartialLength)) { /* YES, we do... */
                memcpy(((char *) &temp + *lastPartialLength), src,
                       (sizeof(unsigned long) - *lastPartialLength));
                memcpy(dest, ((char *) &temp + *lastPartialLength),
                       (sizeof(unsigned long) - *lastPartialLength));
                src = (unsigned long *) ((char *) src + sizeof(unsigned long) - *lastPartialLength);
                dest = (unsigned long *) ((char *) dest + sizeof(unsigned long)
                                          - *lastPartialLength);
                csum += (temp - *lastPartialLong);
                copylen -= sizeof(unsigned long) - *lastPartialLength;
                /* now we have an unaligned source and an unaligned destination */
                for (; copylen >= sizeof(*src); copylen -= sizeof(*src)) {
                    memcpy(&temp, src, sizeof(temp));
                    src++;
                    csum += temp;
                    memcpy(dest, &temp, sizeof(temp));
                    dest++;
                }
                *lastPartialLength = 0;
                *lastPartialLong = 0;
            } else { /* NO, we don't... */
                memcpy(((char *) &temp + *lastPartialLength), src, copylen);
                memcpy(dest, ((char *) &temp + *lastPartialLength), copylen);
                src = (unsigned long *) ((char *) src + copylen);
                dest = (unsigned long *) ((char *) dest + copylen);
                csum += (temp - *lastPartialLong);
                *lastPartialLong = temp;
                *lastPartialLength += copylen;
                copylen = 0;
            }
        } else { /* fast path... */
            size_t numLongs = copylen / sizeof(unsigned long);
            for (i = 0; i < numLongs; i++) {
                csum += *src;
                *dest++ = *src++;
            }
            *lastPartialLong = 0;
            *lastPartialLength = 0;
            if (WORDALIGNED(copylen) && (csumlenresidue == 0)) {
                return (csum);
            } else {
                copylen -= i * sizeof(unsigned long);
            }
        }
    } else if (WORDALIGNED(source)) {
        if (*lastPartialLength) {
            /* do we have enough data to fill out the partial word? */
            if (copylen >= (sizeof(unsigned long) - *lastPartialLength)) { /* YES, we do... */
                memcpy(((char *) &temp + *lastPartialLength), src,
                       (sizeof(unsigned long) - *lastPartialLength));
                memcpy(dest, ((char *) &temp + *lastPartialLength),
                       (sizeof(unsigned long) - *lastPartialLength));
                src = (unsigned long *) ((char *) src + sizeof(unsigned long) - *lastPartialLength);
                dest = (unsigned long *) ((char *) dest + sizeof(unsigned long)
                                          - *lastPartialLength);
                csum += (temp - *lastPartialLong);
                copylen -= sizeof(unsigned long) - *lastPartialLength;
                /* now we have an unaligned source and an unknown alignment for our destination */
                if (WORDALIGNED(dest)) {
                    size_t numLongs = copylen / sizeof(unsigned long);
                    for (i = 0; i < numLongs; i++) {
                        memcpy(&temp, src, sizeof(temp));
                        src++;
                        csum += temp;
                        *dest++ = temp;
                    }
                    copylen -= i * sizeof(unsigned long);
                } else {
                    for (; copylen >= sizeof(*src); copylen -= sizeof(*src)) {
                        memcpy(&temp, src, sizeof(temp));
                        src++;
                        csum += temp;
                        memcpy(dest, &temp, sizeof(temp));
                        dest++;
                    }
                }
                *lastPartialLong = 0;
                *lastPartialLength = 0;
            } else { /* NO, we don't... */
                memcpy(((char *) &temp + *lastPartialLength), src, copylen);
                memcpy(dest, ((char *) &temp + *lastPartialLength), copylen);
                src = (unsigned long *) ((char *) src + copylen);
                dest = (unsigned long *) ((char *) dest + copylen);
                csum += (temp - *lastPartialLong);
                *lastPartialLong = temp;
                *lastPartialLength += copylen;
                copylen = 0;
            }
        } else {
            for (; copylen >= sizeof(*src); copylen -= sizeof(*src)) {
                temp = *src++;
                csum += temp;
                memcpy(dest, &temp, sizeof(temp));
                dest++;
            }
            *lastPartialLong = 0;
            *lastPartialLength = 0;
        }
    } else if (WORDALIGNED(dest)) {
        if (*lastPartialLength) {
            /* do we have enough data to fill out the partial word? */
            if (copylen >= (sizeof(unsigned long) - *lastPartialLength)) { /* YES, we do... */
                memcpy(((char *) &temp + *lastPartialLength), src,
                       (sizeof(unsigned long) - *lastPartialLength));
                memcpy(dest, ((char *) &temp + *lastPartialLength),
                       (sizeof(unsigned long) - *lastPartialLength));
                src = (unsigned long *) ((char *) src + sizeof(unsigned long) - *lastPartialLength);
                dest = (unsigned long *) ((char *) dest + sizeof(unsigned long)
                                          - *lastPartialLength);
                csum += (temp - *lastPartialLong);
                copylen -= sizeof(unsigned long) - *lastPartialLength;
                /* now we have a source of unknown alignment and a unaligned destination */
                if (WORDALIGNED(src)) {
                    for (; copylen >= sizeof(*src); copylen -= sizeof(*src)) {
                        temp = *src++;
                        csum += temp;
                        memcpy(dest, &temp, sizeof(temp));
                        dest++;
                    }
                    *lastPartialLong = 0;
                    *lastPartialLength = 0;
                } else {
                    for (; copylen >= sizeof(*src); copylen -= sizeof(*src)) {
                        memcpy(&temp, src, sizeof(temp));
                        src++;
                        csum += temp;
                        memcpy(dest, &temp, sizeof(temp));
                        dest++;
                    }
                    *lastPartialLength = 0;
                    *lastPartialLong = 0;
                }
            } else { /* NO, we don't... */
                memcpy(((char *) &temp + *lastPartialLength), src, copylen);
                memcpy(dest, ((char *) &temp + *lastPartialLength), copylen);
                src = (unsigned long *) ((char *) src + copylen);
                dest = (unsigned long *) ((char *) dest + copylen);
                csum += (temp - *lastPartialLong);
                *lastPartialLong = temp;
                *lastPartialLength += copylen;
                copylen = 0;
            }
        } else {
            for (; copylen >= sizeof(*src); copylen -= sizeof(*src)) {
                memcpy(&temp, src, sizeof(temp));
                src++;
                csum += temp;
                *dest++ = temp;
            }
            *lastPartialLength = 0;
            *lastPartialLong = 0;
        }
    } else {
        if (*lastPartialLength) {
            /* do we have enough data to fill out the partial word? */
            if (copylen >= (sizeof(unsigned long) - *lastPartialLength)) { /* YES, we do... */
                memcpy(((char *) &temp + *lastPartialLength), src,
                       (sizeof(unsigned long) - *lastPartialLength));
                memcpy(dest, ((char *) &temp + *lastPartialLength),
                       (sizeof(unsigned long) - *lastPartialLength));
                src = (unsigned long *) ((char *) src + sizeof(unsigned long) - *lastPartialLength);
                dest = (unsigned long *) ((char *) dest + sizeof(unsigned long)
                                          - *lastPartialLength);
                csum += (temp - *lastPartialLong);
                copylen -= sizeof(unsigned long) - *lastPartialLength;
                /* now we have an unknown alignment for our source and destination */
                if (WORDALIGNED(src) && WORDALIGNED(dest)) {
                    size_t numLongs = copylen / sizeof(unsigned long);
                    for (i = 0; i < numLongs; i++) {
                        csum += *src;
                        *dest++ = *src++;
                    }
                    copylen -= i * sizeof(unsigned long);
                } else { /* safe but slower for all other alignments */
                    for (; copylen >= sizeof(*src); copylen -= sizeof(*src)) {
                        memcpy(&temp, src, sizeof(temp));
                        src++;
                        csum += temp;
                        memcpy(dest, &temp, sizeof(temp));
                        dest++;
                    }
                }
                *lastPartialLong = 0;
                *lastPartialLength = 0;
            } else { /* NO, we don't... */
                memcpy(((char *) &temp + *lastPartialLength), src, copylen);
                memcpy(dest, ((char *) &temp + *lastPartialLength), copylen);
                src = (unsigned long *) ((char *) src + copylen);
                dest = (unsigned long *) ((char *) dest + copylen);
                csum += (temp - *lastPartialLong);
                *lastPartialLong = temp;
                *lastPartialLength += copylen;
                copylen = 0;
            }
        } else {
            for (; copylen >= sizeof(*src); copylen -= sizeof(*src)) {
                memcpy(&temp, src, sizeof(temp));
                src++;
                csum += temp;
                memcpy(dest, &temp, sizeof(temp));
                dest++;
            }
            *lastPartialLength = 0;
            *lastPartialLong = 0;
        }
    }

    /* if copylen is non-zero there was a bit left, less than an unsigned long's worth */
    if ((copylen != 0) && (csumlenresidue == 0)) {
        temp = *lastPartialLong;
        if (*lastPartialLength) {
            if (copylen >= (sizeof(unsigned long) - *lastPartialLength)) {
                /* copy all remaining bytes from src to dest */
                unsigned long copytemp = 0;
                memcpy(&copytemp, src, copylen);
                memcpy(dest, &copytemp, copylen);
                /* fill out rest of partial word and add to checksum */
                memcpy(((char *) &temp + *lastPartialLength), src,
                       (sizeof(unsigned long) - *lastPartialLength));
                /* avoid unsigned arithmetic overflow by subtracting the old partial
                 * word from the new one before adding to the checksum...
                 */
                csum += (temp - *lastPartialLong);
                copylen -= sizeof(unsigned long) - *lastPartialLength;
                src = (unsigned long *) ((char *) src + sizeof(unsigned long) - *lastPartialLength);
                *lastPartialLength = copylen;
                /* reset temp, and calculate next partial word */
                temp = 0;
                if (copylen) {
                    memcpy(&temp, src, copylen);
                }
                /* add it to the checksum */
                csum += temp;
                *lastPartialLong = temp;
            } else {
                /* copy all remaining bytes from src to dest */
                unsigned long copytemp = 0;
                memcpy(&copytemp, src, copylen);
                memcpy(dest, &copytemp, copylen);
                /* fill out rest of partial word and add to checksum */
                memcpy(((char *) &temp + *lastPartialLength), src, copylen);
                /* avoid unsigned arithmetic overflow by subtracting the old partial
                 * word from the new one before adding to the checksum...
                 */
                csum += temp - *lastPartialLong;
                *lastPartialLong = temp;
                *lastPartialLength += copylen;
            }
        } else { /* fast path... */
            /* temp and *lastPartialLong are 0 if *lastPartialLength is 0... */
            memcpy(&temp, src, copylen);
            csum += temp;
            memcpy(dest, &temp, copylen);
            *lastPartialLong = temp;
            *lastPartialLength = copylen;
            /* done...return the checksum */
        }
    } else if (csumlenresidue != 0) {
        if (copylen != 0) {
            temp = 0;
            memcpy(&temp, src, copylen);
            memcpy(dest, &temp, copylen);
        }
        if (csumlenresidue < (sizeof(unsigned long) - copylen - *lastPartialLength)) {
            temp = *lastPartialLong;
            memcpy(((char *) &temp + *lastPartialLength), src, (copylen + csumlenresidue));
            /* avoid unsigned arithmetic overflow by subtracting the old partial */
            /* word from the new one before adding to the checksum... */
            csum += temp - *lastPartialLong;
            src++;
            *lastPartialLong = temp;
            *lastPartialLength += copylen + csumlenresidue;
            csumlenresidue = 0;
        } else {
            /* we have enough chksum data to fill out our last partial */
            /* word */
            temp = *lastPartialLong;
            memcpy(((char *) &temp + *lastPartialLength), src,
                   (sizeof(unsigned long) - *lastPartialLength));
            /* avoid unsigned arithmetic overflow by subtracting the old partial */
            /* word from the new one before adding to the checksum... */
            csum += temp - *lastPartialLong;
            src = (unsigned long *) ((char *) src + sizeof(unsigned long) - *lastPartialLength);
            csumlenresidue -= sizeof(unsigned long) - *lastPartialLength - copylen;
            *lastPartialLength = 0;
            *lastPartialLong = 0;
        }
        if (WORDALIGNED(src)) {
            for (i = 0; i < csumlenresidue / sizeof(unsigned long); i++) {
                csum += *src++;
            }
        } else {
            for (i = 0; i < csumlenresidue / sizeof(unsigned long); i++) {
                memcpy(&temp, src, sizeof(temp));
                csum += temp;
                src++;
            }
        }
        csumlenresidue -= i * sizeof(unsigned long);
        if (csumlenresidue) {
            temp = 0;
            memcpy(&temp, src, csumlenresidue);
            csum += temp;
            *lastPartialLong = temp;
            *lastPartialLength = csumlenresidue;
        }
    } /* end else if (csumlenresidue != 0) */

    return csum;
}

/*
 * unsigned int flavour of prte_bcopy_csum_partial(): same control flow, but
 * the data is summed as native unsigned int words, alignment is tested with
 * INTALIGNED(), and the carried state lives in lastPartialInt /
 * lastPartialLength.
 */
unsigned int prte_bcopy_uicsum_partial(const void *source, void *destination, size_t copylen,
                                       size_t csumlen, unsigned int *lastPartialInt,
                                       size_t *lastPartialLength)
{
    unsigned int *src = (unsigned int *) source;
    unsigned int *dest = (unsigned int *) destination;
    unsigned int csum = 0;
    size_t csumlenresidue;
    unsigned long i;
    unsigned int temp;

    csumlenresidue = (csumlen > copylen) ? (csumlen - copylen) : 0;
    temp = *lastPartialInt;

    if (INTALIGNED(source) && INTALIGNED(dest)) {
        if (*lastPartialLength) {
            /* do we have enough data to fill out the partial word? */
            if (copylen >= (sizeof(unsigned int) - *lastPartialLength)) { /* YES, we do... */
                memcpy(((char *) &temp + *lastPartialLength), src,
                       (sizeof(unsigned int) - *lastPartialLength));
                memcpy(dest, ((char *) &temp + *lastPartialLength),
                       (sizeof(unsigned int) - *lastPartialLength));
                src = (unsigned int *) ((char *) src + sizeof(unsigned int) - *lastPartialLength);
                dest = (unsigned int *) ((char *) dest + sizeof(unsigned int) - *lastPartialLength);
                csum += (temp - *lastPartialInt);
                copylen -= sizeof(unsigned int) - *lastPartialLength;
                /* now we have an unaligned source and an unaligned destination */
                for (; copylen >= sizeof(*src); copylen -= sizeof(*src)) {
                    memcpy(&temp, src, sizeof(temp));
                    src++;
                    csum += temp;
                    memcpy(dest, &temp, sizeof(temp));
                    dest++;
                }
                *lastPartialLength = 0;
                *lastPartialInt = 0;
            } else { /* NO, we don't... */
                memcpy(((char *) &temp + *lastPartialLength), src, copylen);
                memcpy(dest, ((char *) &temp + *lastPartialLength), copylen);
                src = (unsigned int *) ((char *) src + copylen);
                dest = (unsigned int *) ((char *) dest + copylen);
                csum += (temp - *lastPartialInt);
                *lastPartialInt = temp;
                *lastPartialLength += copylen;
                copylen = 0;
            }
        } else { /* fast path... */
            size_t numLongs = copylen / sizeof(unsigned int);
            for (i = 0; i < numLongs; i++) {
                csum += *src;
                *dest++ = *src++;
            }
            *lastPartialInt = 0;
            *lastPartialLength = 0;
            if (INTALIGNED(copylen) && (csumlenresidue == 0)) {
                return (csum);
            } else {
                copylen -= i * sizeof(unsigned int);
            }
        }
    } else if (INTALIGNED(source)) {
        if (*lastPartialLength) {
            /* do we have enough data to fill out the partial word? */
            if (copylen >= (sizeof(unsigned int) - *lastPartialLength)) { /* YES, we do... */
                memcpy(((char *) &temp + *lastPartialLength), src,
                       (sizeof(unsigned int) - *lastPartialLength));
                memcpy(dest, ((char *) &temp + *lastPartialLength),
                       (sizeof(unsigned int) - *lastPartialLength));
                src = (unsigned int *) ((char *) src + sizeof(unsigned int) - *lastPartialLength);
                dest = (unsigned int *) ((char *) dest + sizeof(unsigned int) - *lastPartialLength);
                csum += (temp - *lastPartialInt);
                copylen -= sizeof(unsigned int) - *lastPartialLength;
                /* now we have an unaligned source and an unknown alignment for our destination */
                if (INTALIGNED(dest)) {
                    size_t numLongs = copylen / sizeof(unsigned int);
                    for (i = 0; i < numLongs; i++) {
                        memcpy(&temp, src, sizeof(temp));
                        src++;
                        csum += temp;
                        *dest++ = temp;
                    }
                    copylen -= i * sizeof(unsigned int);
                } else {
                    for (; copylen >= sizeof(*src); copylen -= sizeof(*src)) {
                        memcpy(&temp, src, sizeof(temp));
                        src++;
                        csum += temp;
                        memcpy(dest, &temp, sizeof(temp));
                        dest++;
                    }
                }
                *lastPartialInt = 0;
                *lastPartialLength = 0;
            } else { /* NO, we don't... */
                memcpy(((char *) &temp + *lastPartialLength), src, copylen);
                memcpy(dest, ((char *) &temp + *lastPartialLength), copylen);
                src = (unsigned int *) ((char *) src + copylen);
                dest = (unsigned int *) ((char *) dest + copylen);
                csum += (temp - *lastPartialInt);
                *lastPartialInt = temp;
                *lastPartialLength += copylen;
                copylen = 0;
            }
        } else {
            for (; copylen >= sizeof(*src); copylen -= sizeof(*src)) {
                temp = *src++;
                csum += temp;
                memcpy(dest, &temp, sizeof(temp));
                dest++;
            }
            *lastPartialInt = 0;
            *lastPartialLength = 0;
        }
    } else if (INTALIGNED(dest)) {
        if (*lastPartialLength) {
            /* do we have enough data to fill out the partial word? */
            if (copylen >= (sizeof(unsigned int) - *lastPartialLength)) { /* YES, we do... */
                memcpy(((char *) &temp + *lastPartialLength), src,
                       (sizeof(unsigned int) - *lastPartialLength));
                memcpy(dest, ((char *) &temp + *lastPartialLength),
                       (sizeof(unsigned int) - *lastPartialLength));
                src = (unsigned int *) ((char *) src + sizeof(unsigned int) - *lastPartialLength);
                dest = (unsigned int *) ((char *) dest + sizeof(unsigned int) - *lastPartialLength);
                csum += (temp - *lastPartialInt);
                copylen -= sizeof(unsigned int) - *lastPartialLength;
                /* now we have a source of unknown alignment and a unaligned destination */
                if (INTALIGNED(src)) {
                    for (; copylen >= sizeof(*src); copylen -= sizeof(*src)) {
                        temp = *src++;
                        csum += temp;
                        memcpy(dest, &temp, sizeof(temp));
                        dest++;
                    }
                    *lastPartialInt = 0;
                    *lastPartialLength = 0;
                } else {
                    for (; copylen >= sizeof(*src); copylen -= sizeof(*src)) {
                        memcpy(&temp, src, sizeof(temp));
                        src++;
                        csum += temp;
                        memcpy(dest, &temp, sizeof(temp));
                        dest++;
                    }
                    *lastPartialLength = 0;
                    *lastPartialInt = 0;
                }
            } else { /* NO, we don't... */
                memcpy(((char *) &temp + *lastPartialLength), src, copylen);
                memcpy(dest, ((char *) &temp + *lastPartialLength), copylen);
                src = (unsigned int *) ((char *) src + copylen);
                dest = (unsigned int *) ((char *) dest + copylen);
                csum += (temp - *lastPartialInt);
                *lastPartialInt = temp;
                *lastPartialLength += copylen;
                copylen = 0;
            }
        } else {
            for (; copylen >= sizeof(*src); copylen -= sizeof(*src)) {
                memcpy(&temp, src, sizeof(temp));
                src++;
                csum += temp;
                *dest++ = temp;
            }
            *lastPartialLength = 0;
            *lastPartialInt = 0;
        }
    } else {
        if (*lastPartialLength) {
            /* do we have enough data to fill out the partial word? */
            if (copylen >= (sizeof(unsigned int) - *lastPartialLength)) { /* YES, we do... */
                memcpy(((char *) &temp + *lastPartialLength), src,
                       (sizeof(unsigned int) - *lastPartialLength));
                memcpy(dest, ((char *) &temp + *lastPartialLength),
                       (sizeof(unsigned int) - *lastPartialLength));
                src = (unsigned int *) ((char *) src + sizeof(unsigned int) - *lastPartialLength);
                dest = (unsigned int *) ((char *) dest + sizeof(unsigned int) - *lastPartialLength);
                csum += (temp - *lastPartialInt);
                copylen -= sizeof(unsigned int) - *lastPartialLength;
                /* now we have an unknown alignment for our source and destination */
                if (INTALIGNED(src) && INTALIGNED(dest)) {
                    size_t numLongs = copylen / sizeof(unsigned int);
                    for (i = 0; i < numLongs; i++) {
                        csum += *src;
                        *dest++ = *src++;
                    }
                    copylen -= i * sizeof(unsigned int);
                } else { /* safe but slower for all other alignments */
                    for (; copylen >= sizeof(*src); copylen -= sizeof(*src)) {
                        memcpy(&temp, src, sizeof(temp));
                        src++;
                        csum += temp;
                        memcpy(dest, &temp, sizeof(temp));
                        dest++;
                    }
                }
                *lastPartialInt = 0;
                *lastPartialLength = 0;
            } else { /* NO, we don't... */
                memcpy(((char *) &temp + *lastPartialLength), src, copylen);
                memcpy(dest, ((char *) &temp + *lastPartialLength), copylen);
                src = (unsigned int *) ((char *) src + copylen);
                dest = (unsigned int *) ((char *) dest + copylen);
                csum += (temp - *lastPartialInt);
                *lastPartialInt = temp;
                *lastPartialLength += copylen;
                copylen = 0;
            }
        } else {
            for (; copylen >= sizeof(*src); copylen -= sizeof(*src)) {
                memcpy(&temp, src, sizeof(temp));
                src++;
                csum += temp;
                memcpy(dest, &temp, sizeof(temp));
                dest++;
            }
            *lastPartialLength = 0;
            *lastPartialInt = 0;
        }
    }

    /* if copylen is non-zero there was a bit left, less than an unsigned int's worth */
    if ((copylen != 0) && (csumlenresidue == 0)) {
        temp = *lastPartialInt;
        if (*lastPartialLength) {
            if (copylen >= (sizeof(unsigned int) - *lastPartialLength)) {
                /* copy all remaining bytes from src to dest */
                unsigned int copytemp = 0;
                memcpy(&copytemp, src, copylen);
                memcpy(dest, &copytemp, copylen);
                /* fill out rest of partial word and add to checksum */
                memcpy(((char *) &temp + *lastPartialLength), src,
                       (sizeof(unsigned int) - *lastPartialLength));
                /* avoid unsigned arithmetic overflow by subtracting the old partial
                 * word from the new one before adding to the checksum...
                 */
                csum += (temp - *lastPartialInt);
                copylen -= sizeof(unsigned int) - *lastPartialLength;
                src = (unsigned int *) ((char *) src + sizeof(unsigned int) - *lastPartialLength);
                *lastPartialLength = copylen;
                /* reset temp, and calculate next partial word */
                temp = 0;
                if (copylen) {
                    memcpy(&temp, src, copylen);
                }
                /* add it to the checksum */
                csum += temp;
                *lastPartialInt = temp;
            } else {
                /* copy all remaining bytes from src to dest */
                unsigned int copytemp = 0;
                memcpy(&copytemp, src, copylen);
                memcpy(dest, &copytemp, copylen);
                /* fill out rest of partial word and add to checksum */
                memcpy(((char *) &temp + *lastPartialLength), src, copylen);
                /* avoid unsigned arithmetic overflow by subtracting the old partial
                 * word from the new one before adding to the checksum...
                 */
                csum += temp - *lastPartialInt;
                *lastPartialInt = temp;
                *lastPartialLength += copylen;
            }
        } else { /* fast path... */
            /* temp and *lastPartialInt are 0 if *lastPartialLength is 0... */
            memcpy(&temp, src, copylen);
            csum += temp;
            memcpy(dest, &temp, copylen);
            *lastPartialInt = temp;
            *lastPartialLength = copylen;
            /* done...return the checksum */
        }
    } else if (csumlenresidue != 0) {
        if (copylen != 0) {
            temp = 0;
            memcpy(&temp, src, copylen);
            memcpy(dest, &temp, copylen);
        }
        if (csumlenresidue < (sizeof(unsigned int) - copylen - *lastPartialLength)) {
            temp = *lastPartialInt;
            memcpy(((char *) &temp + *lastPartialLength), src, (copylen + csumlenresidue));
            /* avoid unsigned arithmetic overflow by subtracting the old partial
             * word from the new one before adding to the checksum...
             */
            csum += temp - *lastPartialInt;
            src++;
            *lastPartialInt = temp;
            *lastPartialLength += copylen + csumlenresidue;
            csumlenresidue = 0;
        } else {
            /* we have enough chksum data to fill out our last partial
             * word
             */
            temp = *lastPartialInt;
            memcpy(((char *) &temp + *lastPartialLength), src,
                   (sizeof(unsigned int) - *lastPartialLength));
            /* avoid unsigned arithmetic overflow by subtracting the old partial
             * word from the new one before adding to the checksum...
             */
            csum += temp - *lastPartialInt;
            src = (unsigned int *) ((char *) src + sizeof(unsigned int) - *lastPartialLength);
            csumlenresidue -= sizeof(unsigned int) - *lastPartialLength - copylen;
            *lastPartialLength = 0;
            *lastPartialInt = 0;
        }
        if (INTALIGNED(src)) {
            for (i = 0; i < csumlenresidue / sizeof(unsigned int); i++) {
                csum += *src++;
            }
        } else {
            for (i = 0; i < csumlenresidue / sizeof(unsigned int); i++) {
                memcpy(&temp, src, sizeof(temp));
                csum += temp;
                src++;
            }
        }
        csumlenresidue -= i * sizeof(unsigned int);
        if (csumlenresidue) {
            temp = 0;
            memcpy(&temp, src, csumlenresidue);
            csum += temp;
            *lastPartialInt = temp;
            *lastPartialLength = csumlenresidue;
        }
    } /* end else if (csumlenresidue != 0) */

    return csum;
}

/*
 * csum() generates a bcopy_csum() - compatible checksum that can be
 * called multiple times
 *
 * Sums csumlen bytes at source as native unsigned long words without
 * copying them.  The partial-word state in lastPartialLong /
 * lastPartialLength is consumed and updated the same way as in
 * prte_bcopy_csum_partial(), and the return value is again this call's
 * contribution to the running checksum.
 */
unsigned long prte_csum_partial(const void *source, size_t csumlen,
                                unsigned long *lastPartialLong, size_t *lastPartialLength)
{
    unsigned long *src = (unsigned long *) source;
    unsigned long csum = 0;
    unsigned long i, temp;

    temp = *lastPartialLong;

    if (WORDALIGNED(source)) {
        if (*lastPartialLength) {
            /* do we have enough data to fill out the partial word? */
            if (csumlen >= (sizeof(unsigned long) - *lastPartialLength)) { /* YES, we do... */
                memcpy(((char *) &temp + *lastPartialLength), src,
                       (sizeof(unsigned long) - *lastPartialLength));
                src = (unsigned long *) ((char *) src + sizeof(unsigned long) - *lastPartialLength);
                csum += (temp - *lastPartialLong);
                csumlen -= sizeof(unsigned long) - *lastPartialLength;
                /* now we have an unaligned source */
                for (i = 0; i < csumlen / sizeof(unsigned long); i++) {
                    memcpy(&temp, src, sizeof(temp));
                    csum += temp;
                    src++;
                }
                csumlen -= i * sizeof(unsigned long);
                *lastPartialLong = 0;
                *lastPartialLength = 0;
            } else { /* NO, we don't... */
                memcpy(((char *) &temp + *lastPartialLength), src, csumlen);
                src = (unsigned long *) ((char *) src + csumlen);
                csum += (temp - *lastPartialLong);
                *lastPartialLong = temp;
                *lastPartialLength += csumlen;
                csumlen = 0;
            }
        } else { /* fast path... */
            size_t numLongs = csumlen / sizeof(unsigned long);
            for (i = 0; i < numLongs; i++) {
                csum += *src++;
            }
            *lastPartialLong = 0;
            *lastPartialLength = 0;
            if (WORDALIGNED(csumlen)) {
                return (csum);
            } else {
                csumlen -= i * sizeof(unsigned long);
            }
        }
    } else {
        if (*lastPartialLength) {
            /* do we have enough data to fill out the partial word? */
            if (csumlen >= (sizeof(unsigned long) - *lastPartialLength)) { /* YES, we do... */
                memcpy(((char *) &temp + *lastPartialLength), src,
                       (sizeof(unsigned long) - *lastPartialLength));
                src = (unsigned long *) ((char *) src + sizeof(unsigned long) - *lastPartialLength);
                csum += (temp - *lastPartialLong);
                csumlen -= sizeof(unsigned long) - *lastPartialLength;
                /* now we have a source of unknown alignment */
                if (WORDALIGNED(src)) {
                    for (i = 0; i < csumlen / sizeof(unsigned long); i++) {
                        csum += *src++;
                    }
                    csumlen -= i * sizeof(unsigned long);
                    *lastPartialLong = 0;
                    *lastPartialLength = 0;
                } else {
                    for (i = 0; i < csumlen / sizeof(unsigned long); i++) {
                        memcpy(&temp, src, sizeof(temp));
                        csum += temp;
                        src++;
                    }
                    csumlen -= i * sizeof(unsigned long);
                    *lastPartialLong = 0;
                    *lastPartialLength = 0;
                }
            } else { /* NO, we don't... */
                memcpy(((char *) &temp + *lastPartialLength), src, csumlen);
                src = (unsigned long *) ((char *) src + csumlen);
                csum += (temp - *lastPartialLong);
                *lastPartialLong = temp;
                *lastPartialLength += csumlen;
                csumlen = 0;
            }
        } else {
            for (; csumlen >= sizeof(*src); csumlen -= sizeof(*src)) {
                memcpy(&temp, src, sizeof(temp));
                src++;
                csum += temp;
            }
            *lastPartialLength = 0;
            *lastPartialLong = 0;
        }
    }

    /* if csumlen is non-zero there was a bit left, less than an unsigned long's worth */
    if (csumlen != 0) {
        temp = *lastPartialLong;
        if (*lastPartialLength) {
            if (csumlen >= (sizeof(unsigned long) - *lastPartialLength)) {
                /* fill out rest of partial word and add to checksum */
                memcpy(((char *) &temp + *lastPartialLength), src,
                       (sizeof(unsigned long) - *lastPartialLength));
                csum += (temp - *lastPartialLong);
                csumlen -= sizeof(unsigned long) - *lastPartialLength;
                src = (unsigned long *) ((char *) src + sizeof(unsigned long) - *lastPartialLength);
                *lastPartialLength = csumlen;
                /* reset temp, and calculate next partial word */
                temp = 0;
                if (csumlen) {
                    memcpy(&temp, src, csumlen);
                }
                /* add it to the checksum */
                csum += temp;
                *lastPartialLong = temp;
            } else {
                /* fill out rest of partial word and add to checksum */
                memcpy(((char *) &temp + *lastPartialLength), src, csumlen);
                csum += (temp - *lastPartialLong);
                *lastPartialLong = temp;
                *lastPartialLength += csumlen;
            }
        } else { /* fast path... */
            /* temp and *lastPartialLong are 0 if *lastPartialLength is 0... */
            memcpy(&temp, src, csumlen);
            csum += temp;
            *lastPartialLong = temp;
            *lastPartialLength = csumlen;
            /* done...return the checksum */
        }
    }

    return csum;
}

/*
 * unsigned int flavour of prte_csum_partial(): sums csumlen bytes at source
 * as native unsigned int words, carrying partial-word state in
 * lastPartialInt / lastPartialLength.
 */
unsigned int prte_uicsum_partial(const void *source, size_t csumlen, unsigned int *lastPartialInt,
                                 size_t *lastPartialLength)
{
    unsigned int *src = (unsigned int *) source;
    unsigned int csum = 0;
    unsigned int temp;
    unsigned long i;

    temp = *lastPartialInt;

    if (INTALIGNED(source)) {
        if (*lastPartialLength) {
            /* do we have enough data to fill out the partial word? */
            if (csumlen >= (sizeof(unsigned int) - *lastPartialLength)) { /* YES, we do... */
                memcpy(((char *) &temp + *lastPartialLength), src,
                       (sizeof(unsigned int) - *lastPartialLength));
                src = (unsigned int *) ((char *) src + sizeof(unsigned int) - *lastPartialLength);
                csum += (temp - *lastPartialInt);
                csumlen -= sizeof(unsigned int) - *lastPartialLength;
                /* now we have an unaligned source */
                for (i = 0; i < csumlen / sizeof(unsigned int); i++) {
                    memcpy(&temp, src, sizeof(temp));
                    csum += temp;
                    src++;
                }
                csumlen -= i * sizeof(unsigned int);
                *lastPartialInt = 0;
                *lastPartialLength = 0;
            } else { /* NO, we don't... */
                memcpy(((char *) &temp + *lastPartialLength), src, csumlen);
                src = (unsigned int *) ((char *) src + csumlen);
                csum += (temp - *lastPartialInt);
                *lastPartialInt = temp;
                *lastPartialLength += csumlen;
                csumlen = 0;
            }
        } else { /* fast path... */
            size_t numLongs = csumlen / sizeof(unsigned int);
            for (i = 0; i < numLongs; i++) {
                csum += *src++;
            }
            *lastPartialInt = 0;
            *lastPartialLength = 0;
            if (INTALIGNED(csumlen)) {
                return (csum);
            } else {
                csumlen -= i * sizeof(unsigned int);
            }
        }
    } else {
        if (*lastPartialLength) {
            /* do we have enough data to fill out the partial word? */
            if (csumlen >= (sizeof(unsigned int) - *lastPartialLength)) { /* YES, we do... */
                memcpy(((char *) &temp + *lastPartialLength), src,
                       (sizeof(unsigned int) - *lastPartialLength));
                src = (unsigned int *) ((char *) src + sizeof(unsigned int) - *lastPartialLength);
                csum += (temp - *lastPartialInt);
                csumlen -= sizeof(unsigned int) - *lastPartialLength;
                /* now we have a source of unknown alignment */
                if (INTALIGNED(src)) {
                    for (i = 0; i < csumlen / sizeof(unsigned int); i++) {
                        csum += *src++;
                    }
                    csumlen -= i * sizeof(unsigned int);
                    *lastPartialInt = 0;
                    *lastPartialLength = 0;
                } else {
                    for (i = 0; i < csumlen / sizeof(unsigned int); i++) {
                        memcpy(&temp, src, sizeof(temp));
                        csum += temp;
                        src++;
                    }
                    csumlen -= i * sizeof(unsigned int);
                    *lastPartialInt = 0;
                    *lastPartialLength = 0;
                }
            } else { /* NO, we don't... */
                memcpy(((char *) &temp + *lastPartialLength), src, csumlen);
                src = (unsigned int *) ((char *) src + csumlen);
                csum += (temp - *lastPartialInt);
                *lastPartialInt = temp;
                *lastPartialLength += csumlen;
                csumlen = 0;
            }
        } else {
            for (; csumlen >= sizeof(*src); csumlen -= sizeof(*src)) {
                memcpy(&temp, src, sizeof(temp));
                src++;
                csum += temp;
            }
            *lastPartialLength = 0;
            *lastPartialInt = 0;
        }
    }

    /* if csumlen is non-zero there was a bit left, less than an unsigned int's worth */
    if (csumlen != 0) {
        temp = *lastPartialInt;
        if (*lastPartialLength) {
            if (csumlen >= (sizeof(unsigned int) - *lastPartialLength)) {
                /* fill out rest of partial word and add to checksum */
                memcpy(((char *) &temp + *lastPartialLength), src,
                       (sizeof(unsigned int) - *lastPartialLength));
                csum += (temp - *lastPartialInt);
                csumlen -= sizeof(unsigned int) - *lastPartialLength;
                src = (unsigned int *) ((char *) src + sizeof(unsigned int) - *lastPartialLength);
                *lastPartialLength = csumlen;
                /* reset temp, and calculate next partial word */
                temp = 0;
                if (csumlen) {
                    memcpy(&temp, src, csumlen);
                }
                /* add it to the checksum */
                csum += temp;
                *lastPartialInt = temp;
            } else {
                /* fill out rest of partial word and add to checksum */
                memcpy(((char *) &temp + *lastPartialLength), src, csumlen);
                csum += (temp - *lastPartialInt);
                *lastPartialInt = temp;
                *lastPartialLength += csumlen;
            }
        } else { /* fast path... */
            /* temp and *lastPartialInt are 0 if *lastPartialLength is 0... */
            memcpy(&temp, src, csumlen);
            csum += temp;
            *lastPartialInt = temp;
            *lastPartialLength = csumlen;
            /* done...return the checksum */
        }
    }

    return csum;
}

/* globals for CRC32 bcopy and calculation routines */
static bool _prte_crc_table_initialized = false;
static unsigned int _prte_crc_table[256];
*/ void prte_initialize_crc_table(void) { register int i, j; register unsigned int crc_accum; for (i = 0; i < 256; i++) { crc_accum = (i << 24); for (j = 0; j < 8; j++) { if (crc_accum & 0x80000000) crc_accum = (crc_accum << 1) ^ CRC_POLYNOMIAL; else crc_accum = (crc_accum << 1); } _prte_crc_table[i] = crc_accum; } /* set global bool to true to do this work once! */ _prte_crc_table_initialized = true; return; } unsigned int prte_bcopy_uicrc_partial(const void *source, void *destination, size_t copylen, size_t crclen, unsigned int partial_crc) { size_t crclenresidue = (crclen > copylen) ? (crclen - copylen) : 0; register int i, j; register unsigned char t; unsigned int tmp; if (!_prte_crc_table_initialized) { prte_initialize_crc_table(); } if (INTALIGNED(source) && INTALIGNED(destination)) { register unsigned int *src = (unsigned int *) source; register unsigned int *dst = (unsigned int *) destination; register unsigned char *ts, *td; /* copy whole integers */ while (copylen >= sizeof(unsigned int)) { tmp = *src++; *dst++ = tmp; ts = (unsigned char *) &tmp; for (j = 0; j < (int) sizeof(unsigned int); j++) { i = ((partial_crc >> 24) ^ *ts++) & 0xff; partial_crc = (partial_crc << 8) ^ _prte_crc_table[i]; } copylen -= sizeof(unsigned int); } ts = (unsigned char *) src; td = (unsigned char *) dst; /* copy partial integer */ while (copylen--) { t = *ts++; *td++ = t; i = ((partial_crc >> 24) ^ t) & 0xff; partial_crc = (partial_crc << 8) ^ _prte_crc_table[i]; } /* calculate CRC over remaining bytes... 
*/ while (crclenresidue--) { i = ((partial_crc >> 24) ^ *ts++) & 0xff; partial_crc = (partial_crc << 8) ^ _prte_crc_table[i]; } } else { register unsigned char *src = (unsigned char *) source; register unsigned char *dst = (unsigned char *) destination; while (copylen--) { t = *src++; *dst++ = t; i = ((partial_crc >> 24) ^ t) & 0xff; partial_crc = (partial_crc << 8) ^ _prte_crc_table[i]; } while (crclenresidue--) { i = ((partial_crc >> 24) ^ *src++) & 0xff; partial_crc = (partial_crc << 8) ^ _prte_crc_table[i]; } } return partial_crc; } unsigned int prte_uicrc_partial(const void *source, size_t crclen, unsigned int partial_crc) { register int i, j; register unsigned char *t; unsigned int tmp; if (!_prte_crc_table_initialized) { prte_initialize_crc_table(); } if (INTALIGNED(source)) { register unsigned int *src = (unsigned int *) source; while (crclen >= sizeof(unsigned int)) { tmp = *src++; t = (unsigned char *) &tmp; for (j = 0; j < (int) sizeof(unsigned int); j++) { i = ((partial_crc >> 24) ^ *t++) & 0xff; partial_crc = (partial_crc << 8) ^ _prte_crc_table[i]; } crclen -= sizeof(unsigned int); } t = (unsigned char *) src; while (crclen--) { i = ((partial_crc >> 24) ^ *t++) & 0xff; partial_crc = (partial_crc << 8) ^ _prte_crc_table[i]; } } else { register unsigned char *src = (unsigned char *) source; while (crclen--) { i = ((partial_crc >> 24) ^ *src++) & 0xff; partial_crc = (partial_crc << 8) ^ _prte_crc_table[i]; } } return partial_crc; } prrte-3.0.13/src/util/error_strings.h0000664000175000017500000000306415145263240017742 0ustar alastairalastair/* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2010-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2021 Nanook Consulting. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ /** @file: * */ #ifndef _PRTE_ERROR_STRINGS_H_ #define _PRTE_ERROR_STRINGS_H_ #include "prte_config.h" #include "src/mca/plm/plm_types.h" #include "src/runtime/prte_globals.h" BEGIN_C_DECLS PRTE_EXPORT const char *prte_job_state_to_str(prte_job_state_t state); PRTE_EXPORT const char *prte_app_ctx_state_to_str(prte_app_state_t state); PRTE_EXPORT const char *prte_proc_state_to_str(prte_proc_state_t state); PRTE_EXPORT const char *prte_node_state_to_str(prte_node_state_t state); END_C_DECLS #endif prrte-3.0.13/src/util/nidmap.h0000664000175000017500000000266215145263240016313 0ustar alastairalastair/* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2010-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2015-2019 Intel, Inc. All rights reserved. * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #ifndef PRTE_NIDMAP_H #define PRTE_NIDMAP_H #include "prte_config.h" #include "src/class/pmix_pointer_array.h" #include "src/pmix/pmix-internal.h" #include "src/runtime/prte_globals.h" /* pass info about the nodes in an allocation */ PRTE_EXPORT int prte_util_nidmap_create(pmix_pointer_array_t *pool, pmix_data_buffer_t *buf); PRTE_EXPORT int prte_util_decode_nidmap(pmix_data_buffer_t *buf); #endif /* PRTE_NIDMAP_H */ prrte-3.0.13/src/util/proc_info.c0000664000175000017500000002111115145263240017002 0ustar alastairalastair/* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2012 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2014-2020 Intel, Inc. All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. * Copyright (c) 2021-2025 Nanook Consulting All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #include "prte_config.h" #include "constants.h" #include #include #include #ifdef HAVE_UNISTD_H # include #endif #ifdef HAVE_SYS_TYPES_H # include #endif #include #include "src/mca/base/pmix_base.h" #include "src/mca/base/pmix_mca_base_var.h" #include "src/pmix/pmix-internal.h" #include "src/runtime/prte_globals.h" #include "src/util/pmix_argv.h" #include "src/util/attr.h" #include "src/util/pmix_if.h" #include "src/util/pmix_net.h" #include "src/util/pmix_output.h" #include "src/util/proc_info.h" #include "src/util/proc_info.h" /* provide a connection to a reqd variable */ extern bool prte_keep_fqdn_hostnames; PRTE_EXPORT prte_process_info_t prte_process_info = { .myproc = PMIX_PROC_STATIC_INIT, .my_hnp = PMIX_PROC_STATIC_INIT, .my_hnp_uri = NULL, .my_parent = PMIX_PROC_STATIC_INIT, .hnp_pid = 0, .num_daemons = 1, .num_nodes = 1, .nodename = NULL, .aliases = NULL, .euid = 0, .egid = 0, .pid = 0, .proc_type = PRTE_PROC_TYPE_NONE, .my_port = 0, .tmpdir_base = NULL, .sessdir_prefix = NULL, .top_session_dir = NULL, .cpuset = NULL, .shared_fs = false }; static bool init = false; static char *prte_strip_prefix; void prte_setup_hostname(void) { char *ptr; char hostname[PRTE_MAXHOSTNAMELEN]; char **prefixes; bool match; int i, idx; /* whether or not to keep FQDN hostnames */ prte_keep_fqdn_hostnames = false; (void) pmix_mca_base_var_register("prte", "prte", NULL, "keep_fqdn_hostnames", "Whether or not to keep FQDN hostnames [default: no]", PMIX_MCA_BASE_VAR_TYPE_BOOL, &prte_keep_fqdn_hostnames); /* get the nodename */ gethostname(hostname, sizeof(hostname)); prte_strip_prefix = NULL; (void) pmix_mca_base_var_register( "prte", "prte", NULL, "strip_prefix", "Prefix(es) to match when deciding whether to strip leading characters and zeroes from " "node names returned by daemons", PMIX_MCA_BASE_VAR_TYPE_STRING, &prte_strip_prefix); /* we have to strip node names here, if user directs, to ensure that * the names 
exchanged in the modex match the names found locally */ if (NULL != prte_strip_prefix && !pmix_net_isaddr(hostname)) { prefixes = PMIX_ARGV_SPLIT_COMPAT(prte_strip_prefix, ','); match = false; for (i = 0; NULL != prefixes[i]; i++) { if (0 == strncmp(hostname, prefixes[i], strlen(prefixes[i]))) { /* remove the prefix and leading zeroes */ idx = strlen(prefixes[i]); while (idx < (int) strlen(hostname) && (hostname[idx] <= '0' || '9' < hostname[idx])) { idx++; } if ((int) strlen(hostname) <= idx) { /* there were no non-zero numbers in the name */ prte_process_info.nodename = strdup(&hostname[strlen(prefixes[i])]); } else { prte_process_info.nodename = strdup(&hostname[idx]); } /* add this to our list of aliases */ PMIX_ARGV_APPEND_UNIQUE_COMPAT(&prte_process_info.aliases, prte_process_info.nodename); match = true; break; } } /* if we didn't find a match, then just use the hostname as-is */ if (!match) { prte_process_info.nodename = strdup(hostname); } PMIX_ARGV_FREE_COMPAT(prefixes); } else { prte_process_info.nodename = strdup(hostname); } // if we are not keeping FQDN, then strip it off if not an IP address if (!prte_keep_fqdn_hostnames && !pmix_net_isaddr(prte_process_info.nodename)) { ptr = strchr(prte_process_info.nodename, '.'); if (NULL != ptr) { /* add the fqdn name as an alias */ PMIX_ARGV_APPEND_UNIQUE_COMPAT(&prte_process_info.aliases, prte_process_info.nodename); /* retain the non-fqdn name as the node's name */ *ptr = '\0'; } } // add the localhost names PMIX_ARGV_APPEND_UNIQUE_COMPAT(&prte_process_info.aliases, "localhost"); PMIX_ARGV_APPEND_UNIQUE_COMPAT(&prte_process_info.aliases, "127.0.0.1"); } bool prte_check_host_is_local(const char *name) { int i; if (0 == strcmp(name, prte_process_info.nodename)) { return true; } for (i = 0; NULL != prte_process_info.aliases[i]; i++) { if (0 == strcmp(name, prte_process_info.aliases[i])) { return true; } } /* if it wasn't one of those and we are allowed * to resolve addresses, then try that too */ if 
(!prte_do_not_resolve) { if (pmix_ifislocal(name)) { /* add to our aliases */ PMIX_ARGV_APPEND_NOSIZE_COMPAT(&prte_process_info.aliases, name); return true; } } return false; } int prte_proc_info(void) { char *ptr; if (init) { return PRTE_SUCCESS; } init = true; prte_process_info.my_hnp_uri = NULL; pmix_mca_base_var_register("prte", "prte", NULL, "hnp_uri", "HNP contact info", PMIX_MCA_BASE_VAR_TYPE_STRING, &prte_process_info.my_hnp_uri); if (NULL != prte_process_info.my_hnp_uri) { ptr = prte_process_info.my_hnp_uri; /* the uri value passed to us will have quote marks around it to protect * the value if passed on the command line. We must remove those * to have a correct uri string */ if ('"' == ptr[0]) { /* if the first char is a quote, then so will the last one be */ ptr[strlen(ptr) - 1] = '\0'; memmove(ptr, ptr + 1, strlen(ptr)); } } /* get the real uid */ prte_process_info.uid = getuid(); /* get the effective uid */ prte_process_info.euid = geteuid(); /* get the real gid */ prte_process_info.gid = getgid(); /* get the effective gid */ prte_process_info.egid = getegid(); /* get the process id */ prte_process_info.pid = getpid(); /* get the number of nodes in the job */ prte_process_info.num_nodes = 1; (void) pmix_mca_base_var_register("prte", "prte", NULL, "num_nodes", "Number of nodes in the job", PMIX_MCA_BASE_VAR_TYPE_INT, &prte_process_info.num_nodes); return PRTE_SUCCESS; } int prte_proc_info_finalize(void) { if (!init) { return PRTE_SUCCESS; } if (NULL != prte_process_info.tmpdir_base) { free(prte_process_info.tmpdir_base); prte_process_info.tmpdir_base = NULL; } if (NULL != prte_process_info.sessdir_prefix) { free(prte_process_info.sessdir_prefix); prte_process_info.sessdir_prefix = NULL; } if (NULL != prte_process_info.top_session_dir) { free(prte_process_info.top_session_dir); prte_process_info.top_session_dir = NULL; } if (NULL != prte_process_info.nodename) { free(prte_process_info.nodename); prte_process_info.nodename = NULL; } if (NULL != 
prte_process_info.cpuset) { free(prte_process_info.cpuset); prte_process_info.cpuset = NULL; } prte_process_info.proc_type = PRTE_PROC_TYPE_NONE; PMIX_ARGV_FREE_COMPAT(prte_process_info.aliases); init = false; return PRTE_SUCCESS; } prrte-3.0.13/src/util/uri.h0000664000175000017500000000622015145263240015634 0ustar alastairalastair/* * Copyright (c) 2012 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2019 Intel, Inc. All rights reserved. * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2021 Nanook Consulting. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ * * @file */ /* Per RFC3986: The generic URI syntax consists of a hierarchical sequence of components referred to as the scheme, authority, path, query, and fragment. URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] hier-part = "//" authority path-abempty / path-absolute / path-rootless / path-empty The scheme and path components are required, though the path may be empty (no characters). When authority is present, the path must either be empty or begin with a slash ("/") character. When authority is not present, the path cannot begin with two slash characters ("//"). These restrictions result in five different ABNF rules for a path (Section 3.3), only one of which will match any given URI reference. The following are two example URIs and their component parts: foo://example.com:8042/over/there?name=ferret#nose \_/ \______________/\_________/ \_________/ \__/ | | | | | scheme authority path query fragment | _____________________|__ / \ / \ urn:example:animal:ferret:nose Initially, this code supports only part of the first example - a scheme followed by an authority and a path. Queries and fragments are not supported. The APIs are modeled on the Gnome equivalent functions, though the code is NOT in any way based on thoat code. 
*/ #ifndef PRTE_URI_H #define PRTE_URI_H #include "prte_config.h" #ifdef HAVE_UNISTD_H # include #endif BEGIN_C_DECLS /** * Parse a uri to retrieve the scheme * * The caller is responsible for freeing the returned string. */ PRTE_EXPORT char *prte_uri_get_scheme(const char *uri) __prte_attribute_malloc__ __prte_attribute_warn_unused_result__; /** * Create a uri from a hostname and filename * * The caller is responsible for freeing the returned string. */ PRTE_EXPORT char *prte_filename_to_uri(const char *filename, const char *hostname) __prte_attribute_malloc__ __prte_attribute_warn_unused_result__; /** * Extract the filename (and hostname) from a uri * * @param uri : a uri describing a filename (escaped, encoded in ASCII). * @param hostname : Location to store hostname for the URI, or NULL. * If there is no hostname in the URI, NULL will be * stored in this location. * @retval a newly-allocated string holding the resulting filename, or NULL on an error. * * The caller is responsible for freeing the returned string. */ PRTE_EXPORT char *prte_filename_from_uri(const char *uri, char **hostname) __prte_attribute_malloc__ __prte_attribute_warn_unused_result__; END_C_DECLS #endif /* PRTE_URI_H */ prrte-3.0.13/src/util/uri.c0000664000175000017500000001055215145263240015632 0ustar alastairalastair/* * Copyright (c) 2012 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved. * Copyright (c) 2019 Intel, Inc. All rights reserved. * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #include "prte_config.h" #include #include #include #ifdef HAVE_UNISTD_H # include #endif #include "src/util/pmix_output.h" #include "src/util/pmix_path.h" #include "src/util/pmix_printf.h" #include "src/util/pmix_show_help.h" #include "src/util/uri.h" static const char *uri_reserved_path_chars = "!$&'()*+,;=:@ "; char *prte_uri_get_scheme(const char *uri) { char *turi = strdup(uri); char *ptr; if (NULL == (ptr = strchr(turi, ':'))) { pmix_show_help("help-prte-util.txt", "malformed-uri", true, uri); free(turi); return NULL; } *ptr = '\0'; return turi; } char *prte_filename_to_uri(const char *filename, const char *hostname) { char *uri, *fn; size_t i, j, k, n; /* filename must be an absolute path */ if (!pmix_path_is_absolute(filename)) { pmix_show_help("help-prte-util.txt", "relative-path", true, filename); return NULL; } /* if hostname is NULL, then this is a local file, so * the scheme can either be missing or given as "localhost" */ if (NULL == hostname) { pmix_asprintf(&uri, "file://%s", filename); return uri; } /* count the number of characters that require escaping * in the filename */ n = 0; for (j = 0; j < strlen(uri_reserved_path_chars) - 1; j++) { if (NULL != strchr(filename, uri_reserved_path_chars[j])) { n++; } } /* escape them if necessary */ if (0 < n) { fn = (char *) malloc(strlen(filename) + n + 1); i = 0; for (k = 0; k < strlen(filename) - 1; k++) { for (j = 0; j < strlen(uri_reserved_path_chars) - 1; j++) { if (filename[k] == uri_reserved_path_chars[j]) { fn[i] = '\\'; i++; break; } } fn[i] = filename[k]; i++; } fn[i] = '\0'; } else { fn = strdup(filename); } /* construct the uri - the filename was already tested to * ensure it was absolute, so the required separator should * already be present */ pmix_asprintf(&uri, "file://%s%s", hostname, fn); free(fn); return uri; } char *prte_filename_from_uri(const char *uri, char **hostname) { char *turi; char *ptr, *fn, *sp; /* protect the 
incoming string */ turi = strdup(uri); /* set defaults */ fn = NULL; if (NULL != hostname) { *hostname = NULL; } /* extract the scheme */ if (NULL == (ptr = strchr(turi, ':'))) { pmix_show_help("help-prte-util.txt", "malformed-uri", true, uri); free(turi); return NULL; } *ptr = '\0'; ptr++; /* step over the new NULL */ /* if there are three '/', then there is no * hostname and the file is local */ if (0 == strncmp(ptr, "///", 3)) { /* step to the filename - as it is required * to be an absolute path, leave one slash * in the name */ ptr += 2; fn = strdup(ptr); } else if (0 != strncmp(ptr, "//", 2)) { /* error */ pmix_show_help("help-prte-util.txt", "malformed-uri", true, uri); } else { ptr += 2; /* step to the hostname */ /* find the separator to the filename */ if (NULL == (sp = strchr(ptr, '/'))) { pmix_show_help("help-prte-util.txt", "malformed-uri", true, uri); } else { *sp = '\0'; if (NULL != hostname) { *hostname = strdup(ptr); } /* the filename is required to be an * absolute path, so restore the slash */ *sp = '/'; fn = strdup(sp); } } free(turi); return fn; } prrte-3.0.13/src/util/dash_host/0000775000175000017500000000000015145263240016640 5ustar alastairalastairprrte-3.0.13/src/util/dash_host/dash_host.c0000664000175000017500000007232515145263240020771 0ustar alastairalastair/* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2013-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2014-2020 Intel, Inc. All rights reserved. 
* Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. * Copyright (c) 2021-2023 Nanook Consulting. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #include "prte_config.h" #include #include #include "constants.h" #include "types.h" #include "src/util/pmix_argv.h" #include "src/util/pmix_if.h" #include "src/util/pmix_net.h" #include "src/util/proc_info.h" #include "src/util/pmix_show_help.h" #include "src/mca/errmgr/errmgr.h" #include "src/mca/plm/plm_types.h" #include "src/mca/ras/base/base.h" #include "src/runtime/prte_globals.h" #include "dash_host.h" int prte_util_dash_host_compute_slots(prte_node_t *node, char *hosts) { char **specs, *cptr; int slots = 0; int n; specs = PMIX_ARGV_SPLIT_COMPAT(hosts, ','); /* see if this node appears in the list */ for (n = 0; NULL != specs[n]; n++) { /* check if the #slots was specified */ if (NULL != (cptr = strchr(specs[n], ':'))) { *cptr = '\0'; ++cptr; } else { cptr = NULL; } if (prte_quickmatch(node, specs[n])) { if (NULL != cptr) { if ('*' == *cptr || 0 == strcmp(cptr, "auto")) { slots += node->slots - node->slots_inuse; } else { slots += strtol(cptr, NULL, 10); } } else { ++slots; } } } PMIX_ARGV_FREE_COMPAT(specs); return slots; } /* we can only enter this routine if no other allocation * was found, so we only need to know that finding any * relative node syntax should generate an immediate error */ int prte_util_add_dash_host_nodes(pmix_list_t *nodes, char *hosts, bool allocating) { pmix_list_item_t *item; int32_t i, j, k; int rc, nodeidx; char **host_argv = NULL; char **mapped_nodes = NULL, **mini_map, *ndname; prte_node_t *node, *nd; pmix_list_t adds; bool needcheck; int slots = 0; bool slots_given; char *cptr; char *shortname; char *rawname; bool add_slots = false; PMIX_OUTPUT_VERBOSE((1, prte_ras_base_framework.framework_output, "%s dashhost: parsing args %s", 
PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), hosts)); PMIX_CONSTRUCT(&adds, pmix_list_t); host_argv = PMIX_ARGV_SPLIT_COMPAT(hosts, ','); if (0 < pmix_list_get_size(nodes)) { needcheck = true; } else { needcheck = false; } /* Accumulate all of the host name mappings */ for (j = 0; j < PMIX_ARGV_COUNT_COMPAT(host_argv); ++j) { mini_map = PMIX_ARGV_SPLIT_COMPAT(host_argv[j], ','); if (mapped_nodes == NULL) { mapped_nodes = mini_map; } else { for (k = 0; NULL != mini_map[k]; ++k) { rc = PMIX_ARGV_APPEND_NOSIZE_COMPAT(&mapped_nodes, mini_map[k]); if (PRTE_SUCCESS != rc) { PMIX_ARGV_FREE_COMPAT(host_argv); PMIX_ARGV_FREE_COMPAT(mini_map); goto cleanup; } } PMIX_ARGV_FREE_COMPAT(mini_map); } } PMIX_ARGV_FREE_COMPAT(host_argv); mini_map = NULL; /* Did we find anything? If not, then do nothing */ if (NULL == mapped_nodes) { rc = PRTE_SUCCESS; goto cleanup; } for (i = 0; NULL != mapped_nodes[i]; ++i) { /* if the specified node contains a relative node syntax, * and we are allocating, then ignore it */ if ('+' == mapped_nodes[i][0]) { if (!allocating) { if ('e' == mapped_nodes[i][1] || 'E' == mapped_nodes[i][1]) { /* request for empty nodes - do they want * all of them? 
*/ if (NULL != (cptr = strchr(mapped_nodes[i], ':'))) { /* the colon indicates a specific # are requested */ ++cptr; j = strtoul(cptr, NULL, 10); } else if ('\0' != mapped_nodes[0][2]) { j = strtoul(&mapped_nodes[0][2], NULL, 10); } else { /* add them all */ j = prte_node_pool->size; } for (k = 0; 0 < j && k < prte_node_pool->size; k++) { if (NULL != (node = (prte_node_t *) pmix_pointer_array_get_item(prte_node_pool, k))) { if (0 == node->num_procs) { PMIX_ARGV_APPEND_NOSIZE_COMPAT(&mini_map, node->name); --j; } } } } else if ('n' == mapped_nodes[i][1] || 'N' == mapped_nodes[i][1]) { /* they want a specific relative node #, so * look it up on global pool */ if ('\0' == mapped_nodes[i][2]) { /* they forgot to tell us the # */ pmix_show_help("help-dash-host.txt", "dash-host:invalid-relative-node-syntax", true, mapped_nodes[i]); rc = PRTE_ERR_SILENT; goto cleanup; } nodeidx = strtol(&mapped_nodes[i][2], NULL, 10); if (nodeidx < 0 || nodeidx > (int) prte_node_pool->size) { /* this is an error */ pmix_show_help("help-dash-host.txt", "dash-host:relative-node-out-of-bounds", true, nodeidx, mapped_nodes[i]); rc = PRTE_ERR_SILENT; goto cleanup; } /* if the HNP is not allocated, then we need to * adjust the index as the node pool is offset * by one */ if (!prte_hnp_is_allocated) { nodeidx++; } /* see if that location is filled */ node = (prte_node_t *) pmix_pointer_array_get_item(prte_node_pool, nodeidx); if (NULL == node) { /* this is an error */ pmix_show_help("help-dash-host.txt", "dash-host:relative-node-not-found", true, nodeidx, mapped_nodes[i]); rc = PRTE_ERR_SILENT; goto cleanup; } /* add this node to the list */ PMIX_ARGV_APPEND_NOSIZE_COMPAT(&mini_map, node->name); } else { /* invalid relative node syntax */ pmix_show_help("help-dash-host.txt", "dash-host:invalid-relative-node-syntax", true, mapped_nodes[i]); rc = PRTE_ERR_SILENT; goto cleanup; } } } else { /* just one node was given */ PMIX_ARGV_APPEND_NOSIZE_COMPAT(&mini_map, mapped_nodes[i]); } } if (NULL == 
mini_map) { rc = PRTE_SUCCESS; goto cleanup; } /* go through the names found and add them to the host list. If they're not unique, then bump the slots count for each duplicate */ for (i = 0; NULL != mini_map[i]; i++) { PMIX_OUTPUT_VERBOSE((1, prte_ras_base_framework.framework_output, "%s dashhost: working node %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), mini_map[i])); /* see if the node contains the number of slots */ slots_given = false; if (NULL != (cptr = strchr(mini_map[i], ':'))) { *cptr = '\0'; ++cptr; if ('*' == *cptr || 0 == strcmp(cptr, "auto")) { /* auto-detect #slots */ slots = -1; slots_given = false; } else { slots = strtol(cptr, NULL, 10); if ('+' == *cptr || '-' == *cptr) { // mark that we are being asked to increase/decrease #slots add_slots = true; } slots_given = true; } } /* check for local name and compute non-fqdn name */ shortname = NULL; rawname = NULL; if (prte_check_host_is_local(mini_map[i])) { ndname = prte_process_info.nodename; } else { ndname = mini_map[i]; } if (!prte_keep_fqdn_hostnames) { // Strip off the FQDN if present, ignore IP addresses if (!pmix_net_isaddr(mini_map[i])) { cptr = strchr(ndname, '.'); if (NULL != cptr) { rawname = strdup(ndname); *cptr = '\0'; shortname = strdup(ndname); *cptr = '.'; } } } /* see if a node of this name is already on the list */ node = prte_node_match(&adds, ndname); if (NULL == node && NULL != shortname) { node = prte_node_match(&adds, shortname); } if (NULL != node) { if (slots_given) { node->slots += slots; PRTE_FLAG_SET(node, PRTE_NODE_FLAG_SLOTS_GIVEN); if (add_slots) { prte_set_attribute(&node->attributes, PRTE_NODE_ADD_SLOTS, PRTE_ATTR_GLOBAL, NULL, PMIX_BOOL); } } else if (slots < 0) { node->slots = 0; PRTE_FLAG_UNSET(node, PRTE_NODE_FLAG_SLOTS_GIVEN); } else { ++node->slots; PRTE_FLAG_SET(node, PRTE_NODE_FLAG_SLOTS_GIVEN); } PMIX_OUTPUT_VERBOSE((1, prte_ras_base_framework.framework_output, "%s dashhost: node %s already on list - slots %d", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), node->name, 
node->slots)); if (NULL != shortname) { free(shortname); shortname = NULL; } if (NULL != rawname) { node->rawname = rawname; rawname = NULL; } } else { /* if we didn't find it, add it to the list */ node = PMIX_NEW(prte_node_t); if (NULL == node) { PMIX_ARGV_FREE_COMPAT(mapped_nodes); if (NULL != shortname) { free(shortname); } if (NULL != rawname) { free(rawname); } return PRTE_ERR_OUT_OF_RESOURCE; } if (prte_keep_fqdn_hostnames || NULL == shortname) { node->name = strdup(ndname); } else { node->name = strdup(shortname); } if (NULL != rawname) { node->rawname = rawname; rawname = NULL; } PMIX_OUTPUT_VERBOSE((1, prte_ras_base_framework.framework_output, "%s dashhost: added node %s to list - slots %d", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), node->name, slots)); node->state = PRTE_NODE_STATE_UP; node->slots_inuse = 0; node->slots_max = 0; if (slots_given) { node->slots = slots; PRTE_FLAG_SET(node, PRTE_NODE_FLAG_SLOTS_GIVEN); if (add_slots) { prte_set_attribute(&node->attributes, PRTE_NODE_ADD_SLOTS, PRTE_ATTR_GLOBAL, NULL, PMIX_BOOL); } } else if (slots < 0) { node->slots = 0; PRTE_FLAG_UNSET(node, PRTE_NODE_FLAG_SLOTS_GIVEN); } else { node->slots = 1; PRTE_FLAG_SET(node, PRTE_NODE_FLAG_SLOTS_GIVEN); } pmix_list_append(&adds, &node->super); } if (0 != strcmp(node->name, mini_map[i])) { // add the mini_map name to the list of aliases PMIX_ARGV_APPEND_UNIQUE_COMPAT(&node->aliases, mini_map[i]); } // ensure the non-fqdn version is saved if (NULL != shortname && 0 != strcmp(shortname, node->name)) { PMIX_ARGV_APPEND_UNIQUE_COMPAT(&node->aliases, shortname); } if (NULL != shortname) { free(shortname); } if (NULL != rawname) { free(rawname); } } PMIX_ARGV_FREE_COMPAT(mini_map); /* transfer across all unique nodes */ while (NULL != (item = pmix_list_remove_first(&adds))) { nd = (prte_node_t *) item; if (needcheck) { node = prte_node_match(nodes, nd->name); if (NULL != node) { PMIX_OUTPUT_VERBOSE((1, prte_ras_base_framework.framework_output, "%s dashhost: found existing node 
%s on input list - adding slots", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), node->name)); if (PRTE_FLAG_TEST(nd, PRTE_NODE_FLAG_SLOTS_GIVEN)) { /* transfer across the number of slots */ node->slots += nd->slots; PRTE_FLAG_SET(node, PRTE_NODE_FLAG_SLOTS_GIVEN); } PMIX_RELEASE(item); } else { PMIX_OUTPUT_VERBOSE((1, prte_ras_base_framework.framework_output, "%s dashhost: adding node %s with %d slots to final list", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), nd->name, nd->slots)); pmix_list_append(nodes, &nd->super); } } else { PMIX_OUTPUT_VERBOSE((1, prte_ras_base_framework.framework_output, "%s dashhost: adding node %s with %d slots to final list", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), nd->name, nd->slots)); pmix_list_append(nodes, &nd->super); } } if (prte_managed_allocation && !allocating) { prte_node_t *node_from_pool = NULL; PMIX_LIST_FOREACH(node, nodes, prte_node_t) { needcheck = true; for (i = 0; i < prte_node_pool->size; i++) { node_from_pool = (prte_node_t *) pmix_pointer_array_get_item(prte_node_pool, i); if (NULL == node_from_pool) { continue; } if (prte_nptr_match(node_from_pool, node)) { needcheck = false; if (node->slots < node_from_pool->slots) { node_from_pool->slots = node->slots; } break; } } if (needcheck) { // node in -host was not in allocation - this is not allowed pmix_show_help("help-dash-host.txt", "not-all-mapped-alloc", true, node->name); rc = PRTE_ERR_SILENT; goto cleanup; } } } rc = PRTE_SUCCESS; cleanup: if (NULL != mapped_nodes) { PMIX_ARGV_FREE_COMPAT(mapped_nodes); } PMIX_LIST_DESTRUCT(&adds); return rc; } /* the -host option can always be used in both absolute * and relative mode, so we have to check for pre-existing * allocations if we are to use relative node syntax */ static int parse_dash_host(char ***mapped_nodes, char *hosts) { int32_t j, k, start; int rc = PRTE_SUCCESS; char **mini_map = NULL, *cptr; int nodeidx, nnodes, p; prte_node_t *node; char **host_argv = NULL; host_argv = PMIX_ARGV_SPLIT_COMPAT(hosts, ','); if (prte_hnp_is_allocated) 
{ start = 0; } else { start = 1; } /* Accumulate all of the host name mappings */ for (j = 0; j < PMIX_ARGV_COUNT_COMPAT(host_argv); ++j) { mini_map = PMIX_ARGV_SPLIT_COMPAT(host_argv[j], ','); for (k = 0; NULL != mini_map[k]; ++k) { if ('+' == mini_map[k][0]) { /* see if we specified empty nodes */ if ('e' == mini_map[k][1] || 'E' == mini_map[k][1]) { /* request for empty nodes - do they want * all of them? */ if (NULL != (cptr = strchr(mini_map[k], ':'))) { /* the colon indicates a specific # are requested */ ++cptr; if (NULL == cptr) { // missing number of nodes being requested pmix_show_help("help-dash-host.txt", "dash-host:invalid-relative-node-syntax", true, mini_map[k]); rc = PRTE_ERR_SILENT; goto cleanup; } nnodes = strtol(cptr, NULL, 10); for (j=start, p=0; j < (int32_t)prte_node_pool->size && p < nnodes; j++) { node = (prte_node_t *) pmix_pointer_array_get_item(prte_node_pool, j); if (NULL == node) { continue; } // if the node is empty, capture it if (0 == node->num_procs) { PMIX_ARGV_APPEND_NOSIZE_COMPAT(mapped_nodes, node->name); ++p; } } if (p < nnodes) { // not enough empty nodes pmix_show_help("help-dash-host.txt", "dash-host:not-enough-empty", true, nnodes-p); rc = PRTE_ERR_SILENT; goto cleanup; } } else { /* add a marker to the list */ PMIX_ARGV_APPEND_NOSIZE_COMPAT(mapped_nodes, "*"); } } else if ('n' == mini_map[k][1] || 'N' == mini_map[k][1]) { /* they want a specific relative node #, so * look it up on global pool */ nodeidx = strtol(&mini_map[k][2], NULL, 10); if (nodeidx < 0 || nodeidx > (int) prte_node_pool->size) { /* this is an error */ pmix_show_help("help-dash-host.txt", "dash-host:relative-node-out-of-bounds", true, nodeidx, mini_map[k]); rc = PRTE_ERR_SILENT; goto cleanup; } /* if the HNP is not allocated, then we need to * adjust the index as the node pool is offset * by one */ if (!prte_hnp_is_allocated) { nodeidx++; } /* see if that location is filled */ node = (prte_node_t *) pmix_pointer_array_get_item(prte_node_pool, nodeidx); if 
(NULL == node) { /* this is an error */ pmix_show_help("help-dash-host.txt", "dash-host:relative-node-not-found", true, nodeidx, mini_map[k]); rc = PRTE_ERR_SILENT; goto cleanup; } /* add this node to the list */ PMIX_ARGV_APPEND_NOSIZE_COMPAT(mapped_nodes, node->name); } else { /* invalid relative node syntax */ pmix_show_help("help-dash-host.txt", "dash-host:invalid-relative-node-syntax", true, mini_map[k]); rc = PRTE_ERR_SILENT; goto cleanup; } } else { /* non-relative syntax - add to list */ /* remove any modifier */ if (NULL != (cptr = strchr(mini_map[k], ':'))) { *cptr = '\0'; } /* check for local alias */ if (prte_check_host_is_local(mini_map[k])) { PMIX_ARGV_APPEND_NOSIZE_COMPAT(mapped_nodes, prte_process_info.nodename); } else { PMIX_ARGV_APPEND_NOSIZE_COMPAT(mapped_nodes, mini_map[k]); } } } PMIX_ARGV_FREE_COMPAT(mini_map); mini_map = NULL; } cleanup: if (NULL != host_argv) { PMIX_ARGV_FREE_COMPAT(host_argv); } if (NULL != mini_map) { PMIX_ARGV_FREE_COMPAT(mini_map); } return rc; } int prte_util_filter_dash_host_nodes(pmix_list_t *nodes, char *hosts, bool remove) { pmix_list_item_t *item; pmix_list_item_t *next; int32_t i, j, len_mapped_node = 0; int rc, test; char **mapped_nodes = NULL; prte_node_t *node; int num_empty = 0; pmix_list_t keep; bool want_all_empty = false; char *cptr; size_t lst, lmn; /* if the incoming node list is empty, then there * is nothing to filter! */ if (pmix_list_is_empty(nodes)) { return PRTE_SUCCESS; } if (PRTE_SUCCESS != (rc = parse_dash_host(&mapped_nodes, hosts))) { PRTE_ERROR_LOG(rc); return rc; } /* Did we find anything? If not, then do nothing */ if (NULL == mapped_nodes) { return PRTE_SUCCESS; } /* NOTE: The following logic is based on knowing that * any node can only be included on the incoming * nodes list ONCE. */ len_mapped_node = PMIX_ARGV_COUNT_COMPAT(mapped_nodes); /* setup a working list so we can put the final list * of nodes in order. 
This way, if the user specifies a * set of nodes, we will use them in the order in which * they were specifed. Note that empty node requests * will always be appended to the end */ PMIX_CONSTRUCT(&keep, pmix_list_t); for (i = 0; i < len_mapped_node; ++i) { /* check if we are supposed to add some number of empty * nodes here */ if ('*' == mapped_nodes[i][0]) { /* if there is a number after the '*', then we are * to insert a specific # of nodes */ if ('\0' == mapped_nodes[i][1]) { /* take all empty nodes from the list */ num_empty = INT_MAX; want_all_empty = true; } else { /* extract number of nodes to take */ num_empty = strtol(&mapped_nodes[i][1], NULL, 10); } /* search for empty nodes and take them */ item = pmix_list_get_first(nodes); while (0 < num_empty && item != pmix_list_get_end(nodes)) { next = pmix_list_get_next(item); /* save this position */ node = (prte_node_t *) item; /* see if this node is empty */ if (0 == node->slots_inuse) { /* check to see if it is specified later */ for (j = i + 1; j < len_mapped_node; j++) { if (0 == strcmp(mapped_nodes[j], node->name)) { /* specified later - skip this one */ goto skipnode; } } if (remove) { /* remove item from list */ pmix_list_remove_item(nodes, item); /* xfer to keep list */ pmix_list_append(&keep, item); } else { /* mark the node as found */ PRTE_FLAG_SET(node, PRTE_NODE_FLAG_MAPPED); } --num_empty; } skipnode: item = next; } } else { /* remove any modifier */ if (NULL != (cptr = strchr(mapped_nodes[i], ':'))) { *cptr = '\0'; } /* we are looking for a specific node on the list. 
*/ cptr = NULL; lmn = strtoul(mapped_nodes[i], &cptr, 10); item = pmix_list_get_first(nodes); while (item != pmix_list_get_end(nodes)) { next = pmix_list_get_next(item); /* save this position */ node = (prte_node_t *) item; /* search -host list to see if this one is found */ if (prte_managed_allocation && (NULL == cptr || 0 == strlen(cptr))) { /* if we are only given a number, then we test the * value against the number in the node name. This allows support for * launch_id-based environments. For example, a hostname * of "nid0015" can be referenced by "--host 15" */ for (j = strlen(node->name) - 1; 0 < j; j--) { if (!isdigit(node->name[j])) { j++; break; } } if (j >= (int) (strlen(node->name) - 1)) { test = 0; } else { lst = strtoul(&node->name[j], NULL, 10); test = (lmn == lst) ? 0 : 1; } } else { test = (prte_quickmatch(node, mapped_nodes[i])) ? 0 : 1; } if (0 == test) { if (remove) { /* remove item from list */ pmix_list_remove_item(nodes, item); /* xfer to keep list */ pmix_list_append(&keep, item); } else { /* mark the node as found */ PRTE_FLAG_SET(node, PRTE_NODE_FLAG_MAPPED); } break; } item = next; } } /* done with the mapped entry */ free(mapped_nodes[i]); mapped_nodes[i] = NULL; } /* was something specified that was -not- found? 
*/ for (i = 0; i < len_mapped_node; i++) { if (NULL != mapped_nodes[i]) { pmix_show_help("help-dash-host.txt", "not-all-mapped-alloc", true, mapped_nodes[i]); rc = PRTE_ERR_SILENT; goto cleanup; } } if (!remove) { /* all done */ rc = PRTE_SUCCESS; goto cleanup; } /* clear the rest of the nodes list */ while (NULL != (item = pmix_list_remove_first(nodes))) { PMIX_RELEASE(item); } /* the nodes list has been cleared - rebuild it in order */ while (NULL != (item = pmix_list_remove_first(&keep))) { pmix_list_append(nodes, item); } /* did they ask for more than we could provide */ if (!want_all_empty && 0 < num_empty) { pmix_show_help("help-dash-host.txt", "dash-host:not-enough-empty", true, num_empty); rc = PRTE_ERR_SILENT; goto cleanup; } rc = PRTE_SUCCESS; /* done filtering existing list */ cleanup: for (i = 0; i < len_mapped_node; i++) { if (NULL != mapped_nodes[i]) { free(mapped_nodes[i]); mapped_nodes[i] = NULL; } } if (NULL != mapped_nodes) { free(mapped_nodes); } return rc; } int prte_util_get_ordered_dash_host_list(pmix_list_t *nodes, char *hosts) { int rc, i; char **mapped_nodes = NULL; prte_node_t *node; if (PRTE_SUCCESS != (rc = parse_dash_host(&mapped_nodes, hosts))) { PRTE_ERROR_LOG(rc); } /* for each entry, create a node entry on the list */ for (i = 0; NULL != mapped_nodes[i]; i++) { node = PMIX_NEW(prte_node_t); node->name = strdup(mapped_nodes[i]); pmix_list_append(nodes, &node->super); } /* cleanup */ PMIX_ARGV_FREE_COMPAT(mapped_nodes); return rc; } prrte-3.0.13/src/util/dash_host/dash_host.h0000664000175000017500000000273715145263240020776 0ustar alastairalastair/* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. 
All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ /** * @file * */ #ifndef PRTE_UTIL_DASH_HOST_H #define PRTE_UTIL_DASH_HOST_H #include "prte_config.h" #include "src/class/pmix_list.h" #include "src/runtime/prte_globals.h" BEGIN_C_DECLS PRTE_EXPORT int prte_util_add_dash_host_nodes(pmix_list_t *nodes, char *hosts, bool allocating); PRTE_EXPORT int prte_util_filter_dash_host_nodes(pmix_list_t *nodes, char *hosts, bool remove); PRTE_EXPORT int prte_util_get_ordered_dash_host_list(pmix_list_t *nodes, char *hosts); PRTE_EXPORT int prte_util_dash_host_compute_slots(prte_node_t *node, char *hosts); END_C_DECLS #endif prrte-3.0.13/src/util/stacktrace.c0000664000175000017500000005614015145263240017162 0ustar alastairalastair/* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2008-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2017 IBM Corporation. All rights reserved. * Copyright (c) 2017 FUJITSU LIMITED. All rights reserved. * Copyright (c) 2019-2020 Intel, Inc. All rights reserved. * Copyright (c) 2020 Geoffroy Vallee. All rights reserved. * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #include "prte_config.h" #include #ifdef HAVE_UNISTD_H # include #endif #ifdef HAVE_SYS_TYPES_H # include #endif #ifdef HAVE_SYS_STAT_H # include #endif #ifdef HAVE_FCNTL_H # include #else # ifdef HAVE_SYS_FCNTL_H # include # endif #endif #include #include #include "constants.h" #include "src/mca/prtebacktrace/prtebacktrace.h" #include "src/runtime/prte_globals.h" #include "src/util/pmix_argv.h" #include "src/util/error.h" #include "src/util/pmix_output.h" #include "src/util/proc_info.h" #include "src/util/pmix_show_help.h" #include "src/util/stacktrace.h" #ifndef _NSIG # define _NSIG 32 #endif #define HOSTFORMAT "[%s:%05d] " int prte_stacktrace_output_fileno = -1; static char *prte_stacktrace_output_filename_base = NULL; static size_t prte_stacktrace_output_filename_max_len = 0; static char *unable_to_print_msg = "Unable to print stack trace!\n"; /* * Set the stacktrace filename: * stacktrace.PID * -or, if VPID is available- * stacktrace.VPID.PID */ static void set_stacktrace_filename(void) { snprintf(prte_stacktrace_output_filename, prte_stacktrace_output_filename_max_len, "%s.%lu.%lu", prte_stacktrace_output_filename_base, (unsigned long) PRTE_PROC_MY_NAME->rank, (unsigned long) getpid()); return; } /** * This function is being called as a signal-handler in response * to a user-specified signal (e.g. SIGFPE or SIGSEGV). * For Linux/Glibc, it then uses backtrace and backtrace_symbols_fd * to figure the current stack and print that out to stderr. * Where available, the BSD libexecinfo is used to provide Linux/Glibc * compatible backtrace and backtrace_symbols_fd functions. * * @param signo with the signal number raised * @param info with information regarding the reason/send of the signal * @param p * * FIXME: Should distinguish for systems, which don't have siginfo... 
#if PRTE_WANT_PRETTY_PRINT_STACKTRACE
/*
 * Translate an snprintf() return value into the number of bytes it really
 * stored (excluding the terminator) when it was offered "avail" bytes.
 * snprintf() reports the length it WANTED to write, so on truncation the raw
 * value must not be used to advance a cursor.
 */
static int stacktrace_stored_len(int ret, int avail)
{
    if (ret <= 0 || avail <= 0) {
        return 0;
    }
    return (ret < avail) ? ret : avail - 1;
}

/*
 * Signal handler: describe the received signal (number, si_code, errno,
 * faulting address / sender as applicable), print a backtrace, then restore
 * the default disposition and re-raise the signal.
 *
 * Output goes to prte_stacktrace_output_fileno; when a filename buffer has
 * been configured the file is (re)opened here first, falling back to stderr
 * if the open fails.
 *
 * @param signo  the signal number raised
 * @param info   details about the signal; may be NULL
 * @param p      unused
 */
static void show_stackframe(int signo, siginfo_t *info, void *p)
{
    char print_buffer[1024];
    char *tmp = print_buffer;
    int size = sizeof(print_buffer);
    int ret;
    char *si_code_str = "";
    PRTE_HIDE_UNUSED_PARAMS(p);

    /* Do not print the stack trace */
    if (0 > prte_stacktrace_output_fileno && 0 == prte_stacktrace_output_filename_max_len) {
        /* Raise the signal again, so we don't accidentally mask critical signals.
         * For critical signals, it is preferred that we call 'raise' instead of
         * 'exit' or 'abort' so that the return status is set properly for this
         * process.
         */
        signal(signo, SIG_DFL);
        raise(signo);

        return;
    }

    /* Update the file name with the RANK, if available */
    if (0 < prte_stacktrace_output_filename_max_len) {
        set_stacktrace_filename();
        prte_stacktrace_output_fileno = open(prte_stacktrace_output_filename,
                                             O_CREAT | O_WRONLY | O_TRUNC, S_IRUSR | S_IWUSR);
        if (0 > prte_stacktrace_output_fileno) {
            pmix_output(0,
                        "Error: Failed to open the stacktrace output file. Default: "
                        "stderr\n\tFilename: %s\n\tErrno: %s",
                        prte_stacktrace_output_filename, strerror(errno));
            prte_stacktrace_output_fileno = fileno(stderr);
        }
    }

    /* write out the header information */
    memset(print_buffer, 0, sizeof(print_buffer));
    ret = stacktrace_stored_len(snprintf(print_buffer, sizeof(print_buffer),
                                         HOSTFORMAT "*** Process received signal ***\n",
                                         prte_process_info.nodename, getpid()),
                                (int) sizeof(print_buffer));
    if (-1 == write(prte_stacktrace_output_fileno, print_buffer, ret)) {
        return;
    }

    /* From here on the signal description is accumulated in print_buffer:
     * "tmp" is the write cursor and "size" the space left. Every step goes
     * through stacktrace_stored_len() so a long hostname can only truncate
     * the text, never push the cursor past the end of the buffer. */
    memset(print_buffer, 0, sizeof(print_buffer));

#    ifdef HAVE_STRSIGNAL
    ret = stacktrace_stored_len(snprintf(tmp, size, HOSTFORMAT "Signal: %s (%d)\n",
                                         prte_process_info.nodename, getpid(),
                                         strsignal(signo), signo),
                                size);
#    else
    ret = stacktrace_stored_len(snprintf(tmp, size, HOSTFORMAT "Signal: %d\n",
                                         prte_process_info.nodename, getpid(), signo),
                                size);
#    endif
    size -= ret;
    tmp += ret;

    if (NULL != info) {
        switch (signo) {
        case SIGILL:
            switch (info->si_code) {
#    ifdef ILL_ILLOPC
            case ILL_ILLOPC:
                si_code_str = "Illegal opcode";
                break;
#    endif
#    ifdef ILL_ILLOPN
            case ILL_ILLOPN:
                si_code_str = "Illegal operand";
                break;
#    endif
#    ifdef ILL_ILLADR
            case ILL_ILLADR:
                si_code_str = "Illegal addressing mode";
                break;
#    endif
#    ifdef ILL_ILLTRP
            case ILL_ILLTRP:
                si_code_str = "Illegal trap";
                break;
#    endif
#    ifdef ILL_PRVOPC
            case ILL_PRVOPC:
                si_code_str = "Privileged opcode";
                break;
#    endif
#    ifdef ILL_PRVREG
            case ILL_PRVREG:
                si_code_str = "Privileged register";
                break;
#    endif
#    ifdef ILL_COPROC
            case ILL_COPROC:
                si_code_str = "Coprocessor error";
                break;
#    endif
#    ifdef ILL_BADSTK
            case ILL_BADSTK:
                si_code_str = "Internal stack error";
                break;
#    endif
            }
            break;
        case SIGFPE:
            switch (info->si_code) {
#    ifdef FPE_INTDIV
            case FPE_INTDIV:
                si_code_str = "Integer divide-by-zero";
                break;
#    endif
#    ifdef FPE_INTOVF
            case FPE_INTOVF:
                si_code_str = "Integer overflow";
                break;
#    endif
            case FPE_FLTDIV:
                si_code_str = "Floating point divide-by-zero";
                break;
            case FPE_FLTOVF:
                si_code_str = "Floating point overflow";
                break;
            case FPE_FLTUND:
                si_code_str = "Floating point underflow";
                break;
#    ifdef FPE_FLTRES
            case FPE_FLTRES:
                si_code_str = "Floating point inexact result";
                break;
#    endif
#    ifdef FPE_FLTINV
            case FPE_FLTINV:
                si_code_str = "Invalid floating point operation";
                break;
#    endif
#    ifdef FPE_FLTSUB
            case FPE_FLTSUB:
                si_code_str = "Subscript out of range";
                break;
#    endif
            }
            break;
        case SIGSEGV:
            switch (info->si_code) {
#    ifdef SEGV_MAPERR
            case SEGV_MAPERR:
                si_code_str = "Address not mapped";
                break;
#    endif
#    ifdef SEGV_ACCERR
            case SEGV_ACCERR:
                si_code_str = "Invalid permissions";
                break;
#    endif
            }
            break;
        case SIGBUS:
            switch (info->si_code) {
#    ifdef BUS_ADRALN
            case BUS_ADRALN:
                si_code_str = "Invalid address alignment";
                break;
#    endif
#    ifdef BUS_ADRERR
            case BUS_ADRERR:
                si_code_str = "Non-existant physical address";
                break;
#    endif
#    ifdef BUS_OBJERR
            case BUS_OBJERR:
                si_code_str = "Object-specific hardware error";
                break;
#    endif
            }
            break;
        case SIGTRAP:
            switch (info->si_code) {
#    ifdef TRAP_BRKPT
            case TRAP_BRKPT:
                si_code_str = "Process breakpoint";
                break;
#    endif
#    ifdef TRAP_TRACE
            case TRAP_TRACE:
                si_code_str = "Process trace trap";
                break;
#    endif
            }
            break;
        case SIGCHLD:
            switch (info->si_code) {
#    ifdef CLD_EXITED
            case CLD_EXITED:
                si_code_str = "Child has exited";
                break;
#    endif
#    ifdef CLD_KILLED
            case CLD_KILLED:
                si_code_str = "Child has terminated abnormally and did not create a core file";
                break;
#    endif
#    ifdef CLD_DUMPED
            case CLD_DUMPED:
                si_code_str = "Child has terminated abnormally and created a core file";
                break;
#    endif
                /* guard on the same macro the case label uses; the previous
                 * guard tested a different name, so this case was never
                 * compiled in */
#    ifdef CLD_TRAPPED
            case CLD_TRAPPED:
                si_code_str = "Traced child has trapped";
                break;
#    endif
#    ifdef CLD_STOPPED
            case CLD_STOPPED:
                si_code_str = "Child has stopped";
                break;
#    endif
#    ifdef CLD_CONTINUED
            case CLD_CONTINUED:
                si_code_str = "Stopped child has continued";
                break;
#    endif
            }
            break;
#    ifdef SIGPOLL
        case SIGPOLL:
            switch (info->si_code) {
#        ifdef POLL_IN
            case POLL_IN:
                si_code_str = "Data input available";
                break;
#        endif
#        ifdef POLL_OUT
            case POLL_OUT:
                si_code_str = "Output buffers available";
                break;
#        endif
#        ifdef POLL_MSG
            case POLL_MSG:
                si_code_str = "Input message available";
                break;
#        endif
#        ifdef POLL_ERR
            case POLL_ERR:
                si_code_str = "I/O error";
                break;
#        endif
#        ifdef POLL_PRI
            case POLL_PRI:
                si_code_str = "High priority input available";
                break;
#        endif
#        ifdef POLL_HUP
            case POLL_HUP:
                si_code_str = "Device disconnected";
                break;
#        endif
            }
            break;
#    endif /* SIGPOLL */
        default:
            switch (info->si_code) {
#    ifdef SI_ASYNCNL
            case SI_ASYNCNL:
                si_code_str = "SI_ASYNCNL";
                break;
#    endif
#    ifdef SI_SIGIO
            case SI_SIGIO:
                si_code_str = "Queued SIGIO";
                break;
#    endif
#    ifdef SI_ASYNCIO
            case SI_ASYNCIO:
                si_code_str = "Asynchronous I/O request completed";
                break;
#    endif
#    ifdef SI_MESGQ
            case SI_MESGQ:
                si_code_str = "Message queue state changed";
                break;
#    endif
            case SI_TIMER:
                si_code_str = "Timer expiration";
                break;
            case SI_QUEUE:
                si_code_str = "Sigqueue() signal";
                break;
            case SI_USER:
                si_code_str = "User function (kill, sigsend, abort, etc.)";
                break;
#    ifdef SI_KERNEL
            case SI_KERNEL:
                si_code_str = "Kernel signal";
                break;
#    endif
/* Dragonfly defines SI_USER and SI_UNDEFINED both as zero: */
/* For some reason, the PGI compiler will not let us combine these two
   #if tests into a single statement.  Sigh. */
#    if defined(SI_UNDEFINED)
#        if SI_UNDEFINED != SI_USER
            case SI_UNDEFINED:
                si_code_str = "Undefined code";
                break;
#        endif
#    endif
            }
        }

        /* print signal errno information */
        if (0 != info->si_errno) {
            ret = stacktrace_stored_len(snprintf(tmp, size,
                                                 HOSTFORMAT "Associated errno: %s (%d)\n",
                                                 prte_process_info.nodename, getpid(),
                                                 strerror(info->si_errno), info->si_errno),
                                        size);
            size -= ret;
            tmp += ret;
        }

        ret = stacktrace_stored_len(snprintf(tmp, size, HOSTFORMAT "Signal code: %s (%d)\n",
                                             prte_process_info.nodename, getpid(), si_code_str,
                                             info->si_code),
                                    size);
        size -= ret;
        tmp += ret;

        switch (signo) {
        case SIGILL:
        case SIGFPE:
        case SIGSEGV:
        case SIGBUS: {
            ret = stacktrace_stored_len(snprintf(tmp, size,
                                                 HOSTFORMAT "Failing at address: %p\n",
                                                 prte_process_info.nodename, getpid(),
                                                 info->si_addr),
                                        size);
            size -= ret;
            tmp += ret;
            break;
        }
        case SIGCHLD: {
            ret = stacktrace_stored_len(
                snprintf(tmp, size, HOSTFORMAT "Sending PID: %d, Sending UID: %d, Status: %d\n",
                         prte_process_info.nodename, getpid(), info->si_pid, info->si_uid,
                         info->si_status),
                size);
            size -= ret;
            tmp += ret;
            break;
        }
#    ifdef SIGPOLL
        case SIGPOLL: {
#        ifdef HAVE_SIGINFO_T_SI_FD
            ret = stacktrace_stored_len(
                snprintf(tmp, size, HOSTFORMAT "Band event: %ld, File Descriptor : %d\n",
                         prte_process_info.nodename, getpid(), (long) info->si_band,
                         info->si_fd),
                size);
#        elif HAVE_SIGINFO_T_SI_BAND
            ret = stacktrace_stored_len(snprintf(tmp, size, HOSTFORMAT "Band event: %ld\n",
                                                 prte_process_info.nodename, getpid(),
                                                 (long) info->si_band),
                                        size);
#        else
            ret = 0;
#        endif
            size -= ret;
            tmp += ret;
            break;
        }
#    endif
        default:
            break;
        }
    } else {
        ret = stacktrace_stored_len(
            snprintf(tmp, size,
                     HOSTFORMAT "siginfo is NULL, additional information unavailable\n",
                     prte_process_info.nodename, getpid()),
            size);
        size -= ret;
        tmp += ret;
    }

    /* write out the signal information generated above */
    if (-1 == write(prte_stacktrace_output_fileno, print_buffer, sizeof(print_buffer) - size)) {
        return;
    }

    /* print out the stack trace */
    snprintf(print_buffer, sizeof(print_buffer), HOSTFORMAT, prte_process_info.nodename, getpid());
    ret = prte_backtrace_print(NULL, print_buffer, 2);
    if (PRTE_SUCCESS != ret) {
        if (-1 == write(prte_stacktrace_output_fileno, unable_to_print_msg,
                        strlen(unable_to_print_msg))) {
            return;
        }
    }

    /* write out the footer information */
    memset(print_buffer, 0, sizeof(print_buffer));
    ret = stacktrace_stored_len(snprintf(print_buffer, sizeof(print_buffer),
                                         HOSTFORMAT "*** End of error message ***\n",
                                         prte_process_info.nodename, getpid()),
                                (int) sizeof(print_buffer));
    if (ret > 0) {
        if (-1 == write(prte_stacktrace_output_fileno, print_buffer, ret)) {
            return;
        }
    } else {
        if (-1 == write(prte_stacktrace_output_fileno, unable_to_print_msg,
                        strlen(unable_to_print_msg))) {
            return;
        }
    }

    /* only close descriptors we opened ourselves */
    if (fileno(stdout) != prte_stacktrace_output_fileno
        && fileno(stderr) != prte_stacktrace_output_fileno) {
        close(prte_stacktrace_output_fileno);
        prte_stacktrace_output_fileno = -1;
    }

    /* Raise the signal again, so we don't accidentally mask critical signals.
     * For critical signals, it is preferred that we call 'raise' instead of
     * 'exit' or 'abort' so that the return status is set properly for this
     * process.
     */
    signal(signo, SIG_DFL);
    raise(signo);
}

#endif /* PRTE_WANT_PRETTY_PRINT_STACKTRACE */

#if PRTE_WANT_PRETTY_PRINT_STACKTRACE
/*
 * Print the current call stack.
 *
 * When prte_backtrace_buffer() can supply the frames they are sent, one per
 * call, to pmix_output() on the given stream. Otherwise the trace is printed
 * through prte_backtrace_print() to the configured stacktrace destination
 * (nothing is printed when no destination is configured).
 *
 * @param stream  pmix_output stream used for the buffered trace
 */
void prte_stackframe_output(int stream)
{
    int traces_size;
    char **traces;

    /* print out the stack trace */
    if (PRTE_SUCCESS == prte_backtrace_buffer(&traces, &traces_size)) {
        int i;
        /* since we have the opportunity, strip off the bottom two
           function calls, which will be this function and
           prte_backtrace_buffer(). */
        for (i = 2; i < traces_size; ++i) {
            pmix_output(stream, "%s", traces[i]);
        }
        /* release the frame array, as prte_stackframe_output_string() does */
        free(traces);
    } else {
        /* Do not print the stack trace */
        if (0 > prte_stacktrace_output_fileno && 0 == prte_stacktrace_output_filename_max_len) {
            return;
        }

        /* Update the file name with the RANK, if available */
        if (0 < prte_stacktrace_output_filename_max_len) {
            set_stacktrace_filename();
            prte_stacktrace_output_fileno = open(prte_stacktrace_output_filename,
                                                 O_CREAT | O_WRONLY | O_TRUNC, S_IRUSR | S_IWUSR);
            if (0 > prte_stacktrace_output_fileno) {
                pmix_output(0,
                            "Error: Failed to open the stacktrace output file. Default: "
                            "stderr\n\tFilename: %s\n\tErrno: %s",
                            prte_stacktrace_output_filename, strerror(errno));
                prte_stacktrace_output_fileno = fileno(stderr);
            }
        }

        prte_backtrace_print(NULL, NULL, 2);

        if (fileno(stdout) != prte_stacktrace_output_fileno
            && fileno(stderr) != prte_stacktrace_output_fileno) {
            close(prte_stacktrace_output_fileno);
            prte_stacktrace_output_fileno = -1;
        }
    }
}

/*
 * Return the current call stack as one newly allocated string, one frame per
 * line, skipping the first three frames reported by prte_backtrace_buffer().
 *
 * @return malloc'ed string the caller must free, or NULL when the backtrace
 *         is unavailable or memory cannot be allocated
 */
char *prte_stackframe_output_string(void)
{
    int traces_size, i;
    size_t len, flen;
    char *output, *cursor, **traces;

    len = 0;
    if (PRTE_SUCCESS != prte_backtrace_buffer(&traces, &traces_size)) {
        return NULL;
    }

    /* Calculate the space needed for the string */
    for (i = 3; i < traces_size; i++) {
        if (NULL == traces[i]) {
            break;
        }
        len += strlen(traces[i]) + 1;
    }

    output = (char *) malloc(len + 1);
    if (NULL == output) {
        /* don't leak the frame array on the failure path */
        free(traces);
        return NULL;
    }

    /* append each frame followed by a newline, tracking the end of the
     * string ourselves instead of rescanning it for every append */
    cursor = output;
    for (i = 3; i < traces_size; i++) {
        if (NULL == traces[i]) {
            break;
        }
        flen = strlen(traces[i]);
        memcpy(cursor, traces[i], flen);
        cursor += flen;
        *cursor++ = '\n';
    }
    *cursor = '\0';

    free(traces);
    return output;
}

#endif /* PRTE_WANT_PRETTY_PRINT_STACKTRACE */
* * @returnvalue PRTE_SUCCESS * @returnvalue PRTE_ERR_BAD_PARAM if the value in the signal-list * is not a valid signal-number * */ int prte_util_register_stackhandlers(void) { #if PRTE_WANT_PRETTY_PRINT_STACKTRACE struct sigaction act, old; char *tmp; char *next; bool complain, showed_help = false; /* Setup the output stream to use */ if (NULL == prte_stacktrace_output_filename || 0 == strcasecmp(prte_stacktrace_output_filename, "none")) { prte_stacktrace_output_fileno = -1; } else if (0 == strcasecmp(prte_stacktrace_output_filename, "stdout")) { prte_stacktrace_output_fileno = fileno(stdout); } else if (0 == strcasecmp(prte_stacktrace_output_filename, "stderr")) { prte_stacktrace_output_fileno = fileno(stderr); } else if (0 == strcasecmp(prte_stacktrace_output_filename, "file") || 0 == strcasecmp(prte_stacktrace_output_filename, "file:")) { prte_stacktrace_output_filename_base = strdup("stacktrace"); free(prte_stacktrace_output_filename); // Magic number: 8 = space for .PID and .RANK (allow 7 digits each) prte_stacktrace_output_filename_max_len = strlen("stacktrace") + 8 + 8; prte_stacktrace_output_filename = (char *) malloc( sizeof(char) * prte_stacktrace_output_filename_max_len); set_stacktrace_filename(); prte_stacktrace_output_fileno = -1; } else if (0 == strncasecmp(prte_stacktrace_output_filename, "file:", 5)) { char *filename_cpy = NULL; next = strchr(prte_stacktrace_output_filename, ':'); if (NULL == next) { free(prte_stacktrace_output_filename); free(filename_cpy); return PRTE_ERR_NOT_FOUND; } next++; // move past the ':' to the filename specified prte_stacktrace_output_filename_base = strdup(next); free(prte_stacktrace_output_filename); // Magic number: 8 = space for .PID and .RANK (allow 7 digits each) prte_stacktrace_output_filename_max_len = strlen(prte_stacktrace_output_filename_base) + 8 + 8; prte_stacktrace_output_filename = (char *) malloc( sizeof(char) * prte_stacktrace_output_filename_max_len); set_stacktrace_filename(); 
prte_stacktrace_output_fileno = -1; free(filename_cpy); } else { prte_stacktrace_output_fileno = fileno(stderr); } /* Setup the signals to catch */ memset(&act, 0, sizeof(act)); act.sa_sigaction = show_stackframe; act.sa_flags = SA_SIGINFO; # ifdef SA_ONESHOT act.sa_flags |= SA_ONESHOT; # else act.sa_flags |= SA_RESETHAND; # endif for (tmp = next = prte_signal_string; next != NULL && *next != '\0'; tmp = next + 1) { int sig; int ret; complain = false; sig = strtol(tmp, &next, 10); /* * If there is no sensible number in the string, exit. * Similarly for any number which is not in the signal-number range */ if (((0 == sig) && (tmp == next)) || (0 > sig) || (_NSIG <= sig)) { pmix_show_help("help-prte-util.txt", "stacktrace bad signal", true, prte_signal_string, tmp); return PRTE_ERR_SILENT; } else if (next == NULL) { return PRTE_ERR_BAD_PARAM; } else if (':' == *next && 0 == strncasecmp(next, ":complain", 9)) { complain = true; next += 9; } else if (',' != *next && '\0' != *next) { return PRTE_ERR_BAD_PARAM; } /* Just query first */ ret = sigaction(sig, NULL, &old); if (0 != ret) { return PRTE_ERR_IN_ERRNO; } /* Was there something already there? */ if (SIG_IGN != old.sa_handler && SIG_DFL != old.sa_handler) { if (!showed_help && complain) { /* JMS This is icky; there is no error message aggregation here so this message may be repeated for every single MPI process... */ pmix_show_help("help-prte-util.txt", "stacktrace signal override", true, sig, sig, sig, prte_signal_string); showed_help = true; } } /* Nope, nothing was there, so put in ours */ else { if (0 != sigaction(sig, &act, NULL)) { return PRTE_ERR_IN_ERRNO; } } } #endif /* PRTE_WANT_PRETTY_PRINT_STACKTRACE */ return PRTE_SUCCESS; } prrte-3.0.13/src/util/ethtool.h0000664000175000017500000000104215145263240016510 0ustar alastairalastair/* * Copyright (c) 2016 Karol Mroz. All rights reserved. * Copyright (c) 2019 Intel, Inc. All rights reserved. * Copyright (c) 2020 Cisco Systems, Inc. 
All rights reserved * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #ifndef PRTE_ETHTOOL_H #define PRTE_ETHTOOL_H /* * Obtain an appropriate bandwidth for the interface if_name. On Linux, we * get this via an ioctl(). Elsewhere or in the error case, we return the * speed as 0. */ unsigned int prte_ethtool_get_speed(const char *if_name); #endif prrte-3.0.13/src/pmix/0000775000175000017500000000000015145263240014664 5ustar alastairalastairprrte-3.0.13/src/pmix/pmix-internal.h0000664000175000017500000003133115145263240017625 0ustar alastairalastair/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2014-2020 Intel, Inc. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2021-2025 Nanook Consulting All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #ifndef PRTE_PMIX_H #define PRTE_PMIX_H #include "prte_config.h" #ifdef HAVE_SYS_UN_H # include #endif #include "src/class/pmix_list.h" #include "src/event/event-internal.h" #include "src/include/hash_string.h" #include "src/mca/mca.h" #include "src/threads/pmix_threads.h" #include "src/util/error.h" #include "src/util/pmix_error.h" #include "src/util/pmix_name_fns.h" #include "src/util/pmix_printf.h" #include "src/util/proc_info.h" #include #include #include #include BEGIN_C_DECLS PRTE_EXPORT extern int prte_pmix_verbose_output; typedef struct { pmix_list_item_t super; pmix_app_t app; void *info; } prte_pmix_app_t; PMIX_CLASS_DECLARATION(prte_pmix_app_t); /* define another caddy for putting statically defined * pmix_info_t objects on a list */ typedef struct { pmix_list_item_t super; pmix_info_t info; } prte_info_item_t; PMIX_CLASS_DECLARATION(prte_info_item_t); typedef struct { pmix_list_item_t super; pmix_list_t infolist; } prte_info_array_item_t; PMIX_CLASS_DECLARATION(prte_info_array_item_t); typedef struct { pmix_mutex_t mutex; pthread_cond_t cond; volatile bool active; int status; char *msg; } prte_pmix_lock_t; typedef struct { pmix_list_item_t super; pmix_value_t value; } prte_value_t; PMIX_CLASS_DECLARATION(prte_value_t); #if !defined(WORDS_BIGENDIAN) # define PMIX_PROC_NTOH(guid) pmix_proc_ntoh_intr(&(guid)) static inline __prte_attribute_always_inline__ void pmix_proc_ntoh_intr(pmix_proc_t *name) { name->rank = ntohl(name->rank); } # define PMIX_PROC_HTON(guid) pmix_proc_hton_intr(&(guid)) static inline __prte_attribute_always_inline__ void pmix_proc_hton_intr(pmix_proc_t *name) { name->rank = htonl(name->rank); } #else # define PMIX_PROC_NTOH(guid) # define PMIX_PROC_HTON(guid) #endif #define prte_pmix_condition_wait(a, b) pthread_cond_wait(a, &(b)->m_lock_pthread) #define PRTE_PMIX_CONSTRUCT_LOCK(l) \ do { \ PMIX_CONSTRUCT(&(l)->mutex, pmix_mutex_t); \ pthread_cond_init(&(l)->cond, 
/* Tear down a lock set up by PRTE_PMIX_CONSTRUCT_LOCK and release any
 * message string still attached to it. */
#define PRTE_PMIX_DESTRUCT_LOCK(l)        \
    do {                                  \
        PMIX_ACQUIRE_OBJECT((l));         \
        PMIX_DESTRUCT(&(l)->mutex);       \
        pthread_cond_destroy(&(l)->cond); \
        if (NULL != (l)->msg) {           \
            free((l)->msg);               \
        }                                 \
    } while (0)

/* Take the lock: wait until it is no longer marked active, then mark it
 * active again. Returns with the mutex HELD; pair with
 * PRTE_PMIX_RELEASE_THREAD. (The debug and non-debug definitions were
 * identical, so there is a single definition.) */
#define PRTE_PMIX_ACQUIRE_THREAD(lck)                              \
    do {                                                           \
        pmix_mutex_lock(&(lck)->mutex);                            \
        while ((lck)->active) {                                    \
            prte_pmix_condition_wait(&(lck)->cond, &(lck)->mutex); \
        }                                                          \
        (lck)->active = true;                                      \
    } while (0)

/* Block until the lock is no longer marked active, then return with the
 * mutex released. (The two build flavors differed only in passing "&lck"
 * versus "lck" to PMIX_ACQUIRE_OBJECT; the pointer itself is passed, as
 * PRTE_PMIX_WAKEUP_THREAD does for PMIX_POST_OBJECT.) */
#define PRTE_PMIX_WAIT_THREAD(lck)                                 \
    do {                                                           \
        pmix_mutex_lock(&(lck)->mutex);                            \
        while ((lck)->active) {                                    \
            prte_pmix_condition_wait(&(lck)->cond, &(lck)->mutex); \
        }                                                          \
        PMIX_ACQUIRE_OBJECT(lck);                                  \
        pmix_mutex_unlock(&(lck)->mutex);                          \
    } while (0)

/* Give up a lock taken with PRTE_PMIX_ACQUIRE_THREAD: clear the active
 * flag, wake all waiters and unlock the mutex.
 * NOTE(review): the sanity assert sits in the NON-debug variant only, which
 * looks inverted - confirm before moving it. */
#if PRTE_ENABLE_DEBUG
#    define PRTE_PMIX_RELEASE_THREAD(lck)         \
        do {                                      \
            (lck)->active = false;                \
            pthread_cond_broadcast(&(lck)->cond); \
            pmix_mutex_unlock(&(lck)->mutex);     \
        } while (0)
#else
#    define PRTE_PMIX_RELEASE_THREAD(lck)                   \
        do {                                                \
            assert(0 != pmix_mutex_trylock(&(lck)->mutex)); \
            (lck)->active = false;                          \
            pthread_cond_broadcast(&(lck)->cond);           \
            pmix_mutex_unlock(&(lck)->mutex);               \
        } while (0)
#endif

/* Wake every thread blocked in PRTE_PMIX_WAIT_THREAD: take the mutex, clear
 * the active flag, broadcast, and unlock again. */
#define PRTE_PMIX_WAKEUP_THREAD(lck)          \
    do {                                      \
        pmix_mutex_lock(&(lck)->mutex);       \
        (lck)->active = false;                \
        PMIX_POST_OBJECT(lck);                \
        pthread_cond_broadcast(&(lck)->cond); \
        pmix_mutex_unlock(&(lck)->mutex);     \
    } while (0)
PRTE_HASH_STR(str, hash); \ hash &= ~(0x8000); \ } /** * Provide a simplified macro for retrieving modex data * from another process when we don't want the PMIx module * to request it from the server if not found: * * r - the integer return status from the modex op (int) * s - string key (char*) * p - pointer to the pmix_proc_t of the proc that posted * the data (pmix_proc_t*) * d - pointer to a location wherein the data object * is to be returned * t - the expected data type */ #define PRTE_MODEX_RECV_VALUE_OPTIONAL(r, s, p, d, t) \ do { \ pmix_value_t *_kv = NULL; \ pmix_info_t _info; \ size_t _sz; \ PMIX_OUTPUT_VERBOSE((1, prte_pmix_verbose_output, \ "%s[%s:%d] MODEX RECV VALUE OPTIONAL FOR PROC %s KEY %s", \ PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), __FILE__, __LINE__, \ PRTE_NAME_PRINT((p)), (s))); \ PMIX_INFO_LOAD(&_info, PMIX_OPTIONAL, NULL, PMIX_BOOL); \ (r) = PMIx_Get((p), (s), &(_info), 1, &(_kv)); \ if (NULL == _kv) { \ (r) = PMIX_ERR_NOT_FOUND; \ } else if (_kv->type != (t)) { \ (r) = PMIX_ERR_TYPE_MISMATCH; \ } else if (PMIX_SUCCESS == (r)) { \ PMIX_VALUE_UNLOAD((r), _kv, (void **) (d), &_sz); \ } \ if (NULL != _kv) { \ PMIX_VALUE_RELEASE(_kv); \ } \ } while (0); #define PRTE_PMIX_SHOW_HELP "prte.show.help" /* PRTE attribute */ typedef uint16_t prte_attribute_key_t; #define PRTE_ATTR_KEY_T PRTE_UINT16 typedef struct { pmix_list_item_t super; /* required for this to be on lists */ prte_attribute_key_t key; /* key identifier */ bool local; // whether or not to pack/send this value pmix_value_t data; } prte_attribute_t; PRTE_EXPORT PMIX_CLASS_DECLARATION(prte_attribute_t); /* some helper functions */ PRTE_EXPORT pmix_proc_state_t prte_pmix_convert_state(int state); PRTE_EXPORT int prte_pmix_convert_pstate(pmix_proc_state_t); PRTE_EXPORT pmix_status_t prte_pmix_convert_rc(int rc); PRTE_EXPORT int prte_pmix_convert_status(pmix_status_t status); PRTE_EXPORT pmix_status_t prte_pmix_convert_job_state_to_error(int state); PRTE_EXPORT pmix_status_t 
prte_pmix_convert_proc_state_to_error(int state); PRTE_EXPORT int prte_pmix_register_cleanup(char *path, bool directory, bool ignore, bool jobscope); #ifndef PMIX_DATA_BUFFER_STATIC_INIT #define PMIX_DATA_BUFFER_STATIC_INIT \ { \ .base_ptr = NULL, \ .pack_ptr = NULL, \ .unpack_ptr = NULL, \ .bytes_allocated = 0, \ .bytes_used = 0 \ } #endif #define PRTE_MCA_BASE_VERSION_3_0_0(type, type_major, type_minor, type_release) \ PMIX_MCA_BASE_VERSION_2_1_0("prte", PRTE_MAJOR_VERSION, PRTE_MINOR_VERSION, \ PRTE_RELEASE_VERSION, type, type_major, type_minor, type_release) #if PMIX_NUMERIC_VERSION < 0x00040203 #define PMIX_ARGV_JOIN_COMPAT(a, b) \ pmix_argv_join(a, b) #else #define PMIX_ARGV_JOIN_COMPAT(a, b) \ PMIx_Argv_join(a, b) #endif #if PMIX_NUMERIC_VERSION < 0x00040203 #define PMIX_ARGV_SPLIT_COMPAT(a, b) \ pmix_argv_split(a, b) #else #define PMIX_ARGV_SPLIT_COMPAT(a, b) \ PMIx_Argv_split(a, b) #endif #if PMIX_NUMERIC_VERSION < 0x00040203 #define PMIX_ARGV_SPLIT_WITH_EMPTY_COMPAT(a, b) \ pmix_argv_split_with_empty(a, b) #else #define PMIX_ARGV_SPLIT_WITH_EMPTY_COMPAT(a, b) \ PMIx_Argv_split_with_empty(a, b) #endif #if PMIX_NUMERIC_VERSION < 0x00040203 #define PMIX_ARGV_COUNT_COMPAT(a) \ pmix_argv_count(a) #else #define PMIX_ARGV_COUNT_COMPAT(a) \ PMIx_Argv_count(a) #endif #if PMIX_NUMERIC_VERSION < 0x00040203 #define PMIX_ARGV_FREE_COMPAT(a) \ pmix_argv_free(a) #else #define PMIX_ARGV_FREE_COMPAT(a) \ PMIx_Argv_free(a) #endif #if PMIX_NUMERIC_VERSION < 0x00040203 #define PMIX_ARGV_APPEND_UNIQUE_COMPAT(a, b) \ pmix_argv_append_unique_nosize(a, b) #else #define PMIX_ARGV_APPEND_UNIQUE_COMPAT(a, b) \ PMIx_Argv_append_unique_nosize(a, b) #endif #if PMIX_NUMERIC_VERSION < 0x00040203 #define PMIX_ARGV_APPEND_NOSIZE_COMPAT(a, b) \ pmix_argv_append_nosize(a, b) #else #define PMIX_ARGV_APPEND_NOSIZE_COMPAT(a, b) \ PMIx_Argv_append_nosize(a, b) #endif #if PMIX_NUMERIC_VERSION < 0x00040203 #define PMIX_ARGV_COPY_COMPAT(a) \ pmix_argv_copy(a) #else #define PMIX_ARGV_COPY_COMPAT(a) 
\ PMIx_Argv_copy(a) #endif #if PMIX_NUMERIC_VERSION < 0x00040203 #define PMIX_SETENV_COMPAT(a, b, c, d) \ pmix_setenv(a, b, c, d) #else #define PMIX_SETENV_COMPAT(a, b, c, d) \ PMIx_Setenv(a, b, c, d) #endif END_C_DECLS #endif prrte-3.0.13/src/pmix/Makefile.am0000664000175000017500000000234615145263240016725 0ustar alastairalastair# -*- makefile -*- # # Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. # Copyright (c) 2004-2016 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. # Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2014-2020 Cisco Systems, Inc. All rights reserved # Copyright (c) 2015 Research Organization for Information Science # and Technology (RIST). All rights reserved. # Copyright (c) 2017-2020 Intel, Inc. All rights reserved. # Copyright (c) 2021 Nanook Consulting. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow # # $HEADER$ # # This makefile.am does not stand on its own - it is included from src/Makefile.am # Source code files headers += \ pmix/pmix-internal.h libprrte_la_SOURCES += \ pmix/pmix.c prrte-3.0.13/src/pmix/pmix.c0000664000175000017500000003412415145263240016011 0ustar alastairalastair/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014-2020 Intel, Inc. All rights reserved. * Copyright (c) 2014-2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2016-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2020 IBM Corporation. 
All rights reserved. * Copyright (c) 2021-2025 Nanook Consulting All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ * */ #include "prte_config.h" #include "src/include/constants.h" #include #include #include #ifdef HAVE_UNISTD_H # include #endif #include "src/class/pmix_hash_table.h" #include "src/mca/plm/base/plm_private.h" #include "src/pmix/pmix-internal.h" #include "src/runtime/prte_globals.h" #include "src/threads/pmix_threads.h" #include "src/util/proc_info.h" pmix_status_t prte_pmix_convert_rc(int rc) { switch (rc) { case PRTE_ERR_HEARTBEAT_ALERT: case PRTE_ERR_FILE_ALERT: case PRTE_ERR_HEARTBEAT_LOST: case PRTE_ERR_SENSOR_LIMIT_EXCEEDED: return PMIX_ERR_JOB_SENSOR_BOUND_EXCEEDED; case PRTE_ERR_NO_EXE_SPECIFIED: case PRTE_ERR_NO_APP_SPECIFIED: return PMIX_ERR_JOB_NO_EXE_SPECIFIED; case PRTE_ERR_SLOT_LIST_RANGE: case PRTE_ERR_TOPO_SOCKET_NOT_SUPPORTED: case PRTE_ERR_INVALID_PHYS_CPU: case PRTE_ERR_TOPO_CORE_NOT_SUPPORTED: case PRTE_ERR_TOPO_SLOT_LIST_NOT_SUPPORTED: case PRTE_ERR_MULTIPLE_AFFINITIES: case PRTE_ERR_FAILED_TO_MAP: return PMIX_ERR_JOB_FAILED_TO_MAP; case PRTE_ERR_JOB_CANCELLED: return PMIX_ERR_JOB_CANCELED; case PRTE_ERR_DEBUGGER_RELEASE: return PMIX_ERR_DEBUGGER_RELEASE; case PRTE_ERR_HANDLERS_COMPLETE: return PMIX_EVENT_ACTION_COMPLETE; case PRTE_ERR_PROC_ABORTED: return PMIX_ERR_PROC_ABORTED; case PRTE_ERR_PROC_REQUESTED_ABORT: return PMIX_ERR_PROC_REQUESTED_ABORT; case PRTE_ERR_PROC_ABORTING: return PMIX_ERR_PROC_ABORTING; case PRTE_ERR_NODE_DOWN: return PMIX_ERR_NODE_DOWN; case PRTE_ERR_NODE_OFFLINE: return PMIX_ERR_NODE_OFFLINE; case PRTE_ERR_JOB_TERMINATED: return PMIX_EVENT_JOB_END; case PRTE_ERR_PROC_RESTART: return PMIX_ERR_PROC_RESTART; case PRTE_ERR_PROC_CHECKPOINT: return PMIX_ERR_PROC_CHECKPOINT; case PRTE_ERR_PROC_MIGRATE: return PMIX_ERR_PROC_MIGRATE; case PRTE_ERR_EVENT_REGISTRATION: return PMIX_ERR_EVENT_REGISTRATION; case PRTE_ERR_NOT_IMPLEMENTED: case PRTE_ERR_NOT_SUPPORTED: return 
PMIX_ERR_NOT_SUPPORTED; case PRTE_ERR_NOT_FOUND: return PMIX_ERR_NOT_FOUND; case PRTE_ERR_PERM: case PRTE_ERR_UNREACH: case PRTE_ERR_SERVER_NOT_AVAIL: return PMIX_ERR_UNREACH; case PRTE_ERR_BAD_PARAM: return PMIX_ERR_BAD_PARAM; case PRTE_ERR_SYS_LIMITS_PIPES: case PRTE_ERR_SYS_LIMITS_CHILDREN: case PRTE_ERR_SOCKET_NOT_AVAILABLE: case PRTE_ERR_NOT_ENOUGH_CORES: case PRTE_ERR_NOT_ENOUGH_SOCKETS: return PMIX_ERR_JOB_INSUFFICIENT_RESOURCES; case PRTE_ERR_PIPE_READ_FAILURE: return PMIX_ERR_JOB_SYS_OP_FAILED; case PRTE_ERR_OUT_OF_RESOURCE: return PMIX_ERR_OUT_OF_RESOURCE; case PRTE_ERR_DATA_VALUE_NOT_FOUND: return PMIX_ERR_DATA_VALUE_NOT_FOUND; case PRTE_ERR_WDIR_NOT_FOUND: return PMIX_ERR_JOB_WDIR_NOT_FOUND; case PRTE_ERR_EXE_NOT_FOUND: case PRTE_ERR_EXE_NOT_ACCESSIBLE: return PMIX_ERR_JOB_EXE_NOT_FOUND; case PRTE_ERR_TIMEOUT: return PMIX_ERR_TIMEOUT; case PRTE_ERR_WOULD_BLOCK: return PMIX_ERR_WOULD_BLOCK; case PRTE_EXISTS: return PMIX_EXISTS; case PRTE_ERR_PARTIAL_SUCCESS: return PMIX_QUERY_PARTIAL_SUCCESS; case PRTE_ERR_MODEL_DECLARED: return PMIX_MODEL_DECLARED; case PRTE_ERROR: return PMIX_ERROR; case PRTE_SUCCESS: return PMIX_SUCCESS; default: return PMIX_ERROR; } } int prte_pmix_convert_status(pmix_status_t status) { switch (status) { case PMIX_ERR_DEBUGGER_RELEASE: return PRTE_ERR_DEBUGGER_RELEASE; case PMIX_EVENT_ACTION_COMPLETE: return PRTE_ERR_HANDLERS_COMPLETE; case PMIX_ERR_PROC_ABORTED: return PRTE_ERR_PROC_ABORTED; case PMIX_ERR_PROC_REQUESTED_ABORT: return PRTE_ERR_PROC_REQUESTED_ABORT; case PMIX_ERR_PROC_ABORTING: return PRTE_ERR_PROC_ABORTING; case PMIX_ERR_NODE_DOWN: return PRTE_ERR_NODE_DOWN; case PMIX_ERR_NODE_OFFLINE: return PRTE_ERR_NODE_OFFLINE; case PMIX_EVENT_JOB_END: return PRTE_ERR_JOB_TERMINATED; case PMIX_ERR_PROC_RESTART: return PRTE_ERR_PROC_RESTART; case PMIX_ERR_PROC_CHECKPOINT: return PRTE_ERR_PROC_CHECKPOINT; case PMIX_ERR_PROC_MIGRATE: return PRTE_ERR_PROC_MIGRATE; case PMIX_ERR_EVENT_REGISTRATION: return PRTE_ERR_EVENT_REGISTRATION; 
case PMIX_ERR_NOT_SUPPORTED: return PRTE_ERR_NOT_SUPPORTED; case PMIX_ERR_NOT_FOUND: return PRTE_ERR_NOT_FOUND; case PMIX_ERR_OUT_OF_RESOURCE: return PRTE_ERR_OUT_OF_RESOURCE; case PMIX_ERR_INIT: return PRTE_ERROR; case PMIX_ERR_BAD_PARAM: return PRTE_ERR_BAD_PARAM; case PMIX_ERR_UNREACH: case PMIX_ERR_NO_PERMISSIONS: return PRTE_ERR_UNREACH; case PMIX_ERR_TIMEOUT: return PRTE_ERR_TIMEOUT; case PMIX_ERR_WOULD_BLOCK: return PRTE_ERR_WOULD_BLOCK; case PMIX_ERR_LOST_CONNECTION: return PRTE_ERR_COMM_FAILURE; case PMIX_EXISTS: return PRTE_EXISTS; case PMIX_QUERY_PARTIAL_SUCCESS: return PRTE_ERR_PARTIAL_SUCCESS; case PMIX_MONITOR_HEARTBEAT_ALERT: return PRTE_ERR_HEARTBEAT_ALERT; case PMIX_MONITOR_FILE_ALERT: return PRTE_ERR_FILE_ALERT; case PMIX_MODEL_DECLARED: return PRTE_ERR_MODEL_DECLARED; case PMIX_ERROR: return PRTE_ERROR; case PMIX_ERR_SILENT: return PRTE_ERR_SILENT; case PMIX_SUCCESS: case PMIX_OPERATION_SUCCEEDED: return PRTE_SUCCESS; case PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER: return PRTE_ERR_UNPACK_READ_PAST_END_OF_BUFFER; default: return status; } } pmix_proc_state_t prte_pmix_convert_state(int state) { switch (state) { case 0: return PMIX_PROC_STATE_UNDEF; case 1: return PMIX_PROC_STATE_LAUNCH_UNDERWAY; case 2: return PMIX_PROC_STATE_RESTART; case 3: return PMIX_PROC_STATE_TERMINATE; case 4: return PMIX_PROC_STATE_RUNNING; case 5: return PMIX_PROC_STATE_CONNECTED; case 51: return PMIX_PROC_STATE_KILLED_BY_CMD; case 52: return PMIX_PROC_STATE_ABORTED; case 53: return PMIX_PROC_STATE_FAILED_TO_START; case 54: return PMIX_PROC_STATE_ABORTED_BY_SIG; case 55: return PMIX_PROC_STATE_TERM_WO_SYNC; case 56: return PMIX_PROC_STATE_COMM_FAILED; case 58: return PMIX_PROC_STATE_CALLED_ABORT; case 59: return PMIX_PROC_STATE_MIGRATING; case 61: return PMIX_PROC_STATE_CANNOT_RESTART; case 62: return PMIX_PROC_STATE_TERM_NON_ZERO; case 63: return PMIX_PROC_STATE_FAILED_TO_LAUNCH; default: return PMIX_PROC_STATE_UNDEF; } } int prte_pmix_convert_pstate(pmix_proc_state_t 
state) { switch (state) { case PMIX_PROC_STATE_UNDEF: return 0; case PMIX_PROC_STATE_PREPPED: case PMIX_PROC_STATE_LAUNCH_UNDERWAY: return 1; case PMIX_PROC_STATE_RESTART: return 2; case PMIX_PROC_STATE_TERMINATE: return 3; case PMIX_PROC_STATE_RUNNING: return 4; case PMIX_PROC_STATE_CONNECTED: return 5; case PMIX_PROC_STATE_UNTERMINATED: return 15; case PMIX_PROC_STATE_TERMINATED: return 20; case PMIX_PROC_STATE_KILLED_BY_CMD: return 51; case PMIX_PROC_STATE_ABORTED: return 52; case PMIX_PROC_STATE_FAILED_TO_START: return 53; case PMIX_PROC_STATE_ABORTED_BY_SIG: return 54; case PMIX_PROC_STATE_TERM_WO_SYNC: return 55; case PMIX_PROC_STATE_COMM_FAILED: return 56; case PMIX_PROC_STATE_CALLED_ABORT: return 58; case PMIX_PROC_STATE_MIGRATING: return 60; case PMIX_PROC_STATE_CANNOT_RESTART: return 61; case PMIX_PROC_STATE_TERM_NON_ZERO: return 62; case PMIX_PROC_STATE_FAILED_TO_LAUNCH: return 63; default: return 0; // undef } } pmix_status_t prte_pmix_convert_job_state_to_error(int state) { switch (state) { case PRTE_JOB_STATE_ALLOC_FAILED: return PMIX_ERR_JOB_ALLOC_FAILED; case PRTE_JOB_STATE_MAP_FAILED: return PMIX_ERR_JOB_FAILED_TO_MAP; case PRTE_JOB_STATE_NEVER_LAUNCHED: case PRTE_JOB_STATE_FAILED_TO_LAUNCH: case PRTE_JOB_STATE_FAILED_TO_START: case PRTE_JOB_STATE_CANNOT_LAUNCH: return PMIX_ERR_JOB_FAILED_TO_LAUNCH; case PRTE_JOB_STATE_KILLED_BY_CMD: return PMIX_ERR_JOB_CANCELED; case PRTE_JOB_STATE_ABORTED: case PRTE_JOB_STATE_CALLED_ABORT: case PRTE_JOB_STATE_SILENT_ABORT: return PMIX_ERR_JOB_ABORTED; case PRTE_JOB_STATE_ABORTED_BY_SIG: return PMIX_ERR_JOB_ABORTED_BY_SIG; case PRTE_JOB_STATE_ABORTED_WO_SYNC: return PMIX_ERR_JOB_TERM_WO_SYNC; case PRTE_JOB_STATE_TERMINATED: return PMIX_EVENT_JOB_END; default: return PMIX_ERROR; } } pmix_status_t prte_pmix_convert_proc_state_to_error(int state) { switch (state) { case PRTE_PROC_STATE_KILLED_BY_CMD: return PMIX_ERR_JOB_CANCELED; case PRTE_PROC_STATE_ABORTED: case PRTE_PROC_STATE_CALLED_ABORT: return 
PMIX_ERR_JOB_ABORTED; case PRTE_PROC_STATE_ABORTED_BY_SIG: return PMIX_ERR_JOB_ABORTED_BY_SIG; case PRTE_PROC_STATE_FAILED_TO_LAUNCH: case PRTE_PROC_STATE_FAILED_TO_START: return PMIX_ERR_JOB_FAILED_TO_LAUNCH; case PRTE_PROC_STATE_TERM_WO_SYNC: return PMIX_ERR_JOB_TERM_WO_SYNC; case PRTE_PROC_STATE_COMM_FAILED: case PRTE_PROC_STATE_UNABLE_TO_SEND_MSG: case PRTE_PROC_STATE_LIFELINE_LOST: case PRTE_PROC_STATE_NO_PATH_TO_TARGET: case PRTE_PROC_STATE_FAILED_TO_CONNECT: case PRTE_PROC_STATE_PEER_UNKNOWN: return PMIX_ERR_COMM_FAILURE; case PRTE_PROC_STATE_CANNOT_RESTART: return PMIX_ERR_PROC_RESTART; case PRTE_PROC_STATE_TERM_NON_ZERO: return PMIX_ERR_JOB_NON_ZERO_TERM; case PRTE_PROC_STATE_SENSOR_BOUND_EXCEEDED: return PMIX_ERR_JOB_SENSOR_BOUND_EXCEEDED; default: return PMIX_ERROR; } } static void cleanup_cbfunc(pmix_status_t status, pmix_info_t *info, size_t ninfo, void *cbdata, pmix_release_cbfunc_t release_fn, void *release_cbdata) { prte_pmix_lock_t *lk = (prte_pmix_lock_t *) cbdata; PRTE_HIDE_UNUSED_PARAMS(info, ninfo); PMIX_POST_OBJECT(lk); /* let the library release the data and cleanup from * the operation */ if (NULL != release_fn) { release_fn(release_cbdata); } /* release the block */ lk->status = status; PRTE_PMIX_WAKEUP_THREAD(lk); } int prte_pmix_register_cleanup(char *path, bool directory, bool ignore, bool jobscope) { prte_pmix_lock_t lk; pmix_info_t pinfo[3]; size_t n, ninfo = 0; pmix_status_t rc, ret; PRTE_PMIX_CONSTRUCT_LOCK(&lk); if (ignore) { /* they want this path ignored */ PMIX_INFO_LOAD(&pinfo[ninfo], PMIX_CLEANUP_IGNORE, path, PMIX_STRING); ++ninfo; } else { if (directory) { PMIX_INFO_LOAD(&pinfo[ninfo], PMIX_REGISTER_CLEANUP_DIR, path, PMIX_STRING); ++ninfo; /* recursively cleanup directories */ PMIX_INFO_LOAD(&pinfo[ninfo], PMIX_CLEANUP_RECURSIVE, NULL, PMIX_BOOL); ++ninfo; } else { /* order cleanup of the provided path */ PMIX_INFO_LOAD(&pinfo[ninfo], PMIX_REGISTER_CLEANUP, path, PMIX_STRING); ++ninfo; } } /* if they want this applied to the 
job, then indicate so */ if (jobscope) { rc = PMIx_Job_control_nb(NULL, 0, pinfo, ninfo, cleanup_cbfunc, (void *) &lk); } else { rc = PMIx_Job_control_nb(PRTE_PROC_MY_NAME, 1, pinfo, ninfo, cleanup_cbfunc, (void *) &lk); } if (PMIX_SUCCESS != rc) { ret = rc; } else { PRTE_PMIX_WAIT_THREAD(&lk); ret = lk.status; } PRTE_PMIX_DESTRUCT_LOCK(&lk); for (n = 0; n < ninfo; n++) { PMIX_INFO_DESTRUCT(&pinfo[n]); } return ret; } /* CLASS INSTANTIATIONS */ static void acon(prte_pmix_app_t *p) { PMIX_APP_CONSTRUCT(&p->app); PMIX_INFO_LIST_START(p->info); } static void ades(prte_pmix_app_t *p) { PMIX_APP_DESTRUCT(&p->app); PMIX_INFO_LIST_RELEASE(p->info); } PMIX_CLASS_INSTANCE(prte_pmix_app_t, pmix_list_item_t, acon, ades); static void infoitmcon(prte_info_item_t *p) { PMIX_INFO_CONSTRUCT(&p->info); } static void infoitdecon(prte_info_item_t *p) { PMIX_INFO_DESTRUCT(&p->info); } PRTE_EXPORT PMIX_CLASS_INSTANCE(prte_info_item_t, pmix_list_item_t, infoitmcon, infoitdecon); static void arritmcon(prte_info_array_item_t *p) { PMIX_CONSTRUCT(&p->infolist, pmix_list_t); } static void arritdecon(prte_info_array_item_t *p) { PMIX_LIST_DESTRUCT(&p->infolist); } PRTE_EXPORT PMIX_CLASS_INSTANCE(prte_info_array_item_t, pmix_list_item_t, arritmcon, arritdecon); static void pvcon(prte_value_t *p) { PMIX_VALUE_CONSTRUCT(&p->value); } static void pvdes(prte_value_t *p) { PMIX_VALUE_DESTRUCT(&p->value); } PRTE_EXPORT PMIX_CLASS_INSTANCE(prte_value_t, pmix_list_item_t, pvcon, pvdes); prrte-3.0.13/src/rml/0000775000175000017500000000000015145263240014501 5ustar alastairalastairprrte-3.0.13/src/rml/rml.h0000664000175000017500000002530415145263240015450 0ustar alastairalastair/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. 
All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2021-2023 Nanook Consulting. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ /** * @file * * Runtime Messaging Layer (RML) Communication Interface * * The Runtime Messaging Layer (RML) provices basic point-to-point * communication between PRTE processes. The system is available for * most architectures, with some exceptions (the Cray XT3/XT4, for example). */ #ifndef PRTE_RML_H_ #define PRTE_RML_H_ #include "prte_config.h" #include "types.h" #ifdef HAVE_UNISTD_H # include #endif #include "src/rml/rml_types.h" #include "src/pmix/pmix-internal.h" BEGIN_C_DECLS /** * Send a buffer non-blocking message * * Send a buffer to the specified peer. The call * will return immediately, although the buffer may not be modified * until the completion callback is triggered. The buffer *may* be * passed to another call to send_nb before the completion callback is * triggered. The callback being triggered does not give any * indication of remote completion. 
* * @param[in] peer Name of receiving process * @param[in] buffer Pointer to buffer to be sent * @param[in] tag User defined tag for matching send/recv * @param[in] cbfunc Callback function on message comlpetion * @param[in] cbdata User data to provide during completion callback * * @retval PRTE_SUCCESS The message was successfully started * @retval PRTE_ERR_BAD_PARAM One of the parameters was invalid * @retval PRTE_ERR_ADDRESSEE_UNKNOWN Contact information for the * receiving process is not available * @retval PRTE_ERROR An unspecified error occurred */ PRTE_EXPORT int prte_rml_send_buffer_nb(pmix_rank_t rank, pmix_data_buffer_t *buffer, prte_rml_tag_t tag); #define PRTE_RML_SEND(_r, r, b, t) \ do { \ pmix_output_verbose(2, prte_rml_base.rml_output, \ "RML-SEND(%s:%d): %s:%s:%d", \ PMIX_RANK_PRINT(r), t, \ __FILE__, __func__, __LINE__); \ (_r) = prte_rml_send_buffer_nb(r, b, t); \ } while(0) /** * Purge the RML/OOB of contact info and pending messages * to/from a specified process. Used when a process aborts * and is to be restarted */ PRTE_EXPORT void prte_rml_purge(pmix_proc_t *peer); #define PRTE_RML_PURGE(p) \ do { \ pmix_output_verbose(2, prte_rml_base.rml_output, \ "RML-PURGE(%s): %s:%s:%d", \ PMIX_NAME_PRINT(p), \ __FILE__, __func__, __LINE__); \ prte_rml_purge(p); \ } while(0) /** * Receive a buffer non-blocking message * * @param[in] peer Peer process or PRTE_NAME_WILDCARD for wildcard receive * @param[in] tag User defined tag for matching send/recv * @param[in] persistent Boolean flag indicating whether or not this is a one-time recv * @param[in] cbfunc Callback function on message comlpetion * @param[in] cbdata User data to provide during completion callback */ PRTE_EXPORT void prte_rml_recv_buffer_nb(pmix_proc_t *peer, prte_rml_tag_t tag, bool persistent, prte_rml_buffer_callback_fn_t cbfunc, void *cbdata); #define PRTE_RML_RECV(p, t, prs, c, cb) \ do { \ pmix_output_verbose(2, prte_rml_base.rml_output, \ "RML-RECV(%d): %s:%s:%d", \ t, __FILE__, 
__func__, __LINE__); \ prte_rml_recv_buffer_nb(p, t, prs, c, cb); \ } while(0) /** * Cancel a posted non-blocking receive * * Attempt to cancel a posted non-blocking receive. * * @param[in] peer Peer process or PRTE_NAME_WILDCARD, exactly as passed * to the non-blocking receive call * @param[in] tag Posted receive tag */ PRTE_EXPORT void prte_rml_recv_cancel(pmix_proc_t *peer, prte_rml_tag_t tag); #define PRTE_RML_CANCEL(p, t) \ do { \ pmix_output_verbose(2, prte_rml_base.rml_output, \ "RML-CANCEL(%d): %s:%s:%d", \ t, __FILE__, __func__, __LINE__); \ prte_rml_recv_cancel(p, t); \ } while(0) typedef struct { int rml_output; int routed_output; int max_retries; pmix_list_t posted_recvs; pmix_list_t unmatched_msgs; pmix_rank_t lifeline; pmix_list_t children; int radix; bool static_ports; } prte_rml_base_t; PRTE_EXPORT extern prte_rml_base_t prte_rml_base; PRTE_EXPORT void prte_rml_register(void); PRTE_EXPORT void prte_rml_close(void); PRTE_EXPORT void prte_rml_open(void); /* common implementations */ PRTE_EXPORT void prte_rml_base_post_recv(int sd, short args, void *cbdata); PRTE_EXPORT void prte_rml_base_process_msg(int fd, short flags, void *cbdata); PRTE_EXPORT void prte_rml_send_callback(int status, pmix_proc_t *peer, pmix_data_buffer_t *buffer, prte_rml_tag_t tag, void *cbdata); PRTE_EXPORT void prte_rml_compute_routing_tree(void); PRTE_EXPORT int prte_rml_get_num_contributors(pmix_rank_t *dmns, size_t ndmns); PRTE_EXPORT int prte_rml_route_lost(pmix_rank_t route); PRTE_EXPORT pmix_rank_t prte_rml_get_route(pmix_rank_t target); #define PRTE_RML_POST_MESSAGE(p, t, s, b, l) \ do { \ prte_rml_recv_t *msg; \ pmix_status_t _rc; \ pmix_byte_object_t _bo; \ pmix_output_verbose(5, prte_rml_base.rml_output, \ "%s Message posted at %s:%d for tag %d", \ PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), __FILE__, __LINE__, (t)); \ msg = PMIX_NEW(prte_rml_recv_t); \ PMIX_XFER_PROCID(&msg->sender, (p)); \ msg->tag = (t); \ msg->seq_num = (s); \ _bo.bytes = (char *) (b); \ _bo.size = (l); \ 
PMIX_DATA_BUFFER_CREATE(msg->dbuf); \ _rc = PMIx_Data_load(msg->dbuf, &_bo); \ if (PMIX_SUCCESS != _rc) { \ PMIX_ERROR_LOG(_rc); \ } \ /* setup the event */ \ prte_event_set(prte_event_base, &msg->ev, -1, PRTE_EV_WRITE, \ prte_rml_base_process_msg, msg); \ prte_event_active(&msg->ev, PRTE_EV_WRITE, 1); \ } while (0); #define PRTE_RML_ACTIVATE_MESSAGE(m) \ do { \ /* setup the event */ \ prte_event_set(prte_event_base, &(m)->ev, -1, PRTE_EV_WRITE, \ prte_rml_base_process_msg, (m)); \ prte_event_active(&(m)->ev, PRTE_EV_WRITE, 1); \ } while (0); #define PRTE_RML_SEND_COMPLETE(m) \ do { \ pmix_output_verbose(5, prte_rml_base.rml_output, \ "%s-%s Send message complete at %s:%d", \ PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&((m)->dst)), \ __FILE__, __LINE__); \ /* non-blocking buffer send */ \ prte_rml_send_callback((m)->status, &((m)->dst), \ (m)->dbuf, (m)->tag, (m)->cbdata); \ PMIX_RELEASE(m); \ } while (0); END_C_DECLS #endif prrte-3.0.13/src/rml/rml_contact.h0000664000175000017500000000320715145263240017161 0ustar alastairalastair/* * Copyright (c) 2007 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2017-2019 Intel, Inc. All rights reserved. * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ /** * @file * * Interface for manipulating how the RML receives contact information * * Interface for manipulating how the RML receives contact * information. These functions are generally used during prte_init * and prte_finalize. */ #include "prte_config.h" #include "types.h" #include "src/pmix/pmix-internal.h" BEGIN_C_DECLS /** * Parse a contact information string * * Parse a contact infromation string, such as that returned by * prte_rml.get_contact_info(). Generally used to extract the peer * name from a contact information string. 
It can also be used to * extract the contact URI strings, although this is slightly less * useful as the URIs may be RML componenent specific and not have * general meaning. * * @param[in] contact_info Contact information string for peer * @param[out] peer Peer name in contact_info * @param[out] uris URI strings for peer. May be NULL if * information is not needed * * @retval PRTE_SUCCESS Information successfully extraced * @retval PRTE_ERR_BAD_PARAM The contact_info was not a valid string * @retval PRTE_ERROR An unspecified error occurred */ PRTE_EXPORT int prte_rml_parse_uris(const char *contact_inf, pmix_proc_t *peer, char ***uris); END_C_DECLS prrte-3.0.13/src/rml/rml_base_msg_handlers.c0000664000175000017500000002176515145263240021172 0ustar alastairalastair/* -*- C -*- * * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2015-2019 Intel, Inc. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ /** @file: * */ /* * includes */ #include "prte_config.h" #include #include "constants.h" #include "types.h" #include "src/class/pmix_list.h" #include "src/util/pmix_output.h" #include "src/mca/errmgr/errmgr.h" #include "src/runtime/prte_globals.h" #include "src/runtime/prte_wait.h" #include "src/threads/pmix_threads.h" #include "src/util/name_fns.h" #include "src/util/nidmap.h" #include "src/rml/rml_contact.h" #include "src/rml/rml.h" static void msg_match_recv(prte_rml_posted_recv_t *rcv, bool get_all); void prte_rml_base_post_recv(int sd, short args, void *cbdata) { prte_rml_recv_request_t *req = (prte_rml_recv_request_t *) cbdata; prte_rml_posted_recv_t *post, *recv; PRTE_HIDE_UNUSED_PARAMS(sd, args); PMIX_ACQUIRE_OBJECT(req); pmix_output_verbose(5, prte_rml_base.rml_output, "%s posting recv", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)); if (NULL == req) { /* this can only happen if something is really wrong, but * someone managed to get here in a bizarre test */ pmix_output(0, "%s CANNOT POST NULL RML RECV REQUEST", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)); return; } post = req->post; /* if the request is to cancel a recv, then find the recv * and remove it from our list */ if (req->cancel) { PMIX_LIST_FOREACH(recv, &prte_rml_base.posted_recvs, prte_rml_posted_recv_t) { if (PMIX_CHECK_PROCID(&post->peer, &recv->peer) && post->tag == recv->tag) { pmix_output_verbose(5, prte_rml_base.rml_output, "%s canceling recv %d for peer %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), post->tag, PRTE_NAME_PRINT(&recv->peer)); /* got a match - remove it */ pmix_list_remove_item(&prte_rml_base.posted_recvs, &recv->super); PMIX_RELEASE(recv); break; } } PMIX_RELEASE(req); return; } /* bozo check - cannot have two receives for the same peer/tag combination */ PMIX_LIST_FOREACH(recv, &prte_rml_base.posted_recvs, prte_rml_posted_recv_t) { if (PMIX_CHECK_PROCID(&post->peer, &recv->peer) && post->tag == recv->tag) { pmix_output(0, "%s 
TWO RECEIVES WITH SAME PEER %s AND TAG %d - ABORTING", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&post->peer), post->tag); abort(); } } pmix_output_verbose(5, prte_rml_base.rml_output, "%s posting %s recv on tag %d for peer %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), (post->persistent) ? "persistent" : "non-persistent", post->tag, PRTE_NAME_PRINT(&post->peer)); /* add it to the list of recvs */ pmix_list_append(&prte_rml_base.posted_recvs, &post->super); req->post = NULL; /* handle any messages that may have already arrived for this recv */ msg_match_recv(post, post->persistent); /* cleanup */ PMIX_RELEASE(req); } static void msg_match_recv(prte_rml_posted_recv_t *rcv, bool get_all) { pmix_list_item_t *item, *next; prte_rml_recv_t *msg; /* scan thru the list of unmatched recvd messages and * see if any matches this spec - if so, push the first * into the recvd msg queue and look no further */ item = pmix_list_get_first(&prte_rml_base.unmatched_msgs); while (item != pmix_list_get_end(&prte_rml_base.unmatched_msgs)) { next = pmix_list_get_next(item); msg = (prte_rml_recv_t *) item; pmix_output_verbose(5, prte_rml_base.rml_output, "%s checking recv for %s against unmatched msg from %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&rcv->peer), PRTE_NAME_PRINT(&msg->sender)); /* since names could include wildcards, must use * the more generalized comparison function */ if (PMIX_CHECK_PROCID(&msg->sender, &rcv->peer) && msg->tag == rcv->tag) { PRTE_RML_ACTIVATE_MESSAGE(msg); pmix_list_remove_item(&prte_rml_base.unmatched_msgs, item); if (!get_all) { break; } } item = next; } } void prte_rml_base_process_msg(int fd, short flags, void *cbdata) { prte_rml_recv_t *msg = (prte_rml_recv_t *) cbdata; prte_rml_posted_recv_t *post; PRTE_HIDE_UNUSED_PARAMS(fd, flags); PMIX_ACQUIRE_OBJECT(msg); PMIX_OUTPUT_VERBOSE( (5, prte_rml_base.rml_output, "%s message received from %s for tag %d", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&msg->sender), msg->tag)); /* 
if this message is just to warmup the connection, then drop it */ if (PRTE_RML_TAG_WARMUP_CONNECTION == msg->tag) { if (!prte_nidmap_communicated) { pmix_data_buffer_t *buffer; int rc; PMIX_DATA_BUFFER_CREATE(buffer); if (PRTE_SUCCESS != (rc = prte_util_nidmap_create(prte_node_pool, buffer))) { PRTE_ERROR_LOG(rc); PMIX_DATA_BUFFER_RELEASE(buffer); return; } PRTE_RML_SEND(rc, msg->sender.rank, buffer, PRTE_RML_TAG_NODE_REGEX_REPORT); if (PRTE_SUCCESS != rc) { PRTE_ERROR_LOG(rc); PMIX_DATA_BUFFER_RELEASE(buffer); return; } PMIX_RELEASE(msg); return; } } /* see if we have a waiting recv for this message */ PMIX_LIST_FOREACH(post, &prte_rml_base.posted_recvs, prte_rml_posted_recv_t) { /* since names could include wildcards, must use * the more generalized comparison function */ if (PMIX_CHECK_PROCID(&msg->sender, &post->peer) && msg->tag == post->tag) { /* deliver the data to this location */ post->cbfunc(PRTE_SUCCESS, &msg->sender, msg->dbuf, msg->tag, post->cbdata); /* the user must have unloaded the buffer if they wanted * to retain ownership of it, so release whatever remains */ PMIX_OUTPUT_VERBOSE((5, prte_rml_base.rml_output, "%s message received %" PRIsize_t " bytes from %s for tag %d called callback", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), msg->dbuf->bytes_used, PRTE_NAME_PRINT(&msg->sender), msg->tag)); /* release the message */ PMIX_RELEASE(msg); PMIX_OUTPUT_VERBOSE((5, prte_rml_base.rml_output, "%s message tag %d on released", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), post->tag)); /* if the recv is non-persistent, remove it */ if (!post->persistent) { pmix_list_remove_item(&prte_rml_base.posted_recvs, &post->super); /*PMIX_OUTPUT_VERBOSE((5, prte_rml_base.rml_output, "%s non persistent recv %p remove success releasing now", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), post));*/ PMIX_RELEASE(post); } return; } } /* we get here if no matching recv was found - we then hold * the message until such a recv is issued */ PMIX_OUTPUT_VERBOSE( (5, prte_rml_base.rml_output, "%s message 
received bytes from %s for tag %d Not Matched adding to unmatched msgs", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&msg->sender), msg->tag)); pmix_list_append(&prte_rml_base.unmatched_msgs, &msg->super); } prrte-3.0.13/src/rml/rml.c0000664000175000017500000001515715145263240015450 0ustar alastairalastair/* * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2013-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015-2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2021-2024 Nanook Consulting All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #include "prte_config.h" #include #include "src/mca/base/pmix_mca_base_component_repository.h" #include "src/mca/mca.h" #include "src/util/pmix_output.h" #include "src/mca/errmgr/errmgr.h" #include "src/rml/rml.h" #include "src/mca/state/state.h" #include "src/runtime/prte_wait.h" #include "src/threads/pmix_threads.h" #include "src/util/name_fns.h" prte_rml_base_t prte_rml_base = { .rml_output = -1, .routed_output = -1, .posted_recvs = PMIX_LIST_STATIC_INIT, .unmatched_msgs = PMIX_LIST_STATIC_INIT, .max_retries = 0, .lifeline = PMIX_RANK_INVALID, .children = PMIX_LIST_STATIC_INIT, .radix = 64, .static_ports = false }; static int verbosity = 0; void prte_rml_register(void) { int ret; prte_rml_base.max_retries = 3; pmix_mca_base_var_register("prte", "rml", "base", "max_retries", "Max #times to retry sending a message", PMIX_MCA_BASE_VAR_TYPE_INT, &prte_rml_base.max_retries); verbosity = 0; pmix_mca_base_var_register("prte", "rml", "base", "verbose", "Debug verbosity of the RML subsystem", PMIX_MCA_BASE_VAR_TYPE_INT, &verbosity); if (0 < verbosity) { prte_rml_base.rml_output = 
pmix_output_open(NULL); pmix_output_set_verbosity(prte_rml_base.rml_output, verbosity); } verbosity = 0; pmix_mca_base_var_register("prte", "routed", "base", "verbose", "Debug verbosity of the Routed subsystem", PMIX_MCA_BASE_VAR_TYPE_INT, &verbosity); if (0 < verbosity) { prte_rml_base.routed_output = pmix_output_open(NULL); pmix_output_set_verbosity(prte_rml_base.routed_output, verbosity); } ret = pmix_mca_base_var_register("prte", "rml", "base", "radix", "Radix to be used for routing tree", PMIX_MCA_BASE_VAR_TYPE_INT, &prte_rml_base.radix); pmix_mca_base_var_register_synonym(ret, "prte", "routed", "radix", NULL, PMIX_MCA_BASE_VAR_SYN_FLAG_DEPRECATED); } void prte_rml_close(void) { PMIX_LIST_DESTRUCT(&prte_rml_base.posted_recvs); PMIX_LIST_DESTRUCT(&prte_rml_base.unmatched_msgs); PMIX_LIST_DESTRUCT(&prte_rml_base.children); if (0 <= prte_rml_base.rml_output) { pmix_output_close(prte_rml_base.rml_output); } } void prte_rml_open(void) { /* construct object for holding the active plugin modules */ PMIX_CONSTRUCT(&prte_rml_base.posted_recvs, pmix_list_t); PMIX_CONSTRUCT(&prte_rml_base.unmatched_msgs, pmix_list_t); PMIX_CONSTRUCT(&prte_rml_base.children, pmix_list_t); /* compute the routing tree - only thing we need to know is the * number of daemons in the DVM */ prte_rml_compute_routing_tree(); prte_rml_base.lifeline = PRTE_PROC_MY_PARENT->rank; } void prte_rml_send_callback(int status, pmix_proc_t *peer, pmix_data_buffer_t *buffer, prte_rml_tag_t tag, void *cbdata) { PRTE_HIDE_UNUSED_PARAMS(buffer, cbdata); if (PRTE_SUCCESS != status) { pmix_output_verbose(2, prte_rml_base.rml_output, "%s UNABLE TO SEND MESSAGE TO %s TAG %d: %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(peer), tag, PRTE_ERROR_NAME(status)); if (PRTE_ERR_NO_PATH_TO_TARGET == status) { PRTE_ACTIVATE_PROC_STATE(peer, PRTE_PROC_STATE_NO_PATH_TO_TARGET); } else if (PRTE_ERR_ADDRESSEE_UNKNOWN == status) { PRTE_ACTIVATE_PROC_STATE(peer, PRTE_PROC_STATE_PEER_UNKNOWN); } else { 
PRTE_ACTIVATE_PROC_STATE(peer, PRTE_PROC_STATE_UNABLE_TO_SEND_MSG); } } } /*** RML CLASS INSTANCES ***/ static void send_cons(prte_rml_send_t *ptr) { ptr->retries = 0; ptr->cbdata = NULL; ptr->dbuf = NULL; ptr->seq_num = 0xFFFFFFFF; } static void send_des(prte_rml_send_t *ptr) { if (ptr->dbuf != NULL) PMIX_DATA_BUFFER_RELEASE(ptr->dbuf); } PMIX_CLASS_INSTANCE(prte_rml_send_t, pmix_list_item_t, send_cons, send_des); static void send_req_cons(prte_rml_send_request_t *ptr) { PMIX_CONSTRUCT(&ptr->send, prte_rml_send_t); } static void send_req_des(prte_rml_send_request_t *ptr) { PMIX_DESTRUCT(&ptr->send); } PMIX_CLASS_INSTANCE(prte_rml_send_request_t, pmix_object_t, send_req_cons, send_req_des); static void recv_cons(prte_rml_recv_t *ptr) { ptr->dbuf = NULL; } static void recv_des(prte_rml_recv_t *ptr) { if (ptr->dbuf != NULL) PMIX_DATA_BUFFER_RELEASE(ptr->dbuf); } PMIX_CLASS_INSTANCE(prte_rml_recv_t, pmix_list_item_t, recv_cons, recv_des); static void rcv_cons(prte_rml_recv_cb_t *ptr) { PMIX_DATA_BUFFER_CONSTRUCT(&ptr->data); ptr->active = false; } static void rcv_des(prte_rml_recv_cb_t *ptr) { PMIX_DATA_BUFFER_DESTRUCT(&ptr->data); } PMIX_CLASS_INSTANCE(prte_rml_recv_cb_t, pmix_object_t, rcv_cons, rcv_des); static void prcv_cons(prte_rml_posted_recv_t *ptr) { ptr->cbdata = NULL; } PMIX_CLASS_INSTANCE(prte_rml_posted_recv_t, pmix_list_item_t, prcv_cons, NULL); static void prq_cons(prte_rml_recv_request_t *ptr) { ptr->cancel = false; ptr->post = PMIX_NEW(prte_rml_posted_recv_t); } static void prq_des(prte_rml_recv_request_t *ptr) { if (NULL != ptr->post) { PMIX_RELEASE(ptr->post); } } PMIX_CLASS_INSTANCE(prte_rml_recv_request_t, pmix_object_t, prq_cons, prq_des); static void rtcon(prte_routed_tree_t *rt) { rt->rank = PMIX_RANK_INVALID; PMIX_CONSTRUCT(&rt->relatives, pmix_bitmap_t); } static void rtdes(prte_routed_tree_t *rt) { PMIX_DESTRUCT(&rt->relatives); } PMIX_CLASS_INSTANCE(prte_routed_tree_t, pmix_list_item_t, rtcon, rtdes); 
prrte-3.0.13/src/rml/rml_base_contact.c0000664000175000017500000000366115145263240020152 0ustar alastairalastair/* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ /** @file */ #include "prte_config.h" #include "constants.h" #include "types.h" #include "src/util/pmix_argv.h" #include "src/util/pmix_output.h" #include "src/mca/errmgr/errmgr.h" #include "src/runtime/prte_globals.h" #include "src/util/name_fns.h" #include "src/util/proc_info.h" #include "src/rml/rml_contact.h" #include "src/rml/rml.h" int prte_rml_parse_uris(const char *uri, pmix_proc_t *peer, char ***uris) { int rc; /* parse the process name */ char *cinfo = strdup(uri); char *ptr = strchr(cinfo, ';'); if (NULL == ptr) { PRTE_ERROR_LOG(PRTE_ERR_BAD_PARAM); free(cinfo); return PRTE_ERR_BAD_PARAM; } *ptr = '\0'; ptr++; if (PRTE_SUCCESS != (rc = prte_util_convert_string_to_process_name(peer, cinfo))) { PRTE_ERROR_LOG(rc); free(cinfo); return rc; } if (NULL != uris) { /* parse the remainder of the string into an array of uris */ *uris = PMIX_ARGV_SPLIT_COMPAT(ptr, ';'); } free(cinfo); return PRTE_SUCCESS; } prrte-3.0.13/src/rml/rml_send.c0000664000175000017500000000630315145263240016452 0ustar alastairalastair/* -*- Mode: C; c-basic-offset:4 ; -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and 
Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2013-2020 Intel, Inc. All rights reserved. * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #include "prte_config.h" #include "types.h" #include "src/pmix/pmix-internal.h" #include "src/util/name_fns.h" #include "src/util/pmix_output.h" #include "src/util/pmix_name_fns.h" #include "src/mca/errmgr/errmgr.h" #include "src/mca/oob/base/base.h" #include "src/runtime/prte_globals.h" #include "src/threads/pmix_threads.h" #include "src/rml/rml.h" int prte_rml_send_buffer_nb(pmix_rank_t rank, pmix_data_buffer_t *buffer, prte_rml_tag_t tag) { prte_rml_recv_t *rcv; prte_rml_send_t *snd; PMIX_OUTPUT_VERBOSE((1, prte_rml_base.rml_output, "%s rml_send_buffer to peer %s at tag %d", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PMIX_RANK_PRINT(rank), tag)); if (PRTE_RML_TAG_INVALID == tag) { /* cannot send to an invalid tag */ PRTE_ERROR_LOG(PRTE_ERR_BAD_PARAM); return PRTE_ERR_BAD_PARAM; } if (PMIX_RANK_INVALID == rank) { /* cannot send to an invalid peer */ PRTE_ERROR_LOG(PRTE_ERR_BAD_PARAM); return PRTE_ERR_BAD_PARAM; } /* if this is a message to myself, then just post the message * for receipt - no need to dive into the oob */ if (PRTE_PROC_MY_NAME->rank == rank) { /* local delivery */ PMIX_OUTPUT_VERBOSE((1, prte_rml_base.rml_output, "%s rml_send_buffer_to_self at tag %d", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), tag)); /* copy the 
message for the recv */ rcv = PMIX_NEW(prte_rml_recv_t); PMIX_LOAD_PROCID(&rcv->sender, PRTE_PROC_MY_NAME->nspace, rank); rcv->tag = tag; rcv->dbuf = buffer; /* post the message for receipt - since the send callback was posted * first and has the same priority, it will execute first */ PRTE_RML_ACTIVATE_MESSAGE(rcv); return PRTE_SUCCESS; } snd = PMIX_NEW(prte_rml_send_t); PMIX_LOAD_PROCID(&snd->dst, PRTE_PROC_MY_NAME->nspace, rank); snd->origin = *PRTE_PROC_MY_NAME; snd->tag = tag; snd->dbuf = buffer; /* activate the OOB send state */ PRTE_OOB_SEND(snd); return PRTE_SUCCESS; } prrte-3.0.13/src/rml/Makefile.am0000664000175000017500000000220615145263240016535 0ustar alastairalastair# # Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. # Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010-2020 Cisco Systems, Inc. All rights reserved # Copyright (c) 2019 Intel, Inc. All rights reserved. # Copyright (c) 2022 Nanook Consulting. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow # # $HEADER$ # # local files headers += \ rml/rml.h \ rml/rml_types.h \ rml/rml_contact.h libprrte_la_SOURCES += \ rml/rml.c \ rml/rml_send.c \ rml/rml_recv.c \ rml/rml_base_contact.c \ rml/rml_base_msg_handlers.c \ rml/routed_radix.c prrte-3.0.13/src/rml/rml_types.h0000664000175000017500000002204015145263240016666 0ustar alastairalastair/* * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. 
* Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2012 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2009-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2021-2025 Nanook Consulting All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ /** @file: * * Contains the typedefs for the use of the rml */ #ifndef PRTE_RML_TYPES_H_ #define PRTE_RML_TYPES_H_ #include "prte_config.h" #include "constants.h" #include "types.h" #include #ifdef HAVE_SYS_UIO_H /* for struct iovec */ # include #endif #ifdef HAVE_NET_UIO_H # include #endif #include "src/class/pmix_bitmap.h" #include "src/class/pmix_list.h" #include "src/pmix/pmix-internal.h" BEGIN_C_DECLS /** * Message matching tag * * Message matching tag. Unlike MPI, there is no wildcard receive, * all messages must match exactly. Tag values less than * PRTE_RML_TAG_DYNAMIC are reserved and may only be referenced using * a defined constant. */ typedef uint32_t prte_rml_tag_t; /** * Function prototype for callback from non-blocking buffer send and receive * * Function prototype for callback from non-blocking buffer send and * receive. On send, the buffer will be the same pointer passed to * send_buffer_nb. On receive, the buffer will be allocated and owned * by the RML, not the process receiving the callback. * * @note The parameter in/out parameters are relative to the user's callback * function. 
* * @param[in] status Completion status * @param[in] peer Name of peer process * @param[in] buffer Message buffer * @param[in] tag User defined tag for matching send/recv * @param[in] cbdata User data passed to send_buffer_nb() or recv_buffer_nb() */ typedef void (*prte_rml_buffer_callback_fn_t)(int status, pmix_proc_t *peer, pmix_data_buffer_t *buffer, prte_rml_tag_t tag, void *cbdata); /* Convenience def for readability */ #define PRTE_RML_PERSISTENT true #define PRTE_RML_NON_PERSISTENT false /** * Constant tag values for well-known services */ #define PRTE_RML_TAG PMIX_UINT32 #define PRTE_RML_TAG_INVALID 0 #define PRTE_RML_TAG_DAEMON 1 #define PRTE_RML_TAG_IOF_HNP 2 #define PRTE_RML_TAG_IOF_PROXY 3 #define PRTE_RML_TAG_XCAST_BARRIER 4 #define PRTE_RML_TAG_PLM 5 #define PRTE_RML_TAG_LAUNCH_RESP 6 #define PRTE_RML_TAG_ERRMGR 7 #define PRTE_RML_TAG_WIREUP 8 #define PRTE_RML_TAG_RML_INFO_UPDATE 9 #define PRTE_RML_TAG_PRTED_CALLBACK 10 #define PRTE_RML_TAG_ROLLUP 11 #define PRTE_RML_TAG_REPORT_REMOTE_LAUNCH 12 #define PRTE_RML_TAG_CKPT 13 #define PRTE_RML_TAG_RML_ROUTE 14 #define PRTE_RML_TAG_XCAST 15 #define PRTE_RML_TAG_UPDATE_ROUTE_ACK 19 #define PRTE_RML_TAG_SYNC 20 /* For FileM Base */ #define PRTE_RML_TAG_FILEM_BASE 21 #define PRTE_RML_TAG_FILEM_BASE_RESP 22 /* For FileM RSH Component */ #define PRTE_RML_TAG_FILEM_RSH 23 #define PRTE_RML_TAG_TCONN_RESP 24 /* For tools */ #define PRTE_RML_TAG_TOOL 26 /* support data store/lookup */ #define PRTE_RML_TAG_DATA_SERVER 27 #define PRTE_RML_TAG_DATA_CLIENT 28 /* timing related */ #define PRTE_RML_TAG_COLLECTIVE_TIMER 29 /* collectives */ #define PRTE_RML_TAG_COLLECTIVE 30 #define PRTE_RML_TAG_COLL_RELEASE 31 #define PRTE_RML_TAG_DAEMON_COLL 32 #define PRTE_RML_TAG_ALLGATHER_DIRECT 33 #define PRTE_RML_TAG_ALLGATHER_BRUCKS 34 #define PRTE_RML_TAG_ALLGATHER_RCD 35 /* debugger release */ #define PRTE_RML_TAG_DEBUGGER_RELEASE 37 /* bootstrap */ #define PRTE_RML_TAG_BOOTSTRAP 38 /* report a missed msg */ #define 
PRTE_RML_TAG_MISSED_MSG 39 /* tag for receiving ack of abort msg */ #define PRTE_RML_TAG_ABORT 40 /* tag for receiving heartbeats */ #define PRTE_RML_TAG_HEARTBEAT 41 /* Process Migration Tool Tag */ #define PRTE_RML_TAG_MIGRATE 42 /* For SStore Framework */ #define PRTE_RML_TAG_SSTORE 43 #define PRTE_RML_TAG_SSTORE_INTERNAL 44 #define PRTE_RML_TAG_SUBSCRIBE 45 /* Notify of failed processes */ #define PRTE_RML_TAG_FAILURE_NOTICE 46 /* distributed file system */ #define PRTE_RML_TAG_DFS_CMD 47 #define PRTE_RML_TAG_DFS_DATA 48 /* sensor data */ #define PRTE_RML_TAG_SENSOR_DATA 49 /* direct modex support */ #define PRTE_RML_TAG_DIRECT_MODEX 50 #define PRTE_RML_TAG_DIRECT_MODEX_RESP 51 /* notifier support */ #define PRTE_RML_TAG_NOTIFIER_HNP 52 #define PRTE_RML_TAG_NOTIFY_COMPLETE 53 /*** QOS specific RML TAGS ***/ #define PRTE_RML_TAG_OPEN_CHANNEL_REQ 54 #define PRTE_RML_TAG_OPEN_CHANNEL_RESP 55 #define PRTE_RML_TAG_MSG_ACK 56 #define PRTE_RML_TAG_CLOSE_CHANNEL_REQ 57 #define PRTE_RML_TAG_CLOSE_CHANNEL_ACCEPT 58 /* error notifications */ #define PRTE_RML_TAG_NOTIFICATION 59 /* stacktrace for debug */ #define PRTE_RML_TAG_STACK_TRACE 60 /* memory profile */ #define PRTE_RML_TAG_MEMPROFILE 61 /* topology report */ #define PRTE_RML_TAG_TOPOLOGY_REPORT 62 /* warmup connection - simply establishes the connection */ #define PRTE_RML_TAG_WARMUP_CONNECTION 63 /* node regex report */ #define PRTE_RML_TAG_NODE_REGEX_REPORT 64 /* pmix log requests */ #define PRTE_RML_TAG_LOGGING 65 /* error propagate */ #define PRTE_RML_TAG_RBCAST 66 /* heartbeat request */ #define PRTE_RML_TAG_HEARTBEAT_REQUEST 70 /* error propagate */ #define PRTE_RML_TAG_PROPAGATE 71 /* scheduler requests */ #define PRTE_RML_TAG_SCHED 72 #define PRTE_RML_TAG_MAX 100 #define PRTE_RML_TAG_NTOH(t) ntohl(t) #define PRTE_RML_TAG_HTON(t) htonl(t) /*** length of the tag. 
/*
 * RML proxy commands
 */
typedef uint8_t prte_rml_cmd_flag_t;
#define PRTE_RML_CMD        PMIX_UINT8
#define PRTE_RML_UPDATE_CMD 1

/* Receive-callback tracker: a process name, an embedded data buffer and
 * an "active" flag.  The class constructor in rml.c constructs the buffer
 * and clears the flag; the destructor destructs the buffer. */
typedef struct {
    pmix_object_t super;
    pmix_proc_t name;
    pmix_data_buffer_t data;
    bool active;
} prte_rml_recv_cb_t;
PMIX_CLASS_DECLARATION(prte_rml_recv_cb_t);

/* structure to send RML messages - used internally
 *
 * The class constructor in rml.c initializes only retries (0), cbdata
 * (NULL), dbuf (NULL) and seq_num (0xFFFFFFFF); every other field must be
 * set by whoever creates the object.  The destructor releases dbuf when
 * it is non-NULL, i.e. the object owns the attached buffer. */
typedef struct {
    pmix_list_item_t super;
    pmix_proc_t dst;    // targeted recipient
    pmix_proc_t origin; // set to our own name by prte_rml_send_buffer_nb()
    int status;         // returned status on send
    prte_rml_tag_t tag; // targeted tag
    int retries;        // #times we have tried to send it

    /* user's send callback functions and data */
    prte_rml_buffer_callback_fn_t cbfunc;
    void *cbdata;

    /* data buffer */
    pmix_data_buffer_t *dbuf;
    /* msg seq number */
    uint32_t seq_num;
} prte_rml_send_t;
PRTE_EXPORT PMIX_CLASS_DECLARATION(prte_rml_send_t);

/* define an object for transferring send requests to the event lib
 *
 * The embedded prte_rml_send_t is constructed/destructed together with
 * this object (see the class instance in rml.c). */
typedef struct {
    pmix_object_t super;
    prte_event_t ev;
    prte_rml_send_t send;
} prte_rml_send_request_t;
PMIX_CLASS_DECLARATION(prte_rml_send_request_t);

/* structure to recv RML messages - used internally
 *
 * The class constructor in rml.c only sets dbuf to NULL; the destructor
 * releases dbuf when it is non-NULL. */
typedef struct {
    pmix_list_item_t super;
    prte_event_t ev;
    pmix_proc_t sender;       // sender
    prte_rml_tag_t tag;       // targeted tag
    uint32_t seq_num;         // sequence number
    pmix_data_buffer_t *dbuf; // the recvd data
} prte_rml_recv_t;
PRTE_EXPORT PMIX_CLASS_DECLARATION(prte_rml_recv_t);

/* A posted receive: the peer/tag pair to match plus the callback to
 * invoke.  The class constructor in rml.c only clears cbdata. */
typedef struct {
    pmix_list_item_t super;
    bool buffer_data;
    pmix_proc_t peer;
    prte_rml_tag_t tag;
    bool persistent; // PRTE_RML_PERSISTENT or PRTE_RML_NON_PERSISTENT
    prte_rml_buffer_callback_fn_t cbfunc;
    void *cbdata;
} prte_rml_posted_recv_t;
PMIX_CLASS_DECLARATION(prte_rml_posted_recv_t);

/* define an object for transferring recv requests to the list of posted recvs
 *
 * The class constructor in rml.c clears "cancel" and allocates "post";
 * the destructor releases "post" if it is still attached.
 * prte_rml_recv_cancel() sets "cancel" to true before shifting the
 * request to the event base. */
typedef struct {
    pmix_object_t super;
    prte_event_t ev;
    bool cancel;
    prte_rml_posted_recv_t *post;
} prte_rml_recv_request_t;
PRTE_EXPORT PMIX_CLASS_DECLARATION(prte_rml_recv_request_t);

/* struct for tracking routing trees
 *
 * One entry per direct child in the routing tree.  "relatives" has a bit
 * set for every rank that lies beneath that child (filled in by
 * radix_tree() in routed_radix.c and consulted by prte_rml_get_route()). */
typedef struct {
    pmix_list_item_t super;
    pmix_rank_t rank;
    pmix_bitmap_t relatives;
} prte_routed_tree_t;
PRTE_EXPORT PMIX_CLASS_DECLARATION(prte_routed_tree_t);
* $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #include "prte_config.h" #include "constants.h" #ifdef HAVE_NETINET_IN_H # include #endif #ifdef HAVE_ARPA_INET_H # include #endif #include "src/event/event-internal.h" #include "src/mca/base/pmix_base.h" #include "src/mca/prtebacktrace/prtebacktrace.h" #include "src/util/pmix_argv.h" #include "src/util/pmix_output.h" #include "src/mca/errmgr/errmgr.h" #include "src/runtime/prte_globals.h" #include "src/util/name_fns.h" #include "src/rml/rml.h" void prte_rml_recv_buffer_nb(pmix_proc_t *peer, prte_rml_tag_t tag, bool persistent, prte_rml_buffer_callback_fn_t cbfunc, void *cbdata) { prte_rml_recv_request_t *req; pmix_output_verbose(10, prte_rml_base.rml_output, "%s rml_recv_buffer_nb for peer %s tag %d", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(peer), tag); /* push the request into the event base so we can add * the receive to our list of posted recvs */ req = PMIX_NEW(prte_rml_recv_request_t); PMIX_XFER_PROCID(&req->post->peer, peer); req->post->tag = tag; req->post->persistent = persistent; req->post->cbfunc = cbfunc; req->post->cbdata = cbdata; PRTE_PMIX_THREADSHIFT(req, prte_event_base, prte_rml_base_post_recv); } void prte_rml_recv_cancel(pmix_proc_t *peer, prte_rml_tag_t tag) { prte_rml_recv_request_t *req; pmix_output_verbose(10, prte_rml_base.rml_output, "%s rml_recv_cancel for peer %s tag %d", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(peer), tag); PMIX_ACQUIRE_OBJECT(prte_event_base_active); if (!prte_event_base_active) { /* no event will be processed any more, so simply return. 
*/ return; } /* push the request into the event base so we can remove * the receive from our list of posted recvs */ req = PMIX_NEW(prte_rml_recv_request_t); req->cancel = true; PMIX_XFER_PROCID(&req->post->peer, peer); req->post->tag = tag; PRTE_PMIX_THREADSHIFT(req, prte_event_base, prte_rml_base_post_recv); } prrte-3.0.13/src/rml/owner.txt0000664000175000017500000000024515145263240016375 0ustar alastairalastair# # owner/status file # owner: institution that is responsible for this package # status: e.g. active, maintenance, unmaintained # owner: Nanook status: maintenance prrte-3.0.13/src/rml/routed_radix.c0000664000175000017500000002124415145263240017341 0ustar alastairalastair/* * Copyright (c) 2007-2011 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2013-2020 Intel, Inc. All rights reserved. * Copyright (c) 2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2021-2023 Nanook Consulting. All rights reserved. * Copyright (c) 2023 Triad National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #include "prte_config.h" #include "constants.h" #include #include "src/class/pmix_bitmap.h" #include "src/util/pmix_output.h" #include "src/rml/rml.h" #include "src/runtime/prte_globals.h" #include "src/util/name_fns.h" pmix_rank_t prte_rml_get_route(pmix_rank_t target) { pmix_rank_t ret; prte_routed_tree_t *child; /* if it is me, then the route is just direct */ if (PRTE_PROC_MY_NAME->rank == target) { ret = target; goto found; } /* if this is going to the HNP, then send it to our parent * as the parent will have been set to HNP if we are going * direct or don't know any other route. Obviously, if it * is going to the parent, send it there */ if (PRTE_PROC_MY_HNP->rank == target || PRTE_PROC_MY_PARENT->rank == target) { ret = PRTE_PROC_MY_PARENT->rank; goto found; } /* search routing tree for next step to that daemon */ PMIX_LIST_FOREACH(child, &prte_rml_base.children, prte_routed_tree_t) { if (child->rank == target) { /* the child is the target */ ret = target; goto found; } /* otherwise, see if the daemon we need is below the child */ if (pmix_bitmap_is_set_bit(&child->relatives, target)) { /* yep - we need to step through this child */ ret = child->rank; goto found; } } /* if we get here, then the target daemon is not beneath * any of our children, so we have to step up through our parent */ ret = PRTE_PROC_MY_PARENT->rank; found: PMIX_OUTPUT_VERBOSE((1, prte_rml_base.routed_output, "%s routed_radix_get(%s) --> %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_VPID_PRINT(target), PRTE_VPID_PRINT(ret))); return ret; } int prte_rml_route_lost(pmix_rank_t route) { prte_routed_tree_t *child; PMIX_OUTPUT_VERBOSE((2, prte_rml_base.routed_output, "%s route to %s lost", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_VPID_PRINT(route))); /* if we lose the connection to the lifeline and we are NOT already, * in finalize, tell the OOB to abort. 
* NOTE: we cannot call abort from here as the OOB needs to first * release a thread-lock - otherwise, we will hang!! */ if (!prte_finalizing && route == prte_rml_base.lifeline) { PMIX_OUTPUT_VERBOSE((2, prte_rml_base.routed_output, "%s routed:radix: Connection to lifeline %s lost", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_VPID_PRINT(prte_rml_base.lifeline))); return PRTE_ERR_FATAL; } /* see if it is one of our children - if so, remove it */ PMIX_LIST_FOREACH(child, &prte_rml_base.children, prte_routed_tree_t) { if (child->rank == route) { pmix_list_remove_item(&prte_rml_base.children, &child->super); PMIX_RELEASE(child); return PRTE_SUCCESS; } } /* we don't care about this one, so return success */ return PRTE_SUCCESS; } static void radix_tree(int rank, pmix_list_t *children, pmix_bitmap_t *relatives) { int i, peer, Sum, NInLevel; prte_routed_tree_t *child; pmix_bitmap_t *relations; /* compute how many procs are at my level */ Sum = 1; NInLevel = 1; while (Sum < (rank + 1)) { NInLevel *= prte_rml_base.radix; Sum += NInLevel; } /* our children start at our rank + num_in_level */ peer = rank + NInLevel; for (i = 0; i < prte_rml_base.radix; i++) { if (peer < (int) prte_process_info.num_daemons) { child = PMIX_NEW(prte_routed_tree_t); child->rank = peer; if (NULL != children) { /* this is a direct child - add it to my list */ pmix_list_append(children, &child->super); /* setup the relatives bitmap */ pmix_bitmap_init(&child->relatives, prte_process_info.num_daemons); /* point to the relatives */ relations = &child->relatives; } else { /* we are recording someone's relatives - set the bit */ if (PRTE_SUCCESS != pmix_bitmap_set_bit(relatives, peer)) { pmix_output(0, "%s Error: could not set relations bit!", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)); } /* point to this relations */ relations = relatives; PMIX_RELEASE(child); } /* search for this child's relatives */ radix_tree(peer, NULL, relations); } peer += NInLevel; } } void prte_rml_compute_routing_tree(void) { 
prte_routed_tree_t *child; int j; int Sum, NInLevel, Ii; int NInPrevLevel; prte_job_t *dmns; prte_proc_t *d; /* compute my parent */ Ii = PRTE_PROC_MY_NAME->rank; Sum = 1; NInLevel = 1; while (Sum < (Ii + 1)) { NInLevel *= prte_rml_base.radix; Sum += NInLevel; } Sum -= NInLevel; NInPrevLevel = NInLevel / prte_rml_base.radix; if (0 == Ii) { PRTE_PROC_MY_PARENT->rank = -1; } else { PRTE_PROC_MY_PARENT->rank = (Ii - Sum) % NInPrevLevel; PRTE_PROC_MY_PARENT->rank += (Sum - NInPrevLevel); } /* compute my direct children and the bitmap that shows which vpids * lie underneath their branch. destroy list if it is not empty. * this situation can arise when the DVM is being resized. */ if (pmix_list_get_size(&prte_rml_base.children) > 0) { PMIX_LIST_DESTRUCT(&prte_rml_base.children); PMIX_CONSTRUCT(&prte_rml_base.children, pmix_list_t); } radix_tree(Ii, &prte_rml_base.children, NULL); if (0 < pmix_output_get_verbosity(prte_rml_base.routed_output)) { pmix_output(0, "%s: parent %d num_children %d", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_PROC_MY_PARENT->rank, (int)pmix_list_get_size(&prte_rml_base.children)); dmns = prte_get_job_data_object(PRTE_PROC_MY_NAME->nspace); PMIX_LIST_FOREACH(child, &prte_rml_base.children, prte_routed_tree_t) { d = (prte_proc_t *) pmix_pointer_array_get_item(dmns->procs, child->rank); if (NULL == d || NULL == d->node || NULL == d->node->name) { pmix_output(0, "%s: \tchild %d ", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), child->rank); continue; } pmix_output(0, "%s: \tchild %d node %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), child->rank, d->node->name); for (j = 0; j < (int) prte_process_info.num_daemons; j++) { if (pmix_bitmap_is_set_bit(&child->relatives, j)) { pmix_output(0, "%s: \t\trelation %d", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), j); } } } } } int prte_rml_get_num_contributors(pmix_rank_t *dmns, size_t ndmns) { int j, n; prte_routed_tree_t *child; if (NULL == dmns) { return pmix_list_get_size(&prte_rml_base.children); } n = 0; PMIX_LIST_FOREACH(child, 
&prte_rml_base.children, prte_routed_tree_t) { for (j = 0; j < (int) ndmns; j++) { /* if the child is one of the daemons, then take it */ if (dmns[j] == child->rank) { n++; break; } if (pmix_bitmap_is_set_bit(&child->relatives, dmns[j])) { n++; break; } } } return n; } prrte-3.0.13/src/common_sym_whitelist.txt0000664000175000017500000000040715145263240020725 0ustar alastairalastair# Ignore symbols in libopen-rte that are auto-generated and we can't # do anything about them (e.g., flex/bison symbols). prte_util_hostfile_leng prte_util_hostfile_text prte_util_keyval_yyleng prte_util_keyval_yytext pmix_show_help_yyleng pmix_show_help_yytext prrte-3.0.13/src/runtime/0000775000175000017500000000000015145263240015372 5ustar alastairalastairprrte-3.0.13/src/runtime/prte_init.c0000664000175000017500000004375215145263240017546 0ustar alastairalastair/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2018 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2007-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2007-2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2014-2020 Intel, Inc. All rights reserved. * Copyright (c) 2014-2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * * Copyright (c) 2021-2025 Nanook Consulting All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ /** @file **/ #include "prte_config.h" #include "constants.h" #include #ifdef HAVE_UNISTD_H # include #endif #ifdef HAVE_SYS_STAT_H # include #endif #ifdef HAVE_STRING_H #include #endif #include "src/util/error.h" #include "src/util/error_strings.h" #include "src/util/pmix_keyval_parse.h" #include "src/util/malloc.h" #include "src/util/name_fns.h" #include "src/util/pmix_if.h" #include "src/util/pmix_net.h" #include "src/util/pmix_output.h" #include "src/util/pmix_environ.h" #include "src/util/pmix_os_path.h" #include "src/util/proc_info.h" #include "src/util/pmix_show_help.h" #include "src/util/stacktrace.h" #include "src/util/sys_limits.h" #include "src/hwloc/hwloc-internal.h" #include "src/prted/pmix/pmix_server.h" #include "src/threads/pmix_threads.h" #include "src/include/prte_frameworks.h" #include "src/mca/base/pmix_base.h" #include "src/mca/base/pmix_mca_base_var.h" #include "src/mca/base/pmix_mca_base_vari.h" #include "src/mca/errmgr/base/base.h" #include "src/mca/ess/base/base.h" #include "src/mca/ess/ess.h" #include "src/mca/filem/base/base.h" #include "src/mca/grpcomm/base/base.h" #include "src/mca/iof/base/base.h" #include "src/mca/odls/base/base.h" #include "src/mca/oob/base/base.h" #include "src/mca/plm/base/base.h" #include "src/mca/pmdl/base/base.h" #include "src/mca/prtebacktrace/base/base.h" #include "src/mca/prteinstalldirs/base/base.h" #include "src/mca/ras/base/base.h" #include "src/mca/rmaps/base/base.h" #include "src/mca/rtc/base/base.h" #include "src/mca/schizo/base/base.h" #include "src/mca/state/base/base.h" #include "src/runtime/pmix_init_util.h" #include "src/runtime/prte_globals.h" #include "src/runtime/prte_locks.h" #include "src/runtime/runtime.h" #include "src/runtime/runtime_internals.h" /* * Whether we have completed prte_init or we are in prte_finalize */ bool prte_initialized = false; bool prte_finalizing = false; bool prte_debug_flag = false; int 
prte_debug_verbosity = -1; char *prte_prohibited_session_dirs = NULL; bool prte_create_session_dirs = true; prte_event_base_t *prte_event_base = {0}; bool prte_event_base_active = true; bool prte_proc_is_bound = false; int prte_progress_thread_debug = -1; hwloc_cpuset_t prte_proc_applied_binding = NULL; int prte_cache_line_size = 128; pmix_proc_t prte_name_wildcard = {{0}, PMIX_RANK_WILDCARD}; pmix_proc_t prte_name_invalid = {{0}, PMIX_RANK_INVALID}; pmix_nspace_t prte_nspace_wildcard = {0}; static bool util_initialized = false; static bool min_initialized = false; const char prte_version_string[] = PRTE_IDENT_STRING; static bool check_exist(char *path) { struct stat buf; /* coverity[TOCTOU] */ if (0 == stat(path, &buf)) { /* exists */ return true; } return false; } static void print_error(unsigned major, unsigned minor, unsigned release) { fprintf(stderr, "************************************************\n"); fprintf(stderr, "We have detected that the runtime version\n"); fprintf(stderr, "of the PMIx library we were given is binary\n"); fprintf(stderr, "incompatible with the version we were built against:\n\n"); fprintf(stderr, " Runtime: 0x%x%02x%02x\n", major, minor, release); fprintf(stderr, " Build: 0x%0x\n\n", PMIX_NUMERIC_VERSION); fprintf(stderr, "Please update your LD_LIBRARY_PATH to point\n"); fprintf(stderr, "us to the same PMIx version used to build PRRTE.\n"); fprintf(stderr, "************************************************\n"); } int prte_init_minimum(void) { int ret, n; char *path = NULL; char *evar, **prefixes; const char *rvers; char token[100]; unsigned int major, minor, release; if (min_initialized) { return PRTE_SUCCESS; } min_initialized = true; /* check to see if the version of PMIx we were given in the * library path matches the version we were built against. * Because we are using PMIx internals, we cannot support * cross version operations from inside of PRRTE. 
*/ rvers = PMIx_Get_version(); ret = sscanf(rvers, "%s %u.%u.%u", token, &major, &minor, &release); /* check the version triplet - we know that version * 5 and above are not runtime compatible with version * 4 and below. Since PRRTE has a minimum PMIx requirement * in the v4.x series, we only need to check v4 vs 5 * and above */ if ((PMIX_VERSION_MAJOR > 4 && 4 == major) || (PMIX_VERSION_MAJOR == 4 && 5 <= major) || 6 <= major) { print_error(major, minor, release); return PRTE_ERR_SILENT; } /* Protect against the envar version of the Slurm * custom args MCA param. This is an unfortunate * hack that hopefully will eventually go away. * See both of the following for detailed * explanations and discussion: * * https://github.com/openpmix/prrte/issues/1974 * https://github.com/open-mpi/ompi/issues/12471 * * Orgs/users wanting to add custom args to the * internal "srun" command used to spawn the * PRRTE daemons must do so via the default MCA * param files (system or user), or via the * prterun (or its proxy) cmd line */ unsetenv("PRTE_MCA_plm_slurm_args"); unsetenv("OMPI_MCA_plm_slurm_args"); /* carry across the toolname */ pmix_tool_basename = prte_tool_basename; // publish MCA prefixes prefixes = NULL; for (n=0; NULL != prte_framework_names[n]; n++) { if (0 == strcmp("common", prte_framework_names[n])) { continue; } PMIx_Argv_append_nosize(&prefixes, prte_framework_names[n]); } evar = PMIx_Argv_join(prefixes, ','); pmix_setenv("PRTE_MCA_PREFIXES", evar, true, &environ); free(evar); PMIx_Argv_free(prefixes); /* initialize install dirs code */ ret = pmix_mca_base_framework_open(&prte_prteinstalldirs_base_framework, PMIX_MCA_BASE_OPEN_DEFAULT); if (PRTE_SUCCESS != ret) { fprintf(stderr, "prte_prteinstalldirs_base_open() failed -- process will likely abort (%s:%d, " "returned %d instead of PRTE_SUCCESS)\n", __FILE__, __LINE__, ret); return ret; } /* initialize the MCA infrastructure */ if (check_exist(prte_install_dirs.prtelibdir)) { pmix_asprintf(&path, "prte@%s", 
prte_install_dirs.prtelibdir); } ret = pmix_init_util(NULL, 0, path); if (NULL != path) { free(path); } if (PMIX_SUCCESS != ret) { return prte_pmix_convert_status(ret); } ret = pmix_show_help_add_dir(prte_install_dirs.prtedatadir); if (PMIX_SUCCESS != ret) { return prte_pmix_convert_status(ret); } /* keyval lex-based parser */ /* Setup the parameter system */ if (PRTE_SUCCESS != (ret = pmix_mca_base_var_init())) { return ret; } /* pre-load any default mca param files */ prte_preload_default_mca_params(); return PRTE_SUCCESS; } int prte_init_util(prte_proc_type_t flags) { int ret; char *error = NULL; if (util_initialized) { return PRTE_SUCCESS; } util_initialized = true; ret = prte_init_minimum(); if (PRTE_SUCCESS != ret) { return ret; } /* ensure we know the type of proc for when we finalize */ prte_process_info.proc_type = flags; /* initialize the memory allocator */ prte_malloc_init(); /* initialize the output system */ pmix_output_init(); /* set the nodename so anyone who needs it has it - this * must come AFTER we initialize the installdirs */ prte_setup_hostname(); /* pretty-print stack handlers */ if (PRTE_SUCCESS != (ret = prte_util_register_stackhandlers())) { error = "prte_util_register_stackhandlers"; goto error; } /* set system resource limits - internally protected against * doing so twice in cases where the launch agent did it for us */ if (PRTE_SUCCESS != (ret = prte_util_init_sys_limits(&error))) { pmix_show_help("help-prte-runtime.txt", "prte_init:syslimit", false, error); return PRTE_ERR_SILENT; } ret = pmix_mca_base_framework_open(&prte_prtebacktrace_base_framework, PMIX_MCA_BASE_OPEN_DEFAULT); if (PRTE_SUCCESS != ret) { error = "prte_backtrace_base_open"; goto error; } return PRTE_SUCCESS; error: if (PRTE_ERR_SILENT != ret) { pmix_show_help("help-prte-runtime", "prte_init:startup:internal-failure", true, error, PRTE_ERROR_NAME(ret), ret); } return ret; } int prte_init(int *pargc, char ***pargv, prte_proc_type_t flags) { int ret; char *error = 
NULL; PMIX_ACQUIRE_THREAD(&prte_init_lock); if (prte_initialized) { PMIX_RELEASE_THREAD(&prte_init_lock); return PRTE_SUCCESS; } PMIX_RELEASE_THREAD(&prte_init_lock); ret = prte_init_util(flags); if (PRTE_SUCCESS != ret) { return ret; } /* * Initialize the event library */ if (PRTE_SUCCESS != (ret = prte_event_base_open())) { error = "prte_event_base_open"; goto error; } /* setup the locks */ if (PRTE_SUCCESS != (ret = prte_locks_init())) { error = "prte_locks_init"; goto error; } /* Ensure the rest of the process info structure is initialized */ if (PRTE_SUCCESS != (ret = prte_proc_info())) { error = "prte_proc_info"; goto error; } if (PRTE_SUCCESS != (ret = prte_hwloc_base_register())) { error = "prte_hwloc_base_register"; goto error; } /* let the pmix server register params */ pmix_server_register_params(); /* open hwloc */ prte_hwloc_base_open(); /* setup the global job and node arrays */ prte_job_data = PMIX_NEW(pmix_pointer_array_t); ret = pmix_pointer_array_init(prte_job_data, PRTE_GLOBAL_ARRAY_BLOCK_SIZE, PRTE_GLOBAL_ARRAY_MAX_SIZE, PRTE_GLOBAL_ARRAY_BLOCK_SIZE); if (PMIX_SUCCESS != ret) { PMIX_ERROR_LOG(ret); error = "setup job array"; goto error; } prte_node_pool = PMIX_NEW(pmix_pointer_array_t); ret = pmix_pointer_array_init(prte_node_pool, PRTE_GLOBAL_ARRAY_BLOCK_SIZE, PRTE_GLOBAL_ARRAY_MAX_SIZE, PRTE_GLOBAL_ARRAY_BLOCK_SIZE); if (PMIX_SUCCESS != ret) { PMIX_ERROR_LOG(ret); error = "setup node array"; goto error; } prte_node_topologies = PMIX_NEW(pmix_pointer_array_t); ret = pmix_pointer_array_init(prte_node_topologies, PRTE_GLOBAL_ARRAY_BLOCK_SIZE, PRTE_GLOBAL_ARRAY_MAX_SIZE, PRTE_GLOBAL_ARRAY_BLOCK_SIZE); if (PMIX_SUCCESS != ret) { PMIX_ERROR_LOG(ret); error = "setup node topologies array"; goto error; } /* open the SCHIZO framework as everyone needs it, and the * ess will use it to help select its component */ ret = pmix_mca_base_framework_open(&prte_schizo_base_framework, PMIX_MCA_BASE_OPEN_DEFAULT); if (PMIX_SUCCESS != ret) { PMIX_ERROR_LOG(ret); 
error = "prte_schizo_base_open"; goto error; } if (PRTE_SUCCESS != (ret = prte_schizo_base_select())) { error = "prte_schizo_base_select"; goto error; } /* open the ESS and select the correct module for this environment */ ret = pmix_mca_base_framework_open(&prte_ess_base_framework, PMIX_MCA_BASE_OPEN_DEFAULT); if (PMIX_SUCCESS != ret) { PMIX_ERROR_LOG(ret); error = "prte_ess_base_open"; goto error; } if (PRTE_SUCCESS != (ret = prte_ess_base_select())) { error = "prte_ess_base_select"; goto error; } /* initialize the RTE for this environment */ if (PRTE_SUCCESS != (ret = prte_ess.init(*pargc, *pargv))) { error = "prte_ess_init"; goto error; } /* initialize the cache */ prte_cache = PMIX_NEW(pmix_pointer_array_t); pmix_pointer_array_init(prte_cache, 1, INT_MAX, 1); /* All done */ PMIX_ACQUIRE_THREAD(&prte_init_lock); prte_initialized = true; PMIX_RELEASE_THREAD(&prte_init_lock); return PRTE_SUCCESS; error: if (PRTE_ERR_SILENT != ret) { pmix_show_help("help-prte-runtime", "prte_init:startup:internal-failure", true, error, PRTE_ERROR_NAME(ret), ret); } return ret; } static bool check_pmix_overlap(char *var, char *value) { char *tmp; if (0 == strncmp(var, "dl_", 3)) { pmix_asprintf(&tmp, "PMIX_MCA_pdl_%s", &var[3]); setenv(tmp, value, false); free(tmp); return true; } else if (0 == strncmp(var, "oob_", 4) && NULL == strstr(var, "verbose")) { pmix_asprintf(&tmp, "PMIX_MCA_ptl_%s", &var[4]); setenv(tmp, value, false); free(tmp); return true; } else if (0 == strncmp(var, "hwloc_", 6)) { pmix_asprintf(&tmp, "PMIX_MCA_%s", var); setenv(tmp, value, false); free(tmp); return true; } else if (0 == strncmp(var, "if_", 3)) { // need to convert if to pif pmix_asprintf(&tmp, "PMIX_MCA_pif_%s", &var[3]); setenv(tmp, value, false); free(tmp); return true; } else if (0 == strncmp(var, "mca_", 4)) { pmix_asprintf(&tmp, "PMIX_MCA_%s", var); setenv(tmp, value, false); free(tmp); return true; } return false; } void prte_preload_default_mca_params(void) { char *file, *home, *tmp; 
pmix_list_t params, params2, pfinal; pmix_mca_base_var_file_value_t *fv, *fv2, *fvnext, *fvnext2; bool match; home = (char*)pmix_home_directory(-1); PMIX_CONSTRUCT(¶ms, pmix_list_t); PMIX_CONSTRUCT(¶ms2, pmix_list_t); PMIX_CONSTRUCT(&pfinal, pmix_list_t); /* start with the system-level defaults */ file = pmix_os_path(false, prte_install_dirs.sysconfdir, "prte-mca-params.conf", NULL); pmix_mca_base_parse_paramfile(file, ¶ms); free(file); /* now get the user-level defaults */ file = pmix_os_path(false, home, ".prte", "mca-params.conf", NULL); pmix_mca_base_parse_paramfile(file, ¶ms2); free(file); /* cross-check the lists, keeping the params2 entries over any * matching params entries as they overwrite the system ones */ PMIX_LIST_FOREACH_SAFE(fv, fvnext, ¶ms, pmix_mca_base_var_file_value_t) { match = false; PMIX_LIST_FOREACH_SAFE(fv2, fvnext2, ¶ms2, pmix_mca_base_var_file_value_t) { /* do we have a match? */ if (0 == strcmp(fv->mbvfv_var, fv2->mbvfv_var)) { /* transfer the user-level default to the final list */ pmix_list_remove_item(¶ms2, &fv2->super); pmix_list_append(&pfinal, &fv2->super); /* remove and release the system-level duplicate */ pmix_list_remove_item(¶ms, &fv->super); PMIX_RELEASE(fv); match = true; break; } } if (!match) { /* transfer the system-level default to the final list */ pmix_list_remove_item(¶ms, &fv->super); pmix_list_append(&pfinal, &fv->super); } } /* transfer any remaining use-level defaults to the final list * as they had no matches */ while (NULL != (fv2 = (pmix_mca_base_var_file_value_t*)pmix_list_remove_first(¶ms2))) { pmix_list_append(&pfinal, &fv2->super); } /* now process the final list - but do not overwrite if the * user already has the param in our environment as their * environment settings override all defaults */ PMIX_LIST_FOREACH(fv, &pfinal, pmix_mca_base_var_file_value_t) { if (pmix_pmdl_base_check_pmix_param(fv->mbvfv_var)) { pmix_asprintf(&tmp, "PMIX_MCA_%s", fv->mbvfv_var); // set it, but don't overwrite if they 
already // have a value in our environment setenv(tmp, fv->mbvfv_value, false); free(tmp); } else { pmix_asprintf(&tmp, "PRTE_MCA_%s", fv->mbvfv_var); // set it, but don't overwrite if they already // have a value in our environment setenv(tmp, fv->mbvfv_value, false); free(tmp); // if this relates to the DL, OOB, HWLOC, or IF, // or mca frameworks, then we also need to set // the equivalent PMIx value check_pmix_overlap(fv->mbvfv_var, fv->mbvfv_value); } } PMIX_LIST_DESTRUCT(¶ms); PMIX_LIST_DESTRUCT(¶ms2); PMIX_LIST_DESTRUCT(&pfinal); } prrte-3.0.13/src/runtime/prte_quit.c0000664000175000017500000004074115145263240017560 0ustar alastairalastair/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2008 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2014-2020 Intel, Inc. All rights reserved. * Copyright (c) 2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2021-2025 Nanook Consulting All rights reserved. * Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All Rights * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #include "prte_config.h" #include "constants.h" #include #include #ifdef HAVE_UNISTD_H # include #endif #ifdef HAVE_SYS_PARAM_H # include #endif #include #include #include #ifdef HAVE_SYS_TYPES_H # include #endif /* HAVE_SYS_TYPES_H */ #ifdef HAVE_SYS_WAIT_H # include #endif /* HAVE_SYS_WAIT_H */ #ifdef HAVE_SYS_TIME_H # include #endif /* HAVE_SYS_TIME_H */ #include "src/mca/errmgr/errmgr.h" #include "src/mca/plm/plm.h" #include "src/mca/state/state.h" #include "src/threads/pmix_threads.h" #include "src/util/pmix_output.h" #include "src/util/session_dir.h" #include "src/util/pmix_show_help.h" #include "src/runtime/data_server/prte_data_server.h" #include "src/runtime/prte_globals.h" #include "src/runtime/prte_locks.h" #include "src/runtime/prte_quit.h" #include "src/runtime/runtime.h" /* * Globals */ static int num_aborted = 0; static int num_killed = 0; static int num_failed_start = 0; void prte_quit(int fd, short args, void *cbdata) { prte_state_caddy_t *caddy = (prte_state_caddy_t *) cbdata; PRTE_HIDE_UNUSED_PARAMS(fd, args); PMIX_ACQUIRE_OBJECT(caddy); /* cleanup */ if (NULL != caddy) { PMIX_RELEASE(caddy); } /* check one-time lock to protect against "bounce" */ if (pmix_mutex_trylock(&prte_quit_lock)) { /* returns 1 if already locked */ return; } /* flag that the event lib should no longer be looped * so we will exit */ prte_event_base_active = false; PMIX_POST_OBJECT(prte_event_base_active); /* break the event loop - this will cause the loop to exit upon completion of any current event */ prte_event_base_loopexit(prte_event_base); } static char *print_aborted_job(prte_job_t *job, prte_app_context_t *approc, prte_proc_t *proc, prte_node_t *node) { char *output = NULL; char *nodename; if (NULL == node) { nodename = "UNKNOWN"; } else { nodename = node->name; } if (PRTE_PROC_STATE_FAILED_TO_START == proc->state || PRTE_PROC_STATE_FAILED_TO_LAUNCH == proc->state) { switch (proc->exit_code) { case 
PMIX_ERR_SILENT: case PRTE_ERR_SILENT: /* say nothing - it was already reported */ break; case PMIX_ERR_SYS_LIMITS_PIPES: output = pmix_show_help_string("help-prun.txt", "prun:sys-limit-pipe", true, prte_tool_basename, nodename, (unsigned long) proc->name.rank); break; case PMIX_ERR_PIPE_SETUP_FAILURE: output = pmix_show_help_string("help-prun.txt", "prun:pipe-setup-failure", true, prte_tool_basename, nodename, (unsigned long) proc->name.rank); break; case PMIX_ERR_SYS_LIMITS_CHILDREN: output = pmix_show_help_string("help-prun.txt", "prun:sys-limit-children", true, prte_tool_basename, nodename, (unsigned long) proc->name.rank); break; case PMIX_ERR_SYS_LIMITS_FILES: output = pmix_show_help_string("help-prun.txt", "prun:sys-limit-files", true, prte_tool_basename, nodename, (unsigned long) proc->name.rank); break; case PRTE_ERR_FAILED_GET_TERM_ATTRS: output = pmix_show_help_string("help-prun.txt", "prun:failed-term-attrs", true, prte_tool_basename, nodename, (unsigned long) proc->name.rank); break; case PMIX_ERR_JOB_WDIR_NOT_FOUND: output = pmix_show_help_string("help-prun.txt", "prun:wdir-not-found", true, prte_tool_basename, approc->cwd, nodename, (unsigned long) proc->name.rank); break; case PMIX_ERR_JOB_WDIR_NOT_ACCESSIBLE: output = pmix_show_help_string("help-prun.txt", "prun:wdir-not-accessible", true, prte_tool_basename, approc->cwd, nodename, (unsigned long) proc->name.rank); break; case PMIX_ERR_JOB_EXE_NOT_FOUND: output = pmix_show_help_string("help-prun.txt", "prun:exe-not-found", true, prte_tool_basename, (unsigned long) proc->name.rank, prte_tool_basename, prte_tool_basename, nodename, approc->app); break; case PMIX_ERR_EXE_NOT_ACCESSIBLE: output = pmix_show_help_string("help-prun.txt", "prun:exe-not-accessible", true, prte_tool_basename, approc->app, nodename, (unsigned long) proc->name.rank); break; case PRTE_ERR_MULTIPLE_AFFINITIES: output = pmix_show_help_string("help-prun.txt", "prun:multiple-paffinity-schemes", true, NULL); break; case 
PRTE_ERR_TOPO_SLOT_LIST_NOT_SUPPORTED: output = pmix_show_help_string("help-prun.txt", "prun:topo-not-supported", true, prte_process_info.nodename, "rankfile containing a slot_list of ", NULL, approc->app); break; case PRTE_ERR_INVALID_NODE_RANK: output = pmix_show_help_string("help-prun.txt", "prun:invalid-node-rank", true); break; case PRTE_ERR_INVALID_LOCAL_RANK: output = pmix_show_help_string("help-prun.txt", "prun:invalid-local-rank", true); break; case PRTE_ERR_NOT_ENOUGH_CORES: output = pmix_show_help_string("help-prun.txt", "prun:not-enough-resources", true, "sockets", nodename, "bind-to-core", approc->app); break; case PRTE_ERR_TOPO_CORE_NOT_SUPPORTED: output = pmix_show_help_string("help-prun.txt", "prun:topo-not-supported", true, nodename, "bind-to-core", "", approc->app); break; case PRTE_ERR_INVALID_PHYS_CPU: output = pmix_show_help_string("help-prun.txt", "prun:invalid-phys-cpu", true); break; case PRTE_ERR_NOT_ENOUGH_SOCKETS: output = pmix_show_help_string("help-prun.txt", "prun:not-enough-resources", true, "sockets", nodename, "bind-to-socket", approc->app); break; case PRTE_ERR_TOPO_SOCKET_NOT_SUPPORTED: output = pmix_show_help_string("help-prun.txt", "prun:topo-not-supported", true, nodename, "bind-to-socket", "", approc->app); break; case PRTE_ERR_MODULE_NOT_FOUND: output = pmix_show_help_string("help-prun.txt", "prun:paffinity-missing-module", true, nodename); break; case PRTE_ERR_SLOT_LIST_RANGE: output = pmix_show_help_string("help-prun.txt", "prun:invalid-slot-list-range", true, nodename, NULL); break; case PRTE_ERR_PIPE_READ_FAILURE: output = pmix_show_help_string("help-prun.txt", "prun:pipe-read-failure", true, prte_tool_basename, nodename, (unsigned long) proc->name.rank); break; case PRTE_ERR_SOCKET_NOT_AVAILABLE: output = pmix_show_help_string("help-prun.txt", "prun:proc-socket-not-avail", true, prte_tool_basename, PRTE_ERROR_NAME(proc->exit_code), nodename, (unsigned long) proc->name.rank); break; default: if (0 != proc->exit_code) { 
output = pmix_show_help_string("help-prun.txt", "prun:proc-failed-to-start", true, prte_tool_basename, proc->exit_code, PRTE_ERROR_NAME(proc->exit_code), nodename, (unsigned long) proc->name.rank); } else { output = pmix_show_help_string("help-prun.txt", "prun:proc-failed-to-start-no-status", true, prte_tool_basename, nodename); } } return output; } else if (PRTE_PROC_STATE_ABORTED == proc->state || PRTE_PROC_STATE_CALLED_ABORT == proc->state) { output = pmix_show_help_string("help-prun.txt", "prun:proc-ordered-abort", true, prte_tool_basename, (unsigned long) proc->name.rank, (unsigned long) proc->pid, nodename, prte_tool_basename); return output; } else if (PRTE_PROC_STATE_ABORTED_BY_SIG == job->state) { /* aborted by signal */ #ifdef HAVE_STRSIGNAL if (NULL != strsignal(WTERMSIG(proc->exit_code))) { output = pmix_show_help_string("help-prun.txt", "prun:proc-aborted-strsignal", true, prte_tool_basename, (unsigned long) proc->name.rank, (unsigned long) proc->pid, nodename, WTERMSIG(proc->exit_code), strsignal(WTERMSIG(proc->exit_code))); } else { #endif output = pmix_show_help_string("help-prun.txt", "prun:proc-aborted", true, prte_tool_basename, (unsigned long) proc->name.rank, (unsigned long) proc->pid, nodename, WTERMSIG(proc->exit_code)); #ifdef HAVE_STRSIGNAL } #endif return output; } else if (PRTE_PROC_STATE_TERM_WO_SYNC == proc->state) { /* proc exited w/o finalize */ output = pmix_show_help_string("help-prun.txt", "prun:proc-exit-no-sync", true, prte_tool_basename, (unsigned long) proc->name.rank, (unsigned long) proc->pid, nodename, prte_tool_basename, prte_tool_basename); return output; } else if (PRTE_PROC_STATE_COMM_FAILED == proc->state) { output = pmix_show_help_string("help-prun.txt", "prun:proc-comm-failed", true, PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(&proc->name), nodename); return output; } else if (PRTE_PROC_STATE_SENSOR_BOUND_EXCEEDED == proc->state) { switch (proc->exit_code) { case PRTE_ERR_MEM_LIMIT_EXCEEDED: output = 
pmix_show_help_string("help-prun.txt", "prun:proc-mem-exceeded", true, PRTE_NAME_PRINT(&proc->name), nodename); break; case PRTE_ERR_PROC_STALLED: output = pmix_show_help_string("help-prun.txt", "prun:proc-stalled", true); break; default: output = pmix_show_help_string("help-prun.txt", "prun:proc-sensor-exceeded", true); } return output; } else if (PRTE_PROC_STATE_HEARTBEAT_FAILED == proc->state) { output = pmix_show_help_string("help-prun.txt", "prun:proc-heartbeat-failed", true, prte_tool_basename, PRTE_NAME_PRINT(&proc->name), nodename); return output; } else if (PRTE_PROC_STATE_TERM_NON_ZERO == proc->state) { if (prte_get_attribute(&job->attributes, PRTE_JOB_ERROR_NONZERO_EXIT, NULL, PMIX_BOOL)) { output = pmix_show_help_string("help-prun.txt", "prun:non-zero-exit", true, prte_tool_basename, PRTE_NAME_PRINT(&proc->name), proc->exit_code); return output; } } /* nothing here */ return NULL; } /* * On abnormal termination - dump the * exit status of the aborted procs. */ static char *dump_job(prte_job_t *job) { int32_t i; prte_proc_t *proc, *pptr; prte_app_context_t *approc; prte_node_t *node; /* cycle through and count the number that were killed or aborted */ for (i = 0; i < job->procs->size; i++) { if (NULL == (pptr = (prte_proc_t *) pmix_pointer_array_get_item(job->procs, i))) { /* array is left-justified - we are done */ break; } if (PRTE_PROC_STATE_FAILED_TO_START == pptr->state || PRTE_PROC_STATE_FAILED_TO_LAUNCH == pptr->state) { ++num_failed_start; } else if (PRTE_PROC_STATE_ABORTED == pptr->state) { ++num_aborted; } else if (PRTE_PROC_STATE_ABORTED_BY_SIG == pptr->state) { ++num_killed; } else if (PRTE_PROC_STATE_SENSOR_BOUND_EXCEEDED == pptr->state) { ++num_killed; } } /* see if there is a guilty party */ proc = NULL; if (!prte_get_attribute(&job->attributes, PRTE_JOB_ABORTED_PROC, (void **) &proc, PMIX_POINTER) || NULL == proc) { return NULL; } approc = (prte_app_context_t *) pmix_pointer_array_get_item(job->apps, proc->app_idx); node = proc->node; 
return print_aborted_job(job, approc, proc, node); } char *prte_dump_aborted_procs(prte_job_t *jdata) { prte_job_t *job, *launcher; char *output = NULL; /* if we already reported it, then don't do it again */ if (PRTE_FLAG_TEST(jdata, PRTE_JOB_FLAG_ERR_REPORTED)) { return NULL; } PRTE_FLAG_SET(jdata, PRTE_JOB_FLAG_ERR_REPORTED); /* if this job is not a launcher itself, then get the launcher for this job */ if (PMIX_NSPACE_INVALID(jdata->launcher)) { launcher = jdata; } else { launcher = prte_get_job_data_object(jdata->launcher); if (NULL == launcher) { output = strdup("LAUNCHER JOB OBJECT NOT FOUND"); return output; } } /* cycle thru all the children of this launcher to find the * one that caused the error */ /* if this is a non-persistent job, it won't have any child * jobs, so look at it directly */ if (0 == pmix_list_get_size(&launcher->children)) { output = dump_job(jdata); } else { PMIX_LIST_FOREACH(job, &launcher->children, prte_job_t) { output = dump_job(job); if (NULL != output) { break; } } } return output; } prrte-3.0.13/src/runtime/prte_finalize.c0000664000175000017500000001213315145263240020371 0ustar alastairalastair/* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2020 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2009-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2011-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2014-2020 Intel, Inc. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* Copyright (c) 2020 IBM Corporation. All rights reserved. * Copyright (c) 2021-2024 Nanook Consulting. All rights reserved. * Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All Rights * reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ /** @file **/ #include "prte_config.h" #include "constants.h" #include "src/mca/base/pmix_mca_base_framework.h" #include "src/util/pmix_argv.h" #include "src/util/pmix_output.h" #include "src/mca/base/pmix_mca_base_alias.h" #include "src/mca/base/pmix_mca_base_var.h" #include "src/mca/base/pmix_base.h" #include "src/mca/ess/base/base.h" #include "src/mca/ess/ess.h" #include "src/runtime/prte_globals.h" #include "src/runtime/prte_locks.h" #include "src/runtime/runtime.h" #include "src/util/name_fns.h" #include "src/util/proc_info.h" int prte_finalize(void) { int rc, n, i; prte_job_t *jdata = NULL, *child_jdata = NULL, *next_jdata = NULL; prte_app_context_t *app; prte_proc_t *p; prte_node_t *node; prte_topology_t *topo; PMIX_ACQUIRE_THREAD(&prte_init_lock); if (!prte_initialized) { PMIX_RELEASE_THREAD(&prte_init_lock); return PRTE_ERROR; } prte_initialized = false; PMIX_RELEASE_THREAD(&prte_init_lock); /* protect against multiple calls */ if (pmix_mutex_trylock(&prte_finalize_lock)) { return PRTE_SUCCESS; } /* flag that we are finalizing */ prte_finalizing = true; /* release the cache */ PMIX_RELEASE(prte_cache); /* call the finalize function for this environment */ if (PRTE_SUCCESS != (rc = prte_ess.finalize())) { return rc; } (void) pmix_mca_base_framework_close(&prte_ess_base_framework); // clean up the node array for (n = 0; n < prte_node_pool->size; n++) { node = (prte_node_t *) pmix_pointer_array_get_item(prte_node_pool, n); if (NULL == node) { continue; } pmix_pointer_array_set_item(prte_node_pool, n, NULL); PMIX_RELEASE(node); } PMIX_RELEASE(prte_node_pool); for (n = 0; n < prte_job_data->size; n++) { jdata = (prte_job_t *) pmix_pointer_array_get_item(prte_job_data, n); if (NULL == jdata) { 
continue; } // Remove all children from the list // We do not want to destruct this list here since that occurs in the // prte_job_t destructor - which will happen in the next loop. PMIX_LIST_FOREACH_SAFE(child_jdata, next_jdata, &jdata->children, prte_job_t) { pmix_list_remove_item(&jdata->children, &child_jdata->super); } /* clean up any app contexts as they refcount the jdata object */ for (i=0; i < jdata->apps->size; i++) { app = (prte_app_context_t*)pmix_pointer_array_get_item(jdata->apps, i); if (NULL != app) { pmix_pointer_array_set_item(jdata->apps, i, NULL); PMIX_RELEASE(app); } } // clean up any procs for (i=0; i < jdata->procs->size; i++) { p = (prte_proc_t*)pmix_pointer_array_get_item(jdata->procs, i); if (NULL != p) { pmix_pointer_array_set_item(jdata->procs, i, NULL); PMIX_RELEASE(p); } } pmix_pointer_array_set_item(prte_job_data, n, NULL); PMIX_RELEASE(jdata); } PMIX_RELEASE(prte_job_data); for (n = 0; n < prte_node_topologies->size; n++) { topo = (prte_topology_t *) pmix_pointer_array_get_item(prte_node_topologies, n); if (NULL == topo) { continue; } pmix_pointer_array_set_item(prte_node_topologies, n, NULL); PMIX_RELEASE(topo); } PMIX_RELEASE(prte_node_topologies); /* Close the general debug stream */ pmix_output_close(prte_debug_output); pmix_mca_base_alias_cleanup(); prte_proc_info_finalize(); pmix_output_finalize(); /* now shutdown PMIx - need to do this last as it finalizes * the utilities and class system we depend upon */ PMIx_server_finalize(); return PRTE_SUCCESS; } prrte-3.0.13/src/runtime/prte_wait.h0000664000175000017500000002133715145263240017547 0ustar alastairalastair/* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008 Institut National de Recherche en Informatique * et Automatique. All rights reserved. * Copyright (c) 2011 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2021-2025 Nanook Consulting All rights reserved. * Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All Rights * reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ /** * @file * * Interface for waitpid / async notification of child death with the * libevent runtime system. */ #ifndef PRTE_WAIT_H #define PRTE_WAIT_H #include "prte_config.h" #ifdef HAVE_SYS_TYPES_H # include #endif #include #if HAVE_SYS_TIME_H # include #endif #include "src/event/event-internal.h" #include "src/util/pmix_output.h" #include "src/rml/rml_types.h" #include "src/runtime/prte_globals.h" #include "src/threads/pmix_threads.h" #include "types.h" BEGIN_C_DECLS /** typedef for callback function used in \c prte_wait_cb */ typedef void (*prte_wait_cbfunc_t)(int fd, short args, void *cb); /* define a tracker */ typedef struct { pmix_list_item_t super; prte_event_t ev; prte_proc_t *child; prte_wait_cbfunc_t cbfunc; void *cbdata; } prte_wait_tracker_t; PRTE_EXPORT PMIX_CLASS_DECLARATION(prte_wait_tracker_t); /** * Disable / re-Enable SIGCHLD handler * * These functions have to be used after prte_wait_init was called. */ PRTE_EXPORT void prte_wait_enable(void); PRTE_EXPORT void prte_wait_disable(void); /** * Register a callback for process termination * * Register a callback for notification when this process causes a SIGCHLD. 
* \c waitpid() will have already been called on the process at this * time. */ PRTE_EXPORT void prte_wait_cb(prte_proc_t *proc, prte_wait_cbfunc_t callback, void *data); PRTE_EXPORT void prte_wait_cb_cancel(prte_proc_t *proc); /* In a few places, we need to barrier until something happens * that changes a flag to indicate we can release - e.g., waiting * for a specific message to arrive. If no progress thread is running, * we cycle across prte_progress - however, if a progress thread * is active, then we need to just nanosleep to avoid cross-thread * confusion */ #define PRTE_WAIT_FOR_COMPLETION(flg) \ do { \ pmix_output_verbose(1, prte_progress_thread_debug, \ "%s waiting on progress thread at %s:%d", \ PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), __FILE__, __LINE__); \ while ((flg)) { \ /* provide a short quiet period so we \ * don't hammer the cpu while waiting \ */ \ struct timespec tp = {0, 100000}; \ nanosleep(&tp, NULL); \ } \ PMIX_ACQUIRE_OBJECT(flg); \ } while (0); /** * In a number of places within the code, we want to setup a timer * to detect when some procedure failed to complete. For example, * when we launch the daemons, we frequently have no way to directly * detect that a daemon failed to launch. Setting a timer allows us * to automatically fail out of the launch if we don't hear from a * daemon in some specified time window. * * Computing the amount of time to wait takes a few lines of code, but * this macro encapsulates those lines along with the timer event * definition just as a convenience. It also centralizes the * necessary checks to ensure that the microsecond field is always * less than 1M since some systems care about that, and to ensure * that the computed wait time doesn't exceed the desired max * wait * * NOTE: the callback function is responsible for releasing the timer * event back to the event pool! 
*/ #define PRTE_DETECT_TIMEOUT(n, deltat, maxwait, cbfunc, cbd) \ do { \ prte_timer_t *_t; \ int _timeout; \ _t = PMIX_NEW(prte_timer_t); \ _t->payload = (cbd); \ prte_event_evtimer_set(prte_event_base, _t->ev, (cbfunc), _t); \ _timeout = (deltat) * (n); \ if ((maxwait) > 0 && _timeout > (maxwait)) { \ _timeout = (maxwait); \ } \ _t->tv.tv_sec = _timeout / 1000000; \ _t->tv.tv_usec = _timeout % 1000000; \ PMIX_OUTPUT_VERBOSE((1, prte_debug_output, "defining timeout: %ld sec %ld usec at %s:%d", \ (long) _t->tv.tv_sec, (long) _t->tv.tv_usec, __FILE__, __LINE__)); \ PMIX_POST_OBJECT(_t); \ prte_event_evtimer_add(_t->ev, &_t->tv); \ } while (0); /** * There are places in the code where we just want to periodically * wakeup to do something, and then go back to sleep again. Setting * a timer allows us to do this * * NOTE: the callback function is responsible for releasing the timer * event back to the event pool when done! Otherwise, the finalize * function will take care of it. */ #define PRTE_TIMER_EVENT(sec, usec, cbfunc) \ do { \ prte_timer_t *tm; \ tm = PMIX_NEW(prte_timer_t); \ prte_event_evtimer_set(prte_event_base, tm->ev, (cbfunc), tm); \ tm->tv.tv_sec = (sec) + (usec) / 1000000; \ tm->tv.tv_usec = (usec) % 1000000; \ PMIX_OUTPUT_VERBOSE((1, prte_debug_output, \ "defining timer event: %ld sec %ld usec at %s:%d", \ (long) tm->tv.tv_sec, (long) tm->tv.tv_usec, __FILE__, __LINE__)); \ PMIX_POST_OBJECT(tm); \ prte_event_evtimer_add(tm->ev, &tm->tv); \ } while (0); /** * \internal * * Initialize the wait system (allocate mutexes, etc.) */ PRTE_EXPORT int prte_wait_init(void); /** * \internal * * Finalize the wait system (deallocate mutexes, etc.) 
*/
PRTE_EXPORT int prte_wait_finalize(void);

END_C_DECLS

#endif /* #ifndef PRTE_WAIT_H */
prrte-3.0.13/src/runtime/prte_locks.c0000664000175000017500000000316315145263240017706 0ustar alastairalastair/*
 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation. All rights reserved.
 * Copyright (c) 2004-2005 The University of Tennessee and The University
 *                         of Tennessee Research Foundation. All rights
 *                         reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart. All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2015      Research Organization for Information Science
 *                         and Technology (RIST). All rights reserved.
 * Copyright (c) 2017-2019 Intel, Inc. All rights reserved.
 * Copyright (c) 2020      Cisco Systems, Inc. All rights reserved
 * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved.
 * Copyright (c) 2021      Amazon.com, Inc. or its affiliates. All Rights
 *                         reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 *
 */

#include "prte_config.h"
#include "constants.h"

#include "src/runtime/prte_locks.h"

/*
 * Definitions of the process-wide lock objects. Every one of them is
 * initialised statically, right here, by its *_STATIC_INIT initialiser.
 */

/* for everyone */
pmix_mutex_t prte_finalize_lock = PMIX_MUTEX_STATIC_INIT;

/* for HNPs */
pmix_mutex_t prte_abort_inprogress_lock = PMIX_MUTEX_STATIC_INIT;
pmix_mutex_t prte_jobs_complete_lock = PMIX_MUTEX_STATIC_INIT;
pmix_mutex_t prte_quit_lock = PMIX_MUTEX_STATIC_INIT;
pmix_lock_t prte_init_lock = PMIX_LOCK_STATIC_INIT;

/*
 * Lock-subsystem initialisation hook.
 *
 * All locks above are set up by their static initialisers, so there is no
 * run-time work to do; the function unconditionally returns PRTE_SUCCESS.
 */
int prte_locks_init(void)
{
    return PRTE_SUCCESS;
}
prrte-3.0.13/src/runtime/prte_quit.h0000664000175000017500000000126415145263240017562 0ustar alastairalastair/*
 * Copyright (c) 2010-2020 Cisco Systems, Inc. All rights reserved
 * Copyright (c) 2012      Los Alamos National Security, LLC. All rights
 *                         reserved.
 * Copyright (c) 2016-2019 Intel, Inc. All rights reserved.
 *
 * Copyright (c) 2021      Nanook Consulting. All rights reserved.
* $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */

/**
 * @file
 *
 * Declarations for the shutdown entry point and the aborted-process
 * report helper.
 */

#ifndef PRTE_QUIT_H
#define PRTE_QUIT_H

#include "prte_config.h"

#include "src/runtime/prte_globals.h"

BEGIN_C_DECLS

/* NOTE(review): the (fd, args, cbdata) parameter list has the shape of an
 * event callback; how each argument is used is not visible here - confirm
 * against the definition. */
PRTE_EXPORT void prte_quit(int fd, short args, void *cbdata);

/* Returns a string describing the aborted processes of jdata.
 * NOTE(review): presumably heap-allocated and owned by the caller -
 * confirm against the definition before freeing or retaining it. */
PRTE_EXPORT char *prte_dump_aborted_procs(prte_job_t *jdata);

END_C_DECLS

#endif /* PRTE_QUIT_H */
prrte-3.0.13/src/runtime/data_type_support/0000775000175000017500000000000015145263240021140 5ustar alastairalastairprrte-3.0.13/src/runtime/data_type_support/prte_dt_copy_fns.c0000664000175000017500000001054015145263240024645 0ustar alastairalastair/*
 * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation. All rights reserved.
 * Copyright (c) 2004-2011 The University of Tennessee and The University
 *                         of Tennessee Research Foundation. All rights
 *                         reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart. All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2011-2020 Cisco Systems, Inc. All rights reserved
 * Copyright (c) 2011-2013 Los Alamos National Security, LLC.
 *                         All rights reserved.
 * Copyright (c) 2014-2020 Intel, Inc. All rights reserved.
 * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved.
* $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #include "prte_config.h" #ifdef HAVE_SYS_TYPES_H # include #endif #include #include "src/mca/errmgr/errmgr.h" #include "src/mca/rmaps/rmaps_types.h" #include "src/pmix/pmix-internal.h" #include "src/runtime/prte_globals.h" #include "src/util/pmix_argv.h" /** * JOB */ int prte_job_copy(prte_job_t **dest, prte_job_t *src) { (*dest) = src; PMIX_RETAIN(src); return PRTE_SUCCESS; } /** * NODE */ int prte_node_copy(prte_node_t **dest, prte_node_t *src) { prte_node_t *node; node = PMIX_NEW(prte_node_t); node->name = strdup(src->name); node->state = src->state; node->slots = src->slots; node->slots_inuse = src->slots_inuse; node->slots_max = src->slots_max; node->topology = src->topology; node->flags = src->flags; (*dest) = node; return PRTE_SUCCESS; } /** * PROC */ int prte_proc_copy(prte_proc_t **dest, prte_proc_t *src) { (*dest) = src; PMIX_RETAIN(src); return PRTE_SUCCESS; } /* * APP CONTEXT */ int prte_app_copy(prte_app_context_t **dest, prte_app_context_t *src) { prte_value_t *kv, *kvnew; pmix_status_t rc; /* create the new object */ *dest = PMIX_NEW(prte_app_context_t); if (NULL == *dest) { PRTE_ERROR_LOG(PRTE_ERR_OUT_OF_RESOURCE); return PRTE_ERR_OUT_OF_RESOURCE; } /* copy data into it */ (*dest)->idx = src->idx; if (NULL != src->app) { (*dest)->app = strdup(src->app); } (*dest)->num_procs = src->num_procs; (*dest)->argv = PMIX_ARGV_COPY_COMPAT(src->argv); (*dest)->env = PMIX_ARGV_COPY_COMPAT(src->env); if (NULL != src->cwd) { (*dest)->cwd = strdup(src->cwd); } PMIX_LIST_FOREACH(kv, &src->attributes, prte_value_t) { kvnew = PMIX_NEW(prte_value_t); PMIX_VALUE_XFER_DIRECT(rc, &kvnew->value, &kv->value); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(kvnew); return prte_pmix_convert_status(rc); } pmix_list_append(&(*dest)->attributes, &kvnew->super); } return PRTE_SUCCESS; } /* * JOB_MAP */ int prte_map_copy(struct prte_job_map_t **d, struct prte_job_map_t *s) { int32_t i; prte_job_map_t 
**dest = (prte_job_map_t **) d; prte_job_map_t *src = (prte_job_map_t *) s; if (NULL == src) { *dest = NULL; return PRTE_SUCCESS; } /* create the new object */ *dest = PMIX_NEW(prte_job_map_t); if (NULL == *dest) { PRTE_ERROR_LOG(PRTE_ERR_OUT_OF_RESOURCE); return PRTE_ERR_OUT_OF_RESOURCE; } /* copy data into it */ (*dest)->mapping = src->mapping; (*dest)->ranking = src->ranking; (*dest)->binding = src->binding; (*dest)->num_new_daemons = src->num_new_daemons; (*dest)->daemon_vpid_start = src->daemon_vpid_start; (*dest)->num_nodes = src->num_nodes; /* copy the pointer array - have to do this manually * as no dss.copy function is setup for that object */ (*dest)->nodes->lowest_free = src->nodes->lowest_free; (*dest)->nodes->number_free = src->nodes->number_free; (*dest)->nodes->size = src->nodes->size; (*dest)->nodes->max_size = src->nodes->max_size; (*dest)->nodes->block_size = src->nodes->block_size; for (i = 0; i < src->nodes->size; i++) { (*dest)->nodes->addr[i] = src->nodes->addr[i]; } return PRTE_SUCCESS; } prrte-3.0.13/src/runtime/data_type_support/prte_dt_packing_fns.c0000664000175000017500000004527615145263240025325 0ustar alastairalastair/* * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2011-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2014-2020 Intel, Inc. All rights reserved. * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #include "prte_config.h" #include "types.h" #include #include "src/class/pmix_pointer_array.h" #include "src/hwloc/hwloc-internal.h" #include "src/mca/errmgr/errmgr.h" #include "src/mca/rmaps/rmaps_types.h" #include "src/pmix/pmix-internal.h" #include "src/util/pmix_argv.h" #include "src/runtime/prte_globals.h" /* * JOB * NOTE: We do not pack all of the job object's fields as many of them have no * value in sending them to another location. The only purpose in packing and * sending a job object is to communicate the data required to dynamically * spawn another job - so we only pack that limited set of required data */ int prte_job_pack(pmix_data_buffer_t *bkt, prte_job_t *job) { pmix_status_t rc; int32_t j, count, bookmark; prte_app_context_t *app; prte_proc_t *proc; prte_attribute_t *kv; pmix_list_t *cache; prte_info_item_t *val; /* pack the nspace */ rc = PMIx_Data_pack(NULL, bkt, (void *) &job->nspace, 1, PMIX_PROC_NSPACE); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } /* pack the flags */ rc = PMIx_Data_pack(NULL, bkt, (void *) &job->flags, 1, PMIX_UINT16); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } /* pack the attributes that need to be sent */ count = 0; PMIX_LIST_FOREACH(kv, &job->attributes, prte_attribute_t) { if (PRTE_ATTR_GLOBAL == kv->local) { ++count; } } rc = PMIx_Data_pack(NULL, bkt, (void *) &count, 1, PMIX_INT32); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } PMIX_LIST_FOREACH(kv, &job->attributes, prte_attribute_t) { if (PRTE_ATTR_GLOBAL == kv->local) { rc = PMIx_Data_pack(NULL, bkt, (void *) &kv->key, 1, PMIX_UINT16); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } rc = PMIx_Data_pack(NULL, bkt, (void *) &kv->data, 1, PMIX_VALUE); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } } } /* check 
for job info attribute */ cache = NULL; if (prte_get_attribute(&job->attributes, PRTE_JOB_INFO_CACHE, (void **) &cache, PMIX_POINTER) && NULL != cache) { /* we need to pack these as well, but they are composed * of prte_info_item_t's on a list. So first pack the number * of list elements */ count = pmix_list_get_size(cache); rc = PMIx_Data_pack(NULL, bkt, (void *) &count, 1, PMIX_INT32); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } /* now pack each element on the list */ PMIX_LIST_FOREACH(val, cache, prte_info_item_t) { rc = PMIx_Data_pack(NULL, bkt, (void *) &val->info, 1, PMIX_INFO); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } } } else { /* pack a zero to indicate no job info is being passed */ count = 0; rc = PMIx_Data_pack(NULL, bkt, (void *) &count, 1, PMIX_INT32); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } } /* pack the personality */ count = PMIX_ARGV_COUNT_COMPAT(job->personality); rc = PMIx_Data_pack(NULL, bkt, (void *) &count, 1, PMIX_INT32); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } for (j = 0; j < count; j++) { rc = PMIx_Data_pack(NULL, bkt, (void *) &job->personality[j], 1, PMIX_STRING); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } } /* pack the number of apps */ rc = PMIx_Data_pack(NULL, bkt, (void *) &job->num_apps, 1, PMIX_UINT32); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } /* if there are apps, pack the app_contexts */ if (0 < job->num_apps) { for (j = 0; j < job->apps->size; j++) { if (NULL == (app = (prte_app_context_t *) pmix_pointer_array_get_item(job->apps, j))) { continue; } rc = prte_app_pack(bkt, app); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } } } /* pack the number of procs and offset */ rc = PMIx_Data_pack(NULL, bkt, (void *) &job->num_procs, 1, 
PMIX_PROC_RANK); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } rc = PMIx_Data_pack(NULL, bkt, (void *) &job->offset, 1, PMIX_PROC_RANK); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } if (0 < job->num_procs) { for (j = 0; j < job->procs->size; j++) { if (NULL == (proc = (prte_proc_t *) pmix_pointer_array_get_item(job->procs, j))) { continue; } rc = prte_proc_pack(bkt, proc); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } } } /* pack the stdin target */ rc = PMIx_Data_pack(NULL, bkt, (void *) &job->stdin_target, 1, PMIX_PROC_RANK); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } /* pack the total slots allocated to the job */ rc = PMIx_Data_pack(NULL, bkt, (void *) &job->total_slots_alloc, 1, PMIX_INT32); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } /* if the map is NULL, then we cannot pack it as there is * nothing to pack. However, we have to flag whether or not * the map is included so the unpacking routine can know * what to do */ if (NULL == job->map) { /* pack a zero value */ j = 0; } else { /* pack a one to indicate a map is there */ j = 1; } rc = PMIx_Data_pack(NULL, bkt, (void *) &j, 1, PMIX_INT32); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } /* pack the map - this will only pack the fields that control * HOW a job is to be mapped. 
We do -not- pack the mapped procs * or nodes as this info does not need to be transmitted */ if (NULL != job->map) { rc = prte_map_pack(bkt, job->map); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } } /* pack the bookmark */ if (NULL == job->bookmark) { bookmark = -1; } else { bookmark = job->bookmark->index; } rc = PMIx_Data_pack(NULL, bkt, (void *) &bookmark, 1, PMIX_INT32); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } /* pack the job state */ rc = PMIx_Data_pack(NULL, bkt, (void *) &job->state, 1, PMIX_INT32); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } /* pack the launcher ID */ rc = PMIx_Data_pack(NULL, bkt, (void *) &job->launcher, 1, PMIX_PROC_NSPACE); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } return PRTE_SUCCESS; } int prte_node_pack(pmix_data_buffer_t *bkt, prte_node_t *node) { int rc; int32_t count; uint8_t flag; prte_attribute_t *kv; /* do not pack the index - it is meaningless on the other end */ /* pack the node name */ rc = PMIx_Data_pack(NULL, bkt, (void *) &node->name, 1, PMIX_STRING); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } /* do not pack the daemon name or launch id */ /* pack the number of procs on the node */ rc = PMIx_Data_pack(NULL, bkt, (void *) &node->num_procs, 1, PMIX_PROC_RANK); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } /* do not pack the procs */ /* pack whether we are oversubscribed or not */ flag = PRTE_FLAG_TEST(node, PRTE_NODE_FLAG_OVERSUBSCRIBED); rc = PMIx_Data_pack(NULL, bkt, (void *) &flag, 1, PMIX_UINT8); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } /* pack the state */ rc = PMIx_Data_pack(NULL, bkt, (void *) &node->state, 1, PMIX_UINT8); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } /* pack any shared 
attributes */ count = 0; PMIX_LIST_FOREACH(kv, &node->attributes, prte_attribute_t) { if (PRTE_ATTR_GLOBAL == kv->local) { ++count; } } rc = PMIx_Data_pack(NULL, bkt, (void *) &count, 1, PMIX_INT32); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } if (0 < count) { PMIX_LIST_FOREACH(kv, &node->attributes, prte_attribute_t) { if (PRTE_ATTR_GLOBAL == kv->local) { rc = PMIx_Data_pack(NULL, bkt, (void *) &kv->key, 1, PMIX_UINT16); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } rc = PMIx_Data_pack(NULL, bkt, (void *) &kv->data, 1, PMIX_VALUE); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } } } } return PRTE_SUCCESS; } /* * PROC */ int prte_proc_pack(pmix_data_buffer_t *bkt, prte_proc_t *proc) { pmix_status_t rc; int32_t count; prte_attribute_t *kv; /* pack the name */ rc = PMIx_Data_pack(NULL, bkt, &proc->name, 1, PMIX_PROC); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } /* pack the daemon/node it is on */ rc = PMIx_Data_pack(NULL, bkt, &proc->parent, 1, PMIX_PROC_RANK); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } /* pack the local rank */ rc = PMIx_Data_pack(NULL, bkt, &proc->local_rank, 1, PMIX_UINT16); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } /* pack the node rank */ rc = PMIx_Data_pack(NULL, bkt, &proc->node_rank, 1, PMIX_UINT16); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } /* pack the state */ rc = PMIx_Data_pack(NULL, bkt, &proc->state, 1, PMIX_UINT32); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } /* pack the app context index */ rc = PMIx_Data_pack(NULL, bkt, &proc->app_idx, 1, PMIX_UINT32); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } /* pack the app rank */ rc = PMIx_Data_pack(NULL, bkt, &proc->app_rank, 1, 
PMIX_PROC_RANK); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } /* pack the cpuset */ rc = PMIx_Data_pack(NULL, bkt, (void *) &proc->cpuset, 1, PMIX_STRING); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } /* pack the attributes that will go */ count = 0; PMIX_LIST_FOREACH(kv, &proc->attributes, prte_attribute_t) { if (PRTE_ATTR_GLOBAL == kv->local) { ++count; } } rc = PMIx_Data_pack(NULL, bkt, &count, 1, PMIX_INT32); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } if (0 < count) { PMIX_LIST_FOREACH(kv, &proc->attributes, prte_attribute_t) { if (PRTE_ATTR_GLOBAL == kv->local) { rc = PMIx_Data_pack(NULL, bkt, (void *) &kv->key, 1, PMIX_UINT16); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } rc = PMIx_Data_pack(NULL, bkt, (void *) &kv->data, 1, PMIX_VALUE); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } } } } return PRTE_SUCCESS; } /* * APP CONTEXT */ int prte_app_pack(pmix_data_buffer_t *bkt, prte_app_context_t *app) { pmix_status_t rc; int32_t count, j; prte_attribute_t *kv; /* pack the application index (for multiapp jobs) */ rc = PMIx_Data_pack(NULL, bkt, &app->idx, 1, PMIX_INT32); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } /* pack the application name */ rc = PMIx_Data_pack(NULL, bkt, &app->app, 1, PMIX_STRING); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } /* pack the number of processes */ rc = PMIx_Data_pack(NULL, bkt, &app->num_procs, 1, PMIX_INT32); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } /* pack the first rank for this app */ rc = PMIx_Data_pack(NULL, bkt, &app->first_rank, 1, PMIX_PROC_RANK); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } /* pack the number of entries in the argv array */ count = 
PMIX_ARGV_COUNT_COMPAT(app->argv); rc = PMIx_Data_pack(NULL, bkt, &count, 1, PMIX_INT32); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } /* if there are entries, pack the argv entries */ for (j = 0; j < count; j++) { rc = PMIx_Data_pack(NULL, bkt, (void *) &app->argv[j], 1, PMIX_STRING); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } } /* pack the number of entries in the enviro array */ count = PMIX_ARGV_COUNT_COMPAT(app->env); rc = PMIx_Data_pack(NULL, bkt, &count, 1, PMIX_INT32); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } /* if there are entries, pack the enviro entries */ for (j = 0; j < count; j++) { rc = PMIx_Data_pack(NULL, bkt, (void *) &app->env[j], 1, PMIX_STRING); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } } /* pack the cwd */ rc = PMIx_Data_pack(NULL, bkt, &app->cwd, 1, PMIX_STRING); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } /* pack the flags */ rc = PMIx_Data_pack(NULL, bkt, &app->flags, 1, PMIX_INT8); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } /* pack attributes */ count = 0; PMIX_LIST_FOREACH(kv, &app->attributes, prte_attribute_t) { if (PRTE_ATTR_GLOBAL == kv->local) { ++count; } } rc = PMIx_Data_pack(NULL, bkt, &count, 1, PMIX_INT32); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } if (0 < count) { PMIX_LIST_FOREACH(kv, &app->attributes, prte_attribute_t) { if (PRTE_ATTR_GLOBAL == kv->local) { rc = PMIx_Data_pack(NULL, bkt, (void *) &kv->key, 1, PMIX_UINT16); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } rc = PMIx_Data_pack(NULL, bkt, (void *) &kv->data, 1, PMIX_VALUE); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } } } } return PRTE_SUCCESS; } /* * JOB_MAP * NOTE: There is no obvious reason to 
include all the node information when * sending a map */ int prte_map_pack(pmix_data_buffer_t *bkt, struct prte_job_map_t *mp) { pmix_status_t rc; prte_job_map_t *map = (prte_job_map_t *) mp; /* pack the requested mapper */ rc = PMIx_Data_pack(NULL, bkt, &map->req_mapper, 1, PMIX_STRING); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } /* pack the last mapper */ rc = PMIx_Data_pack(NULL, bkt, &map->last_mapper, 1, PMIX_STRING); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } /* pack the policies */ rc = PMIx_Data_pack(NULL, bkt, &map->mapping, 1, PMIX_UINT16); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } rc = PMIx_Data_pack(NULL, bkt, &map->ranking, 1, PMIX_UINT16); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } rc = PMIx_Data_pack(NULL, bkt, &map->binding, 1, PMIX_UINT16); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } /* pack the number of nodes involved in the job */ rc = PMIx_Data_pack(NULL, bkt, &map->num_nodes, 1, PMIX_UINT32); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return prte_pmix_convert_status(rc); } return PRTE_SUCCESS; } prrte-3.0.13/src/runtime/data_type_support/prte_dt_unpacking_fns.c0000664000175000017500000005074715145263240025667 0ustar alastairalastair/* * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2020 Cisco Systems, Inc. 
All rights reserved * Copyright (c) 2011-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2014-2020 Intel, Inc. All rights reserved. * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #include "prte_config.h" #include "types.h" #include #include "src/hwloc/hwloc-internal.h" #include "src/mca/errmgr/errmgr.h" #include "src/mca/rmaps/rmaps_types.h" #include "src/pmix/pmix-internal.h" #include "src/util/pmix_argv.h" #include "src/runtime/prte_globals.h" /* * JOB * NOTE: We do not pack all of the job object's fields as many of them have no * value in sending them to another location. The only purpose in packing and * sending a job object is to communicate the data required to dynamically * spawn another job - so we only pack that limited set of required data. * Therefore, only unpack what was packed */ int prte_job_unpack(pmix_data_buffer_t *bkt, prte_job_t **job) { int rc; int32_t k, n, count, bookmark; prte_job_t *jptr; prte_app_idx_t j; prte_attribute_t *kv; char *tmp; prte_info_item_t *val; pmix_info_t pval; pmix_list_t *cache; /* create the prte_job_t object */ jptr = PMIX_NEW(prte_job_t); if (NULL == jptr) { PRTE_ERROR_LOG(PRTE_ERR_OUT_OF_RESOURCE); return PRTE_ERR_OUT_OF_RESOURCE; } /* unpack the nspace */ n = 1; rc = PMIx_Data_unpack(NULL, bkt, &jptr->nspace, &n, PMIX_PROC_NSPACE); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(jptr); return prte_pmix_convert_status(rc); } /* unpack the flags */ n = 1; rc = PMIx_Data_unpack(NULL, bkt, &jptr->flags, &n, PMIX_UINT16); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(jptr); return prte_pmix_convert_status(rc); } /* unpack the attributes */ n = 1; rc = PMIx_Data_unpack(NULL, bkt, &count, &n, PMIX_INT32); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(jptr); return prte_pmix_convert_status(rc); } for (k = 0; k < count; k++) { kv = PMIX_NEW(prte_attribute_t); n = 1; rc = 
PMIx_Data_unpack(NULL, bkt, &kv->key, &n, PMIX_UINT16); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(jptr); PMIX_RELEASE(kv); return prte_pmix_convert_status(rc); } rc = PMIx_Data_unpack(NULL, bkt, &kv->data, &n, PMIX_VALUE); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(jptr); PMIX_RELEASE(kv); return prte_pmix_convert_status(rc); } kv->local = PRTE_ATTR_GLOBAL; // obviously not a local value pmix_list_append(&jptr->attributes, &kv->super); } /* unpack any job info */ n = 1; rc = PMIx_Data_unpack(NULL, bkt, &count, &n, PMIX_INT32); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(jptr); return prte_pmix_convert_status(rc); } if (0 < count) { cache = PMIX_NEW(pmix_list_t); prte_set_attribute(&jptr->attributes, PRTE_JOB_INFO_CACHE, PRTE_ATTR_LOCAL, (void *) cache, PMIX_POINTER); for (k = 0; k < count; k++) { n = 1; rc = PMIx_Data_unpack(NULL, bkt, &pval, &n, PMIX_INFO); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(jptr); return prte_pmix_convert_status(rc); } val = PMIX_NEW(prte_info_item_t); PMIX_INFO_XFER(&val->info, &pval); PMIX_INFO_DESTRUCT(&pval); pmix_list_append(cache, &val->super); } } /* unpack the personality */ n = 1; rc = PMIx_Data_unpack(NULL, bkt, &count, &n, PMIX_INT32); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(jptr); return prte_pmix_convert_status(rc); } for (k = 0; k < count; k++) { n = 1; rc = PMIx_Data_unpack(NULL, bkt, &tmp, &n, PMIX_STRING); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(jptr); return prte_pmix_convert_status(rc); } PMIX_ARGV_APPEND_NOSIZE_COMPAT(&jptr->personality, tmp); free(tmp); } /* unpack the num apps */ n = 1; rc = PMIx_Data_unpack(NULL, bkt, &jptr->num_apps, &n, PMIX_UINT32); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(jptr); return prte_pmix_convert_status(rc); } /* if there are apps, unpack them */ if (0 < jptr->num_apps) { prte_app_context_t *app; for (j = 0; j < jptr->num_apps; j++) { n = 1; rc = prte_app_unpack(bkt, 
&app); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(jptr); return prte_pmix_convert_status(rc); } pmix_pointer_array_add(jptr->apps, app); } } /* unpack num procs and offset */ n = 1; rc = PMIx_Data_unpack(NULL, bkt, &jptr->num_procs, &n, PMIX_PROC_RANK); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(jptr); return prte_pmix_convert_status(rc); } n = 1; rc = PMIx_Data_unpack(NULL, bkt, &jptr->offset, &n, PMIX_PROC_RANK); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(jptr); return prte_pmix_convert_status(rc); } if (0 < jptr->num_procs) { prte_proc_t *proc; for (j = 0; j < jptr->num_procs; j++) { rc = prte_proc_unpack(bkt, &proc); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(jptr); return prte_pmix_convert_status(rc); } pmix_pointer_array_add(jptr->procs, proc); } } /* unpack stdin target */ n = 1; rc = PMIx_Data_unpack(NULL, bkt, &jptr->stdin_target, &n, PMIX_PROC_RANK); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(jptr); return prte_pmix_convert_status(rc); } /* unpack the total slots allocated to the job */ n = 1; rc = PMIx_Data_unpack(NULL, bkt, &jptr->total_slots_alloc, &n, PMIX_INT32); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(jptr); return prte_pmix_convert_status(rc); } /* if the map is NULL, then we didn't pack it as there was * nothing to pack. 
Instead, we packed a flag to indicate whether or not * the map is included */ n = 1; rc = PMIx_Data_unpack(NULL, bkt, &j, &n, PMIX_INT32); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(jptr); return prte_pmix_convert_status(rc); } if (0 < j) { /* unpack the map */ n = 1; rc = prte_map_unpack(bkt, &(jptr->map)); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(jptr); return prte_pmix_convert_status(rc); } } /* unpack the bookmark */ n = 1; rc = PMIx_Data_unpack(NULL, bkt, &bookmark, &n, PMIX_INT32); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(jptr); return prte_pmix_convert_status(rc); } if (0 <= bookmark) { /* retrieve it */ jptr->bookmark = (prte_node_t *) pmix_pointer_array_get_item(prte_node_pool, bookmark); } /* unpack the job state */ n = 1; rc = PMIx_Data_unpack(NULL, bkt, &jptr->state, &n, PMIX_INT32); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(jptr); return prte_pmix_convert_status(rc); } /* unpack the launcher ID */ n = 1; rc = PMIx_Data_unpack(NULL, bkt, &jptr->launcher, &n, PMIX_PROC_NSPACE); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(jptr); return prte_pmix_convert_status(rc); } *job = jptr; return PRTE_SUCCESS; } /* * NODE */ int prte_node_unpack(pmix_data_buffer_t *bkt, prte_node_t **nd) { pmix_status_t rc; int32_t n, k, count; prte_node_t *node; uint8_t flag; prte_attribute_t *kv; /* create the node object */ node = PMIX_NEW(prte_node_t); if (NULL == node) { PRTE_ERROR_LOG(PRTE_ERR_OUT_OF_RESOURCE); return PRTE_ERR_OUT_OF_RESOURCE; } /* unpack the node name */ n = 1; rc = PMIx_Data_unpack(NULL, bkt, &node->name, &n, PMIX_STRING); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(node); return prte_pmix_convert_status(rc); } /* unpack the number of procs on the node */ n = 1; rc = PMIx_Data_unpack(NULL, bkt, &node->num_procs, &n, PMIX_PROC_RANK); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(node); return prte_pmix_convert_status(rc); } /* unpack whether we are 
oversubscribed */ n = 1; rc = PMIx_Data_unpack(NULL, bkt, &flag, &n, PMIX_UINT8); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(node); return prte_pmix_convert_status(rc); } if (flag) { PRTE_FLAG_SET(node, PRTE_NODE_FLAG_OVERSUBSCRIBED); } /* unpack the state */ n = 1; rc = PMIx_Data_unpack(NULL, bkt, &node->state, &n, PMIX_UINT8); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(node); return prte_pmix_convert_status(rc); } /* unpack the attributes */ n = 1; rc = PMIx_Data_unpack(NULL, bkt, &count, &n, PMIX_INT32); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(node); return prte_pmix_convert_status(rc); } for (k = 0; k < count; k++) { kv = PMIX_NEW(prte_attribute_t); n = 1; rc = PMIx_Data_unpack(NULL, bkt, &kv->key, &n, PMIX_UINT16); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(node); PMIX_RELEASE(kv); return prte_pmix_convert_status(rc); } rc = PMIx_Data_unpack(NULL, bkt, &kv->data, &n, PMIX_VALUE); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(node); PMIX_RELEASE(kv); return prte_pmix_convert_status(rc); } kv->local = PRTE_ATTR_GLOBAL; // obviously not a local value pmix_list_append(&node->attributes, &kv->super); } *nd = node; return PRTE_SUCCESS; } /* * PROC */ int prte_proc_unpack(pmix_data_buffer_t *bkt, prte_proc_t **pc) { pmix_status_t rc; int32_t n, count, k; prte_attribute_t *kv; ; prte_proc_t *proc; /* create the prte_proc_t object */ proc = PMIX_NEW(prte_proc_t); if (NULL == proc) { PRTE_ERROR_LOG(PRTE_ERR_OUT_OF_RESOURCE); return PRTE_ERR_OUT_OF_RESOURCE; } /* unpack the name */ n = 1; rc = PMIx_Data_unpack(NULL, bkt, &proc->name, &n, PMIX_PROC); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(proc); return prte_pmix_convert_status(rc); } /* unpack the node it is on */ n = 1; rc = PMIx_Data_unpack(NULL, bkt, &proc->parent, &n, PMIX_PROC_RANK); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(proc); return prte_pmix_convert_status(rc); } /* unpack the local rank */ n = 
1; rc = PMIx_Data_unpack(NULL, bkt, &proc->local_rank, &n, PMIX_UINT16); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(proc); return prte_pmix_convert_status(rc); } /* unpack the node rank */ n = 1; rc = PMIx_Data_unpack(NULL, bkt, &proc->node_rank, &n, PMIX_UINT16); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(proc); return prte_pmix_convert_status(rc); } /* unpack the state */ n = 1; rc = PMIx_Data_unpack(NULL, bkt, &proc->state, &n, PMIX_UINT32); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(proc); return prte_pmix_convert_status(rc); } /* unpack the app context index */ n = 1; rc = PMIx_Data_unpack(NULL, bkt, &proc->app_idx, &n, PMIX_UINT32); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(proc); return prte_pmix_convert_status(rc); } /* unpack the app_rank */ n = 1; rc = PMIx_Data_unpack(NULL, bkt, &proc->app_rank, &n, PMIX_PROC_RANK); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(proc); return prte_pmix_convert_status(rc); } /* unpack the cpuset */ n = 1; rc = PMIx_Data_unpack(NULL, bkt, &proc->cpuset, &n, PMIX_STRING); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(proc); return prte_pmix_convert_status(rc); } /* unpack the attributes */ rc = PMIx_Data_unpack(NULL, bkt, &count, &n, PMIX_INT32); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(proc); return prte_pmix_convert_status(rc); } for (k = 0; k < count; k++) { kv = PMIX_NEW(prte_attribute_t); n = 1; rc = PMIx_Data_unpack(NULL, bkt, &kv->key, &n, PMIX_UINT16); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(proc); PMIX_RELEASE(kv); return prte_pmix_convert_status(rc); } rc = PMIx_Data_unpack(NULL, bkt, &kv->data, &n, PMIX_VALUE); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(proc); PMIX_RELEASE(kv); return prte_pmix_convert_status(rc); } kv->local = PRTE_ATTR_GLOBAL; // obviously not a local value pmix_list_append(&proc->attributes, &kv->super); } *pc = proc; return PRTE_SUCCESS; } /* * 
APP_CONTEXT */ int prte_app_unpack(pmix_data_buffer_t *bkt, prte_app_context_t **ap) { int rc; prte_app_context_t *app; int32_t n, count, k; prte_attribute_t *kv; char *tmp; /* create the app_context object */ app = PMIX_NEW(prte_app_context_t); if (NULL == app) { PRTE_ERROR_LOG(PRTE_ERR_OUT_OF_RESOURCE); return PRTE_ERR_OUT_OF_RESOURCE; } /* get the app index number */ n = 1; rc = PMIx_Data_unpack(NULL, bkt, &app->idx, &n, PMIX_INT32); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(app); return prte_pmix_convert_status(rc); } /* unpack the application name */ n = 1; rc = PMIx_Data_unpack(NULL, bkt, &app->app, &n, PMIX_STRING); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(app); return prte_pmix_convert_status(rc); } /* get the number of processes */ n = 1; rc = PMIx_Data_unpack(NULL, bkt, &app->num_procs, &n, PMIX_INT32); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(app); return prte_pmix_convert_status(rc); } /* get the first rank for this app */ n = 1; rc = PMIx_Data_unpack(NULL, bkt, &app->first_rank, &n, PMIX_PROC_RANK); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(app); return prte_pmix_convert_status(rc); } /* get the number of argv strings that were packed */ n = 1; rc = PMIx_Data_unpack(NULL, bkt, &count, &n, PMIX_INT32); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(app); return prte_pmix_convert_status(rc); } for (k = 0; k < count; k++) { n = 1; rc = PMIx_Data_unpack(NULL, bkt, &tmp, &n, PMIX_STRING); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(app); return prte_pmix_convert_status(rc); } PMIX_ARGV_APPEND_NOSIZE_COMPAT(&app->argv, tmp); free(tmp); } /* get the number of env strings */ n = 1; rc = PMIx_Data_unpack(NULL, bkt, &count, &n, PMIX_INT32); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(app); return prte_pmix_convert_status(rc); } for (k = 0; k < count; k++) { n = 1; rc = PMIx_Data_unpack(NULL, bkt, &tmp, &n, PMIX_STRING); if (PMIX_SUCCESS != rc) { 
PMIX_ERROR_LOG(rc); PMIX_RELEASE(app); return prte_pmix_convert_status(rc); } PMIX_ARGV_APPEND_NOSIZE_COMPAT(&app->env, tmp); free(tmp); } /* unpack the cwd */ rc = PMIx_Data_unpack(NULL, bkt, &app->cwd, &n, PMIX_STRING); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(app); return prte_pmix_convert_status(rc); } /* get the flags */ n = 1; rc = PMIx_Data_unpack(NULL, bkt, &app->flags, &n, PMIX_INT8); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(app); return prte_pmix_convert_status(rc); } /* unpack the attributes */ rc = PMIx_Data_unpack(NULL, bkt, &count, &n, PMIX_INT32); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(app); return prte_pmix_convert_status(rc); } for (k = 0; k < count; k++) { kv = PMIX_NEW(prte_attribute_t); n = 1; rc = PMIx_Data_unpack(NULL, bkt, &kv->key, &n, PMIX_UINT16); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(app); PMIX_RELEASE(kv); return prte_pmix_convert_status(rc); } rc = PMIx_Data_unpack(NULL, bkt, &kv->data, &n, PMIX_VALUE); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(app); PMIX_RELEASE(kv); return prte_pmix_convert_status(rc); } kv->local = PRTE_ATTR_GLOBAL; // obviously not a local value pmix_list_append(&app->attributes, &kv->super); } *ap = app; return PRTE_SUCCESS; } /* * JOB_MAP * NOTE: There is no obvious reason to include all the node information when * sending a map - hence, we do not pack that field, so don't unpack it here */ int prte_map_unpack(pmix_data_buffer_t *bkt, struct prte_job_map_t **mp) { int rc; int32_t n; prte_job_map_t *map; /* create the prte_rmaps_base_map_t object */ map = PMIX_NEW(prte_job_map_t); if (NULL == map) { PRTE_ERROR_LOG(PRTE_ERR_OUT_OF_RESOURCE); return PRTE_ERR_OUT_OF_RESOURCE; } /* unpack the requested mapper */ n = 1; rc = PMIx_Data_unpack(NULL, bkt, &map->req_mapper, &n, PMIX_STRING); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(map); return prte_pmix_convert_status(rc); } /* unpack the last mapper */ n = 1; rc 
= PMIx_Data_unpack(NULL, bkt, &map->last_mapper, &n, PMIX_STRING); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(map); return prte_pmix_convert_status(rc); } /* unpack the policies */ n = 1; rc = PMIx_Data_unpack(NULL, bkt, &map->mapping, &n, PMIX_UINT16); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(map); return prte_pmix_convert_status(rc); } n = 1; rc = PMIx_Data_unpack(NULL, bkt, &map->ranking, &n, PMIX_UINT16); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(map); return prte_pmix_convert_status(rc); } n = 1; rc = PMIx_Data_unpack(NULL, bkt, &map->binding, &n, PMIX_UINT16); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(map); return prte_pmix_convert_status(rc); } /* unpack the number of nodes involved in the job */ n = 1; n = 1; rc = PMIx_Data_unpack(NULL, bkt, &map->num_nodes, &n, PMIX_UINT32); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_RELEASE(map); return prte_pmix_convert_status(rc); } *mp = map; return PRTE_SUCCESS; } prrte-3.0.13/src/runtime/data_type_support/prte_dt_print_fns.c0000664000175000017500000005670415145263240025043 0ustar alastairalastair/* * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2011-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2013-2020 Intel, Inc. All rights reserved. * Copyright (c) 2021-2026 Nanook Consulting All rights reserved. 
* Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #include "prte_config.h" #include "types.h" #include #include "src/hwloc/hwloc-internal.h" #include "src/util/pmix_argv.h" #include "src/mca/errmgr/errmgr.h" #include "src/mca/grpcomm/grpcomm.h" #include "src/mca/ras/base/base.h" #include "src/mca/rmaps/base/base.h" #include "src/runtime/prte_globals.h" #include "src/util/error_strings.h" #include "src/util/name_fns.h"

/* This function is a modified version of the one found in src/mca/ras/base/ras_base_allocate.c*/
/*
 * Build a string with one entry per package of topology t, listing the
 * allowed cpus of that package (or "NONE" when the package has no allowed
 * cpu), and return it in *output. The caller frees *output.
 *
 * The node argument is unused.
 *
 * NOTE(review): the format strings below carry more arguments than
 * conversion specifiers in this copy of the file - it looks as if markup
 * was stripped from them. They are kept exactly as found.
 */
static void display_cpus(prte_topology_t *t, prte_job_t *jdata, char *node, char**output)
{
    char tmp[2048];
    unsigned pkg, npkgs;
    bool bits_as_cores = false, use_hwthread_cpus = prte_hwloc_default_use_hwthread_cpus;
    unsigned npus, ncores;
    hwloc_obj_t obj;
    hwloc_cpuset_t avail = NULL;
    hwloc_cpuset_t allowed;
    hwloc_cpuset_t coreset = NULL;
    char *tmp1, *tmp2;
    PRTE_HIDE_UNUSED_PARAMS(node);

    npus = hwloc_get_nbobjs_by_type(t->topo, HWLOC_OBJ_PU);
    ncores = hwloc_get_nbobjs_by_type(t->topo, HWLOC_OBJ_CORE);
    if (npus == ncores && !use_hwthread_cpus) {
        /* the bits in this bitmap represent cores */
        bits_as_cores = true;
    }
    use_hwthread_cpus = prte_get_attribute(&jdata->attributes, PRTE_JOB_HWT_CPUS, NULL, PMIX_BOOL);
    if (!use_hwthread_cpus && !bits_as_cores) {
        /* only needed when PU bits must be translated into cores */
        coreset = hwloc_bitmap_alloc();
    }
    avail = hwloc_bitmap_alloc();

    pmix_asprintf(&tmp1, " \n");
    npkgs = hwloc_get_nbobjs_by_type(t->topo, HWLOC_OBJ_PACKAGE);
    allowed = (hwloc_cpuset_t)hwloc_topology_get_allowed_cpuset(t->topo);
    for (pkg = 0; pkg < npkgs; pkg++) {
        obj = hwloc_get_obj_by_type(t->topo, HWLOC_OBJ_PACKAGE, pkg);
        hwloc_bitmap_and(avail, obj->cpuset, allowed);
        if (hwloc_bitmap_iszero(avail)) {
            /* this entry used to be built and then dropped by a
             * "continue", leaking it and losing the NONE line */
            pmix_asprintf(&tmp2, "%s \n", tmp1, pkg, "NONE");
        } else if (bits_as_cores || use_hwthread_cpus) {
            /* can just use the hwloc fn directly */
            hwloc_bitmap_list_snprintf(tmp, sizeof(tmp), avail);
            pmix_asprintf(&tmp2, "%s \n", tmp1, pkg, tmp);
        } else {
            prte_hwloc_build_map(t->topo, avail, use_hwthread_cpus | bits_as_cores, coreset);
            /* now print out the string */
            hwloc_bitmap_list_snprintf(tmp, sizeof(tmp), coreset);
            pmix_asprintf(&tmp2, "%s \n", tmp1, pkg, tmp);
        }
        free(tmp1);
        tmp1 = tmp2;
        tmp2 = NULL;
    }
    hwloc_bitmap_free(avail);
    if (NULL != coreset) {
        hwloc_bitmap_free(coreset);
    }
    pmix_asprintf(output, "%s \n", tmp1);
    free(tmp1);
    return;
}

/*
 * JOB
 *
 * Render a job as text: a header line, every app context, the map (or
 * "No Map"), every proc, and the launch counters. The result is returned
 * in *output and must be freed by the caller.
 */
void prte_job_print(char **output, prte_job_t *src)
{
    char *tmp, *tmp2, *tmp3;
    int32_t i;
    prte_app_context_t *app;
    prte_proc_t *proc;

    /* set default result */
    *output = NULL;

    tmp2 = PMIX_ARGV_JOIN_COMPAT(src->personality, ',');
    pmix_asprintf(&tmp,
                  "\nData for job: %s\tPersonality: %s\tRecovery: %s\n\tNum apps: %ld\tStdin "
                  "target: %s\tState: %s\tAbort: %s",
                  PRTE_JOBID_PRINT(src->nspace),
                  /* never hand a NULL pointer to %s */
                  (NULL == tmp2) ? "NULL" : tmp2,
                  (prte_get_attribute(&src->attributes, PRTE_JOB_RECOVERABLE, NULL, PMIX_BOOL))
                      ? "ENABLED" : "DISABLED",
                  (long) src->num_apps, PRTE_VPID_PRINT(src->stdin_target),
                  prte_job_state_to_str(src->state),
                  (PRTE_FLAG_TEST(src, PRTE_JOB_FLAG_ABORTED)) ? "True" : "False");
    free(tmp2);

    for (i = 0; i < src->apps->size; i++) {
        app = (prte_app_context_t *) pmix_pointer_array_get_item(src->apps, i);
        if (NULL == app) {
            continue;
        }
        prte_app_print(&tmp2, src, app);
        pmix_asprintf(&tmp3, "%s\n%s", tmp, tmp2);
        free(tmp);
        free(tmp2);
        tmp = tmp3;
    }

    if (NULL != src->map) {
        prte_map_print(&tmp2, src);
        pmix_asprintf(&tmp3, "%s%s", tmp, tmp2);
        free(tmp);
        free(tmp2);
        tmp = tmp3;
    } else {
        pmix_asprintf(&tmp2, "%s\nNo Map", tmp);
        free(tmp);
        tmp = tmp2;
    }

    pmix_asprintf(&tmp2, "%s\nNum procs: %ld\tOffset: %ld", tmp, (long) src->num_procs,
                  (long) src->offset);
    free(tmp);
    tmp = tmp2;

    for (i = 0; i < src->procs->size; i++) {
        proc = (prte_proc_t *) pmix_pointer_array_get_item(src->procs, i);
        if (NULL == proc) {
            continue;
        }
        prte_proc_print(&tmp2, src, proc);
        pmix_asprintf(&tmp3, "%s%s", tmp, tmp2);
        free(tmp);
        free(tmp2);
        tmp = tmp3;
    }

    pmix_asprintf(&tmp2, "%s\n\tNum launched: %ld\tNum reported: %ld\tNum terminated: %ld", tmp,
                  (long) src->num_launched, (long) src->num_reported, (long) src->num_terminated);
    free(tmp);
    tmp = tmp2;

    /* set the return */
    *output = tmp;
    return;
}

/*
 * NODE
 *
 * Render a node as text in one of three forms, selected by attributes of
 * jdata: parseable output, the short user-level form (default), or the
 * detailed form used when PRTE_JOB_DISPLAY_DEVEL_MAP is set. The procs of
 * jdata that live on the node are appended. The result is returned in
 * *output and must be freed by the caller; *output stays NULL if the
 * initial buffer of the parseable form cannot be allocated.
 */
void prte_node_print(char **output, prte_job_t *jdata, prte_node_t *src)
{
    char *tmp, *tmp1 = NULL, *tmp2, *tmp3;
    int32_t i,j;
    prte_proc_t *proc;
    prte_topology_t *t;

    /* set default result */
    *output = NULL;

    if (prte_get_attribute(&jdata->attributes, PRTE_JOB_DISPLAY_PARSEABLE_OUTPUT, NULL, PMIX_BOOL)) {
        pmix_asprintf(&tmp, " \n",
                      (NULL == src->name) ? "UNKNOWN" : src->name,
                      (int) src->slots, (int) src->slots_max);
        /* start from an empty string and append one cpu listing per
         * registered topology */
        tmp2 = strdup("");
        if (NULL == tmp2) {
            free(tmp);
            return;
        }
        for (j=0; j < prte_node_topologies->size; j++) {
            t = (prte_topology_t*)pmix_pointer_array_get_item(prte_node_topologies, j);
            if (NULL == t) {
                continue;
            }
            display_cpus(t, jdata, "N/A", &tmp1);
            pmix_asprintf(&tmp3, "%s%s",tmp2, tmp1);
            free(tmp1);
            tmp1 = NULL;
            free(tmp2);
            tmp2 = tmp3;
        }
        pmix_asprintf(&tmp3, "%s%s", tmp,tmp2);
        free(tmp2);
        tmp2 = NULL;
        free(tmp);
        tmp = tmp3;

        /* loop through procs and print their rank */
        for (j = 0; j < src->procs->size; j++) {
            proc = (prte_proc_t *) pmix_pointer_array_get_item(src->procs, j);
            if (NULL == proc) {
                continue;
            }
            if (!PMIX_CHECK_NSPACE(proc->name.nspace, jdata->nspace)) {
                continue;
            }
            prte_proc_print(&tmp2, jdata, proc);
            pmix_asprintf(&tmp3, "%s%s", tmp, tmp2);
            free(tmp2);
            tmp2 = NULL;
            free(tmp);
            tmp = tmp3;
        }
        pmix_asprintf(&tmp3, "%s \n", tmp);
        free(tmp);
        *output = tmp3;
        return;
    }

    if (!prte_get_attribute(&jdata->attributes, PRTE_JOB_DISPLAY_DEVEL_MAP, NULL, PMIX_BOOL)) {
        /* just provide a simple output for users */
        pmix_asprintf(&tmp,
                      "\nData for node: %s\tNum slots: %ld\tMax slots: %ld\tNum procs: %ld",
                      (NULL == src->name) ? "UNKNOWN" : src->name, (long) src->slots,
                      (long) src->slots_max, (long) src->num_procs);
        if (0 == src->num_procs) {
            *output = tmp;
            return;
        }
        goto PRINT_PROCS;
    }

    tmp3 = prte_ras_base_flag_string(src);
    pmix_asprintf(&tmp, "\nData for node: %s\tState: %0x\t%s",
                  (NULL == src->name) ? "UNKNOWN" : src->name, src->state, tmp3);
    free(tmp3);

    /* does this node have any aliases? */
    if (NULL != src->aliases) {
        for (i = 0; NULL != src->aliases[i]; i++) {
            pmix_asprintf(&tmp2, "%s\n resolved from %s", tmp, src->aliases[i]);
            free(tmp);
            tmp = tmp2;
        }
    }

    pmix_asprintf(&tmp2, "%s\n Daemon: %s\tDaemon launched: %s", tmp,
                  (NULL == src->daemon) ? "Not defined" : PRTE_NAME_PRINT(&(src->daemon->name)),
                  PRTE_FLAG_TEST(src, PRTE_NODE_FLAG_DAEMON_LAUNCHED) ? "True" : "False");
    free(tmp);
    tmp = tmp2;

    pmix_asprintf(&tmp2, "%s\n Num slots: %ld\tSlots in use: %ld\tOversubscribed: %s", tmp,
                  (long) src->slots, (long) src->slots_inuse,
                  PRTE_FLAG_TEST(src, PRTE_NODE_FLAG_OVERSUBSCRIBED) ? "TRUE" : "FALSE");
    free(tmp);
    tmp = tmp2;

    pmix_asprintf(&tmp2, "%s\n Num slots allocated: %ld\tMax slots: %ld\tNum procs: %ld", tmp,
                  (long) src->slots, (long) src->slots_max, (long) src->num_procs);
    free(tmp);
    tmp = tmp2;

    tmp3 = NULL;
    if (prte_get_attribute(&src->attributes, PRTE_NODE_USERNAME, (void **) &tmp3, PMIX_STRING)) {
        pmix_asprintf(&tmp2, "%s\n Username on node: %s", tmp, tmp3);
        free(tmp3);
        free(tmp);
        tmp = tmp2;
    }

PRINT_PROCS:
    /* we want to print these procs in their job-rank'd order, but they
     * will be in the node array based on the order in which they were
     * mapped - which doesn't match job-rank'd order in many cases */
    for (i = 0; i < jdata->procs->size; i++) {
        proc = (prte_proc_t *) pmix_pointer_array_get_item(jdata->procs, i);
        if (NULL == proc) {
            continue;
        }
        if (proc->node != src) {
            continue;
        }
        prte_proc_print(&tmp2, jdata, proc);
        pmix_asprintf(&tmp3, "%s%s", tmp, tmp2);
        free(tmp);
        free(tmp2);
        tmp = tmp3;
    }

    /* set the return */
    *output = tmp;
    return;
}

/*
 * PROC
 *
 * Render a proc as text in one of three forms, selected by attributes of
 * jdata: parseable output, the short user-level form (default), or the
 * detailed form used when PRTE_JOB_DISPLAY_DEVEL_MAP is set. The binding
 * is only translated when the proc has a cpuset and its node carries a
 * topology. The result is returned in *output and must be freed by the
 * caller.
 */
void prte_proc_print(char **output, prte_job_t *jdata, prte_proc_t *src)
{
    char *tmp, *tmp3, *tmp4, *pfx2 = " ";
    char *tmp2;
    hwloc_cpuset_t mycpus;
    char *str;
    bool use_hwthread_cpus;
    bool have_topo;
    int pkgnum;
    int npus;
    char *cores = NULL;
    char xmlsp = ' ';

    /* set default result */
    *output = NULL;

    /* check for type of cpu being used */
    use_hwthread_cpus = prte_get_attribute(&jdata->attributes, PRTE_JOB_HWT_CPUS, NULL, PMIX_BOOL)
                            ? true : false;

    /* a binding can only be translated if the node's topology is known */
    have_topo = (NULL != src->node && NULL != src->node->topology
                 && NULL != src->node->topology->topo);

    if (prte_get_attribute(&jdata->attributes, PRTE_JOB_DISPLAY_PARSEABLE_OUTPUT, NULL, PMIX_BOOL)) {
        if (NULL != src->cpuset && have_topo) {
            mycpus = hwloc_bitmap_alloc();
            hwloc_bitmap_list_sscanf(mycpus, src->cpuset);

            npus = hwloc_get_nbobjs_by_type(src->node->topology->topo, HWLOC_OBJ_PU);
            /* assuming each "core" xml element will take 20 characters. There could be at most npus such elements */
            int sz = sizeof(char) * npus * 20;
            cores = (char*)malloc(sz);
            if (NULL == cores) {
                /* do not leak the bitmap on this early exit */
                hwloc_bitmap_free(mycpus);
                pmix_asprintf(&tmp, "\n%*c\n", 8, xmlsp);
                *output = tmp;
                return;
            }
            prte_hwloc_get_binding_info(mycpus, use_hwthread_cpus,
                                        src->node->topology->topo, &pkgnum, cores, sz);
            hwloc_bitmap_free(mycpus);
            pmix_asprintf(&tmp, "\n%*c\n%*c\n"
                          "%*c\n%s\n%*c\n%*c\n%*c\n",
                          8, xmlsp, PRTE_VPID_PRINT(src->name.rank), (long) src->app_idx,
                          12, xmlsp, 16, xmlsp, pkgnum, cores, 16, xmlsp, 12, xmlsp, 8, xmlsp);
            free (cores);
        } else {
            pmix_asprintf(&tmp, "\n%*c\n%*c\n%*c\n",
                          8, xmlsp, PRTE_VPID_PRINT(src->name.rank), 12, xmlsp, 8, xmlsp);
        }

        /* set the return */
        *output = tmp;
        return;
    }

    if (!prte_get_attribute(&jdata->attributes, PRTE_JOB_DISPLAY_DEVEL_MAP, NULL, PMIX_BOOL)) {
        if (NULL != src->cpuset && have_topo) {
            mycpus = hwloc_bitmap_alloc();
            hwloc_bitmap_list_sscanf(mycpus, src->cpuset);
            str = prte_hwloc_base_cset2str(mycpus, use_hwthread_cpus, false,
                                           src->node->topology->topo);
            if (NULL == str) {
                str = strdup("UNBOUND");
            }
            hwloc_bitmap_free(mycpus);
            pmix_asprintf(&tmp, "\n%sProcess jobid: %s App: %ld Process rank: %s Bound: %s", pfx2,
                          PRTE_JOBID_PRINT(src->name.nspace), (long) src->app_idx,
                          PRTE_VPID_PRINT(src->name.rank), str);
            free(str);
        } else {
            /* just print a very simple output for users */
            pmix_asprintf(&tmp, "\n%sProcess jobid: %s App: %ld Process rank: %s Bound: N/A", pfx2,
                          PRTE_JOBID_PRINT(src->name.nspace), (long) src->app_idx,
                          PRTE_VPID_PRINT(src->name.rank));
        }

        /* set the return */
        *output = tmp;
        return;
    }

    pmix_asprintf(&tmp, "\n%sData for proc: %s", pfx2, PRTE_NAME_PRINT(&src->name));

    pmix_asprintf(&tmp3, "%s\n%s Pid: %ld\tLocal rank: %lu\tNode rank: %lu\tApp rank: %d", tmp,
                  pfx2, (long) src->pid, (unsigned long) src->local_rank,
                  (unsigned long) src->node_rank, src->app_rank);
    free(tmp);
    tmp = tmp3;

    if (NULL == src->cpuset) {
        tmp2 = strdup("UNBOUND");
    } else if (!have_topo) {
        /* bound, but without a topology the cpuset cannot be translated;
         * use the same marker as the user-level form */
        tmp2 = strdup("N/A");
    } else {
        mycpus = hwloc_bitmap_alloc();
        hwloc_bitmap_list_sscanf(mycpus, src->cpuset);
        tmp2 = prte_hwloc_base_cset2str(mycpus, use_hwthread_cpus, false,
                                        src->node->topology->topo);
        hwloc_bitmap_free(mycpus);
        if (NULL == tmp2) {
            /* same fallback the user-level form applies */
            tmp2 = strdup("UNBOUND");
        }
    }

    pmix_asprintf(&tmp4, "%s\n%s State: %s\tApp_context: %ld\n%s\tBinding: %s", tmp, pfx2,
                  prte_proc_state_to_str(src->state), (long) src->app_idx, pfx2, tmp2);
    free(tmp);
    free(tmp2);

    /* set the return */
    *output = tmp4;
    return;
}

/*
 * APP CONTEXT
 *
 * Render an app context as text: index, executable, proc count, first
 * rank, every argv and env entry, working directory, the value of the
 * PMIX_PREFIX envar (if any) and the used-on-node flag. The jdata
 * argument is unused. The result is returned in *output and must be
 * freed by the caller.
 */
void prte_app_print(char **output, prte_job_t *jdata, prte_app_context_t *src)
{
    static const char prefix_key[] = "PMIX_PREFIX=";
    char *tmp, *tmp2, *tmp3;
    int i, count;
    PRTE_HIDE_UNUSED_PARAMS(jdata);

    /* set default result */
    *output = NULL;

    pmix_asprintf(&tmp,
                  "\nData for app_context: index %lu\tapp: %s\n\tNum procs: %lu\tFirstRank: %s",
                  (unsigned long) src->idx, (NULL == src->app) ? "NULL" : src->app,
                  (unsigned long) src->num_procs, PRTE_VPID_PRINT(src->first_rank));

    count = PMIX_ARGV_COUNT_COMPAT(src->argv);
    for (i = 0; i < count; i++) {
        pmix_asprintf(&tmp2, "%s\n\tArgv[%d]: %s", tmp, i, src->argv[i]);
        free(tmp);
        tmp = tmp2;
    }

    count = PMIX_ARGV_COUNT_COMPAT(src->env);
    for (i = 0; i < count; i++) {
        pmix_asprintf(&tmp2, "%s\n\tEnv[%lu]: %s", tmp, (unsigned long) i, src->env[i]);
        free(tmp);
        tmp = tmp2;
    }

    /* locate the PMIX_PREFIX value; the '=' is part of the match so that
     * a bare "PMIX_PREFIX" or a longer name such as "PMIX_PREFIX_FOO=..."
     * cannot move the pointer to the wrong place. As before, the last
     * matching entry wins. */
    tmp3 = NULL;
    for (i=0; NULL != src->env && NULL != src->env[i]; i++) {
        if (0 == strncmp(src->env[i], prefix_key, strlen(prefix_key))) {
            tmp3 = src->env[i] + strlen(prefix_key);
        }
    }

    pmix_asprintf(&tmp2, "%s\n\tWorking dir: %s\n\tPMIxPrefix: %s\n\tUsed on node: %s", tmp,
                  (NULL == src->cwd) ? "NULL" : src->cwd,
                  (NULL == tmp3) ? "NULL" : tmp3,
                  PRTE_FLAG_TEST(src, PRTE_APP_FLAG_USED_ON_NODE) ? "TRUE" : "FALSE");
    free(tmp);
    tmp = tmp2;

    /* set the return */
    *output = tmp;
    return;
}

/*
 * JOB_MAP
 *
 * Render the map of jdata as text. With PRTE_JOB_DISPLAY_PARSEABLE_OUTPUT
 * only the nodes are emitted (in their parseable form); otherwise a header
 * describing the mapping, ranking and binding policies is followed by
 * every node, an optional warning for the do-not-launch case, and a
 * closing rule. The result is returned in *output and must be freed by
 * the caller.
 */
void prte_map_print(char **output, prte_job_t *jdata)
{
    char *tmp = NULL, *tmp2 = NULL, *tmp3 = NULL, *tmp4 = NULL;
    char *tmp_node = NULL;
    int32_t i;
    prte_node_t *node;
    prte_job_map_t *src = jdata->map;
    uint16_t u16, *u16ptr = &u16;
    char *ppr, *cpus_per_rank, *cpu_type, *cpuset = NULL;

    /* set default result */
    *output = NULL;

    if (prte_get_attribute(&jdata->attributes, PRTE_JOB_DISPLAY_PARSEABLE_OUTPUT, NULL, PMIX_BOOL)) {
        /* creating the output in an XML format */
        pmix_asprintf(&tmp4, "\n\n");
        tmp = strdup("");
        if (NULL == tmp) {
            free(tmp4);
            return;
        }
        /* loop through nodes */
        for (i = 0; i < src->nodes->size; i++) {
            node = (prte_node_t*)pmix_pointer_array_get_item(src->nodes, i);
            if (NULL == node) {
                continue;
            }
            prte_node_print(&tmp_node, jdata, node);
            pmix_asprintf(&tmp3, "%s%s",tmp,tmp_node);
            free(tmp_node);
            tmp_node = NULL;
            free(tmp);
            tmp = tmp3;
        }
        if (prte_get_attribute(&jdata->attributes, PRTE_JOB_DO_NOT_LAUNCH, NULL, PMIX_BOOL)) {
            pmix_asprintf(&tmp2, "%s\n", tmp);
            free(tmp);
            tmp = tmp2;
        }
        /* end of the xml "map" tag */
        pmix_asprintf(&tmp2, "%s%s\n", tmp4,tmp);
        *output = tmp2;
        free(tmp);
        free(tmp4);
        return;
    }

    /* collect the values shown in the header; each of ppr, cpus_per_rank
     * and cpuset ends up pointing at heap memory that is freed below */
    if (!prte_get_attribute(&jdata->attributes, PRTE_JOB_PPR, (void **) &ppr, PMIX_STRING)) {
        ppr = strdup("N/A");
    }
    if (prte_get_attribute(&jdata->attributes, PRTE_JOB_PES_PER_PROC, (void **) &u16ptr, PMIX_UINT16)) {
        pmix_asprintf(&cpus_per_rank, "%d", (int) u16);
    } else {
        cpus_per_rank = strdup("N/A");
    }
    if (prte_get_attribute(&jdata->attributes, PRTE_JOB_HWT_CPUS, NULL, PMIX_BOOL)) {
        cpu_type = "HWT";
    } else {
        cpu_type = "CORE";
    }
    if (!prte_get_attribute(&jdata->attributes, PRTE_JOB_CPUSET, (void **) &cpuset, PMIX_STRING)) {
        if (NULL == prte_hwloc_default_cpu_list) {
            cpuset = strdup("N/A");
        } else {
            cpuset = strdup(prte_hwloc_default_cpu_list);
        }
    }

    if (prte_get_attribute(&jdata->attributes, PRTE_JOB_DISPLAY_DEVEL_MAP, NULL, PMIX_BOOL)) {
        pmix_asprintf(
            &tmp,
            "\n================================= JOB MAP =================================\n"
            "Data for JOB %s offset %s Total slots allocated %lu\n"
            "Mapper requested: %s Last mapper: %s Mapping policy: %s Ranking policy: %s\n"
            "Binding policy: %s Cpu set: %s PPR: %s Cpus-per-rank: %s Cpu Type: %s",
            PRTE_JOBID_PRINT(jdata->nspace), PRTE_VPID_PRINT(jdata->offset),
            (long unsigned) jdata->total_slots_alloc,
            (NULL == src->req_mapper) ? "NULL" : src->req_mapper,
            (NULL == src->last_mapper) ? "NULL" : src->last_mapper,
            prte_rmaps_base_print_mapping(src->mapping),
            prte_rmaps_base_print_ranking(src->ranking),
            prte_hwloc_base_print_binding(src->binding), cpuset, ppr, cpus_per_rank, cpu_type);

        if (PMIX_RANK_INVALID == src->daemon_vpid_start) {
            pmix_asprintf(
                &tmp2,
                "%s\nNum new daemons: %ld\tNew daemon starting vpid INVALID\nNum nodes: %ld", tmp,
                (long) src->num_new_daemons, (long) src->num_nodes);
        } else {
            pmix_asprintf(&tmp2,
                          "%s\nNum new daemons: %ld\tNew daemon starting vpid %ld\nNum nodes: %ld",
                          tmp, (long) src->num_new_daemons, (long) src->daemon_vpid_start,
                          (long) src->num_nodes);
        }
        free(tmp);
        tmp = tmp2;
    } else {
        /* this is being printed for a user, so let's make it easier to see */
        pmix_asprintf(&tmp,
                      "\n======================== JOB MAP ========================\n"
                      "Data for JOB %s offset %s Total slots allocated %lu\n"
                      " Mapping policy: %s Ranking policy: %s Binding policy: %s\n"
                      " Cpu set: %s PPR: %s Cpus-per-rank: %s Cpu Type: %s\n",
                      PRTE_JOBID_PRINT(jdata->nspace), PRTE_VPID_PRINT(jdata->offset),
                      (long unsigned) jdata->total_slots_alloc,
                      prte_rmaps_base_print_mapping(src->mapping),
                      prte_rmaps_base_print_ranking(src->ranking),
                      prte_hwloc_base_print_binding(src->binding), cpuset, ppr, cpus_per_rank,
                      cpu_type);
    }
    free(ppr);
    free(cpus_per_rank);
    free(cpuset);

    for (i = 0; i < src->nodes->size; i++) {
        node = (prte_node_t *) pmix_pointer_array_get_item(src->nodes, i);
        if (NULL == node) {
            continue;
        }
        prte_node_print(&tmp2, jdata, node);
        pmix_asprintf(&tmp3, "%s\n%s", tmp, tmp2);
        free(tmp);
        free(tmp2);
        tmp = tmp3;
    }

    /* put some warning out for the donotlaunch case */
    if (prte_get_attribute(&jdata->attributes, PRTE_JOB_DO_NOT_LAUNCH, NULL, PMIX_BOOL)) {
        pmix_asprintf(&tmp2,
                      "%s\n\nWarning: This map has been generated with the DONOTLAUNCH option;\n"
                      "\tThe compute node architecture has not been probed, and the displayed\n"
                      "\tmap reflects the HEADNODE ARCHITECTURE. On systems with a different\n"
                      "\tarchitecture between headnode and compute nodes, the map can be\n"
                      "\tdisplayed using `prte --display map /bin/true`, which will launch\n"
                      "\tenough of the DVM to probe the compute node architecture.",
                      tmp);
        free(tmp);
        tmp = tmp2;
    }

    /* let's make it easier to see */
    pmix_asprintf(&tmp2, "%s\n\n=============================================================\n", tmp);
    free(tmp);
    tmp = tmp2;

    /* set the return */
    *output = tmp;
    return;
}
prrte-3.0.13/src/runtime/prte_globals.c0000664000175000017500000005670715145263240020232 0ustar alastairalastair/* * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2013-2020 Intel, Inc. All rights reserved. * Copyright (c) 2014-2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2017-2020 IBM Corporation. All rights reserved. * Copyright (c) 2021-2025 Nanook Consulting All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #include "prte_config.h" #include "constants.h" #include "types.h" #ifdef HAVE_SYS_TIME_H # include #endif #include "src/class/pmix_hash_table.h" #include "src/class/pmix_pointer_array.h" #include "src/class/pmix_value_array.h" #include "src/hwloc/hwloc-internal.h" #include "src/pmix/pmix-internal.h" #include "src/threads/pmix_threads.h" #include "src/mca/errmgr/errmgr.h" #include "src/mca/rmaps/rmaps.h" #include "src/rml/rml.h" #include "src/mca/state/state.h" #include "src/util/pmix_argv.h" #include "src/util/name_fns.h" #include "src/util/pmix_net.h" #include "src/util/pmix_output.h" #include "src/util/proc_info.h" #include "src/util/session_dir.h" #include "src/runtime/prte_globals.h" #include "src/runtime/runtime.h" #include "src/runtime/runtime_internals.h"

/*
 * Definitions of the file-scope PRTE globals. Each one is given its
 * initial value here; the section comments below are the grouping used
 * by the original authors.
 */

/* State Machine */
pmix_list_t prte_job_states = PMIX_LIST_STATIC_INIT;
pmix_list_t prte_proc_states = PMIX_LIST_STATIC_INIT;

/* a clean output channel without prefix */
int prte_clean_output = -1;

/* globals used by RTE */
bool prte_debug_daemons_file_flag = false;
bool prte_leave_session_attached = false;
char *prte_topo_signature = NULL;
char *prte_data_server_uri = NULL;
char *prte_tool_basename = NULL;
char *prte_tool_actual = NULL;
bool prte_dvm_ready = false;
pmix_pointer_array_t *prte_cache = NULL;
bool prte_persistent = true;
bool prte_allow_run_as_root = false;
bool prte_fwd_environment = false;
bool prte_show_launch_progress = false;
bool prte_bootstrap_setup = false;
bool prte_xml_output = false;

/* PRTE OOB port flags */
bool prte_static_ports = false;
char *prte_oob_static_ports = NULL;

bool prte_keep_fqdn_hostnames = false;
bool prte_have_fqdn_allocation = false;
bool prte_show_resolved_nodenames = false;
bool prte_do_not_resolve = false;
int prte_hostname_cutoff = 1000;

pmix_rank_t prted_debug_failure = PMIX_RANK_INVALID;
int prted_debug_failure_delay = -1;
bool prte_never_launched = false;
bool prte_devel_level_output = false;
bool prte_display_topo_with_map = false;

char **prte_launch_environ = NULL;

bool prte_hnp_is_allocated = false;
bool prte_allocation_required = false;
bool prte_managed_allocation = false;
char *prte_set_slots = NULL;
bool prte_set_slots_override = false;
bool prte_nidmap_communicated = false;
bool prte_node_info_communicated = false;

/* launch agents */
char *prte_launch_agent = NULL;
char **prted_cmd_line = NULL;

/* exit flags */
int prte_exit_status = 0;
bool prte_abnormal_term_ordered = false;
bool prte_routing_is_enabled = true;
bool prte_dvm_abort_ordered = false;
bool prte_prteds_term_ordered = false;
bool prte_allowed_exit_without_sync = false;

int prte_timeout_usec_per_proc = -1;
float prte_max_timeout = -1.0;
prte_timer_t *prte_mpiexec_timeout = NULL;

int prte_stack_trace_wait_timeout = 30;

/* global arrays for data storage */
pmix_pointer_array_t *prte_job_data = NULL;
pmix_pointer_array_t *prte_node_pool = NULL;
pmix_pointer_array_t *prte_node_topologies = NULL;
pmix_pointer_array_t *prte_local_children = NULL;
pmix_rank_t prte_total_procs = 0;
char *prte_base_compute_node_sig = NULL;
bool prte_hetero_nodes = false;

/* IOF controls */

/* generate new xterm windows to display output from specified ranks */
char *prte_xterm = NULL;

/* report launch progress */
bool prte_report_launch_progress = false;

/* allocation specification */
char *prte_default_hostfile = NULL;
bool prte_default_hostfile_given = false;
int prte_num_allocated_nodes = 0;
char *prte_default_dash_host = NULL;

/* tool communication controls */
bool prte_report_events = false;
char *prte_report_events_uri = NULL;

/* report bindings */
bool prte_report_bindings = false;

/* exit status reporting */
bool prte_report_child_jobs_separately = false;
struct timeval prte_child_time_to_exit = {0};

/* length of stat history to keep */
int prte_stat_history_size = -1;

/* envars to forward */
char **prte_forwarded_envars = NULL;

/* maximum size of virtual machine - used to subdivide allocation 
*/ int prte_max_vm_size = -1; int prte_debug_output = -1; bool prte_debug_daemons_flag = false; char *prte_job_ident = NULL; bool prte_execute_quiet = false; bool prte_report_silent_errors = false; bool prte_hwloc_shmem_available = false; /* See comment in src/tools/prun/debuggers.c about this MCA param */ bool prte_in_parallel_debugger = false; char *prte_daemon_cores = NULL; int prte_dt_init(void) { /* set default output */ prte_debug_output = pmix_output_open(NULL); /* open up the verbose output for PRTE debugging */ if (prte_debug_flag || 0 < prte_debug_verbosity || (prte_debug_daemons_flag && (PRTE_PROC_IS_DAEMON || PRTE_PROC_IS_MASTER))) { if (0 < prte_debug_verbosity) { pmix_output_set_verbosity(prte_debug_output, prte_debug_verbosity); } else { pmix_output_set_verbosity(prte_debug_output, 1); } } return PRTE_SUCCESS; } prte_job_t *prte_get_job_data_object(const pmix_nspace_t job) { prte_job_t *jptr; int i; /* if the job data wasn't setup, we cannot provide the data */ if (NULL == prte_job_data) { return NULL; } /* if the nspace is invalid, then reject it */ if (PMIX_NSPACE_INVALID(job)) { return NULL; } for (i = 0; i < prte_job_data->size; i++) { if (NULL == (jptr = (prte_job_t *) pmix_pointer_array_get_item(prte_job_data, i))) { continue; } if (PMIX_CHECK_NSPACE(jptr->nspace, job)) { return jptr; } } return NULL; } int prte_set_job_data_object(prte_job_t *jdata) { prte_job_t *jptr; int i, save = -1; /* if the job data wasn't setup, we cannot set the data */ if (NULL == prte_job_data) { return PRTE_ERROR; } /* if the nspace is invalid, then that's an error */ if (PMIX_NSPACE_INVALID(jdata->nspace)) { return PRTE_ERROR; } /* verify that we don't already have this object */ for (i = 0; i < prte_job_data->size; i++) { if (NULL == (jptr = (prte_job_t *) pmix_pointer_array_get_item(prte_job_data, i))) { if (0 > save) { save = i; } continue; } if (PMIX_CHECK_NSPACE(jptr->nspace, jdata->nspace)) { return PRTE_EXISTS; } } if (-1 == save) { jdata->index = 
pmix_pointer_array_add(prte_job_data, jdata); } else { jdata->index = save; pmix_pointer_array_set_item(prte_job_data, save, jdata); } if (0 > jdata->index) { return PRTE_ERROR; } return PRTE_SUCCESS; } prte_proc_t *prte_get_proc_object(const pmix_proc_t *proc) { prte_job_t *jdata; prte_proc_t *proct; if (NULL == (jdata = prte_get_job_data_object(proc->nspace))) { return NULL; } proct = (prte_proc_t *) pmix_pointer_array_get_item(jdata->procs, proc->rank); return proct; } pmix_rank_t prte_get_proc_daemon_vpid(const pmix_proc_t *proc) { prte_job_t *jdata; prte_proc_t *proct; if (NULL == (jdata = prte_get_job_data_object(proc->nspace))) { return PMIX_RANK_INVALID; } if (NULL == (proct = (prte_proc_t *) pmix_pointer_array_get_item(jdata->procs, proc->rank))) { return PMIX_RANK_INVALID; } if (NULL == proct->node || NULL == proct->node->daemon) { return PMIX_RANK_INVALID; } return proct->node->daemon->name.rank; } char *prte_get_proc_hostname(const pmix_proc_t *proc) { prte_proc_t *proct; /* don't bother error logging any not-found situations * as the layer above us will have something to say * about it */ /* look it up on our arrays */ if (NULL == (proct = prte_get_proc_object(proc))) { return NULL; } if (NULL == proct->node || NULL == proct->node->name) { return NULL; } return proct->node->name; } prte_node_rank_t prte_get_proc_node_rank(const pmix_proc_t *proc) { prte_proc_t *proct; /* look it up on our arrays */ if (NULL == (proct = prte_get_proc_object(proc))) { PRTE_ERROR_LOG(PRTE_ERR_NOT_FOUND); return PRTE_NODE_RANK_INVALID; } return proct->node_rank; } prte_node_t* prte_node_match(pmix_list_t *nodes, const char *name) { int m, n; prte_node_t *nptr; char *nm; /* does the name refer to me? 
*/ if (prte_check_host_is_local(name)) { nm = prte_process_info.nodename; } else { nm = (char*)name; } if (NULL != nodes) { PMIX_LIST_FOREACH(nptr, nodes, prte_node_t) { if (0 == strcmp(nptr->name, nm)) { return nptr; } if (NULL == nptr->aliases) { continue; } /* no choice but an exhaustive search - fortunately, these lists are short! */ for (m = 0; NULL != nptr->aliases[m]; m++) { if (0 == strcmp(name, nptr->aliases[m])) { /* this is the node! */ return nptr; } } } } else { /* check the node pool */ for (n=0; n < prte_node_pool->size; n++) { nptr = (prte_node_t*)pmix_pointer_array_get_item(prte_node_pool, n); if (NULL == nptr) { continue; } if (0 == strcmp(nptr->name, nm)) { return nptr; } if (NULL == nptr->aliases) { continue; } /* no choice but an exhaustive search - fortunately, these lists are short! */ for (m = 0; NULL != nptr->aliases[m]; m++) { if (0 == strcmp(name, nptr->aliases[m])) { /* this is the node! */ return nptr; } } } } return NULL; } bool prte_nptr_match(prte_node_t *n1, prte_node_t *n2) { size_t i, m; /* start with the simple check */ if (0 == strcmp(n1->name, n2->name)) { return true; } if (NULL != n1->aliases) { for (i = 0; NULL != n1->aliases[i]; i++) { if (0 == strcmp(n1->aliases[i], n2->name)) { return true; } if (NULL != n2->aliases) { for (m = 0; NULL != n2->aliases[m]; m++) { if (0 == strcmp(n2->aliases[m], n1->name)) { return true; } if (0 == strcmp(n1->aliases[i], n2->aliases[m])) { return true; } } } } } return false; } bool prte_quickmatch(prte_node_t *nd, char *name) { int n; if (0 == strcmp(nd->name, name)) { return true; } if (prte_check_host_is_local(nd->name) && prte_check_host_is_local(name)) { return true; } if (NULL != nd->aliases) { for (n=0; NULL != nd->aliases[n]; n++) { if (0 == strcmp(nd->aliases[n], name)) { return true; } } } return false; } /* * CONSTRUCTORS, DESTRUCTORS, AND CLASS INSTANTIATIONS * FOR PRTE CLASSES */ static void prte_app_context_construct(prte_app_context_t *app_context) { app_context->job = NULL; 
app_context->idx = 0; app_context->app = NULL; app_context->num_procs = 0; PMIX_CONSTRUCT(&app_context->procs, pmix_pointer_array_t); pmix_pointer_array_init(&app_context->procs, 1, PRTE_GLOBAL_ARRAY_MAX_SIZE, 16); app_context->state = PRTE_APP_STATE_UNDEF; app_context->first_rank = 0; app_context->argv = NULL; app_context->env = NULL; app_context->cwd = NULL; app_context->flags = 0; PMIX_CONSTRUCT(&app_context->attributes, pmix_list_t); PMIX_CONSTRUCT(&app_context->cli, pmix_cli_result_t); } static void prte_app_context_destructor(prte_app_context_t *app_context) { int i; prte_proc_t *proc; if (NULL != app_context->app) { free(app_context->app); app_context->app = NULL; } for (i = 0; i < app_context->procs.size; i++) { if (NULL != (proc = (prte_proc_t *) pmix_pointer_array_get_item(&app_context->procs, i))) { PMIX_RELEASE(proc); } } PMIX_DESTRUCT(&app_context->procs); /* argv and env lists created by util/argv copy functions */ if (NULL != app_context->argv) { PMIX_ARGV_FREE_COMPAT(app_context->argv); app_context->argv = NULL; } if (NULL != app_context->env) { PMIX_ARGV_FREE_COMPAT(app_context->env); app_context->env = NULL; } if (NULL != app_context->cwd) { free(app_context->cwd); app_context->cwd = NULL; } PMIX_LIST_DESTRUCT(&app_context->attributes); PMIX_DESTRUCT(&app_context->cli); } PMIX_CLASS_INSTANCE(prte_app_context_t, pmix_object_t, prte_app_context_construct, prte_app_context_destructor); static void prte_job_construct(prte_job_t *job) { job->exit_code = 0; job->personality = NULL; job->schizo = NULL; PMIX_LOAD_NSPACE(job->nspace, NULL); job->session_dir = NULL; job->index = -1; job->offset = 0; job->apps = PMIX_NEW(pmix_pointer_array_t); pmix_pointer_array_init(job->apps, 1, PRTE_GLOBAL_ARRAY_MAX_SIZE, 2); job->num_apps = 0; job->stdin_target = 0; job->total_slots_alloc = 0; job->num_procs = 0; job->procs = PMIX_NEW(pmix_pointer_array_t); pmix_pointer_array_init(job->procs, PRTE_GLOBAL_ARRAY_BLOCK_SIZE, PRTE_GLOBAL_ARRAY_MAX_SIZE, 
PRTE_GLOBAL_ARRAY_BLOCK_SIZE); job->map = NULL; job->bookmark = NULL; job->state = PRTE_JOB_STATE_UNDEF; job->num_mapped = 0; job->num_launched = 0; job->num_reported = 0; job->num_terminated = 0; job->num_daemons_reported = 0; job->num_ready_for_debug = 0; PMIX_LOAD_PROCID(&job->originator, NULL, PMIX_RANK_INVALID); job->num_local_procs = 0; job->flags = 0; PRTE_FLAG_SET(job, PRTE_JOB_FLAG_FORWARD_OUTPUT); PMIX_CONSTRUCT(&job->attributes, pmix_list_t); PMIX_DATA_BUFFER_CONSTRUCT(&job->launch_msg); PMIX_CONSTRUCT(&job->children, pmix_list_t); PMIX_LOAD_NSPACE(job->launcher, NULL); job->ntraces = 0; job->traces = NULL; PMIX_CONSTRUCT(&job->cli, pmix_cli_result_t); } static void prte_job_destruct(prte_job_t *job) { prte_proc_t *proc; prte_app_context_t *app; int n; prte_timer_t *evtimer; pmix_list_t *cache = NULL; if (NULL == job) { /* probably just a race condition - just return */ return; } if (NULL != job->personality) { PMIX_ARGV_FREE_COMPAT(job->personality); } for (n = 0; n < job->apps->size; n++) { if (NULL == (app = (prte_app_context_t *) pmix_pointer_array_get_item(job->apps, n))) { continue; } PMIX_RELEASE(app); } PMIX_RELEASE(job->apps); /* release any pointers in the attributes */ evtimer = NULL; if (prte_get_attribute(&job->attributes, PRTE_JOB_TIMEOUT_EVENT, (void **) &evtimer, PMIX_POINTER)) { prte_event_evtimer_del(evtimer->ev); prte_remove_attribute(&job->attributes, PRTE_JOB_TIMEOUT_EVENT); /* the timer is a pointer to prte_timer_t */ PMIX_RELEASE(evtimer); } evtimer = NULL; if (prte_get_attribute(&job->attributes, PRTE_SPAWN_TIMEOUT_EVENT, (void **) &evtimer, PMIX_POINTER)) { prte_event_evtimer_del(evtimer->ev); prte_remove_attribute(&job->attributes, PRTE_SPAWN_TIMEOUT_EVENT); /* the timer is a pointer to prte_timer_t */ PMIX_RELEASE(evtimer); } proc = NULL; if (prte_get_attribute(&job->attributes, PRTE_JOB_ABORTED_PROC, (void **) &proc, PMIX_POINTER)) { prte_remove_attribute(&job->attributes, PRTE_JOB_ABORTED_PROC); /* points to an prte_proc_t */ 
PMIX_RELEASE(proc); } if (prte_get_attribute(&job->attributes, PRTE_JOB_INFO_CACHE, (void **) &cache, PMIX_POINTER)) { prte_remove_attribute(&job->attributes, PRTE_JOB_INFO_CACHE); PMIX_LIST_RELEASE(cache); } if (NULL != job->map) { PMIX_RELEASE(job->map); job->map = NULL; } for (n = 0; n < job->procs->size; n++) { if (NULL == (proc = (prte_proc_t *) pmix_pointer_array_get_item(job->procs, n))) { continue; } pmix_pointer_array_set_item(job->procs, n, NULL); PMIX_RELEASE(proc); } PMIX_RELEASE(job->procs); /* release the attributes */ PMIX_LIST_DESTRUCT(&job->attributes); PMIX_DATA_BUFFER_DESTRUCT(&job->launch_msg); /* Release the child list - we retained the child jobs before adding them to the list*/ PMIX_LIST_DESTRUCT(&job->children); if (NULL != job->session_dir) { prte_job_session_dir_finalize(job); if (NULL != job->session_dir) { free(job->session_dir); job->session_dir = NULL; } } if (NULL != prte_job_data && 0 <= job->index) { /* remove the job from the global array */ pmix_pointer_array_set_item(prte_job_data, job->index, NULL); } if (NULL != job->traces) { PMIX_ARGV_FREE_COMPAT(job->traces); } PMIX_DESTRUCT(&job->cli); } PMIX_CLASS_INSTANCE(prte_job_t, pmix_list_item_t, prte_job_construct, prte_job_destruct); static void prte_node_construct(prte_node_t *node) { node->index = -1; node->name = NULL; node->rawname = NULL; node->aliases = NULL; node->daemon = NULL; node->available = NULL; node->jobcache = hwloc_bitmap_alloc(); node->num_procs = 0; node->procs = PMIX_NEW(pmix_pointer_array_t); pmix_pointer_array_init(node->procs, PRTE_GLOBAL_ARRAY_BLOCK_SIZE, PRTE_GLOBAL_ARRAY_MAX_SIZE, PRTE_GLOBAL_ARRAY_BLOCK_SIZE); node->next_node_rank = 0; node->state = PRTE_NODE_STATE_UNKNOWN; node->slots = 0; node->slots_available = 0; node->slots_inuse = 0; node->slots_max = 0; node->topology = NULL; node->flags = 0; PMIX_CONSTRUCT(&node->attributes, pmix_list_t); } static void prte_node_destruct(prte_node_t *node) { int i; prte_proc_t *proc; if (NULL != node->name) { 
free(node->name); node->name = NULL; } if (NULL != node->rawname) { free(node->rawname); node->rawname = NULL; } if (NULL != node->aliases) { PMIX_ARGV_FREE_COMPAT(node->aliases); node->aliases = NULL; } if (NULL != node->daemon) { node->daemon->node = NULL; PMIX_RELEASE(node->daemon); node->daemon = NULL; } if (NULL != node->available) { hwloc_bitmap_free(node->available); } if (NULL != node->jobcache) { hwloc_bitmap_free(node->jobcache); } for (i = 0; i < node->procs->size; i++) { if (NULL != (proc = (prte_proc_t *) pmix_pointer_array_get_item(node->procs, i))) { pmix_pointer_array_set_item(node->procs, i, NULL); PMIX_RELEASE(proc); } } PMIX_RELEASE(node->procs); /* do NOT destroy the topology */ /* release the attributes */ PMIX_LIST_DESTRUCT(&node->attributes); } PMIX_CLASS_INSTANCE(prte_node_t, pmix_list_item_t, prte_node_construct, prte_node_destruct); static void prte_proc_construct(prte_proc_t *proc) { proc->name = *PRTE_NAME_INVALID; proc->parent = PMIX_RANK_INVALID; proc->pid = 0; proc->local_rank = PRTE_LOCAL_RANK_INVALID; proc->node_rank = PRTE_NODE_RANK_INVALID; proc->numa_rank = PRTE_LOCAL_RANK_INVALID; proc->app_rank = -1; proc->last_errmgr_state = PRTE_PROC_STATE_UNDEF; proc->state = PRTE_PROC_STATE_UNDEF; proc->app_idx = 0; proc->node = NULL; proc->obj = NULL; proc->cpuset = NULL; proc->exit_code = 0; /* Assume we won't fail unless otherwise notified */ proc->rml_uri = NULL; proc->flags = 0; PMIX_CONSTRUCT(&proc->attributes, pmix_list_t); } static void prte_proc_destruct(prte_proc_t *proc) { if (NULL != proc->node) { PMIX_RELEASE(proc->node); proc->node = NULL; } if (NULL != proc->cpuset) { free(proc->cpuset); proc->cpuset = NULL; } if (NULL != proc->rml_uri) { free(proc->rml_uri); proc->rml_uri = NULL; } PMIX_LIST_DESTRUCT(&proc->attributes); } PMIX_CLASS_INSTANCE(prte_proc_t, pmix_list_item_t, prte_proc_construct, prte_proc_destruct); static void prte_job_map_construct(prte_job_map_t *map) { map->req_mapper = NULL; map->last_mapper = NULL; 
map->mapping = 0; map->ranking = 0; map->binding = 0; map->rtos_set = false; map->num_new_daemons = 0; map->daemon_vpid_start = PMIX_RANK_INVALID; map->num_nodes = 0; map->nodes = PMIX_NEW(pmix_pointer_array_t); pmix_pointer_array_init(map->nodes, PRTE_GLOBAL_ARRAY_BLOCK_SIZE, PRTE_GLOBAL_ARRAY_MAX_SIZE, PRTE_GLOBAL_ARRAY_BLOCK_SIZE); } static void prte_job_map_destruct(prte_job_map_t *map) { int32_t i; prte_node_t *node; if (NULL != map->req_mapper) { free(map->req_mapper); } if (NULL != map->last_mapper) { free(map->last_mapper); } for (i = 0; i < map->nodes->size; i++) { if (NULL != (node = (prte_node_t *) pmix_pointer_array_get_item(map->nodes, i))) { PMIX_RELEASE(node); pmix_pointer_array_set_item(map->nodes, i, NULL); } } PMIX_RELEASE(map->nodes); } PMIX_CLASS_INSTANCE(prte_job_map_t, pmix_object_t, prte_job_map_construct, prte_job_map_destruct); static void prte_attr_cons(prte_attribute_t *p) { p->key = 0; p->local = true; // default to local-only data memset(&p->data, 0, sizeof(p->data)); } static void prte_attr_des(prte_attribute_t *p) { PMIX_VALUE_DESTRUCT(&p->data); } PMIX_CLASS_INSTANCE(prte_attribute_t, pmix_list_item_t, prte_attr_cons, prte_attr_des); static void tcon(prte_topology_t *t) { t->topo = NULL; t->sig = NULL; } static void tdes(prte_topology_t *t) { hwloc_obj_t root; if (NULL != t->topo) { root = hwloc_get_root_obj(t->topo); if (NULL != root->userdata) { PMIX_RELEASE(root->userdata); } hwloc_topology_destroy(t->topo); } if (NULL != t->sig) { free(t->sig); } } PMIX_CLASS_INSTANCE(prte_topology_t, pmix_object_t, tcon, tdes); #if PRTE_PICKY_COMPILERS void prte_hide_unused_params(int x, ...) { va_list ap; va_start(ap, x); va_end(ap); } #endif prrte-3.0.13/src/runtime/runtime_internals.h0000664000175000017500000000251515145263240021310 0ustar alastairalastair/* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. 
* Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2019-2020 Intel, Inc. All rights reserved. * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2021-2023 Nanook Consulting. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ /** * @file * * Interface into the PRTE Run Time Environment */ #ifndef PRTE_RUNTIME_INTERNALS_H #define PRTE_RUNTIME_INTERNALS_H #include "prte_config.h" BEGIN_C_DECLS /** * Init the PRTE datatype support */ PRTE_EXPORT int prte_dt_init(void); PRTE_EXPORT void prte_preload_default_mca_params(void); END_C_DECLS #endif /* PRTE_RUNTIME_INTERNALS_H */ prrte-3.0.13/src/runtime/runtime.h0000664000175000017500000000515115145263240017230 0ustar alastairalastair/* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2008 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2019-2020 Intel, Inc. All rights reserved. * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2021-2023 Nanook Consulting. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ /** * @file * * Interface into the PRTE Run Time Environment */ #ifndef PRTE_RUNTIME_H #define PRTE_RUNTIME_H #include "prte_config.h" #ifdef HAVE_SYS_TYPES_H # include #endif #include "src/util/proc_info.h" BEGIN_C_DECLS /** version string of ompi */ PRTE_EXPORT extern const char prte_version_string[]; /** * Whether PRTE is initialized or we are in prte_finalize */ PRTE_EXPORT extern bool prte_initialized; PRTE_EXPORT extern bool prte_finalizing; PRTE_EXPORT extern int prte_debug_output; PRTE_EXPORT extern bool prte_debug_flag; PRTE_EXPORT extern int prte_cache_line_size; /** * Initialize the Open Run Time Environment * * Initlize the Open Run Time Environment, including process * control, malloc debugging and threads, and out of band messaging. * This function should be called exactly once. This function should * be called by every application using the RTE interface, including * MPI applications and mpirun. * * @param pargc Pointer to the number of arguments in the pargv array * @param pargv The list of arguments. * @param flags Whether we are PRTE tool or not */ PRTE_EXPORT int prte_init(int *pargc, char ***pargv, prte_proc_type_t flags); PRTE_EXPORT int prte_init_util(prte_proc_type_t flags); PRTE_EXPORT int prte_init_minimum(void); /** * Initialize parameters for PRTE. * * @retval PRTE_SUCCESS Upon success. * @retval PRTE_ERROR Upon failure. */ PRTE_EXPORT int prte_register_params(void); /** * Finalize the Open run time environment. Any function calling \code * prte_init should call \code prte_finalize. * */ PRTE_EXPORT int prte_finalize(void); END_C_DECLS #endif /* RUNTIME_H */ prrte-3.0.13/src/runtime/prte_globals.h0000664000175000017500000005753515145263240020237 0ustar alastairalastair/* * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. 
* Copyright (c) 2004-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2007-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2011-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2013-2020 Intel, Inc. All rights reserved. * Copyright (c) 2017-2020 IBM Corporation. All rights reserved. * Copyright (c) 2017-2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2021-2024 Nanook Consulting. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ /** * @file * * Global params for PRTE */ #ifndef PRTE_RUNTIME_PRTE_GLOBALS_H #define PRTE_RUNTIME_PRTE_GLOBALS_H #include "prte_config.h" #include "types.h" #include #ifdef HAVE_SYS_TIME_H # include #endif #include "src/class/pmix_hash_table.h" #include "src/class/pmix_pointer_array.h" #include "src/class/pmix_ring_buffer.h" #include "src/class/pmix_value_array.h" #include "src/event/event-internal.h" #include "src/hwloc/hwloc-internal.h" #include "src/pmix/pmix-internal.h" #include "src/threads/pmix_threads.h" #include "src/mca/plm/plm_types.h" #include "src/rml/rml_types.h" #include "src/runtime/runtime.h" #include "src/util/attr.h" #include "src/util/pmix_cmd_line.h" #include "src/util/name_fns.h" #include "src/util/proc_info.h" BEGIN_C_DECLS PRTE_EXPORT extern int prte_debug_verbosity; /* instantiated in src/runtime/prte_init.c */ PRTE_EXPORT extern char *prte_prohibited_session_dirs; /* instantiated in src/runtime/prte_init.c */ PRTE_EXPORT extern char *prte_job_ident; /* instantiated in src/runtime/prte_globals.c */ PRTE_EXPORT 
extern bool prte_execute_quiet; /* instantiated in src/runtime/prte_globals.c */ PRTE_EXPORT extern bool prte_report_silent_errors; /* instantiated in src/runtime/prte_globals.c */ PRTE_EXPORT extern bool prte_event_base_active; /* instantiated in src/runtime/prte_init.c */ PRTE_EXPORT extern bool prte_proc_is_bound; /* instantiated in src/runtime/prte_init.c */ PRTE_EXPORT extern int prte_progress_thread_debug; /* instantiated in src/runtime/prte_init.c */ PRTE_EXPORT extern char *prte_tool_basename; // argv[0] of prun or one of its symlinks PRTE_EXPORT extern char *prte_tool_actual; // actual tool executable PRTE_EXPORT extern char *prte_progress_thread_cpus; PRTE_EXPORT extern bool prte_bind_progress_thread_reqd; PRTE_EXPORT extern bool prte_show_launch_progress; PRTE_EXPORT extern bool prte_bootstrap_setup; PRTE_EXPORT extern bool prte_silence_shared_fs; /** * Global indicating where this process was bound to at launch (will * be NULL if !prte_proc_is_bound) */ PRTE_EXPORT extern hwloc_cpuset_t prte_proc_applied_binding; /* instantiated in src/runtime/prte_init.c */ /* Shortcut for some commonly used names */ #define PRTE_NAME_WILDCARD (&prte_name_wildcard) PRTE_EXPORT extern pmix_proc_t prte_name_wildcard; /** instantiated in src/runtime/prte_init.c */ #define PRTE_NAME_INVALID (&prte_name_invalid) PRTE_EXPORT extern pmix_proc_t prte_name_invalid; /** instantiated in src/runtime/prte_init.c */ #define PRTE_JOBID_WILDCARD (prte_nspace_wildcard) PRTE_EXPORT extern pmix_nspace_t prte_nspace_wildcard; /** instantiated in src/runtime/prte_init.c */ #define PRTE_PROC_MY_NAME (&prte_process_info.myproc) #define PRTE_PROC_MY_PROCID (&prte_process_info.myproc) // backward compatibility synonym /* define a special name that point to my parent (aka the process that spawned me) */ #define PRTE_PROC_MY_PARENT (&prte_process_info.my_parent) /* define a special name that belongs to prte master */ #define PRTE_PROC_MY_HNP (&prte_process_info.my_hnp) /* define some types so we 
can store the generic * values and still *know* how to convert it for PMIx */ typedef int prte_status_t; typedef uint32_t prte_proc_state_t; // assigned values in src/mca/plm/plm_types.h #define PRTE_PROC_STATE_T PMIX_UINT32 /* define the results values for comparisons so we can change them in only one place */ #define PRTE_VALUE1_GREATER +1 #define PRTE_VALUE2_GREATER -1 #define PRTE_EQUAL 0 /* error manager callback function */ typedef void (*prte_err_cb_fn_t)(pmix_proc_t *proc, prte_proc_state_t state, void *cbdata); /* define an object for timer events */ typedef struct { pmix_object_t super; struct timeval tv; prte_event_t *ev; void *payload; } prte_timer_t; PRTE_EXPORT PMIX_CLASS_DECLARATION(prte_timer_t); PRTE_EXPORT extern int prte_exit_status; /* define some common keys used in PRTE */ #define PRTE_DB_DAEMON_VPID "prte.daemon.vpid" /* State Machine lists */ PRTE_EXPORT extern pmix_list_t prte_job_states; PRTE_EXPORT extern pmix_list_t prte_proc_states; /* a clean output channel without prefix */ PRTE_EXPORT extern int prte_clean_output; #define PRTE_GLOBAL_ARRAY_BLOCK_SIZE 64 #define PRTE_GLOBAL_ARRAY_MAX_SIZE INT_MAX /* define a default error return code for PRTE */ #define PRTE_ERROR_DEFAULT_EXIT_CODE 1 /** * Define a macro for updating the prte_exit_status * The macro provides a convenient way of doing this * so that we can add thread locking at some point * since the prte_exit_status is a global variable. * * Ensure that we do not overwrite the exit status if it has * already been set to some non-zero value. If we don't make * this check, then different parts of the code could overwrite * each other's exit status in the case of abnormal termination. * * For example, if a process aborts, we would record the initial * exit code from the aborted process. However, subsequent processes * will have been aborted by signal as we kill the job. 
We don't want * the subsequent processes to overwrite the original exit code so * we can tell the user the exit code from the process that caused * the whole thing to happen. */ #define PRTE_UPDATE_EXIT_STATUS(newstatus) \ do { \ if (0 == prte_exit_status && 0 != newstatus) { \ PMIX_OUTPUT_VERBOSE((1, prte_debug_output, "%s:%s(%d) updating exit status to %d", \ PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), __FILE__, __LINE__, \ newstatus)); \ prte_exit_status = newstatus; \ } \ } while (0); /* sometimes we need to reset the exit status - for example, when we * are restarting a failed process */ #define PRTE_RESET_EXIT_STATUS() \ do { \ PMIX_OUTPUT_VERBOSE((1, prte_debug_output, "%s:%s(%d) reseting exit status", \ PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), __FILE__, __LINE__)); \ prte_exit_status = 0; \ } while (0); /* define a set of flags to control the launch of a job */ typedef uint16_t prte_job_controls_t; #define PRTE_JOB_CONTROL PRTE_UINT16 /* global type definitions used by RTE - instanced in prte_globals.c */ /************ * Declare this to allow us to use it before fully * defining it - resolves potential circular definition */ struct prte_proc_t; struct prte_job_t; struct prte_job_map_t; struct prte_schizo_base_module_t; /************/ /* define an object for storing node topologies */ typedef struct { pmix_object_t super; int index; hwloc_topology_t topo; char *sig; } prte_topology_t; PRTE_EXPORT PMIX_CLASS_DECLARATION(prte_topology_t); /** * Information about a specific application to be launched in the RTE. 
*/ typedef struct { /** Parent object */ pmix_object_t super; /** the job this app belongs to */ struct prte_job_t *job; /** Unique index when multiple apps per job */ prte_app_idx_t idx; /** Absolute pathname of argv[0] */ char *app; /** Number of copies of this process that are to be launched */ int32_t num_procs; /** Array of pointers to the proc objects for procs of this app_context * NOTE - not always used */ pmix_pointer_array_t procs; /** State of the app_context */ prte_app_state_t state; /** First MPI rank of this app_context in the job */ pmix_rank_t first_rank; /** Standard argv-style array, including a final NULL pointer */ char **argv; /** Standard environ-style array, including a final NULL pointer */ char **env; /** Current working directory for this app */ char *cwd; /* flags */ prte_app_context_flags_t flags; /* provide a list of attributes for this app_context in place * of having a continually-expanding list of fixed-use values. * This is a list of prte_value_t's, with the intent of providing * flexibility without constantly expanding the memory footprint * every time we want some new (rarely used) option */ pmix_list_t attributes; // store the result of parsing this app's cmd line pmix_cli_result_t cli; } prte_app_context_t; PRTE_EXPORT PMIX_CLASS_DECLARATION(prte_app_context_t); typedef struct { /** Base object so this can be put on a list */ pmix_list_item_t super; /* index of this node object in global array */ int32_t index; /** String node name */ char *name; char *rawname; // name originally given in allocation, if different from name /** aliases */ char **aliases; /* daemon on this node */ struct prte_proc_t *daemon; /* track the unassigned cpus */ hwloc_cpuset_t available; /* cache the cpuset prior to mapping a job for easy reset */ hwloc_cpuset_t jobcache; /** number of procs on this node */ prte_node_rank_t num_procs; /* array of pointers to procs on this node */ pmix_pointer_array_t *procs; /* next node rank on this node */ 
prte_node_rank_t next_node_rank; /** State of this node */ prte_node_state_t state; /** A "soft" limit on the number of slots available on the node. This will typically correspond to the number of physical CPUs that we have been allocated on this note and would be the "ideal" number of processes for us to launch. */ int32_t slots; /** Slots available for use in the current mapping operation. This * may differ on a per-job basis from the overall allocated slots * thru use of the -host option and possibly other means */ int32_t slots_available; /** How many processes have already been launched, used by one or more jobs on this node. */ int32_t slots_inuse; /** A "hard" limit (if set -- a value of 0 implies no hard limit) on the number of slots that can be allocated on a given node. This is for some environments (e.g. grid) there may be fixed limits on the number of slots that can be used. This value also could have been a boolean - but we may want to allow the hard limit be different than the soft limit - in other words allow the node to be oversubscribed up to a specified limit. For example, if we have two processors, we may want to allow up to four processes but no more. 
/**
 * Job data object. One instance describes a single job (namespace):
 * its app contexts, its procs, its map, and the counters used to
 * track launch/termination progress.
 */
typedef struct {
    /** Base object so this can be put on a list */
    pmix_list_item_t super;
    /* record the exit status for this job */
    int exit_code;
    /* personality for this job */
    char **personality;
    struct prte_schizo_base_module_t *schizo;
    /* jobid for this job */
    pmix_nspace_t nspace;
    // session directory for this job
    char *session_dir;
    int index; // index in the job array where this is stored
    /* offset to the total number of procs so shared memory
     * components can potentially connect to any spawned jobs */
    pmix_rank_t offset;
    /* app_context array for this job */
    pmix_pointer_array_t *apps;
    /* number of app_contexts in the array */
    prte_app_idx_t num_apps;
    /* rank desiring stdin - for now, either one rank, all ranks
     * (wildcard), or none (invalid) */
    pmix_rank_t stdin_target;
    /* total slots allocated to this job */
    int32_t total_slots_alloc;
    /* number of procs in this job */
    pmix_rank_t num_procs;
    /* array of pointers to procs in this job */
    pmix_pointer_array_t *procs;
    /* map of the job */
    struct prte_job_map_t *map;
    /* bookmark for where we are in mapping - this
     * indicates the node where we stopped */
    prte_node_t *bookmark;
    /* state of the overall job */
    prte_job_state_t state;
    /* number of procs mapped */
    pmix_rank_t num_mapped;
    /* number of procs launched */
    pmix_rank_t num_launched;
    /* number of procs reporting contact info */
    pmix_rank_t num_reported;
    /* number of procs terminated */
    pmix_rank_t num_terminated;
    /* number of daemons reported launched so we can track progress */
    pmix_rank_t num_daemons_reported;
    /* number of procs ready for debug */
    pmix_rank_t num_ready_for_debug;
    /* originator of a dynamic spawn */
    pmix_proc_t originator;
    /* number of local procs */
    pmix_rank_t num_local_procs;
    /* flags */
    prte_job_flags_t flags;
    /* attributes */
    pmix_list_t attributes;
    /* launch msg buffer */
    pmix_data_buffer_t launch_msg;
    /* track children of this job */
    pmix_list_t children;
    /* track the launcher of these jobs */
    pmix_nspace_t launcher;
    /* track the number of stack traces recv'd */
    uint32_t ntraces;
    /* the stack traces themselves (argv-style array) */
    char **traces;
    // store the result of parsing this app's cmd line
    pmix_cli_result_t cli;
} prte_job_t;
PRTE_EXPORT PMIX_CLASS_DECLARATION(prte_job_t);

/**
 * Process data object. One instance describes a single process:
 * its name, its ranks, its state, and where it is mapped/bound.
 */
struct prte_proc_t {
    /** Base object so this can be put on a list */
    pmix_list_item_t super;
    /* process name */
    pmix_proc_t name;
    /* the vpid of my parent - the daemon vpid for an app
     * or the vpid of the parent in the routing tree of
     * a daemon */
    pmix_rank_t parent;
    /* pid */
    pid_t pid;
    /* local rank amongst my peers on the node
     * where this is running - this value is
     * needed by MPI procs so that the lowest
     * rank on a node can perform certain fns -
     * e.g., open an sm backing file
     */
    prte_local_rank_t local_rank;
    /* local rank on the node across all procs
     * and jobs known to this HNP - this is
     * needed so that procs can do things like
     * know which static IP port to use
     */
    prte_node_rank_t node_rank;
    /* rank of this proc within its app context - this
     * will just equal its vpid for single app_context
     * applications
     */
    int32_t app_rank;
    /* rank of this proc amongst its peers within the
     * NUMA region to which it is bound */
    prte_local_rank_t numa_rank;
    /* Last state used to trigger the errmgr for this proc */
    prte_proc_state_t last_errmgr_state;
    /* process state */
    prte_proc_state_t state;
    /* exit code */
    prte_exit_code_t exit_code;
    /* the app_context that generated this proc */
    prte_app_idx_t app_idx;
    /* pointer to the node where this proc is executing */
    prte_node_t *node;
    /* pointer to the object on that node where the
     * proc is mapped */
    hwloc_obj_t obj;
    /* cpuset where the proc is bound */
    char *cpuset;
    /* RML contact info */
    char *rml_uri;
    /* some boolean flags */
    prte_proc_flags_t flags;
    /* list of prte_value_t attributes */
    pmix_list_t attributes;
};
typedef struct prte_proc_t prte_proc_t;
PRTE_EXPORT PMIX_CLASS_DECLARATION(prte_proc_t);

/**
 * Get a job data object
 * We cannot just reference a job data object with its jobid as
 * the jobid is no longer an index into the array. This change
 * was necessitated by modification of the jobid to include
 * an mpirun-unique qualifier to eliminate any global name
 * service
 */
PRTE_EXPORT prte_job_t *prte_get_job_data_object(const pmix_nspace_t job);

/**
 * Set a job data object - returns an error if it cannot add the object
 * to the array
 */
PRTE_EXPORT int prte_set_job_data_object(prte_job_t *jdata);

/** Pack/unpack/copy/print a job object */
PRTE_EXPORT int prte_job_pack(pmix_data_buffer_t *bkt, prte_job_t *job);
PRTE_EXPORT int prte_job_unpack(pmix_data_buffer_t *bkt, prte_job_t **job);
PRTE_EXPORT int prte_job_copy(prte_job_t **dest, prte_job_t *src);
PRTE_EXPORT void prte_job_print(char **output, prte_job_t *jdata);

/** Pack/unpack/copy/print an app-context */
PRTE_EXPORT int prte_app_pack(pmix_data_buffer_t *bkt, prte_app_context_t *app);
PRTE_EXPORT int prte_app_unpack(pmix_data_buffer_t *bkt, prte_app_context_t **app);
PRTE_EXPORT int prte_app_copy(prte_app_context_t **dest, prte_app_context_t *src);
PRTE_EXPORT void prte_app_print(char **output, prte_job_t *jdata, prte_app_context_t *src);

/** Pack/unpack/copy/print a proc */
PRTE_EXPORT int prte_proc_pack(pmix_data_buffer_t *bkt, prte_proc_t *proc);
PRTE_EXPORT int prte_proc_unpack(pmix_data_buffer_t *bkt, prte_proc_t **proc);
PRTE_EXPORT int prte_proc_copy(prte_proc_t **dest, prte_proc_t *src);
PRTE_EXPORT void prte_proc_print(char **output, prte_job_t *jdata, prte_proc_t *src);

/** Pack/unpack/copy/print a job map */
PRTE_EXPORT int prte_map_pack(pmix_data_buffer_t *bkt, struct prte_job_map_t *map);
PRTE_EXPORT int prte_map_unpack(pmix_data_buffer_t *bkt, struct prte_job_map_t **map);
PRTE_EXPORT int prte_map_copy(struct prte_job_map_t **dest, struct prte_job_map_t *src);
PRTE_EXPORT void prte_map_print(char **output, prte_job_t *jdata);

/** Pack/unpack/copy/print a node */
PRTE_EXPORT int prte_node_pack(pmix_data_buffer_t *bkt, prte_node_t *node);
PRTE_EXPORT int prte_node_unpack(pmix_data_buffer_t *bkt, prte_node_t **node);
PRTE_EXPORT int prte_node_copy(prte_node_t **dest, prte_node_t *src);
PRTE_EXPORT void prte_node_print(char **output, prte_job_t *jdata, prte_node_t *src);

/**
 * Get a proc data object
 */
PRTE_EXPORT prte_proc_t *prte_get_proc_object(const pmix_proc_t *proc);

/**
 * Get the daemon vpid hosting a given proc
 */
PRTE_EXPORT pmix_rank_t prte_get_proc_daemon_vpid(const pmix_proc_t *proc);

/* Get the hostname of a proc */
PRTE_EXPORT char *prte_get_proc_hostname(const pmix_proc_t *proc);

/* get the node rank of a proc */
PRTE_EXPORT prte_node_rank_t prte_get_proc_node_rank(const pmix_proc_t *proc);

/* check to see if two nodes match */
PRTE_EXPORT prte_node_t* prte_node_match(pmix_list_t *nodes, const char *name);
PRTE_EXPORT bool prte_nptr_match(prte_node_t *n1, prte_node_t *n2);
PRTE_EXPORT bool prte_quickmatch(prte_node_t *nd, char *name);

/* global variables used by RTE - instanced in prte_globals.c */
PRTE_EXPORT extern bool prte_debug_daemons_flag;
PRTE_EXPORT extern bool prte_debug_daemons_file_flag;
PRTE_EXPORT extern bool prte_leave_session_attached;
PRTE_EXPORT extern char *prte_topo_signature;
PRTE_EXPORT extern char *prte_data_server_uri;
PRTE_EXPORT extern bool prte_dvm_ready;
PRTE_EXPORT extern pmix_pointer_array_t *prte_cache;
PRTE_EXPORT extern bool prte_persistent;
PRTE_EXPORT extern bool prte_allow_run_as_root;
PRTE_EXPORT extern bool prte_fwd_environment;
PRTE_EXPORT extern bool prte_xml_output;

/* PRTE OOB port flags */
PRTE_EXPORT extern bool prte_static_ports;
PRTE_EXPORT extern char *prte_oob_static_ports;

/* nodename flags */
PRTE_EXPORT extern bool prte_keep_fqdn_hostnames;
PRTE_EXPORT extern bool prte_have_fqdn_allocation;
PRTE_EXPORT extern bool prte_show_resolved_nodenames;
PRTE_EXPORT extern int prte_hostname_cutoff;
PRTE_EXPORT extern bool prte_do_not_resolve;

/* debug flags */
PRTE_EXPORT extern pmix_rank_t prted_debug_failure;
PRTE_EXPORT extern int prted_debug_failure_delay;

PRTE_EXPORT extern bool prte_never_launched;
PRTE_EXPORT extern bool prte_devel_level_output;
PRTE_EXPORT extern bool prte_display_topo_with_map;

PRTE_EXPORT extern char **prte_launch_environ;

PRTE_EXPORT extern bool prte_hnp_is_allocated;
PRTE_EXPORT extern bool prte_allocation_required;
PRTE_EXPORT extern bool prte_managed_allocation;
PRTE_EXPORT extern char *prte_set_slots;
PRTE_EXPORT extern bool prte_set_slots_override;
PRTE_EXPORT extern bool prte_hnp_connected;
PRTE_EXPORT extern bool prte_nidmap_communicated;
PRTE_EXPORT extern bool prte_node_info_communicated;

/* launch agents */
PRTE_EXPORT extern char *prte_launch_agent;
PRTE_EXPORT extern char **prted_cmd_line;

/* exit flags */
PRTE_EXPORT extern bool prte_abnormal_term_ordered;
PRTE_EXPORT extern bool prte_routing_is_enabled;
PRTE_EXPORT extern bool prte_dvm_abort_ordered;
PRTE_EXPORT extern bool prte_prteds_term_ordered;
PRTE_EXPORT extern bool prte_allowed_exit_without_sync;

PRTE_EXPORT extern int prte_timeout_usec_per_proc;
PRTE_EXPORT extern float prte_max_timeout;
PRTE_EXPORT extern prte_timer_t *prte_mpiexec_timeout;

/* global arrays for data storage */
PRTE_EXPORT extern pmix_pointer_array_t *prte_job_data;
PRTE_EXPORT extern pmix_pointer_array_t *prte_node_pool;
PRTE_EXPORT extern pmix_pointer_array_t *prte_node_topologies;
PRTE_EXPORT extern pmix_pointer_array_t *prte_local_children;
PRTE_EXPORT extern pmix_rank_t prte_total_procs;
PRTE_EXPORT extern char *prte_base_compute_node_sig;
PRTE_EXPORT extern bool prte_hetero_nodes;

/* IOF controls */

/* generate new xterm windows to display output from specified ranks */
PRTE_EXPORT extern char *prte_xterm;

/* whether or not to report launch progress */
PRTE_EXPORT extern bool prte_report_launch_progress;

/* allocation specification */
PRTE_EXPORT extern char *prte_default_hostfile;
PRTE_EXPORT extern bool prte_default_hostfile_given;
PRTE_EXPORT extern int prte_num_allocated_nodes;
PRTE_EXPORT extern char *prte_default_dash_host;

/* tool communication controls */
PRTE_EXPORT extern bool prte_report_events;
PRTE_EXPORT extern char *prte_report_events_uri;

/* exit status reporting */
PRTE_EXPORT extern bool prte_report_child_jobs_separately;
PRTE_EXPORT extern struct timeval prte_child_time_to_exit;

/* length of stat history to keep */
PRTE_EXPORT extern int prte_stat_history_size;

/* envars to forward */
PRTE_EXPORT extern char **prte_forwarded_envars;

/* maximum size of virtual machine - used to subdivide allocation */
PRTE_EXPORT extern int prte_max_vm_size;

/* binding directives for daemons to restrict them
 * to certain cores
 */
PRTE_EXPORT extern char *prte_daemon_cores;

/* Max time to wait for stack traces to return */
PRTE_EXPORT extern int prte_stack_trace_wait_timeout;

/* whether or not hwloc shmem support is available */
PRTE_EXPORT extern bool prte_hwloc_shmem_available;

/* String-valued MCA parameters. Declared here without PRTE_EXPORT,
 * unlike the globals above. */
extern char *prte_signal_string;
extern char *prte_stacktrace_output_filename;
extern char *prte_net_private_ipv4;
extern char *prte_set_max_sys_limits;
extern char *prte_if_include;
extern char *prte_if_exclude;

/* PRTE_HIDE_UNUSED_PARAMS(...) forwards its arguments to the variadic
 * prte_hide_unused_params() when PRTE_PICKY_COMPILERS is set, and
 * expands to nothing otherwise.
 * NOTE(review): the prototype below is tagged PMIX_EXPORT while the
 * rest of this header uses PRTE_EXPORT - confirm this is intentional. */
#if PRTE_PICKY_COMPILERS
#define PRTE_HIDE_UNUSED_PARAMS(...)                \
    do {                                            \
        int __x = 3;                                \
        prte_hide_unused_params(__x, __VA_ARGS__);  \
    } while(0)

PMIX_EXPORT void prte_hide_unused_params(int x, ...);

#else
#define PRTE_HIDE_UNUSED_PARAMS(...)
#endif

END_C_DECLS

#endif /* PRTE_RUNTIME_PRTE_GLOBALS_H */
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. * All rights reserved * Copyright (c) 2013-2020 Intel, Inc. All rights reserved. * Copyright (c) 2014-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. * Copyright (c) 2021-2025 Nanook Consulting All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #include "prte_config.h" #include "constants.h" #include "types.h" #ifdef HAVE_SYS_TIME_H # include #endif #include #include #include "src/mca/base/pmix_mca_base_var.h" #include "src/mca/prteinstalldirs/prteinstalldirs.h" #include "src/rml/rml.h" #include "src/util/pmix_argv.h" #include "src/util/pmix_output.h" #include "src/util/pmix_path.h" #include "src/util/pmix_printf.h" #include "src/util/proc_info.h" #include "src/util/pmix_environ.h" #include "src/util/pmix_show_help.h" #include "src/mca/errmgr/errmgr.h" #include "src/runtime/prte_globals.h" #include "src/runtime/runtime.h" static bool passed_thru = false; static int prte_progress_thread_debug_level = -1; static char *prte_tmpdir_base = NULL; static char *prte_local_tmpdir_base = NULL; static char *prte_remote_tmpdir_base = NULL; static char *local_setup_slots = NULL; char *prte_signal_string = NULL; char *prte_stacktrace_output_filename = NULL; char *prte_net_private_ipv4 = NULL; char *prte_if_include = NULL; char *prte_if_exclude = NULL; char *prte_set_max_sys_limits = NULL; int prte_pmix_verbose_output = 0; char *prte_progress_thread_cpus = NULL; bool prte_bind_progress_thread_reqd = false; bool prte_silence_shared_fs = false; int prte_max_thread_in_progress = 1; int prte_register_params(void) { int ret; pmix_output_stream_t lds; 
char *string = NULL; char *fstype = NULL; /* only go thru this once - mpirun calls it twice, which causes * any error messages to show up twice */ if (passed_thru) { return PRTE_SUCCESS; } passed_thru = true; /* * This string is going to be used in prte/util/stacktrace.c */ { int j; int signals[] = { #ifdef SIGABRT SIGABRT, #endif #ifdef SIGBUS SIGBUS, #endif #ifdef SIGFPE SIGFPE, #endif #ifdef SIGSEGV SIGSEGV, #endif -1}; for (j = 0; signals[j] != -1; ++j) { if (j == 0) { pmix_asprintf(&string, "%d", signals[j]); } else { char *tmp; pmix_asprintf(&tmp, "%s,%d", string, signals[j]); free(string); string = tmp; } } prte_signal_string = string; ret = pmix_mca_base_var_register("prte", "prte", NULL, "signal", "Comma-delimited list of integer signal numbers to PRTE to attempt to intercept. Upon " "receipt of the intercepted signal, PRTE will display a stack trace and abort. PRTE " "will *not* replace signals if handlers are already installed by the time MPI_INIT is " "invoked. Optionally append \":complain\" to any signal number in the comma-delimited " "list to make PRTE complain if it detects another signal handler (and therefore does " "not insert its own).", PMIX_MCA_BASE_VAR_TYPE_STRING, &prte_signal_string); free(string); if (0 > ret) { return ret; } } /* * Where should the stack trace output be directed * This string is going to be used in prte/util/stacktrace.c */ string = strdup("stderr"); prte_stacktrace_output_filename = string; ret = pmix_mca_base_var_register("prte", "prte", NULL, "stacktrace_output", "Specifies where the stack trace output stream goes. " "Accepts one of the following: none (disabled), stderr (default), stdout, file[:filename]. " " " "If 'filename' is not specified, a default filename of 'stacktrace' is used. " "The 'filename' is appended with either '.PID' or '.RANK.PID', if RANK is available. 
" "The 'filename' can be an absolute path or a relative path to the current working " "directory.", PMIX_MCA_BASE_VAR_TYPE_STRING, &prte_stacktrace_output_filename); free(string); if (0 > ret) { return ret; } /* RFC1918 defines - 10.0.0./8 - 172.16.0.0/12 - 192.168.0.0/16 RFC3330 also mentions - 169.254.0.0/16 for DHCP onlink iff there's no DHCP server */ prte_net_private_ipv4 = "10.0.0.0/8;172.16.0.0/12;192.168.0.0/16;169.254.0.0/16"; ret = pmix_mca_base_var_register("prte", "prte", "net", "private_ipv4", "Semicolon-delimited list of CIDR notation entries specifying what networks are considered " "\"private\" (default value based on RFC1918 and RFC3330)", PMIX_MCA_BASE_VAR_TYPE_STRING, &prte_net_private_ipv4); if (0 > ret) { return ret; } prte_if_include = NULL; ret = pmix_mca_base_var_register("prte", "prte", NULL, "if_include", "Comma-delimited list of devices and/or CIDR notation of TCP networks to use for PRTE " "bootstrap communication (e.g., \"eth0,192.168.0.0/16\"). Mutually exclusive with " "prte_if_exclude.", PMIX_MCA_BASE_VAR_TYPE_STRING, &prte_if_include); (void) pmix_mca_base_var_register_synonym(ret, "prte", "oob", "tcp", "include", PMIX_MCA_BASE_VAR_SYN_FLAG_DEPRECATED); (void) pmix_mca_base_var_register_synonym(ret, "prte", "oob", "tcp", "if_include", PMIX_MCA_BASE_VAR_SYN_FLAG_DEPRECATED); prte_if_exclude = NULL; ret = pmix_mca_base_var_register("prte", "prte", NULL, "if_exclude", "Comma-delimited list of devices and/or CIDR notation of TCP networks to NOT use for PRTE " "bootstrap communication -- all devices not matching these specifications will be used " "(e.g., \"eth0,192.168.0.0/16\"). 
If set to a non-default value, it is mutually exclusive " "with prte_if_include.", PMIX_MCA_BASE_VAR_TYPE_STRING, &prte_if_exclude); (void) pmix_mca_base_var_register_synonym(ret, "prte", "oob", "tcp", "exclude", PMIX_MCA_BASE_VAR_SYN_FLAG_DEPRECATED); (void) pmix_mca_base_var_register_synonym(ret, "prte", "oob", "tcp", "if_exclude", PMIX_MCA_BASE_VAR_SYN_FLAG_DEPRECATED); /* if_include and if_exclude need to be mutually exclusive */ if (NULL != prte_if_include && NULL != prte_if_exclude) { /* Return ERR_NOT_AVAILABLE so that a warning message about "open" failing is not printed */ pmix_show_help("help-oob-tcp.txt", "include-exclude", true, prte_if_include, prte_if_exclude); return PRTE_ERR_SILENT; } prte_set_max_sys_limits = NULL; ret = pmix_mca_base_var_register("prte", "prte", NULL, "set_max_sys_limits", "Set the specified system-imposed limits to the specified value, including \"unlimited\"." "Supported params: core, filesize, maxmem, openfiles, stacksize, maxchildren", PMIX_MCA_BASE_VAR_TYPE_STRING, &prte_set_max_sys_limits); if (0 > ret) { return ret; } /* get a clean output channel too - need to do this here because * we use it below, and prun and some other tools call this * function prior to calling prte_init */ PMIX_CONSTRUCT(&lds, pmix_output_stream_t); lds.lds_want_stdout = true; prte_clean_output = pmix_output_open(&lds); PMIX_DESTRUCT(&lds); /* check directive for warning about shared fs on tmpdir */ (void) pmix_mca_base_var_register("prte", "prte", NULL, "silence_shared_fs", "Silence the shared file system warning", PMIX_MCA_BASE_VAR_TYPE_BOOL, &prte_silence_shared_fs); /* LOOK FOR A TMP DIRECTORY BASE */ /* Several options are provided to cover a range of possibilities: * * (a) all processes need to use a specified location as the base * for tmp directories * (b) daemons on remote nodes need to use a specified location, but * one different from that used by mpirun * (c) mpirun needs to use a specified location, but one different * from that used on 
remote nodes */ prte_tmpdir_base = NULL; (void) pmix_mca_base_var_register("prte", "prte", NULL, "tmpdir_base", "Base of the session directory tree to be used by all processes", PMIX_MCA_BASE_VAR_TYPE_STRING, &prte_tmpdir_base); prte_local_tmpdir_base = NULL; (void) pmix_mca_base_var_register("prte", "prte", NULL, "local_tmpdir_base", "Base of the session directory tree to be used by prun/mpirun", PMIX_MCA_BASE_VAR_TYPE_STRING, &prte_local_tmpdir_base); prte_remote_tmpdir_base = NULL; (void) pmix_mca_base_var_register("prte", "prte", NULL, "remote_tmpdir_base", "Base of the session directory tree on remote nodes, if " "required to be different from head node", PMIX_MCA_BASE_VAR_TYPE_STRING, &prte_remote_tmpdir_base); /* if a global tmpdir was specified, then we do not allow specification * of the local or remote values to avoid confusion */ if (NULL != prte_tmpdir_base && (NULL != prte_local_tmpdir_base || NULL != prte_remote_tmpdir_base)) { pmix_output(prte_clean_output, "------------------------------------------------------------------\n" "The MCA param prte_tmpdir_base was specified, which sets the base\n" "of the temporary directory tree for all procs. However, values for\n" "the local and/or remote tmpdir base were also given. This can lead\n" "to confusion and is therefore not allowed. 
Please specify either a\n" "global tmpdir base OR a local/remote tmpdir base value\n" "------------------------------------------------------------------"); exit(1); } if (NULL != prte_tmpdir_base) { if (NULL != prte_process_info.tmpdir_base) { free(prte_process_info.tmpdir_base); } prte_process_info.tmpdir_base = strdup(prte_tmpdir_base); } else if (PRTE_PROC_IS_MASTER && NULL != prte_local_tmpdir_base) { /* prun will pickup the value for its own use */ if (NULL != prte_process_info.tmpdir_base) { free(prte_process_info.tmpdir_base); } prte_process_info.tmpdir_base = strdup(prte_local_tmpdir_base); } else if (PRTE_PROC_IS_DAEMON && NULL != prte_remote_tmpdir_base) { /* prun will pickup the value and forward it along, but must not * use it in its own work. So only a daemon needs to get it, and the * daemon will pass it down to its application procs. Note that prun * will pass -its- value to any procs local to it */ if (NULL != prte_process_info.tmpdir_base) { free(prte_process_info.tmpdir_base); } prte_process_info.tmpdir_base = strdup(prte_remote_tmpdir_base); } else { if (NULL != prte_process_info.tmpdir_base) { free(prte_process_info.tmpdir_base); } prte_process_info.tmpdir_base = strdup(pmix_tmp_directory()); } // check to see if this is on a shared file system // as we know this will impact launch as well as // application execution performance prte_process_info.shared_fs = pmix_path_nfs(prte_process_info.tmpdir_base, &fstype); if (prte_process_info.shared_fs && !prte_silence_shared_fs) { // this is a shared file system - warn the user pmix_show_help("help-prte-runtime.txt", "prte:session:dir:shared", true, prte_process_info.tmpdir_base, fstype, prte_tool_basename); } if (NULL != fstype) { free(fstype); } prte_prohibited_session_dirs = NULL; (void) pmix_mca_base_var_register("prte", "prte", NULL, "no_session_dirs", "Prohibited locations for session directories (multiple " "locations separated by ',', default=NULL)", PMIX_MCA_BASE_VAR_TYPE_STRING, 
&prte_prohibited_session_dirs); prte_fwd_environment = false; (void) pmix_mca_base_var_register("prte", "prte", NULL, "fwd_environment", "Forward the entire local environment", PMIX_MCA_BASE_VAR_TYPE_BOOL, &prte_fwd_environment); prte_execute_quiet = false; (void) pmix_mca_base_var_register("prte", "prte", NULL, "execute_quiet", "Do not output error and help messages", PMIX_MCA_BASE_VAR_TYPE_BOOL, &prte_execute_quiet); prte_report_silent_errors = false; (void) pmix_mca_base_var_register("prte", "prte", NULL, "report_silent_errors", "Report all errors, including silent ones", PMIX_MCA_BASE_VAR_TYPE_BOOL, &prte_report_silent_errors); prte_progress_thread_debug_level = -1; (void) pmix_mca_base_var_register("prte", "prte", NULL, "progress_thread_debug", "Debug level for PRTE progress threads", PMIX_MCA_BASE_VAR_TYPE_INT, &prte_progress_thread_debug_level); if (0 <= prte_progress_thread_debug_level) { prte_progress_thread_debug = pmix_output_open(NULL); pmix_output_set_verbosity(prte_progress_thread_debug, prte_progress_thread_debug_level); } prted_debug_failure = PMIX_RANK_INVALID; (void) pmix_mca_base_var_register("prte", "prte", NULL, "daemon_fail", "Have the specified prted fail after init for debugging purposes", PMIX_MCA_BASE_VAR_TYPE_INT, &prted_debug_failure); prted_debug_failure_delay = 0; (void) pmix_mca_base_var_register("prte", "prte", NULL, "daemon_fail_delay", "Have the specified prted fail after specified number of seconds [default: 0 => no delay]", PMIX_MCA_BASE_VAR_TYPE_INT, &prted_debug_failure_delay); /* default hostfile */ prte_default_hostfile = NULL; (void) pmix_mca_base_var_register("prte", "prte", NULL, "default_hostfile", "Name of the default hostfile (relative or absolute path, " "\"none\" to ignore environmental or default MCA param setting)", PMIX_MCA_BASE_VAR_TYPE_STRING, &prte_default_hostfile); if (NULL == prte_default_hostfile) { /* nothing was given, so define the default */ pmix_asprintf(&prte_default_hostfile, 
"%s/prte-default-hostfile", prte_install_dirs.sysconfdir); /* flag that nothing was given */ prte_default_hostfile_given = false; } else if (0 == strcmp(prte_default_hostfile, "none")) { free(prte_default_hostfile); prte_default_hostfile = NULL; /* flag that it was given */ prte_default_hostfile_given = true; } else { /* flag that it was given */ prte_default_hostfile_given = true; } /* default dash-host */ prte_default_dash_host = NULL; (void) pmix_mca_base_var_register("prte", "prte", NULL, "default_dash_host", "Default -host setting (specify \"none\" to ignore " "environmental or default MCA param setting)", PMIX_MCA_BASE_VAR_TYPE_STRING, &prte_default_dash_host); if (NULL != prte_default_dash_host && 0 == strcmp(prte_default_dash_host, "none")) { free(prte_default_dash_host); prte_default_dash_host = NULL; } prte_show_resolved_nodenames = false; (void) pmix_mca_base_var_register("prte", "prte", NULL, "show_resolved_nodenames", "Display any node names that are resolved to a different name [default: false]", PMIX_MCA_BASE_VAR_TYPE_BOOL, &prte_show_resolved_nodenames); prte_do_not_resolve = true; (void) pmix_mca_base_var_register("prte", "prte", NULL, "do_not_resolve", "Do not attempt to resolve hostnames " "[defaults to true]", PMIX_MCA_BASE_VAR_TYPE_BOOL, &prte_do_not_resolve); /* allow specification of the launch agent */ prte_launch_agent = "prted"; (void) pmix_mca_base_var_register("prte", "prte", NULL, "launch_agent", "Executable for DVM daemons on remote nodes [default: prted]", PMIX_MCA_BASE_VAR_TYPE_STRING, &prte_launch_agent); /* whether or not to require RM allocation */ prte_allocation_required = false; (void) pmix_mca_base_var_register("prte", "prte", NULL, "allocation_required", "Whether or not an allocation by a resource manager is required [default: no]", PMIX_MCA_BASE_VAR_TYPE_BOOL, &prte_allocation_required); prte_allowed_exit_without_sync = false; (void) pmix_mca_base_var_register("prte", "prte", NULL, "allowed_exit_without_sync", "Set default 
process exiting without calling finalize policy to not trigger job termination", PMIX_MCA_BASE_VAR_TYPE_BOOL, &prte_allowed_exit_without_sync); prte_report_child_jobs_separately = false; (void) pmix_mca_base_var_register("prte", "prte", NULL, "report_child_jobs_separately", "Set default to return the exit status of the primary job only", PMIX_MCA_BASE_VAR_TYPE_BOOL, &prte_report_child_jobs_separately); prte_stat_history_size = 1; (void) pmix_mca_base_var_register("prte", "prte", NULL, "stat_history_size", "Number of stat samples to keep", PMIX_MCA_BASE_VAR_TYPE_INT, &prte_stat_history_size); prte_max_vm_size = -1; (void) pmix_mca_base_var_register("prte", "prte", NULL, "max_vm_size", "Maximum size of virtual machine - used to subdivide allocation", PMIX_MCA_BASE_VAR_TYPE_INT, &prte_max_vm_size); local_setup_slots = NULL; (void) pmix_mca_base_var_register("prte", "prte", NULL, "set_default_slots", "Set the number of slots on nodes that lack such info to the" " number of specified objects [a number, \"cores\" (default)," " \"packages\", or \"hwthreads\" (default if hwthreads_as_cpus" " is set), or a fixed number to be applied to all nodes", PMIX_MCA_BASE_VAR_TYPE_STRING, &local_setup_slots); if (NULL == local_setup_slots) { prte_set_slots = strdup("core"); } else { prte_set_slots = strdup(local_setup_slots); } prte_set_slots_override = false; (void) pmix_mca_base_var_register("prte", "prte", NULL, "set_default_slots_override", "Set the number of slots on nodes to the number of " "objects specified by prte_set_default_slots regardless " "whather we are in a managed allocation or specifications " "were given in a hostfile", PMIX_MCA_BASE_VAR_TYPE_BOOL, &prte_set_slots_override); /* allow specification of the cores to be used by daemons */ prte_daemon_cores = NULL; (void) pmix_mca_base_var_register("prte", "prte", NULL, "daemon_cores", "Restrict the PRTE daemons (including mpirun) to operate on " "the specified cores (comma-separated list of ranges)", 
PMIX_MCA_BASE_VAR_TYPE_STRING, &prte_daemon_cores); /* Amount of time to wait for a stack trace to return from the daemons */ prte_stack_trace_wait_timeout = 30; (void) pmix_mca_base_var_register("prte", "prte", NULL, "timeout_for_stack_trace", "Seconds to wait for stack traces to return before terminating " "the job (<= 0 wait forever)", PMIX_MCA_BASE_VAR_TYPE_INT, &prte_stack_trace_wait_timeout); /* register the URI of the UNIVERSAL data server */ prte_data_server_uri = NULL; (void) pmix_mca_base_var_register("prte", "pmix", NULL, "server_uri", "URI of a session-level keyval server for publish/lookup operations", PMIX_MCA_BASE_VAR_TYPE_STRING, &prte_data_server_uri); pmix_mca_base_var_register("prte", "prte", NULL, "pmix_verbose", "Verbosity for PRRTE-level PMIx code", PMIX_MCA_BASE_VAR_TYPE_INT, &prte_pmix_verbose_output); (void) pmix_mca_base_var_register("prte", "prte", NULL, "progress_thread_cpus", "Comma-delimited list of ranges of CPUs to which" "the internal PRRTE progress thread is to be bound", PMIX_MCA_BASE_VAR_TYPE_STRING, &prte_progress_thread_cpus); (void) pmix_mca_base_var_register("prte", "prte", NULL, "bind_progress_thread_reqd", "Whether binding of internal PRRTE progress thread is required", PMIX_MCA_BASE_VAR_TYPE_BOOL, &prte_bind_progress_thread_reqd); (void) pmix_mca_base_var_register("prte", "prte", NULL, "hetero_nodes", "Allocation contains hetero nodes", PMIX_MCA_BASE_VAR_TYPE_BOOL, &prte_hetero_nodes); /* pickup the RML params */ prte_rml_register(); return PRTE_SUCCESS; } prrte-3.0.13/src/runtime/data_server/0000775000175000017500000000000015145263240017671 5ustar alastairalastairprrte-3.0.13/src/runtime/data_server/ds_lookup.c0000664000175000017500000002662615145263240022050 0ustar alastairalastair/* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. 
* Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2012-2016 Los Alamos National Security, LLC. * All rights reserved * Copyright (c) 2015-2020 Intel, Inc. All rights reserved. * Copyright (c) 2017-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2021-2025 Nanook Consulting All rights reserved. * Copyright (c) 2025 Triad National Security, LLC. All rights * reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #include "prte_config.h" #include "constants.h" #include "types.h" #include #ifdef HAVE_SYS_TIME_H # include #endif #include "src/class/pmix_pointer_array.h" #include "src/pmix/pmix-internal.h" #include "src/util/pmix_argv.h" #include "src/util/pmix_output.h" #include "src/mca/errmgr/errmgr.h" #include "src/rml/rml.h" #include "src/runtime/prte_globals.h" #include "src/runtime/prte_wait.h" #include "src/util/name_fns.h" #include "src/runtime/data_server/prte_data_server.h" #include "src/runtime/data_server/ds.h" pmix_status_t prte_ds_lookup(pmix_proc_t *sender, int room_number, pmix_data_buffer_t *buffer, pmix_data_buffer_t *answer) { int32_t count; int i, k; size_t nanswers; pmix_status_t rc; pmix_proc_t requestor; size_t n, ninfo; char **keys = NULL, **cache = NULL; char *str; pmix_info_t *info; pmix_data_buffer_t pbkt; uint32_t uid = UINT32_MAX; bool wait = false; pmix_data_range_t range=PMIX_RANGE_UNDEF; prte_data_object_t *data; prte_ds_info_t *rinfo; prte_info_item_t *ds1, *ds2; pmix_list_t answers; bool found; prte_data_req_t *req, rq; pmix_byte_object_t pbo; /* unpack the requestor */ count = 1; 
rc = PMIx_Data_unpack(NULL, buffer, &requestor, &count, PMIX_PROC); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return rc; } /* unpack the number of keys */ count = 1; rc = PMIx_Data_unpack(NULL, buffer, &ninfo, &count, PMIX_SIZE); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return rc; } if (0 == ninfo) { /* they forgot to send us the keys?? */ PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); return PMIX_ERR_BAD_PARAM; } /* unpack the keys */ for (n = 0; n < ninfo; n++) { count = 1; rc = PMIx_Data_unpack(NULL, buffer, &str, &count, PMIX_STRING); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_ARGV_FREE_COMPAT(keys); return rc; } PMIX_ARGV_APPEND_NOSIZE_COMPAT(&keys, str); free(str); } /* unpack the number of directives, if any */ count = 1; rc = PMIx_Data_unpack(NULL, buffer, &ninfo, &count, PMIX_SIZE); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_ARGV_FREE_COMPAT(keys); return rc; } if (0 < ninfo) { PMIX_INFO_CREATE(info, ninfo); count = ninfo; rc = PMIx_Data_unpack(NULL, buffer, info, &count, PMIX_INFO); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_INFO_FREE(info, ninfo); PMIX_ARGV_FREE_COMPAT(keys); return rc; } /* scan the directives for things we care about */ for (n = 0; n < ninfo; n++) { if (PMIx_Check_key(info[n].key, PMIX_USERID)) { uid = info[n].value.data.uint32; } else if (PMIx_Check_key(info[n].key, PMIX_WAIT)) { /* flag that we wait until the data is present */ wait = true; } else if (PMIx_Check_key(info[n].key, PMIX_RANGE)) { range = info[n].value.data.range; } } /* ignore anything else for now */ PMIX_INFO_FREE(info, ninfo); } /* cycle across the provided keys */ PMIX_DATA_BUFFER_CONSTRUCT(&pbkt); PMIX_CONSTRUCT(&answers, pmix_list_t); PMIX_CONSTRUCT(&rq, prte_data_req_t); memcpy(&rq.requestor, &requestor, sizeof(pmix_proc_t)); memcpy(&rq.proxy, sender, sizeof(pmix_proc_t)); for (i = 0; NULL != keys[i]; i++) { pmix_output_verbose(10, prte_data_store.output, "%s data server: looking for %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), keys[i]); found = 
false; /* cycle across the stored data, looking for a match */ for (k = 0; k < prte_data_store.store.size && !found; k++) { data = (prte_data_object_t *) pmix_pointer_array_get_item(&prte_data_store.store, k); if (NULL == data) { continue; } /* for security reasons, can only access data posted by the same user id */ if (uid != data->uid) { pmix_output_verbose(10, prte_data_store.output, "%s\tMISMATCH UID %u %u", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), (unsigned) uid, (unsigned) data->uid); continue; } /* check the range */ if (PMIX_SUCCESS != prte_data_server_check_range(&rq, data)) { continue; } /* see if we have this key */ PMIX_LIST_FOREACH_SAFE(ds1, ds2, &data->info, prte_info_item_t) { pmix_output_verbose(10, prte_data_store.output, "%s COMPARING %s %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), keys[i], ds1->info.key); if (PMIx_Check_key(ds1->info.key, keys[i])) { rinfo = PMIX_NEW(prte_ds_info_t); memcpy(&rinfo->source, &data->owner, sizeof(pmix_proc_t)); PMIX_INFO_XFER(&rinfo->info, &ds1->info); // check the persistence pmix_output_verbose(1, prte_data_store.output, "%s data server: adding %s to data from %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), ds1->info.key, PRTE_NAME_PRINT(&data->owner)); if (PMIX_PERSIST_FIRST_READ == data->persistence) { pmix_list_remove_item(&data->info, &ds1->super); PMIX_RELEASE(ds1); } pmix_list_append(&answers, &rinfo->super); // can only find it once - keys are required to be globally unique // within a given range, and we checked the range above found = true; break; } } } // loop over stored data if (!found) { // cache the key PMIX_ARGV_APPEND_NOSIZE_COMPAT(&cache, keys[i]); } } // loop over keys nanswers = pmix_list_get_size(&answers); rc = PMIX_ERR_NOT_FOUND; if (0 < nanswers) { /* pack the number of data items found */ rc = PMIx_Data_pack(NULL, &pbkt, &nanswers, 1, PMIX_SIZE); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_LIST_DESTRUCT(&answers); PMIX_ARGV_FREE_COMPAT(keys); PMIX_ARGV_FREE_COMPAT(cache); return rc; } /* loop thru 
and pack the individual responses - this is somewhat less * efficient than packing an info array, but avoids another malloc * operation just to assemble all the return values into a contiguous * array */ PMIX_LIST_FOREACH(rinfo, &answers, prte_ds_info_t) { /* pack the data owner */ rc = PMIx_Data_pack(NULL, &pbkt, &rinfo->source, 1, PMIX_PROC); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_LIST_DESTRUCT(&answers); PMIX_ARGV_FREE_COMPAT(keys); PMIX_ARGV_FREE_COMPAT(cache); return rc; } rc = PMIx_Data_pack(NULL, &pbkt, &rinfo->info, 1, PMIX_INFO); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_LIST_DESTRUCT(&answers); PMIX_ARGV_FREE_COMPAT(keys); PMIX_ARGV_FREE_COMPAT(cache); return rc; } } } PMIX_LIST_DESTRUCT(&answers); i = PMIX_ARGV_COUNT_COMPAT(cache); if (0 < i) { if (wait) { pmix_output_verbose(1, prte_data_store.output, "%s data server:lookup: at least some data not found %d vs %d", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), (int) nanswers, (int) PMIX_ARGV_COUNT_COMPAT(keys)); req = PMIX_NEW(prte_data_req_t); req->room_number = room_number; req->proxy = *sender; memcpy(&req->requestor, &requestor, sizeof(pmix_proc_t)); req->uid = uid; req->range = range; req->keys = cache; cache = NULL; pmix_list_append(&prte_data_store.pending, &req->super); PMIX_ARGV_FREE_COMPAT(keys); PMIX_DATA_BUFFER_DESTRUCT(&pbkt); return PMIX_SUCCESS; // do not return an answer } else { PMIX_ARGV_FREE_COMPAT(cache); if (0 == nanswers) { /* nothing was found - indicate that situation */ rc = PMIX_ERR_NOT_FOUND; PMIX_ARGV_FREE_COMPAT(keys); PMIX_DATA_BUFFER_DESTRUCT(&pbkt); return rc; } else { rc = PMIX_ERR_PARTIAL_SUCCESS; } } } PMIX_ARGV_FREE_COMPAT(keys); pmix_output_verbose(1, prte_data_store.output, "%s data server:lookup: data found - status %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PMIx_Error_string(rc)); if (PMIX_SUCCESS == rc) { /* pack the status */ rc = PMIx_Data_pack(NULL, answer, &rc, 1, PMIX_STATUS); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); 
PMIX_DATA_BUFFER_DESTRUCT(&pbkt); return rc; } /* unload the packed values */ rc = PMIx_Data_unload(&pbkt, &pbo); /* pack it into our reply */ rc = PMIx_Data_pack(NULL, answer, &pbo, 1, PMIX_BYTE_OBJECT); PMIX_BYTE_OBJECT_DESTRUCT(&pbo); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); } PRTE_RML_SEND(rc, sender->rank, answer, PRTE_RML_TAG_DATA_CLIENT); if (PRTE_SUCCESS != rc) { PRTE_ERROR_LOG(rc); PMIX_DATA_BUFFER_RELEASE(answer); } } return rc; } prrte-3.0.13/src/runtime/data_server/ds_main.c0000664000175000017500000002460115145263240021452 0ustar alastairalastair/* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2012-2016 Los Alamos National Security, LLC. * All rights reserved * Copyright (c) 2015-2020 Intel, Inc. All rights reserved. * Copyright (c) 2017-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2021-2025 Nanook Consulting All rights reserved. * Copyright (c) 2025 Triad National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #include "prte_config.h" #include "constants.h" #include "types.h" #include #ifdef HAVE_SYS_TIME_H # include #endif #include "src/class/pmix_pointer_array.h" #include "src/pmix/pmix-internal.h" #include "src/util/pmix_argv.h" #include "src/util/pmix_output.h" #include "src/mca/errmgr/errmgr.h" #include "src/rml/rml.h" #include "src/runtime/prte_globals.h" #include "src/runtime/prte_wait.h" #include "src/util/name_fns.h" #include "src/runtime/data_server/prte_data_server.h" #include "src/runtime/data_server/ds.h" // globals prte_data_store_t prte_data_store = { .store = PMIX_POINTER_ARRAY_STATIC_INIT, .pending = PMIX_LIST_STATIC_INIT, .output = -1, .verbosity = 0 }; /* locals */ static bool initialized = false; int prte_data_server_init(void) { pmix_status_t rc; if (initialized) { return PRTE_SUCCESS; } initialized = true; /* register a verbosity */ prte_data_store.verbosity = -1; (void) pmix_mca_base_var_register("prte", "prte", "data", "server_verbose", "Debug verbosity for PRTE data server", PMIX_MCA_BASE_VAR_TYPE_INT, &prte_data_store.verbosity); if (0 <= prte_data_store.verbosity) { prte_data_store.output = pmix_output_open(NULL); pmix_output_set_verbosity(prte_data_store.output, prte_data_store.verbosity); } PMIX_CONSTRUCT(&prte_data_store.store, pmix_pointer_array_t); if (PMIX_SUCCESS != (rc = pmix_pointer_array_init(&prte_data_store.store, 1, INT_MAX, 1))) { PMIX_ERROR_LOG(rc); return rc; } PMIX_CONSTRUCT(&prte_data_store.pending, pmix_list_t); PRTE_RML_RECV(PRTE_NAME_WILDCARD, PRTE_RML_TAG_DATA_SERVER, PRTE_RML_PERSISTENT, prte_data_server, NULL); return PRTE_SUCCESS; } void prte_data_server_finalize(void) { int32_t i; prte_data_object_t *data; if (!initialized) { return; } initialized = false; for (i = 0; i < prte_data_store.store.size; i++) { data = (prte_data_object_t *) pmix_pointer_array_get_item(&prte_data_store.store, i); if (NULL != data) { PMIX_RELEASE(data); } } 
PMIX_DESTRUCT(&prte_data_store.store); PMIX_LIST_DESTRUCT(&prte_data_store.pending); } void prte_data_server(int status, pmix_proc_t *sender, pmix_data_buffer_t *buffer, prte_rml_tag_t tag, void *cbdata) { uint8_t command; int32_t count; pmix_data_buffer_t *answer; pmix_status_t rc; int room_number; PRTE_HIDE_UNUSED_PARAMS(status, tag, cbdata); pmix_output_verbose(1, prte_data_store.output, "%s data server got message from %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(sender)); /* unpack the room number of the caller's request */ count = 1; rc = PMIx_Data_unpack(NULL, buffer, &room_number, &count, PMIX_INT); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return; } /* unpack the command */ count = 1; rc = PMIx_Data_unpack(NULL, buffer, &command, &count, PMIX_UINT8); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return; } PMIX_DATA_BUFFER_CREATE(answer); /* pack the room number as this must lead any response */ rc = PMIx_Data_pack(NULL, answer, &room_number, 1, PMIX_INT); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_DATA_BUFFER_RELEASE(answer); return; } /* and the command */ rc = PMIx_Data_pack(NULL, answer, &command, 1, PMIX_UINT8); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_DATA_BUFFER_RELEASE(answer); return; } switch (command) { case PRTE_PMIX_PUBLISH_CMD: rc = prte_ds_publish(sender, buffer, answer); break; case PRTE_PMIX_LOOKUP_CMD: pmix_output_verbose(1, prte_data_store.output, "%s data server: lookup data from %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(sender)); rc = prte_ds_lookup(sender, room_number, buffer, answer); break; case PRTE_PMIX_UNPUBLISH_CMD: rc = prte_ds_unpublish(sender, buffer, answer); break; case PRTE_PMIX_PURGE_PROC_CMD: prte_ds_purge(sender, buffer, answer); return; default: PRTE_ERROR_LOG(PRTE_ERR_BAD_PARAM); rc = PRTE_ERR_BAD_PARAM; break; } if (PMIX_SUCCESS != rc) { pmix_output_verbose(1, prte_data_store.output, "%s data server: sending error %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), 
PRTE_ERROR_NAME(rc)); /* pack the error code */ rc = PMIx_Data_pack(NULL, answer, &rc, 1, PMIX_STATUS); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); } PRTE_RML_SEND(rc, sender->rank, answer, PRTE_RML_TAG_DATA_CLIENT); if (PRTE_SUCCESS != rc) { PRTE_ERROR_LOG(rc); PMIX_DATA_BUFFER_RELEASE(answer); } } } pmix_status_t prte_data_server_check_range(prte_data_req_t *req, prte_data_object_t *data) { // we automatically accept session and global ranges if (PMIX_RANGE_SESSION == data->range || PMIX_RANGE_GLOBAL == data->range || PMIX_RANGE_UNDEF == data->range) { return PMIX_SUCCESS; } if (PMIX_RANGE_NAMESPACE == data->range) { if (PMIX_CHECK_NSPACE(req->requestor.nspace, data->owner.nspace)) { pmix_output_verbose(10, prte_data_store.output, "%s\tMATCH NSPACES %s %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), req->requestor.nspace, data->owner.nspace); return PMIX_SUCCESS; } } if (PMIX_RANGE_LOCAL == data->range) { // the sender is the requestor's daemon, so see if // that matches the published data's proxy if (PMIX_CHECK_PROCID(&data->proxy, &req->proxy)) { pmix_output_verbose(10, prte_data_store.output, "%s\tMATCH LOCATION %s %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PMIX_NAME_PRINT(&data->proxy), PMIX_NAME_PRINT(&req->proxy)); return PMIX_SUCCESS; } } if (PMIX_RANGE_PROC_LOCAL == data->range) { // the requestor must be the same as the owner if (PMIX_CHECK_PROCID(&data->owner, &req->requestor)) { pmix_output_verbose(10, prte_data_store.output, "%s\tMATCH LOCAL %s %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PMIX_NAME_PRINT(&data->owner), PMIX_NAME_PRINT(&req->requestor)); return PMIX_SUCCESS; } } if (PMIX_RANGE_CUSTOM == data->range) { // requestor must be on the list of allowed accessors } if (PMIX_RANGE_RM == data->range) { // the requestor must be from the host - which means // the nspace of the requestor must match that of // the host's server, which is my own if (PMIX_CHECK_NSPACE(req->requestor.nspace, PRTE_PROC_MY_NAME->nspace)) { pmix_output_verbose(10, 
prte_data_store.output, "%s\tMATCH RM %s %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), req->requestor.nspace, PRTE_PROC_MY_NAME->nspace); return PMIX_SUCCESS; } } // no matches return PMIX_ERROR; } // CLASS INSTANCE static void construct(prte_data_object_t *ptr) { ptr->index = -1; PMIX_PROC_CONSTRUCT(&ptr->owner); ptr->uid = UINT32_MAX; ptr->range = PMIX_RANGE_SESSION; ptr->persistence = PMIX_PERSIST_SESSION; PMIX_CONSTRUCT(&ptr->info, pmix_list_t); } static void destruct(prte_data_object_t *ptr) { PMIX_LIST_DESTRUCT(&ptr->info); } PMIX_CLASS_INSTANCE(prte_data_object_t, pmix_object_t, construct, destruct); static void rqcon(prte_data_req_t *p) { p->keys = NULL; p->uid = UINT32_MAX; p->range = PMIX_RANGE_UNDEF; } static void rqdes(prte_data_req_t *p) { PMIX_ARGV_FREE_COMPAT(p->keys); } PMIX_CLASS_INSTANCE(prte_data_req_t, pmix_list_item_t, rqcon, rqdes); PMIX_CLASS_INSTANCE(prte_data_cleanup_t, pmix_list_item_t, NULL, NULL); static void dsicon(prte_ds_info_t *p) { PMIX_PROC_CONSTRUCT(&p->source); PMIX_INFO_CONSTRUCT(&p->info); } static void dsides(prte_ds_info_t *p) { PMIX_INFO_DESTRUCT(&p->info); } PMIX_CLASS_INSTANCE(prte_ds_info_t, pmix_list_item_t, dsicon, dsides); prrte-3.0.13/src/runtime/data_server/ds_unpublish.c0000664000175000017500000001502415145263240022536 0ustar alastairalastair/* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2012-2016 Los Alamos National Security, LLC. * All rights reserved * Copyright (c) 2015-2020 Intel, Inc. 
All rights reserved. * Copyright (c) 2017-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2021-2025 Nanook Consulting All rights reserved. * Copyright (c) 2025 Triad National Security, LLC. All rights * reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #include "prte_config.h" #include "constants.h" #include "types.h" #include #ifdef HAVE_SYS_TIME_H # include #endif #include "src/class/pmix_pointer_array.h" #include "src/pmix/pmix-internal.h" #include "src/util/pmix_argv.h" #include "src/util/pmix_output.h" #include "src/mca/errmgr/errmgr.h" #include "src/rml/rml.h" #include "src/runtime/prte_globals.h" #include "src/runtime/prte_wait.h" #include "src/util/name_fns.h" #include "src/runtime/data_server/prte_data_server.h" #include "src/runtime/data_server/ds.h" pmix_status_t prte_ds_unpublish(pmix_proc_t *sender, pmix_data_buffer_t *buffer, pmix_data_buffer_t *answer) { int32_t count; prte_data_object_t *data; pmix_status_t rc; int k; size_t n, ninfo; uint32_t i; char *str; prte_data_req_t rq; prte_info_item_t *ds1, *ds2; pmix_info_t *info; pmix_output_verbose(1, prte_data_store.output, "%s data server got unpublish from %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PRTE_NAME_PRINT(sender)); PMIX_CONSTRUCT(&rq, prte_data_req_t); memcpy(&rq.proxy, sender, sizeof(pmix_proc_t)); /* unpack the requestor */ count = 1; rc = PMIx_Data_unpack(NULL, buffer, &rq.requestor, &count, PMIX_PROC); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return rc; } pmix_output_verbose(1, prte_data_store.output, "%s data server: unpublish data from %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), PMIX_NAME_PRINT(&rq.requestor)); /* unpack the number of keys */ count = 1; rc = PMIx_Data_unpack(NULL, buffer, &ninfo, &count, PMIX_SIZE); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return rc; } if (0 == ninfo) { /* they forgot to send us the keys?? 
*/ PMIX_ERROR_LOG(PMIX_ERR_BAD_PARAM); return PMIX_ERR_BAD_PARAM; } /* unpack the keys */ for (n = 0; n < ninfo; n++) { count = 1; rc = PMIx_Data_unpack(NULL, buffer, &str, &count, PMIX_STRING); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_DESTRUCT(&rq); return rc; } PMIX_ARGV_APPEND_NOSIZE_COMPAT(&rq.keys, str); free(str); } /* unpack the number of directives, if any */ count = 1; rc = PMIx_Data_unpack(NULL, buffer, &ninfo, &count, PMIX_SIZE); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); return rc; } if (0 < ninfo) { PMIX_INFO_CREATE(info, ninfo); count = ninfo; rc = PMIx_Data_unpack(NULL, buffer, info, &count, PMIX_INFO); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_INFO_FREE(info, ninfo); return rc; } /* scan the directives for things we care about */ for (n = 0; n < ninfo; n++) { if (PMIx_Check_key(info[n].key, PMIX_USERID)) { rq.uid = info[n].value.data.uint32; } else if (PMIx_Check_key(info[n].key, PMIX_RANGE)) { rq.range = info[n].value.data.range; } } /* ignore anything else for now */ PMIX_INFO_FREE(info, ninfo); } /* cycle across the provided keys */ for (i = 0; NULL != rq.keys[i]; i++) { /* cycle across the stored data, looking for a match */ for (k = 0; k < prte_data_store.store.size; k++) { data = (prte_data_object_t *) pmix_pointer_array_get_item(&prte_data_store.store, k); if (NULL == data) { continue; } /* can only access data posted by the same user id */ if (rq.uid != data->uid) { continue; } /* can only access data posted by the same process */ if (!PMIX_CHECK_NSPACE(rq.requestor.nspace, data->owner.nspace) || rq.requestor.rank != data->owner.rank) { continue; } /* check the range */ if (PMIX_SUCCESS != prte_data_server_check_range(&rq, data)) { continue; } /* see if we have this key */ PMIX_LIST_FOREACH_SAFE(ds1, ds2, &data->info, prte_info_item_t) { if (PMIx_Check_key(ds1->info.key, rq.keys[i])) { /* found it - remove that item */ pmix_list_remove_item(&data->info, &ds1->super); PMIX_RELEASE(ds1); } } /* if all the data has been 
removed, then remove the object */ if (0 == pmix_list_get_size(&data->info)) { pmix_pointer_array_set_item(&prte_data_store.store, data->index, NULL); PMIX_RELEASE(data); } } } PMIX_DESTRUCT(&rq); if (PMIX_SUCCESS == rc) { // send back an answer rc = PMIx_Data_pack(NULL, answer, &rc, 1, PMIX_STATUS); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); } PRTE_RML_SEND(rc, sender->rank, answer, PRTE_RML_TAG_DATA_CLIENT); if (PRTE_SUCCESS != rc) { PRTE_ERROR_LOG(rc); PMIX_DATA_BUFFER_RELEASE(answer); } } return rc; } prrte-3.0.13/src/runtime/data_server/prte_data_server.h0000664000175000017500000000337315145263240023401 0ustar alastairalastair/* * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2007-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2015-2020 Intel, Inc. All rights reserved. * Copyright (c) 2021-2025 Nanook Consulting All rights reserved. 
#ifndef PRTE_DATA_SERVER_H
#define PRTE_DATA_SERVER_H

#include "prte_config.h"
#include "types.h"

#include "src/rml/rml_types.h"
#include "src/pmix/pmix-internal.h"

BEGIN_C_DECLS

/* One-byte command codes carried in every message to the data server,
 * right after the caller's room number. prte_data_server() dispatches
 * on this value. */
#define PRTE_PMIX_PUBLISH_CMD    0x01
#define PRTE_PMIX_LOOKUP_CMD     0x02
#define PRTE_PMIX_UNPUBLISH_CMD  0x03
#define PRTE_PMIX_PURGE_PROC_CMD 0x04

/* provide hooks to startup and finalize the data server -
 * both are no-ops when the server is already in the requested state */
PRTE_EXPORT int prte_data_server_init(void);
PRTE_EXPORT void prte_data_server_finalize(void);

/* provide hook for the non-blocking receive - this is the callback
 * posted on PRTE_RML_TAG_DATA_SERVER by prte_data_server_init();
 * the status, tag and cbdata arguments are not used */
PRTE_EXPORT void prte_data_server(int status, pmix_proc_t *sender,
                                  pmix_data_buffer_t *buffer,
                                  prte_rml_tag_t tag, void *cbdata);

END_C_DECLS

#endif /* PRTE_DATA_SERVER_H */
# $COPYRIGHT$ # # Additional copyrights may follow # # $HEADER$ # # This makefile.am does not stand on its own - it is included from src/runtime/Makefile.am headers += \ runtime/data_server/prte_data_server.h \ runtime/data_server/ds.h libprrte_la_SOURCES += \ runtime/data_server/ds_main.c \ runtime/data_server/ds_publish.c \ runtime/data_server/ds_lookup.c \ runtime/data_server/ds_unpublish.c \ runtime/data_server/ds_purge.c prrte-3.0.13/src/runtime/data_server/ds_purge.c0000664000175000017500000000671015145263240021651 0ustar alastairalastair/* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2012-2016 Los Alamos National Security, LLC. * All rights reserved * Copyright (c) 2015-2020 Intel, Inc. All rights reserved. * Copyright (c) 2017-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2021-2025 Nanook Consulting All rights reserved. * Copyright (c) 2025 Triad National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #include "prte_config.h" #include "constants.h" #include "types.h" #include #ifdef HAVE_SYS_TIME_H # include #endif #include "src/class/pmix_pointer_array.h" #include "src/pmix/pmix-internal.h" #include "src/util/pmix_argv.h" #include "src/util/pmix_output.h" #include "src/mca/errmgr/errmgr.h" #include "src/rml/rml.h" #include "src/runtime/prte_globals.h" #include "src/runtime/prte_wait.h" #include "src/util/name_fns.h" #include "src/runtime/data_server/prte_data_server.h" #include "src/runtime/data_server/ds.h" void prte_ds_purge(pmix_proc_t *sender, pmix_data_buffer_t *buffer, pmix_data_buffer_t *answer) { int32_t count; prte_data_object_t *data; int k; pmix_status_t rc; pmix_proc_t requestor; /* unpack the proc whose data is to be purged - session * data is purged by providing a requestor whose rank * is wildcard */ count = 1; rc = PMIx_Data_unpack(NULL, buffer, &requestor, &count, PMIX_PROC); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); goto done; } pmix_output_verbose(1, prte_data_store.output, "%s data server: purge data from %s:%d", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), requestor.nspace, requestor.rank); /* cycle across the stored data, looking for a match */ for (k = 0; k < prte_data_store.store.size; k++) { data = (prte_data_object_t *) pmix_pointer_array_get_item(&prte_data_store.store, k); if (NULL == data) { continue; } /* check if data posted by the specified process */ if (!PMIX_CHECK_PROCID(&requestor, &data->owner)) { continue; } /* remove the object */ pmix_pointer_array_set_item(&prte_data_store.store, data->index, NULL); PMIX_RELEASE(data); } done: // send back an answer rc = PMIx_Data_pack(NULL, answer, &rc, 1, PMIX_STATUS); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); } PRTE_RML_SEND(rc, sender->rank, answer, PRTE_RML_TAG_DATA_CLIENT); if (PRTE_SUCCESS != rc) { PRTE_ERROR_LOG(rc); PMIX_DATA_BUFFER_RELEASE(answer); } } 
prrte-3.0.13/src/runtime/data_server/ds_publish.c0000664000175000017500000002774615145263240022211 0ustar alastairalastair/* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2012-2016 Los Alamos National Security, LLC. * All rights reserved * Copyright (c) 2015-2020 Intel, Inc. All rights reserved. * Copyright (c) 2017-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2021-2025 Nanook Consulting All rights reserved. * Copyright (c) 2025 Triad National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #include "prte_config.h" #include "constants.h" #include "types.h" #include #ifdef HAVE_SYS_TIME_H # include #endif #include "src/class/pmix_pointer_array.h" #include "src/pmix/pmix-internal.h" #include "src/util/pmix_argv.h" #include "src/util/pmix_output.h" #include "src/mca/errmgr/errmgr.h" #include "src/rml/rml.h" #include "src/runtime/prte_globals.h" #include "src/runtime/prte_wait.h" #include "src/util/name_fns.h" #include "src/runtime/data_server/prte_data_server.h" #include "src/runtime/data_server/ds.h" pmix_status_t prte_ds_publish(pmix_proc_t *sender, pmix_data_buffer_t *buffer, pmix_data_buffer_t *answer) { uint8_t command; int32_t count; prte_data_object_t *data; pmix_data_buffer_t *reply; int rc; size_t ninfo; uint32_t i; bool complete_resolved, found; prte_data_req_t *req, *rqnext; pmix_data_buffer_t pbkt; pmix_byte_object_t pbo; pmix_status_t ret; prte_info_item_t *ds1, *ds2, *ds3; size_t n; pmix_info_t *info; char **cache; pmix_list_t answers; data = PMIX_NEW(prte_data_object_t); memcpy(&data->proxy, sender, sizeof(pmix_proc_t)); /* unpack the publisher */ count = 1; ret = PMIx_Data_unpack(NULL, buffer, &data->owner, &count, PMIX_PROC); if (PMIX_SUCCESS != ret) { PMIX_ERROR_LOG(ret); PMIX_RELEASE(data); return ret; } pmix_output_verbose(1, prte_data_store.output, "%s data server: publishing data from %s:%d", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), data->owner.nspace, data->owner.rank); /* unpack the number of infos and directives they sent */ count = 1; ret = PMIx_Data_unpack(NULL, buffer, &ninfo, &count, PMIX_SIZE); if (PMIX_SUCCESS != ret) { PMIX_ERROR_LOG(ret); PMIX_RELEASE(data); return ret; } /* if it isn't at least one, then that's an error */ if (1 > ninfo) { ret = PMIX_ERR_BAD_PARAM; PMIX_ERROR_LOG(ret); PMIX_RELEASE(data); rc = PRTE_ERR_UNPACK_FAILURE; return rc; } /* create the space */ PMIX_INFO_CREATE(info, ninfo); /* unpack into it */ count = ninfo; if (PMIX_SUCCESS != (ret 
= PMIx_Data_unpack(NULL, buffer, info, &count, PMIX_INFO))) { PMIX_ERROR_LOG(ret); PMIX_RELEASE(data); PMIX_INFO_FREE(info, ninfo); rc = PRTE_ERR_UNPACK_FAILURE; return rc; } /* check for directives */ for (n = 0; n < ninfo; n++) { if (PMIx_Check_key(info[n].key, PMIX_RANGE)) { data->range = info[n].value.data.range; } else if (PMIx_Check_key(info[n].key, PMIX_PERSISTENCE)) { data->persistence = info[n].value.data.persist; } else if (PMIx_Check_key(info[n].key, PMIX_USERID)) { data->uid = info[n].value.data.uint32; } else { /* add it to the list of data */ ds1 = PMIX_NEW(prte_info_item_t); PMIX_INFO_XFER(&ds1->info, &info[n]); pmix_list_append(&data->info, &ds1->super); } } // add this data to our store data->index = pmix_pointer_array_add(&prte_data_store.store, data); pmix_output_verbose(1, prte_data_store.output, "%s data server: checking for pending requests", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)); /* check for pending requests that match this data */ reply = NULL; rc = PRTE_SUCCESS; PMIX_LIST_FOREACH_SAFE(req, rqnext, &prte_data_store.pending, prte_data_req_t) { if (req->uid != data->uid) { continue; } /* check the range */ if (PMIX_SUCCESS != prte_data_server_check_range(req, data)) { continue; } complete_resolved = false; cache = NULL; PMIX_CONSTRUCT(&answers, pmix_list_t); for (i = 0; NULL != req->keys[i]; i++) { /* cycle thru the data keys for matches */ found = false; PMIX_LIST_FOREACH_SAFE(ds1, ds2, &data->info, prte_info_item_t) { pmix_output_verbose(10, prte_data_store.output, "%s\tCHECKING %s TO %s", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), ds1->info.key, req->keys[i]); if (PMIx_Check_key(ds1->info.key, req->keys[i])) { pmix_output_verbose(10, prte_data_store.output, "%s data server: packaging return", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)); /* track this response */ pmix_output_verbose( 10, prte_data_store.output, "%s data server: adding %s data %s from %s:%d to response", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), ds1->info.key, 
PMIx_Data_type_string(ds1->info.value.type), data->owner.nspace, data->owner.rank); ds3 = PMIX_NEW(prte_info_item_t); PMIX_INFO_XFER(&ds3->info, &ds1->info); pmix_list_append(&answers, &ds3->super); // if the persistence is "first read", then remove this info if (PMIX_PERSIST_FIRST_READ == data->persistence) { pmix_list_remove_item(&data->info, &ds1->super); PMIX_RELEASE(ds1); } found = true; break; // a key can only occur once } } if (!found) { PMIX_ARGV_APPEND_NOSIZE_COMPAT(&cache, req->keys[i]); } } // update the keys to remove all that have been resolved if (0 < PMIX_ARGV_COUNT_COMPAT(cache)) { PMIX_ARGV_FREE_COMPAT(req->keys); req->keys = cache; } else { // if no keys are in the cache, then all keys were resolved complete_resolved = true; } n = pmix_list_get_size(&answers); if (0 == n) { PMIX_LIST_DESTRUCT(&answers); continue; } /* send the answers back to the requestor */ pmix_output_verbose(1, prte_data_store.output, "%s data server:publish returning %lu data to %s:%d", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), (unsigned long)n, req->requestor.nspace, req->requestor.rank); PMIX_DATA_BUFFER_CREATE(reply); /* start with their room number */ rc = PMIx_Data_pack(NULL, reply, &req->room_number, 1, PMIX_INT); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_DATA_BUFFER_RELEASE(reply); return rc; } /* we are responding to a lookup cmd */ command = PRTE_PMIX_LOOKUP_CMD; rc = PMIx_Data_pack(NULL, reply, &command, 1, PMIX_UINT8); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_DATA_BUFFER_RELEASE(reply); return rc; } /* if we found all of the requested keys, then indicate so */ if (n == (size_t) PMIX_ARGV_COUNT_COMPAT(req->keys)) { rc = PMIX_SUCCESS; } else { rc = PMIX_ERR_PARTIAL_SUCCESS; } /* return the status */ rc = PMIx_Data_pack(NULL, reply, &rc, 1, PMIX_STATUS); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_DATA_BUFFER_RELEASE(reply); return rc; } /* pack the rest into a pmix_data_buffer_t */ PMIX_DATA_BUFFER_CONSTRUCT(&pbkt); /* pack the number of returned 
info's */ if (PMIX_SUCCESS != (ret = PMIx_Data_pack(NULL, &pbkt, &n, 1, PMIX_SIZE))) { PMIX_ERROR_LOG(ret); PMIX_DATA_BUFFER_DESTRUCT(&pbkt); rc = PRTE_ERR_PACK_FAILURE; PMIX_DATA_BUFFER_RELEASE(reply); return rc; } /* loop thru and pack the individual responses - this is somewhat less * efficient than packing an info array, but avoids another malloc * operation just to assemble all the return values into a contiguous * array */ while (NULL != (ds3 = (prte_info_item_t *) pmix_list_remove_first(&answers))) { /* pack the data owner */ ret = PMIx_Data_pack(NULL, &pbkt, &data->owner, 1, PMIX_PROC); if (PMIX_SUCCESS != ret) { PMIX_ERROR_LOG(ret); PMIX_DATA_BUFFER_DESTRUCT(&pbkt); rc = PRTE_ERR_PACK_FAILURE; PMIX_DATA_BUFFER_RELEASE(reply); return rc; } /* pack the data */ ret = PMIx_Data_pack(NULL, &pbkt, &ds3->info, 1, PMIX_INFO); if (PMIX_SUCCESS != ret) { PMIX_ERROR_LOG(ret); PMIX_DATA_BUFFER_DESTRUCT(&pbkt); rc = PRTE_ERR_PACK_FAILURE; PMIX_DATA_BUFFER_RELEASE(reply); return rc; } } PMIX_LIST_DESTRUCT(&answers); /* unload the pmix buffer */ rc = PMIx_Data_unload(&pbkt, &pbo); /* pack it into our reply */ rc = PMIx_Data_pack(NULL, reply, &pbo, 1, PMIX_BYTE_OBJECT); PMIX_BYTE_OBJECT_DESTRUCT(&pbo); if (PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); PMIX_DATA_BUFFER_RELEASE(reply); PMIX_RELEASE(req); return rc; } PRTE_RML_SEND(rc, req->proxy.rank, reply, PRTE_RML_TAG_DATA_CLIENT); if (PRTE_SUCCESS != rc) { PRTE_ERROR_LOG(rc); PMIX_DATA_BUFFER_RELEASE(reply); } if (0 == pmix_list_get_size(&data->info)) { // all the data was removed, so we no longer need this entry pmix_pointer_array_set_item(&prte_data_store.store, data->index, NULL); PMIX_RELEASE(data); data = NULL; } if (complete_resolved) { // completely resolved this pending request, so remove it pmix_list_remove_item(&prte_data_store.pending, &req->super); PMIX_RELEASE(req); } if (NULL == data) { break; } } if (PMIX_SUCCESS == rc) { // send back an answer rc = PMIx_Data_pack(NULL, answer, &rc, 1, PMIX_STATUS); if 
(PMIX_SUCCESS != rc) { PMIX_ERROR_LOG(rc); } PRTE_RML_SEND(rc, sender->rank, answer, PRTE_RML_TAG_DATA_CLIENT); if (PRTE_SUCCESS != rc) { PRTE_ERROR_LOG(rc); PMIX_DATA_BUFFER_RELEASE(answer); } } return rc; } prrte-3.0.13/src/runtime/data_server/ds.h0000664000175000017500000000733415145263240020457 0ustar alastairalastair/* * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2007-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2015-2020 Intel, Inc. All rights reserved. * Copyright (c) 2021-2025 Nanook Consulting All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ /** * @file * * Data server for PRTE */ #ifndef PRTE_DS_INTERNAL_H #define PRTE_DS_INTERNAL_H #include "prte_config.h" #include "types.h" #include "src/pmix/pmix-internal.h" BEGIN_C_DECLS /* define an object to hold data */ typedef struct { /* base object */ pmix_object_t super; /* index of this object in the storage array */ int32_t index; // daemon that sent the data pmix_proc_t proxy; /* process that owns this data - only the * owner can remove it */ pmix_proc_t owner; /* uid of the owner - helps control * access rights */ uint32_t uid; /* characteristics */ pmix_data_range_t range; pmix_persistence_t persistence; /* and the values themselves - we store them as a list * because we may (if persistence is set to "first-read") * remove them upon read */ pmix_list_t info; } prte_data_object_t; PMIX_CLASS_DECLARATION(prte_data_object_t); /* define a request object for delayed answers */ typedef struct { pmix_list_item_t super; pmix_proc_t proxy; pmix_proc_t requestor; int room_number; uint32_t uid; pmix_data_range_t range; char **keys; } prte_data_req_t; PMIX_CLASS_DECLARATION(prte_data_req_t); /* define a container for data object cleanups */ typedef struct { pmix_list_item_t super; prte_data_object_t *data; } prte_data_cleanup_t; /* define a caddy for pointing to pmix_info_t that * are to be included in an answer */ typedef struct { pmix_list_item_t super; pmix_proc_t source; pmix_info_t info; } prte_ds_info_t; PMIX_CLASS_DECLARATION(prte_ds_info_t); typedef struct { pmix_pointer_array_t store; pmix_list_t pending; int output; int verbosity; } prte_data_store_t; extern prte_data_store_t prte_data_store; PRTE_EXPORT pmix_status_t prte_ds_publish(pmix_proc_t *sender, pmix_data_buffer_t *buffer, pmix_data_buffer_t *answer); PRTE_EXPORT pmix_status_t prte_ds_lookup(pmix_proc_t *sender, int room_number, pmix_data_buffer_t *buffer, pmix_data_buffer_t *answer); PRTE_EXPORT pmix_status_t 
prte_ds_unpublish(pmix_proc_t *sender, pmix_data_buffer_t *buffer, pmix_data_buffer_t *answer); PRTE_EXPORT void prte_ds_purge(pmix_proc_t *sender, pmix_data_buffer_t *buffer, pmix_data_buffer_t *answer); PRTE_EXPORT pmix_status_t prte_data_server_check_range(prte_data_req_t *req, prte_data_object_t *data); END_C_DECLS #endif /* PRTE_DS_INTERNAL_H */ prrte-3.0.13/src/runtime/prte_progress_threads.c0000664000175000017500000003107515145263240022154 0ustar alastairalastair/* * Copyright (c) 2014-2020 Intel, Inc. All rights reserved. * Copyright (c) 2015-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2021-2025 Nanook Consulting All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #include "prte_config.h" #include "constants.h" #ifdef HAVE_UNISTD_H # include #endif #include #include #ifdef HAVE_PTHREAD_NP_H # include #endif #include "src/class/pmix_list.h" #include "src/event/event-internal.h" #include "src/runtime/prte_globals.h" #include "src/threads/pmix_threads.h" #include "src/util/pmix_argv.h" #include "src/util/error.h" #include "src/util/pmix_fd.h" #include "src/runtime/prte_progress_threads.h" /* create a tracking object for progress threads */ typedef struct { pmix_list_item_t super; int refcount; char *name; prte_event_base_t *ev_base; /* This will be set to false when it is time for the progress thread to exit */ volatile bool ev_active; /* This event will always be set on the ev_base (so that the ev_base is not empty!) 
*/ prte_event_t block; bool engine_constructed; pmix_thread_t engine; #if PRTE_HAVE_LIBEV ev_async async; pthread_mutex_t mutex; pthread_cond_t cond; pmix_list_t list; #endif } prte_progress_tracker_t; static void tracker_constructor(prte_progress_tracker_t *p) { p->refcount = 1; // start at one since someone created it p->name = NULL; p->ev_base = NULL; p->ev_active = false; p->engine_constructed = false; #if PRTE_HAVE_LIBEV pthread_mutex_init(&p->mutex, NULL); PMIX_CONSTRUCT(&p->list, pmix_list_t); #endif } static void tracker_destructor(prte_progress_tracker_t *p) { prte_event_del(&p->block); if (NULL != p->name) { free(p->name); } if (NULL != p->ev_base) { prte_event_base_free(p->ev_base); } if (p->engine_constructed) { PMIX_DESTRUCT(&p->engine); } #if PRTE_HAVE_LIBEV pthread_mutex_destroy(&p->mutex); PMIX_LIST_DESTRUCT(&p->list); #endif } static PMIX_CLASS_INSTANCE(prte_progress_tracker_t, pmix_list_item_t, tracker_constructor, tracker_destructor); #if PRTE_HAVE_LIBEV typedef enum { PRTE_EVENT_ACTIVE, PRTE_EVENT_ADD, PRTE_EVENT_DEL } prte_event_type_t; typedef struct { pmix_list_item_t super; struct event *ev; struct timeval *tv; int res; short ncalls; prte_event_type_t type; } prte_event_caddy_t; static PMIX_CLASS_INSTANCE(prte_event_caddy_t, pmix_list_item_t, NULL, NULL); static prte_progress_tracker_t *prte_progress_tracker_get_by_base(struct event_base *); static void prte_libev_ev_async_cb(EV_P_ ev_async *w, int revents) { prte_progress_tracker_t *trk = prte_progress_tracker_get_by_base((struct event_base *) EV_A); assert(NULL != trk); pthread_mutex_lock(&trk->mutex); prte_event_caddy_t *cd, *next; PMIX_LIST_FOREACH_SAFE(cd, next, &trk->list, prte_event_caddy_t) { switch (cd->type) { case PRTE_EVENT_ADD: (void) event_add(cd->ev, cd->tv); break; case PRTE_EVENT_DEL: (void) event_del(cd->ev); break; case PRTE_EVENT_ACTIVE: (void) event_active(cd->ev, cd->res, cd->ncalls); break; } pmix_list_remove_item(&trk->list, &cd->super); PMIX_RELEASE(cd); } 
pthread_mutex_unlock(&trk->mutex); } int prte_event_add(struct event *ev, struct timeval *tv) { int res; prte_progress_tracker_t *trk = prte_progress_tracker_get_by_base(ev->ev_base); if ((NULL != trk) && !pthread_equal(pthread_self(), trk->engine.t_handle)) { prte_event_caddy_t *cd = PMIX_NEW(prte_event_caddy_t); cd->type = PRTE_EVENT_ADD; cd->ev = ev; cd->tv = tv; pthread_mutex_lock(&trk->mutex); pmix_list_append(&trk->list, &cd->super); ev_async_send((struct ev_loop *) trk->ev_base, &trk->async); pthread_mutex_unlock(&trk->mutex); res = PRTE_SUCCESS; } else { res = event_add(ev, tv); } return res; } int prte_event_del(struct event *ev) { int res; prte_progress_tracker_t *trk = prte_progress_tracker_get_by_base(ev->ev_base); if ((NULL != trk) && !pthread_equal(pthread_self(), trk->engine.t_handle)) { prte_event_caddy_t *cd = PMIX_NEW(prte_event_caddy_t); cd->type = PRTE_EVENT_DEL; cd->ev = ev; pthread_mutex_lock(&trk->mutex); pmix_list_append(&trk->list, &cd->super); ev_async_send((struct ev_loop *) trk->ev_base, &trk->async); pthread_mutex_unlock(&trk->mutex); res = PRTE_SUCCESS; } else { res = event_del(ev); } return res; } void prte_event_active(struct event *ev, int res, short ncalls) { prte_progress_tracker_t *trk = prte_progress_tracker_get_by_base(ev->ev_base); if ((NULL != trk) && !pthread_equal(pthread_self(), trk->engine.t_handle)) { prte_event_caddy_t *cd = PMIX_NEW(prte_event_caddy_t); cd->type = PRTE_EVENT_ACTIVE; cd->ev = ev; cd->res = res; cd->ncalls = ncalls; pthread_mutex_lock(&trk->mutex); pmix_list_append(&trk->list, &cd->super); ev_async_send((struct ev_loop *) trk->ev_base, &trk->async); pthread_mutex_unlock(&trk->mutex); } else { event_active(ev, res, ncalls); } } void prte_event_base_loopexit(prte_event_base_t *ev_base) { prte_progress_tracker_t *trk = prte_progress_tracker_get_by_base(ev_base); assert(NULL != trk); ev_async_send((struct ev_loop *) trk->ev_base, &trk->async); } #endif static bool inited = false; static pmix_list_t tracking; 
static struct timeval long_timeout = {.tv_sec = 3600, .tv_usec = 0}; static const char *shared_thread_name = "PRTE-wide async progress thread"; /* * If this event is fired, just restart it so that this event base * continues to have something to block on. */ static void dummy_timeout_cb(int fd, short args, void *cbdata) { prte_progress_tracker_t *trk = (prte_progress_tracker_t *) cbdata; PRTE_HIDE_UNUSED_PARAMS(fd, args); prte_event_add(&trk->block, &long_timeout); } /* * Main for the progress thread */ static void *progress_engine(pmix_object_t *obj) { pmix_thread_t *t = (pmix_thread_t *) obj; prte_progress_tracker_t *trk = (prte_progress_tracker_t *) t->t_arg; while (trk->ev_active) { prte_event_loop(trk->ev_base, PRTE_EVLOOP_ONCE); } return PMIX_THREAD_CANCELLED; } static void stop_progress_engine(prte_progress_tracker_t *trk) { assert(trk->ev_active); trk->ev_active = false; /* break the event loop - this will cause the loop to exit upon completion of any current event */ prte_event_base_loopexit(trk->ev_base); pmix_thread_join(&trk->engine, NULL); } static int start_progress_engine(prte_progress_tracker_t *trk) { #ifdef HAVE_PTHREAD_SETAFFINITY_NP cpu_set_t cpuset; char **ranges, *dash; int k, n, start, end; #endif assert(!trk->ev_active); trk->ev_active = true; /* fork off a thread to progress it */ trk->engine.t_run = progress_engine; trk->engine.t_arg = trk; int rc = pmix_thread_start(&trk->engine); if (PRTE_SUCCESS != rc) { PRTE_ERROR_LOG(rc); } #ifdef HAVE_PTHREAD_SETAFFINITY_NP if (NULL != prte_progress_thread_cpus) { CPU_ZERO(&cpuset); // comma-delimited list of cpu ranges ranges = PMIX_ARGV_SPLIT_COMPAT(prte_progress_thread_cpus, ','); for (n=0; NULL != ranges[n]; n++) { // look for '-' start = strtoul(ranges[n], &dash, 10); if (NULL == dash) { CPU_SET(start, &cpuset); } else { ++dash; // skip over the '-' end = strtoul(dash, NULL, 10); for (k=start; k < end; k++) { CPU_SET(k, &cpuset); } } } rc = pthread_setaffinity_np(trk->engine.t_handle, 
sizeof(cpu_set_t), &cpuset); if (0 != rc && prte_bind_progress_thread_reqd) { pmix_output(0, "Failed to bind progress thread %s", (NULL == trk->name) ? "NULL" : trk->name); rc = PRTE_ERR_NOT_SUPPORTED; } else { rc = PRTE_SUCCESS; } } #endif return rc; } prte_event_base_t *prte_progress_thread_init(const char *name) { prte_progress_tracker_t *trk; int rc; if (!inited) { PMIX_CONSTRUCT(&tracking, pmix_list_t); inited = true; } if (NULL == name) { name = shared_thread_name; } /* check if we already have this thread */ PMIX_LIST_FOREACH(trk, &tracking, prte_progress_tracker_t) { if (0 == strcmp(name, trk->name)) { /* we do, so up the refcount on it */ ++trk->refcount; /* return the existing base */ return trk->ev_base; } } trk = PMIX_NEW(prte_progress_tracker_t); if (NULL == trk) { PRTE_ERROR_LOG(PRTE_ERR_OUT_OF_RESOURCE); return NULL; } trk->name = strdup(name); if (NULL == trk->name) { PRTE_ERROR_LOG(PRTE_ERR_OUT_OF_RESOURCE); PMIX_RELEASE(trk); return NULL; } if (NULL == (trk->ev_base = prte_event_base_create())) { PRTE_ERROR_LOG(PRTE_ERR_OUT_OF_RESOURCE); PMIX_RELEASE(trk); return NULL; } /* add an event to the new event base (if there are no events, prte_event_loop() will return immediately) */ prte_event_set(trk->ev_base, &trk->block, -1, PRTE_EV_PERSIST, dummy_timeout_cb, trk); prte_event_add(&trk->block, &long_timeout); #if PRTE_HAVE_LIBEV ev_async_init(&trk->async, prte_libev_ev_async_cb); ev_async_start((struct ev_loop *) trk->ev_base, &trk->async); #endif /* construct the thread object */ PMIX_CONSTRUCT(&trk->engine, pmix_thread_t); trk->engine_constructed = true; if (PRTE_SUCCESS != (rc = start_progress_engine(trk))) { PRTE_ERROR_LOG(rc); PMIX_RELEASE(trk); return NULL; } pmix_list_append(&tracking, &trk->super); return trk->ev_base; } int prte_progress_thread_finalize(const char *name) { prte_progress_tracker_t *trk; if (!inited) { /* nothing we can do */ return PRTE_ERR_NOT_FOUND; } if (NULL == name) { name = shared_thread_name; } /* find the specified 
engine */ PMIX_LIST_FOREACH(trk, &tracking, prte_progress_tracker_t) { if (0 == strcmp(name, trk->name)) { /* decrement the refcount */ --trk->refcount; /* If the refcount is still above 0, we're done here */ if (trk->refcount > 0) { return PRTE_SUCCESS; } /* If the progress thread is active, stop it */ if (trk->ev_active) { stop_progress_engine(trk); } pmix_list_remove_item(&tracking, &trk->super); PMIX_RELEASE(trk); return PRTE_SUCCESS; } } return PRTE_ERR_NOT_FOUND; } /* * Stop the progress thread, but don't delete the tracker (or event base) */ int prte_progress_thread_pause(const char *name) { prte_progress_tracker_t *trk; if (!inited) { /* nothing we can do */ return PRTE_ERR_NOT_FOUND; } if (NULL == name) { name = shared_thread_name; } /* find the specified engine */ PMIX_LIST_FOREACH(trk, &tracking, prte_progress_tracker_t) { if (0 == strcmp(name, trk->name)) { if (trk->ev_active) { stop_progress_engine(trk); } return PRTE_SUCCESS; } } return PRTE_ERR_NOT_FOUND; } #if PRTE_HAVE_LIBEV static prte_progress_tracker_t *prte_progress_tracker_get_by_base(prte_event_base_t *base) { prte_progress_tracker_t *trk; if (inited) { PMIX_LIST_FOREACH(trk, &tracking, prte_progress_tracker_t) { if (trk->ev_base == base) { return trk; } } } return NULL; } #endif int prte_progress_thread_resume(const char *name) { prte_progress_tracker_t *trk; if (!inited) { /* nothing we can do */ return PRTE_ERR_NOT_FOUND; } if (NULL == name) { name = shared_thread_name; } /* find the specified engine */ PMIX_LIST_FOREACH(trk, &tracking, prte_progress_tracker_t) { if (0 == strcmp(name, trk->name)) { if (trk->ev_active) { return PRTE_ERR_RESOURCE_BUSY; } return start_progress_engine(trk); } } return PRTE_ERR_NOT_FOUND; } prrte-3.0.13/src/runtime/Makefile.am0000664000175000017500000000367615145263240017442 0ustar alastairalastair# -*- makefile -*- # # Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. 
# Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. # Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2009-2020 Cisco Systems, Inc. All rights reserved # Copyright (c) 2012 Los Alamos National Security, LLC. # All rights reserved. # Copyright (c) 2017-2020 Intel, Inc. All rights reserved. # Copyright (c) 2021-2025 Nanook Consulting All rights reserved. # Copyright (c) 2023 Jeffrey M. Squyres. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow # # $HEADER$ # # This makefile.am does not stand on its own - it is included from src/Makefile.am headers += \ runtime/runtime.h \ runtime/prte_locks.h \ runtime/prte_globals.h \ runtime/prte_quit.h \ runtime/runtime_internals.h \ runtime/prte_wait.h \ runtime/prte_progress_threads.h libprrte_la_SOURCES += \ runtime/prte_finalize.c \ runtime/prte_init.c \ runtime/prte_locks.c \ runtime/prte_globals.c \ runtime/prte_quit.c \ runtime/data_type_support/prte_dt_copy_fns.c \ runtime/data_type_support/prte_dt_print_fns.c \ runtime/data_type_support/prte_dt_packing_fns.c \ runtime/data_type_support/prte_dt_unpacking_fns.c \ runtime/prte_mca_params.c \ runtime/prte_wait.c \ runtime/prte_progress_threads.c include runtime/data_server/Makefile.am prrte-3.0.13/src/runtime/prte_wait.c0000664000175000017500000001655715145263240017552 0ustar alastairalastair/* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2008 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. 
* Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2008 Institut National de Recherche en Informatique * et Automatique. All rights reserved. * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2021-2023 Nanook Consulting. All rights reserved. * Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All Rights * reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #include "prte_config.h" #include #include #ifdef HAVE_UNISTD_H # include #endif #ifdef HAVE_SYS_QUEUE_H # include #endif #include #ifdef HAVE_SYS_TIME_H # include #endif #ifdef HAVE_SYS_TYPES_H # include #endif #include #include #include #include #include #ifdef HAVE_SYS_WAIT_H # include #endif #include "src/class/pmix_list.h" #include "src/class/pmix_object.h" #include "src/event/event-internal.h" #include "src/threads/pmix_mutex.h" #include "src/util/pmix_output.h" #include "constants.h" #include "src/mca/errmgr/errmgr.h" #include "src/runtime/prte_globals.h" #include "src/threads/pmix_threads.h" #include "src/util/name_fns.h" #include "src/runtime/prte_wait.h" /* Timer Object Declaration */ static void timer_const(prte_timer_t *tm) { tm->ev = prte_event_alloc(); tm->payload = NULL; } static void timer_dest(prte_timer_t *tm) { prte_event_free(tm->ev); } PMIX_CLASS_INSTANCE(prte_timer_t, pmix_object_t, timer_const, timer_dest); static void wccon(prte_wait_tracker_t *p) { p->child = NULL; p->cbfunc = NULL; p->cbdata = NULL; } static void wcdes(prte_wait_tracker_t *p) { if (NULL != p->child) { PMIX_RELEASE(p->child); } } PMIX_CLASS_INSTANCE(prte_wait_tracker_t, pmix_list_item_t, wccon, wcdes); /* Local Variables */ static prte_event_t handler; static pmix_list_t pending_cbs; /* Local Function Prototypes */ static void wait_signal_callback(int fd, short event, 
void *arg);

/* Interface Functions */

/* Stop reacting to SIGCHLD by removing the signal event. */
void prte_wait_disable(void)
{
    prte_event_del(&handler);
}

/* (Re)arm the SIGCHLD signal event. */
void prte_wait_enable(void)
{
    prte_event_add(&handler, NULL);
}

/*
 * Set up the list of pending callbacks and register a persistent
 * SIGCHLD signal event on prte_event_base.  Always returns PRTE_SUCCESS.
 */
int prte_wait_init(void)
{
    PMIX_CONSTRUCT(&pending_cbs, pmix_list_t);

    prte_event_set(prte_event_base, &handler, SIGCHLD, PRTE_EV_SIGNAL | PRTE_EV_PERSIST,
                   wait_signal_callback, &handler);

    prte_event_add(&handler, NULL);
    return PRTE_SUCCESS;
}

/*
 * Remove the SIGCHLD event and drop every pending callback
 * registration.  Always returns PRTE_SUCCESS.
 */
int prte_wait_finalize(void)
{
    prte_event_del(&handler);

    /* clear out the pending cbs */
    PMIX_LIST_DESTRUCT(&pending_cbs);

    return PRTE_SUCCESS;
}

/* this function *must* always be called from
 * within an event in the prte_event_base
 *
 * Register 'callback' (with 'data') to be invoked when 'child' exits.
 * If the child is already marked as not alive, the callback is
 * activated right away as an event; it receives the tracker as its
 * argument.  A second registration for the same child replaces the
 * callback and data of the first.  NULL child or callback is logged as
 * PRTE_ERR_BAD_PARAM and ignored.
 */
void prte_wait_cb(prte_proc_t *child, prte_wait_cbfunc_t callback, void *data)
{
    prte_wait_tracker_t *t2;

    if (NULL == child || NULL == callback) {
        /* bozo protection */
        PRTE_ERROR_LOG(PRTE_ERR_BAD_PARAM);
        return;
    }

    /* see if this proc is still alive */
    if (!PRTE_FLAG_TEST(child, PRTE_PROC_FLAG_ALIVE)) {
        /* callback cannot be NULL here (checked above), so this
           test is always true */
        if (NULL != callback) {
            /* already heard this proc is dead, so just do the callback */
            t2 = PMIX_NEW(prte_wait_tracker_t);
            PMIX_RETAIN(child); // protect against race conditions
            t2->child = child;
            t2->cbfunc = callback;
            t2->cbdata = data;
            prte_event_set(prte_event_base, &t2->ev, -1, PRTE_EV_WRITE, t2->cbfunc, t2);
            prte_event_active(&t2->ev, PRTE_EV_WRITE, 1);
        }
        return;
    }

    /* we just override any existing registration */
    PMIX_LIST_FOREACH(t2, &pending_cbs, prte_wait_tracker_t)
    {
        if (t2->child == child) {
            t2->cbfunc = callback;
            t2->cbdata = data;
            return;
        }
    }
    /* get here if this is a new registration */
    t2 = PMIX_NEW(prte_wait_tracker_t);
    PMIX_RETAIN(child); // protect against race conditions
    t2->child = child;
    t2->cbfunc = callback;
    t2->cbdata = data;
    pmix_list_append(&pending_cbs, &t2->super);
}

/*
 * Event callback for prte_wait_cb_cancel(): remove the pending
 * registration (if any) whose child matches the one carried by the
 * request tracker, then release the request tracker itself.
 */
static void cancel_callback(int fd, short args, void *cbdata)
{
    prte_wait_tracker_t *trk = (prte_wait_tracker_t *) cbdata;
    prte_wait_tracker_t *t2;
    PRTE_HIDE_UNUSED_PARAMS(fd, args);

    PMIX_ACQUIRE_OBJECT(trk);

    PMIX_LIST_FOREACH(t2, &pending_cbs, prte_wait_tracker_t)
    {
        if (t2->child == trk->child) {
            pmix_list_remove_item(&pending_cbs, &t2->super);
            PMIX_RELEASE(t2);
            PMIX_RELEASE(trk);
            return;
        }
    }

    /* no registration found - just drop the request */
    PMIX_RELEASE(trk);
}

/*
 * Cancel the wait callback registered for 'child'.  The actual removal
 * is done by cancel_callback(), which is shifted onto prte_event_base.
 * A NULL child is logged as PRTE_ERR_BAD_PARAM and ignored.
 */
void prte_wait_cb_cancel(prte_proc_t *child)
{
    prte_wait_tracker_t *trk;

    if (NULL == child) {
        /* bozo protection */
        PRTE_ERROR_LOG(PRTE_ERR_BAD_PARAM);
        return;
    }

    /* push this into the event library for handling */
    trk = PMIX_NEW(prte_wait_tracker_t);
    PMIX_RETAIN(child); // protect against race conditions
    trk->child = child;
    PRTE_PMIX_THREADSHIFT(trk, prte_event_base, cancel_callback);
}

/* callback from the event library whenever a SIGCHLD is received
 *
 * Reaps every exited child with a non-blocking waitpid().  For a pid
 * that has a pending registration, the wait status is stored in the
 * child's exit_code, the registration is removed from the list and its
 * callback (if any) is activated as an event; a pid without a
 * registration is reaped and otherwise ignored.
 */
static void wait_signal_callback(int fd, short event, void *arg)
{
    prte_event_t *signal = (prte_event_t *) arg;
    int status;
    pid_t pid;
    prte_wait_tracker_t *t2;
    PRTE_HIDE_UNUSED_PARAMS(fd, event);

    PMIX_ACQUIRE_OBJECT(signal);

    if (SIGCHLD != PRTE_EVENT_SIGNAL(signal)) {
        return;
    }

    /* we can have multiple children leave but only get one
     * sigchild callback, so reap all the waitpids until we
     * don't get anything valid back */
    while (1) {
        pid = waitpid(-1, &status, WNOHANG);
        if (-1 == pid && EINTR == errno) {
            /* try it again */
            continue;
        }
        /* if we got garbage, then nothing we can do */
        if (pid <= 0) {
            return;
        }

        /* we are already in an event, so it is safe to access the list */
        PMIX_LIST_FOREACH(t2, &pending_cbs, prte_wait_tracker_t)
        {
            if (pid == t2->child->pid) {
                /* found it! */
                t2->child->exit_code = status;
                pmix_list_remove_item(&pending_cbs, &t2->super);
                if (NULL != t2->cbfunc) {
                    prte_event_set(prte_event_base, &t2->ev, -1, PRTE_EV_WRITE, t2->cbfunc, t2);
                    prte_event_active(&t2->ev, PRTE_EV_WRITE, 1);
                } else {
                    PMIX_RELEASE(t2);
                }
                break;
            }
        }
    }
}
prrte-3.0.13/src/runtime/prte_locks.h0000664000175000017500000000320615145263240017711 0ustar alastairalastair/* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2019 Intel, Inc. All rights reserved. * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. * Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All Rights * reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ /** * @file * * Locks to prevent loops inside PRTE */ #ifndef PRTE_LOCKS_H #define PRTE_LOCKS_H #include "prte_config.h" #include "src/threads/pmix_mutex.h" #include "src/threads/pmix_threads.h" BEGIN_C_DECLS /* for everyone */ PRTE_EXPORT extern pmix_mutex_t prte_finalize_lock; /* for HNPs */ PRTE_EXPORT extern pmix_mutex_t prte_abort_inprogress_lock; PRTE_EXPORT extern pmix_mutex_t prte_jobs_complete_lock; PRTE_EXPORT extern pmix_mutex_t prte_quit_lock; PRTE_EXPORT extern pmix_lock_t prte_init_lock; /** * Initialize the locks */ PRTE_EXPORT int prte_locks_init(void); END_C_DECLS #endif /* #ifndef PRTE_LOCKS_H */ prrte-3.0.13/src/runtime/prte_progress_threads.h0000664000175000017500000000440615145263240022157 0ustar alastairalastair/* * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2021 Nanook Consulting. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #ifndef PRTE_PROGRESS_THREADS_H #define PRTE_PROGRESS_THREADS_H #include "prte_config.h" #include "src/event/event-internal.h" /** * Initialize a progress thread name; if a progress thread is not * already associated with that name, start a progress thread. 
*
 * If you have general events that need to run in *a* progress thread
 * (but not necessarily your own, dedicated progress thread), pass
 * NULL as the "name" argument to the prte_progress_thread_init()
 * function to glom on to the general PRTE-wide progress thread.
 *
 * If a name is passed that was already used in a prior call to
 * prte_progress_thread_init(), the event base associated with that
 * already-running progress thread will be returned (i.e., no new
 * progress thread will be started).
 *
 * Returns NULL if the progress thread could not be created.
 */
PRTE_EXPORT prte_event_base_t *prte_progress_thread_init(const char *name);

/**
 * Finalize a progress thread name (reference counted).
 *
 * Once this function is invoked as many times as
 * prte_progress_thread_init() was invoked on this name (or NULL), the
 * progress thread is shut down and the event base associated with
 * it is destroyed.
 *
 * Will return PRTE_ERR_NOT_FOUND if the progress thread name does not
 * exist; PRTE_SUCCESS otherwise.
 */
PRTE_EXPORT int prte_progress_thread_finalize(const char *name);

/**
 * Temporarily pause the progress thread associated with this name.
 *
 * This function does not destroy the event base associated with this
 * progress thread name, but it does stop processing all events on
 * that event base until prte_progress_thread_resume() is invoked on
 * that name.
 *
 * Will return PRTE_ERR_NOT_FOUND if the progress thread name does not
 * exist; PRTE_SUCCESS otherwise.
 */
PRTE_EXPORT int prte_progress_thread_pause(const char *name);

/**
 * Restart a previously-paused progress thread associated with this
 * name.
 *
 * Will return PRTE_ERR_NOT_FOUND if the progress thread name does not
 * exist, PRTE_ERR_RESOURCE_BUSY if the progress thread is already
 * running, and otherwise the result of restarting the thread
 * (PRTE_SUCCESS when it was started).
*/ PRTE_EXPORT int prte_progress_thread_resume(const char *name); #endif prrte-3.0.13/src/mca/0000775000175000017500000000000015145263240014447 5ustar alastairalastairprrte-3.0.13/src/mca/prtebacktrace/0000775000175000017500000000000015145263240017261 5ustar alastairalastairprrte-3.0.13/src/mca/prtebacktrace/base/0000775000175000017500000000000015145263240020173 5ustar alastairalastairprrte-3.0.13/src/mca/prtebacktrace/base/backtrace_component.c0000664000175000017500000000340315145263240024340 0ustar alastairalastair/* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2019-2020 Intel, Inc. All rights reserved. * Copyright (c) 2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #include "prte_config.h" #include "src/mca/prtebacktrace/base/base.h" /* * The following file was created by configure. It contains extern * statements and the definition of an array of pointers to each * component's public pmix_mca_base_component_t struct. 
*/ #include "src/mca/prtebacktrace/base/static-components.h" /* * Globals */ /* Uses default register/open/close functions */ PMIX_MCA_BASE_FRAMEWORK_DECLARE(prte, prtebacktrace, NULL, NULL, NULL, NULL, prte_prtebacktrace_base_static_components, PMIX_MCA_BASE_FRAMEWORK_FLAG_DEFAULT); prrte-3.0.13/src/mca/prtebacktrace/base/base.h0000664000175000017500000000245515145263240021264 0ustar alastairalastair/* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * Copyright (c) 2019-2020 Intel, Inc. All rights reserved. * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2022 Nanook Consulting. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ * */ #ifndef PRTE_BACKTRACE_BASE_H #define PRTE_BACKTRACE_BASE_H #include "prte_config.h" #include "src/mca/base/pmix_mca_base_framework.h" #include "src/mca/prtebacktrace/prtebacktrace.h" /* * Global functions for MCA overall backtrace open and close */ BEGIN_C_DECLS PRTE_EXPORT extern pmix_mca_base_framework_t prte_prtebacktrace_base_framework; END_C_DECLS #endif /* PRTE_BACKTRACE_BASE_H */ prrte-3.0.13/src/mca/prtebacktrace/base/Makefile.am0000664000175000017500000000165115145263240022232 0ustar alastairalastair# # Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2019-2020 Intel, Inc. All rights reserved. # Copyright (c) 2022 Nanook Consulting. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow # # $HEADER$ # headers += \ base/base.h libprtemca_prtebacktrace_la_SOURCES += \ base/backtrace_component.c prrte-3.0.13/src/mca/prtebacktrace/base/owner.txt0000664000175000017500000000024415145263240022066 0ustar alastairalastair# # owner/status file # owner: institution that is responsible for this package # status: e.g. active, maintenance, unmaintained # owner:project status:maintenance prrte-3.0.13/src/mca/prtebacktrace/configure.m40000664000175000017500000000173115145263240021506 0ustar alastairalastairdnl -*- shell-script -*- dnl dnl Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana dnl University Research and Technology dnl Corporation. All rights reserved. dnl Copyright (c) 2004-2005 The University of Tennessee and The University dnl of Tennessee Research Foundation. All rights dnl reserved. dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, dnl University of Stuttgart. All rights reserved. dnl Copyright (c) 2004-2006 The Regents of the University of California. dnl All rights reserved. dnl Copyright (c) 2019-2020 Intel, Inc. All rights reserved. dnl Copyright (c) 2020 Cisco Systems, Inc. 
All rights reserved dnl $COPYRIGHT$ dnl dnl Additional copyrights may follow dnl dnl $HEADER$ dnl dnl we only want one :) m4_define(MCA_prte_prtebacktrace_CONFIGURE_MODE, STOP_AT_FIRST) prrte-3.0.13/src/mca/prtebacktrace/printstack/0000775000175000017500000000000015145263240021443 5ustar alastairalastairprrte-3.0.13/src/mca/prtebacktrace/printstack/configure.m40000664000175000017500000000324215145263240023667 0ustar alastairalastair# -*- shell-script -*- # # Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. # Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010-2020 Cisco Systems, Inc. All rights reserved # Copyright (c) 2019-2020 Intel, Inc. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow # # $HEADER$ # AC_DEFUN([MCA_prte_prtebacktrace_printstack_PRIORITY], [30]) AC_DEFUN([MCA_prte_prtebacktrace_printstack_COMPILE_MODE], [ AC_MSG_CHECKING([for MCA component $1:$2 compile mode]) $3="static" AC_MSG_RESULT([$$3]) ]) # MCA_prtebacktrace_printstack_CONFIG(action-if-can-compile, # [action-if-cant-compile]) # ------------------------------------------------ AC_DEFUN([MCA_prte_prtebacktrace_printstack_CONFIG],[ AC_CONFIG_FILES([src/mca/prtebacktrace/printstack/Makefile]) AC_CHECK_HEADERS([ucontext.h]) # FreeBSD has backtrace in -lexecinfo, usually in libc AC_CHECK_FUNCS([printstack], [prtebacktrace_printstack_happy="yes"], [prtebacktrace_printstack_happy="no"]) AS_IF([test "$prtebacktrace_printstack_happy" = "yes"], [$1], [$2]) ]) prrte-3.0.13/src/mca/prtebacktrace/printstack/backtrace_printstack.c0000664000175000017500000000325315145263240025773 0ustar alastairalastair/* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. * Copyright (c) 2019-2020 Intel, Inc. All rights reserved. * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2021 Nanook Consulting. All rights reserved. 
* $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */

#include "prte_config.h"

#include
#include

#include "constants.h"
#include "src/mca/prtebacktrace/prtebacktrace.h"

/*
 * Print the current stack via printstack().
 *
 * The trace goes to the descriptor of 'file' when it is non-NULL,
 * otherwise to prte_stacktrace_output_fileno.  This implementation
 * does not use 'prefix' or 'strip': no per-line prefix is added and
 * no leading frames are skipped here.  Always returns PRTE_SUCCESS.
 */
int prte_backtrace_print(FILE *file, char *prefix, int strip)
{
    int fd = prte_stacktrace_output_fileno;

    if (NULL != file) {
        fd = fileno(file);
    }

    printstack(fd);

    return PRTE_SUCCESS;
}

/*
 * Not implemented for this component: sets *message_out to NULL and
 * *len_out to 0 (both pointers must be valid) and returns
 * PRTE_ERR_NOT_IMPLEMENTED.
 */
int prte_backtrace_buffer(char ***message_out, int *len_out)
{
    *message_out = NULL;
    *len_out = 0;

    /* BWB - I think we can implement this in a similar way that
       printstack is implemented.  I just don't have time right
       now. */

    return PRTE_ERR_NOT_IMPLEMENTED;
}
prrte-3.0.13/src/mca/prtebacktrace/printstack/Makefile.am0000664000175000017500000000176415145263240023507 0ustar alastairalastair# # Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. # Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2019-2020 Intel, Inc. All rights reserved. # Copyright (c) 2022 Nanook Consulting. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow # # $HEADER$ # noinst_LTLIBRARIES = libprtemca_prtebacktrace_printstack.la libprtemca_prtebacktrace_printstack_la_SOURCES = \ backtrace_printstack.c \ backtrace_printstack_component.c prrte-3.0.13/src/mca/prtebacktrace/printstack/backtrace_printstack_component.c0000664000175000017500000000326715145263240030062 0ustar alastairalastair/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
 * of Tennessee Research Foundation. All rights
 * reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 * University of Stuttgart. All rights reserved.
 * Copyright (c) 2004-2006 The Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 2015 Los Alamos National Security, LLC. All rights
 * reserved.
 * Copyright (c) 2019-2020 Intel, Inc. All rights reserved.
 * Copyright (c) 2019 Research Organization for Information Science
 * and Technology (RIST). All rights reserved.
 * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved
 * Copyright (c) 2022 Nanook Consulting. All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */

#include "prte_config.h"

#include "src/mca/prtebacktrace/prtebacktrace.h"

/*
 * Component descriptor for the "printstack" backtrace component.
 * Only the framework version, the component name and the component
 * version are filled in; every other field of the structure is left
 * to its default (zero) initialization.
 */
const pmix_mca_base_component_t prte_mca_prtebacktrace_printstack_component = {
    PRTE_BACKTRACE_BASE_VERSION_2_0_0,

    /* Component name and version */
    .pmix_mca_component_name = "printstack",
    PMIX_MCA_BASE_MAKE_VERSION(component, PRTE_MAJOR_VERSION, PRTE_MINOR_VERSION, PMIX_RELEASE_VERSION),
};
prrte-3.0.13/src/mca/prtebacktrace/printstack/owner.txt0000664000175000017500000000024215145263240023334 0ustar alastairalastair# # owner/status file # owner: institution that is responsible for this package # status: e.g. active, maintenance, unmaintained # owner: SNL status: maintenance prrte-3.0.13/src/mca/prtebacktrace/prtebacktrace.h0000664000175000017500000000473315145263240022253 0ustar alastairalastair/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 * University of Stuttgart. All rights reserved.
 * Copyright (c) 2004-2006 The Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 2015 Los Alamos National Security, LLC. All rights
 * reserved.
 * Copyright (c) 2017 IBM Corporation. All rights reserved.
 * Copyright (c) 2019-2020 Intel, Inc. All rights reserved.
 * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved
 * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */

#ifndef PRTE_MCA_BACKTRACE_BACKTRACE_H
#define PRTE_MCA_BACKTRACE_BACKTRACE_H

#include "prte_config.h"

#include "src/pmix/pmix-internal.h"
#include "src/mca/base/pmix_base.h"
#include "src/mca/mca.h"
#include "src/util/stacktrace.h"

BEGIN_C_DECLS

/*
 * Interface of the backtrace framework. There are two interface
 * functions, both of which are called directly: every backtrace
 * component (printstack, execinfo, none) defines the same two symbols.
 * The joy of link-time components.
 */

/*
 * Print the back trace of the calling thread.
 *
 * file    stream to print to. If 'file' is NULL then the component
 *         should try to use the file descriptor saved in
 *         prte_stacktrace_output_fileno.
 * prefix  string written in front of each line; may be NULL.
 * strip   number of leading frames that are not printed.
 *
 * Returns PRTE_SUCCESS, or a PRTE error code such as
 * PRTE_ERR_NOT_IMPLEMENTED ("none" component) or PRTE_ERR_BAD_PARAM
 * (execinfo component, when no descriptor is available).
 *
 * NOTE(review): only the execinfo component honors 'prefix' and
 * 'strip'; the printstack component ignores both.
 *
 * \note some attempts made to be signal safe.
 */
PRTE_EXPORT int prte_backtrace_print(FILE *file, char *prefix, int strip);

/*
 * Return the back trace in a buffer.
 *
 * messages  receives an array of strings, one per frame. The buffer is
 *           allocated by the backtrace component, but should be
 *           free'ed by the caller. Components that cannot build it set
 *           it to NULL.
 * len       receives the number of entries in 'messages' (0 when
 *           'messages' is NULL).
 *
 * Returns PRTE_SUCCESS, or PRTE_ERR_NOT_IMPLEMENTED for components
 * that do not support it (printstack, none).
 *
 * \note Probably bad to call this from a signal handler.
 */
PRTE_EXPORT int prte_backtrace_buffer(char ***messages, int *len);

/**
 * Structure for backtrace components: a plain MCA base component, since
 * the framework needs no hooks beyond the two link-time functions above.
 */
typedef pmix_mca_base_component_t prte_backtrace_base_component_t;

/*
 * Macro for use in components that are of type backtrace
 */
#define PRTE_BACKTRACE_BASE_VERSION_2_0_0 PRTE_MCA_BASE_VERSION_3_0_0("backtrace", 2, 0, 0)

END_C_DECLS

#endif /* PRTE_MCA_BACKTRACE_BACKTRACE_H */
prrte-3.0.13/src/mca/prtebacktrace/execinfo/0000775000175000017500000000000015145263240021061 5ustar alastairalastairprrte-3.0.13/src/mca/prtebacktrace/execinfo/backtrace_execinfo_component.c0000664000175000017500000000326315145263240027112 0ustar alastairalastair/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2019-2020 Intel, Inc. All rights reserved. * Copyright (c) 2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2022 Nanook Consulting. All rights reserved.
* $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */

#include "prte_config.h"

#include "src/mca/prtebacktrace/prtebacktrace.h"

/*
 * Component descriptor for the "execinfo" backtrace component.
 * Only the framework version, the component name and the component
 * version are filled in; every other field of the structure is left
 * to its default (zero) initialization.
 */
const pmix_mca_base_component_t prte_mca_prtebacktrace_execinfo_component = {
    PRTE_BACKTRACE_BASE_VERSION_2_0_0,

    /* Component name and version */
    .pmix_mca_component_name = "execinfo",
    PMIX_MCA_BASE_MAKE_VERSION(component, PRTE_MAJOR_VERSION, PRTE_MINOR_VERSION, PMIX_RELEASE_VERSION),
};
prrte-3.0.13/src/mca/prtebacktrace/execinfo/configure.m40000664000175000017500000000327615145263240023314 0ustar alastairalastair# -*- shell-script -*- # # Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. # Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010-2020 Cisco Systems, Inc. All rights reserved # Copyright (c) 2019-2020 Intel, Inc. All rights reserved.
# $COPYRIGHT$ # # Additional copyrights may follow # # $HEADER$ # AC_DEFUN([MCA_prte_prtebacktrace_execinfo_PRIORITY], [30]) AC_DEFUN([MCA_prte_prtebacktrace_execinfo_COMPILE_MODE], [ AC_MSG_CHECKING([for MCA component $1:$2 compile mode]) $3="static" AC_MSG_RESULT([$$3]) ]) # MCA_prte_prtebacktrace_execinfo_CONFIG(action-if-can-compile, # [action-if-cant-compile]) # ------------------------------------------------ AC_DEFUN([MCA_prte_prtebacktrace_execinfo_CONFIG],[ AC_CONFIG_FILES([src/mca/prtebacktrace/execinfo/Makefile]) AC_CHECK_HEADERS([execinfo.h]) # backtrace is usually in libc, but FreeBSD has it in -lexecinfo PRTE_SEARCH_LIBS_COMPONENT([backtrace_execinfo], [backtrace], [execinfo], [prtebacktrace_execinfo_happy="yes"], [prtebacktrace_execinfo_happy="no"]) AS_IF([test "$prtebacktrace_execinfo_happy" = "yes"], [$1], [$2]) ]) prrte-3.0.13/src/mca/prtebacktrace/execinfo/backtrace_execinfo.c0000664000175000017500000000427715145263240025036 0ustar alastairalastair/* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2017 IBM Corporation. All rights reserved. * Copyright (c) 2019-2020 Intel, Inc. All rights reserved. * Copyright (c) 2021-2025 Nanook Consulting All rights reserved.
* $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #include "prte_config.h" #include #include #ifdef HAVE_UNISTD_H # include #endif #ifdef HAVE_EXECINFO_H # include #endif #include "constants.h" #include "src/util/pmix_fd.h" #include "src/mca/prtebacktrace/prtebacktrace.h" int prte_backtrace_print(FILE *file, char *prefix, int strip) { int i, len; int trace_size; void *trace[32]; char buf[6]; int fd = prte_stacktrace_output_fileno; if (NULL != file) { fd = fileno(file); } if (-1 == fd) { return PRTE_ERR_BAD_PARAM; } trace_size = backtrace(trace, 32); for (i = strip; i < trace_size; i++) { if (NULL != prefix) { pmix_fd_write(fd, strlen(prefix), prefix); } len = snprintf(buf, sizeof(buf), "[%2d] ", i - strip); pmix_fd_write(fd, len, buf); backtrace_symbols_fd(&trace[i], 1, fd); } return PRTE_SUCCESS; } int prte_backtrace_buffer(char ***message_out, int *len_out) { int trace_size; void *trace[32]; char **funcs = (char **) NULL; trace_size = backtrace(trace, 32); funcs = backtrace_symbols(trace, trace_size); *message_out = funcs; *len_out = trace_size; return PRTE_SUCCESS; } prrte-3.0.13/src/mca/prtebacktrace/execinfo/Makefile.am0000664000175000017500000000175415145263240023124 0ustar alastairalastair# # Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. # Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2019-2020 Intel, Inc. All rights reserved. # Copyright (c) 2022 Nanook Consulting. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow # # $HEADER$ # noinst_LTLIBRARIES = libprtemca_prtebacktrace_execinfo.la libprtemca_prtebacktrace_execinfo_la_SOURCES = \ backtrace_execinfo.c \ backtrace_execinfo_component.c prrte-3.0.13/src/mca/prtebacktrace/execinfo/owner.txt0000664000175000017500000000024115145263240022751 0ustar alastairalastair# # owner/status file # owner: institution that is responsible for this package # status: e.g. active, maintenance, unmaintained # owner: SNL status:maintenance prrte-3.0.13/src/mca/prtebacktrace/Makefile.am0000664000175000017500000000241115145263240021313 0ustar alastairalastair# # Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. # Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010-2020 Cisco Systems, Inc. All rights reserved # Copyright (c) 2019-2020 Intel, Inc. All rights reserved. # Copyright (c) 2022 Nanook Consulting. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow # # $HEADER$ # # main library setup noinst_LTLIBRARIES = libprtemca_prtebacktrace.la libprtemca_prtebacktrace_la_SOURCES = # local files headers = prtebacktrace.h libprtemca_prtebacktrace_la_SOURCES += $(headers) # Conditionally install the header files prtedir = $(prteincludedir)/$(subdir) nobase_prte_HEADERS = $(headers) include base/Makefile.am distclean-local: rm -f base/static-components.h prrte-3.0.13/src/mca/prtebacktrace/none/0000775000175000017500000000000015145263240020220 5ustar alastairalastairprrte-3.0.13/src/mca/prtebacktrace/none/configure.m40000664000175000017500000000247315145263240022451 0ustar alastairalastair# -*- shell-script -*- # # Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. # Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2010-2020 Cisco Systems, Inc. All rights reserved # Copyright (c) 2019-2020 Intel, Inc. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow # # $HEADER$ # AC_DEFUN([MCA_prte_prtebacktrace_none_PRIORITY], [0]) AC_DEFUN([MCA_prte_prtebacktrace_none_COMPILE_MODE], [ AC_MSG_CHECKING([for MCA component $1:$2 compile mode]) $3="static" AC_MSG_RESULT([$$3]) ]) # MCA_prte_prtebacktrace_none_CONFIG(action-if-can-compile, # [action-if-cant-compile]) # ------------------------------------------------ AC_DEFUN([MCA_prte_prtebacktrace_none_CONFIG],[ AC_CONFIG_FILES([src/mca/prtebacktrace/none/Makefile]) ]) prrte-3.0.13/src/mca/prtebacktrace/none/backtrace_none_component.c0000664000175000017500000000343415145263240025410 0ustar alastairalastair/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2019-2020 Intel, Inc. All rights reserved. * Copyright (c) 2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2021-2022 Nanook Consulting. All rights reserved.
* $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #include "prte_config.h" #include "src/mca/prtebacktrace/prtebacktrace.h" BEGIN_C_DECLS PRTE_EXPORT extern const pmix_mca_base_component_t prte_mca_backtrace_none_component; END_C_DECLS const pmix_mca_base_component_t prte_mca_prtebacktrace_none_component = { PRTE_BACKTRACE_BASE_VERSION_2_0_0, /* Component name and version */ .pmix_mca_component_name = "none", PMIX_MCA_BASE_MAKE_VERSION(component, PRTE_MAJOR_VERSION, PRTE_MINOR_VERSION, PMIX_RELEASE_VERSION), }; prrte-3.0.13/src/mca/prtebacktrace/none/Makefile.am0000664000175000017500000000173415145263240022261 0ustar alastairalastair# # Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana # University Research and Technology # Corporation. All rights reserved. # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. # Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2019-2020 Intel, Inc. All rights reserved. # Copyright (c) 2022 Nanook Consulting. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow # # $HEADER$ # noinst_LTLIBRARIES = libprtemca_prtebacktrace_none.la libprtemca_prtebacktrace_none_la_SOURCES = \ backtrace_none.c \ backtrace_none_component.c prrte-3.0.13/src/mca/prtebacktrace/none/backtrace_none.c0000664000175000017500000000252215145263240023323 0ustar alastairalastair/* * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. 
All rights reserved. * Copyright (c) 2004-2006 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2019-2020 Intel, Inc. All rights reserved. * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved * Copyright (c) 2021 Nanook Consulting. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow * * $HEADER$ */ #include "prte_config.h" #include #include "constants.h" #include "src/mca/prtebacktrace/prtebacktrace.h" int prte_backtrace_print(FILE *file, char *prefix, int strip) { return PRTE_ERR_NOT_IMPLEMENTED; } int prte_backtrace_buffer(char ***message_out, int *len_out) { *message_out = NULL; *len_out = 0; return PRTE_ERR_NOT_IMPLEMENTED; } prrte-3.0.13/src/mca/prtebacktrace/none/owner.txt0000664000175000017500000000024115145263240022110 0ustar alastairalastair# # owner/status file # owner: institution that is responsible for this package # status: e.g. active, maintenance, unmaintained # owner: SNL status:maintenance prrte-3.0.13/src/mca/plm/0000775000175000017500000000000015145263240015237 5ustar alastairalastairprrte-3.0.13/src/mca/plm/ssh/0000775000175000017500000000000015145263240016034 5ustar alastairalastairprrte-3.0.13/src/mca/plm/ssh/plm_ssh_module.c0000664000175000017500000017531615145263240021227 0ustar alastairalastair/* * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2021 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2007-2012 Los Alamos National Security, LLC. 
All rights
 * reserved.
 * Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2011-2019 IBM Corporation. All rights reserved.
 * Copyright (c) 2014-2020 Intel, Inc. All rights reserved.
 * Copyright (c) 2015-2019 Research Organization for Information Science
 * and Technology (RIST). All rights reserved.
 * Copyright (c) 2021-2025 Nanook Consulting All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 *
 * These symbols are in a file by themselves to provide nice linker
 * semantics. Since linkers generally pull in symbols by object
 * files, keeping these symbols as the only symbols in this file
 * prevents utility programs such as "ompi_info" from having to import
 * entire components just to query their version and parameters.
 */

#include "prte_config.h"
#include "constants.h"

#include
#ifdef HAVE_UNISTD_H
# include
#endif
#include
#include
#ifdef HAVE_STRINGS_H
# include
#endif
#ifdef HAVE_SYS_SELECT_H
# include
#endif
#ifdef HAVE_SYS_TIME_H
# include
#endif
#include
#ifdef HAVE_SYS_TYPES_H
# include
#endif
#ifdef HAVE_SYS_STAT_H
# include
#endif
#ifdef HAVE_SYS_WAIT_H
# include
#endif
#include
#include
#ifdef HAVE_PWD_H
# include
#endif

#include "src/class/pmix_pointer_array.h"
#include "src/event/event-internal.h"
#include "src/mca/base/pmix_base.h"
#include "src/mca/prteinstalldirs/prteinstalldirs.h"
#include "src/mca/pinstalldirs/pinstalldirs_types.h"
#include "src/util/pmix_argv.h"
#include "src/util/pmix_basename.h"
#include "src/util/pmix_output.h"
#include "src/util/pmix_path.h"
#include "src/util/pmix_environ.h"

#include "src/runtime/prte_globals.h"
#include "src/runtime/prte_wait.h"
#include "src/threads/pmix_threads.h"
#include "src/util/pmix_fd.h"
#include "src/util/name_fns.h"
#include "src/util/proc_info.h"
#include "src/util/pmix_show_help.h"

#include "src/mca/errmgr/errmgr.h"
#include "src/mca/ess/base/base.h"
#include "src/mca/ess/ess.h"
#include "src/mca/grpcomm/base/base.h"
#include "src/mca/oob/base/base.h"
#include "src/mca/rmaps/rmaps.h"
#include "src/rml/rml_contact.h"
#include "src/rml/rml.h"
#include "src/mca/state/state.h"

#include "src/mca/plm/base/base.h"
#include "src/mca/plm/base/plm_private.h"
#include "src/mca/plm/plm.h"
#include "src/mca/plm/ssh/plm_ssh.h"

/* Module entry points implemented in this file */
static int ssh_init(void);
static int ssh_launch(prte_job_t *jdata);
static int remote_spawn(void);
static int ssh_terminate_prteds(void);
static int ssh_finalize(void);

/*
 * Function table published by the ssh PLM module. Entries without a
 * local implementation are wired to the shared prte_plm_base_*
 * helpers.
 */
prte_plm_base_module_t prte_plm_ssh_module = {
    .init = ssh_init,
    .set_hnp_name = prte_plm_base_set_hnp_name,
    .spawn = ssh_launch,
    .remote_spawn = remote_spawn,
    .terminate_job = prte_plm_base_prted_terminate_job,
    .terminate_orteds = ssh_terminate_prteds,
    .terminate_procs = prte_plm_base_prted_kill_local_procs,
    .signal_job = prte_plm_base_prted_signal_local_procs,
    .finalize = ssh_finalize};

/*
 * List item carrying one pending daemon launch: the argument vector
 * to execute and the daemon process object it belongs to.
 */
typedef struct {
    pmix_list_item_t super;
    int argc;
    char **argv;
    prte_proc_t *daemon;
} prte_plm_ssh_caddy_t;

/* Constructor: start with no argv and no daemon attached.
 * NOTE(review): argc is not initialized here - confirm every user sets
 * it before reading it. */
static void caddy_const(prte_plm_ssh_caddy_t *ptr)
{
    ptr->argv = NULL;
    ptr->daemon = NULL;
}

/* Destructor: free the argv copy and drop the reference held on the
 * daemon object, when present. */
static void caddy_dest(prte_plm_ssh_caddy_t *ptr)
{
    if (NULL != ptr->argv) {
        PMIX_ARGV_FREE_COMPAT(ptr->argv);
    }
    if (NULL != ptr->daemon) {
        PMIX_RELEASE(ptr->daemon);
    }
}
PMIX_CLASS_INSTANCE(prte_plm_ssh_caddy_t, pmix_list_item_t, caddy_const, caddy_dest);

/* Shells this module distinguishes; the numeric values index
 * prte_plm_ssh_shell_name[] below. */
typedef enum {
    PRTE_PLM_SSH_SHELL_BASH = 0,
    PRTE_PLM_SSH_SHELL_ZSH,
    PRTE_PLM_SSH_SHELL_TCSH,
    PRTE_PLM_SSH_SHELL_CSH,
    PRTE_PLM_SSH_SHELL_KSH,
    PRTE_PLM_SSH_SHELL_SH,
    PRTE_PLM_SSH_SHELL_UNKNOWN
} prte_plm_ssh_shell_t;

/* These strings *must* follow the same order as the enum PRTE_PLM_SSH_SHELL_* */
static const char *prte_plm_ssh_shell_name[7] = {"bash", "zsh", "tcsh", /* tcsh has to come before csh, otherwise strstr finds csh */
                                                 "csh", "ksh", "sh", "unknown"};

/*
 * Local functions
 */
static void set_handler_default(int sig);
static prte_plm_ssh_shell_t find_shell(char *shell);
static int launch_agent_setup(const char *agent, char *path);
static void ssh_child(int argc, char **argv)
__prte_attribute_noreturn__;
static int ssh_probe(char *nodename, prte_plm_ssh_shell_t *shell);
static int setup_shell(prte_plm_ssh_shell_t *sshell, prte_plm_ssh_shell_t *lshell, char *nodename, int *argc, char ***argv);
static void launch_daemons(int fd, short args, void *cbdata);
static void process_launch_list(int fd, short args, void *cbdata);

/* local global storage */
static int num_in_progress = 0;        /* launches started but not yet reaped by ssh_wait_daemon */
static pmix_list_t launch_list;        /* constructed in ssh_init */
static prte_event_t launch_event;      /* fires process_launch_list */
static char *ssh_agent_path = NULL;
static char **ssh_agent_argv = NULL;

/**
 * Init the module
 *
 * Selects the launch agent (qrsh, llspawn, or the MCA-specified
 * agent), registers launch_daemons for the
 * PRTE_JOB_STATE_LAUNCH_DAEMONS job state, prepares the launch list
 * and its event, and starts the PLM receives.
 *
 * @return PRTE_SUCCESS, or the error code of the first setup step that
 *         failed. A failure of prte_plm_base_comm_start() is logged
 *         and returned, but only after the remaining assignment below
 *         has been made.
 */
static int ssh_init(void)
{
    char *tmp;
    int rc;

    /* we were selected, so setup the launch agent */
    if (prte_mca_plm_ssh_component.using_qrsh) {
        /* perform base setup for qrsh */
        /* NOTE(review): getenv() may return NULL and the result of
         * pmix_asprintf is not checked - presumably using_qrsh is only
         * set when both variables exist; confirm in the component */
        pmix_asprintf(&tmp, "%s/bin/%s", getenv("SGE_ROOT"), getenv("ARC"));
        if (PRTE_SUCCESS != (rc = launch_agent_setup("qrsh", tmp))) {
            PRTE_ERROR_LOG(rc);
            free(tmp);
            return rc;
        }
        free(tmp);
        /* automatically add -inherit and grid engine PE related flags */
        PMIX_ARGV_APPEND_NOSIZE_COMPAT(&ssh_agent_argv, "-inherit");
        /* Don't use the "-noshell" flag as qrsh would have a problem
         * swallowing a long command */
        PMIX_ARGV_APPEND_NOSIZE_COMPAT(&ssh_agent_argv, "-nostdin");
        PMIX_ARGV_APPEND_NOSIZE_COMPAT(&ssh_agent_argv, "-V");
        if (0 < pmix_output_get_verbosity(prte_plm_base_framework.framework_output)) {
            PMIX_ARGV_APPEND_NOSIZE_COMPAT(&ssh_agent_argv, "-verbose");
            tmp = PMIX_ARGV_JOIN_COMPAT(ssh_agent_argv, ' ');
            pmix_output_verbose(1, prte_plm_base_framework.framework_output,
                                "%s plm:ssh: using \"%s\" for launching\n",
                                PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), tmp);
            free(tmp);
        }
    } else if (prte_mca_plm_ssh_component.using_llspawn) {
        /* perform base setup for llspawn */
        if (PRTE_SUCCESS != (rc = launch_agent_setup("llspawn", NULL))) {
            PRTE_ERROR_LOG(rc);
            return rc;
        }
        pmix_output_verbose(1, prte_plm_base_framework.framework_output,
                            "%s plm:ssh: using \"%s\" for launching\n",
                            PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), ssh_agent_path);
    } else {
        /* not using qrsh or llspawn - use MCA-specified agent */
        if (PRTE_SUCCESS != (rc = launch_agent_setup(prte_mca_plm_ssh_component.agent, NULL))) {
            PRTE_ERROR_LOG(rc);
            return rc;
        }
    }

    /* point to our launch command */
    if (PRTE_SUCCESS != (rc = prte_state.add_job_state(PRTE_JOB_STATE_LAUNCH_DAEMONS, launch_daemons))) {
        PRTE_ERROR_LOG(rc);
        return rc;
    }

    /* setup the event for metering the launch */
    PMIX_CONSTRUCT(&launch_list, pmix_list_t);
    prte_event_set(prte_event_base, &launch_event, -1, 0, process_launch_list, NULL);

    /* start the recvs */
    if (PRTE_SUCCESS != (rc = prte_plm_base_comm_start())) {
        PRTE_ERROR_LOG(rc);
    }

    /* we assign daemon nodes at launch */
    prte_plm_globals.daemon_nodes_assigned_at_launch = true;

    return rc;
}

/**
 * Callback on daemon exit.
 *
 * cbdata is the prte_wait_tracker_t for the exited child; its cbdata
 * field is the prte_plm_ssh_caddy_t that was used to launch it. When
 * the daemon did not exit normally with status 0, the failure is
 * either forwarded to the HNP (when this process is not the master)
 * or reported locally and turned into a FAILED_TO_START proc state.
 * Afterwards the in-progress counter is decremented so that queued
 * launches can continue.
 *
 * NOTE(review): the early-return paths release both the caddy and the
 * tracker and skip the num_in_progress update, while the normal path
 * releases only the tracker - confirm this asymmetry is intended.
 */
static void ssh_wait_daemon(int sd, short flags, void *cbdata)
{
    prte_job_t *jdata;
    prte_wait_tracker_t *t2 = (prte_wait_tracker_t *) cbdata;
    prte_plm_ssh_caddy_t *caddy = (prte_plm_ssh_caddy_t *) t2->cbdata;
    prte_proc_t *daemon = caddy->daemon;
    pmix_status_t rc;
    PRTE_HIDE_UNUSED_PARAMS(sd, flags);

    if (prte_prteds_term_ordered || prte_abnormal_term_ordered) {
        /* ignore any such report - it will occur if we left the
         * session attached, e.g., while debugging */
        PMIX_RELEASE(caddy);
        PMIX_RELEASE(t2);
        return;
    }

    if (!WIFEXITED(daemon->exit_code) || WEXITSTATUS(daemon->exit_code) != 0) { /* if abnormal exit */
        /* if we are not the HNP, send a message to the HNP alerting it
         * to the failure */
        if (!PRTE_PROC_IS_MASTER) {
            pmix_data_buffer_t *buf;
            PMIX_OUTPUT_VERBOSE( (1, prte_plm_base_framework.framework_output,
                                  "%s daemon %s failed with status %d",
                                  PRTE_NAME_PRINT(PRTE_PROC_MY_NAME),
                                  PRTE_VPID_PRINT(daemon->name.rank),
                                  WEXITSTATUS(daemon->exit_code)));
            /* the report carries the daemon's rank followed by its
             * raw exit code */
            PMIX_DATA_BUFFER_CREATE(buf);
            rc = PMIx_Data_pack(NULL, buf, &(daemon->name.rank), 1, PMIX_PROC_RANK);
            if (PMIX_SUCCESS != rc) {
                PMIX_ERROR_LOG(rc);
                PMIX_DATA_BUFFER_RELEASE(buf);
                PMIX_RELEASE(caddy);
                PMIX_RELEASE(t2);
                return;
            }
            rc = PMIx_Data_pack(NULL, buf, &daemon->exit_code, 1, PMIX_INT32);
            if (PMIX_SUCCESS != rc) {
                PMIX_ERROR_LOG(rc);
                PMIX_DATA_BUFFER_RELEASE(buf);
                PMIX_RELEASE(caddy);
                PMIX_RELEASE(t2);
                return;
            }
            PRTE_RML_SEND(rc, PRTE_PROC_MY_HNP->rank, buf, PRTE_RML_TAG_REPORT_REMOTE_LAUNCH);
            if (PRTE_SUCCESS != rc) {
                PRTE_ERROR_LOG(rc);
                PMIX_DATA_BUFFER_RELEASE(buf);
                PMIX_RELEASE(caddy);
                PMIX_RELEASE(t2);
                return;
            }
            /* note that this daemon failed */
            daemon->state = PRTE_PROC_STATE_FAILED_TO_START;
        } else {
            jdata = prte_get_job_data_object(PRTE_PROC_MY_NAME->nspace);

            pmix_output(prte_clean_output,
                        "------------------------------------------------------------\n"
                        "A daemon failed to report back after being spawned. This could\n"
                        "be due to several factors, including inability to find the\n"
                        "daemon executable, or the executable was unable to find its\n"
                        "required supporting libraries. In some cases, the daemon was\n"
                        "able to execute, but was unable to complete a TCP connection\n"
                        "back to %s:\n\n"
                        " Local host: %s\n"
                        " Remote host: %s\n"
                        " Daemon exit status: %d\n\n"
                        "This may also be caused by a firewall on the remote host. Please\n"
                        "check that any firewall (e.g., iptables) has been disabled and\n"
                        "try again.\n"
                        "------------------------------------------------------------",
                        prte_tool_basename, prte_process_info.nodename,
                        (NULL == daemon->node->name) ? "" : daemon->node->name,
                        WEXITSTATUS(daemon->exit_code));

            /* set the exit status */
            PRTE_UPDATE_EXIT_STATUS(WEXITSTATUS(daemon->exit_code));
            /* note that this daemon failed */
            daemon->state = PRTE_PROC_STATE_FAILED_TO_START;
            /* increment the #daemons terminated so we will exit properly */
            jdata->num_terminated++;
            /* remove it from the routing table to ensure num_routes
             * returns the correct value */
            prte_rml_route_lost(daemon->name.rank);
            /* report that the daemon has failed so we can exit */
            PRTE_ACTIVATE_PROC_STATE(&daemon->name, PRTE_PROC_STATE_FAILED_TO_START);
        }
    }

    /* release any delay */
    --num_in_progress;
    if (num_in_progress < prte_mca_plm_ssh_component.num_concurrent) {
        /* trigger continuation of the launch */
        prte_event_active(&launch_event, EV_WRITE, 1);
    }
    /* cleanup */
    PMIX_RELEASE(t2);
}
static int setup_launch(int *argcptr, char ***argvptr, char *nodename, int *node_name_index1, int *proc_vpid_index, char *prefix_dir, char *pmix_prefix) { int argc; char **argv; char *param, *value, *value2; prte_plm_ssh_shell_t remote_shell, local_shell; int orted_argc; char **orted_argv; char *orted_cmd, *orted_prefix, *final_cmd; int orted_index; int rc; int i; char *full_orted_cmd = NULL; char **final_argv = NULL; char *tmp; /* Figure out the basenames for the libdir and bindir. This requires some explanation: - Use prte_install_dirs.libdir and prte_install_dirs.bindir. - After a discussion on the devel-core mailing list, the developers decided that we should use the local directory basenames as the basis for the prefix on the remote note. This does not handle a few notable cases (e.g., if the libdir/bindir is not simply a subdir under the prefix, if the libdir/bindir basename is not the same on the remote node as it is here on the local node, etc.), but we decided that --prefix was meant to handle "the common case".
If you need something more complex than this, a) edit your shell startup files to set PATH/LD_LIBRARY_PATH properly on the remove node, or b) use some new/to-be-defined options that explicitly allow setting the bindir/libdir on the remote node. We decided to implement these options (e.g., --remote-bindir and --remote-libdir) to prun when it actually becomes a problem for someone (vs. a hypothetical situation). Hence, for now, we simply take the basename of this install's libdir and bindir and use it to append this install's prefix and use that on the remote node. */ /* * Build argv array */ argv = PMIX_ARGV_COPY_COMPAT(ssh_agent_argv); argc = PMIX_ARGV_COUNT_COMPAT(argv); /* if any ssh args were provided, now is the time to add them */ if (NULL != prte_mca_plm_ssh_component.ssh_args) { char **ssh_argv; ssh_argv = PMIX_ARGV_SPLIT_COMPAT(prte_mca_plm_ssh_component.ssh_args, ' '); for (i = 0; NULL != ssh_argv[i]; i++) { pmix_argv_append(&argc, &argv, ssh_argv[i]); } PMIX_ARGV_FREE_COMPAT(ssh_argv); } *node_name_index1 = argc; pmix_argv_append(&argc, &argv, "