Legion test on the development branch fails on Cori GPU
To reproduce this error, follow the steps in the Quick Start guide for Cori GPU, then run the Legion test. An example script for running this test is shown below:
#!/bin/bash
# Reproduction script for the spinifel Legion test.
# Exports site-specific data/output paths and job launchers chosen from the
# target name, then runs legion_main.py through the selected test launcher.

# -e: stop at the first failing command; -x: trace every command as it runs.
set -ex

export CI_PIPELINE_ID=000
export PYCUDA_CACHE_DIR="/tmp"

# Target precedence: SPINIFEL_TARGET, then NERSC_HOST, then this host's FQDN.
target=${SPINIFEL_TARGET:-${NERSC_HOST:-$(hostname --fqdn)}}

# Site-specific test data and output locations (first matching pattern wins).
case "${target}" in
  *ascent*)
    # Proxy settings are only exported for this target.
    export all_proxy=socks://proxy.ccs.ornl.gov:3128/
    export ftp_proxy=ftp://proxy.ccs.ornl.gov:3128/
    export http_proxy=http://proxy.ccs.ornl.gov:3128/
    export https_proxy=http://proxy.ccs.ornl.gov:3128/
    export no_proxy='localhost,127.0.0.0/8,*.ccs.ornl.gov'
    export test_data_dir="/gpfs/wolf/chm137/proj-shared/spinifel_data/testdata"
    export OUT_DIR="/gpfs/wolf/chm137/proj-shared/ci/${CI_PIPELINE_ID}/spinifel_output"
    ;;
  *summit*)
    export test_data_dir="/gpfs/alpine/proj-shared/chm137/data/testdata"
    export OUT_DIR="/gpfs/alpine/proj-shared/chm137/test_main/${CI_PIPELINE_ID}/spinifel_output"
    ;;
  *)
    # Any other target: paths are derived from the CFS and SCRATCH variables.
    export test_data_dir="${CFS}/m2859/data/testdata"
    export OUT_DIR="${SCRATCH}/spinifel_output"
    ;;
esac

# Launchers: srun everywhere except the summit/ascent targets, which use jsrun.
if [[ ${target} != *"summit"* && ${target} != *"ascent"* ]]; then
  export SPINIFEL_TEST_LAUNCHER="srun -n1 -G1"
  export SPINIFEL_PSANA2_LAUNCHER="srun -n4 -G1"
else
  export SPINIFEL_TEST_LAUNCHER="jsrun -n1 -a1 -g1"
  export SPINIFEL_PSANA2_LAUNCHER="jsrun -n4 -g1"
fi

# SPINIFEL_TEST_LAUNCHER is left unquoted on purpose: it holds a command plus
# its arguments and must be word-split by the shell.
PYTHONPATH="$PYTHONPATH:$EXTERNAL_WORKDIR:$PWD/mpi4py_poison_wrapper" \
  $SPINIFEL_TEST_LAUNCHER legion_python \
  -ll:py 1 \
  -ll:csize 8192 \
  legion_main.py \
  --default-settings=summit_ci.toml \
  --mode=legion
Error:
WARNING: Found 9 IB HCAs, but GASNet was configured with '--with-ibv-max-hcas=4'. To utilize all your HCAs, you should reconfigure GASNet using '--with-ibv-max-hcas=9'. You can silence this warning by setting the environment variable GASNET_IBV_PORTS as described in the file 'gasnet/ibv-conduit/README' to specify the desired HCA(s), or by setting the environment variable GASNET_IBV_PORTS_VERBOSE=0 to use the default.
[0 - 15551599e840] 2.473322 {6}{python}: python exception occurred within task:
Traceback (most recent call last):
File "/global/u2/m/monarin/spinifel_development/setup/install/lib/python3.8/site-packages/legion_top.py", line 434, in legion_python_main
run_path(args[start], run_name='__main__')
File "/global/u2/m/monarin/spinifel_development/setup/install/lib/python3.8/site-packages/legion_top.py", line 255, in run_path
exec(code, module.__dict__, module.__dict__)
File "legion_main.py", line 2, in <module>
from spinifel.legion import main
File "/global/u2/m/monarin/spinifel_development/spinifel/legion/__init__.py", line 1, in <module>
from .main import main
File "/global/u2/m/monarin/spinifel_development/spinifel/legion/main.py", line 12, in <module>
from .autocorrelation import solve_ac
File "/global/u2/m/monarin/spinifel_development/spinifel/legion/autocorrelation.py", line 3, in <module>
import skopi as skp
File "/global/u2/m/monarin/spinifel_development/setup/skopi/skopi/__init__.py", line 1, in <module>
from skopi.diffraction import *
File "/global/u2/m/monarin/spinifel_development/setup/skopi/skopi/diffraction.py", line 3, in <module>
from scipy.interpolate import CubicSpline
File "/global/u2/m/monarin/spinifel_development/setup/conda/envs/myenv/lib/python3.8/site-packages/scipy/interpolate/__init__.py", line 166, in <module>
from .interpolate import *
File "/global/u2/m/monarin/spinifel_development/setup/conda/envs/myenv/lib/python3.8/site-packages/scipy/interpolate/interpolate.py", line 21, in <module>
from .interpnd import _ndim_coords_from_arrays
File "interpnd.pyx", line 1, in init scipy.interpolate.interpnd
File "/global/u2/m/monarin/spinifel_development/setup/conda/envs/myenv/lib/python3.8/site-packages/scipy/spatial/__init__.py", line 96, in <module>
from .kdtree import *
File "/global/u2/m/monarin/spinifel_development/setup/conda/envs/myenv/lib/python3.8/site-packages/scipy/spatial/kdtree.py", line 5, in <module>
from .ckdtree import cKDTree, cKDTreeNode
ImportError: /usr/common/software/sles15_cgpu/gcc/8.3.0/lib64/libstdc++.so.6: version `GLIBCXX_3.4.29' not found (required by /global/u2/m/monarin/spinifel_development/setup/conda/envs/myenv/lib/python3.8/site-packages/scipy/spatial/ckdtree.cpython-38-x86_64-linux-gnu.so)
WARNING! The environment variable OUT_DIR supersedes all other inputs for this setting. If this is unintensional unset OUT_DIR.
legion_python: /global/u2/m/monarin/spinifel_development/setup/legion/runtime/realm/python/python_module.cc:992: virtual void Realm::LocalPythonProcessor::execute_task(Realm::Processor::TaskFuncID, const Realm::ByteArrayRef&): Assertion `0' failed.
*** Caught a fatal signal (proc 0): SIGABRT(6)
NOTICE: Before reporting bugs, run with GASNET_BACKTRACE=1 in the environment to generate a backtrace.
NOTICE: We recommend linking the debug version of GASNet to assist you in resolving this application issue.
WARNING: ODP shutdown in signal context
srun: error: cgpu18: task 0: Aborted
srun: launch/slurm: _step_signal: Terminating StepId=3459493.3