Commit 01b7b6a0 authored by Elliott Slaughter
Browse files

Refactor CI to work on both Crusher and Ascent.

parent 9be84857
# this configuration is intended for use at https://code.ornl.gov # global configuration, applies to all machines
variables: variables:
THREADS: 4 # reduce parallelism to avoid OOM in Legion build
# this configuration is intended for use at Ascent / https://code.ornl.gov
.ascent_variables: &ascent_variables
SCHEDULER_PARAMETERS: "-P CHM137 -W 1:30 -nnodes 1 -alloc_flags gpumps" SCHEDULER_PARAMETERS: "-P CHM137 -W 1:30 -nnodes 1 -alloc_flags gpumps"
EXTERNAL_WORKDIR: /gpfs/wolf/chm137/proj-shared/ci/${CI_PIPELINE_ID} EXTERNAL_WORKDIR: /gpfs/wolf/chm137/proj-shared/ci/${CI_PIPELINE_ID}
# launcher for tests
SPINIFEL_TEST_LAUNCHER: "jsrun -n1 -a1 -g1"
# for script test # for script test
DATA_DIR: /gpfs/wolf/chm137/proj-shared/spinifel_data DATA_DIR: /gpfs/wolf/chm137/proj-shared/spinifel_data
DATA_FILENAME: 2CEX-10k-2.h5 DATA_FILENAME: 2CEX-10k-2.h5
...@@ -13,16 +19,30 @@ variables: ...@@ -13,16 +19,30 @@ variables:
# for pytest # for pytest
test_data_dir: /gpfs/wolf/chm137/proj-shared/spinifel_data/testdata test_data_dir: /gpfs/wolf/chm137/proj-shared/spinifel_data/testdata
THREADS: 4 # reduce parallelism to avoid OOM in Legion build IS_CRUSHER_JOB: 0
stages: # this configuration is intended for use at Crusher / https://code.olcf.ornl.gov
- build .crusher_variables: &crusher_variables
- unit_test SCHEDULER_PARAMETERS: "-A CHM137_crusher -t 1:30:00 -N 1 -p batch"
- test EXTERNAL_WORKDIR: /gpfs/alpine/chm137/proj-shared/ci/${CI_PIPELINE_ID}
- cleanup
build: # launcher for tests
stage: build SPINIFEL_TEST_LAUNCHER: "srun -n1"
# for script test
DATA_DIR: /gpfs/alpine/chm137/proj-shared/spinifel_data
DATA_FILENAME: 2CEX-10k-2.h5
OUT_DIR: /gpfs/alpine/chm137/proj-shared/ci/${CI_PIPELINE_ID}/spinifel_output
CUPY_CACHE_DIR: /gpfs/alpine/chm137/proj-shared/ci/${CI_PIPELINE_ID}/cupy_cache
# for pytest
test_data_dir: /gpfs/alpine/chm137/proj-shared/spinifel_data/testdata
IS_CRUSHER_JOB: 1
# scripts for the various test stages
.build_script: &build_script
before_script: before_script:
- mkdir -p $(dirname ${EXTERNAL_WORKDIR}) - mkdir -p $(dirname ${EXTERNAL_WORKDIR})
- cp -r ${CI_PROJECT_DIR} ${EXTERNAL_WORKDIR} - cp -r ${CI_PROJECT_DIR} ${EXTERNAL_WORKDIR}
...@@ -31,92 +51,227 @@ build: ...@@ -31,92 +51,227 @@ build:
- git submodule update --init --recursive - git submodule update --init --recursive
script: script:
- ./setup/build_from_scratch.sh - ./setup/build_from_scratch.sh
tags:
- nobatch
unit_test: .test_before_script: &test_before_script
stage: unit_test
before_script: before_script:
- cd ${EXTERNAL_WORKDIR} - cd ${EXTERNAL_WORKDIR}
- mkdir -p ${OUT_DIR}
- source ./setup/env.sh
.unit_test_script: &unit_test_script
script: script:
- ./scripts/test.sh - ./scripts/test.sh
tags:
- batch
test_mpi: .mpi_test_script: &mpi_test_script
stage: test script:
before_script: - $SPINIFEL_TEST_LAUNCHER python -m spinifel --default-settings=summit_ci.toml --mode=mpi
- cd ${EXTERNAL_WORKDIR}
- mkdir -p ${OUT_DIR} .legion_test_script: &legion_test_script
- source ./setup/env.sh script:
- PYTHONPATH="$PYTHONPATH:$EXTERNAL_WORKDIR:$PWD/mpi4py_poison_wrapper" $SPINIFEL_TEST_LAUNCHER legion_python -ll:py 1 -ll:csize 8192 legion_main.py --default-settings=summit_ci.toml --mode=legion
# Reusable script fragment (anchor: sequential_test_script).
# Runs spinifel in sequential mode with the summit_ci.toml settings, prefixed by
# whatever launcher command $SPINIFEL_TEST_LAUNCHER holds for the target machine.
.sequential_test_script: &sequential_test_script
script:
- $SPINIFEL_TEST_LAUNCHER python -m spinifel --default-settings=summit_ci.toml --mode=sequential
# Reusable script fragment (anchor: large_test_script).
# Same MPI-mode invocation as the regular MPI test, but overrides
# runtime.small_problem=false on the command line.
.large_test_script: &large_test_script
script:
- $SPINIFEL_TEST_LAUNCHER python -m spinifel --default-settings=summit_ci.toml --mode=mpi runtime.small_problem=false
# Reusable script fragment (anchor: finufft_test_script).
# MPI-mode run with runtime.use_cufinufft=false overridden on the command line.
.finufft_test_script: &finufft_test_script
script:
- $SPINIFEL_TEST_LAUNCHER python -m spinifel --default-settings=summit_ci.toml --mode=mpi runtime.use_cufinufft=false
.nocuda_test_script: &nocuda_test_script
script: script:
- jsrun -n1 -a1 -g1 python -m spinifel --default-settings=summit_ci.toml --mode=mpi - $SPINIFEL_TEST_LAUNCHER python -m spinifel --default-settings=summit_ci.toml --mode=mpi runtime.use_cufinufft=false runtime.use_cuda=false runtime.use_cupy=false
# Reusable script fragment (anchor: cleanup_script).
# Recursively removes the per-pipeline working copy at ${EXTERNAL_WORKDIR}.
.cleanup_script: &cleanup_script
script:
- rm -rf ${EXTERNAL_WORKDIR}
# rules that determine when each job runs
# Reusable rules fragment (anchor: test_rules), merged into the job definitions
# via `<<`. It compares the per-job IS_CRUSHER_JOB variable against
# RUN_CRUSHER_JOBS.
# NOTE(review): RUN_CRUSHER_JOBS is not defined in this file; presumably it is
# set on the CI project/runner side -- confirm.
.test_rules: &test_rules
rules:
# don't run Crusher jobs on Ascent and vice versa
# (first rule: skip Crusher jobs unless RUN_CRUSHER_JOBS is "1")
- if: '$IS_CRUSHER_JOB == "1" && $RUN_CRUSHER_JOBS != "1"'
when: never
# (second rule: skip non-Crusher jobs when RUN_CRUSHER_JOBS is "1")
- if: '$IS_CRUSHER_JOB != "1" && $RUN_CRUSHER_JOBS == "1"'
when: never
# if no other rule matches, run the job
- when: always
# Pipeline stages; every job below assigns itself to one of these via `stage:`.
stages:
- build
- unit_test
- test
- cleanup
######################################################################
### CI Configuration for Ascent
######################################################################
# Ascent build job: merges the shared rules and the build script fragment,
# runs in the build stage on runners tagged `nobatch`, and takes its
# variables from the Ascent variable set.
build_ascent:
<<: [*test_rules, *build_script]
stage: build
tags:
- nobatch
variables:
<<: [*ascent_variables]
unit_test_ascent:
<<: [*test_rules, *test_before_script, *unit_test_script]
stage: unit_test
tags: tags:
- batch - batch
variables:
<<: [*ascent_variables]
test_legion: test_mpi_ascent:
<<: [*test_rules, *test_before_script, *mpi_test_script]
stage: test stage: test
before_script:
- cd ${EXTERNAL_WORKDIR}
- mkdir -p ${OUT_DIR}
- source ./setup/env.sh
- export PYTHONPATH=${PYTHONPATH}:${EXTERNAL_WORKDIR}
script:
- PYTHONPATH="$PYTHONPATH:$PWD/mpi4py_poison_wrapper" jsrun -n1 -a1 -g1 legion_python -ll:py 1 -ll:csize 8192 legion_main.py --default-settings=summit_ci.toml --mode=legion
tags: tags:
- batch - batch
variables:
<<: [*ascent_variables]
# Ascent Legion test job: merges the shared rules, the common test
# before_script, and the Legion test script; runs in the test stage on
# runners tagged `batch` with the Ascent variable set.
test_legion_ascent:
<<: [*test_rules, *test_before_script, *legion_test_script]
stage: test
tags:
- batch
variables:
<<: [*ascent_variables]
# TODO: suggest deprecating # TODO: suggest deprecating
#test_sequential: #test_sequential_ascent:
# <<: [*test_rules, *test_before_script, *sequential_test_script]
# stage: test # stage: test
# before_script:
# - cd ${EXTERNAL_WORKDIR}
# - mkdir -p ${OUT_DIR}
# - source ./setup/env.sh
# script:
# - jsrun -n1 -a1 -g1 python -m spinifel --default-settings=summit_ci.toml --mode=sequential
# tags: # tags:
# - batch # - batch
# variables:
# <<: [*ascent_variables]
# TODO: Skip for now -- runs out of memory -- not sure why though # TODO: Skip for now -- runs out of memory -- not sure why though
# test_large: # test_large_ascent:
# <<: [*test_rules, *test_before_script, *large_test_script]
# stage: test # stage: test
# before_script:
# - cd ${EXTERNAL_WORKDIR}
# - mkdir -p ${OUT_DIR}
# - source ./setup/env.sh
# script:
# - jsrun -n1 -a1 -g1 python -m spinifel --default-settings=summit_ci.toml --mode=mpi runtime.small_problem=false
# tags: # tags:
# - batch # - batch
# variables:
# <<: [*ascent_variables]
test_finufft: test_finufft_ascent:
<<: [*test_rules, *test_before_script, *finufft_test_script]
stage: test stage: test
before_script:
- cd ${EXTERNAL_WORKDIR}
- mkdir -p ${OUT_DIR}
- source ./setup/env.sh
script:
- jsrun -n1 -a1 -g1 python -m spinifel --default-settings=summit_ci.toml --mode=mpi runtime.use_cufinufft=false
tags: tags:
- batch - batch
variables:
<<: [*ascent_variables]
test_nocuda: test_nocuda_ascent:
<<: [*test_rules, *test_before_script, *nocuda_test_script]
stage: test stage: test
before_script:
- cd ${EXTERNAL_WORKDIR}
- mkdir -p ${OUT_DIR}
- source ./setup/env.sh
script:
- jsrun -n1 -a1 -g1 python -m spinifel --default-settings=summit_ci.toml --mode=mpi runtime.use_cufinufft=false runtime.use_cuda=false runtime.use_cupy=false
tags: tags:
- batch - batch
variables:
<<: [*ascent_variables]
cleanup: cleanup_ascent:
<<: [*test_rules, *cleanup_script]
stage: cleanup stage: cleanup
before_script:
script:
- rm -rf ${EXTERNAL_WORKDIR}
tags: tags:
- nobatch - nobatch
variables:
<<: [*ascent_variables]
######################################################################
### CI Configuration for Crusher
######################################################################
# Crusher build job: same rules and build script fragments as the Ascent
# build, but selected by the `shell` + `crusher` runner tags and configured
# with the Crusher variable set.
build_crusher:
<<: [*test_rules, *build_script]
stage: build
tags:
- shell
- crusher
variables:
<<: [*crusher_variables]
# Crusher unit-test job: merges the shared rules, the common test
# before_script, and the unit-test script; runs in the unit_test stage on
# runners tagged `slurm` + `crusher` with the Crusher variable set.
unit_test_crusher:
<<: [*test_rules, *test_before_script, *unit_test_script]
stage: unit_test
tags:
- slurm
- crusher
variables:
<<: [*crusher_variables]
# Crusher MPI test job: merges the shared rules, the common test
# before_script, and the MPI test script; runs in the test stage on runners
# tagged `slurm` + `crusher` with the Crusher variable set.
test_mpi_crusher:
<<: [*test_rules, *test_before_script, *mpi_test_script]
stage: test
tags:
- slurm
- crusher
variables:
<<: [*crusher_variables]
# Crusher Legion test job: merges the shared rules, the common test
# before_script, and the Legion test script; runs in the test stage on
# runners tagged `slurm` + `crusher` with the Crusher variable set.
test_legion_crusher:
<<: [*test_rules, *test_before_script, *legion_test_script]
stage: test
tags:
- slurm
- crusher
variables:
<<: [*crusher_variables]
# TODO: suggest deprecating
#test_sequential_crusher:
# <<: [*test_rules, *test_before_script, *sequential_test_script]
# stage: test
# tags:
# - slurm
# - crusher
# variables:
# <<: [*crusher_variables]
# TODO: Skip for now -- runs out of memory -- not sure why though
# test_large_crusher:
# <<: [*test_rules, *test_before_script, *large_test_script]
# stage: test
# tags:
# - slurm
# - crusher
# variables:
# <<: [*crusher_variables]
# Crusher finufft test job: merges the shared rules, the common test
# before_script, and the finufft test script (cufinufft disabled); runs in
# the test stage on runners tagged `slurm` + `crusher` with the Crusher
# variable set.
test_finufft_crusher:
<<: [*test_rules, *test_before_script, *finufft_test_script]
stage: test
tags:
- slurm
- crusher
variables:
<<: [*crusher_variables]
# Crusher no-CUDA test job: merges the shared rules, the common test
# before_script, and the no-CUDA test script; runs in the test stage on
# runners tagged `slurm` + `crusher` with the Crusher variable set.
test_nocuda_crusher:
<<: [*test_rules, *test_before_script, *nocuda_test_script]
stage: test
tags:
- slurm
- crusher
variables:
<<: [*crusher_variables]
# Crusher cleanup job: merges the shared rules and the cleanup script
# (removes ${EXTERNAL_WORKDIR}); runs in the cleanup stage on runners tagged
# `shell` + `crusher` with the Crusher variable set.
cleanup_crusher:
<<: [*test_rules, *cleanup_script]
stage: cleanup
tags:
- shell
- crusher
variables:
<<: [*crusher_variables]
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment