Commit f35a76f2 authored by chuckie82's avatar chuckie82
Browse files

Merge branch 'development' of http://gitlab.osti.gov/mtip/spinifel into development

parents 5534d9b9 6adbfb4e
......@@ -54,18 +54,17 @@ test_mpi:
tags:
- batch
# TODO: Skip for now till MPI version is stable
#test_legion:
# stage: test
# before_script:
# - cd ${EXTERNAL_WORKDIR}
# - mkdir -p ${OUT_DIR}
# - source ./setup/env.sh
# - export PYTHONPATH=${PYTHONPATH}:${EXTERNAL_WORKDIR}
# script:
# - PYTHONPATH="$PYTHONPATH:$PWD/mpi4py_poison_wrapper" jsrun -n1 -a1 -g1 legion_python -ll:py 1 -ll:csize 8192 legion_main.py --default-settings=summit_ci.toml --mode=legion
# tags:
# - batch
test_legion:
stage: test
before_script:
- cd ${EXTERNAL_WORKDIR}
- mkdir -p ${OUT_DIR}
- source ./setup/env.sh
- export PYTHONPATH=${PYTHONPATH}:${EXTERNAL_WORKDIR}
script:
- PYTHONPATH="$PYTHONPATH:$PWD/mpi4py_poison_wrapper" jsrun -n1 -a1 -g1 legion_python -ll:py 1 -ll:csize 8192 legion_main.py --default-settings=summit_ci.toml --mode=legion
tags:
- batch
# TODO: suggest deprecating
#test_sequential:
......
import re
import argparse
import numpy as np
def parse_output(filename, num_nodes, num_ranks):
    """Parse a spinifel run log and print per-stage timing statistics.

    Scans ``filename`` line by line for the timing messages emitted by the
    main loop (loading, slicing, orientation matching, merging, phasing,
    total runtime) and prints max/min/mean/std per generation plus totals.

    Parameters
    ----------
    filename : str
        Path to the captured stdout/log file of a run.
    num_nodes : int
        Number of nodes used for the run.
    num_ranks : int
        Number of ranks per node.
    """
    # Raw strings so `\d` is a regex digit class, not a (deprecated)
    # string escape sequence.
    number = re.compile(r"\d+\.\d+")
    pattern_load = re.compile(r"Loaded in \d+\.\d+s")
    pattern_merge = re.compile(r"AC recovered in \d+\.\d+s")
    pattern_phase = re.compile(r"Problem phased in \d+\.\d+s")
    pattern_slice = re.compile(r"slice=\d+\.\d+s")
    pattern_slice_oh = re.compile(r"slice_oh=\d+\.\d+s")
    pattern_match = re.compile(r"match=\d+\.\d+s")
    pattern_match_oh = re.compile(r"match_oh=\d+\.\d+s")
    pattern_completed = re.compile(r"completed in \d+\.\d+s")
    merge = []
    phase = []
    slices = []
    slice_oh = []
    ori_match = []
    ori_match_oh = []
    # Loading/completion appear at most once per log; default to NaN so a
    # truncated log prints "nan" instead of raising NameError.
    loading_time = float("nan")
    completed_time = float("nan")
    # Context manager guarantees the log file is closed.
    with open(filename) as log:
        for line in log:
            for m in pattern_load.findall(line):
                loading_time = float(number.findall(m)[0])
            for m in pattern_merge.findall(line):
                merge.append(float(number.findall(m)[0]))
            for m in pattern_phase.findall(line):
                phase.append(float(number.findall(m)[0]))
            for m in pattern_slice.findall(line):
                slices.append(float(number.findall(m)[0]))
            for m in pattern_slice_oh.findall(line):
                slice_oh.append(float(number.findall(m)[0]))
            for m in pattern_match.findall(line):
                ori_match.append(float(number.findall(m)[0]))
            for m in pattern_match_oh.findall(line):
                ori_match_oh.append(float(number.findall(m)[0]))
            for m in pattern_completed.findall(line):
                completed_time = float(number.findall(m)[0])
    # merging & phasing are performed independently by each rank
    # monitor the longest time
    merging_max, merging_min, merging_mean, merging_std = np.max(merge), np.min(merge), np.mean(merge), np.std(merge)
    phasing_max, phasing_min, phasing_mean, phasing_std = np.max(phase), np.min(phase), np.mean(phase), np.std(phase)
    # slicing & orientation matching perform a subset of the task
    tot_ranks = int(num_nodes * num_ranks)
    num_gen = int(len(slices) / tot_ranks)
    num_times = tot_ranks * num_gen
    slicing_max, slicing_min, slicing_mean, slicing_std = np.max(slice_oh)+np.max(slices), \
                                                         np.min(slice_oh)+np.min(slices), \
                                                         (np.sum(slice_oh) + np.sum(slices))/num_times, \
                                                         (np.std(slice_oh) + np.std(slices))/num_times
    ori_match_max, ori_match_min, ori_match_mean, ori_match_std = np.max(ori_match_oh)+np.max(ori_match), \
                                                                  np.min(ori_match_oh)+np.min(ori_match), \
                                                                  (np.sum(ori_match_oh) + np.sum(ori_match))/num_times,\
                                                                  (np.std(ori_match_oh) + np.std(ori_match))/num_times
    print(f"Max/min/mean/std time per generation and total time for {num_gen} generations in seconds")
    print(f"Loading time: {loading_time:.3f}")
    print(f"Slicing time: {slicing_max:.3f}/{slicing_min:.3f}/{slicing_mean:.3f}/{slicing_std:.3f}/{(sum(slice_oh)+sum(slices))/tot_ranks:.3f}")
    print(f"Orientation matching time: {ori_match_max:.3f}/{ori_match_min:.3f}/{ori_match_mean:.3f}/{ori_match_std:.3f}/{(sum(ori_match_oh)+sum(ori_match))/tot_ranks:.3f}")
    print(f"Merging time: {merging_max:.3f}/{merging_min:.3f}/{merging_mean:.3f}/{merging_std:.3f}/{sum(merge):.3f}")
    print(f"Phasing time: {phasing_max:.3f}/{phasing_min:.3f}/{phasing_mean:.3f}/{phasing_std:.3f}/{sum(phase):.3f}")
    print(f"Total time from start to finish for {num_gen} generations: {completed_time:.3f}")
if __name__ == "__main__":
    # Command-line entry point: forward the log file name and the run
    # geometry (nodes, ranks per node) to parse_output.
    arg_parser = argparse.ArgumentParser(description='Process arguments to parseOutput')
    #positional arguments
    arg_parser.add_argument('--fname', type=str, help='name of output file')
    arg_parser.add_argument('--nodes', type=int, help='number of nodes')
    arg_parser.add_argument('--ranks', type=int, help='number of ranks per node')
    cli = arg_parser.parse_args()
    parse_output(cli.fname, cli.nodes, cli.ranks)
......@@ -11,5 +11,5 @@ export PYTHONPATH="$PYTHONPATH:$root_dir"
set -x
pytest spinifel/tests
pytest -s spinifel/tests
# pytest tests
# This toml file contains settings for the Cori quickstart
[debug]
test = "Quickstart settings for Cori"
verbose = true
verbosity = 0
use_callmonitor = false
[data]
in_dir = "${CFS}/m2859/data/3iyf/clean"
out_dir = "${SCRATCH}/spinifel_output"
name = "3iyf_ariana_20k_hires.h5"
[detector]
shape = [1, 128, 128]
[psana]
enable = false
ps_smd_n_events = 10000
ps_eb_nodes = 1
ps_srv_nodes = 0
[runtime]
N_images_per_rank = 2500
use_cuda = false
use_cufinufft = false
use_cupy = false
use_single_prec = false
chk_convergence = false
[gpu]
devices_per_node = 1
[algorithm]
N_generations = 10
nER = 50
nHIO = 25
beta = 0.3
cutoff = 0.05
N_phase_loops = 10
N_clipping = 0
N_binning = 0
N_orientations = 10000 # model_slices
N_batch_size = 100
# This toml file contains settings for the Cori quickstart with PSANA and Legion
[debug]
test = "Quickstart settings for Cori with Psana and Legion"
verbose = true
verbosity = 0
use_callmonitor = false
[data]
in_dir = "${CFS}/m2859/data/3iyf/clean"
out_dir = "${SCRATCH}/spinifel_output"
name = "3iyf_sim_400k.h5"
[detector]
shape = [1, 128, 128]
[psana]
enable = true
ps_smd_n_events = 1000
ps_eb_nodes = 1
ps_srv_nodes = 0
ps_batch_size=100
ps_dir = "/global/cfs/cdirs/m2859/data/3iyf/xtc2"
exp = "xpptut15"
runnum = 1
ps_parallel = "legion"
[runtime]
N_images_per_rank = 1000
use_cuda = false
use_cufinufft = false
use_cupy = false
use_single_prec = false
chk_convergence = false
[gpu]
devices_per_node = 1
[algorithm]
N_generations = 3
nER = 50
nHIO = 25
beta = 0.3
cutoff = 0.05
N_phase_loops = 10
N_clipping = 0
N_binning = 0
N_orientations = 10000 # model_slices
N_batch_size = 100
Subproject commit e4006374e32bfd8b2c9644e756aae3265a564c3f
Subproject commit 6886cd006a7a4368baf36b3a71d860265596ba76
......@@ -51,7 +51,7 @@ class SpinifelContexts(metaclass=Singleton):
self._mpi_initialized = False
self._dev_id = 0
self._cuda_initialized = False
self.ctx = None
def init_mpi(self):
"""
......
......@@ -110,9 +110,10 @@ def right_hand_ADb_task(slices, uregion, nonuniform_v, ac, weights, M,
nonuniform_v.H,
nonuniform_v.K,
nonuniform_v.L,
ac.support, M,
reciprocal_extent, use_reciprocal_symmetry
)
M,
support=ac.support,
use_recip_sym=use_reciprocal_symmetry)
if settings.verbosity > 0:
print(f"{socket.gethostname()} computed ADb.", flush=True)
......@@ -143,10 +144,9 @@ def prep_Fconv_task(uregion_ups, nonuniform_v, ac, weights, M_ups, Mtot, N,
nonuniform_v.H,
nonuniform_v.K,
nonuniform_v.L,
1, M_ups,
reciprocal_extent, use_reciprocal_symmetry
)
uregion_ups.F_conv_[:] += np.fft.fftn(np.fft.ifftshift(conv_ups)) / Mtot
M_ups,
use_reciprocal_symmetry, support=None)
uregion_ups.F_conv_[:] += np.fft.fftn(np.fft.ifftshift(conv_ups)) #/ Mtot
if settings.verbosity > 0:
print(f"{socket.gethostname()} computed Fconv.", flush=True)
......@@ -171,7 +171,7 @@ def prep_Fconv(uregion_ups, nonuniform_v, nonuniform_v_p,
def prep_Fantisupport(uregion, M):
lu = np.linspace(-np.pi, np.pi, M)
Hu_, Ku_, Lu_ = np.meshgrid(lu, lu, lu, indexing='ij')
Qu_ = np.sqrt(Hu_**2 + Ku_**2 + Lu_**2)
Qu_ = np.around(np.sqrt(Hu_**2 + Ku_**2 + Lu_**2), 4)
uregion.F_antisupport[:] = Qu_ > np.pi / settings.oversampling
Fantisup = uregion.F_antisupport
......@@ -296,30 +296,10 @@ def select_ac(generation, summary):
# Take corner of L-curve: min (v1+v2)
iref = np.argmin(summary.v1+summary.v2)
ref_rank = summary.rank[iref]
fig, axes = plt.subplots(figsize=(6.0, 8.0), nrows=3, ncols=1)
axes[0].loglog(summary.rlambda, summary.v1)
axes[0].loglog(summary.rlambda[iref], summary.v1[iref], "rD")
axes[0].set_xlabel("$\lambda_{r}$")
axes[0].set_ylabel("$||x_{\lambda_{r}}||_{2}$")
axes[1].loglog(summary.rlambda, summary.v2)
axes[1].loglog(summary.rlambda[iref], summary.v2[iref], "rD")
axes[1].set_xlabel("$\lambda_{r}$")
axes[1].set_ylabel("$||W \lambda_{r}-d||_{2}$")
axes[2].loglog(summary.v2, summary.v1) # L-curve
axes[2].loglog(summary.v2[iref], summary.v1[iref], "rD")
axes[2].set_xlabel("Residual norm $||W \lambda_{r}-d||_{2}$")
axes[2].set_ylabel("Solution norm $||x_{\lambda_{r}}||_{2}$")
fig.tight_layout()
plt.savefig(settings.out_dir / f"summary_{generation}.png")
plt.close('all')
print(f"Keeping result from rank {ref_rank}.", flush=True)
return iref
@nvtx.annotate("legion/autocorrelation.py", is_prefix=True)
def solve_ac(generation,
pixel_position,
......@@ -367,8 +347,8 @@ def solve_ac(generation,
alambda = 1
# rlambdas = Mtot/Ntot * 1e2**(np.arange(N_procs) - N_procs/2)
rlambdas = Mtot/Ntot * 2**(np.arange(N_procs) - N_procs/2)
flambda = 0
rlambdas = Mtot/Ntot * 2**(np.arange(N_procs) - N_procs/2).astype(np.float)
flambdas = 1e5 * 10**(np.arange(N_procs) - N_procs//2).astype(np.float)
summary = Region((N_procs,),
{"rank": pygion.int32, "rlambda": pygion.float32, "v1": pygion.float32, "v2": pygion.float32})
......@@ -379,7 +359,7 @@ def solve_ac(generation,
solve(
uregion, uregion_ups, ac, results_p[i], summary_p[i],
weights, M, M_ups, Mtot, N,
generation, i, alambda, rlambdas[i], flambda,
generation, i, alambda, rlambdas[i], flambdas[i],
reciprocal_extent, use_reciprocal_symmetry, maxiter)
iref = select_ac(generation, summary)
......@@ -387,3 +367,4 @@ def solve_ac(generation,
# I tried to have `results` as a partition and copy into a region,
# but I couldn't get it to work.
return results_p[iref.get()]
import numpy as np
import PyNVTX as nvtx
import os
import pygion
from pygion import task, Partition, Region, Tunable, WD, RO
from spinifel import settings, utils, contexts, checkpoint
from . import utils as lgutils
@task(privileges=[WD("ac","support_","rho_"), WD("quaternions")])
@lgutils.gpu_task_wrapper
@nvtx.annotate("legion/checkpoint.py", is_prefix=True)
def checkpoint_load_task(phased, orientations, out_dir, load_gen, tag_gen):
    """Legion task: fill the ``phased`` and ``orientations`` regions from a
    checkpoint saved for generation ``load_gen`` (the WD privileges above
    declare the fields this task writes)."""
    print(f"Loading checkpoint: {checkpoint.generate_checkpoint_name(out_dir, load_gen, tag_gen)}", flush=True)
    myRes = checkpoint.load_checkpoint(out_dir,
                                       load_gen,
                                       tag_gen)
    # Unpack dictionary: keys mirror what the save_checkpoint* tasks below
    # store (ac_phased, support_, rho_, orientations).
    phased.ac[:] = myRes['ac_phased']
    phased.support_[:] = myRes['support_']
    phased.rho_[:] = myRes['rho_']
    orientations.quaternions[:] = myRes['orientations']
''' Create and Fill Regions [phased:{ac,support_,rho}],
    [orientations:{quaternions}]
'''
def load_checkpoint(outdir: str, gen_num: int, tag=''):
    """Allocate the phased and orientations regions, then launch
    checkpoint_load_task to populate them from the checkpoint for
    generation ``gen_num``. Returns (phased, orientations, orientations_p).
    """
    # Cubic (M, M, M) region holding the phased results.
    phased = Region((settings.M,)*3, {
        "ac": pygion.float32, "support_": pygion.float32,
        "rho_": pygion.float32})
    # setup the orientation region: one quaternion (sec_shape == (4,))
    # per image, distributed across ranks.
    N_images_per_rank = settings.N_images_per_rank
    fields_dict = {"quaternions": pygion.float32}
    sec_shape = (4,)
    orientations, orientations_p = lgutils.create_distributed_region(
        N_images_per_rank, fields_dict, sec_shape)
    checkpoint_load_task(phased, orientations, outdir, gen_num, tag)
    return phased, orientations, orientations_p
'''
Save pixel_position_reciprocal, pixel_distance_reciprocal
Save Regions [slices:{data}],
             [solved:{ac}]
'''
@task(privileges=[RO("data"), RO("ac"), RO, RO])
@lgutils.gpu_task_wrapper
@nvtx.annotate("legion/checkpoint.py", is_prefix=True)
def save_checkpoint_solve_ac(
        slices, solved,
        pixel_position,
        pixel_distance,
        out_dir: str, gen_num: int, tag=''):
    """Legion task: checkpoint the inputs and result of the solve_ac step
    for generation ``gen_num``."""
    # Pack dictionary with the fields read under the RO privileges above.
    myRes = {
        'pixel_position_reciprocal': pixel_position.reciprocal,
        'pixel_distance_reciprocal': pixel_distance.reciprocal,
        'slices_': slices.data,
        'ac': solved.ac
    }
    checkpoint.save_checkpoint(myRes, out_dir, gen_num, tag)
''' Save Regions [solved:{ac}:float32],
    [phased:{ac,support_,rho_}]
'''
# NOTE(review): four privilege entries are listed for two region arguments
# (solved, phased); the sibling tasks group one entry per argument —
# verify this matches pygion's privilege-to-argument mapping.
@task(privileges=[RO("ac"), RO("ac"), RO("support_"), RO("rho_")])
@lgutils.gpu_task_wrapper
@nvtx.annotate("legion/checkpoint.py", is_prefix=True)
def save_checkpoint_phase(solved, phased,
                          out_dir: str, gen_num: int, tag=''):
    """Legion task: checkpoint the solved AC plus the phasing outputs
    (ac_phased, support_, rho_) for generation ``gen_num``."""
    # Pack dictionary
    myRes = {
        'ac': solved.ac,
        'ac_phased': phased.ac,
        'support_': phased.support_,
        'rho_': phased.rho_
    }
    checkpoint.save_checkpoint(myRes, out_dir, gen_num, tag)
''' Save Regions [slices:{data}:float32],
    [phased:{ac,support_,rho_}]: what about support/rho?
    [orientations:{quaternions}]
    [pixel_position:{reciprocal}]
    [pixel_distance:{reciprocal}]
'''
@task(privileges=[RO("data"), RO("ac"), RO("quaternions"), RO, RO])
@lgutils.gpu_task_wrapper
@nvtx.annotate("legion/checkpoint.py", is_prefix=True)
def save_checkpoint_match(slices, phased, orientations,
                          pixel_position,
                          pixel_distance,
                          out_dir: str, gen_num: int, tag=''):
    """Legion task: checkpoint the orientation-matching stage — slices,
    phased AC, matched quaternions, and pixel geometry — for generation
    ``gen_num``."""
    # Pack dictionary
    myRes = {'ac_phased': phased.ac,
             'slices_': slices.data,
             'pixel_position_reciprocal': pixel_position.reciprocal,
             'pixel_distance_reciprocal': pixel_distance.reciprocal,
             'orientations': orientations.quaternions
             }
    checkpoint.save_checkpoint(myRes, out_dir, gen_num, tag)
''' Save Regions [solved:{ac}:float32],
    [prev_phased:{prev_rho_}]:
    [phased:{ac,support_,rho_}]:
'''
@task(privileges=[RO("ac"), RO("prev_rho_"), RO("ac","support_","rho_")])
@lgutils.gpu_task_wrapper
@nvtx.annotate("legion/checkpoint.py", is_prefix=True)
def save_checkpoint_phase_prev(solved, prev_phased, phased,
                               prev_support,
                               out_dir: str, gen_num: int, tag=''):
    """Legion task: checkpoint current and previous phasing state so a
    restarted run can evaluate convergence against the prior generation.

    NOTE(review): only three privilege entries for four arguments —
    prev_support appears to be passed as a plain value/future rather than
    a region; confirm against the caller.
    """
    myRes = {
        'ac': solved.ac,
        'prev_support_': prev_support,
        'prev_rho_': prev_phased.prev_rho_,
        'ac_phased': phased.ac,
        'support_': phased.support_,
        'rho_': phased.rho_
    }
    checkpoint.save_checkpoint(myRes, out_dir, gen_num, tag)
''' Save Regions [phased:{ac,support_,rho_}]
    [orientations:{quaternions}]
'''
@task(privileges=[RO("ac", "support_", "rho_"), RO("quaternions")])
@lgutils.gpu_task_wrapper
@nvtx.annotate("legion/checkpoint.py", is_prefix=True)
def save_checkpoint(phased, orientations,
                    out_dir: str, gen_num: int, tag=''):
    """Legion task: checkpoint the end-of-generation state (phasing outputs
    plus matched orientations); the keys written here are exactly what
    checkpoint_load_task reads back."""
    # Pack dictionary
    myRes = {
        'ac_phased': phased.ac,
        'support_': phased.support_,
        'rho_': phased.rho_,
        'orientations': orientations.quaternions
    }
    checkpoint.save_checkpoint(myRes, out_dir, gen_num, tag)
......@@ -3,66 +3,125 @@ import PyNVTX as nvtx
import os
import pygion
from pygion import acquire, attach_hdf5, task, Partition, Region, R, Tunable, WD
from pygion import acquire, attach_hdf5, execution_fence, task, Partition, Region, R, Tunable, WD, RO
from spinifel import settings
from spinifel import settings, utils, contexts, checkpoint
from spinifel.prep import save_mrc
from .prep import get_data
from .autocorrelation import solve_ac
from .phasing import phase, prev_phase, cov
from .orientation_matching import match
from . import mapper
from . import checkpoint
@task(replicable=True)
@nvtx.annotate("legion/main.py", is_prefix=True)
def main():
print("In Legion main", flush=True)
def load_psana():
logger = utils.Logger(True)
assert settings.use_psana == True
# Reading input images using psana2
# For now, we use one smd chunk per node just to keep things simple.
assert settings.ps_smd_n_events == settings.N_images_per_rank
from psana.psexp.tools import mode
from psana import DataSource
total_procs = Tunable.select(Tunable.GLOBAL_PYS).get()
N_images_per_rank = settings.N_images_per_rank
batch_size = min(N_images_per_rank, 100)
max_events = min(settings.N_images_max, total_procs*N_images_per_rank)
ds = None
if settings.use_psana:
# For now, we use one smd chunk per node just to keep things simple.
# os.environ['PS_SMD_N_EVENTS'] = str(N_images_per_rank)
settings.ps_smd_n_events = N_images_per_rank
from psana import DataSource
ds = DataSource(exp=settings.exp, run=settings.runnum,
dir=settings.data_dir, batch_size=batch_size,
max_events=max_events)
batch_size = min(settings.N_images_per_rank, 100)
max_events = min(settings.N_images_max, total_procs*settings.N_images_per_rank)
logger.log(f'Using psana: exp={settings.ps_exp}, run={settings.ps_runnum}, dir={settings.ps_dir}, batch_size={batch_size}, max_events={max_events}, mode={mode}')
assert mode == 'legion'
ds = DataSource(exp=settings.ps_exp, run=settings.ps_runnum,
dir=settings.ps_dir, batch_size=batch_size,
max_events=max_events)
# Load unique set of intensity slices for python process
(pixel_position,
pixel_distance,
pixel_index,
pixel_index,
slices, slices_p) = get_data(ds)
solved = solve_ac(0, pixel_position, pixel_distance, slices, slices_p)
phased = phase(0, solved)
return pixel_position, pixel_distance, pixel_index, slices, slices_p
@task(privileges=[RO, RO, RO, RO])
def main_task(pixel_position, pixel_distance, pixel_index, slices, slices_p):
logger = utils.Logger(True)
timer = utils.Timer()
curr_gen = 0
if settings.load_gen > 0: # Load input from previous generation
curr_gen = settings.load_gen
phased, orientations, orientations_p = checkpoint.load_checkpoint(settings.out_dir, settings.load_gen)
else:
solved = solve_ac(0, pixel_position, pixel_distance, slices, slices_p)
logger.log(f"AC recovered in {timer.lap():.2f}s.")
phased = phase(0, solved)
rho = np.fft.ifftshift(phased.rho_)
logger.log(f"Problem phased in {timer.lap():.2f}s.")
save_mrc(settings.out_dir / f"ac-0.mrc", phased.ac)
save_mrc(settings.out_dir / f"rho-0.mrc", rho)
# Use improvement of cc(prev_rho, cur_rho) to determine if we should
# terminate the loop
prev_phased = None
cov_xy = 0
cov_delta = .05
curr_gen +=1
N_generations = settings.N_generations
for generation in range(1, N_generations):
for generation in range(curr_gen, N_generations+1):
logger.log(f"#"*27)
logger.log(f"##### Generation {generation}/{N_generations} #####")
logger.log(f"#"*27)
# Orientation matching
orientations, orientations_p = match(
phased, slices, slices_p, pixel_position, pixel_distance)
logger.log(f"Orientations matched in {timer.lap():.2f}s.")
# Solve autocorrelation
solved = solve_ac(
generation, pixel_position, pixel_distance, slices, slices_p,
orientations, orientations_p, phased)
logger.log(f"AC recovered in {timer.lap():.2f}s.")
prev_phased = prev_phase(generation, phased, prev_phased)
phased = phase(generation, solved, phased)
logger.log(f"Problem phased in {timer.lap():.2f}s.")
cov_xy, is_cov = cov(prev_phased, phased, cov_xy, cov_delta)
if is_cov:
break;
# Check if density converges
if settings.chk_convergence:
cov_xy, is_cov = cov(prev_phased, phased, cov_xy, cov_delta)
if is_cov:
print("Stopping criteria met!")
break;
rho = np.fft.ifftshift(phased.rho_)
save_mrc(settings.out_dir / f"ac-{generation}.mrc", phased.ac)
save_mrc(settings.out_dir / f"rho-{generation}.mrc", rho)
execution_fence(block=True)
logger.log(f"Results saved in {settings.out_dir}")
logger.log(f"Successfully completed in {timer.total():.2f}s.")
# read the data and run the main algorithm. This can be repeated
@nvtx.annotate("legion/main.py", is_prefix=True)
def main():
logger = utils.Logger(True)