Commit 9da1d354 authored by Chun Hong Yoon's avatar Chun Hong Yoon
Browse files

Merge branch 'ap/fsc' into 'development'

Overhauled FSC code

See merge request !37
parents 6902b35b 10e0f3ea
import numpy as np
import mrcfile
from geom import *
from scipy import ndimage
def save_mrc(output, data, voxel_size=None, header_origin=None):
"""
Save numpy array as an MRC file.
Parameters
----------
output : string, default None
if supplied, save the aligned volume to this path in MRC format
data : numpy.ndarray
image or volume to save
voxel_size : float, default None
if supplied, use as value of voxel size in Angstrom in the header
header_origin : numpy.recarray
if supplied, use the origin from this header object
"""
mrc = mrcfile.new(output, overwrite=True)
mrc.header.map = mrcfile.constants.MAP_ID
mrc.set_data(data.astype(np.float32))
if voxel_size is not None:
mrc.voxel_size = voxel_size
if header_origin is not None:
mrc.header['origin']['x'] = float(header_origin['origin']['x'])
mrc.header['origin']['y'] = float(header_origin['origin']['y'])
mrc.header['origin']['z'] = float(header_origin['origin']['z'])
mrc.update_header_from_data()
mrc.update_header_stats()
mrc.close()
return
def rotate_volume(vol, quat):
"""
Rotate copies of the volume by the given quaternions.
Parameters
----------
vol : numpy.ndarray, shape (n,n,n)
volume to be rotated
quat : numpy.ndarray, shape (n_quat,4)
quaternions to apply to the volume
Returns
-------
rot_vol : numpy.ndarray, shape (n_quat,n,n,n)
rotated copies of volume
"""
M = vol.shape[0]
lincoords = np.arange(M)
coords = np.meshgrid(lincoords,lincoords,lincoords)
xyz = np.vstack([coords[0].reshape(-1) - int(M/2),
coords[1].reshape(-1) - int(M/2),
coords[2].reshape(-1) - int(M/2)])
R = quaternion2rot3d(np.array(quat))
transformed_xyz = np.dot(R,xyz) + int(M/2)
new_xyz = [transformed_xyz[:,1,:].flatten(),
transformed_xyz[:,0,:].flatten(),
transformed_xyz[:,2,:].flatten()]
rot_vol = ndimage.map_coordinates(vol, new_xyz, order=1)
rot_vol = rot_vol.reshape((quat.shape[0],M,M,M))
return rot_vol
def center_volume(vol):
"""
Apply translational shifts to center the density within the volume.
Parameters
----------
vol : numpy.ndarray, shape (n,n,n)
volume to be centered
Returns
-------
cen_vol : numpy.ndarray, shape (n,n,n)
centered volume
"""
old_center = np.array(ndimage.measurements.center_of_mass(vol))
new_center = np.array(np.array(vol.shape)/2).astype(int)
cen_vol = ndimage.shift(vol, -1*(old_center - new_center))
return cen_vol
def pearson_cc(arr1, arr2):
"""
Compute the Pearson correlation-coefficient between the input arrays.
Parameters
----------
arr1 : numpy.ndarray, shape (n_samples, n_points)
input array
arr2 : numpy.ndarray, shape (n_samples, n_points) or (1, n_points)
input array to compute CC with
Returns
-------
ccs : numpy.ndarray, shape (n_samples)
correlation coefficient between paired sample arrays, or if
arr2.shape[0] == 1, then between each sample of arr1 to arr2
"""
vx = arr1 - arr1.mean(axis=-1)[:,None]
vy = arr2 - arr2.mean(axis=-1)[:,None]
numerator = np.sum(vx * vy, axis=1)
denom = np.sqrt(np.sum(vx**2, axis=1)) * np.sqrt(np.sum(vy**2, axis=1))
return numerator / denom
def score_deformations(mrc1, mrc2, warp):
"""
Compute the Pearson correlation coefficient between the input volumes
after rotating or displacing the first volume by the given quaternions
or translations.
Parameters
----------
mrc1 : numpy.ndarray, shape (n,n,n)
volume to be warped
mrc2 : numpy.ndarray, shape (n,n,n)
volume to be held fixed
warp : numpy.ndarray, shape (n_quat,4) or (n_trans,3)
rotations or displacements to apply to mrc2
Returns
-------
ccs : numpy.ndarray, shape (n_quat)
correlation coefficients associated with warp
"""
# deform one of the input volumes by rotation or displacement
if warp.shape[-1] == 4:
wmrc1 = rotate_volume(mrc1, warp)
elif warp.shape[-1] == 3:
print("Not yet implemented")
return
else:
print("Warp input must be quaternions or translations")
return
# score each deformed volume using the Pearson CC
wmrc1_flat = wmrc1.reshape(wmrc1.shape[0],-1)
mrc2_flat = np.expand_dims(mrc2.flatten(), axis=0)
ccs = pearson_cc(wmrc1_flat, mrc2_flat)
return ccs
def scan_orientations_fine(mrc1, mrc2, opt_q, prev_score, n_iterations=10, n_search=420):
"""
Perform a fine alignment search in the vicinity of the input quaternion
to align mrc1 to mrc2.
Parameters
----------
mrc1 : numpy.ndarray, shape (n,n,n)
volume to be rotated
mrc2 : numpy.ndarray, shape (n,n,n)
volume to be held fixed
opt_q : numpy.ndarray, shape (1,4)
starting quaternion to apply to mrc1 to align it with mrc2
prev_score : float
cross-correlation associated with alignment quat opt_q
n_iterations: int, default 10
number of iterations of alignment to perform
n_search : int, default 420
number of quaternions to score at each orientation
Returns
-------
opt_q : numpy.ndarray, shape (4)
quaternion to apply to mrc1 to align it with mrc2
prev_score : float
cross-correlation between aligned mrc1 and mrc2
"""
# perform a series of fine alignment, ending if CC no longer improves
sigmas = 2-0.2*np.arange(1,10)
for n in range(1, n_iterations):
quat = get_preferred_orientation_quat(n_search-1, float(sigmas[n-1]), base_quat=opt_q)
quat = np.vstack((opt_q, quat))
ccs = score_deformations(mrc1, mrc2, quat)
if np.max(ccs) < prev_score:
break
else:
opt_q = quat[np.argmax(ccs)]
#print(torch.max(ccs), opt_q) # useful for debugging
prev_score = np.max(ccs)
return opt_q, prev_score
def scan_orientations(mrc1, mrc2, n_iterations=10, n_search=420, nscs=1):
"""
Find the quaternion and its associated score that best aligns volume mrc1 to mrc2.
Candidate orientations are scored based on the Pearson correlation coefficient.
First a coarse search is performed, followed by a series of increasingly fine
searches in angular space. To prevent getting stuck in a bad solution, the top nscs
solutions from the coarse grained search can be investigated.
Parameters
----------
mrc1 : numpy.ndarray, shape (n,n,n)
volume to be rotated
mrc2 : numpy.ndarray, shape (n,n,n)
volume to be held fixed
n_iterations: int, default 10
number of iterations of alignment to perform
n_search : int, default 420
number of quaternions to score at each orientation
nscs : int, default 1
number of solutions from the coarse-grained search to investigate
Returns
-------
opt_q : numpy.ndarray, shape (4)
quaternion to apply to mrc1 to align it with mrc2
score : float
cross-correlation between aligned mrc1 and mrc2
"""
# perform a coarse alignment to start
quat = sk.get_uniform_quat(n_search) # update for skopi
ccs = score_deformations(mrc1, mrc2, quat)
ccs_order = np.argsort(ccs)[::-1]
# scan the top solutions
opt_q_list, ccs_list = np.zeros((nscs,4)), np.zeros(nscs)
for n in range(nscs):
start_q, start_score = quat[ccs_order[n]], ccs[ccs_order[n]]
opt_q_list[n], ccs_list[n] = scan_orientations_fine(mrc1, mrc2, start_q, start_score,
n_iterations=n_iterations, n_search=n_search)
opt_q, score = opt_q_list[np.argmax(ccs_list)], np.max(ccs_list)
return opt_q, score
def align_volumes(mrc1, mrc2, zoom=1, sigma=0, n_iterations=10, n_search=420, nscs=1, output=None, voxel_size=None):
"""
Find the quaternion that best aligns volume mrc1 to mrc2. Volumes are
optionally preprocessed by up / downsampling and applying a Gaussian
filter.
Parameters
----------
mrc1 : numpy.ndarray, shape (n,n,n)
volume to be rotated
mrc2 : numpy.ndarray, shape (n,n,n)
volume to be held fixed
zoom : float, default 1
if not 1, sample by which to up or downsample volume
sigma : int, default 0
sigma of Gaussian filter to apply to each volume
n_iterations: int, default 10
number of iterations of alignment to perform
n_search : int, default 420
number of quaternions to score at each orientation
nscs : int, default 1
number of solutions from the coarse-grained alignment search to investigate
output : string, default None
if supplied, save the aligned volume to this path in MRC format
voxel_size : float, default None
if supplied, use as value of voxel size in Angstrom for output
Returns
-------
r_vol : numpy.ndarray, shape (n,n,n)
copy of centered mrc1 aligned with centered mrc2
mrc2_original : umpy.ndarray, shape (n,n,n)
copy of centered mrc2
"""
# center input volumes, then make copies
mrc1, mrc2 = center_volume(mrc1), center_volume(mrc2)
mrc1_original = mrc1.copy()
mrc2_original = mrc2.copy()
# optionally up/downsample volumes
if zoom != 1:
mrc1 = ndimage.zoom(mrc1, (zoom,zoom,zoom))
mrc2 = ndimage.zoom(mrc2, (zoom,zoom,zoom))
# optionally apply a Gaussian filter to volumes
if sigma != 0:
mrc1 = ndimage.gaussian_filter(mrc1, sigma=sigma)
mrc2 = ndimage.gaussian_filter(mrc2, sigma=sigma)
# evaluate both hands
opt_q1, cc1 = scan_orientations(mrc1, mrc2, n_iterations, n_search, nscs=nscs)
opt_q2, cc2 = scan_orientations(np.flip(mrc1, [0,1,2]), mrc2, n_iterations, n_search, nscs=nscs)
if cc1 > cc2:
opt_q, cc_r, invert = opt_q1, cc1, False
else:
opt_q, cc_r, invert = opt_q2, cc2, True
print(f"Alignment CC after rotation is: {cc_r:.3f}")
# generate final aligned map
if invert:
print("Map had to be inverted")
mrc1_original = np.flip(mrc1_original, [0,1,2])
r_vol = rotate_volume(mrc1_original, np.expand_dims(opt_q, axis=0))[0]
final_cc = pearson_cc(np.expand_dims(r_vol.flatten(), axis=0), np.expand_dims(mrc2_original.flatten(), axis=0))
print(f"Final CC between unzoomed / unfiltered volumes is: {float(final_cc):.3f}")
if output is not None:
save_mrc(output, np.array(r_vol), voxel_size=voxel_size)
return r_vol, mrc2_original
import argparse, time, os
import numpy as np
import h5py, mrcfile
import scipy.interpolate
from align import save_mrc, align_volumes
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
"""
Estimate resolution based on the Fourier shell correlation (FSC) between
the output MRC file and a reference density map generated from a PDB file.
"""
def parse_input():
"""
Parse command line input.
"""
parser = argparse.ArgumentParser(description="Simulate a simple SPI dataset.")
parser.add_argument('-m', '--mrc_file', help='mrc file of reconstructed map', required=True, type=str)
parser.add_argument('-d', '--dataset', help='h5 file of simulated data', required=True, type=str)
parser.add_argument('-p', '--pdb_file', help='pdb file for reference structure', required=True, type=str)
parser.add_argument('-o', '--output', help='output directory for aligned volumes', required=False, type=str)
# optional arguments to adjust alignment protocol
parser.add_argument('--zoom', help='Zoom factor during alignment', required=False, type=float, default=1)
parser.add_argument('--sigma', help='Sigma for Gaussian filtering during alignment', required=False, type=float, default=0)
parser.add_argument('--niter', help='Number of alignment iterations to run', required=False, type=int, default=10)
parser.add_argument('--nsearch', help='Number of quaternions to score per iteration', required=False, type=int, default=360)
return vars(parser.parse_args())
def get_reciprocal_mesh(voxel_number_1d, distance_reciprocal_max):
"""
Get a centered, symetric mesh of given dimensions. Altered from skopi.
Parameters
----------
voxel_number_1d : int
number of voxels per axis
distance_reciprocal_max : float
maximum voxel resolution in inverse Angstrom
Returns
-------
reciprocal_mesh : numpy.ndarray, shape (n,n,n,3)
grid of reciprocal space vectors for each voxel
"""
max_value = distance_reciprocal_max
linspace = np.linspace(-max_value, max_value, voxel_number_1d)
reciprocal_mesh_stack = np.asarray(
np.meshgrid(linspace, linspace, linspace, indexing='ij'))
reciprocal_mesh = np.moveaxis(reciprocal_mesh_stack, 0, -1)
return reciprocal_mesh
def compute_reference(pdb_file, M, distance_reciprocal_max):
"""
Compute the reference density map from a PDB file using skopi.
Parameters
----------
pdb_file : string
path to coordinates file in pdb format
M : int
number of voxels along each dimension of map
distance_reciprocal_max : floa
maximum voxel resolution in inverse Angstrom
Returns
-------
density : numpy.ndarray, shape (M,M,M)
reference density map
"""
import skopi.gpu as pg
import skopi as sk
# set up Particle object
particle = sk.Particle()
particle.read_pdb(pdb_file, ff='WK')
# compute ideal diffraction volume and take FT for density map
mesh = get_reciprocal_mesh(M, distance_reciprocal_max)
cfield = pg.calculate_diffraction_pattern_gpu(mesh, particle, return_type='complex_field')
ivol = np.square(np.abs(cfield))
density = np.fft.fftshift(np.fft.ifftn(np.fft.ifftshift(cfield))).real
return density
def compute_fsc(volume1, volume2, distance_reciprocal_max, spacing=0.01, output=None):
"""
Compute the Fourier shell correlation (FSC) curve, with the
estimated resolution based on a threshold of 0.5.
Parameters
----------
volume1 : numpy.ndarray, shape (n,n,n)
reference map
volume2 : numpy.ndarray, shape (n,n,n)
reconstructed map
distance_reciprocal_max : float
maximum voxel resolution in inverse Angstrom
spacing : float
spacing for evaluating FSC in inverse Angstrom
output : string, optional
directory to which to save png of FSC curve
Returns
-------
resolution : float
estimated resolution of reconstructed map in Angstroms
"""
mesh = get_reciprocal_mesh(volume1.shape[0], distance_reciprocal_max)
smags = np.linalg.norm(mesh, axis=-1).flatten() * 1e-10
r_spacings = np.arange(0, smags.max() / np.sqrt(3), spacing)
ft1 = np.fft.fftshift(np.fft.fftn(volume1)).flatten()
ft2 = np.conjugate(np.fft.fftshift(np.fft.fftn(volume2)).flatten())
rshell, fsc = np.zeros(len(r_spacings)), np.zeros(len(r_spacings))
for i,r in enumerate(r_spacings):
indices = np.where((smags>r) & (smags<r+spacing))[0]
numerator = np.sum(ft1[indices] * ft2[indices])
denominator = np.sqrt(np.sum(np.square(np.abs(ft1[indices]))) * np.sum(np.square(np.abs(ft2[indices]))))
rshell[i] = r + 0.5*spacing
fsc[i] = numerator.real / denominator
f = scipy.interpolate.interp1d(fsc, rshell)
try:
resolution = 1.0/f(0.5)
print(f"Estimated resolution from FSC: {resolution:.1f} Angstrom")
except ValueError:
resolution = -1
print("Resolution could not be estimated.")
# optionally plot
if output is not None:
f, ax1 = plt.subplots(figsize=(5,3))
ax1.plot(rshell,fsc, c='black')
ax1.scatter(rshell,fsc, c='black')
ax1.plot([rshell.min(),rshell.max()],[0.5,0.5], c='grey', linestyle='dashed')
ax1.set_xlim(rshell.min(),rshell.max())
ax1.set_xlabel("Resolution (1/${\mathrm{\AA}}$)")
ax1.set_ylabel("FSC", fontsize=12)
f.savefig(os.path.join(output, "fsc.png"), dpi=300, bbox_inches='tight')
return resolution
def main():
args = parse_input()
if not os.path.isdir(args['output']):
os.mkdir(args['output'])
# load and prepare input files
volume = mrcfile.open(args['mrc_file']).data.copy()
with h5py.File(args['dataset'], "r") as f:
dist_recip_max = np.linalg.norm(f['pixel_position_reciprocal'][:], axis=-1).max()
reference = compute_reference(args['pdb_file'], volume.shape[0], dist_recip_max)
# align volumes
ali_volume, ali_reference = align_volumes(volume, reference, zoom=args['zoom'], sigma=args['sigma'],
n_iterations=args['niter'], n_search=args['nsearch'])
if args['output'] is not None:
save_mrc(os.path.join(args['output'], "reference.mrc"), ali_reference)
save_mrc(os.path.join(args['output'], "aligned.mrc"), ali_volume)
# compute fsc
resolution = compute_fsc(ali_reference, ali_volume, dist_recip_max, output=args['output'])
if __name__ == '__main__':
main()
import numpy as np
import skopi as sk
"""
Batched versions of skopi geometry functions for handling rotations.
"""
def quaternion2rot3d(quat):
"""
Convert a set of quaternions to rotation matrices.
This function was originally adopted from:
https://github.com/duaneloh/Dragonfly/blob/master/src/interp.c
and then from skopi and has been modified to convert in batch.
Parameters
----------
quat : numpy.ndarray, shape (n_quat, 4)
series of quaternions
Returns
-------
rotation : numpy.ndarray, shape (n_quat, 3, 3)
series of rotation matrices
"""
q01 = quat[:,0] * quat[:,1]
q02 = quat[:,0] * quat[:,2]
q03 = quat[:,0] * quat[:,3]
q11 = quat[:,1] * quat[:,1]
q12 = quat[:,1] * quat[:,2]
q13 = quat[:,1] * quat[:,3]
q22 = quat[:,2] * quat[:,2]
q23 = quat[:,2] * quat[:,3]
q33 = quat[:,3] * quat[:,3]
# Obtain the rotation matrix
rotation = np.zeros((quat.shape[0], 3, 3))
rotation[:,0, 0] = (1. - 2. * (q22 + q33))
rotation[:,0, 1] = 2. * (q12 - q03)
rotation[:,0, 2] = 2. * (q13 + q02)
rotation[:,1, 0] = 2. * (q12 + q03)
rotation[:,1, 1] = (1. - 2. * (q11 + q33))
rotation[:,1, 2] = 2. * (q23 - q01)
rotation[:,2, 0] = 2. * (q13 - q02)
rotation[:,2, 1] = 2. * (q23 + q01)
rotation[:,2, 2] = (1. - 2. * (q11 + q22))
return rotation
def axis_angle_to_quaternion(axis, theta):
"""
Convert an angular rotation around an axis series to quaternions.
Parameters
----------
axis : numpy.ndarray, size (num_pts, 3)
axis vector defining rotation
theta : numpy.ndarray, size (num_pts)
angle in radians defining anticlockwise rotation around axis
Returns
-------
quat : numpy.ndarray, size (num_pts, 4)
quaternions corresponding to axis/theta rotations
"""
axis /= np.linalg.norm(axis, axis=1)[:,None]
angle = theta / 2
quat = np.zeros((len(theta), 4))
quat[:,0] = np.cos(angle)
quat[:,1:] = np.sin(angle)[:,None] * axis
return quat
def quaternion_product(q1, q0):
"""
Compute quaternion product, q1 x q0, according to:
https://www.mathworks.com/help/aeroblks/quaternionmultiplication.html.
This should yield the same result as pytorch3d.transforms.quaternion_multiply
(or possibly the -1*q_prod, which represents the same rotation).
Parameters
----------
q0 : numpy.ndarray, shape (n, 4)
first quaternion to rotate by
q1 : numpy.ndarray, shape (n, 4)
second quaternion to rotate by
Returns
-------
q_prod : numpy.ndarray, shape (n, 4)
quaternion product q1 x q0, rotation by q0 followed by q1
"""
p0, p1, p2, p3 = q1[:,0], q1[:,1], q1[:,2], q1[:,3]
r0, r1, r2, r3 = q0[:,0], q0[:,1], q0[:,2], q0[:,3]
q_prod = np.array([r0 * p0 - r1 * p1 - r2 * p2 - r3 * p3,
r0 * p1 + r1 * p0 - r2 * p3 + r3 * p2,
r0 * p2 + r1 * p3 + r2 * p0 - r3 * p1,
r0 * p3 - r1 * p2 + r2 * p1 + r3 * p0]).T
return q_prod
def get_preferred_orientation_quat(num_pts, sigma, base_quat=None):
"""
Sample quaternions distributed around a given or random position in a restricted
range in SO(3), where the spread of the distribution is determined by sigma.
Parameters
----------
num_pts : int
number of quaternions to generate
sigma : float
standard deviation in radians for angular sampling
base_quat : numpy.ndarray, size (4)
quaternion about which to distribute samples, random if None
Returns
-------
quat : numpy.ndarray, size (num_quats, 4)
quaternions with preferred orientations
"""
if base_quat is None:
base_quat = sk.get_random_quat(1) ### need to change to skopi
base_quat = np.tile(base_quat, num_pts).reshape(num_pts, 4)
R_random = quaternion2rot3d(sk.get_random_quat(num_pts)) ### need to change to skopi
unitvec = np.array([0,0,1.0])
rot_axis = np.matmul(unitvec, R_random)
theta = sigma * np.random.randn(num_pts)
rand_axis = theta[:,None] * rot_axis
pref_quat = axis_angle_to_quaternion(rand_axis, theta)
quat = quaternion_product(pref_quat, base_quat)
return quat