-
Notifications
You must be signed in to change notification settings - Fork 6
/
pallatom.py
executable file
·103 lines (86 loc) · 4.53 KB
/
pallatom.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# Copyright 2024 LEVINTHAL Biotechnology Co. Ltd
#
# Licensed under the Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License (the "License");
# you may not use this work except in compliance with the License.
# You may obtain a copy of the License at
#
# https://creativecommons.org/licenses/by-nc-sa/4.0/
#
# Unless required by applicable law or agreed to in writing, this work is provided on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import jax
import os
import numpy as np
from alphafold.common.residue_constants import restypes_wo_x
from modules.feature import save_all_pdb
from modules.ref_features import atom14_to_atom37
from modules.sampling import Sampler
from tqdm import trange
def main(data_dir, model_name, savepath, L, cuda_devices, t_min, t_max, gamma, step_scale, T, rounds):
    """Run the sampler for ``rounds`` rounds and write sequences and structures to ``savepath``.

    Each round prepares a batch, samples reference noise, denoises it, appends the
    decoded sequence to ``<savepath>/sample_seq.fasta`` and passes the final atoms
    to ``save_all_pdb`` under the prefix ``L{L}_denoised_{round_idx}``.

    Args:
        data_dir: Forwarded to ``Sampler`` as ``params_dir``.
        model_name: Forwarded to ``Sampler`` as ``model_name``.
        savepath: Output directory; created (including parents) if it does not exist.
        L: Forwarded to ``Sampler`` as ``sample_len`` and to ``prepare_batch``.
        cuda_devices: Value written to the ``CUDA_VISIBLE_DEVICES`` environment variable.
        t_min: First element of the ``add_noise_level`` list given to ``Sampler``.
        t_max: Second element of the ``add_noise_level`` list given to ``Sampler``.
        gamma: Third element of the ``add_noise_level`` list given to ``Sampler``.
        step_scale: Forwarded to ``Sampler`` as ``step_scale``.
        T: Forwarded to ``Sampler`` as ``T``.
        rounds: Number of sampling rounds to run.
    """
    # NOTE(review): these are set after `import jax` at module level; this presumably
    # relies on JAX not having initialised its backend yet -- confirm.
    os.environ['CUDA_VISIBLE_DEVICES'] = cuda_devices
    os.environ['XLA_PYTHON_CLIENT_PREALLOCATE'] = "false"
    print('Run denoising.....')

    # exist_ok=True avoids the check-then-create race of a separate exists() test
    # (e.g. several jobs started at once with the same output directory).
    os.makedirs(savepath, exist_ok=True)

    # The fasta file is (re)created on every run; one record is appended per round.
    fasta_file_path = os.path.join(savepath, 'sample_seq.fasta')
    with open(fasta_file_path, 'w') as fasta_file:
        # Initialize sampler
        sampler = Sampler(
            T=T, sample_len=L, use_selfcond=True,
            add_noise_level=[t_min, t_max, gamma], step_scale=step_scale,
            is_training=False, params_dir=data_dir, model_name=model_name)

        # Seed the JAX PRNG from NumPy's global RNG, so runs differ unless that RNG is seeded.
        seed = np.random.randint(0, 2147483647)
        key = jax.random.PRNGKey(seed)

        # Lookup table: residue-type index -> residue letter.
        aatype_tores = {i: res for i, res in enumerate(restypes_wo_x)}

        for round_idx in trange(rounds):
            # Carry `key` forward; key1/key2 are consumed by this round only.
            key, key1, key2 = jax.random.split(key, 3)

            # Prepare batch & sample noise
            batch = sampler.prepare_batch(L)
            batch = sampler.SampleReference(batch, key1)

            # Denoise; only the trajectory output is used below.
            _, out_traj = sampler.Sample(batch, key=key2)
            print(round_idx)

            prefix = f'L{L}_denoised_{round_idx}'
            mask_seq = batch['seq_mask'][0]
            # Per-atom mask obtained by indexing the sequence mask with ref_space_uid.
            mask_atom = mask_seq[batch['ref_space_uid'][0]]
            num_res = int(mask_seq.sum())
            num_atoms = int(mask_atom.sum())

            # Take index -1 along axis 1 (presumably the last denoising step -- TODO confirm),
            # argmax over the last axis, and keep only the unmasked leading entries.
            final_aa = np.argmax(out_traj['seq_logits'][:, -1, :], axis=-1)[:num_res, ...]

            # Write sequence to fasta file
            seq = ''.join([aatype_tores[x] for x in final_aa.tolist()])
            fasta_file.write(f'>{prefix}\n{seq}\n')
            # Flush once per round so the fasta stays in step with the PDB files already
            # on disk if the process is killed mid-run; negligible next to sampling cost.
            fasta_file.flush()

            # Save the sampled protein as PDB: regroup the flat atom list into
            # 14 atoms x 3 coordinates per residue before converting to atom37.
            final_atoms = np.array(out_traj['px0'][:, -1, :])[:num_atoms, ...]
            final_atoms = atom14_to_atom37(final_aa, final_atoms.reshape(-1, 14, 3))
            save_all_pdb(savepath, final_aa, final_atoms, plddt_array=None, prefix=prefix)
if __name__ == "__main__":
    # Command-line entry point: declare the options, parse them, and run main().
    cli = argparse.ArgumentParser(description='Run the Pallatom model sampling process.')

    # One row per option: (flag, type, default, help text).
    option_table = (
        ('--data_dir', str, './', 'Directory where model parameters are stored'),
        ('--model_name', str, 'Pallatom', 'Name of the model to use'),
        ('--savepath', str, './results', 'Directory where results will be saved'),
        ('--L', int, 100, 'Length of the sequence to sample'),
        ('--cuda_devices', str, '3', 'CUDA visible devices'),
        ('--t_min', float, 0.01, 'Minimum noise level for add_noise_level'),
        ('--t_max', float, 1.0, 'Maximum noise level for add_noise_level'),
        ('--gamma', float, 0.2, 'Gamma value for add_noise_level'),
        ('--step_scale', float, 2.25, 'Scale of the step'),
        ('--T', int, 200, 'Number of steps for the sampling process'),
        ('--rounds', int, 10, 'Number of rounds to run'),
    )
    for flag, value_type, fallback, description in option_table:
        cli.add_argument(flag, type=value_type, default=fallback, help=description)

    # Every option name matches a main() parameter one-to-one, so the parsed
    # namespace can be expanded directly into keyword arguments.
    main(**vars(cli.parse_args()))