Skip to content

Commit

Permalink
build program interface for buildGraph
Browse files Browse the repository at this point in the history
  • Loading branch information
n-beckage committed Apr 4, 2023
1 parent e23e857 commit 9c74cd9
Show file tree
Hide file tree
Showing 3 changed files with 163 additions and 41 deletions.
86 changes: 58 additions & 28 deletions ChemTools.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,23 +312,24 @@ def configure(receptor,ligand,iteration='test',fname="config",size=20,exhaustive
config_i=fname+"_"+iteration+'.txt'
out_name=out+"_"+iteration
os.chdir('configs')
with open(config_i,'w') as f:
f.write('receptor = '+receptor+'\n')
f.write('ligand = '+ligand+'\n\n')
f.write('out = '+out_name+'.pdbqt'+'\n\n')
f.write('center_x = '+str(center_x)+'\n')
f.write('center_y = '+str(center_y)+'\n')
f.write('center_z = '+str(center_z)+'\n\n')
f.write('size_x = '+str(size)+'\n')
f.write('size_y = '+str(size)+'\n')
f.write('size_z = '+str(size)+'\n\n')
f.write('exhaustiveness = '+str(exhaustiveness)+'\n\n')
f.write('cpu = '+str(cpu)+'\n\n')
f.write('num_modes = '+str(num_modes)+'\n\n')
f.write('seed = '+str(seed)+'\n\n')
f.write('verbosity = '+str(verbosity))
if score_only:
f.write('\n\nscore_only = true')
f = open(config_i,'w')
f.write('receptor = '+receptor+'\n')
f.write('ligand = '+ligand+'\n\n')
f.write('out = '+out_name+'.pdbqt'+'\n\n')
f.write('center_x = '+str(center_x)+'\n')
f.write('center_y = '+str(center_y)+'\n')
f.write('center_z = '+str(center_z)+'\n\n')
f.write('size_x = '+str(size)+'\n')
f.write('size_y = '+str(size)+'\n')
f.write('size_z = '+str(size)+'\n\n')
f.write('exhaustiveness = '+str(exhaustiveness)+'\n\n')
f.write('cpu = '+str(cpu)+'\n\n')
f.write('num_modes = '+str(num_modes)+'\n\n')
f.write('seed = '+str(seed)+'\n\n')
f.write('verbosity = '+str(verbosity))
if score_only:
f.write('\n\nscore_only = true')
f.close()
os.chdir('../')
return config_i,out_name

Expand Down Expand Up @@ -388,30 +389,31 @@ def dock_it(lig_smile,prot_pdbqt,exhaustiveness=8,iiter='test'):
configuration,out_name=configure(prot_pdbqt,lig_pdbqt,iiter,exhaustiveness=exhaustiveness,seed=1,verbosity=2)
# runs vina and logs results
logfile="logs/log_"+iiter+".txt"
with open(logfile,'w') as log:
if pf.system()=='Linux':
run=sp.run("vina_1.2.3_linux_x86_64 --config=configs/"+configuration,shell=True,stdout=log)
elif pf.system()=='Windows':
run=sp.run("vina --config=configs/"+configuration,shell=True,stdout=log)
log = open(logfile, 'w')
if pf.system()=='Linux':
run=sp.run("vina_1.2.3_linux_x86_64 --config=configs/"+configuration,shell=True,stdout=log)
elif pf.system()=='Windows':
run=sp.run("vina --config=configs/"+configuration,shell=True,stdout=log)
log.close()
# splitting output
sp.call("vina_split --input "+out_name+'.pdbqt',shell=True)
# deleting the original out file from vina plus all but the best modes from vina_split
print("OUT_NAME:",out_name)
os.remove(out_name+'.pdbqt')
# systematically deleting all output ligands other than the best (ligand_1)
count=2
fname=out_name+f'_ligand_{count}.pdbqt'
fname = '{}_ligand_{}.pdbqt'.format(out_name, count)
while os.path.isfile(fname):
os.remove(fname)
count+=1
fname=out_name+f'_ligand_{count}.pdbqt'
fname = '{}_ligand_{}.pdbqt'.format(out_name, count)
# renaming the best ligand output
best_out=out_name+"_ligand_1.pdbqt"
split=best_out.rsplit("_1",1)
os.rename(best_out,''.join(split))
# Opening the log file to read in the best affinity
with open(logfile,"r") as results:
lines=results.readlines()[::-1]
results = open(logfile, 'r')
lines=results.readlines()[::-1]
best_mode=[i for i in lines if re.match('\s+1\s',i)][0]
print("9th line from the end:\n",best_mode)
s=0
Expand Down Expand Up @@ -790,8 +792,23 @@ def check_if_not_real(smiles):
##### string seed - the SMILE string of the starting molecule to seed the graph
##### int depth - the number of generations to explore
##### boolean complete_connections - flag to determine whether or not to add the remaining connections to outermost nodes post-loop
##### boolean write_to_log - flag to determine if a log file for the graph build should be written. Default True.
### returns nx.Graph chemical_space_graph - the completed chemical space graph
def buildGraph(seed, depth, complete_connections = False):
def buildGraph(seed, depth, complete_connections = False, write_to_log = True):

###
# if write_to_log:
# if log_file_name is None:
# log_file_name = f"buildGraph_log_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.txt"
# log_file = open(log_file_name, "w")
# log_file.write(f"GRAPH PARAMETERS\nSeed: {seed}\nDepth: {depth}\nComplete Connections: {complete_connections}\n\n")
###

log_file_name = "log_"+seed+"_d"+str(depth)+"_ec"+str(complete_connections)+".txt"
if write_to_log:
logfile = open(log_file_name, "w")
logfile.write(tab([[seed, depth, complete_connections]],headers=['Seed','Depth','Complete Connections'])+'\n')

start_graph_time = time()
print('GRAPH PARAMETERS')
print(tab([[seed, depth, complete_connections]],headers=['Seed','Depth','Complete Connections']),'\n')
Expand Down Expand Up @@ -828,13 +845,18 @@ def buildGraph(seed, depth, complete_connections = False):
#set the leaf list to the new one
leafs=new_leafs
print("number of leafs for next iter",len(leafs))

if write_to_log:
logfile.write("\nnumber of leafs for next iter: "+str(len(leafs)))
# estimating and reporting time for next generation of nodes to be added
expected_time = len(leafs)*np.average(leaf_times)
print("expected time for next iter:",reportTime(expected_time))
if write_to_log:
logfile.write("\nexpected time for next iter: "+reportTime(expected_time))

if complete_connections:
print("all nodes created; now adding remaing edges (expected time above)")
if write_to_log:
logfile.write("\nall nodes created; now adding remaing edges (expected time above)")
# it will be most efficient to just add the last remaining connections post-loop, once all nodes are created
ti = time()
for leaf in leafs:
Expand All @@ -844,13 +866,21 @@ def buildGraph(seed, depth, complete_connections = False):
chemical_space_graph.add_edge(leaf,neigh)
tf = time() - ti
print("adding final edges actually took:",reportTime(tf))
if write_to_log:
logfile.write("\nadding final edges actually took: "+reportTime(tf))

build_time = time() - start_graph_time
print("total build time for this graph:",reportTime(build_time))

print("total number of nodes:",chemical_space_graph.number_of_nodes())
print("total number of edges",chemical_space_graph.number_of_edges())

if write_to_log:
logfile.write("\ntotal build time for this graph: "+reportTime(build_time))
logfile.write("\ntotal number of nodes: "+str(chemical_space_graph.number_of_nodes()))
logfile.write("\ntotal number of edges: "+str(chemical_space_graph.number_of_edges()))
logfile.close()

return chemical_space_graph

### def reportTime
Expand Down
62 changes: 56 additions & 6 deletions graphBuild.py
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
#!/usr/bin/env python

import argparse as ap
import argparse
from ChemTools import *
import datetime
import time

################################################################# BEGIN SCRIPT #####################################################################################

# start by creating command-line interface
csBuilder = ap.ArgumentParser(prog="CSGraphExplorer 1.0",description="a program that builds a local chemical space graph starting at a given seed molecule. Returns an HTML file containing a faerun visualization of the graph")
csBuilder = argparse.ArgumentParser(prog="CSGraphExplorer 1.0",description="a program that builds a local chemical space graph starting at a given seed molecule. Returns an HTML file containing a faerun visualization of the graph")
# SMILE seed is the only positional argument
csBuilder.add_argument('SMILE',type=str,help="the SMILE string of the molecule to seed the graph with")
# depth and cc have defaults
Expand Down Expand Up @@ -50,9 +52,57 @@

# print()


seed = args.SMILE
depth = args.depth
cc = args.exhaustive_connections


########### my old script ###################
# running buildGraph() with args
csg = buildGraph(args.SMILE,args.depth,args.exhaustive_connections)
# csg = buildGraph(seed,depth,cc)

# gname = input("Please enter a filename for the graph: ")
# csg_name = gname+"_d"+str(depth)+"_cc"+str(cc)
# faerunPlot(csg, csg_name)

####################### chatGPT ################
# Record start time
start_time = time.time()

# Run buildGraph function
csg = buildGraph(seed,depth,cc)

# Record time taken for buildGraph function
buildGraph_time = time.time() - start_time

csg_name = seed+"_d"+str(depth)+"_cc"+str(cc)

# Record start time for faerunPlot function
start_time = time.time()

# Run faerunPlot function
faerunPlot(csg, csg_name)

# Record time taken for faerunPlot function
faerunPlot_time = time.time() - start_time

# Calculate total time taken for entire program
total_time = buildGraph_time + faerunPlot_time

# Create filename for text file
filename = seed + "_d" + str(depth) + "_cc" + str(cc) + ".txt"
log_file_name = "log_"+seed+"_d"+str(depth)+"_ec"+str(cc)+".txt"

# Convert times to datetime format for formatting
buildGraph_datetime = datetime.timedelta(seconds=buildGraph_time)
faerunPlot_datetime = datetime.timedelta(seconds=faerunPlot_time)
total_time_datetime = datetime.timedelta(seconds=total_time)

gname = input("Please enter a filename for the graph: ")
csg_name = gname+"_d"+str(depth)+"_cc"+str(cc)
faerunPlot(csg, csg_name)
# Write data to text file
# Appends the timing summary to the log file; log_file_name is built with the
# same "log_<seed>_d<depth>_ec<flag>.txt" pattern that buildGraph() writes to.
# The context manager guarantees the handle is closed even if a write raises.
# NOTE(review): the [:-3] slice presumably trims microseconds down to
# milliseconds; it would cut into the seconds field if a timedelta ever had
# zero microseconds - confirm that is acceptable.
with open(log_file_name, "a") as f:
    f.write("\n\nBuildGraph time: " + str(buildGraph_datetime)[:-3] + "\n")
    f.write("faerunPlot time: " + str(faerunPlot_datetime)[:-3] + "\n")
    f.write("total time: " + str(total_time_datetime)[:-3] + "\n")
56 changes: 49 additions & 7 deletions test_script.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,55 @@
import argparse as ap
import cProfile
import pstats
from ChemTools import *
import csv
import time

bgRun=cProfile.Profile()
bgRun.run('buildGraph("C",2,True)')
stats = pstats.Stats(bgRun)
# bgRun=cProfile.Profile()
# bgRun.run('buildGraph("C",3,True)')
# stats = pstats.Stats(bgRun)

stats.strip_dirs()
stats.sort_stats('cumtime')
stats.print_stats()
print("Total time: ", stats.total_tt)
# stats.strip_dirs()
# stats.sort_stats('cumtime')
# stats.print_stats()
# print("Total time: ", stats.total_tt)


# new_smis = ['C','CC','CO','CJK']
# new_mols = [Chem.MolToSmiles(Chem.MolFromSmiles(smi)) for smi in new_smis if Chem.MolFromSmiles(smi) is not None]

# print(new_mols)

# Profile faerunPlot() for depths 1-3, with and without exhaustive
# connections. Each run writes its own profile report and contributes one
# summary row to runData.
runData = []
seed = 'C'
for depth in range(1, 4):
    for exhaustive in (True, False):
        bgRun = cProfile.Profile()
        run_params = "buildGraph('{}',{},{})".format(seed, depth, exhaustive)
        start_time = time.time()
        graph = buildGraph(seed, depth, exhaustive)
        print(run_params)
        # Only the plotting step is profiled. The statement is handed to the
        # profiler as a string, so `graph` has to stay a module-level name.
        fae_params = "faerunPlot(graph,'test_graph')"
        bgRun.run(fae_params)
        end_time = time.time()
        stats = pstats.Stats(bgRun).strip_dirs().sort_stats('cumtime')
        log_file_name = f"faerunPlot_profile_{seed}_d{depth}_ec{exhaustive}.txt"
        with open(log_file_name, "w") as f:
            f.writelines([
                f"Seed: {seed}\n",
                f"Depth: {depth}\n",
                f"Exhaustive: {exhaustive}\n\n",
            ])
            # Send the profiler report to the log file rather than stdout.
            stats.stream = f
            stats.print_stats()
            f.write(f"Total faerunPlot time: {stats.total_tt}\n")
            f.write("Total buildGraph+faerunPlot time: "+reportTime(end_time-start_time))
        runData.append([seed, depth, exhaustive, stats.total_tt])

# Export one summary row per profiled run (the rows collected in runData).
csv_file_name = "Linux_C_plot_Data.csv"
# Open the file exactly once. The previous code re-opened it inside the
# `with` block, rebinding `csvfile` to a second handle that was never closed.
# newline='' leaves line-ending handling to the csv module.
with open(csv_file_name, "w", newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(["Seed", "Depth", "Exhaustive", "Total Time"])
    writer.writerows(runData)

0 comments on commit 9c74cd9

Please sign in to comment.