build program interface for buildGraph

n-beckage · Apr 4, 2023 · 9c74cd9 · 9c74cd9
1 parent e23e857
commit 9c74cd9
Show file tree

Hide file tree

Showing 3 changed files with 163 additions and 41 deletions.
diff --git a/ChemTools.py b/ChemTools.py
@@ -312,23 +312,24 @@ def configure(receptor,ligand,iteration='test',fname="config",size=20,exhaustive
     config_i=fname+"_"+iteration+'.txt'
     out_name=out+"_"+iteration
     os.chdir('configs')
-    with open(config_i,'w') as f:
-        f.write('receptor = '+receptor+'\n')
-        f.write('ligand = '+ligand+'\n\n')
-        f.write('out = '+out_name+'.pdbqt'+'\n\n')
-        f.write('center_x = '+str(center_x)+'\n')
-        f.write('center_y = '+str(center_y)+'\n')
-        f.write('center_z = '+str(center_z)+'\n\n')
-        f.write('size_x = '+str(size)+'\n')
-        f.write('size_y = '+str(size)+'\n')
-        f.write('size_z = '+str(size)+'\n\n')
-        f.write('exhaustiveness = '+str(exhaustiveness)+'\n\n')
-        f.write('cpu = '+str(cpu)+'\n\n')
-        f.write('num_modes = '+str(num_modes)+'\n\n')
-        f.write('seed = '+str(seed)+'\n\n')
-        f.write('verbosity = '+str(verbosity))
-        if score_only:
-            f.write('\n\nscore_only = true')
+    f = open(config_i,'w')
+    f.write('receptor = '+receptor+'\n')
+    f.write('ligand = '+ligand+'\n\n')
+    f.write('out = '+out_name+'.pdbqt'+'\n\n')
+    f.write('center_x = '+str(center_x)+'\n')
+    f.write('center_y = '+str(center_y)+'\n')
+    f.write('center_z = '+str(center_z)+'\n\n')
+    f.write('size_x = '+str(size)+'\n')
+    f.write('size_y = '+str(size)+'\n')
+    f.write('size_z = '+str(size)+'\n\n')
+    f.write('exhaustiveness = '+str(exhaustiveness)+'\n\n')
+    f.write('cpu = '+str(cpu)+'\n\n')
+    f.write('num_modes = '+str(num_modes)+'\n\n')
+    f.write('seed = '+str(seed)+'\n\n')
+    f.write('verbosity = '+str(verbosity))
+    if score_only:
+        f.write('\n\nscore_only = true')
+    f.close()
     os.chdir('../')
     return config_i,out_name
 
@@ -388,30 +389,31 @@ def dock_it(lig_smile,prot_pdbqt,exhaustiveness=8,iiter='test'):
     configuration,out_name=configure(prot_pdbqt,lig_pdbqt,iiter,exhaustiveness=exhaustiveness,seed=1,verbosity=2)
     # runs vina and logs results
     logfile="logs/log_"+iiter+".txt"
-    with open(logfile,'w') as log:
-        if pf.system()=='Linux':
-            run=sp.run("vina_1.2.3_linux_x86_64 --config=configs/"+configuration,shell=True,stdout=log)
-        elif pf.system()=='Windows':
-            run=sp.run("vina --config=configs/"+configuration,shell=True,stdout=log)
+    log = open(logfile, 'w')
+    if pf.system()=='Linux':
+        run=sp.run("vina_1.2.3_linux_x86_64 --config=configs/"+configuration,shell=True,stdout=log)
+    elif pf.system()=='Windows':
+        run=sp.run("vina --config=configs/"+configuration,shell=True,stdout=log)
+    log.close()
     # splitting output
     sp.call("vina_split --input "+out_name+'.pdbqt',shell=True)
     # deleting the original out file from vina plus all but the best modes from vina_split
     print("OUT_NAME:",out_name)
     os.remove(out_name+'.pdbqt')
     # systematically deleting all output ligands other than the best (ligand_1)
     count=2
-    fname=out_name+f'_ligand_{count}.pdbqt'
+    fname = '{}_ligand_{}.pdbqt'.format(out_name, count)
     while os.path.isfile(fname):
         os.remove(fname)
         count+=1
-        fname=out_name+f'_ligand_{count}.pdbqt'
+        fname = '{}_ligand_{}.pdbqt'.format(out_name, count)
     # renaming the best ligand output
     best_out=out_name+"_ligand_1.pdbqt"
     split=best_out.rsplit("_1",1)
     os.rename(best_out,''.join(split))
     # Opening the log file to read in the best affinity
-    with open(logfile,"r") as results:
-        lines=results.readlines()[::-1]
+    results = open(logfile, 'r')
+    lines=results.readlines()[::-1]
     best_mode=[i for i in lines if re.match('\s+1\s',i)][0]
     print("9th line from the end:\n",best_mode)
     s=0
@@ -790,8 +792,23 @@ def check_if_not_real(smiles):
 ##### string seed - the SMILE string of the starting molecule to seed the graph
 ##### int depth - the number of generations to explore
 ##### boolean complete_connections - flag to determine wether or not to add the remaing connections to outermost nodes post-loop
+##### boolean write_to_log - flag to determine if a log file for the graph build should be written. Default True.
 ### returns nx.Graph chemical_space_graph - the completed chemical space graph
-def buildGraph(seed, depth, complete_connections = False):
+def buildGraph(seed, depth, complete_connections = False, write_to_log = True):
+
+    ###
+    # if write_to_log:
+    #     if log_file_name is None:
+    #         log_file_name = f"buildGraph_log_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.txt"
+    #     log_file = open(log_file_name, "w")
+    #     log_file.write(f"GRAPH PARAMETERS\nSeed: {seed}\nDepth: {depth}\nComplete Connections: {complete_connections}\n\n")
+    ###
+
+    log_file_name = "log_"+seed+"_d"+str(depth)+"_ec"+str(complete_connections)+".txt"
+    if write_to_log:
+        logfile = open(log_file_name, "w")
+        logfile.write(tab([[seed, depth, complete_connections]],headers=['Seed','Depth','Complete Connections'])+'\n')
+
     start_graph_time = time()
     print('GRAPH PARAMETERS')
     print(tab([[seed, depth, complete_connections]],headers=['Seed','Depth','Complete Connections']),'\n')
@@ -828,13 +845,18 @@ def buildGraph(seed, depth, complete_connections = False):
         #set the leaf list to the new one
         leafs=new_leafs
         print("number of leafs for next iter",len(leafs))
-
+        if write_to_log:
+            logfile.write("\nnumber of leafs for next iter: "+str(len(leafs)))
         # estimating and reporting time for next generation of nodes to be added
         expected_time = len(leafs)*np.average(leaf_times)
         print("expected time for next iter:",reportTime(expected_time))
+        if write_to_log:
+            logfile.write("\nexpected time for next iter: "+reportTime(expected_time))
 
     if complete_connections:
         print("all nodes created; now adding remaing edges (expected time above)")
+        if write_to_log:
+            logfile.write("\nall nodes created; now adding remaing edges (expected time above)")
         # it will be most efficient to just add the last remaining connections post-loop, once all nodes are created
         ti = time()
         for leaf in leafs:
@@ -844,13 +866,21 @@ def buildGraph(seed, depth, complete_connections = False):
                     chemical_space_graph.add_edge(leaf,neigh)
         tf = time() - ti
         print("adding final edges actually took:",reportTime(tf))
+        if write_to_log:
+            logfile.write("\nadding final edges actually took: "+reportTime(tf))
 
     build_time = time() - start_graph_time
     print("total build time for this graph:",reportTime(build_time))
 
     print("total number of nodes:",chemical_space_graph.number_of_nodes())
     print("total number of edges",chemical_space_graph.number_of_edges())
 
+    if write_to_log:
+        logfile.write("\ntotal build time for this graph: "+reportTime(build_time))
+        logfile.write("\ntotal number of nodes: "+str(chemical_space_graph.number_of_nodes()))
+        logfile.write("\ntotal number of edges: "+str(chemical_space_graph.number_of_edges()))
+        logfile.close()  # logfile is only opened when write_to_log is True, so only close it here
+
     return chemical_space_graph
 
 ### def reportTime

diff --git a/graphBuild.py b/graphBuild.py
@@ -1,12 +1,14 @@
 #!/usr/bin/env python
 
-import argparse as ap
+import argparse
 from ChemTools import *
+import datetime
+import time
 
 ################################################################# BEGIN SCRIPT #####################################################################################
 
 # start by creating command-line interface
-csBuilder = ap.ArgumentParser(prog="CSGraphExplorer 1.0",description="a program that builds a local chemical space graph starting at a given seed molecule. Returns an HTML file containing a faerun visualization of the graph")
+csBuilder = argparse.ArgumentParser(prog="CSGraphExplorer 1.0",description="a program that builds a local chemical space graph starting at a given seed molecule. Returns an HTML file containing a faerun visualization of the graph")
 # SMILE seed is the only positional argument
 csBuilder.add_argument('SMILE',type=str,help="the SMILE string of the molecule to seed the graph with")
 # depth and cc have defaults
@@ -50,9 +52,57 @@
 
 # print()
 
+
+seed = args.SMILE
+depth = args.depth
+cc = args.exhaustive_connections
+
+
+########### my old script ###################
 # running buildGraph() with args
-csg = buildGraph(args.SMILE,args.depth,args.exhaustive_connections)
+# csg = buildGraph(seed,depth,cc)
+
+# gname = input("Please enter a filename for the graph: ")
+# csg_name = gname+"_d"+str(depth)+"_cc"+str(cc)
+# faerunPlot(csg, csg_name)
+
+####################### chatGPT ################
+# Record start time
+start_time = time.time()
+
+# Run buildGraph function
+csg = buildGraph(seed,depth,cc)
+
+# Record time taken for buildGraph function
+buildGraph_time = time.time() - start_time
+
+csg_name = seed+"_d"+str(depth)+"_cc"+str(cc)
+
+# Record start time for faerunPlot function
+start_time = time.time()
+
+# Run faerunPlot function
+faerunPlot(csg, csg_name)
+
+# Record time taken for faerunPlot function
+faerunPlot_time = time.time() - start_time
+
+# Calculate total time taken for entire program
+total_time = buildGraph_time + faerunPlot_time
+
+# Create filename for text file
+filename = seed + "_d" + str(depth) + "_cc" + str(cc) + ".txt"
+log_file_name = "log_"+seed+"_d"+str(depth)+"_ec"+str(cc)+".txt"
+
+# Convert times to datetime format for formatting
+buildGraph_datetime = datetime.timedelta(seconds=buildGraph_time)
+faerunPlot_datetime = datetime.timedelta(seconds=faerunPlot_time)
+total_time_datetime = datetime.timedelta(seconds=total_time)
 
-gname = input("Please enter a filename for the graph: ")
-csg_name = gname+"_d"+str(depth)+"_cc"+str(cc)
-faerunPlot(csg, csg_name)
+# Write data to text file
+# with open(filename, "w") as f:
+f = open(log_file_name, "a")
+f.write("\n\nBuildGraph time: " + str(buildGraph_datetime)[:-3] + "\n")
+f.write("faerunPlot time: " + str(faerunPlot_datetime)[:-3] + "\n")
+f.write("total time: " + str(total_time_datetime)[:-3] + "\n")
+f.close()
diff --git a/test_script.py b/test_script.py
@@ -1,13 +1,55 @@
 import argparse as ap
 import cProfile
+import pstats
 from ChemTools import *
+import csv
+import time
 
-bgRun=cProfile.Profile()
-bgRun.run('buildGraph("C",2,True)')
-stats = pstats.Stats(bgRun)
+# bgRun=cProfile.Profile()
+# bgRun.run('buildGraph("C",3,True)')
+# stats = pstats.Stats(bgRun)
 
-stats.strip_dirs()
-stats.sort_stats('cumtime')
-stats.print_stats()
-print("Total time: ", stats.total_tt)
+# stats.strip_dirs()
+# stats.sort_stats('cumtime')
+# stats.print_stats()
+# print("Total time: ", stats.total_tt)
 
+
+# new_smis = ['C','CC','CO','CJK']
+# new_mols = [Chem.MolToSmiles(Chem.MolFromSmiles(smi)) for smi in new_smis if Chem.MolFromSmiles(smi) is not None]
+
+# print(new_mols)
+
+runData = []
+seed = 'C'
+for depth in range(1, 4):
+	for exhaustive in [True, False]:
+		bgRun=cProfile.Profile()
+		run_params = "buildGraph("+"'"+seed+"'"+","+str(depth)+","+str(exhaustive)+")"
+		start_time = time.time()
+		graph = buildGraph(seed,depth,exhaustive)
+		print(run_params)
+		fae_params = "faerunPlot(graph,'test_graph')"
+		bgRun.run(fae_params)
+		end_time = time.time()
+		stats = pstats.Stats(bgRun)
+		stats.strip_dirs()
+		stats.sort_stats('cumtime')
+		log_file_name = f"faerunPlot_profile_{seed}_d{depth}_ec{exhaustive}.txt"
+		with open(log_file_name, "w") as f:
+			f.write(f"Seed: {seed}\n")
+			f.write(f"Depth: {depth}\n")
+			f.write(f"Exhaustive: {exhaustive}\n\n")
+			stats.stream = f
+			stats.print_stats()
+			f.write(f"Total faerunPlot time: {stats.total_tt}\n")
+			f.write("Total buildGraph+faerunPlot time: "+reportTime(end_time-start_time))
+		runData.append([seed,depth,exhaustive,stats.total_tt])
+
+csv_file_name = "Linux_C_plot_Data.csv"
+# write one row per profiled run; the with-block owns the only handle (newline='' as the csv module requires)
+with open(csv_file_name, "w", newline='') as csvfile:
+	writer = csv.writer(csvfile)
+	writer.writerow(["Seed", "Depth", "Exhaustive", "Total Time"])
+	for run in runData:
+		writer.writerow(run)