From 9c74cd9f681880e04d04665ef69a670d0b33b516 Mon Sep 17 00:00:00 2001 From: n-beckage Date: Mon, 3 Apr 2023 20:00:07 -0400 Subject: [PATCH] build program interface for buildGraph --- ChemTools.py | 86 ++++++++++++++++++++++++++++++++++---------------- graphBuild.py | 62 ++++++++++++++++++++++++++++++++---- test_script.py | 56 ++++++++++++++++++++++++++++---- 3 files changed, 163 insertions(+), 41 deletions(-) mode change 100644 => 100755 graphBuild.py diff --git a/ChemTools.py b/ChemTools.py index 8e1029f..a419045 100644 --- a/ChemTools.py +++ b/ChemTools.py @@ -312,23 +312,24 @@ def configure(receptor,ligand,iteration='test',fname="config",size=20,exhaustive config_i=fname+"_"+iteration+'.txt' out_name=out+"_"+iteration os.chdir('configs') - with open(config_i,'w') as f: - f.write('receptor = '+receptor+'\n') - f.write('ligand = '+ligand+'\n\n') - f.write('out = '+out_name+'.pdbqt'+'\n\n') - f.write('center_x = '+str(center_x)+'\n') - f.write('center_y = '+str(center_y)+'\n') - f.write('center_z = '+str(center_z)+'\n\n') - f.write('size_x = '+str(size)+'\n') - f.write('size_y = '+str(size)+'\n') - f.write('size_z = '+str(size)+'\n\n') - f.write('exhaustiveness = '+str(exhaustiveness)+'\n\n') - f.write('cpu = '+str(cpu)+'\n\n') - f.write('num_modes = '+str(num_modes)+'\n\n') - f.write('seed = '+str(seed)+'\n\n') - f.write('verbosity = '+str(verbosity)) - if score_only: - f.write('\n\nscore_only = true') + f = open(config_i,'w') + f.write('receptor = '+receptor+'\n') + f.write('ligand = '+ligand+'\n\n') + f.write('out = '+out_name+'.pdbqt'+'\n\n') + f.write('center_x = '+str(center_x)+'\n') + f.write('center_y = '+str(center_y)+'\n') + f.write('center_z = '+str(center_z)+'\n\n') + f.write('size_x = '+str(size)+'\n') + f.write('size_y = '+str(size)+'\n') + f.write('size_z = '+str(size)+'\n\n') + f.write('exhaustiveness = '+str(exhaustiveness)+'\n\n') + f.write('cpu = '+str(cpu)+'\n\n') + f.write('num_modes = '+str(num_modes)+'\n\n') + f.write('seed = '+str(seed)+'\n\n') + f.write('verbosity = '+str(verbosity)) + if score_only: + f.write('\n\nscore_only = true') + f.close() os.chdir('../') return config_i,out_name @@ -388,11 +389,12 @@ def dock_it(lig_smile,prot_pdbqt,exhaustiveness=8,iiter='test'): configuration,out_name=configure(prot_pdbqt,lig_pdbqt,iiter,exhaustiveness=exhaustiveness,seed=1,verbosity=2) # runs vina and logs results logfile="logs/log_"+iiter+".txt" - with open(logfile,'w') as log: - if pf.system()=='Linux': - run=sp.run("vina_1.2.3_linux_x86_64 --config=configs/"+configuration,shell=True,stdout=log) - elif pf.system()=='Windows': - run=sp.run("vina --config=configs/"+configuration,shell=True,stdout=log) + log = open(logfile, 'w') + if pf.system()=='Linux': + run=sp.run("vina_1.2.3_linux_x86_64 --config=configs/"+configuration,shell=True,stdout=log) + elif pf.system()=='Windows': + run=sp.run("vina --config=configs/"+configuration,shell=True,stdout=log) + log.close() # splitting output sp.call("vina_split --input "+out_name+'.pdbqt',shell=True) # deleting the original out file from vina plus all but the best modes from vina_split @@ -400,18 +402,18 @@ def dock_it(lig_smile,prot_pdbqt,exhaustiveness=8,iiter='test'): os.remove(out_name+'.pdbqt') # systematically deleting all output ligands other than the best (ligand_1) count=2 - fname=out_name+f'_ligand_{count}.pdbqt' + fname = '{}_ligand_{}.pdbqt'.format(out_name, count) while os.path.isfile(fname): os.remove(fname) count+=1 - fname=out_name+f'_ligand_{count}.pdbqt' + fname = '{}_ligand_{}.pdbqt'.format(out_name, count) # renaming the best ligand output best_out=out_name+"_ligand_1.pdbqt" split=best_out.rsplit("_1",1) os.rename(best_out,''.join(split)) # Opening the log file to read in the best affinity - with open(logfile,"r") as results: - lines=results.readlines()[::-1] + results = open(logfile, 'r') + lines=results.readlines()[::-1] best_mode=[i for i in lines if re.match('\s+1\s',i)][0] print("9th line from the end:\n",best_mode) s=0 @@ -790,8 +792,23 @@ def check_if_not_real(smiles): ##### string seed - the SMILE string of the starting molecule to seed the graph ##### int depth - the number of generations to explore ##### boolean complete_connections - flag to determine wether or not to add the remaing connections to outermost nodes post-loop +##### boolean write_to_log - flag todetermine if log file for grpah build should be written. Default True. ### returns nx.Graph chemical_space_graph - the completed chemical space graph -def buildGraph(seed, depth, complete_connections = False): +def buildGraph(seed, depth, complete_connections = False, write_to_log = True): + + ### + # if write_to_log: + # if log_file_name is None: + # log_file_name = f"buildGraph_log_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.txt" + # log_file = open(log_file_name, "w") + # log_file.write(f"GRAPH PARAMETERS\nSeed: {seed}\nDepth: {depth}\nComplete Connections: {complete_connections}\n\n") + ### + + log_file_name = "log_"+seed+"_d"+str(depth)+"_ec"+str(complete_connections)+".txt" + if write_to_log: + logfile = open(log_file_name, "w") + logfile.write(tab([[seed, depth, complete_connections]],headers=['Seed','Depth','Complete Connections'])+'\n') + start_graph_time = time() print('GRAPH PARAMETERS') print(tab([[seed, depth, complete_connections]],headers=['Seed','Depth','Complete Connections']),'\n') @@ -828,13 +845,18 @@ def buildGraph(seed, depth, complete_connections = False): #set the leaf list to the new one leafs=new_leafs print("number of leafs for next iter",len(leafs)) - + if write_to_log: + logfile.write("\nnumber of leafs for next iter: "+str(len(leafs))) # estimating and reporting time for next generation of nodes to be added expected_time = len(leafs)*np.average(leaf_times) print("expected time for next iter:",reportTime(expected_time)) + if write_to_log: + logfile.write("\nexpected time for next iter: "+reportTime(expected_time)) if complete_connections: print("all nodes created; now adding remaing edges (expected time above)") + if write_to_log: + logfile.write("\nall nodes created; now adding remaing edges (expected time above)") # it will be most efficient to just add the last remaining connections post-loop, once all nodes are created ti = time() for leaf in leafs: @@ -844,6 +866,8 @@ def buildGraph(seed, depth, complete_connections = False): chemical_space_graph.add_edge(leaf,neigh) tf = time() - ti print("adding final edges actually took:",reportTime(tf)) + if write_to_log: + logfile.write("\nadding final edges actually took: "+reportTime(tf)) build_time = time() - start_graph_time print("total build time for this graph:",reportTime(build_time)) @@ -851,6 +875,12 @@ def buildGraph(seed, depth, complete_connections = False): print("total number of nodes:",chemical_space_graph.number_of_nodes()) print("total number of edges",chemical_space_graph.number_of_edges()) + if write_to_log: + logfile.write("\ntotal build time for this graph: "+reportTime(build_time)) + logfile.write("\ntotal number of nodes: "+str(chemical_space_graph.number_of_nodes())) + logfile.write("\ntotal number of edges: "+str(chemical_space_graph.number_of_edges())) + logfile.close() + return chemical_space_graph ### def reportTime diff --git a/graphBuild.py b/graphBuild.py old mode 100644 new mode 100755 index 6e9d877..45deb7e --- a/graphBuild.py +++ b/graphBuild.py @@ -1,12 +1,14 @@ #!/usr/bin/env python -import argparse as ap +import argparse from ChemTools import * +import datetime +import time ################################################################# BEGIN SCRIPT ##################################################################################### # start by creating command-line interface -csBuilder = ap.ArgumentParser(prog="CSGraphExplorer 1.0",description="a program that builds a local chemical space graph starting at a given seed molecule. Returns an HTML file containing a faerun visualization of the graph") +csBuilder = argparse.ArgumentParser(prog="CSGraphExplorer 1.0",description="a program that builds a local chemical space graph starting at a given seed molecule. Returns an HTML file containing a faerun visualization of the graph") # SMILE seed is the only positional argument csBuilder.add_argument('SMILE',type=str,help="the SMILE string of the molecule to seed the graph with") # depth and cc have defaults @@ -50,9 +52,57 @@ # print() + +seed = args.SMILE +depth = args.depth +cc = args.exhaustive_connections + + +########### my old script ################### # running buildGraph() with args -csg = buildGraph(args.SMILE,args.depth,args.exhaustive_connections) +# csg = buildGraph(seed,depth,cc) + +# gname = input("Please enter a filename for the graph: ") +# csg_name = gname+"_d"+str(depth)+"_cc"+str(cc) +# faerunPlot(csg, csg_name) + +####################### chatGPT ################ +# Record start time +start_time = time.time() + +# Run buildGraph function +csg = buildGraph(seed,depth,cc) + +# Record time taken for buildGraph function +buildGraph_time = time.time() - start_time + +csg_name = seed+"_d"+str(depth)+"_cc"+str(cc) + +# Record start time for faerunPlot function +start_time = time.time() + +# Run faerunPlot function +faerunPlot(csg, csg_name) + +# Record time taken for faerunPlot function +faerunPlot_time = time.time() - start_time + +# Calculate total time taken for entire program +total_time = buildGraph_time + faerunPlot_time + +# Create filename for text file +filename = seed + "_d" + str(depth) + "_cc" + str(cc) + ".txt" +log_file_name = "log_"+seed+"_d"+str(depth)+"_ec"+str(cc)+".txt" + +# Convert times to datetime format for formatting +buildGraph_datetime = datetime.timedelta(seconds=buildGraph_time) +faerunPlot_datetime = datetime.timedelta(seconds=faerunPlot_time) +total_time_datetime = datetime.timedelta(seconds=total_time) -gname = input("Please enter a filename for the graph: ") -csg_name = gname+"_d"+str(depth)+"_cc"+str(cc) -faerunPlot(csg, csg_name) \ No newline at end of file +# Write data to text file +# with open(filename, "w") as f: +f = open(log_file_name, "a") +f.write("\n\nBuildGraph time: " + str(buildGraph_datetime)[:-3] + "\n") +f.write("faerunPlot time: " + str(faerunPlot_datetime)[:-3] + "\n") +f.write("total time: " + str(total_time_datetime)[:-3] + "\n") +f.close() \ No newline at end of file diff --git a/test_script.py b/test_script.py index e251e1d..c2ea5e5 100644 --- a/test_script.py +++ b/test_script.py @@ -1,13 +1,55 @@ import argparse as ap import cProfile +import pstats from ChemTools import * +import csv +import time -bgRun=cProfile.Profile() -bgRun.run('buildGraph("C",2,True)') -stats = pstats.Stats(bgRun) +# bgRun=cProfile.Profile() +# bgRun.run('buildGraph("C",3,True)') +# stats = pstats.Stats(bgRun) -stats.strip_dirs() -stats.sort_stats('cumtime') -stats.print_stats() -print("Total time: ", stats.total_tt) +# stats.strip_dirs() +# stats.sort_stats('cumtime') +# stats.print_stats() +# print("Total time: ", stats.total_tt) + +# new_smis = ['C','CC','CO','CJK'] +# new_mols = [Chem.MolToSmiles(Chem.MolFromSmiles(smi)) for smi in new_smis if Chem.MolFromSmiles(smi) is not None] + +# print(new_mols) + +runData = [] +seed = 'C' +for depth in range(1, 4): + for exhaustive in [True, False]: + bgRun=cProfile.Profile() + run_params = "buildGraph("+"'"+seed+"'"+","+str(depth)+","+str(exhaustive)+")" + start_time = time.time() + graph = buildGraph(seed,depth,exhaustive) + print(run_params) + fae_params = "faerunPlot(graph,'test_graph')" + bgRun.run(fae_params) + end_time = time.time() + stats = pstats.Stats(bgRun) + stats.strip_dirs() + stats.sort_stats('cumtime') + log_file_name = f"faerunPlot_profile_{seed}_d{depth}_ec{exhaustive}.txt" + with open(log_file_name, "w") as f: + f.write(f"Seed: {seed}\n") + f.write(f"Depth: {depth}\n") + f.write(f"Exhaustive: {exhaustive}\n\n") + stats.stream = f + stats.print_stats() + f.write(f"Total faerunPlot time: {stats.total_tt}\n") + f.write("Total buildGraph+faerunPlot time: "+reportTime(end_time-start_time)) + runData.append([seed,depth,exhaustive,stats.total_tt]) + +csv_file_name = "Linux_C_plot_Data.csv" +with open(csv_file_name, "w", newline='') as csvfile: + csvfile = open(csv_file_name, "w", newline='') + writer = csv.writer(csvfile) + writer.writerow(["Seed", "Depth", "Exhaustive", "Total Time"]) + for run in runData: + writer.writerow(run) \ No newline at end of file