DarwinCode DarwinCode - 4 months ago 16
Python Question

How can I make a Python script run on "N" CPU cores?

I have written a script that optimizes a particular part of a structure (the scientific details can be ignored). The script's main purpose is optimization, and most of its run time is spent in two functions, optimize() and refine(), which use only one of the 4 CPUs in my local system. I want the script to use all 4 CPUs, especially for these two functions.

I don't know much about multiprocessing or multicore programming. I tried the multiprocessing module anyway, but it fails to use all the CPUs. If someone knows how to make a Python script run on all available CPUs, any suggestions would be really helpful.

MY SCRIPT:

import sys
import os

from modeller import *
from modeller.optimizers import molecular_dynamics,conjugate_gradients
from modeller.automodel import autosched

def optimize(atmsel, sched):
    """Optimize ``atmsel``: schedule steps, then MD refinement, then CG.

    Args:
        atmsel: Atom selection handed to every optimizer call.
        sched: Iterable of schedule steps; each step's ``optimize`` method
            is called once on ``atmsel``.
    """
    # NOTE(review): the pasted code had lost its indentation; only the
    # step.optimize() call is placed inside the loop, which matches the usual
    # form of Modeller's mutate_model example -- confirm.
    for step in sched:
        step.optimize(atmsel, max_iterations=200, min_atom_shift=0.001)
    # Molecular-dynamics pass over the same selection.
    refine(atmsel)
    # Final conjugate-gradients pass with the same iteration/shift limits.
    cg = conjugate_gradients()
    cg.optimize(atmsel, max_iterations=200, min_atom_shift=0.001)

def refine(atmsel):
    """Run a molecular-dynamics temperature sweep (up, then down) on ``atmsel``.

    Args:
        atmsel: Atom selection passed to each ``md.optimize`` call.
    """
    md = molecular_dynamics(cap_atom_shift=0.39, md_time_step=4.0,
                            md_return='FINAL')
    # Velocities are initialized on the first MD call only; the flag is
    # cleared right after that call.
    # NOTE(review): nesting was rebuilt from an unindented paste -- confirm
    # that ``init_vel = False`` belongs inside the inner loop.
    init_vel = True
    # Each stage is (max_iterations, equilibrate, temperatures to visit).
    for (its, equil, temps) in ((200, 20, (150.0, 250.0, 400.0, 700.0, 1000.0)),
                                (200, 600,
                                 (1000.0, 800.0, 600.0, 500.0, 400.0, 300.0))):
        for temp in temps:
            md.optimize(atmsel, init_velocities=init_vel, temperature=temp,
                        max_iterations=its, equilibrate=equil)
            init_vel = False

def make_restraints(mdl1, aln):
    """Clear the model's restraints and rebuild them from scratch.

    Args:
        mdl1: Model whose ``restraints`` object is cleared and refilled.
        aln: Alignment forwarded to every ``rsr.make`` call.
    """
    rsr = mdl1.restraints
    rsr.clear()
    # Restraints are generated for a selection built from the whole model.
    s = selection(mdl1)
    for typ in ('stereo', 'phi-psi_binormal'):
        rsr.make(s, restraint_type=typ, aln=aln, spline_on_site=True)
    # Dihedral restraints: the restraint type is the name plus '_dihedral'.
    for typ in ('omega', 'chi1', 'chi2', 'chi3', 'chi4'):
        rsr.make(s, restraint_type=typ+'_dihedral', spline_range=4.0,
                 spline_dx=0.3, spline_min_points = 5, aln=aln,
                 spline_on_site=True)

# Driver: mutate residue 275 of chain A in 3O26 to ALA, rebuild the model,
# then optimize the atoms around the mutated residue.
log.verbose()

# Environment with a fixed random seed and the energy-data settings used below.
env = environ(rand_seed=int(-4243))
env.io.hetatm = True
env.edat.dynamic_sphere=False
env.edat.dynamic_lennard=True
env.edat.contact_shell = 4.0
env.edat.update_dynamic = 0.39

# Read the topology library.
env.libs.topology.read(file='$(LIB)/top_heav.lib')

# Read the parameter library.
env.libs.parameters.read(file='$(LIB)/par.lib')

# Load the model and add it to a fresh alignment.
mdl1 = model(env, file = "3O26")
ali = alignment(env)
ali.append_model(mdl1, atom_files= "3O26.pdb", align_codes= "3O26")

# Select residue 275 of chain A ...
s = selection(mdl1.chains["A"].residues["275"])

# ... mutate it to ALA, and append the mutated model to the alignment as well.
s.mutate(residue_type="ALA")
ali.append_model(mdl1, align_codes="3O26")

# Regenerate the topology from the last alignment entry (the mutated one).
mdl1.clear_topology()
mdl1.generate_topology(ali[-1])

# Transfer coordinates using the alignment.
mdl1.transfer_xyz(ali)

# Build the model from internal coordinates without re-initializing xyz.
mdl1.build(initialize_xyz=False, build_method='INTERNAL_COORDINATES')

# Second copy of the input file, used only as the residue-numbering source.
mdl2 = model(env, file="3O26.pdb")

mdl1.res_num_from(mdl2,ali)

# Write the model to a temporary file and read it straight back.
# NOTE(review): presumably a round trip to normalize the model -- confirm.
mdl1.write(file="3O26"+"ALA"+"275"+"A"+'.tmp')
mdl1.read(file="3O26"+"ALA"+"275"+"A"+'.tmp')

# Build restraints and the optimization schedule for the rebuilt model.
make_restraints(mdl1, ali)
mdl1.env.edat.nonbonded_sel_atoms=1
sched = autosched.loop.make_for_model(mdl1)

# Re-select: sphere of 5 around the CA atom of residue 275 in chain A,
# expanded by residue.
s = selection(mdl1.atoms['CA:'+"275"+':'+"A"].select_sphere(5)).by_residue()

# Keep only the restraints picked for this selection.
mdl1.restraints.unpick_all()
mdl1.restraints.pick(s)

s.energy()

# Randomize the selected coordinates before optimizing.
s.randomize_xyz(deviation=4.0)

# Two optimization passes, first with nonbonded_sel_atoms=2, then with 1.
mdl1.env.edat.nonbonded_sel_atoms=2
optimize(s,sched)
mdl1.env.edat.nonbonded_sel_atoms=1
optimize(s,sched)
s.energy()
# Assess chain A and write the final model; `score` is not used afterwards.
atmsel = selection(mdl1.chains["A"])
score = atmsel.assess_dope()
mdl1.write(file="hi.pdb")

# Remove the temporary file written above.
os.remove("3O26"+"ALA"+"275"+"A"+'.tmp')

from multiprocessing import Process

if __name__ == '__main__':
    # A single Process runs optimize() once in one child process, and join()
    # blocks until that child exits, so this does not spread the work over
    # several cores.
    # NOTE(review): coordinate changes made in the child are presumably not
    # visible to the parent process afterwards -- confirm.
    p = Process(target=optimize, args=(atmsel, sched))
    p.start()
    p.join()


For a demo, please paste the contents of http://files.rcsb.org/view/3o26.pdb into a file named 3O26.pdb and keep it in the same directory as the script.

Thanking you in advance

Based on @Dinesh's suggestion, I modified the code to use the pp module. It works fine in that it uses all the cores, but I am getting some errors that I couldn't figure out.

Modified script:

import sys
import os
import pp
from modeller import *
from modeller.optimizers import molecular_dynamics, conjugate_gradients
from modeller.automodel import autosched


def optimize(atmsel, sched):
    """Run the schedule, an MD refinement and a CG pass on ``atmsel``.

    Args:
        atmsel: Atom selection given to each optimizer.
        sched: Iterable of schedule steps exposing an ``optimize`` method.
    """
    # NOTE(review): indentation was missing in the paste; the loop body is
    # taken to be the single step.optimize() call -- confirm.
    for step in sched:
        step.optimize(atmsel, max_iterations=200, min_atom_shift=0.001)
    # Molecular dynamics on the same selection.
    refine(atmsel)
    # Closing conjugate-gradients optimization.
    cg = conjugate_gradients()
    cg.optimize(atmsel, max_iterations=200, min_atom_shift=0.001)


def refine(atmsel):
    """Molecular-dynamics sweep over two temperature stages for ``atmsel``.

    Args:
        atmsel: Atom selection passed to each ``md.optimize`` call.
    """
    md = molecular_dynamics(cap_atom_shift=0.39, md_time_step=4.0,
                            md_return='FINAL')
    # Only the first md.optimize() call initializes velocities.
    # NOTE(review): nesting rebuilt from an unindented paste; the flag reset
    # is assumed to sit inside the inner loop -- confirm.
    init_vel = True
    # Stage layout: (max_iterations, equilibrate, temperatures).
    for (its, equil, temps) in ((200, 20, (150.0, 250.0, 400.0, 700.0, 1000.0)),
                                (200, 600,
                                 (1000.0, 800.0, 600.0, 500.0, 400.0, 300.0))):
        for temp in temps:
            md.optimize(atmsel, init_velocities=init_vel, temperature=temp,
                        max_iterations=its, equilibrate=equil)
            init_vel = False

def make_restraints(mdl1, aln):
    """Reset ``mdl1.restraints`` and regenerate them for the whole model.

    Args:
        mdl1: Model whose restraints are cleared and rebuilt.
        aln: Alignment passed through to each ``rsr.make`` call.
    """
    rsr = mdl1.restraints
    rsr.clear()
    # Selection built from the model itself; used for every restraint type.
    s = selection(mdl1)
    for typ in ('stereo', 'phi-psi_binormal'):
        rsr.make(s, restraint_type=typ, aln=aln, spline_on_site=True)
    # Dihedral restraint types are formed as '<name>_dihedral'.
    for typ in ('omega', 'chi1', 'chi2', 'chi3', 'chi4'):
        rsr.make(s, restraint_type=typ + '_dihedral', spline_range=4.0,
                 spline_dx=0.3, spline_min_points=5, aln=aln,
                 spline_on_site=True)

################################### PPMODULE ############################
def main(s,sched):
    """Submit one ``optimize(s, sched)`` job to a pp job server and wait for it.

    The worker count is taken from ``sys.argv[1]`` when present; otherwise
    ``pp.Server`` is created without an explicit count.

    Args:
        s: Atom selection forwarded to ``optimize``.
        sched: Optimization schedule forwarded to ``optimize``.
    """
    # Single-argument print(...) emits the same text under Python 2 and also
    # parses under Python 3.
    print("%s %s" % (s, "*************************************************************************"))
    ppservers = ()
    if len(sys.argv) > 1:
        ncpus = int(sys.argv[1])
        job_server = pp.Server(ncpus, ppservers=ppservers)
    else:
        job_server = pp.Server(ppservers=ppservers)
    try:
        print("Starting pp with %s workers" % job_server.get_ncpus())
        # NOTE(review): only one job is submitted, so presumably at most one
        # worker is busy no matter how many were started -- confirm.
        job = job_server.submit(
            optimize,
            (s, sched,),
            (refine,),
            ("from modeller.optimizers import molecular_dynamics, conjugate_gradients",),
        )
        # Calling the job object blocks until the job has finished.
        job()
    finally:
        # main() is called more than once; shut the workers down so each call
        # does not leave its worker processes behind.
        job_server.destroy()
#################################### PPMODULE ############################

if __name__=="__main__":
    # Driver: mutate residue 275 of chain A in 3O26 to ALA, rebuild the model,
    # then optimize the neighbourhood of the mutated residue through main().
    log.verbose()

    # Environment with a fixed random seed and energy-data settings.
    env = environ(rand_seed=int(-4345))
    env.io.hetatm = True
    env.edat.dynamic_sphere = False
    env.edat.dynamic_lennard = True
    env.edat.contact_shell = 4.0
    env.edat.update_dynamic = 0.39
    env.libs.topology.read(file='$(LIB)/top_heav.lib')
    env.libs.parameters.read(file='$(LIB)/par.lib')

    # Load the model, register it in an alignment, mutate, register again.
    mdl1 = model(env, file="3O26")
    ali = alignment(env)
    ali.append_model(mdl1, atom_files="3O26.pdb", align_codes="3O26")
    s = selection(mdl1.chains["A"].residues["275"])
    s.mutate(residue_type="ALA")
    ali.append_model(mdl1, align_codes="3O26")

    # Regenerate topology from the last (mutated) alignment entry and rebuild.
    mdl1.clear_topology()
    mdl1.generate_topology(ali[-1])
    mdl1.transfer_xyz(ali)
    mdl1.build(initialize_xyz=False, build_method='INTERNAL_COORDINATES')

    # Second copy of the input, used only as the residue-numbering source.
    mdl2 = model(env, file="3O26.pdb")
    mdl1.res_num_from(mdl2, ali)

    # Write the model to a temporary file and read it straight back.
    mdl1.write(file="3O26" + "ALA" + "275" + "A" + '.tmp')
    mdl1.read(file="3O26" + "ALA" + "275" + "A" + '.tmp')

    # Restraints and optimization schedule for the rebuilt model.
    make_restraints(mdl1, ali)
    mdl1.env.edat.nonbonded_sel_atoms = 1
    sched = autosched.loop.make_for_model(mdl1)

    # Sphere of 15 around the CA atom of residue 275 in chain A, by residue.
    s = selection(mdl1.atoms['CA:' + "275" + ':' + "A"].select_sphere(15)).by_residue()
    mdl1.restraints.unpick_all()
    mdl1.restraints.pick(s)
    s.energy()
    s.randomize_xyz(deviation=4.0)

    # Two optimization passes, with nonbonded_sel_atoms set to 2 and then 1.
    mdl1.env.edat.nonbonded_sel_atoms = 2
    main(s, sched)
    mdl1.env.edat.nonbonded_sel_atoms = 1
    main(s, sched)
    s.energy()

    # Assess chain A, write the result and remove the temporary file.
    atmsel = selection(mdl1.chains["A"])
    score = atmsel.assess_dope()
    mdl1.write(file="current.pdb")
    os.remove("3O26" + "ALA" + "275" + "A" + '.tmp')


ERROR:

randomi_498_> Atoms,selected atoms,random_seed,amplitude: 2302 558 1 4.0000
randomi_496_> Amplitude is > 0; randomization is done.
<Selection of 558 atoms> *************************************************************************
Starting pp with 4 workers
Traceback (most recent call last):
File "mutate_model.py", line 88, in <module>
main(s, sched)
File "m_m.py", line 52, in main
job_server.submit(optimize,(s,sched,),(refine,),("from modeller.optimizers import molecular_dynamics, conjugate_gradients",))()
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/pp.py", line 460, in submit
sfunc = self.__dumpsfunc((func, ) + depfuncs, modules)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/pp.py", line 638, in __dumpsfunc
sources = [self.__get_source(func) for func in funcs]
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/pp.py", line 705, in __get_source
sourcelines = inspect.getsourcelines(func)[0]
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/inspect.py", line 690, in getsourcelines
lines, lnum = findsource(object)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/inspect.py", line 526, in findsource
file = getfile(object)
File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/inspect.py", line 408, in getfile
raise TypeError('{!r} is a built-in class'.format(object))
TypeError: <module '__builtin__' (built-in)> is a built-in class

Answer

In the end I solved it myself with a different approach, multiprocessing.Pool, based on the blog posts http://chriskiehl.com/article/parallelism-in-one-line/ and https://pymotw.com/2/multiprocessing/basics.html

Here is my pseudo-code:

from multiprocessing import Pool 

def get_mm_script(scripts):
    """Return the mm.py scripts to run (pseudo-code stub).

    The author generated every mm.py script from a string template; this
    stub simply hands ``scripts`` back unchanged.
    """
    return scripts

def run(filename):
    """Run one generated script (pseudo-code stub).

    The author launches each script with a system command here; the stub
    itself performs no work and returns ``None``.
    """
    return None

if __name__ == '__main__':
    # `f` is not defined in this snippet -- presumably the author's input
    # for building the script list.
    scripts = get_mm_script(f)
    # Pool of four worker processes; map() calls run() once per script.
    pool = Pool(4)
    pool.map(run, scripts)
    # No further tasks are submitted; wait for the workers to finish.
    pool.close()
    pool.join()
Comments