# Driver for the DirectView / Python 3 version. `rc` is a parallel Client and
# `view` a DirectView on all engines, both created earlier in the script;
# `davinci_url`, `wordfreq`, `print_wordfreq` and `pwordfreq` are likewise
# defined elsewhere in the script.
import io
import os
import time

import requests

# Make every engine work in this process's current directory.
view.apply_sync(os.chdir, os.getcwd())

if not os.path.exists('davinci.txt'):
    # Download the text from Project Gutenberg.
    print("Downloading Da Vinci's notebooks from Project Gutenberg")
    r = requests.get(davinci_url)
    with io.open('davinci.txt', 'w', encoding='utf8') as f:
        f.write(r.text)

# Run the serial version.
print("Serial word frequency count:")
text = io.open('davinci.txt', encoding='latin1').read()
tic = time.time()
freqs = wordfreq(text)
toc = time.time()
print_wordfreq(freqs, 10)
print("Took %.3f s to calculate" % (toc - tic))

# The parallel version.
print("\nParallel word frequency count:")

# Split davinci.txt into one chunk file per engine.
lines = text.splitlines()
nlines = len(lines)
n = len(rc)
block = nlines // n
for i in range(n):
    chunk = lines[i * block:(i + 1) * block]
    with io.open('davinci%i.txt' % i, 'w', encoding='utf8') as f:
        f.write('\n'.join(chunk))

try:  # Python 2
    cwd = os.path.abspath(os.getcwdu())
except AttributeError:  # Python 3
    cwd = os.path.abspath(os.getcwd())
fnames = [os.path.join(cwd, 'davinci%i.txt' % i) for i in range(n)]

tic = time.time()
pfreqs = pwordfreq(view, fnames)
toc = time.time()
print_wordfreq(pfreqs, 10)  # report the parallel result, top 10 as above
print("Took %.3f s to calculate on %i engines" % (toc - tic, len(view.targets)))

# Clean up the split files with an explicit loop; a bare map() call is lazy on
# Python 3 and would never actually remove anything.
for fname in fnames:
    os.remove(fname)
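
# The driver above calls wordfreq() and print_wordfreq() from wordfreq.py, which
# is not part of this excerpt. A minimal sketch of what those helpers could look
# like; the punctuation stripping, lowercasing, and output format here are
# assumptions, not the original implementation.
from collections import Counter
import string


def wordfreq(text):
    """Return a dict mapping each word in `text` to its number of occurrences."""
    words = (w.strip(string.punctuation).lower() for w in text.split())
    return dict(Counter(w for w in words if w))


def print_wordfreq(freqs, n=10):
    """Print the n most frequent words and their counts."""
    for word, count in Counter(freqs).most_common(n):
        print("%s: %i" % (word, count))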
            # Tail of pwordfreq: accumulate the per-engine counts into one dict.
            freqs[word] += count
    return freqs
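
# Only those closing lines of a pwordfreq definition survive in this excerpt.
# Below is a minimal sketch of a pwordfreq(view, fnames) with the signature and
# merge loop the driver above expects; it distributes the split files with
# DirectView.map_sync, and the per-file counting is an assumption rather than
# the original implementation.
def wordfreq_file(fname):
    """Count words in one split file; runs on an engine, so it is self-contained."""
    import io
    from collections import Counter
    with io.open(fname, encoding='latin1') as f:
        return dict(Counter(f.read().split()))


def pwordfreq(view, fnames):
    """Parallel word frequency counter (sketch).

    view   - a DirectView with one engine per entry in fnames
    fnames - the per-engine text files written by the driver
    """
    assert len(fnames) == len(view.targets)
    # One file per engine; map_sync blocks until every engine returns its dict.
    freqs_list = view.map_sync(wordfreq_file, fnames)
    # Merge the per-engine counts locally (the loop whose tail appears above).
    freqs = {}
    for f in freqs_list:
        for word, count in f.items():
            freqs[word] = freqs.get(word, 0) + count
    return freqs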
# Legacy variant of the same driver, written for Python 2 and the old
# IPython.kernel API, which has since been removed from IPython.
if __name__ == '__main__':
    # Create a MultiEngineClient
    from IPython.kernel import client
    ipc = client.MultiEngineClient()

    # Run the wordfreq script on the engines.
    ipc.run('wordfreq.py')

    # Run the serial version
    print "Serial word frequency count:"
    text = open('davinci.txt').read()
    freqs = wordfreq(text)
    print_wordfreq(freqs, 10)

    # The parallel version
    print "\nParallel word frequency count:"
    files = ['davinci%i.txt' % i for i in range(4)]
    ipc.scatter('textfile', files)
    ipc.execute('text = open(textfile[0]).read()')
    # 'text' is the name of a string on the engines to do the freq count on.
    pfreqs = pwordfreq(ipc, 'text')
    print_wordfreq(pfreqs, 10)
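
# The block above targets the long-removed IPython.kernel API and Python 2, so
# it will not run on current installs. A rough modern equivalent of the same
# setup using the ipyparallel package is sketched below; it assumes a cluster is
# already running (e.g. started with `ipcluster start -n 4`).
import ipyparallel as ipp

rc = ipp.Client()   # connect to the running cluster
view = rc[:]        # DirectView on all engines
view.block = True   # make the calls below synchronous

# Define the wordfreq helpers on every engine (stands in for ipc.run('wordfreq.py')).
view.execute(open('wordfreq.py').read())

# Same scatter/execute pattern as the legacy code above.
files = ['davinci%i.txt' % i for i in range(len(rc))]
view.scatter('textfile', files)
view.execute('text = open(textfile[0]).read()')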