Thursday, September 20, 2012

Fork Exec Wait with Python

The following program demonstrates the use of os.fork() and os.wait() in Python. The program forks several processes, each of which counts to a number. The parent process waits for each child process to end, and prints that process's return value.

#!/usr/bin/env python

from optparse import OptionParser
import os
import sys 
import time

def worker(count):
    """Count from 0 up to ``count`` - 1, printing one number per second.

    Every line is prefixed with the pid of the calling process so that the
    interleaved output of several workers can be told apart.

    count -- how many numbers to print; zero or a negative value prints
             nothing.
    """
    for i in range(count):
        time.sleep(1)
        # A parenthesised single argument prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print('[%s] => %s' % (os.getpid(), i))
        # The forked child that runs this function is terminated with
        # os._exit(), which does not flush stdio buffers.  Flush each line
        # here so nothing is lost when stdout is a pipe or a file.
        sys.stdout.flush()

def boss(num_workers, count):
    """Fork ``num_workers`` children, let each one count, then reap them.

    Each child runs worker(count) and exits with its own index in the fork
    order as its exit status.  The parent waits for the children in the
    order they were forked and reports how each one ended.

    num_workers -- number of child processes to fork
    count       -- the number each child counts to (passed to worker())
    """
    child_pids = []
    for i in range(num_workers):
        # Anything still sitting in the stdout buffer would be copied into
        # the child by fork() and could end up written twice.
        sys.stdout.flush()
        pid = os.fork()
        if pid == 0:
            # Child: do the work, then leave without running the parent's
            # cleanup.  os._exit() does not flush stdio buffers, so flush
            # explicitly before calling it.
            worker(count)
            sys.stdout.flush()
            os._exit(i)
        # Parent: os._exit() never returns, so only the parent gets here.
        child_pids.append(pid)
    for pid in child_pids:
        pid, status = os.waitpid(pid, 0)
        if os.WIFEXITED(status):
            print('parent: child with pid %d exited with value %d'
                  % (pid, os.WEXITSTATUS(status)))
        elif os.WIFSIGNALED(status):
            # A child that was killed has no exit value; report the signal
            # instead of silently skipping it.
            print('parent: child with pid %d was killed by signal %d'
                  % (pid, os.WTERMSIG(status)))

if __name__ == '__main__':
    # Command-line entry point: read the number of workers and the counting
    # limit from the options, then hand both to boss().
    cli = OptionParser(
        usage='usage: %prog [options]\n   \n'
              'Demonstrate fork and wait system calls.',
        version='%prog 1.0')
    cli.add_option(
        '-n', '--num-workers', type='int', dest='num_workers', default=5,
        help='number of child worker processes to fork (default=5)')
    cli.add_option(
        '-c', '--toil-count', type='int', dest='toil_count', default=10,
        help='the number each worker process must count to (default=10)')
    # Positional arguments are accepted by the parser but not used.
    opts, _ = cli.parse_args()
    boss(opts.num_workers, opts.toil_count)

A sample output of running the program with seven worker processes, each of which counts to three, is shown below.

$ ./forkwait.py -n 7 -c 3
[692] => 0
[695] => 0
[694] => 0
[693] => 0
[697] => 0
[696] => 0
[698] => 0
[695] => 1
[692] => 1
[693] => 1
[697] => 1
[698] => 1
[694] => 1
[696] => 1
[695] => 2
[692] => 2
[697] => 2
[693] => 2
[698] => 2
[694] => 2
[696] => 2
parent: child with pid 692 exited with value 0
parent: child with pid 693 exited with value 1
parent: child with pid 694 exited with value 2
parent: child with pid 695 exited with value 3
parent: child with pid 696 exited with value 4
parent: child with pid 697 exited with value 5
parent: child with pid 698 exited with value 6

Our next program is a bit more interesting. In this program, the user specifies a list of webpages to download. For each webpage, the program forks a new process and uses os.execlp() to execute wget.

#!/usr/bin/env python

from optparse import OptionParser
import os
import sys 
import time
import urlparse

def wget(webpages):
    """Download every URL in ``webpages``, one forked ``wget`` per URL.

    For each URL a child process is forked and replaced by ``wget`` via
    os.execlp().  The parent then waits for the children in the order they
    were forked and reports how each one ended.

    webpages -- iterable of URL strings; an empty iterable does nothing.

    A child that cannot start ``wget`` writes the reason to stderr and
    exits with status 127.
    """
    child_pids = []
    for webpage in webpages:
        # Anything still sitting in the stdout buffer would be copied into
        # the child by fork() and could end up written twice.
        sys.stdout.flush()
        pid = os.fork()
        if pid == 0:
            # generate output name to prevent many duplicate index.html's
            o = urlparse.urlparse(webpage)
            output = (o.netloc + o.path).replace(os.path.sep, '__')
            print('[*] saving %s to %s' % (webpage, output))
            # exec replaces the process image without flushing open file
            # objects, so the line above must be flushed before it.
            sys.stdout.flush()
            try:
                os.execlp('wget', 'wget', '--quiet', '--output-document',
                          output, webpage)
            except OSError as e:
                # execlp only comes back by raising, e.g. when wget is not
                # on PATH.  Report it and leave the child immediately so it
                # never runs the parent's wait loop.
                sys.stderr.write('error starting wget: %s\n' % e)
                sys.stderr.flush()
                os._exit(127)
        # Parent: the child either became wget or exited above.
        child_pids.append(pid)
    for pid in child_pids:
        pid, status = os.waitpid(pid, 0)
        if os.WIFEXITED(status):
            print('parent: child with pid %d exited with value %d'
                  % (pid, os.WEXITSTATUS(status)))
        elif os.WIFSIGNALED(status):
            # A child that was killed has no exit value; report the signal
            # instead of silently skipping it.
            print('parent: child with pid %d was killed by signal %d'
                  % (pid, os.WTERMSIG(status)))

if __name__ == '__main__':
    # Command-line entry point: no options are registered here, so every
    # positional argument is treated as a URL and passed on to wget().
    cli = OptionParser(
        usage='usage: %prog [url ...]\n  \nRetrieve one or more webpages.\n',
        version='%prog 1.0')
    _, urls = cli.parse_args()
    wget(urls)

In the sample run below, we attempt to download the homepages of smherwig.org and lua.org, as well as the nonexistent, invalid URL foo.

$ ./forkwget.py http://smherwig.org/index.html http://lua.org foo
[*] saving http://smherwig.org/index.html to smherwig.org__index.html
[*] saving http://lua.org to lua.org
[*] saving foo to foo
parent: child with pid 1153 exited with value 0
parent: child with pid 1154 exited with value 0
parent: child with pid 1155 exited with value 4

No comments:

Post a Comment