Spellchecking in Python

FWIW, here’s the script I threw together to extract the wordlist I mentioned in the previous post:

#! /usr/bin/env python2.5

from __future__ import with_statement

import os
import re
import sys

from optparse import OptionParser

def worditer(wordsin, dict = None):
    """Yield the words from ``wordsin`` that aspell reports as misspelled.

    An ``aspell -a`` child process is started and each non-empty word is
    piped to it.  A word is yielded when any reply line for it starts with
    ``&`` (miss with suggestions) or ``#`` (miss without suggestions).  A
    yielded word is then accepted for the rest of the session with
    ``@word``, so repeated occurrences are reported only once.

    wordsin -- iterable of candidate words; empty strings are skipped.
    dict    -- optional language name passed to aspell as ``--lang=...``;
               when falsy, aspell's default dictionary is used.

    Raises OSError if the aspell executable cannot be started.
    """
    # Imported locally so this function does not depend on the file header.
    import subprocess

    # An argument list (no shell) keeps a hostile --dict value from being
    # interpreted as shell syntax.
    cmd = ['aspell', '-a']
    if dict:
        cmd.append('--lang=%s' % dict)
    proc = subprocess.Popen(cmd,
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            universal_newlines=True)
    i, o = proc.stdin, proc.stdout
    try:
        # skip first line: aspell greets with a one-line version banner
        o.readline()
        for w in wordsin:
            if not w:
                continue
            # A leading '^' makes aspell spell-check the rest of the line
            # even if the word starts with one of its command characters.
            i.write('^%s\n' % w)
            # Flush before reading, otherwise both sides can wait forever.
            i.flush()
            misspelled = False
            # aspell answers with one line per token it found, terminated
            # by a blank line; consume all of them to stay in step.
            while True:
                result = o.readline()
                if not result.strip():
                    # blank terminator, or EOF if aspell went away
                    break
                if result[0] in '#&':
                    misspelled = True
            if misspelled:
                # add the word for this session
                i.write('@%s\n' % w)
                yield w
    finally:
        # Runs on exhaustion, on error, and when the generator is closed
        # early, so the child process is never left behind.
        i.close()
        o.close()
        proc.wait()

def dowords(wordsin, outstr, dict):
    """Write every word produced by ``worditer(wordsin, dict)`` to ``outstr``.

    Each word is written on its own line (the word followed by a newline).
    """
    unknown_words = worditer(wordsin, dict)
    for unknown in unknown_words:
        line = unknown + '\n'
        outstr.write(line)

def filewordsiter(filenames):
    """Yield the tokens of every line of every file named in ``filenames``.

    Each line is split on runs of non-word characters.  Empty strings can
    appear among the results (for example after the trailing newline of a
    line); callers that only want real words should skip falsy items.

    filenames -- iterable of paths; each file is opened for reading in
                 turn and closed before the next one is opened.
    """
    # '\W+' rather than '\W*': since Python 3.7 re.split also splits on
    # empty matches, so '\W*' would break every word into single
    # characters.  On older versions the two patterns split identically.
    regex = re.compile(r'\W+')
    for fname in filenames:
        with open(fname) as f:
            for line in f:
                for w in regex.split(line):
                    yield w

def dofiles(filenames, outstream, dict):
    """Feed the words read from ``filenames`` through ``dowords``.

    The words come from ``filewordsiter(filenames)``; ``outstream`` and
    ``dict`` are passed to ``dowords`` unchanged.
    """
    words = filewordsiter(filenames)
    dowords(words, outstream, dict)

def main():
    """Command-line entry point.

    Options:
      -d / --dict  dictionary name handed on to ``dofiles``
      -o / --out   output file; when omitted, output goes to stdout

    The remaining positional arguments are the input file names.
    """
    parser = OptionParser()
    parser.add_option('-d', '--dict', dest = 'dict',
                        help = 'Dictionary to use')
    parser.add_option('-o', '--out', dest = 'outfile',
                        help = 'Output file, stdout if none')
    options, filenames = parser.parse_args()
    if options.outfile:
        outstr = open(options.outfile, "w")
        try:
            dofiles(filenames, outstr, options.dict)
        finally:
            # Close the file we opened even if processing fails.
            outstr.close()
    else:
        # No -o given: write to stdout, which is not ours to close.
        dofiles(filenames, sys.stdout, options.dict)

# Run the command-line interface only when executed as a script.
if __name__ == '__main__':
    main()

Tags: ,

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s

%d bloggers like this: