FWIW, here’s the script I threw together to extract the wordlist I mentioned in the previous post:
#! /usr/bin/env python2.5
"""Extract the words that aspell reports as misspelled from text files.

Every input file is split into words, each word is sent to an
``aspell -a`` subprocess, and each word aspell flags is written, one per
line, to the output file (or to stdout when no output file is given).

The code only uses constructs available from Python 2.5 onwards and also
runs unchanged on Python 3.
"""
from __future__ import with_statement  # required on Python 2.5 only

import os
import re
import subprocess
import sys
from optparse import OptionParser

# aspell result lines starting with '&' (misspelled, suggestions follow) or
# '#' (misspelled, no suggestions) mark a word it does not know.
_MISSPELLED = re.compile(r'[#&]')

# One or more non-word characters.  '\W+' rather than '\W*': a pattern that
# can match the empty string makes re.split() cut between every character
# on Python 3.7+, whereas older versions silently skipped empty matches.
_WORD_SEPARATOR = re.compile(r'\W+')


def worditer(wordsin, dict=None):
    """Yield each word from *wordsin* that aspell flags as misspelled.

    Args:
        wordsin: iterable of strings; empty strings are skipped.
        dict: optional language/dictionary name passed to aspell as
            ``--lang=<dict>``.  (The name shadows the builtin but is kept
            because callers may pass it by keyword.)

    Yields:
        The words for which aspell answered with a '&' or '#' line.  A
        flagged word is then added to aspell's session word list, so later
        occurrences of it are not reported again.

    Raises:
        OSError: if the ``aspell`` executable cannot be started.
    """
    cmd = ['aspell', '-a']
    if dict:
        # Passed as a separate argv entry, so no shell ever parses it.
        cmd.append('--lang=%s' % dict)
    proc = subprocess.Popen(cmd,
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            universal_newlines=True)
    try:
        # Skip the version banner aspell prints on start-up.
        proc.stdout.readline()
        for w in wordsin:
            if not w:
                continue
            # '^' tells aspell to treat the rest of the line as text to
            # check, even if it starts with one of its command characters.
            proc.stdin.write('^%s\n' % w)
            proc.stdin.flush()
            # aspell answers with one line per word it found in the input
            # line, followed by a blank line.  Consume the whole reply so
            # the stream stays in step even when aspell sees several words
            # (or none) in what was sent.
            flagged = False
            while True:
                result = proc.stdout.readline()
                if not result or result == '\n':
                    break
                if _MISSPELLED.match(result):
                    flagged = True
            if flagged:
                # Add the word for this session.
                proc.stdin.write('@%s\n' % w)
                yield w
    finally:
        # Closing stdin signals EOF so aspell exits; then reap the child.
        try:
            proc.stdin.close()
        finally:
            proc.stdout.close()
            proc.wait()


def dowords(wordsin, outstr, dict):
    """Write every misspelled word from *wordsin* to *outstr*, one per line.

    Args:
        wordsin: iterable of candidate words.
        outstr: object with a ``write`` method that receives the output.
        dict: language/dictionary name for aspell, or None for its default.
    """
    for w in worditer(wordsin, dict):
        outstr.write(w + '\n')


def filewordsiter(filenames):
    """Yield the pieces of every line of every file, split on non-word runs.

    Args:
        filenames: iterable of paths to text files, read in order.

    Yields:
        The strings between runs of non-word characters.  Empty strings
        are produced where a line starts or ends with a separator (for
        instance at the trailing newline).
    """
    for fname in filenames:
        with open(fname) as f:
            for line in f:
                for w in _WORD_SEPARATOR.split(line):
                    yield w


def dofiles(filenames, outstream, dict):
    """Spell-check the words of *filenames* and write the misses to *outstream*.

    Args:
        filenames: iterable of paths to text files.
        outstream: object with a ``write`` method that receives the output.
        dict: language/dictionary name for aspell, or None for its default.
    """
    dowords(filewordsiter(filenames), outstream, dict)


def main():
    """Parse the command line and run the extraction."""
    parser = OptionParser()
    parser.add_option('-d', '--dict', dest='dict',
                      help='Dictionary to use')
    parser.add_option('-o', '--out', dest='outfile',
                      help='Output file, stdout if none')
    options, filenames = parser.parse_args()
    if options.outfile:
        outstr = open(options.outfile, "w")
        try:
            dofiles(filenames, outstr, options.dict)
        finally:
            # Make sure buffered output reaches the file even on errors.
            outstr.close()
    else:
        dofiles(filenames, sys.stdout, options.dict)


if __name__ == '__main__':
    main()