do_uniq.py revision 141477
1141477Sru#!/usr/local/bin/python
2141477Sru#
3141477Sru# $FreeBSD: head/games/fortune/tools/do_uniq.py 141477 2005-02-07 21:15:16Z ru $
4141477Sru#
5141477Sru# an aggressive little script for trimming duplicate cookies
6141477Sru
7141477Sruimport re, sys
8141477Sru
# Filler fragments that hash() deletes from a fortune before comparing it
# with others.  They are removed one after another in list order, so the
# longer entries come first: 'hadnot' must be consumed whole before 'had'
# or 'a' gets a chance to take a bite out of it.
wordlist = [
    'hadnot',
    'donot', 'hadnt',
    'dont', 'have', 'more', 'will', 'your',
    'and', 'are', 'had', 'the', 'you',
    'am', 'an', 'is', 'll', 've', 'we',
    'a', 'd', 'i', 'm', 's',
]


def hash(fortune):
    """Return a fuzzy fingerprint used to spot near-duplicate fortunes.

    The text is lower-cased, every non-alphanumeric character (including
    the underscore) is dropped, each entry of ``wordlist`` is deleted in
    list order, and the first 30 characters of what remains are returned.
    Two fortunes with the same fingerprint are treated as duplicates.

    Because whitespace is already gone when the word list is applied, the
    entries match as plain substrings rather than whole words (the entry
    'a' removes every letter "a", for example).

    NOTE: the name shadows the builtin ``hash``; it is kept unchanged so
    existing callers keep working.
    """
    # Raw string: '\W' in a normal literal is an invalid escape sequence.
    f = re.sub(r'[\W_]', '', fortune.lower())
    for word in wordlist:
        # Entries are literal text, so plain replacement is sufficient.
        f = f.replace(word, '')
    return f[:30]
29141477Sru
def edit(datfile):
    """Interactively weed out near-duplicate fortunes from *datfile*.

    The file is read as a sequence of fortunes, each terminated by a line
    containing only ``%``.  Fortunes sharing a ``hash()`` fingerprint are
    grouped; for every member of a group, the other members and then the
    candidate are shown on stdout and the user is asked whether to drop
    the candidate.  Answering ``y`` drops it and dissolves the group, so
    the remaining members are kept without further questions.  Everything
    that is kept is written, in the original order, to ``datfile + '~'``;
    *datfile* itself is never modified.

    Runs on Python 2.7 and Python 3.
    """
    try:
        ask = raw_input  # Python 2
    except NameError:
        ask = input  # Python 3

    fortunes = []
    pending = []
    with open(datfile) as src:
        for line in src:
            # A bare "%" can only be a final separator that lacks its
            # trailing newline.
            if line in ("%\n", "%"):
                fortunes.append("".join(pending))
                pending = []
            else:
                pending.append(line)
    if pending:
        # The last fortune had no terminating "%" line.  Keep it instead
        # of silently dropping it, and make sure it ends with a newline so
        # the "%" written after it lands on a line of its own.
        tail = "".join(pending)
        if not tail.endswith("\n"):
            tail += "\n"
        fortunes.append(tail)

    # Fingerprint every fortune once; the keys are needed both for
    # grouping and again while writing the output.
    keys = [hash(fortune) for fortune in fortunes]
    groups = {}
    for key, fortune in zip(keys, fortunes):
        groups.setdefault(key, []).append(fortune)
    # Only fingerprints shared by two or more fortunes need attention.
    dups = {key: group for key, group in groups.items() if len(group) > 1}

    with open(datfile + '~', "w") as out:
        for key, fortune in zip(keys, fortunes):
            if key in dups:
                # Push the previous candidate off the screen.
                sys.stdout.write('\n' * 50 + '\n')
                for other in dups[key]:
                    if other != fortune:
                        sys.stdout.write(other + '%\n')
                # The candidate is always shown last, right above the
                # prompt.
                sys.stdout.write(fortune + '%\n')
                if ask("Remove last fortune? ") == 'y':
                    # Dropping the group stops further prompts for it.
                    del dups[key]
                    continue
            out.write(fortune + "%\n")
61141477Sru
# Script entry point: expects exactly one argument, the fortune data file.
if __name__ == '__main__':
    if len(sys.argv) != 2:
        # An explicit check still runs under "python -O" (unlike assert)
        # and tells the user what went wrong; exits with status 1.
        sys.exit("usage: %s datfile" % sys.argv[0])
    edit(sys.argv[1])
64