1141477Sru#!/usr/local/bin/python
2141477Sru#
3141477Sru# $FreeBSD$
4141477Sru#
5141477Sru# an aggressive little script for trimming duplicate cookies
6291041Srodrigcfrom __future__ import print_function
7281007Seadlerimport argparse
8281007Seadlerimport re
9141477Sru
# Filler words that hash() strips out of a fortune before comparing it.
# The entries are grouped by length, longest first: hash() removes them one
# after another in list order, so e.g. 'hadnot' is removed whole before the
# shorter 'had' gets a chance to match inside it.
wordlist = (
    'hadnot '                      # 6 letters
    'donot hadnt '                 # 5 letters
    'dont have more will your '    # 4 letters
    'and are had the you '         # 3 letters
    'am an is ll ve we '           # 2 letters
    'a d i m s'                    # 1 letter
).split()
18141477Sru
19281007Seadler
def hash(fortune, words=None):
    """Return a fuzzy comparison key for a fortune.

    The text is lower-cased, every character that is not a letter or digit
    (underscores included) is dropped, the filler words are removed one
    after another in list order, and the first 30 remaining characters are
    returned.  Two fortunes that produce the same key are treated by the
    caller as likely duplicates.

    fortune -- the fortune text.
    words   -- optional sequence of literal substrings to strip; when
               omitted, the module-level ``wordlist`` is used.

    Note: this name shadows the ``hash`` builtin inside this module; it is
    kept unchanged because callers refer to it by this name.
    """
    if words is None:
        words = wordlist
    # Raw string: '\W' inside a plain literal is an invalid escape sequence
    # and triggers a warning on current Python versions.
    key = re.sub(r'[\W_]', '', fortune.lower())
    for word in words:
        # The words are plain text, so substring replacement is all that is
        # needed; removal is sequential, which is why list order matters.
        key = key.replace(word, '')
    # Only a prefix is compared, so fortunes that differ late still collide.
    return key[:30]
31141477Sru
32281007Seadler
def edit(datfile):
    """Interactively trim likely-duplicate fortunes from datfile.

    datfile is read as a sequence of fortunes, each terminated by a line
    holding only "%".  Fortunes are grouped by their hash() key.  For every
    fortune that shares its key with at least one other fortune, the group
    is printed (the current fortune last) and the user is asked whether to
    remove it.  Answering 'y' drops that fortune from the output and stops
    further prompting for the rest of its group.

    The result is written to datfile + "~"; datfile itself is only read.
    """
    fortunes = []
    pending = []
    with open(datfile, "r") as src:
        for line in src:
            # rstrip so that a final "%" without a trailing newline still
            # counts as a separator instead of becoming fortune text.
            if line.rstrip("\n") == "%":
                fortunes.append("".join(pending))
                pending = []
            else:
                pending.append(line)
    if pending:
        # The file ended without a closing "%": keep the last fortune
        # rather than silently dropping it, and make sure the separator
        # written after it lands on a line of its own.
        tail = "".join(pending)
        if not tail.endswith("\n"):
            tail += "\n"
        fortunes.append(tail)

    # Compute each key once; it is needed for grouping and for the lookup
    # in the output pass below.
    keys = [hash(fortune) for fortune in fortunes]
    groups = {}
    for key, fortune in zip(keys, fortunes):
        groups.setdefault(key, []).append(fortune)
    # Only keys shared by two or more fortunes are worth asking about.
    dups = {key: group for key, group in groups.items() if len(group) > 1}

    with open(datfile + "~", "w") as out:
        for key, fortune in zip(keys, fortunes):
            if key in dups:
                # Push the previous group off the screen.
                print('\n' * 50)
                for other in dups[key]:
                    if other != fortune:
                        print(other, '%')
                print(fortune, '%')
                if input("Remove last fortune? ") == 'y':
                    # One member removed: the remaining members of this
                    # group are kept without asking again.
                    del dups[key]
                    continue
            out.write(fortune + "%\n")
64141477Sru
def main():
    """Parse the command line and run edit() on the named fortune file."""
    parser = argparse.ArgumentParser(description="trimming duplicate cookies")
    # nargs=1 makes argparse store the single path as a one-element list.
    parser.add_argument("filename", type=str, nargs=1)
    args = parser.parse_args()
    edit(args.filename[0])


# Guarded so that importing this module (e.g. to reuse hash()) does not
# parse sys.argv or start an interactive session.
if __name__ == "__main__":
    main()
69