#!/usr/local/bin/python
#
# $FreeBSD$
#
# an aggressive little script for trimming duplicate cookies
from __future__ import print_function

import argparse
import re

# Filler words stripped from a fortune before comparison.  Longer words are
# listed first on purpose: hash() removes them one after another, so 'dont'
# must be removed before its substrings ('d', etc.) get a chance to match.
wordlist = [
    'hadnot',
    'donot', 'hadnt',
    'dont', 'have', 'more', 'will', 'your',
    'and', 'are', 'had', 'the', 'you',
    'am', 'an', 'is', 'll', 've', 'we',
    'a', 'd', 'i', 'm', 's',
]


def hash(fortune):
    """Return a fuzzy comparison key for *fortune*.

    The text is lower-cased, every non-word character and underscore is
    dropped, each entry of ``wordlist`` is removed (in list order), and the
    first 30 remaining characters are returned.  Two fortunes that yield the
    same key are treated as candidate duplicates by edit().

    The name shadows the ``hash`` builtin inside this module; it is kept
    because it is the existing name of this function.
    """
    f = fortune.lower()
    # Raw string: '\W' in a plain literal is an invalid escape sequence.
    f = re.sub(r'[\W_]', '', f)
    for word in wordlist:
        # Entries are literal words, so a plain substring removal suffices.
        f = f.replace(word, '')
    # NOTE: earlier experiments (left disabled in the original) stripped
    # vowels, stripped non-vowels, or kept the last 30 characters instead.
    return f[:30]


def edit(datfile):
    """Interactively write a de-duplicated copy of *datfile* to *datfile* + "~".

    *datfile* is read as a sequence of fortunes, each terminated by a line
    consisting solely of ``%``.  Fortunes are grouped by hash(); for every
    fortune whose group has more than one member, the group is printed and
    the user is asked whether to drop the fortune just shown.  Answering
    ``y`` drops it and stops asking about that group; anything else keeps it.
    All kept fortunes are written, in file order, each followed by ``%``.

    Text after the last separator is kept as a final fortune (a newline is
    appended if it lacks one) unless it is only whitespace.
    """
    groups = {}    # key -> list of fortunes sharing that key
    entries = []   # (key, fortune) pairs in file order

    def record(text):
        key = hash(text)
        groups.setdefault(key, []).append(text)
        entries.append((key, text))

    pending = []
    with open(datfile, "r") as datfiledf:
        for line in datfiledf:
            # Also accept a final '%' that is not followed by a newline.
            if line.rstrip("\n") == "%":
                record("".join(pending))
                pending = []
            else:
                pending.append(line)

    # Do not silently lose a last fortune that has no closing '%' line.
    trailing = "".join(pending)
    if trailing.strip():
        if not trailing.endswith("\n"):
            trailing += "\n"
        record(trailing)

    # Only keys shared by two or more fortunes need a decision.
    dups = {key: group for key, group in groups.items() if len(group) > 1}

    with open(datfile + "~", "w") as o:
        for key, fortune in entries:
            if key in dups:
                # Push earlier output off the screen before showing the group.
                print('\n' * 50)
                for f in dups[key]:
                    if f != fortune:
                        print(f, '%')
                print(fortune, '%')
                if input("Remove last fortune? ") == 'y':
                    # Decision made: the rest of this group is kept unasked.
                    del dups[key]
                    continue
            o.write(fortune + "%\n")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="trimming duplicate cookies")
    parser.add_argument("filename", type=str, nargs=1)
    args = parser.parse_args()
    edit(args.filename[0])