#!/usr/local/bin/python
#
# do_uniq.py revision 141477
# $FreeBSD: head/games/fortune/tools/do_uniq.py 141477 2005-02-07 21:15:16Z ru $
#
# an aggressive little script for trimming duplicate cookies
#
# Usage: do_uniq.py datfile
#
# Reads a fortune file whose cookies are separated by lines containing only
# "%", interactively asks which near-duplicate cookies to drop, and writes
# the surviving cookies to "<datfile>~".  The input file is never modified.

import re
import sys

# The interactive prompt is spelled differently on Python 2 and Python 3.
try:
    _input = raw_input  # Python 2
except NameError:
    _input = input  # Python 3

# Filler words stripped from a cookie before comparison.  Order matters:
# longer words come first so that e.g. 'dont' is removed as a unit before
# the single letter 'd' gets a chance to break it up.  Punctuation has
# already been removed when this list is applied, hence 'dont', 'll', 've'.
wordlist = [
    'hadnot',
    'donot', 'hadnt',
    'dont', 'have', 'more', 'will', 'your',
    'and', 'are', 'had', 'the', 'you',
    'am', 'an', 'is', 'll', 've', 'we',
    'a', 'd', 'i', 'm', 's',
]

# Matches every character that is not a letter or a digit (underscore is a
# "word" character for \W, so it is listed explicitly).
_NON_ALNUM = re.compile(r'[\W_]')

# Number of leading characters of the normalised text used as the key.
_KEY_LENGTH = 30


def hash(fortune):
    """Return the fuzzy comparison key for the cookie text *fortune*.

    The text is lower-cased, stripped of everything but letters and digits,
    has every occurrence of each entry of ``wordlist`` removed (in list
    order), and is finally truncated to its first 30 characters.  Two
    cookies with the same key are treated as candidate duplicates.

    NOTE: the name shadows the builtin ``hash``; it is kept so existing
    references to this function continue to work.
    """
    f = _NON_ALNUM.sub('', fortune.lower())
    for word in wordlist:
        # The entries are plain letters, so a literal replace is equivalent
        # to the regex substitution and cheaper.
        f = f.replace(word, '')
    # Earlier experiments (dropping vowels or consonants, keying on the
    # last 30 characters instead of the first) were tried and left disabled.
    return f[:_KEY_LENGTH]


def _read_fortunes(datfile):
    """Return the list of cookies in *datfile*, in file order.

    Each cookie is the concatenation of the lines preceding a "%" separator
    line.  A trailing cookie that is not followed by a separator is kept
    (newline-terminated if necessary) instead of being silently discarded.
    """
    fortunes = []
    pending = []
    with open(datfile) as src:
        for line in src:
            # A bare "%" without newline can only be the very last line.
            if line in ("%\n", "%"):
                fortunes.append("".join(pending))
                pending = []
            else:
                pending.append(line)
    tail = "".join(pending)
    if tail:
        if not tail.endswith("\n"):
            tail += "\n"
        fortunes.append(tail)
    return fortunes


def _group_duplicates(fortunes):
    """Map each key shared by two or more cookies to the list of those cookies."""
    groups = {}
    for fortune in fortunes:
        groups.setdefault(hash(fortune), []).append(fortune)
    # Build a new dict rather than deleting while iterating over the keys.
    return dict((key, group) for key, group in groups.items() if len(group) > 1)


def _show(fortune):
    """Write *fortune* followed by a "%" separator line to standard output."""
    sys.stdout.write(fortune + "%\n")


def _confirm_removal(fortune, group):
    """Display *group* with *fortune* last and ask whether to drop *fortune*.

    Returns True only when the user answers exactly "y".
    """
    # Push the previous question off the screen.
    sys.stdout.write("\n" * 51)
    for other in group:
        if other != fortune:
            _show(other)
    _show(fortune)
    sys.stdout.flush()
    return _input("Remove last fortune? ") == 'y'


def edit(datfile):
    """Interactively weed duplicate cookies out of *datfile*.

    Every cookie whose key (see ``hash``) is shared with another cookie is
    shown together with its look-alikes, and the user is asked whether to
    remove it.  Once one member of a group has been removed, the remaining
    members of that group are kept without further questions.  The result
    is written to ``datfile + '~'``; *datfile* itself is left untouched.
    """
    fortunes = _read_fortunes(datfile)
    dups = _group_duplicates(fortunes)
    # The context manager closes the output even if the user interrupts
    # the session at the prompt.
    with open(datfile + '~', "w") as o:
        for fortune in fortunes:
            key = hash(fortune)
            if key in dups and _confirm_removal(fortune, dups[key]):
                # The group is resolved: stop asking about its other members.
                del dups[key]
                continue
            o.write(fortune + "%\n")


def _main(argv):
    """Command-line entry point: expects exactly one argument, the data file."""
    if len(argv) != 2:
        prog = argv[0] if argv else "do_uniq.py"
        sys.exit("usage: %s datfile" % prog)
    edit(argv[1])


if __name__ == "__main__":
    _main(sys.argv)