do_uniq.py revision 141477
#!/usr/local/bin/python
#
# $FreeBSD: head/games/fortune/tools/do_uniq.py 141477 2005-02-07 21:15:16Z ru $
#
# an aggressive little script for trimming duplicate cookies
#
# Usage: do_uniq.py datfile
#
# Reads a fortune file (cookies separated by lines holding only "%"),
# groups cookies whose normalized keys collide, shows each such group on
# the terminal and asks whether the cookie shown last should be dropped.
# The surviving cookies are written to "datfile~"; the input file itself
# is never modified.

import re
import sys

try:
    _prompt = raw_input  # Python 2
except NameError:
    _prompt = input  # Python 3

# Filler words stripped from a cookie before comparison.  They are removed
# in list order, and the list is ordered longest first so that e.g.
# 'hadnot' is removed as a whole before 'had' gets a chance to match.
wordlist = [
    'hadnot',
    'donot', 'hadnt',
    'dont', 'have', 'more', 'will', 'your',
    'and', 'are', 'had', 'the', 'you',
    'am', 'an', 'is', 'll', 've', 'we',
    'a', 'd', 'i', 'm', 's',
]

# Number of leading characters of the normalized text used as the key.
_KEY_LENGTH = 30

# Written before each duplicate group to scroll the previous one away.
_SCREEN_SCROLL = '\n' * 51


def hash(fortune):
    """Return the fuzzy comparison key for *fortune*.

    The text is reduced to its ASCII letters and digits, lower-cased,
    stripped of every occurrence of the words in ``wordlist`` and cut to
    the first ``_KEY_LENGTH`` characters.  Cookies that differ only in
    case, punctuation, spacing or filler words therefore share a key.

    NOTE: the name shadows the builtin ``hash()`` inside this module; it
    is kept because it is the script's existing function name.
    """
    # An explicit ASCII class keeps the key identical on Python 2 byte
    # strings and Python 3 text ('\W' is Unicode-aware on Python 3).
    f = re.sub(r'[^A-Za-z0-9]', '', fortune).lower()
    for word in wordlist:
        # The words are plain literals, so no regex is needed here.
        f = f.replace(word, '')
    # Alternatives tried at some point: keeping only vowels, keeping only
    # consonants, and keying on the last 30 characters instead.
    return f[:_KEY_LENGTH]


def _read_fortunes(datfile):
    """Return the cookies stored in *datfile*, without their "%" lines."""
    fortunes = []
    pending = []
    with open(datfile) as src:
        for line in src:
            # A bare "%" can only be the final line of a file that lacks
            # a trailing newline; treat it as a separator as well.
            if line in ("%\n", "%"):
                fortunes.append("".join(pending))
                pending = []
            else:
                pending.append(line)
    # Keep a final cookie that is missing its "%" terminator instead of
    # silently dropping it; trailing blank lines are not a cookie.
    tail = "".join(pending)
    if tail.strip():
        if not tail.endswith("\n"):
            tail += "\n"
        fortunes.append(tail)
    return fortunes


def edit(datfile):
    """Interactively trim near-duplicate cookies from *datfile*.

    Every cookie whose key (see ``hash``) is shared with another cookie
    is displayed after the other members of its group, and the user is
    asked "Remove last fortune? ".  Answering exactly ``y`` drops that
    cookie; any other answer keeps it.  Once a cookie of a group has been
    removed the group counts as resolved and its remaining members are
    kept without further questions.

    The result is written to ``datfile + '~'``.
    """
    keyed = [(hash(fortune), fortune) for fortune in _read_fortunes(datfile)]

    groups = {}
    for key, fortune in keyed:
        groups.setdefault(key, []).append(fortune)
    # Only keys shared by two or more cookies need attention.  Building a
    # new dict avoids deleting entries while iterating over the old one.
    dups = {key: members for key, members in groups.items()
            if len(members) > 1}

    with open(datfile + '~', "w") as out:
        for key, fortune in keyed:
            if key in dups:
                sys.stdout.write(_SCREEN_SCROLL)
                for other in dups[key]:
                    if other != fortune:
                        sys.stdout.write(other + '%\n')
                sys.stdout.write(fortune + '%\n')
                sys.stdout.flush()
                if _prompt("Remove last fortune? ") == 'y':
                    del dups[key]
                    continue
            out.write(fortune + "%\n")


def _main(argv):
    """Run the script with command line *argv*; return the exit status."""
    if len(argv) != 2:
        prog = argv[0] if argv else 'do_uniq.py'
        sys.stderr.write("usage: %s datfile\n" % prog)
        return 2
    edit(argv[1])
    return 0


if __name__ == "__main__":
    sys.exit(_main(sys.argv))