1"""
2Some demo code to show find performance and some of the new indexing
3features in Metakit 2.3.  Sample output (SuSE Linux 6.4, PIII/650):
4
5  find.py - Mk4py 2.3.2 - linux2
6  filldb 100 rows: 0.015092 sec
7   find_raw 10000 times, key 50 -> 50: 1.20376 sec
8   find_raw 10000 times, key 101 -> -1: 1.82736 sec
9   find_ord 10000 times, key 50 -> 50: 0.722506 sec
10   find_ord 10000 times, key 101 -> -1: 0.670945 sec
11   hash_ini 1000 times, size 129: 0.487246 sec
12   hash_key 10000 times, key 50 -> 50: 0.658689 sec
13   hash_key 10000 times, key 101 -> -1: 0.592888 sec
14  filldb 1000 rows: 0.059973 sec
15   find_raw 1000 times, key 500 -> 500: 0.715116 sec
16   find_raw 1000 times, key 1001 -> -1: 1.35979 sec
17   find_ord 1000 times, key 500 -> 500: 0.0832601 sec
18   find_ord 1000 times, key 1001 -> -1: 0.0784379 sec
19   hash_ini 100 times, size 1025: 0.565817 sec
20   hash_key 10000 times, key 500 -> 500: 0.666233 sec
21   hash_key 10000 times, key 1001 -> -1: 0.714768 sec
22  filldb 10000 rows: 0.508873 sec
23   find_raw 100 times, key 5000 -> 5000: 0.669386 sec
24   find_raw 100 times, key 10001 -> -1: 1.30918 sec
25   find_ord 100 times, key 5000 -> 5000: 0.00834703 sec
26   find_ord 100 times, key 10001 -> -1: 0.00776601 sec
27   hash_ini 10 times, size 16385: 0.424207 sec
28   hash_key 10000 times, key 5000 -> 5000: 0.665882 sec
29   hash_key 10000 times, key 10001 -> -1: 0.640642 sec
30  filldb 100000 rows: 4.96599 sec
31   find_raw 10 times, key 50000 -> 50000: 0.625583 sec
32   find_raw 10 times, key 100001 -> -1: 1.23957 sec
33   find_ord 10 times, key 50000 -> 50000: 0.000901937 sec
34   find_ord 10 times, key 100001 -> -1: 0.00124598 sec
35   hash_ini 1 times, size 131073: 0.517645 sec
36   hash_key 10000 times, key 50000 -> 50000: 0.657581 sec
37   hash_key 10000 times, key 100001 -> -1: 0.628431 sec
38
39In a nutshell: unordered views use linear scan, which is extremely
40efficient but O(N), ordered views will switch to binary search, and
41if a hash mapping is used then you get the usual O(1) of hashing.
42
43The statement "vwo = vw.ordered(1)" sets up a view layer around vw,
44which allows Metakit to take advantage of sort order.  The numeric
45argument specifies how many of the first property define the key.
46The vwo view is modifiable, it will maintains order on insertions.
47
48The statement "vwh = vw.hash(map,1)" sets up a view layer around vw,
49using a secondary 'map' view to manage hashing.  As with ordered,
50the last argument specifies the number or properties to use as key.
51When vwh is modified, both vw and map will be adjusted.  The hash
52view can be set up after vw has been filled, this is probably a bit
53faster than setting up vwh first, and inserting into new data in vwh.
54The underlying vw and map views may *not* be altered directly once
55a hash is in use, unless you clear map and redefine the hash layer.
56
57Metakit 2.01 only supports find_raw (linear scanning).
58"""
59
60import sys; sys.path.append('../builds'); import Mk4py; mk = Mk4py
61print sys.argv[0], '-', 'Mk4py', mk.version, '-', sys.platform
62
63from time import time
64
65db = mk.storage()
66vw = db.getas('vw[p1:I,p2:I]')
67map = db.getas('map[_H:I,_R:I]')
68
69def filldb(n=1000):
70  del vw[:]
71  t0 = time();
72  for i in xrange(n):
73    vw.append(p1=i, p2=i+i)
74  print 'filldb %d rows: %g sec' % (n, time() - t0)
75
76# this find will do a linear scan
77def find_raw(k,n=1000):
78  t0 = time();
79  for i in xrange(n):
80    r = vw.find(p1=k)
81  print ' find_raw %d times, key %d -> %d: %g sec' % (n, k, r, time() - t0)
82
83# this find switches to binary search, because it knows the view is ordered
84def find_ord(k,n=1000):
85  t0 = time();
86  vwo = vw.ordered()
87  for i in xrange(n):
88    r = vwo.find(p1=k)
89  print ' find_ord %d times, key %d -> %d: %g sec' % (n, k, r, time() - t0)
90
91# setting up a hash view initializes it, since the map size is still zero
92def hash_ini(n=1000):
93  t0 = time();
94  for i in xrange(n):
95    del map[:]
96    vwh = vw.hash(map)
97  print ' hash_ini %d times, size %d: %g sec' % (n, len(map), time() - t0)
98
99# this lookup no longer initializes, find will now do a hash lookup
100def hash_key(k,n=1000):
101  t0 = time();
102  vwh = vw.hash(map)
103  for i in xrange(n):
104    r = vwh.find(p1=k)
105  print ' hash_key %d times, key %d -> %d: %g sec' % (n, k, r, time() - t0)
106
107for n in (100, 1000, 10000, 100000):
108  filldb(n)
109  find_raw(n/2, 1000000/n)
110  find_raw(n+1, 1000000/n)
111  find_ord(n/2, 1000000/n)
112  find_ord(n+1, 1000000/n)
113  hash_ini(100000/n)
114  hash_key(n/2, 10000)
115  hash_key(n+1, 10000)
116