1// $Id: WKdmDecompress.intel.s,v 1.1 2010/01/30 00:39:21 cclee Exp cclee $
2
3// This file contains i386 and x86_64 (no SSE) optimized implementation of WKdm Decompressor.
4// The implementation is derived by compiling (gcc -O3) the original C code (WKdmDecompress.c)
5// followed by hand tweaking of the compiled assembly code.
6// cclee, 1/29/10
7
#if defined __i386__
	.text
	.align 4,0x90

	.globl _WKdm_decompress
_WKdm_decompress:

	// WKdm decompressor, i386 flavour (AT&T syntax, arguments on the stack).
	//
	//	 8(%ebp)	src_buf  : compressed input
	//	12(%ebp)	dest_buf : output, one 32-bit word is stored per tag (1024 tags)
	//
	// src_buf as consumed below (byte offsets):
	//	  +4	word index (relative to src_buf) where the packed qpos area starts
	//	  +8	word index where the qpos area ends and the packed low-bits area starts
	//	 +12	word index where the low-bits area ends
	//	 +16	64 words of packed 2-bit tags
	//	+272	full 32-bit patterns, one is consumed per MISS tag
	//
	// The three packed areas are first expanded into arrays on the stack, then
	// the tag array drives the decode loop.

	// frame size and bit masks (the macros carry the $ so they expand to immediates)
	#define	WK_LOCALS_SIZE		$7324
	#define	WK_TAG_MASK			$0x03030303		/* one 2-bit tag per byte */
	#define	WK_QPOS_MASK		$0x0f0f0f0f		/* one 4-bit qpos per byte */
	#define	WK_LOW10_MASK		$0x3ff			/* low 10 bits of a word */
	#define	WK_HIGH22_MASK		$-1024			/* high 22 bits of a word */

	// arrays living in the frame
	#define	WK_DICT				-88(%ebp)		/* 16-word dictionary */
	#define	WK_TAGS_BUF_END		-264(%ebp)		/* WK_TAGS_BUF + 1024 */
	#define	WK_TAGS_BUF			-1288(%ebp)		/* one byte per tag */
	#define	WK_QPOS_BUF			-2488(%ebp)		/* one byte per qpos */
	#define	WK_LOW_BITS_BUF		-7288(%ebp)		/* one word per 10-bit field */

	// pointer-sized slots living in the frame
	#define	WK_FULL_PATT_PTR	-7292(%ebp)		/* next full pattern to consume */
	#define	WK_LOW_BITS_PTR		-7296(%ebp)		/* next entry of WK_LOW_BITS_BUF */
	#define	WK_TAGS_PTR			-7300(%ebp)		/* address of WK_TAGS_BUF */
	#define	WK_TAGS_END_PTR		-7304(%ebp)		/* address of WK_TAGS_BUF_END */
	#define	WK_DICT_PTR			-7308(%ebp)		/* address of WK_DICT */
	#define	WK_DICT_SLOT_TMP	-7324(%ebp)		/* spill slot used by the partial-match path */

	// ---- prologue: frame pointer, callee-saved registers, locals ----

	pushl	%ebp
	movl	%esp, %ebp
	pushl	%edi
	pushl	%esi
	pushl	%ebx
	subl	WK_LOCALS_SIZE, %esp

	// ---- PRELOAD_DICTIONARY: every one of the 16 dictionary words starts as 1 ----

	movl	$1, -88(%ebp)
	movl	$1, -84(%ebp)
	movl	$1, -80(%ebp)
	movl	$1, -76(%ebp)
	movl	$1, -72(%ebp)
	movl	$1, -68(%ebp)
	movl	$1, -64(%ebp)
	movl	$1, -60(%ebp)
	movl	$1, -56(%ebp)
	movl	$1, -52(%ebp)
	movl	$1, -48(%ebp)
	movl	$1, -44(%ebp)
	movl	$1, -40(%ebp)
	movl	$1, -36(%ebp)
	movl	$1, -32(%ebp)
	movl	$1, -28(%ebp)

	// ---- WK_unpack_2bits: 64 packed words -> 1024 tags, one tag per byte ----

	movl	8(%ebp), %edi						// %edi = src_buf
	leal	272(%edi), %eax						// end of the packed tags: 16 (header) + 256 (tags)
	movl	%eax, WK_FULL_PATT_PTR				// the full patterns start right there
	leal	16(%edi), %eax						// start of the packed tags, just past the header
	leal	WK_TAGS_BUF, %edx					// %edx = address of the tag byte array
	movl	%edx, WK_TAGS_PTR					// keep it for the decode loop
	cmpl	%eax, WK_FULL_PATT_PTR				// end vs start of the packed tags
	jbe		L_tags_unpacked						// nothing to unpack if end <= start
	movl	%edx, %ecx							// %ecx = write cursor into the tag array
	xorl	%esi, %esi							// %esi = index of the packed word
	movl	WK_TAG_MASK, %ebx
	.align 4,0x90
L_unpack_tags:
	movl	16(%edi,%esi,4), %edx				// w = packed word (16 skips the header)
	movl	%edx, %eax
	andl	%ebx, %eax							// bits 0-1 of every byte of w
	movl	%eax, (%ecx)						// tags 0..3 of this group
	shrl	$2, %edx							// w >> 2
	movl	%edx, %eax
	andl	%ebx, %eax
	movl	%eax, 4(%ecx)						// tags 4..7
	shrl	$2, %edx							// w >> 4
	movl	%edx, %eax
	andl	%ebx, %eax
	movl	%eax, 8(%ecx)						// tags 8..11
	shrl	$2, %edx							// w >> 6
	andl	%ebx, %edx
	movl	%edx, 12(%ecx)						// tags 12..15
	addl	$16, %ecx							// 16 tag bytes were written
	incl	%esi
	cmpl	$64, %esi
	jne		L_unpack_tags						// 64 packed words in total
L_tags_unpacked:

	// ---- WK_unpack_4bits: packed qpos words -> one qpos per byte ----

	movl	8(%edi), %eax						// header: word index of the qpos area end
	leal	(%edi,%eax,4), %esi					// %esi = end of the qpos area
	movl	4(%edi), %eax						// header: word index of the qpos area start
	leal	(%edi,%eax,4), %ecx					// %ecx = read cursor
	cmpl	%ecx, %esi
	jbe		L_qpos_unpacked						// empty area, nothing to unpack
	leal	WK_QPOS_BUF, %edi					// %edi = write cursor (src_buf is reloaded later)
	movl	WK_QPOS_MASK, %ebx
L_unpack_qpos:
	movl	(%ecx), %eax						// w
	addl	$4, %ecx							// advance to the next packed word
	movl	%eax, %edx
	andl	%ebx, %edx							// low nibble of every byte
	movl	%edx, (%edi)
	shrl	$4, %eax
	andl	%ebx, %eax							// high nibble of every byte
	movl	%eax, 4(%edi)
	addl	$8, %edi							// 8 qpos bytes were written
	cmpl	%ecx, %esi
	ja		L_unpack_qpos						// until the read cursor reaches the area end
L_qpos_unpacked:

	// ---- WK_unpack_3_tenbits: packed low-bits words -> three words each ----
	// %esi still holds the end of the qpos area, which is where the low bits start.

	movl	8(%ebp), %edx						// src_buf
	movl	12(%edx), %eax						// header: word index of the low-bits area end
	leal	(%edx,%eax,4), %edi					// %edi = end of the low-bits area
	cmpl	%edi, %esi
	jae		L_low_bits_unpacked					// empty area, nothing to unpack
	leal	WK_LOW_BITS_BUF, %ecx				// %ecx = write cursor
	movl	WK_LOW10_MASK, %ebx

	.align 4,0x90
L_unpack_low_bits:
	movl	(%esi), %eax						// w
	addl	$4, %esi							// advance to the next packed word
	movl	%eax, %edx
	andl	%ebx, %edx
	movl	%edx, (%ecx)						// bits 0..9
	shrl	$10, %eax
	movl	%eax, %edx
	andl	%ebx, %edx
	movl	%edx, 4(%ecx)						// bits 10..19
	shrl	$10, %eax
	movl	%eax, 8(%ecx)						// bits 20..31, stored without masking
	addl	$12, %ecx							// three words were written
	cmpl	%esi, %edi
	ja		L_unpack_low_bits					// until the read cursor reaches the area end
L_low_bits_unpacked:

	// ---- decode loop set-up ----
	//	%ebx	address of L_pic_base, used to reach _hashLookupTable position independently
	//	%esi	next tag
	//	%edi	next qpos
	//	%ecx	dest_buf cursor kept one word ahead, every store goes to -4(%ecx)

	call	L_pic_base
L_pic_base:
	popl	%ebx								// %ebx = run-time address of L_pic_base

	movl	WK_TAGS_PTR, %esi					// next tag = first unpacked tag
	leal	WK_QPOS_BUF, %edi					// next qpos = first unpacked qpos
	movl	12(%ebp), %ecx						// dest_buf
	addl	$4, %ecx							// keep the cursor one word ahead
	leal	WK_LOW_BITS_BUF, %eax
	movl	%eax, WK_LOW_BITS_PTR				// next low bits = first unpacked entry
	leal	WK_TAGS_BUF_END, %edx
	movl	%edx, WK_TAGS_END_PTR				// the loop stops at this tag address
	leal	WK_DICT, %eax
	movl	%eax, WK_DICT_PTR					// address of the dictionary
	jmp		L_decode_tag

	// ---- tag is not 1: the flags still come from the compare against 1 ----
	.align 4,0x90
L_not_partial:
	jle		L_zero_tag							// tag below 1: zero word
	cmpb	$2, %al
	je		L_miss_tag							// tag 2: dictionary miss

	// remaining tag value (3 after 2-bit unpacking): exact dictionary match
	movsbl	(%edi),%eax							// qpos
	incl	%edi
	movl	WK_DICT_PTR, %edx
	movl	(%edx,%eax,4), %eax					// dictionary[qpos]
	movl	%eax, -4(%ecx)						// emit it
	.align 4,0x90
L_advance:
	incl	%esi								// next tag
	addl	$4, %ecx							// next output word
	cmpl	WK_TAGS_END_PTR, %esi
	jae		L_finished							// all tags consumed
L_decode_tag:
	movzbl	(%esi), %eax						// current tag
	cmpb	$1, %al
	jne		L_not_partial

	// tag 1: partial match, high 22 bits from the dictionary, low 10 bits from the stream
	movsbl	(%edi),%edx							// qpos
	movl	WK_DICT_PTR, %eax
	leal	(%eax,%edx,4), %edx					// address of dictionary[qpos]
	movl	%edx, WK_DICT_SLOT_TMP				// spill it, %edx is needed below
	incl	%edi
	movl	(%edx), %eax						// dictionary word
	andl	WK_HIGH22_MASK, %eax				// keep its high 22 bits
	movl	WK_LOW_BITS_PTR, %edx
	orl		(%edx), %eax						// merge the next unpacked low-bits entry
	addl	$4, %edx
	movl	%edx, WK_LOW_BITS_PTR				// consume that entry
	movl	WK_DICT_SLOT_TMP, %edx
	movl	%eax, (%edx)						// the dictionary entry becomes the new word
	movl	%eax, -4(%ecx)						// emit it
	incl	%esi								// next tag
	addl	$4, %ecx							// next output word
	cmpl	WK_TAGS_END_PTR, %esi
	jb		L_decode_tag						// more tags to decode
L_finished:

	// ---- epilogue: drop the locals, restore registers in reverse push order ----
	addl	WK_LOCALS_SIZE, %esp
	popl	%ebx
	popl	%esi
	popl	%edi
	leave
	ret

	// tag 2: the word was not in the dictionary, take it verbatim from the input
	.align 4,0x90
L_miss_tag:
	movl	WK_FULL_PATT_PTR, %edx
	movl	(%edx), %eax								// word = next full pattern
	addl	$4, %edx
	movl	%edx, WK_FULL_PATT_PTR						// consume it
	movl	%eax, %edx
	shrl	$10, %edx
	andl	$255, %edx									// hash index = bits 10..17 of the word
	movsbl	_hashLookupTable-L_pic_base(%ebx,%edx),%edx	// byte offset of the dictionary slot
	movl	%eax, -88(%ebp,%edx)						// store the word in that slot (-88 is WK_DICT)
	movl	%eax, -4(%ecx)								// emit it
	jmp		L_advance

	// tag 0: the word is zero
	.align 4,0x90
L_zero_tag:
	movl	$0, -4(%ecx)
	jmp		L_advance

#endif	// __i386__
230
#if defined __x86_64__

	// WKdm decompressor, x86_64 flavour (AT&T syntax).
	//
	//	%rdi	src_buf  : compressed input
	//	%rsi	dest_buf : output, one 32-bit word is stored per tag (1024 tags)
	//
	// src_buf as consumed below (byte offsets):
	//	  +4	word index (relative to src_buf) where the packed qpos area starts
	//	  +8	word index where the qpos area ends and the packed low-bits area starts
	//	 +12	word index where the low-bits area ends
	//	 +16	64 words of packed 2-bit tags
	//	+272	full 32-bit patterns, one is consumed per MISS tag
	//
	// Frame layout (offsets from %rbp):
	//	  -80 ..   -17	dictionary, 16 words
	//	-1280 ..  -257	tempTagsArray, one byte per tag
	//	-2480 .. -1281	tempQPosArray, one byte per qpos
	//	-7280 .. -2481	tempLowBitsArray, one word per 10-bit field
	//
	// %rbx and %r12 are saved and restored; this routine makes no calls.

	.text
	.align 4,0x90

	.globl _WKdm_decompress
_WKdm_decompress:

	// save registers, and allocate stack memory for local variables
	//
	// The two pushes below take 16 bytes and the lowest local is at -7280(%rbp),
	// so 7280 - 16 = 7264 bytes are reserved. That puts the lowest local exactly
	// at %rsp: nothing is kept below the stack pointer, and the routine does not
	// depend on a red zone being available.

	pushq	%rbp
	movq	%rsp, %rbp
	pushq	%r12
	pushq	%rbx
	subq	$7264, %rsp

	movq	%rsi, %r12					// dest_buf (%rsi is reused below)

	// PRELOAD_DICTIONARY; dictionary starting address : -80(%rbp)
	movl	$1, -80(%rbp)
	movl	$1, -76(%rbp)
	movl	$1, -72(%rbp)
	movl	$1, -68(%rbp)
	movl	$1, -64(%rbp)
	movl	$1, -60(%rbp)
	movl	$1, -56(%rbp)
	movl	$1, -52(%rbp)
	movl	$1, -48(%rbp)
	movl	$1, -44(%rbp)
	movl	$1, -40(%rbp)
	movl	$1, -36(%rbp)
	movl	$1, -32(%rbp)
	movl	$1, -28(%rbp)
	movl	$1, -24(%rbp)
	movl	$1, -20(%rbp)

	// WK_unpack_2bits(TAGS_AREA_START(src_buf), TAGS_AREA_END(src_buf), tempTagsArray);
	leaq	272(%rdi), %r10				// TAGS_AREA_END, also the first full pattern (next_full_patt)
	leaq	16(%rdi), %rax				// TAGS_AREA_START
	leaq	-1280(%rbp), %rsi			// tempTagsArray
	cmpq	%rax, %r10					// TAGS_AREA_END vs TAGS_AREA_START
	jbe		1f							// if TAGS_AREA_END <= TAGS_AREA_START, skip L_WK_unpack_2bits
	movq	%rsi, %rcx					// write cursor into tempTagsArray
	xorl	%r8d, %r8d					// i = 0
	.align 4,0x90
L_WK_unpack_2bits:
	movl	16(%rdi,%r8,4), %edx		// w = packed word i (16 skips the header)
	movl	%edx, %eax					// w
	andl	$0x03030303, %eax			// 1st 4 tags, one per byte
	movl	%eax, (%rcx)				// write 1st 4 tags
	movl	%edx, %eax					// w
	shrl	$2, %eax					// w>>2
	andl	$0x03030303, %eax			// 2nd 4 tags
	movl	%eax, 4(%rcx)				// write 2nd 4 tags
	shrl	$4, %edx					// w>>4
	movl	%edx, %eax					// w>>4
	andl	$0x03030303, %eax			// 3rd 4 tags
	movl	%eax, 8(%rcx)				// write 3rd 4 tags
	shrl	$2, %edx					// w>>6
	andl	$0x03030303, %edx			// 4th 4 tags
	movl	%edx, 12(%rcx)				// write 4th 4 tags
	addq	$16, %rcx					// 16 tag bytes were written
	incq	%r8							// i++
	cmpq	$64, %r8					// i vs 64
	jne		L_WK_unpack_2bits			// repeat loop until i==64
1:

	// WK_unpack_4bits(QPOS_AREA_START(src_buf), QPOS_AREA_END(src_buf), tempQPosArray);

	mov		8(%rdi), %eax				// WKdm header qpos end (32-bit load, zero-extended)
	leaq	(%rdi,%rax,4), %r9			// QPOS_AREA_END
	mov		4(%rdi), %eax				// WKdm header qpos start
	leaq	(%rdi,%rax,4), %r8			// QPOS_AREA_START, used as next_word
	leaq	-2480(%rbp), %rbx			// tempQPosArray
	cmpq	%r8, %r9					// QPOS_AREA_END vs QPOS_AREA_START
	jbe		1f							// if QPOS_AREA_END <= QPOS_AREA_START, skip L_WK_unpack_4bits
	leaq	8(%rbx), %rcx				// write cursor, kept 8 bytes ahead
L_WK_unpack_4bits:
	movl	(%r8), %eax					// w = *next_word
	movl	%eax, %edx					// w
	andl	$0x0f0f0f0f, %edx			// 1st 4 qpos, one per byte
	movl	%edx, -8(%rcx)				// write 1st 4 qpos
	shrl	$4, %eax					// w>>4
	andl	$0x0f0f0f0f, %eax			// 2nd 4 qpos
	movl	%eax, -4(%rcx)				// write 2nd 4 qpos
	addq	$4, %r8						// next_word++
	addq	$8, %rcx					// 8 qpos bytes were written
	cmpq	%r8, %r9					// QPOS_AREA_END vs next_word
	ja		L_WK_unpack_4bits			// repeat loop while QPOS_AREA_END > next_word
1:

	// WK_unpack_3_tenbits(LOW_BITS_AREA_START(src_buf), LOW_BITS_AREA_END(src_buf), tempLowBitsArray);
	// %r9 still holds QPOS_AREA_END, which is where the low-bits area starts.

	mov		12(%rdi), %eax				// LOW_BITS_AREA_END offset
	leaq	(%rdi,%rax,4), %rdi			// LOW_BITS_AREA_END (src_buf is not needed afterwards)
	leaq	-7280(%rbp), %r11			// tempLowBitsArray
	cmpq	%rdi, %r9					// LOW_BITS_AREA_START vs LOW_BITS_AREA_END
	jae		1f							// if START>=END, skip L_WK_unpack_3_tenbits
	leaq	12(%r11), %rcx				// write cursor, kept 12 bytes ahead
L_WK_unpack_3_tenbits:
	movl	(%r9), %eax					// w = *next_word
	movl	%eax, %edx					// w
	andl	$0x3ff, %edx				// 1st tenbits
	movl	%edx, -12(%rcx)				// write 1st tenbits
	shrl	$10, %eax					// w >> 10
	movl	%eax, %edx					// w >> 10
	andl	$0x3ff, %edx				// 2nd tenbits
	movl	%edx, -8(%rcx)				// write 2nd tenbits
	shrl	$10, %eax					// w >> 20, stored without masking
	movl	%eax, -4(%rcx)				// write 3rd tenbits
	addq	$4, %r9						// next_word++
	addq	$12, %rcx					// three words were written
	cmpq	%r9, %rdi					// LOW_BITS_AREA_END vs next_word
	ja		L_WK_unpack_3_tenbits		// repeat loop if LOW_BITS_AREA_END > next_word
1:

	// decode loop register assignment
	//	%rdi	next_tag
	//	%r8		next_qpos
	//	%rcx	dest_buf cursor kept one word ahead, every store goes to -4(%rcx)
	//	%r9		next_low_bits
	//	%r10	next_full_patt
	//	%r11	dictionary
	//	%rbx	_hashLookupTable
	//	%rsi	tag_area_end

	movq	%rsi, %rdi						// next_tag = tempTagsArray
	movq	%rbx, %r8						// next_qpos = tempQPosArray
	leaq	4(%r12), %rcx					// dest_buf + 4
	movq	%r11, %r9						// next_low_bits = tempLowBitsArray
	leaq	-80(%rbp), %r11					// dictionary
	leaq	_hashLookupTable(%rip), %rbx	// hash look up table
	leaq	1024(%rsi), %rsi				// tag_area_end = tempTagsArray + 1024

	jmp	L11
	.align 4,0x90
L31:
	// tag is not 1; the flags still come from the compare against 1
	jle		L_ZERO_TAG						// tag below 1
	cmpb	$2, %al							// MISS_TAG
	je		L_MISS_TAG
L_EXACT_TAG:
	movsbq	(%r8),%rax						// qpos = *next_qpos
	incq	%r8								// next_qpos++
	movl	(%r11,%rax,4), %eax				// w = dictionary[qpos]
	movl	%eax, -4(%rcx)					// *dest_buf = w
	.align 4,0x90
L_next:
	incq	%rdi							// next_tag++
	addq	$4, %rcx						// dest_buf++
	cmpq	%rsi, %rdi						// next_tag vs tag_area_end
	jae		L_done							// if next_tag >= tag_area_end, all tags are consumed
L11:
	movzbl	(%rdi), %eax					// tag = *next_tag
	cmpb	$1, %al							// partial match tag ?
	jne		L31
L_PARTIAL_TAG:
	movsbq	(%r8),%rdx						// qpos = *next_qpos
	leaq	(%r11,%rdx,4), %rdx				// dict_location = &dictionary[qpos]
	incq	%r8								// next_qpos++
	movl	(%rdx), %eax					// read dictionary word
	andl	$-1024, %eax					// clear lower 10 bits
	orl		(%r9), %eax						// merge the lower 10 bits from *next_low_bits
	addq	$4, %r9							// next_low_bits++
	movl	%eax, (%rdx)					// *dict_location = newly formed word
	movl	%eax, -4(%rcx)					// *dest_buf = newly formed word
	jmp		L_next							// next_tag < tag_area_end always holds here, so advance unconditionally
L_done:

	// release stack memory, restore registers, and return
	addq	$7264, %rsp
	popq	%rbx
	popq	%r12
	leave
	ret

	.align 4,0x90
L_MISS_TAG:
	movl	(%r10), %eax					// w = *next_full_patt
	addq	$4, %r10						// next_full_patt++
	movl	%eax, %edx						// w
	shrl	$10, %edx						// w>>10
	movzbl	%dl, %edx						// 8-bit hash table index
	movsbq	(%rbx,%rdx),%rdx				// byte offset of the dictionary slot
	movl	%eax, -80(%rbp,%rdx)			// store w in that dictionary slot
	movl	%eax, -4(%rcx)					// *dest_buf = word
	jmp		L_next							// repeat the loop

	.align 4,0x90
L_ZERO_TAG:
	movl	$0, -4(%rcx)					// *dest_buf = 0
	jmp		L_next							// repeat the loop

#endif	// __x86_64__
415
.globl _hashLookupTable
	.const
	.align 5
_hashLookupTable:
	// 256 one-byte entries, 16 per row. The decompressor indexes this table
	// with bits 10..17 of a missed word and adds the entry, unscaled, to the
	// dictionary base address, so every entry is the byte offset of one of
	// the 16 dictionary words (a multiple of 4 in the range 0..60).
	.byte	 0, 52,  8, 56, 16, 12, 28, 20,  4, 36, 48, 24, 44, 40, 32, 60
	.byte	 8, 12, 28, 20,  4, 60, 16, 36, 24, 48, 44, 32, 52, 56, 40, 12
	.byte	 8, 48, 16, 52, 60, 28, 56, 32, 20, 24, 36, 40, 44,  4,  8, 40
	.byte	60, 32, 20, 44,  4, 36, 52, 24, 16, 56, 48, 12, 28, 16,  8, 40
	.byte	36, 28, 32, 12,  4, 44, 52, 20, 24, 48, 60, 56, 40, 48,  8, 32
	.byte	28, 36,  4, 44, 20, 56, 60, 24, 52, 16, 12, 12,  4, 48, 20,  8
	.byte	52, 16, 60, 24, 36, 44, 28, 56, 40, 32, 36, 20, 24, 60, 40, 44
	.byte	52, 16, 32,  4, 48,  8, 28, 56, 12, 28, 32, 40, 52, 36, 16, 20
	.byte	48,  8,  4, 60, 24, 56, 44, 12,  8, 36, 24, 28, 16, 60, 20, 56
	.byte	32, 40, 48, 12,  4, 44, 52, 44, 40, 12, 56,  8, 36, 24, 60, 28
	.byte	48,  4, 32, 20, 16, 52, 60, 12, 24, 36,  8,  4, 16, 56, 48, 44
	.byte	40, 52, 32, 20, 28, 32, 12, 36, 28, 24, 56, 40, 16, 52, 44,  4
	.byte	20, 60,  8, 48, 48, 52, 12, 20, 32, 44, 36, 28,  4, 40, 24,  8
	.byte	56, 60, 16, 36, 32,  8, 40,  4, 52, 24, 44, 20, 12, 28, 48, 56
	.byte	16, 60,  4, 52, 60, 48, 20, 16, 56, 44, 24,  8, 40, 12, 32, 28
	.byte	36, 24, 32, 12,  4, 20, 16, 60, 36, 28,  8, 52, 40, 48, 44, 56
676