# mach: bfin

// GENERIC CONVOLUTIONAL ENCODER
// This is a generic rate 1/n convolutional encoder. It computes n output
// bits for each input bit, based on n generic polynomials.
// It uses the set of BXOR_CC instructions to compute bit XOR
// reduction from a state masked by a polynomial.  For an alternate
// solution based on assembling several partial words, as in
// the BDT benchmark, see file conv_enc.c. The solution presented
// here is slower than conv_enc.c, but more generic.
//
// Forward Shift Register
// -----------------------
// This solution implements the XOR function by shifting the state
// left by one, applying a mask to the state, and reducing
// the result with a bit XOR reduction function.
//                   ----- XOR------------> G0
//                   |     |     |  |
//        +------------------------------+
//        | b0 b1 b2 b3          b14 b15 | <- in
//        +------------------------------+
//                   |  |  |  |     |
//                   ----- XOR------------> G1
// Instruction BXOR computes the bit G0 or G1 and stores it into CC
// and also into a destination reg half. Here, we take CC and rotate it
// into an output register.
// However, one can also store the output bit directly by storing
// the register half where this bit is placed. This would result
// in an output structure similar to the one in the original function
// Convolutional_Encode(), where an entire half word holds a bit.
// The resulting execution speed would be roughly twice as fast,
// since there is no need to rotate output bit via CC.
//
// Register usage in this test
// ---------------------------
//   P0      read pointer into `input`
//   P1      write pointer into `output`
//   P4      l-loop counter (one pass per 16-bit input word)
//   P5      scratch used to load the hardware loop counts LC0 / LC1
//   R0      scratch (initial state, current input word, result checks)
//   R1      output bit collector, filled one bit at a time through CC
//   R2, R3  polynomial masks 0 and 1 (only the upper halves are non-zero)
//   R4.L    destination half written by BXORSHIFT / BXOR; never read back
//   A0      encoder state: upper half = current state, lower half = input
//   I2      read pointer used when verifying `output`
//
// Loop structure
// --------------
//   l-loop (2 passes, software counted in P4): one 16-bit input word each
//     m-loop (2 passes, LC0): one 16-bit output word each
//       i-loop (8 passes, LC1): one input bit -> two output bits (G0, G1)
//   => 2 input words produce 4 output words, which are checked at the end.

.include "testutils.inc"
	start

	loadsym P0, input;
	loadsym P1, output;

	R1 = 0;	R2 = 0;R3 = 0;

	// The .L writes below are redundant after the full-register clears
	// above; they are kept so the executed instruction stream of this
	// simulator test stays exactly as it was.
	R2.L = 0;
	R2.H = 0xa01d;	// polynomial 0 (G0 taps)
	R3.L = 0;
	R3.H = 0x12f4;	// polynomial 1 (G1 taps)

	// load the initial CurrentState (0 here) into the upper half of A0
	A1 = A0 = 0;
	R0 = 0x0000;
	A0.w = R0;
	A0 = A0 << 16;

	// l-loop counter is in P4: one pass per input word
	P4 = 2(Z);
	// **** START l-LOOP *****
l$0:

	// insert 16 bits of input into lower half of A0
	// and advance input pointer
	R0 = W [ P0 ++ ] (Z);
	A0.L = R0.L;

	// P5 is only a carrier for the loop count; it is reloaded for the
	// inner loop right below.
	P5 = 2 (Z);
	LSETUP ( m$0 , m$0end ) LC0 = P5;	// **** BEGIN m-LOOP *****
m$0:

	P5 = 8 (Z);
	LSETUP ( i$1 , i$1end ) LC1 = P5;	// **** BEGIN i-LOOP *****
i$1:
	// Shift the state left by one (bringing in the next input bit),
	// mask it with polynomial 0 and XOR-reduce the result into CC.
	R4.L = CC = BXORSHIFT( A0 , R2 );	// polynom0 -> CC
	R1 = ROT R1 BY 1;			// CC -> R1
	// Same (already shifted) state, masked with polynomial 1.
	R4.L = CC = BXOR( A0 , R3 );		// polynom1 -> CC
i$1end:					// last instruction of the i-loop
	R1 = ROT R1 BY 1;			// CC -> R1

	// 8 passes x 2 bits: store the 16 collected output bits (low half
	// of R1) and advance the output pointer
m$0end:					// last instruction of the m-loop
	W [ P1 ++ ] = R1;

	P4 += -1;
	CC = P4 == 0;
	IF !CC JUMP l$0;	// **** END l-LOOP *****

	// Check results: each DBGA compares one stored output halfword
	// with its expected value.
	loadsym I2, output;
	R0.L = W [ I2 ++ ];	DBGA ( R0.L , 0x8c62 );
	R0.L = W [ I2 ++ ];	DBGA ( R0.L , 0x262e );
	R0.L = W [ I2 ++ ];	DBGA ( R0.L , 0x5b4d );
	R0.L = W [ I2 ++ ];	DBGA ( R0.L , 0x834f );
	pass

	.data
// Two 16-bit words of encoder input, consumed one word per l-loop pass.
input:
	.dw 0x999f
	.dw 0x1999

// Four 16-bit result slots, written by the encoder and verified above.
output:
	.dw 0x0000
	.dw 0x0000
	.dw 0x0000
	.dw 0x0000