/* xlr_csum_nocopy.S revision 201917 */
#include <machine/asm.h>

/*
 * xlr_csum_partial_nocopy
 *
 * Inputs:
 *   a0: source address
 *   a1: length of the area to checksum
 *   a2: partial checksum
 *   a3: dst (aliased below, never read or written by this file)
 *
 * Output:
 *   v0: the accumulated sum, with a2 added in and folded down by the
 *       small_csumcpy tail.
 *
 * Registers written: v0, v1, a0, a1, t0, t1, t2, t3, t7, t8.
 *
 * The whole file is assembled under ".set noreorder": the instruction
 * written directly after every branch or jump is its delay slot and runs
 * whether or not the branch is taken.  Blank lines below are placed after
 * the delay slot, not after the branch.
 *
 * The accumulate instruction is emitted as a raw .word; the annotation
 * next to each one gives the intended mnemonic (daddwc).
 * NOTE(review): presumably an RMI XLR add-with-carry that keeps the carry
 * in a status flag between uses -- confirm against the XLR manual.
 * NOTE(review): assuming the standard R-type field layout, the encodings
 * name their source register by number ($8, $9, $11, $6, $3, $0).  They
 * match the t0/t1/t3/a2/v1/zero aliases used by the surrounding loads
 * only when t0 is $8 -- confirm for the ABI this file is built with.
 */

#define src a0
#define dst a3
#define sum v0

	.text
	.set	noreorder

/*
 * Prefetch and add the four doublewords at offset .. offset+0x1f from a0
 * into v0.  Uses t0 and t1 as scratch.  Does not advance a0 and, despite
 * the name, stores nothing.
 */
	.macro CSUM_BIGCHUNK_AND_COPY offset
	pref	0,  (\offset+0x0)(a0)
	ld	t0, (\offset+0x00)(a0)
	ld	t1, (\offset+0x08)(a0)
	.word	0x70481038		/* daddwc v0, v0, t0 */
	.word	0x70491038		/* daddwc v0, v0, t1 */
	ld	t0, (\offset + 0x10)(a0)
	ld	t1, (\offset + 0x18)(a0)
	.word	0x70481038		/* daddwc v0, v0, t0 */
	.word	0x70491038		/* daddwc v0, v0, t1 */
	.endm

/*
 * small_csumcpy: tail shared by every path through the function.
 *
 * On entry t2 holds the number of bytes still to add (bits 2, 1 and 0 are
 * examined), src points at them, sum holds the running total and a2 the
 * caller's partial checksum.  Adds the trailing bytes, adds a2, folds the
 * result and returns to the caller through ra.
 */
small_csumcpy:				/* unknown src alignment and < 8 bytes to go */
	move	a1, t2

	andi	t0, a1, 4
	beqz	t0, 1f
	andi	t0, a1, 2		/* delay slot: halfword test for the next step */

	ulw	t1, (src)		/* Still a full word to go */
	daddiu	src, 4
	.word	0x70491038		/* daddwc v0, v0, t1 */

1:	move	t1, zero		/* t1 now collects the halfword and byte */
	beqz	t0, 1f
	andi	t0, a1, 1		/* delay slot: byte test for the next step */

	ulhu	t1, (src)		/* Still a halfword to go */
	daddiu	src, 2

1:	beqz	t0, 1f
	sll	t1, t1, 16		/* delay slot: always runs, moves halfword up */

	lbu	t2, (src)
	nop

#ifdef __MIPSEB__
	sll	t2, t2, 8		/* big-endian only: byte goes above bit 7 */
#endif
	or	t1, t2

1:	.word	0x70491038		/* daddwc v0, v0, t1 */

	.word	0x70461038		/* daddwc v0, v0, a2 */
	.word	0x70401038		/* daddwc v0, v0, $0 */

	/* Ideally at this point of time the status flag must be cleared */

	/*
	 * 64 -> 32: add the low word (moved to the top half) to the total so
	 * the top half holds high + low, bring it back down, then add zero.
	 */
	dsll32	v1, sum, 0
	.word	0x70431038		/* daddwc v0, v0, v1 */
	dsrl32	sum, sum, 0
	.word	0x70401038		/* daddwc v0, v0, zero */

	/* fold the checksum */
	sll	v1, sum, 16
	addu	sum, v1			/* top 16 bits = high half + low half */
	sltu	v1, sum, v1		/* v1 = carry out of that addition */
	srl	sum, sum, 16
	addu	sum, v1			/* add the carry back in */
1:					/* no branch in this file targets this label */
	.set	reorder			/* let the assembler fill the jr delay slot */
	jr	ra
	.set	noreorder

/* ------------------------------------------------------------------ */

	.align	5
LEAF(xlr_csum_partial_nocopy)
	move	sum, zero
	move	t7, zero

	sltiu	t8, a1, 0x8
	bnez	t8, small_csumcpy	/* < 8 bytes to copy */
	move	t2, a1			/* delay slot: t2 = length for the tail */

	/* a1 >= 8 here, so this branch cannot be taken. */
	beqz	a1, out
	andi	t7, src, 0x1		/* odd buffer? */

	/*
	 * Alignment ladder: consume 1, 2, 4, 8 and 16 bytes as needed so
	 * that src ends up 32-byte aligned.  a1 is reduced as bytes are
	 * consumed.  Each step leaves the next step's test value in t8.
	 */
hword_align:
	beqz	t7, word_align
	andi	t8, src, 0x2

	/*
	 * NOTE(review): the odd leading byte is added unshifted and nothing
	 * later swaps the result -- confirm callers never pass an odd src.
	 */
	lbu	t0, (src)
	dsubu	a1, a1, 0x1
	.word	0x70481038		/* daddwc v0, v0, t0 */
	daddu	src, src, 0x1
	andi	t8, src, 0x2

word_align:
	beqz	t8, dword_align
	sltiu	t8, a1, 56

	lhu	t0, (src)
	dsubu	a1, a1, 0x2
	.word	0x70481038		/* daddwc v0, v0, t0 */
	sltiu	t8, a1, 56
	daddu	src, src, 0x2

dword_align:
	/* Fewer than 56 bytes left: skip the block loops, go word by word. */
	bnez	t8, do_end_words
	move	t8, a1			/* delay slot: do_end_words takes bytes in t8 */

	andi	t8, src, 0x4
	beqz	t8, qword_align
	andi	t8, src, 0x8

	lw	t0, 0x00(src)
	dsubu	a1, a1, 0x4
	.word	0x70481038		/* daddwc v0, v0, t0 */
	daddu	src, src, 0x4
	andi	t8, src, 0x8

qword_align:
	beqz	t8, oword_align
	andi	t8, src, 0x10

	ld	t0, 0x00(src)
	dsubu	a1, a1, 0x8
	.word	0x70481038		/* daddwc v0, v0, t0 */
	daddu	src, src, 0x8
	andi	t8, src, 0x10

oword_align:
	beqz	t8, begin_movement
	dsrl	t8, a1, 0x7		/* delay slot: t8 = number of 128-byte blocks */

	ld	t3, 0x08(src)
	ld	t0, 0x00(src)
	.word	0x704b1038		/* daddwc v0, v0, t3 */
	.word	0x70481038		/* daddwc v0, v0, t0 */
	dsubu	a1, a1, 0x10
	daddu	src, src, 0x10
	dsrl	t8, a1, 0x7

	/*
	 * Block phase.  a1 is no longer decremented; the remaining 64-, 32-,
	 * word- and byte-sized pieces are picked out of its low bits.
	 */
begin_movement:
	beqz	t8, 1f
	andi	t2, a1, 0x40

move_128bytes:
	pref	0, 0x20(a0)
	pref	0, 0x40(a0)
	pref	0, 0x60(a0)
	CSUM_BIGCHUNK_AND_COPY(0x00)
	CSUM_BIGCHUNK_AND_COPY(0x20)
	CSUM_BIGCHUNK_AND_COPY(0x40)
	CSUM_BIGCHUNK_AND_COPY(0x60)
	dsubu	t8, t8, 0x01
	bnez	t8, move_128bytes	/* flag */
	daddu	src, src, 0x80

1:
	beqz	t2, 1f
	andi	t2, a1, 0x20

move_64bytes:
	pref	0, 0x20(a0)
	pref	0, 0x40(a0)
	CSUM_BIGCHUNK_AND_COPY(0x00)
	CSUM_BIGCHUNK_AND_COPY(0x20)
	daddu	src, src, 0x40

1:
	beqz	t2, do_end_words
	andi	t8, a1, 0x1c

move_32bytes:
	pref	0, 0x20(a0)
	CSUM_BIGCHUNK_AND_COPY(0x00)
	andi	t8, a1, 0x1c
	daddu	src, src, 0x20

	/* t8 = byte count on entry; the delay slot turns it into a word count. */
do_end_words:
	beqz	t8, maybe_end_cruft
	dsrl	t8, t8, 0x2

end_words:
	lw	t0, (src)
	dsubu	t8, t8, 0x1
	.word	0x70481038		/* daddwc v0, v0, t0 */
	bnez	t8, end_words
	daddu	src, src, 0x4

maybe_end_cruft:
	andi	t2, a1, 0x3		/* 0..3 trailing bytes for the tail */

small_memcpy:
	j	small_csumcpy
	move	a1, t2			/* delay slot of the jump */

	/*
	 * NOTE(review): not reachable.  The jump above is unconditional and
	 * no branch in this file targets end_bytes.  The end_bytes loop also
	 * tests a2, which the loop never changes.  Left in place so the
	 * object code is unchanged; candidate for removal.
	 */
	beqz	t2, out
	move	a1, t2

end_bytes:
	lb	t0, (src)
	dsubu	a1, a1, 0x1
	bnez	a2, end_bytes
	daddu	src, src, 0x1

out:
	jr	ra
	move	v0, sum
	END(xlr_csum_partial_nocopy)