/*-
 * Copyright (C) 2016 Cavium Inc.
 * All rights reserved.
 *
 * Developed by Semihalf.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <machine/asm.h>
__FBSDID("$FreeBSD$");


#include "assym.s"

	/*
	 * void bzero(void *p, size_t size)
	 *
	 * Zero 'size' bytes starting at 'p'.
	 *
	 * In:   x0 - p (buffer pointer)
	 *       x1 - size (bytes to zero)
	 * Out:  nothing (callers ignore the register state on return)
	 *
	 * Internal register protocol (live across the whole routine):
	 *       x5 - number of whole cache lines still to zero with "dc zva";
	 *            0 means the cache-assisted path is not in use.
	 *       x6 - one past the last byte to zero (only valid when x5 != 0).
	 *       x7 - "dc zva" line size in bytes, loaded from the kernel
	 *            variable dczva_line_size (0 disables the fast path).
	 */
ENTRY(bzero)
	/* Zero-length request: nothing to do. */
	cbz	x1, ending

	/*
	 * x5 is the number of cache lines to zero; it is calculated later
	 * and becomes non-zero only if the buffer is long enough to be
	 * zeroed by whole cache lines (and if "dc zva" is allowed).
	 * It must be cleared up front because the small-buffer paths jump
	 * straight to "normal" without ever computing it, and lead_out_end
	 * tests x5 to decide whether a cache-line pass follows.
	 *
	 * "normal" serves two roles: it zeroes buffers of <= 16 bytes
	 * outright, and for large buffers it acts as a lead-in that zeroes
	 * just enough bytes to reach a cache-line-aligned address.  A
	 * non-zero x5 after "normal" completes means the second role was
	 * in effect and the cache-line loop runs next.
	 */
	mov	x5, xzr

	/*
	 * No use of cache-assisted zeroing for buffers of <= 16 bytes.
	 * NOTE(review): b.le is a signed comparison; a size with the top
	 * bit set (> 2^63-1, practically impossible) would take the slow
	 * path only -- behavior stays correct, just not fast.  Confirm
	 * this was intentional vs. b.ls.
	 */
	cmp	x1, #0x10
	b.le	normal

	/*
	 * Load the size of the line that a "dc zva" call will clear.
	 * 0 means the instruction is not allowed (or not configured).
	 */
	ldr	x7, =dczva_line_size
	ldr	x7, [x7]
	cbz	x7, normal

	/*
	 * The buffer must be at least one cache line long to use cache
	 * zeroing (alignment is handled after the jump).
	 */
	cmp	x1, x7
	b.lt	normal

	/*
	 * Calculate the number of bytes up to the next cache-aligned
	 * address (x4).  x2 = line_size - 1 (offset mask), x3 = its
	 * complement (alignment mask); x4 = roundup(p) - p.
	 * If x4 == 0 the pointer is already line-aligned, so skip the
	 * lead-in math (x5 is computed below from x4, so it stays valid
	 * via the cbz test that follows).
	 */
	sub	x2, x7, #0x01
	mov	x3, -1
	eor	x3, x3, x2
	add	x4, x0, x2
	and	x4, x4, x3
	subs	x4, x4, x0
	b.eq	normal

	/*
	 * Calculate the number of whole "lines" in the remaining buffer:
	 * x5 = (size - lead_in_bytes) >> log2(line_size).
	 * rbit+clz of the (power-of-two) line size yields its log2.
	 */
	sub	x5, x1, x4
	rbit	x2, x7
	clz	x2, x2
	lsr	x5, x5, x2

	/*
	 * If the number of cache lines is 0 we cannot zero by cache
	 * lines after all, so take the normal path (x5 == 0 keeps the
	 * cache-line loop disabled).
	 */
	cbz	x5, normal
	/* x6 is the final (one past the end) address to zero. */
	add	x6, x0, x1

	/*
	 * We get here with x5 non-zero, so "normal" will be used only to
	 * align the buffer before cache zeroing: shrink the size it sees
	 * to x4, the number of bytes needed for alignment.
	 */
	mov	x1, x4

	/* When jumping here: x0 holds the pointer, x1 holds the size. */
normal:
	/*
	 * Get the buffer offset from a 16-byte-aligned address; 0 means
	 * the pointer is already aligned.
	 */
	ands	x2, x0, #0x0f
	b.eq	aligned_to_16
	/*
	 * Calculate how many one-byte stores reach the next 8-byte
	 * aligned address (x2 = 8 - (offset & 7); an offset of exactly 8
	 * yields 8, i.e. byte stores all the way to 16-byte alignment).
	 */
	ands	x2, x2, #0x07
	mov	x3, #0x08
	sub	x2, x3, x2
	/*
	 * x2 is the number of bytes missing for alignment, x1 is the
	 * buffer size; never store more than the caller asked for.
	 */
	cmp	x1, x2
	csel	x2, x1, x2, le
	sub	x1, x1, x2

	/*
	 * Byte-by-byte zeroing: stores just enough bytes to align the
	 * pointer, capped at "size".
	 */
align:
	strb	wzr, [x0], #0x01
	subs	x2, x2, #0x01
	b.ne	align

	/* The pointer is now aligned to 8 bytes. */
	cmp	x1, #0x10
	b.lt	lead_out
	/*
	 * Check whether one more 8-byte store is needed to reach a
	 * 16-byte-aligned address, and do it.
	 */
	tbz	x0, #0x03, aligned_to_16
	str	xzr, [x0], #0x08
	sub	x1, x1, #0x08

	/* When jumping here: x0 is a 16-byte-aligned address, x1 is the size. */
aligned_to_16:
	/* If fewer than 16 bytes remain, use lead_out to zero the rest. */
	cmp	x1, #0x10
	b.lt	lead_out

	/* Main loop: zero 16 bytes per iteration, x2 = size / 16. */
	lsr	x2, x1, #0x04
zero_by_16:
	stp	xzr, xzr, [x0], #0x10
	subs	x2, x2, #0x01
	b.ne	zero_by_16

	/*
	 * Lead-out requires the address to be aligned to 8 bytes.  It
	 * zeroes buffers of size < 16 and whatever the zero_by_16 loop
	 * could not cover (the residue in size & 0xf).
	 */
	ands	x1, x1, #0x0f
	b.eq	lead_out_end
lead_out:
	/* Binary-decomposed tail: one store per set bit of x1 (8/4/2/1). */
	tbz	x1, #0x03, lead_out_dword
	str	xzr, [x0], #0x08
lead_out_dword:
	tbz	x1, #0x02, lead_out_word
	str	wzr, [x0], #0x04
lead_out_word:
	tbz	x1, #0x01, lead_out_byte
	strh	wzr, [x0], #0x02
lead_out_byte:
	tbz	x1, #0x00, lead_out_end
	strb	wzr, [x0], #0x01

lead_out_end:
	/*
	 * If x5 is non-zero, "normal" was used as a lead-in to align the
	 * buffer address to the cache-line size; fall through to the
	 * cache-line loop.  Otherwise we are done.
	 */
	cbz	x5, ending

	/*
	 * Here x5 holds the number of lines to zero, x6 the final
	 * address of the buffer, x0 a cache-line-aligned pointer, and
	 * x7 the cache-line size in bytes.
	 */
cache_line_zero:
	dc	zva, x0
	add	x0, x0, x7
	subs	x5, x5, #0x01
	b.ne	cache_line_zero

	/*
	 * Any remaining bytes past the last full line?  x1 = x6 - x0;
	 * re-enter "normal" to zero them (x5 is now 0, so this second
	 * pass terminates at lead_out_end).
	 */
	subs	x1, x6, x0
	b.ne	normal

ending:
	ret

END(bzero)