1/* $NetBSD: i915_syncmap.c,v 1.2 2021/12/18 23:45:28 riastradh Exp $ */ 2 3/* 4 * Copyright �� 2017 Intel Corporation 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 * and/or sell copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice (including the next 14 * paragraph) shall be included in all copies or substantial portions of the 15 * Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 23 * IN THE SOFTWARE. 24 * 25 */ 26 27#include <sys/cdefs.h> 28__KERNEL_RCSID(0, "$NetBSD: i915_syncmap.c,v 1.2 2021/12/18 23:45:28 riastradh Exp $"); 29 30#include <linux/slab.h> 31 32#include "i915_syncmap.h" 33 34#include "i915_gem.h" /* GEM_BUG_ON() */ 35#include "i915_selftest.h" 36 37#define SHIFT ilog2(KSYNCMAP) 38#define MASK (KSYNCMAP - 1) 39 40/* 41 * struct i915_syncmap is a layer of a radixtree that maps a u64 fence 42 * context id to the last u32 fence seqno waited upon from that context. 43 * Unlike lib/radixtree it uses a parent pointer that allows traversal back to 44 * the root. This allows us to access the whole tree via a single pointer 45 * to the most recently used layer. We expect fence contexts to be dense 46 * and most reuse to be on the same i915_gem_context but on neighbouring 47 * engines (i.e. on adjacent contexts) and reuse the same leaf, a very 48 * effective lookup cache. If the new lookup is not on the same leaf, we 49 * expect it to be on the neighbouring branch. 50 * 51 * A leaf holds an array of u32 seqno, and has height 0. The bitmap field 52 * allows us to store whether a particular seqno is valid (i.e. allows us 53 * to distinguish unset from 0). 54 * 55 * A branch holds an array of layer pointers, and has height > 0, and always 56 * has at least 2 layers (either branches or leaves) below it. 57 * 58 * For example, 59 * for x in 60 * 0 1 2 0x10 0x11 0x200 0x201 61 * 0x500000 0x500001 0x503000 0x503001 62 * 0xE<<60: 63 * i915_syncmap_set(&sync, x, lower_32_bits(x)); 64 * will build a tree like: 65 * 0xXXXXXXXXXXXXXXXX 66 * 0-> 0x0000000000XXXXXX 67 * | 0-> 0x0000000000000XXX 68 * | | 0-> 0x00000000000000XX 69 * | | | 0-> 0x000000000000000X 0:0, 1:1, 2:2 70 * | | | 1-> 0x000000000000001X 0:10, 1:11 71 * | | 2-> 0x000000000000020X 0:200, 1:201 72 * | 5-> 0x000000000050XXXX 73 * | 0-> 0x000000000050000X 0:500000, 1:500001 74 * | 3-> 0x000000000050300X 0:503000, 1:503001 75 * e-> 0xe00000000000000X e:e 76 */ 77 78struct i915_syncmap { 79 u64 prefix; 80 unsigned int height; 81 unsigned int bitmap; 82 struct i915_syncmap *parent; 83 /* 84 * Following this header is an array of either seqno or child pointers: 85 * union { 86 * u32 seqno[KSYNCMAP]; 87 * struct i915_syncmap *child[KSYNCMAP]; 88 * }; 89 */ 90}; 91 92/** 93 * i915_syncmap_init -- initialise the #i915_syncmap 94 * @root: pointer to the #i915_syncmap 95 */ 96void i915_syncmap_init(struct i915_syncmap **root) 97{ 98 BUILD_BUG_ON_NOT_POWER_OF_2(KSYNCMAP); 99 BUILD_BUG_ON_NOT_POWER_OF_2(SHIFT); 100 BUILD_BUG_ON(KSYNCMAP > BITS_PER_TYPE((*root)->bitmap)); 101 *root = NULL; 102} 103 104static inline u32 *__sync_seqno(struct i915_syncmap *p) 105{ 106 GEM_BUG_ON(p->height); 107 return (u32 *)(p + 1); 108} 109 110static inline struct i915_syncmap **__sync_child(struct i915_syncmap *p) 111{ 112 GEM_BUG_ON(!p->height); 113 return (struct i915_syncmap **)(p + 1); 114} 115 116static inline unsigned int 117__sync_branch_idx(const struct i915_syncmap *p, u64 id) 118{ 119 return (id >> p->height) & MASK; 120} 121 122static inline unsigned int 123__sync_leaf_idx(const struct i915_syncmap *p, u64 id) 124{ 125 GEM_BUG_ON(p->height); 126 return id & MASK; 127} 128 129static inline u64 __sync_branch_prefix(const struct i915_syncmap *p, u64 id) 130{ 131 return id >> p->height >> SHIFT; 132} 133 134static inline u64 __sync_leaf_prefix(const struct i915_syncmap *p, u64 id) 135{ 136 GEM_BUG_ON(p->height); 137 return id >> SHIFT; 138} 139 140static inline bool seqno_later(u32 a, u32 b) 141{ 142 return (s32)(a - b) >= 0; 143} 144 145/** 146 * i915_syncmap_is_later -- compare against the last know sync point 147 * @root: pointer to the #i915_syncmap 148 * @id: the context id (other timeline) we are synchronising to 149 * @seqno: the sequence number along the other timeline 150 * 151 * If we have already synchronised this @root timeline with another (@id) then 152 * we can omit any repeated or earlier synchronisation requests. If the two 153 * timelines are already coupled, we can also omit the dependency between the 154 * two as that is already known via the timeline. 155 * 156 * Returns true if the two timelines are already synchronised wrt to @seqno, 157 * false if not and the synchronisation must be emitted. 158 */ 159bool i915_syncmap_is_later(struct i915_syncmap **root, u64 id, u32 seqno) 160{ 161 struct i915_syncmap *p; 162 unsigned int idx; 163 164 p = *root; 165 if (!p) 166 return false; 167 168 if (likely(__sync_leaf_prefix(p, id) == p->prefix)) 169 goto found; 170 171 /* First climb the tree back to a parent branch */ 172 do { 173 p = p->parent; 174 if (!p) 175 return false; 176 177 if (__sync_branch_prefix(p, id) == p->prefix) 178 break; 179 } while (1); 180 181 /* And then descend again until we find our leaf */ 182 do { 183 if (!p->height) 184 break; 185 186 p = __sync_child(p)[__sync_branch_idx(p, id)]; 187 if (!p) 188 return false; 189 190 if (__sync_branch_prefix(p, id) != p->prefix) 191 return false; 192 } while (1); 193 194 *root = p; 195found: 196 idx = __sync_leaf_idx(p, id); 197 if (!(p->bitmap & BIT(idx))) 198 return false; 199 200 return seqno_later(__sync_seqno(p)[idx], seqno); 201} 202 203static struct i915_syncmap * 204__sync_alloc_leaf(struct i915_syncmap *parent, u64 id) 205{ 206 struct i915_syncmap *p; 207 208 p = kmalloc(sizeof(*p) + KSYNCMAP * sizeof(u32), GFP_KERNEL); 209 if (unlikely(!p)) 210 return NULL; 211 212 p->parent = parent; 213 p->height = 0; 214 p->bitmap = 0; 215 p->prefix = __sync_leaf_prefix(p, id); 216 return p; 217} 218 219static inline void __sync_set_seqno(struct i915_syncmap *p, u64 id, u32 seqno) 220{ 221 unsigned int idx = __sync_leaf_idx(p, id); 222 223 p->bitmap |= BIT(idx); 224 __sync_seqno(p)[idx] = seqno; 225} 226 227static inline void __sync_set_child(struct i915_syncmap *p, 228 unsigned int idx, 229 struct i915_syncmap *child) 230{ 231 p->bitmap |= BIT(idx); 232 __sync_child(p)[idx] = child; 233} 234 235static noinline int __sync_set(struct i915_syncmap **root, u64 id, u32 seqno) 236{ 237 struct i915_syncmap *p = *root; 238 unsigned int idx; 239 240 if (!p) { 241 p = __sync_alloc_leaf(NULL, id); 242 if (unlikely(!p)) 243 return -ENOMEM; 244 245 goto found; 246 } 247 248 /* Caller handled the likely cached case */ 249 GEM_BUG_ON(__sync_leaf_prefix(p, id) == p->prefix); 250 251 /* Climb back up the tree until we find a common prefix */ 252 do { 253 if (!p->parent) 254 break; 255 256 p = p->parent; 257 258 if (__sync_branch_prefix(p, id) == p->prefix) 259 break; 260 } while (1); 261 262 /* 263 * No shortcut, we have to descend the tree to find the right layer 264 * containing this fence. 265 * 266 * Each layer in the tree holds 16 (KSYNCMAP) pointers, either fences 267 * or lower layers. Leaf nodes (height = 0) contain the fences, all 268 * other nodes (height > 0) are internal layers that point to a lower 269 * node. Each internal layer has at least 2 descendents. 270 * 271 * Starting at the top, we check whether the current prefix matches. If 272 * it doesn't, we have gone past our target and need to insert a join 273 * into the tree, and a new leaf node for the target as a descendent 274 * of the join, as well as the original layer. 275 * 276 * The matching prefix means we are still following the right branch 277 * of the tree. If it has height 0, we have found our leaf and just 278 * need to replace the fence slot with ourselves. If the height is 279 * not zero, our slot contains the next layer in the tree (unless 280 * it is empty, in which case we can add ourselves as a new leaf). 281 * As descend the tree the prefix grows (and height decreases). 282 */ 283 do { 284 struct i915_syncmap *next; 285 286 if (__sync_branch_prefix(p, id) != p->prefix) { 287 unsigned int above; 288 289 /* Insert a join above the current layer */ 290 next = kzalloc(sizeof(*next) + KSYNCMAP * sizeof(next), 291 GFP_KERNEL); 292 if (unlikely(!next)) 293 return -ENOMEM; 294 295 /* Compute the height at which these two diverge */ 296 above = fls64(__sync_branch_prefix(p, id) ^ p->prefix); 297 above = round_up(above, SHIFT); 298 next->height = above + p->height; 299 next->prefix = __sync_branch_prefix(next, id); 300 301 /* Insert the join into the parent */ 302 if (p->parent) { 303 idx = __sync_branch_idx(p->parent, id); 304 __sync_child(p->parent)[idx] = next; 305 GEM_BUG_ON(!(p->parent->bitmap & BIT(idx))); 306 } 307 next->parent = p->parent; 308 309 /* Compute the idx of the other branch, not our id! */ 310 idx = p->prefix >> (above - SHIFT) & MASK; 311 __sync_set_child(next, idx, p); 312 p->parent = next; 313 314 /* Ascend to the join */ 315 p = next; 316 } else { 317 if (!p->height) 318 break; 319 } 320 321 /* Descend into the next layer */ 322 GEM_BUG_ON(!p->height); 323 idx = __sync_branch_idx(p, id); 324 next = __sync_child(p)[idx]; 325 if (!next) { 326 next = __sync_alloc_leaf(p, id); 327 if (unlikely(!next)) 328 return -ENOMEM; 329 330 __sync_set_child(p, idx, next); 331 p = next; 332 break; 333 } 334 335 p = next; 336 } while (1); 337 338found: 339 GEM_BUG_ON(p->prefix != __sync_leaf_prefix(p, id)); 340 __sync_set_seqno(p, id, seqno); 341 *root = p; 342 return 0; 343} 344 345/** 346 * i915_syncmap_set -- mark the most recent syncpoint between contexts 347 * @root: pointer to the #i915_syncmap 348 * @id: the context id (other timeline) we have synchronised to 349 * @seqno: the sequence number along the other timeline 350 * 351 * When we synchronise this @root timeline with another (@id), we also know 352 * that we have synchronized with all previous seqno along that timeline. If 353 * we then have a request to synchronise with the same seqno or older, we can 354 * omit it, see i915_syncmap_is_later() 355 * 356 * Returns 0 on success, or a negative error code. 357 */ 358int i915_syncmap_set(struct i915_syncmap **root, u64 id, u32 seqno) 359{ 360 struct i915_syncmap *p = *root; 361 362 /* 363 * We expect to be called in sequence following is_later(id), which 364 * should have preloaded the root for us. 365 */ 366 if (likely(p && __sync_leaf_prefix(p, id) == p->prefix)) { 367 __sync_set_seqno(p, id, seqno); 368 return 0; 369 } 370 371 return __sync_set(root, id, seqno); 372} 373 374static void __sync_free(struct i915_syncmap *p) 375{ 376 if (p->height) { 377 unsigned int i; 378 379 while ((i = ffs(p->bitmap))) { 380 p->bitmap &= ~0u << i; 381 __sync_free(__sync_child(p)[i - 1]); 382 } 383 } 384 385 kfree(p); 386} 387 388/** 389 * i915_syncmap_free -- free all memory associated with the syncmap 390 * @root: pointer to the #i915_syncmap 391 * 392 * Either when the timeline is to be freed and we no longer need the sync 393 * point tracking, or when the fences are all known to be signaled and the 394 * sync point tracking is redundant, we can free the #i915_syncmap to recover 395 * its allocations. 396 * 397 * Will reinitialise the @root pointer so that the #i915_syncmap is ready for 398 * reuse. 399 */ 400void i915_syncmap_free(struct i915_syncmap **root) 401{ 402 struct i915_syncmap *p; 403 404 p = *root; 405 if (!p) 406 return; 407 408 while (p->parent) 409 p = p->parent; 410 411 __sync_free(p); 412 *root = NULL; 413} 414 415#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 416#include "selftests/i915_syncmap.c" 417#endif 418