198944Sobrien/* SPDX-License-Identifier: GPL-2.0-only */ 246283Sdfr/* 398944Sobrien-*- linux-c -*- 446283Sdfr drbd_receiver.c 5130803Smarcel This file is part of DRBD by Philipp Reisner and Lars Ellenberg. 6130803Smarcel 7130803Smarcel Copyright (C) 2001-2008, LINBIT Information Technologies GmbH. 898944Sobrien Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>. 946283Sdfr Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>. 1098944Sobrien 1198944Sobrien */ 1298944Sobrien 1398944Sobrien#ifndef _DRBD_VLI_H 1446283Sdfr#define _DRBD_VLI_H 1598944Sobrien 1698944Sobrien/* 1798944Sobrien * At a granularity of 4KiB storage represented per bit, 1898944Sobrien * and stroage sizes of several TiB, 1946283Sdfr * and possibly small-bandwidth replication, 2098944Sobrien * the bitmap transfer time can take much too long, 2198944Sobrien * if transmitted in plain text. 2298944Sobrien * 2398944Sobrien * We try to reduce the transferred bitmap information 2498944Sobrien * by encoding runlengths of bit polarity. 2598944Sobrien * 2698944Sobrien * We never actually need to encode a "zero" (runlengths are positive). 2798944Sobrien * But then we have to store the value of the first bit. 2898944Sobrien * The first bit of information thus shall encode if the first runlength 2998944Sobrien * gives the number of set or unset bits. 3098944Sobrien * 3198944Sobrien * We assume that large areas are either completely set or unset, 3298944Sobrien * which gives good compression with any runlength method, 3398944Sobrien * even when encoding the runlength as fixed size 32bit/64bit integers. 3498944Sobrien * 3598944Sobrien * Still, there may be areas where the polarity flips every few bits, 3698944Sobrien * and encoding the runlength sequence of those areas with fix size 3746283Sdfr * integers would be much worse than plaintext. 3846283Sdfr * 3946283Sdfr * We want to encode small runlength values with minimum code length, 40130803Smarcel * while still being able to encode a Huge run of all zeros. 41130803Smarcel * 4298944Sobrien * Thus we need a Variable Length Integer encoding, VLI. 4398944Sobrien * 4498944Sobrien * For some cases, we produce more code bits than plaintext input. 4598944Sobrien * We need to send incompressible chunks as plaintext, skip over them 46130803Smarcel * and then see if the next chunk compresses better. 47130803Smarcel * 48130803Smarcel * We don't care too much about "excellent" compression ratio for large 49130803Smarcel * runlengths (all set/all clear): whether we achieve a factor of 100 50130803Smarcel * or 1000 is not that much of an issue. 5146283Sdfr * We do not want to waste too much on short runlengths in the "noisy" 5298944Sobrien * parts of the bitmap, though. 5346283Sdfr * 5498944Sobrien * There are endless variants of VLI, we experimented with: 5598944Sobrien * * simple byte-based 5698944Sobrien * * various bit based with different code word length. 5798944Sobrien * 5898944Sobrien * To avoid yet an other configuration parameter (choice of bitmap compression 5998944Sobrien * algorithm) which was difficult to explain and tune, we just chose the one 6098944Sobrien * variant that turned out best in all test cases. 6198944Sobrien * Based on real world usage patterns, with device sizes ranging from a few GiB 6298944Sobrien * to several TiB, file server/mailserver/webserver/mysql/postgress, 6398944Sobrien * mostly idle to really busy, the all time winner (though sometimes only 6498944Sobrien * marginally better) is: 6598944Sobrien */ 6698944Sobrien 6798944Sobrien/* 6898944Sobrien * encoding is "visualised" as 6998944Sobrien * __little endian__ bitstream, least significant bit first (left most) 70130803Smarcel * 7198944Sobrien * this particular encoding is chosen so that the prefix code 7298944Sobrien * starts as unary encoding the level, then modified so that 7398944Sobrien * 10 levels can be described in 8bit, with minimal overhead 7498944Sobrien * for the smaller levels. 7598944Sobrien * 7698944Sobrien * Number of data bits follow fibonacci sequence, with the exception of the 7798944Sobrien * last level (+1 data bit, so it makes 64bit total). The only worse code when 7898944Sobrien * encoding bit polarity runlength is 1 plain bits => 2 code bits. 79130803Smarcelprefix data bits max val N�� data bits 8098944Sobrien0 x 0x2 1 8198944Sobrien10 x 0x4 1 82130803Smarcel110 xx 0x8 2 83130803Smarcel1110 xxx 0x10 3 84130803Smarcel11110 xxx xx 0x30 5 85130803Smarcel111110 xx xxxxxx 0x130 8 86130803Smarcel11111100 xxxxxxxx xxxxx 0x2130 13 8798944Sobrien11111110 xxxxxxxx xxxxxxxx xxxxx 0x202130 21 88130803Smarcel11111101 xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xx 0x400202130 34 89130803Smarcel11111111 xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx 56 90130803Smarcel * maximum encodable value: 0x100000400202130 == 2**56 + some */ 9198944Sobrien 9298944Sobrien/* compression "table": 9398944Sobrien transmitted x 0.29 9498944Sobrien as plaintext x ........................ 9598944Sobrien x ........................ 9698944Sobrien x ........................ 97130803Smarcel x 0.59 0.21........................ 9898944Sobrien x ........................................................ 9998944Sobrien x .. c ................................................... 10098944Sobrien x 0.44.. o ................................................... 10198944Sobrien x .......... d ................................................... 10298944Sobrien x .......... e ................................................... 10398944Sobrien X............. ................................................... 10498944Sobrien x.............. b ................................................... 10598944Sobrien2.0x............... i ................................................... 106130803Smarcel #X................ t ................................................... 10798944Sobrien #................. s ........................... plain bits .......... 10898944Sobrien-+----------------------------------------------------------------------- 10998944Sobrien 1 16 32 64 11098944Sobrien*/ 11198944Sobrien 11298944Sobrien/* LEVEL: (total bits, prefix bits, prefix value), 11398944Sobrien * sorted ascending by number of total bits. 11498944Sobrien * The rest of the code table is calculated at compiletime from this. */ 11598944Sobrien 11698944Sobrien/* fibonacci data 1, 1, ... */ 117130803Smarcel#define VLI_L_1_1() do { \ 11898944Sobrien LEVEL( 2, 1, 0x00); \ 11998944Sobrien LEVEL( 3, 2, 0x01); \ 12098944Sobrien LEVEL( 5, 3, 0x03); \ 12198944Sobrien LEVEL( 7, 4, 0x07); \ 12298944Sobrien LEVEL(10, 5, 0x0f); \ 12398944Sobrien LEVEL(14, 6, 0x1f); \ 12498944Sobrien LEVEL(21, 8, 0x3f); \ 12598944Sobrien LEVEL(29, 8, 0x7f); \ 12698944Sobrien LEVEL(42, 8, 0xbf); \ 12798944Sobrien LEVEL(64, 8, 0xff); \ 128130803Smarcel } while (0) 12998944Sobrien 13098944Sobrien/* finds a suitable level to decode the least significant part of in. 13198944Sobrien * returns number of bits consumed. 13298944Sobrien * 13398944Sobrien * BUG() for bad input, as that would mean a buggy code table. */ 13498944Sobrienstatic inline int vli_decode_bits(u64 *out, const u64 in) 13598944Sobrien{ 13698944Sobrien u64 adj = 1; 13798944Sobrien 13898944Sobrien#define LEVEL(t,b,v) \ 13998944Sobrien do { \ 140130803Smarcel if ((in & ((1 << b) -1)) == v) { \ 14198944Sobrien *out = ((in & ((~0ULL) >> (64-t))) >> b) + adj; \ 14298944Sobrien return t; \ 14398944Sobrien } \ 14498944Sobrien adj += 1ULL << (t - b); \ 14598944Sobrien } while (0) 14698944Sobrien 14798944Sobrien VLI_L_1_1(); 14898944Sobrien 14998944Sobrien /* NOT REACHED, if VLI_LEVELS code table is defined properly */ 15098944Sobrien BUG(); 151130803Smarcel#undef LEVEL 15298944Sobrien} 15398944Sobrien 15498944Sobrien/* return number of code bits needed, 15598944Sobrien * or negative error number */ 15698944Sobrienstatic inline int __vli_encode_bits(u64 *out, const u64 in) 15798944Sobrien{ 15898944Sobrien u64 max = 0; 15998944Sobrien u64 adj = 1; 16098944Sobrien 16198944Sobrien if (in == 0) 162130803Smarcel return -EINVAL; 16398944Sobrien 16498944Sobrien#define LEVEL(t,b,v) do { \ 16598944Sobrien max += 1ULL << (t - b); \ 16698944Sobrien if (in <= max) { \ 16798944Sobrien if (out) \ 16898944Sobrien *out = ((in - adj) << b) | v; \ 16998944Sobrien return t; \ 17098944Sobrien } \ 17198944Sobrien adj = max + 1; \ 17298944Sobrien } while (0) 173130803Smarcel 17498944Sobrien VLI_L_1_1(); 17598944Sobrien 17698944Sobrien return -EOVERFLOW; 17798944Sobrien#undef LEVEL 17898944Sobrien} 17998944Sobrien 18098944Sobrien#undef VLI_L_1_1 18198944Sobrien 18298944Sobrien/* code from here down is independend of actually used bit code */ 18398944Sobrien 18498944Sobrien/* 18598944Sobrien * Code length is determined by some unique (e.g. unary) prefix. 18698944Sobrien * This encodes arbitrary bit length, not whole bytes: we have a bit-stream, 18798944Sobrien * not a byte stream. 18898944Sobrien */ 18998944Sobrien 19098944Sobrien/* for the bitstream, we need a cursor */ 19198944Sobrienstruct bitstream_cursor { 192130803Smarcel /* the current byte */ 19398944Sobrien u8 *b; 19498944Sobrien /* the current bit within *b, nomalized: 0..7 */ 19598944Sobrien unsigned int bit; 19698944Sobrien}; 19798944Sobrien 19898944Sobrien/* initialize cursor to point to first bit of stream */ 19998944Sobrienstatic inline void bitstream_cursor_reset(struct bitstream_cursor *cur, void *s) 20098944Sobrien{ 20198944Sobrien cur->b = s; 20298944Sobrien cur->bit = 0; 203130803Smarcel} 20498944Sobrien 20598944Sobrien/* advance cursor by that many bits; maximum expected input value: 64, 20698944Sobrien * but depending on VLI implementation, it may be more. */ 20798944Sobrienstatic inline void bitstream_cursor_advance(struct bitstream_cursor *cur, unsigned int bits) 20898944Sobrien{ 20998944Sobrien bits += cur->bit; 21098944Sobrien cur->b = cur->b + (bits >> 3); 21198944Sobrien cur->bit = bits & 7; 21298944Sobrien} 21398944Sobrien 214130803Smarcel/* the bitstream itself knows its length */ 21598944Sobrienstruct bitstream { 21698944Sobrien struct bitstream_cursor cur; 21798944Sobrien unsigned char *buf; 21898944Sobrien size_t buf_len; /* in bytes */ 21998944Sobrien 22098944Sobrien /* for input stream: 22198944Sobrien * number of trailing 0 bits for padding 22298944Sobrien * total number of valid bits in stream: buf_len * 8 - pad_bits */ 22398944Sobrien unsigned int pad_bits; 22498944Sobrien}; 225130803Smarcel 22698944Sobrienstatic inline void bitstream_init(struct bitstream *bs, void *s, size_t len, unsigned int pad_bits) 22798944Sobrien{ 228130803Smarcel bs->buf = s; 229130803Smarcel bs->buf_len = len; 230130803Smarcel bs->pad_bits = pad_bits; 231130803Smarcel bitstream_cursor_reset(&bs->cur, bs->buf); 232130803Smarcel} 23398944Sobrien 234130803Smarcelstatic inline void bitstream_rewind(struct bitstream *bs) 23598944Sobrien{ 236130803Smarcel bitstream_cursor_reset(&bs->cur, bs->buf); 237130803Smarcel memset(bs->buf, 0, bs->buf_len); 238130803Smarcel} 23998944Sobrien 240130803Smarcel/* Put (at most 64) least significant bits of val into bitstream, and advance cursor. 241130803Smarcel * Ignores "pad_bits". 242130803Smarcel * Returns zero if bits == 0 (nothing to do). 24398944Sobrien * Returns number of bits used if successful. 24498944Sobrien * 24598944Sobrien * If there is not enough room left in bitstream, 24698944Sobrien * leaves bitstream unchanged and returns -ENOBUFS. 24798944Sobrien */ 24898944Sobrienstatic inline int bitstream_put_bits(struct bitstream *bs, u64 val, const unsigned int bits) 24998944Sobrien{ 250130803Smarcel unsigned char *b = bs->cur.b; 25198944Sobrien unsigned int tmp; 25298944Sobrien 25398944Sobrien if (bits == 0) 25498944Sobrien return 0; 25598944Sobrien 25698944Sobrien if ((bs->cur.b + ((bs->cur.bit + bits -1) >> 3)) - bs->buf >= bs->buf_len) 25798944Sobrien return -ENOBUFS; 25898944Sobrien 25998944Sobrien /* paranoia: strip off hi bits; they should not be set anyways. */ 260130803Smarcel if (bits < 64) 26198944Sobrien val &= ~0ULL >> (64 - bits); 26298944Sobrien 26398944Sobrien *b++ |= (val & 0xff) << bs->cur.bit; 264130803Smarcel 26598944Sobrien for (tmp = 8 - bs->cur.bit; tmp < bits; tmp += 8) 266130803Smarcel *b++ |= (val >> tmp) & 0xff; 267130803Smarcel 268130803Smarcel bitstream_cursor_advance(&bs->cur, bits); 269130803Smarcel return bits; 27098944Sobrien} 27198944Sobrien 27298944Sobrien/* Fetch (at most 64) bits from bitstream into *out, and advance cursor. 273130803Smarcel * 274130803Smarcel * If more than 64 bits are requested, returns -EINVAL and leave *out unchanged. 275130803Smarcel * 27698944Sobrien * If there are less than the requested number of valid bits left in the 277130803Smarcel * bitstream, still fetches all available bits. 278130803Smarcel * 27998944Sobrien * Returns number of actually fetched bits. 28098944Sobrien */ 28198944Sobrienstatic inline int bitstream_get_bits(struct bitstream *bs, u64 *out, int bits) 28298944Sobrien{ 28398944Sobrien u64 val; 28498944Sobrien unsigned int n; 28598944Sobrien 28698944Sobrien if (bits > 64) 287130803Smarcel return -EINVAL; 28898944Sobrien 28998944Sobrien if (bs->cur.b + ((bs->cur.bit + bs->pad_bits + bits -1) >> 3) - bs->buf >= bs->buf_len) 29098944Sobrien bits = ((bs->buf_len - (bs->cur.b - bs->buf)) << 3) 29198944Sobrien - bs->cur.bit - bs->pad_bits; 29298944Sobrien 29398944Sobrien if (bits == 0) { 29498944Sobrien *out = 0; 29598944Sobrien return 0; 29698944Sobrien } 29798944Sobrien 29898944Sobrien /* get the high bits */ 29998944Sobrien val = 0; 30098944Sobrien n = (bs->cur.bit + bits + 7) >> 3; 301130803Smarcel /* n may be at most 9, if cur.bit + bits > 64 */ 30298944Sobrien /* which means this copies at most 8 byte */ 30398944Sobrien if (n) { 30498944Sobrien memcpy(&val, bs->cur.b+1, n - 1); 305130803Smarcel val = le64_to_cpu(val) << (8 - bs->cur.bit); 30698944Sobrien } 307130803Smarcel 308130803Smarcel /* we still need the low bits */ 309130803Smarcel val |= bs->cur.b[0] >> bs->cur.bit; 31098944Sobrien 311130803Smarcel /* and mask out bits we don't want */ 31298944Sobrien val &= ~0ULL >> (64 - bits); 313130803Smarcel 314130803Smarcel bitstream_cursor_advance(&bs->cur, bits); 315130803Smarcel *out = val; 31698944Sobrien 31798944Sobrien return bits; 31898944Sobrien} 31998944Sobrien 32098944Sobrien/* encodes @in as vli into @bs; 32198944Sobrien 322130803Smarcel * return values 32398944Sobrien * > 0: number of bits successfully stored in bitstream 32498944Sobrien * -ENOBUFS @bs is full 32598944Sobrien * -EINVAL input zero (invalid) 32698944Sobrien * -EOVERFLOW input too large for this vli code (invalid) 32798944Sobrien */ 32898944Sobrienstatic inline int vli_encode_bits(struct bitstream *bs, u64 in) 32998944Sobrien{ 33098944Sobrien u64 code; 33198944Sobrien int bits = __vli_encode_bits(&code, in); 33298944Sobrien 33398944Sobrien if (bits <= 0) 33498944Sobrien return bits; 33598944Sobrien 336130803Smarcel return bitstream_put_bits(bs, code, bits); 33798944Sobrien} 33898944Sobrien 33998944Sobrien#endif 340130803Smarcel