• Home
  • History
  • Annotate
  • Line#
  • Navigate
  • Raw
  • Download
  • only in /netgear-R7000-V1.0.7.12_1.2.5/components/opensource/linux/linux-2.6.36/drivers/block/drbd/
1/*
2-*- linux-c -*-
3   drbd_receiver.c
4   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5
6   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9
10   drbd is free software; you can redistribute it and/or modify
11   it under the terms of the GNU General Public License as published by
12   the Free Software Foundation; either version 2, or (at your option)
13   any later version.
14
15   drbd is distributed in the hope that it will be useful,
16   but WITHOUT ANY WARRANTY; without even the implied warranty of
17   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18   GNU General Public License for more details.
19
20   You should have received a copy of the GNU General Public License
21   along with drbd; see the file COPYING.  If not, write to
22   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23 */
24
25#ifndef _DRBD_VLI_H
26#define _DRBD_VLI_H
27
28/*
29 * At a granularity of 4KiB storage represented per bit,
30 * and stroage sizes of several TiB,
31 * and possibly small-bandwidth replication,
32 * the bitmap transfer time can take much too long,
33 * if transmitted in plain text.
34 *
35 * We try to reduce the transfered bitmap information
36 * by encoding runlengths of bit polarity.
37 *
38 * We never actually need to encode a "zero" (runlengths are positive).
39 * But then we have to store the value of the first bit.
40 * The first bit of information thus shall encode if the first runlength
41 * gives the number of set or unset bits.
42 *
43 * We assume that large areas are either completely set or unset,
44 * which gives good compression with any runlength method,
45 * even when encoding the runlength as fixed size 32bit/64bit integers.
46 *
47 * Still, there may be areas where the polarity flips every few bits,
48 * and encoding the runlength sequence of those areas with fix size
49 * integers would be much worse than plaintext.
50 *
51 * We want to encode small runlength values with minimum code length,
52 * while still being able to encode a Huge run of all zeros.
53 *
54 * Thus we need a Variable Length Integer encoding, VLI.
55 *
56 * For some cases, we produce more code bits than plaintext input.
57 * We need to send incompressible chunks as plaintext, skip over them
58 * and then see if the next chunk compresses better.
59 *
60 * We don't care too much about "excellent" compression ratio for large
61 * runlengths (all set/all clear): whether we achieve a factor of 100
62 * or 1000 is not that much of an issue.
63 * We do not want to waste too much on short runlengths in the "noisy"
64 * parts of the bitmap, though.
65 *
66 * There are endless variants of VLI, we experimented with:
67 *  * simple byte-based
68 *  * various bit based with different code word length.
69 *
70 * To avoid yet an other configuration parameter (choice of bitmap compression
71 * algorithm) which was difficult to explain and tune, we just chose the one
72 * variant that turned out best in all test cases.
73 * Based on real world usage patterns, with device sizes ranging from a few GiB
74 * to several TiB, file server/mailserver/webserver/mysql/postgress,
75 * mostly idle to really busy, the all time winner (though sometimes only
76 * marginally better) is:
77 */
78
79
80/* compression "table":
81 transmitted   x                                0.29
82 as plaintext x                                  ........................
83             x                                   ........................
84            x                                    ........................
85           x    0.59                         0.21........................
86          x      ........................................................
87         x       .. c ...................................................
88        x    0.44.. o ...................................................
89       x .......... d ...................................................
90      x  .......... e ...................................................
91     X.............   ...................................................
92    x.............. b ...................................................
932.0x............... i ...................................................
94 #X................ t ...................................................
95 #................. s ...........................  plain bits  ..........
96-+-----------------------------------------------------------------------
97 1             16              32                              64
98*/
99
100/* LEVEL: (total bits, prefix bits, prefix value),
101 * sorted ascending by number of total bits.
102 * The rest of the code table is calculated at compiletime from this. */
103
104/* fibonacci data 1, 1, ... */
105#define VLI_L_1_1() do { \
106	LEVEL( 2, 1, 0x00); \
107	LEVEL( 3, 2, 0x01); \
108	LEVEL( 5, 3, 0x03); \
109	LEVEL( 7, 4, 0x07); \
110	LEVEL(10, 5, 0x0f); \
111	LEVEL(14, 6, 0x1f); \
112	LEVEL(21, 8, 0x3f); \
113	LEVEL(29, 8, 0x7f); \
114	LEVEL(42, 8, 0xbf); \
115	LEVEL(64, 8, 0xff); \
116	} while (0)
117
118/* finds a suitable level to decode the least significant part of in.
119 * returns number of bits consumed.
120 *
121 * BUG() for bad input, as that would mean a buggy code table. */
122static inline int vli_decode_bits(u64 *out, const u64 in)
123{
124	u64 adj = 1;
125
126#define LEVEL(t,b,v)					\
127	do {						\
128		if ((in & ((1 << b) -1)) == v) {	\
129			*out = ((in & ((~0ULL) >> (64-t))) >> b) + adj;	\
130			return t;			\
131		}					\
132		adj += 1ULL << (t - b);			\
133	} while (0)
134
135	VLI_L_1_1();
136
137	/* NOT REACHED, if VLI_LEVELS code table is defined properly */
138	BUG();
139#undef LEVEL
140}
141
142/* return number of code bits needed,
143 * or negative error number */
144static inline int __vli_encode_bits(u64 *out, const u64 in)
145{
146	u64 max = 0;
147	u64 adj = 1;
148
149	if (in == 0)
150		return -EINVAL;
151
152#define LEVEL(t,b,v) do {		\
153		max += 1ULL << (t - b);	\
154		if (in <= max) {	\
155			if (out)	\
156				*out = ((in - adj) << b) | v;	\
157			return t;	\
158		}			\
159		adj = max + 1;		\
160	} while (0)
161
162	VLI_L_1_1();
163
164	return -EOVERFLOW;
165#undef LEVEL
166}
167
168#undef VLI_L_1_1
169
170/* code from here down is independend of actually used bit code */
171
172/*
173 * Code length is determined by some unique (e.g. unary) prefix.
174 * This encodes arbitrary bit length, not whole bytes: we have a bit-stream,
175 * not a byte stream.
176 */
177
178/* for the bitstream, we need a cursor */
179struct bitstream_cursor {
180	/* the current byte */
181	u8 *b;
182	/* the current bit within *b, nomalized: 0..7 */
183	unsigned int bit;
184};
185
186/* initialize cursor to point to first bit of stream */
187static inline void bitstream_cursor_reset(struct bitstream_cursor *cur, void *s)
188{
189	cur->b = s;
190	cur->bit = 0;
191}
192
193/* advance cursor by that many bits; maximum expected input value: 64,
194 * but depending on VLI implementation, it may be more. */
195static inline void bitstream_cursor_advance(struct bitstream_cursor *cur, unsigned int bits)
196{
197	bits += cur->bit;
198	cur->b = cur->b + (bits >> 3);
199	cur->bit = bits & 7;
200}
201
202/* the bitstream itself knows its length */
203struct bitstream {
204	struct bitstream_cursor cur;
205	unsigned char *buf;
206	size_t buf_len;		/* in bytes */
207
208	/* for input stream:
209	 * number of trailing 0 bits for padding
210	 * total number of valid bits in stream: buf_len * 8 - pad_bits */
211	unsigned int pad_bits;
212};
213
214static inline void bitstream_init(struct bitstream *bs, void *s, size_t len, unsigned int pad_bits)
215{
216	bs->buf = s;
217	bs->buf_len = len;
218	bs->pad_bits = pad_bits;
219	bitstream_cursor_reset(&bs->cur, bs->buf);
220}
221
222static inline void bitstream_rewind(struct bitstream *bs)
223{
224	bitstream_cursor_reset(&bs->cur, bs->buf);
225	memset(bs->buf, 0, bs->buf_len);
226}
227
228/* Put (at most 64) least significant bits of val into bitstream, and advance cursor.
229 * Ignores "pad_bits".
230 * Returns zero if bits == 0 (nothing to do).
231 * Returns number of bits used if successful.
232 *
233 * If there is not enough room left in bitstream,
234 * leaves bitstream unchanged and returns -ENOBUFS.
235 */
236static inline int bitstream_put_bits(struct bitstream *bs, u64 val, const unsigned int bits)
237{
238	unsigned char *b = bs->cur.b;
239	unsigned int tmp;
240
241	if (bits == 0)
242		return 0;
243
244	if ((bs->cur.b + ((bs->cur.bit + bits -1) >> 3)) - bs->buf >= bs->buf_len)
245		return -ENOBUFS;
246
247	/* paranoia: strip off hi bits; they should not be set anyways. */
248	if (bits < 64)
249		val &= ~0ULL >> (64 - bits);
250
251	*b++ |= (val & 0xff) << bs->cur.bit;
252
253	for (tmp = 8 - bs->cur.bit; tmp < bits; tmp += 8)
254		*b++ |= (val >> tmp) & 0xff;
255
256	bitstream_cursor_advance(&bs->cur, bits);
257	return bits;
258}
259
260/* Fetch (at most 64) bits from bitstream into *out, and advance cursor.
261 *
262 * If more than 64 bits are requested, returns -EINVAL and leave *out unchanged.
263 *
264 * If there are less than the requested number of valid bits left in the
265 * bitstream, still fetches all available bits.
266 *
267 * Returns number of actually fetched bits.
268 */
269static inline int bitstream_get_bits(struct bitstream *bs, u64 *out, int bits)
270{
271	u64 val;
272	unsigned int n;
273
274	if (bits > 64)
275		return -EINVAL;
276
277	if (bs->cur.b + ((bs->cur.bit + bs->pad_bits + bits -1) >> 3) - bs->buf >= bs->buf_len)
278		bits = ((bs->buf_len - (bs->cur.b - bs->buf)) << 3)
279			- bs->cur.bit - bs->pad_bits;
280
281	if (bits == 0) {
282		*out = 0;
283		return 0;
284	}
285
286	/* get the high bits */
287	val = 0;
288	n = (bs->cur.bit + bits + 7) >> 3;
289	/* n may be at most 9, if cur.bit + bits > 64 */
290	/* which means this copies at most 8 byte */
291	if (n) {
292		memcpy(&val, bs->cur.b+1, n - 1);
293		val = le64_to_cpu(val) << (8 - bs->cur.bit);
294	}
295
296	/* we still need the low bits */
297	val |= bs->cur.b[0] >> bs->cur.bit;
298
299	/* and mask out bits we don't want */
300	val &= ~0ULL >> (64 - bits);
301
302	bitstream_cursor_advance(&bs->cur, bits);
303	*out = val;
304
305	return bits;
306}
307
308/* encodes @in as vli into @bs;
309
310 * return values
311 *  > 0: number of bits successfully stored in bitstream
312 * -ENOBUFS @bs is full
313 * -EINVAL input zero (invalid)
314 * -EOVERFLOW input too large for this vli code (invalid)
315 */
316static inline int vli_encode_bits(struct bitstream *bs, u64 in)
317{
318	u64 code = code;
319	int bits = __vli_encode_bits(&code, in);
320
321	if (bits <= 0)
322		return bits;
323
324	return bitstream_put_bits(bs, code, bits);
325}
326
327#endif
328