• Home
  • History
  • Annotate
  • Line#
  • Navigate
  • Raw
  • Download
  • only in /netgear-R7000-V1.0.7.12_1.2.5/components/opensource/linux/linux-2.6.36/fs/reiserfs/
1/*
2 *  Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3 */
4
5/*
6 *  Written by Anatoly P. Pinchuk pap@namesys.botik.ru
7 *  Programm System Institute
8 *  Pereslavl-Zalessky Russia
9 */
10
11/*
12 *  This file contains functions dealing with S+tree
13 *
14 * B_IS_IN_TREE
15 * copy_item_head
16 * comp_short_keys
17 * comp_keys
18 * comp_short_le_keys
19 * le_key2cpu_key
20 * comp_le_keys
21 * bin_search
22 * get_lkey
23 * get_rkey
24 * key_in_buffer
25 * decrement_bcount
26 * reiserfs_check_path
27 * pathrelse_and_restore
28 * pathrelse
29 * search_by_key_reada
30 * search_by_key
31 * search_for_position_by_key
32 * comp_items
33 * prepare_for_direct_item
34 * prepare_for_direntry_item
35 * prepare_for_delete_or_cut
36 * calc_deleted_bytes_number
37 * init_tb_struct
38 * padd_item
39 * reiserfs_delete_item
40 * reiserfs_delete_solid_item
41 * reiserfs_delete_object
42 * maybe_indirect_to_direct
43 * indirect_to_direct_roll_back
44 * reiserfs_cut_from_item
45 * truncate_directory
46 * reiserfs_do_truncate
47 * reiserfs_paste_into_item
48 * reiserfs_insert_item
49 */
50
51#include <linux/time.h>
52#include <linux/string.h>
53#include <linux/pagemap.h>
54#include <linux/reiserfs_fs.h>
55#include <linux/buffer_head.h>
56#include <linux/quotaops.h>
57
58/* Does the buffer contain a disk block which is in the tree. */
59inline int B_IS_IN_TREE(const struct buffer_head *bh)
60{
61
62	RFALSE(B_LEVEL(bh) > MAX_HEIGHT,
63	       "PAP-1010: block (%b) has too big level (%z)", bh, bh);
64
65	return (B_LEVEL(bh) != FREE_LEVEL);
66}
67
68//
69// to gets item head in le form
70//
71inline void copy_item_head(struct item_head *to,
72			   const struct item_head *from)
73{
74	memcpy(to, from, IH_SIZE);
75}
76
77/* k1 is pointer to on-disk structure which is stored in little-endian
78   form. k2 is pointer to cpu variable. For key of items of the same
79   object this returns 0.
80   Returns: -1 if key1 < key2
81   0 if key1 == key2
82   1 if key1 > key2 */
83inline int comp_short_keys(const struct reiserfs_key *le_key,
84			   const struct cpu_key *cpu_key)
85{
86	__u32 n;
87	n = le32_to_cpu(le_key->k_dir_id);
88	if (n < cpu_key->on_disk_key.k_dir_id)
89		return -1;
90	if (n > cpu_key->on_disk_key.k_dir_id)
91		return 1;
92	n = le32_to_cpu(le_key->k_objectid);
93	if (n < cpu_key->on_disk_key.k_objectid)
94		return -1;
95	if (n > cpu_key->on_disk_key.k_objectid)
96		return 1;
97	return 0;
98}
99
100/* k1 is pointer to on-disk structure which is stored in little-endian
101   form. k2 is pointer to cpu variable.
102   Compare keys using all 4 key fields.
103   Returns: -1 if key1 < key2 0
104   if key1 = key2 1 if key1 > key2 */
105static inline int comp_keys(const struct reiserfs_key *le_key,
106			    const struct cpu_key *cpu_key)
107{
108	int retval;
109
110	retval = comp_short_keys(le_key, cpu_key);
111	if (retval)
112		return retval;
113	if (le_key_k_offset(le_key_version(le_key), le_key) <
114	    cpu_key_k_offset(cpu_key))
115		return -1;
116	if (le_key_k_offset(le_key_version(le_key), le_key) >
117	    cpu_key_k_offset(cpu_key))
118		return 1;
119
120	if (cpu_key->key_length == 3)
121		return 0;
122
123	/* this part is needed only when tail conversion is in progress */
124	if (le_key_k_type(le_key_version(le_key), le_key) <
125	    cpu_key_k_type(cpu_key))
126		return -1;
127
128	if (le_key_k_type(le_key_version(le_key), le_key) >
129	    cpu_key_k_type(cpu_key))
130		return 1;
131
132	return 0;
133}
134
135inline int comp_short_le_keys(const struct reiserfs_key *key1,
136			      const struct reiserfs_key *key2)
137{
138	__u32 *k1_u32, *k2_u32;
139	int key_length = REISERFS_SHORT_KEY_LEN;
140
141	k1_u32 = (__u32 *) key1;
142	k2_u32 = (__u32 *) key2;
143	for (; key_length--; ++k1_u32, ++k2_u32) {
144		if (le32_to_cpu(*k1_u32) < le32_to_cpu(*k2_u32))
145			return -1;
146		if (le32_to_cpu(*k1_u32) > le32_to_cpu(*k2_u32))
147			return 1;
148	}
149	return 0;
150}
151
152inline void le_key2cpu_key(struct cpu_key *to, const struct reiserfs_key *from)
153{
154	int version;
155	to->on_disk_key.k_dir_id = le32_to_cpu(from->k_dir_id);
156	to->on_disk_key.k_objectid = le32_to_cpu(from->k_objectid);
157
158	// find out version of the key
159	version = le_key_version(from);
160	to->version = version;
161	to->on_disk_key.k_offset = le_key_k_offset(version, from);
162	to->on_disk_key.k_type = le_key_k_type(version, from);
163}
164
165// this does not say which one is bigger, it only returns 1 if keys
166// are not equal, 0 otherwise
167inline int comp_le_keys(const struct reiserfs_key *k1,
168			const struct reiserfs_key *k2)
169{
170	return memcmp(k1, k2, sizeof(struct reiserfs_key));
171}
172
173/**************************************************************************
174 *  Binary search toolkit function                                        *
175 *  Search for an item in the array by the item key                       *
176 *  Returns:    1 if found,  0 if not found;                              *
177 *        *pos = number of the searched element if found, else the        *
178 *        number of the first element that is larger than key.            *
179 **************************************************************************/
180/* For those not familiar with binary search: lbound is the leftmost item that it
181 could be, rbound the rightmost item that it could be.  We examine the item
182 halfway between lbound and rbound, and that tells us either that we can increase
183 lbound, or decrease rbound, or that we have found it, or if lbound <= rbound that
184 there are no possible items, and we have not found it. With each examination we
185 cut the number of possible items it could be by one more than half rounded down,
186 or we find it. */
187static inline int bin_search(const void *key,	/* Key to search for. */
188			     const void *base,	/* First item in the array. */
189			     int num,	/* Number of items in the array. */
190			     int width,	/* Item size in the array.
191					   searched. Lest the reader be
192					   confused, note that this is crafted
193					   as a general function, and when it
194					   is applied specifically to the array
195					   of item headers in a node, width
196					   is actually the item header size not
197					   the item size. */
198			     int *pos /* Number of the searched for element. */
199    )
200{
201	int rbound, lbound, j;
202
203	for (j = ((rbound = num - 1) + (lbound = 0)) / 2;
204	     lbound <= rbound; j = (rbound + lbound) / 2)
205		switch (comp_keys
206			((struct reiserfs_key *)((char *)base + j * width),
207			 (struct cpu_key *)key)) {
208		case -1:
209			lbound = j + 1;
210			continue;
211		case 1:
212			rbound = j - 1;
213			continue;
214		case 0:
215			*pos = j;
216			return ITEM_FOUND;	/* Key found in the array.  */
217		}
218
219	/* bin_search did not find given key, it returns position of key,
220	   that is minimal and greater than the given one. */
221	*pos = lbound;
222	return ITEM_NOT_FOUND;
223}
224
225
226/* Minimal possible key. It is never in the tree. */
227const struct reiserfs_key MIN_KEY = { 0, 0, {{0, 0},} };
228
229/* Maximal possible key. It is never in the tree. */
230static const struct reiserfs_key MAX_KEY = {
231	__constant_cpu_to_le32(0xffffffff),
232	__constant_cpu_to_le32(0xffffffff),
233	{{__constant_cpu_to_le32(0xffffffff),
234	  __constant_cpu_to_le32(0xffffffff)},}
235};
236
237/* Get delimiting key of the buffer by looking for it in the buffers in the path, starting from the bottom
238   of the path, and going upwards.  We must check the path's validity at each step.  If the key is not in
239   the path, there is no delimiting key in the tree (buffer is first or last buffer in tree), and in this
240   case we return a special key, either MIN_KEY or MAX_KEY. */
241static inline const struct reiserfs_key *get_lkey(const struct treepath *chk_path,
242						  const struct super_block *sb)
243{
244	int position, path_offset = chk_path->path_length;
245	struct buffer_head *parent;
246
247	RFALSE(path_offset < FIRST_PATH_ELEMENT_OFFSET,
248	       "PAP-5010: invalid offset in the path");
249
250	/* While not higher in path than first element. */
251	while (path_offset-- > FIRST_PATH_ELEMENT_OFFSET) {
252
253		RFALSE(!buffer_uptodate
254		       (PATH_OFFSET_PBUFFER(chk_path, path_offset)),
255		       "PAP-5020: parent is not uptodate");
256
257		/* Parent at the path is not in the tree now. */
258		if (!B_IS_IN_TREE
259		    (parent =
260		     PATH_OFFSET_PBUFFER(chk_path, path_offset)))
261			return &MAX_KEY;
262		/* Check whether position in the parent is correct. */
263		if ((position =
264		     PATH_OFFSET_POSITION(chk_path,
265					  path_offset)) >
266		    B_NR_ITEMS(parent))
267			return &MAX_KEY;
268		/* Check whether parent at the path really points to the child. */
269		if (B_N_CHILD_NUM(parent, position) !=
270		    PATH_OFFSET_PBUFFER(chk_path,
271					path_offset + 1)->b_blocknr)
272			return &MAX_KEY;
273		/* Return delimiting key if position in the parent is not equal to zero. */
274		if (position)
275			return B_N_PDELIM_KEY(parent, position - 1);
276	}
277	/* Return MIN_KEY if we are in the root of the buffer tree. */
278	if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)->
279	    b_blocknr == SB_ROOT_BLOCK(sb))
280		return &MIN_KEY;
281	return &MAX_KEY;
282}
283
284/* Get delimiting key of the buffer at the path and its right neighbor. */
285inline const struct reiserfs_key *get_rkey(const struct treepath *chk_path,
286					   const struct super_block *sb)
287{
288	int position, path_offset = chk_path->path_length;
289	struct buffer_head *parent;
290
291	RFALSE(path_offset < FIRST_PATH_ELEMENT_OFFSET,
292	       "PAP-5030: invalid offset in the path");
293
294	while (path_offset-- > FIRST_PATH_ELEMENT_OFFSET) {
295
296		RFALSE(!buffer_uptodate
297		       (PATH_OFFSET_PBUFFER(chk_path, path_offset)),
298		       "PAP-5040: parent is not uptodate");
299
300		/* Parent at the path is not in the tree now. */
301		if (!B_IS_IN_TREE
302		    (parent =
303		     PATH_OFFSET_PBUFFER(chk_path, path_offset)))
304			return &MIN_KEY;
305		/* Check whether position in the parent is correct. */
306		if ((position =
307		     PATH_OFFSET_POSITION(chk_path,
308					  path_offset)) >
309		    B_NR_ITEMS(parent))
310			return &MIN_KEY;
311		/* Check whether parent at the path really points to the child. */
312		if (B_N_CHILD_NUM(parent, position) !=
313		    PATH_OFFSET_PBUFFER(chk_path,
314					path_offset + 1)->b_blocknr)
315			return &MIN_KEY;
316		/* Return delimiting key if position in the parent is not the last one. */
317		if (position != B_NR_ITEMS(parent))
318			return B_N_PDELIM_KEY(parent, position);
319	}
320	/* Return MAX_KEY if we are in the root of the buffer tree. */
321	if (PATH_OFFSET_PBUFFER(chk_path, FIRST_PATH_ELEMENT_OFFSET)->
322	    b_blocknr == SB_ROOT_BLOCK(sb))
323		return &MAX_KEY;
324	return &MIN_KEY;
325}
326
327/* Check whether a key is contained in the tree rooted from a buffer at a path. */
328/* This works by looking at the left and right delimiting keys for the buffer in the last path_element in
329   the path.  These delimiting keys are stored at least one level above that buffer in the tree. If the
330   buffer is the first or last node in the tree order then one of the delimiting keys may be absent, and in
331   this case get_lkey and get_rkey return a special key which is MIN_KEY or MAX_KEY. */
332static inline int key_in_buffer(struct treepath *chk_path,	/* Path which should be checked.  */
333				const struct cpu_key *key,	/* Key which should be checked.   */
334				struct super_block *sb
335    )
336{
337
338	RFALSE(!key || chk_path->path_length < FIRST_PATH_ELEMENT_OFFSET
339	       || chk_path->path_length > MAX_HEIGHT,
340	       "PAP-5050: pointer to the key(%p) is NULL or invalid path length(%d)",
341	       key, chk_path->path_length);
342	RFALSE(!PATH_PLAST_BUFFER(chk_path)->b_bdev,
343	       "PAP-5060: device must not be NODEV");
344
345	if (comp_keys(get_lkey(chk_path, sb), key) == 1)
346		/* left delimiting key is bigger, that the key we look for */
347		return 0;
348	/*  if ( comp_keys(key, get_rkey(chk_path, sb)) != -1 ) */
349	if (comp_keys(get_rkey(chk_path, sb), key) != 1)
350		/* key must be less than right delimitiing key */
351		return 0;
352	return 1;
353}
354
355int reiserfs_check_path(struct treepath *p)
356{
357	RFALSE(p->path_length != ILLEGAL_PATH_ELEMENT_OFFSET,
358	       "path not properly relsed");
359	return 0;
360}
361
362/* Drop the reference to each buffer in a path and restore
363 * dirty bits clean when preparing the buffer for the log.
364 * This version should only be called from fix_nodes() */
365void pathrelse_and_restore(struct super_block *sb,
366			   struct treepath *search_path)
367{
368	int path_offset = search_path->path_length;
369
370	RFALSE(path_offset < ILLEGAL_PATH_ELEMENT_OFFSET,
371	       "clm-4000: invalid path offset");
372
373	while (path_offset > ILLEGAL_PATH_ELEMENT_OFFSET) {
374		struct buffer_head *bh;
375		bh = PATH_OFFSET_PBUFFER(search_path, path_offset--);
376		reiserfs_restore_prepared_buffer(sb, bh);
377		brelse(bh);
378	}
379	search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
380}
381
382/* Drop the reference to each buffer in a path */
383void pathrelse(struct treepath *search_path)
384{
385	int path_offset = search_path->path_length;
386
387	RFALSE(path_offset < ILLEGAL_PATH_ELEMENT_OFFSET,
388	       "PAP-5090: invalid path offset");
389
390	while (path_offset > ILLEGAL_PATH_ELEMENT_OFFSET)
391		brelse(PATH_OFFSET_PBUFFER(search_path, path_offset--));
392
393	search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET;
394}
395
396static int is_leaf(char *buf, int blocksize, struct buffer_head *bh)
397{
398	struct block_head *blkh;
399	struct item_head *ih;
400	int used_space;
401	int prev_location;
402	int i;
403	int nr;
404
405	blkh = (struct block_head *)buf;
406	if (blkh_level(blkh) != DISK_LEAF_NODE_LEVEL) {
407		reiserfs_warning(NULL, "reiserfs-5080",
408				 "this should be caught earlier");
409		return 0;
410	}
411
412	nr = blkh_nr_item(blkh);
413	if (nr < 1 || nr > ((blocksize - BLKH_SIZE) / (IH_SIZE + MIN_ITEM_LEN))) {
414		/* item number is too big or too small */
415		reiserfs_warning(NULL, "reiserfs-5081",
416				 "nr_item seems wrong: %z", bh);
417		return 0;
418	}
419	ih = (struct item_head *)(buf + BLKH_SIZE) + nr - 1;
420	used_space = BLKH_SIZE + IH_SIZE * nr + (blocksize - ih_location(ih));
421	if (used_space != blocksize - blkh_free_space(blkh)) {
422		/* free space does not match to calculated amount of use space */
423		reiserfs_warning(NULL, "reiserfs-5082",
424				 "free space seems wrong: %z", bh);
425		return 0;
426	}
427	// return 1 here
428
429	/* check tables of item heads */
430	ih = (struct item_head *)(buf + BLKH_SIZE);
431	prev_location = blocksize;
432	for (i = 0; i < nr; i++, ih++) {
433		if (le_ih_k_type(ih) == TYPE_ANY) {
434			reiserfs_warning(NULL, "reiserfs-5083",
435					 "wrong item type for item %h",
436					 ih);
437			return 0;
438		}
439		if (ih_location(ih) >= blocksize
440		    || ih_location(ih) < IH_SIZE * nr) {
441			reiserfs_warning(NULL, "reiserfs-5084",
442					 "item location seems wrong: %h",
443					 ih);
444			return 0;
445		}
446		if (ih_item_len(ih) < 1
447		    || ih_item_len(ih) > MAX_ITEM_LEN(blocksize)) {
448			reiserfs_warning(NULL, "reiserfs-5085",
449					 "item length seems wrong: %h",
450					 ih);
451			return 0;
452		}
453		if (prev_location - ih_location(ih) != ih_item_len(ih)) {
454			reiserfs_warning(NULL, "reiserfs-5086",
455					 "item location seems wrong "
456					 "(second one): %h", ih);
457			return 0;
458		}
459		prev_location = ih_location(ih);
460	}
461
462	// one may imagine much more checks
463	return 1;
464}
465
466/* returns 1 if buf looks like an internal node, 0 otherwise */
467static int is_internal(char *buf, int blocksize, struct buffer_head *bh)
468{
469	struct block_head *blkh;
470	int nr;
471	int used_space;
472
473	blkh = (struct block_head *)buf;
474	nr = blkh_level(blkh);
475	if (nr <= DISK_LEAF_NODE_LEVEL || nr > MAX_HEIGHT) {
476		/* this level is not possible for internal nodes */
477		reiserfs_warning(NULL, "reiserfs-5087",
478				 "this should be caught earlier");
479		return 0;
480	}
481
482	nr = blkh_nr_item(blkh);
483	if (nr > (blocksize - BLKH_SIZE - DC_SIZE) / (KEY_SIZE + DC_SIZE)) {
484		/* for internal which is not root we might check min number of keys */
485		reiserfs_warning(NULL, "reiserfs-5088",
486				 "number of key seems wrong: %z", bh);
487		return 0;
488	}
489
490	used_space = BLKH_SIZE + KEY_SIZE * nr + DC_SIZE * (nr + 1);
491	if (used_space != blocksize - blkh_free_space(blkh)) {
492		reiserfs_warning(NULL, "reiserfs-5089",
493				 "free space seems wrong: %z", bh);
494		return 0;
495	}
496	// one may imagine much more checks
497	return 1;
498}
499
500// make sure that bh contains formatted node of reiserfs tree of
501// 'level'-th level
502static int is_tree_node(struct buffer_head *bh, int level)
503{
504	if (B_LEVEL(bh) != level) {
505		reiserfs_warning(NULL, "reiserfs-5090", "node level %d does "
506				 "not match to the expected one %d",
507				 B_LEVEL(bh), level);
508		return 0;
509	}
510	if (level == DISK_LEAF_NODE_LEVEL)
511		return is_leaf(bh->b_data, bh->b_size, bh);
512
513	return is_internal(bh->b_data, bh->b_size, bh);
514}
515
516#define SEARCH_BY_KEY_READA 16
517
518/*
519 * The function is NOT SCHEDULE-SAFE!
520 * It might unlock the write lock if we needed to wait for a block
521 * to be read. Note that in this case it won't recover the lock to avoid
522 * high contention resulting from too much lock requests, especially
523 * the caller (search_by_key) will perform other schedule-unsafe
524 * operations just after calling this function.
525 *
526 * @return true if we have unlocked
527 */
528static bool search_by_key_reada(struct super_block *s,
529				struct buffer_head **bh,
530				b_blocknr_t *b, int num)
531{
532	int i, j;
533	bool unlocked = false;
534
535	for (i = 0; i < num; i++) {
536		bh[i] = sb_getblk(s, b[i]);
537	}
538	/*
539	 * We are going to read some blocks on which we
540	 * have a reference. It's safe, though we might be
541	 * reading blocks concurrently changed if we release
542	 * the lock. But it's still fine because we check later
543	 * if the tree changed
544	 */
545	for (j = 0; j < i; j++) {
546		/*
547		 * note, this needs attention if we are getting rid of the BKL
548		 * you have to make sure the prepared bit isn't set on this buffer
549		 */
550		if (!buffer_uptodate(bh[j])) {
551			if (!unlocked) {
552				reiserfs_write_unlock(s);
553				unlocked = true;
554			}
555			ll_rw_block(READA, 1, bh + j);
556		}
557		brelse(bh[j]);
558	}
559	return unlocked;
560}
561
562/**************************************************************************
563 * Algorithm   SearchByKey                                                *
564 *             look for item in the Disk S+Tree by its key                *
565 * Input:  sb   -  super block                                            *
566 *         key  - pointer to the key to search                            *
567 * Output: ITEM_FOUND, ITEM_NOT_FOUND or IO_ERROR                         *
568 *         search_path - path from the root to the needed leaf            *
569 **************************************************************************/
570
571/* This function fills up the path from the root to the leaf as it
572   descends the tree looking for the key.  It uses reiserfs_bread to
573   try to find buffers in the cache given their block number.  If it
574   does not find them in the cache it reads them from disk.  For each
575   node search_by_key finds using reiserfs_bread it then uses
576   bin_search to look through that node.  bin_search will find the
577   position of the block_number of the next node if it is looking
578   through an internal node.  If it is looking through a leaf node
579   bin_search will find the position of the item which has key either
580   equal to given key, or which is the maximal key less than the given
581   key.  search_by_key returns a path that must be checked for the
582   correctness of the top of the path but need not be checked for the
583   correctness of the bottom of the path */
584/* The function is NOT SCHEDULE-SAFE! */
585int search_by_key(struct super_block *sb, const struct cpu_key *key,	/* Key to search. */
586		  struct treepath *search_path,/* This structure was
587						   allocated and initialized
588						   by the calling
589						   function. It is filled up
590						   by this function.  */
591		  int stop_level	/* How far down the tree to search. To
592					   stop at leaf level - set to
593					   DISK_LEAF_NODE_LEVEL */
594    )
595{
596	b_blocknr_t block_number;
597	int expected_level;
598	struct buffer_head *bh;
599	struct path_element *last_element;
600	int node_level, retval;
601	int right_neighbor_of_leaf_node;
602	int fs_gen;
603	struct buffer_head *reada_bh[SEARCH_BY_KEY_READA];
604	b_blocknr_t reada_blocks[SEARCH_BY_KEY_READA];
605	int reada_count = 0;
606
607#ifdef CONFIG_REISERFS_CHECK
608	int repeat_counter = 0;
609#endif
610
611	PROC_INFO_INC(sb, search_by_key);
612
613	/* As we add each node to a path we increase its count.  This means that
614	   we must be careful to release all nodes in a path before we either
615	   discard the path struct or re-use the path struct, as we do here. */
616
617	pathrelse(search_path);
618
619	right_neighbor_of_leaf_node = 0;
620
621	/* With each iteration of this loop we search through the items in the
622	   current node, and calculate the next current node(next path element)
623	   for the next iteration of this loop.. */
624	block_number = SB_ROOT_BLOCK(sb);
625	expected_level = -1;
626	while (1) {
627
628#ifdef CONFIG_REISERFS_CHECK
629		if (!(++repeat_counter % 50000))
630			reiserfs_warning(sb, "PAP-5100",
631					 "%s: there were %d iterations of "
632					 "while loop looking for key %K",
633					 current->comm, repeat_counter,
634					 key);
635#endif
636
637		/* prep path to have another element added to it. */
638		last_element =
639		    PATH_OFFSET_PELEMENT(search_path,
640					 ++search_path->path_length);
641		fs_gen = get_generation(sb);
642
643		/* Read the next tree node, and set the last element in the path to
644		   have a pointer to it. */
645		if ((bh = last_element->pe_buffer =
646		     sb_getblk(sb, block_number))) {
647			bool unlocked = false;
648
649			if (!buffer_uptodate(bh) && reada_count > 1)
650				/* may unlock the write lock */
651				unlocked = search_by_key_reada(sb, reada_bh,
652						    reada_blocks, reada_count);
653			/*
654			 * If we haven't already unlocked the write lock,
655			 * then we need to do that here before reading
656			 * the current block
657			 */
658			if (!buffer_uptodate(bh) && !unlocked) {
659				reiserfs_write_unlock(sb);
660				unlocked = true;
661			}
662			ll_rw_block(READ, 1, &bh);
663			wait_on_buffer(bh);
664
665			if (unlocked)
666				reiserfs_write_lock(sb);
667			if (!buffer_uptodate(bh))
668				goto io_error;
669		} else {
670		      io_error:
671			search_path->path_length--;
672			pathrelse(search_path);
673			return IO_ERROR;
674		}
675		reada_count = 0;
676		if (expected_level == -1)
677			expected_level = SB_TREE_HEIGHT(sb);
678		expected_level--;
679
680		/* It is possible that schedule occurred. We must check whether the key
681		   to search is still in the tree rooted from the current buffer. If
682		   not then repeat search from the root. */
683		if (fs_changed(fs_gen, sb) &&
684		    (!B_IS_IN_TREE(bh) ||
685		     B_LEVEL(bh) != expected_level ||
686		     !key_in_buffer(search_path, key, sb))) {
687			PROC_INFO_INC(sb, search_by_key_fs_changed);
688			PROC_INFO_INC(sb, search_by_key_restarted);
689			PROC_INFO_INC(sb,
690				      sbk_restarted[expected_level - 1]);
691			pathrelse(search_path);
692
693			/* Get the root block number so that we can repeat the search
694			   starting from the root. */
695			block_number = SB_ROOT_BLOCK(sb);
696			expected_level = -1;
697			right_neighbor_of_leaf_node = 0;
698
699			/* repeat search from the root */
700			continue;
701		}
702
703		/* only check that the key is in the buffer if key is not
704		   equal to the MAX_KEY. Latter case is only possible in
705		   "finish_unfinished()" processing during mount. */
706		RFALSE(comp_keys(&MAX_KEY, key) &&
707		       !key_in_buffer(search_path, key, sb),
708		       "PAP-5130: key is not in the buffer");
709#ifdef CONFIG_REISERFS_CHECK
710		if (REISERFS_SB(sb)->cur_tb) {
711			print_cur_tb("5140");
712			reiserfs_panic(sb, "PAP-5140",
713				       "schedule occurred in do_balance!");
714		}
715#endif
716
717		// make sure, that the node contents look like a node of
718		// certain level
719		if (!is_tree_node(bh, expected_level)) {
720			reiserfs_error(sb, "vs-5150",
721				       "invalid format found in block %ld. "
722				       "Fsck?", bh->b_blocknr);
723			pathrelse(search_path);
724			return IO_ERROR;
725		}
726
727		/* ok, we have acquired next formatted node in the tree */
728		node_level = B_LEVEL(bh);
729
730		PROC_INFO_BH_STAT(sb, bh, node_level - 1);
731
732		RFALSE(node_level < stop_level,
733		       "vs-5152: tree level (%d) is less than stop level (%d)",
734		       node_level, stop_level);
735
736		retval = bin_search(key, B_N_PITEM_HEAD(bh, 0),
737				      B_NR_ITEMS(bh),
738				      (node_level ==
739				       DISK_LEAF_NODE_LEVEL) ? IH_SIZE :
740				      KEY_SIZE,
741				      &(last_element->pe_position));
742		if (node_level == stop_level) {
743			return retval;
744		}
745
746		/* we are not in the stop level */
747		if (retval == ITEM_FOUND)
748			/* item has been found, so we choose the pointer which is to the right of the found one */
749			last_element->pe_position++;
750
751		/* if item was not found we choose the position which is to
752		   the left of the found item. This requires no code,
753		   bin_search did it already. */
754
755		/* So we have chosen a position in the current node which is
756		   an internal node.  Now we calculate child block number by
757		   position in the node. */
758		block_number =
759		    B_N_CHILD_NUM(bh, last_element->pe_position);
760
761		/* if we are going to read leaf nodes, try for read ahead as well */
762		if ((search_path->reada & PATH_READA) &&
763		    node_level == DISK_LEAF_NODE_LEVEL + 1) {
764			int pos = last_element->pe_position;
765			int limit = B_NR_ITEMS(bh);
766			struct reiserfs_key *le_key;
767
768			if (search_path->reada & PATH_READA_BACK)
769				limit = 0;
770			while (reada_count < SEARCH_BY_KEY_READA) {
771				if (pos == limit)
772					break;
773				reada_blocks[reada_count++] =
774				    B_N_CHILD_NUM(bh, pos);
775				if (search_path->reada & PATH_READA_BACK)
776					pos--;
777				else
778					pos++;
779
780				/*
781				 * check to make sure we're in the same object
782				 */
783				le_key = B_N_PDELIM_KEY(bh, pos);
784				if (le32_to_cpu(le_key->k_objectid) !=
785				    key->on_disk_key.k_objectid) {
786					break;
787				}
788			}
789		}
790	}
791}
792
793/* Form the path to an item and position in this item which contains
794   file byte defined by key. If there is no such item
795   corresponding to the key, we point the path to the item with
796   maximal key less than key, and *pos_in_item is set to one
797   past the last entry/byte in the item.  If searching for entry in a
798   directory item, and it is not found, *pos_in_item is set to one
799   entry more than the entry with maximal key which is less than the
800   sought key.
801
802   Note that if there is no entry in this same node which is one more,
803   then we point to an imaginary entry.  for direct items, the
804   position is in units of bytes, for indirect items the position is
805   in units of blocknr entries, for directory items the position is in
806   units of directory entries.  */
807
808/* The function is NOT SCHEDULE-SAFE! */
809int search_for_position_by_key(struct super_block *sb,	/* Pointer to the super block.          */
810			       const struct cpu_key *p_cpu_key,	/* Key to search (cpu variable)         */
811			       struct treepath *search_path	/* Filled up by this function.          */
812    )
813{
814	struct item_head *p_le_ih;	/* pointer to on-disk structure */
815	int blk_size;
816	loff_t item_offset, offset;
817	struct reiserfs_dir_entry de;
818	int retval;
819
820	/* If searching for directory entry. */
821	if (is_direntry_cpu_key(p_cpu_key))
822		return search_by_entry_key(sb, p_cpu_key, search_path,
823					   &de);
824
825	/* If not searching for directory entry. */
826
827	/* If item is found. */
828	retval = search_item(sb, p_cpu_key, search_path);
829	if (retval == IO_ERROR)
830		return retval;
831	if (retval == ITEM_FOUND) {
832
833		RFALSE(!ih_item_len
834		       (B_N_PITEM_HEAD
835			(PATH_PLAST_BUFFER(search_path),
836			 PATH_LAST_POSITION(search_path))),
837		       "PAP-5165: item length equals zero");
838
839		pos_in_item(search_path) = 0;
840		return POSITION_FOUND;
841	}
842
843	RFALSE(!PATH_LAST_POSITION(search_path),
844	       "PAP-5170: position equals zero");
845
846	/* Item is not found. Set path to the previous item. */
847	p_le_ih =
848	    B_N_PITEM_HEAD(PATH_PLAST_BUFFER(search_path),
849			   --PATH_LAST_POSITION(search_path));
850	blk_size = sb->s_blocksize;
851
852	if (comp_short_keys(&(p_le_ih->ih_key), p_cpu_key)) {
853		return FILE_NOT_FOUND;
854	}
855
856	item_offset = le_ih_k_offset(p_le_ih);
857	offset = cpu_key_k_offset(p_cpu_key);
858
859	/* Needed byte is contained in the item pointed to by the path. */
860	if (item_offset <= offset &&
861	    item_offset + op_bytes_number(p_le_ih, blk_size) > offset) {
862		pos_in_item(search_path) = offset - item_offset;
863		if (is_indirect_le_ih(p_le_ih)) {
864			pos_in_item(search_path) /= blk_size;
865		}
866		return POSITION_FOUND;
867	}
868
869	/* Needed byte is not contained in the item pointed to by the
870	   path. Set pos_in_item out of the item. */
871	if (is_indirect_le_ih(p_le_ih))
872		pos_in_item(search_path) =
873		    ih_item_len(p_le_ih) / UNFM_P_SIZE;
874	else
875		pos_in_item(search_path) = ih_item_len(p_le_ih);
876
877	return POSITION_NOT_FOUND;
878}
879
880/* Compare given item and item pointed to by the path. */
881int comp_items(const struct item_head *stored_ih, const struct treepath *path)
882{
883	struct buffer_head *bh = PATH_PLAST_BUFFER(path);
884	struct item_head *ih;
885
886	/* Last buffer at the path is not in the tree. */
887	if (!B_IS_IN_TREE(bh))
888		return 1;
889
890	/* Last path position is invalid. */
891	if (PATH_LAST_POSITION(path) >= B_NR_ITEMS(bh))
892		return 1;
893
894	/* we need only to know, whether it is the same item */
895	ih = get_ih(path);
896	return memcmp(stored_ih, ih, IH_SIZE);
897}
898
899/* unformatted nodes are not logged anymore, ever.  This is safe
900** now
901*/
902#define held_by_others(bh) (atomic_read(&(bh)->b_count) > 1)
903
904// block can not be forgotten as it is in I/O or held by someone
905#define block_in_use(bh) (buffer_locked(bh) || (held_by_others(bh)))
906
907// prepare for delete or cut of direct item
908static inline int prepare_for_direct_item(struct treepath *path,
909					  struct item_head *le_ih,
910					  struct inode *inode,
911					  loff_t new_file_length, int *cut_size)
912{
913	loff_t round_len;
914
915	if (new_file_length == max_reiserfs_offset(inode)) {
916		/* item has to be deleted */
917		*cut_size = -(IH_SIZE + ih_item_len(le_ih));
918		return M_DELETE;
919	}
920	// new file gets truncated
921	if (get_inode_item_key_version(inode) == KEY_FORMAT_3_6) {
922		//
923		round_len = ROUND_UP(new_file_length);
924		/* this was new_file_length < le_ih ... */
925		if (round_len < le_ih_k_offset(le_ih)) {
926			*cut_size = -(IH_SIZE + ih_item_len(le_ih));
927			return M_DELETE;	/* Delete this item. */
928		}
929		/* Calculate first position and size for cutting from item. */
930		pos_in_item(path) = round_len - (le_ih_k_offset(le_ih) - 1);
931		*cut_size = -(ih_item_len(le_ih) - pos_in_item(path));
932
933		return M_CUT;	/* Cut from this item. */
934	}
935
936	// old file: items may have any length
937
938	if (new_file_length < le_ih_k_offset(le_ih)) {
939		*cut_size = -(IH_SIZE + ih_item_len(le_ih));
940		return M_DELETE;	/* Delete this item. */
941	}
942	/* Calculate first position and size for cutting from item. */
943	*cut_size = -(ih_item_len(le_ih) -
944		      (pos_in_item(path) =
945		       new_file_length + 1 - le_ih_k_offset(le_ih)));
946	return M_CUT;		/* Cut from this item. */
947}
948
949static inline int prepare_for_direntry_item(struct treepath *path,
950					    struct item_head *le_ih,
951					    struct inode *inode,
952					    loff_t new_file_length,
953					    int *cut_size)
954{
955	if (le_ih_k_offset(le_ih) == DOT_OFFSET &&
956	    new_file_length == max_reiserfs_offset(inode)) {
957		RFALSE(ih_entry_count(le_ih) != 2,
958		       "PAP-5220: incorrect empty directory item (%h)", le_ih);
959		*cut_size = -(IH_SIZE + ih_item_len(le_ih));
960		return M_DELETE;	/* Delete the directory item containing "." and ".." entry. */
961	}
962
963	if (ih_entry_count(le_ih) == 1) {
964		/* Delete the directory item such as there is one record only
965		   in this item */
966		*cut_size = -(IH_SIZE + ih_item_len(le_ih));
967		return M_DELETE;
968	}
969
970	/* Cut one record from the directory item. */
971	*cut_size =
972	    -(DEH_SIZE +
973	      entry_length(get_last_bh(path), le_ih, pos_in_item(path)));
974	return M_CUT;
975}
976
977#define JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD (2 * JOURNAL_PER_BALANCE_CNT + 1)
978
979/*  If the path points to a directory or direct item, calculate mode and the size cut, for balance.
980    If the path points to an indirect item, remove some number of its unformatted nodes.
981    In case of file truncate calculate whether this item must be deleted/truncated or last
982    unformatted node of this item will be converted to a direct item.
983    This function returns a determination of what balance mode the calling function should employ. */
984static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, struct inode *inode, struct treepath *path, const struct cpu_key *item_key, int *removed,	/* Number of unformatted nodes which were removed
985																						   from end of the file. */
986				      int *cut_size, unsigned long long new_file_length	/* MAX_KEY_OFFSET in case of delete. */
987    )
988{
989	struct super_block *sb = inode->i_sb;
990	struct item_head *p_le_ih = PATH_PITEM_HEAD(path);
991	struct buffer_head *bh = PATH_PLAST_BUFFER(path);
992
993	BUG_ON(!th->t_trans_id);
994
995	/* Stat_data item. */
996	if (is_statdata_le_ih(p_le_ih)) {
997
998		RFALSE(new_file_length != max_reiserfs_offset(inode),
999		       "PAP-5210: mode must be M_DELETE");
1000
1001		*cut_size = -(IH_SIZE + ih_item_len(p_le_ih));
1002		return M_DELETE;
1003	}
1004
1005	/* Directory item. */
1006	if (is_direntry_le_ih(p_le_ih))
1007		return prepare_for_direntry_item(path, p_le_ih, inode,
1008						 new_file_length,
1009						 cut_size);
1010
1011	/* Direct item. */
1012	if (is_direct_le_ih(p_le_ih))
1013		return prepare_for_direct_item(path, p_le_ih, inode,
1014					       new_file_length, cut_size);
1015
1016	/* Case of an indirect item. */
1017	{
1018	    int blk_size = sb->s_blocksize;
1019	    struct item_head s_ih;
1020	    int need_re_search;
1021	    int delete = 0;
1022	    int result = M_CUT;
1023	    int pos = 0;
1024
1025	    if ( new_file_length == max_reiserfs_offset (inode) ) {
1026		/* prepare_for_delete_or_cut() is called by
1027		 * reiserfs_delete_item() */
1028		new_file_length = 0;
1029		delete = 1;
1030	    }
1031
1032	    do {
1033		need_re_search = 0;
1034		*cut_size = 0;
1035		bh = PATH_PLAST_BUFFER(path);
1036		copy_item_head(&s_ih, PATH_PITEM_HEAD(path));
1037		pos = I_UNFM_NUM(&s_ih);
1038
1039		while (le_ih_k_offset (&s_ih) + (pos - 1) * blk_size > new_file_length) {
1040		    __le32 *unfm;
1041		    __u32 block;
1042
1043		    /* Each unformatted block deletion may involve one additional
1044		     * bitmap block into the transaction, thereby the initial
1045		     * journal space reservation might not be enough. */
1046		    if (!delete && (*cut_size) != 0 &&
1047			reiserfs_transaction_free_space(th) < JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD)
1048			break;
1049
1050		    unfm = (__le32 *)B_I_PITEM(bh, &s_ih) + pos - 1;
1051		    block = get_block_num(unfm, 0);
1052
1053		    if (block != 0) {
1054			reiserfs_prepare_for_journal(sb, bh, 1);
1055			put_block_num(unfm, 0, 0);
1056			journal_mark_dirty(th, sb, bh);
1057			reiserfs_free_block(th, inode, block, 1);
1058		    }
1059
1060		    reiserfs_write_unlock(sb);
1061		    cond_resched();
1062		    reiserfs_write_lock(sb);
1063
1064		    if (item_moved (&s_ih, path))  {
1065			need_re_search = 1;
1066			break;
1067		    }
1068
1069		    pos --;
1070		    (*removed)++;
1071		    (*cut_size) -= UNFM_P_SIZE;
1072
1073		    if (pos == 0) {
1074			(*cut_size) -= IH_SIZE;
1075			result = M_DELETE;
1076			break;
1077		    }
1078		}
1079		/* a trick.  If the buffer has been logged, this will do nothing.  If
1080		** we've broken the loop without logging it, it will restore the
1081		** buffer */
1082		reiserfs_restore_prepared_buffer(sb, bh);
1083	    } while (need_re_search &&
1084		     search_for_position_by_key(sb, item_key, path) == POSITION_FOUND);
1085	    pos_in_item(path) = pos * UNFM_P_SIZE;
1086
1087	    if (*cut_size == 0) {
1088		/* Nothing were cut. maybe convert last unformatted node to the
1089		 * direct item? */
1090		result = M_CONVERT;
1091	    }
1092	    return result;
1093	}
1094}
1095
1096/* Calculate number of bytes which will be deleted or cut during balance */
1097static int calc_deleted_bytes_number(struct tree_balance *tb, char mode)
1098{
1099	int del_size;
1100	struct item_head *p_le_ih = PATH_PITEM_HEAD(tb->tb_path);
1101
1102	if (is_statdata_le_ih(p_le_ih))
1103		return 0;
1104
1105	del_size =
1106	    (mode ==
1107	     M_DELETE) ? ih_item_len(p_le_ih) : -tb->insert_size[0];
1108	if (is_direntry_le_ih(p_le_ih)) {
1109		return del_size;
1110	}
1111
1112	if (is_indirect_le_ih(p_le_ih))
1113		del_size = (del_size / UNFM_P_SIZE) *
1114				(PATH_PLAST_BUFFER(tb->tb_path)->b_size);
1115	return del_size;
1116}
1117
1118static void init_tb_struct(struct reiserfs_transaction_handle *th,
1119			   struct tree_balance *tb,
1120			   struct super_block *sb,
1121			   struct treepath *path, int size)
1122{
1123
1124	BUG_ON(!th->t_trans_id);
1125
1126	memset(tb, '\0', sizeof(struct tree_balance));
1127	tb->transaction_handle = th;
1128	tb->tb_sb = sb;
1129	tb->tb_path = path;
1130	PATH_OFFSET_PBUFFER(path, ILLEGAL_PATH_ELEMENT_OFFSET) = NULL;
1131	PATH_OFFSET_POSITION(path, ILLEGAL_PATH_ELEMENT_OFFSET) = 0;
1132	tb->insert_size[0] = size;
1133}
1134
1135void padd_item(char *item, int total_length, int length)
1136{
1137	int i;
1138
1139	for (i = total_length; i > length;)
1140		item[--i] = 0;
1141}
1142
1143#ifdef REISERQUOTA_DEBUG
1144char key2type(struct reiserfs_key *ih)
1145{
1146	if (is_direntry_le_key(2, ih))
1147		return 'd';
1148	if (is_direct_le_key(2, ih))
1149		return 'D';
1150	if (is_indirect_le_key(2, ih))
1151		return 'i';
1152	if (is_statdata_le_key(2, ih))
1153		return 's';
1154	return 'u';
1155}
1156
1157char head2type(struct item_head *ih)
1158{
1159	if (is_direntry_le_ih(ih))
1160		return 'd';
1161	if (is_direct_le_ih(ih))
1162		return 'D';
1163	if (is_indirect_le_ih(ih))
1164		return 'i';
1165	if (is_statdata_le_ih(ih))
1166		return 's';
1167	return 'u';
1168}
1169#endif
1170
1171/* Delete object item.
1172 * th       - active transaction handle
1173 * path     - path to the deleted item
1174 * item_key - key to search for the deleted item
1175 * indode   - used for updating i_blocks and quotas
1176 * un_bh    - NULL or unformatted node pointer
1177 */
1178int reiserfs_delete_item(struct reiserfs_transaction_handle *th,
1179			 struct treepath *path, const struct cpu_key *item_key,
1180			 struct inode *inode, struct buffer_head *un_bh)
1181{
1182	struct super_block *sb = inode->i_sb;
1183	struct tree_balance s_del_balance;
1184	struct item_head s_ih;
1185	struct item_head *q_ih;
1186	int quota_cut_bytes;
1187	int ret_value, del_size, removed;
1188
1189#ifdef CONFIG_REISERFS_CHECK
1190	char mode;
1191	int iter = 0;
1192#endif
1193
1194	BUG_ON(!th->t_trans_id);
1195
1196	init_tb_struct(th, &s_del_balance, sb, path,
1197		       0 /*size is unknown */ );
1198
1199	while (1) {
1200		removed = 0;
1201
1202#ifdef CONFIG_REISERFS_CHECK
1203		iter++;
1204		mode =
1205#endif
1206		    prepare_for_delete_or_cut(th, inode, path,
1207					      item_key, &removed,
1208					      &del_size,
1209					      max_reiserfs_offset(inode));
1210
1211		RFALSE(mode != M_DELETE, "PAP-5320: mode must be M_DELETE");
1212
1213		copy_item_head(&s_ih, PATH_PITEM_HEAD(path));
1214		s_del_balance.insert_size[0] = del_size;
1215
1216		ret_value = fix_nodes(M_DELETE, &s_del_balance, NULL, NULL);
1217		if (ret_value != REPEAT_SEARCH)
1218			break;
1219
1220		PROC_INFO_INC(sb, delete_item_restarted);
1221
1222		// file system changed, repeat search
1223		ret_value =
1224		    search_for_position_by_key(sb, item_key, path);
1225		if (ret_value == IO_ERROR)
1226			break;
1227		if (ret_value == FILE_NOT_FOUND) {
1228			reiserfs_warning(sb, "vs-5340",
1229					 "no items of the file %K found",
1230					 item_key);
1231			break;
1232		}
1233	}			/* while (1) */
1234
1235	if (ret_value != CARRY_ON) {
1236		unfix_nodes(&s_del_balance);
1237		return 0;
1238	}
1239	// reiserfs_delete_item returns item length when success
1240	ret_value = calc_deleted_bytes_number(&s_del_balance, M_DELETE);
1241	q_ih = get_ih(path);
1242	quota_cut_bytes = ih_item_len(q_ih);
1243
1244	/* hack so the quota code doesn't have to guess if the file
1245	 ** has a tail.  On tail insert, we allocate quota for 1 unformatted node.
1246	 ** We test the offset because the tail might have been
1247	 ** split into multiple items, and we only want to decrement for
1248	 ** the unfm node once
1249	 */
1250	if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(q_ih)) {
1251		if ((le_ih_k_offset(q_ih) & (sb->s_blocksize - 1)) == 1) {
1252			quota_cut_bytes = sb->s_blocksize + UNFM_P_SIZE;
1253		} else {
1254			quota_cut_bytes = 0;
1255		}
1256	}
1257
1258	if (un_bh) {
1259		int off;
1260		char *data;
1261
1262		/* We are in direct2indirect conversion, so move tail contents
1263		   to the unformatted node */
1264		/* note, we do the copy before preparing the buffer because we
1265		 ** don't care about the contents of the unformatted node yet.
1266		 ** the only thing we really care about is the direct item's data
1267		 ** is in the unformatted node.
1268		 **
1269		 ** Otherwise, we would have to call reiserfs_prepare_for_journal on
1270		 ** the unformatted node, which might schedule, meaning we'd have to
1271		 ** loop all the way back up to the start of the while loop.
1272		 **
1273		 ** The unformatted node must be dirtied later on.  We can't be
1274		 ** sure here if the entire tail has been deleted yet.
1275		 **
1276		 ** un_bh is from the page cache (all unformatted nodes are
1277		 ** from the page cache) and might be a highmem page.  So, we
1278		 ** can't use un_bh->b_data.
1279		 ** -clm
1280		 */
1281
1282		data = kmap_atomic(un_bh->b_page, KM_USER0);
1283		off = ((le_ih_k_offset(&s_ih) - 1) & (PAGE_CACHE_SIZE - 1));
1284		memcpy(data + off,
1285		       B_I_PITEM(PATH_PLAST_BUFFER(path), &s_ih),
1286		       ret_value);
1287		kunmap_atomic(data, KM_USER0);
1288	}
1289	/* Perform balancing after all resources have been collected at once. */
1290	do_balance(&s_del_balance, NULL, NULL, M_DELETE);
1291
1292#ifdef REISERQUOTA_DEBUG
1293	reiserfs_debug(sb, REISERFS_DEBUG_CODE,
1294		       "reiserquota delete_item(): freeing %u, id=%u type=%c",
1295		       quota_cut_bytes, inode->i_uid, head2type(&s_ih));
1296#endif
1297	dquot_free_space_nodirty(inode, quota_cut_bytes);
1298
1299	/* Return deleted body length */
1300	return ret_value;
1301}
1302
1303/* Summary Of Mechanisms For Handling Collisions Between Processes:
1304
1305 deletion of the body of the object is performed by iput(), with the
1306 result that if multiple processes are operating on a file, the
1307 deletion of the body of the file is deferred until the last process
1308 that has an open inode performs its iput().
1309
1310 writes and truncates are protected from collisions by use of
1311 semaphores.
1312
1313 creates, linking, and mknod are protected from collisions with other
1314 processes by making the reiserfs_add_entry() the last step in the
1315 creation, and then rolling back all changes if there was a collision.
1316 - Hans
1317*/
1318
1319/* this deletes item which never gets split */
1320void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th,
1321				struct inode *inode, struct reiserfs_key *key)
1322{
1323	struct tree_balance tb;
1324	INITIALIZE_PATH(path);
1325	int item_len = 0;
1326	int tb_init = 0;
1327	struct cpu_key cpu_key;
1328	int retval;
1329	int quota_cut_bytes = 0;
1330
1331	BUG_ON(!th->t_trans_id);
1332
1333	le_key2cpu_key(&cpu_key, key);
1334
1335	while (1) {
1336		retval = search_item(th->t_super, &cpu_key, &path);
1337		if (retval == IO_ERROR) {
1338			reiserfs_error(th->t_super, "vs-5350",
1339				       "i/o failure occurred trying "
1340				       "to delete %K", &cpu_key);
1341			break;
1342		}
1343		if (retval != ITEM_FOUND) {
1344			pathrelse(&path);
1345			// No need for a warning, if there is just no free space to insert '..' item into the newly-created subdir
1346			if (!
1347			    ((unsigned long long)
1348			     GET_HASH_VALUE(le_key_k_offset
1349					    (le_key_version(key), key)) == 0
1350			     && (unsigned long long)
1351			     GET_GENERATION_NUMBER(le_key_k_offset
1352						   (le_key_version(key),
1353						    key)) == 1))
1354				reiserfs_warning(th->t_super, "vs-5355",
1355						 "%k not found", key);
1356			break;
1357		}
1358		if (!tb_init) {
1359			tb_init = 1;
1360			item_len = ih_item_len(PATH_PITEM_HEAD(&path));
1361			init_tb_struct(th, &tb, th->t_super, &path,
1362				       -(IH_SIZE + item_len));
1363		}
1364		quota_cut_bytes = ih_item_len(PATH_PITEM_HEAD(&path));
1365
1366		retval = fix_nodes(M_DELETE, &tb, NULL, NULL);
1367		if (retval == REPEAT_SEARCH) {
1368			PROC_INFO_INC(th->t_super, delete_solid_item_restarted);
1369			continue;
1370		}
1371
1372		if (retval == CARRY_ON) {
1373			do_balance(&tb, NULL, NULL, M_DELETE);
1374			if (inode) {	/* Should we count quota for item? (we don't count quotas for save-links) */
1375#ifdef REISERQUOTA_DEBUG
1376				reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE,
1377					       "reiserquota delete_solid_item(): freeing %u id=%u type=%c",
1378					       quota_cut_bytes, inode->i_uid,
1379					       key2type(key));
1380#endif
1381				dquot_free_space_nodirty(inode,
1382							 quota_cut_bytes);
1383			}
1384			break;
1385		}
1386		// IO_ERROR, NO_DISK_SPACE, etc
1387		reiserfs_warning(th->t_super, "vs-5360",
1388				 "could not delete %K due to fix_nodes failure",
1389				 &cpu_key);
1390		unfix_nodes(&tb);
1391		break;
1392	}
1393
1394	reiserfs_check_path(&path);
1395}
1396
1397int reiserfs_delete_object(struct reiserfs_transaction_handle *th,
1398			   struct inode *inode)
1399{
1400	int err;
1401	inode->i_size = 0;
1402	BUG_ON(!th->t_trans_id);
1403
1404	/* for directory this deletes item containing "." and ".." */
1405	err =
1406	    reiserfs_do_truncate(th, inode, NULL, 0 /*no timestamp updates */ );
1407	if (err)
1408		return err;
1409
1410#if defined(USE_INODE_GENERATION_COUNTER)
1411	if (!old_format_only(th->t_super)) {
1412		__le32 *inode_generation;
1413
1414		inode_generation =
1415		    &REISERFS_SB(th->t_super)->s_rs->s_inode_generation;
1416		le32_add_cpu(inode_generation, 1);
1417	}
1418/* USE_INODE_GENERATION_COUNTER */
1419#endif
1420	reiserfs_delete_solid_item(th, inode, INODE_PKEY(inode));
1421
1422	return err;
1423}
1424
1425static void unmap_buffers(struct page *page, loff_t pos)
1426{
1427	struct buffer_head *bh;
1428	struct buffer_head *head;
1429	struct buffer_head *next;
1430	unsigned long tail_index;
1431	unsigned long cur_index;
1432
1433	if (page) {
1434		if (page_has_buffers(page)) {
1435			tail_index = pos & (PAGE_CACHE_SIZE - 1);
1436			cur_index = 0;
1437			head = page_buffers(page);
1438			bh = head;
1439			do {
1440				next = bh->b_this_page;
1441
1442				/* we want to unmap the buffers that contain the tail, and
1443				 ** all the buffers after it (since the tail must be at the
1444				 ** end of the file).  We don't want to unmap file data
1445				 ** before the tail, since it might be dirty and waiting to
1446				 ** reach disk
1447				 */
1448				cur_index += bh->b_size;
1449				if (cur_index > tail_index) {
1450					reiserfs_unmap_buffer(bh);
1451				}
1452				bh = next;
1453			} while (bh != head);
1454		}
1455	}
1456}
1457
1458static int maybe_indirect_to_direct(struct reiserfs_transaction_handle *th,
1459				    struct inode *inode,
1460				    struct page *page,
1461				    struct treepath *path,
1462				    const struct cpu_key *item_key,
1463				    loff_t new_file_size, char *mode)
1464{
1465	struct super_block *sb = inode->i_sb;
1466	int block_size = sb->s_blocksize;
1467	int cut_bytes;
1468	BUG_ON(!th->t_trans_id);
1469	BUG_ON(new_file_size != inode->i_size);
1470
1471	/* the page being sent in could be NULL if there was an i/o error
1472	 ** reading in the last block.  The user will hit problems trying to
1473	 ** read the file, but for now we just skip the indirect2direct
1474	 */
1475	if (atomic_read(&inode->i_count) > 1 ||
1476	    !tail_has_to_be_packed(inode) ||
1477	    !page || (REISERFS_I(inode)->i_flags & i_nopack_mask)) {
1478		/* leave tail in an unformatted node */
1479		*mode = M_SKIP_BALANCING;
1480		cut_bytes =
1481		    block_size - (new_file_size & (block_size - 1));
1482		pathrelse(path);
1483		return cut_bytes;
1484	}
1485	/* Perform the conversion to a direct_item. */
1486	/* return indirect_to_direct(inode, path, item_key,
1487				  new_file_size, mode); */
1488	return indirect2direct(th, inode, page, path, item_key,
1489			       new_file_size, mode);
1490}
1491
1492/* we did indirect_to_direct conversion. And we have inserted direct
1493   item successesfully, but there were no disk space to cut unfm
1494   pointer being converted. Therefore we have to delete inserted
1495   direct item(s) */
1496static void indirect_to_direct_roll_back(struct reiserfs_transaction_handle *th,
1497					 struct inode *inode, struct treepath *path)
1498{
1499	struct cpu_key tail_key;
1500	int tail_len;
1501	int removed;
1502	BUG_ON(!th->t_trans_id);
1503
1504	make_cpu_key(&tail_key, inode, inode->i_size + 1, TYPE_DIRECT, 4);	// !!!!
1505	tail_key.key_length = 4;
1506
1507	tail_len =
1508	    (cpu_key_k_offset(&tail_key) & (inode->i_sb->s_blocksize - 1)) - 1;
1509	while (tail_len) {
1510		/* look for the last byte of the tail */
1511		if (search_for_position_by_key(inode->i_sb, &tail_key, path) ==
1512		    POSITION_NOT_FOUND)
1513			reiserfs_panic(inode->i_sb, "vs-5615",
1514				       "found invalid item");
1515		RFALSE(path->pos_in_item !=
1516		       ih_item_len(PATH_PITEM_HEAD(path)) - 1,
1517		       "vs-5616: appended bytes found");
1518		PATH_LAST_POSITION(path)--;
1519
1520		removed =
1521		    reiserfs_delete_item(th, path, &tail_key, inode,
1522					 NULL /*unbh not needed */ );
1523		RFALSE(removed <= 0
1524		       || removed > tail_len,
1525		       "vs-5617: there was tail %d bytes, removed item length %d bytes",
1526		       tail_len, removed);
1527		tail_len -= removed;
1528		set_cpu_key_k_offset(&tail_key,
1529				     cpu_key_k_offset(&tail_key) - removed);
1530	}
1531	reiserfs_warning(inode->i_sb, "reiserfs-5091", "indirect_to_direct "
1532			 "conversion has been rolled back due to "
1533			 "lack of disk space");
1534	//mark_file_without_tail (inode);
1535	mark_inode_dirty(inode);
1536}
1537
1538/* (Truncate or cut entry) or delete object item. Returns < 0 on failure */
1539int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th,
1540			   struct treepath *path,
1541			   struct cpu_key *item_key,
1542			   struct inode *inode,
1543			   struct page *page, loff_t new_file_size)
1544{
1545	struct super_block *sb = inode->i_sb;
1546	/* Every function which is going to call do_balance must first
1547	   create a tree_balance structure.  Then it must fill up this
1548	   structure by using the init_tb_struct and fix_nodes functions.
1549	   After that we can make tree balancing. */
1550	struct tree_balance s_cut_balance;
1551	struct item_head *p_le_ih;
1552	int cut_size = 0,	/* Amount to be cut. */
1553	    ret_value = CARRY_ON, removed = 0,	/* Number of the removed unformatted nodes. */
1554	    is_inode_locked = 0;
1555	char mode;		/* Mode of the balance. */
1556	int retval2 = -1;
1557	int quota_cut_bytes;
1558	loff_t tail_pos = 0;
1559
1560	BUG_ON(!th->t_trans_id);
1561
1562	init_tb_struct(th, &s_cut_balance, inode->i_sb, path,
1563		       cut_size);
1564
1565	/* Repeat this loop until we either cut the item without needing
1566	   to balance, or we fix_nodes without schedule occurring */
1567	while (1) {
1568		/* Determine the balance mode, position of the first byte to
1569		   be cut, and size to be cut.  In case of the indirect item
1570		   free unformatted nodes which are pointed to by the cut
1571		   pointers. */
1572
1573		mode =
1574		    prepare_for_delete_or_cut(th, inode, path,
1575					      item_key, &removed,
1576					      &cut_size, new_file_size);
1577		if (mode == M_CONVERT) {
1578			/* convert last unformatted node to direct item or leave
1579			   tail in the unformatted node */
1580			RFALSE(ret_value != CARRY_ON,
1581			       "PAP-5570: can not convert twice");
1582
1583			ret_value =
1584			    maybe_indirect_to_direct(th, inode, page,
1585						     path, item_key,
1586						     new_file_size, &mode);
1587			if (mode == M_SKIP_BALANCING)
1588				/* tail has been left in the unformatted node */
1589				return ret_value;
1590
1591			is_inode_locked = 1;
1592
1593			/* removing of last unformatted node will change value we
1594			   have to return to truncate. Save it */
1595			retval2 = ret_value;
1596			/*retval2 = sb->s_blocksize - (new_file_size & (sb->s_blocksize - 1)); */
1597
1598			/* So, we have performed the first part of the conversion:
1599			   inserting the new direct item.  Now we are removing the
1600			   last unformatted node pointer. Set key to search for
1601			   it. */
1602			set_cpu_key_k_type(item_key, TYPE_INDIRECT);
1603			item_key->key_length = 4;
1604			new_file_size -=
1605			    (new_file_size & (sb->s_blocksize - 1));
1606			tail_pos = new_file_size;
1607			set_cpu_key_k_offset(item_key, new_file_size + 1);
1608			if (search_for_position_by_key
1609			    (sb, item_key,
1610			     path) == POSITION_NOT_FOUND) {
1611				print_block(PATH_PLAST_BUFFER(path), 3,
1612					    PATH_LAST_POSITION(path) - 1,
1613					    PATH_LAST_POSITION(path) + 1);
1614				reiserfs_panic(sb, "PAP-5580", "item to "
1615					       "convert does not exist (%K)",
1616					       item_key);
1617			}
1618			continue;
1619		}
1620		if (cut_size == 0) {
1621			pathrelse(path);
1622			return 0;
1623		}
1624
1625		s_cut_balance.insert_size[0] = cut_size;
1626
1627		ret_value = fix_nodes(mode, &s_cut_balance, NULL, NULL);
1628		if (ret_value != REPEAT_SEARCH)
1629			break;
1630
1631		PROC_INFO_INC(sb, cut_from_item_restarted);
1632
1633		ret_value =
1634		    search_for_position_by_key(sb, item_key, path);
1635		if (ret_value == POSITION_FOUND)
1636			continue;
1637
1638		reiserfs_warning(sb, "PAP-5610", "item %K not found",
1639				 item_key);
1640		unfix_nodes(&s_cut_balance);
1641		return (ret_value == IO_ERROR) ? -EIO : -ENOENT;
1642	}			/* while */
1643
1644	// check fix_nodes results (IO_ERROR or NO_DISK_SPACE)
1645	if (ret_value != CARRY_ON) {
1646		if (is_inode_locked) {
1647			// to cut item
1648			indirect_to_direct_roll_back(th, inode, path);
1649		}
1650		if (ret_value == NO_DISK_SPACE)
1651			reiserfs_warning(sb, "reiserfs-5092",
1652					 "NO_DISK_SPACE");
1653		unfix_nodes(&s_cut_balance);
1654		return -EIO;
1655	}
1656
1657	/* go ahead and perform balancing */
1658
1659	RFALSE(mode == M_PASTE || mode == M_INSERT, "invalid mode");
1660
1661	/* Calculate number of bytes that need to be cut from the item. */
1662	quota_cut_bytes =
1663	    (mode ==
1664	     M_DELETE) ? ih_item_len(get_ih(path)) : -s_cut_balance.
1665	    insert_size[0];
1666	if (retval2 == -1)
1667		ret_value = calc_deleted_bytes_number(&s_cut_balance, mode);
1668	else
1669		ret_value = retval2;
1670
1671	/* For direct items, we only change the quota when deleting the last
1672	 ** item.
1673	 */
1674	p_le_ih = PATH_PITEM_HEAD(s_cut_balance.tb_path);
1675	if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(p_le_ih)) {
1676		if (mode == M_DELETE &&
1677		    (le_ih_k_offset(p_le_ih) & (sb->s_blocksize - 1)) ==
1678		    1) {
1679			REISERFS_I(inode)->i_first_direct_byte = U32_MAX;
1680			quota_cut_bytes = sb->s_blocksize + UNFM_P_SIZE;
1681		} else {
1682			quota_cut_bytes = 0;
1683		}
1684	}
1685#ifdef CONFIG_REISERFS_CHECK
1686	if (is_inode_locked) {
1687		struct item_head *le_ih =
1688		    PATH_PITEM_HEAD(s_cut_balance.tb_path);
1689		/* we are going to complete indirect2direct conversion. Make
1690		   sure, that we exactly remove last unformatted node pointer
1691		   of the item */
1692		if (!is_indirect_le_ih(le_ih))
1693			reiserfs_panic(sb, "vs-5652",
1694				       "item must be indirect %h", le_ih);
1695
1696		if (mode == M_DELETE && ih_item_len(le_ih) != UNFM_P_SIZE)
1697			reiserfs_panic(sb, "vs-5653", "completing "
1698				       "indirect2direct conversion indirect "
1699				       "item %h being deleted must be of "
1700				       "4 byte long", le_ih);
1701
1702		if (mode == M_CUT
1703		    && s_cut_balance.insert_size[0] != -UNFM_P_SIZE) {
1704			reiserfs_panic(sb, "vs-5654", "can not complete "
1705				       "indirect2direct conversion of %h "
1706				       "(CUT, insert_size==%d)",
1707				       le_ih, s_cut_balance.insert_size[0]);
1708		}
1709		/* it would be useful to make sure, that right neighboring
1710		   item is direct item of this file */
1711	}
1712#endif
1713
1714	do_balance(&s_cut_balance, NULL, NULL, mode);
1715	if (is_inode_locked) {
1716		/* we've done an indirect->direct conversion.  when the data block
1717		 ** was freed, it was removed from the list of blocks that must
1718		 ** be flushed before the transaction commits, make sure to
1719		 ** unmap and invalidate it
1720		 */
1721		unmap_buffers(page, tail_pos);
1722		REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
1723	}
1724#ifdef REISERQUOTA_DEBUG
1725	reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
1726		       "reiserquota cut_from_item(): freeing %u id=%u type=%c",
1727		       quota_cut_bytes, inode->i_uid, '?');
1728#endif
1729	dquot_free_space_nodirty(inode, quota_cut_bytes);
1730	return ret_value;
1731}
1732
1733static void truncate_directory(struct reiserfs_transaction_handle *th,
1734			       struct inode *inode)
1735{
1736	BUG_ON(!th->t_trans_id);
1737	if (inode->i_nlink)
1738		reiserfs_error(inode->i_sb, "vs-5655", "link count != 0");
1739
1740	set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), DOT_OFFSET);
1741	set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_DIRENTRY);
1742	reiserfs_delete_solid_item(th, inode, INODE_PKEY(inode));
1743	reiserfs_update_sd(th, inode);
1744	set_le_key_k_offset(KEY_FORMAT_3_5, INODE_PKEY(inode), SD_OFFSET);
1745	set_le_key_k_type(KEY_FORMAT_3_5, INODE_PKEY(inode), TYPE_STAT_DATA);
1746}
1747
1748/* Truncate file to the new size. Note, this must be called with a transaction
1749   already started */
1750int reiserfs_do_truncate(struct reiserfs_transaction_handle *th,
1751			  struct inode *inode,	/* ->i_size contains new size */
1752			 struct page *page,	/* up to date for last block */
1753			 int update_timestamps	/* when it is called by
1754						   file_release to convert
1755						   the tail - no timestamps
1756						   should be updated */
1757    )
1758{
1759	INITIALIZE_PATH(s_search_path);	/* Path to the current object item. */
1760	struct item_head *p_le_ih;	/* Pointer to an item header. */
1761	struct cpu_key s_item_key;	/* Key to search for a previous file item. */
1762	loff_t file_size,	/* Old file size. */
1763	 new_file_size;	/* New file size. */
1764	int deleted;		/* Number of deleted or truncated bytes. */
1765	int retval;
1766	int err = 0;
1767
1768	BUG_ON(!th->t_trans_id);
1769	if (!
1770	    (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)
1771	     || S_ISLNK(inode->i_mode)))
1772		return 0;
1773
1774	if (S_ISDIR(inode->i_mode)) {
1775		// deletion of directory - no need to update timestamps
1776		truncate_directory(th, inode);
1777		return 0;
1778	}
1779
1780	/* Get new file size. */
1781	new_file_size = inode->i_size;
1782
1783	make_cpu_key(&s_item_key, inode, max_reiserfs_offset(inode),
1784		     TYPE_DIRECT, 3);
1785
1786	retval =
1787	    search_for_position_by_key(inode->i_sb, &s_item_key,
1788				       &s_search_path);
1789	if (retval == IO_ERROR) {
1790		reiserfs_error(inode->i_sb, "vs-5657",
1791			       "i/o failure occurred trying to truncate %K",
1792			       &s_item_key);
1793		err = -EIO;
1794		goto out;
1795	}
1796	if (retval == POSITION_FOUND || retval == FILE_NOT_FOUND) {
1797		reiserfs_error(inode->i_sb, "PAP-5660",
1798			       "wrong result %d of search for %K", retval,
1799			       &s_item_key);
1800
1801		err = -EIO;
1802		goto out;
1803	}
1804
1805	s_search_path.pos_in_item--;
1806
1807	/* Get real file size (total length of all file items) */
1808	p_le_ih = PATH_PITEM_HEAD(&s_search_path);
1809	if (is_statdata_le_ih(p_le_ih))
1810		file_size = 0;
1811	else {
1812		loff_t offset = le_ih_k_offset(p_le_ih);
1813		int bytes =
1814		    op_bytes_number(p_le_ih, inode->i_sb->s_blocksize);
1815
1816		/* this may mismatch with real file size: if last direct item
1817		   had no padding zeros and last unformatted node had no free
1818		   space, this file would have this file size */
1819		file_size = offset + bytes - 1;
1820	}
1821	/*
1822	 * are we doing a full truncate or delete, if so
1823	 * kick in the reada code
1824	 */
1825	if (new_file_size == 0)
1826		s_search_path.reada = PATH_READA | PATH_READA_BACK;
1827
1828	if (file_size == 0 || file_size < new_file_size) {
1829		goto update_and_out;
1830	}
1831
1832	/* Update key to search for the last file item. */
1833	set_cpu_key_k_offset(&s_item_key, file_size);
1834
1835	do {
1836		/* Cut or delete file item. */
1837		deleted =
1838		    reiserfs_cut_from_item(th, &s_search_path, &s_item_key,
1839					   inode, page, new_file_size);
1840		if (deleted < 0) {
1841			reiserfs_warning(inode->i_sb, "vs-5665",
1842					 "reiserfs_cut_from_item failed");
1843			reiserfs_check_path(&s_search_path);
1844			return 0;
1845		}
1846
1847		RFALSE(deleted > file_size,
1848		       "PAP-5670: reiserfs_cut_from_item: too many bytes deleted: deleted %d, file_size %lu, item_key %K",
1849		       deleted, file_size, &s_item_key);
1850
1851		/* Change key to search the last file item. */
1852		file_size -= deleted;
1853
1854		set_cpu_key_k_offset(&s_item_key, file_size);
1855
1856		/* While there are bytes to truncate and previous file item is presented in the tree. */
1857
1858		/*
1859		 ** This loop could take a really long time, and could log
1860		 ** many more blocks than a transaction can hold.  So, we do a polite
1861		 ** journal end here, and if the transaction needs ending, we make
1862		 ** sure the file is consistent before ending the current trans
1863		 ** and starting a new one
1864		 */
1865		if (journal_transaction_should_end(th, 0) ||
1866		    reiserfs_transaction_free_space(th) <= JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD) {
1867			int orig_len_alloc = th->t_blocks_allocated;
1868			pathrelse(&s_search_path);
1869
1870			if (update_timestamps) {
1871				inode->i_mtime = CURRENT_TIME_SEC;
1872				inode->i_ctime = CURRENT_TIME_SEC;
1873			}
1874			reiserfs_update_sd(th, inode);
1875
1876			err = journal_end(th, inode->i_sb, orig_len_alloc);
1877			if (err)
1878				goto out;
1879			err = journal_begin(th, inode->i_sb,
1880					    JOURNAL_FOR_FREE_BLOCK_AND_UPDATE_SD + JOURNAL_PER_BALANCE_CNT * 4) ;
1881			if (err)
1882				goto out;
1883			reiserfs_update_inode_transaction(inode);
1884		}
1885	} while (file_size > ROUND_UP(new_file_size) &&
1886		 search_for_position_by_key(inode->i_sb, &s_item_key,
1887					    &s_search_path) == POSITION_FOUND);
1888
1889	RFALSE(file_size > ROUND_UP(new_file_size),
1890	       "PAP-5680: truncate did not finish: new_file_size %Ld, current %Ld, oid %d",
1891	       new_file_size, file_size, s_item_key.on_disk_key.k_objectid);
1892
1893      update_and_out:
1894	if (update_timestamps) {
1895		// this is truncate, not file closing
1896		inode->i_mtime = CURRENT_TIME_SEC;
1897		inode->i_ctime = CURRENT_TIME_SEC;
1898	}
1899	reiserfs_update_sd(th, inode);
1900
1901      out:
1902	pathrelse(&s_search_path);
1903	return err;
1904}
1905
1906#ifdef CONFIG_REISERFS_CHECK
1907// this makes sure, that we __append__, not overwrite or add holes
1908static void check_research_for_paste(struct treepath *path,
1909				     const struct cpu_key *key)
1910{
1911	struct item_head *found_ih = get_ih(path);
1912
1913	if (is_direct_le_ih(found_ih)) {
1914		if (le_ih_k_offset(found_ih) +
1915		    op_bytes_number(found_ih,
1916				    get_last_bh(path)->b_size) !=
1917		    cpu_key_k_offset(key)
1918		    || op_bytes_number(found_ih,
1919				       get_last_bh(path)->b_size) !=
1920		    pos_in_item(path))
1921			reiserfs_panic(NULL, "PAP-5720", "found direct item "
1922				       "%h or position (%d) does not match "
1923				       "to key %K", found_ih,
1924				       pos_in_item(path), key);
1925	}
1926	if (is_indirect_le_ih(found_ih)) {
1927		if (le_ih_k_offset(found_ih) +
1928		    op_bytes_number(found_ih,
1929				    get_last_bh(path)->b_size) !=
1930		    cpu_key_k_offset(key)
1931		    || I_UNFM_NUM(found_ih) != pos_in_item(path)
1932		    || get_ih_free_space(found_ih) != 0)
1933			reiserfs_panic(NULL, "PAP-5730", "found indirect "
1934				       "item (%h) or position (%d) does not "
1935				       "match to key (%K)",
1936				       found_ih, pos_in_item(path), key);
1937	}
1938}
1939#endif				/* config reiserfs check */
1940
1941/* Paste bytes to the existing item. Returns bytes number pasted into the item. */
1942int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct treepath *search_path,	/* Path to the pasted item.	  */
1943			     const struct cpu_key *key,	/* Key to search for the needed item. */
1944			     struct inode *inode,	/* Inode item belongs to */
1945			     const char *body,	/* Pointer to the bytes to paste.    */
1946			     int pasted_size)
1947{				/* Size of pasted bytes.             */
1948	struct tree_balance s_paste_balance;
1949	int retval;
1950	int fs_gen;
1951
1952	BUG_ON(!th->t_trans_id);
1953
1954	fs_gen = get_generation(inode->i_sb);
1955
1956#ifdef REISERQUOTA_DEBUG
1957	reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
1958		       "reiserquota paste_into_item(): allocating %u id=%u type=%c",
1959		       pasted_size, inode->i_uid,
1960		       key2type(&(key->on_disk_key)));
1961#endif
1962
1963	retval = dquot_alloc_space_nodirty(inode, pasted_size);
1964	if (retval) {
1965		pathrelse(search_path);
1966		return retval;
1967	}
1968	init_tb_struct(th, &s_paste_balance, th->t_super, search_path,
1969		       pasted_size);
1970#ifdef DISPLACE_NEW_PACKING_LOCALITIES
1971	s_paste_balance.key = key->on_disk_key;
1972#endif
1973
1974	/* DQUOT_* can schedule, must check before the fix_nodes */
1975	if (fs_changed(fs_gen, inode->i_sb)) {
1976		goto search_again;
1977	}
1978
1979	while ((retval =
1980		fix_nodes(M_PASTE, &s_paste_balance, NULL,
1981			  body)) == REPEAT_SEARCH) {
1982	      search_again:
1983		/* file system changed while we were in the fix_nodes */
1984		PROC_INFO_INC(th->t_super, paste_into_item_restarted);
1985		retval =
1986		    search_for_position_by_key(th->t_super, key,
1987					       search_path);
1988		if (retval == IO_ERROR) {
1989			retval = -EIO;
1990			goto error_out;
1991		}
1992		if (retval == POSITION_FOUND) {
1993			reiserfs_warning(inode->i_sb, "PAP-5710",
1994					 "entry or pasted byte (%K) exists",
1995					 key);
1996			retval = -EEXIST;
1997			goto error_out;
1998		}
1999#ifdef CONFIG_REISERFS_CHECK
2000		check_research_for_paste(search_path, key);
2001#endif
2002	}
2003
2004	/* Perform balancing after all resources are collected by fix_nodes, and
2005	   accessing them will not risk triggering schedule. */
2006	if (retval == CARRY_ON) {
2007		do_balance(&s_paste_balance, NULL /*ih */ , body, M_PASTE);
2008		return 0;
2009	}
2010	retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
2011      error_out:
2012	/* this also releases the path */
2013	unfix_nodes(&s_paste_balance);
2014#ifdef REISERQUOTA_DEBUG
2015	reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
2016		       "reiserquota paste_into_item(): freeing %u id=%u type=%c",
2017		       pasted_size, inode->i_uid,
2018		       key2type(&(key->on_disk_key)));
2019#endif
2020	dquot_free_space_nodirty(inode, pasted_size);
2021	return retval;
2022}
2023
2024/* Insert new item into the buffer at the path.
2025 * th   - active transaction handle
2026 * path - path to the inserted item
2027 * ih   - pointer to the item header to insert
2028 * body - pointer to the bytes to insert
2029 */
2030int reiserfs_insert_item(struct reiserfs_transaction_handle *th,
2031			 struct treepath *path, const struct cpu_key *key,
2032			 struct item_head *ih, struct inode *inode,
2033			 const char *body)
2034{
2035	struct tree_balance s_ins_balance;
2036	int retval;
2037	int fs_gen = 0;
2038	int quota_bytes = 0;
2039
2040	BUG_ON(!th->t_trans_id);
2041
2042	if (inode) {		/* Do we count quotas for item? */
2043		fs_gen = get_generation(inode->i_sb);
2044		quota_bytes = ih_item_len(ih);
2045
2046		/* hack so the quota code doesn't have to guess if the file has
2047		 ** a tail, links are always tails, so there's no guessing needed
2048		 */
2049		if (!S_ISLNK(inode->i_mode) && is_direct_le_ih(ih))
2050			quota_bytes = inode->i_sb->s_blocksize + UNFM_P_SIZE;
2051#ifdef REISERQUOTA_DEBUG
2052		reiserfs_debug(inode->i_sb, REISERFS_DEBUG_CODE,
2053			       "reiserquota insert_item(): allocating %u id=%u type=%c",
2054			       quota_bytes, inode->i_uid, head2type(ih));
2055#endif
2056		/* We can't dirty inode here. It would be immediately written but
2057		 * appropriate stat item isn't inserted yet... */
2058		retval = dquot_alloc_space_nodirty(inode, quota_bytes);
2059		if (retval) {
2060			pathrelse(path);
2061			return retval;
2062		}
2063	}
2064	init_tb_struct(th, &s_ins_balance, th->t_super, path,
2065		       IH_SIZE + ih_item_len(ih));
2066#ifdef DISPLACE_NEW_PACKING_LOCALITIES
2067	s_ins_balance.key = key->on_disk_key;
2068#endif
2069	/* DQUOT_* can schedule, must check to be sure calling fix_nodes is safe */
2070	if (inode && fs_changed(fs_gen, inode->i_sb)) {
2071		goto search_again;
2072	}
2073
2074	while ((retval =
2075		fix_nodes(M_INSERT, &s_ins_balance, ih,
2076			  body)) == REPEAT_SEARCH) {
2077	      search_again:
2078		/* file system changed while we were in the fix_nodes */
2079		PROC_INFO_INC(th->t_super, insert_item_restarted);
2080		retval = search_item(th->t_super, key, path);
2081		if (retval == IO_ERROR) {
2082			retval = -EIO;
2083			goto error_out;
2084		}
2085		if (retval == ITEM_FOUND) {
2086			reiserfs_warning(th->t_super, "PAP-5760",
2087					 "key %K already exists in the tree",
2088					 key);
2089			retval = -EEXIST;
2090			goto error_out;
2091		}
2092	}
2093
2094	/* make balancing after all resources will be collected at a time */
2095	if (retval == CARRY_ON) {
2096		do_balance(&s_ins_balance, ih, body, M_INSERT);
2097		return 0;
2098	}
2099
2100	retval = (retval == NO_DISK_SPACE) ? -ENOSPC : -EIO;
2101      error_out:
2102	/* also releases the path */
2103	unfix_nodes(&s_ins_balance);
2104#ifdef REISERQUOTA_DEBUG
2105	reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE,
2106		       "reiserquota insert_item(): freeing %u id=%u type=%c",
2107		       quota_bytes, inode->i_uid, head2type(ih));
2108#endif
2109	if (inode)
2110		dquot_free_space_nodirty(inode, quota_bytes);
2111	return retval;
2112}
2113