1/*
2   Unix SMB/CIFS implementation.
3   Locking functions
4   Copyright (C) Jeremy Allison 1992-2000
5
6   This program is free software; you can redistribute it and/or modify
7   it under the terms of the GNU General Public License as published by
8   the Free Software Foundation; either version 2 of the License, or
9   (at your option) any later version.
10
11   This program is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14   GNU General Public License for more details.
15
16   You should have received a copy of the GNU General Public License
17   along with this program; if not, write to the Free Software
18   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19
20   Revision History:
21
22   POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000.
23*/
24
25#include "includes.h"
26
27/*
28 * The POSIX locking database handle.
29 */
30
31static TDB_CONTEXT *posix_lock_tdb;
32
33/*
34 * The pending close database handle.
35 */
36
37static TDB_CONTEXT *posix_pending_close_tdb;
38
39/*
40 * The data in POSIX lock records is an unsorted linear array of these
41 * records.  It is unnecessary to store the count as tdb provides the
42 * size of the record.
43 */
44
45struct posix_lock {
46	int fd;
47	SMB_OFF_T start;
48	SMB_OFF_T size;
49	int lock_type;
50};
51
52/*
53 * The data in POSIX pending close records is an unsorted linear array of int
54 * records.  It is unnecessary to store the count as tdb provides the
55 * size of the record.
56 */
57
58/* The key used in both the POSIX databases. */
59
60struct posix_lock_key {
61	SMB_DEV_T device;
62	SMB_INO_T inode;
63};
64
65/*******************************************************************
66 Form a static locking key for a dev/inode pair.
67******************************************************************/
68
69static TDB_DATA locking_key(SMB_DEV_T dev, SMB_INO_T inode)
70{
71	static struct posix_lock_key key;
72	TDB_DATA kbuf;
73
74	memset(&key, '\0', sizeof(key));
75	key.device = dev;
76	key.inode = inode;
77	kbuf.dptr = (char *)&key;
78	kbuf.dsize = sizeof(key);
79	return kbuf;
80}
81
82/*******************************************************************
83 Convenience function to get a key from an fsp.
84******************************************************************/
85
86static TDB_DATA locking_key_fsp(files_struct *fsp)
87{
88	return locking_key(fsp->dev, fsp->inode);
89}
90
91/****************************************************************************
92 Add an fd to the pending close tdb.
93****************************************************************************/
94
95static BOOL add_fd_to_close_entry(files_struct *fsp)
96{
97	TDB_DATA kbuf = locking_key_fsp(fsp);
98	TDB_DATA dbuf;
99	char *tp;
100
101	dbuf.dptr = NULL;
102
103	dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
104
105	tp = SMB_REALLOC(dbuf.dptr, dbuf.dsize + sizeof(int));
106	if (!tp) {
107		DEBUG(0,("add_fd_to_close_entry: Realloc fail !\n"));
108		SAFE_FREE(dbuf.dptr);
109		return False;
110	} else
111		dbuf.dptr = tp;
112
113	memcpy(dbuf.dptr + dbuf.dsize, &fsp->fd, sizeof(int));
114	dbuf.dsize += sizeof(int);
115
116	if (tdb_store(posix_pending_close_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
117		DEBUG(0,("add_fd_to_close_entry: tdb_store fail !\n"));
118	}
119
120	SAFE_FREE(dbuf.dptr);
121	return True;
122}
123
124/****************************************************************************
125 Remove all fd entries for a specific dev/inode pair from the tdb.
126****************************************************************************/
127
128static void delete_close_entries(files_struct *fsp)
129{
130	TDB_DATA kbuf = locking_key_fsp(fsp);
131
132	if (tdb_delete(posix_pending_close_tdb, kbuf) == -1)
133		DEBUG(0,("delete_close_entries: tdb_delete fail !\n"));
134}
135
136/****************************************************************************
137 Get the array of POSIX pending close records for an open fsp. Caller must
138 free. Returns number of entries.
139****************************************************************************/
140
141static size_t get_posix_pending_close_entries(files_struct *fsp, int **entries)
142{
143	TDB_DATA kbuf = locking_key_fsp(fsp);
144	TDB_DATA dbuf;
145	size_t count = 0;
146
147	*entries = NULL;
148	dbuf.dptr = NULL;
149
150	dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
151
152	if (!dbuf.dptr) {
153		return 0;
154	}
155
156	*entries = (int *)dbuf.dptr;
157	count = (size_t)(dbuf.dsize / sizeof(int));
158
159	return count;
160}
161
162/****************************************************************************
163 Get the array of POSIX locks for an fsp. Caller must free. Returns
164 number of entries.
165****************************************************************************/
166
167static size_t get_posix_lock_entries(files_struct *fsp, struct posix_lock **entries)
168{
169	TDB_DATA kbuf = locking_key_fsp(fsp);
170	TDB_DATA dbuf;
171	size_t count = 0;
172
173	*entries = NULL;
174
175	dbuf.dptr = NULL;
176
177	dbuf = tdb_fetch(posix_lock_tdb, kbuf);
178
179	if (!dbuf.dptr) {
180		return 0;
181	}
182
183	*entries = (struct posix_lock *)dbuf.dptr;
184	count = (size_t)(dbuf.dsize / sizeof(struct posix_lock));
185
186	return count;
187}
188
189/****************************************************************************
190 Deal with pending closes needed by POSIX locking support.
191 Note that posix_locking_close_file() is expected to have been called
192 to delete all locks on this fsp before this function is called.
193****************************************************************************/
194
195int fd_close_posix(struct connection_struct *conn, files_struct *fsp)
196{
197	int saved_errno = 0;
198	int ret;
199	size_t count, i;
200	struct posix_lock *entries = NULL;
201	int *fd_array = NULL;
202	BOOL locks_on_other_fds = False;
203
204	if (!lp_posix_locking(SNUM(conn))) {
205		/*
206		 * No POSIX to worry about, just close.
207		 */
208		ret = SMB_VFS_CLOSE(fsp,fsp->fd);
209		fsp->fd = -1;
210		return ret;
211	}
212
213	/*
214	 * Get the number of outstanding POSIX locks on this dev/inode pair.
215	 */
216
217	count = get_posix_lock_entries(fsp, &entries);
218
219	/*
220	 * Check if there are any outstanding locks belonging to
221	 * other fd's. This should never be the case if posix_locking_close_file()
222	 * has been called first, but it never hurts to be *sure*.
223	 */
224
225	for (i = 0; i < count; i++) {
226		if (entries[i].fd != fsp->fd) {
227			locks_on_other_fds = True;
228			break;
229		}
230	}
231
232	if (locks_on_other_fds) {
233
234		/*
235		 * There are outstanding locks on this dev/inode pair on other fds.
236		 * Add our fd to the pending close tdb and set fsp->fd to -1.
237		 */
238
239		if (!add_fd_to_close_entry(fsp)) {
240			SAFE_FREE(entries);
241			return -1;
242		}
243
244		SAFE_FREE(entries);
245		fsp->fd = -1;
246		return 0;
247	}
248
249	SAFE_FREE(entries);
250
251	/*
252	 * No outstanding POSIX locks. Get the pending close fd's
253	 * from the tdb and close them all.
254	 */
255
256	count = get_posix_pending_close_entries(fsp, &fd_array);
257
258	if (count) {
259		DEBUG(10,("fd_close_posix: doing close on %u fd's.\n", (unsigned int)count ));
260
261		for(i = 0; i < count; i++) {
262			if (SMB_VFS_CLOSE(fsp,fd_array[i]) == -1) {
263				saved_errno = errno;
264			}
265		}
266
267		/*
268		 * Delete all fd's stored in the tdb
269		 * for this dev/inode pair.
270		 */
271
272		delete_close_entries(fsp);
273	}
274
275	SAFE_FREE(fd_array);
276
277	/*
278	 * Finally close the fd associated with this fsp.
279	 */
280
281	ret = SMB_VFS_CLOSE(fsp,fsp->fd);
282
283	if (saved_errno != 0) {
284		errno = saved_errno;
285		ret = -1;
286	}
287
288	fsp->fd = -1;
289
290	return ret;
291}
292
/****************************************************************************
 Debugging aid: printable name for a POSIX lock type. F_RDLCK maps to
 "READ"; every other value maps to "WRITE".
****************************************************************************/

static const char *posix_lock_type_name(int lock_type)
{
	if (lock_type == F_RDLCK) {
		return "READ";
	}

	return "WRITE";
}
301
302/****************************************************************************
303 Delete a POSIX lock entry by index number. Used if the tdb add succeeds, but
304 then the POSIX fcntl lock fails.
305****************************************************************************/
306
307static BOOL delete_posix_lock_entry_by_index(files_struct *fsp, size_t entry)
308{
309	TDB_DATA kbuf = locking_key_fsp(fsp);
310	TDB_DATA dbuf;
311	struct posix_lock *locks;
312	size_t count;
313
314	dbuf.dptr = NULL;
315
316	dbuf = tdb_fetch(posix_lock_tdb, kbuf);
317
318	if (!dbuf.dptr) {
319		DEBUG(10,("delete_posix_lock_entry_by_index: tdb_fetch failed !\n"));
320		goto fail;
321	}
322
323	count = (size_t)(dbuf.dsize / sizeof(struct posix_lock));
324	locks = (struct posix_lock *)dbuf.dptr;
325
326	if (count == 1) {
327		tdb_delete(posix_lock_tdb, kbuf);
328	} else {
329		if (entry < count-1) {
330			memmove(&locks[entry], &locks[entry+1], sizeof(*locks)*((count-1) - entry));
331		}
332		dbuf.dsize -= sizeof(*locks);
333		tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE);
334	}
335
336	SAFE_FREE(dbuf.dptr);
337
338	return True;
339
340 fail:
341
342	SAFE_FREE(dbuf.dptr);
343	return False;
344}
345
346/****************************************************************************
347 Add an entry into the POSIX locking tdb. We return the index number of the
348 added lock (used in case we need to delete *exactly* this entry). Returns
349 False on fail, True on success.
350****************************************************************************/
351
352static BOOL add_posix_lock_entry(files_struct *fsp, SMB_OFF_T start, SMB_OFF_T size, int lock_type, size_t *pentry_num)
353{
354	TDB_DATA kbuf = locking_key_fsp(fsp);
355	TDB_DATA dbuf;
356	struct posix_lock pl;
357	char *tp;
358
359	dbuf.dptr = NULL;
360
361	dbuf = tdb_fetch(posix_lock_tdb, kbuf);
362
363	*pentry_num = (size_t)(dbuf.dsize / sizeof(pl));
364
365	/*
366	 * Add new record.
367	 */
368
369	pl.fd = fsp->fd;
370	pl.start = start;
371	pl.size = size;
372	pl.lock_type = lock_type;
373
374	tp = SMB_REALLOC(dbuf.dptr, dbuf.dsize + sizeof(pl));
375	if (!tp) {
376		DEBUG(0,("add_posix_lock_entry: Realloc fail !\n"));
377		goto fail;
378	} else
379		dbuf.dptr = tp;
380
381	memcpy(dbuf.dptr + dbuf.dsize, &pl, sizeof(pl));
382	dbuf.dsize += sizeof(pl);
383
384	if (tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
385		DEBUG(0,("add_posix_lock: Failed to add lock entry on file %s\n", fsp->fsp_name));
386		goto fail;
387	}
388
389	SAFE_FREE(dbuf.dptr);
390
391	DEBUG(10,("add_posix_lock: File %s: type = %s: start=%.0f size=%.0f: dev=%.0f inode=%.0f\n",
392			fsp->fsp_name, posix_lock_type_name(lock_type), (double)start, (double)size,
393			(double)fsp->dev, (double)fsp->inode ));
394
395	return True;
396
397 fail:
398
399	SAFE_FREE(dbuf.dptr);
400	return False;
401}
402
403/****************************************************************************
404 Calculate if locks have any overlap at all.
405****************************************************************************/
406
407static BOOL does_lock_overlap(SMB_OFF_T start1, SMB_OFF_T size1, SMB_OFF_T start2, SMB_OFF_T size2)
408{
409	if (start1 >= start2 && start1 <= start2 + size2)
410		return True;
411
412	if (start1 < start2 && start1 + size1 > start2)
413		return True;
414
415	return False;
416}
417
418/****************************************************************************
419 Delete an entry from the POSIX locking tdb. Returns a copy of the entry being
420 deleted and the number of records that are overlapped by this one, or -1 on error.
421****************************************************************************/
422
423static int delete_posix_lock_entry(files_struct *fsp, SMB_OFF_T start, SMB_OFF_T size, struct posix_lock *pl)
424{
425	TDB_DATA kbuf = locking_key_fsp(fsp);
426	TDB_DATA dbuf;
427	struct posix_lock *locks;
428	size_t i, count;
429	BOOL found = False;
430	int num_overlapping_records = 0;
431
432	dbuf.dptr = NULL;
433
434	dbuf = tdb_fetch(posix_lock_tdb, kbuf);
435
436	if (!dbuf.dptr) {
437		DEBUG(10,("delete_posix_lock_entry: tdb_fetch failed !\n"));
438		goto fail;
439	}
440
441	/* There are existing locks - find a match. */
442	locks = (struct posix_lock *)dbuf.dptr;
443	count = (size_t)(dbuf.dsize / sizeof(*locks));
444
445	/*
446	 * Search for and delete the first record that matches the
447	 * unlock criteria.
448	 */
449
450	for (i=0; i<count; i++) {
451		struct posix_lock *entry = &locks[i];
452
453		if (entry->fd == fsp->fd &&
454			entry->start == start &&
455			entry->size == size) {
456
457			/* Make a copy if requested. */
458			if (pl)
459				*pl = *entry;
460
461			/* Found it - delete it. */
462			if (count == 1) {
463				tdb_delete(posix_lock_tdb, kbuf);
464			} else {
465				if (i < count-1) {
466					memmove(&locks[i], &locks[i+1], sizeof(*locks)*((count-1) - i));
467				}
468				dbuf.dsize -= sizeof(*locks);
469				tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE);
470			}
471			count--;
472			found = True;
473			break;
474		}
475	}
476
477	if (!found)
478		goto fail;
479
480	/*
481	 * Count the number of entries that are
482	 * overlapped by this unlock request.
483	 */
484
485	for (i = 0; i < count; i++) {
486		struct posix_lock *entry = &locks[i];
487
488		if (fsp->fd == entry->fd &&
489			does_lock_overlap( start, size, entry->start, entry->size))
490				num_overlapping_records++;
491	}
492
493	DEBUG(10,("delete_posix_lock_entry: type = %s: start=%.0f size=%.0f, num_records = %d\n",
494			posix_lock_type_name(pl->lock_type), (double)pl->start, (double)pl->size,
495				(unsigned int)num_overlapping_records ));
496
497	SAFE_FREE(dbuf.dptr);
498
499	return num_overlapping_records;
500
501 fail:
502
503	SAFE_FREE(dbuf.dptr);
504	return -1;
505}
506
507/****************************************************************************
508 Utility function to map a lock type correctly depending on the open
509 mode of a file.
510****************************************************************************/
511
512static int map_posix_lock_type( files_struct *fsp, enum brl_type lock_type)
513{
514	if((lock_type == WRITE_LOCK) && !fsp->can_write) {
515		/*
516		 * Many UNIX's cannot get a write lock on a file opened read-only.
517		 * Win32 locking semantics allow this.
518		 * Do the best we can and attempt a read-only lock.
519		 */
520		DEBUG(10,("map_posix_lock_type: Downgrading write lock to read due to read-only file.\n"));
521		return F_RDLCK;
522	} else if((lock_type == READ_LOCK) && !fsp->can_read) {
523		/*
524		 * Ditto for read locks on write only files.
525		 */
526		DEBUG(10,("map_posix_lock_type: Changing read lock to write due to write-only file.\n"));
527		return F_WRLCK;
528	}
529
530  /*
531   * This return should be the most normal, as we attempt
532   * to always open files read/write.
533   */
534
535  return (lock_type == READ_LOCK) ? F_RDLCK : F_WRLCK;
536}
537
538/****************************************************************************
539 Check to see if the given unsigned lock range is within the possible POSIX
540 range. Modifies the given args to be in range if possible, just returns
541 False if not.
542****************************************************************************/
543
544static BOOL posix_lock_in_range(SMB_OFF_T *offset_out, SMB_OFF_T *count_out,
545								SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count)
546{
547	SMB_OFF_T offset = (SMB_OFF_T)u_offset;
548	SMB_OFF_T count = (SMB_OFF_T)u_count;
549
550	/*
551	 * For the type of system we are, attempt to
552	 * find the maximum positive lock offset as an SMB_OFF_T.
553	 */
554
555#if defined(MAX_POSITIVE_LOCK_OFFSET) /* Some systems have arbitrary limits. */
556
557	SMB_OFF_T max_positive_lock_offset = (MAX_POSITIVE_LOCK_OFFSET);
558
559#elif defined(LARGE_SMB_OFF_T) && !defined(HAVE_BROKEN_FCNTL64_LOCKS)
560
561	/*
562	 * In this case SMB_OFF_T is 64 bits,
563	 * and the underlying system can handle 64 bit signed locks.
564	 */
565
566	SMB_OFF_T mask2 = ((SMB_OFF_T)0x4) << (SMB_OFF_T_BITS-4);
567	SMB_OFF_T mask = (mask2<<1);
568	SMB_OFF_T max_positive_lock_offset = ~mask;
569
570#else /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
571
572	/*
573	 * In this case either SMB_OFF_T is 32 bits,
574	 * or the underlying system cannot handle 64 bit signed locks.
575	 * All offsets & counts must be 2^31 or less.
576	 */
577
578	SMB_OFF_T max_positive_lock_offset = 0x7FFFFFFF;
579
580#endif /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
581
582	/*
583	 * POSIX locks of length zero mean lock to end-of-file.
584	 * Win32 locks of length zero are point probes. Ignore
585	 * any Win32 locks of length zero. JRA.
586	 */
587
588	if (count == (SMB_OFF_T)0) {
589		DEBUG(10,("posix_lock_in_range: count = 0, ignoring.\n"));
590		return False;
591	}
592
593	/*
594	 * If the given offset was > max_positive_lock_offset then we cannot map this at all
595	 * ignore this lock.
596	 */
597
598	if (u_offset & ~((SMB_BIG_UINT)max_positive_lock_offset)) {
599		DEBUG(10,("posix_lock_in_range: (offset = %.0f) offset > %.0f and we cannot handle this. Ignoring lock.\n",
600				(double)u_offset, (double)((SMB_BIG_UINT)max_positive_lock_offset) ));
601		return False;
602	}
603
604	/*
605	 * We must truncate the count to less than max_positive_lock_offset.
606	 */
607
608	if (u_count & ~((SMB_BIG_UINT)max_positive_lock_offset))
609		count = max_positive_lock_offset;
610
611	/*
612	 * Truncate count to end at max lock offset.
613	 */
614
615	if (offset + count < 0 || offset + count > max_positive_lock_offset)
616		count = max_positive_lock_offset - offset;
617
618	/*
619	 * If we ate all the count, ignore this lock.
620	 */
621
622	if (count == 0) {
623		DEBUG(10,("posix_lock_in_range: Count = 0. Ignoring lock u_offset = %.0f, u_count = %.0f\n",
624				(double)u_offset, (double)u_count ));
625		return False;
626	}
627
628	/*
629	 * The mapping was successful.
630	 */
631
632	DEBUG(10,("posix_lock_in_range: offset_out = %.0f, count_out = %.0f\n",
633			(double)offset, (double)count ));
634
635	*offset_out = offset;
636	*count_out = count;
637
638	return True;
639}
640
641/****************************************************************************
642 Actual function that does POSIX locks. Copes with 64 -> 32 bit cruft and
643 broken NFS implementations.
644****************************************************************************/
645
646static BOOL posix_fcntl_lock(files_struct *fsp, int op, SMB_OFF_T offset, SMB_OFF_T count, int type)
647{
648	int ret;
649
650	DEBUG(8,("posix_fcntl_lock %d %d %.0f %.0f %d\n",fsp->fd,op,(double)offset,(double)count,type));
651
652	ret = SMB_VFS_LOCK(fsp,fsp->fd,op,offset,count,type);
653
654	if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno ==  EINVAL))) {
655
656		DEBUG(0,("posix_fcntl_lock: WARNING: lock request at offset %.0f, length %.0f returned\n",
657					(double)offset,(double)count));
658		DEBUG(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
659		DEBUG(0,("on 32 bit NFS mounted file systems.\n"));
660
661		/*
662		 * If the offset is > 0x7FFFFFFF then this will cause problems on
663		 * 32 bit NFS mounted filesystems. Just ignore it.
664		 */
665
666		if (offset & ~((SMB_OFF_T)0x7fffffff)) {
667			DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
668			return True;
669		}
670
671		if (count & ~((SMB_OFF_T)0x7fffffff)) {
672			/* 32 bit NFS file system, retry with smaller offset */
673			DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
674			errno = 0;
675			count &= 0x7fffffff;
676			ret = SMB_VFS_LOCK(fsp,fsp->fd,op,offset,count,type);
677		}
678	}
679
680	DEBUG(8,("posix_fcntl_lock: Lock call %s\n", ret ? "successful" : "failed"));
681
682	return ret;
683}
684
685/****************************************************************************
686 POSIX function to see if a file region is locked. Returns True if the
687 region is locked, False otherwise.
688****************************************************************************/
689
690BOOL is_posix_locked(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count, enum brl_type lock_type)
691{
692	SMB_OFF_T offset;
693	SMB_OFF_T count;
694	int posix_lock_type = map_posix_lock_type(fsp,lock_type);
695
696	DEBUG(10,("is_posix_locked: File %s, offset = %.0f, count = %.0f, type = %s\n",
697			fsp->fsp_name, (double)u_offset, (double)u_count, posix_lock_type_name(lock_type) ));
698
699	/*
700	 * If the requested lock won't fit in the POSIX range, we will
701	 * never set it, so presume it is not locked.
702	 */
703
704	if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
705		return False;
706
707	/*
708	 * Note that most UNIX's can *test* for a write lock on
709	 * a read-only fd, just not *set* a write lock on a read-only
710	 * fd. So we don't need to use map_lock_type here.
711	 */
712
713	return posix_fcntl_lock(fsp,SMB_F_GETLK,offset,count,posix_lock_type);
714}
715
716/*
717 * Structure used when splitting a lock range
718 * into a POSIX lock range. Doubly linked list.
719 */
720
721struct lock_list {
722    struct lock_list *next;
723    struct lock_list *prev;
724    SMB_OFF_T start;
725    SMB_OFF_T size;
726};
727
728/****************************************************************************
729 Create a list of lock ranges that don't overlap a given range. Used in calculating
730 POSIX locks and unlocks. This is a difficult function that requires ASCII art to
731 understand it :-).
732****************************************************************************/
733
734static struct lock_list *posix_lock_list(TALLOC_CTX *ctx, struct lock_list *lhead, files_struct *fsp)
735{
736	TDB_DATA kbuf = locking_key_fsp(fsp);
737	TDB_DATA dbuf;
738	struct posix_lock *locks;
739	size_t num_locks, i;
740
741	dbuf.dptr = NULL;
742
743	dbuf = tdb_fetch(posix_lock_tdb, kbuf);
744
745	if (!dbuf.dptr)
746		return lhead;
747
748	locks = (struct posix_lock *)dbuf.dptr;
749	num_locks = (size_t)(dbuf.dsize / sizeof(*locks));
750
751	/*
752	 * Check the current lock list on this dev/inode pair.
753	 * Quit if the list is deleted.
754	 */
755
756	DEBUG(10,("posix_lock_list: curr: start=%.0f,size=%.0f\n",
757		(double)lhead->start, (double)lhead->size ));
758
759	for (i=0; i<num_locks && lhead; i++) {
760
761		struct posix_lock *lock = &locks[i];
762		struct lock_list *l_curr;
763
764		/*
765		 * Walk the lock list, checking for overlaps. Note that
766		 * the lock list can expand within this loop if the current
767		 * range being examined needs to be split.
768		 */
769
770		for (l_curr = lhead; l_curr;) {
771
772			DEBUG(10,("posix_lock_list: lock: fd=%d: start=%.0f,size=%.0f:type=%s", lock->fd,
773				(double)lock->start, (double)lock->size, posix_lock_type_name(lock->lock_type) ));
774
775			if ( (l_curr->start >= (lock->start + lock->size)) ||
776				 (lock->start >= (l_curr->start + l_curr->size))) {
777
778				/* No overlap with this lock - leave this range alone. */
779/*********************************************
780                                             +---------+
781                                             | l_curr  |
782                                             +---------+
783                                +-------+
784                                | lock  |
785                                +-------+
786OR....
787             +---------+
788             |  l_curr |
789             +---------+
790**********************************************/
791
792				DEBUG(10,("no overlap case.\n" ));
793
794				l_curr = l_curr->next;
795
796			} else if ( (l_curr->start >= lock->start) &&
797						(l_curr->start + l_curr->size <= lock->start + lock->size) ) {
798
799				/*
800				 * This unlock is completely overlapped by this existing lock range
801				 * and thus should have no effect (not be unlocked). Delete it from the list.
802				 */
803/*********************************************
804                +---------+
805                |  l_curr |
806                +---------+
807        +---------------------------+
808        |       lock                |
809        +---------------------------+
810**********************************************/
811				/* Save the next pointer */
812				struct lock_list *ul_next = l_curr->next;
813
814				DEBUG(10,("delete case.\n" ));
815
816				DLIST_REMOVE(lhead, l_curr);
817				if(lhead == NULL)
818					break; /* No more list... */
819
820				l_curr = ul_next;
821
822			} else if ( (l_curr->start >= lock->start) &&
823						(l_curr->start < lock->start + lock->size) &&
824						(l_curr->start + l_curr->size > lock->start + lock->size) ) {
825
826				/*
827				 * This unlock overlaps the existing lock range at the high end.
828				 * Truncate by moving start to existing range end and reducing size.
829				 */
830/*********************************************
831                +---------------+
832                |  l_curr       |
833                +---------------+
834        +---------------+
835        |    lock       |
836        +---------------+
837BECOMES....
838                        +-------+
839                        | l_curr|
840                        +-------+
841**********************************************/
842
843				l_curr->size = (l_curr->start + l_curr->size) - (lock->start + lock->size);
844				l_curr->start = lock->start + lock->size;
845
846				DEBUG(10,("truncate high case: start=%.0f,size=%.0f\n",
847								(double)l_curr->start, (double)l_curr->size ));
848
849				l_curr = l_curr->next;
850
851			} else if ( (l_curr->start < lock->start) &&
852						(l_curr->start + l_curr->size > lock->start) &&
853						(l_curr->start + l_curr->size <= lock->start + lock->size) ) {
854
855				/*
856				 * This unlock overlaps the existing lock range at the low end.
857				 * Truncate by reducing size.
858				 */
859/*********************************************
860   +---------------+
861   |  l_curr       |
862   +---------------+
863           +---------------+
864           |    lock       |
865           +---------------+
866BECOMES....
867   +-------+
868   | l_curr|
869   +-------+
870**********************************************/
871
872				l_curr->size = lock->start - l_curr->start;
873
874				DEBUG(10,("truncate low case: start=%.0f,size=%.0f\n",
875								(double)l_curr->start, (double)l_curr->size ));
876
877				l_curr = l_curr->next;
878
879			} else if ( (l_curr->start < lock->start) &&
880						(l_curr->start + l_curr->size > lock->start + lock->size) ) {
881				/*
882				 * Worst case scenario. Unlock request completely overlaps an existing
883				 * lock range. Split the request into two, push the new (upper) request
884				 * into the dlink list, and continue with the entry after ul_new (as we
885				 * know that ul_new will not overlap with this lock).
886				 */
887/*********************************************
888        +---------------------------+
889        |        l_curr             |
890        +---------------------------+
891                +---------+
892                | lock    |
893                +---------+
894BECOMES.....
895        +-------+         +---------+
896        | l_curr|         | l_new   |
897        +-------+         +---------+
898**********************************************/
899				struct lock_list *l_new = TALLOC_P(ctx, struct lock_list);
900
901				if(l_new == NULL) {
902					DEBUG(0,("posix_lock_list: talloc fail.\n"));
903					return NULL; /* The talloc_destroy takes care of cleanup. */
904				}
905
906				ZERO_STRUCTP(l_new);
907				l_new->start = lock->start + lock->size;
908				l_new->size = l_curr->start + l_curr->size - l_new->start;
909
910				/* Truncate the l_curr. */
911				l_curr->size = lock->start - l_curr->start;
912
913				DEBUG(10,("split case: curr: start=%.0f,size=%.0f \
914new: start=%.0f,size=%.0f\n", (double)l_curr->start, (double)l_curr->size,
915								(double)l_new->start, (double)l_new->size ));
916
917				/*
918				 * Add into the dlink list after the l_curr point - NOT at lhead.
919				 * Note we can't use DLINK_ADD here as this inserts at the head of the given list.
920				 */
921
922				l_new->prev = l_curr;
923				l_new->next = l_curr->next;
924				l_curr->next = l_new;
925
926				/* And move after the link we added. */
927				l_curr = l_new->next;
928
929			} else {
930
931				/*
932				 * This logic case should never happen. Ensure this is the
933				 * case by forcing an abort.... Remove in production.
934				 */
935				pstring msg;
936
937				slprintf(msg, sizeof(msg)-1, "logic flaw in cases: l_curr: start = %.0f, size = %.0f : \
938lock: start = %.0f, size = %.0f\n", (double)l_curr->start, (double)l_curr->size, (double)lock->start, (double)lock->size );
939
940				smb_panic(msg);
941			}
942		} /* end for ( l_curr = lhead; l_curr;) */
943	} /* end for (i=0; i<num_locks && ul_head; i++) */
944
945	SAFE_FREE(dbuf.dptr);
946
947	return lhead;
948}
949
950/****************************************************************************
951 POSIX function to acquire a lock. Returns True if the
952 lock could be granted, False if not.
953****************************************************************************/
954
955BOOL set_posix_lock(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count, enum brl_type lock_type)
956{
957	SMB_OFF_T offset;
958	SMB_OFF_T count;
959	BOOL ret = True;
960	size_t entry_num = 0;
961	size_t lock_count;
962	TALLOC_CTX *l_ctx = NULL;
963	struct lock_list *llist = NULL;
964	struct lock_list *ll = NULL;
965	int posix_lock_type = map_posix_lock_type(fsp,lock_type);
966
967	DEBUG(5,("set_posix_lock: File %s, offset = %.0f, count = %.0f, type = %s\n",
968			fsp->fsp_name, (double)u_offset, (double)u_count, posix_lock_type_name(lock_type) ));
969
970	/*
971	 * If the requested lock won't fit in the POSIX range, we will
972	 * pretend it was successful.
973	 */
974
975	if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
976		return True;
977
978	/*
979	 * Windows is very strange. It allows read locks to be overlayed
980	 * (even over a write lock), but leaves the write lock in force until the first
981	 * unlock. It also reference counts the locks. This means the following sequence :
982	 *
983	 * process1                                      process2
984	 * ------------------------------------------------------------------------
985	 * WRITE LOCK : start = 2, len = 10
986	 *                                            READ LOCK: start =0, len = 10 - FAIL
987	 * READ LOCK : start = 0, len = 14
988	 *                                            READ LOCK: start =0, len = 10 - FAIL
989	 * UNLOCK : start = 2, len = 10
990	 *                                            READ LOCK: start =0, len = 10 - OK
991	 *
992	 * Under POSIX, the same sequence in steps 1 and 2 would not be reference counted, but
993	 * would leave a single read lock over the 0-14 region. In order to
994	 * re-create Windows semantics mapped to POSIX locks, we create multiple TDB
995	 * entries, one for each overlayed lock request. We are guarenteed by the brlock
996	 * semantics that if a write lock is added, then it will be first in the array.
997	 */
998
999	if ((l_ctx = talloc_init("set_posix_lock")) == NULL) {
1000		DEBUG(0,("set_posix_lock: unable to init talloc context.\n"));
1001		return True; /* Not a fatal error. */
1002	}
1003
1004	if ((ll = TALLOC_P(l_ctx, struct lock_list)) == NULL) {
1005		DEBUG(0,("set_posix_lock: unable to talloc unlock list.\n"));
1006		talloc_destroy(l_ctx);
1007		return True; /* Not a fatal error. */
1008	}
1009
1010	/*
1011	 * Create the initial list entry containing the
1012	 * lock we want to add.
1013	 */
1014
1015	ZERO_STRUCTP(ll);
1016	ll->start = offset;
1017	ll->size = count;
1018
1019	DLIST_ADD(llist, ll);
1020
1021	/*
1022	 * The following call calculates if there are any
1023	 * overlapping locks held by this process on
1024	 * fd's open on the same file and splits this list
1025	 * into a list of lock ranges that do not overlap with existing
1026	 * POSIX locks.
1027	 */
1028
1029	llist = posix_lock_list(l_ctx, llist, fsp);
1030
1031	/*
1032	 * Now we have the list of ranges to lock it is safe to add the
1033	 * entry into the POSIX lock tdb. We take note of the entry we
1034	 * added here in case we have to remove it on POSIX lock fail.
1035	 */
1036
1037	if (!add_posix_lock_entry(fsp,offset,count,posix_lock_type,&entry_num)) {
1038		DEBUG(0,("set_posix_lock: Unable to create posix lock entry !\n"));
1039		talloc_destroy(l_ctx);
1040		return False;
1041	}
1042
1043	/*
1044	 * Add the POSIX locks on the list of ranges returned.
1045	 * As the lock is supposed to be added atomically, we need to
1046	 * back out all the locks if any one of these calls fail.
1047	 */
1048
1049	for (lock_count = 0, ll = llist; ll; ll = ll->next, lock_count++) {
1050		offset = ll->start;
1051		count = ll->size;
1052
1053		DEBUG(5,("set_posix_lock: Real lock: Type = %s: offset = %.0f, count = %.0f\n",
1054			posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1055
1056		if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,posix_lock_type)) {
1057			DEBUG(5,("set_posix_lock: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
1058				posix_lock_type_name(posix_lock_type), (double)offset, (double)count, strerror(errno) ));
1059			ret = False;
1060			break;
1061		}
1062	}
1063
1064	if (!ret) {
1065
1066		/*
1067		 * Back out all the POSIX locks we have on fail.
1068		 */
1069
1070		for (ll = llist; lock_count; ll = ll->next, lock_count--) {
1071			offset = ll->start;
1072			count = ll->size;
1073
1074			DEBUG(5,("set_posix_lock: Backing out locks: Type = %s: offset = %.0f, count = %.0f\n",
1075				posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1076
1077			posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK);
1078		}
1079
1080		/*
1081		 * Remove the tdb entry for this lock.
1082		 */
1083
1084		delete_posix_lock_entry_by_index(fsp,entry_num);
1085	}
1086
1087	talloc_destroy(l_ctx);
1088	return ret;
1089}
1090
1091/****************************************************************************
1092 POSIX function to release a lock. Returns True if the
1093 lock could be released, False if not.
1094****************************************************************************/
1095
1096BOOL release_posix_lock(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count)
1097{
1098	SMB_OFF_T offset;
1099	SMB_OFF_T count;
1100	BOOL ret = True;
1101	TALLOC_CTX *ul_ctx = NULL;
1102	struct lock_list *ulist = NULL;
1103	struct lock_list *ul = NULL;
1104	struct posix_lock deleted_lock;
1105	int num_overlapped_entries;
1106
1107	DEBUG(5,("release_posix_lock: File %s, offset = %.0f, count = %.0f\n",
1108		fsp->fsp_name, (double)u_offset, (double)u_count ));
1109
1110	/*
1111	 * If the requested lock won't fit in the POSIX range, we will
1112	 * pretend it was successful.
1113	 */
1114
1115	if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
1116		return True;
1117
1118	/*
1119	 * We treat this as one unlock request for POSIX accounting purposes even
1120	 * if it may later be split into multiple smaller POSIX unlock ranges.
1121	 * num_overlapped_entries is the number of existing locks that have any
1122	 * overlap with this unlock request.
1123	 */
1124
1125	num_overlapped_entries = delete_posix_lock_entry(fsp, offset, count, &deleted_lock);
1126
1127	if (num_overlapped_entries == -1) {
1128        smb_panic("release_posix_lock: unable find entry to delete !\n");
1129	}
1130
1131	/*
1132	 * If num_overlapped_entries is > 0, and the lock_type we just deleted from the tdb was
1133	 * a POSIX write lock, then before doing the unlock we need to downgrade
1134	 * the POSIX lock to a read lock. This allows any overlapping read locks
1135	 * to be atomically maintained.
1136	 */
1137
1138	if (num_overlapped_entries > 0 && deleted_lock.lock_type == F_WRLCK) {
1139		if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_RDLCK)) {
1140			DEBUG(0,("release_posix_lock: downgrade of lock failed with error %s !\n", strerror(errno) ));
1141			return False;
1142		}
1143	}
1144
1145	if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
1146		DEBUG(0,("release_posix_lock: unable to init talloc context.\n"));
1147		return True; /* Not a fatal error. */
1148	}
1149
1150	if ((ul = TALLOC_P(ul_ctx, struct lock_list)) == NULL) {
1151		DEBUG(0,("release_posix_lock: unable to talloc unlock list.\n"));
1152		talloc_destroy(ul_ctx);
1153		return True; /* Not a fatal error. */
1154	}
1155
1156	/*
1157	 * Create the initial list entry containing the
1158	 * lock we want to remove.
1159	 */
1160
1161	ZERO_STRUCTP(ul);
1162	ul->start = offset;
1163	ul->size = count;
1164
1165	DLIST_ADD(ulist, ul);
1166
1167	/*
1168	 * The following call calculates if there are any
1169	 * overlapping locks held by this process on
1170	 * fd's open on the same file and creates a
1171	 * list of unlock ranges that will allow
1172	 * POSIX lock ranges to remain on the file whilst the
1173	 * unlocks are performed.
1174	 */
1175
1176	ulist = posix_lock_list(ul_ctx, ulist, fsp);
1177
1178	/*
1179	 * Release the POSIX locks on the list of ranges returned.
1180	 */
1181
1182	for(; ulist; ulist = ulist->next) {
1183		offset = ulist->start;
1184		count = ulist->size;
1185
1186		DEBUG(5,("release_posix_lock: Real unlock: offset = %.0f, count = %.0f\n",
1187			(double)offset, (double)count ));
1188
1189		if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK))
1190			ret = False;
1191	}
1192
1193	talloc_destroy(ul_ctx);
1194
1195	return ret;
1196}
1197
1198/****************************************************************************
1199 Remove all lock entries for a specific dev/inode pair from the tdb.
1200****************************************************************************/
1201
1202static void delete_posix_lock_entries(files_struct *fsp)
1203{
1204	TDB_DATA kbuf = locking_key_fsp(fsp);
1205
1206	if (tdb_delete(posix_lock_tdb, kbuf) == -1)
1207		DEBUG(0,("delete_close_entries: tdb_delete fail !\n"));
1208}
1209
1210/****************************************************************************
1211 Debug function.
1212****************************************************************************/
1213
1214static void dump_entry(struct posix_lock *pl)
1215{
1216	DEBUG(10,("entry: start=%.0f, size=%.0f, type=%d, fd=%i\n",
1217		(double)pl->start, (double)pl->size, (int)pl->lock_type, pl->fd ));
1218}
1219
1220/****************************************************************************
1221 Remove any locks on this fd. Called from file_close().
1222****************************************************************************/
1223
1224void posix_locking_close_file(files_struct *fsp)
1225{
1226	struct posix_lock *entries = NULL;
1227	size_t count, i;
1228
1229	/*
1230	 * Optimization for the common case where we are the only
1231	 * opener of a file. If all fd entries are our own, we don't
1232	 * need to explicitly release all the locks via the POSIX functions,
1233	 * we can just remove all the entries in the tdb and allow the
1234	 * close to remove the real locks.
1235	 */
1236
1237	count = get_posix_lock_entries(fsp, &entries);
1238
1239	if (count == 0) {
1240		DEBUG(10,("posix_locking_close_file: file %s has no outstanding locks.\n", fsp->fsp_name ));
1241		return;
1242	}
1243
1244	for (i = 0; i < count; i++) {
1245		if (entries[i].fd != fsp->fd )
1246			break;
1247
1248		dump_entry(&entries[i]);
1249	}
1250
1251	if (i == count) {
1252		/* All locks are ours. */
1253		DEBUG(10,("posix_locking_close_file: file %s has %u outstanding locks, but all on one fd.\n",
1254			fsp->fsp_name, (unsigned int)count ));
1255		SAFE_FREE(entries);
1256		delete_posix_lock_entries(fsp);
1257		return;
1258	}
1259
1260	/*
1261	 * Difficult case. We need to delete all our locks, whilst leaving
1262	 * all other POSIX locks in place.
1263	 */
1264
1265	for (i = 0; i < count; i++) {
1266		struct posix_lock *pl = &entries[i];
1267		if (pl->fd == fsp->fd)
1268			release_posix_lock(fsp, (SMB_BIG_UINT)pl->start, (SMB_BIG_UINT)pl->size );
1269	}
1270	SAFE_FREE(entries);
1271}
1272
1273/*******************************************************************
1274 Create the in-memory POSIX lock databases.
1275********************************************************************/
1276
1277BOOL posix_locking_init(int read_only)
1278{
1279	if (posix_lock_tdb && posix_pending_close_tdb)
1280		return True;
1281
1282	if (!posix_lock_tdb)
1283		posix_lock_tdb = tdb_open_log(NULL, 0, TDB_INTERNAL,
1284					  read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644);
1285	if (!posix_lock_tdb) {
1286		DEBUG(0,("Failed to open POSIX byte range locking database.\n"));
1287		return False;
1288	}
1289	if (!posix_pending_close_tdb)
1290		posix_pending_close_tdb = tdb_open_log(NULL, 0, TDB_INTERNAL,
1291						   read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644);
1292	if (!posix_pending_close_tdb) {
1293		DEBUG(0,("Failed to open POSIX pending close database.\n"));
1294		return False;
1295	}
1296
1297	return True;
1298}
1299
1300/*******************************************************************
1301 Delete the in-memory POSIX lock databases.
1302********************************************************************/
1303
1304BOOL posix_locking_end(void)
1305{
1306    if (posix_lock_tdb && tdb_close(posix_lock_tdb) != 0)
1307		return False;
1308    if (posix_pending_close_tdb && tdb_close(posix_pending_close_tdb) != 0)
1309		return False;
1310	return True;
1311}
1312