1/*
2   Unix SMB/CIFS implementation.
3   Locking functions
4   Copyright (C) Jeremy Allison 1992-2000
5
6   This program is free software; you can redistribute it and/or modify
7   it under the terms of the GNU General Public License as published by
8   the Free Software Foundation; either version 2 of the License, or
9   (at your option) any later version.
10
11   This program is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14   GNU General Public License for more details.
15
16   You should have received a copy of the GNU General Public License
17   along with this program; if not, write to the Free Software
18   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19
20   Revision History:
21
22   POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000.
23*/
24
25#include "includes.h"
26
27/*
28 * The POSIX locking database handle.
29 */
30
31static TDB_CONTEXT *posix_lock_tdb;
32
33/*
34 * The pending close database handle.
35 */
36
37static TDB_CONTEXT *posix_pending_close_tdb;
38
39/*
40 * The data in POSIX lock records is an unsorted linear array of these
41 * records.  It is unnecessary to store the count as tdb provides the
42 * size of the record.
43 */
44
/*
 * One lock record. add_posix_lock_entry() appends the raw bytes of this
 * struct to the tdb record, and readers derive the record count from
 * dsize / sizeof(struct posix_lock), so the layout is part of the stored
 * format.
 */
struct posix_lock {
	int fd;			/* fd the lock was requested on (copied from fsp->fd) */
	SMB_OFF_T start;	/* start offset of the locked range */
	SMB_OFF_T size;		/* length of the locked range */
	int lock_type;		/* lock type as passed to add_posix_lock_entry(); printed via posix_lock_type_name() */
};
51
52/*
53 * The data in POSIX pending close records is an unsorted linear array of int
54 * records.  It is unnecessary to store the count as tdb provides the
55 * size of the record.
56 */
57
58/* The key used in both the POSIX databases. */
59
/*
 * locking_key() zeroes this struct, fills both fields and hands the raw
 * bytes (sizeof(struct posix_lock_key)) to tdb as the key.
 */
struct posix_lock_key {
	SMB_DEV_T device;	/* device number (fsp->dev) */
	SMB_INO_T inode;	/* inode number (fsp->inode) */
};
64
65/*******************************************************************
66 Form a static locking key for a dev/inode pair.
67******************************************************************/
68
69static TDB_DATA locking_key(SMB_DEV_T dev, SMB_INO_T inode)
70{
71	static struct posix_lock_key key;
72	TDB_DATA kbuf;
73
74	memset(&key, '\0', sizeof(key));
75	key.device = dev;
76	key.inode = inode;
77	kbuf.dptr = (char *)&key;
78	kbuf.dsize = sizeof(key);
79	return kbuf;
80}
81
82/*******************************************************************
83 Convenience function to get a key from an fsp.
84******************************************************************/
85
86static TDB_DATA locking_key_fsp(files_struct *fsp)
87{
88	return locking_key(fsp->dev, fsp->inode);
89}
90
91/****************************************************************************
92 Add an fd to the pending close tdb.
93****************************************************************************/
94
95static BOOL add_fd_to_close_entry(files_struct *fsp)
96{
97	TDB_DATA kbuf = locking_key_fsp(fsp);
98	TDB_DATA dbuf;
99	char *tp;
100
101	dbuf.dptr = NULL;
102
103	dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
104
105	tp = Realloc(dbuf.dptr, dbuf.dsize + sizeof(int));
106	if (!tp) {
107		DEBUG(0,("add_fd_to_close_entry: Realloc fail !\n"));
108		SAFE_FREE(dbuf.dptr);
109		return False;
110	} else
111		dbuf.dptr = tp;
112
113	memcpy(dbuf.dptr + dbuf.dsize, &fsp->fd, sizeof(int));
114	dbuf.dsize += sizeof(int);
115
116	if (tdb_store(posix_pending_close_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
117		DEBUG(0,("add_fd_to_close_entry: tdb_store fail !\n"));
118	}
119
120	SAFE_FREE(dbuf.dptr);
121	return True;
122}
123
124/****************************************************************************
125 Remove all fd entries for a specific dev/inode pair from the tdb.
126****************************************************************************/
127
128static void delete_close_entries(files_struct *fsp)
129{
130	TDB_DATA kbuf = locking_key_fsp(fsp);
131
132	if (tdb_delete(posix_pending_close_tdb, kbuf) == -1)
133		DEBUG(0,("delete_close_entries: tdb_delete fail !\n"));
134}
135
136/****************************************************************************
137 Get the array of POSIX pending close records for an open fsp. Caller must
138 free. Returns number of entries.
139****************************************************************************/
140
141static size_t get_posix_pending_close_entries(files_struct *fsp, int **entries)
142{
143	TDB_DATA kbuf = locking_key_fsp(fsp);
144	TDB_DATA dbuf;
145	size_t count = 0;
146
147	*entries = NULL;
148	dbuf.dptr = NULL;
149
150	dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
151
152	if (!dbuf.dptr) {
153		return 0;
154	}
155
156	*entries = (int *)dbuf.dptr;
157	count = (size_t)(dbuf.dsize / sizeof(int));
158
159	return count;
160}
161
162/****************************************************************************
163 Get the array of POSIX locks for an fsp. Caller must free. Returns
164 number of entries.
165****************************************************************************/
166
167static size_t get_posix_lock_entries(files_struct *fsp, struct posix_lock **entries)
168{
169	TDB_DATA kbuf = locking_key_fsp(fsp);
170	TDB_DATA dbuf;
171	size_t count = 0;
172
173	*entries = NULL;
174
175	dbuf.dptr = NULL;
176
177	dbuf = tdb_fetch(posix_lock_tdb, kbuf);
178
179	if (!dbuf.dptr) {
180		return 0;
181	}
182
183	*entries = (struct posix_lock *)dbuf.dptr;
184	count = (size_t)(dbuf.dsize / sizeof(struct posix_lock));
185
186	return count;
187}
188
189/****************************************************************************
190 Deal with pending closes needed by POSIX locking support.
191 Note that posix_locking_close_file() is expected to have been called
192 to delete all locks on this fsp before this function is called.
193****************************************************************************/
194
195int fd_close_posix(struct connection_struct *conn, files_struct *fsp)
196{
197	int saved_errno = 0;
198	int ret;
199	size_t count, i;
200	struct posix_lock *entries = NULL;
201	int *fd_array = NULL;
202	BOOL locks_on_other_fds = False;
203
204	if (!lp_posix_locking(SNUM(conn))) {
205		/*
206		 * No POSIX to worry about, just close.
207		 */
208		ret = SMB_VFS_CLOSE(fsp,fsp->fd);
209		fsp->fd = -1;
210		return ret;
211	}
212
213	/*
214	 * Get the number of outstanding POSIX locks on this dev/inode pair.
215	 */
216
217	count = get_posix_lock_entries(fsp, &entries);
218
219	/*
220	 * Check if there are any outstanding locks belonging to
221	 * other fd's. This should never be the case if posix_locking_close_file()
222	 * has been called first, but it never hurts to be *sure*.
223	 */
224
225	for (i = 0; i < count; i++) {
226		if (entries[i].fd != fsp->fd) {
227			locks_on_other_fds = True;
228			break;
229		}
230	}
231
232	if (locks_on_other_fds) {
233
234		/*
235		 * There are outstanding locks on this dev/inode pair on other fds.
236		 * Add our fd to the pending close tdb and set fsp->fd to -1.
237		 */
238
239		if (!add_fd_to_close_entry(fsp)) {
240			SAFE_FREE(entries);
241			return False;
242		}
243
244		SAFE_FREE(entries);
245		fsp->fd = -1;
246		return 0;
247	}
248
249	SAFE_FREE(entries);
250
251	/*
252	 * No outstanding POSIX locks. Get the pending close fd's
253	 * from the tdb and close them all.
254	 */
255
256	count = get_posix_pending_close_entries(fsp, &fd_array);
257
258	if (count) {
259		DEBUG(10,("fd_close_posix: doing close on %u fd's.\n", (unsigned int)count ));
260
261		for(i = 0; i < count; i++) {
262			if (SMB_VFS_CLOSE(fsp,fd_array[i]) == -1) {
263				saved_errno = errno;
264			}
265		}
266
267		/*
268		 * Delete all fd's stored in the tdb
269		 * for this dev/inode pair.
270		 */
271
272		delete_close_entries(fsp);
273	}
274
275	SAFE_FREE(fd_array);
276
277	/*
278	 * Finally close the fd associated with this fsp.
279	 */
280
281	ret = SMB_VFS_CLOSE(fsp,fsp->fd);
282
283	if (saved_errno != 0) {
284        errno = saved_errno;
285		ret = -1;
286    }
287
288	fsp->fd = -1;
289
290	return ret;
291}
292
/****************************************************************************
 Debugging aid: printable name for a lock type.
 F_RDLCK maps to "READ"; every other value maps to "WRITE".
****************************************************************************/

static const char *posix_lock_type_name(int lock_type)
{
	if (lock_type == F_RDLCK) {
		return "READ";
	}

	return "WRITE";
}
301
302/****************************************************************************
303 Delete a POSIX lock entry by index number. Used if the tdb add succeeds, but
304 then the POSIX fcntl lock fails.
305****************************************************************************/
306
307static BOOL delete_posix_lock_entry_by_index(files_struct *fsp, size_t entry)
308{
309	TDB_DATA kbuf = locking_key_fsp(fsp);
310	TDB_DATA dbuf;
311	struct posix_lock *locks;
312	size_t count;
313
314	dbuf.dptr = NULL;
315
316	dbuf = tdb_fetch(posix_lock_tdb, kbuf);
317
318	if (!dbuf.dptr) {
319		DEBUG(10,("delete_posix_lock_entry_by_index: tdb_fetch failed !\n"));
320		goto fail;
321	}
322
323	count = (size_t)(dbuf.dsize / sizeof(struct posix_lock));
324	locks = (struct posix_lock *)dbuf.dptr;
325
326	if (count == 1) {
327		tdb_delete(posix_lock_tdb, kbuf);
328	} else {
329		if (entry < count-1) {
330			memmove(&locks[entry], &locks[entry+1], sizeof(*locks)*((count-1) - entry));
331		}
332		dbuf.dsize -= sizeof(*locks);
333		tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE);
334	}
335
336	SAFE_FREE(dbuf.dptr);
337
338	return True;
339
340 fail:
341
342	SAFE_FREE(dbuf.dptr);
343	return False;
344}
345
346/****************************************************************************
347 Add an entry into the POSIX locking tdb. We return the index number of the
348 added lock (used in case we need to delete *exactly* this entry). Returns
349 False on fail, True on success.
350****************************************************************************/
351
352static BOOL add_posix_lock_entry(files_struct *fsp, SMB_OFF_T start, SMB_OFF_T size, int lock_type, size_t *pentry_num)
353{
354	TDB_DATA kbuf = locking_key_fsp(fsp);
355	TDB_DATA dbuf;
356	struct posix_lock pl;
357	char *tp;
358
359	dbuf.dptr = NULL;
360
361	dbuf = tdb_fetch(posix_lock_tdb, kbuf);
362
363	*pentry_num = (size_t)(dbuf.dsize / sizeof(pl));
364
365	/*
366	 * Add new record.
367	 */
368
369	pl.fd = fsp->fd;
370	pl.start = start;
371	pl.size = size;
372	pl.lock_type = lock_type;
373
374	tp = Realloc(dbuf.dptr, dbuf.dsize + sizeof(pl));
375	if (!tp) {
376		DEBUG(0,("add_posix_lock_entry: Realloc fail !\n"));
377		goto fail;
378	} else
379		dbuf.dptr = tp;
380
381	memcpy(dbuf.dptr + dbuf.dsize, &pl, sizeof(pl));
382	dbuf.dsize += sizeof(pl);
383
384	if (tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
385		DEBUG(0,("add_posix_lock: Failed to add lock entry on file %s\n", fsp->fsp_name));
386		goto fail;
387	}
388
389	SAFE_FREE(dbuf.dptr);
390
391	DEBUG(10,("add_posix_lock: File %s: type = %s: start=%lld size=%lld: dev=%llu inode=%llu\n",
392			fsp->fsp_name, posix_lock_type_name(lock_type), start, size,
393			fsp->dev, fsp->inode ));
394
395	return True;
396
397 fail:
398
399	SAFE_FREE(dbuf.dptr);
400	return False;
401}
402
403/****************************************************************************
404 Calculate if locks have any overlap at all.
405****************************************************************************/
406
407static BOOL does_lock_overlap(SMB_OFF_T start1, SMB_OFF_T size1, SMB_OFF_T start2, SMB_OFF_T size2)
408{
409	if (start1 >= start2 && start1 <= start2 + size2)
410		return True;
411
412	if (start1 < start2 && start1 + size1 > start2)
413		return True;
414
415	return False;
416}
417
418/****************************************************************************
419 Delete an entry from the POSIX locking tdb. Returns a copy of the entry being
420 deleted and the number of records that are overlapped by this one, or -1 on error.
421****************************************************************************/
422
423static int delete_posix_lock_entry(files_struct *fsp, SMB_OFF_T start, SMB_OFF_T size, struct posix_lock *pl)
424{
425	TDB_DATA kbuf = locking_key_fsp(fsp);
426	TDB_DATA dbuf;
427	struct posix_lock *locks;
428	size_t i, count;
429	BOOL found = False;
430	int num_overlapping_records = 0;
431
432	dbuf.dptr = NULL;
433
434	dbuf = tdb_fetch(posix_lock_tdb, kbuf);
435
436	if (!dbuf.dptr) {
437		DEBUG(10,("delete_posix_lock_entry: tdb_fetch failed !\n"));
438		goto fail;
439	}
440
441	/* There are existing locks - find a match. */
442	locks = (struct posix_lock *)dbuf.dptr;
443	count = (size_t)(dbuf.dsize / sizeof(*locks));
444
445	/*
446	 * Search for and delete the first record that matches the
447	 * unlock criteria.
448	 */
449
450	for (i=0; i<count; i++) {
451		struct posix_lock *entry = &locks[i];
452
453		if (entry->fd == fsp->fd &&
454			entry->start == start &&
455			entry->size == size) {
456
457			/* Make a copy if requested. */
458			if (pl)
459				*pl = *entry;
460
461			/* Found it - delete it. */
462			if (count == 1) {
463				tdb_delete(posix_lock_tdb, kbuf);
464			} else {
465				if (i < count-1) {
466					memmove(&locks[i], &locks[i+1], sizeof(*locks)*((count-1) - i));
467				}
468				dbuf.dsize -= sizeof(*locks);
469				tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE);
470			}
471			count--;
472			found = True;
473			break;
474		}
475	}
476
477	if (!found)
478		goto fail;
479
480	/*
481	 * Count the number of entries that are
482	 * overlapped by this unlock request.
483	 */
484
485	for (i = 0; i < count; i++) {
486		struct posix_lock *entry = &locks[i];
487
488		if (fsp->fd == entry->fd &&
489			does_lock_overlap( start, size, entry->start, entry->size))
490				num_overlapping_records++;
491	}
492
493	DEBUG(10,("delete_posix_lock_entry: type = %s: start=%lld size=%lld, num_records = %d\n",
494			posix_lock_type_name(pl->lock_type), pl->start, pl->size,
495				(unsigned int)num_overlapping_records ));
496
497	SAFE_FREE(dbuf.dptr);
498
499	return num_overlapping_records;
500
501 fail:
502
503	SAFE_FREE(dbuf.dptr);
504	return -1;
505}
506
507/****************************************************************************
508 Utility function to map a lock type correctly depending on the open
509 mode of a file.
510****************************************************************************/
511
512static int map_posix_lock_type( files_struct *fsp, enum brl_type lock_type)
513{
514	if((lock_type == WRITE_LOCK) && !fsp->can_write) {
515		/*
516		 * Many UNIX's cannot get a write lock on a file opened read-only.
517		 * Win32 locking semantics allow this.
518		 * Do the best we can and attempt a read-only lock.
519		 */
520		DEBUG(10,("map_posix_lock_type: Downgrading write lock to read due to read-only file.\n"));
521		return F_RDLCK;
522	} else if((lock_type == READ_LOCK) && !fsp->can_read) {
523		/*
524		 * Ditto for read locks on write only files.
525		 */
526		DEBUG(10,("map_posix_lock_type: Changing read lock to write due to write-only file.\n"));
527		return F_WRLCK;
528	}
529
530  /*
531   * This return should be the most normal, as we attempt
532   * to always open files read/write.
533   */
534
535  return (lock_type == READ_LOCK) ? F_RDLCK : F_WRLCK;
536}
537
538/****************************************************************************
539 Check to see if the given unsigned lock range is within the possible POSIX
540 range. Modifies the given args to be in range if possible, just returns
541 False if not.
542****************************************************************************/
543
544static BOOL posix_lock_in_range(SMB_OFF_T *offset_out, SMB_OFF_T *count_out,
545								SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count)
546{
547	SMB_OFF_T offset = (SMB_OFF_T)u_offset;
548	SMB_OFF_T count = (SMB_OFF_T)u_count;
549
550	/*
551	 * For the type of system we are, attempt to
552	 * find the maximum positive lock offset as an SMB_OFF_T.
553	 */
554
555#if defined(MAX_POSITIVE_LOCK_OFFSET) /* Some systems have arbitrary limits. */
556
557	SMB_OFF_T max_positive_lock_offset = (MAX_POSITIVE_LOCK_OFFSET);
558
559#elif defined(LARGE_SMB_OFF_T) && !defined(HAVE_BROKEN_FCNTL64_LOCKS)
560
561	/*
562	 * In this case SMB_OFF_T is 64 bits,
563	 * and the underlying system can handle 64 bit signed locks.
564	 */
565
566    SMB_OFF_T mask2 = ((SMB_OFF_T)0x4) << (SMB_OFF_T_BITS-4);
567    SMB_OFF_T mask = (mask2<<1);
568    SMB_OFF_T max_positive_lock_offset = ~mask;
569
570#else /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
571
572	/*
573	 * In this case either SMB_OFF_T is 32 bits,
574	 * or the underlying system cannot handle 64 bit signed locks.
575	 * All offsets & counts must be 2^31 or less.
576	 */
577
578    SMB_OFF_T max_positive_lock_offset = 0x7FFFFFFF;
579
580#endif /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
581
582	/*
583	 * POSIX locks of length zero mean lock to end-of-file.
584	 * Win32 locks of length zero are point probes. Ignore
585	 * any Win32 locks of length zero. JRA.
586	 */
587
588	if (count == (SMB_OFF_T)0) {
589		DEBUG(10,("posix_lock_in_range: count = 0, ignoring.\n"));
590		return False;
591	}
592
593	/*
594	 * If the given offset was > max_positive_lock_offset then we cannot map this at all
595	 * ignore this lock.
596	 */
597
598	if (u_offset & ~((SMB_BIG_UINT)max_positive_lock_offset)) {
599		DEBUG(10,("posix_lock_in_range: (offset = %llu) offset > %llu and we cannot handle this. Ignoring lock.\n",
600				u_offset, ((SMB_BIG_UINT)max_positive_lock_offset) ));
601		return False;
602	}
603
604	/*
605	 * We must truncate the count to less than max_positive_lock_offset.
606	 */
607
608	if (u_count & ~((SMB_BIG_UINT)max_positive_lock_offset))
609		count = max_positive_lock_offset;
610
611	/*
612	 * Truncate count to end at max lock offset.
613	 */
614
615	if (offset + count < 0 || offset + count > max_positive_lock_offset)
616		count = max_positive_lock_offset - offset;
617
618	/*
619	 * If we ate all the count, ignore this lock.
620	 */
621
622	if (count == 0) {
623		DEBUG(10,("posix_lock_in_range: Count = 0. Ignoring lock u_offset = %llu, u_count = %llu\n",
624				u_offset, u_count ));
625		return False;
626	}
627
628	/*
629	 * The mapping was successful.
630	 */
631
632	DEBUG(10,("posix_lock_in_range: offset_out = %lld, count_out = %lld\n",
633			offset, count ));
634
635	*offset_out = offset;
636	*count_out = count;
637
638	return True;
639}
640
641/****************************************************************************
642 Actual function that does POSIX locks. Copes with 64 -> 32 bit cruft and
643 broken NFS implementations.
644****************************************************************************/
645
646static BOOL posix_fcntl_lock(files_struct *fsp, int op, SMB_OFF_T offset, SMB_OFF_T count, int type)
647{
648	int ret;
649
650	DEBUG(8,("posix_fcntl_lock %d %d %lld %lld %d\n",fsp->fd,op,offset,count,type));
651
652	ret = SMB_VFS_LOCK(fsp,fsp->fd,op,offset,count,type);
653
654	if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno ==  EINVAL))) {
655
656		DEBUG(0,("posix_fcntl_lock: WARNING: lock request at offset %lld, length %lld returned\n",
657					offset,count));
658		DEBUG(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
659		DEBUG(0,("on 32 bit NFS mounted file systems.\n"));
660
661		/*
662		 * If the offset is > 0x7FFFFFFF then this will cause problems on
663		 * 32 bit NFS mounted filesystems. Just ignore it.
664		 */
665
666		if (offset & ~((SMB_OFF_T)0x7fffffff)) {
667			DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
668			return True;
669		}
670
671		if (count & ~((SMB_OFF_T)0x7fffffff)) {
672			/* 32 bit NFS file system, retry with smaller offset */
673			DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
674			errno = 0;
675			count &= 0x7fffffff;
676			ret = SMB_VFS_LOCK(fsp,fsp->fd,op,offset,count,type);
677		}
678	}
679
680	DEBUG(8,("posix_fcntl_lock: Lock call %s\n", ret ? "successful" : "failed"));
681
682	return ret;
683}
684
685/****************************************************************************
686 POSIX function to see if a file region is locked. Returns True if the
687 region is locked, False otherwise.
688****************************************************************************/
689
690BOOL is_posix_locked(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count, enum brl_type lock_type)
691{
692	SMB_OFF_T offset;
693	SMB_OFF_T count;
694	int posix_lock_type = map_posix_lock_type(fsp,lock_type);
695
696	DEBUG(10,("is_posix_locked: File %s, offset = %llu, count = %llu, type = %s\n",
697			fsp->fsp_name, u_offset, u_count, posix_lock_type_name(lock_type) ));
698
699	/*
700	 * If the requested lock won't fit in the POSIX range, we will
701	 * never set it, so presume it is not locked.
702	 */
703
704	if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
705		return False;
706
707	/*
708	 * Note that most UNIX's can *test* for a write lock on
709	 * a read-only fd, just not *set* a write lock on a read-only
710	 * fd. So we don't need to use map_lock_type here.
711	 */
712
713	return posix_fcntl_lock(fsp,SMB_F_GETLK,offset,count,posix_lock_type);
714}
715
/*
 * Structure used when splitting a lock range
 * into a POSIX lock range. Doubly linked list.
 *
 * Each node is one candidate byte range. posix_lock_list() trims, splits
 * or removes nodes as it compares them with the stored lock records.
 */

struct lock_list {
    struct lock_list *next;	/* following node, NULL at the tail */
    struct lock_list *prev;	/* preceding node */
    SMB_OFF_T start;		/* start offset of this range */
    SMB_OFF_T size;		/* length of this range */
};
727
728/****************************************************************************
729 Create a list of lock ranges that don't overlap a given range. Used in calculating
730 POSIX locks and unlocks. This is a difficult function that requires ASCII art to
731 understand it :-).
732****************************************************************************/
733
734static struct lock_list *posix_lock_list(TALLOC_CTX *ctx, struct lock_list *lhead, files_struct *fsp)
735{
736	TDB_DATA kbuf = locking_key_fsp(fsp);
737	TDB_DATA dbuf;
738	struct posix_lock *locks;
739	size_t num_locks, i;
740
741	dbuf.dptr = NULL;
742
743	dbuf = tdb_fetch(posix_lock_tdb, kbuf);
744
745	if (!dbuf.dptr)
746		return lhead;
747
748	locks = (struct posix_lock *)dbuf.dptr;
749	num_locks = (size_t)(dbuf.dsize / sizeof(*locks));
750
751	/*
752	 * Check the current lock list on this dev/inode pair.
753	 * Quit if the list is deleted.
754	 */
755
756	DEBUG(10,("posix_lock_list: curr: start=%lld,size=%lld\n",
757		lhead->start, lhead->size ));
758
759	for (i=0; i<num_locks && lhead; i++) {
760
761		struct posix_lock *lock = &locks[i];
762		struct lock_list *l_curr;
763
764		/*
765		 * Walk the lock list, checking for overlaps. Note that
766		 * the lock list can expand within this loop if the current
767		 * range being examined needs to be split.
768		 */
769
770		for (l_curr = lhead; l_curr;) {
771
772			DEBUG(10,("posix_lock_list: lock: fd=%d: start=%lld,size=%lld:type=%s", lock->fd,
773				lock->start, lock->size, posix_lock_type_name(lock->lock_type) ));
774
775			if ( (l_curr->start >= (lock->start + lock->size)) ||
776				 (lock->start >= (l_curr->start + l_curr->size))) {
777
778				/* No overlap with this lock - leave this range alone. */
779/*********************************************
780                                             +---------+
781                                             | l_curr  |
782                                             +---------+
783                                +-------+
784                                | lock  |
785                                +-------+
786OR....
787             +---------+
788             |  l_curr |
789             +---------+
790**********************************************/
791
792				DEBUG(10,("no overlap case.\n" ));
793
794				l_curr = l_curr->next;
795
796			} else if ( (l_curr->start >= lock->start) &&
797						(l_curr->start + l_curr->size <= lock->start + lock->size) ) {
798
799				/*
800				 * This unlock is completely overlapped by this existing lock range
801				 * and thus should have no effect (not be unlocked). Delete it from the list.
802				 */
803/*********************************************
804                +---------+
805                |  l_curr |
806                +---------+
807        +---------------------------+
808        |       lock                |
809        +---------------------------+
810**********************************************/
811				/* Save the next pointer */
812				struct lock_list *ul_next = l_curr->next;
813
814				DEBUG(10,("delete case.\n" ));
815
816				DLIST_REMOVE(lhead, l_curr);
817				if(lhead == NULL)
818					break; /* No more list... */
819
820				l_curr = ul_next;
821
822			} else if ( (l_curr->start >= lock->start) &&
823						(l_curr->start < lock->start + lock->size) &&
824						(l_curr->start + l_curr->size > lock->start + lock->size) ) {
825
826				/*
827				 * This unlock overlaps the existing lock range at the high end.
828				 * Truncate by moving start to existing range end and reducing size.
829				 */
830/*********************************************
831                +---------------+
832                |  l_curr       |
833                +---------------+
834        +---------------+
835        |    lock       |
836        +---------------+
837BECOMES....
838                        +-------+
839                        | l_curr|
840                        +-------+
841**********************************************/
842
843				l_curr->size = (l_curr->start + l_curr->size) - (lock->start + lock->size);
844				l_curr->start = lock->start + lock->size;
845
846				DEBUG(10,("truncate high case: start=%lld,size=%lld\n",
847								l_curr->start, l_curr->size ));
848
849				l_curr = l_curr->next;
850
851			} else if ( (l_curr->start < lock->start) &&
852						(l_curr->start + l_curr->size > lock->start) &&
853						(l_curr->start + l_curr->size <= lock->start + lock->size) ) {
854
855				/*
856				 * This unlock overlaps the existing lock range at the low end.
857				 * Truncate by reducing size.
858				 */
859/*********************************************
860   +---------------+
861   |  l_curr       |
862   +---------------+
863           +---------------+
864           |    lock       |
865           +---------------+
866BECOMES....
867   +-------+
868   | l_curr|
869   +-------+
870**********************************************/
871
872				l_curr->size = lock->start - l_curr->start;
873
874				DEBUG(10,("truncate low case: start=%lld,size=%lld\n",
875								l_curr->start, l_curr->size ));
876
877				l_curr = l_curr->next;
878
879			} else if ( (l_curr->start < lock->start) &&
880						(l_curr->start + l_curr->size > lock->start + lock->size) ) {
881				/*
882				 * Worst case scenario. Unlock request completely overlaps an existing
883				 * lock range. Split the request into two, push the new (upper) request
884				 * into the dlink list, and continue with the entry after ul_new (as we
885				 * know that ul_new will not overlap with this lock).
886				 */
887/*********************************************
888        +---------------------------+
889        |        l_curr             |
890        +---------------------------+
891                +---------+
892                | lock    |
893                +---------+
894BECOMES.....
895        +-------+         +---------+
896        | l_curr|         | l_new   |
897        +-------+         +---------+
898**********************************************/
899				struct lock_list *l_new = (struct lock_list *)talloc(ctx,
900													sizeof(struct lock_list));
901
902				if(l_new == NULL) {
903					DEBUG(0,("posix_lock_list: talloc fail.\n"));
904					return NULL; /* The talloc_destroy takes care of cleanup. */
905				}
906
907				ZERO_STRUCTP(l_new);
908				l_new->start = lock->start + lock->size;
909				l_new->size = l_curr->start + l_curr->size - l_new->start;
910
911				/* Truncate the l_curr. */
912				l_curr->size = lock->start - l_curr->start;
913
914				DEBUG(10,("split case: curr: start=%lld,size=%lld \
915new: start=%lld,size=%lld\n", l_curr->start, l_curr->size,
916								l_new->start, l_new->size ));
917
918				/*
919				 * Add into the dlink list after the l_curr point - NOT at lhead.
920				 * Note we can't use DLINK_ADD here as this inserts at the head of the given list.
921				 */
922
923				l_new->prev = l_curr;
924				l_new->next = l_curr->next;
925				l_curr->next = l_new;
926
927				/* And move after the link we added. */
928				l_curr = l_new->next;
929
930			} else {
931
932				/*
933				 * This logic case should never happen. Ensure this is the
934				 * case by forcing an abort.... Remove in production.
935				 */
936				pstring msg;
937
938				slprintf(msg, sizeof(msg)-1, "logic flaw in cases: l_curr: start = %lld, size = %lld : \
939lock: start = %lld, size = %lld\n", l_curr->start, l_curr->size, lock->start, lock->size );
940
941				smb_panic(msg);
942			}
943		} /* end for ( l_curr = lhead; l_curr;) */
944	} /* end for (i=0; i<num_locks && ul_head; i++) */
945
946	SAFE_FREE(dbuf.dptr);
947
948	return lhead;
949}
950
951/****************************************************************************
952 POSIX function to acquire a lock. Returns True if the
953 lock could be granted, False if not.
954****************************************************************************/
955
956BOOL set_posix_lock(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count, enum brl_type lock_type)
957{
958	SMB_OFF_T offset;
959	SMB_OFF_T count;
960	BOOL ret = True;
961	size_t entry_num = 0;
962	size_t lock_count;
963	TALLOC_CTX *l_ctx = NULL;
964	struct lock_list *llist = NULL;
965	struct lock_list *ll = NULL;
966	int posix_lock_type = map_posix_lock_type(fsp,lock_type);
967
968	DEBUG(5,("set_posix_lock: File %s, offset = %llu, count = %llu, type = %s\n",
969			fsp->fsp_name, u_offset, u_count, posix_lock_type_name(lock_type) ));
970
971	/*
972	 * If the requested lock won't fit in the POSIX range, we will
973	 * pretend it was successful.
974	 */
975
976	if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
977		return True;
978
979	/*
980	 * Windows is very strange. It allows read locks to be overlayed
981	 * (even over a write lock), but leaves the write lock in force until the first
982	 * unlock. It also reference counts the locks. This means the following sequence :
983	 *
984	 * process1                                      process2
985	 * ------------------------------------------------------------------------
986	 * WRITE LOCK : start = 2, len = 10
987	 *                                            READ LOCK: start =0, len = 10 - FAIL
988	 * READ LOCK : start = 0, len = 14
989	 *                                            READ LOCK: start =0, len = 10 - FAIL
990	 * UNLOCK : start = 2, len = 10
991	 *                                            READ LOCK: start =0, len = 10 - OK
992	 *
993	 * Under POSIX, the same sequence in steps 1 and 2 would not be reference counted, but
994	 * would leave a single read lock over the 0-14 region. In order to
995	 * re-create Windows semantics mapped to POSIX locks, we create multiple TDB
	 * entries, one for each overlayed lock request. We are guaranteed by the brlock
997	 * semantics that if a write lock is added, then it will be first in the array.
998	 */
999
1000	if ((l_ctx = talloc_init("set_posix_lock")) == NULL) {
1001		DEBUG(0,("set_posix_lock: unable to init talloc context.\n"));
1002		return True; /* Not a fatal error. */
1003	}
1004
1005	if ((ll = (struct lock_list *)talloc(l_ctx, sizeof(struct lock_list))) == NULL) {
1006		DEBUG(0,("set_posix_lock: unable to talloc unlock list.\n"));
1007		talloc_destroy(l_ctx);
1008		return True; /* Not a fatal error. */
1009	}
1010
1011	/*
1012	 * Create the initial list entry containing the
1013	 * lock we want to add.
1014	 */
1015
1016	ZERO_STRUCTP(ll);
1017	ll->start = offset;
1018	ll->size = count;
1019
1020	DLIST_ADD(llist, ll);
1021
1022	/*
1023	 * The following call calculates if there are any
1024	 * overlapping locks held by this process on
1025	 * fd's open on the same file and splits this list
1026	 * into a list of lock ranges that do not overlap with existing
1027	 * POSIX locks.
1028	 */
1029
1030	llist = posix_lock_list(l_ctx, llist, fsp);
1031
1032	/*
1033	 * Now we have the list of ranges to lock it is safe to add the
1034	 * entry into the POSIX lock tdb. We take note of the entry we
1035	 * added here in case we have to remove it on POSIX lock fail.
1036	 */
1037
1038	if (!add_posix_lock_entry(fsp,offset,count,posix_lock_type,&entry_num)) {
1039		DEBUG(0,("set_posix_lock: Unable to create posix lock entry !\n"));
1040		talloc_destroy(l_ctx);
1041		return False;
1042	}
1043
1044	/*
1045	 * Add the POSIX locks on the list of ranges returned.
1046	 * As the lock is supposed to be added atomically, we need to
1047	 * back out all the locks if any one of these calls fail.
1048	 */
1049
1050	for (lock_count = 0, ll = llist; ll; ll = ll->next, lock_count++) {
1051		offset = ll->start;
1052		count = ll->size;
1053
1054		DEBUG(5,("set_posix_lock: Real lock: Type = %s: offset = %lld, count = %lld\n",
1055			posix_lock_type_name(posix_lock_type), offset, count ));
1056
1057		if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,posix_lock_type)) {
1058			DEBUG(5,("set_posix_lock: Lock fail !: Type = %s: offset = %lld, count = %lld. Errno = %s\n",
1059				posix_lock_type_name(posix_lock_type), offset, count, strerror(errno) ));
1060			ret = False;
1061			break;
1062		}
1063	}
1064
1065	if (!ret) {
1066
1067		/*
1068		 * Back out all the POSIX locks we have on fail.
1069		 */
1070
1071		for (ll = llist; lock_count; ll = ll->next, lock_count--) {
1072			offset = ll->start;
1073			count = ll->size;
1074
1075			DEBUG(5,("set_posix_lock: Backing out locks: Type = %s: offset = %lld, count = %lld\n",
1076				posix_lock_type_name(posix_lock_type), offset, count ));
1077
1078			posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK);
1079		}
1080
1081		/*
1082		 * Remove the tdb entry for this lock.
1083		 */
1084
1085		delete_posix_lock_entry_by_index(fsp,entry_num);
1086	}
1087
1088	talloc_destroy(l_ctx);
1089	return ret;
1090}
1091
1092/****************************************************************************
1093 POSIX function to release a lock. Returns True if the
1094 lock could be released, False if not.
1095****************************************************************************/
1096
1097BOOL release_posix_lock(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count)
1098{
1099	SMB_OFF_T offset;
1100	SMB_OFF_T count;
1101	BOOL ret = True;
1102	TALLOC_CTX *ul_ctx = NULL;
1103	struct lock_list *ulist = NULL;
1104	struct lock_list *ul = NULL;
1105	struct posix_lock deleted_lock;
1106	int num_overlapped_entries;
1107
1108	DEBUG(5,("release_posix_lock: File %s, offset = %llu, count = %llu\n",
1109		fsp->fsp_name, u_offset, u_count ));
1110
1111	/*
1112	 * If the requested lock won't fit in the POSIX range, we will
1113	 * pretend it was successful.
1114	 */
1115
1116	if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
1117		return True;
1118
1119	/*
1120	 * We treat this as one unlock request for POSIX accounting purposes even
1121	 * if it may later be split into multiple smaller POSIX unlock ranges.
1122	 * num_overlapped_entries is the number of existing locks that have any
1123	 * overlap with this unlock request.
1124	 */
1125
1126	num_overlapped_entries = delete_posix_lock_entry(fsp, offset, count, &deleted_lock);
1127
1128	if (num_overlapped_entries == -1) {
1129        smb_panic("release_posix_lock: unable find entry to delete !\n");
1130	}
1131
1132	/*
1133	 * If num_overlapped_entries is > 0, and the lock_type we just deleted from the tdb was
1134	 * a POSIX write lock, then before doing the unlock we need to downgrade
1135	 * the POSIX lock to a read lock. This allows any overlapping read locks
1136	 * to be atomically maintained.
1137	 */
1138
1139	if (num_overlapped_entries > 0 && deleted_lock.lock_type == F_WRLCK) {
1140		if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_RDLCK)) {
1141			DEBUG(0,("release_posix_lock: downgrade of lock failed with error %s !\n", strerror(errno) ));
1142			return False;
1143		}
1144	}
1145
1146	if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
1147		DEBUG(0,("release_posix_lock: unable to init talloc context.\n"));
1148		return True; /* Not a fatal error. */
1149	}
1150
1151	if ((ul = (struct lock_list *)talloc(ul_ctx, sizeof(struct lock_list))) == NULL) {
1152		DEBUG(0,("release_posix_lock: unable to talloc unlock list.\n"));
1153		talloc_destroy(ul_ctx);
1154		return True; /* Not a fatal error. */
1155	}
1156
1157	/*
1158	 * Create the initial list entry containing the
1159	 * lock we want to remove.
1160	 */
1161
1162	ZERO_STRUCTP(ul);
1163	ul->start = offset;
1164	ul->size = count;
1165
1166	DLIST_ADD(ulist, ul);
1167
1168	/*
1169	 * The following call calculates if there are any
1170	 * overlapping locks held by this process on
1171	 * fd's open on the same file and creates a
1172	 * list of unlock ranges that will allow
1173	 * POSIX lock ranges to remain on the file whilst the
1174	 * unlocks are performed.
1175	 */
1176
1177	ulist = posix_lock_list(ul_ctx, ulist, fsp);
1178
1179	/*
1180	 * Release the POSIX locks on the list of ranges returned.
1181	 */
1182
1183	for(; ulist; ulist = ulist->next) {
1184		offset = ulist->start;
1185		count = ulist->size;
1186
1187		DEBUG(5,("release_posix_lock: Real unlock: offset = %lld, count = %lld\n",
1188			offset, count ));
1189
1190		if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK))
1191			ret = False;
1192	}
1193
1194	talloc_destroy(ul_ctx);
1195
1196	return ret;
1197}
1198
1199/****************************************************************************
1200 Remove all lock entries for a specific dev/inode pair from the tdb.
1201****************************************************************************/
1202
1203static void delete_posix_lock_entries(files_struct *fsp)
1204{
1205	TDB_DATA kbuf = locking_key_fsp(fsp);
1206
1207	if (tdb_delete(posix_lock_tdb, kbuf) == -1)
1208		DEBUG(0,("delete_close_entries: tdb_delete fail !\n"));
1209}
1210
1211/****************************************************************************
1212 Debug function.
1213****************************************************************************/
1214
1215static void dump_entry(struct posix_lock *pl)
1216{
1217	DEBUG(10,("entry: start=%lld, size=%lld, type=%d, fd=%i\n",
1218		pl->start, pl->size, (int)pl->lock_type, pl->fd ));
1219}
1220
1221/****************************************************************************
1222 Remove any locks on this fd. Called from file_close().
1223****************************************************************************/
1224
1225void posix_locking_close_file(files_struct *fsp)
1226{
1227	struct posix_lock *entries = NULL;
1228	size_t count, i;
1229
1230	/*
1231	 * Optimization for the common case where we are the only
1232	 * opener of a file. If all fd entries are our own, we don't
1233	 * need to explicitly release all the locks via the POSIX functions,
1234	 * we can just remove all the entries in the tdb and allow the
1235	 * close to remove the real locks.
1236	 */
1237
1238	count = get_posix_lock_entries(fsp, &entries);
1239
1240	if (count == 0) {
1241		DEBUG(10,("posix_locking_close_file: file %s has no outstanding locks.\n", fsp->fsp_name ));
1242		return;
1243	}
1244
1245	for (i = 0; i < count; i++) {
1246		if (entries[i].fd != fsp->fd )
1247			break;
1248
1249		dump_entry(&entries[i]);
1250	}
1251
1252	if (i == count) {
1253		/* All locks are ours. */
1254		DEBUG(10,("posix_locking_close_file: file %s has %u outstanding locks, but all on one fd.\n",
1255			fsp->fsp_name, (unsigned int)count ));
1256		SAFE_FREE(entries);
1257		delete_posix_lock_entries(fsp);
1258		return;
1259	}
1260
1261	/*
1262	 * Difficult case. We need to delete all our locks, whilst leaving
1263	 * all other POSIX locks in place.
1264	 */
1265
1266	for (i = 0; i < count; i++) {
1267		struct posix_lock *pl = &entries[i];
1268		if (pl->fd == fsp->fd)
1269			release_posix_lock(fsp, (SMB_BIG_UINT)pl->start, (SMB_BIG_UINT)pl->size );
1270	}
1271	SAFE_FREE(entries);
1272}
1273
1274/*******************************************************************
1275 Create the in-memory POSIX lock databases.
1276********************************************************************/
1277
1278BOOL posix_locking_init(int read_only)
1279{
1280	if (posix_lock_tdb && posix_pending_close_tdb)
1281		return True;
1282
1283	if (!posix_lock_tdb)
1284		posix_lock_tdb = tdb_open_log(NULL, 0, TDB_INTERNAL,
1285					  read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644);
1286	if (!posix_lock_tdb) {
1287		DEBUG(0,("Failed to open POSIX byte range locking database.\n"));
1288		return False;
1289	}
1290	if (!posix_pending_close_tdb)
1291		posix_pending_close_tdb = tdb_open_log(NULL, 0, TDB_INTERNAL,
1292						   read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644);
1293	if (!posix_pending_close_tdb) {
1294		DEBUG(0,("Failed to open POSIX pending close database.\n"));
1295		return False;
1296	}
1297
1298	return True;
1299}
1300
1301/*******************************************************************
1302 Delete the in-memory POSIX lock databases.
1303********************************************************************/
1304
1305BOOL posix_locking_end(void)
1306{
1307    if (posix_lock_tdb && tdb_close(posix_lock_tdb) != 0)
1308		return False;
1309    if (posix_pending_close_tdb && tdb_close(posix_pending_close_tdb) != 0)
1310		return False;
1311	return True;
1312}
1313