1/*
2   Unix SMB/CIFS implementation.
3   kernel oplock processing for Linux
4   Copyright (C) Andrew Tridgell 2000
5
6   This program is free software; you can redistribute it and/or modify
7   it under the terms of the GNU General Public License as published by
8   the Free Software Foundation; either version 2 of the License, or
9   (at your option) any later version.
10
11   This program is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14   GNU General Public License for more details.
15
16   You should have received a copy of the GNU General Public License
17   along with this program; if not, write to the Free Software
18   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19*/
20
21#include "includes.h"
22
23#if HAVE_KERNEL_OPLOCKS_LINUX
24
25static SIG_ATOMIC_T signals_received;
26#define FD_PENDING_SIZE 100
27static SIG_ATOMIC_T fd_pending_array[FD_PENDING_SIZE];
28
/*
 * Fallback definitions, used only when the system headers do not already
 * provide the corresponding names.
 */

/* fcntl() commands used for leases and for choosing the delivery signal. */
#ifndef F_SETLEASE
#define F_SETLEASE	1024
#endif

#ifndef F_GETLEASE
#define F_GETLEASE	1025
#endif

#ifndef F_SETSIG
#define F_SETSIG 10
#endif

/* Capability requested by linux_setlease() when F_SETLEASE fails with EACCES. */
#ifndef CAP_LEASE
#define CAP_LEASE 28
#endif

/* Realtime signal through which lease breaks are delivered to signal_handler(). */
#ifndef RT_SIGNAL_LEASE
#define RT_SIGNAL_LEASE (SIGRTMIN+1)
#endif
48
49/****************************************************************************
50 Handle a LEASE signal, incrementing the signals_received and blocking the signal.
51****************************************************************************/
52
53static void signal_handler(int sig, siginfo_t *info, void *unused)
54{
55	if (signals_received < FD_PENDING_SIZE - 1) {
56		fd_pending_array[signals_received] = (SIG_ATOMIC_T)info->si_fd;
57		signals_received++;
58	} /* Else signal is lost. */
59	sys_select_signal();
60}
61
62/****************************************************************************
63 Try to gain a linux capability.
64****************************************************************************/
65
66static void set_capability(unsigned capability)
67{
68#ifndef _LINUX_CAPABILITY_VERSION
69#define _LINUX_CAPABILITY_VERSION 0x19980330
70#endif
71	/* these can be removed when they are in glibc headers */
72	struct  {
73		uint32 version;
74		int pid;
75	} header;
76	struct {
77		uint32 effective;
78		uint32 permitted;
79		uint32 inheritable;
80	} data;
81
82	header.version = _LINUX_CAPABILITY_VERSION;
83	header.pid = 0;
84
85	if (capget(&header, &data) == -1) {
86		DEBUG(3,("Unable to get kernel capabilities (%s)\n", strerror(errno)));
87		return;
88	}
89
90	data.effective |= (1<<capability);
91
92	if (capset(&header, &data) == -1) {
93		DEBUG(3,("Unable to set %d capability (%s)\n",
94			 capability, strerror(errno)));
95	}
96}
97
98/****************************************************************************
99 Call SETLEASE. If we get EACCES then we try setting up the right capability and
100 try again
101****************************************************************************/
102
103static int linux_setlease(int fd, int leasetype)
104{
105	int ret;
106
107	if (fcntl(fd, F_SETSIG, RT_SIGNAL_LEASE) == -1) {
108		DEBUG(3,("Failed to set signal handler for kernel lease\n"));
109		return -1;
110	}
111
112	ret = fcntl(fd, F_SETLEASE, leasetype);
113	if (ret == -1 && errno == EACCES) {
114		set_capability(CAP_LEASE);
115		ret = fcntl(fd, F_SETLEASE, leasetype);
116	}
117
118	return ret;
119}
120
121/****************************************************************************
122 * Deal with the Linux kernel <--> smbd
123 * oplock break protocol.
124****************************************************************************/
125
126static BOOL linux_oplock_receive_message(fd_set *fds, char *buffer, int buffer_len)
127{
128	int fd;
129	struct files_struct *fsp;
130
131	BlockSignals(True, RT_SIGNAL_LEASE);
132	fd = fd_pending_array[0];
133	fsp = file_find_fd(fd);
134	fd_pending_array[0] = (SIG_ATOMIC_T)-1;
135	if (signals_received > 1)
136		memmove((void *)&fd_pending_array[0], (void *)&fd_pending_array[1],
137			sizeof(SIG_ATOMIC_T)*(signals_received-1));
138	signals_received--;
139	/* now we can receive more signals */
140	BlockSignals(False, RT_SIGNAL_LEASE);
141
142	if (fsp == NULL) {
143		DEBUG(0,("Invalid file descriptor %d in kernel oplock break!\n", (int)fd));
144		return False;
145	}
146
147	DEBUG(3,("linux_oplock_receive_message: kernel oplock break request received for \
148dev = %x, inode = %.0f fd = %d, fileid = %lu \n", (unsigned int)fsp->dev, (double)fsp->inode,
149			fd, fsp->file_id));
150
151	/*
152	 * Create a kernel oplock break message.
153	 */
154
155	/* Setup the message header */
156	SIVAL(buffer,OPBRK_CMD_LEN_OFFSET,KERNEL_OPLOCK_BREAK_MSG_LEN);
157	SSVAL(buffer,OPBRK_CMD_PORT_OFFSET,0);
158
159	buffer += OPBRK_CMD_HEADER_LEN;
160
161	SSVAL(buffer,OPBRK_MESSAGE_CMD_OFFSET,KERNEL_OPLOCK_BREAK_CMD);
162
163	memcpy(buffer + KERNEL_OPLOCK_BREAK_DEV_OFFSET, (char *)&fsp->dev, sizeof(fsp->dev));
164	memcpy(buffer + KERNEL_OPLOCK_BREAK_INODE_OFFSET, (char *)&fsp->inode, sizeof(fsp->inode));
165	memcpy(buffer + KERNEL_OPLOCK_BREAK_FILEID_OFFSET, (char *)&fsp->file_id, sizeof(fsp->file_id));
166
167	return True;
168}
169
170/****************************************************************************
171 Attempt to set an kernel oplock on a file.
172****************************************************************************/
173
174static BOOL linux_set_kernel_oplock(files_struct *fsp, int oplock_type)
175{
176	if (linux_setlease(fsp->fd, F_WRLCK) == -1) {
177		DEBUG(3,("linux_set_kernel_oplock: Refused oplock on file %s, fd = %d, dev = %x, \
178inode = %.0f. (%s)\n",
179			 fsp->fsp_name, fsp->fd,
180			 (unsigned int)fsp->dev, (double)fsp->inode, strerror(errno)));
181		return False;
182	}
183
184	DEBUG(3,("linux_set_kernel_oplock: got kernel oplock on file %s, dev = %x, inode = %.0f, file_id = %lu\n",
185		  fsp->fsp_name, (unsigned int)fsp->dev, (double)fsp->inode, fsp->file_id));
186
187	return True;
188}
189
190/****************************************************************************
191 Release a kernel oplock on a file.
192****************************************************************************/
193
194static void linux_release_kernel_oplock(files_struct *fsp)
195{
196	if (DEBUGLVL(10)) {
197		/*
198		 * Check and print out the current kernel
199		 * oplock state of this file.
200		 */
201		int state = fcntl(fsp->fd, F_GETLEASE, 0);
202		dbgtext("linux_release_kernel_oplock: file %s, dev = %x, inode = %.0f file_id = %lu has kernel \
203oplock state of %x.\n", fsp->fsp_name, (unsigned int)fsp->dev,
204                        (double)fsp->inode, fsp->file_id, state );
205	}
206
207	/*
208	 * Remove the kernel oplock on this file.
209	 */
210	if (linux_setlease(fsp->fd, F_UNLCK) == -1) {
211		if (DEBUGLVL(0)) {
212			dbgtext("linux_release_kernel_oplock: Error when removing kernel oplock on file " );
213			dbgtext("%s, dev = %x, inode = %.0f, file_id = %lu. Error was %s\n",
214				fsp->fsp_name, (unsigned int)fsp->dev,
215				(double)fsp->inode, fsp->file_id, strerror(errno) );
216		}
217	}
218}
219
220/****************************************************************************
221 Parse a kernel oplock message.
222****************************************************************************/
223
224static BOOL linux_kernel_oplock_parse(char *msg_start, int msg_len, SMB_INO_T *inode,
225		SMB_DEV_T *dev, unsigned long *file_id)
226{
227	/* Ensure that the msg length is correct. */
228	if (msg_len != KERNEL_OPLOCK_BREAK_MSG_LEN) {
229		DEBUG(0,("incorrect length for KERNEL_OPLOCK_BREAK_CMD (was %d, should be %lu).\n",
230			 msg_len, (unsigned long)KERNEL_OPLOCK_BREAK_MSG_LEN));
231		return False;
232	}
233
234	memcpy((char *)inode, msg_start+KERNEL_OPLOCK_BREAK_INODE_OFFSET, sizeof(*inode));
235	memcpy((char *)dev, msg_start+KERNEL_OPLOCK_BREAK_DEV_OFFSET, sizeof(*dev));
236	memcpy((char *)file_id, msg_start+KERNEL_OPLOCK_BREAK_FILEID_OFFSET, sizeof(*file_id));
237
238	DEBUG(3,("kernel oplock break request for file dev = %x, inode = %.0f, file_id = %lu\n",
239		(unsigned int)*dev, (double)*inode, *file_id));
240
241	return True;
242}
243
244/****************************************************************************
245 See if a oplock message is waiting.
246****************************************************************************/
247
248static BOOL linux_oplock_msg_waiting(fd_set *fds)
249{
250	return signals_received != 0;
251}
252
253/****************************************************************************
254 See if the kernel supports oplocks.
255****************************************************************************/
256
257static BOOL linux_oplocks_available(void)
258{
259	int fd, ret;
260	fd = open("/dev/null", O_RDONLY);
261	if (fd == -1)
262		return False; /* uggh! */
263	ret = fcntl(fd, F_GETLEASE, 0);
264	close(fd);
265	return ret == F_UNLCK;
266}
267
268/****************************************************************************
269 Setup kernel oplocks.
270****************************************************************************/
271
272struct kernel_oplocks *linux_init_kernel_oplocks(void)
273{
274	static struct kernel_oplocks koplocks;
275        struct sigaction act;
276
277	if (!linux_oplocks_available()) {
278		DEBUG(3,("Linux kernel oplocks not available\n"));
279		return NULL;
280	}
281
282	ZERO_STRUCT(act);
283
284	act.sa_handler = NULL;
285	act.sa_sigaction = signal_handler;
286	act.sa_flags = SA_SIGINFO;
287	sigemptyset( &act.sa_mask );
288	if (sigaction(RT_SIGNAL_LEASE, &act, NULL) != 0) {
289		DEBUG(0,("Failed to setup RT_SIGNAL_LEASE handler\n"));
290		return NULL;
291	}
292
293	koplocks.receive_message = linux_oplock_receive_message;
294	koplocks.set_oplock = linux_set_kernel_oplock;
295	koplocks.release_oplock = linux_release_kernel_oplock;
296	koplocks.parse_message = linux_kernel_oplock_parse;
297	koplocks.msg_waiting = linux_oplock_msg_waiting;
298	koplocks.notification_fd = -1;
299
300	/* the signal can start off blocked due to a bug in bash */
301	BlockSignals(False, RT_SIGNAL_LEASE);
302
303	DEBUG(3,("Linux kernel oplocks enabled\n"));
304
305	return &koplocks;
306}
307#else
308 void oplock_linux_dummy(void) {}
309#endif /* HAVE_KERNEL_OPLOCKS_LINUX */
310