1/*
2 * /proc/sys support
3 */
4
5#include <linux/sysctl.h>
6#include <linux/proc_fs.h>
7#include <linux/security.h>
8#include "internal.h"
9
/*
 * Forward declarations of the operation tables defined further down in
 * this file; the inode and dentry setup code below takes their addresses
 * before the definitions appear.
 */
static struct dentry_operations proc_sys_dentry_operations;
static const struct file_operations proc_sys_file_operations;
static struct inode_operations proc_sys_inode_operations;
13
14static void proc_sys_refresh_inode(struct inode *inode, struct ctl_table *table)
15{
16	/* Refresh the cached information bits in the inode */
17	if (table) {
18		inode->i_uid = 0;
19		inode->i_gid = 0;
20		inode->i_mode = table->mode;
21		if (table->proc_handler) {
22			inode->i_mode |= S_IFREG;
23			inode->i_nlink = 1;
24		} else {
25			inode->i_mode |= S_IFDIR;
26			inode->i_nlink = 0;	/* It is too hard to figure out */
27		}
28	}
29}
30
31static struct inode *proc_sys_make_inode(struct inode *dir, struct ctl_table *table)
32{
33	struct inode *inode;
34	struct proc_inode *dir_ei, *ei;
35	int depth;
36
37	inode = new_inode(dir->i_sb);
38	if (!inode)
39		goto out;
40
41	/* A directory is always one deeper than it's parent */
42	dir_ei = PROC_I(dir);
43	depth = dir_ei->fd + 1;
44
45	ei = PROC_I(inode);
46	ei->fd = depth;
47	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
48	inode->i_op = &proc_sys_inode_operations;
49	inode->i_fop = &proc_sys_file_operations;
50	inode->i_flags |= S_PRIVATE; /* tell selinux to ignore this inode */
51	proc_sys_refresh_inode(inode, table);
52out:
53	return inode;
54}
55
56static struct dentry *proc_sys_ancestor(struct dentry *dentry, int depth)
57{
58	for (;;) {
59		struct proc_inode *ei;
60
61		ei = PROC_I(dentry->d_inode);
62		if (ei->fd == depth)
63			break; /* found */
64
65		dentry = dentry->d_parent;
66	}
67	return dentry;
68}
69
70static struct ctl_table *proc_sys_lookup_table_one(struct ctl_table *table,
71							struct qstr *name)
72{
73	int len;
74	for ( ; table->ctl_name || table->procname; table++) {
75
76		if (!table->procname)
77			continue;
78
79		len = strlen(table->procname);
80		if (len != name->len)
81			continue;
82
83		if (memcmp(table->procname, name->name, len) != 0)
84			continue;
85
86		/* I have a match */
87		return table;
88	}
89	return NULL;
90}
91
92static struct ctl_table *proc_sys_lookup_table(struct dentry *dentry,
93						struct ctl_table *table)
94{
95	struct dentry *ancestor;
96	struct proc_inode *ei;
97	int depth, i;
98
99	ei = PROC_I(dentry->d_inode);
100	depth = ei->fd;
101
102	if (depth == 0)
103		return table;
104
105	for (i = 1; table && (i <= depth); i++) {
106		ancestor = proc_sys_ancestor(dentry, i);
107		table = proc_sys_lookup_table_one(table, &ancestor->d_name);
108		if (table)
109			table = table->child;
110	}
111	return table;
112
113}
114static struct ctl_table *proc_sys_lookup_entry(struct dentry *dparent,
115						struct qstr *name,
116						struct ctl_table *table)
117{
118	table = proc_sys_lookup_table(dparent, table);
119	if (table)
120		table = proc_sys_lookup_table_one(table, name);
121	return table;
122}
123
124static struct ctl_table *do_proc_sys_lookup(struct dentry *parent,
125						struct qstr *name,
126						struct ctl_table_header **ptr)
127{
128	struct ctl_table_header *head;
129	struct ctl_table *table = NULL;
130
131	for (head = sysctl_head_next(NULL); head;
132			head = sysctl_head_next(head)) {
133		table = proc_sys_lookup_entry(parent, name, head->ctl_table);
134		if (table)
135			break;
136	}
137	*ptr = head;
138	return table;
139}
140
141static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
142					struct nameidata *nd)
143{
144	struct ctl_table_header *head;
145	struct inode *inode;
146	struct dentry *err;
147	struct ctl_table *table;
148
149	err = ERR_PTR(-ENOENT);
150	table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head);
151	if (!table)
152		goto out;
153
154	err = ERR_PTR(-ENOMEM);
155	inode = proc_sys_make_inode(dir, table);
156	if (!inode)
157		goto out;
158
159	err = NULL;
160	dentry->d_op = &proc_sys_dentry_operations;
161	d_add(dentry, inode);
162
163out:
164	sysctl_head_finish(head);
165	return err;
166}
167
168static ssize_t proc_sys_read(struct file *filp, char __user *buf,
169				size_t count, loff_t *ppos)
170{
171	struct dentry *dentry = filp->f_dentry;
172	struct ctl_table_header *head;
173	struct ctl_table *table;
174	ssize_t error, res;
175
176	table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head);
177	/* Has the sysctl entry disappeared on us? */
178	error = -ENOENT;
179	if (!table)
180		goto out;
181
182	/* Has the sysctl entry been replaced by a directory? */
183	error = -EISDIR;
184	if (!table->proc_handler)
185		goto out;
186
187	/*
188	 * At this point we know that the sysctl was not unregistered
189	 * and won't be until we finish.
190	 */
191	error = -EPERM;
192	if (sysctl_perm(table, MAY_READ))
193		goto out;
194
195	/* careful: calling conventions are nasty here */
196	res = count;
197	error = table->proc_handler(table, 0, filp, buf, &res, ppos);
198	if (!error)
199		error = res;
200out:
201	sysctl_head_finish(head);
202
203	return error;
204}
205
206static ssize_t proc_sys_write(struct file *filp, const char __user *buf,
207				size_t count, loff_t *ppos)
208{
209	struct dentry *dentry = filp->f_dentry;
210	struct ctl_table_header *head;
211	struct ctl_table *table;
212	ssize_t error, res;
213
214	table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head);
215	/* Has the sysctl entry disappeared on us? */
216	error = -ENOENT;
217	if (!table)
218		goto out;
219
220	/* Has the sysctl entry been replaced by a directory? */
221	error = -EISDIR;
222	if (!table->proc_handler)
223		goto out;
224
225	/*
226	 * At this point we know that the sysctl was not unregistered
227	 * and won't be until we finish.
228	 */
229	error = -EPERM;
230	if (sysctl_perm(table, MAY_WRITE))
231		goto out;
232
233	/* careful: calling conventions are nasty here */
234	res = count;
235	error = table->proc_handler(table, 1, filp, (char __user *)buf,
236				    &res, ppos);
237	if (!error)
238		error = res;
239out:
240	sysctl_head_finish(head);
241
242	return error;
243}
244
245
246static int proc_sys_fill_cache(struct file *filp, void *dirent,
247				filldir_t filldir, struct ctl_table *table)
248{
249	struct ctl_table_header *head;
250	struct ctl_table *child_table = NULL;
251	struct dentry *child, *dir = filp->f_path.dentry;
252	struct inode *inode;
253	struct qstr qname;
254	ino_t ino = 0;
255	unsigned type = DT_UNKNOWN;
256	int ret;
257
258	qname.name = table->procname;
259	qname.len  = strlen(table->procname);
260	qname.hash = full_name_hash(qname.name, qname.len);
261
262	/* Suppress duplicates.
263	 * Only fill a directory entry if it is the value that
264	 * an ordinary lookup of that name returns.  Hide all
265	 * others.
266	 *
267	 * If we ever cache this translation in the dcache
268	 * I should do a dcache lookup first.  But for now
269	 * it is just simpler not to.
270	 */
271	ret = 0;
272	child_table = do_proc_sys_lookup(dir, &qname, &head);
273	sysctl_head_finish(head);
274	if (child_table != table)
275		return 0;
276
277	child = d_lookup(dir, &qname);
278	if (!child) {
279		struct dentry *new;
280		new = d_alloc(dir, &qname);
281		if (new) {
282			inode = proc_sys_make_inode(dir->d_inode, table);
283			if (!inode)
284				child = ERR_PTR(-ENOMEM);
285			else {
286				new->d_op = &proc_sys_dentry_operations;
287				d_add(new, inode);
288			}
289			if (child)
290				dput(new);
291			else
292				child = new;
293		}
294	}
295	if (!child || IS_ERR(child) || !child->d_inode)
296		goto end_instantiate;
297	inode = child->d_inode;
298	if (inode) {
299		ino  = inode->i_ino;
300		type = inode->i_mode >> 12;
301	}
302	dput(child);
303end_instantiate:
304	if (!ino)
305		ino= find_inode_number(dir, &qname);
306	if (!ino)
307		ino = 1;
308	return filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type);
309}
310
/*
 * readdir for a directory under /proc/sys.
 *
 * Emits "." and ".." at file positions 0 and 1, then walks every sysctl
 * table header returned by sysctl_head_next() and hands each named
 * entry of this directory to proc_sys_fill_cache().
 *
 * Returns -ENOTDIR if the inode is not a directory, 0 if filldir() or
 * proc_sys_fill_cache() returned a negative value before the walk was
 * complete, and 1 once every header has been walked.
 */
static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir)
{
	struct dentry *dentry = filp->f_dentry;
	struct inode *inode = dentry->d_inode;
	/* Stays NULL on every path that leaves before the header walk */
	struct ctl_table_header *head = NULL;
	struct ctl_table *table;
	unsigned long pos;
	int ret;

	ret = -ENOTDIR;
	if (!S_ISDIR(inode->i_mode))
		goto out;

	ret = 0;
	/* Avoid a switch here: arm builds fail with missing __cmpdi2 */
	if (filp->f_pos == 0) {
		if (filldir(dirent, ".", 1, filp->f_pos,
				inode->i_ino, DT_DIR) < 0)
			goto out;
		filp->f_pos++;
	}
	if (filp->f_pos == 1) {
		if (filldir(dirent, "..", 2, filp->f_pos,
				parent_ino(dentry), DT_DIR) < 0)
			goto out;
		filp->f_pos++;
	}
	/* Positions 0 and 1 are "." and ".."; table slots start at 2 */
	pos = 2;

	/* - Find each instance of the directory
	 * - Read all entries in each instance
	 * - Before returning an entry to user space lookup the entry
	 *   by name and if I find a different entry don't return
	 *   this one because it means it is a buried dup.
	 * For sysctl this should only happen for directory entries.
	 */
	for (head = sysctl_head_next(NULL); head; head = sysctl_head_next(head)) {
		table = proc_sys_lookup_table(dentry, head->ctl_table);

		/* This header has no instance of the directory */
		if (!table)
			continue;

		/*
		 * pos advances for every table slot, including the ones
		 * skipped below, because the loop increment also runs
		 * after a continue.
		 */
		for (; table->ctl_name || table->procname; table++, pos++) {
			/* Can't do anything without a proc name */
			if (!table->procname)
				continue;

			/* Already returned by an earlier readdir call */
			if (pos < filp->f_pos)
				continue;

			if (proc_sys_fill_cache(filp, dirent, filldir, table) < 0)
				goto out;
			/* Resume after this slot on the next call */
			filp->f_pos = pos + 1;
		}
	}
	ret = 1;
out:
	/* head is non-NULL here only when the walk was left via goto */
	sysctl_head_finish(head);
	return ret;
}
371
372static int proc_sys_permission(struct inode *inode, int mask, struct nameidata *nd)
373{
374	/*
375	 * sysctl entries that are not writeable,
376	 * are _NOT_ writeable, capabilities or not.
377	 */
378	struct ctl_table_header *head;
379	struct ctl_table *table;
380	struct dentry *dentry;
381	int mode;
382	int depth;
383	int error;
384
385	head = NULL;
386	depth = PROC_I(inode)->fd;
387
388	/* First check the cached permissions, in case we don't have
389	 * enough information to lookup the sysctl table entry.
390	 */
391	error = -EACCES;
392	mode = inode->i_mode;
393
394	if (current->euid == 0)
395		mode >>= 6;
396	else if (in_group_p(0))
397		mode >>= 3;
398
399	if ((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask)
400		error = 0;
401
402	/* If we can't get a sysctl table entry the permission
403	 * checks on the cached mode will have to be enough.
404	 */
405	if (!nd || !depth)
406		goto out;
407
408	dentry = nd->dentry;
409	table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head);
410
411	/* If the entry does not exist deny permission */
412	error = -EACCES;
413	if (!table)
414		goto out;
415
416	/* Use the permissions on the sysctl table entry */
417	error = sysctl_perm(table, mask);
418out:
419	sysctl_head_finish(head);
420	return error;
421}
422
423static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr)
424{
425	struct inode *inode = dentry->d_inode;
426	int error;
427
428	if (attr->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID))
429		return -EPERM;
430
431	error = inode_change_ok(inode, attr);
432	if (!error)
433		error = inode_setattr(inode, attr);
434
435	return error;
436}
437
/*
 * File operations shared by every inode under /proc/sys.
 *
 * Files and directories deliberately use the same table; the
 * distinction is only made at access time: read and write return
 * -EISDIR for an entry without a proc_handler, and readdir returns
 * -ENOTDIR for an inode that is not a directory.
 */
static const struct file_operations proc_sys_file_operations = {
	.read		= proc_sys_read,
	.write		= proc_sys_write,
	.readdir	= proc_sys_readdir,
};
446
/*
 * Inode operations shared by every inode under /proc/sys, including
 * the /proc/sys directory itself (installed by proc_sys_init()).
 */
static struct inode_operations proc_sys_inode_operations = {
	.lookup		= proc_sys_lookup,
	.permission	= proc_sys_permission,
	.setattr	= proc_sys_setattr,
};
452
453static int proc_sys_revalidate(struct dentry *dentry, struct nameidata *nd)
454{
455	struct ctl_table_header *head;
456	struct ctl_table *table;
457	table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head);
458	proc_sys_refresh_inode(dentry->d_inode, table);
459	sysctl_head_finish(head);
460	return !!table;
461}
462
/*
 * Dentry operations installed on every dentry created by
 * proc_sys_lookup() and proc_sys_fill_cache(); they make each cached
 * dentry get re-checked against the sysctl tables.
 */
static struct dentry_operations proc_sys_dentry_operations = {
	.d_revalidate	= proc_sys_revalidate,
};
466
467static struct proc_dir_entry *proc_sys_root;
468
469int proc_sys_init(void)
470{
471	proc_sys_root = proc_mkdir("sys", NULL);
472	proc_sys_root->proc_iops = &proc_sys_inode_operations;
473	proc_sys_root->proc_fops = &proc_sys_file_operations;
474	proc_sys_root->nlink = 0;
475	return 0;
476}
477