1/*	$OpenBSD: rules.c,v 1.5 2022/12/26 19:16:02 jmc Exp $ */
2/*
3 * Copyright (c) 2021 Claudio Jeker <claudio@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17#include <err.h>
18#include <stdlib.h>
19#include <stdio.h>
20#include <string.h>
21
22#include "extern.h"
23
/*
 * A single parsed filter rule.  The flag members are filled in by
 * parse_pattern() and consulted by rules_match() and the send path.
 */
struct rule {
	char			*pattern;	/* strdup'ed pattern, special
						 * leading/trailing parts
						 * stripped */
	enum rule_type		 type;		/* kind of rule (exclude,
						 * include, clear, ...) */
#ifdef NOTYET
	unsigned int		 modifiers;	/* bitmask of MOD_* flags */
#endif
	short			 numseg;	/* number of '/' separated
						 * segments, -1 if the pattern
						 * contains "**" */
	unsigned char		 anchored;	/* pattern started with '/' */
	unsigned char		 fileonly;	/* single segment, unanchored:
						 * compare to basename only */
	unsigned char		 nowild;	/* no '*', '?' or '[' in
						 * pattern */
	unsigned char		 onlydir;	/* pattern ended with '/' */
	unsigned char		 leadingdir;	/* pattern ended with "/***" */
};
37
/*
 * The rule list: a growable array managed by get_next_rule().  Entries
 * [0, numrules) are in use, rulesz is the allocated capacity.
 */
static struct rule	*rules;
static size_t		 numrules;	/* number of rules */
static size_t		 rulesz;	/* available size */
41
/*
 * Up to protocol 29, filter rules only support '-', '+' and '!' and no
 * modifiers.
 */

/*
 * Table of known rule commands.  Each entry maps a rule type to its
 * one character short form and its long name.  The table ends with an
 * all-zero entry; parse_command() stops at the first entry whose type is
 * RULE_NONE (presumably RULE_NONE is 0 -- confirm against extern.h).
 */
const struct command {
	enum rule_type		type;
	char			sopt;
	const char		*lopt;
} commands[] = {
	{ RULE_EXCLUDE,		'-',	"exclude" },
	{ RULE_INCLUDE,		'+',	"include" },
	{ RULE_CLEAR,		'!',	"clear" },
#ifdef NOTYET
	{ RULE_MERGE,		'.',	"merge" },
	{ RULE_DIR_MERGE,	':',	"dir-merge" },
	{ RULE_SHOW,		'S',	"show" },
	{ RULE_HIDE,		'H',	"hide" },
	{ RULE_PROTECT,		'P',	"protect" },
	{ RULE_RISK,		'R',	"risk" },
#endif
	{ 0 }
};
62
#ifdef NOTYET
/*
 * Rule modifier flags, stored as a bitmask in struct rule's modifiers
 * member.  The MOD_MERGE_* flags are only meaningful for the '.' and ':'
 * rule types.
 */
#define MOD_ABSOLUTE			0x0001
#define MOD_NEGATE			0x0002
#define MOD_CVSEXCLUDE			0x0004
#define MOD_SENDING			0x0008
#define MOD_RECEIVING			0x0010
#define MOD_PERISHABLE			0x0020
#define MOD_XATTR			0x0040
#define MOD_MERGE_EXCLUDE		0x0080
#define MOD_MERGE_INCLUDE		0x0100
#define MOD_MERGE_CVSCOMPAT		0x0200
#define MOD_MERGE_EXCLUDE_FILE		0x0400
#define MOD_MERGE_NO_INHERIT		0x0800
#define MOD_MERGE_WORDSPLIT		0x1000

/*
 * Maps each modifier flag to the character used for it in a rule prefix.
 * The table is terminated by an all-zero entry.
 */
/* maybe support absolute and negate */
const struct modifier {
	unsigned int		modifier;
	char			sopt;
} modifiers[] = {
	{ MOD_ABSOLUTE,			'/' },
	{ MOD_NEGATE,			'!' },
	{ MOD_CVSEXCLUDE,		'C' },
	{ MOD_SENDING,			's' },
	{ MOD_RECEIVING,		'r' },
	{ MOD_PERISHABLE,		'p' },
	{ MOD_XATTR,			'x' },
	/* for '.' and ':' types */
	{ MOD_MERGE_EXCLUDE,		'-' },
	{ MOD_MERGE_INCLUDE,		'+' },
	{ MOD_MERGE_CVSCOMPAT,		'C' },
	{ MOD_MERGE_EXCLUDE_FILE,	'e' },
	{ MOD_MERGE_NO_INHERIT,		'n' },
	{ MOD_MERGE_WORDSPLIT,		'w' },
	{ 0 }
};
#endif
100
101static struct rule *
102get_next_rule(void)
103{
104	struct rule *new;
105	size_t newsz;
106
107	if (++numrules > rulesz) {
108		if (rulesz == 0)
109			newsz = 16;
110		else
111			newsz = rulesz * 2;
112
113		new = recallocarray(rules, rulesz, newsz, sizeof(*rules));
114		if (new == NULL)
115			err(ERR_NOMEM, NULL);
116
117		rules = new;
118		rulesz = newsz;
119	}
120
121	return rules + numrules - 1;
122}
123
124static enum rule_type
125parse_command(const char *command, size_t len)
126{
127	const char *mod;
128	size_t	i;
129
130	mod = memchr(command, ',', len);
131	if (mod != NULL) {
132		/* XXX modifiers not yet implemented */
133		return RULE_NONE;
134	}
135
136	for (i = 0; commands[i].type != RULE_NONE; i++) {
137		if (strncmp(commands[i].lopt, command, len) == 0)
138			return commands[i].type;
139		if (len == 1 && commands[i].sopt == *command)
140			return commands[i].type;
141	}
142
143	return RULE_NONE;
144}
145
146static void
147parse_pattern(struct rule *r, char *pattern)
148{
149	size_t plen;
150	char *p;
151	short nseg = 1;
152
153	/*
154	 * check for / at start and end of pattern both are special and
155	 * can bypass full path matching.
156	 */
157	if (*pattern == '/') {
158		pattern++;
159		r->anchored = 1;
160	}
161	plen = strlen(pattern);
162	/*
163	 * check for patterns ending in '/' and '/'+'***' and handle them
164	 * specially. Because of this and the check above pattern will never
165	 * start or end with a '/'.
166	 */
167	if (plen > 1 && pattern[plen - 1] == '/') {
168		r->onlydir = 1;
169		pattern[plen - 1] = '\0';
170	}
171	if (plen > 4 && strcmp(pattern + plen - 4, "/***") == 0) {
172		r->leadingdir = 1;
173		pattern[plen - 4] = '\0';
174	}
175
176	/* count how many segments the pattern has. */
177	for (p = pattern; *p != '\0'; p++)
178		if (*p == '/')
179			nseg++;
180	r->numseg = nseg;
181
182	/* check if this pattern only matches against the basename */
183	if (nseg == 1 && !r->anchored)
184		r->fileonly = 1;
185
186	if (strpbrk(pattern, "*?[") == NULL) {
187		/* no wildchar matching */
188		r->nowild = 1;
189	} else {
190		/* requires wildchar matching */
191		if (strstr(pattern, "**") != NULL)
192			r->numseg = -1;
193	}
194
195	r->pattern = strdup(pattern);
196	if (r->pattern == NULL)
197		err(ERR_NOMEM, NULL);
198}
199
200int
201parse_rule(char *line, enum rule_type def)
202{
203	enum rule_type type;
204	struct rule *r;
205	char *pattern;
206	size_t len;
207
208	switch (*line) {
209	case '#':
210	case ';':
211		/* comment */
212		return 0;
213	case '\0':
214		/* ignore empty lines */
215		return 0;
216	default:
217		len = strcspn(line, " _");
218		type = parse_command(line, len);
219		if (type == RULE_NONE) {
220			if (def == RULE_NONE)
221				return -1;
222			type = def;
223			pattern = line;
224		} else
225			pattern = line + len + 1;
226
227		if (*pattern == '\0' && type != RULE_CLEAR)
228			return -1;
229		if (*pattern != '\0' && type == RULE_CLEAR)
230			return -1;
231		break;
232	}
233
234	r = get_next_rule();
235	r->type = type;
236	parse_pattern(r, pattern);
237
238	return 0;
239}
240
241void
242parse_file(const char *file, enum rule_type def)
243{
244	FILE *fp;
245	char *line = NULL;
246	size_t linesize = 0, linenum = 0;
247	ssize_t linelen;
248
249	if ((fp = fopen(file, "r")) == NULL)
250		err(ERR_SYNTAX, "open: %s", file);
251
252	while ((linelen = getline(&line, &linesize, fp)) != -1) {
253		linenum++;
254		line[linelen - 1] = '\0';
255		if (parse_rule(line, def) == -1)
256			errx(ERR_SYNTAX, "syntax error in %s at entry %zu",
257			    file, linenum);
258	}
259
260	free(line);
261	if (ferror(fp))
262		err(ERR_SYNTAX, "failed to parse file %s", file);
263	fclose(fp);
264}
265
266static const char *
267send_command(struct rule *r)
268{
269	static char buf[16];
270	char *b = buf;
271	char *ep = buf + sizeof(buf);
272
273	switch (r->type) {
274	case RULE_EXCLUDE:
275		*b++ = '-';
276		break;
277	case RULE_INCLUDE:
278		*b++ = '+';
279		break;
280	case RULE_CLEAR:
281		*b++ = '!';
282		break;
283#ifdef NOTYET
284	case RULE_MERGE:
285		*b++ = '.';
286		break;
287	case RULE_DIR_MERGE:
288		*b++ = ':';
289		break;
290	case RULE_SHOW:
291		*b++ = 'S';
292		break;
293	case RULE_HIDE:
294		*b++ = 'H';
295		break;
296	case RULE_PROTECT:
297		*b++ = 'P';
298		break;
299	case RULE_RISK:
300		*b++ = 'R';
301		break;
302#endif
303	default:
304		err(ERR_SYNTAX, "unknown rule type %d", r->type);
305	}
306
307#ifdef NOTYET
308	for (i = 0; modifiers[i].modifier != 0; i++) {
309		if (rule->modifiers & modifiers[i].modifier)
310			*b++ = modifiers[i].sopt;
311		if (b >= ep - 3)
312			err(ERR_SYNTAX, "rule modifiers overflow");
313	}
314#endif
315	if (b >= ep - 3)
316		err(ERR_SYNTAX, "rule prefix overflow");
317	*b++ = ' ';
318
319	/* include the stripped root '/' for anchored patterns */
320	if (r->anchored)
321		*b++ = '/';
322	*b++ = '\0';
323	return buf;
324}
325
326static const char *
327postfix_command(struct rule *r)
328{
329	static char buf[8];
330
331	buf[0] = '\0';
332	if (r->onlydir)
333		strlcpy(buf, "/", sizeof(buf));
334	if (r->leadingdir)
335		strlcpy(buf, "/***", sizeof(buf));
336
337	return buf;
338}
339
340void
341send_rules(struct sess *sess, int fd)
342{
343	const char *cmd;
344	const char *postfix;
345	struct rule *r;
346	size_t cmdlen, len, postlen, i;
347
348	for (i = 0; i < numrules; i++) {
349		r = &rules[i];
350		cmd = send_command(r);
351		if (cmd == NULL)
352			err(ERR_PROTOCOL,
353			    "rules are incompatible with remote rsync");
354		postfix = postfix_command(r);
355		cmdlen = strlen(cmd);
356		len = strlen(r->pattern);
357		postlen = strlen(postfix);
358
359		if (!io_write_int(sess, fd, cmdlen + len + postlen))
360			err(ERR_SOCK_IO, "send rules");
361		if (!io_write_buf(sess, fd, cmd, cmdlen))
362			err(ERR_SOCK_IO, "send rules");
363		if (!io_write_buf(sess, fd, r->pattern, len))
364			err(ERR_SOCK_IO, "send rules");
365		/* include the '/' stripped by onlydir */
366		if (postlen > 0)
367			if (!io_write_buf(sess, fd, postfix, postlen))
368				err(ERR_SOCK_IO, "send rules");
369	}
370
371	if (!io_write_int(sess, fd, 0))
372		err(ERR_SOCK_IO, "send rules");
373}
374
375void
376recv_rules(struct sess *sess, int fd)
377{
378	char line[8192];
379	size_t len;
380
381	do {
382		if (!io_read_size(sess, fd, &len))
383			err(ERR_SOCK_IO, "receive rules");
384
385		if (len == 0)
386			return;
387		if (len >= sizeof(line) - 1)
388			errx(ERR_SOCK_IO, "received rule too long");
389		if (!io_read_buf(sess, fd, line, len))
390			err(ERR_SOCK_IO, "receive rules");
391		line[len] = '\0';
392		if (parse_rule(line, RULE_NONE) == -1)
393			errx(ERR_PROTOCOL, "syntax error in received rules");
394	} while (1);
395}
396
397static inline int
398rule_matched(struct rule *r)
399{
400	/* TODO apply negation once modifiers are added */
401
402	if (r->type == RULE_EXCLUDE)
403		return -1;
404	else
405		return 1;
406}
407
408int
409rules_match(const char *path, int isdir)
410{
411	const char *basename, *p = NULL;
412	struct rule *r;
413	size_t i;
414
415	basename = strrchr(path, '/');
416	if (basename != NULL)
417		basename += 1;
418	else
419		basename = path;
420
421	for (i = 0; i < numrules; i++) {
422		r = &rules[i];
423
424		if (r->onlydir && !isdir)
425			continue;
426
427		if (r->nowild) {
428			/* fileonly and anchored are mutually exclusive */
429			if (r->fileonly) {
430				if (strcmp(basename, r->pattern) == 0)
431					return rule_matched(r);
432			} else if (r->anchored) {
433				/*
434				 * assumes that neither path nor pattern
435				 * start with a '/'.
436				 */
437				if (strcmp(path, r->pattern) == 0)
438					return rule_matched(r);
439			} else if (r->leadingdir) {
440				size_t plen = strlen(r->pattern);
441
442				p = strstr(path, r->pattern);
443				/*
444				 * match from start or dir boundary also
445				 * match to end or to dir boundary
446				 */
447				if (p != NULL && (p == path || p[-1] == '/') &&
448				    (p[plen] == '\0' || p[plen] == '/'))
449					return rule_matched(r);
450			} else {
451				size_t len = strlen(path);
452				size_t plen = strlen(r->pattern);
453
454				if (len >= plen && strcmp(path + len - plen,
455				    r->pattern) == 0) {
456					/* match all or start on dir boundary */
457					if (len == plen ||
458					    path[len - plen - 1] == '/')
459						return rule_matched(r);
460				}
461			}
462		} else {
463			if (r->fileonly) {
464				p = basename;
465			} else if (r->anchored || r->numseg == -1) {
466				/* full path matching */
467				p = path;
468			} else {
469				short nseg = 1;
470
471				/* match against the last numseg elements */
472				for (p = path; *p != '\0'; p++)
473					if (*p == '/')
474						nseg++;
475				if (nseg < r->numseg) {
476					p = NULL;
477				} else {
478					nseg -= r->numseg;
479					for (p = path; *p != '\0' && nseg > 0;
480					    p++) {
481						if (*p == '/')
482							nseg--;
483					}
484				}
485			}
486
487			if (p != NULL) {
488				if (rmatch(r->pattern, p, r->leadingdir) == 0)
489					return rule_matched(r);
490			}
491		}
492	}
493
494	return 0;
495}
496