1CAUTION:  This patch compiles, but is otherwise totally untested!
2
3This patch also implements --times-only.
4
5Implementation details for the --source-filter and --dest-filter options:
6
7 - These options open a *HUGE* security hole in daemon mode unless they
8   are refused in your rsyncd.conf!
9
10 - Filtering disables the rsync algorithm. (This should be fixed.)
11
12 - The source filter creates temporary files in /tmp. (Should be overridable.)
13
14 - If the source filter fails, the data is sent unfiltered. (Should be
15   changed to abort.)
16
17 - Failure of the destination filter causes data loss!!! (Should be changed
18   to abort.)
19
20 - If the filter changes the size of a file, you should use the --times-only
21   option to prevent repeated transfers of unchanged files.
22
23 - If the COMMAND contains single quotes, option-passing breaks.  (Needs
24   to be fixed.)
25
26To use this patch, run these commands for a successful build:
27
28    patch -p1 <patches/source-filter_dest-filter.diff
29    ./prepare-source
30    ./configure                                (optional if already run)
31    make
32
33--- old/generator.c
34+++ new/generator.c
35@@ -60,6 +60,7 @@ extern int append_mode;
36 extern int make_backups;
37 extern int csum_length;
38 extern int ignore_times;
39+extern int times_only;
40 extern int size_only;
41 extern OFF_T max_size;
42 extern OFF_T min_size;
43@@ -378,7 +379,7 @@ void itemize(struct file_struct *file, i
44 /* Perform our quick-check heuristic for determining if a file is unchanged. */
45 int unchanged_file(char *fn, struct file_struct *file, STRUCT_STAT *st)
46 {
47-	if (st->st_size != file->length)
48+	if (!times_only && st->st_size != file->length)
49 		return 0;
50 
51 	/* if always checksum is set then we use the checksum instead
52--- old/main.c
53+++ new/main.c
54@@ -122,7 +122,7 @@ pid_t wait_process(pid_t pid, int *statu
55 }
56 
57 /* Wait for a process to exit, calling io_flush while waiting. */
58-static void wait_process_with_flush(pid_t pid, int *exit_code_ptr)
59+void wait_process_with_flush(pid_t pid, int *exit_code_ptr)
60 {
61 	pid_t waited_pid;
62 	int status;
63--- old/options.c
64+++ new/options.c
65@@ -99,6 +99,7 @@ int keep_partial = 0;
66 int safe_symlinks = 0;
67 int copy_unsafe_links = 0;
68 int size_only = 0;
69+int times_only = 0;
70 int daemon_bwlimit = 0;
71 int bwlimit = 0;
72 int fuzzy_basis = 0;
73@@ -151,6 +152,8 @@ char *logfile_name = NULL;
74 char *logfile_format = NULL;
75 char *stdout_format = NULL;
76 char *password_file = NULL;
77+char *source_filter = NULL;
78+char *dest_filter = NULL;
79 char *rsync_path = RSYNC_PATH;
80 char *backup_dir = NULL;
81 char backup_dir_buf[MAXPATHLEN];
82@@ -343,6 +346,7 @@ void usage(enum logcode F)
83   rprintf(F,"     --timeout=TIME          set I/O timeout in seconds\n");
84   rprintf(F," -I, --ignore-times          don't skip files that match in size and mod-time\n");
85   rprintf(F,"     --size-only             skip files that match in size\n");
86+  rprintf(F,"     --times-only            skip files that match in mod-time\n");
87   rprintf(F,"     --modify-window=NUM     compare mod-times with reduced accuracy\n");
88   rprintf(F," -T, --temp-dir=DIR          create temporary files in directory DIR\n");
89   rprintf(F," -y, --fuzzy                 find similar file for basis if no dest file\n");
90@@ -380,6 +384,8 @@ void usage(enum logcode F)
91   rprintf(F,"     --write-batch=FILE      write a batched update to FILE\n");
92   rprintf(F,"     --only-write-batch=FILE like --write-batch but w/o updating destination\n");
93   rprintf(F,"     --read-batch=FILE       read a batched update from FILE\n");
94+  rprintf(F,"     --source-filter=COMMAND filter file through COMMAND at source\n");
95+  rprintf(F,"     --dest-filter=COMMAND   filter file through COMMAND at destination\n");
96   rprintf(F,"     --protocol=NUM          force an older protocol version to be used\n");
97 #ifdef INET6
98   rprintf(F," -4, --ipv4                  prefer IPv4\n");
99@@ -463,6 +469,7 @@ static struct poptOption long_options[] 
100   {"chmod",            0,  POPT_ARG_STRING, 0, OPT_CHMOD, 0, 0 },
101   {"ignore-times",    'I', POPT_ARG_NONE,   &ignore_times, 0, 0, 0 },
102   {"size-only",        0,  POPT_ARG_NONE,   &size_only, 0, 0, 0 },
103+  {"times-only",       0,  POPT_ARG_NONE,   &times_only , 0, 0, 0 },
104   {"one-file-system", 'x', POPT_ARG_NONE,   0, 'x', 0, 0 },
105   {"update",          'u', POPT_ARG_NONE,   &update_only, 0, 0, 0 },
106   {"existing",         0,  POPT_ARG_NONE,   &ignore_non_existing, 0, 0, 0 },
107@@ -541,6 +548,8 @@ static struct poptOption long_options[] 
108   {"password-file",    0,  POPT_ARG_STRING, &password_file, 0, 0, 0 },
109   {"blocking-io",      0,  POPT_ARG_VAL,    &blocking_io, 1, 0, 0 },
110   {"no-blocking-io",   0,  POPT_ARG_VAL,    &blocking_io, 0, 0, 0 },
111+  {"source-filter",    0,  POPT_ARG_STRING, &source_filter, 0, 0, 0 },
112+  {"dest-filter",      0,  POPT_ARG_STRING, &dest_filter, 0, 0, 0 },
113   {"protocol",         0,  POPT_ARG_INT,    &protocol_version, 0, 0, 0 },
114   {"checksum-seed",    0,  POPT_ARG_INT,    &checksum_seed, 0, 0, 0 },
115   {"server",           0,  POPT_ARG_NONE,   0, OPT_SERVER, 0, 0 },
116@@ -1410,6 +1419,16 @@ int parse_arguments(int *argc, const cha
117 		}
118 	}
119 
120+	if (source_filter || dest_filter) {
121+		if (whole_file == 0) {
122+			snprintf(err_buf, sizeof err_buf,
123+				 "--no-whole-file cannot be used with --%s-filter\n",
124+				 source_filter ? "source" : "dest");
125+			return 0;
126+		}
127+		whole_file = 1;
128+	}
129+
130 	if (files_from) {
131 		char *h, *p;
132 		int q;
133@@ -1676,6 +1695,25 @@ void server_options(char **args,int *arg
134 			args[ac++] = "--size-only";
135 	}
136 
137+	if (times_only && am_sender)
138+		args[ac++] = "--times-only";
139+
140+	if (source_filter && !am_sender) {
141+		/* Need to single quote the arg to keep the remote shell
142+		 * from splitting it.  FIXME: breaks if command has single quotes. */
143+	        if (asprintf(&arg, "--source-filter='%s'", source_filter) < 0)
144+			goto oom;
145+		args[ac++] = arg;
146+	}
147+
148+	if (dest_filter && am_sender) {
149+		/* Need to single quote the arg to keep the remote shell
150+		 * from splitting it.  FIXME: breaks if command has single quotes. */
151+	        if (asprintf(&arg, "--dest-filter='%s'", dest_filter) < 0)
152+			goto oom;
153+		args[ac++] = arg;
154+	}
155+
156 	if (modify_window_set) {
157 		if (asprintf(&arg, "--modify-window=%d", modify_window) < 0)
158 			goto oom;
159--- old/pipe.c
160+++ new/pipe.c
161@@ -166,3 +166,77 @@ pid_t local_child(int argc, char **argv,
162 
163 	return pid;
164 }
165+/* Fork a child that execs command[] with stdin on a new pipe and stdout on "out"; returns its pid. */
166+pid_t run_filter(char *command[], int out, int *pipe_to_filter)
167+{
168+	pid_t pid;
169+	int pipefds[2];	/* [0] becomes the child's stdin, [1] is handed back to the caller */
170+	/* At verbose >= 2, pass the argv to print_child_argv() before forking. */
171+	if (verbose >= 2)
172+		print_child_argv(command);
173+
174+	if (pipe(pipefds) < 0) {
175+		rsyserr(FERROR, errno, "pipe");
176+		exit_cleanup(RERR_IPC);	/* pipe creation failure is treated as fatal */
177+	}
178+
179+	pid = do_fork();
180+	if (pid == -1) {
181+		rsyserr(FERROR, errno, "fork");
182+		exit_cleanup(RERR_IPC);	/* fork failure is treated as fatal */
183+	}
184+
185+	if (pid == 0) {	/* child side: do_fork() returned 0 */
186+		if (dup2(pipefds[0], STDIN_FILENO) < 0	/* read end of the pipe becomes stdin */
187+		 || close(pipefds[1]) < 0	/* the child does not keep the write end */
188+		 || dup2(out, STDOUT_FILENO) < 0) {	/* filter output goes to "out" */
189+			rsyserr(FERROR, errno, "Failed dup/close");
190+			exit_cleanup(RERR_IPC);
191+		}
192+		umask(orig_umask);	/* run the filter with orig_umask */
193+		set_blocking(STDIN_FILENO);
194+		if (blocking_io)	/* stdout is switched only when blocking_io is set */
195+			set_blocking(STDOUT_FILENO);
196+		execvp(command[0], command);	/* command[0] is looked up via PATH */
197+		rsyserr(FERROR, errno, "Failed to exec %s", command[0]);	/* reached only if execvp() returned */
198+		exit_cleanup(RERR_IPC);
199+	}
200+
201+	if (close(pipefds[0]) < 0) {	/* parent side: drop the read end, keep the write end */
202+		rsyserr(FERROR, errno, "Failed to close");
203+		exit_cleanup(RERR_IPC);
204+	}
205+
206+	*pipe_to_filter = pipefds[1];	/* the caller writes the filter's input to this fd */
207+
208+	return pid;	/* pid of the forked filter process */
209+}
210+/* Fork a child that execs command[] with stdin dup'ed from "in" and stdout dup'ed to "out"; returns its pid. */
211+pid_t run_filter_on_file(char *command[], int out, int in)
212+{
213+	pid_t pid;
214+	/* No pipe is created here: both descriptors are supplied by the caller. */
215+	if (verbose >= 2)
216+		print_child_argv(command);
217+
218+	pid = do_fork();
219+	if (pid == -1) {
220+		rsyserr(FERROR, errno, "fork");
221+		exit_cleanup(RERR_IPC);	/* fork failure is treated as fatal */
222+	}
223+
224+	if (pid == 0) {	/* child side: do_fork() returned 0 */
225+		if (dup2(in, STDIN_FILENO) < 0	/* filter reads directly from "in" */
226+		 || dup2(out, STDOUT_FILENO) < 0) {	/* filter writes directly to "out" */
227+			rsyserr(FERROR, errno, "Failed to dup2");
228+			exit_cleanup(RERR_IPC);
229+		}
230+		if (blocking_io)	/* stdout is switched only when blocking_io is set */
231+			set_blocking(STDOUT_FILENO);
232+		execvp(command[0], command);	/* command[0] is looked up via PATH */
233+		rsyserr(FERROR, errno, "Failed to exec %s", command[0]);	/* reached only if execvp() returned */
234+		exit_cleanup(RERR_IPC);
235+	}
236+
237+	return pid;	/* parent side: neither "in" nor "out" is closed by this function */
238+}
239--- old/receiver.c
240+++ new/receiver.c
241@@ -48,6 +48,7 @@ extern int checksum_seed;
242 extern int inplace;
243 extern int delay_updates;
244 extern struct stats stats;
245+extern char *dest_filter;
246 extern char *stdout_format;
247 extern char *tmpdir;
248 extern char *partial_dir;
249@@ -351,6 +352,8 @@ int recv_files(int f_in, struct file_lis
250 	enum logcode log_code = log_before_transfer ? FLOG : FINFO;
251 	int max_phase = protocol_version >= 29 ? 2 : 1;
252 	int i, recv_ok;
253+	pid_t pid = 0;
254+	char *filter_argv[MAX_FILTER_ARGS + 1];
255 
256 	if (verbose > 2)
257 		rprintf(FINFO,"recv_files(%d) starting\n",flist->count);
258@@ -365,6 +368,23 @@ int recv_files(int f_in, struct file_lis
259 
260 	updating_basis = inplace;
261 
262+	if (dest_filter) {
263+		char *p;
264+		char *sep = " \t";
265+		int i;
266+		for (p = strtok(dest_filter, sep), i = 0;
267+		     p && i < MAX_FILTER_ARGS;
268+		     p = strtok(0, sep))
269+			filter_argv[i++] = p;
270+		filter_argv[i] = NULL;
271+		if (p) {
272+			rprintf(FERROR,
273+				"Too many arguments to dest-filter (> %d)\n",
274+				MAX_FILTER_ARGS);
275+			exit_cleanup(RERR_SYNTAX);
276+		}
277+	}
278+
279 	while (1) {
280 		cleanup_disable();
281 
282@@ -610,6 +630,9 @@ int recv_files(int f_in, struct file_lis
283 		else if (!am_server && verbose && do_progress)
284 			rprintf(FINFO, "%s\n", fname);
285 
286+		if (dest_filter)
287+			pid = run_filter(filter_argv, fd2, &fd2);
288+
289 		/* recv file data */
290 		recv_ok = receive_data(f_in, fnamecmp, fd1, st.st_size,
291 				       fname, fd2, file->length);
292@@ -624,6 +647,16 @@ int recv_files(int f_in, struct file_lis
293 			exit_cleanup(RERR_FILEIO);
294 		}
295 
296+		if (dest_filter) {
297+			int status;
298+			wait_process_with_flush(pid, &status);
299+			if (status != 0) {
300+				rprintf(FERROR, "filter %s exited code: %d\n",
301+					dest_filter, status);
302+				continue;
303+			}
304+		}
305+
306 		if ((recv_ok && (!delay_updates || !partialptr)) || inplace) {
307 			char *temp_copy_name;
308 			if (partialptr == fname)
309--- old/rsync.h
310+++ new/rsync.h
311@@ -104,6 +104,7 @@
312 #define IOERR_DEL_LIMIT (1<<2)
313 
314 #define MAX_ARGS 1000
315+#define MAX_FILTER_ARGS 100
316 #define MAX_BASIS_DIRS 20
317 #define MAX_SERVER_ARGS (MAX_BASIS_DIRS*2 + 100)
318 
319--- old/rsync.yo
320+++ new/rsync.yo
321@@ -360,6 +360,7 @@ to the detailed description below for a 
322      --timeout=TIME          set I/O timeout in seconds
323  -I, --ignore-times          don't skip files that match size and time
324      --size-only             skip files that match in size
325+     --times-only            skip files that match in mod-time
326      --modify-window=NUM     compare mod-times with reduced accuracy
327  -T, --temp-dir=DIR          create temporary files in directory DIR
328  -y, --fuzzy                 find similar file for basis if no dest file
329@@ -397,6 +398,8 @@ to the detailed description below for a 
330      --write-batch=FILE      write a batched update to FILE
331      --only-write-batch=FILE like --write-batch but w/o updating dest
332      --read-batch=FILE       read a batched update from FILE
333+     --source-filter=COMMAND filter file through COMMAND at source
334+     --dest-filter=COMMAND   filter file through COMMAND at destination
335      --protocol=NUM          force an older protocol version to be used
336      --checksum-seed=NUM     set block/file checksum seed (advanced)
337  -4, --ipv4                  prefer IPv4
338@@ -1759,6 +1762,33 @@ file previously generated by bf(--write-
339 If em(FILE) is bf(-), the batch data will be read from standard input.
340 See the "BATCH MODE" section for details.
341 
342+dit(bf(--source-filter=COMMAND)) This option allows the user to specify a
343+filter program that will be applied to the contents of all transferred
344+regular files before the data is sent to destination.  COMMAND will receive
345+the data on its standard input and it should write the filtered data to
346+standard output.  COMMAND should exit non-zero if it cannot process the
347+data or if it encounters an error when writing the data to stdout.
348+
349+Example: --source-filter="gzip -9" will cause remote files to be
350+compressed.
351+Use of --source-filter automatically enables --whole-file.
352+If your filter does not output the same number of bytes that it received on
353+input, you should use --times-only to disable size and content checks on
354+subsequent rsync runs.
355+
356+dit(bf(--dest-filter=COMMAND)) This option allows you to specify a filter
357+program that will be applied to the contents of all transferred regular
358+files before the data is written to disk.  COMMAND will receive the data on
359+its standard input and it should write the filtered data to standard
360+output.  COMMAND should exit non-zero if it cannot process the data or if
361+it encounters an error when writing the data to stdout.
362+
363+Example: --dest-filter="gzip -9" will cause remote files to be compressed.
364+Use of --dest-filter automatically enables --whole-file.
365+If your filter does not output the same number of bytes that it
366+received on input, you should use --times-only to disable size and
367+content checks on subsequent rsync runs.
368+
369 dit(bf(--protocol=NUM)) Force an older protocol version to be used.  This
370 is useful for creating a batch file that is compatible with an older
371 version of rsync.  For instance, if rsync 2.6.4 is being used with the
372--- old/sender.c
373+++ new/sender.c
374@@ -43,6 +43,7 @@ extern int batch_fd;
375 extern int write_batch;
376 extern struct stats stats;
377 extern struct file_list *the_file_list;
378+extern char *source_filter;
379 extern char *stdout_format;
380 
381 
382@@ -224,6 +225,26 @@ void send_files(struct file_list *flist,
383 	enum logcode log_code = log_before_transfer ? FLOG : FINFO;
384 	int f_xfer = write_batch < 0 ? batch_fd : f_out;
385 	int i, j;
386+	char *filter_argv[MAX_FILTER_ARGS + 1];
387+	char *tmp = 0;
388+	int unlink_tmp = 0;
389+
390+	if (source_filter) {
391+		char *p;
392+		char *sep = " \t";
393+		int i;
394+		for (p = strtok(source_filter, sep), i = 0;
395+		     p && i < MAX_FILTER_ARGS;
396+		     p = strtok(0, sep))
397+			filter_argv[i++] = p;
398+		filter_argv[i] = NULL;
399+		if (p) {
400+			rprintf(FERROR,
401+				"Too many arguments to source-filter (> %d)\n",
402+				MAX_FILTER_ARGS);
403+			exit_cleanup(RERR_SYNTAX);
404+		}
405+	}
406 
407 	if (verbose > 2)
408 		rprintf(FINFO, "send_files starting\n");
409@@ -297,6 +318,7 @@ void send_files(struct file_list *flist,
410 			return;
411 		}
412 
413+		unlink_tmp = 0;
414 		fd = do_open(fname, O_RDONLY, 0);
415 		if (fd == -1) {
416 			if (errno == ENOENT) {
417@@ -325,6 +347,33 @@ void send_files(struct file_list *flist,
418 			return;
419 		}
420 
421+		if (source_filter) {
422+			int fd2;
423+			char *tmpl = "/tmp/rsync-filtered_sourceXXXXXX";
424+
425+			tmp = strdup(tmpl);
426+			fd2 = mkstemp(tmp);
427+			if (fd2 == -1) {
428+				rprintf(FERROR, "mkstemp %s failed: %s\n",
429+					tmp, strerror(errno));
430+			} else {
431+				int status;
432+				pid_t pid = run_filter_on_file(filter_argv, fd2, fd);
433+				close(fd);
434+				close(fd2);
435+				wait_process_with_flush(pid, &status);
436+				if (status != 0) {
437+					rprintf(FERROR,
438+					    "bypassing source filter %s; exited with code: %d\n",
439+					    source_filter, status);
440+					fd = do_open(fname, O_RDONLY, 0);
441+				} else {
442+					fd = do_open(tmp, O_RDONLY, 0);
443+					unlink_tmp = 1;
444+				}
445+			}
446+		}
447+
448 		if (st.st_size) {
449 			int32 read_size = MAX(s->blength * 3, MAX_MAP_SIZE);
450 			mbuf = map_file(fd, st.st_size, read_size, s->blength);
451@@ -366,6 +415,8 @@ void send_files(struct file_list *flist,
452 			}
453 		}
454 		close(fd);
455+		if (unlink_tmp)
456+			unlink(tmp);
457 
458 		free_sums(s);
459 
460--- old/proto.h
461+++ new/proto.h
462@@ -199,6 +199,7 @@ void maybe_log_item(struct file_struct *
463 void log_delete(char *fname, int mode);
464 void log_exit(int code, const char *file, int line);
465 pid_t wait_process(pid_t pid, int *status_ptr, int flags);
466+void wait_process_with_flush(pid_t pid, int *exit_code_ptr);
467 int child_main(int argc, char *argv[]);
468 void start_server(int f_in, int f_out, int argc, char *argv[]);
469 int client_run(int f_in, int f_out, pid_t pid, int argc, char *argv[]);
470@@ -218,6 +219,8 @@ BOOL pm_process( char *FileName,
471 pid_t piped_child(char **command, int *f_in, int *f_out);
472 pid_t local_child(int argc, char **argv, int *f_in, int *f_out,
473 		  int (*child_main)(int, char*[]));
474+pid_t run_filter(char *command[], int out, int *pipe_to_filter);
475+pid_t run_filter_on_file(char *command[], int out, int in);
476 void end_progress(OFF_T size);
477 void show_progress(OFF_T ofs, OFF_T size);
478 int recv_files(int f_in, struct file_list *flist, char *local_name);
479--- old/rsync.1
480+++ new/rsync.1
481@@ -426,6 +426,7 @@ to the detailed description below for a 
482      \-\-timeout=TIME          set I/O timeout in seconds
483  \-I, \-\-ignore\-times          don\&'t skip files that match size and time
484      \-\-size\-only             skip files that match in size
485+     \-\-times\-only            skip files that match in mod-time
486      \-\-modify\-window=NUM     compare mod-times with reduced accuracy
487  \-T, \-\-temp\-dir=DIR          create temporary files in directory DIR
488  \-y, \-\-fuzzy                 find similar file for basis if no dest file
489@@ -463,6 +464,8 @@ to the detailed description below for a 
490      \-\-write\-batch=FILE      write a batched update to FILE
491      \-\-only\-write\-batch=FILE like \-\-write\-batch but w/o updating dest
492      \-\-read\-batch=FILE       read a batched update from FILE
493+     \-\-source\-filter=COMMAND filter file through COMMAND at source
494+     \-\-dest\-filter=COMMAND   filter file through COMMAND at destination
495      \-\-protocol=NUM          force an older protocol version to be used
496      \-\-checksum\-seed=NUM     set block/file checksum seed (advanced)
497  \-4, \-\-ipv4                  prefer IPv4
498@@ -2038,6 +2041,35 @@ file previously generated by \fB\-\-writ
499 If \fIFILE\fP is \fB\-\fP, the batch data will be read from standard input\&.
500 See the "BATCH MODE" section for details\&.
501 .IP 
502+.IP "\fB\-\-source\-filter=COMMAND\fP"
503+This option allows the user to specify a
504+filter program that will be applied to the contents of all transferred
505+regular files before the data is sent to destination\&.  COMMAND will receive
506+the data on its standard input and it should write the filtered data to
507+standard output\&.  COMMAND should exit non-zero if it cannot process the
508+data or if it encounters an error when writing the data to stdout\&.
509+.IP 
510+Example: \-\-source\-filter="gzip \-9" will cause remote files to be
511+compressed\&.
512+Use of \-\-source\-filter automatically enables \-\-whole\-file\&.
513+If your filter does not output the same number of bytes that it received on
514+input, you should use \-\-times\-only to disable size and content checks on
515+subsequent rsync runs\&.
516+.IP 
517+.IP "\fB\-\-dest\-filter=COMMAND\fP"
518+This option allows you to specify a filter
519+program that will be applied to the contents of all transferred regular
520+files before the data is written to disk\&.  COMMAND will receive the data on
521+its standard input and it should write the filtered data to standard
522+output\&.  COMMAND should exit non-zero if it cannot process the data or if
523+it encounters an error when writing the data to stdout\&.
524+.IP 
525+Example: \-\-dest\-filter="gzip \-9" will cause remote files to be compressed\&.
526+Use of \-\-dest\-filter automatically enables \-\-whole\-file\&.
527+If your filter does not output the same number of bytes that it
528+received on input, you should use \-\-times\-only to disable size and
529+content checks on subsequent rsync runs\&.
530+.IP 
531 .IP "\fB\-\-protocol=NUM\fP"
532 Force an older protocol version to be used\&.  This
533 is useful for creating a batch file that is compatible with an older
534