1CAUTION: This patch compiles, but is otherwise totally untested! 2 3This patch also implements --times-only. 4 5Implementation details for the --source-filter and --dest-filter options: 6 7 - These options open a *HUGE* security hole in daemon mode unless they 8 are refused in your rsyncd.conf! 9 10 - Filtering disables the rsync algorithm. (This should be fixed.) 11 12 - Source filter makes temporary files in /tmp. (Should be overridable.) 13 14 - If source filter fails, data is sent unfiltered. (Should be changed 15 to abort.) 16 17 - Failure of destination filter causes data loss!!! (Should be changed 18 to abort.) 19 20 - If filter changes size of file, you should use --times-only option to 21 prevent repeated transfers of unchanged files. 22 23 - If the COMMAND contains single quotes, option-passing breaks. (Needs 24 to be fixed.) 25 26To use this patch, run these commands for a successful build: 27 28 patch -p1 <patches/source-filter_dest-filter.diff 29 ./prepare-source 30 ./configure (optional if already run) 31 make 32 33--- old/generator.c 34+++ new/generator.c 35@@ -60,6 +60,7 @@ extern int append_mode; 36 extern int make_backups; 37 extern int csum_length; 38 extern int ignore_times; 39+extern int times_only; 40 extern int size_only; 41 extern OFF_T max_size; 42 extern OFF_T min_size; 43@@ -378,7 +379,7 @@ void itemize(struct file_struct *file, i 44 /* Perform our quick-check heuristic for determining if a file is unchanged. */ 45 int unchanged_file(char *fn, struct file_struct *file, STRUCT_STAT *st) 46 { 47- if (st->st_size != file->length) 48+ if (!times_only && st->st_size != file->length) 49 return 0; 50 51 /* if always checksum is set then we use the checksum instead 52--- old/main.c 53+++ new/main.c 54@@ -122,7 +122,7 @@ pid_t wait_process(pid_t pid, int *statu 55 } 56 57 /* Wait for a process to exit, calling io_flush while waiting. 
*/ 58-static void wait_process_with_flush(pid_t pid, int *exit_code_ptr) 59+void wait_process_with_flush(pid_t pid, int *exit_code_ptr) 60 { 61 pid_t waited_pid; 62 int status; 63--- old/options.c 64+++ new/options.c 65@@ -99,6 +99,7 @@ int keep_partial = 0; 66 int safe_symlinks = 0; 67 int copy_unsafe_links = 0; 68 int size_only = 0; 69+int times_only = 0; 70 int daemon_bwlimit = 0; 71 int bwlimit = 0; 72 int fuzzy_basis = 0; 73@@ -151,6 +152,8 @@ char *logfile_name = NULL; 74 char *logfile_format = NULL; 75 char *stdout_format = NULL; 76 char *password_file = NULL; 77+char *source_filter = NULL; 78+char *dest_filter = NULL; 79 char *rsync_path = RSYNC_PATH; 80 char *backup_dir = NULL; 81 char backup_dir_buf[MAXPATHLEN]; 82@@ -343,6 +346,7 @@ void usage(enum logcode F) 83 rprintf(F," --timeout=TIME set I/O timeout in seconds\n"); 84 rprintf(F," -I, --ignore-times don't skip files that match in size and mod-time\n"); 85 rprintf(F," --size-only skip files that match in size\n"); 86+ rprintf(F," --times-only skip files that match in mod-time\n"); 87 rprintf(F," --modify-window=NUM compare mod-times with reduced accuracy\n"); 88 rprintf(F," -T, --temp-dir=DIR create temporary files in directory DIR\n"); 89 rprintf(F," -y, --fuzzy find similar file for basis if no dest file\n"); 90@@ -380,6 +384,8 @@ void usage(enum logcode F) 91 rprintf(F," --write-batch=FILE write a batched update to FILE\n"); 92 rprintf(F," --only-write-batch=FILE like --write-batch but w/o updating destination\n"); 93 rprintf(F," --read-batch=FILE read a batched update from FILE\n"); 94+ rprintf(F," --source-filter=COMMAND filter file through COMMAND at source\n"); 95+ rprintf(F," --dest-filter=COMMAND filter file through COMMAND at destination\n"); 96 rprintf(F," --protocol=NUM force an older protocol version to be used\n"); 97 #ifdef INET6 98 rprintf(F," -4, --ipv4 prefer IPv4\n"); 99@@ -463,6 +469,7 @@ static struct poptOption long_options[] 100 {"chmod", 0, POPT_ARG_STRING, 0, OPT_CHMOD, 0, 0 
}, 101 {"ignore-times", 'I', POPT_ARG_NONE, &ignore_times, 0, 0, 0 }, 102 {"size-only", 0, POPT_ARG_NONE, &size_only, 0, 0, 0 }, 103+ {"times-only", 0, POPT_ARG_NONE, &times_only , 0, 0, 0 }, 104 {"one-file-system", 'x', POPT_ARG_NONE, 0, 'x', 0, 0 }, 105 {"update", 'u', POPT_ARG_NONE, &update_only, 0, 0, 0 }, 106 {"existing", 0, POPT_ARG_NONE, &ignore_non_existing, 0, 0, 0 }, 107@@ -541,6 +548,8 @@ static struct poptOption long_options[] 108 {"password-file", 0, POPT_ARG_STRING, &password_file, 0, 0, 0 }, 109 {"blocking-io", 0, POPT_ARG_VAL, &blocking_io, 1, 0, 0 }, 110 {"no-blocking-io", 0, POPT_ARG_VAL, &blocking_io, 0, 0, 0 }, 111+ {"source-filter", 0, POPT_ARG_STRING, &source_filter, 0, 0, 0 }, 112+ {"dest-filter", 0, POPT_ARG_STRING, &dest_filter, 0, 0, 0 }, 113 {"protocol", 0, POPT_ARG_INT, &protocol_version, 0, 0, 0 }, 114 {"checksum-seed", 0, POPT_ARG_INT, &checksum_seed, 0, 0, 0 }, 115 {"server", 0, POPT_ARG_NONE, 0, OPT_SERVER, 0, 0 }, 116@@ -1410,6 +1419,16 @@ int parse_arguments(int *argc, const cha 117 } 118 } 119 120+ if (source_filter || dest_filter) { 121+ if (whole_file == 0) { 122+ snprintf(err_buf, sizeof err_buf, 123+ "--no-whole-file cannot be used with --%s-filter\n", 124+ source_filter ? "source" : "dest"); 125+ return 0; 126+ } 127+ whole_file = 1; 128+ } 129+ 130 if (files_from) { 131 char *h, *p; 132 int q; 133@@ -1676,6 +1695,25 @@ void server_options(char **args,int *arg 134 args[ac++] = "--size-only"; 135 } 136 137+ if (times_only && am_sender) 138+ args[ac++] = "--times-only"; 139+ 140+ if (source_filter && !am_sender) { 141+ /* Need to single quote the arg to keep the remote shell 142+ * from splitting it. FIXME: breaks if command has single quotes. */ 143+ if (asprintf(&arg, "--source-filter='%s'", source_filter) < 0) 144+ goto oom; 145+ args[ac++] = arg; 146+ } 147+ 148+ if (dest_filter && am_sender) { 149+ /* Need to single quote the arg to keep the remote shell 150+ * from splitting it. FIXME: breaks if command has single quotes. 
*/ 151+ if (asprintf(&arg, "--dest-filter='%s'", dest_filter) < 0) 152+ goto oom; 153+ args[ac++] = arg; 154+ } 155+ 156 if (modify_window_set) { 157 if (asprintf(&arg, "--modify-window=%d", modify_window) < 0) 158 goto oom; 159--- old/pipe.c 160+++ new/pipe.c 161@@ -166,3 +166,77 @@ pid_t local_child(int argc, char **argv, 162 163 return pid; 164 } 165+ 166+pid_t run_filter(char *command[], int out, int *pipe_to_filter) 167+{ 168+ pid_t pid; 169+ int pipefds[2]; 170+ 171+ if (verbose >= 2) 172+ print_child_argv(command); 173+ 174+ if (pipe(pipefds) < 0) { 175+ rsyserr(FERROR, errno, "pipe"); 176+ exit_cleanup(RERR_IPC); 177+ } 178+ 179+ pid = do_fork(); 180+ if (pid == -1) { 181+ rsyserr(FERROR, errno, "fork"); 182+ exit_cleanup(RERR_IPC); 183+ } 184+ 185+ if (pid == 0) { 186+ if (dup2(pipefds[0], STDIN_FILENO) < 0 187+ || close(pipefds[1]) < 0 188+ || dup2(out, STDOUT_FILENO) < 0) { 189+ rsyserr(FERROR, errno, "Failed dup/close"); 190+ exit_cleanup(RERR_IPC); 191+ } 192+ umask(orig_umask); 193+ set_blocking(STDIN_FILENO); 194+ if (blocking_io) 195+ set_blocking(STDOUT_FILENO); 196+ execvp(command[0], command); 197+ rsyserr(FERROR, errno, "Failed to exec %s", command[0]); 198+ exit_cleanup(RERR_IPC); 199+ } 200+ 201+ if (close(pipefds[0]) < 0) { 202+ rsyserr(FERROR, errno, "Failed to close"); 203+ exit_cleanup(RERR_IPC); 204+ } 205+ 206+ *pipe_to_filter = pipefds[1]; 207+ 208+ return pid; 209+} 210+ 211+pid_t run_filter_on_file(char *command[], int out, int in) 212+{ 213+ pid_t pid; 214+ 215+ if (verbose >= 2) 216+ print_child_argv(command); 217+ 218+ pid = do_fork(); 219+ if (pid == -1) { 220+ rsyserr(FERROR, errno, "fork"); 221+ exit_cleanup(RERR_IPC); 222+ } 223+ 224+ if (pid == 0) { 225+ if (dup2(in, STDIN_FILENO) < 0 226+ || dup2(out, STDOUT_FILENO) < 0) { 227+ rsyserr(FERROR, errno, "Failed to dup2"); 228+ exit_cleanup(RERR_IPC); 229+ } 230+ if (blocking_io) 231+ set_blocking(STDOUT_FILENO); 232+ execvp(command[0], command); 233+ rsyserr(FERROR, errno, 
"Failed to exec %s", command[0]); 234+ exit_cleanup(RERR_IPC); 235+ } 236+ 237+ return pid; 238+} 239--- old/receiver.c 240+++ new/receiver.c 241@@ -48,6 +48,7 @@ extern int checksum_seed; 242 extern int inplace; 243 extern int delay_updates; 244 extern struct stats stats; 245+extern char *dest_filter; 246 extern char *stdout_format; 247 extern char *tmpdir; 248 extern char *partial_dir; 249@@ -351,6 +352,8 @@ int recv_files(int f_in, struct file_lis 250 enum logcode log_code = log_before_transfer ? FLOG : FINFO; 251 int max_phase = protocol_version >= 29 ? 2 : 1; 252 int i, recv_ok; 253+ pid_t pid = 0; 254+ char *filter_argv[MAX_FILTER_ARGS + 1]; 255 256 if (verbose > 2) 257 rprintf(FINFO,"recv_files(%d) starting\n",flist->count); 258@@ -365,6 +368,23 @@ int recv_files(int f_in, struct file_lis 259 260 updating_basis = inplace; 261 262+ if (dest_filter) { 263+ char *p; 264+ char *sep = " \t"; 265+ int i; 266+ for (p = strtok(dest_filter, sep), i = 0; 267+ p && i < MAX_FILTER_ARGS; 268+ p = strtok(0, sep)) 269+ filter_argv[i++] = p; 270+ filter_argv[i] = NULL; 271+ if (p) { 272+ rprintf(FERROR, 273+ "Too many arguments to dest-filter (> %d)\n", 274+ MAX_FILTER_ARGS); 275+ exit_cleanup(RERR_SYNTAX); 276+ } 277+ } 278+ 279 while (1) { 280 cleanup_disable(); 281 282@@ -610,6 +630,9 @@ int recv_files(int f_in, struct file_lis 283 else if (!am_server && verbose && do_progress) 284 rprintf(FINFO, "%s\n", fname); 285 286+ if (dest_filter) 287+ pid = run_filter(filter_argv, fd2, &fd2); 288+ 289 /* recv file data */ 290 recv_ok = receive_data(f_in, fnamecmp, fd1, st.st_size, 291 fname, fd2, file->length); 292@@ -624,6 +647,16 @@ int recv_files(int f_in, struct file_lis 293 exit_cleanup(RERR_FILEIO); 294 } 295 296+ if (dest_filter) { 297+ int status; 298+ wait_process_with_flush(pid, &status); 299+ if (status != 0) { 300+ rprintf(FERROR, "filter %s exited code: %d\n", 301+ dest_filter, status); 302+ continue; 303+ } 304+ } 305+ 306 if ((recv_ok && (!delay_updates || 
!partialptr)) || inplace) { 307 char *temp_copy_name; 308 if (partialptr == fname) 309--- old/rsync.h 310+++ new/rsync.h 311@@ -104,6 +104,7 @@ 312 #define IOERR_DEL_LIMIT (1<<2) 313 314 #define MAX_ARGS 1000 315+#define MAX_FILTER_ARGS 100 316 #define MAX_BASIS_DIRS 20 317 #define MAX_SERVER_ARGS (MAX_BASIS_DIRS*2 + 100) 318 319--- old/rsync.yo 320+++ new/rsync.yo 321@@ -360,6 +360,7 @@ to the detailed description below for a 322 --timeout=TIME set I/O timeout in seconds 323 -I, --ignore-times don't skip files that match size and time 324 --size-only skip files that match in size 325+ --times-only skip files that match in mod-time 326 --modify-window=NUM compare mod-times with reduced accuracy 327 -T, --temp-dir=DIR create temporary files in directory DIR 328 -y, --fuzzy find similar file for basis if no dest file 329@@ -397,6 +398,8 @@ to the detailed description below for a 330 --write-batch=FILE write a batched update to FILE 331 --only-write-batch=FILE like --write-batch but w/o updating dest 332 --read-batch=FILE read a batched update from FILE 333+ --source-filter=COMMAND filter file through COMMAND at source 334+ --dest-filter=COMMAND filter file through COMMAND at destination 335 --protocol=NUM force an older protocol version to be used 336 --checksum-seed=NUM set block/file checksum seed (advanced) 337 -4, --ipv4 prefer IPv4 338@@ -1759,6 +1762,33 @@ file previously generated by bf(--write- 339 If em(FILE) is bf(-), the batch data will be read from standard input. 340 See the "BATCH MODE" section for details. 341 342+dit(bf(--source-filter=COMMAND)) This option allows the user to specify a 343+filter program that will be applied to the contents of all transferred 344+regular files before the data is sent to destination. COMMAND will receive 345+the data on its standard input and it should write the filtered data to 346+standard output. 
COMMAND should exit non-zero if it cannot process the 347+data or if it encounters an error when writing the data to stdout. 348+ 349+Example: --source-filter="gzip -9" will cause remote files to be 350+compressed. 351+Use of --source-filter automatically enables --whole-file. 352+If your filter does not output the same number of bytes that it received on 353+input, you should use --times-only to disable size and content checks on 354+subsequent rsync runs. 355+ 356+dit(bf(--dest-filter=COMMAND)) This option allows you to specify a filter 357+program that will be applied to the contents of all transferred regular 358+files before the data is written to disk. COMMAND will receive the data on 359+its standard input and it should write the filtered data to standard 360+output. COMMAND should exit non-zero if it cannot process the data or if 361+it encounters an error when writing the data to stdout. 362+ 363+Example: --dest-filter="gzip -9" will cause remote files to be compressed. 364+Use of --dest-filter automatically enables --whole-file. 365+If your filter does not output the same number of bytes that it 366+received on input, you should use --times-only to disable size and 367+content checks on subsequent rsync runs. 368+ 369 dit(bf(--protocol=NUM)) Force an older protocol version to be used. This 370 is useful for creating a batch file that is compatible with an older 371 version of rsync. For instance, if rsync 2.6.4 is being used with the 372--- old/sender.c 373+++ new/sender.c 374@@ -43,6 +43,7 @@ extern int batch_fd; 375 extern int write_batch; 376 extern struct stats stats; 377 extern struct file_list *the_file_list; 378+extern char *source_filter; 379 extern char *stdout_format; 380 381 382@@ -224,6 +225,26 @@ void send_files(struct file_list *flist, 383 enum logcode log_code = log_before_transfer ? FLOG : FINFO; 384 int f_xfer = write_batch < 0 ? 
batch_fd : f_out; 385 int i, j; 386+ char *filter_argv[MAX_FILTER_ARGS + 1]; 387+ char *tmp = 0; 388+ int unlink_tmp = 0; 389+ 390+ if (source_filter) { 391+ char *p; 392+ char *sep = " \t"; 393+ int i; 394+ for (p = strtok(source_filter, sep), i = 0; 395+ p && i < MAX_FILTER_ARGS; 396+ p = strtok(0, sep)) 397+ filter_argv[i++] = p; 398+ filter_argv[i] = NULL; 399+ if (p) { 400+ rprintf(FERROR, 401+ "Too many arguments to source-filter (> %d)\n", 402+ MAX_FILTER_ARGS); 403+ exit_cleanup(RERR_SYNTAX); 404+ } 405+ } 406 407 if (verbose > 2) 408 rprintf(FINFO, "send_files starting\n"); 409@@ -297,6 +318,7 @@ void send_files(struct file_list *flist, 410 return; 411 } 412 413+ unlink_tmp = 0; 414 fd = do_open(fname, O_RDONLY, 0); 415 if (fd == -1) { 416 if (errno == ENOENT) { 417@@ -325,6 +347,33 @@ void send_files(struct file_list *flist, 418 return; 419 } 420 421+ if (source_filter) { 422+ int fd2; 423+ char *tmpl = "/tmp/rsync-filtered_sourceXXXXXX"; 424+ 425+ tmp = strdup(tmpl); 426+ fd2 = mkstemp(tmp); 427+ if (fd2 == -1) { 428+ rprintf(FERROR, "mkstemp %s failed: %s\n", 429+ tmp, strerror(errno)); 430+ } else { 431+ int status; 432+ pid_t pid = run_filter_on_file(filter_argv, fd2, fd); 433+ close(fd); 434+ close(fd2); 435+ wait_process_with_flush(pid, &status); 436+ if (status != 0) { 437+ rprintf(FERROR, 438+ "bypassing source filter %s; exited with code: %d\n", 439+ source_filter, status); 440+ fd = do_open(fname, O_RDONLY, 0); 441+ } else { 442+ fd = do_open(tmp, O_RDONLY, 0); 443+ unlink_tmp = 1; 444+ } 445+ } 446+ } 447+ 448 if (st.st_size) { 449 int32 read_size = MAX(s->blength * 3, MAX_MAP_SIZE); 450 mbuf = map_file(fd, st.st_size, read_size, s->blength); 451@@ -366,6 +415,8 @@ void send_files(struct file_list *flist, 452 } 453 } 454 close(fd); 455+ if (unlink_tmp) 456+ unlink(tmp); 457 458 free_sums(s); 459 460--- old/proto.h 461+++ new/proto.h 462@@ -199,6 +199,7 @@ void maybe_log_item(struct file_struct * 463 void log_delete(char *fname, int mode); 464 
void log_exit(int code, const char *file, int line); 465 pid_t wait_process(pid_t pid, int *status_ptr, int flags); 466+void wait_process_with_flush(pid_t pid, int *exit_code_ptr); 467 int child_main(int argc, char *argv[]); 468 void start_server(int f_in, int f_out, int argc, char *argv[]); 469 int client_run(int f_in, int f_out, pid_t pid, int argc, char *argv[]); 470@@ -218,6 +219,8 @@ BOOL pm_process( char *FileName, 471 pid_t piped_child(char **command, int *f_in, int *f_out); 472 pid_t local_child(int argc, char **argv, int *f_in, int *f_out, 473 int (*child_main)(int, char*[])); 474+pid_t run_filter(char *command[], int out, int *pipe_to_filter); 475+pid_t run_filter_on_file(char *command[], int out, int in); 476 void end_progress(OFF_T size); 477 void show_progress(OFF_T ofs, OFF_T size); 478 int recv_files(int f_in, struct file_list *flist, char *local_name); 479--- old/rsync.1 480+++ new/rsync.1 481@@ -426,6 +426,7 @@ to the detailed description below for a 482 \-\-timeout=TIME set I/O timeout in seconds 483 \-I, \-\-ignore\-times don\&'t skip files that match size and time 484 \-\-size\-only skip files that match in size 485+ \-\-times\-only skip files that match in mod-time 486 \-\-modify\-window=NUM compare mod-times with reduced accuracy 487 \-T, \-\-temp\-dir=DIR create temporary files in directory DIR 488 \-y, \-\-fuzzy find similar file for basis if no dest file 489@@ -463,6 +464,8 @@ to the detailed description below for a 490 \-\-write\-batch=FILE write a batched update to FILE 491 \-\-only\-write\-batch=FILE like \-\-write\-batch but w/o updating dest 492 \-\-read\-batch=FILE read a batched update from FILE 493+ \-\-source\-filter=COMMAND filter file through COMMAND at source 494+ \-\-dest\-filter=COMMAND filter file through COMMAND at destination 495 \-\-protocol=NUM force an older protocol version to be used 496 \-\-checksum\-seed=NUM set block/file checksum seed (advanced) 497 \-4, \-\-ipv4 prefer IPv4 498@@ -2038,6 +2041,35 @@ file 
previously generated by \fB\-\-writ 499 If \fIFILE\fP is \fB\-\fP, the batch data will be read from standard input\&. 500 See the "BATCH MODE" section for details\&. 501 .IP 502+.IP "\fB\-\-source\-filter=COMMAND\fP" 503+This option allows the user to specify a 504+filter program that will be applied to the contents of all transferred 505+regular files before the data is sent to destination\&. COMMAND will receive 506+the data on its standard input and it should write the filtered data to 507+standard output\&. COMMAND should exit non-zero if it cannot process the 508+data or if it encounters an error when writing the data to stdout\&. 509+.IP 510+Example: \-\-source\-filter="gzip \-9" will cause remote files to be 511+compressed\&. 512+Use of \-\-source\-filter automatically enables \-\-whole\-file\&. 513+If your filter does not output the same number of bytes that it received on 514+input, you should use \-\-times\-only to disable size and content checks on 515+subsequent rsync runs\&. 516+.IP 517+.IP "\fB\-\-dest\-filter=COMMAND\fP" 518+This option allows you to specify a filter 519+program that will be applied to the contents of all transferred regular 520+files before the data is written to disk\&. COMMAND will receive the data on 521+its standard input and it should write the filtered data to standard 522+output\&. COMMAND should exit non-zero if it cannot process the data or if 523+it encounters an error when writing the data to stdout\&. 524+.IP 525+Example: \-\-dest\-filter="gzip \-9" will cause remote files to be compressed\&. 526+Use of \-\-dest\-filter automatically enables \-\-whole\-file\&. 527+If your filter does not output the same number of bytes that it 528+received on input, you should use \-\-times\-only to disable size and 529+content checks on subsequent rsync runs\&. 530+.IP 531 .IP "\fB\-\-protocol=NUM\fP" 532 Force an older protocol version to be used\&. This 533 is useful for creating a batch file that is compatible with an older 534