diff -ur rsync-2.6.9/checksum.c rsync-2.6.9fadvice/checksum.c --- rsync-2.6.9/checksum.c 2006-04-26 01:51:12.000000000 +0200 +++ rsync-2.6.9fadvice/checksum.c 2007-05-21 22:16:34.000000000 +0200 @@ -28,6 +28,9 @@ extern int checksum_seed; extern int protocol_version; +#ifdef WITH_DROP_CACHE +#define close(fd) fadv_close(fd) +#endif /* a simple 32 bit checksum that can be upadted from either end diff -ur rsync-2.6.9/cleanup.c rsync-2.6.9fadvice/cleanup.c --- rsync-2.6.9/cleanup.c 2006-10-15 17:43:26.000000000 +0200 +++ rsync-2.6.9fadvice/cleanup.c 2007-05-21 22:16:34.000000000 +0200 @@ -47,7 +47,11 @@ int fd; int ret; STRUCT_STAT st; - +#endif +#ifdef WITH_DROP_CACHE + fadv_close_all(); +#endif +#ifdef SHUTDOWN_ALL_SOCKETS max_fd = sysconf(_SC_OPEN_MAX) - 1; for (fd = max_fd; fd >= 0; fd--) { if ((ret = do_fstat(fd, &st)) == 0) { diff -ur rsync-2.6.9/config.h.in rsync-2.6.9fadvice/config.h.in --- rsync-2.6.9/config.h.in 2006-11-07 05:39:47.000000000 +0100 +++ rsync-2.6.9fadvice/config.h.in 2007-05-20 14:44:44.000000000 +0200 @@ -205,6 +205,9 @@ /* Define to 1 if you have the <memory.h> header file. */ #undef HAVE_MEMORY_H +/* Define to 1 if you have the `mincore' function. */ +#undef HAVE_MINCORE + /* Define to 1 if you have the `mkfifo' function. */ #undef HAVE_MKFIFO @@ -214,6 +217,9 @@ /* Define to 1 if you have the `mkstemp64' function. */ #undef HAVE_MKSTEMP64 +/* Define to 1 if you have the `mmap' function. */ +#undef HAVE_MMAP + /* Define to 1 if you have the `mtrace' function. */ #undef HAVE_MTRACE @@ -229,6 +235,9 @@ /* Define to 1 if you have the `open64' function. */ #undef HAVE_OPEN64 +/* Define to 1 if you have the `posix_fadvise64' function. */ +#undef HAVE_POSIX_FADVISE64 + /* Define to 1 if you have the `putenv' function. 
*/ #undef HAVE_PUTENV diff -ur rsync-2.6.9/configure rsync-2.6.9fadvice/configure --- rsync-2.6.9/configure 2006-11-07 05:39:47.000000000 +0100 +++ rsync-2.6.9fadvice/configure 2007-05-21 22:11:21.000000000 +0200 @@ -13480,7 +13480,7 @@ strlcat strlcpy strtol mallinfo getgroups setgroups geteuid getegid \ setlocale setmode open64 lseek64 mkstemp64 mtrace va_copy __va_copy \ strerror putenv iconv_open locale_charset nl_langinfo \ - sigaction sigprocmask + sigaction sigprocmask posix_fadvise64 mincore mmap do as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` { echo "$as_me:$LINENO: checking for $ac_func" >&5 diff -ur rsync-2.6.9/configure.in rsync-2.6.9fadvice/configure.in --- rsync-2.6.9/configure.in 2006-11-07 05:39:47.000000000 +0100 +++ rsync-2.6.9fadvice/configure.in 2007-05-20 14:40:05.000000000 +0200 @@ -528,7 +528,7 @@ strlcat strlcpy strtol mallinfo getgroups setgroups geteuid getegid \ setlocale setmode open64 lseek64 mkstemp64 mtrace va_copy __va_copy \ strerror putenv iconv_open locale_charset nl_langinfo \ - sigaction sigprocmask) + sigaction sigprocmask posix_fadvise64 mincore mmap) AC_CHECK_FUNCS(getpgrp tcgetpgrp) if test $ac_cv_func_getpgrp = yes; then diff -ur rsync-2.6.9/fileio.c rsync-2.6.9fadvice/fileio.c --- rsync-2.6.9/fileio.c 2006-04-26 01:51:13.000000000 +0200 +++ rsync-2.6.9fadvice/fileio.c 2007-05-21 22:16:34.000000000 +0200 @@ -27,15 +27,15 @@ #endif extern int sparse_files; - static char last_byte; static int last_sparse; + int sparse_end(int f) { if (last_sparse) { do_lseek(f,-1,SEEK_CUR); - return (write(f,&last_byte,1) == 1 ? 0 : -1); + return (fadv_write(f,&last_byte,1) == 1 ? 
0 : -1); } last_sparse = 0; return 0; @@ -62,7 +62,7 @@ if (l1 == len) return len; - ret = write(f, buf + l1, len - (l1+l2)); + ret = fadv_write(f, buf + l1, len - (l1+l2)); if (ret == -1 || ret == 0) return ret; else if (ret != (int) (len - (l1+l2))) @@ -85,7 +85,7 @@ char *bp = wf_writeBuf; while (wf_writeBufCnt > 0) { - if ((ret = write(f, bp, wf_writeBufCnt)) < 0) { + if ((ret = fadv_write(f, bp, wf_writeBufCnt)) < 0) { if (errno == EINTR) continue; return ret; @@ -236,7 +236,7 @@ map->p_len = window_size; while (read_size > 0) { - nread = read(map->fd, map->p + read_offset, read_size); + nread = fadv_read(map->fd, map->p + read_offset, read_size); if (nread <= 0) { if (!map->status) map->status = nread ? errno : ENODATA; diff -ur rsync-2.6.9/generator.c rsync-2.6.9fadvice/generator.c --- rsync-2.6.9/generator.c 2006-10-23 00:36:36.000000000 +0200 +++ rsync-2.6.9fadvice/generator.c 2007-05-21 22:16:34.000000000 +0200 @@ -93,6 +93,10 @@ extern struct file_list *the_file_list; extern struct filter_list_struct server_filter_list; +#ifdef WITH_DROP_CACHE +#define close(fd) fadv_close(fd) +#endif + static int deletion_count = 0; /* used to implement --max-delete */ /* For calling delete_file() */ diff -ur rsync-2.6.9/options.c rsync-2.6.9fadvice/options.c --- rsync-2.6.9/options.c 2006-10-24 02:36:38.000000000 +0200 +++ rsync-2.6.9fadvice/options.c 2007-05-21 22:16:34.000000000 +0200 @@ -56,6 +56,9 @@ int preserve_times = 0; int omit_dir_times = 0; int update_only = 0; +#ifdef WITH_DROP_CACHE +int drop_cache = 0; +#endif int cvs_exclude = 0; int dry_run = 0; int do_xfers = 1; @@ -293,6 +296,9 @@ rprintf(F," --backup-dir=DIR make backups into hierarchy based in DIR\n"); rprintf(F," --suffix=SUFFIX set backup suffix (default %s w/o --backup-dir)\n",BACKUP_SUFFIX); rprintf(F," -u, --update skip files that are newer on the receiver\n"); +#ifdef WITH_DROP_CACHE + rprintf(F," --drop-cache do not cache rsync files (POSIX_FADV_DONTNEED)\n"); +#endif rprintf(F," --inplace 
update destination files in-place (SEE MAN PAGE)\n"); rprintf(F," --append append data onto shorter files\n"); rprintf(F," -d, --dirs transfer directories without recursing\n"); @@ -465,6 +471,9 @@ {"size-only", 0, POPT_ARG_NONE, &size_only, 0, 0, 0 }, {"one-file-system", 'x', POPT_ARG_NONE, 0, 'x', 0, 0 }, {"update", 'u', POPT_ARG_NONE, &update_only, 0, 0, 0 }, +#ifdef WITH_DROP_CACHE + {"drop-cache", 0, POPT_ARG_NONE, &drop_cache, 0, 0, 0 }, +#endif {"existing", 0, POPT_ARG_NONE, &ignore_non_existing, 0, 0, 0 }, {"ignore-non-existing",0,POPT_ARG_NONE, &ignore_non_existing, 0, 0, 0 }, {"ignore-existing", 0, POPT_ARG_NONE, &ignore_existing, 0, 0, 0 }, @@ -567,6 +576,9 @@ rprintf(F," --log-file=FILE override the \"log file\" setting\n"); rprintf(F," --log-file-format=FMT override the \"log format\" setting\n"); rprintf(F," --sockopts=OPTIONS specify custom TCP options\n"); +#ifdef WITH_DROP_CACHE + rprintf(F," --drop-cache do not cache rsync files (POSIX_FADV_DONTNEED)\n"); +#endif rprintf(F," -v, --verbose increase verbosity\n"); #ifdef INET6 rprintf(F," -4, --ipv4 prefer IPv4\n"); @@ -594,6 +606,9 @@ {"log-file-format", 0, POPT_ARG_STRING, &logfile_format, 0, 0, 0 }, {"no-detach", 0, POPT_ARG_VAL, &no_detach, 1, 0, 0 }, {"port", 0, POPT_ARG_INT, &rsync_port, 0, 0, 0 }, +#ifdef WITH_DROP_CACHE + {"drop-cache", 0, POPT_ARG_NONE, &drop_cache, 0, 0, 0 }, +#endif {"sockopts", 0, POPT_ARG_STRING, &sockopts, 0, 0, 0 }, {"protocol", 0, POPT_ARG_INT, &protocol_version, 0, 0, 0 }, {"server", 0, POPT_ARG_NONE, &am_server, 0, 0, 0 }, @@ -1492,6 +1507,10 @@ if (!am_sender) args[ac++] = "--sender"; +#ifdef WITH_DROP_CACHE + if (drop_cache) + args[ac++] = "--drop-cache"; +#endif x = 1; argstr[0] = '-'; for (i = 0; i < verbose; i++) diff -ur rsync-2.6.9/receiver.c rsync-2.6.9fadvice/receiver.c --- rsync-2.6.9/receiver.c 2006-10-13 09:18:29.000000000 +0200 +++ rsync-2.6.9fadvice/receiver.c 2007-05-21 22:16:34.000000000 +0200 @@ -22,6 +22,7 @@ #include "rsync.h" + extern int 
verbose; extern int do_xfers; extern int am_server; @@ -55,12 +56,15 @@ extern struct file_list *the_file_list; extern struct filter_list_struct server_filter_list; +#ifdef WITH_DROP_CACHE +#define close(fd) fadv_close(fd) +#endif + static struct bitbag *delayed_bits = NULL; static int phase = 0; /* We're either updating the basis file or an identical copy: */ static int updating_basis; - /* * get_tmpname() - create a tmp filename for a given filename * diff -ur rsync-2.6.9/rsync.1 rsync-2.6.9fadvice/rsync.1 --- rsync-2.6.9/rsync.1 2006-11-07 05:39:51.000000000 +0100 +++ rsync-2.6.9fadvice/rsync.1 2007-05-20 17:04:52.000000000 +0200 @@ -398,6 +398,7 @@ \-O, \-\-omit\-dir\-times omit directories when preserving times \-\-super receiver attempts super-user activities \-S, \-\-sparse handle sparse files efficiently + \-\-drop\-cache drop cache continuously using fadvise \-n, \-\-dry\-run show what would have been transferred \-W, \-\-whole\-file copy files whole (without rsync algorithm) \-x, \-\-one\-file\-system don\&'t cross filesystem boundaries @@ -1028,6 +1029,13 @@ filesystem\&. It doesn\&'t seem to handle seeks over null regions correctly and ends up corrupting the files\&. .IP +.IP "\fB\-\-drop\-cache\fP" +Stop rsync from filling up the file system cache with the files it copies\&. Without this +option other processes, that had been crunching along happily on your system, will suddenly +become slow as they find their data being ousted from the cache\&. The \fB\-\-drop\-cache\fP function +uses posix_fadvise64 and mincore to do its work\&. It will only get compiled if configure can find posix_fadvise64 and mincore\&. +Rsync will only try to drop data from the cache that had not been cached before\&. +.IP .IP "\fB\-n, \-\-dry\-run\fP" This tells rsync to not do any file transfers, instead it will just report the actions it would have taken\&. 
diff -ur rsync-2.6.9/rsync.h rsync-2.6.9fadvice/rsync.h --- rsync-2.6.9/rsync.h 2006-10-24 05:31:30.000000000 +0200 +++ rsync-2.6.9fadvice/rsync.h 2007-05-21 22:18:02.000000000 +0200 @@ -888,3 +888,16 @@ #ifdef MAINTAINER_MODE const char *get_panic_action(void); #endif + +#if defined HAVE_POSIX_FADVISE64 && defined HAVE_MINCORE && defined HAVE_MMAP +#define WITH_DROP_CACHE 1 +#include <sys/mman.h> +int fadv_close(int fd); +void fadv_close_all(void); +#endif + +ssize_t fadv_write(int fd, const void *buf, size_t count); +ssize_t fadv_read(int fd, void *buf, size_t count); + + + diff -ur rsync-2.6.9/rsync.yo rsync-2.6.9fadvice/rsync.yo --- rsync-2.6.9/rsync.yo 2006-11-07 05:39:47.000000000 +0100 +++ rsync-2.6.9fadvice/rsync.yo 2007-05-20 17:07:24.000000000 +0200 @@ -887,6 +887,17 @@ filesystem. It doesn't seem to handle seeks over null regions correctly and ends up corrupting the files. +dit(bf(--drop-cache)) Stop rsync from disturbing the file system cache with +the data from the files it copies. Without this option other processes, that +had been crunching along happily using cached data, will suddenly become +slow as they find their favorite data blocks being evicted from the +cache by the files read and written by rsync. Since rsync has to wait until +the data is written to disk, before it can drop the cache, this option will +slow rsync down considerably, especially with small files and short copy +jobs. The bf(--drop-cache) function uses posix_fadvise64 and mincore to do +its work. It will only get compiled if configure can find posix_fadvise64 +and mincore. + dit(bf(-n, --dry-run)) This tells rsync to not do any file transfers, instead it will just report the actions it would have taken. 
diff -ur rsync-2.6.9/sender.c rsync-2.6.9fadvice/sender.c --- rsync-2.6.9/sender.c 2006-09-20 03:53:32.000000000 +0200 +++ rsync-2.6.9fadvice/sender.c 2007-05-21 22:16:34.000000000 +0200 @@ -45,6 +45,9 @@ extern struct file_list *the_file_list; extern char *stdout_format; +#ifdef WITH_DROP_CACHE +#define close(fd) fadv_close(fd) +#endif /** * @file diff -ur rsync-2.6.9/t_unsafe.c rsync-2.6.9fadvice/t_unsafe.c --- rsync-2.6.9/t_unsafe.c 2006-04-26 01:51:15.000000000 +0200 +++ rsync-2.6.9fadvice/t_unsafe.c 2007-05-21 22:16:34.000000000 +0200 @@ -24,7 +24,7 @@ #include "rsync.h" -int dry_run, read_only, list_only, verbose; +int dry_run, read_only, list_only, verbose, drop_cache; int preserve_perms = 0; int diff -ur rsync-2.6.9/util.c rsync-2.6.9fadvice/util.c --- rsync-2.6.9/util.c 2006-10-14 22:31:33.000000000 +0200 +++ rsync-2.6.9fadvice/util.c 2007-05-21 22:20:34.000000000 +0200 @@ -25,6 +25,9 @@ extern int verbose; extern int dry_run; +#ifdef WITH_DROP_CACHE +extern int drop_cache; +#endif extern int module_id; extern int modify_window; extern int relative_paths; @@ -40,6 +43,217 @@ unsigned int curr_dir_len; int curr_dir_depth; /* This is only set for a sanitizing daemon. 
*/ +#ifdef WITH_DROP_CACHE +#define FADV_BUFFER_SIZE (1024*1024*16) /* bytes the offset must advance past the last drop point before fadv_drop() acts again */ + +static struct stat fadv_fd_stat[1024]; +static off_t fadv_fd_pos[1024]; /* per-fd offset recorded by the last fadv_drop() pass */ +static unsigned char *fadv_core_ptr[1024]; /* per-fd page table; pages with bit 0 set are skipped when dropping */ +static int fadv_max_fd = 0; +static int fadv_close_ring_tail = 0; +static int fadv_close_ring_head = 0; +static int fadv_close_ring_size = 0; +static int fadv_close_ring[1024]; +static int fadv_close_buffer_size = 0; +static size_t fadv_pagesize; + +static void fadv_fd_init_func(void){ + static int fadv_fd_init = 0; + if (fadv_fd_init == 0){ + int i; + fadv_fd_init = 1; + fadv_pagesize = getpagesize(); + if (fadv_max_fd == 0){ + fadv_max_fd = sysconf(_SC_OPEN_MAX) - 20; + if (fadv_max_fd < 0) + fadv_max_fd = 1; + if (fadv_max_fd > 1000) + fadv_max_fd = 1000; + } + for (i=0;i 99) { + rprintf(FINFO,"%d: ",fd); + for (pi = 0; pi <= stat.st_size/fadv_pagesize; pi++) { + if ((fadv_core_ptr[fd])[pi]&1) { + rprintf(FINFO,"%lu ", (unsigned long)pi); + } + } + rprintf(FINFO,"\n"); + } + munmap(pa, stat.st_size); + } + } +} + +static void fadv_drop(int fd, int sync){ /* Advise the kernel (POSIX_FADV_DONTNEED) to drop pages of fd lying behind the current offset; a non-zero sync calls fdatasync() first. */ + /* trail 1 MB behind in dropping. we do this to make + sure that the same block or stripe does not have + to be written twice */ + off_t pos = lseek(fd,0,SEEK_CUR) - 1024*1024; /* off_t, not int: offsets beyond 2 GB must not be truncated */ + if (fd < 0 || fd > fadv_max_fd){ /* fd is used as an array index below */ + return; + } + if ( fadv_fd_pos[fd] < pos - FADV_BUFFER_SIZE ) { + if (sync) { + /* if the file is not flushed to disk before calling fadvise, + then the Cache will not be freed and the advise gets ignored + this does give a severe hit on performance. If only there + was a way to mark cache so that it gets release once the data + is written to disk. */ + fdatasync(fd); + } + if (fadv_core_ptr[fd] != NULL) { + size_t pi; + if (pos <= fadv_fd_stat[fd].st_size){ /* pos lies within the recorded size, so the page table may be indexed up to pos/fadv_pagesize */ + for (pi = fadv_fd_pos[fd]/fadv_pagesize; pi <= pos/fadv_pagesize; pi++) { + if (! 
(fadv_core_ptr[fd][pi]&1)) { + posix_fadvise64(fd, pi*fadv_pagesize, fadv_pagesize, POSIX_FADV_DONTNEED); + } + } + } else { + posix_fadvise64(fd, fadv_fd_stat[fd].st_size, pos-fadv_fd_stat[fd].st_size, POSIX_FADV_DONTNEED); + } + } + else { + posix_fadvise64(fd, 0, pos, POSIX_FADV_DONTNEED); + } + fadv_fd_pos[fd] = pos; + } +} + +#endif + +ssize_t fadv_write(int fd, const void *buf, size_t count) +{ + int ret = write(fd, buf, count); +#ifdef WITH_DROP_CACHE + if (drop_cache) { + fadv_drop(fd,1); + } +#endif + return ret; +} + + + +ssize_t fadv_read(int fd, void *buf, size_t count) +{ + int ret; +#ifdef WITH_DROP_CACHE + if (drop_cache) { + fadv_fd_init_func(); + fadv_get_core(fd); + } +#endif + ret = read(fd, buf, count); +#ifdef WITH_DROP_CACHE + if (drop_cache) { + fadv_drop(fd,0); + } +#endif + return ret; +} + +#ifdef WITH_DROP_CACHE +void fadv_close_all(void){ + /* printf ("%i\n",fadv_close_ring_size); */ + while (fadv_close_ring_size > 0){ + fdatasync(fadv_close_ring[fadv_close_ring_tail]); + if (fadv_core_ptr[fadv_close_ring[fadv_close_ring_tail]]){ + size_t pi; + for (pi = 0; pi <= fadv_fd_stat[fadv_close_ring[fadv_close_ring_tail]].st_size/fadv_pagesize; pi++) { + if (!(fadv_core_ptr[fadv_close_ring[fadv_close_ring_tail]][pi]&1)) { + posix_fadvise64(fadv_close_ring[fadv_close_ring_tail], pi*fadv_pagesize, fadv_pagesize, POSIX_FADV_DONTNEED); + } + } + /* if the file has grown, drop the rest */ + //posix_fadvise64(fadv_close_ring[fadv_close_ring_tail], fadv_fd_stat[fadv_close_ring[fadv_close_ring_tail]].st_size,0, POSIX_FADV_DONTNEED); + + free(fadv_core_ptr[fadv_close_ring[fadv_close_ring_tail]]); + fadv_core_ptr[fadv_close_ring[fadv_close_ring_tail]] = NULL; + fadv_fd_stat[fadv_close_ring[fadv_close_ring_tail]].st_size = 0; + fadv_fd_stat[fadv_close_ring[fadv_close_ring_tail]].st_ino = 0; + fadv_fd_stat[fadv_close_ring[fadv_close_ring_tail]].st_dev = 0; + } + else { + posix_fadvise64(fadv_close_ring[fadv_close_ring_tail], 0, 0,POSIX_FADV_DONTNEED); + } + 
fadv_close_ring_size--; + close(fadv_close_ring[fadv_close_ring_tail]); + fadv_close_ring_tail = (fadv_close_ring_tail + 1) % fadv_max_fd; + fadv_close_buffer_size = 0; + } +} + +int fadv_close(int fd){ /* close() replacement: when drop_cache is set, a dup() of fd is queued in fadv_close_ring so the cache drop can be done later in a batch; fd itself is always closed and close()'s result returned. */ + if (drop_cache) { + /* if the file is not flushed to disk before calling fadvise, + then the Cache will not be freed and the advise gets ignored + this does give a severe hit on performance. So instead of doing + it right away, we save us a copy of the filehandle and do it + some time before we are out of filehandles. This speeds + up operation for small files massively. It is directly + related to the number of spare file handles you have. */ + int newfd = dup(fd); /* NOTE(review): a failed dup() (-1) is not checked before newfd is used as an array index below */ + off_t pos = lseek(fd,0,SEEK_CUR); /* off_t, not int: offsets beyond 2 GB must not be truncated */ + fadv_fd_init_func(); + fadv_core_ptr[newfd] = fadv_core_ptr[fd]; + fadv_fd_stat[newfd].st_size = fadv_fd_stat[fd].st_size ; + fadv_core_ptr[fd] = NULL; + fadv_close_buffer_size += pos - fadv_fd_pos[fd]; + fadv_close_ring[fadv_close_ring_head] = newfd; + fadv_close_ring_head = (fadv_close_ring_head + 1) % fadv_max_fd; + fadv_close_ring_size ++; + if (fadv_close_ring_size == fadv_max_fd || fadv_close_buffer_size > 1024*1024 ){ + /* it seems fastest to drop things 'in groups' */ + fadv_close_all(); + } + }; + return close(fd); +} + + +#define close(fd) fadv_close(fd) +#endif + /* Set a fd into nonblocking mode. */ void set_nonblocking(int fd) { @@ -220,7 +434,7 @@ total_written = 0; while (len > 0) { - int written = write(desc, ptr, len); + int written = fadv_write(desc, ptr, len); if (written < 0) { if (errno == EINTR) continue; @@ -252,7 +466,7 @@ return len; do { - n_chars = read(desc, ptr, len); + n_chars = fadv_read(desc, ptr, len); } while (n_chars < 0 && errno == EINTR); return n_chars;