diff -ur rsync-3.0.9.orig/checksum.c rsync-3.0.9/checksum.c --- rsync-3.0.9.orig/checksum.c 2010-06-30 18:17:26.000000000 +0200 +++ rsync-3.0.9/checksum.c 2011-12-01 16:41:38.000000000 +0100 @@ -23,6 +23,9 @@ extern int checksum_seed; extern int protocol_version; +#ifdef WITH_DROP_CACHE +#define close(fd) fadv_close(fd) +#endif /* a simple 32 bit checksum that can be upadted from either end diff -ur rsync-3.0.9.orig/cleanup.c rsync-3.0.9/cleanup.c --- rsync-3.0.9.orig/cleanup.c 2011-02-21 19:20:58.000000000 +0100 +++ rsync-3.0.9/cleanup.c 2011-12-01 16:41:38.000000000 +0100 @@ -47,7 +47,11 @@ int fd; int ret; STRUCT_STAT st; - +#endif +#ifdef WITH_DROP_CACHE + fadv_close_all(); +#endif +#ifdef SHUTDOWN_ALL_SOCKETS max_fd = sysconf(_SC_OPEN_MAX) - 1; for (fd = max_fd; fd >= 0; fd--) { if ((ret = do_fstat(fd, &st)) == 0) { diff -ur rsync-3.0.9.orig/config.h.in rsync-3.0.9/config.h.in --- rsync-3.0.9.orig/config.h.in 2011-09-23 18:41:30.000000000 +0200 +++ rsync-3.0.9/config.h.in 2011-12-01 16:41:38.000000000 +0100 @@ -244,6 +244,9 @@ /* Define to 1 if you have the header file. */ #undef HAVE_MEMORY_H +/* Define to 1 if you have the `mincore' function. */ +#undef HAVE_MINCORE + /* Define to 1 if you have the `mkfifo' function. */ #undef HAVE_MKFIFO @@ -256,6 +259,9 @@ /* Define to 1 if the system has the type `mode_t'. */ #undef HAVE_MODE_T +/* Define to 1 if you have the `mmap' function. */ +#undef HAVE_MMAP + /* Define to 1 if you have the `mtrace' function. */ #undef HAVE_MTRACE @@ -298,6 +304,9 @@ /* true if you have posix ACLs */ #undef HAVE_POSIX_ACLS +/* Define to 1 if you have the `posix_fadvise64' function. */ +#undef HAVE_POSIX_FADVISE64 + /* Define to 1 if you have the `putenv' function. */ #undef HAVE_PUTENV diff -ur rsync-3.0.9.orig/configure.ac rsync-3.0.9/configure.ac --- rsync-3.0.9.orig/configure.ac 2011-09-23 18:13:53.000000000 +0200 +++ rsync-3.0.9/configure.ac 2011-12-01 16:41:58.000000000 +0100 @@ -573,7 +573,7 @@ strlcat strlcpy strtol mallinfo getgroups setgroups geteuid getegid \ setlocale setmode open64 lseek64 mkstemp64 mtrace va_copy __va_copy \ seteuid strerror putenv iconv_open locale_charset nl_langinfo getxattr \ - extattr_get_link sigaction sigprocmask setattrlist \ + extattr_get_link sigaction sigprocmask setattrlist mmap mincore posix_fadvise64 \ utimensat) dnl cygwin iconv.h defines iconv_open as libiconv_open Only in rsync-3.0.9: configure.ac~ diff -ur rsync-3.0.9.orig/configure.sh rsync-3.0.9/configure.sh --- rsync-3.0.9.orig/configure.sh 2011-09-23 18:41:30.000000000 +0200 +++ rsync-3.0.9/configure.sh 2011-12-01 16:42:09.000000000 +0100 @@ -7448,7 +7448,7 @@ strlcat strlcpy strtol mallinfo getgroups setgroups geteuid getegid \ setlocale setmode open64 lseek64 mkstemp64 mtrace va_copy __va_copy \ seteuid strerror putenv iconv_open locale_charset nl_langinfo getxattr \ - extattr_get_link sigaction sigprocmask setattrlist \ + extattr_get_link sigaction sigprocmask setattrlist mmap mincore posix_fadvise64 \ utimensat do : as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` Only in rsync-3.0.9: configure.sh~ diff -ur rsync-3.0.9.orig/fileio.c rsync-3.0.9/fileio.c --- rsync-3.0.9.orig/fileio.c 2011-02-21 20:32:51.000000000 +0100 +++ rsync-3.0.9/fileio.c 2011-12-01 16:41:38.000000000 +0100 @@ -43,7 +43,7 @@ ret = -1; else { do { - ret = write(f, "", 1); + ret = fadv_write(f, "", 1); } while (ret < 0 && errno == EINTR); ret = ret <= 0 ? -1 : 0; @@ -73,7 +73,7 @@ do_lseek(f, sparse_seek, SEEK_CUR); sparse_seek = l2; - while ((ret = write(f, buf + l1, len - (l1+l2))) <= 0) { + while ((ret = fadv_write(f, buf + l1, len - (l1+l2))) <= 0) { if (ret < 0 && errno == EINTR) continue; return ret; @@ -96,7 +96,7 @@ char *bp = wf_writeBuf; while (wf_writeBufCnt > 0) { - if ((ret = write(f, bp, wf_writeBufCnt)) < 0) { + if ((ret = fadv_write(f, bp, wf_writeBufCnt)) < 0) { if (errno == EINTR) continue; return ret; @@ -246,7 +246,7 @@ map->p_len = window_size; while (read_size > 0) { - nread = read(map->fd, map->p + read_offset, read_size); + nread = fadv_read(map->fd, map->p + read_offset, read_size); if (nread <= 0) { if (!map->status) map->status = nread ? errno : ENODATA; diff -ur rsync-3.0.9.orig/generator.c rsync-3.0.9/generator.c --- rsync-3.0.9.orig/generator.c 2011-09-10 22:38:11.000000000 +0200 +++ rsync-3.0.9/generator.c 2011-12-01 16:41:38.000000000 +0100 @@ -113,6 +113,10 @@ static int need_retouch_dir_perms; static const char *solo_file = NULL; +#ifdef WITH_DROP_CACHE +#define close(fd) fadv_close(fd) +#endif + /* For calling delete_item() and delete_dir_contents(). */ #define DEL_NO_UID_WRITE (1<<0) /* file/dir has our uid w/o write perm */ #define DEL_RECURSE (1<<1) /* if dir, delete all contents */ diff -ur rsync-3.0.9.orig/options.c rsync-3.0.9/options.c --- rsync-3.0.9.orig/options.c 2011-09-14 00:41:26.000000000 +0200 +++ rsync-3.0.9/options.c 2011-12-01 16:41:38.000000000 +0100 @@ -60,6 +60,9 @@ int preserve_gid = 0; int preserve_times = 0; int update_only = 0; +#ifdef WITH_DROP_CACHE +int drop_cache = 0; +#endif int cvs_exclude = 0; int dry_run = 0; int do_xfers = 1; @@ -325,6 +328,9 @@ rprintf(F," --backup-dir=DIR make backups into hierarchy based in DIR\n"); rprintf(F," --suffix=SUFFIX set backup suffix (default %s w/o --backup-dir)\n",BACKUP_SUFFIX); rprintf(F," -u, --update skip files that are newer on the receiver\n"); +#ifdef WITH_DROP_CACHE + rprintf(F," --drop-cache do not cache rsync files (POSIX_FADV_DONTNEED)\n"); +#endif rprintf(F," --inplace update destination files in-place (SEE MAN PAGE)\n"); rprintf(F," --append append data onto shorter files\n"); rprintf(F," --append-verify like --append, but with old data in file checksum\n"); @@ -533,6 +539,9 @@ {"no-one-file-system",0, POPT_ARG_VAL, &one_file_system, 0, 0, 0 }, {"no-x", 0, POPT_ARG_VAL, &one_file_system, 0, 0, 0 }, {"update", 'u', POPT_ARG_NONE, &update_only, 0, 0, 0 }, +#ifdef WITH_DROP_CACHE + {"drop-cache", 0, POPT_ARG_NONE, &drop_cache, 0, 0, 0 }, +#endif {"existing", 0, POPT_ARG_NONE, &ignore_non_existing, 0, 0, 0 }, {"ignore-non-existing",0,POPT_ARG_NONE, &ignore_non_existing, 0, 0, 0 }, {"ignore-existing", 0, POPT_ARG_NONE, &ignore_existing, 0, 0, 0 }, @@ -670,6 +679,9 @@ rprintf(F," --log-file=FILE override the \"log file\" setting\n"); rprintf(F," --log-file-format=FMT override the \"log format\" setting\n"); rprintf(F," --sockopts=OPTIONS specify custom TCP options\n"); +#ifdef WITH_DROP_CACHE + rprintf(F," --drop-cache do not cache rsync files (POSIX_FADV_DONTNEED)\n"); +#endif rprintf(F," -v, --verbose increase verbosity\n"); rprintf(F," -4, --ipv4 prefer IPv4\n"); rprintf(F," -6, --ipv6 prefer IPv6\n"); @@ -693,6 +705,9 @@ {"log-file", 0, POPT_ARG_STRING, &logfile_name, 0, 0, 0 }, {"log-file-format", 0, POPT_ARG_STRING, &logfile_format, 0, 0, 0 }, {"port", 0, POPT_ARG_INT, &rsync_port, 0, 0, 0 }, +#ifdef WITH_DROP_CACHE + {"drop-cache", 0, POPT_ARG_NONE, &drop_cache, 0, 0, 0 }, +#endif {"sockopts", 0, POPT_ARG_STRING, &sockopts, 0, 0, 0 }, {"protocol", 0, POPT_ARG_INT, &protocol_version, 0, 0, 0 }, {"server", 0, POPT_ARG_NONE, &am_server, 0, 0, 0 }, @@ -1730,6 +1745,10 @@ if (!am_sender) args[ac++] = "--sender"; +#ifdef WITH_DROP_CACHE + if (drop_cache) + args[ac++] = "--drop-cache"; +#endif x = 1; argstr[0] = '-'; diff -ur rsync-3.0.9.orig/proto.h rsync-3.0.9/proto.h --- rsync-3.0.9.orig/proto.h 2011-09-23 18:44:36.000000000 +0200 +++ rsync-3.0.9/proto.h 2011-12-01 16:41:38.000000000 +0100 @@ -328,6 +328,10 @@ uid_t recv_user_name(int f, uid_t uid); gid_t recv_group_name(int f, gid_t gid, uint16 *flags_ptr); void recv_id_list(int f, struct file_list *flist); +ssize_t fadv_write(int fd, const void *buf, size_t count); +ssize_t fadv_read(int fd, void *buf, size_t count); +void fadv_close_all(void); +int fadv_close(int fd); void set_nonblocking(int fd); void set_blocking(int fd); int fd_pair(int fd[2]); diff -ur rsync-3.0.9.orig/receiver.c rsync-3.0.9/receiver.c --- rsync-3.0.9.orig/receiver.c 2011-03-26 18:01:37.000000000 +0100 +++ rsync-3.0.9/receiver.c 2011-12-01 16:41:38.000000000 +0100 @@ -21,6 +21,7 @@ #include "rsync.h" + extern int verbose; extern int dry_run; extern int do_xfers; @@ -58,6 +59,10 @@ extern struct file_list *cur_flist, *first_flist, *dir_flist; extern struct filter_list_struct daemon_filter_list; +#ifdef WITH_DROP_CACHE +#define close(fd) fadv_close(fd) +#endif + static struct bitbag *delayed_bits = NULL; static int phase = 0, redoing = 0; static flist_ndx_list batch_redo_list; diff -ur rsync-3.0.9.orig/rsync.1 rsync-3.0.9/rsync.1 --- rsync-3.0.9.orig/rsync.1 2011-09-23 18:42:26.000000000 +0200 +++ rsync-3.0.9/rsync.1 2011-12-01 16:41:38.000000000 +0100 @@ -445,6 +445,7 @@ \-\-super receiver attempts super\-user activities \-\-fake\-super store/recover privileged attrs using xattrs \-S, \-\-sparse handle sparse files efficiently + \-\-drop\-cache drop cache continuosly using fadvise \-n, \-\-dry\-run perform a trial run with no changes made \-W, \-\-whole\-file copy files whole (w/o delta\-xfer algorithm) \-x, \-\-one\-file\-system don'\&t cross filesystem boundaries @@ -1283,6 +1284,13 @@ script (from the support directory) as a substitute for an actual remote shell (see \fB\-\-rsh\fP). .IP +.IP "\fB\-\-drop\-cache\fP" +Stop rsync from filling up the file system cache with the files it copies\&. Without this +option other processes, that had been crunching along happily on your system, will suddenly +become slow as they find their data being outsed from the cache. The \fB\-\-drop\-cache\fP function +uses posix_fadvise64 and mincore todo its work\&. It will only get compiled if configure can find posix_fadvise64 and mincore\&. +Rsync will tries only to drop data from cache that has not been cached before. +.IP This option is overridden by both \fB\-\-super\fP and \fB\-\-no\-super\fP. .IP See also the \(dq\&fake super\(dq\& setting in the daemon\(cq\&s rsyncd.conf file. diff -ur rsync-3.0.9.orig/rsync.h rsync-3.0.9/rsync.h --- rsync-3.0.9.orig/rsync.h 2011-02-21 20:32:51.000000000 +0100 +++ rsync-3.0.9/rsync.h 2011-12-01 16:41:38.000000000 +0100 @@ -1168,3 +1168,16 @@ #ifdef MAINTAINER_MODE const char *get_panic_action(void); #endif + +#if defined HAVE_POSIX_FADVISE64 && defined HAVE_MINCORE && defined HAVE_MMAP +#define WITH_DROP_CACHE 1 +#include +int fadv_close(int fd); +void fadv_close_all(void); +#endif + +ssize_t fadv_write(int fd, const void *buf, size_t count); +ssize_t fadv_read(int fd, void *buf, size_t count); + + + diff -ur rsync-3.0.9.orig/rsync.yo rsync-3.0.9/rsync.yo --- rsync-3.0.9.orig/rsync.yo 2011-09-23 18:13:53.000000000 +0200 +++ rsync-3.0.9/rsync.yo 2011-12-01 16:41:38.000000000 +0100 @@ -1122,6 +1122,17 @@ up less space on the destination. Conflicts with bf(--inplace) because it's not possible to overwrite data in a sparse fashion. +dit(bf(--drop-cache)) Stop rsync from disturbing the file system cache with +the data from the files it copies. Without this option other processes, that +had been crunching along happily using cached data, will suddenly become +slow as they find their favorite data blocks data being evicted from the +cache by the files read and written by rsync. Since rsync has to wait until +the data is written to disk, before it can drop the cache, this option will +slow rsync down considerably, especially with small files and short copy +jobs. The bf(--drop-cache) function uses posix_fadvise64 and mincore todo +its work. It will only get compiled if configure can find posix_fadvise64 +and mincore. + dit(bf(-n, --dry-run)) This makes rsync perform a trial run that doesn't make any changes (and produces mostly the same output as a real run). It is most commonly used in combination with the bf(-v, --verbose) and/or diff -ur rsync-3.0.9.orig/sender.c rsync-3.0.9/sender.c --- rsync-3.0.9.orig/sender.c 2009-12-13 02:23:03.000000000 +0100 +++ rsync-3.0.9/sender.c 2011-12-01 16:41:38.000000000 +0100 @@ -44,6 +44,9 @@ extern int write_batch; extern struct stats stats; extern struct file_list *cur_flist, *first_flist, *dir_flist; +#ifdef WITH_DROP_CACHE +#define close(fd) fadv_close(fd) +#endif /** * @file diff -ur rsync-3.0.9.orig/t_unsafe.c rsync-3.0.9/t_unsafe.c --- rsync-3.0.9.orig/t_unsafe.c 2009-01-17 22:41:35.000000000 +0100 +++ rsync-3.0.9/t_unsafe.c 2011-12-01 16:41:38.000000000 +0100 @@ -23,7 +23,7 @@ #include "rsync.h" -int dry_run = 0; +int dry_run = 0, drop_cache; int am_root = 0; int read_only = 0; int list_only = 0; diff -ur rsync-3.0.9.orig/util.c rsync-3.0.9/util.c --- rsync-3.0.9.orig/util.c 2011-04-23 00:51:55.000000000 +0200 +++ rsync-3.0.9/util.c 2011-12-01 16:41:38.000000000 +0100 @@ -35,6 +35,10 @@ extern mode_t orig_umask; extern char *partial_dir; extern struct filter_list_struct daemon_filter_list; +#ifdef WITH_DROP_CACHE +#include +extern int drop_cache; +#endif int sanitize_paths = 0; @@ -42,6 +46,222 @@ unsigned int curr_dir_len; int curr_dir_depth; /* This is only set for a sanitizing daemon. */ +#ifdef WITH_DROP_CACHE +#define FADV_BUFFER_SIZE 1024*1024*16 + +static struct stat fadv_fd_stat[1024]; +static off_t fadv_fd_pos[1024]; +static unsigned char *fadv_core_ptr[1024]; +static int fadv_max_fd = 0; +static int fadv_close_ring_tail = 0; +static int fadv_close_ring_head = 0; +static int fadv_close_ring_size = 0; +static int fadv_close_ring[1024]; +static int fadv_close_buffer_size = 0; +static size_t fadv_pagesize; + +static void fadv_fd_init_func(void) +{ + static int fadv_fd_init = 0; + if (fadv_fd_init == 0){ + int i; + fadv_fd_init = 1; + fadv_pagesize = getpagesize(); + if (fadv_max_fd == 0){ + fadv_max_fd = sysconf(_SC_OPEN_MAX) - 20; + if (fadv_max_fd < 0) + fadv_max_fd = 1; + if (fadv_max_fd > 1000) + fadv_max_fd = 1000; + } + for (i=0;i 99) { + rprintf(FINFO,"%d: ",fd); + for (pi = 0; pi <= stat.st_size/fadv_pagesize; pi++) { + if ((fadv_core_ptr[fd])[pi]&1) { + rprintf(FINFO,"%lu ", (unsigned long)pi); + } + } + rprintf(FINFO,"\n"); + } + munmap(pa, stat.st_size); + } + } +} + +static void fadv_drop(int fd, int sync) +{ + /* trail 1 MB behind in dropping. we do this to make + sure that the same block or stripe does not have + to be written twice */ + int pos = lseek(fd,0,SEEK_CUR) - 1024*1024; + if (fd > fadv_max_fd){ + return; + } + if ( fadv_fd_pos[fd] < pos - FADV_BUFFER_SIZE ) { + if (sync) { + /* if the file is not flushed to disk before calling fadvise, + then the Cache will not be freed and the advise gets ignored + this does give a severe hit on performance. If only there + was a way to mark cache so that it gets release once the data + is written to disk. */ + fdatasync(fd); + } + if (fadv_core_ptr[fd] != NULL) { + size_t pi; + if (pos < fadv_fd_stat[fd].st_size){ + for (pi = fadv_fd_pos[fd]/fadv_pagesize; pi <= pos/fadv_pagesize; pi++) { + if (! (fadv_core_ptr[fd][pi]&1)) { + posix_fadvise64(fd, pi*fadv_pagesize, fadv_pagesize, POSIX_FADV_DONTNEED); + } + } + } else { + posix_fadvise64(fd, fadv_fd_stat[fd].st_size, pos-fadv_fd_stat[fd].st_size, POSIX_FADV_DONTNEED); + } + } + else { + posix_fadvise64(fd, 0, pos, POSIX_FADV_DONTNEED); + } + fadv_fd_pos[fd] = pos; + } +} + +#endif + +ssize_t fadv_write(int fd, const void *buf, size_t count) +{ + int ret = write(fd, buf, count); +#ifdef WITH_DROP_CACHE + if (drop_cache) { + fadv_drop(fd,1); + } +#endif + return ret; +} + + + +ssize_t fadv_read(int fd, void *buf, size_t count) +{ + int ret; +#ifdef WITH_DROP_CACHE + if (drop_cache) { + fadv_fd_init_func(); + fadv_get_core(fd); + } +#endif + ret = read(fd, buf, count); +#ifdef WITH_DROP_CACHE + if (drop_cache) { + fadv_drop(fd,0); + } +#endif + return ret; +} + +#ifdef WITH_DROP_CACHE +void fadv_close_all(void) +{ + /* printf ("%i\n",fadv_close_ring_size); */ + while (fadv_close_ring_size > 0){ + fdatasync(fadv_close_ring[fadv_close_ring_tail]); + if (fadv_core_ptr[fadv_close_ring[fadv_close_ring_tail]]){ + size_t pi; + for (pi = 0; pi <= fadv_fd_stat[fadv_close_ring[fadv_close_ring_tail]].st_size/fadv_pagesize; pi++) { + if (!(fadv_core_ptr[fadv_close_ring[fadv_close_ring_tail]][pi]&1)) { + posix_fadvise64(fadv_close_ring[fadv_close_ring_tail], pi*fadv_pagesize, fadv_pagesize, POSIX_FADV_DONTNEED); + } + } + /* if the file has grown, drop the rest */ + //posix_fadvise64(fadv_close_ring[fadv_close_ring_tail], fadv_fd_stat[fadv_close_ring[fadv_close_ring_tail]].st_size,0, POSIX_FADV_DONTNEED); + + free(fadv_core_ptr[fadv_close_ring[fadv_close_ring_tail]]); + fadv_core_ptr[fadv_close_ring[fadv_close_ring_tail]] = NULL; + fadv_fd_stat[fadv_close_ring[fadv_close_ring_tail]].st_size = 0; + fadv_fd_stat[fadv_close_ring[fadv_close_ring_tail]].st_ino = 0; + fadv_fd_stat[fadv_close_ring[fadv_close_ring_tail]].st_dev = 0; + } + else { + posix_fadvise64(fadv_close_ring[fadv_close_ring_tail], 0, 0,POSIX_FADV_DONTNEED); + } + fadv_close_ring_size--; + close(fadv_close_ring[fadv_close_ring_tail]); + fadv_close_ring_tail = (fadv_close_ring_tail + 1) % fadv_max_fd; + fadv_close_buffer_size = 0; + } +} + +int fadv_close(int fd) +{ + if (drop_cache) { + /* if the file is not flushed to disk before calling fadvise, + then the Cache will not be freed and the advise gets ignored + this does give a severe hit on performance. So instead of doing + it right away, we save us a copy of the filehandle and do it + some time before we are out of filehandles. This speeds + up operation for small files massively. It is directly + related to the number of spare file handles you have. */ + int newfd = dup(fd); + int pos = lseek(fd,0,SEEK_CUR); + fadv_fd_init_func(); + fadv_core_ptr[newfd] = fadv_core_ptr[fd]; + fadv_fd_stat[newfd].st_size = fadv_fd_stat[fd].st_size ; + fadv_core_ptr[fd] = NULL; + fadv_close_buffer_size += pos - fadv_fd_pos[fd]; + fadv_close_ring[fadv_close_ring_head] = newfd; + fadv_close_ring_head = (fadv_close_ring_head + 1) % fadv_max_fd; + fadv_close_ring_size ++; + if (fadv_close_ring_size == fadv_max_fd || fadv_close_buffer_size > 1024*1024 ){ + /* it seems fastest to drop things 'in groups' */ + fadv_close_all(); + } + }; + return close(fd); +} + + +#define close(fd) fadv_close(fd) +#endif + /* Set a fd into nonblocking mode. */ void set_nonblocking(int fd) { @@ -235,7 +455,7 @@ total_written = 0; while (len > 0) { - int written = write(desc, ptr, len); + int written = fadv_write(desc, ptr, len); if (written < 0) { if (errno == EINTR) continue; @@ -267,7 +487,7 @@ return len; do { - n_chars = read(desc, ptr, len); + n_chars = fadv_read(desc, ptr, len); } while (n_chars < 0 && errno == EINTR); return n_chars;