In the course of tracking down the system-call overhead of the pgsql driver, I came across the dual read-buffer strategy in file.c. Although it is functional as it stands, it could be polished a bit.
Any thoughts?
I tried to cater for both large reads from files and network-sized reads from arriving packets.
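In short: instead of reading a fixed CHUNK (8 KiB) or the full 64 KiB every time, each pass through the read loop now derives the next read length from what the previous fd_read() actually returned. A minimal sketch of that heuristic, pulled out into a helper purely for illustration (the helper name and signature are mine; in the patch the adjustment is done inline in do_read(), and the dynamic-buffer loop caps growth at DIRECT_BUFSIZE rather than at the remaining count):

#include <stddef.h>

#define SMALL_NETBUF 2048

/* got:       bytes the previous fd_read() returned
 * tried:     bytes the previous fd_read() asked for
 * remaining: bytes still wanted after accounting for got
 * return:    length to request from the next fd_read()
 */
static ptrdiff_t next_read_len(ptrdiff_t got, ptrdiff_t tried,
                               ptrdiff_t remaining)
{
  ptrdiff_t next;

  if (got < tried)                /* short read: probably a socket, so    */
    next = got + SMALL_NETBUF;    /* creep up from the observed size      */
  else                            /* full read: probably a file, so grow  */
    next = got + ((remaining - got) >> 1);  /* towards the remaining size */

  if (next < SMALL_NETBUF)
    next = SMALL_NETBUF;
  if (next > remaining - SMALL_NETBUF)
    next = remaining;             /* close enough: just ask for the rest  */
  return next;
}

So a socket delivering ~1.4 KiB segments settles a couple of KiB above the segment size, while a file that satisfies every request quickly climbs back towards the full remaining count.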
commit e44d08a94ebc9e38cd107177abd1fdfdae8a8e73
Author: Stephen R. van den Berg <srb@cuci.nl>
Date:   Tue Aug 19 10:53:08 2008 +0200

    file.c optimise buffer management, minimise read size/systemcalls
diff --git a/src/modules/files/file.c b/src/modules/files/file.c
index e69043a..7d4ef16 100644
--- a/src/modules/files/file.c
+++ b/src/modules/files/file.c
@@ -121,8 +121,6 @@
 #define FD (THIS->box.fd)
 #define ERRNO (THIS->my_errno)
-#define READ_BUFFER 8192
-
 /* Don't try to use socketpair() on AmigaOS, socketpair_ultra works better */
 #ifdef __amigaos__
 #undef HAVE_SOCKETPAIR
@@ -130,6 +128,10 @@
/* #define SOCKETPAIR_DEBUG */
+#define READ_BUFFER 8192
+#define DIRECT_BUFSIZE (64*1024)
+#define SMALL_NETBUF 2048
+
 struct program *file_program;
 struct program *file_ref_program;
@@ -472,13 +474,16 @@ static struct pike_string *do_read(int fd,
 {
   ONERROR ebuf;
   ptrdiff_t bytes_read, i;
+  INT32 try_read;
   bytes_read=0;
   *err=0;
-  if(r <= 65536)
+  if(r <= DIRECT_BUFSIZE
+   || all && (r<<1) > r && (((r-1)|r)+1)!=(r<<1)) /* r<<1 != power of two */
   {
     struct pike_string *str;
+    i=r;
str=begin_shared_string(r);
@@ -488,7 +493,9 @@ static struct pike_string *do_read(int fd,
       int fd=FD;
       int e;
       THREADS_ALLOW();
-      i = fd_read(fd, str->str+bytes_read, r);
+
+      try_read = i;
+      i = fd_read(fd, str->str+bytes_read, i);
       e=errno;
       THREADS_DISALLOW();
@@ -515,6 +522,24 @@ static struct pike_string *do_read(int fd,
        }
        break;
      }
+
+      /*
+       * Large reads cause the kernel to validate more memory before
+       * starting the actual read, and hence cause slowdowns.
+       * Ideally you read as much as you're going to receive.
+       * This buffer calculation algorithm tries to optimise the
+       * read length depending on past read chunksizes.
+       * For network reads this allows it to follow the packetsizes.
+       * The initial read will attempt the full buffer.
+       */
+      if( i < try_read )
+        i += SMALL_NETBUF;
+      else
+        i += (r-i)>>1;
+      if (i < SMALL_NETBUF)
+        i = SMALL_NETBUF;
+      if(i > r-SMALL_NETBUF)
+        i = r;
     }while(r);
     UNSET_ONERROR(ebuf);
@@ -533,18 +558,22 @@ static struct pike_string *do_read(int fd,
     /* For some reason, 8k seems to work faster than 64k.
      * (4k seems to be about 2% faster than 8k when using linux though)
      * /Hubbe (Per pointed it out to me..)
+     *
+     * The slowdowns most likely are because of memory validation
+     * done by the kernel for buffer space which is later unused
+     * (short read)
      */
-#define CHUNK ( 1024 * 8 )
-    INT32 try_read;
     dynamic_buffer b;
     b.s.str=0;
     initialize_buf(&b);
     SET_ONERROR(ebuf, free_dynamic_buffer, &b);
+    i = all && (r<<1)>r ? DIRECT_BUFSIZE : READ_BUFFER;
     do{
       int e;
       char *buf;
-      try_read=MINIMUM(CHUNK,r);
+
+      try_read = i;
buf = low_make_buf_space(try_read, &b);
@@ -555,23 +584,13 @@ static struct pike_string *do_read(int fd,
check_threads_etc();
-      if(i==try_read)
-      {
-        r-=i;
-        bytes_read+=i;
-        if(!all) break;
-      }
-      else if(i>0)
+      if(i>=0)
       {
        bytes_read+=i;
        r-=i;
-        low_make_buf_space(i - try_read, &b);
-        if(!all) break;
-      }
-      else if(i==0)
-      {
-        low_make_buf_space(-try_read, &b);
-        break;
+        if(try_read > i)
+          low_make_buf_space(i - try_read, &b);
+        if(!all || !i) break;
       }
       else
       {
@@ -588,6 +607,16 @@ static struct pike_string *do_read(int fd,
          break;
        }
      }
+
+      /* See explanation in previous loop above */
+      if( i < try_read )
+        i += SMALL_NETBUF;
+      else
+        i += (DIRECT_BUFSIZE-i)>>1;
+      if (i < SMALL_NETBUF)
+        i = SMALL_NETBUF;
+      if(i > r-SMALL_NETBUF)
+        i = r;
     }while(r);
     UNSET_ONERROR(ebuf);
@@ -596,7 +625,6 @@ static struct pike_string *do_read(int fd,
       ADD_FD_EVENTS (THIS, PIKE_BIT_FD_READ);
     return low_free_buf(&b);
-#undef CHUNK
   }
 }
@@ -2457,7 +2485,7 @@ static void file_listxattr(INT32 args)
   if( res<0 && errno==ERANGE )
   {
     /* Too little space in buffer.*/
-    int blen = 65536;
+    int blen = DIRECT_BUFSIZE;
     do_free = 1;
     ptr = xalloc( 1 );
     do {
@@ -2527,7 +2555,7 @@ static void file_getxattr(INT32 args)
   if( res<0 && errno==ERANGE )
   {
     /* Too little space in buffer.*/
-    int blen = 65536;
+    int blen = DIRECT_BUFSIZE;
     do_free = 1;
     ptr = xalloc( 1 );
     do {
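One remark on the new condition in the first do_read() hunk: as far as I can tell, (((r-1)|r)+1) != (r<<1) is simply a branch-free test that r is not a power of two, with the preceding (r<<1) > r guarding against shift overflow. A quick stand-alone check, purely illustrative and not part of the patch:

#include <stdio.h>

/* For a power of two, OR-ing r with r-1 sets every bit below the top
 * one, so adding 1 lands exactly on r<<1; for anything else it does not. */
static int not_power_of_two(unsigned int r)
{
  return (((r - 1) | r) + 1) != (r << 1);
}

int main(void)
{
  unsigned int tests[] = { 1, 2, 3, 6, 4096, 65536, 65537 };
  size_t n;

  for (n = 0; n < sizeof tests / sizeof tests[0]; n++)
    printf("%6u: %s a power of two\n", tests[n],
           not_power_of_two(tests[n]) ? "not" : "is");
  return 0;
}

It prints "is a power of two" only for 1, 2, 4096 and 65536.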
@@ -533,18 +558,22 @@ static struct pike_string *do_read(int fd,
    /* For some reason, 8k seems to work faster than 64k.
     * (4k seems to be about 2% faster than 8k when using linux though)
     * /Hubbe (Per pointed it out to me..)
     *
     * The slowdowns most likely are because of memory validation
     * done by the kernel for buffer space which is later unused
     * (short read)
     */
Feel free to follow the style of signing comments with a /srb or similar; it's actually useful and makes in-comment discussions nicer to read, even though "blame" can be used to track down the same data with moderate accuracy when really needed. (Hardly a general rule, but it felt appropriate here.)
(Not meant as a policy nit, since there is no policy; do as you prefer. :-)
Is this comment the executive summary?
+      /*
+       * Large reads cause the kernel to validate more memory before
+       * starting the actual read, and hence cause slowdowns.
+       * Ideally you read as much as you're going to receive.
+       * This buffer calculation algorithm tries to optimise the
+       * read length depending on past read chunksizes.
+       * For network reads this allows it to follow the packetsizes.
+       * The initial read will attempt the full buffer.
+       */
Sounds ok, but only in 7.9, of course.