In the course of tracking down the system-call overhead of the pgsql driver, I came across the dual read-buffer strategy in file.c. Although it is functional as it stands, it could be polished a bit.
Any thoughts?
I tried to cater for both large reads from files and network-sized reads from arriving packets.
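In short: instead of reading a fixed CHUNK (8 KiB) or the full 64 KiB every time, each pass through the read loop now derives the next read length from what the previous fd_read() actually returned. A minimal sketch of that heuristic, pulled out into a helper purely for illustration (the helper name and signature are mine; in the patch the adjustment is done inline in do_read(), and the dynamic-buffer loop caps growth at DIRECT_BUFSIZE rather than at the remaining count):

#include <stddef.h>

#define SMALL_NETBUF 2048

/* got:       bytes the previous fd_read() returned
 * tried:     bytes the previous fd_read() asked for
 * remaining: bytes still wanted after accounting for got
 * return:    length to request from the next fd_read()
 */
static ptrdiff_t next_read_len(ptrdiff_t got, ptrdiff_t tried,
                               ptrdiff_t remaining)
{
  ptrdiff_t next;

  if (got < tried)                /* short read: probably a socket, so    */
    next = got + SMALL_NETBUF;    /* creep up from the observed size      */
  else                            /* full read: probably a file, so grow  */
    next = got + ((remaining - got) >> 1);  /* towards the remaining size */

  if (next < SMALL_NETBUF)
    next = SMALL_NETBUF;
  if (next > remaining - SMALL_NETBUF)
    next = remaining;             /* close enough: just ask for the rest  */
  return next;
}

So a socket delivering ~1.4 KiB segments settles a couple of KiB above the segment size, while a file that satisfies every request quickly climbs back towards the full remaining count.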
commit e44d08a94ebc9e38cd107177abd1fdfdae8a8e73
Author: Stephen R. van den Berg <srb@cuci.nl>
Date:   Tue Aug 19 10:53:08 2008 +0200

    file.c optimise buffer management, minimise read size/systemcalls
diff --git a/src/modules/files/file.c b/src/modules/files/file.c
index e69043a..7d4ef16 100644
--- a/src/modules/files/file.c
+++ b/src/modules/files/file.c
@@ -121,8 +121,6 @@
 #define FD (THIS->box.fd)
 #define ERRNO (THIS->my_errno)
-#define READ_BUFFER 8192
-
 /* Don't try to use socketpair() on AmigaOS, socketpair_ultra works better */
 #ifdef __amigaos__
 #undef HAVE_SOCKETPAIR
@@ -130,6 +128,10 @@
/* #define SOCKETPAIR_DEBUG */
+#define READ_BUFFER 8192
+#define DIRECT_BUFSIZE (64*1024)
+#define SMALL_NETBUF 2048
+
 struct program *file_program;
 struct program *file_ref_program;
@@ -472,13 +474,16 @@ static struct pike_string *do_read(int fd,
 {
   ONERROR ebuf;
   ptrdiff_t bytes_read, i;
+  INT32 try_read;
   bytes_read=0;
   *err=0;
-  if(r <= 65536)
+  if(r <= DIRECT_BUFSIZE
+   || all && (r<<1) > r && (((r-1)|r)+1)!=(r<<1)) /* r<<1 != power of two */
   {
     struct pike_string *str;
+    i=r;
str=begin_shared_string(r);
@@ -488,7 +493,9 @@ static struct pike_string *do_read(int fd,
       int fd=FD;
       int e;
       THREADS_ALLOW();
-      i = fd_read(fd, str->str+bytes_read, r);
+
+      try_read = i;
+      i = fd_read(fd, str->str+bytes_read, i);
       e=errno;
       THREADS_DISALLOW();
@@ -515,6 +522,24 @@ static struct pike_string *do_read(int fd,
        }
        break;
      }
+
+      /*
+       * Large reads cause the kernel to validate more memory before
+       * starting the actual read, and hence cause slowdowns.
+       * Ideally you read as much as you're going to receive.
+       * This buffer calculation algorithm tries to optimise the
+       * read length depending on past read chunksizes.
+       * For network reads this allows it to follow the packetsizes.
+       * The initial read will attempt the full buffer.
+       */
+      if( i < try_read )
+        i += SMALL_NETBUF;
+      else
+        i += (r-i)>>1;
+      if (i < SMALL_NETBUF)
+        i = SMALL_NETBUF;
+      if(i > r-SMALL_NETBUF)
+        i = r;
     }while(r);
     UNSET_ONERROR(ebuf);
@@ -533,18 +558,22 @@ static struct pike_string *do_read(int fd,
     /* For some reason, 8k seems to work faster than 64k.
      * (4k seems to be about 2% faster than 8k when using linux though)
      * /Hubbe (Per pointed it out to me..)
+     *
+     * The slowdowns most likely are because of memory validation
+     * done by the kernel for buffer space which is later unused
+     * (short read)
      */
-#define CHUNK ( 1024 * 8 )
-    INT32 try_read;
     dynamic_buffer b;
     b.s.str=0;
     initialize_buf(&b);
     SET_ONERROR(ebuf, free_dynamic_buffer, &b);
+    i = all && (r<<1)>r ? DIRECT_BUFSIZE : READ_BUFFER;
     do{
       int e;
       char *buf;
-      try_read=MINIMUM(CHUNK,r);
+
+      try_read = i;
buf = low_make_buf_space(try_read, &b);
@@ -555,23 +584,13 @@ static struct pike_string *do_read(int fd,
check_threads_etc();
-      if(i==try_read)
-      {
-        r-=i;
-        bytes_read+=i;
-        if(!all) break;
-      }
-      else if(i>0)
+      if(i>=0)
       {
        bytes_read+=i;
        r-=i;
-        low_make_buf_space(i - try_read, &b);
-        if(!all) break;
-      }
-      else if(i==0)
-      {
-        low_make_buf_space(-try_read, &b);
-        break;
+        if(try_read > i)
+          low_make_buf_space(i - try_read, &b);
+        if(!all || !i) break;
       }
       else
       {
@@ -588,6 +607,16 @@ static struct pike_string *do_read(int fd,
          break;
        }
      }
+
+      /* See explanation in previous loop above */
+      if( i < try_read )
+        i += SMALL_NETBUF;
+      else
+        i += (DIRECT_BUFSIZE-i)>>1;
+      if (i < SMALL_NETBUF)
+        i = SMALL_NETBUF;
+      if(i > r-SMALL_NETBUF)
+        i = r;
     }while(r);
     UNSET_ONERROR(ebuf);
@@ -596,7 +625,6 @@ static struct pike_string *do_read(int fd,
       ADD_FD_EVENTS (THIS, PIKE_BIT_FD_READ);
     return low_free_buf(&b);
-#undef CHUNK
   }
 }
@@ -2457,7 +2485,7 @@ static void file_listxattr(INT32 args)
   if( res<0 && errno==ERANGE )
   {
     /* Too little space in buffer.*/
-    int blen = 65536;
+    int blen = DIRECT_BUFSIZE;
     do_free = 1;
     ptr = xalloc( 1 );
     do {
@@ -2527,7 +2555,7 @@ static void file_getxattr(INT32 args)
   if( res<0 && errno==ERANGE )
   {
     /* Too little space in buffer.*/
-    int blen = 65536;
+    int blen = DIRECT_BUFSIZE;
     do_free = 1;
     ptr = xalloc( 1 );
     do {
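One remark on the new condition in the first do_read() hunk: as far as I can tell, (((r-1)|r)+1) != (r<<1) is simply a branch-free test that r is not a power of two, with the preceding (r<<1) > r guarding against shift overflow. A quick stand-alone check, purely illustrative and not part of the patch:

#include <stdio.h>

/* For a power of two, OR-ing r with r-1 sets every bit below the top
 * one, so adding 1 lands exactly on r<<1; for anything else it does not. */
static int not_power_of_two(unsigned int r)
{
  return (((r - 1) | r) + 1) != (r << 1);
}

int main(void)
{
  unsigned int tests[] = { 1, 2, 3, 6, 4096, 65536, 65537 };
  size_t n;

  for (n = 0; n < sizeof tests / sizeof tests[0]; n++)
    printf("%6u: %s a power of two\n", tests[n],
           not_power_of_two(tests[n]) ? "not" : "is");
  return 0;
}

It prints "is a power of two" only for 1, 2, 4096 and 65536.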
@@ -533,18 +558,22 @@ static struct pike_string *do_read(int fd,
    /* For some reason, 8k seems to work faster than 64k.
     * (4k seems to be about 2% faster than 8k when using linux though)
     * /Hubbe (Per pointed it out to me..)
     *
     * The slowdowns most likely are because of memory validation
     * done by the kernel for buffer space which is later unused
     * (short read)
     */
Feel free to follow the style of signing comments with a /srb or similar; it's actually useful and makes in-comment discussions nicer to read, even though "blame" can be used to track down the same data with moderate accuracy when really needed. (Hardly a general rule, but it felt appropriate here.)
(Not meant as a policy nit, since there is no policy; do as you prefer. :-)
Is this comment the executive summary?
+      /*
+       * Large reads cause the kernel to validate more memory before
+       * starting the actual read, and hence cause slowdowns.
+       * Ideally you read as much as you're going to receive.
+       * This buffer calculation algorithm tries to optimise the
+       * read length depending on past read chunksizes.
+       * For network reads this allows it to follow the packetsizes.
+       * The initial read will attempt the full buffer.
+       */
Sounds ok, but only in 7.9, of course.