You can try writing it in C:
#include <unistd.h>
#include <stdio.h>
#include <string.h>

int main(){
    char buf[BUFSIZ];
    ssize_t nread;          /* read() returns ssize_t, not int */
    size_t nfound = 0;
    /* read stdin in BUFSIZ-sized chunks and count the '\n' bytes in each */
    while((nread = read(0, buf, BUFSIZ)) > 0){
        char const* p;
        for(p = buf; (p = memchr(p, '\n', nread - (p - buf))); nfound++, p++)
            ;
    }
    if(nread < 0){ perror("Error"); return 1; }
    printf("%zu\n", nfound); /* %zu is the portable format for size_t */
    return 0;
}

Save it as e.g. wcl.c, compile with e.g. gcc wcl.c -O2 -o wcl, and run with
<yourFile ./wcl

This finds the newlines sprinkled through a 1GB file on my system in about 370ms (repeated runs). (Increasing the buffer size slightly increases the time, which is to be expected -- BUFSIZ should be close to optimal.) This is very comparable to the ~380ms I'm getting from wc -l.
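If you want to check the buffer-size claim on your own machine, here is a minimal sketch (my variant, not part of the timings above; the name wcl_buf.c and the argv handling are mine) that takes the chunk size as a command-line argument:

#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* same counting loop as wcl.c above, but the chunk size comes from
   argv[1] (falling back to BUFSIZ), so different sizes can be timed */
int main(int argc, char** argv){
    size_t bufsize = argc > 1 ? (size_t)atoll(argv[1]) : BUFSIZ;
    char* buf = malloc(bufsize);
    if(!buf){ perror("malloc"); return 1; }
    ssize_t nread;
    size_t nfound = 0;
    while((nread = read(0, buf, bufsize)) > 0){
        char const* p;
        for(p = buf; (p = memchr(p, '\n', nread - (p - buf))); nfound++, p++)
            ;
    }
    if(nread < 0){ perror("Error"); return 1; }
    free(buf);
    printf("%zu\n", nfound);
    return 0;
}

Then time it with different sizes, e.g.:

$ gcc wcl_buf.c -O2 -o wcl_buf
$ time ./wcl_buf 65536 <yourFile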
Mmapping gives me a better time of about 280ms, but it is of course limited to regular files (no FIFOs, no terminal input, etc.):
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

int main(){
    struct stat sbuf;
    if(fstat(0, &sbuf) < 0){ perror("Can't stat stdin"); return 1; }
    /* map all of stdin (fd 0) into memory, read-only */
    char* buf = mmap(NULL, sbuf.st_size, PROT_READ, MAP_PRIVATE, 0/*stdin*/, 0/*offset*/);
    if(buf == MAP_FAILED){ perror("Mmap error"); return 1; }
    size_t nread = sbuf.st_size, nfound = 0;
    char const* p;
    for(p = buf; (p = memchr(p, '\n', nread - (p - buf))); nfound++, p++)
        ;
    printf("%zu\n", nfound);
    return 0;
}
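One further knob to experiment with (my addition, not something measured above, and any benefit depends on the kernel's readahead heuristics) is madvise(2) with MADV_SEQUENTIAL, which hints that the mapping will be scanned front to back. It slots in right after the MAP_FAILED check:

/* ...same program as above, up to and including the MAP_FAILED check... */
/* advisory hint: pages will be touched strictly front to back;
   if the kernel ignores it, the count is still correct */
if(madvise(buf, sbuf.st_size, MADV_SEQUENTIAL) < 0)
    perror("madvise"); /* non-fatal, so keep going */
/* ...counting loop as above... */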
I created my test file with:

$ dd if=/dev/zero of=1GB bs=1M count=1024

and added some test newlines with:
$ echo >> 1GB

and a hex editor.
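If you'd rather not reach for a hex editor, a scriptable way to plant a newline at an arbitrary offset (the offset below is just an example) is dd with conv=notrunc, which overwrites a single byte in place without truncating the file:

$ printf '\n' | dd of=1GB bs=1 seek=123456789 conv=notrunc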