Skip to main content
deleted 1 character in body
Source Link
Faheem Mitha
  • 36.1k
  • 33
  • 131
  • 192

You can try to write in C:

/*
 * wcl -- count the newline bytes arriving on standard input (like `wc -l`).
 *
 * Reads fd 0 in BUFSIZ-sized chunks with read() and scans each chunk with
 * memchr(). Prints the count followed by a newline and exits 0; if read()
 * fails, reports the error with perror() and exits 1.
 */
#include <unistd.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
    char buf[BUFSIZ];
    ssize_t nread;      /* read() returns ssize_t, not int */
    size_t nfound = 0;

    while ((nread = read(0, buf, sizeof buf)) > 0) {
        const char *end = buf + nread;
        const char *p = buf;

        /* Hop from newline to newline; memchr() yields NULL once none remain. */
        while ((p = memchr(p, '\n', (size_t)(end - p))) != NULL) {
            nfound++;
            p++;        /* resume just past the newline that was found */
        }
    }
    if (nread < 0) {
        perror("Error");
        return 1;
    }

    printf("%zu\n", nfound);    /* %zu is the conversion for size_t */
    return 0;
}

Save in e.g., wcl.c, compile e.g., with gcc wcl.c -O2 -o wcl and run with

<yourFile ./wcl 

This finds newlines sprinkled in a 1GB file on my system in about 370ms (repeated runs). (Increasing buffer sizes slightly increases the time, which is to be expected -- BUFSIZ should be close to optimal). This is very comparable to the ~380ms I'm getting from wc -l.

Mmapping gives me a better time of about 280ms, but it is of course limited to real files (no FIFOs, no terminal input, etc.):

/*
 * Count the newline bytes of the file open on standard input by mapping it.
 *
 * fstat() supplies the size, the whole file is mapped read-only with mmap(),
 * and memchr() scans the mapping. Prints the count and exits 0; exits 1
 * after a perror() diagnostic if fstat() or mmap() fails. Stdin must be
 * something mmap() accepts, so pipes and terminals do not work.
 */
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

int main(void)
{
    struct stat sbuf;
    if (fstat(0, &sbuf) < 0) {
        perror("Can't stat stdin");
        return 1;
    }

    /* mmap() rejects a zero-length mapping (EINVAL); an empty regular file
     * simply contains zero newlines, so answer directly. */
    if (S_ISREG(sbuf.st_mode) && sbuf.st_size == 0) {
        printf("0\n");
        return 0;
    }

    size_t len = (size_t)sbuf.st_size;
    const char *buf = mmap(NULL, len, PROT_READ, MAP_PRIVATE, 0 /* stdin */, 0 /* offset */);
    if (buf == MAP_FAILED) {
        perror("Mmap error");
        return 1;
    }

    const char *end = buf + len;
    size_t nfound = 0;
    /* Each memchr() hit is one newline; p++ restarts the search just past it. */
    for (const char *p = buf; (p = memchr(p, '\n', (size_t)(end - p))) != NULL; p++) {
        nfound++;
    }

    printf("%zu\n", nfound);    /* %zu is the conversion for size_t */
    return 0;
}

I created my test file with:

 $ dd if=/dev/zero of=file bs=1M count=1042 

and added some test newlines with:

 $ echo >> file 

and a hex editor.

You can try to write in C:

/*
 * wcl -- count the newline bytes arriving on standard input (like `wc -l`).
 *
 * Reads fd 0 in BUFSIZ-sized chunks with read() and scans each chunk with
 * memchr(). Prints the count followed by a newline and exits 0; if read()
 * fails, reports the error with perror() and exits 1.
 */
#include <unistd.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
    char buf[BUFSIZ];
    ssize_t nread;      /* read() returns ssize_t, not int */
    size_t nfound = 0;

    while ((nread = read(0, buf, sizeof buf)) > 0) {
        const char *end = buf + nread;
        const char *p = buf;

        /* Hop from newline to newline; memchr() yields NULL once none remain. */
        while ((p = memchr(p, '\n', (size_t)(end - p))) != NULL) {
            nfound++;
            p++;        /* resume just past the newline that was found */
        }
    }
    if (nread < 0) {
        perror("Error");
        return 1;
    }

    printf("%zu\n", nfound);    /* %zu is the conversion for size_t */
    return 0;
}

Save in e.g., wcl.c, compile e.g., with gcc wcl.c -O2 -o wcl and run with

<yourFile ./wcl 

This finds newlines sprinkled in a 1GB file on my system in about 370ms (repeated runs). (Increasing buffer sizes slightly increases the time, which is to be expected -- BUFSIZ should be close to optimal). This is very comparable to the ~380ms I'm getting from wc -l.

Mmapping gives me a better time of about 280ms, but it is of course limited to real files (no FIFOs, no terminal input, etc.):

/*
 * Count the newline bytes of the file open on standard input by mapping it.
 *
 * fstat() supplies the size, the whole file is mapped read-only with mmap(),
 * and memchr() scans the mapping. Prints the count and exits 0; exits 1
 * after a perror() diagnostic if fstat() or mmap() fails. Stdin must be
 * something mmap() accepts, so pipes and terminals do not work.
 */
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

int main(void)
{
    struct stat sbuf;
    if (fstat(0, &sbuf) < 0) {
        perror("Can't stat stdin");
        return 1;
    }

    /* mmap() rejects a zero-length mapping (EINVAL); an empty regular file
     * simply contains zero newlines, so answer directly. */
    if (S_ISREG(sbuf.st_mode) && sbuf.st_size == 0) {
        printf("0\n");
        return 0;
    }

    size_t len = (size_t)sbuf.st_size;
    const char *buf = mmap(NULL, len, PROT_READ, MAP_PRIVATE, 0 /* stdin */, 0 /* offset */);
    if (buf == MAP_FAILED) {
        perror("Mmap error");
        return 1;
    }

    const char *end = buf + len;
    size_t nfound = 0;
    /* Each memchr() hit is one newline; p++ restarts the search just past it. */
    for (const char *p = buf; (p = memchr(p, '\n', (size_t)(end - p))) != NULL; p++) {
        nfound++;
    }

    printf("%zu\n", nfound);    /* %zu is the conversion for size_t */
    return 0;
}

I created my test file with:

 $ dd if=/dev/zero of=file bs=1M count=1042 

and added some test newlines with:

 $ echo >> file 

and a hex editor.

You can try to write in C:

/*
 * wcl -- count the newline bytes arriving on standard input (like `wc -l`).
 *
 * Reads fd 0 in BUFSIZ-sized chunks with read() and scans each chunk with
 * memchr(). Prints the count followed by a newline and exits 0; if read()
 * fails, reports the error with perror() and exits 1.
 */
#include <unistd.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
    char buf[BUFSIZ];
    ssize_t nread;      /* read() returns ssize_t, not int */
    size_t nfound = 0;

    while ((nread = read(0, buf, sizeof buf)) > 0) {
        const char *end = buf + nread;
        const char *p = buf;

        /* Hop from newline to newline; memchr() yields NULL once none remain. */
        while ((p = memchr(p, '\n', (size_t)(end - p))) != NULL) {
            nfound++;
            p++;        /* resume just past the newline that was found */
        }
    }
    if (nread < 0) {
        perror("Error");
        return 1;
    }

    printf("%zu\n", nfound);    /* %zu is the conversion for size_t */
    return 0;
}

Save in e.g., wcl.c, compile e.g., with gcc wcl.c -O2 -o wcl and run with

<yourFile ./wcl 

This finds newlines sprinkled in a 1GB file on my system in about 370ms (repeated runs). (Increasing buffer sizes slightly increases the time, which is to be expected -- BUFSIZ should be close to optimal). This is very comparable to the ~380ms I'm getting from wc -l.

Mmapping gives me a better time of about 280ms, but it is of course limited to real files (no FIFOs, no terminal input, etc.):

/*
 * Count the newline bytes of the file open on standard input by mapping it.
 *
 * fstat() supplies the size, the whole file is mapped read-only with mmap(),
 * and memchr() scans the mapping. Prints the count and exits 0; exits 1
 * after a perror() diagnostic if fstat() or mmap() fails. Stdin must be
 * something mmap() accepts, so pipes and terminals do not work.
 */
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

int main(void)
{
    struct stat sbuf;
    if (fstat(0, &sbuf) < 0) {
        perror("Can't stat stdin");
        return 1;
    }

    /* mmap() rejects a zero-length mapping (EINVAL); an empty regular file
     * simply contains zero newlines, so answer directly. */
    if (S_ISREG(sbuf.st_mode) && sbuf.st_size == 0) {
        printf("0\n");
        return 0;
    }

    size_t len = (size_t)sbuf.st_size;
    const char *buf = mmap(NULL, len, PROT_READ, MAP_PRIVATE, 0 /* stdin */, 0 /* offset */);
    if (buf == MAP_FAILED) {
        perror("Mmap error");
        return 1;
    }

    const char *end = buf + len;
    size_t nfound = 0;
    /* Each memchr() hit is one newline; p++ restarts the search just past it. */
    for (const char *p = buf; (p = memchr(p, '\n', (size_t)(end - p))) != NULL; p++) {
        nfound++;
    }

    printf("%zu\n", nfound);    /* %zu is the conversion for size_t */
    return 0;
}

I created my test file with:

 $ dd if=/dev/zero of=file bs=1M count=1042 

and added some test newlines with:

 $ echo >> file 

and a hex editor.

added 48 characters in body
Source Link
muru
  • 78.4k
  • 16
  • 214
  • 321
/*
 * wcl -- count the newline bytes arriving on standard input (like `wc -l`).
 *
 * Reads fd 0 in BUFSIZ-sized chunks with read() and scans each chunk with
 * memchr(). Prints the count followed by a newline and exits 0; if read()
 * fails, reports the error with perror() and exits 1.
 */
#include <unistd.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
    char buf[BUFSIZ];
    ssize_t nread;      /* read() returns ssize_t, not int */
    size_t nfound = 0;

    while ((nread = read(0, buf, sizeof buf)) > 0) {
        const char *end = buf + nread;
        const char *p = buf;

        /* Hop from newline to newline; memchr() yields NULL once none remain. */
        while ((p = memchr(p, '\n', (size_t)(end - p))) != NULL) {
            nfound++;
            p++;        /* resume just past the newline that was found */
        }
    }
    if (nread < 0) {
        perror("Error");
        return 1;
    }

    printf("%zu\n", nfound);    /* %zu is the conversion for size_t */
    return 0;
}
/*
 * wcl -- count the newline bytes arriving on standard input (like `wc -l`).
 *
 * Reads fd 0 in BUFSIZ-sized chunks with read() and scans each chunk with
 * memchr(). Prints the count followed by a newline and exits 0; if read()
 * fails, reports the error with perror() and exits 1.
 */
#include <unistd.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
    char buf[BUFSIZ];
    ssize_t nread;      /* read() returns ssize_t, not int */
    size_t nfound = 0;

    while ((nread = read(0, buf, sizeof buf)) > 0) {
        const char *end = buf + nread;
        const char *p = buf;

        /* Hop from newline to newline; memchr() yields NULL once none remain. */
        while ((p = memchr(p, '\n', (size_t)(end - p))) != NULL) {
            nfound++;
            p++;        /* resume just past the newline that was found */
        }
    }
    if (nread < 0) {
        perror("Error");
        return 1;
    }

    printf("%zu\n", nfound);    /* %zu is the conversion for size_t */
    return 0;
}
/*
 * Count the newline bytes of the file open on standard input by mapping it.
 *
 * fstat() supplies the size, the whole file is mapped read-only with mmap(),
 * and memchr() scans the mapping. Prints the count and exits 0; exits 1
 * after a perror() diagnostic if fstat() or mmap() fails. Stdin must be
 * something mmap() accepts, so pipes and terminals do not work.
 */
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

int main(void)
{
    struct stat sbuf;
    if (fstat(0, &sbuf) < 0) {
        perror("Can't stat stdin");
        return 1;
    }

    /* mmap() rejects a zero-length mapping (EINVAL); an empty regular file
     * simply contains zero newlines, so answer directly. */
    if (S_ISREG(sbuf.st_mode) && sbuf.st_size == 0) {
        printf("0\n");
        return 0;
    }

    size_t len = (size_t)sbuf.st_size;
    const char *buf = mmap(NULL, len, PROT_READ, MAP_PRIVATE, 0 /* stdin */, 0 /* offset */);
    if (buf == MAP_FAILED) {
        perror("Mmap error");
        return 1;
    }

    const char *end = buf + len;
    size_t nfound = 0;
    /* Each memchr() hit is one newline; p++ restarts the search just past it. */
    for (const char *p = buf; (p = memchr(p, '\n', (size_t)(end - p))) != NULL; p++) {
        nfound++;
    }

    printf("%zu\n", nfound);    /* %zu is the conversion for size_t */
    return 0;
}
/*
 * Count the newline bytes of the file open on standard input by mapping it.
 *
 * fstat() supplies the size, the whole file is mapped read-only with mmap(),
 * and memchr() scans the mapping. Prints the count and exits 0; exits 1
 * after a perror() diagnostic if fstat() or mmap() fails. Stdin must be
 * something mmap() accepts, so pipes and terminals do not work.
 */
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

int main(void)
{
    struct stat sbuf;
    if (fstat(0, &sbuf) < 0) {
        perror("Can't stat stdin");
        return 1;
    }

    /* mmap() rejects a zero-length mapping (EINVAL); an empty regular file
     * simply contains zero newlines, so answer directly. */
    if (S_ISREG(sbuf.st_mode) && sbuf.st_size == 0) {
        printf("0\n");
        return 0;
    }

    size_t len = (size_t)sbuf.st_size;
    const char *buf = mmap(NULL, len, PROT_READ, MAP_PRIVATE, 0 /* stdin */, 0 /* offset */);
    if (buf == MAP_FAILED) {
        perror("Mmap error");
        return 1;
    }

    const char *end = buf + len;
    size_t nfound = 0;
    /* Each memchr() hit is one newline; p++ restarts the search just past it. */
    for (const char *p = buf; (p = memchr(p, '\n', (size_t)(end - p))) != NULL; p++) {
        nfound++;
    }

    printf("%zu\n", nfound);    /* %zu is the conversion for size_t */
    return 0;
}
/*
 * wcl -- count the newline bytes arriving on standard input (like `wc -l`).
 *
 * Reads fd 0 in BUFSIZ-sized chunks with read() and scans each chunk with
 * memchr(). Prints the count followed by a newline and exits 0; if read()
 * fails, reports the error with perror() and exits 1.
 */
#include <unistd.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
    char buf[BUFSIZ];
    ssize_t nread;      /* read() returns ssize_t, not int */
    size_t nfound = 0;

    while ((nread = read(0, buf, sizeof buf)) > 0) {
        const char *end = buf + nread;
        const char *p = buf;

        /* Hop from newline to newline; memchr() yields NULL once none remain. */
        while ((p = memchr(p, '\n', (size_t)(end - p))) != NULL) {
            nfound++;
            p++;        /* resume just past the newline that was found */
        }
    }
    if (nread < 0) {
        perror("Error");
        return 1;
    }

    printf("%zu\n", nfound);    /* %zu is the conversion for size_t */
    return 0;
}
/*
 * Count the newline bytes of the file open on standard input by mapping it.
 *
 * fstat() supplies the size, the whole file is mapped read-only with mmap(),
 * and memchr() scans the mapping. Prints the count and exits 0; exits 1
 * after a perror() diagnostic if fstat() or mmap() fails. Stdin must be
 * something mmap() accepts, so pipes and terminals do not work.
 */
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

int main(void)
{
    struct stat sbuf;
    if (fstat(0, &sbuf) < 0) {
        perror("Can't stat stdin");
        return 1;
    }

    /* mmap() rejects a zero-length mapping (EINVAL); an empty regular file
     * simply contains zero newlines, so answer directly. */
    if (S_ISREG(sbuf.st_mode) && sbuf.st_size == 0) {
        printf("0\n");
        return 0;
    }

    size_t len = (size_t)sbuf.st_size;
    const char *buf = mmap(NULL, len, PROT_READ, MAP_PRIVATE, 0 /* stdin */, 0 /* offset */);
    if (buf == MAP_FAILED) {
        perror("Mmap error");
        return 1;
    }

    const char *end = buf + len;
    size_t nfound = 0;
    /* Each memchr() hit is one newline; p++ restarts the search just past it. */
    for (const char *p = buf; (p = memchr(p, '\n', (size_t)(end - p))) != NULL; p++) {
        nfound++;
    }

    printf("%zu\n", nfound);    /* %zu is the conversion for size_t */
    return 0;
}
/*
 * wcl -- count the newline bytes arriving on standard input (like `wc -l`).
 *
 * Reads fd 0 in BUFSIZ-sized chunks with read() and scans each chunk with
 * memchr(). Prints the count followed by a newline and exits 0; if read()
 * fails, reports the error with perror() and exits 1.
 */
#include <unistd.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
    char buf[BUFSIZ];
    ssize_t nread;      /* read() returns ssize_t, not int */
    size_t nfound = 0;

    while ((nread = read(0, buf, sizeof buf)) > 0) {
        const char *end = buf + nread;
        const char *p = buf;

        /* Hop from newline to newline; memchr() yields NULL once none remain. */
        while ((p = memchr(p, '\n', (size_t)(end - p))) != NULL) {
            nfound++;
            p++;        /* resume just past the newline that was found */
        }
    }
    if (nread < 0) {
        perror("Error");
        return 1;
    }

    printf("%zu\n", nfound);    /* %zu is the conversion for size_t */
    return 0;
}
/*
 * Count the newline bytes of the file open on standard input by mapping it.
 *
 * fstat() supplies the size, the whole file is mapped read-only with mmap(),
 * and memchr() scans the mapping. Prints the count and exits 0; exits 1
 * after a perror() diagnostic if fstat() or mmap() fails. Stdin must be
 * something mmap() accepts, so pipes and terminals do not work.
 */
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

int main(void)
{
    struct stat sbuf;
    if (fstat(0, &sbuf) < 0) {
        perror("Can't stat stdin");
        return 1;
    }

    /* mmap() rejects a zero-length mapping (EINVAL); an empty regular file
     * simply contains zero newlines, so answer directly. */
    if (S_ISREG(sbuf.st_mode) && sbuf.st_size == 0) {
        printf("0\n");
        return 0;
    }

    size_t len = (size_t)sbuf.st_size;
    const char *buf = mmap(NULL, len, PROT_READ, MAP_PRIVATE, 0 /* stdin */, 0 /* offset */);
    if (buf == MAP_FAILED) {
        perror("Mmap error");
        return 1;
    }

    const char *end = buf + len;
    size_t nfound = 0;
    /* Each memchr() hit is one newline; p++ restarts the search just past it. */
    for (const char *p = buf; (p = memchr(p, '\n', (size_t)(end - p))) != NULL; p++) {
        nfound++;
    }

    printf("%zu\n", nfound);    /* %zu is the conversion for size_t */
    return 0;
}
added 84 characters in body
Source Link
Petr Skocik
  • 29.7k
  • 18
  • 90
  • 155

You can try to write in C:

/*
 * wcl -- count the newline bytes arriving on standard input (like `wc -l`).
 *
 * Reads fd 0 in BUFSIZ-sized chunks with read() and scans each chunk with
 * memchr(). Prints the count followed by a newline and exits 0; if read()
 * fails, reports the error with perror() and exits 1.
 */
#include <unistd.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
    char buf[BUFSIZ];
    ssize_t nread;      /* read() returns ssize_t, not int */
    size_t nfound = 0;

    while ((nread = read(0, buf, sizeof buf)) > 0) {
        const char *end = buf + nread;
        const char *p = buf;

        /* Hop from newline to newline; memchr() yields NULL once none remain. */
        while ((p = memchr(p, '\n', (size_t)(end - p))) != NULL) {
            nfound++;
            p++;        /* resume just past the newline that was found */
        }
    }
    if (nread < 0) {
        perror("Error");
        return 1;
    }

    printf("%zu\n", nfound);    /* %zu is the conversion for size_t */
    return 0;
}

Save in e.g., wcl.c, compile e.g., with gcc wcl.c -O2 -o wcl and run with

<yourFile ./wcl 

This finds newlines sprinkled in a 1GB file on my system in about 370ms (repeated runs). (Increasing buffer sizes slightly increases the time, which is to be expected -- BUFSIZ should be close to optimal). This is very comparable to the ~380ms I'm getting from wc -l.

Mmapping gives me a better time of about 280ms, but it is of course limited to real files (no FIFOs, no terminal input, etc.):

/*
 * Count the newline bytes of the file open on standard input by mapping it.
 *
 * fstat() supplies the size, the whole file is mapped read-only with mmap(),
 * and memchr() scans the mapping. Prints the count and exits 0; exits 1
 * after a perror() diagnostic if fstat() or mmap() fails. Stdin must be
 * something mmap() accepts, so pipes and terminals do not work.
 */
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

int main(void)
{
    struct stat sbuf;
    if (fstat(0, &sbuf) < 0) {
        perror("Can't stat stdin");
        return 1;
    }

    /* mmap() rejects a zero-length mapping (EINVAL); an empty regular file
     * simply contains zero newlines, so answer directly. */
    if (S_ISREG(sbuf.st_mode) && sbuf.st_size == 0) {
        printf("0\n");
        return 0;
    }

    size_t len = (size_t)sbuf.st_size;
    const char *buf = mmap(NULL, len, PROT_READ, MAP_PRIVATE, 0 /* stdin */, 0 /* offset */);
    if (buf == MAP_FAILED) {
        perror("Mmap error");
        return 1;
    }

    const char *end = buf + len;
    size_t nfound = 0;
    /* Each memchr() hit is one newline; p++ restarts the search just past it. */
    for (const char *p = buf; (p = memchr(p, '\n', (size_t)(end - p))) != NULL; p++) {
        nfound++;
    }

    printf("%zu\n", nfound);    /* %zu is the conversion for size_t */
    return 0;
}

I created my test file with:

 $ dd if=/dev/zero of=file bs=1M count=1042 

and added some test newlines with:

 $ echo >> file 

and a hex editor.

You can try to write in C:

/*
 * wcl -- count the newline bytes arriving on standard input (like `wc -l`).
 *
 * Reads fd 0 in BUFSIZ-sized chunks with read() and scans each chunk with
 * memchr(). Prints the count followed by a newline and exits 0; if read()
 * fails, reports the error with perror() and exits 1.
 */
#include <unistd.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
    char buf[BUFSIZ];
    ssize_t nread;      /* read() returns ssize_t, not int */
    size_t nfound = 0;

    while ((nread = read(0, buf, sizeof buf)) > 0) {
        const char *end = buf + nread;
        const char *p = buf;

        /* Hop from newline to newline; memchr() yields NULL once none remain. */
        while ((p = memchr(p, '\n', (size_t)(end - p))) != NULL) {
            nfound++;
            p++;        /* resume just past the newline that was found */
        }
    }
    if (nread < 0) {
        perror("Error");
        return 1;
    }

    printf("%zu\n", nfound);    /* %zu is the conversion for size_t */
    return 0;
}

Save in e.g., wcl.c, compile e.g., with gcc wcl.c -O2 -o wcl and run with

<yourFile ./wcl 

This finds newlines sprinkled in a 1GB file on my system in about 370ms. (Increasing buffer sizes slightly increases the time, which is to be expected -- BUFSIZ should be close to optimal).

Mmapping gives me a better time of about 280ms, but it is of course limited to real files (no FIFOs, no terminal input, etc.):

/*
 * Count the newline bytes of the file open on standard input by mapping it.
 *
 * fstat() supplies the size, the whole file is mapped read-only with mmap(),
 * and memchr() scans the mapping. Prints the count and exits 0; exits 1
 * after a perror() diagnostic if fstat() or mmap() fails. Stdin must be
 * something mmap() accepts, so pipes and terminals do not work.
 */
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

int main(void)
{
    struct stat sbuf;
    if (fstat(0, &sbuf) < 0) {
        perror("Can't stat stdin");
        return 1;
    }

    /* mmap() rejects a zero-length mapping (EINVAL); an empty regular file
     * simply contains zero newlines, so answer directly. */
    if (S_ISREG(sbuf.st_mode) && sbuf.st_size == 0) {
        printf("0\n");
        return 0;
    }

    size_t len = (size_t)sbuf.st_size;
    const char *buf = mmap(NULL, len, PROT_READ, MAP_PRIVATE, 0 /* stdin */, 0 /* offset */);
    if (buf == MAP_FAILED) {
        perror("Mmap error");
        return 1;
    }

    const char *end = buf + len;
    size_t nfound = 0;
    /* Each memchr() hit is one newline; p++ restarts the search just past it. */
    for (const char *p = buf; (p = memchr(p, '\n', (size_t)(end - p))) != NULL; p++) {
        nfound++;
    }

    printf("%zu\n", nfound);    /* %zu is the conversion for size_t */
    return 0;
}

I created my test file with:

 $ dd if=/dev/zero of=file bs=1M count=1042 

and added some test newlines with:

 $ echo >> file 

and a hex editor.

You can try to write in C:

/*
 * wcl -- count the newline bytes arriving on standard input (like `wc -l`).
 *
 * Reads fd 0 in BUFSIZ-sized chunks with read() and scans each chunk with
 * memchr(). Prints the count followed by a newline and exits 0; if read()
 * fails, reports the error with perror() and exits 1.
 */
#include <unistd.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
    char buf[BUFSIZ];
    ssize_t nread;      /* read() returns ssize_t, not int */
    size_t nfound = 0;

    while ((nread = read(0, buf, sizeof buf)) > 0) {
        const char *end = buf + nread;
        const char *p = buf;

        /* Hop from newline to newline; memchr() yields NULL once none remain. */
        while ((p = memchr(p, '\n', (size_t)(end - p))) != NULL) {
            nfound++;
            p++;        /* resume just past the newline that was found */
        }
    }
    if (nread < 0) {
        perror("Error");
        return 1;
    }

    printf("%zu\n", nfound);    /* %zu is the conversion for size_t */
    return 0;
}

Save in e.g., wcl.c, compile e.g., with gcc wcl.c -O2 -o wcl and run with

<yourFile ./wcl 

This finds newlines sprinkled in a 1GB file on my system in about 370ms (repeated runs). (Increasing buffer sizes slightly increases the time, which is to be expected -- BUFSIZ should be close to optimal). This is very comparable to the ~380ms I'm getting from wc -l.

Mmapping gives me a better time of about 280ms, but it is of course limited to real files (no FIFOs, no terminal input, etc.):

/*
 * Count the newline bytes of the file open on standard input by mapping it.
 *
 * fstat() supplies the size, the whole file is mapped read-only with mmap(),
 * and memchr() scans the mapping. Prints the count and exits 0; exits 1
 * after a perror() diagnostic if fstat() or mmap() fails. Stdin must be
 * something mmap() accepts, so pipes and terminals do not work.
 */
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

int main(void)
{
    struct stat sbuf;
    if (fstat(0, &sbuf) < 0) {
        perror("Can't stat stdin");
        return 1;
    }

    /* mmap() rejects a zero-length mapping (EINVAL); an empty regular file
     * simply contains zero newlines, so answer directly. */
    if (S_ISREG(sbuf.st_mode) && sbuf.st_size == 0) {
        printf("0\n");
        return 0;
    }

    size_t len = (size_t)sbuf.st_size;
    const char *buf = mmap(NULL, len, PROT_READ, MAP_PRIVATE, 0 /* stdin */, 0 /* offset */);
    if (buf == MAP_FAILED) {
        perror("Mmap error");
        return 1;
    }

    const char *end = buf + len;
    size_t nfound = 0;
    /* Each memchr() hit is one newline; p++ restarts the search just past it. */
    for (const char *p = buf; (p = memchr(p, '\n', (size_t)(end - p))) != NULL; p++) {
        nfound++;
    }

    printf("%zu\n", nfound);    /* %zu is the conversion for size_t */
    return 0;
}

I created my test file with:

 $ dd if=/dev/zero of=file bs=1M count=1042 

and added some test newlines with:

 $ echo >> file 

and a hex editor.

added 880 characters in body
Source Link
Petr Skocik
  • 29.7k
  • 18
  • 90
  • 155
Loading
added 141 characters in body
Source Link
Petr Skocik
  • 29.7k
  • 18
  • 90
  • 155
Loading
Source Link
Petr Skocik
  • 29.7k
  • 18
  • 90
  • 155
Loading