/*******************************************************************************
 *
 * text.c
 *
 * AUTHOR:
 *   Dan Harkless
 *
 * COPYRIGHT:
 *   This file is Copyright (C) 2003 by Dan Harkless, and is released under the
 *   GNU General Public License.
 *
 * USAGE:
 *   % text [...]
 *
 * DESCRIPTION:
 *   It's really annoying when you're doing:
 *
 *       grep PATTERN *
 *
 *   and the current directory includes binary files and/or subdirectories,
 *   resulting in the output of long lines filled with garbage characters,
 *   ringing bells, and often the freeze-up of your terminal.
 *
 *   Proper modern greps like GNU grep (which just outputs "Binary file FILENAME
 *   matches") don't suffer from this problem, but in case you're using a vendor
 *   version that does, you can use this program to "screen" files before
 *   they're passed off to grep for processing.  If you do:
 *
 *       grep PATTERN `text *`
 *
 *   grep will only look for your PATTERN in "text" files in the local
 *   directory.
 *
 *   I put "text" in quotes because this program's algorithm is fast but
 *   imperfect -- it only checks whether the first character in each file is a
 *   printable character.  This is fine for general use, because the most common
 *   binary files most people will come across in a directory start with
 *   non-printable "magic numbers", but proprietary data files could well start
 *   with a printable character.  In the future I may add an option to check the
 *   first N bytes of each file for non-printable characters.
 *
 *   To facilitate use of this command in scripts, 'text' prints "/dev/null" if
 *   it finds no text files, so grep won't hang waiting for you to type a
 *   CTRL-D.
 *
 *   Note that of course you can use 'text' with other commands besides
 *   [ef]?grep.
 *
 * DATE        MODIFICATION
 * ==========  =================================================================
 * 2003-10-09  Noted that GNU grep doesn't suffer the problem this script fixes.
 * 2003-06-25  Ignore setlocale() failure if ENOENT; for use on minimal
 *             installs.
 * 2003-04-23  Need setlocale(LC_ALL, "") for isgraph() to work for non-"C"
 *             locales.  Doing this increases the chances for false positives,
 *             however -- we might want to add an option to disable the call.
 *             In the meantime, `env LANG=C text *` works for that.
 * 1999-04-09  Get our program name from argv[0] rather than hardcoding as text.
 * 1998-10-22  Directories can start with a printable character -- use stat()
 *             and only consider regular files.
 * 1995-08-30  Original.
 *
 *******************************************************************************/

#include <ctype.h>     /* for isgraph() and isspace() */
#include <errno.h>     /* for errno and ENOENT */
#include <locale.h>    /* for setlocale() */
#include <stdio.h>     /* for fprintf(), etc. */
#include <stdlib.h>    /* for EXIT_SUCCESS and EXIT_FAILURE */
#include <string.h>    /* for strrchr(), etc. */
#include <sys/stat.h>  /* for stat(), etc. */

#define FALSE 0
#define TRUE  1

typedef unsigned char boolean;


/*
 * Report whether the first byte of STREAM is a graphic or whitespace character
 * according to isgraph() / isspace() in the current locale.
 *
 * Returns TRUE if such a byte was read, FALSE otherwise.  A stream from which
 * no byte can be read (an empty file, or a read error) has no first character
 * and is therefore reported as not being text.
 */
static boolean first_byte_is_text(FILE* stream)
{
    /* The <ctype.h> functions require a value representable as unsigned char
       (or EOF); a plain char may be negative for bytes >= 0x80. */
    unsigned char first_byte;

    if (fread(&first_byte, 1, 1, stream) != 1) {
        return FALSE;
    }

    return (isgraph(first_byte) || isspace(first_byte)) ? TRUE : FALSE;
}


/*
 * Print, on a single space-separated line, the name of every command-line
 * argument that is a regular file whose first byte looks like text.  If no
 * argument qualifies, print "/dev/null" instead.
 *
 * Arguments that cannot be stat()ed or opened are reported on stderr and
 * skipped; arguments that are not regular files are skipped silently.
 *
 * Returns EXIT_FAILURE if the locale cannot be set (other than with ENOENT) or
 * if no file arguments were given, and EXIT_SUCCESS otherwise -- including
 * when individual files had to be skipped.
 */
int main(int argc, char** argv)
{
    boolean     found_at_least_one_text_file = FALSE;
    const char* our_program_name = "text";  /* fallback if argv[0] is absent */
    int         i;

    /* Use the basename of argv[0] as our name in diagnostics.  argv[0] may
       legitimately be NULL when argc is 0, so don't hand it to strrchr()
       unchecked. */
    if (argc > 0 && argv[0] != NULL) {
        const char* last_slash = strrchr(argv[0], '/');

        our_program_name = (last_slash == NULL) ? argv[0] : last_slash + 1;
    }

    /* isgraph() only works for non-"C" locales once the locale has been set.
       ENOENT is tolerated so that we still run on minimal installs. */
    if (setlocale(LC_ALL, "") == NULL && errno != ENOENT) {
        fprintf(stderr, "%s: setlocale(): %s.\n", our_program_name,
                strerror(errno));
        return EXIT_FAILURE;
    }

    if (argc < 2) {
        fprintf(stderr, "Usage: %s [...]\n", our_program_name);
        return EXIT_FAILURE;
    }

    for (i = 1; i < argc; i++) {
        struct stat file_stat;
        FILE*       file;

        if (stat(argv[i], &file_stat) < 0) {
            fprintf(stderr, "%s: %s.\n", argv[i], strerror(errno));
            continue;
        }

        /* Directories etc. can start with a printable character too, so only
           regular files are candidates. */
        if (!S_ISREG(file_stat.st_mode)) {
            continue;
        }

        file = fopen(argv[i], "r");
        if (file == NULL) {
            fprintf(stderr, "%s: %s.\n", argv[i], strerror(errno));
            continue;
        }

        if (first_byte_is_text(file)) {
            if (found_at_least_one_text_file) {
                putchar(' ');
            }
            fputs(argv[i], stdout);
            found_at_least_one_text_file = TRUE;
        }

        fclose(file);
    }

    /* Give the consuming command (e.g. grep) a file argument so that it
       doesn't fall back to reading stdin. */
    if (!found_at_least_one_text_file) {
        fputs("/dev/null", stdout);
    }
    putchar('\n');

    return EXIT_SUCCESS;
}