#!/usr/bin/perl
#
# grep16
#
# AUTHOR:
#   Dan Harkless
#   http://harkless.org/dan/software/
#
# COPYRIGHT:
#   This file is Copyright (C) 2016 by Dan Harkless, and is released under the
#   GNU General Public License.
#
# USAGE:
#   % grep16 [-a] PERLRE [FILE]...
#
# DESCRIPTION:
#   Greps for a Perl regular expression (see "perldoc perlre") in each file
#   specified on the commandline, or stdin if no files are specified.  The
#   regular expression is searched for in each file both as-is and converted
#   to a UTF-16 string with NULs between each character.  There is no NUL put
#   in before the first character or after the last, so files can be big
#   and/or little endian (though note that in rare cases this may cause false
#   matches on the first or last character).
#
#   Like grep, grep16 returns 0 if the regexp was found in one of the files, 1
#   if it was not, and 2 if there was an error.  Also like grep, failure to
#   open one of the files is not considered a fatal error; a message will be
#   output to STDERR, and the program will eventually exit with status 2, but
#   in the meantime the other files specified on the commandline will be
#   processed.
#
#   The output is similar to grep, but there are some differences.  Each
#   matching line will be output with "FILENAME: " (not just "FILENAME:" --
#   there's a space) at the beginning (or "-: " for STDIN; "-" specified on
#   the commandline will also be treated as STDIN).  This will be the case
#   even if there was only one file specified -- no need to specify a -H
#   option.  Also, if a binary file matches, "FILENAME: Binary file matches."
#   will be output rather than "Binary file FILENAME matches".  Files (other
#   than STDIN, which is always treated as text) are tested for binaryness
#   using Perl's -B operator (see "perldoc -f -X"), and the current locale is
#   taken into account (but note that most systems, including Cygwin, have no
#   UTF-16 locale, so if you want UTF-16 strings to be output rather than the
#   "Binary" message, then as with grep, you'll need to specify the -a
#   option).
#
#   Embedded NULs are stripped before outputting matching lines, but other
#   unprintable characters are output as is, so note that terminal-affecting
#   control codes may be output if Perl's -B option makes the wrong decision
#   on a file containing binary data somewhere (e.g. after the "first block or
#   so"), or you use -a to override.
#
# COMMANDLINE OPTIONS:
#   -a
#     Treat files specified after this option as text ("a" for "ASCII", like
#     grep, but one of the main uses for this option is to get UTF-16 strings
#     to be output) even if they would normally be considered binary.  Files
#     specified before it on the commandline are treated normally.
#
# DATE        MODIFICATION
# ==========  ==================================================================
# 2016-09-26  Original.


## Modules / pragmas used ######################################################

use strict;                      # catch undeclared variables and similar slips
use English qw(-no_match_vars);  # allow use of names like @ARG rather than @_
use File::Basename;              # for basename()
use locale;                      # consider cur. locale for -B binary file test
use warnings;                    # get warnings for this script but not modules


## File-scope state ############################################################

my $progname    = basename($PROGRAM_NAME);
my $exit_status = 1;    # 0 = match found, 1 = no match, 2 = an error occurred


## Subroutines #################################################################

# Prints "PROGNAME: MESSAGE." to STDERR and records that the program must
# eventually exit with status 2.  The arguments are joined with spaces to form
# MESSAGE.
sub error {
    print STDERR "$progname: @ARG.\n";
    $exit_status = 2;
    return;
}

# Returns the source text of the UTF-16 variant of a regexp: the escape \x00
# placed BETWEEN every pair of adjacent characters, with none before the first
# or after the last, so the result can match either byte order.
#
# The interleaving is purely textual, so regexp metacharacters receive a NUL
# as well; only patterns that are (mostly) literal text translate into
# something meaningful.
sub utf16_pattern {
    my ($regexp) = @ARG;
    return join('\x00', split(//, $regexp));
}

# Strips Perl's trailing " at FILE line N." from a die message so that it can
# be embedded in one of our own error messages.
sub strip_location {
    my ($message) = @ARG;
    $message =~ s/\A(.*) at .+? line \d+\.\s*\z/$1/s;
    return $message;
}


## Main ########################################################################

my $text = 0;    # true once -a has been seen on the commandline

if (scalar(@ARGV) >= 1 and $ARGV[0] eq "-a") {
    $text = 1;
    shift @ARGV;
}

if (scalar(@ARGV) < 1) {
    print STDERR "Usage: $progname [-a] PERLRE [FILE]...\n";
    exit 2;
}

my $regexp = shift @ARGV;

# Compile once up front: cheaper than recompiling per line, and a malformed
# regexp becomes a grep-style error (status 2) rather than a Perl die.
my $plain_re = eval { qr/$regexp/ };
if (not defined $plain_re) {
    error("invalid regular expression: " . strip_location($EVAL_ERROR));
    exit $exit_status;
}

# The UTF-16 variant is generated mechanically and may not be a valid regexp
# even when the original is; in that case keep going with the as-is search.
my $regexp16 = utf16_pattern($regexp);
my $utf16_re = eval { qr/$regexp16/ };
if (not defined $utf16_re) {
    print STDERR "$progname: warning: UTF-16 form of the regexp does not "
               . "compile; searching as-is only.\n";
}

if (scalar(@ARGV) == 0) {
    unshift(@ARGV, "-");
}

# Test definedness rather than truth so that a file named "0" is processed.
while (defined(my $param = shift @ARGV)) {
    if ($param eq "-a") {
        $text = 1;
        next;
    }

    # "-" means STDIN.  It is handled explicitly because three-argument open
    # (used so that file names are never interpreted as modes or commands)
    # would treat it as a literal file name.
    my $is_stdin = ($param eq "-");
    my $fh;
    if ($is_stdin) {
        $fh = \*STDIN;
    }
    elsif (not open($fh, '<', $param)) {
        error("$param: $OS_ERROR");
        next;
    }

    my $show_lines;    # decided on the first match; undef until then

    while (my $line = <$fh>) {
        next unless $line =~ $plain_re
                 or (defined $utf16_re and $line =~ $utf16_re);

        # A match must not hide an error reported for an earlier file.
        $exit_status = 0 if $exit_status != 2;

        if (not defined $show_lines) {
            # STDIN is always text; other files are text if -a is in effect
            # or Perl's -B heuristic does not consider them binary.
            $show_lines = ($text or $is_stdin or not -B $param) ? 1 : 0;
        }

        if ($show_lines) {
            $line =~ s/\x00//g;    # strip embedded NULs (e.g. UTF-16 padding)
            print "$param: $line";
            if ($line !~ /\n$/) {
                print "\n";        # final line of the file had no newline
            }
        }
        else {
            # One notice per binary file is enough; skip the rest of it.
            print "$param: Binary file matches.\n";
            last;
        }
    }

    close($fh) unless $is_stdin;
}

exit $exit_status;