#!/usr/bin/perl -w # # monitor_file # # AUTHOR: # Dan Harkless # # COPYRIGHT: # This file is Copyright (C) 2008 by Dan Harkless, and is released under the # GNU General Public License . # # USAGE: # % monitor_file -m # [-d ] [-e ] [-p ] [-P] # [-o -r ] # # EXAMPLES: # % monitor_file -d "-u -T" -m myfile -P # % monitor_file -e me@domain.tld -m /proc/mdstat -p ~/logs -o '^.*?(recovery| # resync).*' -r '[...] $1 [...]' # # DESCRIPTION: # Monitors a file (or pseudo-file, like a /proc entry) to watch for changes. # If there are any, 'diff' output will be displayed, or sent to the email # address specified. # # monitor_file only checks the file once per invocation, then exits -- it's # best called from cron. The way it determines if the file has changed since # last time is by comparing it to a saved "previous state" file. If there's a # difference, then the 'diff' output will be presented, and then the current # file contents will be written to the previous state file. With this system, # you'll only get bugged once each time a file changes (rather than being # bugged every time a cron check fires off and a file is not matching a static # "expected state" file), and the previous state will be remembered even # across reboots. # # Note that since we run the 'diff' command rather than checking for # differences manually within the script (to get the benefit of the nice 'diff # -u' output), there's a short window in between diffing and saving the # previous state file during which the contents of the monitored file could # change from what 'diff' saw. Also note that when monitor_file creates the # previous state file (it does this if it doesn't exist already, like on the # first time it's called on a particular file), it doesn't try to duplicate # permissions of the monitored file -- permissions will be according to the # active umask. # # If monitor_file encounters a difference or error, it exits with status 1. 
# # -d, if specified, is a string of options to pass to the diff command. If # not specified, it defaults to -u. If you need to pass multiple options, # enclose the list of options in quotes so the shell passes them as a single # argument to monitor_file. # # -e, if specified, will cause any file differences (or errors) to be sent to # the email address specified rather than printed on stderr. You may think # that this option is only useful if emailing a remote account, since you get # emailing to a local account for free when a script called from cron has # output, but you may want to use it with local accounts as well, since the # email's Subject will be more succinct. If you use this option, the # MailTools module collection is required; it's available from CPAN. # # -m is required, and specifies the file to be monitored. # # -o is optional and specifies an "original regular expression" to be replaced # by the "replacement string" specified by -r. This is useful if you need to # filter the contents of the monitored file to exclude differences you don't # want reported. For instance, I run a cron job to check /proc/mdstat once a # minute (as an alternative to running mdadm in daemon mode and having to deal # with its limited event emailing capability or else writing a custom emailing # script for it), but if a resync is occurring, I only want to be notified # when it starts and when it finishes -- I don't want an email shot at me # every minute reporting the resync progress. Therefore, I use -o and -r # options like the ones in the EXAMPLES section above. Note that, as shown in # that example, you can use substitution variables like '$1' in your # replacement string (thus if you want to use a literal '$', be sure to # backslash it). Also note that using -o / -r will cause a # .filt file to be created (and we do not delete it). # # -p is optional, and specifies a "previous state directory". 
# Ordinarily, <monitored_file>.filt, <monitored_file>.prev, and
# <monitored_file>.prev.prev files are saved in the same directory as the
# monitored file.  However, sometimes this is undesirable or impossible, e.g.
# when monitoring a /proc pseudo-file.  In cases like these, you can specify a
# directory to write these files to with -p.  Note that if -m specifies a file
# using a relative path, this path will still be interpreted relative to the
# working directory at the time monitor_file was called, not the -p directory.
#
# -P (capital p), if specified, will back up <monitored_file>.prev to
# <monitored_file>.prev.prev prior to wiping it out, so that you'll be able to
# check on the state of the monitored file as of the last time monitor_file was
# run, AND as of the time before last.
#
# -r specifies a replacement string.  See -o for more information.
#
# DATE       MODIFICATION
# ========== ==================================================================
# 2008-09-02 "use English qw(-no_match_vars)": avoid regex performance penalty.
# 2008-06-16 Changed the -o / -r replacement code from 's/$opt_o/$opt_r/g' to
#            's/$opt_o/eval qq{"$opt_r"}/eg' so that substitution variables
#            like '$1' can be used in the replacement string.
# 2007-12-23 Added -o and -r options to allow excluding uninteresting
#            differences.  Changed -p to specify a previous state directory
#            (which we chdir() to) rather than a previous state file.  -P was
#            unnecessarily copying .prev to .prev.prev on every invocation,
#            rather than only when there was a change.  Documented the mild
#            race condition in between running 'diff' and saving the state of
#            the monitored file.  Changed the message when we're creating the
#            .prev file for the first time to include the file contents (so
#            that when an email is being sent, it can be saved as a record of
#            what the normal expected state of the file is supposed to be, and
#            unified and context diffs can be cross-referenced to it).
# 2006-08-12 Added -P option to save the old .prev file as .prev.prev.
# 2006-08-12 Added -d option to allow passing options to diff.
# 2006-08-12 My original use for this script was to monitor RAID status /proc
#            files, so it was sufficient to output (both versions of) the first
#            differing line (this was also so the email would be as succinct as
#            possible and SMS versions sent by my sms_biff script would be
#            meaningful).  Later I started using it on files like nslookup
#            output where the first differing line wasn't necessarily enough,
#            so I added -w to also output the entire contents of the old and
#            new versions of the file.  However, I'm now also using it on long
#            files like web pages with differences potentially in multiple
#            places, so we really want proper diff output.  We could use CPAN
#            modules to diff our own slurped file contents buffers, and this
#            would retain the advantage of there not being a race condition in
#            between diffing and saving the previous state file, but since that
#            race condition may never matter, and for simplicity, we'll just
#            call out to the 'diff' command, and do a 'cp' immediately
#            afterwards to limit the race window.
# 2006-08-12 When I first wrote this it was to monitor system files, so
#            defaulting the previous state file to
#            /var/run/monitor_file made sense.  However, I'm
#            now mostly using it for monitoring web pages pulled down with curl
#            (and then often mangled to remove non-meaningful changed parts),
#            so the default for the previous state file has been changed to be
#            <monitored_file>.prev.  If you still want to use
#            /var/run/monitor_file files you'll have to specify them with -p.
# 2004-02-28 Search-and-replace error in my_die() caused it to not work.
# 2003-04-21 If the monitored file and previous state file differ in number of
#            lines, print totals using scalar(@array), not $#array.  Also,
#            only print an 's' at the end of "line" if the total isn't 1.
# 2002-11-21 Original.

## Modules used ################################################################

use strict;
use warnings;

use Cwd;                         # for cwd()
use English qw(-no_match_vars);  # allow use of names like @ARG rather than @_
use File::Basename;              # for basename() and dirname()
use Getopt::Std;                 # for getopts()

# Use only while debugging (due to major performance hit):
#use diagnostics;  # turn on -w and output verbose versions of warnings


## Globals #####################################################################

# Filled in by getopts(); Getopt::Std requires these to be package variables.
our ($opt_d, $opt_e, $opt_m, $opt_o, $opt_p, $opt_P, $opt_r);

my $progname = basename($PROGRAM_NAME);
my $email;     # Mail::Send object, created on first output when -e is given
my $print_fh;  # destination of email_or_print(): mail body handle or STDERR


## Subroutines #################################################################

# email_or_print(LIST)
#
# Writes LIST to the report destination.  On the first call the destination is
# chosen: if -e was given, a Mail::Send message addressed to $opt_e is opened
# and its body handle is used; otherwise (or if opening the message returned a
# false value) STDERR is used and a "changed state" banner line is printed
# first, mirroring the email Subject.  Later calls reuse the same destination.
sub email_or_print {
    if (not $print_fh) {
        if ($opt_e) {
            require Mail::Send;  # only needed (and only required) with -e

            # TBD: Reassign STDERR to be this handle for Perl errors?  Carp?
            $email = Mail::Send->new;
            $email->to("$opt_e");
            $email->subject("$progname: $opt_m changed state!");
            $print_fh = $email->open();
        }

        if (not $print_fh) {
            # We could come here if not $opt_e or if $email->open() failed.
            $print_fh = \*STDERR;
            print {$print_fh} "$progname: $opt_m changed state!\n";
        }
    }

    print {$print_fh} @ARG;
    return;
}

# filter_line(LINE)
#
# Returns LINE with every match of the -o regular expression replaced by the
# -r replacement string.  The replacement is interpolated at substitution time
# so that capture variables such as '$1' work.
#
# NOTE(review): this string-evals the -r argument.  That is the documented
# design (the operator supplies it on the command line), but it must never be
# fed from an untrusted source.
sub filter_line {
    my ($line) = @ARG;

    # Replacement strings have always been evaluated without strictures; keep
    # it that way so existing -r arguments behave exactly as before.
    no strict;
    $line =~ s/$opt_o/eval qq{"$opt_r"}/eg;
    return $line;
}

# usage_and_exit()
#
# Prints the usage summary on STDERR and exits with status 1.
sub usage_and_exit {
    print STDERR
        "Usage:  $progname -m <file_to_monitor>\n",
        "            [-d <diff_options>] [-e <email_address>] ",
        "[-p <prev_state_dir>] [-P]\n",
        "            [-o <orig_regex> -r <replacement_string>]\n";
    exit 1;
}


## Main ########################################################################

# Process commandline arguments.
my $options_ok = getopts("d:e:m:o:p:Pr:");

# Test with defined/length rather than truthiness so that legitimate values
# such as a file named "0" or an empty replacement string (-r '', i.e. "delete
# what -o matched") are accepted.  The pattern itself must be non-empty: an
# empty pattern would make s/// reuse the last successful match.
my $have_file        = (defined($opt_m) && length($opt_m)) ? 1 : 0;
my $have_regex       = (defined($opt_o) && length($opt_o)) ? 1 : 0;
my $have_replacement = defined($opt_r)                     ? 1 : 0;

if ((not $options_ok)
    or (not $have_file)
    or ($have_regex xor $have_replacement))
{
    usage_and_exit();
}

if (not defined $opt_d) {
    $opt_d = "-u";
}

# Make the monitored path absolute now, because -p may chdir() elsewhere.
if ($opt_m !~ m{^/}) {
    $opt_m = cwd() . "/$opt_m";
}

my $base_name;
if (defined($opt_p) && length($opt_p)) {
    if (not chdir($opt_p)) {
        email_or_print("chdir($opt_p): $OS_ERROR.\n");
        exit 1;
    }
    # State files live in the -p directory, named after the file alone.
    $base_name = basename($opt_m);
}
else {
    $base_name = $opt_m;
}

my $prev_state_file = $base_name . ".prev";

# Check for the previous state file.
if (not -f $prev_state_file) {
    email_or_print("$prev_state_file did not exist. Creating it");

    my ($monitored_fh, $prev_fh);
    if (not open($monitored_fh, '<', $opt_m)) {
        email_or_print("...\n");
        email_or_print("$opt_m: $OS_ERROR.\n");
        exit 1;
    }
    if (not open($prev_fh, '>', $prev_state_file)) {
        email_or_print("...\n");
        email_or_print("$prev_state_file: $OS_ERROR.\n");
        exit 1;
    }

    # Report the initial contents so the message doubles as a baseline record.
    email_or_print(" with this content:\n");
    while (my $line = <$monitored_fh>) {
        if ($have_regex) {
            $line = filter_line($line);
        }
        email_or_print($line);
        print {$prev_fh} $line;
    }
    close($monitored_fh);

    # Buffered write errors (e.g. disk full) only surface at close time.
    if (not close($prev_fh)) {
        email_or_print("$prev_state_file: $OS_ERROR.\n");
    }
    exit 1;
}

my $cur_file;
if ($have_regex) {
    # Need to do filtering before diffing.
    $cur_file = $base_name . ".filt";

    my ($monitored_fh, $filt_fh);
    if (not open($monitored_fh, '<', $opt_m)) {
        email_or_print("$opt_m: $OS_ERROR.\n");
        exit 1;
    }
    if (not open($filt_fh, '>', $cur_file)) {
        email_or_print("$cur_file: $OS_ERROR.\n");
        exit 1;
    }
    while (my $line = <$monitored_fh>) {
        print {$filt_fh} filter_line($line);
    }
    close($monitored_fh);

    # Don't diff against a filtered file that was not completely written.
    if (not close($filt_fh)) {
        email_or_print("$cur_file: $OS_ERROR.\n");
        exit 1;
    }
}
else {
    # Simply diff the original files.
    $cur_file = $opt_m;
}

my $save_state_command = "cp -pf \Q$cur_file\E \Q$prev_state_file\E 2>&1";
if ($opt_P) {
    # Keep the state before last as <prev_state_file>.prev.
    $save_state_command =
        "(mv -f \Q$prev_state_file\E \Q$prev_state_file\E.prev 2>&1; "
        . $save_state_command . ")";
}

# Do the diff.  As documented above, there's a mild race condition here.  The
# file could potentially change in between the call to diff and the call to
# cp/mv.  We could remedy this by using CPAN modules to do the diffing here
# inside the script, or by copying the live file to a temporary file before
# diffing and saving (which might make the diff output sent via email slightly
# confusing, since it wouldn't include the original file path -- this path would
# still be included in the Subject when using -e, though).
#
# The state is saved on ANY non-zero diff status, not just 1: some diff
# versions report differing binary files as "trouble" (status 2), and those
# must still update the saved state or the same change would be reported on
# every run.  $opt_d is intentionally unquoted so it can carry several options.
my $diff_output =
    `diff $opt_d \Q$prev_state_file\E \Q$cur_file\E 2>&1 || $save_state_command`;

if ($diff_output) {
    email_or_print($diff_output);
    exit 1;
}

# We'll let the potential email file handle close itself (and auto-send).