#!/usr/bin/perl -w # # rename_to_DATE--SUBJ # # AUTHOR: # Dan Harkless # # COPYRIGHT: # This file is Copyright (C) 2008 by Dan Harkless, and is released under the # GNU General Public License . # # USAGE: # % rename_to_DATE--SUBJ # [-d ] [-e ] [-E ] [-n|-v] [-s] [-u] [-W] ... # # EXAMPLES: # % rename_to_DATE--SUBJ -d ../html_bodies -e .html -E .html -uv [0-9]* # % rename_to_DATE--SUBJ -E .eml -v -W *.eml # # DESCRIPTION: # Takes a group of mail message files (as from [n]mh) and renames them to be # of the form YYYY-MM-DD_HH:MM[:SS]--SUBJECT. This was written for the # purpose of converting an archive of security vulnerability emails into a # group of files downloadable from a website. # # Any '/'s in an email's Subject: line will be converted to '\'s in its new # filename. # # COMMANDLINE OPTIONS: # -d # Usually the mail files specified on the commandline are renamed. However, # for the archive this script was written for, all we wanted was the HTML # bodies of the mails, not the headers. Therefore, nmh's mhstore was used # to extract these in to a separate directory (../html_bodies), they were # renamed to be .html (rather than the # ..txt that mhstore would usually default to), # and then '-d ../html_bodies' was specified so that the files in that # directory would be renamed rather than the mail files specified on the # commandline. # # -e # In a case like the one described above under -d, you can use -e to specify # a file extension that's been added to the files in the -d directory. For # instance, if '-d ../html_bodies -e .html' is specified, then when # processing the mail file '42' in the current directory, the file we # actually rename is '../html_bodies/42.html'. The './42' file would be # expected to contain the Date: and Subject: headers; # '../html_bodies/42.html' need not. Note that the renamed files won't # end in .html unless you also specify '-E .html' (see below). # # -E # Add to the end of the renamed filenames. Thus they'll be of the # form YYYY-MM-DD_HH:MM[:SS]--SUBJECT.. Note that renamed files will # be extensionless if you don't specify this. # # -n # Just print the renaming actions we would take -- don't actually do any # renaming. Implies -v. # # -s # Omit the :SS (seconds) part of the timestamp in the new filename. # # -u # Replace spaces with underscores in the new filename. # # -v # Print the renaming actions we're taking as we do them. # # -W # Replace characters that are not allowed to occur in Windows filenames # ('\', '/', ':', '*', '?', '"', '<', '>', and '|') with underscores in the # new filename. # # DATE MODIFICATION # ========== ================================================================== # 2008-09-02 "use English qw(-no_match_vars)": avoid regex performance penalty. # 2007-04-21 Added -W and -E options for someone who was trying to use the # script on Windows (where '/' isn't the only illegal filename # character). Fixed handling of Windows-style files with '\r\n' # line-terminators. Clarified documentation of -d and -e. # 2004-01-13 Added -s and -u options. # 2004-01-12 Original. ## Modules used ################################################################ use English qw(-no_match_vars); # allow use of names like @ARG rather than @_ use File::Basename; # for basename() use Getopt::Std; # for getopts() ## Subroutines ################################################################# sub os_error { print STDERR "@ARG: $OS_ERROR.\n"; exit 1; } ## Main ######################################################################## $progname = basename($PROGRAM_NAME); use vars qw($opt_d $opt_e $opt_E $opt_n $opt_s $opt_u $opt_v $opt_W); if (not getopts("d:e:E:nsuvW") or scalar(@ARGV) < 1) { print STDERR "Usage: $progname\n ", "[-d ] [-e ] [-E ] [-n|-v] [-s] [-u] [-W] ...\n"; exit 1; } if (not $opt_d) { $opt_d = ""; } elsif ($opt_d !~ m(/$)) { $opt_d .= '/'; } if (not $opt_e) { $opt_e = ""; } if (not $opt_E) { $opt_E = ""; } %months = ( Jan => 1, Feb => 2, Mar => 3, Apr => 4, May => 5, Jun => 6, Jul => 7, Aug => 8, Sep => 9, Oct => 10, Nov => 11, Dec => 12 ); foreach $mail (@ARGV) { open(MAIL, $mail) or os_error($mail); $saw_date = 0; $saw_subject = 0; while () { if (/^Subject:\s+([^\n\r]*?)\s*$/) { $subject = $1; while (1) { $ARG = ; if ($ARG =~ /^\t(.*)/) { $subject .= $1; } else { last; } } $subject =~ tr(/)(\\); if ($opt_u) { $subject =~ tr( )(_); } $saw_subject = 1; } if (/^Date: \w+, (\d+) (\w+) (\d+) (\d\d):(\d\d):(\d\d)/) { $date = sprintf "%04d-%02d-%02d_%02d:%02d", $3, $months{$2}, $1, $4, $5; if (not $opt_s) { $date .= sprintf ":%02d", $6; } $saw_date = 1; } if ($saw_date and $saw_subject) { last; } } close MAIL; $rename_from = "$opt_d$mail$opt_e"; $rename_to = "$date--$subject$opt_E"; if ($opt_W) { $rename_to =~ s{[\\/:*?"<>|]}{_}g; } $rename_to = $opt_d . $rename_to; if ($opt_n or $opt_v) { print "rename(\"$rename_from\", \"$rename_to\")\n"; } if (-e $rename_to) { print STDERR "$rename_to: WARNING -- overwriting.\n"; } if (not $opt_n) { rename($rename_from, $rename_to) or os_error("rename(\"$rename_from\", \"$rename_to\")"); } }