#!/usr/bin/perl -w
#
# d2u
#
# AUTHOR:
#   Dan Harkless
#
# COPYRIGHT:
#   This file is Copyright (C) 2008 by Dan Harkless, and is released under the
#   GNU General Public License.
#
# USAGE:
#   % d2u [-r [(-f|-F) [-s (0|1|2)]]] [(-v|-V)] <file> [...]
#
# DESCRIPTION:
#   A wrapper around 'dos2unix' which preserves file ownership, permissions, and
#   modification time, and which won't mangle binary files.
#
#   For each file specified on the commandline, the 'file' command is run, and
#   if it outputs a string containing "with CRLF", 'dos2unix' is run on the
#   file.
#
#   'dos2unix' also gets run if file's output contains the string "script text",
#   since with scripts 'file' doesn't tell you whether they have CRLFs.
#
# COMMANDLINE OPTIONS:
#   -f
#     Follow symlinks when recursing.
#
#   -F
#     Use File::Find::find()'s follow_fast option rather than regular follow.
#     -F will in general be faster than -f, but symlinks may cause some of your
#     files to be processed more than once, which may reverse the time
#     advantage, especially if you have very large files.
#
#   -r
#     Recurse into any directories specified on the commandline and process all
#     files in those directories and their descendants.
#
#   -s
#     Set File::Find::find()'s follow_skip option to the given value:
#
#     0 : d2u aborts if any file is about to be processed a second time.
#
#     1 : Same as not specifying -s; d2u aborts if any directory or symlink is
#         about to be processed a second time, but plain files about to be
#         processed again are silently ignored.
#
#     2 : Any file about to be processed a second time is silently ignored.
#
#   -v
#     Don't suppress dos2unix's "converting file" messages.
#
#   -V
#     Don't suppress dos2unix's "converting file" messages; also output the
#     results of the 'file' command for each file.
#
# DATE       MODIFICATION
# ========== ==================================================================
# 2008-09-02 "use English qw(-no_match_vars)": avoid regex performance penalty.
# 2003-09-24 It's possible for 'file' to report stuff like "ASCII English text,
#            with CRLF, CR line terminators" (or "...CRLF, LF..."), so changed
#            trigger string from "with CRLF line terminators" to "with CRLF".
# 2003-09-03 Need to use "-|" rather than `file '$f'` to prevent all shell
#            interpretation of special characters in filenames.
# 2003-08-18 Added -f, -F, and -s to control link-following behavior.
# 2003-08-18 Running on a large file hierarchy by being called from 'find' can
#            be very slow due to the repeated interpreter startup / compile
#            time.  Added -r option for d2u to do its own recursion.
# 2003-05-30 Original.


## Modules used ################################################################

use strict;
use warnings;

use English qw(-no_match_vars);  # allow use of names like @ARG rather than @_
use File::Basename;              # for basename()
use File::Find;                  # for find()
use Getopt::Std;                 # for getopts()


## File-scoped state ###########################################################

# These are declared before the subroutines so the subs can close over them.
my $progname                  = basename($PROGRAM_NAME);
my $exit_status               = 0;  # becomes 1 as soon as any error is reported
my $had_an_error_on_last_file = 0;  # reset at the start of each process_file()
my %opt;                            # commandline switches, filled in by getopts()


## Subroutines #################################################################

# error(@message)
#
# Prints "<progname>: <message>" to STDERR and records that the run (and the
# file currently being processed) has failed.  Does not exit.
sub error {
    print STDERR "$progname: @ARG\n";
    $exit_status               = 1;
    $had_an_error_on_last_file = 1;
    return;
}

# run_command(@command)
#
# Runs @command via list-form system() (no shell involved).  Returns 1 if the
# command exited with status 0.  Otherwise reports why it failed through
# error() and returns 0.
sub run_command {
    my @command = @ARG;

    my $status = system(@command);
    return 1 if $status == 0;

    if ($status == -1) {
        # Only in this case does $OS_ERROR describe the failure.
        error("$command[0]: $OS_ERROR.");
    }
    elsif ($status & 127) {
        error("$command[0]: killed by signal " . ($status & 127) . ".");
    }
    else {
        error("$command[0]: exited with status " . ($status >> 8) . ".");
    }
    return 0;
}

# describe_file($f)
#
# Runs 'file' on $f and returns the first line of its output (newline
# included).  Returns undef, after calling error(), if 'file' could not be
# started or printed nothing.
sub describe_file {
    my ($f) = @ARG;

    # List-form pipe open: no shell ever sees the filename, and a failed
    # fork/exec is reported here in the parent rather than falling through.
    my $child;
    if (not open($child, '-|', 'file', $f)) {
        error("Can't run 'file' on \"$f\": $OS_ERROR.");
        return;
    }
    my $file_output = <$child>;

    # The status from close is deliberately not treated as an error; read
    # failures are detected from the text 'file' prints (see process_file()).
    close $child;

    if (not defined $file_output) {
        error("'file' produced no output for \"$f\".");
        return;
    }
    return $file_output;
}

# convert_file($f)
#
# Converts $f with 'dos2unix' by way of the temporary file "$f.d2u", then
# copies the result back over $f and restores $f's permission bits and
# modification time.  $f is only overwritten if 'dos2unix' succeeded.
sub convert_file {
    my ($f) = @ARG;
    my $tempfile = "$f.d2u";

    # -l as well as -e, so that a dangling symlink with that name is not
    # written through.
    if (-e $tempfile or -l $tempfile) {
        error("Skipping \"$f\": tempfile name \"$tempfile\" already exists.");
        return;
    }

    # Capture mode and mtime up front; without them the file can't be put
    # back the way it was, so don't touch it.
    my @f_stat = stat($f);
    if (not @f_stat) {
        error("stat: \"$f\": $OS_ERROR.");
        return;
    }
    my $f_mode    = $f_stat[2] & 07777;
    my $f_modtime = $f_stat[9];

    my @dos2unix_args = ("dos2unix");
    if (not ($opt{v} or $opt{V})) {
        push @dos2unix_args, "-q";
    }
    push @dos2unix_args, ("-n", $f, $tempfile);

    if (not run_command(@dos2unix_args)) {
        # Never copy a missing or partial conversion over the original.
        if (-e $tempfile) {
            unlink $tempfile
                or error("unlink: \"$tempfile\": $OS_ERROR.");
        }
        return;
    }

    # Copy the converted data back over $f rather than renaming the tempfile
    # onto it.
    if (not run_command("cp", "-f", $tempfile, $f)) {
        # $f may be incomplete at this point, so keep the converted copy.
        error("\"$f\" may be incomplete; converted copy left in"
            . " \"$tempfile\".");
        return;
    }

    chmod $f_mode, $f
        or error("chmod: \"$f\": $OS_ERROR.");
    utime(time, $f_modtime, $f)
        or error("utime: \"$f\": $OS_ERROR.");
    unlink $tempfile
        or error("unlink: \"$tempfile\": $OS_ERROR.");
    return;
}

# process_file($f)
#
# Asks 'file' what $f is and, if the description contains "with CRLF" or
# "script text", converts it with convert_file().  With -V, the output of
# 'file' is echoed to STDERR.
sub process_file {
    my ($f) = @ARG;

    $had_an_error_on_last_file = 0;

    my $file_output = describe_file($f);
    return if not defined $file_output;

    # Have to check for ": can't" because 'file' (at least the astron.com
    # version as of 3.37) doesn't return an error status if it can't read a
    # file.  We don't use our error() routine on these guys because the
    # message already includes the filename and a newline.
    if ($file_output =~ /: can\'t/) {  # \ just to make Emacs' CPerl mode happy
        $exit_status               = 1;
        $had_an_error_on_last_file = 1;
        print STDERR $file_output;
        return;
    }
    if ($opt{V}) {
        print STDERR $file_output;
    }

    # Match the trigger strings against the description only, so that a
    # filename which itself contains "script text" or "with CRLF" cannot
    # trigger a conversion.  If the "<name>:" prefix isn't found verbatim, the
    # whole line is used.
    my $description = $file_output;
    $description =~ s/\A\Q$f\E:\s*//;

    if ($description =~ /(?:with CRLF|script text)/) {
        convert_file($f);
    }
    return;
}

# process_file_from_find()
#
# File::Find "wanted" callback: processes the entry find() is visiting.
sub process_file_from_find {
    process_file($File::Find::name);
    return;
}


## Main ########################################################################

# Besides getopts() failures, reject a missing file operand and any -s value
# other than the documented 0, 1 or 2.
if (not getopts("fFrs:vV", \%opt)
    or (defined $opt{s} and $opt{s} !~ /\A[012]\z/)
    or scalar(@ARGV) < 1)
{
    error("Usage:"
        . " $progname [-r [(-f|-F) [-s (0|1|2)]]] [(-v|-V)] <file> [...]");
    exit 1;
}

if (not defined $opt{s}) {
    $opt{s} = 1;
}

my %find_options = (wanted      => \&process_file_from_find,
                    follow      => $opt{f},
                    follow_fast => $opt{F},
                    follow_skip => $opt{s},
                    no_chdir    => 1);

foreach my $f (@ARGV) {
    if (not -e $f) {
        error("\"$f\": $OS_ERROR.");
    }
    elsif (-d $f) {
        if ($opt{r}) {
            find(\%find_options, $f);
        }
        else {
            process_file($f);
        }
    }
    elsif (-f $f) {
        process_file($f);
    }
    # Anything that is neither a directory nor a plain file is skipped.
}

# If the last file went fine but an earlier one didn't, say why the exit
# status is non-zero.
if ($exit_status != 0 and not $had_an_error_on_last_file) {
    error("Error exit delayed from previous errors.");
}

exit $exit_status;