| #!/usr/local/bin/perl |
| # |
| # Heuristically converts line endings to the current OS's preferred format |
| # |
| # All existing line endings must be identical (e.g. lf's only, or even |
| # the accidental cr.cr.lf sequence.) If some lines end in lf and others in |
| # cr.lf, the file is presumed binary. If the cr character appears anywhere |
| # except prefixed to an lf, the file is presumed binary. If there is no |
| # change in the resulting file size, or the file is binary, the conversion |
| # is discarded. |
| # |
| # Todo: Handle NULL stdin characters gracefully. |
| # |
| |
| use IO::File; |
| use File::Find; |
| |
# Extensions that are never converted.  The list is stored as one string in
# which every entry has a hyphen on both sides ("-gif-jpg-...-"), so an
# extension is looked up by matching "-ext-" against it.
our $ignore = '-' . join(
    '-',

    # Image formats
    qw(gif jpg jpeg png ico bmp),

    # Archive formats
    qw(tar gz z zip jar war bz2 tgz),

    # Many document formats
    qw(eps psd pdf chm ai),

    # Some encodings
    qw(ucs2 ucs4),

    # Some binary objects
    qw(class so dll exe obj lib a o lo slo sl dylib),

    # Some build env files
    qw(mcp xdc ncb opt pdb ilk exp res pch idb sbr),
) . '-';

# Defaults; the command-line handling further down overrides them.
our $preservedate = 1;    # restore the original timestamp (--touch clears this)
our $forceending  = 0;    # 1 for --force, 2 for --FORCE
our $givenpaths   = 0;    # becomes 1 once a path argument has been scanned
our $notnative    = 0;    # -1 for --nocr, 1 for --cr, 0 for the OS default
| |
# Walk the command line strictly left to right.  An option only changes a
# setting; a path is scanned immediately with the settings in effect at that
# point, so options must precede the paths they are meant to affect.
# Any unrecognized argument starting with '-' aborts with the usage text.
while (defined(my $arg = shift @ARGV)) {
    if ($arg eq '--touch') {
        $preservedate = 0;
    }
    elsif ($arg eq '--nocr') {
        $notnative = -1;
    }
    elsif ($arg eq '--cr') {
        $notnative = 1;
    }
    elsif ($arg eq '--force') {
        $forceending = 1;
    }
    elsif ($arg eq '--FORCE') {
        $forceending = 2;
    }
    elsif ($arg =~ m/^-/) {
        die "What is " . $arg . " supposed to mean?\n\n"
          . "Syntax:\t$0 [option(s)] [path(s)]\n\n" . <<'OUTCH';
Where:  paths specifies the top level directory to convert (default of '.')
        options are;

          --cr     keep/add one ^M
          --nocr   remove ^M's
          --touch  the datestamp (default: keeps date/attribs)
          --force  mismatched corrections (unbalanced ^M's)
          --FORCE  all files regardless of file name!

OUTCH
    }
    else {
        find(\&totxt, $arg);
        print "scanned " . $arg . "\n";
        $givenpaths = 1;
    }
}

# No path on the command line: convert the current directory tree.
if (!$givenpaths) {
    find(\&totxt, '.');
    print "did .\n";
}
| |
# totxt - File::Find "wanted" callback that normalizes one file's line endings.
#
# Reads the file named by $_ (relative to the current directory), writes a
# converted copy to ".#<name>" next to it, and replaces the original only when
# every line ending in the file was identical and the converted size differs.
#
# Settings read from the script's globals:
#   $ignore        hyphen-delimited list of extensions that are skipped
#   $forceending   0 = strict; 1 = tolerate mismatched endings;
#                  2 = additionally ignore the extension list
#   $notnative     >0 = write cr.lf; nonzero = write in binary mode;
#                  0 = let the OS text layer choose the ending
#   $preservedate  true = restore the original modification time
#
# Returns nothing.  Dies if a file cannot be opened, written, removed or
# renamed.
sub totxt {
    use strict;
    use warnings;
    our ($ignore, $forceending, $notnative, $preservedate);

    my $oname = $_;
    my $tname = '.#' . $oname;
    return if !-f $oname;

    if ($forceending < 2) {
        # Every dot-separated component after the first counts as an
        # extension, so "x.tar.bak" is skipped because of "tar".
        my @parts = split /\./, $oname;
        while (@parts > 1) {
            my $ext = pop @parts;

            # \Q..\E: the extension is data, not a pattern.
            return if $ignore =~ m{-\Q$ext\E-}i;
        }
    }

    # Leave Subversion's administrative area alone.
    return if $File::Find::dir =~ m{^(?:.+/)?\.svn(?:/.+)?$};

    my @ostat = stat $oname;
    my $srcfl = IO::File->new($oname, 'r')
        or die "Cannot read $File::Find::dir/$oname: $!\n";
    my $dstfl = IO::File->new($tname, 'w')
        or die "Cannot create $File::Find::dir/$tname: $!\n";
    binmode $srcfl;
    binmode $dstfl if $notnative;

    # Number of cr's in front of each lf, learned from the first terminated
    # line.  Left undefined whenever the file must not be replaced.
    my $t;
    while (defined(my $line = <$srcfl>)) {
        if ($line =~ s/(\r*)\n$/\n/) {
            my $n = length $1;
            $t = $n if !defined $t;
            if (!$forceending && ($n != $t || $line =~ /\r/)) {
                print "mismatch in " . $oname . ":" . $n . " expected " . $t . "\n";
                undef $t;
                last;
            }
            elsif ($notnative > 0) {
                $line =~ s/\n$/\r\n/;
            }
        }
        print {$dstfl} $line;
    }

    # Same number of bytes in and out: nothing was converted.
    undef $t if defined $t && tell($srcfl) == tell($dstfl);

    $srcfl->close;
    my $flushed = $dstfl->close;
    if (defined $t && !$flushed) {
        # Never replace the original with a copy that failed to reach disk.
        my $err = $!;
        unlink $tname;
        die "Cannot finish writing $File::Find::dir/$tname: $err\n";
    }

    if (defined $t) {
        # Remove first; presumably for platforms where rename cannot
        # replace an existing file.
        unlink $oname
            or die "Cannot remove $File::Find::dir/$oname: $!\n";
        rename $tname, $oname
            or die "Cannot rename $tname to $oname in $File::Find::dir: $!\n";
        utime $ostat[9], $ostat[9], $oname if $preservedate;
        chmod $ostat[2] & 07777, $oname;

        # stat() fields 4 and 5 are uid and gid.
        chown $ostat[4], $ostat[5], $oname;
        print "Converted file " . $oname . " to text in " . $File::Find::dir . "\n";
    }
    else {
        unlink $tname
            or die "Cannot remove $File::Find::dir/$tname: $!\n";
    }
    return;
}