#!/usr/bin/perl
# stripmime - get rid of MIME crap in email
#
# by Adam Glass
#
# Reads one email message on STDIN and writes it to STDOUT, keeping only
# the MIME sections whose type is marked "include" or "forward" in %mimes.
#
# TO DO: - does this program work on strictly uuencoded attachments?
#          (bwaaahahaha -- doubt it)
#        - command line argument (regexp) to match against filenames
#          of attachments and (in|ex)clude them from output?
#
# Known limitation: the line directly after a MIME boundary must be the
# section's Content-Type header, otherwise the program dies.
############################################################################

use strict;
use warnings;

my $version      = "0.7.2"; # version string
my $headers      = 1;       # default to echo message headers
my $cleanheaders = 0;       # strip non-essential headers in "forward" sections
my $echomime     = 0;       # default to not echo mime section info
my $sep          = "";      # section separator (MIME boundary string)
my $needsep      = 0;       # flag for section change
my $sectionmark  = 0;       # 0=MIME headers, 1=message's headers, 2+ otherwise
my $sectiontype  = 0;       # 0=message headers, 1=body/nonmime, 2=body/mime
my $mimesection  = "";      # MIME type of the current section
my $pending;                # one line of push-back for next_line()

# Actions stored in %mimes.
my $ACT_INCLUDE = 1;        # pass section through to STDOUT
my $ACT_EXCLUDE = 2;        # drop section (any value other than 1 or 3 is ignored)
my $ACT_FORWARD = 3;        # pass section through, each line prefixed with "> "

############################################################################
# (If you're customizing this code and adding/removing MIME types to include
# by default, this is the place to do it.  %mimes maps a lower-cased MIME
# type to an action:
#   1: passed through to STDOUT (if it appears in the input stream)
#   3: passed to STDOUT but each line is prepended with "> "
# Any other value is ignored -- not passed to STDOUT.  (Or, in the
# parlance of the command line arguments, excluded.)
my %mimes = (
    "text/plain"     => $ACT_INCLUDE,   # default MIME type to pass (we like text)
    "message/rfc822" => $ACT_FORWARD,   # un-MIME Netscape forwarded messages
);

############################################################################
#
# Command-line argument parsing.
#
# NOTE(review): the per-flag dispatch below was missing from the copy of the
# file this was restored from; it is rebuilt from the help text and the
# %mimes comments.  Confirm the flag letters and the value used for -e
# against the upstream source.
#
my $pending_action = 0;     # non-zero while a -i/-e/-f flag awaits its MIME types

foreach my $arg (@ARGV) {
    if (substr($arg, 0, 1) eq "-") {            # starts with a -?
        dovers() if ($arg =~ /^(-)?-version/i); # version text (exits)
        dohelp() if ($arg =~ /^(-)?-help/i);    # help/usage message (exits)
        $pending_action = 0;                    # a new flag cancels a pending one
        foreach my $i (1 .. length($arg) - 1) { # every letter after the dash
            my $flag = substr($arg, $i, 1);
            if    ($flag eq "m") { $echomime       = 1; }
            elsif ($flag eq "h") { $headers        = 0; }
            elsif ($flag eq "c") { $cleanheaders   = 1; }
            elsif ($flag eq "i") { $pending_action = $ACT_INCLUDE; }
            elsif ($flag eq "e") { $pending_action = $ACT_EXCLUDE; }
            elsif ($flag eq "f") { $pending_action = $ACT_FORWARD; }
            else {                              # not a valid flag
                print "invalid option: " . $flag . "\n";
                dohelp();                       # do help/usage message
            }
        }
    }
    elsif ($pending_action != 0) {              # expecting MIME types
        my @mimelist = split(",", $arg);
        dohelp() if (@mimelist == 0);           # none? do help/usage msg
        foreach my $mime (@mimelist) {
            dohelp() if ($mime !~ /\w\/\w/);    # must look like type/subtype
            $mimes{lc($mime)} = $pending_action;
        }
        $pending_action = 0;                    # reset for next argument
    }
    # Any other bare argument is silently ignored, as before.
}

# If -i/-e/-f was last argument, we didn't get MIME types we were expecting...
dohelp() if ($pending_action != 0);

$cleanheaders = 0 if ($echomime);               # -m overrides -c

#
# main loop - scan through STDIN
#
while (defined(my $intxt = next_line())) {

    #
    # final MIME boundary: remember that a closing separator is owed
    #
    if ($sep && ($intxt eq "--$sep--\n")) {
        $needsep     = 1;                       # print final boundary (if -m)
        $sectiontype = 1;                       # just in case there's more
    }

    #
    # non-final MIME boundary: a new section starts; its Content-Type
    # (expected on the very next line) decides what happens to it
    #
    elsif ($sep && ($intxt eq "--$sep\n")) {
        $needsep = 1;

        my $ctype_line = next_line();
        die "Input stream ended unexpectedly after section marker"
            unless defined $ctype_line;

        # Drop parameters (everything from the first ";") before matching.
        my $bare    = $ctype_line;
        my $hassemi = index($bare, ";");
        $bare = substr($bare, 0, $hassemi) if ($hassemi != -1);

        # Header field names are case-insensitive.
        my ($type) = $bare =~ /^Content-Type:\s*(.*?)\s*$/i;
        if (!defined $type) {
            # Show the offending line itself (minus its newline).
            my $shown = $ctype_line;
            chomp $shown;
            die "Missing Content-Type after separator ($shown)";
        }

        $mimesection = lc($type);               # save for later
        $sectiontype = 2;                       # now inside a MIME section
        $sectionmark = 0;                       # ... in its MIME headers

        my $action = action_for($mimesection);
        if (($action == $ACT_INCLUDE) || ($action == $ACT_FORWARD)) {
            if ($needsep && $echomime) {        # owe a separator line?
                print "--$sep\n";
                $needsep = 0;
            }
            if ($echomime) {
                print "> " if ($action == $ACT_FORWARD);
                print $ctype_line;
            }
        }
    }

    #
    # still in the message's own headers ($sectiontype == 0)
    #
    elsif ($sectiontype == 0) {
        if ($intxt =~ /^Content-Type:/i) {      # MIME header: catch boundary
            print $intxt if ($echomime);
            my $found = boundary_in($intxt);
            if (defined $found) {               # boundary on this line
                $sep = $found;
            }
            else {                              # maybe on the next line
                my $next = next_line();
                if (!defined $next) {
                    print "Early EOF!\n";
                    exit -1;
                }
                my $next_boundary = boundary_in($next);
                if (defined $next_boundary) {
                    $sep = $next_boundary;
                    print $next if ($echomime);
                }
                elsif ($next =~ /^[ \t]+\S/) {
                    # Folded continuation of Content-Type without a
                    # boundary (e.g. a charset): echo it with its header.
                    print $next if ($echomime);
                }
                else {
                    # Not part of Content-Type at all (another header or
                    # the blank end-of-headers line): hand it back so the
                    # loop processes it normally instead of losing it.
                    $pending = $next;
                }
            }
        }
        else {
            if ($intxt eq "\n") {               # end of headers
                $sectiontype = 1;
                $sectionmark = 0;
            }
            if ($headers) {
                if (($intxt !~ /^Content-/i) && ($intxt !~ /^Mime-/i)) {
                    print $intxt;               # ordinary header: always print
                }
                else {
                    print $intxt if ($echomime);# MIME header: only with -m
                }
            }
        }
    }

    #
    # non-MIME part of the body ($sectiontype == 1): echo it, except for
    # the stock "This is a multi-part message" notice (unless -m)
    #
    elsif ($sectiontype == 1) {
        my $is_notice = ($sectionmark == 1)
            && (!$echomime)
            && ($intxt =~ /^This is a multi-part message in MIME format./i);
        print "$intxt" unless $is_notice;
    }

    #
    # inside a MIME section ($sectiontype == 2) that we pass through.
    # $sectionmark is the position inside the section: 0 = in the section's
    # MIME headers, >0 = in its body.  For forwarded messages 1 means "in
    # the forwarded message's headers" and 2+ "in its body".
    #
    elsif ($sectiontype == 2) {
        my $action = action_for($mimesection);

        if ($action == $ACT_FORWARD) {          # FORWARD section (echo with >)
            if ($sectionmark == 0) {            # section's MIME headers
                print "> $intxt" if ($echomime);
            }
            elsif ($sectionmark == 1) {         # forwarded message's headers
                if (!$cleanheaders
                    || $intxt =~ /^(From|Date|Subject|To|Cc): /i) {
                    print "> $intxt";
                }
            }
            else {                              # forwarded message's body
                print "> $intxt";
            }
        }
        elsif ($action == $ACT_INCLUDE) {       # INCLUDE section (verbatim)
            if ($sectionmark == 0) {            # section's MIME headers
                print "$intxt" if ($echomime);
            }
            else {                              # section body
                print "$intxt";
            }
        }
        # any other action: section is dropped
    }

    # A blank line moves us past the current set of headers.
    $sectionmark += 1 if ($intxt eq "\n");
}

#
# STDIN just EOFed, so print a terminal separator if need be
#
if ($sep) {
    print "--$sep--\n" if ($echomime && $needsep);
}

#
# that's all, folks
#
exit 0;

############################################################################

# Return the next input line: the pushed-back line if there is one,
# otherwise the next line of STDIN (undef at end of input).
sub next_line {
    if (defined $pending) {
        my $line = $pending;
        undef $pending;
        return $line;
    }
    return scalar <STDIN>;
}

# Return the action recorded for a MIME type, or 0 when the type is unknown.
sub action_for {
    my ($type) = @_;
    return exists $mimes{$type} ? $mimes{$type} : 0;
}

# Extract the value of a boundary= parameter from a header line.
# Returns the boundary without surrounding quotes, or undef if the line
# carries no boundary parameter.  Trailing parameters are not included.
sub boundary_in {
    my ($line) = @_;
    return $1 if ($line =~ /boundary="([^"]*)"/i);       # quoted form
    return $1 if ($line =~ /boundary="?([^\s;"]+)/i);     # bare / unterminated
    return;
}

# Print the usage message and exit with status 1.
# NOTE(review): the usage line and the "<types>" placeholders were missing
# from the copy this was restored from and are reconstructed; confirm the
# exact wording and layout against the upstream source.
sub dohelp {
    print <<"ZZEOF";
Usage: stripmime [-mhc] [-i|-e|-f <types>]
Expects an email message on STDIN and echoes the email to STDOUT
except for MIME attachments.
Options:
  -i <types> -- comma-separated MIME types to include in output
  -e <types> -- comma-separated MIME types to exclude from output
  -f <types> -- comma-separated MIME types to "forward" (prepend "> ")
  -m -- echo MIME separators and message/section header info (default won't)
  -h -- doesn't echo the message headers (default will)
  -c -- only echo basic headers in forward (-f) MIME types (overridden by -m)
Or:
  --help -- print this help message
  --version -- print version info and exits
For -i, -e and -f options, command line arguments are parsed left-to-right,
and later directives override earlier ones. Unless you explicitly exclude
(-e) them, text/plain will be included (-i) and message/rfc822 will be
forwarded (-f) by default.
ZZEOF
    exit 1;
}

# Print version and licence information and exit with status 2.
sub dovers {
    print "stripmime version $version by Adam Glass \n";
    print " This program is distributed with NO WARRANTY WHATSOEVER under ";
    print "the terms\n of the GNU Public License (GPL), details about which ";
    print "can be found at:\n ";
    print " http://www.gnu.org/copyleft/gpl.txt\n";
    exit 2;
}