#!/usr/bin/env perl
## jpg-recover -- Copyright (c) 2001-2002, Adam Glass
##
## jpg-recover scans through a file (probably a disk image) looking for
## JPEG headers, copying them to sequential filenames.  I wrote this
## when I lost a bunch of pictures to a corrupted file allocation
## table, and it worked well for me.  Not 100% successful, but usually
## better than nothing.
##
## NOTE!  USE THIS PROGRAM AT YOUR OWN RISK; THIS PROGRAM COMES WITH
## NO WARRANTY WHATSOEVER.
##
## Current version:
##   $Id: jpg-recover.pl,v 1.2 2002/02/24 23:07:38 adam Exp $
##
## Permission to use, copy, modify, and distribute this software for any
## purpose with or without fee is hereby granted, provided that the above
## copyright notice and this permission notice appear in all copies.
##
## Many thanks to all the people who have sent in updates and suggestions!
##
## Generally, the following variables are fine -- you may want to tune
## them if you know what you're doing, but you shouldn't have to.
##
## $minsize may be worth fiddling with, since the end marker ('FFD9')
## is not unique -- it can occur multiple times within JPEG data.
## If $minsize is set too low, you'll end up with truncated images (false
## matches on the end marker), but if $minsize is too long, jpg-recover
## will miss the end of the file -- and combine two images or simply have
## corruption at the end of the image.  So you may need to try a number
## of different values.  (The units are bytes; the default is to not
## believe the data in the file if it's specifying an image shorter
## than 102400 bytes -- 100k.)
##
## Also, you may have JPEG start markers other than what's already
## included in the @starts array.  In which case jpg-recover probably
## won't find any of your images and you'll need to poke around and
## find JPEG start markers by hand.  Sorry! :(
##
## ------------------------------------------------------------------------

use strict;
use warnings;

my $default_infile = "smcard.img";  # default input file to read (the disk image file)
my $outtemp    = "lostpic{}.jpg";   # output file template: '{}' is replaced with file #
my $minsize    = 102400;            # ignore apparent JPEG file end markers before here
my $announce   = 102400;            # announce position every 100k (0=disable announcements)
my $maxbufsize = 60;                # maximum buffer size (must be greater than markers)

my @starts = (                                 # array of valid JPEG start markers...
    "\xff\xd8\xff\xe1\x1c\x45\x45\x78\x69\x66", # --- ATTENTION ---
    "\xff\xd8\xff\xe0\x00\x10\x4a\x46\x49\x46", # (if you find a new one,
    "\xff\xd8\xff\xe1\x57\xcc\x45\x78\x69\x66", # please send it to me!)
    "\xff\xd8\xff\xe1\x19\xfe\x45\x78\x69\x66", #
    "\xff\xd8\xff\xe1\x1b\xfe\x45\x78\x69\x66", #
    "\xff\xd8\xff\xe1\x1d\xfe\x45\x78\x69\x66", # (this will change in
    "\xff\xd8\xff\xe1\x15\xfe\x45\x78\x69\x66", # a future release to
    "\xff\xd8\xff\xe1\x17\xfe\x45\x78\x69\x66", # use a data file of
    "\xff\xd8\xff\xe1\x17\xfe\x45\x79\x69\x66", # patterns)
    "\xff\xd8\xff\xe1\x23\xfe\x45\x79\x69\x66", #
    "\xff\xd8\xff\xe1\x13\xfe\x45\x78\x69\x66", #
    "\xff\xd8\xff\xe1\x23\xfe\x45\x78\x69\x66", #
    "\xff\xd8\xff\xe1\x21\xfe\x45\x78\x69\x66", #
    "\xff\xd8\xff\xe1\x17\xfe\x45\x79\x69\x66", #
    "\xff\xd8\xff\xe1\x38\x45\x45\x78\x69\x66", #
    "\xff\xd8\xff\xe1\x24\x45\x45\x78\x69\x66", #
    "\xff\xd8\xff\xe1\x57\xd8\x45\x78\x69\x66", #
);

my $end = "\xff\xd9";               # JPEG end marker

# start should really match:
#    "\xff\xd8\xff\xe0JFIF"
# or maybe even:
#    "\xff\xd8\xff\xe0EXIF"
# but no one seems to comply with this ... why?

my $start = "";                     # start marker most recently found by matched_start()

# matched_start($buf)
#   Returns 1 if $buf contains any of the markers listed in @starts, else 0.
#   On success the file-level $start is set to the first marker (in @starts
#   order) that was found, so the caller can write it to the output file.
sub matched_start {
    my ($buf) = @_;
    for my $marker (@starts) {
        if (index($buf, $marker) >= 0) {
            $start = $marker;
            return 1;
        }
    }
    return 0;
}

# seq_filename($template, $number)
#   Returns $template with its first '{}' replaced by $number, zero-padded
#   to at least three digits (e.g. "lostpic{}.jpg", 7 -> "lostpic007.jpg").
sub seq_filename {
    my ($template, $number) = @_;
    my $numstr = sprintf("%.3d", $number);
    $template =~ s/\{\}/$numstr/;   # braces escaped: they are literal text here
    return $template;
}

# open_output($path)
#   Creates (or truncates) $path for binary writing and returns the handle.
#   Dies if the file cannot be created, so a recovered image is never
#   silently thrown away.
sub open_output {
    my ($path) = @_;
    open(my $fh, '>', $path) or die "couldn't create $path: $!";
    binmode($fh);
    return $fh;
}

# recover_jpegs($infile)
#   Scans $infile one byte at a time, keeping the most recent $maxbufsize
#   bytes in a sliding window.  While waiting for an image (state 0) the
#   window is searched for a start marker; once inside an image (state 1)
#   bytes leaving the window are written to the current output file until
#   an end marker is seen past $minsize, another start marker appears, or
#   the input ends.  Output files are named from $outtemp in the current
#   directory.  Progress is printed to STDOUT.
#   Returns the number of image files created; dies on I/O errors.
sub recover_jpegs {
    my ($infile) = @_;

    my $filenum  = 1;   # output file numbering starts here
    my $position = 0;   # position in input file, for reassuring output
    my $state    = 0;   # 0=waiting for start marker, 1=inside file, waiting for end
    my $outlen   = 0;   # approximate output length: marker + bytes read since it
    my $buffer   = "";  # sliding scan window
    my ($outfile, $out);

    open(my $in, '<', $infile) or die "couldn't open $infile: $!";
    binmode($in);

    print "Scanning '$infile' looking for pictures...\n";

    while (!eof($in)) {
        my $last = "";
        my $got  = read($in, $last, 1);
        die "read error on $infile: $!" unless defined $got;
        last unless $got;
        $position += $got;

        # Slide the window: once it is full, the oldest byte drops out as $gone.
        my $gone = "";
        if (length($buffer) >= $maxbufsize) {
            $gone = substr($buffer, 0, 1, "");
        }
        $buffer .= $last;

        if ($announce && ($position % $announce == 0)) {
            print "Passing byte $position of $infile ...\n";
        }

        if ($state == 0) {                          # waiting for start
            if (matched_start($buffer)) {
                $outfile = seq_filename($outtemp, $filenum);
                print "Image #$filenum: start found, dumping to $outfile ...\n";
                $buffer = "";
                $out    = open_output($outfile);
                $outlen = length($start);
                print {$out} $start;
                $state = 1;
            }
        }
        elsif ((index($buffer, $end) >= 0) && ($outlen > $minsize)) {
            # End marker seen far enough into the image: flush and finish.
            print {$out} $gone . $buffer;
            close($out) or die "couldn't close $outfile: $!";
            print "Done -- wrote ~$outlen bytes to $outfile (JPEG end \@ $position)\n";
            $filenum += 1;
            $state  = 0;
            $buffer = "";
        }
        elsif (matched_start($buffer)) {
            # A new image begins before the current one ended: terminate the
            # current file just before the new marker and start the next one.
            my $newstart = index($buffer, $start);
            print {$out} $gone . substr($buffer, 0, $newstart) . $end;
            close($out) or die "couldn't close $outfile: $!";
            print "Done -- wrote ~$outlen bytes to $outfile (JPEG start \@ $position)\n";
            $filenum += 1;
            $outfile = seq_filename($outtemp, $filenum);
            print "Image #$filenum: start found, dumping to $outfile ...\n";
            $out    = open_output($outfile);
            $outlen = length($start);
            $buffer = "";
            print {$out} $start;
            # leave state at 1
        }
        else {
            print {$out} $gone;
            $outlen += 1;
        }
    }
    close($in);

    if ($state) {
        # Input ended inside an image.  Only the window is still unwritten:
        # the byte that last left it was already handled inside the loop, so
        # it must not be written again here.
        print {$out} $buffer . $end;
        print "Done -- wrote ~$outlen bytes to $outfile (EOF \@ $position.)\n";
        close($out) or die "couldn't close $outfile: $!";
        $filenum += 1;
    }

    print "Finished scanning $infile.\n";
    print "\n";
    print "Created " . ($filenum - 1) . " file(s) -- check 'em out!\n";

    return $filenum - 1;
}

# Run the scan only when executed as a program, so the subs above can be
# loaded (e.g. by a test) without touching any disk image.
unless (caller) {
    # use user-defined file if one is supplied
    my $infile = (defined $ARGV[0] && $ARGV[0] ne "") ? $ARGV[0] : $default_infile;
    recover_jpegs($infile);
    exit 0;
}

1;