#!/usr/bin/perl
#
# Print the name, MIME type and (when present) the ETag of every record
# found in a gzip-compressed ARC file.
#
# Usage: perl <this script> <file.arc.gz>
#
# You'll need the BAT module from
# http://crawler.archive.org/cgi-bin/wiki.pl?BnfArcTools

use strict;
use warnings;

# Load the package used to parse an ARC file.
use BAT 'ARC';

# Fail early with a usage hint instead of handing an undefined file name
# to the ARC reader.
my $arc_path = $ARGV[0];
die "usage: $0 <file.arc.gz>\n"
    unless defined($arc_path) && length($arc_path);

# Create the ARC reader. Direct method-call syntax (ARC->new) is used rather
# than the ambiguous indirect-object form (new ARC ...).
# NOTE(review): whether the constructor returns false or dies on failure is
# not visible here; the "or die" only guards the false-return case.
my $arc = ARC->new(
    gzip     => 1,            # read from a compressed ARC file
    filename => $arc_path,    # ARC file name
) or die "cannot create ARC reader for '$arc_path'\n";

# Iterate until next_record() returns a false value.
while ( my $record = $arc->next_record() ) {

    # Substitute an empty string for missing fields so the output lines keep
    # their shape and no "uninitialized value" warning is emitted.
    my $name = defined( $record->{name} ) ? $record->{name} : '';
    my $mime = defined( $record->{mime} ) ? $record->{mime} : '';

    print "record name: $name\n";    # print the record name
    print "record mime: $mime\n";    # print the record MIME type

    # Test for a non-empty value rather than plain truthiness, so that a
    # payload consisting of the single character "0" is not skipped.
    if ( defined( $record->{content} ) && length( $record->{content} ) ) {
        # do something with the record content
    }

    # Only print the ETag when the record carries one.
    if ( defined( $record->{etag} ) && length( $record->{etag} ) ) {
        print "record etag: $record->{etag}\n";
    }
}