#!/usr/bin/perl

# Copyright 2013
# Joey Kelly, joeykelly.net
# Licence: GPL version 2

# This script removes from file a every line that also exists in file b and
# prints what is left to standard output.
#
# Matching is exact (whole line, case-sensitive).  Surviving lines are printed
# in their original order; a line that occurs several times in file a is
# either dropped everywhere or kept everywhere.  Trailing blank lines of a
# file are not considered part of its contents.
#
# Usage: ./sieve.pl a.txt b.txt > c.txt

use strict;
use warnings;

# slurp_lines($path)
#
# Reads the whole file at $path and returns its lines as a list, without the
# newline terminators.  Dies if the file cannot be opened or closed.
#
# The file is opened with three-argument open so that the name is always
# treated as a plain path; reading it through <> / @ARGV would interpret
# names such as "cmd |" or ">file" as pipes or open modes.
sub slurp_lines {
    my ($path) = @_;

    open my $fh, '<', $path or die "Cannot open '$path': $!\n";
    my $contents = do { local $/; <$fh> };    # undef $/ => read everything
    close $fh or die "Cannot close '$path': $!\n";

    return if !defined $contents;

    # split drops trailing empty fields, so blank lines at the very end of
    # the file do not produce elements.
    return split /\n/, $contents;
}

# sieve_lines(\@keep, \@drop)
#
# Returns the elements of @keep that do not appear in @drop, preserving the
# order (and any repetitions) of @keep.
sub sieve_lines {
    my ($keep_ref, $drop_ref) = @_;

    # Only the keys matter here; the hash is used as a set of lines to drop.
    my %is_dropped;
    @is_dropped{ @{$drop_ref} } = ();

    return grep { !exists $is_dropped{$_} } @{$keep_ref};
}

# main($file_a, $file_b)
#
# Prints every line of $file_a that is absent from $file_b, one per line.
# Dies with a usage message when either file name is missing.  Any further
# arguments are ignored.
sub main {
    my ($file_a, $file_b) = @_;

    # The names deliberately avoid $a and $b, which are the special package
    # variables used by sort.
    if ( !defined $file_a || !defined $file_b ) {
        die "Usage: $0 a.txt b.txt > c.txt\n";
    }

    my @lines_a = slurp_lines($file_a);
    my @lines_b = slurp_lines($file_b);

    for my $line ( sieve_lines( \@lines_a, \@lines_b ) ) {
        print "$line\n";
    }

    return;
}

# Run only when executed directly, so the subs above can also be loaded from
# another file without reading @ARGV or writing to STDOUT.
main(@ARGV) unless caller;