#!/usr/bin/perl

# Copyright 2013
# Joey Kelly, joeykelly.net
# Licence: GPL version 2

# This script prints the lines of file a that do not appear in file b.
# Lines are compared exactly: every copy of a line in file a that also occurs
# anywhere in file b is dropped, however often it is repeated in either file.

# Usage: ./sieve.pl a.txt b.txt > c.txt

use strict;
use warnings;


# Read a whole file and return its contents as a list of lines with the
# newlines removed.
#
#   $path - name of the file to read; "-" means standard input
#
# Dies with the operating-system error if the file cannot be opened.
# As with any split on "\n", blank lines at the very end of the input are
# not returned.
sub read_lines {
  my ($path) = @_;

  my $fh;
  if ($path eq '-') {
    $fh = \*STDIN;
  }
  else {
    # Three-argument open: the name is always taken literally as a file name,
    # never as a mode or a command to run.
    open $fh, '<', $path or die "Cannot open '$path' for reading: $!\n";
  }

  my $contents = do { local $/; <$fh> };
  close $fh if $path ne '-';

  # Nothing could be read at all (e.g. standard input already at end-of-file).
  return () unless defined $contents;

  return split /\n/, $contents;
}


# Return the lines of the first list that do not occur in the second one.
#
#   $a_lines - array reference: the lines to filter
#   $b_lines - array reference: the lines to remove
#
# The order of $a_lines is kept, and a line that is repeated in $a_lines and
# absent from $b_lines is returned once per occurrence.
sub sieve_lines {
  my ($a_lines, $b_lines) = @_;

  # Only the keys matter here; the hash is used as a set of the lines in b.
  my %in_b;
  @in_b{ @{$b_lines} } = ();

  return grep { !exists $in_b{$_} } @{$a_lines};
}


# Print to standard output every line of file a that is not in file b.
#
#   $a_path - file whose lines are filtered
#   $b_path - file whose lines are removed from a
#
# Dies with a usage message if either file name is missing.
sub main {
  my ($a_path, $b_path) = @_;

  die "Usage: $0 a.txt b.txt > c.txt\n"
    unless defined $a_path && defined $b_path;

  my @a_lines = read_lines($a_path);
  my @b_lines = read_lines($b_path);

  print "$_\n" for sieve_lines(\@a_lines, \@b_lines);

  return;
}


# Run only when executed as a script, so the subs above can be loaded from
# other code (a test, for instance) without reading files or printing.
main(@ARGV) unless caller;