#!/usr/bin/perl
# SafeWeb traffic fingerprint generator and comparer
#
# by Andrew Hintz ( http://guh.nu )
#
# This file accompanies the paper 'Fingerprinting Websites Using Traffic Analysis.'
# (available from http://guh.nu/projects/ta/safeweb/ )
# The paper will also be published in the Springer Lecture Notes in Computer Science series in the proceedings for the Workshop on Privacy Enhancing Technologies, 2002.
# The paper was presented at the Workshop on Privacy Enhancing Technologies, 14-15 April 2002. ( http://www.pet2002.org )
#
# This program is released under the GPL. ( http://www.gnu.org ) If you'd like the program to also be released under a different license, contact me and we can probably work something out.
#
# Performance Note: the program creates a lot of == 0 hash values during compares. However, it shouldn't be a problem unless you're planning on modifying the program to run on lots of fingerprints... and if you're doing that you're probably either benchmarking or doing something evil.
use strict;
use warnings;

# Run the command-line driver only when this file is executed directly.
# When it is loaded with require/do (for example from a test) only the
# subs below are defined and nothing is read or printed.
main(@ARGV) unless caller;

###########################################
# Command-line driver.
#   2 arguments      : print the fingerprint of one session
#   4 or 5 arguments : compare two fingerprints, optionally allowing sizes
#                      to differ by up to +-range bytes
# Dies with the usage text on any other argument count, and with an error
# if the range is not a non-negative whole number.
sub main {
    my @args = @_;

    die "\nUSAGE:\nto see the fingerprint:\n\tfingerprint.pl tcpdumplog_file_name ip_of_safeweb_user\n\nor to see how similar two fingerprints are:\n\tfingerprint.pl log1 ip1 log2 ip2 [range]\n\n"
        unless ( (2 == @args) || (4 == @args) || (5 == @args) );

    # range defaults to zero (ie. only do exact matches)
    my $range = (5 == @args) ? $args[4] : 0;
    die "range must be a non-negative whole number, got \"$range\"\n"
        unless $range =~ /\A\d+\z/;
    $range += 0;    # normalise things like "00" so the truth test below is numeric

    my %fp1 = fingerprint($args[0], $args[1]);

    if (2 == @args) {
        # the user wants to just see the fingerprint of one session
        my $count = 0;
        foreach my $size (sort numerically keys %fp1) {
            print "size:$size\tcount:$fp1{$size}\n";
            $count += 1;
        }
        print "Total number of different sizes: $count\n";
        return;
    }

    # the user wants to compare two fingerprints
    my %fp2 = fingerprint($args[2], $args[3]);

    print "Number of connections in the file \"$args[0]\": ",
        count_connections(%fp1), "\n";
    print "Number of connections in the file \"$args[2]\": ",
        count_connections(%fp2), "\n";

    my ($exact, $within) = count_matches(\%fp1, \%fp2, $range);
    print "Number of exact matches: $exact\n";
    print "Number of matches within +-$range: $within\n" if $range;
    return;
}

###########################################
# Total number of connections in a fingerprint, i.e. the sum of the
# per-size counts.  Takes the fingerprint hash as a flat list.
sub count_connections {
    my %fp = @_;
    my $total = 0;
    $total += $_ for values %fp;
    return $total;
}

###########################################
# Pair up connections of two fingerprints.
#   $fp1_ref, $fp2_ref : hash refs mapping total size => number of connections
#   $range             : largest size difference that still counts as a match
# Returns (exact matches, matches within +-$range).  The caller's hashes are
# not modified; the matching consumes private copies.
sub count_matches {
    my ($fp1_ref, $fp2_ref, $range) = @_;

    my %left  = %$fp1_ref;
    my %right = %$fp2_ref;

    # Try differences in the order 0, -1, 1, -2, 2, ... -range, range so
    # that the closest sizes are paired first.
    my @diffs = (0);
    push @diffs, -$_, $_ for 1 .. $range;

    my $similar = 0;
    my $exact   = 0;
    foreach my $diff (@diffs) {
        foreach my $size (sort numerically keys %right) {
            my $have = $left{$size + $diff} || 0;
            my $want = $right{$size};
            next unless $have > 0 && $want > 0;

            # Each connection can be matched only once, so pair as many as
            # both sides still have and take them out of both fingerprints.
            my $matched = $have < $want ? $have : $want;
            $left{$size + $diff} -= $matched;
            $right{$size}        -= $matched;
            $similar             += $matched;
        }
        $exact = $similar if 0 == $diff;
    }
    return ($exact, $similar);
}

###########################################
#used just for sorting
sub numerically {
    return $a <=> $b;
}

##############################################
# Build the fingerprint of one session.
#   $logname : filename of the tcpdump log
#   $ip      : ip to look at (the destination of the packets we count)
# Returns a hash (as a list) mapping "total bytes received on one
# connection" => "number of connections with that total".
# Dies if $ip is empty or the log cannot be opened.
sub fingerprint {
    my ($logname, $ip) = @_;

    if (!defined $ip || "" eq $ip) {
        die "you gave me a null IP address, which you don't want to do. ";
    }

    my %totalsizes = ();    # dest port => bytes received on that port
    my %sizecount  = ();    # total size => number of ports with that total

    open(my $dump, '<', $logname) or die "error opening file: $!\n";
    while (my $line = <$dump>) {
        chomp $line;
        my @fields = split ' ', $line;    # splits the line by whitespace

        # Field positions follow the original author's note ("I think [5] is
        # the dest, and [3] is the source"); [7] holds the size in bytes.
        next if @fields < 8;
        my ($source, $dest, $seqinfo) = @fields[3, 5, 7];

        # dest must start with $ip.  The ip is matched literally, and must not
        # run on into more digits, so 10.0.0.1 does not also select 10.0.0.10.
        next unless $dest =~ m/^\Q$ip\E(?!\d)/;

        # make sure source port eq https
        next unless $source =~ m/https$/ || $source =~ m/krb524$/;

        # the fifth dot-separated piece of dest is the dest port
        my $destport = (split /\./, $dest)[4];
        next unless defined $destport;
        $destport =~ s/://;

        # the size is the number in parentheses, e.g. "1:1461(1460)"
        my ($bytes) = $seqinfo =~ m/^[^(]*\((\d+)/;
        next unless $bytes;    # nothing to add for missing or zero sizes

        $totalsizes{$destport} += $bytes;    # add size of packet to hash of dest ports
    }
    close($dump);

    foreach my $port (sort keys %totalsizes) {
        $sizecount{ $totalsizes{$port} } += 1;
    }
    return %sizecount;
}