#! /usr/bin/perl -w # Perl routine: anonymize.pl (1 January 2000) # This script is a filter that anonymizes headers of e-mail messages # arriving at a Sympa mailing list. # Once a message is filtered, it should not be possible to identify the # name of the country from which the message came. # # The filter reads from the standard input and it writes to the standard # output. A Sympa 'subscribers' is read. # # This script does this: # * It deletes all headers it does not recognize. # * It passes some headers unchanged ("In-Reply-To:", "References:"). # * It erases e-mail addresses (and not hostnames or IP numbers) in # 2 other headers ("Reply-To:", "Sender:"). # * It makes no change to the "From:" header if its address is not in a # subscribers file. The mailing list server is expected to reject the # message. It would send an error messages to the address in the # "From:" header. # * If the address in the "From:" header is in a subscribers file then # that address is replaced with a special anonymous address. That # special anonymous address should be subscribed to the list. #-------------------------------------------------------------------------- # Testing of the script was never finished. An earlier version of the # script was tested and it ran well. A hacker intrusion ended testing, # and it would be preferable if the Sympa subscribers file were # encrypted. # # This script is designed to be used with the Sympa mailing list, # http://listes.cru.fr/sympa/ # ------------------------------------------------- # (1) Various lines in the script need to be customised. The code # that alters the "Message-ID:" field can be modified. # # (2) Verify that this Perl anonymize.pl script seems to be OK: # cat test.txt | ./anonymize.pl LISTNAME d # cat test.txt | /etc/smrsh/anonymize.pl LISTNAME debug # The existence of the 2nd token will cause output be written to # the standard output. # The item "/etc/smrsh/anonymize.pl" would be a soft symbolic # link to this script. # # (3) Edit the sendmail 'aliases' file (if using sendmail) and make # it so that this filter filters incoming messages posted to the # mailing list: # LISTNAME-list : "|/etc/smrsh/anonymize.pl LISTNAME" # # When called without a 2nd argument, the script will pipe the # output into the queue program, which should write the message # to a directory known at the time "queue.c" was compiled # (The default directory is "/home/sympa/expl/msg"). # # (4) If sendmail can't call this Perl script, it may return the # error, "unknown mailer error 255". # ------------------------------------------------- # # Some of these stops might be needed: # Edit /etc/aliases file. # cd /etc/smrsh # ln -s /home/sympa/bin/anonymize.pl # cd /home/sympa/bin/ # chmod a+x anonymize.pl # chmod u+s anonymize.pl # ... ??: chown sympa /etc/smrsh/anonymize.pl # Permissions can cause problems. The needed permissions for # "sympa/", "sympa/bin/", "sympa/spool" are different if the suid # bit on "sympa/bin/anonymizer.pl" is not set. use strict; use English; my($QUEUEPROGRAM) = '/home/sympa/bin/queue'; # The name of the queue program and the path to it. my($ANON_FROM_ADDR) = 'anon@ns1.strongnet.co.nz'; # All 'From:' addresses are changed to this. # This addr must be subscribed my($ANON_REPL_ADDR) = '-@-'; # Adresses inthe "Reply-To:" field are altered to this my($USERS_PATH) = '/home/sympa/expl/'; #'/home/sympa/expl/' Needs trailing "/" my($USERS_FNAME) = 'subscribers'; # name of the list's 'subscribers' file my($IS_MD5) = 0; # Set to 0 if MD5 not present (e.g. testing in Win95) #if ( 0 eq 1 && $IS_MD5) { use MD5; } #--------------------------------------------------------------------------------------- my($UF) = (''); my($NAMEOFSYMPALIST, $insideheaders, $dontprint) = ("UNKNOWN-LIST", 1, 0); if ((defined $ARGV[0]) && ("" ne $ARGV[0])) { $NAMEOFSYMPALIST = $ARGV[0]; } my($test) = (''); if ((defined $ARGV[1]) && ("" ne $ARGV[1])) { $test = $ARGV[1]; } $test = ('' ne $test); # To debug, comment out the next line: if (not $test) { open(STDOUT, "| $QUEUEPROGRAM $NAMEOFSYMPALIST"); # pipe output into Sympa Mailing List } $UF = $USERS_PATH . $NAMEOFSYMPALIST . '/' . $USERS_FNAME; print " ** (Debugging mode) UF='$UF', test=$test\n" if $test; my($MsgId_Count, $buf, $nextbuf) = (0, "", ""); my ($loc); $nextbuf = ; while (1) { $buf = $nextbuf; last if not defined $buf; # Leave loop while (1) { $nextbuf = ; last if not defined $nextbuf; last if not $insideheaders; last unless ($nextbuf =~ m/^(\t|\s)+\S/ ); # continuation lines can start with a tab $buf = $buf . $INPUT_RECORD_SEPARATOR . $nextbuf; } $dontprint = 0; $loc=1; if ($insideheaders) { if ($buf =~ /^\s*$/x) { $loc=2; $insideheaders = 0; } elsif ($buf =~ /^From:/i ) { # Sympa won't accept msg if no 'From:' field $loc=3; $buf = &anon_from($UF,$buf,1,$ANON_FROM_ADDR,$NAMEOFSYMPALIST) . $INPUT_RECORD_SEPARATOR; } elsif ($buf =~ /^( # Anonymize Reply-To| Sender| X-Sender| Auto-Submitted| Action| Final-Recipient| To| Cc| Bcc| Group| Followup-To ):/isx ) { # /i: ignore case, /s: include new lines, /x: over lines # Continued 'Received:' lines start with spaces $loc=4; $buf = &anon_from($UF,$buf,0,$ANON_REPL_ADDR,$NAMEOFSYMPALIST) . $INPUT_RECORD_SEPARATOR; } elsif ($buf =~ /^(Message-Id):/isx ) { if ($MsgId_Count <= 0) { $loc=5; my($a1,$a2) = ($buf =~ m/^(Message-Id):\s*(.*?)\s*$/is); chomp $a2; if ($IS_MD5) { $a2 = MD5->hexhash($a2); if (5<=length($a2)) { $a2 = substr($a2,5) . substr($a2,0,5); } $a2 = MD5->hexhash($a2); if (7<=length($a2)) { $a2 = substr($a2,7) . substr($a2,1,7); } $a2 = MD5->hexhash($a2); } $buf = 'Message-Id: ' . $INPUT_RECORD_SEPARATOR; $MsgId_Count = $MsgId_Count + 1; } else { $loc=6; $dontprint = 1; } } elsif ($buf =~ /^( # Allow through with no alteration In-Reply-To |References |Date |Subject |Mime-Version |Content-Type |Content-Transfer-Encoding |Content-Language |X-Loop |X-Sequence |X-Validation-BY |Status |Last-Attempt-Date ):/isx ) { $loc=7; 1; } else { $loc=8; $dontprint = 1; # Delete 'Received:' and other headers } } if (not $dontprint) { print $buf; } #if (not $dontprint) { print $loc . ":" . $buf; } #else { print "(" . $loc . ":" . $buf . ")"; } } if ($insideheaders) { print "\n\nWARNING: Anonymizing filter did not find a message body.\n"; } sub anon_from { my($file, $URI, $Is_From, $ANON_FROM_ADDR, $NAMEOFSYMPALIST) = @_; chomp $URI; my($URI0) = $URI; print " ** in:anon_from(_,'$URI')\n" if $test; my($pat0) = '[^\<\>\#\%\"\s\x00-\x1F\x7F\{\}\|\\\^\[\]\`,;\(\)]'; my($addrpat) = $pat0 . '*\@' . $pat0 . '+'; my ($u1, $addr, $u2) = ($URI =~ m/^(.*?)($addrpat)(.*?)$/ ); if ((not $Is_From) || (defined $addr) && &anon_load_users_file($file, $addr)) { $URI =~ s/$NAMEOFSYMPALIST(?![a-zA-Z_.])/\[\[\[$NAMEOFSYMPALIST\]\]\]/g; $URI =~ s/\<$addrpat\>/<$ANON_FROM_ADDR>/g; $URI =~ s/$addrpat/$ANON_FROM_ADDR/g; $URI =~ s/\s\s+/ /g; $URI =~ s/^\s*(.*?)\s*$/$1/; $URI =~ s/[[[$NAMEOFSYMPALIST]]]/$NAMEOFSYMPALIST/g; } # If there is no match then transmit without removing 'From:' header, # and expect that Sympa will reject it with an error message. print " ** out:anon_from(_,'$URI0') = '$URI'\n" if $test; return $URI; } # http://www.ics.uci.edu/pub/ietf/uri/rfc2368.txt (mailto), ../rfc2396.txt (URI) sub anon_load_users_file { my($file, $email) = @_; # Adapted from 'sub _load_users_file()' # do_log('debug2', 'List::_load_users_file(%s)', $file); print " ** Anon filter reading file '$file'\n" if $test; ## Open the file and switch to paragraph mode if ('' eq $file) { print " ** anon_load_users_file: no subscribers file named\n" if $test; return 1; } open(L, $file) || return undef; my @old = ($INPUT_RECORD_SEPARATOR); $INPUT_RECORD_SEPARATOR = ''; # Paras will be separated by >=1 lines with 0 spaces my($found) = (0); my($buf); while (1) { $buf = ; last if not defined($buf) ; # ($a) = ($buf =~ /^\s*email\s+(.+)\s*$/m); ($a) = ($buf =~ /^\s*email\s+($email)\s*$/m); if (defined $a) { print " ** anon_load_users_file: 1[$a]\n" if $test; $found = 1; last; } else { # print " ** anon_load_users_file: (... not defined)\n" if $test; } } close(L); ($INPUT_RECORD_SEPARATOR) = @old; print " ** anon_load_users_file(_,$email) = '$found'\n" if $test; return $found; } # This code is free (etc.) and it is by Craig Carey, research@ijs.co.nz [14-Jan-00]