#!/usr/bin/perl

use Mail::Box::Manager;
use File::Scan::ClamAV;
use Mail::SpamAssassin;


use Time::HiRes qw(gettimeofday);

#
# This program is Copyright 2005 by Hans Poo.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the Perl Artistic License or the
# GNU General Public License as published by the Free Software
# Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# Please read the GPL license at: http://www.gnu.org/copyleft/gpl.html
# 

use strict;

use Getopt::Long;

# Wall-clock start in epoch seconds. Nothing in the visible script reads it;
# elapsed time is measured with gettimeofday instead.
my $begin = time;

# Command line settings. $socket keeps its default unless --socket is given;
# "--socket" with no value empties it, which triggers socket auto-discovery
# further down.
my $socket = "/tmp/clamd";
my $test;
my $verbose;
my $help;

# Print the summary collected so far when the user hits Ctrl-C.
$SIG{INT} = \&int_handler;

# Run-wide counters reported by end(), plus the spam counter used while
# scanning mailboxes.
my ($tot_clean, $tot_infected, $tot_mboxes, $tot_infected_emails, $tot_clean_emails, $tot_spam_emails, $spam) = (0) x 7;

# GetOptions returns false when it meets an unknown or malformed option.
# Stop here rather than start modifying mailboxes with settings the user
# did not intend (a mistyped --test would otherwise delete mail).
GetOptions(
	"socket:s" => \$socket,
	"verbose"  => \$verbose,
	"test"     => \$test,
	"help"     => \$help,
) or do {
	print STDERR "Invalid command line options, try --help\n";
	exit 2;
};


# Show the usage text when --help was requested or no mailbox was named.
# Asking for help is a success (exit 0); forgetting the mailbox arguments is
# a usage error, so that case exits with a non-zero status.
if ($help || ! @ARGV) {
	print STDERR <<EOF;

cleanmbox.pl: Remove only the infected or spam emails in mbox mailboxes, using clamd and spamassassin.

Usage: cleanmbox.pl [--help] [--socket=path_to_clamd_socket] [--test] [--verbose] mbox_file1 [mbox_file2, ...]

socket: Socket address of the clamd daemon, default is /tmp/clamd.
test: Don't do any modifications, just check, normally you would like to use clamscan for this.
verbose: Print a line for every message checked.
help: Display this information.

Motivation: I wrote this program, because clamscan can't remove individual infected emails from "mbox format" mailboxes. Clamscan target is maildir and this script target is mbox.

The safer conditions to run this program are:
1.- Mail server and any other program that modifies the mailboxes off. This is because the perl library that updates the mailbox file uses dotfile locking, and may be not enough depending on your platform.
2.- Backup target mailbox files.

This script uses the next perl libraries: Mail::Box::Manager, File::Scan::ClamAV and Mail::SpamAssassin, if any are missing please ask your sysadmin to install them with the following command:

perl -MCPAN -e 'install Mail::Box::Manager'
perl -MCPAN -e 'install File::Scan::ClamAV'
perl -MCPAN -e 'install Mail::SpamAssassin'

You use this program under your own risk, without any guarantee of any kind.
Licenced under the GPL.

EOF
	exit($help ? 0 : 1);
}

# An empty socket setting (e.g. "--socket" given without a value) asks us to
# discover the clamd socket from the list of listening sockets.
unless ($socket) {
	print STDERR "Clamav socket not given, i will try to find it...\n";

	# Backticks return each output line with its trailing newline, and
	# there may be several matching lines. Strip the newlines and keep the
	# first non-empty candidate; otherwise the -S test below can never
	# succeed on a discovered path.
	my @candidates = `netstat -nl | grep clam | grep -v milter | awk '{print \$9}'`;
	chomp @candidates;
	($socket) = grep { length } @candidates;
	$socket = '' unless defined $socket;
}

# The path must exist and be a unix domain socket.
unless (length($socket) && -S $socket) {
	print STDERR "Can't connect to clamd at socket $socket\n";
	exit 1;
}

# Build the three helpers used by the scan loop: the clamd client, the
# mailbox manager and the SpamAssassin factory. Direct Class->new calls are
# used instead of the ambiguous indirect-object form ("new Class(...)").
my $av = File::Scan::ClamAV->new(port => $socket);

# The check on the socket path only proves that a socket file exists. Ask
# clamd to answer before touching any mailbox so that a dead daemon is
# reported once, up front, instead of once per message.
unless ($av->ping) {
	my $reason = $av->errstr;
	$reason = 'no response' unless defined $reason && length $reason;
	print STDERR "Can't connect to clamd at socket $socket: $reason\n";
	exit 1;
}

my $mgr = Mail::Box::Manager->new;

my $sa = Mail::SpamAssassin->new();

# Start of the scanning phase, used by end() for the elapsed-time report.
my $begin_time = gettimeofday;

# Scan every mailbox named on the command line. Each message is streamed to
# clamd first; messages that clamd reports clean are then checked by
# SpamAssassin. Infected and spam messages are flagged for deletion (unless
# --test was given) and the folder is rewritten when it is closed.
foreach my $mailbox (@ARGV) {

	$tot_mboxes++;

	my $before = gettimeofday;

	unless (-w $mailbox) {
		print STDERR "Can't open $mailbox for writing\n";
		next;
	}

	# Record permissions and ownership with a single stat call before the
	# folder is opened, so they can be restored after the file is
	# rewritten. Only the permission bits of the mode are kept; the file
	# type bits are not valid input for chmod.
	my @st = stat($mailbox);
	unless (@st) {
		print STDERR "Can't stat $mailbox: $!\n";
		next;
	}
	my $mode = $st[2] & 07777;
	my ($uid, $gid) = @st[4, 5];

	# open() yields a false value when the folder cannot be opened (for
	# example when the lock cannot be obtained); skip the mailbox instead
	# of dying on a method call against undef.
	my $folder = $mgr->open(folder => $mailbox, access => 'rw', lock_type => "DotLock");
	unless ($folder) {
		print STDERR "Can't open $mailbox as a mail folder\n";
		next;
	}

	my @messages = $folder->messages;

	print "\nScanning ". $folder->name . ", ".scalar(@messages). " message(s)... wait\n";

	# Per-mailbox counters. $spam deliberately shadows the file-level
	# variable of the same name: a run-wide spam counter would make every
	# mailbox scanned after the first spam hit look dirty.
	my $infected = 0;
	my $spam = 0;
	my $i = 0;

	foreach my $msg (@messages) {
		$i++;
		print "checking message $i, size ", $msg->size, "\n" if $verbose;

		# Serialise the message once; both scanners work on the same text.
		my $raw = $msg->string;

		my ($code, $virus) = $av->streamscan($raw);
		$code = '' unless defined $code;

		if ($code eq 'OK') {
			my $mail = $sa->parse($raw);
			my $status = $sa->check($mail);
			my $is_spam = $status->is_spam();

			# Release the SpamAssassin objects explicitly; many messages
			# are checked through the same factory object.
			$status->finish();
			$mail->finish();

			if ($is_spam) {
				print "Spam email found, subject: \"", $msg->subject, "\"... deleting\n";
				$msg->delete unless $test;
				$spam++;
				$tot_spam_emails++;
			} else {
				$tot_clean_emails++;
			}

		} elsif ($code eq 'FOUND') {
			$tot_infected_emails++;
			$msg->delete unless $test;
			print "Virus: $virus found\n";
			$infected++;
		} else {
			# The scanner could not give a verdict for this message. The
			# message is left untouched and scanning continues with the
			# next one.
			my $err = $av->errstr;
			$err = 'unknown error' unless defined $err;
			print "Error processing $mailbox: ", $err, ". Operation Canceled\n";
			next;
		}
	}
	print "No messages found\n" if $i == 0;

	# Always close the folder so the dotlock is released right away. In
	# test mode nothing may be written back to disk.
	$folder->close(write => ($test ? 'NEVER' : 'MODIFIED'));
	undef $folder;

	if ($infected || $spam) {
		$tot_infected++;
		unless ($test) {
			# The mailbox file has been rewritten; put the original owner
			# and permissions back.
			print "Fixing ownership and permissions\n" if $verbose;
			chown($uid, $gid, $mailbox)
				or print STDERR "Can't restore ownership of $mailbox: $!\n";
			chmod($mode, $mailbox)
				or print STDERR "Can't restore permissions of $mailbox: $!\n";
		}
	} else {
		$tot_clean++;
		print "Mailbox clean\n";
	}

	# Summarise what was found. The mailbox name is passed as a printf
	# argument rather than interpolated into the format string, so a "%"
	# in the path cannot corrupt the output.
	my @findings;
	push @findings, "$infected emails infected" if $infected;
	push @findings, "$spam spam emails" if $spam;
	my $result = @findings ? join(', ', @findings) : "mailbox clean";
	printf "Done, %s %s, it took %.2f seconds\n", $mailbox, $result, gettimeofday() - $before;

}

# Print the run summary and finish successfully.
end();
exit 0;


# Print the final report: mailbox and email counters plus the time spent
# scanning. Reads the file-level $tot_* counters and $begin_time; takes no
# arguments and returns nothing. Called at normal completion and from the
# SIGINT handler.
sub end {

	# $begin_time is only assigned right before scanning starts. If the
	# report is requested earlier (an interrupt during start-up), report
	# zero elapsed time instead of subtracting from an undefined value.
	my $tot_time = defined $begin_time ? gettimeofday() - $begin_time : 0;

	# Spam emails are counted separately from clean and infected ones, so
	# all three counters contribute to the number of scanned emails.
	my $tot_emails = $tot_infected_emails + $tot_spam_emails + $tot_clean_emails;

	print <<EOF;
---------------------------------------------
Mailboxes
Scanned:\t$tot_mboxes
Infected:\t$tot_infected
Clean:\t$tot_clean

Emails
Scanned:\t$tot_emails
Infected:\t$tot_infected_emails
Spam:\t$tot_spam_emails
Clean:\t$tot_clean_emails

Total Time Elapsed:\t$tot_time seconds
---------------------------------------------
EOF

	return;
}

# SIGINT handler: print the summary gathered so far, then terminate with
# exit status 1. Perl passes the signal name as the only argument; it is
# forwarded to end(), which does not look at its arguments.
sub int_handler {
	end(@_);
	exit 1;
}


