#!/usr/bin/perl
#
#   MailScanner - SMTP E-Mail Virus Scanner
#   Copyright (C) 2002  Julian Field
#
#   $Id: mailscanner,v 1.66.2.7 2002/07/29 15:37:47 jkf Exp $
#
#   This program is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 2 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program; if not, write to the Free Software
#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
#   The author, Julian Field, can be contacted by email at
#      Jules@JulianField.net
#   or by paper mail at
#      Julian Field
#      Dept of Electronics & Computer Science
#      University of Southampton
#      Southampton
#      SO17 1BJ
#      United Kingdom
#

# Main program....

use strict;
use POSIX;
require 5.005;

my $autoinstalled=0;
# To detect whether we've been auto-configured & installed
# -- $autoinstalled will be set to 1 if so.
#@@$autoinstalled=1;

# Needed for Sys::Syslog, as Debian Potato (at least) doesn't
# appear to have "gethostname" syscall as used (indirectly) by Sys::Syslog
# So it uses `hostname` instead, which it can't do if PATH is tainted.
# It's good to have this anyway, although we may need to modify it for
# other OS when we find that something we need isn't here -- nwp 14/01/02
$ENV{PATH}="/sbin:/bin:/usr/sbin:/usr/bin";

# We *really* should clear *all* environment bar what we *know* we
# need here. It will avoid surprises (like bash running BASH_ENV or
# SpamAssassin using $ENV{HOME} rather than getpwnam to decide where
# to drop its load.

# Needed for -T:
delete $ENV{'BASH_ENV'}; # Don't run things on bash startup

# Needed for SpamAssassin:
delete $ENV{'HOME'};

# Remember to update this before producing new version of MailScanner
$Config::MailScannerVersion = '3.22';

my($dir);
$dir = $0;
# can't use s/// as it doesn't untaint $dir -- nwp 14/01/02 (just)
$dir =~ m#^(.*)/([^/]+)$#;
$dir = $1;
$Config::MailScannerProcessName = $2;
# Add my directory onto the front of the include path
unless ($autoinstalled) {
    unshift @INC, $dir;
}

require 'logger.pl';
#require 'sendmail.pl';
require 'workarea.pl';
#require 'explode.pl';
require 'sweep.pl';
require 'info.pl';
require 'config.pl';
#require 'mta-specific.pl';
require 'disinfect.pl';

# Specify config file location on command line if you like
$Config::ConfigFile = $ARGV[0] if defined $ARGV[0];

# Start Syslog-ing - now done in ReadConfig()
#Log::Start($basename);
#Log::InfoLog("MailScanner E-Mail Virus Scanner version $Config::MailScannerVersion starting.");
Config::ReadConfig($Config::ConfigFile);
require 'explode.pl';
require 'mta-specific.pl';
require 'lock.pl'; # Must be after mta-specific.pl ReadConfig().
require 'sendmail.pl';
umask 0077; # Set nice and safe to no-one else can access anything!

# Become a daemon and save our PID
if ($Config::Debugging) {
  print STDERR "In Debugging mode, not forking...\n";
  # Get current debugging flag, and invert it:
  my $current = config MIME::ToolUtils 'DEBUGGING';
  #config MIME::ToolUtils DEBUGGING => !$current;
} else {
  fork && exit;
  setsid();
  #$SIG{CHLD} = \&REAPER;
  #if (fork) {
  #        wait; # Ensure child has exited
  #        exit 0;
  #}
  ## This new child's parent is perl
  ## Close output streams to break connection to handin server
  #close(STDIN);
  #close(STDOUT);
  #close(STDERR);
  #fork && exit 0;
  ## This new grand-child's parent is init
}

# Tried to set [u,g]id after writing pid, but then it fails when it re-execs itself.
# Using the posix calls because I don't want to have to bother to find out what
# happens when "$< = $uid" fails (i.e. not running as root). Yet.
if ($Config::RunAsUser ne "") {
  my $uid = getpwnam($Config::RunAsUser);
  if ($uid) { # Only do this if setting to non-root
    Log::InfoLog("ECS MailScanner setting UID to $Config::RunAsUser ($uid)");
    POSIX::setuid($uid) or Log::DieLog("Can't set UID $uid");
  }
}

if ($Config::RunAsUser ne "") {
  my $gid = getgrnam($Config::RunAsGroup);
  if ($gid) { # Only do this if setting to non-root
    Log::InfoLog("ECS MailScanner setting GID to $Config::RunAsGroup ($gid)");
    POSIX::setgid($gid) or Log::DieLog("Can't set GID $gid");
  }
}


$> = $<;
$) = $(;

local(*PID);
open(PID, ">$Config::PidFile")
  or Log::DieLog("Cannot write pid file $Config::PidFile, %s", $!);
print PID "$$\n";
close PID;
#undef *PID;

# Must do this *after* setting EUID/EGID.
Sendmail::SpamAssassinInit();

# Version checking jiggled down to here to get spamassassin init done first.

# Verify versions of MIME-tools, as they often cause problems.
#
# These version numbers are what come in the MIME-tools v5.410 package,
# which I (nwp) use.
my %mime_required = (
		     Parser	=> "5.406",
		     Entity	=> "5.404",
		     Tools	=> "5.410",
		     Words	=> "5.404",
		     Head	=> "5.403",
		     Decoder	=> "5.403",
		     Body	=> "5.403",
);

foreach (keys %mime_required) {
    no strict 'refs';
    my $varname = "MIME::". ucfirst lc($_) ."::VERSION";
    defined $$varname or next;
    my $module_version = $$varname;
    $module_version >= $mime_required{$_} or
      Log::DieLog("FATAL: Newer MIME-tools module needed: MIME::$_ is only $module_version-- $mime_required{$_} required");
}
{
    no strict 'refs';
    my $varname = "Mail::SpamAssassin::VERSION";
    Log::DieLog("FATAL: Newer Mail::SpamAssassin module needed: Mail::SpamAssassin is only $Mail::SpamAssassin::VERSION-- 2.1 required")
      if defined $Mail::SpamAssassin::VERSION && 
         $Mail::SpamAssassin::VERSION<"2.1";
}


# Must do this after all the "require-ing" so it cannot be done in config.pl
# Check the incoming and outgoing queues are on the same device
my($indevice, $outdevice, @instat, @outstat, $inuid, $outuid, $ingrp, $outgrp);
my $RunAsUid = getpwnam($Config::RunAsUser);
my $RunAsGid = getgrnam($Config::RunAsGroup);
chdir($Config::InQueueDir);
@instat = stat('.');
($indevice, $inuid, $ingrp) = @instat[0,4,5];
chdir($Config::OutQueueDir);
@outstat = stat('.');
($outdevice, $outuid, $outgrp) = @outstat[0,4,5];
Log::DieLog("$Config::InQueueDir and $Config::OutQueueDir must be on the same filesystem/partition!")
  unless $indevice == $outdevice;
Log::DieLog("$Config::InQueueDir is not owned by user $Config::RunAsUser!")
  if $Config::RunAsUser && ($inuid != $RunAsUid);
Log::DieLog("$Config::OutQueueDir is not owned by user $Config::RunAsUser!")
  if $Config::RunAsUser && ($outuid != $RunAsUid);
#Log::DieLog("$Config::InQueueDir is not owned by group $Config::RunAsGroup!")
#  if $Config::RunAsGroup && ($ingrp != $RunAsGid);
#Log::DieLog("$Config::OutQueueDir is not owned by group $Config::RunAsGroup!")
#  if $Config::RunAsGroup && ($outgrp != $RunAsGid);


# Clean up the entire outgoing sendmail queue in case I was
# killed off half way through processing some messages.
my $CleanUpList = Sendmail::ListWholeQueue($Config::InQueueDir);
Sendmail::ClearOutQueue($CleanUpList, $Config::OutQueueDir);

# Restart periodically, and handle time_t rollover in the year 2038
my($StartTime, $RestartTime);
$StartTime = time;
$RestartTime = $StartTime + $Config::RestartEvery;
while (time>=$StartTime && time<$RestartTime) {
  # Clear up the work area
  ClearWorkArea($Config::SrcDir);

  # Find all the messages to work on and write the header files
  # for all the messages I will need to scan.
  my(@DefinitelyClean, @Unscanned, @MessagesIn, %MessagesInfo,
     %Headers, $Msgs, $Bytes);
  %MessagesInfo = ();
  %Headers = ();
  Sendmail::FindMessagesToProcess($Config::InQueueDir, $Config::SrcDir,
                                  \@DefinitelyClean, \@Unscanned, \@MessagesIn,
                                  \%MessagesInfo, \%Headers, \$Msgs, \$Bytes);

  # Lookup the hosts that sent us all these messages in all the RBL's
  # to try to detect spam.
  # Map Message ID --> Spam Text
  # DeletedSpam is used to contain list of messages which have been
  # deleted from the queue by HandleSpam so we never have to do a
  # "-f" check on a mail spool file.
  # I feel the need, the need for S P E E D !
  my(%SpamInfo, %SpamReport, %DeletedSpam, $spamId);
  %SpamInfo = (); # values are 0 or 1
  %SpamReport = (); # values are spam report text
  %DeletedSpam = ();
  if ($Config::CheckSpam) {
    Sendmail::FindSpammers(\%MessagesInfo, \%Headers, \%SpamReport, \%SpamInfo)
      if (@MessagesIn || @DefinitelyClean);

    # Deliver spam unless we are told not to
    foreach $spamId (keys %SpamInfo) {
      # Handle one message, and store whether it was deleted from the queue
      $DeletedSpam{$spamId} = HandleSpam($spamId, $SpamInfo{$spamId},
                                         $MessagesInfo{$spamId},
                                         $Config::QuarantineDir,
                                         $Config::InQueueDir);
    }
  }

  # Immediately deliver messages I have chosen not to scan because
  # the user doesn't want their domain scanned
  my(%EmptyHash);
  %EmptyHash = ();
  Sendmail::MoveToOutgoingQueue(\@Unscanned, 'unscanned',
                                \%MessagesInfo,
                                \%EmptyHash, \%EmptyHash, \%DeletedSpam,
                                $Config::InQueueDir, $Config::OutQueueDir)
    if @Unscanned;

  # Immediately deliver messages I don't need to scan
  Sendmail::MoveToOutgoingQueue(\@DefinitelyClean, 'unused',
                                \%MessagesInfo,
                                \%SpamReport, \%SpamInfo, \%DeletedSpam,
                                $Config::InQueueDir, $Config::OutQueueDir)
    if @DefinitelyClean;

  # Wait and go round again if nothing to scan
  sleep(30), next unless @MessagesIn;

  # Construct directory hierarchy for the message attachments
  BuildInDirs($Config::SrcDir, \@MessagesIn);

  # Save all the attachments into files. Fill a list with any unparsable
  # messages, and return a hash of MIME entities comprising all the parsable
  # messages.
  # 24/5/01 Must explode all the TNEF attachments too, returning a ref to a
  # list of TNEF messages. Messages are either all TNEF or not at all.
  # Each value of %IsTNEF should be a ref to the %MimeEntities entry for the
  # appropriate winmail.dat file.
  my(%MimeEntities, $Start, $Duration, @Unparsable, @BadTNEF, %IsTNEF);
  $Start = time;
  %MimeEntities = ExplodeMessages($Config::InQueueDir, $Config::SrcDir,
                                  \@Unparsable, \%IsTNEF, \@BadTNEF,
                                  \%DeletedSpam, @MessagesIn);
  #DumpSkeletons(\%MimeEntities);

  # Check all the attachments for viruses and work out
  # the cleanliness of each message.
  my($InfectionReports, %InfectionTypes, @CleanIds, @DirtyIds, @SilentIds);
  $InfectionReports = Sweep::VirusScan($Config::SrcDir, \%InfectionTypes);
  # Write infection reports for all the unparsable messages
  Sweep::LogUnparsable($InfectionReports, \%InfectionTypes, \@Unparsable);
  # Write infection reports for all the unparsable TNEF attachments
  Sweep::LogBadTNEF($InfectionReports, \%InfectionTypes,
                    \@BadTNEF, $Config::SrcDir);
  $Duration = time - $Start;
  Log::InfoLog("Scanned $Msgs messages, $Bytes bytes in $Duration seconds");

  # Use the list of messages in the queue and the infection reports,
  # to produce 3 lists of messages: those definitely clean,
  # those dirty that we should disinfect, and those we should silently
  # ditch (only telling the local postmaster about them).
  Sweep::CleanAndDirty($InfectionReports, \@MessagesIn,
                       \@CleanIds, \@DirtyIds, \@SilentIds);
  #Sweep::AddSilentReports($InfectionReports, \@SilentIds);
  #Sweep::PrintSummary($InfectionReports, \@CleanIds, \@DirtyIds);

  # 24/5/01 Make these structures use the %IsTNEF information, so they are
  # still totally correct. Rollocks. This doesn't of course walk the dir
  # structure, it walks the MIME structure which contains no information
  # about the files comprising the TNEF attachment. So I still need to
  # handle TNEF messages specially in the code below this.
  my(%NumParts, $File2Entity, %Entity2Parent, %Entity2File);
  $File2Entity = EntitiesInfo(\%MimeEntities, \%NumParts,
                              \%Entity2Parent, \%Entity2File);
  #PrintParts(\%NumParts);
  #PrintFilenames($File2Entity);
  #PrintInfectedSections($InfectionReports, $File2Entity);
  #PrintParents(\%Entity2Parent);

  # Replace all the infected attachments with a text file.
  # For infections applying to the whole message (e.g. we could not parse it),
  # replace the entire message with a new body addressed to the same recipients.
  # 24/5/01 Must also pick up all the TNEF infected messages and remove the
  # entire winmail.dat attachment from each one.
  # 28/9/01 Must not attempt to disinfect bad TNEF messages, they will report
  # as not having a virus in them!
  Disinfect($InfectionReports, \%InfectionTypes, \%MimeEntities,
            $File2Entity, \%Entity2Parent, \%Entity2File, \%IsTNEF,
            $Config::SrcDir);

  # Quarantine the infected attachments
  QuarantineInfections($InfectionReports, $Config::QuarantineDir,
                       $Config::SrcDir, $Config::InQueueDir)
    if $Config::QuarantineAction =~ /store/i;

  # Deliver all the cleaned up messages into the outgoing queue and read
  # the original recipient list for each one in the process
  if ($Config::SignCleanMessages) {
    # Sign all the clean messages with the inline signature
    SignCleanMessages(\@CleanIds, \%MimeEntities);
    # And deliver them, from their MIME entity structures
    Sendmail::DeliverIds(\@CleanIds, \%MimeEntities, \%MessagesInfo,
                         \%SpamReport, \%SpamInfo, \%DeletedSpam,
                         $Config::InQueueDir, $Config::OutQueueDir, 'clean');
  } else {
    Sendmail::MoveToOutgoingQueue(\@CleanIds, \%MimeEntities,
                                  \%MessagesInfo,
                                  \%SpamReport, \%SpamInfo, \%DeletedSpam,
                                  $Config::InQueueDir, $Config::OutQueueDir);
  }
  if ($Config::DeliverToRecipients) {
    Sendmail::DeliverIds(\@DirtyIds, \%MimeEntities, \%MessagesInfo,
                         \%SpamReport, \%SpamInfo, \%DeletedSpam,
                         $Config::InQueueDir, $Config::OutQueueDir, 'dirty');
    # Still deliver "silent" viruses to the recipient so they get to see
    # that MailScanner is protecting them.
    Sendmail::DeliverIds(\@SilentIds, \%MimeEntities, \%MessagesInfo,
                         \%SpamReport, \%SpamInfo, \%DeletedSpam,
                         $Config::InQueueDir, $Config::OutQueueDir, 'dirty')
      if @SilentIds;
  } else {
    Sendmail::DeleteIds(\@DirtyIds,  $Config::InQueueDir);
    Sendmail::DeleteIds(\@SilentIds, $Config::InQueueDir) if @SilentIds;
  }

  # Just silently delete infected messages we don't want to hear about.
  # This is now handled a bit further up as we still want the recipients
  # to get the virus warning, just not the sender (as the sender was
  # faked anyay).
  Log::InfoLog("Deleted infected messages " .
               join(',',@SilentIds)) if @SilentIds;
  #Sendmail::DeleteIds(\@SilentIds, $Config::InQueueDir);
  # and take them out of the virus scanning work area too
  ClearWorkAreaIds($Config::SrcDir, \@SilentIds) if @SilentIds;

  # Tell the message senders about the infections.
  # Need to send different messages depending on the %InfectionTypes.
  Sendmail::WarnSenders(\@DirtyIds, \%MessagesInfo, $InfectionReports,
                        \%InfectionTypes, $Config::TellSenders)
    if $Config::TellSenders;
  # And notify postmaster about them too
  if ($Config::TellLocalPostie) {
    Sendmail::WarnLocalPostmaster(\@DirtyIds, \%MessagesInfo,
                                  $InfectionReports, \%Headers);
    # Tell the postie about the silent infections
    Sendmail::WarnLocalPostmaster(\@SilentIds, \%MessagesInfo,
                                  $InfectionReports, \%Headers);
  }

  # Now attempt to disinfect the infected attachments and deliver them.
  # Only do this if they want disinfected messages delivered *and*
  # they want virus scanning to happen at all. Cannot disinfect anything
  # without virus scanning enabled.
  DisinfectAndDeliver($InfectionReports, \@CleanIds, \@DirtyIds,
                      \@BadTNEF, \%MessagesInfo)
    if $Config::DeliverDisinfected && $Config::VirusScanning;
  last if $Config::Debugging;
}

# I have done a large number of virus scanning runs now,
# so kill and re-run myself.
unless ($Config::Debugging) {
  my ($tmpstr,$me,@args);

  $tmpstr = `pwd`;	# necessary to avoid tainting
  Log::DebugLog("About to re-exec myself: exec args ($0), (" . @ARGV . "), cwd is ($tmpstr)...");

  # Don't want to leave connections to 514/udp open...
  Log::Stop();
  
  # untaint command line args...
  $0 =~ /(.*)/;
  $me = $1;
  foreach (@ARGV) {
    /(.*)/;
    push @args, $1;
  }

  # ...and exec myself.
  exec $me, @args or do {
    my $ExecError = $!;
    # open up logging again...
    Log::Start($Config::MailScannerProcessName, $Config::SyslogFacility);
    Log::DieLog("Could not re-exec myself: $ExecError");
  };

}

exit 0;

#
# A little child reaper in case the kids go walkabout
#
sub REAPER {
        $SIG{CHLD} = \&REAPER;  # loathe sysV
        wait;
}

