#!/opt/perl/bin/perl -w
$vernum = "1.5 - 04 MAR 99" ;           # Code version and modify date
#
# (See configuration parameters below)
#
# This code copyright 1998-99 by
# D. W. Eaton, Artronic Development, Phoenix, AZ -- dwe@arde.com
#
# This software is made freely available under the provisions of the Perl
# "Artistic" license:  http://language.perl.com/misc/Artistic.html
#
# This code is not supported and is not warranteed to perform any particular
# function. Contact dwe@arde.com for aditional information.
# If you find bugs or make enhancements, it would be appreciated if you
# sent them on to the author at dwe@arde.com.
#
=head1

 Find all log files (we care about) and munge them together

 Use:
   munglog [options] [logdir]

   where:
    options are:
         -q[uiet]              = quiet mode - surpress messages
         -v[erbose]            = verbose mode (additional messages)
         -suf=text             = suffix ("text") to apply to log files
    logdir                     = the directory in which the logs should be

 You do not need to be in any particular directory to execute this script,
 but you must have access to write into the destination directory.

=cut

$script = $0;                 # Full path to script
$script =~ s/^.*\///;         # Remove all but leaf name

require "ctime.pl";
$starttime = time;
$starttime = ctime($starttime);
chomp ($starttime);

# Constants
$false = 0;  # FALSE
$true  = 1;  # TRUE
#
# Configuration:
$logdir = "/opt/web/logs"; # Default log dir
$primehost = "www.interex.org"; # Prime host ... where the real /index.html goes
# Default directory for output
$mungedir = "/var/tmp"; # The results
# Was: $mungedir = "/opt/web/web-interex.org/htdocs/www/Private/wwwstats"; # The results
$matchsuffix = ""; # Default suffix on log files to be analyzed
# $mungedir = "/tmp"; # For test
$mungefile = "munged-access_log"; # Name of munged output file
$work_dir = $mungedir;
#
# Error subroutine handling:
#   &printerror($nopfx,$nonum,"message",$always);
$nopfx = "";     # Do not print a prefix
$nonum = 0;      # Do not print a line number
$always = $true; # Always print
$never = $false; # Override error prints, never do them if TRUE
$error_log = ""; # Empty error log
undef (%srcfilelist);
#
# Command line option defaults:
$quiet = $false;          # Quiet mode if TRUE
$verbose = $false;        # Verbose mode if TRUE
#
$deletebak = $false;      # Delete modified file ".bak" if TRUE

# Initialze:
$reqargs = 0;       # minimum number of required arguments
$noerrors = $true;  # no errors detected unless this value is FALSE

# Environment variables:
($username) = getpwuid($>);
#
# ----------------
# Get any command-line arguments that may have been passed:
$numargs = scalar (@ARGV) ;

# Get command line options first:

if ($numargs)
{
 while ($ARGV[0] =~/^-/)
 {
  $nextarg = shift(@ARGV);
  if ($nextarg =~ /^-verbose$/i || $nextarg =~ /^-v$/i)
  {
   &printerror($nopfx,$nonum,"Verbose mode enabled",$always);
   $verbose = $true;          # Verbose mode
  }
  elsif ($nextarg =~ /^-quiet$/i || $nextarg =~ /^-q$/i)
  {
   $quiet = $true;          # Quiet mode
   &printerror($nopfx,$nonum,"Quiet mode enabled",$always);
  }
  elsif ($nextarg =~ /^-suf/i)
  {
   $thisarg = "";
   $value = "";
   ($thisarg,$value) = split ("=",$nextarg,2);
   if (defined $value)
   {
    $matchsuffix = $value; # Set suffix on log files to be analyzed
    # print "Found suffix $matchsuffix\n";
   }
  }
 }
}

# Now get other command line arguments:
$numargs = scalar (@ARGV) ; # re-compute number of args left
if ($numargs > 0)
{
   $i = 1;
   do
   {
    $arg1 = shift (@ARGV) ;
    $responses{"userarg$i"} = $arg1 ;
    $i++ ;
   } until $i > $numargs;
}

# Got all arguments now ....
# ----------------
unless ($quiet)
{
 print "$starttime $script - version $versn\n";
 print "Log munge script version $versn for $username\n";
}

if ($reqargs > $numargs)
{
 # Insufficient command line arguments:
 &printerror($nopfx,$nonum,"ERROR: Insufficient number of command line argumets",$always);
 $noerrors = $false; # errors detected, set this to null
}
else
{

 if ($responses{'userarg1'})
 {
  $logdir = "$responses{'userarg1'}";
  unless ($quiet)
  {
   print "User specified log directory: $logdir\n";
  }
 }

 # (DWE: NEED to do proper 'cd' here ...)
 $start_dir = $logdir;
 unless ($quiet)
 {
  print "Starting work dir is '$start_dir'\n";
 }
 &cdme($start_dir);

 if (-d "$work_dir")
 {
  &printerror("Warning",$nonum,"$work_dir already exists, may contain older or preprocessed files already",$always);
 }

 &cdme($start_dir);

 &printerror($nopfx,$nonum,"Finding and using all 'log' files ... $matchpatt",$always);
 # Find and use all "log" files ...
 $matchpatt = "\^host.*access_log$matchsuffix\$";
 ($matches,$filelist) = &getls(".",$matchpatt);
 if ($matches)
 {
  $rest = $filelist;

  $munge_logfile = $work_dir . "/" . $mungefile;
  if (!open (MUNGELOG, ">$munge_logfile"))
  {
   print STDERR "ERROR: Unable to open munged output log '$munge_logfile'\n$!\n";
  }
  else
  {
   while ($rest)
   {
    ($nextfile,$rest) = split("\n",$rest,2);
    unless ($quiet)
    {
     print "Scanning '$nextfile' ...\n";
    }
    $thishost = $nextfile; # Find host part
    $thishost =~ s/^\.\/host./\//; # Remove lead stuff and fool wwwstat with a leading slash
    $thishost =~ s/-access_log$matchsuffix$//;
    $tothost{$thishost} = 0; # Clear file line number
    if (!open (LOGDIR, "<$nextfile"))
    {
     &printerror("ERROR",$nonum,"unable to open $nextfile\n$!",$always);
    }
    else
    {
     # Process a 'log' file
     while (<LOGDIR>)
     {
      $line = "$_";      # Next line
      $line =~ s/\n$//;  # Strip newline at end of line
      if ($line)
      {
       $tothost{$thishost}++; # Count lines
       # Look for the "welcome page" entries ...
       if ($line =~ /\] \"GET \/ / ||
           $line =~ /\] \"GET \/index.pl /)
       {
        # This is one we care about
        $line =~ s/\] \"GET \//\] \"GET $thishost\//;
        $line =~ s/\/index\.pl /\//; # Remove the default script name that does the magic
        $counthost{$thishost}++; # Count it while we are here
       }
       elsif ($line =~ /\] \"GET \/index.html /)
       {
        # This is one was a force to the primehost welcome page we care about
        $line =~ s/\] \"GET \//\] \"GET \/$primehost\//;
        $counthost{$primehost}++; # Count it while we are here
       }
       print MUNGELOG "$line\n"; # Don't bother to output blank lines
      }
     }
    }
    close (LOGDIR);
   }
   if (scalar (%tothost))
   {
    unless ($quiet)
    {
     print "Count of all accesses:\n";
     foreach $key (sort keys %tothost)
     {
      print "   $key : $tothost{$key}\n";
     }
    }
   }

   if (scalar (%counthost))
   {
    unless ($quiet)
    {
     print "Count of host-level accesses:\n";
     foreach $key (sort keys %counthost)
     {
      print "   $key : $counthost{$key}\n";
     }
    }
   }
   close (MUNGELOG);
   unless ($quiet)
   {
    print "Munged log may be found in: '$munge_logfile'\n";
   }
  }
 }

 $endtime = time;
 $endtime = ctime($endtime);
 chomp ($endtime);

}
# Finally, if there were errors, try to offer help ...
if (! $noerrors)
{
 # Print help message
 unless ($quiet)
 {
 print STDERR qq^
 Munge the logs for host names

 Use:
   $0 [options] logdir

   where:
    options are:
         -q[uiet]              = quiet mode - surpress messages
         -suf=text             = suffix ("text") to apply to log files
         -v[erbose]            = verbose mode (additional messages)

    logdir                     = the directory containging the logs

 You do not need to be in any particular directory to execute this script,
 but you must have write access to the target directory.

 Examples:
   mungelog logdir >test.pad 2>test-err.pad
   mungelog -q
 \n^; # End of print
 }
}

if ($endtime)
{
 # OK, we did something to declare an end to ...
 unless ($quiet)
 {
  print "$endtime $script done.\n";
 } 
}
exit (0);

# -- subroutines --
#---------------------
# Change directories and verify we are there
#   &cdme("newdir")
sub cdme
{
 local ($newdir) = @_ ;

 if (! chdir("$newdir"))
 {
  print STDERR "Unable to change directory to '$newdir'\n";
  print STDERR "$!\n";
  exit (1);
 }
 else
 {
  if ($verbose)
  {
   unless ($quiet)
   {
    print "... changed to directory '$newdir'\n";
   }
  }
 }
}
#---------------------
# List contents of directory
# (subject to optional pattern match)
#   $result = &lsme("newdir"[,"pattern"])
# where $result:
#     0 = directory not found or not opened or no pattern matches found
#    +n = number of pattern matches found
sub lsme
{
 local ($newdir,$newpatt) = @_;
 local ($name,$results);

 $results = 0;

 if (! opendir (DIR, $newdir))
 {
  print STDERR "Cannot open directory '$newdir'\n" ;
  return (0) ;
 }

 while (defined($name = readdir (DIR)))
 {
  # Scan each file in turn ...
  if ($newpatt)
  {
   next unless ($name =~ /$newpatt/) ;
  }
  $results++;
  unless ($quiet)
  {
   print "$newdir/$name\n";
  }
 }
 closedir (DIR) ;
 if ($results)
 {
  unless ($quiet)
  {
   print "Found $results files\n";
  }
 }
 else
 {
  unless ($quiet)
  {
   print "(No results files found)\n";
  }
 }
 return $results;
}
#-----------------------
# Get list of contents of directory
# (subject to optional pattern match)
#   ($results,$textresults) = &getls("newdir"[,"pattern"])
# where $results:
#     0 = directory not found or not opened or no pattern matches found
#    +n = count of the matches
# and $textresults:
#  null = directory not found or not opened or no pattern matches found
#  text = actual matches, one per line
sub getls
{
 local ($newdir,$newpatt) = @_;
 local ($name,$results,$textresults);

 $results = 0;
 $textresults = "";

 if (! opendir (DIR, $newdir))
 {
  print STDERR "Cannot open directory '$newdir'\n" ;
  return (0) ;
 }

 while (defined($name = readdir (DIR)))
 {
  # Scan each file in turn ...
  if ($newpatt)
  {
   next unless ($name =~ /$newpatt/) ;
  }
  $results++;
  $textresults .= "$newdir/$name\n";
 }
 closedir (DIR) ;
 return ($results,$textresults);
}
#-----------------------
# Get list of all files below this directory
#   &getfilelist(dirname,level)
# Until directory level reaches "level", don't record files
sub getfilelist
{
 local ($dirname,$level) = @_;
 local ($matches,$filelist,$rest,$nextfile,$thisdirset);

 ($matches,$filelist) = &getls($dirname,"");
 if ($matches)
 {
  unless ($quiet)
  {
   print "Found $matches to process\n" if ($verbose);
  }
  $rest = $filelist;
  while ($rest)
  {
   ($nextfile,$rest) = split("\n",$rest,2);
   $nextfile =~ s/^.\///;              # Strip lead junk
   $thisdirset = $nextfile;
   if ($nextfile ne "." && $nextfile ne ".." &&
       $nextfile !~ /\/\.$/ && $nextfile !~ /\/\.\.$/)
   {
    # print "checking '$nextfile' at level '$level' ...\n";
    if (-d $nextfile)
    {
     $level++;
     &getfilelist($nextfile,$level);
    }
    else
    {
     if ($level > 1)
     {
      $srcfilelist{$nextfile}++;
     }
    }
   }
   else
   {
    # print "Ignoring '$nextfile' at level '$level'.\n";
   }
  }
 }
}

#-----------------------
# Scan the reference model ...
#  $status = &savefile($outfile,$newcontent);
# Status:
#   0 = OK
#   1 = fail
sub savefile
{
 local ($outfile,$newcontent) = @_;

 if (!open (OUTFILE, ">$outfile.new"))
 {
  if (-f "$outfile.new")
  {
   unlink ("$outfile.new");
  }
  &printerror("ERROR",$nonum,"unable to open $outfile.new\n$!",$always);
  return (1);
 }
 else
 {
  print OUTFILE "$newcontent";
  close (OUTFILE);
  unlink ("$outfile.bak");
  rename ("$outfile", "$outfile.bak");
  rename ("$outfile.new", "$outfile");
  if ($deletebak)
  {
   unlink ("$outfile.bak");
  }
  return (0);
 }
 
}
#-----------------------
# Print an array
#  &printarray("Output",@out);
sub printarray
{
 local ($label,@temp) = @_;

 unless ($quiet)
 {
  print "\n$label results:\n---------------\n";
 }
 while (@temp)
 {
  unless ($quiet)
  {
   print "$temp[0]\n";
  }
   shift (@temp);
 }
 unless ($quiet)
 {
  print "---------------\n";
 }
}

#-----------------------
# Print and save error messages
#  &printerror("prefix",$num,"message",$condition);
# Always save the message to the error log, if $condition is truem
# also print it to STDERR right away.
# If $num > 0, include it (probably a line number) in message
sub printerror
{
 local ($prefix,$num,$message,$condition) = @_;
 local ($realmsg);

 # Build up message
 $realmsg = ""; 
 if ($prefix)
 {
  $realmsg .= "$prefix "; 
 }
 if ($num)
 {
  $realmsg .= "[" . $num . "]";
 }
 if ($prefix || $num)
 {
  $realmsg .= ": ";
 }
 $realmsg .= "$message";
 if ($condition && ! $never) 
 {
  # Print it right now
  unless ($quiet)
  {
   print STDERR "$realmsg\n";
  }
 }
 # Always append to error log
 $error_log .= "$realmsg\n";
}
