The following example event script monitors a specified log file.
The IDOL Server installer provides additional example scripts.
#!/usr/bin/perl
use strict;
use warnings;
use File::Tail;
use Encode;
use URI::Escape;
#---------------------------------------------------
# Script to monitor a specified log file ($ARGV[0]) generated by any IDOL
# servers, constantly tailing it. When a query appears in the logs it
# gathers the appropriate information and sends it to the Stats server
# at the specified host ($ARGV[1]) and port ($ARGV[2]).
#
# The log file has the following format:
# <date> <time> [<thread number>] <log level> <information>
#
# For example:
# (...)
# 12/12/2007 08:22:21 [7] 30-Normal: /action=suggest&maxresults=6&reference=doc_123123 (127.0.0.1)
# 12/12/2007 08:22:23 [7] 30-Normal: Returning 6 matches
# 12/12/2007 08:22:23 [7] 30-Normal: Suggest complete
# (...)
#
# Parameters: The script takes three required parameters (logfile,
# stathost, statport) and one optional parameter (idolname).
#
# - logfile: The full path and name of the log file.
# - stathost: Host name of the stats server.
# - statport: Event port of the Stats server to which the script can
#   send XML events.
# - idolname: IDOL Server name generating the log file that you want to
# monitor. The value must be the same one specified in the Stats server
# configuration file when setting the "IDOLName" stat parameter.
#
# For example: idolname = "myIDOLServer"
# If only one IDOL Server is used, you do not have to use idolname.
#
# Run the script as follows:
#
# perl stats.pl <logfile> <host> <port> [<idolname>]
#
#----------------------------------------------------------------
# Opening text of every XML document posted to the Stats server.
use constant XML_HEADER => "<?xml version='1.0' encoding='ISO-8859-1'?>\n<events>\n";
# Values handed to File::Tail through its "tail" option by the main loop.
use constant START_FROM_LOGFILE_END => 0;
use constant READ_ENTIRE_LOGFILE => -1;

# The log file, host and port are mandatory: without a third argument,
# print the usage line and stop.
unless (defined $ARGV[2]) {
    print "Syntax: $0 logfile host port [idolname]\n";
    exit(0);
}

# Positional command-line arguments.
my (
    $name,       # The log file to monitor
    $statshost,  # Host of the Stats Server
    $statsport,  # Event port of the Stats Server
    $idolname,   # IDOL Server name. Set an IDOL Server name if Stats Server
                 # is running in Multiple IDOL Server mode.
) = @ARGV[0 .. 3];

# Batching and tailing state.
my $BATCHSIZE = 1;                   # Wait until we have this many events and send them all at once
my $tailn = START_FROM_LOGFILE_END;  # Start tailing from the end
my $resetTail = 0;                   # Start tailing after the file has been automatically closed and reopened
my $tail;                            # Structure returned while tailing a file

# Per-thread record of the action currently in progress.
my %threadqueries;  # The 'current query' on each thread
my %threadips;      # The 'current ip' on each thread
my %threadtime;     # The 'current time' on each thread

# Output state shared between the main loop and processLine.
my $total = 0;      # Number of XML events generated by the script
my %escapes = ();   # Hash mapping characters to hex equivalent
my $data = "";      # Data being sent to the StatsServer
my $events = 0;     # Number of events in the batch being built

# Forward declarations (with prototypes) of the subroutines defined below.
sub buildQueryXML($$$$$@);      # return event XML for a particular query with matches and terms
sub postEventsData($$$);        # Send XML events to the Stats server
sub processLine($);             # Processes a line returned from the log file
sub releaseAndReopenHandle($);  # Windows-only: close and reopen the tailed file handle

# Are we running on Windows? Win32::Process can only be loaded there, so a
# successful require is taken as "yes".
my $bIsWindows = eval { require Win32::Process; 1 } ? 1 : 0;
#--------------------------------
# Main loop
#--------------------------------
for (;;) {
    # Everything that can fail while tailing runs inside this eval. An
    # exception ends both inner loops; the code after the eval then pauses
    # for a second and the next pass creates a fresh File::Tail object.
    eval {
        # Tail the specified log file
        $tail = File::Tail->new(name => $name, maxinterval => 2, interval => 1, tail => $tailn, adjustafter => 1);
        # The tail object exists now, so go back to the default start position
        $tailn = START_FROM_LOGFILE_END;
        # Event Creating loop: each pass builds and posts one batch of events
        for (;;) {
            $data = XML_HEADER;
            $events = 0;
            # Event Writing loop: processLine() appends to $data and
            # increments $events whenever a line completes a query.
            while ($events < $BATCHSIZE) {
                # Wait (up to one second) for new input in the log file.
                my ($nfound, $timeleft, @pending) = File::Tail::select(undef, undef, undef, 1, $tail);
                # Close a partly filled batch when nothing new arrived. With
                # an empty batch, fall through to read() instead.
                last if $events > 0 && !$nfound;
                # Returns one line from the log file
                my $line = $tail->read();
                processLine($line);
            }
            # End of the XML event
            $data .= "</events>\n";
            print STDERR "DATA:\n$data\n";
            # End of the current batch. Post the response to the statsserver.
            if ($events) {
                $total += $events;
                # A failed post is reported but must not stop the tailing.
                eval {
                    postEventsData($statshost, $statsport, $data);
                };
                print "Posting events failed: $@" if $@;
                if ($total % 1000 == 0) {
                    print STDERR "Total events: $total\n";
                }
                $data = "";
                #The file handle used by File::Tail is used permanently until it
                #($tail) goes out of scope. On Windows, this means it keeps a lock
                #on the log file we are tailing: therefore, the log file has no
                #opportunity to roll over after it exceeds the maximum permitted
                #size. The following call closes the handle, sleeps, and reopens
                #the handle. This allows the rollover to occur correctly if logging
                #is made to the log file during the sleep, and the log file size
                #has overstepped the limit.
                if ($bIsWindows) {
                    releaseAndReopenHandle($tail);
                }
            }
        }
    };
    # A bare warn reports the error left in $@ by the eval above. It is only
    # issued while $tailn is 0, that is on the very first pass or when the
    # failure happened after the File::Tail object had been created. Repeated
    # failures to create the object stay silent.
    warn unless $tailn;
    # After a failure, the next tail reads the log file from its beginning.
    $tailn = READ_ENTIRE_LOGFILE;
    sleep 1;
} #end of the Main loop
#--------------------------------------------------------------------
# Process one line returned from the log file.
#
# A line that carries an action (query) is remembered per thread in
# %threadqueries, %threadips and %threadtime. When a later line on the
# same thread reports the end of that action, the XML for the query is
# appended to $data, $events is incremented and the per-thread state is
# cleared. Every other line is ignored.
#
# Parameter: the log line; an undefined value is ignored.
#--------------------------------------------------------------------
sub processLine($) {
    my $line = shift;
    return unless defined $line;

    # Only lines with the following format are of interest
    # (that is, 16/09/2008 14:20:00 [1] 30-Normal: ....)
    return unless $line =~ m,^(\d\d/\d\d/\d{4} \d\d:\d\d:\d\d) \[(\d+)\] \d\d-(Full|Normal|Always|Warning|Error): (.*),;
    my $timeLog = $1; # Time when the IDOL Server received the query
    my $thread = $2;  # The thread number the query is on
    my $message = $4; # The rest of the log line; $3 is the log level (Full, Normal, and so on)

    # Check whether the message contains a query (action=termgetbest&...)
    if ($message =~ m,^/?(a.*?=.+) \(([\d\.]+)\)$,) {
        # The initial 'action received'
        $threadqueries{$thread} = $1;    # The query
        $threadips{$thread} = $2;        # IP address logged in brackets after the query
        $threadtime{$thread} = $timeLog; # The query time
        return;
    }

    # Anything else only matters while a query is pending on this thread
    return unless $threadqueries{$thread};

    # Check whether the message contains the number of hits, returned
    # matches or a completed query. The last form carries no count.
    my $matches = 0;
    if ($message =~ /^Completed Action, returning (\d+) hits$/ || $message =~ /^Returning (\d+) match/) {
        $matches = $1 || 0;
    }
    elsif ($message !~ /^.* complete$/) {
        return;
    }

    # The end of the query. Form the event xml and save
    my $query = $threadqueries{$thread};
    my $ip = $threadips{$thread};
    # If the query format is correct, generate the data for the XML event
    if (defined $query && defined $ip && $query =~ m!/?a.*?=(\w+)([?&].*(?<=[?&])text=([^?&]*))?!) {
        # Copy the captures straight away so that no later pattern match
        # can change them, and keep a text of "0" as a real term.
        my $action = $1;
        my $terms = defined $3 ? $3 : "";
        $events++;
        $data .= buildQueryXML($idolname, $query, $action, $matches, $ip, split(/ /, uri_unescape($terms)));
    }
    print STDERR "$threadtime{$thread} $threadqueries{$thread}\n";

    # Reset
    $threadqueries{$thread} = "";
    $threadips{$thread} = "";
    $threadtime{$thread} = "";
    return;
}
#Windows-only: close and reopen the log file handle, with a suitable pause in
#between, to allow the log file to be rolled over.
#
# Parameter: the File::Tail object whose 'handle' entry is to be reopened.
# Returns 1 on success; dies if the log file cannot be opened again.
sub releaseAndReopenHandle($) {
    my $tail = shift;
    my $logfile = $tail->input();
    close($tail->{'handle'});
    # The handle stays closed for this long, which is when a rollover can happen.
    sleep(5);
    # Three-argument open: the file name is never parsed for mode characters,
    # so names with leading or trailing special characters open correctly.
    open($tail->{'handle'}, '<', $logfile) or die "Cannot reopen logfile handle: $!\n";
    return 1;
}
# Return a copy of the given string from which every character in the
# range 0x00-0x1F has been deleted. The argument itself is not modified.
sub stripControlChars($) {
    (my $cleaned = $_[0]) =~ tr/\x00-\x1F//d;
    return $cleaned;
}
#--------------------------------------------------------------------
# Extract information from a query and build the XML to send
#
# Parameters:
#   idolname - IDOL Server name; the <idolname> element is left out when
#              this is empty or undefined
#   query    - the logged query, placed in a CDATA section
#   action   - name of the action
#   matches  - number of hits
#   ip       - IP address logged with the query
#   terms    - any further arguments are the query terms
#
# Returns the <queryinfo> element as a string.
#--------------------------------------------------------------------
sub buildQueryXML($$$$$@) {
    my ($idolname, $query, $action, $matches, $ip, @terms) = @_;

    # A "]]>" inside the query would end the CDATA section early and make
    # the document malformed. Split it over two adjacent CDATA sections.
    (my $cdataQuery = $query) =~ s/]]>/]]]]><![CDATA[>/g;

    my $xml = "<queryinfo>\n<ver>0.1</ver>\n<url><![CDATA[$cdataQuery]]></url>\n";
    $xml .= "<action>$action</action>\n";
    # Each term is written URL-escaped, with control characters removed first
    $xml .= "<terms><term>" . uri_escape(stripControlChars($_)) . "</term></terms>\n" for @terms;
    $xml .= "<numhits>$matches</numhits>\n";
    $xml .= "<ip>$ip</ip>\n";
    if ($idolname) {
        # The name is element text, so XML markup characters must be escaped
        my $safeName = $idolname;
        $safeName =~ s/&/&amp;/g;
        $safeName =~ s/</&lt;/g;
        $safeName =~ s/>/&gt;/g;
        $xml .= "<idolname>$safeName</idolname>\n";
    }
    $xml .= "</queryinfo>\n";
    return $xml;
}
#--------------------------------------------------------------------
# Post a batch of events data to the specified host and port
#
# Parameters:
#   host - host name or address of the Stats server
#   port - event port of the Stats server
#   data - the XML document to send as the body of the POST
#
# Returns 1 when the request has been sent. Dies with a message when the
# host cannot be resolved, no connection can be made within three
# attempts, or the request cannot be written.
#--------------------------------------------------------------------
sub postEventsData($$$) {
    my $host = shift;
    my $port = shift;
    my $data = shift;
    my $nConnectTry = 3;
    use Socket;

    my $proto = getprotobyname('tcp');
    defined $proto or die "Socket Failure: unknown protocol 'tcp'\n";
    my $inet_addr = Socket::inet_aton($host)
        or die "Internet_addr Failure: cannot resolve host '$host'\n";
    my $paddr = Socket::sockaddr_in($port, $inet_addr);

    # The state of a socket is unspecified after a failed connect, so every
    # attempt uses a new one.
    my $sock;
    my $connectError = "";
    for (1 .. $nConnectTry) {
        socket($sock, Socket::PF_INET, Socket::SOCK_STREAM, $proto)
            or die "Socket Failure: $!\n";
        last if connect($sock, $paddr);
        $connectError = "$!";
        close $sock;
        undef $sock;
    }
    defined $sock or die "Connect problem: $connectError\n";

    # Unbuffer the socket, then put back whichever handle was selected before
    my $previous = select($sock);
    $| = 1;
    select($previous);

    print {$sock} "POST /stats HTTP/1.0\r\n",
                  "Content-Length: " . length($data) . "\r\n\r\n",
                  $data
        or die "Send problem: $!\n";

    # Read the start of the reply so the request is not cut short; the
    # content of the reply is not used.
    my $buffer;
    read($sock, $buffer, 100);
    close $sock;
    return(1);
}
|
|