#!/usr/bin/perl

# jmt12

use strict;
use warnings;

# Setup Environment
BEGIN
{
  die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
  die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};

  unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
  unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan");
  unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/XML/XPath");
  unshift (@INC, "$ENV{'GSDLHOME'}/perllib/plugins");
  unshift (@INC, "$ENV{'GSDLHOME'}/perllib/classify");

  if (defined $ENV{'GSDLEXTS'})
  {
    my @extensions = split(/:/,$ENV{'GSDLEXTS'});
    foreach my $e (@extensions)
    {
      my $ext_prefix = "$ENV{'GSDLHOME'}/ext/$e";
      unshift (@INC, "$ext_prefix/perllib");
      unshift (@INC, "$ext_prefix/perllib/cpan");
      unshift (@INC, "$ext_prefix/perllib/plugins");
      unshift (@INC, "$ext_prefix/perllib/classify");
    }
  }
  if (defined $ENV{'GSDL3EXTS'})
  {
    my @extensions = split(/:/,$ENV{'GSDL3EXTS'});
    foreach my $e (@extensions)
    {
      my $ext_prefix = "$ENV{'GSDL3SRCHOME'}/ext/$e";
      unshift (@INC, "$ext_prefix/perllib");
      unshift (@INC, "$ext_prefix/perllib/cpan");
      unshift (@INC, "$ext_prefix/perllib/plugins");
      unshift (@INC, "$ext_prefix/perllib/classify");
    }
  }

  # Manually installed CPAN package in GEXT*INSTALL
  # - parse up version number
  my ($major, $minor, $revision) = $] =~ /(\d+)\.(\d\d\d)(\d\d\d)/;
  # - get rid of leading zeros by making them integers
  $major += 0;
  $minor += 0;
  $revision += 0;
  # - and add to Perl's path
  unshift (@INC, $ENV{'GEXTTDBEDIT_INSTALLED'} . '/lib/perl5/site_perl/' . $major . '.' . $minor . '.' . $revision);
  unshift (@INC, $ENV{'GEXTTDBEDIT_INSTALLED'} . '/share/perl/' . $major . '.' . $minor . '.' . $revision);
}

use Cwd;

# We need to do a little file locking
use Fcntl qw(:flock); #import LOCK_* constants
# Advanced child process control allowing bidirectional pipes
use IPC::Run qw(harness start pump finish);
# we need to run as a daemon
use Proc::Daemon;
# Try and find the hostname
use Sys::Hostname;

# The server will need to accept requests from multiple threads, and
# so will need threads in and of itself
use threads;
use threads::shared;

# Greenstone utility functions (filename_cat)
use util;
use dbutil::tdb;

# A simple server that listens on a socket and 'forks' off child threads to
# handle each incoming request
use SocketsSwimmingThreadPoolServer;

# Globally available - but once set these are read-only - so locking isn't
# an issue
my $use_harness = 0;
my $tdbexe = 'tdbcli';
my $parent_pid = 0;
my $collection = '';
my $no_daemon = 0;
my $debug = 1;
my $server;
my $machine_name = (`hostname -s` || `hostname -a` || `hostname` || $ENV{'HOSTNAME'});
chomp($machine_name);
my $server_host = $machine_name . '.local';
my $server_port;
my $server_threads;
# - shared and, more importantly, lockable
my %listeners :shared;
my $should_stop :shared = 0;
my $debug_log :shared = 0;

my $msg_counter :shared = 0;

print "===== TDB Server =====\n";
print "Provides a server to allow multiple remote machines to simultaenously\n";
print "edit one or more TDB databases on the local machine. This is to work\n";
print "around NFS file locking issues when parallel processing on a cluster.\n";

MAIN:
{
  # Check arguments
  # - compulsory
  if (!defined $ARGV[0] || $ARGV[0] !~ /^\d+$/)
  {
    &printUsageAndExit('Error! Missing parent process ID or not a PID');
  }
  $parent_pid = $ARGV[0];
  if (!defined $ARGV[1])
  {
    &printUsageAndExit('Error! Missing active Greenstone collection name');
  }
  $collection = $ARGV[1];
  # - optional
  my $i = 2;
  while (defined $ARGV[$i])
  {
    if ($ARGV[$i] eq "-nodaemon")
    {
      $no_daemon = 1;
    }
    if ($ARGV[$i] eq "-debug")
    {
      $debug = 1;
    }
    $i++;
  }

  # Read in the collection specific configuration
  my $cfg_path = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'tdbserver.conf');
  open(CFGIN, '<' . $cfg_path) or die("Failed to read config file: " . $cfg_path);
  my $line = '';
  while (($line = <CFGIN>))
  {
    if ($line =~ /^(\w+)\s+(.*)$/)
    {
      my $key = $1;
      my $value = $2;
      # Allow the override of serverhost
      if ($key eq "serverhost")
      {
        $server_host = $value;
      }
      if ($key eq "serverport")
      {
        $server_port = $value;
      }
      if ($key eq "threads")
      {
        $server_threads = $value;
      }
    }
  }
  close(CFGIN);

  if ($debug)
  {
    print " - collection: " . $collection . "\n";
    print " - parent pid: " . $parent_pid . "\n";
    print " - no daemon? " . $no_daemon . "\n";
    print " - debug? " . $debug . "\n";
    print " - serverhost: " . $server_host . "\n";
    print " - serverport: " . $server_port . "\n";
    print " - threads: " . $server_threads . "\n";
    print "\n";
  }

  # Information about any running TDBServer is stored in a lockfile in
  # Greenstone's tmp directory (named after the active collection)
  my $tmp_dir = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'tmp');
  if (!-d $tmp_dir)
  {
    mkdir($tmp_dir, 0755);
  }
  my $server_lockfile_path = &util::filename_cat($tmp_dir, 'tdbserver.lock');

  # If already running, then exit
  print " * Testing if TDBServer for this collection already running... ";
  if (-e $server_lockfile_path)
  {
    die("Error! TDBServer already running!\nLockfile found at: " . $server_lockfile_path);
  }
  print "All clear!\n";

  # Ensure we can see tdb edit tools on the path
  print " * Testing for tool: " . $tdbexe . "... ";
  my $result = `$tdbexe 2>&1`;
  if ($result !~ /usage:\s+$tdbexe/)
  {
    die("Error! " . $tdbexe . " not available - check path.");
  }
  print "Found!\n";

  # Daemonize
  my $pid = 0;
  if (!$no_daemon)
  {
    print " * Spawning Daemon...\n" unless (!$debug);
    if ($debug)
    {
      my $logs_dir = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'logs');
      if (!-d $logs_dir)
      {
        mkdir($logs_dir, 0755);
      }
      my $daemon_out_path = &util::filename_cat($logs_dir, 'tdbserver.out');
      my $daemon_err_path = &util::filename_cat($logs_dir, 'tdbserver.err');
      $pid = Proc::Daemon::Init( { work_dir => getcwd(),
                                   child_STDOUT => $daemon_out_path,
                                   child_STDERR => $daemon_err_path,
                                 } );
    }
    else
    {
      # Streams to /dev/null
      $pid = Proc::Daemon::Init( { work_dir => getcwd(),
                                 } );
    }
  }

  # Master process has pid > 0
  if ($pid == 0)
  {
    print "[" . time() . ":" . $server_host . ":" . $server_port . "]\n";
    print " * Starting server on " . $server_host . ":" . $server_port . "\n";
    # - create server object
    print " * Creating pool of " . $server_threads . " threads listening on socket\n";
    $server = SocketsSwimmingThreadPoolServer->new(host=>$server_host,
                                                   port=>$server_port,
                                                   thread_count=>$server_threads,
                                                   main_cb => \&exitCheck,
                                                   processor_cb => \&process);

    # - write a lockfile
    print " * Creating lock file: " . $server_lockfile_path . "\n";
    open(SLFH, ">", $server_lockfile_path) or die("Error! Failed to open file for writing: " . $server_lockfile_path . "\nReason: " . $! . "\n");
    flock(SLFH, LOCK_EX) or die("Error! Cannot lock file for writing: " . $server_lockfile_path . "\nReason: " . $! . "\n");
    print SLFH $server_host . ':' . $server_port;
    flock(SLFH, LOCK_UN);
    close(SLFH);

    # Perform main loop
    # - loop is actually in Server code. start() only returns once server's stop
    #   command has been called
    print " * Listening:\n";
    $server->start;
    print " * Stopping...\n";

    # Perform deinitializes here
    # - remove server lockfile
    print " * Removing lock file...\n";
    unlink($server_lockfile_path);
    print "Done!\n";
  }
  # Forked child processes
  else
  {
    print " * Waiting for lockfile to be created";
    while (!-e $server_lockfile_path)
    {
      print '.';
      sleep(1);
    }
    print "\n * TDBServer lockfile created.\n";
    open(SLFH, "<", $server_lockfile_path) or die("Error! Failed to open file for reading: " . $server_lockfile_path . "\nReason: " . $! . "\n");
    flock(SLFH, LOCK_SH) or die("Error! Cannot lock file for reading: " . $server_lockfile_path . "\nReason: " . $! . "\n");
    my $line = <SLFH>;
    if ($line =~ /(^.+):(\d+)$/)
    {
      print " => Server now listening on " . $1 . ":" . $2 . "\n";
    }
    else
    {
      die ("Error! Failed to retrieve host and port information from lockfile!");
    }
    flock(SLFH, LOCK_UN);
    close(SLFH);
  }

  print "===== Complete! =====\n";
}
exit(0);

# @function exitCheck
# A callback function, called every 5 seconds (default) by the socket server,
# to see whether the parent process (by pid) is actually still running. This
# will cover the case where the parent process (import.pl or build.pl) dies
# without properly asking the server to shutdown.
sub exitCheck
{
  my $counter = shift @_;
  #rint "[DEBUG] Has parent process gone away? [" . $parent_pid . "]\n";
  # Parent PID not available or we aren't allowed to talk to it (debugging)
  if ($parent_pid == 0)
  {
    return;
  }
  # note: kill, when passed a first argument of 0, checks whether it's possible
  # to send a signal to the pid given as the second argument, and returns true
  # if it is. Thus it provides a means to determine if the parent process is
  # still running (and hence can be signalled) In newer versions of Perl
  # (5.8.9) it should even work cross-platform.
  if (!kill(0, $parent_pid))
  {
    print " * Parent processs gone away... forcing server shutdown\n";
    $server->stop;
    if ($debug)
    {
      lock($debug_log);
      $|++;
      print "[" . time() . "|MAIN] Parent process gone away... forcing server shutdown.\n\n";
      $|--;
    }
  }
}

# /** @function process
#  *  A horribly named function that is called back to process each of the
#  *  requests to alter the TDB databases. It expects a complete TDB CLI
#  *  command as a text blob, or one of a limited number of special commands
#  *  ([a]dd or [r]emove listener, or [q]uit).
#  */
sub process
{
  my $data = shift @_;
  my $ip = shift @_;
  my $tid = shift @_;
  my $result = "#ERROR#";
  my $the_count = 0;
  {
    lock($msg_counter);
    $msg_counter++;
    $the_count = $msg_counter + 0;
    # //unlock($msg_counter);
  }
  &debugPrint($the_count, $tid, 'RECV', $data) unless !$debug;
  # process special commands first
  if ($data =~ /^#([arq]):(.*)$/)
  {
    my $command = $1;
    my $argument = $2;
    # addlistener(<pid>)
    if ($command eq "a")
    {
      lock(%listeners);
      $listeners{$argument} = 1;
      my $listener_count = scalar(keys(%listeners));
      $result = "[SUCCESS] added listener [" . $listener_count . " listeners]";
      # //unlock(%listeners)
    }
    # removelistener(<pid>)
    elsif ($command eq "r")
    {
      my $listener_count = 0;
      {
        lock(%listeners);
        if (defined $listeners{$argument})
        {
          delete $listeners{$argument};
        }
        $listener_count = scalar(keys(%listeners));
        # //unlock(%listeners)
      }
      lock($should_stop);
      if ($should_stop == 1 && $listener_count == 0)
      {
        # server isn't shared, but the stop data member is!
        $server->stop;
        $result = "[SUCCESS] removed last listener, stopping";
      }
      else
      {
        $result = "[SUCCESS] removed listener [" . $listener_count . " listeners]";
      }
      # //unlock($should_stop)
    }
    # we may be asked to stop the server, but only by the process that created
    # us. If there are no listeners registered, we stop straight away,
    # otherwise we set a flag so that as soon as there are no listeners we
    # stop.
    elsif ($command eq "q")
    {
      if ($argument ne $parent_pid && $argument ne "*")
      {
        $result = "[IGNORED] can only be stopped by parent process";
      }
      else
      {
        my $listener_count = 0;
        {
          lock(%listeners);
          $listener_count = scalar(keys(%listeners));
          # //unlock(%listeners)
        }
        if ($listener_count == 0)
        {
          # server isn't shared, but the stop data member is!
          $server->stop;
          $result = "[SUCCESS] stopping";
        }
        else
        {
          lock($should_stop);
          $should_stop = 1;
          $result = "[PENDING] will stop when no more listeners";
          # //unlock($should_stop)
        }
      }
    }
  }
  # Everything thing else should be a TDB command of the form:
  #   <database>:<key>:<value>
  # where: database is [d]oc, [i]ndex, or [s]rc
  elsif ($data =~ /^([dis]):\[(.+?)\]([\+\?\-]?)(.*)$/s)
  {
    my $database = $1;
    my $key = $2;
    my $action = $3;
    # by default we add for backwards compatibility
    if (!defined $action || $action eq '')
    {
      print STDERR "Warning! Detected request without action (#" . $the_count . ") - assuming add/update.\n";
      $action = '+';
    }
    my $payload = $4;
    $payload =~ s/^\s+|\s+$//g;
    &debugPrint($the_count, $tid, 'PARSED', 'database=' . $database . ', key=' . $key . ', action=' . $action . ', payload=' . $payload) unless !$debug;

    # We need to try and persist the connection to TDB, otherwise the OS quickly
    # exhausts NFS daemons amongst other issues
    # Can I make use of the Greenstone DBUtils TDB somehow?
    # Arg - because there s the potential to mix reads and writes, this isn't
    # as straightforward as first thought

    # Build path to database file
    my $tdb_path = '';
    if ($database eq 'd')
    {
      $tdb_path = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'archives', 'archiveinf-doc.tdb');
    }
    elsif ($database eq 's')
    {
      $tdb_path = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'archives', 'archiveinf-src.tdb');
    }
    else
    {
      $tdb_path = &util::filename_cat($ENV{'GSDLHOME'}, 'collect', $collection, 'building', 'text', $collection . '.tdb');
    }
    # Harnesses seem like goodly magic - but unfortunately may be broken
    # magic. Testing on Medusa randomly hangs on the finish() function.
    if ($use_harness)
    {
      my $record = '[' . $key . ']' . $action . $payload;
      # Open harness to TDBCLI
      &debugPrint($the_count, $tid, 'TDBCLI', 'Opening harness') unless !$debug;
      my @tdb_command = ($tdbexe, $tdb_path, '-mid ' . $the_count);
      my $buffer_to_tdb = '';
      my $buffer_from_tdb = '';
      my $tdb_harness = start(\@tdb_command, \$buffer_to_tdb, \$buffer_from_tdb);
      # Check the harness worked
      if (!pumpable $tdb_harness)
      {
        die("Error! Harness to " . $tdbexe . " has gone away!");
      }
      # - write the data to the TDBCLI
      $buffer_to_tdb = $record . "\n";
      while (length($buffer_to_tdb))
      {
        pump($tdb_harness);
      }
      # - read any response from TDBCLI
      &debugPrint($the_count, $tid, 'TDBCLI', 'Reading') unless !$debug;
      while ($buffer_from_tdb !~ /-{70}/)
      {
        pump($tdb_harness);
      }
      # - explicitly tell the pipe to quit (empty key)
      &debugPrint($the_count, $tid, 'TDBCLI', 'Closing') unless !$debug;
      $buffer_to_tdb = "[]\n";
      while (length($buffer_to_tdb))
      {
        pump($tdb_harness);
      }
      # - not that this result doesn't include the [Server] prefix as it
      #   may be parsed for data by the client
      $result = $buffer_from_tdb;
      chomp($result);
      # Finished with harness
      &debugPrint($the_count, $tid, 'TDBCLI', 'Finishing harness') unless !$debug;
      finish($tdb_harness);
      &debugPrint($the_count, $tid, 'TDBCLI', 'Complete') unless !$debug;
    }
    # Use different TDB tools depending on arguments
    # - lookups using TDBGET
    elsif ($action eq '?')
    {
      my $command_name = '';
      my $command1 = '';
      # Special case for retrieve all keys (indicated by *)
      if ($key eq '*')
      {
        $command_name = 'TDBKEYS';
        $command1 = 'tdbkeys "' . $tdb_path . '"';
      }
      else
      {
        $command_name = 'TDBGET';
        $command1 = 'tdbget "' . $tdb_path . '" "' . $key . '"';
      }
      &debugPrint($the_count, $tid, $command_name, 'Command: ' . $command1) unless !$debug;
      if (-e $tdb_path)
      {
        $result = `$command1`;
      }
      else
      {
        &debugPrint("TDB database doesn't exist (yet): " . $tdb_path);
        $result = '';
      }
      &debugPrint($the_count, $tid, $command_name, 'Result: ' . $result) unless !$debug;
      if ($result !~ /-{70}/)
      {
        $result .= "-"x70 . "\n";
      }
    }
    # - adds, updates and deletes using TXT2TDB
    elsif ($action eq '+' || $action eq '-')
    {
      my $command2 = 'txt2tdb -append "' . $tdb_path . '"';
      &debugPrint($the_count, $tid, 'TXT2TDB', 'Command: ' . $command2) unless !$debug;
      open(my $infodb_handle, '| ' . $command2) or die("Error! Failed to open pipe to TXT2TDB\n");
      print $infodb_handle '[' . $key . ']';
      if ($action eq '-')
      {
        print $infodb_handle $action;
      }
      print $infodb_handle $payload;
      close($infodb_handle);
      $result = "-"x70 . "\n";
      &debugPrint($the_count, $tid, 'TXT2TDB', 'Result: ' . $result) unless !$debug;
    }
    else
    {
      print STDERR "Warning! Request " . $the_count . " asked for unknown action '" . $action . "' - Ignoring!\n";
    }
  }
  # Synchronized debug log writing
  &debugPrint($the_count, $tid, 'SEND', $result) unless !$debug;
  return $result;
}

sub debugPrint
{
  my ($the_count, $tid, $type, $msg) = @_;
  if ($debug)
  {
    lock($debug_log);
    $|++;
    print "[" . time() . "] #" . $the_count . ", tid:" . $tid . ", act:" . $type . "\n" . $msg . "\n\n";
    $|--;
    # //unlock($debug_log);
  }
}

sub printUsageAndExit
{
  my ($msg) = @_;
  print "$msg\n\n";
  print "Usage: TDBServer.pl <parent_pid> <collectionname> [-nodaemon] [-debug]\n\n";
  exit(0);
}

1;
