#!/usr/bin/perl

use strict;
use warnings;

# Configuration
my $thread_count = 10;

# Setup Environment
BEGIN
{
  die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
  die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};

  unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
  unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan");
  unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/XML/XPath");
  unshift (@INC, "$ENV{'GSDLHOME'}/perllib/plugins");
  unshift (@INC, "$ENV{'GSDLHOME'}/perllib/classify");

  my $ext_prefix;
  my @extensions;
  if (defined $ENV{'GSDLEXTS'})
  {
      $ext_prefix = $ENV{'GSDLHOME'};
      @extensions = split(/:/, $ENV{'GSDLEXTS'});
  }
  elsif (defined $ENV{'GSDL3EXTS'})
  {
      $ext_prefix = $ENV{'GSDL3SRCHOME'};
      @extensions = split(/:/, $ENV{'GSDL3EXTS'});
  }
  foreach my $e (@extensions)
  {
      my $perllib_path = $ext_prefix . '/ext/' . $e . '/perllib';
      unshift (@INC, $perllib_path);
      unshift (@INC, $perllib_path . '/cpan');
      unshift (@INC, $perllib_path . '/plugins');
      unshift (@INC, $perllib_path . '/plugouts');
      unshift (@INC, $perllib_path . '/classify');
  }

  # Installed CPAN packages for GEXT*INSTALL
  my $perl_version = `perl-version.pl`;
  my $perl_path = sprintf("%s/lib/perl/%s", $ENV{'GEXTPARALLELBUILDING_INSTALLED'}, $perl_version);
  ###rint STDERR "[DEBUG] CPAN Path: $perl_path\n";
  unshift (@INC, $perl_path);
}

use Cwd;
# Locking is required (otherwise other threads might glom onto the lock file
# before we've written our port number to it).
use Fcntl qw(:flock);
# advanced child process control
use IPC::Run qw(harness start pump finish);
# the GDBMCLI tool accepts commands on STDIN and write results on STDOUT
# so we'll need a double ended pipe
# @note couldn't get this to reliably work when passing between threads
#use IPC::Open2;
# we need to run as a daemon
use Proc::Daemon;
# and the whole thing will need to accept requests from multiple threads, and
# so will need threads in and of itself
use threads;
use threads::shared;

# Greenstone utility functions (filename_cat)
use util;
# A simple server that listens on a socket and 'forks' off child threads to
# handle each incoming request
use SocketsSwimmingThreadPoolServer;

# Globally available
my $parent_pid = 0;
my $infodb_file_path = '';
my $remove_old = '';
my $gdbm;
my $gdbm_reader;
my $gdbm_writer;
my $debug = 1;
my $server;
# - shared and, more importantly, lockable
my %listeners :shared;
my $accessing_gdbm :shared;
my $should_stop :shared;
my $debug_log :shared;

print "===== GDBM Server =====\n";
print "Provides a persistent connection to one or more GDBM databases via a\n";
print "pool of threads listening on a specific socket.\n";

MAIN:
{
  $accessing_gdbm = 0;
  $should_stop = 0;
  $debug_log = 0;

  # Check arguments
  if (!defined $ARGV[0] || !defined $ARGV[1])
  {
    print "Error! Missing parent process id or path to database\n\n";
    print "Usage: GDBMServer.pl <pid name> <path to database> [-removeold] [-nodaemon] [-debug]\n\n";
    exit(0);
  }
  $parent_pid = $ARGV[0];
  $infodb_file_path = $ARGV[1];
  my $no_daemon = 0;
  my $i = 2;
  while (defined $ARGV[$i])
  {
    if ($ARGV[$i] eq "-nodaemon")
    {
      $no_daemon = 1;
    }
    if ($ARGV[$i] eq "-removeold")
    {
      $remove_old = '-removeold ';
    }
    if ($ARGV[$i] eq "-debug")
    {
      $debug = 1;
    }
    $i++;
  }

  if ($debug)
  {
    print " - parent pid: " . $parent_pid . "\n";
    print " - infodb: " . $infodb_file_path . "\n";
    print " - no daemon? " . $no_daemon . "\n";
    print " - remove old? " . $remove_old . "\n";
    print " - debug? yes\n";
    print "\n";
  }

  # Information about any running GDBMServer is stored in a lockfile in
  # Greenstone's tmp directory (and based on the database opened)
  my $tmp_dir = &util::filename_cat($ENV{'GSDLHOME'}, "tmp");
  my ($infodb_file, $extension) = $infodb_file_path =~ /([^\\\/]+)\.(db|gdb)$/i;
  my $server_lockfile_path = &util::filename_cat($tmp_dir, 'gdbmserver-' . $infodb_file . '.lock');

  # If already running, then exit
  print " * Testing for other GDBMServers already running... ";
  if (-e $server_lockfile_path)
  {
    print "Error! GDBMServer already running!\n";
    print "Lockfile found at: " . $server_lockfile_path . "\n";
    exit(0);
  }
  print "All clear!\n";

  # Ensure we can see gdbmcli on the path
  print " * Testing for GDBMCLI... ";
  my $result = `gdbmcli 2>&1`;
  if ($result !~ /GDBM Command Line Interface/)
  {
    print "Error! GDBMCLI not available - check path.\n";
    exit(0);
  }
  print "Found!\n";

  # @note Easiest way to figure out the open file descriptors is to close them.
  #       You start by figure out what the maximum number of file handles is
  #       for your system. You then iterate through trying to close them using
  #       the POSIX close function - which returns true iif the file descriptor
  #       existed and was successfully closed. You can then keep track of the
  #       highest file descriptor number successfully closed. Do this before and
  #       after your desired function call (like start() below) and the
  #       difference in hc_fd gives your new file descriptors in use! [jmt12]
  #my $openmax = POSIX::sysconf( &POSIX::_SC_OPEN_MAX );
  #$openmax = ( ! defined( $openmax ) || $openmax < 0 ) ? ( shift || 64 ) : $openmax;
  #my $hc_fd = 2;
  #foreach ( 3 .. $openmax )
  #{
  #  $hc_fd = $_ if POSIX::close( $_ );
  #}
  #print "[debug] After start fd count = " . $hc_fd . "\n";
  #exit(0);

  # @note As mentioned above, I couldn't get the file handles produced by open2
  #       realiably shared between handles (even using the tricks mentioned on
  #       the perlmonks site - http://perlmonks.org/?node_id=395513). Typically
  #       they'd work the first time they were used (read or write) but
  #       subsequent actions would block indefinitely. Moreover, I couldn't get
  #       open2 to work as advertised and accept arguments (despite several
  #       hours of dicking around) - and the work-around to make the dbpath the
  #       first lot of IO works, but then definately causes the next action to
  #       block forever. [jmt12]
#  #my @cmd = ('gdbmcli',$infodb_file_path); # doesn't work - runs gdbmcli twice?
#  my @cmd = ('gdbmcli');
#  $gdbm_reader = IO::Handle->new();
#  $gdbm_writer = IO::Handle->new();
#  print " * Opening GDBM database\n";
#  $gdbm_pid = open2($gdbm_reader, $gdbm_writer, 'gdbmcli');
#  if (!$gdbm_writer)
#  {
#    die("Error! Failed to open GDBMCLI for writing\n");
#  }
#  if (!$gdbm_reader)
#  {
#    die("Error! Failed to open GDBMCLI for reading\n");
#  }
#  # For ungodly reasons open2 doesn't work as advertised - it doesn't pass any
#  # arguments - so instead the first command to the GDBMCLI is the path to the
#  # GDBM database to load.
#  print $gdbm_writer $infodb_file_path . "\n" or die("Error! Failed to actually write something to GDBMCLI\n");
#  my $load_result;
#  if ($load_result = <$gdbm_reader>)
#  {
#    print $load_result;
#  }
#  else
#  {
#    die("Error! Failed to actually read something from GDBMCLI\n");
#  }

  # Open the database connection
  my @cmd = ('gdbmcli',$infodb_file_path);
  $gdbm_writer = '';
  $gdbm_reader = '';
  # @note start opens a total of four file descriptors to the 'cmd', but
  #       we never get to know their names (maybe $gdbm->{'WIN'} etc but
  #       I'm note sure) so instead we just have to hope that the number
  #       of file descriptors already open doesn't change, in which case
  #       these are fd 3, 4, 5, and 6.
  $gdbm = start \@cmd, \$gdbm_writer, \$gdbm_reader;
  # - start opens four handles!

  # Daemonize
  my $pid = 0;
  if (!$no_daemon)
  {
    # Determine the anonymous array of file descriptors *not* to close
    my $dont_close_fd = [];
    # Building upon the "POSIX::Close()" test above, we need to explicitly
    # determine the new file descriptors opened by the start command.
    my $openmax = POSIX::sysconf( &POSIX::_SC_OPEN_MAX );
    $openmax = ( ! defined( $openmax ) || $openmax < 0 ) ? ( shift || 64 ) : $openmax;
    # - then figure out the file descriptors currently open. We do this
    #   by attempting to 'copy' each file descriptor.
    for (my $fd = 3; $fd <= $openmax; $fd++)
    {
      #rint "Checking file descriptor: $fd -> ";
      my $tmpfh;
      if (open $tmpfh, ">&$fd")
      {
        #rint "writable!\n";
        push(@{$dont_close_fd}, $fd);
        close($tmpfh);
      }
      elsif (open $tmpfh, "<&$fd")
      {
        #rint "readable!\n";
        push(@{$dont_close_fd}, $fd);
        close($tmpfh);
      }
      #else
      #{
      #  print "not open\n";
      #}
    }
    print " * When forking don't close these filehandles: [" . join(",", @{$dont_close_fd}) . "]\n";

    print " * Spawning Daemon...\n" unless (!$debug);
    my $daemon_out_path = &util::filename_cat($ENV{'GEXTPARALLELBUILDING'},'logs', 'gdbmserver-' . $infodb_file . '.out');
    my $daemon_err_path = &util::filename_cat($ENV{'GEXTPARALLELBUILDING'},'logs', 'gdbmserver-' . $infodb_file . '.err');
    $pid = Proc::Daemon::Init( { work_dir => getcwd(),
                                 child_STDOUT => $daemon_out_path,
                                 child_STDERR => $daemon_err_path,
  # @note as mentioned above, start creates four file descriptors that we need
  #       to keep open even through the separation of the daemon process.
                                 dont_close_fd => $dont_close_fd,
                               } );
  }

  # Parent process has pid > 0
  if ($pid == 0)
  {

    # Perform initializes here
    # - database connection is now handled as a special command, as there may
    #   be multiple databases handled by this server
    # - localhost is good enough for now
    my $host = 'localhost';
    # - determine a suitable port (checking that they aren't already in use)
    # @note this isn't at all portable, but then neither is the daemon the
    #       way I've written it.
    my $port = 8190;
    my $result = `netstat -tnl | grep :$port`;
    while ($result =~ /LISTEN/)
    {
      $port++;
      $result = `netstat -tnl | grep :$port`;
    }
    # - create server object
    print " * Creating pool of " . $thread_count . " threads listening on socket: " . $host . ":" . $port . "\n";
    $server = SocketsSwimmingThreadPoolServer->new(host=>$host,
                                                   port=>$port,
                                                   main_cb => \&exitCheck,
                                                   processor_cb => \&process);

    # - write our port number into the lockfile so that other threads can figure
    #   out where we are
    print " * Writing port number to lock file: " . $server_lockfile_path . "\n";
    open(SLFH, ">", $server_lockfile_path) or die("Error! Failed to open file for writing: " . $server_lockfile_path . "\nReason: " . $! . "\n");
    flock(SLFH, LOCK_EX) or die("Error! Cannot lock file for writing: " . $server_lockfile_path . "\nReason: " . $! . "\n");
    print SLFH $host . ':' . $port;
    flock(SLFH, LOCK_UN);
    close(SLFH);

    # Perform main loop
    # - loop is actually in Server code. start() only returns once server's stop
    #   command has been called
    print " * Listening:\n";
    $server->start;
    print " * Stopping...\n";

    # Perform deinitializes here
    # - remove server lockfile
    print " * Removing lock file...\n";
    unlink($server_lockfile_path);
    # - now close database handles (forcing flush)
    print " * Closing GDBMCLI\n";
    finish($gdbm);
    print "Done!\n";
  }
  else
  {
    print " * Waiting for GDBMServer lockfile to be created";
    while (!-e $server_lockfile_path)
    {
      print '.';
      sleep(1);
    }
    print "\n * GDBMServer lockfile created.\n";
    open(SLFH, "<", $server_lockfile_path) or die("Error! Failed to open file for reading: " . $server_lockfile_path . "\nReason: " . $! . "\n");
    flock(SLFH, LOCK_SH) or die("Error! Cannot lock file for reading: " . $server_lockfile_path . "\nReason: " . $! . "\n");
    my $line = <SLFH>;
    if ($line =~ /(^.+):(\d+)$/)
    {
      print " => Server now listening on " . $1 . ":" . $2 . "\n";
    }
    else
    {
      die ("Error! Failed to retrieve host and port information from lockfile!");
    }
    flock(SLFH, LOCK_UN);
    close(SLFH);
  }

  print "===== Complete! =====\n";
}
exit(0);

# @function exitCheck
# A callback function, called every 5 seconds (default) by the socket server,
# to see whether the parent process (by pid) is actually still running. This
# will cover the case where the parent process (import.pl or build.pl) dies
# without properly asking the server to shutdown.
sub exitCheck
{
  my $counter = shift @_;
  # note: kill, when passed a first argument of 0, checks whether it's possible
  # to send a signal to the pid given as the second argument, and returns true
  # if it is. Thus it provides a means to determine if the parent process is
  # still running (and hence can be signalled) In newer versions of Perl
  # (5.8.9) it should even work cross-platform.
  if (!kill(0, $parent_pid))
  {
    print " * Parent processs gone away... forcing server shutdown\n";
    $server->stop;
    if ($debug)
    {
      lock($debug_log);
      $|++;
      print "[" . time() . "|MAIN] Parent process gone away... forcing server shutdown.\n\n";
      $|--;
    }
  }
}

# /** @function process
#  *  A horribly named function that is called back to process each of the
#  *  requests to alter the GDBM database. It expects either a typical GDBM
#  *  text blob, or one of a limited number of special commands (which start
#  *  with the sentinel character "!"). Note that synchronization over the
#  *  open GDBM handle is used to ensure only one edit occurs at a time.
#  */
sub process
{
  my $data = shift @_;
  my $ip = shift @_;
  my $tid = shift @_;
  my $value = "#ERROR#";
  # Synchronized debug log writing
  if ($debug)
  {
    lock($debug_log);
    $|++;
    print "[" . time() . "|" . $tid . "|RECV] " . $data . "\n";
    $|--;
  }
  # process special commands first
  if ($data =~ /^!(.*):(.*)$/)
  {
    my $command = $1;
    my $argument = $2;
    # addlistener(<pid>)
    if ($command eq "addlistener")
    {
      lock(%listeners);
      $listeners{$argument} = 1;
      my $listener_count = scalar(keys(%listeners));
      $value = "[SUCCESS] added listener [" . $listener_count . " listeners]";
      # unlock(%listeners)
    }
    # removelistener(<pid>)
    elsif ($command eq "removelistener")
    {
      lock(%listeners);
      if (defined $listeners{$argument})
      {
        delete $listeners{$argument};
      }
      my $listener_count = scalar(keys(%listeners));
      lock($should_stop);
      if ($should_stop == 1 && $listener_count == 0)
      {
        # server isn't shared, but the stop data member is!
        $server->stop;
        $value = "[SUCCESS] removed last listener, stopping";
      }
      else
      {
        $value = "[SUCCESS] removed listener [" . $listener_count . " listeners]";
      }
      # unlock($should_stop)
      # unlock(%listeners)
    }
    # we may be asked to stop the server, but only by the process that created
    # us. If there are no listeners registered, we stop straight away,
    # otherwise we set a flag so that as soon as there are no listeners we
    # stop.
    elsif ($command eq "stop")
    {
      if ($argument ne $parent_pid && $argument ne "*")
      {
        $value = "[IGNORED] can only be stopped by parent process";
      }
      else
      {
        my $listener_count = 0;
        {
          lock(%listeners);
          $listener_count = scalar(keys(%listeners));
          # unlock(%listeners)
        }
        if ($listener_count == 0)
        {
          # server isn't shared, but the stop data member is!
          $server->stop;
          $value = "[SUCCESS] stopping";
        }
        else
        {
          lock($should_stop);
          $should_stop = 1;
          $value = "[PENDING] will stop when no more listeners";
          # unlock($should_stop)
        }
      }
    }
  }
  # Everything thing else should be a GDBMCLI command
  else
  {
    lock($accessing_gdbm);
    # lets check that we can still access the GDBM bidirectional pump
    if (!pumpable $gdbm)
    {
      die("Error! Somehow the underlying GDBM bidirectional pipe has gone away!");
    }
    # - write the command to GDBM
    $gdbm_writer = $data . "\n";
    $gdbm_reader = '';
    #rint "[debug] sending command to gdbmcli\n";
    pump($gdbm) while length($gdbm_writer);
    #rint "[debug] reading output from gdbmcli\n";
    pump($gdbm) until $gdbm_reader =~ /-{70}/;
    $value = $gdbm_reader;
    # trim value
    chomp($value);
    #rint "[debug] result: " . $value . "\n";
    #unlock($accessing_gdbm);
  }
  # Synchronized debug log writing
  if ($debug)
  {
    lock($debug_log);
    $|++;
    print "[" . time() . "|" . $tid . "|SEND] " . $value . "\n\n";
    $|--;
  }
  return $value;
}
