###########################################################################
#
# parallelbuildingbuildcolutils.pm --
#
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the 
# University of Waikato, New Zealand.
#
# Copyright (C) 1999 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################

# search for: parallel indexname indexlevel

package parallelbuildingbuildcolutils;

# Pragma
use strict;
no strict 'refs'; # allow filehandles to be variables and vice versa
no strict 'subs'; # allow barewords (eg STDERR) as function arguments

# Greenstone Modules
use buildcolutils;
use gsprintf;

BEGIN
{
  @parallelbuildingbuildcolutils::ISA = ('buildcolutils');
}

# Parallel Build Customization
my $arguments = [
      { 'name' => "workers",
        'desc' => "**Parallel Processing** The number of 'worker' threads to spawn when parallel processing",
        'type' => "int",
        'range' => "0,",
        'reqd' => "no",
        'hiddengli' => "yes" }
      ];

## @method new()
#
sub new
{
  my $class = shift(@_);
  my $self = new buildcolutils(@_);

  # Sanity checks

  return bless($self, $class);
}

# @function getSupportedArguments
# Retrieve the list of arguments that are specific to this subclass of
# buildcolutils so they can be added to the list of supported arguments to
# buildcol.pl. The use of any of these arguments automatically causes this
# subclass to be instantiated and used in preference to the parent class.
# ATM it is up to the implementer to ensure these arguments are unique between
# subclasses
sub getSupportedArguments
{
  return $arguments;
}
# getSupportedArguments()

# @function set_collection_options
#
sub set_collection_options
{
  my $self = shift @_;
  my ($collectcfg) = @_;

  $self->SUPER::set_collection_options($collectcfg);

  # Sanity tests
  if ($collectcfg->{'infodbtype'} eq 'sqlite' && $self->{'workers'} > 0)
  {
    print STDERR "WARNING: Parallel builds not current supported by SQLite - reverting to serial build\n";
    $self->{'workers'} = 0;
  }

  # Add parallel building prefix to requests buildertype as necessary
  if ($self->{'buildtype'} !~ /^parallelbuilding/)
  {
    print STDERR "WARNING: using parallel processing version of indexer: " . $self->{'buildtype'} . "\n";
    $self->{'buildtype'} = 'parallel' . $self->{'buildtype'};
  }
}
# set_collection_options()

# @function build_collection()
# Parallel Building Support
# - if parallel building is requested then we subvert the normal 'all' mode
#   process, insert attempting to create an XML 'recipe' for building this
#   collection. We then pass this recipe to an Open MPI augmented compiled
#   executable (which will in turn make multiple calls back to buildcol.pl
#   according to the instructions in the recipe)!
sub build_collection
{
  my $self = shift(@_);
  my $builders_ref = shift(@_);
  my $out = $self->{'out'};

  if ($self->{'workers'} > 0)
  {
    print $out "*** parallel building\n";
    # Some infodb modes (namely GDBMServer at the moment) need to open the
    # connection to the database in such a way that it persists over the
    # child threads. We do this by adding a dummy call to build the file path
    # to archiveinf-doc as it is the database in question. The '1' at the end
    # means launch the server... it will then persist until this block passes
    # out of scope (presumably after all the child mpi processes are done)
    my $arcinfo_doc_filename = &dbutil::get_infodb_file_path($self->{'infodbtype'}, "archiveinf-doc", $self->{'archivedir'}, 1);

    # we initially create the recipe as a datastructure to make it easier for
    # each builder to determine what has already been defined
    # - each step of the recipe will have a command as a string and a (possibly
    #   empty) array of steps that depend on this step (possibly recursive)
    print $out "Generating indexing 'recipe'\n";
    my $recipe = [];
    # pass to each builder to have it populated with appropriate commands
    map { local $_=$_; $_->prepareIndexRecipe($self->{'collection'}, $recipe); } @{$builders_ref};
    # now write the recipe to an XML file, resolving any path macros
    my $max_parallel_tasks = scalar(@{$recipe});
    my $xml_lines = ();
    push(@{$xml_lines},'<?xml version="1.0" standalone="no" ?>');
    push(@{$xml_lines},'<Recipe>');
    foreach my $item (@{$recipe})
    {
      my $max_parallel_child_tasks = &print_recipe($xml_lines, $item);
      if ($max_parallel_child_tasks > $max_parallel_tasks)
      {
        $max_parallel_tasks = $max_parallel_child_tasks;
      }
    }
    push(@{$xml_lines}, '</Recipe>');
    my $recipe_path = &util::get_tmp_filename('.xml');
    open(XMLOUT, ">:utf8", $recipe_path) or die("Error! Failed to open recipe file for writing: " . $recipe_path . "\nReason: " . $!);
    print XMLOUT join("\n", @{$xml_lines});
    close(XMLOUT);
    # call mpibuildcol executable using mpirun and passing path to recipe
    my $number_of_threads = $self->{'workers'} + 1;
    my $mpirun_cmd = 'mpirun -n ' . $number_of_threads . ' mpibuildcol "' . $recipe_path . '"';
    print $out "Running command: " . $mpirun_cmd . "\n";
    print `$mpirun_cmd`;
    # clean up recipe
    unlink($recipe_path);
  }
  else
  {
    $self::SUPER->build_collection($builders_ref);
  }
}
# build_collection()

# @function build_auxiliary_files
#
sub build_auxiliary_files
{
  my $self = shift(@_);
  my ($builders_ref) = @_;
  if (!$self->{'parallel'} && !$self->{'debug'})
  {
    $self->SUPER::build_auxiliary_files($builders_ref);
  }
}
# build_auxiliary_files()

# @function print_recipe
#
sub print_recipe
{
  my ($xml_lines, $item) = @_;
  my $max_parallel_tasks = 0;

  # start building up the command in our xml buffer
  push(@{$xml_lines}, '<Task>');
  my $command = $item->{'command'};
  $command =~ s/&/&amp;/g;
  $command =~ s/</&lt;/g;
  $command =~ s/>/&gt;/g;
  push(@{$xml_lines}, '<Command>' . $command . '</Command>');

  # - print children before closing task
  if (defined $item->{'children'})
  {
    $max_parallel_tasks = scalar(@{$item->{'children'}});

    foreach my $child_item (@{$item->{'children'}})
    {
      my $max_parallel_child_tasks = &print_recipe($xml_lines, $child_item);
      if ($max_parallel_child_tasks > $max_parallel_tasks)
      {
        $max_parallel_tasks = $max_parallel_child_tasks;
      }
    }
  }
  # - now we can close the task having printed nested children
  push(@{$xml_lines},'</Task>');
  # done
  return $max_parallel_tasks;
}
# print_recipe()
