###########################################################################
#
# plugin.pm -- functions to handle using plugins
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the 
# University of Waikato, New Zealand.
#
# Copyright (C) 1999 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################

package plugin;


use strict; # to pick up typos and undeclared variables...
no strict 'refs'; # ...but allow filehandles to be variables and vice versa
no strict 'subs';

require util;
use gsprintf 'gsprintf';

# Backwards-compatibility table: old-style plugin names (as they may still
# appear in a config file) mapped to the current plugin names. Consulted by
# get_valid_pluginname() before its generic "Plug" -> "Plugin" rule.
# Can be removed once the old xxPlug names no longer need to be supported.
my $plugin_name_map = {
    ArcPlug      => 'ArchivesInfPlugin',
    DBPlug       => 'DatabasePlugin',
    EMAILPlug    => 'EmailPlugin',
    GAPlug       => 'GreenstoneXMLPlugin',
    METSPlug     => 'GreenstoneMETSPlugin',
    NULPlug      => 'NulPlugin',
    PPTPlug      => 'PowerPointPlugin',
    PSPlug       => 'PostScriptPlugin',
    PagedImgPlug => 'PagedImagePlugin',
    RecPlug      => 'DirectoryPlugin',
    SRCPlug      => 'SourceCodePlugin',
    TEXTPlug     => 'TextPlugin',
    W3ImgPlug    => 'HTMLImagePlugin',
    XMLPlug      => 'ReadXMLFile',
};

# Module-wide state.

# Running totals for the current run. read() bumps the "not processed" and
# "not recognised" counters itself; write_stats() hands the hash to every
# plugin's compile_stats() before reporting.
my $stats = {
    'num_archives'       => 0,
    'num_blocked'        => 0,
    'num_not_processed'  => 0,
    'num_not_recognised' => 0,
    'num_processed'      => 0,
};

# Set by load_plugins() from its arguments.
my $verbosity;
my $outhandle;
my $failhandle;
# options that should be passed to all plugins
my $globaloptions;

# Translate a plugin name, as written in a config file, into the name of the
# plugin module to load.
#
#   $pluginname - plugin name (old or new style)
#
# Returns the mapped name when $pluginname has an entry in $plugin_name_map.
# Otherwise a name ending in "Plug" has that suffix expanded to "Plugin",
# and any other name is returned unchanged.
sub get_valid_pluginname {
    my ($pluginname) = @_;

    my $mapped_name = $plugin_name_map->{$pluginname};
    return $mapped_name if defined $mapped_name;

    my $valid_name = $pluginname;
    # Anchored on purpose: only the trailing "Plug" is expanded, even when
    # "Plug" also occurs earlier in the name.
    $valid_name =~ s/Plug$/Plugin/;
    return $valid_name;
}

# Locate <pluginname>.pm and require() it.
#
#   $pluginname - name of the plugin module, without the .pm extension
#
# Candidate files are tried in this order and the first one that exists is
# loaded (every candidate ends in perllib/plugins/<pluginname>.pm):
#   1. $GSDLCOLLECTDIR/custom/$GSDLCOLLECTION/  (only if GSDLCOLLECTION is set)
#   2. $GSDLCOLLECTDIR/
#   3. $GSDLHOME/ext/<e>/      for each <e> in the colon-separated GSDLEXTS
#   4. $GSDL3SRCHOME/ext/<e>/  for each <e> in the colon-separated GSDL3EXTS
#   5. $GSDLHOME/
#
# If none exists, the {plugin.could_not_find_plugin} message is printed to
# STDERR and the sub dies.
sub load_plugin_require
{
    my ($pluginname) = @_;

    # path of the plugin relative to any of the roots listed above
    my $relative_path
        = &util::filename_cat('perllib', 'plugins', "${pluginname}.pm");
    my $collection_dir = $ENV{'GSDLCOLLECTDIR'};

    my @candidates;

    # collection-specific locations take precedence
    if (defined($ENV{'GSDLCOLLECTION'})) {
        push(@candidates,
             &util::filename_cat($collection_dir, "custom",
                                 $ENV{'GSDLCOLLECTION'}, $relative_path));
    }
    push(@candidates, &util::filename_cat($collection_dir, $relative_path));

    # then the extensions: [ variable listing them, variable naming their home ]
    foreach my $ext_source (['GSDLEXTS', 'GSDLHOME'], ['GSDL3EXTS', 'GSDL3SRCHOME']) {
        my ($list_var, $home_var) = @$ext_source;
        next unless defined $ENV{$list_var};

        my $ext_root = &util::filename_cat($ENV{$home_var}, "ext");
        foreach my $ext (split(/:/, $ENV{$list_var})) {
            push(@candidates,
                 &util::filename_cat($ext_root, $ext, $relative_path));
        }
    }

    # and finally the main installation
    push(@candidates, &util::filename_cat($ENV{'GSDLHOME'}, $relative_path));

    my $loaded = 0;
    foreach my $candidate (@candidates) {
        next unless -e $candidate;
        require $candidate;
        $loaded = 1;
        last;
    }

    if (!$loaded) {
        &gsprintf(STDERR, "{plugin.could_not_find_plugin}\n",
                  $pluginname);
        die "\n";
    }
}

# Load a plugin and construct it with the single option -gsdlinfo.
#
#   $pluginname - plugin name (old or new style)
#
# Returns the new plugin object. Dies when the plugin module cannot be found
# (see load_plugin_require) or when its constructor dies; in the latter case
# the constructor's error is re-thrown as a string.
sub load_plugin_for_info {
    my ($pluginname) = shift @_;
    $pluginname = &get_valid_pluginname($pluginname);
    load_plugin_require($pluginname);

    # The class name is only known at run time, but a method call on a
    # string holding the class name handles that - no string eval and no
    # indirect-object "new Class(...)" syntax required.
    my $plugobj = eval { $pluginname->new([], ["-gsdlinfo"]) };
    die "$@" if $@;

    return $plugobj;
}

# Load, construct and initialise every plugin named in $plugin_list.
#
#   $plugin_list      - ref to a list of array refs, each of the form
#                       [ pluginname, option, option, ... ]. The name is
#                       shifted off each inner list; the option strings
#                       themselves are not modified.
#   $verbosity        - stored module-wide; defaults to 2
#   $outhandle        - stored module-wide; defaults to 'STDERR'
#   $failhandle       - stored module-wide; defaults to 'STDERR'
#   $globaloptions    - stored module-wide; ref to a list of options that are
#                       appended to every plugin's own options (may be undef)
#   $incremental_mode - handed to each plugin's set_incremental()
#
# Inner lists whose name is undefined are skipped.
#
# Returns a ref to the list of plugin objects, in $plugin_list order.
# Dies when a plugin module cannot be found (see load_plugin_require) or when
# a constructor dies; the constructor's error is re-thrown as a string.
sub load_plugins {
    my ($plugin_list) = shift @_;
    my $incremental_mode;
    ($verbosity, $outhandle, $failhandle, $globaloptions, $incremental_mode) = @_; # globals
    my @plugin_objects = ();
    $verbosity = 2 unless defined $verbosity;
    $outhandle = 'STDERR' unless defined $outhandle;
    $failhandle = 'STDERR' unless defined $failhandle;

    my $colplugindir = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"perllib/plugins");
    unshift (@INC, $colplugindir);

    # Produce the copy of an option that is handed to the constructor.
    # An undefined option is delivered as '' and the two-character sequence
    # \" is delivered as a plain "; every other character ($, @, other
    # backslashes, ...) reaches the plugin exactly as given. Working on a
    # copy keeps the caller's arrays intact, so the same $globaloptions can
    # safely be used for more than one call.
    my $prepare_option = sub {
        my ($option) = @_;
        $option = '' unless defined $option;
        $option =~ s/\\"/"/g;
        return $option;
    };

    my @global_args = map { $prepare_option->($_) } @{ $globaloptions || [] };

    foreach my $pluginoptions (@$plugin_list) {
        my $pluginname = shift @$pluginoptions;
        next unless defined $pluginname;
        $pluginname = &get_valid_pluginname($pluginname);
        load_plugin_require($pluginname);

        # the plugin's own options first, then the global ones
        my $plugin_args = [ (map { $prepare_option->($_) } @$pluginoptions),
                            @global_args ];

        # Create the plugin object. The arguments are passed as data rather
        # than being pasted into eval'ed source, so no option value can be
        # mistaken for Perl code or break the constructor call.
        my $plugobj = eval { $pluginname->new([], $plugin_args) };
        die "$@" if $@;

        # initialize plugin
        $plugobj->init($verbosity, $outhandle, $failhandle);

        $plugobj->set_incremental($incremental_mode);

        # add this object to the list
        push (@plugin_objects, $plugobj);
    }

    return \@plugin_objects;
}


# Start a run: store the GLI flag on every plugin object, then call each
# plugin's begin() in list order.
#
#   $pluginfo  - ref to the list of plugin objects (also passed on to each
#                plugin)
#   $base_dir, $processor, $maxdocs - passed through to each begin()
#   $gli       - stored under the 'gli' key of every plugin object
#
# Returns the values returned by the begin() calls, as one list.
sub begin {
    my ($pluginfo, $base_dir, $processor, $maxdocs, $gli) = @_;

    # every plugin carries the flag before the first begin() runs
    foreach my $plugobj (@$pluginfo) {
        $plugobj->{'gli'} = $gli;
    }

    my @results;
    foreach my $plugobj (@$pluginfo) {
        push(@results, $plugobj->begin($pluginfo, $base_dir, $processor, $maxdocs));
    }
    return @results;
}

 sub remove_all {
    # Call remove_all() on every plugin in list order, passing all four
    # arguments through unchanged, and return the values the plugins
    # returned as one list.
    my ($pluginfo, $base_dir, $processor, $maxdocs) = @_;

    my @results;
    foreach my $plugobj (@$pluginfo) {
        push(@results, $plugobj->remove_all($pluginfo, $base_dir, $processor, $maxdocs));
    }
    return @results;
}
  
# Have the plugins remove what belongs to each deleted source file.
#
#   $pluginfo      - ref to the list of plugin objects
#   $infodbtype    - info database type, passed to the dbutil functions
#   $archivedir    - archives directory, used to locate the 'archiveinf-src'
#                    database and passed to the plugins
#   $deleted_files - ref to the list of deleted source files
#
# Every file in $deleted_files is handled: its OIDs are looked up in the
# 'archiveinf-src' database and the file is offered to each plugin's
# remove_one() in turn until one returns a defined value other than -1.
#
# Returns nothing when $deleted_files is undefined or empty. Otherwise
# returns 0 if at least one file was not handled by any plugin, else the
# value returned by the plugin that handled the last file (so a call with a
# single file yields that plugin's return value, or 0).
sub remove_some {
    my ($pluginfo, $infodbtype, $archivedir, $deleted_files) = @_;
    return unless defined $deleted_files && scalar(@$deleted_files);
    my $arcinfo_src_filename = &dbutil::get_infodb_file_path($infodbtype, "archiveinf-src", $archivedir);

    my $all_handled = 1;
    my $last_rv = 0;

    foreach my $file (@$deleted_files) {
        # use 'archiveinf-src' info database to look up all the OIDs
        # that this file is used in (note in most cases, it's just one OID)
        my $src_rec_string = &dbutil::read_infodb_entry($infodbtype, $arcinfo_src_filename, $file);
        my $src_rec = &dbutil::convert_infodb_string_to_hash($src_rec_string);
        my $oids = $src_rec->{'oid'};

        my $handled = 0;
        foreach my $plugobj (@$pluginfo) {
            my $rv = $plugobj->remove_one($file, $oids, $archivedir);
            # undefined (was not recognised by the plugin) or -1 (there
            # was an error): try the next plugin
            next unless defined $rv && $rv != -1;

            $last_rv = $rv;
            $handled = 1;
            last; # this file is dealt with; carry on with the next file
        }
        $all_handled = 0 unless $handled;
    }

    return $all_handled ? $last_rv : 0;
}
# Call file_block_read() on every plugin for one file.
#
#   $pluginfo  - ref to the list of plugin objects (also passed on to each
#                plugin)
#   $base_dir, $file, $block_hash, $metadata - passed through to the plugins
#   $gli       - true to announce the file on STDERR for the GLI; defaults
#                to 0
#
# All plugins are called, whatever the earlier ones returned. Dies (after
# printing {plugin.kill_file}) when a ".kill" file exists in GSDLCOLLECTDIR.
sub file_block_read {
    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $gli) = @_;

    $gli = 0 if !defined $gli;

    # name reported to the GLI: file sometimes starts with a / so get rid of it
    (my $glifile = $file) =~ s/^[\/\\]+//;

    # Announce to GLI that we are handling a file
    if ($gli) {
        print STDERR "<File n='$glifile'>\n";
    }

    # the .kill file is a handy (if not very elegant) way of aborting
    # an import.pl or buildcol.pl process
    my $kill_file = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, ".kill");
    if (-e $kill_file) {
        gsprintf($outhandle, "{plugin.kill_file}\n");
        die "\n";
    }

    foreach my $plugobj (@$pluginfo) {
        # the result is deliberately not acted upon: no plugin is skipped
        my $plugin_rv = $plugobj->file_block_read($pluginfo, $base_dir, $file, $block_hash, $metadata, $gli);
    }
}


# Offer one file to each plugin's metadata_read() until a plugin deals
# with it.
#
#   $pluginfo - ref to the list of plugin objects (also passed on to each
#               plugin)
#   $maxdocs  - replaced by -1 when undefined or containing no digit
#   $gli      - true to write GLI messages to STDERR; defaults to 0
#   all remaining arguments are passed through to the plugins unchanged
#
# A plugin's metadata_read() must return:
#   undef         - could not recognise the file
#   -1            - tried but had an error
#   0             - blocked
#   anything else - successful processing
#
# Returns the first defined plugin result that is not -1, or 0 when no
# plugin produced one. Dies (after printing {plugin.kill_file}) when a
# ".kill" file exists in GSDLCOLLECTDIR.
sub metadata_read {
    my ($pluginfo, $base_dir, $file, $block_hash,
        $extrametakeys, $extrametadata, $extrametafile,
        $processor, $maxdocs, $gli, $aux) = @_;

    $maxdocs = -1 if !defined $maxdocs || $maxdocs !~ /\d/;
    $gli = 0 if !defined $gli;

    # name reported to the GLI: file sometimes starts with a / so get rid of it
    (my $glifile = $file) =~ s/^[\/\\]+//;

    # Announce to GLI that we are handling a file
    if ($gli) {
        print STDERR "<File n='$glifile'>\n";
    }

    # the .kill file is a handy (if not very elegant) way of aborting
    # an import.pl or buildcol.pl process
    my $kill_file = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, ".kill");
    if (-e $kill_file) {
        gsprintf($outhandle, "{plugin.kill_file}\n");
        die "\n";
    }

    foreach my $plugobj (@$pluginfo) {
        my $plugin_rv = $plugobj->metadata_read($pluginfo, $base_dir, $file, $block_hash,
                                                $extrametakeys, $extrametadata, $extrametafile,
                                                $processor, $maxdocs, $gli, $aux);

        # undefined - was not recognised by this plugin
        next if !defined $plugin_rv;

        # blocked or processed: this plugin has dealt with the file
        if ($plugin_rv != -1) {
            return $plugin_rv;
        }

        # an error has occurred; report it and try the next plugin
        print STDERR "<ProcessingError n='$glifile'>\n" if $gli;
    }

    return 0;
}

# Offer one file to each plugin's read() until a plugin deals with it.
#
#   $pluginfo    - ref to the list of plugin objects (also passed on to each
#                  plugin)
#   $maxdocs     - replaced by -1 when undefined or containing no digit
#   $total_count - replaced by 0 when undefined or containing no digit
#   $gli         - true to write GLI messages to STDERR; defaults to 0
#   all remaining arguments are passed through to the plugins unchanged
#
# A plugin's read() must return:
#   undef         - could not recognise the file
#   -1            - tried but had an error
#   0             - blocked
#   anything else - successful processing
#
# Returns the first defined plugin result that is not -1. When no plugin
# produces one, the failure is reported (to $outhandle at verbosity 2 or
# more, to the GLI if enabled, and to $failhandle), the matching counter in
# $stats is incremented and 0 is returned. Dies (after printing
# {plugin.kill_file}) when a ".kill" file exists in GSDLCOLLECTDIR.
sub read {
    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli, $aux) = @_;

    $maxdocs = -1 if !defined $maxdocs || $maxdocs !~ /\d/;
    $total_count = 0 if !defined $total_count || $total_count !~ /\d/;
    $gli = 0 if !defined $gli;

    # name reported to the GLI: file sometimes starts with a / so get rid of it
    (my $glifile = $file) =~ s/^[\/\\]+//;

    # Announce to GLI that we are handling a file
    if ($gli) {
        print STDERR "<File n='$glifile'>\n";
    }

    # the .kill file is a handy (if not very elegant) way of aborting
    # an import.pl or buildcol.pl process
    my $kill_file = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, ".kill");
    if (-e $kill_file) {
        gsprintf($outhandle, "{plugin.kill_file}\n");
        die "\n";
    }

    my $had_error = 0;
    foreach my $plugobj (@$pluginfo) {
        my $plugin_rv = $plugobj->read($pluginfo, $base_dir, $file,
                                       $block_hash, $metadata, $processor, $maxdocs,
                                       $total_count, $gli, $aux);

        # undefined - was not recognised by this plugin
        next if !defined $plugin_rv;

        # blocked or processed: this plugin has dealt with the file
        if ($plugin_rv != -1) {
            return $plugin_rv;
        }

        # an error has occurred; remember it and try the next plugin
        $had_error = 1;
    }

    # Nobody dealt with the file. Either some plugin recognised it but could
    # not process it, or no plugin recognised it at all; the two cases
    # differ only in the messages used and the counter that is bumped.
    my ($notice, $gli_line, $fail_line, $counter) = $had_error
        ? ("{plugin.no_plugin_could_process}\n",
           "<NonProcessedFile n='$glifile'>\n",
           "$file: {plugin.no_plugin_could_process_this_file}\n",
           'num_not_processed')
        : ("{plugin.no_plugin_could_recognise}\n",
           "<NonRecognisedFile n='$glifile'>\n",
           "$file: {plugin.no_plugin_could_recognise_this_file}\n",
           'num_not_recognised');

    if ($verbosity >= 2) {
        gsprintf($outhandle, $notice, $file);
    }
    # tell the GLI that it was not processed
    print STDERR $gli_line if $gli;

    gsprintf($failhandle, $fail_line);
    $stats->{$counter} ++;

    return 0;
}

# Write out some general stats that the plugins have compiled. Note that
# the buildcol.pl process doesn't currently call this, so the stats are
# only output after import.pl.
#
#   $pluginfo    - ref to the list of plugin objects; each one is given
#                  $stats through its compile_stats() before reporting
#   $statshandle - handle the report is written to
#   $faillog     - argument for the {plugin.see_faillog} message
#   $gli         - true to also print an <ImportComplete ...> line on
#                  STDERR; defaults to 0
sub write_stats {
    my ($pluginfo, $statshandle, $faillog, $gli) = @_;

    $gli = 0 if !defined $gli;

    $_->compile_stats($stats) foreach @$pluginfo;

    my $total = $stats->{'num_processed'} + $stats->{'num_blocked'} + 
	$stats->{'num_not_processed'} + $stats->{'num_not_recognised'};

    print STDERR "<ImportComplete considered='$total' processed='$stats->{'num_processed'}' blocked='$stats->{'num_blocked'}' ignored='$stats->{'num_not_recognised'}' failed='$stats->{'num_not_processed'}'>\n" if $gli;

    # Print the singular message for a count of exactly 1; otherwise print
    # the plural message with the count as its argument.
    my $report = sub {
        my ($count, $singular, $plural) = @_;
        if ($count == 1) {
            gsprintf($statshandle, $singular);
        } else {
            gsprintf($statshandle, $plural, $count);
        }
        return;
    };

    $report->($total, "* {plugin.one_considered}\n", "* {plugin.n_considered}\n");

    # archives, unrecognised and rejected files are only mentioned when
    # there are any
    $report->($stats->{'num_archives'},
              "   ({plugin.including_archive})\n",
              "   ({plugin.including_archives})\n")
        if $stats->{'num_archives'};

    $report->($stats->{'num_processed'}, "* {plugin.one_included}\n", "* {plugin.n_included}\n");

    $report->($stats->{'num_not_recognised'},
              "* {plugin.one_unrecognised}\n",
              "* {plugin.n_unrecognised}\n")
        if $stats->{'num_not_recognised'};

    $report->($stats->{'num_not_processed'},
              "* {plugin.one_rejected}\n",
              "* {plugin.n_rejected}\n")
        if $stats->{'num_not_processed'};

    if ($stats->{'num_not_processed'} || $stats->{'num_not_recognised'}) {
        gsprintf($statshandle, " {plugin.see_faillog}\n", $faillog);
    }
}

# Call end($processor) on every plugin in list order.
#
#   $pluginfo  - ref to the list of plugin objects
#   $processor - passed through to each plugin
#
# Returns the values returned by the end() calls, as one list.
sub end {
    my ($pluginfo, $processor) = @_;

    my @results;
    foreach my $plugobj (@$pluginfo) {
        push(@results, $plugobj->end($processor));
    }
    return @results;
}

# Call deinit($processor) on every plugin in list order.
#
#   $pluginfo  - ref to the list of plugin objects
#   $processor - passed through to each plugin
#
# Returns the values returned by the deinit() calls, as one list.
sub deinit {
    my ($pluginfo, $processor) = @_;

    my @results;
    foreach my $plugobj (@$pluginfo) {
        push(@results, $plugobj->deinit($processor));
    }
    return @results;
}

1;
