###########################################################################
#
# jenaTDBBuilder.pm -- topup builder that gets jenaTDB initialized correctly
#
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the 
# University of Waikato, New Zealand.
#
# Copyright (C) 1999 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################

package jenaTDBBuilder;

use strict;
no strict 'refs'; # allow filehandles to be variables and viceversa

use util;
use FileUtils;

use extrabuilder;

# Compile-time hook: declare extrabuilder as the sole parent class of
# jenaTDBBuilder, so methods not defined in this package are looked up there.
BEGIN {
    @jenaTDBBuilder::ISA = qw(extrabuilder);
}


# Constructor.
#
# Hands every argument after the class name to the extrabuilder constructor,
# re-blesses the object it returns into the requested class, and records
# "jenaTDB" as the object's build type.
#
# Params:  $class - class name to bless the new object into
#          @_     - remaining arguments, forwarded unchanged to extrabuilder
# Returns: the blessed builder object
sub new {
    my $class = shift(@_);

    # Explicit method-call syntax rather than the indirect-object form
    # ("new extrabuilder (...)"): how the indirect form is parsed depends on
    # which sub and package names the compiler has already seen.
    my $self = extrabuilder->new(@_);
    bless $self, $class;

    $self->{'buildtype'} = "jenaTDB";

    return $self;
}


# Returns the name of this builder's default build processor,
# "jenaTDBBuildproc".  The invocant is accepted but not consulted.
sub default_buildproc {
    my ($self) = @_;

    my $buildproc_name = "jenaTDBBuildproc";
    return $buildproc_name;
}


# Runs the collection's documents through the build processor in 'text' mode.
#
# Steps, in order:
#   1. Ensure the directory <build_dir>/jenaTDB exists.
#   2. Put the build processor into 'text' mode and reset it.
#   3. Unless 'keepold' is set, run the external command
#      "gs-triplestore-reset3 <collection>" to clear the collection's
#      triple-store graph.  A failure is reported on STDERR but does not
#      abort the build.
#   4. Drive the plugin pipeline (begin / read / end) over 'source_dir'.
#
# Params:  $self - builder object; reads the keys 'outhandle', 'build_dir',
#                  'verbosity', 'incremental', 'keepold', 'collection',
#                  'buildproc', 'pluginfo', 'source_dir', 'maxdocs' and 'gli'
#          Callers may pass an index name as a further argument; it is not
#          used by this builder.
# Returns: whatever the final plugin::end() call returns.
sub build_indexes {
    my $self = shift (@_);

    my $outhandle = $self->{'outhandle'};
    my $build_dir = $self->{'build_dir'};
    my $verbosity = $self->{'verbosity'};

    print $outhandle "\n*** Jena Fuseki3 Indexing/Storing Document metadata triples\n"  if ($verbosity >= 1);

    my $jenaTDB_dir = &util::filename_cat($build_dir, "jenaTDB");

    if (! -d $jenaTDB_dir) {
        &FileUtils::makeAllDirectories($jenaTDB_dir);
    }

    # A non-incremental build is announced as creating a fresh dataset.
    # No explicit initialisation command is run here.
    if (!$self->{'incremental'}) {
        print $outhandle "\n    creating jenaTDB dataset for collection\n"  if ($verbosity >= 1);
    }

    # Run the docs through the jenaTDB document processor

    $self->{'buildproc'}->set_mode ('text');
    $self->{'buildproc'}->reset();

    # If '-removeold' is in effect (i.e. *not* keepold), then clear out
    # the triple-store graph for this collection

    if (!$self->{'keepold'}) {
        my $collection = $self->{'collection'};
        my $cmd = "gs-triplestore-reset3 $collection";

        my $status = system($cmd);
        if ($status == 0) {
            print $outhandle "    => removeold: Deleting triplestore graph for $collection\n";
        }
        else {
            # $! is only meaningful when the command could not be started
            # (system() returned -1).  Otherwise the wait status encodes
            # either a terminating signal (low 7 bits) or the exit code
            # (high byte), so decode that instead of printing a stale errno.
            my $reason;
            if ($status == -1) {
                $reason = "could not be started: $!";
            }
            elsif ($status & 127) {
                $reason = "terminated by signal " . ($status & 127);
            }
            else {
                $reason = "exited with status " . ($status >> 8);
            }

            print STDERR "Error: failed to run:\n  $cmd\n$reason\n";
            print STDERR "       Graph has not been deleted\n";
        }
    }

    &plugin::begin($self->{'pluginfo'}, $self->{'source_dir'},
                   $self->{'buildproc'}, $self->{'maxdocs'});
    &plugin::read ($self->{'pluginfo'}, $self->{'source_dir'},
                   "", {}, {}, $self->{'buildproc'}, $self->{'maxdocs'}, 0, $self->{'gli'});
    &plugin::end($self->{'pluginfo'});
}

# Capability query: always answers 1 (true), i.e. this builder provides its
# own make_infodatabase().
sub supports_make_infodatabase {
    my $is_supported = 1;
    return $is_supported;
}



# Diagnostic stand-in for an infodb entry writer: instead of storing the
# classifier record it only writes a two-line trace to STDERR naming the OID
# it was called for.
#
# Params:  $self            - builder object (not consulted)
#          $infodb_type     - infodb type string (not consulted)
#          $infodb_handle   - infodb write handle (not consulted)
#          $OID             - identifier of the record; echoed in the trace
#          $classify_infodb - record contents (not consulted)
sub output_classifier {
    my ($self, $infodb_type, $infodb_handle, $OID, $classify_infodb) = @_;

    my $trace_heading = "***** jenaTDBBuilder::output_classifier() info call for " . $OID . "\n";
    my $trace_spacer  = "***** \n";

    print STDERR $trace_heading;
    print STDERR $trace_spacer;
}

# Builds a plain code reference that forwards its first four arguments
# (infodb type, infodb handle, OID, record) to this object's
# output_classifier() method, so the method can be handed to code that
# expects an ordinary function callback.
#
# Params:  $self - object whose output_classifier() should be invoked
# Returns: code reference closing over $self
sub callback_with_self_closure {
    my $self = $_[0];

    return sub {
        my ($db_type, $db_handle, $record_id, $record) = @_;
        $self->output_classifier($db_type, $db_handle, $record_id, $record);
    };
}

    
# Produces the classifier information for the collection with the infodb
# type forced to "fuseki".
#
# Steps, in order:
#   1. Reset the classifier numbering and force $self->{'infodbtype'} to
#      "fuseki" (announced on STDERR).
#   2. Initialise the classifiers.
#   3. Configure the build processor for 'infodb' mode (classifiers attached,
#      text indexing off), fully reset it, and clear its 'mdprefix_fields'.
#   4. Run the plugin pipeline over 'source_dir'.
#   5. Output collection metadata, the classification information (using
#      dbutil::write_infodb_entry as the entry writer) and a "browselist"
#      entry listing every processed document.
#
# No infodb file is opened: the handle passed to the writers is left
# undefined.  Incremental building is not implemented; when 'incremental' is
# set a warning is written to STDERR and processing continues as for a full
# build.
#
# Params:  $self - builder object; reads the keys 'outhandle', 'build_dir',
#                  'verbosity', 'gli', 'classifiers', 'incremental',
#                  'buildproc', 'pluginfo', 'source_dir', 'maxdocs' and
#                  'remove_empty_classifications'; writes 'infodbtype'
sub make_infodatabase {
    my $self = shift (@_);
    my $outhandle = $self->{'outhandle'};
    my $buildproc = $self->{'buildproc'};

    &classify::reset_next_classify_num();

    print STDERR "BuildDir: $self->{'build_dir'}\n";

    # No "text" or "assoc" directories are created under the build
    # directory, and no infodb file path is computed.

    ## Force the info database type
    print STDERR "*** jenaTDBBuilder::make_infodatabase() forcing infodbtype to be: fuseki\n";
    $self->{'infodbtype'} = "fuseki";
    my $infodb_type = $self->{'infodbtype'};

    if ($self->{'verbosity'} >= 1) {
        print $outhandle "\n*** creating the jenaTDB classifier triples\n";
    }
    if ($self->{'gli'}) {
        print STDERR "<Stage name='CreateInfoData'>\n";
    }

    # init all the classifiers
    &classify::init_classifiers ($self->{'classifiers'});

    if ($self->{'incremental'}) {
        # Existing records are not read back in, so nothing is reconstructed
        # from an earlier build.
        print STDERR "!!!!!\n";
        print STDERR "! Incremental support for jenaTDBBuilder::make_infodatabase() has not been implemented\n";
        print STDERR "!!!!!\n";
    }

    # Passed on to the output routines below; deliberately left undefined
    # because no infodb write handle is opened here.
    my $infodb_handle;

    # set up the document processor (no output handle, assoc dir, dontdb or
    # store-text settings are applied)
    $buildproc->set_mode ('infodb');
    $buildproc->set_classifiers ($self->{'classifiers'});
    $buildproc->set_indexing_text (0);

    # A full reset is performed regardless of the 'incremental' setting.
    $buildproc->zero_reset();

    $buildproc->{'mdprefix_fields'} = {};

    &plugin::read ($self->{'pluginfo'}, $self->{'source_dir'},
                   "", {}, {}, $buildproc, $self->{'maxdocs'}, 0, $self->{'gli'});

    # NOTE(review): per the original comment, output_collection_meta() only
    # outputs collection metadata that is not in the config file -- confirm
    # against its implementation.  A triplestore-specific version of it may
    # be worth adding.
    $self->output_collection_meta($infodb_handle);

    # Writer used for each classifier entry.  callback_with_self_closure($self)
    # is an alternative that routes entries to output_classifier() instead.
    my $entry_writer = \&dbutil::write_infodb_entry;

    # output classification information
    &classify::output_classify_info ($self->{'classifiers'}, $infodb_type, $infodb_handle,
                                     $self->{'remove_empty_classifications'},
                                     $entry_writer,
                                     $self->{'gli'});

    # The classifier reverse lookup (used in incremental deletion) is not
    # output.

    # output doclist
    my @doc_ids = $self->{'buildproc'}->get_doc_list();
    my $doc_total = scalar(@doc_ids);
    my $doc_id_list = join(";", @doc_ids);

    my %browselist_entry = (
        'hastxt'      => [ "0" ],
        'childtype'   => [ "VList" ],
        'numleafdocs' => [ $doc_total ],
        'thistype'    => [ "Invisible" ],
        'contains'    => [ $doc_id_list ],
    );
    &dbutil::write_infodb_entry($infodb_type, $infodb_handle, "browselist", \%browselist_entry);

    # No write handle was opened, so there is nothing to close here.
    print STDERR "</Stage>\n" if $self->{'gli'};
}



1;

