##########################################################################
#
# jenaTDBBuildproc.pm -- 
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the 
# University of Waikato, New Zealand.
#
# Copyright (C) 1999 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################

# This document processor outputs a document for indexing (should be 
# implemented by subclass) and storing in the database

package jenaTDBBuildproc;

use strict; 
no strict 'refs'; # allow filehandles to be variables and viceversa

use docprint;
use util;
use FileUtils;

use extrabuildproc;


BEGIN {
    # Make this package a subclass of extrabuildproc.  The assignment is
    # placed in a BEGIN block so that it is executed at compile time.
    @jenaTDBBuildproc::ISA = ('extrabuildproc');
}

# Constructor.
#
# All arguments after the class name are passed unchanged to the
# extrabuildproc constructor.  On top of what that returns, this
# constructor:
#   - stores the collect-level temporary directory in $self->{'tmp_dir'}
#   - locates the configuration file gsdom2rdf.xsl and writes a copy of
#     it into that temporary directory, with the @libraryurl@ and
#     @collect@ placeholders filled in
#   - stores the stylesheet's name in $self->{'xslt_file'} and the full
#     path of the generated copy in $self->{'xslt_filename'}
#
# Dies if gsdom2rdf.xsl can not be located.
#
# Note: declared without a prototype (an empty "()" prototype is ignored
# for method calls and is misleading on a constructor that takes
# arguments), and the parent constructor is called with the unambiguous
# Class->new(...) form rather than indirect object syntax.
sub new
{
    my $class = shift @_;

    my $self = extrabuildproc->new(@_);

    # Do the following here so it doesn't keep checking (within the util.pm method)
    # whether it needs to create the directory or not
    my $tmp_dir = &util::get_collectlevel_tmp_dir();
    $self->{'tmp_dir'} = $tmp_dir;

    my $xslt_file_in = "gsdom2rdf.xsl";

    my $xslt_filename_in = &util::locate_config_file($xslt_file_in);
    if (!defined $xslt_filename_in) {
	print STDERR "Can not find $xslt_file_in, please make sure you have supplied the correct file path\n";
	die "\n";
    }

    my $xslt_filename_out = &FileUtils::filenameConcatenate($tmp_dir,$xslt_file_in);

    # presumably set by the extrabuildproc constructor -- confirm
    my $collection = $self->{'collection'};

    my $url_prefix = &util::get_full_greenstone_url_prefix(undef,undef,1); # (gs_mode=default, gs_lib=default, get_public_url=1)
    print STDERR "**** url_prefix = $url_prefix\n";

    # values substituted for the @libraryurl@ and @collect@ placeholders
    my $property_hashmap = { 'libraryurl' => $url_prefix,
			     'collect'    => $collection };

    file_copy_with_property_sub($xslt_filename_in,$xslt_filename_out,$property_hashmap);

    $self->{'xslt_file'} = $xslt_file_in;
    $self->{'xslt_filename'} = $xslt_filename_out;

    return bless $self, $class;
}


# Resolves one property name against a hash reference of properties.
#
# Arguments:
#   $hashmap - hash reference mapping property name => value
#   $value   - the property name to resolve
#
# Returns the stored value when it is defined; otherwise returns the
# name wrapped in '@' characters again (i.e. "@name@").
sub property_lookup
{
    my ($hashmap,$value) = @_;

    my $replacement = $hashmap->{$value};
    return $replacement if (defined $replacement);

    # no defined value: hand back the placeholder form
    return "\@$value\@";
}


# Performs a text file copy, substituting substrings of the form
# @xxx@ in the input file with the values set in the hashmap
# passed in.
#
# Arguments:
#   $filename_in       - path of the file to read
#   $filename_out      - path of the file to write
#   $property_hashmap  - hash reference mapping property name => value
#
# Each @xxx@ occurrence (xxx containing neither '@' nor a space) is
# replaced by the result of property_lookup(), so names without a
# defined value are written out unchanged.  Both files are processed
# through the :utf8 layer.
#
# Returns 1 on success.  On failure an error message is printed to
# STDERR and nothing (undef in scalar context) is returned.

sub file_copy_with_property_sub
{
    my ($filename_in,$filename_out,$property_hashmap) = @_;

    # Lexical handles and three-argument open: the file names are never
    # interpreted as open modes, and the handles can not clash with any
    # other FIN/FOUT used elsewhere in the program
    my $fin;
    if (!open($fin, "<", $filename_in)) {
	print STDERR "jenaTDBBuildproc::file_copy_with_property_sub failed to open $filename_in\n  $!\n";
	return;
    }
    binmode($fin,":utf8");

    my $fout;
    if (!open($fout, ">", $filename_out)) {
	# report first, so that close() can not disturb $!
	print STDERR "jenaTDBBuildproc::file_copy_with_property_sub failed to open $filename_out\n  $!\n";
	close($fin);
	return;
    }
    binmode($fout,":utf8");

    while (defined(my $line = <$fin>)) {

	$line =~ s/\@([^@ ]+)\@/&property_lookup($property_hashmap,$1)/ge;

	print $fout $line;
    }

    close($fin);

    # buffered write errors only show up when the handle is closed
    if (!close($fout)) {
	print STDERR "jenaTDBBuildproc::file_copy_with_property_sub failed to close $filename_out\n  $!\n";
	return;
    }

    return 1;
}


# Starts the java ApplyXSLT program as a child process and stores a
# write handle to its standard input in $self->{'xslt_writer'}.
#
# Arguments:
#   $output_file_name - written to the pipe on the line following the
#                       <?DocStart?> marker
#   $xslt_file        - stylesheet handed to ApplyXSLT with -t
#
# If $self->{'mapping_file'} is defined and non-empty it is handed to
# ApplyXSLT with -m.
#
# Returns immediately, without touching $self, when $xslt_file is
# undefined, empty or does not exist.  If the pipe can not be opened an
# error is printed to STDERR and $self->{'xslt_writer'} is set to undef.
sub open_xslt_pipe
{
    my $self = shift @_;
    my ($output_file_name, $xslt_file)=@_;

    return unless defined $xslt_file and $xslt_file ne "" and &FileUtils::fileExists($xslt_file);

    my $apply_xslt_jar = &FileUtils::javaFilenameConcatenate($ENV{'GSDLHOME'},"bin","java","ApplyXSLT.jar");
    my $xalan_jar      = &FileUtils::javaFilenameConcatenate($ENV{'GSDLHOME'},"bin","java","xalan.jar");

    my $java_class_path = &util::javapathname_cat($apply_xslt_jar,$xalan_jar);

    $xslt_file = &util::makeFilenameJavaCygwinCompatible($xslt_file);

    my $java_cmd = "java -cp \"$java_class_path\" org.nzdl.gsdl.ApplyXSLT -t \"$xslt_file\" ";

    if (defined $self->{'mapping_file'} and $self->{'mapping_file'} ne ""){
	my $mapping_file_path = "\"".$self->{'mapping_file'}."\"";
	$java_cmd .= "-m $mapping_file_path";
    }

    # A lexical handle (rather than a package-wide glob) keeps separate
    # pipes independent of each other; the explicit "|-" mode makes it a
    # write-to-command open regardless of what the command string holds
    my $xslt_writer;
    if (!open($xslt_writer, "|-", $java_cmd)) {
	print STDERR "Can't open pipe to xslt: $!\n";
	print STDERR "Command was:\n  | $java_cmd\n";
	$self->{'xslt_writer'} = undef;
    }
    else {
	$self->{'xslt_writer'} = $xslt_writer;

	print $xslt_writer "<?DocStart?>\n";
	print $xslt_writer "$output_file_name\n";
    }
}
  

# Finishes the document on the handle stored in $self->{'xslt_writer'}:
# writes the <?DocEnd?> marker, closes the handle and resets
# $self->{'xslt_writer'} to undef.
#
# Does nothing when no handle is stored.  If closing the handle fails a
# warning is printed to STDERR (for a piped handle close() also fails
# when the child process exits with a non-zero status, in which case $!
# is 0 and $? carries the status).
sub close_xslt_pipe
{
    my $self = shift @_;

    my $xsltwriter = $self->{'xslt_writer'};
    return unless defined $xsltwriter;

    print $xsltwriter "<?DocEnd?>\n";

    if (!close($xsltwriter)) {
	my $reason = ($! != 0) ? "$!" : "exit status " . ($? >> 8);
	print STDERR "Warning: xslt pipe did not close cleanly ($reason)\n";
    }

    $self->{'xslt_writer'} = undef;

    return;
}

# Rewrites one <Metadata ...>value</Metadata> element.
#
# Arguments (in order): the opening tag, the text between the tags, and
# the closing tag.  Returns the three parts joined back together after
# the opening tag and the text have been adjusted; the closing tag is
# passed through untouched.
sub make_ttl_safe
{
    my ($open_tag,$content,$close_tag) = @_;

    # Opening tag: a '/' that directly follows a double quote (optionally
    # with a "word." prefix in between) is spelt out as SLASH.  This takes
    # care of metadata cases such as:
    #    <Metadata ... name="/equivDocLink">...</Metadata>
    $open_tag =~ s/(\"(?:\w+\.)?)\//$1SLASH/g;

    # Text: double up every backslash
    # NOTE(review): this runs before the entity decoding below, so a
    # backslash produced by decoding is not doubled -- confirm intended
    $content =~ s/\\/\\\\/g;

    # Text: turn &amp;#xHH; and &amp;#DD; numeric references into the
    # character they denote
    $content =~ s/\&amp;#x([0-9A-F]+);/chr(hex($1))/eig;
    $content =~ s/\&amp;#([0-9]+);/chr($1)/eig;

    # Text: each run of CR/LF characters becomes the two characters \n
    $content =~ s/[\r\n]+/\\n/g;

    return join("", $open_tag, $content, $close_tag);
}


# Converts a piece of document XML into a .ttl file by sending it
# through the XSLT pipe, then loads that file into the triple store with
# the gs-triplestore-add3 command.
#
# Arguments:
#   $section_text - the XML text to transform
#   $output_root  - root name (without extension) of the temporary .ttl
#                   file created in $self->{'tmp_dir'}
#
# Reads $self->{'tmp_dir'}, $self->{'xslt_filename'},
# $self->{'collection'} and $self->{'outhandle'}.  Problems are reported
# on STDERR; the temporary file is deleted after the insertion attempt,
# whether or not the insertion succeeded.
sub xml_to_ttl {
    my $self = shift (@_);
    my ($section_text,$output_root) = @_;

    my $tmp_dir = $self->{'tmp_dir'};
    my $tmp_doc_filename = &FileUtils::filenameConcatenate($tmp_dir,"$output_root.ttl");
    my $tmp_doc_filename_cc    = &util::makeFilenameJavaCygwinCompatible($tmp_doc_filename);

    my $xslt_filename = $self->{'xslt_filename'};

    # If the pipe can not be opened, open_xslt_pipe() reports the error
    # and leaves $self->{'xslt_writer'} undefined; processing carries on
    # and ends in the "Failed to generate" branch below
    $self->open_xslt_pipe($tmp_doc_filename_cc, $xslt_filename);

    my $xml_outhandler = $self->{'xslt_writer'};

    if (defined $xml_outhandler) {
	binmode($xml_outhandler,":utf8");

	# run the content of every <Metadata> element through make_ttl_safe()
	$section_text =~ s/(<Metadata[^>]*>)(.*?)(<\/Metadata>)/&make_ttl_safe($1,$2,$3)/gse;

	print $xml_outhandler $section_text;
    }

    $self->close_xslt_pipe();

    # now feed the generated file to jena's (TDB) triple store

    my $outhandle = $self->{'outhandle'};
    print $outhandle "  Inserting triples for $output_root\n";

    my $collection = $self->{'collection'};

    if (-f $tmp_doc_filename) {

	my $cmd = "gs-triplestore-add3 $collection \"$tmp_doc_filename\"";

	my $status = system($cmd);
	if ($status == -1) {
	    # the command could not be started at all: $! says why
	    print STDERR "Error: failed to run:\n  $cmd\n$!\n";
	}
	elsif ($status != 0) {
	    # the command ran but did not succeed: $! is not meaningful
	    # here, the wait status returned by system() is
	    my $exit_value = $status >> 8;
	    my $signal_num = $status & 127;
	    print STDERR "Error: failed to run:\n  $cmd\n";
	    print STDERR "  (exit status $exit_value, signal $signal_num)\n";
	}

	unlink $tmp_doc_filename;
    }
    else {
	print STDERR "*** Failed to generate: $tmp_doc_filename\n";
    }

}
    
# Generates and stores the triples for one document: the document is
# serialised to XML with docprint::get_section_xml() and the result is
# handed to xml_to_ttl() under the root name "doc-<OID>".
#
# Arguments:
#   $doc_obj - the document object; must provide get_OID()
#
# The callers in this file (text and textreindex) pass two further
# arguments, a file name and an edit mode; both are ignored here.
#
# (A commented-out copy of the pipe/insert logic, which now lives in
# xml_to_ttl(), and an unused local variable used to be kept in this
# sub; both have been removed.)
sub textedit {
    my $self = shift (@_);
    my ($doc_obj) = @_;

    my $doc_oid = $doc_obj->get_OID();
    my $ttl_output_root_file = "doc-$doc_oid";

    my $section_text = &docprint::get_section_xml($doc_obj);
    $self->xml_to_ttl($section_text,$ttl_output_root_file);
}


# Handles a newly added document: forwards the document object and file
# to textedit() with the edit mode "add", and returns whatever
# textedit() returns.
sub text {
    my ($self, $doc_obj, $file) = @_;

    return $self->textedit($doc_obj, $file, "add");
}

# Handles a document that is being reindexed: forwards the document
# object and file to textedit() with the edit mode "update", and returns
# whatever textedit() returns.
sub textreindex
{
    my ($self, $doc_obj, $file) = @_;

    return $self->textedit($doc_obj, $file, "update");
}

# Handles a document that is being deleted.  Deletion is not carried
# out: the sub only prints a warning to STDERR.  The document object and
# file arguments are accepted but not used.
sub textdelete
{
    my ($self, $doc_obj, $file) = @_;

    # Once deletion is supported this would become:
    #   $self->textedit($doc_obj,$file,"delete");

    print STDERR "Warning: jenaTDB command-line does not currently support delete operation\n";
}


# Classifies a document with the classifiers in $self->{'classifiers'}.
#
# Arguments:
#   $doc_obj   - the document object; must provide get_doc_type()
#   $filename  - not used by this implementation
#   $edit_mode - not used by this implementation
#
# Documents whose type is neither "indexed_doc" nor "info_doc" are
# skipped (nothing is returned).  Otherwise the result of
# classify::classify_doc() is returned.
sub infodbedit
{
    my ($self, $doc_obj, $filename, $edit_mode) = @_;

    # Only "indexed_doc" and "info_doc" (database only) documents are
    # processed any further
    my $doctype = $doc_obj->get_doc_type();
    unless ($doctype eq "indexed_doc" || $doctype eq "info_doc") {
	return;
    }

    #
    # basebuildproc also does the following at this point; whether it
    # makes sense to do it here as well is still to be considered
    #

#    #add this document to the browse structure
#    push(@{$self->{'doclist'}},$doc_obj->get_OID()) 
#	unless ($doctype eq "classification");
#    $self->{'num_docs'} += 1 unless ($doctype eq "classification");
	
#    if (!defined $filename) {
#	# a reconstructed doc
#	my $num_reconstructed_bytes = $doc_obj->get_metadata_element ($doc_obj->get_top_section (), "total_numbytes");
#	if (defined $num_reconstructed_bytes) {
#	    $self->{'num_bytes'} += $num_reconstructed_bytes;
#	}
#    }

    # hand the document to the classifiers
    return &classify::classify_doc ($self->{'classifiers'}, $doc_obj);
}


1;
