###########################################################################
#
# OpenOfficePlugin.pm -- for processing office documents via OpenOffice conversion
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the 
# University of Waikato, New Zealand.
#
# Copyright (C) 1999 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################

package OpenOfficePlugin;

use ConvertBinaryFile;
use OpenOfficeConverter;

use strict;
no strict 'refs'; # allow filehandles to be variables and viceversa
no strict 'subs';

use gsprintf 'gsprintf';

# Set up inheritance at compile time. ConvertBinaryFile is listed first, so
# it is searched before OpenOfficeConverter when resolving methods.
BEGIN {
    @OpenOfficePlugin::ISA = qw(ConvertBinaryFile OpenOfficeConverter);
}

# File extension alternations (regex fragments, without the leading dot),
# grouped by document family. They are combined into the default
# process_exp and used to classify a file by its extension.
my $word_pe = join('|', qw(doc dot docx odt wpd));
my $rtf_pe  = join('|', qw(rtf));
my $ppt_pe  = join('|', qw(ppt pptx odp));
my $xls_pe  = join('|', qw(xls xlsx ods));

# Argument declarations contributed by this plugin. Only process_exp is
# declared here; its default value comes from get_default_process_exp().
my $arguments = [
    {
        'name' => 'process_exp',
        'desc' => '{BaseImporter.process_exp}',
        'type' => 'regexp',
        'deft' => get_default_process_exp(),
        'reqd' => 'no',
    },
];

# Plugin description record. The plugin is not yet fully functional (e.g. it
# does not do proper PowerPoint processing), so it is hidden in GLI for now
# via 'hiddengli'.
my $options = {
    'name'      => 'OpenOfficePlugin',
    'desc'      => '{OpenOfficePlugin.desc}',
    'abstract'  => 'no',
    'inherits'  => 'yes',
    'hiddengli' => 'yes',
    'args'      => $arguments,
};


# Constructor.
#
# Arguments (after the class name):
#   $pluginlist      - array ref; this class name is appended to it
#   $inputargs       - passed through unchanged to both parent constructors
#   $hashArgOptLists - hash ref; this plugin's argument list and option
#                      record are appended to its "ArgList" and "OptList"
#
# Builds an OpenOfficeConverter object and a ConvertBinaryFile object, merges
# them with BaseImporter::merge_inheritance, and returns the result blessed
# into $class. When 'info_only' is set on the merged object, the blessed
# object is returned immediately without any further set-up.
sub new {
    my ($class) = shift (@_);
    my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
    push(@{$hashArgOptLists->{"OptList"}},$options);

    # Explicit Class->new(...) calls: the indirect-object form "new Class(...)"
    # depends on parser heuristics, and this package defines its own new().
    # NOTE(review): the meaning of the trailing 1 is defined by
    # OpenOfficeConverter::new, which is not visible in this file.
    my $ooc_self = OpenOfficeConverter->new($pluginlist, $inputargs, $hashArgOptLists, 1);
    my $cbf_self = ConvertBinaryFile->new($pluginlist, $inputargs, $hashArgOptLists);

    my $self = BaseImporter::merge_inheritance($ooc_self, $cbf_self);

    if ($self->{'info_only'}) {
        # don't worry about any options etc
        return bless $self, $class;
    }

    # Record whether the converter reported that OpenOffice conversion is
    # available; read_into_doc_obj and process branch on this flag.
    $self->{'openoffice_ext_working'} =
        $OpenOfficeConverter::openoffice_conversion_available ? 1 : 0;
    $self->{'convert_to'} = "structuredhtml";

    $self = bless $self, $class;

    # set convert_to_plugin and convert_to_ext
    $self->set_standard_convert_settings();

    # Make sure the secondary plugin chosen above has an options list (an
    # empty one if none was supplied) before the secondary plugins are loaded.
    my $secondary_plugin_name = $self->{'convert_to_plugin'};
    my $secondary_plugin_options = $self->{'secondary_plugin_options'};

    if (!defined $secondary_plugin_options->{$secondary_plugin_name}) {
        $secondary_plugin_options->{$secondary_plugin_name} = [];
    }

    $self->load_secondary_plugins($class,$secondary_plugin_options,$hashArgOptLists);

    return $self;
}

# Initialise the plugin: run the inherited init() with the caller's
# arguments ($verbosity, $outhandle, $failhandle), then the
# OpenOfficeConverter initialisation (which is called without arguments).
# Returns whatever OpenOfficeConverter::init returns.
sub init {
    my $self = shift;

    # @_ now holds ($verbosity, $outhandle, $failhandle)
    $self->SUPER::init(@_);
    return $self->OpenOfficeConverter::init();
}

# Start-of-import hook: forward the caller's arguments
# ($pluginfo, $base_dir, $processor, $maxdocs) first to the inherited
# begin() and then to OpenOfficeConverter::begin.
# Returns whatever OpenOfficeConverter::begin returns.
sub begin {
    my $self = shift;

    # @_ now holds ($pluginfo, $base_dir, $processor, $maxdocs)
    $self->SUPER::begin(@_);
    return $self->OpenOfficeConverter::begin(@_);
}


# Return the default process_exp: a case-insensitive regular expression
# (as a string) matching file names that end in a literal dot followed by
# one of the word-processor, presentation, RTF or spreadsheet extensions.
sub get_default_process_exp {
    my $self = shift (@_);

    # Single-quoted pieces are used so the backslash before the dot survives.
    # In a double-quoted string "\." collapses to ".", which would let the
    # pattern match any character in front of the extension.
    return '(?i)\.(' . join('|', $word_pe, $ppt_pe, $rtf_pe, $xls_pe) . ')$';
}


# Build the doc obj for a file. The caller's arguments
# ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor,
#  $maxdocs, $total_count, $gli) are forwarded unchanged to one of two
# implementations, and that implementation's return value is returned.
sub read_into_doc_obj {
    my $self = shift;

    if ($self->{'openoffice_ext_working'}) {
        # use convertbinaryfile version, will call tmp_area_convert_file and
        # use secondary plugins
        return $self->ConvertBinaryFile::read_into_doc_obj(@_);
    }

    # No conversion available: use the BaseImporter version to set up the
    # doc obj; process() then does a little bit more.
    return $self->BaseImporter::read_into_doc_obj(@_);
}

# Override so that conversion is done by our own convert method
# (OpenOfficeConverter::convert).
#
# Arguments: $output_ext, $input_filename, $textref ($textref is not used).
# Returns the converted file name when the converter's result code is
# non-zero. Otherwise it writes an error message plus the converter's result
# string to the plugin's outhandle and returns the empty string.
sub tmp_area_convert_file {
    my ($self, $output_ext, $input_filename, $textref) = @_;

    my ($status, $message, $converted_file) =
        $self->OpenOfficeConverter::convert($input_filename, $output_ext);

    unless ($status != 0) {
        my $out = $self->{'outhandle'};
        print $out "Open Office Conversion error\n";
        print $out $message;
        return "";
    }

    return $converted_file;
}

# Classify a file by its extension.
#
# Returns a two-element list:
#   first value is used in _iconxx_ to give a srcicon,
#   second value is used for FileFormat metadata.
# Returns ("unknown", "Unknown") when the extension is not recognised.
sub get_file_type_from_extension {
    my $self = shift (@_);
    my ($file) = @_;

    # Check against the various bits of process_exp. Each test is anchored
    # to a literal dot and the end of the name, and is case-insensitive.
    # Without that, "my_document.xls" would be reported as Word (it contains
    # "doc") and "REPORT.DOC" would be reported as unknown.
    if ($file =~ /\.(?:$word_pe)$/i) {
        return ("doc", "Word");
    }
    if ($file =~ /\.(?:$ppt_pe)$/i) {
        return ("ppt", "PPT");
    }
    if ($file =~ /\.(?:$xls_pe)$/i) {
        return ("xls", "Excel");
    }
    if ($file =~ /\.(?:$rtf_pe)$/i) {
        return ("rtf", "RTF");
    }

    return ("unknown", "Unknown");
}


# Plugin-specific processing of a doc obj that has already been created.
#
# Arguments: $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli
#
# Associates the original file with the top section, sets FileFormat,
# srclink_file / srclinkFile and srcicon metadata and, when OpenOffice
# conversion is not available, emits a warning and adds dummy text.
# Returns 1, matching process_old in this file.
sub process {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;

    my $cursection = $doc_obj->get_top_section();

    # store original file as associated file
    my $filename = &FileUtils::filenameConcatenate($base_dir, $file);
    my $assocfilename = $doc_obj->get_assocfile_from_sourcefile();
    $doc_obj->associate_file($filename, $assocfilename, undef, $cursection);

    my ($ext, $format) = $self->get_file_type_from_extension($file);
    # overwrite the one set by secondary plugin
    $doc_obj->set_utf8_metadata_element($cursection, "FileFormat", $format);

    my $srclink_filename = $doc_obj->get_sourcefile();
    # srclink_file is now deprecated because of the "_" in the metadata name
    # (use srclinkFile); both are still written.
    $doc_obj->add_utf8_metadata ($cursection, "srclink_file", $srclink_filename);
    $doc_obj->add_utf8_metadata ($cursection, "srclinkFile", $srclink_filename);

    $doc_obj->add_utf8_metadata ($cursection, "srcicon",  "_icon".$ext."_");

    # if oo conversion not available, we have no text, so add some
    if (!$self->{'openoffice_ext_working'}) {
        if ($gli) {
            &gsprintf(STDERR, "<Warning p='openOfficePlugin' r='{OpenOfficeConverter.noconversionavailable}: {OpenOfficeConverter.".$self->{'no_openoffice_conversion_reason'}."}'>");
        }
        print STDERR "OpenOfficePlugin: no conversion available, just adding $file as is\n";
        # we have no text - adds dummy text and NoText metadata
        $self->add_dummy_text($doc_obj, $cursection);
    }

    # Return an explicit value. Otherwise the result would be whatever the
    # trailing "if" last evaluated: an empty string, or add_dummy_text's
    # return value.
    # NOTE(review): presumably callers only test the result for definedness
    # - confirm against BaseImporter / ConvertBinaryFile.
    return 1;
}

# Older version of process().
#
# Arguments: $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli
#
# Associates the original file with the top section, sets FileFormat from
# $self->{'file_type'} ("unknown" when undefined), and adds srcicon and
# srclink_file metadata. The associated file is named "doc.<ext>" unless
# 'keep_original_filename' is 1. Always returns 1.
sub process_old {
    my ($self, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;

    my $doc_ext       = $self->{'filename_extension'};
    my $file_type     = defined $self->{'file_type'} ? $self->{'file_type'} : "unknown";
    my $keep_original = ($self->{'keep_original_filename'} == 1);
    my $generic_name  = "doc.$doc_ext";

    # associate original file with doc object
    my $cursection = $doc_obj->get_top_section();
    my $filename   = &FileUtils::filenameConcatenate($base_dir, $file);

    # When keeping the original name, this should be the same filename that
    # was used for the Source and SourceFile metadata, as [SourceFile] is
    # used in the srclink.
    my $assocfilename = $keep_original
        ? $doc_obj->get_assocfile_from_sourcefile()
        : $generic_name;
    $doc_obj->associate_file($filename, $assocfilename, undef, $cursection);

    # We use set instead of add here because we only want one value
    $doc_obj->set_utf8_metadata_element($cursection, "FileFormat", $file_type);

    my $srclink_filename = $keep_original ? "[SourceFile]" : $generic_name;

    $doc_obj->add_utf8_metadata ($cursection, "srcicon",  "_icon".$doc_ext."_");
    $doc_obj->add_utf8_metadata ($cursection, "srclink_file", $srclink_filename);

    return 1;
}
