###########################################################################
#
# OpenOfficeConverter - helper plugin that does office document conversion 
#                       using jodconverter combined with OpenOffice
#
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the 
# University of Waikato, New Zealand.
#
# Copyright (C) 2010 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################
package OpenOfficeConverter;

use ConvertBinaryFile;
use BaseMediaConverter;

use strict;
no strict 'refs'; # allow filehandles to be variables and viceversa

use gsprintf 'gsprintf';

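# These two variables mustn't be initialised on the declaration lines below:
# the BEGIN block sets them at compile time, and an initialiser here would
# only run afterwards (at run time) and overwrite whatever BEGIN worked out.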
our $openoffice_conversion_available; 
our $no_openoffice_conversion_reason; 

# Compile-time setup for the package.
#
# Establishes inheritance from BaseMediaConverter and then works out whether
# office document conversion can be used, recording the outcome in
# $openoffice_conversion_available (1/0) and, on failure, a reason key in
# $no_openoffice_conversion_reason:
#   - "gextopenofficenotinstalled"   : GEXT_OPENOFFICE is not set in the environment
#   - "gextjodconverternotinstalled" : the jodconverter-cli jar is not found under it
#   - "openofficenotinstalled"       : probing for / launching soffice gave a non-zero status
#
# Side effects visible in this block: SOFFICE_HOST and SOFFICE_PORT are given
# defaults if unset; on Windows SOFFICE_HOME may be set and its "program"
# subdirectory appended to PATH; and an soffice process is launched via system().
BEGIN {
    @OpenOfficeConverter::ISA = ('BaseMediaConverter');

    # Check that OpenOffice and jodconverter are installed and available on 
    # the path 
    $openoffice_conversion_available = 1;
    $no_openoffice_conversion_reason = "";
    
    if (! defined $ENV{'GEXT_OPENOFFICE'}) {
	$openoffice_conversion_available = 0;
	$no_openoffice_conversion_reason = "gextopenofficenotinstalled";
    }
    else {
	my $gextoo_home = $ENV{'GEXT_OPENOFFICE'};
	#my $jodjar = &FileUtils::filenameConcatenate($gextoo_home,"lib","java","jodconverter.jar");
	my $jodjar = &FileUtils::filenameConcatenate($gextoo_home,"jodconverter-2.2.2","lib","jodconverter-cli-2.2.2.jar");

	if (!-e $jodjar) {
	    #print STDERR "Failed to find $jodjar\n";
	    $openoffice_conversion_available = 0;
	    $no_openoffice_conversion_reason = "gextjodconverternotinstalled";
	}
	else {
	    # test to see if soffice is in path	    

	    # Fall back to localhost:8100 when the user has not chosen a host/port.
	    if(!defined $ENV{'SOFFICE_HOST'}) {
		$ENV{'SOFFICE_HOST'} = "localhost";
	    }
	    if(!defined $ENV{'SOFFICE_PORT'}) {
		$ENV{'SOFFICE_PORT'} = "8100"; 
	    }
	    
	    #my $cmd = "soffice --headless 2>&1"; # for linux and mac
	    my $cmd = "soffice \"--accept=socket,host=$ENV{'SOFFICE_HOST'},port=$ENV{'SOFFICE_PORT'};urp;StarOffice.ServiceManager\" --headless"; # basic shared command
	    my $status = 0;

	    # No more JODCONVERTER_PORT env var: the port that jodconverter uses is now the
	    # same as SOFFICE_PORT, because the 2 are meant to communicate on the same port.
	    # The default port that jodconverter expects OO (OpenOffice/libreoffice/soffice/staroffice) to listen on is 8100, so we try that.
	    if ($ENV{'GSDLOS'} =~ m/^windows$/) {		
		
		# important to have this set when the Greenstone server 
		# and open office is on a remote windows machine
		if(!defined $ENV{'SOFFICE_HOME'}) {
		    # check all the favourite haunts of openoffice in turn
			$ENV{'SOFFICE_HOME'} = &util::get_first_existing_dir(
				&FileUtils::filenameConcatenate($ENV{'ProgramFiles'},"OpenOffice.org 3"),
				&FileUtils::filenameConcatenate($ENV{'ProgramFiles(X86)'},"OpenOffice.org 3"),
				&FileUtils::filenameConcatenate($ENV{'ProgramFiles'},"LibreOffice"),
				&FileUtils::filenameConcatenate($ENV{'ProgramFiles(X86)'},"LibreOffice")				
			);
			
			# These 2 env vars are different and appear as expected when printed out in a cmd prompt on a 64 bit Windows. But on the same machine
			# in perl, at least when launched through (Java 32 bit) GLI, their values are shown as the same: as "C:\Program Files (x86)"
			#print STDERR "@@@@ ProgFiles: $ENV{'ProgramFiles'}\n";
			#print STDERR "@@@@ ProgFiles x86: $ENV{'ProgramFiles(X86)'}\n";
		}
		if(defined $ENV{'SOFFICE_HOME'}) {
			#print STDERR "@@@@ Found $ENV{'SOFFICE_HOME'}\n";
			# Make soffice findable by adding <SOFFICE_HOME>/program to PATH, if that directory exists.
			my $ooffice_dir_guess =
				&FileUtils::filenameConcatenate($ENV{'SOFFICE_HOME'},"program");
			if (-d  $ooffice_dir_guess) {
				&util::envvar_append("PATH",$ooffice_dir_guess);
			}
		}
		
		# for windows, when working on a remote system, want to be able to start OO if
		# not already running. We'll use the uno socket method to do so. Else client-gli
		# tends to hang, waiting for the prompt to return after OO has been started up
		# (which doesn't happen, so need to Ctrl-C GLI and run it again for it to work).
		
		# first need to silently check soffice exists else windows will display a popup
		$status = system("which soffice >nul 2>&1"); # which.exe should be in bin/windows
		if ($status == 0) {
		    #$cmd = "start \"soffice process\" $cmd >nul 2>&1"; # order of >nul and 2>&1 matters, see below
			
			# On Windows, when perl/C/C++ uses system() to launch any standalone process in the background
			# with "start \"window title\" cmd", perl's child process (despite being mostly independent/detached 
			# from perl) *appears* to share stdout/stderr (stdin) streams with the parent perl process. This is
			# noticeable with perl launching soffice or any program that neither writes to stderr/out nor sends
			# eof/eos to indicate parent perl's streams are closed (e.g. Notepad).
			# This is *not* a problem when perl scripts are run from command line. Command line programs run fine.
			# But when combined with SafeProcess used by GLI, such silent standalone programs like soffice
			# cause GLI to hang every time right until the silent program's terminated, because SafeProcess' join()
			# calls on the perl child process' stderr and stdout block at SafeProcess.InputStreamGobbler.readLine(),
			# because these stderr/out streams didn't receive eof/eos, implying the silent grandchild process
			# somehow kept them open.
			# This is true at Windows/C's system() command level: InputStreamGobblers using join() calls (as SafeProcess
			# does), combined with a c++ program that uses system() that launches something like Notepad, just all block
			# until Notepad is closed.
			# When perl calls a custom java BackgroundLauncher.java program to launch an external program with Runtime.exec(),
			# this hanging problem doesn't happen as java's exec() is not implemented with Windows C's system().
			# Similarly, a WScript file (vbs or js script) that uses WinScriptHost.Run method to run an external program
			# also doesn't exhibit this problem. So we use a custom vbs script to launch external programs like soffice in
			# the background from perl (to avoid having to set up Java or have the java program compiled up by the release-kits,
			# since perl building scripts are not just run from GLI but are also run directly from the cmd line).
			
			$cmd = "CScript //Nologo $ENV{'GSDLHOME'}\\bin\\windows\\background-launcher.vbs $cmd";	
			
		    #print STDERR "***** Tried to start-up OpenOffice with:\n$cmd\n";		    
		}
		# else: $status is non-zero ("which soffice" failed), so the launch
		# further down is skipped and conversion is flagged as unavailable.
		# ($cmd itself still holds the basic shared command at this point.)
	    }
	    else {
		# Windows seems to launch OpenOffice as a service (i.e.
		# automatically puts it in the background).  
		# For Unix putting it in the background needs to be done 
		# explicitly by appending & to the end

		# IMPORTANT! When redirecting output from 2>&1 to a file or null, >/dev/null (or >out.txt) should come BEFORE 2>&1 
		# i.e. $some_cmd >out.txt 2>&1 and likewise $some_cmd >/dev/null 2>&1
		# Doing it in the wrong order further causes problems with SafeProcess when GLI starts up and calls pluginfo on the pluginslist:
		# SafeProcess blocks forever on read() from stdout of the process running pluginfo, because there's never data including no eof/eos
		# on the process' stdout when running this command in the wrong order.
		# NOTE(review): because the command is backgrounded with a trailing &,
		# the status seen below is presumably that of the shell rather than of
		# soffice itself - confirm that a missing soffice is actually detected here.
		$cmd .=  " >/dev/null 2>&1 &";
	    }

	    #print STDERR "@@@@ running: $cmd\n";
	    
	    # Only attempt the launch when nothing has failed so far; $status can
	    # already be non-zero only via the Windows "which soffice" probe above.
	    $status = system($cmd) if ($status == 0);
	    if ($status != 0) {
		#print STDERR "Failed to run: $cmd\n";
		#print STDERR "$!\n";
		$openoffice_conversion_available = 0;
		$no_openoffice_conversion_reason = "openofficenotinstalled";
	    } 
	}
    }
}

# Arguments contributed by this helper plugin.  Each entry is a hash
# describing one argument; new() appends these to the caller's "ArgList".
#   openoffice_port - integer, default "8100", not required.
#                     (The 'range' string "81," presumably denotes a minimum
#                     of 81 with no upper bound - confirm against the
#                     argument parser.)
my $arguments = [
    {
        name  => 'openoffice_port',
        desc  => '{OpenOfficeConverter.openoffice_port}',
        type  => 'int',
        deft  => '8100',
        range => '81,',
        reqd  => 'no',
    },
];

# Descriptor for the plugin itself; new() appends it to the caller's
# "OptList".  It shares (by reference) the argument list defined above.
my $options = {
    name     => 'OpenOfficeConverter',
    desc     => '{OpenOfficeConverter.desc}',
    abstract => 'yes',
    inherits => 'yes',
    args     => $arguments,
};

# Constructor.
#
# Arguments (after the class name):
#   $pluginlist      - array ref; this class name is appended to it
#   $inputargs       - passed through unchanged to BaseMediaConverter
#   $hashArgOptLists - hash ref; this plugin's arguments are appended to its
#                      "ArgList" entry and its option descriptor to "OptList"
#   $auxilary        - passed through unchanged to BaseMediaConverter
#
# Returns the object built by BaseMediaConverter, re-blessed into $class.
# Unless the object is in 'info_only' mode, it also records whether office
# conversion is available (as determined in the BEGIN block) and, if it is
# not, stores the reason and prints a message to the object's 'outhandle'.
sub new {
    my ($class) = shift (@_);
    my ($pluginlist,$inputargs,$hashArgOptLists,$auxilary) = @_;
    push(@$pluginlist, $class);

    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
    push(@{$hashArgOptLists->{"OptList"}},$options);

    # Use the explicit Class->method form rather than the indirect object
    # syntax ("new BaseMediaConverter(...)"): the latter relies on parser
    # heuristics, which is particularly fragile inside a sub that is itself
    # called "new".
    my $self = BaseMediaConverter->new($pluginlist, $inputargs,
				       $hashArgOptLists, $auxilary);

    if ($self->{'info_only'}) {
	# don't worry about any options etc
	return bless $self, $class;
    }
    if (!$openoffice_conversion_available) {
	$self->{'no_openoffice_conversion_reason'} = $no_openoffice_conversion_reason;

	# The reason key is spliced into the message so that gsprintf can
	# look up the matching {OpenOfficeConverter.<reason>} string.
	my $outhandle = $self->{'outhandle'};
	&gsprintf($outhandle, "OpenOfficeConverter: {OpenOfficeConverter.noconversionavailable} ({OpenOfficeConverter.$no_openoffice_conversion_reason})\n");
    }  

    $self->{'openoffice_conversion_available'} = $openoffice_conversion_available;
    
    return bless $self, $class;

}

# Per-run initialisation.
#
# Arguments: ($verbosity, $outhandle, $failhandle) - accepted but not used
# by this implementation.
#
# When office conversion is available, builds an soffice launch command for
# the configured 'openoffice_port' and stores it in 'openoffice_launch_cmd'.
# (Nothing else in this file reads that entry; it appears to be unused.)
# Always resets 'ootmp_file_paths', the list of temporary output files that
# convert() adds to and clean_up_temporary_files() removes.
#
# Returns nothing useful (undef in scalar context), as before.
sub init {
    my $self = shift(@_);
    my ($verbosity, $outhandle, $failhandle) = @_;

    if ($openoffice_conversion_available) {
	my $oo_port = $self->{'openoffice_port'};

	my $launch_cmd = "soffice";
	$launch_cmd .= " \"--accept=socket,host=localhost,port=$oo_port;urp;StarOffice.ServiceManager\"";
	$launch_cmd .= " --headless";
	$self->{'openoffice_launch_cmd'} = $launch_cmd;
    }

    # This must be an array *reference*.  Assigning the empty list "()" to a
    # scalar hash slot stores undef, which only worked because later uses
    # happened to autovivify it.
    $self->{'ootmp_file_paths'} = [];

    # Keep the historical false return value rather than leaking the array ref.
    return;
}

# Tear-down counterpart of init(): removes any temporary files recorded
# during conversion by delegating to clean_up_temporary_files().
sub deinit {
    my ($self) = @_;

    return $self->clean_up_temporary_files();
}


# Convert an office document to another file type by running the
# jodconverter command-line jar against an soffice instance.
#
# Arguments:
#   $source_file_full_path - file to convert; must be an existing plain file
#   $target_file_type      - extension/type of the file to produce
#   $convert_options       - optional; accepted but not used here
#   $convert_id            - optional; when non-empty it is appended (after
#                            an underscore) to the cached file root
#   $cache_mode            - optional; when non-empty it is passed on in the
#                            info hash given to autorun_general_cmd()
#
# Returns a three-element list ($ok, $result, $target_file_path):
#   (0, undef, undef) when conversion is unavailable or the source is not a file;
#   (0, $result, $path) when autorun_general_cmd() reports an error;
#   (1, $result, $path) otherwise.
sub convert {
    my ($self, $source_file_full_path, $target_file_type,
        $convert_options, $convert_id, $cache_mode) = @_;

    # Optional arguments collapse to the empty string when absent or false.
    $convert_options = "" unless $convert_options;
    $convert_id      = "" unless $convert_id;
    $cache_mode      = "" unless $cache_mode;

    # Bail out early: no usable office setup, or no such source file.
    if (!$openoffice_conversion_available || !-f $source_file_full_path) {
        return (0,undef,undef);
    }

    my $outhandle = $self->{'outhandle'};
    my $verbosity = $self->{'verbosity'};

    my $source_file_no_path = &File::Basename::basename($source_file_full_path);

    # Work out where the converted file goes: a timestamped temporary file
    # (remembered so it can be deleted later) or a file in the cache area.
    my $target_file_path;
    if (!$self->{'enable_cache'}) {
        $target_file_path = &util::get_timestamped_tmp_filename_in_collection($source_file_full_path, $target_file_type);
        push(@{$self->{'ootmp_file_paths'}}, $target_file_path);
    }
    else {
        # init_cache_for_file() must run first: the cached_* entries are read after it.
        $self->init_cache_for_file($source_file_full_path);

        my $cached_name = $self->{'cached_file_root'};
        $cached_name .= "_$convert_id" if ($convert_id ne "");
        $cached_name .= ".$target_file_type";

        $target_file_path = &FileUtils::filenameConcatenate($self->{'cached_dir'}, $cached_name);
    }

    # Location of the jodconverter command-line jar inside the extension.
    my $jodjar = &FileUtils::filenameConcatenate($ENV{'GEXT_OPENOFFICE'},"jodconverter-2.2.2","lib","jodconverter-cli-2.2.2.jar");

    # Decide which office installation directory to hand to jodconverter.
    # A defined SOFFICE_HOME wins (shortened to its 8.3 form on Windows);
    # otherwise fall back to per-platform defaults.
    my $on_windows = ($ENV{'GSDLOS'} =~ m/^windows$/);
    my $office_short_path;
    if (defined $ENV{'SOFFICE_HOME'}) {
        $office_short_path = $on_windows
            ? Win32::GetShortPathName($ENV{'SOFFICE_HOME'})
            : $ENV{'SOFFICE_HOME'};
    }
    elsif ($on_windows) {
        $office_short_path = "C:\\PROGRA~1\\OPENOF~1.ORG";
    }
    else {
        # Default that jodconverter itself tries on linux, used when none of
        # the candidate directories below exists.
        $office_short_path = "/opt/openoffice.org3";

        # Checked in order: libreoffice first, then openoffice, then the
        # 64-bit openoffice.org3 location.
        foreach my $candidate_dir ("/usr/lib/libreoffice",
                                   "/usr/lib/openoffice",
                                   "/usr/lib64/openoffice.org3") {
            if (-d $candidate_dir) {
                $office_short_path = $candidate_dir;
                last;
            }
        }
    }

    # jodconverter does not consult SOFFICE_HOME by itself, so the office
    # home is always given explicitly with -Doffice.home.  The port is taken
    # from the plugin's openoffice_port option (not from SOFFICE_PORT).
    my $convert_cmd = join(" ",
        "java",
        "-Doffice.home=\"$office_short_path\"",
        "-jar \"$jodjar\"",
        "--port $self->{'openoffice_port'}",
        "\"$source_file_full_path\"",
        "\"$target_file_path\"");

    print $outhandle "Convert command: $convert_cmd\n" if ($verbosity>2);

    # Progress/caching information handed to the generic command runner.
    my %conversion_info = (
        'message_prefix' => "OpenOffice Conversion",
        'message'        => "Converting $source_file_no_path to: $target_file_type",
    );
    $conversion_info{'cache_mode'} = $cache_mode if ($cache_mode ne "");

    my ($regenerated,$result,$had_error)
        = $self->autorun_general_cmd($convert_cmd,$source_file_full_path, $target_file_path,\%conversion_info);

    return (($had_error ? 0 : 1), $result, $target_file_path);
}


# Convenience wrapper around convert() that always passes the cache mode
# "without_result".  Missing or false $convert_options / $convert_id are
# forwarded as empty strings.  Returns whatever convert() returns.
sub convert_without_result {
    my ($self, $source_file_path, $target_file_type,
        $convert_options, $convert_id) = @_;

    return $self->convert(
        $source_file_path,
        $target_file_type,
        ($convert_options || ""),
        ($convert_id || ""),
        "without_result",
    );
}


# Apparently an unfinished helper: it only splits $input_filename into its
# name, directory and suffix parts and does nothing further with them.
#
# Arguments: ($output_ext, $input_filename, $textref); only $input_filename
# is used.
#
# The sub has no explicit return; as its last statement is the list
# assignment below, a caller in list context receives
# ($tailname, $dirname, $suffix).
sub tmp_area_convert_fileXX {
    my $self = shift (@_);
    my ($output_ext, $input_filename, $textref) = @_;

    # The handles and convert_to settings that used to be copied out of
    # $self here were never used, so those dead locals have been dropped.

    # derive tmp filename from input filename
    my ($tailname, $dirname, $suffix)
	= &File::Basename::fileparse($input_filename, "\\.[^\\.]+\$");
}


# Delete every temporary file recorded in 'ootmp_file_paths' that still
# exists, then reset the list to empty.
#
# Returns nothing useful (undef in scalar context), as before.
sub clean_up_temporary_files {
    my $self = shift(@_);

    foreach my $ootmp_file_path (@{$self->{'ootmp_file_paths'}}) {
	if (-e $ootmp_file_path) {
	    &FileUtils::removeFiles($ootmp_file_path);
	}
    }

    # Reset to an empty array *reference*.  Assigning the empty list "()" to
    # a scalar hash slot stores undef rather than an empty array.
    $self->{'ootmp_file_paths'} = [];

    # Keep the historical false return value rather than leaking the array ref.
    return;
}



1;	
