###########################################################################
#
# solrserver.pm -- class for starting,stopping and sending commands to
# the Solr9 server.
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the
# University of Waikato, New Zealand.
#
# Copyright (C)2025 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################


package solrserver;

use strict; 
#no strict 'refs';

use solrutil;

BEGIN {
    # SOLR_PORT should now be defined in the environment
    die "SOLR_PORT not set\n" unless defined $ENV{'SOLR_PORT'};
    die "SOLR_HOST not set\n" unless defined $ENV{'SOLR_HOST'};
}

sub new {
    my $class = shift(@_);

    my $self = {}; 
    $self->{'server_explicitly_started'} = undef;

    my $solr_url = "http://".$ENV{'SOLR_HOST'}.":".$ENV{'SOLR_PORT'}."/solr";
    $self->{'base-url'} = $solr_url; 
    $self->{'admin-url'} = "$solr_url/admin/cores";

    return bless $self, $class;
}


sub get_solr_base_url {
    my $self = shift (@_);
    return $self->{'base-url'};
}


sub _wget_service
{
    my $self = shift (@_);
    my ($output_format,$url,$cgi_get_args) = @_;

    my $full_url = $url;

    $url .= "?$cgi_get_args" if (defined $cgi_get_args);

##    print STDERR "\n\n**** _wget_service SOLR WEB URL: $url\n\n";
    
    # the wget binary is dependent on the gnomelib_env (particularly lib/libiconv2.dylib) being set, particularly on Mac Lion binaries (android too?)
    &util::set_gnomelib_env(); # this will set the gnomelib env once for each subshell launched, by first checking if GEXTGNOME is not already set

    my $cmd = "wget -O - \"$url\" 2>&1";

    my $preamble_output = "";    
    my $xml_output = "";
    my $error_output = undef;
    my $is_error = 0;

    my $in_preamble = ($output_format eq "xml") ? 1 : 0;
    
    print STDERR "**** wgetcmd = \n $cmd\n";

	if (open(WIN,"$cmd |")) {

	my $line;
	while (defined ($line=<WIN>)) {
            #print STDERR "line = $line\n";
	    if ($line =~ m/ERROR \d+:/) {
		chomp $line;
		$error_output = $line;
		$is_error = 1;
		last;
	    }
	    elsif ($line =~ m/failed: (Connection refused|Bad file descriptor)/ || $line =~ m/failed:/i) {
                # When the server wasn't running on windows, also got "failed: Bad file descriptor".
                # But making more robust by adding support for any "failed:..." wget response msg
		chomp $line;
		$error_output = $line;
		last;
	    }
	    elsif ($in_preamble) {
		if ($line =~ m/<.*>/) {
		    $in_preamble = 0;
		}
		else {
		    $preamble_output .= $line;
		}
	    }

	    if (! $in_preamble) {
		$xml_output .= $line;
	    }
	}
	close(WIN);

    }
    else {
	$error_output = "Error: failed to run $cmd\n";
	$error_output .= "  $!\n";
    }

    if(defined $error_output) {
        if($is_error) {
            print STDERR "\n\n**** WGET_SERVICE got an error: $error_output\n\n";
        } else {
            print STDERR "\n\n**** WGET_SERVICE got: $error_output. (SOLR server likely not running.)\n\n";
        }
     }
    
    my $output = { 'url'      => $full_url,
		   'preamble' => $preamble_output,
		   'output'   => $xml_output,
		   'error'    => $error_output };

    print STDERR "url = $full_url\n";
    print STDERR "preamble = $preamble_output\n";
    #print STDERR "output = $xml_output\n";
    if (defined $error_output) {
        print STDERR "error = $error_output\n";
    }
    return $output;
}


sub _base_service
{
    my $self = shift (@_);
    my ($cgi_get_args) = @_;

    my $base_url = $self->{'base-url'};

    return $self->_wget_service("html",$base_url,$cgi_get_args);
}
 
sub _admin_service
{
    my $self = shift (@_);
    my ($cgi_get_args) = @_;

    my $admin_url = $self->{'admin-url'};

    return $self->_wget_service("xml",$admin_url,$cgi_get_args);
}


sub server_running
{
    my $self = shift @_;

    my $output = $self->_base_service();

    my $have_error = defined $output->{'error'};

    my $running = ($have_error) ? 0 : 1;
    print STDERR "server running = $running\n";
    return $running;
}

sub admin_unload_all_cores_for_prefix
{
    my $self = shift @_;
    my ($coreprefix) = @_;

    # by default returns json, so add wt=xml
    my $cgi_get_args = "action=STATUS&indexInfo=false&wt=xml";
    my $output = $self->_admin_service($cgi_get_args);

    if (defined $output->{'error'}) {
	# severe error, such as failing to connect to the server
	$self->print_error($output);
	return;
    }

    my $xml_output = $output->{'output'};
	
    my $matching_element="<lst\\s+name=\"($coreprefix-[a-z]+)\">";
    my @matches = ($xml_output =~ m/$matching_element/g);

    foreach my $core (@matches) {
	print STDERR "unloading solr core $core\n";
	$self->admin_unload_core_explicitly_retaining_index($core);
    }
}

sub print_error {
    my $self = shift @_;
    my ($output) = @_;

    my $url      = $output->{'url'};
    my $preamble = $output->{'preamble'};
    my $error    = $output->{'error'};
    
    print STDERR "----\n";
    print STDERR "Error: Failed to get XML response from:\n";
    print STDERR "         $url\n";
    print STDERR "Output was:\n";
    print STDERR $preamble if ($preamble ne "");
    print STDERR "$error\n";
    print STDERR "----\n";
}

sub admin_ping_core
{
    my $self = shift @_;
    my ($core) = @_;

    my $cgi_get_args = "action=STATUS&core=$core&wt=xml";

    my $ping_status = 1;

    my $output = $self->_admin_service($cgi_get_args);

    if (defined $output->{'error'}) {
	# severe error, such as failing to connect to the server
	$ping_status = 0;
	$self->print_error($output);

    }
    else {
	
	# If the collection doesn't exist yet, then there will be
	# an empty element of the form:
	#   <lst name="collect-doc"/>
	# where 'collect' is the actual name of the collection, 
	# such as demo

	my $xml_output = $output->{'output'};
	
	my $empty_element="<lst\\s+name=\"$core\"\\s*\\/>";
	
	$ping_status = !($xml_output =~ m/$empty_element/s);
    }

    return $ping_status;
}

sub filtered_copy_UNUSED
{
    my $self = shift @_;

    my $src_file = shift @_;
    my $dst_file = shift @_;
    my $re_substitutions = shift @_;

    # $re_substitutions is a hashmap of the form: [re_key] => subst_str
    
    my $content = "";

    if (open(FIN,'<:utf8',$src_file)) {

	my $line;
	while (defined($line=<FIN>)) {
	    $content .= $line;
	}
    }

    close(FIN);

    # perform RE string substitutions
    foreach my $re_key (keys %$re_substitutions) {

	my $subst_str = $re_substitutions->{$re_key};

	# Perform substitution of the form:
	#  $content =~ s/$re_key/$subst_str/g;
	# but allow allow separator char (default '/') 
	# and flags (default 'g') to be parameterized

	$content =~ s/$re_key/$subst_str/g;
    }
    
    if (open(FOUT, '>:utf8', $dst_file)) {
	print FOUT $content;
	close(FOUT);
    }
    else {
	print STDERR "Error: Failed to open file '$dst_file' for writing.\n$!\n";
    }   
}

# perl code doesn't need to modify this anymore
sub solr_xml_to_solr_xml_in_UNUSED
{
    my $self = shift @_;
    my ($solr_xml_dir) = @_;
    
    my $gsdl3home = $ENV{'GSDL3HOME'};
    
    if (!defined $solr_xml_dir || !-d $solr_xml_dir) {
	# if not passed in, use stored solr_live_home
	$solr_xml_dir = $self->{'solr_live_home'};
    }

    my $solrxml_in = &util::filename_cat($solr_xml_dir, "solr.xml.in");
    my $solrxml = &util::filename_cat($solr_xml_dir, "solr.xml");

    my $gsdl3home_re = &util::filename_to_regex($gsdl3home);

    my $replacement_map = { qr/$gsdl3home_re/ => "\@gsdl3webhome\@" };

    $self->filtered_copy_UNUSED($solrxml,$solrxml_in,$replacement_map);
}


sub solr_xml_in_to_solr_xml_UNUSED
{
    my $self = shift @_;
    my ($solr_xml_dir) = @_;

    my $gsdl3home = $ENV{'GSDL3HOME'};
    if (!defined $solr_xml_dir || !-d $solr_xml_dir) {
	# if not passed in, use stored solr home
	$solr_xml_dir = $self->{'solr_live_home'};
    }
    my $solrxml_in = &util::filename_cat($solr_xml_dir, "solr.xml.in");
    my $solrxml = &util::filename_cat($solr_xml_dir, "solr.xml");
    
    my $gsdl3home_re = &util::filename_to_regex($gsdl3home);

    my $replacement_map = { qr/\@gsdl3webhome\@/ => $gsdl3home_re };

    $self->filtered_copy_UNUSED($solrxml_in,$solrxml,$replacement_map);
}

#### UP TO HERE
# Some of the Solr CoreAdmin API calls available. 
# See https://solr.apache.org/guide/solr/latest/configuration-guide/coreadmin-api.html
sub admin_reload_core
{
    my $self = shift @_;
    my ($core) = @_;

    my $cgi_get_args = "action=RELOAD&core=$core";

    $self->_admin_service($cgi_get_args);

}

# changes the name of core to other
sub admin_rename_core
{
    my $self = shift @_;
    my ($oldcore, $newcore) = @_;

    my $cgi_get_args = "action=RENAME&core=$oldcore&other=$newcore";

    $self->_admin_service($cgi_get_args);

}

# swaps the names for two cores
sub admin_swap_core
{
    my $self = shift @_;
    my ($oldcore, $newcore) = @_;

    my $cgi_get_args = "action=SWAP&core=$oldcore&other=$newcore";

    $self->_admin_service($cgi_get_args);

}


sub admin_create_core
{
    my $self = shift @_;
    my ($core, $site, $collection) = @_; # core is in the form (building-)core

    #my $is_building = ($core =~ s/^(building-)//);
    #print STDERR "is buildng = $is_building\n";
    #my ($site, $coll, $idx) = ($core =~ m/^(.*)-(.*)-(.*)$/);
    #print STDERR "site, coll, idx = $site, $coll, $idx\n";
    #if ($is_building) {$idx = "b-$idx";}
    my $cgi_get_args = "action=CREATE&name=$core";
    my $data_dir = &FileUtils::filenameConcatenate($ENV{'SOLR_HOME'}, "cores",$site,$collection,$core);

    $cgi_get_args .= "&instanceDir=$data_dir";

    #$cgi_get_args .= "&dataDir=data";
    
    $self->_admin_service($cgi_get_args);
}

sub admin_create_core_old
{
    my $self = shift @_;
    my ($core, $data_parent_dir) = @_; # data_parent_dir is optional, can be index_dir. Defaults to builddir if not provided

    my ($ds_idx) = ($core =~ m/^.*-(.*?)$/);

    my $cgi_get_args = "action=CREATE&name=$core";

    my $collect_home = $ENV{'GSDLCOLLECTDIR'};
    my $etc_dirname = &FileUtils::filenameConcatenate($collect_home,"etc");

    if(!defined $data_parent_dir) {
	$data_parent_dir = $self->{'build_dir'};
    } 
    
    my $idx_dirname = &util::filename_cat($data_parent_dir,$ds_idx); # "dataDir"  
	    
    $cgi_get_args .= "&instanceDir=$etc_dirname";
    $cgi_get_args .= "&dataDir=$idx_dirname";

    $self->_admin_service($cgi_get_args);

}

# removes (unloads) core from the ext/solr/sorl.xml config file
sub admin_unload_core
{
    my $self = shift @_;
    my ($core, $delete) = @_;

    my $cgi_get_args = "action=UNLOAD&core=$core"; 
    # should we also deleteInstanceDir=true??
    if(defined $delete && $delete == 1) {
	$cgi_get_args = $cgi_get_args."&deleteData=true";
    }

    $self->_admin_service($cgi_get_args);

}

sub admin_unload_core_explicitly_retaining_index
{
    # For UNLOAD core params, see page 315 of
    #    https://archive.apache.org/dist/lucene/solr/ref-guide/apache-solr-ref-guide-4.7.pdf
    
    my $self = shift @_;
    my ($core) = @_;
    
    # Don't delete index (sidx/didx folder) along with unloading core, so force 0 as parameter
    # (though not deleting the index is the default behaviour of admin_unload_core() anyway,
    # since activate is meant to manually take care of deleting the index folder and moving the
    # building folder to replace index, instead of activate asking unload_core to delete the
    # index folder).
    # But this function's very particular behaviour may be crucial for other instances such as
    # its use in solrbuilder::post_build_indexes(), so even if admin_unload_core() could
    # conceivably be changed to delete the index by default, this method would still do the
    # right thing when called by solrbuilder::post_build_indexes().
    $self->admin_unload_core($core, 0);
}


sub start
{
    my $self = shift @_;
    my ($verbosity) = @_;
    
    $verbosity = 1 unless defined $verbosity;

    my $solr_ext_home    = $ENV{'GEXT_SOLR9'};
   
   # $self->{'solr_live_home'} = $solr_live_home; # will be used later to generate solr.xml.in from solr.xml and vice-versa
    my $server_port = $ENV{'SOLR_PORT'};
    my $server_host = $ENV{'SOLR_HOST'};

    chdir($solr_ext_home);

    my $server_java_cmd = "ant start-solr";

    my $server_status = "unknown";
    if ($self->server_running()) {

	$server_status = "already-running";
	## print STDERR "@@@@ server already running\n\n";
    }
    elsif (open(STARTIN,"$server_java_cmd 2>&1 |")) {

	## print STDERR "@@@@ need to start tomcat\n\n";
	print STDERR "**** starting up solr server with cmd start =\n $server_java_cmd\n" if ($verbosity > 1);

	my $line;
	while (defined($line=<STARTIN>)) {	   
	
	    #if ($line =~ m/^(BUILD FAILED)/) {
	    print "SOLR startup: $line";
	    #}
		if ($line =~ m/^BUILD SUCCESSFUL/) {
			last;
		}
	}

	close(STARTIN);
	
	if ($self->server_running()) {
	    $server_status = "explicitly-started";
	    #print STDERR "\n*** SOLR server has started up now.\n\n";
	} else {
	    $server_status = "failed-to-start"; # no need to set this, will be exiting below anyway

	    print STDERR "Error: failed to start greenstone SOLR server\n";
	    print STDERR "$!\n";
	    print STDERR "Command attempted was:\n";
	    print STDERR "  $server_java_cmd\n";
	    print STDERR "run from directory:\n";
	    print STDERR "  $solr_ext_home\n";
	    print STDERR "----\n";
	    
	    exit -1;
	}
    }
    else {
	## print STDERR "@@@@ failed to start solr\n\n";
	$server_status = "failed-to-start"; # no need to set this, will be exiting below anyway

	print STDERR "Error: unable to start greenstone SOLR server\n";
	print STDERR "$!\n";
	print STDERR "Command attempted was:\n";
	print STDERR "  $server_java_cmd\n";
	print STDERR "run from directory:\n";
	print STDERR "  $solr_ext_home\n";
	print STDERR "----\n";

	exit -1;
    }

    if ($server_status eq "explicitly-started") {
	$self->{'server_explicitly_started'} = 1;
	print "SOLR server ready and listening for connections at ";
	print " $server_host:$server_port\n";
	    
	# now we know the server is ready to accept connections
    }
    elsif ($server_status eq "already-running") {
	print STDERR "Using existing SOLR server detected at $server_host:$server_port\n";
	$self->{'server_explicitly_started'} = 0;
    }
    elsif ($server_status eq "failed-to-start") {
	print STDERR "Tried to start SOLR web server at $server_host:$server_port";
	print STDERR ", but encountered an initialization error\n";
	exit -1;
    }

}

sub explicitly_started
{
    my $self = shift @_;

    return $self->{'server_explicitly_started'};
}

sub stop
{    
    my $self = shift @_;
    my ($options) = @_;

    my $solr_home         = $ENV{'GEXT_SOLR9'};

    chdir($solr_home);

    # defaults
    my $do_wait = 1; 
    my $output_verbosity = 1;

    if (defined $options) {
	if (defined $options->{'do_wait'}) {
	    $do_wait = $options->{'do_wait'};
	}
	if (defined $options->{'output_verbosity'}) {
	    $output_verbosity = $options->{'output_verbosity'};
	}
    }

    my $server_java_cmd = "ant stop-solr";

    print STDERR "**** java server stop cmd:\n  $server_java_cmd\n" if ($output_verbosity>1);

    if (open(STOPIN,"$server_java_cmd 2>&1 |")) {

	my $line;
	while (defined($line=<STOPIN>)) {
	    print "SOLR shutdown: $line" if ($output_verbosity>1);
	}
	close(STOPIN);

	if ($do_wait) {
	    wait(); # let the child process finish
	}

	if ($output_verbosity>0) {
	    print "SOLR server shutdown\n";
	}
    }
    else {
	print STDERR "Error: failed to stop SOLR server\n";
	print STDERR "$!\n";
	print STDERR "Command attempted was:\n";
	print STDERR "  $server_java_cmd\n";
	print STDERR "run from directory:\n";
	print STDERR "  $solr_home\n";
	print STDERR "----\n";

	exit -2;
    }
}



1;
