#####################################################################
#
# GoogleVisionAPIConverter.pm -- helper plugin that allows other plugins
# (such as ImagePlugin and PagedImagePlugin) to extend their
# processing capability through sub-classing inheritance (such as
# GoogleVisionImagePlugin and GoogleVisionPagedImagePlugin) to
# expand the image processing capabilities at ingest time to
# include the Google Vision API allowing for: metadata labelling
# of objects within a scene; and OCR text recognition.
#
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the 
# University of Waikato, New Zealand.
#
# Copyright (C) 1999 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################

package GoogleVisionAPIConverter;

use strict;
no  strict 'refs'; # allow filehandles to be variables and viceversa
no  strict 'subs';

use gsprintf;
use FileUtils;

##use ImagePlugin;
use BaseMediaConverter;

use utf8;
use JSON; # qw( from_json, encode_json );

# Set up the inheritance chain at compile time: this converter derives
# from BaseMediaConverter.
sub BEGIN {
    @GoogleVisionAPIConverter::ISA = qw(BaseMediaConverter);
}

# Arguments this converter adds to whichever plugin inherits from it.
# Each 'desc' value is a "{Package.key}" style string.
#
# NOTE(review): the 'desc' key of google_application_credentials is spelt
# "applicatio" (no trailing 'n').  It is reproduced unchanged here; confirm
# against the strings resource it is resolved in before correcting it.
my $arguments = [
    {
        'name' => "google_application_credentials",
        'desc' => "{GoogleVisionAPIConverter.google_applicatio_credentials}",
        'type' => "string",
        'reqd' => "no",
        'deft' => "google-sa-credentials-key.json",
    },
    {
        'name' => "enable_image_labelling",
        'desc' => "{GoogleVisionAPIConverter.enable_image_labelling}",
        'type' => "flag",
        'reqd' => "no",
    },
    {
        'name' => "enable_image_ocr",
        'desc' => "{GoogleVisionAPIConverter.enable_image_ocr}",
        'type' => "flag",
        'reqd' => "no",
    },
    {
        'name' => "enable_document_ocr",
        'desc' => "{GoogleVisionAPIConverter.enable_document_ocr}",
        'type' => "flag",
        'reqd' => "no",
    },
];

# Option record describing this converter; 'args' points at the list above.
my $options = {
    'name'     => "GoogleVisionAPIConverter",
    'desc'     => "{GoogleVisionAPIConverter.desc}",
    'abstract' => "no",
    'inherits' => "yes",
    'args'     => $arguments,
};

# Constructor.
#
# Records $class in $pluginlist, appends this converter's argument and
# option descriptions to the shared ArgList/OptList, builds the underlying
# BaseMediaConverter object and re-blesses it into $class.
sub new {
    my ($class, $pluginlist, $inputargs, $hashArgOptLists) = @_;

    push(@{$pluginlist}, $class);

    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    push(@{$hashArgOptLists->{"OptList"}}, $options);

    my $self = BaseMediaConverter->new($pluginlist, $inputargs, $hashArgOptLists, 1);

    return bless($self, $class);
}

# Called at the start of processing.  Image OCR and document OCR are
# mutually exclusive: if both flags are set, print usage help to STDERR and
# exit with status 2.  Otherwise hand over to the parent class's begin().
sub begin {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $processor, $maxdocs) = @_;

    my $both_ocr_modes_requested
        = ($self->{'enable_image_ocr'} && $self->{'enable_document_ocr'});

    if ($both_ocr_modes_requested) {
        my @usage_lines = (
            "Please use the following command syntax for vision types: (--enable_image_ocr | --enable_document_ocr) [--enable_image_labelling]\n",
            "\t\t --enable_image_ocr : optical character recognition for text within images\n",
            "\t\t --enable_document_ocr : optical character recognition for text within documents\n",
            "\t\t --enable_image_labelling : annotation labeling for objects within images\n",
        );

        foreach my $usage_line (@usage_lines) {
            print STDERR $usage_line;
        }
        exit(2);
    }

    # @_ still holds the four arguments unpacked above
    return $self->SUPER::begin(@_);
}


# Inspect one line of output from the vision command.
#
# Returns the pair ($had_error, $generate_dot).  No error detection is done
# here ($had_error is always 0); $generate_dot is 1 when the line matches
# the pattern below.
sub vision_monitor_line {
    my ($line) = @_;

    my $generate_dot = ($line =~ m/^.*$/) ? 1 : 0;

    return (0, $generate_dot);
}

# Run vision.py once for every vision type enabled on this object
# (labelling, image OCR, document OCR) against $filename.
#
#   $filename    - input file handed to vision.py and to the cache set-up
#   $file        - name passed on to run_vision()
#   $doc_obj     - document object receiving text/metadata
#   $opt_section - optional section; defaults to the document's top section
#
# Each JSON output file is written into the cache directory chosen by
# init_cache_for_file().  Document-OCR outputs are additionally remembered
# in $self->{'gv-dococr-json-filename-recs'}.  Always returns "json".
sub run_gv_convert {
    my ($self, $filename, $file, $doc_obj, $opt_section) = @_;

    my $section = $opt_section;
    $section = $doc_obj->get_top_section() unless (defined $section);

    my $outhandle = $self->{'outhandle'};
    print $outhandle "----- GoogleVisionAPIConveter run_gv_convert -----\n";

    # Enabled vision types, in the order they are to be run
    my @vision_types = grep { $self->{$_} }
        ("enable_image_labelling", "enable_image_ocr", "enable_document_ocr");

    return "json" if (scalar(@vision_types) == 0);

    $self->init_cache_for_file($filename);
    my $cached_image_dir = $self->{'cached_dir'};

    # Credentials file lives in the collection's 'etc' directory
    my $credentials_filename
        = &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'}, "etc", $self->{'google_application_credentials'});

    foreach my $vision_type (@vision_types) {
        my $ofile     = "${vision_type}-google-vision-output.json";
        my $ofilename = &FileUtils::filenameConcatenate($cached_image_dir, $ofile);

        my $vision_cmd = "vision.py --$vision_type --credentials \"$credentials_filename\" \"$filename\" \"$ofilename\"";

        $self->run_vision($file, $filename, $ofile, $ofilename, $vision_cmd, $vision_type, $doc_obj, $section);

        next unless ($vision_type eq "enable_document_ocr");

        # Remember where the document-OCR JSON went, and for which section
        push(@{$self->{'gv-dococr-json-filename-recs'}},
             { 'filename' => $ofilename, 'section' => $section });
    }

    return "json";
}

# Compute the axis-aligned rectangle enclosing a Google Vision block's
# 'boundingBox' vertices.
#
#   $gv_block - hash ref with a 'boundingBox' => { 'vertices' => [ {x,y}, ... ] }
#
# Returns a hash ref { x_org, y_org, x_dim, y_dim }, or undef when the block
# has no vertices.
#
# A vertex that lacks 'x' or 'y' is treated as having that coordinate equal
# to 0: the JSON form of the Vision response leaves out zero-valued fields,
# so a box touching the left/top edge arrives without 'x'/'y' on some
# vertices.  Skipping such vertices (as was done previously) gave the wrong
# origin and size, and produced undefined values when no vertex carried the
# coordinate at all.
sub gv_ocr_bounding_box_rect
{
    my $self = shift (@_);
    my ($gv_block,) = @_;

    my $bbox_rect = undef;

    my $gv_boundingBox = (ref($gv_block) eq 'HASH') ? $gv_block->{'boundingBox'} : undef;
    my $gv_vertices    = (ref($gv_boundingBox) eq 'HASH') ? $gv_boundingBox->{'vertices'} : undef;

    # No usable vertex list => no rectangle
    if (ref($gv_vertices) ne 'ARRAY' || scalar(@$gv_vertices) == 0) {
        return $bbox_rect;
    }

    my ($min_x, $min_y, $max_x, $max_y);

    foreach my $gv_vertex (@$gv_vertices) {
        # an omitted coordinate means 0
        my $x = (defined $gv_vertex->{'x'}) ? $gv_vertex->{'x'} : 0;
        my $y = (defined $gv_vertex->{'y'}) ? $gv_vertex->{'y'} : 0;

        $min_x = $x if (!defined $min_x || ($x < $min_x));
        $max_x = $x if (!defined $max_x || ($x > $max_x));

        $min_y = $y if (!defined $min_y || ($y < $min_y));
        $max_y = $y if (!defined $max_y || ($y > $max_y));
    }

    # Dimensions are inclusive of both end pixels, hence the +1
    $bbox_rect = {
        "x_org" => $min_x,
        "y_org" => $min_y,
        "x_dim" => $max_x - $min_x + 1,
        "y_dim" => $max_y - $min_y + 1
    };

    return $bbox_rect;
}

# Run one Google Vision command (via the cached command runner), associate
# the JSON it produced with the document, and convert the JSON content into
# text and metadata on $section.
#
#   $file        - name shown in the progress message
#   $filename    - input file handed to the cached command runner
#   $ofile       - JSON output file name, "<vision_type>-google-vision-output.json"
#   $ofilename   - path the JSON output is read back from
#   $vision_cmd  - command line to execute
#   $vision_type - "enable_image_ocr", "enable_document_ocr" or
#                  "enable_image_labelling"
#   $doc_obj     - document object receiving the associated file and metadata
#   $section     - section of $doc_obj to attach to
#
# Dies if the JSON output file cannot be opened.
sub run_vision
{
    my $self = shift (@_);
    my ($file, $filename, $ofile, $ofilename, $vision_cmd, $vision_type, $doc_obj,$section) = @_;

    my $print_info = {
	'message_prefix' => "GoogleVisionAPI",
	'message'        => "Sending $file to GoogleVisionAPI using vision.py"
    };

    # The returned values are not consulted here: a run that produced no
    # output surfaces below, when the JSON file cannot be opened.
    my ($vision_regenerated,$vision_result,$vision_error)
        = $self->run_cached_general_cmd($vision_cmd,$filename,$ofilename,$print_info);

    # Need to work a bit harder in setting up the associated JSON file
    # => strip off 'enable_' in favour of 'gv_'
    # => add in section number as part of the file name to avoid clashes
    #    (dots in the section number become underscores)

    my $section_file_suffix = $section;
    $section_file_suffix =~ s/\./_/g;

    my $assoc_ofile = $ofile;
    $assoc_ofile =~ s/^enable_/gv_/;
    $assoc_ofile =~ s/-google-vision//;
    $assoc_ofile =~ s/\.(.*?)$/$section_file_suffix.$1/;

    $doc_obj->associate_file($ofilename,$assoc_ofile,"application/json",$section);

    my $json_text = do { # read in json file
        open(my $json_fh, "<:encoding(UTF-8)", $ofilename)
            or die("Can't open \"$ofilename\": $!\n");
        local $/;
        <$json_fh>
    };

    my $decoded_json = JSON::from_json($json_text);

    if ($vision_type eq "enable_document_ocr" || $vision_type eq "enable_image_ocr") {

	if (defined $decoded_json->{'textAnnotations'}) {
	    # first text annotation holds the full OCR content
	    my $ocr_text = $decoded_json->{'textAnnotations'}->[0]->{'description'};
	    $doc_obj->add_utf8_text($section, $ocr_text); # append text to section

	    # Group the recognised words by detected language (first page only)
	    my $blocks = $decoded_json->{'fullTextAnnotation'}->{'pages'}->[0]->{'blocks'};
	    my %text_and_language;

	    foreach my $block (@{$blocks}) {
		foreach my $paragraph (@{$block->{'paragraphs'}}) {
		    foreach my $word (@{$paragraph->{'words'}}) {
			my $detected_language = $word->{'property'}->{'detectedLanguages'}->[0]->{'languageCode'} || "no_lang";
			my $word_text = "";
			foreach my $letter (@{$word->{'symbols'}}) {
			    $word_text .= $letter->{'text'};
			}
			$text_and_language{$detected_language} .= $word_text . " ";
		    }
		}
	    }

	    # One metadata value per language, named "z_<languageCode>"
	    for (keys %text_and_language) {
		$doc_obj->add_utf8_metadata($section, "z_" . $_, $text_and_language{$_});
	    }

	}

	# Note: Even if there is no actual OCR'd text detected (if test above),
	# still set metadata that shows that we applied the Google Vision API seeking text

	my $assoc_json_metaname = "HasGoogleVision";

	if ($vision_type eq "enable_document_ocr") {
	    $assoc_json_metaname .= "DocumentOCRJSON";

	    $doc_obj->add_utf8_metadata($section, "GVDocumentOCRJSON",$assoc_ofile);
	}
	else {
	    # $vision_type eq "enable_image_ocr"
	    $assoc_json_metaname .= "ImageOCRJSON";

	    $doc_obj->add_utf8_metadata($section, "GVImageOCRJSON",$assoc_ofile);
	}

	$doc_obj->add_utf8_metadata($section, $assoc_json_metaname, 1);
    }
    elsif ($vision_type eq "enable_image_labelling") {
        my $labels = $decoded_json->{'labelAnnotations'};
        foreach my $label (@{$labels}) {
            # Write to metadata : 'description'='Book' 'score'='0.9' 'topicality'='0.9' 'mid'='/m/0123'
            $doc_obj->add_utf8_metadata($section, "description", $label->{'description'});
            $doc_obj->add_utf8_metadata($section, "score",       $label->{'score'});
            $doc_obj->add_utf8_metadata($section, "topicality",  $label->{'topicality'});
            $doc_obj->add_utf8_metadata($section, "mid",         $label->{'mid'});

	    # Write to metadata, e.g.,: 'descriptions'='Book' 'Book_score'='0.9' 'Book_topicality'='0.9' 'Book_mid'='/m/0123'
	    #
	    # ... but first generate a 'safe' metadata name, derived from the metadata value for 'description'
	    my $description_mdname = $label->{'description'};
	    # Must bind with =~ : a plain '=' would run the substitution on $_
	    # and store its match count as the name.
	    $description_mdname =~ s/\s/_/g; # replace whitespace with underscores

            $doc_obj->add_utf8_metadata($section, "descriptions", $label->{'description'});
            $doc_obj->add_utf8_metadata($section, $description_mdname . "_score", $label->{'score'});
            $doc_obj->add_utf8_metadata($section, $description_mdname . "_topicality", $label->{'topicality'});
            $doc_obj->add_utf8_metadata($section, $description_mdname . "_mid", $label->{'mid'});

        }

	$doc_obj->add_utf8_metadata($section, "HasGoogleVisionImageLabellingJSON", 1);
	$doc_obj->add_utf8_metadata($section, "GVImageLabellingJSON",$assoc_ofile);

    }
}

#
# OpenAnnotation Linked Data Prefix
#
# Build the linked-data URI prefix used for Open Annotation identifiers:
#
#   <base>/[<site>/]<collection>/[<opt_suffix>]
#
# <base> is $ENV{'OPENANNOTATION_LD_PREFIX'} when set to a true value,
# otherwise the placeholder "http://ld.greenstone.org/"; a trailing slash is
# added if missing.  <site> ($self->{'site'}, GS3 specific) is included only
# when defined and non-empty.  <collection> is $ENV{'GSDLCOLLECTION'}.
#
# Example of an opt_suffix:  "$OID/manifest"
#
# Example of an active GS3 URL that will serve up (for example) a manifest file:
#   https://mydomain.org/greenstone3/library/collection/hull-dictionary/document/HMS_1?sa=iiif-manifest&ed=1&excerptid-text=iiif-manifest&includeFileAssocOpenAnnotations=true
sub get_openannotation_ld_prefix
{
    my ($self, $opt_suffix) = @_;

    my $site    = $self->{'site'};
    my $collect = $ENV{'GSDLCOLLECTION'};

    my $base = $ENV{'OPENANNOTATION_LD_PREFIX'} || "http://ld.greenstone.org/";
    $base .= "/" unless ($base =~ m/\/$/);

    my @uri_parts = ($base);

    if (defined($site) && ($site ne "")) {
        push(@uri_parts, "${site}/");
    }

    push(@uri_parts, "${collect}/");

    if (defined($opt_suffix) && ($opt_suffix ne "")) {
        push(@uri_parts, "$opt_suffix");
    }

    return join("", @uri_parts);
}

# Begin a fresh Open Annotation list for $doc_obj.
#
# Stores an empty "sc:AnnotationList" structure in
# $self->{'openannotation-list'} and the URI prefix it was built with in
# $self->{'openannotation-uri-prefix'}.  $section is accepted but not used.
#
# Implication of the URI prefix is that the generated openannotation-list
# JSON content is bound to the site/collection where it has been imported.
# => if renaming a collection at the file system level, then
#  (i)  the versions of openannotation-list*.json in the collection's
#       'cache' dir need to be removed
#  (ii) and the collection rebuilt
sub start_openannotation_list
{
    my ($self, $doc_obj, $section) = @_;

    my $OID        = $doc_obj->get_OID();
    my $uri_prefix = $self->get_openannotation_ld_prefix();

    $self->{'openannotation-list'} = {
        '@context'  => "http://www.shared-canvas.org/ns/context.json",
        '@id'       => "${uri_prefix}${OID}/openannotation-list.json",
        '@type'     => "sc:AnnotationList",
        'resources' => [],
    };

    $self->{'openannotation-uri-prefix'} = $uri_prefix;
}

    
# Turn each Google Vision OCR block into an "oa:Annotation" resource and
# append it to the 'resources' list of $self->{'openannotation-list'}.
#
#   $gv_blocks - array ref of Google Vision blocks
#   $doc_obj   - document object (source of the OID; receives one
#                "GoogleVisionBlocks" metadata value per block on its top
#                section)
#   $section   - section the blocks belong to; "" occurs when the document
#                is a single image, in which case canvas 1 is used
#
# Shape of one generated resource:
#   {
#     "@context":   "http://iiif.io/api/presentation/2/context.json",
#     "@id":        "<prefix><OID[_section]>/annotation/gv-block-<n>",
#     "@type":      "oa:Annotation",
#     "motivation": [ "oa:commenting" ],
#     "on": [ { "@type": "oa:SpecificResource",
#               "full": "<prefix><OID>/canvas/<section>",
#               "selector": { "@type": "oa:Choice",
#                             "default": { oa:FragmentSelector, "xywh=x,y,w,h" },
#                             "item":    { oa:SvgSelector, "<svg>...</svg>" } } } ],
#     "resource": [ { "@type": "dctypes:Text", "chars": "<p>...</p>",
#                     "format": "text/html" } ]
#   }
#
# NOTE(review): the OCR text is placed inside <p>...</p> without HTML
# escaping -- confirm whether consumers of "chars" expect escaped text.
sub convert_gvocr_to_openannotation_resource
{
    my ($self, $gv_blocks, $doc_obj, $section) = @_;

    my $OID = $doc_obj->get_OID();

    my $OID_with_section = $OID;
    $OID_with_section = "${OID}_$section" if ($section ne "");
    $section = 1 if ($section eq ""); # occurs when the document is a single image

    my $resources   = $self->{'openannotation-list'}->{'resources'};
    my $top_section = $doc_obj->get_top_section();
    my $uri_prefix  = $self->{'openannotation-uri-prefix'};

    my $block_num = 0;

    foreach my $block (@{$gv_blocks}) {
        $block_num++;

        my $annotation_id_uri = "${uri_prefix}${OID_with_section}/annotation/gv-block-$block_num";
        $doc_obj->add_utf8_metadata($top_section, "GoogleVisionBlocks", $annotation_id_uri);

        # Rectangle enclosing the block
        my $rect = $self->gv_ocr_bounding_box_rect($block);
        my $x = $rect->{'x_org'};
        my $y = $rect->{'y_org'};
        my $w = $rect->{'x_dim'};
        my $h = $rect->{'y_dim'};

        # The SVG path starts at the bottom-left corner of the rectangle
        my $y_bottom = $y + $h;

        my $canvas_full_uri = "${uri_prefix}${OID}/canvas/$section";

        my $fragment_value = "xywh=${x},${y},${w},${h}";
        my $svg_value = "<svg xmlns='http://www.w3.org/2000/svg'><path xmlns='http://www.w3.org/2000/svg' d='M${x},${y_bottom}v-${h}h${w}v${h}z' data-paper-data='{&quot;state&quot;:null}' fill='none' fill-rule='nonzero' stroke='#008000' stroke-width='1' stroke-linecap='butt' stroke-linejoin='miter' stroke-miterlimit='10' stroke-dasharray='' stroke-dashoffset='0' font-family='none' font-weight='none' font-size='none' text-anchor='none' style='mix-blend-mode: normal'/></svg>";

        # One <p> per paragraph; words separated by single spaces
        my $block_text_html = "";

        foreach my $paragraph (@{$block->{'paragraphs'}}) {
            my $para_text = "";

            foreach my $word (@{$paragraph->{'words'}}) {
                my $word_text = "";
                foreach my $symbol (@{$word->{'symbols'}}) {
                    $word_text .= $symbol->{'text'};
                }

                $para_text = ($para_text ne "") ? $para_text . " " . $word_text : $para_text . $word_text;
            }

            $block_text_html .= "<p>\n$para_text\n</p>\n\n";
        }

        push(@{$resources}, {
            '@context'   => "http://iiif.io/api/presentation/2/context.json",
            '@id'        => $annotation_id_uri,
            '@type'      => "oa:Annotation",
            'motivation' => [ "oa:commenting" ],
            'on'         => [ {
                '@type'    => "oa:SpecificResource",
                'full'     => $canvas_full_uri,
                'selector' => {
                    '@type'   => "oa:Choice",
                    'default' => {
                        '@type' => "oa:FragmentSelector",
                        'value' => $fragment_value,
                    },
                    'item'    => {
                        '@type' => "oa:SvgSelector",
                        'value' => $svg_value,
                    },
                },
            } ],
            'resource'   => [ {
                '@type'  => "dctypes:Text",
                'chars'  => "$block_text_html",
                'format' => "text/html",
            } ],
        });
    }
}


# Load a Google Vision document-OCR JSON file and append one Open
# Annotation resource per block of its first page to the list currently
# being built.  Dies if the file cannot be opened.
sub convert_and_append_openannotation_resources
{
    my ($self, $gv_dococr_json_filename, $doc_obj, $section) = @_;

    open(my $json_fh, "<:encoding(UTF-8)", $gv_dococr_json_filename)
        or die("Can't open \"$gv_dococr_json_filename\": $!\n");

    # Slurp the whole file
    my $json_text = do { local $/; <$json_fh> };
    close($json_fh);

    my $decoded_json = JSON::from_json($json_text);
    my $first_page   = $decoded_json->{'fullTextAnnotation'}->{'pages'}->[0];

    $self->convert_gvocr_to_openannotation_resource($first_page->{'blocks'}, $doc_obj, $section);
}



# Write the Open Annotation list built so far to $json_ofilename as UTF-8
# encoded JSON, then clear the in-progress list state on $self.
#
#   $doc_obj        - not used here
#   $json_ofilename - file to (over)write
#
# Returns 1 on success, 0 if the file could not be opened or fully written
# (an error message is printed to STDERR in that case).
sub end_openannotation_list
{
    my $self = shift (@_);
    my ($doc_obj,$json_ofilename) = @_;

    my $ret_status = 1;

    # The output layer does the UTF-8 encoding, so the JSON must be
    # generated as a character string (to_json).  encode_json already
    # returns UTF-8 bytes, and sending those through an encoding layer
    # would encode non-ASCII text twice.
    my $json_out_fh;
    if (!open($json_out_fh, ">:encoding(UTF-8)", $json_ofilename)) {
	print STDERR "Error: Failed save Open Annotation List JSON to \"$json_ofilename\":\n    $!\n";
        $ret_status = 0;
    }
    else {
	my $openannotation_list = $self->{'openannotation-list'};
	my $openannotation_list_json_text = JSON::to_json($openannotation_list);

	print $json_out_fh $openannotation_list_json_text;

	# Buffered write errors only show up when the handle is closed
	if (!close($json_out_fh)) {
	    print STDERR "Error: Failed to finish writing Open Annotation List JSON to \"$json_ofilename\":\n    $!\n";
	    $ret_status = 0;
	}
    }

    $self->{'openannotation-list'} = undef;
    $self->{'openannotation-uri-prefix'} = undef;

    return $ret_status;
}


# For every recorded document-OCR JSON file, make sure a matching
# "openannotation-list<section>.json" exists in the cache directory
# (generating it when needed), associate it with the top section of
# $doc_obj, and set the related metadata.
#
#   $doc_obj                      - document object
#   $gv_dococr_json_filename_recs - array ref of { filename, section } records
#
# Returns 1 if every list was available/saved, 0 if any save failed.
#
# NOTE(review): -M grows with file age, so the test below regenerates when
# the OCR JSON is *older* than the cached list.  That looks inverted, but
# regeneration is also what adds the "GoogleVisionBlocks" metadata, so
# confirm the intended behaviour before changing it.
# NOTE(review): the cache directory is always derived from the first
# record's filename, not the current one -- presumably deliberate; verify.
sub openannotation_list_associate_json
{
    my ($self, $doc_obj, $gv_dococr_json_filename_recs) = @_;

    my $outhandle = $self->{'outhandle'};

    my $num_failed = 0;

    foreach my $rec (@$gv_dococr_json_filename_recs) {
        my $gv_json_filename = $rec->{'filename'};
        my $section          = $rec->{'section'};

        my ($cache_key) = ($gv_dococr_json_filename_recs->[0]->{'filename'} =~ m/^(.+)\.json$/);

        # sleight of hand so new directory spot in cache_dir picked out is where we want it!
        $cache_key .= "/";

        # (computed as before; the value is not used below)
        my $toplevel_cached_dir = &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'},"cached");

        $self->init_cache_for_file($cache_key);
        my $cached_dir = $self->{'cached_dir'};

        my $list_ofile     = "openannotation-list${section}.json";
        my $list_ofilename = &FileUtils::filenameConcatenate($cached_dir,$list_ofile);

        my $needs_json_regen
            = (!-f $list_ofilename) || (-M $gv_json_filename > -M $list_ofilename);

        my $saved_ok = 1;

        if (!$needs_json_regen) {
            print $outhandle "  OpenAnnotation-List: Cached file $list_ofilename already exists\n";
        }
        else {
            print $outhandle "  OpenAnnotation-List: Generating $list_ofilename\n";

            $self->start_openannotation_list($doc_obj);
            $self->convert_and_append_openannotation_resources($gv_json_filename, $doc_obj,$section);

            $saved_ok = $self->end_openannotation_list($doc_obj,$list_ofilename);
        }

        if (!$saved_ok) {
            $num_failed++;
            next;
        }

        my $top_section = $doc_obj->get_top_section();
        $doc_obj->associate_file($list_ofilename,$list_ofile,"application/json",$top_section);
        $doc_obj->add_utf8_metadata($section, "HasOpenAnnotationJSON", 1);
        $doc_obj->add_utf8_metadata($section, "OpenAnnotationJSONFilename",$list_ofile);

        # Also record this at the top-level of the doc
        $doc_obj->set_utf8_metadata_element($top_section, "ContainsOpenAnnotationJSON", 1);
    }

    return ($num_failed == 0) ? 1 : 0;
}


# Generate and associate Open Annotation lists, but only when document-OCR
# JSON files have been recorded on $self.  Returns 1 when there was nothing
# to do, otherwise the result of openannotation_list_associate_json().
sub opt_run_gen_openannotation
{
    my ($self, $doc_obj) = @_;

    my $recs = $self->{'gv-dococr-json-filename-recs'};

    unless (scalar(@$recs) > 0) {
        return 1;
    }

    my $ret_val_ok = $self->openannotation_list_associate_json($doc_obj, $recs);
    return $ret_val_ok;
}



#
# WebAnnotation Linked Data Prefix
#
# Build the linked-data URI prefix used for Web Annotation identifiers:
#
#   <base>/[<site>/]<collection>/[<opt_suffix>]
#
# <base> is $ENV{'WEBANNOTATION_LD_PREFIX'} when set to a true value,
# otherwise the placeholder "http://ld.greenstone.org/"; a trailing slash is
# added if missing.  <site> ($self->{'site'}, GS3 specific) is included only
# when defined and non-empty.  <collection> is $ENV{'GSDLCOLLECTION'}.
#
# Example of an opt_suffix:  "$OID/manifest"
#
# Example of an active GS3 URL that will serve up (for example) a manifest file:
#   https://mydomain.org/greenstone3/library/collection/hull-dictionary/document/HMS_1?sa=iiif-manifest&ed=1&excerptid-text=iiif-manifest&includeFileAssocOpenAnnotations=true
sub get_webannotation_ld_prefix
{
    my ($self, $opt_suffix) = @_;

    my $site    = $self->{'site'};
    my $collect = $ENV{'GSDLCOLLECTION'};

    my $ld_prefix = $ENV{'WEBANNOTATION_LD_PREFIX'} || "http://ld.greenstone.org/";

    # tack on a trailing slash, if not present
    $ld_prefix .= "/" unless ($ld_prefix =~ m/\/$/);

    my $have_site   = (defined $site) && ($site ne "");
    my $have_suffix = (defined $opt_suffix) && ($opt_suffix ne "");

    $ld_prefix .= "${site}/" if ($have_site); # GS3 specific
    $ld_prefix .= "${collect}/";
    $ld_prefix .= "$opt_suffix" if ($have_suffix);

    return $ld_prefix;
}


# Begin a fresh Web Annotation list for $doc_obj (work in progress).
#
# Stores an empty "sc:AnnotationList" structure in
# $self->{'webannotation-list'} and the URI prefix it was built with in
# $self->{'webannotation-uri-prefix'}.  $section is accepted but not used.
#
# Implication of the URI prefix is that the generated webannotation-list
# JSON content is bound to the site/collection where it has been imported.
# => if renaming a collection at the file system level, then
#  (i)  the versions of webannotation-list*.json in the collection's
#       'cache' dir need to be removed
#  (ii) and the collection rebuilt
sub start_webannotation_list_INPROGRESS
{
    my ($self, $doc_obj, $section) = @_;

    my $OID        = $doc_obj->get_OID();
    my $uri_prefix = $self->get_webannotation_ld_prefix();

    $self->{'webannotation-list'} = {
        '@context'  => "http://www.shared-canvas.org/ns/context.json",
        '@id'       => "${uri_prefix}${OID}/webannotation-list.json",
        '@type'     => "sc:AnnotationList",
        'resources' => [],
    };

    $self->{'webannotation-uri-prefix'} = $uri_prefix;
}



    
# Turn each Google Vision OCR block into a Web Annotation resource and
# append it to the 'resources' list of $self->{'webannotation-list'}
# (work in progress).
#
#   $gv_blocks - array ref of Google Vision blocks
#   $doc_obj   - document object (source of the OID)
#   $section   - section the blocks belong to; "" occurs when the document
#                is a single image, in which case canvas 1 is used
#
# Details on the difference between OpenAnnotation and WebAnnotation covered at
#    https://www.google.com/search?q=iiif+simpleannotationserver&sxsrf=ALiCzsbIpm1YO0SYE9sCXBQ231_oyEmopw:1672137985013&source=lnms&tbm=vid&sa=X&ved=2ahUKEwizu_K0z5n8AhXF1DgGHQ7FCb4Q_AUoA3oECAEQBQ&biw=1536&bih=742&dpr=1.25#fpstate=ive&vld=cid:07a4e9d9,vid:gFNWWIe5QpM
sub convert_gvocr_to_webannotation_resource_INPROGRESS
{
    my ($self, $gv_blocks, $doc_obj, $section) = @_;

    my $OID = $doc_obj->get_OID();

    my $OID_with_section = $OID;
    $OID_with_section = "${OID}_$section" if ($section ne "");
    $section = 1 if ($section eq ""); # occurs when the document is a single image

    my $resources  = $self->{'webannotation-list'}->{'resources'};
    my $uri_prefix = $self->{'webannotation-uri-prefix'};

    my $block_num = 0;

    foreach my $block (@{$gv_blocks}) {
        $block_num++;

        my $annotation_id_uri = "${uri_prefix}${OID_with_section}/annotation/gv-block-$block_num";

        # Rectangle enclosing the block
        my $rect = $self->gv_ocr_bounding_box_rect($block);
        my $x = $rect->{'x_org'};
        my $y = $rect->{'y_org'};
        my $w = $rect->{'x_dim'};
        my $h = $rect->{'y_dim'};

        my $canvas_full_uri = "${uri_prefix}${OID}/canvas/$section";
        my $manifest_id_uri = "${uri_prefix}${OID_with_section}/manifest";

        # One <p> per paragraph; words separated by single spaces
        my $block_text_html = "";

        foreach my $paragraph (@{$block->{'paragraphs'}}) {
            my $para_text = "";

            foreach my $word (@{$paragraph->{'words'}}) {
                my $word_text = "";
                foreach my $symbol (@{$word->{'symbols'}}) {
                    $word_text .= $symbol->{'text'};
                }

                $para_text = ($para_text ne "") ? $para_text . " " . $word_text : $para_text . $word_text;
            }

            $block_text_html .= "<p>\n$para_text\n</p>\n\n";
        }

        push(@{$resources}, {
            '@context'   => "http://iiif.io/api/presentation/2/context.json",
            'id'         => $annotation_id_uri,
            'type'       => "Annotation",
            'motivation' => [ "commenting" ],
            # Needs updating -- see the 'on' structure built by
            # convert_gvocr_to_openannotation_resource !!!!!!! *********
            'target'     => [ {
                'type'     => "oa:SpecificResource",
                'full'     => $canvas_full_uri,
                'selector' => {
                    'type'  => "oa:FragmentSelector",
                    'value' => "xywh=${x},${y},${w},${h}",
                },
                'within'   => {
                    'id'   => $manifest_id_uri,
                    'type' => "sc:Manifest",
                },
            } ],
            'body'       => [ {
                'type'   => "TextualBody",
                'chars'  => "$block_text_html",
                'format' => "text/html",
            } ],
        });
    }
}


# Load a Google Vision document-OCR JSON file and append one Web Annotation
# resource per block of its first page to the list currently being built
# (work in progress).  Dies if the file cannot be opened.
sub convert_and_append_webannotation_resources_INPROGRESS
{
    my ($self, $gv_dococr_json_filename, $doc_obj, $section) = @_;

    open(my $json_fh, "<:encoding(UTF-8)", $gv_dococr_json_filename)
        or die("Can't open \"$gv_dococr_json_filename\": $!\n");

    # Slurp the whole file
    my $json_text = do { local $/; <$json_fh> };
    close($json_fh);

    my $decoded_json = JSON::from_json($json_text);
    my $first_page   = $decoded_json->{'fullTextAnnotation'}->{'pages'}->[0];

    $self->convert_gvocr_to_webannotation_resource_INPROGRESS($first_page->{'blocks'}, $doc_obj, $section);
}



# Write the Web Annotation list built so far to $json_ofilename as UTF-8
# encoded JSON, then clear the in-progress list state on $self
# (work in progress).
#
#   $doc_obj        - not used here
#   $json_ofilename - file to (over)write
#
# Returns 1 on success, 0 if the file could not be opened or fully written
# (an error message is printed to STDERR in that case).
sub end_webannotation_list_INPROGRESS
{
    my $self = shift (@_);
    my ($doc_obj,$json_ofilename) = @_;

    my $ret_status = 1;

    # The output layer does the UTF-8 encoding, so the JSON must be
    # generated as a character string (to_json).  encode_json already
    # returns UTF-8 bytes, and sending those through an encoding layer
    # would encode non-ASCII text twice.
    my $json_out_fh;
    if (!open($json_out_fh, ">:encoding(UTF-8)", $json_ofilename)) {
	print STDERR "Error: Failed save Web Annotation List JSON to \"$json_ofilename\":\n    $!\n";
        $ret_status = 0;
    }
    else {
	my $webannotation_list = $self->{'webannotation-list'};
	my $webannotation_list_json_text = JSON::to_json($webannotation_list);

	print $json_out_fh $webannotation_list_json_text;

	# Buffered write errors only show up when the handle is closed
	if (!close($json_out_fh)) {
	    print STDERR "Error: Failed to finish writing Web Annotation List JSON to \"$json_ofilename\":\n    $!\n";
	    $ret_status = 0;
	}
    }

    $self->{'webannotation-list'} = undef;
    $self->{'webannotation-uri-prefix'} = undef;

    return $ret_status;
}

# For every recorded document-OCR JSON file, make sure a matching
# "webannotation-list<section>.json" exists in the cache directory
# (generating it when needed) and associate it with the top section of
# $doc_obj (work in progress).
#
#   $doc_obj                      - document object
#   $gv_dococr_json_filename_recs - array ref of { filename, section } records
#
# Returns 1 if every list was available/saved, 0 if any save failed.
#
# NOTE(review): -M grows with file age, so the test below regenerates when
# the OCR JSON is *older* than the cached list -- looks inverted; confirm
# the intended behaviour before changing it.
# NOTE(review): the cache directory is always derived from the first
# record's filename, not the current one -- presumably deliberate; verify.
sub webannotation_list_associate_json_INPROGRESS
{
    my ($self, $doc_obj, $gv_dococr_json_filename_recs) = @_;

    my $outhandle = $self->{'outhandle'};

    my $num_failed = 0;

    foreach my $rec (@$gv_dococr_json_filename_recs) {
        my $gv_json_filename = $rec->{'filename'};
        my $section          = $rec->{'section'};

        my ($cache_key) = ($gv_dococr_json_filename_recs->[0]->{'filename'} =~ m/^(.+)\.json$/);

        # sleight of hand so new directory spot in cache_dir picked out is where we want it!
        $cache_key .= "/";

        # (computed as before; the value is not used below)
        my $toplevel_cached_dir = &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'},"cached");

        $self->init_cache_for_file($cache_key);
        my $cached_dir = $self->{'cached_dir'};

        my $list_ofile     = "webannotation-list${section}.json";
        my $list_ofilename = &FileUtils::filenameConcatenate($cached_dir,$list_ofile);

        my $needs_json_regen
            = (!-f $list_ofilename) || (-M $gv_json_filename > -M $list_ofilename);

        my $saved_ok = 1;

        if (!$needs_json_regen) {
            print $outhandle "  WebAnnotation-List: Cached file $list_ofilename already exists\n";
        }
        else {
            print $outhandle "  WebAnnotation-List: Generating $list_ofilename\n";

            $self->start_webannotation_list_INPROGRESS($doc_obj);
            $self->convert_and_append_webannotation_resources_INPROGRESS($gv_json_filename, $doc_obj,$section);

            $saved_ok = $self->end_webannotation_list_INPROGRESS($doc_obj,$list_ofilename);
        }

        if (!$saved_ok) {
            $num_failed++;
            next;
        }

        my $top_section = $doc_obj->get_top_section();
        $doc_obj->associate_file($list_ofilename,$list_ofile,"application/json",$top_section);
    }

    return ($num_failed == 0) ? 1 : 0;
}


# Generate and associate Web Annotation lists, but only when document-OCR
# JSON files have been recorded on $self (work in progress).
#
# Returns 1 when there was nothing to do, otherwise the result of
# webannotation_list_associate_json_INPROGRESS().
sub opt_run_gen_webannotation_INPROGRESS
{    
    my $self = shift (@_);
    my ($doc_obj) = @_;

    my $gv_dococr_json_filename_recs = $self->{'gv-dococr-json-filename-recs'};
    my $num_gv_dococr_json_filename_recs = scalar(@$gv_dococr_json_filename_recs);

    my $ret_val_ok = 1;

    if ($num_gv_dococr_json_filename_recs > 0) {
	# The worker method in this file carries the _INPROGRESS suffix;
	# calling it without the suffix fails with "Can't locate object method".
	$ret_val_ok = $self->webannotation_list_associate_json_INPROGRESS($doc_obj,$gv_dococr_json_filename_recs);
    }

    return $ret_val_ok;
}


1;

