#########################################################################
#
# wekaDBBuildproc.pm -- builds up a CSV file to be used by Weka
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the 
# University of Waikato, New Zealand.
#
# Copyright (C) 1999 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################

package wekaDBBuildproc;

use strict; 
no strict 'refs'; # allow filehandles to be variables and viceversa

use multiread;
use Encode;
use JSON;

use util;

use extrabuildproc;

# Timing constants (in seconds) for the rows written by textedit():
# the label of the first row of a document is $av_time_slice, and the label
# of every following row is $av_frame_overlap larger than the previous one.
# NOTE(review): presumably these mirror the window length and hop size used
# when the av.json features were extracted -- confirm against the extractor.
my ($av_time_slice, $av_frame_overlap) = (6, 3);

# Set up inheritance at compile time: this class extends extrabuildproc.
BEGIN {
    our @ISA = qw(extrabuildproc);
}

# Constructor.
#
# All arguments after the class name are handed unchanged to the
# extrabuildproc constructor; the object it returns is then re-blessed
# into the requested class.
#
# Returns the new build-processor object.
sub new
{
    my $class = shift @_;

    # Explicit arrow method call: unlike the indirect-object form
    # ("new extrabuildproc (...)") it can never be parsed as a call to a
    # plain function named new().
    my $self = extrabuildproc->new(@_);

    return bless $self, $class;
}


# Read a JSON file and decode it into a Perl data structure.
#
# Parameters:
#   $json_filename - path of the JSON file to load
#
# Returns the decoded data, or undef when the file cannot be opened or its
# content cannot be parsed as JSON.  In both failure cases a diagnostic is
# printed to STDERR, so callers only need to test for undef.
sub read_json_file
{
    my $self = shift (@_);
    my ($json_filename) = @_;

    my $json_decoded_data = undef;

    # Three-argument open: the filename is always taken literally, so
    # leading/trailing whitespace or characters such as '>' and '|' in it
    # can never be interpreted as an open mode or a command.
    # The bareword handle is kept because multiread is handed the handle
    # by its fully-qualified name below.
    if (open(JSON_FILE, "<", $json_filename)) {
        my $json_file_content;

        my $json_file_reader = multiread->new();
        $json_file_reader->set_handle('wekaDBBuildproc::JSON_FILE');
        $json_file_reader->read_file(\$json_file_content);

        # The handle is no longer needed once the content has been read;
        # closing it here guarantees it is released even if parsing fails.
        close(JSON_FILE);

        # Next 2 lines result in an UTF8-friendly string
        my $json_file_content_bytes = encode('UTF-8', $json_file_content);
        $json_file_content = decode("utf8",$json_file_content_bytes);

        # JSON's decode() dies on malformed (or empty) input.  Trap that so
        # one bad file is reported like an unreadable one instead of
        # aborting the caller.
        my $parsed_ok = eval {
            $json_decoded_data = JSON->new->utf8->decode($json_file_content);
            1;
        };

        if (!$parsed_ok) {
            my $parse_error = (defined $@) ? $@ : "unknown error";
            chomp($parse_error);

            print STDERR "wekaDBBuildproc::read_json_file(): Failed to parse $json_filename\n";
            print STDERR "  $parse_error\n";

            $json_decoded_data = undef;
        }
    }
    else {
        print STDERR "wekaDBBuildproc::read_json_file(): Failed to open $json_filename\n";
        print STDERR "  $!\n";
    }

    return $json_decoded_data;
}

# Append the arousal/valence (AV) features of one document to the Weka CSV
# file <build_dir>/wekaDB/av-features.csv.
#
# The features are read from the file "av.json" inside the document's
# associated-files directory (<source_dir>/<assocfilepath>).  Its content
# must decode to a hash whose "arousal" and "valence" entries are arrays.
# One row is written per arousal value:
#
#     <doc-oid>-<t>,<arousal>,<valence>
#
# where <t> starts at $av_time_slice and grows by $av_frame_overlap for
# each following row.  A valence array shorter than the arousal array
# leaves the valence field of the surplus rows empty.
#
# Parameters:
#   $doc_obj - document object; get_OID(), get_top_section() and
#              get_metadata_element() are called on it
#   $file    - not used by this method
#   $mode    - "add" or "update" as passed by text()/textreindex();
#              not used by this method (see the note in the body)
#
# Returns the result of closing the CSV file (true on success) when rows
# were appended; returns nothing when the document was skipped because
# av.json was missing/invalid or the CSV file could not be opened.
sub textedit {
    my $self = shift (@_);
    my ($doc_obj,$file,$mode) = @_;

    # Code written on the assumption that wekaDB does a replace
    # operation when presented with a docid that already exists.
    # => don't need to do anything special to distinguish between
    #    a mode of "add" and "update"

    my $outhandle = $self->{'outhandle'};

    my $source_dir = $self->{'source_dir'}; # typically the archives dir
    my $build_dir  = $self->{'build_dir'};

    # full path to Weka CSV file
    my $weka_features_filename
        = &util::filename_cat($build_dir, "wekaDB", "av-features.csv");

    # get doc id
    my $doc_oid = $doc_obj->get_OID();

    # map to assoc dir
    my $top_section = $doc_obj->get_top_section();

    my $assoc_file       = $doc_obj->get_metadata_element ($top_section,"assocfilepath");
    my $assoc_filename   = &util::filename_cat($source_dir,$assoc_file);

    my $av_json_filename = &util::filename_cat($assoc_filename,"av.json");
    my $av_json_hashmap  = $self->read_json_file($av_json_filename);

    if (!defined $av_json_hashmap) {
        # read_json_file() has already reported the problem
        return;
    }

    # Validate the shape of the decoded data before dereferencing it, so
    # that an unexpected av.json skips this document with a message rather
    # than dying part-way through the build.
    if (ref($av_json_hashmap) ne 'HASH') {
        print STDERR "wekaDBBuildproc::textedit(): $av_json_filename does not contain a JSON object\n";
        return;
    }

    my $arousal_vals = $av_json_hashmap->{"arousal"};
    my $valence_vals = $av_json_hashmap->{"valence"};

    if (ref($arousal_vals) ne 'ARRAY' || ref($valence_vals) ne 'ARRAY') {
        print STDERR "wekaDBBuildproc::textedit(): $av_json_filename lacks an 'arousal' and/or 'valence' array\n";
        return;
    }

    print $outhandle "  Appending features for $doc_oid\n";

    my $AV_OUT;
    if (!open($AV_OUT,">>","$weka_features_filename")) {
        print STDERR "Failed to append to $weka_features_filename\n";
        print STDERR "  $!\n";
        return;
    }

    binmode($AV_OUT,":utf8");

    # The row label starts at $av_time_slice and advances by
    # $av_frame_overlap per row.
    my $t = $av_time_slice;
    foreach my $i (0 .. $#{$arousal_vals}) {
        my $arousal_val = $arousal_vals->[$i];
        my $valence_val = $valence_vals->[$i];

        print $AV_OUT "$doc_oid-$t,$arousal_val,$valence_val\n";
        $t += $av_frame_overlap;
    }

    # Output is buffered, so a failed write (e.g. disk full) may only
    # surface when the handle is closed -- report it instead of ignoring it.
    my $closed_ok = close($AV_OUT);
    if (!$closed_ok) {
        print STDERR "Failed to close $weka_features_filename\n";
        print STDERR "  $!\n";
    }

    return $closed_ok;
}

# Hook for a newly added document: forwards the document and file to
# textedit() with the mode "add" and passes back whatever it returns.
sub text {
    my ($self, $doc_obj, $file) = @_;

    return $self->textedit($doc_obj, $file, "add");
}

# Hook for a document being reindexed: forwards the document and file to
# textedit() with the mode "update" and passes back whatever it returns.
sub textreindex
{
    my ($self, $doc_obj, $file) = @_;

    return $self->textedit($doc_obj, $file, "update");
}

# Hook for a document being deleted.  Deletion is not supported for
# wekaDB: the arguments are ignored and only a warning is written to
# STDERR.
sub textdelete
{
    my ($self, $doc_obj, $file) = @_;

    # Once deletion is supported, this is expected to become:
    #   $self->textedit($doc_obj,$file,"delete");

    print STDERR "Warning: wekaDBBuildproc::textdelete() wekaDB not currently implemented\n";
}

1;
