###########################################################################
#
# wekaDBBuilder.pm -- topup builder that gets Weka ML initialized correctly
#
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the 
# University of Waikato, New Zealand.
#
# Copyright (C) 1999 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################

package wekaDBBuilder;

use strict;
no strict 'refs'; # allow filehandles to be variables and viceversa

use extrabuilder;

# Establish inheritance at compile time: wekaDBBuilder is-a extrabuilder.
BEGIN {
    @wekaDBBuilder::ISA = qw(extrabuilder);
}


# Constructor.
#
# Hands every argument after the class name to extrabuilder's constructor,
# re-blesses the resulting object into $class, and records "wekaDB" as the
# object's 'buildtype'.
#
# Returns the new object.
sub new {
    my $class = shift(@_);

    # Explicit arrow call instead of the indirect-object form
    # ("new extrabuilder (...)"): the indirect form is resolved by parser
    # heuristics that depend on which subs and packages are known at
    # compile time, and it is unavailable when the 'indirect' feature is off.
    my $self = extrabuilder->new(@_);
    $self = bless $self, $class;

    $self->{'buildtype'} = "wekaDB";

    return $self;
}


# Returns the name of the default build processor for this builder,
# which is always the string "wekaDBBuildproc".
sub default_buildproc {
    my ($self) = @_;    # invocant is not consulted

    return 'wekaDBBuildproc';
}


# Build the wekaDB "index" for the collection.
#
# Steps:
#   1. make sure <build_dir>/wekaDB exists;
#   2. unless this is an incremental build, (re)create av-features.csv in
#      that directory, containing only the CSV header line;
#   3. put the build processor into 'text' mode, reset it, and run the
#      source documents through it via plugin::begin / read / end.
#
# Arguments:
#   $indexname - read from the argument list but not otherwise used by
#                this method.
#
# Returns the value of plugin::end on success.  If the features file cannot
# be opened, written or closed, an error is reported on STDERR and the
# method returns early with nothing (no documents are processed).
sub build_indexes {
    my $self = shift (@_);
    my ($indexname) = @_;

    my $outhandle = $self->{'outhandle'};
    my $build_dir = $self->{'build_dir'};
    my $verbosity = $self->{'verbosity'};

    print $outhandle "\n*** Weka Machine Learning builder\n"  if ($verbosity >= 1);

    my $wekaDB_dir = &util::filename_cat($build_dir, "wekaDB");

    if (! -d $wekaDB_dir) {
	&util::mk_all_dir ($wekaDB_dir);
    }

    # Non-empty (and therefore true) only for a full, non-incremental build.
    my $opt_create_index = ($self->{'incremental'}) ? "" : "-removeold";

    if ($opt_create_index) {

	# init Weka CSV feature file

	my $weka_features_filename = &util::filename_cat($wekaDB_dir,"av-features.csv");

	print $outhandle "\n    creating Weka CSV Features File\n"  if ($verbosity >= 1);

	# Create a CSV features file with the headline in it.  The first
	# failure (open, print or close) is remembered in $create_error.
	my $create_error;
	if (open(my $weka_fout, '>', $weka_features_filename)) {
	    binmode($weka_fout,":utf8");

	    print $weka_fout "IdentifierWithTimeOffset,Arousal,Valence\n"
		or $create_error = "$!";

	    # Output is buffered, so a failed write (e.g. a full disk) may
	    # only be reported when the handle is closed.
	    if (!close($weka_fout)) {
		$create_error = "$!" unless defined $create_error;
	    }
	}
	else {
	    $create_error = "$!";
	}

	if (defined $create_error) {
	    print STDERR "Error: failed to create:\n";
	    print STDERR "         $weka_features_filename\n";
	    print STDERR "       $create_error\n";
	    return;
	}

    }

    # Run the docs through the wekaDB document processor

    $self->{'buildproc'}->set_mode ('text');
    $self->{'buildproc'}->reset();
    &plugin::begin($self->{'pluginfo'}, $self->{'source_dir'},
		   $self->{'buildproc'}, $self->{'maxdocs'});
    &plugin::read ($self->{'pluginfo'}, $self->{'source_dir'},
		   "", {}, {}, $self->{'buildproc'}, $self->{'maxdocs'}, 0, $self->{'gli'});

    # Kept as the final statement so its value remains the return value.
    &plugin::end($self->{'pluginfo'});
}

1;

