###########################################################################
#
# jSongMinerExtractor - helper plugin that identifies audio through
#                       external web services, based on either a
#                       fingerprint computed from the audio or the
#                       ID3 title and artist tags
#
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the 
# University of Waikato, New Zealand.
#
# Copyright (C) 2010 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################
package jSongMinerExtractor;

use BaseMediaConverter;

use Cwd;
use URI::Escape;

use strict;
no strict 'refs'; # allow filehandles to be variables and viceversa


# Establish the inheritance chain at compile time: this package derives
# from BaseMediaConverter.
BEGIN {
    @jSongMinerExtractor::ISA = qw(BaseMediaConverter);
}


# Choices offered by the 'track_identification' enum argument.  Each
# 'desc' is a "{...}" placeholder string rather than literal text.
my $track_identification_choices = [
    { 'name' => "Fingerprint then ID3 tags",
      'desc' => "{jSongMinerExtractor.fingerprint_first}" },
    { 'name' => "ID3 tags only",
      'desc' => "{jSongMinerExtractor.only_ids}" },
    { 'name' => "Disabled",
      'desc' => "{jSongMinerExtractor.off}" },
];

# Argument specifications contributed by this package (appended to the
# shared "ArgList" by the constructor).
my $arguments = [
    {
        'name' => "track_identification",
        'desc' => "{jSongMinerExtractor.track_identification}",
        'type' => "enum",
        'list' => $track_identification_choices,
        'deft' => 'Fingerprint then ID3 tags',
        'reqd' => "no",
    },
];

# Option block describing this package (appended to the shared "OptList"
# by the constructor).
my $options = {
    'name'     => "jSongMinerExtractor",
    'desc'     => "{jSongMinerExtractor.desc}",
    'abstract' => "yes",
    'inherits' => "yes",
    'args'     => $arguments,
};

# Constructor.
#
# ARG 1: $pluginlist      - array ref; this class name is appended to it
# ARG 2: $inputargs       - passed through unchanged to BaseMediaConverter
# ARG 3: $hashArgOptLists - hash ref; this package's argument and option
#                           specifications are appended to its "ArgList"
#                           and "OptList" entries
#
# Returns the object built by BaseMediaConverter, re-blessed into $class,
# with 'jmir_directory' set to <GEXT_MUSICIR>/lib/java.
sub new {
    my ($class) = shift (@_);
    my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
    push(@{$hashArgOptLists->{"OptList"}},$options);

    # Use an explicit class-method call.  The indirect-object form
    # ("new BaseMediaConverter(...)") relies on parser heuristics and is
    # particularly fragile here because this package defines its own 'new'.
    # NOTE(review): the meaning of the trailing 1 is defined by
    # BaseMediaConverter and is not visible from this file.
    my $self = BaseMediaConverter->new($pluginlist, $inputargs, $hashArgOptLists, 1);

    # Set controlling variables
    # NOTE(review): GEXT_MUSICIR is assumed to be set in the environment;
    # if it is not, the directory computed here will not exist and
    # retrieve_metadata() will report that it cannot find it.
    my $music_ir_home = $ENV{'GEXT_MUSICIR'};

    # Directory holding the jMIR .jar files
    $self->{'jmir_directory'} = &util::filename_cat($music_ir_home,"lib","java");

    return bless $self, $class;
}

# URL encode the given string by handing it to URI::Escape's uri_escape()
sub urlEncode{
	# ARG 1: the string to URL encode
	my $to_encode = shift(@_);

	return uri_escape($to_encode);
}

# URL decode the given string, then decode the resulting bytes as UTF-8
sub urlDecode{
	# ARG 1: $to_decode is the string to URL decode
	my ($to_decode) = @_;

	# Encode is not loaded anywhere in this file, so load it here rather
	# than depending on some other module having pulled it in already.
	require Encode;

	# The original author wanted a single 'uri_unescape_utf8' call here
	# but noted that URI::Escape does not provide one; unescaping first
	# and decoding the UTF-8 bytes afterwards will suffice.
	my $decoded = Encode::decode_utf8(uri_unescape($to_decode));

	return $decoded;
}

# Map an ID3v1 numeric genre code onto its genre name.
#
# ARG 1: $genre_num is the genre value as found in the metadata.  It may
#        be wrapped in one pair of brackets -- (), [] or {} -- and the
#        round brackets may be written as the entities &#40; / &#41;.
#
# Returns:
#   - the genre name, when the unwrapped value is one of the codes 0..147
#   - "Unknown", when the unwrapped value is all digits and >= 148
#   - the unwrapped value itself in every other case (e.g. a genre that
#     is already textual)
sub map_id3v1_genre_num
{
    my ($genre_num) = @_;

    # Turn entity-encoded round brackets back into real ones, then strip
    # a single enclosing bracket pair
    $genre_num =~ s/&#40;/\(/g;
    $genre_num =~ s/&#41;/\)/g;
    $genre_num =~ s/^[\(\[\{](.*)[\)\]\}]$/$1/;

    # Numeric codes past the end of the table have no name
    my $is_all_digits = ($genre_num =~ m/^\d+$/);
    return "Unknown" if ($is_all_digits && ($genre_num >= 148));

    # Genre names in code order: position in this list == genre code
    my @genre_names_in_code_order = (
        # 0 - 9
        "Blues", "Classic Rock", "Country", "Dance", "Disco",
        "Funk", "Grunge", "Hip-Hop", "Jazz", "Metal",
        # 10 - 19
        "New Age", "Oldies", "Other", "Pop", "R&B",
        "Rap", "Reggae", "Rock", "Techno", "Industrial",
        # 20 - 29
        "Alternative", "Ska", "Death Metal", "Pranks", "Soundtrack",
        "Euro-Techno", "Ambient", "Trip-Hop", "Vocal", "Jazz+Funk",
        # 30 - 39
        "Fusion", "Trance", "Classical", "Instrumental", "Acid",
        "House", "Game", "Sound Clip", "Gospel", "Noise",
        # 40 - 49
        "Alternative Rock", "Bass", "Soul", "Punk", "Space",
        "Meditative", "Instrumental Pop", "Instrumental Rock", "Ethnic", "Gothic",
        # 50 - 59
        "Darkwave", "Techno-Industrial", "Electronic", "Pop-Folk", "Eurodance",
        "Dream", "Southern Rock", "Comedy", "Cult", "Gangsta",
        # 60 - 69
        "Top 40", "Christian Rap", "Pop/Funk", "Jungle", "Native US",
        "Cabaret", "New Wave", "Psychadelic", "Rave", "Showtunes",
        # 70 - 79
        "Trailer", "Lo-Fi", "Tribal", "Acid Punk", "Acid Jazz",
        "Polka", "Retro", "Musical", "Rock & Roll", "Hard Rock",
        # 80 - 89
        "Folk", "Folk-Rock", "National Folk", "Swing", "Fast Fusion",
        "Bebob", "Latin", "Revival", "Celtic", "Bluegrass",
        # 90 - 99
        "Avantgarde", "Gothic Rock", "Progressive Rock", "Psychedelic Rock", "Symphonic Rock",
        "Slow Rock", "Big Band", "Chorus", "Easy Listening", "Acoustic",
        # 100 - 109
        "Humour", "Speech", "Chanson", "Opera", "Chamber Music",
        "Sonata", "Symphony", "Booty Bass", "Primus", "Porn Groove",
        # 110 - 119
        "Satire", "Slow Jam", "Club", "Tango", "Samba",
        "Folklore", "Ballad", "Power Ballad", "Rhythmic Soul", "Freestyle",
        # 120 - 129
        "Duet", "Punk Rock", "Drum Solo", "Acapella", "Euro-House",
        "Dance Hall", "Goa", "Drum & Bass", "Club - House", "Hardcore",
        # 130 - 139
        "Terror", "Indie", "BritPop", "Negerpunk", "Polsk Punk",
        "Beat", "Christian Gangsta Rap", "Heavy Metal", "Black Metal", "Crossover",
        # 140 - 147
        "Contemporary Christian", "Christian Rock", "Merengue", "Salsa", "Thrash Metal",
        "Anime", "JPop", "Synthpop",
    );

    # Key the names by their code written as a plain decimal string
    # ("0" .. "147").  The lookup is by exact string, so a value such as
    # "017" is deliberately not treated as code 17.
    my %genre_name_for;
    @genre_name_for{ 0 .. $#genre_names_in_code_order } = @genre_names_in_code_order;

    if (defined $genre_name_for{$genre_num}) {
        return $genre_name_for{$genre_num};
    }

    # Not a known code: hand back the (unwrapped) value untouched
    return $genre_num;
}

# Rewrite every ex.ID3.Genre value on the document's top section so that
# numeric ID3v1 genre codes are replaced by their genre names.
#
# ARG 1: $doc_obj is the document object whose metadata is updated in place
sub check_for_existing_id3_genre
{
    my ($self, $doc_obj) = @_;

    my $genre_md_name = "ex.ID3.Genre";
    my $section = $doc_obj->get_top_section();

    # Work out all replacement values first, while the existing ones are
    # still attached to the document
    my $current_genres = $doc_obj->get_metadata($section,$genre_md_name);
    my @mapped_genres = map { map_id3v1_genre_num($_) } @$current_genres;

    # Swap the old values for the mapped ones
    $doc_obj->delete_metadata($section,$genre_md_name);
    for my $mapped_genre (@mapped_genres) {
        $doc_obj->add_utf8_metadata($section,$genre_md_name,$mapped_genre);
    }
}


# Read the metadata text file produced by jSongMiner and attach its
# contents to the top section of $doc_obj.
#
# The file is consumed two lines at a time: a metadata name line followed
# by its value line, both URL encoded.  A trailing name line that has no
# value line after it is ignored.
#
# ARG 1: $doc_obj is the document object the metadata is added to
# ARG 2: $target_txt_file_path is the path of the text file to read
#
# If the file cannot be opened, an error is printed to STDERR and the
# document is left untouched.
sub parse_txt_metadata
{
    my $self = shift @_;
    my ($doc_obj,$target_txt_file_path) = @_;

    # Three-argument open with a lexical handle: the path is taken
    # literally (no mode characters or surrounding whitespace are
    # interpreted) and the handle cannot clash with a global bareword.
    my $metadata_fh;
    if (open($metadata_fh,"<",$target_txt_file_path)) {

        my $top_section=$doc_obj->get_top_section();

        my ($md_name, $md_value);

        while (defined($md_name=<$metadata_fh>) && defined($md_value=<$metadata_fh>)) {

            # The file is written by an external Java process, so lines may
            # end in CRLF.  Real content is URL encoded, which means a raw
            # carriage return here can only be part of the line terminator.
            $md_name =~ s/\r?\n\z//;
            $md_value =~ s/\r?\n\z//;

            # '+' stands for an encoded space: dropped from names, turned
            # back into a space in values
            $md_name =~ s/\+//g;
            $md_value =~ s/\+/ /g;

            $md_name = urlDecode($md_name);
            $md_value = urlDecode($md_value);

            # Tidy up the metadata name:
            #  - drop a trailing bracketed qualifier
            #  - "Last.FM" -> "LastFM"
            #  - every ':' becomes '^', then the first '^' (together with
            #    an "API" immediately before it) becomes '.'
            $md_name =~ s/\(.*?\)$//s; # can stretch over multiple lines
            $md_name =~ s/Last\.FM/LastFM/g;
            $md_name =~ s/:/^/g;
            $md_name =~ s/(API)?\^/./;

            # Genre fields may carry an ID3v1 numeric code; store its name
            if ($md_name =~ m/genre$/i) {
                $md_value = map_id3v1_genre_num($md_value);
            }

            $doc_obj->add_utf8_metadata($top_section,$md_name,$md_value);
        }

        close($metadata_fh);

        # Any ex.ID3.Genre values already on the document get the same
        # numeric-code to name treatment
        $self->check_for_existing_id3_genre($doc_obj);

    }
    else {
        # $! holds the operating-system reason for the failed open
        print STDERR "Error: Failed to open $target_txt_file_path\n";
        print STDERR "       $!\n";
    }
}


# Private helper: return $file_path in the form expected by the external
# java process.  Under Cygwin the path is converted with 'cygpath -w';
# on every other platform it is returned unchanged.
sub _jsongminer_os_path
{
    my ($file_path) = @_;

    return $file_path if ($^O ne "cygwin");

    my $windows_file_path = `cygpath -w "$file_path"`;
    $windows_file_path =~ s/\s+$//; # drop the newline cygpath prints

    return $windows_file_path;
}

# Run jSongMiner over an audio file, asking it to save what it finds as a
# text metadata file and an ACE XML file.
#
# ARG 1: $source_file_path is the audio file to identify
# ARG 2: $id3_title is passed as -title when defined
# ARG 3: $id3_artist is passed as -artist when defined
# ARG 4: $convert_options holds extra command-line options for jSongMiner
#        (optional, defaults to "")
#
# Returns the two-element list
#   ($target_acexml_file_path, $target_txt_file_path)
# These paths are returned even when jSongMiner could not be run, in which
# case the files may not exist.
sub retrieve_metadata
{
    my $self = shift(@_);
    my ($source_file_path,$id3_title,$id3_artist,$convert_options) = @_;

    $convert_options = "" if (!defined $convert_options);

    my $outhandle = $self->{'outhandle'};
    my $verbosity = $self->{'verbosity'};

    # File::Basename is not loaded anywhere in this file, so load it here
    # rather than depending on another module having done so.
    require File::Basename;
    my $source_file_no_path = &File::Basename::basename($source_file_path);

    # NOTE(review): presumably this is what sets 'enable_cache',
    # 'cached_dir' and 'cached_file_root' read below -- confirm against
    # the base class.
    $self->init_cache_for_file($source_file_path);

    my $target_txt_file_path;
    my $target_acexml_file_path;

    if ($self->{'enable_cache'}) {
        # Output files live in the cache directory, named after the
        # cached file root
        my $cached_dir = $self->{'cached_dir'};
        my $file_root = $self->{'cached_file_root'};

        my $target_txt_file = "${file_root}_metadata.txt";
        my $target_acexml_file = "${file_root}.xml";

        $target_txt_file_path = &util::filename_cat($cached_dir,$target_txt_file);
        $target_acexml_file_path = &util::filename_cat($cached_dir,$target_acexml_file);
    }
    else {
        $target_txt_file_path = &util::get_tmp_filename("_metadata.txt");
        $target_acexml_file_path = &util::get_tmp_filename(".xml");
    }

    my $jmir_directory = $self->{'jmir_directory'};

    # jSongMiner is run from inside its own directory; remember where we
    # started so the working directory can be restored afterwards
    my $store_cwd = cwd();

    if (!-d $jmir_directory) {
        print STDERR "Error: Unable to find directory '$jmir_directory'\n";
        print STDERR "       Cannot run jSongMiner\n";
    }
    elsif (chdir($jmir_directory)) {

        my $source_file_path_os = _jsongminer_os_path($source_file_path);
        my $target_txt_file_path_os = _jsongminer_os_path($target_txt_file_path);
        my $target_acexml_file_path_os = _jsongminer_os_path($target_acexml_file_path);

        # NOTE(review): the title, artist and file paths are interpolated
        # into a shell command line inside double quotes without any
        # escaping.  ID3 tags come from the audio file itself, so a value
        # containing '"', '$' or a backtick will break -- or alter -- the
        # command.  The right escaping depends on the shell that
        # autorun_general_cmd uses, so it has not been changed here.
        my $jsongminer_cmd = "java -Xmx1024M -jar \"" . $jmir_directory . "/jSongMiner.jar\" $convert_options";
        $jsongminer_cmd .= " -title \"$id3_title\"" if defined $id3_title;
        $jsongminer_cmd .= " -artist \"$id3_artist\"" if defined $id3_artist;
        $jsongminer_cmd .= " -audio \"$source_file_path_os\"";
        $jsongminer_cmd .= " -savetxtfile \"$target_txt_file_path_os\"";
        $jsongminer_cmd .= " -saveacexmlfile \"$target_acexml_file_path_os\"";

        if ($verbosity>2) {
            print $outhandle "jSongMinerExtractor: Running ...\n";
            print $outhandle "jSongMinerExtractor:   $jsongminer_cmd\n";
        }

        my $print_info = { 'message_prefix' => "jSongMiner",
                           'message' => "jSongMinerExtractor: Retrieving audio metadata for $source_file_no_path" };

        # The values returned are not inspected here; the caller finds out
        # whether the run worked when it tries to read the output files
        my ($regenerated,$result,$had_error)
            = $self->autorun_general_cmd($jsongminer_cmd,$source_file_path,$target_txt_file_path,$print_info);

        if ($verbosity>2) {
            print $outhandle "jSongMinerExtractor: ...done\n";
        }
    }
    else {
        print STDERR "Error: failed to change directory to '$jmir_directory'\n";
        print STDERR "       Cannot run jSongMiner\n";
    }

    # Always go back to the starting directory; later code that uses
    # relative paths would otherwise misbehave without any indication why
    if (!chdir($store_cwd)) {
        print STDERR "Error: failed to change directory back to '$store_cwd'\n";
        print STDERR "       $!\n";
    }

    return ($target_acexml_file_path,$target_txt_file_path);
}
    



1;