/*
 *    WekaFindInstanceKNN.java
 *    Copyright (C) 2011 New Zealand Digital Library, http://www.nzdl.org
 *
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
package org.greenstone.gsdl3.util;


import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

import weka.core.neighboursearch.LinearNNSearch;
import weka.core.neighboursearch.NearestNeighbourSearch;
//import weka.core.neighboursearch.KDTree;

// Based on StackOverflow:
//   https://stackoverflow.com/questions/31350506/how-to-calculate-the-nearest-neighbors-using-weka-from-the-command-line

// The following was also useful as a reference:
//    https://waikato.github.io/weka-blog/posts/2018-10-08-making-a-weka-classifier/

/**
 * Static helpers for finding the k nearest neighbours of a sample within a
 * dataset loaded through Weka.
 *
 * Each sample is built with three attribute values, set in this order:
 * index 0 is a document-segment id (set as a String), index 1 is an arousal
 * value and index 2 is a valence value (both set as doubles).  For example:
 *
 *   ds_22716_5743-6,-0.549489,-0.118439
 *
 * Typical use is to call {@link #init(String)} once and then call
 * {@link #kNearestNeighbours(String, double, double, int)} for each query.
 * The loaded dataset and the search structure are held in static fields, so
 * they are shared by every caller in the JVM.
 *
 * NOTE(review): the static state is populated without any synchronisation;
 * confirm that callers do not invoke init() concurrently.
 */
public class WekaFindInstanceKNN 
{

    /**
     * Loads a dataset from the given file using Weka's DataSource.
     *
     * @param input_filename the file to read (main()'s usage message names
     *                       .arff and .csv files)
     * @return the loaded instances, or null if loading threw an exception
     *         (the stack trace is printed to stderr)
     */
    public static Instances loadDataset(String input_filename) 
    {
	Instances instances = null;

	try {
	    DataSource source = new DataSource(input_filename);
	    instances = source.getDataSet();
	}
	catch (Exception e) {
	    // Best-effort: report the problem and let the caller see null
	    e.printStackTrace();
	}
	
	return instances;
    }

    /**
     * Creates the nearest-neighbour search structure for a dataset.
     *
     * @param instances the dataset to search over
     * @return a LinearNNSearch constructed over the given instances
     */
    public static NearestNeighbourSearch initKNN(Instances instances) 
    {
	return new LinearNNSearch(instances);
    }

    /**
     * Prints a sample instance to stderr, followed by (tab-indented) up to
     * k_nearest of the neighbours found for it.
     *
     * @param sample_instance   the instance the search was run for
     * @param nearest_instances the neighbours returned by the search; may be
     *                          null, in which case only the sample is printed
     * @param k_nearest         the maximum number of neighbours to print
     */
    public static void printNearestKNN(Instance sample_instance, Instances nearest_instances, 
				      int k_nearest)
    {
	try {
	    System.err.println("\n" + sample_instance);

	    if (nearest_instances == null) {
		return;
	    }

	    // The search result is not guaranteed to hold exactly k_nearest
	    // instances, so never index beyond what was actually returned
	    int num_to_print = Math.min(k_nearest, nearest_instances.numInstances());

	    for (int i = 0; i < num_to_print; i++) {
		System.err.println("\t" + nearest_instances.instance(i));
	    }
	}
	catch (Exception e) {
	    // Printing is purely diagnostic, so it must never fail the caller
	    e.printStackTrace();
	}
    }

    // Dataset and search structure shared by init(), kNearestNeighbours() and main()
    static Instances input_instances_ = null;
    static NearestNeighbourSearch knn_ = null;
    
    /**
     * Loads the dataset and builds the search structure, if that has not
     * already been done.
     *
     * Once a dataset has been loaded successfully, later calls are no-ops:
     * input_filename is ignored, even if it names a different file.  If
     * loading fails, nothing is cached, so a later call can try again.
     *
     * @param input_filename the dataset file to load
     */
    public static void init(String input_filename)
    {
	if (input_instances_ == null) {
	    input_instances_ = loadDataset(input_filename);
	}

	if (input_instances_ == null) {
	    // Do not build (and cache) a search structure without a dataset:
	    // knn_ is only created once, so it would never be rebuilt after a
	    // later, successful load
	    System.err.println("**** WekaFindInstanceKNN::init() failed to load dataset from: " + input_filename);
	    return;
	}

	if (knn_ == null) {
	    knn_ = initKNN(input_instances_);
	}
    }

    /**
     * Builds a three-attribute query instance bound to the loaded dataset.
     *
     * Weka's setValue() calls throw unchecked exceptions when the instance has
     * no dataset or when a value is not acceptable for the attribute, so
     * callers are expected to invoke this inside their own try block.
     */
    private static Instance buildSampleInstance(String doc_id_segment, double arousal_val, double valence_val)
    {
	Instance sample_instance = new DenseInstance(3);

	// The dataset supplies the attribute definitions that the
	// setValue() calls below are resolved against
	sample_instance.setDataset(input_instances_);

	sample_instance.setValue(0, doc_id_segment);
	sample_instance.setValue(1, arousal_val);
	sample_instance.setValue(2, valence_val);

	return sample_instance;
    }

    /**
     * Finds the nearest neighbours of the given sample in the dataset loaded
     * by {@link #init(String)}.
     *
     * The parameters are logged to stderr on every call.  The returned set
     * can differ in size from k_nearest_num (the log message written on
     * success refers to equidistant instances being included).
     *
     * @param doc_id_segment value for attribute 0 of the sample
     * @param arousal_val    value for attribute 1 of the sample
     * @param valence_val    value for attribute 2 of the sample
     * @param k_nearest_num  the number of neighbours to ask the search for
     * @return the neighbours found, or null if init() has not successfully
     *         loaded a dataset, or if building the sample or running the
     *         search threw an exception (the stack trace is printed to stderr)
     */
    public static Instances kNearestNeighbours(String doc_id_segment, double arousal_val,double valence_val, int k_nearest_num)
    {
	System.err.println("**** wekaFindInstnaceKNN::kNearestNeighbours() called with:");
	System.err.print(  "  doc_id_segment = " + doc_id_segment);
	System.err.print(  "  arousal_val = "   + arousal_val);
	System.err.print(  "  valence_val = "   + valence_val);
	System.err.println("  k_nearest_num = " + k_nearest_num);
	
	if (input_instances_ == null || knn_ == null) {
	    System.err.println("**** WekaFindInstanceKNN::kNearestNeighbours() called before a dataset was successfully loaded by init()");
	    return null;
	}

	Instances nearest_instances = null;
	try {
	    // Built inside the try so that a sample Weka rejects is reported
	    // the same way as any other search failure: logged, null returned
	    Instance sample_instance = buildSampleInstance(doc_id_segment, arousal_val, valence_val);

	    nearest_instances = knn_.kNearestNeighbours(sample_instance, k_nearest_num);
	    System.err.println("**** Expanded to include instances that are equidistant, nearest_instances len = " + nearest_instances.numInstances());
	}
	catch (Exception e) {
	    e.printStackTrace();
	}

	return nearest_instances;
    }
    
    /**
     * Command-line entry point: loads the given dataset and prints the
     * nearest neighbours of one hard-wired sample instance.
     *
     * Usage: k-nearest-num file.{arff,csv}
     *
     * Exits with status 1 if the arguments are invalid or the dataset
     * cannot be loaded.
     *
     * @param args args[0] is the number of neighbours to find,
     *             args[1] is the dataset file
     */
    public static void main(String[] args)
    {
	// First example output, when working through the instances specified in the CSV file
	// (looking for similarities amongst all the instances in the CSV file)

	// ds_22716_5743-6,-0.549489,-0.118439
	//    ds_22761_1171-12,-0.549489,-0.118439
	//    ds_21046_7743-30,-0.549489,-0.118439
	//    ds_24768_23507-6,-0.549489,-0.118439
	//    ds_22761_1171-15,-0.549489,-0.118439

	if (args.length != 2) {
	    System.err.println("Usage: k-nearest-num file.{arff,csv}");
	    System.exit(1);
	}

	String k_nearest_str  = args[0];
	String input_filename = args[1];
	
	int k_nearest;
	try {
	    k_nearest = Integer.parseInt(k_nearest_str);
	}
	catch (NumberFormatException e) {
	    System.err.println("Error: k-nearest-num must be an integer, but was '" + k_nearest_str + "'");
	    System.err.println("Usage: k-nearest-num file.{arff,csv}");
	    System.exit(1);
	    return; // unreachable, but needed so k_nearest is definitely assigned below
	}

	if (k_nearest < 1) {
	    System.err.println("Error: k-nearest-num must be at least 1, but was " + k_nearest);
	    System.exit(1);
	}
	 
	System.out.println("Weka Command Line Find Nearest " + k_nearest_str
			   + " Neighbors for each Instance in "  + input_filename);

	init(input_filename);

	if (input_instances_ == null || knn_ == null) {
	    System.err.println("Error: unable to load dataset from " + input_filename);
	    System.exit(1);
	}

	try {
	    // sample sample:
	    //   ds_22716_5743-6,-0.549489,-0.118439
	    Instance sample_instance = buildSampleInstance("ds_22716_5743-6", -0.549489, -0.118439);

	    Instances nearest_instances = knn_.kNearestNeighbours(sample_instance, k_nearest);
	    System.out.println("**** nearest_instances len = " + nearest_instances.numInstances());

	    printNearestKNN(sample_instance,nearest_instances, k_nearest);
	}
	catch (Exception e) {
	    e.printStackTrace();
	}
    }
}
