package org.hathitrust.lrl.pagelevel;

import java.io.File;
import java.io.FileWriter;
import java.util.ArrayList;
import java.util.HashMap;

import org.hathitrust.lrl.core.SeedWords;
import org.hathitrust.lrl.core.UtilIO;
import org.json.JSONArray;
import org.json.JSONObject;

/**
 * Command-line utility that works with the filtered seed-word list supplied by
 * {@link SeedWords}: it can turn the words into OR-grouped Solr query strings,
 * post a query to the Solr Extracted Features page-level index and collect the
 * matching page ids, and keep a running tally of how often each page id is seen.
 *
 * <p>The {@link #main(String[])} entry point currently only writes the filtered
 * seed words to a file in the {@code data-output} directory.
 *
 * <p>All state is held in static fields; nothing here is synchronized.
 */
public class FilterSeedWords 
{	
	/** Maximum number of seed-word terms OR-ed together in a single query string. */
	public final static int NumQueryTerms = 10;
	/** Value sent as the Solr {@code rows} argument (maximum docs returned per query). */
	public final static int NumRowsPerPage = 5000;

	// Debug switches; not consulted by any code in this class.
	public static boolean DEBUG = true;
	public static int     DEBUG_QCOUNT_LIMIT = 2;
	
	/** Running tally: page id -> number of times it has been passed to {@link #addIdsToMap}. */
	protected static HashMap<String,Integer> page_id_map = new HashMap<String,Integer>();

	/**
	 * Groups the given words into Solr query strings of at most
	 * {@link #NumQueryTerms} terms each, with the terms of a group joined by
	 * {@code " OR "}.
	 *
	 * <p>Every word ends up in exactly one group; when the number of words is
	 * not a multiple of {@link #NumQueryTerms} the final group is simply shorter.
	 *
	 * @param words the words to query for, in the order they should appear
	 * @return one query string per group, in input order; empty if {@code words} is empty
	 */
	public static ArrayList<String> generateQueries(ArrayList<String> words)
	{
		// To run queries across all languages, terms have the form:
		//   (alllangs_htrctokentext:whakapapa)
		// For just the English full text, as identified by the OpenNLP
		//    processing done when the Extracted Features dataset was formed:
		//   (en_htrctokentext:whakapapa) 
		
		ArrayList<String> query_term_group_list = new ArrayList<String>();
		
		int num_words = words.size();
		
		// Walk the list one chunk at a time. Bounding each chunk with Math.min()
		// guarantees that the trailing partial chunk is emitted as well, rather
		// than being dropped when num_words is not a multiple of NumQueryTerms.
		for (int start = 0; start < num_words; start += NumQueryTerms) {
			int end = Math.min(start + NumQueryTerms, num_words);
			
			ArrayList<String> query_terms = new ArrayList<String>(end - start);
			for (String word : words.subList(start, end)) {
				query_terms.add("(alllangs_htrctokentext:"+word+")");
			}
			
			query_term_group_list.add(String.join(" OR ", query_terms));
		}
		
		return query_term_group_list;
	}
	
	/**
	 * Posts {@code query} to the Solr Extracted Features {@code select} endpoint
	 * and collects the {@code id} field of every doc in the returned result page.
	 *
	 * <p>Only the first result page (at most {@link #NumRowsPerPage} docs,
	 * starting at offset 0) is requested. Failures are reported on the console
	 * and never propagated: whatever ids were collected before the failure
	 * (possibly none) are returned.
	 *
	 * @param query the Solr query string, e.g. one produced by {@link #generateQueries}
	 * @return the page ids found in the response; never {@code null}
	 */
	public static ArrayList<String> querySolrEF(String query)
	{		 
		// Example of the equivalent GET request:
		// https://solr2.htrc.illinois.edu/robust-solr/solr3456-faceted-htrc-full-ef16/select
		//   ?q=(alllangs_htrctokentext:atu) OR (alllangs_htrctokentext:haere)&indent=on
		//   &wt=json&start=0&rows=15
		//   &facet=on&facet.field=volumegenre_htrcstrings&facet.field=volumelanguage_htrcstring&facet.field=volumerightsAttributes_htrcstring&facet.field=volumenames_htrcstrings&facet.field=volumepubPlace_htrcstring&facet.field=volumebibliographicFormat_htrcstring&facet.field=volumeclassification_lcc_htrcstrings&facet.field=volumeconcept_htrcstrings
		
		ArrayList<String> page_ids = new ArrayList<String>();
		
		String base_url = "https://solr2.htrc.illinois.edu/robust-solr/solr3456-faceted-htrc-full-ef16/select";
		
		// NOTE(review): the query is passed through unencoded; presumably
		// UtilIO.postQueryToURL() takes care of any encoding -- confirm.
		String query_arg =  "q=" + query;
		String extra_args = "wt=json&start=0&rows=" + NumRowsPerPage;
		
		String args = query_arg + "&" + extra_args;
		
		System.out.println("  Initiating SolrEF query with args:" + args);
		String json_result_set_str = UtilIO.postQueryToURL(base_url,args);
		
		// Report a missing response explicitly instead of relying on the JSON
		// parser to throw on it.
		if (json_result_set_str == null || json_result_set_str.isEmpty()) {
			System.err.println("  No response received from SolrEF query with args:" + args);
			return page_ids;
		}
		
		try {
			JSONObject json_result_set = new JSONObject(json_result_set_str);
			JSONObject json_result_set_response = json_result_set.getJSONObject("response");
			
			long num_found = json_result_set_response.getLong("numFound");
			
			// Fewer than NumRowsPerPage pages may match, so cap the reported count
			long num_requested = Math.min(NumRowsPerPage, num_found);
			System.out.println("  Downloading first " + num_requested + " of " + num_found + " matching pages" );
			
			JSONArray docs = json_result_set_response.getJSONArray("docs");
			
			for (int i=0; i<docs.length(); i++) {
				JSONObject json_doc_rec = docs.getJSONObject(i);
				String id = json_doc_rec.getString("id");
				page_ids.add(id);
			}
		}
		catch (Exception e) {
			// Deliberately best-effort: a malformed response is reported, and
			// the ids gathered so far are still returned to the caller.
			System.err.println("  Failed to process SolrEF response for args:" + args);
			e.printStackTrace();
		}
		
		return page_ids;
	}
	
	/**
	 * Records one more occurrence of each id in {@link #page_id_map}. An id not
	 * seen before starts at 1; an id repeated within {@code id_list} is counted
	 * once per repetition.
	 *
	 * @param id_list the page ids to tally
	 */
	public static void addIdsToMap(ArrayList<String> id_list) 
	{
		for (String id: id_list)
		{
			// Inserts 1 for a new id, otherwise adds 1 to the existing count
			page_id_map.merge(id, 1, Integer::sum);
		}		
	}
	
	/**
	 * Writes the filtered seed words, one per line, to
	 * {@code <LRL_HOME>/data-output/maori-filtered-<count>-seedwords.json},
	 * creating the output directory first if necessary.
	 *
	 * @param args command-line arguments (unused)
	 */
	public static void main(String[] args) 
	{		
		SeedWords seed_words = new SeedWords(UtilIO.LRL_HOME);
		ArrayList<String> filtered_words = seed_words.getFilteredWords();
		
		String filtered_words_text = String.join("\n", filtered_words);
		
		String data_output_directory_str = UtilIO.LRL_HOME + File.separator + "data-output";
		File data_output_directory = new File(data_output_directory_str);
		
		// mkdirs() also creates any missing parent directories. Re-check with
		// isDirectory() afterwards so a failed creation is not silently ignored.
		if (!data_output_directory.exists() && !data_output_directory.mkdirs() && !data_output_directory.isDirectory()) {
			System.err.println("Failed to create output directory: " + data_output_directory_str);
			return;
		}
		
		// NOTE(review): the file carries a .json extension but its content is
		// plain newline-separated text; the name is kept as-is in case other
		// tooling depends on it.
		String output_filtered_words_filename = data_output_directory_str + File.separator + "maori-filtered-"+ filtered_words.size() + "-seedwords.json";
		
		UtilIO.writeTextToFile(output_filtered_words_filename, filtered_words_text);
		System.out.println("Saved filtered Maori seed words in:");
		System.out.println("  " + output_filtered_words_filename);
		
		// Flush stdout first so the final message on stderr is not interleaved
		System.out.flush();
		System.err.println("Done!");
	}

}
