package org.hathitrust.lrl;

import java.io.File;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.Set;

import org.hathitrust.lrl.core.UtilIO;

/**
 * Command-line utility that compares two lists of volume IDs -- one read from
 * a volume-metadata ID file and one read from a full-text ID file -- and prints
 * which IDs appear in both lists, only in the full-text list, or only in the
 * metadata list.
 */
public class MaoriMetadataFulltextOverlap {

	/**
	 * Partitions the given IDs into three groups and prints each group (with its
	 * size) to standard output, in this order: {@code in_both},
	 * {@code only_in_ft}, {@code only_in_md}.
	 *
	 * <p>Neither argument is modified. Each distinct ID is reported in exactly one
	 * group, even if it occurs several times in an input list. IDs are printed in
	 * the order of their first occurrence: {@code in_both} and {@code only_in_md}
	 * follow the order of {@code md_vol_ids}; {@code only_in_ft} follows the order
	 * of {@code ft_vol_ids}.
	 *
	 * @param md_vol_ids volume IDs taken from the metadata list
	 * @param ft_vol_ids volume IDs taken from the full-text list
	 */
	public static void computeOverlap(ArrayList<String> md_vol_ids, ArrayList<String> ft_vol_ids)
	{
		// Hash-based lookup keeps each membership test O(1) rather than a
		// linear scan of the full-text list for every metadata ID.
		Set<String> ft_lookup = new HashSet<String>(ft_vol_ids);

		// Insertion-ordered sets: they de-duplicate while keeping the same
		// "[a, b, c]" print format and ordering that a list would give.
		Set<String> in_both = new LinkedHashSet<String>();
		Set<String> only_in_md = new LinkedHashSet<String>();

		for (String id: md_vol_ids) {
			if (ft_lookup.contains(id)) {
				in_both.add(id);
			}
			else {
				only_in_md.add(id);
			}
		}

		// Work on a private copy so the caller's full-text list is left intact;
		// whatever is not shared with the metadata list is full-text only.
		Set<String> only_in_ft = new LinkedHashSet<String>(ft_vol_ids);
		only_in_ft.removeAll(in_both);

		System.out.println("**** in_both (" + in_both.size() + "): " + in_both);
		System.out.println("**** only_in_ft(" + only_in_ft.size() + "): " + only_in_ft);
		System.out.println("**** only_in_md(" + only_in_md.size() + "): " + only_in_md);
		
	}
	
	/**
	 * Reads the two ID files located under {@code UtilIO.LRL_HOME} and prints
	 * their overlap via {@link #computeOverlap(ArrayList, ArrayList)}.
	 *
	 * <p>The metadata IDs come from
	 * {@code sorted-data-output/maori-volume_metadata-ids.txt} and the full-text
	 * IDs from {@code vol-count-uniq.txt}.
	 *
	 * @param args command-line arguments (not used)
	 */
	public static void main(String[] args) 
	{
		String maori_volmd_ids_filename = UtilIO.LRL_HOME + File.separator + "sorted-data-output" 
				+ File.separator + "maori-volume_metadata-ids.txt";
		
		System.out.println("Reading language_t:mao volume-level IDs from file: ");
		System.out.println("  " + maori_volmd_ids_filename);
		// NOTE(review): presumably yields one ID per line of the file -- confirm against UtilIO
		ArrayList<String> md_vol_ids = UtilIO.readLineBasedTextFile(maori_volmd_ids_filename);
		
		
		String maori_fulltext_ids_filename = UtilIO.LRL_HOME + File.separator + "vol-count-uniq.txt";
		System.out.println("Reading full-text search seeded volume-level IDs from file: ");
		System.out.println("  " + maori_fulltext_ids_filename);
		ArrayList<String> ft_vol_ids = UtilIO.readLineBasedTextFile(maori_fulltext_ids_filename);
				
		computeOverlap(md_vol_ids,ft_vol_ids);
		
		// Flush the report on stdout before the completion notice goes to stderr
		System.out.flush();
		System.err.println("Done!");	
	}

}
 