/**
 * Class that uses OpenNLP with the Language Detection Model to determine, with a default
 * or configurable level of confidence, whether text (from a file or stdin) is in Maori or not.
 * Internal functions can be used for detecting any of the 103 languages currently supported by
 * the OpenNLP Language Detection Model.
 * 
 * http://opennlp.apache.org/news/model-langdetect-183.html
 * language detector model: http://opennlp.apache.org/models.html
 *        Pre-trained models for OpenNLP 1.5: http://opennlp.sourceforge.net/models-1.5/
 * Use of Apache OpenNLP in general:
 *   http://opennlp.apache.org/docs/1.9.1/manual/opennlp.html#intro.api
 * Use of OpenNLP for language detection:
 * http://opennlp.apache.org/docs/1.9.1/manual/opennlp.html#tools.langdetect
 * 
 * This code was based on the information and sample code at the above links and the links dispersed throughout this file.
 * See also the accompanying README file.
 *
 * July 2019
 */

package org.hathitrust.lrl.core;

import java.io.*;
import opennlp.tools.langdetect.*;
import opennlp.tools.util.*;

public class MaoriTextDetector extends LanguageTextDetector
{
	/** ISO 639-2 / ISO 639-3 three-letter language code identifying Maori. */
	public static final String MAORI_3LETTER_CODE = "mri";

	/**
	 * Creates a detector that uses the inherited DEFAULT_MINIMUM_CONFIDENCE
	 * as its confidence setting.
	 *
	 * @param silentMode passed through unchanged to the two-argument constructor
	 * @throws Exception if the superclass constructor fails
	 */
	public MaoriTextDetector(boolean silentMode) throws Exception
	{
		this(silentMode, DEFAULT_MINIMUM_CONFIDENCE);
	}

	/**
	 * Creates a detector with an explicit confidence setting. Both arguments
	 * are forwarded as-is to the LanguageTextDetector constructor.
	 *
	 * @param silentMode     forwarded to the superclass constructor
	 * @param min_confidence forwarded to the superclass constructor; presumably the
	 *                       minimum confidence required for a positive detection
	 *                       (see LanguageTextDetector to confirm)
	 * @throws Exception if the superclass constructor fails
	 */
	public MaoriTextDetector(boolean silentMode, double min_confidence) throws Exception
	{
		super(silentMode, min_confidence);
	}

	/**
	 * Checks a string of text for Maori by asking the inherited
	 * isTextInLanguage(String, String) about the language code "mri".
	 *
	 * @param text the text to examine
	 * @return whatever isTextInLanguage reports for Maori; intended to be true when
	 *         the text is detected as Maori at the configured minimum confidence
	 *         (or the default one if none was set)
	 */
	public boolean isTextInMaori(String text) {
		final boolean detectedAsMaori = this.isTextInLanguage(MAORI_3LETTER_CODE, text);
		return detectedAsMaori;
	}

	/**
	 * Reader-based variant of {@link #isTextInMaori(String)}, meant for "smaller"
	 * text files or streams. Delegates to the inherited reader-based
	 * isTextInLanguage with the Maori language code.
	 *
	 * @param reader source of the text to examine
	 * @return same meaning as the return value of isTextInMaori(String text)
	 * @throws Exception propagated from the inherited isTextInLanguage call
	 */
	public boolean isTextInMaori(BufferedReader reader) throws Exception {
		final boolean detectedAsMaori = this.isTextInLanguage(MAORI_3LETTER_CODE, reader);
		return detectedAsMaori;
	}

	/*
	 * Open questions about "larger" text files/streams, which still need better handling:
	 *  - What if several different languages are reported with high confidence,
	 *    one for every NUM_LINES read in?
	 *  - Would that mean the file is multi-lingual, with each section dominated
	 *    by a different language?
	 *  - How should such information best be conveyed to the user?
	 */
	/**
	 * Rudimentary support for very large inputs: delegates to the inherited
	 * isLargeTextInLanguage with the Maori language code.
	 *
	 * @param reader source of the (large) text to examine
	 * @return same meaning as the return value of isTextInMaori(String text)
	 * @throws Exception propagated from the inherited isLargeTextInLanguage call
	 */
	public boolean isLargeTextInMaori(BufferedReader reader) throws Exception {
		final boolean detectedAsMaori = this.isLargeTextInLanguage(MAORI_3LETTER_CODE, reader);
		return detectedAsMaori;
	}

}
