/**
 *  Adding support for Videos in Terrier
 *  @author: John Thompson, jmt12, #9826509
 *
 *  The contents of this file are subject to the Mozilla Public License
 *  Version 1.1 (the "License"); you may not use this file except in
 *  compliance with the License. You may obtain a copy of the License at
 *  http://www.mozilla.org/MPL/
 *
 *  Software distributed under the License is distributed on an "AS IS"
 *  basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
 *  the License for the specific language governing rights and limitations
 *  under the License.
 *
 *  Copyright (c) 2011 The University of Waikato. All Rights Reserved.
 */
package org.terrier.indexing;

import java.io.BufferedReader;
import java.io.File;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.IOException;
import java.io.StringReader;
import java.io.Reader;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Collections;
import java.util.Arrays;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.log4j.Logger;
import org.terrier.indexing.StreamGobbler;
import org.terrier.indexing.tokenisation.TokenStream;
import org.terrier.indexing.tokenisation.Tokeniser;
import org.terrier.utility.ApplicationSetup;

/**
 *  A Terrier {@link Document} for video files. Constructing an instance
 *  performs the whole ingest: it allocates a unique "assoc" directory under
 *  the Terrier share directory, links (or copies) the source video into it,
 *  extracts metadata with the external {@code mediainfo} tool, converts the
 *  video with {@code HandBrakeCLI}, extracts keyframes with
 *  {@code hive2_ffmpegsvn}, and finally exposes a fixed dummy text as the
 *  token stream for the indexer.
 *
 *  Failures of the individual external steps are logged and the ingest
 *  continues with the next step.
 */
public class VideoDocument
  implements Document
{
  /** A reference to the logger for messaging */
  protected static final Logger logger = Logger.getLogger(VideoDocument.class);

  /** Matches a MediaInfo XML line opening a track; group 1 is the track type. */
  private static final Pattern TRACK_TYPE_PATTERN = Pattern.compile("<track type=\"([a-zA-Z]+)\">");
  /** Matches a line holding one complete element; group 1 is the element
   *  name and group 2 its text content.
   */
  private static final Pattern METADATA_PATTERN = Pattern.compile("<([a-zA-Z_]+)>(.*)</\\1>");
  /** Matches the file names of the keyframe images listed in the preview. */
  private static final Pattern KEYFRAME_IMAGE_PATTERN = Pattern.compile("tsv.*\\.jpg");
  /** Number of hash characters used for each level of the assoc directory. */
  private static final int HASH_BLOCK_LENGTH = 4;

  /** The map of properties (fields) for this document. */
  protected Map<String,String> properties;
  /** A reader built from a dummy text string. */
  protected Reader reader;
  /** A token stream produced by the configured tokeniser when fed the dummy
   *  reader.
   */
  protected TokenStream tokenizer;

  // Handbrake Configuration (passed to HandBrakeCLI as -w, -b and -B)
  protected String streaming_hq_size = ApplicationSetup.getProperty("VideoDocument.streaming_hq_size", "720");
  protected String streaming_hq_video_bitrate = ApplicationSetup.getProperty("VideoDocument.streaming_hq_video_bitrate", "496");
  protected String streaming_hq_audio_bitrate = ApplicationSetup.getProperty("VideoDocument.streaming_hq_audio_bitrate", "80");

  /** The preview size (width). Read from the ImageDocument property; not
   *  used within this class.
   **/
  protected final String preview_width = ApplicationSetup.getProperty("ImageDocument.preview_width", "200");

  /** Upper bound (in characters) on the comma separated keyframe list stored
   *  in the "preview" property.
   */
  protected int max_keyframe_images_length = 1024;

  /** Default constructor. **/
  protected VideoDocument() {}

  /** Constructs an instance of the VideoDocument and ingests the video named
   *  by the "filename" property.
   *  @param istream the input stream for the file; it is not read by this
   *         constructor, which works from the "filename" property instead
   *  @param default_properties the initial properties (docno, filename); this
   *         map is kept and further properties are added to it
   *  @param tok the tokeniser defined for this collection
   *  @throws NullPointerException if default_properties has no "filename"
   */
  public VideoDocument(InputStream istream, Map<String,String> default_properties, Tokeniser tok)
  {
    logger.info("VideoDocument::VideoDocument()");
    // 0. Initialization from arguments
    this.properties = default_properties;
    String filepath = this.properties.get("filename");
    System.out.println("[F:" + this.epochTime() + "] Starting ingest of " + filepath);

    // Set properties
    logger.info("VideoDocument - extracting properties");
    // A. Hardcoded properties
    this.properties.put("parser", "VideoDocument");
    this.properties.put("abstract", "This is a video so here is some dummy text to prevent indexer failing.");
    // B. Properties derived from filename
    String title = filepath.substring(filepath.lastIndexOf(System.getProperty("file.separator")) + 1);
    this.properties.put("title", title);
    // - the extension is taken from the file name only, so a dot in a
    //   directory name (or no dot at all) can't leak path components into
    //   the target file name
    int dot_index = title.lastIndexOf('.');
    String target_filename = (dot_index >= 0) ? "doc." + title.substring(dot_index + 1) : "doc";
    this.properties.put("source", target_filename);

    // - A unique associated directory, created as it is chosen
    System.out.println("[F:" + this.epochTime() + ":PR]");
    String unique_id = this.generateHash(filepath);
    System.out.println("[F:" + this.epochTime() + ":IO]");
    Path assoc_path = this.createAssocDirectory(unique_id);

    // Copy (symlink) the file into place in the shared directory
    Path raw_video_path = Paths.get(filepath);
    this.linkOrCopyVideo(raw_video_path, assoc_path.resolve(target_filename));

    // 1. Extract Metadata using MediaInfo and store as properties
    System.out.println("[F:" + this.epochTime() + ":PR]");
    this.extractMetadata(raw_video_path);

    // 2. Convert Video to streamable format using HandbrakeCLI
    Path converted_video_path = assoc_path.resolve("tsv.mp4");
    this.convertToStreamable(raw_video_path, converted_video_path);

    // 3. Generate keyframes from streamable video and attach the shot names
    //    as a property
    this.extractKeyframes(assoc_path, converted_video_path);

    // 4. Create a dummy reader around some dummy text and then tokenize it
    System.out.println("[F:" + this.epochTime() + ":PR]");
    logger.info("VideoDocument - feed dummy text as token stream to indexer [PR]");
    try
    {
      this.reader = new StringReader(this.properties.get("abstract"));
      this.tokenizer = tok.tokenise(this.reader);
    }
    catch (Exception e)
    {
      logger.error("Exception while creating dummy text stream: ", e);
    }
    logger.info("VideoDocument - Complete!");
    System.out.println("[F:" + this.epochTime() + "] Complete");
  }
  /** VideoDocument(InputStream istream, Map<String,String> default_properties, Tokeniser tok) **/

  /** Chooses and creates the assoc directory for this document and records
   *  its relative name in the "assocfile" property.
   *
   *  The name starts as "D" plus the first block of hash characters; while
   *  that directory (with a ".dir" suffix) is already taken, a further block
   *  is appended as a nested level. The ".dir" suffix marks directories that
   *  actually contain files, so the non-suffixed names can hold the nested
   *  levels. Terminates the JVM if every level is taken.
   *  @param unique_id the hexadecimal hash the directory name is built from
   *  @return the path of the directory chosen for this document
   */
  private Path createAssocDirectory(String unique_id)
  {
    String separator = System.getProperty("file.separator");
    StringBuilder assoc_filename = new StringBuilder("D");
    // Only whole blocks are consumed, so the substring can never run past
    // the end of the hash
    for (int offset = 0; offset + HASH_BLOCK_LENGTH <= unique_id.length(); offset += HASH_BLOCK_LENGTH)
    {
      if (offset > 0)
      {
        assoc_filename.append(separator);
      }
      assoc_filename.append(unique_id, offset, offset + HASH_BLOCK_LENGTH);
      Path candidate = Paths.get(ApplicationSetup.TERRIER_SHARE, "images", "assoc", assoc_filename + ".dir");
      try
      {
        Files.createDirectories(candidate.getParent());
        // createDirectory checks for existence and creates in one step, so
        // two ingests can't both claim the same directory
        Files.createDirectory(candidate);
      }
      catch (java.nio.file.FileAlreadyExistsException e)
      {
        // taken - try one level deeper
        continue;
      }
      catch (IOException e)
      {
        // Not a name clash; report it and carry on with this candidate so
        // the later steps log their own failures
        logger.error("VideoDocument - failed to create assoc directory " + candidate, e);
      }
      this.properties.put("assocfile", assoc_filename.toString());
      return candidate;
    }
    // still not unique? but run out of unique_id... time to complain
    logger.error("VideoDocument - can't determine unique assocfilepath");
    System.exit(1);
    throw new IllegalStateException("can't determine unique assocfilepath");
  }
  /** createAssocDirectory(String) **/

  /** Places the original video at target_path, preferring a symbolic link
   *  and falling back to a copy when the link could not be made.
   *  @param raw_video_path the path of the original video
   *  @param target_path where the link (or copy) should be created
   */
  private void linkOrCopyVideo(Path raw_video_path, Path target_path)
  {
    logger.info("VideoDocument - symlinking original video into assoc directory [IO]");
    logger.info("[DEBUG] Raw Video Path: " + raw_video_path);
    logger.info("[DEBUG] Target Path:    " + target_path);
    try
    {
      // A relative link target would be resolved against the link's own
      // directory, so always link to the absolute location
      Files.createSymbolicLink(target_path, raw_video_path.toAbsolutePath());
    }
    // not supported? We'll try copying below
    catch (UnsupportedOperationException ex)
    {
      logger.warn("Symlinking not supported");
    }
    // Anything else is logged; the copy below is still attempted
    catch (Exception e)
    {
      logger.error("Exception while symlinking video: ", e);
    }
    // - copy if the file doesn't exist yet
    if (Files.notExists(target_path))
    {
      logger.info("VideoDocument - symlink failed, copying instead [IO]");
      try
      {
        Files.copy(raw_video_path, target_path);
      }
      catch (Exception e)
      {
        logger.error("Exception while copying video: ", e);
      }
    }
  }
  /** linkOrCopyVideo(Path, Path) **/

  /** Runs mediainfo over the video and stores each element it reports as a
   *  property named "&lt;track type&gt;:&lt;element name&gt;" (both lower
   *  cased). Elements seen before any track line use the type "unknown".
   *  @param raw_video_path the path of the original video
   */
  private void extractMetadata(Path raw_video_path)
  {
    logger.info("VideoDocument - extracting video metadata [PR]");
    try
    {
      String metadata_command[] = {
        "mediainfo",
        "--Output=XML",
        raw_video_path.toString()
      };
      logger.info("VideoDocument - metadata command: " + Arrays.toString(metadata_command));
      Process metadata_process = Runtime.getRuntime().exec(metadata_command);
      // Drain stderr on its own thread so the process can't block on it
      StreamGobbler metadata_process_error_gobbler = new StreamGobbler(metadata_process.getErrorStream());
      metadata_process_error_gobbler.start();
      String type = "Unknown";
      // NOTE(review): output is decoded with the platform default charset -
      // confirm this matches what mediainfo emits
      try (BufferedReader metadata_br = new BufferedReader(new InputStreamReader(metadata_process.getInputStream())))
      {
        String line;
        while ((line = metadata_br.readLine()) != null)
        {
          Matcher type_matcher = TRACK_TYPE_PATTERN.matcher(line);
          if (type_matcher.matches())
          {
            type = type_matcher.group(1);
            continue;
          }
          Matcher metadata_matcher = METADATA_PATTERN.matcher(line);
          if (metadata_matcher.matches())
          {
            String field = type.toLowerCase() + ":" + metadata_matcher.group(1).toLowerCase();
            String value = metadata_matcher.group(2);
            logger.info("VideoDocument - adding metadata: " + field + " => " + value);
            this.properties.put(field, value);
          }
        }
      }
      int metadata_status = metadata_process.waitFor();
      if (metadata_status != 0)
      {
        logger.warn("VideoDocument - metadata command exit status: " + metadata_status);
      }
    }
    catch (InterruptedException e)
    {
      // keep the interrupt visible to whoever is running this ingest
      Thread.currentThread().interrupt();
      logger.error("Exception while extracting video metadata:", e);
    }
    catch (Exception e)
    {
      logger.error("Exception while extracting video metadata:", e);
    }
  }
  /** extractMetadata(Path) **/

  /** Converts the original video to an mp4 file using HandBrakeCLI and the
   *  configured size and bitrates. A non-zero exit status or a missing
   *  output file is logged as an error.
   *  @param raw_video_path the path of the original video
   *  @param converted_video_path where the converted video is written
   */
  private void convertToStreamable(Path raw_video_path, Path converted_video_path)
  {
    logger.info("VideoDocument - convert video to streamable format [PR]");
    try
    {
      String convert_command[] = {
        "HandBrakeCLI",
        "-i", raw_video_path.toString(),
        "-t", "1",
        "-c", "1",
        "-o", converted_video_path.toString(),
        "-f", "mp4",
        "-O",
        "-w", this.streaming_hq_size,
        "--loose-anamorphic",
        "-e", "x264",
        "-b", this.streaming_hq_video_bitrate,
        "-a", "1",
        "-E", "faac",
        "-6", "dpl2",
        "-R", "Auto",
        "-B", this.streaming_hq_audio_bitrate,
        "-D", "0.0",
        "-x", "ref=2:bframes=2:subq=6:mixed-refs=0:weightb=0:8x8dct=0:trellis=0:threads=1"
      };
      logger.info("VideoDocument - convert command: " + Arrays.toString(convert_command));
      // @todo determine the best way to account for configuration options
      Process convert_process = Runtime.getRuntime().exec(convert_command);
      // Drain both output streams so the process can't block on a full pipe
      StreamGobbler convert_process_error_gobbler = new StreamGobbler(convert_process.getErrorStream());
      convert_process_error_gobbler.start();
      StreamGobbler convert_process_out_gobbler = new StreamGobbler(convert_process.getInputStream());
      convert_process_out_gobbler.start();
      int convert_status = convert_process.waitFor();
      if (convert_status != 0 || !Files.exists(converted_video_path))
      {
        throw new IOException("Convert command failed (exit status: " + convert_status + ")");
      }
    }
    catch (InterruptedException e)
    {
      Thread.currentThread().interrupt();
      logger.error("Exception while converting video to streamable format: ", e);
    }
    catch (Exception e)
    {
      logger.error("Exception while converting video to streamable format: ", e);
    }
  }
  /** convertToStreamable(Path, Path) **/

  /** Runs hive2_ffmpegsvn over the converted video, then stores the names of
   *  the keyframe images found in the assoc directory as the comma separated
   *  "preview" property. Names are taken in sorted order; the first is always
   *  included and later ones are added only while the list stays shorter
   *  than max_keyframe_images_length. On failure the property is not set.
   *  @param assoc_path the assoc directory the keyframes are written to
   *  @param converted_video_path the converted video to extract shots from
   */
  private void extractKeyframes(Path assoc_path, Path converted_video_path)
  {
    logger.info("VideoDocument - extracting keyframes from video [PR]");
    try
    {
      Path shots_path = assoc_path.resolve("shots.xml");
      String keyframe_command[] = {
        "hive2_ffmpegsvn",
        "-o", shots_path.toString(),
        "-k", assoc_path.toString(),
        "-m", "0.5",
        "-l", "0.05",
        converted_video_path.toString()
      };
      logger.info("VideoDocument - keyframe command: " + Arrays.toString(keyframe_command));
      Process keyframe_process = Runtime.getRuntime().exec(keyframe_command);
      StreamGobbler keyframe_error_gobbler = new StreamGobbler(keyframe_process.getErrorStream());
      keyframe_error_gobbler.start();
      StreamGobbler keyframe_out_gobbler = new StreamGobbler(keyframe_process.getInputStream());
      keyframe_out_gobbler.start();
      int keyframe_status = keyframe_process.waitFor();
      if (keyframe_status != 0 || !Files.exists(shots_path))
      {
        throw new IOException("Keyframe command failed (exit status: " + keyframe_status + ")");
      }

      System.out.println("[F:" + this.epochTime() + ":IO]");
      logger.info("VideoDocument - associating keyframes to video [IO]");
      File files[] = assoc_path.toFile().listFiles();
      if (files == null)
      {
        // listFiles() yields null when the directory can't be listed
        throw new IOException("Unable to list assoc directory: " + assoc_path);
      }
      Arrays.sort(files);
      StringBuilder keyframe_images = new StringBuilder();
      for (File file : files)
      {
        String image_filename = file.toPath().getFileName().toString();
        logger.info("VideoDocument - considering keyframe image: " + image_filename);
        if (!KEYFRAME_IMAGE_PATTERN.matcher(image_filename).matches())
        {
          continue;
        }
        if (keyframe_images.length() == 0)
        {
          keyframe_images.append(image_filename);
        }
        // Consider the maximum size of the preview images field
        else if ((keyframe_images.length() + image_filename.length() + 1) < this.max_keyframe_images_length)
        {
          keyframe_images.append(',').append(image_filename);
        }
        else
        {
          break;
        }
      }
      this.properties.put("preview", keyframe_images.toString());
    }
    catch (InterruptedException e)
    {
      Thread.currentThread().interrupt();
      logger.error("Exception while extracting keyframes from video: ", e);
    }
    catch (Exception e)
    {
      logger.error("Exception while extracting keyframes from video: ", e);
    }
  }
  /** extractKeyframes(Path, Path) **/

  /** Returns true when the end of the document has been reached, and there
   *  are no other terms to be retrieved from it. A document without a token
   *  stream (tokenisation failed, or none was set) is reported as ended.
   *  @return boolean true if there are no more terms in the document, otherwise
   *          it returns false.
   */
  public boolean endOfDocument()
  {
    return this.tokenizer == null || !this.tokenizer.hasNext();
  }
  /** endOfDocument() **/

  /** @function epochTime
   *  Returns the current time in seconds since 1970JAN01
   */
  public long epochTime()
  {
    return System.currentTimeMillis()/1000;
  }
  /** epochTime() **/

  /** Returns the underlying map of all the properties defined by this Document.
   *  The map itself is returned, not a copy.
   *  @since 1.1.0
   */
  public Map<String,String> getAllProperties()
  {
    return this.properties;
  }
  /** getAllProperties() **/

  /** Returns the fields the current term appears in.
   * @return always an empty set, as this document type has no field support.
   */
  public Set<String> getFields()
  {
    return Collections.emptySet();
  }
  /** getFields() **/

  /** Gets the next term of the document.
   *  <B>NB:</B>Null string returned from getNextTerm() should
   *  be ignored. They do not signify the lack of any more terms.
   *  endOfDocument() should be used to check that.
   *  @return String the next term of the document. Null returns should be
   *          ignored; null is also returned when there is no token stream.
   */
  public String getNextTerm()
  {
    return (this.tokenizer == null) ? null : this.tokenizer.next();
  }
  /** getNextTerm() **/

  /** Allows access to a named property of the Document. Examples might be URL,
   *  filename etc.
   *  @param name Name of the property; it is lower cased before the lookup.
   *  @return the value of the property, or null if it is not defined
   *  @since 1.1.0
   */
  public String getProperty(String name)
  {
    return this.properties.get(name.toLowerCase());
  }
  /** getProperty(String name) **/

  /** Returns a Reader object so client code can tokenise the document
   * or deal with the document itself. Here it is the reader over the dummy
   * text. */
  public Reader getReader()
  {
    return this.reader;
  }
  /** getReader() **/

  /** Returns the MD5 digest of the given string (encoded as UTF-8) as a
   *  lower case hexadecimal string of two characters per digest byte.
   *  Terminates the JVM if the MD5 algorithm is unavailable.
   *  @param string the text to hash
   *  @return the hexadecimal form of the digest
   */
  private String generateHash(String string)
  {
    StringBuilder sb = new StringBuilder();
    try
    {
      final MessageDigest message_digest = MessageDigest.getInstance("MD5");
      final byte[] result_bytes = message_digest.digest(string.getBytes(Charset.forName("UTF8")));
      for (byte result_byte : result_bytes)
      {
        // OR-ing in 0x100 forces three hex digits, so dropping the first
        // leaves a zero padded pair
        sb.append(Integer.toHexString((result_byte & 0xFF) | 0x100).substring(1,3));
      }
    }
    catch (NoSuchAlgorithmException e)
    {
      System.err.println("Exception: " + e);
      System.exit(1);
    }
    return sb.toString();
  }
  /** generateHash(String) **/
}