###########################################################################
#
# SimpleVideoPlugin.pm -- Plugin for multimedia with some simple video
#                         processing
#
# A component of the Greenstone digital library software from the New
# Zealand Digital Library Project at the University of Waikato, New
# Zealand.
#
# Copyright (C) 2012 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################

package SimpleVideoPlugin;

use File::Temp qw/ tempdir /;

use BaseImporter;
use MetadataRead;
use util;
use FileUtils;

use strict;
use warnings;
no strict 'refs'; # allow filehandles to be variables and viceversa

# Establish the inheritance chain at compile time: method lookup tries
# MetadataRead first and then BaseImporter.
BEGIN
{
  @SimpleVideoPlugin::ISA = qw(MetadataRead BaseImporter);
}

# Argument table for this plugin. Each entry is a hash describing a single
# option (its name, description, type, default value and whether it is
# required); new() appends the whole table to the shared "ArgList".
my $arguments = do
{
  # All of the on/off style options offer the same two choices. A new list is
  # built for every option so that no two entries share a reference.
  my $true_or_false = sub
  {
    return [{'name' => "true", 'desc' => "{common.true}"},
            {'name' => "false", 'desc' => "{common.false}"}];
  };

  my @argument_table = (
    # - which files this plugin claims
    { 'name' => "process_exp",
      'desc' => "{BaseImporter.process_exp}",
      'type' => "regexp",
      'reqd' => "no",
      'deft' => &get_default_process_exp() },

    # - settings for the streamable conversion
    { 'name' => "streamingHQsize",
      'desc' => "{VideoPlugin.streamingsize}",
      'type' => "int",
      'deft' => "720",
      'reqd' => "no" },
    { 'name' => "streamingHQVideoBitrate",
      'desc' => "{VideoPlugin.streamingbitrate}",
      'type' => "int",
      'deft' => "496",
      'reqd' => "no" },
    { 'name' => "streamingHQAudioBitrate",
      'desc' => "{VideoPlugin.streamingbitrate}",
      'type' => "int",
      'deft' => "80",
      'reqd' => "no" },
    { 'name' => "videoDeinterlacingFilter",
      'desc' => "Activate a deinterlacing filter to increase the quality of TV footage",
      'type' => "enum",
      'list' => $true_or_false->(),
      'deft' => "false",
      'reqd' => "no" },

    # - settings describing how and where the import is being run
    { 'name' => "isParallel",
      'desc' => "Will the import use parallel processing? (maybe this should be set by parallel-import.pl somehow)",
      'type' => "enum",
      'list' => $true_or_false->(),
      'deft' => "true",
      'reqd' => "no" },
    { 'name' => "isCluster",
      'desc' => "Will the import be run on a cluster (multiple computers) or not (single computer - possibly multiple processors)",
      'type' => "enum",
      'list' => $true_or_false->(),
      'deft' => "false",
      'reqd' => "no" },
    { 'name' => "separateIO",
      'desc' => "copy and process the file locally (good for segregating IO cost)",
      'type' => "enum",
      'list' => $true_or_false->(),
      'deft' => "false",
      'reqd' => "no" },
    { 'name' => "fixedCore",
      'desc' => "Restrict the execution of Handbrake to a single core (0 = no restriction, > 0 use value-1'th core)",
      'type' => "int",
      'deft' => "0",
      'reqd' => "no" },

    # - keyframe extraction switch
    { 'name' => "no_keyframes",
      'desc' => "Disable keyframe extraction",
      'type' => "flag",
      'reqd' => "no" },
  );

  \@argument_table;
};

# Description record for this plugin; new() pushes it onto the shared
# "OptList" alongside the argument table above.
# - the 'name' entry previously read "BasicVideoPlugin", which matched neither
#   the package nor the file name; it now names this plugin
#   NOTE(review): confirm nothing external relied on the old label
my $options = { 'name'     => "SimpleVideoPlugin",
                'desc'     => "",
                'abstract' => "no",
                'inherits' => "yes",
                'args'     => $arguments };

## @function new()
#
# Constructor. Registers this class and its argument/option tables in the
# structures supplied by the caller, lets BaseImporter build the object and
# then reblesses the result into the requested class.
#
# @param $pluginlist       array ref that the class name is appended to
# @param $inputargs        passed through to BaseImporter unchanged
# @param $hashArgOptLists  hash ref holding the "ArgList" and "OptList" arrays
# @return the new plugin object
#
sub new
{
  my ($class, $pluginlist, $inputargs, $hashArgOptLists) = @_;

  # Announce ourselves before handing over to the base class
  push(@{$pluginlist}, $class);
  push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
  push(@{$hashArgOptLists->{"OptList"}}, $options);

  my $plugin = BaseImporter->new($pluginlist, $inputargs, $hashArgOptLists);
  return bless($plugin, $class);
}

## @function get_default_process_exp()
#
# Supplies the default value of the process_exp argument: a case-insensitive
# pattern (as a string) matching names that end in ".ts".
#
sub get_default_process_exp
{
  my $ts_file_exp = '(?i)\.ts$';
  return $ts_file_exp;
}

## @function get_oid_hash_type()
#
# Reports the hashing strategy name used for this plugin's documents, which
# is always "hash_on_ga_xml".
#
sub get_oid_hash_type
{
  my ($self) = @_;
  my $hash_type = "hash_on_ga_xml";
  return $hash_type;
}

## @function process()
#
# Import a single video file: record basic metadata reported by MediaInfo,
# optionally convert the video to a streamable MP4 using HandBrakeCLI,
# optionally extract keyframe images using Hive2, and attach the results to
# the document object.
#
# @param $pluginfo  not used by this method
# @param $base_dir  directory that $file is relative to (may be '')
# @param $file      path of the video to import
# @param $metadata  not used by this method
# @param $doc_obj   document object that text, metadata and files are added to
# @param $gli       not used by this method
# @return 1 on success; dies if a directory cannot be created, the local copy
#         fails, or the conversion / keyframe extraction produces no output
#
sub process
{
  my $self = shift (@_);
  my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;

  if (!&FileUtils::isFilenameAbsolute($file) && $base_dir ne '')
  {
    $file = &FileUtils::filenameConcatenate($base_dir, $file);
  }

  print STDERR " * SimpleVideoPlugin processing: " . $file . "\n";

  # - I have to add some text (yay, back to needing dummy text) otherwise the
  #   DocumentText formatting is ignored (?!?)
  my $topsection = $doc_obj->get_top_section();
  $doc_obj->add_utf8_text($topsection, "This is dummy text");

  # Work out the base name (no directory, no extension) used to label logs
  # and cache directories. The extension test is case-insensitive to agree
  # with the default process_exp, and the capture is only read after a
  # successful match so a stale $1 can never leak in.
  my $filename;
  if ($file =~ /[\/]?([^\/]+)\.(?:ts)$/i)
  {
    $filename = $1;
  }
  # - not a .ts file (process_exp may have been overridden): use the last
  #   path component with any extension removed
  elsif ($file =~ /([^\/]+?)(?:\.[^.\/]*)?$/)
  {
    $filename = $1;
  }
  else
  {
    die('Error! Unable to determine a base filename from: ' . $file . "\n");
  }

  # Optional date metadata (available on raw ReplayMe recordings)
  if ($filename =~ /(\d\d\d\d)-(\d\d)-(\d\d)/)
  {
    my $date = $1 . $2 . $3;
    $filename =~ s/[^a-z0-9]+/_/ig;
    $filename =~ s/^_+|_+$//g;
    $doc_obj->add_utf8_metadata($topsection,"Date",$date);
  }

  # Special Case: HDFS *only* supported by separateIO flag (you need to move
  # the file out of HDFS to local filespace to allow MediaInfo and Handbrake
  # to be run on it.
  my $separate_io = $self->{'separateIO'};
  if (&FileUtils::isHDFS($file))
  {
    $separate_io = 'true';
  }
  ###rint STDERR "[DEBUG] separate_io:" . $separate_io . "\n";

  my $process_dir = $ENV{'GSDLCOLLECTDIR'};
  # If we are in a cluster, then we don't want to be writing all the logs
  # etc to the shared file system. Instead, we write to the tmp drive
  if ($separate_io eq 'true')
  {
    $process_dir = &FileUtils::filenameConcatenate('/tmp', 'gsimport-' . $filename);
    &_ensureDirectory($process_dir);
  }
  my $logs_dir = &FileUtils::filenameConcatenate($process_dir, "logs");
  &_ensureDirectory($logs_dir);
  my $convert_log_path = &FileUtils::filenameConcatenate($logs_dir, 'convert-' . $filename . '.log');
  my $pass_log_path = &FileUtils::filenameConcatenate($logs_dir, 'convert-' . $filename . '-pass');
  my $tmp_dir = &FileUtils::filenameConcatenate($process_dir, "cached");
  &_ensureDirectory($tmp_dir);
  $tmp_dir = &FileUtils::filenameConcatenate($tmp_dir, $filename);
  &_ensureDirectory($tmp_dir);

  # If we are separating IO, then we also start by copying the file to
  # the process directory (local tmp) as well
  # - $file has already been joined to $base_dir above; joining it a second
  #   time would duplicate a relative base directory in the path
  my $ivideo_path = $file;
  if ($separate_io eq 'true')
  {
    my $io_start = time();
    print " - creating local copy of file: " . $ivideo_path . " [IOS:" . time() . "]\n";
    my $local_ivideo_path = &FileUtils::filenameConcatenate($process_dir, $filename . ".ts");
    &FileUtils::copyFiles($ivideo_path, $local_ivideo_path);
    my $remote_size = &FileUtils::fileSize($ivideo_path);
    my $local_size = &FileUtils::fileSize($local_ivideo_path);
    if ($remote_size != $local_size)
    {
      die('Error! File copied is not of same size as original: ' . $remote_size . 'b != ' . $local_size . "b\n");
    }
    elsif (&FileUtils::fileExists($local_ivideo_path))
    {
      $ivideo_path = $local_ivideo_path;
      print ' - copied! [IOE:' . time() . "]\n";
    }
    else
    {
      die('Error! Failed to copy file: ' . $ivideo_path . ' => ' . $local_ivideo_path . ' [IOE:' . time() . "]\n");
    }
  }

  # 1. Use MediaInfo to extract important metadata
  print ' - Extracting metadata using MediaInfo...';
  my $mi_metadata = $self->getMetadata($ivideo_path);
  my $input_video_duration = &parseDurationAsSeconds($mi_metadata->{'General'}->{'Duration'});
  $doc_obj->add_utf8_metadata($topsection,"Duration",$mi_metadata->{'General'}->{'Duration'});
  $doc_obj->add_utf8_metadata($topsection,"Format", 'multimedia (' . $mi_metadata->{'General'}->{'Format'} . ')');
  if (defined $mi_metadata->{'General'}->{'File_size'})
  {
    $doc_obj->set_metadata_element($topsection, "FileSize", $mi_metadata->{'General'}->{'File_size'});
  }
  else
  {
    $doc_obj->set_metadata_element($topsection, "FileSize", &FileUtils::fileSize($ivideo_path));
  }
  if (defined $mi_metadata->{'Video'}->{'Format_Info'} && defined $mi_metadata->{'Video'}->{'Format'})
  {
    $doc_obj->add_utf8_metadata($topsection,"VideoFormat",$mi_metadata->{'Video'}->{'Format_Info'} . ' (' . $mi_metadata->{'Video'}->{'Format'} . ')');
  }
  if (defined $mi_metadata->{'Audio'}->{'Format_Info'} && defined $mi_metadata->{'Audio'}->{'Format'})
  {
    $doc_obj->add_utf8_metadata($topsection,"AudioFormat",$mi_metadata->{'Audio'}->{'Format_Info'} . ' (' . $mi_metadata->{'Audio'}->{'Format'} . ')');
  }
  $doc_obj->add_utf8_metadata($topsection,"Width",$mi_metadata->{'Video'}->{'Width'});
  $doc_obj->add_utf8_metadata($topsection,"Height",$mi_metadata->{'Video'}->{'Height'});
  print STDERR "Done!\n";

  # 2. Convert into a streamable MP4 and associate
  # - generate a path for our temporary converted video file
  my $ovideo_path = &FileUtils::filenameConcatenate($tmp_dir, 'gsv.mp4');
  if ($self->{'streamingHQsize'} > 0)
  {
    print STDERR " - Converting video to streamble format...\n";
    if (&FileUtils::fileExists($ovideo_path))
    {
      print "   - Found existing converted video in cache!\n";
    }
    else
    {
      # - first conversion pass
      print "   - Convert using Handbrake\n";
      my $streaming_HQ_size    = $self->{'streamingHQsize'};
      my $streaming_HQ_VideoBitrate    = $self->{'streamingHQVideoBitrate'};
      my $streaming_HQ_AudioBitrate    = $self->{'streamingHQAudioBitrate'};
      my $deinterlace = $self->{'videoDeinterlacingFilter'};
      my $video_processing_parameters;
      if (!$streaming_HQ_size || $streaming_HQ_size eq "fullsize")
      {
        $video_processing_parameters = "--strict-anamorphic";
      }
      else
      {
        $video_processing_parameters = "-w $streaming_HQ_size --loose-anamorphic";
      }
      if ($deinterlace eq "true")
      {
        $video_processing_parameters .= " --decomb";
      }
      # Default MenCoder options for x264
      my $mencoder_options = 'ref=2:bframes=2:subq=6:mixed-refs=0:weightb=0:8x8dct=0:trellis=0';
      my $is_cluster = $self->{'isCluster'};
      my $is_parallel = $self->{'isParallel'};
      # If we are parallel processing on a single (presumably) multicore computer
      # then we need to limit the number of threads (and hence CPUs) HandBrake
      # will utilize in order to emulate true parallel processing (otherwise the
      # first thread to get to HandBrake conversion will take up most the CPUs
      # causing all other threads to wait anyway). It will be interesting to test
      # whether parallel processing or serial processing (with HandBrake parallel
      # processing) is faster. *update* threads=1 *only* controls the encoding and
      # several other parts of Handbrake can run parallel (demuxing etc). I've
      # had to include a 'taskset' command to truly make Handbrake serial
      if ($is_parallel eq 'true'  && $is_cluster eq 'false')
      {
        $mencoder_options .= ':threads=1';
      }
      # Banish HandbrakeCLI to the (fixedCore-1)'th CPU if necessary
      my $cmd = '';
      if (defined $self->{'fixedCore'} && $self->{'fixedCore'} > 0)
      {
        $cmd .= 'taskset -c ' . ($self->{'fixedCore'} - 1) . ' ';
      }
      $cmd .= 'HandBrakeCLI -i "' . $ivideo_path . '" -t 1 -c 1 -f mp4 -O -o "' . $ovideo_path . '" ' . $video_processing_parameters . ' -e x264 -b ' . $streaming_HQ_VideoBitrate . ' -a 1 -E faac -6 dpl2 -R Auto -B ' . $streaming_HQ_AudioBitrate . ' -D 0.0 -x ' . $mencoder_options . ' > "' . $convert_log_path . '" 2>&1';
      # - retry a few times, stopping as soon as the output file appears
      my $attempt_count = 0;
      do
      {
        $attempt_count++;
        ###rint "[DEBUG: Video conversion attempt #" . $attempt_count . ": |" . $cmd . "|]\n";
        `$cmd`;
      }
      while ($attempt_count < 5 && !&FileUtils::fileExists($ovideo_path));
    }
    if (!&FileUtils::fileExists($ovideo_path))
    {
      die("Fatal Error! Failed to convert video: " . $ovideo_path . "\nReason:" . $! . "\n");
    }
    # Extra check - ensure the converted video is approximately the same duration
    # as the input video, give or take around 5 seconds
    my $output_raw_video_duration = &getDuration($ovideo_path);
    my $output_video_duration = &parseDurationAsSeconds($output_raw_video_duration);
    if (abs($input_video_duration - $output_video_duration) > 5)
    {
      print STDERR "!Warning! Output video does not have same duration as input video.\n";
    }
    print STDERR " - conversion done!\n";
  }
  else
  {
    print "   - skipping web-streamable conversion\n";
  }

  # 3. Extract keyframes using hive
  my $generate_keyframes = 1;
  if (defined $self->{'no_keyframes'} && $self->{'no_keyframes'} == 1)
  {
    $generate_keyframes = 0;
    print "   - skipping keyframe generation\n";
  }
  else
  {
    print STDERR " - extract keyframes...\n";
    ###$tmp_dir = '/tmp/ramdrive';
    my $oshots_path = &FileUtils::filenameConcatenate($tmp_dir, 'shots.xml');
    if (&FileUtils::fileExists($oshots_path))
    {
      print "   - found existing keyframe images in cache\n";
    }
    else
    {
      print "   - generating keyframe images using Hive2\n";
      # - Hive normally reads the converted video; when there isn't one (the
      #   conversion was skipped and nothing is cached) fall back to the input
      #   video, mirroring the choice made when associating files below
      my $keyframe_source = (-f $ovideo_path) ? $ovideo_path : $ivideo_path;
      my $cmd = 'hive2_ffmpegsvn -o "' . $oshots_path . '" -k "' . $tmp_dir . '" "' . $keyframe_source . '" >> "' . $convert_log_path . '" 2>&1';
      ###print "[cmd: " . $cmd . "]\n";
      `$cmd`;
    }
    if (!&FileUtils::fileExists($oshots_path))
    {
      die("Fatal Error! Failed to extract keyframe images: " . $oshots_path . "\nReason:" . $! . "\n");
    }
    print STDERR "  - keyframes extracted!\n";
  }

  # 4. Associate files (copies back to shared space if IO separated)
  print STDERR " - Associate derived files to doc_obj... ";
  # - associate streamable video
  if (-f $ovideo_path)
  {
    $doc_obj->associate_file($ovideo_path, 'gsv.mp4', 'video/mp4', $topsection);
  }
  else
  {
    $doc_obj->associate_file($ivideo_path, 'gsv.ts', 'video/ts', $topsection);
  }
  # - record all of the JPGs found in the temp directory as keyframes (the
  #   first one, in sorted order, doubles as the thumbnail)
  if ($generate_keyframes)
  {
    opendir(my $dh, $tmp_dir) or die('Error! Failed to read keyframe directory: ' . $tmp_dir . "\nReason:" . $! . "\n");
    my @shots = readdir($dh);
    closedir($dh);
    my $thumbnail = 0;
    foreach my $shot (sort @shots)
    {
      my $shot_path = &FileUtils::filenameConcatenate($tmp_dir, $shot);
      # - the dot is escaped so only a real ".jpg" extension qualifies
      if ($shot =~ /\.jpg$/)
      {
        if (!$thumbnail)
        {
          $doc_obj->add_utf8_metadata($topsection,"Thumbnail",$shot);
          $thumbnail = 1;
        }
        $doc_obj->add_utf8_metadata($topsection,"Keyframe",$shot);
        #$doc_obj->associate_file($shot_path,$shot,"image/jpeg",$topsection);
      }
    }
  }
  print STDERR "Done!\n";

  # 5. Done! Cleanup.
  print STDERR "SimpleVideoPlugin: Complete! [IOS:" . time() . "]\n";
  return 1;
}


## @function _ensureDirectory()
#
# Private helper for process(): make sure a working directory exists,
# creating it if necessary.
#
# @param $dir_path  directory to check / create
# @return 1 when the directory exists; dies if it could not be created
#
sub _ensureDirectory
{
  my ($dir_path) = @_;
  if (!&FileUtils::directoryExists($dir_path) && !mkdir($dir_path, 0775))
  {
    my $reason = $!;
    # - another import process may have created the directory between the
    #   test and the mkdir, so only give up if it is still missing
    if (!&FileUtils::directoryExists($dir_path))
    {
      die('Error! Failed to create directory: ' . $dir_path . "\nReason:" . $reason . "\n");
    }
  }
  return 1;
}


## @function getDuration()
#
# Ask the mediainfo command line tool for the duration string of a media
# file.
#
# @param (first argument)  path of the media file to inspect
# @return everything the command printed (stdout and stderr combined),
#         exactly as received
#
sub getDuration
{
  my $media_file = shift(@_);

  # Assemble the shell command piece by piece; the path is wrapped in
  # double quotes and stderr is folded into stdout
  my $duration_query = 'mediainfo --Inform="General;%Duration/String%" "';
  $duration_query .= $media_file;
  $duration_query .= '" 2>&1';

  my $raw_output = `$duration_query`;
  return $raw_output;
}
## getDuration() ##


## @function getMetadata()
#
# Run mediainfo with XML output on a media file and collect the simple
# <field>value</field> lines it prints, grouped by the type of the enclosing
# <track> element.
#
# @param $self         the plugin object (not consulted)
# @param $ivideo_path  path of the media file to inspect
# @return hash ref of the form {track type => {field => value}}. Fields seen
#         before any <track> element are stored under 'Unknown'. When several
#         tracks share a type, later values overwrite earlier ones.
#
sub getMetadata
{
  my ($self, $ivideo_path) = @_;
  my $cmd = 'mediainfo --Output=XML "' . $ivideo_path . '" 2>&1';
  ###rint "[DEBUG] command: " . $cmd . "\n";
  my $metadata_xml = `$cmd`;
  # - backticks yield undef if the command could not be started at all
  if (!defined $metadata_xml)
  {
    $metadata_xml = '';
  }
  ###rint "[DEBUG] result: " . $metadata_xml . "\n\n";
  my @lines = split(/\r?\n/, $metadata_xml);
  my $metadata = {'Unknown'=>{}};
  my $metadata_type = 'Unknown';
  foreach my $line (@lines)
  {
    # - start of a new track: capture only the value of the type attribute
    #   and tolerate further attributes (e.g. streamid="1"), which a greedy
    #   match used to swallow into the type name
    if ($line =~ /<track\s+type="([^"]+)"[^>]*>/)
    {
      $metadata_type = $1;
      if (!defined $metadata->{$metadata_type})
      {
        $metadata->{$metadata_type} = {};
      }
    }
    # - a one-line element holding a value
    elsif ($line =~ /<([^>]+)>(.+)<\/[^>]+>/)
    {
      my $field = $1;
      my $value = $2;
      $metadata->{$metadata_type}->{$field} = $value;
    }
  }
  return $metadata;
}
## getMetadata() ##


## @function parseDurationAsSeconds()
#
# Convert a human readable duration string into a whole number of seconds.
# Both the compact form ("1h 2mn 3s") and the spaced form ("1 h 2 min 3 s")
# are understood; any trailing newline or milliseconds part is ignored.
#
# @param $duration_str  the duration text (may be undef)
# @return the duration in seconds; 0 when the string is undefined or holds
#         no recognisable hour / minute / second component
#
sub parseDurationAsSeconds
{
  my ($duration_str) = @_;
  my $seconds = 0;
  # nothing to parse (e.g. the duration field was missing)
  if (!defined $duration_str)
  {
    return $seconds;
  }
  # h - hours
  if ($duration_str =~ /(\d+)\s*h/)
  {
    $seconds += $1 * 60 * 60;
  }
  # mn / min - minutes
  if ($duration_str =~ /(\d+)\s*(?:mn|min)/)
  {
    $seconds += $1 * 60;
  }
  # s - seconds (digits followed by "ms" cannot match: the unit must start
  # with 's')
  if ($duration_str =~ /(\d+)\s*s/)
  {
    $seconds += $1;
  }
  # ms - milliseconds (ignore - I'll be lucky to match to the closest second)
  return $seconds;
}
## parseDurationAsSeconds() ##

1;











