#!/usr/bin/perl

use strict;
use warnings;

# 1.1 Store all information extracted in a cool data structure
# - N = hostname, S = thread start, E = thread end
my $timing_data = {'M' => {'N'=>'', 'S'=>0, 'E'=>0}};
# 1.2 Check the file exists
if (!-f $ARGV[0])
{
  die("Error! File can't be read: " . $ARGV[0]);
}
my $main_log_filename = $ARGV[0];
# 1.2 From the filename we can parse in some information like the number of worker threads
my $number_of_workers = 0;
if ($main_log_filename =~ /\-W(\d+)E/)
{
  $number_of_workers = $1;
}
else
{
  die("Error! Malformed filename (expecting number of workers): " . $main_log_filename);
}
# 1.3 Initialize the data structure with the number of workers too (we don't
#     know the number of files yet, so they'll have to be adding on the fly)
for (my $i = 1; $i <= $number_of_workers; $i++)
{
  $timing_data->{'W' . $i} = {'N'=>'', 'S'=>0, 'E'=>0, 'F'=>{}};
}

# 2. Read in main log file
print " * Reading main log: " . $main_log_filename . "\n";
open(LOGIN, '<:utf8', $main_log_filename) or die("Error! Failed to open file for reading: " . $main_log_filename);
my $line = '';
my $currently_processing = {};
while ($line = <LOGIN>)
{
  # 2.1 Parse in the Master thread start time
  if ($line =~ /\[M\d?:(\d+)\] Starting on (.+)/)
  {
    $timing_data->{'M'}->{'S'} = $1;
    $timing_data->{'M'}->{'N'} = $2;
  }
  elsif ($line =~ /\[(W\d+):(\d+)\] Starting on (.+)/)
  {
    my $worker_id = $1;
    $timing_data->{$worker_id}->{'S'} = $2;
    $timing_data->{$worker_id}->{'N'} = $3;
  }
  elsif ($line =~ /\[(W\d+):(\d+)\] Processing/)
  {
    my $worker_id = $1;
    my $job_start_time = $2;
    $timing_data->{$worker_id}->{'F'}->{$job_start_time} = {'FN'=>'', 'PS'=>0, 'PE'=>0, 'E'=>0};  
    $currently_processing->{$worker_id} = $job_start_time;
  }
  # 2.3 Or we may parse in the starting times for each working thread
  # 2.4 Or we may also parse (the last encountered) completion time for each
  #     working thread
  elsif ($line =~ /\[(W\d+):(\d+)\] Process complete/)
  {
    my $worker_id = $1;
    my $job_end_time = $2;
    $timing_data->{$worker_id}->{'E'} = $job_end_time;
    my $job_start_time = $currently_processing->{$worker_id};
    $timing_data->{$worker_id}->{'F'}->{$job_start_time}->{'E'} = $job_end_time;
    delete($currently_processing->{$worker_id});
  }
  # 2.5 Finally, we may parse in the Master thread end time
  elsif ($line =~ /\[M\d?:(\d+)\] Master will exit when workers complete/)
  {
    $timing_data->{'M'}->{'E'} = $1;
  }
}
close(LOGIN);

# 3. Read each of worker logs parsing in information about the files processed
#    - each will be stored (in an associative array) against its start time
print " * Reading worker logs";
foreach my $worker_id (nsort keys %{$timing_data})
{
  my $jobs = $timing_data->{$worker_id}->{'F'};
  my $counter = 1;
  foreach my $job_start_time (sort keys %{$jobs})
  {
    my $log_filename = 'gsimport-' . $worker_id . '-' . $counter . '.log';
    print ".";
    open(WLOGIN, '<:utf8', $log_filename) or die("Error! Failed to open for reading: " . $log_filename);
    my $wline = '';
    while ($wline = <WLOGIN>)
    {
      if ($wline =~ /\[A:\d+\] SimpleVideoPlugin processing: (.+)/)
      {
        $timing_data->{$worker_id}->{'F'}->{$job_start_time}->{'FN'} = $1;
      }
      # Start of video processing (excluding as much IO as possible) 
      elsif ($wline =~ /\[C1:(\d+)\]/)
      {
        $timing_data->{$worker_id}->{'F'}->{$job_start_time}->{'PS'} = $1;
      }
      # Immediately after processing video
      elsif ($wline =~ /\[E2:(\d+)\]/)
      {
        $timing_data->{$worker_id}->{'F'}->{$job_start_time}->{'PE'} = $1;
      }
    }

    if ($timing_data->{$worker_id}->{'F'}->{$job_start_time}->{'PE'} <= 0)
    {
      print "\n[Warning - bogus log: $log_filename]";
    }

    close(WLOGIN);
    $counter++;
  }
}
print " Done!\n";

# 4. Produce CSV of information
print " * Generating timing information as CSV... ";
open(CSVOUT, '>:utf8', 'timing.csv') or die('Error! Failed to open file for writing: timing.csv');
print CSVOUT "number,id,hostname,start,end,hierarchy\n";
my $thread_counter = 1;
foreach my $thread (nsort keys %{$timing_data})
{
  my $data = $timing_data->{$thread};
  print CSVOUT $thread_counter . ',' . $thread . ',' . $data->{'N'} . ',' . strftime("%H:%M:%S", localtime($data->{'S'})) . ',' . strftime("%H:%M:%S", localtime($data->{'E'})) . ',';
  if ($thread eq 'M')
  {
    print CSVOUT '0';
  }
  else
  {
    print CSVOUT '1';
  }
  print CSVOUT "\n";
  $thread_counter++;
}
close(CSVOUT);
print "Done!\n";
