#!/usr/bin/perl

use strict;
use warnings;

use List::Util qw(sum);
use Sort::Key::Natural qw(natsort);

# Collected measurements, keyed by replication value
my $data = {};
# Directory that holds the CSV file and the per-epoch report directories
my $base_dir = '/research/jmt12/temp';
# Base name of the CSV file (without extension): the first command line
# argument when one is supplied, otherwise 'replication'
my $filename = defined $ARGV[0] ? $ARGV[0] : 'replication';
my $path = join('/', $base_dir, $filename . '.csv');
# Nothing can be done without the CSV file, so stop immediately
unless (-f $path)
{
  die('File not found: ' . $path);
}

# Read the CSV file. Every line that starts with three comma separated
# integers (replication, test run, epoch) identifies one test run; all other
# lines are skipped. The processing time, file IO time and data locality of
# each run are scraped from that run's gantt chart report and collected in
# $data, grouped by replication value.
open(my $csv_fh, '<:utf8', $path)
  or die('Error! Failed to open file for reading: ' . $path . ' (' . $! . ')');
while (my $line = <$csv_fh>)
{
  # Remove the line ending so the debug trace is not followed by a blank line
  chomp($line);
  print STDERR '[debug] line: ' . $line . "\n";
  # The second field (test run) has to be present but its value is not needed
  next unless ($line =~ /^(\d+),\d+,(\d+)/);
  my ($replication, $epoch) = ($1, $2);

  # Values scraped from the chart; they stay undefined until found
  my $avgtime;
  my $avgiotime;
  my $dl;

  # Locate gantt chart
  my $gantt_path = $base_dir . '/' . $filename . '/' . $epoch . '/' . $epoch . '-gantt.html';
  print STDERR ' * Searching for: ' . $gantt_path . "\n";
  if (open(my $gantt_fh, '<:utf8', $gantt_path))
  {
    # A dedicated loop variable is used so the CSV line is left untouched
    while (my $gantt_line = <$gantt_fh>)
    {
      if ($gantt_line =~ /<th>Average Processing Time:<\/th><td>([0-9hms]+)<\/td>/)
      {
        $avgtime = parseTime($1);
      }
      if ($gantt_line =~ /<th>Average File IO Time:<\/th><td>([0-9hms]+)<\/td>/)
      {
        $avgiotime = parseTime($1);
      }
      if ($gantt_line =~ /<th>Data Locality:<\/th><td>(\d+)%/)
      {
        $dl = $1;
      }
    }
    close($gantt_fh);
  }
  else
  {
    print STDERR 'Warning! Failed to find chart: ' . $gantt_path . ' (' . $! . ')' . "\n";
  }

  # All three values feed numeric statistics later on, so a chart lacking any
  # of them is fatal rather than being silently treated as zero
  my @missing = ();
  push(@missing, 'Average Processing Time') if (!defined $avgtime);
  push(@missing, 'Average File IO Time') if (!defined $avgiotime);
  push(@missing, 'Data Locality') if (!defined $dl);
  if (scalar(@missing) > 0)
  {
    die("Failed to parse timing information from: " . $gantt_path . ' (missing: ' . join(', ', @missing) . ')');
  }

  # Store for averaging
  if (!defined $data->{$replication})
  {
    $data->{$replication} = {'count' => 0,
                             'epochs' => [],
                             'ios' => [],
                             'times' => [],
                             'dls' => []
                            };
  }
  my $record = $data->{$replication};
  $record->{'count'}++;
  push(@{$record->{'epochs'}}, $epoch);
  push(@{$record->{'ios'}}, $avgiotime);
  push(@{$record->{'times'}}, $avgtime);
  push(@{$record->{'dls'}}, $dl);
}
close($csv_fh);

# Derive the summary statistics for every replication value. Each pair below
# names the prefix used for the derived statistics and the key of the array
# of raw values they are calculated from.
my @statistic_sources = (['pt', 'times'], ['io', 'ios'], ['dl', 'dls']);
foreach my $replication (natsort keys %{$data})
{
  my $record = $data->{$replication};
  foreach my $source (@statistic_sources)
  {
    my ($prefix, $values_name) = @{$source};
    my $values = $record->{$values_name};
    my $mean = calculateMean($values);
    my $median = calculateMedian($values);
    my $stddev = calculateStandardDeviation($values, $mean);
    # The bounds lie two standard deviations either side of the mean
    my $radius = 2 * $stddev;
    my $lbound = $mean - $radius;
    my $ubound = $mean + $radius;
    # Special cases for percentages, which can't be less than 0 nor greater than 100
    if ($prefix eq 'dl')
    {
      $lbound = 0 if ($lbound < 0);
      $ubound = 100 if ($ubound > 100);
    }
    $record->{$prefix . '_mean'} = $mean;
    $record->{$prefix . '_median'} = $median;
    $record->{$prefix . '_stddev'} = $stddev;
    $record->{$prefix . '_lbound'} = $lbound;
    $record->{$prefix . '_ubound'} = $ubound;
  }
}

# Write the report to STDOUT as a single HTML page: the page header and
# styles, a table of the raw per-run values and a table of the per-replication
# statistics.
print '<html>
<head>
<style>
table {
  border:1px solid black;
  border-collapse:collapse;
  margin-left:auto;
  margin-right:auto;
  width:80%;
}
td {
  border:1px solid black;
  padding:2px;
  text-align:right;
}
th {
  border:1px solid black;
  background-color:#C7C7C7;
}
</style>
</head>
<body>';

print '<h1>Data Locality Report</h1>';

print '<ul><li><a href="#raw">Raw Data</a></li><li><a href="#averaged">Averaged</a></li></ul>';

# Raw data: one row per test run, in the order the runs were read
print '<h2><a name="raw"></a>Raw Data</h2>
<table>
 <tr>
  <th rowspan="2">Replication</th>
  <th rowspan="2">Epoch</th>
  <th colspan="3">Avg Per File</th>
  <th rowspan="2">DataLocality</th>
 </tr>
 <tr>
  <th>IO</th><th>CPU</th><th>Total</th>
 </tr>
';
foreach my $replication (natsort keys %{$data})
{
  my $record = $data->{$replication};
  foreach my $test_run (0 .. $record->{'count'} - 1)
  {
    my $epoch = $record->{'epochs'}->[$test_run];
    my $avgiotime = $record->{'ios'}->[$test_run];
    my $avgtime = $record->{'times'}->[$test_run];
    my $dl = $record->{'dls'}->[$test_run];
    # The CPU column is whatever remains of the total once the IO time is removed
    printf('<tr><th><a name="result%d.%d" href="#avg%d">%2d</a></th><td><a href="%s/%d/%d-gantt.html">%d</a></td><td>%4d</td><td>%4d</td><td>%4d</td><td>%3d%%</td></tr>' . "\n", $replication, $test_run, $replication, $replication, $filename, $epoch, $epoch, $epoch, $avgiotime, ($avgtime - $avgiotime), $avgtime, $dl);
  }
}
print '</table>';
print '<a href="#">back to top</a><br />';


# Averaged data: one row per replication value
print '<h2><a name="averaged"></a>Averaged</h2>';
print '<table><tr><th rowspan="2">Replication</th><th rowspan="2">Count</th><th colspan="5">Processing Time (s)</th><th colspan="5">IO Time (s)</th><th colspan="5">Data Locality (%)</th></tr>
<tr>';
# The same five column headings are repeated under each of the three groups
foreach (1 .. 3)
{
  print '<th>Median</th><th>Mean</th><th>StdDev</th><th>LBound</th><th>UBound</th>';
}
print '</tr>';
foreach my $replication (natsort keys %{$data})
{
  my $record = $data->{$replication};
  print '<tr><th><a name="avg' . $replication . '" href="#result' . $replication . '.0">' . $replication . '</a></th><td>' . $record->{'count'} . '</td>';
  # Processing Time (pt)
  print renderStatisticsAsHTML($record, 'pt');
  # IO Time (io)
  print renderStatisticsAsHTML($record, 'io');
  # Data Locality Percentages (dl)
  print renderStatisticsAsHTML($record, 'dl', '%');
  print "</tr>\n";
}
print '</table>';
print '<a href="#">back to top</a>';
# Close the body element opened by the page header as well as the document
print '</body></html>';

exit;

## @function calculateMean()
# Calculate the arithmetic mean of a list of numbers.
# @param $values reference to an array of numeric values
# @return the sum of the values divided by the number of values
# Dies with "Empty array\n" when the array holds no values.
sub calculateMean
{
  my ($values) = @_;
  my @numbers = @{$values};
  # A mean of nothing is undefined, so refuse to continue
  die("Empty array\n") unless (@numbers);
  my $sum = 0;
  $sum += $_ for (@numbers);
  return $sum / scalar(@numbers);
}
## calculateMean() ##

## @function calculateMedian()
# Determine the median of a list of numbers.
# @param $data reference to an array of numeric values; the array itself is
#        left unmodified as a sorted copy is used
# @return the middle value of the numerically sorted values when there is an
#         odd number of them, otherwise the mean of the two middle values
# Dies with "Empty array\n" when the array holds no values, in the same way
# as calculateMean(), rather than reporting a median of 0 for no data.
sub calculateMedian
{
  my ($data) = @_;
  my @vals = sort {$a <=> $b} @{$data};
  my $len = scalar(@vals);
  if ($len == 0)
  {
    die("Empty array\n");
  }
  my $middle = int($len / 2);
  if ($len % 2) #odd?
  {
    return $vals[$middle];
  }
  # even: average the two values either side of the midpoint
  return ($vals[$middle - 1] + $vals[$middle]) / 2;
}
## calculateMedian() ##

## @function calculateStandardDeviation()
# Calculate the standard deviation of a list of numbers, dividing the sum of
# the squared differences from the mean by (count - 1).
# @param $data reference to an array of numeric values
# @param $average (optional) the mean of the values; calculated using
#        calculateMean() when not supplied
# @return the standard deviation, or 0 when there is only a single value
# Dies with "Empty array\n" when the array holds no values, whether or not a
# mean was supplied, as the divisor would otherwise be negative.
sub calculateStandardDeviation
{
  my ($data, $average) = @_;
  my $count = scalar(@{$data});
  if ($count == 0)
  {
    die("Empty array\n");
  }
  # A single value has no spread (and would lead to a division by zero below)
  if ($count == 1)
  {
    return 0;
  }
  if (!defined $average)
  {
    $average = calculateMean($data);
  }
  my $sqtotal = 0;
  foreach my $value (@{$data})
  {
    $sqtotal += ($average - $value) ** 2;
  }
  return ($sqtotal / ($count - 1)) ** 0.5;
}
## calculateStandardDeviation() ##

## @function parseTime()
# Convert a duration written with 'h', 'm' and 's' suffixed numbers (for
# example '1h2m3s') into a number of seconds.
# @param $raw_time_str the duration string; any of the three parts may be
#        absent, and only the first occurrence of each part is used
# @return the duration in seconds, 0 when none of the parts are present
sub parseTime
{
  my ($raw_time_str) = @_;
  # A match in list context yields the captured digits, or nothing (leaving
  # the variable undefined) when that part is absent
  my ($hours) = $raw_time_str =~ /(\d+)h/;
  my ($minutes) = $raw_time_str =~ /(\d+)m/;
  my ($seconds) = $raw_time_str =~ /(\d+)s/;
  my $time_in_seconds = 0;
  $time_in_seconds += $hours * 60 * 60 if (defined $hours);
  $time_in_seconds += $minutes * 60 if (defined $minutes);
  $time_in_seconds += $seconds if (defined $seconds);
  return $time_in_seconds;
}
## parseTime() ##

## @function renderStatisticsAsHTML
# Build the five table cells (median, mean, standard deviation, lower bound
# and upper bound) for one group of statistics.
# @param $data reference to a hash holding the '<prefix>_median',
#        '<prefix>_mean', '<prefix>_stddev', '<prefix>_lbound' and
#        '<prefix>_ubound' values
# @param $prefix the prefix selecting which group of statistics to render
# @param $suffix (optional) text appended to every value, defaults to ''
# @return a string of five <td> elements; the median is output as stored
#         while the other values are formatted to two decimal places
sub renderStatisticsAsHTML
{
  my ($data, $prefix, $suffix) = @_;
  $suffix = '' unless (defined $suffix);
  my @cells = ($data->{$prefix . '_median'});
  foreach my $statistic ('mean', 'stddev', 'lbound', 'ubound')
  {
    push(@cells, sprintf('%0.2f', $data->{$prefix . '_' . $statistic}));
  }
  return join('', map { '<td>' . $_ . $suffix . '</td>' } @cells);
}
## renderStatisticsAsHTML() ##
