###########################################################################
#
# HDThriftFS.pm -- file functions acting upon a HDFS via thrift
#
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the
# University of Waikato, New Zealand.
#
# Copyright (C) 2013 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################

################################################################################
## An Object wrapped around a HDThrift file handle usable as a Perl Handle    ##
################################################################################

package FileUtils::HDThriftFS::ThriftFH;

require Tie::Handle;

use base 'Tie::Handle';
use Devel::Peek;
use MIME::Base91;

# File-level debug switch: when set to a true value, PRINT and READ dump the
# raw and Base91-encoded forms of every buffer to STDERR via Devel::Peek.
my $debug_encoding = 0;

# Constructor called by tie(). Builds the per-handle state hash around the
# supplied Thrift client object.
#
# @param  $class          the package to bless the new handle object into
# @param  $thrift_client  the client object all file operations are sent to
# @return the blessed handle object
#
# State kept per handle:
#   buffer_length - number of bytes requested per read when hunting for lines
#   client        - the Thrift client
#   eof           - true once reading has run out of file
#   fh            - the Thrift-side file handle (0 means "nothing open")
#   file_length   - length of the file as recorded by OPEN
#   mode          - 'r' or 'w' as set by OPEN
#   read_offset   - position within the file the next line is read from
#   warnings      - flags recording which one-off notices were already printed
sub TIEHANDLE
{
  my $class = shift;
  my $thrift_client = shift;
  my $self = {};
  $self->{'buffer_length'} = 4096; # 4k blocks
  $self->{'client'} = $thrift_client;
  # READ, READLINE and EOF all consult this flag, so give it a defined value
  # from the start rather than relying on a missing key
  $self->{'eof'} = 0;
  $self->{'fh'} = 0;
  $self->{'file_length'} = 0;
  $self->{'mode'} = 'r';
  $self->{'read_offset'} = 0; # A read offset
  $self->{'warnings'} = {'binmode' => 0};
  return bless $self, $class;
}

# Handler for syswrite() on the tied handle. Extracts the requested slice of
# the caller's data and sends it through PRINT, so it gets the same mode check
# and encoding as any other output.
#
# @param  $scalar  the data to write from
# @param  $length  number of bytes to write (defaults to everything from
#                  $offset onwards)
# @param  $offset  position within $scalar to start from (defaults to 0)
# @return the number of bytes handed to PRINT, or nothing (undef in scalar
#         context) when PRINT refused the write
sub WRITE
{
  my $self = shift;
  my ($scalar, $length, $offset) = @_;
  $scalar = '' unless defined $scalar;
  $offset = 0 unless defined $offset;
  my $chunk = defined $length ? substr($scalar, $offset, $length)
                              : substr($scalar, $offset);
  # substr yields undef when $offset lies outside the string
  $chunk = '' unless defined $chunk;
  # PRINT returns false when the handle is not open for writing
  return unless $self->PRINT($chunk);
  return length($chunk);
}

# Handler for print() on the tied handle. Each argument is Base91 encoded and
# sent to the Thrift client as its own write call.
#
# @param  the strings to write (any number)
# @return 1 when the handle is in write ('w') or append ('a') mode and the
#         arguments were sent, 0 otherwise (nothing is written in that case)
sub PRINT
{
  my $self = shift;
  my $current_mode = $self->{'mode'};
  # printing only makes sense on a handle opened for writing or appending
  unless ($current_mode eq 'w' || $current_mode eq 'a')
  {
    return 0;
  }
  my $client = $self->{'client'};
  for my $plain_chunk (@_)
  {
    if ($debug_encoding)
    {
      print STDERR "Print String: \n=== START ===\n";
      Dump($plain_chunk);
      print STDERR "\n=== END ===\n\n";
    }
    # everything sent through to the Thrift Server travels Base91 encoded
    my $wire_chunk = MIME::Base91::encode($plain_chunk);
    if ($debug_encoding)
    {
      print STDERR "Encoded String: \n=== START ===\n";
      Dump($wire_chunk);
      print STDERR "\n=== END ===\n\n";
    }
    $client->write($self->{'fh'}, $wire_chunk);
  }
  return 1;
}

# Handler for printf() on the tied handle. Formats the arguments with sprintf
# and hands the resulting string to PRINT.
#
# @param  $format  a sprintf-style format string
# @param  the values to substitute into the format
# @return whatever PRINT returns (1 when written, 0 when the handle is not
#         open for writing)
sub PRINTF
{
  my $self = shift;
  my $format = shift;
  return $self->PRINT(sprintf($format, @_));
}

# Handler for read()/sysread() on the tied handle. Fetches a block from the
# Thrift client, Base91 decodes it and stores the result in the caller's
# buffer.
#
# @param  the caller's buffer (overwritten with the decoded data)
# @param  $length  number of bytes to request
# @param  $offset  when given, used as the position in the FILE to read from
#                  (as this handler has always done); the handle's own read
#                  position is not moved. When omitted, the read continues
#                  from the handle's read position, which is then advanced.
# @return the number of decoded bytes placed in the buffer; 0 when the handle
#         is not in read mode or has run out of file
sub READ
{
  my $self = shift;
  my (undef, $length, $offset) = @_;
  my $bytes_read = 0;
  if (!$self->{'eof'} && $self->{'mode'} eq 'r')
  {
    # a plain read(FH, $buf, $len) supplies no offset at all
    my $sequential = !defined $offset;
    my $file_offset = $sequential ? $self->{'read_offset'} : $offset;
    if ($sequential && $file_offset >= $self->{'file_length'})
    {
      # nothing left - don't ask the server for data beyond the end
      $self->{'eof'} = 1;
      return $bytes_read;
    }
    my $encoded_buffer = $self->{'client'}->read($self->{'fh'}, $file_offset, $length);
    if ($debug_encoding)
    {
      print STDERR "Read String: \n=== START ===\n"; Dump($encoded_buffer); print STDERR "\n=== END ===\n\n";
    }
    my $decoded_buffer = MIME::Base91::decode($encoded_buffer);
    $decoded_buffer = '' unless defined $decoded_buffer;
    if ($debug_encoding)
    {
      print STDERR "Decoded String: \n=== START ===\n"; Dump($decoded_buffer); print STDERR "\n=== END ===\n\n";
    }
    # length(), not scalar(): the caller expects a byte count
    $bytes_read = length($decoded_buffer);
    # @_ aliases the caller's variables, so assigning to $_[0] is how the
    # caller's buffer gets filled
    $_[0] = $decoded_buffer;
    if ($sequential)
    {
      $self->{'read_offset'} += $bytes_read;
      if ($bytes_read == 0 || $self->{'read_offset'} >= $self->{'file_length'})
      {
        $self->{'eof'} = 1;
      }
    }
  }
  return $bytes_read;
}

# Handler for <FH> / readline() on the tied handle. Returns the next line
# (including its trailing "\n" when there is one) starting at the handle's
# read position, fetching the file one buffer at a time until a newline is
# seen or the file runs out.
#
# @return the next line, or undef when the handle is not in read mode or
#         there is nothing left to read
#
# Note: only a single line is returned per call regardless of calling context,
# and "\n" is always the line terminator.
sub READLINE
{
  my $self = shift;
  my $line = undef;
  # out of file? out of lines!
  # not in read mode - can't read!
  if (!$self->{'eof'} && $self->{'mode'} eq 'r')
  {
    # never start a read at or beyond the end of the file
    if ($self->{'read_offset'} >= $self->{'file_length'})
    {
      $self->{'eof'} = 1;
      return $line;
    }
    my $buffer_length = $self->{'buffer_length'};
    my $decoded_buffer = '';
    my $newline_offset = -1;
    my $search_eof = 0;
    # keep appending buffers of data until we hold a newline or hit the end
    while (!$search_eof && $newline_offset == -1)
    {
      my $search_offset = $self->{'read_offset'} + length($decoded_buffer);
      my $encoded_chunk = $self->{'client'}->read($self->{'fh'}, $search_offset, $buffer_length);
      my $decoded_chunk = MIME::Base91::decode($encoded_chunk);
      $decoded_chunk = '' unless defined $decoded_chunk;
      # only the freshly appended part can contain a newline we haven't seen
      my $scan_from = length($decoded_buffer);
      $decoded_buffer .= $decoded_chunk;
      $newline_offset = index($decoded_buffer, "\n", $scan_from);
      # if I read less than I asked for, or my next search offset is at or
      # beyond the end of the file, then I've run out of 'file'
      if (length($decoded_chunk) == 0
          || length($decoded_chunk) < $buffer_length
          || ($self->{'read_offset'} + length($decoded_buffer)) >= $self->{'file_length'})
      {
        $search_eof = 1;
      }
    }
    if ($newline_offset > -1)
    {
      # buffer contains a newline: return just that fragment; whatever
      # follows is fetched again on the next call
      $line = substr($decoded_buffer, 0, $newline_offset + 1);
      $self->{'read_offset'} += length($line);
      if ($self->{'read_offset'} >= $self->{'file_length'})
      {
        $self->{'eof'} = 1; # that was the last line
      }
    }
    else
    {
      # no newline: whatever we hold is the final (unterminated) line. An
      # empty buffer means there was no line at all, so $line stays undef
      if (length($decoded_buffer) > 0)
      {
        $line = $decoded_buffer;
      }
      $self->{'read_offset'} += length($decoded_buffer);
      $self->{'eof'} = 1; # out of file
    }
  }
  return $line;
}

# Handler for getc() on the tied handle. Not implemented yet: it reports that
# on STDERR and hands back an empty string.
#
# @return always the empty string
sub GETC
{
  my ($self) = @_;
  my $no_character = '';
  print STDERR "ThriftFH::GETC() - implement me!\n";
  return $no_character;
}

# Handler for close() on the tied handle. Asks the Thrift client to close the
# underlying file handle and forgets it.
#
# @return always 1
sub CLOSE
{
  my $self = shift;
  # 0 means nothing is open (see DESTROY) - don't send a second close for a
  # handle that has already been closed
  if ($self->{'fh'} != 0)
  {
    $self->{'client'}->close($self->{'fh'});
    $self->{'fh'} = 0;
  }
  return 1;
}


## @function open()
#
# Opens (or creates) the file at the given path through the Thrift client and
# records the resulting handle and mode on this object.
#
# @param  $path  the path of the file to open
# @param  $mode  'w' or '>' to (re)create the file for writing; anything else,
#                including no mode at all, opens it for reading
# @return this handle object
#
# Note the argument order is (path, mode).
#
sub OPEN
{
  my $self = shift;
  my $path = shift;
  my $mode = shift;
  if (!defined $mode)
  {
    $mode = '<'; # Default to read as that's least destructive
  }
  # as with a normal open(), reusing a handle that still has a file open
  # closes that file first so the Thrift-side handle is not leaked
  if ($self->{'fh'} != 0)
  {
    $self->{'client'}->close($self->{'fh'});
    $self->{'fh'} = 0;
  }
  # start from a clean slate - a reused handle must not inherit the read
  # position or end-of-file state of whatever it had open before
  $self->{'read_offset'} = 0;
  $self->{'eof'} = 0;
  $self->{'file_length'} = 0;
  # write mode always creates a new file - clobbering any existing
  if ($mode eq 'w' || $mode eq '>')
  {
    # if file already exists, create() will (eventually) fail - so we need to
    # ensure it doesn't already exist beforehand
    if ($self->{'client'}->exists($path))
    {
      $self->{'client'}->rm ($path);
    }
    $self->{'fh'} = $self->{'client'}->create($path);
    $self->{'mode'} = 'w'; # writing
  }
  else
  {
    # open will fail if the file doesn't already exist - so we may
    # need to create it beforehand
    if (!$self->{'client'}->exists($path))
    {
      $self->{'fh'} = $self->{'client'}->create($path);
      $self->{'file_length'} = 0;
    }
    else
    {
      $self->{'fh'} = $self->{'client'}->open($path);
      # for reading we need to know the file size so as to avoid Thrift throwing
      # exceptions when we try to read past the end
      my $file_stat = $self->{'client'}->stat($path);
      $self->{'file_length'} = $file_stat->{length};
    }
    $self->{'mode'} = 'r'; # reading
  }
  return $self;
}
## open() ##


## @function binmode()
#
# Handler for binmode() on the tied handle. There is nothing to switch, so
# this only prints a one-off notice (once per handle) on STDERR.
#
# @return always 1, so that callers checking the result of binmode() see
#         success on every call and not just on the first one
#
sub BINMODE
{
  my $self = shift;
  # Have we warned about BIN mode not being applicable?
  if ($self->{'warnings'}->{'binmode'} != 1)
  {
    print STDERR "Notice! BIN mode not applicable in ThriftFS (all data base91 encoded)\n";
    # we have now!
    $self->{'warnings'}->{'binmode'} = 1;
  }
  return 1;
}
## BINMODE


## @function eof()
#
# Handler for eof() on the tied handle.
#
# @return the handle's end-of-file flag exactly as stored (true once reading
#         has run out of file)
#
sub EOF
{
  my ($self) = @_;
  my $at_end = $self->{'eof'};
  return $at_end;
}
## eof() ##


# Handler for tell() on the tied handle. Not implemented yet: it only reports
# that on STDERR.
#
# @return the result of the print to STDERR (not a file position)
sub TELL
{
  my $printed = print STDERR "ThriftFH::TELL() - implement me!\n";
  return $printed;
}

# Handler for seek() on the tied handle. Not implemented yet: the arguments
# are accepted but ignored, and a reminder is printed on STDERR.
#
# @param  $offset  the requested position (ignored)
# @param  $whence  how to interpret $offset (ignored)
# @return the result of the print to STDERR
sub SEEK
{
  my ($self, $offset, $whence) = @_;
  my $printed = print STDERR "ThriftFH::SEEK() - implement me!\n";
  return $printed;
}

# Destructor. If a file is still open when the handle object goes away, close
# it through the Thrift client so the handle is not left dangling.
sub DESTROY
{
  my ($self) = @_;
  # 0 marks "nothing open" - in that case there is nothing to clean up
  unless ($self->{'fh'} == 0)
  {
    my $client = $self->{'client'};
    $client->close($self->{'fh'});
    $self->{'fh'} = 0;
  }
}

# Handler for untie(). Nothing needs doing here; the handle object is simply
# handed back.
sub UNTIE
{
  my ($self) = @_;
  return $self;
}

1;
