package LoremTextPlugin;

use TextPlugin;

use strict;
no strict 'refs'; # allow filehandles to be variables and viceversa
no strict 'subs';

# Establish the inheritance chain at compile time: this plugin is a TextPlugin.
BEGIN {
    @LoremTextPlugin::ISA = qw(TextPlugin);
}

# Argument descriptors this plugin contributes; new() appends them to the
# shared "ArgList". The only argument is the filename pattern, whose default
# comes from get_default_process_exp().
my $arguments = [
    {
        'name' => "process_exp",
        'desc' => "{BasePlugin.process_exp}",
        'type' => "regexp",
        'deft' => get_default_process_exp(),
        'reqd' => "no",
    },
];

# Plugin description record; new() appends it to the shared "OptList".
my $options = {
    'name'           => "LoremTextPlugin",
    'desc'           => "TextPlugin extended to do a little more processing (to stress CPU rather than IO)",
    'abstract'       => "no",
    'inherits'       => "yes",
    # Source docs in regular txt format can be replaced with GS-generated html
    'srcreplaceable' => "yes",
    'args'           => $arguments,
};


# Return the default filename pattern (as a string) for the "process_exp"
# argument: a case-insensitive match on names ending in ".txt" or ".text".
# Any invocant or argument passed in is ignored.
sub get_default_process_exp
{
  my ($self) = @_;

  my $default_exp = q^(?i)\.te?xt$^;
  return $default_exp;
}

# Constructor.
#
# Parameters:
#   $class           - class name to bless the new object into
#   $pluginlist      - array ref; this class name is appended to it
#   $inputargs       - passed through unchanged to the TextPlugin constructor
#   $hashArgOptLists - hash ref; this plugin's argument descriptors are
#                      appended to its "ArgList" entry and its option record
#                      to its "OptList" entry
#
# Returns the object built by the TextPlugin constructor, re-blessed into
# $class, with empty 'lexicon' and 'word_lengths' hashes attached.
sub new
{
  my ($class) = shift (@_);
  my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
  push(@$pluginlist, $class);

  push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
  push(@{$hashArgOptLists->{"OptList"}},$options);

  # Use an explicit class-method call rather than the indirect object form
  # ("new TextPlugin(...)"), which is parse-ambiguous in a package that
  # defines its own new().
  my $self = TextPlugin->new($pluginlist, $inputargs, $hashArgOptLists);

  # Per-document statistics; process() rebuilds both for every document.
  $self->{'lexicon'} = {};
  $self->{'word_lengths'} = {};

  return bless $self, $class;
}

# Do plugin-specific processing of doc_obj.
#
# Besides the usual text handling (Title and FileFormat metadata, adding the
# text to the document), this tokenises the document text and records two
# extra metadata values on the top section:
#   Lexicon     - "word:count" pairs for every distinct lower-cased word,
#                 sorted alphabetically and joined with ", "
#   WordLengths - "length:count" pairs giving how many words have each
#                 length, sorted numerically by length and joined with ", "
# The same tallies are kept on $self as 'lexicon' and 'word_lengths'.
#
# Parameters:
#   $textref  - reference to the document text (modified by text_to_html)
#   $metadata - hash ref; if it already has a defined 'Title', no title is
#               derived from the text
#   $doc_obj  - document object that receives the metadata and text
#   $pluginfo, $base_dir, $file, $gli - accepted but not used here
#
# Returns 1.
sub process
{
  my $self = shift (@_);
  my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;

  my $cursection = $doc_obj->get_top_section();

  # get title metadata
  # (don't need to get title if it has been passed
  # in from another plugin)
  if (!defined $metadata->{'Title'})
  {
    my $title = $self->get_title_metadata($textref);
    $doc_obj->add_utf8_metadata ($cursection, "Title", $title);
  }
  # Add FileFormat metadata
  $doc_obj->add_metadata($cursection, "FileFormat", "Text");

  # Process the text to determine
  # - a) the lexicon used, and
  # - b) the frequency of words of various lengths
  my %lexicon;
  my %word_lengths;

  # The separator must be a real regex: a quoted '/.../' string would make
  # the slashes literal pattern characters and the text would never split.
  foreach my $raw_word (split(/[,.\s]+/, $$textref))
  {
    # Text that starts with a separator yields an empty leading field.
    next if ($raw_word eq '');

    my $word = lc($raw_word);
    $lexicon{$word}++;
    $word_lengths{length($word)}++;
  }

  # Start afresh for every document so counts never carry over.
  $self->{'lexicon'} = \%lexicon;
  $self->{'word_lengths'} = \%word_lengths;

  my @lexicon = map { $_ . ':' . $lexicon{$_} } sort keys %lexicon;
  $doc_obj->add_metadata($cursection, "Lexicon", join(', ', @lexicon));

  # Lengths are numbers, so sort numerically (otherwise "11" precedes "3").
  my @word_lengths = map { $_ . ':' . $word_lengths{$_} }
                     sort { $a <=> $b } keys %word_lengths;
  $doc_obj->add_metadata($cursection, "WordLengths", join(', ', @word_lengths));

  # insert preformat tags and add text to document object
  $self->text_to_html($textref); # modifies the text
  $doc_obj->add_utf8_text($cursection, $$textref);

  return 1;
}

1;
