#!/usr/bin/perl

use strict;
use warnings;

use List::Util 'shuffle';

# Script entry point.
#
# Usage: manifestinator.pl <import directory> [<max number of documents>]
#
# Writes "manifest.xml" (or "manifest-<N>.xml" when a positive document
# limit N is given) into the current working directory. The file lists the
# entries found beneath the import directory, as emitted by manifestify().

# The first argument must name an existing directory; otherwise show usage.
if (!defined $ARGV[0] || !-d $ARGV[0])
{
  print "usage: manifestinator.pl <import directory> [<max number of documents>]\n";
  exit(0);
}

# Optional second argument: the maximum number of documents to list.
# Anything that is not a plain non-negative integer is ignored; 0 means
# "no limit".
my $max_docs = 0;
if (defined $ARGV[1] && $ARGV[1] =~ /^\d+$/)
{
  $max_docs = $ARGV[1];
}

# The limit, when set, becomes part of the output file name.
my $manifest_filename = 'manifest';
if ($max_docs > 0)
{
  $manifest_filename .= '-' . $max_docs;
}

# XMLOUT stays a package (bareword) filehandle because manifestify() prints
# to it by name. Fail loudly if the manifest cannot be created instead of
# silently printing to a closed handle.
open(XMLOUT, '>:encoding(UTF-8)', $manifest_filename . '.xml')
  or die ("Failed to open " . $manifest_filename . ".xml for writing: $!\n");
print XMLOUT "<Manifest>\n";
print XMLOUT "  <Index>\n";

manifestify($ARGV[0], '', $max_docs, 0);

print XMLOUT "  </Index>\n";
print XMLOUT "</Manifest>\n";

# Buffered write errors (e.g. disk full) only surface at close time.
close(XMLOUT)
  or die ("Failed to write " . $manifest_filename . ".xml: $!\n");
exit;

# Recursively walk a directory and write one <Filename> element per
# non-directory entry to the XMLOUT filehandle (which the caller must have
# opened for writing).
#
# Parameters:
#   $dir           - directory to scan
#   $prefix        - path, relative to the top-level import directory, that is
#                    prepended to every name written ('' at the top level;
#                    otherwise ends in '/')
#   $max_docs      - maximum number of entries to write; 0 means no limit
#   $current_count - number of entries already written before this call
#
# Returns the updated count of entries written so far.
# Dies if $dir cannot be opened for reading.
#
# Entries are visited in random order (List::Util::shuffle), so when a limit
# is in force the selected subset differs from run to run. Names starting
# with '.' (including '.' and '..') are skipped.
sub manifestify
{
  my ($dir, $prefix, $max_docs, $current_count) = @_;
  if ($max_docs > 0 && $current_count >= $max_docs)
  {
    return $current_count;
  }
  # A lexical dirhandle keeps each recursion level independent of the others.
  opendir(my $dh, $dir)
    or die ("Failed to open import directory for reading! ($dir: $!)\n");
  my @files = readdir($dh);
  closedir($dh);
  foreach my $file (shuffle @files)
  {
    if ($file =~ /^\./)
    {
      next;
    }
    my $path = $dir . '/' . $file;
    if (-d $path)
    {
      my $new_prefix = $prefix . $file . '/';
      $current_count = manifestify($path, $new_prefix, $max_docs, $current_count);
    }
    else
    {
      my $entry = $prefix . $file;
      # readdir() returns raw bytes. Decode valid UTF-8 names to characters so
      # a UTF-8 output layer does not double-encode them; names that are not
      # valid UTF-8 are left untouched (utf8::decode then returns false).
      utf8::decode($entry);
      # Escape XML metacharacters so unusual file names cannot produce a
      # malformed manifest. '&' must be handled first.
      $entry =~ s/&/&amp;/g;
      $entry =~ s/</&lt;/g;
      $entry =~ s/>/&gt;/g;
      print XMLOUT "    <Filename>" . $entry . "</Filename>\n";
      $current_count++;
    }
    # Stop as soon as the limit is reached, including when a recursive call
    # reached it.
    if ($max_docs > 0 && $current_count >= $max_docs)
    {
      return $current_count;
    }
  }
  return $current_count;
}
