#!/usr/bin/perl

# This perl script may be called directly or by running build.bat on
# windows (build.bat is in bin\windows)
#
# It drives a complete collection build: optional download of source
# material into the import directory, import.pl, buildcol.pl, replacing
# the old index directory with the newly built one and (optionally)
# installing the collection into GSDLHOME/collect.

package build;

use FileHandle;
use File::Copy;

BEGIN {
    die "GSDLHOME not set - did you remember to source setup.bash (unix) or " .
        "run setup.bat (windows)?\n" unless defined $ENV{'GSDLHOME'};
    die "GSDLOS not set - did you remember to source setup.bash (unix) or " .
        "run setup.bat (windows)?\n" unless defined $ENV{'GSDLOS'};
    unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
    STDOUT->autoflush(1);
    STDERR->autoflush(1);
}

use parsargv;
use util;
use cfgread;

# The option variables ($optionfile, $append, $maxdocs, @download, $out ...)
# are package globals which are filled in by parse_args.
&parse_args (\@ARGV);

my ($collection) = @ARGV;
if (!defined $collection || $collection !~ /\w/) {
    print STDERR "You must specify a collection to build\n";
    &print_usage();
    die "\n";
}

# additional options may be read from a file; they are parsed after (and
# so override) those given on the command line
if ($optionfile =~ /\w/) {
    # the handle must stay a bareword in package build as it is handed to
    # cfgread::read_cfg_line by name
    open (OPTIONS, '<', $optionfile) || die "Couldn't open $optionfile\n";
    my $line = [];
    my $options = [];
    while (defined ($line = &cfgread::read_cfg_line ('build::OPTIONS'))) {
        push (@$options, @$line);
    }
    close OPTIONS;
    &parse_args ($options);
}

# from here on $maxdocs is the command line fragment that gets passed to
# import.pl and buildcol.pl (empty when no limit was requested)
if ($maxdocs == -1) {
    $maxdocs = "";
} else {
    $maxdocs = "-maxdocs $maxdocs";
}

my $cdir = $collectdir;
$cdir = &util::filename_cat ($ENV{'GSDLHOME'}, "collect") unless $collectdir =~ /\w/;
my $importdir = &util::filename_cat ($cdir, $collection, "import");
my $archivedir = &util::filename_cat ($cdir, $collection, "archives");
my $buildingdir = &util::filename_cat ($cdir, $collection, "building");
my $indexdir = &util::filename_cat ($cdir, $collection, "index");
my $bindir = &util::filename_cat ($ENV{'GSDLHOME'}, "bin");

# $out holds the NAME of the handle that status output is printed to,
# $outfile keeps the original value of the -out option and $use_out is
# true when that value was a real file rather than STDERR/STDOUT
my $use_out = 0;
my $outfile = $out;
if ($out !~ /^(STDERR|STDOUT)$/i) {
    open (OUT, '>', $out) || die "Couldn't open output file $out\n";
    $out = "OUT";

    # delete any existing .final file
    &util::rm ("$outfile.final") if -e "$outfile.final";

    $use_out = 1;
} else {
    # the test above is case insensitive but handle names are not, so
    # normalise e.g. "stderr" to "STDERR" before using it as a handle
    $out = uc ($out);
}
$out->autoflush(1);

# delete any .kill file left laying around from a previously aborted build
if (-e &util::filename_cat ($cdir, $collection, ".kill")) {
    &util::rm (&util::filename_cat ($cdir, $collection, ".kill"));
}

&main();

close OUT if $use_out;


# prints the command line usage summary to STDERR
sub print_usage {
    print STDERR "\n usage: $0 [options] collection-name\n\n";
    print STDERR " options:\n";
    print STDERR " -optionfile file Get options from file, useful on systems where\n";
    print STDERR " long command lines may cause problems\n";
    print STDERR " -append Add new files to existing collection\n";
    print STDERR " -remove_archives Remove archives directory after successfully\n";
    print STDERR " building the collection.\n";
    print STDERR " -remove_import Remove import directory after successfully\n";
    print STDERR " importing the collection.\n";
    print STDERR " -buildtype build|import If 'build' attempt to build directly\n";
    print STDERR " from archives directory (bypassing import\n";
    print STDERR " stage). Defaults to 'import'\n";
    print STDERR " -maxdocs number Maximum number of documents to build\n";
    print STDERR " -download directory Directory (or file) to get import documents from.\n";
    print STDERR " There may be multiple download directories and they\n";
    print STDERR " may be of type http://, ftp://, or file://\n";
    print STDERR " Note that any existing import directory will be\n";
    print STDERR " deleted to make way for the downloaded data if\n";
    print STDERR " a -download option is supplied\n";
    print STDERR " -collectdir directory Collection directory (defaults to " .
        &util::filename_cat ($ENV{'GSDLHOME'}, "collect") . ")\n";
    print STDERR " -dontinstall Only applicable if -collectdir is set to something\n";
    print STDERR " other than the default. -dontinstall will suppress the\n";
    print STDERR " default behaviour which is to install the collection to\n";
    print STDERR " the gsdl/collect directory once it has been built.\n";
    print STDERR " -save_archives Create a copy of the existing archives directory called\n";
    print STDERR " archives.org\n";
    print STDERR " -out Filename or handle to print output status to.\n";
    print STDERR " The default is STDERR\n\n";
}

# runs the whole build: optional archive caching and download, then
# import and/or build depending on what material exists, then optional
# installation into GSDLHOME/collect. Dies (after writing an error code
# with final_out when -out names a file) if a stage fails.
sub main {

    if ($save_archives && -d $archivedir) {
        print $out "caching original archives to ${archivedir}.org\n";
        &util::cp_r ($archivedir, "${archivedir}.org");
    }

    # do the download thing if we have any -download options
    if (scalar (@download)) {

        # remove any existing import data
        if (&has_content ($importdir)) {
            print $out "build: WARNING: removing contents of $importdir\n";
            &util::rm_r ($importdir);
        }

        # note that the loop variable aliases the elements of @download,
        # so the clean-ups below modify the array itself
        foreach my $download_dir (@download) {

            # remove any leading or trailing whitespace from filenames (just in case)
            $download_dir =~ s/^\s+//;
            $download_dir =~ s/\s+$//;

            if ($download_dir =~ /^http:\/\//) {
                # http download

            } elsif ($download_dir =~ /^ftp:\/\//) {
                # ftp download

            } else {
                # we assume anything not beginning with http:// or ftp://
                # is a file or directory on the local file system.
                $download_dir =~ s/^file:(\/\/)?//;
                $download_dir =~ s/^\s+//; # may be whitespace between "file://" and the rest

                if (-e $download_dir) {
                    # copy download_dir and all it contains to the import directory
                    my $download_cmd = "perl " . &util::filename_cat ($bindir, "script", "filecopy.pl");
                    $download_cmd .= " -collectdir \"$collectdir\"" if $collectdir =~ /\w/;
                    $download_cmd .= " -out \"$outfile.download\"" if $use_out;
                    $download_cmd .= " \"" . $download_dir . "\" " . $collection;
                    system ($download_cmd);
                    # if using output directory append the file download output to it
                    &append_file ($out, "$outfile.download");
                } else {
                    print $out "WARNING: '$download_dir' does not exist\n";
                }
            }
        }
    }

    if (-e &util::filename_cat ($archivedir, "archives.inf")) {
        if (&has_content ($importdir)) {
            if ($buildtype eq "build") {
                &gsdl_build();
            } else {
                &gsdl_import();
                &gsdl_build();
            }
        } else {
            # there are archives but no import, build directly from archives
            print $out "build: no import material was found, building directly\n";
            print $out " from archives\n";
            &gsdl_build();
        }
    } else {
        if (&has_content ($importdir)) {
            if ($buildtype eq "build") {
                print $out "build: can't build directly from archives as no\n";
                print $out " imported archives exist (did you forget to\n";
                print $out " move the contents of $collection/import to\n";
                print $out " collection/archives?)\n";
            }
            &gsdl_import();
            &gsdl_build();
        } else {
            # no import or archives
            print $out "build: ERROR: The $collection collection has no import or archives data.\n";
            &final_out (1) if $use_out;
            die "\n";
        }
    }

    if ($collectdir ne "" && !$dontinstall) {
        my $install_collectdir = &util::filename_cat ($ENV{'GSDLHOME'}, "collect");
        if (!&util::filenames_equal ($collectdir, $install_collectdir)) {

            # install collection to gsdl/collect
            print $out "installing the $collection collection\n";
            my $newdir = &util::filename_cat ($install_collectdir, $collection);
            my $olddir = &util::filename_cat ($collectdir, $collection);
            if (-d $newdir) {
                print $out "build: Could not install collection as $newdir\n";
                print $out " already exists. Collection will remain at\n";
                print $out " $olddir\n";
                &final_out (4) if $use_out;
                die "\n";
            }
            if (!&File::Copy::move ($olddir, $newdir)) {
                print $out "build: Failed to install collection to $newdir\n";
                print $out " Collection will remain at $olddir\n";
                &final_out (5) if $use_out;
                die "\n";
            }
        }
    }

    &final_out (0) if $use_out;
}

# runs import.pl on the collection. Success is judged by the presence of
# archives.inf in the archives directory afterwards; on failure error
# code 2 is written (when -out names a file) and the script dies.
sub gsdl_import {

    print $out "importing the $collection collection\n\n";

    my $import_cmd = "perl " . &util::filename_cat ($bindir, "script", "import.pl");
    $import_cmd .= " -out \"$outfile.import\"" if $use_out;
    $import_cmd .= " -removeold" unless $append;
    $import_cmd .= " -collectdir \"$collectdir\"" if $collectdir =~ /\w/;
    $import_cmd .= " $maxdocs $collection";
    system ($import_cmd);
    # if using output directory append the import output to it
    &append_file ($out, "$outfile.import");

    if (-e &util::filename_cat ($archivedir, "archives.inf")) {
        print $out "$collection collection imported successfully\n\n";
        if ($remove_import) {
            print $out "removing import directory ($importdir)\n";
            &util::rm_r ($importdir);
        }
    } else {
        &final_out (2) if $use_out;
        print $out "\nimport.pl failed\n";
        die "\n";
    }
}

# runs buildcol.pl on the collection. Success is judged by the presence
# of the collection's .ldb or .bdb text database in the building
# directory; on failure error code 3 is written (when -out names a file)
# and the script dies. On success the building directory replaces the
# index directory.
sub gsdl_build {

    print $out "building the $collection collection\n\n";

    my $build_cmd = "perl " . &util::filename_cat ($bindir, "script", "buildcol.pl");
    $build_cmd .= " -out \"$outfile.build\"" if $use_out;
    $build_cmd .= " -collectdir \"$collectdir\"" if $collectdir =~ /\w/;
    $build_cmd .= " $maxdocs $collection";
    system ($build_cmd);
    # if using output directory append the buildcol output to it
    &append_file ($out, "$outfile.build");

    if (-e &util::filename_cat ($buildingdir, "text", "$collection.ldb") ||
        -e &util::filename_cat ($buildingdir, "text", "$collection.bdb")) {
        print $out "$collection collection built successfully\n\n";
        if ($remove_archives) {
            print $out "removing archives directory ($archivedir)\n";
            &util::rm_r ($archivedir);
        }
    } else {
        &final_out (3) if $use_out;
        print $out "\nbuildcol.pl failed\n";
        die "\n";
    }

    # replace old indexes with new ones
    if (&has_content ($indexdir)) {
        print $out "removing old indexes\n";
        &util::rm_r ($indexdir);
    }
    rmdir ($indexdir) if -d $indexdir;
    if (!&File::Copy::move ($buildingdir, $indexdir)) {
        # previously a failed move went completely unnoticed; keep going
        # (as before) but at least tell the user about it
        print $out "build: WARNING: could not move $buildingdir to $indexdir ($!)\n";
    }

    # remove the cached archives
    if ($save_archives && -d "${archivedir}.org") {
        &util::rm_r ("${archivedir}.org");
    }
}

# returns 1 if $dir is a readable directory containing at least one
# entry other than "." and "..", 0 otherwise
sub has_content {
    my ($dir) = @_;

    if (!-d $dir) {return 0;}

    opendir (DIR, $dir) || return 0;
    my @files = readdir DIR;
    # a directory handle has to be released with closedir (close only
    # affects the file handle of the same name)
    closedir DIR;

    foreach my $file (@files) {
        if ($file !~ /^\.{1,2}$/) {
            return 1;
        }
    }
    return 0;
}

# appends the contents of $file to the handle named by $handle and then
# deletes $file. Does nothing if $file can't be opened (for instance
# because the sub-script never created it).
sub append_file {
    my ($handle, $file) = @_;

    open (FILE, '<', $file) || return;
    my $contents;
    {
        # slurp the whole file; local restores $/ even if we die
        local $/ = undef;
        $contents = <FILE>;
    }
    close FILE;
    # the braces stop perl from mistaking the handle for an operand
    print {$handle} $contents if defined $contents;
    &util::rm ($file);
}

# creates a file called $outfile.final (should only be called if -out option
# is used and isn't STDERR or STDOUT) and writes an output code to it.
# An output code of 0 specifies that there was no error
# (callers pass a non-zero code when one of the build stages failed).
sub final_out {
    my ($exit_code) = @_;

    # three-argument open so that nothing in $outfile can be taken as
    # part of the open mode
    if (open (FINAL, '>', "$outfile.final")) {
        print FINAL $exit_code;
        close FINAL;
    } else {
        # not fatal, but don't hide the fact that no code was recorded
        print STDERR "build: WARNING: couldn't write $outfile.final ($!)\n";
    }
}

# parses the options in the array referenced by $argref into the package
# global option variables using parsargv::parse. Each specification
# below has the form 'name/regex/default'. Prints the usage summary and
# dies if parsargv::parse reports failure.
sub parse_args {
    my ($argref) = @_;

    if (!parsargv::parse($argref,
                         'optionfile/.*/', \$optionfile,
                         'append', \$append,
                         'remove_archives', \$remove_archives,
                         'remove_import', \$remove_import,
                         'buildtype/^(build|import)$/import', \$buildtype,
                         # anchored at both ends so that values such as
                         # "10abc" are rejected rather than later being
                         # compared numerically
                         'maxdocs/^\-?\d+$/-1', \$maxdocs,
                         'download/.+', \@download,
                         'collectdir/.*/', \$collectdir,
                         'dontinstall', \$dontinstall,
                         'save_archives', \$save_archives,
                         'out/.*/STDERR', \$out)) {
        &print_usage();
        die "\n";
    }
}