#!/usr/bin/perl -w

use strict;
use warnings;

use LWP;

use OAuth::Lite::Consumer;
use OAuth::Lite::AuthMethod;

use WWW::Mechanize;

# use CGI;


# Issue one OAuth-signed GET request against the HathiTrust Data API and
# print the response body to STDOUT.
#
# Arguments:
#   $doc_id - accepted for the caller's convenience but currently unused:
#             the request URL below is hard-coded.
#
# Output:
#   On success the response content is printed to STDOUT, preceded by a
#   short banner.  On failure the signed request URI and the HTTP status
#   line are printed to STDERR.
sub data_api
{
    my ($doc_id) = @_;

    #my $access_key = 'PUBLIC_OAUTH_CONSUMER_KEY';
    #my $secret_key = 'PUBLIC_OAUTH_CONSUMER_SECRET';

    # NOTE(review): the consumer key/secret are embedded in the source.
    # Consider loading them from configuration or the environment instead.
    my $access_key = '7e6ee38bae';                   # PUBLIC_OAUTH_CONSUMER_KEY
    my $secret_key = 'e0429c0394385486249b4a230702'; # PUBLIC_OAUTH_CONSUMER_SECRET

    # Alternative endpoints that have been used with this routine:
    #my $request_url = 'http://babel.hathitrust.org/cgi/htd/dapiserver';
    #my $request_url = "http://babel.hathitrust.org/cgi/htd/meta/mdp.39015019203879";
    my $request_url = "http://babel.hathitrust.org/cgi/htd/pagemeta/mdp.39015000000128/12";

    # The OAuth parameters are passed in the URL query string
    # (URL_QUERY auth method).
    my $consumer = OAuth::Lite::Consumer->new(
        'consumer_key'    => $access_key,
        'consumer_secret' => $secret_key,
        'auth_method'     => OAuth::Lite::AuthMethod::URL_QUERY,
    );

    my $response = $consumer->request(
        'method' => 'GET',
        'url'    => $request_url,
#       'params' => { 'hello' => 'world' },
    );

    if ($response->is_success) {
        print "Recieved content:\n";
        print "------\n";

        print $response->content();
    }
    else {
        print STDERR "**** Failed to retrieval any content from URL:\n";
        print STDERR "         ", $consumer->oauth_request->uri, "\n";
        # The status line is passed directly as a list element.  Nesting a
        # second 'print' here would send the status to STDOUT and write
        # that print's return value (1) to STDERR instead.
        print STDERR "**** Status: ", $response->status_line, "\n";
    }
}


# Download the HathiTrust Bibliographic API "full" JSON record for one
# catalog record number and save it beneath the local 'output' directory.
#
# Arguments:
#   $catalog_id - catalog record number; it is used both in the request URL
#                 and as the stem of the output filename.
#
# Output layout:
#   The digits of $catalog_id are split into groups of up to two digits.
#   Every group except the last becomes a nested directory (a lone group is
#   used as-is), e.g. id 000123456 is stored as
#       output/00/01/23/45/000123456.json
#
# Behaviour:
#   * If the output file already exists the record is skipped and no HTTP
#     request is made.
#   * Problems (HTTP failure, mkdir/open/close failure) are reported on
#     STDERR and the routine returns without dying, so a crawl over many
#     records can carry on.
sub bibliographic_api
{
    my ($catalog_id) = @_;

    if (!defined($catalog_id) || $catalog_id eq '') {
        print STDERR "Error: No catalog id supplied\n";
        return;
    }

    my $catalog_json = "$catalog_id.json";

    # Work out where the record lives on disk before touching the network.
    # Iterating a list (rather than testing the truth of each shifted
    # value) keeps a digit group of "0" from ending the path early.
    my @group_by = ($catalog_id =~ m/\d{1,2}/g);
    pop(@group_by) if (scalar(@group_by) > 1);

    my $top_dir   = "output";
    my $ofilename = join("/", $top_dir, @group_by, $catalog_json);

    if (-e $ofilename) {
        print STDOUT "$ofilename already exists.  Skipping.\n";
        return;
    }

    my $base_url = "http://catalog.hathitrust.org/api/volumes/full/recordnumber";
    my $url = "$base_url/$catalog_json";

    my $ua = LWP::UserAgent->new();
#    $ua->agent("Greenstone DL Ingest");

    # make request
    my $request = HTTP::Request->new(GET => $url);

    # get response
    my $response = $ua->request($request);

    if (!$response->is_success()) {
        print STDERR "Error: Failed to retrieve $url\n";
        print STDERR "-----\n";
        print STDERR "Status line: ", $response->status_line(), "\n";
        print STDERR "    ", $response->content(),"\n";
        return;
    }

    my $content = $response->content();

    # Create 'output' and each nested group directory, one level at a time.
    my $group_by_dir = '';
    foreach my $next_subdir ($top_dir, @group_by) {
        $group_by_dir = ($group_by_dir eq '') ? $next_subdir : "$group_by_dir/$next_subdir";
        next if (-d $group_by_dir);

        print "Creating '$group_by_dir'\n" if ($group_by_dir eq $top_dir);

        if (!mkdir($group_by_dir)) {
            print STDERR "Error: Failed to create directory $group_by_dir\n";
            print STDERR "$!\n";
            return;
        }
    }

    # Three-argument open with a lexical handle: the filename can never be
    # interpreted as part of the open mode.
    my $jout;
    if (!open($jout, '>', $ofilename)) {
        print STDERR "Error: Failed to open $ofilename\n";
        print STDERR "$!\n";
        return;
    }

    print $jout $content;
    print $jout "\n";

    # Buffered write errors (e.g. a full disk) only surface at close time.
    if (!close($jout)) {
        print STDERR "Error: Failed to write $ofilename\n";
        print STDERR "$!\n";
    }
}



# Run a full-text-only HathiTrust catalog search and harvest the
# bibliographic record of every result, following the "Next" link from one
# page of results to the next.
#
# Arguments:
#   $argv_ref - array reference of search terms (normally \@ARGV).  The
#               terms are joined with '+'; when no usable term is given the
#               search defaults to "zealand".
#
# For each "Catalog Record" link found on a results page, the last path
# segment of the link URL is taken as the catalog id and handed to
# bibliographic_api().
sub main
{

    my ($argv_ref) = @_;

    # Percent-encode each term so characters such as '&', '#' or '=' in a
    # search term cannot alter the structure of the query string.
    my @terms;
    foreach my $term (@$argv_ref) {
        next if (!defined($term) || $term eq '');

        my $escaped = $term;
        # Work on UTF-8 bytes so every escaped value fits in %XX.
        utf8::encode($escaped) if (utf8::is_utf8($escaped));
        $escaped =~ s/([^A-Za-z0-9\-_.~])/sprintf("%%%02X", ord($1))/ge;
        push(@terms, $escaped);
    }

    my $query = (scalar(@terms) > 0) ? join("+", @terms) : "zealand";

    my $base_url = "http://catalog.hathitrust.org/Search/Home?checkspelling=true&type=all&submit=&type=all&sethtftonly=true";
    my $url = $base_url . "&lookfor=" . $query;

    my $mech = WWW::Mechanize->new();
    $mech->get($url);

    # Harvest the current page first and only then look for a "Next" link,
    # so the final page of results (or a single-page result set) is
    # processed as well.
    while (1) {

        my $catalog_links = $mech->find_all_links(text_regex => qr/^Catalog Record\s*/);

        foreach my $cat_link (@$catalog_links) {
            my $cat_url = $cat_link->url();
            my ($cat_id) = ($cat_url =~ m/\/([^\/]*)$/);

            if (!defined($cat_id) || $cat_id eq '') {
                print STDERR "Warning: Unable to extract catalog id from $cat_url\n";
                next;
            }

            print "cat id = $cat_id\n";
            bibliographic_api($cat_id);
        }

        my $next_link = $mech->find_link( text_regex => qr/^Next\s+/);
        last if (!defined($next_link));

        $url = $next_link->url();

        print "Away to Process link: $url\n";

        $mech->get($url);
    }

}


# Script entry point: pass the command-line arguments to main() by
# reference; they are used as the catalog search terms.
main(\@ARGV);



