package org.greenstone.applet.GsdlCollageApplet;

import java.awt.*;
import java.io.*;
import java.net.*;
import java.util.*;

import javax.swing.ImageIcon; //****

//import org.apache.log4j.*;

/** 
 *  @author Katrina Edgar
 *  @author David Bainbridge
 *
 *  Controls retrieval of images from the specified starting url. Follows appropriate
 *  links from this starting point, traversing the linked pages in a tree-like fashion.
 *  Filters images and links according to the specified parameters. Also controls the
 *  quantity of downloading: the number of images downloaded but not yet displayed is
 *  limited to a small buffer, and the total number of downloads is capped by the applet
 *  (to prevent downloading from continuing indefinitely). */

public class DownloadUrls extends Thread {
    
    // for GS3    
    String baseURL = null;
    
    /** Refers to applet */
    GsdlCollageApplet app_          = null;
    /** Refers to download thread */
    DownloadImages download_images_ = null;

    /** The address from which the application should start looking for images */
    String starting_url_  = null;

    /** The root directory of Greenstone */
    String document_root_ = null;

    /** When this thread is asked to stop running, this variable will be set to true */
    private boolean stop_running = false;    

    /** When this thread is asked to stop downloading, this variable will be set to true.
     * For now the effect on this thread is the same as stop_running = true, but the two
     * are kept as separate variables in case the behaviour diverges in the future.
     * Note that setting stop_downloading does not interrupt the thread the way
     * stopRunning() does, so keeping the flags separate is also the safer option.
     */
    private boolean stop_downloading = false;

    /** Set to true when unable to download perhaps because of no internet connection. */
    private boolean unable_to_download = false;
    
    /** CHRIS - Holds the contents of the collection's assoc directory */
    //    File[] assocDir_ = null;

    /** Restricts links followed from the starting url to links that contain this string */
    String href_musthave_ = null;
    /** Restricts links followed from the starting url to links that do not contain this string. 
     *  Also prevents image names from containing this string */
    String image_mustnothave_ = null;
    /** Ignore images whose names begin with this string */
    String image_ignore_  = null;
    /** Restricts the types of images included in the collage, for example jpg, gif, etc. */
    String image_type_ = null;
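    // Note: href_musthave_, image_mustnothave_ and image_type_ may each hold several values
    // separated by '%', e.g. "jpg%png%gif" (an illustrative value); the filter methods below
    // split such strings on '%'.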

    /** A static delay used when attempting to download more images into a full downloading buffer */
    final int delay_       = 3000;
    /** The maximum number of images to have downloaded and not yet displayed */ 
    final int buffer_size_ = 1;

    /** Used in cases where an image maps to a url outside of its original location.
     *  When used with Greenstone the collage images will refer to documents in the collections
     *  from which the images are sourced. When used individually, the images may be saved into
     *  a user directory and the pages they reference may be external hyperlinks. */
    Hashtable external_links_ = null;

    /** Records all urls which have already been examined */
    Hashtable visited_url_  = null;
    /** Determines whether there are still pages to examine and images to download */
    boolean thread_running_ = true;

    /** Controls the amount of debugging output written to System.err */
    int verbosity_ = 0;

     /** Records all images which have already been examined */
    Hashtable visited_images_  = null;
        
    /** MediaTracker used to monitor the loading of downloaded images */
    MediaTracker tracker;

    /** Running count of images that have been handed over for downloading */
    int total_images_to_download = 0;
    
    /** Constructor to initialise a download thread from which images are found, 
     *  saves parameters into local variables for use within the class.
     *
     *  @param app reference to the applet
     *  @param download_images class which stores the images retrieved in triplets
     *  @param starting_url the url from which the search for images should begin
     *  @param href_musthave restricts links to only those containing this string
     *  @param image_mustnothave restricts links and image names to only those that don't contain this string
     *  @param image_ignore restricts the beginning of image names
     *  @param image_type restricts the type of images included in the collage to those named */
    public DownloadUrls(GsdlCollageApplet app, 
			DownloadImages download_images, String starting_url, 
			String href_musthave, String image_mustnothave, 
			String image_ignore, String image_type, String document_root,int verbosity, MediaTracker trk)
    {
	super("DownloadUrls");
	app_             = app;
	download_images_ = download_images;

	starting_url_  = starting_url;
	href_musthave_ = href_musthave;
	image_mustnothave_ = image_mustnothave;
	image_ignore_  = image_ignore;
	image_type_    = image_type;
	document_root_ = document_root; 
	verbosity_ = verbosity;
        tracker = trk;

	System.err.println("starting_url_ "  + starting_url +"\n"+
	                   "href_musthave_ " +  href_musthave +"\n"+
			   "image_mustnothave_ " + image_mustnothave+"\n"+
			   "image_ignore_ "+  image_ignore+"\n"+
			   "image_type_ "+ image_type+"\n"+
			   "document_root_ "+ document_root_);
    }
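
    /* A minimal usage sketch (the argument values below are illustrative only, not taken
     * from any real collection):
     *
     *   DownloadUrls downloader =
     *       new DownloadUrls(applet, download_images,
     *                        "http://example.org/greenstone/collect/demo/",  // starting_url
     *                        null,            // href_musthave
     *                        null,            // image_mustnothave
     *                        null,            // image_ignore
     *                        "jpg%png%gif",   // image_type
     *                        "/greenstone",   // document_root
     *                        2,               // verbosity
     *                        tracker);
     *   downloader.start();        // Thread.start() runs run(), which calls rec_add_images()
     *   // ...
     *   downloader.stopRunning();  // ask the thread to stop early
     */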

    /** Determines whether or not a url has already been examined
     *
     *  @param url_string the url to check
     *  @return true if the url has been visited, false if not */
    public boolean already_visited(String url_string)
    {
	int hash_pos = url_string.indexOf("#");
	if (hash_pos>0)
	 {
	     // strip off #anchor reference
	     url_string = url_string.substring(0,hash_pos);
	 }

      	// if the url has been visited before, return true
	if (visited_url_.containsKey(url_string))
	{
	    if (verbosity_ > 3)
		{
		    System.err.println("Visited " + url_string + " before!");
		}
	    return true;
	}
	
	visited_url_.put(url_string,"visited");

	return false;
    }

    /** Determines whether an image, or a screenview image from the same directory,
     *  has already been examined.
     *
     *  @param url_string the url of the image to check
     *  @param img_name the name of the image to check
     *  @return true if the image counts as already visited, false if not */
    public boolean image_visited(String url_string, String img_name)
    {
	String hash_dir = url_string.substring(0,url_string.lastIndexOf("/"));  
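	// hash_dir is the directory portion of the image URL, used as the key into
	// visited_images_; e.g. an illustrative URL ".../assoc/D0.dir/cover.jpg" would
	// be keyed under ".../assoc/D0.dir".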
    
 	if ( visited_images_.containsKey(hash_dir)){
	    Hashtable hashed_images = (Hashtable)visited_images_.get(hash_dir);   
	   
            if (img_name.startsWith("screenview")){
		return true;
	    }

	    if (hashed_images.containsKey(img_name)){
		return true;
	    }
         
            Enumeration enu = hashed_images.keys();
            while (enu.hasMoreElements()) {
		String name = (String)enu.nextElement();
		if (name.startsWith("screenview")) {
		    return true;
		}
	    }
    
            hashed_images.put(img_name,"visited");           
	}
	else{
	    Hashtable hashed_images = new Hashtable();
	    hashed_images.put(img_name,"visited");
	    visited_images_.put(hash_dir,hashed_images);	    
	}
	     
	return false;
    }

    // some other thread can call this method to tell this thread to stop running
    public void stopRunning() {
	if (verbosity_ >= 3) {
	    System.err.println("**** DownloadUrls.stopRunning() called");
	}
	
	stop_running = true;
	// Interrupt this thread, even if it's not the one currently running.
	// We just want to make sure that the DownloadUrls thread in which the CURL
	// object runs gets interrupted if it is what's currently executing.
	if(!this.isInterrupted()) {
	    this.interrupt();
	}
	if(!Thread.currentThread().isInterrupted()) {
	    Thread.currentThread().interrupt();
	}
    }

    public boolean isStopping() {
	return stop_running;
    }

    public int totalImagesToDownload() {
	return total_images_to_download;
    }
    
    /** Restricts the type of images that can be included in the collage
     *
     * @param url_string the url to check
     * @return true if the image is of a specified type, false if not */
    public boolean image_file_extension(String url_string)
    {
	// lower case comparisons
	String url_lstring = url_string.toLowerCase();


	// greenstone3 can add jsessionids at end, which messes up image file extension detection
	int jsessionID_index = url_lstring.indexOf(";jsessionid=");
	if(jsessionID_index >= 0) {
	    url_lstring = url_lstring.substring(0, jsessionID_index);
	}
	
	if (image_type_ == null)
	    return true;

	String tmp = image_type_;
	String original_image_type_ = image_type_;
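	// Walk the '%'-separated list of extensions in image_type_ (e.g. "jpg%gif" -- an
	// illustrative value), consuming one entry per iteration and testing it against the
	// end of the URL; original_image_type_ is used to restore the field before returning.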

	while (image_type_ != null && image_type_.indexOf("%") >= 0) {
	    
	    tmp = image_type_.substring(0, image_type_.indexOf("%"));
             
	    if (image_type_.length() > image_type_.indexOf("%") + 1)
		image_type_ = image_type_.substring(image_type_.indexOf("%") + 1, image_type_.length());                 
	    else
		image_type_ = null;
            
	    if (!tmp.trim().equals("") && url_lstring.endsWith(tmp)) {
		image_type_ = original_image_type_;
		return true;
	    }
	}

	if (image_type_ != null && url_lstring.endsWith(image_type_)) {
		image_type_ = original_image_type_;
		return true;
	}

	image_type_ = original_image_type_;
	return false;
    }

    /** Restricts images to only those that satisfy several specified conditions
     *  regarding the content of the image name and url.
     *
     *  @param url_string the url to check
     *  @return true if the image is satisfactory, false if not */
    public boolean filter_image(String url_string)
    {
        
	if (image_ignore_==null || !url_string.startsWith(image_ignore_))
	{
	    if (!already_visited(url_string))
	    {
		if (image_mustnothave_ != null) {
		    
		    String tmp = image_mustnothave_;
		    String original_image_mustnothave_ = image_mustnothave_;

		    while (image_mustnothave_ != null && image_mustnothave_.indexOf("%") >= 0) {
	    
			tmp = image_mustnothave_.substring(0, image_mustnothave_.indexOf("%"));
			if (image_mustnothave_.length() > image_mustnothave_.indexOf("%") + 1)
			    image_mustnothave_ = image_mustnothave_.substring(image_mustnothave_.indexOf("%") + 1, 
									      image_mustnothave_.length());
			else
			    image_mustnothave_ = null;
                          			
                     
                         
			if (!tmp.trim().equals("") && url_string.indexOf(tmp) >= 0) {
			   
			    image_mustnothave_ = original_image_mustnothave_;
			    return false;
			}
       		    }

		    image_mustnothave_ = original_image_mustnothave_;
		    
		    if (image_mustnothave_ != null && url_string.indexOf(image_mustnothave_) >= 0) {
			image_mustnothave_ = original_image_mustnothave_;
			return false;
		    }

		    if (verbosity_ > 2) {
			System.err.println("src url = "+ url_string);
		    }
		    
		    image_mustnothave_ = original_image_mustnothave_;
		    
		}
	
	    } else { // already visited this image link
		System.err.println("\t####" + url_string + " already visited - filter_image returning false");
		// If we have already visited this image link once before, we have already dealt
		// with it (either it failed the filters, or it passed and was added for download),
		// so don't process this image again.
		return false;
	    }
            
	}
	
	return true;
    }

    /** Restricts links to only those that satisfy several specified conditions
     *  regarding the address of the link.
     *
     *  @param url_string the url to check
     *  @param new_url_string the url from which this link was found
     *  @param depth the number of links followed on this path
     *  @return true if the link should be followed, false if not */
    public boolean filter_href(String url_string, String new_url_string, int depth)
    {
	boolean has_href = false;
	String tmp = href_musthave_;
	String original_href_musthave_ = href_musthave_;

	// checks that it does contain this content
	if (href_musthave_ !=  null) {

	    while (href_musthave_ != null && href_musthave_.indexOf("%") >= 0) {
		
		tmp = href_musthave_.substring(0, href_musthave_.indexOf("%"));
		if (href_musthave_.length() > href_musthave_.indexOf("%") + 1)
		    href_musthave_ = href_musthave_.substring(href_musthave_.indexOf("%") + 1, href_musthave_.length());
		else
		    href_musthave_ = null;
		
		if (url_string.indexOf(tmp) >= 0)
		    has_href = true;
	    }
	    
	    if (href_musthave_ != null && url_string.indexOf(href_musthave_) >= 0)
		has_href = true;

	    href_musthave_ = original_href_musthave_;
	}

	tmp = image_mustnothave_;
	String original_image_mustnothave_ = image_mustnothave_;

	// checks that it doesn't contain this content
	if (image_mustnothave_ != null) {

	    while (image_mustnothave_ != null && image_mustnothave_.indexOf("%") >= 0) {
		
		tmp = image_mustnothave_.substring(0, image_mustnothave_.indexOf("%"));
		if (image_mustnothave_.length() > image_mustnothave_.indexOf("%") + 1)
		    image_mustnothave_ = image_mustnothave_.substring(image_mustnothave_.indexOf("%") + 1, image_mustnothave_.length());
		else
		    image_mustnothave_ = null;
		
		if (url_string.indexOf(tmp) >= 0)
		    has_href = false;
	    }
	    if (image_mustnothave_ != null && url_string.indexOf(image_mustnothave_) >= 0)
		has_href = false;
	  
	    image_mustnothave_ = original_image_mustnothave_;
	}

	// return true if the link is valid and false if not
	if (href_musthave_==null || has_href)
	{
	    // might be another URL
	    if (depth < app_.maxDepth())
	    {
		if (!new_url_string.startsWith(url_string))
		{
		    return true;
		}
	    }
	}
	return false;
    }

    /** Adds an image to the stored downloaded images as a triplet.
     *  Ensures that the number of images downloaded but not displayed at
     *  any one time is controlled by using a buffer. If the buffer is
     *  full this function will wait until space becomes available before
     *  continuing. It also restricts the
     *  total number of images to download as specified by the applet.
     *
     *  @param url the image to download
     *  @param from_url the url that this image was sourced from
     *  @param img_name the name of the image */
    public void add_image(URL url, String from_url, String img_name)
    {
	    // get the image from the url
	    if (verbosity_>=2) {
		System.err.println("  Downloading image URL: " + url.toString());
	    }
            
           if (image_visited(url.toString(),img_name)) return;

	   int size = download_images_.downloadImage(tracker,url, from_url, img_name);
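	   // downloadImage() appears to return the running count of images downloaded so far;
	   // it is compared against the applet's maxDownloads() limit below.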

	   total_images_to_download++;
	   
	   try{
	       // if we have completed the maximum number of downloads allowed by the
	       // applet then stop downloading
	       if (size == app_.maxDownloads()) {
		   // NOTE: the app can continue displaying images forever after download is
		   // finished, until interrupted/stopped.
		   // So don't set stop_running=false just because downloads have finished.
		   //stop_running = true; // Don't do this!
		   //thread_running = false;
		   //thread.currentThread().interrupt();
		   
		   stop_downloading = true;
		   //stop(); // TODO, remove this, replacing with above
		   
	       }
	       
	   }
	   catch (Exception e) {
	       thread_running_ = false;
	       //stop(); // TODO
	       stop_downloading = true;
	       e.printStackTrace();
	   }
    }

    /** Connects to the starting url and looks for all images and links from this
     *  original page. Image links are processed first, so that any images found can be
     *  downloaded immediately and placed on the applet. Secondly, the links to other
     *  pages are recursively processed by this function and treated as a starting url.
     *
     *  @param new_url the url from which to start searching for images and links
     *  @param depth the number of links that have been followed on this path */
    public void rec_add_images(String new_url, int depth)
    {
	// Check if the application's stopping, to end this recursive function as soon as possible
	if(stop_running) {
	    return;
	}	
	
	if (verbosity_ >= 2) {
	    System.err.println("*** Inspecting url: " + new_url);
	}
	
	if (already_visited(new_url)) return;

	// check if there is a scenario where external hyperlinks are being used
	externalLinks();
	String img_name = "";

	// connect to the url
	// stopRunning would have set the interrupted flag, and
	// CURL checks for that in its loop, *outside* its potentially-blocking read() call
	CURL curl = (app_.gsdlversion == 3) ? new CURL(new_url, app_.baseURL) : new CURL(new_url);

	if (curl.connected_ok())
	{
	    if (verbosity_ >= 1) {
		System.err.print("Connected OK ... ");
	    }

	    // read the page
	    curl.readAll();
	    if (verbosity_ >= 1) {
		System.err.println("URL read.");
	    }

	    // get all the <code><img src=</code> links into a vector
	    Vector src_links = curl.getSrcLinks();
        	    
	    if (verbosity_ >= 2) {
		System.err.println("  Got src links... there are " + src_links.size() + " of them.");
	    }
	    // process each of the image links according to the parameters given.
	    for (int i = 0; i < src_links.size() && !stop_running && !stop_downloading; i++) 
	    {
		URL url = (URL)src_links.get(i);
		String url_string  = url.toString();
            
		//System.err.println(" source links " + i + " ["  + url_string +"]");

		if (verbosity_ >= 4) {
		    System.err.println("    Unfiltered: src_link[" + i + "] = " + url_string);
		}

		if (image_file_extension(url_string))
		    {
			if (filter_image(url_string))
			    {
				img_name = url_string.substring(url_string.lastIndexOf("/") + 1, url_string.length());
                                
				if (verbosity_ >= 2) {
				    System.err.println("    Filtered: src_link[" + i + "] = " + url_string);
				}

				if ((external_links_ != null) && (!external_links_.isEmpty())) {
				    String ext = (String) external_links_.get(img_name);

				    if (ext != null) {
					add_image(url, ext, img_name);
				    }
				    else {
					add_image(url, new_url, img_name);
				    }
				}
				else {
				    add_image(url, new_url, img_name);
				}
			    }
		
		    }
		
	    }

	    if(stop_running && verbosity_ >= 3) {
		System.err.println("*** DownloadUrls.rec_add_images() - Asked to stop running");
		return;
	    }
	    
	    // get all the <code><a href=</code> links into a vector
	    Vector href_links = curl.getHrefLinks();
	  
	    if (verbosity_ >= 2) {
		System.err.println("  Got href links... there are " + href_links.size() + " of them.");
	    }
	    

	    // process each of the href links according to the parameters given.
	    for (int i = 0; i < href_links.size() && !stop_running && !stop_downloading; i++) 
		{
           
		    URL url = (URL)href_links.get(i);
		    String url_string  = url.toString();
		    //System.err.println(" href links " + i + "["  + url_string +"]");      

		    if (image_file_extension(url_string))
			{
			    if (filter_image(url_string))
				{
				    img_name = url_string.substring(url_string.lastIndexOf("/") + 1, url_string.length());
				    if (verbosity_ >= 2) {
					System.err.println("    Filtered: href_link[" + i + "] = " + url_string);
				    }
				    if ((external_links_ != null) && (!external_links_.isEmpty())) {
					String ext = (String) external_links_.get(img_name);

					if (ext != null) 
					    add_image(url, ext, img_name);
					else
					    add_image(url, new_url, img_name);
				    }
				    else {
					add_image(url, url_string, img_name);
				    }
				}
			}
		    else
			{
			    if (filter_href(url_string,new_url,depth))
				{
				    // If the application has been stopped, skip the recursive
				    // call so we stop sooner, rather than exploring yet more
				    // links before noticing the stop request
				    if(stop_running) {
					return;
				    }		
				    rec_add_images(url_string,depth+1);
				    
				}
			}
		}
	}
	else {
	    System.err.println("Unable to download "+new_url);
	    unable_to_download = true;
	}

	if(stop_running && verbosity_ >= 3) {
	    System.err.println("*** DownloadUrls.rec_add_images() thread has been told to stop.");
	}
    }

    public boolean wasUnableToDownload() { return unable_to_download; }
    
    
    /** Used in cases where an image maps to a url outside of its original location.
     *  When used with Greenstone the collage images will refer to documents in the collections
     *  from which the images are sourced. When used individually, the images may be saved into
     *  a user directory and the pages they reference may be external hyperlinks.
     *  This function reads the "externallinks" file at the starting url and creates a hash map
     *  from each image to its external hyperlink. If the file does not exist the download thread
     *  will continue and assume the first case, that links are internal. */
    public void externalLinks() {
	external_links_ = null;
	try {

	    if (starting_url_ == null || (document_root_ != null && starting_url_.indexOf(document_root_) >= 0) ){
		if (verbosity_ >= 3) {
		    System.err.println("**** " + starting_url_ + " is not an external link.");
		}
		return;
	    }

	    // open a url to the file written
	    URL u = new URL(starting_url_ + "externallinks");

	    BufferedReader r = new BufferedReader(new InputStreamReader(u.openStream()));
	    
	    external_links_ = new Hashtable();

	    String l = r.readLine();
	    // split each line on its first space: the first part is the image name, the second the link
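	    // e.g. a line such as "cover.jpg http://example.org/page.html" (illustrative values)
	    // maps the image name to its external hyperlink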
	    while (l != null && !stop_running && !stop_downloading) {
	
		String tmp1 = "";
		String tmp2 = "";

		if (l.indexOf(" ") >= 0) {
		    
		    tmp1 = l.substring(0, l.indexOf(" "));
		    if (l.length() > l.indexOf(" ") + 1)
			tmp2 = l.substring(l.indexOf(" ") + 1, l.length());
		    else
			tmp2 = null;
		    
		    if (tmp2 != null) {
			
			external_links_.put(tmp1, tmp2);
			//System.err.println(tmp1 + " " + tmp2);
		    }
		}
		l = r.readLine();
	    }
		
	    r.close();

	    if (stop_running && verbosity_ >= 3) {
		System.err.println("*** DownloadUrls.externalLinks(): Asked to stop running");
	    }
		
	} catch (Exception e) {
	    e.printStackTrace();
	    return;
	}
    }

    /** Controls the download thread */
    public void run () 
    {
	System.err.println("Starting download thread.");
	visited_url_ = new Hashtable();
	visited_images_ = new Hashtable();
	
	rec_add_images(starting_url_,1);
	download_images_.stopDownload();
	System.err.println("DownloadUrls.run() - download thread finished.");
    }
}
