package org.greenstone.atlas.server;

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.util.ArrayList;

/**
 * Splits an HTML string into two parallel streams: "words" (maximal runs of
 * characters for which {@link Character#isLetter(char)} is true) and the
 * text "between" those words (tags, whitespace, punctuation, digits, ...).
 *
 * The whole input is tokenised eagerly in the constructor; the results are
 * available through {@link #getFullHTMLWordList()} and
 * {@link #getFullBetweenWordList()}.
 */
public class HTMLParser
{
	/** The input being scanned, as a character array. */
	protected char[] _html = null;
	/** Position of the next unread character in {@link #_html}. */
	protected int _index = 0;
	
	/** Every word found in the input, in order of appearance. */
	ArrayList<String> _fullWordList = new ArrayList<String>();
	/** Every non-word run found in the input, in order of appearance. */
	ArrayList<String> _fullBetweenList = new ArrayList<String>();
	
	/**
	 * Tokenises the given HTML, filling the word list and the between-word list.
	 *
	 * @param html the text to scan; must not be null
	 * @throws NullPointerException if {@code html} is null
	 */
	public HTMLParser(String html)
	{		
		_html = html.toCharArray();
		
		// Alternate "between" run / word until both scanners report end of input.
		String other = getNextCharactersBetweenWords();
		String word = getNextWord();
		while(word != null || other != null)
		{
			if(word != null)
			{
				_fullWordList.add(word);
			}
			
			if(other != null)
			{
				_fullBetweenList.add(other);
			}
			
			other = getNextCharactersBetweenWords();
			word = getNextWord();
		}
	}
	
	/**
	 * Consumes and returns the run of non-word characters starting at the
	 * current position. A run stops at the first letter that is outside a tag,
	 * or at the end of the input. Everything from a {@code <} up to its matching
	 * {@code >} (nested {@code <} are counted) is included verbatim, letters and
	 * all. A tag that is never closed is taken up to the end of the input.
	 *
	 * @return the consumed characters, or null if the input is exhausted or the
	 *         current character is already a letter (nothing was consumed)
	 */
	public String getNextCharactersBetweenWords()
	{
		if(_index >= _html.length)
		{
			return null;
		}
		
		StringBuilder chars = new StringBuilder();
		
		while(_index < _html.length)
		{
			char current = _html[_index];
			if(current == '<')
			{
				chars.append(current);
				_index++;
				
				// Copy the whole tag; the bound is checked BEFORE each read so an
				// unterminated tag cannot run off the end of the array.
				int bracketCount = 1;
				while(_index < _html.length && bracketCount > 0)
				{
					char tagChar = _html[_index];
					if(tagChar == '>')
					{
						bracketCount--;
					}
					else if(tagChar == '<')
					{
						bracketCount++;
					}
					chars.append(tagChar);
					_index++;
				}
			}
			else if(!Character.isLetter(current))
			{
				chars.append(current);
				_index++;
			}
			else
			{
				// Start of a word: leave it for getNextWord().
				break;
			}
		}
		
		if(chars.length() == 0)
		{
			return null;
		}
		return chars.toString();
	}
	
	/**
	 * Consumes and returns the run of letters starting at the current position.
	 * Scanning stops at the first non-letter or at the end of the input.
	 *
	 * @return the consumed word; an empty string if the current character is
	 *         not a letter; or null if the input is exhausted
	 */
	public String getNextWord()
	{
		if(_index >= _html.length)
		{
			return null;
		}
		
		StringBuilder newWord = new StringBuilder();
		
		// Bounded by the array length so a word that ends the input is returned
		// instead of reading past the last character.
		while(_index < _html.length && Character.isLetter(_html[_index]))
		{
			newWord.append(_html[_index]);
			_index++;
		}
		return newWord.toString();
	}
	
	/**
	 * @return the list of words collected by the constructor (the internal
	 *         list itself, not a copy)
	 */
	public ArrayList<String> getFullHTMLWordList()
	{
		return _fullWordList;
	}
	
	/**
	 * @return the list of between-word runs collected by the constructor (the
	 *         internal list itself, not a copy)
	 */
	public ArrayList<String> getFullBetweenWordList()
	{
		return _fullBetweenList;
	}
	
	/**
	 * Returns the given string with everything between {@code <} and its
	 * matching {@code >} removed, brackets included. Nested {@code <} are
	 * counted, and a {@code >} seen while not inside a tag is kept as ordinary
	 * text.
	 *
	 * @param htmlString the text to strip; must not be null
	 * @return the text with tags removed
	 * @throws NullPointerException if {@code htmlString} is null
	 */
	public static String removeTags(String htmlString)
	{
		StringBuilder cleanText = new StringBuilder();
		
		// Depth of currently open '<' brackets; 0 means we are in plain text.
		int inTag = 0;
		for(char c : htmlString.toCharArray())
		{
			if(c == '<')
			{
				inTag++;
			}
			else if(c == '>' && inTag > 0)
			{
				inTag--;
			}
			else if(inTag == 0)
			{
				cleanText.append(c);
			}
		}
		
		return cleanText.toString();
	}
}
