/*
* 12/06/2005
*
* PythonTokenMaker.java - Token maker for the Python programming language.
*
* This library is distributed under a modified BSD license. See the included
* RSyntaxTextArea.License.txt file for details.
*/
package org.fife.ui.rsyntaxtextarea.modes;
import java.io.*;
import javax.swing.text.Segment;
import org.fife.ui.rsyntaxtextarea.AbstractJFlexTokenMaker;
import org.fife.ui.rsyntaxtextarea.DefaultToken;
import org.fife.ui.rsyntaxtextarea.Token;
import org.fife.ui.rsyntaxtextarea.TokenMaker;
/**
* Scanner for the Python programming language.
*
* @author Robert Futrell
* @version 0.3
*/
%%
%public
%class PythonTokenMaker
%extends AbstractJFlexTokenMaker
%implements TokenMaker
%unicode
%type org.fife.ui.rsyntaxtextarea.Token
%{
/**
* Constructor. This must be here because JFlex does not generate a
* no-parameter constructor.
*/
public PythonTokenMaker() {
super();
}
/**
* Adds the token specified to the current linked list of tokens.
*
* @param tokenType The token's type.
*/
private void addToken(int tokenType) {
	// The token covers buffer indices zzStartRead through zzMarkedPos-1.
	final int firstIndex = zzStartRead;
	final int lastIndex = zzMarkedPos - 1;
	addToken(firstIndex, lastIndex, tokenType);
}
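/**
* Adds the token specified to the current linked list of tokens.
*
* @param start The starting index of the token in the scanner's buffer.
* @param end The ending index of the token in the scanner's buffer.
* @param tokenType The token's type.
*/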
private void addToken(int start, int end, int tokenType) {
	// Adding offsetShift (computed in getTokenList) to a buffer index
	// yields the document offset that is passed as the last argument.
	addToken(zzBuffer, start, end, tokenType, start + offsetShift);
}
/**
* Adds the token specified to the current linked list of tokens.
*
* @param array The character array.
* @param start The starting offset in the array.
* @param end The ending offset in the array.
* @param tokenType The token's type.
* @param startOffset The offset in the document at which this token
* occurs.
*/
public void addToken(char[] array, int start, int end, int tokenType, int startOffset) {
// Let the superclass create the token and append it to the token list.
super.addToken(array, start,end, tokenType, startOffset);
// Move the scanner's start-of-match index up to the marked position so
// the next token added through addToken(int) begins after this one.
zzStartRead = zzMarkedPos;
}
/**
* Returns the text to place at the beginning and end of a
* line to "comment" it in this programming language.
*
* @return The start and end strings to add to a line to "comment"
* it out.
*/
public String[] getLineCommentStartAndEnd() {
	// Element 0 is the comment-start text; element 1 (the end text) is
	// null because no closing marker is supplied.
	final String[] markers = { "#", null };
	return markers;
}
/**
* Returns the first token in the linked list of tokens generated
* from <code>text</code>. This method must be implemented by
* subclasses so they can correctly implement syntax highlighting.
*
* @param text The text from which to get tokens.
* @param initialTokenType The token type we should start with.
* @param startOffset The offset into the document at which
* <code>text</code> starts.
* @return The first <code>Token</code> in a linked list representing
* the syntax highlighted text.
*/
public Token getTokenList(Segment text, int initialTokenType, int startOffset) {

	resetTokenList();

	// Difference between an index into text.array and the corresponding
	// document offset; added to buffer indices when tokens are created.
	this.offsetShift = -text.offset + startOffset;

	// Choose the lexical state to start in. A line that begins inside a
	// triple-quoted string resumes in the matching long-string state;
	// anything else starts in YYINITIAL. (The state is a scanner state,
	// not a token type, so YYINITIAL is used rather than Token.NULL.)
	int state;
	switch (initialTokenType) {
		case Token.LITERAL_STRING_DOUBLE_QUOTE:
			state = LONG_STRING_2;
			break;
		case Token.LITERAL_CHAR:
			state = LONG_STRING_1;
			break;
		default:
			state = YYINITIAL;
	}

	// yyreset() reads the segment from the 's' field, so it must be
	// assigned before the scanner is reset.
	s = text;
	try {
		yyreset(zzReader);
		yybegin(state);
		return yylex();
	} catch (IOException ioe) {
		// The scanner methods declare IOException; if one is ever thrown,
		// report it and hand back an empty token rather than propagating.
		ioe.printStackTrace();
		return new DefaultToken();
	}

}
/**
* Resets the scanner to read from a new input stream.
* Does not close the old reader.
*
* All internal variables are reset, the old input stream
* cannot be reused (internal buffer is discarded and lost).
* Lexical state is set to YY_INITIAL.
*
* @param reader the new input stream
*/
public final void yyreset(java.io.Reader reader) throws java.io.IOException {

	// The segment 's' has already been updated by the caller; scan its
	// backing array directly instead of copying characters into a buffer.
	final int begin = s.offset;
	zzBuffer = s.array;

	// zzRefill() never loads more data (the buffer is "full" from the
	// start because it is the segment's array), so zzEndRead is assigned
	// here rather than being left equal to zzStartRead.
	zzStartRead = begin;
	zzEndRead = begin + s.count - 1;

	// All position markers start at the beginning of the segment.
	zzPushbackPos = begin;
	zzMarkedPos = begin;
	zzCurrentPos = begin;

	zzLexicalState = YYINITIAL;
	zzReader = reader;
	zzAtBOL = true;
	zzAtEOF = false;

}
/**
* Refills the input buffer.
*
* @return <code>true</code> if EOF was reached, otherwise
* <code>false</code>.
* @exception IOException if any I/O-Error occurs.
*/
private boolean zzRefill() throws java.io.IOException {
	// Nothing is ever read in: the result only reports whether the current
	// position has reached or passed the end of the segment 's'.
	final int segmentEnd = s.offset + s.count;
	return zzCurrentPos >= segmentEnd;
}
%}
/* This part is taken from http://www.python.org/doc/2.2.3/ref/grammar.txt */
/* Identifiers: an ASCII letter or underscore, then letters, digits or underscores. */
identifier = (({letter}|"_")({letter}|{digit}|"_")*)
letter = ({lowercase}|{uppercase})
lowercase = ([a-z])
uppercase = ([A-Z])
digit = ([0-9])
/* Single-line strings with an optional r/u/ur prefix. The closing quote is */
/* optional, so an unterminated string on a line is still matched as a string. */
stringliteral = ({stringprefix}?{shortstring})
stringprefix = ("r"|"u"[rR]?|"R"|"U"[rR]?)
shortstring1 = ([\']{shortstring1item}*[\']?)
shortstring2 = ([\"]{shortstring2item}*[\"]?)
shortstring = ({shortstring1}|{shortstring2})
shortstring1item = ({shortstring1char}|{escapeseq})
shortstring2item = ({shortstring2char}|{escapeseq})
/* String body characters: anything except a backslash, a newline or the delimiting quote. */
shortstring1char = ([^\\\n\'])
shortstring2char = ([^\\\n\"])
/* A backslash followed by any single character other than a line terminator. */
escapeseq = ([\\].)
/* Integers: decimal, octal (leading 0) and hexadecimal (leading 0x), optionally suffixed with l or L. */
longinteger = ({integer}[lL])
integer = ({decimalinteger}|{octinteger}|{hexinteger})
decimalinteger = ({nonzerodigit}{digit}*|"0")
octinteger = ("0"{octdigit}+)
hexinteger = ("0"[xX]{hexdigit}+)
nonzerodigit = ([1-9])
octdigit = ([0-7])
hexdigit = ({digit}|[a-f]|[A-F])
/* Floating point numbers, with or without an exponent, and imaginary numbers (suffix j or J). */
floatnumber = ({pointfloat}|{exponentfloat})
pointfloat = ({intpart}?{fraction}|{intpart}".")
exponentfloat = (({intpart}|{pointfloat}){exponent})
intpart = ({digit}+)
fraction = ("."{digit}+)
exponent = ([eE][\+\-]?{digit}+)
imagnumber = (({floatnumber}|{intpart})[jJ])
/* A digit followed by one or more non-separator characters. The scanner's */
/* longest-match rule means this only wins when it matches more text than */
/* the valid number patterns do. */
ErrorNumberFormat = ({digit}{NonSeparator}+)
/* Any character that is not whitespace, a bracket, an operator/punctuation */
/* character or a quote. The explicit "#" alternative is already covered by the class. */
NonSeparator = ([^\t\f\r\n\ \(\)\{\}\[\]\;\,\.\=\>\<\!\~\?\:\+\-\*\/\&\|\^\%\"\']|"#")
/* Openers of triple-quoted strings, with an optional string prefix. */
LongStringStart1 = ({stringprefix}?\'\'\')
LongStringStart2 = ({stringprefix}?\"\"\")
LineTerminator = (\n)
WhiteSpace = ([ \t\f])
/* A "#" and everything after it up to the end of the line. */
LineComment = ("#".*)
%state LONG_STRING_1
%state LONG_STRING_2
%%
/*
 * Word rules. They are restricted to YYINITIAL: the long-string states are
 * declared with %state (inclusive), so rules without a state prefix would
 * also be active inside triple-quoted strings and could claim a word there.
 * A trailing | means "use the action of the next rule".
 */
<YYINITIAL> {

	/* Keywords */
	"and" |
	"as" |
	"assert" |
	"break" |
	"class" |
	"continue" |
	"def" |
	"del" |
	"elif" |
	"else" |
	"except" |
	"exec" |
	"finally" |
	"for" |
	"from" |
	"global" |
	"if" |
	"import" |
	"in" |
	"is" |
	"lambda" |
	"not" |
	"or" |
	"pass" |
	"print" |
	"raise" |
	"return" |
	"try" |
	"while" |
	"yield"				{ addToken(Token.RESERVED_WORD); }

	/* Data types. */
	"char" |
	"double" |
	"float" |
	"int" |
	"long" |
	"short" |
	"signed" |
	"unsigned" |
	"void"				{ addToken(Token.DATA_TYPE); }

	/*
	 * Standard functions. "float", "int" and "long" are not repeated here:
	 * they are already matched as data types above, and a second rule for
	 * the same text could never be matched.
	 */
	"abs" |
	"apply" |
	"bool" |
	"buffer" |
	"callable" |
	"chr" |
	"classmethod" |
	"cmp" |
	"coerce" |
	"compile" |
	"complex" |
	"delattr" |
	"dict" |
	"dir" |
	"divmod" |
	"enumerate" |
	"eval" |
	"execfile" |
	"file" |
	"filter" |
	"getattr" |
	"globals" |
	"hasattr" |
	"hash" |
	"hex" |
	"id" |
	"input" |
	"intern" |
	"isinstance" |
	"issubclass" |
	"iter" |
	"len" |
	"list" |
	"locals" |
	"map" |
	"max" |
	"min" |
	"object" |
	"oct" |
	"open" |
	"ord" |
	"pow" |
	"property" |
	"range" |
	"raw_input" |
	"reduce" |
	"reload" |
	"repr" |
	"round" |
	"setattr" |
	"slice" |
	"staticmethod" |
	"str" |
	"sum" |
	"super" |
	"tuple" |
	"type" |
	"unichr" |
	"unicode" |
	"vars" |
	"xrange" |
	"zip"				{ addToken(Token.FUNCTION); }

}
<YYINITIAL> {

	/* End of line: terminate the token list for this line. */
	{LineTerminator}		{ addNullToken(); return firstToken; }

	{identifier}			{ addToken(Token.IDENTIFIER); }

	{WhiteSpace}+			{ addToken(Token.WHITESPACE); }

	/* String/Character Literals. */
	{stringliteral}			{ addToken(Token.LITERAL_STRING_DOUBLE_QUOTE); }
	/* Triple-quoted openers switch to the matching long-string state. */
	{LongStringStart1}		{ yybegin(LONG_STRING_1); addToken(Token.LITERAL_CHAR); }
	{LongStringStart2}		{ yybegin(LONG_STRING_2); addToken(Token.LITERAL_STRING_DOUBLE_QUOTE); }

	/* Comment Literals. */
	{LineComment}			{ addToken(Token.COMMENT_EOL); }

	/* Separators. A trailing | means "use the action of the next rule". */
	"(" |
	")" |
	"[" |
	"]" |
	"{" |
	"}"						{ addToken(Token.SEPARATOR); }

	/*
	 * Operators. The multi-character comparison and assignment operators
	 * are listed so each is emitted as a single token instead of as two
	 * adjacent one-character operator tokens.
	 */
	"=" |
	"+" |
	"-" |
	"*" |
	"/" |
	"//" |
	"%" |
	"**" |
	"~" |
	"<" |
	">" |
	"<=" |
	">=" |
	"<<" |
	">>" |
	"==" |
	"!=" |
	"<>" |
	"+=" |
	"-=" |
	"*=" |
	"/=" |
	"//=" |
	"%=" |
	"**=" |
	"&=" |
	"|=" |
	"^=" |
	">>=" |
	"<<=" |
	"^" |
	"&" |
	"&&" |
	"|" |
	"||" |
	"?" |
	":" |
	"," |
	"!" |
	"++" |
	"--" |
	"."						{ addToken(Token.OPERATOR); }

	/* Numbers */
	{longinteger}|{integer}		{ addToken(Token.LITERAL_NUMBER_DECIMAL_INT); }
	{floatnumber}|{imagnumber}	{ addToken(Token.LITERAL_NUMBER_FLOAT); }
	{ErrorNumberFormat}			{ addToken(Token.ERROR_NUMBER_FORMAT); }

	/* Other punctuation, we'll highlight it as "identifiers." */
	"@" |
	";"						{ addToken(Token.IDENTIFIER); }

	/* Ended with a line not in a string or comment. */
	<<EOF>>					{ addNullToken(); return firstToken; }

	/* Catch any other (unhandled) characters and flag them as bad. */
	.						{ addToken(Token.ERROR_IDENTIFIER); }

}
<LONG_STRING_1> {

	/* Body of a string opened with three single quotes. */
	[^\']+					{ addToken(Token.LITERAL_CHAR); }

	/* Closing delimiter: return to normal scanning. */
	"'''"					{ yybegin(YYINITIAL); addToken(Token.LITERAL_CHAR); }

	/* A lone quote is still part of the string. */
	"'"						{ addToken(Token.LITERAL_CHAR); }

	/*
	 * End of the line while still inside the string. If nothing has been
	 * added for this line yet, add a LITERAL_CHAR token so the returned
	 * list is not null (presumably also so the caller sees the string as
	 * still open when it scans the next line).
	 */
	<<EOF>>					{
								if (firstToken==null) {
									addToken(Token.LITERAL_CHAR);
								}
								return firstToken;
							}

}
<LONG_STRING_2> {

	/* Body of a string opened with three double quotes. */
	[^\"]+					{ addToken(Token.LITERAL_STRING_DOUBLE_QUOTE); }

	/* Closing delimiter: return to normal scanning. */
	\"\"\"					{ yybegin(YYINITIAL); addToken(Token.LITERAL_STRING_DOUBLE_QUOTE); }

	/* A lone quote is still part of the string. */
	\"						{ addToken(Token.LITERAL_STRING_DOUBLE_QUOTE); }

	/*
	 * End of the line while still inside the string. If nothing has been
	 * added for this line yet, add a LITERAL_STRING_DOUBLE_QUOTE token so
	 * the returned list is not null (presumably also so the caller sees
	 * the string as still open when it scans the next line).
	 */
	<<EOF>>					{
								if (firstToken==null) {
									addToken(Token.LITERAL_STRING_DOUBLE_QUOTE);
								}
								return firstToken;
							}

}