/*
 * 12/06/2005
 *
 * PythonTokenMaker.java - Token maker for the Python programming language.
 *
 * This library is distributed under a modified BSD license.  See the included
 * RSyntaxTextArea.License.txt file for details.
 */
package org.fife.ui.rsyntaxtextarea.modes;

import java.io.*;
import javax.swing.text.Segment;

import org.fife.ui.rsyntaxtextarea.AbstractJFlexTokenMaker;
import org.fife.ui.rsyntaxtextarea.DefaultToken;
import org.fife.ui.rsyntaxtextarea.Token;
import org.fife.ui.rsyntaxtextarea.TokenMaker;


/**
 * Scanner for the Python programming language.
 *
 * @author Robert Futrell
 * @version 0.3
 */
%%

%public
%class PythonTokenMaker
%extends AbstractJFlexTokenMaker
%implements TokenMaker
%unicode
%type org.fife.ui.rsyntaxtextarea.Token


%{

    /**
     * Constructor.  This must be here because JFlex does not generate a
     * no-parameter constructor.
     */
    public PythonTokenMaker() {
        super();
    }


    /**
     * Adds a token of the given type covering the text that was just
     * matched, i.e. from {@code zzStartRead} through
     * {@code zzMarkedPos - 1}.
     *
     * @param tokenType The token's type.
     */
    private void addToken(int tokenType) {
        addToken(zzStartRead, zzMarkedPos-1, tokenType);
    }


    /**
     * Adds a token of the given type covering the given range of the
     * scanner's buffer.  The document offset of the token is computed by
     * adding {@code offsetShift} to {@code start}.
     *
     * @param start The starting offset in {@code zzBuffer}.
     * @param end The ending offset in {@code zzBuffer}.
     * @param tokenType The token's type.
     */
    private void addToken(int start, int end, int tokenType) {
        int so = start + offsetShift;
        addToken(zzBuffer, start,end, tokenType, so);
    }


    /**
     * Adds the token specified to the current linked list of tokens, then
     * moves {@code zzStartRead} up to {@code zzMarkedPos} so the next token
     * starts where this match ended.
     *
     * @param array The character array.
     * @param start The starting offset in the array.
     * @param end The ending offset in the array.
     * @param tokenType The token's type.
     * @param startOffset The offset in the document at which this token
     *        occurs.
     */
    public void addToken(char[] array, int start, int end, int tokenType, int startOffset) {
        super.addToken(array, start,end, tokenType, startOffset);
        zzStartRead = zzMarkedPos;
    }


    /**
     * Returns the text to place at the beginning and end of a
     * line to "comment" it in this programming language.
     *
     * @return The start and end strings to add to a line to "comment"
     *         it out.  For Python this is {@code "#"} and {@code null}
     *         (no end string).
     */
    public String[] getLineCommentStartAndEnd() {
        return new String[] { "#", null };
    }


    /**
     * Returns the first token in the linked list of tokens generated
     * from {@code text}.  This method must be implemented by
     * subclasses so they can correctly implement syntax highlighting.
     *
     * @param text The text from which to get tokens.
     * @param initialTokenType The token type we should start with.
     * @param startOffset The offset into the document at which
     *        {@code text} starts.
     * @return The first {@code Token} in a linked list representing
     *         the syntax highlighted text.
     */
    public Token getTokenList(Segment text, int initialTokenType, int startOffset) {

        resetTokenList();
        this.offsetShift = -text.offset + startOffset;

        // Start off in the proper lexical state.  A line that begins inside
        // a triple-quoted string resumes in the matching long-string state;
        // anything else starts in YYINITIAL.  (A lexical-state constant is
        // used for the default rather than a token-type constant, since the
        // value is handed to yybegin().)
        int state;
        switch (initialTokenType) {
            case Token.LITERAL_STRING_DOUBLE_QUOTE:
                state = LONG_STRING_2;
                break;
            case Token.LITERAL_CHAR:
                state = LONG_STRING_1;
                break;
            default:
                state = YYINITIAL;
        }

        // yyreset() reads its buffer bounds from 's', so it must be set first.
        s = text;
        try {
            yyreset(zzReader);
            yybegin(state);
            return yylex();
        } catch (IOException ioe) {
            ioe.printStackTrace();
            return new DefaultToken();
        }

    }


    /**
     * Resets the scanner to read from a new input stream.
     * Does not close the old reader.
     *
     * All internal variables are reset, the old input stream
     * cannot be reused (internal buffer is discarded and lost).
     * Lexical state is set to {@code YYINITIAL}.
     *
     * @param reader the new input stream
     */
    public final void yyreset(java.io.Reader reader) throws java.io.IOException {
        // 's' has been updated.
        zzBuffer = s.array;
        /*
         * We replaced the line below with the two below it because zzRefill
         * no longer "refills" the buffer (since the way we do it, it's always
         * "full" the first time through, since it points to the segment's
         * array).  So, we assign zzEndRead here.
         */
        //zzStartRead = zzEndRead = s.offset;
        zzStartRead = s.offset;
        zzEndRead = zzStartRead + s.count - 1;
        zzCurrentPos = zzMarkedPos = zzPushbackPos = s.offset;
        zzLexicalState = YYINITIAL;
        zzReader = reader;
        zzAtBOL  = true;
        zzAtEOF  = false;
    }


    /**
     * Refills the input buffer.  Nothing is actually read here, since the
     * buffer is the segment's own array; this only reports whether the
     * current position has reached the end of the segment.
     *
     * @return {@code true} if EOF was reached, otherwise
     *         {@code false}.
     * @exception IOException if any I/O-Error occurs.
     */
    private boolean zzRefill() throws java.io.IOException {
        return zzCurrentPos>=s.offset+s.count;
    }


%}

/* This part is taken from http://www.python.org/doc/2.2.3/ref/grammar.txt */
identifier          = (({letter}|"_")({letter}|{digit}|"_")*)
letter              = ({lowercase}|{uppercase})
lowercase           = ([a-z])
uppercase           = ([A-Z])
digit               = ([0-9])
stringliteral       = ({stringprefix}?{shortstring})
stringprefix        = ("r"|"u"[rR]?|"R"|"U"[rR]?)
shortstring1        = ([\']{shortstring1item}*[\']?)
shortstring2        = ([\"]{shortstring2item}*[\"]?)
shortstring         = ({shortstring1}|{shortstring2})
shortstring1item    = ({shortstring1char}|{escapeseq})
shortstring2item    = ({shortstring2char}|{escapeseq})
shortstring1char    = ([^\\\n\'])
shortstring2char    = ([^\\\n\"])
escapeseq           = ([\\].)
longinteger         = ({integer}[lL])
integer             = ({decimalinteger}|{octinteger}|{hexinteger})
decimalinteger      = ({nonzerodigit}{digit}*|"0")
octinteger          = ("0"{octdigit}+)
hexinteger          = ("0"[xX]{hexdigit}+)
nonzerodigit        = ([1-9])
octdigit            = ([0-7])
hexdigit            = ({digit}|[a-f]|[A-F])
floatnumber         = ({pointfloat}|{exponentfloat})
pointfloat          = ({intpart}?{fraction}|{intpart}".")
exponentfloat       = (({intpart}|{pointfloat}){exponent})
intpart             = ({digit}+)
fraction            = ("."{digit}+)
exponent            = ([eE][\+\-]?{digit}+)
imagnumber          = (({floatnumber}|{intpart})[jJ])

ErrorNumberFormat   = ({digit}{NonSeparator}+)
NonSeparator        = ([^\t\f\r\n\ \(\)\{\}\[\]\;\,\.\=\>\<\!\~\?\:\+\-\*\/\&\|\^\%\"\']|"#")

LongStringStart1    = ({stringprefix}?\'\'\')
LongStringStart2    = ({stringprefix}?\"\"\")

LineTerminator      = (\n)
WhiteSpace          = ([ \t\f])

LineComment         = ("#".*)


/* Lexical states used while inside a ''' ... ''' or """ ... """ string. */
%state LONG_STRING_1
%state LONG_STRING_2


%%

/*
 * Word tables.  These are restricted to YYINITIAL: the long-string states
 * are declared with %state (inclusive), so an unscoped rule would also be
 * active inside a triple-quoted string and, on an equal-length match, would
 * take precedence over the string-body rule that appears later in the file.
 */
<YYINITIAL> {

    /* Keywords */
    "and"               { addToken(Token.RESERVED_WORD); }
    "as"                { addToken(Token.RESERVED_WORD); }
    "assert"            { addToken(Token.RESERVED_WORD); }
    "break"             { addToken(Token.RESERVED_WORD); }
    "class"             { addToken(Token.RESERVED_WORD); }
    "continue"          { addToken(Token.RESERVED_WORD); }
    "def"               { addToken(Token.RESERVED_WORD); }
    "del"               { addToken(Token.RESERVED_WORD); }
    "elif"              { addToken(Token.RESERVED_WORD); }
    "else"              { addToken(Token.RESERVED_WORD); }
    "except"            { addToken(Token.RESERVED_WORD); }
    "exec"              { addToken(Token.RESERVED_WORD); }
    "finally"           { addToken(Token.RESERVED_WORD); }
    "for"               { addToken(Token.RESERVED_WORD); }
    "from"              { addToken(Token.RESERVED_WORD); }
    "global"            { addToken(Token.RESERVED_WORD); }
    "if"                { addToken(Token.RESERVED_WORD); }
    "import"            { addToken(Token.RESERVED_WORD); }
    "in"                { addToken(Token.RESERVED_WORD); }
    "is"                { addToken(Token.RESERVED_WORD); }
    "lambda"            { addToken(Token.RESERVED_WORD); }
    "not"               { addToken(Token.RESERVED_WORD); }
    "or"                { addToken(Token.RESERVED_WORD); }
    "pass"              { addToken(Token.RESERVED_WORD); }
    "print"             { addToken(Token.RESERVED_WORD); }
    "raise"             { addToken(Token.RESERVED_WORD); }
    "return"            { addToken(Token.RESERVED_WORD); }
    "try"               { addToken(Token.RESERVED_WORD); }
    "while"             { addToken(Token.RESERVED_WORD); }
    "yield"             { addToken(Token.RESERVED_WORD); }

    /* Data types. */
    "char"              { addToken(Token.DATA_TYPE); }
    "double"            { addToken(Token.DATA_TYPE); }
    "float"             { addToken(Token.DATA_TYPE); }
    "int"               { addToken(Token.DATA_TYPE); }
    "long"              { addToken(Token.DATA_TYPE); }
    "short"             { addToken(Token.DATA_TYPE); }
    "signed"            { addToken(Token.DATA_TYPE); }
    "unsigned"          { addToken(Token.DATA_TYPE); }
    "void"              { addToken(Token.DATA_TYPE); }

    /*
     * Standard functions.
     * "float", "int" and "long" are intentionally not repeated here: they
     * are already matched by the data type rules above, and an identical
     * later rule can never be selected.
     */
    "abs"               { addToken(Token.FUNCTION); }
    "apply"             { addToken(Token.FUNCTION); }
    "bool"              { addToken(Token.FUNCTION); }
    "buffer"            { addToken(Token.FUNCTION); }
    "callable"          { addToken(Token.FUNCTION); }
    "chr"               { addToken(Token.FUNCTION); }
    "classmethod"       { addToken(Token.FUNCTION); }
    "cmp"               { addToken(Token.FUNCTION); }
    "coerce"            { addToken(Token.FUNCTION); }
    "compile"           { addToken(Token.FUNCTION); }
    "complex"           { addToken(Token.FUNCTION); }
    "delattr"           { addToken(Token.FUNCTION); }
    "dict"              { addToken(Token.FUNCTION); }
    "dir"               { addToken(Token.FUNCTION); }
    "divmod"            { addToken(Token.FUNCTION); }
    "enumerate"         { addToken(Token.FUNCTION); }
    "eval"              { addToken(Token.FUNCTION); }
    "execfile"          { addToken(Token.FUNCTION); }
    "file"              { addToken(Token.FUNCTION); }
    "filter"            { addToken(Token.FUNCTION); }
    "getattr"           { addToken(Token.FUNCTION); }
    "globals"           { addToken(Token.FUNCTION); }
    "hasattr"           { addToken(Token.FUNCTION); }
    "hash"              { addToken(Token.FUNCTION); }
    "hex"               { addToken(Token.FUNCTION); }
    "id"                { addToken(Token.FUNCTION); }
    "input"             { addToken(Token.FUNCTION); }
    "intern"            { addToken(Token.FUNCTION); }
    "isinstance"        { addToken(Token.FUNCTION); }
    "issubclass"        { addToken(Token.FUNCTION); }
    "iter"              { addToken(Token.FUNCTION); }
    "len"               { addToken(Token.FUNCTION); }
    "list"              { addToken(Token.FUNCTION); }
    "locals"            { addToken(Token.FUNCTION); }
    "map"               { addToken(Token.FUNCTION); }
    "max"               { addToken(Token.FUNCTION); }
    "min"               { addToken(Token.FUNCTION); }
    "object"            { addToken(Token.FUNCTION); }
    "oct"               { addToken(Token.FUNCTION); }
    "open"              { addToken(Token.FUNCTION); }
    "ord"               { addToken(Token.FUNCTION); }
    "pow"               { addToken(Token.FUNCTION); }
    "property"          { addToken(Token.FUNCTION); }
    "range"             { addToken(Token.FUNCTION); }
    "raw_input"         { addToken(Token.FUNCTION); }
    "reduce"            { addToken(Token.FUNCTION); }
    "reload"            { addToken(Token.FUNCTION); }
    "repr"              { addToken(Token.FUNCTION); }
    "round"             { addToken(Token.FUNCTION); }
    "setattr"           { addToken(Token.FUNCTION); }
    "slice"             { addToken(Token.FUNCTION); }
    "staticmethod"      { addToken(Token.FUNCTION); }
    "str"               { addToken(Token.FUNCTION); }
    "sum"               { addToken(Token.FUNCTION); }
    "super"             { addToken(Token.FUNCTION); }
    "tuple"             { addToken(Token.FUNCTION); }
    "type"              { addToken(Token.FUNCTION); }
    "unichr"            { addToken(Token.FUNCTION); }
    "unicode"           { addToken(Token.FUNCTION); }
    "vars"              { addToken(Token.FUNCTION); }
    "xrange"            { addToken(Token.FUNCTION); }
    "zip"               { addToken(Token.FUNCTION); }

}

<YYINITIAL> {

    {LineTerminator}    { addNullToken(); return firstToken; }

    {identifier}        { addToken(Token.IDENTIFIER); }

    {WhiteSpace}+       { addToken(Token.WHITESPACE); }

    /* String/Character Literals. */
    {stringliteral}     { addToken(Token.LITERAL_STRING_DOUBLE_QUOTE); }
    /* A triple quote switches to the matching long-string state. */
    {LongStringStart1}  { yybegin(LONG_STRING_1); addToken(Token.LITERAL_CHAR); }
    {LongStringStart2}  { yybegin(LONG_STRING_2); addToken(Token.LITERAL_STRING_DOUBLE_QUOTE); }

    /* Comment Literals. */
    {LineComment}       { addToken(Token.COMMENT_EOL); }

    /* Separators. */
    "("                 { addToken(Token.SEPARATOR); }
    ")"                 { addToken(Token.SEPARATOR); }
    "["                 { addToken(Token.SEPARATOR); }
    "]"                 { addToken(Token.SEPARATOR); }
    "{"                 { addToken(Token.SEPARATOR); }
    "}"                 { addToken(Token.SEPARATOR); }

    /* Operators. */
    "="                 { addToken(Token.OPERATOR); }
    "+"                 { addToken(Token.OPERATOR); }
    "-"                 { addToken(Token.OPERATOR); }
    "*"                 { addToken(Token.OPERATOR); }
    "/"                 { addToken(Token.OPERATOR); }
    "%"                 { addToken(Token.OPERATOR); }
    "**"                { addToken(Token.OPERATOR); }
    "~"                 { addToken(Token.OPERATOR); }
    "<"                 { addToken(Token.OPERATOR); }
    ">"                 { addToken(Token.OPERATOR); }
    "<<"                { addToken(Token.OPERATOR); }
    ">>"                { addToken(Token.OPERATOR); }
    "=="                { addToken(Token.OPERATOR); }
    "+="                { addToken(Token.OPERATOR); }
    "-="                { addToken(Token.OPERATOR); }
    "*="                { addToken(Token.OPERATOR); }
    "/="                { addToken(Token.OPERATOR); }
    "%="                { addToken(Token.OPERATOR); }
    ">>="               { addToken(Token.OPERATOR); }
    "<<="               { addToken(Token.OPERATOR); }
    "^"                 { addToken(Token.OPERATOR); }
    "&"                 { addToken(Token.OPERATOR); }
    "&&"                { addToken(Token.OPERATOR); }
    "|"                 { addToken(Token.OPERATOR); }
    "||"                { addToken(Token.OPERATOR); }
    "?"                 { addToken(Token.OPERATOR); }
    ":"                 { addToken(Token.OPERATOR); }
    ","                 { addToken(Token.OPERATOR); }
    "!"                 { addToken(Token.OPERATOR); }
    "++"                { addToken(Token.OPERATOR); }
    "--"                { addToken(Token.OPERATOR); }
    "."                 { addToken(Token.OPERATOR); }

    /* Numbers */
    {longinteger}|{integer}     { addToken(Token.LITERAL_NUMBER_DECIMAL_INT); }
    {floatnumber}|{imagnumber}  { addToken(Token.LITERAL_NUMBER_FLOAT); }
    {ErrorNumberFormat}         { addToken(Token.ERROR_NUMBER_FORMAT); }

    /* Other punctuation, we'll highlight it as "identifiers." */
    "@"                 { addToken(Token.IDENTIFIER); }
    ";"                 { addToken(Token.IDENTIFIER); }

    /* Ended with a line not in a string or comment. */
    <<EOF>>             { addNullToken(); return firstToken; }

    /* Catch any other (unhandled) characters and flag them as bad. */
    .                   { addToken(Token.ERROR_IDENTIFIER); }

}

/* Inside a ''' ... ''' string. */
<LONG_STRING_1> {

    [^\']+              { addToken(Token.LITERAL_CHAR); }
    "'''"               { yybegin(YYINITIAL); addToken(Token.LITERAL_CHAR); }
    /* A lone quote is still part of the string body. */
    "'"                 { addToken(Token.LITERAL_CHAR); }
    <<EOF>>             {
                            /*
                             * If nothing was tokenized on this line, still
                             * emit a string token so a non-null list is
                             * returned (presumably so the following line
                             * resumes in this state - verify against the
                             * caller).
                             */
                            if (firstToken==null) {
                                addToken(Token.LITERAL_CHAR);
                            }
                            return firstToken;
                        }

}

/* Inside a """ ... """ string. */
<LONG_STRING_2> {

    [^\"]+              { addToken(Token.LITERAL_STRING_DOUBLE_QUOTE); }
    \"\"\"              { yybegin(YYINITIAL); addToken(Token.LITERAL_STRING_DOUBLE_QUOTE); }
    /* A lone quote is still part of the string body. */
    \"                  { addToken(Token.LITERAL_STRING_DOUBLE_QUOTE); }
    <<EOF>>             {
                            /*
                             * If nothing was tokenized on this line, still
                             * emit a string token so a non-null list is
                             * returned (presumably so the following line
                             * resumes in this state - verify against the
                             * caller).
                             */
                            if (firstToken==null) {
                                addToken(Token.LITERAL_STRING_DOUBLE_QUOTE);
                            }
                            return firstToken;
                        }

}