/*
 * 11/13/2004
 *
 * CSharpTokenMaker.java - An object that can take a chunk of text and return
 * a linked list of tokens representing it in the C# programming language.
 *
 * This library is distributed under a modified BSD license. See the included
 * RSyntaxTextArea.License.txt file for details.
 */
package org.fife.ui.rsyntaxtextarea.modes;

import java.io.*;
import javax.swing.text.Segment;

import org.fife.ui.rsyntaxtextarea.*;


/**
 * A lexer for the C# programming language.
 *
 * This implementation was created using
 * JFlex 1.4.1; however, the generated file
 * was modified for performance. Memory allocation needs to be almost
 * completely removed to be competitive with the handwritten lexers (subclasses
 * of AbstractTokenMaker), so this class has been modified so that
 * Strings are never allocated (via yytext()), and the scanner never has to
 * worry about refilling its buffer (needlessly copying chars around).
 * We can achieve this because RText always scans exactly 1 line of tokens at a
 * time, and hands the scanner this line as an array of characters (a Segment
 * really). Since tokens contain pointers to char arrays instead of Strings
 * holding their contents, there is no need for allocating new memory for
 * Strings.

* * The actual algorithm generated for scanning has, of course, not been * modified.

 *
 * If you wish to regenerate this file yourself, keep in mind the following:
 * <ul>
 *   <li>The generated <code>CSharpTokenMaker.java</code> file will contain two
 *       definitions of both <code>zzRefill</code> and <code>yyreset</code>.
 *       You should hand-delete the second of each definition (the ones
 *       generated by the lexer), as these generated methods modify the input
 *       buffer, which we'll never have to do.</li>
 *   <li>You should also change the declaration/definition of
 *       <code>zzBuffer</code> to NOT be initialized. This is a needless memory
 *       allocation for us since we will be pointing the array somewhere else
 *       anyway.</li>
 *   <li>You should NOT call <code>yylex()</code> on the generated scanner
 *       directly; rather, you should use <code>getTokenList</code> as you would
 *       with any other <code>TokenMaker</code> instance.</li>
 * </ul>

*
 * @author Robert Futrell
 * @version 0.5
 *
 */
%%

%public
%class CSharpTokenMaker
%extends AbstractJFlexCTokenMaker
%unicode
%type org.fife.ui.rsyntaxtextarea.Token


%{

    /**
     * No-argument constructor. Declared explicitly because JFlex does not
     * generate one.
     */
    public CSharpTokenMaker() {
        super();
    }


    /**
     * Appends a token covering <code>zzBuffer[start..end]</code> to the
     * current token list through the six-argument <code>addToken</code>,
     * passing <code>true</code> as its last argument (presumably the
     * "is hyperlink" flag, going by this method's name).
     *
     * @param start The index in <code>zzBuffer</code> of the token's first
     *        character.
     * @param end The index in <code>zzBuffer</code> of the token's last
     *        character.
     * @param tokenType The token's type.
     * @see #addToken(int, int, int)
     */
    private void addHyperlinkToken(int start, int end, int tokenType) {
        // The document offset is the buffer index shifted by offsetShift.
        addToken(zzBuffer, start, end, tokenType, start + offsetShift, true);
    }


    /**
     * Appends the text of the current match, that is
     * <code>zzBuffer[zzStartRead..zzMarkedPos-1]</code>, to the current token
     * list.
     *
     * @param tokenType The token's type.
     */
    private void addToken(int tokenType) {
        int lastMatched = zzMarkedPos - 1;
        addToken(zzStartRead, lastMatched, tokenType);
    }


    /**
     * Appends a token covering <code>zzBuffer[start..end]</code> to the
     * current token list.
     *
     * @param start The index in <code>zzBuffer</code> of the token's first
     *        character.
     * @param end The index in <code>zzBuffer</code> of the token's last
     *        character.
     * @param tokenType The token's type.
     */
    private void addToken(int start, int end, int tokenType) {
        // The document offset is the buffer index shifted by offsetShift.
        addToken(zzBuffer, start, end, tokenType, start + offsetShift);
    }


    /**
     * Appends the specified token to the current token list, then moves
     * <code>zzStartRead</code> up to <code>zzMarkedPos</code>. Because of that
     * side effect, rule actions that add more than one token save
     * <code>zzStartRead</code> in a local before their first call.
     *
     * @param array The character array.
     * @param start The starting offset in the array.
     * @param end The ending offset in the array.
     * @param tokenType The token's type.
     * @param startOffset The offset in the document at which this token
     *        occurs.
     */
    public void addToken(char[] array, int start, int end, int tokenType, int startOffset) {
        super.addToken(array, start, end, tokenType, startOffset);
        zzStartRead = zzMarkedPos;
    }


    /**
     * Returns the text to place at the beginning and end of a line to
     * "comment" it out in this programming language.
     *
     * @return A two-element array: the start marker <code>"//"</code> and
     *         <code>null</code> for the end marker.
     */
    public String[] getLineCommentStartAndEnd() {
        String[] delimiters = { "//", null };
        return delimiters;
    }


    /**
     * Returns the first token in the linked list of tokens generated
     * from <code>text</code>.
*
     * Scanning begins in the lexical state selected by
     * <code>initialTokenType</code>: the verbatim-string state for
     * <code>Token.LITERAL_STRING_DOUBLE_QUOTE</code>, the delimited-comment
     * state for <code>Token.COMMENT_MULTILINE</code>, and the initial state
     * for anything else. (Presumably callers pass the type of the token that
     * ended the previous line -- confirm against the caller.)
     *
     * @param text The text from which to get tokens.
     * @param initialTokenType The token type we should start with.
     * @param startOffset The offset into the document at which
     *        <code>text</code> starts.
     * @return The first <code>Token</code> in a linked list representing
     *         the syntax highlighted text.
     */
    public Token getTokenList(Segment text, int initialTokenType, int startOffset) {

        resetTokenList();
        this.offsetShift = -text.offset + startOffset;

        // Start off in the proper lexical state. This is a JFlex state, so
        // the default is YYINITIAL rather than a token-type constant.
        int state = YYINITIAL;
        switch (initialTokenType) {
            case Token.LITERAL_STRING_DOUBLE_QUOTE:
                state = VERBATIMSTRING;
                start = text.offset;
                break;
            case Token.COMMENT_MULTILINE:
                state = DELIMITEDCOMMENT;
                start = text.offset;
                break;
            default:
                break;
        }

        s = text;
        try {
            yyreset(zzReader);
            yybegin(state);
            return yylex();
        } catch (IOException ioe) {
            ioe.printStackTrace();
            return new DefaultToken();
        }

    }


    /**
     * Refills the input buffer. Nothing is actually read here: the buffer
     * is the segment's own array, so this only reports whether the current
     * position has reached the end of the segment.
     *
     * @return <code>true</code> if EOF was reached, otherwise
     *         <code>false</code>.
     * @exception IOException if any I/O-Error occurs.
     */
    private boolean zzRefill() throws java.io.IOException {
        return zzCurrentPos>=s.offset+s.count;
    }


    /**
     * Resets the scanner to read from a new input stream.
     * Does not close the old reader.
     *
     * All internal variables are reset, the old input stream
     * <b>cannot</b> be reused (internal buffer is discarded and lost).
     * Lexical state is set to <code>YYINITIAL</code>.
     *
     * @param reader the new input stream
     */
    public final void yyreset(java.io.Reader reader) throws java.io.IOException {
        // 's' has been updated.
        zzBuffer = s.array;
        /*
         * We replaced the line below with the two below it because zzRefill
         * no longer "refills" the buffer (since the way we do it, it's always
         * "full" the first time through, since it points to the segment's
         * array). So, we assign zzEndRead here.
         */
        //zzStartRead = zzEndRead = s.offset;
        zzStartRead = s.offset;
        zzEndRead = zzStartRead + s.count - 1;
        zzCurrentPos = zzMarkedPos = zzPushbackPos = s.offset;
        zzLexicalState = YYINITIAL;
        zzReader = reader;
        zzAtBOL = true;
        zzAtEOF = false;
    }


%}

/* C1.1 - Line terminators. */
NewlineCharacter                        = ([\n])

/* C.1.2 - Whitespace. */
Whitespace                              = ([\t ]+)

/* C.1.3 - Comments */
InputCharacter                          = ([^\n])
InputCharacters                         = ({InputCharacter}+)
DocumentationCommentStart               = ("///")
SingleLineComment                       = ("//"([^/]{InputCharacters}?)?)
DelimitedCommentStart                   = ("/*")
DelimitedCommentEnd                     = ("*/")

/* C.1.5 - Unicode character escape sequences. */
UnicodeEscape1                          = ("\\u"{HexDigit}{HexDigit}{HexDigit}{HexDigit})
UnicodeEscape2                          = ("\\U"{HexDigit}{HexDigit}{HexDigit}{HexDigit}{HexDigit}{HexDigit}{HexDigit}{HexDigit})
UnicodeEscapeSequence                   = ({UnicodeEscape1}|{UnicodeEscape2})

/* C1.6 - Identifiers. */
/* LetterCharacter is not accurate - many more Unicode letters, Unicode escapes */
LetterCharacter                         = ([A-Za-z])
/*
CombiningCharacter                      = ()
*/
DecimalDigitCharacter                   = ([0-9])
/* Only the underscore: a '-' here would make "a-b" lex as a single identifier. */
ConnectingCharacter                     = ([_])
/*
FormattingCharacter                     = ()
*/
/*
IdentifierPartCharacter                 = ({LetterCharacter}|{DecimalDigitCharacter}|{ConnectingCharacter}|{CombiningCharacter}|{FormattingCharacter})
*/
IdentifierPartCharacter                 = ({LetterCharacter}|{DecimalDigitCharacter}|{ConnectingCharacter})
IdentifierPartCharacters                = ({IdentifierPartCharacter}+)
IdentifierStartCharacter                = ({LetterCharacter}|[_])
IdentifierOrKeyword                     = ({IdentifierStartCharacter}{IdentifierPartCharacters}?)
Identifier                              = ("@"?{IdentifierOrKeyword})
/* NOTE: The two below aren't from the C# spec, but we add them so we can */
/* highlight errors. */
/* Parentheses are excluded so that "foo(bar)" is not swallowed whole as an */
/* error identifier. */
NonSeparator                            = (([^\t\f\r\n\ \(\)\{\}\[\]\;\,\.\=\>\<\!\~\?\:\+\-\*\/\&\|\^\%\"\']|"#"|"\\"))
ErrorIdentifier                         = ({NonSeparator}+)

/* C1.8 - Literals. */
BooleanLiteral                          = ("true"|"false")
DecimalDigit                            = ([0-9])
DecimalDigits                           = ({DecimalDigit}+)
IntegerTypeSuffix                       = (([uU][lL]?)|([lL][uU]?))
DecimalIntegerLiteral                   = ({DecimalDigits}{IntegerTypeSuffix}?)
HexDigit                                = ([0-9A-Fa-f])
HexDigits                               = ({HexDigit}+)
HexadecimalIntegerLiteral               = ("0"[xX]{HexDigits}{IntegerTypeSuffix}?)
Sign                                    = ([+\-])
ExponentPart                            = ([eE]{Sign}?{DecimalDigits})
RealTypeSuffix                          = ([fFdDmM])
RealHelper1                             = ({DecimalDigits}"."{DecimalDigits}{ExponentPart}?{RealTypeSuffix}?)
RealHelper2                             = ("."{DecimalDigits}{ExponentPart}?{RealTypeSuffix}?)
RealHelper3                             = ({DecimalDigits}{ExponentPart}{RealTypeSuffix}?)
RealHelper4                             = ({DecimalDigits}{RealTypeSuffix})
RealLiteral                             = ({RealHelper1}|{RealHelper2}|{RealHelper3}|{RealHelper4})
ErrorNumberFormat                       = (({DecimalIntegerLiteral}|{HexadecimalIntegerLiteral}|{RealLiteral}){NonSeparator}+)
SingleCharacter                         = ([^\'\\\n])
SimpleEscapeSequence                    = ("\\"[\'\"\\0abfnrtv])
HexadecimalEscapeSequence               = ("\\x"{HexDigit}{HexDigit}?{HexDigit}?{HexDigit}?)
Character                               = ({SingleCharacter}|{SimpleEscapeSequence}|{HexadecimalEscapeSequence}|{UnicodeEscapeSequence})
UnclosedCharacterLiteral                = ("'"{Character})
CharacterLiteral                        = ({UnclosedCharacterLiteral}"'")
ErrorUnclosedCharacterLiteral           = ("'"[^\'\n]*)
ErrorCharacterLiteral                   = ("''"|{ErrorUnclosedCharacterLiteral}[\'])
QuoteEscapeSequence                     = ("\"\"")
SingleVerbatimStringLiteralCharacter    = ([^\"])
VerbatimStringLiteralStart              = ("@\"")
SingleRegularStringLiteralCharacter     = ([^\"\\\n])
RegularStringLiteralCharacter           = ({SingleRegularStringLiteralCharacter}|{SimpleEscapeSequence}|{HexadecimalEscapeSequence}|{UnicodeEscapeSequence})
RegularStringLiteralCharacters          = ({RegularStringLiteralCharacter}+)
RegularStringLiteral                    = ([\"]{RegularStringLiteralCharacters}?[\"])
UnclosedRegularStringLiteral            = ([\"]([\\].|[^\\\"])*[^\"]?)
ErrorRegularStringLiteral               = ({UnclosedRegularStringLiteral}[\"])

/* C.1.9 - Operators and Punctuators. */
OOPHelper1                              = (":")
OOPHelper2                              = ("+"|"-"|"*"|"/"|"%"|"&"|"|"|"^"|"!"|"~")
OOPHelper3                              = ("="|"<"|">"|"?"|"++"|"--"|"&&"|"||"|"<<"|">>")
OOPHelper4                              = ("=="|"!="|"<="|">="|"+="|"-="|"*="|"/="|"%="|"&=")
OOPHelper5                              = ("|="|"^="|"<<="|">>="|"->")
OperatorOrPunctuator                    = ({OOPHelper1}|{OOPHelper2}|{OOPHelper3}|{OOPHelper4}|{OOPHelper5})
/* NOTE: We distinguish between operators and separators (punctuators), but */
/* the C# spec doesn't, so the stuff below isn't in the spec. */
/* Parentheses and the member-access dot are included so they are not */
/* flagged as errors by the catch-all rule. */
Separator                               = ([\(\)\{\}\[\]])
Separator2                              = ([,;\.])

/* C.1.10 - Pre-processing Directives. */
/* NOTE: We don't do ALL of the PP stuff here as it's unnecessary */
/* for us to know the difference between declarations, diagnostics, */
/* regions, etc. */
/* ConditionalSymbol is not correct - the spec excludes "true" and "false". */
ConditionalSymbol                       = ({IdentifierOrKeyword})
PPNewLine                               = ({Whitespace}?{SingleLineComment}?{NewlineCharacter})
PPPrimaryExpression                     = ({IdentifierOrKeyword}|({Whitespace}?{PPExpression}{Whitespace}?))
PPUnaryExpression                       = ({PPPrimaryExpression}|("!"{Whitespace}?{PPUnaryExpression}))
PPEqualityExpression                    = ({PPUnaryExpression}|({Whitespace}?"=="{Whitespace}?{PPUnaryExpression})|({Whitespace}?"!="{Whitespace}?{PPUnaryExpression}))
PPAndExpression                         = ({PPEqualityExpression}|({Whitespace}?"&&"{Whitespace}?{PPEqualityExpression}))
PPOrExpression                          = ({PPAndExpression}|({Whitespace}?"||"{Whitespace}?{PPAndExpression}))
PPExpression                            = ({Whitespace}?{PPOrExpression}{Whitespace}?)
PPWord                                  = ("define"|"undef"|"if"|"elif"|"else"|"endif"|"line"|"error"|"warning"|"region"|"endregion")
PPDirective                             = ({Whitespace}?"#"{Whitespace}?{PPWord}{InputCharacter}*)

/* URL matching, for comments (not in C# spec) */
URLGenDelim                             = ([:\/\?#\[\]@])
URLSubDelim                             = ([\!\$&'\*\+,;=])
URLUnreserved                           = ([A-Za-z_]|{DecimalDigitCharacter}|[\-\.\~])
URLCharacter                            = ({URLGenDelim}|{URLSubDelim}|{URLUnreserved}|[%])
URLCharacters                           = ({URLCharacter}*)
URLEndCharacter                         = ([\/\$]|[A-Za-z0-9])
URL                                     = (((https?|f(tp|ile))"://"|"www.")({URLCharacters}{URLEndCharacter})?)


%state DELIMITEDCOMMENT
%state DOCUMENTCOMMENT
%state VERBATIMSTRING

%%

/* Rules for ordinary code: everything outside comments and verbatim strings. */
<YYINITIAL> {

    /* Keywords */
    "abstract" |
    "as" |
    "base" |
    "break" |
    "case" |
    "catch" |
    "checked" |
    "class" |
    "const" |
    "continue" |
    "decimal" |
    "default" |
    "delegate" |
    "do" |
    "else" |
    "enum" |
    "event" |
    "explicit" |
    "extern" |
    "finally" |
    "fixed" |
    "for" |
    "foreach" |
    "goto" |
    "if" |
    "implicit" |
    "in" |
    "interface" |
    "internal" |
    "is" |
    "lock" |
    "namespace" |
    "new" |
    "null" |
    "object" |
    "operator" |
    "out" |
    "override" |
    "params" |
    "private" |
    "protected" |
    "public" |
    "readonly" |
    "ref" |
    "return" |
    "sealed" |
    "sizeof" |
    "stackalloc" |
    "static" |
    "string" |
    "struct" |
    "switch" |
    "this" |
    "throw" |
    "try" |
    "typeof" |
    "unchecked" |
    "unsafe" |
    "using" |
    "virtual" |
    "void" |
    "volatile" |
    "while"                         { addToken(Token.RESERVED_WORD); }

    /* Data types. */
    /* NOTE(review): "object" and "string" are also listed as keywords above. */
    /* For equal-length matches JFlex picks the earlier rule, so they are     */
    /* emitted as RESERVED_WORD; confirm which highlighting is intended.      */
    "bool" |
    "byte" |
    "char" |
    "double" |
    "float" |
    "int" |
    "long" |
    "object" |
    "sbyte" |
    "short" |
    "string" |
    "uint" |
    "ulong" |
    "ushort"                        { addToken(Token.DATA_TYPE); }

    {NewlineCharacter}              { addNullToken(); return firstToken; }

    {BooleanLiteral}                { addToken(Token.LITERAL_BOOLEAN); }

    {Identifier}                    { addToken(Token.IDENTIFIER); }

    {Whitespace}                    { addToken(Token.WHITESPACE); }

    /* String/Character Literals. */
    {CharacterLiteral}              { addToken(Token.LITERAL_CHAR); }
    {UnclosedCharacterLiteral}      { addToken(Token.ERROR_CHAR); /*addNullToken(); return firstToken;*/ }
    {ErrorUnclosedCharacterLiteral} { addToken(Token.ERROR_CHAR); addNullToken(); return firstToken; }
    {ErrorCharacterLiteral}         { addToken(Token.ERROR_CHAR); }
    /* "start" remembers where the @" began; the token is emitted by the */
    /* VERBATIMSTRING rules once the string (or the line) ends.          */
    {VerbatimStringLiteralStart}    { start = zzMarkedPos-2; yybegin(VERBATIMSTRING); }
    {RegularStringLiteral}          { addToken(Token.LITERAL_STRING_DOUBLE_QUOTE); }
    {UnclosedRegularStringLiteral}  { addToken(Token.ERROR_STRING_DOUBLE); addNullToken(); return firstToken; }
    {ErrorRegularStringLiteral}     { addToken(Token.ERROR_STRING_DOUBLE); }

    /* Comments. */
    {DelimitedCommentStart}         { start = zzMarkedPos-2; yybegin(DELIMITEDCOMMENT); }
    {DocumentationCommentStart}     { start = zzMarkedPos-3; yybegin(DOCUMENTCOMMENT); }
    {SingleLineComment}             { addToken(Token.COMMENT_EOL); addNullToken(); return firstToken; }

    /* Separators. */
    {Separator}                     { addToken(Token.SEPARATOR); }
    {Separator2}                    { addToken(Token.IDENTIFIER); }

    /* Operators. */
    {OperatorOrPunctuator}          { addToken(Token.OPERATOR); }

    /* Numbers */
    {DecimalIntegerLiteral}         { addToken(Token.LITERAL_NUMBER_DECIMAL_INT); }
    {HexadecimalIntegerLiteral}     { addToken(Token.LITERAL_NUMBER_HEXADECIMAL); }
    {RealLiteral}                   { addToken(Token.LITERAL_NUMBER_FLOAT); }
    {ErrorNumberFormat}             { addToken(Token.ERROR_NUMBER_FORMAT); }

    /* Preprocessor directives. */
    {PPDirective}                   { addToken(Token.PREPROCESSOR); }

    /* Pretty-much anything else. */
    {ErrorIdentifier}               { addToken(Token.ERROR_IDENTIFIER); }

    /* Ended with a line not in a string or comment. */
    <<EOF>>                         { addNullToken(); return firstToken; }

    /* Catch any other (unhandled) characters and flag them as bad. */
    .                               { addToken(Token.ERROR_IDENTIFIER); }

}

/* Inside a delimited comment. Text accumulates from "start" until a URL, */
/* the comment end, or the end of the line forces a token to be emitted.  */
<DELIMITEDCOMMENT> {

    [^hwf\n\*]+                     {}
    /* addToken() moves zzStartRead, so the URL's start is saved first. */
    {URL}                           { int temp=zzStartRead; addToken(start,zzStartRead-1, Token.COMMENT_MULTILINE); addHyperlinkToken(temp,zzMarkedPos-1, Token.COMMENT_MULTILINE); start = zzMarkedPos; }
    [hwf]                           {}

    /* No null token is added here, so the line ends on a comment token. */
    \n                              { addToken(start,zzStartRead-1, Token.COMMENT_MULTILINE); return firstToken; }
    {DelimitedCommentEnd}           { yybegin(YYINITIAL); addToken(start,zzStartRead+1, Token.COMMENT_MULTILINE); }
    \*                              {}
    <<EOF>>                         { addToken(start,zzStartRead-1, Token.COMMENT_MULTILINE); return firstToken; }

}

/* Inside a "///" documentation comment. These always end with the line, */
/* so every terminating rule adds a null token.                          */
<DOCUMENTCOMMENT> {

    [^hwf\<\n]*                     {}
    {URL}                           { int temp=zzStartRead; addToken(start,zzStartRead-1, Token.COMMENT_DOCUMENTATION); addHyperlinkToken(temp,zzMarkedPos-1, Token.COMMENT_DOCUMENTATION); start = zzMarkedPos; }
    [hwf]                           {}

    \n                              { addToken(start,zzStartRead-1, Token.COMMENT_DOCUMENTATION); addNullToken(); return firstToken; }

    /* A complete <...> tag. */
    "<"[^\>]*">"                    { int temp=zzStartRead; addToken(start,zzStartRead-1, Token.COMMENT_DOCUMENTATION); addToken(temp,zzMarkedPos-1, Token.PREPROCESSOR); start = zzMarkedPos; }
    /* A "<" with no closing ">": the rest of the line becomes the tag token. */
    "<"                             { int temp=zzStartRead; addToken(start,zzStartRead-1, Token.COMMENT_DOCUMENTATION); addToken(temp,zzEndRead, Token.PREPROCESSOR); addNullToken(); return firstToken; }

    <<EOF>>                         { addToken(start,zzStartRead-1, Token.COMMENT_DOCUMENTATION); addNullToken(); return firstToken; }

}

/* Inside an @"..." verbatim string, which may span several lines. */
<VERBATIMSTRING> {

    [^\"\n]*                        {}
    {QuoteEscapeSequence}           {}
    \"                              { yybegin(YYINITIAL); addToken(start,zzStartRead, Token.LITERAL_STRING_DOUBLE_QUOTE); }

    /* No null token is added here, so the line ends on a string token. */
    \n                              { addToken(start,zzStartRead-1, Token.LITERAL_STRING_DOUBLE_QUOTE); return firstToken; }
    <<EOF>>                         { addToken(start,zzStartRead-1, Token.LITERAL_STRING_DOUBLE_QUOTE); return firstToken; }

}