/*
 * 12/06/2004
 *
 * AssemblerX86TokenMaker.java - An object that can take a chunk of text and
 * return a linked list of tokens representing X86 assembler.
 *
 * This library is distributed under a modified BSD license.  See the included
 * RSyntaxTextArea.License.txt file for details.
 */
package org.fife.ui.rsyntaxtextarea.modes;
import java.io.*;
import javax.swing.text.Segment;
import org.fife.ui.rsyntaxtextarea.*;
/**
 * This class takes plain text and returns tokens representing x86
 * assembler.<p>
 *
 * This implementation was created using JFlex 1.4.1; however, the generated
 * file was modified for performance.  Memory allocation needs to be almost
 * completely removed to be competitive with the handwritten lexers
 * (subclasses of <code>AbstractTokenMaker</code>), so this class has been
 * modified so that Strings are never allocated (via yytext()), and the
 * scanner never has to worry about refilling its buffer (needlessly copying
 * chars around).  We can achieve this because RText always scans exactly 1
 * line of tokens at a time, and hands the scanner this line as an array of
 * characters (a Segment really).  Since tokens contain pointers to char
 * arrays instead of Strings holding their contents, there is no need for
 * allocating new memory for Strings.<p>
 *
 * The actual algorithm generated for scanning has, of course, not been
 * modified.<p>
 *
 * If you wish to regenerate this file yourself, keep in mind the following:
 * <ul>
 *   <li>The generated AssemblerX86TokenMaker.java file will contain two
 *       definitions of both <code>zzRefill</code> and <code>yyreset</code>.
 *       You should hand-delete the second of each definition (the ones
 *       generated by the lexer), as these generated methods modify the input
 *       buffer, which we'll never have to do.</li>
 *   <li>You should also change the declaration/definition of zzBuffer to NOT
 *       be initialized.  This is a needless memory allocation for us since we
 *       will be pointing the array somewhere else anyway.</li>
 *   <li>You should NOT call <code>yylex()</code> on the generated scanner
 *       directly; rather, you should use <code>getTokenList</code> as you
 *       would with any other <code>TokenMaker</code>.</li>
 * </ul>
 *
 * @author Robert Futrell
 * @version 0.2
 */
%class AssemblerX86TokenMaker
%extends AbstractJFlexTokenMaker
%implements TokenMaker
%type org.fife.ui.rsyntaxtextarea.Token
* Constructor. We must have this here as JFLex does not generate a
* no parameter constructor.
public AssemblerX86TokenMaker() {
* Adds the token specified to the current linked list of tokens.
* @param tokenType The token's type.
private void addToken(int tokenType) {
addToken(zzStartRead, zzMarkedPos-1, tokenType);
* Adds the token specified to the current linked list of tokens.
* @param tokenType The token's type.
private void addToken(int start, int end, int tokenType) {
int so = start + offsetShift;
addToken(zzBuffer, start,end, tokenType, so);
* Adds the token specified to the current linked list of tokens.
* @param array The character array.
* @param start The starting offset in the array.
* @param end The ending offset in the array.
* @param tokenType The token's type.
* @param startOffset The offset in the document at which this token
* occurs.
public void addToken(char[] array, int start, int end, int tokenType, int startOffset) {
super.addToken(array, start,end, tokenType, startOffset);
zzStartRead = zzMarkedPos;
* Returns the text to place at the beginning and end of a
* line to "comment" it in a this programming language.
* @return The start and end strings to add to a line to "comment"
* it out.
public String[] getLineCommentStartAndEnd() {
return new String[] { ";", null };
* Returns the first token in the linked list of tokens generated
* from text
. This method must be implemented by
* subclasses so they can correctly implement syntax highlighting.
* @param text The text from which to get tokens.
* @param initialTokenType The token type we should start with.
* @param startOffset The offset into the document at which
* text
* @return The first Token
in a linked list representing
* the syntax highlighted text.
public Token getTokenList(Segment text, int initialTokenType, int startOffset) {
this.offsetShift = -text.offset + startOffset;
// Start off in the proper state.
int state = Token.NULL;
switch (initialTokenType) {
state = Token.NULL;
s = text;
try {
return yylex();
} catch (IOException ioe) {
return new DefaultToken();
* Refills the input buffer.
* @return true
if EOF was reached, otherwise
* false
* @exception IOException if any I/O-Error occurs.
private boolean zzRefill() throws java.io.IOException {
return zzCurrentPos>=s.offset+s.count;
* Resets the scanner to read from a new input stream.
* Does not close the old reader.
* All internal variables are reset, the old input stream
* cannot be reused (internal buffer is discarded and lost).
* Lexical state is set to YY_INITIAL.
* @param reader the new input stream
public final void yyreset(java.io.Reader reader) throws java.io.IOException {
// 's' has been updated.
zzBuffer = s.array;
* We replaced the line below with the two below it because zzRefill
* no longer "refills" the buffer (since the way we do it, it's always
* "full" the first time through, since it points to the segment's
* array). So, we assign zzEndRead here.
//zzStartRead = zzEndRead = s.offset;
zzStartRead = s.offset;
zzEndRead = zzStartRead + s.count - 1;
zzCurrentPos = zzMarkedPos = zzPushbackPos = s.offset;
zzLexicalState = YYINITIAL;
zzReader = reader;
zzAtBOL = true;
zzAtEOF = false;
/* Macro definitions used by the lexical rules. */
Letter = ([A-Za-z_])
Digit = ([0-9])
Number = ({Digit}+)
/*
 * A letter or digit followed by one or more characters that do not end an
 * identifier.  ';' is excluded because it starts a comment: without that
 * exclusion, text such as "BX;done" would be scanned as one identifier and
 * the comment would never be recognized.
 */
Identifier = (({Letter}|{Digit})[^ \t\f\n\,\.\+\-\*\/\%\[\]\;]+)
/* A double quote and everything after it up to (not including) the next one. */
UnclosedStringLiteral = ([\"][^\"]*)
StringLiteral = ({UnclosedStringLiteral}[\"])
/* A single quote and everything after it up to (not including) the next one. */
UnclosedCharLiteral = ([\'][^\']*)
CharLiteral = ({UnclosedCharLiteral}[\'])
CommentBegin = ([;])
LineTerminator = (\n)
WhiteSpace = ([ \t\f])
/* One or more letters/digits immediately followed by a colon. */
Label = (({Letter}|{Digit})+[\:])
Operator = ("+"|"-"|"*"|"/"|"%"|"^"|"|"|"&"|"~"|"!"|"="|"<"|">")
/*
 * Keywords.  Every literal in this list shares the action on the last line
 * and is added as a Token.PREPROCESSOR token.
 */
".186" |
".286" |
".286P" |
".287" |
".386" |
".386P" |
".387" |
".486" |
".486P" |
".586" |
".586P" |
".686" |
".686P" |
".8086" |
".8087" |
".ALPHA" |
".BREAK" |
".BSS" |
".CODE" |
".CONST" |
".CREF" |
".DATA" |
".DATA?" |
".ELSE" |
".ENDIF" |
".ENDW" |
".ERR" |
".ERR1" |
".ERR2" |
".ERRB" |
".ERRE" |
".ERRNB" |
".ERRNZ" |
".EXIT" |
".IF" |
".K3D" |
".LALL" |
".LIST" |
".MMX" |
".MODEL" |
".NO87" |
".RADIX" |
".SALL" |
".SEQ" |
".STACK" |
".TEXT" |
".UNTIL" |
".WHILE" |
".XALL" |
".XCREF" |
".XLIST" |
".XMM" |
"__FILE__" |
"__LINE__" |
"A16" |
"A32" |
"ADDR" |
"BITS" |
"CARRY?" |
"COMM" |
"ECHO" |
"ELSE" |
"END" |
"ENDM" |
"ENDP" |
"ENDS" |
"EVEN" |
"FAR" |
"FOR" |
"FORC" |
"GOTO" |
"HIGH" |
"IEND" |
"IF" |
"IF1" |
"IF2" |
"IFB" |
"IFE" |
"IFNB" |
"IRP" |
"IRPC" |
"LOW" |
"NAME" |
"NEAR" |
"O16" |
"O32" |
"ORG" |
"PAGE" |
"PROC" |
"PTR" |
"REPT" |
"SEG" |
"SIGN?" |
"SIZE" |
"THIS" |
"TYPE" |
"USE16" |
"USE32" |
"USES" |
"WRT" |
"ZERO?" { addToken(Token.PREPROCESSOR); }
/* Keywords added as Token.FUNCTION tokens (DB, RESB, EQU, DUP and friends). */
"DB" |
"DW" |
"DD" |
"DF" |
"DQ" |
"DT" |
"RESB" |
"RESW" |
"RESD" |
"RESQ" |
"REST" |
"EQU" |
"DUP" { addToken(Token.FUNCTION); }
/* Type names added as Token.DATA_TYPE tokens. */
"BYTE" |
"WORD" |
"REAL4" |
"REAL8" |
"REAL10" { addToken(Token.DATA_TYPE); }
/*
 * Registers.  Every literal in this list shares the action on the last line
 * and is added as a Token.VARIABLE token.
 */
"AL" |
"BL" |
"CL" |
"DL" |
"AH" |
"BH" |
"CH" |
"DH" |
"AX" |
"BX" |
"CX" |
"DX" |
"SI" |
"DI" |
"SP" |
"BP" |
"EAX" |
"EBX" |
"ECX" |
"EDX" |
"ESI" |
"EDI" |
"ESP" |
"EBP" |
"CS" |
"DS" |
"SS" |
"ES" |
"FS" |
"GS" |
"ST" |
"ST0" |
"ST1" |
"ST2" |
"ST3" |
"ST4" |
"ST5" |
"ST6" |
"ST7" |
"MM0" |
"MM1" |
"MM2" |
"MM3" |
"MM4" |
"MM5" |
"MM6" |
"MM7" |
"XMM0" |
"XMM1" |
"XMM2" |
"XMM3" |
"XMM4" |
"XMM5" |
"XMM6" |
"XMM7" |
"CR0" |
"CR2" |
"CR3" |
"CR4" |
"DR0" |
"DR1" |
"DR2" |
"DR3" |
"DR4" |
"DR5" |
"DR6" |
"DR7" |
"TR3" |
"TR4" |
"TR5" |
"TR6" |
"TR7" { addToken(Token.VARIABLE); }
/*
 * Pentium III Instructions.  Every literal in this list shares the action
 * on the last line and is added as a Token.RESERVED_WORD token.
 */
"AAA" |
"AAD" |
"AAM" |
"AAS" |
"ADC" |
"ADD" |
"AND" |
"ARPL" |
"BSF" |
"BSR" |
"BT" |
"BTC" |
"BTR" |
"BTS" |
"CALL" |
"CBW" |
"CDQ" |
"CLC" |
"CLD" |
"CLI" |
"CLTS" |
"CMC" |
"CMP" |
"CMPS" |
"CWD" |
"CWDE" |
"DAA" |
"DAS" |
"DEC" |
"DIV" |
"EMMS" |
"F2XM1" |
"FABS" |
"FADD" |
"FBLD" |
"FCHS" |
"FCOM" |
"FCOS" |
"FDIV" |
"FILD" |
"FIST" |
"FLD1" |
"FLDL2E" |
"FLDL2T" |
"FLDLG2" |
"FLDLN2" |
"FLDZ" |
"FMUL" |
"FNOP" |
"FSIN" |
"FST" |
"FSTP" |
"FSUB" |
"FTST" |
"FXAM" |
"FXCH" |
"FYL2X" |
"FYL2XP1" |
"HLT" |
"IDIV" |
"IMUL" |
"IN" |
"INC" |
"INS" |
"INSB" |
"INSD" |
"INSW" |
"INT" |
"INTO" |
"INVD" |
"IRET" |
"JA" |
"JAE" |
"JB" |
"JBE" |
"JC" |
"JCXZ" |
"JE" |
"JG" |
"JGE" |
"JL" |
"JLE" |
"JMP" |
"JNA" |
"JNAE" |
"JNB" |
"JNBE" |
"JNC" |
"JNE" |
"JNG" |
"JNGE" |
"JNL" |
"JNLE" |
"JNO" |
"JNP" |
"JNS" |
"JNZ" |
"JO" |
"JP" |
"JPE" |
"JPO" |
"JS" |
"JZ" |
"LAHF" |
"LAR" |
"LDS" |
"LEA" |
"LES" |
"LFS" |
"LGDT" |
"LGS" |
"LIDT" |
"LLDT" |
"LMSW" |
"LOCK" |
"LODS" |
"LOOP" |
"LSL" |
"LSS" |
"LTR" |
"MOV" |
"MOVD" |
"MOVQ" |
"MOVS" |
"MUL" |
"NEG" |
"NOP" |
"NOT" |
"OR" |
"ORPS" |
"OUT" |
"OUTS" |
"PAND" |
"POP" |
"POPA" |
"POPF" |
"POR" |
"PUSH" |
"PXOR" |
"RCL" |
"RCR" |
"REP" |
"REPE" |
"REPZ" |
"RET" |
"RETF" |
"RETN" |
"ROL" |
"ROR" |
"RSM" |
"SAHF" |
"SAL" |
"SAR" |
"SBB" |
"SCAS" |
"SETA" |
"SETB" |
"SETC" |
"SETE" |
"SETG" |
"SETL" |
"SETO" |
"SETP" |
"SETS" |
"SETZ" |
"SGDT" |
"SHL" |
"SHLD" |
"SHR" |
"SHRD" |
"SIDT" |
"SLDT" |
"SMSW" |
"STC" |
"STD" |
"STI" |
"STOS" |
"STR" |
"SUB" |
"TEST" |
"UD2" |
"WAIT" |
"VERR" |
"VERW" |
"XADD" |
"XCHG" |
"XLAT" |
"XOR" |
"XORPS" { addToken(Token.RESERVED_WORD); }
/* A newline ends the scan: append the null token and return the head of the list. */
{LineTerminator} { addNullToken(); return firstToken; }
/* A run of spaces, tabs and form feeds becomes one whitespace token. */
{WhiteSpace}+ { addToken(Token.WHITESPACE); }
/*
 * String/Character Literals.  A closed literal is longer than its unclosed
 * prefix, so the closed form is chosen whenever the closing quote is present.
 */
{CharLiteral} { addToken(Token.LITERAL_CHAR); }
/* Unclosed char literal: flagged as an error token; scanning then continues. */
{UnclosedCharLiteral} { addToken(Token.ERROR_CHAR); /*addNullToken(); return firstToken;*/ }
{StringLiteral} { addToken(Token.LITERAL_STRING_DOUBLE_QUOTE); }
/* Unclosed string literal: flagged as an error token, and the scan of this line ends. */
{UnclosedStringLiteral} { addToken(Token.ERROR_STRING_DOUBLE); addNullToken(); return firstToken; }
/* Labels: letters/digits immediately followed by ':' are added as Token.PREPROCESSOR. */
{Label} { addToken(Token.PREPROCESSOR); }
/* A '%' at the beginning of a line, plus any letters/digits after it, is added as Token.FUNCTION. */
^%({Letter}|{Digit})* { addToken(Token.FUNCTION); }
/*
 * Comment Literals.  A ';' and the rest of the line form one end-of-line
 * comment token, after which the scan of this line ends.
 */
{CommentBegin}.* { addToken(Token.COMMENT_EOL); addNullToken(); return firstToken; }
/* Operators.  Each single-character operator is its own token. */
{Operator} { addToken(Token.OPERATOR); }
/* Numbers.  A run of decimal digits. */
{Number} { addToken(Token.LITERAL_NUMBER_DECIMAL_INT); }
/*
 * Ended with a line not in a string or comment: at end of input, append
 * the null token and return the head of the token list.
 */
<<EOF>> { addNullToken(); return firstToken; }
/*
 * Catch any other (unhandled) characters.  These rules come last, so on a
 * tie in match length every earlier rule takes precedence over them.
 */
{Identifier} { addToken(Token.IDENTIFIER); }
/* Any single character nothing else matched is also added as an identifier token. */
. { addToken(Token.IDENTIFIER); }