(* Copyright (c) 1997 Andrew W. Appel. *)

(* TigerLexFun: lexer functor parameterised over the token constructors in
 * Tokens.  The layout (UserDeclarations / Internal / makeLexer, yy-prefixed
 * names) looks like ML-Lex output, so edits here should presumably be
 * mirrored in the .lex specification it was generated from.
 *
 *   UserDeclarations - helper values used by the semantic actions.
 *   Internal         - the state table and the start-state constants.
 *   makeLexer        - takes an input function (called as yyinput 1024) and
 *                      returns a thunk that yields one token per call.
 *)
functor TigerLexFun(structure Tokens : Tiger_TOKENS) =
struct

  structure UserDeclarations =
  struct
    open ErrorMsg;
    type svalue = Tokens.svalue
    type pos = int
    type ('a,'b) token = ('a,'b) Tokens.token
    type lexresult = (svalue,pos) token

    (* Increment / decrement an int ref in place. *)
    fun inc x = x := !x + 1
    fun dec x = x := !x - 1

    (* Position where the current string literal or comment started. *)
    val stringstart = ref 0
    (* Characters of the string literal being scanned, most recent first. *)
    val charlist = ref (nil: char list)
    val lineNum = ErrorMsg.lineNum
    val linePos = ErrorMsg.linePos
    (* Current comment nesting depth; 0 when outside any comment. *)
    val comLevel = ref 0

    (* Report an error at the start position of the span; p2 is ignored. *)
    fun err(p1,p2) = ErrorMsg.error p1

    (* Called when input is exhausted: reports an unclosed comment if the
     * nesting depth is still positive, then returns the EOF token. *)
    val eof = fn () =>
      let
        val pos = Int.max(!stringstart+2, hd(!linePos))
      in
        if !comLevel>0 then err (!stringstart,pos) "unclosed comment" else ();
        Tokens.EOF(pos,pos)
      end

    (* Push one character onto the string-literal accumulator. *)
    fun addString (s:char) = charlist := s :: (!charlist)

    (* Return the accumulated literal in source order and reset the accumulator. *)
    fun makeString () = (implode(rev(!charlist)) before charlist := nil)

    (* Convert a string of decimal digits to an int (may raise Overflow). *)
    fun makeInt s = foldl (fn (c,a) => a*10 + ord c - ord #"0") 0 (explode s)
  end (* end of user routines *)

  exception LexError (* raised if illegal leaf action tried *)

  structure Internal =
  struct
    datatype yyfinstate = N of int
    type statedata = {fin : yyfinstate list, trans: string}

    (* transition & final state table *)
    val tab = let
      (* NOTE(review): the next two declarations do not parse as written.  It
       * looks as if a literal list binding for s (the transition strings
       * looked up by index below) and the start of "fun f x = x" were lost
       * from this copy.  The fragment is kept verbatim because the missing
       * data cannot be reconstructed here; regenerate the file from the
       * lexer specification to restore it. *)
      val sfun f x = x
      val s = map f (rev (tl (rev s)))
      exception LexHackingError
      (* Association-list lookup of transition string number i. *)
      fun look ((j,x)::r, i) = if i = j then x else look(r, i)
        | look ([], i) = raise LexHackingError
      (* Replace a state's transition index by the transition string itself. *)
      fun g {fin=x, trans=i} = {fin=x, trans=look(s,i)}
    in
      Vector.fromList(map g
        [{fin = [], trans = 0},
         {fin = [(N 2)], trans = 1},
         {fin = [(N 2)], trans = 1},
         {fin = [], trans = 3},
         {fin = [], trans = 3},
         {fin = [], trans = 5},
         {fin = [], trans = 5},
         {fin = [(N 174)], trans = 7},
         {fin = [(N 174)], trans = 7},
         {fin = [(N 147),(N 149)], trans = 0},
         {fin = [(N 149)], trans = 0},
         {fin = [(N 10),(N 149)], trans = 0},
         {fin = [(N 36),(N 149)], trans = 0},
         {fin = [(N 8),(N 149)], trans = 0},
         {fin = [(N 134),(N 149)], trans = 14},
         {fin = [(N 134)], trans = 14},
         {fin = [(N 134),(N 149)], trans = 16},
         {fin = [(N 134)], trans = 17},
         {fin = [(N 134)], trans = 18},
         {fin = [(N 134)], trans = 19},
         {fin = [(N 64),(N 134)], trans = 14},
         {fin = [(N 134),(N 149)], trans = 21},
         {fin = [(N 134)], trans = 22},
         {fin = [(N 101),(N 134)], trans = 14},
         {fin = [(N 134),(N 149)], trans = 24},
         {fin = [(N 134)], trans = 25},
         {fin = [(N 134)], trans = 26},
         {fin = [(N 106),(N 134)], trans = 14},
         {fin = [(N 84),(N 134)], trans = 14},
         {fin = [(N 134)], trans = 29},
         {fin = [(N 134)], trans = 30},
         {fin = [(N 120),(N 134)], trans = 14},
         {fin = [(N 134),(N 149)], trans = 32},
         {fin = [(N 131),(N 134)], trans = 14},
         {fin = [(N 134),(N 149)], trans = 34},
         {fin = [(N 134)], trans = 35},
         {fin = [(N 81),(N 134)], trans = 14},
         {fin = [(N 134),(N 149)], trans = 37},
         {fin = [(N 134)], trans = 38},
         {fin = [(N 74),(N 134)], trans = 14},
         {fin = [(N 134),(N 149)], trans = 40},
         {fin = [(N 77),(N 134)], trans = 14},
         {fin = [(N 115),(N 134)], trans = 14},
         {fin = [(N 134),(N 149)], trans = 43},
         {fin = [(N 134)], trans = 44},
         {fin = [(N 134)], trans = 45},
         {fin = [(N 134)], trans = 46},
         {fin = [(N 134)], trans = 47},
         {fin = [(N 134)], trans = 48},
         {fin = [(N 134)], trans = 49},
         {fin = [(N 97),(N 134)], trans = 14},
         {fin = [(N 134)], trans = 51},
         {fin = [(N 58),(N 134)], trans = 14},
         {fin = [(N 134),(N 149)], trans = 53},
         {fin = [(N 134)], trans = 54},
         {fin = [(N 88),(N 134)], trans = 14},
         {fin = [(N 134)], trans = 56},
         {fin = [(N 134)], trans = 57},
         {fin = [(N 125),(N 134)], trans = 14},
         {fin = [(N 134),(N 149)], trans = 59},
         {fin = [(N 128),(N 134)], trans = 14},
         {fin = [(N 134),(N 149)], trans = 61},
         {fin = [(N 134)], trans = 62},
         {fin = [(N 134)], trans = 63},
         {fin = [(N 134)], trans = 64},
         {fin = [(N 70),(N 134)], trans = 14},
         {fin = [(N 134),(N 149)], trans = 66},
         {fin = [(N 134)], trans = 67},
         {fin = [(N 134)], trans = 68},
         {fin = [(N 134)], trans = 69},
         {fin = [(N 112),(N 134)], trans = 14},
         {fin = [(N 14),(N 149)], trans = 0},
         {fin = [(N 12),(N 149)], trans = 0},
         {fin = [(N 46),(N 149)], trans = 73},
         {fin = [(N 51)], trans = 0},
         {fin = [(N 41),(N 149)], trans = 0},
         {fin = [(N 48),(N 149)], trans = 76},
         {fin = [(N 44)], trans = 0},
         {fin = [(N 54)], trans = 0},
         {fin = [(N 18),(N 149)], trans = 0},
         {fin = [(N 16),(N 149)], trans = 80},
         {fin = [(N 39)], trans = 0},
         {fin = [(N 137),(N 149)], trans = 82},
         {fin = [(N 137)], trans = 82},
         {fin = [(N 32),(N 149)], trans = 0},
         {fin = [(N 24),(N 149)], trans = 0},
         {fin = [(N 28),(N 149)], trans = 0},
         {fin = [(N 6),(N 149)], trans = 0},
         {fin = [(N 26),(N 149)], trans = 0},
         {fin = [(N 30),(N 149)], trans = 89},
         {fin = [(N 145)], trans = 0},
         {fin = [(N 22),(N 149)], trans = 0},
         {fin = [(N 20),(N 149)], trans = 92},
         {fin = [(N 142)], trans = 0},
         {fin = [(N 34),(N 149)], trans = 0},
         {fin = [(N 139),(N 149)], trans = 0},
         {fin = [(N 2),(N 149)], trans = 96},
         {fin = [(N 2)], trans = 96},
         {fin = [(N 4)], trans = 0},
         {fin = [(N 159)], trans = 0},
         {fin = [(N 159)], trans = 100},
         {fin = [(N 157)], trans = 0},
         {fin = [(N 159)], trans = 102},
         {fin = [(N 152)], trans = 0},
         {fin = [(N 154)], trans = 0},
         {fin = [(N 203)], trans = 0},
         {fin = [(N 201),(N 203)], trans = 106},
         {fin = [(N 181)], trans = 0},
         {fin = [(N 184)], trans = 0},
         {fin = [], trans = 109},
         {fin = [(N 194)], trans = 0},
         {fin = [(N 187)], trans = 0},
         {fin = [], trans = 112},
         {fin = [], trans = 113},
         {fin = [(N 199)], trans = 0},
         {fin = [(N 190)], trans = 0},
         {fin = [(N 169)], trans = 0},
         {fin = [(N 166)], trans = 0},
         {fin = [(N 161),(N 203)], trans = 0},
         {fin = [(N 163)], trans = 0},
         {fin = [(N 178)], trans = 0},
         {fin = [(N 176),(N 178)], trans = 0},
         {fin = [(N 174),(N 178)], trans = 122},
         {fin = [(N 174)], trans = 122},
         {fin = [(N 171)], trans = 0}])
    end

    structure StartStates =
    struct
      datatype yystartstate = STARTSTATE of int

      (* start state definitions *)
      val A = STARTSTATE 3;        (* entered by action 142; left when comLevel returns to 0 *)
      val F = STARTSTATE 7;        (* entered by actions 166 and 169; left by 176 and 178 *)
      val INITIAL = STARTSTATE 1;
      val S = STARTSTATE 5;        (* entered by action 139; left by 161 and 163 *)
    end

    type result = UserDeclarations.lexresult
    exception LexerError (* raised if illegal leaf action tried *)
  end

  (* Build a lexer over yyinput.  yyinput is called with a requested character
   * count (1024) and an empty result is treated as end of input.  The
   * returned function scans and returns the next token each time it is
   * applied to unit. *)
  fun makeLexer yyinput =
    let
      val yyb = ref "\n"        (* buffer *)
      val yybl = ref 1          (* buffer length *)
      val yybufpos = ref 1      (* location of next character to use *)
      val yygone = ref 1        (* position in file of beginning of buffer *)
      val yydone = ref false    (* eof found yet? *)
      val yybegin = ref 1       (* current 'start state' for lexer *)

      val YYBEGIN = fn (Internal.StartStates.STARTSTATE x) => yybegin := x

      fun lex () : Internal.result =
        let fun continue() = lex() in
        let
          (* scan (s, AcceptingLeaves, l, i0):
           *   s  - current state number
           *   AcceptingLeaves - fin lists of the states visited so far, newest first
           *   l  - index in yyb of the next character to read
           *   i0 - index in yyb where the current token starts *)
          fun scan (s,AcceptingLeaves : Internal.yyfinstate list list,l,i0) =
            let
              (* Walk back through the visited states to the most recent one
               * with a non-empty fin list and run its first action; i is the
               * buffer index just past the text matched at that point. *)
              fun action (i,nil) = raise LexError
                | action (i,nil::l) = action (i-1,l)
                | action (i,(node::acts)::l) =
                    case node of
                      Internal.N yyk =>
                        (let
                           val yytext = substring(!yyb,i0,i-i0)
                           val yypos = i0+ !yygone
                           open UserDeclarations Internal.StartStates
                         in
                           (yybufpos := i;
                            case yyk of
                              (* Application actions *)
                              10 => (Tokens.RBRACE(yypos,yypos+1))
                            | 101 => (Tokens.VAR(yypos,yypos+3))
                            | 106 => (Tokens.TYPE(yypos,yypos+4))
                            | 112 => (Tokens.ARRAY(yypos,yypos+5))
                            | 115 => (Tokens.IF(yypos,yypos+2))
                            | 12 => (Tokens.LBRACK(yypos,yypos+1))
                            | 120 => (Tokens.THEN(yypos,yypos+4))
                            | 125 => (Tokens.ELSE(yypos,yypos+4))
                            | 128 => (Tokens.DO(yypos,yypos+2))
                            | 131 => (Tokens.OF(yypos,yypos+2))
                            | 134 => (Tokens.ID(yytext,yypos,yypos+size yytext))
                            | 137 => (Tokens.INT(makeInt yytext
                                                 handle Overflow =>
                                                   (err (yypos,yypos+size yytext) "integer too large"; 1),
                                                 yypos,yypos+size yytext))
                              (* start of a string literal: reset the accumulator *)
                            | 139 => (charlist := nil; stringstart := yypos; YYBEGIN S; continue())
                            | 14 => (Tokens.RBRACK(yypos,yypos+1))
                              (* start of an outermost comment *)
                            | 142 => (YYBEGIN A; stringstart := yypos; comLevel := 1; continue())
                            | 145 => (err (yypos,yypos+1) "unmatched close comment"; continue())
                            | 147 => (err (yypos,yypos) "non-Ascii character"; continue())
                            | 149 => (err (yypos,yypos) "illegal token"; continue())
                              (* comment nesting: 152 goes one level deeper, 157 comes back out *)
                            | 152 => (inc comLevel; continue())
                            | 154 => (inc lineNum; linePos := yypos :: !linePos; continue())
                            | 157 => (dec comLevel; if !comLevel=0 then YYBEGIN INITIAL else (); continue())
                            | 159 => (continue())
                            | 16 => (Tokens.COLON(yypos,yypos+1))
                              (* end of a string literal *)
                            | 161 => (YYBEGIN INITIAL; Tokens.STRING(makeString(), !stringstart,yypos+1))
                            | 163 => (err (!stringstart,yypos) "unclosed string";
                                      inc lineNum; linePos := yypos :: !linePos;
                                      YYBEGIN INITIAL;
                                      Tokens.STRING(makeString(),!stringstart,yypos))
                            | 166 => (inc lineNum; linePos := yypos :: !linePos; YYBEGIN F; continue())
                            | 169 => (YYBEGIN F; continue())
                            | 171 => (inc lineNum; linePos := yypos :: !linePos; continue())
                            | 174 => (continue())
                            | 176 => (YYBEGIN S; stringstart := yypos; continue())
                            | 178 => (err (!stringstart,yypos) "unclosed string";
                                      YYBEGIN INITIAL;
                                      Tokens.STRING(makeString(),!stringstart,yypos+1))
                            | 18 => (Tokens.SEMICOLON(yypos,yypos+1))
                              (* escapes inside a string literal *)
                            | 181 => (addString #"\t"; continue())
                            | 184 => (addString #"\n"; continue())
                            | 187 => (addString #"\\"; continue())
                            | 190 => (addString #"\""; continue())
                              (* control escape: character at index 2 minus the code of "@" *)
                            | 194 => (addString(chr(ord(String.sub(yytext,2))-ord(#"@"))); continue())
                              (* three-digit decimal escape: digits at indices 1..3 of yytext *)
                            | 199 => (let
                                        val x = ord(String.sub(yytext,1))*100
                                                +ord(String.sub(yytext,2))*10
                                                +ord(String.sub(yytext,3))
                                                -(ord #"0" * 111)
                                      in
                                        (if x>255
                                         then err (yypos,yypos+4) "illegal ascii escape"
                                         else addString(chr x);
                                         continue())
                                      end)
                            | 2 => (continue())
                            | 20 => (Tokens.LPAREN(yypos,yypos+1))
                            | 201 => (err (yypos,yypos+1) "illegal string escape"; continue())
                            | 203 => (addString(String.sub(yytext,0)); continue())
                            | 22 => (Tokens.RPAREN(yypos,yypos+1))
                            | 24 => (Tokens.DOT(yypos,yypos+1))
                            | 26 => (Tokens.PLUS(yypos,yypos+1))
                            | 28 => (Tokens.MINUS(yypos,yypos+1))
                            | 30 => (Tokens.TIMES(yypos,yypos+1))
                            | 32 => (Tokens.DIVIDE(yypos,yypos+1))
                            | 34 => (Tokens.AND(yypos,yypos+1))
                            | 36 => (Tokens.OR(yypos,yypos+1))
                            | 39 => (Tokens.ASSIGN(yypos,yypos+2))
                            | 4 => (inc lineNum; linePos := yypos :: !linePos; continue())
                            | 41 => (Tokens.EQ(yypos,yypos+1))
                            | 44 => (Tokens.NEQ(yypos,yypos+2))
                            | 46 => (Tokens.GT(yypos,yypos+1))
                            | 48 => (Tokens.LT(yypos,yypos+1))
                            | 51 => (Tokens.GE(yypos,yypos+2))
                            | 54 => (Tokens.LE(yypos,yypos+2))
                            | 58 => (Tokens.FOR(yypos,yypos+3))
                            | 6 => (Tokens.COMMA(yypos,yypos+1))
                            | 64 => (Tokens.WHILE(yypos,yypos+5))
                              (* Fixed: this rule used to return WHILE as well, so no
                               * rule ever produced BREAK.  Rule 70 is the other
                               * five-character keyword rule.  Assumes Tiger_TOKENS
                               * declares BREAK - confirm against the token signature. *)
                            | 70 => (Tokens.BREAK(yypos,yypos+5))
                            | 74 => (Tokens.LET(yypos,yypos+3))
                            | 77 => (Tokens.IN(yypos,yypos+2))
                            | 8 => (Tokens.LBRACE(yypos,yypos+1))
                            | 81 => (Tokens.NIL(yypos,yypos+3))
                            | 84 => (Tokens.TO(yypos,yypos+2))
                            | 88 => (Tokens.END(yypos,yypos+3))
                            | 97 => (Tokens.FUNCTION(yypos,yypos+8))
                            | _ => raise Internal.LexerError
                           )
                         end)

              val {fin,trans} = Vector.sub(Internal.tab, s)
              val NewAcceptingLeaves = fin::AcceptingLeaves
            in
              if l = !yybl then
                (* Reached the end of the buffered text. *)
                if trans = #trans(Vector.sub(Internal.tab,0))
                then action(l,NewAcceptingLeaves)
                else
                  let
                    val newchars = if !yydone then "" else yyinput 1024
                  in
                    if (size newchars)=0
                    then (yydone := true;
                          if (l=i0) then UserDeclarations.eof ()
                          else action(l,NewAcceptingLeaves))
                    else (* Keep only the partial token, append the new input,
                          * and rescan with indices rebased to the new buffer. *)
                         (if i0=l then yyb := newchars
                          else yyb := substring(!yyb,i0,l-i0)^newchars;
                          yygone := !yygone+i0;
                          yybl := size (!yyb);
                          scan (s,AcceptingLeaves,l-i0,0))
                  end
              else
                let
                  val NewChar = Char.ord(String.sub(!yyb,l))
                  (* Character codes of 128 and above all use entry 128 of the
                   * transition string. *)
                  val NewState =
                    if NewChar<128
                    then Char.ord(String.sub(trans,NewChar))
                    else Char.ord(String.sub(trans,128))
                in
                  if NewState=0 then action(l,NewAcceptingLeaves)
                  else scan(NewState,NewAcceptingLeaves,l+1,i0)
                end
            end
          (*
          val start= if substring(!yyb,!yybufpos-1,1)="\n"
                     then !yybegin+1 else !yybegin
          *)
        in
          scan(!yybegin (* start *),nil,!yybufpos,!yybufpos)
        end
        end
    in
      lex
    end
end