<<>>
<<>>
<<>>
<<>>

DIRECTORY
  Ascii: TYPE USING [BS, CR, FF, LF, TAB],
  Atom: TYPE USING [MakeAtom],
  IO: TYPE USING [
    card, EndOf, EndOfStream, GetChar, GetIndex, GetLength, Put, PutChar, PutF,
    rope, SetIndex, STREAM],
  SMP1: TYPE P1 USING [Token, TValue, nullTValue],
  SMParseTable: TYPE ParseTable USING [
    HashIndex, HashTableRef, IndexTableRef, ScanTableRef, TableRef, TSymbol,
    VocabularyRef, endMarker, tokenFILENAME, tokenID, tokenSTR],
  Rope: TYPE USING [Flatten, FromProc, NewText, ROPE, Text],
  SMCommentTable: TYPE USING [Index],
  SMCommentTableOps: TYPE USING [Add, AddBreakHint, Reset],
  SMOps: TYPE USING [MS],
  SMTree: TYPE Tree USING [Name];

SMScannerImpl: CEDAR PROGRAM
  IMPORTS Atom, IO, Rope, SMCommentTableOps
  EXPORTS SMP1
  SHARES Rope
  ~ {
  OPEN Tree~~SMTree, SMParseTable, P1~~SMP1;

  <<>>

  tablePtr: TableRef;
  hashTab: HashTableRef;
  scanTab: ScanTableRef;
  vocab: VocabularyRef;
  vocabIndex: IndexTableRef;

  InstallScanTable: PUBLIC PROC[base: TableRef] ~ TRUSTED {
    tablePtr _ base;
    hashTab _ @tablePtr[tablePtr.scanTable.hashTab];
    scanTab _ @tablePtr[tablePtr.scanTable.scanTab];
    vocab _ LOOPHOLE[@tablePtr[tablePtr.scanTable.vocabBody]];
    vocabIndex _ @tablePtr[tablePtr.scanTable.vocabIndex]};

  CharClass: PROC[c: CHAR] RETURNS[TSymbol] ~ TRUSTED INLINE {
    RETURN[scanTab[c]]};

  <<>>

  cm: SMOps.MS;
  out: IO.STREAM;
  zone: ZONE _ NIL;

  Index: TYPE ~ SMCommentTable.Index;
  NUL: CHAR ~ '\000;

  stream: IO.STREAM _ NIL;  -- the input stream
  char: CHAR;  -- current (most recently scanned) character
  tPos: Index _ 0;  -- index of char in stream

  AtEof: PROC RETURNS[BOOL] ~ {
    RETURN[char = NUL AND stream.EndOf]};

  toklen: NAT _ 0;  -- current token length
  tokpos: Index _ 0;  -- source index for start of token

  TokenToText: PROC RETURNS[t: Rope.Text] ~ {  -- copies from token in buffer
    savePos: Index ~ tPos;
    Get: PROC RETURNS[c: CHAR] ~ { RETURN[stream.GetChar]};
    stream.SetIndex[tokpos];
    t _ Rope.FromProc[toklen, Get].Flatten[];
    stream.SetIndex[savePos+1];
    RETURN};

  nTokens: CARDINAL;  -- token count
  nErrors: CARDINAL;  -- lexical errors
  lastToken: Index _ 0;

  FirstChar: PROC[] ~ {
    tokpos _ tPos; toklen _ 1};

  AddChar: PROC[] ~ {
    IF toklen = 0 THEN tokpos _ tPos;
    toklen _ toklen + 1};

  AddCharPlus: PROC[] ~ {
    IF toklen = 0 THEN tokpos _ tPos;
    toklen _ toklen + 1;
    NextChar[]};

  NextChar: PROC ~ {
    tPos _ tPos + 1;
    char _ stream.GetChar[ ! IO.EndOfStream => {char _ NUL; CONTINUE}]};

  IdFromRope: PROC[r: Rope.ROPE] RETURNS[Tree.Name] ~ INLINE {
    RETURN[Atom.MakeAtom[r]]};

  IdFromBuffer: PROC[] RETURNS[Tree.Name] ~ {
    RETURN[IdFromRope[TokenToText[]]]};

  Map: PROC[scan: PROC[CHAR] RETURNS[BOOL]] RETURNS[stopped: BOOL _ FALSE] ~ {
    UNTIL stopped OR stream.EndOf DO
      c: CHAR ~ stream.GetChar;
      stopped _ scan[c];
      ENDLOOP;
    RETURN};
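  <<NextToken returns the next token from the source stream.  It first skips whitespace, recording line breaks, form feeds, and runs of blank lines in the comment table when one is present; it then recognizes identifiers, all-uppercase reserved words (looked up in the installed vocabulary hash table), punctuation characters, quoted strings, '@'-prefixed file designators, and absorbs '--' comments into the comment table.>>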
  NextToken: PUBLIC PROC RETURNS[token: P1.Token] ~ {
    DO
      CRcount: NAT _ 0;
      IF char IN ['\000..' ] THEN {
        NULcount: NAT _ 0;
        Scan: PROC[c: CHAR] RETURNS[BOOL] ~ {
          IF c > ' OR (c = NUL AND NULcount # 0) THEN {char _ c; RETURN[TRUE]};
          tPos _ tPos + 1;
          NULcount _ 0;
          SELECT c FROM
            Ascii.CR => {
              CRcount _ CRcount + 1;
              IF cm.comments # NIL THEN (cm.comments).AddBreakHint[tPos]};
            Ascii.FF => {
              IF cm.comments # NIL THEN (cm.comments).Add[tPos, "\f", lastToken, CRcount];
              CRcount _ 0};
            NUL => NULcount _ NULcount + 1;
            ENDCASE;
          RETURN[FALSE]};
        [] _ Scan[char];
        IF ~Map[Scan] OR char = NUL THEN GO TO EndFile};
      toklen _ 0;
      token.index _ tPos;
      token.value _ P1.nullTValue;
      IF CRcount > 1 AND cm.comments # NIL THEN {  -- remember extra blank lines
        (cm.comments).Add[tPos-1, NIL, lastToken, CRcount-1]; CRcount _ 1};
      SELECT char FROM
        'a, 'b, 'c, 'd, 'e, 'f, 'g, 'h, 'i, 'j, 'k, 'l, 'm,
        'n, 'o, 'p, 'q, 'r, 's, 't, 'u, 'v, 'w, 'x, 'y, 'z, '& => {
          Scan: PROC[c: CHAR] RETURNS[BOOL] ~ {
            SELECT c FROM
              IN ['a..'z], IN ['A..'Z], IN ['0..'9], '& => NULL;
              ENDCASE => {char _ c; RETURN[TRUE]};
            toklen _ toklen + 1;
            RETURN[FALSE]};
          FirstChar[];
          [] _ Map[Scan];
          tPos _ tPos + toklen;
          token.class _ tokenID;
          token.value _ IdFromBuffer[];
          GO TO GotNext};
        'A, 'B, 'C, 'D, 'E, 'F, 'G, 'H, 'I, 'J, 'K, 'L, 'M,
        'N, 'O, 'P, 'Q, 'R, 'S, 'T, 'U, 'V, 'W, 'X, 'Y, 'Z => {
          first, last: NAT _ char.ORD;
          uId: BOOL _ TRUE;
          Scan: PROC[c: CHAR] RETURNS[BOOL] ~ {
            SELECT c FROM
              IN ['A..'Z] => last _ c.ORD;
              IN ['a..'z], IN ['0..'9] => uId _ FALSE;
              ENDCASE => {char _ c; RETURN[TRUE]};
            toklen _ toklen + 1;
            RETURN[FALSE]};
          FirstChar[];
          [] _ Map[Scan];
          tPos _ tPos + toklen;
          IF uId THEN TRUSTED {  -- all upper case: probe the reserved-word hash table
            j: CARDINAL;
            h: HashIndex _ ((first*128-first) + last) MOD HashIndex.LAST + 1;
            WHILE (j _ hashTab[h].symbol) # 0 DO
              s2: CARDINAL _ vocabIndex[j-1];
              IF vocabIndex[j] - s2 = toklen THEN {
                t: Rope.Text ~ TokenToText[];
                FOR s1: CARDINAL IN [0 .. toklen) DO
                  IF t[s1] # vocab.text[s2] THEN EXIT;
                  s2 _ s2+1;
                  REPEAT
                    FINISHED => {token.class _ j; GO TO GotNext};
                  ENDLOOP;
                };
              IF (h _ hashTab[h].link) = 0 THEN EXIT;
              ENDLOOP};
          token.class _ tokenID;
          token.value _ IdFromBuffer[];
          GO TO GotNext};
        ',, ';, ':, '., '~, '+, '^, '*, '/, '\\, '(, '), '[, '], '=, '> => {
          token.class _ CharClass[char]; GO TO GetNext};
        '" => {
          DO
            NextChar[];
            SELECT char FROM
              '" => {
                NextChar[];
                IF char # '" THEN GO TO QuoteEnd;
                AddChar[]};
              '\\ => AddCharPlus[];
              NUL => IF AtEof[] THEN GO TO QuoteEnd;
              ENDCASE;
            AddChar[];
            IF toklen = NAT.LAST THEN {
              ScanError[$string, token.index]; toklen _ 0};
            REPEAT
              QuoteEnd => NULL;
            ENDLOOP;
          token.value _ EnterText[];
          token.class _ tokenSTR;
          GO TO GotNext};
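        -- '@' introduces a file designator (tokenFILENAME), for example @dir/name!3 or @dir/name!H;
        -- the accepted forms are summarized by the state table below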
        '@ => {
          State: TYPE ~ [0..7];
          <<>>
          <<0: @>>
          <<1: @ / ?(alpha | '# )>>
          <<2: ( @ / ?(alpha | '# ) / | @ alpha ?alpha / ) ?(alpha / )>>
          <<3: ( @ alpha | {2} alpha ) ?alpha>>
          <<4: {3} !>>
          <<5: {4} digit ?digit>>
          <<6: {5} ( H | L )>>
          <<7: ( {3} | {5} | {6} ) any>>
          state: State _ 0;
          Scan: PROC[c: CHAR] RETURNS[BOOL] ~ {
            SELECT c FROM
              IN ['a..'z], IN ['A..'Z] =>
                SELECT state FROM
                  0, 2, 3 => state _ 3;
                  1 => state _ 1;
                  4 =>
                    SELECT c FROM
                      'H, 'h, 'L, 'l => state _ 6;
                      ENDCASE => {ScanError[$file, tPos+toklen]; state _ State.LAST};
                  5, 6 => state _ State.LAST;
                  ENDCASE => ERROR;
              IN ['0..'9] =>
                SELECT state FROM
                  0, 2, 3 => state _ 3;
                  1 => state _ 1;
                  4, 5 => state _ 5;
                  6 => state _ State.LAST;
                  ENDCASE => ERROR;
              '., '$ =>  -- '+ and '- are SML operators
                SELECT state FROM
                  0, 2, 3 => state _ 3;
                  1 => state _ 1;
                  5, 6 => state _ State.LAST;
                  ENDCASE => {ScanError[$file, tPos+toklen]; state _ State.LAST};
              '# =>
                SELECT state FROM
                  1 => state _ 1;
                  3, 5, 6 => state _ State.LAST;
                  ENDCASE => {ScanError[$file, tPos+toklen]; state _ State.LAST};
              '/ =>
                SELECT state FROM
                  0 => state _ 1;
                  1, 2, 3 => state _ 2;
                  5, 6 => state _ State.LAST;
                  ENDCASE => {ScanError[$file, tPos+toklen]; state _ State.LAST};
              '! =>
                SELECT state FROM
                  3 => state _ 4;
                  5, 6 => state _ State.LAST;
                  ENDCASE => {ScanError[$file, tPos+toklen]; state _ State.LAST};
              ENDCASE =>
                SELECT state FROM
                  3, 5, 6 => state _ State.LAST;
                  ENDCASE => {ScanError[$file, tPos+toklen]; state _ State.LAST};
            IF state = State.LAST THEN char _ c ELSE toklen _ toklen + 1;
            RETURN[state = State.LAST]};
          FirstChar[];
          [] _ Map[Scan];
          tPos _ tPos + toklen;
          token.class _ tokenFILENAME;
          token.value _ TokenToText[];
          GO TO GotNext};
        '- => {  -- comment processing
          pChar: CHAR _ NUL;
          Scan: PROC[c: CHAR] RETURNS[BOOL] ~ {
            toklen _ toklen + 1;
            IF c = Ascii.CR OR (c = '- AND pChar = c) THEN {char _ c; RETURN[TRUE]};
            pChar _ c;
            RETURN[FALSE]};
          token.class _ CharClass['-];
          tokpos _ tPos;
          NextChar[];
          IF char # '- THEN GO TO GotNext;
          toklen _ 2;
          [] _ Map[Scan];
          tPos _ tokpos + toklen - 1;
          IF cm.comments # NIL THEN {
            comment: Rope.Text ~ TokenToText[];
            (cm.comments).Add[tokpos, comment, lastToken, CRcount]};
          lastToken _ tokpos;
          IF char = '- THEN NextChar[]};
        ENDCASE => {
          token.class _ CharClass[char];
          IF token.class # 0 THEN GO TO GetNext;
          NextChar[];
          ScanError[$char, token.index]};
      REPEAT
        GetNext => NextChar[];
        GotNext => NULL;
        EndFile => {
          char _ NUL;
          token _ [class~endMarker, index~tPos, value~P1.nullTValue]};
      ENDLOOP;
    nTokens _ nTokens + 1;
    lastToken _ token.index;
    RETURN};

  <<>>

  EnterText: PROC [] RETURNS[P1.TValue] ~ {
    RETURN[ParseLit[TokenToText[]]]};

  <<>>

  ParseLit: PROC [t: Rope.Text] RETURNS[Rope.Text] ~ {
    -- collapses doubled quotes and backslash escapes in a string literal body
    src: INTEGER _ 0;
    dst: INTEGER _ 0;
    nt: Rope.Text _ NIL;
    len: INTEGER _ t.length;
    WHILE src < len DO
      c: CHAR _ t[src];
      src _ src + 1;
      IF c = '\\ OR c = '" THEN {
        cc: CHAR _ c;
        IF nt = NIL THEN {  -- first escape: copy the prefix scanned so far
          nt _ Rope.NewText[len];
          dst _ src - 1;
          FOR i: INTEGER IN [0..dst) DO nt[i] _ t[i] ENDLOOP};
        IF src < len THEN {cc _ t[src]; src _ src + 1};
        SELECT cc FROM
          'n, 'N, 'r, 'R => c _ Ascii.CR;
          't, 'T => c _ Ascii.TAB;
          'b, 'B => c _ Ascii.BS;
          'f, 'F => c _ Ascii.FF;
          'l, 'L => c _ Ascii.LF;
          IN ['0..'7] => {
            code: NAT _ cc.ORD-'0.ORD;
            cc _ '\000;
            IF src < len THEN {
              cc _ t[src];
              IF cc IN ['0..'7] THEN {
                src _ src + 1;
                code _ code*8 + (cc.ORD-'0.ORD);
                cc _ '\000;
                IF src < len THEN {
                  cc _ t[src];
                  IF cc IN ['0..'7] THEN {
                    src _ src + 1;
                    code _ code*8 + (cc.ORD-'0.ORD)}
                  }
                }
              };
            c _ VAL[code]};
          ENDCASE => c _ cc
        };
      IF nt # NIL THEN {nt[dst] _ c; dst _ dst + 1};
      ENDLOOP;
    IF nt = NIL THEN RETURN[t] ELSE {nt.length _ dst; RETURN[nt]}};

  <<>>
  <<>>
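  <<ScanInit attaches the scanner to a new source stream and resets the comment table and counters; ScanReset detaches it and returns the token and error counts.>>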
  ScanInit: PUBLIC PROC [model: SMOps.MS, source: IO.STREAM] ~ {
    cm _ model;
    out _ model.out;
    zone _ model.z;
    stream _ source;
    tPos _ stream.GetIndex - 1;
    IF cm.comments # NIL THEN (cm.comments).Reset;
    lastToken _ 0;
    NextChar[];
    nTokens _ nErrors _ 0};

  ScanReset: PUBLIC PROC RETURNS[CARDINAL, CARDINAL] ~ {
    cm _ NIL;
    out _ NIL;
    zone _ NIL;
    RETURN[nTokens, nErrors]};

  <<>>

  ResetScanIndex: PUBLIC PROC [index: Index] RETURNS[success: BOOL _ TRUE] ~ {
    stream.SetIndex[MIN[index, stream.GetLength]];
    tPos _ index-1;
    NextChar[]};

  ScanError: PROC[code: {number, string, char, atom, file}, tokenIndex: Index] ~ {
    nErrors _ nErrors + 1;
    ErrorContext[
      SELECT code FROM
        $number => "invalid number",
        $string => "string unterminated or too long",
        $char => "invalid character",
        $atom => "invalid atom",
        $file => "ill-formed file designator",
        ENDCASE => NIL,
      tokenIndex];
    out.PutChar['\n]};

  ErrorContext: PUBLIC PROC [message: Rope.ROPE, tokenIndex: Index] ~ {
    savePos: Index ~ tPos;
    low: Index ~ (IF tokenIndex >= 40 THEN tokenIndex-40 ELSE 0);
    high: Index ~ tokenIndex+40;
    out.PutChar['\n];
    IF low > 0 THEN out.Put[IO.rope["..."]];
    stream.SetIndex[low];
    FOR i: Index IN [low..high] WHILE ~stream.EndOf DO
      c: CHAR ~ stream.GetChar;
      IF i = tokenIndex THEN out.PutChar['^];
      out.PutChar[c];
      ENDLOOP;
    IF ~stream.EndOf THEN out.Put[IO.rope["..."]];
    out.PutF["\n%g [%d]\n", IO.rope[message], IO.card[tokenIndex]];
    [] _ ResetScanIndex[savePos]};

  TokenValue: PUBLIC PROC [s: TSymbol] RETURNS[P1.TValue] ~ {
    RETURN[
      SELECT s FROM
        tokenID => IdFromRope["&anon"],
        ENDCASE => P1.nullTValue]
    };

  }.