SMScannerImpl.mesa
Copyright © 1985 by Xerox Corporation. All rights reserved.
Satterthwaite, April 21, 1986 4:20:03 pm PST
Derived from Compiler/Scanner.mesa
DIRECTORY
Ascii: TYPE USING [BS, CR, FF, LF, TAB],
Atom: TYPE USING [MakeAtom],
IO: TYPE USING [
card, EndOf, EndOfStream, GetChar, GetIndex, GetLength, Put, PutChar, PutF,
rope, SetIndex, STREAM],
SMP1: TYPE P1 USING [Token, TValue, nullTValue],
SMParseTable: TYPE ParseTable USING [
HashIndex, HashTableRef, IndexTableRef, ScanTableRef, TableRef, TSymbol, VocabularyRef,
endMarker, tokenFILENAME, tokenID, tokenSTR],
Rope: TYPE USING [Flatten, FromProc, NewText, ROPE, Text],
SMCommentTable: TYPE USING [Index],
SMCommentTableOps: TYPE USING [Add, AddBreakHint, Reset],
SMOps: TYPE USING [MS],
SMTree: TYPE Tree USING [Name];
SMScannerImpl: CEDAR PROGRAM
IMPORTS Atom, IO, Rope, SMCommentTableOps
EXPORTS SMP1 SHARES Rope ~ {
OPEN Tree~~SMTree, SMParseTable, P1~~SMP1;
table installation
tablePtr: TableRef;
hashTab: HashTableRef;
scanTab: ScanTableRef;
vocab: VocabularyRef;
vocabIndex: IndexTableRef;
InstallScanTable: PUBLIC PROC[base: TableRef] ~ TRUSTED {
tablePtr ← base;
hashTab ← @tablePtr[tablePtr.scanTable.hashTab];
scanTab ← @tablePtr[tablePtr.scanTable.scanTab];
vocab ← LOOPHOLE[@tablePtr[tablePtr.scanTable.vocabBody]];
vocabIndex ← @tablePtr[tablePtr.scanTable.vocabIndex]};
CharClass: PROC[c: CHAR] RETURNS[TSymbol] ~ TRUSTED INLINE {
RETURN[scanTab[c]]};
scanner state (protected by monitor in SMReaderImpl
cm: SMOps.MS;
out: IO.STREAM;
zone: ZONENIL;
Index: TYPE ~ SMCommentTable.Index;
NUL: CHAR ~ '\000;
stream: IO.STREAMNIL; -- the input stream
char: CHAR;   -- current (most recently scanned) character
tPos: Index0;  -- index of char in stream
AtEof: PROC RETURNS[BOOL] ~ {
RETURN[char = NUL AND stream.EndOf]};
toklen: NAT ← 0;  -- current token length
tokpos: Index ← 0;  -- source index for start of token
TokenToText: PROC RETURNS[t: Rope.Text] ~ { -- copies from token in buffer
savePos: Index ~ tPos;
Get: PROC RETURNS[c: CHAR] ~ {
RETURN[stream.GetChar]};
stream.SetIndex[tokpos];
t ← Rope.FromProc[toklen, Get].Flatten[];
stream.SetIndex[savePos+1];
RETURN};
nTokens: CARDINAL;  -- token count
nErrors: CARDINAL;  -- lexical errors
lastToken: Index ← 0;
FirstChar: PROC[] ~ {
tokpos ← tPos; toklen ← 1};
AddChar: PROC[] ~ {
IF toklen = 0 THEN tokpos ← tPos;
toklen ← toklen + 1};
AddCharPlus: PROC[] ~ {
IF toklen = 0 THEN tokpos ← tPos;
toklen ← toklen + 1;
NextChar[]};
NextChar: PROC ~ {
tPos ← tPos + 1;
char ← stream.GetChar[ ! IO.EndOfStream => {char ← NUL; CONTINUE}]};
IdFromRope: PROC[r: Rope.ROPE] RETURNS[Tree.Name] ~ INLINE {
RETURN[Atom.MakeAtom[r]]};
IdFromBuffer: PROC[] RETURNS[Tree.Name] ~ {
RETURN[IdFromRope[TokenToText[]]]};
Map: PROC[scan: PROC[CHAR] RETURNS[BOOL]] RETURNS[stopped: BOOLFALSE] ~ {
UNTIL stopped OR stream.EndOf DO
c: CHAR ~ stream.GetChar;
stopped ← scan[c];
ENDLOOP;
RETURN};
NextToken: PUBLIC PROC RETURNS[token: P1.Token] ~ {
DO
CRcount: NAT ← 0;
IF char IN ['\000..' ] THEN {
NULcount: NAT ← 0;
Scan: PROC[c: CHAR] RETURNS[BOOL] ~ {
IF c > ' OR (c = NUL AND NULcount # 0) THEN {char ← c; RETURN[TRUE]};
tPos ← tPos + 1; NULcount ← 0;
SELECT c FROM
Ascii.CR => {
CRcount ← CRcount + 1;
IF cm.comments # NIL THEN (cm.comments).AddBreakHint[tPos]};
Ascii.FF => {
IF cm.comments # NIL THEN (cm.comments).Add[tPos, "\f", lastToken, CRcount];
CRcount ← 0};
NUL => NULcount ← NULcount ← 1;
ENDCASE;
RETURN[FALSE]};
[] ← Scan[char];
IF ~Map[Scan] OR char = NUL THEN GO TO EndFile};
toklen ← 0;
token.index ← tPos; token.value ← P1.nullTValue;
IF CRcount > 1 AND cm.comments # NIL THEN { -- remember extra blank lines
(cm.comments).Add[tPos-1, NIL, lastToken, CRcount-1];
CRcount ← 1};
SELECT char FROM
'a, 'b, 'c, 'd, 'e, 'f, 'g, 'h, 'i, 'j, 'k, 'l, 'm,
'n, 'o, 'p, 'q, 'r, 's, 't, 'u, 'v, 'w, 'x, 'y, 'z, '& => {
Scan: PROC[c: CHAR] RETURNS[BOOL] ~ {
SELECT c FROM
IN ['a..'z], IN ['A..'Z], IN ['0..'9], '& => NULL;
ENDCASE => {char ← c; RETURN[TRUE]};
toklen ← toklen + 1;
RETURN[FALSE]};
FirstChar[];
[] ← Map[Scan];
tPos ← tPos + toklen;
token.class ← tokenID; token.value ← IdFromBuffer[];
GO TO GotNext};
'A, 'B, 'C, 'D, 'E, 'F, 'G, 'H, 'I, 'J, 'K, 'L, 'M,
'N, 'O, 'P, 'Q, 'R, 'S, 'T, 'U, 'V, 'W, 'X, 'Y, 'Z => {
first, last: NAT ← char.ORD;
uId: BOOLTRUE;
Scan: PROC[c: CHAR] RETURNS[BOOL] ~ {
SELECT c FROM
IN ['A..'Z] => last ← c.ORD;
IN ['a..'z], IN ['0..'9] => uId ← FALSE;
ENDCASE => {char ← c; RETURN[TRUE]};
toklen ← toklen + 1;
RETURN[FALSE]};
FirstChar[];
[] ← Map[Scan];
tPos ← tPos + toklen;
IF uId THEN TRUSTED {
j: CARDINAL;
h: HashIndex ← ((first*128-first) + last) MOD HashIndex.LAST + 1;
WHILE (j ← hashTab[h].symbol) # 0 DO
s2: CARDINAL ←vocabIndex[j-1];
IF vocabIndex[j] - s2 = toklen THEN {
t: Rope.Text ~ TokenToText[];
FOR s1: CARDINAL IN [0 .. toklen) DO
IF t[s1] # vocab.text[s2] THEN EXIT;
s2 ← s2+1;
REPEAT
FINISHED => {token.class ← j; GO TO GotNext};
ENDLOOP;
};
IF (h ← hashTab[h].link) = 0 THEN EXIT;
ENDLOOP};
token.class ← tokenID; token.value ← IdFromBuffer[];
GO TO GotNext};
',, ';, ':, '., '~, '+, '^, '*, '/, '\\,
'(, '), '[, '], '=, '> => {
token.class ← CharClass[char]; GO TO GetNext};
'" => {
DO
NextChar[];
SELECT char FROM
'" => {
NextChar[];
IF char # '" THEN GO TO QuoteEnd;
AddChar[]};
'\\ => AddCharPlus[];
NUL => IF AtEof[] THEN GO TO QuoteEnd;
ENDCASE;
AddChar[];
IF toklen = NAT.LAST THEN {
ScanError[$string, token.index]; toklen ← 0};
REPEAT
QuoteEnd => NULL;
ENDLOOP;
token.value ← EnterText[]; token.class ← tokenSTR;
GO TO GotNext};
'@ => {
State: TYPE ~ [0..7];
state assignments
where alpha = {a..z, A..Z, 0..9, ., $} -- FS allows +, - also
0: @
1: @ / ?(alpha | '# )
2: ( @ / ?(alpha | '# ) / | @ alpha ?alpha / ) ? (alpha / )
3: ( @ alpha | {2} alpha ) ?alpha
4: {3} !
5: {4} digit ?digit
6: {5} ( H | L )
7: ( {3} | {5} | {6} ) any
state: State ← 0;
Scan: PROC[c: CHAR] RETURNS[BOOL] ~ {
SELECT c FROM
IN ['a..'z], IN ['A..'Z] =>
SELECT state FROM
0, 2, 3 => state ← 3;
1 => state ← 1;
4 =>
SELECT c FROM
'H, 'h, 'L, 'l => state ← 6
ENDCASE => {ScanError[$file, tPos+toklen]; state ← State.LAST};
5, 6 => state ← State.LAST;
ENDCASE => ERROR;
IN ['0..'9] =>
SELECT state FROM
0, 2, 3 => state ← 3;
1 => state ← 1;
4, 5 => state ← 5;
6 => state ← State.LAST;
ENDCASE => ERROR;
'., '$ => -- '+ and '- are SML operators
SELECT state FROM
0, 2, 3 => state ← 3;
1 => state ← 1;
5, 6 => state ← State.LAST;
ENDCASE => {ScanError[$file, tPos+toklen]; state ← State.LAST};
'# =>
SELECT state FROM
1 => state ← 1;
3, 5, 6 => state ← state ← State.LAST;
ENDCASE => {ScanError[$file, tPos+toklen]; state ← State.LAST};
'/ =>
SELECT state FROM
0 => state ← 1;
1, 2, 3 => state ← 2;
5, 6 => state ← State.LAST;
ENDCASE => {ScanError[$file, tPos+toklen]; state ← State.LAST};
'! =>
SELECT state FROM
3 => state ← 4;
5, 6 => state ← State.LAST;
ENDCASE => {ScanError[$file, tPos+toklen]; state ← State.LAST};
ENDCASE =>
SELECT state FROM
3, 5, 6 => state ← State.LAST;
ENDCASE => {ScanError[$file, tPos+toklen]; state ← State.LAST};
IF state = State.LAST THEN char ← c ELSE toklen ← toklen + 1;
RETURN[state = State.LAST]};
FirstChar[];
[] ← Map[Scan];
tPos ← tPos + toklen;
token.class ← tokenFILENAME; token.value ← TokenToText[];
GO TO GotNext};
'- => { -- comment processing
pChar: CHARNUL;
Scan: PROC[c: CHAR] RETURNS[BOOL] ~ {
toklen ← toklen + 1;
IF c = Ascii.CR OR (c = '- AND pChar = c) THEN {
char ← c; RETURN[TRUE]};
pChar ← c;
RETURN[FALSE]};
token.class ← CharClass['-];
tokpos ← tPos;
NextChar[];
IF char # '- THEN GO TO GotNext;
toklen ← 2;
[] ← Map[Scan];
tPos ← tokpos + toklen - 1;
IF cm.comments # NIL THEN {
comment: Rope.Text ~ TokenToText[];
(cm.comments).Add[tokpos, comment, lastToken, CRcount]};
lastToken ← tokpos;
IF char = '- THEN NextChar[]};
ENDCASE => {
token.class ← CharClass[char];
IF token.class # 0 THEN GO TO GetNext;
NextChar[];
ScanError[$char, token.index]};
REPEAT
GetNext => NextChar[];
GotNext => NULL;
EndFile => {
char ← NUL;
token ← [class~endMarker, index~tPos, value~P1.nullTValue]};
ENDLOOP;
nTokens ← nTokens + 1;
lastToken ← token.index;
RETURN};
string literals
EnterText: PROC [] RETURNS[P1.TValue] ~ {
RETURN[ParseLit[TokenToText[]]]};
ParseLit: PROC [t: Rope.Text] RETURNS[Rope.Text] ~ {
src: INTEGER ← 0;
dst: INTEGER ← 0;
nt: Rope.Text ← NIL;
len: INTEGER ← t.length;
WHILE src < len DO
c: CHAR ← t[src]; src ← src + 1;
IF c = '\\ OR c = '" THEN {
cc: CHAR ← c;
nt ← Rope.NewText[len];
dst ← src - 1;
FOR i: INTEGER IN [0..dst) DO nt[i] ← t[i] ENDLOOP;
IF src < len THEN {cc ← t[src]; src ← src + 1};
SELECT cc FROM
'n, 'N, 'r, 'R => c ← Ascii.CR;
't, 'T => c ← Ascii.TAB;
'b, 'B => c ← Ascii.BS;
'f, 'F => c ← Ascii.FF;
'l, 'L => c ← Ascii.LF;
IN ['0..'7] => {
code: NAT ← cc.ORD-'0.ORD;
cc ← '\000;
IF src < len THEN {
cc ← t[src];
IF cc IN ['0..'7] THEN {
src ← src + 1;
code ← code*8 + (cc.ORD-'0.ORD);
cc ← '\000;
IF src < len THEN {
cc ← t[src];
IF cc IN ['0..'7] THEN {
src ← src + 1;
code ← code*8 + (cc.ORD-'0.ORD)}
}
}
};
c ← VAL[code]};
ENDCASE => c ← cc
};
IF nt # NIL THEN {nt[dst] ← c; dst ← dst + 1};
ENDLOOP;
IF nt = NIL THEN RETURN[t]
ELSE {nt.length ← dst; RETURN[nt]}};
initialization/finalization
ScanInit: PUBLIC PROC [model: SMOps.MS, source: IO.STREAM] ~ {
cm ← model; out ← model.out; zone ← model.z;
stream ← source;
tPos ← stream.GetIndex - 1;
IF cm.comments # NIL THEN (cm.comments).Reset;
lastToken ← 0;
NextChar[];
nTokens ← nErrors ← 0};
ScanReset: PUBLIC PROC RETURNS[CARDINAL, CARDINAL] ~ {
cm ← NIL; out ← NIL; zone ← NIL;
RETURN[nTokens, nErrors]};
error handling
ResetScanIndex: PUBLIC PROC [index: Index] RETURNS[success: BOOLTRUE] ~ {
stream.SetIndex[MIN[index, stream.GetLength]];
tPos ← index-1; NextChar[]};
ScanError: PROC[code: {number, string, char, atom, file}, tokenIndex: Index] ~ {
nErrors ← nErrors + 1;
ErrorContext[
SELECT code FROM
$number => "invalid number",
$string => "string unterminated or too long",
$char => "invalid character",
$atom => "invalid atom",
$file => "ill-formed file designator",
ENDCASE => NIL,
tokenIndex];
out.PutChar['\n]};
ErrorContext: PUBLIC PROC [message: Rope.ROPE, tokenIndex: Index] ~ {
savePos: Index ~ tPos;
low: Index ~ (IF tokenIndex >= 40 THEN tokenIndex-40 ELSE 0);
high: Index ~ tokenIndex+40;
out.PutChar['\n];
IF low > 0 THEN out.Put[IO.rope["..."]];
stream.SetIndex[low];
FOR i: Index IN [low..high] WHILE ~stream.EndOf DO
c: CHAR ~ stream.GetChar;
IF i = tokenIndex THEN out.PutChar['^];
out.PutChar[c];
ENDLOOP;
IF ~stream.EndOf THEN out.Put[IO.rope["..."]];
out.PutF["\n%g [%d]\n", IO.rope[message], IO.card[tokenIndex]];
[] ← ResetScanIndex[savePos]};
TokenValue: PUBLIC PROC [s: TSymbol] RETURNS[P1.TValue] ~ {
RETURN[SELECT s FROM
tokenID => IdFromRope["&anon"],
ENDCASE => P1.nullTValue]
};
}.