'a, 'b, 'c, 'd, 'e, 'f, 'g, 'h, 'i, 'j, 'k, 'l, 'm,
'n, 'o, 'p, 'q, 'r, 's, 't, 'u, 'v, 'w, 'x, 'y, 'z, '& => {
  -- Token that starts with a lower-case letter or '&.
  -- Such a token is always given class tokenID; no vocabulary lookup is made.
  Scan: PROC[c: CHAR] RETURNS[BOOL] ~ {
    -- Handed to Map; presumably called once per input character until it
    -- returns TRUE (confirm against Map).
    -- Letters, digits and '& extend the token and are counted in toklen.
    IF (c IN ['a..'z]) OR (c IN ['A..'Z]) OR (c IN ['0..'9]) OR (c = '&) THEN {
      toklen ← toklen + 1;
      RETURN[FALSE]};
    -- Anything else ends the token; the terminator is left in char, uncounted.
    char ← c;
    RETURN[TRUE]};
  FirstChar[];
  [] ← Map[Scan];
  tPos ← tPos + toklen;
  token.class ← tokenID;
  token.value ← IdFromBuffer[];
  GO TO GotNext};
'A, 'B, 'C, 'D, 'E, 'F, 'G, 'H, 'I, 'J, 'K, 'L, 'M,
'N, 'O, 'P, 'Q, 'R, 'S, 'T, 'U, 'V, 'W, 'X, 'Y, 'Z => {
  -- Token that starts with an upper-case letter.
  -- If every character is upper case the spelling is looked up through hashTab;
  -- a match sets token.class to the matching symbol index (presumably a
  -- reserved word, confirm against the vocabulary tables).
  -- Otherwise the token is an ordinary tokenID.
  -- NOTE(review): unlike the lower-case arm, '& does not continue a token here;
  -- confirm that this difference is intended.
  firstCode, lastCode: NAT ← char.ORD;	-- codes of the first and the latest upper-case character
  allUpper: BOOL ← TRUE;	-- cleared by any lower-case letter or digit
  Scan: PROC[c: CHAR] RETURNS[BOOL] ~ {
    -- Handed to Map; returns TRUE on the first character that cannot extend the token.
    SELECT c FROM
      IN ['A..'Z] => lastCode ← c.ORD;
      IN ['a..'z], IN ['0..'9] => allUpper ← FALSE;
      ENDCASE => {char ← c; RETURN[TRUE]};
    toklen ← toklen + 1;
    RETURN[FALSE]};
  FirstChar[];
  [] ← Map[Scan];
  tPos ← tPos + toklen;
  IF allUpper THEN TRUSTED {
    sym: CARDINAL;
    -- Hash on the first and last character codes; the result lies in [1 .. HashIndex.LAST].
    probe: HashIndex ← ((firstCode*128-firstCode) + lastCode) MOD HashIndex.LAST + 1;
    WHILE (sym ← hashTab[probe].symbol) # 0 DO
      -- The candidate spelling occupies vocab.text[vocabIndex[sym-1] .. vocabIndex[sym]).
      vPos: CARDINAL ← vocabIndex[sym-1];
      IF vocabIndex[sym] - vPos = toklen THEN {
        spelling: Rope.Text ~ TokenToText[];
        FOR k: CARDINAL IN [0 .. toklen) DO
          IF spelling[k] # vocab.text[vPos] THEN EXIT;
          vPos ← vPos+1;
          REPEAT
            -- FINISHED is taken only when no character differed.
            FINISHED => {token.class ← sym; GO TO GotNext};
          ENDLOOP;
        };
      -- Follow the chain; a zero link ends the search.
      IF (probe ← hashTab[probe].link) = 0 THEN EXIT;
      ENDLOOP};
  token.class ← tokenID;
  token.value ← IdFromBuffer[];
  GO TO GotNext};
',, ';, ':, '., '~, '+, '^, '*, '/, '\\,
'(, '), '[, '], '=, '> => {
  -- Single-character token: the class is whatever CharClass gives for the character.
  token.class ← CharClass[char];
  GO TO GetNext};
'" => {
  -- String literal: characters are accumulated with AddChar until the closing
  -- quote (or end of file), then the text is entered and the token is tokenSTR.
  -- NOTE(review): QuoteEnd has no handler in the visible code (this loop has no
  -- REPEAT clause). Confirm where it is declared; the statements after ENDLOOP
  -- are reached only if that exit resumes at this point.
  DO
    NextChar[];
    SELECT char FROM
      '" => {
        -- A quote followed by another quote stays inside the literal;
        -- a quote followed by anything else ends it.
        NextChar[];
        IF char = '" THEN AddChar[] ELSE GO TO QuoteEnd};
      '\\ => AddCharPlus[];
      NUL => IF AtEof[] THEN GO TO QuoteEnd;
      ENDCASE;
    AddChar[];
    -- On reaching the NAT.LAST length limit, report the error and restart the count.
    IF toklen = NAT.LAST THEN {
      ScanError[$string, token.index];
      toklen ← 0};
    ENDLOOP;
  token.value ← EnterText[];
  token.class ← tokenSTR;
  GO TO GotNext};
'@ => {
  -- File-name token introduced by '@.
  -- Scan drives a small state machine over the following characters; the token
  -- ends when the machine reaches State.LAST.  A character that is illegal in
  -- the current state is reported with ScanError[$file, ...] and also ends the token.
  -- The result is always class tokenFILENAME with the scanned text as its value.
  State: TYPE ~ [0..7];
  -- state assignments
  --   where alpha = {a..z, A..Z, 0..9, ., $}  (FS allows +, - also)
  --   0: @
  --   1: @ / ?(alpha | '# )
  --   2: ( @ / ?(alpha | '# ) / | @ alpha ?alpha / ) ? (alpha / )
  --   3: ( @ alpha | {2} alpha ) ?alpha
  --   4: {3} !
  --   5: {4} digit ?digit
  --   6: {5} ( H | L )
  --   7: ( {3} | {5} | {6} ) any
  -- NOTE(review): the code below enters state 6 from state 4 (H or L directly
  -- after the !), not from state 5 as the table says; confirm which is intended.
  state: State ← 0;
  Scan: PROC[c: CHAR] RETURNS[BOOL] ~ {
    -- Handed to Map.  Returns TRUE once state = State.LAST; the terminating
    -- character is then left in char and is not counted in toklen.
    SELECT c FROM
      IN ['a..'z], IN ['A..'Z] =>
        SELECT state FROM
          0, 2, 3 => state ← 3;
          1 => state ← 1;
          4 =>
            -- Directly after the ! only H or L (either case) is accepted.
            SELECT c FROM
              'H, 'h, 'L, 'l => state ← 6;
              ENDCASE => {ScanError[$file, tPos+toklen]; state ← State.LAST};
          5, 6 => state ← State.LAST;
          -- Only State.LAST remains; presumably unreachable because Map stops
          -- calling once Scan has returned TRUE (confirm against Map).
          ENDCASE => ERROR;
      IN ['0..'9] =>
        SELECT state FROM
          0, 2, 3 => state ← 3;
          1 => state ← 1;
          4, 5 => state ← 5;
          6 => state ← State.LAST;
          ENDCASE => ERROR;
      '., '$ => -- '+ and '- are SML operators
        SELECT state FROM
          0, 2, 3 => state ← 3;
          1 => state ← 1;
          5, 6 => state ← State.LAST;
          ENDCASE => {ScanError[$file, tPos+toklen]; state ← State.LAST};
      '# =>
        SELECT state FROM
          1 => state ← 1;
          3, 5, 6 => state ← State.LAST;
          ENDCASE => {ScanError[$file, tPos+toklen]; state ← State.LAST};
      '/ =>
        SELECT state FROM
          0 => state ← 1;
          1, 2, 3 => state ← 2;
          5, 6 => state ← State.LAST;
          ENDCASE => {ScanError[$file, tPos+toklen]; state ← State.LAST};
      '! =>
        SELECT state FROM
          3 => state ← 4;
          5, 6 => state ← State.LAST;
          ENDCASE => {ScanError[$file, tPos+toklen]; state ← State.LAST};
      ENDCASE =>
        -- Any other character ends the name; that is legal only in states 3, 5 and 6.
        SELECT state FROM
          3, 5, 6 => state ← State.LAST;
          ENDCASE => {ScanError[$file, tPos+toklen]; state ← State.LAST};
    IF state = State.LAST THEN char ← c ELSE toklen ← toklen + 1;
    RETURN[state = State.LAST]};
  FirstChar[];
  [] ← Map[Scan];
  tPos ← tPos + toklen;
  token.class ← tokenFILENAME;
  token.value ← TokenToText[];
  GO TO GotNext};
'- => {
  -- comment processing
  -- A single hyphen is an ordinary token with class CharClass['-].
  -- Two hyphens in a row start a comment, which runs to a carriage return or
  -- to the next pair of hyphens.
  pChar: CHAR ← NUL;	-- previous character seen inside the comment
  Scan: PROC[c: CHAR] RETURNS[BOOL] ~ {
    -- Handed to Map.  Every character, including the one that ends the
    -- comment, is counted in toklen.
    endsComment: BOOL ~ (c = Ascii.CR) OR (c = '- AND pChar = '-);
    toklen ← toklen + 1;
    IF endsComment THEN {char ← c; RETURN[TRUE]};
    pChar ← c;
    RETURN[FALSE]};
  token.class ← CharClass['-];
  tokpos ← tPos;
  NextChar[];
  IF char # '- THEN GO TO GotNext;
  -- The two opening hyphens are already consumed.
  toklen ← 2;
  [] ← Map[Scan];
  tPos ← tokpos + toklen - 1;
  IF cm.comments # NIL THEN {
    commentText: Rope.Text ~ TokenToText[];
    (cm.comments).Add[tokpos, commentText, lastToken, CRcount]};
  lastToken ← tokpos;
  -- If a hyphen pair closed the comment, step past its second hyphen.
  -- No GO TO follows: control leaves the SELECT without producing a token.
  IF char = '- THEN NextChar[]};
ENDCASE => {
  -- Any other character: accept it when CharClass gives a nonzero class;
  -- otherwise skip it and report a $char scan error.
  token.class ← CharClass[char];
  IF token.class = 0
    THEN {NextChar[]; ScanError[$char, token.index]}
    ELSE GO TO GetNext};