/* Copyright (C) 1992 Imperial College */
/*
    token.c - tokeniser functions for term input/output in IC-Prolog ][
    Written by Frank McCabe and Damian Chu
    Imperial College, Winter 1989

    Modifications :
    24/2/90		dac
	changed tokeniser to use only one token
*/

#include <ctype.h>
#include "primitives.h"
#include "termio.h"

#include "ascii_latin1.h"
#include "ascii_greek.h"
#include "ascii_mac.h"

/* character classification table currently in use; starts out as the
   Latin-1 table and is re-pointed by init_charset() */
char_t	*chtypes = chtype_latin1;

/*
 *  setting alternative character encodings
 */
init_charset(set)
char	*set;
{
	if (set == NULL)
		chtypes = chtype_latin1;
	else if (strcmp(set, "greek") == 0)
		chtypes = chtype_greek;
	else if (strcmp(set, "mac") == 0)
		chtypes = chtype_mac;
	else
		chtypes = chtype_latin1;
}

/* entry points for character level input output */
extern	CHARTYPE (*charin)();	/* called with no arguments to get the next input character */
extern	bool	(*charback)();	/* called with one character to step the input back over it */
extern	void	syntax_error();	/* called here as syntax_error(error number, &tokaddr) */

/* token buffer space management functions */
static	token	tk;			/* the global token; every tokeniser function in this file fills this one object */
static	token	*tokaddr = &tk;		/* address of the global token (its address is what syntax_error receives) */
static	strpo	last_char;		/* where to put next char of token */
static	int	brace_depth	= 0;	/* number of '{' currently open; to differentiate '.' from '. ' */
static	bool	have_token	= FALSE;	/* TRUE while tk holds a token read ahead by hedtoken/hedchar; cleared by nextoken */



/*------------------------------------------------------------*
 *          U T I L I T Y     F U N C T I O N S               *
 *------------------------------------------------------------*/

/* forget every open '{': the brace nesting count goes back to zero */
void reset_brace()
{
    brace_depth = 0;
}

/* start collecting a new token: the next character copied goes to the
   first position of tk.buff */
#define	newtoken	(last_char = tk.buff)
/* append one character to the token text being collected; the macro
   itself makes no check against the end of tk.buff.  The expansion is
   parenthesised so it stays a single expression wherever it is used. */
#define cp_ch(ch)	(*last_char++ = (ch))

/* return a token which uses the buffer */
toktype end_token(ch)
char_t ch;
{
    cp_ch('\0');
    tk.bufflen = last_char-tk.buff;
    tk.tt = (toktype)ch;
    return((toktype)ch);
}

/* return a number token */
toktype number_token(n)
fourBytes n;
{
    /* check for overflow into tag */
    fourBytes high = (n & int_mask);
    if (high && (high != int_mask))
	return(end_token(floating));
    /* overload the bufflen field for numbers */
    tk.bufflen = n;
    tk.tt = (toktype)number;
    return((toktype)number);
}

/*
 * Return a token that needs no text: only the type of the global token
 * is updated, the buffer is left as it is.
 */
toktype single_tok(ttype)
toktype ttype;
{
    toktype kind = ttype;

    tk.tt = kind;
    return(kind);
}

/*
 * Length of a token: 1 for the end-of-file token and for each of the
 * single-character tokens listed below, otherwise the length of the
 * text held in the token's buffer.
 */
long int tok_len(t)
token *t;
{
    long int length;

    switch (t->tt) {
	default:
	    length = strlen(t->buff);
	    break;

	case eof_token:
	case space:
	case dot:
	case comma:
	case semicolon:		/* this is really a bar */
	case bra:
	case ket:
	case sqbra:
	case sqket:
	case brace:
	case endbrace:
	    length = 1;
	    break;
    }
    return(length);
}

/*
 * Write a printable form of token t to stderr: the token type itself as
 * a character for the single-character tokens, '|' for a semicolon
 * token, "<EOF>" for end of file, and the buffered text for all others.
 */
void dump_token(t)
token *t;
{
    switch (t->tt) {
	case space:
	case dot:
	case comma:
	case bra:
	case ket:
	case sqbra:
	case sqket:
	case brace:
	case endbrace:
	    /* the token type doubles as the character to show */
	    (void)fputc(t->tt, stderr);
	    return;

	case semicolon:
	    (void)fputc('|', stderr);
	    return;

	case eof_token:
	    (void) fprintf(stderr, "<EOF>");
	    return;

	default:
	    (void) fprintf(stderr, "%s", t->buff);
	    return;
    }
}

/*
 * Read the exponent part of a floating-point number into the token
 * buffer.  An 'e' is stored, then an optional sign, then one or more
 * digits.  If no digit follows, the token is closed as floating, the
 * offending character is stepped back over and syntax error 14 is
 * raised.  The first character after the digits is stepped back over.
 */
void
get_exponent()
{
    register CHARTYPE next = (*charin)();

    cp_ch('e');

    /* optional sign */
    if (next == minus || next == plus) {
	cp_ch(next);
	next = (*charin)();
    }

    /* at least one digit is required */
    if (chtype(next) != number) {
	(void)end_token(floating);
	(*charback)(next);
	syntax_error(14, &tokaddr);
    }

    /* copy the digits; stops with next holding the first non-digit */
    do {
	cp_ch(next);
	next = (*charin)();
    } while (chtype(next) == number);

    (*charback)(next);
}

/* returned by escape_char() when the escape sequence stands for no
   character at all; parenthesised because the value is negative */
#define LAYOUT (-2)

/*
 * Read an escaped character: called once a backslash has been seen,
 * it reads what follows and returns the character the escape denotes.
 * Returns EOF when the input ends inside the escape, and LAYOUT when
 * the escape contributes no character (\c, or a backslash followed by
 * a character outside the range 041..0176).
 */
CHARTYPE
escape_char()
{
    register CHARTYPE ch = (*charin)();

    switch (ch) {
	case EOF:
		return(EOF);

	/* single-letter escapes, either case */
	case 'a': case 'A':	return('\007');	/* alarm */
	case 'b': case 'B':	return('\b');	/* backspace */
	case 'd': case 'D':	return('\177');	/* delete */
	case 'e': case 'E':	return('\033');	/* escape */
	case 'f': case 'F':	return('\f');	/* formfeed */
	case 'n': case 'N':	return('\n');	/* newline */
	case 'r': case 'R':	return('\r');	/* carriage return */
	case 's': case 'S':	return(' ');	/* visible space */
	case 't': case 'T':	return('\t');	/* tab */
	case 'v': case 'V':	return('\v');	/* vertical tab */

	case '0': case '1': case '2': case '3':	/* octal string */
	case '4': case '5': case '6': case '7': {
		uchar value = ch - '0';
		int remaining;

		/* up to two further octal digits */
		for (remaining = 2; remaining > 0; remaining--) {
		    ch = (*charin)();
		    if (ch > '7' || ch < '0') {
			(*charback)(ch);
			break;
		    }
		    value = value * 8 + ch - '0';
		}
		return(value);
	}

	case '^': 		/* control char */
		ch = (*charin)();
		if (ch == EOF)
		    return(EOF);
		return(ch & 0x1F);

	case 'c': case 'C':	/* ignore layout chars */
		do {
		    ch = (*charin)();
		    if (ch > 040 && ch < 0177) {
			/* first non-layout character ends the run */
			(*charback)(ch);
			return(LAYOUT);
		    }
		} while (ch != EOF);
		return(EOF);

	default:
		if (ch > 040 && ch < 0177)
		    return(ch);		/* the character stands for itself */
		return(LAYOUT);		/* layout */
    }
}

/*------------------------------------------------------------*/



/*
 * basic tokeniser
 *
 * Reads characters through (*charin)() and builds the next token in the
 * global token tk.  The value returned is the token type, which is also
 * left in tk.tt.  The character class chtype(ch) of the first
 * significant character selects what is read:
 *
 *   sep      layout is skipped, except that layout directly in front
 *            of an opening bracket yields a `space' token
 *   punct    `.' followed by end of file, layout or a closing brace is
 *            the `dot' token outside braces and the graph token ". "
 *            inside them; any other `.' starts a graph token.  A `%'
 *            comment runs to end of line and is skipped.  `|' is
 *            returned as a semicolon token.  Braces adjust brace_depth.
 *   solo     a one-character token
 *   graph    a run of graph characters and dots; a slash followed by a
 *            star opens a comment, which is skipped
 *   upper    a run of letters and digits starting with an upper case
 *   lower    letter / a lower case letter
 *   number   integer, base'digits, 0'char or floating point number
 *   string   text up to the closing double quote (doubled quote = one)
 *   quoted   text up to the closing single quote (doubled quote = one)
 *   other    the eof_token
 *
 * Syntax errors 9 to 14 are reported through syntax_error().
 * NOTE(review): the statements that follow those calls look as if
 * syntax_error() is not expected to return -- confirm.
 */
toktype nxtoken()
{
    register CHARTYPE ch;

    newtoken;
    ch=(*charin)();

tkrestart:

    if (ch == EOF)	/* this test must come after the tkrestart label */
	ch = '\004';	/* CTRL-D is end of file */

    switch(chtype(ch)) {
	case sep:
	    if ((ch=(*charin)())==bra) {	/* space before opening bracket */
		(*charback)(ch);
		return(single_tok(space));
	    }
	    else goto tkrestart;

	case punct:
	    if (ch==dot) {
		if ((ch=(*charin)())==EOF) {
		    (*charback)(ch);
		    return(single_tok(dot));
		}
		else if (chtype(ch)==sep) {
		    /* the layout character after the dot is consumed */
		    if (brace_depth>0) {
			cp_ch(dot);
			cp_ch(space);
			return(end_token(graph));
		    }
		    else
			return(single_tok(dot));
		}
		else if (ch==endbrace) {
		    (*charback)(endbrace);
		    if (brace_depth) {
			cp_ch(dot);
			cp_ch(space);
			return(end_token(graph));
		    }
		    else
			return(single_tok(dot));
		}
		else {		/* we have a graph started by a dot ... */
		    (*charback)(ch);
		    /* read in the graph token */
		    cp_ch(dot);
		    while(chtype((ch=(*charin)()))==graph||ch==dot)
			cp_ch(ch);
		    (*charback)(ch);
		    return(end_token(graph));
		}
	    }
	    else if (ch==percent) {		/* % - eol comment */
		while((ch=(*charin)())!='\n')
		    if (ch==EOF)
			goto tkrestart;
		/* a comment directly in front of '(' counts as layout */
		if ((ch=(*charin)())==bra) {
		    (*charback)(ch);
		    ch=space;
		}
		goto tkrestart;
	    }
	    else if (ch==bar) {			/* bars are mapped to semicolons */
		cp_ch(semicolon);
		return(end_token((char_t)semicolon));
	    }
	    else if (ch==comma) {
		cp_ch(comma);
		return(end_token((char_t)comma));
	    }
	    else if (ch==brace)			/* starting a brace pair */
		brace_depth++;
	    else if (ch==endbrace) {		/* closing a brace pair */
		if (brace_depth)
		    brace_depth--;
	    }

	    /* remaining punctuation: the character is its own token type */
	    return(single_tok(ch));

	case solo:
	    cp_ch(ch);
	    return(end_token(solo));

	case graph: {
	    uchar initial = ch;
	    ch=(*charin)();

	    if (initial==slash && ch==star) {	/* we have a comment */
		for(;TRUE;) {
		    while((ch=(*charin)())!=star)
			if (ch==EOF)
			    goto tkrestart;
		    if ((ch=(*charin)())==slash)
			break;
		    else (*charback)(ch);	/* may itself be a star */
		}
		/* a comment directly in front of '(' counts as layout */
		if ((ch=(*charin)())==bra) {
		    (*charback)(ch);
		    ch=space;
		}
		goto tkrestart;
	    }
	    else
		cp_ch(initial);			/* start of a graph token */
	    while(chtype(ch)==graph||ch==dot) {	/* read in the graph token */
		cp_ch(ch);
		ch=(*charin)();
	    }
	    (*charback)(ch);		/* step back from extra char */
	    return(end_token(graph));
	}

	case upper: {
	    register char_t tt;
	    cp_ch(ch);
	    while((tt=chtype(ch=(*charin)()))==upper || tt==lower || tt==number)
		cp_ch(ch);
	    (*charback)(ch);
	    return(end_token(upper));
	}

	case lower: {
	    register char_t tt;
	    cp_ch(ch);
	    while((tt=chtype(ch=(*charin)()))==upper || tt==lower || tt==number)
		cp_ch(ch);
	    (*charback)(ch);
	    return(end_token(lower));
	}

	case number: {
	    register fourBytes n = ch - '0';
	    FLOAT fl_num;
	    double dummy;
	    cp_ch(ch);
	    /* the digits are both accumulated in n and kept as text, so
	       that the value can be re-read as a float further down */
	    while (chtype(ch=(*charin)())==number) {
		n = n * 10 + ch - '0';
		cp_ch(ch);
	    }

	    if (ch == quote) {
		int base = n;
		if (base == 0) {	/* 0'<char> notation */
		    /* escapes that stand for no character are skipped */
		    while ((ch=(*charin)())==backslash)
			if ((ch=escape_char())!=LAYOUT)
			    break;

		    if (ch==EOF)
			syntax_error(13, &tokaddr);
		    else return(number_token((fourBytes)ch));
		}
		else if (base <= 36) {
		    bool ok = FALSE;	/* TRUE once a valid digit was read */
		    n = 0;
		    for (;;) {
			int digit;
			ch = (*charin)();
			if (ch >= '0' && ch <= '9')
			    digit = ch - '0';
			else if (ch >= 'a' && ch <= 'z')
			    digit = ch - 'a' + 10;
			else if (ch >= 'A' && ch <= 'Z')
			    digit = ch - 'A' + 10;
			else break;
			if (digit >= base)
			    break;
			n = n * base + digit;
			ok = TRUE;
		    }
		    (*charback)(ch);
		    /* logical, not bitwise, and: no digit at all followed
		       the quote, so the number is just the "base" and the
		       quote is given back to the input */
		    if (n==0 && !ok) {
			(*charback)(quote);
			return(number_token((fourBytes)base));
		    }
		    else return(number_token(n));
		}
		/* a base above 36 drops out here with ch still the quote */
	    }
	    else if (ch == 'e' || ch == 'E') {
		get_exponent();
		return(end_token(floating));
	    }
	    else if (ch == dot) {
		if (chtype((ch=(*charin)()))==number) {
		    cp_ch(dot);
		    cp_ch(ch);
		    while (chtype((ch=(*charin)()))==number)
			cp_ch(ch);
		    if (ch == 'e' || ch == 'E')
			get_exponent();
		    else (*charback)(ch);
		    return(end_token(floating));
		}
		else {
		    /* the dot is not part of the number: give back the
		       character after it, then the dot itself below */
		    (*charback)(ch);
		    ch = dot;
		}
	    }

	    (*charback)(ch);

	    /* test for overflow of 32-bit integer */
	    cp_ch('\0');
	    last_char--;	/* terminator stays in place but is not counted */
	    (void)sscanf(tk.buff, "%lf", &dummy);
	    fl_num = dummy;
	    if (fl_num == n) {
		return(number_token(n));
	    } else
		return(end_token(floating));
	}

	case string:
more_string:
	    while((ch=(*charin)())!=dquote) {
		if (ch==backslash) {
		    if ((ch=escape_char())==LAYOUT)
			continue;
		}
		else if (ch == '\n') {
		    (void)end_token(string);
		    (*charback)(ch);
		    syntax_error(9, &tokaddr);
		}

		if (ch==EOF) {
		    (void)end_token(string);
		    syntax_error(10, &tokaddr);
		}
		else cp_ch(ch);
	    }

	    /* a doubled quote stands for one quote inside the string */
	    if ((ch=(*charin)())==dquote) {
		cp_ch(dquote);
		goto more_string;
	    }
	    else (*charback)(ch);
	    return(end_token(string));

	case quoted:
more_quoted:
	    while((ch=(*charin)())!=quote) {
		if (ch==backslash) {
		    if ((ch=escape_char())==LAYOUT)
			continue;
		}
		else if (ch == '\n') {
		    (void)end_token(quoted);
		    (*charback)(ch);
		    syntax_error(11, &tokaddr);
		}

		if (ch == EOF) {
		    (void)end_token(quoted);
		    syntax_error(12, &tokaddr);
		}
		else cp_ch(ch);
	    }

	    /* a doubled quote stands for one quote inside the atom */
	    if ((ch=(*charin)())==quote) {
		cp_ch(quote);
		goto more_quoted;
	    }
	    else (*charback)(ch);
	    return(end_token(quoted));

	default:
	    return(single_tok((toktype)eof_token));
    }
}

/*------------------------------------------------------------*/

/* packaged up tokeniser to allow for token look ahead */

/*
 * get the next token
 *
 * tok  - receives the address of the global token
 * skip - when true, `space' tokens are passed over
 *
 * A token already read ahead (have_token set) is consumed first;
 * otherwise a fresh token is read.  Returns the type of the token.
 */
toktype nextoken(tok,skip)
token **tok;
bool skip;
{
    toktype current;

    if (have_token) {
	/* use up the look-ahead token */
	current = tk.tt;
	have_token = FALSE;
    }
    else
	current = nxtoken();

    if (skip) {
	while (current == space)	/* skipping space tokens */
	    current = nxtoken();
    }

    *tok = &tk;
    return(current);
}

/*------------------------------------------------------------*/

/*
 * look ahead one token
 *
 * tok  - receives the address of the global token
 * skip - when true, `space' tokens are passed over
 *
 * Reads a token unless one is already held, and marks it as held so
 * that the next call of nextoken() hands out the same token.  Returns
 * the type of the token.
 */
toktype hedtoken(tok,skip)
token **tok;
bool skip;
{
    toktype peeked;

    if (have_token)
	peeked = tk.tt;
    else {
	peeked = nxtoken();
	have_token = TRUE;
    }

    if (skip) {
	while (peeked == space)		/* skipping space tokens */
	    peeked = nxtoken();
    }

    *tok = &tk;
    return(peeked);
}

/*------------------------------------------------------------*/

/*
 * look at character following next token
 *
 * Makes sure a token is held as look-ahead, then reads one character
 * and immediately steps the input back over it, so nothing is consumed.
 * Returns that character.
 */
CHARTYPE hedchar()
{
    CHARTYPE following;

    if (!have_token) {
	(void) nxtoken();
	have_token = TRUE;
    }

    following = (*charin)();
    (*charback)(following);
    return(following);
}
