Personal tools
You are here: Home Projects C++ Cfront releases Release 1.0 Source cfront src lalex.c
Document Actions

lalex.c

by Paul McJones last modified 2007-02-02 09:34

Click here to get the file

Size 14.3 kB - File type text/x-csrc

File contents

/* @(#) lalex.c 1.7 1/27/86 17:49:05 */ 
/*ident	"@(#)cfront:src/lalex.c	1.7" */
/***********************************************************************

	C++ source for cfront, the C++ compiler front-end

	Copyright (c) 1985 AT&T, Inc. All Rights Reserved
	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF AT&T, INC.


lalex.c:

	lexical lookahead
	unravel casts to handle cast/constructor ambiguity
	handle "struct x" => "x" transformation
	make NAME nodes for identifiers
	try to catch missing semi-colons
		(those sets of characters ought to be handled by table lookup)

	This is a mess; it really should have been a recursive decent parser

**************************************************************************/

#include "cfront.h"
#include "yystype.h"
#include "tqueue.h"

/*
	first define a queue of tokens (as a linked list)
*/

toknode* front = 0;
toknode* rear  = 0;

toknode* free_toks = 0;
#define NBITE 16
toknode.toknode(TOK t, YYSTYPE r)
{
	if (free_toks == 0) {
		register toknode* q = (toknode*)malloc( NBITE*sizeof(toknode) );
		free_toks=q;
		for (q+=NBITE-1, q->next=0; q!=free_toks; q--) (q-1)->next = q;
	}
	this = free_toks;
	free_toks = free_toks->next;
	tok = t;
	retval = r;
	next = last = 0;
}

inline toknode.~toknode()
{
	next = free_toks;
	free_toks = this;
	this = 0;
}


void addtok(TOK t, YYSTYPE r)
{
	toknode* T = new toknode(t,r);
	if (front == 0)
		front = rear = T;
	else {
		rear->next = T;
		T->last = rear;
		rear = T;
	}
}


TOK tk;		// last token returned

TOK deltok()
{
	toknode* T = front;
	tk = T->tok;
	yylval = T->retval;
	if (front = front->next) front->last = 0;
	delete T;
	return tk;
}

int scan_type();
int scan_mod();
int scan_tlist();
int scan_suf();
void get_tag();
void scan_e();


toknode* latok = 0;		/* "lookahead" token */

inline TOK  la_start()
{
	if (front == 0) tlex();
	latok=front;
	return latok->tok;
}

inline TOK  lookahead()
{
	if (latok==rear) tlex();
	latok=latok->next;
	return latok->tok;
}

inline void backup()
{
	if (latok->last == 0) error('i', "token q backup");
	latok = latok->last;
}

void insert_tok(TOK t)
{
	toknode* nt = new toknode(t, yylval);
	nt->last = latok->last;
	nt->last->next = nt;
	nt->next = latok;
	latok->last = nt;
	latok = nt;
}

void rep_cast()
/* replace bad cast with cast to ANY */
{
toknode* tt = front->next;	// front->tok == LP
toknode* junk = tt->next;
	if (junk == latok) return;
	tt->tok = TYPE;
	tt->retval.pt = any_type;
	tt->next = latok;
	latok->last->next = 0;
	latok->last = tt;
	for (tt = junk; tt; tt = tt->next) delete tt;
}

/*
	PRE-PARSER -- SCAN AHEAD TO DETERMINE TOKEN TYPE
*/

#define DO_RET goto ret

int bad_cast = 0;

TOK lalex()
{
static nocast = 0;	// prevent ID CAST, FOR CAST, etc.
//static in_op = 0;	// prevent OPERATOR op CAST
static incast = 0;	// don't scan in already recognized cast
static in_enum;
static fr;
char en = 0;

	switch (la_start()) {
	case VOLATILE:
	case SIGNED:
		error('w',"keyword%k (ignored)",latok->tok);
		return lalex();
	case ENUM:
		en = 1;
	case AGGR:
		switch (tk) {
/*				strictly speaking these ought to be included,
				but they only cause grief
		case ELSE:
		case COLON:	// label
		case RP:	// end of condition
*/
		case 0:
		case LP:	// argument list
		case CM:
		case NEW:
		case CAST:
		case RP:	// yok, old C: f(i) struct s * i; {}
		case OPERATOR:
		case DO:
		case TYPE:	// might be "const"
		case COLON:	// label
		case SM:	// end of statement
		case RC:	// end of statement
		case LC:	// first statement
			break;
		default:
			error(&curloc,"';' missing afterS orD before\"%k\"",latok->tok);
			return tk=SM;
		}

	{	TOK t = lookahead();
		TOK x;
//error('d',"aggr %k %d in_enum %d",t,t, en);
		switch (t) {
		case TNAME:
			x = lookahead();
			break;
		case ID:			// hidden or undefined
			x = lookahead();
			backup();
//error('d',"after id %k %d",x,x);
			switch (x) {
			case LC:
				in_enum = en;
			case COLON:		// defining: return AGGR ID
				backup();
				fr = 0;
				DO_RET;
			default:
			{	Pname n = ktbl->look(latok->retval.s,HIDDEN);
				if (n == 0) {	// new tag: define it
					n = new name(latok->retval.s);
					n->lex_level = 0;
					n = n->tname(latok->last->retval.t);
					modified_tn = modified_tn->l;
				}
				else {
					switch (n->tp->base) {
					case COBJ:
					case EOBJ:
						break;
					default:
						error('i',"hidden%n:%t",n,n->tp);
					}
				}
				latok->tok = TNAME;
				latok->retval.pn = n;
			}
			}
			(void)lookahead();
			break;
		case LC:
			in_enum = en;
		default:
			fr = 0;
			DO_RET;
		};
		
//error('d',"next %k %d tk %k",x,x,tk);
		switch (x) {
		case LC:		// class x {
			in_enum = en; 
		case COLON:		// class x :
			fr = 0;
			DO_RET;
		case SM:
			if (tk!=NEW && fr==0) {	// no further processing necessary
				deltok();	// class
				deltok();	// x
				deltok();	// ;
				return lalex();
			}
						// new class x ; => new x ;
		default:
			deltok();	// AGGR(?) TNAME(x) => TNAME(x)
			fr = 0;
			DO_RET;
		}	
	}

	case LP:
//error('d',"nocast %d in_op %d incast %d",nocast,in_op,incast);
		fr = 0;
		if (nocast) {
			nocast = 0;
			DO_RET;
//		} else if (in_op) {
//			in_op = 0;
//			DO_RET;
		} else if (incast)
			DO_RET;
		/* possible cast */
		bad_cast = 0;
		if (scan_type()) {
			if (scan_mod()) {
				if (lookahead() != RP) DO_RET;
				switch (lookahead()) {
				case CM: case RP: case SM: case LC: case ASSIGN:
					/* arg type list in declaration */
					if (tk != SIZEOF) DO_RET;
					break;

				case PLUS: case MINUS: case MUL: case AND:
				case NEW: case DELETE: case SIZEOF: case MEM:
				case NOT: case COMPL: case ICOP:
				case LP: case CAST:
				case ID: case TYPE: case TNAME:
				case THIS: case OPERATOR: case ZERO:
				case ICON: case FCON: case CCON: case STRING:
					// cast of a term
					break;
				default:	// something wrong...
					// binary op, missing ;,  etc.
					// "bad cast" could be legal expr
					//    "( TNAME() )" (ctor call)
					if (bad_cast) DO_RET;
					else break;
				}
				backup();
				front->tok = CAST;
				latok->tok = ENDCAST;
				if (bad_cast) {
					error("can't cast to function");
					rep_cast();
				}
				incast = 1;
			}
		}
		DO_RET;
	case CAST:
		incast++;
		DO_RET;
	case ENDCAST:
		if (--incast == 0) nocast = 0;
		DO_RET;
	case ID:
	{	char* s = front->retval.s;
//error('d',"id: %s",front->retval.s);
		fr = 0;
		nocast = 1;
		switch (lookahead()) {
		case ID:
		{	// handle ID ID
			// assume ID is a missing, hidden, or misspelt TNAME
			char* s2 = latok->retval.s;
			backup();
			Pname n = ktbl->look(s,HIDDEN);
			if (n == 0) {	// new tag: define it
				error("%s %s:TX (%s is not a TN)",s,s2,s);
				n = new name(s);
				n->lex_level = 0;
				n = n->tname(0);
				modified_tn = modified_tn->l;
				n->tp = any_type;
			}
			else {
				error("%s %s: %s is hidden",s,s2,s);
			}
			latok->tok = TNAME;
			latok->retval.pn = n;
			break;
		}
		case LC:
			backup();
			front->retval.pn = new name(s);
			front->retval.pn->lex_level--;
			break;
		default:
			backup();
			front->retval.pn = new name(s);
		}
		DO_RET;
	}
	case CASE:
	case DEFAULT:
	case PUBLIC:
	case ELSE:
		fr = 0;
		switch (tk) {
		case COLON:	// label
		case SM:	// end of statement
		case RC:	// end of statement
		case LC:	// first statement
			DO_RET;
		default:
			error(&curloc,"';' missing afterS orD before\"%k\"",latok->tok);
			return tk=SM;
		}
	case DO:
	case GOTO:
	case CONTINUE:
	case BREAK:
	case RETURN:
		fr = 0;
		switch (tk) {
		case ELSE:
		case DO:
		case COLON:	// label
		case RP:	// end of condition
		case SM:	// end of statement
		case RC:	// end of statement
		case LC:	// first statement
			DO_RET;
		default:
			error(&curloc,"';' missing afterS orD before\"%k\"",latok->tok);
			return tk=SM;
		}
	case IF:
	case WHILE:
	case FOR:
	case SWITCH:
		fr = 0;
		switch (tk) {
		case ELSE:
		case DO:
		case COLON:	// label
		case RP:	// end of condition
		case SM:	// end of statement
		case RC:	// end of statement
		case LC:	// first statement
			nocast = 1;
			DO_RET;
		default:
			error(&curloc,"';' missing afterS orD before\"%k\"",latok->tok);
			return tk=SM;
		}
	case TYPE:	// dangerous to diddle with: constructor notation
//fprintf(stderr,"type %d\n",tk); fflush(stderr);
		fr = 0;
		switch (tk) {
		case ID:
		//	case RP: 	old C function definition
		case RB:
			error(&curloc,"';' missing afterS orD before\"%k\"",latok->tok);
			return tk=SM;
		}
		if (latok->retval.t == FRIEND) fr = 1;
		nocast = 1;
		DO_RET;
	case TNAME:	// dangerous to diddle with: name hiding
	{	Pname n = latok->retval.pn;
		if (fr) {	// guard against: TYPE(friend) TNAME(x) SM
			nocast = 1;
			fr = 0;
			DO_RET;
		}
		fr = 0;
// fprintf(stderr,"type or tname %d %s\n",tk,n->string); fflush(stderr);
		switch (tk) {
		case TYPE:	// int TN ? or unsigned TN ?
			// beware of unsigned etc.
			switch (lookahead()) {
			case SM:
			case RB:
			case COLON:
			case ASSIGN:
				goto hid;
		//	case LP:	// the real problem
			default:
				nocast = 1;
				DO_RET;
			}
		case TNAME:	// TN TN ?
			switch (lookahead()) {
			case MEM:	// cl_name::mem_name
			case DOT:	// anachronism: cl_name.mem_name
				nocast = 1;
				DO_RET;
			}
		hid:
			backup();	// undo lookahead after TNAME
			n->hide();
			n = new name(n->string);
			n->n_oper = TNAME;
			latok->tok = ID;
			latok->retval.pn = n;
		}
	}
	case NEW:
		fr = 0;
		nocast = 1;
		DO_RET;
/*
	case OPERATOR:
		switch (lookahead()) {
		case LP:
			in_op = 1;
			if (lookahead() != RP) error("bad operator");
			break;
		case LB:
			if (lookahead() != RB) error("bad operator");
			break;
		case TYPE:
		case TNAME:
			while (lookahead() == MUL) ;
			backup();
		// default : 'regular' operator
		}
		if (lookahead() == LP) in_op = 1;
		DO_RET;
*/
	case RC:
//error('d',"} tk %d in_enum %d",tk,in_enum);
		fr = 0;
		switch (tk) {
		case RC:	// nested } (either stmt or expr)
		case LC:	// empty block: {}
		case SM:	// end of statement
			break;
		default:
		{	TOK t;
			loc x = curloc;
			switch (t = lookahead()) {
			case ELSE:
			case RC:	// } } probably end of initializer
			case CM:	// } , definitely end of initializer
			case SM:	// } ; probably end of initializer or class
			case RP:	// int f( struct { ... } );
				break;
			default:
				// either	"= { ... E } SorD"
				// or		" SorD }"
				// or enum { a, b } c; - yuk
				if (in_enum == 0) {
					error(&x,"';'X at end ofS orD before '}'");
					return tk=SM;
				}
				in_enum = 0;
			}
		}
		}
		in_enum = 0;
	default:
		fr = 0;
		nocast = 0;
		DO_RET;
	}
ret:
// hand optimized return:
//TOK deltok()
//{
	toknode* T = front;
	tk = T->tok;
	yylval = T->retval;
	if (front = front->next) front->last = 0;
	delete T;
//fprintf(stderr,"-> %d %d\n",tk,yylval);
	return tk;
//}

}


int scan_type()
{
	int is_type = 0;
	for(;;)
		switch (lookahead()) {
		case AGGR:
		case ENUM:
			get_tag();
		case TYPE:
		case TNAME:
			is_type = 1;
			continue;
		default:
			backup();
			return is_type;
		}
}


int scan_mod()
/*
	  ptr mod
	| "(" mod ")"
	| mod suf
	| ...
*/
{
for(;;)
	switch (lookahead()) {
	case AND:
	case MUL:                     // ptr mod
		continue;
	case LP:                      // "(" mod ")" [suf] | suf
		switch (lookahead()) {
		case AND:
		case MUL:
		case LP:
		case LB:
			backup();
			if (!scan_mod()) return 0;
			if (lookahead() != RP) return 0;
			if (!scan_suf()) return 0;
			return 1;
		case AGGR: case ENUM: case TYPE: case TNAME:
			backup();
			if (!scan_tlist()) return 0;
			if (lookahead() != RP) return 0;
			/* no break */
		case RP:
			bad_cast = 1;	/* possible cast to ftn */
			if (!scan_suf()) return 0;
			return 1;
		default:
			return 0;
		}
	case LB:                      // mod suf
		backup();
		if (!scan_suf()) return 0;
		return 1;
	case RP:
	case CM:
	case ELLIPSIS:
		backup();
		return 1;
	default:
		return 0;
	}
}


int scan_suf()
/*
	suf vec | suf arg_type_list | ...
	vec --> "[" [ICON] "]"
	arg_type_list --> "(" [tlist] ")"
*/
{
int found = 0;
for(;;)
	switch (lookahead()) {
	case LB:
		scan_e();
		found = 1;
		continue;
	case LP:
		if (!scan_tlist()) return 0;
		if (lookahead() != RP) return 0;
		if (found) {
			bad_cast = 1;	/* possible cast to ftn */
		} else found = 1;
		continue;
	default:
		backup();
		return 1;
	}
}


int scan_tlist()
/*
	tlist type | type
	type --> (TYPE | [AGGR] TNAME) mod
*/
{
	for(;;) {
		switch (lookahead()) {
		case AGGR:
		case ENUM:
			get_tag();
		case TYPE:
		case TNAME:
			scan_type();
			break;
		case ELLIPSIS:
			if (lookahead() != RP) {
				error("missing ')' after '...'");
				insert_tok(RP);
			}
		case RP:
			backup();
			return 1;
		default:
			return 0;
		}

		/* saw type */
		if (!scan_mod()) return 0;

		switch (lookahead()) {
		case CM:
			continue;
		case ELLIPSIS:
			if (lookahead() != RP) {
				error("missing ')' after '...'");
				insert_tok(RP);
			}
		case RP:
			backup();
			return 1;
		default:
			return 0;
		}
	}
}

void get_tag()
/* saw AGGR or ENUM */
{
	switch (lookahead()) {
	default:
		error('e', "missing tag");
		insert_tok(ID);
		latok->retval.s = "__MISSING__";
	case ID:
	{
		Pname n = ktbl->look(latok->retval.s,HIDDEN);
		if (n == 0) {
			n = new name(latok->retval.s);
			n->lex_level = 0;
			n = n->tname(latok->last->retval.t);
			modified_tn = modified_tn->l;
		}
		else {
			switch (n->tp->base) {
			case COBJ:
			case EOBJ:
				break;
			default:
				error('i',"hidden%n:%t",n,n->tp);
			}
		}
		latok->tok = TNAME;
		latok->retval.pn = n;
		break;
	}
	case TNAME:
		break;
	}

	switch (lookahead()) {
	default:
		backup(); return;
	case COLON:
		switch (lookahead()) {
		case ID: case TNAME: case LC:
			break;
		default:
			backup(); return;
		}
	case LC:{ int level = 1;
		for(;;) switch (lookahead()) {
			case LC: level++; break;
			case RC: if(--level==0) return;
				break;
			case EOFTOK:
				error('i', "unexpected eof");
			}
		} // case LC
	} // switch
}

void scan_e()
/* scan expr in vec */
{
long brcount = 1L;
int localcast = 0;
	for(;;)
		switch (lookahead()) {
		case RB:
			if (--brcount == 0L)  return;
			continue;
		case LB:
			brcount++;
			continue;
		case LP:
			if (localcast)
				continue;
			toknode* mark = latok;
			if (scan_type())
				if (scan_mod())
					if (lookahead() == RP)
						switch (lookahead()) {
						case CM:
						case RP:
						case SM:
						case LC:
						case ASSIGN:
							break;
						default:
							backup();
							mark->tok = CAST;
							latok->tok = ENDCAST;
						}
			continue;
		case CAST:	/* scenario: local cast recognized,
					main cast in lalex fails,
					lalex later recognizes a smaller cast
						containing this one
				*/
			localcast++;
			continue;
		case ENDCAST:
			localcast--;
			continue;
		default:
			continue;
		}
}
« October 2024 »
Su Mo Tu We Th Fr Sa
1 2 3 4 5
6 7 8 9 10 11 12
13 14 15 16 17 18 19
20 21 22 23 24 25 26
27 28 29 30 31
 

Powered by Plone CMS, the Open Source Content Management System

This site conforms to the following standards: