Personal tools
You are here: Home Projects C++ Cfront releases Release 3.0.3 source libSC fs fsippsrc parse.c
Document Actions

parse.c

by Michael L Powell last modified 2007-01-26 03:23

Click here to get the file

Size 17.4 kB - File type text/plain

File contents

/*ident	"@(#)fs:fsippsrc/parse.c	3.1" */
/******************************************************************************
*
* C++ Standard Components, Release 3.0.
*
* Copyright (c) 1991, 1992 AT&T and Unix System Laboratories, Inc.
* Copyright (c) 1988, 1989, 1990 AT&T.  All Rights Reserved.
*
* THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF AT&T and Unix System
* Laboratories, Inc.  The copyright notice above does not evidence
* any actual or intended publication of such source code.
*
******************************************************************************/

#include "fsipp.h"
#include "setjmp.h"

/* Debugging switch: when nonzero, parse() only dumps each token to cerr
*  instead of instrumenting the input (see the #if SHOWSTREAM sections there).
*/
#define SHOWSTREAM 0

/* Forward declarations of the handlers defined further down in this file. */
static void foundND();
static void protoOrDef();
static void proto(bool_type);
static void def(bool_type, String, bool_type);
static void deleteUse();
static void newUse();
static void cppline();
static void lookForClassHead();

/* Set by foundND(): true while the token being handled is "new", false for "delete". */
static bool_type doingNew;
/* Number of diagnostics issued so far; bumped by message(), reset and returned by parse(). */
static int nErrors;
/* Recovery point: parse() calls setjmp() on it and fail() longjmp()s back to it. */
static jmp_buf beginning;

/* Error recovery: skip three tokens unconditionally, then keep skipping
*  until the current token is LC, RC, SEMI or EOFTOK.
*/
static void advAfterError()
{
	for (int skipped = 0; skipped < 3; skipped++)
	{
		ADV;
	}
	for (;;)
	{
		if (TOK == LC || TOK == RC || TOK == SEMI || TOK == EOFTOK)
			break;
		ADV;
	}
}

/* Abandon the construct currently being parsed: melt the lexer, skip
*  ahead with advAfterError(), and longjmp to the `beginning' context
*  with value 1.  Control does not return to the caller.
*/
static void fail()
{
	const int resumeValue = 1;	// value setjmp(beginning) will appear to return

	lexer->melt();
	advAfterError();
	longjmp(beginning, resumeValue);
}

/* Report a diagnostic on cerr and abandon the current construct.
*
*  prefix	category text printed right after "CC -fs: "
*  mess		optional detail text; when 0 nothing is appended
*
*  Increments nErrors, prints the prefix, the current line number (1 when
*  the lexer window is empty) and the lexer's file name, then calls fail().
*  fail() ends in a longjmp, so this function does not return to its caller.
*
*  Both strings are only read here, so they are taken as const char *;
*  this lets callers pass string literals without a const-dropping conversion.
*/
static void message(const char *prefix, const char *mess)
{
	nErrors++;
	cerr << "CC -fs: " << prefix << ": line " << (lexer->emptyWindow()? 1 : LINENO) << " " << lexer->filename();
	if (mess)
		cerr << ": " << mess;
	cerr << endl;
	fail();
}

/* Report use of a construct this tool does not handle.  Forwards to
*  message(), which ends by calling fail(), so this does not return.
*/
static void sorry(char *mess)
{
	static char category[] = "Sorry, not implemented";

	message(category, mess);
}

/* Report a syntax error in the input.  Forwards to message(), which
*  ends by calling fail(), so this does not return.
*/
void syntaxError(char *mess)
{
	static char category[] = "Syntax error";

	message(category, mess);
}

/* True when `type' spells one of the arithmetic type names this tool
*  recognizes: int, char, short, long, float or double.
*/
static bool_type builtin(const String & type)
{
	static const char *const names[] = { "int", "char", "short", "long", "float", "double" };
	const int count = sizeof(names) / sizeof(names[0]);

	for (int k = 0; k < count; k++)
	{
		if (type == names[k])
			return 1;
	}
	return 0;
}

/* Return the spelling of an unsigned type with the same size as the
*  result of sizeof on this machine.  Candidates are tried in the order
*  int, short, long.  If none matches, a message is printed on cerr and
*  the program exits with status 1.
*/
static const char *size_t_onThisMachine()
{
	/* the operand type of the inner sizeof doesn't matter */
	const int width = sizeof(sizeof(char));

	if (width == (int) sizeof(int))
		return "unsigned int";
	if (width == (int) sizeof(short))
		return "unsigned short";
	if (width == (int) sizeof(long))
		return "unsigned long";

	cerr << "Can't determine underlying type of size_t on this machine." << endl;
	exit(1);
	return 0;
}

/* Consume the current token if it has type t; otherwise report a
*  syntax error ("Unexpected token").
*/
static void match(TokenType t)
{
	if (TOK != t)
	{
		syntaxError("Unexpected token");
		return;
	}
	ADV;
}

/*
static void matchOptional(TokenType t)
{
	if (TOK == t)
		ADV;
}
*/

/* Write a line directive for `line' and the lexer's current file name
*  to cout, preceded by a newline.  The directive is spelled "#line"
*  when alternateLineDirectiveFormat is set and "#" otherwise.
*/
static void emitLineDirective(int line)
{
	const char *keyword = alternateLineDirectiveFormat ? "line" : "";

	cout << "\n#" << keyword;
	cout << " " << line;
	cout << " \"" << lexer->filename() << "\"\n";
}

/* Map a left grouping token to the token that closes it:
*  LP -> RP, LC -> RC, LS -> RS, LANGLE -> RANGLE.
*  Any other argument trips the assert; the trailing return of ID only
*  keeps the function well-formed when the assert does not stop execution.
*/
static TokenType rightGroupTok(TokenType left)
{
	if (left == LP)
		return RP;
	if (left == LC)
		return RC;
	if (left == LS)
		return RS;
	if (left == LANGLE)
		return RANGLE;

	assert(0);
	/* NOTREACHED */
	return ID;
}

// advance to the right grouping token matching the current token.
// while doing so, append lexemes and whitespace to the string s, if s!=0.
static void advToMatchingAppendingTo(String *s)
{
        TokenType left = TOK;
        TokenType right = rightGroupTok(left);

        int depth = 1;
        while (depth > 0 && TOK != EOFTOK)
        {
		if (s != 0) 
		{
			*s += LEXWS;
			*s += LEXEME;
		}
                ADV;
                if (TOK == left) depth++;
                else if (TOK == right) depth--;
		else if (TOK == LC)
			syntaxError("Missing right delimiter");
        }
        if (TOK == EOFTOK)
                syntaxError("Unexpected end of file: missing right delimiter");
	if (s != 0)
	{
		*s += LEXWS;
		*s += LEXEME;
	}
}

static void advToMatching()
{
	advToMatchingAppendingTo(0);
}

/* Copy a token to cout: its leading whitespace followed by its lexeme.
*  parse() installs this as the lexer's contract action, so it is what
*  happens to tokens that get contracted off the lexer window.
*/
void echo(Token *t)
{
	cout << t->ws;
	cout << t->lexeme;
}

/* Apply f to each token at absolute window positions i through j,
*  inclusive, in increasing order.  Nothing is done when i > j.
*
* THIS IS INEFFICIENT - windowAbs is called once for every position
* in the range.
*
*/
void maptok(void (*f)(Token*), int i, int j)
{
	while (i <= j)
	{
		Token *current = lexer->windowAbs(i);
		(*f)(current);
		i++;
	}
}

/* True for the token types the main loop in parse() stops at:
*  NEW, DELETE, POUND, EOFTOK, AGGR, LC and RC.
*/
static bool_type interesting(TokenType t)
{
	if (t == NEW || t == DELETE)
		return 1;
	if (t == POUND || t == EOFTOK)
		return 1;
	if (t == AGGR)
		return 1;
	if (t == LC || t == RC)
		return 1;
	return 0;
}

/* Write to cout the extern declarations of _fs_register_ATTLC and
*  _fs_unregister_ATTLC that the instrumented output refers to.  The
*  type of the `size' parameter is whatever size_t_onThisMachine() reports.
*/
static void emitMyProtos()
{
	cout << "extern void *_fs_register_ATTLC(int atuse, const char *filename, int lineno, const char *type,\n\t";

	const char *sizeType = size_t_onThisMachine();
	cout << sizeType;
	cout << " size, long number, const void *address);\n";

	cout << "extern void _fs_unregister_ATTLC(int atuse, int hasbrackets, const void *address);\n";
}

/* Dispatch on the current token, which parse() has found interesting:
*	NEW / DELETE	-> foundND()
*	AGGR		-> lookForClassHead()
*	POUND		-> cppline()
*	LC		-> step past it and push a scope
*	RC		-> step past it and pop a scope
*  Any other token (EOFTOK in particular) is left alone.
*/
static void foundSomethingInteresting()
{
	if (TOK == NEW || TOK == DELETE)
	{
		foundND();
		return;
	}
	if (TOK == AGGR)
	{
		lookForClassHead();
		return;
	}
	if (TOK == POUND)
	{
		cppline();
		return;
	}
	if (TOK == LC)
	{
		ADV;
		pushScope();
		return;
	}
	if (TOK == RC)
	{
		ADV;
		popScope();
	}
}

/* Main driver.  Emits the helper prototypes, then scans the token stream,
*  stopping at each "interesting" token and handing it to
*  foundSomethingInteresting().  The loop ends at EOFTOK or once more than
*  MAXNERRORS diagnostics have been issued.
*
*  Returns the number of errors counted in nErrors.
*
*  With SHOWSTREAM nonzero the loop instead just prints every token on cerr.
*/
int parse()
{
	lexer->setTrailsize(10);  // magic number.  10 is more than enough for the lookbehinds we need.
	lexer->setContractAction(echo);	// Tokens that fall out of the window just get echoed.
	nErrors = 0;
	emitMyProtos();
	while (TOK != EOFTOK && nErrors <= MAXNERRORS)
	{
#if SHOWSTREAM
		cerr << *lexer->window(0) << endl;
		ADV;
#else
		// setjmp() returns 0 when the recovery point is first established and
		// nonzero when fail() longjmp()s back here after reporting an error.
		if (setjmp(beginning)) 
		{
			/* parsing returns here if we encounter a syntax error */
			/* nothing to do: fail() has already skipped ahead, so just loop again */
		}
		else
		{
			// interesting() is true for EOFTOK, so this scan always terminates.
			while (!interesting(TOK)) ADV;
			foundSomethingInteresting();
		}
#endif
	}
	lexer->contractTo(0);
#if SHOWSTREAM
	cerr << *lexer->window(0) << endl;
#else
	// write out the whitespace attached to the current (final) token
	cout << LEXWS;
#endif
	if (nErrors > MAXNERRORS)
		cerr << "Sorry, too many errors." << endl;
	return nErrors;
}

/* The current token is NEW or DELETE.  Record which one in doingNew,
*  then decide what kind of occurrence it is: directly after OPERATOR it
*  is a prototype or definition of operator new/delete; otherwise it is a
*  use of new or of delete.
*/
static void foundND()
{
	assert (TOK == NEW || TOK == DELETE);

	doingNew = (TOK == NEW);

	if (PREVTOK == OPERATOR)
	{
		protoOrDef();
		return;
	}

	if (TOK != NEW)
		deleteUse();
	else
		newUse();
}

/* Look backwards from "operator new" / "operator delete" to find out how
*  the function is declared.  All three parameters are outputs:
*
*  hasQual	set to whether the token before `operator' is QUAL ("::")
*  qual		emptied, then set to the lexeme of the ID before the "::", if any
*  isInline	set to whether the token before the return type is INLINE
*
*  The lexer position saved at entry is restored with moveInFile() before
*  a normal return.  sorry()/syntaxError() are called for nested or template
*  qualifiers and for an unexpected return type.
*/
static void findQualAndInline(bool_type &hasQual, String &qual, bool_type &isInline)
{
	// Here's where we do a backwards recursive descent!
	
	// [...] operator n/d
	//                 ^
	int rempos = lexer->posinfile();	// remember where we are so we can come back
	// step back over n/d and over `operator'
	BACKUP;
	BACKUP;
	qual.make_empty();
	// note: assignment, not comparison -- hasQual receives the test result
	if (hasQual = (TOK == QUAL))
	{
		BACKUP;
		// [...] :: operator n/d
		//    ^
		if (TOK == ID)
		{
			// [...] T :: operator n/d
			//       ^
			if (PREVTOK == QUAL)
			{
				sorry("Cannot handle out-of-line definitions of nested operator new/delete");
			}
			qual = LEXEME;
			BACKUP;
		}
		else if (TOK == RANGLE)
		{
			sorry("Cannot handle out-of-line definitions of template operator new/delete");
		}
	}
	// for operator new, step back over the '*' of the return type
	if (doingNew && TOK == STAR)
		BACKUP;
	if (TOK != VOID && TOK != ID)  // might be typedef name
		syntaxError(doingNew? "return type of operator new must be void*" : "return type of operator delete must be void");
	// step back over the return type; `inline', if present, is looked for here
	BACKUP;
	isInline = (TOK == INLINE);
	lexer->moveInFile(rempos);	
}

/* Found a prototype or definition of new or delete.
*
*  Gathers qualifier/inline information with findQualAndInline(), checks
*  that an argument list follows, holds the argument list in the window
*  (freeze) while skipping to the token after the closing parenthesis, and
*  then hands off to def() for '{' or proto() for ';'.  A ',' or any other
*  token there is reported as an error.
*/
static void protoOrDef()
{
	// [...] operator n/d
	//                 ^

	String qual;
	bool_type hasQual, isInline;
	findQualAndInline(hasQual, qual, isInline);

	ADV;
	if (TOK != LP)
		syntaxError(doingNew? "operator new: missing argument list" : "operator delete: missing argument list");
	// [...] operator n/d (
	//                    ^

	// Make '(' absolute position 0 of the window, then freeze so that the
	// argument list stays available to def()/proto() -- both read the
	// window starting at absolute position 0.
	lexer->contractTo(0);
	lexer->freeze();
	advToMatching();
	ADV;
	// ( arglist ) TOK
	//              ^

	if (TOK == LC)
		def(hasQual, qual, isInline);
	else if (TOK == SEMI)
		proto(isInline);
	else if (TOK == COMMA)
		sorry("prototypes of operators new and delete cannot be in prototype list");
	else
		syntaxError("I don't understand this.");
	lexer->melt();
}

/* Work out which class an operator new/delete belongs to.
*
*  hasQual, qual	whether the declaration was written "qual::operator ..."
*  clas (out)		class name; qual when qualified, "?" when global
*  isglobal (out)	false when qualified; otherwise the negation of
*			containingClass(clas)
*
*  NOTE(review): in the unqualified case containingClass(clas) presumably
*  stores the enclosing class name into clas -- confirm against its definition.
*/
static void determineClasAndGlobality(bool_type hasQual, String qual, String &clas, bool_type &isglobal)
{
	if (!hasQual)
	{
		isglobal = !containingClass(clas);
		if (isglobal)
			clas = "?";
		return;
	}

	isglobal = no;
	clas = qual;
}

/* Build a new ID token whose lexeme is "fsATTLC" followed by i.
*
*  i	number appended to make the name distinct
*  line	line number recorded in the token
*
*  Returns a token allocated with new; its whitespace is a single blank.
*
*  ostrstream::str() hands ownership of its character array to the caller.
*  The array is fetched once, copied into the token's lexeme, and released
*  with the array form of delete (the buffer is an array, so plain
*  `delete' would be a mismatch).
*/
static Token *generateId(int i, int line)
{
	Token *id = new Token;
	id->ws = ' ';
	id->lineno = line;
	id->type = ID;

	ostrstream os;
	os << "fsATTLC" << i << ends;
	char *text = os.str();
	id->lexeme = text;
	delete [] text;

	return id;
}

/* Returns a pointer to the formal argument name appearing to the left of absolute 
* position i in the window.
*
* i		absolute window position of the ',' or ')' ending the argument
* inserted	(out) 1 if a name had to be generated and inserted, else 0
*
* The token just left of i is taken to be the argument name when it is an
* ID, is not one of the builtin type names, and is not itself the first
* token of the argument (i.e. is not preceded by ',' or '(').  In every
* other case the name is considered omitted: a unique one is generated and
* inserted into the window before position i.
* (This will unfortunately cause a cc "argument not used" warning.  I should
* really delete any inserted tokens immediately after laying down the 
*	static void *_new(
* or 
*	static void _delete(
* )
*
* These two cases are exhaustive, so there is no separate syntax-error
* path; the earlier third branch could never be reached.
*/
static Token *argNameToLeft(int i, int &inserted)
{
	Token *prev = lexer->windowAbs(i-1);
	Token *prevprev = lexer->windowAbs(i-2);
	inserted = 0;

	int nameOmitted = (prev->type != ID || prevprev->type == COMMA || prevprev->type == LP || builtin(prev->lexeme));
	if (!nameOmitted)
	{
		// it's probably the arg name
		return prev;
	}

	// arg name was omitted, insert one
	Token *t = generateId(i, prev->lineno);
	lexer->insertBeforeAbs(t, i);
	inserted = 1;
	return t;
}

typedef List_of_p<Token> ArgList;

/* Return a list of pointers to the tokens in the window comprising
* the names of the arguments in the argument list beginning at absolute 
* window position beg and ending at end.
*
* beg	absolute position of the opening '(' (asserted)
* end	absolute position of the closing ')' (asserted)
*
* If any of the arguments are missing their names, I'll generate unique ones,
* and insert them into the window.
*
* The returned list is allocated with new and belongs to the caller.
* A nested parenthesis inside the list is reported through sorry(); because
* sorry() leaves by longjmp and never comes back, the list is released
* before the report so that it is not leaked.
*/
static ArgList *extractArgs(int beg, int end)
{
	assert(lexer->windowAbs(beg)->type == LP);
	assert(lexer->windowAbs(end)->type == RP);
	
//cerr << "Before extracting args: "; lexer->printWindow();
//cerr << endl;

	ArgList *args = new ArgList;

	int i = beg+1;
	Token *t = lexer->windowAbs(i);
	for (; i <= end; i++, t = lexer->windowAbs(i))
	{
		if (t->type == LP || (t->type == RP && i != end))
		{
			// sorry() does not return; free the list first
			delete args;
			sorry("parameter list is too complicated, try simplifying");
		}

		if (t->type == COMMA || i == end)
		{
			int inserted;
			args->put(argNameToLeft(i, inserted));
			if (inserted)
			{
				// a token was inserted before position i, so both the
				// current position and the end of the list moved right by one
				i++;
				end++;
			}
		}
	}
//cerr << "After extracting args: "; lexer->printWindow();
//cerr << "\nArgs are: " << *args << endl;
	return args;
}

/* Write to cout the body of the instrumented operator new/delete: a
*  one-line block that registers (new) or unregisters (delete) with the
*  _fs_*_ATTLC routines and forwards every parameter to _new / _delete.
*
*  clas		class name passed as the type string to _fs_register_ATTLC
*  isglobal	when set, an extern prototype of _new/_delete is emitted
*		first and -2 (instead of 1) is passed as the count for new
*  args		names of the formal arguments; must not be empty
*
*  The first argument is the size for new, and the pointer to the object
*  for delete.
*/
static void emitInstrumentedDef(String clas, bool_type isglobal, ArgList *args)
{
	if (args->length() == 0)
		syntaxError("new and delete must each have at least one parameter");

	List_of_piter<Token> cursor(*args);
	Token *firstArg;
	cursor.peek_next(firstArg);

	cout << "{ ";
	if (isglobal) // generate prototype
	{
		cout << "extern void ";
		if (doingNew)
			cout << "*_new";
		else
			cout << "_delete";
		maptok(echo, 0, lexer->curpos()-1);	// duplicate the formal argument list
		cout << "; ";
	}

	if (!doingNew)
	{
		cout << "((_fs_unregister_ATTLC(0, 0, " << firstArg->lexeme << "), _delete(";
	}
	else
	{
		cout << "return (_fs_register_ATTLC(0, \"?\", -1, \"" << clas << "\", ";
		cout << firstArg->lexeme << ", " << (isglobal? -2:1) << ", _new(";
	}

	// pass all of the parameters through to _new/_delete
	cursor.reset();
	Token *arg;
	while (cursor.next(arg))
	{
		cout << arg->lexeme;
		if (!cursor.at_end())
			cout << ", ";
	}
	cout << "))); }";
}

/* Handle a definition of operator new/delete; the current token is the
*  '{' that opens its body and the window holds "( arglist )" from
*  absolute position 0.
*
*  hasQual, qual	qualifier information from findQualAndInline()
*  isInline		whether the definition was declared inline
*
*  Written to cout, in order: a copy of the argument list; a line directive;
*  the instrumented body from emitInstrumentedDef(); a line directive; then
*  the header of a second function named _new/_delete ("static" when inside
*  a class and unqualified, "inline" when isInline, "void" or "void *",
*  optional "qual::").  The window is then contracted to position 0 and a
*  final line directive is written.
*/
static void def(bool_type hasQual, String qual, bool_type isInline)
{
//cerr << "found definition on line " << LINENO << endl;
	// ( arglist ) {
	//             ^

	int openingCurlyLine = LINENO;
	String clas;
	bool_type isglobal;
	determineClasAndGlobality(hasQual, qual, clas, isglobal);

	// may insert generated argument names into the window
	ArgList *args = extractArgs(0, lexer->curpos()-1);
	maptok(echo, 0, lexer->curpos()-1);	// duplicate the arglist
	emitLineDirective(openingCurlyLine);
	emitInstrumentedDef(clas, isglobal, args);
	delete args;
	emitLineDirective(openingCurlyLine);

	if (!isglobal && !hasQual)
		cout << "static ";
	if (isInline)
		cout << "inline ";
	cout << "void ";
	if (doingNew)
		cout << "* ";
	if (hasQual)
		cout << qual << "::";
	cout << (doingNew? "_new" : "_delete");

	// NOTE(review): presumably this echoes the held "( arglist )" through the
	// contract action installed by parse() -- confirm against the lexer.
	lexer->contractTo(0);
	emitLineDirective(openingCurlyLine);
	// drop the whitespace attached to the current token ('{')
	LEXWS = "";
}

/* Handle a prototype of operator new/delete; the current token is the
*  ';' ending it and the window holds "( arglist ) ;" from absolute
*  position 0.
*
*  Writes a copy of "( arglist ) ;" to cout, followed by the start of a
*  second declaration: " static" when containingClass() is true, " inline"
*  when isInline, then " void *_new" or " void _delete".
*/
static void proto(bool_type isInline)
{
//cerr << "found prototype on line " << LINENO << endl;

	// ( arglist ) ;
	//             ^

	int semiPos = lexer->curpos();
	maptok(echo, 0, semiPos); // duplicate "( arglist ) ;"

	if (containingClass())
	{
		cout << " static";
	}
	if (isInline)
	{
		cout << " inline";
	}

	cout << " void ";
	if (doingNew)
		cout << "*_new";
	else
		cout << "_delete";
}

/* Prepare the window for rewriting a use of new or delete.
*
*  Initial window:
*  	...tokens [[X]::]n/d tokens...
*	                  ^
*  Final window:
*                 [[X]::]n/d tokens...
*	                  ^
* and (1) the whitespace before the first token in the final window has 
* been printed and reset to empty, and (2) the lexer is frozen.
*
* Nested ("A::B::new") and template ("X<T>::new") qualifiers are rejected
* through sorry().
*/
static void prepareUse()
{
	assert(TOK == NEW || TOK == DELETE);
	int i = 0;	// number of qualifier tokens ("::" and optional X) kept before n/d
	if (PREVTOK == QUAL)
	{
		BACKUP;
		i++;
		// tokens [X] :: n/d tokens...
		//            ^
		if (PREVTOK == ID)
		{
			i++;
			if (PREVPREVTOK == QUAL)
				sorry("Cannot handle qualified calls to nested operator new/delete");
		}
		else if (PREVTOK == RANGLE)
		{
			sorry("Cannot handle qualified calls to template new/delete");
		}
		// return to n/d
		ADV;
	}
	// keep the i qualifier tokens in the window
	lexer->contractTo(i);
	// print the leading whitespace of the first kept token now and clear it
	cout << lexer->window(-i)->ws;
	lexer->window(-i)->ws = "";
	lexer->freezeTotally();
}

/* True for the token types newUse() accepts directly after the type name
*  in "new T": COLON, SEMI, COMMA, RP, LS and LP.
*/
static bool_type canFollowNewedtype(TokenType t)
{
	if (t == COLON || t == SEMI || t == COMMA)
		return 1;
	if (t == RP)
		return 1;
	if (t == LS || t == LP)
		return 1;
	return 0;
}

/* Collect a type name starting at the current ID token and return its
*  spelling:  ID [ '<' ... '>' ] { '::' ID }
*  A bracketed argument list after the first ID is appended with its
*  whitespace preserved; each following "::ID" is appended without any.
*  On return the current token is the first one after the name.
*/
static String collectTypename()
{
	String name = LEXEME;
	match(ID);

	if (TOK == LANGLE)
	{
		advToMatchingAppendingTo(&name);
		ADV;
	}

	for (;;)
	{
		if (TOK != QUAL)
			break;
		name += "::";
		ADV;
		if (TOK != ID)
			syntaxError("Something's wrong in this qualified type name");
		name += LEXEME;
		ADV;
	}
	return name;
}		

/* Found a use of new.
*
*  Accepts   [[X]::] new [ (expr) ] [AGGR] type [ '[' expr ']' | ( args ) ]
*  where type is what collectTypename() accepts; anything more elaborate is
*  rejected through sorry().
*
*  Written to cout ahead of the new-expression:
*	(AGGR type*)_fs_register_ATTLC(1, "file", line, "type", sizeof(AGGR type), count, 
*  where count is the array bound when it is a single identifier or integer
*  literal, -1 for any other array bound, and 1 for a non-array new.
*  A closing ")" is written after the lexer has been melted and contracted.
*
*  NOTE(review): the original new-expression presumably reaches the output
*  (as the final argument) when contractTo(0) pushes the held tokens through
*  the contract action installed by parse() -- confirm against the lexer.
*/
static void newUse()
{
	assert (TOK == NEW);
	prepareUse();
	// [[X]::] new TOK
	//          ^

	int newlineno = LINENO;	// line of the `new' keyword, reported to _fs_register_ATTLC
	ADV;  
	if (TOK == LP)
	{
		// skip the parenthesized expression that follows `new'
		advToMatching();
		ADV;
	}
	// [[X]::] new [ (expr) ] TOK
	//                         ^

	String aggr;	// class-key lexeme, or empty when none was written
	if (TOK == AGGR)
	{
		aggr = LEXEME;
		ADV;
	}
	// [[X]::] new [(expr)] [AGGR] type
	//                              ^

	if (TOK != ID)
		sorry("In \"new T\", T cannot be a complex type expression.");

	String type = collectTypename();

//	emitLineDirective(newlineno);

	cout << "(" << aggr << " " << type << "*)_fs_register_ATTLC(1, \"" << lexer->filename() << "\", ";
	cout << newlineno << ", \"" << type << "\", ";
	cout << "sizeof(" << aggr << " " << type << "), ";

	// [[X]::] new [AGGR] type TOK
	//                          ^

	if (!canFollowNewedtype(TOK))
		sorry("In \"new T\", T cannot be a complex type expression.");

	if (TOK == LS)
	{
		ADV;
		// [[X]::] new [AGGR] type '[' expr ']'
		//                             ^

		// the bound is passed on only when it is a single ID or integer literal
		if ((TOK == ID || TOK == LITINT) && lexer->window(1)->type == RS)
			cout << LEXEME;
		else
			cout << -1;
		cout << ", ";

		// go back to '[' so that advToMatching() starts on the left bracket
		BACKUP;
		advToMatching();
		ADV;
		// [[X]::] new [AGGR] type '[' expr ']' TOK
		//                                       ^
	}
	else if (TOK == LP)
	{
		// [[X]::] new [AGGR] type ( constructorArgs )
		//                         ^

		cout << "1, ";

		advToMatching();
		ADV;
		// [[X]::] new [AGGR] type ( constructorArgs ) TOK
		//                                              ^
	}
	else
	{
		// [[X]::] new [AGGR] type TOK
		//                          ^

		cout << "1, ";
	}
	lexer->melt();
	lexer->contractTo(0);
	cout << ")";		

//	emitLineDirective(LINENO);
	// drop the whitespace attached to the current token
	LEXWS = "";
}

/* Found a use of delete.
*
*  Accepts   [[X]::] delete [ '[' [expr] ']' ] id   followed by ';' or ','.
*  Any other operand is rejected through sorry().
*
*  Written to cout in place of the original expression:
*	(_fs_unregister_ATTLC(1, hasBrackets, id), [[X]::]delete [[] ]id)
*  followed by a line directive for the current line.  The original tokens
*  are dropped with discardTo(0) rather than echoed.
*/
static void deleteUse()
{
	assert (TOK == DELETE);
	prepareUse();
	ADV;  
	// [[X]::] delete TOK
	//                 ^

	int deletePos = lexer->curpos() - 1;	// absolute position of the `delete' token
	bool_type hasBrackets = (TOK == LS);

	cout << "(_fs_unregister_ATTLC(1, " << hasBrackets << ", ";

	if (hasBrackets)
	{
		// [[X]::] delete '[' [ignoreable expr] ']' TOK
		//                 ^
		advToMatching();
		ADV;
		// [[X]::] delete '[' [ignoreable expr] ']' TOK
		//                                           ^
	}

	// Now the object being deleted.
	//
	if (TOK != ID || (lexer->window(1)->type != SEMI && lexer->window(1)->type != COMMA))
		sorry("argument of delete too complicated (must be simple identifier)");

	// [[X]::] delete [ '[' [ignoreable expr] ']' ] id ;
	//                                              ^

	cout << LEXEME << "), ";
	maptok(echo, 0, deletePos - 1);  // print the optional qualifier
	cout << "delete ";
	if (hasBrackets)
		cout << "[] ";
	cout << LEXEME << ")";

	ADV;
	// [[X]::] delete [ ... ] id ;
	//                           ^

	lexer->melt();
	// the replacement text has been written above, so throw the originals away
	lexer->discardTo(0);

	emitLineDirective(LINENO);
	// drop the whitespace attached to the current token
	LEXWS = "";
}

/* Found a #.  It's probably a "#[line] lineNumber "fileName"" line.
*  If it's some other kind of preprocessor line, just echo it. 
*
*  For a line directive the lexer's line number (and file name, when one is
*  given) are updated, an equivalent directive is written with
*  emitLineDirective(), and the tokens of the original line are discarded.
*  newlineIsToken is switched on for the duration so that the end of the
*  preprocessor line can be found.
*/
static void cppline()
{
	lexer->contractTo(0);
	lexer->freeze();

	extern bool_type newlineIsToken;
	newlineIsToken = yes;  // for gettok()
	match(POUND);

	bool_type itsALineDirective = 0;
	// the word "line" is optional: both "# 12" and "#line 12" are accepted
	if (LEXEME == "line")
		ADV;
	if (TOK == LITINT)
	{
		itsALineDirective = 1;
		Litint litint;
		lexer->get(litint);
			// The -1 counteracts the newline coming up 
			// at the end of this preprocessor line.
		lexer->setLineno(litint.intval - 1);  
		ADV;
		if (TOK == LITSTRING)
		{
			Litstring litstring;
			lexer->get(litstring);
			lexer->setFilename(litstring.thestring);
			ADV;
		}			
		// emitted after setFilename() so that the new file name is used
		emitLineDirective(litint.intval);
	}

	// get to the end of the preprocessor line.
	while (TOK != NL && TOK != EOFTOK) 
		ADV;
	if (itsALineDirective)  // then discard the whole line, we've already emitted the directive
	{
		lexer->discardTo(0);
		LEXWS = "";
		LEXEME = "";
	}
	lexer->melt();
	newlineIsToken = no;
	// step past the token that ended the line
	ADV;
}

/* Skip a base class list.  Expects the current token to be COLON, then
*  advances until LC, SEMI or EOFTOK.  Stopping anywhere but on LC is
*  reported as a syntax error.
*/
static void gobbleBaseList()		
{
	match(COLON);
	for (;;)
	{
		if (TOK == LC || TOK == SEMI || TOK == EOFTOK)
			break;
		ADV;
	}
	if (TOK == LC)
		return;
	syntaxError("Something's wrong in this base class list");
}

/* The current token is AGGR.  Recognize
*	AGGR [ID] [: base-list] {
*  and, when the '{' is found, push a class scope named after the tag
*  (empty if there was no ID) and step past the '{'.  If no '{' follows,
*  nothing is pushed.
*/
static void lookForClassHead()
{
	String tag;

	match(AGGR);
	if (TOK == ID)
	{
		tag = LEXEME;
		ADV;
	}
	if (TOK == COLON)
	{
		gobbleBaseList();
	}
	if (TOK != LC)
		return;

	// found a class header.
	pushClassScope(tag);
	ADV;
}


« April 2024 »
Su Mo Tu We Th Fr Sa
1 2 3 4 5 6
7 8 9 10 11 12 13
14 15 16 17 18 19 20
21 22 23 24 25 26 27
28 29 30
 

Powered by Plone CMS, the Open Source Content Management System

This site conforms to the following standards: