Personal tools
You are here: Home Projects C++ Cfront releases Release 3.0.3 source libSC publik Lexer.h
Document Actions

Lexer.h

by Michael L Powell last modified 2007-01-26 03:24

Click here to get the file

Size 7.5 kB - File type text/plain

File contents

/*ident	"@(#)publik:Lexer.h	3.1" */
/******************************************************************************
*
* C++ Standard Components, Release 3.0.
*
* Copyright (c) 1991, 1992 AT&T and Unix System Laboratories, Inc.
* Copyright (c) 1988, 1989, 1990 AT&T.  All Rights Reserved.
*
* THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF AT&T and Unix System
* Laboratories, Inc.  The copyright notice above does not evidence
* any actual or intended publication of such source code.
*
******************************************************************************/

#include "Token.h"

#include <Objection.h>
#include <List.h>
#include <iostream.h>

// Shorthand for "pointer to Token".  (Token is not declared in this header;
// presumably it comes from "Token.h", included above.)
typedef Token *TokenP;
//List_of_pdeclare(Token)  /* no more, using SC release 3.0 */
// Type of the user-supplied callback that Lexer stores as its contract action
// (see Lexer::setContractAction).  It receives a pointer to a Token and
// returns nothing.
typedef void (*ContractAction)(Token *);

// Lexer maintains a window of tokens over an input character stream.
// Tokens are produced by the pure virtual gettok(), which a derived class
// must supply.  Clients move a "current token" index through the window,
// inspect tokens at offsets relative to it, and contract (remove) tokens
// from the left end of the window.
class Lexer 
{
public:
	// Objections used by this class.  Within this header, DestroyingFrozenLexer
	// is raised by the destructor, DiscardedToken by resize(), and BadHandshake
	// by handshake().
	static Objection DestroyingFrozenLexer, ContractingFrozenLexer, DiscardedToken, BadHandshake;

	// Trail size meaning "no limit": move() skips automatic contracting
	// when the trail size equals this value.
	enum { infinity = -1 };

	// Create a lexer with the given trail size (default = 0).
	// Initializers are listed in member declaration order.  thefilename is
	// explicitly set to 0 so that filename() returns a well-defined (null)
	// pointer until attach() or setFilename() has been called.
	Lexer(int trail=0): 
		verboselevel(0), frozen(0), contractAction(0), thetrailsize(trail),
		la(0), inf(0), thefilename(0), lalineno(1), curTok(0), curToki(-1)
					{}

	// Raises DestroyingFrozenLexer if the lexer is still (fully or partially) frozen.
	// Virtual because Lexer is an abstract base class: a derived lexer may be
	// destroyed through a pointer to Lexer.
	virtual ~Lexer() 		{ if (frozen) DestroyingFrozenLexer.raise("Attempt to destroy a frozen lexer!"); }

	// Attach the lexer to an input character stream.
	// Records the name and stream, clears the lookahead character, reads the
	// first character with in(), and then moves to the first token.
	// Neither f nor nam is copied; only the pointers are stored.
	void attach(istream *f, const char *nam)
				 	{ thefilename = nam; inf = f; la = 0; in(); move(); }

	// Returns true if there are no tokens in the current window.
	// (True before the lexer is attached to a file.)
	int emptyWindow() const		{ return curToki == -1; }

	// Access the name of the input file.
	// filename() returns 0 if no name has been set yet.
	void setFilename(const char *nam)	{ thefilename = nam; }
	const char *filename() const		{ return thefilename; }

	// Access the trail size.
	void setTrailsize(int l)	{ thetrailsize = l; }
	int trailsize() const		{ return thetrailsize; }

	// Move the current token pointer i tokens to the right (left if i is negative), 
	// extending and contracting the window as necessary.
	// Does nothing if i is 0 or if resize(i) fails.
	void move(int i=1)	 	{ if (i != 0 && resize(i)) {
					  	curToki += i; 
					  	curTok = theWindow[(unsigned)curToki]; 
					  	// Automatic contracting: drop whatever lies more than
					  	// thetrailsize tokens to the left of the current token.
					  	if (thetrailsize != infinity) implicitContract(curToki - thetrailsize);
					  	if (verboselevel == 1) cerr << *curTok << endl;
					}}

	// Same as move, but i is relative to beginning of window.
	void moveAbs(int i)	 	{ move(i-curToki); }

	// Add i tokens to the right of the window (but don't move the current token).
	// Defined outside this header.
	int extend(int i=1);

	// Remove i tokens from the left of the window, or as many as possible up to the current token.
	// Return the number actually removed.
	int contract(int i=1)		{ return docontract(i, contractAction, 1); }

	// Remove enough tokens from the left of the window so that there are no more than i to the left of the current token.
	int contractTo(int i)		{ return docontract(curToki - i, contractAction, 1); }

	// Same as above two, but don't do contractAction.
	int discard(int i=1)		{ return docontract(i, 0, 1); }
	int discardTo(int i)		{ return docontract(curToki - i, 0, 1); }

	// Disable all contracting, both explicit and automatic.
	void freeze()			{ frozen = 2; }

	// Disable automatic contracting.
	void partialFreeze()		{ frozen = 1; }

	// Enable all contracting.
	void melt()			{ frozen = 0; }

	// 0 is quiet, 1 is "show each token as it becomes current," >1 is "show each token as it enters window."
	void verbose(int i)		{ verboselevel = i; }

	// Set the contract action.  Return the old value.
	ContractAction setContractAction(ContractAction ca)
					{ ContractAction cur = contractAction; 
					  contractAction = ca;
					  return cur; 
					}

	// Get the current contract action.
	ContractAction getContractAction() const
					{ return contractAction; }

	// Return the current size of the window.
 	int windowSize() const		{ return theWindow.length(); }

	// Return the index in the window of the current token.
	int curpos() const		{ return curToki; }

	// Functions for retrieving information from the window.  
	// Except for the *Abs functions, i is offset relative to the current token.
	//
 	// The first function returns a pointer to the *base part* of the i'th token.
	// This function is provided as a courtesy to the client, so she doesn't have to do
	// an inordinate amount of checking on the types of tokens.
	//
	// For i == 0 the current token pointer is returned as is (it is 0 until
	// the first successful move).  For any other i the result is 0 if
	// resize(i) fails.
	Token *window(int i=0)		{ Token *retval;
					  if (i == 0) retval = curTok;
					  else if (resize(i)) retval = theWindow[(unsigned)(curToki+i)]; 
					  else retval = 0;
					  return retval;
					}

	// Same as window, but i is relative to beginning of window.
	Token *windowAbs(int i)		{ return window(i-curToki); }
	//
	// The rest of the functions are used to get tokens of particular types from the window.  
	// Since they are specific to the particular language being lexed, they must be defined in the derived class.
	// They shouldn't move the current token.

	// Line number access.
	void setLineno(int l)		{ lalineno = l; }
	int lineno() const		{ return lalineno; }

protected:
// methods:
	// Contracting triggered automatically by move() (is_explicit == 0).
	int implicitContract(int i)	{ return docontract(i, contractAction, 0); }

	// The implementation of contract, contractTo, discard, discardTo, and implicitContract.  
	// Returns the number of tokens actually removed.
	int docontract(int i, ContractAction ca, int is_explicit);  // defined in lexer.c.

	// The user-supplied gettok.  It should return a pointer to the next Token in the input stream.
	virtual Token *gettok() = 0;

	// Make i a valid offset from the current token.
	// Asks extend() for the number of tokens the window is short of index
	// curToki+i (the count may be zero or negative; extend() is defined
	// elsewhere -- presumably it adds nothing in that case).
	// Returns 1 on success.  If the target index lies to the left of the
	// window, raises DiscardedToken and returns 0.
	int resize(int i)		{ int retval = 1;
					  extend(curToki+i-theWindow.length()+1);
					  if (curToki+i < 0) {
						DiscardedToken.raise("Attempt to go to or get a discarded token!");
						retval = 0;
					  }
					  return retval;
					}

	// Make sure the i'th token has type type.
	// Does *not* do a resize(i), so the caller must already have made
	// offset i valid.  Returns 1 on a match; otherwise raises BadHandshake
	// and returns 0.
	int handshake(int type, int i)	{ int retval = 1;
					  if (((Token *)(theWindow[(unsigned)(curToki+i)]))->type != type) {
						BadHandshake.raise("Bad handshake with Lexer!");
						retval = 0;
					  }
					  return retval;
					}

	// Get the next character from the input stream and put it in la, and also return it.  
	// The line counter is advanced when the character being left behind is a newline.
	// la becomes 0 when there is no stream, the stream is at eof, or the read fails.
	char in()			{ if (la == '\n') 
						lalineno++; 
					  if (inf == 0 || inf->eof() || !inf->get(la))
						la = 0;
					  return la;		
					}

	// Return what the next call to in() will return.
	// istream::peek() yields an int that is negative (EOF) when no character
	// is available; that case is mapped to 0 so the result agrees with in(),
	// instead of being narrowed to (char)EOF.
	char peek()			{ if (inf == 0 || inf->eof())
						return 0;
					  int next = inf->peek();
					  return (next < 0) ? (char)0 : (char)next;
					}
protected:
// data:
	// See verbose() above.
	int verboselevel;

	// 1 if automatic contracting is disabled, 2 if all contracting is disabled, 0 otherwise.
	int frozen;

	// User-specified action to perform on token when contracting it out of window
	// due to exceeding trail size, or explicit request from client.  
	ContractAction contractAction;

	// Maximum allowed number of tokens in window to the left of the current token.
	int thetrailsize;

	// Input character *following* the current token.
	char la;

	// The input file.
	istream *inf;

	// Name of input stream (0 until set), and line number of la.
	const char *thefilename;
	int lalineno;

	// The token window.  curTok points to the current token, and curToki is its index in the window.
	List_of_p<Token> theWindow;
	Token *curTok;
	int curToki;
};


// Convenience macros for code that drives a Lexer.  Each one expands to an
// expression on a variable named `lexer`, which must be in scope at the
// point of use and must support `->window()` / `->move()` (i.e. a Lexer*).
// The members type, lexeme, lineno and ws are read from the Token returned
// by window(); Token is declared outside this header.
// NOTE(review): window() returns the current-token pointer, which is 0 until
// the first successful move, and window(i) for i != 0 returns 0 when the
// offset cannot be made valid.  None of these macros checks for that before
// dereferencing.

// Type of the current token.
#define TOK	lexer->window()->type
// Type of the token one position to the left of the current token.
#define PREVTOK	lexer->window(-1)->type
// Type of the token one position to the right of the current token.
#define NEXTTOK	lexer->window(1)->type
// lexeme member of the current token.
#define LEXEME	lexer->window()->lexeme
// lineno member of the current token.
#define LINENO	lexer->window()->lineno
// ws member of the current token.
#define LEXWS	lexer->window()->ws
// Advance the current token by one (move() with its default argument).
#define ADV	lexer->move()


// DEFINE_GET(a_typename, typecode) expands to the definition of a member
// function
//     void get(a_typename &x, int i=0)
// It calls resize() and handshake(), which are protected members of Lexer,
// so it is presumably meant to be expanded inside a class derived from Lexer.
// The generated function makes offset i valid with resize(i), checks with
// handshake() that the token there has type `typecode`, and only then
// assigns that token, viewed as an a_typename, to x.  If either check fails,
// x is left unchanged (the failing check has already raised its Objection).
// NOTE(review): the conversion from Token* to a_typename* is an unchecked
// C-style cast; its safety rests entirely on the type-code handshake.
#define DEFINE_GET(a_typename, typecode)			\
	void get(a_typename &x, int i=0)			\
		{ if (resize(i) && handshake(typecode, i))	\
			x = *(a_typename*)window(i);		\
		}
« April 2024 »
Su Mo Tu We Th Fr Sa
1 2 3 4 5 6
7 8 9 10 11 12 13
14 15 16 17 18 19 20
21 22 23 24 25 26 27
28 29 30
 

Powered by Plone CMS, the Open Source Content Management System

This site conforms to the following standards: