Personal tools
You are here: Home Projects C++ Cfront releases Release 3.0.3 source libSC Path libx strmatch.c
Document Actions

strmatch.c

by Michael L Powell last modified 2007-01-26 03:24

Click here to get the file

Size 6.1 kB - File type text/plain

File contents

/*ident	"@(#)Path:libx/strmatch.c	3.1" */
/*
 * D. G. Korn
 * G. S. Fowler
 * AT&T Bell Laboratories
 *
 * match shell file patterns -- derived from Bourne and Korn shell gmatch()
 *
 *	sh pattern	egrep RE	description
 *	----------	--------	-----------
 *	*		.*		0 or more chars
 *	?		.		any single char
 *	[.]		[.]		char class
 *	[!.]		[^.]		negated char class
 *	*(.)		(.)*		0 or more of
 *	+(.)		(.)+		1 or more of
 *	?(.)		(.)?		0 or 1 of
 *	(.)		(.)		1 of
 *	@(.)		(.)		1 of
 *	a|b		a|b		a or b
 *	a&b				a and b
 *	!(.)				none of
 *
 * \ used to escape metacharacters
 *
 *	*, ?, (, |, &, ), [, \ must be \'d outside of [...]
 *	only ] must be \'d inside [...]
 *
 * BUG: unbalanced ) terminates top level pattern
 */

#ifdef MULTIBYTE

#include "national.h"

/*
 * REGISTER expands to nothing in this configuration -- presumably
 * because getchar(x) below applies & to x, which is not permitted
 * for a variable declared register
 */
#define REGISTER

#define C_MASK		(3<<(7*ESS_MAXCHAR))	/* character classes	*/
/*
 * fetch the next char at x via mb_getchar(), which also advances x
 */
#define getchar(x)	mb_getchar((unsigned char**)(&(x)))

static int		mb_getchar();

#else

#define REGISTER	register

/*
 * fetch the char at x and advance x by one
 * NOTE: this deliberately replaces any <stdio.h> getchar in this file
 */
#define getchar(x)	(*x++)

#endif

/*
 * bounded getchar: yields 0 and leaves s alone once s has reached e,
 * otherwise fetches the next char and advances s
 */
#define getsource(s,e)	(((s)>=(e))?0:getchar(s))

/*
 * endmatch: subject position stored by onematch() when it reaches the
 *	pattern's terminating NUL; cleared and returned by
 *	submatch_Path_ATTLC()
 * minmatch: when non-zero onematch() succeeds as soon as the pattern's
 *	NUL is reached, whether or not subject text remains; set to 0 by
 *	strmatch_Path_ATTLC() and to !m by submatch_Path_ATTLC()
 */
static char*		endmatch;
static int		minmatch;

/*
 * forward declarations for the mutually recursive matcher routines
 */
static int		grpmatch();
static int		onematch();
static char*		gobble();

/*
 * whole-string match of s against the shell pattern p
 * yields 1 when s matches and 0 when it does not
 */

int
strmatch_Path_ATTLC(s, p)
register char*	s;
char*		p;
{
	char*	end;

	/*
	 * with minmatch clear the matcher only succeeds at the end of
	 * the pattern when the subject has been used up as well
	 */
	minmatch = 0;
	end = s + strlen(s);
	return(grpmatch(s, p, end, (char*)0));
}

/*
 * match a leading substring of s against the shell pattern p
 *
 * m selects the flavour of match: 0 asks for a minimal match (minmatch
 * is set), any other value for a maximal one (minmatch is cleared)
 *
 * the value returned is the subject position the matcher left in
 * endmatch, i.e. the first char after the matched substring, or 0 if
 * the matcher never recorded a position
 */

char*
submatch_Path_ATTLC(s, p, m)
register char*	s;
char*		p;
int		m;
{
	char*	end = s + strlen(s);

	minmatch = (m == 0);
	endmatch = (char*)0;

	/* only the endmatch side effect matters, not the match status */
	(void)grpmatch(s, p, end, (char*)0);
	return(endmatch);
}

/*
 * match the subject s (bounded by e) against a pattern group
 *
 * the group at p is a list of alternatives separated by |, each of
 * which is a list of terms separated by & -- the group matches when,
 * for some alternative, every one of its terms matches
 * g is handed through to onematch() unchanged
 * returns 1 on match, 0 otherwise
 */

static int
grpmatch(s, p, e, g)
char*		s;
register char*	p;
char*		e;
char*		g;
{
	register char*	term;

	for (;;)
	{
		/*
		 * walk the & terms of this alternative; running out of
		 * terms while all of them matched is success
		 */
		term = p;
		while (onematch(s, term, e, g))
		{
			term = gobble(term, '&');
			if (!term) return(1);
		}

		/*
		 * a term failed: move on to the next | alternative, if any
		 */
		p = gobble(p, '|');
		if (!p) return(0);
	}
}

/*
 * match a single pattern
 * e is the end (0) of the substring in s
 * g marks the start of a repeated subgroup pattern
 *
 * the pattern at p runs up to its terminating NUL or to an unquoted
 * |, & or ) at the current nesting level
 * returns 1 if the subject s (bounded by e) matches, 0 otherwise
 * reaching the pattern's NUL stores the current subject position in
 * endmatch and, when minmatch is set, succeeds without requiring the
 * subject to be used up
 */

static int
onematch(s, p, e, g)
char*		s;
REGISTER char*	p;
char*		e;
char*		g;
{
	register int 	pc;	/* current pattern char			*/
	register int 	sc;	/* current subject char, 0 at e		*/
	register int	n;	/* scratch: see the * and [ cases	*/
	char*		olds;	/* subject position before sc was read	*/
	char*		oldp;	/* saved pattern position		*/

	do
	{
		olds = s;
		sc = getsource(s, e);
		switch (pc = getchar(p))
		{
		case '(':
		case '*':
		case '?':
		case '+':
		case '@':
		case '!':
			if (pc == '(' || *p == '(')
			{
				/*
				 * a (...) group, possibly introduced by one
				 * of * ? + @ !
				 */

				char*	subp;

				/*
				 * give back the subject char, remember where
				 * the group construct starts, and locate the
				 * first char inside the parentheses
				 */

				s = olds;
				oldp = p - 1;
				subp = p + (pc != '(');

				/*
				 * p moves to just past the closing ) -- an
				 * unterminated group cannot match
				 */

				if (!(p = gobble(subp, 0))) return(0);

				/*
				 * * and ? groups may match nothing at all; a +
				 * group may only do so when it is being
				 * repeated (oldp == g)
				 * note that && binds tighter than || here
				 */

				if (pc == '*' || pc == '?' || pc == '+' && oldp == g)
				{
					if (onematch(s, p, e, (char*)0)) return(1);

					/*
					 * otherwise the group has to take at
					 * least one subject char
					 */

					if (!sc || !getsource(s, e)) return(0);
				}

				/*
				 * for the repeating forms the continuation is
				 * the group construct itself
				 */

				if (pc == '*' || pc == '+') p = oldp;

				/*
				 * from here on pc is the result wanted from
				 * grpmatch(): 0 for !(...) and 1 otherwise
				 */

				pc = (pc != '!');

				/*
				 * try successively longer prefixes olds..s as
				 * the text of the group, matching whatever
				 * remains of the subject against p
				 */

				do
				{
					if (grpmatch(olds, subp, s, (char*)0) == pc && onematch(s, p, e, oldp)) return(1);
				} while (s < e && getchar(s));
				return(0);
			}
			else if (pc == '*')
			{
				/*
				 * several stars are the same as one
				 * (a star directly in front of ( is kept: it
				 * introduces a *(...) group)
				 */

				while (*p == '*')
					if (*(p + 1) == '(') break;
					else p++;
				oldp = p;

				/*
				 * look at what follows the star: n is set when
				 * it is another pattern operator and cleared
				 * when it is a literal char, which is then
				 * left in pc
				 */

				switch (pc = getchar(p))
				{
				case '@':
				case '!':
				case '+':
					/* operators only when followed by ( */
					n = *p == '(';
					break;
				case '(':
				case '[':
				case '?':
				case '*':
					n = 1;
					break;
				case 0:
					/*
					 * trailing star: the match ends at e,
					 * or at the star's own position for a
					 * minimal match
					 */
					endmatch = minmatch ? olds : e;
					/*FALLTHROUGH*/
				case '|':
				case '&':
				case ')':
					return(1);
				case '\\':
					if (!(pc = getchar(p))) return(0);
					/*FALLTHROUGH*/
				default:
					n = 0;
					break;
				}

				/*
				 * rewind to just after the star and try the
				 * rest of the pattern at each subject position
				 * from olds on; with a literal follower only
				 * positions holding that char are tried
				 */

				p = oldp;
				do
				{
					if ((n || pc == sc) && onematch(olds, p, e, (char*)0)) return(1);
					olds = s;
				} while (sc && (sc = getsource(s, e)));
				return(0);
			}
			/*
			 * a lone ? accepts any subject char; + @ ! that are
			 * not followed by ( are compared literally
			 */
			else if (pc != '?' && pc != sc) return(0);
			break;
		case 0:
			/*
			 * end of the whole pattern
			 */
			endmatch = olds;
			if (minmatch) return(1);
			/*FALLTHROUGH*/
		case '|':
		case '&':
		case ')':
			/*
			 * end of this pattern: a match only if the subject
			 * has been used up too
			 */
			return(!sc);
		case '[':
			{
				int	ok = 0;		/* sc found in the class	*/
				int	invert;		/* class began with !		*/

				/*
				 * n holds the previous class member and is 0
				 * while none has been seen, which is what
				 * makes a leading ] an ordinary member
				 */

				n = 0;
				if (invert = *p == '!') p++;
				for (;;)
				{
					/* the pattern ended inside the class */
					if (!(pc = getchar(p))) return(0);
					else if (pc == ']' && n)
					{
						/* end of class: accept or reject sc */
						if (ok != invert) break;
						return(0);
					}
					else if (pc == '-' && n && *p != ']')
					{
						/*
						 * range n-pc; the upper bound
						 * may be quoted with \
						 */
						if (!(pc = getchar(p)) || pc == '\\' && !(pc = getchar(p))) return(0);
#ifdef MULTIBYTE
						/*
						 * must be in same char set
						 */

						if ((n & C_MASK) != (pc & C_MASK))
						{
							if (sc == pc) ok = 1;
						}
						else
#endif
						if (sc >= n && sc <= pc || sc == pc) ok = 1;
					}
					/*
					 * \ quotes the next char, which is then
					 * handled as a plain member below
					 */
					else if (pc == '\\' && !(pc = getchar(p))) return(0);
					else
					{
						if (sc == pc) ok = 1;
						n = pc;
					}
				}
			}
			break;
		case '\\':
			if (!(pc = getchar(p))) return(0);
			/*FALLTHROUGH*/
		default:
			if (pc != sc) return(0);
			break;
		}
	} while (sc);

	/*
	 * the subject ran out before the pattern did
	 */

	return(0);
}

/*
 * gobble chars up to <sub> or ) keeping track of (...) and [...]
 * sub must be one of { '|', '&', 0 }
 * 0 returned if s runs out
 *
 * on success the position just after the delimiter is returned:
 *
 *	sub == '|'	the next | at this nesting level
 *	sub == '&'	the next & at this nesting level, but 0 if a |
 *			at this level comes first
 *	sub == 0	the ) that closes the current group
 *
 * for '|' and '&' a ) that closes the current group yields 0 as well
 * \ quotes the following char; anything inside [...] or inside a
 * nested (...) is skipped over
 *
 * s is deliberately not declared register: under MULTIBYTE getchar(s)
 * applies & to s
 */

static char*
gobble(s, sub)
char*		s;
register int	sub;
{
	register int	p = 0;	/* depth of nested ( seen so far	*/
	register char*	b = 0;	/* first member of an open [...], or 0	*/

	for (;;) switch (getchar(s))
	{
	case '\\':
		/* skip the quoted char; a trailing \ ends the pattern */
		if (getchar(s)) break;
		/*FALLTHROUGH*/
	case 0:
		return(0);
	case '[':
		if (!b)
		{
			/*
			 * step over the negation mark so that b is the
			 * first real member: a ] in that position is
			 * literal in [!]...] just as it is in []...],
			 * which is how onematch() reads a class
			 */

			if (*s == '!') s++;
			b = s;
		}
		break;
	case ']':
		/* a ] that is the first class member does not close it */
		if (b && b != (s - 1)) b = 0;
		break;
	case '(':
		if (!b) p++;
		break;
	case ')':
		/* a ) with no ( pending closes the enclosing group */
		if (!b && p-- <= 0) return(sub ? 0 : s);
		break;
	case '&':
		if (!b && !p && sub == '&') return(s);
		break;
	case '|':
		if (!b && !p)
		{
			/* a | also ends the current list of & terms */
			if (sub == '|') return(s);
			else if (sub == '&') return(0);
		}
		break;
	}
}

#ifdef MULTIBYTE

/*
 * decode the next char at (*address) and step (*address) past it
 *
 * when echarset() reports 0 for the first byte that byte is the result
 * otherwise the value reported is taken as the character set
 * designation and the result is built from it followed by the low 7
 * bits of each of the in_csize() bytes of the char, which leaves the
 * designation in the bits defined by C_MASK
 * for designation 1 the first byte itself counts as one of those bytes
 * NOTE(review): exact echarset()/in_csize() semantics come from
 * national.h and are not visible here -- confirm against that header
 */

static int
mb_getchar(address)
unsigned char**	address;
{
	register unsigned char*	src = *address;
	register int		ch = *src++;
	register int		set;
	register int		left;
	int			lead;

	set = echarset(ch);
	if (set)
	{
		/* only for set 1 does the first byte carry data */
		lead = (set == 1) ? ch : 0;
		left = in_csize(set);

		/* pad so that short chars line up with long ones */
		ch = set << (7 * (ESS_MAXCHAR - left));
		if (lead)
		{
			left--;
			ch = (ch << 7) | (lead & ~HIGHBIT);
		}
		for (; left > 0; left--)
			ch = (ch << 7) | ((*src++) & ~HIGHBIT);
	}
	*address = src;
	return(ch);
}

#endif

« April 2024 »
Su Mo Tu We Th Fr Sa
1 2 3 4 5 6
7 8 9 10 11 12 13
14 15 16 17 18 19 20
21 22 23 24 25 26 27
28 29 30
 

Powered by Plone CMS, the Open Source Content Management System

This site conforms to the following standards: