src/lex.c - platform/external/mksh - Git at Google

 /*	$OpenBSD: lex.c,v 1.44 2008/07/03 17:52:08 otto Exp $	*/

 /*-
  * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
  *	Thorsten Glaser <tg@mirbsd.org>
  *
  * Provided that these terms and disclaimer and all copyright notices
  * are retained or reproduced in an accompanying document, permission
  * is granted to deal in this work without restriction, including un-
  * limited rights to use, publicly perform, distribute, sell, modify,
  * merge, give away, or sublicence.
  *
  * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to
  * the utmost extent permitted by applicable law, neither express nor
  * implied; without malicious intent or gross negligence. In no event
  * may a licensor, author or contributor be held liable for indirect,
  * direct, other damage, loss, or other issues arising in any way out
  * of dealing in the work, even if advised of the possibility of such
  * damage or existence of a defect, except proven that it results out
  * of said person's immediate fault when using the work as intended.
  */

 #include "sh.h"

 __RCSID("$MirOS: src/bin/mksh/lex.c,v 1.118 2010/07/25 11:35:41 tg Exp $");

 /*
  * states while lexing word
  */
 #define SBASE		0	/* outside any lexical constructs */
 #define SWORD		1	/* implicit quoting for substitute() */
 #define SLETPAREN	2	/* inside (( )), implicit quoting */
 #define SSQUOTE		3	/* inside '' */
 #define SDQUOTE		4	/* inside "" */
 #define SEQUOTE		5	/* inside $'' */
 #define SBRACE		6	/* inside ${} */
 #define SQBRACE		7	/* inside "${}" */
 #define SCSPAREN	8	/* inside $() */
 #define SBQUOTE		9	/* inside `` */
 #define SASPAREN	10	/* inside $(( )) */
 #define SHEREDELIM	11	/* parsing <<,<<- delimiter */
 #define SHEREDQUOTE	12	/* parsing " in <<,<<- delimiter */
 #define SPATTERN	13	/* parsing *(...|...) pattern (*+?@!) */
 #define STBRACE		14	/* parsing ${...[#%]...} */
 #define SLETARRAY	15	/* inside =( ), just copy */
 #define SADELIM		16	/* like SBASE, looking for delimiter */
 #define SHERESTRING	17	/* parsing <<< string */

 /* Structure to keep track of the lexing state and the various pieces of info
  * needed for each particular state. */
 typedef struct lex_state Lex_state;
 struct lex_state {
 	int ls_state;
 	union {
 		/* $(...) */
 		struct scsparen_info {
 			int nparen;	/* count open parenthesis */
 			int csstate;	/* XXX remove */
 #define ls_scsparen ls_info.u_scsparen
 		} u_scsparen;

 		/* $((...)) */
 		struct sasparen_info {
 			int nparen;	/* count open parenthesis */
 			int start;	/* marks start of $(( in output str */
 #define ls_sasparen ls_info.u_sasparen
 		} u_sasparen;

 		/* ((...)) */
 		struct sletparen_info {
 			int nparen;	/* count open parenthesis */
 #define ls_sletparen ls_info.u_sletparen
 		} u_sletparen;

 		/* `...` */
 		struct sbquote_info {
 			int indquotes;	/* true if in double quotes: "`...`" */
 #define ls_sbquote ls_info.u_sbquote
 		} u_sbquote;

 #ifndef MKSH_SMALL
 		/* =(...) */
 		struct sletarray_info {
 			int nparen;	/* count open parentheses */
 #define ls_sletarray ls_info.u_sletarray
 		} u_sletarray;
 #endif

 		/* ADELIM */
 		struct sadelim_info {
 			unsigned char nparen;	/* count open parentheses */
 #define SADELIM_BASH	0
 #define SADELIM_MAKE	1
 			unsigned char style;
 			unsigned char delimiter;
 			unsigned char num;
 			unsigned char flags;	/* ofs. into sadelim_flags[] */
 #define ls_sadelim ls_info.u_sadelim
 		} u_sadelim;

 		/* $'...' */
 		struct sequote_info {
 			bool got_NUL;	/* ignore rest of string */
 #define ls_sequote ls_info.u_sequote
 		} u_sequote;

 		Lex_state *base;	/* used to point to next state block */
 	} ls_info;
 };

 typedef struct {
 	Lex_state *base;
 	Lex_state *end;
 } State_info;

 static void readhere(struct ioword *);
 static int getsc__(void);
 static void getsc_line(Source *);
 static int getsc_bn(void);
 static int s_get(void);
 static void s_put(int);
 static char *get_brace_var(XString *, char *);
 static int arraysub(char **);
 static const char *ungetsc(int);
 static void gethere(bool);
 static Lex_state *push_state_(State_info *, Lex_state *);
 static Lex_state *pop_state_(State_info *, Lex_state *);

 static int dopprompt(const char *, int, bool);

 static int backslash_skip;
 static int ignore_backslash_newline;

 /* optimised getsc_bn() */
 #define _getsc()	(*source->str != '\0' && *source->str != '\\' \
 			 && !backslash_skip && !(source->flags & SF_FIRST) \
 			 ? *source->str++ : getsc_bn())
 /* optimised getsc__() */
 #define	_getsc_()	((*source->str != '\0') && !(source->flags & SF_FIRST) \
 			 ? *source->str++ : getsc__())

 #ifdef MKSH_SMALL
 static int getsc(void);
 static int getsc_(void);

 static int
 getsc(void)
 {
 	return (_getsc());
 }

 static int
 getsc_(void)
 {
 	return (_getsc_());
 }
 #else
 /* !MKSH_SMALL: use them inline */
 #define getsc()		_getsc()
 #define getsc_()	_getsc_()
 #endif

 #define STATE_BSIZE	32

 #define PUSH_STATE(s)	do {					\
 	if (++statep == state_info.end)				\
 		statep = push_state_(&state_info, statep);	\
 	state = statep->ls_state = (s);				\
 } while (0)

 #define POP_STATE()	do {					\
 	if (--statep == state_info.base)			\
 		statep = pop_state_(&state_info, statep);	\
 	state = statep->ls_state;				\
 } while (0)

 /**
  * Lexical analyser
  *
  * tokens are not regular expressions, they are LL(1).
  * for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
  * hence the state stack.
  */

 int
 yylex(int cf)
 {
 	Lex_state states[STATE_BSIZE], *statep, *s2, *base;
 	State_info state_info;
 	int c, c2, state;
 	XString ws;		/* expandable output word */
 	char *wp;		/* output word pointer */
 	char *sp, *dp;

  Again:
 	states[0].ls_state = -1;
 	states[0].ls_info.base = NULL;
 	statep = &states[1];
 	state_info.base = states;
 	state_info.end = &state_info.base[STATE_BSIZE];

 	Xinit(ws, wp, 64, ATEMP);

 	backslash_skip = 0;
 	ignore_backslash_newline = 0;

 	if (cf&ONEWORD)
 		state = SWORD;
 	else if (cf&LETEXPR) {
 		/* enclose arguments in (double) quotes */
 		*wp++ = OQUOTE;
 		state = SLETPAREN;
 		statep->ls_sletparen.nparen = 0;
 #ifndef MKSH_SMALL
 	} else if (cf&LETARRAY) {
 		state = SLETARRAY;
 		statep->ls_sletarray.nparen = 0;
 #endif
 	} else {		/* normal lexing */
 		state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
 		while ((c = getsc()) == ' ' || c == '\t')
 			;
 		if (c == '#') {
 			ignore_backslash_newline++;
 			while ((c = getsc()) != '\0' && c != '\n')
 				;
 			ignore_backslash_newline--;
 		}
 		ungetsc(c);
 	}
 	if (source->flags & SF_ALIAS) {	/* trailing ' ' in alias definition */
 		source->flags &= ~SF_ALIAS;
 		cf |= ALIAS;
 	}

 	/* Initial state: one of SBASE SHEREDELIM SWORD SASPAREN */
 	statep->ls_state = state;

 	/* check for here string */
 	if (state == SHEREDELIM) {
 		c = getsc();
 		if (c == '<') {
 			state = SHERESTRING;
 			while ((c = getsc()) == ' ' || c == '\t')
 				;
 			ungetsc(c);
 			c = '<';
 			goto accept_nonword;
 		}
 		ungetsc(c);
 	}

 	/* collect non-special or quoted characters to form word */
 	while (!((c = getsc()) == 0 ||
 	    ((state == SBASE || state == SHEREDELIM || state == SHERESTRING) &&
 	    ctype(c, C_LEX1)))) {
  accept_nonword:
 		Xcheck(ws, wp);
 		switch (state) {
 		case SADELIM:
 			if (c == '(')
 				statep->ls_sadelim.nparen++;
 			else if (c == ')')
 				statep->ls_sadelim.nparen--;
 			else if (statep->ls_sadelim.nparen == 0 &&
 			    (c == /*{*/ '}' || c == statep->ls_sadelim.delimiter)) {
 				*wp++ = ADELIM;
 				*wp++ = c;
 				if (c == /*{*/ '}' || --statep->ls_sadelim.num == 0)
 					POP_STATE();
 				if (c == /*{*/ '}')
 					POP_STATE();
 				break;
 			}
 			/* FALLTHROUGH */
 		case SBASE:
 			if (c == '[' && (cf & (VARASN|ARRAYVAR))) {
 				*wp = EOS;	/* temporary */
 				if (is_wdvarname(Xstring(ws, wp), false)) {
 					char *p, *tmp;

 					if (arraysub(&tmp)) {
 						*wp++ = CHAR;
 						*wp++ = c;
 						for (p = tmp; *p; ) {
 							Xcheck(ws, wp);
 							*wp++ = CHAR;
 							*wp++ = *p++;
 						}
 						afree(tmp, ATEMP);
 						break;
 					} else {
 						Source *s;

 						s = pushs(SREREAD,
 						    source->areap);
 						s->start = s->str =
 						    s->u.freeme = tmp;
 						s->next = source;
 						source = s;
 					}
 				}
 				*wp++ = CHAR;
 				*wp++ = c;
 				break;
 			}
 			/* FALLTHROUGH */
  Sbase1:		/* includes *(...|...) pattern (*+?@!) */
 			if (c == '*' || c == '@' || c == '+' || c == '?' ||
 			    c == '!') {
 				c2 = getsc();
 				if (c2 == '(' /*)*/ ) {
 					*wp++ = OPAT;
 					*wp++ = c;
 					PUSH_STATE(SPATTERN);
 					break;
 				}
 				ungetsc(c2);
 			}
 			/* FALLTHROUGH */
  Sbase2:		/* doesn't include *(...|...) pattern (*+?@!) */
 			switch (c) {
 			case '\\':
  getsc_qchar:
 				if ((c = getsc())) {
 					/* trailing \ is lost */
 					*wp++ = QCHAR;
 					*wp++ = c;
 				}
 				break;
 			case '\'':
  open_ssquote:
 				*wp++ = OQUOTE;
 				ignore_backslash_newline++;
 				PUSH_STATE(SSQUOTE);
 				break;
 			case '"':
  open_sdquote:
 				*wp++ = OQUOTE;
 				PUSH_STATE(SDQUOTE);
 				break;
 			default:
 				goto Subst;
 			}
 			break;

  Subst:
 			switch (c) {
 			case '\\':
 				c = getsc();
 				switch (c) {
 				case '"':
 					if ((cf & HEREDOC))
 						goto heredocquote;
 					/* FALLTHROUGH */
 				case '\\':
 				case '$': case '`':
  store_qchar:
 					*wp++ = QCHAR;
 					*wp++ = c;
 					break;
 				default:
  heredocquote:
 					Xcheck(ws, wp);
 					if (c) {
 						/* trailing \ is lost */
 						*wp++ = CHAR;
 						*wp++ = '\\';
 						*wp++ = CHAR;
 						*wp++ = c;
 					}
 					break;
 				}
 				break;
 			case '$':
  subst_dollar:
 				c = getsc();
 				if (c == '(') /*)*/ {
 					c = getsc();
 					if (c == '(') /*)*/ {
 						PUSH_STATE(SASPAREN);
 						statep->ls_sasparen.nparen = 2;
 						statep->ls_sasparen.start =
 						    Xsavepos(ws, wp);
 						*wp++ = EXPRSUB;
 					} else {
 						ungetsc(c);
 						PUSH_STATE(SCSPAREN);
 						statep->ls_scsparen.nparen = 1;
 						statep->ls_scsparen.csstate = 0;
 						*wp++ = COMSUB;
 					}
 				} else if (c == '{') /*}*/ {
 					*wp++ = OSUBST;
 					*wp++ = '{'; /*}*/
 					wp = get_brace_var(&ws, wp);
 					c = getsc();
 					/* allow :# and :% (ksh88 compat) */
 					if (c == ':') {
 						*wp++ = CHAR;
 						*wp++ = c;
 						c = getsc();
 						if (c == ':') {
 							*wp++ = CHAR;
 							*wp++ = '0';
 							*wp++ = ADELIM;
 							*wp++ = ':';
 							PUSH_STATE(SBRACE);
 							PUSH_STATE(SADELIM);
 							statep->ls_sadelim.style = SADELIM_BASH;
 							statep->ls_sadelim.delimiter = ':';
 							statep->ls_sadelim.num = 1;
 							statep->ls_sadelim.nparen = 0;
 							break;
 						} else if (ksh_isdigit(c) ||
 						    c == '('/*)*/ || c == ' ' ||
 						    c == '$' /* XXX what else? */) {
 							/* substring subst. */
 							if (c != ' ') {
 								*wp++ = CHAR;
 								*wp++ = ' ';
 							}
 							ungetsc(c);
 							PUSH_STATE(SBRACE);
 							PUSH_STATE(SADELIM);
 							statep->ls_sadelim.style = SADELIM_BASH;
 							statep->ls_sadelim.delimiter = ':';
 							statep->ls_sadelim.num = 2;
 							statep->ls_sadelim.nparen = 0;
 							break;
 						}
 					} else if (c == '/') {
 						*wp++ = CHAR;
 						*wp++ = c;
 						if ((c = getsc()) == '/') {
 							*wp++ = ADELIM;
 							*wp++ = c;
 						} else
 							ungetsc(c);
 						PUSH_STATE(SBRACE);
 						PUSH_STATE(SADELIM);
 						statep->ls_sadelim.style = SADELIM_BASH;
 						statep->ls_sadelim.delimiter = '/';
 						statep->ls_sadelim.num = 1;
 						statep->ls_sadelim.nparen = 0;
 						break;
 					}
 					/* If this is a trim operation,
 					 * treat (,|,) specially in STBRACE.
 					 */
 					if (ctype(c, C_SUBOP2)) {
 						ungetsc(c);
 						PUSH_STATE(STBRACE);
 					} else {
 						ungetsc(c);
 						if (state == SDQUOTE)
 							PUSH_STATE(SQBRACE);
 						else
 							PUSH_STATE(SBRACE);
 					}
 				} else if (ksh_isalphx(c)) {
 					*wp++ = OSUBST;
 					*wp++ = 'X';
 					do {
 						Xcheck(ws, wp);
 						*wp++ = c;
 						c = getsc();
 					} while (ksh_isalnux(c));
 					*wp++ = '\0';
 					*wp++ = CSUBST;
 					*wp++ = 'X';
 					ungetsc(c);
 				} else if (ctype(c, C_VAR1 | C_DIGIT)) {
 					Xcheck(ws, wp);
 					*wp++ = OSUBST;
 					*wp++ = 'X';
 					*wp++ = c;
 					*wp++ = '\0';
 					*wp++ = CSUBST;
 					*wp++ = 'X';
 				} else if (c == '\'' && (state == SBASE)) {
 					/* XXX which other states are valid? */
 					*wp++ = OQUOTE;
 					ignore_backslash_newline++;
 					PUSH_STATE(SEQUOTE);
 					statep->ls_sequote.got_NUL = false;
 					break;
 				} else {
 					*wp++ = CHAR;
 					*wp++ = '$';
 					ungetsc(c);
 				}
 				break;
 			case '`':
  subst_gravis:
 				PUSH_STATE(SBQUOTE);
 				*wp++ = COMSUB;
 				/* Need to know if we are inside double quotes
 				 * since sh/AT&T-ksh translate the \" to " in
 				 * "`...\"...`".
 				 * This is not done in POSIX mode (section
 				 * 3.2.3, Double Quotes: "The backquote shall
 				 * retain its special meaning introducing the
 				 * other form of command substitution (see
 				 * 3.6.3). The portion of the quoted string
 				 * from the initial backquote and the
 				 * characters up to the next backquote that
 				 * is not preceded by a backslash (having
 				 * escape characters removed) defines that
 				 * command whose output replaces `...` when
 				 * the word is expanded."
 				 * Section 3.6.3, Command Substitution:
 				 * "Within the backquoted style of command
 				 * substitution, backslash shall retain its
 				 * literal meaning, except when followed by
 				 * $ ` \.").
 				 */
 				statep->ls_sbquote.indquotes = 0;
 				s2 = statep;
 				base = state_info.base;
 				while (1) {
 					for (; s2 != base; s2--) {
 						if (s2->ls_state == SDQUOTE) {
 							statep->ls_sbquote.indquotes = 1;
 							break;
 						}
 					}
 					if (s2 != base)
 						break;
 					if (!(s2 = s2->ls_info.base))
 						break;
 					base = s2-- - STATE_BSIZE;
 				}
 				break;
 			case QCHAR:
 				if (cf & LQCHAR) {
 					*wp++ = QCHAR;
 					*wp++ = getsc();
 					break;
 				}
 				/* FALLTHROUGH */
 			default:
  store_char:
 				*wp++ = CHAR;
 				*wp++ = c;
 			}
 			break;

 		case SEQUOTE:
 			if (c == '\'') {
 				POP_STATE();
 				*wp++ = CQUOTE;
 				ignore_backslash_newline--;
 			} else if (c == '\\') {
 				if ((c2 = unbksl(true, s_get, s_put)) == -1)
 					c2 = s_get();
 				if (c2 == 0)
 					statep->ls_sequote.got_NUL = true;
 				if (!statep->ls_sequote.got_NUL) {
 					char ts[4];

 					if ((unsigned int)c2 < 0x100) {
 						*wp++ = QCHAR;
 						*wp++ = c2;
 					} else {
 						c = utf_wctomb(ts, c2 - 0x100);
 						ts[c] = 0;
 						for (c = 0; ts[c]; ++c) {
 							*wp++ = QCHAR;
 							*wp++ = ts[c];
 						}
 					}
 				}
 			} else if (!statep->ls_sequote.got_NUL) {
 				*wp++ = QCHAR;
 				*wp++ = c;
 			}
 			break;

 		case SSQUOTE:
 			if (c == '\'') {
 				POP_STATE();
 				*wp++ = CQUOTE;
 				ignore_backslash_newline--;
 			} else {
 				*wp++ = QCHAR;
 				*wp++ = c;
 			}
 			break;

 		case SDQUOTE:
 			if (c == '"') {
 				POP_STATE();
 				*wp++ = CQUOTE;
 			} else
 				goto Subst;
 			break;

 		case SCSPAREN:	/* $( ... ) */
 			/* todo: deal with $(...) quoting properly
 			 * kludge to partly fake quoting inside $(...): doesn't
 			 * really work because nested $(...) or ${...} inside
 			 * double quotes aren't dealt with.
 			 */
 			switch (statep->ls_scsparen.csstate) {
 			case 0:	/* normal */
 				switch (c) {
 				case '(':
 					statep->ls_scsparen.nparen++;
 					break;
 				case ')':
 					statep->ls_scsparen.nparen--;
 					break;
 				case '\\':
 					statep->ls_scsparen.csstate = 1;
 					break;
 				case '"':
 					statep->ls_scsparen.csstate = 2;
 					break;
 				case '\'':
 					statep->ls_scsparen.csstate = 4;
 					ignore_backslash_newline++;
 					break;
 				}
 				break;

 			case 1:	/* backslash in normal mode */
 			case 3:	/* backslash in double quotes */
 				--statep->ls_scsparen.csstate;
 				break;

 			case 2:	/* double quotes */
 				if (c == '"')
 					statep->ls_scsparen.csstate = 0;
 				else if (c == '\\')
 					statep->ls_scsparen.csstate = 3;
 				break;

 			case 4:	/* single quotes */
 				if (c == '\'') {
 					statep->ls_scsparen.csstate = 0;
 					ignore_backslash_newline--;
 				}
 				break;
 			}
 			if (statep->ls_scsparen.nparen == 0) {
 				POP_STATE();
 				*wp++ = 0;	/* end of COMSUB */
 			} else
 				*wp++ = c;
 			break;

 		case SASPAREN:	/* $(( ... )) */
 			/* XXX should nest using existing state machine
 			 * (embed "...", $(...), etc.) */
 			if (c == '(')
 				statep->ls_sasparen.nparen++;
 			else if (c == ')') {
 				statep->ls_sasparen.nparen--;
 				if (statep->ls_sasparen.nparen == 1) {
 					/*(*/
 					if ((c2 = getsc()) == ')') {
 						POP_STATE();
 						/* end of EXPRSUB */
 						*wp++ = 0;
 						break;
 					} else {
 						char *s;

 						ungetsc(c2);
 						/* mismatched parenthesis -
 						 * assume we were really
 						 * parsing a $(...) expression
 						 */
 						s = Xrestpos(ws, wp,
 						    statep->ls_sasparen.start);
 						memmove(s + 1, s, wp - s);
 						*s++ = COMSUB;
 						*s = '('; /*)*/
 						wp++;
 						statep->ls_scsparen.nparen = 1;
 						statep->ls_scsparen.csstate = 0;
 						state = statep->ls_state =
 						    SCSPAREN;
 					}
 				}
 			}
 			*wp++ = c;
 			break;

 		case SQBRACE:
 			if (c == '\\') {
 				/*
 				 * perform POSIX "quote removal" if the back-
 				 * slash is "special", i.e. same cases as the
 				 * {case '\\':} in Subst: plus closing brace;
 				 * in mksh code "quote removal" on '\c' means
 				 * write QCHAR+c, otherwise CHAR+\+CHAR+c are
 				 * emitted (in heredocquote:)
 				 */
 				if ((c = getsc()) == '"' || c == '\\' ||
 				    c == '$' || c == '`' || c == /*{*/'}')
 					goto store_qchar;
 				goto heredocquote;
 			}
 			goto common_SQBRACE;

 		case SBRACE:
 			if (c == '\'')
 				goto open_ssquote;
 			else if (c == '\\')
 				goto getsc_qchar;
  common_SQBRACE:
 			if (c == '"')
 				goto open_sdquote;
 			else if (c == '$')
 				goto subst_dollar;
 			else if (c == '`')
 				goto subst_gravis;
 			else if (c != /*{*/ '}')
 				goto store_char;
 			POP_STATE();
 			*wp++ = CSUBST;
 			*wp++ = /*{*/ '}';
 			break;

 		case STBRACE:
 			/* Same as SBASE, except (,|,) treated specially */
 			if (c == /*{*/ '}') {
 				POP_STATE();
 				*wp++ = CSUBST;
 				*wp++ = /*{*/ '}';
 			} else if (c == '|') {
 				*wp++ = SPAT;
 			} else if (c == '(') {
 				*wp++ = OPAT;
 				*wp++ = ' ';	/* simile for @ */
 				PUSH_STATE(SPATTERN);
 			} else
 				goto Sbase1;
 			break;

 		case SBQUOTE:
 			if (c == '`') {
 				*wp++ = 0;
 				POP_STATE();
 			} else if (c == '\\') {
 				switch (c = getsc()) {
 				case '\\':
 				case '$': case '`':
 					*wp++ = c;
 					break;
 				case '"':
 					if (statep->ls_sbquote.indquotes) {
 						*wp++ = c;
 						break;
 					}
 					/* FALLTHROUGH */
 				default:
 					if (c) {
 						/* trailing \ is lost */
 						*wp++ = '\\';
 						*wp++ = c;
 					}
 					break;
 				}
 			} else
 				*wp++ = c;
 			break;

 		case SWORD:	/* ONEWORD */
 			goto Subst;

 		case SLETPAREN:	/* LETEXPR: (( ... )) */
 			/*(*/
 			if (c == ')') {
 				if (statep->ls_sletparen.nparen > 0)
 					--statep->ls_sletparen.nparen;
 				else if ((c2 = getsc()) == /*(*/ ')') {
 					c = 0;
 					*wp++ = CQUOTE;
 					goto Done;
 				} else {
 					Source *s;

 					ungetsc(c2);
 					/* mismatched parenthesis -
 					 * assume we were really
 					 * parsing a $(...) expression
 					 */
 					*wp = EOS;
 					sp = Xstring(ws, wp);
 					dp = wdstrip(sp, true, false);
 					s = pushs(SREREAD, source->areap);
 					s->start = s->str = s->u.freeme = dp;
 					s->next = source;
 					source = s;
 					return ('('/*)*/);
 				}
 			} else if (c == '(')
 				/* parenthesis inside quotes and backslashes
 				 * are lost, but AT&T ksh doesn't count them
 				 * either
 				 */
 				++statep->ls_sletparen.nparen;
 			goto Sbase2;

 #ifndef MKSH_SMALL
 		case SLETARRAY:	/* LETARRAY: =( ... ) */
 			if (c == '('/*)*/)
 				++statep->ls_sletarray.nparen;
 			else if (c == /*(*/')')
 				if (statep->ls_sletarray.nparen-- == 0) {
 					c = 0;
 					goto Done;
 				}
 			*wp++ = CHAR;
 			*wp++ = c;
 			break;
 #endif

 		case SHERESTRING:	/* <<< delimiter */
 			if (c == '\\') {
 				c = getsc();
 				if (c) {
 					/* trailing \ is lost */
 					*wp++ = QCHAR;
 					*wp++ = c;
 				}
 				/* invoke quoting mode */
 				Xstring(ws, wp)[0] = QCHAR;
 			} else if (c == '$') {
 				if ((c2 = getsc()) == '\'') {
 					PUSH_STATE(SEQUOTE);
 					statep->ls_sequote.got_NUL = false;
 					goto sherestring_quoted;
 				}
 				ungetsc(c2);
 				goto sherestring_regular;
 			} else if (c == '\'') {
 				PUSH_STATE(SSQUOTE);
  sherestring_quoted:
 				*wp++ = OQUOTE;
 				ignore_backslash_newline++;
 				/* invoke quoting mode */
 				Xstring(ws, wp)[0] = QCHAR;
 			} else if (c == '"') {
 				state = statep->ls_state = SHEREDQUOTE;
 				*wp++ = OQUOTE;
 				/* just don't IFS split; no quoting mode */
 			} else {
  sherestring_regular:
 				*wp++ = CHAR;
 				*wp++ = c;
 			}
 			break;

 		case SHEREDELIM:	/* <<,<<- delimiter */
 			/* XXX chuck this state (and the next) - use
 			 * the existing states ($ and \`...` should be
 			 * stripped of their specialness after the
 			 * fact).
 			 */
 			/* here delimiters need a special case since
 			 * $ and `...` are not to be treated specially
 			 */
 			if (c == '\\') {
 				c = getsc();
 				if (c) {
 					/* trailing \ is lost */
 					*wp++ = QCHAR;
 					*wp++ = c;
 				}
 			} else if (c == '$') {
 				if ((c2 = getsc()) == '\'') {
 					PUSH_STATE(SEQUOTE);
 					statep->ls_sequote.got_NUL = false;
 					goto sheredelim_quoted;
 				}
 				ungetsc(c2);
 				goto sheredelim_regular;
 			} else if (c == '\'') {
 				PUSH_STATE(SSQUOTE);
  sheredelim_quoted:
 				*wp++ = OQUOTE;
 				ignore_backslash_newline++;
 			} else if (c == '"') {
 				state = statep->ls_state = SHEREDQUOTE;
 				*wp++ = OQUOTE;
 			} else {
  sheredelim_regular:
 				*wp++ = CHAR;
 				*wp++ = c;
 			}
 			break;

 		case SHEREDQUOTE:	/* " in <<,<<- delimiter */
 			if (c == '"') {
 				*wp++ = CQUOTE;
 				state = statep->ls_state =
 				    /* dp[1] == '<' means here string */
 				    Xstring(ws, wp)[1] == '<' ?
 				    SHERESTRING : SHEREDELIM;
 			} else {
 				if (c == '\\') {
 					switch (c = getsc()) {
 					case '\\': case '"':
 					case '$': case '`':
 						break;
 					default:
 						if (c) {
 							/* trailing \ lost */
 							*wp++ = CHAR;
 							*wp++ = '\\';
 						}
 						break;
 					}
 				}
 				*wp++ = CHAR;
 				*wp++ = c;
 			}
 			break;

 		case SPATTERN:	/* in *(...|...) pattern (*+?@!) */
 			if ( /*(*/ c == ')') {
 				*wp++ = CPAT;
 				POP_STATE();
 			} else if (c == '|') {
 				*wp++ = SPAT;
 			} else if (c == '(') {
 				*wp++ = OPAT;
 				*wp++ = ' ';	/* simile for @ */
 				PUSH_STATE(SPATTERN);
 			} else
 				goto Sbase1;
 			break;
 		}
 	}
  Done:
 	Xcheck(ws, wp);
 	if (statep != &states[1])
 		/* XXX figure out what is missing */
 		yyerror("no closing quote\n");

 #ifndef MKSH_SMALL
 	if (state == SLETARRAY && statep->ls_sletarray.nparen != -1)
 		yyerror("%s: ')' missing\n", T_synerr);
 #endif

 	/* This done to avoid tests for SHEREDELIM wherever SBASE tested */
 	if (state == SHEREDELIM || state == SHERESTRING)
 		state = SBASE;

 	dp = Xstring(ws, wp);
 	if ((c == '<' || c == '>' || c == '&') && state == SBASE) {
 		struct ioword *iop = alloc(sizeof(struct ioword), ATEMP);

 		if (Xlength(ws, wp) == 0)
 			iop->unit = c == '<' ? 0 : 1;
 		else for (iop->unit = 0, c2 = 0; c2 < Xlength(ws, wp); c2 += 2) {
 			if (dp[c2] != CHAR)
 				goto no_iop;
 			if (!ksh_isdigit(dp[c2 + 1]))
 				goto no_iop;
 			iop->unit = (iop->unit * 10) + dp[c2 + 1] - '0';
 		}

 		if (iop->unit >= FDBASE)
 			goto no_iop;

 		if (c == '&') {
 			if ((c2 = getsc()) != '>') {
 				ungetsc(c2);
 				goto no_iop;
 			}
 			c = c2;
 			iop->flag = IOBASH;
 		} else
 			iop->flag = 0;

 		c2 = getsc();
 		/* <<, >>, <> are ok, >< is not */
 		if (c == c2 || (c == '<' && c2 == '>')) {
 			iop->flag |= c == c2 ?
 			    (c == '>' ? IOCAT : IOHERE) : IORDWR;
 			if (iop->flag == IOHERE) {
 				if ((c2 = getsc()) == '-')
 					iop->flag |= IOSKIP;
 				else
 					ungetsc(c2);
 			}
 		} else if (c2 == '&')
 			iop->flag |= IODUP | (c == '<' ? IORDUP : 0);
 		else {
 			iop->flag |= c == '>' ? IOWRITE : IOREAD;
 			if (c == '>' && c2 == '|')
 				iop->flag |= IOCLOB;
 			else
 				ungetsc(c2);
 		}

 		iop->name = NULL;
 		iop->delim = NULL;
 		iop->heredoc = NULL;
 		Xfree(ws, wp);	/* free word */
 		yylval.iop = iop;
 		return (REDIR);
  no_iop:
 		;
 	}

 	if (wp == dp && state == SBASE) {
 		Xfree(ws, wp);	/* free word */
 		/* no word, process LEX1 character */
 		if ((c == '|') || (c == '&') || (c == ';') || (c == '('/*)*/)) {
 			if ((c2 = getsc()) == c)
 				c = (c == ';') ? BREAK :
 				    (c == '|') ? LOGOR :
 				    (c == '&') ? LOGAND :
 				    /* c == '(' ) */ MDPAREN;
 			else if (c == '|' && c2 == '&')
 				c = COPROC;
 			else
 				ungetsc(c2);
 		} else if (c == '\n') {
 			gethere(false);
 			if (cf & CONTIN)
 				goto Again;
 		} else if (c == '\0')
 			/* need here strings at EOF */
 			gethere(true);
 		return (c);
 	}

 	*wp++ = EOS;		/* terminate word */
 	yylval.cp = Xclose(ws, wp);
 	if (state == SWORD || state == SLETPAREN
 	    /* XXX ONEWORD? */
 #ifndef MKSH_SMALL
 	    || state == SLETARRAY
 #endif
 	    )
 		return (LWORD);

 	/* unget terminator */
 	ungetsc(c);

 	/*
 	 * note: the alias-vs-function code below depends on several
 	 * interna: starting from here, source->str is not modified;
 	 * the way getsc() and ungetsc() operate; etc.
 	 */

 	/* copy word to unprefixed string ident */
 	sp = yylval.cp;
 	dp = ident;
 	if ((cf & HEREDELIM) && (sp[1] == '<'))
 		while (dp < ident+IDENT) {
 			if ((c = *sp++) == CHAR)
 				*dp++ = *sp++;
 			else if ((c != OQUOTE) && (c != CQUOTE))
 				break;
 		}
 	else
 		while (dp < ident+IDENT && (c = *sp++) == CHAR)
 			*dp++ = *sp++;
 	/* Make sure the ident array stays '\0' padded */
 	memset(dp, 0, (ident+IDENT) - dp + 1);
 	if (c != EOS)
 		*ident = '\0';	/* word is not unquoted */

 	if (*ident != '\0' && (cf&(KEYWORD|ALIAS))) {
 		struct tbl *p;
 		uint32_t h = hash(ident);

 		/* { */
 		if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h)) &&
 		    (!(cf & ESACONLY) || p->val.i == ESAC || p->val.i == '}')) {
 			afree(yylval.cp, ATEMP);
 			return (p->val.i);
 		}
 		if ((cf & ALIAS) && (p = ktsearch(&aliases, ident, h)) &&
 		    (p->flag & ISSET)) {
 			/*
 			 * this still points to the same character as the
 			 * ungetsc'd terminator from above
 			 */
 			const char *cp = source->str;

 			/* prefer POSIX but not Korn functions over aliases */
 			while (*cp == ' ' || *cp == '\t')
 				/*
 				 * this is like getsc() without skipping
 				 * over Source boundaries (including not
 				 * parsing ungetsc'd characters that got
 				 * pushed into an SREREAD) which is what
 				 * we want here anyway: find out whether
 				 * the alias name is followed by a POSIX
 				 * function definition (only the opening
 				 * parenthesis is checked though)
 				 */
 				++cp;
 			/* prefer functions over aliases */
 			if (*cp == '(' /*)*/)
 				/*
 				 * delete alias upon encountering function
 				 * definition
 				 */
 				ktdelete(p);
 			else {
 				Source *s = source;

 				while (s && (s->flags & SF_HASALIAS))
 					if (s->u.tblp == p)
 						return (LWORD);
 					else
 						s = s->next;
 				/* push alias expansion */
 				s = pushs(SALIAS, source->areap);
 				s->start = s->str = p->val.s;
 				s->u.tblp = p;
 				s->flags |= SF_HASALIAS;
 				s->next = source;
 				if (source->type == SEOF) {
 					/* prevent infinite recursion at EOS */
 					source->u.tblp = p;
 					source->flags |= SF_HASALIAS;
 				}
 				source = s;
 				afree(yylval.cp, ATEMP);
 				goto Again;
 			}
 		}
 	}

 	return (LWORD);
 }

 static void
 gethere(bool iseof)
 {
 	struct ioword **p;

 	for (p = heres; p < herep; p++)
 		if (iseof && (*p)->delim[1] != '<')
 			/* only here strings at EOF */
 			return;
 		else
 			readhere(*p);
 	herep = heres;
 }

 /*
  * read "<<word" text into temp file
  */

 static void
 readhere(struct ioword *iop)
 {
 	int c;
 	char *volatile eof;
 	char *eofp;
 	int skiptabs;
 	XString xs;
 	char *xp;
 	int xpos;

 	if (iop->delim[1] == '<') {
 		/* process the here string */
 		xp = iop->heredoc = evalstr(iop->delim, DOBLANK);
 		c = strlen(xp) - 1;
 		memmove(xp, xp + 1, c);
 		xp[c] = '\n';
 		return;
 	}

 	eof = evalstr(iop->delim, 0);

 	if (!(iop->flag & IOEVAL))
 		ignore_backslash_newline++;

 	Xinit(xs, xp, 256, ATEMP);

 	for (;;) {
 		eofp = eof;
 		skiptabs = iop->flag & IOSKIP;
 		xpos = Xsavepos(xs, xp);
 		while ((c = getsc()) != 0) {
 			if (skiptabs) {
 				if (c == '\t')
 					continue;
 				skiptabs = 0;
 			}
 			if (c != *eofp)
 				break;
 			Xcheck(xs, xp);
 			Xput(xs, xp, c);
 			eofp++;
 		}
 		/* Allow EOF here so commands with out trailing newlines
 		 * will work (eg, ksh -c '...', $(...), etc).
 		 */
 		if (*eofp == '\0' && (c == 0 || c == '\n')) {
 			xp = Xrestpos(xs, xp, xpos);
 			break;
 		}
 		ungetsc(c);
 		while ((c = getsc()) != '\n') {
 			if (c == 0)
 				yyerror("here document '%s' unclosed\n", eof);
 			Xcheck(xs, xp);
 			Xput(xs, xp, c);
 		}
 		Xcheck(xs, xp);
 		Xput(xs, xp, c);
 	}
 	Xput(xs, xp, '\0');
 	iop->heredoc = Xclose(xs, xp);

 	if (!(iop->flag & IOEVAL))
 		ignore_backslash_newline--;
 }

 void
 yyerror(const char *fmt, ...)
 {
 	va_list va;

 	/* pop aliases and re-reads */
 	while (source->type == SALIAS || source->type == SREREAD)
 		source = source->next;
 	source->str = null;	/* zap pending input */

 	error_prefix(true);
 	va_start(va, fmt);
 	shf_vfprintf(shl_out, fmt, va);
 	va_end(va);
 	errorfz();
 }

 /*
  * input for yylex with alias expansion
  */

 Source *
 pushs(int type, Area *areap)
 {
 	Source *s;

 	s = alloc(sizeof(Source), areap);
 	memset(s, 0, sizeof(Source));
 	s->type = type;
 	s->str = null;
 	s->areap = areap;
 	if (type == SFILE || type == SSTDIN)
 		XinitN(s->xs, 256, s->areap);
 	return (s);
 }

 static int
 getsc__(void)
 {
 	Source *s = source;
 	int c;

  getsc_again:
 	while ((c = *s->str++) == 0) {
 		s->str = NULL;		/* return 0 for EOF by default */
 		switch (s->type) {
 		case SEOF:
 			s->str = null;
 			return (0);

 		case SSTDIN:
 		case SFILE:
 			getsc_line(s);
 			break;

 		case SWSTR:
 			break;

 		case SSTRING:
 			break;

 		case SWORDS:
 			s->start = s->str = *s->u.strv++;
 			s->type = SWORDSEP;
 			break;

 		case SWORDSEP:
 			if (*s->u.strv == NULL) {
 				s->start = s->str = "\n";
 				s->type = SEOF;
 			} else {
 				s->start = s->str = " ";
 				s->type = SWORDS;
 			}
 			break;

 		case SALIAS:
 			if (s->flags & SF_ALIASEND) {
 				/* pass on an unused SF_ALIAS flag */
 				source = s->next;
 				source->flags |= s->flags & SF_ALIAS;
 				s = source;
 			} else if (*s->u.tblp->val.s &&
 			    (c = strnul(s->u.tblp->val.s)[-1], ksh_isspace(c))) {
 				source = s = s->next;	/* pop source stack */
 				/* Note that this alias ended with a space,
 				 * enabling alias expansion on the following
 				 * word.
 				 */
 				s->flags |= SF_ALIAS;
 			} else {
 				/* At this point, we need to keep the current
 				 * alias in the source list so recursive
 				 * aliases can be detected and we also need
 				 * to return the next character. Do this
 				 * by temporarily popping the alias to get
 				 * the next character and then put it back
 				 * in the source list with the SF_ALIASEND
 				 * flag set.
 				 */
 				source = s->next;	/* pop source stack */
 				source->flags |= s->flags & SF_ALIAS;
 				c = getsc__();
 				if (c) {
 					s->flags |= SF_ALIASEND;
 					s->ugbuf[0] = c; s->ugbuf[1] = '\0';
 					s->start = s->str = s->ugbuf;
 					s->next = source;
 					source = s;
 				} else {
 					s = source;
 					/* avoid reading eof twice */
 					s->str = NULL;
 					break;
 				}
 			}
 			continue;

 		case SREREAD:
 			if (s->start != s->ugbuf)	/* yuck */
 				afree(s->u.freeme, ATEMP);
 			source = s = s->next;
 			continue;
 		}
 		if (s->str == NULL) {
 			s->type = SEOF;
 			s->start = s->str = null;
 			return ('\0');
 		}
 		if (s->flags & SF_ECHO) {
 			shf_puts(s->str, shl_out);
 			shf_flush(shl_out);
 		}
 	}
 	/* check for UTF-8 byte order mark */
 	if (s->flags & SF_FIRST) {
 		s->flags &= ~SF_FIRST;
 		if (((unsigned char)c == 0xEF) &&
 		    (((const unsigned char *)(s->str))[0] == 0xBB) &&
 		    (((const unsigned char *)(s->str))[1] == 0xBF)) {
 			s->str += 2;
 			UTFMODE = 1;
 			goto getsc_again;
 		}
 	}
 	return (c);
 }

 static void
 getsc_line(Source *s)
 {
 	char *xp = Xstring(s->xs, xp), *cp;
 	bool interactive = Flag(FTALKING) && s->type == SSTDIN;
 	int have_tty = interactive && (s->flags & SF_TTY);

 	/* Done here to ensure nothing odd happens when a timeout occurs */
 	XcheckN(s->xs, xp, LINE);
 	*xp = '\0';
 	s->start = s->str = xp;

 	if (have_tty && ksh_tmout) {
 		ksh_tmout_state = TMOUT_READING;
 		alarm(ksh_tmout);
 	}
 	if (interactive)
 		change_winsz();
 	if (have_tty && (
 #if !MKSH_S_NOVI
 	    Flag(FVI) ||
 #endif
 	    Flag(FEMACS) || Flag(FGMACS))) {
 		int nread;

 		nread = x_read(xp, LINE);
 		if (nread < 0)	/* read error */
 			nread = 0;
 		xp[nread] = '\0';
 		xp += nread;
 	} else {
 		if (interactive)
 			pprompt(prompt, 0);
 		else
 			s->line++;

 		while (1) {
 			char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);

 			if (!p && shf_error(s->u.shf) &&
 			    shf_errno(s->u.shf) == EINTR) {
 				shf_clearerr(s->u.shf);
 				if (trap)
 					runtraps(0);
 				continue;
 			}
 			if (!p || (xp = p, xp[-1] == '\n'))
 				break;
 			/* double buffer size */
 			xp++;	/* move past NUL so doubling works... */
 			XcheckN(s->xs, xp, Xlength(s->xs, xp));
 			xp--;	/* ...and move back again */
 		}
 		/* flush any unwanted input so other programs/builtins
 		 * can read it. Not very optimal, but less error prone
 		 * than flushing else where, dealing with redirections,
 		 * etc.
 		 * todo: reduce size of shf buffer (~128?) if SSTDIN
 		 */
 		if (s->type == SSTDIN)
 			shf_flush(s->u.shf);
 	}
 	/* XXX: temporary kludge to restore source after a
 	 * trap may have been executed.
 	 */
 	source = s;
 	if (have_tty && ksh_tmout) {
 		ksh_tmout_state = TMOUT_EXECUTING;
 		alarm(0);
 	}
 	cp = Xstring(s->xs, xp);
 #ifndef MKSH_SMALL
 	if (interactive && *cp == '!' && cur_prompt == PS1) {
 		int linelen;

 		linelen = Xlength(s->xs, xp);
 		XcheckN(s->xs, xp, fc_e_n + /* NUL */ 1);
 		/* reload after potential realloc */
 		cp = Xstring(s->xs, xp);
 		/* change initial '!' into space */
 		*cp = ' ';
 		/* NUL terminate the current string */
 		*xp = '\0';
 		/* move the actual string forward */
 		memmove(cp + fc_e_n, cp, linelen + /* NUL */ 1);
 		xp += fc_e_n;
 		/* prepend it with "fc -e -" */
 		memcpy(cp, fc_e_, fc_e_n);
 	}
 #endif
 	s->start = s->str = cp;
 	strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
 	/* Note: if input is all nulls, this is not eof */
 	if (Xlength(s->xs, xp) == 0) {
 		/* EOF */
 		if (s->type == SFILE)
 			shf_fdclose(s->u.shf);
 		s->str = NULL;
 	} else if (interactive && *s->str &&
 	    (cur_prompt != PS1 || !ctype(*s->str, C_IFS | C_IFSWS))) {
 		histsave(&s->line, s->str, true, true);
 #if !defined(MKSH_SMALL) && HAVE_PERSISTENT_HISTORY
 	} else if (interactive && cur_prompt == PS1) {
 		cp = Xstring(s->xs, xp);
 		while (*cp && ctype(*cp, C_IFSWS))
 			++cp;
 		if (!*cp)
 			histsync();
 #endif
 	}
 	if (interactive)
 		set_prompt(PS2, NULL);
 }

 void
 set_prompt(int to, Source *s)
 {
 	cur_prompt = to;

 	switch (to) {
 	case PS1:	/* command */
 		/* Substitute ! and !! here, before substitutions are done
 		 * so ! in expanded variables are not expanded.
 		 * NOTE: this is not what AT&T ksh does (it does it after
 		 * substitutions, POSIX doesn't say which is to be done.
 		 */
 		{
 			struct shf *shf;
 			char * volatile ps1;
 			Area *saved_atemp;

 			ps1 = str_val(global("PS1"));
 			shf = shf_sopen(NULL, strlen(ps1) * 2,
 			    SHF_WR | SHF_DYNAMIC, NULL);
 			while (*ps1)
 				if (*ps1 != '!' || *++ps1 == '!')
 					shf_putchar(*ps1++, shf);
 				else
 					shf_fprintf(shf, "%d",
 						s ? s->line + 1 : 0);
 			ps1 = shf_sclose(shf);
 			saved_atemp = ATEMP;
 			newenv(E_ERRH);
 			if (sigsetjmp(e->jbuf, 0)) {
 				prompt = safe_prompt;
 				/* Don't print an error - assume it has already
 				 * been printed. Reason is we may have forked
 				 * to run a command and the child may be
 				 * unwinding its stack through this code as it
 				 * exits.
 				 */
 			} else {
 				char *cp = substitute(ps1, 0);
 				strdupx(prompt, cp, saved_atemp);
 			}
 			quitenv(NULL);
 		}
 		break;
 	case PS2:	/* command continuation */
 		prompt = str_val(global("PS2"));
 		break;
 	}
 }

 static int
 dopprompt(const char *cp, int ntruncate, bool doprint)
 {
 	int columns = 0, lines = 0, indelimit = 0;
 	char delimiter = 0;

 	/* Undocumented AT&T ksh feature:
 	 * If the second char in the prompt string is \r then the first char
 	 * is taken to be a non-printing delimiter and any chars between two
 	 * instances of the delimiter are not considered to be part of the
 	 * prompt length
 	 */
 	if (*cp && cp[1] == '\r') {
 		delimiter = *cp;
 		cp += 2;
 	}
 	for (; *cp; cp++) {
 		if (indelimit && *cp != delimiter)
 			;
 		else if (*cp == '\n' || *cp == '\r') {
 			lines += columns / x_cols + ((*cp == '\n') ? 1 : 0);
 			columns = 0;
 		} else if (*cp == '\t') {
 			columns = (columns | 7) + 1;
 		} else if (*cp == '\b') {
 			if (columns > 0)
 				columns--;
 		} else if (*cp == delimiter)
 			indelimit = !indelimit;
 		else if (UTFMODE && ((unsigned char)*cp > 0x7F)) {
 			const char *cp2;
 			columns += utf_widthadj(cp, &cp2);
 			if (doprint && (indelimit ||
 			    (ntruncate < (x_cols * lines + columns))))
 				shf_write(cp, cp2 - cp, shl_out);
 			cp = cp2 - /* loop increment */ 1;
 			continue;
 		} else
 			columns++;
 		if (doprint && (*cp != delimiter) &&
 		    (indelimit || (ntruncate < (x_cols * lines + columns))))
 			shf_putc(*cp, shl_out);
 	}
 	if (doprint)
 		shf_flush(shl_out);
 	return (x_cols * lines + columns);
 }


 void
 pprompt(const char *cp, int ntruncate)
 {
 	dopprompt(cp, ntruncate, true);
 }

 int
 promptlen(const char *cp)
 {
 	return (dopprompt(cp, 0, false));
 }

 /* Read the variable part of a ${...} expression (ie, up to but not including
  * the :[-+?=#%] or close-brace.
  */
 static char *
 get_brace_var(XString *wsp, char *wp)
 {
 	enum parse_state {
 		PS_INITIAL, PS_SAW_HASH, PS_IDENT,
 		PS_NUMBER, PS_VAR1
 	} state;
 	char c;

 	state = PS_INITIAL;
 	while (1) {
 		c = getsc();
 		/* State machine to figure out where the variable part ends. */
 		switch (state) {
 		case PS_INITIAL:
 			if (c == '#' || c == '!' || c == '%') {
 				state = PS_SAW_HASH;
 				break;
 			}
 			/* FALLTHROUGH */
 		case PS_SAW_HASH:
 			if (ksh_isalphx(c))
 				state = PS_IDENT;
 			else if (ksh_isdigit(c))
 				state = PS_NUMBER;
 			else if (ctype(c, C_VAR1))
 				state = PS_VAR1;
 			else
 				goto out;
 			break;
 		case PS_IDENT:
 			if (!ksh_isalnux(c)) {
 				if (c == '[') {
 					char *tmp, *p;

 					if (!arraysub(&tmp))
 						yyerror("missing ]\n");
 					*wp++ = c;
 					for (p = tmp; *p; ) {
 						Xcheck(*wsp, wp);
 						*wp++ = *p++;
 					}
 					afree(tmp, ATEMP);
 					c = getsc();	/* the ] */
 				}
 				goto out;
 			}
 			break;
 		case PS_NUMBER:
 			if (!ksh_isdigit(c))
 				goto out;
 			break;
 		case PS_VAR1:
 			goto out;
 		}
 		Xcheck(*wsp, wp);
 		*wp++ = c;
 	}
  out:
 	*wp++ = '\0';	/* end of variable part */
 	ungetsc(c);
 	return (wp);
 }

 /*
  * Save an array subscript - returns true if matching bracket found, false
  * if eof or newline was found.
  * (Returned string double null terminated)
  */
 static int
 arraysub(char **strp)
 {
 	XString ws;
 	char	*wp;
 	char	c;
 	int	depth = 1;	/* we are just past the initial [ */

 	Xinit(ws, wp, 32, ATEMP);

 	do {
 		c = getsc();
 		Xcheck(ws, wp);
 		*wp++ = c;
 		if (c == '[')
 			depth++;
 		else if (c == ']')
 			depth--;
 	} while (depth > 0 && c && c != '\n');

 	*wp++ = '\0';
 	*strp = Xclose(ws, wp);

 	return (depth == 0 ? 1 : 0);
 }

 /* Unget a char: handles case when we are already at the start of the buffer */
 static const char *
 ungetsc(int c)
 {
 	if (backslash_skip)
 		backslash_skip--;
 	/* Don't unget eof... */
 	if (source->str == null && c == '\0')
 		return (source->str);
 	if (source->str > source->start)
 		source->str--;
 	else {
 		Source *s;

 		s = pushs(SREREAD, source->areap);
 		s->ugbuf[0] = c; s->ugbuf[1] = '\0';
 		s->start = s->str = s->ugbuf;
 		s->next = source;
 		source = s;
 	}
 	return (source->str);
 }


 /* Called to get a char that isn't a \newline sequence. */
 static int
 getsc_bn(void)
 {
 	int c, c2;

 	if (ignore_backslash_newline)
 		return (getsc_());

 	if (backslash_skip == 1) {
 		backslash_skip = 2;
 		return (getsc_());
 	}

 	backslash_skip = 0;

 	while (1) {
 		c = getsc_();
 		if (c == '\\') {
 			if ((c2 = getsc_()) == '\n')
 				/* ignore the \newline; get the next char... */
 				continue;
 			ungetsc(c2);
 			backslash_skip = 1;
 		}
 		return (c);
 	}
 }

 static Lex_state *
 push_state_(State_info *si, Lex_state *old_end)
 {
 	Lex_state *news = alloc(STATE_BSIZE * sizeof(Lex_state), ATEMP);

 	news[0].ls_info.base = old_end;
 	si->base = &news[0];
 	si->end = &news[STATE_BSIZE];
 	return (&news[1]);
 }

 static Lex_state *
 pop_state_(State_info *si, Lex_state *old_end)
 {
 	Lex_state *old_base = si->base;

 	si->base = old_end->ls_info.base - STATE_BSIZE;
 	si->end = old_end->ls_info.base;

 	afree(old_base, ATEMP);

 	return (si->base + STATE_BSIZE - 1);
 }

 static int
 s_get(void)
 {
 	return (getsc());
 }

 static void
 s_put(int c)
 {
 	ungetsc(c);
 }