| /* GNU SED, a batch stream editor. |
| Copyright (C) 1989,90,91,92,93,94,95,98,99,2002,2003,2004,2005,2006,2008 |
| Free Software Foundation, Inc. |
| |
| This program is free software; you can redistribute it and/or modify |
| it under the terms of the GNU General Public License as published by |
| the Free Software Foundation; either version 3, or (at your option) |
| any later version. |
| |
| This program is distributed in the hope that it will be useful, |
| but WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| GNU General Public License for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with this program; if not, write to the Free Software |
| Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ |
| |
| /* compile.c: translate sed source into internal form */ |
| |
| #include "sed.h" |
| #include <stdio.h> |
| #include <ctype.h> |
| |
| #ifdef HAVE_STRINGS_H |
| # include <strings.h> |
| # ifdef HAVE_MEMORY_H |
| # include <memory.h> |
| # endif |
| #else |
| # include <string.h> |
| #endif /* HAVE_STRINGS_H */ |
| |
| #ifdef HAVE_STDLIB_H |
| # include <stdlib.h> |
| #endif |
| #ifndef EXIT_FAILURE |
| # define EXIT_FAILURE 1 |
| #endif |
| |
| #ifdef HAVE_SYS_TYPES_H |
| # include <sys/types.h> |
| #endif |
| |
| #include <obstack.h> |
| |
| |
| #define YMAP_LENGTH 256 /*XXX shouldn't this be (UCHAR_MAX+1)?*/ |
| #define VECTOR_ALLOC_INCREMENT 40 |
| |
| /* let's not confuse text editors that have only dumb bracket-matching... */ |
| #define OPEN_BRACKET '[' |
| #define CLOSE_BRACKET ']' |
| #define OPEN_BRACE '{' |
| #define CLOSE_BRACE '}' |
| |
| struct prog_info { |
| /* When we're reading a script command from a string, `prog.base' |
| points to the first character in the string, 'prog.cur' points |
| to the current character in the string, and 'prog.end' points |
| to the end of the string. This allows us to compile script |
| strings that contain nulls. */ |
| const unsigned char *base; |
| const unsigned char *cur; |
| const unsigned char *end; |
| |
| /* This is the current script file. If it is NULL, we are reading |
| from a string stored at `prog.cur' instead. If both `prog.file' |
| and `prog.cur' are NULL, we're in trouble! */ |
| FILE *file; |
| }; |
| |
| /* Information used to give out useful and informative error messages. */ |
| struct error_info { |
| /* This is the name of the current script file. */ |
| const char *name; |
| |
| /* This is the number of the current script line that we're compiling. */ |
| countT line; |
| |
| /* This is the index of the "-e" expressions on the command line. */ |
| countT string_expr_count; |
| }; |
| |
| |
| /* Label structure used to resolve GOTO's, labels, and block beginnings. */ |
| struct sed_label { |
| countT v_index; /* index of vector element being referenced */ |
| char *name; /* NUL-terminated name of the label */ |
| struct error_info err_info; /* track where `{}' blocks start */ |
| struct sed_label *next; /* linked list (stack) */ |
| }; |
| |
| struct special_files { |
| struct output outf; |
| FILE **pfp; |
| }; |
| |
| FILE *my_stdin, *my_stdout, *my_stderr; |
| struct special_files special_files[] = { |
| { { "/dev/stdin", false, NULL, NULL }, &my_stdin }, |
| { { "/dev/stdout", false, NULL, NULL }, &my_stdout }, |
| { { "/dev/stderr", false, NULL, NULL }, &my_stderr }, |
| { { NULL, false, NULL, NULL }, NULL } |
| }; |
| |
| |
| /* Where we are in the processing of the input. */ |
| static struct prog_info prog; |
| static struct error_info cur_input; |
| |
| /* Information about labels and jumps-to-labels. This is used to do |
| the required backpatching after we have compiled all the scripts. */ |
| static struct sed_label *jumps = NULL; |
| static struct sed_label *labels = NULL; |
| |
| /* We wish to detect #n magic only in the first input argument; |
| this flag tracks when we have consumed the first file of input. */ |
| static bool first_script = true; |
| |
| /* Allow for scripts like "sed -e 'i\' -e foo": */ |
| static struct buffer *pending_text = NULL; |
| static struct text_buf *old_text_buf = NULL; |
| |
| /* Information about block start positions. This is used to backpatch |
| block end positions. */ |
| static struct sed_label *blocks = NULL; |
| |
| /* Use an obstack for compilation. */ |
| static struct obstack obs; |
| |
| /* Various error messages we may want to print */ |
| static const char errors[] = |
| "multiple `!'s\0" |
| "unexpected `,'\0" |
| "invalid usage of +N or ~N as first address\0" |
| "unmatched `{'\0" |
| "unexpected `}'\0" |
| "extra characters after command\0" |
| "expected \\ after `a', `c' or `i'\0" |
| "`}' doesn't want any addresses\0" |
| ": doesn't want any addresses\0" |
| "comments don't accept any addresses\0" |
| "missing command\0" |
| "command only uses one address\0" |
| "unterminated address regex\0" |
| "unterminated `s' command\0" |
| "unterminated `y' command\0" |
| "unknown option to `s'\0" |
| "multiple `p' options to `s' command\0" |
| "multiple `g' options to `s' command\0" |
| "multiple number options to `s' command\0" |
| "number option to `s' command may not be zero\0" |
| "strings for `y' command are different lengths\0" |
| "delimiter character is not a single-byte character\0" |
| "expected newer version of sed\0" |
| "invalid usage of line address 0\0" |
| "unknown command: `%c'"; |
| |
| #define BAD_BANG (errors) |
| #define BAD_COMMA (BAD_BANG + sizeof(N_("multiple `!'s"))) |
| #define BAD_STEP (BAD_COMMA + sizeof(N_("unexpected `,'"))) |
| #define EXCESS_OPEN_BRACE (BAD_STEP + sizeof(N_("invalid usage of +N or ~N as first address"))) |
| #define EXCESS_CLOSE_BRACE (EXCESS_OPEN_BRACE + sizeof(N_("unmatched `{'"))) |
| #define EXCESS_JUNK (EXCESS_CLOSE_BRACE + sizeof(N_("unexpected `}'"))) |
| #define EXPECTED_SLASH (EXCESS_JUNK + sizeof(N_("extra characters after command"))) |
| #define NO_CLOSE_BRACE_ADDR (EXPECTED_SLASH + sizeof(N_("expected \\ after `a', `c' or `i'"))) |
| #define NO_COLON_ADDR (NO_CLOSE_BRACE_ADDR + sizeof(N_("`}' doesn't want any addresses"))) |
| #define NO_SHARP_ADDR (NO_COLON_ADDR + sizeof(N_(": doesn't want any addresses"))) |
| #define NO_COMMAND (NO_SHARP_ADDR + sizeof(N_("comments don't accept any addresses"))) |
| #define ONE_ADDR (NO_COMMAND + sizeof(N_("missing command"))) |
| #define UNTERM_ADDR_RE (ONE_ADDR + sizeof(N_("command only uses one address"))) |
| #define UNTERM_S_CMD (UNTERM_ADDR_RE + sizeof(N_("unterminated address regex"))) |
| #define UNTERM_Y_CMD (UNTERM_S_CMD + sizeof(N_("unterminated `s' command"))) |
| #define UNKNOWN_S_OPT (UNTERM_Y_CMD + sizeof(N_("unterminated `y' command"))) |
| #define EXCESS_P_OPT (UNKNOWN_S_OPT + sizeof(N_("unknown option to `s'"))) |
| #define EXCESS_G_OPT (EXCESS_P_OPT + sizeof(N_("multiple `p' options to `s' command"))) |
| #define EXCESS_N_OPT (EXCESS_G_OPT + sizeof(N_("multiple `g' options to `s' command"))) |
| #define ZERO_N_OPT (EXCESS_N_OPT + sizeof(N_("multiple number options to `s' command"))) |
| #define Y_CMD_LEN (ZERO_N_OPT + sizeof(N_("number option to `s' command may not be zero"))) |
| #define BAD_DELIM (Y_CMD_LEN + sizeof(N_("strings for `y' command are different lengths"))) |
| #define ANCIENT_VERSION (BAD_DELIM + sizeof(N_("delimiter character is not a single-byte character"))) |
| #define INVALID_LINE_0 (ANCIENT_VERSION + sizeof(N_("expected newer version of sed"))) |
| #define UNKNOWN_CMD (INVALID_LINE_0 + sizeof(N_("invalid usage of line address 0"))) |
| #define END_ERRORS (UNKNOWN_CMD + sizeof(N_("unknown command: `%c'"))) |
| |
| static struct output *file_read = NULL; |
| static struct output *file_write = NULL; |
| |
| |
| /* Complain about an unknown command and exit. */ |
| void |
| bad_command(ch) |
| char ch; |
| { |
| const char *msg = _(UNKNOWN_CMD); |
| char *unknown_cmd = xmalloc(strlen(msg)); |
| sprintf(unknown_cmd, msg, ch); |
| bad_prog(unknown_cmd); |
| } |
| |
| /* Complain about a programming error and exit. */ |
| void |
| bad_prog(why) |
| const char *why; |
| { |
| if (cur_input.name) |
| fprintf(stderr, _("%s: file %s line %lu: %s\n"), |
| myname, cur_input.name, CAST(unsigned long)cur_input.line, why); |
| else |
| fprintf(stderr, _("%s: -e expression #%lu, char %lu: %s\n"), |
| myname, |
| CAST(unsigned long)cur_input.string_expr_count, |
| CAST(unsigned long)(prog.cur-prog.base), |
| why); |
| exit(EXIT_FAILURE); |
| } |
| |
| |
| /* Read the next character from the program. Return EOF if there isn't |
| anything to read. Keep cur_input.line up to date, so error messages |
| can be meaningful. */ |
| static int inchar P_((void)); |
| static int |
| inchar() |
| { |
| int ch = EOF; |
| |
| if (prog.cur) |
| { |
| if (prog.cur < prog.end) |
| ch = *prog.cur++; |
| } |
| else if (prog.file) |
| { |
| if (!feof(prog.file)) |
| ch = getc(prog.file); |
| } |
| if (ch == '\n') |
| ++cur_input.line; |
| return ch; |
| } |
| |
| /* unget `ch' so the next call to inchar will return it. */ |
| static void savchar P_((int ch)); |
| static void |
| savchar(ch) |
| int ch; |
| { |
| if (ch == EOF) |
| return; |
| if (ch == '\n' && cur_input.line > 0) |
| --cur_input.line; |
| if (prog.cur) |
| { |
| if (prog.cur <= prog.base || *--prog.cur != ch) |
| panic("Called savchar() with unexpected pushback (%x)", |
| CAST(unsigned char)ch); |
| } |
| else |
| ungetc(ch, prog.file); |
| } |
| |
| /* Read the next non-blank character from the program. */ |
| static int in_nonblank P_((void)); |
| static int |
| in_nonblank() |
| { |
| int ch; |
| do |
| ch = inchar(); |
| while (ISBLANK(ch)); |
| return ch; |
| } |
| |
| /* Read an integer value from the program. */ |
| static countT in_integer P_((int ch)); |
| static countT |
| in_integer(ch) |
| int ch; |
| { |
| countT num = 0; |
| |
| while (ISDIGIT(ch)) |
| { |
| num = num * 10 + ch - '0'; |
| ch = inchar(); |
| } |
| savchar(ch); |
| return num; |
| } |
| |
| static int add_then_next P_((struct buffer *b, int ch)); |
| static int |
| add_then_next(b, ch) |
| struct buffer *b; |
| int ch; |
| { |
| add1_buffer(b, ch); |
| return inchar(); |
| } |
| |
| static char * convert_number P_((char *, char *, const char *, int, int, int)); |
| static char * |
| convert_number(result, buf, bufend, base, maxdigits, default_char) |
| char *result; |
| char *buf; |
| const char *bufend; |
| int base; |
| int maxdigits; |
| int default_char; |
| { |
| int n = 0; |
| char *p; |
| |
| for (p=buf; p < bufend && maxdigits-- > 0; ++p) |
| { |
| int d = -1; |
| switch (*p) |
| { |
| case '0': d = 0x0; break; |
| case '1': d = 0x1; break; |
| case '2': d = 0x2; break; |
| case '3': d = 0x3; break; |
| case '4': d = 0x4; break; |
| case '5': d = 0x5; break; |
| case '6': d = 0x6; break; |
| case '7': d = 0x7; break; |
| case '8': d = 0x8; break; |
| case '9': d = 0x9; break; |
| case 'A': case 'a': d = 0xa; break; |
| case 'B': case 'b': d = 0xb; break; |
| case 'C': case 'c': d = 0xc; break; |
| case 'D': case 'd': d = 0xd; break; |
| case 'E': case 'e': d = 0xe; break; |
| case 'F': case 'f': d = 0xf; break; |
| } |
| if (d < 0 || base <= d) |
| break; |
| n = n * base + d; |
| } |
| if (p == buf) |
| *result = default_char; |
| else |
| *result = n; |
| return p; |
| } |
| |
| |
| /* Read in a filename for a `r', `w', or `s///w' command. */ |
| static struct buffer *read_filename P_((void)); |
| static struct buffer * |
| read_filename() |
| { |
| struct buffer *b; |
| int ch; |
| |
| b = init_buffer(); |
| ch = in_nonblank(); |
| while (ch != EOF && ch != '\n') |
| { |
| #if 0 /*XXX ZZZ 1998-09-12 kpp: added, then had second thoughts*/ |
| if (posixicity == POSIXLY_EXTENDED) |
| if (ch == ';' || ch == '#') |
| { |
| savchar(ch); |
| break; |
| } |
| #endif |
| ch = add_then_next(b, ch); |
| } |
| add1_buffer(b, '\0'); |
| return b; |
| } |
| |
| static struct output *get_openfile P_((struct output **file_ptrs, const char *mode, int fail)); |
| static struct output * |
| get_openfile(file_ptrs, mode, fail) |
| struct output **file_ptrs; |
| const char *mode; |
| int fail; |
| { |
| struct buffer *b; |
| char *file_name; |
| struct output *p; |
| |
| b = read_filename(); |
| file_name = get_buffer(b); |
| for (p=*file_ptrs; p; p=p->link) |
| if (strcmp(p->name, file_name) == 0) |
| break; |
| |
| if (posixicity == POSIXLY_EXTENDED) |
| { |
| /* Check whether it is a special file (stdin, stdout or stderr) */ |
| struct special_files *special = special_files; |
| |
| /* std* sometimes are not constants, so they |
| cannot be used in the initializer for special_files */ |
| my_stdin = stdin; my_stdout = stdout; my_stderr = stderr; |
| for (special = special_files; special->outf.name; special++) |
| if (strcmp(special->outf.name, file_name) == 0) |
| { |
| special->outf.fp = *special->pfp; |
| free_buffer (b); |
| return &special->outf; |
| } |
| } |
| |
| if (!p) |
| { |
| p = OB_MALLOC(&obs, 1, struct output); |
| p->name = ck_strdup(file_name); |
| p->fp = ck_fopen(p->name, mode, fail); |
| p->missing_newline = false; |
| p->link = *file_ptrs; |
| *file_ptrs = p; |
| } |
| free_buffer(b); |
| return p; |
| } |
| |
| |
| static struct sed_cmd *next_cmd_entry P_((struct vector **vectorp)); |
| static struct sed_cmd * |
| next_cmd_entry(vectorp) |
| struct vector **vectorp; |
| { |
| struct sed_cmd *cmd; |
| struct vector *v; |
| |
| v = *vectorp; |
| if (v->v_length == v->v_allocated) |
| { |
| v->v_allocated += VECTOR_ALLOC_INCREMENT; |
| v->v = REALLOC(v->v, v->v_allocated, struct sed_cmd); |
| } |
| |
| cmd = v->v + v->v_length; |
| cmd->a1 = NULL; |
| cmd->a2 = NULL; |
| cmd->range_state = RANGE_INACTIVE; |
| cmd->addr_bang = false; |
| cmd->cmd = '\0'; /* something invalid, to catch bugs early */ |
| |
| *vectorp = v; |
| return cmd; |
| } |
| |
| static int snarf_char_class P_((struct buffer *b, mbstate_t *cur_stat)); |
| static int |
| snarf_char_class(b, cur_stat) |
| struct buffer *b; |
| mbstate_t *cur_stat; |
| { |
| int ch; |
| int state = 0; |
| int delim; |
| bool pending_mb = 0; |
| |
| ch = inchar(); |
| if (ch == '^') |
| ch = add_then_next(b, ch); |
| if (ch == CLOSE_BRACKET) |
| ch = add_then_next(b, ch); |
| |
| /* States are: |
| 0 outside a collation element, character class or collation class |
| 1 after the bracket |
| 2 after the opening ./:/= |
| 3 after the closing ./:/= */ |
| |
| for (;; ch = add_then_next (b, ch)) |
| { |
| pending_mb = BRLEN (ch, cur_stat) != 1; |
| |
| switch (ch) |
| { |
| case EOF: |
| case '\n': |
| return ch; |
| |
| case '.': |
| case ':': |
| case '=': |
| if (pending_mb) |
| continue; |
| |
| if (state == 1) |
| { |
| delim = ch; |
| state = 2; |
| } |
| else if (state == 2 && ch == delim) |
| state = 3; |
| else |
| break; |
| |
| continue; |
| |
| case OPEN_BRACKET: |
| if (pending_mb) |
| continue; |
| |
| if (state == 0) |
| state = 1; |
| continue; |
| |
| case CLOSE_BRACKET: |
| if (pending_mb) |
| continue; |
| |
| if (state == 0 || state == 1) |
| return ch; |
| else if (state == 3) |
| state = 0; |
| |
| break; |
| |
| default: |
| break; |
| } |
| |
| /* Getting a character different from .=: whilst in state 1 |
| goes back to state 0, getting a character different from ] |
| whilst in state 3 goes back to state 2. */ |
| state &= ~1; |
| } |
| } |
| |
| static struct buffer *match_slash P_((int slash, int regex)); |
| static struct buffer * |
| match_slash(slash, regex) |
| int slash; |
| int regex; |
| { |
| struct buffer *b; |
| int ch; |
| mbstate_t cur_stat; |
| |
| memset (&cur_stat, 0, sizeof (mbstate_t)); |
| |
| /* We allow only 1 byte characters for a slash. */ |
| if (BRLEN (slash, &cur_stat) == -2) |
| bad_prog (BAD_DELIM); |
| |
| memset (&cur_stat, 0, sizeof (mbstate_t)); |
| |
| b = init_buffer(); |
| while ((ch = inchar()) != EOF && ch != '\n') |
| { |
| bool pending_mb = !MBSINIT (&cur_stat); |
| if (BRLEN (ch, &cur_stat) == 1 && !pending_mb) |
| { |
| if (ch == slash) |
| return b; |
| else if (ch == '\\') |
| { |
| ch = inchar(); |
| if (ch == EOF) |
| break; |
| #ifndef REG_PERL |
| else if (ch == 'n' && regex) |
| ch = '\n'; |
| #endif |
| else if (ch != '\n' && (ch != slash || (!regex && ch == '&'))) |
| add1_buffer(b, '\\'); |
| } |
| else if (ch == OPEN_BRACKET && regex) |
| { |
| add1_buffer(b, ch); |
| ch = snarf_char_class(b, &cur_stat); |
| if (ch != CLOSE_BRACKET) |
| break; |
| } |
| } |
| |
| add1_buffer(b, ch); |
| } |
| |
| if (ch == '\n') |
| savchar(ch); /* for proper line number in error report */ |
| free_buffer(b); |
| return NULL; |
| } |
| |
| static int mark_subst_opts P_((struct subst *cmd)); |
| static int |
| mark_subst_opts(cmd) |
| struct subst *cmd; |
| { |
| int flags = 0; |
| int ch; |
| |
| cmd->global = false; |
| cmd->print = false; |
| cmd->eval = false; |
| cmd->numb = 0; |
| cmd->outf = NULL; |
| |
| for (;;) |
| switch ( (ch = in_nonblank()) ) |
| { |
| case 'i': /* GNU extension */ |
| case 'I': /* GNU extension */ |
| if (posixicity == POSIXLY_BASIC) |
| bad_prog(_(UNKNOWN_S_OPT)); |
| flags |= REG_ICASE; |
| break; |
| |
| #ifdef REG_PERL |
| case 's': /* GNU extension */ |
| case 'S': /* GNU extension */ |
| if (posixicity == POSIXLY_BASIC) |
| bad_prog(_(UNKNOWN_S_OPT)); |
| if (extended_regexp_flags & REG_PERL) |
| flags |= REG_DOTALL; |
| break; |
| |
| case 'x': /* GNU extension */ |
| case 'X': /* GNU extension */ |
| if (posixicity == POSIXLY_BASIC) |
| bad_prog(_(UNKNOWN_S_OPT)); |
| if (extended_regexp_flags & REG_PERL) |
| flags |= REG_EXTENDED; |
| break; |
| #endif |
| |
| case 'm': /* GNU extension */ |
| case 'M': /* GNU extension */ |
| if (posixicity == POSIXLY_BASIC) |
| bad_prog(_(UNKNOWN_S_OPT)); |
| flags |= REG_NEWLINE; |
| break; |
| |
| case 'e': |
| cmd->eval = true; |
| break; |
| |
| case 'p': |
| if (cmd->print) |
| bad_prog(_(EXCESS_P_OPT)); |
| cmd->print |= (1 << cmd->eval); /* 1=before eval, 2=after */ |
| break; |
| |
| case 'g': |
| if (cmd->global) |
| bad_prog(_(EXCESS_G_OPT)); |
| cmd->global = true; |
| break; |
| |
| case 'w': |
| cmd->outf = get_openfile(&file_write, "w", true); |
| return flags; |
| |
| case '0': case '1': case '2': case '3': case '4': |
| case '5': case '6': case '7': case '8': case '9': |
| if (cmd->numb) |
| bad_prog(_(EXCESS_N_OPT)); |
| cmd->numb = in_integer(ch); |
| if (!cmd->numb) |
| bad_prog(_(ZERO_N_OPT)); |
| break; |
| |
| case CLOSE_BRACE: |
| case '#': |
| savchar(ch); |
| /* Fall Through */ |
| case EOF: |
| case '\n': |
| case ';': |
| return flags; |
| |
| case '\r': |
| if (inchar() == '\n') |
| return flags; |
| /* FALLTHROUGH */ |
| |
| default: |
| bad_prog(_(UNKNOWN_S_OPT)); |
| /*NOTREACHED*/ |
| } |
| } |
| |
| |
| /* read in a label for a `:', `b', or `t' command */ |
| static char *read_label P_((void)); |
| static char * |
| read_label() |
| { |
| struct buffer *b; |
| int ch; |
| char *ret; |
| |
| b = init_buffer(); |
| ch = in_nonblank(); |
| |
| while (ch != EOF && ch != '\n' |
| && !ISBLANK(ch) && ch != ';' && ch != CLOSE_BRACE && ch != '#') |
| ch = add_then_next (b, ch); |
| |
| savchar(ch); |
| add1_buffer(b, '\0'); |
| ret = ck_strdup(get_buffer(b)); |
| free_buffer(b); |
| return ret; |
| } |
| |
| /* Store a label (or label reference) created by a `:', `b', or `t' |
| command so that the jump to/from the label can be backpatched after |
| compilation is complete, or a reference created by a `{' to be |
| backpatched when the corresponding `}' is found. */ |
| static struct sed_label *setup_label |
| P_((struct sed_label *, countT, char *, const struct error_info *)); |
| static struct sed_label * |
| setup_label(list, idx, name, err_info) |
| struct sed_label *list; |
| countT idx; |
| char *name; |
| const struct error_info *err_info; |
| { |
| struct sed_label *ret = OB_MALLOC(&obs, 1, struct sed_label); |
| ret->v_index = idx; |
| ret->name = name; |
| if (err_info) |
| MEMCPY(&ret->err_info, err_info, sizeof (ret->err_info)); |
| ret->next = list; |
| return ret; |
| } |
| |
| static struct sed_label *release_label P_((struct sed_label *list_head)); |
| static struct sed_label * |
| release_label(list_head) |
| struct sed_label *list_head; |
| { |
| struct sed_label *ret; |
| |
| if (!list_head) |
| return NULL; |
| ret = list_head->next; |
| |
| FREE(list_head->name); |
| |
| #if 0 |
| /* We use obstacks */ |
| FREE(list_head); |
| #endif |
| return ret; |
| } |
| |
| static struct replacement *new_replacement P_((char *, size_t, |
| enum replacement_types)); |
| static struct replacement * |
| new_replacement(text, length, type) |
| char *text; |
| size_t length; |
| enum replacement_types type; |
| { |
| struct replacement *r = OB_MALLOC(&obs, 1, struct replacement); |
| |
| r->prefix = text; |
| r->prefix_length = length; |
| r->subst_id = -1; |
| r->repl_type = type; |
| |
| /* r-> next = NULL; */ |
| return r; |
| } |
| |
| static void setup_replacement P_((struct subst *, const char *, size_t)); |
| static void |
| setup_replacement(sub, text, length) |
| struct subst *sub; |
| const char *text; |
| size_t length; |
| { |
| char *base; |
| char *p; |
| char *text_end; |
| enum replacement_types repl_type = REPL_ASIS, save_type = REPL_ASIS; |
| struct replacement root; |
| struct replacement *tail; |
| |
| sub->max_id = 0; |
| base = MEMDUP(text, length, char); |
| length = normalize_text(base, length, TEXT_REPLACEMENT); |
| |
| text_end = base + length; |
| tail = &root; |
| |
| for (p=base; p<text_end; ++p) |
| { |
| if (*p == '\\') |
| { |
| /* Preceding the backslash may be some literal text: */ |
| tail = tail->next = |
| new_replacement(base, CAST(size_t)(p - base), repl_type); |
| |
| repl_type = save_type; |
| |
| /* Skip the backslash and look for a numeric back-reference, |
| or a case-munging escape if not in POSIX mode: */ |
| ++p; |
| if (p == text_end) |
| ++tail->prefix_length; |
| |
| else if (posixicity == POSIXLY_BASIC && !ISDIGIT (*p)) |
| { |
| p[-1] = *p; |
| ++tail->prefix_length; |
| } |
| |
| else |
| switch (*p) |
| { |
| case '0': case '1': case '2': case '3': case '4': |
| case '5': case '6': case '7': case '8': case '9': |
| tail->subst_id = *p - '0'; |
| if (sub->max_id < tail->subst_id) |
| sub->max_id = tail->subst_id; |
| break; |
| |
| case 'L': |
| repl_type = REPL_LOWERCASE; |
| save_type = REPL_LOWERCASE; |
| break; |
| |
| case 'U': |
| repl_type = REPL_UPPERCASE; |
| save_type = REPL_UPPERCASE; |
| break; |
| |
| case 'E': |
| repl_type = REPL_ASIS; |
| save_type = REPL_ASIS; |
| break; |
| |
| case 'l': |
| save_type = repl_type; |
| repl_type |= REPL_LOWERCASE_FIRST; |
| break; |
| |
| case 'u': |
| save_type = repl_type; |
| repl_type |= REPL_UPPERCASE_FIRST; |
| break; |
| |
| default: |
| p[-1] = *p; |
| ++tail->prefix_length; |
| } |
| |
| base = p + 1; |
| } |
| else if (*p == '&') |
| { |
| /* Preceding the ampersand may be some literal text: */ |
| tail = tail->next = |
| new_replacement(base, CAST(size_t)(p - base), repl_type); |
| |
| repl_type = save_type; |
| tail->subst_id = 0; |
| base = p + 1; |
| } |
| } |
| /* There may be some trailing literal text: */ |
| if (base < text_end) |
| tail = tail->next = |
| new_replacement(base, CAST(size_t)(text_end - base), repl_type); |
| |
| tail->next = NULL; |
| sub->replacement = root.next; |
| } |
| |
| static void read_text P_((struct text_buf *buf, int leadin_ch)); |
| static void |
| read_text(buf, leadin_ch) |
| struct text_buf *buf; |
| int leadin_ch; |
| { |
| int ch; |
| |
| /* Should we start afresh (as opposed to continue a partial text)? */ |
| if (buf) |
| { |
| if (pending_text) |
| free_buffer(pending_text); |
| pending_text = init_buffer(); |
| buf->text = NULL; |
| buf->text_length = 0; |
| old_text_buf = buf; |
| } |
| /* assert(old_text_buf != NULL); */ |
| |
| if (leadin_ch == EOF) |
| return; |
| |
| if (leadin_ch != '\n') |
| add1_buffer(pending_text, leadin_ch); |
| |
| ch = inchar(); |
| while (ch != EOF && ch != '\n') |
| { |
| if (ch == '\\') |
| { |
| ch = inchar(); |
| if (ch != EOF) |
| add1_buffer (pending_text, '\\'); |
| } |
| |
| if (ch == EOF) |
| { |
| add1_buffer (pending_text, '\n'); |
| return; |
| } |
| |
| ch = add_then_next (pending_text, ch); |
| } |
| |
| add1_buffer(pending_text, '\n'); |
| if (!buf) |
| buf = old_text_buf; |
| buf->text_length = normalize_text (get_buffer (pending_text), |
| size_buffer (pending_text), TEXT_BUFFER); |
| buf->text = MEMDUP(get_buffer(pending_text), buf->text_length, char); |
| free_buffer(pending_text); |
| pending_text = NULL; |
| } |
| |
| |
| /* Try to read an address for a sed command. If it succeeds, |
| return non-zero and store the resulting address in `*addr'. |
| If the input doesn't look like an address read nothing |
| and return zero. */ |
| static bool compile_address P_((struct addr *addr, int ch)); |
| static bool |
| compile_address(addr, ch) |
| struct addr *addr; |
| int ch; |
| { |
| addr->addr_type = ADDR_IS_NULL; |
| addr->addr_step = 0; |
| addr->addr_number = ~(countT)0; /* extremely unlikely to ever match */ |
| addr->addr_regex = NULL; |
| |
| if (ch == '/' || ch == '\\') |
| { |
| int flags = 0; |
| struct buffer *b; |
| addr->addr_type = ADDR_IS_REGEX; |
| if (ch == '\\') |
| ch = inchar(); |
| if ( !(b = match_slash(ch, true)) ) |
| bad_prog(_(UNTERM_ADDR_RE)); |
| |
| for(;;) |
| { |
| ch = in_nonblank(); |
| if (posixicity == POSIXLY_BASIC) |
| goto posix_address_modifier; |
| switch(ch) |
| { |
| case 'I': /* GNU extension */ |
| flags |= REG_ICASE; |
| break; |
| |
| #ifdef REG_PERL |
| case 'S': /* GNU extension */ |
| if (extended_regexp_flags & REG_PERL) |
| flags |= REG_DOTALL; |
| break; |
| |
| case 'X': /* GNU extension */ |
| if (extended_regexp_flags & REG_PERL) |
| flags |= REG_EXTENDED; |
| break; |
| #endif |
| |
| case 'M': /* GNU extension */ |
| flags |= REG_NEWLINE; |
| break; |
| |
| default: |
| posix_address_modifier: |
| savchar (ch); |
| addr->addr_regex = compile_regex (b, flags, 0); |
| free_buffer(b); |
| return true; |
| } |
| } |
| } |
| else if (ISDIGIT(ch)) |
| { |
| addr->addr_number = in_integer(ch); |
| addr->addr_type = ADDR_IS_NUM; |
| ch = in_nonblank(); |
| if (ch != '~' || posixicity == POSIXLY_BASIC) |
| { |
| savchar(ch); |
| } |
| else |
| { |
| countT step = in_integer(in_nonblank()); |
| if (step > 0) |
| { |
| addr->addr_step = step; |
| addr->addr_type = ADDR_IS_NUM_MOD; |
| } |
| } |
| } |
| else if ((ch == '+' || ch == '~') && posixicity != POSIXLY_BASIC) |
| { |
| addr->addr_step = in_integer(in_nonblank()); |
| if (addr->addr_step==0) |
| ; /* default to ADDR_IS_NULL; forces matching to stop on next line */ |
| else if (ch == '+') |
| addr->addr_type = ADDR_IS_STEP; |
| else |
| addr->addr_type = ADDR_IS_STEP_MOD; |
| } |
| else if (ch == '$') |
| { |
| addr->addr_type = ADDR_IS_LAST; |
| } |
| else |
| return false; |
| |
| return true; |
| } |
| |
| /* Read a program (or a subprogram within `{' `}' pairs) in and store |
| the compiled form in `*vector'. Return a pointer to the new vector. */ |
| static struct vector *compile_program P_((struct vector *)); |
| static struct vector * |
| compile_program(vector) |
| struct vector *vector; |
| { |
| struct sed_cmd *cur_cmd; |
| struct buffer *b; |
| int ch; |
| |
| if (!vector) |
| { |
| vector = MALLOC(1, struct vector); |
| vector->v = NULL; |
| vector->v_allocated = 0; |
| vector->v_length = 0; |
| |
| obstack_init (&obs); |
| } |
| if (pending_text) |
| read_text(NULL, '\n'); |
| |
| for (;;) |
| { |
| struct addr a; |
| |
| while ((ch=inchar()) == ';' || ISSPACE(ch)) |
| ; |
| if (ch == EOF) |
| break; |
| |
| cur_cmd = next_cmd_entry(&vector); |
| if (compile_address(&a, ch)) |
| { |
| if (a.addr_type == ADDR_IS_STEP |
| || a.addr_type == ADDR_IS_STEP_MOD) |
| bad_prog(_(BAD_STEP)); |
| |
| cur_cmd->a1 = MEMDUP(&a, 1, struct addr); |
| ch = in_nonblank(); |
| if (ch == ',') |
| { |
| if (!compile_address(&a, in_nonblank())) |
| bad_prog(_(BAD_COMMA)); |
| |
| cur_cmd->a2 = MEMDUP(&a, 1, struct addr); |
| ch = in_nonblank(); |
| } |
| |
| if ((cur_cmd->a1->addr_type == ADDR_IS_NUM |
| && cur_cmd->a1->addr_number == 0) |
| && ((!cur_cmd->a2 || cur_cmd->a2->addr_type != ADDR_IS_REGEX) |
| || posixicity == POSIXLY_BASIC)) |
| bad_prog(_(INVALID_LINE_0)); |
| } |
| if (ch == '!') |
| { |
| cur_cmd->addr_bang = true; |
| ch = in_nonblank(); |
| if (ch == '!') |
| bad_prog(_(BAD_BANG)); |
| } |
| |
| /* Do not accept extended commands in --posix mode. Also, |
| a few commands only accept one address in that mode. */ |
| if (posixicity == POSIXLY_BASIC) |
| switch (ch) |
| { |
| case 'e': case 'v': case 'z': case 'L': |
| case 'Q': case 'T': case 'R': case 'W': |
| bad_command(ch); |
| |
| case 'a': case 'i': case 'l': |
| case '=': case 'r': |
| if (cur_cmd->a2) |
| bad_prog(_(ONE_ADDR)); |
| } |
| |
| cur_cmd->cmd = ch; |
| switch (ch) |
| { |
| case '#': |
| if (cur_cmd->a1) |
| bad_prog(_(NO_SHARP_ADDR)); |
| ch = inchar(); |
| if (ch=='n' && first_script && cur_input.line < 2) |
| if ( (prog.base && prog.cur==2+prog.base) |
| || (prog.file && !prog.base && 2==ftell(prog.file))) |
| no_default_output = true; |
| while (ch != EOF && ch != '\n') |
| ch = inchar(); |
| continue; /* restart the for (;;) loop */ |
| |
| case 'v': |
| /* This is an extension. Programs needing GNU sed might start |
| * with a `v' command so that other seds will stop. |
| * We compare the version and ignore POSIXLY_CORRECT. |
| */ |
| { |
| char *version = read_label (); |
| char *compared_version; |
| compared_version = (*version == '\0') ? "4.0" : version; |
| if (strverscmp (compared_version, SED_FEATURE_VERSION) > 0) |
| bad_prog(_(ANCIENT_VERSION)); |
| |
| free (version); |
| posixicity = POSIXLY_EXTENDED; |
| } |
| continue; |
| |
| case '{': |
| blocks = setup_label(blocks, vector->v_length, NULL, &cur_input); |
| cur_cmd->addr_bang = !cur_cmd->addr_bang; |
| break; |
| |
| case '}': |
| if (!blocks) |
| bad_prog(_(EXCESS_CLOSE_BRACE)); |
| if (cur_cmd->a1) |
| bad_prog(_(NO_CLOSE_BRACE_ADDR)); |
| ch = in_nonblank(); |
| if (ch == CLOSE_BRACE || ch == '#') |
| savchar(ch); |
| else if (ch != EOF && ch != '\n' && ch != ';') |
| bad_prog(_(EXCESS_JUNK)); |
| |
| vector->v[blocks->v_index].x.jump_index = vector->v_length; |
| blocks = release_label(blocks); /* done with this entry */ |
| break; |
| |
| case 'e': |
| ch = in_nonblank(); |
| if (ch == EOF || ch == '\n') |
| { |
| cur_cmd->x.cmd_txt.text_length = 0; |
| break; |
| } |
| else |
| goto read_text_to_slash; |
| |
| case 'a': |
| case 'i': |
| case 'c': |
| ch = in_nonblank(); |
| |
| read_text_to_slash: |
| if (ch == EOF) |
| bad_prog(_(EXPECTED_SLASH)); |
| |
| if (ch == '\\') |
| ch = inchar(); |
| else |
| { |
| if (posixicity == POSIXLY_BASIC) |
| bad_prog(_(EXPECTED_SLASH)); |
| savchar(ch); |
| ch = '\n'; |
| } |
| |
| read_text(&cur_cmd->x.cmd_txt, ch); |
| break; |
| |
| case ':': |
| if (cur_cmd->a1) |
| bad_prog(_(NO_COLON_ADDR)); |
| labels = setup_label(labels, vector->v_length, read_label(), NULL); |
| break; |
| |
| case 'T': |
| case 'b': |
| case 't': |
| jumps = setup_label(jumps, vector->v_length, read_label(), NULL); |
| break; |
| |
| case 'Q': |
| case 'q': |
| if (cur_cmd->a2) |
| bad_prog(_(ONE_ADDR)); |
| /* Fall through */ |
| |
| case 'L': |
| case 'l': |
| ch = in_nonblank(); |
| if (ISDIGIT(ch) && posixicity != POSIXLY_BASIC) |
| { |
| cur_cmd->x.int_arg = in_integer(ch); |
| ch = in_nonblank(); |
| } |
| else |
| cur_cmd->x.int_arg = -1; |
| |
| if (ch == CLOSE_BRACE || ch == '#') |
| savchar(ch); |
| else if (ch != EOF && ch != '\n' && ch != ';') |
| bad_prog(_(EXCESS_JUNK)); |
| |
| break; |
| |
| case '=': |
| case 'd': |
| case 'D': |
| case 'g': |
| case 'G': |
| case 'h': |
| case 'H': |
| case 'n': |
| case 'N': |
| case 'p': |
| case 'P': |
| case 'z': |
| case 'x': |
| ch = in_nonblank(); |
| if (ch == CLOSE_BRACE || ch == '#') |
| savchar(ch); |
| else if (ch != EOF && ch != '\n' && ch != ';') |
| bad_prog(_(EXCESS_JUNK)); |
| break; |
| |
| case 'r': |
| b = read_filename(); |
| cur_cmd->x.fname = ck_strdup(get_buffer(b)); |
| free_buffer(b); |
| break; |
| |
| case 'R': |
| cur_cmd->x.fp = get_openfile(&file_read, read_mode, false)->fp; |
| break; |
| |
| case 'W': |
| case 'w': |
| cur_cmd->x.outf = get_openfile(&file_write, "w", true); |
| break; |
| |
| case 's': |
| { |
| struct buffer *b2; |
| int flags; |
| int slash; |
| |
| slash = inchar(); |
| if ( !(b = match_slash(slash, true)) ) |
| bad_prog(_(UNTERM_S_CMD)); |
| if ( !(b2 = match_slash(slash, false)) ) |
| bad_prog(_(UNTERM_S_CMD)); |
| |
| cur_cmd->x.cmd_subst = OB_MALLOC(&obs, 1, struct subst); |
| setup_replacement(cur_cmd->x.cmd_subst, |
| get_buffer(b2), size_buffer(b2)); |
| free_buffer(b2); |
| |
| flags = mark_subst_opts(cur_cmd->x.cmd_subst); |
| cur_cmd->x.cmd_subst->regx = |
| compile_regex(b, flags, cur_cmd->x.cmd_subst->max_id + 1); |
| free_buffer(b); |
| } |
| break; |
| |
| case 'y': |
| { |
| size_t len, dest_len; |
| int slash; |
| struct buffer *b2; |
| char *src_buf, *dest_buf; |
| |
| slash = inchar(); |
| if ( !(b = match_slash(slash, false)) ) |
| bad_prog(_(UNTERM_Y_CMD)); |
| src_buf = get_buffer(b); |
| len = normalize_text(src_buf, size_buffer (b), TEXT_BUFFER); |
| |
| if ( !(b2 = match_slash(slash, false)) ) |
| bad_prog(_(UNTERM_Y_CMD)); |
| dest_buf = get_buffer(b2); |
| dest_len = normalize_text(dest_buf, size_buffer (b2), TEXT_BUFFER); |
| |
| if (mb_cur_max > 1) |
| { |
| int i, j, idx, src_char_num; |
| size_t *src_lens = MALLOC(len, size_t); |
| char **trans_pairs; |
| size_t mbclen; |
| mbstate_t cur_stat; |
| |
| /* Enumerate how many character the source buffer has. */ |
| memset(&cur_stat, 0, sizeof(mbstate_t)); |
| for (i = 0, j = 0; i < len;) |
| { |
| mbclen = MBRLEN (src_buf + i, len - i, &cur_stat); |
| /* An invalid sequence, or a truncated multibyte character. |
| We treat it as a singlebyte character. */ |
| if (mbclen == (size_t) -1 || mbclen == (size_t) -2 |
| || mbclen == 0) |
| mbclen = 1; |
| src_lens[j++] = mbclen; |
| i += mbclen; |
| } |
| src_char_num = j; |
| |
| memset(&cur_stat, 0, sizeof(mbstate_t)); |
| idx = 0; |
| |
| /* trans_pairs = {src(0), dest(0), src(1), dest(1), ..., NULL} |
| src(i) : pointer to i-th source character. |
| dest(i) : pointer to i-th destination character. |
| NULL : terminator */ |
| trans_pairs = MALLOC(2 * src_char_num + 1, char*); |
| cur_cmd->x.translatemb = trans_pairs; |
| for (i = 0; i < src_char_num; i++) |
| { |
| if (idx >= dest_len) |
| bad_prog(_(Y_CMD_LEN)); |
| |
| /* Set the i-th source character. */ |
| trans_pairs[2 * i] = MALLOC(src_lens[i] + 1, char); |
| strncpy(trans_pairs[2 * i], src_buf, src_lens[i]); |
| trans_pairs[2 * i][src_lens[i]] = '\0'; |
| src_buf += src_lens[i]; /* Forward to next character. */ |
| |
| /* Fetch the i-th destination character. */ |
| mbclen = MBRLEN (dest_buf + idx, dest_len - idx, &cur_stat); |
| /* An invalid sequence, or a truncated multibyte character. |
| We treat it as a singlebyte character. */ |
| if (mbclen == (size_t) -1 || mbclen == (size_t) -2 |
| || mbclen == 0) |
| mbclen = 1; |
| |
| /* Set the i-th destination character. */ |
| trans_pairs[2 * i + 1] = MALLOC(mbclen + 1, char); |
| strncpy(trans_pairs[2 * i + 1], dest_buf + idx, mbclen); |
| trans_pairs[2 * i + 1][mbclen] = '\0'; |
| idx += mbclen; /* Forward to next character. */ |
| } |
| trans_pairs[2 * i] = NULL; |
| if (idx != dest_len) |
| bad_prog(_(Y_CMD_LEN)); |
| } |
| else |
| { |
| unsigned char *translate = |
| OB_MALLOC(&obs, YMAP_LENGTH, unsigned char); |
| unsigned char *ustring = CAST(unsigned char *)src_buf; |
| |
| if (len != dest_len) |
| bad_prog(_(Y_CMD_LEN)); |
| |
| for (len = 0; len < YMAP_LENGTH; len++) |
| translate[len] = len; |
| |
| while (dest_len--) |
| translate[*ustring++] = (unsigned char)*dest_buf++; |
| |
| cur_cmd->x.translate = translate; |
| } |
| |
| if ((ch = in_nonblank()) != EOF && ch != '\n' && ch != ';') |
| bad_prog(_(EXCESS_JUNK)); |
| |
| free_buffer(b); |
| free_buffer(b2); |
| } |
| break; |
| |
| case EOF: |
| bad_prog(_(NO_COMMAND)); |
| /*NOTREACHED*/ |
| |
| default: |
| bad_command (ch); |
| /*NOTREACHED*/ |
| } |
| |
| /* this is buried down here so that "continue" statements will miss it */ |
| ++vector->v_length; |
| } |
| return vector; |
| } |
| |
| |
| /* deal with \X escapes */ |
| size_t |
| normalize_text(buf, len, buftype) |
| char *buf; |
| size_t len; |
| enum text_types buftype; |
| { |
| const char *bufend = buf + len; |
| char *p = buf; |
| char *q = buf; |
| |
| /* This variable prevents normalizing text within bracket |
| subexpressions when conforming to POSIX. If 0, we |
| are not within a bracket expression. If -1, we are within a |
| bracket expression but are not within [.FOO.], [=FOO=], |
| or [:FOO:]. Otherwise, this is the '.', '=', or ':' |
| respectively within these three types of subexpressions. */ |
| int bracket_state = 0; |
| |
| int mbclen; |
| mbstate_t cur_stat; |
| memset(&cur_stat, 0, sizeof(mbstate_t)); |
| |
| while (p < bufend) |
| { |
| int c; |
| mbclen = MBRLEN (p, bufend - p, &cur_stat); |
| if (mbclen != 1) |
| { |
| /* An invalid sequence, or a truncated multibyte character. |
| We treat it as a singlebyte character. */ |
| if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0) |
| mbclen = 1; |
| |
| memmove (q, p, mbclen); |
| q += mbclen; |
| p += mbclen; |
| continue; |
| } |
| |
| if (*p == '\\' && p+1 < bufend && bracket_state == 0) |
| switch ( (c = *++p) ) |
| { |
| #if defined __STDC__ && __STDC__-0 |
| case 'a': *q++ = '\a'; p++; continue; |
| #else /* Not STDC; we'll just assume ASCII */ |
| case 'a': *q++ = '\007'; p++; continue; |
| #endif |
| /* case 'b': *q++ = '\b'; p++; continue; --- conflicts with \b RE */ |
| case 'f': *q++ = '\f'; p++; continue; |
| case '\n': /*fall through */ |
| case 'n': *q++ = '\n'; p++; continue; |
| case 'r': *q++ = '\r'; p++; continue; |
| case 't': *q++ = '\t'; p++; continue; |
| case 'v': *q++ = '\v'; p++; continue; |
| |
| case 'd': /* decimal byte */ |
| p = convert_number(q, p+1, bufend, 10, 3, 'd'); |
| q++; |
| continue; |
| |
| case 'x': /* hexadecimal byte */ |
| p = convert_number(q, p+1, bufend, 16, 2, 'x'); |
| q++; |
| continue; |
| |
| #ifdef REG_PERL |
| case '0': case '1': case '2': case '3': |
| case '4': case '5': case '6': case '7': |
| if ((extended_regexp_flags & REG_PERL) |
| && p+1 < bufend |
| && p[1] >= '0' && p[1] <= '9') |
| { |
| p = convert_number(q, p, bufend, 8, 3, *p); |
| q++; |
| } |
| else |
| { |
| /* we just pass the \ up one level for interpretation */ |
| if (buftype != TEXT_BUFFER) |
| *q++ = '\\'; |
| } |
| |
| continue; |
| |
| case 'o': /* octal byte */ |
| if (!(extended_regexp_flags & REG_PERL)) |
| { |
| p = convert_number(q, p+1, bufend, 8, 3, 'o'); |
| q++; |
| } |
| else |
| { |
| /* we just pass the \ up one level for interpretation */ |
| if (buftype != TEXT_BUFFER) |
| *q++ = '\\'; |
| } |
| |
| continue; |
| #else |
| case 'o': /* octal byte */ |
| p = convert_number(q, p+1, bufend, 8, 3, 'o'); |
| q++; |
| continue; |
| #endif |
| |
| case 'c': |
| if (++p < bufend) |
| { |
| *q++ = toupper(*p) ^ 0x40; |
| p++; |
| continue; |
| } |
| else |
| { |
| /* we just pass the \ up one level for interpretation */ |
| if (buftype != TEXT_BUFFER) |
| *q++ = '\\'; |
| continue; |
| } |
| |
| default: |
| /* we just pass the \ up one level for interpretation */ |
| if (buftype != TEXT_BUFFER) |
| *q++ = '\\'; |
| break; |
| } |
| else if (buftype == TEXT_REGEX && posixicity != POSIXLY_EXTENDED) |
| switch (*p) |
| { |
| case '[': |
| if (!bracket_state) |
| bracket_state = -1; |
| break; |
| |
| case ':': |
| case '.': |
| case '=': |
| if (bracket_state == -1 && p[-1] == '[') |
| bracket_state = *p; |
| break; |
| |
| case ']': |
| if (bracket_state == 0) |
| ; |
| else if (bracket_state == -1) |
| bracket_state = 0; |
| else if (p[-2] != bracket_state && p[-1] == bracket_state) |
| bracket_state = -1; |
| break; |
| } |
| |
| *q++ = *p++; |
| } |
| return (size_t)(q - buf); |
| } |
| |
| |
| /* `str' is a string (from the command line) that contains a sed command. |
| Compile the command, and add it to the end of `cur_program'. */ |
| struct vector * |
| compile_string(cur_program, str, len) |
| struct vector *cur_program; |
| char *str; |
| size_t len; |
| { |
| static countT string_expr_count = 0; |
| struct vector *ret; |
| |
| prog.file = NULL; |
| prog.base = CAST(unsigned char *)str; |
| prog.cur = prog.base; |
| prog.end = prog.cur + len; |
| |
| cur_input.line = 0; |
| cur_input.name = NULL; |
| cur_input.string_expr_count = ++string_expr_count; |
| |
| ret = compile_program(cur_program); |
| prog.base = NULL; |
| prog.cur = NULL; |
| prog.end = NULL; |
| |
| first_script = false; |
| return ret; |
| } |
| |
| /* `cmdfile' is the name of a file containing sed commands. |
| Read them in and add them to the end of `cur_program'. |
| */ |
| struct vector * |
| compile_file(cur_program, cmdfile) |
| struct vector *cur_program; |
| const char *cmdfile; |
| { |
| struct vector *ret; |
| |
| prog.file = stdin; |
| if (cmdfile[0] != '-' || cmdfile[1] != '\0') |
| prog.file = ck_fopen(cmdfile, "rt", true); |
| |
| cur_input.line = 1; |
| cur_input.name = cmdfile; |
| cur_input.string_expr_count = 0; |
| |
| ret = compile_program(cur_program); |
| if (prog.file != stdin) |
| ck_fclose(prog.file); |
| prog.file = NULL; |
| |
| first_script = false; |
| return ret; |
| } |
| |
| /* Make any checks which require the whole program to have been read. |
| In particular: this backpatches the jump targets. |
| Any cleanup which can be done after these checks is done here also. */ |
| void |
| check_final_program(program) |
| struct vector *program; |
| { |
| struct sed_label *go; |
| struct sed_label *lbl; |
| |
| /* do all "{"s have a corresponding "}"? */ |
| if (blocks) |
| { |
| /* update info for error reporting: */ |
| MEMCPY(&cur_input, &blocks->err_info, sizeof (cur_input)); |
| bad_prog(_(EXCESS_OPEN_BRACE)); |
| } |
| |
| /* was the final command an unterminated a/c/i command? */ |
| if (pending_text) |
| { |
| old_text_buf->text_length = size_buffer(pending_text); |
| if (old_text_buf->text_length) |
| old_text_buf->text = MEMDUP(get_buffer(pending_text), |
| old_text_buf->text_length, char); |
| free_buffer(pending_text); |
| pending_text = NULL; |
| } |
| |
| for (go = jumps; go; go = release_label(go)) |
| { |
| for (lbl = labels; lbl; lbl = lbl->next) |
| if (strcmp(lbl->name, go->name) == 0) |
| break; |
| if (lbl) |
| { |
| program->v[go->v_index].x.jump_index = lbl->v_index; |
| } |
| else |
| { |
| if (*go->name) |
| panic(_("can't find label for jump to `%s'"), go->name); |
| program->v[go->v_index].x.jump_index = program->v_length; |
| } |
| } |
| jumps = NULL; |
| |
| for (lbl = labels; lbl; lbl = release_label(lbl)) |
| ; |
| labels = NULL; |
| |
| /* There is no longer a need to track file names: */ |
| { |
| struct output *p; |
| |
| for (p=file_read; p; p=p->link) |
| if (p->name) |
| { |
| FREE(p->name); |
| p->name = NULL; |
| } |
| |
| for (p=file_write; p; p=p->link) |
| if (p->name) |
| { |
| FREE(p->name); |
| p->name = NULL; |
| } |
| } |
| } |
| |
| /* Rewind all resources which were allocated in this module. */ |
| void |
| rewind_read_files() |
| { |
| struct output *p; |
| |
| for (p=file_read; p; p=p->link) |
| if (p->fp) |
| rewind(p->fp); |
| } |
| |
| /* Release all resources which were allocated in this module. */ |
| void |
| finish_program(program) |
| struct vector *program; |
| { |
| /* close all files... */ |
| { |
| struct output *p, *q; |
| |
| for (p=file_read; p; p=q) |
| { |
| if (p->fp) |
| ck_fclose(p->fp); |
| q = p->link; |
| #if 0 |
| /* We use obstacks. */ |
| FREE(p); |
| #endif |
| } |
| |
| for (p=file_write; p; p=q) |
| { |
| if (p->fp) |
| ck_fclose(p->fp); |
| q = p->link; |
| #if 0 |
| /* We use obstacks. */ |
| FREE(p); |
| #endif |
| } |
| file_read = file_write = NULL; |
| } |
| |
| #ifdef DEBUG_LEAKS |
| obstack_free (&obs, NULL); |
| #endif /*DEBUG_LEAKS*/ |
| } |