| /* GNU SED, a batch stream editor. |
| Copyright (C) 1989,90,91,92,93,94,95,98,99,2002,2003,2004,2005,2006,2008,2009 |
| Free Software Foundation, Inc. |
| |
| This program is free software; you can redistribute it and/or modify |
| it under the terms of the GNU General Public License as published by |
| the Free Software Foundation; either version 3, or (at your option) |
| any later version. |
| |
| This program is distributed in the hope that it will be useful, |
| but WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| GNU General Public License for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with this program; if not, write to the Free Software |
| Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ |
| |
| #undef EXPERIMENTAL_DASH_N_OPTIMIZATION /*don't use -- is very buggy*/ |
| #define INITIAL_BUFFER_SIZE 50 |
| #define FREAD_BUFFER_SIZE 8192 |
| |
| #include "sed.h" |
| |
| #include <stddef.h> |
| #include <stdio.h> |
| #include <ctype.h> |
| |
| #include <errno.h> |
| #ifndef errno |
| extern int errno; |
| #endif |
| |
| #ifndef BOOTSTRAP |
| #include <selinux/selinux.h> |
| #include <selinux/context.h> |
| #endif |
| |
| #ifdef HAVE_UNISTD_H |
| # include <unistd.h> |
| #endif |
| |
| #ifndef BOOTSTRAP |
| #include "acl.h" |
| #endif |
| |
| #ifdef __GNUC__ |
| # if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__-0 >= 7) |
| /* silence warning about unused parameter even for "gcc -W -Wunused" */ |
| # define UNUSED __attribute__((unused)) |
| # endif |
| #endif |
| #ifndef UNUSED |
| # define UNUSED |
| #endif |
| |
| #ifdef HAVE_STRINGS_H |
| # include <strings.h> |
| #else |
| # include <string.h> |
| #endif /*HAVE_STRINGS_H*/ |
| #ifdef HAVE_MEMORY_H |
| # include <memory.h> |
| #endif |
| |
| #ifndef HAVE_STRCHR |
| # define strchr index |
| # define strrchr rindex |
| #endif |
| |
| #ifdef HAVE_STDLIB_H |
| # include <stdlib.h> |
| #endif |
| #ifndef EXIT_SUCCESS |
| # define EXIT_SUCCESS 0 |
| #endif |
| |
| #ifdef HAVE_SYS_TYPES_H |
| # include <sys/types.h> |
| #endif |
| |
| #include <sys/stat.h> |
| #include "stat-macros.h" |
| |
| |
| /* Sed operates a line at a time. */ |
| struct line { |
| char *text; /* Pointer to line allocated by malloc. */ |
| char *active; /* Pointer to non-consumed part of text. */ |
| size_t length; /* Length of text (or active, if used). */ |
| size_t alloc; /* Allocated space for active. */ |
| bool chomped; /* Was a trailing newline dropped? */ |
| #ifdef HAVE_MBRTOWC |
| mbstate_t mbstate; |
| #endif |
| }; |
| |
| #ifdef HAVE_MBRTOWC |
| #define SIZEOF_LINE offsetof (struct line, mbstate) |
| #else |
| #define SIZEOF_LINE (sizeof (struct line)) |
| #endif |
| |
| /* A queue of text to write out at the end of a cycle |
| (filled by the "a", "r" and "R" commands.) */ |
| struct append_queue { |
| const char *fname; |
| char *text; |
| size_t textlen; |
| struct append_queue *next; |
| bool free; |
| }; |
| |
| /* State information for the input stream. */ |
| struct input { |
| /* The list of yet-to-be-opened files. It is invalid for file_list |
| to be NULL. When *file_list is NULL we are currently processing |
| the last file. */ |
| |
| char **file_list; |
| |
| /* Count of files we failed to open. */ |
| countT bad_count; |
| |
| /* Current input line number (over all files). */ |
| countT line_number; |
| |
| /* True if we'll reset line numbers and addresses before |
| starting to process the next (possibly the first) file. */ |
| bool reset_at_next_file; |
| |
| /* Function to read one line. If FP is NULL, read_fn better not |
| be one which uses fp; in particular, read_always_fail() is |
| recommended. */ |
| bool (*read_fn) P_((struct input *)); /* read one line */ |
| |
| char *out_file_name; |
| |
| const char *in_file_name; |
| |
| /* Owner and mode to be set just before closing the file. */ |
| struct stat st; |
| |
| /* if NULL, none of the following are valid */ |
| FILE *fp; |
| |
| bool no_buffering; |
| }; |
| |
| |
| /* Have we done any replacements lately? This is used by the `t' command. */ |
| static bool replaced = false; |
| |
| /* The current output file (stdout if -i is not being used. */ |
| static struct output output_file; |
| |
| /* The `current' input line. */ |
| static struct line line; |
| |
| /* An input line used to accumulate the result of the s and e commands. */ |
| static struct line s_accum; |
| |
| /* An input line that's been stored by later use by the program */ |
| static struct line hold; |
| |
| /* The buffered input look-ahead. The only field that should be |
| used outside of read_mem_line() or line_init() is buffer.length. */ |
| static struct line buffer; |
| |
| static struct append_queue *append_head = NULL; |
| static struct append_queue *append_tail = NULL; |
| |
| |
| #ifdef BOOTSTRAP |
| /* We can't be sure that the system we're boostrapping on has |
| memchr(), and ../lib/memchr.c requires configuration knowledge |
| about how many bits are in a `long'. This implementation |
| is far from ideal, but it should get us up-and-limping well |
| enough to run the configure script, which is all that matters. |
| */ |
| # ifdef memchr |
| # undef memchr |
| # endif |
| # define memchr bootstrap_memchr |
| |
| static VOID *bootstrap_memchr P_((const VOID *s, int c, size_t n)); |
| static VOID * |
| bootstrap_memchr(s, c, n) |
| const VOID *s; |
| int c; |
| size_t n; |
| { |
| char *p; |
| |
| for (p=(char *)s; n-- > 0; ++p) |
| if (*p == c) |
| return p; |
| return CAST(VOID *)0; |
| } |
| #endif /*BOOTSTRAP*/ |
| |
| /* increase a struct line's length, making some attempt at |
| keeping realloc() calls under control by padding for future growth. */ |
| static void resize_line P_((struct line *, size_t)); |
| static void |
| resize_line(lb, len) |
| struct line *lb; |
| size_t len; |
| { |
| int inactive; |
| inactive = lb->active - lb->text; |
| |
| /* If the inactive part has got to more than two thirds of the buffer, |
| * remove it. */ |
| if (inactive > lb->alloc * 2) |
| { |
| MEMMOVE(lb->text, lb->active, lb->length); |
| lb->alloc += lb->active - lb->text; |
| lb->active = lb->text; |
| inactive = 0; |
| |
| if (lb->alloc > len) |
| return; |
| } |
| |
| lb->alloc *= 2; |
| if (lb->alloc < len) |
| lb->alloc = len; |
| if (lb->alloc < INITIAL_BUFFER_SIZE) |
| lb->alloc = INITIAL_BUFFER_SIZE; |
| |
| lb->text = REALLOC(lb->text, inactive + lb->alloc, char); |
| lb->active = lb->text + inactive; |
| } |
| |
| /* Append `length' bytes from `string' to the line `to'. */ |
| static void str_append P_((struct line *, const char *, size_t)); |
| static void |
| str_append(to, string, length) |
| struct line *to; |
| const char *string; |
| size_t length; |
| { |
| size_t new_length = to->length + length; |
| |
| if (to->alloc < new_length) |
| resize_line(to, new_length); |
| MEMCPY(to->active + to->length, string, length); |
| to->length = new_length; |
| |
| #ifdef HAVE_MBRTOWC |
| if (mb_cur_max > 1 && !is_utf8) |
| while (length) |
| { |
| size_t n = MBRLEN (string, length, &to->mbstate); |
| |
| /* An invalid sequence is treated like a singlebyte character. */ |
| if (n == (size_t) -1) |
| { |
| memset (&to->mbstate, 0, sizeof (to->mbstate)); |
| n = 1; |
| } |
| |
| if (n > 0) |
| { |
| string += n; |
| length -= n; |
| } |
| else |
| break; |
| } |
| #endif |
| } |
| |
| static void str_append_modified P_((struct line *, const char *, size_t, |
| enum replacement_types)); |
| static void |
| str_append_modified(to, string, length, type) |
| struct line *to; |
| const char *string; |
| size_t length; |
| enum replacement_types type; |
| { |
| #ifdef HAVE_MBRTOWC |
| mbstate_t from_stat; |
| |
| if (type == REPL_ASIS) |
| { |
| str_append(to, string, length); |
| return; |
| } |
| |
| if (to->alloc - to->length < length * mb_cur_max) |
| resize_line(to, to->length + length * mb_cur_max); |
| |
| MEMCPY (&from_stat, &to->mbstate, sizeof(mbstate_t)); |
| while (length) |
| { |
| wchar_t wc; |
| int n = MBRTOWC (&wc, string, length, &from_stat); |
| |
| /* An invalid sequence is treated like a singlebyte character. */ |
| if (n == -1) |
| { |
| memset (&to->mbstate, 0, sizeof (from_stat)); |
| n = 1; |
| } |
| |
| if (n > 0) |
| string += n, length -= n; |
| else |
| { |
| /* Incomplete sequence, copy it manually. */ |
| str_append(to, string, length); |
| return; |
| } |
| |
| /* Convert the first character specially... */ |
| if (type & (REPL_UPPERCASE_FIRST | REPL_LOWERCASE_FIRST)) |
| { |
| if (type & REPL_UPPERCASE_FIRST) |
| wc = towupper(wc); |
| else |
| wc = towlower(wc); |
| |
| type &= ~(REPL_LOWERCASE_FIRST | REPL_UPPERCASE_FIRST); |
| if (type == REPL_ASIS) |
| { |
| n = WCRTOMB (to->active + to->length, wc, &to->mbstate); |
| to->length += n; |
| str_append(to, string, length); |
| return; |
| } |
| } |
| |
| else if (type & REPL_UPPERCASE) |
| wc = towupper(wc); |
| else |
| wc = towlower(wc); |
| |
| /* Copy the new wide character to the end of the string. */ |
| n = WCRTOMB (to->active + to->length, wc, &to->mbstate); |
| to->length += n; |
| if (n == -1) |
| { |
| fprintf (stderr, "Case conversion produced an invalid character!"); |
| abort (); |
| } |
| } |
| #else |
| size_t old_length = to->length; |
| char *start, *end; |
| |
| str_append(to, string, length); |
| start = to->active + old_length; |
| end = start + length; |
| |
| /* Now do the required modifications. First \[lu]... */ |
| if (type & REPL_UPPERCASE_FIRST) |
| { |
| *start = toupper(*start); |
| start++; |
| type &= ~REPL_UPPERCASE_FIRST; |
| } |
| else if (type & REPL_LOWERCASE_FIRST) |
| { |
| *start = tolower(*start); |
| start++; |
| type &= ~REPL_LOWERCASE_FIRST; |
| } |
| |
| if (type == REPL_ASIS) |
| return; |
| |
| /* ...and then \[LU] */ |
| if (type == REPL_UPPERCASE) |
| for (; start != end; start++) |
| *start = toupper(*start); |
| else |
| for (; start != end; start++) |
| *start = tolower(*start); |
| #endif |
| } |
| |
| /* Initialize a "struct line" buffer. Copy multibyte state from `state' |
| if not null. */ |
| static void line_init P_((struct line *, struct line *, size_t initial_size)); |
| static void |
| line_init(buf, state, initial_size) |
| struct line *buf; |
| struct line *state; |
| size_t initial_size; |
| { |
| buf->text = MALLOC(initial_size, char); |
| buf->active = buf->text; |
| buf->alloc = initial_size; |
| buf->length = 0; |
| buf->chomped = true; |
| |
| #ifdef HAVE_MBRTOWC |
| if (state) |
| memcpy (&buf->mbstate, &state->mbstate, sizeof (buf->mbstate)); |
| else |
| memset (&buf->mbstate, 0, sizeof (buf->mbstate)); |
| #endif |
| } |
| |
| /* Reset a "struct line" buffer to length zero. Copy multibyte state from |
| `state' if not null. */ |
| static void line_reset P_((struct line *, struct line *)); |
| static void |
| line_reset(buf, state) |
| struct line *buf, *state; |
| { |
| if (buf->alloc == 0) |
| line_init(buf, state, INITIAL_BUFFER_SIZE); |
| else |
| { |
| buf->length = 0; |
| #ifdef HAVE_MBRTOWC |
| if (state) |
| memcpy (&buf->mbstate, &state->mbstate, sizeof (buf->mbstate)); |
| else |
| memset (&buf->mbstate, 0, sizeof (buf->mbstate)); |
| #endif |
| } |
| } |
| |
| /* Copy the contents of the line `from' into the line `to'. |
| This destroys the old contents of `to'. |
| Copy the multibyte state if `state' is true. */ |
| static void line_copy P_((struct line *from, struct line *to, int state)); |
| static void |
| line_copy(from, to, state) |
| struct line *from; |
| struct line *to; |
| int state; |
| { |
| /* Remove the inactive portion in the destination buffer. */ |
| to->alloc += to->active - to->text; |
| |
| if (to->alloc < from->length) |
| { |
| to->alloc *= 2; |
| if (to->alloc < from->length) |
| to->alloc = from->length; |
| if (to->alloc < INITIAL_BUFFER_SIZE) |
| to->alloc = INITIAL_BUFFER_SIZE; |
| /* Use FREE()+MALLOC() instead of REALLOC() to |
| avoid unnecessary copying of old text. */ |
| FREE(to->text); |
| to->text = MALLOC(to->alloc, char); |
| } |
| |
| to->active = to->text; |
| to->length = from->length; |
| to->chomped = from->chomped; |
| MEMCPY(to->active, from->active, from->length); |
| |
| #ifdef HAVE_MBRTOWC |
| if (state) |
| MEMCPY(&to->mbstate, &from->mbstate, sizeof (from->mbstate)); |
| #endif |
| } |
| |
| /* Append the contents of the line `from' to the line `to'. |
| Copy the multibyte state if `state' is true. */ |
| static void line_append P_((struct line *from, struct line *to, int state)); |
| static void |
| line_append(from, to, state) |
| struct line *from; |
| struct line *to; |
| int state; |
| { |
| str_append(to, "\n", 1); |
| str_append(to, from->active, from->length); |
| to->chomped = from->chomped; |
| |
| #ifdef HAVE_MBRTOWC |
| if (state) |
| MEMCPY (&to->mbstate, &from->mbstate, sizeof (from->mbstate)); |
| #endif |
| } |
| |
| /* Exchange two "struct line" buffers. |
| Copy the multibyte state if `state' is true. */ |
| static void line_exchange P_((struct line *a, struct line *b, int state)); |
| static void |
| line_exchange(a, b, state) |
| struct line *a; |
| struct line *b; |
| int state; |
| { |
| struct line t; |
| |
| if (state) |
| { |
| MEMCPY(&t, a, sizeof (struct line)); |
| MEMCPY( a, b, sizeof (struct line)); |
| MEMCPY( b, &t, sizeof (struct line)); |
| } |
| else |
| { |
| MEMCPY(&t, a, SIZEOF_LINE); |
| MEMCPY( a, b, SIZEOF_LINE); |
| MEMCPY( b, &t, SIZEOF_LINE); |
| } |
| } |
| |
| |
| /* dummy function to simplify read_pattern_space() */ |
| static bool read_always_fail P_((struct input *)); |
| static bool |
| read_always_fail(input) |
| struct input *input UNUSED; |
| { |
| return false; |
| } |
| |
| static bool read_file_line P_((struct input *)); |
| static bool |
| read_file_line(input) |
| struct input *input; |
| { |
| static char *b; |
| static size_t blen; |
| |
| long result = ck_getline (&b, &blen, input->fp); |
| if (result <= 0) |
| return false; |
| |
| /* Remove the trailing new-line that is left by getline. */ |
| if (b[result - 1] == '\n') |
| --result; |
| else |
| line.chomped = false; |
| |
| str_append(&line, b, result); |
| return true; |
| } |
| |
| |
| static inline void output_missing_newline P_((struct output *)); |
| static inline void |
| output_missing_newline(outf) |
| struct output *outf; |
| { |
| if (outf->missing_newline) |
| { |
| ck_fwrite("\n", 1, 1, outf->fp); |
| outf->missing_newline = false; |
| } |
| } |
| |
| static inline void flush_output P_((FILE *)); |
| static inline void |
| flush_output(fp) |
| FILE *fp; |
| { |
| if (fp != stdout || unbuffered_output) |
| ck_fflush(fp); |
| } |
| |
| static void output_line P_((const char *, size_t, int, struct output *)); |
| static void |
| output_line(text, length, nl, outf) |
| const char *text; |
| size_t length; |
| int nl; |
| struct output *outf; |
| { |
| if (!text) |
| return; |
| |
| output_missing_newline(outf); |
| if (length) |
| ck_fwrite(text, 1, length, outf->fp); |
| if (nl) |
| ck_fwrite("\n", 1, 1, outf->fp); |
| else |
| outf->missing_newline = true; |
| |
| flush_output(outf->fp); |
| } |
| |
| static struct append_queue *next_append_slot P_((void)); |
| static struct append_queue * |
| next_append_slot() |
| { |
| struct append_queue *n = MALLOC(1, struct append_queue); |
| |
| n->fname = NULL; |
| n->text = NULL; |
| n->textlen = 0; |
| n->next = NULL; |
| n->free = false; |
| |
| if (append_tail) |
| append_tail->next = n; |
| else |
| append_head = n; |
| return append_tail = n; |
| } |
| |
| static void release_append_queue P_((void)); |
| static void |
| release_append_queue() |
| { |
| struct append_queue *p, *q; |
| |
| for (p=append_head; p; p=q) |
| { |
| if (p->free) |
| FREE(p->text); |
| |
| q = p->next; |
| FREE(p); |
| } |
| append_head = append_tail = NULL; |
| } |
| |
| static void dump_append_queue P_((void)); |
| static void |
| dump_append_queue() |
| { |
| struct append_queue *p; |
| |
| output_missing_newline(&output_file); |
| for (p=append_head; p; p=p->next) |
| { |
| if (p->text) |
| ck_fwrite(p->text, 1, p->textlen, output_file.fp); |
| |
| if (p->fname) |
| { |
| char buf[FREAD_BUFFER_SIZE]; |
| size_t cnt; |
| FILE *fp; |
| |
| /* "If _fname_ does not exist or cannot be read, it shall |
| be treated as if it were an empty file, causing no error |
| condition." IEEE Std 1003.2-1992 |
| So, don't fail. */ |
| fp = ck_fopen(p->fname, read_mode, false); |
| if (fp) |
| { |
| while ((cnt = ck_fread(buf, 1, sizeof buf, fp)) > 0) |
| ck_fwrite(buf, 1, cnt, output_file.fp); |
| ck_fclose(fp); |
| } |
| } |
| } |
| |
| flush_output(output_file.fp); |
| release_append_queue(); |
| } |
| |
| |
| /* Compute the name of the backup file for in-place editing */ |
| static char *get_backup_file_name P_((const char *)); |
| static char * |
| get_backup_file_name(name) |
| const char *name; |
| { |
| char *old_asterisk, *asterisk, *backup, *p; |
| int name_length = strlen(name), backup_length = strlen(in_place_extension); |
| |
| /* Compute the length of the backup file */ |
| for (asterisk = in_place_extension - 1, old_asterisk = asterisk + 1; |
| (asterisk = strchr(old_asterisk, '*')); |
| old_asterisk = asterisk + 1) |
| backup_length += name_length - 1; |
| |
| p = backup = xmalloc(backup_length + 1); |
| |
| /* Each iteration gobbles up to an asterisk */ |
| for (asterisk = in_place_extension - 1, old_asterisk = asterisk + 1; |
| (asterisk = strchr(old_asterisk, '*')); |
| old_asterisk = asterisk + 1) |
| { |
| MEMCPY (p, old_asterisk, asterisk - old_asterisk); |
| p += asterisk - old_asterisk; |
| strcpy (p, name); |
| p += name_length; |
| } |
| |
| /* Tack on what's after the last asterisk */ |
| strcpy (p, old_asterisk); |
| return backup; |
| } |
| |
| /* Initialize a struct input for the named file. */ |
| static void open_next_file P_((const char *name, struct input *)); |
| static void |
| open_next_file(name, input) |
| const char *name; |
| struct input *input; |
| { |
| buffer.length = 0; |
| |
| if (name[0] == '-' && name[1] == '\0' && !in_place_extension) |
| { |
| clearerr(stdin); /* clear any stale EOF indication */ |
| input->fp = ck_fdopen (fileno (stdin), "stdin", read_mode, false); |
| } |
| else if ( ! (input->fp = ck_fopen(name, read_mode, false)) ) |
| { |
| const char *ptr = strerror(errno); |
| fprintf(stderr, _("%s: can't read %s: %s\n"), myname, name, ptr); |
| input->read_fn = read_always_fail; /* a redundancy */ |
| ++input->bad_count; |
| return; |
| } |
| |
| input->read_fn = read_file_line; |
| |
| if (in_place_extension) |
| { |
| int input_fd; |
| char *tmpdir, *p; |
| #ifndef BOOTSTRAP |
| security_context_t old_fscreatecon; |
| int reset_fscreatecon = 0; |
| memset (&old_fscreatecon, 0, sizeof (old_fscreatecon)); |
| #endif |
| |
| if (follow_symlinks) |
| input->in_file_name = follow_symlink (name); |
| else |
| input->in_file_name = name; |
| |
| /* get the base name */ |
| tmpdir = ck_strdup(input->in_file_name); |
| if ((p = strrchr(tmpdir, '/'))) |
| *p = 0; |
| else |
| strcpy(tmpdir, "."); |
| |
| if (isatty (fileno (input->fp))) |
| panic(_("couldn't edit %s: is a terminal"), input->in_file_name); |
| |
| input_fd = fileno (input->fp); |
| fstat (input_fd, &input->st); |
| if (!S_ISREG (input->st.st_mode)) |
| panic(_("couldn't edit %s: not a regular file"), input->in_file_name); |
| |
| #ifndef BOOTSTRAP |
| if (is_selinux_enabled ()) |
| { |
| security_context_t con; |
| if (getfilecon (input->in_file_name, &con) != -1) |
| { |
| /* Save and restore the old context for the sake of w and W |
| commands. */ |
| reset_fscreatecon = getfscreatecon (&old_fscreatecon) >= 0; |
| if (setfscreatecon (con) < 0) |
| fprintf (stderr, _("%s: warning: failed to set default file creation context to %s: %s"), |
| myname, con, strerror (errno)); |
| freecon (con); |
| } |
| else |
| { |
| if (errno != ENOSYS) |
| fprintf (stderr, _("%s: warning: failed to get security context of %s: %s"), |
| myname, input->in_file_name, strerror (errno)); |
| } |
| } |
| #endif |
| |
| output_file.fp = ck_mkstemp (&input->out_file_name, tmpdir, "sed"); |
| output_file.missing_newline = false; |
| free (tmpdir); |
| |
| #ifndef BOOTSTRAP |
| if (reset_fscreatecon) |
| { |
| setfscreatecon (old_fscreatecon); |
| freecon (old_fscreatecon); |
| } |
| #endif |
| |
| if (!output_file.fp) |
| panic(_("couldn't open temporary file %s: %s"), input->out_file_name, strerror(errno)); |
| } |
| else |
| output_file.fp = stdout; |
| } |
| |
| |
| /* Clean up an input stream that we are done with. */ |
| static void closedown P_((struct input *)); |
| static void |
| closedown(input) |
| struct input *input; |
| { |
| input->read_fn = read_always_fail; |
| if (!input->fp) |
| return; |
| |
| if (in_place_extension && output_file.fp != NULL) |
| { |
| const char *target_name; |
| int input_fd, output_fd; |
| |
| target_name = input->in_file_name; |
| input_fd = fileno (input->fp); |
| output_fd = fileno (output_file.fp); |
| copy_acl (input->in_file_name, input_fd, |
| input->out_file_name, output_fd, |
| input->st.st_mode); |
| #ifdef HAVE_FCHOWN |
| if (fchown (output_fd, input->st.st_uid, input->st.st_gid) == -1) |
| fchown (output_fd, -1, input->st.st_gid); |
| #endif |
| |
| ck_fclose (input->fp); |
| ck_fclose (output_file.fp); |
| if (strcmp(in_place_extension, "*") != 0) |
| { |
| char *backup_file_name = get_backup_file_name(target_name); |
| ck_rename (target_name, backup_file_name, input->out_file_name); |
| free (backup_file_name); |
| } |
| |
| ck_rename (input->out_file_name, target_name, input->out_file_name); |
| free (input->out_file_name); |
| } |
| else |
| ck_fclose (input->fp); |
| |
| input->fp = NULL; |
| } |
| |
| /* Reset range commands so that they are marked as non-matching */ |
| static void reset_addresses P_((struct vector *)); |
| static void |
| reset_addresses(vec) |
| struct vector *vec; |
| { |
| struct sed_cmd *cur_cmd; |
| int n; |
| |
| for (cur_cmd = vec->v, n = vec->v_length; n--; cur_cmd++) |
| if (cur_cmd->a1 |
| && cur_cmd->a1->addr_type == ADDR_IS_NUM |
| && cur_cmd->a1->addr_number == 0) |
| cur_cmd->range_state = RANGE_ACTIVE; |
| else |
| cur_cmd->range_state = RANGE_INACTIVE; |
| } |
| |
| /* Read in the next line of input, and store it in the pattern space. |
| Return zero if there is nothing left to input. */ |
| static bool read_pattern_space P_((struct input *, struct vector *, int)); |
| static bool |
| read_pattern_space(input, the_program, append) |
| struct input *input; |
| struct vector *the_program; |
| int append; |
| { |
| if (append_head) /* redundant test to optimize for common case */ |
| dump_append_queue(); |
| replaced = false; |
| if (!append) |
| line.length = 0; |
| line.chomped = true; /* default, until proved otherwise */ |
| |
| while ( ! (*input->read_fn)(input) ) |
| { |
| closedown(input); |
| |
| if (!*input->file_list) |
| return false; |
| |
| if (input->reset_at_next_file) |
| { |
| input->line_number = 0; |
| hold.length = 0; |
| reset_addresses (the_program); |
| rewind_read_files (); |
| |
| /* If doing in-place editing, we will never append the |
| new-line to this file; but if the output goes to stdout, |
| we might still have to output the missing new-line. */ |
| if (in_place_extension) |
| output_file.missing_newline = false; |
| |
| input->reset_at_next_file = separate_files; |
| } |
| |
| open_next_file (*input->file_list++, input); |
| } |
| |
| ++input->line_number; |
| return true; |
| } |
| |
| |
| static bool last_file_with_data_p P_((struct input *)); |
| static bool |
| last_file_with_data_p(input) |
| struct input *input; |
| { |
| for (;;) |
| { |
| int ch; |
| |
| closedown(input); |
| if (!*input->file_list) |
| return true; |
| open_next_file(*input->file_list++, input); |
| if (input->fp) |
| { |
| if ((ch = getc(input->fp)) != EOF) |
| { |
| ungetc(ch, input->fp); |
| return false; |
| } |
| } |
| } |
| } |
| |
| /* Determine if we match the `$' address. */ |
| static bool test_eof P_((struct input *)); |
| static bool |
| test_eof(input) |
| struct input *input; |
| { |
| int ch; |
| |
| if (buffer.length) |
| return false; |
| if (!input->fp) |
| return separate_files || last_file_with_data_p(input); |
| if (feof(input->fp)) |
| return separate_files || last_file_with_data_p(input); |
| if ((ch = getc(input->fp)) == EOF) |
| return separate_files || last_file_with_data_p(input); |
| ungetc(ch, input->fp); |
| return false; |
| } |
| |
| /* Return non-zero if the current line matches the address |
| pointed to by `addr'. */ |
| static bool match_an_address_p P_((struct addr *, struct input *)); |
| static bool |
| match_an_address_p(addr, input) |
| struct addr *addr; |
| struct input *input; |
| { |
| switch (addr->addr_type) |
| { |
| case ADDR_IS_NULL: |
| return true; |
| |
| case ADDR_IS_REGEX: |
| return match_regex(addr->addr_regex, line.active, line.length, 0, NULL, 0); |
| |
| case ADDR_IS_NUM_MOD: |
| return (input->line_number >= addr->addr_number |
| && ((input->line_number - addr->addr_number) % addr->addr_step) == 0); |
| |
| case ADDR_IS_STEP: |
| case ADDR_IS_STEP_MOD: |
| /* reminder: these are only meaningful for a2 addresses */ |
| /* a2->addr_number needs to be recomputed each time a1 address |
| matches for the step and step_mod types */ |
| return (addr->addr_number <= input->line_number); |
| |
| case ADDR_IS_LAST: |
| return test_eof(input); |
| |
| /* ADDR_IS_NUM is handled in match_address_p. */ |
| case ADDR_IS_NUM: |
| default: |
| panic("INTERNAL ERROR: bad address type"); |
| } |
| /*NOTREACHED*/ |
| return false; |
| } |
| |
| /* return non-zero if current address is valid for cmd */ |
| static bool match_address_p P_((struct sed_cmd *, struct input *)); |
| static bool |
| match_address_p(cmd, input) |
| struct sed_cmd *cmd; |
| struct input *input; |
| { |
| if (!cmd->a1) |
| return true; |
| |
| if (cmd->range_state != RANGE_ACTIVE) |
| { |
| /* Find if we are going to activate a range. Handle ADDR_IS_NUM |
| specially: it represent an "absolute" state, it should not |
| be computed like regexes. */ |
| if (cmd->a1->addr_type == ADDR_IS_NUM) |
| { |
| if (!cmd->a2) |
| return (input->line_number == cmd->a1->addr_number); |
| |
| if (cmd->range_state == RANGE_CLOSED |
| || input->line_number < cmd->a1->addr_number) |
| return false; |
| } |
| else |
| { |
| if (!cmd->a2) |
| return match_an_address_p(cmd->a1, input); |
| |
| if (!match_an_address_p(cmd->a1, input)) |
| return false; |
| } |
| |
| /* Ok, start a new range. */ |
| cmd->range_state = RANGE_ACTIVE; |
| switch (cmd->a2->addr_type) |
| { |
| case ADDR_IS_REGEX: |
| /* Always include at least two lines. */ |
| return true; |
| case ADDR_IS_NUM: |
| /* Same handling as below, but always include at least one line. */ |
| if (input->line_number >= cmd->a2->addr_number) |
| cmd->range_state = RANGE_CLOSED; |
| return true; |
| case ADDR_IS_STEP: |
| cmd->a2->addr_number = input->line_number + cmd->a2->addr_step; |
| return true; |
| case ADDR_IS_STEP_MOD: |
| cmd->a2->addr_number = input->line_number + cmd->a2->addr_step |
| - (input->line_number%cmd->a2->addr_step); |
| return true; |
| default: |
| break; |
| } |
| } |
| |
| /* cmd->range_state == RANGE_ACTIVE. Check if the range is |
| ending; also handle ADDR_IS_NUM specially in this case. */ |
| |
| if (cmd->a2->addr_type == ADDR_IS_NUM) |
| { |
| /* If the second address is a line number, and if we got past |
| that line, fail to match (it can happen when you jump |
| over such addresses with `b' and `t'. Use RANGE_CLOSED |
| so that the range is not re-enabled anymore. */ |
| if (input->line_number >= cmd->a2->addr_number) |
| cmd->range_state = RANGE_CLOSED; |
| |
| return (input->line_number <= cmd->a2->addr_number); |
| } |
| |
| /* Other addresses are treated as usual. */ |
| if (match_an_address_p(cmd->a2, input)) |
| cmd->range_state = RANGE_CLOSED; |
| |
| return true; |
| } |
| |
| |
| static void do_list P_((int line_len)); |
| static void |
| do_list(line_len) |
| int line_len; |
| { |
| unsigned char *p = CAST(unsigned char *)line.active; |
| countT len = line.length; |
| countT width = 0; |
| char obuf[180]; /* just in case we encounter a 512-bit char (;-) */ |
| char *o; |
| size_t olen; |
| FILE *fp = output_file.fp; |
| |
| output_missing_newline(&output_file); |
| for (; len--; ++p) { |
| o = obuf; |
| |
| /* Some locales define 8-bit characters as printable. This makes the |
| testsuite fail at 8to7.sed because the `l' command in fact will not |
| convert the 8-bit characters. */ |
| #if defined isascii || defined HAVE_ISASCII |
| if (isascii(*p) && ISPRINT(*p)) { |
| #else |
| if (ISPRINT(*p)) { |
| #endif |
| *o++ = *p; |
| if (*p == '\\') |
| *o++ = '\\'; |
| } else { |
| *o++ = '\\'; |
| switch (*p) { |
| #if defined __STDC__ && __STDC__-0 |
| case '\a': *o++ = 'a'; break; |
| #else /* Not STDC; we'll just assume ASCII */ |
| case 007: *o++ = 'a'; break; |
| #endif |
| case '\b': *o++ = 'b'; break; |
| case '\f': *o++ = 'f'; break; |
| case '\n': *o++ = 'n'; break; |
| case '\r': *o++ = 'r'; break; |
| case '\t': *o++ = 't'; break; |
| case '\v': *o++ = 'v'; break; |
| default: |
| sprintf(o, "%03o", *p); |
| o += strlen(o); |
| break; |
| } |
| } |
| olen = o - obuf; |
| if (width+olen >= line_len && line_len > 0) { |
| ck_fwrite("\\\n", 1, 2, fp); |
| width = 0; |
| } |
| ck_fwrite(obuf, 1, olen, fp); |
| width += olen; |
| } |
| ck_fwrite("$\n", 1, 2, fp); |
| flush_output (fp); |
| } |
| |
| |
| static enum replacement_types append_replacement P_((struct line *, struct replacement *, |
| struct re_registers *, |
| enum replacement_types)); |
| static enum replacement_types |
| append_replacement (buf, p, regs, repl_mod) |
| struct line *buf; |
| struct replacement *p; |
| struct re_registers *regs; |
| enum replacement_types repl_mod; |
| { |
| for (; p; p=p->next) |
| { |
| int i = p->subst_id; |
| enum replacement_types curr_type; |
| |
| /* Apply a \[lu] modifier that was given earlier, but which we |
| have not had yet the occasion to apply. But don't do it |
| if this replacement has a modifier of its own. */ |
| curr_type = (p->repl_type & REPL_MODIFIERS) |
| ? p->repl_type |
| : p->repl_type | repl_mod; |
| |
| repl_mod = 0; |
| if (p->prefix_length) |
| { |
| str_append_modified(buf, p->prefix, p->prefix_length, |
| curr_type); |
| curr_type &= ~REPL_MODIFIERS; |
| } |
| |
| if (0 <= i) |
| { |
| if (regs->end[i] == regs->start[i] && p->repl_type & REPL_MODIFIERS) |
| /* Save this modifier, we shall apply it later. |
| e.g. in s/()([a-z])/\u\1\2/ |
| the \u modifier is applied to \2, not \1 */ |
| repl_mod = curr_type & REPL_MODIFIERS; |
| |
| else if (regs->end[i] != regs->start[i]) |
| str_append_modified(buf, line.active + regs->start[i], |
| CAST(size_t)(regs->end[i] - regs->start[i]), |
| curr_type); |
| } |
| } |
| |
| return repl_mod; |
| } |
| |
| static void do_subst P_((struct subst *)); |
| static void |
| do_subst(sub) |
| struct subst *sub; |
| { |
| size_t start = 0; /* where to start scan for (next) match in LINE */ |
| size_t last_end = 0; /* where did the last successful match end in LINE */ |
| countT count = 0; /* number of matches found */ |
| bool again = true; |
| |
| static struct re_registers regs; |
| |
| line_reset(&s_accum, &line); |
| |
| /* The first part of the loop optimizes s/xxx// when xxx is at the |
| start, and s/xxx$// */ |
| if (!match_regex(sub->regx, line.active, line.length, start, |
| ®s, sub->max_id + 1)) |
| return; |
| |
| if (!sub->replacement && sub->numb <= 1) |
| { |
| if (regs.start[0] == 0 && !sub->global) |
| { |
| /* We found a match, set the `replaced' flag. */ |
| replaced = true; |
| |
| line.active += regs.end[0]; |
| line.length -= regs.end[0]; |
| line.alloc -= regs.end[0]; |
| goto post_subst; |
| } |
| else if (regs.end[0] == line.length) |
| { |
| /* We found a match, set the `replaced' flag. */ |
| replaced = true; |
| |
| line.length = regs.start[0]; |
| goto post_subst; |
| } |
| } |
| |
| do |
| { |
| enum replacement_types repl_mod = 0; |
| |
| size_t offset = regs.start[0]; |
| size_t matched = regs.end[0] - regs.start[0]; |
| |
| /* Copy stuff to the left of this match into the output string. */ |
| if (start < offset) |
| str_append(&s_accum, line.active + start, offset - start); |
| |
| /* If we're counting up to the Nth match, are we there yet? |
| And even if we are there, there is another case we have to |
| skip: are we matching an empty string immediately following |
| another match? |
| |
| This latter case avoids that baaaac, when passed through |
| s,a*,x,g, gives `xbxxcx' instead of xbxcx. This behavior is |
| unacceptable because it is not consistently applied (for |
| example, `baaaa' gives `xbx', not `xbxx'). */ |
| if ((matched > 0 || count == 0 || offset > last_end) |
| && ++count >= sub->numb) |
| { |
| /* We found a match, set the `replaced' flag. */ |
| replaced = true; |
| |
| /* Now expand the replacement string into the output string. */ |
| repl_mod = append_replacement (&s_accum, sub->replacement, ®s, repl_mod); |
| again = sub->global; |
| } |
| else |
| { |
| /* The match was not replaced. Copy the text until its |
| end; if it was vacuous, skip over one character and |
| add that character to the output. */ |
| if (matched == 0) |
| { |
| if (start < line.length) |
| matched = 1; |
| else |
| break; |
| } |
| |
| str_append(&s_accum, line.active + offset, matched); |
| } |
| |
| /* Start after the match. last_end is the real end of the matched |
| substring, excluding characters that were skipped in case the RE |
| matched the empty string. */ |
| start = offset + matched; |
| last_end = regs.end[0]; |
| } |
| while (again |
| && start <= line.length |
| && match_regex(sub->regx, line.active, line.length, start, |
| ®s, sub->max_id + 1)); |
| |
| /* Copy stuff to the right of the last match into the output string. */ |
| if (start < line.length) |
| str_append(&s_accum, line.active + start, line.length-start); |
| s_accum.chomped = line.chomped; |
| |
| /* Exchange line and s_accum. This can be much cheaper |
| than copying s_accum.active into line.text (for huge lines). */ |
| line_exchange(&line, &s_accum, false); |
| |
| /* Finish up. */ |
| if (count < sub->numb) |
| return; |
| |
| post_subst: |
| if (sub->print & 1) |
| output_line(line.active, line.length, line.chomped, &output_file); |
| |
| if (sub->eval) |
| { |
| #ifdef HAVE_POPEN |
| FILE *pipe_fp; |
| line_reset(&s_accum, NULL); |
| |
| str_append (&line, "", 1); |
| pipe_fp = popen(line.active, "r"); |
| |
| if (pipe_fp != NULL) |
| { |
| while (!feof (pipe_fp)) |
| { |
| char buf[4096]; |
| int n = fread (buf, sizeof(char), 4096, pipe_fp); |
| if (n > 0) |
| str_append(&s_accum, buf, n); |
| } |
| |
| pclose (pipe_fp); |
| |
| /* Exchange line and s_accum. This can be much cheaper than copying |
| s_accum.active into line.text (for huge lines). See comment above |
| for 'g' as to while the third argument is incorrect anyway. */ |
| line_exchange(&line, &s_accum, true); |
| if (line.length && |
| line.active[line.length - 1] == '\n') |
| line.length--; |
| } |
| else |
| panic(_("error in subprocess")); |
| #else |
| panic(_("option `e' not supported")); |
| #endif |
| } |
| |
| if (sub->print & 2) |
| output_line(line.active, line.length, line.chomped, &output_file); |
| if (sub->outf) |
| output_line(line.active, line.length, line.chomped, sub->outf); |
| } |
| |
| #ifdef EXPERIMENTAL_DASH_N_OPTIMIZATION |
| /* Used to attempt a simple-minded optimization. */ |
| |
| static countT branches; |
| |
| static countT count_branches P_((struct vector *)); |
| static countT |
| count_branches(program) |
| struct vector *program; |
| { |
| struct sed_cmd *cur_cmd = program->v; |
| countT isn_cnt = program->v_length; |
| countT cnt = 0; |
| |
| while (isn_cnt-- > 0) |
| { |
| switch (cur_cmd->cmd) |
| { |
| case 'b': |
| case 't': |
| case 'T': |
| case '{': |
| ++cnt; |
| } |
| } |
| return cnt; |
| } |
| |
| static struct sed_cmd *shrink_program P_((struct vector *, struct sed_cmd *)); |
| static struct sed_cmd * |
| shrink_program(vec, cur_cmd) |
| struct vector *vec; |
| struct sed_cmd *cur_cmd; |
| { |
| struct sed_cmd *v = vec->v; |
| struct sed_cmd *last_cmd = v + vec->v_length; |
| struct sed_cmd *p; |
| countT cmd_cnt; |
| |
| for (p=v; p < cur_cmd; ++p) |
| if (p->cmd != '#') |
| MEMCPY(v++, p, sizeof *v); |
| cmd_cnt = v - vec->v; |
| |
| for (; p < last_cmd; ++p) |
| if (p->cmd != '#') |
| MEMCPY(v++, p, sizeof *v); |
| vec->v_length = v - vec->v; |
| |
| return (0 < vec->v_length) ? (vec->v + cmd_cnt) : CAST(struct sed_cmd *)0; |
| } |
| #endif /*EXPERIMENTAL_DASH_N_OPTIMIZATION*/ |
| |
| /* Execute the program `vec' on the current input line. |
| Return exit status if caller should quit, -1 otherwise. */ |
| static int execute_program P_((struct vector *, struct input *)); |
| static int |
| execute_program(vec, input) |
| struct vector *vec; |
| struct input *input; |
| { |
| struct sed_cmd *cur_cmd; |
| struct sed_cmd *end_cmd; |
| |
| cur_cmd = vec->v; |
| end_cmd = vec->v + vec->v_length; |
| while (cur_cmd < end_cmd) |
| { |
| if (match_address_p(cur_cmd, input) != cur_cmd->addr_bang) |
| { |
| switch (cur_cmd->cmd) |
| { |
| case 'a': |
| { |
| struct append_queue *aq = next_append_slot(); |
| aq->text = cur_cmd->x.cmd_txt.text; |
| aq->textlen = cur_cmd->x.cmd_txt.text_length; |
| } |
| break; |
| |
| case '{': |
| case 'b': |
| cur_cmd = vec->v + cur_cmd->x.jump_index; |
| continue; |
| |
| case '}': |
| case '#': |
| case ':': |
| /* Executing labels and block-ends are easy. */ |
| break; |
| |
| case 'c': |
| if (cur_cmd->range_state != RANGE_ACTIVE) |
| output_line(cur_cmd->x.cmd_txt.text, |
| cur_cmd->x.cmd_txt.text_length - 1, true, |
| &output_file); |
| /* POSIX.2 is silent about c starting a new cycle, |
| but it seems to be expected (and make sense). */ |
| /* Fall Through */ |
| case 'd': |
| return -1; |
| |
| case 'D': |
| { |
| char *p = memchr(line.active, '\n', line.length); |
| if (!p) |
| return -1; |
| |
| ++p; |
| line.alloc -= p - line.active; |
| line.length -= p - line.active; |
| line.active += p - line.active; |
| |
| /* reset to start next cycle without reading a new line: */ |
| cur_cmd = vec->v; |
| continue; |
| } |
| |
| case 'e': { |
| #ifdef HAVE_POPEN |
| FILE *pipe_fp; |
| int cmd_length = cur_cmd->x.cmd_txt.text_length; |
| line_reset(&s_accum, NULL); |
| |
| if (!cmd_length) |
| { |
| str_append (&line, "", 1); |
| pipe_fp = popen(line.active, "r"); |
| } |
| else |
| { |
| cur_cmd->x.cmd_txt.text[cmd_length - 1] = 0; |
| pipe_fp = popen(cur_cmd->x.cmd_txt.text, "r"); |
| output_missing_newline(&output_file); |
| } |
| |
| if (pipe_fp != NULL) |
| { |
| char buf[4096]; |
| int n; |
| while (!feof (pipe_fp)) |
| if ((n = fread (buf, sizeof(char), 4096, pipe_fp)) > 0) |
| { |
| if (!cmd_length) |
| str_append(&s_accum, buf, n); |
| else |
| ck_fwrite(buf, 1, n, output_file.fp); |
| } |
| |
| pclose (pipe_fp); |
| if (!cmd_length) |
| { |
| /* Store into pattern space for plain `e' commands */ |
| if (s_accum.length && |
| s_accum.active[s_accum.length - 1] == '\n') |
| s_accum.length--; |
| |
| /* Exchange line and s_accum. This can be much |
| cheaper than copying s_accum.active into line.text |
| (for huge lines). See comment above for 'g' as |
| to while the third argument is incorrect anyway. */ |
| line_exchange(&line, &s_accum, true); |
| } |
| else |
| flush_output(output_file.fp); |
| |
| } |
| else |
| panic(_("error in subprocess")); |
| #else |
| panic(_("`e' command not supported")); |
| #endif |
| break; |
| } |
| |
| case 'g': |
| /* We do not have a really good choice for the third parameter. |
| The problem is that hold space and the input file might as |
| well have different states; copying it from hold space means |
| that subsequent input might be read incorrectly, while |
| keeping it as in pattern space means that commands operating |
| on the moved buffer might consider a wrong character set. |
| We keep it true because it's what sed <= 4.1.5 did. */ |
| line_copy(&hold, &line, true); |
| break; |
| |
| case 'G': |
| /* We do not have a really good choice for the third parameter. |
| The problem is that hold space and pattern space might as |
| well have different states. So, true is as wrong as false. |
| We keep it true because it's what sed <= 4.1.5 did, but |
| we could consider having line_ap. */ |
| line_append(&hold, &line, true); |
| break; |
| |
| case 'h': |
| /* Here, it is ok to have true. */ |
| line_copy(&line, &hold, true); |
| break; |
| |
| case 'H': |
| /* See comment above for 'G' regarding the third parameter. */ |
| line_append(&line, &hold, true); |
| break; |
| |
| case 'i': |
| output_line(cur_cmd->x.cmd_txt.text, |
| cur_cmd->x.cmd_txt.text_length - 1, |
| true, &output_file); |
| break; |
| |
| case 'l': |
| do_list(cur_cmd->x.int_arg == -1 |
| ? lcmd_out_line_len |
| : cur_cmd->x.int_arg); |
| break; |
| |
| case 'L': |
| output_missing_newline(&output_file); |
| fmt(line.active, line.active + line.length, |
| cur_cmd->x.int_arg == -1 |
| ? lcmd_out_line_len |
| : cur_cmd->x.int_arg, |
| output_file.fp); |
| flush_output(output_file.fp); |
| break; |
| |
| case 'n': |
| if (!no_default_output) |
| output_line(line.active, line.length, line.chomped, &output_file); |
| if (test_eof(input) || !read_pattern_space(input, vec, false)) |
| return -1; |
| break; |
| |
| case 'N': |
| str_append(&line, "\n", 1); |
| |
| if (test_eof(input) || !read_pattern_space(input, vec, true)) |
| { |
| line.length--; |
| if (posixicity == POSIXLY_EXTENDED && !no_default_output) |
| output_line(line.active, line.length, line.chomped, |
| &output_file); |
| return -1; |
| } |
| break; |
| |
| case 'p': |
| output_line(line.active, line.length, line.chomped, &output_file); |
| break; |
| |
| case 'P': |
| { |
| char *p = memchr(line.active, '\n', line.length); |
| output_line(line.active, p ? p - line.active : line.length, |
| p ? true : line.chomped, &output_file); |
| } |
| break; |
| |
| case 'q': |
| if (!no_default_output) |
| output_line(line.active, line.length, line.chomped, &output_file); |
| dump_append_queue(); |
| |
| case 'Q': |
| return cur_cmd->x.int_arg == -1 ? 0 : cur_cmd->x.int_arg; |
| |
| case 'r': |
| if (cur_cmd->x.fname) |
| { |
| struct append_queue *aq = next_append_slot(); |
| aq->fname = cur_cmd->x.fname; |
| } |
| break; |
| |
| case 'R': |
| if (cur_cmd->x.fp && !feof (cur_cmd->x.fp)) |
| { |
| struct append_queue *aq; |
| size_t buflen; |
| char *text = NULL; |
| int result; |
| |
| result = ck_getline (&text, &buflen, cur_cmd->x.fp); |
| if (result != EOF) |
| { |
| aq = next_append_slot(); |
| aq->free = true; |
| aq->text = text; |
| aq->textlen = result; |
| } |
| } |
| break; |
| |
| case 's': |
| do_subst(cur_cmd->x.cmd_subst); |
| break; |
| |
| case 't': |
| if (replaced) |
| { |
| replaced = false; |
| cur_cmd = vec->v + cur_cmd->x.jump_index; |
| continue; |
| } |
| break; |
| |
| case 'T': |
| if (!replaced) |
| { |
| cur_cmd = vec->v + cur_cmd->x.jump_index; |
| continue; |
| } |
| else |
| replaced = false; |
| break; |
| |
| case 'w': |
| if (cur_cmd->x.fp) |
| output_line(line.active, line.length, |
| line.chomped, cur_cmd->x.outf); |
| break; |
| |
| case 'W': |
| if (cur_cmd->x.fp) |
| { |
| char *p = memchr(line.active, '\n', line.length); |
| output_line(line.active, p ? p - line.active : line.length, |
| p ? true : line.chomped, cur_cmd->x.outf); |
| } |
| break; |
| |
| case 'x': |
| /* See comment above for 'g' regarding the third parameter. */ |
| line_exchange(&line, &hold, false); |
| break; |
| |
| case 'y': |
| { |
| #ifdef HAVE_MBRTOWC |
| if (mb_cur_max > 1) |
| { |
| int idx, prev_idx; /* index in the input line. */ |
| char **trans; |
| mbstate_t mbstate; |
| memset(&mbstate, 0, sizeof(mbstate_t)); |
| for (idx = 0; idx < line.length;) |
| { |
| int mbclen, i; |
| mbclen = MBRLEN (line.active + idx, line.length - idx, |
| &mbstate); |
| /* An invalid sequence, or a truncated multibyte |
| character. We treat it as a singlebyte character. |
| */ |
| if (mbclen == (size_t) -1 || mbclen == (size_t) -2 |
| || mbclen == 0) |
| mbclen = 1; |
| |
| trans = cur_cmd->x.translatemb; |
| /* `i' indicate i-th translate pair. */ |
| for (i = 0; trans[2*i] != NULL; i++) |
| { |
| if (strncmp(line.active + idx, trans[2*i], mbclen) == 0) |
| { |
| bool move_remain_buffer = false; |
| int trans_len = strlen(trans[2*i+1]); |
| |
| if (mbclen < trans_len) |
| { |
| int new_len; |
| new_len = line.length + 1 + trans_len - mbclen; |
| /* We must extend the line buffer. */ |
| if (line.alloc < new_len) |
| { |
| /* And we must resize the buffer. */ |
| resize_line(&line, new_len); |
| } |
| move_remain_buffer = true; |
| } |
| else if (mbclen > trans_len) |
| { |
| /* We must truncate the line buffer. */ |
| move_remain_buffer = true; |
| } |
| prev_idx = idx; |
| if (move_remain_buffer) |
| { |
| int move_len, move_offset; |
| char *move_from, *move_to; |
| /* Move the remaining with \0. */ |
| move_from = line.active + idx + mbclen; |
| move_to = line.active + idx + trans_len; |
| move_len = line.length + 1 - idx - mbclen; |
| move_offset = trans_len - mbclen; |
| memmove(move_to, move_from, move_len); |
| line.length += move_offset; |
| idx += move_offset; |
| } |
| strncpy(line.active + prev_idx, trans[2*i+1], |
| trans_len); |
| break; |
| } |
| } |
| idx += mbclen; |
| } |
| } |
| else |
| #endif /* HAVE_MBRTOWC */ |
| { |
| unsigned char *p, *e; |
| p = CAST(unsigned char *)line.active; |
| for (e=p+line.length; p<e; ++p) |
| *p = cur_cmd->x.translate[*p]; |
| } |
| } |
| break; |
| |
| case 'z': |
| line.length = 0; |
| break; |
| |
| case '=': |
| output_missing_newline(&output_file); |
| fprintf(output_file.fp, "%lu\n", |
| CAST(unsigned long)input->line_number); |
| flush_output(output_file.fp); |
| break; |
| |
| default: |
| panic("INTERNAL ERROR: Bad cmd %c", cur_cmd->cmd); |
| } |
| } |
| |
| #ifdef EXPERIMENTAL_DASH_N_OPTIMIZATION |
| /* If our top-level program consists solely of commands with |
| ADDR_IS_NUM addresses then once we past the last mentioned |
| line we should be able to quit if no_default_output is true, |
| or otherwise quickly copy input to output. Now whether this |
| optimization is a win or not depends on how cheaply we can |
| implement this for the cases where it doesn't help, as |
| compared against how much time is saved. One semantic |
| difference (which I think is an improvement) is that *this* |
| version will terminate after printing line two in the script |
| "yes | sed -n 2p". |
| |
| Don't use this when in-place editing is active, because line |
| numbers restart each time then. */ |
| else if (!separate_files) |
| { |
| if (cur_cmd->a1->addr_type == ADDR_IS_NUM |
| && (cur_cmd->a2 |
| ? cur_cmd->range_state == RANGE_CLOSED |
| : cur_cmd->a1->addr_number < input->line_number)) |
| { |
| /* Skip this address next time */ |
| cur_cmd->addr_bang = !cur_cmd->addr_bang; |
| cur_cmd->a1->addr_type = ADDR_IS_NULL; |
| if (cur_cmd->a2) |
| cur_cmd->a2->addr_type = ADDR_IS_NULL; |
| |
| /* can we make an optimization? */ |
| if (cur_cmd->addr_bang) |
| { |
| if (cur_cmd->cmd == 'b' || cur_cmd->cmd == 't' |
| || cur_cmd->cmd == 'T' || cur_cmd->cmd == '}') |
| branches--; |
| |
| cur_cmd->cmd = '#'; /* replace with no-op */ |
| if (branches == 0) |
| cur_cmd = shrink_program(vec, cur_cmd); |
| if (!cur_cmd && no_default_output) |
| return 0; |
| end_cmd = vec->v + vec->v_length; |
| if (!cur_cmd) |
| cur_cmd = end_cmd; |
| continue; |
| } |
| } |
| } |
| #endif /*EXPERIMENTAL_DASH_N_OPTIMIZATION*/ |
| |
| /* this is buried down here so that a "continue" statement can skip it */ |
| ++cur_cmd; |
| } |
| |
| if (!no_default_output) |
| output_line(line.active, line.length, line.chomped, &output_file); |
| return -1; |
| } |
| |
| |
| |
| /* Apply the compiled script to all the named files. */ |
| int |
| process_files(the_program, argv) |
| struct vector *the_program; |
| char **argv; |
| { |
| static char dash[] = "-"; |
| static char *stdin_argv[2] = { dash, NULL }; |
| struct input input; |
| int status; |
| |
| line_init(&line, NULL, INITIAL_BUFFER_SIZE); |
| line_init(&hold, NULL, 0); |
| line_init(&buffer, NULL, 0); |
| |
| #ifdef EXPERIMENTAL_DASH_N_OPTIMIZATION |
| branches = count_branches(the_program); |
| #endif /*EXPERIMENTAL_DASH_N_OPTIMIZATION*/ |
| input.reset_at_next_file = true; |
| if (argv && *argv) |
| input.file_list = argv; |
| else if (in_place_extension) |
| panic(_("no input files")); |
| else |
| input.file_list = stdin_argv; |
| |
| input.bad_count = 0; |
| input.line_number = 0; |
| input.read_fn = read_always_fail; |
| input.fp = NULL; |
| |
| status = EXIT_SUCCESS; |
| while (read_pattern_space(&input, the_program, false)) |
| { |
| status = execute_program(the_program, &input); |
| if (status == -1) |
| status = EXIT_SUCCESS; |
| else |
| break; |
| } |
| closedown(&input); |
| |
| #ifdef DEBUG_LEAKS |
| /* We're about to exit, so these free()s are redundant. |
| But if we're running under a memory-leak detecting |
| implementation of malloc(), we want to explicitly |
| deallocate in order to avoid extraneous noise from |
| the allocator. */ |
| release_append_queue(); |
| FREE(buffer.text); |
| FREE(hold.text); |
| FREE(line.text); |
| FREE(s_accum.text); |
| #endif /*DEBUG_LEAKS*/ |
| |
| if (input.bad_count) |
| status = 2; |
| |
| return status; |
| } |