| From 4760752af1c9b7507b51917ff4e4d8eb0491e353 Mon Sep 17 00:00:00 2001 |
| Date: Tue, 1 Jun 2010 17:27:23 -0700 |
| Subject: [PATCH] Enhance hyphenation dictionary reading from character buffer. |
| |
| Previous file reading is kept and enhanced with mmap. |
| |
| This is the preparation for reading the dictionary from an asset. |
| |
| issue: 2672163 |
| Change-Id: I0527b7b1260dc103a3be63856b9f4e4c10ed2857 |
| --- |
| hyphen.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++++++---------- |
| hyphen.h | 2 + |
| 2 files changed, 60 insertions(+), 12 deletions(-) |
| |
| diff --git a/hyphen.c b/hyphen.c |
| index 974d87f..446d5bd 100644 |
| --- a/hyphen.c |
| +++ b/hyphen.c |
| @@ -36,13 +36,13 @@ |
| * MPL. |
| * |
| */ |
| +#include <fcntl.h> |
| +#include <sys/mman.h> |
| +#include <sys/stat.h> |
| #include <stdlib.h> /* for NULL, malloc */ |
| #include <stdio.h> /* for fprintf */ |
| #include <string.h> /* for strdup */ |
| - |
| -#ifdef UNX |
| -#include <unistd.h> /* for exit */ |
| -#endif |
| +#include <unistd.h> /* for close */ |
| |
| #define noVERBOSE |
| |
| @@ -230,12 +230,57 @@ get_state_str (int state) |
| } |
| #endif |
| |
| +// Copy the next line of dict_contents into s, advancing *dict_ptr; NULL at end. |
| +static char * |
| +get_line (char *s, int size, const char *dict_contents, int dict_length, |
| + int *dict_ptr) |
| +{ |
| + int len = 0; |
| + while (len < (size - 1) && *dict_ptr < dict_length) { |
| + s[len++] = *(dict_contents + *dict_ptr); |
| + (*dict_ptr)++; |
| + if (s[len - 1] == '\n') |
| + break; |
| + } |
| + s[len] = '\0'; |
| + if (len > 0) { |
| + return s; |
| + } else { |
| + return NULL; |
| + } |
| +} |
| + |
| HyphenDict * |
| hnj_hyphen_load (const char *fn) |
| { |
| + if (fn == NULL) |
| + return NULL; |
| + const int fd = open(fn, O_RDONLY); |
| + if (fd == -1) |
| + return NULL; |
| + struct stat sb; |
| + if (fstat(fd, &sb) == -1) { /* To obtain file size */ |
| + close(fd); |
| + return NULL; |
| + } |
| + |
| + const char *addr = mmap(NULL, sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0); |
| + if (addr == MAP_FAILED) { |
| + close(fd); |
| + return NULL; |
| + } |
| + HyphenDict *dict = hnj_hyphen_load_from_buffer(addr, sb.st_size); |
| + munmap((void *)addr, sb.st_size); |
| + close(fd); |
| + |
| + return dict; |
| +} |
| + |
| +HyphenDict * |
| +hnj_hyphen_load_from_buffer (const char *dict_contents, int dict_length) |
| +{ |
| HyphenDict *dict[2]; |
| HashTab *hashtab; |
| - FILE *f; |
| char buf[MAX_CHARS]; |
| char word[MAX_CHARS]; |
| char pattern[MAX_CHARS]; |
| @@ -249,10 +294,10 @@ hnj_hyphen_load (const char *fn) |
| HashEntry *e; |
| int nextlevel = 0; |
| |
| - f = fopen (fn, "r"); |
| - if (f == NULL) |
| + if (dict_contents == NULL) |
| return NULL; |
| |
| + int dict_ptr = 0; |
| // loading one or two dictionaries (separated by NEXTLEVEL keyword) |
| for (k = 0; k == 0 || (k == 1 && nextlevel); k++) { |
| hashtab = hnj_hash_new (); |
| @@ -277,7 +322,8 @@ hnj_hyphen_load (const char *fn) |
| /* read in character set info */ |
| if (k == 0) { |
| for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0; |
| - fgets(dict[k]->cset, sizeof(dict[k]->cset),f); |
| + get_line(dict[k]->cset, sizeof(dict[k]->cset), dict_contents, |
| + dict_length, &dict_ptr); |
| for (i=0;i<MAX_NAME;i++) |
| if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n')) |
| dict[k]->cset[i] = 0; |
| @@ -287,7 +333,8 @@ hnj_hyphen_load (const char *fn) |
| dict[k]->utf8 = dict[0]->utf8; |
| } |
| |
| - while (fgets (buf, sizeof(buf), f) != NULL) |
| + while (get_line(buf, sizeof(buf), dict_contents, dict_length, |
| + &dict_ptr) != NULL) |
| { |
| if (buf[0] != '%') |
| { |
| @@ -446,7 +493,6 @@ hnj_hyphen_load (const char *fn) |
| #endif |
| state_num = 0; |
| } |
| - fclose(f); |
| if (k == 2) dict[0]->nextlevel = dict[1]; |
| return dict[0]; |
| } |
| @@ -870,8 +916,8 @@ int hnj_hyphen_hyph_(HyphenDict *dict, const char *word, int word_size, |
| hyphens2 = hnj_malloc (word_size); |
| } |
| for (i = 0; i < word_size; i++) rep2[i] = NULL; |
| - for (i = 0; i < word_size; i++) if |
| - (hyphens[i]&1 || (begin > 0 && i + 1 == word_size)) { |
| + for (i = 0; i < word_size; i++) |
| + if (hyphens[i]&1 || (begin > 0 && i + 1 == word_size)) { |
| if (i - begin > 1) { |
| int hyph = 0; |
| prep_word[i + 2] = '\0'; |
| diff --git a/hyphen.h b/hyphen.h |
| index 5d79308..29a0701 100644 |
| --- a/hyphen.h |
| +++ b/hyphen.h |
| @@ -91,6 +91,8 @@ struct _HyphenTrans { |
| }; |
| |
| HyphenDict *hnj_hyphen_load (const char *fn); |
| +HyphenDict *hnj_hyphen_load_from_buffer (const char *dict_contents, |
| + int dict_length); |
| void hnj_hyphen_free (HyphenDict *dict); |
| |
| /* obsolete, use hnj_hyphen_hyphenate2() or *hyphenate3() functions) */ |
| -- |
| 1.7.0.1 |
| |