blob: 6438cfd0e6a1f6228601a32052fc65fdfa4b79d2 [file] [log] [blame]
From 4760752af1c9b7507b51917ff4e4d8eb0491e353 Mon Sep 17 00:00:00 2001
Date: Tue, 1 Jun 2010 17:27:23 -0700
Subject: [PATCH] Ehhance hyphenation dictionary reading from character buffer.
Previous file reading is kept and enhanced with mmap.
This is the prepration for reading the dictionary from asset.
issue: 2672163
Change-Id: I0527b7b1260dc103a3be63856b9f4e4c10ed2857
---
hyphen.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++++++----------
hyphen.h | 2 +
2 files changed, 60 insertions(+), 12 deletions(-)
diff --git a/hyphen.c b/hyphen.c
index 974d87f..446d5bd 100644
--- a/hyphen.c
+++ b/hyphen.c
@@ -36,13 +36,13 @@
* MPL.
*
*/
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
#include <stdlib.h> /* for NULL, malloc */
#include <stdio.h> /* for fprintf */
#include <string.h> /* for strdup */
-
-#ifdef UNX
-#include <unistd.h> /* for exit */
-#endif
+#include <unistd.h> /* for close */
#define noVERBOSE
@@ -230,12 +230,57 @@ get_state_str (int state)
}
#endif
+// Get a line from the dictionary contents.
+static char *
+get_line (char *s, int size, const char *dict_contents, int dict_length,
+ int *dict_ptr)
+{
+ int len = 0;
+ while (len < (size - 1) && *dict_ptr < dict_length) {
+ s[len++] = *(dict_contents + *dict_ptr);
+ (*dict_ptr)++;
+ if (s[len - 1] == '\n')
+ break;
+ }
+ s[len] = '\0';
+ if (len > 0) {
+ return s;
+ } else {
+ return NULL;
+ }
+}
+
HyphenDict *
hnj_hyphen_load (const char *fn)
{
+ if (fn == NULL)
+ return NULL;
+ const int fd = open(fn, O_RDONLY);
+ if (fd == -1)
+ return NULL;
+ struct stat sb;
+ if (fstat(fd, &sb) == -1) { /* To obtain file size */
+ close(fd);
+ return NULL;
+ }
+
+ const char *addr = mmap(NULL, sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+ if (addr == MAP_FAILED) {
+ close(fd);
+ return NULL;
+ }
+ HyphenDict *dict = hnj_hyphen_load_from_buffer(addr, sb.st_size);
+ munmap((void *)addr, sb.st_size);
+ close(fd);
+
+ return dict;
+}
+
+HyphenDict *
+hnj_hyphen_load_from_buffer (const char *dict_contents, int dict_length)
+{
HyphenDict *dict[2];
HashTab *hashtab;
- FILE *f;
char buf[MAX_CHARS];
char word[MAX_CHARS];
char pattern[MAX_CHARS];
@@ -249,10 +294,10 @@ hnj_hyphen_load (const char *fn)
HashEntry *e;
int nextlevel = 0;
- f = fopen (fn, "r");
- if (f == NULL)
+ if (dict_contents == NULL)
return NULL;
+ int dict_ptr = 0;
// loading one or two dictionaries (separated by NEXTLEVEL keyword)
for (k = 0; k == 0 || (k == 1 && nextlevel); k++) {
hashtab = hnj_hash_new ();
@@ -277,7 +322,8 @@ hnj_hyphen_load (const char *fn)
/* read in character set info */
if (k == 0) {
for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0;
- fgets(dict[k]->cset, sizeof(dict[k]->cset),f);
+ get_line(dict[k]->cset, sizeof(dict[k]->cset), dict_contents,
+ dict_length, &dict_ptr);
for (i=0;i<MAX_NAME;i++)
if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n'))
dict[k]->cset[i] = 0;
@@ -287,7 +333,8 @@ hnj_hyphen_load (const char *fn)
dict[k]->utf8 = dict[0]->utf8;
}
- while (fgets (buf, sizeof(buf), f) != NULL)
+ while (get_line(buf, sizeof(buf), dict_contents, dict_length,
+ &dict_ptr) != NULL)
{
if (buf[0] != '%')
{
@@ -446,7 +493,6 @@ hnj_hyphen_load (const char *fn)
#endif
state_num = 0;
}
- fclose(f);
if (k == 2) dict[0]->nextlevel = dict[1];
return dict[0];
}
@@ -870,8 +916,8 @@ int hnj_hyphen_hyph_(HyphenDict *dict, const char *word, int word_size,
hyphens2 = hnj_malloc (word_size);
}
for (i = 0; i < word_size; i++) rep2[i] = NULL;
- for (i = 0; i < word_size; i++) if
- (hyphens[i]&1 || (begin > 0 && i + 1 == word_size)) {
+ for (i = 0; i < word_size; i++)
+ if (hyphens[i]&1 || (begin > 0 && i + 1 == word_size)) {
if (i - begin > 1) {
int hyph = 0;
prep_word[i + 2] = '\0';
diff --git a/hyphen.h b/hyphen.h
index 5d79308..29a0701 100644
--- a/hyphen.h
+++ b/hyphen.h
@@ -91,6 +91,8 @@ struct _HyphenTrans {
};
HyphenDict *hnj_hyphen_load (const char *fn);
+HyphenDict *hnj_hyphen_load_from_buffer (const char *dict_contents,
+ int dict_length);
void hnj_hyphen_free (HyphenDict *dict);
/* obsolete, use hnj_hyphen_hyphenate2() or *hyphenate3() functions) */
--
1.7.0.1