| /* |
| * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| /** |
| * @file picotok.c |
| * |
| * tokenizer |
| * |
| * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland |
| * All rights reserved. |
| * |
| * History: |
| * - 2009-04-20 -- initial version |
| * |
| */ |
| |
| |
| /* ************************************************************/ |
| /* tokenisation and markup handling */ |
| /* ************************************************************/ |
| |
| /** @addtogroup picotok |
| @b tokenisation_overview |
| |
| markup handling overview: |
| |
| The following markups are recognized |
| - ignore |
| - speed |
| - pitch |
| - volume |
| - voice |
| - preproccontext |
| - mark |
| - play |
| - usesig |
| - genfile |
| - sentence |
| - s |
| - paragraph |
| - p |
| - break |
| - spell (pauses between letters) |
| - phoneme |
| |
| All markups which are recognized but are not yet implemented in pico |
| system have the mark. |
| */ |
| |
| |
| #include "picodefs.h" |
| #include "picoos.h" |
| #include "picobase.h" |
| #include "picodbg.h" |
| #include "picodata.h" |
| #include "picotok.h" |
| #include "picoktab.h" |
| |
| #ifdef __cplusplus |
| extern "C" { |
| #endif |
| #if 0 |
| } |
| #endif |
| |
| /* *****************************************************************************/ |
| |
/* Capacity of the fixed-size input-side buffers (token string, tag name,
   save-file name). */
#define IN_BUF_SIZE 255
/* Capacity of the internal output buffer.  The replacement list is
   parenthesized so that the sum stays a single operand wherever the macro is
   expanded (e.g. "2 * OUT_BUF_SIZE" or "pos % OUT_BUF_SIZE"). */
#define OUT_BUF_SIZE (IN_BUF_SIZE + 3 * PICODATA_ITEM_HEADSIZE + 3)

/* Capacity of one markup string buffer (see the Word type). */
#define MARKUP_STRING_BUF_SIZE (IN_BUF_SIZE * 5)
/* Maximum number of attributes stored per markup tag. */
#define MAX_NR_MARKUP_PARAMS 6
#define MARKUP_HANDLING_DISABLED 0
#define MARKUP_HANDLING_ENABLED 1
/* End-of-line character. */
#define EOL '\n'
| |
| |
| typedef picoos_int8 pico_tokenSubType; |
| typedef picoos_uint8 pico_tokenType; |
| |
| /** @todo : consider adding these specialized exception codes: */ |
| |
| #define PICO_ERR_MARKUP_VALUE_OUT_OF_RANGE PICO_ERR_OTHER |
| #define PICO_ERR_INVALID_MARKUP_TAG PICO_ERR_OTHER |
| #define PICO_ERR_INTERNAL_LIMIT PICO_ERR_OTHER |
| |
/* Identifiers of the markup tags known to the tokenizer.  MIDummyStart and
   MIDummyEnd delimit the range; tok_markupTagId returns MIDummyEnd for a tag
   name it does not know. */
typedef enum {
    MIDummyStart,
    MIIgnore,
    MIPitch,
    MISpeed,
    MIVolume,
    MIVoice,
    MIPreprocContext,
    MIMarker,
    MIPlay,
    MIUseSig,
    MIGenFile,
    MIParagraph,
    MISentence,
    MIBreak,
    MISpell,
    MIPhoneme,
    MIItem,
    MISpeaker,
    MIDummyEnd
} MarkupId;

/* States of the markup parser. */
typedef enum {
    MSNotInMarkup,
    MSGotStart,
    MSExpectingmarkupTagName,
    MSInmarkupTagName,
    MSGotmarkupTagName,
    MSInAttrName,
    MSGotAttrName,
    MSGotEqual,
    MSInAttrValue,
    MSInAttrValueEscaped,
    MSGotAttrValue,
    MSGotEndSlash,
    MSGotEnd,
    MSError,
    MSErrorTooLong,
    MSErrorSyntax
} MarkupState;

/* Error kinds recorded while parsing a markup tag. */
typedef enum {
    MENone,
    MEMissingStart,
    MEUnknownTag,
    MEIdent,
    MEMissingEqual,
    MEMissingQuote,
    MEMissingEnd,
    MEUnexpectedChar,
    MEInterprete
} MarkupParseError;

/* Kind of tag: none, start tag, end tag or empty tag. */
typedef enum {
    MTNone,
    MTStart,
    MTEnd,
    MTEmpty
} MarkupTagType;

/* Result codes of tok_putToUtf. */
#define UTF_CHAR_COMPLETE   2
#define UTF_CHAR_INCOMPLETE 1
#define UTF_CHAR_MALFORMED  0
| |
/* Tag names of the recognized markups (compared in tok_markupTagId).
   Each replacement list is fully parenthesized: without the outer
   parentheses "MACRO[i]" would index the string literal first and then cast
   the resulting character to a pointer. */
#define TOK_MARKUP_KW_IGNORE     ((picoos_uchar*)"ignore")
#define TOK_MARKUP_KW_SPEED      ((picoos_uchar*)"speed")
#define TOK_MARKUP_KW_PITCH      ((picoos_uchar*)"pitch")
#define TOK_MARKUP_KW_VOLUME     ((picoos_uchar*)"volume")
#define TOK_MARKUP_KW_VOICE      ((picoos_uchar*)"voice")
#define TOK_MARKUP_KW_CONTEXT    ((picoos_uchar*)"preproccontext")
#define TOK_MARKUP_KW_MARK       ((picoos_uchar*)"mark")
#define TOK_MARKUP_KW_PLAY       ((picoos_uchar*)"play")
#define TOK_MARKUP_KW_USESIG     ((picoos_uchar*)"usesig")
#define TOK_MARKUP_KW_GENFILE    ((picoos_uchar*)"genfile")
#define TOK_MARKUP_KW_SENTENCE   ((picoos_uchar*)"sentence")
#define TOK_MARKUP_KW_S          ((picoos_uchar*)"s")
#define TOK_MARKUP_KW_PARAGRAPH  ((picoos_uchar*)"paragraph")
#define TOK_MARKUP_KW_P          ((picoos_uchar*)"p")
#define TOK_MARKUP_KW_BREAK      ((picoos_uchar*)"break")
#define TOK_MARKUP_KW_SPELL      ((picoos_uchar*)"spell")
#define TOK_MARKUP_KW_PHONEME    ((picoos_uchar*)"phoneme")
#define TOK_MARKUP_KW_ITEM       ((picoos_uchar*)"item")
#define TOK_MARKUP_KW_SPEAKER    ((picoos_uchar*)"speaker")
/* Attribute names accepted inside markup tags.
   Each replacement list is fully parenthesized so the cast applies to the
   whole string literal even when the macro is followed by a postfix operator
   such as "[i]". */
#define KWLevel        ((picoos_uchar *)"level")
#define KWName         ((picoos_uchar *)"name")
#define KWProsDomain   ((picoos_uchar *)"prosodydomain")
#define KWTime         ((picoos_uchar *)"time")
#define KWMode         ((picoos_uchar *)"mode")
#define KWSB           ((picoos_uchar *)"sb")
#define KWPB           ((picoos_uchar *)"pb")
#define KWFile         ((picoos_uchar *)"file")
#define KWType         ((picoos_uchar *)"type")
#define KWF0Beg        ((picoos_uchar *)"f0beg")
#define KWF0End        ((picoos_uchar *)"f0end")
#define KWXFadeBeg     ((picoos_uchar *)"xfadebeg")
#define KWXFadeEnd     ((picoos_uchar *)"xfadeend")
#define KWAlphabet     ((picoos_uchar *)"alphabet")
#define KWPH           ((picoos_uchar *)"ph")
#define KWOrthMode     ((picoos_uchar *)"orthmode")
#define KWIgnorePunct  ((picoos_uchar *)"ignorepunct")
#define KWInfo1        ((picoos_uchar *)"info1")
#define KWInfo2        ((picoos_uchar *)"info2")
#define KWDATA         ((picoos_uchar *)"data")
| |
/* Value ranges checked by tok_checkLimits when markup is interpreted.
   The *_MIN/*_MAX/*_DEFAULT values apply to absolute settings; the
   *_FACTOR_* values apply to relative ("N%") settings, which tok_isRelative
   scales by 10 (100% becomes 1000). */
#define PICO_SPEED_MIN           20
#define PICO_SPEED_MAX           500
#define PICO_SPEED_DEFAULT       100
#define PICO_SPEED_FACTOR_MIN    500
#define PICO_SPEED_FACTOR_MAX    2000

#define PICO_PITCH_MIN           50
#define PICO_PITCH_MAX           200
#define PICO_PITCH_DEFAULT       100
#define PICO_PITCH_FACTOR_MIN    500
#define PICO_PITCH_FACTOR_MAX    2000
/* negative constant parenthesized so it cannot merge with a preceding operator */
#define PICO_PITCH_ADD_MIN       (-100)
#define PICO_PITCH_ADD_MAX       100
#define PICO_PITCH_ADD_DEFAULT   0

#define PICO_VOLUME_MIN          0
#define PICO_VOLUME_MAX          500
#define PICO_VOLUME_DEFAULT      100
#define PICO_VOLUME_FACTOR_MIN   500
#define PICO_VOLUME_FACTOR_MAX   2000

#define PICO_SPEAKER_MIN         20
#define PICO_SPEAKER_MAX         180
#define PICO_SPEAKER_DEFAULT     100
#define PICO_SPEAKER_FACTOR_MIN  500
#define PICO_SPEAKER_FACTOR_MAX  2000

/* context name emitted when a preproccontext markup is closed */
#define PICO_CONTEXT_DEFAULT     ((picoos_uchar*)"DEFAULT")

/* value of the silence command emitted at the end of a paragraph; the break
   markup passes milliseconds in the same field */
#define PARAGRAPH_PAUSE_DUR         500
/* spell modes selected by mode="pb" / mode="sb" */
#define SPELL_WITH_PHRASE_BREAK     1
#define SPELL_WITH_SENTENCE_BREAK   2
| |
| /* *****************************************************************************/ |
| |
| #define TOK_PUNC_FLUSH (picoos_char) '\0' |
| |
| typedef picoos_uchar Word[MARKUP_STRING_BUF_SIZE]; |
| |
| |
| struct MarkupParam { |
| Word paramId; |
| Word paramVal; |
| }; |
| |
| typedef struct MarkupParam MarkupParams[MAX_NR_MARKUP_PARAMS]; |
| |
| |
| /** subobject : TokenizeUnit |
| * shortcut : tok |
| */ |
| typedef struct tok_subobj |
| { |
| picoos_int32 ignLevel; |
| |
| picoos_uchar utf[5]; |
| picoos_int32 utfpos; |
| picoos_int32 utflen; |
| |
| MarkupParams markupParams; |
| picoos_int32 nrMarkupParams; |
| MarkupState markupState; |
| picoos_uchar markupStr[MARKUP_STRING_BUF_SIZE]; |
| picoos_int32 markupPos; |
| picoos_int32 markupLevel[MIDummyEnd+1]; |
| picoos_uchar markupTagName[IN_BUF_SIZE]; |
| MarkupTagType markupTagType; |
| MarkupParseError markupTagErr; |
| |
| picoos_int32 strPos; |
| picoos_uchar strDelim; |
| picoos_bool isFileAttr; |
| |
| pico_tokenType tokenType; |
| pico_tokenSubType tokenSubType; |
| |
| picoos_int32 tokenPos; |
| picoos_uchar tokenStr[IN_BUF_SIZE]; |
| |
| picoos_int32 nrEOL; |
| |
| picoos_bool markupHandlingMode; /* to be moved ??? */ |
| picoos_bool aborted; /* to be moved ??? */ |
| |
| picoos_bool start; |
| |
| picoos_uint8 outBuf[OUT_BUF_SIZE]; /* internal output buffer */ |
| picoos_uint16 outReadPos; /* next pos to read from outBuf */ |
| picoos_uint16 outWritePos; /* next pos to write to outBuf */ |
| |
| picoos_uchar saveFile[IN_BUF_SIZE]; |
| Word phonemes; |
| |
| picotrns_SimpleTransducer transducer; |
| |
| /* kbs */ |
| |
| picoktab_Graphs graphTab; |
| picokfst_FST xsampa_parser; |
| picokfst_FST svoxpa_parser; |
| picokfst_FST xsampa2svoxpa_mapper; |
| |
| |
| |
| } tok_subobj_t; |
| |
| /* *****************************************************************************/ |
| |
| static void tok_treatMarkupAsSimpleToken (picodata_ProcessingUnit this, tok_subobj_t * tok); |
| static void tok_treatChar (picodata_ProcessingUnit this, tok_subobj_t * tok, picoos_uchar ch, picoos_bool markupHandling); |
| static void tok_treatMarkup (picodata_ProcessingUnit this, tok_subobj_t * tok); |
| static void tok_putToMarkup (picodata_ProcessingUnit this, tok_subobj_t * tok, picoos_uchar str[]); |
| static void tok_treatSimpleToken (picodata_ProcessingUnit this, tok_subobj_t * tok); |
| static MarkupId tok_markupTagId (picoos_uchar tagId[]); |
| |
| /* *****************************************************************************/ |
| |
| static picoos_bool tok_strEqual(picoos_uchar * str1, picoos_uchar * str2) |
| { |
| return (picoos_strcmp((picoos_char*)str1, (picoos_char*)str2) == 0); |
| } |
| |
| static void tok_reduceBlanks(picoos_uchar * str) |
| /* Remove leading and trailing blanks of 'str' and reduce |
| groups of blanks within string to exactly one blank. */ |
| |
| { |
| int i = 0; |
| int j = 0; |
| |
| while (str[j] != 0) { |
| if (str[j] == (picoos_uchar)' ') { |
| /* note one blank except at the beginning of string */ |
| if (i > 0) { |
| str[i] = (picoos_uchar)' '; |
| i++; |
| } |
| j++; |
| while (str[j] == (picoos_uchar)' ') { |
| j++; |
| } |
| } else { |
| str[i] = str[j]; |
| j++; |
| i++; |
| } |
| } |
| |
| /* remove blanks at end of string */ |
| if ((i > 0) && (str[i - 1] == ' ')) { |
| i--; |
| } |
| str[i] = 0; |
| } |
| |
| |
| static void tok_startIgnore (tok_subobj_t * tok) |
| { |
| tok->ignLevel++; |
| } |
| |
| |
| static void tok_endIgnore (tok_subobj_t * tok) |
| { |
| if (tok->ignLevel > 0) { |
| tok->ignLevel--; |
| } |
| } |
| |
| |
| static void tok_getParamIntVal (MarkupParams params, picoos_uchar paramId[], picoos_int32 * paramVal, picoos_bool * paramFound) |
| { |
| int i=0; |
| |
| while ((i < MAX_NR_MARKUP_PARAMS) && !tok_strEqual(paramId,params[i].paramId)) { |
| i++; |
| } |
| if ((i < MAX_NR_MARKUP_PARAMS)) { |
| (*paramVal) = picoos_atoi((picoos_char*)params[i].paramVal); |
| (*paramFound) = TRUE; |
| } else { |
| (*paramVal) = -1; |
| (*paramFound) = FALSE; |
| } |
| } |
| |
| |
| |
| static void tok_getParamStrVal (MarkupParams params, picoos_uchar paramId[], picoos_uchar paramStrVal[], picoos_bool * paramFound) |
| { |
| int i=0; |
| |
| while ((i < MAX_NR_MARKUP_PARAMS) && !tok_strEqual(paramId,params[i].paramId)) { |
| i++; |
| } |
| if (i < MAX_NR_MARKUP_PARAMS) { |
| picoos_strcpy((picoos_char*)paramStrVal, (picoos_char*)params[i].paramVal); |
| (*paramFound) = TRUE; |
| } else { |
| paramStrVal[0] = 0; |
| (*paramFound) = FALSE; |
| } |
| } |
| |
| |
| static void tok_getParamPhonesStr (MarkupParams params, picoos_uchar paramId[], picoos_uchar alphabet[], picoos_uchar phones[], picoos_int32 phoneslen, picoos_bool * paramFound) |
| { |
| |
| int i; |
| picoos_bool done; |
| |
| i = 0; |
| while ((i < MAX_NR_MARKUP_PARAMS) && !tok_strEqual(paramId, params[i].paramId)) { |
| i++; |
| } |
| if (i < MAX_NR_MARKUP_PARAMS) { |
| if (tok_strEqual(alphabet, PICODATA_XSAMPA) || tok_strEqual(alphabet, (picoos_uchar*)"")) { |
| picoos_strlcpy((picoos_char*)phones, (picoos_char*)params[i].paramVal, phoneslen); |
| done = TRUE; |
| } else { |
| done = FALSE; |
| } |
| (*paramFound) = TRUE; |
| } else { |
| done = FALSE; |
| (*paramFound) = FALSE; |
| } |
| if (!done) { |
| phones[0] = 0; |
| } |
| } |
| |
| |
| static void tok_clearMarkupParams (MarkupParams params) |
| { |
| int i; |
| |
| for (i = 0; i<MAX_NR_MARKUP_PARAMS; i++) { |
| params[i].paramId[0] = 0; |
| params[i].paramVal[0] = 0; |
| } |
| } |
| |
| |
| static void tok_getDur (picoos_uchar durStr[], picoos_uint32 * dur, picoos_bool * done) |
| { |
| |
| int num=0; |
| int i=0; |
| picoos_uchar tmpWord[IN_BUF_SIZE]; |
| |
| picoos_strlcpy((picoos_char*)tmpWord, (picoos_char*)durStr, sizeof(tmpWord)); |
| tok_reduceBlanks(tmpWord); |
| while ((durStr[i] >= '0') && (durStr[i] <= '9')) { |
| num = 10 * num + (int)durStr[i] - (int)'0'; |
| tmpWord[i] = ' '; |
| i++; |
| } |
| tok_reduceBlanks(tmpWord); |
| if (tok_strEqual(tmpWord, (picoos_uchar*)"s")) { |
| (*dur) = (1000 * num); |
| (*done) = TRUE; |
| } else if (tok_strEqual(tmpWord,(picoos_uchar*)"ms")) { |
| (*dur) = num; |
| (*done) = TRUE; |
| } else { |
| (*dur) = 0; |
| (*done) = FALSE; |
| } |
| } |
| |
| |
| static picoos_int32 tok_putToUtf (tok_subobj_t * tok, picoos_uchar ch) |
| { |
| if (tok->utfpos < PICOBASE_UTF8_MAXLEN) { |
| tok->utf[tok->utfpos] = ch; |
| if (tok->utfpos == 0) { |
| tok->utflen = picobase_det_utf8_length(ch); |
| } else if (((ch < (picoos_uchar)'\200') || (ch >= (picoos_uchar)'\300'))) { |
| tok->utflen = 0; |
| } |
| (tok->utfpos)++; |
| if ((tok->utfpos == tok->utflen)) { |
| if ((tok->utfpos < PICOBASE_UTF8_MAXLEN)) { |
| tok->utf[tok->utfpos] = 0; |
| } |
| return UTF_CHAR_COMPLETE; |
| } else if (tok->utfpos < tok->utflen) { |
| return UTF_CHAR_INCOMPLETE; |
| } else { |
| return UTF_CHAR_MALFORMED; |
| } |
| } else { |
| return UTF_CHAR_MALFORMED; |
| } |
| } |
| |
| |
| static picoos_bool tok_isRelative (picoos_uchar strval[], picoos_uint32 * val) |
| { |
| picoos_int32 len; |
| picoos_bool rel; |
| |
| rel = FALSE; |
| len = picoos_strlen((picoos_char*)strval); |
| if (len > 0) { |
| if (strval[len - 1] == '%') { |
| strval[len - 1] = 0; |
| if ((strval[0] == '+') || (strval[0] == '-')) { |
| (*val) = 1000 + (picoos_atoi((picoos_char*)strval) * 10); |
| } else { |
| (*val) = picoos_atoi((picoos_char*)strval) * 10; |
| } |
| rel = TRUE; |
| } |
| } |
| return rel; |
| } |
| |
| |
| static void tok_putItem (picodata_ProcessingUnit this, tok_subobj_t * tok, |
| picoos_uint8 itemType, picoos_uint8 info1, picoos_uint8 info2, |
| picoos_uint16 val, |
| picoos_uchar str[]) |
| { |
| picoos_int32 len, i; |
| |
| if ((itemType == PICODATA_ITEM_CMD) && (info1 == PICODATA_ITEMINFO1_CMD_FLUSH)) { |
| tok->outBuf[tok->outWritePos++] = itemType; |
| tok->outBuf[tok->outWritePos++] = info1; |
| tok->outBuf[tok->outWritePos++] = info2; |
| tok->outBuf[tok->outWritePos++] = 0; |
| } |
| else if (tok->ignLevel <= 0) { |
| switch (itemType) { |
| case PICODATA_ITEM_CMD: |
| switch (info1) { |
| case PICODATA_ITEMINFO1_CMD_CONTEXT: |
| case PICODATA_ITEMINFO1_CMD_VOICE: |
| case PICODATA_ITEMINFO1_CMD_MARKER: |
| case PICODATA_ITEMINFO1_CMD_PLAY: |
| case PICODATA_ITEMINFO1_CMD_SAVE: |
| case PICODATA_ITEMINFO1_CMD_UNSAVE: |
| case PICODATA_ITEMINFO1_CMD_PROSDOMAIN: |
| case PICODATA_ITEMINFO1_CMD_PHONEME: |
| len = picoos_strlen((picoos_char*)str); |
| if (tok->outWritePos + 4 + len < OUT_BUF_SIZE) { |
| tok->outBuf[tok->outWritePos++] = itemType; |
| tok->outBuf[tok->outWritePos++] = info1; |
| tok->outBuf[tok->outWritePos++] = info2; |
| tok->outBuf[tok->outWritePos++] = len; |
| for (i=0; i<len; i++) { |
| tok->outBuf[tok->outWritePos++] = str[i]; |
| } |
| } |
| else { |
| PICODBG_WARN(("tok_putItem: output buffer too small")); |
| } |
| break; |
| case PICODATA_ITEMINFO1_CMD_IGNSIG: |
| case PICODATA_ITEMINFO1_CMD_IGNORE: |
| if (tok->outWritePos + 4 < OUT_BUF_SIZE) { |
| tok->outBuf[tok->outWritePos++] = itemType; |
| tok->outBuf[tok->outWritePos++] = info1; |
| tok->outBuf[tok->outWritePos++] = info2; |
| tok->outBuf[tok->outWritePos++] = 0; |
| } |
| else { |
| PICODBG_WARN(("tok_putItem: output buffer too small")); |
| } |
| break; |
| case PICODATA_ITEMINFO1_CMD_SPEED: |
| case PICODATA_ITEMINFO1_CMD_PITCH: |
| case PICODATA_ITEMINFO1_CMD_VOLUME: |
| case PICODATA_ITEMINFO1_CMD_SPELL: |
| case PICODATA_ITEMINFO1_CMD_SIL: |
| case PICODATA_ITEMINFO1_CMD_SPEAKER: |
| if (tok->outWritePos + 4 + 2 < OUT_BUF_SIZE) { |
| tok->outBuf[tok->outWritePos++] = itemType; |
| tok->outBuf[tok->outWritePos++] = info1; |
| tok->outBuf[tok->outWritePos++] = info2; |
| tok->outBuf[tok->outWritePos++] = 2; |
| tok->outBuf[tok->outWritePos++] = val % 256; |
| tok->outBuf[tok->outWritePos++] = val / 256; |
| } |
| else { |
| PICODBG_WARN(("tok_putItem: output buffer too small")); |
| } |
| break; |
| default: |
| PICODBG_WARN(("tok_putItem: unknown command type")); |
| } |
| break; |
| case PICODATA_ITEM_TOKEN: |
| len = picoos_strlen((picoos_char*)str); |
| if (tok->outWritePos + 4 + len < OUT_BUF_SIZE) { |
| tok->outBuf[tok->outWritePos++] = itemType; |
| tok->outBuf[tok->outWritePos++] = info1; |
| tok->outBuf[tok->outWritePos++] = info2; |
| tok->outBuf[tok->outWritePos++] = len; |
| for (i=0; i<len; i++) { |
| tok->outBuf[tok->outWritePos++] = str[i]; |
| } |
| } |
| else { |
| PICODBG_WARN(("tok_putItem: output buffer too small")); |
| } |
| break; |
| default: |
| PICODBG_WARN(("tok_putItem: unknown item type")); |
| } |
| } |
| } |
| |
| |
| static void tok_putItem2 (picodata_ProcessingUnit this, tok_subobj_t * tok, |
| picoos_uint8 type, |
| picoos_uint8 info1, picoos_uint8 info2, |
| picoos_uint8 len, |
| picoos_uint8 data[]) |
| { |
| picoos_int32 i; |
| |
| if (is_valid_itemtype(type)) { |
| tok->outBuf[tok->outWritePos++] = type; |
| tok->outBuf[tok->outWritePos++] = info1; |
| tok->outBuf[tok->outWritePos++] = info2; |
| tok->outBuf[tok->outWritePos++] = len; |
| for (i=0; i<len; i++) { |
| tok->outBuf[tok->outWritePos++] = data[i]; |
| } |
| } |
| } |
| |
| |
| static MarkupId tok_markupTagId (picoos_uchar tagId[]) |
| { |
| if (picoos_strstr(tagId,(picoos_char *)"svox:") == (picoos_char *)tagId) { |
| tagId+=5; |
| } |
| if (tok_strEqual(tagId, TOK_MARKUP_KW_IGNORE)) { |
| return MIIgnore; |
| } else if (tok_strEqual(tagId, TOK_MARKUP_KW_SPEED)) { |
| return MISpeed; |
| } else if (tok_strEqual(tagId, TOK_MARKUP_KW_PITCH)) { |
| return MIPitch; |
| } else if (tok_strEqual(tagId, TOK_MARKUP_KW_VOLUME)) { |
| return MIVolume; |
| } else if (tok_strEqual(tagId, TOK_MARKUP_KW_SPEAKER)) { |
| return MISpeaker; |
| } else if (tok_strEqual(tagId, TOK_MARKUP_KW_VOICE)) { |
| return MIVoice; |
| } else if (tok_strEqual(tagId, TOK_MARKUP_KW_CONTEXT)) { |
| return MIPreprocContext; |
| } else if (tok_strEqual(tagId, TOK_MARKUP_KW_MARK)) { |
| return MIMarker; |
| } else if (tok_strEqual(tagId, TOK_MARKUP_KW_PLAY)) { |
| return MIPlay; |
| } else if (tok_strEqual(tagId, TOK_MARKUP_KW_USESIG)) { |
| return MIUseSig; |
| } else if (tok_strEqual(tagId, TOK_MARKUP_KW_GENFILE)) { |
| return MIGenFile; |
| } else if (tok_strEqual(tagId, TOK_MARKUP_KW_SENTENCE) || tok_strEqual(tagId, TOK_MARKUP_KW_S)) { |
| return MISentence; |
| } else if (tok_strEqual(tagId, TOK_MARKUP_KW_PARAGRAPH) || tok_strEqual(tagId, TOK_MARKUP_KW_P)) { |
| return MIParagraph; |
| } else if (tok_strEqual(tagId, TOK_MARKUP_KW_BREAK)) { |
| return MIBreak; |
| } else if (tok_strEqual(tagId, TOK_MARKUP_KW_SPELL)) { |
| return MISpell; |
| } else if (tok_strEqual(tagId, TOK_MARKUP_KW_PHONEME)) { |
| return MIPhoneme; |
| } else if (tok_strEqual(tagId, TOK_MARKUP_KW_ITEM)) { |
| return MIItem; |
| } else { |
| return MIDummyEnd; |
| } |
| } |
| |
| |
| extern void tok_checkLimits (picodata_ProcessingUnit this, picoos_uint32 * value, picoos_uint32 min, picoos_uint32 max, picoos_uchar valueType[]) |
| { |
| if ((((*value) < min) || ((*value) > max))) { |
| picoos_emRaiseWarning(this->common->em, PICO_ERR_MARKUP_VALUE_OUT_OF_RANGE, (picoos_char*)"", (picoos_char*)"attempt to set illegal value %i for %s", *value, valueType); |
| if (((*value) < min)) { |
| (*value) = min; |
| } else if (((*value) > max)) { |
| (*value) = max; |
| } |
| } |
| } |
| |
| |
| |
| extern void tok_checkRealLimits (picodata_ProcessingUnit this, picoos_single * value, picoos_single min, picoos_single max, picoos_uchar valueType[]) |
| { |
| if ((((*value) < min) || ((*value) > max))) { |
| picoos_emRaiseWarning(this->common->em, PICO_ERR_MARKUP_VALUE_OUT_OF_RANGE, (picoos_char*)"", (picoos_char*)"attempt to set illegal value %f for %s", *value, valueType); |
| if (((*value) < min)) { |
| (*value) = min; |
| } else if (((*value) > max)) { |
| (*value) = max; |
| } |
| } |
| } |
| |
| |
/* Size in bytes of the scratch strings (valStr, valStr2, valStr3) used by
   tok_interpretMarkup; also passed as the copy limit to
   tok_getParamPhonesStr. */
#define VAL_STR_LEN 21
| |
| static void tok_interpretMarkup (picodata_ProcessingUnit this, tok_subobj_t * tok, picoos_bool isStartTag, MarkupId mId) |
| { |
| picoos_bool done; |
| picoos_int32 ival; |
| picoos_uint32 uval; |
| picoos_int32 ival2; |
| picoos_uchar valStr[VAL_STR_LEN]; |
| picoos_uchar valStr2[VAL_STR_LEN]; |
| picoos_uchar valStr3[VAL_STR_LEN]; |
| picoos_int32 i2; |
| picoos_uint32 dur; |
| picoos_bool done1; |
| picoos_bool paramFound; |
| picoos_uint8 type, info1, info2; |
| picoos_uint8 data[256]; |
| picoos_int32 pos, n, len; |
| picoos_uchar part[10]; |
| |
| done = FALSE; |
| switch (mId) { |
| case MIIgnore: |
| if (isStartTag && tok_strEqual(tok->markupParams[0].paramId,(picoos_uchar*)"")) { |
| tok_startIgnore(tok); |
| done = TRUE; |
| } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) { |
| tok_endIgnore(tok); |
| done = TRUE; |
| } |
| break; |
| case MISpeed: |
| if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWLevel)) { |
| if (tok_isRelative(tok->markupParams[0].paramVal, & uval)) { |
| tok_checkLimits(this, & uval, PICO_SPEED_FACTOR_MIN, PICO_SPEED_FACTOR_MAX,(picoos_uchar*)"relative speed factor"); |
| tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPEED, PICODATA_ITEMINFO2_CMD_RELATIVE, uval, (picoos_uchar*)""); |
| } else { |
| uval = picoos_atoi((picoos_char*)tok->markupParams[0].paramVal); |
| tok_checkLimits(this, & uval, PICO_SPEED_MIN, PICO_SPEED_MAX,(picoos_uchar*)"speed"); |
| tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPEED, PICODATA_ITEMINFO2_CMD_ABSOLUTE, uval, (picoos_uchar*)""); |
| } |
| done = TRUE; |
| } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) { |
| tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPEED, PICODATA_ITEMINFO2_CMD_ABSOLUTE, PICO_SPEED_DEFAULT, (picoos_uchar*)""); |
| done = TRUE; |
| } |
| break; |
| case MIPitch: |
| if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWLevel)) { |
| if (tok_isRelative(tok->markupParams[0].paramVal, & uval)) { |
| tok_checkLimits(this, & uval,PICO_PITCH_FACTOR_MIN,PICO_PITCH_FACTOR_MAX, (picoos_uchar*)"relative pitch factor"); |
| tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PITCH, PICODATA_ITEMINFO2_CMD_RELATIVE, uval, (picoos_uchar*)""); |
| } else { |
| uval = picoos_atoi((picoos_char*)tok->markupParams[0].paramVal); |
| tok_checkLimits(this, & uval,PICO_PITCH_MIN,PICO_PITCH_MAX, (picoos_uchar*)"pitch"); |
| tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PITCH,PICODATA_ITEMINFO2_CMD_ABSOLUTE, uval, (picoos_uchar*)""); |
| } |
| done = TRUE; |
| } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) { |
| tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PITCH,PICODATA_ITEMINFO2_CMD_ABSOLUTE, PICO_PITCH_DEFAULT, (picoos_uchar*)""); |
| done = TRUE; |
| } |
| break; |
| case MIVolume: |
| if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWLevel)) { |
| if (tok_isRelative(tok->markupParams[0].paramVal, & uval)) { |
| tok_checkLimits(this, & uval, PICO_VOLUME_FACTOR_MIN, PICO_VOLUME_FACTOR_MAX, (picoos_uchar*)"relative volume factor"); |
| tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_VOLUME, PICODATA_ITEMINFO2_CMD_RELATIVE, uval, (picoos_uchar*)""); |
| } else { |
| uval = picoos_atoi((picoos_char*)tok->markupParams[0].paramVal); |
| tok_checkLimits(this, & uval, PICO_VOLUME_MIN, PICO_VOLUME_MAX, (picoos_uchar*)"volume"); |
| tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_VOLUME, PICODATA_ITEMINFO2_CMD_ABSOLUTE, uval, (picoos_uchar*)""); |
| } |
| done = TRUE; |
| } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) { |
| tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_VOLUME, PICODATA_ITEMINFO2_CMD_ABSOLUTE, PICO_VOLUME_DEFAULT, (picoos_uchar*)""); |
| done = TRUE; |
| } |
| break; |
| case MISpeaker: |
| if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWLevel)) { |
| if (tok_isRelative(tok->markupParams[0].paramVal, & uval)) { |
| tok_checkLimits(this, & uval, PICO_SPEAKER_FACTOR_MIN, PICO_SPEAKER_FACTOR_MAX, (picoos_uchar*)"relative speaker factor"); |
| tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPEAKER, PICODATA_ITEMINFO2_CMD_RELATIVE, uval, (picoos_uchar*)""); |
| } else { |
| uval = picoos_atoi((picoos_char*)tok->markupParams[0].paramVal); |
| tok_checkLimits(this, & uval, PICO_SPEAKER_MIN, PICO_SPEAKER_MAX, (picoos_uchar*)"volume"); |
| tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPEAKER, PICODATA_ITEMINFO2_CMD_ABSOLUTE, uval, (picoos_uchar*)""); |
| } |
| done = TRUE; |
| } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) { |
| tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPEAKER, PICODATA_ITEMINFO2_CMD_ABSOLUTE, PICO_SPEAKER_DEFAULT, (picoos_uchar*)""); |
| done = TRUE; |
| } |
| break; |
| |
| case MIVoice: |
| if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWName)) { |
| tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_VOICE, PICODATA_ITEMINFO2_NA, 0, tok->markupParams[0].paramVal); |
| tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)""); |
| tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PROSDOMAIN, 0, 0, (picoos_uchar*)""); |
| done = TRUE; |
| } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId,(picoos_uchar*)"")) { |
| tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_VOICE, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)""); |
| tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)""); |
| tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PROSDOMAIN, 0, 0, (picoos_uchar*)""); |
| done = TRUE; |
| } |
| break; |
| case MIPreprocContext: |
| if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWName)) { |
| tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_CONTEXT, PICODATA_ITEMINFO2_NA, 0, tok->markupParams[0].paramVal); |
| done = TRUE; |
| } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId,(picoos_uchar*)"")) { |
| tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_CONTEXT, PICODATA_ITEMINFO2_NA, 0, PICO_CONTEXT_DEFAULT); |
| done = TRUE; |
| } |
| break; |
| case MIMarker: |
| if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWName)) { |
| tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_MARKER, PICODATA_ITEMINFO2_NA, 0, tok->markupParams[0].paramVal); |
| done = TRUE; |
| } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId,(picoos_uchar*)"")) { |
| done = TRUE; |
| } |
| break; |
| case MISentence: |
| if (isStartTag) { |
| tok_getParamStrVal(tok->markupParams, KWProsDomain, (picoos_uchar*)valStr, & paramFound); |
| tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)""); |
| tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PROSDOMAIN, 2, 0, valStr); |
| done = TRUE; |
| } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) { |
| tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)""); |
| tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PROSDOMAIN, 2, 0, (picoos_uchar*)""); |
| done = TRUE; |
| } |
| break; |
| case MIParagraph: |
| if (isStartTag) { |
| tok_getParamStrVal(tok->markupParams, KWProsDomain, (picoos_uchar*)valStr, & paramFound); |
| tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)""); |
| tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PROSDOMAIN, 1, 0, valStr); |
| done = TRUE; |
| } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) { |
| tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)""); |
| tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SIL, PICODATA_ITEMINFO2_NA, PARAGRAPH_PAUSE_DUR, (picoos_uchar*)""); |
| tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PROSDOMAIN, 1, 0, (picoos_uchar*)""); |
| done = TRUE; |
| } |
| break; |
| case MIBreak: |
| if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWTime)) { |
| tok_getDur(tok->markupParams[0].paramVal, & dur, & done1); |
| tok_checkLimits (this, &dur, 0, 65535, (picoos_uchar*)"time"); |
| if (done1) { |
| tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SIL, PICODATA_ITEMINFO2_NA, dur, (picoos_uchar*)""); |
| done = TRUE; |
| } |
| } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) { |
| done = TRUE; |
| } |
| break; |
| case MISpell: |
| if (isStartTag) { |
| if (tok_strEqual(tok->markupParams[0].paramId, KWMode)) { |
| if (tok_strEqual(tok->markupParams[0].paramVal, KWPB)) { |
| uval = SPELL_WITH_PHRASE_BREAK; |
| } else if (tok_strEqual(tok->markupParams[0].paramVal, KWSB)) { |
| uval = SPELL_WITH_SENTENCE_BREAK; |
| } else { |
| tok_getDur(tok->markupParams[0].paramVal, & uval, & done1); |
| tok_checkLimits (this, & uval, 0, 65535, (picoos_uchar*)"time"); |
| if (done1) { |
| done = TRUE; |
| } |
| } |
| } else { |
| uval = SPELL_WITH_PHRASE_BREAK; |
| } |
| tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPELL, PICODATA_ITEMINFO2_CMD_START, uval, (picoos_uchar*)""); |
| done = TRUE; |
| } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) { |
| tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SPELL, PICODATA_ITEMINFO2_CMD_END, 0, (picoos_uchar*)""); |
| done = TRUE; |
| } |
| break; |
| case MIGenFile: |
| if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWFile)) { |
| if (tok->saveFile[0] != 0) { |
| tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_UNSAVE, |
| picodata_getPuTypeFromExtension(tok->saveFile, /*input*/FALSE), 0, tok->saveFile); |
| tok->saveFile[0] = 0; |
| } |
| tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_SAVE, |
| picodata_getPuTypeFromExtension(tok->markupParams[0].paramVal, /*input*/FALSE), 0, tok->markupParams[0].paramVal); |
| picoos_strcpy((picoos_char*)tok->saveFile, (picoos_char*)tok->markupParams[0].paramVal); |
| done = TRUE; |
| } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) { |
| if (tok->saveFile[0] != 0) { |
| tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_UNSAVE, |
| picodata_getPuTypeFromExtension(tok->saveFile, /*input*/FALSE), 0, (picoos_uchar*)""); |
| tok->saveFile[0] = 0; |
| } |
| done = TRUE; |
| } |
| break; |
| case MIPlay: |
| if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWFile)) { |
| if (picoos_FileExists(this->common, (picoos_char*)tok->markupParams[0].paramVal)) { |
| tok_getParamIntVal(tok->markupParams,KWF0Beg,& ival,& paramFound); |
| tok_getParamIntVal(tok->markupParams,KWF0End,& ival2,& paramFound); |
| tok_getParamStrVal(tok->markupParams,KWAlphabet,valStr3,& paramFound); |
| tok_getParamPhonesStr(tok->markupParams,KWXFadeBeg,valStr3,valStr,VAL_STR_LEN,& paramFound); |
| tok_getParamPhonesStr(tok->markupParams,KWXFadeEnd,valStr3,valStr2,VAL_STR_LEN,& paramFound); |
| tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PLAY, |
| picodata_getPuTypeFromExtension(tok->markupParams[0].paramVal, /*input*/TRUE), 0, tok->markupParams[0].paramVal); |
| tok_startIgnore(tok); |
| } else { |
| if (tok->ignLevel > 0) { |
| tok_startIgnore(tok); |
| } else { |
| picoos_emRaiseWarning(this->common->em, PICO_EXC_CANT_OPEN_FILE, (picoos_char*)"", (picoos_char*)"file '%s' not found; synthesizing enclosed text instead\n", tok->markupParams[0].paramVal); |
| } |
| } |
| done = TRUE; |
| } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) { |
| tok_endIgnore(tok); |
| done = TRUE; |
| } |
| break; |
| case MIUseSig: |
| if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWFile)) { |
| if (picoos_FileExists(this->common, (picoos_char*)tok->markupParams[0].paramVal)) { |
| tok_getParamIntVal(tok->markupParams,KWF0Beg,& ival,& paramFound); |
| tok_getParamIntVal(tok->markupParams,KWF0End,& ival2,& paramFound); |
| tok_getParamStrVal(tok->markupParams,KWAlphabet,valStr3, & paramFound); |
| tok_getParamPhonesStr(tok->markupParams,KWXFadeBeg,valStr3,valStr,VAL_STR_LEN,& paramFound); |
| tok_getParamPhonesStr(tok->markupParams,KWXFadeEnd,valStr3,valStr2,VAL_STR_LEN,& paramFound); |
| tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PLAY, |
| picodata_getPuTypeFromExtension(tok->markupParams[0].paramVal, /*input*/TRUE), 0, tok->markupParams[0].paramVal); |
| tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_IGNSIG, PICODATA_ITEMINFO2_CMD_START, 0, (picoos_uchar*)""); |
| } else { |
| if (tok->ignLevel <= 0) { |
| picoos_emRaiseWarning(this->common->em, PICO_EXC_CANT_OPEN_FILE, (picoos_char*)"", (picoos_char*)"file '%s' not found; synthesizing enclosed text instead", tok->markupParams[0].paramVal); |
| } |
| } |
| done = TRUE; |
| } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) { |
| tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_IGNSIG, PICODATA_ITEMINFO2_CMD_END, 0, (picoos_uchar*)""); |
| done = TRUE; |
| } |
| break; |
| case MIPhoneme: |
| i2 = 0; |
| if (isStartTag) { |
| if (tok_strEqual(tok->markupParams[0].paramId, KWAlphabet) && tok_strEqual(tok->markupParams[1].paramId, KWPH)) { |
| if (tok_strEqual(tok->markupParams[2].paramId, KWOrthMode) |
| && tok_strEqual(tok->markupParams[2].paramVal, KWIgnorePunct)) { |
| i2 = 1; |
| } |
| if (picodata_mapPAStrToPAIds(tok->transducer, this->common, tok->xsampa_parser, tok->svoxpa_parser, tok->xsampa2svoxpa_mapper, tok->markupParams[1].paramVal, tok->markupParams[0].paramVal, tok->phonemes, sizeof(tok->phonemes)-1) == PICO_OK) { |
| tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PHONEME, |
| PICODATA_ITEMINFO2_CMD_START, i2, tok->phonemes); |
| done = TRUE; |
| } else { |
| PICODBG_WARN(("cannot map phonetic string '%s'; synthesizeing text instead", tok->markupParams[1].paramVal)); |
| picoos_emRaiseWarning(this->common->em, PICO_ERR_MARKUP_VALUE_OUT_OF_RANGE,(picoos_char*)"", (picoos_char*)"cannot map phonetic string '%s'; synthesizeing text instead", tok->markupParams[1].paramVal); |
| done = TRUE; |
| } |
| } else if (tok_strEqual(tok->markupParams[0].paramId, KWPH)) { |
| if (tok_strEqual(tok->markupParams[1].paramId, KWOrthMode) |
| && tok_strEqual(tok->markupParams[1].paramVal, KWIgnorePunct)) { |
| i2 = 1; |
| } |
| if (picodata_mapPAStrToPAIds(tok->transducer, this->common, tok->xsampa_parser, tok->svoxpa_parser, tok->xsampa2svoxpa_mapper, tok->markupParams[0].paramVal, PICODATA_XSAMPA, tok->phonemes, sizeof(tok->phonemes)) == PICO_OK) { |
| tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PHONEME, |
| PICODATA_ITEMINFO2_CMD_START, i2, tok->phonemes); |
| done = TRUE; |
| } |
| else { |
| PICODBG_WARN(("cannot map phonetic string '%s'; synthesizeing text instead", tok->markupParams[1].paramVal)); |
| picoos_emRaiseWarning(this->common->em, PICO_ERR_MARKUP_VALUE_OUT_OF_RANGE,(picoos_char*)"", (picoos_char*)"cannot map phonetic string '%s'; synthesizing text instead", tok->markupParams[0].paramVal); |
| done = TRUE; |
| } |
| } |
| } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId, (picoos_uchar*)"")) { |
| tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_PHONEME, |
| PICODATA_ITEMINFO2_CMD_END, i2, (picoos_uchar*)""); |
| done = TRUE; |
| } |
| break; |
| case MIItem: |
| if (isStartTag && tok_strEqual(tok->markupParams[0].paramId, KWType) && |
| tok_strEqual(tok->markupParams[1].paramId, KWInfo1)&& |
| tok_strEqual(tok->markupParams[2].paramId, KWInfo2)&& |
| tok_strEqual(tok->markupParams[3].paramId, KWDATA)) { |
| picoos_int32 len2, n2; |
| type = picoos_atoi(tok->markupParams[0].paramVal); |
| info1 = picoos_atoi(tok->markupParams[1].paramVal); |
| info2 = picoos_atoi(tok->markupParams[2].paramVal); |
| n = 0; n2 = 0; |
| len2 = (picoos_int32)picoos_strlen(tok->markupParams[3].paramVal); |
| while (n<len2) { |
| while ((tok->markupParams[3].paramVal[n] != 0) && (tok->markupParams[3].paramVal[n] <= 32)) { |
| n++; |
| } |
| tok->markupParams[3].paramVal[n2] = tok->markupParams[3].paramVal[n]; |
| n++; |
| n2++; |
| } |
| if (is_valid_itemtype(type)) { |
| done = TRUE; |
| len = 0; |
| pos = 0; |
| picoos_get_sep_part_str(tok->markupParams[3].paramVal, picoos_strlen(tok->markupParams[3].paramVal), |
| &pos, ',', part, 10, &done1); |
| while (done && done1) { |
| n = picoos_atoi(part); |
| if ((n>=0) && (n<256) && (len<256)) { |
| data[len++] = n; |
| } |
| else { |
| done = FALSE; |
| } |
| picoos_get_sep_part_str(tok->markupParams[3].paramVal, picoos_strlen(tok->markupParams[3].paramVal), |
| &pos, ',', part, 10, &done1); |
| } |
| if (done) { |
| tok_putItem2(this, tok, type, info1, info2, len, data); |
| } |
| } |
| else { |
| done = FALSE; |
| } |
| } else if (!isStartTag && tok_strEqual(tok->markupParams[0].paramId,(picoos_uchar*)"")) { |
| done = TRUE; |
| } |
| break; |
| default: |
| break; |
| } |
| if (!done) { |
| tok->markupTagErr = MEInterprete; |
| } |
| if (isStartTag) { |
| tok->markupLevel[mId]++; |
| } else if ((tok->markupLevel[mId] > 0)) { |
| tok->markupLevel[mId]--; |
| } |
| } |
| |
| |
| static picoos_bool tok_attrChar (picoos_uchar ch, picoos_bool first) |
| { |
| return ((((ch >= (picoos_uchar)'A') && (ch <= (picoos_uchar)'Z')) || |
| ((ch >= (picoos_uchar)'a') && (ch <= (picoos_uchar)'z'))) || |
| ( !(first) && ((ch >= (picoos_uchar)'0') && (ch <= (picoos_uchar)'9')))); |
| } |
| |
| |
| |
| static picoos_bool tok_idChar (picoos_uchar ch, picoos_bool first) |
| { |
| return tok_attrChar(ch, first) || ( !(first) && (ch == (picoos_uchar)':')); |
| } |
| |
| |
| static void tok_setIsFileAttr (picoos_uchar name[], picoos_bool * isFile) |
| { |
| (*isFile) = tok_strEqual(name, KWFile); |
| } |
| |
| /* *****************************************************************************/ |
| |
| static void tok_putToSimpleToken (picodata_ProcessingUnit this, tok_subobj_t * tok, picoos_uchar str[], pico_tokenType type, pico_tokenSubType subtype) |
| { |
| int i, len; |
| |
| if (str[0] != 0) { |
| len = picoos_strlen((picoos_char*)str); |
| for (i = 0; i < len; i++) { |
| if (tok->tokenPos >= IN_BUF_SIZE) { |
| picoos_emRaiseWarning(this->common->em, PICO_ERR_INTERNAL_LIMIT, (picoos_char*)"", (picoos_char*)"simple token too long; forced treatment"); |
| tok_treatSimpleToken(this, tok); |
| } |
| tok->tokenStr[tok->tokenPos] = str[i]; |
| tok->tokenPos++; |
| } |
| } |
| tok->tokenType = type; |
| tok->tokenSubType = subtype; |
| } |
| |
| |
| static void tok_putToMarkup (picodata_ProcessingUnit this, tok_subobj_t * tok, picoos_uchar str[]) |
| { |
| picoos_int32 i, len; |
| picoos_uint8 ok; |
| |
| tok->markupTagErr = MENone; |
| len = picoos_strlen((picoos_char*)str); |
| for (i = 0; i< len; i++) { |
| if (tok->markupPos >= (MARKUP_STRING_BUF_SIZE - 1)) { |
| if ((tok->markupPos == (MARKUP_STRING_BUF_SIZE - 1)) && (tok_markupTagId(tok->markupTagName) != MIDummyEnd)) { |
| picoos_emRaiseWarning(this->common->em, PICO_ERR_INTERNAL_LIMIT ,(picoos_char*)"", (picoos_char*)"markup tag too long"); |
| } |
| tok->markupState = MSErrorTooLong; |
| } else if ((str[i] == (picoos_uchar)' ') && ((tok->markupState == MSExpectingmarkupTagName) || (tok->markupState == MSGotmarkupTagName) || (tok->markupState == MSGotAttrName) || (tok->markupState == MSGotEqual) || (tok->markupState == MSGotAttrValue))) { |
| } else if ((str[i] == (picoos_uchar)'>') && ((tok->markupState == MSGotmarkupTagName) || (tok->markupState == MSInmarkupTagName) || (tok->markupState == MSGotAttrValue))) { |
| tok->markupState = MSGotEnd; |
| } else if ((str[i] == (picoos_uchar)'/') && ((tok->markupState == MSGotmarkupTagName) || (tok->markupState == MSInmarkupTagName) || (tok->markupState == MSGotAttrValue))) { |
| if (tok->markupTagType == MTEnd) { |
| tok->markupTagErr = MEUnexpectedChar; |
| tok->markupState = MSError; |
| } else { |
| tok->markupTagType = MTEmpty; |
| tok->markupState = MSGotEndSlash; |
| } |
| } else { |
| switch (tok->markupState) { |
| case MSNotInMarkup: |
| if (str[i] == (picoos_uchar)'<') { |
| tok_clearMarkupParams(tok->markupParams); |
| tok->nrMarkupParams = 0; |
| tok->strPos = 0; |
| tok->markupTagType = MTStart; |
| tok->markupState = MSGotStart; |
| } else { |
| tok->markupTagErr = MEMissingStart; |
| tok->markupState = MSError; |
| } |
| break; |
| case MSGotStart: |
| if (str[i] == (picoos_uchar)'/') { |
| tok->markupTagType = MTEnd; |
| tok->markupState = MSExpectingmarkupTagName; |
| } else if (str[i] == (picoos_uchar)' ') { |
| tok->markupState = MSExpectingmarkupTagName; |
| } else if (tok_idChar(str[i],TRUE)) { |
| tok->markupTagType = MTStart; |
| tok->markupTagName[tok->strPos] = str[i]; |
| tok->strPos++; |
| tok->markupTagName[tok->strPos] = 0; |
| tok->markupState = MSInmarkupTagName; |
| } else { |
| tok->markupTagErr = MEUnexpectedChar; |
| tok->markupState = MSError; |
| } |
| break; |
| case MSInmarkupTagName: case MSExpectingmarkupTagName: |
| if (tok_idChar(str[i],tok->markupState == MSExpectingmarkupTagName)) { |
| tok->markupTagName[tok->strPos] = str[i]; |
| tok->strPos++; |
| tok->markupTagName[(tok->strPos)] = 0; |
| tok->markupState = MSInmarkupTagName; |
| } else if ((tok->markupState == MSInmarkupTagName) && (str[i] == (picoos_uchar)' ')) { |
| tok->markupState = MSGotmarkupTagName; |
| picobase_lowercase_utf8_str(tok->markupTagName, (picoos_char*)tok->markupTagName, IN_BUF_SIZE, &ok); |
| tok->strPos = 0; |
| } else { |
| tok->markupTagErr = MEIdent; |
| tok->markupState = MSError; |
| } |
| break; |
| case MSGotmarkupTagName: case MSGotAttrValue: |
| if (tok_attrChar(str[i], TRUE)) { |
| if (tok->markupTagType == MTEnd) { |
| tok->markupTagErr = MEUnexpectedChar; |
| tok->markupState = MSError; |
| } else { |
| if (tok->nrMarkupParams < MAX_NR_MARKUP_PARAMS) { |
| tok->markupParams[tok->nrMarkupParams].paramId[tok->strPos] = str[i]; |
| tok->strPos++; |
| tok->markupParams[tok->nrMarkupParams].paramId[tok->strPos] = 0; |
| } else { |
| picoos_emRaiseWarning(this->common->em, PICO_ERR_INTERNAL_LIMIT ,(picoos_char*)"", (picoos_char*)"too many attributes in markup; ignoring"); |
| } |
| tok->markupState = MSInAttrName; |
| } |
| } else { |
| tok->markupTagErr = MEUnexpectedChar; |
| tok->markupState = MSError; |
| } |
| break; |
| case MSInAttrName: |
| if (tok_attrChar(str[i], FALSE)) { |
| if (tok->nrMarkupParams < MAX_NR_MARKUP_PARAMS) { |
| tok->markupParams[tok->nrMarkupParams].paramId[tok->strPos] = str[i]; |
| tok->strPos++; |
| tok->markupParams[tok->nrMarkupParams].paramId[tok->strPos] = 0; |
| } |
| tok->markupState = MSInAttrName; |
| } else if (str[i] == (picoos_uchar)' ') { |
| picobase_lowercase_utf8_str(tok->markupParams[tok->nrMarkupParams].paramId, (picoos_char*)tok->markupParams[tok->nrMarkupParams].paramId, IN_BUF_SIZE, &ok); |
| tok_setIsFileAttr(tok->markupParams[tok->nrMarkupParams].paramId, & tok->isFileAttr); |
| tok->markupState = MSGotAttrName; |
| } else if (str[i] == (picoos_uchar)'=') { |
| picobase_lowercase_utf8_str(tok->markupParams[tok->nrMarkupParams].paramId, (picoos_char*)tok->markupParams[tok->nrMarkupParams].paramId, IN_BUF_SIZE, &ok); |
| tok_setIsFileAttr(tok->markupParams[tok->nrMarkupParams].paramId, & tok->isFileAttr); |
| tok->markupState = MSGotEqual; |
| } else { |
| tok->markupTagErr = MEMissingEqual; |
| tok->markupState = MSError; |
| } |
| break; |
| case MSGotAttrName: |
| if (str[i] == (picoos_uchar)'=') { |
| tok->markupState = MSGotEqual; |
| } else { |
| tok->markupTagErr = MEMissingEqual; |
| tok->markupState = MSError; |
| } |
| break; |
| case MSGotEqual: |
| if ((str[i] == (picoos_uchar)'"') || (str[i] == (picoos_uchar)'\'')) { |
| tok->strDelim = str[i]; |
| tok->strPos = 0; |
| tok->markupState = MSInAttrValue; |
| } else { |
| tok->markupTagErr = MEMissingQuote; |
| tok->markupState = MSError; |
| } |
| break; |
| case MSInAttrValue: |
| if (!(tok->isFileAttr) && (str[i] == (picoos_uchar)'\\')) { |
| tok->markupState = MSInAttrValueEscaped; |
| } else if (str[i] == tok->strDelim) { |
| if (tok->nrMarkupParams < MAX_NR_MARKUP_PARAMS) { |
| tok->nrMarkupParams++; |
| } |
| tok->strPos = 0; |
| tok->markupState = MSGotAttrValue; |
| } else { |
| if (tok->nrMarkupParams < MAX_NR_MARKUP_PARAMS) { |
| tok->markupParams[tok->nrMarkupParams].paramVal[tok->strPos] = str[i]; |
| tok->strPos++; |
| tok->markupParams[tok->nrMarkupParams].paramVal[tok->strPos] = 0; |
| } |
| tok->markupState = MSInAttrValue; |
| } |
| break; |
| case MSInAttrValueEscaped: |
| if (tok->nrMarkupParams < MAX_NR_MARKUP_PARAMS) { |
| tok->markupParams[tok->nrMarkupParams].paramVal[tok->strPos] = str[i]; |
| tok->strPos++; |
| tok->markupParams[tok->nrMarkupParams].paramVal[tok->strPos] = 0; |
| } |
| tok->markupState = MSInAttrValue; |
| break; |
| case MSGotEndSlash: |
| if (str[i] == (picoos_uchar)'>') { |
| tok->markupState = MSGotEnd; |
| } else { |
| tok->markupTagErr = MEUnexpectedChar; |
| tok->markupState = MSError; |
| } |
| break; |
| default: |
| tok->markupTagErr = MEUnexpectedChar; |
| tok->markupState = MSError; |
| break; |
| } |
| } |
| tok->markupStr[tok->markupPos] = str[i]; |
| tok->markupPos++; |
| tok->markupStr[tok->markupPos] = 0; |
| } |
| /* |
| PICODBG_DEBUG(("putToMarkup %s", tok->markupStr)); |
| */ |
| } |
| |
| /* *****************************************************************************/ |
| |
| static void tok_treatMarkupAsSimpleToken (picodata_ProcessingUnit this, tok_subobj_t * tok) |
| { |
| picoos_int32 i; |
| |
| tok->utfpos = 0; |
| tok->utflen = 0; |
| tok->markupState = MSNotInMarkup; |
| for (i = 0; i < tok->markupPos; i++) { |
| tok_treatChar(this, tok, tok->markupStr[i], FALSE); |
| } |
| tok->markupPos = 0; |
| tok->strPos = 0; |
| } |
| |
| |
| static void tok_treatMarkup (picodata_ProcessingUnit this, tok_subobj_t * tok) |
| { |
| MarkupId mId; |
| |
| if (tok_markupTagId(tok->markupTagName) != MIDummyEnd) { |
| if (tok->markupTagErr == MENone) { |
| tok->markupState = MSNotInMarkup; |
| if ((tok->tokenType != PICODATA_ITEMINFO1_TOKTYPE_SPACE) && (tok->tokenType != PICODATA_ITEMINFO1_TOKTYPE_UNDEFINED)) { |
| tok_treatSimpleToken(this, tok); |
| } |
| tok_putToSimpleToken(this, tok, (picoos_uchar*)" ", PICODATA_ITEMINFO1_TOKTYPE_SPACE, -1); |
| mId = tok_markupTagId(tok->markupTagName); |
| if ((tok->markupTagType == MTStart) || (tok->markupTagType == MTEmpty)) { |
| tok_interpretMarkup(this, tok, TRUE, mId); |
| } |
| if (((tok->markupTagType == MTEnd) || (tok->markupTagType == MTEmpty))) { |
| tok_clearMarkupParams(tok->markupParams); |
| tok->nrMarkupParams = 0; |
| tok_interpretMarkup(this, tok, FALSE,mId); |
| } |
| } |
| if (tok->markupTagErr != MENone) { |
| if (!tok->aborted) { |
| picoos_emRaiseWarning(this->common->em, PICO_ERR_INVALID_MARKUP_TAG, (picoos_char*)"", (picoos_char*)"syntax error in markup token '%s'",tok->markupStr); |
| } |
| tok_treatMarkupAsSimpleToken(this, tok); |
| } |
| } else { |
| tok_treatMarkupAsSimpleToken(this, tok); |
| } |
| tok->markupState = MSNotInMarkup; |
| tok->markupPos = 0; |
| tok->strPos = 0; |
| } |
| |
| |
| |
| static void tok_treatChar (picodata_ProcessingUnit this, tok_subobj_t * tok, picoos_uchar ch, picoos_bool markupHandling) |
| { |
| picoos_int32 id; |
| picoos_uint8 uval8; |
| pico_tokenType type = PICODATA_ITEMINFO1_TOKTYPE_UNDEFINED; |
| pico_tokenSubType subtype = -1; |
| picoos_bool dummy; |
| |
| if (ch == NULLC) { |
| tok_treatSimpleToken(this, tok); |
| tok_putItem(this, tok, PICODATA_ITEM_CMD, PICODATA_ITEMINFO1_CMD_FLUSH, PICODATA_ITEMINFO2_NA, 0, (picoos_uchar*)""); |
| } |
| else { |
| switch (tok_putToUtf(tok, ch)) { |
| case UTF_CHAR_MALFORMED: |
| tok->utfpos = 0; |
| tok->utflen = 0; |
| break; |
| case UTF_CHAR_INCOMPLETE: |
| break; |
| case UTF_CHAR_COMPLETE: |
| markupHandling = (markupHandling && (tok->markupHandlingMode == MARKUP_HANDLING_ENABLED)); |
| id = picoktab_graphOffset(tok->graphTab, tok->utf); |
| if (id > 0) { |
| if (picoktab_getIntPropTokenType(tok->graphTab, id, &uval8)) { |
| type = (pico_tokenType)uval8; |
| if (type == PICODATA_ITEMINFO1_TOKTYPE_LETTERV) { |
| type = PICODATA_ITEMINFO1_TOKTYPE_LETTER; |
| } |
| } |
| dummy = picoktab_getIntPropTokenSubType(tok->graphTab, id, &subtype); |
| } else if (ch <= (picoos_uchar)' ') { |
| type = PICODATA_ITEMINFO1_TOKTYPE_SPACE; |
| subtype = -1; |
| } else { |
| type = PICODATA_ITEMINFO1_TOKTYPE_UNDEFINED; |
| subtype = -1; |
| } |
| if ((ch > (picoos_uchar)' ')) { |
| tok->nrEOL = 0; |
| } else if ((ch == EOL)) { |
| tok->nrEOL++; |
| } |
| if (markupHandling && (tok->markupState != MSNotInMarkup)) { |
| tok_putToMarkup(this, tok, tok->utf); |
| if (tok->markupState >= MSError) { |
| tok_treatMarkupAsSimpleToken(this, tok); |
| } else if (tok->markupState == MSGotEnd) { |
| tok_treatMarkup(this, tok); |
| } |
| } else if ((markupHandling && (ch == (picoos_uchar)'<'))) { |
| tok_putToMarkup(this, tok, tok->utf); |
| } else if (type != PICODATA_ITEMINFO1_TOKTYPE_UNDEFINED) { |
| if ((type != tok->tokenType) || (type == PICODATA_ITEMINFO1_TOKTYPE_CHAR) || (subtype != tok->tokenSubType)) { |
| tok_treatSimpleToken(this, tok); |
| } else if ((ch == EOL) && (tok->nrEOL == 2)) { |
| tok_treatSimpleToken(this, tok); |
| tok_putToSimpleToken(this, tok, (picoos_uchar*)".", PICODATA_ITEMINFO1_TOKTYPE_CHAR, -1); |
| tok_treatSimpleToken(this, tok); |
| } |
| tok_putToSimpleToken(this, tok, tok->utf, type, subtype); |
| } else { |
| tok_treatSimpleToken(this, tok); |
| } |
| tok->utfpos = 0; |
| tok->utflen = 0; |
| break; |
| } |
| } |
| } |
| |
| |
| static void tok_treatSimpleToken (picodata_ProcessingUnit this, tok_subobj_t * tok) |
| { |
| if (tok->tokenPos < IN_BUF_SIZE) { |
| tok->tokenStr[tok->tokenPos] = 0; |
| } |
| if (tok->markupState != MSNotInMarkup) { |
| if (!(tok->aborted) && (tok->markupState >= MSGotmarkupTagName) && (tok_markupTagId(tok->markupTagName) != MIDummyEnd)) { |
| picoos_emRaiseWarning(this->common->em, PICO_ERR_INVALID_MARKUP_TAG, (picoos_char*)"", (picoos_char*)"unfinished markup tag '%s'",tok->markupStr); |
| } |
| tok_treatMarkupAsSimpleToken(this, tok); |
| tok_treatSimpleToken(this, tok); |
| } else if ((tok->tokenPos > 0) && ((tok->ignLevel <= 0) || (tok->tokenType == PICODATA_ITEMINFO1_TOKTYPE_SPACE))) { |
| tok_putItem(this, tok, PICODATA_ITEM_TOKEN, tok->tokenType, (picoos_uint8)tok->tokenSubType, 0, tok->tokenStr); |
| } |
| tok->tokenPos = 0; |
| tok->tokenType = PICODATA_ITEMINFO1_TOKTYPE_UNDEFINED; |
| tok->tokenSubType = -1; |
| } |
| |
| /* *****************************************************************************/ |
| |
| static pico_status_t tokReset(register picodata_ProcessingUnit this) |
| { |
| tok_subobj_t * tok; |
| MarkupId mId; |
| |
| if (NULL == this || NULL == this->subObj) { |
| return PICO_ERR_OTHER; |
| } |
| tok = (tok_subobj_t *) this->subObj; |
| |
| tok->ignLevel = 0; |
| |
| tok->utfpos = 0; |
| tok->utflen = 0; |
| |
| tok_clearMarkupParams(tok->markupParams); |
| tok->nrMarkupParams = 0; |
| tok->markupState = MSNotInMarkup; |
| tok->markupPos = 0; |
| for (mId = MIDummyStart; mId <= MIDummyEnd; mId++) { |
| tok->markupLevel[mId] = 0; |
| } |
| tok->markupTagName[0] = 0; |
| tok->markupTagType = MTNone; |
| tok->markupTagErr = MENone; |
| |
| tok->strPos = 0; |
| tok->strDelim = 0; |
| tok->isFileAttr = FALSE; |
| |
| tok->tokenType = PICODATA_ITEMINFO1_TOKTYPE_UNDEFINED; |
| tok->tokenSubType = -1; |
| tok->tokenPos = 0; |
| |
| tok->nrEOL = 0; |
| |
| |
| tok->markupHandlingMode = TRUE; |
| tok->aborted = FALSE; |
| |
| tok->start = TRUE; |
| |
| tok->outReadPos = 0; |
| tok->outWritePos = 0; |
| |
| tok->saveFile[0] = 0; |
| |
| |
| tok->graphTab = picoktab_getGraphs(this->voice->kbArray[PICOKNOW_KBID_TAB_GRAPHS]); |
| |
| tok->xsampa_parser = picokfst_getFST(this->voice->kbArray[PICOKNOW_KBID_FST_XSAMPA_PARSE]); |
| PICODBG_TRACE(("got xsampa_parser @ %i",tok->xsampa_parser)); |
| |
| tok->svoxpa_parser = picokfst_getFST(this->voice->kbArray[PICOKNOW_KBID_FST_SVOXPA_PARSE]); |
| PICODBG_TRACE(("got svoxpa_parser @ %i",tok->svoxpa_parser)); |
| |
| tok->xsampa2svoxpa_mapper = picokfst_getFST(this->voice->kbArray[PICOKNOW_KBID_FST_XSAMPA2SVOXPA]); |
| PICODBG_TRACE(("got xsampa2svoxpa_mapper @ %i",tok->xsampa2svoxpa_mapper)); |
| |
| |
| |
| return PICO_OK; |
| } |
| |
| static pico_status_t tokInitialize(register picodata_ProcessingUnit this) |
| { |
| /* |
| |
| tok_subobj_t * tok; |
| |
| if (NULL == this || NULL == this->subObj) { |
| return PICO_ERR_OTHER; |
| } |
| tok = (tok_subobj_t *) this->subObj; |
| */ |
| return tokReset(this); |
| } |
| |
| |
| static pico_status_t tokTerminate(register picodata_ProcessingUnit this) |
| { |
| return PICO_OK; |
| } |
| |
| static picodata_step_result_t tokStep(register picodata_ProcessingUnit this, picoos_int16 mode, picoos_uint16 * numBytesOutput); |
| |
| static pico_status_t tokSubObjDeallocate(register picodata_ProcessingUnit this, |
| picoos_MemoryManager mm) |
| { |
| |
| if (NULL != this) { |
| picoos_deallocate(this->common->mm, (void *) &this->subObj); |
| } |
| mm = mm; /* avoid warning "var not used in this function"*/ |
| return PICO_OK; |
| } |
| |
| picodata_ProcessingUnit picotok_newTokenizeUnit(picoos_MemoryManager mm, picoos_Common common, |
| picodata_CharBuffer cbIn, picodata_CharBuffer cbOut, |
| picorsrc_Voice voice) |
| { |
| tok_subobj_t * tok; |
| picodata_ProcessingUnit this = picodata_newProcessingUnit(mm, common, cbIn, cbOut, voice); |
| if (this == NULL) { |
| return NULL; |
| } |
| this->initialize = tokInitialize; |
| PICODBG_DEBUG(("set this->step to tokStep")); |
| this->step = tokStep; |
| this->terminate = tokTerminate; |
| this->subDeallocate = tokSubObjDeallocate; |
| this->subObj = picoos_allocate(mm, sizeof(tok_subobj_t)); |
| if (this->subObj == NULL) { |
| picoos_deallocate(mm, (void *)&this); |
| return NULL; |
| } |
| tok = (tok_subobj_t *) this->subObj; |
| tok->transducer = picotrns_newSimpleTransducer(mm, common, 10*(PICOTRNS_MAX_NUM_POSSYM+2)); |
| if (NULL == tok->transducer) { |
| tokSubObjDeallocate(this,mm); |
| picoos_deallocate(mm, (void *)&this); |
| return NULL; |
| } |
| tokInitialize(this); |
| return this; |
| } |
| |
| /** |
| * fill up internal buffer, try to locate token, write token to output |
| */ |
| picodata_step_result_t tokStep(register picodata_ProcessingUnit this, |
| picoos_int16 mode, picoos_uint16 * numBytesOutput) |
| { |
| register tok_subobj_t * tok; |
| |
| if (NULL == this || NULL == this->subObj) { |
| return PICODATA_PU_ERROR; |
| } |
| tok = (tok_subobj_t *) this->subObj; |
| |
| mode = mode; /* avoid warning "var not used in this function"*/ |
| |
| *numBytesOutput = 0; |
| while (1) { /* exit via return */ |
| picoos_int16 ch; |
| |
| if ((tok->outWritePos - tok->outReadPos) > 0) { |
| if (picodata_cbPutItem(this->cbOut, &tok->outBuf[tok->outReadPos], tok->outWritePos - tok->outReadPos, numBytesOutput) == PICO_OK) { |
| PICODATA_INFO_ITEM(this->voice->kbArray[PICOKNOW_KBID_DBG], |
| (picoos_uint8 *)"tok:", &tok->outBuf[tok->outReadPos], tok->outWritePos - tok->outReadPos); |
| tok->outReadPos += *numBytesOutput; |
| if (tok->outWritePos == tok->outReadPos) { |
| tok->outWritePos = 0; |
| tok->outReadPos = 0; |
| } |
| } |
| else { |
| return PICODATA_PU_OUT_FULL; |
| } |
| |
| } |
| else if (PICO_EOF != (ch = picodata_cbGetCh(this->cbIn))) { |
| PICODBG_DEBUG(("read in %c", (picoos_char) ch)); |
| tok_treatChar(this, tok, (picoos_uchar) ch, /*markupHandling*/TRUE); |
| } |
| else { |
| return PICODATA_PU_IDLE; |
| } |
| } |
| } |
| |
| #ifdef __cplusplus |
| } |
| #endif |
| |
| /* end */ |