| /* |
| * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| /** |
| * @file picosa.c |
| * |
| * sentence analysis - POS disambiguation |
| * |
| * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland |
| * All rights reserved. |
| * |
| * History: |
| * - 2009-04-20 -- initial version |
| * |
| */ |
| |
| #include "picoos.h" |
| #include "picodbg.h" |
| #include "picobase.h" |
| #include "picokdt.h" |
| #include "picoklex.h" |
| #include "picoktab.h" |
| #include "picokfst.h" |
| #include "picotrns.h" |
| #include "picodata.h" |
| #include "picosa.h" |
| |
| #ifdef __cplusplus |
| extern "C" { |
| #endif |
| #if 0 |
| } |
| #endif |
| |
| |
| /* PU saStep states: values stored in sa_subobj_t.procState; saInitialize starts in SA_STEPSTATE_COLLECT */ |
| #define SA_STEPSTATE_COLLECT 0 |
| #define SA_STEPSTATE_PROCESS_POSD 10 |
| #define SA_STEPSTATE_PROCESS_WPHO 11 |
| #define SA_STEPSTATE_PROCESS_TRNS_PARSE 12 |
| #define SA_STEPSTATE_PROCESS_TRNS_FST 13 |
| #define SA_STEPSTATE_FEED 2 |
| |
| #define SA_MAX_ALTDESC_SIZE (30*(PICOTRNS_MAX_NUM_POSSYM + 2)) /* size requested for altDescBuf in picosa_newSentAnaUnit */ |
| |
| #define SA_MSGSTR_SIZE 32 |
| |
| /* subobject : SentAnaUnit |
| * shortcut : sa |
| * context size : one phrase, max. 30 non-PUNC items, for non-processed items |
| * one item if internal input empty |
| */ |
| |
| /** @addtogroup picosa |
| |
| internal buffers: |
| |
| - headx: array for extended item heads of fixed size (head plus |
| index for content, plus two fields for boundary strength/type) |
| |
| - cbuf1, cbuf2: buffers for item contents (referenced by index in |
| headx). Future: replace these two buffers by a single double-sided |
| buffer (double shrink-grow type) |
| |
| 0. bottom up filling of items in headx and cbuf1 |
| |
| 1. POS disambiguation (right-to-left, top-to-bottom): |
| - number and sequence of items unchanged |
| - item content can only get smaller (reducing nr of results in WORDINDEX) |
| -> info stays in "headx, cbuf1" and changed in place \n |
| WORDGRAPH(POSes,NA)graph -> WORDGRAPH(POS,NA)graph \n |
| WORDINDEX(POSes,NA)POS1ind1...POSNindN -> WORDINDEX(POS,NA)POS|ind \n |
| |
| 2. lex-index lookup and G2P (both directions possible, left-to-right done): |
| - number and sequence of items unchanged, item head info and content |
| changes |
| -> headx changed in place; cbuf1 to cbuf2 \n |
| WORDGRAPH(POS,NA)graph -> WORDPHON(POS,NA)phon \n |
| WORDINDEX(POS,NA)POS|ind -> WORDPHON(POS,NA)phon \n |
| |
| 3. phrasing (right-to-left): |
| |
| Previous (before introducing SBEG)\n |
| ---------------------------------- |
| 1| 2| 3| 4| \n |
| e.g. from WP WP WP WP WP PUNC WP WP PUNC WP WP WP PUNC FLUSH \n |
| e.g. to BINIT WP WP WP BPHR3 WP WP BPHR1 WP WP BSEND WP WP WP BSEND BTERM \n |
| |1 |2 |3 |4 \n |
| |
| 3-level bound state: to keep track of bound strength from end of |
| previous punc-phrase, then BOUND item output as first item |
| (strength from prev punc-phrase and type from current |
| punc-phrase). |
| |
| trailing PUNC item bound states |
| INIT SEND PHR1 |
| PUNC(SENTEND, T) B(I,T)>SEND B(S,T)>SEND B(P1,T)>SEND |
| PUNC(SENTEND, Q) B(I,Q)>SEND B(S,Q)>SEND B(P1,Q)>SEND |
| PUNC(SENTEND, E) B(I,E)>SEND B(S,E)>SEND B(P1,E)>SEND |
| PUNC(PHRASEEND, P) B(I,P)>PHR1 B(S,P)>PHR1 B(P1,P)>PHR1 |
| PUNC(PHRASEEND, FORC) B(I,P)>PHR1 B(S,P)>PHR1 B(P1,P)>PHR1 |
| PUNC(FLUSH, T) B(I,T).. B(S,T).. B(P1,T).. |
| B(T,NA) B(T,NA) B(T,NA) |
| >INIT >INIT >INIT |
| |
| PHR2/3 case: |
| trailing PUNC item bound states |
| INIT SEND PHR1 |
| PUNC(SENTEND, T) B(I,P)B(P,T)>SEND B(S,P)B(P,T)>SEND B(P1,P)B(P,T)>SEND |
| PUNC(SENTEND, Q) B(I,P)B(P,Q)>SEND B(S,P)B(P,Q)>SEND B(P1,P)B(P,Q)>SEND |
| PUNC(SENTEND, E) B(I,P)B(P,E)>SEND B(S,P)B(P,E)>SEND B(P1,P)B(P,E)>SEND |
| PUNC(PHRASEEND, P) B(I,P)B(P,P)>PHR1 B(S,P)B(P,P)>PHR1 B(P1,P)B(P,P)>PHR1 |
| PUNC(PHREND, FORC) B(I,P)B(P,P)>PHR1 B(S,P)B(P,P)>PHR1 B(P1,P)B(P,P)>PHR1 |
| PUNC(FLUSH, T) B(I,P)B(P,T).. B(S,T)B(P,T).. B(P1,T)B(P,T).. |
| B(T,NA) B(T,NA) B(T,NA) |
| >INIT >INIT >INIT |
| |
| Current |
| -------- |
| e.g. from WP WP WP WP WP PUNC WP WP PUNC WP WP WP PUNC FLUSH |
| e.g. to BSBEG WP WP WP BPHR3 WP WP BPHR1 WP WP BSEND BSBEG WP WP WP BSEND BTERM |
| |1 |2 |3 |4 |
| |
| 2-level bound state: The internal buffer contains one primary phrase (sometimes forced, if buffer |
| almost full), with the trailing PUNCT item included (last item). |
| If the trailing PUNC is a primary phrase separator, the |
| item is not output, but instead, the bound state is set to PPHR, so that the correct BOUND can |
| be output at the start of the next primary phrase. |
| Otherwise, |
| the item is converted to the corresponding BOUND and output. the bound state is set to SSEP, |
| so that a BOUND of type SBEG is output at the start of the next primary phrase. |
| |
| trailing PUNC item bound states |
| SSEP PPHR |
| PUNC(SENTEND, X) B(B,X)>SSEP B(P1,X)>SSEP (X = T | Q | E) |
| PUNC(FLUSH, T) B(B,T)>SSEP* B(P1,T)>SSEP |
| PUNC(PHRASEEND, P) B(B,P)>PPHR B(P1,P)>PPHR |
| PUNC(PHRASEEND, FORC) B(B,P)>PPHR B(P1,P)>PPHR |
| |
| * If more than one sentence separators follow each other (e.g. SEND-FLUSH, SEND-SEND) then |
| all but the first will be treated as an (empty) phrase containing just this item. |
| If this (single) item is a flush, creation of SBEG is suppressed. |
| |
| |
| - dtphr phrasing tree (rather subphrasing tree it should be called) |
| determines |
| BOUND_PHR2 |
| BOUND_PHR3 |
| - boundary strengths are determined for every word (except the |
| first one) from right-to-left. The boundary types mark the phrase |
| type of the phrase following the boundary. |
| - number of items actually changed (new BOUND items added): because |
| of fixed size without content, two fields are contained in headx |
| to indicate if a BOUND needs to be added to the LEFT of the item. |
| -> headx further extended with boundary strength and type info to |
| indicate that to the left of the headx ele a BOUND needs to be |
| inserted when outputting. |
| |
| 4. accentuation: |
| - number of items unchanged, content unchanged, only head info changes |
| -> changed in place in headx |
| */ |
| |
| |
| typedef struct { /* extended item head: item head plus index of the item's content */ |
| picodata_itemhead_t head; /* item head; fields type, info1, info2 and len are used in this file */ |
| picoos_uint16 cind; /* start index of the item content in cbuf1 (or in cbuf2 once the item was copied/converted) */ |
| } picosa_headx_t; |
| |
| |
| typedef struct sa_subobj { |
| picoos_uint8 procState; /* for next processing step decision; one of the SA_STEPSTATE_* values */ |
| |
| picoos_uint8 inspaceok; /* flag: headx/cbuf1 has space for an item */ |
| picoos_uint8 needsmoreitems; /* flag: need more items */ |
| picoos_uint8 phonesTransduced; /* flag: cleared in saInitialize; presumably set once the phones were run through the FSTs -- TODO confirm in saStep */ |
| |
| picoos_uint8 tmpbuf[PICODATA_MAX_ITEMSIZE]; /* tmp. location for an item; also used by saDoG2P as scratch for phones predicted in reverse order */ |
| |
| picosa_headx_t headx[PICOSA_MAXNR_HEADX]; |
| picoos_uint16 headxBottom; /* bottom */ |
| picoos_uint16 headxLen; /* length, 0 if empty */ |
| |
| picoos_uint8 cbuf1[PICOSA_MAXSIZE_CBUF]; |
| picoos_uint16 cbuf1BufSize; /* actually allocated size; set to PICOSA_MAXSIZE_CBUF in saInitialize */ |
| picoos_uint16 cbuf1Len; /* length, 0 if empty */ |
| |
| picoos_uint8 cbuf2[PICOSA_MAXSIZE_CBUF]; |
| picoos_uint16 cbuf2BufSize; /* actually allocated size; set to PICOSA_MAXSIZE_CBUF in saInitialize */ |
| picoos_uint16 cbuf2Len; /* length, 0 if empty; next free index when content is appended to cbuf2 */ |
| |
| picotrns_possym_t phonBufA[PICOTRNS_MAX_NUM_POSSYM+1]; |
| picotrns_possym_t phonBufB[PICOTRNS_MAX_NUM_POSSYM+1]; |
| picotrns_possym_t * phonBuf; |
| picotrns_possym_t * phonBufOut; |
| picoos_uint16 phonReadPos, phonWritePos; /* next pos to read from / next pos to write to the phone buffer (the original comment named it phonBufIn, which is not a field here; presumably phonBuf -- TODO confirm) */ |
| picoos_uint16 nextReadPos; /* position of (potential) next item to read from */ |
| |
| |
| /* buffer for internal calculation of transducer; allocated in picosa_newSentAnaUnit, released in saSubObjDeallocate */ |
| picotrns_AltDesc altDescBuf; |
| /* filled in by picotrns_allocate_alt_desc_buf; presumably the max. number of AltDesc entries in the buffer -- TODO confirm */ |
| picoos_uint16 maxAltDescLen; |
| |
| /* tab knowledge base */ |
| picoktab_Graphs tabgraphs; |
| picoktab_Phones tabphones; |
| picoktab_Pos tabpos; |
| picoktab_FixedIds fixedIds; |
| |
| /* dtposd knowledge base */ |
| picokdt_DtPosD dtposd; |
| |
| /* dtg2p knowledge base */ |
| picokdt_DtG2P dtg2p; |
| |
| /* lex knowledge base */ |
| picoklex_Lex lex; |
| |
| /* ulex knowledge bases; only the first numUlex entries of ulex[] are set */ |
| picoos_uint8 numUlex; |
| picoklex_Lex ulex[PICOKNOW_MAX_NUM_ULEX]; |
| |
| /* fst knowledge bases; only the first numFsts entries of fst[] are set */ |
| picoos_uint8 numFsts; |
| picokfst_FST fst[PICOKNOW_MAX_NUM_WPHO_FSTS]; |
| picoos_uint8 curFst; /* the fst to be applied next */ |
| |
| |
| } sa_subobj_t; |
| |
| |
| static pico_status_t saInitialize(register picodata_ProcessingUnit this) { |
| sa_subobj_t * sa; |
| picoos_uint16 i; |
| picokfst_FST fst; |
| picoknow_kb_id_t fstKbIds[PICOKNOW_MAX_NUM_WPHO_FSTS] = PICOKNOW_KBID_WPHO_ARRAY; |
| picoklex_Lex ulex; |
| picoknow_kb_id_t ulexKbIds[PICOKNOW_MAX_NUM_ULEX] = PICOKNOW_KBID_ULEX_ARRAY; |
| |
| PICODBG_DEBUG(("calling")); |
| |
| if (NULL == this || NULL == this->subObj) { |
| return picoos_emRaiseException(this->common->em, |
| PICO_ERR_NULLPTR_ACCESS, NULL, NULL); |
| } |
| sa = (sa_subobj_t *) this->subObj; |
| |
| /* sa->common = this->common; */ |
| |
| sa->procState = SA_STEPSTATE_COLLECT; |
| |
| sa->inspaceok = TRUE; |
| sa->needsmoreitems = TRUE; |
| |
| sa->headxBottom = 0; |
| sa->headxLen = 0; |
| sa->cbuf1BufSize = PICOSA_MAXSIZE_CBUF; |
| sa->cbuf2BufSize = PICOSA_MAXSIZE_CBUF; |
| sa->cbuf1Len = 0; |
| sa->cbuf2Len = 0; |
| |
| /* init headx, cbuf1, cbuf2 */ |
| for (i = 0; i < PICOSA_MAXNR_HEADX; i++){ |
| sa->headx[i].head.type = 0; |
| sa->headx[i].head.info1 = PICODATA_ITEMINFO1_NA; |
| sa->headx[i].head.info2 = PICODATA_ITEMINFO2_NA; |
| sa->headx[i].head.len = 0; |
| sa->headx[i].cind = 0; |
| } |
| for (i = 0; i < PICOSA_MAXSIZE_CBUF; i++) { |
| sa->cbuf1[i] = 0; |
| sa->cbuf2[i] = 0; |
| } |
| |
| |
| /* possym buffer */ |
| sa->phonesTransduced = FALSE; |
| sa->phonBuf = sa->phonBufA; |
| sa->phonBufOut = sa->phonBufB; |
| sa->phonReadPos = 0; |
| sa->phonWritePos = 0; |
| sa->nextReadPos = 0; |
| |
| /* kb fst[] */ |
| sa->numFsts = 0; |
| for (i = 0; i<PICOKNOW_MAX_NUM_WPHO_FSTS; i++) { |
| fst = picokfst_getFST(this->voice->kbArray[fstKbIds[i]]); |
| if (NULL != fst) { |
| sa->fst[sa->numFsts++] = fst; |
| } |
| } |
| sa->curFst = 0; |
| PICODBG_DEBUG(("got %i fsts", sa->numFsts)); |
| /* kb fixedIds */ |
| sa->fixedIds = picoktab_getFixedIds(this->voice->kbArray[PICOKNOW_KBID_FIXED_IDS]); |
| |
| /* kb tabgraphs */ |
| sa->tabgraphs = |
| picoktab_getGraphs(this->voice->kbArray[PICOKNOW_KBID_TAB_GRAPHS]); |
| if (sa->tabgraphs == NULL) { |
| return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING, |
| NULL, NULL); |
| } |
| PICODBG_DEBUG(("got tabgraphs")); |
| |
| /* kb tabphones */ |
| sa->tabphones = |
| picoktab_getPhones(this->voice->kbArray[PICOKNOW_KBID_TAB_PHONES]); |
| if (sa->tabphones == NULL) { |
| return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING, |
| NULL, NULL); |
| } |
| PICODBG_DEBUG(("got tabphones")); |
| |
| #ifdef PICO_DEBU |
| { |
| picoos_uint16 itmp; |
| for (itmp = 0; itmp < 256; itmp++) { |
| if (picoktab_hasVowelProp(sa->tabphones, itmp)) { |
| PICODBG_DEBUG(("tabphones hasVowel: %d", itmp)); |
| } |
| if (picoktab_hasDiphthProp(sa->tabphones, itmp)) { |
| PICODBG_DEBUG(("tabphones hasDiphth: %d", itmp)); |
| } |
| if (picoktab_hasGlottProp(sa->tabphones, itmp)) { |
| PICODBG_DEBUG(("tabphones hasGlott: %d", itmp)); |
| } |
| if (picoktab_hasNonsyllvowelProp(sa->tabphones, itmp)) { |
| PICODBG_DEBUG(("tabphones hasNonsyllvowel: %d", itmp)); |
| } |
| if (picoktab_hasSyllconsProp(sa->tabphones, itmp)) { |
| PICODBG_DEBUG(("tabphones hasSyllcons: %d", itmp)); |
| } |
| if (picoktab_isPrimstress(sa->tabphones, itmp)) { |
| PICODBG_DEBUG(("tabphones isPrimstress: %d", itmp)); |
| } |
| if (picoktab_isSecstress(sa->tabphones, itmp)) { |
| PICODBG_DEBUG(("tabphones isSecstress: %d", itmp)); |
| } |
| if (picoktab_isSyllbound(sa->tabphones, itmp)) { |
| PICODBG_DEBUG(("tabphones isSyllbound: %d", itmp)); |
| } |
| if (picoktab_isPause(sa->tabphones, itmp)) { |
| PICODBG_DEBUG(("tabphones isPause: %d", itmp)); |
| } |
| } |
| |
| PICODBG_DEBUG(("tabphones primstressID: %d", |
| picoktab_getPrimstressID(sa->tabphones))); |
| PICODBG_DEBUG(("tabphones secstressID: %d", |
| picoktab_getSecstressID(sa->tabphones))); |
| PICODBG_DEBUG(("tabphones syllboundID: %d", |
| picoktab_getSyllboundID(sa->tabphones))); |
| PICODBG_DEBUG(("tabphones pauseID: %d", |
| picoktab_getPauseID(sa->tabphones))); |
| } |
| #endif |
| |
| /* kb tabpos */ |
| sa->tabpos = |
| picoktab_getPos(this->voice->kbArray[PICOKNOW_KBID_TAB_POS]); |
| if (sa->tabpos == NULL) { |
| return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING, |
| NULL, NULL); |
| } |
| PICODBG_DEBUG(("got tabpos")); |
| |
| /* kb dtposd */ |
| sa->dtposd = picokdt_getDtPosD(this->voice->kbArray[PICOKNOW_KBID_DT_POSD]); |
| if (sa->dtposd == NULL) { |
| return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING, |
| NULL, NULL); |
| } |
| PICODBG_DEBUG(("got dtposd")); |
| |
| /* kb dtg2p */ |
| sa->dtg2p = picokdt_getDtG2P(this->voice->kbArray[PICOKNOW_KBID_DT_G2P]); |
| if (sa->dtg2p == NULL) { |
| return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING, |
| NULL, NULL); |
| } |
| PICODBG_DEBUG(("got dtg2p")); |
| |
| /* kb lex */ |
| sa->lex = picoklex_getLex(this->voice->kbArray[PICOKNOW_KBID_LEX_MAIN]); |
| if (sa->lex == NULL) { |
| return picoos_emRaiseException(this->common->em, PICO_EXC_KB_MISSING, |
| NULL, NULL); |
| } |
| PICODBG_DEBUG(("got lex")); |
| |
| /* kb ulex[] */ |
| sa->numUlex = 0; |
| for (i = 0; i<PICOKNOW_MAX_NUM_ULEX; i++) { |
| ulex = picoklex_getLex(this->voice->kbArray[ulexKbIds[i]]); |
| if (NULL != ulex) { |
| sa->ulex[sa->numUlex++] = ulex; |
| } |
| } |
| PICODBG_DEBUG(("got %i user lexica", sa->numUlex)); |
| |
| return PICO_OK; |
| } |
| |
| static picodata_step_result_t saStep(register picodata_ProcessingUnit this, |
| picoos_int16 mode, |
| picoos_uint16 *numBytesOutput); /* forward declaration: installed as this->step by picosa_newSentAnaUnit; the definition is not in this part of the file */ |
| |
| static pico_status_t saTerminate(register picodata_ProcessingUnit this) { |
| return PICO_OK; |
| } |
| |
| static pico_status_t saSubObjDeallocate(register picodata_ProcessingUnit this, |
| picoos_MemoryManager mm) { |
| sa_subobj_t * sa; |
| if (NULL != this) { |
| sa = (sa_subobj_t *) this->subObj; |
| picotrns_deallocate_alt_desc_buf(mm,&sa->altDescBuf); |
| picoos_deallocate(mm, (void *) &this->subObj); |
| } |
| return PICO_OK; |
| } |
| |
| |
| picodata_ProcessingUnit picosa_newSentAnaUnit(picoos_MemoryManager mm, |
| picoos_Common common, |
| picodata_CharBuffer cbIn, |
| picodata_CharBuffer cbOut, |
| picorsrc_Voice voice) { |
| picodata_ProcessingUnit this; |
| sa_subobj_t * sa; |
| this = picodata_newProcessingUnit(mm, common, cbIn, cbOut, voice); |
| if (this == NULL) { |
| return NULL; |
| } |
| |
| this->initialize = saInitialize; |
| PICODBG_DEBUG(("set this->step to saStep")); |
| this->step = saStep; |
| this->terminate = saTerminate; |
| this->subDeallocate = saSubObjDeallocate; |
| |
| this->subObj = picoos_allocate(mm, sizeof(sa_subobj_t)); |
| if (this->subObj == NULL) { |
| picoos_deallocate(mm, (void *)&this); |
| picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM, NULL, NULL); |
| return NULL; |
| } |
| |
| sa = (sa_subobj_t *) this->subObj; |
| |
| sa->altDescBuf = picotrns_allocate_alt_desc_buf(mm, SA_MAX_ALTDESC_SIZE, &sa->maxAltDescLen); |
| if (NULL == sa->altDescBuf) { |
| picotrns_deallocate_alt_desc_buf(mm,&sa->altDescBuf); |
| picoos_deallocate(mm, (void *)&sa); |
| picoos_deallocate(mm, (void *)&this); |
| picoos_emRaiseException(common->em,PICO_EXC_OUT_OF_MEM, NULL, NULL); |
| } |
| |
| |
| saInitialize(this); |
| return this; |
| } |
| |
| |
| /* ***********************************************************************/ |
| /* PROCESS_POSD disambiguation functions */ |
| /* ***********************************************************************/ |
| |
| /* find next POS to the right of 'ind' and return its POS and index */ |
| static picoos_uint8 saPosDItemSeqGetPosRight(register picodata_ProcessingUnit this, |
| register sa_subobj_t *sa, |
| const picoos_uint16 ind, |
| const picoos_uint16 top, |
| picoos_uint16 *rightind) { |
| picoos_uint8 val; |
| picoos_int32 i; |
| |
| val = PICOKDT_EPSILON; |
| for (i = ind + 1; ((val == PICOKDT_EPSILON) && (i < top)); i++) { |
| if ((sa->headx[i].head.type == PICODATA_ITEM_WORDGRAPH) || |
| (sa->headx[i].head.type == PICODATA_ITEM_WORDINDEX) || |
| (sa->headx[i].head.type == PICODATA_ITEM_WORDPHON) ) { |
| val = sa->headx[i].head.info1; |
| } |
| } |
| *rightind = i - 1; |
| return val; |
| } |
| |
| |
| /* left-to-right, for each WORDGRAPH/WORDINDEX/WORDPHON do posd */ |
| static pico_status_t saDisambPos(register picodata_ProcessingUnit this, |
| register sa_subobj_t *sa) { |
| picokdt_classify_result_t dtres; |
| picoos_uint8 half_nratt_posd = PICOKDT_NRATT_POSD >> 1; |
| picoos_uint16 valbuf[PICOKDT_NRATT_POSD]; /* only [0..half_nratt_posd] can be >2^8 */ |
| picoos_uint16 prevout; /* direct dt output (hist.) or POS of prev word */ |
| picoos_uint16 lastprev3; /* last index of POS(es) found to the left */ |
| picoos_uint16 curPOS; /* POS(es) of current word */ |
| picoos_int32 first; /* index of first item with POS(es) */ |
| picoos_int32 ci; |
| picoos_uint8 okay; /* two uses: processing okay and lexind resovled */ |
| picoos_uint8 i; |
| picoos_uint16 inval; |
| picoos_uint16 fallback; |
| |
| /* set initial values */ |
| okay = TRUE; |
| prevout = PICOKDT_HISTORY_ZERO; |
| curPOS = PICODATA_ITEMINFO1_ERR; |
| first = 0; |
| |
| while ((first < sa->headxLen) && |
| (sa->headx[first].head.type != PICODATA_ITEM_WORDGRAPH) && |
| (sa->headx[first].head.type != PICODATA_ITEM_WORDINDEX) && |
| (sa->headx[first].head.type != PICODATA_ITEM_WORDPHON)) { |
| first++; |
| } |
| if (first >= sa->headxLen) { |
| /* phrase not containing an item with POSes info, e.g. single flush */ |
| PICODBG_DEBUG(("no item with POSes found")); |
| return PICO_OK; |
| } |
| |
| lastprev3 = first; |
| |
| for (i = 0; i <= half_nratt_posd; i++) { |
| valbuf[i] = PICOKDT_HISTORY_ZERO; |
| } |
| /* set POS(es) of current word, will be shifted afterwards */ |
| valbuf[half_nratt_posd+1] = sa->headx[first].head.info1; |
| for (i = half_nratt_posd+2; i < PICOKDT_NRATT_POSD; i++) { |
| /* find next POS to the right and set valbuf[i] */ |
| valbuf[i] = saPosDItemSeqGetPosRight(this, sa, lastprev3, sa->headxLen, &lastprev3); |
| } |
| |
| PICODBG_TRACE(("headxLen: %d", sa->headxLen)); |
| |
| /* process from left to right all items in headx */ |
| for (ci = first; ci < sa->headxLen; ci++) { |
| okay = TRUE; |
| |
| PICODBG_TRACE(("iter: %d, type: %c", ci, sa->headx[ci].head.type)); |
| |
| /* if not (WORDGRAPH or WORDINDEX) */ |
| if ((sa->headx[ci].head.type != PICODATA_ITEM_WORDGRAPH) && |
| (sa->headx[ci].head.type != PICODATA_ITEM_WORDINDEX) && |
| (sa->headx[ci].head.type != PICODATA_ITEM_WORDPHON)) { |
| continue; |
| } |
| |
| PICODBG_TRACE(("iter: %d, curPOS: %d", ci, sa->headx[ci].head.info1)); |
| |
| /* no continue so far => at [ci] we have a WORDGRAPH / WORDINDEX item */ |
| /* shift all elements one position to the left */ |
| /* shift predicted values (history) */ |
| for (i=1; i<half_nratt_posd; i++) { |
| valbuf[i-1] = valbuf[i]; |
| } |
| /* insert previously predicted value (now history) */ |
| valbuf[half_nratt_posd-1] = prevout; |
| /* shift not yet predicted values */ |
| for (i=half_nratt_posd+1; i<PICOKDT_NRATT_POSD; i++) { |
| valbuf[i-1] = valbuf[i]; |
| } |
| /* find next POS to the right and set valbuf[PICOKDT_NRATT_POSD-1] */ |
| valbuf[PICOKDT_NRATT_POSD-1] = saPosDItemSeqGetPosRight(this, sa, lastprev3, sa->headxLen, &lastprev3); |
| |
| /* just to be on the safe side; the following should never happen */ |
| if (sa->headx[ci].head.info1 != valbuf[half_nratt_posd]) { |
| PICODBG_WARN(("syncing POS")); |
| picoos_emRaiseWarning(this->common->em, PICO_WARN_INVECTOR, |
| NULL, NULL); |
| valbuf[half_nratt_posd] = sa->headx[ci].head.info1; |
| } |
| |
| curPOS = valbuf[half_nratt_posd]; |
| |
| /* Check if POS disambiguation not needed */ |
| if (picoktab_isUniquePos(sa->tabpos, (picoos_uint8) curPOS)) { |
| /* not needed */ |
| inval = 0; |
| fallback = 0; |
| if (!picokdt_dtPosDreverseMapOutFixed(sa->dtposd, curPOS, |
| &prevout, &fallback)) { |
| if (fallback) { |
| prevout = fallback; |
| |
| } else { |
| PICODBG_ERROR(("problem doing reverse output mapping")); |
| prevout = curPOS; |
| } |
| } |
| PICODBG_DEBUG(("keeping: %d", sa->headx[ci].head.info1)); |
| continue; |
| } |
| |
| /* assuming PICOKDT_NRATT_POSD == 7 */ |
| PICODBG_DEBUG(("%d: [%d %d %d %d %d %d %d]", |
| ci, valbuf[0], valbuf[1], valbuf[2], |
| valbuf[3], valbuf[4], valbuf[5], valbuf[6])); |
| |
| /* no continue so far => POS disambiguation needed */ |
| /* construct input vector, which is set in dtposd */ |
| if (!picokdt_dtPosDconstructInVec(sa->dtposd, valbuf)) { |
| /* error constructing invec */ |
| PICODBG_WARN(("problem with invec")); |
| picoos_emRaiseWarning(this->common->em, PICO_WARN_INVECTOR, |
| NULL, NULL); |
| okay = FALSE; |
| } |
| /* classify */ |
| if (okay && (!picokdt_dtPosDclassify(sa->dtposd, &prevout))) { |
| /* error doing classification */ |
| PICODBG_WARN(("problem classifying")); |
| picoos_emRaiseWarning(this->common->em, PICO_WARN_CLASSIFICATION, |
| NULL, NULL); |
| okay = FALSE; |
| } |
| /* decompose */ |
| if (okay && (!picokdt_dtPosDdecomposeOutClass(sa->dtposd, &dtres))) { |
| /* error decomposing */ |
| PICODBG_WARN(("problem decomposing")); |
| picoos_emRaiseWarning(this->common->em, PICO_WARN_OUTVECTOR, |
| NULL, NULL); |
| okay = FALSE; |
| } |
| if (okay && dtres.set) { |
| PICODBG_DEBUG(("in: %d, out: %d", valbuf[3], dtres.class)); |
| } else { |
| PICODBG_WARN(("problem disambiguating POS")); |
| dtres.class = PICODATA_ITEMINFO1_ERR; |
| } |
| |
| if (dtres.class > 255) { |
| PICODBG_WARN(("dt result outside valid range, setting pos to ERR")); |
| dtres.class = PICODATA_ITEMINFO1_ERR; |
| } |
| |
| sa->headx[ci].head.info1 = (picoos_uint8)dtres.class; |
| if (sa->headx[ci].head.type == PICODATA_ITEM_WORDINDEX) { |
| /* find pos/ind entry in cbuf matching unique, |
| disambiguated POS, adapt current headx cind/len |
| accordingly */ |
| PICODBG_DEBUG(("select phon based on POS disambiguation")); |
| okay = FALSE; |
| for (i = 0; i < sa->headx[ci].head.len; i += PICOKLEX_POSIND_SIZE) { |
| PICODBG_DEBUG(("comparing POS at cind + %d", i)); |
| if (picoktab_isPartOfPosGroup(sa->tabpos, |
| (picoos_uint8)dtres.class, |
| sa->cbuf1[sa->headx[ci].cind + i])) { |
| PICODBG_DEBUG(("found match for entry %d", |
| i/PICOKLEX_POSIND_SIZE + 1)); |
| sa->headx[ci].cind += i; |
| okay = TRUE; |
| break; |
| } |
| } |
| /* not finding a match is possible if posd predicts a POS that |
| is not part of any of the input POSes -> no warning */ |
| #if defined(PICO_DEBUG) |
| if (!okay) { |
| PICODBG_DEBUG(("no match found, selecting 1st entry")); |
| } |
| #endif |
| sa->headx[ci].head.len = PICOKLEX_POSIND_SIZE; |
| } |
| } |
| return PICO_OK; |
| } |
| |
| |
| /* ***********************************************************************/ |
| /* PROCESS_WPHO functions, copy, lexindex, and g2p */ |
| /* ***********************************************************************/ |
| |
| /* ************** copy ***************/ |
| |
| static pico_status_t saCopyItemContent1to2(register picodata_ProcessingUnit this, |
| register sa_subobj_t *sa, |
| picoos_uint16 ind) { |
| picoos_uint16 i; |
| picoos_uint16 cind1; |
| |
| /* set headx.cind, and copy content, head unchanged */ |
| cind1 = sa->headx[ind].cind; |
| sa->headx[ind].cind = sa->cbuf2Len; |
| |
| /* check cbufLen */ |
| if (sa->headx[ind].head.len > (sa->cbuf2BufSize - sa->cbuf2Len)) { |
| sa->headx[ind].head.len = sa->cbuf2BufSize - sa->cbuf2Len; |
| PICODBG_WARN(("phones skipped")); |
| picoos_emRaiseWarning(this->common->em, |
| PICO_WARN_INCOMPLETE, NULL, NULL); |
| if (sa->headx[ind].head.len == 0) { |
| sa->headx[ind].cind = 0; |
| } |
| } |
| |
| for (i = 0; i < sa->headx[ind].head.len; i++) { |
| sa->cbuf2[sa->cbuf2Len] = sa->cbuf1[cind1 + i]; |
| sa->cbuf2Len++; |
| } |
| |
| PICODBG_DEBUG(("%c item, len: %d", |
| sa->headx[ind].head.type, sa->headx[ind].head.len)); |
| |
| return PICO_OK; |
| } |
| |
| |
| /* ************** lexindex ***************/ |
| |
| static pico_status_t saLexIndLookup(register picodata_ProcessingUnit this, |
| register sa_subobj_t *sa, |
| picoklex_Lex lex, |
| picoos_uint16 ind) { |
| picoos_uint8 pos; |
| picoos_uint8 *phones; |
| picoos_uint8 plen; |
| picoos_uint16 i; |
| |
| if (picoklex_lexIndLookup(lex, &(sa->cbuf1[sa->headx[ind].cind + 1]), |
| PICOKLEX_IND_SIZE, &pos, &phones, &plen)) { |
| sa->headx[ind].cind = sa->cbuf2Len; |
| |
| /* check cbufLen */ |
| if (plen > (sa->cbuf2BufSize - sa->cbuf2Len)) { |
| plen = sa->cbuf2BufSize - sa->cbuf2Len; |
| PICODBG_WARN(("phones skipped")); |
| picoos_emRaiseWarning(this->common->em, |
| PICO_WARN_INCOMPLETE, NULL, NULL); |
| if (plen == 0) { |
| sa->headx[ind].cind = 0; |
| } |
| } |
| |
| /* set item head, info1, info2 unchanged */ |
| sa->headx[ind].head.type = PICODATA_ITEM_WORDPHON; |
| sa->headx[ind].head.len = plen; |
| |
| for (i = 0; i < plen; i++) { |
| sa->cbuf2[sa->cbuf2Len] = phones[i]; |
| sa->cbuf2Len++; |
| } |
| |
| PICODBG_DEBUG(("%c item, pos: %d, plen: %d", |
| PICODATA_ITEM_WORDPHON, pos, plen)); |
| |
| } else { |
| PICODBG_WARN(("lexIndLookup problem")); |
| picoos_emRaiseWarning(this->common->em, PICO_WARN_PU_IRREG_ITEM, |
| NULL, NULL); |
| } |
| return PICO_OK; |
| } |
| |
| |
| |
| /* ************** g2p ***************/ |
| |
| |
| /* Name : saGetNvowel |
| Function: returns vowel info in a word or word seq |
| Input : sInChar the grapheme string to be converted in phoneme |
| inLen number of bytes in grapheme buffer |
| inPos start position of current grapheme (0..inLen-1) |
| Output : nVow number of vowels in the word |
| nVord vowel order in the word |
| Returns : TRUE: processing successful; FALSE: errors |
| */ |
| static picoos_uint8 saGetNrVowel(register picodata_ProcessingUnit this, |
| register sa_subobj_t *sa, |
| const picoos_uint8 *sInChar, |
| const picoos_uint16 inLen, |
| const picoos_uint8 inPos, |
| picoos_uint8 *nVow, |
| picoos_uint8 *nVord) { |
| picoos_uint32 nCount; |
| picoos_uint32 pos; |
| picoos_uint8 cstr[PICOBASE_UTF8_MAXLEN + 1]; |
| |
| /*defaults*/ |
| *nVow = 0; |
| *nVord = 0; |
| /*1:check wether the current char is a vowel*/ |
| pos = inPos; |
| if (!picobase_get_next_utf8char(sInChar, inLen, &pos, cstr) || |
| !picoktab_hasVowellikeProp(sa->tabgraphs, cstr, PICOBASE_UTF8_MAXLEN)) { |
| return FALSE; |
| } |
| /*2:count number of vowels in current word and find vowel order*/ |
| for (nCount = 0; nCount < inLen; ) { |
| if (!picobase_get_next_utf8char(sInChar, inLen, &nCount, cstr)) { |
| return FALSE; |
| } |
| if (picoktab_hasVowellikeProp(sa->tabgraphs, cstr, |
| PICOBASE_UTF8_MAXLEN)) { |
| (*nVow)++; |
| if (nCount == pos) { |
| (*nVord) = (*nVow); |
| } |
| } |
| } |
| return TRUE; |
| } |
| |
| |
| /* do g2p for a full word, right-to-left */ |
| static picoos_uint8 saDoG2P(register picodata_ProcessingUnit this, |
| register sa_subobj_t *sa, |
| const picoos_uint8 *graph, |
| const picoos_uint8 graphlen, |
| const picoos_uint8 pos, |
| picoos_uint8 *phones, |
| const picoos_uint16 phonesmaxlen, |
| picoos_uint16 *plen) { |
| picoos_uint16 outNp1Ch; /*last 3 outputs produced*/ |
| picoos_uint16 outNp2Ch; |
| picoos_uint16 outNp3Ch; |
| picoos_uint8 nPrimary; |
| picoos_uint8 nCount; |
| picoos_uint32 utfpos; |
| picoos_uint16 nOutVal; |
| picoos_uint8 okay; |
| picoos_uint16 phonesind; |
| picoos_uint8 nrvow; |
| picoos_uint8 ordvow; |
| picokdt_classify_vecresult_t dtresv; |
| picoos_uint16 i; |
| |
| *plen = 0; |
| okay = TRUE; |
| |
| /* use sa->tmpbuf[PICOSA_MAXITEMSIZE] to temporarly store the |
| phones which are predicted in reverse order. Once all are |
| available put them in phones in usuable order. phonesind is |
| used to fille item in reverse order starting at the end of |
| tmpbuf. */ |
| phonesind = PICOSA_MAXITEMSIZE - 1; |
| |
| /* prepare the data for loop operations */ |
| outNp1Ch = PICOKDT_HISTORY_ZERO; |
| outNp2Ch = PICOKDT_HISTORY_ZERO; |
| outNp3Ch = PICOKDT_HISTORY_ZERO; |
| |
| /* inner loop */ |
| nPrimary = 0; |
| |
| /* ************************************************/ |
| /* go backward grapheme by grapheme, it's utf8... */ |
| /* ************************************************/ |
| |
| /* set start nCount to position of start of last utfchar */ |
| /* ! watch out! somethimes starting at 1, sometimes at 0, |
| ! sometimes counting per byte, sometimes per UTF8 char */ |
| /* nCount is (start position + 1) of utf8 char */ |
| utfpos = graphlen; |
| if (picobase_get_prev_utf8charpos(graph, 0, &utfpos)) { |
| nCount = utfpos + 1; |
| } else { |
| /* should not occurr */ |
| PICODBG_ERROR(("invalid utf8 string, graphlen: %d", graphlen)); |
| return FALSE; |
| } |
| |
| while (nCount > 0) { |
| PICODBG_TRACE(("right-to-left g2p, count: %d", nCount)); |
| okay = TRUE; |
| |
| if (!saGetNrVowel(this, sa, graph, graphlen, nCount-1, &nrvow, |
| &ordvow)) { |
| nrvow = 0; |
| ordvow = 0; |
| } |
| |
| /* prepare input vector, set inside tree object invec, |
| * g2pBuildVector will call the constructInVec tree method */ |
| if (!picokdt_dtG2PconstructInVec(sa->dtg2p, |
| graph, /*grapheme start*/ |
| graphlen, /*grapheme length*/ |
| nCount-1, /*grapheme current position*/ |
| pos, /*Word POS*/ |
| nrvow, /*nr vowels if vowel, 0 else */ |
| ordvow, /*ord of vowel if vowel, 0 el*/ |
| &nPrimary, /*primary stress flag*/ |
| outNp1Ch, /*Right phoneme context +1*/ |
| outNp2Ch, /*Right phoneme context +2*/ |
| outNp3Ch)) { /*Right phon context +3*/ |
| /*Errors in preparing the input vector : skip processing*/ |
| PICODBG_WARN(("problem with invec")); |
| picoos_emRaiseWarning(this->common->em, PICO_WARN_INVECTOR, |
| NULL, NULL); |
| okay = FALSE; |
| } |
| |
| /* classify using the invec in the tree object and save the direct |
| tree output also in the tree object */ |
| if (okay && (!picokdt_dtG2Pclassify(sa->dtg2p, &nOutVal))) { |
| /* error doing classification */ |
| PICODBG_WARN(("problem classifying")); |
| picoos_emRaiseWarning(this->common->em, PICO_WARN_CLASSIFICATION, |
| NULL, NULL); |
| okay = FALSE; |
| } |
| |
| /* decompose the invec in the tree object and return result in dtresv */ |
| if (okay && (!picokdt_dtG2PdecomposeOutClass(sa->dtg2p, &dtresv))) { |
| /* error decomposing */ |
| PICODBG_WARN(("problem decomposing")); |
| picoos_emRaiseWarning(this->common->em, PICO_WARN_OUTVECTOR, |
| NULL, NULL); |
| okay = FALSE; |
| } |
| |
| if (okay) { |
| if ((dtresv.nr == 0) || (dtresv.classvec[0] == PICOKDT_EPSILON)) { |
| /* no phones to be added */ |
| PICODBG_TRACE(("epsilon, no phone added %c", graph[nCount-1])); |
| ; |
| } else { |
| /* add decomposed output to tmpbuf, reverse order */ |
| for (i = dtresv.nr; ((((PICOSA_MAXITEMSIZE - 1) - |
| phonesind)<phonesmaxlen) && |
| (i > 0)); ) { |
| i--; |
| PICODBG_TRACE(("%c %d",graph[nCount-1],dtresv.classvec[i])); |
| if (dtresv.classvec[i] > 255) { |
| PICODBG_WARN(("dt result outside valid range, " |
| "skipping phone")); |
| continue; |
| } |
| sa->tmpbuf[phonesind--] = (picoos_uint8)dtresv.classvec[i]; |
| if (!nPrimary) { |
| if (picoktab_isPrimstress(sa->tabphones, |
| (picoos_uint8)dtresv.classvec[i])) { |
| nPrimary = 1; |
| } |
| } |
| (*plen)++; |
| } |
| if (i > 0) { |
| PICODBG_WARN(("phones skipped")); |
| picoos_emRaiseWarning(this->common->em, |
| PICO_WARN_INCOMPLETE, NULL, NULL); |
| } |
| } |
| } |
| |
| /*shift tree output history and update*/ |
| outNp3Ch = outNp2Ch; |
| outNp2Ch = outNp1Ch; |
| outNp1Ch = nOutVal; |
| |
| /* go backward one utf8 char */ |
| /* nCount is in +1 domain */ |
| if (nCount <= 1) { |
| /* end of str */ |
| nCount = 0; |
| } else { |
| utfpos = nCount - 1; |
| if (!picobase_get_prev_utf8charpos(graph, 0, &utfpos)) { |
| /* should not occur */ |
| PICODBG_ERROR(("invalid utf8 string, utfpos: %d", utfpos)); |
| return FALSE; |
| } else { |
| nCount = utfpos + 1; |
| } |
| } |
| } |
| |
| /* a must be: (PICOSA_MAXITEMSIZE-1) - phonesind == *plen */ |
| /* now that we have all phone IDs, copy in correct order to phones */ |
| /* phonesind point to next free slot in the reverse domainn, |
| ie. inc first */ |
| phonesind++; |
| for (i = 0; i < *plen; i++, phonesind++) { |
| phones[i] = sa->tmpbuf[phonesind]; |
| } |
| return TRUE; |
| } |
| |
| |
| /* item in headx[ind]/cbuf1, out: modified headx and cbuf2 */ |
| |
| static pico_status_t saGraphemeToPhoneme(register picodata_ProcessingUnit this, |
| register sa_subobj_t *sa, |
| picoos_uint16 ind) { |
| picoos_uint16 plen; |
| |
| PICODBG_TRACE(("starting g2p")); |
| |
| if (saDoG2P(this, sa, &(sa->cbuf1[sa->headx[ind].cind]), |
| sa->headx[ind].head.len, sa->headx[ind].head.info1, |
| &(sa->cbuf2[sa->cbuf2Len]), (sa->cbuf2BufSize - sa->cbuf2Len), |
| &plen)) { |
| |
| /* check of cbuf2Len done in saDoG2P, phones skipped if needed */ |
| if (plen > 255) { |
| PICODBG_WARN(("maximum number of phones exceeded (%d), skipping", |
| plen)); |
| plen = 255; |
| } |
| |
| /* set item head, info1, info2 unchanged */ |
| sa->headx[ind].head.type = PICODATA_ITEM_WORDPHON; |
| sa->headx[ind].head.len = (picoos_uint8)plen; |
| sa->headx[ind].cind = sa->cbuf2Len; |
| sa->cbuf2Len += plen; |
| PICODBG_DEBUG(("%c item, plen: %d", |
| PICODATA_ITEM_WORDPHON, plen)); |
| } else { |
| PICODBG_WARN(("problem doing g2p")); |
| picoos_emRaiseWarning(this->common->em, PICO_WARN_PU_IRREG_ITEM, |
| NULL, NULL); |
| } |
| return PICO_OK; |
| } |
| |
| |
| /* ***********************************************************************/ |
| /* extract phonemes of an item into a phonBuf */ |
| /* ***********************************************************************/ |
| |
| static pico_status_t saAddPhoneme(register sa_subobj_t *sa, picoos_uint16 pos, picoos_uint16 sym) { |
| /* picoos_uint8 plane, unshifted; */ |
| |
| /* just for debuging */ |
| /* |
| unshifted = picotrns_unplane(sym,&plane); |
| PICODBG_DEBUG(("adding %i/%i (%c on plane %i) at phonBuf[%i]",pos,sym,unshifted,plane,sa->phonWritePos)); |
| */ |
| if (PICOTRNS_MAX_NUM_POSSYM <= sa->phonWritePos) { |
| /* not an error! */ |
| PICODBG_DEBUG(("couldn't add because phon buffer full")); |
| return PICO_EXC_BUF_OVERFLOW; |
| } else { |
| sa->phonBuf[sa->phonWritePos].pos = pos; |
| sa->phonBuf[sa->phonWritePos].sym = sym; |
| sa->phonWritePos++; |
| return PICO_OK; |
| } |
| } |
| |
| /* |
| static pico_status_t saAddStartPhoneme(register sa_subobj_t *sa) { |
| return saAddPhoneme(sa, PICOTRNS_POS_IGNORE, |
| (PICOKFST_PLANE_INTERN << 8) + sa->fixedIds->phonStartId); |
| } |
| |
| |
| static pico_status_t saAddTermPhoneme(register sa_subobj_t *sa) { |
| return saAddPhoneme(sa, PICOTRNS_POS_IGNORE, |
| (PICOKFST_PLANE_INTERN << 8) + sa->fixedIds->phonTermId); |
| } |
| |
| */ |
| |
| static pico_status_t saExtractPhonemes(register picodata_ProcessingUnit this, |
| register sa_subobj_t *sa, picoos_uint16 pos, |
| picodata_itemhead_t* head, const picoos_uint8* content) |
| { |
| pico_status_t rv= PICO_OK; |
| picoos_uint8 i; |
| picoos_int16 fstSymbol; |
| #if defined(PICO_DEBUG) |
| picoos_char msgstr[SA_MSGSTR_SIZE]; |
| #endif |
| |
| PICODBG_TRACE(("doing item %s", |
| picodata_head_to_string(head,msgstr,SA_MSGSTR_SIZE))); |
| /* |
| Items considered in a transduction are WORDPHON item. its starting offset within the inBuf is given as |
| 'pos'. |
| Elements that go into the transduction receive "their" position in the buffer. |
| */ |
| sa->phonWritePos = 0; |
| /* WORDPHON(POS,WACC)phon */ |
| rv = saAddPhoneme(sa, PICOTRNS_POS_IGNORE, |
| (PICOKFST_PLANE_INTERN << 8) + sa->fixedIds->phonStartId); |
| for (i = 0; i < head->len; i++) { |
| fstSymbol = /* (PICOKFST_PLANE_PHONEMES << 8) + */content[i]; |
| /* */ |
| PICODBG_TRACE(("adding phoneme %c",fstSymbol)); |
| rv = saAddPhoneme(sa, pos+PICODATA_ITEM_HEADSIZE+i, fstSymbol); |
| } |
| rv = saAddPhoneme(sa, PICOTRNS_POS_IGNORE, |
| (PICOKFST_PLANE_INTERN << 8) + sa->fixedIds->phonTermId); |
| sa->nextReadPos = pos + PICODATA_ITEM_HEADSIZE + head->len; |
| return rv; |
| } |
| |
| |
/* result codes of getNextPosSym() */
#define SA_POSSYM_OK           0    /* a possym was read */
#define SA_POSSYM_OUT_OF_RANGE 1    /* next possym lies behind the range */
#define SA_POSSYM_END          2    /* no more possyms available */
#define SA_POSSYM_INVALID      (-3) /* next possym has a "strange" pos;
                                       parenthesized so the negative value
                                       stays intact inside any expression */
| /* *readPos is the next position in phonBuf to be read, and *writePos is the first position not to be read (may be outside |
| * buf). |
| * 'rangeEnd' is the first possym position outside the desired range. |
| * Possible return values: |
| * SA_POSSYM_OK : 'pos' and 'sym' are set to the read possym, *readPos is advanced |
| * SA_POSSYM_OUT_OF_RANGE : pos is out of range. 'pos' is set to that of the read possym, 'sym' is undefined |
| * SA_POSSYM_UNDERFLOW : no more data in buf. 'pos' is set to PICOTRNS_POS_INVALID, 'sym' is undefined |
| * SA_POSSYM_INVALID : "strange" pos. 'pos' is set to PICOTRNS_POS_INVALID, 'sym' is undefined |
| */ |
| static pico_status_t getNextPosSym(sa_subobj_t * sa, picoos_int16 * pos, picoos_int16 * sym, |
| picoos_int16 rangeEnd) { |
| /* skip POS_IGNORE */ |
| while ((sa->phonReadPos < sa->phonWritePos) && (PICOTRNS_POS_IGNORE == sa->phonBuf[sa->phonReadPos].pos)) { |
| PICODBG_DEBUG(("ignoring phone at sa->phonBuf[%i] because it has pos==IGNORE",sa->phonReadPos)); |
| sa->phonReadPos++; |
| } |
| if ((sa->phonReadPos < sa->phonWritePos)) { |
| *pos = sa->phonBuf[sa->phonReadPos].pos; |
| if ((PICOTRNS_POS_INSERT == *pos) || ((0 <= *pos) && (*pos < rangeEnd))) { |
| *sym = sa->phonBuf[sa->phonReadPos++].sym; |
| return SA_POSSYM_OK; |
| } else if (*pos < 0){ /* *pos is "strange" (e.g. POS_INVALID) */ |
| return SA_POSSYM_INVALID; |
| } else { |
| return SA_POSSYM_OUT_OF_RANGE; |
| } |
| } else { |
| /* no more possyms to read */ |
| *pos = PICOTRNS_POS_INVALID; |
| return SA_POSSYM_END; |
| } |
| } |
| |
| |
| |
| |
| /* ***********************************************************************/ |
| /* saStep function */ |
| /* ***********************************************************************/ |
| |
| /* |
| complete phrase processed in one step, if not fast enough -> rework |
| |
| init, collect into internal buffer, process, and then feed to |
| output buffer |
| |
| init state: INIT ext ext |
| state trans: in hc1 hc2 out |
| |
| INIT | putItem = 0 0 +1 | BUSY -> COLL (put B-SBEG item, |
| set do-init to false) |
| |
| inspace-ok-hc1 |
| needs-more-items-(phrase-or-flush) |
| COLL1 |getItems -n +n 0 1 | ATOMIC -> PPOSD (got items, |
| if flush set do-init) |
| COLL2 |getItems -n +n 1 0 | ATOMIC -> PPOSD (got items, forced) |
| COLL3 |getItems -n +n 1 1 | IDLE (got items, need more) |
| COLL4 |getItems = = 1 1 | IDLE (got no items) |
| |
| PPOSD | posd = ~n~n | BUSY -> PWP (posd done) |
| PWP | lex/g2p = ~n-n 0+n | BUSY -> PPHR (lex/g2p done) |
| PPHR | phr = -n 0 +m=n | BUSY -> PACC (phr done, m>=n) |
| PACC | acc = 0 0 ~m=n | BUSY -> FEED (acc done) |
| |
| doinit-flag |
| FEED | putItems 0 0 0 -m-n +m 0 | BUSY -> COLL (put items) |
| FEED | putItems 0 0 0 -m-n +m 1 | BUSY -> INIT (put items) |
| FEED | putItems 0 0 0 -d-d +d | OUT_FULL (put some items) |
| */ |
| |
| static picodata_step_result_t saStep(register picodata_ProcessingUnit this, |
| picoos_int16 mode, |
| picoos_uint16 *numBytesOutput) { |
| register sa_subobj_t *sa; |
| pico_status_t rv = PICO_OK; |
| pico_status_t rvP = PICO_OK; |
| picoos_uint16 blen = 0; |
| picoos_uint16 clen = 0; |
| picoos_uint16 i; |
| picoklex_Lex lex; |
| |
| |
| if (NULL == this || NULL == this->subObj) { |
| return PICODATA_PU_ERROR; |
| } |
| sa = (sa_subobj_t *) this->subObj; |
| mode = mode; /* avoid warning "var not used in this function"*/ |
| *numBytesOutput = 0; |
| while (1) { /* exit via return */ |
| PICODBG_DEBUG(("doing state %i, hLen|c1Len|c2Len: %d|%d|%d", |
| sa->procState, sa->headxLen, sa->cbuf1Len, |
| sa->cbuf2Len)); |
| |
| switch (sa->procState) { |
| |
| /* *********************************************************/ |
| /* collect state: get item(s) from charBuf and store in |
| * internal buffers, need a complete punctuation-phrase |
| */ |
| case SA_STEPSTATE_COLLECT: |
| |
| while (sa->inspaceok && sa->needsmoreitems |
| && (PICO_OK == |
| (rv = picodata_cbGetItem(this->cbIn, sa->tmpbuf, |
| PICOSA_MAXITEMSIZE, &blen)))) { |
| rvP = picodata_get_itemparts(sa->tmpbuf, |
| PICOSA_MAXITEMSIZE, |
| &(sa->headx[sa->headxLen].head), |
| &(sa->cbuf1[sa->cbuf1Len]), |
| sa->cbuf1BufSize-sa->cbuf1Len, |
| &clen); |
| if (rvP != PICO_OK) { |
| PICODBG_ERROR(("problem getting item parts")); |
| picoos_emRaiseException(this->common->em, rvP, |
| NULL, NULL); |
| return PICODATA_PU_ERROR; |
| } |
| |
| /* if CMD(...FLUSH...) -> PUNC(...FLUSH...), |
| construct PUNC-FLUSH item in headx */ |
| if ((sa->headx[sa->headxLen].head.type == |
| PICODATA_ITEM_CMD) && |
| (sa->headx[sa->headxLen].head.info1 == |
| PICODATA_ITEMINFO1_CMD_FLUSH)) { |
| sa->headx[sa->headxLen].head.type = |
| PICODATA_ITEM_PUNC; |
| sa->headx[sa->headxLen].head.info1 = |
| PICODATA_ITEMINFO1_PUNC_FLUSH; |
| sa->headx[sa->headxLen].head.info2 = |
| PICODATA_ITEMINFO2_PUNC_SENT_T; |
| sa->headx[sa->headxLen].head.len = 0; |
| } |
| |
| /* convert opening phoneme command to WORDPHON |
| * and assign user-POS XX to it (Bug 432) */ |
| sa->headx[sa->headxLen].cind = sa->cbuf1Len; |
| /* maybe overwritten later */ |
| if ((sa->headx[sa->headxLen].head.type == |
| PICODATA_ITEM_CMD) && |
| (sa->headx[sa->headxLen].head.info1 == |
| PICODATA_ITEMINFO1_CMD_PHONEME)&& |
| (sa->headx[sa->headxLen].head.info2 == |
| PICODATA_ITEMINFO2_CMD_START)) { |
| picoos_uint8 i; |
| picoos_uint8 wordsep = picoktab_getWordboundID(sa->tabphones); |
| PICODBG_INFO(("wordsep id is %i",wordsep)); |
| sa->headx[sa->headxLen].head.type = PICODATA_ITEM_WORDPHON; |
| sa->headx[sa->headxLen].head.info1 = PICODATA_POS_XX; |
| sa->headx[sa->headxLen].head.info2 = PICODATA_ITEMINFO2_NA; |
| /* cut off additional words */ |
| i = 0; |
| while ((i < sa->headx[sa->headxLen].head.len) && (wordsep != sa->cbuf1[sa->headx[sa->headxLen].cind+i])) { |
| PICODBG_INFO(("accepting phoneme %i",sa->cbuf1[sa->headx[sa->headxLen].cind+i])); |
| |
| i++; |
| } |
| if (i < sa->headx[sa->headxLen].head.len) { |
| PICODBG_INFO(("cutting off superfluous phonetic words at %i",i)); |
| sa->headx[sa->headxLen].head.len = i; |
| } |
| } |
| |
| /* check/set needsmoreitems */ |
| if (sa->headx[sa->headxLen].head.type == |
| PICODATA_ITEM_PUNC) { |
| sa->needsmoreitems = FALSE; |
| } |
| |
| /* check/set inspaceok, keep spare slot for forcing */ |
| if ((sa->headxLen >= (PICOSA_MAXNR_HEADX - 2)) || |
| ((sa->cbuf1BufSize - sa->cbuf1Len) < |
| PICOSA_MAXITEMSIZE)) { |
| sa->inspaceok = FALSE; |
| } |
| |
| if (clen > 0) { |
| sa->headx[sa->headxLen].cind = sa->cbuf1Len; |
| sa->cbuf1Len += clen; |
| } else { |
| sa->headx[sa->headxLen].cind = 0; |
| } |
| sa->headxLen++; |
| } |
| |
| if (!sa->needsmoreitems) { |
| /* 1, phrase buffered */ |
| sa->procState = SA_STEPSTATE_PROCESS_POSD; |
| return PICODATA_PU_ATOMIC; |
| } else if (!sa->inspaceok) { |
| /* 2, forced phrase end */ |
| /* at least one slot is still free, use it to |
| force a trailing PUNC item */ |
| sa->headx[sa->headxLen].head.type = PICODATA_ITEM_PUNC; |
| sa->headx[sa->headxLen].head.info1 = |
| PICODATA_ITEMINFO1_PUNC_PHRASEEND; |
| sa->headx[sa->headxLen].head.info2 = |
| PICODATA_ITEMINFO2_PUNC_PHRASE_FORCED; |
| sa->headx[sa->headxLen].head.len = 0; |
| sa->needsmoreitems = FALSE; /* not really needed for now */ |
| sa->headxLen++; |
| PICODBG_WARN(("forcing phrase end, added PUNC_PHRASEEND")); |
| picoos_emRaiseWarning(this->common->em, |
| PICO_WARN_FALLBACK, NULL, |
| (picoos_char *)"forced phrase end"); |
| sa->procState = SA_STEPSTATE_PROCESS_POSD; |
| return PICODATA_PU_ATOMIC; |
| } else if (rv == PICO_EOF) { |
| /* 3, 4 */ |
| return PICODATA_PU_IDLE; |
| } else if ((rv == PICO_EXC_BUF_UNDERFLOW) || |
| (rv == PICO_EXC_BUF_OVERFLOW)) { |
| /* error, no valid item in cb (UNDER) */ |
| /* or tmpbuf not large enough, not possible (OVER) */ |
| /* no exception raised, left for ctrl to handle */ |
| PICODBG_ERROR(("buffer under/overflow, rv: %d", rv)); |
| return PICODATA_PU_ERROR; |
| } else { |
| /* error, only possible if cbGetItem implementation |
| changes without this function being adapted*/ |
| PICODBG_ERROR(("untreated return value, rv: %d", rv)); |
| return PICODATA_PU_ERROR; |
| } |
| break; |
| |
| |
| /* *********************************************************/ |
| /* process posd state: process items in headx/cbuf1 |
| * and change in place |
| */ |
| case SA_STEPSTATE_PROCESS_POSD: |
| /* ensure there is an item in inBuf */ |
| if (sa->headxLen > 0) { |
| /* we have a phrase in headx, cbuf1 (can be |
| single PUNC item without POS), do pos disamb */ |
| if (PICO_OK != saDisambPos(this, sa)) { |
| picoos_emRaiseException(this->common->em, |
| PICO_ERR_OTHER, NULL, NULL); |
| return PICODATA_PU_ERROR; |
| } |
| sa->procState = SA_STEPSTATE_PROCESS_WPHO; |
| |
| } else if (sa->headxLen == 0) { /* no items in inBuf */ |
| PICODBG_WARN(("no items in inBuf")); |
| sa->procState = SA_STEPSTATE_COLLECT; |
| return PICODATA_PU_BUSY; |
| } |
| |
| #if defined (PICO_DEBUG) |
| if (1) { |
| picoos_uint8 i, j, ittype; |
| for (i = 0; i < sa->headxLen; i++) { |
| ittype = sa->headx[i].head.type; |
| PICODBG_INFO_CTX(); |
| PICODBG_INFO_MSG(("sa-d: (")); |
| PICODBG_INFO_MSG(("'%c',", ittype)); |
| if ((32 <= sa->headx[i].head.info1) && |
| (sa->headx[i].head.info1 < 127) && |
| (ittype != PICODATA_ITEM_WORDGRAPH) && |
| (ittype != PICODATA_ITEM_WORDINDEX)) { |
| PICODBG_INFO_MSG(("'%c',",sa->headx[i].head.info1)); |
| } else { |
| PICODBG_INFO_MSG(("%3d,", sa->headx[i].head.info1)); |
| } |
| if ((32 <= sa->headx[i].head.info2) && |
| (sa->headx[i].head.info2 < 127)) { |
| PICODBG_INFO_MSG(("'%c',",sa->headx[i].head.info2)); |
| } else { |
| PICODBG_INFO_MSG(("%3d,", sa->headx[i].head.info2)); |
| } |
| PICODBG_INFO_MSG(("%3d)", sa->headx[i].head.len)); |
| |
| for (j = 0; j < sa->headx[i].head.len; j++) { |
| if ((ittype == PICODATA_ITEM_WORDGRAPH) || |
| (ittype == PICODATA_ITEM_CMD)) { |
| PICODBG_INFO_MSG(("%c", |
| sa->cbuf1[sa->headx[i].cind+j])); |
| } else { |
| PICODBG_INFO_MSG(("%4d", |
| sa->cbuf1[sa->headx[i].cind+j])); |
| } |
| } |
| PICODBG_INFO_MSG(("\n")); |
| } |
| } |
| #endif |
| |
| break; |
| |
| |
| /* *********************************************************/ |
| /* process wpho state: process items in headx/cbuf1 and modify |
| * headx in place and fill cbuf2 |
| */ |
| case SA_STEPSTATE_PROCESS_WPHO: |
| /* ensure there is an item in inBuf */ |
| if (sa->headxLen > 0) { |
| /* we have a phrase in headx, cbuf1 (can be single |
| PUNC item), do lex lookup, g2p, or copy */ |
| |
| /* check if cbuf2 is empty as it should be */ |
| if (sa->cbuf2Len > 0) { |
| /* enforce emptyness */ |
| PICODBG_WARN(("forcing empty cbuf2, discarding buf")); |
| picoos_emRaiseWarning(this->common->em, |
| PICO_WARN_PU_DISCARD_BUF, |
| NULL, NULL); |
| } |
| |
| /* cbuf2 overflow avoided in saGrapheme*, saLexInd*, |
| saCopyItem*, phones skipped if needed */ |
| for (i = 0; i < sa->headxLen; i++) { |
| switch (sa->headx[i].head.type) { |
| case PICODATA_ITEM_WORDGRAPH: |
| if (PICO_OK != saGraphemeToPhoneme(this, sa, |
| i)) { |
| /* not possible, phones skipped if needed */ |
| picoos_emRaiseException(this->common->em, |
| PICO_ERR_OTHER, |
| NULL, NULL); |
| return PICODATA_PU_ERROR; |
| } |
| break; |
| case PICODATA_ITEM_WORDINDEX: |
| if (0 == sa->headx[i].head.info2) { |
| lex = sa->lex; |
| } else { |
| lex = sa->ulex[sa->headx[i].head.info2-1]; |
| } |
| if (PICO_OK != saLexIndLookup(this, sa, lex, i)) { |
| /* not possible, phones skipped if needed */ |
| picoos_emRaiseException(this->common->em, |
| PICO_ERR_OTHER, |
| NULL, NULL); |
| return PICODATA_PU_ERROR; |
| } |
| break; |
| default: |
| /* copy item unmodified, ie. headx untouched, |
| content from cbuf1 to cbuf2 */ |
| if (PICO_OK != saCopyItemContent1to2(this, sa, |
| i)) { |
| /* not possible, phones skipped if needed */ |
| picoos_emRaiseException(this->common->em, |
| PICO_ERR_OTHER, |
| NULL, NULL); |
| return PICODATA_PU_ERROR; |
| } |
| break; |
| } |
| } |
| /* set cbuf1 to empty */ |
| sa->cbuf1Len = 0; |
| sa->procState = SA_STEPSTATE_PROCESS_TRNS_PARSE; |
| |
| } else if (sa->headxLen == 0) { /* no items in inBuf */ |
| PICODBG_WARN(("no items in inBuf")); |
| sa->procState = SA_STEPSTATE_COLLECT; |
| return PICODATA_PU_BUSY; |
| } |
| |
| #if defined (PICO_DEBUG) |
| if (1) { |
| picoos_uint8 i, j, ittype; |
| for (i = 0; i < sa->headxLen; i++) { |
| ittype = sa->headx[i].head.type; |
| PICODBG_INFO_CTX(); |
| PICODBG_INFO_MSG(("sa-g: (")); |
| PICODBG_INFO_MSG(("'%c',", ittype)); |
| if ((32 <= sa->headx[i].head.info1) && |
| (sa->headx[i].head.info1 < 127) && |
| (ittype != PICODATA_ITEM_WORDPHON)) { |
| PICODBG_INFO_MSG(("'%c',",sa->headx[i].head.info1)); |
| } else { |
| PICODBG_INFO_MSG(("%3d,", sa->headx[i].head.info1)); |
| } |
| if ((32 <= sa->headx[i].head.info2) && |
| (sa->headx[i].head.info2 < 127)) { |
| PICODBG_INFO_MSG(("'%c',",sa->headx[i].head.info2)); |
| } else { |
| PICODBG_INFO_MSG(("%3d,", sa->headx[i].head.info2)); |
| } |
| PICODBG_INFO_MSG(("%3d)", sa->headx[i].head.len)); |
| |
| for (j = 0; j < sa->headx[i].head.len; j++) { |
| if ((ittype == PICODATA_ITEM_CMD)) { |
| PICODBG_INFO_MSG(("%c", |
| sa->cbuf2[sa->headx[i].cind+j])); |
| } else { |
| PICODBG_INFO_MSG(("%4d", |
| sa->cbuf2[sa->headx[i].cind+j])); |
| } |
| } |
| PICODBG_INFO_MSG(("\n")); |
| } |
| } |
| #endif |
| |
| break; |
| |
| |
| /* *********************************************************/ |
| /* transduction parse state: extract phonemes of item in internal outBuf */ |
| case SA_STEPSTATE_PROCESS_TRNS_PARSE: |
| |
| PICODBG_DEBUG(("transduce item (bot, remain): (%d, %d)", |
| sa->headxBottom, sa->headxLen)); |
| |
| /* check for termination condition first */ |
| if (0 == sa->headxLen) { |
| /* reset headx, cbuf2 */ |
| sa->headxBottom = 0; |
| sa->cbuf2Len = 0; |
| /* reset collect state support variables */ |
| sa->inspaceok = TRUE; |
| sa->needsmoreitems = TRUE; |
| |
| sa->procState = SA_STEPSTATE_COLLECT; |
| return PICODATA_PU_BUSY; |
| } |
| |
| sa->procState = SA_STEPSTATE_FEED; |
| /* copy item unmodified */ |
| rv = picodata_put_itemparts( |
| &(sa->headx[sa->headxBottom].head), |
| &(sa->cbuf2[sa->headx[sa->headxBottom].cind]), |
| sa->headx[sa->headxBottom].head.len, sa->tmpbuf, |
| PICOSA_MAXITEMSIZE, &blen); |
| |
| if (PICODATA_ITEM_WORDPHON == sa->headx[sa->headxBottom].head.type) { |
| PICODBG_DEBUG(("PARSE found WORDPHON")); |
| rv = saExtractPhonemes(this, sa, 0, &(sa->headx[sa->headxBottom].head), |
| &(sa->cbuf2[sa->headx[sa->headxBottom].cind])); |
| if (PICO_OK == rv) { |
| PICODBG_DEBUG(("PARSE successfully returned from phoneme extraction")); |
| sa->procState = SA_STEPSTATE_PROCESS_TRNS_FST; |
| } else { |
| PICODBG_WARN(("PARSE phone extraction returned exception %i, output WORDPHON untransduced",rv)); |
| } |
| } else { |
| PICODBG_DEBUG(("PARSE found other item, just copying")); |
| } |
| if (SA_STEPSTATE_FEED == sa->procState) { |
| PICODATA_INFO_ITEM(this->voice->kbArray[PICOKNOW_KBID_DBG], |
| (picoos_uint8 *)"sa-p: ", |
| sa->tmpbuf, PICOSA_MAXITEMSIZE); |
| |
| } |
| |
| /* consume item */ |
| sa->headxBottom++; |
| sa->headxLen--; |
| |
| break; |
| |
| /* *********************************************************/ |
| /* transduce state: copy item in internal outBuf to tmpBuf and transduce */ |
| case SA_STEPSTATE_PROCESS_TRNS_FST: |
| |
| |
| |
| |
| |
| /* if no word-level FSTs: doing trivial syllabification instead */ |
| if (0 == sa->numFsts) { |
| PICODBG_DEBUG(("doing trivial sylabification with %i phones", sa->phonWritePos)); |
| #if defined(PICO_DEBUG) |
| { |
| PICODBG_INFO_CTX(); |
| PICODBG_INFO_MSG(("sa trying to trivially syllabify: ")); |
| PICOTRNS_PRINTSYMSEQ(this->voice->kbArray[PICOKNOW_KBID_DBG], sa->phonBuf, sa->phonWritePos); |
| PICODBG_INFO_MSG(("\n")); |
| } |
| #endif |
| |
| picotrns_trivial_syllabify(sa->tabphones, sa->phonBuf, |
| sa->phonWritePos, sa->phonBufOut, |
| &sa->phonWritePos,PICOTRNS_MAX_NUM_POSSYM); |
| PICODBG_DEBUG(("returned from trivial sylabification with %i phones", sa->phonWritePos)); |
| #if defined(PICO_DEBUG) |
| { |
| PICODBG_INFO_CTX(); |
| PICODBG_INFO_MSG(("sa returned from syllabification: ")); |
| PICOTRNS_PRINTSYMSEQ(this->voice->kbArray[PICOKNOW_KBID_DBG], sa->phonBufOut, sa->phonWritePos); |
| PICODBG_INFO_MSG(("\n")); |
| } |
| #endif |
| |
| /* eliminate deep epsilons */ |
| PICODBG_DEBUG(("doing epsilon elimination with %i phones", sa->phonWritePos)); |
| picotrns_eliminate_epsilons(sa->phonBufOut, |
| sa->phonWritePos, sa->phonBuf, |
| &sa->phonWritePos,PICOTRNS_MAX_NUM_POSSYM); |
| PICODBG_DEBUG(("returning from epsilon elimination with %i phones", sa->phonWritePos)); |
| sa->phonReadPos = 0; |
| sa->phonesTransduced = 1; |
| sa->procState = SA_STEPSTATE_FEED; |
| break; |
| } |
| |
| /* there are word-level FSTs */ |
| /* termination condition first */ |
| if (sa->curFst >= sa->numFsts) { |
| /* reset for next transduction */ |
| sa->curFst = 0; |
| sa->phonReadPos = 0; |
| sa->phonesTransduced = 1; |
| sa->procState = SA_STEPSTATE_FEED; |
| break; |
| } |
| |
| /* transduce from phonBufIn to PhonBufOut */ |
| { |
| |
| picoos_uint32 nrSteps; |
| #if defined(PICO_DEBUG) |
| { |
| PICODBG_INFO_CTX(); |
| PICODBG_INFO_MSG(("sa trying to transduce: ")); |
| PICOTRNS_PRINTSYMSEQ(this->voice->kbArray[PICOKNOW_KBID_DBG], sa->phonBuf, sa->phonWritePos); |
| PICODBG_INFO_MSG(("\n")); |
| } |
| #endif |
| picotrns_transduce(sa->fst[sa->curFst], FALSE, |
| picotrns_printSolution, sa->phonBuf, sa->phonWritePos, sa->phonBufOut, |
| &sa->phonWritePos, |
| PICOTRNS_MAX_NUM_POSSYM, sa->altDescBuf, |
| sa->maxAltDescLen, &nrSteps); |
| #if defined(PICO_DEBUG) |
| { |
| PICODBG_INFO_CTX(); |
| PICODBG_INFO_MSG(("sa returned from transduction: ")); |
| PICOTRNS_PRINTSYMSEQ(this->voice->kbArray[PICOKNOW_KBID_DBG], sa->phonBufOut, sa->phonWritePos); |
| PICODBG_INFO_MSG(("\n")); |
| } |
| #endif |
| } |
| |
| |
| |
| /* |
| The trasduction output will contain equivalent items i.e. (x,y') for each (x,y) plus inserted deep symbols (-1,d). |
| In case of deletions, (x,0) might also be omitted... |
| */ |
| /* eliminate deep epsilons */ |
| picotrns_eliminate_epsilons(sa->phonBufOut, |
| sa->phonWritePos, sa->phonBuf, &sa->phonWritePos,PICOTRNS_MAX_NUM_POSSYM); |
| sa->phonesTransduced = 1; |
| |
| sa->curFst++; |
| |
| return PICODATA_PU_ATOMIC; |
| /* break; */ |
| |
| /* *********************************************************/ |
| /* feed state: copy item in internal outBuf to output charBuf */ |
| |
| case SA_STEPSTATE_FEED: |
| |
| PICODBG_DEBUG(("FEED")); |
| |
| if (sa->phonesTransduced) { |
| /* replace original phones by transduced */ |
| picoos_uint16 phonWritePos = PICODATA_ITEM_HEADSIZE; |
| picoos_uint8 plane; |
| picoos_int16 sym, pos; |
| while (SA_POSSYM_OK == (rv = getNextPosSym(sa,&pos,&sym,sa->nextReadPos))) { |
| PICODBG_TRACE(("FEED inserting phoneme %c into inBuf[%i]",sym,phonWritePos)); |
| sym = picotrns_unplane(sym, &plane); |
| PICODBG_ASSERT((PICOKFST_PLANE_PHONEMES == plane)); |
| sa->tmpbuf[phonWritePos++] = (picoos_uint8) sym; |
| } |
| PICODBG_DEBUG(("FEED setting item length to %i",phonWritePos - PICODATA_ITEM_HEADSIZE)); |
| picodata_set_itemlen(sa->tmpbuf,PICODATA_ITEM_HEADSIZE,phonWritePos - PICODATA_ITEM_HEADSIZE); |
| if (SA_POSSYM_INVALID == rv) { |
| PICODBG_ERROR(("FEED unexpected symbol or unexpected end of phoneme list")); |
| return (picodata_step_result_t)picoos_emRaiseException(this->common->em, PICO_WARN_INCOMPLETE, NULL, NULL); |
| } |
| sa->phonesTransduced = 0; |
| |
| } /* if (sa->phonesTransduced) */ |
| |
| |
| rvP = picodata_cbPutItem(this->cbOut, sa->tmpbuf, |
| PICOSA_MAXITEMSIZE, &clen); |
| |
| *numBytesOutput += clen; |
| |
| PICODBG_DEBUG(("put item, status: %d", rvP)); |
| |
| if (rvP == PICO_OK) { |
| } else if (rvP == PICO_EXC_BUF_OVERFLOW) { |
| /* try again next time */ |
| PICODBG_DEBUG(("feeding overflow")); |
| return PICODATA_PU_OUT_FULL; |
| } else { |
| /* error, should never happen */ |
| PICODBG_ERROR(("untreated return value, rvP: %d", rvP)); |
| return PICODATA_PU_ERROR; |
| } |
| |
| PICODATA_INFO_ITEM(this->voice->kbArray[PICOKNOW_KBID_DBG], |
| (picoos_uint8 *)"sana: ", |
| sa->tmpbuf, PICOSA_MAXITEMSIZE); |
| |
| sa->procState = SA_STEPSTATE_PROCESS_TRNS_PARSE; |
| /* return PICODATA_PU_BUSY; */ |
| break; |
| |
| default: |
| break; |
| } /* switch */ |
| |
| } /* while */ |
| |
| /* should be never reached */ |
| PICODBG_ERROR(("reached end of function")); |
| picoos_emRaiseException(this->common->em, PICO_ERR_OTHER, NULL, NULL); |
| return PICODATA_PU_ERROR; |
| } |
| |
| #ifdef __cplusplus |
| } |
| #endif |
| |
| |
| /* end */ |