| /* |
| * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| /** |
| * @file picotrns.c |
| * |
| * fst processing |
| * |
| * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland |
| * All rights reserved. |
| * |
| * History: |
| * - 2009-04-20 -- initial version |
| * |
| */ |
| |
| #include "picoos.h" |
| #include "picodbg.h" |
| /* #include "picodata.h" */ |
| /* #include "picoknow.h" */ |
| #include "picoktab.h" |
| #include "picokfst.h" |
| #include "picotrns.h" |
| |
| #ifdef __cplusplus |
| extern "C" { |
| #endif |
| #if 0 |
| } |
| #endif |
| |
| |
| |
| picoos_uint8 picotrns_unplane(picoos_int16 symIn, picoos_uint8 * plane) { |
| if (symIn < 0) { |
| (*plane) = 0; |
| return (picoos_uint8) symIn; |
| } else { |
| (*plane) = symIn >> 8; |
| return (picoos_uint8) (symIn & 0xFF); |
| } |
| } |
| |
| #if defined(PICO_DEBUG) |
| |
| void PICOTRNS_PRINTSYM1(picoknow_KnowledgeBase kbdbg, picoos_int16 insym, picoos_uint8 phonemic) |
| { |
| #include "picokdbg.h" |
| picoos_int16 sym; |
| picoos_uint8 plane; |
| picokdbg_Dbg dbg = (NULL == kbdbg) ? NULL : picokdbg_getDbg(kbdbg); |
| sym = picotrns_unplane(insym, &plane); |
| switch (plane) { |
| case PICOKFST_PLANE_PHONEMES: /* phones */ |
| if ((NULL == dbg) || !phonemic) { |
| PICODBG_INFO_MSG((" %c", sym)); |
| } else { |
| PICODBG_INFO_MSG((" %s", picokdbg_getPhoneSym(dbg, (picoos_uint8) sym))); |
| } |
| break; |
| case PICOKFST_PLANE_ACCENTS: /* accents */ |
| PICODBG_INFO_MSG((" {A%c}", sym)); |
| break; |
| case PICOKFST_PLANE_XSAMPA: /* xsampa symbols */ |
| PICODBG_INFO_MSG((" {XS:(%i)}", sym)); |
| break; |
| case PICOKFST_PLANE_POS: /* part of speech */ |
| PICODBG_INFO_MSG((" {P:%d}", sym)); |
| break; |
| case PICOKFST_PLANE_PB_STRENGTHS: /* phrases */ |
| if (sym == 48) { |
| PICODBG_INFO_MSG((" {WB}", sym)); |
| } else if (sym == 115) { |
| PICODBG_INFO_MSG((" {P0}", sym)); |
| } else { |
| PICODBG_INFO_MSG((" {P%c}", sym)); |
| } |
| break; |
| case PICOKFST_PLANE_INTERN: /* intern */ |
| PICODBG_INFO_MSG((" [%c]", sym)); |
| break; |
| } |
| } |
| |
| void PICOTRNS_PRINTSYM(picoknow_KnowledgeBase kbdbg, picoos_int16 insym) |
| { |
| PICOTRNS_PRINTSYM1(kbdbg,insym,1); |
| } |
| |
| void PICOTRNS_PRINTSYMSEQ1(picoknow_KnowledgeBase kbdbg, const picotrns_possym_t seq[], const picoos_uint16 seqLen, |
| picoos_uint8 phonemic) { |
| picoos_uint16 i; |
| for (i=0; i<seqLen; i++) { |
| PICOTRNS_PRINTSYM1(kbdbg, seq[i].sym, phonemic); |
| } |
| } |
| |
| void PICOTRNS_PRINTSYMSEQ(picoknow_KnowledgeBase kbdbg, const picotrns_possym_t seq[], const picoos_uint16 seqLen) { |
| PICOTRNS_PRINTSYMSEQ1(kbdbg,seq, seqLen, 1); |
| } |
| |
| void picotrns_printSolution(const picotrns_possym_t outSeq[], const picoos_uint16 outSeqLen) |
| { |
| PICODBG_INFO_CTX(); |
| PICODBG_INFO_MSG(("solution: ")); |
| PICOTRNS_PRINTSYMSEQ(NULL, outSeq, outSeqLen); |
| PICODBG_INFO_MSG(("\n")); |
| } |
| |
| void picotrns_printSolutionAscii(const picotrns_possym_t outSeq[], const picoos_uint16 outSeqLen) |
| { |
| PICODBG_INFO_CTX(); |
| PICODBG_INFO_MSG(("solution: ")); |
| PICOTRNS_PRINTSYMSEQ1(NULL, outSeq, outSeqLen,0); |
| PICODBG_INFO_MSG(("\n")); |
| } |
| |
| #endif |
| |
| |
| |
| |
| /* * +CT+ ***/ |
| struct picotrns_transductionState { |
| picoos_uint16 phase; /* transduction phase: |
| 0 = before start |
| 1 = before regular recursion step |
| 2 = before finish |
| 3 = after finish */ |
| picoos_uint32 nrSol; /* nr of solutions so far */ |
| picoos_int16 recPos; /* recursion position; must be signed! */ |
| }; |
| |
| typedef struct picotrns_altDesc { |
| picokfst_state_t startFSTState; /**< starting FST state in current recursion position */ |
| picoos_int32 inPos; /**< corresponding position in input string */ |
| picokfst_state_t altState; /**< state of alternatives search; |
| - 0 = before pair search |
| - 1 = search state is a valid pair search state |
| - 2 = before inEps search |
| - 3 = search state is a valid inEps trans search state |
| - 4 = no more alternatives */ |
| picoos_int32 searchState; /**< pair search state or inEps trans search state */ |
| picokfst_symid_t altOutSym; /**< current output symbol at this recursion position */ |
| picoos_int32 altOutRefPos; /**< output reference position at this recursion position */ |
| } picotrns_altDesc_t; |
| |
| |
| picotrns_AltDesc picotrns_allocate_alt_desc_buf(picoos_MemoryManager mm, picoos_uint32 maxByteSize, picoos_uint16 * numAltDescs) |
| { |
| picotrns_AltDesc buf; |
| (*numAltDescs) = (picoos_uint32) (maxByteSize / sizeof(picotrns_altDesc_t)); |
| buf = (picotrns_AltDesc) picoos_allocate(mm, (*numAltDescs) * sizeof(picotrns_altDesc_t)); |
| if (NULL == buf) { |
| (*numAltDescs) = 0; |
| return NULL; |
| } else { |
| return buf; |
| } |
| } |
| |
| void picotrns_deallocate_alt_desc_buf(picoos_MemoryManager mm, picotrns_AltDesc * altDescBuf) |
| { |
| picoos_deallocate(mm, (void *) altDescBuf); |
| } |
| |
| /* copy elements from inSeq to outSeq, ignoring elements with epsilon symbol */ |
| pico_status_t picotrns_eliminate_epsilons(const picotrns_possym_t inSeq[], picoos_uint16 inSeqLen, |
| picotrns_possym_t outSeq[], picoos_uint16 * outSeqLen, picoos_uint16 maxOutSeqLen) |
| { |
| picoos_uint16 i, j = 0; |
| |
| for (i=0; i < inSeqLen; i++) { |
| /* it is assumed that PICOKFST_SYMID_EPS is a hardwired value and not shifted */ |
| if (PICOKFST_SYMID_EPS != inSeq[i].sym) { |
| if (j < maxOutSeqLen) { |
| outSeq[j].pos = inSeq[i].pos; |
| outSeq[j].sym = inSeq[i].sym; |
| j++; |
| } |
| } |
| *outSeqLen = j; |
| } |
| return PICO_OK; |
| } |
| |
| |
| static void insertSym(picotrns_possym_t inSeq[], picoos_uint16 pos, picoos_int16 sym) { |
| inSeq[pos].sym = sym; |
| inSeq[pos].pos = PICOTRNS_POS_INSERT; |
| } |
| |
| /* copy elements from inSeq to outSeq, inserting syllable separators in some trivial way. |
| * inSeq is assumed to be at most PICOTRNS_MAX_NUM_POSSYM, outSeq at least of size PICOTRNS_MAX_NUM_POSSYM */ |
| pico_status_t picotrns_trivial_syllabify(picoktab_Phones phones, |
| const picotrns_possym_t inSeq[], const picoos_uint16 inSeqLen, |
| picotrns_possym_t outSeq[], picoos_uint16 * outSeqLen, picoos_uint16 maxOutSeqLen) |
| { |
| picoos_uint16 i = 0, j = 0, out = 0, numInserted = 0; |
| picoos_uint8 vowelFound = FALSE; |
| picoos_uint16 accentpos = 0; |
| picoos_int16 accent = 0; |
| |
| PICODBG_TRACE(("start")); |
| |
| |
| while (i < inSeqLen) { |
| /* make sure that at least one more sylSep can be inserted */ |
| if (inSeqLen+numInserted+1 >= maxOutSeqLen) { |
| return PICO_EXC_BUF_OVERFLOW; |
| } |
| /* let j skip consonant cluster */ |
| accent = 0; |
| accentpos = 0; |
| while ((j < inSeqLen) && !picoktab_isSyllCarrier(phones,(picoos_uint8)inSeq[j].sym)) { |
| if ((inSeq[j].sym == picoktab_getPrimstressID(phones)) |
| || (inSeq[j].sym == picoktab_getPrimstressID(phones))) { |
| PICODBG_TRACE(("j skipping stress symbol inSeq[%i].sym = %c", j, inSeq[j].sym)); |
| accent = inSeq[j].sym; |
| accentpos = j; |
| } else { |
| PICODBG_TRACE(("j skipping consonant inSeq[%i].sym = %c", j, inSeq[j].sym)); |
| } |
| j++; |
| } |
| if (j < inSeqLen) { /* j is at the start of a new vowel */ |
| /* copy consonant cluster (moving i) to output, insert syll separator if between vowels */ |
| while (i < j-1) { |
| if ((accent > 0) && (i == accentpos)) { |
| PICODBG_TRACE(("skipping inSeq[%i].sym = %c (stress)", i, inSeq[i].sym)); |
| i++; |
| } else { |
| PICODBG_TRACE(("copying inSeq[%i].sym = %c (consonant) into output buffer", i, inSeq[i].sym)); |
| outSeq[out++] = inSeq[i++]; |
| } |
| } |
| if (vowelFound) { /* we're between vowels */ |
| PICODBG_TRACE(("inserting syllable separator into output buffer")); |
| insertSym(outSeq,out++,picoktab_getSyllboundID(phones)); |
| if (accent > 0) { |
| insertSym(outSeq,out++,accent); |
| } |
| numInserted++; |
| } |
| if ((accent > 0) && (i == accentpos)) { |
| PICODBG_TRACE(("skipping inSeq[%i].sym = %c (stress)", i, inSeq[i].sym)); |
| i++; |
| } else { |
| PICODBG_TRACE(("copying inSeq[%i].sym = %c (consonant) into output buffer", i, inSeq[i].sym)); |
| outSeq[out++] = inSeq[i++]; |
| } |
| vowelFound = TRUE; |
| /* now copy vowel cluster */ |
| while ((i < inSeqLen) && picoktab_isSyllCarrier(phones,(picoos_uint8)inSeq[i].sym)) { |
| PICODBG_TRACE(("copying inSeq[%i].sym = %c (vowel) into output buffer", i, inSeq[i].sym)); |
| outSeq[out++] = inSeq[i++]; |
| } |
| j = i; |
| } else { /* j is at end of word or end of input */ |
| while (i < j) { |
| PICODBG_TRACE(("copying inSeq[%i].sym = %c (consonant or stress) into output buffer", i, inSeq[i].sym)); |
| outSeq[out++] = inSeq[i++]; |
| } |
| } |
| *outSeqLen = out; |
| } |
| PICODBG_ASSERT((out == inSeqLen + numInserted)); |
| |
| return PICO_OK; |
| } |
| |
| |
| /* ******** +CT+: full transduction procedure **********/ |
| |
| |
| /* Gets next acceptable alternative for output symbol '*outSym' at current recursion position |
| starting from previous alternative in 'altDesc'; possibly uses input symbol |
| given by 'inSeq'/'inSeq'; returns whether alterative was found in '*found'; |
| if '*found', the other output values ('*outRefPos', '*endFSTstate', '*nextInPos'*) |
| return the characteristics for next recursion step; |
| if '*found' is false, the output values are undefined. */ |
| |
| static void GetNextAlternative (picokfst_FST fst, picotrns_AltDesc altDesc, |
| const picotrns_possym_t inSeq[], picoos_uint16 inSeqLen, |
| picokfst_symid_t * outSym, picoos_int32 * outRefPos, |
| picokfst_state_t * endFSTState, picoos_int32 * nextInPos, picoos_bool * found) |
| { |
| |
| picoos_bool inSymFound; |
| picoos_bool pairFound; |
| picokfst_class_t pairClass; |
| picoos_bool inEpsTransFound; |
| picokfst_symid_t inSym; |
| |
| (*found) = 0; |
| do { |
| switch (altDesc->altState) { |
| case 0: /* before pair search */ |
| if (altDesc->inPos < inSeqLen) { |
| inSym = inSeq[altDesc->inPos].sym; |
| if (inSym == PICOKFST_SYMID_EPS) { |
| /* very special case: input epsilon simply produces eps in output |
| without fst state change */ |
| (*found) = 1; |
| (*outSym) = PICOKFST_SYMID_EPS; |
| (*outRefPos) = inSeq[altDesc->inPos].pos; |
| (*endFSTState) = altDesc->startFSTState; |
| (*nextInPos) = altDesc->inPos + 1; |
| altDesc->altState = 2; |
| } else { |
| /* start search for alternatives using input symbol */ |
| picokfst_kfstStartPairSearch(fst,inSeq[altDesc->inPos].sym,& inSymFound,& altDesc->searchState); |
| if (!inSymFound) { |
| altDesc->altState = 2; |
| PICODBG_INFO_CTX(); |
| PICODBG_INFO_MSG((" didnt find symbol ")); |
| PICOTRNS_PRINTSYM(NULL, inSeq[altDesc->inPos].sym); |
| PICODBG_INFO_MSG(("\n")); |
| |
| } else { |
| altDesc->altState = 1; |
| } |
| } |
| } else { |
| altDesc->altState = 2; |
| } |
| break; |
| case 1: /* within pair search */ |
| picokfst_kfstGetNextPair(fst,& altDesc->searchState,& pairFound,& (*outSym),& pairClass); |
| if (pairFound) { |
| picokfst_kfstGetTrans(fst,altDesc->startFSTState,pairClass,& (*endFSTState)); |
| if ((*endFSTState) > 0) { |
| (*found) = 1; |
| (*outRefPos) = inSeq[altDesc->inPos].pos; |
| (*nextInPos) = altDesc->inPos + 1; |
| } |
| } else { |
| /* no more pair found */ |
| altDesc->altState = 2; |
| } |
| break; |
| case 2: /* before inEps trans search */ |
| picokfst_kfstStartInEpsTransSearch(fst,altDesc->startFSTState,& inEpsTransFound,& altDesc->searchState); |
| if (inEpsTransFound) { |
| altDesc->altState = 3; |
| } else { |
| altDesc->altState = 4; |
| } |
| break; |
| case 3: /* within inEps trans search */ |
| picokfst_kfstGetNextInEpsTrans(fst,& altDesc->searchState,& inEpsTransFound,& (*outSym),& (*endFSTState)); |
| if (inEpsTransFound) { |
| (*found) = 1; |
| (*outRefPos) = PICOTRNS_POS_INSERT; |
| (*nextInPos) = altDesc->inPos; |
| } else { |
| altDesc->altState = 4; |
| } |
| break; |
| case 4: /* no more alternatives */ |
| break; |
| } |
| } while (! ((*found) || (altDesc->altState == 4)) ); /* i.e., until (*found) || (altState == 4) */ |
| } |
| |
| |
| |
| /* Transfers current alternatives path stored in 'altDesc' with current path length 'pathLen' |
| into 'outSeq'/'outSeqLen'. The number of solutions is incremented. */ |
| |
| static void NoteSolution (picoos_uint32 * nrSol, picotrns_printSolutionFct printSolution, |
| picotrns_altDesc_t altDesc[], picoos_uint16 pathLen, |
| picotrns_possym_t outSeq[], picoos_uint16 * outSeqLen, picoos_uint16 maxOutSeqLen) |
| { |
| register picotrns_AltDesc ap; |
| picoos_uint32 i; |
| |
| (*nrSol)++; |
| (*outSeqLen) = 0; |
| for (i = 0; i < pathLen; i++) { |
| if (i < maxOutSeqLen) { |
| ap = &altDesc[i]; |
| outSeq[i].sym = ap->altOutSym; |
| outSeq[i].pos = ap->altOutRefPos; |
| (*outSeqLen)++; |
| } |
| } |
| if (pathLen > maxOutSeqLen) { |
| PICODBG_WARN(("**** output symbol array too small to hold full solution\n")); |
| } |
| if (printSolution != NULL) { |
| printSolution(outSeq,(*outSeqLen)); |
| } |
| } |
| |
| |
| |
| /* * |
| general scheme to get all solutions ("position" refers to abstract backtracking recursion depth, |
| which in the current solution is equal to the output symbol position): |
| |
| "set position to first position"; |
| "initialize alternatives in first position"; |
| REPEAT |
| IF "current state in current position is a solution" THEN |
| "note solution"; |
| END; |
| "get first or next acceptable alternative in current position"; |
| IF "acceptable alternative found" THEN |
| "note alternative"; |
| "go to next position"; |
| "initialize alternatives in that position"; |
| ELSE |
| "step back to previous position"; |
| END; |
| UNTIL "current position is before first position" |
| ***/ |
| |
| |
| /* Initializes transduction state for further use in repeated application |
| of 'TransductionStep'. */ |
| |
| static void StartTransduction (struct picotrns_transductionState * transductionState) |
| { |
| (*transductionState).phase = 0; |
| } |
| |
| |
| |
| /* Performs one step in the transduction of 'inSeqLen' input symbols with corresponding |
| reference positions in 'inSeq'. '*transductionState' must have been |
| initialized by 'StartTransduction'. Repeat calls to this procedure until '*finished' returns true. |
| The output is returned in 'outSeqLen' symbols and reference positions in 'outSeq'. |
| The output reference positions refer to the corresponding input reference positions. |
| Inserted output symbols receive the reference position -1. If several solutions are possible, |
| only the last found solution is returned. |
| 'altDesc' is a temporary workspace which should be at least one cell longer than 'outSeq'. |
| 'firstSolOnly' determines whether only the first solution should be found or if |
| the search should go on to find all solutions (mainly for testing purposes). |
| |
| NOTE: current version written for use in single repetitive steps; |
| could be simplified if full transduction can be done as an atomic operation */ |
| |
| static void TransductionStep (picokfst_FST fst, struct picotrns_transductionState * transductionState, |
| picotrns_altDesc_t altDesc[], picoos_uint16 maxAltDescLen, |
| picoos_bool firstSolOnly, picotrns_printSolutionFct printSolution, |
| const picotrns_possym_t inSeq[], picoos_uint16 inSeqLen, |
| picotrns_possym_t outSeq[], picoos_uint16 * outSeqLen, picoos_uint16 maxOutSeqLen, |
| picoos_bool * finished) |
| { |
| register picotrns_AltDesc ap; |
| picoos_int32 i; |
| picokfst_state_t endFSTState; |
| picoos_int32 nextInPos; |
| picoos_bool found; |
| picokfst_symid_t outSym; |
| picoos_int32 outRefPos; |
| picoos_int32 tmpRecPos; |
| |
| (*finished) = 0; |
| tmpRecPos = (*transductionState).recPos; |
| switch ((*transductionState).phase) { |
| case 0: /* before initialization */ |
| (*transductionState).nrSol = 0; |
| |
| /* check for initial solution (empty strings are always accepted) */ |
| if (inSeqLen == 0) { |
| NoteSolution(& (*transductionState).nrSol,printSolution,altDesc,0,outSeq,outSeqLen,maxOutSeqLen); |
| } |
| |
| /* initialize first recursion position */ |
| tmpRecPos = 0; |
| ap = & altDesc[0]; |
| ap->startFSTState = 1; |
| ap->inPos = 0; |
| ap->altState = 0; |
| (*transductionState).phase = 1; |
| break; |
| |
| case 1: /* before regular recursion step */ |
| if ((tmpRecPos < 0) || (firstSolOnly && ((*transductionState).nrSol > 0))) { |
| /* end reached */ |
| (*transductionState).phase = 2; |
| } else { |
| /* not finished; do regular step */ |
| |
| /* get first or next acceptable alternative in current position */ |
| GetNextAlternative(fst,& altDesc[tmpRecPos],inSeq,inSeqLen,& outSym,& outRefPos,& endFSTState,& nextInPos,& found); |
| if (found) { |
| /* note alternative in current position */ |
| ap = & altDesc[tmpRecPos]; |
| ap->altOutSym = outSym; |
| ap->altOutRefPos = outRefPos; |
| |
| /* check for solution after found alternative */ |
| if ((nextInPos == inSeqLen) && picokfst_kfstIsAcceptingState(fst,endFSTState)) { |
| NoteSolution(& (*transductionState).nrSol,printSolution,altDesc,tmpRecPos+1, |
| outSeq,outSeqLen,maxOutSeqLen); |
| } |
| |
| /* go to next position if possible, start search for follower alternative symbols */ |
| if (tmpRecPos < maxAltDescLen-1) { |
| /* got to next position */ |
| tmpRecPos = tmpRecPos + 1; |
| |
| /* initialize alternatives in new position */ |
| ap = & altDesc[tmpRecPos]; |
| ap->startFSTState = endFSTState; |
| ap->inPos = nextInPos; |
| ap->altState = 0; |
| |
| } else { |
| /* do not go on due to limited path but still treat alternatives in current position */ |
| PICODBG_WARN(("--- transduction path too long; may fail to find solution\n")); |
| } |
| } else { /* no more acceptable alternative found in current position */ |
| /* backtrack to previous recursion */ |
| tmpRecPos = tmpRecPos - 1; |
| } |
| } |
| break; |
| |
| case 2: /* before finish */ |
| if ((*transductionState).nrSol == 0) { |
| PICODBG_WARN(("--- no transduction solution found, using input as output\n")); |
| i = 0; |
| while ((i < inSeqLen) && (i < maxOutSeqLen)) { |
| outSeq[i].sym = inSeq[i].sym; |
| outSeq[i].pos = inSeq[i].pos; |
| i++; |
| } |
| (*outSeqLen) = i; |
| } else if ((*transductionState).nrSol > 1) { |
| PICODBG_WARN(("--- more than one transducer solutions found\n")); |
| } |
| (*transductionState).phase = 3; |
| break; |
| |
| case 3: /* after finish */ |
| (*finished) = 1; |
| break; |
| } |
| (*transductionState).recPos = tmpRecPos; |
| } |
| |
| |
| |
| /* see description in header */ |
| pico_status_t picotrns_transduce (picokfst_FST fst, picoos_bool firstSolOnly, |
| picotrns_printSolutionFct printSolution, |
| const picotrns_possym_t inSeq[], picoos_uint16 inSeqLen, |
| picotrns_possym_t outSeq[], picoos_uint16 * outSeqLen, picoos_uint16 maxOutSeqLen, |
| picotrns_AltDesc altDescBuf, picoos_uint16 maxAltDescLen, |
| picoos_uint32 *nrSteps) |
| { |
| struct picotrns_transductionState transductionState; |
| picoos_bool finished; |
| |
| #if defined(PICO_DEBUG) |
| { |
| picoos_uint16 i; |
| |
| PICODBG_INFO_CTX(); |
| PICODBG_INFO_MSG(("got input: ")); |
| for (i=0; i<inSeqLen; i++) { |
| PICODBG_INFO_MSG((" %d", inSeq[i].sym)); |
| } |
| PICODBG_INFO_MSG((" (")); |
| PICOTRNS_PRINTSYMSEQ(NULL,inSeq,inSeqLen); |
| PICODBG_INFO_MSG((")\n")); |
| } |
| #endif |
| StartTransduction(&transductionState); |
| finished = 0; |
| *nrSteps = 0; |
| while (!finished) { |
| TransductionStep(fst,&transductionState,altDescBuf,maxAltDescLen,firstSolOnly,printSolution, |
| inSeq,inSeqLen,outSeq,outSeqLen,maxOutSeqLen,&finished); |
| (*nrSteps)++; |
| } |
| |
| return PICO_OK; |
| } |
| |
| |
| /** |
| * Data structure for picotrns_SimpleTransducer object. |
| */ |
| typedef struct picotrns_simple_transducer { |
| picoos_Common common; |
| picotrns_possym_t possymBufA[PICOTRNS_MAX_NUM_POSSYM+1]; |
| picotrns_possym_t possymBufB[PICOTRNS_MAX_NUM_POSSYM+1]; |
| picotrns_possym_t * possymBuf; /**< the buffer of the pos/sym pairs */ |
| picotrns_possym_t * possymBufTmp; |
| picoos_uint16 possymReadPos, possymWritePos; /* next pos to read from phonBufIn, next pos to write to phonBufIn */ |
| |
| /* buffer for internal calculation of transducer */ |
| picotrns_AltDesc altDescBuf; |
| /* the number of AltDesc in the buffer */ |
| picoos_uint16 maxAltDescLen; |
| } picotrns_simple_transducer_t; |
| |
| |
| pico_status_t picotrns_stInitialize(picotrns_SimpleTransducer transducer) |
| { |
| transducer->possymBuf = transducer->possymBufA; |
| transducer->possymBufTmp = transducer->possymBufB; |
| transducer->possymReadPos = 0; |
| transducer->possymWritePos = 0; |
| return PICO_OK; |
| } |
| /** creates a SimpleTranducer with a working buffer of given size |
| * |
| * @param mm MemoryManager handle |
| * @param common Common handle |
| * @param maxAltDescLen maximal size for working buffer (in bytes) |
| * @return handle to new SimpleTransducer or NULL if error |
| */ |
| picotrns_SimpleTransducer picotrns_newSimpleTransducer(picoos_MemoryManager mm, |
| picoos_Common common, |
| picoos_uint16 maxAltDescLen) |
| { |
| picotrns_SimpleTransducer this; |
| this = picoos_allocate(mm, sizeof(picotrns_simple_transducer_t)); |
| if (this == NULL) { |
| picoos_deallocate(mm, (void *)&this); |
| picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM, NULL, NULL); |
| return NULL; |
| } |
| |
| /* allocate working buffer */ |
| this->altDescBuf = picotrns_allocate_alt_desc_buf(mm, maxAltDescLen, &this->maxAltDescLen); |
| if (this->altDescBuf == NULL) { |
| picoos_deallocate(mm, (void *)&this); |
| picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM, NULL, NULL); |
| return NULL; |
| } |
| this->common = common; |
| picotrns_stInitialize(this); |
| return this; |
| } |
| /** disposes a SimpleTransducer |
| * |
| * @param this |
| * @param mm |
| * @return PICO_OK |
| */ |
| pico_status_t picotrns_disposeSimpleTransducer(picotrns_SimpleTransducer * this, |
| picoos_MemoryManager mm) |
| { |
| if (NULL != (*this)) { |
| picotrns_deallocate_alt_desc_buf(mm,&(*this)->altDescBuf); |
| picoos_deallocate(mm, (void *) this); |
| (*this) = NULL; |
| } |
| return PICO_OK; |
| } |
| |
| /** transduces the contents previously inserted via @ref picotrns_newSimpleTransducer and @ref |
| * picotrns_disposeSimpleTransducer. |
| * |
| * @param this |
| * @param fst |
| * @return |
| */ |
| pico_status_t picotrns_stTransduce(picotrns_SimpleTransducer this, picokfst_FST fst) |
| { |
| picoos_uint16 outSeqLen; |
| picoos_uint32 nrSteps; |
| pico_status_t status; |
| |
| status = picotrns_transduce(fst,TRUE,NULL, |
| this->possymBuf, this->possymWritePos, |
| this->possymBufTmp,&outSeqLen, PICOTRNS_MAX_NUM_POSSYM, |
| this->altDescBuf,this->maxAltDescLen,&nrSteps); |
| if (PICO_OK != status) { |
| return status; |
| } |
| return picotrns_eliminate_epsilons(this->possymBufTmp,outSeqLen,this->possymBuf,&this->possymWritePos,PICOTRNS_MAX_NUM_POSSYM); |
| } |
| |
| /** |
| * Add chars from NULLC-terminated string \c inStr, shifted to plane \c plane, to internal input buffer of |
| * \c transducer. |
| * |
| * @param this is an initialized picotrns_SimpleTransducer |
| * @param inStr NULLC-terminated byte sequence |
| * @param plane |
| * @return PICO_OK, if all bytes fit into buffer, or PICO_EXC_BUF_OVERFLOW otherwise |
| */ |
| pico_status_t picotrns_stAddWithPlane(picotrns_SimpleTransducer this, picoos_char * inStr, picoos_uint8 plane) |
| { |
| while ((*inStr) && (this->possymWritePos < PICOTRNS_MAX_NUM_POSSYM)) { |
| this->possymBuf[this->possymWritePos].pos = PICOTRNS_POS_INSERT; |
| this->possymBuf[this->possymWritePos].sym = (plane << 8) + (*inStr); |
| PICODBG_DEBUG(("inserting pos/sym = %i/'%c' at pos %i", |
| this->possymBuf[this->possymWritePos].pos, |
| this->possymBuf[this->possymWritePos].sym, |
| this->possymWritePos)); |
| this->possymWritePos++; |
| inStr++; |
| } |
| if (!(*inStr)) { |
| return PICO_OK; |
| } else { |
| return PICO_EXC_BUF_OVERFLOW; |
| } |
| } |
| |
| pico_status_t picotrns_stGetSymSequence( |
| picotrns_SimpleTransducer this, |
| picoos_uint8 * outputSymIds, |
| picoos_uint32 maxOutputSymIds) |
| { |
| picoos_uint8 plane; |
| picoos_uint32 outputCount = 0; |
| while ((this->possymReadPos < this->possymWritePos) && (outputCount < maxOutputSymIds)) { |
| *outputSymIds++ = picotrns_unplane(this->possymBuf[this->possymReadPos++].sym, &plane); |
| outputCount++; |
| } |
| *outputSymIds = NULLC; |
| if (outputCount <= maxOutputSymIds) { |
| return PICO_OK; |
| } else { |
| return PICO_EXC_BUF_OVERFLOW; |
| } |
| } |
| |
| #ifdef __cplusplus |
| } |
| #endif |
| |
| /* end picotrns.c */ |