| /* |
| * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| /** |
| * @file picokdt.h |
| * |
| * knowledge handling for decision trees |
| * |
| * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland |
| * All rights reserved. |
| * |
| * History: |
| * - 2009-04-20 -- initial version |
| * |
| */ |
| |
| #ifndef PICOKDT_H_ |
| #define PICOKDT_H_ |
| |
| #include "picoos.h" |
| #include "picoknow.h" |
| |
| #ifdef __cplusplus |
| extern "C" { |
| #endif |
| #if 0 |
| } |
| #endif |
| |
| |
| /* ************************************************************/ |
| /* |
| Several specialized decision trees kb are provided by this |
| knowledge handling module: |
| |
| - Part of speech prediction decision tree: ...kdt_PosP |
| - Part of speech disambiguation decision tree: ...kdt_PosD |
| - Grapheme-to-phoneme decision tree: ...kdt_G2P |
| - Phrasing decision tree: ...kdt_PHR |
| - Accentuation decision tree: ...kdt_ACC |
| these 5 tree types may be unified in the future to a single type |
| |
| - Phono-acoustical model trees: ...kdt_PAM |
| (actually 11 trees, but all have the same characteristics and |
| are instances of the same class) |
| */ |
| /* ************************************************************/ |
| |
| |
| /* ************************************************************/ |
| /* defines and functions to create specialized kb, */ |
| /* to be used by picorsrc only */ |
| /* ************************************************************/ |
| |
| typedef enum { /* kind of decision tree a knowledge base is specialized to */ |
| PICOKDT_KDTTYPE_POSP, /* part of speech prediction tree */ |
| PICOKDT_KDTTYPE_POSD, /* part of speech disambiguation tree */ |
| PICOKDT_KDTTYPE_G2P, /* grapheme-to-phoneme tree */ |
| PICOKDT_KDTTYPE_PHR, /* phrasing tree */ |
| PICOKDT_KDTTYPE_ACC, /* accentuation tree */ |
| PICOKDT_KDTTYPE_PAM /* phono-acoustical model tree */ |
| } picokdt_kdttype_t; |
| |
| pico_status_t picokdt_specializeDtKnowledgeBase(picoknow_KnowledgeBase this, /* the kb to specialize */ |
| picoos_Common common, |
| const picokdt_kdttype_t type); /* the PICOKDT_KDTTYPE_* tree kind; |
| as stated in the section header, this function is |
| meant to be used by picorsrc only */ |
| |
| |
| /* ************************************************************/ |
| /* decision tree types (opaque) and get Tree functions */ |
| /* ************************************************************/ |
| |
| /* decision tree types: one handle type per tree kind. Each handle is a |
| pointer to a struct that is not defined in this header, i.e. the |
| tree objects are opaque to users of this header */ |
| typedef struct picokdt_dtposp * picokdt_DtPosP; |
| typedef struct picokdt_dtposd * picokdt_DtPosD; |
| typedef struct picokdt_dtg2p * picokdt_DtG2P; |
| typedef struct picokdt_dtphr * picokdt_DtPHR; |
| typedef struct picokdt_dtacc * picokdt_DtACC; |
| typedef struct picokdt_dtpam * picokdt_DtPAM; |
| |
| /* return the decision tree of knowledge base 'this' for usage in a PU; |
| there is one getter per tree type, each returning the matching |
| opaque tree handle */ |
| picokdt_DtPosP picokdt_getDtPosP(picoknow_KnowledgeBase this); |
| picokdt_DtPosD picokdt_getDtPosD(picoknow_KnowledgeBase this); |
| picokdt_DtG2P picokdt_getDtG2P (picoknow_KnowledgeBase this); |
| picokdt_DtPHR picokdt_getDtPHR (picoknow_KnowledgeBase this); |
| picokdt_DtACC picokdt_getDtACC (picoknow_KnowledgeBase this); |
| picokdt_DtPAM picokdt_getDtPAM (picoknow_KnowledgeBase this); |
| |
| |
| /* number of attributes (= input vector size) for each tree type; |
| the values agree with the "tree input vector" index ranges listed |
| in the comments of the constructInVec functions below */ |
| typedef enum { |
| PICOKDT_NRATT_POSP = 12, /* attributes 0-11 */ |
| PICOKDT_NRATT_POSD = 7, /* attributes 0-6 */ |
| PICOKDT_NRATT_G2P = 16, /* attributes 0-15 */ |
| PICOKDT_NRATT_PHR = 8, /* attributes 0-7 */ |
| PICOKDT_NRATT_ACC = 13, /* attributes 0-12 */ |
| PICOKDT_NRATT_PAM = 60 /* 60 single-byte-sized attributes */ |
| } kdt_nratt_t; /* NOTE(review): unlike the other types in this header, this name has no picokdt_ prefix */ |
| |
| |
| /* ************************************************************/ |
| /* decision tree classification result type */ |
| /* ************************************************************/ |
| |
| typedef struct { /* single-class result, filled in via the dtres parameter of the decomposeOutClass functions */ |
| picoos_uint8 set; /* TRUE if class set, FALSE otherwise */ |
| picoos_uint16 class; /* the class value (presumably only valid when 'set' is TRUE); |
| NOTE(review): 'class' is a C++ keyword, as is the |
| 'this' parameter name used by the prototypes in this |
| header, so the header presumably cannot be included |
| from C++ despite its extern "C" guard */ |
| } picokdt_classify_result_t; |
| |
| |
| /* maximum number of output values the tree output is mapped to; |
| also the capacity of classvec in picokdt_classify_vecresult_t */ |
| #define PICOKDT_MAXSIZE_OUTVEC 8 |
| |
| typedef struct { /* multi-value result, filled in via the dtvres parameter of picokdt_dtG2PdecomposeOutClass */ |
| picoos_uint8 nr; /* 0 if no class set, nr of values set otherwise */ |
| picoos_uint16 classvec[PICOKDT_MAXSIZE_OUTVEC]; /* the output values; presumably only the first 'nr' entries are set */ |
| } picokdt_classify_vecresult_t; |
| |
| |
| /* ************************************************************/ |
| /* decision tree functions */ |
| /* ************************************************************/ |
| |
| /* constructInVec: |
| for every tree type there is a constructInVec function to construct |
| the size-optimized input vector for the tree using the input map |
| tables that are part of the decision tree knowledge base. The |
| constructed input vector is stored in the tree object (this->invec |
| and this->inveclen) and will be used in the following call to the |
| classify function. |
| |
| classify: |
| for every tree type there is a classify function to apply the |
| decision tree to the previously constructed input vector. The |
| size-optimized, encoded output is stored in the tree object |
| (this->outval) and will be used in the following call to the |
| decompose function. Where needed (history attribute) the direct tree |
| output is returned by the classify function in a variable. |
| |
| decomposeOutClass: |
| for every tree type there is a decompose function to decompose the |
| size-optimized, encoded tree output and map it to the outside the |
| tree usable class value. |
| */ |
| |
| |
| /* ************************************************************/ |
| /* decision tree defines */ |
| /* ************************************************************/ |
| |
| /* to construct the input vectors several hard-coded values are used |
| to handle attributes that, at the given position, are outside the |
| context. */ |
| |
/* graph attributes: values to be used if the graph attribute is
   outside the grapheme string (i.e. the word).
   The cast expressions are wrapped in parentheses so that each macro
   expands to a single primary expression. Without them, e.g.
   PICOKDT_OUTSIDEGRAPH_DEFSTR[0] would index the string literal first
   and then cast the resulting character to a pointer, and
   "sizeof PICOKDT_OUTSIDEGRAPH_DEFCH" would not parse. */
#define PICOKDT_OUTSIDEGRAPH_DEFCH  ((picoos_uint8)'\x30')   /* ascii "0" */
#define PICOKDT_OUTSIDEGRAPH_DEFSTR ((picoos_uint8 *)"\x30") /* ascii "0" */
#define PICOKDT_OUTSIDEGRAPH_DEFLEN 1 /* nr of bytes in DEFSTR, not counting the terminating NUL */
| |
/* graph attributes (special case for g2p): values to be used if the
   graph attribute is directly outside the grapheme string (i.e. at the
   word boundary). Use PICOKDT_OUTSIDEGRAPH_DEF* if further outside.
   The cast expressions are wrapped in parentheses so that each macro
   expands to a single primary expression. Without them, e.g.
   PICOKDT_OUTSIDEGRAPH_EOW_DEFSTR[0] would index the string literal
   first and then cast the resulting character to a pointer. */
#define PICOKDT_OUTSIDEGRAPH_EOW_DEFCH  ((picoos_uint8)'\x31')   /* ascii "1" */
#define PICOKDT_OUTSIDEGRAPH_EOW_DEFSTR ((picoos_uint8 *)"\x31") /* ascii "1" */
#define PICOKDT_OUTSIDEGRAPH_EOW_DEFLEN 1 /* nr of bytes in EOW_DEFSTR, not counting the terminating NUL */
| |
| /* byte and word type attributes: value to be used if a byte or word |
| attribute is outside the context, e.g. for POS (see the notes on the |
| PosD, PHR and ACC constructInVec functions below) */ |
| #define PICOKDT_EPSILON 7 |
| |
| /* byte and word type attributes: for attributes with history info a |
| 'zero' value is needed when starting the sequence of predictions. |
| Use the following value to initialize the history. Note that the |
| direct tree outputs (not mapped with the output map table) of |
| previous predictions need to be used when constructing the input |
| vector for a following prediction. This direct tree output will then |
| be mapped together with the rest of the input vector by the input |
| map table. */ |
| #define PICOKDT_HISTORY_ZERO 30000 |
| |
| |
| /* ************************************************************/ |
| /* decision tree POS prediction (PosP) functions */ |
| /* ************************************************************/ |
| |
| /* construct a POS prediction input vector |
| tree input vector: 0-3 prefix UTF8 graphemes |
| 4-9 suffix UTF8 graphemes |
| 10 special grapheme existence flag (TRUE/FALSE) |
| 11 number of graphemes |
| this: the PosP tree object in which the input vector is stored |
| graph: the grapheme string of the word for which POS will be predicted |
| graphlen: length of graph in number of bytes |
| specgraphflag: existence of a special grapheme boolean |
| returns: TRUE if okay, FALSE otherwise |
| note: use PICOKDT_OUTSIDEGRAPH* for att values outside context |
| */ |
| picoos_uint8 picokdt_dtPosPconstructInVec(const picokdt_DtPosP this, |
| const picoos_uint8 *graph, |
| const picoos_uint16 graphlen, |
| const picoos_uint8 specgraphflag); |
| |
| |
| /* classify a previously constructed input vector using tree 'this'; |
| as described in the general notes above, the encoded output is kept |
| in the tree object for the following call to |
| picokdt_dtPosPdecomposeOutClass |
| returns: TRUE if okay, FALSE otherwise |
| */ |
| picoos_uint8 picokdt_dtPosPclassify(const picokdt_DtPosP this); |
| |
| /* decompose the tree output and return the class in dtres |
| this: the PosP tree on which picokdt_dtPosPclassify was called before |
| dtres: (out) POS or POSgroup ID classification result |
| returns: TRUE if okay, FALSE otherwise |
| */ |
| picoos_uint8 picokdt_dtPosPdecomposeOutClass(const picokdt_DtPosP this, |
| picokdt_classify_result_t *dtres); |
| |
| |
| /* ************************************************************/ |
| /* decision tree POS disambiguation (PosD) functions */ |
| /* ************************************************************/ |
| |
| /* construct a POS disambiguation input vector (run in left-to-right mode) |
| tree input vector: 0-2 POS or POSgroup for each of the three previous words |
| 3 POSgroup for current word |
| 4-6 POS or POSgroup (can be history) for each of |
| the three following words |
| this: the PosD tree object in which the input vector is stored |
| input: the attribute values as an array of picoos_uint16; presumably |
| PICOKDT_NRATT_POSD (7) entries in tree input vector order |
| (pre3, pre2, pre1, src, fol1, fol2, fol3) - TODO confirm |
| against the implementation |
| NOTE(review): the names documented below (pre3 - pre1, src, fol1 - fol3, |
| ishist1 - ishist3) are not parameters of this prototype, which only |
| takes 'input'; presumably they describe the values to be placed in |
| 'input' - TODO confirm |
| pre3 - pre1: POSgroup or POS for the previous three words |
| src: POSgroup of current word (if unique POS no posdisa possible) |
| fol1 - fol3: POS or history for the following three words (the more |
| complicated the better... :-( ), NEEDS TO BE uint16 |
| ishist1-ishist3: flag to indicate if fol1-3 are predicted tree |
| output values (history) or the HISTORY_ZERO (TRUE) |
| or an already unambiguous POS (FALSE) |
| returns: TRUE if okay, FALSE otherwise |
| note: use PICOKDT_EPSILON for att values outside context, |
| if POS in fol* unique use this POS instead of real |
| history, use reverse output mapping in these cases |
| */ |
| picoos_uint8 picokdt_dtPosDconstructInVec(const picokdt_DtPosD this, |
| const picoos_uint16 * input); |
| |
| |
| /* classify a previously constructed input vector using tree 'this' |
| treeout: (out) direct tree output value, i.e. not mapped with the |
| output map table; per the PICOKDT_HISTORY_ZERO note this is |
| the value to use as history for a following prediction |
| returns: TRUE if okay, FALSE otherwise |
| */ |
| picoos_uint8 picokdt_dtPosDclassify(const picokdt_DtPosD this, |
| picoos_uint16 *treeout); |
| |
| /* decompose the tree output and return the class in dtres |
| this: the PosD tree on which picokdt_dtPosDclassify was called before |
| dtres: (out) POS classification result |
| returns: TRUE if okay, FALSE otherwise |
| */ |
| picoos_uint8 picokdt_dtPosDdecomposeOutClass(const picokdt_DtPosD this, |
| picokdt_classify_result_t *dtres); |
| |
| /* convert (unique) POS index into corresponding tree output index |
| (the "reverse output mapping" mentioned in the note on |
| picokdt_dtPosDconstructInVec) |
| inval: the (unique) POS index to convert |
| outval: (out) the corresponding tree output index |
| outfallbackval: (out) presumably a value to fall back to when no direct |
| mapping exists - TODO confirm against the implementation |
| returns: presumably TRUE if okay, FALSE otherwise, like the other |
| functions in this header - TODO confirm |
| */ |
| picoos_uint8 picokdt_dtPosDreverseMapOutFixed(const picokdt_DtPosD this, |
| const picoos_uint16 inval, |
| picoos_uint16 *outval, |
| picoos_uint16 *outfallbackval); |
| |
| /* ************************************************************/ |
| /* decision tree grapheme-to-phoneme (G2P) functions */ |
| /* ************************************************************/ |
| |
| /* construct a G2P input vector (run in right-to-left mode) |
| tree input vector: 0-8 the 4 previous, current, and 4 following graphemes |
| 9 POS |
| 10-11 vowel count and vowel ID |
| 12 primary stress flag (TRUE/FALSE) |
| 13-15 the three following phones predicted |
| this: the G2P tree object in which the input vector is stored |
| graph: the grapheme string used to determine invec[0:8] |
| graphlen: length of graph in number of bytes |
| count: the grapheme number for which invec will be constructed [0..] |
| pos: the part of speech of the word |
| nrvow: number of vowel-like graphemes in graph if vowel, |
| set to 0 otherwise |
| ordvow: order of 'count' vowel in graph if vowel, |
| set to 0 otherwise |
| primstressflag: flag indicating if primary stress was already predicted; |
| NOTE(review): passed as a non-const pointer, the prototype |
| alone does not tell whether the flag is modified |
| phonech1-3: the three following phon chunks predicted (right-to-left) |
| returns: TRUE if okay, FALSE otherwise |
| note: graphemes outside the word presumably use the |
| PICOKDT_OUTSIDEGRAPH* values defined above - TODO confirm |
| */ |
| picoos_uint8 picokdt_dtG2PconstructInVec(const picokdt_DtG2P this, |
| const picoos_uint8 *graph, |
| const picoos_uint16 graphlen, |
| const picoos_uint8 count, |
| const picoos_uint8 pos, |
| const picoos_uint8 nrvow, |
| const picoos_uint8 ordvow, |
| picoos_uint8 *primstressflag, |
| const picoos_uint16 phonech1, |
| const picoos_uint16 phonech2, |
| const picoos_uint16 phonech3); |
| |
| /* classify a previously constructed input vector using tree 'this' |
| treeout: (out) direct tree output value, i.e. not mapped with the |
| output map table |
| returns: TRUE if okay, FALSE otherwise |
| */ |
| picoos_uint8 picokdt_dtG2Pclassify(const picokdt_DtG2P this, |
| picoos_uint16 *treeout); |
| |
| /* decompose the tree output and return the class vector in dtvres |
| this: the G2P tree on which picokdt_dtG2Pclassify was called before |
| dtvres: (out) phones vector classification result; its classvec holds |
| at most PICOKDT_MAXSIZE_OUTVEC values |
| returns: TRUE if okay, FALSE otherwise |
| */ |
| picoos_uint8 picokdt_dtG2PdecomposeOutClass(const picokdt_DtG2P this, |
| picokdt_classify_vecresult_t *dtvres); |
| |
| |
| /* ************************************************************/ |
| /* decision tree phrasing (PHR) functions */ |
| /* ************************************************************/ |
| |
| /* construct a PHR input vector (run in right-to-left mode) |
| tree input vector: 0-1 POS for each of the two previous words |
| 2 POS for current word |
| 3-4 POS for each of the two following words |
| 5 nr words left |
| 6 nr words right |
| 7 nr syllables right |
| this: the PHR tree object in which the input vector is stored |
| pre2 - pre1: POS for the previous two words |
| src: POS of current word |
| fol1 - fol2: POS for the following two words |
| nrwordspre: number of words left (previous) of current word |
| nrwordsfol: number of words right (following) of current word, |
| incl. current word, up to next BOUND (also |
| considering previously predicted PHR2/3) |
| nrsyllsfol: number of syllables right (following) of current word, |
| incl. syllables of current word, up to next BOUND |
| (also considering previously predicted PHR2/3) |
| returns: TRUE if okay, FALSE otherwise |
| note: use PICOKDT_EPSILON for att values outside context |
| */ |
| picoos_uint8 picokdt_dtPHRconstructInVec(const picokdt_DtPHR this, |
| const picoos_uint8 pre2, |
| const picoos_uint8 pre1, |
| const picoos_uint8 src, |
| const picoos_uint8 fol1, |
| const picoos_uint8 fol2, |
| const picoos_uint16 nrwordspre, |
| const picoos_uint16 nrwordsfol, |
| const picoos_uint16 nrsyllsfol); |
| |
| /* classify a previously constructed input vector using tree 'this'; |
| as described in the general notes above, the encoded output is kept |
| in the tree object for the following call to |
| picokdt_dtPHRdecomposeOutClass |
| returns: TRUE if okay, FALSE otherwise |
| */ |
| picoos_uint8 picokdt_dtPHRclassify(const picokdt_DtPHR this); |
| |
| /* decompose the tree output and return the class in dtres |
| this: the PHR tree on which picokdt_dtPHRclassify was called before |
| dtres: (out) phrasing classification result (a single class, see |
| picokdt_classify_result_t) |
| returns: TRUE if okay, FALSE otherwise |
| */ |
| picoos_uint8 picokdt_dtPHRdecomposeOutClass(const picokdt_DtPHR this, |
| picokdt_classify_result_t *dtres); |
| |
| |
| /* ************************************************************/ |
| /* decision tree accentuation (ACC) functions */ |
| /* ************************************************************/ |
| |
| /* construct an ACC input vector (run in right-to-left mode) |
| tree input vector: 0-1 POS for each of the two previous words |
| 2 POS for current word |
| 3-4 POS for each of the two following words |
| 5-6 history values (already predicted following) |
| 7 nr words left (previous) to any bound |
| 8 nr syllables left to any bound |
| 9 nr words right (following) to any bound |
| 10 nr syllables right to any bound |
| 11 nr words right to predicted "1" prominence (foot) |
| 12 nr syllables right to predicted "1" prominence (foot) |
| this: the ACC tree object in which the input vector is stored |
| pre2 - pre1: POS for the previous two words |
| src: POS of current word |
| fol1 - fol2: POS for the following two words |
| hist1 - hist2: previously predicted ACC values |
| nrwordspre: number of words left (previous) of current word |
| nrsyllspre: number of syllables left (previous) of current word, |
| incl. initial non-prim stress syllables of current word |
| nrwordsfol: number of words right (following) of current word, |
| incl. current word, up to next BOUND (any strength != 0) |
| nrsyllsfol: number of syllables right (following) of current word, |
| incl. syllables of current word starting with prim. stress |
| syllable |
| footwordsfol: nr of words to the following prominence '1' |
| footsyllsfol: nr of syllables to the following prominence '1' |
| (attribute 12 counts syllables to the right; presumably |
| 'following' like footwordsfol - TODO confirm) |
| returns: TRUE if okay, FALSE otherwise |
| note: use PICOKDT_EPSILON for att 0-4 values outside context |
| */ |
| picoos_uint8 picokdt_dtACCconstructInVec(const picokdt_DtACC this, |
| const picoos_uint8 pre2, |
| const picoos_uint8 pre1, |
| const picoos_uint8 src, |
| const picoos_uint8 fol1, |
| const picoos_uint8 fol2, |
| const picoos_uint16 hist1, |
| const picoos_uint16 hist2, |
| const picoos_uint16 nrwordspre, |
| const picoos_uint16 nrsyllspre, |
| const picoos_uint16 nrwordsfol, |
| const picoos_uint16 nrsyllsfol, |
| const picoos_uint16 footwordsfol, |
| const picoos_uint16 footsyllsfol); |
| |
| /* classify a previously constructed input vector using tree 'this' |
| treeout: (out) direct tree output value, i.e. not mapped with the |
| output map table; per the PICOKDT_HISTORY_ZERO note this is |
| the value to use as history for a following prediction |
| returns: TRUE if okay, FALSE otherwise |
| */ |
| picoos_uint8 picokdt_dtACCclassify(const picokdt_DtACC this, |
| picoos_uint16 *treeout); |
| |
| /* decompose the tree output and return the class in dtres |
| this: the ACC tree on which picokdt_dtACCclassify was called before |
| dtres: (out) accentuation classification result (a single class, see |
| picokdt_classify_result_t) |
| returns: TRUE if okay, FALSE otherwise |
| */ |
| picoos_uint8 picokdt_dtACCdecomposeOutClass(const picokdt_DtACC this, |
| picokdt_classify_result_t *dtres); |
| |
| |
| /* ************************************************************/ |
| /* decision tree phono-acoustical model (PAM) functions */ |
| /* ************************************************************/ |
| |
| /* construct a PAM input vector and store the tree-specific encoded |
| input vector in the tree object. |
| this: the PAM tree object in which the input vector is stored |
| vec: tree input vector, 60 single-byte-sized attributes |
| (cf. PICOKDT_NRATT_PAM) |
| veclen: length of vec in number of bytes |
| returns: TRUE if okay, FALSE otherwise |
| */ |
| picoos_uint8 picokdt_dtPAMconstructInVec(const picokdt_DtPAM this, |
| const picoos_uint8 *vec, |
| const picoos_uint8 veclen); |
| |
| /* classify a previously constructed input vector using tree 'this'; |
| as described in the general notes above, the encoded output is kept |
| in the tree object for the following call to |
| picokdt_dtPAMdecomposeOutClass |
| returns: TRUE if okay, FALSE otherwise |
| */ |
| picoos_uint8 picokdt_dtPAMclassify(const picokdt_DtPAM this); |
| |
| /* decompose the tree output and return the class in dtres |
| this: the PAM tree on which picokdt_dtPAMclassify was called before |
| dtres: (out) classification result; a single class, not a vector |
| (see picokdt_classify_result_t) |
| returns: TRUE if okay, FALSE otherwise |
| */ |
| picoos_uint8 picokdt_dtPAMdecomposeOutClass(const picokdt_DtPAM this, |
| picokdt_classify_result_t *dtres); |
| |
| #ifdef __cplusplus |
| } |
| #endif |
| |
| |
| |
| #endif /*PICOKDT_H_*/ |