| /* |
| * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| /** |
| * @file picokdt.c |
| * |
| * knowledge handling for decision trees |
| * |
| * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland |
| * All rights reserved. |
| * |
| * History: |
| * - 2009-04-20 -- initial version |
| * |
| */ |
| |
| #include "picoos.h" |
| #include "picodbg.h" |
| #include "picobase.h" |
| #include "picoknow.h" |
| #include "picodata.h" |
| #include "picokdt.h" |
| |
| #ifdef __cplusplus |
| extern "C" { |
| #endif |
| #if 0 |
| } |
| #endif |
| |
| |
| /* ************************************************************/ |
| /* decision tree */ |
| /* ************************************************************/ |
| |
| /** |
| * @addtogroup picokdt |
| * ---------------------------------------------------\n |
| * <b> Pico KDT support </b>\n |
| * ---------------------------------------------------\n |
| overview extended binary tree file: |
| - dt consists of optional attribute mapping tables and a non-empty |
| tree part |
| - using the attribute mapping tables an attribute value as used |
| throughout the TTS can be mapped to its smaller representation |
| used in the tree |
| - multi-byte values always little endian |
| |
| ------------------------------------------------------------------- |
| - bin-file, decision tree knowledge base in binary form |
| |
| - dt-kb = header inputmaptables outputmaptables tree |
| |
| |
| - header = INPMAPTABLEPOS2 OUTMAPTABLEPOS2 TREEPOS2 |
| |
| - INPMAPTABLEPOS2: two bytes, equals offset in number of bytes from |
| the start of kb to the start of input map tables, |
| may not be 0 |
| - OUTMAPTABLEPOS2: two bytes, equals offset in number of bytes from |
| the start of kb to the start of outtables, |
| may not be 0 |
| - TREEPOS2: two bytes, equals offset in number of bytes from the |
| start of kb to the start of the tree, may not be 0 |
| |
| |
| - inputmaptables = maptables |
| - outputmaptables = maptables |
| - maptables = NRMAPTABLES1 {maptable}=NRMAPTABLES1 |
| - maptable = LENTABLE2 TABLETYPE1 ( bytemaptable |
| | wordmaptable |
| | graphinmaptable |
| | bytetovarmaptable ) |
| - bytemaptable (in or out, usage varies) = NRBYTES2 {BYTE1}=NRBYTES2 |
| - wordmaptable (in or out, usage varies) = NRWORDS2 {WORD2}=NRWORDS2 |
| - graphinmaptable (in only) = NRGRAPHS2 {GRAPH1:4}=NRGRAPHS2 |
| - bytetovarmaptable (out only) = NRINBYTES2 outvarsearchind |
| outvaroutputs |
| - outvarsearchind = {OUTVAROFFSET2}=NRINBYTES2 |
| - outvaroutputs = {VARVALID1:}=NRINBYTES2 |
| |
| - bytemaptable: fixed size, *Map*Fixed \n |
| - wordmaptable: fixed size, *Map*Fixed \n |
| - graphinmaptable: search value is variable size (UTF8 grapheme), \n |
| value to be mapped to is fixed size, one byte \n |
| - bytetovarmaptable: search value is fixed size, one byte, values \n |
| to be mapped to are of variable size (e.g. several \n |
| phones) \n |
| |
| - NRMAPTABLES1: one byte representing the number of map tables |
| - LENTABLE2: two bytes, equals offset to the next table (or next |
| part of kb, e.g. tree), |
| if LENTABLE2 = 3, and |
| TABLETYPE1 = EMPTY -> empty table, no mapping to be done |
| - TABLETYPE1: one byte, type of map table (byte, word, or graph=utf8) |
| - NRBYTES2: two bytes, number of bytes following in the table (one |
| would be okay, to simplify some implementation also set |
| to 2) |
| - BYTE1: one byte, the sequence is used to determine the values |
| being mapped to, starting with 0 |
| - NRWORDS2: two bytes, number of words (two bytes) following in the table |
| - WORD2: two bytes, the sequence is used to determine the values |
| being mapped to, starting with 0 |
| - NRGRAPHS2: two bytes, number of graphemes encoded in UTF8 following |
| in table |
| - GRAPH1:4: one to four bytes, UTF8 representation of a grapheme, the |
| sequence of graphemes is used to determine the value being |
| mapped to, starting with 0, the length information is |
| encoded in UTF8, no need for extra length info |
| - NRINBYTES2: two bytes, number of single byte IDs the tree can produce |
| - OUTVAROFFSET2: two bytes, offset from the start of the |
| outvaroutputs to the start of the following output |
| phone ID group, ie. the first outvaroffset is the |
| offset to the start of the second PHONEID |
| group. Using the previous outvaroffset (or the start |
| of the outvaroutputs) the start and length of the |
| PHONEID group can be determined and we can get the |
| sequence of output values we map the chunk value to |
| - VARVALID1:: one to several bytes, one byte each for an output phone ID |
| |
| - tree = treenodeinfos TREEBODYSIZE4 treebody |
| - treenodeinfos = NRVFIELDS1 vfields NRATTRIBUTES1 NRQFIELDS1 qfields |
| - vfields = {VFIELD1}=NRVFIELDS1 |
| - qfields = {QFIELD1}=NRATTRIBUTES1xNRQFIELDS1 |
| - treebody = "cf. code" |
| |
| - TREEBODYSIZE4: four bytes, size of treebody in number of bytes |
| - NRVFIELDS1: one byte, number of node properties in the following |
| vector (predefined and fixed sequence of properties) |
| - VFIELD1: number of bits used to represent a node property |
| - NRATTRIBUTES1: one byte, number of attributes (rows) in the |
| following matrix |
| - NRQFIELDS1: one byte, number (columns) of question-dependent node |
| properties per attribute in the following matrix |
| (predefined and fixed sequence of properties) |
| - QFIELD1: number of bits used to represent a question-dependent |
| property in the matrix |
| |
| |
| - Currently, |
| - NRVFIELDS1 is fixed at 2 for all trees, ie. |
| - vfields = 2 aVFIELD1 bVFIELD1 |
| - aVFIELD1: nr of bits for questions |
| - bVFIELD1: nr of bits for decisions |
| |
| - NRQFIELDS1 is fixed at 5 for all trees, ie. \n |
| - qfields = NRATTRIBUTES1 5 aQFIELD1 bQFIELD1 cQFIELD1 dQFIELD1 eQFIELD1 \n |
| - aQFIELD1: nr of bits for fork count \n |
| - bQFIELD1: nr of bits for start position for subsets \n |
| - cQFIELD1: nr of bits for group size \n |
| - dQFIELD1: nr of bits for offset to reach output \n |
| - eQFIELD1: nr of bits for threshold (if continuous node) \n |
| */ |
| |
| |
| /* ************************************************************/ |
| /* decision tree data defines */ |
| /* may not be changed with current implementation */ |
| /* ************************************************************/ |
| |
/* maptables section: byte offset of the table counter (NRMAPTABLES1) */
#define PICOKDT_MTSPOS_NRMAPTABLES  0

/* maptables section: byte offset of the first byte of the first maptable
   (for the output map tables this is the only table) */
#define PICOKDT_MTPOS_START         1

/* single maptable: byte offsets of its fields, relative to the start of
   the table (LENTABLE2, TABLETYPE1, number of entries, first entry) */
#define PICOKDT_MTPOS_LENTABLE      0
#define PICOKDT_MTPOS_TABLETYPE     2
#define PICOKDT_MTPOS_NUMBER        3
#define PICOKDT_MTPOS_MAPSTART      5

/* treenodeinfos: byte offsets of the counters, relative to the start of
   the tree part */
#define PICOKDT_NIPOS_NRVFIELDS     0
#define PICOKDT_NIPOS_NRATTS        3
#define PICOKDT_NIPOS_NRQFIELDS     4

/* treenodeinfos: the only supported values of NRVFIELDS1 and NRQFIELDS1 */
#define PICOKDT_NODEINFO_NRVFIELDS  2
#define PICOKDT_NODEINFO_NRQFIELDS  5

/* bit widths that are fixed rather than read from the tree */
#define PICOKDT_NODETYPE_NRBITS     2
#define PICOKDT_SUBSETTYPE_NRBITS   2
#define PICOKDT_ISDECIDE_NRBITS     1
| |
/* Expected number of input map tables per tree type. There is one
   (possibly empty) input map table per attribute, so each value currently
   has to be equal to the corresponding PICOKDT_NRATT* constant. */
typedef enum {
    PICOKDT_NRINPMT_POSP = 12,
    PICOKDT_NRINPMT_POSD = 7,
    PICOKDT_NRINPMT_G2P  = 16,
    PICOKDT_NRINPMT_PHR  = 8,
    PICOKDT_NRINPMT_ACC  = 13,
    PICOKDT_NRINPMT_PAM  = 60
} kdt_nrinpmaptables_t;
| |
/* Expected number of output map tables per tree type. Every tree carries
   at least one output map table, which may be empty. */
typedef enum {
    PICOKDT_NROUTMT_POSP = 1,
    PICOKDT_NROUTMT_POSD = 1,
    PICOKDT_NROUTMT_G2P  = 1,
    PICOKDT_NROUTMT_PHR  = 1,
    PICOKDT_NROUTMT_ACC  = 1,
    PICOKDT_NROUTMT_PAM  = 1
} kdt_nroutmaptables_t;
| |
/* Values of the TABLETYPE1 field of a maptable */
typedef enum {
    PICOKDT_MTTYPE_EMPTY     = 0,   /* empty table, no mapping to be done */
    PICOKDT_MTTYPE_BYTE      = 1,   /* bytemaptable */
    PICOKDT_MTTYPE_WORD      = 2,   /* wordmaptable */
    PICOKDT_MTTYPE_GRAPH     = 3,   /* graphinmaptable (UTF8 graphemes) */
    PICOKDT_MTTYPE_BYTETOVAR = 4    /* bytetovarmaptable */
} kdt_mttype_t;
| |
| |
| /* ************************************************************/ |
| /* decision tree types and loading */ |
| /* ************************************************************/ |
| /* object : Dt*KnowledgeBase |
| * shortcut : kdt* |
| * derived from : picoknow_KnowledgeBase |
| */ |
| |
| /* subobj shared by all decision trees */ |
| typedef struct { |
| picokdt_kdttype_t type; |
| picoos_uint8 *inpmaptable; |
| picoos_uint8 *outmaptable; |
| picoos_uint8 *tree; |
| picoos_uint32 beg_offset[128]; /* for efficiency */ |
| |
| /* tree-internal details for faster processing */ |
| picoos_uint8 *vfields; |
| picoos_uint8 *qfields; |
| picoos_uint8 nrattributes; |
| picoos_uint8 *treebody; |
| /*picoos_uint8 nrvfields;*/ /* fix PICOKDT_NODEINFO_NRVFIELDS */ |
| /*picoos_uint8 nrqfields;*/ /* fix PICOKDT_NODEINFO_NRQFIELDS */ |
| |
| /* direct output vector (no output mapping) */ |
| picoos_uint8 dset; /* TRUE if class set, FALSE otherwise */ |
| picoos_uint16 dclass; |
| } kdt_subobj_t; |
| |
| /* subobj specific for each decision tree type */ |
| typedef struct { |
| kdt_subobj_t dt; |
| picoos_uint16 invec[PICOKDT_NRATT_POSP]; /* input vector */ |
| picoos_uint8 inveclen; /* nr of ele set in invec; must be =nrattributes */ |
| } kdtposp_subobj_t; |
| |
| typedef struct { |
| kdt_subobj_t dt; |
| picoos_uint16 invec[PICOKDT_NRATT_POSD]; /* input vector */ |
| picoos_uint8 inveclen; /* nr of ele set in invec; must be =nrattributes */ |
| } kdtposd_subobj_t; |
| |
| typedef struct { |
| kdt_subobj_t dt; |
| picoos_uint16 invec[PICOKDT_NRATT_G2P]; /* input vector */ |
| picoos_uint8 inveclen; /* nr of ele set in invec; must be =nrattributes */ |
| } kdtg2p_subobj_t; |
| |
| typedef struct { |
| kdt_subobj_t dt; |
| picoos_uint16 invec[PICOKDT_NRATT_PHR]; /* input vector */ |
| picoos_uint8 inveclen; /* nr of ele set in invec; must be =nrattributes */ |
| } kdtphr_subobj_t; |
| |
| typedef struct { |
| kdt_subobj_t dt; |
| picoos_uint16 invec[PICOKDT_NRATT_ACC]; /* input vector */ |
| picoos_uint8 inveclen; /* nr of ele set in invec; must be =nrattributes */ |
| } kdtacc_subobj_t; |
| |
| typedef struct { |
| kdt_subobj_t dt; |
| picoos_uint16 invec[PICOKDT_NRATT_PAM]; /* input vector */ |
| picoos_uint8 inveclen; /* nr of ele set in invec; must be =nrattributes */ |
| } kdtpam_subobj_t; |
| |
| |
| static pico_status_t kdtDtInitialize(register picoknow_KnowledgeBase this, |
| picoos_Common common, |
| kdt_subobj_t *dtp) { |
| picoos_uint16 inppos; |
| picoos_uint16 outpos; |
| picoos_uint16 treepos; |
| picoos_uint32 curpos = 0, pos; |
| picoos_uint16 lentable; |
| picoos_uint16 i; |
| picoos_uint8 imtnr; |
| |
| PICODBG_DEBUG(("start")); |
| |
| /* get inmap, outmap, tree offsets */ |
| if ((PICO_OK == picoos_read_mem_pi_uint16(this->base, &curpos, &inppos)) |
| && (PICO_OK == picoos_read_mem_pi_uint16(this->base, &curpos, &outpos)) |
| && (PICO_OK == picoos_read_mem_pi_uint16(this->base, &curpos, |
| &treepos))) { |
| |
| /* all pos are mandatory, verify */ |
| if (inppos && outpos && treepos) { |
| dtp->inpmaptable = this->base + inppos; |
| dtp->outmaptable = this->base + outpos; |
| dtp->tree = this->base + treepos; |
| /* precalc beg offset table */ |
| imtnr=dtp->inpmaptable[0]; |
| pos=1; |
| dtp->beg_offset[0] = 1; |
| for (i = 0; i < imtnr; i++) { |
| lentable = ((picoos_uint16)(dtp->inpmaptable[pos+1])) << 8 | |
| dtp->inpmaptable[pos]; |
| pos += lentable; |
| dtp->beg_offset[i+1] = pos; |
| } |
| } else { |
| dtp->inpmaptable = NULL; |
| dtp->outmaptable = NULL; |
| dtp->tree = NULL; |
| PICODBG_ERROR(("invalid kb position info")); |
| return picoos_emRaiseException(common->em, PICO_EXC_FILE_CORRUPT, |
| NULL, NULL); |
| } |
| |
| /* nr of outmaptables is equal 1 for all trees, verify */ |
| if (dtp->outmaptable[PICOKDT_MTSPOS_NRMAPTABLES] != 1) { |
| PICODBG_ERROR(("wrong number of outmaptables")); |
| return picoos_emRaiseException(common->em, PICO_EXC_FILE_CORRUPT, |
| NULL, NULL); |
| } |
| |
| /* check if this is an empty table, ie. len == 3 */ |
| if ((dtp->outmaptable[PICOKDT_MTPOS_START + PICOKDT_MTPOS_LENTABLE] |
| == 3) |
| && (dtp->outmaptable[PICOKDT_MTPOS_START + PICOKDT_MTPOS_LENTABLE |
| + 1] == 0)) { |
| /* verify that this is supposed to be an empty table and |
| set outmaptable to NULL if so */ |
| if (dtp->outmaptable[PICOKDT_MTPOS_START + PICOKDT_MTPOS_TABLETYPE] |
| == PICOKDT_MTTYPE_EMPTY) { |
| dtp->outmaptable = NULL; |
| } else { |
| PICODBG_ERROR(("table length vs. type problem")); |
| return picoos_emRaiseException(common->em, |
| PICO_EXC_FILE_CORRUPT, |
| NULL, NULL); |
| } |
| } |
| |
| dtp->vfields = dtp->tree + 1; |
| dtp->qfields = dtp->tree + PICOKDT_NODEINFO_NRVFIELDS + 3; |
| dtp->nrattributes = dtp->tree[PICOKDT_NIPOS_NRATTS]; |
| dtp->treebody = dtp->qfields + 4 + |
| (dtp->nrattributes * PICOKDT_NODEINFO_NRQFIELDS); /* TREEBODYSIZE4*/ |
| |
| /*dtp->nrvfields = dtp->tree[PICOKDT_NIPOS_NRVFIELDS]; <- is fix */ |
| /*dtp->nrqfields = dtp->tree[PICOKDT_NIPOS_NRQFIELDS]; <- is fix */ |
| /* verify that nrvfields ad nrqfields are correct */ |
| if ((PICOKDT_NODEINFO_NRVFIELDS != dtp->tree[PICOKDT_NIPOS_NRVFIELDS]) || |
| (PICOKDT_NODEINFO_NRQFIELDS != dtp->tree[PICOKDT_NIPOS_NRQFIELDS])) { |
| PICODBG_ERROR(("problem with nr of vfields (%d) or qfields (%d)", |
| dtp->tree[PICOKDT_NIPOS_NRVFIELDS], |
| dtp->tree[PICOKDT_NIPOS_NRQFIELDS])); |
| return picoos_emRaiseException(common->em, PICO_EXC_FILE_CORRUPT, |
| NULL, NULL); |
| } |
| dtp->dset = 0; |
| dtp->dclass = 0; |
| PICODBG_DEBUG(("tree init: nratt: %d, posomt: %d, postree: %d", |
| dtp->nrattributes, (dtp->outmaptable - dtp->inpmaptable), |
| (dtp->tree - dtp->inpmaptable))); |
| return PICO_OK; |
| } else { |
| PICODBG_ERROR(("problem reading kb in memory")); |
| return picoos_emRaiseException(common->em, PICO_EXC_FILE_CORRUPT, |
| NULL, NULL); |
| } |
| } |
| |
| |
| static pico_status_t kdtDtCheck(register picoknow_KnowledgeBase this, |
| picoos_Common common, |
| kdt_subobj_t *dtp, |
| kdt_nratt_t nratt, |
| kdt_nrinpmaptables_t nrinpmt, |
| kdt_nroutmaptables_t nroutmt, |
| kdt_mttype_t mttype) { |
| /* check nr attributes */ |
| /* check nr inpmaptables */ |
| /* check nr outmaptables */ |
| /* check outmaptable is word type */ |
| if ((nratt != dtp->nrattributes) |
| || (dtp->inpmaptable == NULL) |
| || (dtp->outmaptable == NULL) |
| || (dtp->inpmaptable[PICOKDT_MTSPOS_NRMAPTABLES] != nrinpmt) |
| || (dtp->outmaptable[PICOKDT_MTSPOS_NRMAPTABLES] != nroutmt) |
| || (dtp->outmaptable[PICOKDT_MTPOS_START+PICOKDT_MTPOS_TABLETYPE] |
| != mttype)) { |
| PICODBG_ERROR(("check failed, nratt %d, nrimt %d, nromt %d, omttype %d", |
| dtp->nrattributes, |
| dtp->inpmaptable[PICOKDT_MTSPOS_NRMAPTABLES], |
| dtp->outmaptable[PICOKDT_MTSPOS_NRMAPTABLES], |
| dtp->outmaptable[PICOKDT_MTPOS_START + |
| PICOKDT_MTPOS_TABLETYPE])); |
| return picoos_emRaiseException(common->em, PICO_EXC_FILE_CORRUPT, |
| NULL, NULL); |
| } |
| return PICO_OK; |
| } |
| |
| |
| |
| static pico_status_t kdtPosPInitialize(register picoknow_KnowledgeBase this, |
| picoos_Common common) { |
| pico_status_t status; |
| kdtposp_subobj_t *dtposp; |
| kdt_subobj_t *dt; |
| picoos_uint8 i; |
| |
| if (NULL == this || NULL == this->subObj) { |
| return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING, |
| NULL, NULL); |
| } |
| dtposp = (kdtposp_subobj_t *)this->subObj; |
| dt = &(dtposp->dt); |
| dt->type = PICOKDT_KDTTYPE_POSP; |
| if ((status = kdtDtInitialize(this, common, dt)) != PICO_OK) { |
| return status; |
| } |
| if ((status = kdtDtCheck(this, common, dt, PICOKDT_NRATT_POSP, |
| PICOKDT_NRINPMT_POSP, PICOKDT_NROUTMT_POSP, |
| PICOKDT_MTTYPE_WORD)) != PICO_OK) { |
| return status; |
| } |
| |
| /* init specialized subobj part */ |
| for (i = 0; i < PICOKDT_NRATT_POSP; i++) { |
| dtposp->invec[i] = 0; |
| } |
| dtposp->inveclen = 0; |
| PICODBG_DEBUG(("posp tree initialized")); |
| return PICO_OK; |
| } |
| |
| |
| static pico_status_t kdtPosDInitialize(register picoknow_KnowledgeBase this, |
| picoos_Common common) { |
| pico_status_t status; |
| kdtposd_subobj_t *dtposd; |
| kdt_subobj_t *dt; |
| picoos_uint8 i; |
| |
| if (NULL == this || NULL == this->subObj) { |
| return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING, |
| NULL, NULL); |
| } |
| dtposd = (kdtposd_subobj_t *)this->subObj; |
| dt = &(dtposd->dt); |
| dt->type = PICOKDT_KDTTYPE_POSD; |
| if ((status = kdtDtInitialize(this, common, dt)) != PICO_OK) { |
| return status; |
| } |
| if ((status = kdtDtCheck(this, common, dt, PICOKDT_NRATT_POSD, |
| PICOKDT_NRINPMT_POSD, PICOKDT_NROUTMT_POSD, |
| PICOKDT_MTTYPE_WORD)) != PICO_OK) { |
| return status; |
| } |
| |
| /* init spezialized subobj part */ |
| for (i = 0; i < PICOKDT_NRATT_POSD; i++) { |
| dtposd->invec[i] = 0; |
| } |
| dtposd->inveclen = 0; |
| PICODBG_DEBUG(("posd tree initialized")); |
| return PICO_OK; |
| } |
| |
| |
| static pico_status_t kdtG2PInitialize(register picoknow_KnowledgeBase this, |
| picoos_Common common) { |
| pico_status_t status; |
| kdtg2p_subobj_t *dtg2p; |
| kdt_subobj_t *dt; |
| picoos_uint8 i; |
| |
| if (NULL == this || NULL == this->subObj) { |
| return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING, |
| NULL, NULL); |
| } |
| dtg2p = (kdtg2p_subobj_t *)this->subObj; |
| dt = &(dtg2p->dt); |
| dt->type = PICOKDT_KDTTYPE_G2P; |
| if ((status = kdtDtInitialize(this, common, dt)) != PICO_OK) { |
| return status; |
| } |
| |
| if ((status = kdtDtCheck(this, common, dt, PICOKDT_NRATT_G2P, |
| PICOKDT_NRINPMT_G2P, PICOKDT_NROUTMT_G2P, |
| PICOKDT_MTTYPE_BYTETOVAR)) != PICO_OK) { |
| return status; |
| } |
| |
| /* init spezialized subobj part */ |
| for (i = 0; i < PICOKDT_NRATT_G2P; i++) { |
| dtg2p->invec[i] = 0; |
| } |
| dtg2p->inveclen = 0; |
| PICODBG_DEBUG(("g2p tree initialized")); |
| return PICO_OK; |
| } |
| |
| |
| static pico_status_t kdtPhrInitialize(register picoknow_KnowledgeBase this, |
| picoos_Common common) { |
| pico_status_t status; |
| kdtphr_subobj_t *dtphr; |
| kdt_subobj_t *dt; |
| picoos_uint8 i; |
| |
| if (NULL == this || NULL == this->subObj) { |
| return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING, |
| NULL, NULL); |
| } |
| dtphr = (kdtphr_subobj_t *)this->subObj; |
| dt = &(dtphr->dt); |
| dt->type = PICOKDT_KDTTYPE_PHR; |
| if ((status = kdtDtInitialize(this, common,dt)) != PICO_OK) { |
| return status; |
| } |
| |
| if ((status = kdtDtCheck(this, common, dt, PICOKDT_NRATT_PHR, |
| PICOKDT_NRINPMT_PHR, PICOKDT_NROUTMT_PHR, |
| PICOKDT_MTTYPE_WORD)) != PICO_OK) { |
| return status; |
| } |
| |
| /* init spezialized subobj part */ |
| for (i = 0; i < PICOKDT_NRATT_PHR; i++) { |
| dtphr->invec[i] = 0; |
| } |
| dtphr->inveclen = 0; |
| PICODBG_DEBUG(("phr tree initialized")); |
| return PICO_OK; |
| } |
| |
| |
| static pico_status_t kdtAccInitialize(register picoknow_KnowledgeBase this, |
| picoos_Common common) { |
| pico_status_t status; |
| kdtacc_subobj_t *dtacc; |
| kdt_subobj_t *dt; |
| picoos_uint8 i; |
| |
| if (NULL == this || NULL == this->subObj) { |
| return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING, |
| NULL, NULL); |
| } |
| dtacc = (kdtacc_subobj_t *)this->subObj; |
| dt = &(dtacc->dt); |
| dt->type = PICOKDT_KDTTYPE_ACC; |
| if ((status = kdtDtInitialize(this, common, dt)) != PICO_OK) { |
| return status; |
| } |
| |
| if ((status = kdtDtCheck(this, common, dt, PICOKDT_NRATT_ACC, |
| PICOKDT_NRINPMT_ACC, PICOKDT_NROUTMT_ACC, |
| PICOKDT_MTTYPE_WORD)) != PICO_OK) { |
| return status; |
| } |
| |
| /* init spezialized subobj part */ |
| for (i = 0; i < PICOKDT_NRATT_ACC; i++) { |
| dtacc->invec[i] = 0; |
| } |
| dtacc->inveclen = 0; |
| PICODBG_DEBUG(("acc tree initialized")); |
| return PICO_OK; |
| } |
| |
| |
| static pico_status_t kdtPamInitialize(register picoknow_KnowledgeBase this, |
| picoos_Common common) { |
| pico_status_t status; |
| kdtpam_subobj_t *dtpam; |
| kdt_subobj_t *dt; |
| picoos_uint8 i; |
| |
| if (NULL == this || NULL == this->subObj) { |
| return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING, |
| NULL, NULL); |
| } |
| dtpam = (kdtpam_subobj_t *)this->subObj; |
| dt = &(dtpam->dt); |
| dt->type = PICOKDT_KDTTYPE_PAM; |
| if ((status = kdtDtInitialize(this, common, dt)) != PICO_OK) { |
| return status; |
| } |
| |
| if ((status = kdtDtCheck(this, common, dt, PICOKDT_NRATT_PAM, |
| PICOKDT_NRINPMT_PAM, PICOKDT_NROUTMT_PAM, |
| PICOKDT_MTTYPE_WORD)) != PICO_OK) { |
| return status; |
| } |
| |
| /* init spezialized subobj part */ |
| for (i = 0; i < PICOKDT_NRATT_PAM; i++) { |
| dtpam->invec[i] = 0; |
| } |
| dtpam->inveclen = 0; |
| PICODBG_DEBUG(("pam tree initialized")); |
| return PICO_OK; |
| } |
| |
| |
| static pico_status_t kdtSubObjDeallocate(register picoknow_KnowledgeBase this, |
| picoos_MemoryManager mm) { |
| if (NULL != this) { |
| picoos_deallocate(mm, (void *) &this->subObj); |
| } |
| return PICO_OK; |
| } |
| |
| |
| /* we don't offer a specialized constructor for a *KnowledgeBase but |
| * instead a "specializer" of an already existing generic |
| * picoknow_KnowledgeBase */ |
| |
| pico_status_t picokdt_specializeDtKnowledgeBase(picoknow_KnowledgeBase this, |
| picoos_Common common, |
| const picokdt_kdttype_t kdttype) { |
| pico_status_t status; |
| |
| if (NULL == this) { |
| return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING, |
| NULL, NULL); |
| } |
| this->subDeallocate = kdtSubObjDeallocate; |
| switch (kdttype) { |
| case PICOKDT_KDTTYPE_POSP: |
| this->subObj = picoos_allocate(common->mm,sizeof(kdtposp_subobj_t)); |
| if (NULL == this->subObj) { |
| return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM, |
| NULL, NULL); |
| } |
| status = kdtPosPInitialize(this, common); |
| break; |
| case PICOKDT_KDTTYPE_POSD: |
| this->subObj = picoos_allocate(common->mm,sizeof(kdtposd_subobj_t)); |
| if (NULL == this->subObj) { |
| return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM, |
| NULL, NULL); |
| } |
| status = kdtPosDInitialize(this, common); |
| break; |
| case PICOKDT_KDTTYPE_G2P: |
| this->subObj = picoos_allocate(common->mm,sizeof(kdtg2p_subobj_t)); |
| if (NULL == this->subObj) { |
| return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM, |
| NULL, NULL); |
| } |
| status = kdtG2PInitialize(this, common); |
| break; |
| case PICOKDT_KDTTYPE_PHR: |
| this->subObj = picoos_allocate(common->mm,sizeof(kdtphr_subobj_t)); |
| if (NULL == this->subObj) { |
| return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM, |
| NULL, NULL); |
| } |
| status = kdtPhrInitialize(this, common); |
| break; |
| case PICOKDT_KDTTYPE_ACC: |
| this->subObj = picoos_allocate(common->mm,sizeof(kdtacc_subobj_t)); |
| if (NULL == this->subObj) { |
| return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM, |
| NULL, NULL); |
| } |
| status = kdtAccInitialize(this, common); |
| break; |
| case PICOKDT_KDTTYPE_PAM: |
| this->subObj = picoos_allocate(common->mm,sizeof(kdtpam_subobj_t)); |
| if (NULL == this->subObj) { |
| return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM, |
| NULL, NULL); |
| } |
| status = kdtPamInitialize(this, common); |
| break; |
| default: |
| return picoos_emRaiseException(common->em, PICO_ERR_OTHER, |
| NULL, NULL); |
| } |
| |
| if (status != PICO_OK) { |
| picoos_deallocate(common->mm, (void *) &this->subObj); |
| return picoos_emRaiseException(common->em, status, NULL, NULL); |
| } |
| return PICO_OK; |
| } |
| |
| |
| /* ************************************************************/ |
| /* decision tree getDt* */ |
| /* ************************************************************/ |
| |
| picokdt_DtPosP picokdt_getDtPosP(picoknow_KnowledgeBase this) { |
| return ((NULL == this) ? NULL : ((picokdt_DtPosP) this->subObj)); |
| } |
| |
| picokdt_DtPosD picokdt_getDtPosD(picoknow_KnowledgeBase this) { |
| return ((NULL == this) ? NULL : ((picokdt_DtPosD) this->subObj)); |
| } |
| |
| picokdt_DtG2P picokdt_getDtG2P (picoknow_KnowledgeBase this) { |
| return ((NULL == this) ? NULL : ((picokdt_DtG2P) this->subObj)); |
| } |
| |
| picokdt_DtPHR picokdt_getDtPHR (picoknow_KnowledgeBase this) { |
| return ((NULL == this) ? NULL : ((picokdt_DtPHR) this->subObj)); |
| } |
| |
| picokdt_DtACC picokdt_getDtACC (picoknow_KnowledgeBase this) { |
| return ((NULL == this) ? NULL : ((picokdt_DtACC) this->subObj)); |
| } |
| |
| picokdt_DtPAM picokdt_getDtPAM (picoknow_KnowledgeBase this) { |
| return ((NULL == this) ? NULL : ((picokdt_DtPAM) this->subObj)); |
| } |
| |
| |
| |
| /* ************************************************************/ |
| /* decision tree support functions, tree */ |
| /* ************************************************************/ |
| |
| |
/* Indices into the vfields vector; each entry holds a number of bits */
typedef enum {
    eQuestion = 0,  /* index to #bits to identify question */
    eDecide   = 1   /* index to #bits to identify decision */
} kdt_vfields_ind_t;
| |
/* Column indices into one row (one attribute) of the qfields matrix;
   each entry holds a number of bits */
typedef enum {
    eForkCount = 0,  /* index to #bits for number of forks */
    eBitNo     = 1,  /* index to #bits for index of 1st element */
    eBitCount  = 2,  /* index to #bits for size of the group */
    eJump      = 3,  /* index to #bits for offset to reach output node */
    eCut       = 4   /* for contin. node: #bits for threshold checked */
} kdt_qfields_ind_t;
| |
/* Node types as stored in the first PICOKDT_NODETYPE_NRBITS bits of a
   node */
typedef enum {
    eNTerminal   = 0,
    eNBinary     = 1,  /* two forks, selected by the input value */
    eNContinuous = 2,  /* two forks, input value compared to a threshold */
    eNDiscrete   = 3   /* fork count read from the tree, forks described
                          by subsets */
} kdt_nodetypes_t;
| |
/* Subset types as stored in PICOKDT_SUBSETTYPE_NRBITS bits for each fork
   of a discrete node */
typedef enum {
    eOneValue       = 0,
    eTwoValues      = 1,
    eWithoutBitMask = 2,
    eBitMask        = 3
} kdt_subsettypes_t;
| |
| |
| /* Name : kdt_jump |
| Function: maps the iJump offset to byte + bit coordinates |
| Input : iJump absolute bit offset (0..(nr-bytes-treebody)*8) |
| Output : iByteNo the first byte containing the bits to extract |
| (0..(nr-bytes-treebody)) |
| iBitNo the first bit to be extracted (0..7) |
| Returns : void |
| Notes : updates the iByteNo + iBitNo fields |
| */ |
| static void kdt_jump(const picoos_uint32 iJump, |
| picoos_uint32 *iByteNo, |
| picoos_int8 *iBitNo) { |
| picoos_uint32 iByteSize; |
| |
| iByteSize = (iJump / 8 ); |
| *iBitNo = (iJump - (iByteSize * 8)) + (7 - *iBitNo); |
| *iByteNo += iByteSize; |
| if (*iBitNo >= 8) { |
| (*iByteNo)++; |
| *iBitNo = 15 - *iBitNo; |
| } else { |
| *iBitNo = 7 - *iBitNo; |
| } |
| } |
| |
| |
| /* replaced inline for speedup */ |
| /* Name : kdtIsVal |
| Function: Returns the binary value of the bit pointed to by iByteNo, iBitNo |
| Input : iByteNo offset to the byte containing the bits to extract |
| (0..sizeof(treebody)) |
| iBitNo offset to the first bit to be extracted (0..7) |
| Returns : 0/1 depending on the bit pointed to |
| */ |
| /* |
| static picoos_uint8 kdtIsVal(register kdt_subobj_t *this, |
| picoos_uint32 iByteNo, |
| picoos_int8 iBitNo) { |
| return ((this->treebody[iByteNo] & ((1)<<iBitNo)) > 0); |
| } |
| */ |
| |
| |
| /* @todo : consider replacing inline for speedup */ |
| |
| /* Name : kdtGetQFieldsVal (was: m_QuestDependentFields) |
| Function: gets a byte from qfields |
| Input : this handle to a dt subobj |
| attind index of the attribute |
| qind index of the byte to be read |
| Returns : the requested byte |
| Notes : check that attind < this->nrattributes needed before calling |
| this function! |
| */ |
| static picoos_uint8 kdtGetQFieldsVal(register kdt_subobj_t *this, |
| const picoos_uint8 attind, |
| const kdt_qfields_ind_t qind) { |
| /* check of qind done in initialize and (for some compilers) with typing */ |
| /* check of attind needed before calling this function */ |
| return this->qfields[(attind * PICOKDT_NODEINFO_NRQFIELDS) + qind]; |
| } |
| |
| |
| /* Name : kdtGetShiftVal (was: get_shift_value) |
| Function: returns the (treebody) value pointed to by iByteNo, iBitNo, |
| and with size iSize |
| Input : this reference to the processing unit struct |
| iSize number of bits to be extracted (0..N) |
| iByteNo offset to the byte containing the bits to extract |
| (0..sizeof(treebody)) |
| iBitNo offset to the first bit to be extracted (0..7) |
| Returns : the value requested (if size==0 --> 0 is returned) |
| */ |
| /* |
| static picoos_uint32 orig_kdtGetShiftVal(register kdt_subobj_t *this, |
| const picoos_int16 iSize, |
| picoos_uint32 *iByteNo, |
| picoos_int8 *iBitNo) { |
| picoos_uint32 iVal; |
| picoos_int16 i; |
| |
| iVal = 0; |
| for (i = iSize-1; i >= 0; i--) { |
| if ( (this->treebody[*iByteNo] & ((1)<<(*iBitNo))) > 0) { |
| iVal |= ( (1) << i ); |
| } |
| (*iBitNo)--; |
| if (*iBitNo < 0) { |
| *iBitNo = 7; |
| (*iByteNo)++; |
| } |
| } |
| return iVal; |
| } |
| */ |
| /* refactor */ |
| static picoos_uint32 kdtGetShiftVal(register kdt_subobj_t *this, |
| const picoos_int16 iSize, picoos_uint32 *iByteNo, picoos_int8 *iBitNo) |
| { |
| picoos_uint32 v, b, iVal; |
| picoos_int16 i, j, len; |
| picoos_uint8 val; |
| |
| if (iSize < 4) { |
| iVal = 0; |
| for (i = iSize - 1; i >= 0; i--) { |
| /* no check that *iByteNo is within valid treebody range */ |
| if ((this->treebody[*iByteNo] & ((1) << (*iBitNo))) > 0) { |
| iVal |= ((1) << i); |
| } |
| (*iBitNo)--; |
| if (*iBitNo < 0) { |
| *iBitNo = 7; |
| (*iByteNo)++; |
| } |
| } |
| return iVal; |
| } |
| |
| b = *iByteNo; |
| j = *iBitNo; |
| len = iSize; |
| *iBitNo = j - iSize; |
| v = 0; |
| while (*iBitNo < 0) { |
| *iBitNo += 8; |
| (*iByteNo)++; |
| } |
| |
| val = this->treebody[b++]; |
| if (j < 7) { |
| switch (j) { |
| case 0: |
| val &= 0x01; |
| break; |
| case 1: |
| val &= 0x03; |
| break; |
| case 2: |
| val &= 0x07; |
| break; |
| case 3: |
| val &= 0x0f; |
| break; |
| case 4: |
| val &= 0x1f; |
| break; |
| case 5: |
| val &= 0x3f; |
| break; |
| case 6: |
| val &= 0x7f; |
| break; |
| } |
| } |
| len -= j + 1; |
| if (len < 0) { |
| val >>= -len; |
| } |
| v = val; |
| while (len > 0) { |
| if (len >= 8) { |
| j = 8; |
| } else { |
| j = len; |
| } |
| v <<= j; |
| val = this->treebody[b++]; |
| if (j < 8) { |
| switch (j) { |
| case 1: |
| val &= 0x80; |
| val >>= 7; |
| break; |
| case 2: |
| val &= 0xc0; |
| val >>= 6; |
| break; |
| case 3: |
| val &= 0xe0; |
| val >>= 5; |
| break; |
| case 4: |
| val &= 0xf0; |
| val >>= 4; |
| break; |
| case 5: |
| val &= 0xf8; |
| val >>= 3; |
| break; |
| case 6: |
| val &= 0xfc; |
| val >>= 2; |
| break; |
| case 7: |
| val &= 0xfe; |
| val >>= 1; |
| break; |
| } |
| } |
| v |= val; |
| len -= j; |
| } |
| return v; |
| } |
| |
| |
| /* Name : kdtAskTree |
| Function: Tree Traversal routine |
| Input : iByteNo offset to the first byte containing the bits |
| to extract (0..sizeof(treebody)) |
| iBitNo offset to the first bit to be extracted (0..7) |
| Returns : >0 continue, no solution yet found |
| =0 solution found |
| <0 error, no solution found |
| Notes : |
| */ |
| static picoos_int8 kdtAskTree(register kdt_subobj_t *this, |
| picoos_uint16 *invec, |
| const kdt_nratt_t invecmax, |
| picoos_uint32 *iByteNo, |
| picoos_int8 *iBitNo) { |
| picoos_uint32 iNodeType; |
| picoos_uint8 iQuestion; |
| picoos_int32 iVal; |
| picoos_int32 iForks; |
| picoos_int32 iID; |
| |
| picoos_int32 iCut, iSubsetType, iBitPos, iBitCount, iPos, iJump, iDecision; |
| picoos_int32 i; |
| picoos_char iIsDecide; |
| |
| PICODBG_TRACE(("start")); |
| |
| /* get node type, value should be in kdt_nodetype_t range */ |
| iNodeType = kdtGetShiftVal(this, PICOKDT_NODETYPE_NRBITS, iByteNo, iBitNo); |
| PICODBG_TRACE(("iNodeType: %d", iNodeType)); |
| |
| /* get attribute to be used in question, check if in range, and get val */ |
| /* check of vfields argument done in initialize */ |
| iQuestion = kdtGetShiftVal(this, this->vfields[eQuestion], iByteNo, iBitNo); |
| if ((iQuestion < this->nrattributes) && (iQuestion < invecmax)) { |
| iVal = invec[iQuestion]; |
| } else { |
| this->dset = FALSE; |
| PICODBG_TRACE(("invalid question")); |
| return -1; /* iQuestion invalid */ |
| } |
| iForks = 0; |
| iID = -1; |
| PICODBG_TRACE(("iQuestion: %d", iQuestion)); |
| |
| switch (iNodeType) { |
| case eNBinary: { |
| iForks = 2; |
| iID = iVal; |
| break; |
| } |
| case eNContinuous: { |
| iForks = 2; |
| iID = 1; |
| iCut = kdtGetShiftVal(this, kdtGetQFieldsVal(this, iQuestion, eCut), |
| iByteNo, iBitNo); /*read the threshold*/ |
| if (iVal <= iCut) { |
| iID = 0; |
| } |
| break; |
| } |
| case eNDiscrete: { |
| iForks = |
| kdtGetShiftVal(this, |
| kdtGetQFieldsVal(this, iQuestion, eForkCount), |
| iByteNo, iBitNo); |
| |
| for (i = 0; i < iForks-1; i++) { |
| iSubsetType = |
| kdtGetShiftVal(this, PICOKDT_SUBSETTYPE_NRBITS, |
| iByteNo, iBitNo); |
| |
| switch (iSubsetType) { |
| case eOneValue: { |
| if (iID > -1) { |
| kdt_jump(kdtGetQFieldsVal(this, iQuestion, eBitNo), |
| iByteNo, iBitNo); |
| break; |
| } |
| iBitPos = |
| kdtGetShiftVal(this, |
| kdtGetQFieldsVal(this, iQuestion, |
| eBitNo), |
| iByteNo, iBitNo); |
| if (iVal == iBitPos) { |
| iID = i; |
| } |
| break; |
| } |
| case eTwoValues: { |
| if (iID > -1) { |
| kdt_jump((kdtGetQFieldsVal(this, iQuestion, eBitNo) + |
| kdtGetQFieldsVal(this, iQuestion, eBitCount)), |
| iByteNo, iBitNo); |
| break; |
| } |
| |
| iBitPos = |
| kdtGetShiftVal(this, kdtGetQFieldsVal(this, iQuestion, |
| eBitNo), |
| iByteNo, iBitNo); |
| iBitCount = |
| kdtGetShiftVal(this, kdtGetQFieldsVal(this, iQuestion, |
| eBitCount), |
| iByteNo, iBitNo); |
| if ((iVal == iBitPos) || (iVal == iBitCount)) { |
| iID = i; |
| } |
| break; |
| } |
| case eWithoutBitMask: { |
| if (iID > -1) { |
| kdt_jump((kdtGetQFieldsVal(this, iQuestion, eBitNo) + |
| kdtGetQFieldsVal(this, iQuestion, eBitCount)), |
| iByteNo, iBitNo); |
| break; |
| } |
| |
| iBitPos = |
| kdtGetShiftVal(this, kdtGetQFieldsVal(this, iQuestion, |
| eBitNo), |
| iByteNo, iBitNo); |
| iBitCount = |
| kdtGetShiftVal(this, kdtGetQFieldsVal(this, iQuestion, |
| eBitCount), |
| iByteNo, iBitNo); |
| if ((iVal >= iBitPos) && (iVal < (iBitPos + iBitCount))) { |
| iID = i; |
| } |
| break; |
| } |
| case eBitMask: { |
| iBitPos = 0; |
| if (iID > -1) { |
| kdt_jump(kdtGetQFieldsVal(this, iQuestion, eBitNo), |
| iByteNo, iBitNo); |
| } else { |
| iBitPos = |
| kdtGetShiftVal(this, |
| kdtGetQFieldsVal(this, iQuestion, |
| eBitNo), |
| iByteNo, iBitNo); |
| } |
| |
| iBitCount = |
| kdtGetShiftVal(this, |
| kdtGetQFieldsVal(this, iQuestion, |
| eBitCount), |
| iByteNo, iBitNo); |
| if (iID > -1) { |
| kdt_jump(iBitCount, iByteNo, iBitNo); |
| break; |
| } |
| |
| if ((iVal >= iBitPos) && (iVal < (iBitPos + iBitCount))) { |
| iPos = iVal - iBitPos; |
| kdt_jump((iVal - iBitPos), iByteNo, iBitNo); |
| /* if (kdtIsVal(this, *iByteNo, *iBitNo))*/ |
| if ((this->treebody[*iByteNo] & ((1)<<(*iBitNo))) > 0) { |
| iID = i; |
| } |
| kdt_jump((iBitCount - (iVal-iBitPos)), iByteNo, iBitNo); |
| } else { |
| kdt_jump(iBitCount, iByteNo, iBitNo); |
| } |
| break; |
| }/*end case eBitMask*/ |
| }/*end switch (iSubsetType)*/ |
| }/*end for ( i = 0; i < iForks-1; i++ ) */ |
| |
| /*default tree branch*/ |
| if (-1 == iID) { |
| iID = iForks-1; |
| } |
| break; |
| }/*end case eNDiscrete*/ |
| }/*end switch (iNodeType)*/ |
| |
| for (i = 0; i < iForks; i++) { |
| iIsDecide = kdtGetShiftVal(this, PICOKDT_ISDECIDE_NRBITS, iByteNo, iBitNo); |
| |
| PICODBG_TRACE(("doing forks: %d", i)); |
| |
| if (!iIsDecide) { |
| if (iID == i) { |
| iJump = |
| kdtGetShiftVal(this, kdtGetQFieldsVal(this, iQuestion, eJump), |
| iByteNo, iBitNo); |
| kdt_jump(iJump, iByteNo, iBitNo); |
| this->dset = FALSE; |
| return 1; /* to be continued, no solution yet found */ |
| } else { |
| kdt_jump(kdtGetQFieldsVal(this, iQuestion, eJump), |
| iByteNo, iBitNo); |
| } |
| } else { |
| if (iID == i) { |
| /* check of vfields argument done in initialize */ |
| iDecision = kdtGetShiftVal(this, this->vfields[eDecide], |
| iByteNo, iBitNo); |
| this->dclass = iDecision; |
| this->dset = TRUE; |
| return 0; /* solution found */ |
| } else { |
| /* check of vfields argument done in initialize */ |
| kdt_jump(this->vfields[eDecide], iByteNo, iBitNo); |
| } |
| }/*end if (!iIsDecide)*/ |
| }/*end for (i = 0; i < iForks; i++ )*/ |
| |
| this->dset = FALSE; |
| PICODBG_TRACE(("problem determining class")); |
| return -1; /* solution not found, problem determining a class */ |
| } |
| |
| |
| |
| /* ************************************************************/ |
| /* decision tree support functions, mappings */ |
| /* ************************************************************/ |
| |
| |
| /* size==1 -> MapInByte, size==2 -> MapInWord, |
| size determined from table type contained in kb. |
| if the inmaptable is empty, outval = inval */ |
| |
| static picoos_uint8 kdtMapInFixed(const kdt_subobj_t *dt, |
| const picoos_uint8 imtnr, |
| const picoos_uint16 inval, |
| picoos_uint16 *outval, |
| picoos_uint16 *outfallbackval) { |
| picoos_uint8 size; |
| picoos_uint32 pos; |
| picoos_uint16 lentable; |
| picoos_uint16 posbound; |
| picoos_uint16 i; |
| |
| *outval = 0; |
| *outfallbackval = 0; |
| |
| size = 0; |
| pos = 0; |
| |
| /* check what can be checked */ |
| if (imtnr >= dt->inpmaptable[pos++]) { /* outside tablenr range? */ |
| PICODBG_ERROR(("check failed: nrtab: %d, imtnr: %d", |
| dt->inpmaptable[pos-1], imtnr)); |
| return FALSE; |
| } |
| |
| /* go forward to the needed tablenr */ |
| if (imtnr > 0) { |
| pos = dt->beg_offset[imtnr]; |
| } |
| |
| /* get length */ |
| lentable = ((picoos_uint16)(dt->inpmaptable[pos+1])) << 8 | |
| dt->inpmaptable[pos]; |
| posbound = pos + lentable; |
| pos += 2; |
| |
| /* check type of table and set size */ |
| if (dt->inpmaptable[pos] == PICOKDT_MTTYPE_EMPTY) { |
| /* empty table no mapping needed */ |
| PICODBG_TRACE(("empty table: %d", imtnr)); |
| *outval = inval; |
| return TRUE; |
| } else if (dt->inpmaptable[pos] == PICOKDT_MTTYPE_BYTE) { |
| size = 1; |
| } else if (dt->inpmaptable[pos] == PICOKDT_MTTYPE_WORD) { |
| size = 2; |
| } else { |
| /* wrong table type */ |
| PICODBG_ERROR(("wrong table type %d", dt->inpmaptable[pos])); |
| return FALSE; |
| } |
| pos++; |
| |
| /* set fallback value in case of failed mapping, and set upper bound pos */ |
| *outfallbackval = ((picoos_uint16)(dt->inpmaptable[pos+1])) << 8 | |
| dt->inpmaptable[pos]; |
| pos += 2; |
| |
| /* size must be 1 or 2 here, keep 'redundant' so save time */ |
| if (size == 1) { |
| for (i = 0; (i < *outfallbackval) && (pos < posbound); i++) { |
| if (inval == dt->inpmaptable[pos]) { |
| *outval = i; |
| PICODBG_TRACE(("s1 %d in %d -> out %d", imtnr, inval, *outval)); |
| return TRUE; |
| } |
| pos++; |
| } |
| } else if (size == 2) { |
| posbound--; |
| for (i = 0; (i < *outfallbackval) && (pos < posbound); i++) { |
| if (inval == (((picoos_uint16)(dt->inpmaptable[pos+1])) << 8 | |
| dt->inpmaptable[pos])) { |
| *outval = i; |
| PICODBG_TRACE(("s2 %d in %d -> out %d", imtnr, inval, *outval)); |
| return TRUE; |
| } |
| pos += 2; |
| } |
| } else { |
| /* impossible size */ |
| PICODBG_ERROR(("wrong size %d", size)); |
| return FALSE; |
| } |
| |
| PICODBG_DEBUG(("no mapping found, fallback: %d", *outfallbackval)); |
| return FALSE; |
| } |
| |
| |
| static picoos_uint8 kdtMapInGraph(const kdt_subobj_t *dt, |
| const picoos_uint8 imtnr, |
| const picoos_uint8 *inval, |
| const picoos_uint8 invalmaxlen, |
| picoos_uint16 *outval, |
| picoos_uint16 *outfallbackval) { |
| picoos_uint8 ilen; |
| picoos_uint8 tlen; |
| picoos_uint8 cont; |
| picoos_uint32 pos; |
| picoos_uint16 lentable; |
| picoos_uint16 posbound; |
| picoos_uint16 i; |
| picoos_uint8 j; |
| |
| *outfallbackval = 0; |
| |
| pos = 0; |
| /* check what can be checked */ |
| if ((imtnr >= dt->inpmaptable[pos++]) || /* outside tablenr range? */ |
| (invalmaxlen == 0) || /* too short? */ |
| ((ilen = picobase_det_utf8_length(inval[0])) == 0) || /* invalid? */ |
| (ilen > invalmaxlen)) { /* not accessible? */ |
| PICODBG_ERROR(("check failed: nrtab: %d, imtnr: %d, invalmaxlen: %d, " |
| "ilen: %d", |
| dt->inpmaptable[pos-1], imtnr, invalmaxlen, ilen)); |
| return FALSE; |
| } |
| |
| /* go forward to the needed tablenr */ |
| for (i = 0; i < imtnr; i++) { |
| lentable = ((picoos_uint16)(dt->inpmaptable[pos+1])) << 8 | |
| dt->inpmaptable[pos]; |
| pos += lentable; |
| } |
| |
| /* get length and check type of inpmaptable */ |
| lentable = ((picoos_uint16)(dt->inpmaptable[pos+1])) << 8 | |
| dt->inpmaptable[pos]; |
| posbound = pos + lentable; |
| pos += 2; |
| |
| #if defined(PICO_DEBUG) |
| if (1) { |
| int id; |
| PICODBG_TRACE(("imtnr %d", imtnr)); |
| for (id = pos-2; id < posbound; id++) { |
| PICODBG_TRACE(("imtbyte pos %d, %c %d", id - (pos-2), |
| dt->inpmaptable[id], dt->inpmaptable[id])); |
| } |
| } |
| #endif |
| |
| /* check type of table */ |
| if (dt->inpmaptable[pos] != PICOKDT_MTTYPE_GRAPH) { |
| /* empty table does not make sense for graph */ |
| /* wrong table type */ |
| PICODBG_ERROR(("wrong table type")); |
| return FALSE; |
| } |
| pos++; |
| |
| /* set fallback value in case of failed mapping, and set upper bound pos */ |
| *outfallbackval = ((picoos_uint16)(dt->inpmaptable[pos+1])) << 8 | |
| dt->inpmaptable[pos]; |
| pos += 2; |
| |
| /* sequential search */ |
| for (i = 0; (i < *outfallbackval) && (pos < posbound); i++) { |
| tlen = picobase_det_utf8_length(dt->inpmaptable[pos]); |
| if ((pos + tlen) > posbound) { |
| PICODBG_ERROR(("trying outside imt, posb: %d, pos: %d, tlen: %d", |
| posbound, pos, tlen)); |
| return FALSE; |
| } |
| if (ilen == tlen) { |
| cont = TRUE; |
| for (j = 0; cont && (j < ilen); j++) { |
| if (dt->inpmaptable[pos + j] != inval[j]) { |
| cont = FALSE; |
| } |
| } |
| if (cont && (j == ilen)) { /* match found */ |
| *outval = i; |
| PICODBG_TRACE(("found mapval, posb %d, pos %d, i %d, tlen %d", |
| posbound, pos, i, tlen)); |
| return TRUE; |
| } |
| } |
| pos += tlen; |
| } |
| PICODBG_DEBUG(("outside imt %d, posb/pos/i: %d/%d/%d, fallback: %d", |
| imtnr, posbound, pos, i, *outfallbackval)); |
| return FALSE; |
| } |
| |
| |
| /* size==1 -> MapOutByte, size==2 -> MapOutWord */ |
| static picoos_uint8 kdtMapOutFixed(const kdt_subobj_t *dt, |
| const picoos_uint16 inval, |
| picoos_uint16 *outval) { |
| picoos_uint8 size; |
| picoos_uint16 nr; |
| |
| /* no check of lentable vs. nr in initialize done */ |
| |
| size = 0; |
| |
| /* type */ |
| nr = dt->outmaptable[PICOKDT_MTPOS_START + PICOKDT_MTPOS_TABLETYPE]; |
| |
| /* check type of table and set size */ |
| if (nr == PICOKDT_MTTYPE_EMPTY) { |
| /* empty table no mapping needed */ |
| PICODBG_TRACE(("empty table")); |
| *outval = inval; |
| return TRUE; |
| } else if (nr == PICOKDT_MTTYPE_BYTE) { |
| size = 1; |
| } else if (nr == PICOKDT_MTTYPE_WORD) { |
| size = 2; |
| } else { |
| /* wrong table type */ |
| PICODBG_ERROR(("wrong table type %d", nr)); |
| return FALSE; |
| } |
| |
| /* number of mapvalues */ |
| nr = ((picoos_uint16)(dt->outmaptable[PICOKDT_MTPOS_START + |
| PICOKDT_MTPOS_NUMBER + 1])) << 8 |
| | dt->outmaptable[PICOKDT_MTPOS_START + PICOKDT_MTPOS_NUMBER]; |
| |
| if (inval < nr) { |
| if (size == 1) { |
| *outval = dt->outmaptable[PICOKDT_MTPOS_START + |
| PICOKDT_MTPOS_MAPSTART + (size * inval)]; |
| } else { |
| *outval = ((picoos_uint16)(dt->outmaptable[PICOKDT_MTPOS_START + |
| PICOKDT_MTPOS_MAPSTART + (size * inval) + 1])) << 8 |
| | dt->outmaptable[PICOKDT_MTPOS_START + |
| PICOKDT_MTPOS_MAPSTART + (size * inval)]; |
| } |
| return TRUE; |
| } else { |
| *outval = 0; |
| return FALSE; |
| } |
| } |
| |
| |
| /* size==1 -> ReverseMapOutByte, size==2 -> ReverseMapOutWord */ |
| /* outmaptable also used to map from decoded tree output domain to |
| direct tree output domain */ |
| static picoos_uint8 kdtReverseMapOutFixed(const kdt_subobj_t *dt, |
| const picoos_uint16 inval, |
| picoos_uint16 *outval, |
| picoos_uint16 *outfallbackval) { |
| picoos_uint8 size; |
| picoos_uint32 pos; |
| picoos_uint16 lentable; |
| picoos_uint16 posbound; |
| picoos_uint16 i; |
| |
| /* no check of lentable vs. nr in initialize done */ |
| |
| size = 0; |
| pos = 0; |
| *outval = 0; |
| *outfallbackval = 0; |
| |
| if (dt->outmaptable == NULL) { |
| /* empty table no mapping needed */ |
| PICODBG_TRACE(("empty table")); |
| *outval = inval; |
| return TRUE; |
| } |
| |
| /* check what can be checked */ |
| if (dt->outmaptable[pos++] != 1) { /* only one omt possible */ |
| PICODBG_ERROR(("check failed: nrtab: %d", dt->outmaptable[pos-1])); |
| return FALSE; |
| } |
| |
| /* get length */ |
| lentable = ((picoos_uint16)(dt->outmaptable[pos+1])) << 8 | |
| dt->outmaptable[pos]; |
| posbound = pos + lentable; |
| pos += 2; |
| |
| /* check type of table and set size */ |
| /* if (dt->outmaptable[pos] == PICOKDT_MTTYPE_EMPTY), in |
| ...Initialize the omt is set to NULL if not existing, checked |
| above */ |
| |
| if (dt->outmaptable[pos] == PICOKDT_MTTYPE_BYTE) { |
| size = 1; |
| } else if (dt->outmaptable[pos] == PICOKDT_MTTYPE_WORD) { |
| size = 2; |
| } else { |
| /* wrong table type */ |
| PICODBG_ERROR(("wrong table type %d", dt->outmaptable[pos])); |
| return FALSE; |
| } |
| pos++; |
| |
| /* set fallback value in case of failed mapping, and set upper bound pos */ |
| *outfallbackval = ((picoos_uint16)(dt->outmaptable[pos+1])) << 8 | |
| dt->outmaptable[pos]; |
| pos += 2; |
| |
| /* size must be 1 or 2 here, keep 'redundant' so save time */ |
| if (size == 1) { |
| for (i = 0; (i < *outfallbackval) && (pos < posbound); i++) { |
| if (inval == dt->outmaptable[pos]) { |
| *outval = i; |
| PICODBG_TRACE(("s1 inval %d -> outval %d", inval, *outval)); |
| return TRUE; |
| } |
| pos++; |
| } |
| } else if (size == 2) { |
| posbound--; |
| for (i = 0; (i < *outfallbackval) && (pos < posbound); i++) { |
| if (inval == (((picoos_uint16)(dt->outmaptable[pos+1])) << 8 | |
| dt->outmaptable[pos])) { |
| *outval = i; |
| PICODBG_TRACE(("s2 inval %d -> outval %d", inval, *outval)); |
| return TRUE; |
| } |
| pos += 2; |
| } |
| } else { |
| /* impossible size */ |
| PICODBG_ERROR(("wrong size %d", size)); |
| return FALSE; |
| } |
| |
| PICODBG_DEBUG(("no mapping found, fallback: %d", *outfallbackval)); |
| return FALSE; |
| } |
| |
| |
| picoos_uint8 picokdt_dtPosDreverseMapOutFixed(const picokdt_DtPosD this, |
| const picoos_uint16 inval, |
| picoos_uint16 *outval, |
| picoos_uint16 *outfallbackval) { |
| |
| kdtposd_subobj_t * dtposd = (kdtposd_subobj_t *)this; |
| kdt_subobj_t * dt = &(dtposd->dt); |
| return kdtReverseMapOutFixed(dt,inval, outval, outfallbackval); |
| } |
| |
| /* not yet impl. size==1 -> MapOutByteToVar, |
| fix: size==2 -> MapOutWordToVar */ |
| static picoos_uint8 kdtMapOutVar(const kdt_subobj_t *dt, |
| const picoos_uint16 inval, |
| picoos_uint8 *nr, |
| picoos_uint16 *outval, |
| const picoos_uint16 outvalmaxlen) { |
| picoos_uint16 pos; |
| picoos_uint16 off2ind; |
| picoos_uint16 lentable; |
| picoos_uint16 nrinbytes; |
| picoos_uint8 size; |
| picoos_uint16 offset1; |
| picoos_uint16 i; |
| |
| if (dt->outmaptable == NULL) { |
| /* empty table not possible */ |
| PICODBG_ERROR(("no table found")); |
| return FALSE; |
| } |
| |
| /* nr of tables == 1 already checked in *Initialize, no need here, go |
| directly to position 1 */ |
| pos = 1; |
| |
| /* get length of table */ |
| lentable = (((picoos_uint16)(dt->outmaptable[pos + 1])) << 8 | |
| dt->outmaptable[pos]); |
| pos += 2; |
| |
| /* check table type */ |
| if (dt->outmaptable[pos] != PICOKDT_MTTYPE_BYTETOVAR) { |
| /* wrong table type */ |
| PICODBG_ERROR(("wrong table type %d", dt->outmaptable[pos])); |
| return FALSE; |
| } |
| size = 2; |
| pos++; |
| |
| /* get nr of ele in maptable (= nr of possible invals) */ |
| nrinbytes = (((picoos_uint16)(dt->outmaptable[pos+1])) << 8 | |
| dt->outmaptable[pos]); |
| pos += 2; |
| |
| /* check what's checkable */ |
| if (nrinbytes == 0) { |
| PICODBG_ERROR(("table with length zero")); |
| return FALSE; |
| } else if (inval >= nrinbytes) { |
| PICODBG_ERROR(("inval %d outside valid range %d", inval, nrinbytes)); |
| return FALSE; |
| } |
| |
| PICODBG_TRACE(("inval %d, lentable %d, nrinbytes %d, pos %d", inval, |
| lentable, nrinbytes, pos)); |
| |
| /* set off2ind to the position of the start of offset2-val */ |
| /* offset2 points to start of next ele */ |
| off2ind = pos + (size*inval); |
| |
| /* get number of output values, offset2 - offset1 */ |
| if (inval == 0) { |
| offset1 = 0; |
| } else { |
| offset1 = (((picoos_uint16)(dt->outmaptable[off2ind - 1])) << 8 | |
| dt->outmaptable[off2ind - 2]); |
| } |
| *nr = (((picoos_uint16)(dt->outmaptable[off2ind + 1])) << 8 | |
| dt->outmaptable[off2ind]) - offset1; |
| |
| PICODBG_TRACE(("offset1 %d, nr %d, pos %d", offset1, *nr, pos)); |
| |
| /* set pos to position of 1st value being mapped to */ |
| pos += (size * nrinbytes) + offset1; |
| |
| if ((pos + *nr - 1) > lentable) { |
| /* outside table, should not happen */ |
| PICODBG_ERROR(("problem with table index, pos %d, nr %d, len %d", |
| pos, *nr, lentable)); |
| return FALSE; |
| } |
| if (*nr > outvalmaxlen) { |
| /* not enough space in outval */ |
| PICODBG_ERROR(("overflow in outval, %d > %d", *nr, outvalmaxlen)); |
| return FALSE; |
| } |
| |
| /* finally, copy outmap result to outval */ |
| for (i = 0; i < *nr; i++) { |
| outval[i] = dt->outmaptable[pos++]; |
| } |
| return TRUE; |
| } |
| |
| |
| |
| /* ************************************************************/ |
| /* decision tree POS prediction (PosP) functions */ |
| /* ************************************************************/ |
| |
/* number of prefix and suffix graphemes used to construct the input vector;
   the total is derived so that the three values cannot drift apart */
#define KDT_POSP_NRGRAPHPREFATT   4
#define KDT_POSP_NRGRAPHSUFFATT   6
#define KDT_POSP_NRGRAPHATT       (KDT_POSP_NRGRAPHPREFATT + KDT_POSP_NRGRAPHSUFFATT)

/* positions of specgraph and nrgraphs attributes, directly behind the
   grapheme attributes */
#define KDT_POSP_SPECGRAPHATTPOS  KDT_POSP_NRGRAPHATT
#define KDT_POSP_NRGRAPHSATTPOS   (KDT_POSP_NRGRAPHATT + 1)
| |
| |
| /* construct PosP input vector |
| |
| PosP invec: 12 elements |
| |
| prefix 0-3 prefix graphemes (encoded using tree inpmaptable 0-3) |
| suffix 4-9 suffix graphemes (encoded using tree inpmaptable 4-9) |
| isspecchar 10 is a special grapheme (e.g. hyphen) inside the word (0/1)? |
| nr-utf-graphs 11 number of graphemes (ie. UTF8 chars) |
| |
| if there are less than 10 graphemes, each grapheme is used only |
| once, with the suffix having higher priority, ie. elements 0-9 are |
| filled as follows: |
| |
| #graph |
| 1 0 0 0 0 0 0 0 0 0 1 |
| 2 0 0 0 0 0 0 0 0 1 2 |
| 3 0 0 0 0 0 0 0 1 2 3 |
| 4 0 0 0 0 0 0 1 2 3 4 |
| 5 0 0 0 0 0 1 2 3 4 5 |
| 6 0 0 0 0 1 2 3 4 5 6 |
| 7 1 0 0 0 2 3 4 5 6 7 |
| 8 1 2 0 0 3 4 5 6 7 8 |
| 9 1 2 3 0 4 5 6 7 8 9 |
| 10 1 2 3 4 5 6 7 8 9 10 |
| 11 1 2 3 4 6 7 8 9 10 11 |
| ... |
| |
| 1-6: Fill chbuf |
| 7-10: front to invec 1st part, remove front, add rear |
| >10: remove front, add rear |
| no more graph -> |
| while chbuflen>0: |
| add rear to the last empty slot in 2nd part of invec, remove rear |
| */ |
| |
| |
| picoos_uint8 picokdt_dtPosPconstructInVec(const picokdt_DtPosP this, |
| const picoos_uint8 *graph, |
| const picoos_uint16 graphlen, |
| const picoos_uint8 specgraphflag) { |
| kdtposp_subobj_t *dtposp; |
| |
| /* utf8 circular char buffer, used as restricted input deque */ |
| /* 2nd part of graph invec has KDT_POSP_NRGRAPHSUFFATT elements, */ |
| /* max of UTF8_MAXLEN bytes per utf8 char */ |
| picoos_uint8 chbuf[KDT_POSP_NRGRAPHSUFFATT][PICOBASE_UTF8_MAXLEN]; |
| picoos_uint8 chbrear; /* next free pos */ |
| picoos_uint8 chbfront; /* next read pos */ |
| picoos_uint8 chblen; /* empty=0; full=KDT_POSP_NRGRAPHSUFFATT */ |
| |
| picoos_uint16 poscg; /* position of current graph (= utf8 char) */ |
| picoos_uint16 lencg = 0; /* length of current grapheme */ |
| picoos_uint16 nrutfg; /* number of utf graphemes */ |
| picoos_uint8 invecpos; /* next element to add in invec */ |
| picoos_uint16 fallback; /* fallback value for failed graph encodings */ |
| picoos_uint8 i; |
| |
| dtposp = (kdtposp_subobj_t *)this; |
| chbrear = 0; |
| chbfront = 0; |
| chblen = 0; |
| poscg = 0; |
| nrutfg = 0; |
| invecpos = 0; |
| |
| PICODBG_DEBUG(("graphlen %d", graphlen)); |
| |
| /* not needed, since all elements are set |
| for (i = 0; i < PICOKDT_NRATT_POSP; i++) { |
| dtposp->invec[i] = '\x63'; |
| } |
| */ |
| |
| dtposp->inveclen = 0; |
| |
| while ((poscg < graphlen) && |
| ((lencg = picobase_det_utf8_length(graph[poscg])) > 0)) { |
| if (chblen >= KDT_POSP_NRGRAPHSUFFATT) { /* chbuf full */ |
| if (invecpos < KDT_POSP_NRGRAPHPREFATT) { /* prefix not full */ |
| /* att-encode front utf graph and add in invec */ |
| if (!kdtMapInGraph(&(dtposp->dt), invecpos, |
| chbuf[chbfront], PICOBASE_UTF8_MAXLEN, |
| &(dtposp->invec[invecpos]), |
| &fallback)) { |
| if (fallback) { |
| dtposp->invec[invecpos] = fallback; |
| } else { |
| return FALSE; |
| } |
| } |
| invecpos++; |
| } |
| /* remove front utf graph */ |
| chbfront++; |
| chbfront %= KDT_POSP_NRGRAPHSUFFATT; |
| chblen--; |
| } |
| /* add current utf graph to chbuf */ |
| for (i=0; i<lencg; i++) { |
| chbuf[chbrear][i] = graph[poscg++]; |
| } |
| if (i < PICOBASE_UTF8_MAXLEN) { |
| chbuf[chbrear][i] = '\0'; |
| } |
| chbrear++; |
| chbrear %= KDT_POSP_NRGRAPHSUFFATT; |
| chblen++; |
| /* increase utf graph count */ |
| nrutfg++; |
| } |
| |
| if ((lencg == 0) || (chblen == 0)) { |
| return FALSE; |
| } else if (chblen > 0) { |
| |
| while (invecpos < KDT_POSP_NRGRAPHPREFATT) { /* fill up prefix */ |
| if (!kdtMapInGraph(&(dtposp->dt), invecpos, |
| PICOKDT_OUTSIDEGRAPH_DEFSTR, |
| PICOKDT_OUTSIDEGRAPH_DEFLEN, |
| &(dtposp->invec[invecpos]), &fallback)) { |
| if (fallback) { |
| dtposp->invec[invecpos] = fallback; |
| } else { |
| return FALSE; |
| } |
| } |
| invecpos++; |
| } |
| |
| for (i = (KDT_POSP_NRGRAPHATT - 1); |
| i >= KDT_POSP_NRGRAPHPREFATT; i--) { |
| if (chblen > 0) { |
| if (chbrear == 0) { |
| chbrear = KDT_POSP_NRGRAPHSUFFATT - 1; |
| } else { |
| chbrear--; |
| } |
| if (!kdtMapInGraph(&(dtposp->dt), i, chbuf[chbrear], |
| PICOBASE_UTF8_MAXLEN, |
| &(dtposp->invec[i]), &fallback)) { |
| if (fallback) { |
| dtposp->invec[i] = fallback; |
| } else { |
| return FALSE; |
| } |
| } |
| chblen--; |
| } else { |
| if (!kdtMapInGraph(&(dtposp->dt), i, |
| PICOKDT_OUTSIDEGRAPH_DEFSTR, |
| PICOKDT_OUTSIDEGRAPH_DEFLEN, |
| &(dtposp->invec[i]), &fallback)) { |
| if (fallback) { |
| dtposp->invec[i] = fallback; |
| } else { |
| return FALSE; |
| } |
| } |
| } |
| } |
| |
| /* set isSpecChar attribute, reuse var i */ |
| i = (specgraphflag ? 1 : 0); |
| if (!kdtMapInFixed(&(dtposp->dt), KDT_POSP_SPECGRAPHATTPOS, i, |
| &(dtposp->invec[KDT_POSP_SPECGRAPHATTPOS]), |
| &fallback)) { |
| if (fallback) { |
| dtposp->invec[KDT_POSP_SPECGRAPHATTPOS] = fallback; |
| } else { |
| return FALSE; |
| } |
| } |
| |
| /* set nrGraphs attribute */ |
| if (!kdtMapInFixed(&(dtposp->dt), KDT_POSP_NRGRAPHSATTPOS, nrutfg, |
| &(dtposp->invec[KDT_POSP_NRGRAPHSATTPOS]), |
| &fallback)) { |
| if (fallback) { |
| dtposp->invec[KDT_POSP_NRGRAPHSATTPOS] = fallback; |
| } else { |
| return FALSE; |
| } |
| } |
| PICODBG_DEBUG(("posp-invec: [%d,%d,%d,%d|%d,%d,%d,%d,%d,%d|%d|%d]", |
| dtposp->invec[0], dtposp->invec[1], dtposp->invec[2], |
| dtposp->invec[3], dtposp->invec[4], dtposp->invec[5], |
| dtposp->invec[6], dtposp->invec[7], dtposp->invec[8], |
| dtposp->invec[9], dtposp->invec[10], |
| dtposp->invec[11], dtposp->invec[12])); |
| dtposp->inveclen = PICOKDT_NRINPMT_POSP; |
| return TRUE; |
| } |
| |
| return FALSE; |
| } |
| |
| |
| picoos_uint8 picokdt_dtPosPclassify(const picokdt_DtPosP this) { |
| picoos_uint32 iByteNo; |
| picoos_int8 iBitNo; |
| picoos_int8 rv; |
| kdtposp_subobj_t *dtposp; |
| kdt_subobj_t *dt; |
| |
| dtposp = (kdtposp_subobj_t *)this; |
| dt = &(dtposp->dt); |
| iByteNo = 0; |
| iBitNo = 7; |
| while ((rv = kdtAskTree(dt, dtposp->invec, PICOKDT_NRATT_POSP, |
| &iByteNo, &iBitNo)) > 0) { |
| PICODBG_TRACE(("asking tree")); |
| } |
| PICODBG_DEBUG(("done: %d", dt->dclass)); |
| return ((rv == 0) && dt->dset); |
| } |
| |
| |
| picoos_uint8 picokdt_dtPosPdecomposeOutClass(const picokdt_DtPosP this, |
| picokdt_classify_result_t *dtres) { |
| kdtposp_subobj_t *dtposp; |
| picoos_uint16 val; |
| |
| dtposp = (kdtposp_subobj_t *)this; |
| |
| if (dtposp->dt.dset && |
| kdtMapOutFixed(&(dtposp->dt), dtposp->dt.dclass, &val)) { |
| dtres->set = TRUE; |
| dtres->class = val; |
| return TRUE; |
| } else { |
| dtres->set = FALSE; |
| return FALSE; |
| } |
| } |
| |
| |
| |
| /* ************************************************************/ |
| /* decision tree POS disambiguation (PosD) functions */ |
| /* ************************************************************/ |
| |
| |
| picoos_uint8 picokdt_dtPosDconstructInVec(const picokdt_DtPosD this, |
| const picoos_uint16 * input) { |
| kdtposd_subobj_t *dtposd; |
| picoos_uint8 i; |
| picoos_uint16 fallback = 0; |
| |
| dtposd = (kdtposd_subobj_t *)this; |
| dtposd->inveclen = 0; |
| |
| PICODBG_DEBUG(("in: [%d,%d,%d|%d|%d,%d,%d]", |
| input[0], input[1], input[2], |
| input[3], input[4], input[5], |
| input[6])); |
| for (i = 0; i < PICOKDT_NRATT_POSD; i++) { |
| |
| /* do the imt mapping for all inval */ |
| if (!kdtMapInFixed(&(dtposd->dt), i, input[i], |
| &(dtposd->invec[i]), &fallback)) { |
| if (fallback) { |
| PICODBG_DEBUG(("*** using fallback for input mapping: %i -> %i", input[i], fallback)); |
| dtposd->invec[i] = fallback; |
| } else { |
| PICODBG_ERROR(("problem doing input mapping")); |
| return FALSE; |
| } |
| } |
| } |
| |
| PICODBG_DEBUG(("out: [%d,%d,%d|%d|%d,%d,%d]", |
| dtposd->invec[0], dtposd->invec[1], dtposd->invec[2], |
| dtposd->invec[3], dtposd->invec[4], dtposd->invec[5], |
| dtposd->invec[6])); |
| dtposd->inveclen = PICOKDT_NRINPMT_POSD; |
| return TRUE; |
| } |
| |
| |
| picoos_uint8 picokdt_dtPosDclassify(const picokdt_DtPosD this, |
| picoos_uint16 *treeout) { |
| picoos_uint32 iByteNo; |
| picoos_int8 iBitNo; |
| picoos_int8 rv; |
| kdtposd_subobj_t *dtposd; |
| kdt_subobj_t *dt; |
| |
| dtposd = (kdtposd_subobj_t *)this; |
| dt = &(dtposd->dt); |
| iByteNo = 0; |
| iBitNo = 7; |
| while ((rv = kdtAskTree(dt, dtposd->invec, PICOKDT_NRATT_POSD, |
| &iByteNo, &iBitNo)) > 0) { |
| PICODBG_TRACE(("asking tree")); |
| } |
| PICODBG_DEBUG(("done: %d", dt->dclass)); |
| if ((rv == 0) && dt->dset) { |
| *treeout = dt->dclass; |
| return TRUE; |
| } else { |
| return FALSE; |
| } |
| } |
| |
| |
| /* decompose the tree output and return the class in dtres |
| dtres: POS classification result |
| returns: TRUE if okay, FALSE otherwise |
| */ |
| picoos_uint8 picokdt_dtPosDdecomposeOutClass(const picokdt_DtPosD this, |
| picokdt_classify_result_t *dtres) { |
| kdtposd_subobj_t *dtposd; |
| picoos_uint16 val; |
| |
| dtposd = (kdtposd_subobj_t *)this; |
| |
| if (dtposd->dt.dset && |
| kdtMapOutFixed(&(dtposd->dt), dtposd->dt.dclass, &val)) { |
| dtres->set = TRUE; |
| dtres->class = val; |
| return TRUE; |
| } else { |
| dtres->set = FALSE; |
| return FALSE; |
| } |
| } |
| |
| |
| |
| /* ************************************************************/ |
| /* decision tree grapheme-to-phoneme (G2P) functions */ |
| /* ************************************************************/ |
| |
| |
| /* get the nr'th (starting at 0) utf char in utfgraph */ |
| static picoos_uint8 kdtGetUTF8char(const picoos_uint8 *utfgraph, |
| const picoos_uint16 graphlen, |
| const picoos_uint16 nr, |
| picoos_uint8 *utf8char) { |
| picoos_uint16 i; |
| picoos_uint32 pos; |
| |
| pos = 0; |
| for (i = 0; i < nr; i++) { |
| if (!picobase_get_next_utf8charpos(utfgraph, graphlen, &pos)) { |
| return FALSE; |
| } |
| } |
| return picobase_get_next_utf8char(utfgraph, graphlen, &pos, utf8char); |
| } |
| |
| /* determine the utfchar count (starting at 1) of the utfchar starting at pos */ |
| static picoos_uint16 kdtGetUTF8Nr(const picoos_uint8 *utfgraph, |
| const picoos_uint16 graphlen, |
| const picoos_uint16 pos) { |
| picoos_uint32 postmp; |
| picoos_uint16 count; |
| |
| count = 0; |
| postmp = 0; |
| while ((postmp <= pos) && (count < graphlen)) { |
| if (!picobase_get_next_utf8charpos(utfgraph, graphlen, &postmp)) { |
| PICODBG_ERROR(("invalid utf8 string, count: %d, pos: %d, post: %d", |
| count, pos, postmp)); |
| return count + 1; |
| } |
| count++; |
| } |
| return count; |
| } |
| |
| |
| picoos_uint8 picokdt_dtG2PconstructInVec(const picokdt_DtG2P this, |
| const picoos_uint8 *graph, |
| const picoos_uint16 graphlen, |
| const picoos_uint8 count, |
| const picoos_uint8 pos, |
| const picoos_uint8 nrvow, |
| const picoos_uint8 ordvow, |
| picoos_uint8 *primstressflag, |
| const picoos_uint16 phonech1, |
| const picoos_uint16 phonech2, |
| const picoos_uint16 phonech3) { |
| kdtg2p_subobj_t *dtg2p; |
| picoos_uint16 fallback = 0; |
| picoos_uint8 iAttr; |
| picoos_uint8 utf8char[PICOBASE_UTF8_MAXLEN + 1]; |
| picoos_uint16 inval; |
| picoos_int16 cinv; |
| picoos_uint8 retval; |
| picoos_int32 utfgraphlen; |
| picoos_uint16 utfcount; |
| |
| dtg2p = (kdtg2p_subobj_t *)this; |
| retval = TRUE; |
| inval = 0; |
| |
| PICODBG_TRACE(("in: [%d,%d,%d|%d,%d|%d|%d,%d,%d]", graphlen, count, pos, |
| nrvow, ordvow, *primstressflag, phonech1, phonech2, |
| phonech3)); |
| |
| dtg2p->inveclen = 0; |
| |
| /* many speed-ups possible */ |
| |
| /* graph attributes */ |
| /* count > = <= count |
| iAttr lowbound eow upbound delta |
| 0 4 4 graphlen 5 |
| 1 3 3 graphlen 4 |
| 2 2 2 graphlen 3 |
| 3 1 1 graphlen 2 |
| 4 0 - graphlen 1 |
| |
| 5 0 graphlen graphlen-1 0 |
| 6 0 graphlen-1 graphlen-2 -1 |
| 7 0 graphlen-2 graphlen-3 -2 |
| 8 0 graphlen-3 graphlen-4 -3 |
| */ |
| |
| /* graph attributes left (context -4/-3/-2/-1) and current, MapInGraph */ |
| |
| utfgraphlen = picobase_utf8_length(graph, graphlen); |
| if (utfgraphlen <= 0) { |
| utfgraphlen = 0; |
| } |
| utfcount = kdtGetUTF8Nr(graph, graphlen, count); |
| |
| cinv = 4; |
| for (iAttr = 0; iAttr < 5; iAttr++) { |
| if ((utfcount > cinv) && (utfcount <= utfgraphlen)) { |
| |
| /* utf8char[0] = graph[count - cinv - 1];*/ |
| if (!kdtGetUTF8char(graph, graphlen, utfcount-cinv-1, |
| utf8char)) { |
| PICODBG_WARN(("problem getting UTF char %d", utfcount-cinv-1)); |
| utf8char[0] = PICOKDT_OUTSIDEGRAPH_DEFCH; |
| utf8char[1] = '\0'; |
| } |
| } else { |
| if ((utfcount == cinv) && (iAttr != 4)) { |
| utf8char[0] = PICOKDT_OUTSIDEGRAPH_EOW_DEFCH; |
| } else { |
| utf8char[0] = PICOKDT_OUTSIDEGRAPH_DEFCH; |
| } |
| utf8char[1] = '\0'; |
| } |
| |
| if (!kdtMapInGraph(&(dtg2p->dt), iAttr, |
| utf8char, PICOBASE_UTF8_MAXLEN, |
| &(dtg2p->invec[iAttr]), |
| &fallback)) { |
| if (fallback) { |
| dtg2p->invec[iAttr] = fallback; |
| } else { |
| PICODBG_WARN(("setting attribute %d to zero", iAttr)); |
| dtg2p->invec[iAttr] = 0; |
| retval = FALSE; |
| } |
| } |
| PICODBG_TRACE(("invec %d %c", iAttr, utf8char[0])); |
| cinv--; |
| } |
| |
| /* graph attributes right (context 1/2/3/4), MapInGraph */ |
| cinv = utfgraphlen; |
| for (iAttr = 5; iAttr < 9; iAttr++) { |
| if ((utfcount > 0) && (utfcount <= (cinv - 1))) { |
| /* utf8char[0] = graph[count + graphlen - cinv];*/ |
| if (!kdtGetUTF8char(graph, graphlen, utfcount+utfgraphlen-cinv, |
| utf8char)) { |
| PICODBG_WARN(("problem getting UTF char %d", |
| utfcount+utfgraphlen-cinv-1)); |
| utf8char[0] = PICOKDT_OUTSIDEGRAPH_DEFCH; |
| utf8char[1] = '\0'; |
| } |
| } else { |
| if (utfcount == cinv) { |
| utf8char[0] = PICOKDT_OUTSIDEGRAPH_EOW_DEFCH; |
| utf8char[1] = '\0'; |
| } else { |
| utf8char[0] = PICOKDT_OUTSIDEGRAPH_DEFCH; |
| utf8char[1] = '\0'; |
| } |
| } |
| if (!kdtMapInGraph(&(dtg2p->dt), iAttr, |
| utf8char, PICOBASE_UTF8_MAXLEN, |
| &(dtg2p->invec[iAttr]), |
| &fallback)) { |
| if (fallback) { |
| dtg2p->invec[iAttr] = fallback; |
| } else { |
| PICODBG_WARN(("setting attribute %d to zero", iAttr)); |
| dtg2p->invec[iAttr] = 0; |
| retval = FALSE; |
| } |
| } |
| PICODBG_TRACE(("invec %d %c", iAttr, utf8char[0])); |
| cinv--; |
| } |
| |
| /* other attributes, MapInFixed */ |
| for (iAttr = 9; iAttr < PICOKDT_NRATT_G2P; iAttr++) { |
| switch (iAttr) { |
| case 9: /* word POS, Fix1 */ |
| inval = pos; |
| break; |
| case 10: /* nr of vowel-like graphs in word, if vowel, Fix2 */ |
| inval = nrvow; |
| break; |
| case 11: /* order of current vowel-like graph in word, Fix2 */ |
| inval = ordvow; |
| break; |
| case 12: /* primary stress mark, Fix2 */ |
| if (*primstressflag == 1) { |
| /*already set previously*/ |
| inval = 1; |
| } else { |
| inval = 0; |
| } |
| break; |
| case 13: /* phone chunk right context +1, Hist */ |
| inval = phonech1; |
| break; |
| case 14: /* phone chunk right context +2, Hist */ |
| inval = phonech2; |
| break; |
| case 15: /* phone chunk right context +3, Hist */ |
| inval = phonech3; |
| break; |
| } |
| |
| PICODBG_TRACE(("invec %d %d", iAttr, inval)); |
| |
| if (!kdtMapInFixed(&(dtg2p->dt), iAttr, inval, |
| &(dtg2p->invec[iAttr]), &fallback)) { |
| if (fallback) { |
| dtg2p->invec[iAttr] = fallback; |
| } else { |
| PICODBG_WARN(("setting attribute %d to zero", iAttr)); |
| dtg2p->invec[iAttr] = 0; |
| retval = FALSE; |
| } |
| } |
| } |
| |
| PICODBG_TRACE(("out: [%d,%d%,%d,%d|%d|%d,%d,%d,%d|%d,%d,%d,%d|" |
| "%d,%d,%d]", dtg2p->invec[0], dtg2p->invec[1], |
| dtg2p->invec[2], dtg2p->invec[3], dtg2p->invec[4], |
| dtg2p->invec[5], dtg2p->invec[6], dtg2p->invec[7], |
| dtg2p->invec[8], dtg2p->invec[9], dtg2p->invec[10], |
| dtg2p->invec[11], dtg2p->invec[12], dtg2p->invec[13], |
| dtg2p->invec[14], dtg2p->invec[15])); |
| |
| dtg2p->inveclen = PICOKDT_NRINPMT_G2P; |
| return retval; |
| } |
| |
| |
| |
| |
| picoos_uint8 picokdt_dtG2Pclassify(const picokdt_DtG2P this, |
| picoos_uint16 *treeout) { |
| picoos_uint32 iByteNo; |
| picoos_int8 iBitNo; |
| picoos_int8 rv; |
| kdtg2p_subobj_t *dtg2p; |
| kdt_subobj_t *dt; |
| |
| dtg2p = (kdtg2p_subobj_t *)this; |
| dt = &(dtg2p->dt); |
| iByteNo = 0; |
| iBitNo = 7; |
| while ((rv = kdtAskTree(dt, dtg2p->invec, PICOKDT_NRATT_G2P, |
| &iByteNo, &iBitNo)) > 0) { |
| PICODBG_TRACE(("asking tree")); |
| } |
| PICODBG_TRACE(("done: %d", dt->dclass)); |
| if ((rv == 0) && dt->dset) { |
| *treeout = dt->dclass; |
| return TRUE; |
| } else { |
| return FALSE; |
| } |
| } |
| |
| |
| |
| picoos_uint8 picokdt_dtG2PdecomposeOutClass(const picokdt_DtG2P this, |
| picokdt_classify_vecresult_t *dtvres) { |
| kdtg2p_subobj_t *dtg2p; |
| |
| dtg2p = (kdtg2p_subobj_t *)this; |
| |
| if (dtg2p->dt.dset && |
| kdtMapOutVar(&(dtg2p->dt), dtg2p->dt.dclass, &(dtvres->nr), |
| dtvres->classvec, PICOKDT_MAXSIZE_OUTVEC)) { |
| return TRUE; |
| } else { |
| dtvres->nr = 0; |
| return FALSE; |
| } |
| return TRUE; |
| } |
| |
| |
| |
| /* ************************************************************/ |
| /* decision tree phrasing (PHR) functions */ |
| /* ************************************************************/ |
| |
| picoos_uint8 picokdt_dtPHRconstructInVec(const picokdt_DtPHR this, |
| const picoos_uint8 pre2, |
| const picoos_uint8 pre1, |
| const picoos_uint8 src, |
| const picoos_uint8 fol1, |
| const picoos_uint8 fol2, |
| const picoos_uint16 nrwordspre, |
| const picoos_uint16 nrwordsfol, |
| const picoos_uint16 nrsyllsfol) { |
| kdtphr_subobj_t *dtphr; |
| picoos_uint8 i; |
| picoos_uint16 inval = 0; |
| picoos_uint16 fallback = 0; |
| |
| dtphr = (kdtphr_subobj_t *)this; |
| PICODBG_DEBUG(("in: [%d,%d|%d|%d,%d|%d,%d,%d]", |
| pre2, pre1, src, fol1, fol2, |
| nrwordspre, nrwordsfol, nrsyllsfol)); |
| dtphr->inveclen = 0; |
| |
| for (i = 0; i < PICOKDT_NRATT_PHR; i++) { |
| switch (i) { |
| case 0: inval = pre2; break; |
| case 1: inval = pre1; break; |
| case 2: inval = src; break; |
| case 3: inval = fol1; break; |
| case 4: inval = fol2; break; |
| case 5: inval = nrwordspre; break; |
| case 6: inval = nrwordsfol; break; |
| case 7: inval = nrsyllsfol; break; |
| default: |
| PICODBG_ERROR(("size mismatch")); |
| return FALSE; |
| break; |
| } |
| |
| /* do the imt mapping for all inval */ |
| if (!kdtMapInFixed(&(dtphr->dt), i, inval, |
| &(dtphr->invec[i]), &fallback)) { |
| if (fallback) { |
| dtphr->invec[i] = fallback; |
| } else { |
| PICODBG_ERROR(("problem doing input mapping")); |
| return FALSE; |
| } |
| } |
| } |
| |
| PICODBG_DEBUG(("out: [%d,%d|%d|%d,%d|%d,%d,%d]", |
| dtphr->invec[0], dtphr->invec[1], dtphr->invec[2], |
| dtphr->invec[3], dtphr->invec[4], dtphr->invec[5], |
| dtphr->invec[6], dtphr->invec[7])); |
| dtphr->inveclen = PICOKDT_NRINPMT_PHR; |
| return TRUE; |
| } |
| |
| |
| picoos_uint8 picokdt_dtPHRclassify(const picokdt_DtPHR this) { |
| picoos_uint32 iByteNo; |
| picoos_int8 iBitNo; |
| picoos_int8 rv; |
| kdtphr_subobj_t *dtphr; |
| kdt_subobj_t *dt; |
| |
| dtphr = (kdtphr_subobj_t *)this; |
| dt = &(dtphr->dt); |
| iByteNo = 0; |
| iBitNo = 7; |
| while ((rv = kdtAskTree(dt, dtphr->invec, PICOKDT_NRATT_PHR, |
| &iByteNo, &iBitNo)) > 0) { |
| PICODBG_TRACE(("asking tree")); |
| } |
| PICODBG_DEBUG(("done: %d", dt->dclass)); |
| return ((rv == 0) && dt->dset); |
| } |
| |
| |
| picoos_uint8 picokdt_dtPHRdecomposeOutClass(const picokdt_DtPHR this, |
| picokdt_classify_result_t *dtres) { |
| kdtphr_subobj_t *dtphr; |
| picoos_uint16 val; |
| |
| dtphr = (kdtphr_subobj_t *)this; |
| |
| if (dtphr->dt.dset && |
| kdtMapOutFixed(&(dtphr->dt), dtphr->dt.dclass, &val)) { |
| dtres->set = TRUE; |
| dtres->class = val; |
| return TRUE; |
| } else { |
| dtres->set = FALSE; |
| return FALSE; |
| } |
| } |
| |
| |
| |
| /* ************************************************************/ |
| /* decision tree phono-acoustical model (PAM) functions */ |
| /* ************************************************************/ |
| |
| picoos_uint8 picokdt_dtPAMconstructInVec(const picokdt_DtPAM this, |
| const picoos_uint8 *vec, |
| const picoos_uint8 veclen) { |
| kdtpam_subobj_t *dtpam; |
| picoos_uint8 i; |
| picoos_uint16 fallback = 0; |
| |
| dtpam = (kdtpam_subobj_t *)this; |
| |
| PICODBG_TRACE(("in0: %d %d %d %d %d %d %d %d %d %d", |
| vec[0], vec[1], vec[2], vec[3], vec[4], |
| vec[5], vec[6], vec[7], vec[8], vec[9])); |
| PICODBG_TRACE(("in1: %d %d %d %d %d %d %d %d %d %d", |
| vec[10], vec[11], vec[12], vec[13], vec[14], |
| vec[15], vec[16], vec[17], vec[18], vec[19])); |
| PICODBG_TRACE(("in2: %d %d %d %d %d %d %d %d %d %d", |
| vec[20], vec[21], vec[22], vec[23], vec[24], |
| vec[25], vec[26], vec[27], vec[28], vec[29])); |
| PICODBG_TRACE(("in3: %d %d %d %d %d %d %d %d %d %d", |
| vec[30], vec[31], vec[32], vec[33], vec[34], |
| vec[35], vec[36], vec[37], vec[38], vec[39])); |
| PICODBG_TRACE(("in4: %d %d %d %d %d %d %d %d %d %d", |
| vec[40], vec[41], vec[42], vec[43], vec[44], |
| vec[45], vec[46], vec[47], vec[48], vec[49])); |
| PICODBG_TRACE(("in5: %d %d %d %d %d %d %d %d %d %d", |
| vec[50], vec[51], vec[52], vec[53], vec[54], |
| vec[55], vec[56], vec[57], vec[58], vec[59])); |
| |
| dtpam->inveclen = 0; |
| |
| /* check veclen */ |
| if (veclen != PICOKDT_NRINPMT_PAM) { |
| PICODBG_ERROR(("wrong number of input vector elements")); |
| return FALSE; |
| } |
| |
| for (i = 0; i < PICOKDT_NRATT_PAM; i++) { |
| |
| /* do the imt mapping for all vec eles */ |
| if (!kdtMapInFixed(&(dtpam->dt), i, vec[i], |
| &(dtpam->invec[i]), &fallback)) { |
| if (fallback) { |
| dtpam->invec[i] = fallback; |
| } else { |
| PICODBG_ERROR(("problem doing input mapping, %d %d", i,vec[i])); |
| return FALSE; |
| } |
| } |
| } |
| |
| PICODBG_TRACE(("in0: %d %d %d %d %d %d %d %d %d %d", |
| dtpam->invec[0], dtpam->invec[1], dtpam->invec[2], |
| dtpam->invec[3], dtpam->invec[4], dtpam->invec[5], |
| dtpam->invec[6], dtpam->invec[7], dtpam->invec[8], |
| dtpam->invec[9])); |
| PICODBG_TRACE(("in1: %d %d %d %d %d %d %d %d %d %d", |
| dtpam->invec[10], dtpam->invec[11], dtpam->invec[12], |
| dtpam->invec[13], dtpam->invec[14], dtpam->invec[15], |
| dtpam->invec[16], dtpam->invec[17], dtpam->invec[18], |
| dtpam->invec[19])); |
| PICODBG_TRACE(("in2: %d %d %d %d %d %d %d %d %d %d", |
| dtpam->invec[20], dtpam->invec[21], dtpam->invec[22], |
| dtpam->invec[23], dtpam->invec[24], dtpam->invec[25], |
| dtpam->invec[26], dtpam->invec[27], dtpam->invec[28], |
| dtpam->invec[29])); |
| PICODBG_TRACE(("in3: %d %d %d %d %d %d %d %d %d %d", |
| dtpam->invec[30], dtpam->invec[31], dtpam->invec[32], |
| dtpam->invec[33], dtpam->invec[34], dtpam->invec[35], |
| dtpam->invec[36], dtpam->invec[37], dtpam->invec[38], |
| dtpam->invec[39])); |
| PICODBG_TRACE(("in4: %d %d %d %d %d %d %d %d %d %d", |
| dtpam->invec[40], dtpam->invec[41], dtpam->invec[42], |
| dtpam->invec[43], dtpam->invec[44], dtpam->invec[45], |
| dtpam->invec[46], dtpam->invec[47], dtpam->invec[48], |
| dtpam->invec[49])); |
| PICODBG_TRACE(("in5: %d %d %d %d %d %d %d %d %d %d", |
| dtpam->invec[50], dtpam->invec[51], dtpam->invec[52], |
| dtpam->invec[53], dtpam->invec[54], dtpam->invec[55], |
| dtpam->invec[56], dtpam->invec[57], dtpam->invec[58], |
| dtpam->invec[59])); |
| |
| dtpam->inveclen = PICOKDT_NRINPMT_PAM; |
| return TRUE; |
| } |
| |
| |
| picoos_uint8 picokdt_dtPAMclassify(const picokdt_DtPAM this) { |
| picoos_uint32 iByteNo; |
| picoos_int8 iBitNo; |
| picoos_int8 rv; |
| kdtpam_subobj_t *dtpam; |
| kdt_subobj_t *dt; |
| |
| dtpam = (kdtpam_subobj_t *)this; |
| dt = &(dtpam->dt); |
| iByteNo = 0; |
| iBitNo = 7; |
| while ((rv = kdtAskTree(dt, dtpam->invec, PICOKDT_NRATT_PAM, |
| &iByteNo, &iBitNo)) > 0) { |
| PICODBG_TRACE(("asking tree")); |
| } |
| PICODBG_DEBUG(("done: %d", dt->dclass)); |
| return ((rv == 0) && dt->dset); |
| } |
| |
| |
| picoos_uint8 picokdt_dtPAMdecomposeOutClass(const picokdt_DtPAM this, |
| picokdt_classify_result_t *dtres) { |
| kdtpam_subobj_t *dtpam; |
| picoos_uint16 val; |
| |
| dtpam = (kdtpam_subobj_t *)this; |
| |
| if (dtpam->dt.dset && |
| kdtMapOutFixed(&(dtpam->dt), dtpam->dt.dclass, &val)) { |
| dtres->set = TRUE; |
| dtres->class = val; |
| return TRUE; |
| } else { |
| dtres->set = FALSE; |
| return FALSE; |
| } |
| } |
| |
| |
| |
| /* ************************************************************/ |
| /* decision tree accentuation (ACC) functions */ |
| /* ************************************************************/ |
| |
| picoos_uint8 picokdt_dtACCconstructInVec(const picokdt_DtACC this, |
| const picoos_uint8 pre2, |
| const picoos_uint8 pre1, |
| const picoos_uint8 src, |
| const picoos_uint8 fol1, |
| const picoos_uint8 fol2, |
| const picoos_uint16 hist1, |
| const picoos_uint16 hist2, |
| const picoos_uint16 nrwordspre, |
| const picoos_uint16 nrsyllspre, |
| const picoos_uint16 nrwordsfol, |
| const picoos_uint16 nrsyllsfol, |
| const picoos_uint16 footwordsfol, |
| const picoos_uint16 footsyllsfol) { |
| kdtacc_subobj_t *dtacc; |
| picoos_uint8 i; |
| picoos_uint16 inval = 0; |
| picoos_uint16 fallback = 0; |
| |
| dtacc = (kdtacc_subobj_t *)this; |
| PICODBG_DEBUG(("in: [%d,%d,%d,%d,%d|%d,%d|%d,%d,%d,%d|%d,%d]", |
| pre2, pre1, src, fol1, fol2, hist1, hist2, |
| nrwordspre, nrsyllspre, nrwordsfol, nrsyllsfol, |
| footwordsfol, footsyllsfol)); |
| dtacc->inveclen = 0; |
| |
| for (i = 0; i < PICOKDT_NRATT_ACC; i++) { |
| switch (i) { |
| case 0: inval = pre2; break; |
| case 1: inval = pre1; break; |
| case 2: inval = src; break; |
| case 3: inval = fol1; break; |
| case 4: inval = fol2; break; |
| case 5: inval = hist1; break; |
| case 6: inval = hist2; break; |
| case 7: inval = nrwordspre; break; |
| case 8: inval = nrsyllspre; break; |
| case 9: inval = nrwordsfol; break; |
| case 10: inval = nrsyllsfol; break; |
| case 11: inval = footwordsfol; break; |
| case 12: inval = footsyllsfol; break; |
| default: |
| PICODBG_ERROR(("size mismatch")); |
| return FALSE; |
| break; |
| } |
| |
| if (((i == 5) || (i == 6)) && (inval == PICOKDT_HISTORY_ZERO)) { |
| /* in input to this function the HISTORY_ZERO is used to |
| mark the no-value-available case. For sparsity reasons |
| this was not used in the training. For |
| no-value-available cases, instead, do reverse out |
| mapping of ACC0 to get tree domain for ACC0 */ |
| if (!kdtReverseMapOutFixed(&(dtacc->dt), PICODATA_ACC0, |
| &inval, &fallback)) { |
| if (fallback) { |
| inval = fallback; |
| } else { |
| PICODBG_ERROR(("problem doing reverse output mapping")); |
| return FALSE; |
| } |
| } |
| } |
| |
| /* do the imt mapping for all inval */ |
| if (!kdtMapInFixed(&(dtacc->dt), i, inval, |
| &(dtacc->invec[i]), &fallback)) { |
| if (fallback) { |
| dtacc->invec[i] = fallback; |
| } else { |
| PICODBG_ERROR(("problem doing input mapping")); |
| return FALSE; |
| } |
| } |
| } |
| |
| PICODBG_DEBUG(("out: [%d,%d,%d,%d,%d|%d,%d|%d,%d,%d,%d|%d,%d]", |
| dtacc->invec[0], dtacc->invec[1], dtacc->invec[2], |
| dtacc->invec[3], dtacc->invec[4], dtacc->invec[5], |
| dtacc->invec[6], dtacc->invec[7], dtacc->invec[8], |
| dtacc->invec[9], dtacc->invec[10], dtacc->invec[11], |
| dtacc->invec[12])); |
| dtacc->inveclen = PICOKDT_NRINPMT_ACC; |
| return TRUE; |
| } |
| |
| |
| picoos_uint8 picokdt_dtACCclassify(const picokdt_DtACC this, |
| picoos_uint16 *treeout) { |
| picoos_uint32 iByteNo; |
| picoos_int8 iBitNo; |
| picoos_int8 rv; |
| kdtacc_subobj_t *dtacc; |
| kdt_subobj_t *dt; |
| |
| dtacc = (kdtacc_subobj_t *)this; |
| dt = &(dtacc->dt); |
| iByteNo = 0; |
| iBitNo = 7; |
| while ((rv = kdtAskTree(dt, dtacc->invec, PICOKDT_NRATT_ACC, |
| &iByteNo, &iBitNo)) > 0) { |
| PICODBG_TRACE(("asking tree")); |
| } |
| PICODBG_TRACE(("done: %d", dt->dclass)); |
| if ((rv == 0) && dt->dset) { |
| *treeout = dt->dclass; |
| return TRUE; |
| } else { |
| return FALSE; |
| } |
| } |
| |
| |
| picoos_uint8 picokdt_dtACCdecomposeOutClass(const picokdt_DtACC this, |
| picokdt_classify_result_t *dtres) { |
| kdtacc_subobj_t *dtacc; |
| picoos_uint16 val; |
| |
| dtacc = (kdtacc_subobj_t *)this; |
| |
| if (dtacc->dt.dset && |
| kdtMapOutFixed(&(dtacc->dt), dtacc->dt.dclass, &val)) { |
| dtres->set = TRUE; |
| dtres->class = val; |
| return TRUE; |
| } else { |
| dtres->set = FALSE; |
| return FALSE; |
| } |
| } |
| |
| #ifdef __cplusplus |
| } |
| #endif |
| |
| |
| /* end */ |