blob: 54e36ac53f3cdf5f07773dc09d92a0aa9886760a [file] [log] [blame]
/*
* Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @file picokdt.c
*
* knowledge handling for decision trees
*
* Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
* All rights reserved.
*
* History:
* - 2009-04-20 -- initial version
*
*/
#include "picoos.h"
#include "picodbg.h"
#include "picobase.h"
#include "picoknow.h"
#include "picodata.h"
#include "picokdt.h"
#ifdef __cplusplus
extern "C" {
#endif
#if 0
}
#endif
/* ************************************************************/
/* decision tree */
/* ************************************************************/
/**
* @addtogroup picokdt
* ---------------------------------------------------\n
* <b> Pico KDT support </b>\n
* ---------------------------------------------------\n
overview extended binary tree file:
- dt consists of optional attribute mapping tables and a non-empty
tree part
- using the attribute mapping tables an attribute value as used
throughout the TTS can be mapped to its smaller representation
used in the tree
- multi-byte values always little endian
-------------------------------------------------------------------
- bin-file, decision tree knowledge base in binary form
- dt-kb = header inputmaptables outputmaptables tree
- header = INPMAPTABLEPOS2 OUTMAPTABLEPOS2 TREEPOS2
- INPMAPTABLEPOS2: two bytes, equals offset in number of bytes from
the start of kb to the start of input map tables,
may not be 0
- OUTMAPTABLEPOS2: two bytes, equals offset in number of bytes from
the start of kb to the start of outtables,
may not be 0
- TREEPOS2: two bytes, equals offset in number of bytes from the
start of kb to the start of the tree
- inputmaptables = maptables
- outputmaptables = maptables
- maptables = NRMAPTABLES1 {maptable}=NRMAPTABLES1
- maptable = LENTABLE2 TABLETYPE1 ( bytemaptable
| wordmaptable
| graphinmaptable
| bytetovarmaptable )
- bytemaptable (in or out, usage varies) = NRBYTES2 {BYTE1}=NRBYTES2
- wordmaptable (in or out, usage varies) = NRWORDS2 {WORD2}=NRWORDS2
- graphinmaptable (in only) = NRGRAPHS2 {GRAPH1:4}=NRGRAPHS2
- bytetovarmaptable (out only) = NRINBYTES2 outvarsearchind
outvaroutputs
- outvarsearchind = {OUTVAROFFSET2}=NRINBYTES2
- outvaroutputs = {VARVALID1:}=NRINBYTES2
- bytemaptable: fixed size, *Map*Fixed \n
- wordmaptable: fixed size, *Map*Fixed \n
- graphinmaptable: search value is variable size (UTF8 grapheme), \n
value to be mapped to is fixed size, one byte \n
- bytetovarmaptable: search value is fixed size, one byte, values \n
to be mapped to are of variable size (e.g. several \n
phones) \n
- NRMAPTABLES1: one byte representing the number of map tables
- LENTABLE2: two bytes, equals offset to the next table (or next
part of kb, e.g. tree),
if LENTABLE2 = 3, and
TABLETYPE1 = EMPTY -> empty table, no mapping to be done
- TABLETYPE1: one byte, type of map table (byte, word, or graph=utf8)
- NRBYTES2: two bytes, number of bytes following in the table (one
would be okay, to simplify some implementation also set
to 2)
- BYTE1: one byte, the sequence is used to determine the values
being mapped to, starting with 0
- NRWORDS2: two bytes, number of words (two bytes) following in the table
- WORD2: two bytes, the sequence is used to determine the values
being mapped to, starting with 0
- NRGRAPHS2: two bytes, number of graphemes encoded in UTF8 following
in table
- GRAPH1:4: one to four bytes, UTF8 representation of a grapheme, the
sequence of graphemes is used to determine the value being
mapped to, starting with 0, the length information is
encoded in UTF8, no need for extra length info
- NRINBYTES2: two bytes, number of single byte IDs the tree can produce
- OUTVAROFFSET2: two bytes, offset from the start of the
outvaroutputs to the start of the following output
phone ID group, i.e. the first outvaroffset is the
offset to the start of the second PHONEID
group. Using the previous outvaroffset (or the start
of the outvaroutputs) the start and length of the
PHONEID group can be determined and we can get the
sequence of output values we map the chunk value to
- VARVALID1:: one to several bytes, one byte each for an output phone ID
- tree = treenodeinfos TREEBODYSIZE4 treebody
- treenodeinfos = NRVFIELDS1 vfields NRATTRIBUTES1 NRQFIELDS1 qfields
- vfields = {VFIELD1}=NRVFIELDS1
- qfields = {QFIELD1}=NRATTRIBUTES1xNRQFIELDS1
- treebody = "cf. code"
- TREEBODYSIZE4: four bytes, size of treebody in number of bytes
- NRVFIELDS1: one byte, number of node properties in the following
vector (predefined and fixed sequence of properties)
- VFIELD1: number of bits used to represent a node property
- NRATTRIBUTES1: one byte, number of attributes (rows) in the
following matrix
- NRQFIELDS1: one byte, number (columns) of question-dependent node
properties per attribute in the following matrix
(predefined and fixed sequence of properties)
- QFIELD1: number of bits used to represent a question-dependent
property in the matrix
- Currently,
- NRVFIELDS1 is fixed at 2 for all trees, ie.
- vfields = 2 aVFIELD1 bVFIELD1
- aVFIELD1: nr of bits for questions
- bVFIELD1: nr of bits for decisions
- NRQFIELDS1 is fixed at 5 for all trees, ie. \n
- qfields = NRATTRIBUTES1 5 aQFIELD1 bQFIELD1 cQFIELD1 dQFIELD1 eQFIELD1 \n
- aQFIELD1: nr of bits for fork count \n
- bQFIELD1: nr of bits for start position for subsets \n
- cQFIELD1: nr of bits for group size \n
- dQFIELD1: nr of bits for offset to reach output \n
- eQFIELD1: nr of bits for threshold (if continuous node) \n
*/
/* ************************************************************/
/* decision tree data defines */
/* may not be changed with current implementation */
/* ************************************************************/
/* maptables fields: byte offsets relative to the start of a maptables
   section (NRMAPTABLES1 followed by the map tables) */
#define PICOKDT_MTSPOS_NRMAPTABLES 0
/* position of first byte of first maptable (for omt the only table) */
#define PICOKDT_MTPOS_START 1
/* maptable fields: byte offsets relative to the start of one maptable
   (LENTABLE2, TABLETYPE1, two-byte number of entries, entries) */
#define PICOKDT_MTPOS_LENTABLE 0
#define PICOKDT_MTPOS_TABLETYPE 2
#define PICOKDT_MTPOS_NUMBER 3
#define PICOKDT_MTPOS_MAPSTART 5
/* treenodeinfos fields: byte offsets relative to the start of the tree
   part; NRATTS and NRQFIELDS come after the NRVFIELDS byte and the
   PICOKDT_NODEINFO_NRVFIELDS vfield bytes */
#define PICOKDT_NIPOS_NRVFIELDS 0
#define PICOKDT_NIPOS_NRATTS 3
#define PICOKDT_NIPOS_NRQFIELDS 4
/* fixed treenodeinfos number of fields; kdtDtInitialize rejects a kb
   whose NRVFIELDS1/NRQFIELDS1 bytes differ from these values */
#define PICOKDT_NODEINFO_NRVFIELDS 2
#define PICOKDT_NODEINFO_NRQFIELDS 5
/* fixed number of bits used in the tree body for the node type, the
   subset type, and the is-decision flag */
#define PICOKDT_NODETYPE_NRBITS 2
#define PICOKDT_SUBSETTYPE_NRBITS 2
#define PICOKDT_ISDECIDE_NRBITS 1
/* number of inpmaptables for each tree. Since we have a possibly
   empty input map table for each att, currently these values must be
   equal to PICOKDT_NRATT*.
   kdtDtCheck compares the NRMAPTABLES1 byte of the input map tables
   against the value for the tree type being loaded */
typedef enum {
PICOKDT_NRINPMT_POSP = 12,
PICOKDT_NRINPMT_POSD = 7,
PICOKDT_NRINPMT_G2P = 16,
PICOKDT_NRINPMT_PHR = 8,
PICOKDT_NRINPMT_ACC = 13,
PICOKDT_NRINPMT_PAM = 60
} kdt_nrinpmaptables_t;
/* number of outmaptables for each tree, at least one, possibly empty,
   output map table for each tree.
   kdtDtCheck compares the NRMAPTABLES1 byte of the output map tables
   against the value for the tree type being loaded */
typedef enum {
PICOKDT_NROUTMT_POSP = 1,
PICOKDT_NROUTMT_POSD = 1,
PICOKDT_NROUTMT_G2P = 1,
PICOKDT_NROUTMT_PHR = 1,
PICOKDT_NROUTMT_ACC = 1,
PICOKDT_NROUTMT_PAM = 1
} kdt_nroutmaptables_t;
/* maptable types, stored in the TABLETYPE1 byte of each map table */
typedef enum {
PICOKDT_MTTYPE_EMPTY = 0,     /* no mapping to be done */
PICOKDT_MTTYPE_BYTE = 1,      /* fixed size entries, one byte each */
PICOKDT_MTTYPE_WORD = 2,      /* fixed size entries, two bytes each */
PICOKDT_MTTYPE_GRAPH = 3,     /* UTF8 graphemes, input tables only */
PICOKDT_MTTYPE_BYTETOVAR = 4  /* byte to variable-size output, out only */
} kdt_mttype_t;
/* ************************************************************/
/* decision tree types and loading */
/* ************************************************************/
/* object : Dt*KnowledgeBase
* shortcut : kdt*
* derived from : picoknow_KnowledgeBase
*/
/* subobj shared by all decision trees; the pointers are set up by
   kdtDtInitialize and point into the memory of the knowledge base */
typedef struct {
picokdt_kdttype_t type;
picoos_uint8 *inpmaptable;    /* start of the input map tables */
picoos_uint8 *outmaptable;    /* start of the output map tables; set to
                                 NULL by kdtDtInitialize if the output
                                 table is empty */
picoos_uint8 *tree;           /* start of the tree part (treenodeinfos) */
picoos_uint32 beg_offset[128]; /* for efficiency */
                              /* beg_offset[n]: offset of input map table
                                 n relative to inpmaptable */
/* tree-internal details for faster processing */
picoos_uint8 *vfields;        /* bit widths of node properties */
picoos_uint8 *qfields;        /* bit widths per attribute, with
                                 PICOKDT_NODEINFO_NRQFIELDS per row */
picoos_uint8 nrattributes;
picoos_uint8 *treebody;
/*picoos_uint8 nrvfields;*/ /* fix PICOKDT_NODEINFO_NRVFIELDS */
/*picoos_uint8 nrqfields;*/ /* fix PICOKDT_NODEINFO_NRQFIELDS */
/* direct output vector (no output mapping) */
picoos_uint8 dset; /* TRUE if class set, FALSE otherwise */
picoos_uint16 dclass;         /* decision read from the tree by kdtAskTree */
} kdt_subobj_t;
/* subobj specific for each decision tree type */
/* POSP tree: shared part plus an input vector with one element per
   attribute (PICOKDT_NRATT_POSP) */
typedef struct {
kdt_subobj_t dt;
picoos_uint16 invec[PICOKDT_NRATT_POSP]; /* input vector */
picoos_uint8 inveclen; /* nr of ele set in invec; must be =nrattributes */
} kdtposp_subobj_t;
/* POSD tree: shared part plus an input vector with one element per
   attribute (PICOKDT_NRATT_POSD) */
typedef struct {
kdt_subobj_t dt;
picoos_uint16 invec[PICOKDT_NRATT_POSD]; /* input vector */
picoos_uint8 inveclen; /* nr of ele set in invec; must be =nrattributes */
} kdtposd_subobj_t;
/* G2P tree: shared part plus an input vector with one element per
   attribute (PICOKDT_NRATT_G2P) */
typedef struct {
kdt_subobj_t dt;
picoos_uint16 invec[PICOKDT_NRATT_G2P]; /* input vector */
picoos_uint8 inveclen; /* nr of ele set in invec; must be =nrattributes */
} kdtg2p_subobj_t;
/* PHR tree: shared part plus an input vector with one element per
   attribute (PICOKDT_NRATT_PHR) */
typedef struct {
kdt_subobj_t dt;
picoos_uint16 invec[PICOKDT_NRATT_PHR]; /* input vector */
picoos_uint8 inveclen; /* nr of ele set in invec; must be =nrattributes */
} kdtphr_subobj_t;
/* ACC tree: shared part plus an input vector with one element per
   attribute (PICOKDT_NRATT_ACC) */
typedef struct {
kdt_subobj_t dt;
picoos_uint16 invec[PICOKDT_NRATT_ACC]; /* input vector */
picoos_uint8 inveclen; /* nr of ele set in invec; must be =nrattributes */
} kdtacc_subobj_t;
/* PAM tree: shared part plus an input vector with one element per
   attribute (PICOKDT_NRATT_PAM) */
typedef struct {
kdt_subobj_t dt;
picoos_uint16 invec[PICOKDT_NRATT_PAM]; /* input vector */
picoos_uint8 inveclen; /* nr of ele set in invec; must be =nrattributes */
} kdtpam_subobj_t;
/* Name    : kdtDtInitialize
   Function: parses the header of a decision tree kb and fills in the part
             of the subobject that is shared by all tree types
   Input   : this    knowledge base whose memory (this->base) holds the kb
             common  used to raise exceptions
             dtp     shared subobject to be filled in
   Returns : PICO_OK on success, otherwise the result of raising
             PICO_EXC_FILE_CORRUPT
   Notes   : - the three header positions are mandatory (non-zero)
             - the number of input map tables is taken from the kb and is
               rejected if the begin offsets would not fit into
               dtp->beg_offset
             - an empty output map table (length 3, type EMPTY) results in
               dtp->outmaptable == NULL
*/
static pico_status_t kdtDtInitialize(register picoknow_KnowledgeBase this,
                                     picoos_Common common,
                                     kdt_subobj_t *dtp) {
    picoos_uint16 inppos;
    picoos_uint16 outpos;
    picoos_uint16 treepos;
    picoos_uint32 curpos = 0, pos;
    picoos_uint16 lentable;
    picoos_uint16 i;
    picoos_uint8 imtnr;

    PICODBG_DEBUG(("start"));

    /* get inmap, outmap, tree offsets */
    if ((PICO_OK != picoos_read_mem_pi_uint16(this->base, &curpos, &inppos))
        || (PICO_OK != picoos_read_mem_pi_uint16(this->base, &curpos,
                                                 &outpos))
        || (PICO_OK != picoos_read_mem_pi_uint16(this->base, &curpos,
                                                 &treepos))) {
        PICODBG_ERROR(("problem reading kb in memory"));
        return picoos_emRaiseException(common->em, PICO_EXC_FILE_CORRUPT,
                                       NULL, NULL);
    }

    /* all pos are mandatory, verify */
    if (!(inppos && outpos && treepos)) {
        dtp->inpmaptable = NULL;
        dtp->outmaptable = NULL;
        dtp->tree = NULL;
        PICODBG_ERROR(("invalid kb position info"));
        return picoos_emRaiseException(common->em, PICO_EXC_FILE_CORRUPT,
                                       NULL, NULL);
    }
    dtp->inpmaptable = this->base + inppos;
    dtp->outmaptable = this->base + outpos;
    dtp->tree = this->base + treepos;

    /* the loop below writes beg_offset[1..imtnr]; the table count comes
       straight from the kb, so make sure it fits before writing */
    imtnr = dtp->inpmaptable[PICOKDT_MTSPOS_NRMAPTABLES];
    if (imtnr >= (sizeof(dtp->beg_offset) / sizeof(dtp->beg_offset[0]))) {
        PICODBG_ERROR(("too many inpmaptables"));
        return picoos_emRaiseException(common->em, PICO_EXC_FILE_CORRUPT,
                                       NULL, NULL);
    }

    /* precalc beg offset table: each table starts with its two-byte,
       little endian length, which is the offset to the next table */
    pos = PICOKDT_MTPOS_START;
    dtp->beg_offset[0] = PICOKDT_MTPOS_START;
    for (i = 0; i < imtnr; i++) {
        lentable = ((picoos_uint16)(dtp->inpmaptable[pos+1])) << 8 |
            dtp->inpmaptable[pos];
        pos += lentable;
        dtp->beg_offset[i+1] = pos;
    }

    /* nr of outmaptables is equal 1 for all trees, verify */
    if (dtp->outmaptable[PICOKDT_MTSPOS_NRMAPTABLES] != 1) {
        PICODBG_ERROR(("wrong number of outmaptables"));
        return picoos_emRaiseException(common->em, PICO_EXC_FILE_CORRUPT,
                                       NULL, NULL);
    }

    /* check if this is an empty table, ie. len == 3 */
    if ((dtp->outmaptable[PICOKDT_MTPOS_START + PICOKDT_MTPOS_LENTABLE]
         == 3)
        && (dtp->outmaptable[PICOKDT_MTPOS_START + PICOKDT_MTPOS_LENTABLE
                             + 1] == 0)) {
        /* verify that this is supposed to be an empty table and
           set outmaptable to NULL if so */
        if (dtp->outmaptable[PICOKDT_MTPOS_START + PICOKDT_MTPOS_TABLETYPE]
            == PICOKDT_MTTYPE_EMPTY) {
            dtp->outmaptable = NULL;
        } else {
            PICODBG_ERROR(("table length vs. type problem"));
            return picoos_emRaiseException(common->em,
                                           PICO_EXC_FILE_CORRUPT,
                                           NULL, NULL);
        }
    }

    /* tree = NRVFIELDS1 vfields NRATTRIBUTES1 NRQFIELDS1 qfields
              TREEBODYSIZE4 treebody */
    dtp->vfields = dtp->tree + 1;
    dtp->qfields = dtp->tree + PICOKDT_NODEINFO_NRVFIELDS + 3;
    dtp->nrattributes = dtp->tree[PICOKDT_NIPOS_NRATTS];
    dtp->treebody = dtp->qfields + 4 +
        (dtp->nrattributes * PICOKDT_NODEINFO_NRQFIELDS); /* TREEBODYSIZE4*/

    /*dtp->nrvfields = dtp->tree[PICOKDT_NIPOS_NRVFIELDS]; <- is fix */
    /*dtp->nrqfields = dtp->tree[PICOKDT_NIPOS_NRQFIELDS]; <- is fix */
    /* verify that nrvfields and nrqfields are correct */
    if ((PICOKDT_NODEINFO_NRVFIELDS != dtp->tree[PICOKDT_NIPOS_NRVFIELDS]) ||
        (PICOKDT_NODEINFO_NRQFIELDS != dtp->tree[PICOKDT_NIPOS_NRQFIELDS])) {
        PICODBG_ERROR(("problem with nr of vfields (%d) or qfields (%d)",
                       dtp->tree[PICOKDT_NIPOS_NRVFIELDS],
                       dtp->tree[PICOKDT_NIPOS_NRQFIELDS]));
        return picoos_emRaiseException(common->em, PICO_EXC_FILE_CORRUPT,
                                       NULL, NULL);
    }

    dtp->dset = 0;
    dtp->dclass = 0;

    /* report positions from the header values: outmaptable may be NULL
       here, so the pointers must not be subtracted */
    PICODBG_DEBUG(("tree init: nratt: %d, posomt: %d, postree: %d",
                   dtp->nrattributes, (outpos - inppos),
                   (treepos - inppos)));
    return PICO_OK;
}
/* Name    : kdtDtCheck
   Function: verifies that an initialized tree matches what is expected
             for its tree type
   Input   : this     not used by the check itself
             common   used to raise exceptions
             dtp      shared subobject as set up by kdtDtInitialize
             nratt    expected number of attributes
             nrinpmt  expected number of input map tables
             nroutmt  expected number of output map tables
             mttype   expected type of the output map table
   Returns : PICO_OK if all checks pass, otherwise the result of raising
             PICO_EXC_FILE_CORRUPT
   Notes   : a missing (NULL) input or output map table fails the check;
             it is reported separately so that the error trace never
             reads through a NULL pointer
*/
static pico_status_t kdtDtCheck(register picoknow_KnowledgeBase this,
                                picoos_Common common,
                                kdt_subobj_t *dtp,
                                kdt_nratt_t nratt,
                                kdt_nrinpmaptables_t nrinpmt,
                                kdt_nroutmaptables_t nroutmt,
                                kdt_mttype_t mttype) {
    /* both map tables must be present before their fields can be read */
    if ((dtp->inpmaptable == NULL) || (dtp->outmaptable == NULL)) {
        PICODBG_ERROR(("check failed, nratt %d, inp or out maptable missing",
                       dtp->nrattributes));
        return picoos_emRaiseException(common->em, PICO_EXC_FILE_CORRUPT,
                                       NULL, NULL);
    }

    /* check nr attributes */
    /* check nr inpmaptables */
    /* check nr outmaptables */
    /* check outmaptable is of the expected type */
    if ((nratt != dtp->nrattributes)
        || (dtp->inpmaptable[PICOKDT_MTSPOS_NRMAPTABLES] != nrinpmt)
        || (dtp->outmaptable[PICOKDT_MTSPOS_NRMAPTABLES] != nroutmt)
        || (dtp->outmaptable[PICOKDT_MTPOS_START+PICOKDT_MTPOS_TABLETYPE]
            != mttype)) {
        PICODBG_ERROR(("check failed, nratt %d, nrimt %d, nromt %d, omttype %d",
                       dtp->nrattributes,
                       dtp->inpmaptable[PICOKDT_MTSPOS_NRMAPTABLES],
                       dtp->outmaptable[PICOKDT_MTSPOS_NRMAPTABLES],
                       dtp->outmaptable[PICOKDT_MTPOS_START +
                                        PICOKDT_MTPOS_TABLETYPE]));
        return picoos_emRaiseException(common->em, PICO_EXC_FILE_CORRUPT,
                                       NULL, NULL);
    }
    return PICO_OK;
}
/* Sets up the POSP tree subobject attached to a knowledge base: runs the
   generic tree initialization, verifies the POSP-specific table layout,
   and clears the input vector.
   Returns PICO_OK on success, the result of raising PICO_EXC_KB_MISSING
   if the kb or its subobject is missing, otherwise the status of the
   failing step */
static pico_status_t kdtPosPInitialize(register picoknow_KnowledgeBase this,
                                       picoos_Common common) {
    kdtposp_subobj_t *posp;
    pico_status_t rv;
    picoos_uint8 att;

    if ((NULL == this) || (NULL == this->subObj)) {
        return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING,
                                       NULL, NULL);
    }
    posp = (kdtposp_subobj_t *)this->subObj;
    posp->dt.type = PICOKDT_KDTTYPE_POSP;

    /* part shared by all trees */
    rv = kdtDtInitialize(this, common, &(posp->dt));
    if (PICO_OK != rv) {
        return rv;
    }
    /* layout expected for this tree type */
    rv = kdtDtCheck(this, common, &(posp->dt), PICOKDT_NRATT_POSP,
                    PICOKDT_NRINPMT_POSP, PICOKDT_NROUTMT_POSP,
                    PICOKDT_MTTYPE_WORD);
    if (PICO_OK != rv) {
        return rv;
    }

    /* init specialized subobj part: empty input vector */
    att = 0;
    while (att < PICOKDT_NRATT_POSP) {
        posp->invec[att] = 0;
        att++;
    }
    posp->inveclen = 0;
    PICODBG_DEBUG(("posp tree initialized"));
    return PICO_OK;
}
/* Sets up the POSD tree subobject attached to a knowledge base: runs the
   generic tree initialization, verifies the POSD-specific table layout,
   and clears the input vector.
   Returns PICO_OK on success, the result of raising PICO_EXC_KB_MISSING
   if the kb or its subobject is missing, otherwise the status of the
   failing step */
static pico_status_t kdtPosDInitialize(register picoknow_KnowledgeBase this,
                                       picoos_Common common) {
    kdtposd_subobj_t *posd;
    pico_status_t rv;
    picoos_uint8 att;

    if ((NULL == this) || (NULL == this->subObj)) {
        return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING,
                                       NULL, NULL);
    }
    posd = (kdtposd_subobj_t *)this->subObj;
    posd->dt.type = PICOKDT_KDTTYPE_POSD;

    /* part shared by all trees */
    rv = kdtDtInitialize(this, common, &(posd->dt));
    if (PICO_OK != rv) {
        return rv;
    }
    /* layout expected for this tree type */
    rv = kdtDtCheck(this, common, &(posd->dt), PICOKDT_NRATT_POSD,
                    PICOKDT_NRINPMT_POSD, PICOKDT_NROUTMT_POSD,
                    PICOKDT_MTTYPE_WORD);
    if (PICO_OK != rv) {
        return rv;
    }

    /* init specialized subobj part: empty input vector */
    att = 0;
    while (att < PICOKDT_NRATT_POSD) {
        posd->invec[att] = 0;
        att++;
    }
    posd->inveclen = 0;
    PICODBG_DEBUG(("posd tree initialized"));
    return PICO_OK;
}
/* Sets up the G2P tree subobject attached to a knowledge base: runs the
   generic tree initialization, verifies the G2P-specific table layout
   (output map table of type BYTETOVAR), and clears the input vector.
   Returns PICO_OK on success, the result of raising PICO_EXC_KB_MISSING
   if the kb or its subobject is missing, otherwise the status of the
   failing step */
static pico_status_t kdtG2PInitialize(register picoknow_KnowledgeBase this,
                                      picoos_Common common) {
    kdtg2p_subobj_t *g2p;
    pico_status_t rv;
    picoos_uint8 att;

    if ((NULL == this) || (NULL == this->subObj)) {
        return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING,
                                       NULL, NULL);
    }
    g2p = (kdtg2p_subobj_t *)this->subObj;
    g2p->dt.type = PICOKDT_KDTTYPE_G2P;

    /* part shared by all trees */
    rv = kdtDtInitialize(this, common, &(g2p->dt));
    if (PICO_OK != rv) {
        return rv;
    }
    /* layout expected for this tree type */
    rv = kdtDtCheck(this, common, &(g2p->dt), PICOKDT_NRATT_G2P,
                    PICOKDT_NRINPMT_G2P, PICOKDT_NROUTMT_G2P,
                    PICOKDT_MTTYPE_BYTETOVAR);
    if (PICO_OK != rv) {
        return rv;
    }

    /* init specialized subobj part: empty input vector */
    att = 0;
    while (att < PICOKDT_NRATT_G2P) {
        g2p->invec[att] = 0;
        att++;
    }
    g2p->inveclen = 0;
    PICODBG_DEBUG(("g2p tree initialized"));
    return PICO_OK;
}
/* Sets up the PHR tree subobject attached to a knowledge base: runs the
   generic tree initialization, verifies the PHR-specific table layout,
   and clears the input vector.
   Returns PICO_OK on success, the result of raising PICO_EXC_KB_MISSING
   if the kb or its subobject is missing, otherwise the status of the
   failing step */
static pico_status_t kdtPhrInitialize(register picoknow_KnowledgeBase this,
                                      picoos_Common common) {
    kdtphr_subobj_t *phr;
    pico_status_t rv;
    picoos_uint8 att;

    if ((NULL == this) || (NULL == this->subObj)) {
        return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING,
                                       NULL, NULL);
    }
    phr = (kdtphr_subobj_t *)this->subObj;
    phr->dt.type = PICOKDT_KDTTYPE_PHR;

    /* part shared by all trees */
    rv = kdtDtInitialize(this, common, &(phr->dt));
    if (PICO_OK != rv) {
        return rv;
    }
    /* layout expected for this tree type */
    rv = kdtDtCheck(this, common, &(phr->dt), PICOKDT_NRATT_PHR,
                    PICOKDT_NRINPMT_PHR, PICOKDT_NROUTMT_PHR,
                    PICOKDT_MTTYPE_WORD);
    if (PICO_OK != rv) {
        return rv;
    }

    /* init specialized subobj part: empty input vector */
    att = 0;
    while (att < PICOKDT_NRATT_PHR) {
        phr->invec[att] = 0;
        att++;
    }
    phr->inveclen = 0;
    PICODBG_DEBUG(("phr tree initialized"));
    return PICO_OK;
}
/* Sets up the ACC tree subobject attached to a knowledge base: runs the
   generic tree initialization, verifies the ACC-specific table layout,
   and clears the input vector.
   Returns PICO_OK on success, the result of raising PICO_EXC_KB_MISSING
   if the kb or its subobject is missing, otherwise the status of the
   failing step */
static pico_status_t kdtAccInitialize(register picoknow_KnowledgeBase this,
                                      picoos_Common common) {
    kdtacc_subobj_t *acc;
    pico_status_t rv;
    picoos_uint8 att;

    if ((NULL == this) || (NULL == this->subObj)) {
        return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING,
                                       NULL, NULL);
    }
    acc = (kdtacc_subobj_t *)this->subObj;
    acc->dt.type = PICOKDT_KDTTYPE_ACC;

    /* part shared by all trees */
    rv = kdtDtInitialize(this, common, &(acc->dt));
    if (PICO_OK != rv) {
        return rv;
    }
    /* layout expected for this tree type */
    rv = kdtDtCheck(this, common, &(acc->dt), PICOKDT_NRATT_ACC,
                    PICOKDT_NRINPMT_ACC, PICOKDT_NROUTMT_ACC,
                    PICOKDT_MTTYPE_WORD);
    if (PICO_OK != rv) {
        return rv;
    }

    /* init specialized subobj part: empty input vector */
    att = 0;
    while (att < PICOKDT_NRATT_ACC) {
        acc->invec[att] = 0;
        att++;
    }
    acc->inveclen = 0;
    PICODBG_DEBUG(("acc tree initialized"));
    return PICO_OK;
}
/* Sets up the PAM tree subobject attached to a knowledge base: runs the
   generic tree initialization, verifies the PAM-specific table layout,
   and clears the input vector.
   Returns PICO_OK on success, the result of raising PICO_EXC_KB_MISSING
   if the kb or its subobject is missing, otherwise the status of the
   failing step */
static pico_status_t kdtPamInitialize(register picoknow_KnowledgeBase this,
                                      picoos_Common common) {
    kdtpam_subobj_t *pam;
    pico_status_t rv;
    picoos_uint8 att;

    if ((NULL == this) || (NULL == this->subObj)) {
        return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING,
                                       NULL, NULL);
    }
    pam = (kdtpam_subobj_t *)this->subObj;
    pam->dt.type = PICOKDT_KDTTYPE_PAM;

    /* part shared by all trees */
    rv = kdtDtInitialize(this, common, &(pam->dt));
    if (PICO_OK != rv) {
        return rv;
    }
    /* layout expected for this tree type */
    rv = kdtDtCheck(this, common, &(pam->dt), PICOKDT_NRATT_PAM,
                    PICOKDT_NRINPMT_PAM, PICOKDT_NROUTMT_PAM,
                    PICOKDT_MTTYPE_WORD);
    if (PICO_OK != rv) {
        return rv;
    }

    /* init specialized subobj part: empty input vector */
    att = 0;
    while (att < PICOKDT_NRATT_PAM) {
        pam->invec[att] = 0;
        att++;
    }
    pam->inveclen = 0;
    PICODBG_DEBUG(("pam tree initialized"));
    return PICO_OK;
}
/* Releases the tree subobject of a knowledge base through the given
   memory manager; a NULL knowledge base is accepted and ignored.
   Always returns PICO_OK */
static pico_status_t kdtSubObjDeallocate(register picoknow_KnowledgeBase this,
                                         picoos_MemoryManager mm) {
    if (NULL == this) {
        return PICO_OK;
    }
    picoos_deallocate(mm, (void *) &this->subObj);
    return PICO_OK;
}
/* we don't offer a specialized constructor for a *KnowledgeBase but
 * instead a "specializer" of an already existing generic
 * picoknow_KnowledgeBase.
 *
 * Allocates the subobject matching kdttype, installs the subobject
 * deallocator and runs the type-specific initialization.
 * Returns PICO_OK on success; otherwise the result of raising
 * PICO_EXC_KB_MISSING (no kb), PICO_ERR_OTHER (unknown tree type),
 * PICO_EXC_OUT_OF_MEM (allocation failed) or the status of the failed
 * initialization (the subobject is deallocated again in that case) */
pico_status_t picokdt_specializeDtKnowledgeBase(picoknow_KnowledgeBase this,
                                                picoos_Common common,
                                                const picokdt_kdttype_t kdttype) {
    /* type-specific initializer selected together with the allocation */
    pico_status_t (*initialize)(picoknow_KnowledgeBase, picoos_Common);
    pico_status_t status;

    if (NULL == this) {
        return picoos_emRaiseException(common->em, PICO_EXC_KB_MISSING,
                                       NULL, NULL);
    }
    this->subDeallocate = kdtSubObjDeallocate;

    switch (kdttype) {
        case PICOKDT_KDTTYPE_POSP:
            this->subObj = picoos_allocate(common->mm,sizeof(kdtposp_subobj_t));
            initialize = kdtPosPInitialize;
            break;
        case PICOKDT_KDTTYPE_POSD:
            this->subObj = picoos_allocate(common->mm,sizeof(kdtposd_subobj_t));
            initialize = kdtPosDInitialize;
            break;
        case PICOKDT_KDTTYPE_G2P:
            this->subObj = picoos_allocate(common->mm,sizeof(kdtg2p_subobj_t));
            initialize = kdtG2PInitialize;
            break;
        case PICOKDT_KDTTYPE_PHR:
            this->subObj = picoos_allocate(common->mm,sizeof(kdtphr_subobj_t));
            initialize = kdtPhrInitialize;
            break;
        case PICOKDT_KDTTYPE_ACC:
            this->subObj = picoos_allocate(common->mm,sizeof(kdtacc_subobj_t));
            initialize = kdtAccInitialize;
            break;
        case PICOKDT_KDTTYPE_PAM:
            this->subObj = picoos_allocate(common->mm,sizeof(kdtpam_subobj_t));
            initialize = kdtPamInitialize;
            break;
        default:
            /* unknown tree type: nothing has been allocated */
            return picoos_emRaiseException(common->em, PICO_ERR_OTHER,
                                           NULL, NULL);
    }

    if (NULL == this->subObj) {
        return picoos_emRaiseException(common->em, PICO_EXC_OUT_OF_MEM,
                                       NULL, NULL);
    }

    status = initialize(this, common);
    if (PICO_OK == status) {
        return PICO_OK;
    }
    /* initialization failed: give the subobject back before reporting */
    picoos_deallocate(common->mm, (void *) &this->subObj);
    return picoos_emRaiseException(common->em, status, NULL, NULL);
}
/* ************************************************************/
/* decision tree getDt* */
/* ************************************************************/
/* Returns the subobject of the kb as POSP tree handle, NULL if there is
   no kb */
picokdt_DtPosP picokdt_getDtPosP(picoknow_KnowledgeBase this) {
    if (NULL == this) {
        return NULL;
    }
    return (picokdt_DtPosP) this->subObj;
}
/* Returns the subobject of the kb as POSD tree handle, NULL if there is
   no kb */
picokdt_DtPosD picokdt_getDtPosD(picoknow_KnowledgeBase this) {
    if (NULL == this) {
        return NULL;
    }
    return (picokdt_DtPosD) this->subObj;
}
/* Returns the subobject of the kb as G2P tree handle, NULL if there is
   no kb */
picokdt_DtG2P picokdt_getDtG2P (picoknow_KnowledgeBase this) {
    if (NULL == this) {
        return NULL;
    }
    return (picokdt_DtG2P) this->subObj;
}
/* Returns the subobject of the kb as PHR tree handle, NULL if there is
   no kb */
picokdt_DtPHR picokdt_getDtPHR (picoknow_KnowledgeBase this) {
    if (NULL == this) {
        return NULL;
    }
    return (picokdt_DtPHR) this->subObj;
}
/* Returns the subobject of the kb as ACC tree handle, NULL if there is
   no kb */
picokdt_DtACC picokdt_getDtACC (picoknow_KnowledgeBase this) {
    if (NULL == this) {
        return NULL;
    }
    return (picokdt_DtACC) this->subObj;
}
/* Returns the subobject of the kb as PAM tree handle, NULL if there is
   no kb */
picokdt_DtPAM picokdt_getDtPAM (picoknow_KnowledgeBase this) {
    if (NULL == this) {
        return NULL;
    }
    return (picokdt_DtPAM) this->subObj;
}
/* ************************************************************/
/* decision tree support functions, tree */
/* ************************************************************/
/* indices into the vfields vector; each vfields entry is a number of
   bits used in the tree body */
typedef enum {
eQuestion = 0, /* index to #bits to identify question */
eDecide = 1 /* index to #bits to identify decision */
} kdt_vfields_ind_t;
/* column indices into one row (one attribute) of the qfields matrix, as
   used by kdtGetQFieldsVal; each entry is a number of bits used in the
   tree body */
typedef enum {
eForkCount = 0, /* index to #bits for number of forks */
eBitNo = 1, /* index to #bits for index of 1st element */
eBitCount = 2, /* index to #bits for size of the group */
eJump = 3, /* index to #bits for offset to reach output node */
eCut = 4 /* for contin. node: #bits for threshold checked */
} kdt_qfields_ind_t;
/* node types as read from the tree body (PICOKDT_NODETYPE_NRBITS bits);
   the descriptions reflect how kdtAskTree treats each type */
typedef enum {
eNTerminal = 0,   /* not handled explicitly by kdtAskTree (no forks) */
eNBinary = 1,     /* two forks, the input value itself selects the fork */
eNContinuous = 2, /* two forks, input value compared to a threshold */
eNDiscrete = 3    /* fork count read from tree, forks selected by subsets */
} kdt_nodetypes_t;
/* subset types of a discrete node as read from the tree body
   (PICOKDT_SUBSETTYPE_NRBITS bits); the descriptions reflect how
   kdtAskTree tests the input value against each type */
typedef enum {
eOneValue = 0,       /* matches one value */
eTwoValues = 1,      /* matches either of two values */
eWithoutBitMask = 2, /* matches a range given by start and size */
eBitMask = 3         /* range given by start and size, followed by one
                        bit per value of the range */
} kdt_subsettypes_t;
/* Name    : kdt_jump
   Function: moves a byte + bit position forward by iJump bits
   Input   : iJump   number of bits to move forward
   In/Out  : iByteNo byte part of the position
             iBitNo  bit part of the position (0..7); bits within a byte
                     are counted downwards, ie. moving forward lowers the
                     bit number and wraps from 0 to 7 of the next byte
   Returns : void
   Notes   : updates the iByteNo + iBitNo fields
*/
static void kdt_jump(const picoos_uint32 iJump,
                     picoos_uint32 *iByteNo,
                     picoos_int8 *iBitNo) {
    const picoos_uint32 wholeBytes = iJump / 8;
    const picoos_uint32 restBits = iJump - (wholeBytes * 8);

    /* work with the number of bits already consumed in the current byte */
    *iBitNo = restBits + (7 - *iBitNo);
    *iByteNo += wholeBytes;

    if (*iBitNo < 8) {
        /* still within the same byte */
        *iBitNo = 7 - *iBitNo;
        return;
    }
    /* carried over into the following byte */
    (*iByteNo)++;
    *iBitNo = 15 - *iBitNo;
}
/* replaced inline for speedup */
/* Name : kdtIsVal
Function: Returns the binary value of the bit pointed to by iByteNo, iBitNo
Input : iByteNo offset to the byte containing the bits to extract
(0..sizeof(treebody))
iBitNo offset to the first bit to be extracted (0..7)
Returns : 0/1 depending on the bit pointed to
*/
/*
static picoos_uint8 kdtIsVal(register kdt_subobj_t *this,
picoos_uint32 iByteNo,
picoos_int8 iBitNo) {
return ((this->treebody[iByteNo] & ((1)<<iBitNo)) > 0);
}
*/
/* @todo : consider replacing inline for speedup */
/* Name    : kdtGetQFieldsVal (was: m_QuestDependentFields)
   Function: reads one entry of the qfields matrix
   Input   : this    handle to a dt subobj
             attind  index of the attribute (row)
             qind    index of the property within the row (column)
   Returns : the requested byte
   Notes   : the caller has to make sure that attind < this->nrattributes,
             no range check is done here
*/
static picoos_uint8 kdtGetQFieldsVal(register kdt_subobj_t *this,
                                     const picoos_uint8 attind,
                                     const kdt_qfields_ind_t qind) {
    /* each attribute owns PICOKDT_NODEINFO_NRQFIELDS consecutive bytes */
    const picoos_uint8 *row =
        this->qfields + (attind * PICOKDT_NODEINFO_NRQFIELDS);

    return row[qind];
}
/* Name : kdtGetShiftVal (was: get_shift_value)
Function: returns the (treebody) value pointed to by iByteNo, iBitNo,
and with size iSize
Input : this reference to the processing unit struct
iSize number of bits to be extracted (0..N)
iByteNo offset to the byte containing the bits to extract
(0..sizeof(treebody))
iBitNo offset to the first bit to be extracted (0..7)
Returns : the value requested (if size==0 --> 0 is returned)
*/
/*
static picoos_uint32 orig_kdtGetShiftVal(register kdt_subobj_t *this,
const picoos_int16 iSize,
picoos_uint32 *iByteNo,
picoos_int8 *iBitNo) {
picoos_uint32 iVal;
picoos_int16 i;
iVal = 0;
for (i = iSize-1; i >= 0; i--) {
if ( (this->treebody[*iByteNo] & ((1)<<(*iBitNo))) > 0) {
iVal |= ( (1) << i );
}
(*iBitNo)--;
if (*iBitNo < 0) {
*iBitNo = 7;
(*iByteNo)++;
}
}
return iVal;
}
*/
/* refactored version of the bit-by-bit reader:
   reads iSize bits from the tree body, starting at byte *iByteNo, bit
   *iBitNo, and returns them as one value with the first bit read being
   the most significant one. Bits within a byte are consumed from the
   high bit number towards bit 0, then reading continues at bit 7 of the
   next byte. On return *iByteNo / *iBitNo point to the bit following the
   bits read. For iSize <= 0 nothing is read and 0 is returned.
   Fields of less than 4 bits are read bit by bit, larger ones bytewise.
   Note: no check that the position stays within the tree body. */
static picoos_uint32 kdtGetShiftVal(register kdt_subobj_t *this,
const picoos_int16 iSize, picoos_uint32 *iByteNo, picoos_int8 *iBitNo)
{
picoos_uint32 v, b, iVal;
picoos_int16 i, j, len;
picoos_uint8 val;
/* short fields: copy bit by bit, i is the bit position in the result */
if (iSize < 4) {
iVal = 0;
for (i = iSize - 1; i >= 0; i--) {
/* no check that *iByteNo is within valid treebody range */
if ((this->treebody[*iByteNo] & ((1) << (*iBitNo))) > 0) {
iVal |= ((1) << i);
}
/* advance to the next bit, wrapping into the next byte */
(*iBitNo)--;
if (*iBitNo < 0) {
*iBitNo = 7;
(*iByteNo)++;
}
}
return iVal;
}
/* long fields: remember the start position in b/j and move the
   caller's position past the field right away */
b = *iByteNo;
j = *iBitNo;
len = iSize;
*iBitNo = j - iSize;
v = 0;
while (*iBitNo < 0) {
*iBitNo += 8;
(*iByteNo)++;
}
/* first byte: keep only bits j..0, ie. drop bits consumed earlier */
val = this->treebody[b++];
if (j < 7) {
switch (j) {
case 0:
val &= 0x01;
break;
case 1:
val &= 0x03;
break;
case 2:
val &= 0x07;
break;
case 3:
val &= 0x0f;
break;
case 4:
val &= 0x1f;
break;
case 5:
val &= 0x3f;
break;
case 6:
val &= 0x7f;
break;
}
}
/* len: number of bits still needed after the j+1 bits of the first
   byte; if negative the field ends inside the first byte and the
   surplus low bits are shifted out */
len -= j + 1;
if (len < 0) {
val >>= -len;
}
v = val;
/* following bytes: take up to 8 bits each, from the top of the byte */
while (len > 0) {
if (len >= 8) {
j = 8;
} else {
j = len;
}
v <<= j;
val = this->treebody[b++];
/* last, partial byte: keep the top j bits and move them down */
if (j < 8) {
switch (j) {
case 1:
val &= 0x80;
val >>= 7;
break;
case 2:
val &= 0xc0;
val >>= 6;
break;
case 3:
val &= 0xe0;
val >>= 5;
break;
case 4:
val &= 0xf0;
val >>= 4;
break;
case 5:
val &= 0xf8;
val >>= 3;
break;
case 6:
val &= 0xfc;
val >>= 2;
break;
case 7:
val &= 0xfe;
val >>= 1;
break;
}
}
v |= val;
len -= j;
}
return v;
}
/* Name : kdtAskTree
Function: Tree Traversal routine, evaluates the node found at the
current position of the tree body
Input : this handle to a dt subobj
invec input vector, indexed by the question (attribute) number
invecmax number of elements in invec
iByteNo offset to the first byte containing the bits
to extract (0..sizeof(treebody)), updated while reading
iBitNo offset to the first bit to be extracted (0..7),
updated while reading
Returns : >0 continue, no solution yet found; the position has been
moved by the jump offset of the selected fork
=0 solution found; this->dclass holds the decision and
this->dset is TRUE
<0 error, no solution found (question out of range, or no
fork selected); this->dset is FALSE
Notes : the node layout read here is: node type, question, node type
specific data, then one entry per fork
*/
static picoos_int8 kdtAskTree(register kdt_subobj_t *this,
picoos_uint16 *invec,
const kdt_nratt_t invecmax,
picoos_uint32 *iByteNo,
picoos_int8 *iBitNo) {
picoos_uint32 iNodeType;
picoos_uint8 iQuestion;
picoos_int32 iVal;
picoos_int32 iForks;
/* iID: index of the fork selected for the input value, -1 if none yet */
picoos_int32 iID;
picoos_int32 iCut, iSubsetType, iBitPos, iBitCount, iPos, iJump, iDecision;
picoos_int32 i;
picoos_char iIsDecide;
PICODBG_TRACE(("start"));
/* get node type, value should be in kdt_nodetype_t range */
iNodeType = kdtGetShiftVal(this, PICOKDT_NODETYPE_NRBITS, iByteNo, iBitNo);
PICODBG_TRACE(("iNodeType: %d", iNodeType));
/* get attribute to be used in question, check if in range, and get val */
/* check of vfields argument done in initialize */
iQuestion = kdtGetShiftVal(this, this->vfields[eQuestion], iByteNo, iBitNo);
if ((iQuestion < this->nrattributes) && (iQuestion < invecmax)) {
iVal = invec[iQuestion];
} else {
this->dset = FALSE;
PICODBG_TRACE(("invalid question"));
return -1; /* iQuestion invalid */
}
iForks = 0;
iID = -1;
PICODBG_TRACE(("iQuestion: %d", iQuestion));
/* determine the number of forks and the fork selected by iVal; for
   node types not listed iForks stays 0 and the fork loop below is
   skipped */
switch (iNodeType) {
case eNBinary: {
/* the input value is used directly as fork index */
iForks = 2;
iID = iVal;
break;
}
case eNContinuous: {
/* fork 0 if the input value is not above the threshold, else fork 1 */
iForks = 2;
iID = 1;
iCut = kdtGetShiftVal(this, kdtGetQFieldsVal(this, iQuestion, eCut),
iByteNo, iBitNo); /*read the threshold*/
if (iVal <= iCut) {
iID = 0;
}
break;
}
case eNDiscrete: {
iForks =
kdtGetShiftVal(this,
kdtGetQFieldsVal(this, iQuestion, eForkCount),
iByteNo, iBitNo);
/* one subset per fork except the last one, which is the default;
   once a subset matched (iID > -1) the remaining subsets are only
   skipped to keep the position in sync */
for (i = 0; i < iForks-1; i++) {
iSubsetType =
kdtGetShiftVal(this, PICOKDT_SUBSETTYPE_NRBITS,
iByteNo, iBitNo);
switch (iSubsetType) {
case eOneValue: {
/* subset holds one value */
if (iID > -1) {
kdt_jump(kdtGetQFieldsVal(this, iQuestion, eBitNo),
iByteNo, iBitNo);
break;
}
iBitPos =
kdtGetShiftVal(this,
kdtGetQFieldsVal(this, iQuestion,
eBitNo),
iByteNo, iBitNo);
if (iVal == iBitPos) {
iID = i;
}
break;
}
case eTwoValues: {
/* subset holds two values, either one matches */
if (iID > -1) {
kdt_jump((kdtGetQFieldsVal(this, iQuestion, eBitNo) +
kdtGetQFieldsVal(this, iQuestion, eBitCount)),
iByteNo, iBitNo);
break;
}
iBitPos =
kdtGetShiftVal(this, kdtGetQFieldsVal(this, iQuestion,
eBitNo),
iByteNo, iBitNo);
iBitCount =
kdtGetShiftVal(this, kdtGetQFieldsVal(this, iQuestion,
eBitCount),
iByteNo, iBitNo);
if ((iVal == iBitPos) || (iVal == iBitCount)) {
iID = i;
}
break;
}
case eWithoutBitMask: {
/* subset is the range iBitPos .. iBitPos+iBitCount-1 */
if (iID > -1) {
kdt_jump((kdtGetQFieldsVal(this, iQuestion, eBitNo) +
kdtGetQFieldsVal(this, iQuestion, eBitCount)),
iByteNo, iBitNo);
break;
}
iBitPos =
kdtGetShiftVal(this, kdtGetQFieldsVal(this, iQuestion,
eBitNo),
iByteNo, iBitNo);
iBitCount =
kdtGetShiftVal(this, kdtGetQFieldsVal(this, iQuestion,
eBitCount),
iByteNo, iBitNo);
if ((iVal >= iBitPos) && (iVal < (iBitPos + iBitCount))) {
iID = i;
}
break;
}
case eBitMask: {
/* subset is a range followed by a mask of iBitCount bits, one bit
   per value of the range; the bit count is always read because it
   gives the length of the mask to be skipped */
iBitPos = 0;
if (iID > -1) {
kdt_jump(kdtGetQFieldsVal(this, iQuestion, eBitNo),
iByteNo, iBitNo);
} else {
iBitPos =
kdtGetShiftVal(this,
kdtGetQFieldsVal(this, iQuestion,
eBitNo),
iByteNo, iBitNo);
}
iBitCount =
kdtGetShiftVal(this,
kdtGetQFieldsVal(this, iQuestion,
eBitCount),
iByteNo, iBitNo);
if (iID > -1) {
kdt_jump(iBitCount, iByteNo, iBitNo);
break;
}
if ((iVal >= iBitPos) && (iVal < (iBitPos + iBitCount))) {
/* move to the mask bit of iVal, test it, then skip the rest of
   the mask; iPos is assigned but not used afterwards */
iPos = iVal - iBitPos;
kdt_jump((iVal - iBitPos), iByteNo, iBitNo);
/* if (kdtIsVal(this, *iByteNo, *iBitNo))*/
if ((this->treebody[*iByteNo] & ((1)<<(*iBitNo))) > 0) {
iID = i;
}
kdt_jump((iBitCount - (iVal-iBitPos)), iByteNo, iBitNo);
} else {
kdt_jump(iBitCount, iByteNo, iBitNo);
}
break;
}/*end case eBitMask*/
}/*end switch (iSubsetType)*/
}/*end for ( i = 0; i < iForks-1; i++ ) */
/*default tree branch*/
if (-1 == iID) {
iID = iForks-1;
}
break;
}/*end case eNDiscrete*/
}/*end switch (iNodeType)*/
/* walk the fork entries: each starts with an is-decision flag followed
   by either a jump offset or a decision; entries of forks other than
   the selected one are skipped */
for (i = 0; i < iForks; i++) {
iIsDecide = kdtGetShiftVal(this, PICOKDT_ISDECIDE_NRBITS, iByteNo, iBitNo);
PICODBG_TRACE(("doing forks: %d", i));
if (!iIsDecide) {
if (iID == i) {
/* selected fork leads to another node: move there */
iJump =
kdtGetShiftVal(this, kdtGetQFieldsVal(this, iQuestion, eJump),
iByteNo, iBitNo);
kdt_jump(iJump, iByteNo, iBitNo);
this->dset = FALSE;
return 1; /* to be continued, no solution yet found */
} else {
kdt_jump(kdtGetQFieldsVal(this, iQuestion, eJump),
iByteNo, iBitNo);
}
} else {
if (iID == i) {
/* selected fork holds the decision */
/* check of vfields argument done in initialize */
iDecision = kdtGetShiftVal(this, this->vfields[eDecide],
iByteNo, iBitNo);
this->dclass = iDecision;
this->dset = TRUE;
return 0; /* solution found */
} else {
/* check of vfields argument done in initialize */
kdt_jump(this->vfields[eDecide], iByteNo, iBitNo);
}
}/*end if (!iIsDecide)*/
}/*end for (i = 0; i < iForks; i++ )*/
this->dset = FALSE;
PICODBG_TRACE(("problem determining class"));
return -1; /* solution not found, problem determining a class */
}
/* ************************************************************/
/* decision tree support functions, mappings */
/* ************************************************************/
/* Map inval through the fixed-size input map table number imtnr.
   The entry size follows from the table type stored in the kb:
   byte table -> one byte per entry (MapInByte),
   word table -> two bytes per entry, little endian (MapInWord).
   - outval:         index of the matching table entry; for a table of
                     type empty no mapping is done and outval = inval
   - outfallbackval: fallback value read from the table; it stays 0 if
                     the function fails before reaching it
   returns TRUE if inval could be mapped, FALSE otherwise */
static picoos_uint8 kdtMapInFixed(const kdt_subobj_t *dt,
                                  const picoos_uint8 imtnr,
                                  const picoos_uint16 inval,
                                  picoos_uint16 *outval,
                                  picoos_uint16 *outfallbackval) {
    picoos_uint8 entrysize;
    picoos_uint32 cur;
    picoos_uint16 tablelen;
    picoos_uint16 endpos;
    picoos_uint16 idx;

    *outval = 0;
    *outfallbackval = 0;
    cur = 0;

    /* the first byte holds the number of tables, imtnr must be below it */
    if (imtnr >= dt->inpmaptable[cur++]) {
        PICODBG_ERROR(("check failed: nrtab: %d, imtnr: %d",
                       dt->inpmaptable[cur-1], imtnr));
        return FALSE;
    }

    /* table 0 directly follows the table count, all other tables are
       located through the offset array */
    if (imtnr > 0) {
        cur = dt->beg_offset[imtnr];
    }

    /* two byte table length, counted from the start of the table */
    tablelen = ((picoos_uint16)(dt->inpmaptable[cur+1])) << 8 |
        dt->inpmaptable[cur];
    endpos = cur + tablelen;
    cur += 2;

    /* the table type decides on the entry size */
    if (dt->inpmaptable[cur] == PICOKDT_MTTYPE_EMPTY) {
        PICODBG_TRACE(("empty table: %d", imtnr));
        *outval = inval;
        return TRUE;
    }
    if (dt->inpmaptable[cur] == PICOKDT_MTTYPE_BYTE) {
        entrysize = 1;
    } else if (dt->inpmaptable[cur] == PICOKDT_MTTYPE_WORD) {
        entrysize = 2;
    } else {
        PICODBG_ERROR(("wrong table type %d", dt->inpmaptable[cur]));
        return FALSE;
    }
    cur++;

    /* the next two bytes are handed out as fallback value and are also
       used as upper bound for the number of entries to look at */
    *outfallbackval = ((picoos_uint16)(dt->inpmaptable[cur+1])) << 8 |
        dt->inpmaptable[cur];
    cur += 2;

    /* sequential search, one loop per entry size */
    idx = 0;
    if (entrysize == 1) {
        while ((idx < *outfallbackval) && (cur < endpos)) {
            if (inval == dt->inpmaptable[cur]) {
                *outval = idx;
                PICODBG_TRACE(("s1 %d in %d -> out %d", imtnr, inval, *outval));
                return TRUE;
            }
            cur++;
            idx++;
        }
    } else {
        /* a word entry needs two bytes, so stop one byte earlier */
        endpos--;
        while ((idx < *outfallbackval) && (cur < endpos)) {
            if (inval == (((picoos_uint16)(dt->inpmaptable[cur+1])) << 8 |
                          dt->inpmaptable[cur])) {
                *outval = idx;
                PICODBG_TRACE(("s2 %d in %d -> out %d", imtnr, inval, *outval));
                return TRUE;
            }
            cur += 2;
            idx++;
        }
    }

    PICODBG_DEBUG(("no mapping found, fallback: %d", *outfallbackval));
    return FALSE;
}
/* Map the utf8 grapheme starting at inval[0] through the grapheme input
   map table number imtnr.
   - inval/invalmaxlen: buffer holding the grapheme and the number of
                        accessible bytes in it
   - outval:            index of the matching table entry, only written
                        if a match is found
   - outfallbackval:    fallback value read from the table; it stays 0
                        if the function fails before reaching it
   returns TRUE if the grapheme could be mapped, FALSE otherwise */
static picoos_uint8 kdtMapInGraph(const kdt_subobj_t *dt,
                                  const picoos_uint8 imtnr,
                                  const picoos_uint8 *inval,
                                  const picoos_uint8 invalmaxlen,
                                  picoos_uint16 *outval,
                                  picoos_uint16 *outfallbackval) {
    /* ilen starts at 0: the failure trace below prints it even if the
       condition short-circuits before ilen is computed */
    picoos_uint8 ilen = 0;
    picoos_uint8 tlen;
    picoos_uint8 cont;
    picoos_uint32 pos;
    picoos_uint16 lentable;
    picoos_uint16 posbound;
    picoos_uint16 i;
    picoos_uint8 j;

    *outfallbackval = 0;

    pos = 0;
    /* check what can be checked */
    if ((imtnr >= dt->inpmaptable[pos++]) ||   /* outside tablenr range? */
        (invalmaxlen == 0) ||                  /* too short? */
        ((ilen = picobase_det_utf8_length(inval[0])) == 0) ||   /* invalid? */
        (ilen > invalmaxlen)) {                /* not accessible? */
        PICODBG_ERROR(("check failed: nrtab: %d, imtnr: %d, invalmaxlen: %d, "
                       "ilen: %d",
                       dt->inpmaptable[pos-1], imtnr, invalmaxlen, ilen));
        return FALSE;
    }

    /* go forward to the needed tablenr, each table starts with its
       two byte length */
    for (i = 0; i < imtnr; i++) {
        lentable = ((picoos_uint16)(dt->inpmaptable[pos+1])) << 8 |
            dt->inpmaptable[pos];
        pos += lentable;
    }

    /* get length and check type of inpmaptable */
    lentable = ((picoos_uint16)(dt->inpmaptable[pos+1])) << 8 |
        dt->inpmaptable[pos];
    posbound = pos + lentable;
    pos += 2;

#if defined(PICO_DEBUG)
    if (1) {
        int id;
        PICODBG_TRACE(("imtnr %d", imtnr));
        for (id = pos-2; id < posbound; id++) {
            PICODBG_TRACE(("imtbyte pos %d, %c %d", id - (pos-2),
                           dt->inpmaptable[id], dt->inpmaptable[id]));
        }
    }
#endif

    /* check type of table */
    if (dt->inpmaptable[pos] != PICOKDT_MTTYPE_GRAPH) {
        /* empty table does not make sense for graph */
        /* wrong table type */
        PICODBG_ERROR(("wrong table type"));
        return FALSE;
    }
    pos++;

    /* the next two bytes are handed out as fallback value and are also
       used as upper bound for the number of entries to look at */
    *outfallbackval = ((picoos_uint16)(dt->inpmaptable[pos+1])) << 8 |
        dt->inpmaptable[pos];
    pos += 2;

    /* sequential search */
    for (i = 0; (i < *outfallbackval) && (pos < posbound); i++) {
        tlen = picobase_det_utf8_length(dt->inpmaptable[pos]);
        if (tlen == 0) {
            /* invalid lead byte in the table: pos would not advance any
               more and, as ilen > 0, no entry could match; stop here
               instead of spinning through the remaining count */
            break;
        }
        if ((pos + tlen) > posbound) {
            PICODBG_ERROR(("trying outside imt, posb: %d, pos: %d, tlen: %d",
                           posbound, pos, tlen));
            return FALSE;
        }
        if (ilen == tlen) {
            cont = TRUE;
            for (j = 0; cont && (j < ilen); j++) {
                if (dt->inpmaptable[pos + j] != inval[j]) {
                    cont = FALSE;
                }
            }
            if (cont && (j == ilen)) {    /* match found */
                *outval = i;
                PICODBG_TRACE(("found mapval, posb %d, pos %d, i %d, tlen %d",
                               posbound, pos, i, tlen));
                return TRUE;
            }
        }
        pos += tlen;
    }

    PICODBG_DEBUG(("outside imt %d, posb/pos/i: %d/%d/%d, fallback: %d",
                   imtnr, posbound, pos, i, *outfallbackval));
    return FALSE;
}
/* Map the tree output inval through the fixed-size output map table.
   byte table -> one byte per entry (MapOutByte),
   word table -> two bytes per entry, little endian (MapOutWord).
   - outval: the mapped value; outval = inval if there is no output map
             table (outmaptable == NULL) or if the table is of type
             empty; 0 if inval is not below the number of map values
   returns TRUE if inval could be mapped, FALSE otherwise */
static picoos_uint8 kdtMapOutFixed(const kdt_subobj_t *dt,
                                   const picoos_uint16 inval,
                                   picoos_uint16 *outval) {
    picoos_uint8 size;
    picoos_uint16 nr;

    /* no check of lentable vs. nr in initialize done */

    /* a missing table is handled like in kdtReverseMapOutFixed: no
       mapping needed; this also avoids dereferencing a NULL table */
    if (dt->outmaptable == NULL) {
        PICODBG_TRACE(("empty table"));
        *outval = inval;
        return TRUE;
    }

    size = 0;

    /* type */
    nr = dt->outmaptable[PICOKDT_MTPOS_START + PICOKDT_MTPOS_TABLETYPE];

    /* check type of table and set size */
    if (nr == PICOKDT_MTTYPE_EMPTY) {
        /* empty table no mapping needed */
        PICODBG_TRACE(("empty table"));
        *outval = inval;
        return TRUE;
    } else if (nr == PICOKDT_MTTYPE_BYTE) {
        size = 1;
    } else if (nr == PICOKDT_MTTYPE_WORD) {
        size = 2;
    } else {
        /* wrong table type */
        PICODBG_ERROR(("wrong table type %d", nr));
        return FALSE;
    }

    /* number of mapvalues */
    nr = ((picoos_uint16)(dt->outmaptable[PICOKDT_MTPOS_START +
                                          PICOKDT_MTPOS_NUMBER + 1])) << 8
        | dt->outmaptable[PICOKDT_MTPOS_START + PICOKDT_MTPOS_NUMBER];

    if (inval < nr) {
        /* inval is used as index into the map values */
        if (size == 1) {
            *outval = dt->outmaptable[PICOKDT_MTPOS_START +
                                      PICOKDT_MTPOS_MAPSTART + (size * inval)];
        } else {
            *outval = ((picoos_uint16)(dt->outmaptable[PICOKDT_MTPOS_START +
                          PICOKDT_MTPOS_MAPSTART + (size * inval) + 1])) << 8
                | dt->outmaptable[PICOKDT_MTPOS_START +
                                  PICOKDT_MTPOS_MAPSTART + (size * inval)];
        }
        return TRUE;
    } else {
        *outval = 0;
        return FALSE;
    }
}
/* Reverse lookup in the fixed-size output map table: search the table
   for the entry equal to inval and hand out its index.
   byte table -> one byte per entry (ReverseMapOutByte),
   word table -> two bytes per entry, little endian (ReverseMapOutWord).
   The outmaptable is also used to map from the decoded tree output
   domain to the direct tree output domain.
   - outval:         index of the matching entry; outval = inval if
                     there is no output map table (outmaptable == NULL)
   - outfallbackval: fallback value read from the table; it stays 0 if
                     the function fails before reaching it
   returns TRUE if inval could be mapped, FALSE otherwise */
static picoos_uint8 kdtReverseMapOutFixed(const kdt_subobj_t *dt,
                                          const picoos_uint16 inval,
                                          picoos_uint16 *outval,
                                          picoos_uint16 *outfallbackval) {
    picoos_uint8 entrysize;
    picoos_uint32 cur;
    picoos_uint16 tablelen;
    picoos_uint16 endpos;
    picoos_uint16 idx;

    /* no check of lentable vs. nr in initialize done */

    cur = 0;
    *outval = 0;
    *outfallbackval = 0;

    if (NULL == dt->outmaptable) {
        /* no table, no mapping needed */
        PICODBG_TRACE(("empty table"));
        *outval = inval;
        return TRUE;
    }

    /* the first byte holds the number of tables, only one omt possible */
    if (dt->outmaptable[cur++] != 1) {
        PICODBG_ERROR(("check failed: nrtab: %d", dt->outmaptable[cur-1]));
        return FALSE;
    }

    /* two byte table length, counted from the start of the table */
    tablelen = ((picoos_uint16)(dt->outmaptable[cur+1])) << 8 |
        dt->outmaptable[cur];
    endpos = cur + tablelen;
    cur += 2;

    /* the table type decides on the entry size; the empty table type is
       not handled here, a not existing omt is covered by the NULL check
       above */
    if (dt->outmaptable[cur] == PICOKDT_MTTYPE_BYTE) {
        entrysize = 1;
    } else if (dt->outmaptable[cur] == PICOKDT_MTTYPE_WORD) {
        entrysize = 2;
    } else {
        PICODBG_ERROR(("wrong table type %d", dt->outmaptable[cur]));
        return FALSE;
    }
    cur++;

    /* the next two bytes are handed out as fallback value and are also
       used as upper bound for the number of entries to look at */
    *outfallbackval = ((picoos_uint16)(dt->outmaptable[cur+1])) << 8 |
        dt->outmaptable[cur];
    cur += 2;

    /* sequential search, one loop per entry size */
    idx = 0;
    if (entrysize == 1) {
        while ((idx < *outfallbackval) && (cur < endpos)) {
            if (inval == dt->outmaptable[cur]) {
                *outval = idx;
                PICODBG_TRACE(("s1 inval %d -> outval %d", inval, *outval));
                return TRUE;
            }
            cur++;
            idx++;
        }
    } else {
        /* a word entry needs two bytes, so stop one byte earlier */
        endpos--;
        while ((idx < *outfallbackval) && (cur < endpos)) {
            if (inval == (((picoos_uint16)(dt->outmaptable[cur+1])) << 8 |
                          dt->outmaptable[cur])) {
                *outval = idx;
                PICODBG_TRACE(("s2 inval %d -> outval %d", inval, *outval));
                return TRUE;
            }
            cur += 2;
            idx++;
        }
    }

    PICODBG_DEBUG(("no mapping found, fallback: %d", *outfallbackval));
    return FALSE;
}
/* Public entry point for the reverse output mapping of a PosD tree:
   forwards the decision tree embedded in the PosD sub-object to
   kdtReverseMapOutFixed and returns its result unchanged. */
picoos_uint8 picokdt_dtPosDreverseMapOutFixed(const picokdt_DtPosD this,
                                              const picoos_uint16 inval,
                                              picoos_uint16 *outval,
                                              picoos_uint16 *outfallbackval) {
    return kdtReverseMapOutFixed(&(((kdtposd_subobj_t *)this)->dt),
                                 inval, outval, outfallbackval);
}
/* size==1 (MapOutByteToVar) is not yet implemented;
   size is fixed to 2 (MapOutWordToVar) */
/* Map the tree output inval to a variable number of output values.
   As read by this function the table consists of: table count (1 byte),
   table length (2), table type (1), number of entries (2), one two byte
   end offset per entry, followed by the output values (1 byte each).
   - nr:           number of values written to outval
   - outval:       receives the values mapped to
   - outvalmaxlen: number of elements available in outval
   returns TRUE if inval could be mapped, FALSE otherwise */
static picoos_uint8 kdtMapOutVar(const kdt_subobj_t *dt,
                                 const picoos_uint16 inval,
                                 picoos_uint8 *nr,
                                 picoos_uint16 *outval,
                                 const picoos_uint16 outvalmaxlen) {
    picoos_uint16 cur;
    picoos_uint16 endoffidx;
    picoos_uint16 tablelen;
    picoos_uint16 nrentries;
    picoos_uint8 offsize;
    picoos_uint16 startoff;
    picoos_uint16 k;

    if (NULL == dt->outmaptable) {
        /* a missing table is not possible here */
        PICODBG_ERROR(("no table found"));
        return FALSE;
    }

    /* nr of tables == 1 already checked in *Initialize, so skip the
       table count and start at position 1 with the table length */
    cur = 1;
    tablelen = (((picoos_uint16)(dt->outmaptable[cur + 1])) << 8 |
                dt->outmaptable[cur]);
    cur += 2;

    /* only this table type is supported */
    if (dt->outmaptable[cur] != PICOKDT_MTTYPE_BYTETOVAR) {
        PICODBG_ERROR(("wrong table type %d", dt->outmaptable[cur]));
        return FALSE;
    }
    offsize = 2;
    cur++;

    /* nr of ele in maptable (= nr of possible invals) */
    nrentries = (((picoos_uint16)(dt->outmaptable[cur+1])) << 8 |
                 dt->outmaptable[cur]);
    cur += 2;

    /* check what's checkable */
    if (nrentries == 0) {
        PICODBG_ERROR(("table with length zero"));
        return FALSE;
    }
    if (inval >= nrentries) {
        PICODBG_ERROR(("inval %d outside valid range %d", inval, nrentries));
        return FALSE;
    }

    PICODBG_TRACE(("inval %d, lentable %d, nrinbytes %d, pos %d", inval,
                   tablelen, nrentries, cur));

    /* index of the end offset of entry inval (= start of the next
       entry); the start offset is the end offset of the previous
       entry, or 0 for the first one */
    endoffidx = cur + (offsize*inval);
    startoff = 0;
    if (inval != 0) {
        startoff = (((picoos_uint16)(dt->outmaptable[endoffidx - 1])) << 8 |
                    dt->outmaptable[endoffidx - 2]);
    }

    /* number of output values: end offset - start offset */
    *nr = (((picoos_uint16)(dt->outmaptable[endoffidx + 1])) << 8 |
           dt->outmaptable[endoffidx]) - startoff;

    PICODBG_TRACE(("offset1 %d, nr %d, pos %d", startoff, *nr, cur));

    /* skip the offset array to get to the 1st value being mapped to */
    cur += (offsize * nrentries) + startoff;

    if ((cur + *nr - 1) > tablelen) {
        /* outside table, should not happen */
        PICODBG_ERROR(("problem with table index, pos %d, nr %d, len %d",
                       cur, *nr, tablelen));
        return FALSE;
    }
    if (*nr > outvalmaxlen) {
        /* not enough space in outval */
        PICODBG_ERROR(("overflow in outval, %d > %d", *nr, outvalmaxlen));
        return FALSE;
    }

    /* finally, copy outmap result to outval */
    k = 0;
    while (k < *nr) {
        outval[k] = dt->outmaptable[cur];
        cur++;
        k++;
    }
    return TRUE;
}
/* ************************************************************/
/* decision tree POS prediction (PosP) functions */
/* ************************************************************/
/* number of prefix and suffix graphemes used to construct the input vector */
#define KDT_POSP_NRGRAPHPREFATT 4
#define KDT_POSP_NRGRAPHSUFFATT 6
#define KDT_POSP_NRGRAPHATT 10
/* positions of specgraph and nrgraphs attributes */
#define KDT_POSP_SPECGRAPHATTPOS 10
#define KDT_POSP_NRGRAPHSATTPOS 11
/* construct PosP input vector
PosP invec: 12 elements
prefix 0-3 prefix graphemes (encoded using tree inpmaptable 0-3)
suffix 4-9 suffix graphemes (encoded using tree inpmaptable 4-9)
isspecchar 10 is a special grapheme (e.g. hyphen) inside the word (0/1)?
nr-utf-graphs 11 number of graphemes (ie. UTF8 chars)
if there are less than 10 graphemes, each grapheme is used only
once, with the suffix having higher priority, ie. elements 0-9 are
filled as follows:
#graph
1 0 0 0 0 0 0 0 0 0 1
2 0 0 0 0 0 0 0 0 1 2
3 0 0 0 0 0 0 0 1 2 3
4 0 0 0 0 0 0 1 2 3 4
5 0 0 0 0 0 1 2 3 4 5
6 0 0 0 0 1 2 3 4 5 6
7 1 0 0 0 2 3 4 5 6 7
8 1 2 0 0 3 4 5 6 7 8
9 1 2 3 0 4 5 6 7 8 9
10 1 2 3 4 5 6 7 8 9 10
11 1 2 3 4 6 7 8 9 10 11
...
1-6: Fill chbuf
7-10: front to invec 1st part, remove front, add rear
>10: remove front, add rear
no more graph ->
while chbuflen>0:
add rear to the last empty slot in 2nd part of invec, remove rear
*/
picoos_uint8 picokdt_dtPosPconstructInVec(const picokdt_DtPosP this,
const picoos_uint8 *graph,
const picoos_uint16 graphlen,
const picoos_uint8 specgraphflag) {
kdtposp_subobj_t *dtposp;
/* utf8 circular char buffer, used as restricted input deque */
/* 2nd part of graph invec has KDT_POSP_NRGRAPHSUFFATT elements, */
/* max of UTF8_MAXLEN bytes per utf8 char */
picoos_uint8 chbuf[KDT_POSP_NRGRAPHSUFFATT][PICOBASE_UTF8_MAXLEN];
picoos_uint8 chbrear; /* next free pos */
picoos_uint8 chbfront; /* next read pos */
picoos_uint8 chblen; /* empty=0; full=KDT_POSP_NRGRAPHSUFFATT */
picoos_uint16 poscg; /* position of current graph (= utf8 char) */
picoos_uint16 lencg = 0; /* length of current grapheme */
picoos_uint16 nrutfg; /* number of utf graphemes */
picoos_uint8 invecpos; /* next element to add in invec */
picoos_uint16 fallback; /* fallback value for failed graph encodings */
picoos_uint8 i;
dtposp = (kdtposp_subobj_t *)this;
chbrear = 0;
chbfront = 0;
chblen = 0;
poscg = 0;
nrutfg = 0;
invecpos = 0;
PICODBG_DEBUG(("graphlen %d", graphlen));
/* not needed, since all elements are set
for (i = 0; i < PICOKDT_NRATT_POSP; i++) {
dtposp->invec[i] = '\x63';
}
*/
dtposp->inveclen = 0;
while ((poscg < graphlen) &&
((lencg = picobase_det_utf8_length(graph[poscg])) > 0)) {
if (chblen >= KDT_POSP_NRGRAPHSUFFATT) { /* chbuf full */
if (invecpos < KDT_POSP_NRGRAPHPREFATT) { /* prefix not full */
/* att-encode front utf graph and add in invec */
if (!kdtMapInGraph(&(dtposp->dt), invecpos,
chbuf[chbfront], PICOBASE_UTF8_MAXLEN,
&(dtposp->invec[invecpos]),
&fallback)) {
if (fallback) {
dtposp->invec[invecpos] = fallback;
} else {
return FALSE;
}
}
invecpos++;
}
/* remove front utf graph */
chbfront++;
chbfront %= KDT_POSP_NRGRAPHSUFFATT;
chblen--;
}
/* add current utf graph to chbuf */
for (i=0; i<lencg; i++) {
chbuf[chbrear][i] = graph[poscg++];
}
if (i < PICOBASE_UTF8_MAXLEN) {
chbuf[chbrear][i] = '\0';
}
chbrear++;
chbrear %= KDT_POSP_NRGRAPHSUFFATT;
chblen++;
/* increase utf graph count */
nrutfg++;
}
if ((lencg == 0) || (chblen == 0)) {
return FALSE;
} else if (chblen > 0) {
while (invecpos < KDT_POSP_NRGRAPHPREFATT) { /* fill up prefix */
if (!kdtMapInGraph(&(dtposp->dt), invecpos,
PICOKDT_OUTSIDEGRAPH_DEFSTR,
PICOKDT_OUTSIDEGRAPH_DEFLEN,
&(dtposp->invec[invecpos]), &fallback)) {
if (fallback) {
dtposp->invec[invecpos] = fallback;
} else {
return FALSE;
}
}
invecpos++;
}
for (i = (KDT_POSP_NRGRAPHATT - 1);
i >= KDT_POSP_NRGRAPHPREFATT; i--) {
if (chblen > 0) {
if (chbrear == 0) {
chbrear = KDT_POSP_NRGRAPHSUFFATT - 1;
} else {
chbrear--;
}
if (!kdtMapInGraph(&(dtposp->dt), i, chbuf[chbrear],
PICOBASE_UTF8_MAXLEN,
&(dtposp->invec[i]), &fallback)) {
if (fallback) {
dtposp->invec[i] = fallback;
} else {
return FALSE;
}
}
chblen--;
} else {
if (!kdtMapInGraph(&(dtposp->dt), i,
PICOKDT_OUTSIDEGRAPH_DEFSTR,
PICOKDT_OUTSIDEGRAPH_DEFLEN,
&(dtposp->invec[i]), &fallback)) {
if (fallback) {
dtposp->invec[i] = fallback;
} else {
return FALSE;
}
}
}
}
/* set isSpecChar attribute, reuse var i */
i = (specgraphflag ? 1 : 0);
if (!kdtMapInFixed(&(dtposp->dt), KDT_POSP_SPECGRAPHATTPOS, i,
&(dtposp->invec[KDT_POSP_SPECGRAPHATTPOS]),
&fallback)) {
if (fallback) {
dtposp->invec[KDT_POSP_SPECGRAPHATTPOS] = fallback;
} else {
return FALSE;
}
}
/* set nrGraphs attribute */
if (!kdtMapInFixed(&(dtposp->dt), KDT_POSP_NRGRAPHSATTPOS, nrutfg,
&(dtposp->invec[KDT_POSP_NRGRAPHSATTPOS]),
&fallback)) {
if (fallback) {
dtposp->invec[KDT_POSP_NRGRAPHSATTPOS] = fallback;
} else {
return FALSE;
}
}
PICODBG_DEBUG(("posp-invec: [%d,%d,%d,%d|%d,%d,%d,%d,%d,%d|%d|%d]",
dtposp->invec[0], dtposp->invec[1], dtposp->invec[2],
dtposp->invec[3], dtposp->invec[4], dtposp->invec[5],
dtposp->invec[6], dtposp->invec[7], dtposp->invec[8],
dtposp->invec[9], dtposp->invec[10],
dtposp->invec[11], dtposp->invec[12]));
dtposp->inveclen = PICOKDT_NRINPMT_POSP;
return TRUE;
}
return FALSE;
}
/* Classify the PosP input vector: ask the tree, starting at byte 0,
   bit 7, as long as kdtAskTree returns a value > 0.
   returns non-zero if the last kdtAskTree call returned 0 and a
   decision is set (dt.dset), 0 otherwise */
picoos_uint8 picokdt_dtPosPclassify(const picokdt_DtPosP this) {
    kdtposp_subobj_t *posp = (kdtposp_subobj_t *)this;
    kdt_subobj_t *tree = &(posp->dt);
    picoos_uint32 bytepos = 0;
    picoos_int8 bitpos = 7;
    picoos_int8 status;

    for (;;) {
        status = kdtAskTree(tree, posp->invec, PICOKDT_NRATT_POSP,
                            &bytepos, &bitpos);
        if (status <= 0) {
            break;
        }
        PICODBG_TRACE(("asking tree"));
    }
    PICODBG_DEBUG(("done: %d", tree->dclass));
    return ((0 == status) && tree->dset);
}
/* Decompose the PosP tree output and hand out the class in dtres.
   If a decision is set and it can be mapped through the output map
   table, dtres->set is TRUE and dtres->class holds the mapped value;
   otherwise dtres->set is FALSE and dtres->class is not written.
   returns TRUE if okay, FALSE otherwise */
picoos_uint8 picokdt_dtPosPdecomposeOutClass(const picokdt_DtPosP this,
                                             picokdt_classify_result_t *dtres) {
    kdtposp_subobj_t *posp = (kdtposp_subobj_t *)this;
    picoos_uint16 mapped;

    if (!posp->dt.dset ||
        !kdtMapOutFixed(&(posp->dt), posp->dt.dclass, &mapped)) {
        dtres->set = FALSE;
        return FALSE;
    }
    dtres->set = TRUE;
    dtres->class = mapped;
    return TRUE;
}
/* ************************************************************/
/* decision tree POS disambiguation (PosD) functions */
/* ************************************************************/
/* Construct the PosD input vector: each of the PICOKDT_NRATT_POSD
   elements of input is mapped through the input map table with the
   same number. An element whose mapping fails is set to the fallback
   value of its table, if that is non-zero.
   returns TRUE if all elements could be set (inveclen is then set to
   PICOKDT_NRINPMT_POSD), FALSE (inveclen stays 0) otherwise */
picoos_uint8 picokdt_dtPosDconstructInVec(const picokdt_DtPosD this,
                                          const picoos_uint16 * input) {
    kdtposd_subobj_t *posd = (kdtposd_subobj_t *)this;
    picoos_uint16 fallback = 0;
    picoos_uint8 att;

    posd->inveclen = 0;

    PICODBG_DEBUG(("in: [%d,%d,%d|%d|%d,%d,%d]",
                   input[0], input[1], input[2],
                   input[3], input[4], input[5],
                   input[6]));

    for (att = 0; att < PICOKDT_NRATT_POSD; att++) {
        /* do the imt mapping for all inval */
        if (kdtMapInFixed(&(posd->dt), att, input[att],
                          &(posd->invec[att]), &fallback)) {
            continue;
        }
        if (!fallback) {
            PICODBG_ERROR(("problem doing input mapping"));
            return FALSE;
        }
        PICODBG_DEBUG(("*** using fallback for input mapping: %i -> %i", input[att], fallback));
        posd->invec[att] = fallback;
    }

    PICODBG_DEBUG(("out: [%d,%d,%d|%d|%d,%d,%d]",
                   posd->invec[0], posd->invec[1], posd->invec[2],
                   posd->invec[3], posd->invec[4], posd->invec[5],
                   posd->invec[6]));
    posd->inveclen = PICOKDT_NRINPMT_POSD;
    return TRUE;
}
/* Classify the PosD input vector: ask the tree, starting at byte 0,
   bit 7, as long as kdtAskTree returns a value > 0.
   - treeout: receives the direct tree output (dt.dclass); only written
              on success
   returns TRUE if the last kdtAskTree call returned 0 and a decision
   is set (dt.dset), FALSE otherwise */
picoos_uint8 picokdt_dtPosDclassify(const picokdt_DtPosD this,
                                    picoos_uint16 *treeout) {
    kdtposd_subobj_t *posd = (kdtposd_subobj_t *)this;
    kdt_subobj_t *tree = &(posd->dt);
    picoos_uint32 bytepos = 0;
    picoos_int8 bitpos = 7;
    picoos_int8 status;

    for (;;) {
        status = kdtAskTree(tree, posd->invec, PICOKDT_NRATT_POSD,
                            &bytepos, &bitpos);
        if (status <= 0) {
            break;
        }
        PICODBG_TRACE(("asking tree"));
    }
    PICODBG_DEBUG(("done: %d", tree->dclass));

    if ((status != 0) || !tree->dset) {
        return FALSE;
    }
    *treeout = tree->dclass;
    return TRUE;
}
/* Decompose the PosD tree output and hand out the class in dtres.
   dtres: POS classification result; if a decision is set and it can be
          mapped through the output map table, dtres->set is TRUE and
          dtres->class holds the mapped value; otherwise dtres->set is
          FALSE and dtres->class is not written
   returns: TRUE if okay, FALSE otherwise
*/
picoos_uint8 picokdt_dtPosDdecomposeOutClass(const picokdt_DtPosD this,
                                             picokdt_classify_result_t *dtres) {
    kdtposd_subobj_t *posd = (kdtposd_subobj_t *)this;
    picoos_uint16 mapped;

    if (!posd->dt.dset ||
        !kdtMapOutFixed(&(posd->dt), posd->dt.dclass, &mapped)) {
        dtres->set = FALSE;
        return FALSE;
    }
    dtres->set = TRUE;
    dtres->class = mapped;
    return TRUE;
}
/* ************************************************************/
/* decision tree grapheme-to-phoneme (G2P) functions */
/* ************************************************************/
/* Get the nr'th (starting at 0) utf char in utfgraph: step over nr
   utf chars, then read the next one into utf8char.
   returns FALSE if stepping fails, otherwise the result of
   picobase_get_next_utf8char */
static picoos_uint8 kdtGetUTF8char(const picoos_uint8 *utfgraph,
                                   const picoos_uint16 graphlen,
                                   const picoos_uint16 nr,
                                   picoos_uint8 *utf8char) {
    picoos_uint32 bytepos = 0;
    picoos_uint16 skipped = 0;

    while (skipped < nr) {
        if (!picobase_get_next_utf8charpos(utfgraph, graphlen, &bytepos)) {
            return FALSE;
        }
        skipped++;
    }
    return picobase_get_next_utf8char(utfgraph, graphlen, &bytepos, utf8char);
}
/* Determine the utfchar count (starting at 1) of the utfchar starting
   at byte position pos: utf chars are stepped over from the start of
   utfgraph until the byte position is beyond pos or graphlen chars
   have been counted. If stepping fails, the count reached so far plus
   one is returned. */
static picoos_uint16 kdtGetUTF8Nr(const picoos_uint8 *utfgraph,
                                  const picoos_uint16 graphlen,
                                  const picoos_uint16 pos) {
    picoos_uint32 bytepos = 0;
    picoos_uint16 nrchars;

    for (nrchars = 0; (bytepos <= pos) && (nrchars < graphlen); nrchars++) {
        if (!picobase_get_next_utf8charpos(utfgraph, graphlen, &bytepos)) {
            PICODBG_ERROR(("invalid utf8 string, count: %d, pos: %d, post: %d",
                           nrchars, pos, bytepos));
            return nrchars + 1;
        }
    }
    return nrchars;
}
/* Construct the G2P input vector.
   - graph/graphlen: utf8 graphemes of the word and their length in bytes
   - count:          byte position in graph handed to kdtGetUTF8Nr to
                     determine the number of the current utf8 char
   - pos:            word POS                          (attribute 9)
   - nrvow:          nr of vowel-like graphs in word   (attribute 10)
   - ordvow:         order of current vowel-like graph (attribute 11)
   - primstressflag: mapped as 1 if *primstressflag == 1, else as 0
                                                       (attribute 12)
   - phonech1/2/3:   phone chunk right context +1/+2/+3
                                                       (attributes 13-15)
   Attributes 0-4 are the left context (-4/-3/-2/-1) and the current
   grapheme, attributes 5-8 the right context (1/2/3/4).
   An attribute whose mapping fails is set to the fallback value of its
   table if that is non-zero, otherwise to 0.
   returns FALSE if at least one attribute had to be set to 0 that way,
   TRUE otherwise; inveclen is set to PICOKDT_NRINPMT_G2P in both cases */
picoos_uint8 picokdt_dtG2PconstructInVec(const picokdt_DtG2P this,
                                         const picoos_uint8 *graph,
                                         const picoos_uint16 graphlen,
                                         const picoos_uint8 count,
                                         const picoos_uint8 pos,
                                         const picoos_uint8 nrvow,
                                         const picoos_uint8 ordvow,
                                         picoos_uint8 *primstressflag,
                                         const picoos_uint16 phonech1,
                                         const picoos_uint16 phonech2,
                                         const picoos_uint16 phonech3) {
    kdtg2p_subobj_t *dtg2p;
    picoos_uint16 fallback = 0;
    picoos_uint8 iAttr;
    picoos_uint8 utf8char[PICOBASE_UTF8_MAXLEN + 1];
    picoos_uint16 inval;
    picoos_int16 cinv;
    picoos_uint8 retval;
    picoos_int32 utfgraphlen;
    picoos_uint16 utfcount;

    dtg2p = (kdtg2p_subobj_t *)this;
    retval = TRUE;
    inval = 0;

    PICODBG_TRACE(("in:  [%d,%d,%d|%d,%d|%d|%d,%d,%d]", graphlen, count, pos,
                   nrvow, ordvow, *primstressflag, phonech1, phonech2,
                   phonech3));

    dtg2p->inveclen = 0;

    /* many speed-ups possible */

    /* graph attributes */
    /*   count   >                     =         <=     count
       iAttr     lowbound              eow       upbound  delta
         0        4                    4         graphlen   5
         1        3                    3         graphlen   4
         2        2                    2         graphlen   3
         3        1                    1         graphlen   2
         4        0                    -         graphlen   1

         5        0                  graphlen    graphlen-1 0
         6        0                  graphlen-1  graphlen-2 -1
         7        0                  graphlen-2  graphlen-3 -2
         8        0                  graphlen-3  graphlen-4 -3
    */

    /* graph attributes left (context -4/-3/-2/-1) and current, MapInGraph */

    utfgraphlen = picobase_utf8_length(graph, graphlen);
    if (utfgraphlen <= 0) {
        utfgraphlen = 0;
    }
    utfcount = kdtGetUTF8Nr(graph, graphlen, count);

    cinv = 4;
    for (iAttr = 0; iAttr < 5; iAttr++) {
        if ((utfcount > cinv) && (utfcount <= utfgraphlen)) {
            /* the utf char cinv chars left of the current one */
            if (!kdtGetUTF8char(graph, graphlen, utfcount-cinv-1,
                                utf8char)) {
                PICODBG_WARN(("problem getting UTF char %d", utfcount-cinv-1));
                utf8char[0] = PICOKDT_OUTSIDEGRAPH_DEFCH;
                utf8char[1] = '\0';
            }
        } else {
            /* outside of the word: end-of-word or default grapheme */
            if ((utfcount == cinv) && (iAttr != 4)) {
                utf8char[0] = PICOKDT_OUTSIDEGRAPH_EOW_DEFCH;
            } else {
                utf8char[0] = PICOKDT_OUTSIDEGRAPH_DEFCH;
            }
            utf8char[1] = '\0';
        }

        if (!kdtMapInGraph(&(dtg2p->dt), iAttr,
                           utf8char, PICOBASE_UTF8_MAXLEN,
                           &(dtg2p->invec[iAttr]),
                           &fallback)) {
            if (fallback) {
                dtg2p->invec[iAttr] = fallback;
            } else {
                PICODBG_WARN(("setting attribute %d to zero", iAttr));
                dtg2p->invec[iAttr] = 0;
                retval = FALSE;
            }
        }
        PICODBG_TRACE(("invec %d %c", iAttr, utf8char[0]));
        cinv--;
    }

    /* graph attributes right (context 1/2/3/4), MapInGraph */

    cinv = utfgraphlen;
    for (iAttr = 5; iAttr < 9; iAttr++) {
        if ((utfcount > 0) && (utfcount <= (cinv - 1))) {
            /* the utf char (utfgraphlen - cinv + 1) chars right of the
               current one */
            if (!kdtGetUTF8char(graph, graphlen, utfcount+utfgraphlen-cinv,
                                utf8char)) {
                /* report the index that was actually requested */
                PICODBG_WARN(("problem getting UTF char %d",
                              utfcount+utfgraphlen-cinv));
                utf8char[0] = PICOKDT_OUTSIDEGRAPH_DEFCH;
                utf8char[1] = '\0';
            }
        } else {
            /* outside of the word: end-of-word or default grapheme */
            if (utfcount == cinv) {
                utf8char[0] = PICOKDT_OUTSIDEGRAPH_EOW_DEFCH;
                utf8char[1] = '\0';
            } else {
                utf8char[0] = PICOKDT_OUTSIDEGRAPH_DEFCH;
                utf8char[1] = '\0';
            }
        }
        if (!kdtMapInGraph(&(dtg2p->dt), iAttr,
                           utf8char, PICOBASE_UTF8_MAXLEN,
                           &(dtg2p->invec[iAttr]),
                           &fallback)) {
            if (fallback) {
                dtg2p->invec[iAttr] = fallback;
            } else {
                PICODBG_WARN(("setting attribute %d to zero", iAttr));
                dtg2p->invec[iAttr] = 0;
                retval = FALSE;
            }
        }
        PICODBG_TRACE(("invec %d %c", iAttr, utf8char[0]));
        cinv--;
    }

    /* other attributes, MapInFixed */
    for (iAttr = 9; iAttr < PICOKDT_NRATT_G2P; iAttr++) {
        switch (iAttr) {
            case 9:     /* word POS, Fix1 */
                inval = pos;
                break;
            case 10:    /* nr of vowel-like graphs in word, if vowel, Fix2  */
                inval = nrvow;
                break;
            case 11:    /* order of current vowel-like graph in word, Fix2 */
                inval = ordvow;
                break;
            case 12:    /* primary stress mark, Fix2 */
                if (*primstressflag == 1) {
                    /*already set previously*/
                    inval = 1;
                } else {
                    inval = 0;
                }
                break;
            case 13:    /* phone chunk right context +1, Hist */
                inval = phonech1;
                break;
            case 14:    /* phone chunk right context +2, Hist */
                inval = phonech2;
                break;
            case 15:    /* phone chunk right context +3, Hist */
                inval = phonech3;
                break;
        }

        PICODBG_TRACE(("invec %d %d", iAttr, inval));

        if (!kdtMapInFixed(&(dtg2p->dt), iAttr, inval,
                           &(dtg2p->invec[iAttr]), &fallback)) {
            if (fallback) {
                dtg2p->invec[iAttr] = fallback;
            } else {
                PICODBG_WARN(("setting attribute %d to zero", iAttr));
                dtg2p->invec[iAttr] = 0;
                retval = FALSE;
            }
        }
    }

    /* sixteen conversions for invec[0] .. invec[15] */
    PICODBG_TRACE(("out: [%d,%d,%d,%d|%d|%d,%d,%d,%d|%d,%d,%d,%d|"
                   "%d,%d,%d]", dtg2p->invec[0], dtg2p->invec[1],
                   dtg2p->invec[2], dtg2p->invec[3], dtg2p->invec[4],
                   dtg2p->invec[5], dtg2p->invec[6], dtg2p->invec[7],
                   dtg2p->invec[8], dtg2p->invec[9], dtg2p->invec[10],
                   dtg2p->invec[11], dtg2p->invec[12], dtg2p->invec[13],
                   dtg2p->invec[14], dtg2p->invec[15]));

    dtg2p->inveclen = PICOKDT_NRINPMT_G2P;
    return retval;
}
/* Classify the G2P input vector: ask the tree, starting at byte 0,
   bit 7, as long as kdtAskTree returns a value > 0.
   - treeout: receives the direct tree output (dt.dclass); only written
              on success
   returns TRUE if the last kdtAskTree call returned 0 and a decision
   is set (dt.dset), FALSE otherwise */
picoos_uint8 picokdt_dtG2Pclassify(const picokdt_DtG2P this,
                                   picoos_uint16 *treeout) {
    kdtg2p_subobj_t *g2p = (kdtg2p_subobj_t *)this;
    kdt_subobj_t *tree = &(g2p->dt);
    picoos_uint32 bytepos = 0;
    picoos_int8 bitpos = 7;
    picoos_int8 status;

    for (;;) {
        status = kdtAskTree(tree, g2p->invec, PICOKDT_NRATT_G2P,
                            &bytepos, &bitpos);
        if (status <= 0) {
            break;
        }
        PICODBG_TRACE(("asking tree"));
    }
    PICODBG_TRACE(("done: %d", tree->dclass));

    if ((status != 0) || !tree->dset) {
        return FALSE;
    }
    *treeout = tree->dclass;
    return TRUE;
}
/* Decompose the G2P tree output into a vector of classes in dtvres.
   If a decision is set, it is mapped through kdtMapOutVar, which fills
   dtvres->nr and dtvres->classvec (at most PICOKDT_MAXSIZE_OUTVEC
   elements). If no decision is set or the mapping fails, dtvres->nr is
   set to 0.
   returns TRUE if okay, FALSE otherwise */
picoos_uint8 picokdt_dtG2PdecomposeOutClass(const picokdt_DtG2P this,
                                  picokdt_classify_vecresult_t *dtvres) {
    kdtg2p_subobj_t *g2p = (kdtg2p_subobj_t *)this;

    if (!g2p->dt.dset ||
        !kdtMapOutVar(&(g2p->dt), g2p->dt.dclass, &(dtvres->nr),
                      dtvres->classvec, PICOKDT_MAXSIZE_OUTVEC)) {
        dtvres->nr = 0;
        return FALSE;
    }
    return TRUE;
}
/* ************************************************************/
/* decision tree phrasing (PHR) functions */
/* ************************************************************/
/* Construct the PHR input vector. The attributes are, in this order:
   pre2, pre1, src, fol1, fol2, nrwordspre, nrwordsfol, nrsyllsfol;
   attribute i is mapped through input map table i. An attribute whose
   mapping fails is set to the fallback value of its table, if that is
   non-zero.
   returns TRUE if all attributes could be set (inveclen is then set to
   PICOKDT_NRINPMT_PHR); FALSE (inveclen stays 0) if a mapping fails
   without usable fallback or if PICOKDT_NRATT_PHR asks for more
   attributes than are handed in */
picoos_uint8 picokdt_dtPHRconstructInVec(const picokdt_DtPHR this,
                                         const picoos_uint8 pre2,
                                         const picoos_uint8 pre1,
                                         const picoos_uint8 src,
                                         const picoos_uint8 fol1,
                                         const picoos_uint8 fol2,
                                         const picoos_uint16 nrwordspre,
                                         const picoos_uint16 nrwordsfol,
                                         const picoos_uint16 nrsyllsfol) {
    kdtphr_subobj_t *phr = (kdtphr_subobj_t *)this;
    picoos_uint16 rawatt[8];
    picoos_uint16 fallback = 0;
    picoos_uint8 att;

    PICODBG_DEBUG(("in:  [%d,%d|%d|%d,%d|%d,%d,%d]",
                   pre2, pre1, src, fol1, fol2,
                   nrwordspre, nrwordsfol, nrsyllsfol));
    phr->inveclen = 0;

    /* collect the attributes in input vector order */
    rawatt[0] = pre2;
    rawatt[1] = pre1;
    rawatt[2] = src;
    rawatt[3] = fol1;
    rawatt[4] = fol2;
    rawatt[5] = nrwordspre;
    rawatt[6] = nrwordsfol;
    rawatt[7] = nrsyllsfol;

    for (att = 0; att < PICOKDT_NRATT_PHR; att++) {
        if (att >= (sizeof(rawatt) / sizeof(rawatt[0]))) {
            /* more attributes expected than available */
            PICODBG_ERROR(("size mismatch"));
            return FALSE;
        }

        /* do the imt mapping for all inval */
        if (kdtMapInFixed(&(phr->dt), att, rawatt[att],
                          &(phr->invec[att]), &fallback)) {
            continue;
        }
        if (!fallback) {
            PICODBG_ERROR(("problem doing input mapping"));
            return FALSE;
        }
        phr->invec[att] = fallback;
    }

    PICODBG_DEBUG(("out: [%d,%d|%d|%d,%d|%d,%d,%d]",
                   phr->invec[0], phr->invec[1], phr->invec[2],
                   phr->invec[3], phr->invec[4], phr->invec[5],
                   phr->invec[6], phr->invec[7]));
    phr->inveclen = PICOKDT_NRINPMT_PHR;
    return TRUE;
}
/* Classify the PHR input vector: ask the tree, starting at byte 0,
   bit 7, as long as kdtAskTree returns a value > 0.
   returns non-zero if the last kdtAskTree call returned 0 and a
   decision is set (dt.dset), 0 otherwise */
picoos_uint8 picokdt_dtPHRclassify(const picokdt_DtPHR this) {
    kdtphr_subobj_t *phr = (kdtphr_subobj_t *)this;
    kdt_subobj_t *tree = &(phr->dt);
    picoos_uint32 bytepos = 0;
    picoos_int8 bitpos = 7;
    picoos_int8 status;

    for (;;) {
        status = kdtAskTree(tree, phr->invec, PICOKDT_NRATT_PHR,
                            &bytepos, &bitpos);
        if (status <= 0) {
            break;
        }
        PICODBG_TRACE(("asking tree"));
    }
    PICODBG_DEBUG(("done: %d", tree->dclass));
    return ((0 == status) && tree->dset);
}
/* Decompose the PHR tree output and hand out the class in dtres.
   If a decision is set and it can be mapped through the output map
   table, dtres->set is TRUE and dtres->class holds the mapped value;
   otherwise dtres->set is FALSE and dtres->class is not written.
   returns TRUE if okay, FALSE otherwise */
picoos_uint8 picokdt_dtPHRdecomposeOutClass(const picokdt_DtPHR this,
                                            picokdt_classify_result_t *dtres) {
    kdtphr_subobj_t *phr = (kdtphr_subobj_t *)this;
    picoos_uint16 mapped;

    if (!phr->dt.dset ||
        !kdtMapOutFixed(&(phr->dt), phr->dt.dclass, &mapped)) {
        dtres->set = FALSE;
        return FALSE;
    }
    dtres->set = TRUE;
    dtres->class = mapped;
    return TRUE;
}
/* ************************************************************/
/* decision tree phono-acoustical model (PAM) functions */
/* ************************************************************/
/* Construct the PAM input vector: each of the PICOKDT_NRATT_PAM
   elements of vec is mapped through the input map table with the same
   number. An element whose mapping fails is set to the fallback value
   of its table, if that is non-zero.
   - vec/veclen: input values and their number; veclen must be equal to
                 PICOKDT_NRINPMT_PAM
   returns TRUE if all elements could be set (inveclen is then set to
   PICOKDT_NRINPMT_PAM), FALSE (inveclen stays 0) otherwise */
picoos_uint8 picokdt_dtPAMconstructInVec(const picokdt_DtPAM this,
                                         const picoos_uint8 *vec,
                                         const picoos_uint8 veclen) {
    kdtpam_subobj_t *dtpam;
    picoos_uint8 i;
    picoos_uint16 fallback = 0;

    dtpam = (kdtpam_subobj_t *)this;

    dtpam->inveclen = 0;

    /* check veclen first: the traces below index vec[0] .. vec[59] and
       must not run on a vector of unexpected length (assumes
       PICOKDT_NRINPMT_PAM covers these 60 elements -- TODO confirm) */
    if (veclen != PICOKDT_NRINPMT_PAM) {
        PICODBG_ERROR(("wrong number of input vector elements"));
        return FALSE;
    }

    PICODBG_TRACE(("in0:  %d %d %d %d %d %d %d %d %d %d",
                   vec[0], vec[1], vec[2], vec[3], vec[4],
                   vec[5], vec[6], vec[7], vec[8], vec[9]));
    PICODBG_TRACE(("in1:  %d %d %d %d %d %d %d %d %d %d",
                   vec[10], vec[11], vec[12], vec[13], vec[14],
                   vec[15], vec[16], vec[17], vec[18], vec[19]));
    PICODBG_TRACE(("in2:  %d %d %d %d %d %d %d %d %d %d",
                   vec[20], vec[21], vec[22], vec[23], vec[24],
                   vec[25], vec[26], vec[27], vec[28], vec[29]));
    PICODBG_TRACE(("in3:  %d %d %d %d %d %d %d %d %d %d",
                   vec[30], vec[31], vec[32], vec[33], vec[34],
                   vec[35], vec[36], vec[37], vec[38], vec[39]));
    PICODBG_TRACE(("in4:  %d %d %d %d %d %d %d %d %d %d",
                   vec[40], vec[41], vec[42], vec[43], vec[44],
                   vec[45], vec[46], vec[47], vec[48], vec[49]));
    PICODBG_TRACE(("in5:  %d %d %d %d %d %d %d %d %d %d",
                   vec[50], vec[51], vec[52], vec[53], vec[54],
                   vec[55], vec[56], vec[57], vec[58], vec[59]));

    for (i = 0; i < PICOKDT_NRATT_PAM; i++) {
        /* do the imt mapping for all vec eles */
        if (!kdtMapInFixed(&(dtpam->dt), i, vec[i],
                           &(dtpam->invec[i]), &fallback)) {
            if (fallback) {
                dtpam->invec[i] = fallback;
            } else {
                PICODBG_ERROR(("problem doing input mapping, %d %d", i,vec[i]));
                return FALSE;
            }
        }
    }

    /* the mapped vector, labelled as output to tell it apart from the
       input traces above */
    PICODBG_TRACE(("out0: %d %d %d %d %d %d %d %d %d %d",
                   dtpam->invec[0], dtpam->invec[1], dtpam->invec[2],
                   dtpam->invec[3], dtpam->invec[4], dtpam->invec[5],
                   dtpam->invec[6], dtpam->invec[7], dtpam->invec[8],
                   dtpam->invec[9]));
    PICODBG_TRACE(("out1: %d %d %d %d %d %d %d %d %d %d",
                   dtpam->invec[10], dtpam->invec[11], dtpam->invec[12],
                   dtpam->invec[13], dtpam->invec[14], dtpam->invec[15],
                   dtpam->invec[16], dtpam->invec[17], dtpam->invec[18],
                   dtpam->invec[19]));
    PICODBG_TRACE(("out2: %d %d %d %d %d %d %d %d %d %d",
                   dtpam->invec[20], dtpam->invec[21], dtpam->invec[22],
                   dtpam->invec[23], dtpam->invec[24], dtpam->invec[25],
                   dtpam->invec[26], dtpam->invec[27], dtpam->invec[28],
                   dtpam->invec[29]));
    PICODBG_TRACE(("out3: %d %d %d %d %d %d %d %d %d %d",
                   dtpam->invec[30], dtpam->invec[31], dtpam->invec[32],
                   dtpam->invec[33], dtpam->invec[34], dtpam->invec[35],
                   dtpam->invec[36], dtpam->invec[37], dtpam->invec[38],
                   dtpam->invec[39]));
    PICODBG_TRACE(("out4: %d %d %d %d %d %d %d %d %d %d",
                   dtpam->invec[40], dtpam->invec[41], dtpam->invec[42],
                   dtpam->invec[43], dtpam->invec[44], dtpam->invec[45],
                   dtpam->invec[46], dtpam->invec[47], dtpam->invec[48],
                   dtpam->invec[49]));
    PICODBG_TRACE(("out5: %d %d %d %d %d %d %d %d %d %d",
                   dtpam->invec[50], dtpam->invec[51], dtpam->invec[52],
                   dtpam->invec[53], dtpam->invec[54], dtpam->invec[55],
                   dtpam->invec[56], dtpam->invec[57], dtpam->invec[58],
                   dtpam->invec[59]));

    dtpam->inveclen = PICOKDT_NRINPMT_PAM;
    return TRUE;
}
/* Classify the PAM input vector: ask the tree, starting at byte 0,
   bit 7, as long as kdtAskTree returns a value > 0.
   returns non-zero if the last kdtAskTree call returned 0 and a
   decision is set (dt.dset), 0 otherwise */
picoos_uint8 picokdt_dtPAMclassify(const picokdt_DtPAM this) {
    kdtpam_subobj_t *pam = (kdtpam_subobj_t *)this;
    kdt_subobj_t *tree = &(pam->dt);
    picoos_uint32 bytepos = 0;
    picoos_int8 bitpos = 7;
    picoos_int8 status;

    for (;;) {
        status = kdtAskTree(tree, pam->invec, PICOKDT_NRATT_PAM,
                            &bytepos, &bitpos);
        if (status <= 0) {
            break;
        }
        PICODBG_TRACE(("asking tree"));
    }
    PICODBG_DEBUG(("done: %d", tree->dclass));
    return ((0 == status) && tree->dset);
}
/* Decompose the PAM tree output and hand out the class in dtres.
   If a decision is set and it can be mapped through the output map
   table, dtres->set is TRUE and dtres->class holds the mapped value;
   otherwise dtres->set is FALSE and dtres->class is not written.
   returns TRUE if okay, FALSE otherwise */
picoos_uint8 picokdt_dtPAMdecomposeOutClass(const picokdt_DtPAM this,
                                            picokdt_classify_result_t *dtres) {
    kdtpam_subobj_t *pam = (kdtpam_subobj_t *)this;
    picoos_uint16 mapped;

    if (!pam->dt.dset ||
        !kdtMapOutFixed(&(pam->dt), pam->dt.dclass, &mapped)) {
        dtres->set = FALSE;
        return FALSE;
    }
    dtres->set = TRUE;
    dtres->class = mapped;
    return TRUE;
}
/* ************************************************************/
/* decision tree accentuation (ACC) functions */
/* ************************************************************/
picoos_uint8 picokdt_dtACCconstructInVec(const picokdt_DtACC this,
const picoos_uint8 pre2,
const picoos_uint8 pre1,
const picoos_uint8 src,
const picoos_uint8 fol1,
const picoos_uint8 fol2,
const picoos_uint16 hist1,
const picoos_uint16 hist2,
const picoos_uint16 nrwordspre,
const picoos_uint16 nrsyllspre,
const picoos_uint16 nrwordsfol,
const picoos_uint16 nrsyllsfol,
const picoos_uint16 footwordsfol,
const picoos_uint16 footsyllsfol) {
kdtacc_subobj_t *dtacc;
picoos_uint8 i;
picoos_uint16 inval = 0;
picoos_uint16 fallback = 0;
dtacc = (kdtacc_subobj_t *)this;
PICODBG_DEBUG(("in: [%d,%d,%d,%d,%d|%d,%d|%d,%d,%d,%d|%d,%d]",
pre2, pre1, src, fol1, fol2, hist1, hist2,
nrwordspre, nrsyllspre, nrwordsfol, nrsyllsfol,
footwordsfol, footsyllsfol));
dtacc->inveclen = 0;
for (i = 0; i < PICOKDT_NRATT_ACC; i++) {
switch (i) {
case 0: inval = pre2; break;
case 1: inval = pre1; break;
case 2: inval = src; break;
case 3: inval = fol1; break;
case 4: inval = fol2; break;
case 5: inval = hist1; break;
case 6: inval = hist2; break;
case 7: inval = nrwordspre; break;
case 8: inval = nrsyllspre; break;
case 9: inval = nrwordsfol; break;
case 10: inval = nrsyllsfol; break;
case 11: inval = footwordsfol; break;
case 12: inval = footsyllsfol; break;
default:
PICODBG_ERROR(("size mismatch"));
return FALSE;
break;
}
if (((i == 5) || (i == 6)) && (inval == PICOKDT_HISTORY_ZERO)) {
/* in input to this function the HISTORY_ZERO is used to
mark the no-value-available case. For sparsity reasons
this was not used in the training. For
no-value-available cases, instead, do reverse out
mapping of ACC0 to get tree domain for ACC0 */
if (!kdtReverseMapOutFixed(&(dtacc->dt), PICODATA_ACC0,
&inval, &fallback)) {
if (fallback) {
inval = fallback;
} else {
PICODBG_ERROR(("problem doing reverse output mapping"));
return FALSE;
}
}
}
/* do the imt mapping for all inval */
if (!kdtMapInFixed(&(dtacc->dt), i, inval,
&(dtacc->invec[i]), &fallback)) {
if (fallback) {
dtacc->invec[i] = fallback;
} else {
PICODBG_ERROR(("problem doing input mapping"));
return FALSE;
}
}
}
PICODBG_DEBUG(("out: [%d,%d,%d,%d,%d|%d,%d|%d,%d,%d,%d|%d,%d]",
dtacc->invec[0], dtacc->invec[1], dtacc->invec[2],
dtacc->invec[3], dtacc->invec[4], dtacc->invec[5],
dtacc->invec[6], dtacc->invec[7], dtacc->invec[8],
dtacc->invec[9], dtacc->invec[10], dtacc->invec[11],
dtacc->invec[12]));
dtacc->inveclen = PICOKDT_NRINPMT_ACC;
return TRUE;
}
/* Classifies the ACC input vector currently stored in the object.
 *
 * Starting at byte 0 / bit 7 of the tree, kdtAskTree is called
 * repeatedly for as long as it returns a positive value.
 *
 * @param this     ACC decision tree object (cast to kdtacc_subobj_t)
 * @param treeout  receives the tree's dclass on success; not written
 *                 on failure
 * @return TRUE if the last kdtAskTree call returned 0 and the tree's
 *         dset flag is set, FALSE otherwise
 */
picoos_uint8 picokdt_dtACCclassify(const picokdt_DtACC this,
                                   picoos_uint16 *treeout) {
    kdtacc_subobj_t *acc = (kdtacc_subobj_t *)this;
    kdt_subobj_t *tree = &(acc->dt);
    picoos_uint32 bytePos = 0;
    picoos_int8 bitPos = 7;
    picoos_int8 status;

    /* keep asking until the traversal reports a non-positive status */
    for (;;) {
        status = kdtAskTree(tree, acc->invec, PICOKDT_NRATT_ACC,
                            &bytePos, &bitPos);
        if (status <= 0) {
            break;
        }
        PICODBG_TRACE(("asking tree"));
    }
    PICODBG_TRACE(("done: %d", tree->dclass));

    if ((status != 0) || !tree->dset) {
        return FALSE;
    }

    *treeout = tree->dclass;
    return TRUE;
}
/* Converts the class stored in the ACC tree (dt.dclass) through
 * kdtMapOutFixed and reports it in dtres.
 *
 * @param this   ACC decision tree object (cast to kdtacc_subobj_t)
 * @param dtres  result record; 'set' is always written, 'class' only
 *               on success
 * @return TRUE if dt.dset is set and the output mapping succeeded,
 *         FALSE otherwise
 */
picoos_uint8 picokdt_dtACCdecomposeOutClass(const picokdt_DtACC this,
                                            picokdt_classify_result_t *dtres) {
    kdtacc_subobj_t *acc = (kdtacc_subobj_t *)this;
    picoos_uint16 mapped;

    /* the mapping is attempted only when a class has been set */
    if (!acc->dt.dset ||
        !kdtMapOutFixed(&(acc->dt), acc->dt.dclass, &mapped)) {
        dtres->set = FALSE;
        return FALSE;
    }

    dtres->set = TRUE;
    dtres->class = mapped;
    return TRUE;
}
#ifdef __cplusplus
}
#endif
/* end */