// blob: 7278cea829f5b089be4d95b40c266debb92aac28 [file] [log] [blame]
/*---------------------------------------------------------------------------*
* grxmldoc.h *
* *
* Copyright 2007, 2008 Nuance Communciations, Inc. *
* *
* Licensed under the Apache License, Version 2.0 (the 'License'); *
* you may not use this file except in compliance with the License. *
* *
* You may obtain a copy of the License at *
* http://www.apache.org/licenses/LICENSE-2.0 *
* *
* Unless required by applicable law or agreed to in writing, software *
* distributed under the License is distributed on an 'AS IS' BASIS, *
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
* See the License for the specific language governing permissions and *
* limitations under the License. *
* *
*---------------------------------------------------------------------------*/
#ifndef __grxmldoc_h__
#define __grxmldoc_h__
// #define MEMTRACE // Uses mtrace() to detect leaks
#include <map>
#include <stack>
#include <string>
#include <vector>

#include "hashmap.h"
#include "tinyxml.h"
#include "vocab.h"
// Prefix string for script labels and its length in characters.
// The two macros must be kept in sync: SCRIPT_LABEL_PREFIX_LEN is the
// character count of SCRIPT_LABEL_PREFIX.
// NOTE(review): where the prefix is applied is not visible in this header.
#define SCRIPT_LABEL_PREFIX "_"
#define SCRIPT_LABEL_PREFIX_LEN 1
// Forward declarations.
// Node is not referenced anywhere else in this header.
class Node;
// GRXMLDoc holds HashMap members by value, so the complete definition has to
// come from an included header (presumably hashmap.h -- confirm).
template <typename T1, typename T2> class HashMap;
// Graph and SubGraph are only used through pointers and references below,
// so an incomplete type is sufficient here.
class Graph;
class SubGraph;
// GRXMLDoc declares the interface for taking a parsed grammar XML document
// (a TinyXML node tree) and creating a word graph from it, together with the
// SubGraph, rule, tag and label lists, and for writing the derived output files.
//
// NOTE(review): the class declares a destructor and holds raw pointers
// (m_pGraph, m_pVocab, m_pModel) while leaving the implicit copy operations
// enabled -- confirm pointer ownership before copying instances.
class GRXMLDoc
{
public:
    typedef TiXmlNode XMLNode;

    // Some convenience items for string comparison.
    // This is a plain enum declaration: the earlier form
    // "typedef enum KeywordValues {...};" had no typedef-name, so the typedef
    // keyword was ignored by the compiler and only produced a warning.
    enum KeywordValues {NodeTypeGrammar, NodeTypeRule, NodeTypeRuleReference, NodeTypeOneOf, NodeTypeItem, NodeTypeTag, NodeTypeCount, NodeTypeMeta, NodeTypeBadValue};

    // Maps a keyword string to its KeywordValues enumerator.
    typedef std::map<std::string, KeywordValues> KEYWDPAIR;

    // Per-item record; pointers to these records are kept on m_ItemVarsStack.
    // The members have no initializers, so callers must set them explicitly.
    struct ItemData {
        bool hasRuleRef;
        std::string RuleRefName;
        int tagID;
    };

    GRXMLDoc();
    ~GRXMLDoc();

    // Optional use of voc and model files.
    // This group is compiled out when OPENFSTSDK is defined.
    // TODO: Rearrange access to voc and models
#ifndef OPENFSTSDK
    void initialize_SR(char* parfile);
    void shutdown_SR();
    // Returns the stored vocabulary pointer (m_pVocab) as-is.
    Vocabulary *getVocabulary() { return m_pVocab;}
    // Returns the stored acoustic model pointer (m_pModel) as-is.
    AcousticModel* getModel() { return m_pModel;}
    int addPhonemeToList( std::string const& s );
    bool findPhoneme( int i, std::string & s );
    bool getHMMSequence (int centre, int left, int right, std::vector<int> & modelSequence);
#endif

    // Lookup functions.
    // NOTE(review): each presumably returns true on success and delivers its
    // result through the non-const reference parameter -- confirm against the
    // implementation, which is not visible in this header.
    bool findSubGraph(std::string & s, SubGraph *&p_SubGraph);
    bool findRule(int i, std::string &s );
    bool findTag(int i, std::string &s );
    bool findLabel(int i, std::string &s );
    bool findSubGraphIndex( SubGraph *p_SubGraph, std::string &s );
    bool findRuleIndex( std::string s, int &i );
    bool findTagIndex( std::string s, int &i );
    bool findLabelIndex( std::string s, int &i );
    bool findSortedLabel(int i, std::string &s );
    bool findSortedLabelIndex( int i, int &sortedIndex );
    bool findMeta(const std::string & sn, std::string &s);
    bool setMeta(const std::string & sn, const std::string &s);
    void sortLabels();
    void addOLabelToOList( std::string & s);
    bool WriteOLabels(const std::string& fileName);

    // Take DOM object and create word graph. Creates SubGraph, rule, tag and label lists.
    bool parseGrammar( XMLNode &node, std::string & xMLFileName );

    // Generate output files
    void writeMapFile( std::string & fileName );
    void writeScriptFile( std::string & fileName );
    void writeGraphFiles( std::string & fileName, bool bDoWriteRecogGraphs );
    void writeParamsFile( std::string & fileName );

    // Diagnostic print helpers.
    void printLists();
    void printSubgraphs();

protected:
    void initializeLists();

    // Node traversal. The begin*/end* members are declared in pairs, one pair
    // per kind of grammar node.
    // NOTE(review): presumably begin* runs on entering a node and end* on
    // leaving it -- confirm in the implementation.
    bool parseNode( XMLNode &node, SubGraph *&p_SubGraph, const unsigned int level );
    bool beginNode( XMLNode &node, SubGraph *&p_SubGraph, const unsigned int level );
    bool endNode( XMLNode &node, SubGraph *&p_SubGraph, const unsigned int level );
    bool beginParseGrammarNode( XMLNode &node );
    bool endParseGrammarNode( XMLNode &node );
    bool beginParseMetaNode( XMLNode &node );
    bool endParseMetaNode( XMLNode &node );
    bool beginParseRuleNode( XMLNode &node, SubGraph *&p_SubGraph);
    bool endParseRuleNode( XMLNode &node, SubGraph *&p_SubGraph );
    bool beginItem( XMLNode &node, SubGraph *&p_SubGraph );
    bool endItem( XMLNode &node, SubGraph *&p_SubGraph );
    bool processCDATA( XMLNode &node, SubGraph *&p_SubGraph );
    bool beginOneOf( XMLNode &node, SubGraph *&p_SubGraph );
    bool endOneOf( XMLNode &node, SubGraph *&p_SubGraph );
    bool beginRuleRef( XMLNode &grmNode, SubGraph *&p_SubGraph );
    bool endRuleRef(XMLNode &node, SubGraph *&p_SubGraph );
    bool fixRuleRef( SubGraph *&p_SubGraph );
    bool getRuleRefName(XMLNode &node, std::string &ruleName);
    bool extendAltExpression( XMLNode &node, int level );
    bool beginTag( XMLNode &node, SubGraph *&p_SubGraph );
    bool endTag( XMLNode &node, SubGraph *&p_SubGraph );
    bool beginCount( XMLNode &node, SubGraph *&p_SubGraph );
    bool endCount( XMLNode &node, SubGraph *&p_SubGraph );
    void printNode( XMLNode &node, int level );

    // List maintenance.
    bool addRuleToList(std::string const& ruleName, SubGraph *&p_SubGraph);
    bool deleteRules();
    bool addTagToList( std::string const& s );
    bool addLabelToList( std::string const& s );
    void printSubgraph( SubGraph &p_SubGraph );

private:
    Graph *m_pGraph; // The top-level container object for the word graph;
    KEYWDPAIR m_NodeKeyWords;

    // The unique attributes of the GRXML doc
    std::string m_XMLMode;
    std::string m_XMLLanguage;
    std::string m_RootRule;
    std::string m_XMLTagFormat;
    std::string m_XMLVersion;
    std::string m_XMLBase;
    std::string m_XMLFileName;

    // We store indices for all labels used in the word graph.
    // Store all these labels in the m_LabelList table, which is auto-indexed.
    // We need a list of the rule names so that we can distinguish them from other labels.
    // Store these rule names in the m_RuleList table with an index equal to the label index for the rule.
    // Thus, when we need the index of a rule, we go straight to m_RuleList
    // and when we need the label of a rule or any other item we use m_LabelList.
    HashMap<std::string,SubGraph*> m_SubgraphList;
    HashMap<int,std::string> m_TagList; // <item tag = ...
    HashMap<int,std::string> m_LabelList; // Stores all network label IDs, including rule names
    HashMap<int,std::string> m_SortedLabelList; // Used to sort the labels
    HashMap<int, std::string> m_PhonemeList; // Stores triphones
    HashMap<std::string,int> m_RuleList; // Stores rule name and index used in the LabelList. Use to distinguish which are rules.
    HashMap<int, std::string> m_RuleScope;
    HashMap<int, std::string> m_SlotList;
    HashMap<std::string, std::string> m_MetaKeyValPairs; //Store word-penalty value
    HashMap<std::string, int> m_OutputPtxtLabels;

    std::stack<ItemData*> m_ItemVarsStack;
    std::stack<std::string> m_RuleListStack;

    int m_RuleAutoIndex;
    int m_TagAutoIndex;
    int m_LabelAutoIndex;
    int m_PhonemeAutoIndex;
    int m_ExpandedRulesAutoIndex;
    int m_TagID; // Use to stash tag index for items.
    // Note the subgraph list does not have an auto-index as it is string-indexed.
    // All these lists also have an internal numeric index which can be used.

#ifndef OPENFSTSDK
    Vocabulary *m_pVocab;
    AcousticModel *m_pModel;
#endif
};
#endif // __grxmldoc_h__