| /*---------------------------------------------------------------------------* |
| * grxmldoc.h * |
| * * |
| * Copyright 2007, 2008 Nuance Communications, Inc. * |
| * * |
| * Licensed under the Apache License, Version 2.0 (the 'License'); * |
| * you may not use this file except in compliance with the License. * |
| * * |
| * You may obtain a copy of the License at * |
| * http://www.apache.org/licenses/LICENSE-2.0 * |
| * * |
| * Unless required by applicable law or agreed to in writing, software * |
| * distributed under the License is distributed on an 'AS IS' BASIS, * |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * |
| * See the License for the specific language governing permissions and * |
| * limitations under the License. * |
| * * |
| *---------------------------------------------------------------------------*/ |
| |
| |
| #ifndef __grxmldoc_h__ |
| #define __grxmldoc_h__ |
| |
| // #define MEMTRACE // Uses mtrace() to detect leaks |
| |
#include <map>
#include <stack>
#include <string>
#include <vector>

#include "hashmap.h"
#include "tinyxml.h"
#include "vocab.h"
| |
// Prefix string for script labels (going by the macro name) and the number of
// characters in that prefix. SCRIPT_LABEL_PREFIX_LEN must be kept in sync with
// the string literal by hand.
#define SCRIPT_LABEL_PREFIX "_"
#define SCRIPT_LABEL_PREFIX_LEN 1

// Forward declarations; only pointers/references to these types are needed
// at this point in the header.
class Node;
template <typename T1, typename T2> class HashMap;
class Graph;
class SubGraph;
| |
| class GRXMLDoc |
| { |
| public: |
| typedef TiXmlNode XMLNode; |
| // Some convenience items for string comparison |
| typedef enum KeywordValues {NodeTypeGrammar, NodeTypeRule, NodeTypeRuleReference, NodeTypeOneOf, NodeTypeItem, NodeTypeTag, NodeTypeCount, NodeTypeMeta, NodeTypeBadValue}; |
| typedef std::map<std::string, KeywordValues> KEYWDPAIR; |
| |
| typedef struct { |
| bool hasRuleRef; |
| std::string RuleRefName; |
| int tagID; |
| } ItemData; |
| |
| GRXMLDoc(); |
| ~GRXMLDoc(); |
| |
| // Optional use of voc and model files |
| // TODO: Rearrange access to voc and models |
| #ifndef OPENFSTSDK |
| void initialize_SR(char* parfile); |
| void shutdown_SR(); |
| Vocabulary *getVocabulary() { return m_pVocab;} |
| AcousticModel* getModel() { return m_pModel;} |
| int addPhonemeToList( std::string const& s ); |
| bool findPhoneme( int i, std::string & s ); |
| bool getHMMSequence (int centre, int left, int right, std::vector<int> & modelSequence); |
| #endif |
| |
| // Lookup functions |
| bool findSubGraph(std::string & s, SubGraph *&p_SubGraph); |
| bool findRule(int i, std::string &s ); |
| bool findTag(int i, std::string &s ); |
| bool findLabel(int i, std::string &s ); |
| bool findSubGraphIndex( SubGraph *p_SubGraph, std::string &s ); |
| bool findRuleIndex( std::string s, int &i ); |
| bool findTagIndex( std::string s, int &i ); |
| bool findLabelIndex( std::string s, int &i ); |
| bool findSortedLabel(int i, std::string &s ); |
| bool findSortedLabelIndex( int i, int &sortedIndex ); |
| bool findMeta(const std::string & sn, std::string &s); |
| bool setMeta(const std::string & sn, const std::string &s); |
| void sortLabels(); |
| void addOLabelToOList( std::string & s); |
| bool WriteOLabels(const std::string& fileName); |
| |
| // Take DOM object and create word graph. Creates SubGraph, rule, tag and label lists. |
| bool parseGrammar( XMLNode &node, std::string & xMLFileName ); |
| |
| // Generate output files |
| void writeMapFile( std::string & fileName ); |
| void writeScriptFile( std::string & fileName ); |
| void writeGraphFiles( std::string & fileName, bool bDoWriteRecogGraphs ); |
| void writeParamsFile( std::string & fileName ); |
| void printLists(); |
| void printSubgraphs(); |
| |
| protected: |
| void initializeLists(); |
| bool parseNode( XMLNode &node, SubGraph *&p_SubGraph, const unsigned int level ); |
| bool beginNode( XMLNode &node, SubGraph *&p_SubGraph, const unsigned int level ); |
| bool endNode( XMLNode &node, SubGraph *&p_SubGraph, const unsigned int level ); |
| bool beginParseGrammarNode( XMLNode &node ); |
| bool endParseGrammarNode( XMLNode &node ); |
| bool beginParseMetaNode( XMLNode &node ); |
| bool endParseMetaNode( XMLNode &node ); |
| bool beginParseRuleNode( XMLNode &node, SubGraph *&p_SubGraph); |
| bool endParseRuleNode( XMLNode &node, SubGraph *&p_SubGraph ); |
| bool beginItem( XMLNode &node, SubGraph *&p_SubGraph ); |
| bool endItem( XMLNode &node, SubGraph *&p_SubGraph ); |
| bool processCDATA( XMLNode &node, SubGraph *&p_SubGraph ); |
| bool beginOneOf( XMLNode &node, SubGraph *&p_SubGraph ); |
| bool endOneOf( XMLNode &node, SubGraph *&p_SubGraph ); |
| bool beginRuleRef( XMLNode &grmNode, SubGraph *&p_SubGraph ); |
| bool endRuleRef(XMLNode &node, SubGraph *&p_SubGraph ); |
| bool fixRuleRef( SubGraph *&p_SubGraph ); |
| bool getRuleRefName(XMLNode &node, std::string &ruleName); |
| bool extendAltExpression( XMLNode &node, int level ); |
| bool beginTag( XMLNode &node, SubGraph *&p_SubGraph ); |
| bool endTag( XMLNode &node, SubGraph *&p_SubGraph ); |
| bool beginCount( XMLNode &node, SubGraph *&p_SubGraph ); |
| bool endCount( XMLNode &node, SubGraph *&p_SubGraph ); |
| void printNode( XMLNode &node, int level ); |
| bool addRuleToList(std::string const& ruleName, SubGraph *&p_SubGraph); |
| |
| bool deleteRules(); |
| bool addTagToList( std::string const& s ); |
| bool addLabelToList( std::string const& s ); |
| void printSubgraph( SubGraph &p_SubGraph ); |
| |
| private: |
| |
| Graph *m_pGraph; // The top-level container object for the word graph; |
| KEYWDPAIR m_NodeKeyWords; |
| // The unique attributes of the GRML doc |
| std::string m_XMLMode; |
| std::string m_XMLLanguage; |
| std::string m_RootRule; |
| std::string m_XMLTagFormat; |
| std::string m_XMLVersion; |
| std::string m_XMLBase; |
| std::string m_XMLFileName; |
| |
| // We store indices for all labels used in the word graph. |
| // Store all these labels in the m_LabelList table, which is auto-indexed. |
| // We need a list of the rule names so that we can distinguish them from other labels. |
| // Store these rule names in the m_RuleList table with an index equal to the label index for the rule. |
| // Thus, when we need the index of a rule, we go straight to m_RuleList |
| // and when we need the label of a rule or any other item we use m_LabelList. |
| |
| HashMap<std::string,SubGraph*> m_SubgraphList; |
| HashMap<int,std::string> m_TagList; // <item tag = ... |
| HashMap<int,std::string> m_LabelList; // Stores all network label IDs, including rule names |
| HashMap<int,std::string> m_SortedLabelList; // Used to sort the labels fo |
| HashMap<int, std::string> m_PhonemeList; // Stores triphones |
| HashMap<std::string,int> m_RuleList; // Stores rule name and index used in the LabelList. Use to distinguish which are rules. |
| HashMap<int, std::string> m_RuleScope; |
| HashMap<int, std::string> m_SlotList; |
| HashMap<std::string, std::string> m_MetaKeyValPairs; //Store word-penalty value |
| HashMap<std::string, int> m_OutputPtxtLabels; |
| |
| std::stack<ItemData*> m_ItemVarsStack; |
| std::stack<std::string> m_RuleListStack; |
| int m_RuleAutoIndex; |
| int m_TagAutoIndex; |
| int m_LabelAutoIndex; |
| int m_PhonemeAutoIndex; |
| int m_ExpandedRulesAutoIndex; |
| int m_TagID; // Use to stash tag index for items. |
| // Note the subgraph list does not have an auto-index as it is string-indexed. |
| // All these lists also have an internal numeric index which can be used. |
| |
| #ifndef OPENFSTSDK |
| Vocabulary *m_pVocab; |
| AcousticModel *m_pModel; |
| #endif |
| |
| }; |
| |
| #endif // __grxmldoc_h__ |
| |
| |
| |