// tools/grxmlcompile/vocab.cpp - platform/external/srec - Git at Google

 /*---------------------------------------------------------------------------*
  *  vocab.cpp                                                                *
  *                                                                           *
  *  Copyright 2007, 2008 Nuance Communications, Inc.                               *
  *                                                                           *
  *  Licensed under the Apache License, Version 2.0 (the 'License');          *
  *  you may not use this file except in compliance with the License.         *
  *                                                                           *
  *  You may obtain a copy of the License at                                  *
  *      http://www.apache.org/licenses/LICENSE-2.0                           *
  *                                                                           *
  *  Unless required by applicable law or agreed to in writing, software      *
  *  distributed under the License is distributed on an 'AS IS' BASIS,        *
  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
  *  See the License for the specific language governing permissions and      *
  *  limitations under the License.                                           *
  *                                                                           *
  *---------------------------------------------------------------------------*/

 #include <string>
 #include <iostream>
 #include <stdexcept>
 #include "ESR_Locale.h"
 #include "LCHAR.h"
 #include "pstdio.h"
 #include "ESR_Session.h"
 #include "SR_Vocabulary.h"

 #include "vocab.h"

 /* Capacity, in LCHARs, of the buffer that holds the phrase to look up. */
 #define MAX_LINE_LENGTH     256
 /* Capacity, in LCHARs, of the buffer that receives the pronunciations. */
 #define MAX_PRONS_LENGTH 1024

 /* Set to a non-zero value to compile in the diagnostic output. */
 #define DEBUG	0

 /*
  * Placeholder phoneme used where a phoneme has no left or right neighbour.
  * The previous spelling, "GENERIC CONTEXT" with a space, defined a macro
  * named GENERIC and left GENERIC_CONTEXT (the name the code uses) undefined
  * by this file. The value is a character literal because it is assigned to
  * char variables. The guard keeps a definition supplied by an included
  * header, if there is one.
  */
 #ifndef GENERIC_CONTEXT
 #define GENERIC_CONTEXT '#'
 #endif

 /**
  * Loads the vocabulary stored in vocFileName through SR_VocabularyLoad.
  * When loading does not report ESR_SUCCESS, the error code is printed to
  * standard output and the process exits with status -1.
  */
 Vocabulary::Vocabulary( std::string const & vocFileName )
 {
     ESR_ReturnCode const status = SR_VocabularyLoad(vocFileName.c_str(), &m_hVocab);
     if (status == ESR_SUCCESS)
     {
         return;
     }

     std::cout << "Error: " << ESR_rc2str(status) << std::endl;
     exit (-1);
 }

 /** Hands the vocabulary handle back to SR_VocabularyDestroy. */
 Vocabulary::~Vocabulary()
 {
     SR_VocabularyDestroy( m_hVocab );
 }

 /** Creates a Pronunciation; the constructor body performs no work of its own. */
 Pronunciation::Pronunciation()
 {
     // Intentionally empty.
 }

 /** Destroys a Pronunciation; the destructor body performs no work of its own. */
 Pronunciation::~Pronunciation()
 {
     // Intentionally empty.
 }

 /**
  * Removes every stored pronunciation and every stored list of model IDs.
  */
 void Pronunciation::clear()
 {
     m_Prons.clear();

     // Empty each per-pronunciation ID list, then drop the lists themselves.
     for (size_t remaining = m_ModelIDs.size(); remaining > 0; --remaining)
     {
         m_ModelIDs[remaining - 1].clear();
     }
     m_ModelIDs.clear();
 }

 /**
  * Looks up the pronunciations of phrase in vocab and appends each of them to
  * the stored pronunciations.
  *
  * @param vocab   vocabulary whose SR_Vocabulary handle is queried
  * @param phrase  text to look up; a phrase that does not fit, together with
  *                its terminator, in MAX_LINE_LENGTH characters is rejected
  * @return number of pronunciations stored once the call finishes; when the
  *         phrase is rejected or the lookup fails, an error is printed and
  *         nothing is added
  */
 int Pronunciation::lookup(  Vocabulary & vocab, std::string  & phrase )
 {
     // The phrase is copied into a fixed-size buffer below, so refuse anything
     // that would run past its end.
     if (phrase.size() >= static_cast<size_t>(MAX_LINE_LENGTH))
     {
         std::cout <<"ERROR: phrase exceeds " << (MAX_LINE_LENGTH - 1) << " characters" << std::endl;
         return getPronCount();
     }

     LCHAR s[MAX_LINE_LENGTH];
     strcpy (s, phrase.c_str() ); // No conversion for std::string to wchar
     //clear();

     // Zero the whole output buffer, not just its first element.
     LCHAR prons[MAX_PRONS_LENGTH];
     memset (prons, 0x00, sizeof(prons));

     // Start from the buffer capacity rather than an indeterminate value.
     // NOTE(review): assumes len is in/out (capacity in, length used out) -
     // confirm against SR_Vocabulary.h
     size_t len = MAX_PRONS_LENGTH;

     SR_Vocabulary *p_SRVocab = vocab.getSRVocabularyHandle();
 #if DEBUG
     std::cout << "DEBUG: " << phrase <<" to be looked up" << std::endl;
 #endif
     ESR_ReturnCode rc = SR_VocabularyGetPronunciation( p_SRVocab, s, prons, &len );
     if (rc != ESR_SUCCESS)
     {
         std::cout <<"ERROR: " << ESR_rc2str(rc) << std::endl;
         return getPronCount();
     }

 #if DEBUG
     std::cout <<"OUTPUT: " << prons << " num " << len << std::endl;
 #endif

     // Never walk past the end of the local buffer, whatever length came back,
     // and make sure the last string in it is terminated.
     if (len > static_cast<size_t>(MAX_PRONS_LENGTH))
     {
         len = MAX_PRONS_LENGTH;
     }
     prons[MAX_PRONS_LENGTH - 1] = 0;

     // prons is read as a run of terminated strings laid out back to back.
     for (size_t len_used = 0; len_used < len; )
     {
         LCHAR *pron = &prons[0] + len_used;
         len_used += LSTRLEN(pron)+1;
 #if DEBUG
         std::cout << "DEBUG: used " << len_used << " now " << LSTRLEN(pron) << std::endl;
 #endif
         std::string pronString( pron ); // wstring conversion if needed
         addPron( pronString );
 #if DEBUG
         std::cout << "DEBUG: " << phrase << " " << pron << std::endl;
 #endif
     }
     return getPronCount();
 }


 /**
  * Appends a copy of s to the stored pronunciations.
  * @return number of pronunciations stored after the append
  */
 int Pronunciation::addPron( std::string & s )
 {
     m_Prons.push_back( s );
     const int storedCount = m_Prons.size();
     return storedCount;
 }

 /** @return how many pronunciations are currently stored */
 int Pronunciation::getPronCount()
 {
     const int storedCount = m_Prons.size();
     return storedCount;
 }

 /**
  * Copies the stored pronunciation at position index into s.
  *
  * @param index  position of the wanted pronunciation
  * @param s      receives the pronunciation; left untouched on failure
  * @return true when index names a stored pronunciation; false otherwise, in
  *         which case the failure is also reported on standard error
  */
 bool Pronunciation::getPron( int index, std::string &s )
 {
     try {
       s = m_Prons.at(index);
     }
     catch(const std::out_of_range& err) {
       std::cerr << "out_of_range: " << err.what() << std::endl;
       // Tell the caller the lookup failed instead of claiming success.
       return false;
     }
     return true;
 }

 /**
  * Walks over every stored pronunciation; each one is written to standard
  * output only when DEBUG is non-zero.
  */
 void Pronunciation::print()
 {
   std::string current;
   int position = 0;
   while (position < getPronCount()) {
     getPron(position, current);
 #if DEBUG
     std::cout << "Pron #" << position << ": " << current << std::endl;
 #endif
     ++position;
   }
 }

 /**
  * Writes the model IDs of every stored pronunciation to standard output.
  * The pronunciation text, the "Model IDs" label and the line break are
  * written only when DEBUG is non-zero.
  */
 void Pronunciation::printModelIDs()
 {
   std::string current;
   int pronPos = 0;
   while (pronPos < getPronCount()) {
     getPron(pronPos, current);
 #if DEBUG
     std::cout << "  Pron #" << pronPos << ": " << current << std::endl;
     std::cout << "    Model IDs: ";
 #endif
     // NOTE(review): the IDs are written even when DEBUG is 0, while their
     // label and line break are not - confirm that this is intended.
     int modelPos = 0;
     while (modelPos < getModelCount(pronPos)) {
       std::cout << " " << getModelID(pronPos, modelPos);
       ++modelPos;
     }
 #if DEBUG
     std::cout <<  std::endl;
 #endif
     ++pronPos;
   }
 }

 /**
  * @param pronIndex  position of a stored pronunciation
  * @return length in characters of that pronunciation; the length of an
  *         empty string when getPron leaves its output untouched
  */
 int Pronunciation::getPhonemeCount( int pronIndex )
 {
   std::string pron;
   getPron(pronIndex, pron);
   const int phonemeCount = pron.size();
   return phonemeCount;
 }

 /**
  * Stores the single character found at position picIndex of the
  * pronunciation at pronIndex into phoneme.
  *
  * @return always true
  * @throws std::out_of_range (from std::string::at) when picIndex is not a
  *         valid position in the pronunciation
  */
 bool Pronunciation::getPhoneme( int pronIndex, int picIndex , std::string &phoneme )
 {
   std::string pron;
   getPron(pronIndex, pron);
   const char symbol = pron.at(picIndex);
   phoneme = symbol;
   return true;
 }


 /**
  * Builds the three-character "phoneme in context" string for one phoneme of
  * a stored pronunciation: left neighbour, the phoneme itself, right
  * neighbour. GENERIC_CONTEXT stands in for a neighbour that does not exist
  * (before the first phoneme, after the last one).
  *
  * @param pronIndex  position of the stored pronunciation
  * @param picIndex   position of the phoneme inside that pronunciation
  * @param pic        receives the three-character result; untouched on failure
  * @return true when pic was filled in, false when picIndex does not name a
  *         phoneme of the pronunciation
  */
 bool Pronunciation::getPIC( int pronIndex, int picIndex, std::string &pic )
 {
   std::string pron;
   getPron( pronIndex, pron );

   const int numPhonemes = pron.size();
   if ( picIndex < 0 || picIndex >= numPhonemes ) {
     return false;
   }

   const char cphon = pron.at(picIndex);
   const char lphon = ( picIndex > 0 ) ? pron.at(picIndex-1) : GENERIC_CONTEXT;
   const char rphon = ( picIndex < numPhonemes-1 ) ? pron.at(picIndex+1) : GENERIC_CONTEXT;

   // Concatenate the characters. Adding them with '+' would sum their
   // integer values and store a single meaningless character instead.
   pic.assign( 1, lphon );
   pic += cphon;
   pic += rphon;
   return true;
 }

 /**
  * Looks up one model ID per phoneme of every stored pronunciation and
  * appends, for each pronunciation, the resulting list of IDs to m_ModelIDs.
  *
  * Each phoneme is looked up together with its left and right neighbour;
  * GENERIC_CONTEXT stands in for a neighbour that does not exist, so a
  * one-phoneme pronunciation is looked up with both neighbours generic.
  *
  * @param acoustic  model whose handle is passed to CA_ArbdataGetModelIdsForPIC
  * @return total number of phonemes processed over all pronunciations
  */
 int Pronunciation::lookupModelIDs( AcousticModel &acoustic )
 {
   // Looks up all hmms for all prons
   int totalCount = 0;
   const int numProns = getPronCount();
   for (int ii = 0; ii < numProns; ii++ )
     {
       std::string pron;
       getPron( ii, pron );
       const int numPhonemes = pron.size();

       std::vector<int> idList; // Create storage
       for ( int jj = 0; jj < numPhonemes; jj++ )
         {
           // Take the context of phoneme jj itself rather than the first
           // three characters of the pronunciation.
           const char cphon = pron.at(jj);
           const char lphon = ( jj > 0 ) ? pron.at(jj-1) : GENERIC_CONTEXT;
           const char rphon = ( jj < numPhonemes-1 ) ? pron.at(jj+1) : GENERIC_CONTEXT;
           int id = CA_ArbdataGetModelIdsForPIC( acoustic.getCAModelHandle(), lphon, cphon,  rphon );
 #if DEBUG
           std::cout <<"DEBUG model id: " << lphon <<cphon << rphon << "  "<< id << std::endl;
 #endif

           idList.push_back(id);
         }
       m_ModelIDs.push_back(idList);
       totalCount += numPhonemes;
     }
   return totalCount;
 }

 /**
  * @param pronIndex  position of a pronunciation in m_ModelIDs; not range-checked
  * @return number of model IDs stored for that pronunciation
  */
 int Pronunciation::getModelCount( int pronIndex )
 {
   const int idCount = m_ModelIDs[pronIndex].size();
   return idCount;
 }

 int Pronunciation::getModelID( int pronIndex, int modelPos )
 {
   return m_ModelIDs[pronIndex][modelPos];
 }

 /**
  * Loads the data named by arbFileName through CA_LoadArbdata. When nothing
  * is returned, an error naming the file is printed to standard output and
  * the process exits with status -1.
  */
 AcousticModel::AcousticModel( std::string & arbFileName )
 {
   m_CA_Arbdata = CA_LoadArbdata( arbFileName.c_str() );
   if (m_CA_Arbdata)
     {
       return;
     }

   std::cout << "Error: while trying to load " << arbFileName.c_str() << std::endl;
   exit (-1);
 }

 /** Hands the loaded data back to CA_FreeArbdata. */
 AcousticModel::~AcousticModel()
 {
   CA_FreeArbdata( m_CA_Arbdata );
 }

 /**
  * Appends the state indices recorded for entry id of hmm_infos to stateIDs.
  *
  * @param id        index into hmm_infos; not range-checked
  * @param stateIDs  list the indices are appended to; existing content is kept
  * @return size of stateIDs after the append
  */
 int AcousticModel::getStateIndices(int id, std::vector<int> & stateIDs)
 {
   srec_arbdata *allotree = (srec_arbdata*) m_CA_Arbdata;
   int numStates = allotree->hmm_infos[id].num_states;
 #if DEBUG
   std::cout << "getStateIndices: count = " << numStates <<std::endl;
 #endif
   int statePos = 0;
   while (statePos < numStates) {
     stateIDs.push_back( allotree->hmm_infos[id].state_indices[statePos] );
 #if DEBUG
     std::cout <<  allotree->hmm_infos[id].state_indices[statePos] ;
 #endif
     ++statePos;
   }
 #if DEBUG
   std::cout << std::endl;
 #endif
   const int total = stateIDs.size();
   return total;
 }
	/*---------------------------------------------------------------------------*
	 *  vocab.cpp                                                                *
	 *                                                                           *
	 *  Copyright 2007, 2008 Nuance Communications, Inc.                         *
	 *                                                                           *
	 *  Licensed under the Apache License, Version 2.0 (the 'License');          *
	 *  you may not use this file except in compliance with the License.         *
	 *                                                                           *
	 *  You may obtain a copy of the License at                                  *
	 *      http://www.apache.org/licenses/LICENSE-2.0                           *
	 *                                                                           *
	 *  Unless required by applicable law or agreed to in writing, software      *
	 *  distributed under the License is distributed on an 'AS IS' BASIS,        *
	 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
	 *  See the License for the specific language governing permissions and      *
	 *  limitations under the License.                                           *
	 *                                                                           *
	 *---------------------------------------------------------------------------*/

	#include <string>
	#include <iostream>
	#include <stdexcept>
	#include "ESR_Locale.h"
	#include "LCHAR.h"
	#include "pstdio.h"
	#include "ESR_Session.h"
	#include "SR_Vocabulary.h"

	#include "vocab.h"

	/* Capacity, in LCHARs, of the buffer that holds the phrase to look up. */
	#define MAX_LINE_LENGTH 256
	/* Capacity, in LCHARs, of the buffer that receives the pronunciations. */
	#define MAX_PRONS_LENGTH 1024

	/* Set to a non-zero value to compile in the diagnostic output. */
	#define DEBUG 0

	/*
	 * Placeholder phoneme used where a phoneme has no left or right neighbour.
	 * The previous spelling, "GENERIC CONTEXT" with a space, defined a macro
	 * named GENERIC and left GENERIC_CONTEXT (the name the code uses) undefined
	 * by this file. The value is a character literal because it is assigned to
	 * char variables. The guard keeps a definition supplied by an included
	 * header, if there is one.
	 */
	#ifndef GENERIC_CONTEXT
	#define GENERIC_CONTEXT '#'
	#endif

	/**
	 * Loads the vocabulary stored in vocFileName through SR_VocabularyLoad.
	 * When loading does not report ESR_SUCCESS, the error code is printed to
	 * standard output and the process exits with status -1.
	 */
	Vocabulary::Vocabulary( std::string const & vocFileName )
	{
	    ESR_ReturnCode const status = SR_VocabularyLoad(vocFileName.c_str(), &m_hVocab);
	    if (status == ESR_SUCCESS)
	    {
	        return;
	    }

	    std::cout << "Error: " << ESR_rc2str(status) << std::endl;
	    exit (-1);
	}

	/** Hands the vocabulary handle back to SR_VocabularyDestroy. */
	Vocabulary::~Vocabulary()
	{
	    SR_VocabularyDestroy( m_hVocab );
	}

	/** Creates a Pronunciation; the constructor body performs no work of its own. */
	Pronunciation::Pronunciation()
	{
	    // Intentionally empty.
	}

	/** Destroys a Pronunciation; the destructor body performs no work of its own. */
	Pronunciation::~Pronunciation()
	{
	    // Intentionally empty.
	}

	/**
	 * Removes every stored pronunciation and every stored list of model IDs.
	 */
	void Pronunciation::clear()
	{
	    m_Prons.clear();

	    // Empty each per-pronunciation ID list, then drop the lists themselves.
	    for (size_t remaining = m_ModelIDs.size(); remaining > 0; --remaining)
	    {
	        m_ModelIDs[remaining - 1].clear();
	    }
	    m_ModelIDs.clear();
	}

	/**
	 * Looks up the pronunciations of phrase in vocab and appends each of them to
	 * the stored pronunciations.
	 *
	 * @param vocab   vocabulary whose SR_Vocabulary handle is queried
	 * @param phrase  text to look up; a phrase that does not fit, together with
	 *                its terminator, in MAX_LINE_LENGTH characters is rejected
	 * @return number of pronunciations stored once the call finishes; when the
	 *         phrase is rejected or the lookup fails, an error is printed and
	 *         nothing is added
	 */
	int Pronunciation::lookup( Vocabulary & vocab, std::string & phrase )
	{
	    // The phrase is copied into a fixed-size buffer below, so refuse anything
	    // that would run past its end.
	    if (phrase.size() >= static_cast<size_t>(MAX_LINE_LENGTH))
	    {
	        std::cout <<"ERROR: phrase exceeds " << (MAX_LINE_LENGTH - 1) << " characters" << std::endl;
	        return getPronCount();
	    }

	    LCHAR s[MAX_LINE_LENGTH];
	    strcpy (s, phrase.c_str() ); // No conversion for std::string to wchar
	    //clear();

	    // Zero the whole output buffer, not just its first element.
	    LCHAR prons[MAX_PRONS_LENGTH];
	    memset (prons, 0x00, sizeof(prons));

	    // Start from the buffer capacity rather than an indeterminate value.
	    // NOTE(review): assumes len is in/out (capacity in, length used out) -
	    // confirm against SR_Vocabulary.h
	    size_t len = MAX_PRONS_LENGTH;

	    SR_Vocabulary *p_SRVocab = vocab.getSRVocabularyHandle();
	#if DEBUG
	    std::cout << "DEBUG: " << phrase <<" to be looked up" << std::endl;
	#endif
	    ESR_ReturnCode rc = SR_VocabularyGetPronunciation( p_SRVocab, s, prons, &len );
	    if (rc != ESR_SUCCESS)
	    {
	        std::cout <<"ERROR: " << ESR_rc2str(rc) << std::endl;
	        return getPronCount();
	    }

	#if DEBUG
	    std::cout <<"OUTPUT: " << prons << " num " << len << std::endl;
	#endif

	    // Never walk past the end of the local buffer, whatever length came back,
	    // and make sure the last string in it is terminated.
	    if (len > static_cast<size_t>(MAX_PRONS_LENGTH))
	    {
	        len = MAX_PRONS_LENGTH;
	    }
	    prons[MAX_PRONS_LENGTH - 1] = 0;

	    // prons is read as a run of terminated strings laid out back to back.
	    for (size_t len_used = 0; len_used < len; )
	    {
	        LCHAR *pron = &prons[0] + len_used;
	        len_used += LSTRLEN(pron)+1;
	#if DEBUG
	        std::cout << "DEBUG: used " << len_used << " now " << LSTRLEN(pron) << std::endl;
	#endif
	        std::string pronString( pron ); // wstring conversion if needed
	        addPron( pronString );
	#if DEBUG
	        std::cout << "DEBUG: " << phrase << " " << pron << std::endl;
	#endif
	    }
	    return getPronCount();
	}


	/**
	 * Appends a copy of s to the stored pronunciations.
	 * @return number of pronunciations stored after the append
	 */
	int Pronunciation::addPron( std::string & s )
	{
	    m_Prons.push_back( s );
	    const int storedCount = m_Prons.size();
	    return storedCount;
	}

	/** @return how many pronunciations are currently stored */
	int Pronunciation::getPronCount()
	{
	    const int storedCount = m_Prons.size();
	    return storedCount;
	}

	/**
	 * Copies the stored pronunciation at position index into s.
	 *
	 * @param index  position of the wanted pronunciation
	 * @param s      receives the pronunciation; left untouched on failure
	 * @return true when index names a stored pronunciation; false otherwise, in
	 *         which case the failure is also reported on standard error
	 */
	bool Pronunciation::getPron( int index, std::string &s )
	{
	    try {
	        s = m_Prons.at(index);
	    }
	    catch(const std::out_of_range& err) {
	        std::cerr << "out_of_range: " << err.what() << std::endl;
	        // Tell the caller the lookup failed instead of claiming success.
	        return false;
	    }
	    return true;
	}

	/**
	 * Walks over every stored pronunciation; each one is written to standard
	 * output only when DEBUG is non-zero.
	 */
	void Pronunciation::print()
	{
	    std::string current;
	    int position = 0;
	    while (position < getPronCount()) {
	        getPron(position, current);
	#if DEBUG
	        std::cout << "Pron #" << position << ": " << current << std::endl;
	#endif
	        ++position;
	    }
	}

	/**
	 * Writes the model IDs of every stored pronunciation to standard output.
	 * The pronunciation text, the "Model IDs" label and the line break are
	 * written only when DEBUG is non-zero.
	 */
	void Pronunciation::printModelIDs()
	{
	    std::string current;
	    int pronPos = 0;
	    while (pronPos < getPronCount()) {
	        getPron(pronPos, current);
	#if DEBUG
	        std::cout << " Pron #" << pronPos << ": " << current << std::endl;
	        std::cout << " Model IDs: ";
	#endif
	        // NOTE(review): the IDs are written even when DEBUG is 0, while their
	        // label and line break are not - confirm that this is intended.
	        int modelPos = 0;
	        while (modelPos < getModelCount(pronPos)) {
	            std::cout << " " << getModelID(pronPos, modelPos);
	            ++modelPos;
	        }
	#if DEBUG
	        std::cout << std::endl;
	#endif
	        ++pronPos;
	    }
	}

	/**
	 * @param pronIndex  position of a stored pronunciation
	 * @return length in characters of that pronunciation; the length of an
	 *         empty string when getPron leaves its output untouched
	 */
	int Pronunciation::getPhonemeCount( int pronIndex )
	{
	    std::string pron;
	    getPron(pronIndex, pron);
	    const int phonemeCount = pron.size();
	    return phonemeCount;
	}

	/**
	 * Stores the single character found at position picIndex of the
	 * pronunciation at pronIndex into phoneme.
	 *
	 * @return always true
	 * @throws std::out_of_range (from std::string::at) when picIndex is not a
	 *         valid position in the pronunciation
	 */
	bool Pronunciation::getPhoneme( int pronIndex, int picIndex , std::string &phoneme )
	{
	    std::string pron;
	    getPron(pronIndex, pron);
	    const char symbol = pron.at(picIndex);
	    phoneme = symbol;
	    return true;
	}


	/**
	 * Builds the three-character "phoneme in context" string for one phoneme of
	 * a stored pronunciation: left neighbour, the phoneme itself, right
	 * neighbour. GENERIC_CONTEXT stands in for a neighbour that does not exist
	 * (before the first phoneme, after the last one).
	 *
	 * @param pronIndex  position of the stored pronunciation
	 * @param picIndex   position of the phoneme inside that pronunciation
	 * @param pic        receives the three-character result; untouched on failure
	 * @return true when pic was filled in, false when picIndex does not name a
	 *         phoneme of the pronunciation
	 */
	bool Pronunciation::getPIC( int pronIndex, int picIndex, std::string &pic )
	{
	    std::string pron;
	    getPron( pronIndex, pron );

	    const int numPhonemes = pron.size();
	    if ( picIndex < 0 || picIndex >= numPhonemes ) {
	        return false;
	    }

	    const char cphon = pron.at(picIndex);
	    const char lphon = ( picIndex > 0 ) ? pron.at(picIndex-1) : GENERIC_CONTEXT;
	    const char rphon = ( picIndex < numPhonemes-1 ) ? pron.at(picIndex+1) : GENERIC_CONTEXT;

	    // Concatenate the characters. Adding them with '+' would sum their
	    // integer values and store a single meaningless character instead.
	    pic.assign( 1, lphon );
	    pic += cphon;
	    pic += rphon;
	    return true;
	}

	/**
	 * Looks up one model ID per phoneme of every stored pronunciation and
	 * appends, for each pronunciation, the resulting list of IDs to m_ModelIDs.
	 *
	 * Each phoneme is looked up together with its left and right neighbour;
	 * GENERIC_CONTEXT stands in for a neighbour that does not exist, so a
	 * one-phoneme pronunciation is looked up with both neighbours generic.
	 *
	 * @param acoustic  model whose handle is passed to CA_ArbdataGetModelIdsForPIC
	 * @return total number of phonemes processed over all pronunciations
	 */
	int Pronunciation::lookupModelIDs( AcousticModel &acoustic )
	{
	    // Looks up all hmms for all prons
	    int totalCount = 0;
	    const int numProns = getPronCount();
	    for (int ii = 0; ii < numProns; ii++ )
	    {
	        std::string pron;
	        getPron( ii, pron );
	        const int numPhonemes = pron.size();

	        std::vector<int> idList; // Create storage
	        for ( int jj = 0; jj < numPhonemes; jj++ )
	        {
	            // Take the context of phoneme jj itself rather than the first
	            // three characters of the pronunciation.
	            const char cphon = pron.at(jj);
	            const char lphon = ( jj > 0 ) ? pron.at(jj-1) : GENERIC_CONTEXT;
	            const char rphon = ( jj < numPhonemes-1 ) ? pron.at(jj+1) : GENERIC_CONTEXT;
	            int id = CA_ArbdataGetModelIdsForPIC( acoustic.getCAModelHandle(), lphon, cphon, rphon );
	#if DEBUG
	            std::cout <<"DEBUG model id: " << lphon <<cphon << rphon << " "<< id << std::endl;
	#endif

	            idList.push_back(id);
	        }
	        m_ModelIDs.push_back(idList);
	        totalCount += numPhonemes;
	    }
	    return totalCount;
	}

	/**
	 * @param pronIndex  position of a pronunciation in m_ModelIDs; not range-checked
	 * @return number of model IDs stored for that pronunciation
	 */
	int Pronunciation::getModelCount( int pronIndex )
	{
	    const int idCount = m_ModelIDs[pronIndex].size();
	    return idCount;
	}

	int Pronunciation::getModelID( int pronIndex, int modelPos )
	{
	return m_ModelIDs[pronIndex][modelPos];
	}

	/**
	 * Loads the data named by arbFileName through CA_LoadArbdata. When nothing
	 * is returned, an error naming the file is printed to standard output and
	 * the process exits with status -1.
	 */
	AcousticModel::AcousticModel( std::string & arbFileName )
	{
	    m_CA_Arbdata = CA_LoadArbdata( arbFileName.c_str() );
	    if (m_CA_Arbdata)
	    {
	        return;
	    }

	    std::cout << "Error: while trying to load " << arbFileName.c_str() << std::endl;
	    exit (-1);
	}

	/** Hands the loaded data back to CA_FreeArbdata. */
	AcousticModel::~AcousticModel()
	{
	    CA_FreeArbdata( m_CA_Arbdata );
	}

	int AcousticModel::getStateIndices(int id, std::vector<int> & stateIDs)
	{
	srec_arbdata allotree = (srec_arbdata) m_CA_Arbdata;
	int numStates = allotree->hmm_infos[id].num_states;
	#if DEBUG
	std::cout << "getStateIndices: count = " << numStates <<std::endl;
	#endif
	for (int ii=0; ii <numStates; ii++ ) {
	stateIDs.push_back( allotree->hmm_infos[id].state_indices[ii] );
	#if DEBUG
	std::cout << allotree->hmm_infos[id].state_indices[ii] ;
	#endif
	}
	#if DEBUG
	std::cout << std::endl;
	#endif
	return stateIDs.size();
	}