kttsd/kttsd/ssmlconvert.cpp


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295

/***************************************************** vim:set ts=4 sw=4 sts=4:
  SSMLConvert class

  This class is in charge of converting SSML text into a format that can
  be handled by individual synths. 
  -------------------
  Copyright:
  (C) 2004 by Paul Giannaros <ceruleanblaze@gmail.com>
  (C) 2004 by Gary Cramblitt <garycramblitt@comcast.net>
  -------------------
  Original author: Paul Giannaros <ceruleanblaze@gmail.com>
******************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; version 2 of the License.               *
 *                                                                         *
 ***************************************************************************/

// TQt includes.
#include <tqstring.h>
#include <tqstringlist.h>
#include <tqdom.h>
#include <tqfile.h>
#include <tqtextstream.h>

// KDE includes.
#include <tdeversion.h>
#include <kstandarddirs.h>
#include <kprocess.h>
#include <ktempfile.h>
#include <kdebug.h>

// SSMLConvert includes.
#include "ssmlconvert.h"
#include "ssmlconvert.moc"

/// Default constructor. Starts with an empty talker list, no xsltproc
/// process and the converter in the idle state.
SSMLConvert::SSMLConvert()
    : m_talkers(TQStringList())
{
    m_xsltProc = 0;     // No transformation process has been spawned yet.
    m_state = tsIdle;
}

/// Constructor taking the talker codes that appropriateTalker() will choose from.
/// No xsltproc process exists yet and the converter starts in the idle state.
SSMLConvert::SSMLConvert(const TQStringList &talkers)
    : m_talkers(talkers)
{
    m_xsltProc = 0;     // No transformation process has been spawned yet.
    m_state = tsIdle;
}

/// Destructor. Deletes the xsltproc process object (if any) and removes
/// whichever temporary input/output files are still recorded.
SSMLConvert::~SSMLConvert() {
    delete m_xsltProc;

    // An empty name means the file was never created or was already cleaned up.
    const bool haveInput = !m_inFilename.isEmpty();
    if (haveInput) {
        TQFile::remove(m_inFilename);
    }

    const bool haveOutput = !m_outFilename.isEmpty();
    if (haveOutput) {
        TQFile::remove(m_outFilename);
    }
}

/// Set the talkers to be used as reference for entered text.
void SSMLConvert::setTalkers(const TQStringList &talkers) {
    m_talkers = talkers;
}

/**
* Derive a short, lower-case synthesizer identifier from a talker code.
* @param talkercode         talker code containing a synthesizer="..." entry.
* @returns                  "flite" if the quoted synthesizer name contains "flite";
*                           otherwise the name cut at its first space (if any), lower-cased.
*/
TQString SSMLConvert::extractTalker(const TQString &talkercode) {
    // Field 1 when the talker code is split on "synthesizer=" ...
    const TQString afterKey = talkercode.section("synthesizer=", 1, 1);
    // ... and, within it, field 1 when split on double quotes: the quoted name.
    const TQString synthName = afterKey.section('"', 1, 1);

    if (synthName.contains("flite")) {
        return "flite";
    }

    // Keep only the part before the first space.
    const int firstSpace = synthName.find(" ");
    return synthName.left(firstSpace).lower();
}

/**
* Return the most appropriate talker for the given SSML text.
* @param text               the SSML text that will be parsed.
* @returns                  the talker code of the best match, or a null string if the
*                           root element of @p text is not "speak" or if no configured
*                           talker survives the filtering.
*
* The appropriate talker is the one that has the most features that are required in some
* SSML markup. In the future I'm hoping to make the importance of individual features
* configurable, but better to walk before you can run.
* Currently, the searching method in place is like a filter: those that meet the criteria we're
* searching for stay while others are sifted out. This should leave us with the right talker to use.
* It's not a very good method, but should be appropriate in most cases and should do just fine for now.
*
* As it stands, here is the list of things that are looked for, in order of most importance:
*   - Language
*      Obviously the most important. If a language is specified, look for the talkers that support it.
*      Default to en (or some form of en - en_US, en_GB, etc). Only one language at a time is allowed
*      at the moment, and must be specified in the root speak element (<speak xml:lang="en-US">)
*   - Gender
*      If a gender ("male" or "female") is specified, look for talkers that comply. There is no
*      default, so if no gender is specified, or no talker has the requested gender, no talkers
*      are removed. The only gender that will be searched for is the one specified in the root
*      speak element. This should change in the future.
*   - Prosody
*      Check if prosody modification is allowed by the talker. Currently this is hardcoded
*      (Festival Interactive and Hadifix are treated as supporting it).
*
* Bear in mind that the XSL stylesheet that will be applied to the SSML is the same regardless
* of how the talker is chosen, meaning that you don't lose some features of the talker if this
* search doesn't encompass them.
*
* TQDom is the item of choice for the matching. Just walk the tree..
*/
TQString SSMLConvert::appropriateTalker(const TQString &text) const {
    TQDomDocument ssml;
    ssml.setContent(text, false);  // No namespace processing.
    /// Matches are stored here. Obviously to begin with every talker matches.
    TQStringList matches = m_talkers;

    /// Check that this is (well formed) SSML and all our searching will not be in vain.
    /// If parsing failed the root element's tag name will not be "speak" either.
    TQDomElement root = ssml.documentElement();
    if(root.tagName() != "speak") {
        // Not SSML.
        return TQString();
    }

    /**
    * For each rule that we are looking through, filter the currently
    * matching talkers and drop all the talkers that don't match.
    */

    kdDebug() << "SSMLConvert::appropriateTalker: BEFORE LANGUAGE SEARCH: " << matches.join(" ") << endl;
    /**
    * Language searching
    */
    if(root.hasAttribute("xml:lang")) {
        TQString lang = root.attribute("xml:lang");
        kdDebug() << "SSMLConvert::appropriateTalker: xml:lang found (" << lang << ")" << endl;
        /// If it is set to en*, then match all english speakers. They all sound the same anyways.
        if(lang.contains("en-")) {
            kdDebug() << "SSMLConvert::appropriateTalker: English" << endl;
            lang = "en";
        }
        /// Find all hits and place them in matches. We don't search for the closing " because if
        /// the talker emits lang="en-UK" or something we'd be ignoring it, which we don't want.
        matches = matches.grep("lang=\"" + lang);
    }
    else {
        kdDebug() << "SSMLConvert::appropriateTalker: no xml:lang found. Defaulting to en.." << endl;
        matches = matches.grep("lang=\"en");
    }

    kdDebug() << "SSMLConvert::appropriateTalker: AFTER LANGUAGE SEARCH: " << matches.join(" ") << endl;

    /**
    * Gender searching
    * If, for example, male is specified and only female is found,
    * ignore the choice and just use female.
    */
    if(root.hasAttribute("gender")) {
        const TQString gender = root.attribute("gender");
        kdDebug() << "SSMLConvert::appropriateTalker: gender found (" << gender << ")" << endl;
        /// Only 'male' and 'female' are acted upon; any other value is ignored.
        if(gender == "male" || gender == "female") {
            /// Make sure that we don't strip away all the talkers because of no matches.
            const TQStringList genderMatches = matches.grep("gender=\"" + gender);
            if(!genderMatches.isEmpty())
                matches = genderMatches;
        }
    }
    else {
        kdDebug() << "SSMLConvert::appropriateTalker: no gender found." << endl;
    }

    /**
    * Prosody
    * Search for talkers that allow modification of the synth output - louder, higher,
    * slower, etc. There should be a direct way to query each synth to find out if this
    * is supported (some function in PlugInConf), but for now, hardcode all the way :(
    */
    /// Known to support (feel free to add to the list and if search):
    ///   Festival Int (not flite), Hadifix
    const TQStringList festivalMatches = matches.grep("synthesizer=\"Festival Interactive");
    const TQStringList hadifixMatches = matches.grep("synthesizer=\"Hadifix");
    if(!festivalMatches.isEmpty() || !hadifixMatches.isEmpty()) {
        kdDebug() << "SSMLConvert::appropriateTalker: Prosody allowed" << endl;
        /// Keep only the prosody-capable talkers, Festival Interactive ones first.
        matches = festivalMatches + hadifixMatches;
    }
    else
        kdDebug() << "SSMLConvert::appropriateTalker: No prosody-supporting talkers found" << endl;

    /// The filters above (language in particular) may have removed every talker;
    /// indexing an empty list is not valid, so report "no talker" explicitly.
    if(matches.isEmpty()) {
        kdDebug() << "SSMLConvert::appropriateTalker: no matching talker found" << endl;
        return TQString();
    }

    /// Return the first match that complies. Maybe a discrete way to
    /// choose between all the matches could be offered in the future. Some form of preference.
    return matches[0];
}

/**
* Applies the spreadsheet for a talker to the SSML and returns the talker-native output.
* @param text               The markup to apply the spreadsheet to.
* @param xsltFilename       The name of the stylesheet file that will be applied (i.e freetts, flite).
* @returns                  False if an error occurs.
*
* This converts a piece of SSML into a format the given talker can understand. It applies
* an XSLT spreadsheet to the SSML and returns the output.
*
* Emits transformFinished signal when completed.  Caller then calls getOutput to retrieve
* the transformed text.
*/

bool SSMLConvert::transform(const TQString &text, const TQString &xsltFilename) {
    m_xsltFilename = xsltFilename;
    /// Write @param text to a temporary file.
    KTempFile inFile(locateLocal("tmp", "kttsd-"), ".ssml");
    m_inFilename = inFile.file()->name();
    TQTextStream* wstream = inFile.textStream();
    if (wstream == 0) {
        /// wtf...
        kdDebug() << "SSMLConvert::transform: Can't write to " << m_inFilename << endl;;
        return false;
    }
    // TODO: Is encoding an issue here?
    // TODO: It would be nice if we detected whether the XML is properly formed
    // with the required xml processing instruction and encoding attribute.  If
    // not wrap it in such.  But maybe this should be handled by SpeechData::setText()?
    *wstream << text;
    inFile.close();
#if TDE_VERSION >= TDE_MAKE_VERSION (3,3,0)
    inFile.sync();
#endif

    // Get a temporary output file name.
    KTempFile outFile(locateLocal("tmp", "kttsd-"), ".output");
    m_outFilename = outFile.file()->name();
    outFile.close();
    // outFile.unlink();    // only activate this if necessary.

    /// Spawn an xsltproc process to apply our stylesheet to our SSML file.
    m_xsltProc = new KProcess;
    *m_xsltProc << "xsltproc";
    *m_xsltProc << "-o" << m_outFilename  << "--novalid"
        << m_xsltFilename << m_inFilename;
    // Warning: This won't compile under KDE 3.2.  See FreeTTS::argsToStringList().
    // kdDebug() << "SSMLConvert::transform: executing command: " <<
    //     m_xsltProc->args() << endl;

    connect(m_xsltProc, TQT_SIGNAL(processExited(KProcess*)),
        this, TQT_SLOT(slotProcessExited(KProcess*)));
    if (!m_xsltProc->start(KProcess::NotifyOnExit, KProcess::NoCommunication))
    {
        kdDebug() << "SSMLConvert::transform: Error starting xsltproc" << endl;
        return false;
    }
    m_state = tsTransforming;
    return true;
}

/**
* Slot connected to the xsltproc process's processExited signal.
* Schedules the process object for deletion, marks the transformation
* as finished and emits transformFinished.
*/
void SSMLConvert::slotProcessExited(KProcess* /*proc*/)
{
    // Deletion is deferred (deleteLater) rather than done directly in this slot.
    KProcess* const finishedProc = m_xsltProc;
    finishedProc->deleteLater();
    m_xsltProc = 0;

    m_state = tsFinished;
    emit transformFinished();
}

/**
* Returns current processing state (the value last stored in m_state,
* e.g. tsIdle, tsTransforming or tsFinished).
*/
int SSMLConvert::getState()
{
    const int currentState = m_state;
    return currentState;
}

/**
* Returns the output from call to transform.
* @returns                  The contents of the temporary output file, or a null
*                           string if that file could not be opened for reading.
*
* In either case the temporary input and output files are removed and the
* state is reset to idle, so the object is ready for another transform.
*/
TQString SSMLConvert::getOutput()
{
    /// Read back the data that xsltproc wrote to the temporary output file.
    TQString convertedData;
    TQFile readfile(m_outFilename);
    if(readfile.open(IO_ReadOnly)) {
        TQTextStream rstream(&readfile);
        convertedData = rstream.read();
        readfile.close();
    }
    else {
        /// Leave convertedData null, but still fall through to the clean up below.
        kdDebug() << "SSMLConvert::getOutput: Could not read file " << m_outFilename << endl;
    }

    // kdDebug() << "SSMLConvert::getOutput: Read SSML file at " + m_inFilename + " and created " + m_outFilename + " based on the stylesheet at " << m_xsltFilename << endl;

    // Clean up, on success and on failure alike, so that no temporary file
    // is left behind once its name is forgotten.
    if (!m_inFilename.isEmpty()) TQFile::remove(m_inFilename);
    m_inFilename = TQString();
    if (!m_outFilename.isEmpty()) TQFile::remove(m_outFilename);
    m_outFilename = TQString();

    // Ready for another transform.
    m_state = tsIdle;

    return convertedData;
}