summaryrefslogtreecommitdiffstats
path: root/kttsd/kttsd/speaker.h
blob: 648553f29b2e7a8497fbb1f28a21b070b12f5595 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
/***************************************************** vim:set ts=4 sw=4 sts=4:
  Speaker class.

  This class is in charge of getting the messages, warnings and text from
  the queue and calling the plugin functions that actually speak the text.
  -------------------
  Copyright:
  (C) 2002-2003 by José Pablo Ezequiel "Pupeno" Fernández <pupeno@kde.org>
  (C) 2003-2004 by Olaf Schmidt <ojschmidt@kde.org>
  (C) 2004 by Gary Cramblitt <garycramblitt@comcast.net>
  -------------------
  Original author: José Pablo Ezequiel "Pupeno" Fernández
 ******************************************************************************/

/******************************************************************************
 *                                                                            *
 *    This program is free software; you can redistribute it and/or modify    *
 *    it under the terms of the GNU General Public License as published by    *
 *    the Free Software Foundation; either version 2 of the License.          *
 *                                                                            *
 ******************************************************************************/

#ifndef _SPEAKER_H_
#define _SPEAKER_H_

// TQt includes.
#include <tqobject.h>
#include <tqvaluevector.h>
#include <tqevent.h>

// KTTSD includes.
#include <speechdata.h>
#include <pluginproc.h>
#include <stretcher.h>
#include <talkercode.h>
#include <ssmlconvert.h>

class Player;
class TQTimer;
class TalkerMgr;

/**
* Classifies an utterance by the kind of content it carries.
*/
enum uttType
{
    /** Text. */
    utText = 0,
    /** Interruption given as a text message. */
    utInterruptMsg,
    /** Interruption given as a sound file. */
    utInterruptSnd,
    /** Resume given as a text message. */
    utResumeMsg,
    /** Resume given as a sound file. */
    utResumeSnd,
    /** Message. */
    utMessage,
    /** Warning. */
    utWarning,
    /** Screen Reader Output. */
    utScreenReader,
    /** Start-of-job marker. */
    utStartOfJob,
    /** End-of-job marker. */
    utEndOfJob
};

/**
* The stage of processing an utterance has reached.
*/
enum uttState
{
    /** Null state; the utterance is brand new. */
    usNone = 0,
    /** Awaiting transformation (XSLT). */
    usWaitingTransform,
    /** Transformation (XSLT) is in progress. */
    usTransforming,
    /** Awaiting the start of synthesis. */
    usWaitingSay,
    /** Awaiting synthesis and audibilizing. */
    usWaitingSynth,
    /** Awaiting emission of a textStarted or textFinished signal. */
    usWaitingSignal,
    /** Plugin is both synthesizing and audibilizing. */
    usSaying,
    /** Plugin is synthesizing only. */
    usSynthing,
    /** Plugin has finished synthesizing; ready to be stretched. */
    usSynthed,
    /** Speed is being adjusted. */
    usStretching,
    /** Speed adjustment is finished; ready for playback. */
    usStretched,
    /** Playing on the Audio Player. */
    usPlaying,
    /** Paused on the Audio Player because of a user action. */
    usPaused,
    /** Paused on the Audio Player because of Screen Reader Output. */
    usPreempted,
    /** Ready for deletion. */
    usFinished
};

/**
* An utterance that is being synthesized or audibilized, together with the
* helper objects and state that accompany it.
*/
struct Utt
{
    /** The text, talker, appId, and sequence number. */
    mlText* sentence;

    /** The type of utterance (text, msg, screen reader). */
    uttType utType;

    /** True if the utterance contains SSML markup. */
    bool isSSML;

    /** Processing state of the utterance. */
    uttState state;

    /** XSLT transformer. */
    SSMLConvert* transformer;

    /** The plugin that synthesizes the utterance. */
    PlugInProc* plugin;

    /** Audio stretcher object; adjusts speed. */
    Stretcher* audioStretcher;

    /**
    * Filename containing the synthesized audio.  Null if the plugin has
    * not yet synthesized the utterance, or if the plugin does not support
    * synthesis.
    */
    TQString audioUrl;

    /**
    * The audio player audibilizing the utterance.  Null if not currently
    * audibilizing, or if the plugin doesn't support synthesis.
    */
    Player* audioPlayer;
};

/**
* Iterator for queue of utterances (a TQValueVector of Utt).
*
* Speaker's private helpers (for example deleteUtterance and
* startPlayingUtterance) take or return this type to refer to a position
* in the utterance queue.
*/
typedef TQValueVector<Utt>::iterator uttIterator;

// Timer interval for checking whether audio playback is finished.
// NOTE(review): the unit is not stated in this header; presumably
// milliseconds -- confirm against the code that starts the timer.
const int timerInterval = 500;

/**
 * This class is in charge of getting the messages, warnings and text from
 * the queue and calling the plugin functions that actually speak the text.
 *
 * The public methods control individual text jobs (start, stop, pause,
 * resume, reorder, navigate).  The signals report reading and job progress;
 * several of them correspond to signals of the KSpeech interface.  The
 * private slots receive notifications from plugins, the audio stretcher,
 * the SSML transformer and the playback timer.
 */
class Speaker : public TQObject{
    Q_OBJECT
  

    public:
        /**
         * Constructor
         * Calls load plug ins
         * @param speechData     Pointer to the SpeechData object.
         * @param talkerMgr      Pointer to the TalkerMgr object.
         * @param parent         Parent TQObject.  Defaults to 0.
         * @param name           Object name.  Defaults to 0.
         */
        Speaker(SpeechData* speechData, TalkerMgr* talkerMgr,
                TQObject *parent = 0, const char *name = 0);

        /**
         * Destructor
         */
        ~Speaker();

        /**
         * Tells the thread to exit
         */
        void requestExit();

        /**
        * Main processing loop.  Dequeues utterances and sends them to the
        * plugins and/or Audio Player.
        */
        void doUtterances();

        /**
        * Determine if kttsd is currently speaking any text jobs.
        * @return               True if currently speaking any text jobs.
        */
        bool isSpeakingText();

        /**
        * Get the job number of the current text job.
        * @return               Job number of the current text job. 0 if no jobs.
        *
        * Note that the current job may not be speaking. See @ref isSpeakingText.
        * @see getTextJobState.
        * @see isSpeakingText
        */
        uint getCurrentTextJob();

        /**
        * Remove a text job from the queue.
        * @param jobNum         Job number of the text job.
        *
        * The job is deleted from the queue and the @ref textRemoved signal is emitted.
        * NOTE(review): no textRemoved signal is declared in this class;
        * presumably it is emitted by another object -- confirm.
        *
        * If there is another job in the text queue, and it is marked speakable,
        * that job begins speaking.
        */
        void removeText(const uint jobNum);

        /**
        * Start a text job at the beginning.
        * @param jobNum         Job number of the text job.
        *
        * Rewinds the job to the beginning.
        *
        * The job is marked speakable.
        * If there are other speakable jobs preceding this one in the queue,
        * those jobs continue speaking and when finished, this job will begin speaking.
        * If there are no other speakable jobs preceding this one, it begins speaking.
        *
        * The @ref textStarted signal is emitted when the text job begins speaking.
        * When all the sentences of the job have been spoken, the job is marked for deletion from
        * the text queue and the @ref textFinished signal is emitted.
        */
        void startText(const uint jobNum);

        /**
        * Stop a text job and rewind to the beginning.
        * @param jobNum         Job number of the text job.
        *
        * The job is marked not speakable and will not be speakable until @ref startText or @ref resumeText
        * is called.
        *
        * If there are speaking jobs preceding this one in the queue, they continue speaking.
        * If the job is currently speaking, the @ref textStopped signal is emitted and the job stops speaking.
        * Depending upon the speech engine and plugin used, speaking may not stop immediately
        * (it might finish the current sentence).
        */
        void stopText(const uint jobNum);

        /**
        * Pause a text job.
        * @param jobNum         Job number of the text job.
        *
        * The job is marked as paused and will not be speakable until @ref resumeText or
        * @ref startText is called.
        *
        * If there are speaking jobs preceding this one in the queue, they continue speaking.
        * If the job is currently speaking, the @ref textPaused signal is emitted and the job stops speaking.
        * Depending upon the speech engine and plugin used, speaking may not stop immediately
        * (it might finish the current sentence).
        * @see resumeText
        */
        void pauseText(const uint jobNum);

        /**
        * Start or resume a text job where it was paused.
        * @param jobNum         Job number of the text job.
        *
        * The job is marked speakable.
        *
        * If the job is currently speaking, or is waiting to be spoken (speakable
        * state), the resumeText() call is ignored.
        *
        * If the job is currently queued, or is finished, it is the same as calling
        * @ref startText .
        *
        * If there are speaking jobs preceding this one in the queue, those jobs continue speaking and,
        * when finished, this job will begin speaking where it left off.
        *
        * The @ref textResumed signal is emitted when the job resumes.
        * @see pauseText
        */
        void resumeText(const uint jobNum);

        /**
        * Move a text job down in the queue so that it is spoken later.
        * @param jobNum         Job number of the text job.
        *
        * If the job is currently speaking, it is paused.
        * If the next job in the queue is speakable, it begins speaking.
        */
        void moveTextLater(const uint jobNum);

        /**
        * Jump to the first sentence of a specified part of a text job.
        * @param partNum        Part number of the part to jump to.  Parts are numbered starting at 1.
        * @param jobNum         Job number of the text job.
        * @return               Part number of the part actually jumped to.
        *
        * If partNum is greater than the number of parts in the job, jumps to last part.
        * If partNum is 0, does nothing and returns the current part number.
        * If no such job, does nothing and returns 0.
        * Does not affect the current speaking/not-speaking state of the job.
        */
        int jumpToTextPart(const int partNum, const uint jobNum);

        /**
        * Advance or rewind N sentences in a text job.
        * @param n              Number of sentences to advance (positive) or rewind (negative)
        *                       in the job.
        * @param jobNum         Job number of the text job.
        * @return               Sequence number of the sentence actually moved to.
        *                       Sequence numbers are numbered starting at 1.
        *
        * If no such job, does nothing and returns 0.
        * If n is zero, returns the current sequence number of the job.
        * Does not affect the current speaking/not-speaking state of the job.
        */
        uint moveRelTextSentence(const int n, const uint jobNum);

    signals:
        /**
         * Emitted whenever reading a text was started or resumed
         */
        void readingStarted();

        /**
         * Emitted whenever reading a text was finished,
         * or paused, or stopped before it was finished
         */
        void readingStopped();

        /**
         * Emitted whenever a message or warning interrupts reading a text
         */
        void readingInterrupted();

        /**
         * Emitted whenever reading a text is resumed after it was interrupted
         * Note: In function resumeText, readingStarted is emitted instead
         */
        void readingResumed();

        /* The following signals correspond to the signals in the KSpeech interface. */

        /**
        * This signal is emitted when the speech engine/plugin encounters a marker in the text.
        * @param appId          DCOP application ID of the application that queued the text.
        * @param markerName     The name of the marker seen.
        * @see markers
        */
        void markerSeen(const TQCString& appId, const TQString& markerName);

        /**
        * This signal is emitted whenever a sentence begins speaking.
        * @param text           NOTE(review): presumably the text of the sentence
        *                       that is starting -- confirm against the emitter.
        * @param language       NOTE(review): presumably the language of that
        *                       sentence -- confirm against the emitter.
        * @param appId          DCOP application ID of the application that queued the text.
        * @param jobNum         Job number of the text job.
        * @param seq            Sequence number of the text.
        */
        void sentenceStarted(TQString text, TQString language, const TQCString& appId,
            const uint jobNum, const uint seq);

        /**
        * This signal is emitted when a sentence has finished speaking.
        * @param appId          DCOP application ID of the application that queued the text.
        * @param jobNum         Job number of the text job.
        * @param seq            Sequence number of the text.
        */        
        void sentenceFinished(const TQCString& appId, const uint jobNum, const uint seq);

        /**
        * This signal is emitted whenever speaking of a text job begins.
        * @param appId          The DCOP senderId of the application that created the job.  NULL if kttsd.
        * @param jobNum         Job number of the text job.
        */
        void textStarted(const TQCString& appId, const uint jobNum);

        /**
        * This signal is emitted whenever a text job is finished.  The job has
        * been marked for deletion from the queue and will be deleted when another
        * job reaches the Finished state. (Only one job in the text queue may be
        * in state Finished at one time.)  If @ref startText or @ref resumeText is
        * called before the job is deleted, it will remain in the queue for speaking.
        * @param appId          The DCOP senderId of the application that created the job.
        * @param jobNum         Job number of the text job.
        */
        void textFinished(const TQCString& appId, const uint jobNum);

        /**
        * This signal is emitted whenever a speaking text job stops speaking.
        * @param appId          The DCOP senderId of the application that created the job.
        * @param jobNum         Job number of the text job.
        */
        void textStopped(const TQCString& appId, const uint jobNum);
        /**
        * This signal is emitted whenever a speaking text job is paused.
        * @param appId          The DCOP senderId of the application that created the job.
        * @param jobNum         Job number of the text job.
        */
        void textPaused(const TQCString& appId, const uint jobNum);
        /**
        * This signal is emitted when a text job, that was previously paused, resumes speaking.
        * @param appId          The DCOP senderId of the application that created the job.
        * @param jobNum         Job number of the text job.
        */
        void textResumed(const TQCString& appId, const uint jobNum);

    protected:
        /**
        * Processes events posted by ThreadedPlugIns.
        * @param e              The event to process.
        * @return               NOTE(review): presumably true if the event was
        *                       handled, as for TQObject::event -- confirm.
        */
        virtual bool event ( TQEvent * e );

    private slots:
        /**
        * Received from PlugIn objects when they finish asynchronous synthesis.
        */
        void slotSynthFinished();
        /**
        * Received from PlugIn objects when they finish asynchronous synthesis
        * and audibilizing.
        */
        void slotSayFinished();
        /**
        * Received from PlugIn objects when they asynchronously stopText.
        */
        void slotStopped();
        /**
        * Received from audio stretcher when stretching (speed adjustment) is finished.
        */
        void slotStretchFinished();
        /**
        * Received from transformer (SSMLConvert) when transforming is finished.
        */
        void slotTransformFinished();
        /**
        * Received from PlugIn objects when they encounter an error.
        * @param keepGoing               True if the plugin can continue processing.
        *                                False if the plugin cannot continue, for example,
        *                                the speech engine could not be started.
        * @param msg                     Error message.
        */
        void slotError(bool keepGoing, const TQString &msg);
        /**
        * Received from Timer when it fires.
        * Check audio player to see if it is finished.
        */
        void slotTimeout();

    private:

        /**
        * Converts an utterance state enumerator to a displayable string.
        * @param state           Utterance state.
        * @return                Displayable string for utterance state.
        */
        TQString uttStateToStr(uttState state);

        /**
        * Converts an utterance type enumerator to a displayable string.
        * @param utType          Utterance type.
        * @return                Displayable string for utterance type.
        */
        TQString uttTypeToStr(uttType utType);

        /**
        * Converts a plugin state enumerator to a displayable string.
        * @param state           Plugin state.
        * @return                Displayable string for plugin state.
        */
        TQString pluginStateToStr(pluginState state);

        /**
        * Converts a job state enumerator to a displayable string.
        * @param state           Job state, passed as a plain int.
        * @return                Displayable string for job state.
        */
        TQString jobStateToStr(int state);

        /**
        * Determines whether the given text is SSML markup.
        * @param text            The text to examine.
        * @return                True if the text is SSML markup.
        */
        bool isSsml(const TQString &text);

        /**
        * Determines the initial state of an utterance.  If the utterance contains
        * SSML, the state is set to usWaitingTransform.  Otherwise, if the plugin
        * supports async synthesis, sets to usWaitingSynth, otherwise usWaitingSay.
        * If an utterance has already been transformed, usWaitingTransform is
        * skipped to either usWaitingSynth or usWaitingSay.
        * @param utt             The utterance.  Taken by non-const reference.
        */
        void setInitialUtteranceState(Utt &utt);

        /**
        * Returns true if the given job and sequence number is already in the utterance queue.
        * @param jobNum          Job number to look for.
        * @param seqNum          Sequence number to look for.
        */
        bool isInUtteranceQueue(uint jobNum, uint seqNum);

        /**
        * Gets the next utterance to be spoken from speechdata and adds it to the queue.
        * @return                True if one or more utterances were added to the queue.
        *
        * Checks for waiting ScreenReaderOutput, Warnings, Messages, or Text,
        * in that order.
        * If Warning or Message and interruption messages have been configured,
        * adds those to the queue as well.
        * Determines which plugin should be used for the utterance.
        */
        bool getNextUtterance();

        /**
        * Given an iterator pointing to the m_uttQueue, deletes the utterance
        * from the queue.  If the utterance is currently being processed by a
        * plugin or the Audio Player, halts that operation and deletes Audio Player.
        * Also takes care of deleting temporary audio file.
        * @param it                      Iterator pointer to m_uttQueue.
        * @return                        Iterator pointing to the next utterance in the
        *                                queue, or m_uttQueue.end().
        */
        uttIterator deleteUtterance(uttIterator it);

        /**
        * Given an iterator pointing to the m_uttQueue, starts playing audio if
        *   1) An audio file is ready to be played, and
        *   2) It is not already playing.
        * If another audio player is already playing, pauses it before starting
        * the new audio player.
        * @param it                      Iterator pointer to m_uttQueue.
        * @return                        True if an utterance began playing or resumed.
        */
        bool startPlayingUtterance(uttIterator it);

        /**
        * Delete any utterances in the queue with this jobNum.
        * If currently processing any deleted utterances, stop them.
        * @param jobNum          The Job Number of the utterance(s) to delete.
        */
        void deleteUtteranceByJobNum(const uint jobNum);

        /**
        * Pause the utterance with this jobNum and if it is playing on the Audio Player,
        * pause the Audio Player.
        * @param jobNum          The Job Number of the utterance to pause.
        */
        void pauseUtteranceByJobNum(const uint jobNum);

        /**
        * Takes care of emitting reading interrupted/resumed and sentence started signals.
        * Should be called just before audibilizing an utterance.
        * @param it                      Iterator pointer to m_uttQueue.
        */
        void prePlaySignals(uttIterator it);

        /**
        * Takes care of emitting sentenceFinished signal.
        * Should be called immediately after an utterance has completed playback.
        * @param it                      Iterator pointer to m_uttQueue.
        */
        void postPlaySignals(uttIterator it);

        /**
        * Constructs a temporary filename for plugins to use as a suggested filename
        * for synthesis to write to.
        * @return                        Full pathname of suggested file.
        */
        TQString makeSuggestedFilename();

        /**
        * Creates and returns a player object based on user option.
        * @return                        Pointer to the created player object.
        */
        Player* createPlayerObject();

        /**
         * SpeechData local pointer
         */
        SpeechData* m_speechData;

        /**
        * TalkerMgr local pointer.
        */
        TalkerMgr* m_talkerMgr;

        /**
        * True if the speaker was requested to exit.
        */
        volatile bool m_exitRequested;

        /**
        * Queue of utterances we are currently processing.
        */
        TQValueVector<Utt> m_uttQueue;

        /**
        * True when text job reading has been interrupted.
        */
        bool m_textInterrupted;

        /**
        * Used to prevent doUtterances from prematurely exiting.
        */
        bool m_again;

        /**
        * Which audio player to use.
        *  0 = aRts
        *  1 = gstreamer
        *  2 = ALSA
        */
        int m_playerOption;

        /**
        * Audio stretch factor (Speed).
        */
        float m_audioStretchFactor;

        /**
        * GStreamer sink name to use, or ALSA PCM device name.
        */
        TQString m_sinkName;

        /**
        * Timer for monitoring audio player.
        */
        TQTimer* m_timer;

        /**
        * Current Text job being processed.
        */
        uint m_currentJobNum;

        /**
        * Job Number, appId, and sequence number of the last text sentence queued.
        * These three members are meant to be read together.
        */
        uint m_lastJobNum;
        TQCString m_lastAppId;
        uint m_lastSeq;

        /**
        * Some parameters used by ALSA plugin.
        * m_periodSize: Size of buffer interrupt period (in frames).
        * m_periods:    Number of periods in buffer.
        */
        uint m_periodSize;
        uint m_periods;

        /**
        * Debug level in players.
        */
        uint m_playerDebugLevel;
};

#endif // _SPEAKER_H_