festival-czech-0.3/0000755000175000017500000000000011026005751012402 5ustar pdmpdmfestival-czech-0.3/czech-debug.scm0000644000175000017500000001275111026005751015274 0ustar pdmpdm;;; Miscellaneous debugging functions for Czech synthesis

;; Copyright (C) 2004, 2005 Brailcom, o.p.s.

;; Author: Milan Zamazal

;; COPYRIGHT NOTICE

;; This program is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2 of the License, or
;; (at your option) any later version.

;; This program is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
;; for more details.

;; You should have received a copy of the GNU General Public License
;; along with this program; if not, write to the Free Software
;; Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.

;; Some data were created using the data files and tools contained in the
;; ispell-czech package available under GPL at
;; ftp://ftp.vslib.cz/pub/unix/ispell/czech.

(require 'czech)

;; Write two newline characters to the stream S.
(define (czech-debug-newline s)
  (format s "\n\n"))

;; Write the section heading "* PROMPT:" and a newline to the stream S.
(define (czech-debug-prompt s prompt)
  (format s "* %s:\n" prompt))

;; Print the items of RELATION in UTT to the stream S.
;; A heading with the relation name comes first, then the name of each item.
;; For every feature name in FEATURES whose value on the item is worth showing,
;; " NAME=VALUE " is listed inside parentheses after the item name.  A value is
;; suppressed when it matches the regexp "0?" or equals "nil" or "NB".
(define (czech-debug-print-relation s utt relation features)
  (czech-debug-prompt s relation)
  (let ((item (utt.relation.first utt relation)))
    (while item
      (format s "%s " (item.name item))
      (let ((shown '()))
        ;; Collect (NAME . VALUE) pairs; consing builds them in reverse order,
        ;; which is undone by the `reverse' below.
        (mapcar
         (lambda (name)
           (let ((value (item.feat item name)))
             (if (not (or (string-matches value "0?")
                          (string-equal value "nil")
                          (string-equal value "NB")))
                 (set! shown (cons (cons name value) shown)))))
         features)
        (if shown
            (begin
              (format s "(")
              (mapcar (lambda (pair)
                        (if pair
                            (format s " %s=%s " (car pair) (cdr pair))))
                      (reverse shown))
              (format s ") "))))
      (set! item (item.next item)))))

;; Print the current value of `czech-randomize' to the stream S.
;; UTT is accepted for signature uniformity with the other printers and is
;; not used.
(define (czech-debug-print-randomization s utt)
  (format s "randomized = %s" czech-randomize))

;; Print the Token relation of UTT together with the `punc' and
;; `prepunctuation' features.
(define (czech-debug-print-tokens s utt)
  (czech-debug-print-relation s utt 'Token '(punc prepunctuation)))

;; Print the Word relation of UTT together with the `pbreak' and `pos'
;; features.
(define (czech-debug-print-words s utt)
  (czech-debug-print-relation s utt 'Word '(pbreak pos)))

;; Print the Segment relation of UTT, item names only.
(define (czech-debug-print-segments s utt)
  (czech-debug-print-relation s utt 'Segment '()))

;; Print the units found under the IntStress relation of UTT to the stream S.
;; For each daughter of an IntStress item the names of the SylStructure
;; daughters of its StressUnit daughters are printed, followed by the
;; daughter's `position' and `contourtype' features and its `contour' feature
;; with every element multiplied by 100.  Units are separated by "|" and
;; top-level IntStress items by "||".
(define (czech-debug-print-units s utt)
  (czech-debug-prompt s 'Units)
  (format s "||")
  (let ((int-unit (utt.relation.first utt 'IntStress)))
    (while int-unit
      (mapcar
       (lambda (stress-unit)
         (format s " ")
         (mapcar
          (lambda (syllable)
            (mapcar (lambda (phone) (format s "%s " (item.name phone)))
                    (item.relation.daughters syllable 'SylStructure)))
          (item.relation.daughters stress-unit 'StressUnit))
         (let ((unit-position (item.feat stress-unit 'position))
               (contour-type (item.feat stress-unit 'contourtype))
               (scaled-contour (mapcar (lambda (value) (* 100 value))
                                       (item.feat stress-unit 'contour))))
           (format s "%s/%s %l |" unit-position contour-type scaled-contour)))
       (item.daughters int-unit))
      (format s "|")
      (set! int-unit (item.next int-unit)))))

;; Print the `dur_factor' feature of the segments of UTT to the stream S.
;; Consecutive segments with the same factor are grouped as "[a b c = F] ";
;; the "= F" part is omitted when the factor is string-equal to 0.
(define (czech-debug-print-durfactors s utt)
  (czech-debug-prompt s "Duration factors")
  (let ((seg (utt.relation.first utt 'Segment))
        (group '())
        (factor 'none))
    (while seg
      ;; Gather the run of consecutive segments whose factor equals FACTOR.
      ;; On the very first pass FACTOR is the placeholder `none', so normally
      ;; nothing is gathered and FACTOR is just initialized below.
      (while (and seg (eqv? (item.feat seg 'dur_factor) factor))
        (set! group (cons (item.name seg) group))
        (set! seg (item.next seg)))
      (if group
          (begin
            (format s "[")
            (mapcar (lambda (name) (format s "%s " name))
                    (reverse group))
            (if (not (string-equal factor '0))
                (format s "= %s" factor))
            (format s "] ")
            (set! group '())))
      ;; Start the next group with the factor of the current segment, if any.
      (set! factor (and seg (item.feat seg 'dur_factor))))))

;; Print each segment of UTT with the difference between its `end' feature and
;; the `end' feature of the preceding segment (0 for the first one) to the
;; stream S.  Entries are separated by "- ".
(define (czech-debug-print-durations s utt)
  (czech-debug-prompt s 'Duration)
  (let ((prev-end 0))
    (mapcar
     (lambda (segment)
       (format s "%s %s "
               (item.name segment)
               (- (item.feat segment 'end) prev-end))
       (if (item.next segment)
           (format s "- "))
       (set! prev-end (item.feat segment 'end)))
     (utt.relation.items utt 'Segment))))

;; Print the Target daughters of each segment of UTT to the stream S.
;; Every target is written as F0/P where F0 is its `f0' feature and P is the
;; target's `pos' relative to the end of the previous segment, expressed in
;; hundredths of the segment's length.  Segments are separated by "- ".
(define (czech-debug-print-f0 s utt)
  (czech-debug-prompt s 'F0)
  (let ((prev-end 0))
    (mapcar
     (lambda (segment)
       (let ((seg-dur (- (item.feat segment 'end) prev-end)))
         (format s "%s " (item.name segment))
         (mapcar
          (lambda (target)
            (let ((offset (- (item.feat target 'pos) prev-end)))
              (format s "%s/%d "
                      (item.feat target 'f0)
                      (/ (* 100 offset) seg-dur))))
          (item.relation.daughters segment 'Target)))
       (if (item.next segment)
           (format s "- "))
       (set! prev-end (item.feat segment 'end)))
     (utt.relation.items utt 'Segment))))

;; Print all debugging sections for UTT to the stream S, each followed by an
;; empty line, in this order: randomization, tokens, words, segments, units,
;; duration factors, durations, F0.  Afterwards S is flushed unless it is `t'.
(define (czech-debug-print* s utt)
  (mapcar
   (lambda (print-section)
     (print-section s utt)
     (czech-debug-newline s))
   (list czech-debug-print-randomization
         czech-debug-print-tokens
         czech-debug-print-words
         czech-debug-print-segments
         czech-debug-print-units
         czech-debug-print-durfactors
         czech-debug-print-durations
         czech-debug-print-f0))
  (if (not (eq? s t))
      (fflush s)))

;; Print all debugging sections for UTT, passing `t' as the stream.
(define (czech-debug-print utt)
  (czech-debug-print* t utt))

(provide 'czech-debug)
festival-czech-0.3/czech-lexicon.scm0000644000175000017500000002475611026005751015657 0ustar pdmpdm;;; Czech lexicon

;; Copyright (C) 2003, 2004, 2005, 2006, 2007 Brailcom, o.p.s.

;; Author: Milan Zamazal

;; COPYRIGHT NOTICE

;; This program is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2 of the License, or
;; (at your option) any later version.

;; This program is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
;; for more details.
;; You should have received a copy of the GNU General Public License ;; along with this program; if not, write to the Free Software ;; Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ;;; Alphabet ("a" sym (((a:) 0))) ("a" nil (((a) 0))) ("á" sym (((d l o u h e: ) 0) ((a:) 0))) ("á" int (((a:) 0))) ("ä" nil (((p r~ e h l a s o v a n e:) 0) ((a:) 0))) ("b" nil (((b e:) 0))) ("c" nil (((c e:) 0))) ("č" nil (((c~ e:) 0))) ("d" nil (((d e:) 0))) ("ď" nil (((d~ e:) 0))) ("e" nil (((e:) 0))) ("é" nil (((d l o u h e: ) 0) ((e:) 0))) ("ě" nil (((i j e) 0))) ("f" nil (((e f) 0))) ("g" nil (((g e:) 0))) ("h" nil (((h a:) 0))) ("ch" nil (((ch a:) 0))) ("í" nil (((d l o u h e: ) 0) ((i:) 0))) ("j" nil (((j e:) 0))) ("k" sym (((k a:) 0))) ("k" nil (((k) 0))) ("l" nil (((e l) 0))) ("m" nil (((e m) 0))) ("n" nil (((e n) 0))) ("ň" nil (((e n~) 0))) ("o" sym (((o:) 0))) ("o" nil (((o) 0))) ("ó" sym (((d l o u h e: ) 0) ((o:) 0))) ("ó" int (((o:) 0))) ("ö" nil (((p r~ e h l a s o v a n e:) 0) ((o:) 0))) ("p" nil (((p e:) 0))) ("q" nil (((k v e:) 0))) ("r" nil (((e r) 0))) ("ř" nil (((e r~) 0))) ("s" sym (((e s) 0))) ("s" nil (((s) 0))) ("š" nil (((e s~) 0))) ("ß" nil (((o s t r e:) 0) ((e s) 0))) ("t" nil (((t e:) 0))) ("ť" nil (((t~ e:) 0))) ("u" sym (((u:) 0))) ("u" nil (((u) 0))) ("ú" nil (((d l o u h e: ) 0) ((u:) 0))) ("ů" nil (((u:) 0) ((s k r o u s~ k e m) 0))) ("ü" nil (((p r~ e h l a s o v a n e:) 0) ((u:) 0))) ("v" sym (((v e:) 0))) ("v" nil (((v) 0))) ("w" nil (((d v o j i t e:) 0) ((v e:) 0))) ("x" nil (((i k s) 0))) ("y" nil (((i p s i l o n) 0))) ("ý" nil (((d l o u h e:) 0) ((i p s i l o n) 0))) ("z" sym (((z e t) 0))) ("z" nil (((z) 0))) ("ž" nil (((z~ e t) 0))) ;;; Punctuation characters ("+" num (((p l u s) 0))) ("-" num (((m i n u s) 0))) ("." num (((t e c~ k a) 0))) ("," num (((c e l i: ch) 0))) ("-" range (((a z~) 0))) ("." punc ()) (":" punc ()) (";" punc ()) ("," punc ()) ("-" punc ()) ("?" punc ()) ("!" 
punc ()) ("`" punc ()) ("'" punc ()) ("\"" punc ()) ("(" punc ()) (")" punc ()) ;; These are multiword entries, but there's not another easy way to handle the ;; punctuation ("[" punc (((l e v a:) 0) ((h r a n a t a:) 0))) ("]" punc (((p r a v a:) 0) ((h r a n a t a:) 0))) ("{" punc (((l e v a:) 0) ((s l o z~ e n a:) 0))) ("}" punc (((p r a v a:) 0) ((s l o z~ e n a:) 0))) ("<" punc (((m e n s~ i:) 0) ((n e z~) 0))) (">" punc (((v j e t s~ i:) 0) ((n e z~) 0))) ("." nil (((t e c~ k a) 0))) (":" nil (((d v o j t e c~ k a) 0))) (";" nil (((s t r~ e d n~ i: k) 0))) ("," nil (((c~ a: r k a) 0))) ("-" nil (((p o m l c~ k a) 0))) ("?" nil (((o t a z n~ i: k) 0))) ("!" nil (((v i k r~ i c~ n~ i: k) 0))) ("'" nil (((a p o s t r o f) 0))) ("\"" nil (((u v o z o v k i) 0))) (" " nil (((m e z e r a) 0))) ;;; Other non-alphanumeric characters ("*" nil (((h v j e z d~ i c~ k a) 0))) ("%" nil (((p r o c e n t) 0))) ("&" nil (((a m p r s a n d) 0))) ("$" nil (((d o l a r) 0))) ("#" nil (((m r~ i: z~ k a) 0))) ("@" nil (((z a v i n a: c~) 0))) ("+" nil (((p l u s) 0))) ("=" nil (((r o v n a:) 0) ((s e) 0))) ("^" nil (((s t r~ i: s~ k a) 0))) ("~" nil (((v l n k a) 0))) ("/" nil (((l o m e n o) 0))) ("_" nil (((p o d t r z~ i: t k o) 0))) ("|" nil (((s v i s l i: t k o) 0))) ("\t" nil (((t a b u l a: t o r) 0))) ;;; Words ("Chocomyšl" nil (((ch o c o m i s~ l) 0))) ("Chocomyšle" nil (((ch o c o m i s~ l e) 0))) ("Chocomyšlemi" nil (((ch o c o m i s~ l e m i) 0))) ("Chocomyšli" nil (((ch o c o m i s~ l i) 0))) ("Chocomyšlí" nil (((ch o c o m i s~ l i:) 0))) ("Chocomyšlích" nil (((ch o c o m i s~ l i: ch) 0))) ("Chocomyšlím" nil (((ch o c o m i s~ l i: m) 0))) ("Emacs" nil (((i: m e k s) 0))) ("Emacsech" nil (((i: m e k s e ch) 0))) ("Emacsem" nil (((i: m e k s e m) 0))) ("Emacsu" nil (((i: m e k s u) 0))) ("Emacsy" nil (((i: m e k s i) 0))) ("Emacsů" nil (((i: m e k s u:) 0))) ("Emacsům" nil (((i: m e k s u: m) 0))) ("GNU" nil (((g n u:) 0))) ("Kč" nil (((k o r u n) 0))) ("cca" nil 
(((c i r k a) 0))) ("chocomyšlskou" nil (((ch o c o m i s~ l s k o u) 0))) ("chocomyšlsky" nil (((ch o c o m i s~ l s k i) 0))) ("chocomyšlská" nil (((ch o c o m i s~ l s k a:) 0))) ("chocomyšlské" nil (((ch o c o m i s~ l s k e:) 0))) ("chocomyšlského" nil (((ch o c o m i s~ l s k e: h o) 0))) ("chocomyšlském" nil (((ch o c o m i s~ l s k e: m) 0))) ("chocomyšlskému" nil (((ch o c o m i s~ l s k e: m u) 0))) ("chocomyšlský" nil (((ch o c o m i s~ l s k i:) 0))) ("chocomyšlských" nil (((ch o c o m i s~ l s k i: ch) 0))) ("chocomyšlským" nil (((ch o c o m i s~ l s k i: m) 0))) ("chocomyšlskýma" nil (((ch o c o m i s~ l s k i: m a) 0))) ("chocomyšlskými" nil (((ch o c o m i s~ l s k i: m i) 0))) ("chocomyšlští" nil (((ch o c o m i s~ l s~ t~ i:) 0))) ("control" nil (((k o n t r o l) 0))) ("copyright" nil (((k o p i r a j t) 0))) ("copyrightech" nil (((k o p i r a j t e ch) 0))) ("copyrightem" nil (((k o p i r a j t e m) 0))) ("copyrightu" nil (((k o p i r a j t u) 0))) ("copyrighty" nil (((k o p i r a j t i) 0))) ("copyrightů" nil (((k o p i r a j t u:) 0))) ("copyrightům" nil (((k o p i r a j t u: m) 0))) ("czech" nil (((c~ e k) 0))) ("emacsovou" nil (((i: m e k s o v o u) 0))) ("emacsová" nil (((i: m e k s o v a:) 0))) ("emacsové" nil (((i: m e k s o v e:) 0))) ("emacsového" nil (((i: m e k s o v e: h o) 0))) ("emacsovém" nil (((i: m e k s o v e: m) 0))) ("emacsovému" nil (((i: m e k s o v e: m u) 0))) ("emacsově" nil (((i: m e k s o v j e) 0))) ("emacsoví" nil (((i: m e k s o v i:) 0))) ("emacsový" nil (((i: m e k s o v i:) 0))) ("emacsových" nil (((i: m e k s o v i: ch) 0))) ("emacsovým" nil (((i: m e k s o v i: m) 0))) ("emacsovýma" nil (((i: m e k s o v i: m a) 0))) ("emacsovými" nil (((i: m e k s o v i: m i) 0))) ("escape" nil (((i s k e j p) 0))) ("foundation" nil (((f a u n d e j s~ n) 0))) ("freeware" nil (((f r i: v e: r) 0))) ("freewarech" nil (((f r i: v e: r e ch) 0))) ("freewarem" nil (((f r i: v e: r e m) 0))) ("freewarovou" nil (((f r i: v e: r o v o 
u) 0))) ("freewarová" nil (((f r i: v e: r o v a:) 0))) ("freewarové" nil (((f r i: v e: r o v e:) 0))) ("freewarového" nil (((f r i: v e: r o v e: h o) 0))) ("freewarovém" nil (((f r i: v e: r o v e: m) 0))) ("freewarovému" nil (((f r i: v e: r o v e: m u) 0))) ("freewarově" nil (((f r i: v e: r o v j e) 0))) ("freewaroví" nil (((f r i: v e: r o v i:) 0))) ("freewarový" nil (((f r i: v e: r o v i:) 0))) ("freewarových" nil (((f r i: v e: r o v i: ch) 0))) ("freewarovým" nil (((f r i: v e: r o v i: m) 0))) ("freewarovýma" nil (((f r i: v e: r o v i: m a) 0))) ("freewarovými" nil (((f r i: v e: r o v i: m i) 0))) ("freewaru" nil (((f r i: v e: r u) 0))) ("freewary" nil (((f r i: v e: r i) 0))) ("freewarů" nil (((f r i: v e: r u:) 0))) ("freewarům" nil (((f r i: v e: r u: m) 0))) ("hardware" nil (((h a r d v e: r) 0))) ("hardwarech" nil (((h a r d v e: r e ch) 0))) ("hardwarem" nil (((h a r d v e: r e m) 0))) ("hardwarovou" nil (((h a r d v e: r o v o u) 0))) ("hardwarová" nil (((h a r d v e: r o v a:) 0))) ("hardwarové" nil (((h a r d v e: r o v e:) 0))) ("hardwarového" nil (((h a r d v e: r o v e: h o) 0))) ("hardwarovém" nil (((h a r d v e: r o v e: m) 0))) ("hardwarovému" nil (((h a r d v e: r o v e: m u) 0))) ("hardwarově" nil (((h a r d v e: r o v j e) 0))) ("hardwaroví" nil (((h a r d v e: r o v i:) 0))) ("hardwarový" nil (((h a r d v e: r o v i:) 0))) ("hardwarových" nil (((h a r d v e: r o v i: ch) 0))) ("hardwarovým" nil (((h a r d v e: r o v i: m) 0))) ("hardwarovýma" nil (((h a r d v e: r o v i: m a) 0))) ("hardwarovými" nil (((h a r d v e: r o v i: m i) 0))) ("hardwaru" nil (((h a r d v e: r u) 0))) ("hardwary" nil (((h a r d v e: r i) 0))) ("hardwarů" nil (((h a r d v e: r u:) 0))) ("hardwarům" nil (((h a r d v e: r u: m) 0))) ("pst" nil (((p s t) 0))) ("shareware" nil (((s~ e: r v e: r) 0))) ("sharewarech" nil (((s~ e: r v e: r e ch) 0))) ("sharewarem" nil (((s~ e: r v e: r e m) 0))) ("sharewarovou" nil (((s~ e: r v e: r o v o u) 0))) ("sharewarová" 
nil (((s~ e: r v e: r o v a:) 0))) ("sharewarové" nil (((s~ e: r v e: r o v e:) 0))) ("sharewarového" nil (((s~ e: r v e: r o v e: h o) 0))) ("sharewarovém" nil (((s~ e: r v e: r o v e: m) 0))) ("sharewarovému" nil (((s~ e: r v e: r o v e: m u) 0))) ("sharewarově" nil (((s~ e: r v e: r o v j e) 0))) ("sharewaroví" nil (((s~ e: r v e: r o v i:) 0))) ("sharewarový" nil (((s~ e: r v e: r o v i:) 0))) ("sharewarových" nil (((s~ e: r v e: r o v i: ch) 0))) ("sharewarovým" nil (((s~ e: r v e: r o v i: m) 0))) ("sharewarovýma" nil (((s~ e: r v e: r o v i: m a) 0))) ("sharewarovými" nil (((s~ e: r v e: r o v i: m i) 0))) ("sharewaru" nil (((s~ e: r v e: r u) 0))) ("sharewary" nil (((s~ e: r v e: r i) 0))) ("sharewarů" nil (((s~ e: r v e: r u:) 0))) ("sharewarům" nil (((s~ e: r v e: r u: m) 0))) ("shift" nil (((s~ i f t) 0))) ("shluk" nil (((z h l u k) 0))) ("shora" nil (((z h o r a) 0))) ("shůry" nil (((z h u: r i) 0))) ("software" nil (((s o f t v e: r) 0))) ("softwarech" nil (((s o f t v e: r e ch) 0))) ("softwarem" nil (((s o f t v e: r e m) 0))) ("softwarovou" nil (((s o f t v e: r o v o u) 0))) ("softwarová" nil (((s o f t v e: r o v a:) 0))) ("softwarové" nil (((s o f t v e: r o v e:) 0))) ("softwarového" nil (((s o f t v e: r o v e: h o) 0))) ("softwarovém" nil (((s o f t v e: r o v e: m) 0))) ("softwarovému" nil (((s o f t v e: r o v e: m u) 0))) ("softwarově" nil (((s o f t v e: r o v j e) 0))) ("softwaroví" nil (((s o f t v e: r o v i:) 0))) ("softwarový" nil (((s o f t v e: r o v i:) 0))) ("softwarových" nil (((s o f t v e: r o v i: ch) 0))) ("softwarovým" nil (((s o f t v e: r o v i: m) 0))) ("softwarovýma" nil (((s o f t v e: r o v i: m a) 0))) ("softwarovými" nil (((s o f t v e: r o v i: m i) 0))) ("softwaru" nil (((s o f t v e: r u) 0))) ("softwary" nil (((s o f t v e: r i) 0))) ("softwarů" nil (((s o f t v e: r u:) 0))) ("softwarům" nil (((s o f t v e: r u: m) 0))) ("syntetizér" nil (((s i n t e t i z e: r) 0))) ("syntetizérům" nil (((s i n t e t i z e: r 
u: m) 0)))
festival-czech-0.3/czech-mbrola.scm0000644000175000017500000000634711026005751015466 0ustar pdmpdm;;; Czech support for Festival Mbrola output module

;; Copyright (C) 2003, 2004, 2005 Brailcom, o.p.s.

;; Author: Milan Zamazal

;; COPYRIGHT NOTICE

;; This program is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2 of the License, or
;; (at your option) any later version.

;; This program is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
;; for more details.

;; You should have received a copy of the GNU General Public License
;; along with this program; if not, write to the Free Software
;; Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.

(require 'czech)
(require 'mbrola)

;; Extra letter-to-sound rule set installed by the voice below through
;; `czech-lts-extra-rules*'.  The first rule rewrites the phone sequence
;; `z o:' to `z o'; every following rule maps a single phone to itself.
(lts.ruleset
 czech-mbrola-lts
 ()
 (( [ z o: ] = z o )
  ;;;
  ;; NOTE(review): the bare `;;;' above is assumed to be a standalone separator
  ;; line, leaving the identity rule for `#' below active -- confirm against
  ;; the upstream file.
  ( [ # ] = # )
  ( [ _ ] = _ )
  ( [ a ] = a )
  ( [ a: ] = a: )
  ( [ b ] = b )
  ( [ c ] = c )
  ( [ c~ ] = c~ )
  ( [ ch ] = ch )
  ( [ ch* ] = ch* )
  ( [ d ] = d )
  ( [ d~ ] = d~ )
  ( [ e ] = e )
  ( [ e: ] = e: )
  ( [ f ] = f )
  ( [ g ] = g )
  ( [ h ] = h )
  ( [ i ] = i )
  ( [ i: ] = i: )
  ( [ j ] = j )
  ( [ k ] = k )
  ( [ l ] = l )
  ( [ m ] = m )
  ( [ n ] = n )
  ( [ n* ] = n* )
  ( [ n~ ] = n~ )
  ( [ o ] = o )
  ( [ o: ] = o: )
  ( [ p ] = p )
  ( [ r ] = r )
  ( [ r~ ] = r~ )
  ( [ r~* ] = r~* )
  ( [ s ] = s )
  ( [ s~ ] = s~ )
  ( [ t ] = t )
  ( [ t~ ] = t~ )
  ( [ u ] = u )
  ( [ u: ] = u: )
  ( [ v ] = v )
  ( [ z ] = z )
  ( [ z~ ] = z~ )
  ( [ dz ] = dz )
  ( [ dz~ ] = dz~ )))

;; List of (SOURCE TARGET) string pairs; the voice below stores it in
;; `czech-phoneset-translation*'.  Phones that are not listed here have no
;; entry in this table.
;; NOTE(review): presumably SOURCE is a name from the `czech' phone set and
;; TARGET the symbol used by the Mbrola database -- confirm against
;; `czech-translate-phonemes', which is not defined in this file.
(defvar czech-mbrola-translations
  '(("#" "_")
    ("c" "ts")
    ("ch" "x")
    ("ch*" "x")
    ("c~" "tS")
    ("d~" "d'")
    ("dz~" "dZ")
    ("h" "h\\")
    ("n*" "n")
    ("n~" "n'")
    ("o:" "o")
    ("r~" "r'")
    ("r~*" "r'")
    ("s~" "S")
    ("t~" "t'")
    ("z~" "Z")))

;; List of (PHONE NUMBER) pairs; the voice below stores it in
;; `czech-phoneme-durations*'.
;; NOTE(review): the numbers look like durations in seconds -- TODO confirm the
;; unit where `czech-phoneme-durations*' is consumed.
(defvar czech-mbrola-phoneme-durations
  '(
    (#   0.25)
    (_   0.01)
    (a   0.05)
    (a:  0.125)
    (b   0.0752)
    (c   0.095)
    (c~  0.096)
    (ch  0.08)
    (ch* 0.08)
    (d   0.05)
    (d~  0.07)
    (e   0.05)
    (e:  0.13)
    (f   0.08)
    (g   0.05)
    (h   0.05)
    (i   0.06)
    (i:  0.10)
    (j   0.06)
    (k   0.07)
    (l   0.05)
    (m   0.05)
    (n   0.05)
    (n*  0.05)
    (n~  0.05)
    (o   0.05)
    (o:  0.1)
    (p   0.05)
    (r   0.05)
    (r~  0.05)
    (r~* 0.05)
    (s   0.05)
    (s~  0.05)
    (t   0.08)
    (t~  0.062)
    (u   0.06)
    (u:  0.12)
    (v   0.05)
    (z   0.07)
    (z~  0.05)
    (dz  0.05)
    (dz~ 0.07)
    ))

;; Value copied into `mbrola_database' by the voice below.  It defaults to nil
;; and must be set by the user; the voice body signals an error while it is
;; still nil.
(defvar czech-mbrola_database nil)

;; Voice declaration.  The forms after the description string install the
;; Mbrola specific tables defined above, set the volume scale to 1.0, install
;; `czech-translate-phonemes' as the only after-analysis hook, copy the
;; database setting (or signal an error when it is unset) and select
;; `MBROLA_Synth' as the synthesis method.
;; NOTE(review): `czech-proclaim-voice' is defined outside this file;
;; presumably these forms run when the voice is selected -- confirm there.
(czech-proclaim-voice
 (mbrola_cz2 (gender male))
 "Czech voice provided by the Mbrola cz2 database."
 (set! czech-phoneset-translation* czech-mbrola-translations)
 (set! czech-phoneme-durations* czech-mbrola-phoneme-durations)
 (set! czech-lts-extra-rules* '(czech-mbrola-lts))
 (set! czech-volume-scale* 1.0)
 (set! czech-after-analysis-hooks* (list czech-translate-phonemes))
 (if czech-mbrola_database
     (set! mbrola_database czech-mbrola_database)
     (error "czech-mbrola_database variable not set"))
 (Param.set 'Synth_Method MBROLA_Synth))

(provide 'czech-mbrola)
festival-czech-0.3/czech-unisyn.scm0000644000175000017500000000503511026005751015530 0ustar pdmpdm;;; Czech UniSyn based voice example definition

;; Copyright (C) 2003, 2004, 2005 Brailcom, o.p.s.

;; Author: Milan Zamazal

;; COPYRIGHT NOTICE

;; This program is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2 of the License, or
;; (at your option) any later version.

;; This program is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
;; for more details.

;; You should have received a copy of the GNU General Public License
;; along with this program; if not, write to the Free Software
;; Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
;; Since there is currently no Czech UniSyn diphone database available, this
;; file serves just as an example definition.

(require 'czech)

;; Return NAME with its last path component and the separator character in
;; front of it removed, i.e. the leading (length NAME) - (length basename) - 1
;; characters of NAME.
;; NOTE(review): NAME is expected to contain a directory part; if `basename'
;; returns NAME unchanged, the computed length is -1 -- confirm how
;; `substring' reacts before passing bare file names.
(define (czech-unisyn-dirname name)
  (substring name 0 (- (length name) (+ 1 (length (basename name))))))

;; Register the ungrouped diphone database NAME described by INDEX-FILE, unless
;; NAME is already among the databases reported by `us_list_dbs'.
;; Both the coefficient and the signal directory are set to the "lpc" entry
;; two directory levels above INDEX-FILE.
(define (czech-unisyn-db-init name index-file)
  (if (not (member name (us_list_dbs)))
      (let ((lpc-dir (path-append
                      (czech-unisyn-dirname (czech-unisyn-dirname index-file))
                      "lpc")))
        (us_diphone_init
         (list (list 'name name)
               (list 'index_file index-file)
               (list 'grouped "false")
               (list 'coef_dir lpc-dir)
               (list 'sig_dir lpc-dir)
               (list 'coef_ext ".lpc")
               (list 'sig_ext ".res")
               (list 'default_diphone "#-#"))))))

;; Register the grouped diphone database NAME stored in GROUP-FILE, unless NAME
;; is already among the databases reported by `us_list_dbs'.
(define (czech-unisyn-group-db-init name group-file)
  (if (not (member name (us_list_dbs)))
      (us_diphone_init
       (list (list 'name name)
             (list 'index_file group-file)
             (list 'grouped "true")
             (list 'default_diphone "#-#")))))

;; Set the global UniSyn variables used by this voice to fixed values and
;; select `lpc' as the `us_sigpr' parameter.
(define (czech-unisyn-param-init)
  (set! us_abs_offset 0.0)
  (set! window_factor 1.0)
  (set! us_rel_offset 0.0)
  (set! us_gain 0.9)
  (Parameter.set 'us_sigpr 'lpc))

;; Initialize and select the UniSyn diphone database NAME.
;; INDEX-FILE is handed to `czech-unisyn-group-db-init' when its name ends in
;; ".group" and to `czech-unisyn-db-init' otherwise.
(define (czech-unisyn-init name index-file)
  ;; The backslash is doubled so that the regexp itself receives `\.' and
  ;; matches a literal dot.  With a single backslash the string reader drops
  ;; it, leaving a bare `.' that matches any character, so a file such as
  ;; "xgroup" would wrongly be treated as a group file.
  ((if (string-matches index-file ".*\\.group$")
       czech-unisyn-group-db-init
       czech-unisyn-db-init)
   name index-file)
  (czech-unisyn-param-init)
  (us_db_select name))

;; Example definition
; (czech-proclaim-voice
;  foo
;  "Foo Czech voice."
;  (czech-unisyn-init 'czech_foo "/path/to/the/index/file"))

(provide 'czech-unisyn)
festival-czech-0.3/czech.scm0000644000175000017500000023071311026005751014210 0ustar pdmpdm;;; Czech support for Festival

;; Copyright (C) 2003, 2004, 2005, 2006, 2007 Brailcom, o.p.s.

;; Author: Milan Zamazal

;; COPYRIGHT NOTICE

;; This program is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2 of the License, or
;; (at your option) any later version.
;; This program is distributed in the hope that it will be useful, but ;; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ;; for more details. ;; You should have received a copy of the GNU General Public License ;; along with this program; if not, write to the Free Software ;; Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ;;; Utility functions (define (czech-min x y) (if (<= x y) x y)) (define (czech-max x y) (if (>= x y) x y)) (define (czech-item.has-feat item feat) (assoc feat (item.features item))) (define (czech-item.feat? item feat value) (and item (string-equal (item.feat item feat) value))) (define (czech-item.feat*? item feat value) (and item (string-matches (item.feat item feat) value))) (define (czech-all-same lst) (or (<= (length lst) 1) (and (string-equal (car lst) (cadr lst)) (czech-all-same (cdr lst))))) (define (czech-suffix string i) (substring string i (- (string-length string) i))) (defvar czech-randomize t) (defvar czech-rand-range nil) (defvar czech-moravian t) (defvar czech-insert-filling-vowels t) (defvar czech-group-digits 3) (define (czech-rand) (if czech-randomize (begin (if (not czech-rand-range) (let ((n 100) (max 0)) (while (> n 0) (let ((r (rand))) (if (> r max) (set! max r))) (set! n (- n 1))) (set! czech-rand-range 1) (while (> max czech-rand-range) (set! czech-rand-range (* 2 czech-rand-range))))) (/ (rand) czech-rand-range)) 0.5)) (define (czech-random-choice lst) (let ((max (length lst))) (let ((n (* (czech-rand) max))) (nth n lst)))) (define (czech-next-token-punc word) (if (item.relation.next word "Token") "0" (item.feat word "R:Token.n.daughter1.prepunctuation"))) (define (czech-next-punc word) (let ((token (item.next (item.parent (item.relation word 'Token))))) (while (and token (not (string-matches (item.feat token 'punc) "[^0]+"))) (set! 
token (item.next token))) (if token (item.feat token 'punc) 0))) (define (czech-prev-punc word) (let ((token (item.prev (item.parent (item.relation word 'Token))))) (while (and token (not (string-matches (item.feat token 'punc) "[^0]+"))) (set! token (item.prev token))) (if token (item.feat token 'punc) 0))) (define (czech-word-stress-unit word) (let ((sylword (item.relation word 'SylStructure))) (if (and sylword (item.daughter1 sylword)) (item.parent (item.relation (item.daughter1 sylword) 'StressUnit))))) (define (czech-stress-unit-punc unit) (and unit (item.feat unit "daughtern.R:SylStructure.parent.R:Token.parent.punc"))) ;;; Phone set (defPhoneSet czech (;; vowel or consonant: vowel consonant (vc + - 0) ;; vowel length: short long (vlng s l 0) ;; consonant voicing: yes no unique (cvox + - u 0) ;; can create a syllable: yes no (syl + - 0) ;; can make previous consonant nasal: yes no (postnas + - 0) ;; voiced/unvoiced counterpart: phone (partner b c c~ ch d d~ dz dz~ f g h k p r~* s s~ t t~ v z z~ 0) ) ( ;; c l v s n p (# 0 0 0 0 0 0) ; pause (_ 0 0 0 - 0 0) ; vowel-vowel stroke (a + s 0 + - 0) (a: + l 0 + - 0) (b - 0 + - - p) (c - 0 - - - dz) (c~ - 0 - - - dz~) (ch - 0 - - - 0) (d - 0 + - - t) (d~ - 0 + - - t~) (dz - 0 + - - c) (dz~ - 0 + - - c~) (e + s 0 + - 0) (e: + l 0 + - 0) (f - 0 - - - v) (g - 0 + - + k) (h - 0 + - - ch) (i + s 0 + - 0) (i: + l 0 + - 0) (j - 0 u - - 0) (k - 0 - - + g) (l - 0 u + - 0) (m - 0 u - - 0) (n - 0 u - - 0) (n* - 0 u - - 0) ; n before k or g (n~ - 0 u - - 0) (o + s 0 + - 0) (o: + l 0 + - 0) (p - 0 - - - b) (r - 0 u + - 0) (r~ - 0 + - - r~*) ; (default) voiced r~, may change to r~* (r~* - 0 - - - 0) ; unvoiced r~, can't change back to r~ (s - 0 - - - z) (s~ - 0 - - - z~) (t - 0 - - - d) (t~ - 0 - - - d~) (u + s 0 + - 0) (u: + l 0 + - 0) (v - 0 + - - f) (z - 0 + - - s) (z~ - 0 + - - s~) ) ) (PhoneSet.silences '(#)) (defvar czech-phoneset-translation '()) (defvar czech-phoneset-translation* nil) ;;; Text to phones (lts.ruleset 
czech-normalize ;; just transforms the texts to a canonical form () ( ( [ a ] = a ) ( [ á ] = á ) ( [ ä ] = e ) ( [ b ] = b ) ( [ c ] = c ) ( [ č ] = č ) ( [ d ] = d ) ( [ ď ] = ď ) ( [ e ] = e ) ( [ é ] = é ) ( [ ě ] = ě ) ( [ f ] = f ) ( [ g ] = g ) ( [ h ] = h ) ( [ i ] = i ) ( [ í ] = í ) ( [ j ] = j ) ( [ k ] = k ) ( [ l ] = l ) ( [ m ] = m ) ( [ n ] = n ) ( [ ň ] = ň ) ( [ o ] = o ) ( [ ó ] = ó ) ( [ ö ] = e ) ( [ p ] = p ) ( [ q ] = q ) ( [ r ] = r ) ( [ ř ] = ř ) ( [ s ] = s ) ( [ š ] = š ) ( [ ß ] = s ) ( [ t ] = t ) ( [ ť ] = ť ) ( [ u ] = u ) ( [ ú ] = ú ) ( [ ů ] = ů ) ( [ ü ] = y ) ( [ v ] = v ) ( [ w ] = w ) ( [ x ] = x ) ( [ y ] = y ) ( [ ý ] = ý ) ( [ z ] = z ) ( [ ž ] = ž ) ( [ A ] = a ) ( [ Á ] = á ) ( [ Ä ] = e ) ( [ B ] = b ) ( [ C ] = c ) ( [ Č ] = č ) ( [ D ] = d ) ( [ Ď ] = ď ) ( [ E ] = e ) ( [ É ] = é ) ( [ Ě ] = ě ) ( [ F ] = f ) ( [ G ] = g ) ( [ H ] = h ) ( [ I ] = i ) ( [ Í ] = í ) ( [ J ] = j ) ( [ K ] = k ) ( [ L ] = l ) ( [ M ] = m ) ( [ N ] = n ) ( [ Ň ] = ň ) ( [ O ] = o ) ( [ Ó ] = ó ) ( [ Ö ] = e ) ( [ P ] = p ) ( [ Q ] = q ) ( [ R ] = r ) ( [ Ř ] = ř ) ( [ S ] = s ) ( [ Š ] = š ) ( [ T ] = t ) ( [ Ť ] = ť ) ( [ U ] = u ) ( [ Ú ] = ú ) ( [ Ů ] = ů ) ( [ Ü ] = y ) ( [ V ] = v ) ( [ W ] = w ) ( [ X ] = x ) ( [ Y ] = y ) ( [ Ý ] = ý ) ( [ Z ] = z ) ( [ Ž ] = ž ) ;; digits are here to make this rule set usable in some other cases ( [ 0 ] = 0 ) ( [ 1 ] = 1 ) ( [ 2 ] = 2 ) ( [ 3 ] = 3 ) ( [ 4 ] = 4 ) ( [ 5 ] = 5 ) ( [ 6 ] = 6 ) ( [ 7 ] = 7 ) ( [ 8 ] = 8 ) ( [ 9 ] = 9 ) )) (lts.ruleset czech-orthography ;; transforms Czech written text to a phonetic form ((BPV b p v) (DTN d t n) (ĚI ě i í) (IY i y) (ÍÝ í ý) (#_ # _) (Vowel a á e é ě i í o ó u ú ů y ý) (Vowel+# a á e é ě i í o ó u ú ů y ý #) (SZ s z)) ( ;; Special rules ( [ d ] i SZ m u = d ) ( [ n ] i SZ m u = n ) ( [ t ] i SZ m u = t ) ( [ n ] i s t = n ) ( [ t ] i s t = t ) ( [ t ] i c k = t ) ( [ t ] i č t ě = t ) ( # a n [ t ] i = t ) ( # a n t [ i ] Vowel = i ) ( t e c h [ n ] i = 
n ) ( [ d ] i s p = d ) ( l i [ c ] o m = c ) ( [ c ] o m = k ) ( f r [ e e ] = i: ) ( m l a [ d ] i s t = d~ ) ( [ d ] i s t = d ) ( # t r a [ t ] i v = t~ ) ( á [ t ] i v = t~ ) ( b o l e s [ t ] i v = t~ ) ( c [ t ] i v = t~ ) ( c e [ t ] i v = t~ ) ( c h [ t ] i v = t~ ) ( c h a m [ t ] i v = t~ ) ( c h r a p [ t ] i v = t~ ) ( c h r o p [ t ] i v = t~ ) ( č [ t ] i v = t~ ) ( d r [ t ] i v = t~ ) ( ě [ t ] i v = t~ ) ( f i n [ t ] i v = t~ ) ( h l [ t ] i v = t~ ) ( h o [ t ] i v = t~ ) ( h ř m o [ t ] i v = t~ ) ( í [ t ] i v = t~ ) ( k l e v e [ t ] i v = t~ ) ( k r o u [ t ] i v = t~ ) ( o s [ t ] i v = t~ ) ( p i [ t ] i v = t~ ) ( p l e [ t ] i v = t~ ) ( p o l [ t ] i v = t~ ) ( r o [ t ] i v = t~ ) ( s e [ t ] i v = t~ ) ( s m r [ t ] i v = t~ ) ( s o p [ t ] i v = t~ ) ( š [ t ] i v = t~ ) ( v r [ t ] i v = t~ ) ( y [ t ] i v = t~ ) ( ž á d o s [ t ] i v = t~ ) ( d i g e s [ t ] i v = t ) ( f e s [ t ] i v = t ) ( k o n t r a s [ t ] i v = t ) ( r e z i s [ t ] i v = t ) ( s u g e s [ t ] i v = t ) ( s [ t ] i v = t~ ) ( [ t ] i v = t ) ;; Special orthography rules ( [ d ] ĚI = d~ ) ( [ t ] ĚI = t~ ) ( [ n ] ĚI = n~ ) ( DTN [ ě ] = e ) ( BPV [ ě ] = j e ) ( m [ ě ] = n~ e ) ;; `i' handling ( # m e z [ i ] Vowel = i _ ) ( #_ [ IY ] #_ = i ) ( Vowel+# [ IY ] Vowel+# = j ) ( Vowel [ ÍÝ ] Vowel = j i: j ) ( [ IY ] Vowel = i j ) ( [ ÍÝ ] Vowel = i: j ) ( IY [ IY ] = i ) ( Vowel [ IY ] = j ) ( Vowel [ ÍÝ ] = j i: ) ;; Some vowel-vowel pairs ( m i m [ o ] Vowel = o _ ) ( # m n o h [ o ] Vowel = o _ ) ;; Two-letter phonemes ( [ d ž ] = dz~ ) ( [ d z ] = dz ) ( [ c h ] = ch ) ;; Special letters ( [ ě ] = j e ) ( # [ ú ] = u: ) ( b e z [ ú ] = _ u: ) ( o [ ú ] = _ u: ) ( [ ú ] h = _ u: ) ( [ ú ] č e = _ u: ) ( [ ú ] č t = _ u: ) ( [ ú ] d r ž = _ u: ) ( [ ú ] l o h = _ u: ) ( [ ú ] r o č = _ u: ) ( [ ú ] r o d = _ u: ) ( [ ú ] r o v ň = _ u: ) ;; Simple letters ( [ a ] = a ) ( [ á ] = a: ) ( [ b ] = b ) ( [ c ] = c ) ( [ č ] = c~ ) ( [ d ] = d ) ( [ ď ] = d~ ) ( 
[ e ] = e ) ( [ é ] = e: ) ( [ f ] = f ) ( [ g ] = g ) ( [ h ] = h ) ( [ i ] = i ) ( [ í ] = i: ) ( [ j ] = j ) ( [ k ] = k ) ( [ l ] = l ) ( [ m ] = m ) ( [ n ] = n ) ( [ ň ] = n~ ) ( [ o ] = o ) ( [ ó ] = o: ) ( [ p ] = p ) ( [ q ] = k v ) ( [ r ] = r ) ( [ ř ] = r~ ) ( [ s ] = s ) ( [ š ] = s~ ) ( [ t ] = t ) ( [ ť ] = t~ ) ( [ u ] = u ) ( [ ú ] = u: ) ( [ ů ] = u: ) ( [ v ] = v ) ( [ w ] = v ) ( [ x ] = k s ) ( [ y ] = i ) ( [ ý ] = i: ) ( [ z ] = z ) ( [ ž ] = z~ ) )) ;; -- missing diphones: n-f n-g n-k ;; -- special diphones: a-a: a-e: a-o: a-u: a:-a a:-a: a:-e a:-e: a:-o a:-o: ;; a:-u a:-u: e-a: e-e: e-o: e-u: e:-a e:-a: atd. ;;;; (defvar czech-unknown-symbol-word "neznámý") (defvar czech-lts-extra-rules '()) (define (czech-basic-lts word) (let ((word (if (lts.in.alphabet word 'czech-normalize) word czech-unknown-symbol-word))) (if (string-equal word "") nil (let ((phonetic-form (lts.apply (lts.apply word 'czech-normalize) 'czech-orthography)) phonetic-form*) phonetic-form)))) (define (czech-syllabify-phstress phones) (if (null? phones) () (list (list phones 0)))) (define (czech-lts word features) (list word nil (let ((transformed (and (not (string-equal word "")) (czech-basic-lts word)))) (if transformed (czech-syllabify-phstress (let ((rules czech-lts-extra-rules*)) (while rules (set! transformed (lts.apply transformed (car rules))) (set! 
rules (cdr rules))) transformed)) '())))) (define (czech-downcase word) (if (lts.in.alphabet word 'czech-normalize) (apply string-append (lts.apply word 'czech-normalize)) word)) ;;; Tokenization (defvar czech-token.unknown-word-name "neznámé") (defvar czech-token.separator-word-name "oddělovač") ; our own variable (defvar czech-token.garbage-word-name "smetí") ; our own variable (defvar czech-token.whitespace "  \t\n\r") (defvar czech-token.punctuation "\"'`.,:;!?-(){}[]<>") (defvar czech-token.prepunctuation "\"'`({[<") ;;; Token to words processing (defvar czech-chars "a-zA-ZáäčďéěíňóöřšßťúůüýžÁÄČĎÉĚÍŇÓÖŘŠŤÚŮÜÝŽ") (defvar czech-char-regexp (string-append "[" czech-chars "]")) (defvar czech-multiword-abbrevs '(("`" ("obrácený" "apostrof")) ("\\" ("zpětné" "lomítko")) (">" ("větší" "než")) ("<" ("menší" "než")) ("[" ("levá" "hranatá")) ("]" ("pravá" "hranatá")) ("{" ("levá" "složená")) ("}" ("pravá" "složená")) ("(" ("levá" "kulatá")) (")" ("pravá" "kulatá")) ("=" ("rovná" "se")) ("\n" ("nový" "řádek")) ("os/2" ("OS" "2")) ("km/h" ("kilometrů" "za" "hodinu")) ("m/s" ("metrů" "za" "sekundu")) )) (define (czech-remove element list) (cond ((null? list) list) ((equal? element (car list)) (czech-remove element (cdr list))) (t (cons (car list) (czech-remove element (cdr list)))))) (define (czech-number name) (cond ((string-matches name "^[-+].*") (cons (substring name 0 1) (czech-number (czech-suffix name 1)))) ((string-matches name ".*[,.].*") (let ((comma (if (string-matches name ".*,.*") "," "."))) (append (czech-number (string-before name comma)) (list comma) (czech-number (string-after name comma))))) ((string-equal name "0") (list "nula")) ((string-matches name "^0..*") (cons "nula" (czech-number (czech-suffix name 1)))) (t (czech-number-from-digits (czech-remove (car (symbolexplode " ")) (symbolexplode name)))))) (define (czech-digits-1 digits) (if czech-group-digits (let ((n (string-length digits))) (while (> (- n czech-group-digits) 0) (set! 
n (- n czech-group-digits)))
        (append (czech-number (substring digits 0 n))
                (if (> (length digits) czech-group-digits)
                    (czech-digits (czech-suffix digits n))
                    nil)))
      (czech-number digits)))

;; Convert the digit string DIGITS to words, reading every leading "0" as a
;; separate "nula" and the rest through czech-digits-1.  The empty string
;; gives '().
(define (czech-digits digits)
  (cond
   ((string-equal digits "")
    '())
   ((string-matches digits "^0.*")
    (append (czech-number "0") (czech-digits (czech-suffix digits 1))))
   (t
    (czech-digits-1 digits))))

;; Return NAME prefixed with the `numprefix' feature of TOKEN, or NAME itself
;; when TOKEN has no such feature.
(define (czech-prepend-numprefix token name)
  (if (czech-item.has-feat token 'numprefix)
      (string-append (item.feat token 'numprefix) name)
      name))

;; Like czech-number, but applied to NAME with TOKEN's `numprefix' prepended.
(define (czech-number* token name)
  (czech-number (czech-prepend-numprefix token name)))

;; Variant of czech-number with special handling of "0", "00" and "0d"
;; (d = 1..9): the zeros are spoken explicitly as "nula".
(define (czech-number@ name)
  (cond
   ((string-equal name "0")
    '("nula"))
   ((string-equal name "00")
    '("nula" "nula"))
   ((string-matches name "0[1-9]")
    (cons "nula" (czech-number (string-after name "0"))))
   (t
    (czech-number name))))

;; Convert DIGITS, a list of single-digit items, to a list of number words.
;; Dispatches on the number of digits:
;;   1      -- unit word ("0" gives the empty list)
;;   2      -- "deset".."devatenáct" when the first digit is "1", otherwise
;;             a tens word followed by the conversion of the last digit
;;   3      -- hundreds word(s) followed by the conversion of the last two
;;   4..12  -- the leading 1-3 digits followed by the proper form of
;;             thousand/million/milliard, then the conversion of the rest
;;   more   -- falls back to czech-digits (when czech-group-digits is set) or
;;             to reading each digit separately
(define (czech-number-from-digits digits)
  (let ((len (length digits)))
    (cond
     ((equal? len 1)
      (let ((d (car digits)))
        (cond
         ((string-equal d "0") ())
         ((string-equal d "1") (list "jedna"))
         ((string-equal d "2") (list "dva"))
         ((string-equal d "3") (list "tři"))
         ((string-equal d "4") (list "čtyři"))
         ((string-equal d "5") (list "pět"))
         ((string-equal d "6") (list "šest"))
         ((string-equal d "7") (list "sedm"))
         ((string-equal d "8") (list "osm"))
         ((string-equal d "9") (list "devět")))))
     ((equal? len 2)
      (if (string-equal (car digits) "1")
          (let ((d (car (cdr digits))))
            (cond
             ((string-equal d "0") (list "deset"))
             ((string-equal d "1") (list "jedenáct"))
             ((string-equal d "2") (list "dvanáct"))
             ((string-equal d "3") (list "třináct"))
             ((string-equal d "4") (list "čtrnáct"))
             ((string-equal d "5") (list "patnáct"))
             ((string-equal d "6") (list "šestnáct"))
             ((string-equal d "7") (list "sedmnáct"))
             ((string-equal d "8") (list "osmnáct"))
             ((string-equal d "9") (list "devatenáct"))))
          (append
           (let ((d (car digits)))
             (cond
              ((string-equal d "0") ())
              ((string-equal d "2") (list "dvacet"))
              ((string-equal d "3") (list "třicet"))
              ((string-equal d "4") (list "čtyřicet"))
              ((string-equal d "5") (list "padesát"))
              ((string-equal d "6") (list "šedesát"))
              ((string-equal d "7") (list "sedmdesát"))
              ((string-equal d "8") (list "osmdesát"))
              ((string-equal d "9") (list "devadesát"))))
           (czech-number-from-digits (cdr digits)))))
     ((equal? len 3)
      (append
       (let ((d (car digits)))
         (cond
          ((string-equal d "0") ())
          ((string-equal d "1") (list "sto"))
          ((string-equal d "2") (list "dvě" "stě"))
          ((string-equal d "3") (list "tři" "sta"))
          ((string-equal d "4") (list "čtyři" "sta"))
          ((string-equal d "5") (list "pět" "set"))
          ((string-equal d "6") (list "šest" "set"))
          ((string-equal d "7") (list "sedm" "set"))
          ((string-equal d "8") (list "osm" "set"))
          ((string-equal d "9") (list "devět" "set"))))
       (czech-number-from-digits (cdr digits))))
     ((<= len 12)
      ;; Each entry: (FLAG ONE-FORM 2-4-FORM MANY-FORM).  FLAG selects "dva"
      ;; (t) or "dvě" (nil) in front of the 2-4 form.
      (let ((concatenations '((t "tisíc" "tisíce" "tisíc")
                              (t "milion" "miliony" "milionů")
                              (nil "miliarda" "miliardy" "miliard")))
            (n (- len 3)))
        ;; After the loop n (1..3) is the number of leading digits and
        ;; (car concatenations) is the entry for their order of magnitude.
        (while (> n 3)
          (set! concatenations (cdr concatenations))
          (set! n (- n 3)))
        (let ((m n)
              (head-digits ())
              (tail-digits digits)
              (words (car concatenations)))
          ;; Split DIGITS into the first n digits and the rest.
          (while (> m 0)
            (set! head-digits (cons (car tail-digits) head-digits))
            (set! tail-digits (cdr tail-digits))
            (set! m (- m 1)))
          (set! head-digits (reverse head-digits))
          (append
           (cond
            ;; leading group consists of zeros only -- say nothing for it
            ((let ((all-zero t)
                   (d head-digits))
               (while (and all-zero d)
                 (if (string-equal (car d) "0")
                     (set! d (cdr d))
                     (set! all-zero nil)))
               all-zero)
             nil)
            ;; exactly "1" -- just the magnitude word
            ((and (equal? n 1) (string-equal (car digits) "1"))
             (list (car (cdr words))))
            ;; exactly "2".."4" -- numeral plus the 2-4 form
            ((and (equal? n 1) (string-matches (car digits) "[2-4]"))
             (list
              (cond
               ((string-equal (car digits) "2")
                (if (car words) "dva" "dvě"))
               ((string-equal (car digits) "3") "tři")
               ((string-equal (car digits) "4") "čtyři"))
              (car (cdr (cdr words)))))
            ;; anything else -- converted leading group plus the many-form
            (t
             (append
              (czech-number-from-digits head-digits)
              (list (car (cdr (cdr (cdr words))))))))
           (czech-number-from-digits tail-digits)))))
     (t
      (if czech-group-digits
          (czech-digits (apply string-append digits))
          (apply append (mapcar czech-number digits)))))))

;; Split STRING into a list of pieces: runs of letters (czech-char-regexp)
;; are kept together, digits are split into single characters, and every other
;; character becomes its own element.  The empty string gives nil.
(define (czech-tokenize-on-nonalphas string)
  (cond
   ((string-equal string "")
    nil)
   ((string-matches string (string-append "^" czech-char-regexp "*$"))
    (list string))
   ((string-matches string "^[0-9]+$")
    (symbolexplode string))
   (t
    (let ((i 0))
      ;; i = length of the leading run of letters, or (if there is none) of
      ;; the leading run of digits
      (while (string-matches (substring string i 1) czech-char-regexp)
        (set! i (+ i 1)))
      (if (eq? i 0)
          (while (string-matches (substring string i 1) "[0-9]")
            (set! i (+ i 1))))
      (append (if (> i 0)
                  (let ((s (substring string 0 i)))
                    (if (string-matches s "[0-9]+")
                        (symbolexplode s)
                        (list s)))
                  nil)
              (list (substring string i 1))
              (czech-tokenize-on-nonalphas (czech-suffix string (+ i 1))))))))

;; Expand the token text NAME of TOKEN into a list of words.  Each element is
;; either a word string or a feature list such as ((name ...) (pos ...)).
;; The clauses of the `cond' are tried in order; the first match wins.
(define (czech-token-to-words token name)
  (cond
   ;; Special terms
   ((assoc_string (czech-downcase name) czech-multiword-abbrevs)
    (apply append
           (mapcar (lambda (w) (czech-token-to-words token w))
                   (cadr (assoc_string (czech-downcase name)
                                       czech-multiword-abbrevs)))))
   ;; Length units ("cm", "km", "mm") directly after a number
   ((and (string-matches name "[ckm]m")
         (item.prev token)
         (czech-item.feat*?
token "p.name" "[-+]?[0-9]+[.,]?[0-9]*"))
    (list (cadr (assoc_string name '(("cm" "centimetrů")
                                     ("km" "kilometrů")
                                     ("mm" "milimetrů"))))))
   ;; Spaced numbers
   ;; A group of up to three digits (or a token already carrying a
   ;; `numprefix') that is followed by a three-digit token: no words are
   ;; produced here, the text is stored as `numprefix' on the next token.
   ((and (or (string-matches name "^[-+]?[1-9][0-9]?[0-9]?$")
             (czech-item.has-feat token 'numprefix))
         (not (czech-item.has-feat token 'punc))
         ;; NOTE(review): item.feat is called with a third argument here,
         ;; while value comparisons elsewhere in this file go through
         ;; czech-item.feat? -- confirm whether a comparison with " " was
         ;; intended.
         (item.feat token "n.whitespace" " ")
         (string-matches (item.feat token "n.name") "^[0-9][0-9][0-9]$"))
    (item.set_feat (item.next token) 'numprefix
                   (czech-prepend-numprefix token name))
    nil)
   ;; Ordinal numbers
   ;; The "." is moved from the token's `punc' feature into the word list.
   ((and (string-matches name "^[0-9]+$")
         (czech-item.feat? token 'punc ".")
         (item.next token)
         (not (string-matches (item.feat token "n.whitespace") " +")))
    (item.set_feat token 'punc "")
    (if (not (czech-item.has-feat token 'punctype))
        (item.set_feat token 'punctype 'num))
    (append (czech-number* token name)
            (list ".")))
   ;; Numbers beginning with the zero digit
   ((and (string-matches name "^0[0-9]*$")
         (not (czech-item.has-feat token 'numprefix)))
    (czech-digits name))
   ;; Any other numbers
   ((let ((nname (czech-prepend-numprefix token name)))
      (or (string-matches nname "^[-+]?[0-9]+$")
          (string-matches nname "^[-+]?[0-9]+[.,][0-9]+$")
          (string-matches nname "^[-+]?[0-9]+,-$")))
    (if (not (czech-item.has-feat token 'punctype))
        (item.set_feat token 'punctype 'num))
    (let ((nname (czech-prepend-numprefix token name)))
      (if (and (czech-item.feat?
token "n.name" "Kč")
               (string-matches nname "^[-+]?[0-9]+,[-0-9]+$"))
          ;; Amount followed by the "Kč" token: read as crowns and hellers;
          ;; a "-" after the comma means no hellers.
          (append
           (czech-number (string-before nname ","))
           (list "korun")
           (let ((hellers (string-after nname ",")))
             (if (not (string-equal hellers "-"))
                 (append
                  (czech-number hellers)
                  (list "haléřů")))))
          (czech-number nname))))
   ;; Monetary sign
   ;; Already spoken together with the preceding amount, so it is dropped.
   ((and (string-equal name "Kč")
         (string-matches (item.feat token "p.name") "^[-+]?[0-9]+,[-0-9]+$"))
    nil)
   ;; Acronyms
   ;; An all-capitals token of at most three letters that is not in the
   ;; lexicon is spelled letter by letter, unless a neighbouring token is
   ;; all-capitals too (i.e. the surrounding text is written in capitals).
   ((let ((capitals "^[A-ZÁÄČĎÉĚÍŇÓÖŘŠŤÚŮÜÝŽ]+$"))
      (and (string-matches name capitals)
           (not (lex.lookup_all name))
           (not (string-matches (item.feat token "p.name") capitals))
           ;; The name of the following token is "n.name"; the previous
           ;; "p.next" path did not refer to it, so this test never fired.
           (not (string-matches (item.feat token "n.name") capitals))
           (<= (length name) 3) ; longer pronounceable acronyms are not spelled
           (not (string-equal name "Ř")) ; Festival bug workaround
           ))
    (let ((words ()))
      (mapcar
       (lambda (phoneme)
         (let ((expansion (cadr (assoc_string (czech-downcase phoneme)
                                              czech-multiword-abbrevs))))
           (if expansion
               (set! words (append words
                                   (mapcar (lambda (w)
                                             `((name ,w) (pos sym)))
                                           expansion)))
               (set! words (append words
                                   (list `((name ,phoneme) (pos sym))))))))
       (lts.apply name 'czech-normalize))
      words))
   ;; Abbreviations and other unpronounceable words
   ;; Tokens made only of the consonant letters listed below and missing from
   ;; the lexicon are spelled letter by letter.
   ((and (string-matches
          name
          "^[bcdfghjklmnpqrstvwxzBCDFGHJKLMNPQSTVWXZčďňřšßťžČĎŇŘŠŤŽ][bcdfghjkmnpqstvwxzBCDFGHJKMNPQSTVWXZčďňřšßťžČĎŇŘŠŤŽ]+$")
         (not (lex.lookup_all name)))
    (mapcar (lambda (phoneme) `((name ,phoneme) (pos sym)))
            (lts.apply name 'czech-normalize)))
   ;; Separators
   ;; Four or more identical non-alphanumeric characters
   ((and (string-matches name (string-append "^[^" czech-chars "0-9]+$"))
         (>= (length name) 4)
         (czech-all-same (symbolexplode name)))
    (list czech-token.separator-word-name))
   ;; A single non-alphanumeric character whose `punc' repeats it
   ((and (string-matches name (string-append "^[^" czech-chars "0-9]$"))
         (eqv?
(length (item.daughters token)) 0)
         (let ((punc (item.feat token 'punc)))
           (and (string-matches punc "...+") ; excludes, among others, punc==0
                (string-equal (substring punc 0 1) name)
                (czech-all-same (symbolexplode punc)))))
    ;; the repeated characters are consumed by the separator word
    (item.set_feat token 'punc 0)
    (list czech-token.separator-word-name))
   ;; Time (just a few of many possible forms)
   ;; H:MM -- both parts are read via czech-number@
   ((and (string-matches name "^[0-9]+:[0-9][0-9]$")
         ;; try to identify ratios -- should be better done in POS tagging
         (not (string-matches (item.feat token "p.name")
                              "^[Pp][Oo][Mm][Ěě].*"))
         (not (string-matches (item.feat token "p.name")
                              "^[Pp][Rr][Aa][Vv][Dd][Ěě][Pp][Oo][Dd][Oo].*"))
         (not (string-matches (item.feat token "p.name")
                              "^[Šš][Aa][Nn][Cc].*")))
    (append (czech-number@ (string-before name ":"))
            (czech-number@ (string-after name ":"))))
   ;; H:MM:SS
   ((string-matches name "^[0-9]+:[0-9][0-9]:[0-9][0-9]$")
    (append (czech-number@ (string-before name ":"))
            (czech-number@ (string-before (string-after name ":") ":"))
            (czech-number@ (string-after (string-after name ":") ":"))))
   ;; Ratios
   ((string-matches name "^[0-9]+:[0-9]+$")
    (append (czech-number (string-before name ":"))
            '("ku")
            (czech-number (string-after name ":"))))
   ;; Numeric ranges (might be minus as well, but that's rare)
   ((string-matches name "[0-9]+[.,]*[0-9]*-[0-9]+[.,]*[0-9]*$")
    ;; we don't include signs here not to break phone numbers and such a
    ;; written form is incorrect anyway
    ;; The first character is kept aside and the split is done on the rest,
    ;; so the "-" searched for is never the one at position 0.
    (append
     (czech-token-to-words token
                           (string-append
                            (substring name 0 1)
                            (string-before (substring name 1 1000) "-")))
     '(((name "-") (pos range)))
     (czech-token-to-words token
                           (string-after (substring name 1 1000) "-"))))
   ;; Homogenous tokens
   ;; Letters only
   ((string-matches name (string-append "^" czech-char-regexp "+$"))
    (if (string-equal (czech-downcase name) "ř") ; Festival bug workaround
        (list "eř")
        (list name)))
   ;; Non-alphanumeric characters only
   ((string-matches name (string-append "^[^" czech-chars "0-9]+$"))
    (cond
     ((> (length name) 10)
      (list czech-token.garbage-word-name))
     ((and (eqv?
(length name) 1)
           (string-equal (item.name token) name)
           ;; The token feature holding trailing punctuation is `punc' (as
           ;; used everywhere else in this function); the former
           ;; `punctuation' name is not used anywhere else in this file, so
           ;; that half of the test could never succeed.
           (or (not (string-matches (item.feat token 'prepunctuation) "0?"))
               (not (string-matches (item.feat token 'punc) "0?"))))
      ;; This handles the case when the whole token consists of two or more
      ;; punctuation characters.  In such a case Festival picks one of the
      ;; characters as the name, while the other characters are treated as
      ;; punctuation.  We want all the characters to be handled as
      ;; punctuation.
      `(((name ,name) (pos punc))))
     ((assoc_string name czech-multiword-abbrevs)
      (cadr (assoc_string name czech-multiword-abbrevs)))
     (t
      (symbolexplode name))))
   ;; Hyphens
   ;; Trailing hyphen after letters: drop it
   ((string-matches name (string-append "^" czech-char-regexp "+-$"))
    (czech-token-to-words token (string-before name "-")))
   ;; Hyphen inside: process both sides and keep "-" as a punctuation word
   ((string-matches name
                    (string-append "^[" czech-chars "0-9]+-[-"
                                   czech-chars "0-9]+$"))
    (append
     (czech-token-to-words token (string-before name "-"))
     '(((name "-") (pos punc)))       ; necessary for punctuation reading modes
     (czech-token-to-words token (string-after name "-"))))
   ;; Starting with digits
   ;; The leading digit run is read by czech-digits, the rest recursively.
   ((string-matches name "^[0-9].*")
    (let ((i 0))
      (while (member (substring name i 1)
                     '("0" "1" "2" "3" "4" "5" "6" "7" "8" "9"))
        (set! i (+ i 1)))
      (append (czech-digits (substring name 0 i))
              (czech-token-to-words token (czech-suffix name i)))))
   ;; Digits inside
   ;; i = index of the first digit, j = index just past that digit run.
   ((string-matches name "^.*[0-9].*")
    (let ((i 0)
          j
          (digits '("0" "1" "2" "3" "4" "5" "6" "7" "8" "9")))
      (while (not (member (substring name i 1) digits))
        (set! i (+ i 1)))
      (set! j (+ i 1))
      (while (member (substring name j 1) digits)
        (set!
j (+ j 1)))
      (append (czech-token-to-words token (substring name 0 i))
              (czech-digits (substring name i (- j i)))
              (czech-token-to-words token (czech-suffix name j)))))
   ;; Lexicon words
   ((lex.lookup_all name)
    (list name))
   ;; TODO: roman numerals
   ;; Heterogenous tokens -- mixed alpha, numeric and non-alphanumeric
   ;; characters
   (t
    ;; `punctype' is cleared unless the token consists of letters and "-" only
    (if (not (string-matches name (string-append "^[-" czech-chars "]+$")))
        (item.set_feat token 'punctype nil))
    (apply
     append
     (mapcar (lambda (name) (czech-token-to-words token name))
             (czech-tokenize-on-nonalphas name))))))

;;; Lexicon

(defvar czech-lexicon-file "czech-lexicon.out")

(lex.create "czech")
(lex.set.phoneset "czech")
(lex.select "czech")
;; Use the first existing czech-lexicon-file found in the listed directories
;; as the compiled lexicon; print a warning when there is none.
(let ((dirs '("." "/usr/share/festival"))
      (lexfile nil))
  (while dirs
    (let ((file (path-append (car dirs) czech-lexicon-file)))
      (if (probe_file file)
          (begin
            (set! lexfile file)
            (set! dirs nil))))
    (set! dirs (cdr dirs)))
  (if lexfile
      (lex.set.compile.file lexfile)
      (format t "warning: Czech lexicon file not found\n")))
(lex.set.lts.method 'czech-lts)
(lex.add.entry '("neznámé" nil (((n e z n a: m e:) 0))))

;;; Part of Speech

;; Association list from a part-of-speech symbol to the words belonging to it.
;; czech-pos assigns the symbol of the first list that contains the word.
(defvar czech-guess-pos
  '((prep0 "k" "s" "v" "z")
    (prep "bez" "beze" "během" "do" "ke" "ku" "krom" "kromě" "mezi" "mimo"
          "místo" "na" "nad" "nade" "o" "od" "ode" "okolo" "po" "pod" "pode"
          "pro" "proti" "před" "přede" "přes" "přeze" "při" "se" "skrz"
          "skrze" "u" "ve" "vyjma" "za" "ze" "zpoza")
    (conj "a" "i" "ani" "nebo" "anebo")
    (particle "ať" "kéž" "nechť")
    (question "co" "čemu" "čí" "jak" "jaká" "jaké" "jaký" "kam" "kde"
              "kdo" "kdy" "koho" "kolik" "kolikátá" "kolikáté" "kolikátý"
              "komu" "kterak" "která" "které" "kterého" "kterému" "který"
              "kudy" "nač" "nakolik" "odkud" "pokolikáté" "proč")
    (misc "aby" "abych" "abys" "abychom" "abyste" "ale" "alespoň" "aneb" "ani"
          "aniž" "anžto" "aspoň" "avšak" "ač" "až" "ačkoli" "ačkoliv" "buď"
          "buďto" "buďsi" "by" "byť" "byťsi" "coby" "či" "čili" "div"
          "dokdy" "dokonce" "dokud" "dotud" "jakby" "jakkoli" "jakkoliv"
          "jakmile" "jako" "jakoby" "jakož"
          "jakožto" "jednak" "jednou" "jelikož" "jen" "jenom" "jenomže"
          "jenže" "jestli" "jestliže" "ještě" "ježto" "jinak" "kdežto"
          "kdybych" "kdybys" "kdyby" "kdybychom" "kdybyste" "když" "kvůli"
          "leda" "ledaže" "leč" "mezitímco" "mimoto" "načež" "neb" "neboli"
          "neboť" "nejen" "nejenže" "než" "nežli" "neřkuli" "nicméně" "nýbrž"
          "odkdy" "odkud" "pak" "pakli" "pakliže" "podle" "podmínky" "pokud"
          "poněvadž" "popřípadě" "potom" "potud" "poté" "pročež" "proto"
          "protože" "právě" "přece" "přestože" "přitom" "respektive" "sic"
          "sice" "sotva" "sotvaže" "tak" "takový" "taktak" "takže" "také"
          "tedy" "ten" "teprve" "to" "toho" "tolik" "tomu" "totiž" "tu"
          "tudíž" "tím" "třeba" "třebas" "třebasže" "třebaže" "však" "vždyť"
          "zatímco" "zda" "zdali" "zejména" "zrovna" "zvláště" "že")))

;; Return non-nil if the name of item WORD is listed in czech-guess-pos under
;; POS.  POS is a part-of-speech symbol or a list of such symbols.
(define (czech-word-pos? word pos)
  (member (item.name word)
          (apply append (mapcar (lambda (p) (cdr (assoc p czech-guess-pos)))
                                (if (consp pos) pos (list pos))))))

;; Return the 1-based position of WORD counted by walking backwards over the
;; preceding words for as long as the conditions on the neighbouring tokens'
;; punctuation features hold.
;; NOTE(review): the first test below is negated while its counterpart in
;; czech-pos-in-phrase-to is not -- confirm that the asymmetry is intended.
(define (czech-pos-in-phrase-from word)
  (let ((result 1)
        (w word))
    (while (and (item.prev w)
                (or (not (czech-item.feat*? w "R:Token.p.name" "0?"))
                    (and (czech-item.feat*? w "p.R:Token.parent.punc" "0?")
                         (czech-item.feat*?
                          w "R:Token.parent.prepunctuation" "0?")
                         (not (czech-item.feat*?
                               w "p.name"
                               (string-append "^[^" czech-chars "0-9]+$"))))))
      (set! result (+ result 1))
      (set! w (item.prev w)))
    result))

;; Non-nil when czech-pos-in-phrase-from gives 1 for WORD.
(define (czech-pos-first-in-phrase? word)
  (<= (czech-pos-in-phrase-from word) 1))

;; Forward counterpart of czech-pos-in-phrase-from: walks over the following
;; words and returns 1 plus the number of steps taken.
(define (czech-pos-in-phrase-to word)
  (let ((result 1)
        (w word))
    (while (and (item.next w)
                (or (czech-item.feat*? w "R:Token.n.name" "0?")
                    (and (czech-item.feat*? w "R:Token.parent.punc" "0?")
                         (czech-item.feat*?
                          w "R:Token.parent.n.prepunctuation" "0?")
                         (not (czech-item.feat*?
                               w "n.name"
                               (string-append "^[^" czech-chars "0-9]+$"))))))
      (set! result (+ result 1))
      (set! w (item.next w)))
    result))

;; Non-nil when czech-pos-in-phrase-to gives 1 for WORD.
(define (czech-pos-last-in-phrase?
word)
  (<= (czech-pos-in-phrase-to word) 1))

;; Set the `pos' feature of the items of the Word relation of UTT, then put a
;; comma into the `punc' feature of tokens that have no punctuation and are
;; followed by a conjunction.  Returns UTT.
(define (czech-pos utt)
  (mapcar
   (lambda (w)
     (let ((name (czech-downcase (item.name w)))
           (token (item.parent (item.relation w 'Token))))
       (cond
        ;; Feature already assigned
        ((czech-item.has-feat w 'pos)
         nil)
        ;; Word followed by a punctuation
        ((and (czech-item.has-feat token 'punctype)
              (string-matches name (string-append "^[^" czech-chars "0-9]+$")))
         (item.set_feat w 'pos (item.feat token 'punctype)))
        ;; Punctuation
        ((member name '("\"" "'" "`" "-" "." "," ":" ";" "!" "?" "(" ")"))
         ;; Is it a separate punctuation character?
         (if (eqv? (length
                    (item.daughters (item.parent (item.relation w 'Token))))
                   1)
             (item.set_feat w 'pos nil)
             (item.set_feat w 'pos 'punc)))
        ;; Special interjections
        ((member name '("á" "ó"))
         (item.set_feat w 'pos (if (czech-pos-first-in-phrase? w)
                                   'int
                                   'sym)))
        ;; Single letter, not in the role of a word
        ((and (eq? (string-length name) 1)
              (czech-pos-last-in-phrase? w))
         (item.set_feat w 'pos 'sym))
        ;; Word "se", not in the role of a preposition
        ((and (string-equal name "se")  ; the word "se"
              (item.prev w)             ; not the first word
              (or (czech-pos-last-in-phrase? w) ; final word
                  (czech-word-pos? (item.next w) '(prep0 prep))
                                        ; followed by a preposition
                  ))
         (item.set_feat w 'pos 'se))
        ;; Question words with the `pak' suffix
        ((and (string-matches name ".*pak")
              (member (substring name 0 (- (length name) 3))
                      (cdr (assoc 'question czech-guess-pos))))
         (item.set_feat w 'pos 'question))
        ;; Nothing special: check the czech-guess-pos tree
        ;; (the first list containing the name wins; the loop is stopped by
        ;; setting pos-sets to nil)
        (t
         (let ((pos-sets czech-guess-pos))
           (while pos-sets
             (if (member name (cdar pos-sets))
                 (begin
                   (item.set_feat w 'pos (caar pos-sets))
                   (set! pos-sets nil))
                 (set! pos-sets (cdr pos-sets)))))
         ))))
   (utt.relation.items utt 'Word))
  ;; Add commas before conjunctions
  (mapcar
   (lambda (token)
     (if (and (czech-item.feat*? token 'punc "0?")
              (czech-item.feat?
token "daughtern.R:Word.n.gpos" 'conj))
         (item.set_feat token 'punc ",")))
   (utt.relation.items utt 'Token))
  utt)

;;; Phrase breaks

;; Return the punctuation of the stress unit following the one WORD belongs
;; to, provided it ends with one of . ? ! ; : , -.  When it does not and that
;; unit has the `preelement' feature set, the result of czech-next-punc is
;; returned instead.  Returns 0 otherwise, or when there is no next unit.
(define (czech-next-simple-punc word)
  (let ((unit (item.next (czech-word-stress-unit word))))
    (cond
     ((not unit)
      0)
     ((string-matches (czech-stress-unit-punc unit) ".*[.?!;:,-]")
      (czech-stress-unit-punc unit))
     ((czech-item.feat? unit 'preelement 1)
      (czech-next-punc word))
     (t
      0))))

;; Backward counterpart of czech-next-simple-punc.  When the previous stress
;; unit carries no matching punctuation, tokens before WORD's token are
;; searched backwards for a non-empty `punc'; it is returned only if the first
;; word of the token following it lies in a stress unit with `preelement' set.
;; Returns 0 otherwise, or when there is no previous unit.
(define (czech-prev-simple-punc word)
  (let ((unit (item.prev (czech-word-stress-unit word))))
    (cond
     ((not unit)
      0)
     ((string-matches (czech-stress-unit-punc unit) ".*[.?!;:,-]")
      (czech-stress-unit-punc unit))
     (t
      (let ((token (item.prev (item.parent (item.relation word 'Token)))))
        (while (and token (not (string-matches (item.feat token 'punc) ".+")))
          (set! token (item.prev token)))
        (let ((pword (and token
                          (item.next token)
                          (item.daughter1 (item.next token)))))
          (if (and pword
                   (czech-item.feat? (czech-word-stress-unit pword)
                                     'preelement 1))
              (item.feat token 'punc)
              0)))))))

;; Decision tree giving the phrase break value of a word.  Leaves used:
;; BB, B, NB, and the provisional values XB1 and XB2, which are resolved in
;; czech-adjust-phrase-breaks.
(defvar czech-phrase-cart-tree
  ;; Note: Additional corrections are applied in czech-adjust-phrase-breaks
  ;; SB = (very) short break
  '(;; end of utterance
    (n.name is 0)
    ((BB))
    ;; exclude "punctuation words"
    ((name matches "[][\"'`.,:;!?(){}<>-]+")
     ((NB))
     ;; parentheses
     ((R:Token.parent.n.prepunctuation matches "(.*")
      ((R:Token.n.name is 0)
       ((B))
       ((NB)))
      ((lisp_token_end_punc matches ".*)")
       ((B))
       ;;
       ;; phonetic rules
       ;;
       ;; "big" punctuations
       ((lisp_token_end_punc matches ".*[.?!;]\"")
        ((BB))
        ((lisp_token_end_punc matches ".*[.?!;]")
         ((lisp_czech-next-token-punc matches "\".*")
          ((BB))
          ((XB1)))                      ; for following adjustments
         ;; "smaller" punctuations
         ((lisp_token_end_punc matches ".*[:-]")
          ;; dashes are treated as pbreaks only if separated by whitespaces
          ((R:Token.parent.n.daughter1.name is "-")
           ((R:Token.n.name is 0)
            ((B))
            ((NB)))
           ((B)))
          ;; "comma" punctuations
          ((lisp_token_end_punc matches ".*,")
           ((XB2))                      ; for following adjustments
           ;; nothing applies -- no break by default
           ((NB)))))))))))

(define
(czech-adjust-phrase-breaks utt)
  ;; This must be called after stress units are identified
  ;; Replaces the provisional `pbreak' values XB1 and XB2 on the words of UTT
  ;; by BB, B or SB, depending on the punctuation of neighbouring stress
  ;; units.
  (mapcar
   (lambda (w)
     (cond
      ((czech-item.feat? w 'pbreak 'XB1) ; "big" punctuations
       ;; only one stress unit between punctuations makes them shorter
       (item.set_feat
        w 'pbreak
        (cond
         ((czech-item.feat? w "R:SylStructure.name" 0)
          ;; not a word actually
          'BB)
         ((or (czech-item.feat*? (czech-word-stress-unit w)
                                 "n.lisp_czech-stress-unit-punc" ".*[.?!;]\"?")
              (czech-item.feat*? (czech-word-stress-unit w)
                                 "p.lisp_czech-stress-unit-punc" ".*[.?!;]\"?"))
          'B)
         (t
          'BB))))
      ((czech-item.feat? w 'pbreak 'XB2) ; "comma" punctuations
       ;; if only one stress unit separates from other punctuation or
       ;; the neighbor stress unit contains preelement, phrase break
       ;; *may* become shorter
       (item.set_feat
        w 'pbreak
        (cond
         ((czech-item.feat? w "R:SylStructure.name" 0)
          ;; not a word actually
          'B)
         ((czech-item.feat*? w "lisp_czech-next-simple-punc" ".*,")
          'SB)
         ((czech-item.feat*? w "lisp_czech-prev-simple-punc" ".*,")
          'B)
         ((czech-item.feat*? w "lisp_czech-prev-simple-punc" ".*[-.?!;:]\"?")
          'SB)
         ((czech-item.feat*? (czech-word-stress-unit w)
                             "n.lisp_czech-stress-unit-punc" ".*[-.?!;:]\"?")
          'SB)
         (t
          'B))))))
   (utt.relation.items utt 'Word)))

;;; Segmentation

;; Walk the list SEGMENTS of segment items and rename segments according to
;; their right-hand neighbour (rules below).  Recurses on (cdr SEGMENTS), so
;; each segment is examined once as the head of the list.
(define (czech-adjust-segments segments)
  (if (not (null? segments))
      (let ((item1 (nth 0 segments))
            (item2 (nth 1 segments))
            (item3 (nth 2 segments))
            ;; the word item a segment belongs to (two levels up in
            ;; SylStructure)
            (item-word (lambda (i)
                         (item.parent
                          (item.parent
                           (item.relation i 'SylStructure))))))
        (let ((name1 (and item1 (item.name item1)))
              (name2 (and item2 (item.name item2)))
              (name3 (and item3 (item.name item3)))
              (same-word? (lambda (i1 i2)
                            (equal? (item-word i1) (item-word i2)))))
          ;; nasals
          (if (and (string-equal name1 "n")
                   (czech-item.feat? item2 "ph_postnas" '+)
                   (same-word? item1 item2))
              (item.set_name item1 "n*"))
          ;; sh
          (if (and (string-equal name1 "s")
                   (string-equal name2 "h")
                   (same-word?
item1 item2))
              ;; "sh" becomes "zh" when czech-moravian is set, otherwise
              ;; "sch"
              (if czech-moravian
                  (item.set_name item1 "z")
                  (item.set_name item2 "ch")))
          ;; unvoiced-r~
          (if (and (string-equal name2 "r~")
                   (czech-item.feat? item1 "ph_cvox" '-)
                   (same-word? item1 item2))
              (item.set_name item2 "r~*"))
          ;; voiced-unvoiced
          ;; item1 is replaced by its `ph_partner' phone
          (if (and (czech-item.feat? item1 "ph_cvox" '+)
                   (not (czech-item.feat? item1 "ph_partner" 0))
                   item2
                   (or (string-equal name2 "#")
                       (string-equal name2 "_")
                       (czech-item.feat? item2 "ph_cvox" '-)
                       (and (czech-item.feat? item2 "ph_cvox" 'u)
                            (not (same-word? item1 item2))
                            (not (member
                                  (item.name (item-word item1))
                                  (append
                                   (list "v" "z")
                                   czech-proper-single-syl-prepositions))))))
              (item.set_name item1 (item.feat item1 "ph_partner")))
          ;; unvoiced-voiced
          ;; item1 is replaced by its `ph_partner' phone, except before "v"
          ;; and "r~"
          (if (and (czech-item.feat? item1 "ph_cvox" '-)
                   (not (czech-item.feat? item1 "ph_partner" 0))
                   item2
                   (czech-item.feat? item2 "ph_cvox" '+)
                   (not (string-equal name2 "v"))
                   (not (string-equal name2 "r~")))
              (item.set_name item1 (item.feat item1 "ph_partner"))))
        (czech-adjust-segments (cdr segments)))))

;; Apply czech-adjust-segments to the Segment relation of UTT repeatedly,
;; until a pass leaves the list of segment names unchanged.  Returns UTT.
(define (czech-adjust-phonetic-form utt)
  (let ((items (utt.relation.items utt 'Segment)))
    (let ((names (mapcar item.name items))
          (old-names '()))
      (while (not (equal? old-names names))
        (czech-adjust-segments items)
        (set! old-names names)
        (set! names (mapcar item.name (utt.relation.items utt 'Segment))))))
  utt)

;; Create the IntUnit relation of UTT: syllables are grouped into intonation
;; units that end at syllables marked with the `sentence_break' feature.
(define (czech-intonation-units utt)
  ;; Mark syllables before phrase breaks
  (let ((token (utt.relation utt 'Token)))
    (while token
      (if (or (czech-item.feat*? token "daughtern.pbreak" "[SBX]?B[12]?")
              (czech-item.feat*? token "daughtern.p.pbreak" "[SBX]?B[12]?"))
          ;; find the last word at or before the token's last word that has
          ;; syllables, and mark its last syllable
          (let ((w (item.daughtern token)))
            (while (and w
                        (not (item.daughters (item.relation w 'SylStructure))))
              (set! w (item.prev w)))
            (if w
                (item.set_feat (item.daughtern (item.relation w 'SylStructure))
                               "sentence_break" 1))))
      (set!
token (item.next token))))
  ;; Make the intonation units
  (utt.relation.create utt 'IntUnit)
  (let ((sylwords (utt.relation.items utt 'Syllable))
        (id 1)
        ;; syllables collected for the current unit, in reverse order
        (unit-sylwords '()))
    (while sylwords
      (let ((w (car sylwords)))
        (set! unit-sylwords (cons w unit-sylwords))
        (set! sylwords (cdr sylwords))
        ;; If `w' is a last syllable before a relevant phrase break, make new
        ;; intonation unit
        (if (or (czech-item.feat*? w "sentence_break" 1)
                ;; This is the very last syllable (we reach this point when the
                ;; last token generates no words for whatever reason)
                (not (item.next w)))
            (begin
              (utt.relation.append
               utt 'IntUnit
               `("int" ((name ,(format nil "IUnit%d" id)))))
              (set! id (+ id 1))
              ;; Add the syllables to the intonation unit
              ;; (this loop also leaves unit-sylwords empty for the next unit)
              (let ((i (utt.relation.last utt 'IntUnit)))
                (set! unit-sylwords (reverse unit-sylwords))
                (while unit-sylwords
                  (item.append_daughter i (car unit-sylwords))
                  (set! unit-sylwords (cdr unit-sylwords))))))))))

;; Return non-nil if the intonation unit INT-UNIT ends with punctuation
;; containing "?" and neither the word of its first syllable nor the word of
;; its second syllable has `pos' equal to `question'.
(define (czech-yes-no-question int-unit)
  (and (string-matches
        (item.feat int-unit
                   "daughtern.R:SylStructure.parent.R:Token.parent.punc")
        ".*\\?")
       (not (czech-item.feat?
             int-unit
             "daughter1.R:SylStructure.parent.R:Word.pos" 'question))
       (not (czech-item.feat?
             int-unit
             "daughter2.R:SylStructure.parent.R:Word.pos" 'question))))

;; Prepositions that czech-identify-stress-units joins with the following
;; word; also consulted by the voicing rules in czech-adjust-segments.
(defvar czech-proper-single-syl-prepositions
  '("bez" "do" "ke" "ku" "na" "nad" "o" "od" "po" "pod" "pro" "před" "přes"
    "při" "se" "u" "ve" "za" "ze"))

;; Single-syllable words that czech-identify-stress-units does not set aside
;; as a separate final unit.
(defvar czech-special-final-words
  '("ho" "je" "jej" "ji" "jsem" "jsi" "jste" "mě" "mi" "se" "si" "tě" "ti"))

;; Return the list of syllable kernels found in the list of segment items
;; PHONEMES.  Each kernel is a list of segment items.
(define (czech-syllable-kernels phonemes)
  (let ((kernels '()))
    (while phonemes
      ;; Starting syllabic consonant doesn't constitute syllable
      (if (and (czech-item.feat? (car phonemes) 'ph_vc '-)
               (czech-item.feat? (car phonemes) 'ph_syl '+))
          (set! phonemes (cdr phonemes)))
      ;; Skip non-syllabic consonants
      (while (and phonemes (czech-item.feat? (car phonemes) 'ph_syl '-))
        (set! phonemes (cdr phonemes)))
      (if phonemes
          ;; Now take the kernel
          ;; kc collects syllabic consonants, kv syllabic vowels
          (let ((kc '())
                (kv '()))
            (if (czech-item.feat?
(car phonemes) 'ph_vc '-)
                (while (and phonemes
                            (czech-item.feat? (car phonemes) 'ph_vc '-)
                            (czech-item.feat? (car phonemes) 'ph_syl '+))
                  (set! kc (cons (car phonemes) kc))
                  (set! phonemes (cdr phonemes))))
            (while (and phonemes
                        (czech-item.feat? (car phonemes) 'ph_vc '+)
                        (czech-item.feat? (car phonemes) 'ph_syl '+))
              (set! kv (cons (car phonemes) kv))
              (set! phonemes (cdr phonemes)))
            ;; vowels take precedence over the syllabic consonants
            (let ((k (reverse (or kv kc))))
              ;; extend the kernel to the left over segments whose ph_cvox
              ;; is + or u
              (let ((seg (and k (item.prev (car k)))))
                (while (and seg
                            (or (czech-item.feat? seg 'ph_cvox '+)
                                (czech-item.feat? seg 'ph_cvox 'u)))
                  (set! k (cons seg k))
                  (set! seg (item.prev seg))))
              (set! kernels (cons k kernels))))))
    (reverse kernels)))

;; Number of syllable kernels in PHONEMES.
(define (czech-syllable-count phonemes)
  (length (czech-syllable-kernels phonemes)))

;; Return the segments of UNIT.  UNIT is either a stress unit item (its
;; daughters are used) or a list of syllable items; the symbol `preelement'
;; inside the list is skipped.
(define (czech-stress-unit-phonemes unit)
  (if (and unit (not (consp unit)))
      (set! unit (item.daughters unit)))
  (apply append (mapcar (lambda (syl)
                          (if (not (eq? syl 'preelement))
                              (item.daughters
                               (item.relation syl 'SylStructure))))
                        unit)))

;; Number of syllable kernels in the segments of UNIT.
(define (czech-unit-syllable-count unit)
  (czech-syllable-count (czech-stress-unit-phonemes unit)))

;; Group the syllable items SYLWORDS into stress units.  Returns a list of
;; units; each unit is a list of syllable items, possibly containing the
;; symbol `preelement'.
(define (czech-identify-stress-units sylwords)
  (let ((units (mapcar list sylwords))
        ;; word item owning the last syllable of UNIT
        (unit-word (lambda (unit)
                     (and (not (null? unit))
                          (item.parent
                           (item.relation (car (last unit)) 'SylStructure)))))
        ;; name of the word of a one-syllable UNIT, nil for longer units
        (unit-word-name (lambda (unit)
                          (and (eqv? (length unit) 1)
                               (item.feat (car unit)
                                          "R:SylStructure.parent.name"))))
        ;; destructively join the first two elements of LIST
        (merge (lambda (list)
                 (set-car! list (append (car list) (cadr list)))
                 (set-cdr! list (cddr list)))))
    ;; Nothing to do if there is at most one word
    (if (<= (length units) 1)
        units
        (begin
          ;; Basic joining
          (let ((units* units))
            (while (cdr units*)
              (let ((w (unit-word (car units*))))
                (if (or ;; Join non-syllabic prepositions
                        (czech-item.feat? w 'pos 'prep0)
                        ;; Join proper single-syllabic prepositions
                        (and (member (czech-downcase (item.name w))
                                     czech-proper-single-syl-prepositions)
                             (not (czech-item.feat? w "pos" "se"))))
                    (merge units*)
                    (set! units* (cdr units*))))))
          ;; At most 1 word now?
(if (<= (length units) 1)
              units
              (let ((last-unit (car (last units))))
                ;; Final single-syllabic word
                ;; It is detached from `units' here and appended again at the
                ;; very end; otherwise last-unit is cleared.
                (if (and (<= (czech-unit-syllable-count last-unit) 1)
                         (not (member (unit-word-name last-unit)
                                      czech-special-final-words)))
                    (set-cdr! (nth_cdr (- (length units) 2) units) '())
                    (set! last-unit '()))
                ;; Initial single-syllabic words
                (let ((units* units)
                      (singles '()))
                  (while (and units*
                              (<= (czech-unit-syllable-count (car units*)) 1))
                    (set! singles (cons (car units*) singles))
                    (set! units* (cdr units*)))
                  (set! singles (reverse singles))
                  (let ((len (length singles)))
                    (cond
                     ((<= len 0)
                      nil)
                     ;; one single: joined to the next unit as a preelement
                     ((<= len 1)
                      (set! units (cons (append (car singles)
                                                '(preelement)
                                                (car units*))
                                        (cdr units*)))
                      (set! units* units))
                     ;; two to four singles: one unit
                     ((<= len 4)
                      (set! units (cons (apply append singles) units*)))
                     ;; more: split into two units
                     (t
                      (let ((first-unit '())
                            (n (/ len 2))
                            (i 0))
                        (while (< i n)
                          (set! first-unit (append (car singles) first-unit))
                          (set! singles (cdr singles))
                          (set! i (+ i 1)))
                        (set! units (cons (reverse first-unit)
                                          (cons (apply append singles)
                                                units*)))))))
                  ;; Middle word processing
                  (while units*
                    (let ((u (car units*)))
                      ;; The word "a"
                      (if (string-equal (unit-word-name u) "a")
                          (merge units*))
                      ;; Single-syllabic words
                      ;; len = syllables in the current unit, slen = number
                      ;; of single-syllable units that follow it
                      (let ((len (czech-unit-syllable-count u))
                            (singles '())
                            (slen 0)
                            (next-units* (cdr units*)))
                        (while (and next-units*
                                    (<= (czech-unit-syllable-count
                                         (car next-units*))
                                        1)
                                    (not (string-equal
                                          (unit-word-name (car next-units*))
                                          "a")))
                          (set! singles (cons (car next-units*) singles))
                          (set! slen (+ slen 1))
                          (set! next-units* (cdr next-units*)))
                        (set! singles (reverse singles))
                        ;; merge-n: apply `merge' N times at UNITS
                        (let ((merge-n (lambda (n units)
                                         (while (> n 0)
                                           (merge units)
                                           (set! n (- n 1))))))
                          (cond
                           ((eqv? slen 0)
                            nil)
                           ((eqv? slen 1)
                            (merge units*))
                           ((eqv? slen 2)
                            (if (and (<= len 4)
                                     (czech-random-choice '(t nil)))
                                (merge-n 2 units*)
                                (merge (cdr units*))))
                           ((eqv? slen 3)
                            (if (<= len 3)
                                (merge-n 3 units*)
                                (merge-n 2 (cdr units*))))
                           ((eqv?
                             slen 4)
                            (cond
                             ((>= len 5)
                              (merge-n 3 (cdr units*)))
                             ((and (<= len 2)
                                   (czech-random-choice '(t nil)))
                              (merge-n 4 units*))
                             (t
                              (merge-n 2 units*)
                              (merge-n 1 (cdr units*)))))
                           ((eqv? slen 5)
                            (cond
                             ((<= len 3)
                              (merge-n 2 units*)
                              (merge-n 2 (cdr units*)))
                             ((<= len 4)
                              (merge-n 1 (cdr units*))
                              (merge-n 2 (cddr units*)))
                             (t
                              (merge-n 2 (cdr units*))
                              (merge-n 1 (cddr units*)))))
                           ((eqv? slen 6)
                            (cond
                             ((>= len 4)
                              (merge-n 2 (cdr units*))
                              (merge-n 2 (cddr units*)))
                             ((czech-random-choice '(t nil))
                              (merge-n 2 units*)
                              (merge-n 3 (cdr units*)))
                             (t
                              (merge-n 2 units*)
                              (merge-n 1 (cdr units*))
                              (merge-n 1 (cddr units*)))))
                           (t
                            ;; This very rare case is not defined in the rules
                            (while (>= slen 4)
                              (merge-n 1 (cdr units*))
                              (set! units* (cdr units*))
                              (set! slen (- slen 2)))
                            (merge-n (- slen 1) (cdr units*))
                            ))
                          (set! units* next-units*)))))
                  ;; That's all
                  (if last-unit
                      (append units (list last-unit))
                      units))))))))

;; Create the IntStress and StressUnit relations of UTT from the intonation
;; units in IntUnit, and set the `position' feature of each stress unit.
(define (czech-stress-units utt)
  (utt.relation.create utt 'IntStress)
  (utt.relation.create utt 'StressUnit)
  (let ((id 1)
        (int-unit (utt.relation.first utt 'IntUnit)))
    (while int-unit
      (let ((stress-units (czech-identify-stress-units
                           (item.daughters int-unit))))
        ;; Add the intonation unit at the top of the IntStress relation
        (utt.relation.append utt 'IntStress int-unit)
        (while stress-units
          ;; Create new stress unit
          (item.relation.append_daughter
           int-unit 'IntStress
           `("stress" ((name ,(format nil "SUnit%d" id)) (position "M"))))
          (set! id (+ id 1))
          (utt.relation.append utt 'StressUnit
                               (item.relation.daughtern int-unit 'IntStress))
          ;; Fill it with its words
          ;; (a `preelement' marker only sets a feature on the unit; every
          ;; syllable is added as a daughter, with its segments below it)
          (let ((i (utt.relation.last utt 'StressUnit)))
            (mapcar (lambda (syl)
                      (if (eq? syl 'preelement)
                          (item.set_feat i "preelement" 1)
                          (begin
                            (item.append_daughter i syl)
                            (let ((j (item.daughtern i)))
                              (mapcar (lambda (seg)
                                        (item.append_daughter j seg))
                                      (item.daughters syl))))))
                    (car stress-units)))
          (set!
stress-units (cdr stress-units))))
      ;; The first stress unit in an intonation unit has position I
      (item.set_feat (item.relation.daughter1 int-unit 'IntStress)
                     "position" "I")
      ;; The last stress unit in an intonation unit has position F or FF
      ;; (overrides I in case of a conflict)
      ;; FF-IT is used for yes-no questions, FF-KKL for other units ending
      ;; with one of . ! ? ; :
      (item.set_feat
       (item.relation.daughtern int-unit 'IntStress) "position"
       (if (string-matches
            (item.feat int-unit
                       "daughtern.R:SylStructure.parent.R:Token.parent.punc")
            ".*[.!?;:].*")
           (if (czech-yes-no-question int-unit) "FF-IT" "FF-KKL")
           "F"))
      ;; Special case: F-1 positions overriding I and M
      ;; (the unit before the last one gets the last position with "-1"
      ;; appended)
      (if (not (equal? (item.relation.daughtern int-unit 'IntStress)
                       (item.relation.daughter1 int-unit 'IntStress)))
          (let ((last-pos (item.feat int-unit
                                     "R:IntStress.daughtern.position")))
            (item.set_feat
             (item.prev (item.relation.daughtern int-unit 'IntStress))
             "position"
             (string-append last-pos "-1"))))
      (set! int-unit (item.next int-unit)))))

;; Word module: runs Classic_Word and then builds intonation units and stress
;; units and adjusts phrase breaks.  Returns UTT.
(define (czech-word utt)
  (Classic_Word utt)
  (czech-intonation-units utt)
  (czech-stress-units utt)
  (czech-adjust-phrase-breaks utt)
  utt)

;;; Pauses

;; Insert a "_" segment before the first segment of every SylStructure
;; top-level item except the first, when that segment is a vowel and is not
;; preceded by "#".
(define (czech-add-strokes utt)
  (let ((stroke '(_ (("name" _))))
        (i (utt.relation.first utt 'SylStructure)))
    (while i
      ;; Insert _ before vowels at the beginning of word boundaries
      (if (and (czech-item.feat? i "daughter1.daughter1.ph_vc" '+)
               (item.prev i)
               (not (czech-item.feat?
                     i "daughter1.daughter1.R:Segment.p.name" '#)))
          (item.insert
           (item.relation (item.daughter1 (item.daughter1 i)) 'Segment)
           stroke 'before))
      (set! i (item.next i)))))

;; Run Classic_Pauses and additionally insert a pause after each word whose
;; `pbreak' is SB.
(define (czech-pause-breaks utt)
  (Classic_Pauses utt)
  (let ((words (utt.relation.items utt 'Word)))
    ;; Handle SB -- Classic_Pauses doesn't know about it
    (mapcar
     (lambda (w)
       (if (czech-item.feat?
w "pbreak" 'SB) (insert_pause utt w))) words))) (define (czech-pause utt) (czech-pause-breaks utt) (czech-add-strokes utt) (czech-adjust-phonetic-form utt) utt) ;;; Accents (defvar czech-accent-cart-tree '(NONE)) ;; Intonation (defvar czech-int-contours '(((A 1) (0.02 -0.05) (0.02 -0.04) (0 0)) ((B 1) (-0.01 0.02) (-0.02 0.04) (-0.02 0.05)) ((C 1) (-0.04 -0.10) (0.02 -0.16) (-0.02 -0.12) (-0.02 -0.14)) ((D 1) (-0.14 0.16) (-0.14 0.20)) ((FA 1) (0.02 -0.04) (0 0)) ((FB 1) (-0.02 0.04) (-0.02 0.05)) ((A 2) (0.02 -0.05) (0.04 -0.08) (-0.03 0)) ((B 2) (-0.04 0.06) (-0.02 0.04) (-0.02 0.07)) ((C 2) (0 -0.10) (-0.04 -0.10) (-0.02 -0.12) (0.02 -0.16)) ((D 2) (-0.06 0.08) (-0.10 0.14)) ((FA 2) (0.04 -0.08) (-0.03 0)) ((FB 2) (-0.02 0.04) (-0.02 0.07)) ((A 3) (0.02 -0.02 -0.04) (0.02 -0.04 -0.02) (0.04 -0.04 -0.04) (0 0 -0.02) (0 -0.04 0) (-0.04 0.08 -0.10) (-0.04 0.04 -0.04) (-0.02 -0.01 0)) ((B 3) (0 -0.04 0.04) (0 -0.06 0.04) (-0.06 0.04 0.02) (-0.01 0.04 0.02) (-0.06 0 0.06) (-0.06 0.02 0.04) (-0.04 0.04 -0.04)) ((C 3) (0 -0.05 -0.05) (-0.04 -0.02 -0.08) (-0.06 -0.04 -0.04) (-0.06 -0.10 -0.02)) ((D 3) (-0.06 -0.01 0.09) (-0.06 0.08 -0.01)) ((FA 3) (-0.04 0.08 -0.10) (-0.04 0.04 -0.04) (-0.02 -0.01 0)) ((FB 3) (-0.06 0 0.06) (-0.06 0.02 0.04) (-0.04 0.04 -0.04)) ((A 4) (0 0 -0.02 -0.01) (-0.02 0 -0.03 0) (-0.03 0.03 -0.02 -0.01) (0 0 -0.01 0)) ((B 4) (0 -0.03 0.01 0.02) (-0.02 0 0.02 0.02) (0 -0.03 0.03 0.02)) ((C 4) (-0.04 -0.06 -0.02 -0.02) (-0.02 -0.02 -0.04 -0.06) (-0.02 -0.08 -0.04 -0.02)) ((D 4) (-0.06 0 -0.01 0.12) (-0.06 0.12 0 -0.03)) ((FA 4) (-0.03 0.03 -0.02 -0.01) (0 0 -0.01 0)) ((FB 4) (-0.02 0 0.02 0.02) (0 -0.03 0.03 0.02)) ((A 5) (-0.02 0.02 -0.02 -0.01 0) (-0.03 0.03 0 0 -0.03) (-0.02 0.02 0 0 -0.02)) ((B 5) (0 -0.03 0.01 0.02 0.01) (0.01 -0.02 0 0 0.02) (-0.02 0 0.02 0.02 0)) ((C 5) (-0.02 0 -0.02 -0.04 -0.06) (-0.02 -0.08 -0.02 -0.02 -0.02) (-0.02 -0.02 -0.08 -0.02 -0.02)) ((D 5) (-0.06 0 -0.01 -0.01 0.13) (-0.06 0.13 0 -0.04 -0.04)) ((FA 5) (-0.02 
0.02 0 0 -0.02)) ((FB 5) (-0.02 0 0.02 0.02 0)) ((A 6) (-0.02 0.02 -0.01 0 (0) -0.02 -0.01)) ((B 6) (0 -0.01 0 0 (0) 0.01 0.01) (0 -0.02 0.01 0.01 (0) 0.01 0.02)) ((C 6) (-0.02 0 -0.02 -0.04 -0.06 0 (0)) (-0.02 -0.08 -0.02 -0.02 -0.02 (0)) (-0.02 -0.02 -0.08 -0.02 -0.02 -0.02 (0))) ((D 6) (-0.06 0 -0.01 -0.01 0 (0) 0.13) (0.13 0 -0.02 0 (0) -0.04 -0.04)) ((FA 6) (-0.02 0.02 -0.01 0 (0) -0.02 -0.01)) ((FB 6) (0 -0.02 0.01 0.01 (0) 0.01 0.02)) )) (defvar czech-int-contour-tree ;; Contourtype set: A, B, C, D, FA and FB (for F and F-1 positions) '((position is I) ((preelement > 0) ((B)) ((A))) ((position is M) ((p.contourtype is B) ((A)) ((B))) ((position is F-1) ((FB)) ((position is F) ((FA)) ((position is FF-KKL-1) ((A)) ((position is FF-KKL) ((C)) ((position is FF-IT-1) ((B)) ((position is FF-IT) ((D)) ((ERROR))))))))))) (define (czech-int-select-contours utt) (let ((unit (utt.relation utt 'StressUnit)) (last-contour nil)) (while unit (let ((position (item.feat unit 'position))) ;; Determine appropriate contour type (let ((contourtype (wagon_predict unit czech-int-contour-tree))) (item.set_feat unit "contourtype" contourtype) ;; Find particular contour (let ((nsyls (czech-unit-syllable-count unit))) (let ((contour (czech-random-choice (cdr (assoc (list contourtype (cond ((< nsyls 1) 1) ((> nsyls 6) 6) (t nsyls))) czech-int-contours))))) ;; Adjust the first syllables of final contours (if (or (string-equal position "F") (string-matches position "FF.*[A-Z]")) (let ((adjust-contour (lambda (c adj) (if last-contour (cons (+ (car (last last-contour)) adj) (cdr c)) c)))) (cond ((string-equal position "F") (set! contour (adjust-contour contour -0.02))) ((string-equal position "FF-KKL") (set! contour (adjust-contour contour 0.02))) ((string-equal position "FF-IT") (set! contour (adjust-contour contour -0.02)))))) ;; Set contour values for preelements (if (czech-item.feat? unit 'preelement 1) (set! 
contour (cons (- (car contour) 0.02) contour))) ;; Finalize contours of long units (let ((n (- nsyls 6))) (if (>= n 0) (let ((prefix '()) (contour* contour)) (while (not (consp (car contour*))) (set! prefix (cons (car contour*) prefix)) (set! contour* (cdr contour*))) (let ((val (caar contour*))) (set! contour* (cdr contour*)) (while (> n 0) (set! contour* (cons val contour*)) (set! n (- n 1))) (set! contour (append (reverse prefix) contour*)))))) (set! last-contour contour) (item.set_feat unit 'contour contour))))) (set! unit (item.next unit))) ;; Spread the contours on sylwords (set! unit (utt.relation utt 'StressUnit)) (while unit (let ((contour (item.feat unit 'contour)) (kernels (czech-syllable-kernels (czech-stress-unit-phonemes unit)))) (if (eqv? (length kernels) 1) ;; One-syllabic units have two-number contours ;; (they can occur only in the final positions) (let ((k (car kernels)) (contour-1 (car contour)) (contour-2 (cadr contour))) (let ((k* (reverse k)) (last-k (car (last k))) (contour-list (list (list 0.1 contour-1) (list 0.9 contour-2)))) (if (eqv? (length k) 1) ;; Single phone in kernel -- put both values on it (item.set_feat (car k) 'contourval contour-list) ;; Multiple phones -- spread the values over true kernel (begin (while (czech-item.feat? (cadr k*) 'ph_vc '+) (set! k* (cdr k*))) (if (eq? (car k*) last-k) (item.set_feat last-k 'contourval contour-list) (begin (item.set_feat (car k*) 'contourval contour-1) (item.set_feat last-k 'contourval contour-2))))) ;; Extend the contour pair to certain neighbors (set! k* (cdr k*)) (while k* (item.set_feat (car k*) 'contourval contour-1) (set! k* (cdr k*))) (let ((next-k (item.next last-k))) (while (or (czech-item.feat? next-k 'ph_cvox '+) (czech-item.feat? next-k 'ph_cvox 'u)) (item.set_feat next-k 'contourval contour-2) (set! 
next-k (item.next next-k)))))) ;; Otherwise spread the contour value over all kernels (while kernels (let ((contourval (car contour))) (mapcar (lambda (seg) (item.set_feat seg 'contourval contourval)) (car kernels))) (set! kernels (cdr kernels)) (set! contour (cdr contour))))) (set! unit (item.next unit))))) (defvar czech-int-simple-params '((f0_mean 100) (f0_std 10))) (define (czech-int-targets utt syl) (let ((segments (item.relation.daughters syl 'SylStructure)) (syl-start (item.feat syl 'syllable_start)) (f0-base (cadr (assq 'f0_mean int_general_params))) (f0-std (/ (cadr (assq 'f0_std int_general_params)) 10)) (times-values '())) (let ((last-seg-end syl-start) (f0-value (lambda (contourval) (* f0-base (+ 1 (* f0-std contourval)))))) (while segments (let ((s (car segments))) (let ((contourval (and (czech-item.has-feat s 'contourval) (item.feat s 'contourval))) (seg-end (item.feat s 'end))) (cond ((consp contourval) (let ((tlen (- seg-end last-seg-end))) (set! times-values (append (mapcar (lambda (v) (list (+ last-seg-end (* (read-from-string (car v)) tlen)) (f0-value (cadr v)))) (reverse contourval)) times-values)))) (contourval (let ((time (/ (+ last-seg-end seg-end) 2.0)) (value (f0-value contourval))) (set! times-values (cons (list time value) times-values))))) (set! last-seg-end seg-end) (set! segments (cdr segments)))))) ;; Festival apparently decreases F0 at the end of the utterance, prevent it (if (not (null? times-values)) (let ((last-time (car (car times-values))) (last-value (cadr (car times-values))) (last-seg (item.relation.daughtern syl 'SylStructure))) (set! 
times-values (cons (list (czech-max (- (item.feat last-seg 'end) 0.01) (+ last-time 0.001)) last-value) times-values)))) (reverse times-values))) ;;; Duration (defvar czech-phoneme-durations '((# 0.100) (_ 0.025) (a 0.098) (a: 0.142) (b 0.067) (c 0.102) (ch 0.087) (c~ 0.099) (d 0.062) (dz 0.108) (dz~ 0.094) (d~ 0.077) (e 0.099) (e: 0.126) (f 0.089) (g 0.067) (h 0.064) (i 0.077) (i: 0.120) (j 0.065) (k 0.080) (l 0.057) (m 0.068) (n 0.075) (n* 0.098) (n~ 0.079) (o 0.089) (o: 0.137) (p 0.079) (r 0.060) (r~ 0.065) (r~* 0.073) (s 0.098) (s~ 0.090) (t 0.082) (t~ 0.090) (u 0.082) (u: 0.139) (v 0.058) (z 0.077) (z~ 0.074) )) (defvar czech-silence-durations '(("BB" 0.206 0.238) ("B" 0.082 0.095) ("SB" 0.008 0.010))) (defvar czech-stress-duration-factors '((1 1.03) (2 1.02) (3 1.01) (4 1.00) (5 1.00) (6 0.99) (7 0.98) (8 0.96) (9 0.94) (10 0.93) (11 0.91) (12 0.90))) (defvar czech-duration-random-factor 0.2) (define (czech-duration-pauses utt) (let ((word (utt.relation.first utt 'Word))) (while word (let ((durspec (assoc_string (item.feat word "pbreak") czech-silence-durations))) (if durspec (let ((min (nth 1 durspec)) (max (nth 2 durspec)) (seg (find_last_seg word))) (if seg (item.set_feat (item.next (item.relation seg 'Segment)) 'dur_factor (* 10 (+ min (* (- max min) (czech-rand))))))))) (set! word (item.next word))))) (define (czech-duration-factors utt) (let ((sunit (utt.relation.first utt 'StressUnit))) (while sunit (let ((nphones (length (czech-stress-unit-phonemes sunit)))) (cond ((> nphones 12) (set! nphones 12)) ((< nphones 1) (set! nphones 1))) (let ((factor (cadr (assoc nphones czech-stress-duration-factors)))) (mapcar (lambda (syl) (mapcar (lambda (seg) (item.set_feat seg "dur_factor" factor)) (item.relation.daughters syl 'SylStructure))) (item.relation.leafs sunit 'StressUnit)))) (set! 
sunit (item.next sunit)))) ;; Adjust duration factors for initial single-syllabic word ;; (Take the initial word from Word, not just SylStructure, which may contain ;; prepunctuation.) (let ((1st-word (utt.relation.first utt 'Word))) (while (and 1st-word (item.daughter1 1st-word) (item.daughter1 (item.daughter1 1st-word))) (set! 1st-word (item.next 1st-word))) (let ((phonemes (and 1st-word (apply append (mapcar item.daughters (item.daughters (item.relation 1st-word 'SylStructure))))))) (if (eqv? (czech-syllable-count phonemes) 1) (let ((durfact (cadr (assoc (czech-min (length phonemes) 12) czech-stress-duration-factors)))) (mapcar (lambda (ph) (item.set_feat ph 'dur_factor durfact)) phonemes)))))) (define (czech-duration-compute utt) (mapcar (lambda (seg) (let ((factor (* (item.feat seg "dur_factor") (Param.get 'Duration_Stretch)))) (item.set_feat seg "end" (+ (item.feat seg "start") (* (if (<= factor 0) 1 factor) (cadr (assoc_string (item.name seg) czech-phoneme-durations*))))))) (utt.relation.items utt 'Segment))) (define (czech-duration utt) (czech-duration-pauses utt) (czech-duration-factors utt) (czech-duration-compute utt) utt) ;;; Volume (defvar czech-volume-scale 1.8) (defvar czech-volume-scale* nil) (define (czech-adjust-volume utt) (utt.wave.rescale utt czech-volume-scale*)) ;;; Final phoneme translation (define (czech-translate-add-vowels utt) (if (and (string-equal (Param.get 'Language) 'czech) czech-insert-filling-vowels) (let ((i (utt.relation.first utt 'Segment)) (insert-item (lambda (name orig-ph end pos) (let ((feats (item.features orig-ph)) (new-feats `((name ,name) (end ,end)))) (while feats (if (not (member (caar feats) '(id name end))) (set! new-feats (cons (car feats) new-feats))) (set! feats (cdr feats))) (item.insert orig-ph (cons name (list new-feats)) pos) (let ((new ((if (eq? pos 'after) item.next item.prev) orig-ph))) (if (member 'SylStructure (item.relations orig-ph)) (item.relation.insert orig-ph 'SylStructure new pos)))))) (vowel? 
(lambda (ph) (czech-item.feat? ph 'ph_vc '+))) (last-end 0.0)) (while i (let ((end (item.feat i 'end))) (cond ;; Duplicate vowels ((vowel? i) (insert-item (item.name i) i (/ (+ last-end end) 2) 'before))) (set! last-end end)) (set! i (item.next i))))) utt) (define (czech-translate-phonemes utt) (if (and (string-equal (Param.get 'Language) 'czech) czech-phoneset-translation*) (mapcar (lambda (item) (let ((tr (assoc (item.name item) czech-phoneset-translation*))) (if tr (item.set_name item (cadr tr))))) (utt.relation.items utt 'Segment))) utt) (defvar czech-after-analysis-hooks (list czech-translate-add-vowels czech-translate-phonemes)) ;;; Finally, the language definition itself (define (czech-reset-parameters) (set! czech-lts-extra-rules* czech-lts-extra-rules) (set! czech-int-simple-params* czech-int-simple-params) (set! czech-phoneme-durations* czech-phoneme-durations) (set! czech-volume-scale* czech-volume-scale) (set! czech-phoneset-translation* czech-phoneset-translation) (set! czech-after-analysis-hooks* czech-after-analysis-hooks) (Param.set 'Synth_Method 'UniSyn)) (define (voice-czech-common) (voice_reset) (Param.set 'Language 'czech) ;; Phone set (Param.set 'PhoneSet 'czech) (PhoneSet.select 'czech) (set! pos_lex_name nil) ;; Tokenization (set! token.unknown_word_name czech-token.unknown-word-name) (set! token.whitespace czech-token.whitespace) (set! token.punctuation czech-token.punctuation) (set! token.prepunctuation czech-token.prepunctuation) (set! token_to_words czech-token-to-words) (Param.set 'Token_Method 'Token_Any) ;; Lexicon selection (lex.select "czech") ;; Segmentation (Param.set 'Word_Method 'czech-word) ;; Part of speech (set! guess_pos czech-guess-pos) ; not actually used (Param.set 'POS_Method czech-pos) ;; Simple phrase break prediction by punctuation (set! pos_supported nil) (set! 
phrase_cart_tree czech-phrase-cart-tree) (Param.set 'Phrase_Method 'cart_tree) (Param.set 'Phrasify_Method Classic_Phrasify) ;; Pauses (Param.set 'Pause_Method czech-pause) ;; Accent prediction and intonation (set! int_accent_cart_tree czech-accent-cart-tree) (Param.set 'Int_Method czech-int-select-contours) (set! int_general_params (cons (list 'targ_func czech-int-targets) czech-int-simple-params*)) (Param.set 'Int_Target_Method Int_Targets_General) ;; Duration prediction (Param.set 'Duration_Method czech-duration) ;; Postlex rules (set! postlex_rules_hooks '()) (set! after_analysis_hooks czech-after-analysis-hooks*) ;; Final voice adjustment (set! after_synth_hooks (list czech-adjust-volume)) ;; Set current voice (set! current_voice_reset nil) (set! current-voice 'czech)) (defmac (czech-proclaim-voice form) (let ((name (nth 1 form)) (description (nth 2 form)) (body (nth_cdr 3 form)) (options ())) (if (consp name) (begin (set! options (cdr name)) (set! name (car name)))) (set! name (intern (string-append 'czech_ name))) (let ((parameters `((language czech) (dialect ,(cdr (assoc 'dialect options))) (gender ,(cadr (assoc 'gender options))) (coding ISO-8859-2) (description ,description)))) `(begin (define (,(intern (string-append 'voice_ name))) (czech-reset-parameters) ,@body (voice-czech-common) (set! current-voice (quote ,name))) (proclaim_voice (quote ,name) (quote ,parameters)))))) (provide 'czech) festival-czech-0.3/czech-lexicon.out0000644000175000017500000002257111026005751015675 0ustar pdmpdmMNCL (" " nil (((t a b u l a: t o r) 0))) (" " nil (((m e z e r a) 0))) ("!" nil (((v i k r~ i c~ n~ i: k) 0))) ("!" 
punc nil) ("\"" nil (((u v o z o v k i) 0))) ("\"" punc nil) ("#" nil (((m r~ i: z~ k a) 0))) ("$" nil (((d o l a r) 0))) ("%" nil (((p r o c e n t) 0))) ("&" nil (((a m p r s a n d) 0))) ("'" nil (((a p o s t r o f) 0))) ("'" punc nil) ("(" punc nil) (")" punc nil) ("*" nil (((h v j e z d~ i c~ k a) 0))) ("+" nil (((p l u s) 0))) ("+" num (((p l u s) 0))) ("," nil (((c~ a: r k a) 0))) ("," num (((c e l i: ch) 0))) ("," punc nil) ("-" nil (((p o m l c~ k a) 0))) ("-" num (((m i n u s) 0))) ("-" punc nil) ("-" range (((a z~) 0))) ("." nil (((t e c~ k a) 0))) ("." num (((t e c~ k a) 0))) ("." punc nil) ("/" nil (((l o m e n o) 0))) (":" nil (((d v o j t e c~ k a) 0))) (":" punc nil) (";" nil (((s t r~ e d n~ i: k) 0))) (";" punc nil) ("<" punc (((m e n s~ i:) 0) ((n e z~) 0))) ("=" nil (((r o v n a:) 0) ((s e) 0))) (">" punc (((v j e t s~ i:) 0) ((n e z~) 0))) ("?" nil (((o t a z n~ i: k) 0))) ("?" punc nil) ("@" nil (((z a v i n a: c~) 0))) ("[" punc (((l e v a:) 0) ((h r a n a t a:) 0))) ("]" punc (((p r a v a:) 0) ((h r a n a t a:) 0))) ("^" nil (((s t r~ i: s~ k a) 0))) ("_" nil (((p o d t r z~ i: t k o) 0))) ("`" punc nil) ("a" nil (((a) 0))) ("a" sym (((a:) 0))) ("b" nil (((b e:) 0))) ("c" nil (((c e:) 0))) ("cca" nil (((c i r k a) 0))) ("ch" nil (((ch a:) 0))) ("Chocomyšl" nil (((ch o c o m i s~ l) 0))) ("Chocomyšle" nil (((ch o c o m i s~ l e) 0))) ("Chocomyšlemi" nil (((ch o c o m i s~ l e m i) 0))) ("Chocomyšli" nil (((ch o c o m i s~ l i) 0))) ("chocomyšlskou" nil (((ch o c o m i s~ l s k o u) 0))) ("chocomyšlsky" nil (((ch o c o m i s~ l s k i) 0))) ("chocomyšlská" nil (((ch o c o m i s~ l s k a:) 0))) ("chocomyšlské" nil (((ch o c o m i s~ l s k e:) 0))) ("chocomyšlského" nil (((ch o c o m i s~ l s k e: h o) 0))) ("chocomyšlském" nil (((ch o c o m i s~ l s k e: m) 0))) ("chocomyšlskému" nil (((ch o c o m i s~ l s k e: m u) 0))) ("chocomyšlský" nil (((ch o c o m i s~ l s k i:) 0))) ("chocomyšlských" nil (((ch o c o m i s~ l s k i: ch) 0))) 
("chocomyšlským" nil (((ch o c o m i s~ l s k i: m) 0))) ("chocomyšlskýma" nil (((ch o c o m i s~ l s k i: m a) 0))) ("chocomyšlskými" nil (((ch o c o m i s~ l s k i: m i) 0))) ("chocomyšlští" nil (((ch o c o m i s~ l s~ t~ i:) 0))) ("Chocomyšlí" nil (((ch o c o m i s~ l i:) 0))) ("Chocomyšlích" nil (((ch o c o m i s~ l i: ch) 0))) ("Chocomyšlím" nil (((ch o c o m i s~ l i: m) 0))) ("control" nil (((k o n t r o l) 0))) ("copyright" nil (((k o p i r a j t) 0))) ("copyrightech" nil (((k o p i r a j t e ch) 0))) ("copyrightem" nil (((k o p i r a j t e m) 0))) ("copyrightu" nil (((k o p i r a j t u) 0))) ("copyrighty" nil (((k o p i r a j t i) 0))) ("copyrightů" nil (((k o p i r a j t u:) 0))) ("copyrightům" nil (((k o p i r a j t u: m) 0))) ("czech" nil (((c~ e k) 0))) ("d" nil (((d e:) 0))) ("e" nil (((e:) 0))) ("Emacs" nil (((i: m e k s) 0))) ("Emacsech" nil (((i: m e k s e ch) 0))) ("Emacsem" nil (((i: m e k s e m) 0))) ("emacsovou" nil (((i: m e k s o v o u) 0))) ("emacsová" nil (((i: m e k s o v a:) 0))) ("emacsové" nil (((i: m e k s o v e:) 0))) ("emacsového" nil (((i: m e k s o v e: h o) 0))) ("emacsovém" nil (((i: m e k s o v e: m) 0))) ("emacsovému" nil (((i: m e k s o v e: m u) 0))) ("emacsově" nil (((i: m e k s o v j e) 0))) ("emacsoví" nil (((i: m e k s o v i:) 0))) ("emacsový" nil (((i: m e k s o v i:) 0))) ("emacsových" nil (((i: m e k s o v i: ch) 0))) ("emacsovým" nil (((i: m e k s o v i: m) 0))) ("emacsovýma" nil (((i: m e k s o v i: m a) 0))) ("emacsovými" nil (((i: m e k s o v i: m i) 0))) ("Emacsu" nil (((i: m e k s u) 0))) ("Emacsy" nil (((i: m e k s i) 0))) ("Emacsů" nil (((i: m e k s u:) 0))) ("Emacsům" nil (((i: m e k s u: m) 0))) ("escape" nil (((i s k e j p) 0))) ("f" nil (((e f) 0))) ("foundation" nil (((f a u n d e j s~ n) 0))) ("freeware" nil (((f r i: v e: r) 0))) ("freewarech" nil (((f r i: v e: r e ch) 0))) ("freewarem" nil (((f r i: v e: r e m) 0))) ("freewarovou" nil (((f r i: v e: r o v o u) 0))) ("freewarová" nil (((f r i: v e: r o 
v a:) 0))) ("freewarové" nil (((f r i: v e: r o v e:) 0))) ("freewarového" nil (((f r i: v e: r o v e: h o) 0))) ("freewarovém" nil (((f r i: v e: r o v e: m) 0))) ("freewarovému" nil (((f r i: v e: r o v e: m u) 0))) ("freewarově" nil (((f r i: v e: r o v j e) 0))) ("freewaroví" nil (((f r i: v e: r o v i:) 0))) ("freewarový" nil (((f r i: v e: r o v i:) 0))) ("freewarových" nil (((f r i: v e: r o v i: ch) 0))) ("freewarovým" nil (((f r i: v e: r o v i: m) 0))) ("freewarovýma" nil (((f r i: v e: r o v i: m a) 0))) ("freewarovými" nil (((f r i: v e: r o v i: m i) 0))) ("freewaru" nil (((f r i: v e: r u) 0))) ("freewary" nil (((f r i: v e: r i) 0))) ("freewarů" nil (((f r i: v e: r u:) 0))) ("freewarům" nil (((f r i: v e: r u: m) 0))) ("g" nil (((g e:) 0))) ("GNU" nil (((g n u:) 0))) ("h" nil (((h a:) 0))) ("hardware" nil (((h a r d v e: r) 0))) ("hardwarech" nil (((h a r d v e: r e ch) 0))) ("hardwarem" nil (((h a r d v e: r e m) 0))) ("hardwarovou" nil (((h a r d v e: r o v o u) 0))) ("hardwarová" nil (((h a r d v e: r o v a:) 0))) ("hardwarové" nil (((h a r d v e: r o v e:) 0))) ("hardwarového" nil (((h a r d v e: r o v e: h o) 0))) ("hardwarovém" nil (((h a r d v e: r o v e: m) 0))) ("hardwarovému" nil (((h a r d v e: r o v e: m u) 0))) ("hardwarově" nil (((h a r d v e: r o v j e) 0))) ("hardwaroví" nil (((h a r d v e: r o v i:) 0))) ("hardwarový" nil (((h a r d v e: r o v i:) 0))) ("hardwarových" nil (((h a r d v e: r o v i: ch) 0))) ("hardwarovým" nil (((h a r d v e: r o v i: m) 0))) ("hardwarovýma" nil (((h a r d v e: r o v i: m a) 0))) ("hardwarovými" nil (((h a r d v e: r o v i: m i) 0))) ("hardwaru" nil (((h a r d v e: r u) 0))) ("hardwary" nil (((h a r d v e: r i) 0))) ("hardwarů" nil (((h a r d v e: r u:) 0))) ("hardwarům" nil (((h a r d v e: r u: m) 0))) ("j" nil (((j e:) 0))) ("k" nil (((k) 0))) ("k" sym (((k a:) 0))) ("Kč" nil (((k o r u n) 0))) ("l" nil (((e l) 0))) ("m" nil (((e m) 0))) ("n" nil (((e n) 0))) ("o" nil (((o) 0))) ("o" sym (((o:) 0))) 
("p" nil (((p e:) 0))) ("pst" nil (((p s t) 0))) ("q" nil (((k v e:) 0))) ("r" nil (((e r) 0))) ("s" nil (((s) 0))) ("s" sym (((e s) 0))) ("shareware" nil (((s~ e: r v e: r) 0))) ("sharewarech" nil (((s~ e: r v e: r e ch) 0))) ("sharewarem" nil (((s~ e: r v e: r e m) 0))) ("sharewarovou" nil (((s~ e: r v e: r o v o u) 0))) ("sharewarová" nil (((s~ e: r v e: r o v a:) 0))) ("sharewarové" nil (((s~ e: r v e: r o v e:) 0))) ("sharewarového" nil (((s~ e: r v e: r o v e: h o) 0))) ("sharewarovém" nil (((s~ e: r v e: r o v e: m) 0))) ("sharewarovému" nil (((s~ e: r v e: r o v e: m u) 0))) ("sharewarově" nil (((s~ e: r v e: r o v j e) 0))) ("sharewaroví" nil (((s~ e: r v e: r o v i:) 0))) ("sharewarový" nil (((s~ e: r v e: r o v i:) 0))) ("sharewarových" nil (((s~ e: r v e: r o v i: ch) 0))) ("sharewarovým" nil (((s~ e: r v e: r o v i: m) 0))) ("sharewarovýma" nil (((s~ e: r v e: r o v i: m a) 0))) ("sharewarovými" nil (((s~ e: r v e: r o v i: m i) 0))) ("sharewaru" nil (((s~ e: r v e: r u) 0))) ("sharewary" nil (((s~ e: r v e: r i) 0))) ("sharewarů" nil (((s~ e: r v e: r u:) 0))) ("sharewarům" nil (((s~ e: r v e: r u: m) 0))) ("shift" nil (((s~ i f t) 0))) ("shluk" nil (((z h l u k) 0))) ("shora" nil (((z h o r a) 0))) ("shůry" nil (((z h u: r i) 0))) ("software" nil (((s o f t v e: r) 0))) ("softwarech" nil (((s o f t v e: r e ch) 0))) ("softwarem" nil (((s o f t v e: r e m) 0))) ("softwarovou" nil (((s o f t v e: r o v o u) 0))) ("softwarová" nil (((s o f t v e: r o v a:) 0))) ("softwarové" nil (((s o f t v e: r o v e:) 0))) ("softwarového" nil (((s o f t v e: r o v e: h o) 0))) ("softwarovém" nil (((s o f t v e: r o v e: m) 0))) ("softwarovému" nil (((s o f t v e: r o v e: m u) 0))) ("softwarově" nil (((s o f t v e: r o v j e) 0))) ("softwaroví" nil (((s o f t v e: r o v i:) 0))) ("softwarový" nil (((s o f t v e: r o v i:) 0))) ("softwarových" nil (((s o f t v e: r o v i: ch) 0))) ("softwarovým" nil (((s o f t v e: r o v i: m) 0))) ("softwarovýma" nil (((s o f t v e: 
r o v i: m a) 0))) ("softwarovými" nil (((s o f t v e: r o v i: m i) 0))) ("softwaru" nil (((s o f t v e: r u) 0))) ("softwary" nil (((s o f t v e: r i) 0))) ("softwarů" nil (((s o f t v e: r u:) 0))) ("softwarům" nil (((s o f t v e: r u: m) 0))) ("syntetizér" nil (((s i n t e t i z e: r) 0))) ("syntetizérům" nil (((s i n t e t i z e: r u: m) 0))) ("t" nil (((t e:) 0))) ("u" nil (((u) 0))) ("u" sym (((u:) 0))) ("v" nil (((v) 0))) ("v" sym (((v e:) 0))) ("w" nil (((d v o j i t e:) 0) ((v e:) 0))) ("x" nil (((i k s) 0))) ("y" nil (((i p s i l o n) 0))) ("z" nil (((z) 0))) ("z" sym (((z e t) 0))) ("{" punc (((l e v a:) 0) ((s l o z~ e n a:) 0))) ("|" nil (((s v i s l i: t k o) 0))) ("}" punc (((p r a v a:) 0) ((s l o z~ e n a:) 0))) ("~" nil (((v l n k a) 0))) ("š" nil (((e s~) 0))) ("ť" nil (((t~ e:) 0))) ("ž" nil (((z~ e t) 0))) ("ß" nil (((o s t r e:) 0) ((e s) 0))) ("á" int (((a:) 0))) ("á" sym (((d l o u h e:) 0) ((a:) 0))) ("ä" nil (((p r~ e h l a s o v a n e:) 0) ((a:) 0))) ("č" nil (((c~ e:) 0))) ("é" nil (((d l o u h e:) 0) ((e:) 0))) ("ě" nil (((i j e) 0))) ("í" nil (((d l o u h e:) 0) ((i:) 0))) ("ď" nil (((d~ e:) 0))) ("ň" nil (((e n~) 0))) ("ó" int (((o:) 0))) ("ó" sym (((d l o u h e:) 0) ((o:) 0))) ("ö" nil (((p r~ e h l a s o v a n e:) 0) ((o:) 0))) ("ř" nil (((e r~) 0))) ("ů" nil (((u:) 0) ((s k r o u s~ k e m) 0))) ("ú" nil (((d l o u h e:) 0) ((u:) 0))) ("ü" nil (((p r~ e h l a s o v a n e:) 0) ((u:) 0))) ("ý" nil (((d l o u h e:) 0) ((i p s i l o n) 0))) festival-czech-0.3/COPYING0000644000175000017500000004313311026005751013441 0ustar pdmpdm GNU GENERAL PUBLIC LICENSE Version 2, June 1991 Copyright (C) 1989, 1991 Free Software Foundation, Inc. 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The licenses for most software are designed to take away your freedom to share and change it. 
By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Library General Public License instead.) You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. 
Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. The precise terms and conditions for copying, distribution and modification follow. GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. 
You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. 
But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) The source code for a work means the preferred form of the work for making modifications to it. 
For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. 4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. 6. 
Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License. 7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. 
Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 9. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. 10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. 
Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. 
Copyright (C) This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA Also add information on how to contact you by electronic and paper mail. If the program is interactive, make it output a short notice like this when it starts in an interactive mode: Gnomovision version 69, Copyright (C) year name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (which makes passes at compilers) written by James Hacker. , 1 April 1989 Ty Coon, President of Vice This General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. 
If this is what you want to do, use the GNU Library General Public License instead of this License. festival-czech-0.3/ChangeLog0000644000175000017500000010507311026005751014162 0ustar pdmpdm2005-08-28 Milan Zamazal * czech.scm (czech-int-select-contours): Use generalized contour lists instead of pairs. (czech-int-targets): Honor the above change. 2005-08-26 Milan Zamazal * czech.scm (czech-token-to-words): Handle R-caron in a special way, to work around a Festival bug. 2005-06-30 Milan Zamazal * czech.scm (czech-basic-lts): If the word is empty or it contains non-readable characters and czech-unknown-symbol-word is empty, return nil. (czech-lts): If the word or its transformation is empty, don't syllabify it. 2005-04-13 Milan Zamazal * czech.scm (phoneSet): Typo in C's partner fixed; make CH* unvoiced. (czech-adjust-segments): Typo in name reference fixed. 2005-04-12 Milan Zamazal * czech.scm (czech-adjust-segments): Don't modify unvoiced-voiced if the voiced consonant is V; test for pauses properly. (czech-orthography): Missing declarations of #_ and Vowel+# added. (czech-word): Don't call check-adjust-phonetic-form. (czech-pause): Call check-adjust-phonetic-form. 2005-04-12 Milan Zamazal * czech.scm (czech-diphones): Removed. (czech-basic-lts): Don't apply czech-diphones rule set. (czech-adjust-phonetic-form): New function. (czech-word): Call it. (phoneset): New feature `postnas'; feature `ctype' removed; `n' value added to the `cvox' feature; (czech-adjust-segments): New function. 2005-04-12 Milan Zamazal * czech.scm (czech-orthography): Handle Y in a similar way as I. 2005-04-11 Milan Zamazal * czech-lexicon.scm: Another diphtong removed. 2005-04-11 Milan Zamazal * czech-mbrola.scm (mbrola_cz2): Set the `gender' voice property. 2005-04-11 Milan Zamazal * czech.scm (czech-token-to-words): Handle r-caron in a special way, to work around a Festival bug. 2005-04-07 Milan Zamazal * czech.scm: Phoneme durations changed, taken from voice-czech-ph. 
2005-04-07 Milan Zamazal * README.cs: Some credits added. 2005-04-06 Milan Zamazal * czech.scm (czech-diphones): r~+# -> r~*+#. 2005-04-06 Milan Zamazal * czech-lexicon.scm: Don't use the `ou' phone. 2005-03-14 Milan Zamazal * New diphone set. 2005-02-17 Milan Zamazal * czech.scm (czech-next-punc, czech-prev-punc): Ensure that nil is never returned. 2005-02-14 Milan Zamazal * czech.scm (czech-token-end-punc): Removed, no longer needed with the right tokenization method. (czech-phrase-cart-tree): Use token_end_punc instead of czech-token-end-punc. 2005-02-14 Milan Zamazal * czech.scm (czech-prev-simple-punc): Don't crash on non-existent utterance elements. 2005-02-14 Milan Zamazal * czech.scm (czech-word-stress-unit): Check whether sylword has any daughters before calling functions on them. 2005-02-12 Milan Zamazal * czech.scm (voice-czech-common): Set Token_Method to Token_Any. 2005-02-12 Milan Zamazal * czech.scm (czech-next-punc, czech-prev-punc): Punctuation detection regexp fixed. 2005-02-12 Milan Zamazal * czech.scm (czech-identify-stress-units): Apply czech-downcase on item.name before looking it in czech-proper-single-syl-prepositions. 2005-02-10 Milan Zamazal * czech.scm (czech-next-token-punc, czech-word-stress-unit) (czech-stress-unit-punc, czech-next-punc, czech-prev-punc) (czech-next-simple-punc, czech-prev-simple-punc): New functions. (czech-phrase-cart-tree): Completely reworked, according to phonetic rules. (czech-intonation-units): Small break separates intonation units too. (czech-pos): Put commas before conjunctions. (czech-token-end-punc): New function. (czech-adjust-phrase-breaks): New function. (czech-word): Call it. (czech-duration): Perform pause duration adjustments. 2005-02-10 Milan Zamazal * czech.scm (czech-phoneme-durations): Pause duration set to 0.1. (czech-duration): Multiply czech-silence-durations by 10. 
2005-02-10 Milan Zamazal * czech.scm (czech-intonation-units): Check for the very last syllable directly in the Syllable relation; check for sentence break there too. 2005-02-05 Milan Zamazal * czech.scm (czech-silence-duration-factors): Replaced by czech-silence-durations. (czech-phoneme-durations): Pause duration changed to 1.00. (czech-duration): Compute durations in the given range. 2005-01-07 Milan Zamazal * czech.scm (czech-insert-filling-vowels): New variable. (czech-translate-add-vowels): Honor it. 2005-01-07 Milan Zamazal * czech.scm (czech-translate-add-vowels): Don't try to insert into the SylStructure relation if it is not present. 2004-07-19 Milan Zamazal * czech-lexicon.scm (=): Added. 2004-05-20 Milan Zamazal * czech.scm (czech-pos): Set `pos' of separate punctuation characters to `nil'. 2004-05-20 Milan Zamazal * czech.scm (czech-intonation-units): Ensure the last syllable always makes a new unit. 2004-05-03 Milan Zamazal * czech.scm (voice-czech-common): Reset postlex_rules_hooks. 2004-04-20 Milan Zamazal * czech.scm (voice-czech-common): Ensure that after_synth_hooks is a list. 2004-04-07 Milan Zamazal * czech.scm (czech-translate-split-diphthongs): Insert the new item into the SylStructure relation too. 2004-04-04 Milan Zamazal * czech.scm (czech-token-to-words): Don't crash in separator testing when there's no punctuation. 2004-04-01 Milan Zamazal * czech.scm (czech-translate-add-vowels): Put the inserted segments into the SylStructure too. 2004-04-01 Milan Zamazal * czech.scm (czech-min, czech-max): New functions. (czech-duration): Don't fail on initial single-syllabic words with more than 12 phonemes. 2004-03-31 Milan Zamazal * czech.scm (czech-token-to-words): Handle multiple punctuation as a separator. 2004-03-30 Milan Zamazal * czech.scm (czech-token-to-words): Downcase `name' before checking it in czech-multiword-abbrevs. (czech-multiword-abbrevs): Honor the previous change. 
2004-03-30 Milan Zamazal * czech.scm (czech-duration): Don't crash on initial empty words carrying phrase breaks. 2004-03-29 Milan Zamazal * czech.scm (czech-lts): Handle empty word correctly. (czech-duration): Don't look at word leafs, look at its granddaughters. 2004-03-24 Milan Zamazal * czech.scm (czech-duration): When checking first word, start from the Word relation, not from SylStructure. 2004-03-23 Milan Zamazal * czech.scm (lts.ruleset): Some vowel-vowel stroke insertion disabled. 2004-03-23 Milan Zamazal * czech.scm (lts.ruleset): Typo in phoneme names fixed. (czech-normalize): Sharp S added. 2004-03-23 Milan Zamazal * czech-lexicon.scm: `copyright' added. * czech.scm (lts.ruleset): i-vowel handling fixed; `copy' removed. 2004-03-23 Milan Zamazal * czech-mbrola.scm (czech-mbrola-lts): `_' added. 2004-03-23 Milan Zamazal * czech-lexicon.scm: Typo in e: transcription fixed. 2004-03-22 Milan Zamazal * czech.scm (lts.ruleset): Vowel-vowel rules added. 2004-03-22 Milan Zamazal * czech-lexicon.scm: Lexicon entries fixed. * dump-pronounciation.scm (dump-pronounciation): Dump pronounciation in a form suitable for the lexicon. * czech.scm (czech-multiword-abbrevs): `=' added. (lts.ruleset): New special rules added. 2004-03-22 Milan Zamazal * czech.scm (czech): `syl' feature of _ and @ set to `-'. 2004-03-20 Milan Zamazal * gen-all-words.sh: New script. 2004-03-19 Milan Zamazal * Makefile: New file. * czech-lexicon.scm: Everything except lexicon entries moved to czech.scm. * czech.scm: Load compiled lexicon. (czech-lexicon-file): New variable. 2004-03-18 Milan Zamazal * czech-words: Updated. * czech-words-all: Removed. 2004-03-18 Milan Zamazal * czech.scm (czech-item.has_feat, czech-token.unknown_word_name) (czech-token.separator_word_name, czech-token.garbage_word_name) (czech-token_to_words): Underscores in names replaced by hyphens. 2004-03-18 Milan Zamazal * czech.scm (czech-int-select-contours): First syllable adjustment of final contours fixed. 
2004-03-18 Milan Zamazal * czech.scm (czech-int-contour-tree): New contourtypes FA and FB. (czech-int-contours): Likewise; one missing FA/3 contour added. 2004-03-18 Milan Zamazal * czech.scm (czech-int-select-contours): Redundant code removed. (czech-int-contour-tree): Typo in M contour determination fixed; test for preelement fixed. 2004-03-18 Milan Zamazal * czech.scm (czech-guess-pos): "jak" and derivatives added to `question'; "*pak" question words added; question words removed from `misc'. (czech-pos): Handle "*pak" question words; cosmetic changes. 2004-03-17 Milan Zamazal * czech.scm (czech-identify-stress-units): Invalid counting in many single-syllabic word processing fixed; single syllabic initial processing fixed; ensure that in many single syllabic chains no single syllabic element remains. (czech-stress-unit-phonemes): Protection against preelement mark added. 2004-03-17 Milan Zamazal * czech.scm (czech-token_to_words): Read "[ckm]m" as a measure length only if the previous token is a number. 2004-03-17 Milan Zamazal * czech.scm (czech-pos-in-phrase-from, czech-pos-in-phrase-to): New functions. (czech-pos-last-in-phrase?): Use them. (czech-item.feat*?): New function. (czech-phrase-cart-tree): Standard phrase position feature functions that don't work replaced by our own functions; number of allowed in conjunction break guessing neighboring words reduced to 1. 2004-03-17 Milan Zamazal * czech.scm (czech-int-select-contours): Contour values of final single syllabic units fixed. (czech-item.feat?): Don't crash when item is nil. 2004-03-16 Milan Zamazal * czech.scm (czech-identify-stress-units): Don't append last-unit if it is nil. 2004-03-16 Milan Zamazal * czech.scm (czech-translate-split-diphthongs): make-item undefined. (czech-translate-add-vowels): make-item replaced by insert-item. 2004-03-16 Milan Zamazal * czech.scm (czech-int-select-contours): Bug in long contour handling fixed. 
2004-03-16 Milan Zamazal * czech.scm (czech-duration): Compute duration factor from the number of phonemes, not syllables. (czech-duration): Attach special durations to initial single-syllabic words. 2004-03-16 Milan Zamazal * czech.scm (czech-add-strokes): Pause testing condition fixed. 2004-03-16 Milan Zamazal * czech-debug.scm (czech-debug-print-relation): Exclude `NB' feature values from the report; don't report empty feature sets. (czech-debug-print-durations): The first occurrence of this function renamed to czech-debug-print-durfactors. (czech-debug-print): Honor the previous change. (czech-debug-print-units): Print contours multiplied by 100. 2004-03-15 Milan Zamazal * czech-debug.scm: New file. 2004-03-15 Milan Zamazal * czech.scm (czech-identify-stress-units): List handling in initial single-syllabic word processing fixed. 2004-03-15 Milan Zamazal * czech.scm (czech-word): Don't call czech-add-keystrokes. (czech-pause): Call czech-add-keystrokes. (czech-add-strokes): Don't insert stroke if pause is present at that place. 2004-03-15 Milan Zamazal * czech.scm (czech-pause-method): Renamed to czech-pause-breaks. (czech-pause): New function. (voice-czech-common): Honor the previous changes. 2004-03-15 Milan Zamazal * czech.scm (czech-duration): Honor Duration_Stretch. 2004-03-12 Milan Zamazal * czech.scm (czech-stress-units): When testing for an FF unit, match any punctuation containing an FF termination character. 2004-03-12 Milan Zamazal * czech.scm (czech-pos-last-in-phrase?): Extended and improved. 2004-03-12 Milan Zamazal * czech.scm (czech-duration): Set dur_factor on non-syllabic stress units too. 2004-03-12 Milan Zamazal * czech.scm (czech-phoneme-durations): Pause length increased. (czech-silence-duration-factors): B and SB lengths decreased. 2004-03-12 Milan Zamazal * czech.scm (czech-phoneset-translation): Long vowels removed. 2004-03-12 Milan Zamazal * czech.scm (czech-int-targets): Don't ignore entries with contourval 0 present. 
2004-03-12 Milan Zamazal * czech-mbrola.scm (mbrola_cz2): Adjust czech-after-analysis-hooks*; use Param.Set for setting the synthesis method. (czech-mbrola-lts): Diphthongs added. (czech-mbrola-phoneme-durations): Updated to the new phoneset. * czech.scm (czech-translate-split-diphthongs): New function. (czech-add-segments): Renamed to czech-translate-add-vowels; run only when the current language is Czech. (czech-phone-adjustment): Renamed to czech-translate-phonemes. (czech-after-analysis-hooks): Honor previous changes; turned into defvar. (czech-phoneme-durations): Diphthong lengths increased. 2004-03-11 Milan Zamazal * czech-lexicon.scm (lex.add.entry): Pronounciation of angle brackets unified with czech-multiword-abbrevs. * czech.scm (czech-token_to_words): Look into lexicon only after punctuation is processed; handle punctuation-only tokens. 2004-03-10 Milan Zamazal * czech.scm (czech-token_to_words): Assign `sym' pos to single letter word expansions. 2004-03-10 Milan Zamazal * czech.scm (czech-identify-stress-units): Paren typo fixed; append last-unit to the result; invalid nth_cdr index fixed in final single-syllabic processing. 2004-03-10 Milan Zamazal * czech.scm (czech-stress-units): Handle multiple character punctuation. (czech-yes-no-question): Likewise. 2004-03-10 Milan Zamazal * czech.scm (czech-phrase-cart-tree): Put B before left parenthesis; handle multiple character punctuation. 2004-03-10 Milan Zamazal * czech.scm (czech-phrase-cart-tree): Dash handling fixed. 2004-03-10 Milan Zamazal * czech.scm (czech-token.punctuation): `-' added. (czech-token_to_words): Don't handle dashes; don't throw away dashes inside words. 2004-03-10 Milan Zamazal * czech-lexicon.scm: Various kinds of braces added for `punc'. 2004-03-09 Milan Zamazal * czech.scm (czech-int-targets): Apply the new contour value conversion also when contourval is a list and fix the order of values in such a case. 
(czech-int-select-contours): Coding typo in parenthesis fixed for long contours. 2004-03-08 Milan Zamazal * czech.scm (czech-int-targets): Use int_general_params, not czech-int-simple-params; value computation fixed. (czech-int-contours): Values converted to from +-1 to +-0. 2004-03-05 Milan Zamazal * czech.scm (czech-add-segments): Don't perform schwa insertion. 2004-03-05 Milan Zamazal * czech.scm (czech-guess-pos): New words added to question. 2004-03-05 Milan Zamazal * czech.scm (czech-identify-stress-units, czech-add-strokes): Use POS information stored in word features. 2004-03-05 Milan Zamazal * czech.scm (czech-pos): Check all words against czech-guess-pos. 2004-03-05 Milan Zamazal * czech.scm (czech-downcase, czech-word-pos?): New functions. (czech-identify-stress-units, czech-pos, czech-add-strokes): Use them. 2004-03-05 Milan Zamazal * czech.scm (czech-int-contour-tree): New variable. (czech-int-select-contours): Use it; preelements contour handling fixed. 2004-03-04 Milan Zamazal * czech.scm (czech-randomize): New variable. (czech-rand): Honor it. 2004-03-04 Milan Zamazal * czech.scm (czech-stress-units): Put last unit under type F instead of FF if it is not followed by punctuation. 2004-03-04 Milan Zamazal * czech.scm (czech-duration-random-factor): New variable. (czech-duration): Randomize durations. 2004-03-04 Milan Zamazal * czech.scm (czech-int-simple-params): f0_std set to 10. (czech-int-targets): Honor f0_std in some way. 2004-03-04 Milan Zamazal * czech.scm (czech-yes-no-question): Implemented. (czech-guess-pos): `question' added. 2004-03-04 Milan Zamazal * czech.scm (czech-int-contours): New variable. (czech-intonation-targets): Completely rewritten and renamed to czech-int-targets. (czech-int-select-contours): New function. (voice-czech-common): Set it as Int_Method. (czech-stress-units): Handle question types and F*-1 positions. (czech-yes-no-question): New function. (czech-syllable-kernels): New function. 
(czech-stress-unit-phonemes): Return items, not their names. (czech-unit-syllable-count, czech-stress-unit-phonemes): Accept units themselves, not only lists. (czech-duration): Take advantage of the previous change. (defPhoneSet): New phone feature `syl'. (czech-syllabic-vocals, czech-syllabic-consonants): Removed. (czech-item.feat?): New function. (czech-rand-range): New variable. (czech-rand): New function. (czech-random-choice): Use it. 2004-03-02 Milan Zamazal * czech-mbrola.scm (mbrola_cz2): Set czech-after-analysis-hooks*. * czech.scm (czech-after-analysis-hooks): New variable. (czech-reset-parameters): Set new variable czech-after-analysis-hooks*. (voice-czech-common): Use czech-after-analysis-hooks*. 2004-03-02 Milan Zamazal * czech.scm (voice-czech-common): Call czech-add-segments in after_analysis_hooks, not postlex_rules_hooks. (czech-add-segments): Adjust `end' features. (czech-phoneme-durations): Don't consider segment adjustments. (czech-add-segments, czech-phone-adjustment): Return utterance. 2004-02-28 Milan Zamazal * czech.scm (czech-duration-cart-tree): Removed. (czech-duration): New function. (voice-czech-common): Set it as the duration method. (czech-silence-duration-factors) (czech-stress-duration-factors): New variables. (czech-stress-units): Create IntStress relation; don't put intonation units to StressUnit, put segments there. 2004-02-28 Milan Zamazal * czech.scm (czech-add-strokes): New function. (czech-word): Call it. (czech-add-segments): Don't insert strokes. 2004-02-28 Milan Zamazal * czech.scm (czech-word): czech-add-segments moved to postlex_rules_hooks. 2004-02-28 Milan Zamazal * czech.scm (czech-pos): Final word in phrase detection fixed and other fixes. (czech-pos-last-in-phrase?): New function. 2004-02-28 Milan Zamazal * czech.scm (czech-phone-adjustment): Typo in string-equal fixed. 2004-02-28 Milan Zamazal * czech.scm (czech-stress-units): Paren typo in unit insertion fixed; final punctuation check fixed. 
(czech-syllabic-vocals): Diphthongs added. (czech-identify-stress-units): Test for units* end in the while loop condition; use cycle instead of recursion in merge-n; preelement placement fixed. (czech-word): Debugging code removed. 2004-02-26 Milan Zamazal * czech-lexicon.scm (czech-multiword-abbrevs): New variable. * czech.scm (czech-word): Renamed to czech-add-segments. (czech-word, czech-stress-units, czech-intonation-units) (czech-identify-stress-units): New functions. (czech-token_to_words): Handle multiword abbreviations. (czech-non-syllabic-prepositions) (czech-proper-1-syllabic-prepositions, czech-special-final-words): New variables. (czech-guess-pos): "ku" added. (czech-syllabic-vocals, czech-syllabic-consonants): New variables. (czech-syllable-count, czech-unit-syllable-count) (czech-stress-unit-phonemes): New functions. (czech-random-choice): New function. 2004-02-26 Milan Zamazal * czech.scm (czech-phrase-cart-tree): Don't put breaks before vowels. (czech-word): Insert _ between vowels on word boundaries. 2004-02-26 Milan Zamazal * czech.scm (lts.ruleset): New phoneme _. (czech-phoneme-durations): _ added. (czech-phoneset-translation): Translate _ to #. 2004-02-26 Milan Zamazal * czech.scm (lts.ruleset): Unused symbol SchwaCons removed. 2004-02-26 Milan Zamazal (lex.add.entry): Entry syllabification for the "unknown" word fixed. 2004-02-26 Milan Zamazal * czech.scm (czech-non-pause-words, czech-phrasify): Removed. (voice-czech-common): Set Phrasify_Method to Classic_Phrasify. 2004-02-25 Milan Zamazal * czech.scm: Use Param.get and Param.set instead of Parameter.get and Parameter.set. 2004-02-25 Milan Zamazal * czech.scm (czech-phrase-cart-tree): Put B before some no-comma conjunctions. 2004-02-25 Milan Zamazal * czech.scm (czech-phrasify): New function. (czech-pause-method): Long phrase breaking moved to czech-phrasify. (voice-czech-common): Set Phrasify_Method. 
2004-02-25 Milan Zamazal * czech.scm (lts.ruleset): Vowel duplication and schwa insertion retracted. (czech-word): New function. (voice-czech-common): Set Word_Method parameter. 2004-02-25 Milan Zamazal * czech-lexicon.scm: Moved to new syllabification. 2004-02-24 Milan Zamazal * czech.scm (czech-phoneme-durations): New phonemes included; phoneme lengths adjusted considering diphone construction. 2004-02-24 Milan Zamazal * czech.scm (lts.ruleset): Completely reworked to match the new diphone set. 2004-02-24 Milan Zamazal * czech.scm (czech-syllabify): Removed. (czech-syllabify-phstress): Make the word as a single syllable. 2004-02-23 Milan Zamazal * czech.scm (czech-phoneset-translation): New variable. (czech-reset-parameters): Use it as default. 2004-02-23 Milan Zamazal * czech.scm (defPhoneSet): Phonemes from literature added; phoneme properties changed. 2004-02-23 Milan Zamazal * czech.scm (czech-accent-cart-tree): Return NONE unconditionally. (czech-syllabify-phstress): Don't generate starting accent. 2004-02-23 Milan Zamazal * README.Czech.cs: New file. 2004-02-10 Milan Zamazal * czech.scm (czech-proclaim-voice): Introduce voice options, support dialect and gender settings. 2004-02-03 Milan Zamazal * czech.scm (lts.ruleset): Infinite loop bug fixed; special words moved to lexicon. 2004-02-03 Milan Zamazal * czech-lexicon.scm: New file. 2004-02-02 Milan Zamazal * czech-mbrola.scm (mbrola_cz2): Set czech-volume-scale*. 2004-01-30 Milan Zamazal * czech.scm (czech-int-lr-params): Removed, use czech-int-simple-params instead. (czech-reset-parameters, voice-czech-common): Honor the previous change. (czech-int-simple-params): Values changed to reasonable numbers. (voice-czech-common): Switch to general intonation method. (czech-intonation-targets): New function. 
2004-01-30 Milan Zamazal * czech.scm (czech-duration-cart-tree): Use other means than the undocumented feature syl_break for determining clause initials/finals; factor of stressed initials/finals reduced to 1.3. 2004-01-29 Milan Zamazal * czech.scm (czech-syllabify-phstress, czech-syllabify): New functions. (czech-lts): Use it instead of lex.syllabify.phstress. 2004-01-29 Milan Zamazal * czech.scm (czech-int-lr-params): Changed to respect the English intonation tree parameters and an experimental Czech voice. 2004-01-29 Milan Zamazal * czech.scm (czech-duration-tree): New variable. (voice-czech-common): Duration method changed to Tree_ZScores. 2004-01-29 Milan Zamazal * czech.scm (czech-volume-scale, czech-volume-scale*): New variables. (czech-adjust-volume): New function. (czech-reset-parameters): Reset czech-volume-scale*. (voice-czech-common): Set after_synth_hooks. 2004-01-29 Milan Zamazal * czech.scm (czech-non-pause-words): New function. (czech-pause-method): Insert pauses into long non-breaking phrases. 2004-01-29 Milan Zamazal * czech.scm (defPhoneSet, PhoneSet.silences) (czech-phoneme-durations): ## removed. (czech-phrase-cart-tree): Use defvar instead of set; SB introduced. (czech-phone-adjustment): ## support removed. (czech-pause-method): Handle SB instead of generating ##. 2004-01-28 Milan Zamazal * czech.scm (czech-f2b-f0-lr-end): Renamed to czech-f0-lr-end. 2004-01-15 Milan Zamazal * czech-unisyn.scm: Completely rewritten. * czech-mbrola.scm (czech-mbrola-init, czech-mbrola-description): Removed. * czech.scm (czech-description, czech-parameter): Removed. (czech-phoneset-translation*): New variable. (czech-reset-parameters): Reset synthesis-method and czech-phoneset-translation. (czech-phone-adjustment, voice-czech-common): Honor the previous changes. (voice-czech-common): Don't initialize synthesizer. * czech.scm (czech): Starting dz and dz~ fix in the ruleset. 2004-01-07 Milan Zamazal * czech.scm (czech-reset-parameters): New function. 
(czech-proclaim-voice): Call it. (czech-guess_pos): Renamed to czech-guess-pos. (czech-phrase_cart_tree): Renamed to czech-phrase-cart-tree. (czech-int_simple_params, czech-int_lr_params) (czech-accent_cart_tree, czech-int-tone-cart-tree) (czech-f0-lr-start, czech-f0-lr-mid, czech-f2b-f0-lr-end) (czech-phoneme-durations): Likewise. (czech-parameter): Use czech-description* instead of czech-description. (voice-czech-common): Use parameter variables. * czech-mbrola.scm (czech-mbrola-lts): New ruleset. (mbrola_cz2): Enable it. * czech.scm (czech-basic-lts): New function. (czech-lts-extra-rules): New variable. (czech-lts): Apply czech-lts-extra-rules. (voice-czech-common): Initialize czech-lts-extra-rules. 2003-12-31 Milan Zamazal * czech.scm (czech-phoneme_durations): `e' and `e:' durations increased. 2003-12-22 Milan Zamazal * czech.scm (czech-proclaim-voice): Set current voice. Voice name in voice proclamation fixed. * czech-mbrola.scm (czech-mbrola-phoneme_durations): New variable. (mbrola_cz2): Set czech-phoneme_durations. * czech.scm (czech-phoneme_durations): Completely changed. 2003-12-17 Milan Zamazal * czech-unisyn.scm: New file. * czech-mbrola.scm: Require `czech'. (voice_czech_mbrola_cz2): New function. (czech_mbrola_cz2): Voice declared. * czech.scm: Unisyn database declaration etc. moved to czech-unisyn.scm. (voice_czech): Renamed to voice-czech-common. (czech-proclaim-voice): New macro. * czech.scm (czech-int_lr_params): Means set to 105. 2003-12-01 Milan Zamazal * czech.scm (czech): Phoneset made compatible with the radio_phones phoneset. (czech-phoneme_durations): Changed to defvar. i: shortened, u: lenghtened. (lex.add.entry): New words added. (czech-int_tone_cart_tree, czech-int_lr_params): New variables. (voice_czech): Use Intonation_Tree intonation method. (czech-int_simple_params, czech-accent_cart_tree): Changed to defvar. (czech-f0_lr_start, czech-f0_lr_mid, czech-f2b_f0_lr_end): New variables. (voice_czech): Use them. 
2003-11-28 Milan Zamazal * czech.scm (lex.add.entry): m/s and km/h added. (czech-token_to_words): Check for presence of the token name in lexicon at the proper place. (czech-prepend-numprefix, czech-number*): New functions. (czech-token_to_words): Support spaced numbers. (czech-number-from-digits): Don't read thousands etc. when they are zero. (czech-item.has_feat): New function. (czech-prepend-numprefix, czech-token_to_words, czech-pos): Use it. (czech-token_to_words): Time and ratio support added. (czech-number@): New function. (czech-downcase): Renamed to czech-normalize; some German characters added. (czech-char-regexp, czech-chars): New variables. (czech-tokenize-on-nonalphas, czech-token_to_words, czech-pos): Use them. (czech-token_to_words): Missing argument to string-match added. (czech-phrase_cart_tree): Make semicolon cause BB, exclude quote from the rules. (czech-guess_pos): New variable. (voice_czech): Set guess_pos. (czech-phrase_cart_tree): Insert B before some conjunctions. (voice_czech): Int_Method value fixed. (czech-int_simple_params): f0_std reduced to 5. (czech-accent_cart_tree): Completely rewritten. (czech-int_simple_params): f0_mean increased to 100. (lex.add.entry): "GNU" added. (czech-all-same): New function. (czech-token_to_words): Use it, for separator matching fix. 2003-11-27 Milan Zamazal * czech.scm (czech): proclaim_voice added. (czech-token_to_words, lex.add.entry): Czech monetary support added. (czech-token.punctuation): Dash removed. (czech-token_to_words): Handle dashes. (czech-token_to_words): Try to identify more acronyms; apply czech-downcase on them. (czech-token_to_words): Don't include spaces in number regexps. (czech-token_to_words): Support for numeric ranges added. (czech-pos): Don't set `pos' feature if already set. (czech-token_to_words): Split non-alphanumeric tokens to characters; handle separators and long sequences of non-alphanumeric characters. 
(czech-token.separator_word_name) (czech-token.garbage_word_name): New variables. 2003-11-26 Milan Zamazal * czech.scm (czech-int_simple_params): New variable. (voice_czech): Use `simple' intonation method. (czech-default-synthesis-init): Initialize all required UniSyn parameters. 2003-11-10 Milan Zamazal * czech.scm (lex.add.entry): Non-words removed. 2003-11-06 Milan Zamazal * czech.scm (czech-pos): Put the `sym' type on certain words. 2003-11-05 Milan Zamazal * czech.scm (lex.add.entry): Pronounciation of standalone one-letter words added. 2003-10-27 Milan Zamazal * czech.scm (czech-pause_method): New function. (voice_czech): Set it as the pause method. (defPhoneSet): New phone `##'. (PhoneSet.silences): Added. (czech-phoneme_durations): Added. 2003-10-17 Milan Zamazal * czech.scm (lex.add.entry): "shift" and "control" added. 2003-10-01 Milan Zamazal * czech.scm (czech-phone-adjustment): Run only if the current language is Czech. 2003-09-23 Milan Zamazal * czech.scm (lex.add.entry): Braces added. (lex.add.entry): `punc' parentheses added. (czech-pos): Handle parentheses as punctuation. 2003-09-22 Milan Zamazal * czech.scm (lex.add.entry): Mistakenly used y's replaced by i's. (czech-pos): If there's no `punctype' feature set on the token, mark punctuation characters as punctuation. 2003-09-19 Milan Zamazal * czech.scm (czech-token.whitespace): New variable. (voice_czech): Set token.whitespace. 2003-09-18 Milan Zamazal * czech.scm (lts.ruleset): Bug fixes, additions, improvements. (czech-number): Handle introducing + and -; handle commas. (czech-tokenize-on-nonalphas): Handle numbers too. (czech-token_to_words): Don't fail on numbers containing punctuation; handle numbers with +, -, and comma. (lex.add.entry): Numerical punctuation added; `range' - added. (czech-number): Use lexicon lookup for + and -. (czech-token_to_words): `punctype' token feature introduced. (czech-pos): Consider it. (czech-number): Use the comma character directly, not its spoken form. 
(czech-token_to_words): Don't accept final `,' or `.' as comma. Handle dot just after dot in a very special way. (lex.add.entry): Pronounce ("." num) as dot. (czech-token_to_words, czech-pos): `punctype' token feature handling cosmetic changes. (czech-token_to_words): Don't duplicate after-number dot at end of string. 2003-09-17 Milan Zamazal * czech.scm (czech-token_to_words): Spell numbers starting with 0. (czech-token_to_words): Exclude R from the list of spell-only letters. (czech-token_to_words): Don't split non-syllable words that are in lexicon. A starting L or R doesn't make a syllable. (lex.add.entry): Duplicate backslash definition removed, double quote definition added. 2003-08-29 Milan Zamazal * czech.scm (czech-int_simple_params): Removed. (voice_czech): Don't set czech-int_simple_params. Set Int_Targets. Set Int_Method to Intonation_Tree. 2003-08-28 Milan Zamazal * czech.scm (czech-pos): New function. (voice_czech): Set it as POS_Method. (lex.add.entry): Punctuation entries defined properly. (lex.add.entry): Pauses removed from the lexicon words. (czech-token_to_words): Don't consider `s' being syllabic. (lex.add.entry): New words. 2003-08-27 Milan Zamazal * czech-mbrola.scm (PhoneSet.silences): Removed. (provide): Added. * czech.scm (czech-unknown-symbol-message): Renamed to czech-unknown-symbol-word. Value changed to a single word value. (czech-downcase): Don't include punctuation. (czech-token.punctuation, czech-token.prepunctuation): New variables. (voice_czech): Use them. (lex.set.lts.ruleset): Don't call it. (lex.add.entry): Parentheses added. (lex.add.entry): Proper syllabification and accents added. (czech-phoneme_durations): Typo in e:'s value fixed. (czech-phoneme_durations): Random vowel duration adjustments. (czech-token.punctuation): Dash added. (lex.add.entry): "Emacs" and "copyright" added. (czech-tokenize-on-nonalphas): New function. (czech-token_to_words): Handle punctuation characters in tokens. 
(czech-token_to_words): Apply recursively on parts separated by punctuation. Spell non-syllabic words. festival-czech-0.3/FAQ0000644000175000017500000000054111026005751012734 0ustar pdmpdmQ: How do I change the intonation of a Czech voice? A: Set the int_general_params variable after the Czech voice is selected, e.g.: (voice_czech_ph) (set! int_general_params (cons (list 'targ_func czech-int-targets) '((f0_mean 102) (f0_std 30)))) f0_mean sets the base frequency, f0_std determines how expressive the intonation should be. festival-czech-0.3/INSTALL.cs0000644000175000017500000000355411026005751014046 0ustar pdmpdmStandardní instalace: - V adresáři `festival-czech' proveďte příkaz make pro vygenerování lexikonu. - Proveďte příkaz make install Pokud máte festivalové *.scm soubory nainstalovány jinde než v /usr/share/festival, uveďte správný adresář jako proměnnou `festival_path' příkazu `make', například: make install festival_path=/usr/local/festival - Odinstalaci lze provést příkazem `make uninstall', opět s případným uvedením festival_path. Pokud nechcete, nemusíte festival-czech instalovat do Festivalu, ale můžete ho používat například přímo z instalačního adresáře. V takovém případě je potřeba místo `make install' udělat následující kroky: - Přidat adresář do proměnné `load-path' Festivalu, například: (set! load-path (cons ".../festival-czech" load-path)) - Nastavit cestu k lexikonu: (set! czech-lexicon-file ".../festival-czech/czech-lexicon.out") Toto nastavení musí být provedeno ještě před natažením češtiny popsaným v následujícím kroku. - Po startu Festivalu natáhnout podporu češtiny, například: (require 'czech) festival-czech samotné implementuje pouze pravidla české syntézy. Pokud chcete provádět syntézu samotnou, potřebujete k ní i český hlas. Existuje svobodný český hlas pro Festival voice-czech-ph dostupný z http://www.freebsoft.org/festival-czech-diphone-database. Kromě toho je podporována i syntéza prostřednictvím proprietárního syntetizéru Mbrola. 
Chcete-li jej používat, musíte udělat následující: - Nastavit proměnnou czech-mbrola_database: (set! czech-mbrola_database ".../cz2") - Nastavit jméno binárky mbrola: (set! mbrola_progname ".../mbrola") - Natáhnout soubor czech-mbrola.scm: (require 'czech-mbrola) - Nastavit český hlas z Mbrola: (voice_czech_mbrola_cz2) Prosím uvědomte si, že Mbrola není svobodný software a můžete jej používat jen do míry svolení a podpory jeho výrobce. festival-czech-0.3/Makefile0000644000175000017500000000356511026005751014053 0ustar pdmpdm# Makefile for festival-czech # # Copyright (C) 2004, 2005, 2006 Brailcom, o.p.s. # # Author: Milan Zamazal # # COPYRIGHT NOTICE # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License # for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
package := festival-czech version := 0.3 festival_path = /usr/share/festival INSTALL_PROGRAM = install distfiles := *.scm *.out .PHONY: all install install-strip uninstall clean distclean mostlyclean \ maintainer-clean TAGS info dvi dist check all: czech-lexicon.out %.out: %.scm festival --batch '(lex.compile "$<" "$@")' install: all $(INSTALL_PROGRAM) -m 644 $(distfiles) $(festival_path)/ install-strip: $(MAKE) INSTALL_PROGRAM='$(INSTALL_PROGRAM) -s' install uninstall: all for f in $(distfiles); do rm $(festival_path)/$$f; done mostlyclean: clean: mostlyclean distclean: clean rm -rf $(package)-$(version) *.tar *.tar.gz maintainer-clean: distclean rm -f *.out TAGS: info: dvi: dist: all distclean mkdir $(package)-$(version) cp $(distfiles) COPYING ChangeLog FAQ INSTALL* Makefile NEWS* README* $(package)-$(version)/ make -C $(package)-$(version) all chmod 755 $(package)-$(version) chmod 644 $(package)-$(version)/* tar cf $(package)-$(version).tar $(package)-$(version) gzip -9 $(package)-$(version).tar check: festival-czech-0.3/NEWS.cs0000644000175000017500000000033511026005751013506 0ustar pdmpdmCo je nového: * festival-czech 0.3 - Opravy chyb. * festival-czech 0.2 - Dlouhá čísla jsou čtena s číslicemi sdruženými do skupin. - Opravy chyb. * festival-czech 0.1 První vypuštěná verze. festival-czech-0.3/README0000644000175000017500000000047011026005751013263 0ustar pdmpdmfestival-czech ============== This is Czech support for Festival. More information is available in Czech in the files README.cs, INSTALL.cs, and README.Czech.cs. If you don't understand Czech and are looking for more information, feel free to contact us at festival-czech@lists.freebsoft.org . 
-- Milan Zamazal festival-czech-0.3/README.Czech.cs0000644000175000017500000001741111026005751014725 0ustar pdmpdmPopis postupů české syntézy použitých ve festival-czech ======================================================= * Základní jednotky řeči Základní jednotky řeči byly definovány dle difonové sady vypracované Mgr. Pavlem Machačem. Tento materiál je dostupný v balíku voice-czech-ph. ** Fonémy Fonémová sada je definovaná standardním způsobem v czech.scm. Vlastnosti fonémů definované v czech.scm byly definovány dle potřeb české syntézy, bez přímé návaznosti na konkrétní jazykové nebo fonetické poznatky. Různé fonémy přitom mohou mít zcela shodné vlastnosti, nebylo snahou učinit fonémy svými vlastnostmi unikátní. ** Difony Používáme difonovou sadu z výše zmíněného materiálu Mgr. Pavla Machače. Ta v rozumné míře aproximuje množinu dvojic hlásek vyskytujících se v češtině. ** Přidávání segmentů Část procesu stanovení výsledné sekvence difonů se provádí až před samotnou syntézou, v czech-after-analysis-hooks. Do sekvence segmentů se zde přidávají umělé segmenty. Cílem těchto akcí je dosažení lepšího výsledného zvuku a případně též konverze fonémů pro syntetizéry používající jinou fonémovou sadu. Protože se přidávání segmentů provádí až těsně před syntézou, není nutno je nijak zohledňovat v určování prozodie. Propagace prozodických informací do přidaných segmentů je zajištěna automaticky. ** Slabiky Ve festival-czech je za slabiku považováno celé mluvené slovo. Prvním důvodem je, že je to výhodné při určování prozodie. Druhým důvodem je, že rozklad slov na slabiky je v češtině obtížně určitelný. Při zkoumání slabik je relevantní pouze jejich počet, který je dán výskytem samohlásek a slabikotvorných souhlásek. Na to není rozklad na slabiky zapotřebí. * Výslovnost jednotlivých slov Určování výslovnosti jednotlivých slov je řešeno primárně LTS pravidly `czech'. 
Korekce fonetické formy, již nezávislé na pravopisných pravidlech, se provádí ve funkci czech-adjust-segments. Tato funkce se volá až po zpracování pauz, protože pauzy jsou pro určení výsledné fonetické podoby podstatné. Protože současný český pravopis je příliš nepravidelný, je nutno si při převodu psané formy na fonetickou vypomáhat lexikonem. Lexikon je pojatý jako seznam přesně těch slov, která nejsou a ani rozumně být nemohou pokryta LTS pravidly nebo pro která by nesprávně proběhl převod z tokenu na slovo (například v případě slova _pst_, které by bylo hláskováno). ** Jak postupovat při nalezení nesprávné výslovnosti slova Pokud je slovo obsaženo v lexikonu, příslušný záznam se jednoduše opraví. Pokud slovo není obsaženo v lexikonu, mělo by být v první řadě posouzeno, zda příslušný problém není širšího rázu a nestálo by za to jej ošetřit přidáním LTS pravidla. Jedná-li se spíše o výjimku, kterou nemá smysl zohledňovat v LTS pravidlech, přidá se do lexikonu. Jde-li o slovo ohebné, měly by být přidány všechny jeho tvary, jsou-li vyslovovány chybně též. Je dobré zamyslet se i nad formami vytvořenými přidáním nebo odebráním předpon. Pokud dojde na přidání nového LTS pravidla, je zapotřebí prověřit, jaké změny způsobí. Optimální by bylo nechat vygenerovat výslovnost všech českých slov před změnou LTS pravidel a po ní a následně srovnat jejich diff výstupy. To je však na běžných strojích poměrně zdlouhavá záležitost a je tedy praktičtější ji aplikovat pouze na množinu slov, které mohou být příslušným pravidlem dotčeny. Je-li nové LTS pravidlo řádně ověřeno, může být přidáno. Volitelně lze prověřit slova v lexikonu, zda se výslovnost některých z nich neshoduje s výsledkem po aplikaci nových LTS pravidel, a taková slova z lexikonu vyřadit. ** Tokeny, které se expandují na více slov Je-li zápis určitého slova expandován na vícero slov, je nutno jeho expanzi definovat v proměnné czech-multiword-abbrevs. 
Taková slova nepatří do lexikonu -- lexikon definuje jen výslovnost jednotlivých slov. ** Pravidla pro přepis z textu do fonetické podoby Při přepisu textu do fonetické podoby je v lexikonu dovoleno používat veškeré fonémy definované v české fonémové sadě. Přitom musí být dodržena následující pravidla. *** Konverze znělých souhlásek na neznělé a naopak Neprovádí se konverze znělých souhlásek na neznělé a naopak, kde to není vyloženě nutné. Například správný (z hlediska festival-czech) fonetický přepis slova _ovce_ je (o v c e), nikoliv (o f c e). Výslovnost difonu v-c se totiž jednak v tomto slově podstatně neliší od výslovnosti tohoto difonu v jiných slovech a jednak se s opravdovou znělou výslovností tohoto difonu v češtině nesetkáme (ani na hranicích slov a slovech přejatých). Jeho správná výslovnost tedy bude zajištěna v difonové databázi nebo jejích konverzních pravidlech. Naproti tomu správný přepis slova _magnetismus_ je (m a g n e t i z m u s), protože v něm se skutečně jedná o pozměněnou výslovnost slova přejatého a v koncovce je český difon z-m, nikoliv český difon s-m. Toto pravidlo nedává z fonetického hlediska smysl. Jeho cílem je však zjednodušení tvorby položek lexikonu (přispěvatel se může více držet psané formy a nemusí se zabývat speciálními případy) a zamezení lidové tvořivosti v nejednoznačných případech. *** Vkládání rázů Je nutno nezapomínat na vložení rázů mezi samohláskami. Správný přepis slova _neefektivní_ je (n e _ e f e k t i v n~ i:). Naopak správný přepis slova _poet_ je (p o e t). *** Dvojhlásky Dvojhlásky se reprezentují odpovídající dvojicí hlásek. Speciálním případem jsou dvojhlásky obsahující `i' nebo `i:'. Ve skutečnosti se nejedná o dvojhlásky, vyslovuje se totiž mezi nimi hláska `j'. Správný přepis slova _poezie_ tedy je (p o e z i j e). * Prozodie Prozodie ve festival-czech zahrnuje intonaci, délku, pauzy a přízvuk. Tyto parametry jsou konstruovány převážně na základě pravidel publikovaných v [palková:04]. 
Větné úseky a přízvukové takty jsou konstruovány ve fázi Word, jako zvláštní relace IntUnit a StressUnit. ** Intonace Pravidla pro intonaci byla převzata z [palková:04]. Konkrétní intonační tabulka kadencí byla převzata z [palková-ptáček:97]. V publikovaných prozodických pravidlech jsou určité nejasnosti, které byly rozřešeny následujícím způsobem: - Pro šesti a víceslabičné přízvukové takty se v pozici F používá intonační křivka ze skupiny A (v [palková-ptáček:97] příslušné pravidlo pro pozici F chybí). - Pro pozici F se volí intonační křivka ze skupiny A (v [palková:04] není specifikováno). - Pro pozici F-1 se vybírá pouze z intonačních křivek povolených pro skupinu F (pro jistotu -- v [palková:04] není specifikováno, k čemu by s ohledem na předchozí úpravu měly být F-křivky skupiny B). Nedořešena je detekce doplňkových tázacích vět ve funkci czech-yes-no-question, kde chybí dostatečně obsáhlý seznam tázacích zájmen, číslovek a příslovcí. Mělo by být možné jej získat s pomocí českého ispellu. ** Délka Délka se stanovuje na základě počtu slabik v přízvukovém taktu dle tabulky z [palková:04]. ** Pauzy Používáme pauzy tří délek: nejdelší (BB), střední (B) a krátká (SB). Umisťování pauz se definuje v proměnné czech-phrase-cart-tree a funkci czech-adjust-phrase-breaks. Použitá metoda určování pauz vychází z [palková-ptáček:66], není však přesnou implementací těchto pravidel. ** Přízvuk Dle [palková:04], sekce 1.2db), není v syntéze češtiny žádoucí přízvuk explicitně generovat změnou dynamiky. Přízvuk je modelován intonační křivkou. * Odkazy [palková:04] Zdena Palková: Soubor fonetických pravidel jako podklad pro prozodický komponent automatické syntézy TTS v češtině [palková-ptáček:97] Zdena Palková, Miroslav Ptáček: Modelling Prosody in TTS Diphone Synthesis in Czech; Forum Phoneticum 63, Frankfurt am Main 1997 [palková-ptáček:66] Zdena Palková, Miroslav Ptáček: TTS Issues: Prosody modifications in Text; in Speech processing, 6th Czech-German Workshop, Prag 1966, R. 
Vích (ed.) pp.32-34 + interní materiál Z. Palková, Závěrečná zpráva grantu GAČR 405/96/0301 -- Milan Zamazal Local variables: mode: outline end: festival-czech-0.3/README.Festival.cs0000644000175000017500000001525511026005751015452 0ustar pdmpdmStručný popis převodu textu do zvukové podoby ve Festivalu ========================================================== Celý proces řečové syntézy Festivalu je velmi podrobně popsán v dokumentu Building Synthetic Voices, který je k mání na www.festvox.org a lze jej doporučit každému, kdo chce festivalové mechanismy pochopit do dostatečné hloubky. Další informace jsou pak k dispozici v manuálu Festivalu, ty jsou však v určitých směrech neúplné a bez výše uvedeného dokumentu se při serióznější práci na novém festivalovém jazyce a/nebo hlasu nelze obejít. Tento dokument je stručným popisem syntézy a dostupných nástrojů pro ty, kdo potřebují získat základní orientaci v procesu festivalové syntézy a přitom nemají čas anebo zájem se věnovat zdlouhavému a podrobnému studiu výše uvedených dokumentů. Nejedná se o *uživatelský* úvod do Festivalu, znalost uživatelské práce s Festivalem je předpokládána. Konkrétní postupy použité pro češtinu jsou popsány ve zdrojovém kódu festival-czech (technické informace) a v souboru README.Czech.cs (jazykové informace). * Základní principy Festivalu Festival je systém modulární a celý proces se skládá z provedení libovolné sekvence modulů. Každý modul má svůj účel a pro některé části zpracování si lze vybrat z více alternativních modulů. Lze též použít libovolné moduly vlastní. Moduly se obvykle píšou ve schemovém programovacím jazyce Festivalu zvaném SIOD. Tento jazyk je popsán v manuálu Festivalu. 
* Fáze zpracování Zpracování textu se standardně skládá z následujících fází, definovaných v synthesis.scm: (defUttType Text (Initialize utt) (Text utt) (Token_POS utt) (Token utt) (POS utt) (Phrasify utt) (Word utt) (Pauses utt) (Intonation utt) (PostLex utt) (Duration utt) (Int_Targets utt) (Wave_Synth utt) ) Vše se točí okolo tzv. utterance, které představuje jakousi jednotku, více či méně anotovanou, kterou lze postupným doplňováním anotací (*features*) dostat až do stavu, kdy je schopna odeslání na zvukový výstup. Každá z výše uvedených fází je volání funkce na utterance `utt'. * Práce s utterance Potřebujete-li provést syntézu určitého textu, vytvoříte si příslušné utterance například následujícím způsobem: (Utterance Text "nějaký text") Funkce vrátí nezpracované utterance, které lze nechat plně zanalyzovat voláním funkce `utt.synth'. Pozor, ve festivalovém řádkovém rozhraní nelze zadávat 8-bitové znaky, v případě potřeby je nutno syntézu provést přes soubor. Utterance se skládá z tzv. relací, jejichž jména lze zjistit pomocí (utt.relationnames utterance) Obsah (seznam položek, items) žádané relace se vytáhne pomocí (utt.relation.items utterance 'relation-name) Mnohé položky se vyskytují ve více relacích. Relace má obecně podobu stromu a prostřednictvím položek mohou být tyto stromy vzájemně provázány. Relace položky lze zjistit pomocí (item.relations item) Základními vlastnostmi položky jsou její jméno a rysy: (item.name item) Rysy položky lze zjistit pomocí (item.features item) Lze si vyzkoušet: (mapcar item.features (utt.relation.items utterance 'Word)) Pro vypsání relace je přehlednější výstup z (utt.relation.print utterance 'Word) Lze též použít (utt.relation_tree utt 'SylStructure) Funkce utt.relation_tree vrací kompletnější informaci o dané relaci, obsahuje celý strom dat obsažených v relaci, zatímco utt.relation.print vypisuje pouze prvky nejvyšší úrovně. Občas narážíme na pojem globálního parametru. 
Hodnotu globálního parametru lze získat voláním (Param.get 'jméno-parametru) * Stručný popis fází zpracování ** Initialize Jen vytvoří prázdné utterance. ** Text Provede rozdělení textu na tokeny. Funkce `Text' je napsána v C++, využívá však proměnné `token.*' definované v token.scm. Definovaná interpunkční znaménka jsou oddělena od výsledných tokenů a jsou dostupná jako jejich features. ** Token_POS Provádí, je-li třeba, kontextové označkování tokenu pro rozlišení různého významu shodných tokenů. Tato fáze slouží pro určení správné výslovnosti slov ve fázi bezprostředně následující, pozdější fáze POS má účel jiný. ** Token Převádí tokeny na slova. Převod lze provést definicí funkce token_to_words. Při konverzi tokenu je možno se podívat na okolní tokeny (nebo lépe využívat kontextových informací získaných ve fázi Token_POS), takže je možno provádět i sofistikovanější operace než pouhé mapování jediného tokenu na jedno nebo více slov. ** POS (POS == Part of Speech) Provádí tagování konkrétních slov dle jejich pozice v řeči. Nepovinná část. Parametrizovatelné pomocí několika proměnných. Tato fáze, na rozdíl od fáze Token_POS, určuje význam jednotlivých slov již nikoliv pro určení výslovnosti slov, nýbrž pro určení jejich role ve vztahu k následnému určení prozodie, tj. pauz, délek, přízvuku a intonace. ** Phrasify Identifikace pauz (žádná, normální, dlouhá) mezi slovy. Mezi většinu slov se nevkládá žádná pauza, vkládá se za interpunkci, někdy krátká (po čárce), někdy dlouhá (konec věty). Tato funkce sama o sobě pauzy nevkládá, to se děje až na základě jí generovaných informací v Pauses. ** Word Provádí převod slov na fonémy a slabiky v implicitním festivalovém formátu. Definováno v lexicon.scm, není-li globálním parametrem Word_Method řečeno jinak, volá se C++ funkce Classic_Word. Celý proces je poměrně podrobně popsán v dokumentaci, sekce Lexicons. ** Pauses Vkládá pauzy. Pro tuto akci se používá funkce definovaná globálním parametrem Pause_Method. 
Implicitní je funkce Classic_Pauses, definovaná v pauses.scm. Funkce dělá v zásadě to, že vloží počáteční pauzu a pak vloží pauzy dle informací vytvořených ve fázi Phrasify. Navíc vyřadí z utterance slova označená jako interpunkce. Vkládání úvodní pauzy lze odrušit předefinováním funkce insert_initial_space, nemělo by se to však dít v jiných než odůvodněných případech, jinak tím utrpí výsledná kvalita syntézy. ** Intonation Generuje akcenty pro intonaci (první fáze zpracování intonace), ale nevytváří ještě intonaci samotnou (druhá fáze zpracování intonace), to dělá až Int_Targets. Používá se funkce definovaná globálním parametrem Int_Method. ** PostLex V této fázi lze aplikovat jakékoliv dodatečné transformace nad utterance, zařazené v seznamu postlex_rules_hooks. ** Duration Určí délky trvání jednotlivých segmentů (fonémů) syntetizovaného textu. ** Int_Targets Určí parametry intonační křivky, obvykle po slabikách. ** Wave_Synth Samotné sestavení zvuku na základě již všech dostupných anotací. Funkce, která se pro syntézu zavolá, je definována globálním parametrem `Synth_Method'. Kostra funkce Wave_Synth je napsána ve Scheme a nachází se v synthesis.scm. * Praktické poznámky. Docstringy umí vypsat funkce `doc'. Readline umí doplňovat jména funkcí, což je často užitečné. -- Milan Zamazal festival-czech-0.3/README.cs0000644000175000017500000000513211026005751013667 0ustar pdmpdmfestival-czech -- podpora češtiny pro Festival ============================================== Cílem festival-czech je poskytnout kvalitní kompletní svobodnou českou řečovou syntézu. Je využito systému Festival, který nabízí solidní svobodný framework pro tvorbu řečových syntéz a disponuje kvalitní svobodnou anglickou řečovou syntézou. Řečová syntéza je důležitým prvkem svobodných operačních systémů. Kromě jiného tvoří zásadní komponenty uživatelského rozhraní pro zrakově postižené uživatele. 
Současný stav svobodné české řečové syntézy je neuspokojivý, neexistuje žádný dostatečně kvalitní kompletní systém. Proto byla zahájena práce na festival-czech, kde je snahou dosáhnout s využitím solidní festivalové infrastruktury zaplnění této mezery. Projekt též může v budoucnu sloužit jako základ a zdroj nezbytného festivalového know-how pro další jazyky a přispět tak k vytvoření vícejazyčného svobodného řečového syntetizéru. festival-czech řeší dvě úlohy: 1. Převod textu do fonetického popisu, nezávislý na modulu provádějícím konečnou syntézu zvuku. 2. Difonovou databázi pro festivalový syntetizér. Tato část se nachází samostatně v podprojektu voice-czech-ph. Stručný návod k použití naleznete v souboru INSTALL.cs. festival-czech je nyní ve stavu rozpracovanosti. Máme funkční jazykový modul pro češtinu a funkční český difonový hlas. Jazykový modul se skládá z několika částí: fonémová a difonová sada (z větší části hotovo), základní pravidla pro převod psaného textu do hláskové podoby (ta musí být ještě doplněna a laděna), výslovnostní slovník (jeho obsah je zatím zcela minimální), minimální jazyková analýza (široké pole pro budoucí vylepšování interpretace psaného textu), prozodická pravidla (ta jsou víceméně kompletní, ale mohou v nich být ještě chyby). Difonový hlas, vytvořený v rámci podprojektu voice-czech-ph, je nyní v provozuschopném stavu, ale zatím neproběhlo ladění kvality jeho výstupu. Výsledek projektu závisí na dostupných zdrojích, na jednu stranu má potenciál dosáhnout poměrně slušného výsledku, na druhou stranu však může zajít na úbytě. Proto je vítána každá pomoc -- jazykovědná, programátorská, finanční nebo jakákoliv jiná. Projekt české festivalové syntézy je realizován společností Brailcom, o.p.s. Finančně na něj přispěly Nadační fond Českého rozhlasu, společnost Seznam.cz a Evropská komise (v rámci programu Leonardo da Vinci). Odbornou pomoc poskytl Fonetický ústav Filozofické fakulty Univerzity Karlovy v čele s prof. PhDr. 
Zdenou Palkovou, CSc. S případnými dotazy, náměty a nabídkami pomoci se lze obracet na adresu festival-czech@lists.freebsoft.org . -- Milan Zamazal