julius-4.2.2/0000755001051700105040000000000012004463507011331 5ustar ritrlabjulius-4.2.2/jclient-perl/0000755001051700105040000000000012004463507013721 5ustar ritrlabjulius-4.2.2/jclient-perl/Makefile.in0000644001051700105040000000101412004452401015751 0ustar ritrlab# Copyright (c) 1991-2012 Kawahara Lab., Kyoto University # Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology # Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology # All rights reserved SHELL=/bin/sh RM=@RM@ -f prefix=@prefix@ exec_prefix=@exec_prefix@ INSTALL=@INSTALL@ TARGET=jclient.pl all: chmod +x $(TARGET) install: install.bin install.bin: ${INSTALL} -d @bindir@ @INSTALL_PROGRAM@ $(TARGET) @bindir@ clean: echo nothing to do distclean: $(RM) Makefile julius-4.2.2/jclient-perl/00readme.txt0000644001051700105040000000301011071102423016047 0ustar ritrlab jclient.pl JCLIENT.PL(1) JCLIENT.PL(1) NAME jclient.pl - sample client for module mode (perl version) SYNOPSIS jclient.pl DESCRIPTION This is yet another sample client written in Perl. It connects to Julius running in module mode, receives recognition results from Julius, and can send commands to control Julius. This is a tiny program with only 57 lines. You can use it for free. EXAMPLES Invoke Julius in module mode by specifying the "-module" option: Then, at another terminal or on another host, invoke jclient.pl as below. The default hostname is "localhost", and the port number is 10500. You can change them by editing the top part of the script. It will then receive the output of Julius and print the raw messages to standard output. Also, a raw module command given on the standard input of jclient.pl will be sent to Julius. See the manuals for the specification of the module mode. SEE ALSO julius ( 1 ) , jcontrol ( 1 ) COPYRIGHT "jclient.pl" has been developed by Dr. Ryuichi Nisimura (nisimura@sys.wakayama-u.ac.jp). Use at your own risk. If you have any feedback, comments or requests, please contact the E-mail address above, or look at the Web page below. http://w3voice.jp/ 10/02/2008 JCLIENT.PL(1) julius-4.2.2/jclient-perl/jclient.pl0000644001051700105040000000242511063627104015710 0ustar ritrlab#! 
/usr/bin/perl use strict; use IO::Socket; use IO::Select; my $host = "localhost"; my $port = 10500; print STDERR "$host($port) に接続します\n"; # Socketを生成して接続 my $socket; while(!$socket){ $socket = IO::Socket::INET->new(PeerAddr => $host, PeerPort => $port, Proto => 'tcp', ); if (!$socket){ printf STDERR "$host($port) の接続に失敗しました\n"; printf STDERR "再接続を試みます\n"; sleep 10; } } print STDERR "$host($port) に接続しました\n"; # バッファリングをしない $| = 1; my($old) = select($socket); $| = 1; select($old); # Selecterを生成 my $selecter = IO::Select->new; $selecter->add($socket); $selecter->add(\*STDIN); # 入力待ち while(1){ my ($active_socks) = IO::Select->select($selecter, undef, undef, undef); foreach my $sock (@{$active_socks}){ # Juliusからの出力を表示 if ($sock == $socket){ while(<$socket>){ print; last if(/^\./); } # 標準入力をJuliusに送信 }else{ my $input = ; # 小文字を大文字に変換 $input =~ tr/a-z/A-Z/d; print $socket $input; } } } julius-4.2.2/jclient-perl/00readme-ja.txt0000644001051700105040000000313511071102423016437 0ustar ritrlab jclient.pl JCLIENT.PL(1) JCLIENT.PL(1) O jclient.pl - perl TvNCAg Tv jclient.pl DESCRIPTION Julius tTvNCAg "jcontrol" Perl D W[iT[oj[h Julius FッC Julius D 57 sPvODAvP[V Julius g QlKDゥRpB EXAMPLES L Julius W[[hNCjclient.pl NDzXgftHg localhost, |[g 10500 DXNvg`D sCCxge jclient.pl MC Wo oDCjclient.pl R}h i Enter jCJulius R}hMCJulius D R}hCdlW[R}hLqD SEE ALSO julius ( 1 ) , jcontrol ( 1 ) COPYRIGHT jclient.pl (nisimura@sys.wakayama-u.ac.jp) D{vOpCメ DeゥCpD zAAv]tB[hobN}C L[ AhXCLz[y[WAD http://w3voice.jp/ 10/02/2008 JCLIENT.PL(1) julius-4.2.2/configure0000755001051700105040000013317211556416023013251 0ustar ritrlab#! /bin/sh # Guess values for system-dependent variables and create Makefiles. # Generated automatically using autoconf version 2.13 # Copyright (C) 1992, 93, 94, 95, 96 Free Software Foundation, Inc. # # This configure script is free software; the Free Software Foundation # gives unlimited permission to copy, distribute and modify it. # Defaults: ac_help= ac_default_prefix=/usr/local # Any additions from configure.in: # Initialize some variables set by options. # The variables have the same names as the options, with # dashes changed to underlines. build=NONE cache_file=./config.cache exec_prefix=NONE host=NONE no_create= nonopt=NONE no_recursion= prefix=NONE program_prefix=NONE program_suffix=NONE program_transform_name=s,x,x, silent= site= srcdir= target=NONE verbose= x_includes=NONE x_libraries=NONE bindir='${exec_prefix}/bin' sbindir='${exec_prefix}/sbin' libexecdir='${exec_prefix}/libexec' datadir='${prefix}/share' sysconfdir='${prefix}/etc' sharedstatedir='${prefix}/com' localstatedir='${prefix}/var' libdir='${exec_prefix}/lib' includedir='${prefix}/include' oldincludedir='/usr/include' infodir='${prefix}/info' mandir='${prefix}/man' # Initialize some other variables. subdirs= MFLAGS= MAKEFLAGS= SHELL=${CONFIG_SHELL-/bin/sh} # Maximum number of lines to put in a shell here document. ac_max_here_lines=12 ac_prev= for ac_option do # If the previous option needs an argument, assign it. if test -n "$ac_prev"; then eval "$ac_prev=\$ac_option" ac_prev= continue fi case "$ac_option" in -*=*) ac_optarg=`echo "$ac_option" | sed 's/[-_a-zA-Z0-9]*=//'` ;; *) ac_optarg= ;; esac # Accept the important Cygnus configure options, so we can diagnose typos. 
case "$ac_option" in -bindir | --bindir | --bindi | --bind | --bin | --bi) ac_prev=bindir ;; -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) bindir="$ac_optarg" ;; -build | --build | --buil | --bui | --bu) ac_prev=build ;; -build=* | --build=* | --buil=* | --bui=* | --bu=*) build="$ac_optarg" ;; -cache-file | --cache-file | --cache-fil | --cache-fi \ | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) ac_prev=cache_file ;; -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) cache_file="$ac_optarg" ;; -datadir | --datadir | --datadi | --datad | --data | --dat | --da) ac_prev=datadir ;; -datadir=* | --datadir=* | --datadi=* | --datad=* | --data=* | --dat=* \ | --da=*) datadir="$ac_optarg" ;; -disable-* | --disable-*) ac_feature=`echo $ac_option|sed -e 's/-*disable-//'` # Reject names that are not valid shell variable names. if test -n "`echo $ac_feature| sed 's/[-a-zA-Z0-9_]//g'`"; then { echo "configure: error: $ac_feature: invalid feature name" 1>&2; exit 1; } fi ac_feature=`echo $ac_feature| sed 's/-/_/g'` eval "enable_${ac_feature}=no" ;; -enable-* | --enable-*) ac_feature=`echo $ac_option|sed -e 's/-*enable-//' -e 's/=.*//'` # Reject names that are not valid shell variable names. if test -n "`echo $ac_feature| sed 's/[-_a-zA-Z0-9]//g'`"; then { echo "configure: error: $ac_feature: invalid feature name" 1>&2; exit 1; } fi ac_feature=`echo $ac_feature| sed 's/-/_/g'` case "$ac_option" in *=*) ;; *) ac_optarg=yes ;; esac eval "enable_${ac_feature}='$ac_optarg'" ;; -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ | --exec | --exe | --ex) ac_prev=exec_prefix ;; -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ | --exec=* | --exe=* | --ex=*) exec_prefix="$ac_optarg" ;; -gas | --gas | --ga | --g) # Obsolete; use --with-gas. with_gas=yes ;; -help | --help | --hel | --he) # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat << EOF Usage: configure [options] [host] Options: [defaults in brackets after descriptions] Configuration: --cache-file=FILE cache test results in FILE --help print this message --no-create do not create output files --quiet, --silent do not print \`checking...' 
messages --version print the version of autoconf that created configure Directory and file names: --prefix=PREFIX install architecture-independent files in PREFIX [$ac_default_prefix] --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX [same as prefix] --bindir=DIR user executables in DIR [EPREFIX/bin] --sbindir=DIR system admin executables in DIR [EPREFIX/sbin] --libexecdir=DIR program executables in DIR [EPREFIX/libexec] --datadir=DIR read-only architecture-independent data in DIR [PREFIX/share] --sysconfdir=DIR read-only single-machine data in DIR [PREFIX/etc] --sharedstatedir=DIR modifiable architecture-independent data in DIR [PREFIX/com] --localstatedir=DIR modifiable single-machine data in DIR [PREFIX/var] --libdir=DIR object code libraries in DIR [EPREFIX/lib] --includedir=DIR C header files in DIR [PREFIX/include] --oldincludedir=DIR C header files for non-gcc in DIR [/usr/include] --infodir=DIR info documentation in DIR [PREFIX/info] --mandir=DIR man documentation in DIR [PREFIX/man] --srcdir=DIR find the sources in DIR [configure dir or ..] --program-prefix=PREFIX prepend PREFIX to installed program names --program-suffix=SUFFIX append SUFFIX to installed program names --program-transform-name=PROGRAM run sed PROGRAM on installed program names EOF cat << EOF Host type: --build=BUILD configure for building on BUILD [BUILD=HOST] --host=HOST configure for HOST [guessed] --target=TARGET configure for TARGET [TARGET=HOST] Features and packages: --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no) --enable-FEATURE[=ARG] include FEATURE [ARG=yes] --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) --x-includes=DIR X include files are in DIR --x-libraries=DIR X library files are in DIR EOF if test -n "$ac_help"; then echo "--enable and --with options recognized:$ac_help" fi exit 0 ;; -host | --host | --hos | --ho) ac_prev=host ;; -host=* | --host=* | --hos=* | --ho=*) host="$ac_optarg" ;; -includedir | --includedir | --includedi | --included | --include \ | --includ | --inclu | --incl | --inc) ac_prev=includedir ;; -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ | --includ=* | --inclu=* | --incl=* | --inc=*) includedir="$ac_optarg" ;; -infodir | --infodir | --infodi | --infod | --info | --inf) ac_prev=infodir ;; -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) infodir="$ac_optarg" ;; -libdir | --libdir | --libdi | --libd) ac_prev=libdir ;; -libdir=* | --libdir=* | --libdi=* | --libd=*) libdir="$ac_optarg" ;; -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \ | --libexe | --libex | --libe) ac_prev=libexecdir ;; -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ | --libexe=* | --libex=* | --libe=*) libexecdir="$ac_optarg" ;; -localstatedir | --localstatedir | --localstatedi | --localstated \ | --localstate | --localstat | --localsta | --localst \ | --locals | --local | --loca | --loc | --lo) ac_prev=localstatedir ;; -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ | --localstate=* | --localstat=* | --localsta=* | --localst=* \ | --locals=* | --local=* | --loca=* | --loc=* | --lo=*) localstatedir="$ac_optarg" ;; -mandir | --mandir | --mandi | --mand | --man | --ma | --m) ac_prev=mandir ;; -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*) mandir="$ac_optarg" ;; -nfp | --nfp | --nf) # Obsolete; use --without-fp. 
with_fp=no ;; -no-create | --no-create | --no-creat | --no-crea | --no-cre \ | --no-cr | --no-c) no_create=yes ;; -no-recursion | --no-recursion | --no-recursio | --no-recursi \ | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) no_recursion=yes ;; -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ | --oldin | --oldi | --old | --ol | --o) ac_prev=oldincludedir ;; -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) oldincludedir="$ac_optarg" ;; -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) ac_prev=prefix ;; -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) prefix="$ac_optarg" ;; -program-prefix | --program-prefix | --program-prefi | --program-pref \ | --program-pre | --program-pr | --program-p) ac_prev=program_prefix ;; -program-prefix=* | --program-prefix=* | --program-prefi=* \ | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) program_prefix="$ac_optarg" ;; -program-suffix | --program-suffix | --program-suffi | --program-suff \ | --program-suf | --program-su | --program-s) ac_prev=program_suffix ;; -program-suffix=* | --program-suffix=* | --program-suffi=* \ | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) program_suffix="$ac_optarg" ;; -program-transform-name | --program-transform-name \ | --program-transform-nam | --program-transform-na \ | --program-transform-n | --program-transform- \ | --program-transform | --program-transfor \ | --program-transfo | --program-transf \ | --program-trans | --program-tran \ | --progr-tra | --program-tr | --program-t) ac_prev=program_transform_name ;; -program-transform-name=* | --program-transform-name=* \ | --program-transform-nam=* | --program-transform-na=* \ | --program-transform-n=* | --program-transform-=* \ | --program-transform=* | --program-transfor=* \ | --program-transfo=* | --program-transf=* \ | --program-trans=* | --program-tran=* \ | --progr-tra=* | --program-tr=* | --program-t=*) program_transform_name="$ac_optarg" ;; -q | -quiet | --quiet | --quie | --qui | --qu | --q \ | -silent | --silent | --silen | --sile | --sil) silent=yes ;; -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) ac_prev=sbindir ;; -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ | --sbi=* | --sb=*) sbindir="$ac_optarg" ;; -sharedstatedir | --sharedstatedir | --sharedstatedi \ | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ | --sharedst | --shareds | --shared | --share | --shar \ | --sha | --sh) ac_prev=sharedstatedir ;; -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \ | --sha=* | --sh=*) sharedstatedir="$ac_optarg" ;; -site | --site | --sit) ac_prev=site ;; -site=* | --site=* | --sit=*) site="$ac_optarg" ;; -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) ac_prev=srcdir ;; -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) srcdir="$ac_optarg" ;; -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ | --syscon | --sysco | --sysc | --sys | --sy) ac_prev=sysconfdir ;; -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*) 
sysconfdir="$ac_optarg" ;; -target | --target | --targe | --targ | --tar | --ta | --t) ac_prev=target ;; -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) target="$ac_optarg" ;; -v | -verbose | --verbose | --verbos | --verbo | --verb) verbose=yes ;; -version | --version | --versio | --versi | --vers) echo "configure generated by autoconf version 2.13" exit 0 ;; -with-* | --with-*) ac_package=`echo $ac_option|sed -e 's/-*with-//' -e 's/=.*//'` # Reject names that are not valid shell variable names. if test -n "`echo $ac_package| sed 's/[-_a-zA-Z0-9]//g'`"; then { echo "configure: error: $ac_package: invalid package name" 1>&2; exit 1; } fi ac_package=`echo $ac_package| sed 's/-/_/g'` case "$ac_option" in *=*) ;; *) ac_optarg=yes ;; esac eval "with_${ac_package}='$ac_optarg'" ;; -without-* | --without-*) ac_package=`echo $ac_option|sed -e 's/-*without-//'` # Reject names that are not valid shell variable names. if test -n "`echo $ac_package| sed 's/[-a-zA-Z0-9_]//g'`"; then { echo "configure: error: $ac_package: invalid package name" 1>&2; exit 1; } fi ac_package=`echo $ac_package| sed 's/-/_/g'` eval "with_${ac_package}=no" ;; --x) # Obsolete; use --with-x. with_x=yes ;; -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ | --x-incl | --x-inc | --x-in | --x-i) ac_prev=x_includes ;; -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) x_includes="$ac_optarg" ;; -x-libraries | --x-libraries | --x-librarie | --x-librari \ | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) ac_prev=x_libraries ;; -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) x_libraries="$ac_optarg" ;; -*) { echo "configure: error: $ac_option: invalid option; use --help to show usage" 1>&2; exit 1; } ;; *) if test -n "`echo $ac_option| sed 's/[-a-z0-9.]//g'`"; then echo "configure: warning: $ac_option: invalid host type" 1>&2 fi if test "x$nonopt" != xNONE; then { echo "configure: error: can only configure for one host and one target at a time" 1>&2; exit 1; } fi nonopt="$ac_option" ;; esac done if test -n "$ac_prev"; then { echo "configure: error: missing argument to --`echo $ac_prev | sed 's/_/-/g'`" 1>&2; exit 1; } fi trap 'rm -fr conftest* confdefs* core core.* *.core $ac_clean_files; exit 1' 1 2 15 # File descriptor usage: # 0 standard input # 1 file creation # 2 errors and warnings # 3 some systems may open it to /dev/tty # 4 used on the Kubota Titan # 6 checking for... messages and results # 5 compiler messages saved in config.log if test "$silent" = yes; then exec 6>/dev/null else exec 6>&1 fi exec 5>./config.log echo "\ This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. " 1>&5 # Strip out --no-create and --no-recursion so they do not pile up. # Also quote any args containing shell metacharacters. ac_configure_args= for ac_arg do case "$ac_arg" in -no-create | --no-create | --no-creat | --no-crea | --no-cre \ | --no-cr | --no-c) ;; -no-recursion | --no-recursion | --no-recursio | --no-recursi \ | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) ;; *" "*|*" "*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?]*) ac_configure_args="$ac_configure_args '$ac_arg'" ;; *) ac_configure_args="$ac_configure_args $ac_arg" ;; esac done # NLS nuisances. # Only set these to C if already set. 
These must not be set unconditionally # because not all systems understand e.g. LANG=C (notably SCO). # Fixing LC_MESSAGES prevents Solaris sh from translating var values in `set'! # Non-C LC_CTYPE values break the ctype check. if test "${LANG+set}" = set; then LANG=C; export LANG; fi if test "${LC_ALL+set}" = set; then LC_ALL=C; export LC_ALL; fi if test "${LC_MESSAGES+set}" = set; then LC_MESSAGES=C; export LC_MESSAGES; fi if test "${LC_CTYPE+set}" = set; then LC_CTYPE=C; export LC_CTYPE; fi # confdefs.h avoids OS command line length limits that DEFS can exceed. rm -rf conftest* confdefs.h # AIX cpp loses on an empty file, so make sure it contains at least a newline. echo > confdefs.h # A filename unique to this package, relative to the directory that # configure is in, which we can look for to find out if srcdir is correct. ac_unique_file=Sample.jconf # Find the source files, if location was not specified. if test -z "$srcdir"; then ac_srcdir_defaulted=yes # Try the directory containing this script, then its parent. ac_prog=$0 ac_confdir=`echo $ac_prog|sed 's%/[^/][^/]*$%%'` test "x$ac_confdir" = "x$ac_prog" && ac_confdir=. srcdir=$ac_confdir if test ! -r $srcdir/$ac_unique_file; then srcdir=.. fi else ac_srcdir_defaulted=no fi if test ! -r $srcdir/$ac_unique_file; then if test "$ac_srcdir_defaulted" = yes; then { echo "configure: error: can not find sources in $ac_confdir or .." 1>&2; exit 1; } else { echo "configure: error: can not find sources in $srcdir" 1>&2; exit 1; } fi fi srcdir=`echo "${srcdir}" | sed 's%\([^/]\)/*$%\1%'` # Prefer explicitly selected file to automatically selected ones. if test -z "$CONFIG_SITE"; then if test "x$prefix" != xNONE; then CONFIG_SITE="$prefix/share/config.site $prefix/etc/config.site" else CONFIG_SITE="$ac_default_prefix/share/config.site $ac_default_prefix/etc/config.site" fi fi for ac_site_file in $CONFIG_SITE; do if test -r "$ac_site_file"; then echo "loading site script $ac_site_file" . "$ac_site_file" fi done if test -r "$cache_file"; then echo "loading cache $cache_file" . $cache_file else echo "creating cache $cache_file" > $cache_file fi ac_ext=c # CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options. ac_cpp='$CPP $CPPFLAGS' ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5' ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5' cross_compiling=$ac_cv_prog_cc_cross ac_exeext= ac_objext=o if (echo "testing\c"; echo 1,2,3) | grep c >/dev/null; then # Stardent Vistra SVR4 grep lacks -e, says ghazi@caip.rutgers.edu. if (echo -n testing; echo 1,2,3) | sed s/-n/xn/ | grep xn >/dev/null; then ac_n= ac_c=' ' ac_t=' ' else ac_n=-n ac_c= ac_t= fi else ac_n= ac_c='\c' ac_t= fi ac_aux_dir= for ac_dir in support $srcdir/support; do if test -f $ac_dir/install-sh; then ac_aux_dir=$ac_dir ac_install_sh="$ac_aux_dir/install-sh -c" break elif test -f $ac_dir/install.sh; then ac_aux_dir=$ac_dir ac_install_sh="$ac_aux_dir/install.sh -c" break fi done if test -z "$ac_aux_dir"; then { echo "configure: error: can not find install-sh or install.sh in support $srcdir/support" 1>&2; exit 1; } fi ac_config_guess=$ac_aux_dir/config.guess ac_config_sub=$ac_aux_dir/config.sub ac_configure=$ac_aux_dir/configure # This should be Cygnus configure. subdirs="mkgshmm gramtools jcontrol julius libjulius libsent" # Make sure we can run config.sub. 
if ${CONFIG_SHELL-/bin/sh} $ac_config_sub sun4 >/dev/null 2>&1; then : else { echo "configure: error: can not run $ac_config_sub" 1>&2; exit 1; } fi echo $ac_n "checking host system type""... $ac_c" 1>&6 echo "configure:555: checking host system type" >&5 host_alias=$host case "$host_alias" in NONE) case $nonopt in NONE) if host_alias=`${CONFIG_SHELL-/bin/sh} $ac_config_guess`; then : else { echo "configure: error: can not guess host type; you must specify one" 1>&2; exit 1; } fi ;; *) host_alias=$nonopt ;; esac ;; esac host=`${CONFIG_SHELL-/bin/sh} $ac_config_sub $host_alias` host_cpu=`echo $host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\1/'` host_vendor=`echo $host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\2/'` host_os=`echo $host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\3/'` echo "$ac_t""$host" 1>&6 echo $ac_n "checking host specific optimization flag""... $ac_c" 1>&6 echo "configure:577: checking host specific optimization flag" >&5 if test -z "$CFLAGS" ; then OPTFLAG=support/cflags.${host_cpu}-${host_vendor}-${host_os} if test -f "$OPTFLAG" ; then . $OPTFLAG echo "$ac_t""$OPTFLAG" 1>&6 else echo "$ac_t""no" 1>&6 fi else echo "$ac_t""skipped" 1>&6 fi # Extract the first word of "gcc", so it can be a program name with args. set dummy gcc; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 echo "configure:593: checking for $ac_word" >&5 if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" ac_dummy="$PATH" for ac_dir in $ac_dummy; do test -z "$ac_dir" && ac_dir=. if test -f $ac_dir/$ac_word; then ac_cv_prog_CC="gcc" break fi done IFS="$ac_save_ifs" fi fi CC="$ac_cv_prog_CC" if test -n "$CC"; then echo "$ac_t""$CC" 1>&6 else echo "$ac_t""no" 1>&6 fi if test -z "$CC"; then # Extract the first word of "cc", so it can be a program name with args. set dummy cc; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 echo "configure:623: checking for $ac_word" >&5 if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" ac_prog_rejected=no ac_dummy="$PATH" for ac_dir in $ac_dummy; do test -z "$ac_dir" && ac_dir=. if test -f $ac_dir/$ac_word; then if test "$ac_dir/$ac_word" = "/usr/ucb/cc"; then ac_prog_rejected=yes continue fi ac_cv_prog_CC="cc" break fi done IFS="$ac_save_ifs" if test $ac_prog_rejected = yes; then # We found a bogon in the path, so make sure we never use it. set dummy $ac_cv_prog_CC shift if test $# -gt 0; then # We chose a different compiler from the bogus one. # However, it has the same basename, so the bogon will be chosen # first if we set CC to just the basename; use the full file name. shift set dummy "$ac_dir/$ac_word" "$@" shift ac_cv_prog_CC="$@" fi fi fi fi CC="$ac_cv_prog_CC" if test -n "$CC"; then echo "$ac_t""$CC" 1>&6 else echo "$ac_t""no" 1>&6 fi if test -z "$CC"; then case "`uname -s`" in *win32* | *WIN32*) # Extract the first word of "cl", so it can be a program name with args. set dummy cl; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 echo "configure:674: checking for $ac_word" >&5 if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. 
else IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" ac_dummy="$PATH" for ac_dir in $ac_dummy; do test -z "$ac_dir" && ac_dir=. if test -f $ac_dir/$ac_word; then ac_cv_prog_CC="cl" break fi done IFS="$ac_save_ifs" fi fi CC="$ac_cv_prog_CC" if test -n "$CC"; then echo "$ac_t""$CC" 1>&6 else echo "$ac_t""no" 1>&6 fi ;; esac fi test -z "$CC" && { echo "configure: error: no acceptable cc found in \$PATH" 1>&2; exit 1; } fi echo $ac_n "checking whether the C compiler ($CC $CFLAGS $LDFLAGS) works""... $ac_c" 1>&6 echo "configure:706: checking whether the C compiler ($CC $CFLAGS $LDFLAGS) works" >&5 ac_ext=c # CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options. ac_cpp='$CPP $CPPFLAGS' ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5' ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5' cross_compiling=$ac_cv_prog_cc_cross cat > conftest.$ac_ext << EOF #line 717 "configure" #include "confdefs.h" main(){return(0);} EOF if { (eval echo configure:722: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then ac_cv_prog_cc_works=yes # If we can't run a trivial program, we are probably using a cross compiler. if (./conftest; exit) 2>/dev/null; then ac_cv_prog_cc_cross=no else ac_cv_prog_cc_cross=yes fi else echo "configure: failed program was:" >&5 cat conftest.$ac_ext >&5 ac_cv_prog_cc_works=no fi rm -fr conftest* ac_ext=c # CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options. ac_cpp='$CPP $CPPFLAGS' ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5' ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5' cross_compiling=$ac_cv_prog_cc_cross echo "$ac_t""$ac_cv_prog_cc_works" 1>&6 if test $ac_cv_prog_cc_works = no; then { echo "configure: error: installation or configuration problem: C compiler cannot create executables." 1>&2; exit 1; } fi echo $ac_n "checking whether the C compiler ($CC $CFLAGS $LDFLAGS) is a cross-compiler""... $ac_c" 1>&6 echo "configure:748: checking whether the C compiler ($CC $CFLAGS $LDFLAGS) is a cross-compiler" >&5 echo "$ac_t""$ac_cv_prog_cc_cross" 1>&6 cross_compiling=$ac_cv_prog_cc_cross echo $ac_n "checking whether we are using GNU C""... $ac_c" 1>&6 echo "configure:753: checking whether we are using GNU C" >&5 if eval "test \"`echo '$''{'ac_cv_prog_gcc'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.c <&5; (eval $ac_try) 2>&5; }; } | egrep yes >/dev/null 2>&1; then ac_cv_prog_gcc=yes else ac_cv_prog_gcc=no fi fi echo "$ac_t""$ac_cv_prog_gcc" 1>&6 if test $ac_cv_prog_gcc = yes; then GCC=yes else GCC= fi ac_test_CFLAGS="${CFLAGS+set}" ac_save_CFLAGS="$CFLAGS" CFLAGS= echo $ac_n "checking whether ${CC-cc} accepts -g""... $ac_c" 1>&6 echo "configure:781: checking whether ${CC-cc} accepts -g" >&5 if eval "test \"`echo '$''{'ac_cv_prog_cc_g'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else echo 'void f(){}' > conftest.c if test -z "`${CC-cc} -g -c conftest.c 2>&1`"; then ac_cv_prog_cc_g=yes else ac_cv_prog_cc_g=no fi rm -f conftest* fi echo "$ac_t""$ac_cv_prog_cc_g" 1>&6 if test "$ac_test_CFLAGS" = set; then CFLAGS="$ac_save_CFLAGS" elif test $ac_cv_prog_cc_g = yes; then if test "$GCC" = yes; then CFLAGS="-g -O2" else CFLAGS="-g" fi else if test "$GCC" = yes; then CFLAGS="-O2" else CFLAGS= fi fi echo $ac_n "checking how to run the C preprocessor""... 
$ac_c" 1>&6 echo "configure:813: checking how to run the C preprocessor" >&5 # On Suns, sometimes $CPP names a directory. if test -n "$CPP" && test -d "$CPP"; then CPP= fi if test -z "$CPP"; then if eval "test \"`echo '$''{'ac_cv_prog_CPP'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else # This must be in double quotes, not single quotes, because CPP may get # substituted into the Makefile and "${CC-cc}" will confuse make. CPP="${CC-cc} -E" # On the NeXT, cc -E runs the code through the compiler's parser, # not just through cpp. cat > conftest.$ac_ext < Syntax Error EOF ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" { (eval echo configure:834: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` if test -z "$ac_err"; then : else echo "$ac_err" >&5 echo "configure: failed program was:" >&5 cat conftest.$ac_ext >&5 rm -rf conftest* CPP="${CC-cc} -E -traditional-cpp" cat > conftest.$ac_ext < Syntax Error EOF ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" { (eval echo configure:851: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` if test -z "$ac_err"; then : else echo "$ac_err" >&5 echo "configure: failed program was:" >&5 cat conftest.$ac_ext >&5 rm -rf conftest* CPP="${CC-cc} -nologo -E" cat > conftest.$ac_ext < Syntax Error EOF ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" { (eval echo configure:868: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` if test -z "$ac_err"; then : else echo "$ac_err" >&5 echo "configure: failed program was:" >&5 cat conftest.$ac_ext >&5 rm -rf conftest* CPP=/lib/cpp fi rm -f conftest* fi rm -f conftest* fi rm -f conftest* ac_cv_prog_CPP="$CPP" fi CPP="$ac_cv_prog_CPP" else ac_cv_prog_CPP="$CPP" fi echo "$ac_t""$CPP" 1>&6 # Find a good install program. We prefer a C program (faster), # so one script is as good as another. But avoid the broken or # incompatible versions: # SysV /etc/install, /usr/sbin/install # SunOS /usr/etc/install # IRIX /sbin/install # AIX /bin/install # AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag # AFS /usr/afsws/bin/install, which mishandles nonexistent args # SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff" # ./install, which can be erroneously created by make from ./install.sh. echo $ac_n "checking for a BSD compatible install""... $ac_c" 1>&6 echo "configure:904: checking for a BSD compatible install" >&5 if test -z "$INSTALL"; then if eval "test \"`echo '$''{'ac_cv_path_install'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else IFS="${IFS= }"; ac_save_IFS="$IFS"; IFS=":" for ac_dir in $PATH; do # Account for people who put trailing slashes in PATH elements. case "$ac_dir/" in /|./|.//|/etc/*|/usr/sbin/*|/usr/etc/*|/sbin/*|/usr/afsws/bin/*|/usr/ucb/*) ;; *) # OSF1 and SCO ODT 3.0 have their own names for install. # Don't use installbsd from OSF since it installs stuff as root # by default. for ac_prog in ginstall scoinst install; do if test -f $ac_dir/$ac_prog; then if test $ac_prog = install && grep dspmsg $ac_dir/$ac_prog >/dev/null 2>&1; then # AIX install. It has an incompatible calling convention. : else ac_cv_path_install="$ac_dir/$ac_prog -c" break 2 fi fi done ;; esac done IFS="$ac_save_IFS" fi if test "${ac_cv_path_install+set}" = set; then INSTALL="$ac_cv_path_install" else # As a last resort, use the slow shell script. 
We don't cache a # path for INSTALL within a source directory, because that will # break other packages using the cache if that directory is # removed, or if the path is relative. INSTALL="$ac_install_sh" fi fi echo "$ac_t""$INSTALL" 1>&6 # Use test -z because SunOS4 sh mishandles braces in ${var-val}. # It thinks the first close brace ends the variable substitution. test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}' test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL_PROGRAM}' test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644' # Extract the first word of "rm", so it can be a program name with args. set dummy rm; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 echo "configure:959: checking for $ac_word" >&5 if eval "test \"`echo '$''{'ac_cv_path_RM'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else case "$RM" in /*) ac_cv_path_RM="$RM" # Let the user override the test with a path. ;; ?:/*) ac_cv_path_RM="$RM" # Let the user override the test with a dos path. ;; *) IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" ac_dummy="$PATH" for ac_dir in $ac_dummy; do test -z "$ac_dir" && ac_dir=. if test -f $ac_dir/$ac_word; then ac_cv_path_RM="$ac_dir/$ac_word" break fi done IFS="$ac_save_ifs" ;; esac fi RM="$ac_cv_path_RM" if test -n "$RM"; then echo "$ac_t""$RM" 1>&6 else echo "$ac_t""no" 1>&6 fi echo $ac_n "checking for Cygwin environment""... $ac_c" 1>&6 echo "configure:992: checking for Cygwin environment" >&5 if eval "test \"`echo '$''{'ac_cv_cygwin'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext <&5; (eval $ac_compile) 2>&5; }; then rm -rf conftest* ac_cv_cygwin=yes else echo "configure: failed program was:" >&5 cat conftest.$ac_ext >&5 rm -rf conftest* ac_cv_cygwin=no fi rm -f conftest* rm -f conftest* fi echo "$ac_t""$ac_cv_cygwin" 1>&6 CYGWIN= test "$ac_cv_cygwin" = yes && CYGWIN=yes echo $ac_n "checking for mingw32 environment""... $ac_c" 1>&6 echo "configure:1025: checking for mingw32 environment" >&5 if eval "test \"`echo '$''{'ac_cv_mingw32'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext <&5; (eval $ac_compile) 2>&5; }; then rm -rf conftest* ac_cv_mingw32=yes else echo "configure: failed program was:" >&5 cat conftest.$ac_ext >&5 rm -rf conftest* ac_cv_mingw32=no fi rm -f conftest* rm -f conftest* fi echo "$ac_t""$ac_cv_mingw32" 1>&6 MINGW32= test "$ac_cv_mingw32" = yes && MINGW32=yes echo $ac_n "checking for executable suffix""... $ac_c" 1>&6 echo "configure:1056: checking for executable suffix" >&5 if eval "test \"`echo '$''{'ac_cv_exeext'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else if test "$CYGWIN" = yes || test "$MINGW32" = yes; then ac_cv_exeext=.exe else rm -f conftest* echo 'int main () { return 0; }' > conftest.$ac_ext ac_cv_exeext= if { (eval echo configure:1066: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; }; then for file in conftest.*; do case $file in *.c | *.o | *.obj) ;; *) ac_cv_exeext=`echo $file | sed -e s/conftest//` ;; esac done else { echo "configure: error: installation or configuration problem: compiler cannot create executables." 1>&2; exit 1; } fi rm -f conftest* test x"${ac_cv_exeext}" = x && ac_cv_exeext=no fi fi EXEEXT="" test x"${ac_cv_exeext}" != xno && EXEEXT=${ac_cv_exeext} echo "$ac_t""${ac_cv_exeext}" 1>&6 ac_exeext=$EXEEXT trap '' 1 2 15 cat > confcache <<\EOF # This file is a shell script that caches the results of configure # tests run on this system so they can be shared between configure # scripts and configure runs. 
It is not useful on other systems. # If it contains results you don't want to keep, you may remove or edit it. # # By default, configure uses ./config.cache as the cache file, # creating it if it does not exist already. You can give configure # the --cache-file=FILE option to use a different cache file; that is # what configure does when it calls configure scripts in # subdirectories, so they share the cache. # Giving --cache-file=/dev/null disables caching, for debugging configure. # config.status only pays attention to the cache file if you give it the # --recheck option to rerun configure. # EOF # The following way of writing the cache mishandles newlines in values, # but we know of no workaround that is simple, portable, and efficient. # So, don't put newlines in cache variables' values. # Ultrix sh set writes to stderr and can't be redirected directly, # and sets the high bit in the cache file unless we assign to the vars. (set) 2>&1 | case `(ac_space=' '; set | grep ac_space) 2>&1` in *ac_space=\ *) # `set' does not quote correctly, so add quotes (double-quote substitution # turns \\\\ into \\, and sed turns \\ into \). sed -n \ -e "s/'/'\\\\''/g" \ -e "s/^\\([a-zA-Z0-9_]*_cv_[a-zA-Z0-9_]*\\)=\\(.*\\)/\\1=\${\\1='\\2'}/p" ;; *) # `set' quotes correctly as required by POSIX, so do not add quotes. sed -n -e 's/^\([a-zA-Z0-9_]*_cv_[a-zA-Z0-9_]*\)=\(.*\)/\1=${\1=\2}/p' ;; esac >> confcache if cmp -s $cache_file confcache; then : else if test -w $cache_file; then echo "updating cache $cache_file" cat confcache > $cache_file else echo "not updating unwritable cache $cache_file" fi fi rm -f confcache trap 'rm -fr conftest* confdefs* core core.* *.core $ac_clean_files; exit 1' 1 2 15 test "x$prefix" = xNONE && prefix=$ac_default_prefix # Let make expand exec_prefix. test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' # Any assignment to VPATH causes Sun make to only execute # the first set of double-colon rules, so remove it if not needed. # If there is a colon in the path, we need to keep it. if test "x$srcdir" = x.; then ac_vpsub='/^[ ]*VPATH[ ]*=[^:]*$/d' fi trap 'rm -f $CONFIG_STATUS conftest*; exit 1' 1 2 15 # Transform confdefs.h into DEFS. # Protect against shell expansion while executing Makefile rules. # Protect against Makefile macro expansion. cat > conftest.defs <<\EOF s%#define \([A-Za-z_][A-Za-z0-9_]*\) *\(.*\)%-D\1=\2%g s%[ `~#$^&*(){}\\|;'"<>?]%\\&%g s%\[%\\&%g s%\]%\\&%g s%\$%$$%g EOF DEFS=`sed -f conftest.defs confdefs.h | tr '\012' ' '` rm -f conftest.defs # Without the "./", some shells look in PATH for config.status. : ${CONFIG_STATUS=./config.status} echo creating $CONFIG_STATUS rm -f $CONFIG_STATUS cat > $CONFIG_STATUS </dev/null | sed 1q`: # # $0 $ac_configure_args # # Compiler output produced by configure, useful for debugging # configure, is in ./config.log if it exists. 
ac_cs_usage="Usage: $CONFIG_STATUS [--recheck] [--version] [--help]" for ac_option do case "\$ac_option" in -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) echo "running \${CONFIG_SHELL-/bin/sh} $0 $ac_configure_args --no-create --no-recursion" exec \${CONFIG_SHELL-/bin/sh} $0 $ac_configure_args --no-create --no-recursion ;; -version | --version | --versio | --versi | --vers | --ver | --ve | --v) echo "$CONFIG_STATUS generated by autoconf version 2.13" exit 0 ;; -help | --help | --hel | --he | --h) echo "\$ac_cs_usage"; exit 0 ;; *) echo "\$ac_cs_usage"; exit 1 ;; esac done ac_given_srcdir=$srcdir ac_given_INSTALL="$INSTALL" trap 'rm -fr `echo "Makefile mkbingram/Makefile mkbinhmm/Makefile adinrec/Makefile adintool/Makefile mkss/Makefile generate-ngram/Makefile jclient-perl/Makefile man/Makefile" | sed "s/:[^ ]*//g"` conftest*; exit 1' 1 2 15 EOF cat >> $CONFIG_STATUS < conftest.subs <<\\CEOF $ac_vpsub $extrasub s%@SHELL@%$SHELL%g s%@CFLAGS@%$CFLAGS%g s%@CPPFLAGS@%$CPPFLAGS%g s%@CXXFLAGS@%$CXXFLAGS%g s%@FFLAGS@%$FFLAGS%g s%@DEFS@%$DEFS%g s%@LDFLAGS@%$LDFLAGS%g s%@LIBS@%$LIBS%g s%@exec_prefix@%$exec_prefix%g s%@prefix@%$prefix%g s%@program_transform_name@%$program_transform_name%g s%@bindir@%$bindir%g s%@sbindir@%$sbindir%g s%@libexecdir@%$libexecdir%g s%@datadir@%$datadir%g s%@sysconfdir@%$sysconfdir%g s%@sharedstatedir@%$sharedstatedir%g s%@localstatedir@%$localstatedir%g s%@libdir@%$libdir%g s%@includedir@%$includedir%g s%@oldincludedir@%$oldincludedir%g s%@infodir@%$infodir%g s%@mandir@%$mandir%g s%@subdirs@%$subdirs%g s%@host@%$host%g s%@host_alias@%$host_alias%g s%@host_cpu@%$host_cpu%g s%@host_vendor@%$host_vendor%g s%@host_os@%$host_os%g s%@CC@%$CC%g s%@CPP@%$CPP%g s%@INSTALL_PROGRAM@%$INSTALL_PROGRAM%g s%@INSTALL_SCRIPT@%$INSTALL_SCRIPT%g s%@INSTALL_DATA@%$INSTALL_DATA%g s%@RM@%$RM%g s%@EXEEXT@%$EXEEXT%g CEOF EOF cat >> $CONFIG_STATUS <<\EOF # Split the substitutions into bite-sized pieces for seds with # small command number limits, like on Digital OSF/1 and HP-UX. ac_max_sed_cmds=90 # Maximum number of lines to put in a sed script. ac_file=1 # Number of current file. ac_beg=1 # First line for current file. ac_end=$ac_max_sed_cmds # Line after last line for current file. ac_more_lines=: ac_sed_cmds="" while $ac_more_lines; do if test $ac_beg -gt 1; then sed "1,${ac_beg}d; ${ac_end}q" conftest.subs > conftest.s$ac_file else sed "${ac_end}q" conftest.subs > conftest.s$ac_file fi if test ! -s conftest.s$ac_file; then ac_more_lines=false rm -f conftest.s$ac_file else if test -z "$ac_sed_cmds"; then ac_sed_cmds="sed -f conftest.s$ac_file" else ac_sed_cmds="$ac_sed_cmds | sed -f conftest.s$ac_file" fi ac_file=`expr $ac_file + 1` ac_beg=$ac_end ac_end=`expr $ac_end + $ac_max_sed_cmds` fi done if test -z "$ac_sed_cmds"; then ac_sed_cmds=cat fi EOF cat >> $CONFIG_STATUS <> $CONFIG_STATUS <<\EOF for ac_file in .. $CONFIG_FILES; do if test "x$ac_file" != x..; then # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in". case "$ac_file" in *:*) ac_file_in=`echo "$ac_file"|sed 's%[^:]*:%%'` ac_file=`echo "$ac_file"|sed 's%:.*%%'` ;; *) ac_file_in="${ac_file}.in" ;; esac # Adjust a relative srcdir, top_srcdir, and INSTALL for subdirectories. # Remove last slash and all that follows it. Not all systems have dirname. ac_dir=`echo $ac_file|sed 's%/[^/][^/]*$%%'` if test "$ac_dir" != "$ac_file" && test "$ac_dir" != .; then # The file is in a subdirectory. test ! 
-d "$ac_dir" && mkdir "$ac_dir" ac_dir_suffix="/`echo $ac_dir|sed 's%^\./%%'`" # A "../" for each directory in $ac_dir_suffix. ac_dots=`echo $ac_dir_suffix|sed 's%/[^/]*%../%g'` else ac_dir_suffix= ac_dots= fi case "$ac_given_srcdir" in .) srcdir=. if test -z "$ac_dots"; then top_srcdir=. else top_srcdir=`echo $ac_dots|sed 's%/$%%'`; fi ;; /*) srcdir="$ac_given_srcdir$ac_dir_suffix"; top_srcdir="$ac_given_srcdir" ;; *) # Relative path. srcdir="$ac_dots$ac_given_srcdir$ac_dir_suffix" top_srcdir="$ac_dots$ac_given_srcdir" ;; esac case "$ac_given_INSTALL" in [/$]*) INSTALL="$ac_given_INSTALL" ;; *) INSTALL="$ac_dots$ac_given_INSTALL" ;; esac echo creating "$ac_file" rm -f "$ac_file" configure_input="Generated automatically from `echo $ac_file_in|sed 's%.*/%%'` by configure." case "$ac_file" in *Makefile*) ac_comsub="1i\\ # $configure_input" ;; *) ac_comsub= ;; esac ac_file_inputs=`echo $ac_file_in|sed -e "s%^%$ac_given_srcdir/%" -e "s%:% $ac_given_srcdir/%g"` sed -e "$ac_comsub s%@configure_input@%$configure_input%g s%@srcdir@%$srcdir%g s%@top_srcdir@%$top_srcdir%g s%@INSTALL@%$INSTALL%g " $ac_file_inputs | (eval "$ac_sed_cmds") > $ac_file fi; done rm -f conftest.s* EOF cat >> $CONFIG_STATUS <> $CONFIG_STATUS <<\EOF exit 0 EOF chmod +x $CONFIG_STATUS rm -fr confdefs* $ac_clean_files test "$no_create" = yes || ${CONFIG_SHELL-/bin/sh} $CONFIG_STATUS || exit 1 if test "$no_recursion" != yes; then # Remove --cache-file and --srcdir arguments so they do not pile up. ac_sub_configure_args= ac_prev= for ac_arg in $ac_configure_args; do if test -n "$ac_prev"; then ac_prev= continue fi case "$ac_arg" in -cache-file | --cache-file | --cache-fil | --cache-fi \ | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) ac_prev=cache_file ;; -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) ;; -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) ac_prev=srcdir ;; -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) ;; *) ac_sub_configure_args="$ac_sub_configure_args $ac_arg" ;; esac done for ac_config_dir in mkgshmm gramtools jcontrol julius libjulius libsent; do # Do not complain, so a configure script can configure whichever # parts of a large source tree are present. if test ! -d $srcdir/$ac_config_dir; then continue fi echo configuring in $ac_config_dir case "$srcdir" in .) ;; *) if test -d ./$ac_config_dir || mkdir ./$ac_config_dir; then :; else { echo "configure: error: can not create `pwd`/$ac_config_dir" 1>&2; exit 1; } fi ;; esac ac_popdir=`pwd` cd $ac_config_dir # A "../" for each directory in /$ac_config_dir. ac_dots=`echo $ac_config_dir|sed -e 's%^\./%%' -e 's%[^/]$%&/%' -e 's%[^/]*/%../%g'` case "$srcdir" in .) # No --srcdir option. We are building in place. ac_sub_srcdir=$srcdir ;; /*) # Absolute path. ac_sub_srcdir=$srcdir/$ac_config_dir ;; *) # Relative path. ac_sub_srcdir=$ac_dots$srcdir/$ac_config_dir ;; esac # Check for guested configure; otherwise get Cygnus style configure. if test -f $ac_sub_srcdir/configure; then ac_sub_configure=$ac_sub_srcdir/configure elif test -f $ac_sub_srcdir/configure.in; then ac_sub_configure=$ac_configure else echo "configure: warning: no configuration information is in $ac_config_dir" 1>&2 ac_sub_configure= fi # The recursion is here. if test -n "$ac_sub_configure"; then # Make the cache file name correct relative to the subdirectory. case "$cache_file" in /*) ac_sub_cache_file=$cache_file ;; *) # Relative path. 
ac_sub_cache_file="$ac_dots$cache_file" ;; esac case "$ac_given_INSTALL" in [/$]*) INSTALL="$ac_given_INSTALL" ;; *) INSTALL="$ac_dots$ac_given_INSTALL" ;; esac echo "running ${CONFIG_SHELL-/bin/sh} $ac_sub_configure $ac_sub_configure_args --cache-file=$ac_sub_cache_file --srcdir=$ac_sub_srcdir" # The eval makes quoting arguments work. if eval ${CONFIG_SHELL-/bin/sh} $ac_sub_configure $ac_sub_configure_args --cache-file=$ac_sub_cache_file --srcdir=$ac_sub_srcdir then : else { echo "configure: error: $ac_sub_configure failed for $ac_config_dir" 1>&2; exit 1; } fi fi cd $ac_popdir done fi julius-4.2.2/mkbinhmm/0000755001051700105040000000000012004463507013133 5ustar ritrlabjulius-4.2.2/mkbinhmm/Makefile.in0000644001051700105040000000251512004452411015173 0ustar ritrlab# Copyright (c) 2003-2012 Kawahara Lab., Kyoto University # Copyright (c) 2003-2005 Shikano Lab., Nara Institute of Science and Technology # Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology # All rights reserved # # $Id: Makefile.in,v 1.8 2012/07/27 08:44:57 sumomo Exp $ # SHELL=/bin/sh .SUFFIXES: .SUFFIXES: .c .o .c.o: $(CC) $(CFLAGS) $(CPPFLAGS) -o $@ -c $< LIBSENT=../libsent CC=@CC@ CFLAGS=@CFLAGS@ CPPFLAGS=-I$(LIBSENT)/include @CPPFLAGS@ @DEFS@ `$(LIBSENT)/libsent-config --cflags` LDFLAGS=@LDFLAGS@ -L$(LIBSENT) `$(LIBSENT)/libsent-config --libs` RM=@RM@ -f prefix=@prefix@ exec_prefix=@exec_prefix@ INSTALL=@INSTALL@ all: mkbinhmm@EXEEXT@ mkbinhmmlist@EXEEXT@ mkbinhmm@EXEEXT@: mkbinhmm.o $(LIBSENT)/libsent.a $(CC) $(CFLAGS) $(CPPFLAGS) -o $@ mkbinhmm.o $(LDFLAGS) mkbinhmmlist@EXEEXT@: mkbinhmmlist.o $(LIBSENT)/libsent.a $(CC) $(CFLAGS) $(CPPFLAGS) -o $@ mkbinhmmlist.o $(LDFLAGS) install: install.bin install.bin: mkbinhmm@EXEEXT@ mkbinhmmlist@EXEEXT@ ${INSTALL} -d @bindir@ @INSTALL_PROGRAM@ mkbinhmm@EXEEXT@ mkbinhmmlist@EXEEXT@ @bindir@ clean: $(RM) mkbinhmm.o mkbinhmmlist.o $(RM) *~ core $(RM) mkbinhmm mkbinhmm.exe $(RM) mkbinhmmlist mkbinhmmlist.exe distclean: $(RM) mkbinhmm.o mkbinhmmlist.o $(RM) *~ core $(RM) mkbinhmm mkbinhmm.exe $(RM) mkbinhmmlist mkbinhmmlist.exe $(RM) Makefile julius-4.2.2/mkbinhmm/mkbinhmm.c0000644001051700105040000000714312004452411015076 0ustar ritrlab/* * Copyright (c) 2003-2012 Kawahara Lab., Kyoto University * Copyright (c) 2003-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ /* mkbinhmm --- read in ascii hmmdefs file and write in binary format */ /* $Id: mkbinhmm.c,v 1.6 2012/07/27 08:44:57 sumomo Exp $ */ #include #include HTK_HMM_INFO *hmminfo; Value para, para_htk; static void usage(char *s) { printf("mkbinhmm: convert HMM definition file to binary format for Julius\n"); printf("usage: %s [-htkconf HTKConfig] hmmdefs binhmm\n", s); printf("\nLibrary configuration: "); confout_version(stdout); confout_am(stdout); printf("\n"); } int main(int argc, char *argv[]) { FILE *fp; char *infile; char *outfile; char *conffile; int i; infile = outfile = conffile = NULL; for(i=1;i= argc) { usage(argv[0]); return -1; } conffile = argv[i]; } else { if (infile == NULL) { infile = argv[i]; } else if (outfile == NULL) { outfile = argv[i]; } else { usage(argv[0]); return -1; } } } if (infile == NULL || outfile == NULL) { usage(argv[0]); return -1; } hmminfo = hmminfo_new(); printf("---- reading hmmdefs ----\n"); printf("filename: %s\n", infile); /* read hmmdef file */ undef_para(¶); if (init_hmminfo(hmminfo, infile, NULL, ¶) == FALSE) { fprintf(stderr, "--- 
terminated\n"); return -1; } if (conffile != NULL) { /* if input HMMDEFS already has embedded parameter they will be overridden by the parameters in the config file */ printf("\n---- reading HTK Config ----\n"); if (para.loaded == 1) { printf("Warning: input hmmdefs has acoustic analysis parameter information\n"); printf("Warning: they are overridden by the HTK Config file...\n"); } /* load HTK config file */ undef_para(&para); if (htk_config_file_parse(conffile, &para) == FALSE) { fprintf(stderr, "Error: failed to read %s\n", conffile); return(-1); } /* set some parameters from HTK HMM header information */ printf("\nsetting TARGETKIND and NUMCEPS from HMM definition header..."); calc_para_from_header(&para, hmminfo->opt.param_type, hmminfo->opt.vec_size); printf("done\n"); /* fulfill unspecified values with HTK defaults */ printf("fulfill unspecified values with HTK defaults..."); undef_para(&para_htk); make_default_para_htk(&para_htk); apply_para(&para, &para_htk); printf("done\n"); } printf("\n------------------------------------------------------------\n"); print_hmmdef_info(stdout, hmminfo); printf("\n"); if (para.loaded == 1) { put_para(stdout, &para); } printf("------------------------------------------------------------\n"); printf("---- writing ----\n"); printf("filename: %s\n", outfile); if ((fp = fopen_writefile(outfile)) == NULL) { fprintf(stderr, "failed to open %s for writing\n", outfile); return -1; } if (write_binhmm(fp, hmminfo, (para.loaded == 1) ? &para : NULL) == FALSE) { fprintf(stderr, "failed to write to %s\n", outfile); return -1; } if (fclose_writefile(fp) != 0) { fprintf(stderr, "failed to close %s\n", outfile); return -1; } printf("\n"); if (para.loaded == 1) { printf("binary HMM written to \"%s\", with acoustic parameters embedded for Julius.\n", outfile); } else { printf("binary HMM written to \"%s\"\n", outfile); } return 0; } julius-4.2.2/mkbinhmm/00readme.txt0000644001051700105040000000500511071102424015260 0ustar ritrlab mkbinhmm MKBINHMM(1) MKBINHMM(1) NAME mkbinhmm - convert HMM definition file in HTK ascii format to Julius binary format SYNOPSIS mkbinhmm [-htkconf HTKConfigFile] {hmmdefs_file} {binhmm_file} DESCRIPTION mkbinhmm converts an HMM definition file in HTK ascii format into a binary HMM file for Julius. It will greatly speed up the launch process. You can also embed the acoustic analysis condition parameters needed for recognition into the output file. To embed the parameters, specify the HTK Config file you used to extract acoustic features for training the HMM with the "-htkconf" option. The parameters embedded in the binary HMM file will be loaded into Julius automatically, so you do not need to specify the acoustic feature options at run time. This is convenient when you deliver an acoustic model. You can also specify a binary file as the input. This can be used to update the old binary format to the new one, or to embed the config parameters into an already existing binary file. If the input binhmm already has acoustic analysis parameters embedded, they will be overridden by the specified values. mkbinhmm can read gzipped files as input. OPTIONS -htkconf HTKConfigFile HTK Config file you used at training time. If specified, the values are embedded into the output file. hmmdefs_file The source HMM definition file in HTK ascii format or Julius binary format. binhmm_file Output file. 
EXAMPLES Convert HTK ascii format HMM definition file into Julius binary file: Furthermore, embed acoustic feature parameters as specified by Config file Embed the acoustic parameters into an existing binary file SEE ALSO julius ( 1 ) , mkbingram ( 1 ) , mkbinhmmlist ( 1 ) COPYRIGHT Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan Copyright (c) 1991-2008 Kawahara Lab., Kyoto University Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology Copyright (c) 2005-2008 Julius project team, Nagoya Institute of Technology LICENSE The same as Julius. 10/02/2008 MKBINHMM(1) julius-4.2.2/mkbinhmm/00readme-ja.txt0000644001051700105040000000412411071102424015651 0ustar ritrlab mkbinhmm MKBINHMM(1) MKBINHMM(1) O mkbinhmm - oCi HMM Tv mkbinhmm [-htkconf HTKConfigFile] {hmmdefs_file} {binhmm_file} DESCRIPTION mkbinhmm CHTKAXL[`ョHMM`t@CCJuliuspoCi `ョDg JuliusN D foot@Cwb_ DCwKop HTK Config t@C "-htkconf" wDwb_oC Fッゥ IKvop[^ZbgCD CHTKAXL[`ョCJuliuspoCiHMM ^D-htkconf pC oCiHMM op[^D mkbinhmm gzip kHMM`t@CD OPTIONS -htkconf HTKConfigFile wKogpHTK Configt@CwDw Clot@Cwb_D wb_D hmmdefs_file f`t@C (MMF)DHTK ASCII `ョC Julius oCi`ョD hmmdefs_file JuliuspoCi`ョt@CoD EXAMPLES HTK ASCII `ョ HMM `oCi`ョF HTKt@C Config ewb_oF oCi`ョt@Cwb_F SEE ALSO julius ( 1 ) , mkbingram ( 1 ) COPYRIGHT Copyright (c) 1991-2008 sw コ Copyright (c) 1997-2000 U(IPA) Copyright (c) 2000-2008 [wZpw@w ュコ Copyright (c) 2005-2008 Hw JuliusJ`[ LICENSE Julius gpD 10/02/2008 MKBINHMM(1) julius-4.2.2/mkbinhmm/mkbinhmmlist.c0000644001051700105040000000520312004452411015765 0ustar ritrlab/* * Copyright (c) 2003-2012 Kawahara Lab., Kyoto University * Copyright (c) 2003-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ /* mkbinhmmlist --- read in ascii hmmlist file and write in binary format */ /* $Id: mkbinhmmlist.c,v 1.4 2012/07/27 08:44:57 sumomo Exp $ */ #include #include HTK_HMM_INFO *hmminfo; Value para, para_htk; static void usage(char *s) { printf("mkbinhmmlist: convert HMMList file to binary format for Julius\n"); printf("usage: %s hmmdefs hmmlist binhmmlist\n", s); printf("\nLibrary configuration: "); confout_version(stdout); confout_am(stdout); printf("\n"); } int main(int argc, char *argv[]) { FILE *fp; char *hmmdefs_file; char *hmmlist_file; char *outfile; int i; hmmdefs_file = hmmlist_file = outfile = NULL; for(i=1;iis_triphone) { fprintf(stderr, "making pseudo bi/mono-phone for IW-triphone\n"); if (make_cdset(hmminfo) == FALSE) { fprintf(stderr, "ERROR: m_fusion: failed to make context-dependent state set\n"); return -1; } } printf("\n------------------------------------------------------------\n"); print_hmmdef_info(stdout, hmminfo); printf("\n"); printf("------------------------------------------------------------\n"); printf("---- writing logical-to-physical mapping and pseudo phone info ----\n"); printf("filename: %s\n", outfile); if ((fp = fopen_writefile(outfile)) == NULL) { fprintf(stderr, "failed to open %s for writing\n", outfile); return -1; } if (save_hmmlist_bin(fp, hmminfo) == FALSE) { fprintf(stderr, "failed to write to %s\n", outfile); return -1; } if (fclose_writefile(fp) != 0) { fprintf(stderr, "failed to close %s\n", outfile); return -1; } printf("\n"); printf("binary HMMList and pseudo phone definitions are written to \"%s\"\n", outfile); return 0; } julius-4.2.2/libjulius/0000755001051700105040000000000012004463507013333 5ustar 
ritrlabjulius-4.2.2/libjulius/configure0000755001051700105040000020674512004463434015257 0ustar ritrlab#! /bin/sh # Guess values for system-dependent variables and create Makefiles. # Generated automatically using autoconf version 2.13 # Copyright (C) 1992, 93, 94, 95, 96 Free Software Foundation, Inc. # # This configure script is free software; the Free Software Foundation # gives unlimited permission to copy, distribute and modify it. # Defaults: ac_help= ac_default_prefix=/usr/local # Any additions from configure.in: ac_help="$ac_help --disable-pthread [debug] do not use pthread for speech input" ac_help="$ac_help --enable-setup=... select algorithm set standard high accuracy, slow speed fast balanced for both speed and accuracy (default) v2.1 backward compatible with Rev.2.1" ac_help="$ac_help --enable-factor1 use 1-gram factoring on 1st pass" ac_help="$ac_help --enable-factor2 use 2-gram factoring on 1st pass" ac_help="$ac_help --enable-lowmem all words share a single root on lexicon tree" ac_help="$ac_help --enable-lowmem2 separate hi-freq words from lexicon tree" ac_help="$ac_help --enable-monotree monophone lexicon on 1st pass (EXPERIMENTAL)" ac_help="$ac_help --disable-score-beam enable score envelope beaming on 2nd pass scan" ac_help="$ac_help --enable-iwcd1 handle inter-word triphone on 1st pass" ac_help="$ac_help --enable-strict-iwcd2 strict IWCD scoring on 2nd pass" ac_help="$ac_help --enable-wpair use word-pair approximation on 1st pass" ac_help="$ac_help --enable-wpair-nlimit keep only N-best path with wpair (-nlimit)" ac_help="$ac_help --enable-word-graph use word graph instead of trellis between passes" ac_help="$ac_help --disable-cm disable confidence measure computation" ac_help="$ac_help --enable-cm-nbest use N-best CM instead of search CM" ac_help="$ac_help --enable-cm-multiple-alpha EXPERIMENTAL: test multi alphas (need much mem)" ac_help="$ac_help --disable-lmfix make LM computing compatible with < 3.4" ac_help="$ac_help --enable-graphout-nbest word graph output from N-best sentence" ac_help="$ac_help --enable-cmthres enable confidence score based pruning on 2nd pass" ac_help="$ac_help --enable-gmm-vad enable GMM-based VAD (EXPERIMENTAL)" ac_help="$ac_help --enable-decoder-vad enable a new decoder-based VAD by NAIST team" ac_help="$ac_help --enable-power-reject enable post rejection by power" ac_help="$ac_help --disable-plugin disable plugin support" # Initialize some variables set by options. # The variables have the same names as the options, with # dashes changed to underlines. build=NONE cache_file=./config.cache exec_prefix=NONE host=NONE no_create= nonopt=NONE no_recursion= prefix=NONE program_prefix=NONE program_suffix=NONE program_transform_name=s,x,x, silent= site= srcdir= target=NONE verbose= x_includes=NONE x_libraries=NONE bindir='${exec_prefix}/bin' sbindir='${exec_prefix}/sbin' libexecdir='${exec_prefix}/libexec' datadir='${prefix}/share' sysconfdir='${prefix}/etc' sharedstatedir='${prefix}/com' localstatedir='${prefix}/var' libdir='${exec_prefix}/lib' includedir='${prefix}/include' oldincludedir='/usr/include' infodir='${prefix}/info' mandir='${prefix}/man' # Initialize some other variables. subdirs= MFLAGS= MAKEFLAGS= SHELL=${CONFIG_SHELL-/bin/sh} # Maximum number of lines to put in a shell here document. ac_max_here_lines=12 ac_prev= for ac_option do # If the previous option needs an argument, assign it. 
if test -n "$ac_prev"; then eval "$ac_prev=\$ac_option" ac_prev= continue fi case "$ac_option" in -*=*) ac_optarg=`echo "$ac_option" | sed 's/[-_a-zA-Z0-9]*=//'` ;; *) ac_optarg= ;; esac # Accept the important Cygnus configure options, so we can diagnose typos. case "$ac_option" in -bindir | --bindir | --bindi | --bind | --bin | --bi) ac_prev=bindir ;; -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) bindir="$ac_optarg" ;; -build | --build | --buil | --bui | --bu) ac_prev=build ;; -build=* | --build=* | --buil=* | --bui=* | --bu=*) build="$ac_optarg" ;; -cache-file | --cache-file | --cache-fil | --cache-fi \ | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) ac_prev=cache_file ;; -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) cache_file="$ac_optarg" ;; -datadir | --datadir | --datadi | --datad | --data | --dat | --da) ac_prev=datadir ;; -datadir=* | --datadir=* | --datadi=* | --datad=* | --data=* | --dat=* \ | --da=*) datadir="$ac_optarg" ;; -disable-* | --disable-*) ac_feature=`echo $ac_option|sed -e 's/-*disable-//'` # Reject names that are not valid shell variable names. if test -n "`echo $ac_feature| sed 's/[-a-zA-Z0-9_]//g'`"; then { echo "configure: error: $ac_feature: invalid feature name" 1>&2; exit 1; } fi ac_feature=`echo $ac_feature| sed 's/-/_/g'` eval "enable_${ac_feature}=no" ;; -enable-* | --enable-*) ac_feature=`echo $ac_option|sed -e 's/-*enable-//' -e 's/=.*//'` # Reject names that are not valid shell variable names. if test -n "`echo $ac_feature| sed 's/[-_a-zA-Z0-9]//g'`"; then { echo "configure: error: $ac_feature: invalid feature name" 1>&2; exit 1; } fi ac_feature=`echo $ac_feature| sed 's/-/_/g'` case "$ac_option" in *=*) ;; *) ac_optarg=yes ;; esac eval "enable_${ac_feature}='$ac_optarg'" ;; -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ | --exec | --exe | --ex) ac_prev=exec_prefix ;; -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ | --exec=* | --exe=* | --ex=*) exec_prefix="$ac_optarg" ;; -gas | --gas | --ga | --g) # Obsolete; use --with-gas. with_gas=yes ;; -help | --help | --hel | --he) # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat << EOF Usage: configure [options] [host] Options: [defaults in brackets after descriptions] Configuration: --cache-file=FILE cache test results in FILE --help print this message --no-create do not create output files --quiet, --silent do not print \`checking...' 
messages --version print the version of autoconf that created configure Directory and file names: --prefix=PREFIX install architecture-independent files in PREFIX [$ac_default_prefix] --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX [same as prefix] --bindir=DIR user executables in DIR [EPREFIX/bin] --sbindir=DIR system admin executables in DIR [EPREFIX/sbin] --libexecdir=DIR program executables in DIR [EPREFIX/libexec] --datadir=DIR read-only architecture-independent data in DIR [PREFIX/share] --sysconfdir=DIR read-only single-machine data in DIR [PREFIX/etc] --sharedstatedir=DIR modifiable architecture-independent data in DIR [PREFIX/com] --localstatedir=DIR modifiable single-machine data in DIR [PREFIX/var] --libdir=DIR object code libraries in DIR [EPREFIX/lib] --includedir=DIR C header files in DIR [PREFIX/include] --oldincludedir=DIR C header files for non-gcc in DIR [/usr/include] --infodir=DIR info documentation in DIR [PREFIX/info] --mandir=DIR man documentation in DIR [PREFIX/man] --srcdir=DIR find the sources in DIR [configure dir or ..] --program-prefix=PREFIX prepend PREFIX to installed program names --program-suffix=SUFFIX append SUFFIX to installed program names --program-transform-name=PROGRAM run sed PROGRAM on installed program names EOF cat << EOF Host type: --build=BUILD configure for building on BUILD [BUILD=HOST] --host=HOST configure for HOST [guessed] --target=TARGET configure for TARGET [TARGET=HOST] Features and packages: --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no) --enable-FEATURE[=ARG] include FEATURE [ARG=yes] --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) --x-includes=DIR X include files are in DIR --x-libraries=DIR X library files are in DIR EOF if test -n "$ac_help"; then echo "--enable and --with options recognized:$ac_help" fi exit 0 ;; -host | --host | --hos | --ho) ac_prev=host ;; -host=* | --host=* | --hos=* | --ho=*) host="$ac_optarg" ;; -includedir | --includedir | --includedi | --included | --include \ | --includ | --inclu | --incl | --inc) ac_prev=includedir ;; -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ | --includ=* | --inclu=* | --incl=* | --inc=*) includedir="$ac_optarg" ;; -infodir | --infodir | --infodi | --infod | --info | --inf) ac_prev=infodir ;; -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) infodir="$ac_optarg" ;; -libdir | --libdir | --libdi | --libd) ac_prev=libdir ;; -libdir=* | --libdir=* | --libdi=* | --libd=*) libdir="$ac_optarg" ;; -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \ | --libexe | --libex | --libe) ac_prev=libexecdir ;; -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ | --libexe=* | --libex=* | --libe=*) libexecdir="$ac_optarg" ;; -localstatedir | --localstatedir | --localstatedi | --localstated \ | --localstate | --localstat | --localsta | --localst \ | --locals | --local | --loca | --loc | --lo) ac_prev=localstatedir ;; -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ | --localstate=* | --localstat=* | --localsta=* | --localst=* \ | --locals=* | --local=* | --loca=* | --loc=* | --lo=*) localstatedir="$ac_optarg" ;; -mandir | --mandir | --mandi | --mand | --man | --ma | --m) ac_prev=mandir ;; -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*) mandir="$ac_optarg" ;; -nfp | --nfp | --nf) # Obsolete; use --without-fp. 
with_fp=no ;; -no-create | --no-create | --no-creat | --no-crea | --no-cre \ | --no-cr | --no-c) no_create=yes ;; -no-recursion | --no-recursion | --no-recursio | --no-recursi \ | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) no_recursion=yes ;; -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ | --oldin | --oldi | --old | --ol | --o) ac_prev=oldincludedir ;; -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) oldincludedir="$ac_optarg" ;; -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) ac_prev=prefix ;; -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) prefix="$ac_optarg" ;; -program-prefix | --program-prefix | --program-prefi | --program-pref \ | --program-pre | --program-pr | --program-p) ac_prev=program_prefix ;; -program-prefix=* | --program-prefix=* | --program-prefi=* \ | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) program_prefix="$ac_optarg" ;; -program-suffix | --program-suffix | --program-suffi | --program-suff \ | --program-suf | --program-su | --program-s) ac_prev=program_suffix ;; -program-suffix=* | --program-suffix=* | --program-suffi=* \ | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) program_suffix="$ac_optarg" ;; -program-transform-name | --program-transform-name \ | --program-transform-nam | --program-transform-na \ | --program-transform-n | --program-transform- \ | --program-transform | --program-transfor \ | --program-transfo | --program-transf \ | --program-trans | --program-tran \ | --progr-tra | --program-tr | --program-t) ac_prev=program_transform_name ;; -program-transform-name=* | --program-transform-name=* \ | --program-transform-nam=* | --program-transform-na=* \ | --program-transform-n=* | --program-transform-=* \ | --program-transform=* | --program-transfor=* \ | --program-transfo=* | --program-transf=* \ | --program-trans=* | --program-tran=* \ | --progr-tra=* | --program-tr=* | --program-t=*) program_transform_name="$ac_optarg" ;; -q | -quiet | --quiet | --quie | --qui | --qu | --q \ | -silent | --silent | --silen | --sile | --sil) silent=yes ;; -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) ac_prev=sbindir ;; -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ | --sbi=* | --sb=*) sbindir="$ac_optarg" ;; -sharedstatedir | --sharedstatedir | --sharedstatedi \ | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ | --sharedst | --shareds | --shared | --share | --shar \ | --sha | --sh) ac_prev=sharedstatedir ;; -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \ | --sha=* | --sh=*) sharedstatedir="$ac_optarg" ;; -site | --site | --sit) ac_prev=site ;; -site=* | --site=* | --sit=*) site="$ac_optarg" ;; -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) ac_prev=srcdir ;; -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) srcdir="$ac_optarg" ;; -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ | --syscon | --sysco | --sysc | --sys | --sy) ac_prev=sysconfdir ;; -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*) 
sysconfdir="$ac_optarg" ;; -target | --target | --targe | --targ | --tar | --ta | --t) ac_prev=target ;; -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) target="$ac_optarg" ;; -v | -verbose | --verbose | --verbos | --verbo | --verb) verbose=yes ;; -version | --version | --versio | --versi | --vers) echo "configure generated by autoconf version 2.13" exit 0 ;; -with-* | --with-*) ac_package=`echo $ac_option|sed -e 's/-*with-//' -e 's/=.*//'` # Reject names that are not valid shell variable names. if test -n "`echo $ac_package| sed 's/[-_a-zA-Z0-9]//g'`"; then { echo "configure: error: $ac_package: invalid package name" 1>&2; exit 1; } fi ac_package=`echo $ac_package| sed 's/-/_/g'` case "$ac_option" in *=*) ;; *) ac_optarg=yes ;; esac eval "with_${ac_package}='$ac_optarg'" ;; -without-* | --without-*) ac_package=`echo $ac_option|sed -e 's/-*without-//'` # Reject names that are not valid shell variable names. if test -n "`echo $ac_package| sed 's/[-a-zA-Z0-9_]//g'`"; then { echo "configure: error: $ac_package: invalid package name" 1>&2; exit 1; } fi ac_package=`echo $ac_package| sed 's/-/_/g'` eval "with_${ac_package}=no" ;; --x) # Obsolete; use --with-x. with_x=yes ;; -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ | --x-incl | --x-inc | --x-in | --x-i) ac_prev=x_includes ;; -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) x_includes="$ac_optarg" ;; -x-libraries | --x-libraries | --x-librarie | --x-librari \ | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) ac_prev=x_libraries ;; -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) x_libraries="$ac_optarg" ;; -*) { echo "configure: error: $ac_option: invalid option; use --help to show usage" 1>&2; exit 1; } ;; *) if test -n "`echo $ac_option| sed 's/[-a-z0-9.]//g'`"; then echo "configure: warning: $ac_option: invalid host type" 1>&2 fi if test "x$nonopt" != xNONE; then { echo "configure: error: can only configure for one host and one target at a time" 1>&2; exit 1; } fi nonopt="$ac_option" ;; esac done if test -n "$ac_prev"; then { echo "configure: error: missing argument to --`echo $ac_prev | sed 's/_/-/g'`" 1>&2; exit 1; } fi trap 'rm -fr conftest* confdefs* core core.* *.core $ac_clean_files; exit 1' 1 2 15 # File descriptor usage: # 0 standard input # 1 file creation # 2 errors and warnings # 3 some systems may open it to /dev/tty # 4 used on the Kubota Titan # 6 checking for... messages and results # 5 compiler messages saved in config.log if test "$silent" = yes; then exec 6>/dev/null else exec 6>&1 fi exec 5>./config.log echo "\ This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. " 1>&5 # Strip out --no-create and --no-recursion so they do not pile up. # Also quote any args containing shell metacharacters. ac_configure_args= for ac_arg do case "$ac_arg" in -no-create | --no-create | --no-creat | --no-crea | --no-cre \ | --no-cr | --no-c) ;; -no-recursion | --no-recursion | --no-recursio | --no-recursi \ | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) ;; *" "*|*" "*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?]*) ac_configure_args="$ac_configure_args '$ac_arg'" ;; *) ac_configure_args="$ac_configure_args $ac_arg" ;; esac done # NLS nuisances. # Only set these to C if already set. 
These must not be set unconditionally # because not all systems understand e.g. LANG=C (notably SCO). # Fixing LC_MESSAGES prevents Solaris sh from translating var values in `set'! # Non-C LC_CTYPE values break the ctype check. if test "${LANG+set}" = set; then LANG=C; export LANG; fi if test "${LC_ALL+set}" = set; then LC_ALL=C; export LC_ALL; fi if test "${LC_MESSAGES+set}" = set; then LC_MESSAGES=C; export LC_MESSAGES; fi if test "${LC_CTYPE+set}" = set; then LC_CTYPE=C; export LC_CTYPE; fi # confdefs.h avoids OS command line length limits that DEFS can exceed. rm -rf conftest* confdefs.h # AIX cpp loses on an empty file, so make sure it contains at least a newline. echo > confdefs.h # A filename unique to this package, relative to the directory that # configure is in, which we can look for to find out if srcdir is correct. ac_unique_file=src/search_bestfirst_main.c # Find the source files, if location was not specified. if test -z "$srcdir"; then ac_srcdir_defaulted=yes # Try the directory containing this script, then its parent. ac_prog=$0 ac_confdir=`echo $ac_prog|sed 's%/[^/][^/]*$%%'` test "x$ac_confdir" = "x$ac_prog" && ac_confdir=. srcdir=$ac_confdir if test ! -r $srcdir/$ac_unique_file; then srcdir=.. fi else ac_srcdir_defaulted=no fi if test ! -r $srcdir/$ac_unique_file; then if test "$ac_srcdir_defaulted" = yes; then { echo "configure: error: can not find sources in $ac_confdir or .." 1>&2; exit 1; } else { echo "configure: error: can not find sources in $srcdir" 1>&2; exit 1; } fi fi srcdir=`echo "${srcdir}" | sed 's%\([^/]\)/*$%\1%'` # Prefer explicitly selected file to automatically selected ones. if test -z "$CONFIG_SITE"; then if test "x$prefix" != xNONE; then CONFIG_SITE="$prefix/share/config.site $prefix/etc/config.site" else CONFIG_SITE="$ac_default_prefix/share/config.site $ac_default_prefix/etc/config.site" fi fi for ac_site_file in $CONFIG_SITE; do if test -r "$ac_site_file"; then echo "loading site script $ac_site_file" . "$ac_site_file" fi done if test -r "$cache_file"; then echo "loading cache $cache_file" . $cache_file else echo "creating cache $cache_file" > $cache_file fi ac_ext=c # CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options. ac_cpp='$CPP $CPPFLAGS' ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5' ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5' cross_compiling=$ac_cv_prog_cc_cross ac_exeext= ac_objext=o if (echo "testing\c"; echo 1,2,3) | grep c >/dev/null; then # Stardent Vistra SVR4 grep lacks -e, says ghazi@caip.rutgers.edu. if (echo -n testing; echo 1,2,3) | sed s/-n/xn/ | grep xn >/dev/null; then ac_n= ac_c=' ' ac_t=' ' else ac_n=-n ac_c= ac_t= fi else ac_n= ac_c='\c' ac_t= fi ac_aux_dir= for ac_dir in ../support $srcdir/../support; do if test -f $ac_dir/install-sh; then ac_aux_dir=$ac_dir ac_install_sh="$ac_aux_dir/install-sh -c" break elif test -f $ac_dir/install.sh; then ac_aux_dir=$ac_dir ac_install_sh="$ac_aux_dir/install.sh -c" break fi done if test -z "$ac_aux_dir"; then { echo "configure: error: can not find install-sh or install.sh in ../support $srcdir/../support" 1>&2; exit 1; } fi ac_config_guess=$ac_aux_dir/config.guess ac_config_sub=$ac_aux_dir/config.sub ac_configure=$ac_aux_dir/configure # This should be Cygnus configure. JULIUS_PRODUCTNAME=JuliusLib JULIUS_VERSION=4.2.2 # Check whether --enable-pthread or --disable-pthread was given. 
if test "${enable_pthread+set}" = set; then enableval="$enable_pthread" want_pthread=$enableval else want_pthread=yes fi # Check whether --enable-setup or --disable-setup was given. if test "${enable_setup+set}" = set; then enableval="$enable_setup" easy_setup=$enableval else easy_setup=fast fi if test "$easy_setup" = standard; then want_unigram_factoring=yes want_pass1_iwcd=yes want_pass2_strict_iwcd=yes want_gprune_default=safe elif test "$easy_setup" = fast; then want_unigram_factoring=yes want_lowmem2=yes want_pass1_iwcd=yes want_gprune_default=beam elif test "$easy_setup" = v2.1; then want_gprune_default=safe fi # Check whether --enable-factor1 or --disable-factor1 was given. if test "${enable_factor1+set}" = set; then enableval="$enable_factor1" want_unigram_factoring=$enableval fi # Check whether --enable-factor2 or --disable-factor2 was given. if test "${enable_factor2+set}" = set; then enableval="$enable_factor2" if test "$enableval" = yes; then want_unigram_factoring=no else want_unigram_factoring=yes fi fi # Check whether --enable-lowmem or --disable-lowmem was given. if test "${enable_lowmem+set}" = set; then enableval="$enable_lowmem" if test "$enableval" = yes; then cat >> confdefs.h <<\EOF #define LOWMEM 1 EOF fi fi # Check whether --enable-lowmem2 or --disable-lowmem2 was given. if test "${enable_lowmem2+set}" = set; then enableval="$enable_lowmem2" want_lowmem2=$enableval fi # Check whether --enable-monotree or --disable-monotree was given. if test "${enable_monotree+set}" = set; then enableval="$enable_monotree" if test "$enableval" = yes; then cat >> confdefs.h <<\EOF #define MONOTREE 1 EOF fi fi # Check whether --enable-score-beam or --disable-score-beam was given. if test "${enable_score_beam+set}" = set; then enableval="$enable_score_beam" if test "$enableval" = yes; then cat >> confdefs.h <<\EOF #define SCAN_BEAM 1 EOF fi else cat >> confdefs.h <<\EOF #define SCAN_BEAM 1 EOF fi # Check whether --enable-iwcd1 or --disable-iwcd1 was given. if test "${enable_iwcd1+set}" = set; then enableval="$enable_iwcd1" want_pass1_iwcd=$enableval fi # Check whether --enable-strict-iwcd2 or --disable-strict-iwcd2 was given. if test "${enable_strict_iwcd2+set}" = set; then enableval="$enable_strict_iwcd2" want_pass2_strict_iwcd=$enableval fi # Check whether --enable-wpair or --disable-wpair was given. if test "${enable_wpair+set}" = set; then enableval="$enable_wpair" if test "$enableval" = yes; then cat >> confdefs.h <<\EOF #define WPAIR 1 EOF fi fi # Check whether --enable-wpair-nlimit or --disable-wpair-nlimit was given. if test "${enable_wpair_nlimit+set}" = set; then enableval="$enable_wpair_nlimit" if test "$enableval" = yes; then cat >> confdefs.h <<\EOF #define WPAIR 1 EOF cat >> confdefs.h <<\EOF #define WPAIR_KEEP_NLIMIT 1 EOF fi fi # Check whether --enable-word-graph or --disable-word-graph was given. if test "${enable_word_graph+set}" = set; then enableval="$enable_word_graph" if test "$enableval" = yes; then cat >> confdefs.h <<\EOF #define WORD_GRAPH 1 EOF fi fi # Check whether --enable-cm or --disable-cm was given. if test "${enable_cm+set}" = set; then enableval="$enable_cm" use_cm=$enableval else use_cm=yes fi # Check whether --enable-cm-nbest or --disable-cm-nbest was given. if test "${enable_cm_nbest+set}" = set; then enableval="$enable_cm_nbest" use_cm_nbest=$enableval else use_cm_nbest=no fi # Check whether --enable-cm-multiple-alpha or --disable-cm-multiple-alpha was given. 
if test "${enable_cm_multiple_alpha+set}" = set; then enableval="$enable_cm_multiple_alpha" use_cm_multiple_alpha=$enableval else use_cm_multiple_alpha=no fi # disable lmfix # Check whether --enable-lmfix or --disable-lmfix was given. if test "${enable_lmfix+set}" = set; then enableval="$enable_lmfix" if test "$enableval" = yes; then cat >> confdefs.h <<\EOF #define LM_FIX_DOUBLE_SCORING 1 EOF fi else cat >> confdefs.h <<\EOF #define LM_FIX_DOUBLE_SCORING 1 EOF fi # enable word graph output from N-best sentences # Check whether --enable-graphout-nbest or --disable-graphout-nbest was given. if test "${enable_graphout_nbest+set}" = set; then enableval="$enable_graphout_nbest" use_graphout_nbest=$enableval else use_graphout_nbest=no fi # enable pruning by confidence score on 2nd pass # Check whether --enable-cmthres or --disable-cmthres was given. if test "${enable_cmthres+set}" = set; then enableval="$enable_cmthres" use_cmthres=$enableval else use_cmthres=no fi # Check whether --enable-gmm-vad or --disable-gmm-vad was given. if test "${enable_gmm_vad+set}" = set; then enableval="$enable_gmm_vad" want_gmm_vad=$enableval else want_gmm_vad=no fi # Check whether --enable-decoder-vad or --disable-decoder-vad was given. if test "${enable_decoder_vad+set}" = set; then enableval="$enable_decoder_vad" want_spseg_naist=$enableval else want_spseg_naist=no fi # Check whether --enable-power-reject or --disable-power-reject was given. if test "${enable_power_reject+set}" = set; then enableval="$enable_power_reject" want_power_reject=$enableval else want_power_reject=no fi # Check whether --enable-plugin or --disable-plugin was given. if test "${enable_plugin+set}" = set; then enableval="$enable_plugin" if test "$enableval" = yes; then cat >> confdefs.h <<\EOF #define ENABLE_PLUGIN 1 EOF fi else cat >> confdefs.h <<\EOF #define ENABLE_PLUGIN 1 EOF fi if test "$use_cm" = yes; then cat >> confdefs.h <<\EOF #define CONFIDENCE_MEASURE 1 EOF fi if test "$use_cm_nbest" = yes; then if test "$use_cm" = yes; then cat >> confdefs.h <<\EOF #define CM_NBEST 1 EOF else { echo "configure: error: --disable-cm and --enable-cm-nbest conflicts" 1>&2; exit 1; } fi fi if test "$use_cm_multiple_alpha" = yes; then if test "$use_cm" = yes; then cat >> confdefs.h <<\EOF #define CM_MULTIPLE_ALPHA 1 EOF else { echo "configure: error: --disable-cm and --enable-cm-multiple-alpha conflicts" 1>&2; exit 1; } fi fi if test "$use_cmthres" = yes; then if test "$use_cm" = yes; then if test "$use_cm_nbest" = yes; then { echo "configure: error: --enable-cmthres cannot be used with --enable-cm-nbest" 1>&2; exit 1; } elif test "$use_cm_multiple_alpha" = yes; then { echo "configure: error: --enable-cmthres cannot be used with --enable-cm-multiple-alpha" 1>&2; exit 1; } else cat >> confdefs.h <<\EOF #define CM_SEARCH_LIMIT 1 EOF fi else { echo "configure: error: --disable-cm and --enable-cmthres conflicts" 1>&2; exit 1; } fi fi if test "$use_graphout_nbest" = no; then cat >> confdefs.h <<\EOF #define GRAPHOUT_DYNAMIC 1 EOF cat >> confdefs.h <<\EOF #define GRAPHOUT_SEARCH 1 EOF fi if test ! -z "$want_unigram_factoring"; then if test "$want_unigram_factoring" = yes; then cat >> confdefs.h <<\EOF #define UNIGRAM_FACTORING 1 EOF fi fi if test ! -z "$want_pass1_iwcd"; then if test "$want_pass1_iwcd" = yes; then cat >> confdefs.h <<\EOF #define PASS1_IWCD 1 EOF fi fi if test ! -z "$want_pass2_strict_iwcd"; then if test "$want_pass2_strict_iwcd" = yes; then cat >> confdefs.h <<\EOF #define PASS2_STRICT_IWCD 1 EOF fi fi if test ! 
-z "$want_lowmem2"; then if test "$want_lowmem2" = yes; then cat >> confdefs.h <<\EOF #define LOWMEM2 1 EOF fi fi if test "$want_gprune_default" = safe; then cat >> confdefs.h <<\EOF #define GPRUNE_DEFAULT_SAFE 1 EOF elif test "$want_gprune_default" = beam; then cat >> confdefs.h <<\EOF #define GPRUNE_DEFAULT_BEAM 1 EOF fi if test "$want_gmm_vad" = yes; then cat >> confdefs.h <<\EOF #define GMM_VAD 1 EOF cat >> confdefs.h <<\EOF #define BACKEND_VAD 1 EOF fi if test "$want_spseg_naist" = yes; then cat >> confdefs.h <<\EOF #define SPSEGMENT_NAIST 1 EOF cat >> confdefs.h <<\EOF #define BACKEND_VAD 1 EOF fi if test "$want_power_reject" = yes; then cat >> confdefs.h <<\EOF #define POWER_REJECT 1 EOF fi # Make sure we can run config.sub. if ${CONFIG_SHELL-/bin/sh} $ac_config_sub sun4 >/dev/null 2>&1; then : else { echo "configure: error: can not run $ac_config_sub" 1>&2; exit 1; } fi echo $ac_n "checking host system type""... $ac_c" 1>&6 echo "configure:1011: checking host system type" >&5 host_alias=$host case "$host_alias" in NONE) case $nonopt in NONE) if host_alias=`${CONFIG_SHELL-/bin/sh} $ac_config_guess`; then : else { echo "configure: error: can not guess host type; you must specify one" 1>&2; exit 1; } fi ;; *) host_alias=$nonopt ;; esac ;; esac host=`${CONFIG_SHELL-/bin/sh} $ac_config_sub $host_alias` host_cpu=`echo $host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\1/'` host_vendor=`echo $host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\2/'` host_os=`echo $host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\3/'` echo "$ac_t""$host" 1>&6 echo $ac_n "checking host-specific optimization flag""... $ac_c" 1>&6 echo "configure:1033: checking host-specific optimization flag" >&5 if test -z "$CFLAGS" ; then OPTFLAG=../support/cflags.${host_cpu}-${host_vendor}-${host_os} if test -f "$OPTFLAG" ; then . $OPTFLAG echo "$ac_t""$OPTFLAG" 1>&6 else echo "$ac_t""no" 1>&6 fi else echo "$ac_t""skipped" 1>&6 fi # Extract the first word of "gcc", so it can be a program name with args. set dummy gcc; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 echo "configure:1049: checking for $ac_word" >&5 if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" ac_dummy="$PATH" for ac_dir in $ac_dummy; do test -z "$ac_dir" && ac_dir=. if test -f $ac_dir/$ac_word; then ac_cv_prog_CC="gcc" break fi done IFS="$ac_save_ifs" fi fi CC="$ac_cv_prog_CC" if test -n "$CC"; then echo "$ac_t""$CC" 1>&6 else echo "$ac_t""no" 1>&6 fi if test -z "$CC"; then # Extract the first word of "cc", so it can be a program name with args. set dummy cc; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 echo "configure:1079: checking for $ac_word" >&5 if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" ac_prog_rejected=no ac_dummy="$PATH" for ac_dir in $ac_dummy; do test -z "$ac_dir" && ac_dir=. if test -f $ac_dir/$ac_word; then if test "$ac_dir/$ac_word" = "/usr/ucb/cc"; then ac_prog_rejected=yes continue fi ac_cv_prog_CC="cc" break fi done IFS="$ac_save_ifs" if test $ac_prog_rejected = yes; then # We found a bogon in the path, so make sure we never use it. set dummy $ac_cv_prog_CC shift if test $# -gt 0; then # We chose a different compiler from the bogus one. 
# However, it has the same basename, so the bogon will be chosen # first if we set CC to just the basename; use the full file name. shift set dummy "$ac_dir/$ac_word" "$@" shift ac_cv_prog_CC="$@" fi fi fi fi CC="$ac_cv_prog_CC" if test -n "$CC"; then echo "$ac_t""$CC" 1>&6 else echo "$ac_t""no" 1>&6 fi if test -z "$CC"; then case "`uname -s`" in *win32* | *WIN32*) # Extract the first word of "cl", so it can be a program name with args. set dummy cl; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 echo "configure:1130: checking for $ac_word" >&5 if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" ac_dummy="$PATH" for ac_dir in $ac_dummy; do test -z "$ac_dir" && ac_dir=. if test -f $ac_dir/$ac_word; then ac_cv_prog_CC="cl" break fi done IFS="$ac_save_ifs" fi fi CC="$ac_cv_prog_CC" if test -n "$CC"; then echo "$ac_t""$CC" 1>&6 else echo "$ac_t""no" 1>&6 fi ;; esac fi test -z "$CC" && { echo "configure: error: no acceptable cc found in \$PATH" 1>&2; exit 1; } fi echo $ac_n "checking whether the C compiler ($CC $CFLAGS $LDFLAGS) works""... $ac_c" 1>&6 echo "configure:1162: checking whether the C compiler ($CC $CFLAGS $LDFLAGS) works" >&5 ac_ext=c # CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options. ac_cpp='$CPP $CPPFLAGS' ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5' ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5' cross_compiling=$ac_cv_prog_cc_cross cat > conftest.$ac_ext << EOF #line 1173 "configure" #include "confdefs.h" main(){return(0);} EOF if { (eval echo configure:1178: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then ac_cv_prog_cc_works=yes # If we can't run a trivial program, we are probably using a cross compiler. if (./conftest; exit) 2>/dev/null; then ac_cv_prog_cc_cross=no else ac_cv_prog_cc_cross=yes fi else echo "configure: failed program was:" >&5 cat conftest.$ac_ext >&5 ac_cv_prog_cc_works=no fi rm -fr conftest* ac_ext=c # CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options. ac_cpp='$CPP $CPPFLAGS' ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5' ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5' cross_compiling=$ac_cv_prog_cc_cross echo "$ac_t""$ac_cv_prog_cc_works" 1>&6 if test $ac_cv_prog_cc_works = no; then { echo "configure: error: installation or configuration problem: C compiler cannot create executables." 1>&2; exit 1; } fi echo $ac_n "checking whether the C compiler ($CC $CFLAGS $LDFLAGS) is a cross-compiler""... $ac_c" 1>&6 echo "configure:1204: checking whether the C compiler ($CC $CFLAGS $LDFLAGS) is a cross-compiler" >&5 echo "$ac_t""$ac_cv_prog_cc_cross" 1>&6 cross_compiling=$ac_cv_prog_cc_cross echo $ac_n "checking whether we are using GNU C""... $ac_c" 1>&6 echo "configure:1209: checking whether we are using GNU C" >&5 if eval "test \"`echo '$''{'ac_cv_prog_gcc'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.c <&5; (eval $ac_try) 2>&5; }; } | egrep yes >/dev/null 2>&1; then ac_cv_prog_gcc=yes else ac_cv_prog_gcc=no fi fi echo "$ac_t""$ac_cv_prog_gcc" 1>&6 if test $ac_cv_prog_gcc = yes; then GCC=yes else GCC= fi ac_test_CFLAGS="${CFLAGS+set}" ac_save_CFLAGS="$CFLAGS" CFLAGS= echo $ac_n "checking whether ${CC-cc} accepts -g""... 
$ac_c" 1>&6 echo "configure:1237: checking whether ${CC-cc} accepts -g" >&5 if eval "test \"`echo '$''{'ac_cv_prog_cc_g'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else echo 'void f(){}' > conftest.c if test -z "`${CC-cc} -g -c conftest.c 2>&1`"; then ac_cv_prog_cc_g=yes else ac_cv_prog_cc_g=no fi rm -f conftest* fi echo "$ac_t""$ac_cv_prog_cc_g" 1>&6 if test "$ac_test_CFLAGS" = set; then CFLAGS="$ac_save_CFLAGS" elif test $ac_cv_prog_cc_g = yes; then if test "$GCC" = yes; then CFLAGS="-g -O2" else CFLAGS="-g" fi else if test "$GCC" = yes; then CFLAGS="-O2" else CFLAGS= fi fi echo $ac_n "checking how to run the C preprocessor""... $ac_c" 1>&6 echo "configure:1269: checking how to run the C preprocessor" >&5 # On Suns, sometimes $CPP names a directory. if test -n "$CPP" && test -d "$CPP"; then CPP= fi if test -z "$CPP"; then if eval "test \"`echo '$''{'ac_cv_prog_CPP'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else # This must be in double quotes, not single quotes, because CPP may get # substituted into the Makefile and "${CC-cc}" will confuse make. CPP="${CC-cc} -E" # On the NeXT, cc -E runs the code through the compiler's parser, # not just through cpp. cat > conftest.$ac_ext < Syntax Error EOF ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" { (eval echo configure:1290: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` if test -z "$ac_err"; then : else echo "$ac_err" >&5 echo "configure: failed program was:" >&5 cat conftest.$ac_ext >&5 rm -rf conftest* CPP="${CC-cc} -E -traditional-cpp" cat > conftest.$ac_ext < Syntax Error EOF ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" { (eval echo configure:1307: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` if test -z "$ac_err"; then : else echo "$ac_err" >&5 echo "configure: failed program was:" >&5 cat conftest.$ac_ext >&5 rm -rf conftest* CPP="${CC-cc} -nologo -E" cat > conftest.$ac_ext < Syntax Error EOF ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" { (eval echo configure:1324: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` if test -z "$ac_err"; then : else echo "$ac_err" >&5 echo "configure: failed program was:" >&5 cat conftest.$ac_ext >&5 rm -rf conftest* CPP=/lib/cpp fi rm -f conftest* fi rm -f conftest* fi rm -f conftest* ac_cv_prog_CPP="$CPP" fi CPP="$ac_cv_prog_CPP" else ac_cv_prog_CPP="$CPP" fi echo "$ac_t""$CPP" 1>&6 # Find a good install program. We prefer a C program (faster), # so one script is as good as another. But avoid the broken or # incompatible versions: # SysV /etc/install, /usr/sbin/install # SunOS /usr/etc/install # IRIX /sbin/install # AIX /bin/install # AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag # AFS /usr/afsws/bin/install, which mishandles nonexistent args # SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff" # ./install, which can be erroneously created by make from ./install.sh. echo $ac_n "checking for a BSD compatible install""... $ac_c" 1>&6 echo "configure:1361: checking for a BSD compatible install" >&5 if test -z "$INSTALL"; then if eval "test \"`echo '$''{'ac_cv_path_install'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else IFS="${IFS= }"; ac_save_IFS="$IFS"; IFS=":" for ac_dir in $PATH; do # Account for people who put trailing slashes in PATH elements. 
case "$ac_dir/" in /|./|.//|/etc/*|/usr/sbin/*|/usr/etc/*|/sbin/*|/usr/afsws/bin/*|/usr/ucb/*) ;; *) # OSF1 and SCO ODT 3.0 have their own names for install. # Don't use installbsd from OSF since it installs stuff as root # by default. for ac_prog in ginstall scoinst install; do if test -f $ac_dir/$ac_prog; then if test $ac_prog = install && grep dspmsg $ac_dir/$ac_prog >/dev/null 2>&1; then # AIX install. It has an incompatible calling convention. : else ac_cv_path_install="$ac_dir/$ac_prog -c" break 2 fi fi done ;; esac done IFS="$ac_save_IFS" fi if test "${ac_cv_path_install+set}" = set; then INSTALL="$ac_cv_path_install" else # As a last resort, use the slow shell script. We don't cache a # path for INSTALL within a source directory, because that will # break other packages using the cache if that directory is # removed, or if the path is relative. INSTALL="$ac_install_sh" fi fi echo "$ac_t""$INSTALL" 1>&6 # Use test -z because SunOS4 sh mishandles braces in ${var-val}. # It thinks the first close brace ends the variable substitution. test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}' test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL_PROGRAM}' test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644' # Extract the first word of "rm", so it can be a program name with args. set dummy rm; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 echo "configure:1416: checking for $ac_word" >&5 if eval "test \"`echo '$''{'ac_cv_path_RM'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else case "$RM" in /*) ac_cv_path_RM="$RM" # Let the user override the test with a path. ;; ?:/*) ac_cv_path_RM="$RM" # Let the user override the test with a dos path. ;; *) IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" ac_dummy="$PATH" for ac_dir in $ac_dummy; do test -z "$ac_dir" && ac_dir=. if test -f $ac_dir/$ac_word; then ac_cv_path_RM="$ac_dir/$ac_word" break fi done IFS="$ac_save_ifs" ;; esac fi RM="$ac_cv_path_RM" if test -n "$RM"; then echo "$ac_t""$RM" 1>&6 else echo "$ac_t""no" 1>&6 fi # Extract the first word of "ar", so it can be a program name with args. set dummy ar; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 echo "configure:1451: checking for $ac_word" >&5 if eval "test \"`echo '$''{'ac_cv_path_AR'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else case "$AR" in /*) ac_cv_path_AR="$AR" # Let the user override the test with a path. ;; ?:/*) ac_cv_path_AR="$AR" # Let the user override the test with a dos path. ;; *) IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" ac_dummy="$PATH" for ac_dir in $ac_dummy; do test -z "$ac_dir" && ac_dir=. if test -f $ac_dir/$ac_word; then ac_cv_path_AR="$ac_dir/$ac_word" break fi done IFS="$ac_save_ifs" ;; esac fi AR="$ac_cv_path_AR" if test -n "$AR"; then echo "$ac_t""$AR" 1>&6 else echo "$ac_t""no" 1>&6 fi # Extract the first word of "ranlib", so it can be a program name with args. set dummy ranlib; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 echo "configure:1486: checking for $ac_word" >&5 if eval "test \"`echo '$''{'ac_cv_prog_RANLIB'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else if test -n "$RANLIB"; then ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test. else IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" ac_dummy="$PATH" for ac_dir in $ac_dummy; do test -z "$ac_dir" && ac_dir=. 
if test -f $ac_dir/$ac_word; then ac_cv_prog_RANLIB="ranlib" break fi done IFS="$ac_save_ifs" test -z "$ac_cv_prog_RANLIB" && ac_cv_prog_RANLIB=":" fi fi RANLIB="$ac_cv_prog_RANLIB" if test -n "$RANLIB"; then echo "$ac_t""$RANLIB" 1>&6 else echo "$ac_t""no" 1>&6 fi echo $ac_n "checking for Cygwin environment""... $ac_c" 1>&6 echo "configure:1514: checking for Cygwin environment" >&5 if eval "test \"`echo '$''{'ac_cv_cygwin'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext <&5; (eval $ac_compile) 2>&5; }; then rm -rf conftest* ac_cv_cygwin=yes else echo "configure: failed program was:" >&5 cat conftest.$ac_ext >&5 rm -rf conftest* ac_cv_cygwin=no fi rm -f conftest* rm -f conftest* fi echo "$ac_t""$ac_cv_cygwin" 1>&6 CYGWIN= test "$ac_cv_cygwin" = yes && CYGWIN=yes echo $ac_n "checking for mingw32 environment""... $ac_c" 1>&6 echo "configure:1547: checking for mingw32 environment" >&5 if eval "test \"`echo '$''{'ac_cv_mingw32'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext <&5; (eval $ac_compile) 2>&5; }; then rm -rf conftest* ac_cv_mingw32=yes else echo "configure: failed program was:" >&5 cat conftest.$ac_ext >&5 rm -rf conftest* ac_cv_mingw32=no fi rm -f conftest* rm -f conftest* fi echo "$ac_t""$ac_cv_mingw32" 1>&6 MINGW32= test "$ac_cv_mingw32" = yes && MINGW32=yes echo $ac_n "checking for executable suffix""... $ac_c" 1>&6 echo "configure:1578: checking for executable suffix" >&5 if eval "test \"`echo '$''{'ac_cv_exeext'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else if test "$CYGWIN" = yes || test "$MINGW32" = yes; then ac_cv_exeext=.exe else rm -f conftest* echo 'int main () { return 0; }' > conftest.$ac_ext ac_cv_exeext= if { (eval echo configure:1588: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; }; then for file in conftest.*; do case $file in *.$ac_ext | *.c | *.o | *.obj) ;; *) ac_cv_exeext=`echo $file | sed -e s/conftest//` ;; esac done else { echo "configure: error: installation or configuration problem: compiler cannot create executables." 1>&2; exit 1; } fi rm -f conftest* test x"${ac_cv_exeext}" = x && ac_cv_exeext=no fi fi EXEEXT="" test x"${ac_cv_exeext}" != xno && EXEEXT=${ac_cv_exeext} echo "$ac_t""${ac_cv_exeext}" 1>&6 ac_exeext=$EXEEXT echo $ac_n "checking for ANSI C header files""... $ac_c" 1>&6 echo "configure:1611: checking for ANSI C header files" >&5 if eval "test \"`echo '$''{'ac_cv_header_stdc'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext < #include #include #include EOF ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" { (eval echo configure:1624: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` if test -z "$ac_err"; then rm -rf conftest* ac_cv_header_stdc=yes else echo "$ac_err" >&5 echo "configure: failed program was:" >&5 cat conftest.$ac_ext >&5 rm -rf conftest* ac_cv_header_stdc=no fi rm -f conftest* if test $ac_cv_header_stdc = yes; then # SunOS 4.x string.h does not declare mem*, contrary to ANSI. cat > conftest.$ac_ext < EOF if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | egrep "memchr" >/dev/null 2>&1; then : else rm -rf conftest* ac_cv_header_stdc=no fi rm -f conftest* fi if test $ac_cv_header_stdc = yes; then # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI. 
cat > conftest.$ac_ext < EOF if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | egrep "free" >/dev/null 2>&1; then : else rm -rf conftest* ac_cv_header_stdc=no fi rm -f conftest* fi if test $ac_cv_header_stdc = yes; then # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi. if test "$cross_compiling" = yes; then : else cat > conftest.$ac_ext < #define ISLOWER(c) ('a' <= (c) && (c) <= 'z') #define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c)) #define XOR(e, f) (((e) && !(f)) || (!(e) && (f))) int main () { int i; for (i = 0; i < 256; i++) if (XOR (islower (i), ISLOWER (i)) || toupper (i) != TOUPPER (i)) exit(2); exit (0); } EOF if { (eval echo configure:1691: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null then : else echo "configure: failed program was:" >&5 cat conftest.$ac_ext >&5 rm -fr conftest* ac_cv_header_stdc=no fi rm -fr conftest* fi fi fi echo "$ac_t""$ac_cv_header_stdc" 1>&6 if test $ac_cv_header_stdc = yes; then cat >> confdefs.h <<\EOF #define STDC_HEADERS 1 EOF fi echo $ac_n "checking for working const""... $ac_c" 1>&6 echo "configure:1716: checking for working const" >&5 if eval "test \"`echo '$''{'ac_cv_c_const'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext <j = 5; } { /* ULTRIX-32 V3.1 (Rev 9) vcc rejects this */ const int foo = 10; } ; return 0; } EOF if { (eval echo configure:1770: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then rm -rf conftest* ac_cv_c_const=yes else echo "configure: failed program was:" >&5 cat conftest.$ac_ext >&5 rm -rf conftest* ac_cv_c_const=no fi rm -f conftest* fi echo "$ac_t""$ac_cv_c_const" 1>&6 if test $ac_cv_c_const = no; then cat >> confdefs.h <<\EOF #define const EOF fi echo $ac_n "checking return type of signal handlers""... $ac_c" 1>&6 echo "configure:1792: checking return type of signal handlers" >&5 if eval "test \"`echo '$''{'ac_cv_type_signal'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext < #include #ifdef signal #undef signal #endif #ifdef __cplusplus extern "C" void (*signal (int, void (*)(int)))(int); #else void (*signal ()) (); #endif int main() { int i; ; return 0; } EOF if { (eval echo configure:1814: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then rm -rf conftest* ac_cv_type_signal=void else echo "configure: failed program was:" >&5 cat conftest.$ac_ext >&5 rm -rf conftest* ac_cv_type_signal=int fi rm -f conftest* fi echo "$ac_t""$ac_cv_type_signal" 1>&6 cat >> confdefs.h <&6 echo "configure:1833: checking for dlopen" >&5 if eval "test \"`echo '$''{'ac_cv_func_dlopen'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext < /* Override any gcc2 internal prototype to avoid an error. */ /* We use char because int might match the return type of a gcc2 builtin and then its argument prototype would still apply. */ char dlopen(); int main() { /* The GNU C library defines this for functions which it implements to always fail with ENOSYS. Some functions are actually named something starting with __ and the normal name is an alias. 
*/ #if defined (__stub_dlopen) || defined (__stub___dlopen) choke me #else dlopen(); #endif ; return 0; } EOF if { (eval echo configure:1861: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then rm -rf conftest* eval "ac_cv_func_dlopen=yes" else echo "configure: failed program was:" >&5 cat conftest.$ac_ext >&5 rm -rf conftest* eval "ac_cv_func_dlopen=no" fi rm -f conftest* fi if eval "test \"`echo '$ac_cv_func_'dlopen`\" = yes"; then echo "$ac_t""yes" 1>&6 : else echo "$ac_t""no" 1>&6 echo $ac_n "checking for dlopen in -ldl""... $ac_c" 1>&6 echo "configure:1879: checking for dlopen in -ldl" >&5 ac_lib_var=`echo dl'_'dlopen | sed 'y%./+-%__p_%'` if eval "test \"`echo '$''{'ac_cv_lib_$ac_lib_var'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else ac_save_LIBS="$LIBS" LIBS="-ldl $LIBS" cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then rm -rf conftest* eval "ac_cv_lib_$ac_lib_var=yes" else echo "configure: failed program was:" >&5 cat conftest.$ac_ext >&5 rm -rf conftest* eval "ac_cv_lib_$ac_lib_var=no" fi rm -f conftest* LIBS="$ac_save_LIBS" fi if eval "test \"`echo '$ac_cv_lib_'$ac_lib_var`\" = yes"; then echo "$ac_t""yes" 1>&6 ac_tr_lib=HAVE_LIB`echo dl | sed -e 's/^a-zA-Z0-9_/_/g' \ -e 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/'` cat >> confdefs.h <&6 fi fi if test "$want_pthread" = yes; then case "$host_os" in freebsd*) # FreeBSD echo $ac_n "checking for linking POSIX threaded process""... $ac_c" 1>&6 echo "configure:1933: checking for linking POSIX threaded process" >&5 ac_save_CFLAGS="$CFLAGS" CFLAGS="$CFLAGS -pthread" cat > conftest.$ac_ext < int main() { pthread_equal(NULL,NULL); ; return 0; } EOF if { (eval echo configure:1944: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then rm -rf conftest* use_pthread=yes cat >> confdefs.h <<\EOF #define HAVE_PTHREAD 1 EOF CPPFLAGS="$CPPFLAGS -pthread" else echo "configure: failed program was:" >&5 cat conftest.$ac_ext >&5 rm -rf conftest* use_pthread=no fi rm -f conftest* CFLAGS="$ac_save_CFLAGS" ;; *) # other with libpthread echo $ac_n "checking for POSIX thread library in -lpthread""... $ac_c" 1>&6 echo "configure:1965: checking for POSIX thread library in -lpthread" >&5 ac_save_LIBS_p="$LIBS" LIBS="$LIBS -lpthread" cat > conftest.$ac_ext < int main() { pthread_equal(NULL,NULL); ; return 0; } EOF if { (eval echo configure:1976: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then rm -rf conftest* use_pthread=yes cat >> confdefs.h <<\EOF #define HAVE_PTHREAD 1 EOF else echo "configure: failed program was:" >&5 cat conftest.$ac_ext >&5 rm -rf conftest* use_pthread=no LIBS="$ac_save_LIBS_p" fi rm -f conftest* esac echo "$ac_t"""$use_pthread"" 1>&6 else use_pthread=no fi cat >> confdefs.h <> confdefs.h <> confdefs.h <> confdefs.h < confcache <<\EOF # This file is a shell script that caches the results of configure # tests run on this system so they can be shared between configure # scripts and configure runs. It is not useful on other systems. # If it contains results you don't want to keep, you may remove or edit it. # # By default, configure uses ./config.cache as the cache file, # creating it if it does not exist already. You can give configure # the --cache-file=FILE option to use a different cache file; that is # what configure does when it calls configure scripts in # subdirectories, so they share the cache. # Giving --cache-file=/dev/null disables caching, for debugging configure. 
# config.status only pays attention to the cache file if you give it the # --recheck option to rerun configure. # EOF # The following way of writing the cache mishandles newlines in values, # but we know of no workaround that is simple, portable, and efficient. # So, don't put newlines in cache variables' values. # Ultrix sh set writes to stderr and can't be redirected directly, # and sets the high bit in the cache file unless we assign to the vars. (set) 2>&1 | case `(ac_space=' '; set | grep ac_space) 2>&1` in *ac_space=\ *) # `set' does not quote correctly, so add quotes (double-quote substitution # turns \\\\ into \\, and sed turns \\ into \). sed -n \ -e "s/'/'\\\\''/g" \ -e "s/^\\([a-zA-Z0-9_]*_cv_[a-zA-Z0-9_]*\\)=\\(.*\\)/\\1=\${\\1='\\2'}/p" ;; *) # `set' quotes correctly as required by POSIX, so do not add quotes. sed -n -e 's/^\([a-zA-Z0-9_]*_cv_[a-zA-Z0-9_]*\)=\(.*\)/\1=${\1=\2}/p' ;; esac >> confcache if cmp -s $cache_file confcache; then : else if test -w $cache_file; then echo "updating cache $cache_file" cat confcache > $cache_file else echo "not updating unwritable cache $cache_file" fi fi rm -f confcache trap 'rm -fr conftest* confdefs* core core.* *.core $ac_clean_files; exit 1' 1 2 15 test "x$prefix" = xNONE && prefix=$ac_default_prefix # Let make expand exec_prefix. test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' # Any assignment to VPATH causes Sun make to only execute # the first set of double-colon rules, so remove it if not needed. # If there is a colon in the path, we need to keep it. if test "x$srcdir" = x.; then ac_vpsub='/^[ ]*VPATH[ ]*=[^:]*$/d' fi trap 'rm -f $CONFIG_STATUS conftest*; exit 1' 1 2 15 DEFS=-DHAVE_CONFIG_H # Without the "./", some shells look in PATH for config.status. : ${CONFIG_STATUS=./config.status} echo creating $CONFIG_STATUS rm -f $CONFIG_STATUS cat > $CONFIG_STATUS </dev/null | sed 1q`: # # $0 $ac_configure_args # # Compiler output produced by configure, useful for debugging # configure, is in ./config.log if it exists. 
ac_cs_usage="Usage: $CONFIG_STATUS [--recheck] [--version] [--help]" for ac_option do case "\$ac_option" in -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) echo "running \${CONFIG_SHELL-/bin/sh} $0 $ac_configure_args --no-create --no-recursion" exec \${CONFIG_SHELL-/bin/sh} $0 $ac_configure_args --no-create --no-recursion ;; -version | --version | --versio | --versi | --vers | --ver | --ve | --v) echo "$CONFIG_STATUS generated by autoconf version 2.13" exit 0 ;; -help | --help | --hel | --he | --h) echo "\$ac_cs_usage"; exit 0 ;; *) echo "\$ac_cs_usage"; exit 1 ;; esac done ac_given_srcdir=$srcdir ac_given_INSTALL="$INSTALL" trap 'rm -fr `echo "Makefile libjulius-config libjulius-config-dist src/version.c doxygen.conf.ver include/julius/config.h" | sed "s/:[^ ]*//g"` conftest*; exit 1' 1 2 15 EOF cat >> $CONFIG_STATUS < conftest.subs <<\\CEOF $ac_vpsub $extrasub s%@SHELL@%$SHELL%g s%@CFLAGS@%$CFLAGS%g s%@CPPFLAGS@%$CPPFLAGS%g s%@CXXFLAGS@%$CXXFLAGS%g s%@FFLAGS@%$FFLAGS%g s%@DEFS@%$DEFS%g s%@LDFLAGS@%$LDFLAGS%g s%@LIBS@%$LIBS%g s%@exec_prefix@%$exec_prefix%g s%@prefix@%$prefix%g s%@program_transform_name@%$program_transform_name%g s%@bindir@%$bindir%g s%@sbindir@%$sbindir%g s%@libexecdir@%$libexecdir%g s%@datadir@%$datadir%g s%@sysconfdir@%$sysconfdir%g s%@sharedstatedir@%$sharedstatedir%g s%@localstatedir@%$localstatedir%g s%@libdir@%$libdir%g s%@includedir@%$includedir%g s%@oldincludedir@%$oldincludedir%g s%@infodir@%$infodir%g s%@mandir@%$mandir%g s%@host@%$host%g s%@host_alias@%$host_alias%g s%@host_cpu@%$host_cpu%g s%@host_vendor@%$host_vendor%g s%@host_os@%$host_os%g s%@CC@%$CC%g s%@CPP@%$CPP%g s%@INSTALL_PROGRAM@%$INSTALL_PROGRAM%g s%@INSTALL_SCRIPT@%$INSTALL_SCRIPT%g s%@INSTALL_DATA@%$INSTALL_DATA%g s%@RM@%$RM%g s%@AR@%$AR%g s%@RANLIB@%$RANLIB%g s%@EXEEXT@%$EXEEXT%g s%@JULIUS_PRODUCTNAME@%$JULIUS_PRODUCTNAME%g s%@JULIUS_VERSION@%$JULIUS_VERSION%g s%@easy_setup@%$easy_setup%g CEOF EOF cat >> $CONFIG_STATUS <<\EOF # Split the substitutions into bite-sized pieces for seds with # small command number limits, like on Digital OSF/1 and HP-UX. ac_max_sed_cmds=90 # Maximum number of lines to put in a sed script. ac_file=1 # Number of current file. ac_beg=1 # First line for current file. ac_end=$ac_max_sed_cmds # Line after last line for current file. ac_more_lines=: ac_sed_cmds="" while $ac_more_lines; do if test $ac_beg -gt 1; then sed "1,${ac_beg}d; ${ac_end}q" conftest.subs > conftest.s$ac_file else sed "${ac_end}q" conftest.subs > conftest.s$ac_file fi if test ! -s conftest.s$ac_file; then ac_more_lines=false rm -f conftest.s$ac_file else if test -z "$ac_sed_cmds"; then ac_sed_cmds="sed -f conftest.s$ac_file" else ac_sed_cmds="$ac_sed_cmds | sed -f conftest.s$ac_file" fi ac_file=`expr $ac_file + 1` ac_beg=$ac_end ac_end=`expr $ac_end + $ac_max_sed_cmds` fi done if test -z "$ac_sed_cmds"; then ac_sed_cmds=cat fi EOF cat >> $CONFIG_STATUS <> $CONFIG_STATUS <<\EOF for ac_file in .. $CONFIG_FILES; do if test "x$ac_file" != x..; then # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in". case "$ac_file" in *:*) ac_file_in=`echo "$ac_file"|sed 's%[^:]*:%%'` ac_file=`echo "$ac_file"|sed 's%:.*%%'` ;; *) ac_file_in="${ac_file}.in" ;; esac # Adjust a relative srcdir, top_srcdir, and INSTALL for subdirectories. # Remove last slash and all that follows it. Not all systems have dirname. ac_dir=`echo $ac_file|sed 's%/[^/][^/]*$%%'` if test "$ac_dir" != "$ac_file" && test "$ac_dir" != .; then # The file is in a subdirectory. test ! 
-d "$ac_dir" && mkdir "$ac_dir" ac_dir_suffix="/`echo $ac_dir|sed 's%^\./%%'`" # A "../" for each directory in $ac_dir_suffix. ac_dots=`echo $ac_dir_suffix|sed 's%/[^/]*%../%g'` else ac_dir_suffix= ac_dots= fi case "$ac_given_srcdir" in .) srcdir=. if test -z "$ac_dots"; then top_srcdir=. else top_srcdir=`echo $ac_dots|sed 's%/$%%'`; fi ;; /*) srcdir="$ac_given_srcdir$ac_dir_suffix"; top_srcdir="$ac_given_srcdir" ;; *) # Relative path. srcdir="$ac_dots$ac_given_srcdir$ac_dir_suffix" top_srcdir="$ac_dots$ac_given_srcdir" ;; esac case "$ac_given_INSTALL" in [/$]*) INSTALL="$ac_given_INSTALL" ;; *) INSTALL="$ac_dots$ac_given_INSTALL" ;; esac echo creating "$ac_file" rm -f "$ac_file" configure_input="Generated automatically from `echo $ac_file_in|sed 's%.*/%%'` by configure." case "$ac_file" in *Makefile*) ac_comsub="1i\\ # $configure_input" ;; *) ac_comsub= ;; esac ac_file_inputs=`echo $ac_file_in|sed -e "s%^%$ac_given_srcdir/%" -e "s%:% $ac_given_srcdir/%g"` sed -e "$ac_comsub s%@configure_input@%$configure_input%g s%@srcdir@%$srcdir%g s%@top_srcdir@%$top_srcdir%g s%@INSTALL@%$INSTALL%g " $ac_file_inputs | (eval "$ac_sed_cmds") > $ac_file fi; done rm -f conftest.s* # These sed commands are passed to sed as "A NAME B NAME C VALUE D", where # NAME is the cpp macro being defined and VALUE is the value it is being given. # # ac_d sets the value in "#define NAME VALUE" lines. ac_dA='s%^\([ ]*\)#\([ ]*define[ ][ ]*\)' ac_dB='\([ ][ ]*\)[^ ]*%\1#\2' ac_dC='\3' ac_dD='%g' # ac_u turns "#undef NAME" with trailing blanks into "#define NAME VALUE". ac_uA='s%^\([ ]*\)#\([ ]*\)undef\([ ][ ]*\)' ac_uB='\([ ]\)%\1#\2define\3' ac_uC=' ' ac_uD='\4%g' # ac_e turns "#undef NAME" without trailing blanks into "#define NAME VALUE". ac_eA='s%^\([ ]*\)#\([ ]*\)undef\([ ][ ]*\)' ac_eB='$%\1#\2define\3' ac_eC=' ' ac_eD='%g' if test "${CONFIG_HEADERS+set}" != set; then EOF cat >> $CONFIG_STATUS <> $CONFIG_STATUS <<\EOF fi for ac_file in .. $CONFIG_HEADERS; do if test "x$ac_file" != x..; then # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in". case "$ac_file" in *:*) ac_file_in=`echo "$ac_file"|sed 's%[^:]*:%%'` ac_file=`echo "$ac_file"|sed 's%:.*%%'` ;; *) ac_file_in="${ac_file}.in" ;; esac echo creating $ac_file rm -f conftest.frag conftest.in conftest.out ac_file_inputs=`echo $ac_file_in|sed -e "s%^%$ac_given_srcdir/%" -e "s%:% $ac_given_srcdir/%g"` cat $ac_file_inputs > conftest.in EOF # Transform confdefs.h into a sed script conftest.vals that substitutes # the proper values into config.h.in to produce config.h. And first: # Protect against being on the right side of a sed subst in config.status. # Protect against being in an unquoted here document in config.status. rm -f conftest.vals cat > conftest.hdr <<\EOF s/[\\&%]/\\&/g s%[\\$`]%\\&%g s%#define \([A-Za-z_][A-Za-z0-9_]*\) *\(.*\)%${ac_dA}\1${ac_dB}\1${ac_dC}\2${ac_dD}%gp s%ac_d%ac_u%gp s%ac_u%ac_e%gp EOF sed -n -f conftest.hdr confdefs.h > conftest.vals rm -f conftest.hdr # This sed command replaces #undef with comments. This is necessary, for # example, in the case of _POSIX_SOURCE, which is predefined and required # on some systems where configure will not decide to define it. cat >> conftest.vals <<\EOF s%^[ ]*#[ ]*undef[ ][ ]*[a-zA-Z_][a-zA-Z_0-9]*%/* & */% EOF # Break up conftest.vals because some shells have a limit on # the size of here documents, and old seds have small limits too. rm -f conftest.tail while : do ac_lines=`grep -c . conftest.vals` # grep -c gives empty output for an empty file on some AIX systems. 
if test -z "$ac_lines" || test "$ac_lines" -eq 0; then break; fi # Write a limited-size here document to conftest.frag. echo ' cat > conftest.frag <> $CONFIG_STATUS sed ${ac_max_here_lines}q conftest.vals >> $CONFIG_STATUS echo 'CEOF sed -f conftest.frag conftest.in > conftest.out rm -f conftest.in mv conftest.out conftest.in ' >> $CONFIG_STATUS sed 1,${ac_max_here_lines}d conftest.vals > conftest.tail rm -f conftest.vals mv conftest.tail conftest.vals done rm -f conftest.vals cat >> $CONFIG_STATUS <<\EOF rm -f conftest.frag conftest.h echo "/* $ac_file. Generated automatically by configure. */" > conftest.h cat conftest.in >> conftest.h rm -f conftest.in if cmp -s $ac_file conftest.h 2>/dev/null; then echo "$ac_file is unchanged" rm -f conftest.h else # Remove last slash and all that follows it. Not all systems have dirname. ac_dir=`echo $ac_file|sed 's%/[^/][^/]*$%%'` if test "$ac_dir" != "$ac_file" && test "$ac_dir" != .; then # The file is in a subdirectory. test ! -d "$ac_dir" && mkdir "$ac_dir" fi rm -f $ac_file mv conftest.h $ac_file fi fi; done EOF cat >> $CONFIG_STATUS <> $CONFIG_STATUS <<\EOF chmod +x libjulius-config libjulius-config-dist exit 0 EOF chmod +x $CONFIG_STATUS rm -fr confdefs* $ac_clean_files test "$no_create" = yes || ${CONFIG_SHELL-/bin/sh} $CONFIG_STATUS || exit 1 julius-4.2.2/libjulius/libjulius-config-dist.in0000755001051700105040000000207011071161147020071 0ustar ritrlab#!/bin/sh # # libsent configuration result reporter # # "libsent-config --cflags" generates C defines, and # "libsent-config --libs" generates libraries needed for compile with # the sentlib # # by Doshita Lab. Speech Group, Kyoto University 1991-2000 # by Shikano Lab. Speech Group, NAIST 2002 # # ripped from gtk's gtk-config.in # # $Id: libjulius-config-dist.in,v 1.1 2008/10/02 15:01:59 sumomo Exp $ # # # @configure_input@ # version="\ Julius/Julian library rev.@JULIUS_VERSION@" usage="\ Usage: libjulius-config [--libs] [--cflags] [--info] [--version]" prefix="@prefix@" exec_prefix="@exec_prefix@" if test $# -eq 0; then echo "${usage}" 1>&2 exit 1 fi while test $# -gt 0; do case "$1" in -*=*) optarg=`echo "$1" | sed 's/[-_a-zA-Z0-9]*=//'` ;; *) optarg= ;; esac case $1 in --cflags) echo -I@includedir@ @CPPFLAGS@ ;; --libs) echo -L@libdir@ -ljulius @LDFLAGS@ @LIBS@ ;; --version) echo "${version}" ;; *) echo "${usage}" 1>&2 exit 1 ;; esac shift done echo exit 0 julius-4.2.2/libjulius/Makefile.in0000644001051700105040000000547212004452401015377 0ustar ritrlab# Copyright (c) 1991-2012 Kawahara Lab., Kyoto University # Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan # Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology # Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology # All rights reserved # # $Id: Makefile.in,v 1.8 2012/07/27 08:44:49 sumomo Exp $ # SHELL=/bin/sh .SUFFIXES: .SUFFIXES: .c .o .c.o: $(CC) $(CFLAGS) $(CPPFLAGS) -o $@ -c $< LIBSENT=../libsent CC=@CC@ CFLAGS=@CFLAGS@ CPPFLAGS=-Iinclude -I$(LIBSENT)/include @CPPFLAGS@ `$(LIBSENT)/libsent-config --cflags` LDFLAGS=@LDFLAGS@ @LIBS@ -L$(LIBSENT) `$(LIBSENT)/libsent-config --libs` RM=@RM@ -f AR=@AR@ r RANLIB=@RANLIB@ ## install prefix=@prefix@ exec_prefix=@exec_prefix@ bindir=@bindir@ libdir=@libdir@ includedir=@includedir@ INSTALL=@INSTALL@ INSTALL_DATA=@INSTALL_DATA@ INSTALL_PROGRAM=@INSTALL_PROGRAM@ ############################################################ TARGET = libjulius.a OBJ = \ src/recogmain.o \ src/instance.o \ src/default.o \ src/jfunc.o \ 
src/callback.o \ src/useropt.o \ src/m_usage.o \ src/m_options.o \ src/m_jconf.o \ src/m_chkparam.o \ src/m_info.o \ src/m_fusion.o \ src/hmm_check.o \ src/multi-gram.o \ src/gramlist.o \ src/wchmm.o \ src/wchmm_check.o \ src/m_adin.o \ src/adin-cut.o \ src/wav2mfcc.o \ src/beam.o \ src/pass1.o \ src/spsegment.o \ src/realtime-1stpass.o \ src/factoring_sub.o \ src/outprob_style.o \ src/backtrellis.o \ src/search_bestfirst_main.o \ src/search_bestfirst_v1.o \ src/search_bestfirst_v2.o \ src/ngram_decode.o \ src/dfa_decode.o \ src/graphout.o \ src/confnet.o \ src/gmm.o \ src/word_align.o \ src/plugin.o \ src/version.o ############################################################ all: $(TARGET) $(TARGET): $(OBJ) $(AR) $@ $? $(RANLIB) $@ ############################################################ install: install.lib install.include install.bin install.lib: $(TARGET) ${INSTALL} -d ${libdir} ${INSTALL_DATA} $(TARGET) ${libdir} install.include: ${INSTALL} -d ${includedir}/julius ${INSTALL_DATA} include/julius/*.h ${includedir}/julius install.bin: libjulius-config-dist ${INSTALL} -d ${bindir} ${INSTALL_PROGRAM} libjulius-config-dist ${bindir}/libjulius-config depend: makedepend -- $(CFLAGS) -- $(OBJSENT) clean: $(RM) *.bak *~ core TAGS $(RM) src/*.o src/*.bak src/*~ src/core src/TAGS $(RM) include/julius/*~ $(RM) config.log config.cache distclean: $(RM) *.bak *~ core TAGS $(RM) src/*.o src/*.bak src/*~ src/core src/TAGS $(RM) include/julius/*~ $(RM) config.log config.cache $(RM) $(TARGET) $(RM) src/version.c $(RM) libjulius-config libjulius-config-dist $(RM) doxygen.conf.ver $(RM) config.status include/julius/config.h $(RM) Makefile ############################################################ configure: autoconf $(RM) include/julius/config.h.in autoheader julius-4.2.2/libjulius/configure.in0000644001051700105040000002520012004463434015642 0ustar ritrlabdnl Copyright (c) 1991-2012 Kawahara Lab., Kyoto University dnl Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan dnl Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology dnl Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology dnl All rights reserved dnl dnl $Id: configure.in,v 1.17 2012/07/27 10:02:04 sumomo Exp $ dnl AC_INIT(src/search_bestfirst_main.c) AC_CONFIG_HEADER(include/julius/config.h) AC_CONFIG_AUX_DIR(../support) JULIUS_PRODUCTNAME=JuliusLib JULIUS_VERSION=4.2.2 dnl Checks for options dnl pthread support AC_ARG_ENABLE(pthread, [ --disable-pthread [debug] do not use pthread for speech input], want_pthread=$enableval ,want_pthread=yes) dnl easy setup AC_ARG_ENABLE(setup, [ --enable-setup=... 
select algorithm set standard high accuracy, slow speed fast balanced for both speed and accuracy (default) v2.1 backward compatible with Rev.2.1], easy_setup=$enableval, easy_setup=fast ) dnl expand easy setup if test "$easy_setup" = standard; then dnl JULIUS related begin dnl AC_DEFINE(UNIGRAM_FACTORING) want_unigram_factoring=yes dnl JULIUS related end dnl AC_DEFINE(PASS1_IWCD) want_pass1_iwcd=yes dnl AC_DEFINE(PASS2_STRICT_IWCD) want_pass2_strict_iwcd=yes dnl AC_DEFINE(GPRUNE_DEFAULT_SAFE) want_gprune_default=safe elif test "$easy_setup" = fast; then dnl JULIUS related begin dnl AC_DEFINE(UNIGRAM_FACTORING) want_unigram_factoring=yes dnl AC_DEFINE(LOWMEM2) want_lowmem2=yes dnl JULIUS related end dnl AC_DEFINE(PASS1_IWCD) want_pass1_iwcd=yes dnl AC_DEFINE(GPRUNE_DEFAULT_BEAM) want_gprune_default=beam elif test "$easy_setup" = v2.1; then dnl AC_DEFINE(GPRUNE_DEFAULT_SAFE) want_gprune_default=safe fi dnl multipath version dnl AC_ARG_ENABLE(multipath, dnl [ --enable-multipath compile as multipath version], dnl want_multipath=$enableval dnl ,want_multipath=no) dnl JULIUS related begin dnl enable 1-gram factoring AC_ARG_ENABLE(factor1, [ --enable-factor1 use 1-gram factoring on 1st pass], dnl AC_DEFINE(UNIGRAM_FACTORING) want_unigram_factoring=$enableval) AC_ARG_ENABLE(factor2, [ --enable-factor2 use 2-gram factoring on 1st pass], if test "$enableval" = yes; then want_unigram_factoring=no else want_unigram_factoring=yes fi) dnl no linear tree separation AC_ARG_ENABLE(lowmem, [ --enable-lowmem all words share a single root on lexicon tree], if test "$enableval" = yes; then AC_DEFINE(LOWMEM) fi) dnl linear tree separation for hi-freq words AC_ARG_ENABLE(lowmem2, [ --enable-lowmem2 separate hi-freq words from lexicon tree], dnl AC_DEFINE(LOWMEM2) want_lowmem2=$enableval) dnl JULIUS related end dnl use monophone tree on 1st pass AC_ARG_ENABLE(monotree, [ --enable-monotree monophone lexicon on 1st pass (EXPERIMENTAL)], if test "$enableval" = yes; then AC_DEFINE(MONOTREE) fi) dnl enable score envelope beaming on 2nd pass forward scan AC_ARG_ENABLE(score-beam, [ --disable-score-beam enable score envelope beaming on 2nd pass scan], if test "$enableval" = yes; then AC_DEFINE(SCAN_BEAM) fi ,AC_DEFINE(SCAN_BEAM)) dnl enable inter-word context dependency handling on 1st pass AC_ARG_ENABLE(iwcd1, [ --enable-iwcd1 handle inter-word triphone on 1st pass], dnl AC_DEFINE(PASS1_IWCD) want_pass1_iwcd=$enableval) dnl enable strict inter-word context handling on 2nd pass (slow) AC_ARG_ENABLE(strict-iwcd2, [ --enable-strict-iwcd2 strict IWCD scoring on 2nd pass], dnl AC_DEFINE(PASS2_STRICT_IWCD) want_pass2_strict_iwcd=$enableval) dnl use word-pair approximation on 1st pass AC_ARG_ENABLE(wpair, [ --enable-wpair use word-pair approximation on 1st pass], if test "$enableval" = yes; then AC_DEFINE(WPAIR) fi) dnl use N-best approximation AC_ARG_ENABLE(wpair-nlimit, [ --enable-wpair-nlimit keep only N-best path with wpair (-nlimit)], if test "$enableval" = yes; then AC_DEFINE(WPAIR) AC_DEFINE(WPAIR_KEEP_NLIMIT) fi) dnl output word graph on 1st pass (default: trellis) AC_ARG_ENABLE(word-graph, [ --enable-word-graph use word graph instead of trellis between passes], if test "$enableval" = yes; then AC_DEFINE(WORD_GRAPH) fi) dnl disable confidence measure computation AC_ARG_ENABLE(cm, [ --disable-cm disable confidence measure computation], use_cm=$enableval ,use_cm=yes) dnl use confidence measure based on N-best candidate AC_ARG_ENABLE(cm-nbest, [ --enable-cm-nbest use N-best CM instead of search CM], 
use_cm_nbest=$enableval ,use_cm_nbest=no) dnl for exprtiment: confidence measure output for multiple alphas AC_ARG_ENABLE(cm-multiple-alpha, [ --enable-cm-multiple-alpha EXPERIMENTAL: test multi alphas (need much mem)], use_cm_multiple_alpha=$enableval ,use_cm_multiple_alpha=no) # disable lmfix AC_ARG_ENABLE(lmfix, [ --disable-lmfix make LM computing compatible with < 3.4], if test "$enableval" = yes; then AC_DEFINE(LM_FIX_DOUBLE_SCORING) fi ,AC_DEFINE(LM_FIX_DOUBLE_SCORING)) # enable word graph output from N-best sentences AC_ARG_ENABLE(graphout-nbest, [ --enable-graphout-nbest word graph output from N-best sentence], use_graphout_nbest=$enableval ,use_graphout_nbest=no) # enable pruning by confidence score on 2nd pass AC_ARG_ENABLE(cmthres, [ --enable-cmthres enable confidence score based pruning on 2nd pass], use_cmthres=$enableval ,use_cmthres=no) dnl GMM VAD AC_ARG_ENABLE(gmm-vad, [ --enable-gmm-vad enable GMM-based VAD (EXPERIMENTAL)], want_gmm_vad=$enableval ,want_gmm_vad=no ) dnl naist spsegment AC_ARG_ENABLE(decoder-vad, [ --enable-decoder-vad enable a new decoder-based VAD by NAIST team], want_spseg_naist=$enableval ,want_spseg_naist=no ) dnl naist spsegment AC_ARG_ENABLE(power-reject, [ --enable-power-reject enable post rejection by power], want_power_reject=$enableval ,want_power_reject=no ) dnl plugin support AC_ARG_ENABLE(plugin, [ --disable-plugin disable plugin support], if test "$enableval" = yes; then AC_DEFINE(ENABLE_PLUGIN) fi ,AC_DEFINE(ENABLE_PLUGIN)) dnl CM options check if test "$use_cm" = yes; then AC_DEFINE(CONFIDENCE_MEASURE) fi if test "$use_cm_nbest" = yes; then if test "$use_cm" = yes; then AC_DEFINE(CM_NBEST) else AC_MSG_ERROR([--disable-cm and --enable-cm-nbest conflicts]) fi fi if test "$use_cm_multiple_alpha" = yes; then if test "$use_cm" = yes; then AC_DEFINE(CM_MULTIPLE_ALPHA) else AC_MSG_ERROR([--disable-cm and --enable-cm-multiple-alpha conflicts]) fi fi if test "$use_cmthres" = yes; then if test "$use_cm" = yes; then if test "$use_cm_nbest" = yes; then AC_MSG_ERROR([--enable-cmthres cannot be used with --enable-cm-nbest]) elif test "$use_cm_multiple_alpha" = yes; then AC_MSG_ERROR([--enable-cmthres cannot be used with --enable-cm-multiple-alpha]) else AC_DEFINE(CM_SEARCH_LIMIT) fi else AC_MSG_ERROR([--disable-cm and --enable-cmthres conflicts]) fi fi dnl graphout option if test "$use_graphout_nbest" = no; then AC_DEFINE(GRAPHOUT_DYNAMIC) AC_DEFINE(GRAPHOUT_SEARCH) fi dnl final definition based on easy setup and separate option if test ! -z "$want_unigram_factoring"; then if test "$want_unigram_factoring" = yes; then AC_DEFINE(UNIGRAM_FACTORING) fi fi if test ! -z "$want_pass1_iwcd"; then if test "$want_pass1_iwcd" = yes; then AC_DEFINE(PASS1_IWCD) fi fi if test ! -z "$want_pass2_strict_iwcd"; then if test "$want_pass2_strict_iwcd" = yes; then AC_DEFINE(PASS2_STRICT_IWCD) fi fi if test ! 
-z "$want_lowmem2"; then if test "$want_lowmem2" = yes; then AC_DEFINE(LOWMEM2) fi fi if test "$want_gprune_default" = safe; then AC_DEFINE(GPRUNE_DEFAULT_SAFE) elif test "$want_gprune_default" = beam; then AC_DEFINE(GPRUNE_DEFAULT_BEAM) fi dnl multipath version dnl if test "$want_multipath" = yes; then dnl AC_DEFINE(MULTIPATH_VERSION) dnl VERSION="${VERSION}-multipath" dnl fi if test "$want_gmm_vad" = yes; then AC_DEFINE(GMM_VAD) AC_DEFINE(BACKEND_VAD) fi if test "$want_spseg_naist" = yes; then AC_DEFINE(SPSEGMENT_NAIST) AC_DEFINE(BACKEND_VAD) fi if test "$want_power_reject" = yes; then AC_DEFINE(POWER_REJECT) fi dnl ------------------------------------------------------------------- dnl Checks for system. AC_CANONICAL_HOST dnl Checks for optimization flag AC_MSG_CHECKING([host-specific optimization flag]) if test -z "$CFLAGS" ; then OPTFLAG=../support/cflags.${host_cpu}-${host_vendor}-${host_os} if test -f "$OPTFLAG" ; then . $OPTFLAG AC_MSG_RESULT([$OPTFLAG]) else AC_MSG_RESULT([no]) fi else AC_MSG_RESULT([skipped]) fi dnl Checks for compiler. AC_PROG_CC AC_PROG_CPP dnl Checks for programs. AC_PROG_INSTALL AC_PATH_PROG(RM, rm) AC_PATH_PROG(AR, ar) AC_PROG_RANLIB AC_EXEEXT dnl Checks for libraries. dnl add '-lLIBRARY' to LIBS and define 'HAVE_LIBLIBRARY' dnl AC_CHECK_LIB(m, log10) dnl Checks for header files. AC_HEADER_STDC dnl AC_CHECK_HEADERS(fcntl.h strings.h sys/file.h sys/ioctl.h unistd.h) dnl Checks for typedefs, structures, and compiler characteristics. AC_C_CONST dnl AC_TYPE_SIZE_T dnl Checks for library functions. dnl AC_PROG_GCC_TRADITIONAL AC_TYPE_SIGNAL dnl AC_CHECK_FUNCS(strdup strstr) dnl AC_CHECK_FUNC(gethostbyname,,AC_CHECK_LIB(nsl,gethostbyname)) dnl AC_CHECK_FUNC(connect,,AC_CHECK_LIB(socket, connect)) AC_CHECK_FUNC(dlopen,,AC_CHECK_LIB(dl, dlopen)) dnl check for POSIX thread support if test "$want_pthread" = yes; then case "$host_os" in freebsd*) # FreeBSD AC_MSG_CHECKING([for linking POSIX threaded process]) ac_save_CFLAGS="$CFLAGS" CFLAGS="$CFLAGS -pthread" AC_TRY_LINK([#include ],[pthread_equal(NULL,NULL);], use_pthread=yes AC_DEFINE(HAVE_PTHREAD) CPPFLAGS="$CPPFLAGS -pthread", use_pthread=no ) CFLAGS="$ac_save_CFLAGS" ;; *) # other with libpthread AC_MSG_CHECKING([for POSIX thread library in -lpthread]) ac_save_LIBS_p="$LIBS" LIBS="$LIBS -lpthread" AC_TRY_LINK([#include ],[pthread_equal(NULL,NULL);], use_pthread=yes AC_DEFINE(HAVE_PTHREAD), use_pthread=no LIBS="$ac_save_LIBS_p" ) esac AC_MSG_RESULT("$use_pthread") else use_pthread=no fi dnl write names AC_DEFINE_UNQUOTED(JULIUS_PRODUCTNAME, "$JULIUS_PRODUCTNAME") AC_DEFINE_UNQUOTED(JULIUS_VERSION, "$JULIUS_VERSION") AC_DEFINE_UNQUOTED(JULIUS_SETUP, "$easy_setup") AC_DEFINE_UNQUOTED(JULIUS_HOSTINFO, "$host") AC_SUBST(JULIUS_PRODUCTNAME) AC_SUBST(JULIUS_VERSION) AC_SUBST(easy_setup) AC_OUTPUT_COMMANDS( [chmod +x libjulius-config libjulius-config-dist ]) AC_OUTPUT(Makefile libjulius-config libjulius-config-dist src/version.c doxygen.conf.ver) julius-4.2.2/libjulius/include/0000755001051700105040000000000012004463507014756 5ustar ritrlabjulius-4.2.2/libjulius/include/julius/0000755001051700105040000000000012004463507016271 5ustar ritrlabjulius-4.2.2/libjulius/include/julius/multi-gram.h0000644001051700105040000000426112004452401020512 0ustar ritrlab/** * @file multi-gram.h * * * @brief 複数の文法を同時に扱うための定義. * * * * @brief Definitions for managing multiple grammars. 
* * * @author Akinobu Lee * @date Fri Jul 8 14:47:05 2005 * * $Revision: 1.4 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #ifndef __J_MULTI_GRAM_H__ #define __J_MULTI_GRAM_H__ /// Maximum length of grammar name #define MAXGRAMNAMELEN 512 /// Grammar holder typedef struct __multi_gram__ { char name[MAXGRAMNAMELEN]; ///< Unique name given by user unsigned short id; ///< Unique ID DFA_INFO *dfa; ///< DFA describing syntax of this grammar WORD_INFO *winfo; ///< Dictionary of this grammar int hook; ///< Work area to store command hook boolean newbie; ///< TRUE if just read and not yet configured boolean active; ///< TRUE if active for recognition ///< below vars holds the location of this grammar within the global grammar */ int state_begin; ///< Location of DFA states in the global grammar int cate_begin; ///< Location of category entries in the global grammar int word_begin; ///< Location of words in the dictionary of global grammar struct __multi_gram__ *next; ///< Link to the next grammar entry } MULTIGRAM; /// List of grammars to be read at startup typedef struct __gram_list__ { char *dfafile; ///< DFA file path char *dictfile; ///< Dict file path struct __gram_list__ *next; ///< Link to next entry } GRAMLIST; /* for command hook */ #define MULTIGRAM_DEFAULT 0 ///< Grammar hook value of no operation #define MULTIGRAM_DELETE 1 ///< Grammar hook bit specifying that this grammar is to be deleted #define MULTIGRAM_ACTIVATE 2 ///< Grammar hook bit specifying that this grammar is to be activated #define MULTIGRAM_DEACTIVATE 4 ///< Grammar hook bit specifying that this grammar is to be deactivated #define MULTIGRAM_MODIFIED 8 /// < Grammar hook bit indicating modification and needs rebuilding the whole lexicon #endif /* __J_MULTI_GRAM_H__ */ julius-4.2.2/libjulius/include/julius/search.h0000644001051700105040000001551012004452401017700 0ustar ritrlab/** * @file search.h * * * @brief 第2パスで使用する仮説候補を扱う構造体 * * ここでは,第2パスのスタックデコーディングで用いられる仮説候補の構造体 * が定義されています. NODE は部分文候補を保持し,合計スコアや現在のViterbi * スコア,言語スコア,信頼度スコア,推定された終端フレームなどの様々な仮説 * 情報を保持します. WordGraph は単語グラフ生成時にグラフ中の単語をあらわす * のに用いられます. NEXTWORD は単語展開時に次単語候補を表現します. POPNODE * は探索空間可視化機能 (--enable-visualize) 指定時に,探索の過程を残しておく * のに使われます. * * * * @brief Strucures for handling hypotheses on the 2nd pass. * * * This file includes definitions for handling hypothesis used on the 2nd * pass stack decoding. Partial sentence hypotheses are stored in NODE * structure, with its various information about total scores, viterbi scores, * language scores, confidence scores, estimated end frame, and so on. * WordGraph express a word in graph, generated through the 2nd pass. * NEXTWORD is used to hold next word information at * hypothesis expantion stage. POPNODE will be used when Visualization is * enabled to store the search trail. * * @author Akinobu Lee * @date Wed Sep 07 07:40:11 2005 * * $Revision: 1.4 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #ifndef __J_SEARCH_H__ #define __J_SEARCH_H__ /** * * 第2パスの次単語候補. ある仮説から次に接続しうる単語の集合をあらわすのに * 用いられる. * * * Next word candidate in the 2nd pass. 
This will be used to hold word * candidates that can be connected to a given hypothesis. * */ typedef struct __nextword__ { WORD_ID id; ///< Word ID LOGPROB lscore; ///< Language score of this word (always 0 for dfa) int next_state; ///< (dfa) Next DFA grammar state ID boolean can_insert_sp; ///< (dfa) TRUE if a short pause can insert between source hypothesis and this word TRELLIS_ATOM *tre; ///< Pointer to the corresponding word in trellis } NEXTWORD; #ifdef VISUALIZE /** * * 可視化機能用に,第2パスでpopされたトレリス単語の情報を保持する. * * * Store popped trellis words on the 2nd pass for visualization. * */ typedef struct __popnode__ { TRELLIS_ATOM *tre; ///< Last referred trellis word LOGPROB score; ///< Total score when expanded (g(x)+h(x)) struct __popnode__ *last; ///< Link to previous word context struct __popnode__ *next; ///< List pointer to next data } POPNODE; #endif /* VISUALIZE */ /** * * 第2パスの文仮説 * * * Sentence hypothesis at 2nd pass * */ typedef struct __node__ { struct __node__ *next; ///< Link to next hypothesis, used in stack struct __node__ *prev; ///< Link to previous hypothesis, used in stack boolean endflag; ///< TRUE if this is a final sentence result WORD_ID seq[MAXSEQNUM]; ///< Word sequence short seqnum; ///< Length of @a seq LOGPROB score; ///< Total score (forward+backward, LM+AM) short bestt; ///< Best connection frame of last word in word trellis short estimated_next_t; ///< Estimated next connection time frame (= beginning of last word on word trellis): next word hypothesis will be looked up near this frame on word trellis LOGPROB *g; ///< Current forward viterbi score in each frame LOGPROB final_g; ///< Extra forward score on end of frame for multipath mode int state; ///< (dfa) Current DFA state ID TRELLIS_ATOM *tre; ///< Trellis word of last word #ifndef PASS2_STRICT_IWCD /* for inter-word context dependency, the last phone on previous word need to be calculated later */ LOGPROB *g_prev; ///< Viterbi score back to last 1 phoneme #endif HMM_Logical *last_ph; ///< Last applied triphone boolean last_ph_sp_attached; ///< Last phone which the inter-word sp has been attached for multipath mode LOGPROB lscore; ///< N-gram score of last word (will be used for 1-phoneme backscan and graph output, always 0 for dfa LOGPROB totallscore; ///< (n-gram) Accumulated language score (LM only) #ifdef CONFIDENCE_MEASURE #ifdef CM_MULTIPLE_ALPHA LOGPROB cmscore[MAXSEQNUM][100]; ///< Confidence score of each word (multiple) #else LOGPROB cmscore[MAXSEQNUM]; ///< Confidence score of each word #endif /* CM_MULTIPLE_ALPHA */ #endif /* CONFIDENCE_MEASURE */ #ifdef VISUALIZE POPNODE *popnode; ///< Pointer to last popped node #endif #ifdef GRAPHOUT_PRECISE_BOUNDARY short *wordend_frame; ///< Buffer to store propagated word end score for word boundary adjustment LOGPROB *wordend_gscore; ///< Buffer to store propagated scores at word end for word boundary adjustment #endif WordGraph *prevgraph; ///< Graph word corresponding to the last word WordGraph *lastcontext; ///< Graph word of next previous word #ifndef GRAPHOUT_PRECISE_BOUNDARY LOGPROB tail_g_score; ///< forward g score for later score adjustment #endif struct __recogprocess__ *region; ///> Where this node belongs to } NODE; /* HOW SCORES ARE CALCULATED: 0 bestt T-1 |-h(n)---->|<------------g(n)--------------| ============================================================== |\ | ..... ..... 
| \estimated_next_t | =backward trellis --------------------\------------------------------------| (1st pass) | \ | seq[seqnum-1] | \_ | | \bestt | =========================+==================================================== | \ |<-g[0..T-1] | \ | seq[seqnum-2] | \__ | | \ | --------------------------------\------------------------| (last_ph)| \__ | |_ _ _ _ _ _ _ _ _ _ _\ _ _ _ _ _ _ _ _ _ _| seq[seqnum-3] | \______ |<--g_prev[0..T-1] | \___ | | \ | -------------------------------------------------\-------| ...... ...... (2nd pass) | \_| =============================================================== */ #endif /* __J_SEARCH_H__ */ julius-4.2.2/libjulius/include/julius/callback.h0000644001051700105040000001560112004452401020170 0ustar ritrlab/** * @file callback.h * * * @brief Definitions for callback API * * * * @brief コールバックAPI用定義 * * * @author Akinobu Lee * @date Mon Nov 5 18:30:04 2007 * * $Revision: 1.4 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #ifndef __J_CALLBACK_H__ #define __J_CALLBACK_H__ /** * Callback IDs. * */ enum { /** * Callback to be called periodically while recognition. * */ CALLBACK_POLL, /** * Event callback to be called when the engine becomes active and * start running. (ex. resume by j_request_resume()) * */ CALLBACK_EVENT_PROCESS_ONLINE, /** * Event callback to be called when the engine becomes inactive and * stop running. (ex. pause or terminate by user request) * */ CALLBACK_EVENT_PROCESS_OFFLINE, /** * (not implemented yet) * */ CALLBACK_EVENT_STREAM_BEGIN, /** * (not implemented yet) * */ CALLBACK_EVENT_STREAM_END, /** * Event callback to be called when engine is ready for recognition * and start listening to the audio input. * */ CALLBACK_EVENT_SPEECH_READY, /** * Event callback to be called when input speech processing starts. * This will be called at speech up-trigger detection by level and * zerocross. When the detection is disabled (i.e. file input), * This will be called immediately after opening the file. * */ CALLBACK_EVENT_SPEECH_START, /** * Event callback to be called when input speech ends. This will be * called at speech down-trigger detection by level and zerocross. * When the detection is disabled (i.e. file input), this will be called * just after the whole input has been read. * */ CALLBACK_EVENT_SPEECH_STOP, /** * Event callback to be called when a valid input segment has been found * and speech recognition process starts. This can be used to know the * actual start timing of recognition process. On short-pause segmentation * mode and decoder-based VAD mode, this will be called only once at a * triggered long input. @sa CALLBACK_EVENT_SEGMENT_BEGIN. * */ CALLBACK_EVENT_RECOGNITION_BEGIN, /** * Event callback to be called when a valid input segment has ended * up, speech recognition process ends and return to wait status for * another input to come. On short-pause segmentation mode and * decoder-based VAD mode, this will be called only once after a * triggered long input. @sa CALLBACK_EVENT_SEGMENT_END. * */ CALLBACK_EVENT_RECOGNITION_END, /** * On short-pause segmentation and decoder-based VAD mode, this * callback will be called at the beginning of each segment, * segmented by short pauses. 
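 * (Thus, for one triggered long input, the typical event order is
 * CALLBACK_EVENT_RECOGNITION_BEGIN, then one SEGMENT_BEGIN / SEGMENT_END
 * pair for each detected segment, and finally
 * CALLBACK_EVENT_RECOGNITION_END.)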
* */ CALLBACK_EVENT_SEGMENT_BEGIN, /** * On short-pause segmentation and decoder-based VAD mode, this * callback will be called at the end of each segment, * segmented by short pauses. * */ CALLBACK_EVENT_SEGMENT_END, /** * Event callback to be called when the 1st pass of recognition process * starts for the input. * */ CALLBACK_EVENT_PASS1_BEGIN, /** * Event callback to be called periodically at every input frame. This can * be used to get progress status of the first pass at each frame. * */ CALLBACK_EVENT_PASS1_FRAME, /** * Event callback to be called when the 1st pass of recognition process * ends for the input and proceed to 2nd pass. * */ CALLBACK_EVENT_PASS1_END, /** * Result callback to be called periodically at the 1st pass of * recognition process, to get progressive output. * */ CALLBACK_RESULT_PASS1_INTERIM, /** * Result callback to be called just at the end of 1st pass, to provide * recognition status and result of the 1st pass. * */ CALLBACK_RESULT_PASS1, /** * When compiled with "--enable-word-graph", this callback will be called * at the end of 1st pass to provide word graph generated at the 1st pass. * */ CALLBACK_RESULT_PASS1_GRAPH, /** * Status callback to be called after the 1st pass to provide information * about input (length etc.) * */ CALLBACK_STATUS_PARAM, /** * Event callback to be called when the 2nd pass of recognition * process starts. * */ CALLBACK_EVENT_PASS2_BEGIN, /** * Event callback to be called when the 2nd pass of recognition * process ends. * */ CALLBACK_EVENT_PASS2_END, /** * Result callback to provide final recognition result and status. * */ CALLBACK_RESULT, /** * Result callback to provide result of GMM computation, if GMM is used. * */ CALLBACK_RESULT_GMM, /** * Result callback to provide the whole word lattice generated at * the 2nd pass. Use with "-lattice" option. * */ CALLBACK_RESULT_GRAPH, /** * Result callback to provide the whole confusion network generated at * the 2nd pass. Use with "-confnet" option. * */ CALLBACK_RESULT_CONFNET, /** * A/D-in plugin callback to access to the captured input. This * will be called at every time a small audio fragment has been read * into Julius. This callback will be processed first in Julius, * and after that Julius will process the content for recognition. * This callback can be used to monitor or modify the raw audio * input in user-side application. * */ CALLBACK_ADIN_CAPTURED, /** * A/D-in plugin callback to access to the triggered input. This * will be called for input segments triggered by level and * zerocross. After processing this callback, Julius will process * the content for recognition. This callback can be used to * monitor or modify the triggered audio input in user-side * application. * */ CALLBACK_ADIN_TRIGGERED, /** * Event callback to be called when the engine becomes paused. * */ CALLBACK_EVENT_PAUSE, /** * Event callback to be called when the engine becomes resumed. * */ CALLBACK_EVENT_RESUME, /** * Plugin callback that will be called inside Julius when the engine * becomes paused. When Julius engine is required to stop by user * application, Julius interrupu the recognition and start calling * the functions registered here. After all the functions are * executed, Julius will resume to the recognition loop. So if you * want to use the pause / resume facility of Julius, You should * also set callback function to this to stop and do something, else * Julius returns immediately. 
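 *
 * A minimal registration sketch (illustrative only; it assumes the
 * callback_add() registration helper implemented in callback.c, so check
 * the installed headers for the exact prototype):
 *
 *   static void my_pause_func(Recog *recog, void *data)
 *   {
 *     // block here (e.g. wait on a condition variable) until the
 *     // application wants recognition to resume
 *   }
 *   ...
 *   callback_add(recog, CALLBACK_PAUSE_FUNCTION, my_pause_func, NULL);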
*/ CALLBACK_PAUSE_FUNCTION, CALLBACK_DEBUG_PASS2_POP, CALLBACK_DEBUG_PASS2_PUSH, CALLBACK_RESULT_PASS1_DETERMINED, SIZEOF_CALLBACK_ID }; /** * Maximum number of callbacks that can be registered for each ID. * */ #define MAX_CALLBACK_HOOK 10 #endif /* __J_CALLBACK_H__ */ julius-4.2.2/libjulius/include/julius/useropt.h0000644001051700105040000000233212004452401020132 0ustar ritrlab/** * @file useropt.h * * * @brief ユーザ指定の jconf オプション拡張 * * * * @brief User-defined jconf options * * * @author Akinobu Lee * @date Sun Sep 02 03:09:12 2007 * * $Revision: 1.4 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #ifndef __J_USEROPT_H__ #define __J_USEROPT_H__ /** * User-defined option * */ typedef struct __j_useropt__ { char *optstr; ///< Option string char *desc; ///< Description that will be output on help int argnum; ///< Number of arguments int reqargnum; ///< Number of optional arguments in argnum boolean (*func)(Jconf *jconf, char *arg[], int argnum); ///< Handling function struct __j_useropt__ *next; ///< Pointer to next data } USEROPT; boolean j_add_option(char *fmt, int argnum, int reqargnum, char *desc, boolean (*func)(Jconf *jconf, char *arg[], int argnum)); void useropt_free_all(); int useropt_exec(Jconf *jconf, char *argv[], int argc, int *n); void useropt_show_desc(FILE *fp); #endif /* __J_USEROPT_H__ */ julius-4.2.2/libjulius/include/julius/define.h0000644001051700105040000002346712004452401017677 0ustar ritrlab/** * @file define.h * * * @brief 内部処理選択のためのシンボル定義 * * configure スクリプトは,システム/ユーザごとの設定を config.h に書き出し * ます. このファイルでは,その configure で設定された config.h 内の定義を * 元に,Julius/Julian のための内部シンボルの定義を行います. * これらは実験用のコードの切り替えや古いオプションとの互換性のために * 定義されているものがほとんどです. 通常のユーザはこの定義を書き換える * 必要はありません. * * * * @brief Internal symbol definitions * * The "configure" script will output the system- and user-dependent * configuration in "config.h". This file defines some symboles * according to the generated config.h, to switch internal functions. * Most of the definitions here are for disabling experimental or debug * code for development, or to keep compatibility with old Julius. These * definitions are highly internal, and normal users should not alter * these definitions without knowning what to do. 
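 *
 * For example (see the N-gram tree construction block below):
 * "./configure --enable-lowmem" defines LOWMEM in config.h, which this
 * file maps to the internal switch NO_SEPARATE_SHORT_WORD (a single
 * shared lexicon tree), while "--enable-lowmem2" (also implied by the
 * default "fast" setup) defines LOWMEM2, which additionally selects
 * SEPARATE_BY_UNIGRAM so that only the most frequent words are separated
 * from the tree.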
* * * @author Akinobu LEE * @date Mon Mar 7 15:17:26 2005 * * $Revision: 1.8 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #ifndef __J_DEFINE_H__ #define __J_DEFINE_H__ /*****************************************************************************/ /** DO NOT MODIFY MANUALLY DEFINES BELOW UNLESS YOU KNOW WHAT YOU ARE DOING **/ /*****************************************************************************/ /* type of language model */ #define LM_UNDEF 0 ///< not specified #define LM_PROB 1 ///< Statistical (N-gram - Julius) #define LM_DFA 2 ///< DFA (Julian) /* LM variation specification */ #define LM_NGRAM 0 ///< N-gram #define LM_DFA_GRAMMAR 1 ///< DFA grammar #define LM_DFA_WORD 2 ///< Isolated word recognition #define LM_NGRAM_USER 3 ///< User-defined statistical LM /* recognition status */ #define J_RESULT_STATUS_BUFFER_OVERFLOW -7 ///< Input buffer overflow #define J_RESULT_STATUS_REJECT_POWER -6 ///< Input rejected by power #define J_RESULT_STATUS_TERMINATE -5 ///< Input was terminated by app. request #define J_RESULT_STATUS_ONLY_SILENCE -4 ///< Input contains only silence #define J_RESULT_STATUS_REJECT_GMM -3 ///< Input rejected by GMM #define J_RESULT_STATUS_REJECT_SHORT -2 ///< Input rejected by short input #define J_RESULT_STATUS_FAIL -1 ///< Recognition ended with no candidate #define J_RESULT_STATUS_SUCCESS 0 ///< Recognition output some result /* delete incoherent option */ /* CATEGORY_TREE: DFA=always on, NGRAM=always off */ /* switch with recog->category_tree */ /* UNIGRAM_FACTORING: DFA=always off, NGRAM=option */ /* enclose UNIGRAM_FACTORING section with "if (lmtype == LM_NGRAM)" */ /* abbreviations for verbose message output */ #define VERMES if (verbose_flag) jlog /** * define this to report memory usage on exit (Linux only) * */ #undef REPORT_MEMORY_USAGE /*** N-gram tree construction ***/ /* With 1-best approximation, Constructing a single tree from all words causes much error by factoring. Listing each word flatly with no tree-organization will not cause this error, but the network becomes much larger and, especially, the inter-word LM handling becomes much more complex (O(n^2)). The cost may be eased by LM caching, but it needs much memory. 
*/ /* This is a trade-off of accuracy and cost */ #define SHORT_WORD_LEN 2 #ifdef LOWMEM /* don't separate, construct a single tree from all words */ /* root nodes are about 50 in monophone, cache size will be 5MB on max */ #define NO_SEPARATE_SHORT_WORD #else #ifdef LOWMEM2 /* experimental: separate words frequently appears in corpus (1-gram) */ /* root nodes will be "-sepnum num" + 50, cache size will be 10MB or so */ #define NO_SEPARATE_SHORT_WORD #define SEPARATE_BY_UNIGRAM #else /* separate all short words (<= 2 phonemes) */ /* root nodes are about 1100 in 20k (proportional to vocabulary), cache size will be about 100MB on max */ #endif /* LOWMEM2 */ #endif /* LOWMEM */ /*#define HASH_CACHE_IW*/ /* "./configure --enable-lowmem" defines NO_SEPARATE_SHORT_WORD instead */ /* default language model weight and insertion penalty for pass1 and pass2 */ /* these values come from the best parameters in IPA evaluation result */ #define DEFAULT_LM_WEIGHT_MONO_PASS1 5.0 #define DEFAULT_LM_PENALTY_MONO_PASS1 -1.0 #define DEFAULT_LM_WEIGHT_MONO_PASS2 6.0 #define DEFAULT_LM_PENALTY_MONO_PASS2 0.0 #ifdef PASS1_IWCD #define DEFAULT_LM_WEIGHT_TRI_PASS1 8.0 #define DEFAULT_LM_PENALTY_TRI_PASS1 -2.0 #define DEFAULT_LM_WEIGHT_TRI_PASS2 8.0 #define DEFAULT_LM_PENALTY_TRI_PASS2 -2.0 #else #define DEFAULT_LM_WEIGHT_TRI_PASS1 9.0 #define DEFAULT_LM_PENALTY_TRI_PASS1 8.0 #define DEFAULT_LM_WEIGHT_TRI_PASS2 11.0 #define DEFAULT_LM_PENALTY_TRI_PASS2 -2.0 #endif /* PASS1_IWCD */ /* Switch head/tail word insertion penalty to be inserted */ #undef FIX_PENALTY /* some definitions for short-pause segmentation */ #undef SP_BREAK_EVAL /* output messages for evaluation */ #undef SP_BREAK_DEBUG /* output messages for debug */ #undef SP_BREAK_RESUME_WORD_BEGIN /* resume word = maxword at beginning of sp area */ #ifdef GMM_VAD #define DEFAULT_GMM_MARGIN 20 /* backstep margin / determine buffer length */ #define GMM_VAD_AUTOSHRINK_LIMIT 500 #undef GMM_VAD_DEBUG /* output debug message */ #endif /* default values for spseg_naist */ #ifdef SPSEGMENT_NAIST #define DEFAULT_SP_MARGIN 40 #define DEFAULT_SP_DELAY 4 #define SPSEGMENT_NAIST_AUTOSHRINK_LIMIT 500 #endif /* '01/10/18 by ri: enable fix for trellis lookup order */ #define PREFER_CENTER_ON_TRELLIS_LOOKUP /* '01/11/28 by ri: malloc step for startnode for multipath mode */ #define STARTNODE_STEP 300 /* default dict entry for IW-sp word that will be added to dict with -iwspword */ #define IWSPENTRY_DEFAULT " [sp] sp sp" /* confidence scoring method */ #ifdef CONFIDENCE_MEASURE # ifndef CM_NBEST /* use conventional N-best CM, will be defined if "--enable-cm-nbest" specified */ # define CM_SEARCH /* otherwise, use on-the-fly CM scoring */ # endif #endif /* dynamic word graph generation */ #undef GRAPHOUT_SEARCH_CONSIDER_RIGHT /* if defined, only hypothesis whose left/right contexts is already included in popped hypo will be merged. EXPERIMENTAL, should not be defined. 
*/ #ifdef CM_SEARCH_LIMIT #undef CM_SEARCH_LIMIT_AFTER /* enable above only after 1 sentence found */ #undef CM_SEARCH_LIMIT_POP /* terminate hypo of low CM on pop */ #endif /* compute exact boundary instead of using 1st pass result */ /* also propagate exact time boundary to the right context after generation */ /* this may produce precise word boundary, but cause bigger word graph output */ #define GRAPHOUT_PRECISE_BOUNDARY #undef GDEBUG /* enable debug message in graphout.c */ /* some decoding fix candidates */ #undef FIX_35_PASS2_STRICT_SCORE /* fix hypothesis scores by enabling bt_discount_pescore() in standard mode with PASS2_STRICT_IWCD, */ #define FIX_35_INHIBIT_SAME_WORD_EXPANSION /* privent connecting the same trellis word in 2nd pass */ /* below are new since 3.5.2 */ /** * Allow overwriting existing graph word if score is higher. * By default, while search, Julius merges the same graph words appeared at the * same location as previously stored word, and terminate search. This * option make Julius to continue search in that case if fscore_head of * current hypo. is greater than the already existing one. In that case, * the score of existing one will be overridden by the new higher one. * (from 3.5.2) * */ #define GRAPHOUT_OVERWRITE /* with GRAPHOUT_OVERWRITE, use gscore_head instead of fscore_head */ /** * (EXPERIMENTAL) With GRAPHOUT_OVERWRITE, use gscore_head for the * comparison instead of fscore_head. * */ #undef GRAPHOUT_OVERWRITE_GSCORE /** * At post-processing of graph words, this option limits the number of * "fit boundary" loop up to this value. This option is made to avoid * long loop by the "boundary oscillation" of short words. (from 3.5.2) * */ #define GRAPHOUT_LIMIT_BOUNDARY_LOOP /** * This option enables "-graphsearchdelay" and "-nographsearchdelay" option. * When "-graphsearchdelay" option is set, Julius modifies its alogrithm of * graph generation on the 2nd pass not to apply search termination by graph * merging until the first sentence candidate is found. * * This option may result in slight improvement of graph accuracy only * when you are going to generate a huge word graph by setting broad search. * Namely, it may result in better graph accuracy when you set wide beams on * both 1st pass "-b" and 2nd pass "-b2", and large number for "-n". * */ #define GRAPHOUT_SEARCH_DELAY_TERMINATION /** * This option enables word graph cutting by word depth at post-processing. * This option will erase many short words to explode at a wide beam width. * */ #define GRAPHOUT_DEPTHCUT /** * Mimimal beam width that will be auto-determined for the 1st pass. * See set_beam_width() and default_width() for details. * */ #define MINIMAL_BEAM_WIDTH 200 /** * (DEBUG) Use old full lcdset instead of category-pair-aware lcdset * on Julian (-oldiwcd on 3.5.3 and previous) */ #undef USE_OLD_IWCD /** * (EXPERIMENTAL) early word determination on isolated word recognition mode. * Results will be shown via CALLBACK_RESULT_PASS1_DETERMINED. * */ #undef DETERMINE #define FWD_NGRAM #define MAX_SPEECH_ALLOC_STEP 320000 #define POWER_REJECT_DEFAULT_THRES 9.0 /** * A test to find optimal warping factor for VTLN (EXPERIMENTAL) * */ #undef DEBUG_VTLN_ALPHA_TEST #define VTLN_RANGE 0.2 #define VTLN_STEP 0.02 /** * Use fast successor composition at 1-gram factoring. * */ #define FAST_FACTOR1_SUCCESSOR_LIST /** * Enable score based pruning at the 1st pass. 
* */ #define SCORE_PRUNING #endif /* __J_DEFINE_H__ */ julius-4.2.2/libjulius/include/julius/beam.h0000644001051700105040000000246412004452401017343 0ustar ritrlab/** * @file beam.h * * * @brief 第1パスのフレーム同期ビーム探索用定義 * * * * @brief Definitions for frame-synchronous beam search on 1st pass. * * * @author Akinobu LEE * @date Mon Mar 7 15:12:29 2005 * * $Revision: 1.4 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #ifndef __J_BEAM_H__ #define __J_BEAM_H__ /// token id for the 1st pass typedef int TOKENID; /// id for undefined token #define TOKENID_UNDEFINED -1 /// Token to hold viterbi pass history typedef struct { TRELLIS_ATOM *last_tre; ///< Previous word candidate in word trellis WORD_ID last_cword; ///< Previous context-aware (not transparent) word for N-gram LOGPROB last_lscore; ///< Currently assigned word-internal LM score for factoring for N-gram LOGPROB score; ///< Current accumulated score (AM+LM) int node; ///< Lexicon node ID to which this token is assigned #ifdef WPAIR TOKENID next; ///< ID pointer to next token at same node, for word-pair approx. #endif } TOKEN2; #define FILLWIDTH 70 ///< Word-wrap character length for progressive output #endif /* __J_BEAM_H__ */ julius-4.2.2/libjulius/include/julius/graph.h0000644001051700105040000000720012004452401017531 0ustar ritrlab/** * @file graph.h * * * @brief 単語グラフの構造体定義 * * 単語グラフ中の単語を表す構造体,および confusion network 中の * 単語を表す構造体が定義されています. * * * * @brief Structure definition for word graph. * * This file defines instances for word graph and confusion network. * * * @author Akinobu Lee * @date Thu Aug 16 00:30:54 2007 * * $Revision: 1.4 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #ifndef __J_GRAPH_H__ #define __J_GRAPH_H__ #define FANOUTSTEP 7 ///< Memory allocation step for connection words in WordGraph /** * * 単語グラフ上の単語候補. * * * Word arc on the word graph. * */ typedef struct __word_graph__ { WORD_ID wid; ///< Word ID int lefttime; ///< Head frame where this word begins int righttime; ///< Tail frame where this word ends LOGPROB fscore_head; ///< Partial sentence score 'f' when the next (left) word of this word was expanded at 2nd pass. f = g(thisword) + h(nextword) LOGPROB fscore_tail; ///< Partial sentence score when this word was expanded in 2nd pass. f = g(rightword) + h(thisword) LOGPROB gscore_head; ///< Accumulated viterbi score at the head state of this word on lefttime. This value includes both accumulated AM score and LM score of this word. LOGPROB gscore_tail; ///< Accumultaed viterbi score at the head state of previous (right) word. LOGPROB lscore_tmp; ///< Temporally holds LM score LOGPROB forward_score; ///< Forward score at right edge, incl. LM, obtained by forward-backward process LOGPROB backward_score; ///< Backward score at left edge, incl. 
LM, obtained by forward-backward process #ifdef CM_SEARCH LOGPROB cmscore; ///< Confidence score obtained while search #endif LOGPROB amavg; ///< average acoustic score of matched frame HMM_Logical *headphone; ///< Applied phone HMM at the head of the word HMM_Logical *tailphone; ///< Applied phone HMM at the end of the word struct __word_graph__ **leftword; ///< List of left context LOGPROB *left_lscore; ///< List of LM score for left contexts int leftwordnum; ///< Stored num of @a leftword int leftwordmaxnum; ///< Allocated size of @a leftword struct __word_graph__ **rightword; ///< List of right context LOGPROB *right_lscore; ///< List of LM score for right contexts int rightwordnum; ///< Stored num of @a leftword int rightwordmaxnum; ///< Allocated size of @a letfword struct __word_graph__ *next; ///< Pointer to next wordgraph for throughout access boolean mark; ///< Delete mark for compaction operation int id; ///< Unique ID within the graph boolean saved; ///< Save mark for graph generation #ifdef GRAPHOUT_DYNAMIC boolean purged; ///< Purged mark for graph generation #endif LOGPROB graph_cm; ///< Confidense score computed from the graph } WordGraph; /** * Word Cluster for confusion network generation * */ typedef struct __confnet_cluster__ { WordGraph **wg; ///< List of graph words in this cluster int wgnum; ///< Number of @a wg; int wgnum_alloc; ///< Allocated size of @a wg; WORD_ID *words; ///< List of words in this cluster (WORD_INVALID) means skip ("-") LOGPROB *pp; ///< Posterior probability of each word int wordsnum; ///< Number of @a words struct __confnet_cluster__ *next; ///< Pointer to next structure } CN_CLUSTER; /** * Number of allocation step for CN_CLUSTER * */ #define CN_CLUSTER_WG_STEP 10 #endif /* __J_GRAPH_H__ */ julius-4.2.2/libjulius/include/julius/julius.h0000644001051700105040000000312212004452401017742 0ustar ritrlab/** * @file julius.h * * * @brief Julius 用のトップヘッダファイル * * * * @brief Top common header for Julius * * * @author Akinobu LEE * @date Thu Mar 17 21:08:21 2005 * * $Revision: 1.8 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #ifndef __J_JULIUS_H__ #define __J_JULIUS_H__ /* read configurable definitions */ #if defined(_WIN32) && !defined(__CYGWIN32__) && !defined(__MINGW32__) # include # include #else #include #include #endif /* read built-in definitions */ #include /* read libsent includes */ #include #include #include #include #include #include #include #include #include #include /* read Julius/Julian includes */ #ifdef ENABLE_PLUGIN #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #endif /* __J_JULIUS_H__ */ julius-4.2.2/libjulius/include/julius/config.h.in0000644001051700105040000001522611066615325020326 0ustar ritrlab/* config.h.in. Generated automatically from configure.in by autoheader 2.13. */ /** * @file acconfig.h * * * @brief config.h.in を configure.in から生成するための autoconf 用ヘッダ * * このファイルはソースからインクルードされることはありません. * 実際にはこの内容は config.h.in に埋め込まれており, * configure によって config.h.in から生成された config.h が * プログラムによって使用されます. * * @sa config.h, config.h.in, configure, configure.in * * * * @brief Autoconf header to generate config.h.in from configure.in * * This file is not included by any source file. 
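 * (For illustration only; the actual values depend on the configure run.
 * A build configured with "./configure --enable-setup=fast" on a host
 * with POSIX threads turns the #undef templates below into config.h
 * entries such as
 *   #define JULIUS_SETUP "fast"
 *   #define HAVE_PTHREAD 1
 *   #define UNIGRAM_FACTORING 1
 *   #define LOWMEM2 1
 *   #define GPRUNE_DEFAULT_BEAM 1
 * while the remaining #undef templates are rewritten as comments.)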
The contents of this file * is already copied in config.h.in, and the configuration script "configure" * will generate config.h from config.h.in. It sets definitions according to * the running environment and user-specified setting. The final config.h * will be included by the sources. * * @sa config.h, config.h.in, configure, configure.in * * * @author Akinobu LEE * @date Sat Feb 19 12:53:54 2005 * * $Revision: 1.3 $ * */ /// Name of the Product. #undef JULIUS_PRODUCTNAME /// Version string #undef JULIUS_VERSION /// Engine setting (value of "--enable-setup=..."). #undef JULIUS_SETUP /// Compilation host information #undef JULIUS_HOSTINFO /* Define to empty if the keyword does not work. */ #undef const /* Define as the return type of signal handlers (int or void). */ #undef RETSIGTYPE /* Define if you have the ANSI C header files. */ #undef STDC_HEADERS /* Define if you have POSIX thread */ #undef HAVE_PTHREAD /// For Julius, defined if using 1-gram factoringon the 1st pass instead of 2-gram factoring. #undef UNIGRAM_FACTORING /** * For Julius, defined if dictionary forms a single tree lexicon, sharing * only a single root node. This saves memory of inter-word LM cache. * */ #undef LOWMEM /** * For Julius, defined if frequent words should be separated from the lexicon * tree. This will improve accuracy on small beam, and default of "fast" * setting. If none of LOWMEM and LOWMEM2 is defined, separation of short * words from lexicon tree will be performed to get the better accuracy, at * a cost of LM cache area on word head. * */ #undef LOWMEM2 /** * If defined, use word-pair approximation on the 1st pass instead of * 1-best approximation. * */ #undef WPAIR /** * When WPAIR is defined, only up to N tokens will be kept for each node * instead of keeping tokens depending on the previous word. This may * improve memory efficiency when word-pair approximation is used. * */ #undef WPAIR_KEEP_NLIMIT /** * If defined, generate a simple word graph instead of word trellis on * the 1st pass. This limits word expansion on the 2nd pass * to only the words on the word graph, and the final recognition accuracy * can be decreased. You should enable this with WPAIR to get reasonable * output. Please note that this is different * from Word Graph Output of the 2nd pass which can be enabled by GRAPHOUT. * */ #undef WORD_GRAPH /** * If defined, use monophone tree lexicon on the 1st pass for speed up * the search. This is EXPERIMENTAL, and should not be used. * */ #undef MONOTREE /** * Handle inter-word triphone on the 1st pass. This should be defined * if using context-dependent acoustic model. If not defined, the context * will not be considered any more. This is defined by default. * */ #undef PASS1_IWCD /** * On word expansion of the 2nd pass, Julius and Julian by default does * not handle inter-word context dependency of the newly expanded words * on the expansion time, and they will be computed when the hypothesis * is popped from the stack at the later processing. If PASS2_STRICT_IWCD * is defined, a strict inter-word triphone will be computed just on the * word expansion time, by re-computing word edge phones on the connection * point for all the word candidates. * * This option will results in a better * recognition accuracy. However, the 2nd pass will become slower by the * increasing acoustic matching cost. * */ #undef PASS2_STRICT_IWCD /** * Enable score envelope beaming on the hypothesis scoring in the 2nd pass. * This will be defined by default. 
* */ #undef SCAN_BEAM /// Set the default method of Gaussian pruning for tied-mixture model to safe algorithm #undef GPRUNE_DEFAULT_SAFE /// Set the default method of Gaussian pruning for tied-mixture model to beam algorithm #undef GPRUNE_DEFAULT_BEAM /** * Enables confidence scoring for the output words. This will be defined * by default. * */ #undef CONFIDENCE_MEASURE /* use N-best confidence measure instead of search-time computation */ /** * By default, Julius/Julian uses search-time heuristic scores to get the * posterior probability based word confidence measures on the search time. * This default algorithm can output word confidence scores with a little * additional computation without searching for much sentences. * * If you still use a trivial method of computing the word confidence scores * from the N-best sentence list, you can define this. * */ #undef CM_NBEST /** * If defined, compute confidence scores for multiple alpha values. * */ #undef CM_MULTIPLE_ALPHA /** * Enable search space visualization feature. You need X11 and GTK to * use this. * */ #undef VISUALIZE /** * When VISUALIZE is defind, this defines a command to play the recorded * sound on the visualization window. */ #undef PLAYCOMMAND /** * On Julius, if defined, fix some language model scoring bug on the 2nd pass. * */ #undef LM_FIX_DOUBLE_SCORING /** * Use dynamic word graph generation on the 2nd pass. * The word candidates are fixed as soon as the word boundary is fixed * in search, and as soon as same word appears in the same position, * they will be merged. It results in much more words to be * remained in the graph. * */ #undef GRAPHOUT_DYNAMIC /** * If defined with GRAPHOUT_DYNAMIC, use modified stack * decoding algorithm for efficient word graph generation. * */ #undef GRAPHOUT_SEARCH /** * If defined, avoid expansion of low CM word on search. This may * speed up * */ #undef CM_SEARCH_LIMIT /** * If defined, enable decoder-oriented VAD using short-pause segmentation * scheme developed by NAIST team * */ #undef SPSEGMENT_NAIST /** * If defined, enable a simple GMM-based VAD. Both frontend VAD and * postprocessing rejection will be performed using the same GMM. * */ #undef GMM_VAD /** * This will be defined internally when in-decoder type VAD is enabled. * */ #undef BACKEND_VAD /** * If enabled, do post-rejection by power * */ #undef POWER_REJECT /** * If defined, enable plugin support using dynamic object loading * */ #undef ENABLE_PLUGIN julius-4.2.2/libjulius/include/julius/acconfig.h0000644001051700105040000001445310731704275020227 0ustar ritrlab/** * @file acconfig.h * * * @brief config.h.in を configure.in から生成するための autoconf 用ヘッダ * * このファイルはソースからインクルードされることはありません. * 実際にはこの内容は config.h.in に埋め込まれており, * configure によって config.h.in から生成された config.h が * プログラムによって使用されます. * * @sa config.h, config.h.in, configure, configure.in * * * * @brief Autoconf header to generate config.h.in from configure.in * * This file is not included by any source file. The contents of this file * is already included in config.h.in, and the configuration script "configure" * will generate config.h from config.h.in. It sets definitions according to * the running environment and user-specified setting. The final config.h * will be included by the sources. * * @sa config.h, config.h.in, configure, configure.in * * * @author Akinobu LEE * @date Sat Feb 19 12:53:54 2005 * * $Revision: 1.2 $ * */ /// Name of the Product. #undef PRODUCTNAME /// Version string #undef VERSION /// Engine setting (value of "--enable-setup=..."). 
#undef SETUP /// Compilation host information #undef HOSTINFO @TOP@ /// For Julius, defined if using 1-gram factoringon the 1st pass instead of 2-gram factoring. #undef UNIGRAM_FACTORING /** * For Julius, defined if dictionary forms a single tree lexicon, sharing * only a single root node. This saves memory of inter-word LM cache. * */ #undef LOWMEM /** * For Julius, defined if frequent words should be separated from the lexicon * tree. This will improve accuracy on small beam, and default of "fast" * setting. If none of LOWMEM and LOWMEM2 is defined, separation of short * words from lexicon tree will be performed to get the better accuracy, at * a cost of LM cache area on word head. * */ #undef LOWMEM2 /** * If defined, use word-pair approximation on the 1st pass instead of * 1-best approximation. * */ #undef WPAIR /** * When WPAIR is defined, only up to N tokens will be kept for each node * instead of keeping tokens depending on the previous word. This may * improve memory efficiency when word-pair approximation is used. * */ #undef WPAIR_KEEP_NLIMIT /** * If defined, generate a simple word graph instead of word trellis on * the 1st pass. This limits word expansion on the 2nd pass * to only the words on the word graph, and the final recognition accuracy * can be decreased. You should enable this with WPAIR to get reasonable * output. Please note that this is different * from Word Graph Output of the 2nd pass which can be enabled by GRAPHOUT. * */ #undef WORD_GRAPH /** * If defined, use monophone tree lexicon on the 1st pass for speed up * the search. This is EXPERIMENTAL, and should not be used. * */ #undef MONOTREE /** * Handle inter-word triphone on the 1st pass. This should be defined * if using context-dependent acoustic model. If not defined, the context * will not be considered any more. This is defined by default. * */ #undef PASS1_IWCD /** * On word expansion of the 2nd pass, Julius and Julian by default does * not handle inter-word context dependency of the newly expanded words * on the expansion time, and they will be computed when the hypothesis * is popped from the stack at the later processing. If PASS2_STRICT_IWCD * is defined, a strict inter-word triphone will be computed just on the * word expansion time, by re-computing word edge phones on the connection * point for all the word candidates. * * This option will results in a better * recognition accuracy. However, the 2nd pass will become slower by the * increasing acoustic matching cost. * */ #undef PASS2_STRICT_IWCD /** * Enable score envelope beaming on the hypothesis scoring in the 2nd pass. * This will be defined by default. * */ #undef SCAN_BEAM /// Set the default method of Gaussian pruning for tied-mixture model to safe algorithm #undef GPRUNE_DEFAULT_SAFE /// Set the default method of Gaussian pruning for tied-mixture model to heuristic algorithm #undef GPRUNE_DEFAULT_HEURISTIC /// Set the default method of Gaussian pruning for tied-mixture model to beam algorithm #undef GPRUNE_DEFAULT_BEAM /** * Enables confidence scoring for the output words. This will be defined * by default. * */ #undef CONFIDENCE_MEASURE /* use N-best confidence measure instead of search-time computation */ /** * By default, Julius/Julian uses search-time heuristic scores to get the * posterior probability based word confidence measures on the search time. * This default algorithm can output word confidence scores with a little * additional computation without searching for much sentences. 
* * If you still use a trivial method of computing the word confidence scores * from the N-best sentence list, you can define this. * */ #undef CM_NBEST /** * If defined, compute confidence scores for multiple alpha values. * */ #undef CM_MULTIPLE_ALPHA /** * Enable search space visualization feature. You need X11 and GTK to * use this. * */ #undef VISUALIZE /** * When VISUALIZE is defind, this defines a command to play the recorded * sound on the visualization window. */ #undef PLAYCOMMAND /** * On Julius, if defined, fix some language model scoring bug on the 2nd pass. * */ #undef LM_FIX_DOUBLE_SCORING /** * Use dynamic word graph generation on the 2nd pass. * The word candidates are fixed as soon as the word boundary is fixed * in search, and as soon as same word appears in the same position, * they will be merged. It results in much more words to be * remained in the graph. * */ #undef GRAPHOUT_DYNAMIC /** * If defined with GRAPHOUT_DYNAMIC, use modified stack * decoding algorithm for efficient word graph generation. * */ #undef GRAPHOUT_SEARCH /** * If defined, avoid expansion of low CM word on search. This may * speed up * */ #undef CM_SEARCH_LIMIT /** * If defined, enable decoder-oriented VAD using short-pause segmentation * scheme developed by NAIST team * */ #undef SPSEGMENT_NAIST /** * If defined, enable a simple GMM-based VAD. Both frontend VAD and * postprocessing rejection will be performed using the same GMM. * */ #undef GMM_VAD /** * This will be defined internally when in-decoder type VAD is enabled. * */ #undef BACKEND_VAD /** * If enabled, do post-rejection by power * */ #undef POWER_REJECT julius-4.2.2/libjulius/include/julius/extern.h0000644001051700105040000003267012004452401017746 0ustar ritrlab/** * @file extern.h * * * @brief 外部関数宣言 * * * * @brief External function declarations * * * @author Akinobu LEE * @date Mon Mar 7 23:19:14 2005 * * $Revision: 1.19 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ /* should be included after all include files */ #ifdef __cplusplus extern "C" { #endif /* backtrellis.c */ void bt_init(BACKTRELLIS *bt); void bt_prepare(BACKTRELLIS *bt); void bt_free(BACKTRELLIS *bt); TRELLIS_ATOM *bt_new(BACKTRELLIS *bt); void bt_store(BACKTRELLIS *bt, TRELLIS_ATOM *aotm); void bt_relocate_rw(BACKTRELLIS *bt); void set_terminal_words(RecogProcess *r); void bt_discount_pescore(WCHMM_INFO *wchmm, BACKTRELLIS *bt, HTK_Param *param); void bt_discount_lm(BACKTRELLIS *bt); void bt_sort_rw(BACKTRELLIS *bt); TRELLIS_ATOM *bt_binsearch_atom(BACKTRELLIS *bt, int time, WORD_ID wkey); /* factoring_sub.c */ void make_iwcache_index(WCHMM_INFO *wchmm); void adjust_sc_index(WCHMM_INFO *wchmm); void make_successor_list(WCHMM_INFO *wchmm); void make_successor_list_unigram_factoring(WCHMM_INFO *wchmm); void max_successor_cache_init(WCHMM_INFO *wchmm); void max_successor_cache_free(WCHMM_INFO *wchmm); LOGPROB max_successor_prob(WCHMM_INFO *wchmm, WORD_ID lastword, int node); LOGPROB *max_successor_prob_iw(WCHMM_INFO *wchmm, WORD_ID lastword); void calc_all_unigram_factoring_values(WCHMM_INFO *wchmm); boolean can_succeed(WCHMM_INFO *wchmm, WORD_ID lastword, int node); /* beam.c */ boolean get_back_trellis_init(HTK_Param *param, RecogProcess *r); boolean get_back_trellis_proceed(int t, HTK_Param *param, RecogProcess *r, boolean final_for_multipath); void 
get_back_trellis_end(HTK_Param *param, RecogProcess *r); void fsbeam_free(FSBeam *d); void finalize_1st_pass(RecogProcess *r, int len); /* pass1.c */ #ifdef POWER_REJECT boolean power_reject(Recog *recog); #endif int decode_proceed(Recog *recog); void decode_end_segmented(Recog *recog); void decode_end(Recog *recog); boolean get_back_trellis(Recog *recog); /* spsegment.c */ boolean is_sil(WORD_ID w, RecogProcess *r); void mfcc_copy_to_rest_and_shrink(MFCCCalc *mfcc, int start, int end); void mfcc_shrink(MFCCCalc *mfcc, int p); boolean detect_end_of_segment(RecogProcess *r, int time); void finalize_segment(Recog *recog); void spsegment_init(Recog *recog); boolean spsegment_trigger_sync(Recog *recog); boolean spsegment_need_restart(Recog *recog, int *rf_ret, boolean *repro_ret); void spsegment_restart_mfccs(Recog *recog, int rewind_frame, boolean reprocess); /* outprob_style.c */ #ifdef PASS1_IWCD void outprob_style_cache_init(WCHMM_INFO *wchmm); CD_Set *lcdset_lookup_with_category(WCHMM_INFO *wchmm, HMM_Logical *hmm, WORD_ID category); void lcdset_register_with_category_all(WCHMM_INFO *wchmm); void lcdset_remove_with_category_all(WCHMM_INFO *wchmm); #endif LOGPROB outprob_style(WCHMM_INFO *wchmm, int node, int last_wid, int t, HTK_Param *param); void error_missing_right_triphone(HMM_Logical *base, char *rc_name); void error_missing_left_triphone(HMM_Logical *base, char *lc_name); /* ngram_decode.c */ #include "search.h" int ngram_firstwords(NEXTWORD **nw, int peseqlen, int maxnw, RecogProcess *r); int ngram_nextwords(NODE *hypo, NEXTWORD **nw, int maxnw, RecogProcess *r); boolean ngram_acceptable(NODE *hypo, RecogProcess *r); int dfa_firstwords(NEXTWORD **nw, int peseqlen, int maxnw, RecogProcess *r); int dfa_nextwords(NODE *hypo, NEXTWORD **nw, int maxnw, RecogProcess *r); boolean dfa_acceptable(NODE *hypo, RecogProcess *r); boolean dfa_look_around(NEXTWORD *nword, NODE *hypo, RecogProcess *r); /* search_bestfirst_main.c */ void segment_set_last_nword(NODE *hypo, RecogProcess *r); void pass2_finalize_on_no_result(RecogProcess *r, boolean use_1pass_as_final); void wchmm_fbs(HTK_Param *param, RecogProcess *r, int cate_bgn, int cate_num); void wchmm_fbs_prepare(RecogProcess *r); void wchmm_fbs_free(RecogProcess *r); /* search_bestfirst_v?.c */ void clear_stocker(StackDecode *s); void free_node(NODE *node); NODE *cpy_node(NODE *dst, NODE *src); NODE *newnode(RecogProcess *r); void malloc_wordtrellis(RecogProcess *r); void free_wordtrellis(StackDecode *dwrk); void scan_word(NODE *now, HTK_Param *param, RecogProcess *r); void next_word(NODE *now, NODE *newParam, NEXTWORD *nword, HTK_Param *param, RecogProcess *r); void start_word(NODE *newParam, NEXTWORD *nword, HTK_Param *param, RecogProcess *r); void last_next_word(NODE *now, NODE *newParam, HTK_Param *param, RecogProcess *r); /* wav2mfcc.c */ boolean wav2mfcc(SP16 speech[], int speechlen, Recog *recog); /* version.c */ void j_put_header(FILE *stream); void j_put_version(FILE *stream); void j_put_compile_defs(FILE *stream); void j_put_library_defs(FILE *stream); /* wchmm.c */ WCHMM_INFO *wchmm_new(); void wchmm_free(WCHMM_INFO *w); void print_wchmm_info(WCHMM_INFO *wchmm); boolean build_wchmm(WCHMM_INFO *wchmm, JCONF_LM *lmconf); boolean build_wchmm2(WCHMM_INFO *wchmm, JCONF_LM *lmconf); /* wchmm_check.c */ void wchmm_check_interactive(WCHMM_INFO *wchmm); void check_wchmm(WCHMM_INFO *wchmm); /* realtime.c --- callback for adin_cut() */ boolean RealTimeInit(Recog *recog); boolean RealTimePipeLinePrepare(Recog *recog); boolean 
RealTimeMFCC(MFCCCalc *mfcc, SP16 *window, int windowlen); int RealTimePipeLine(SP16 *Speech, int len, Recog *recog); int RealTimeResume(Recog *recog); boolean RealTimeParam(Recog *recog); void RealTimeCMNUpdate(MFCCCalc *mfcc, Recog *recog); void RealTimeTerminate(Recog *recog); void realbeam_free(Recog *recog); int mfcc_go(Recog *recog, int (*ad_check)(Recog *)); /* word_align.c */ void word_align(WORD_ID *words, short wnum, HTK_Param *param, SentenceAlign *align, RecogProcess *r); void phoneme_align(WORD_ID *words, short wnum, HTK_Param *param, SentenceAlign *align, RecogProcess *r); void state_align(WORD_ID *words, short wnum, HTK_Param *param, SentenceAlign *align, RecogProcess *r); void word_rev_align(WORD_ID *revwords, short wnum, HTK_Param *param, SentenceAlign *align, RecogProcess *r); void phoneme_rev_align(WORD_ID *revwords, short wnum, HTK_Param *param, SentenceAlign *align, RecogProcess *r); void state_rev_align(WORD_ID *revwords, short wnum, HTK_Param *param, SentenceAlign *align, RecogProcess *r); void do_alignment_all(RecogProcess *r, HTK_Param *param); /* m_usage.c */ void opt_terminate(); void j_output_argument_help(FILE *fp); /* m_options.c */ char *filepath(char *filename, char *dirname); boolean opt_parse(int argc, char *argv[], char *cwd, Jconf *jconf); void opt_release(Jconf *jconf); /* m_jconf.c */ void get_dirname(char *path); boolean config_string_parse(char *str, Jconf *jconf); boolean config_file_parse(char *conffile, Jconf *jconf); /* m_chkparam.c */ boolean checkpath(char *filename); boolean j_jconf_finalize(Jconf *jconf); int set_beam_width(WCHMM_INFO *wchmm, int specified); /* m_info.c */ void print_jconf_overview(Jconf *jconf); void print_engine_info(Recog *recog); /* m_bootup.c */ void system_bootup(Recog *recog); /* m_adin.c */ boolean adin_initialize(Recog *recog); /* m_fusion.c */ boolean j_load_am(Recog *recog, JCONF_AM *amconf); boolean j_load_lm(Recog *recog, JCONF_LM *lmconf); boolean j_load_all(Recog *recog, Jconf *jconf); boolean j_launch_recognition_instance(Recog *recog, JCONF_SEARCH *sconf); boolean j_final_fusion(Recog *recog); void create_mfcc_calc_instances(Recog *recog); /* hmm_check.c */ void hmm_check(RecogProcess *r); /* visual.c */ void visual_init(Recog *recog); void visual_show(BACKTRELLIS *bt); void visual2_init(int maxhypo); void visual2_popped(NODE *n, int popctr); void visual2_next_word(NODE *next, NODE *prev, int popctr); void visual2_best(NODE *now, WORD_INFO *winfo); /* gmm.c */ boolean gmm_init(Recog *recog); void gmm_prepare(Recog *recog); void gmm_proceed(Recog *recog); void gmm_end(Recog *recog); boolean gmm_valid_input(Recog *recog); void gmm_free(Recog *recog); #ifdef GMM_VAD void gmm_check_trigger(Recog *recog); #endif /* graphout.c */ void wordgraph_init(WCHMM_INFO *wchmm); void wordgraph_free(WordGraph *wg); void put_wordgraph(FILE *fp, WordGraph *wg, WORD_INFO *winfo); void wordgraph_dump(FILE *fp, WordGraph *root, WORD_INFO *winfo); WordGraph *wordgraph_assign(WORD_ID wid, WORD_ID wid_left, WORD_ID wid_right, int leftframe, int rightframe, LOGPROB fscore_head, LOGPROB fscore_tail, LOGPROB gscore_head, LOGPROB gscore_tail, LOGPROB lscore, LOGPROB cmscore, RecogProcess *r); boolean wordgraph_check_and_add_rightword(WordGraph *wg, WordGraph *right, LOGPROB lscore); boolean wordgraph_check_and_add_leftword(WordGraph *wg, WordGraph *left, LOGPROB lscore); void wordgraph_save(WordGraph *wg, WordGraph *right, WordGraph **root); WordGraph *wordgraph_check_merge(WordGraph *now, WordGraph **root, WORD_ID next_wid, boolean 
*merged_p, JCONF_SEARCH *jconf); WordGraph *wordgraph_dup(WordGraph *wg, WordGraph **root); void wordgraph_purge_leaf_nodes(WordGraph **rootp, RecogProcess *r); void wordgraph_depth_cut(WordGraph **rootp, RecogProcess *r); void wordgraph_adjust_boundary(WordGraph **rootp, RecogProcess *r); void wordgraph_clean(WordGraph **rootp); void wordgraph_compaction_thesame(WordGraph **rootp); void wordgraph_compaction_exacttime(WordGraph **rootp, RecogProcess *r); void wordgraph_compaction_neighbor(WordGraph **rootp, RecogProcess *r); int wordgraph_sort_and_annotate_id(WordGraph **rootp, RecogProcess *r); void wordgraph_check_coherence(WordGraph *rootp, RecogProcess *r); void graph_forward_backward(WordGraph *root, RecogProcess *r); /* default.c */ void jconf_set_default_values(Jconf *j); void jconf_set_default_values_am(JCONF_AM *j); void jconf_set_default_values_lm(JCONF_LM *j); void jconf_set_default_values_search(JCONF_SEARCH *j); /* multi-gram.c */ int multigram_add(DFA_INFO *dfa, WORD_INFO *winfo, char *name, PROCESS_LM *lm); boolean multigram_delete(int gid, PROCESS_LM *lm); void multigram_delete_all(PROCESS_LM *lm); boolean multigram_update(PROCESS_LM *lm); boolean multigram_build(RecogProcess *r); int multigram_activate(int gid, PROCESS_LM *lm); int multigram_deactivate(int gid, PROCESS_LM *lm); boolean multigram_load_all_gramlist(PROCESS_LM *lm); int multigram_get_gram_from_category(int category, PROCESS_LM *lm); int multigram_get_gram_from_wid(WORD_ID wid, PROCESS_LM *lm); int multigram_get_all_num(PROCESS_LM *lm); void multigram_free_all(MULTIGRAM *root); int multigram_get_id_by_name(PROCESS_LM *lm, char *gramname); MULTIGRAM *multigram_get_grammar_by_name(PROCESS_LM *lm, char *gramname); MULTIGRAM *multigram_get_grammar_by_id(PROCESS_LM *lm, unsigned short id); boolean multigram_add_words_to_grammar(PROCESS_LM *lm, MULTIGRAM *m, WORD_INFO *winfo); boolean multigram_add_words_to_grammar_by_name(PROCESS_LM *lm, char *gramname, WORD_INFO *winfo); boolean multigram_add_words_to_grammar_by_id(PROCESS_LM *lm, unsigned short id, WORD_INFO *winfo); /* gramlist.c */ void multigram_add_gramlist(char *dfafile, char *dictfile, JCONF_LM *j, int lmvar); void multigram_remove_gramlist(JCONF_LM *j); boolean multigram_add_prefix_list(char *prefix_list, char *cwd, JCONF_LM *j, int lmvar); boolean multigram_add_prefix_filelist(char *listfile, JCONF_LM *j, int lmvar); /* adin-cut.c */ boolean adin_setup_param(ADIn *adin, Jconf *jconf); boolean adin_thread_create(Recog *recog); boolean adin_thread_cancel(Recog *recog); int adin_go(int (*ad_process)(SP16 *, int, Recog *), int (*ad_check)(Recog *), Recog *recog); boolean adin_standby(ADIn *a, int freq, void *arg); boolean adin_begin(ADIn *a, char *file_or_dev_name); boolean adin_end(ADIn *a); void adin_free_param(Recog *recog); /* confnet.c */ CN_CLUSTER *confnet_create(WordGraph *root, RecogProcess *r); void graph_make_order(WordGraph *root, RecogProcess *r); void graph_free_order(RecogProcess *r); void cn_free_all(CN_CLUSTER **croot); /* callback.c */ void callback_init(Recog *recog); int callback_add(Recog *recog, int code, void (*func)(Recog *recog, void *data), void *data); int callback_add_adin(Recog *recog, int code, void (*func)(Recog *recog, SP16 *buf, int len, void *data), void *data); void callback_exec(int code, Recog *recog); void callback_exec_adin(int code, Recog *recog, SP16 *buf, int len); boolean callback_exist(Recog *recog, int code); boolean callback_delete(Recog *recog, int id); /* recogmain.c */ int adin_cut_callback_store_buffer(SP16 
*now, int len, Recog *recog); SentenceAlign *result_align_new(); void result_align_free(SentenceAlign *a); void result_sentence_malloc(RecogProcess *r, int num); void result_sentence_free(RecogProcess *r); void clear_result(RecogProcess *r); /* plugin.c */ int plugin_get_id(char *name); void plugin_init(); boolean plugin_load_file(char *file); boolean plugin_load_dir(char *dir); void plugin_load_dirs(char *dirent); int plugin_find_optname(char *optfuncname, char *str); FUNC_VOID plugin_get_func(int sid, char *name); boolean plugin_exec_engine_startup(Recog *recog); void plugin_exec_adin_captured(short *buf, int len); void plugin_exec_adin_triggered(short *buf, int len); void plugin_exec_vector_postprocess(VECT *vecbuf, int veclen, int nframe); void plugin_exec_vector_postprocess_all(HTK_Param *param); void plugin_exec_process_result(Recog *recog); boolean mfc_module_init(MFCCCalc *mfcc, Recog *recog); boolean mfc_module_set_header(MFCCCalc *mfcc, Recog *recog); boolean mfc_module_standby(MFCCCalc *mfcc); boolean mfc_module_begin(MFCCCalc *mfcc); boolean mfc_module_end(MFCCCalc *mfcc); int mfc_module_read(MFCCCalc *mfcc, int *new_t); char *mfc_module_input_name(MFCCCalc *mfcc); #ifdef __cplusplus } #endif julius-4.2.2/libjulius/include/julius/plugin.h0000644001051700105040000000436612004452401017740 0ustar ritrlab/** * @file plugin.h * * * @brief Plugin related header * * * * @brief プラグイン用ヘッダ * * * @author Akinobu Lee * @date Sat Aug 2 13:04:15 2008 * * $Revision: 1.3 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology, Nagoya Institute of Technology * All rights reserved */ #ifndef __JULIUS_PLUGIN__ #define __JULIUS_PLUGIN__ #include /** * Plug-in file suffix * */ #define PLUGIN_SUFFIX ".jpi" /** * List of plugin function names * */ #define PLUGIN_FUNCTION_NAMELIST { \ "adin_get_optname", \ "adin_get_configuration", "adin_standby", \ "adin_open", "adin_read", "adin_close", \ "adin_resume", "adin_pause", "adin_terminate", \ "adin_postprocess", "adin_postprocess_triggered", \ "fvin_get_optname", \ "fvin_get_configuration", "fvin_standby", \ "fvin_open", "fvin_read", "fvin_close", \ "fvin_resume", "fvin_pause", "fvin_terminate", \ "fvin_postprocess", \ "calcmix_get_optname", "calcmix", "calcmix_init", "calcmix_free", \ "result_best_str", \ "startup"} /** * Typedef for loaded module * */ #if defined(_WIN32) && !defined(__CYGWIN32__) typedef HMODULE PLUGIN_MODULE; #else typedef void* PLUGIN_MODULE; #endif /** * define for "none" * */ #define PLUGIN_NONE NULL /** * Function definition * */ typedef void (*FUNC_VOID)(); typedef char * (*FUNC_STR)(); typedef int (*FUNC_INT)(); /** * Plugin function entry * */ typedef struct __j_plugin_entry__ { int id; int source_id; FUNC_VOID func; struct __j_plugin_entry__ *next; } PLUGIN_ENTRY; /* include headers for dynamic loading */ /* unix, cygwin = dlopen */ /* mingw, VS = non (should emulate using win32 func.) 
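   On Windows, the dlopen()/dlsym()/dlclose() calls used by the plugin
   loader are mapped to LoadLibrary()/GetProcAddress()/FreeLibrary() by
   the macros below, so the loading code can stay the same on all
   platforms.  A rough sketch of how a ".jpi" file is opened and one of
   the entry points listed in PLUGIN_FUNCTION_NAMELIST is resolved
   (illustration only; "foo.jpi" is a placeholder file name, and the
   real implementation lives in plugin.c):

      PLUGIN_MODULE m = dlopen("foo.jpi", RTLD_LAZY);
      if (m != PLUGIN_NONE) {
        FUNC_VOID f = (FUNC_VOID) dlsym(m, "startup");
        if (f != NULL) {
          // wrap it in a PLUGIN_ENTRY and register it for later calls
        }
      }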
*/ #ifdef _WIN32 # ifdef __CYGWIN32__ # include # else # include # include # define dlopen(P,G) (void *)LoadLibrary(P) # define dlsym(D, F) (void *)GetProcAddress((HMODULE)D, F) # define dlclose(D) FreeLibrary((HMODULE)D) /* dlerror() is defined in plugins.c */ # define RTLD_LAZY 0 /* dummy */ # endif #else /* UNIX */ # include #endif #endif /* __JULIUS_PLUGIN__ */ julius-4.2.2/libjulius/include/julius/recog.h0000644001051700105040000010113212004452401017526 0ustar ritrlab/** * @file recog.h * * * @brief エンジンインスタンスの定義 * * 認識エンジンのインスタンス定義を行います.インスタンスは, * Recog をトップインスタンスとして,使用する音響モデル,言語モデル, * それらを組み合わせた認識処理インスタンスを複数持ちます. * * 各部のインスタンスは,対応する jconf 内の設定構造体,および * 使用するサブインスタンスへのポインタを持ちます.PROCESS_AM は音響モデル, * PROCESS_LM は言語モデルごとに定義されます. * * MFCCCalc は, * 音響モデルおよび GMM で要求されるパラメータタイプを調べたのち, * それらを生成するのに必要なだけ生成されます.同一のMFCC型および * その他のフロントエンド処理条件を持つ音響モデルおよびGMMどうしでは * 同じ MFCCCalc が共有されます. * * * * * @brief Enging instance definitions * * This file defines the engine instance and all its sub instances. * The top instance is Recog, and it consists of several * sub instances for LM, AM, and recognition process instances. * * Each sub-instance keeps pointer to corresponding jconf setting * part, and also has pointers to other instances to use. * PROCESS_AM will be generated for each acoustic model, and PROCESS_LM * will be for each language model. * * MFCCCalc will be generated for each required MFCC frontend types * by inspecting all AMs and GMM. The AM's and GMMs that requires * exactly the same MFCC frontend will share the same MFCC frontend. * * * *
 * Recog
 *    +- *JCONF
 *    +- input related work area
 *    +- MFCCCalc[] (linked list) (generated from HMM + GMM)
 *    +- PROCESS_AM[] (linked list)
 *       +- *pointer to JCONF_AM
 *       +- *pointer to MFCCCalc
 *       +- hmminfo, hmm_gs
 *       +- hmmwrk
 *       +- multipath, ccd_flag, cmn_loaded
 *    +- PROCESS_LM[] (linked list)
 *       +- *pointer to JCONF_LM
 *       +- *pointer to PROCESS_AM
 *       +- lmtype, lmvar
 *       +- winfo
 *       +- ngram or grammars
 *       +- lmfunc
 *    +- RecogProcess process[] (linked list)
 *       +- *pointer to JCONF_SEARCH
 *       +- *pointer to PROCESS_AM
 *       +- *pointer to PROCESS_LM
 *       +- lmtype, lmvar
 *       +- misc. param
 *    +- GMMCalc
 *       +- *JCONF_AM for GMM
 *       +- *pointer to MFCCCalc
 * 
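 *
 * As a usage illustration (a minimal sketch, not part of this header):
 * a typical application registers a result callback with callback_add()
 * (declared in extern.h) and, inside it, walks the RecogProcess list of
 * the engine instance to read the recognition output.  The sketch below
 * assumes the CALLBACK_RESULT callback code of the JuliusLib callback
 * API and the winfo->woutput[] word-ID-to-string array of WORD_INFO,
 * both of which are defined outside this header:
 *
 *    static void on_result(Recog *recog, void *dummy)
 *    {
 *      RecogProcess *r;
 *      Sentence *s;
 *      int i, n;
 *      for (r = recog->process_list; r; r = r->next) {
 *        if (! r->live) continue;
 *        if (r->result.status < 0) continue;   // failed or rejected input
 *        for (n = 0; n < r->result.sentnum; n++) {
 *          s = &(r->result.sent[n]);
 *          for (i = 0; i < s->word_num; i++)
 *            printf(" %s", r->lm->winfo->woutput[s->word[i]]);
 *          printf("  (AM+LM score %f)\n", s->score);
 *        }
 *      }
 *    }
 *
 *    // at startup, after the engine instance has been created:
 *    callback_add(recog, CALLBACK_RESULT, on_result, NULL);
 *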
* * @author Akinobu Lee * @date Fri Feb 16 13:42:28 2007 * * $Revision: 1.16 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ /* */ #ifndef __J_RECOG_H__ #define __J_RECOG_H__ #include #include #include #include #include #include #include #include #include /* How tokens are managed: o tlist[][] is a token stocker. It holds all tokens in sequencial buffer. They are malloced first on startup, and refered by ID while Viterbi procedure. In word-pair mode, each token also has a link to another token to allow a node to have more than 1 token. o token[n] holds the current ID number of a token associated to a lexicon tree node 'n'. */ /** * Work area for the first pass * */ typedef struct __FSBeam__ { /* token stocker */ TOKEN2 *tlist[2]; ///< Token space to hold all token entities. TOKENID *tindex[2]; ///< Token index corresponding to @a tlist for sort int maxtnum; ///< Allocated number of tokens (will grow) int expand_step; ///< Number of tokens to be increased per expansion boolean expanded; ///< TRUE if the tlist[] and tindex[] has been expanded at last create_token(); int tnum[2]; ///< Current number of tokens used in @a tlist int n_start; ///< Start index of in-beam nodes on @a tindex int n_end; ///< end index of in-beam nodes on @a tindex int tl; ///< Current work area id (0 or 1, swapped for each frame) int tn; ///< Next work area id (0 or 1, swapped for each frame) #ifdef SCORE_PRUNING LOGPROB score_pruning_max; ///< Maximum score at current frame LOGPROB score_pruning_threshold;///< Score threshold for score pruning int score_pruning_count; ///< Number of tokens pruned by score (debug) #endif /* Active token list */ TOKENID *token; ///< Active token list that holds currently assigned tokens for each tree node #ifdef UNIGRAM_FACTORING /* for wordend processing with 1-gram factoring */ LOGPROB wordend_best_score; ///< Best score of word-end nodes int wordend_best_node; ///< Node id of the best wordend nodes TRELLIS_ATOM *wordend_best_tre; ///< Trellis word corresponds to above WORD_ID wordend_best_last_cword; ///< Last context-aware word of above #endif int totalnodenum; ///< Allocated number of nodes in @a token TRELLIS_ATOM bos; ///< Special token for beginning-of-sentence boolean nodes_malloced; ///< Flag to check if tokens already allocated LOGPROB lm_weight; ///< Language score weight (local copy) LOGPROB lm_penalty; ///< Word insertion penalty (local copy) LOGPROB lm_penalty_trans; ///< Additional insertion penalty for transparent words (local copy) LOGPROB penalty1; ///< Word insertion penalty for DFA (local copy) #if defined(WPAIR) && defined(WPAIR_KEEP_NLIMIT) boolean wpair_keep_nlimit; ///< Keeps only N token on word-pair approx. 
(local copy from jconf) #endif /* for short-pause segmentation */ boolean in_sparea; ///< TRUE when we are in a pause area now int tmp_sparea_start; ///< Memorize where the current pause area begins #ifdef SP_BREAK_RESUME_WORD_BEGIN WORD_ID tmp_sp_break_last_word; ///< Keep the max word hypothesis at beginning of this segment as the starting word of next segment #else WORD_ID last_tre_word; ///< Keep ths max word hypothesis at the end of this segment for as the starting word of the next segment #endif boolean first_sparea; ///< TRUE when we are in the first pause area int sp_duration; ///< Number of current successive sp frame #ifdef SPSEGMENT_NAIST boolean after_trigger; ///< TRUE if speech already triggered int trigger_duration; ///< Current speech duration at uptrigger detection boolean want_rewind; ///< TRUE if process wants mfcc rewinding int rewind_frame; ///< Place to rewind to boolean want_rewind_reprocess; ///< TRUE if requires re-processing after rewind #endif char *pausemodelnames; ///< pause model name string to detect segment char **pausemodel; ///< each pause model name to detect segment int pausemodelnum; ///< num of pausemodel } FSBeam; /** * Work area for realtime processing of 1st pass * */ typedef struct __RealBeam__ { /* input parameter */ int maxframelen; ///< Maximum allowed input frame length SP16 *window; ///< Window buffer for MFCC calculation int windowlen; ///< Buffer length of @a window int windownum; ///< Currently left samples in @a window /* for short-pause segmentation */ boolean last_is_segmented; ///< TRUE if last pass was a segmented input SP16 *rest_Speech; ///< Speech samples left unprocessed by segmentation at previous segment int rest_alloc_len; ///< Allocated length of rest_Speech int rest_len; ///< Current stored length of rest_Speech } RealBeam; /** * Work area for the 2nd pass * */ typedef struct __StackDecode__ { int hypo_len_count[MAXSEQNUM+1]; ///< Count of popped hypothesis per each length int maximum_filled_length; ///< Current least beam-filled depth #ifdef SCAN_BEAM LOGPROB *framemaxscore; ///< Maximum score of each frame on 2nd pass for score enveloping #endif NODE *stocker_root; ///< Node stocker for recycle int popctr; ///< Num of popped hypotheses from stack int genectr; ///< Num of generated hypotheses int pushctr; ///< Num of hypotheses actually pushed to stack int finishnum; ///< Num of found sentence hypothesis NODE *current; ///< Current node for debug #ifdef CONFIDENCE_MEASURE LOGPROB cm_alpha; ///< alpha scaling value from jconf # ifdef CM_MULTIPLE_ALPHA LOGPROB *cmsumlist; ///< Sum of cm score for each alpha coef. int cmsumlistlen; ///< Allocated length of cmsumlist. # endif # ifdef CM_SEARCH LOGPROB cm_tmpbestscore; ///< Temporal best score for summing up scores # ifndef CM_MULTIPLE_ALPHA LOGPROB cm_tmpsum; ///< Sum of CM score # endif int l_stacksize; ///< Local stack size for CM int l_stacknum; ///< Num of hypo. 
in local stack for CM NODE *l_start; ///< Top node of local stack for CM NODE *l_bottom; ///< bottom node of local stack for CM # endif # ifdef CM_NBEST LOGPROB *sentcm = NULL; ///< Confidence score of each sentence LOGPROB *wordcm = NULL; ///< Confidence score of each word voted from @a sentcm int sentnum; ///< Allocated length of @a sentcm int wordnum; ///< Allocated length of @a wordcm # endif #endif /* CONFIDENCE_MEASURE */ LOGPROB *wordtrellis[2]; ///< Buffer to compute viterbi path of a word LOGPROB *g; ///< Buffer to hold source viterbi scores HMM_Logical **phmmseq; ///< Phoneme sequence to be computed int phmmlen_max; ///< Maximum length of @a phmmseq. boolean *has_sp; ///< Mark which phoneme allow short pause for multi-path mode #ifdef GRAPHOUT_PRECISE_BOUNDARY short *wend_token_frame[2]; ///< Propagating token of word-end frame to detect corresponding end-of-words at word head LOGPROB *wend_token_gscore[2]; ///< Propagating token of scores at word-end to detect corresponding end-of-words at word head short *wef; ///< Work area for word-end frame tokens for v2 LOGPROB *wes; ///< Work area for word-end score tokens for v2 #endif WORD_ID *cnword; ///< Work area for N-gram computation WORD_ID *cnwordrev; ///< Work area for N-gram computation } StackDecode; /** * User LM function entry point * */ typedef struct { LOGPROB (*uniprob)(WORD_INFO *, WORD_ID, LOGPROB); ///< Pointer to function returning word occurence probability LOGPROB (*biprob)(WORD_INFO *, WORD_ID, WORD_ID, LOGPROB); ///< Pointer to function returning a word probability given a word context (corresponds to bi-gram) LOGPROB (*lmprob)(WORD_INFO *, WORD_ID *, int, WORD_ID, LOGPROB); ///< Pointer to function returning LM probability } LMFunc; /** * Work area for GMM calculation * */ typedef struct __gmm_calc__{ LOGPROB *gmm_score; ///< Current accumurated scores for each GMM boolean *is_voice; ///< True if corresponding model designates speech, FALSE if noise int framecount; ///< Current frame count short OP_nstream; ///< Number of input stream for GMM VECT *OP_vec_stream[MAXSTREAMNUM]; ///< input vector for each stream at that frame short OP_veclen_stream[MAXSTREAMNUM]; ///< vector length for each stream LOGPROB *OP_calced_score; ///< Work area for Gaussian pruning on GMM: scores int *OP_calced_id; ///< Work area for Gaussian pruning on GMM: id int OP_calced_num; ///< Work area for Gaussian pruning on GMM: number of above int OP_calced_maxnum; ///< Work area for Gaussian pruning on GMM: size of allocated area int OP_gprune_num; ///< Number of Gaussians to be computed in Gaussian pruning VECT *OP_vec; ///< Local workarea to hold the input vector of current frame short OP_veclen; ///< Local workarea to hold the length of above HTK_HMM_Data *max_d; ///< Hold model of the maximum score int max_i; ///< Index of max_d #ifdef CONFIDENCE_MEASURE LOGPROB gmm_max_cm; ///< Hold maximum score #endif #ifdef GMM_VAD LOGPROB *rates; ///< voice rate of recent N frames (cycle buffer) int nframe; ///< Length of rates boolean filled; int framep; ///< Current frame pointer boolean in_voice; ///< TRUE if currently in voice area boolean up_trigger; ///< TRUE when detect up trigger boolean down_trigger; ///< TRUE when detect down trigger boolean after_trigger; ///< TRUE when currently we are processing speech segment boolean want_rewind; ///< TRUE if GMM wants rewinding its MFCC boolean want_rewind_reprocess; ///< TRUE if GMM wants re-processing after rewind int rewind_frame; ///< Frame to rewind int duration; ///< Current GMM duration work 
#endif } GMMCalc; /** * Alignment result, valid when forced alignment was done * */ typedef struct __sentence_align__ { int num; ///< Number of units short unittype; ///< Unit type (one of PER_*) WORD_ID *w; ///< word sequence by id (PER_WORD) HMM_Logical **ph; ///< Phone sequence (PER_PHONEME, PER_STATE) short *loc; ///< sequence of state location in a phone (PER_STATE) boolean *is_iwsp; ///< TRUE if PER_STATE and this is the inter-word pause state at multipath mode int *begin_frame; ///< List of beginning frame int *end_frame; ///< List of ending frame LOGPROB *avgscore; ///< Score averaged by frames LOGPROB allscore; ///< Re-computed acoustic score struct __sentence_align__ *next; ///< data chain pointer } SentenceAlign; /** * Output result structure * */ typedef struct __sentence__ { WORD_ID word[MAXSEQNUM]; ///< Sequence of word ID int word_num; ///< Number of words in the sentence LOGPROB score; ///< Likelihood (LM+AM) LOGPROB confidence[MAXSEQNUM]; ///< Word confidence scores LOGPROB score_lm; ///< Language model likelihood (scaled) for N-gram LOGPROB score_am; ///< Acoustic model likelihood for N-gram int gram_id; ///< The grammar ID this sentence belongs to for DFA SentenceAlign *align; } Sentence; /** * A/D-in work area * */ typedef struct __adin__ { /* functions */ /// Pointer to function for device initialization (call once on startup) boolean (*ad_standby)(int, void *); /// Pointer to function to open audio stream for capturing boolean (*ad_begin)(char *); /// Pointer to function to close audio stream capturing boolean (*ad_end)(); /// Pointer to function to begin / restart recording boolean (*ad_resume)(); /// Pointer to function to pause recording boolean (*ad_pause)(); /// Pointer to function to terminate current recording immediately boolean (*ad_terminate)(); /// Pointer to function to read samples int (*ad_read)(SP16 *, int); /// Pointer to function to return current input source name (filename, devname, etc.) char * (*ad_input_name)(); /* configuration parameters */ int thres; ///< Input Level threshold (0-32767) int noise_zerocross; ///< Computed threshold of zerocross num in the cycle buffer int nc_max; ///< Computed number of fragments for tail margin int chunk_size; ///< audio process unit boolean adin_cut_on; ///< TRUE if do input segmentation by silence boolean silence_cut_default; ///< Device-dependent default value of adin_cut_on() boolean strip_flag; ///< TRUE if skip invalid zero samples boolean enable_thread; ///< TRUE if input device needs threading boolean need_zmean; ///< TRUE if perform zmeansource /* work area */ int c_length; ///< Computed length of cycle buffer for zero-cross, actually equals to head margin length int c_offset; ///< Static data DC offset (obsolute, should be 0) SP16 *swapbuf; ///< Buffer for re-triggering in tail margin int sbsize; ///< Size of @a swapbuf int sblen; ///< Current length of @a swapbuf int rest_tail; ///< Samples not processed yet in swap buffer ZEROCROSS zc; ///< Work area for zero-cross computation #ifdef HAVE_PTHREAD /* Variables related to POSIX threading */ pthread_t adin_thread; ///< Thread information pthread_mutex_t mutex; ///< Lock primitive SP16 *speech; ///< Unprocessed samples recorded by A/D-in thread int speechlen; ///< Current length of @a speech /* * Semaphore to start/stop recognition. * * If TRUE, A/D-in thread will store incoming samples to @a speech and * main thread will detect and process them. 
* If FALSE, A/D-in thread will still get input and check trigger as the same * as TRUE case, but does not store them to @a speech. * */ boolean transfer_online; /** * TRUE if buffer overflow occured in adin thread. * */ boolean adinthread_buffer_overflowed; /** * TRUE if adin thread ended * */ boolean adinthread_ended; boolean ignore_speech_while_recog; ///< TRUE if ignore speech input between call, while waiting recognition process #endif /* Input data buffer */ SP16 *buffer; ///< Temporary buffer to hold input samples int bpmax; ///< Maximum length of @a buffer int bp; ///< Current point to store the next data int current_len; ///< Current length of stored samples SP16 *cbuf; ///< Buffer for flushing cycle buffer just after detecting trigger boolean down_sample; ///< TRUE if perform down sampling from 48kHz to 16kHz SP16 *buffer48; ///< Another temporary buffer to hold 48kHz inputs int io_rate; ///< frequency rate (should be 3 always for 48/16 conversion boolean is_valid_data; ///< TRUE if we are now triggered int nc; ///< count of current tail silence segments boolean end_of_stream; ///< TRUE if we have reached the end of stream boolean need_init; ///< if TRUE, initialize buffer on startup DS_BUFFER *ds; ///< Filter buffer for 48-to-16 conversion boolean rehash; ///< TRUE is want rehash at rewinding on decoder-based VAD boolean input_side_segment; ///< TRUE if segmentation requested by ad_read unsigned int total_captured_len; ///< Total number of recorded samples from start until now unsigned int last_trigger_sample; ///< Last speech area was triggeed at this sample unsigned int last_trigger_len; // Length of last speech area char current_input_name[MAXPATHLEN]; ///< File or device name of current input } ADIn; /** * Recognition result output structure. You may want to use with model data * to get fully detailed results. 
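 *
 * When forced alignment is enabled (e.g. "-walign"), each Sentence in
 * @a sent below carries a chain of SentenceAlign results.  A minimal
 * sketch of listing per-word time boundaries, given a Sentence *s taken
 * from @a sent (not part of this header; winfo->woutput[] is assumed to
 * map word IDs to their output strings):
 *
 *    SentenceAlign *a;
 *    int i;
 *    for (a = s->align; a; a = a->next) {
 *      if (a->unittype != PER_WORD) continue;
 *      for (i = 0; i < a->num; i++)
 *        printf("%s: frame %d - %d (avg. score %f)\n",
 *               winfo->woutput[a->w[i]],
 *               a->begin_frame[i], a->end_frame[i], a->avgscore[i]);
 *    }
 *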
* */ typedef struct __Output__ { /** * 1: recognition in progress * 0: recognition succeeded (at least one candidate has been found) * -1: search failed, no candidate has been found * -2: input rejected by short input * -3: input rejected by GMM * */ int status; int num_frame; ///< Number of frames of the recognized part int length_msec; ///< Length of the recognized part Sentence *sent; ///< List of (N-best) recognition result sentences int sentnum; ///< Number of sentences WordGraph *wg1; ///< List of word graph generated on 1st pass int wg1_num; ///< Num of words in the wg1 WordGraph *wg; ///< List of word graph CN_CLUSTER *confnet; ///< List of confusion network clusters Sentence pass1; ///< Recognition result on the 1st pass } Output; /**********************************************************************/ /**********************************************************************/ /**********************************************************************/ /** * instance for a parameter vector computation * */ typedef struct __mfcc_calc__ { /** * Unique id * */ short id; /** * Parameter setting (entity in JCONF_AM) * */ Value *para; /** * TRUE if the para came from "-htkconf" * */ boolean htk_loaded; /** * TRUE if the para came from binhmm embedded header * */ boolean hmm_loaded; /** * Check input parameter type with header of the hmmdefs * (-notypecheck to unset) */ boolean paramtype_check_flag; /** * Parameter extraction work area * */ MFCCWork *wrk; /** * Parameter vector sequence to be recognized * */ HTK_Param *param; /** * Rest parameter for next segment for short-pause segmentation */ HTK_Param *rest_param; /** * Work area and setting for cepstral mean normalization * */ struct { /** * CMN: load initial cepstral mean from file at startup (-cmnload) */ char *load_filename; /** * CMN: update cepstral mean while recognition * (-cmnnoupdate to unset) */ boolean update; /** * CMN: save cepstral mean to file at end of every recognition (-cmnsave) */ char *save_filename; /** * CMN: MAP weight for initial cepstral mean on (-cmnmapweight) */ float map_weight; /** * TRUE if CMN parameter loaded from file at boot up */ boolean loaded; /** * realtime CMN work area * */ CMNWork *wrk; } cmn; /** * Work area for front-end processing * */ struct { /** * Estimated noise spectrum */ float *ssbuf; /** * Length of @a ssbuf */ int sslen; /** * Alpha coefficient for spectral subtraction * */ float ss_alpha; /** * Flooring coefficient for spectral subtraction * */ float ss_floor; /** * SS: compute noise spectrum from head silence on file input (-sscalc) */ boolean sscalc; /** * With "-sscalc", specify noise length at input head in msec (-sscalclen) */ int sscalc_len; /** * Load noise spectrum data from file (-ssload), that was made by "mkss". 
*/ char *ssload_filename; /** * Parameter extraction work area for spectral subtraction * */ MFCCWork *mfccwrk_ss; } frontend; /** * work area for energy normalization on real time processing * */ ENERGYWork ewrk; /** * delta MFCC cycle buffer * */ DeltaBuf *db; /** * accel MFCC cycle buffer * */ DeltaBuf *ab; /** * working buffer holding current computing mfcc vector * */ VECT *tmpmfcc; /** * FALSE indicates that the current frame (f) is not valid and should * not be used for recognition * */ boolean valid; /** * Current frame * */ int f; /** * Processed frame length when segmented * */ int last_time; /** * Re-start frame if segmenetd * */ int sparea_start; /** * TRUE if a parent instance has decided segmented * */ boolean segmented; /** * TRUE if an input functionhas decided segmented * */ boolean segmented_by_input; /** * id of an plugin module if MFCC should be obtained via plugin * */ int plugin_source; /** * Function entry points for plugin input * */ struct { /// Pointer to function for device initialization (call once on startup) boolean (*fv_standby)(); /// Pointer to function to open audio stream for capturing boolean (*fv_begin)(); /// Pointer to function to read samples int (*fv_read)(VECT *, int); /// Pointer to function to close audio stream capturing boolean (*fv_end)(); /// Pointer to function to begin / restart recording boolean (*fv_resume)(); /// Pointer to function to pause recording boolean (*fv_pause)(); /// Pointer to function to terminate current recording immediately boolean (*fv_terminate)(); /// Pointer to function to return current input name char * (*fv_input_name)(); } func; #ifdef POWER_REJECT float avg_power; #endif /** * pointer to next * */ struct __mfcc_calc__ *next; } MFCCCalc; /** * instance for an AM. * */ typedef struct __process_am__ { /** * Configuration parameters * */ JCONF_AM *config; /** * Corresponding input parameter vector instance * */ MFCCCalc *mfcc; /** * Main phoneme HMM */ HTK_HMM_INFO *hmminfo; /** * HMM for Gaussian Selection */ HTK_HMM_INFO *hmm_gs; /** * Work area and outprob cache for HMM output probability computation */ HMMWork hmmwrk; /** * pointer to next * */ struct __process_am__ *next; } PROCESS_AM; /** * instance for a LM. * */ typedef struct __process_lm__ { /** * Configuration parameters * */ JCONF_LM *config; /** * Corresponding AM * */ PROCESS_AM *am; /** * the LM type of this Model holder: will be set from Jconf used for loading * */ int lmtype; /** * the LM variation type of this Model holder: will be set from * Jconf used for loading * */ int lmvar; /** * Main Word dictionary for all LM types */ WORD_INFO *winfo; /** * Main N-gram language model (do not use with grammars) */ NGRAM_INFO *ngram; /** * List of all loaded grammars (do not use with ngram) */ MULTIGRAM *grammars; /** * Current maximum value of assigned grammar ID. * A new grammar ID will be assigned to each new grammar. * */ int gram_maxid; /** * Global DFA for recognition. This will be generated from @a grammars, * concatinating each DFA into one. */ DFA_INFO *dfa; /** * TRUE if modified in multigram_update() * */ boolean global_modified; /** * LM User function entry point * */ LMFunc lmfunc; /** * pointer to next * */ struct __process_lm__ *next; } PROCESS_LM; /** * instance for a decoding, i.e. set of LM, AM and parameters * */ typedef struct __recogprocess__ { /** * TRUE is this instance is alive, or FALSE when temporary disabled. 
* */ boolean live; /** * 1 if this instance should be made alive in the next recognition, * -1 if should become dead in the next recognition, * or 0 to leave unchanged. * */ short active; /** * search configuration data * */ JCONF_SEARCH *config; /** * acoustic model instance to use * */ PROCESS_AM *am; /** * language model instance to use * */ PROCESS_LM *lm; /** * Language model type: one of LM_UNDEF, LM_NGRAM, LM_DFA * */ int lmtype; /** * Variation type of language model: one of LM_NGRAM, LM_DFA_GRAMMAR, * LM_DFA_WORD * */ int lmvar; /** * Whether handle phone context dependency (local copy from jconf) */ boolean ccd_flag; /** * Word-conjunction HMM as tree lexicon */ WCHMM_INFO *wchmm; /** * Actual beam width of 1st pass (will be set on startup) */ int trellis_beam_width; /** * Word trellis index generated at the 1st pass */ BACKTRELLIS *backtrellis; /** * Work area for the first pass */ FSBeam pass1; /** * Work area for second pass * */ StackDecode pass2; /** * Word sequence of best hypothesis on 1st pass */ WORD_ID pass1_wseq[MAXSEQNUM]; /** * Number of words in @a pass1_wseq */ int pass1_wnum; /** * Score of @a pass1_wseq */ LOGPROB pass1_score; /** * Last maximum word hypothesis on the begin point for short-pause segmentation */ WORD_ID sp_break_last_word; /** * Last (not transparent) context word for LM for short-pause segmentation */ WORD_ID sp_break_last_nword; /** * Allow override of last context word from result of 2nd pass for short-pause segmentation */ boolean sp_break_last_nword_allow_override; /** * Search start word on 2nd pass for short-pause segmentation */ WORD_ID sp_break_2_begin_word; /** * Search end word on 2nd pass for short-pause segmentation */ WORD_ID sp_break_2_end_word; /** * Input length in frames */ int peseqlen; /** * GraphOut: total number of words in the generated graph */ int graph_totalwordnum; /** * Recognition results * */ Output result; /** * graphout: will be set from value from jconf->graph.enabled * */ boolean graphout; /** * Temporal matrix work area to hold the order relations between words * for confusion network construction. * */ char *order_matrix; /** * Number of words to be expressed in the order matrix for confusion network * construction. * */ int order_matrix_count; #ifdef DETERMINE int determine_count; LOGPROB determine_maxnodescore; boolean determined; LOGPROB determine_last_wid; boolean have_determine; #endif /** * TRUE if has something to output at CALLBACK_RESULT_PASS1_INTERIM. * */ boolean have_interim; /** * User-defined data hook. JuliusLib does not concern about its content. 
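 *
 * For example, an application may keep its own per-process context here
 * (a sketch; MyAppData is a hypothetical application-side type):
 *
 *    r->hook = (void *)mydata;              // store at setup time
 *    MyAppData *d = (MyAppData *)r->hook;   // read back inside a callback
 *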
* */ void *hook; /** * Pointer to next instance * */ struct __recogprocess__ *next; } RecogProcess; /** * Top level instance for the whole recognition process * */ typedef struct __Recog__ { /*******************************************/ /** * User-specified configuration parameters * */ Jconf *jconf; /*******************************************/ /** * A/D-in buffers * */ ADIn *adin; /** * Work area for the realtime processing of first pass */ RealBeam real; /** * Linked list of MFCC calculation/reading instances * */ MFCCCalc *mfcclist; /** * Linked list of acoustic model instances * */ PROCESS_AM *amlist; /** * Linked list of language model instances * */ PROCESS_LM *lmlist; /** * Linked list of recognition process instances * */ RecogProcess *process_list; /** * TRUE when engine is processing a segment (for short-pause segmentation) * */ boolean process_segment; /*******************************************/ /* inputs */ /** * Input speech data */ SP16 *speech; /** * Allocate length of speech * */ int speechalloclen; /** * Input length in samples */ int speechlen; /** * Input length in frames */ int peseqlen; /*******************************************/ /** * GMM definitions * */ HTK_HMM_INFO *gmm; /** * Pointer to MFCC instance for GMM * */ MFCCCalc *gmmmfcc; /** * Work area for GMM calculation * */ GMMCalc *gc; /*******************************************/ /* misc. */ /** * Status flag indicating whether the recognition is alive or not. If * TRUE, the process is currently activated, either monitoring an * audio input or recognizing the current input. If FALSE, the recognition * is now disabled until some activation command has been arrived from * client. While disabled, all the inputs are ignored. * * If set to FALSE in the program, Julius/Julian will stop after * the current recognition ends, and enter the disabled status. * */ boolean process_active; /** * If set to TRUE, Julius/Julian stops recognition immediately, terminating * the currenct recognition process, and enter into disabled status. * */ boolean process_want_terminate; /** * If set to TRUE, Julius/Julian stops recognition softly. If it is * performing recognition of the 1st pass, it immediately segments the * current input, process the 2nd pass, and output the result. Then it * enters the disabled status. * */ boolean process_want_reload; /** * When to refresh the global lexicon if received while recognition for * DFA * */ short gram_switch_input_method; /** * TRUE if audio stream is now open and engine is either listening * audio stream or recognizing a speech. FALSE on startup or when * in pause specified by a module command. * */ boolean process_online; /** * Function pointer to parameter vector computation for realtime 1st pass. * default: RealTimeMFCC() in realtime-1stpass.c * */ boolean (*calc_vector)(MFCCCalc *, SP16 *, int); /** * TRUE when recognition triggered and some recognition started, * FALSE if engine terminated with no input. 
* */ boolean triggered; /** * Callback entry point * */ void (*callback_function[SIZEOF_CALLBACK_ID][MAX_CALLBACK_HOOK])(); /** * Callback user data * */ void *callback_user_data[SIZEOF_CALLBACK_ID][MAX_CALLBACK_HOOK]; /** * Numbers of callbacks registered * */ int callback_function_num[SIZEOF_CALLBACK_ID]; /** * Callback function code list * */ int callback_list_code[MAX_CALLBACK_HOOK*SIZEOF_CALLBACK_ID]; /** * Callback function location list * */ int callback_list_loc[MAX_CALLBACK_HOOK*SIZEOF_CALLBACK_ID]; /** * Number of callbacks * */ int callback_num; /*******************************************/ /** * User-defined data hook. JuliusLib does not concern about its content. * */ void *hook; } Recog; #endif /* __J_RECOG_H__ */ julius-4.2.2/libjulius/include/julius/jconf.h0000644001051700105040000006674312004452401017550 0ustar ritrlab/** * @file jconf.h * * * @brief Jconf 構造体の定義 * * * * * @brief Jconf structure * * * *
 * JCONF
 *   +- JCONF_AM[] (linked list)
 *   +- JCONF_LM[] (linked list)
 *   +- JCONF_SEARCH[] (linked list) -> each has pointer to *JCONF_AM, *JCONF_LM
 *   +- JCONF_AM for GMM
 *   +- (engine configurations)
 * 
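 *
 * A minimal sketch of filling this structure from a jconf file, using
 * config_file_parse() and j_jconf_finalize() declared in extern.h
 * (allocation via j_jconf_new() is assumed from the public JuliusLib
 * API and is not declared in this header):
 *
 *    Jconf *jconf = j_jconf_new();
 *    if (config_file_parse("sample.jconf", jconf) == FALSE) {
 *      // parse error
 *    }
 *    if (j_jconf_finalize(jconf) == FALSE) {
 *      // inconsistent or incomplete settings
 *    }
 *    // jconf->am_root, jconf->lm_root and jconf->search_root now hold
 *    // one JCONF_AM / JCONF_LM / JCONF_SEARCH entry per configured
 *    // AM / LM / search section, ready to be passed to the engine
 *    // boot-up functions (j_load_all() etc., see extern.h).
 *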
* * @author Akinobu Lee * @date Fri Feb 16 13:42:28 2007 * * $Revision: 1.13 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ /* */ #ifndef __J_JCONF_H__ #define __J_JCONF_H__ #include #include #include #include #include #include #include #include #include /** * Maximum length of module name string * */ #define JCONF_MODULENAME_MAXLEN 64 /** * Default module name if not specified (in case of old jconf) * */ #define JCONF_MODULENAME_DEFAULT "_default" /** * Configuration parameters (AM) * */ typedef struct __jconf_am__ { /** * Unique ID * */ short id; /** * Unique name * */ char name[JCONF_MODULENAME_MAXLEN]; /** * HMM definition file (-h) */ char *hmmfilename; /** * HMMList file to map logical (tri)phones to physical models (-hlist) */ char *mapfilename; /** * Gaussian pruning method (-gprune) * Default: use value from compile-time engine configuration default. */ int gprune_method; /** * Number of Gaussian to compute per mixture on Gaussian pruning (-tmix) */ int mixnum_thres; /** * Logical HMM name of short pause model (-spmodel) * Default: "sp" */ char *spmodel_name; /** * GMS: HMM definition file for GMS (-gshmm) */ char *hmm_gs_filename; /** * GMS: number of mixture PDF to select (-gsnum) */ int gs_statenum; /** * Calculation method for outprob score of a lcdset on cross-word * triphone (-iwcd1) */ short iwcdmethod; /** * N-best states to be calculated on IWCD_NBEST (-iwcd1 best N) */ short iwcdmaxn; /** * Transition penalty of inter-word short pause (-iwsppenalty) * for multi-path mode */ LOGPROB iwsp_penalty; /** * force multipath mode * */ boolean force_multipath; /** * Acoustic Analysis Conditions. Parameter setting priority is: * user-specified > specified HTK Config > model-embedded > Julius default. * */ struct { /** * All MFCC computation parameters, actually used for recognition. */ Value para; /** * default parameters of Julius */ Value para_default; /** * parameters from binhmm header */ Value para_hmm; /** * parameters from HTK Config (-htkconf) */ Value para_htk; /** * CMN: load initial cepstral mean from file at startup (-cmnload) */ char *cmnload_filename; /** * CMN: update cepstral mean while recognition * (-cmnnoupdate to unset) */ boolean cmn_update; /** * CMN: save cepstral mean to file at end of every recognition (-cmnsave) */ char *cmnsave_filename; /** * CMN: MAP weight for initial cepstral mean on (-cmnmapweight) */ float cmn_map_weight; } analysis; /** * Frontend processing parameter for this AM * */ struct { /** * Alpha coefficient for spectral subtraction * */ float ss_alpha; /** * Flooring coefficient for spectral subtraction * */ float ss_floor; /** * SS: compute noise spectrum from head silence on file input (-sscalc) */ boolean sscalc; /** * With "-sscalc", specify noise length at input head in msec (-sscalclen) */ int sscalc_len; /** * Load noise spectrum data from file (-ssload), that was made by "mkss". 
*/ char *ssload_filename; } frontend; /** * Plugin source ID when using plugin (gprune_method is GPRUNE_SEL_USER) */ int gprune_plugin_source; /* pointer to next instance */ struct __jconf_am__ *next; } JCONF_AM; /** * Name lister for language model configurations * */ typedef struct __jconf_lm_namelist__ { /** * Entry name */ char *name; /** * Pointer to next object */ struct __jconf_lm_namelist__ *next; } JCONF_LM_NAMELIST; /** * Language models (N-gram / DFA), dictionary, and related parameters. * */ typedef struct __jconf_lm__ { /** * Unique ID * */ short id; /** * Unique name * */ char name[JCONF_MODULENAME_MAXLEN]; /** * Language model type: one of LM_UNDEF, LM_NGRAM, LM_DFA * */ int lmtype; /** * Variation type of language model: one of LM_NGRAM, LM_DFA_GRAMMAR, * LM_DFA_WORD * */ int lmvar; /** * Word dictionary file (-v) */ char *dictfilename; /** * Silence word to be placed at beginning of speech (-silhead) for N-gram */ char *head_silname; /** * Silence word to be placed at end of search (-siltail) for N-gram */ char *tail_silname; /** * Skip error words in dictionary and continue (-forcedict) */ boolean forcedict_flag; /** * N-gram in binary format (-d) */ char *ngram_filename; /** * LR 2-gram in ARPA format (-nlr) */ char *ngram_filename_lr_arpa; /** * RL 3-gram in ARPA format (-nrl) */ char *ngram_filename_rl_arpa; /** * DFA grammar file (-dfa, for single use) */ char *dfa_filename; /** * List of grammars to be read at startup (-gram) (-gramlist) */ GRAMLIST *gramlist_root; /** * List of word lists to be read at startup (-w) (-wlist) */ GRAMLIST *wordlist_root; /** * Enable inter-word short pause handling on multi-path version (-iwsp) * for multi-path mode */ boolean enable_iwsp; /** * Enable automatic addition of "short pause word" to the dictionary * (-iwspword) for N-gram */ boolean enable_iwspword; /** * Dictionary entry to be added on "-iwspword" (-iwspentry) for N-gram */ char *iwspentry; #ifdef SEPARATE_BY_UNIGRAM /** * Number of best frequency words to be separated (linearized) * from lexicon tree (-sepnum) */ int separate_wnum; #endif /** * For isolated word recognition mode: name of head silence model */ char wordrecog_head_silence_model_name[MAX_HMMNAME_LEN]; /** * For isolated word recognition mode: name of tail silence model */ char wordrecog_tail_silence_model_name[MAX_HMMNAME_LEN]; /** * For isolated word recognition mode: name of silence as phone context */ char wordrecog_silence_context_name[MAX_HMMNAME_LEN]; /** * Name string of Unknown word for N-gram */ char unknown_name[UNK_WORD_MAXLEN]; /** * List of additional dictionary files */ JCONF_LM_NAMELIST *additional_dict_files; /** * List of additional dictionary entries */ JCONF_LM_NAMELIST *additional_dict_entries; /** * Pointer to next instance * */ struct __jconf_lm__ *next; } JCONF_LM; /** * Search parameters * */ typedef struct __jconf_search__ { /** * Unique ID * */ short id; /** * Unique name * */ char name[JCONF_MODULENAME_MAXLEN]; /** * Which AM configuration to refer * */ JCONF_AM *amconf; /** * Which LM configuration to refer * */ JCONF_LM *lmconf; /** * Compute only 1pass (-1pass) */ boolean compute_only_1pass; /** * context handling */ boolean ccd_handling; /** * force context-dependent handling */ boolean force_ccd_handling; /** * LM weights * */ struct { /** * N-gram Language model weight (-lmp) */ LOGPROB lm_weight; /** * N-gram Word insertion penalty (-lmp) */ LOGPROB lm_penalty; /** * N-gram Language model weight for 2nd pass (-lmp2) */ LOGPROB lm_weight2; /** * N-gram Word insertion penalty 
for 2nd pass (-lmp2) */ LOGPROB lm_penalty2; /** * N-gram Additional insertion penalty for transparent words (-transp) */ LOGPROB lm_penalty_trans; /** * Word insertion penalty for DFA grammar on 1st pass (-penalty1) */ LOGPROB penalty1; /** * Word insertion penalty for DFA grammar on 2nd pass (-penalty2) */ LOGPROB penalty2; /** * INTERNAL: TRUE if -lmp2 specified */ boolean lmp2_specified; /** * INTERNAL: TRUE if -lmp specified */ boolean lmp_specified; } lmp; /** * First pass parameters * */ struct { /** * Beam width of rank pruning for the 1st pass. If value is -1 * (not specified), system will guess the value from dictionary * size. If 0, a possible maximum value will be assigned to do * full search. */ int specified_trellis_beam_width; #ifdef SCORE_PRUNING /** * Another beam width for score pruning at the 1st pass. If value * is -1, or not specified, score pruning will be disabled. */ #endif LOGPROB score_pruning_width; #if defined(WPAIR) && defined(WPAIR_KEEP_NLIMIT) /** * Keeps only N token on word-pair approximation (-nlimit) */ int wpair_keep_nlimit; #endif #ifdef HASH_CACHE_IW /** * Inter-word LM cache size rate (-iwcache) */ int iw_cache_rate; #endif /** * (DEBUG) use old build_wchmm() instead of build_wchmm2() for lexicon * construction (-oldtree) */ boolean old_tree_function_flag; #ifdef DETERMINE /** * (EXPERIMENTAL) score threshold between maximum node score and * maximum word end score for early word determination * */ LOGPROB determine_score_thres; /** * (EXPERIMENTAL) frame duration threshold for early word determination * */ int determine_duration_thres; #endif /* DETERMINE */ } pass1; /** * Second pass parameters * */ struct { /** * Search until N-best sentences are found (-n). Also see "-output". */ int nbest; /** * Word beam width of 2nd pass. -1 means no beaming (-b2) */ int enveloped_bestfirst_width; #ifdef SCAN_BEAM /** * Score beam threshold of 2nd pass (-sb) */ LOGPROB scan_beam_thres; #endif /** * Hypothesis overflow threshold at 2nd pass (-m) */ int hypo_overflow; /** * Hypothesis stack size of 2nd pass (-s) */ int stack_size; /** * Get next words from word trellis with a range of this frames * on 2nd pass (-lookuprange) */ int lookup_range; /** * Limit expansion words for trellis words on neighbor frames * at 2nd pass of DFA for speedup (-looktrellis) */ boolean looktrellis_flag; } pass2; /** * Word graph output * */ struct { /** * GraphOut: if enabled, graph search is enabled. * */ boolean enabled; /** * GraphOut: if enabled, output word graph * */ boolean lattice; /** * GraphOut: if enabled, generate confusion network * */ boolean confnet; /** * GraphOut: allowed margin for post-merging on word graph generation * (-graphrange) if set to -1, same word with different phone context * will be separated. */ int graph_merge_neighbor_range; #ifdef GRAPHOUT_DEPTHCUT /** * GraphOut: density threshold to cut word graph at post-processing. * (-graphcut) Setting larger value is safe for all condition. 
*/ int graphout_cut_depth; #endif #ifdef GRAPHOUT_LIMIT_BOUNDARY_LOOP /** * GraphOut: limitation of iteration loop for word boundary adjustment * (-graphboundloop) */ int graphout_limit_boundary_loop_num; #endif #ifdef GRAPHOUT_SEARCH_DELAY_TERMINATION /** * GraphOut: delay the termination of search on graph merging until * at least one sentence candidate is found * (-graphsearchdelay / -nographsearchdelay) */ boolean graphout_search_delay; #endif } graph; /** * Successive decoding (--enable-sp-segment) * */ struct { /** * TRUE if short-pause segmentation enabled for this instance * */ boolean enabled; /** * Length threshold to detect short-pause segment in frames */ int sp_frame_duration; /** * name string of pause model * */ char *pausemodelname; #ifdef SPSEGMENT_NAIST /** * Backstep margin when speech trigger detected by NAIST short-pause * detection system * */ int sp_margin; /** * Delay frame of speech trigger detection in NAIST short-pause * detection system * */ int sp_delay; #endif } successive; /** * Annotation to the output * */ struct { #ifdef CONFIDENCE_MEASURE /** * Scaling factor for confidence scoring (-cmalpha) */ LOGPROB cm_alpha; #ifdef CM_MULTIPLE_ALPHA /** * Begin value of alpha */ LOGPROB cm_alpha_bgn; /** * End value of alpha */ LOGPROB cm_alpha_end; /** * Number of test values (will be set from above values) */ int cm_alpha_num; /** * Step value of alpha */ LOGPROB cm_alpha_step; #endif #ifdef CM_SEARCH_LIMIT /** * Cut-off threshold for generated hypo. for confidence decoding (-cmthres) */ LOGPROB cm_cut_thres; #endif #ifdef CM_SEARCH_LIMIT_POPO /** * Cut-off threshold for popped hypo. for confidence decoding (-cmthres2) */ LOGPROB cm_cut_thres_pop; #endif #endif /* CONFIDENCE_MEASURE */ /** * Forced alignment: per word (-walign) */ boolean align_result_word_flag; /** * Forced alignment: per phoneme (-palign) */ boolean align_result_phoneme_flag; /** * Forced alignment: per state (-salign) */ boolean align_result_state_flag; } annotate; /** * Output configurations * */ struct { /** * Result: number of sentence to output (-output) , also see @a nbest (-n). */ int output_hypo_maxnum; /** * Result: output partial recognition result on the 1st pass (-progout) */ boolean progout_flag; /** * Result: Progressive output interval on 1st pass in msec (-proginterval) */ int progout_interval; /** * Result: INTERNAL: interval in number of frames */ int progout_interval_frame; /** * Get results for all grammars independently on 2nd pass on DFA * (-multigramout / -nomultigramout) */ boolean multigramout_flag; } output; /** * Misc. switches * */ struct { /** * Enter trellis interactive check routine after boot (-check trellis) */ boolean trellis_check_flag; /** * Enter triphone existence check routine after boot (-check triphone) */ boolean triphone_check_flag; /** * Enter lexicon structure consulting mode after boot (-check wchmm) */ boolean wchmm_check_flag; /** * should be set to TRUE at startup when this process should start * with inactive status * */ boolean start_inactive; /** * In case the 2nd pass search fails, this option specifies Julius * to use the result of the previous 1st pass as final result. * When this is TRUE, no RECOGFAIL occur. * */ boolean fallback_pass1_flag; } sw; /* pointer to next instance */ struct __jconf_search__ *next; } JCONF_SEARCH; /** * Configuration parameters (global) * */ typedef struct __Jconf__ { /** * Input source information, gathered from all AM conf. */ struct { /** * Input source type. (waveform / mfc) * */ int type; /** * Input source. 
* */ int speech_input; /** * Input device. * */ int device; /** * id of the selected plug-in if using plugin * */ int plugin_source; /** * Sampling frequency * */ long sfreq; /** * Sampling period in 100ns units * */ long period; /** * Window size in samples, similar to WINDOWSIZE in HTK (unit is different) * */ int framesize; /** * Frame shift length in samples * */ int frameshift; /** * Use 48kHz input and perform down sampling to 16kHz (-48) */ boolean use_ds48to16; /** * List of input files for rawfile / mfcfile input (-filelist) */ char *inputlist_filename; /** * Port number for adinnet input (-adport) */ int adinnet_port; #ifdef USE_NETAUDIO /** * Host/unit name for NetAudio/DatLink input (-NA) */ char *netaudio_devname; #endif /** * Check input parameter type with header of the hmmdefs * for parameter file input. FALSE avoids the check. */ boolean paramtype_check_flag; } input; /** * Configurations for Voice activity detection * */ struct { /** * Input level threshold from 0 to 32767 (-lv) */ int level_thres; /** * Head margin in msec (-headmargin) */ int head_margin_msec; /** * Tail margin in msec (-tailmargin) */ int tail_margin_msec; /** * Zero cross number threshold per a second (-zc) */ int zero_cross_num; /** * Silence detection and cutting: 0=off, 1=on, 2=accept device default * (-cutsilence / -nocutsilence) */ int silence_cut; /** * Chunk size in samples, i.e. processing unit for audio input * detection. Segmentation will be done by this unit. * */ int chunk_size; #ifdef GMM_VAD /** * (GMM_VAD) Backstep margin when speech trigger is detected. * */ int gmm_margin; /** * (GMM_VAD) Up trigger threshold of GMM likelihood, where GMM * likelihood is defined as \[ \max_{m \in M_v} p(x|m) - \max_{m * \in M_n} p(x|m) \] where $M_v$ is a set of voice GMM, and $M_n$ * is a set of noise GMM whose names are specified by * "-gmmreject". Julius calculate this value for each input * frame, and average it for the last gmm_margin frames, and when * the value gets higher than this value, Julius will start recognition. */ float gmm_uptrigger_thres; /** * (GMM_VAD) Down trigger threshold of GMM likelihood, where GMM * likelihood is defined as \[ \max_{m \in M_v} p(x|m) - \max_{m * \in M_n} p(x|m) \] where $M_v$ is a set of voice GMM, and $M_n$ * is a set of noise GMM whose names are specified by * "-gmmreject". Julius calculate this value for each input * frame, and average it for the last gmm_margin frames, and when * the value gets lower than this value, Julius will stop recognition. */ float gmm_downtrigger_thres; #endif } detect; /** * Pre-processing parameters before frontends * */ struct { /** * Strip off zero samples (-nostrip to unset) */ boolean strip_zero_sample; /** * Remove DC offset by zero mean (-zmean / -nozmean) */ boolean use_zmean; } preprocess; /** * Models and parameters for input rejection * */ struct { /** * GMM definition file (-gmm) */ char *gmm_filename; /** * Number of Gaussians to be computed on GMM calculation (-gmmnum) */ int gmm_gprune_num; /** * Comma-separated list of GMM model name to be rejected (-gmmreject) */ char *gmm_reject_cmn_string; /** * Length threshold to reject input (-rejectshort) */ int rejectshortlen; #ifdef POWER_REJECT /** * Rejection power threshold * */ float powerthres; #endif } reject; /** * decoding parameters to control recognition process (global) * */ struct { /** * INTERNAL: do on-the-fly decoding if TRUE (value depends on * device default and forced_realtime. 
*/ boolean realtime_flag; /** * INTERNAL: TRUE if either of "-realtime" or "-norealtime" is * explicitly specified by user. When TRUE, the user-specified value * in forced_realtime will be applied to realtime_flag. */ boolean force_realtime_flag; /** * Force on-the-fly decoding on 1st pass with audio input and * MAP-CMN (-realtime / -norealtime) */ boolean forced_realtime; /** * TRUE if a kind of speech segmentation is enabled * */ boolean segment; } decodeopt; /** * Configurations for acoustic models (HMM, HMMList) and am-specific * parameters * */ JCONF_AM *am_root; /** * Language models (N-gram / DFA), dictionary, and related parameters. * */ JCONF_LM *lm_root; /** * Search parameters (LM/AM independent), annotation, * and output parameters * */ JCONF_SEARCH *search_root; /** * Current JCONF_AM for reading options * */ JCONF_LM *lmnow; /** * Current JCONF_AM for reading options * */ JCONF_AM *amnow; /** * Current JCONF_AM for reading options * */ JCONF_SEARCH *searchnow; /** * Config parameters for GMM computation. * (only gmmconf->analysis.* is used) * */ JCONF_AM *gmm; /** * Current option declaration mode while loading options * */ short optsection; /** * Whether option sectioning ristriction should be applied or not * */ boolean optsectioning; } Jconf; enum { JCONF_OPT_DEFAULT, JCONF_OPT_GLOBAL, JCONF_OPT_AM, JCONF_OPT_LM, JCONF_OPT_SR, SIZEOF_JCONF_OPT }; #endif /* __J_JCONF_H__ */ /* ======================================================= An OLD variable name mapping from old global.h to common.h These data are bogus, left here only for reference ======================================================= result_reorder_flag -> DELETED adinnet_port ->jconf.input.adinnet_port align_result_phoneme_flag ->jconf.annotate.align_result_phoneme_flag align_result_state_flag ->jconf.annotate.align_result_state_flag align_result_word_flag ->jconf.annotate.align_result_word_flag backmax ->recog.backmax backtrellis ->recog.backtrellis ccd_flag ->jconf.am.ccd_flag ccd_flag_force ->jconf.am.ccd_flag_force cm_alpha ->jconf.annotate.cm_alpha cm_alpha_bgn ->jconf.annotate.cm_alpha_bgn cm_alpha_end ->jconf.annotate.cm_alpha_end cm_alpha_num ->jconf.annotate.cm_alpha_num cm_alpha_step ->jconf.annotate.cm_alpha_step cm_cut_thres ->jconf.annotate.cm_cut_thres cm_cut_thres_pop ->jconf.annotate.cm_cut_thres_pop cmn_loaded ->recog.cmn_loaded cmn_map_weight ->jconf.frontend.cmn_map_weight cmn_update ->jconf.frontend.cmn_update cmnload_filename ->jconf.frontend.cmnload_filename cmnsave_filename ->jconf.frontend.cmnsave_filename compute_only_1pass ->jconf.sw.compute_only_1pass dfa ->model.dfa dfa_filename ->jconf.lm.dfa_filename dictfilename ->jconf.lm.dictfilename enable_iwsp ->jconf.lm.enable_iwsp enable_iwspword ->jconf.lm.enable_iwspword enveloped_bestfirst_width ->jconf.search.pass2.enveloped_bestfirst_width force_realtime_flag ->jconf.search.pass1.force_realtime_flag forced_realtime ->jconf.search.pass1.forced_realtime forcedict_flag ->jconf.lm.forcedict_flag framemaxscore ->recog.framemaxscore from_code ->jconf.output.from_code gmm ->model.gmm gmm_filename ->jconf.reject.gmm_filename gmm_gprune_num ->jconf.reject.gmm_gprune_num gmm_reject_cmn_string ->jconf.reject.gmm_reject_cmn_string gprune_method ->jconf.am.gprune_method gramlist ->model.grammars gramlist_root ->jconf.lm.gramlist_root graph_merge_neighbor_range ->jconf.graph.graph_merge_neighbor_range graph_totalwordnum ->recog.graph_totalwordnum graphout_cut_depth ->jconf.graph.graphout_cut_depth graphout_limit_boundary_loop_num 
->jconf.graph.graphout_limit_boundary_loop_num graphout_search_delay ->jconf.graph.graphout_search_delay gs_statenum ->jconf.am.gs_statenum head_margin_msec ->jconf.detect.head_margin_msec head_silname ->jconf.lm.head_silname hmm_gs ->model.hmm_gs hmm_gs_filename ->jconf.am.hmm_gs_filename hmmfilename ->jconf.am.hmmfilename hmminfo ->model.hmminfo hypo_overflow ->jconf.search.pass2.hypo_overflow inputlist_filename ->jconf.input.inputlist_filename iw_cache_rate ->jconf.search.pass1.iw_cache_rate iwcdmaxn ->jconf.search.pass1.iwcdmaxn iwcdmethod ->jconf.search.pass1.iwcdmethod iwsp_penalty ->jconf.lm.iwsp_penalty iwspentry ->jconf.lm.iwspentry level_thres ->jconf.detect.level_thres lm_penalty ->jconf.lm.lm_penalty lm_penalty2 ->jconf.lm.lm_penalty2 lm_penalty_trans ->jconf.lm.lm_penalty_trans lm_weight ->jconf.lm.lm_weight lm_weight2 ->jconf.lm.lm_weight2 lmp_specified ->jconf.lm.lmp_specified lmp2_specified ->jconf.lm.lmp2_specified looktrellis_flag ->jconf.search.pass2.looktrellis_flag lookup_range ->jconf.search.pass2.lookup_range mapfilename ->jconf.am.mapfilename mixnum_thres ->jconf.am.mixnum_thres module_mode -> (app) module_port -> (app) module_sd -> (app) multigramout_flag ->jconf.output.multigramout_flag nbest ->jconf.search.pass2.nbest netaudio_devname ->jconf.input.netaudio_devname ngram ->model.ngram ngram_filename ->jconf.lm.ngram_filename ngram_filename_lr_arpa ->jconf.lm.ngram_filename_lr_arpa ngram_filename_rl_arpa ->jconf.lm.ngram_filename_rl_arpa old_iwcd_flag -> USE_OLD_IWCD (define.h) old_tree_function_flag ->jconf.search.pass1.old_tree_function_flag output_hypo_maxnum ->jconf.output.output_hypo_maxnum para ->jconf.analysis.para para_default ->jconf.analysis.para_default para_hmm ->jconf.analysis.para_hmm para_htk ->jconf.analysis.para_htk paramtype_check_flag ->jconf.analysis.paramtype_check_flag pass1_score ->recog.pass1_score pass1_wnum ->recog.pass1_wnum pass1_wseq ->recog.pass1_wseq penalty1 ->jconf.lm.penalty1 penalty2 ->jconf.lm.penalty2 peseqlen ->recog.peseqlen progout_flag ->jconf.output.progout_flag progout_interval ->jconf.output.progout_interval progout_interval_frame (beam.c) ->jconf.output.progout_interval realtime_flag ->jconf.search.pass1.realtime_flag record_dirname ->jconf.output.record_dirname rejectshortlen ->jconf.reject.rejectshortlen rest_param ->recog.rest_param result_output -> (app) scan_beam_thres ->jconf.search.pass2.scan_beam_thres separate_score_flag ->jconf.output.separate_score_flag separate_wnum ->jconf.search.pass1.separate_wnum silence_cut ->jconf.detect.silence_cut sp_break_2_begin_word ->recog.sp_break_2_begin_word sp_break_2_end_word ->recog.sp_break_2_end_word sp_break_last_nword ->recog.sp_break_last_nword sp_break_last_nword_allow_override ->recog.sp_break_last_nword_allow_override sp_break_last_word ->recog.sp_break_last_word sp_frame_duration ->jconf.successive.sp_frame_duration specified_trellis_beam_width ->jconf.search.pass1.specified_trellis_beam_width speech ->recog.speech speech_input ->jconf.input.speech_input speechlen ->recog.speechlen spmodel_name ->jconf.am.spmodel_name ssbuf ->recog.ssbuf sscalc ->jconf.frontend.sscalc sscalc_len ->jconf.frontend.sscalc_len sslen ->recog.sslen ssload_filename ->jconf.frontend.ssload_filename stack_size ->jconf.search.pass2.stack_size strip_zero_sample ->jconf.frontend.strip_zero_sample tail_margin_msec ->jconf.detect.tail_margin_msec tail_silname ->jconf.lm.tail_silname to_code ->jconf.output.to_code trellis_beam_width ->recog.trellis_beam_width trellis_check_flag 
->jconf.sw.trellis_check_flag triphone_check_flag ->jconf.sw.triphone_check_flag use_ds48to16 ->jconf.input.use_ds48to16 use_zmean ->jconf.frontend.use_zmean wchmm ->recog.wchmm wchmm_check_flag ->jconf.sw.wchmm_check_flag winfo ->model.winfo wpair_keep_nlimit ->jconf.search.pass1.wpair_keep_nlimit zero_cross_num ->jconf.detect.zero_cross_num verbose_flag -> (remain in global.h) debug2_flag -> (remain in global.h) */ julius-4.2.2/libjulius/include/julius/misc.h0000644001051700105040000000160112004452401017362 0ustar ritrlab/** * @file misc.h * * * @brief その他の雑多な定義 * * * * @brief Some miscellaneous definitions * * * @author Akinobu Lee * @date Mon May 30 15:58:16 2005 * * $Revision: 1.4 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #ifndef __J_MISC_H__ #define __J_MISC_H__ /// Defines for selecting message output destination enum { SP_RESULT_TTY, ///< To tty SP_RESULT_MSOCK ///< Socket output in XML-like format for module mode }; /// Switch to specify the grammar changing timing policy. enum{SM_TERMINATE, SM_PAUSE, SM_WAIT}; /// Switch to specify the alignment unit enum{PER_WORD, PER_PHONEME, PER_STATE}; #endif /* __J_MISC_H__ */ julius-4.2.2/libjulius/include/julius/juliuslib.h0000644001051700105040000000100412004452401020426 0ustar ritrlab/** * @file juliuslib.h * * * @brief Library top header * * * * @brief ライブラリ使用時のトップヘッダー * * * @author Akinobu Lee * @date Tue Nov 6 22:42:12 2007 * * $Revision: 1.4 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #include julius-4.2.2/libjulius/include/julius/wchmm.h0000644001051700105040000002355712004452401017560 0ustar ritrlab/** * @file wchmm.h * * * @brief 木構造化辞書の構造体定義. * * このファイルでは,第1パスで用いられる木構造化辞書(あるいは単語連結 * HMM (wchmm) とも呼ばれる)の構造体を定義しています. 起動時に,単語辞書の * 前単語が並列に並べられ,ツリー上に結合されて木構造化辞書が構築されます. * HMMの状態単位で構築され,各状態は,対応するHMM出力確率,ツリー内での遷移先 * のリスト,および探索のための様々な情報(言語スコアファクタリングのための * successor word list や uni-gram 最大値,単語始終端マーカー,音素開始 * マーカーなど)を含みます. * * * * @brief Structure Definition of tree lexicon * * This file defines structure for word-conjunction HMM, aka tree lexicon * for recognition of 1st pass. Words in the dictionary are gathered to * build a tree lexicon. The lexicon is built per HMM state basis, * with their HMM output probabilities, transition arcs, and other * informations for search such as successor word lists and maximum * uni-gram scores for LM factoring, word head/tail marker, phoneme * start marker, and so on. * * * @author Akinobu Lee * @date Sun Sep 18 21:31:32 2005 * * $Revision: 1.7 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #ifndef __J_WORD_CONJ_HMM_H__ #define __J_WORD_CONJ_HMM_H__ //#define MAXWCNSTEP 40000 ///< Number of states to be allocated at once #ifdef PASS1_IWCD /* Cross-word triphone handling */ /** * State output probability data for head phone of a word. The phoneme HMM * should change while search according to the last context word. 
* */ typedef struct { HMM_Logical *hmm; ///< Original HMM state on the dictionary short state_loc; ///< State location within the phoneme (1-) /* Context cache */ boolean last_is_lset; ///< TRUE if last assigned model was context-dependent state set union { HTK_HMM_State *state; ///< Last assigned state (last_is_lset = FALSE) CD_State_Set *lset; ///< Last assigned lset (last_is_lset = TRUE) } cache; WORD_ID lastwid_cache; ///< Last context word ID } RC_INFO; /** * State output probability data for 1-phone word. The phoneme HMM should * change according to the last context word. * */ typedef struct { HMM_Logical *hmm; ///< Original HMM state on the dictionary short state_loc; ///< State location within the phoneme (1-) /* Context cache */ boolean last_is_lset; ///< TRUE if last assigned model was context-dependent state set WORD_ID category; ///< Last context word's category ID union { HTK_HMM_State *state; ///< Last assigned state CD_State_Set *lset; ///< Last assigned lset } cache; WORD_ID lastwid_cache; ///< Last context word ID } LRC_INFO; /* For word tail phoneme, pseudo phone on the dictionary will be directly used as context-dependent state set */ /** * State output probability container on lexicon tree. Each state * should have one of them. * */ typedef union { HTK_HMM_State *state; ///< For AS_STATE (word-internal phone) CD_State_Set *lset; ///< For AS_LSET (word tail phone) RC_INFO *rset; ///< For AS_RSET (word head phone) LRC_INFO *lrset; ///< For AS_LRSET (phone in 1-phoneme word) } ACOUSTIC_SPEC; /** * ID to indicate which data is in the ACOUSTIC_SPEC container. * */ typedef enum { AS_STATE, ///< This state is in word-internal phone AS_LSET, ///< This state is in word tail phone AS_RSET, ///< This state is in word head phone AS_LRSET ///< This state is in 1-phone word } AS_Style; #endif /*************************************************************************/ /** * LM cache for the 1st pass * */ typedef struct { /// Word-internal factoring cache indexed by scid, holding last score LOGPROB *probcache; /// Word-internal factoring cache indexed by scid, holding last N-gram entry ID WORD_ID *lastwcache; /** * @brief Cross-word factoring cache to hold last-word-dependent factoring * score toward word head nodes. * * Cached values will be stored as [last_nword][n], where n is the number of * word-head node on which the last_nword-dependent N-gram factoring value * should be computed on cross-word transition. In 1-gram factoring, * n equals to wchmm->isolatenum, the number of isolated (not shared) * word-head nodes. * In 2-gram factoring, n simply equals to wchmm->startnum, the number of * all word-head nodes. * * The cache area will be allocated per the previous word when they appeared * while search. * It will retain across the speech stream, so the cache area will grow * to an extent as recognition was done for many files. */ LOGPROB **iw_sc_cache; /** * Maximum size of cross-word factoring cache @a iw_sc_cache per last word. * The value is set in max_successor_cache_init(). */ int iw_cache_num; #ifdef HASH_CACHE_IW WORD_ID *iw_lw_cache; ///< Maps hash cache id [x] to corresponding last word #endif } LM_PROB_CACHE; /*************************************************************************/ /** * Number of arcs in an arc cell. * */ #define A_CELL2_ALLOC_STEP 4 /** * Transition arc holding cell * */ typedef struct __A_CELL2__ { /** * Number of arcs currently stored in this cell. * If this reaches A_CELL2_ALLOC_STEP, next cell will be allocated. 
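*
* As an illustration of how these chained cells are meant to be used, the
* following sketch enumerates every arc stored for one tree-lexicon node
* (self and next transitions are kept separately in WCHMM_INFO::self_a and
* WCHMM_INFO::next_a, defined further below).  This is explanatory example
* code only, not code taken from the engine.
*
* @code
* static void list_arcs(WCHMM_INFO *wchmm, int node)
* {
*   A_CELL2 *cell;
*   int i;
*   // walk the chain of arc cells attached to this node
*   for (cell = wchmm->ac[node]; cell; cell = cell->next) {
*     for (i = 0; i < cell->n; i++) {
*       jlog("%d -> %d (logprob %f)\n", node, cell->arc[i], cell->a[i]);
*     }
*   }
* }
* @endcode
*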
* */ unsigned short n; int arc[A_CELL2_ALLOC_STEP]; ///< Transition destination node numbers LOGPROB a[A_CELL2_ALLOC_STEP]; ///< Transitino probabilities struct __A_CELL2__ *next; ///< Pointer to next cell } A_CELL2; /** * HMM state on tree lexicon. * */ typedef struct wchmm_state { #ifdef PASS1_IWCD ACOUSTIC_SPEC out; ///< State output probability container /* below has been moved to WCHMM (04/06/22 by ri) */ /*unsigned char outstyle; output type (one of AS_Style) */ #else /* ~PASS1_IWCD */ HTK_HMM_State *out; ///< HMM State #endif /* ~PASS1_IWCD */ /** * LM factoring parameter: * If scid > 0, it will points to the successor list index. * If scid = 0, the node is not on branch. * If scid < 0, it will points to the unigram factoring value index. */ int scid; } WCHMM_STATE; /** * wchmm-specific work area * */ typedef struct { int *out_from; int *out_from_next; LOGPROB *out_a; LOGPROB *out_a_next; int out_from_len; } WCHMM_WORK; /** * Whole lexicon tree structure holding all information. * */ typedef struct wchmm_info { int lmtype; ///< LM type int lmvar; ///< LM variant boolean category_tree; ///< TRUE if category_tree is used HTK_HMM_INFO *hmminfo; ///< HMM definitions used to construct this lexicon NGRAM_INFO *ngram; ///< N-gram used to construct this lexicon DFA_INFO *dfa; ///< Grammar used to construct this lexicon WORD_INFO *winfo; ///< Word dictionary used to construct this lexicon boolean ccd_flag; ///< TRUE if handling context dependency int maxwcn; ///< Memory assigned maximum number of nodes int n; ///< Num of nodes in this lexicon WCHMM_STATE *state; ///< HMM state on tree lexicon [nodeID] LOGPROB *self_a; ///< Transition probability to self node LOGPROB *next_a; ///< Transition probabiltiy to next (now+1) node A_CELL2 **ac; ///< Transition arc information other than self and next. WORD_ID *stend; ///< Word ID that ends at the state [nodeID] int **offset; ///< Node ID of a phone [wordID][0..phonelen-1] int *wordend; ///< Node ID of word-end state [wordID] int startnum; ///< Number of root nodes int *startnode; ///< Root node index [0..startnum-1] -> node ID int *wordbegin; ///< Node ID of word-beginning state [wordID] for multipath mode int maxstartnum; ///< Allocated number of startnodes for multipath mode WORD_ID *start2wid; ///< Root node index [0..startnum-1] -> word ID for multipath mode #ifdef UNIGRAM_FACTORING int *start2isolate; ///< Root node index -> isolated root node ID int isolatenum; ///< Number of isolated root nodes #endif LOGPROB *wordend_a; ///< Transition prob. 
outside word [wordID] for non-multipath mode #ifdef PASS1_IWCD unsigned char *outstyle; ///< ID to indicate type of output probability container (one of AS_Style) #endif /* Successor lists on the tree are stored on sequencial list at @a sclist, and each node has index to the list */ /* sclist and sclen are used at 2-gram factoring only */ /* scword is used at 1-gram factoring only */ #ifdef UNIGRAM_FACTORING WORD_ID *scword; ///< successor word[scid] LOGPROB *fscore; ///< List of 1-gram factoring score [-scid] int fsnum; ///< Number of @a fscore #endif WORD_ID **sclist; ///< List of successor list [scid] WORD_ID *sclen; ///< Length of each succcessor list [scid] int scnum; ///< Total number of factoring nodes that has successor list BMALLOC_BASE *malloc_root; ///< Pointer for block memory allocation #ifdef PASS1_IWCD APATNODE *lcdset_category_root; ///< Index of lexicon-dependent category-aware pseudo phone set when used on Julian BMALLOC_BASE *lcdset_mroot; #endif /* PASS1_IWCD */ HMMWork *hmmwrk; ///< Work area for HMM computation in wchmm LM_PROB_CACHE lmcache; ///< LM score cache for 1st pass WCHMM_WORK wrk; ///< Other work area for 1st pass transition computation int separated_word_count; ///< Number of words actually separated (linearlized) from the tree char lccbuf[MAX_HMMNAME_LEN+7]; ///< Work area for HMM name conversion char lccbuf2[MAX_HMMNAME_LEN+7]; ///< Work area for HMM name conversion /* user-defined functions, used when this->lmvar == LM_NGRAM_USER */ /* they are local copy from parent Recog instance */ LOGPROB (*uni_prob_user)(WORD_INFO *, WORD_ID, LOGPROB); ///< Pointer to function returning word occurence probability LOGPROB (*bi_prob_user)(WORD_INFO *, WORD_ID, WORD_ID, LOGPROB); ///< Pointer to function returning a word probability given a word context (corresponds to bi-gram) } WCHMM_INFO; #endif /* __J_WORD_CONJ_HMM_H__ */ julius-4.2.2/libjulius/include/julius/global.h0000644001051700105040000000241012004452401017666 0ustar ritrlab/** * @file global.h * * * @brief 大域変数の定義 * * * * * @brief Global variables * * * * @author Akinobu Lee * @date Sun Sep 18 23:53:17 2005 * * $Revision: 1.6 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #ifndef __J_GLOBAL_H__ #define __J_GLOBAL_H__ #include #include #include #include #include /** * If GLOBAL_VARIABLE_DEFINE is defined, global variables are actually made. * Else, these are external definition. * */ #ifdef GLOBAL_VARIABLE_DEFINE #define GLOBAL /* */ #define GLOBAL_VAL(v) = (v) #else #define GLOBAL extern #define GLOBAL_VAL(v) /* */ #endif /* global variables */ GLOBAL boolean verbose_flag GLOBAL_VAL(TRUE); GLOBAL boolean debug2_flag GLOBAL_VAL(FALSE); GLOBAL boolean callback_debug_flag GLOBAL_VAL(FALSE); /* function list for adin process callback */ GLOBAL PLUGIN_ENTRY **global_plugin_list GLOBAL_VAL(NULL); GLOBAL int global_plugin_loaded_file_num GLOBAL_VAL(0); #endif /* __J_GLOBAL_H__ */ julius-4.2.2/libjulius/include/julius/trellis.h0000644001051700105040000000273510731704275020134 0ustar ritrlab/** * @file trellis.h * * * @brief 単語トレリスの構造体定義 * * * * * @brief Structure definitions of word trellis. * * * @author Akinobu Lee * @date Fri Aug 17 18:30:17 2007 * * $Revision: 1.2 $ * */ #ifndef __J_TRELLIS_H__ #define __J_TRELLIS_H__ /** * Word trellis element that holds survived word ends at each frame * on the 1st pass. 
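*
* For illustration, a back trellis filled by the 1st pass (see the
* BACKTRELLIS structure below) can be scanned frame by frame as in the
* following sketch.  This is explanatory example code only, not part of
* the search engine itself.
*
* @code
* static void dump_backtrellis(BACKTRELLIS *bt)
* {
*   int t, i;
*   TRELLIS_ATOM *tre;
*   for (t = 0; t < bt->framelen; t++) {       // each input frame
*     for (i = 0; i < bt->num[t]; i++) {       // each survived word end
*       tre = bt->rw[t][i];
*       jlog("frame %d: word %d [%d..%d] score %f\n",
*            t, tre->wid, tre->begintime, tre->endtime, tre->backscore);
*     }
*   }
* }
* @endcode
*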
* */ typedef struct __trellis_atom__ { LOGPROB backscore; ///< Accumulated score from start LOGPROB lscore; ///< LM score of this word WORD_ID wid; ///< Word ID short begintime; ///< Beginning frame short endtime; ///< End frame #ifdef WORD_GRAPH boolean within_wordgraph; ///< TRUE if within word graph boolean within_context; ///< TRUE if any of its following word was once survived in beam while search #endif struct __trellis_atom__ *last_tre; ///< Pointer to previous context trellis word struct __trellis_atom__ *next; ///< Temporary link to store generated trellis word on 1st pass } TRELLIS_ATOM; /** * Whole word trellis (aka backtrellis) generated as a result of 1st pass. * */ typedef struct __backtrellis__ { int framelen; ///< Frame length int *num; ///< Number of trellis words at frame [t] TRELLIS_ATOM ***rw; ///< List to trellis words at frame [t]: rw[t][0..num[t]] TRELLIS_ATOM *list; ///< Temporary storage point used in 1st pass BMALLOC_BASE *root; ///< memory allocation base for mybmalloc2() } BACKTRELLIS; #endif /* __J_TRELLIS_H__ */ julius-4.2.2/libjulius/include/julius/jfunc.h0000644001051700105040000001004112004452401017532 0ustar ritrlab/** * @file jfunc.h * * * @brief API related functions (not all) * * * * @brief API関連関数(全てではない) * * * @author Akinobu Lee * @date Tue Nov 6 22:41:00 2007 * * $Revision: 1.10 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #include #include #ifndef __J_JFUNC_H__ #define __J_JFUNC_H__ #ifdef __cplusplus extern "C" { #endif /* recogmain.c */ int j_open_stream(Recog *recog, char *file_or_dev_name); int j_close_stream(Recog *recog); int j_recognize_stream(Recog *recog); /* jfunc.c */ void j_request_pause(Recog *recog); void j_request_terminate(Recog *recog); void j_request_resume(Recog *recog); void schedule_grammar_update(Recog *recog); void j_reset_reload(Recog *recog); void j_enable_debug_message(); void j_disable_debug_message(); void j_enable_verbose_message(); void j_disable_verbose_message(); void j_internal_error(char *fmt, ...); int j_config_load_args(Jconf *jconf, int argc, char *argv[]); int j_config_load_string(Jconf *jconf, char *string); int j_config_load_file(Jconf *jconf, char *filename); Jconf *j_config_load_args_new(int argc, char *argv[]); Jconf *j_config_load_string_new(char *string); Jconf *j_config_load_file_new(char *filename); void j_add_dict(JCONF_LM *lm, char *dictfile); void j_add_word(JCONF_LM *lm, char *wordentry); boolean j_adin_init(Recog *recog); char *j_get_current_filename(Recog *recog); void j_recog_info(Recog *recog); Recog *j_create_instance_from_jconf(Jconf *jconf); boolean j_regist_user_lm_func(PROCESS_LM *lm, LOGPROB (*unifunc)(WORD_INFO *winfo, WORD_ID w, LOGPROB ngram_prob), LOGPROB (*bifunc)(WORD_INFO *winfo, WORD_ID context, WORD_ID w, LOGPROB ngram_prob), LOGPROB (*probfunc)(WORD_INFO *winfo, WORD_ID *contexts, int context_len, WORD_ID w, LOGPROB ngram_prob)); boolean j_regist_user_param_func(Recog *recog, boolean (*user_calc_vector)(MFCCCalc *, SP16 *, int)); JCONF_AM *j_get_amconf_by_name(Jconf *jconf, char *name); JCONF_AM *j_get_amconf_by_id(Jconf *jconf, int id); JCONF_AM *j_get_amconf_default(Jconf *jconf); JCONF_LM *j_get_lmconf_by_name(Jconf *jconf, char *name); JCONF_LM *j_get_lmconf_by_id(Jconf *jconf, int id); JCONF_SEARCH *j_get_searchconf_by_name(Jconf *jconf, char *name); JCONF_SEARCH 
*j_get_searchconf_by_id(Jconf *jconf, int id); boolean j_process_deactivate(Recog *recog, char *name); boolean j_process_deactivate_by_id(Recog *recog, int id); boolean j_process_activate(Recog *recog, char *name); boolean j_process_activate_by_id(Recog *recog, int id); boolean j_process_add_lm(Recog *recog, JCONF_LM *lmconf, JCONF_SEARCH *sconf, char *name); boolean j_remove_search(Recog *recog, JCONF_SEARCH *sconf); boolean j_remove_lm(Recog *recog, JCONF_LM *lmconf); boolean j_remove_am(Recog *recog, JCONF_AM *amconf); #ifdef DEBUG_VTLN_ALPHA_TEST void vtln_alpha(Recog *recog, RecogProcess *r); #endif /* instance.c */ MFCCCalc *j_mfcccalc_new(JCONF_AM *amconf); void j_mfcccalc_free(MFCCCalc *mfcc); PROCESS_AM *j_process_am_new(Recog *recog, JCONF_AM *amconf); void j_process_am_free(PROCESS_AM *am); PROCESS_LM *j_process_lm_new(Recog *recog, JCONF_LM *lmconf); void j_process_lm_free(PROCESS_LM *lm); RecogProcess *j_recogprocess_new(Recog *recog, JCONF_SEARCH *sconf); void j_recogprocess_free(RecogProcess *process); JCONF_AM *j_jconf_am_new(); void j_jconf_am_free(JCONF_AM *amconf); boolean j_jconf_am_regist(Jconf *jconf, JCONF_AM *amconf, char *name); JCONF_LM *j_jconf_lm_new(); void j_jconf_lm_free(JCONF_LM *lmconf); boolean j_jconf_lm_regist(Jconf *jconf, JCONF_LM *lmconf, char *name); JCONF_SEARCH *j_jconf_search_new(); void j_jconf_search_free(JCONF_SEARCH *sconf); boolean j_jconf_search_regist(Jconf *jconf, JCONF_SEARCH *sconf, char *name); Jconf *j_jconf_new(); void j_jconf_free(Jconf *jconf); Recog *j_recog_new(); void j_recog_free(Recog *recog); #ifdef __cplusplus } #endif #endif /* __J_JFUNC_H__ */ julius-4.2.2/libjulius/libjulius-config.in0000755001051700105040000000203111071161147017125 0ustar ritrlab#!/bin/sh # # libsent configuration result reporter # # "libsent-config --cflags" generates C defines, and # "libsent-config --libs" generates libraries needed for compile with # the sentlib # # by Doshita Lab. Speech Group, Kyoto University 1991-2000 # by Shikano Lab. Speech Group, NAIST 2002 # # ripped from gtk's gtk-config.in # # $Id: libjulius-config.in,v 1.4 2008/10/02 15:01:59 sumomo Exp $ # # # @configure_input@ # version="\ Julius/Julian library rev.@JULIUS_VERSION@" usage="\ Usage: libjulius-config [--libs] [--cflags] [--info] [--version]" prefix="@prefix@" exec_prefix="@exec_prefix@" if test $# -eq 0; then echo "${usage}" 1>&2 exit 1 fi while test $# -gt 0; do case "$1" in -*=*) optarg=`echo "$1" | sed 's/[-_a-zA-Z0-9]*=//'` ;; *) optarg= ;; esac case $1 in --cflags) echo @CPPFLAGS@ ;; --libs) echo -ljulius @LDFLAGS@ @LIBS@ ;; --version) echo "${version}" ;; *) echo "${usage}" 1>&2 exit 1 ;; esac shift done echo exit 0 julius-4.2.2/libjulius/src/0000755001051700105040000000000012004463507014122 5ustar ritrlabjulius-4.2.2/libjulius/src/jfunc.c0000644001051700105040000011502212004452401015363 0ustar ritrlab/** * @file jfunc.c * * * @brief アプリケーション向けの種々のAPI関数 * * このファイルには,アプリケーションからJuliusLibの各機能を呼び出す * API関数およびライブラリ化のために実装された種々の関数が定義されています. * * * * * @brief API functions for applications * * This file contains for API function definitions and miscellaneous * functions implemented for JuliusLib. * * * @author Akinobu Lee * @date Wed Aug 8 15:04:28 2007 * * $Revision: 1.9 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #include /** * * Request engine to stop recognition. 
If the engine is recognizing a * speech input, it will stop after the current recognition ended. * * * エンジンに認識処理を一時停止するよう要求する. この関数を呼出し時に * 音声入力を実行中であった場合,その入力の認識が終了したあとで停止する. * * * @param recog [in] engine instance * * @callgraph * @callergraph * @ingroup pauseresume * */ void j_request_pause(Recog *recog) { /* pause recognition: will stop when the current input ends */ if (recog->process_active) { recog->process_want_terminate = FALSE; recog->process_want_reload = TRUE; recog->process_active = FALSE; } /* control the A/D-in module to stop recording */ if (recog->jconf->input.type == INPUT_WAVEFORM) { if (recog->adin->ad_pause != NULL) { (*(recog->adin->ad_pause))(); } } else { /* feature vector input */ if (recog->jconf->input.speech_input == SP_MFCMODULE) { if (recog->mfcclist->func.fv_pause) recog->mfcclist->func.fv_pause(); } } } /** * * Request engine to terminate recognition immediately. Even if the engine * is recognizing a speech input, it will stop immediately (in this case the * current input will be lost). * * * エンジンに認識処理を即時停止するよう要求する. この関数を呼出し時に * 音声入力を実行中の場合,その入力を破棄して即座に停止する. * * * @param recog [in] engine instance * * @callgraph * @callergraph * @ingroup pauseresume */ void j_request_terminate(Recog *recog) { /* terminate recognition: input will terminate immidiately */ /* set flags to stop adin to terminate immediately, and stop process */ if (recog->process_active) { recog->process_want_terminate = TRUE; recog->process_want_reload = TRUE; recog->process_active = FALSE; } if (recog->jconf->input.type == INPUT_WAVEFORM) { if (recog->adin->ad_terminate != NULL) { /* control the A/D-in module to terminate recording imemdiately */ (*(recog->adin->ad_terminate))(); } } else { /* feature vector input */ if (recog->jconf->input.speech_input == SP_MFCMODULE) { if (recog->mfcclist->func.fv_terminate) recog->mfcclist->func.fv_terminate(); } } } /** * * Resume the engine which has already paused or terminated. * * * 一時停止しているエンジンを再開させる. * * * @param recog * * @callgraph * @callergraph * @ingroup pauseresume */ void j_request_resume(Recog *recog) { if (recog->process_active == FALSE) { recog->process_want_terminate = FALSE; recog->process_active = TRUE; } /* control the A/D-in module to restart recording now */ if (recog->jconf->input.type == INPUT_WAVEFORM) { if (recog->adin->ad_resume != NULL) { (*(recog->adin->ad_resume))(); } } else { /* feature vector input */ if (recog->jconf->input.speech_input == SP_MFCMODULE) { if (recog->mfcclist->func.fv_resume) recog->mfcclist->func.fv_resume(); } } } /** * Request engine to check update of all grammar and re-construct * the glocal lexicon if needed. The actual update will be done * between input segment. This function should be called after some * grammars are modified. * * * * 全文法の変更をチェックし,必要であれば認識用辞書を再構築するよう * エンジンに要求する. 実際の処理は次の認識の合間に行われる. * この関数は文法を追加したり削除したなど, * 文法リストに変更を加えたあとに必ず呼ぶべきである. * * * * @param recog [in] engine instance * * @callgraph * @callergraph * @ingroup grammar */ void schedule_grammar_update(Recog *recog) { if (recog->process_active) { /* if recognition is currently running, tell engine how/when to re-construct global lexicon. 
*/ switch(recog->gram_switch_input_method) { case SM_TERMINATE: /* discard input now and change (immediate) */ recog->process_want_terminate = TRUE; recog->process_want_reload = TRUE; break; case SM_PAUSE: /* segment input now, recognize it, and then change */ recog->process_want_terminate = FALSE; recog->process_want_reload = TRUE; break; case SM_WAIT: /* wait until the current input end and recognition completed */ recog->process_want_terminate = FALSE; recog->process_want_reload = FALSE; break; } /* After the update, recognition will restart without sleeping. */ } else { /* If recognition is currently not running, the received grammars are merely stored in memory here. The re-construction of global lexicon will be delayed: it will be re-built just before the recognition process starts next time. */ } } /** * * 再構築要求フラグをクリアする. * * * * Clear the grammar re-construction flag. * * * * @param recog [in] engine instance * * @callgraph * @callergraph * @ingroup grammar */ void j_reset_reload(Recog *recog) { recog->process_want_reload = FALSE; } /** * * Enable debug messages in JuliusLib to log. * * * JuliusLib内の関数でデバッグメッセージをログに出力するようにする * * @callgraph * @callergraph * @ingroup engine */ void j_enable_debug_message() { debug2_flag = TRUE; } /** * * Disable debug messages in JuliusLib to log. * * * JuliusLib内の関数でデバッグメッセージを出さないようにする. * * * @callgraph * @callergraph * @ingroup engine */ void j_disable_debug_message() { debug2_flag = FALSE; } /** * * Enable verbose messages in JuliusLib to log. * * * JuliusLib内の関数で主要メッセージをログに出力するようにする. * * * @callgraph * @callergraph * @ingroup engine */ void j_enable_verbose_message() { verbose_flag = TRUE; } /** * * Disable verbose messages in JuliusLib to log. * * * JuliusLib内の関数で主要メッセージのログ出力をしないようにする. * * * @callgraph * @callergraph * @ingroup engine */ void j_disable_verbose_message() { verbose_flag = FALSE; } /** * Output error message and exit the program. This is just for * internal use. * * @param fmt [in] format string, like printf. * @param ... [in] variable length argument like printf. * */ void j_internal_error(char *fmt, ...) { va_list ap; int ret; va_start(ap,fmt); ret = vfprintf(stderr, fmt, ap); va_end(ap); /* clean up socket if already opened */ cleanup_socket(); exit(1); } /** * * If multiple instances defined from init, remove initial one (id=0) * * * 複数インスタンスが定義されている場合、初期インスタンス(id=0)は * 無効なので消す. * * * @param jconf [i/o] global configuration instance * * @callgraph * @callergraph * @ingroup jconf */ static void j_config_remove_initial(Jconf *jconf) { JCONF_AM *am; JCONF_LM *lm; JCONF_SEARCH *s; if(jconf->am_root->next != NULL && jconf->am_root->id == 0) { am = jconf->am_root->next; free(jconf->am_root); jconf->am_root = am; } if(jconf->lm_root->next != NULL && jconf->lm_root->id == 0) { lm = jconf->lm_root->next; free(jconf->lm_root); jconf->lm_root = lm; } if(jconf->search_root->next != NULL && jconf->search_root->id == 0) { s = jconf->search_root->next; free(jconf->search_root); jconf->search_root = s; } } /** * * Load parameters from command argments, and set to each configuration * instances in jconf. * * * コマンド引数からパラメータを読み込み,jconf 内の各設定インスタンスに * 値を格納する. * * * @param jconf [i/o] global configuration instance * @param argc [in] number of arguments * @param argv [in] list of argument strings * * @return 0 on success, or -1 on failure. 
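*
* A minimal usage sketch, equivalent to what j_config_load_args_new()
* does internally (error handling reduced to a bare check):
*
* @code
* Jconf *jconf = j_jconf_new();
* if (j_config_load_args(jconf, argc, argv) < 0) {
*   // option parsing failed: release the configuration and give up
*   j_jconf_free(jconf);
*   return -1;
* }
* @endcode
*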
* * @callgraph * @callergraph * @ingroup jconf */ int j_config_load_args(Jconf *jconf, int argc, char *argv[]) { /* parse options and set variables */ if (opt_parse(argc, argv, NULL, jconf) == FALSE) { return -1; } /* if multiple instances defined from init, remove initial one (id=0) */ j_config_remove_initial(jconf); return 0; } /** * * Load parameters from command argment string, and set to each configuration * instances in jconf. * * * コマンド引数を含む文字列からパラメータを読み込み,jconf 内の各設定インスタンスに * 値を格納する. * * * @param jconf [i/o] global configuration instance * @param argstr [in] argument string * * @return 0 on success, or -1 on failure. * * @callgraph * @callergraph * @ingroup jconf */ int j_config_load_string(Jconf *jconf, char *string) { int argc; char **argv; char *buf; /* parse options and set variables */ if (config_string_parse(string, jconf) == FALSE) { return -1; } /* if multiple instances defined from init, remove initial one (id=0) */ j_config_remove_initial(jconf); return 0; } /** * * Load parameters from a jconf file and set to each configuration * instances in jconf. * * * jconf ファイルからパラメータを読み込み,jconf 内の各設定インスタンスに * 値を格納する. * * * @param jconf [i/o] glbal configuration instance * @param filename [in] jconf filename * * @return 0 on sucess, or -1 on failure. * * @callgraph * @callergraph * @ingroup jconf */ int j_config_load_file(Jconf *jconf, char *filename) { /* parse options and set variables */ if (config_file_parse(filename, jconf) == FALSE) { return -1; } /* if multiple instances defined from init, remove initial one (id=0) */ j_config_remove_initial(jconf); return 0; } /** * * Create a new configuration instance and load parameters from command * argments. * * * コマンド引数からパラメータを読み込み,その値を格納した * 新たな設定インスタンスを割り付けて返す. * * * @param argc [in] number of arguments * @param argv [in] list of argument strings * * @return the newly allocated global configuration instance. * * @callgraph * @callergraph * @ingroup jconf */ Jconf * j_config_load_args_new(int argc, char *argv[]) { Jconf *jconf; jconf = j_jconf_new(); if (j_config_load_args(jconf, argc, argv) == -1) { j_jconf_free(jconf); return NULL; } return jconf; } /** * * Create a new configuration instance and load parameters from a jconf * file. * * * 新たな設定インスタンスを割り付け,そこに * jconfファイルから設定パラメータを読み込んで返す. * * * @param filename [in] jconf filename * * @return the newly allocated global configuration instance. * * @callgraph * @callergraph * @ingroup jconf */ Jconf * j_config_load_file_new(char *filename) { Jconf *jconf; jconf = j_jconf_new(); if (j_config_load_file(jconf, filename) == -1) { j_jconf_free(jconf); return NULL; } return jconf; } /** * * Create a new configuration instance and load parameters from string * file. * * * 新たな設定インスタンスを割り付け,そこに * 文字列から設定パラメータを読み込んで返す. * * * @param string [in] option string * * @return the newly allocated global configuration instance. * * @callgraph * @callergraph * @ingroup jconf */ Jconf * j_config_load_string_new(char *string) { Jconf *jconf; jconf = j_jconf_new(); if (j_config_load_string(jconf, string) == -1) { j_jconf_free(jconf); return NULL; } return jconf; } /** * * Book to read an additional dictionary file to be read. * when called multiple times, all the file name will be stored and read. * The file will be read just after the normal dictionary at startup. * * * 追加辞書ファイルの読み込みを指定する. * 複数回呼ばれた場合、すべて読み込まれる。 * 指定された辞書は起動時に通常の辞書のあとに続けて読み込まれる. * * * @param lm [i/o] a LM configuration * @param dictfile [in] dictinoary file name * * @return the newly allocated global configuration instance. 
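*
* Usage sketch (assumes a default LM configuration is already present as
* jconf->lm_root; the file name and the word entry below are placeholders
* written in the usual dictionary format of the model in use):
*
* @code
* j_add_dict(jconf->lm_root, "extra-words.dict");         // hypothetical file
* j_add_word(jconf->lm_root, "hello [hello] h e l o u");  // placeholder entry
* @endcode
*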
* * @callgraph * @callergraph * @ingroup jconf */ void j_add_dict(JCONF_LM *lm, char *dictfile) { JCONF_LM_NAMELIST *nl; nl = (JCONF_LM_NAMELIST *)mymalloc(sizeof(JCONF_LM_NAMELIST)); nl->name = (char *)mymalloc(strlen(dictfile) + 1); strcpy(nl->name, dictfile); nl->next = lm->additional_dict_files; lm->additional_dict_files = nl; } /** * * Add an additional word entry. * The string should contain a word entry in as the same format as dictionary. * If called multiple times, all the specified words will be appended. * * * 追加の単語エントリを指定する. * 内容は辞書ファイルと同じフォーマット. * 起動までに複数回呼ばれた場合、そのすべてが起動時に追加される. * * * @param lm [i/o] a LM configuration * @param wordentry [in] word entry string in dictionary format * * @callgraph * @callergraph * @ingroup jconf */ void j_add_word(JCONF_LM *lm, char *wordentry) { JCONF_LM_NAMELIST *nl; nl = (JCONF_LM_NAMELIST *)mymalloc(sizeof(JCONF_LM_NAMELIST)); nl->name = (char *)mymalloc(strlen(wordentry) + 1); strcpy(nl->name, wordentry); nl->next = lm->additional_dict_entries; lm->additional_dict_entries = nl; } /** * * Initialize and setup A/D-in device specified by the configuration * for recognition. When threading is enabled for the device, * A/D-in thread will start inside this function. * * * 設定で選択された A/D-in デバイスを初期化し認識の準備を行う. * そのデバイスに対して threading が指定されている場合は, * A/D-in 用スレッドがここで開始される. * * * @param recog [in] engine instance * * @return TRUE on success, FALSE on failure. * * @callgraph * @callergraph * @ingroup engine */ boolean j_adin_init(Recog *recog) { boolean ret; if (recog->jconf->input.type == INPUT_VECTOR) { /* feature vector input */ if (recog->jconf->input.speech_input == SP_MFCMODULE) { if (mfc_module_init(recog->mfcclist, recog) == FALSE) { return FALSE; } ret = mfc_module_standby(recog->mfcclist); } else { ret = TRUE; } return ret; } /* initialize A/D-in device */ ret = adin_initialize(recog); return(ret); } /** * * Return current input speech file name. return NULL if the current * input device does not support this function. * * * 現在の入力ファイル名を返す.現在の入力デバイスがこの機能をサポート * していない場合は NULL を返す. * * * @param recog [in] engine instance * * @return the file name, or NULL when this function is not available on * the current input device. * * @callgraph * @callergraph * @ingroup engine */ char * j_get_current_filename(Recog *recog) { char *p; p = NULL; if (recog->jconf->input.type == INPUT_WAVEFORM) { /* adin function input */ if (recog->adin->ad_input_name != NULL) { p = recog->adin->ad_input_name(); } } else { switch(recog->jconf->input.speech_input) { case SP_MFCMODULE: p = mfc_module_input_name(recog->mfcclist); break; case SP_MFCFILE: /* already assigned */ p = recog->adin->current_input_name; break; } } return p; } /** * * Output all configurations and system informations into log. * * * エンジンの全設定と全システム情報をログに出力する. * * * @param recog [in] engine instance * * @callgraph * @callergraph * @ingroup engine */ void j_recog_info(Recog *recog) { /* print out system information */ print_engine_info(recog); } /** * * @brief Instanciate / generate a new engine instance according * to the given global configuration instance. * * It inspects all parameters in the global configuration instance, load * all models into memory, build tree lexicons, allocate work area and * caches. It does all setup to start recognition except A/D-in * initialization. * * * * @brief 与えられた設定インスタンス内の情報に従って,新たな * エンジンインスタンスを 起動・生成する. * * 設定インスタンス内のパラメータのチェック後,モデルを読み込み,木構 * 造化辞書の生成,ワークエリアおよびキャッシュの確保などを行う. * A/D-in の初期化以外で認識を開始するのに必要な処理をすべて行う. 
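*
* A minimal start-up sketch built from the API calls declared in jfunc.h;
* "mysetting.jconf" is a placeholder, output callbacks are omitted, and
* most error handling is reduced to bare checks:
*
* @code
* Jconf *jconf;
* Recog *recog;
*
* jconf = j_config_load_file_new("mysetting.jconf");  // placeholder jconf file
* if (jconf == NULL) return -1;
* recog = j_create_instance_from_jconf(jconf);
* if (recog == NULL) return -1;
* if (j_adin_init(recog) == FALSE) return -1;          // set up the A/D-in device
* if (j_open_stream(recog, NULL) < 0) return -1;       // NULL: use the configured input
* j_recognize_stream(recog);                           // recognition loop until stream ends
* j_close_stream(recog);
* j_recog_free(recog);                                 // release the engine instance
* @endcode
*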
* * * @param jconf [in] gloabl configuration instance * * @return the newly created engine instance. * * @callgraph * @callergraph * @ingroup instance */ Recog * j_create_instance_from_jconf(Jconf *jconf) { Recog *recog; /* check option values and set parameters needed for model loading */ if (j_jconf_finalize(jconf) == FALSE) { return NULL; } /* create a recognition instance */ recog = j_recog_new(); /* assign configuration to the instance */ recog->jconf = jconf; /* load all files according to the configurations */ if (j_load_all(recog, jconf) == FALSE) { jlog("ERROR: j_create_instance_from_jconf: error in loading model\n"); /* j_model_free(model); */ return NULL; } /* checkout for recognition: build lexicon tree, allocate cache */ if (j_final_fusion(recog) == FALSE) { jlog("ERROR: j_create_instance_from_jconf: error while setup for recognition\n"); j_recog_free(recog); return NULL; } return recog; } /** * * Assign user-defined language scoring functions into a LM processing * instance. This should be called after engine instance creation and * before j_final_fusion() is called. Remember that you should also * specify "-userlm" option at jconf to use user-define language scoring. * * * 言語モデル処理インスタンスにユーザ定義の言語スコア付与関数を登録する. * この関数はエンジンインスタンス生成後から j_final_fusion() が呼ばれる * までの間に呼ぶ必要がある. 注意:ユーザ定義の言語スコア関数を使う場合は * 実行時オプション "-userlm" も指定する必要があることに注意せよ. * * * @param lm [i/o] LM processing instance * @param unifunc [in] pointer to the user-defined unigram function * @param bifunc [in] pointer to the user-defined bi-igram function * @param probfunc [in] pointer to the user-defined N-gram function * * @return TRUE on success, FALSE on failure. * * @callgraph * @callergraph * @ingroup userfunc */ boolean j_regist_user_lm_func(PROCESS_LM *lm, LOGPROB (*unifunc)(WORD_INFO *winfo, WORD_ID w, LOGPROB ngram_prob), LOGPROB (*bifunc)(WORD_INFO *winfo, WORD_ID context, WORD_ID w, LOGPROB ngram_prob), LOGPROB (*probfunc)(WORD_INFO *winfo, WORD_ID *contexts, int context_len, WORD_ID w, LOGPROB ngram_prob)) { lm->lmfunc.uniprob = unifunc; lm->lmfunc.biprob = bifunc; lm->lmfunc.lmprob = probfunc; return TRUE; } /** * * Assign a user-defined parameter extraction function to engine instance. * * * ユーザ定義の特徴量計算関数を使うようエンジンに登録する. * * * @param recog [i/o] engine instance * @param user_calc_vector [in] pointer to function of parameter extraction * * @return TRUE on success, FALSE on error. * * @callgraph * @callergraph * @ingroup userfunc */ boolean j_regist_user_param_func(Recog *recog, boolean (*user_calc_vector)(MFCCCalc *, SP16 *, int)) { recog->calc_vector = user_calc_vector; return TRUE; } /** * * Get AM configuration structure in jconf by its name. * * * jconf内の AM モジュール設定構造体を名前で検索する. * * * @param jconf [in] global configuration * @param name [in] AM module name * * @return the specified AM configuration, or NULL if not found. * * @callgraph * @callergraph * @ingroup jconf */ JCONF_AM * j_get_amconf_by_name(Jconf *jconf, char *name) { JCONF_AM *amconf; for(amconf=jconf->am_root;amconf;amconf=amconf->next) { if (strmatch(amconf->name, name)) { break; } } if (!amconf) { /* error */ jlog("ERROR: j_get_amconf_by_name: [AM \"%s\"] not found\n", name); return NULL; } return amconf; } /** * * Get AM configuration structure in jconf by its id. * * * jconf内の AM モジュール設定構造体を ID で検索する. * * * @param jconf [in] global configuration * @param id [in] AM module ID * * @return the specified AM configuration, or NULL if not found. 
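*
* Lookup sketch ("am1" and the id 1 are placeholder values):
*
* @code
* JCONF_AM *am;
* am = j_get_amconf_by_name(jconf, "am1");   // look up by module name, or
* am = j_get_amconf_by_id(jconf, 1);         // look up by numeric id
* if (am == NULL) {
*   am = j_get_amconf_default(jconf);        // fall back to the default (latest) AM
* }
* @endcode
*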
* * @callgraph * @callergraph * @ingroup jconf */ JCONF_AM * j_get_amconf_by_id(Jconf *jconf, int id) { JCONF_AM *amconf; for(amconf=jconf->am_root;amconf;amconf=amconf->next) { if (amconf->id == id) { break; } } if (!amconf) { /* error */ jlog("ERROR: j_get_amconf_by_id: [AM%02d] not found\n", id); return NULL; } return amconf; } /** * * Return default AM configuration. * * If multiple AM configuration exists, return the latest one. * * * デフォルトの AM 設定を返す. * * AMが複数設定されている場合,最も最近のものを返す. * * * @param jconf [in] global configuration * * @return the specified AM configuration, or NULL if not found. * * @callgraph * @callergraph * @ingroup jconf */ JCONF_AM * j_get_amconf_default(Jconf *jconf) { JCONF_AM *amconf; if (jconf->am_root == NULL) return NULL; for(amconf=jconf->am_root;amconf->next;amconf=amconf->next); return(amconf); } /** * * Get LM configuration structure in jconf by its name. * * * jconf内の LM モジュール設定構造体を名前で検索する. * * * @param jconf [in] global configuration * @param name [in] LM module name * * @return the specified LM configuration, or NULL if not found. * * @callgraph * @callergraph * @ingroup jconf */ JCONF_LM * j_get_lmconf_by_name(Jconf *jconf, char *name) { JCONF_LM *lmconf; for(lmconf=jconf->lm_root;lmconf;lmconf=lmconf->next) { if (strmatch(lmconf->name, name)) { break; } } if (!lmconf) { /* error */ jlog("ERROR: j_get_lmconf_by_name: [LM \"%s\"] not found\n", name); return NULL; } return lmconf; } /** * * Get LM configuration structure in jconf by its id. * * * jconf内の LM モジュール設定構造体を ID で検索する. * * * @param jconf [in] global configuration * @param id [in] LM module ID * * @return the specified LM configuration, or NULL if not found. * * @callgraph * @callergraph * @ingroup jconf */ JCONF_LM * j_get_lmconf_by_id(Jconf *jconf, int id) { JCONF_LM *lmconf; for(lmconf=jconf->lm_root;lmconf;lmconf=lmconf->next) { if (lmconf->id == id) { break; } } if (!lmconf) { /* error */ jlog("ERROR: j_get_lmconf_by_id: [LM%02d] not found\n", id); return NULL; } return lmconf; } /** * * Get SEARCH configuration structure in jconf by its name. * * * jconf内の SESARCH モジュール設定構造体を名前で検索する. * * * @param jconf [in] global configuration * @param name [in] SEARCH module name * * @return the found SEARCH configuration, or NULL if not found. * * @callgraph * @callergraph * @ingroup jconf */ JCONF_SEARCH * j_get_searchconf_by_name(Jconf *jconf, char *name) { JCONF_SEARCH *sconf; for(sconf=jconf->search_root;sconf;sconf=sconf->next) { if (strmatch(sconf->name, name)) { break; } } if (!sconf) { /* error */ jlog("ERROR: j_get_searchconf_by_name: [SR \"%s\"] not found\n", name); return NULL; } return sconf; } /** * * Get SEARCH configuration structure in jconf by its id. * * * jconf内の SEARCH モジュール設定構造体を ID で検索する. * * * @param jconf [in] global configuration * @param id [in] SEARCH module ID * * @return the found SEARCH configuration, or NULL if not found. * * @callgraph * @callergraph * @ingroup jconf */ JCONF_SEARCH * j_get_searchconf_by_id(Jconf *jconf, int id) { JCONF_SEARCH *sconf; for(sconf=jconf->search_root;sconf;sconf=sconf->next) { if (sconf->id == id) { break; } } if (!sconf) { /* error */ jlog("ERROR: j_get_searchconf_by_id: [SR%02d] not found\n", id); return NULL; } return sconf; } /** * * De-activate a recognition process instance designated by its name. * The process will actually pauses at the next recognition interval. * * * 指定された名前の認識処理インスタンスの動作を一時停止させる. * 実際に停止するのは次の音声認識の合間である. 
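*
* Usage sketch ("sr1" is a placeholder SR instance name; as noted above,
* the change takes effect at the next recognition interval):
*
* @code
* if (j_process_deactivate(recog, "sr1") == FALSE) {
*   jlog("ERROR: could not deactivate SR instance \"sr1\"\n");
* }
* // ... later, make it live again ...
* j_process_activate(recog, "sr1");
* @endcode
*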
* * * @param recog [i/o] engine instance * @param name [in] SR name to deactivate * * @return TRUE on success, or FALSE on failure. * * @callgraph * @callergraph * @ingroup jfunc_process * */ boolean j_process_deactivate(Recog *recog, char *name) { RecogProcess *r; for(r=recog->process_list;r;r=r->next) { if (strmatch(r->config->name, name)) { /* book to be inactive at next interval */ r->active = -1; break; } } if (!r) { /* not found */ jlog("ERROR: j_process_deactivate: no SR instance named \"%s\", cannot deactivate\n", name); return FALSE; } /* tell engine to update */ recog->process_want_reload = TRUE; return TRUE; } /** * * De-activate a recognition process instance designated by its ID. * The process will actually pauses at the next recognition interval. * * * 指定された認識処理インスタンスの動作を一時停止させる. * 対象インスタンスを ID 番号で指定する場合はこちらを使う. * 実際に停止するのは次の音声認識の合間である. * * * @param recog [i/o] engine instance * @param id [in] SR ID to deactivate * * @return TRUE on success, or FALSE on failure. * * @callgraph * @callergraph * @ingroup jfunc_process * */ boolean j_process_deactivate_by_id(Recog *recog, int id) { RecogProcess *r; for(r=recog->process_list;r;r=r->next) { if (r->config->id == id) { /* book to be inactive at next interval */ r->active = -1; break; } } if (!r) { /* not found */ jlog("ERROR: j_process_deactivate_by_id: no SR instance whose id is \"%02d\", cannot deactivate\n", id); return FALSE; } /* tell engine to update */ recog->process_want_reload = TRUE; return TRUE; } /** * * Activate a recognition process instance that has been made inactive, by * its name. * The process will actually starts at the next recognition interval. * * * 一時停止されていた認識処理インスタンスの動作を再開させる. * 実際に再開するのは次の音声認識の合間である. * * * @param recog [i/o] engine instance * @param name [in] SR name to activate * * @return TRUE on success, or FALSE on failure. * * @callgraph * @callergraph * @ingroup jfunc_process * */ boolean j_process_activate(Recog *recog, char *name) { RecogProcess *r; for(r=recog->process_list;r;r=r->next) { if (strmatch(r->config->name, name)) { /* book to be active at next interval */ r->active = 1; break; } } if (!r) { /* not found */ jlog("ERROR: j_process_activate: no SR instance named \"%s\", cannot activate\n", name); return FALSE; } /* tell engine to update */ recog->process_want_reload = TRUE; return TRUE; } /** * * Activate a recognition process instance that has been made inactive, by * the ID. * The process will actually starts at the next recognition interval. * * * 一時停止されていた認識処理インスタンスの動作を再開させる(ID指定). * 実際に再開するのは次の音声認識の合間である. * * * @param recog [i/o] engine instance * @param id [in] SR ID to activate * * @return TRUE on success, or FALSE on failure. * * @callgraph * @callergraph * @ingroup jfunc_process * */ boolean j_process_activate_by_id(Recog *recog, int id) { RecogProcess *r; for(r=recog->process_list;r;r=r->next) { if (r->config->id == id) { /* book to be active at next interval */ r->active = 1; break; } } if (!r) { /* not found */ jlog("ERROR: j_process_activate_by_id: no SR instance whose id is \"%02d\", cannot activate\n", id); return FALSE; } /* tell engine to update */ recog->process_want_reload = TRUE; return TRUE; } /** * * @brief Create a new recognizer with a new LM and SR configurations. * * This function creates new LM process instance and recognition process * instance corresponding to the given LM and SR configurations. * AM process to be assigned to them is the current default AM. * Both the new LM and SR will be assigned the same instance name. 
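*
* Usage sketch; how the new configurations are filled in is
* application-dependent and only indicated by a comment, and the
* instance name "second" is a placeholder:
*
* @code
* JCONF_LM *lmconf = j_jconf_lm_new();
* JCONF_SEARCH *sconf = j_jconf_search_new();
* // ... set model files and search parameters in lmconf / sconf here ...
* if (j_process_add_lm(recog, lmconf, sconf, "second") == FALSE) {
*   jlog("ERROR: failed to add recognizer \"second\"\n");
* }
* @endcode
*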
* * * @brief LM および SR 設定に基づき認識処理プロセスを追加する. * * この関数は与えられたLM設定およびSR設定データに基づき,新たな * LMインスタンスおよび認識プロセスインスタンスをエンジン内部に * 生成する. AMについては現在のデフォルトAMが自動的に用いられる. * 名前はLMインスタンス,認識プロセスインスタンスとも同じ名前が * あたえられる. * * * @param recog [i/o] engine instance * @param lmconf [in] a new LM configuration * @param sconf [in] a new SR configuration * @param name [in] name of the new instances * * @return TRUE on success, FALSE on error. * * @callgraph * @callergraph * @ingroup jfunc_process */ boolean j_process_add_lm(Recog *recog, JCONF_LM *lmconf, JCONF_SEARCH *sconf, char *name) { /* add lmconf to global config */ if (j_jconf_lm_regist(recog->jconf, lmconf, name) == FALSE) { jlog("ERROR: j_process_add_lm: failed to regist new LM conf as \"%s\"\n", name); return FALSE; } /* assign lmconf and default amconf to the sconf */ sconf->amconf = j_get_amconf_default(recog->jconf); sconf->lmconf = lmconf; /* add the sconf to global config */ if (j_jconf_search_regist(recog->jconf, sconf, name) == FALSE) { jlog("ERROR: j_process_add_lm: failed to regist new SR conf as \"%s\"\n", name); j_jconf_search_free(sconf); return FALSE; } /* finalize the whole parameters */ if (j_jconf_finalize(recog->jconf) == FALSE) { jlog("ERROR: j_process_add_lm: failed to finalize the updated whole jconf\n"); return FALSE; } /* create LM process intance for the lmconf, and load LM */ if (j_load_lm(recog, lmconf) == FALSE) { jlog("ERROR: j_process_add_lm: failed to load LM \"%s\"\n", lmconf->name); return FALSE; } /* create recognition process instance for the sconf, and setup for recognition */ if (j_launch_recognition_instance(recog, sconf) == FALSE) { jlog("ERROR: j_process_add_lm: failed to start a new recognizer instance \"%s\"\n", sconf->name); return FALSE; } /* the created process will be live=FALSE, active = 1, so the new recognition instance is dead now but will be made live at next session */ /* tell engine to update */ recog->process_want_reload = TRUE; return TRUE; } /** * * Remove a recognition process instance. * The specified search conf will also be released and destroyed * inside this function. * * * 認識処理インスタンスを削除する. * 指定されたSEARCH設定もこの関数内で解放・削除される. * * * @param recog [in] engine instance * @param sconf [in] SEARCH configuration corresponding to the target * recognition process to remove * * @return TRUE on success, or FALSE on failure. 
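*
* Usage sketch (the instance name "second" is a placeholder):
*
* @code
* JCONF_SEARCH *sconf = j_get_searchconf_by_name(recog->jconf, "second");
* if (sconf != NULL && j_process_remove(recog, sconf) == FALSE) {
*   jlog("ERROR: failed to remove recognizer \"second\"\n");
* }
* @endcode
*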
* * @callgraph * @callergraph * @ingroup jfunc_process */ boolean j_process_remove(Recog *recog, JCONF_SEARCH *sconf) { RecogProcess *r, *r_prev; JCONF_SEARCH *sc, *sc_prev; if (sconf == NULL) { jlog("ERROR: j_process_remove: sconf == NULL\n"); return FALSE; } /* find corresponding process in engine and remove it from list */ r_prev = NULL; for(r=recog->process_list;r;r=r->next) { if (r->config == sconf) { if (r_prev == NULL) { recog->process_list = r->next; } else { r_prev->next = r->next; } break; } r_prev = r; } if (!r) { jlog("ERROR: j_process_remove: specified sconf %02d %s not found in recogprocess, removal failed\n", sconf->id, sconf->name); return FALSE; } /* remove config from list in engine */ sc_prev = NULL; for(sc=recog->jconf->search_root;sc;sc=sc->next) { if (sc == sconf) { if (sc_prev == NULL) { recog->jconf->search_root = sc->next; } else { sc_prev->next = sc->next; } break; } sc_prev = sc; } if (!sc) { jlog("ERROR: j_process_remove: sconf %02d %s not found\n", sconf->id, sconf->name); } /* free them */ j_recogprocess_free(r); if (verbose_flag) jlog("STAT: recogprocess %02d %s removed\n", sconf->id, sconf->name); j_jconf_search_free(sconf); /* tell engine to update */ recog->process_want_reload = TRUE; return TRUE; } /** * * Remove an LM process instance. * The specified lm conf will also be released and destroyed * inside this function. * * * 言語モデルインスタンスを削除する. * 指定された言語モデル設定もこの関数内で解放・削除される. * * * @param recog [in] engine instance * @param lmconf [in] LM configuration corresponding to the target * LM process to remove * * @return TRUE on success, or FALSE on failure. * * @callgraph * @callergraph * @ingroup jfunc_process */ boolean j_process_lm_remove(Recog *recog, JCONF_LM *lmconf) { RecogProcess *r; PROCESS_LM *lm, *lm_prev; JCONF_LM *l, *l_prev; if (lmconf == NULL) { jlog("ERROR: j_process_lm_remove: lmconf == NULL\n"); return FALSE; } /* check if still used by a process */ for(r=recog->process_list;r;r=r->next) { if (r->config->lmconf == lmconf) { jlog("ERROR: j_process_lm_remove: specified lmconf %02d %s still used in a recogprocess %02d %s\n", lmconf->id, lmconf->name, r->config->id, r->config->name); return FALSE; } } /* find corresponding LM process in engine and remove it from list */ lm_prev = NULL; for(lm=recog->lmlist;lm;lm=lm->next) { if (lm->config == lmconf) { if (lm_prev == NULL) { recog->lmlist = lm->next; } else { lm_prev->next = lm->next; } break; } lm_prev = lm; } if (!lm) { jlog("ERROR: j_process_lm_remove: specified lmconf %02d %s not found in LM process, removal failed\n", lmconf->id, lmconf->name); return FALSE; } /* remove config from list in engine */ l_prev = NULL; for(l=recog->jconf->lm_root;l;l=l->next) { if (l == lmconf) { if (l_prev == NULL) { recog->jconf->lm_root = l->next; } else { l_prev->next = l->next; } break; } l_prev = l; } if (!l) { jlog("ERROR: j_process_lm_remove: lmconf %02d %s not found\n", lmconf->id, lmconf->name); return FALSE; } /* free them */ j_process_lm_free(lm); if (verbose_flag) jlog("STAT: LM process %02d %s removed\n", lmconf->id, lmconf->name); j_jconf_lm_free(lmconf); /* tell engine to update */ recog->process_want_reload = TRUE; return TRUE; } /** * * Remove an AM process instance (experimental). * The specified am conf will also be released and destroyed * inside this function. * * * 言語モデルインスタンスを削除する(実験中). * 指定された言語モデル設定もこの関数内で解放・削除される. * * * @param recog [in] engine instance * @param amconf [in] AM configuration corresponding to the target * AM process to remove * * @return TRUE on success, or FALSE on failure. 
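*
* Since an AM can be removed only after every recognition process and LM
* that refers to it is gone, a typical tear-down sketch removes the three
* layers in order ("second" is a placeholder name; each call fails safely
* and returns FALSE if the configuration is still in use elsewhere):
*
* @code
* JCONF_SEARCH *sconf = j_get_searchconf_by_name(recog->jconf, "second");
* if (sconf != NULL) {
*   JCONF_LM *lmconf = sconf->lmconf;     // keep pointers before removal
*   JCONF_AM *amconf = sconf->amconf;
*   j_process_remove(recog, sconf);       // recognition process first
*   j_process_lm_remove(recog, lmconf);   // then its LM
*   j_process_am_remove(recog, amconf);   // finally the AM
* }
* @endcode
*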
* * @callgraph * @callergraph * @ingroup jfunc_process */ boolean j_process_am_remove(Recog *recog, JCONF_AM *amconf) { RecogProcess *r; PROCESS_LM *lm; PROCESS_AM *am, *am_prev; JCONF_AM *a, *a_prev; if (amconf == NULL) { jlog("ERROR: j_process_am_remove: amconf == NULL\n"); return FALSE; } /* check if still used by a process */ for(r=recog->process_list;r;r=r->next) { if (r->config->amconf == amconf) { jlog("ERROR: j_process_am_remove: specified amconf %02d %s still used in a recogprocess %02d %s\n", amconf->id, amconf->name, r->config->id, r->config->name); return FALSE; } } /* check if still used by a LM process */ for(lm=recog->lmlist;lm;lm=lm->next) { if (lm->am->config == amconf) { jlog("ERROR: j_process_am_remove: specified amconf %02d %s still used in a LM %02d %s\n", amconf->id, amconf->name, lm->config->id, lm->config->name); return FALSE; } } /* find corresponding AM process in engine and remove it from list */ am_prev = NULL; for(am=recog->amlist;am;am=am->next) { if (am->config == amconf) { if (am_prev == NULL) { recog->amlist = am->next; } else { am_prev->next = am->next; } break; } am_prev = am; } if (!am) { jlog("ERROR: j_process_am_remove: specified amconf %02d %s not found in AM process, removal failed\n", amconf->id, amconf->name); return FALSE; } /* remove config from list in engine */ a_prev = NULL; for(a=recog->jconf->am_root;a;a=a->next) { if (a == amconf) { if (a_prev == NULL) { recog->jconf->am_root = a->next; } else { a_prev->next = a->next; } break; } a_prev = a; } if (!a) { jlog("ERROR: j_process_am_remove: amconf %02d %s not found\n", amconf->id, amconf->name); return FALSE; } /* free them */ j_process_am_free(am); if (verbose_flag) jlog("STAT: AM process %02d %s removed\n", amconf->id, amconf->name); j_jconf_am_free(amconf); /* tell engine to update */ recog->process_want_reload = TRUE; return TRUE; } #ifdef DEBUG_VTLN_ALPHA_TEST void vtln_alpha(Recog *recog, RecogProcess *r) { Sentence *s; float alpha, alpha_bgn, alpha_end; float max_alpha; LOGPROB max_score; PROCESS_AM *am; MFCCCalc *mfcc; SentenceAlign *align; s = &(r->result.sent[0]); align = result_align_new(); max_score = LOG_ZERO; printf("------------ begin VTLN -------------\n"); mfcc = r->am->mfcc; alpha_bgn = mfcc->para->vtln_alpha - VTLN_RANGE; alpha_end = mfcc->para->vtln_alpha + VTLN_RANGE; for(alpha = alpha_bgn; alpha <= alpha_end; alpha += VTLN_STEP) { mfcc->para->vtln_alpha = alpha; if (InitFBank(mfcc->wrk, mfcc->para) == FALSE) { jlog("ERROR: VTLN: InitFBank() failed\n"); return; } if (wav2mfcc(recog->speech, recog->speechlen, recog) == FALSE) { jlog("ERROR: VTLN: wav2mfcc() failed\n"); return; } outprob_prepare(&(r->am->hmmwrk), mfcc->param->samplenum); word_align(s->word, s->word_num, mfcc->param, align, r); printf("%f: %f\n", alpha, align->allscore); if (max_score < align->allscore) { max_score = align->allscore; max_alpha = alpha; } } printf("MAX: %f: %f\n", max_alpha, max_score); mfcc->para->vtln_alpha = max_alpha; if (InitFBank(mfcc->wrk, mfcc->para) == FALSE) { jlog("ERROR: VTLN: InitFBank() failed\n"); return; } printf("------------ end VTLN -------------\n"); result_align_free(align); } #endif /* end of file */ julius-4.2.2/libjulius/src/useropt.c0000644001051700105040000001306512004452401015763 0ustar ritrlab/** * @file useropt.c * * * @brief ユーザ定義オプション * * * * @brief User-defined option handling * * * @author Akinobu Lee * @date Sun Sep 02 19:44:37 2007 * * $Revision: 1.5 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., 
Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #include /** * List of user option data * */ static USEROPT *useropt_root = NULL; /** * * Generate a new option data. * * * 新たなユーザオプションデータを生成. * * * @return a new user option data. * */ static USEROPT * useropt_new() { USEROPT *new; new = (USEROPT *)mymalloc(sizeof(USEROPT)); new->optstr = NULL; new->desc = NULL; new->argnum = 0; new->reqargnum = 0; new->next = NULL; return new; } /** * * ユーザオプションデータを解放. * * * Release a user option data. * * * @param x [in] a user option data to release * */ static void useropt_free(USEROPT *x) { if (x->optstr) free(x->optstr); if (x->desc) free(x->desc); free(x); } /** * * Release all user option data. * * * 全てのユーザオプションデータを解放する. * * * @callgraph * @callergraph */ void useropt_free_all() { USEROPT *x, *tmp; x = useropt_root; while(x) { tmp = x->next; useropt_free(x); x = tmp; } useropt_root = NULL; } /** * * Add a user-defined option to Julius. * When reqargnum is lower than argnum, the first (reqargnum) arguments * are required and the rest (argnum - reqargnum) options are optional. * * * Julius にユーザ定義オプションを追加する. * argnum には引数の最大数,reqargnum はそのうち必須である引数の数を * 指定する. argnum > reqargnum の場合,先頭から reqargnum 個が必須で, * それ以降が optional として扱われる. * * * @param fmt [in] option string (should begin with '-') * @param argnum [in] total number of argument for this option (including optional) * @param reqargnum [in] number of required argument * @param desc [in] description string for help * @param func [in] option handling function * * @return TRUE on success, FALSE on failure * * @callgraph * @callergraph * @ingroup engine */ boolean j_add_option(char *fmt, int argnum, int reqargnum, char *desc, boolean (*func)(Jconf *jconf, char *arg[], int argnum)) { USEROPT *new; if (fmt[0] != '-') { jlog("ERROR: j_add_option: option string must start with \'-\': %s\n", fmt); return FALSE; } if (argnum < reqargnum) { /* error */ jlog("ERROR: j_add_option: number of required argument (%d) larger than total (%d)\n", reqargnum, argnum); return FALSE; } /* if this is first time, register free function at app exit */ if (useropt_root == NULL) atexit(useropt_free_all); /* allocate new */ new = useropt_new(); /* set option string */ new->optstr = strcpy((char *)mymalloc(strlen(fmt)+1), fmt); /* set number of arguments */ new->argnum = argnum; /* set number of required arguments. If less than argnum, the latter options should be optional */ new->reqargnum = reqargnum; /* set description string */ new->desc = strcpy((char*)mymalloc(strlen(desc)+1),desc); /* set user-given function to process this option */ new->func = func; /* add to list */ new->next = useropt_root; useropt_root = new; return TRUE; } /** * * Inspect for the user-specified options at option parsing * * * オプション設定においてユーザ定義オプション処理を行う. * * * * @param jconf [in] global configuration variables * @param argv [in] argument array * @param argc [in] number of arguments in argv * @param n [i/o] current position in argv * * @return 1 when the current argument was processed successfully * by one of the user options, 0 when no user option matched for the * current argument, or -1 on error. 
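 *
 * @par Example (editor's sketch, not part of the original source)
 * This function dispatches to callbacks that the application has
 * registered with j_add_option().  The option name and callback below
 * are hypothetical:
 * @code
 * static boolean
 * opt_mylevel(Jconf *jconf, char *arg[], int argnum)
 * {
 *   // receives the arguments that followed "-mylevel" on the command line
 *   jlog("STAT: mylevel set to %s\n", arg[0]);
 *   return TRUE;
 * }
 *
 * // register in the application before the jconf arguments are parsed:
 * j_add_option("-mylevel", 1, 1, "level of my application feature", opt_mylevel);
 * @endcode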
* * @callgraph * @callergraph */ int useropt_exec(Jconf *jconf, char *argv[], int argc, int *n) { USEROPT *x; int narg, i; for(x=useropt_root;x;x=x->next) { if (strmatch(argv[*n], x->optstr)) { i = *n + 1; while(i < argc && (argv[i][0] != '-' || (argv[i][1] >= '0' && argv[i][1] <= '9'))) i++; narg = i - *n - 1; if (narg > x->argnum || narg < x->reqargnum) { if (x->reqargnum != x->argnum) { jlog("ERROR: useropt_exec: \"%s\" should have at least %d argument(s)\n", x->optstr, x->reqargnum); } else { jlog("ERROR: useropt_exec: \"%s\" should have %d argument(s)\n", x->optstr, x->argnum); } return -1; /* error */ } if ((*(x->func))(jconf, &(argv[(*n)+1]), narg) == FALSE) { jlog("ERROR: useropt_exec: \"%s\" function returns FALSE\n", x->optstr); return -1; /* error */ } *n += narg; return 1; /* processed */ } } return 0; /* nothing processed */ } /** * * Output description of all the registered user options. * * * 登録されている全てのユーザ定義オプションの説明を出力する. * * * @param fp [in] file pointer to output for * * @callgraph * @callergraph */ void useropt_show_desc(FILE *fp) { USEROPT *x; int i; if (! useropt_root) return; fprintf(fp, "\n Additional options for application:\n"); for(x=useropt_root;x;x=x->next) { fprintf(fp, " [%s", x->optstr); for(i=0;ireqargnum;i++) fprintf(fp, " arg"); for(i=x->reqargnum;iargnum;i++) fprintf(fp, " (arg)"); fprintf(fp, "]\t%s\n", x->desc); } } /* end of file */ julius-4.2.2/libjulius/src/graphout.c0000644001051700105040000022053612004452401016116 0ustar ritrlab/** * @file graphout.c * * * @brief 単語ラティスの生成. * * * * @brief Output word lattice. * * * @author Akinobu LEE * @date Thu Mar 17 12:46:31 2005 * * $Revision: 1.7 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #include /// Define if you want debugging output for graph generation #undef GDEBUG /// Define if you want much more debugging output for graph generation #undef GDEBUG2 #if defined(GDEBUG) || defined(GDEBUG2) static WCHMM_INFO *wchmm_local; ///< Local copy, just for debug #endif /** * * グラフ出力を初期化する. 現在はデバッグ用処理のみ. * * @param wchmm [in] 木構造化辞書 * * * Initialize data for graphout. * * @param wchmm [in] tree lexicon * * * @callgraph * @callergraph * */ void wordgraph_init(WCHMM_INFO *wchmm) { #if defined(GDEBUG) || defined(GDEBUG2) wchmm_local = wchmm; #endif } /**************************************************************/ /* allocation and free of a WordGraph instance */ /** * * グラフ単語を新たに生成し,そのポインタを返す. * * @param wid [in] 単語ID * @param headphone [in] 単語先頭の音素 * @param tailphone [in] 単語末端の音素 * @param leftframe [in] 始端時刻(フレーム) * @param rightframe [in] 終端時刻(フレーム) * @param fscore_head [in] 始端での部分文スコア (g + h) * @param fscore_tail [in] 終端での部分文スコア (g + h) * @param gscore_head [in] 先頭での入力末端からのViterbiスコア (g) * @param gscore_tail [in] 末尾での入力末端からのViterbiスコア (g) * @param lscore [in] 単語の言語スコア (Julian では値に意味なし) * @param cm [in] 単語の信頼度スコア (探索時に動的に計算されたもの) * * @return 新たに生成されたグラフ単語へのポインタ * * * Allocate a new graph word and return a new pointer to it. 
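 *
 * @note (editor's addition) gscore_head and gscore_tail are Viterbi scores
 * accumulated from the input end, so their difference is the score covered
 * by this word span alone; the constructor stores its per-frame average as
 * @code
 * new->amavg = (gscore_head - gscore_tail) / (float)(rightframe - leftframe + 1);
 * @endcode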
* * @param wid [in] word ID * @param headphone [in] phoneme on head of word * @param tailphone [in] phoneme on tail of word * @param leftframe [in] beginning time in frames * @param rightframe [in] end time in frames * @param fscore_head [in] sentence score on search at word head (g + h) * @param fscore_tail [in] sentence score on search at word tail (g + h) * @param gscore_head [in] Viterbi score accumulated from input end at word head (g) * @param gscore_tail [in] Viterbi score accumulated from input end at word tail (g) * @param lscore [in] language score of the word (bogus in Julian) * @param cm [in] word confidence score (computed on search time) * * @return pointer to the newly created graph word. * */ static WordGraph * wordgraph_new(WORD_ID wid, HMM_Logical *headphone, HMM_Logical *tailphone, int leftframe, int rightframe, LOGPROB fscore_head, LOGPROB fscore_tail, LOGPROB gscore_head, LOGPROB gscore_tail, LOGPROB lscore, LOGPROB cm) { WordGraph *new; new = (WordGraph *)mymalloc(sizeof(WordGraph)); new->wid = wid; new->lefttime = leftframe; new->righttime = rightframe; new->fscore_head = fscore_head; new->fscore_tail = fscore_tail; new->gscore_head = gscore_head; new->gscore_tail = gscore_tail; new->lscore_tmp = lscore; /* n-gram only */ #ifdef CM_SEARCH new->cmscore = cm; #endif new->forward_score = new->backward_score = 0.0; if (rightframe - leftframe + 1 != 0) { //new->amavg = (gscore_head - gscore_tail - lscore) / (float)(rightframe - leftframe + 1); new->amavg = (gscore_head - gscore_tail) / (float)(rightframe - leftframe + 1); } new->headphone = headphone; new->tailphone = tailphone; new->leftwordmaxnum = FANOUTSTEP; new->leftword = (WordGraph **)mymalloc(sizeof(WordGraph *) * new->leftwordmaxnum); new->left_lscore = (LOGPROB *)mymalloc(sizeof(LOGPROB) * new->leftwordmaxnum); new->leftwordnum = 0; new->rightwordmaxnum = FANOUTSTEP; new->rightword = (WordGraph **)mymalloc(sizeof(WordGraph *) * new->rightwordmaxnum); new->right_lscore = (LOGPROB *)mymalloc(sizeof(LOGPROB) * new->rightwordmaxnum); new->rightwordnum = 0; new->mark = FALSE; #ifdef GRAPHOUT_DYNAMIC new->purged = FALSE; #endif new->next = NULL; new->saved = FALSE; new->graph_cm = 0.0; #ifdef GDEBUG { int i; WordGraph *w; jlog("DEBUG: NEW: \"%s\"[%d..%d]\n", wchmm_local->winfo->woutput[new->wid], new->lefttime, new->righttime); for(i=0;ileftwordnum;i++) { w = new->leftword[i]; jlog("DEBUG: \t left%d: \"%15s\"[%d..%d]\n", i, wchmm_local->winfo->woutput[w->wid], w->lefttime, w->righttime); } for(i=0;irightwordnum;i++) { w = new->rightword[i]; jlog("DEBUG: \tright%d: \"%15s\"[%d..%d]\n", i, wchmm_local->winfo->woutput[w->wid], w->lefttime, w->righttime); } jlog("DEBUG: \headphone: %s\n", new->headphone->name); jlog("DEBUG: \tailphone: %s\n", new->tailphone->name); } #endif return(new); } /** * * あるグラフ単語のメモリ領域を解放する. * * @param wg [in] グラフ単語 * * * Free a graph word. * * @param wg [in] graph word to be freed. * * * @callgraph * @callergraph * */ void wordgraph_free(WordGraph *wg) { free(wg->rightword); free(wg->right_lscore); free(wg->leftword); free(wg->left_lscore); free(wg); } /**************************************************************/ /* Handling contexts */ /** * * あるグラフ単語の左コンテキストに新たなグラフ単語を追加する. * * @param wg [i/o] 追加先のグラフ単語 * @param left [in] @a wg の左コンテキストとして追加されるグラフ単語 * @param lscore [in] 接続言語スコア * * * Add a graph word as a new left context. * * @param wg [i/o] word graph to which the @a left word will be added as left context. * @param left [in] word graph which will be added to the @a wg as left context. 
* @param lscore [in] word connection score * */ static void wordgraph_add_leftword(WordGraph *wg, WordGraph *left, LOGPROB lscore) { if (wg == NULL) return; if (left == NULL) return; if (wg->leftwordnum >= wg->leftwordmaxnum) { /* expand */ wg->leftwordmaxnum += FANOUTSTEP; wg->leftword = (WordGraph **)myrealloc(wg->leftword, sizeof(WordGraph *) * wg->leftwordmaxnum); wg->left_lscore = (LOGPROB *)myrealloc(wg->left_lscore, sizeof(LOGPROB) * wg->leftwordmaxnum); } wg->leftword[wg->leftwordnum] = left; wg->left_lscore[wg->leftwordnum] = lscore; wg->leftwordnum++; #ifdef GDEBUG jlog("DEBUG: addleft: \"%s\"[%d..%d] added as %dth left of \"%s\"[%d..%d]\n", wchmm_local->winfo->woutput[left->wid], left->lefttime, left->righttime, wg->leftwordnum, wchmm_local->winfo->woutput[wg->wid], wg->lefttime, wg->righttime); #endif } /** * * あるグラフ単語の右コンテキストに新たなグラフ単語を追加する. * * @param wg [i/o] 追加先のグラフ単語 * @param right [in] @a wg の右コンテキストとして追加されるグラフ単語 * @param lscore [in] 接続言語スコア * * * Add a graph word as a new right context. * * @param wg [i/o] word graph to which the @a right word will be added as * right context. * @param right [in] word graph which will be added to the @a wg as right * context. * @param lscore [in] word connection score * */ static void wordgraph_add_rightword(WordGraph *wg, WordGraph *right, LOGPROB lscore) { if (wg == NULL) return; if (right == NULL) return; if (wg->rightwordnum >= wg->rightwordmaxnum) { /* expand */ wg->rightwordmaxnum += FANOUTSTEP; wg->rightword = (WordGraph **)myrealloc(wg->rightword, sizeof(WordGraph *) * wg->rightwordmaxnum); wg->right_lscore = (LOGPROB *)myrealloc(wg->right_lscore, sizeof(LOGPROB) * wg->rightwordmaxnum); } wg->rightword[wg->rightwordnum] = right; wg->right_lscore[wg->rightwordnum] = lscore; wg->rightwordnum++; #ifdef GDEBUG jlog("DEBUG: addright: \"%s\"[%d..%d] added as %dth right of \"%s\"[%d..%d]\n", wchmm_local->winfo->woutput[right->wid], right->lefttime, right->righttime, wg->rightwordnum, wchmm_local->winfo->woutput[wg->wid], wg->lefttime, wg->righttime); #endif } /** * * 左コンテキストに指定したグラフ単語が既にあるかどうかチェックし, * なければ追加する. * * @param wg [i/o] 調べるグラフ単語 * @param left [in] このグラフ単語が @a wg の左コンテキストにあるかチェックする * @param lscore [in] 接続言語スコア * * @return 同じグラフ単語が左コンテキストに存在せず新たに追加した場合は TRUE, * 左コンテキストとして同じグラフ単語がすでに存在しており追加しなかった場合は * FALSEを返す. * * * Check for the left context if the specified graph already exists, and * add it if not yet. * * @param wg [i/o] graph word whose left context will be checked * @param left [in] graph word to be checked as left context of @a wg * @param lscore [in] word connection score * * @return TRUE if not exist yet and has been added, or FALSE if already * exist and thus not added. * * * @callgraph * @callergraph * */ boolean wordgraph_check_and_add_leftword(WordGraph *wg, WordGraph *left, LOGPROB lscore) { int i; if (wg == NULL) return FALSE; if (left == NULL) return FALSE; for(i=0;ileftwordnum;i++) { if (wg->leftword[i] == left) { break; } } if (i >= wg->leftwordnum) { /* no leftword matched */ wordgraph_add_leftword(wg, left, lscore); return TRUE; } else if (wg->left_lscore[i] < lscore) { /* for same word connection, keep maximum LM score */ if (debug2_flag) jlog("DEBUG: check_and_add_leftword: update left\n"); wg->left_lscore[i] = lscore; } return FALSE; } /** * * 右コンテキストに指定したグラフ単語が既にあるかどうかチェックし, * なければ追加する. * * @param wg [i/o] 調べるグラフ単語 * @param right [in] このグラフ単語が @a wg の右コンテキストにあるかチェックする * @param lscore [in] 接続言語スコア * * @return 同じグラフ単語が右コンテキストに存在せず新たに追加した場合は TRUE, * 右コンテキストとして同じグラフ単語がすでに存在しており追加しなかった場合は * FALSEを返す. 
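 *
 * @par Example (editor's sketch, not part of the original source)
 * Context links in the graph are kept bidirectional, so connecting a left
 * word @a a to a right word @a b always uses the two calls as a pair:
 * @code
 * wordgraph_check_and_add_rightword(a, b, lscore);
 * wordgraph_check_and_add_leftword(b, a, lscore);
 * @endcode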
* * * Check for the right context if the specified graph already exists, and * add it if not yet. * * @param wg [i/o] graph word whose right context will be checked * @param right [in] graph word to be checked as right context of @a wg * @param lscore [in] word connection score * * @return TRUE if not exist yet and has been added, or FALSE if already * exist and thus not added. * * * @callgraph * @callergraph * */ boolean wordgraph_check_and_add_rightword(WordGraph *wg, WordGraph *right, LOGPROB lscore) { int i; if (wg == NULL) return FALSE; if (right == NULL) return FALSE; for(i=0;irightwordnum;i++) { if (wg->rightword[i] == right) { break; } } if (i >= wg->rightwordnum) { /* no rightword matched */ wordgraph_add_rightword(wg, right, lscore); return TRUE; } else if (wg->right_lscore[i] < lscore) { /* for same word connection, keep maximum LM score */ if (debug2_flag) jlog("DEBUG: check_and_add_rightword: update right\n"); wg->right_lscore[i] = lscore; } return FALSE; } /** * * 同一グラフ単語のマージ時に,単語グラフのコンテキストを全て別の単語グラフに * 追加する. * * @param dst [i/o] 追加先のグラフ単語 * @param src [in] 追加元のグラフ単語 * * @return 1つでも新たに追加されれば TRUE, 1つも追加されなければ FALSE を返す. * * * Add all the context words to other for merging the same two graph words. * * @param dst [i/o] destination graph word * @param src [in] source graph word * * @return TRUE if at least one context word has been newly added, or FALSE if * context on @a dst has not been updated. * */ static boolean merge_contexts(WordGraph *dst, WordGraph *src) { int s, d; WordGraph *adding; boolean ret; #ifdef GDEBUG jlog("DEBUG: merge_contexts: merging context of \"%s\"[%d..%d] to \"%s\"[%d..%d]...\n", wchmm_local->winfo->woutput[src->wid], src->lefttime, src->righttime, wchmm_local->winfo->woutput[dst->wid], dst->lefttime, dst->righttime); #endif ret = FALSE; /* left context */ for(s=0;sleftwordnum;s++) { adding = src->leftword[s]; if (adding->mark) continue; /* direct link between dst and src will disapper to avoid unneccesary loop */ if (adding == dst) { #ifdef GDEBUG jlog("DEBUG: merge_contexts: skipping direct link (dst) -> (src)\n"); #endif continue; } for(d=0;dleftwordnum;d++) { if (dst->leftword[d]->mark) continue; if (dst->leftword[d] == adding) { break; } } if (d >= dst->leftwordnum) { /* no leftword matched */ wordgraph_add_leftword(dst, adding, src->left_lscore[s]); #ifdef GDEBUG jlog("DEBUG: merge_contexts: added \"%s\"[%d..%d] as a new left context\n", wchmm_local->winfo->woutput[adding->wid], adding->lefttime, adding->righttime); #endif ret = TRUE; } else if (dst->left_lscore[d] < src->left_lscore[s]) { jlog("DEBUG: merge_context: update left\n"); dst->left_lscore[d] = src->left_lscore[s]; } #ifdef GDEBUG else { jlog("DEBUG: merge_contexts: \"%s\"[%d..%d] already exist\n", wchmm_local->winfo->woutput[adding->wid], adding->lefttime, adding->righttime); } #endif } /* right context */ for(s=0;srightwordnum;s++) { adding = src->rightword[s]; if (adding->mark) continue; /* direct link between dst and src will disapper to avoid unneccesary loop */ if (adding == dst) { #ifdef GDEBUG jlog("DEBUG: merge_contexts: skipping direct link (src) -> (dst)\n"); #endif continue; } for(d=0;drightwordnum;d++) { if (dst->rightword[d]->mark) continue; if (dst->rightword[d] == adding) { break; } } if (d >= dst->rightwordnum) { /* no rightword matched */ wordgraph_add_rightword(dst, adding, src->right_lscore[s]); #ifdef GDEBUG jlog("DEBUG: merge_contexts: added \"%s\"[%d..%d] as a new right context\n", wchmm_local->winfo->woutput[adding->wid], adding->lefttime, adding->righttime); 
#endif ret = TRUE; } else if (dst->right_lscore[d] < src->right_lscore[s]) { jlog("DEBUG: merge_context: update right\n"); dst->right_lscore[d] = src->right_lscore[s]; } #ifdef GDEBUG else { jlog("DEBUG: merge_contexts: \"%s\"[%d..%d] already exist\n", wchmm_local->winfo->woutput[adding->wid], adding->lefttime, adding->righttime); } #endif } return(ret); } /** * * 左コンテキスト上のあるグラフ単語を別のグラフ単語に置き換える. * * @param wg [i/o] 操作対象のグラフ単語 * @param from [in] 置き換え元となる左コンテキスト上のグラフ単語 * @param to [in] 置き換え先のグラフ単語 * @param lscore [in] 接続言語スコア * * * Substitute a word at left context of a graph word to another. * * @param wg [i/o] target graph word. * @param from [in] left context word to be substituted * @param to [in] substitution destination. * @param lscore [in] word connection score * */ static void swap_leftword(WordGraph *wg, WordGraph *from, WordGraph *to, LOGPROB lscore) { int i; #ifdef GDEBUG jlog("DEBUG: swapleft: replacing left of \"%s\"[%d..%d] from \"%s\"[%d..%d] to \"%s\"[%d..%d]...\n", wchmm_local->winfo->woutput[wg->wid], wg->lefttime, wg->righttime, wchmm_local->winfo->woutput[from->wid], from->lefttime, from->righttime, wchmm_local->winfo->woutput[to->wid], to->lefttime, to->righttime); #endif for(i=0;ileftwordnum;i++) { if (wg->leftword[i] == from) { wg->leftword[i] = to; wg->left_lscore[i] = lscore; } } } /** * * 右コンテキスト上のあるグラフ単語を別のグラフ単語に置き換える. * * @param wg [i/o] 操作対象のグラフ単語 * @param from [in] 置き換え元となる右コンテキスト上のグラフ単語 * @param to [in] 置き換え先のグラフ単語 * @param lscore [in] 接続言語スコア * * * Substitute a word at right context of a graph word to another. * * @param wg [i/o] target graph word. * @param from [in] right context word to be substituted * @param to [in] substitution destination. * @param lscore [in] word connection score * */ static void swap_rightword(WordGraph *wg, WordGraph *from, WordGraph *to, LOGPROB lscore) { int i; #ifdef GDEBUG jlog("DEBUG: swapright: replacing right of \"%s\"[%d..%d] from \"%s\"[%d..%d] to \"%s\"[%d..%d]...\n", wchmm_local->winfo->woutput[wg->wid], wg->lefttime, wg->righttime, wchmm_local->winfo->woutput[from->wid], from->lefttime, from->righttime, wchmm_local->winfo->woutput[to->wid], to->lefttime, to->righttime); #endif for(i=0;irightwordnum;i++) { if (wg->rightword[i] == from) { wg->rightword[i] = to; wg->right_lscore[i] = lscore; } } } /** * * 左コンテキストリスト中の重複を除去する * * @param wg [i/o] 操作対象のグラフ単語 * * * Delete duplicate entries in left context list of a graph word. * * @param wg [i/o] target graph word * */ static void uniq_leftword(WordGraph *wg) { int i, j, dst; boolean ok; dst = 0; for(i=0;ileftwordnum;i++) { ok = TRUE; for(j=0;jleftword[i] == wg->leftword[j]) { ok = FALSE; break; } } if (ok == TRUE) { wg->leftword[dst] = wg->leftword[i]; wg->left_lscore[dst] = wg->left_lscore[i]; dst++; } } wg->leftwordnum = dst; } /** * * 右コンテキストリスト中の重複を除去する * * @param wg [i/o] 操作対象のグラフ単語 * * * Delete duplicate entries in right context list of a graph word. * * @param wg [i/o] target graph word * */ static void uniq_rightword(WordGraph *wg) { int i, j, dst; boolean ok; dst = 0; for(i=0;irightwordnum;i++) { ok = TRUE; for(j=0;jrightword[i] == wg->rightword[j]) { ok = FALSE; break; } } if (ok == TRUE) { wg->rightword[dst] = wg->rightword[i]; wg->right_lscore[dst] = wg->right_lscore[i]; dst++; } } wg->rightwordnum = dst; } /** * * 左右のグラフ単語のコンテキストリストからそのグラフ単語自身を消去する. * * @param wg [in] 操作対象のグラフ単語 * * * Remove the specified word graph from contexts of all left and right words. 
* * @param wg [in] target graph word * */ static void wordgraph_remove_context(WordGraph *wg) { WordGraph *w; int i,j,k; if (wg == NULL) return; for(i=0;ileftwordnum;i++) { w = wg->leftword[i]; k=0; for(j=0;jrightwordnum;j++) { if (w->rightword[j] != wg) { if (j != k) { w->rightword[k] = w->rightword[j]; w->right_lscore[k] = w->right_lscore[j]; } k++; } } w->rightwordnum = k; } for(i=0;irightwordnum;i++) { w = wg->rightword[i]; k=0; for(j=0;jleftwordnum;j++) { if (w->leftword[j] != wg) { if (j != k) { w->leftword[k] = w->leftword[j]; w->left_lscore[k] = w->left_lscore[j]; } k++; } } w->leftwordnum = k; #ifdef GDEBUG2 if (w->leftwordnum == 0) { jlog("DEBUG: leftword becomes 0 by remove_context\n"); put_wordgraph(jlog_get_fp(), w, wchmm_local->winfo); jlog("DEBUG: by deleting its left context:\n"); put_wordgraph(jlog_get_fp(), wg, wchmm_local->winfo); } #endif } } /** * * グラフ単語の左右のコンテキストをリンクする. * * @param wg [in] 操作対象のグラフ単語 * * * link all words at the context of the graph word. * * @param wg [in] target graph word * */ static void wordgraph_link_context(WordGraph *wg) { int i,j; WordGraph *left, *right; if (wg == NULL) return; for(i=0;ileftwordnum;i++) { left = wg->leftword[i]; if (left->mark) continue; if (left == wg) continue; for(j=0;jrightwordnum;j++) { right = wg->rightword[j]; if (right->mark) continue; if (right == wg) continue; if (left == right) continue; wordgraph_check_and_add_leftword(right, left, wg->left_lscore[i]); wordgraph_check_and_add_rightword(left, right, wg->right_lscore[j]); } } } /**************************************************************/ /* Operations for organizing WordGraph set */ /** * * 単語グラフ中の削除マークの付いた単語を削除する. * * @param rootp [i/o] 単語グラフのルートノードへのポインタ * * @return 削除された単語の数 * * * Actually erase the marked words in word graph. * * @param rootp [i/o] pointer to root node of a word graph * * @return the number of erased words. * */ static int wordgraph_exec_erase(WordGraph **rootp) { WordGraph *wg, *we, *wtmp; int count; if (*rootp == NULL) return(0); wg = *rootp; count = 0; while (wg != NULL) { we = wg->next; while(we != NULL && we->mark == TRUE) { wtmp = we->next; wordgraph_free(we); count++; we = wtmp; } wg->next = we; wg = we; } if ((*rootp)->mark == TRUE) { wtmp = (*rootp)->next; wordgraph_free(*rootp); count++; *rootp = wtmp; } return(count); } /** * * グラフソート用 qsort コールバック * * @param x [in] 要素1 * @param y [in] 要素2 * * @return x > y なら 1, x < y なら -1, x = y なら 0 を返す. * * * qsort callback for word sorting. * * @param x [in] element 1 * @param y [in] element 2 * * @return 1 if x>y, -1 if x */ static int compare_lefttime(WordGraph **x, WordGraph **y) { if ((*x)->lefttime > (*y)->lefttime) return 1; else if ((*x)->lefttime < (*y)->lefttime) return -1; else { if ((*x)->righttime > (*y)->righttime) return 1; else if ((*x)->righttime < (*y)->righttime) return -1; else { if ((*x)->fscore_head < (*y)->fscore_head) return 1; else if ((*x)->fscore_head > (*y)->fscore_head) return -1; else return 0; } } } /** * * 単語グラフ内の全単語を開始時間順にソートし,通し番号をつける. * * @param rootp [i/o] 単語グラフのルートノードへのポインタ格納場所 * @param r [i/o] 認識処理インスタンス * * * Sort words by left time and annotate sequencial id for them in a word graph. 
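 *
 * @par Example (editor's sketch, not part of the original source)
 * After this call the words form a linear list ordered by begin frame,
 * each carrying a sequential id, so a simple dump can be written as:
 * @code
 * WordGraph *wg;
 * for (wg = root; wg; wg = wg->next) {
 *   printf("%d: wid=%d [%d..%d] fscore_head=%f\n",
 *          wg->id, wg->wid, wg->lefttime, wg->righttime, wg->fscore_head);
 * }
 * @endcode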
* * @param rootp [i/o] address of pointer to root node of a word graph * @param r [i/o] recognition process instance * * * @callgraph * @callergraph * */ int wordgraph_sort_and_annotate_id(WordGraph **rootp, RecogProcess *r) { WordGraph *wg; int cnt; WordGraph **wlist; int i; WordGraph *wo; /* count total number of words in the graph */ cnt = 0; for(wg=*rootp;wg;wg=wg->next) cnt++; if (cnt == 0) return 0; /* sort them by lefttime */ wlist = (WordGraph **)mymalloc(sizeof(WordGraph *) * cnt); i = 0; for(wg=*rootp;wg;wg=wg->next) { wlist[i++] = wg; } qsort(wlist, cnt, sizeof(WordGraph *), (int (*)(const void *, const void *))compare_lefttime); /* annotated id and re-order the link by the id */ wo = NULL; for(i=cnt-1;i>=0;i--) { wg = wlist[i]; wg->id = i; wg->next = wo; wo = wg; } *rootp = wo; free(wlist); return cnt; } /** * * 単語グラフ内の全単語を全て解放する. * * @param rootp [i/o] 単語グラフのルートノードへのポインタ * * * Free all the words in a word graph. * * @param rootp [i/o] pointer to root node of a word graph * * * @callgraph * @callergraph * */ void wordgraph_clean(WordGraph **rootp) { WordGraph *wg, *wtmp; wg = *rootp; while(wg != NULL) { wtmp = wg->next; wordgraph_free(wg); wg = wtmp; } *rootp = NULL; } /*********************************************************************/ /* Post-processing of generated word arcs after search has been done */ /** * * 単語グラフ深さカットのための qsort 用コールバック. fscore_head で * 降順にソートする. * * @param x [in] 要素1 * @param y [in] 要素2 * * @return qsort に準じた返り値 * * * Callback function for qsort to do word graph depth cutting. Graph * words will be sorted downward based on fscore_head. * * @param x [in] element 1 * @param y [in] element 2 * * @return values for qsort * */ static int compare_beam(WordGraph **x, WordGraph **y) { if ((*x)->fscore_head < (*y)->fscore_head) return 1; else if ((*x)->fscore_head > (*y)->fscore_head) return -1; else return 0; } /** * * @brief グラフ後処理その1:初期単語グラフの抽出. * * 探索中に生成された単語候補集合から,末端から始まるパス上に無いleaf単語を * 削除することで初期単語グラフを抽出する. * * @param rootp [i/o] 単語グラフのルートノードへのポインタ * @param r [in] 認識処理インスタンス * * * @brief Post-processing step 1: Extract initial word graph. * * Extract initial word graph from generated word arcs while search, by * purging leaf nodes and arcs that are not on the path from edge to edge. 
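 *
 * @par Post-processing order (editor's note, reconstructed from the step
 * numbers logged by the functions in this file; the exact call site is
 * outside this file)
 * @code
 * wordgraph_purge_leaf_nodes(&root, r);        // step 1
 * wordgraph_depth_cut(&root, r);               // step 1.5
 * wordgraph_adjust_boundary(&root, r);         // step 2
 * wordgraph_compaction_thesame(&root);         // step 3
 * wordgraph_compaction_exacttime(&root, r);    // step 4
 * wordgraph_compaction_neighbor(&root, r);     // step 5
 * wordgraph_sort_and_annotate_id(&root, r);    // assign sequential ids
 * @endcode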
* * @param rootp [i/o] pointer to root node of a word graph * @param r [in] recognition process instance * * * @callgraph * @callergraph * */ void wordgraph_purge_leaf_nodes(WordGraph **rootp, RecogProcess *r) { WordGraph *wg; int i, dst; boolean changed; int count, erased, del_left, del_right; /* count whole */ count = 0; for(wg=*rootp;wg;wg=wg->next) count++; if (verbose_flag) jlog("STAT: graphout: %d initial word arcs generated\n", count); if (count == 0) return; if (verbose_flag) jlog("STAT: graphout: step 1: purge leaf nodes\n"); /* mark words to be erased */ del_left = del_right = 0; do { changed = FALSE; for(wg=*rootp;wg;wg=wg->next) { if (wg->mark == TRUE) continue; /* mark if wg has no left context, or all leftwords are marked */ if (wg->lefttime != 0) { for(i=0;ileftwordnum;i++) { if (wg->leftword[i]->mark == FALSE) break; } if (i >= wg->leftwordnum) { wg->mark = TRUE; changed = TRUE; del_left++; continue; } } /* mark if wg has no right context, or all rightwords are marked */ if (wg->righttime != r->peseqlen - 1) { for(i=0;irightwordnum;i++) { if (wg->rightword[i]->mark == FALSE) break; } if (i >= wg->rightwordnum) { wg->mark = TRUE; changed = TRUE; del_right++; continue; } } } } while (changed == TRUE); if (verbose_flag) jlog("STAT: graphout: %d leaf words found (left_blank=%d, right_blank=%d)\n", del_left + del_right, del_left, del_right); /* do compaction of left/rightwords */ for(wg=*rootp;wg;wg=wg->next) { if (wg->mark) continue; dst = 0; for(i=0;ileftwordnum;i++) { if (wg->leftword[i]->mark == FALSE) { if (dst != i) { wg->leftword[dst] = wg->leftword[i]; wg->left_lscore[dst] = wg->left_lscore[i]; } dst++; } } wg->leftwordnum = dst; } for(wg=*rootp;wg;wg=wg->next) { if (wg->mark) continue; dst = 0; for(i=0;irightwordnum;i++) { if (wg->rightword[i]->mark == FALSE) { if (dst != i) { wg->rightword[dst] = wg->rightword[i]; wg->right_lscore[dst] = wg->right_lscore[i]; } dst++; } } wg->rightwordnum = dst; } /* execute erase of marked words */ erased = wordgraph_exec_erase(rootp); if (verbose_flag) jlog("STAT: graphout: %d words purged, %d words left in lattice\n", erased, count - erased); } /** * * @brief グラフ後処理その1. 5:グラフの深さによる単語候補のカット * * GRAPHOUT_DEPTHCUT 指定時,グラフの深さによる単語候補のカットを行う. * * @param rootp [i/o] 単語グラフのルートノードへのポインタ * @param r [in] 認識処理インスタンス * * * @brief Post-processing step 1.5: word graph depth cutting * * If GRAPHOUT_DEPTHCUT is defined, perform word graph depth cutting. 
* * @param rootp [i/o] pointer to root node of a word graph * @param r [in] recognition process instance * * * @callgraph * @callergraph * */ void wordgraph_depth_cut(WordGraph **rootp, RecogProcess *r) { #ifdef GRAPHOUT_DEPTHCUT WordGraph *wg; int i, dst; boolean changed; int count, erased, del_left, del_right; WordGraph **wlist; boolean f; int *wc; int t; int pruned; if (r->config->graph.graphout_cut_depth < 0) return; if (verbose_flag) jlog("STAT: graphout: step 1.5: cut less likely hypothesis by depth of %d\n", r->config->graph.graphout_cut_depth); /* count whole */ count = 0; for(wg=*rootp;wg;wg=wg->next) count++; if (count == 0) return; /* prepare buffer to count words per frame */ wc = (int *)mymalloc(sizeof(int) * r->peseqlen); for (t=0;tpeseqlen;t++) wc[t] = 0; /* sort words by fscore_head */ wlist = (WordGraph **)mymalloc(sizeof(WordGraph *) * count); i = 0; for(wg=*rootp;wg;wg=wg->next) { wlist[i++] = wg; } qsort(wlist, count, sizeof(WordGraph *), (int (*)(const void *, const void *))compare_beam); /* count words per frame, and unlink/mark them if below beam width */ pruned = 0; for (i=0;ilefttime;t<=wg->righttime;t++) { wc[t]++; if (wc[t] <= r->config->graph.graphout_cut_depth) f = FALSE; } if (f) { //wordgraph_remove_context(wg); wg->mark = TRUE; pruned++; } } #ifdef GDEBUG2 jlog("DEBUG: GRAPH DEPTH STATISTICS: NUMBER OF WORDS PER FRAME\n"); for(t=0;tpeseqlen;t++) { if (wc[t] > r->config->graph.graphout_cut_depth) { jlog("*"); } else { jlog(" "); } jlog("%4d: %d\n", t, wc[t]); } #endif if (verbose_flag) jlog("STAT: graphout: %d words out of %d are going to be pruned by depth cutting\n", pruned, count); free(wlist); free(wc); /* mark words to be erased */ del_left = del_right = 0; do { changed = FALSE; for(wg=*rootp;wg;wg=wg->next) { if (wg->mark == TRUE) continue; /* mark if wg has no left context, or all leftwords are marked */ if (wg->lefttime != 0) { for(i=0;ileftwordnum;i++) { if (wg->leftword[i]->mark == FALSE) break; } if (i >= wg->leftwordnum) { wg->mark = TRUE; changed = TRUE; del_left++; continue; } } /* mark if wg has no right context, or all rightwords are marked */ if (wg->righttime != r->peseqlen - 1) { for(i=0;irightwordnum;i++) { if (wg->rightword[i]->mark == FALSE) break; } if (i >= wg->rightwordnum) { wg->mark = TRUE; changed = TRUE; del_right++; continue; } } } } while (changed == TRUE); if (verbose_flag) jlog("STAT: graphout: %d new leaves found (left_blank=%d, right_blank=%d)\n", del_left + del_right, del_left, del_right); /* do compaction of left/rightwords */ for(wg=*rootp;wg;wg=wg->next) { if (wg->mark) continue; dst = 0; for(i=0;ileftwordnum;i++) { if (wg->leftword[i]->mark == FALSE) { if (dst != i) { wg->leftword[dst] = wg->leftword[i]; wg->left_lscore[dst] = wg->left_lscore[i]; } dst++; } } wg->leftwordnum = dst; } for(wg=*rootp;wg;wg=wg->next) { if (wg->mark) continue; dst = 0; for(i=0;irightwordnum;i++) { if (wg->rightword[i]->mark == FALSE) { if (dst != i) { wg->rightword[dst] = wg->rightword[i]; wg->right_lscore[dst] = wg->right_lscore[i]; } dst++; } } wg->rightwordnum = dst; } /* execute erase of marked words */ erased = wordgraph_exec_erase(rootp); if (verbose_flag) jlog("STAT: graphout: total %d words purged, %d words left in lattice\n", erased, count - erased); #else /* ~GRAPHOUT_DEPTHCUT */ if (verbose_flag) jlog("STAT: graphout: step 1.5: graph depth cutting has been disabled, skipped\n"); #endif } /** * * 単語間の境界情報のずれ補正を実行する. グラフ中の単語をチェックし, * 接続単語間で境界時間情報にずれがあるときは,そのずれを修正する. * 複数のコンテキスト間で異なる境界情報が存在する場合は,候補を * コピーしてそれぞれに合わせる. 
またアラインメントが不正な単語を除去する. * * @param rootp [i/o] グラフ単語リストのルートポインタ * @param mov_num_ret [out] 境界時間が動いた単語数を格納する変数へのポインタ * @param dup_num_ret [out] コピーされた単語数を格納する変数へのポインタ * @param del_num_ret [out] 削除された単語数を格納する変数へのポインタ * @param mod_num_ret [out] 変更された単語数を格納する変数へのポインタ * @param count [in] グラフ上の単語数 * @param maxfnum * @param peseqlen * @param lmtype * @param p_framelist * @param p_framescorelist * * @return グラフ内の単語が1つ以上変更されれば TRUE,変更なしであれば FALSE * を返す. * * * Execute adjustment of word boundaries. It looks through the graph to * check correspondence of word boundary information among context, and if * there is a gap, the beginning frame of right word will be moved to the * end frame of left word. If several alignment is found among contexts, * the word will be duplicated and each will be fit to each context. Also, * words with invalid alignment will be eliminated. * * @param rootp [in] root pointer to the list of graph words * @param mov_num_ret [out] pointer to hold resulted number of moved words * @param dup_num_ret [out] pointer to hold resulted number of duplicated words * @param del_num_ret [out] pointer to hold resulted number of eliminated words * @param mod_num_ret [out] pointer to hold resulted number of modified words * @param count [in] number of words in graph * @param maxfnum * @param peseqlen * @param lmtype * @param p_framelist * @param p_framescorelist * * @return TRUE if any word has been changed, or FALSE if no word has been altered. * */ static boolean wordgraph_adjust_boundary_sub(WordGraph **rootp, int *mov_num_ret, int *dup_num_ret, int *del_num_ret, int *mod_num_ret, int count, int *maxfnum, int peseqlen, int lmtype, int **p_framelist, LOGPROB **p_framescorelist) { WordGraph *wg, *left, *new; int i, j, k; int fnum; int mov_num, dup_num, del_num, mod_num; boolean changed = FALSE; int *framelist; LOGPROB *framescorelist; mov_num = dup_num = del_num = mod_num = 0; framelist = *p_framelist; framescorelist = *p_framescorelist; /* maximum number of left context words does not exceed total word num */ /* allocate temporal work area. these are permanent buffer that will be kept between recognition sessions. 
*/ if (*maxfnum == 0) { /* when this is called for the first time, allocate buffer */ *maxfnum = count; framelist = (int *)mymalloc(sizeof(int) * (*maxfnum)); framescorelist = (LOGPROB *)mymalloc(sizeof(LOGPROB) * (*maxfnum)); #ifdef GDEBUG jlog("DEBUG: Notice: maxfnum starts at %d\n", *maxfnum); #endif } else if (*maxfnum < count) { /* for later call, expand buffer if necessary */ free(framescorelist); free(framelist); *maxfnum = count; framelist = (int *)mymalloc(sizeof(int) * (*maxfnum)); framescorelist = (LOGPROB *)mymalloc(sizeof(LOGPROB) * (*maxfnum)); #ifdef GDEBUG jlog("DEBUG: Notice: maxfnum expanded by count (%d)\n", *maxfnum); #endif } #ifdef GDEBUG2 jlog("DEBUG: ***CHECK LOOP BEGIN***\n"); #endif for(wg=*rootp;wg;wg=wg->next) { if (wg->mark) continue; /* already marked */ #ifdef GDEBUG2 jlog("DEBUG: [%d..%d] \"%s\"\n", wg->lefttime, wg->righttime, wchmm_local->winfo->woutput[wg->wid]); #endif if (wg->leftwordnum == 0) { /* no leftword */ if (wg->lefttime != 0) { /* some fraction found by former elimination: remove this */ #ifdef GDEBUG2 jlog("DEBUG: -> no leftword at middle of lattice, eliminate this\n"); #endif wordgraph_remove_context(wg); wg->mark = TRUE; del_num++; changed = TRUE; } /* if has no leftword, adjustment of this word is not needed */ continue; } if (wg->rightwordnum == 0) { /* no rightword */ if (wg->righttime != peseqlen-1) { /* some fraction found by former elimination: remove this */ #ifdef GDEBUG2 jlog("DEBUG: -> no rightword at middle of lattice, eliminate this\n"); #endif wordgraph_remove_context(wg); wg->mark = TRUE; del_num++; changed = TRUE; continue; } /* if on right edge, continue adjusting */ } /* correct lefttime variation to framelist[] and framescorelist[] */ fnum = 0; /* check for buffer overrun */ if (wg->leftwordnum > (*maxfnum)) { /* expand buffer if necessary */ free(framescorelist); free(framelist); *maxfnum = wg->leftwordnum; framelist = (int *)mymalloc(sizeof(int) * (*maxfnum)); framescorelist = (LOGPROB *)mymalloc(sizeof(LOGPROB) * (*maxfnum)); #ifdef GDEBUG jlog("DEBUG: Notice: wg->leftwordnum exceeds maxfnum (%d > %d), expanded\n", wg->leftwordnum, *maxfnum); #endif } for(i=0;ileftwordnum;i++) { left = wg->leftword[i]; if (left->mark) continue; for(j=0;jrighttime + 1) break; } if (j >= fnum) { framelist[fnum] = left->righttime + 1; /* the tail gscore contains the language score of the word, so the head gscore of its right context should consider this */ framescorelist[fnum] = left->gscore_tail - wg->left_lscore[i]; fnum++; } } #ifdef GDEBUG2 jlog("DEBUG: possible boundary of left words:"); if (fnum == 0) { jlog(" (not exist)\n"); } else { for(j=0;jlefttime != framelist[0]) { #ifdef GDEBUG2 jlog("DEBUG: !moving as [%d..%d]", framelist[0], wg->righttime); #endif /* check the time correctness: if the lefttime is larger than righttime, this graph word has been completely overridden by the left word (i.e. the aligned frames are absorbed by re-alignment. In this case this word should be removed. 
*/ if (framelist[0] > wg->righttime) { #ifdef GDEBUG2 jlog(" : eliminated"); #endif wordgraph_link_context(wg); wordgraph_remove_context(wg); wg->mark = TRUE; del_num++; } else { #ifdef GDEBUG2 jlog(" : ok"); #endif /* adjust time and score */ wg->lefttime = framelist[0]; wg->gscore_head = framescorelist[0]; mov_num++; } #ifdef GDEBUG2 jlog("\n"); #endif changed = TRUE; } else if (wg->gscore_head != framescorelist[0]) { /* adjust only score */ #ifdef GDEBUG2 jlog("DEBUG: !ghead score changed: %f -> %f\n", wg->gscore_head, framescorelist[0]); #endif wg->gscore_head = framescorelist[0]; mod_num++; changed = TRUE; } } if (fnum > 1) { /* multiple candidate: make copy for each (fnum)*/ for(j=0;jrighttime); #endif if (framelist[j] > wg->righttime) { /* bogus link: link leftwords and rightwords, and delete this */ #ifdef GDEBUG2 jlog(" : eliminated"); #endif for(i=0;ileftwordnum;i++) { left = wg->leftword[i]; if (left->mark) continue; if (left->righttime + 1 == framelist[j]) { for(k=0;krightwordnum;k++) { if ((wg->rightword[k])->mark) continue; if (wg->rightword[k] == left) continue; wordgraph_check_and_add_leftword(wg->rightword[k], left, wg->left_lscore[i]); wordgraph_check_and_add_rightword(left, wg->rightword[k], wg->right_lscore[k]); } } } del_num++; } else { /* really duplicate */ #ifdef GDEBUG2 jlog(" : ok"); #endif new = wordgraph_new(wg->wid, wg->headphone, wg->tailphone, framelist[j], wg->righttime, wg->fscore_head, wg->fscore_tail, framescorelist[j], wg->gscore_tail, wg->lscore_tmp #ifdef CM_SEARCH , wg->cmscore #else , LOG_ZERO #endif ); /* copy corresponding link */ for(i=0;ileftwordnum;i++) { if ((wg->leftword[i])->mark) continue; if ((wg->leftword[i])->righttime + 1 == framelist[j]) { wordgraph_add_leftword(new, wg->leftword[i], wg->left_lscore[i]); wordgraph_add_rightword(wg->leftword[i], new, wg->left_lscore[i]); } } for(i=0;irightwordnum;i++) { if ((wg->rightword[i])->mark) continue; wordgraph_add_rightword(new, wg->rightword[i], wg->right_lscore[i]); wordgraph_add_leftword(wg->rightword[i], new, wg->right_lscore[i]); } new->saved = TRUE; new->next = *rootp; *rootp = new; } #ifdef GDEBUG2 jlog("\n"); #endif } /* remove the original */ #ifdef GDEBUG2 jlog("DEBUG: !delete original [%d..%d]\n", wg->lefttime, wg->righttime); #endif wordgraph_remove_context(wg); wg->mark = TRUE; dup_num--; changed = TRUE; } } *mov_num_ret = mov_num; *dup_num_ret = dup_num; *del_num_ret = del_num; *mod_num_ret = mod_num; *p_framelist = framelist; *p_framescorelist = framescorelist; #ifdef GDEBUG2 if (changed) { jlog("DEBUG: *** some graph has been altered, check loop continues\n"); } else { jlog("DEBUG: *** graph not changed at last loop, check ends here\n"); } #endif return (changed); } /** * * グラフ内に境界情報やスコアが全く同一の単語がある場合それらをマージする. * * @param rootp [i/o] グラフ単語リストのルートポインタ * @param rest_ret [out] マージ後のグラフ内の単語数を返すポインタ * @param merged_ret [out] マージされた単語数を返すポインタ * * * Merge duplicated words with exactly the same scores and alignments. 
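 *
 * @par Merge condition (editor's note, mirroring the test in the code below)
 * Two graph words are considered identical only when all of the following
 * match:
 * @code
 * wg->wid == we->wid && wg->headphone == we->headphone &&
 * wg->tailphone == we->tailphone && wg->lefttime == we->lefttime &&
 * wg->righttime == we->righttime && wg->fscore_head == we->fscore_head &&
 * wg->fscore_tail == we->fscore_tail
 * @endcode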
* * @param rootp [i/o] root pointer to the list of graph words * @param rest_ret [out] pointer to hold resulted number of words left in graph * @param merged_ret [out] pointer to hold resuled number of merged words * */ static void wordgraph_compaction_thesame_sub(WordGraph **rootp, int *rest_ret, int *merged_ret) { WordGraph *wg, *we; int i, count, erased, merged; count = 0; merged = 0; for(wg=*rootp;wg;wg=wg->next) { count++; if (wg->mark == TRUE) continue; for(we=wg->next;we;we=we->next) { if (we->mark == TRUE) continue; /* find the word with exactly the same time and score */ if (wg->wid == we->wid && wg->headphone == we->headphone && wg->tailphone == we->tailphone && wg->lefttime == we->lefttime && wg->righttime == we->righttime && wg->fscore_head == we->fscore_head && wg->fscore_tail == we->fscore_tail) { /* merge contexts */ merge_contexts(wg, we); /* swap contexts of left / right contexts */ for(i=0;ileftwordnum;i++) { if (we->leftword[i]->mark) continue; //if (we->leftword[i] == wg) continue; swap_rightword(we->leftword[i], we, wg, we->left_lscore[i]); } for(i=0;irightwordnum;i++) { if (we->rightword[i]->mark) continue; //if (we->rightword[i] == wg) continue; swap_leftword(we->rightword[i], we, wg, we->right_lscore[i]); } we->mark = TRUE; merged++; } } } erased = wordgraph_exec_erase(rootp); for(wg=*rootp;wg;wg=wg->next) { uniq_leftword(wg); uniq_rightword(wg); } *rest_ret = count - erased; *merged_ret = merged; } /** * * @brief グラフ後処理その2:単語境界情報の調整. * * GRAPHOUT_PRECISE_BOUNDARY 定義時,後続単語に依存した正確な単語境界 * を得るために,探索中において,グラフ単語を生成したあとに次回展開時に * 事後的に単語境界を移動させる. このため,前後の単語のもつ(移動前の) * 境界情報との対応がとれなくなるので,探索終了後に各単語の前後の単語へ * 正しい単語境界を伝搬させることで整合性をとる. * * 単語境界のずれは単語間で伝搬するため,すべての単語境界が動かなくなるまで * 調整が繰り返される. 巨大なグラフでは短い単語の沸きだしで処理が終わらない * 場合があるが,この場合 GRAPHOUT_LIMIT_BOUNDARY_LOOP を指定することで, * 繰り返す数の上限を graphout_limit_boundary_loop_num に制限できる. * * @param rootp [i/o] 単語グラフのルートノードへのポインタ * @param r [i/o] 認識処理インスタンス * * * @brief Post-processing step 2: Adjust word boundaries. * * When GRAPHOUT_PRECISE_BOUNDARY is defined, the word boundaries will be * moved depending on the later word expansion to get context-dependent * precise boundaries. So the precise boundary, modified after generation * while search, should be propagated to the context words in the post * processing. * * Since the affect of word boundaries may propagate to the context words, * the adjustment procedure has to be executed iteratively until all the * boundaries are fixated. However, when graph is large, the oscillation of * short words will results in very long loop. By defining * GRAPHOUT_LIMIT_BOUNDARY_LOOP, the number of the adjustment loop can be * up to the number specified by graphout_limit_bounrady_loop_num. 
* * @param rootp [i/o] pointer to root node of a word graph * @param r [i/o] recognition process instance * * * @callgraph * @callergraph * */ void wordgraph_adjust_boundary(WordGraph **rootp, RecogProcess *r) { #ifdef GRAPHOUT_PRECISE_BOUNDARY WordGraph *wg; int mov_num, dup_num, del_num, mod_num; int count, merged; boolean flag; int loopcount; int maxfnum; int *framelist; ///< frame list for adjust_boundary_sub LOGPROB *framescorelist; ///< frame score list for adjust_boundary_sub loopcount = 0; if (verbose_flag) jlog("STAT: graphout: step 2: adjust boundaries\n"); mov_num = dup_num = del_num = 0; /* count number of all words */ count = 0; for(wg=*rootp;wg;wg=wg->next) count++; maxfnum = 0; do { /* do adjust */ flag = wordgraph_adjust_boundary_sub(rootp, &mov_num, &dup_num, &del_num, &mod_num, count, &maxfnum, r->peseqlen, r->lmtype, &framelist, &framescorelist); /* do compaction */ wordgraph_compaction_thesame_sub(rootp, &count, &merged); if (verbose_flag) jlog("STAT: graphout: #%d: %d moved, %d duplicated, %d purged, %d modified, %d idential, %d left\n", loopcount + 1, mov_num, dup_num, del_num, mod_num, merged, count); #ifdef GRAPHOUT_LIMIT_BOUNDARY_LOOP if (++loopcount >= r->config->graph.graphout_limit_boundary_loop_num) { if (verbose_flag) jlog("STAT: graphout: loop count reached %d, terminate loop now\n", r->config->graph.graphout_limit_boundary_loop_num); break; } #endif } while (flag); /* free work area allocated in adjust_boundary_sub */ if (maxfnum > 0) { free(framescorelist); free(framelist); } /* execute erase of marked words */ wordgraph_exec_erase(rootp); #else if (verbose_flag) jlog("STAT: graphout: step 2: SKIP (adjusting boundaries)\n"); #endif /* GRAPHOUT_PRECISE_BOUNDARY */ } /** * * @brief グラフ後処理その3:単語の束ね(完全同一) * * 単語境界時刻と部分文仮説スコアが完全に一致する同じ単語どうしを一つに束ねる. * * @param rootp [i/o] 単語グラフのルートノードへのポインタ * * * @brief Post-processing step 3: Bundle words (exactly the same ones) * * This function bundles same words which have exactly the same * boundaries and partial sentence scores. * * @param rootp [i/o] pointer to root node of a word graph * * * @callgraph * @callergraph * */ void wordgraph_compaction_thesame(WordGraph **rootp) { int rest, erased; if (verbose_flag) jlog("STAT: graphout: step 3: merge idential hypotheses (same score, boundary, context)\n"); wordgraph_compaction_thesame_sub(rootp, &rest, &erased); if (verbose_flag) jlog("STAT: graphout: %d words merged, %d words left in lattice\n", erased, rest); } /** * * @brief グラフ後処理その4:単語の束ね(区間同一) * * 単語境界時刻が一致する同じ単語どうしを一つに束ねる. スコアが * 同一でなくても束ねられる. この場合,部分文スコアが最も高い候補が * 残る. graph_merge_neighbor_range が 負 の場合は実行されない. * * @param rootp [i/o] 単語グラフのルートノードへのポインタ * @param r [i/o] 認識処理インスタンス * * * @brief Post-processing step 4: Bundle words (same boundaries) * * This function bundles the same words which have exactly the same * boundaries, allowing having different scores. The word with * the best partial sentence score will be adopted. This function * will not take effect when graph_merge_neightbor_range is lower than 0. 
* * @param rootp [i/o] pointer to root node of a word graph * @param r [i/o] recognition process instance * * * @callgraph * @callergraph * */ void wordgraph_compaction_exacttime(WordGraph **rootp, RecogProcess *r) { WordGraph *wg, *we; int i, count, erased; if (r->config->graph.graph_merge_neighbor_range < 0) { if (verbose_flag) jlog("STAT: graphout: step 4: SKIP (merge the same words with same boundary to the most likely one\n"); return; } if (verbose_flag) jlog("STAT: graphout: step 4: merge same words with same boundary to the most likely one\n"); count = 0; for(wg=*rootp;wg;wg=wg->next) { count++; if (wg->mark == TRUE) continue; for(we=wg->next;we;we=we->next) { if (we->mark == TRUE) continue; /* find same words at same position */ if (wg->wid == we->wid && wg->lefttime == we->lefttime && wg->righttime == we->righttime) { /* merge contexts */ merge_contexts(wg, we); /* swap contexts of left / right contexts */ for(i=0;ileftwordnum;i++) { swap_rightword(we->leftword[i], we, wg, we->left_lscore[i]); } for(i=0;irightwordnum;i++) { swap_leftword(we->rightword[i], we, wg, we->right_lscore[i]); } /* keep the max score */ if (wg->fscore_head < we->fscore_head) { wg->headphone = we->headphone; wg->tailphone = we->tailphone; wg->fscore_head = we->fscore_head; wg->fscore_tail = we->fscore_tail; wg->gscore_head = we->gscore_head; wg->gscore_tail = we->gscore_tail; wg->lscore_tmp = we->lscore_tmp; #ifdef CM_SEARCH wg->cmscore = we->cmscore; #endif wg->amavg = we->amavg; } we->mark = TRUE; } } } erased = wordgraph_exec_erase(rootp); if (verbose_flag) jlog("STAT: graphout: %d words merged, %d words left in lattice\n", erased, count-erased); for(wg=*rootp;wg;wg=wg->next) { uniq_leftword(wg); uniq_rightword(wg); } } /** * * @brief グラフ後処理その5:単語の束ね(近傍区間) * * 似た単語境界時刻を持つ同じ単語どうしを一つに束ねる. 許すずれの幅は * graph_merge_neighbor_range で与え,これが 0 か負である場合は実行されない. * * @param rootp [i/o] 単語グラフのルートノードへのポインタ * @param r [i/o] 認識処理インスタンス * * * @brief Post-processing step 5: Bundle words (neighbor words) * * This function bundles the same words which appears at similar place. * If the difference of both the left boundary and right right boundary * is under graph_merge_neighbor_range, it will be bundled. * If its value is lower than or equal to 0, this function does not take * effect. 
* * @param rootp [i/o] pointer to root node of a word graph * @param r [i/o] recognition process instance * * * @callgraph * @callergraph * */ void wordgraph_compaction_neighbor(WordGraph **rootp, RecogProcess *r) { WordGraph *wg, *we; int i, count, erased; if (r->config->graph.graph_merge_neighbor_range <= 0) { if (verbose_flag) jlog("STAT: graphout: step 5: SKIP (merge the same words around)\n"); return; } if (verbose_flag) jlog("STAT: graphout: step 5: merge same words around, with %d frame margin\n", r->config->graph.graph_merge_neighbor_range); count = 0; for(wg=*rootp;wg;wg=wg->next) { count++; if (wg->mark == TRUE) continue; for(we=wg->next;we;we=we->next) { if (we->mark == TRUE) continue; if (wg->wid == we->wid && abs(wg->lefttime - we->lefttime) <= r->config->graph.graph_merge_neighbor_range && abs(wg->righttime - we->righttime) <= r->config->graph.graph_merge_neighbor_range) { /* merge contexts */ merge_contexts(wg, we); /* swap contexts of left / right contexts */ for(i=0;ileftwordnum;i++) { swap_rightword(we->leftword[i], we, wg, we->left_lscore[i]); } for(i=0;irightwordnum;i++) { swap_leftword(we->rightword[i], we, wg, we->right_lscore[i]); } /* keep the max score */ if (wg->fscore_head < we->fscore_head) { wg->headphone = we->headphone; wg->tailphone = we->tailphone; wg->fscore_head = we->fscore_head; wg->fscore_tail = we->fscore_tail; wg->gscore_head = we->gscore_head; wg->gscore_tail = we->gscore_tail; wg->lscore_tmp = we->lscore_tmp; #ifdef CM_SEARCH wg->cmscore = we->cmscore; #endif wg->amavg = we->amavg; } we->mark = TRUE; } } } erased = wordgraph_exec_erase(rootp); if (verbose_flag) jlog("STAT: graphout: %d words merged, %d words left in lattice\n", erased, count-erased); for(wg=*rootp;wg;wg=wg->next) { uniq_leftword(wg); uniq_rightword(wg); } } /**************************************************************/ /* generation of graph word candidates while search */ /** * * 新たな単語グラフ候補を生成して返す. この時点ではまだ単語グラフ中には * 登録されていない. * * @param wid [in] 単語ID * @param wid_left [in] word ID of left context for determining head phone * @param wid_right [in] word ID of right context for determining tail phone * @param leftframe [in] 始端時刻(フレーム) * @param rightframe [in] 終端時刻(フレーム) * @param fscore_head [in] 始端での部分文スコア (g + h) * @param fscore_tail [in] 終端での部分文スコア (g + h) * @param gscore_head [in] 先頭での入力末端からのViterbiスコア (g) * @param gscore_tail [in] 末尾での入力末端からのViterbiスコア (g) * @param lscore [in] 言語スコア * @param cm [in] 信頼度 * @param r [in] 認識処理インスタンス * * @return 新たに生成されたグラフ単語候補へのポインタ * * * Return a newly allocated graph word candidates. The resulting word * is not registered to the word graph yet. * * @param wid [in] word ID * @param wid_left [in] word ID of left context for determining head phone * @param wid_right [in] word ID of right context for determining tail phone * @param leftframe [in] beginning time in frames * @param rightframe [in] end time in frames * @param fscore_head [in] sentence score on search at word head (g + h) * @param fscore_tail [in] sentence score on search at word tail (g + h) * @param gscore_head [in] Viterbi score accumulated from input end at word head (g) * @param gscore_tail [in] Viterbi score accumulated from input end at word tail (g) * @param lscore [in] language score * @param cm [in] confidence score * @param r [in] recognition process instance * * @return pointer to the newly created graph word candidate. 
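 *
 * @par Usage sketch (editor's addition, not part of the original source)
 * A candidate returned here becomes part of the graph only when it is
 * committed with wordgraph_save(), which also creates the bidirectional
 * link to its right context word; the variable names below are hypothetical:
 * @code
 * WordGraph *wg;
 * wg = wordgraph_assign(wid, wid_left, wid_right, ts, te,
 *                       fscore_h, fscore_t, gscore_h, gscore_t,
 *                       lscore, cm, r);
 * if (wg != NULL) wordgraph_save(wg, right_wg, &root);  // root: graph list head
 * @endcode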
* * * @callgraph * @callergraph * */ WordGraph * wordgraph_assign(WORD_ID wid, WORD_ID wid_left, WORD_ID wid_right, int leftframe, int rightframe, LOGPROB fscore_head, LOGPROB fscore_tail, LOGPROB gscore_head, LOGPROB gscore_tail, LOGPROB lscore, LOGPROB cm, RecogProcess *r) { WordGraph *newarc; HMM_Logical *l, *ret, *head, *tail; WORD_INFO *winfo; winfo = r->lm->winfo; /* find context dependent phones at head and tail */ l = winfo->wseq[wid][winfo->wlen[wid]-1]; if (wid_right != WORD_INVALID) { ret = get_right_context_HMM(l, winfo->wseq[wid_right][0]->name, r->am->hmminfo); if (ret != NULL) l = ret; } if (winfo->wlen[wid] > 1) { tail = l; l = winfo->wseq[wid][0]; } if (wid_left != WORD_INVALID) { ret = get_left_context_HMM(l, winfo->wseq[wid_left][winfo->wlen[wid_left]-1]->name, r->am->hmminfo); if (ret != NULL) l = ret; } head = l; if (winfo->wlen[wid] <= 1) { tail = l; } /* generate a new graph word hypothesis */ newarc = wordgraph_new(wid, head, tail, leftframe, rightframe, fscore_head, fscore_tail, gscore_head, gscore_tail, lscore, cm); //jlog("DEBUG: [%d..%d] %d\n", leftframe, rightframe, wid); return newarc; } /** * * グラフ単語候補を単語グラフの一部として確定する. 確定されたグラフ単語には * saved に TRUE がセットされる. * * @param wg [i/o] 登録するグラフ単語候補 * @param right [i/o] @a wg の右コンテキストとなる単語 * @param root [i/o] 確定済み単語グラフのルートノードへのポインタ * * * Register a graph word candidate to the word graph as a member. * The registered word will have the saved member to be set to TRUE. * * @param wg [i/o] graph word candidate to be registered * @param right [i/o] right context graph word * @param root [i/o] pointer to root node of already registered word graph * * * @callgraph * @callergraph * */ void wordgraph_save(WordGraph *wg, WordGraph *right, WordGraph **root) { if (wg != NULL) { wg->next = *root; *root = wg; wg->saved = TRUE; wordgraph_add_leftword(right, wg, wg->lscore_tmp); wordgraph_add_rightword(wg, right, wg->lscore_tmp); } } #ifdef GRAPHOUT_DYNAMIC /** * * ある単語グラフ候補について,既に確定したグラフ単語中に同じ位置に * 同じ単語があるかどうかを調べる. もしあれば,単語グラフ候補の * コンテキストをその確定済みグラフ単語にマージする. * * GRAPHOUT_SEARCH定義時は,さらにここで探索を中止すべきかどうかも判定する. * すなわち,次単語仮説がそのグラフ単語の左コンテキストとして既に確定した * グラフ単語中にあれば,それ以上の展開は不要で探索を中止すべきと判定する. * * @param now [i/o] 単語グラフ候補 * @param root [i/o] 確定済み単語グラフのルートノードへのポインタ * @param next_wid [in] 次単語仮説 * @param merged_p [out] 探索を中止すべきなら TRUE,続行してよければ * FALSE が格納される (GRAPHOUT_SEARCH 定義時) * @param jconf [in] 探索用設定パラメータ * * @return 同じ位置に同じ単語があった場合,マージした先の * 確定済みグラフ単語へのポインタを返す. もしなかった場合,NULL を返す. * * * Check if a graph word with the same word ID and same position as the * given graph word candidate exists in the already registered word graph. * If such graph word is found, the word contexts of the given word * graph candidate will be merged to the found graph word in the registered * word graph. * * When GRAPHOUT_SEARCH is defined, whether to terminate the search at here * will be determined here. That is, if the next word in search already * exists in the list of left context words of the merged graph word, * it is determined that the next path has already been expanded and thus * there is no need to proceed more on this hypothesis. 
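 *
 * @par Usage sketch (editor's addition, not part of the original source)
 * @code
 * boolean stop;
 * WordGraph *existing;
 * existing = wordgraph_check_merge(wg, &root, next_word_id, &stop, r->config);
 * if (existing != NULL) wg = existing;  // candidate was merged into the graph
 * #ifdef GRAPHOUT_SEARCH
 * if (stop) {
 *   // this partial path is already covered by the graph; abandon expansion
 * }
 * #endif
 * @endcode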
* * @param now [i/o] graph word candidate * @param root [i/o] pointer to root node of already registered word graph * @param next_wid [in] next word on search * @param merged_p [out] will be set to TRUE if search should be terminated, * or FALSE if search should be proceeded (when GRAPHOUT_SEARCH defined) * @param jconf [in] configuration parameters for this search * * @return the pointer to the already registered graph word when the same * word was found on the same position, or NULL if such word not found in * already registered word graph. * * * @callgraph * @callergraph * */ WordGraph * wordgraph_check_merge(WordGraph *now, WordGraph **root, WORD_ID next_wid, boolean *merged_p, JCONF_SEARCH *jconf) { WordGraph *wg; int i; #ifdef GDEBUG WordGraph *w; #endif #ifdef GRAPHOUT_SEARCH *merged_p = FALSE; #endif if (now == NULL) return(NULL); #ifdef GDEBUG jlog("DEBUG: check_merge: checking \"%s\"[%d..%d]\n", wchmm_local->winfo->woutput[now->wid], now->lefttime, now->righttime); for(i=0;ileftwordnum;i++) { w = now->leftword[i]; jlog("DEBUG: \t left%d: \"%15s\"[%d..%d]\n", i, wchmm_local->winfo->woutput[w->wid], w->lefttime, w->righttime); } for(i=0;irightwordnum;i++) { w = now->rightword[i]; jlog("DEBUG: \tright%d: \"%15s\"[%d..%d]\n", i, wchmm_local->winfo->woutput[w->wid], w->lefttime, w->righttime); } #endif for(wg=*root;wg;wg=wg->next) { if (wg == now) continue; #ifdef GRAPHOUT_DYNAMIC /* skip already merged word */ if (wg->purged) continue; #endif if (jconf->graph.graph_merge_neighbor_range < 0) { /* when no merging, words with different triphone context at word edge should be differenciated */ if (wg->headphone != now->headphone || wg->tailphone != now->tailphone) { continue; } } if (wg->wid == now->wid && wg->lefttime == now->lefttime && wg->righttime == now->righttime) { /* same word on the same position is found in current word graph */ #ifdef GDEBUG jlog("DEBUG: check_merge: same word found: \"%s\"[%d..%d]\n", wchmm_local->winfo->woutput[wg->wid], wg->lefttime, wg->righttime); for(i=0;ileftwordnum;i++) { w = wg->leftword[i]; jlog("DEBUG: \t left%d: \"%15s\"[%d..%d]\n", i, wchmm_local->winfo->woutput[w->wid], w->lefttime, w->righttime); } for(i=0;irightwordnum;i++) { w = wg->rightword[i]; jlog("DEBUG: \tright%d: \"%15s\"[%d..%d]\n", i, wchmm_local->winfo->woutput[w->wid], w->lefttime, w->righttime); } #endif /* merge contexts */ merge_contexts(wg, now); /* swap contexts of left / right contexts */ for(i=0;ileftwordnum;i++) { swap_rightword(now->leftword[i], now, wg, now->left_lscore[i]); uniq_rightword(now->leftword[i]); } for(i=0;irightwordnum;i++) { swap_leftword(now->rightword[i], now, wg, now->right_lscore[i]); uniq_leftword(now->rightword[i]); } #ifdef GRAPHOUT_SEARCH /* if the left and right contexts of now are already included in wg, and wg already has left node of next word, it means that the current word and the last word context is already included in the existing word graph. So, in the case this partial path should be abandoned. 
*/ for(i=0;ileftwordnum;i++) { if (wg->leftword[i]->wid == next_wid) break; } if (i < wg->leftwordnum) { *merged_p = TRUE; } #endif /* GRAPHOUT_SEARCH */ #ifdef GRAPHOUT_OVERWRITE /* if current hypothesis score is higher than saved, overwrite the scores and not terminate */ if ( #ifdef GRAPHOUT_OVERWRITE_GSCORE //wg->gscore_head < now->gscore_head wg->amavg < now->amavg; #else wg->fscore_head < now->fscore_head #endif ) { wg->headphone = now->headphone; wg->tailphone = now->tailphone; wg->fscore_head = now->fscore_head; wg->fscore_tail = now->fscore_tail; wg->gscore_head = now->gscore_head; wg->gscore_tail = now->gscore_tail; wg->lscore_tmp = now->lscore_tmp; #ifdef CM_SEARCH wg->cmscore = now->cmscore; #endif wg->amavg = now->amavg; #ifdef GRAPHOUT_SEARCH *merged_p = FALSE; #endif } #endif /* GRAPHOUT_OVERWRITE */ /* the merged word should be discarded for later merging from another word, so disable this */ now->purged = TRUE; /* return the found one */ return wg; } } /* if the same word not found, return NULL */ return NULL; } #endif /* GRAPHOUT_DYNAMIC */ /**************************************************************/ /* misc. functions */ /** * * グラフ単語の情報をテキストで出力する. 内容は以下のとおり: *
 *   ID: left=左コンテキストのID[,ID,...] right=右コンテキストのID[,ID,...]
 *   [左端フレーム..右端フレーム]
 *   wid=単語ID
 *   name="単語名"
 *   lname="N-gram 単語名,あるいはカテゴリ番号 (Julian)"
 *   f=探索中の左端での部分文スコア(g(n) + h(n+1)) n=この単語
 *   f_prev=探索中の右端での部分文スコア(g(n-1) + h(n)) n=この単語
 *   g_head=左端での累積Viterbiスコア g(n)
 *   g_prev=右端での累積Viterbiスコア g(n-1) + LM(n)
 *   lscore=言語スコア LM(n)   (Julius の場合のみ)
 *   AMavg=フレーム平均音響尤度
 *   cmscore=単語信頼度
 * 
* * @param fp [in] 出力先のファイルポインタ * @param wg [in] 出力するグラフ単語 * @param winfo [in] 単語辞書 *
* * Output information of a graph word in text in the format below: * (n means the word) * *
 *   ID: left=left_context_ID[,ID,...] right=right_context_ID[,ID,...]
 *   [left_edge_frame..right_edge_frame]
 *   wid=word_id
 *   name="word string"
 *   lname="N-gram word string (Julius) or category number (Julian)"
 *   f="partial sentence score at left edge (g(n) + h(n+1)) on search time"
 *   f_prev="partial sentence score at right edge (g(n-1) + h(n)) on search time"
 *   g_head="accumulated Viterbi score at left edge (g(n))"
 *   g_prev="accumulated Viterbi score at right edge (g(n-1) + LM(n))"
 *   lscore="language score LM(n)  (Julius only)"
 *   AMavg="average acoustic likelihood per frame"
 *   cmscore="confidence score"
 * 
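 *
 * (Note: the actual output of put_wordgraph() below also contains the
 *  left_lscore, right_lscore, lscore_tmp, forward_score, backword_score,
 *  graphcm, headphone and tailphone fields.)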
* @param fp [in] file pointer to which output should go * @param wg [in] graph word to output * @param winfo [in] word dictionary *
* * @callgraph * @callergraph * */ void put_wordgraph(FILE *fp, WordGraph *wg, WORD_INFO *winfo) { int i; if (fp == NULL) return; if (wg == NULL) { fprintf(fp, "(NULL)\n"); } else { fprintf(fp, "%d:", wg->id); fprintf(fp, " [%d..%d]", wg->lefttime, wg->righttime); for(i=0;ileftwordnum;i++) { fprintf(fp, (i == 0) ? " left=%d" : ",%d", wg->leftword[i]->id); } for(i=0;irightwordnum;i++) { fprintf(fp, (i == 0) ? " right=%d" : ",%d", wg->rightword[i]->id); } for(i=0;ileftwordnum;i++) { fprintf(fp, (i == 0) ? " left_lscore=%f" : ",%f", wg->left_lscore[i]); } for(i=0;irightwordnum;i++) { fprintf(fp, (i == 0) ? " right_lscore=%f" : ",%f", wg->right_lscore[i]); } fprintf(fp, " lscore_tmp=%f", wg->lscore_tmp); fprintf(fp, " wid=%d name=\"%s\" lname=\"%s\" f=%f f_prev=%f g_head=%f g_prev=%f", wg->wid, winfo->woutput[wg->wid], winfo->wname[wg->wid], wg->fscore_head, wg->fscore_tail, wg->gscore_head, wg->gscore_tail); fprintf(fp, " forward_score=%f backword_score=%f", wg->forward_score, wg->backward_score); if (wg->righttime - wg->lefttime + 1 != 0) { fprintf(fp, " AMavg=%f", wg->amavg); } #ifdef CM_SEARCH fprintf(fp, " cmscore=%f", wg->cmscore); #endif fprintf(fp, " graphcm=%f", wg->graph_cm); fprintf(fp, " headphone=%s", wg->headphone->name); fprintf(fp, " tailphone=%s", wg->tailphone->name); fprintf(fp, "\n"); } } /** * * 生成された単語グラフ中の全単語をテキスト出力する. * * @param fp [in] 出力先のファイルポインタ * @param root [in] 単語グラフのルートノードへのポインタ * @param winfo [in] 単語辞書 * * * Output text information of all the words in word graph. * * @param fp [in] file pointer to which output should go * @param root [in] pointer to root node of a word graph * @param winfo [in] word dictionary * * * @callgraph * @callergraph * */ void wordgraph_dump(FILE *fp, WordGraph *root, WORD_INFO *winfo) { WordGraph *wg; fprintf(fp, "--- begin wordgraph data ---\n"); for(wg=root;wg;wg=wg->next) { put_wordgraph(fp, wg, winfo); } fprintf(fp, "--- end wordgraph data ---\n"); } /** * * デバッグ用:単語グラフの整合性をチェックする. * * @param rootp [in] 単語グラフのルートノードへのポインタ * @param r [i/o] 認識処理インスタンス * * * For debug: Check the coherence in word graph. 
* * @param rootp [in] pointer to root node of a word graph * @param r [i/o] recognition process instance * * * @callgraph * @callergraph * */ void wordgraph_check_coherence(WordGraph *rootp, RecogProcess *r) { WordGraph *wg, *wl, *wr; int nl, nr; WORD_INFO *winfo; winfo = r->lm->winfo; for(wg=rootp;wg;wg=wg->next) { /* check ID overflow */ if (wg->id < 0 || wg->id >= r->graph_totalwordnum) { jlog("ERROR: invalid graph word id \"%d\" (should be [0..%d])\n", wg->id, r->graph_totalwordnum-1); put_wordgraph(jlog_get_fp(), wg, winfo); continue; } /* check link */ for(nl=0;nlleftwordnum;nl++){ wl = wg->leftword[nl]; if (wl->id < 0 || wl->id >= r->graph_totalwordnum) { jlog("ERROR: invalid graph word id \"%d\" (should be [0..%d]) in left context\n", wl->id, r->graph_totalwordnum-1); put_wordgraph(jlog_get_fp(), wg, winfo); continue; } for(nr=0;nrrightwordnum;nr++){ if (wl->rightword[nr] == wg) break; } if (nr >= wl->rightwordnum) { jlog("ERROR: on graph, reverse link not found in left context\n"); put_wordgraph(jlog_get_fp(), wg, winfo); put_wordgraph(jlog_get_fp(), wl, winfo); continue; } } for(nr=0;nrrightwordnum;nr++){ wr = wg->rightword[nr]; if (wr->id < 0 || wr->id >= r->graph_totalwordnum) { jlog("ERROR: invalid graph word id \"%d\" (should be [0..%d]) in right context\n", wr->id, r->graph_totalwordnum-1); put_wordgraph(jlog_get_fp(), wg, winfo); continue; } for(nl=0;nlleftwordnum;nl++){ if (wr->leftword[nl] == wg) break; } if (nl >= wr->leftwordnum) { jlog("ERROR: on graph, reverse link not found in left context\n"); put_wordgraph(jlog_get_fp(), wg, winfo); put_wordgraph(jlog_get_fp(), wr, winfo); continue; } } } } /* lattice-based posterior probability computation by forward-backward algorithm */ /** * * qsort callback function to order words from right to left. * * * 単語を右から左へ並べるための qsort コールバック関数 * * * @param x [in] 1st element * @param y [in] 2nd element * * @return value required by qsort * */ static int compare_forward(WordGraph **x, WordGraph **y) { if ((*x)->righttime < (*y)->righttime) return 1; else if ((*x)->righttime > (*y)->righttime) return -1; else return 0; } /** * * qsort callback function to order words from left to right. * * * 単語を左から右へ並べるための qsort コールバック関数 * * * @param x [in] 1st element * @param y [in] 2nd element * * @return value required by qsort * */ static int compare_backward(WordGraph **x, WordGraph **y) { if ((*x)->lefttime < (*y)->lefttime) return -1; else if ((*x)->lefttime > (*y)->lefttime) return 1; else return 0; } /** * * 常用対数で表現されている確率の和を計算する. * * * compute addition of two probabilities in log10 form. * * * @param x [in] first value * @param y [in] second value * * @return value of log(10^x + 10^y) * */ static LOGPROB addlog10(LOGPROB x, LOGPROB y) { if (x < y) { //return(y + log10(1 + pow(10, x-y))); return(y + log(1 + pow(10, x-y)) * INV_LOG_TEN); } else { return(x + log(1 + pow(10, y-x)) * INV_LOG_TEN); } } /** * * 生成されたラティス上において,forward-backward アルゴリズムにより * 信頼度を計算する. 計算された値は各グラフ単語の graph_cm に格納される. * 事後確率の計算では,探索中の信頼度計算と同じ * α値(r->config->annotate.cm_alpha)が用いられる. * * * Compute graph-based confidence scores by forward-backward parsing on * the generated lattice. The computed scores are stored in graph_cm of * each graph words. The same alpha value of search-time confidence scoring * (r->config->annotate.cm_alpha) will be used to compute the posterior * probabilities. * * * @param root [in] root graph node * @param r [in] recognition process instance *
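 *
 * (Sketch of the computation performed below: with the scaled acoustic
 *  term  a(w) = cm_alpha * amavg(w) * (righttime(w) - lefttime(w) + 1),
 *  forward and backward scores are accumulated in the log10 domain with
 *  addlog10(), adding the language scores scaled by cm_alpha when
 *  propagating between words, and each word posterior becomes
 *      graph_cm(w) = 10^( forward_score(w) + a(w) + backward_score(w) - sum1 )
 *  where sum1 is the total forward probability mass reaching frame 0.)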
* * @callgraph * @callergraph * */ void graph_forward_backward(WordGraph *root, RecogProcess *r) { WordGraph *wg, *left, *right; int i, j; LOGPROB s; LOGPROB sum1, sum2; int count; WordGraph **wlist; LOGPROB cm_alpha; cm_alpha = r->config->annotate.cm_alpha; /* make a wordgraph list for frame-sorted access */ count = 0; for(wg=root;wg;wg=wg->next) count++; if (count == 0) return; wlist = (WordGraph **)mymalloc(sizeof(WordGraph *) * count); i = 0; for(wg=root;wg;wg=wg->next) { wlist[i++] = wg; } /* sort wordgraph list downward by the right frame*/ qsort(wlist, count, sizeof(WordGraph *), (int (*)(const void *, const void *))compare_forward); /* clear forward scores */ for(wg=root;wg;wg=wg->next) { wg->forward_score = LOG_ZERO; } /* forward procedure */ sum1 = LOG_ZERO; for(i=0;irighttime == r->peseqlen - 1) { /* set initial score */ wg->forward_score = 0.0; //wg->forward_score += wg->lscore * cm_alpha; } else { /* (just a bogus check...) */ if (wg->forward_score == LOG_ZERO) { wordgraph_dump(stdout, root, r->lm->winfo); put_wordgraph(stdout, wg, r->lm->winfo); j_internal_error("NO CONTEXT?\n"); } } /* propagate scores */ s = wg->amavg * (wg->righttime - wg->lefttime + 1); s *= cm_alpha; s += wg->forward_score; if (wg->lefttime == 0) { /* add for sum */ sum1 = addlog10(sum1, s); } else { /* propagate to left words */ for(j=0;jleftwordnum;j++) { left = wg->leftword[j]; left->forward_score = addlog10(left->forward_score, s + wg->left_lscore[j] * cm_alpha); } } } /* sort wordgraph list downward by the right score */ qsort(wlist, count, sizeof(WordGraph *), (int (*)(const void *, const void *))compare_backward); /* clear backward scores */ for(wg=root;wg;wg=wg->next) { wg->backward_score = LOG_ZERO; } /* backward procedure */ sum2 = LOG_ZERO; for(i=0;ilefttime == 0) { /* set initial score */ wg->backward_score = 0.0; } else { /* (just a bogus check...) */ if (wg->backward_score == LOG_ZERO) { put_wordgraph(stdout, wg, r->lm->winfo); j_internal_error("NO CONTEXT?\n"); } } /* propagate scores */ s = wg->amavg * (wg->righttime - wg->lefttime + 1); s *= cm_alpha; s += wg->backward_score; if (wg->righttime == r->peseqlen - 1) { /* add for sum */ //sum2 = addlog10(sum2, s + wg->lscore * cm_alpha); sum2 = addlog10(sum2, s); } else { for(j=0;jrightwordnum;j++) { right = wg->rightword[j]; right->backward_score = addlog10(right->backward_score, s + wg->right_lscore[j] * cm_alpha); } } } if (verbose_flag) jlog("STAT: graph_cm: forward score = %f, backward score = %f\n", sum1, sum2); /* compute CM */ for(wg=root;wg;wg=wg->next) { s = wg->amavg * (wg->righttime - wg->lefttime + 1); s *= cm_alpha; s = wg->backward_score + s + wg->forward_score; wg->graph_cm = pow(10, s - sum1); //wg->graph_cm = s - sum1; } free(wlist); } /* end of file */ julius-4.2.2/libjulius/src/wav2mfcc.c0000644001051700105040000001413312004452401015767 0ustar ritrlab/** * @file wav2mfcc.c * * * @brief 特徴量ベクトル(MFCC)系列の算出(非実時間版) * * 入力された音声波形から,特徴ベクトル系列を抽出します. * Julius/Julianで抽出できる特徴ベクトルは,MFCC の任意次元数のもので, * _0, _E, _D, _A, _Z, _N の任意の組合わせをサポートします. * そのほか,窓長やフレームシフト,帯域カットなどのパラメータを指定できます. * 認識時には,音響モデルのヘッダとチェックが行われ,CMNの有無など * が決定されます. * * ここの関数は,バッファ上に蓄積された音声波形データを一度に * 特徴ベクトル系列に変換するもので,ファイル入力などに用いられます. * マイク入力などで,入力と平行に認識を行う場合は,ここの関数ではなく, * realtime-1stpass.c 内で行われます. * * * * @brief Calculate feature vector (MFCC) sequence (non on-the-fly ver.) * * Parameter vector sequence extraction of input speech is done * here. The supported parameter is MFCC, with any combination of * all the qualifiers in HTK: _0, _E, _D, _A, _Z, _N. 
Acoustic model * for recognition should be trained with the same parameter type. * You can specify other parameters such as window size, frame shift, * high/low frequency cut-off via runtime options. At startup, Julius * will check for the parameter types of acoustic model if it conforms * the limitation, and determine whether other additional processing * is needed such as Cepstral Mean Normalization. * * Functions below are used to convert fully buffered whole sentence * utterance, and typically used for audio file input. When input * is concurrently processed with recognition process at 1st pass, * in case of microphone input, the MFCC computation will be done * within functions in realtime-1stpass.c instead of these. * * * @author Akinobu Lee * @date Sun Sep 18 19:40:34 2005 * * $Revision: 1.4 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #include #include /** * * 音声波形データから MFCC パラメータを抽出する. * エンジンインスタンス内の MFCC 計算インスタンスごとにパラメータ抽出が * 行われ,それぞれの mfcc->param に格納される. * * @param speech [in] 音声波形データ * @param speechlen [in] @a speech の長さ(単位:サンプル数) * @param recog [in] エンジンインスタンス * * @return 成功時 TRUE, エラー時 FALSE を返す. * * * Extract MFCC parameters with sentence CMN from given waveform. * Parameters will be computed for each MFCC calculation instance * in the engine instance, and stored in mfcc->param for each. * * @param speech [in] buffer of speech waveform * @param speechlen [in] length of @a speech in samples * @param recog [in] engine instance * * @return TRUE on success, FALSE on error. * * * @callgraph * @callergraph */ boolean wav2mfcc(SP16 speech[], int speechlen, Recog *recog) { int framenum; int len; Value *para; MFCCCalc *mfcc; /* calculate frame length from speech length, frame size and frame shift */ framenum = (int)((speechlen - recog->jconf->input.framesize) / recog->jconf->input.frameshift) + 1; if (framenum < 1) { jlog("WARNING: input too short (%d samples), ignored\n", speechlen); return FALSE; } for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) { if (mfcc->frontend.ssload_filename) { /* setup for spectral subtraction using file */ if (mfcc->frontend.ssbuf == NULL) { /* load noise spectrum for spectral subtraction from file (once) */ if ((mfcc->frontend.ssbuf = new_SS_load_from_file(mfcc->frontend.ssload_filename, &(mfcc->frontend.sslen))) == NULL) { jlog("ERROR: wav2mfcc: failed to read noise spectrum from file \"%s\"\n", mfcc->frontend.ssload_filename); return FALSE; } } } if (mfcc->frontend.sscalc) { /* compute noise spectrum from head silence for each input */ len = mfcc->frontend.sscalc_len * recog->jconf->input.sfreq / 1000; if (len > speechlen) len = speechlen; #ifdef SSDEBUG jlog("DEBUG: [%d]\n", len); #endif mfcc->frontend.ssbuf = new_SS_calculate(speech, len, &(mfcc->frontend.sslen), mfcc->frontend.mfccwrk_ss, mfcc->para); } } /* compute mfcc from speech file for each mfcc instances */ for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) { para = mfcc->para; /* malloc new param */ param_init_content(mfcc->param); if (param_alloc(mfcc->param, framenum, para->veclen) == FALSE) { jlog("ERROR: failed to allocate memory for converted parameter vectors\n"); return FALSE; } if (mfcc->frontend.ssload_filename || mfcc->frontend.sscalc) { /* make link from mfccs to this buffer */ mfcc->wrk->ssbuf = mfcc->frontend.ssbuf; mfcc->wrk->ssbuflen = mfcc->frontend.sslen; mfcc->wrk->ss_alpha = 
mfcc->frontend.ss_alpha; mfcc->wrk->ss_floor = mfcc->frontend.ss_floor; } /* make MFCC from speech data */ if (Wav2MFCC(speech, mfcc->param->parvec, para, speechlen, mfcc->wrk) == FALSE) { jlog("ERROR: failed to compute MFCC from input speech\n"); if (mfcc->frontend.sscalc) { free(mfcc->frontend.ssbuf); mfcc->frontend.ssbuf = NULL; } return FALSE; } /* set miscellaneous parameters */ mfcc->param->header.samplenum = framenum; mfcc->param->header.wshift = para->smp_period * para->frameshift; mfcc->param->header.sampsize = para->veclen * sizeof(VECT); /* not compressed */ mfcc->param->header.samptype = F_MFCC; if (para->delta) mfcc->param->header.samptype |= F_DELTA; if (para->acc) mfcc->param->header.samptype |= F_ACCL; if (para->energy) mfcc->param->header.samptype |= F_ENERGY; if (para->c0) mfcc->param->header.samptype |= F_ZEROTH; if (para->absesup) mfcc->param->header.samptype |= F_ENERGY_SUP; if (para->cmn) mfcc->param->header.samptype |= F_CEPNORM; mfcc->param->veclen = para->veclen; mfcc->param->samplenum = framenum; if (mfcc->frontend.sscalc) { free(mfcc->frontend.ssbuf); mfcc->frontend.ssbuf = NULL; } } return TRUE; } /* end of file */ julius-4.2.2/libjulius/src/pass1.c0000644001051700105040000005100112004452401015301 0ustar ritrlab/** * @file pass1.c * * * @brief 第1パス:フレーム同期ビーム探索 * * 静的木構造辞書を用いて,入力特徴量ベクトル列に対して,Juliusの第1パス * であるフレーム同期ビーム探索を行います. * * 入力データ全体があらかじめ得られている場合は,一括で計算を * 行う関数 get_back_trellis() がメインから呼ばれます. オンライン認識 * の場合は realtime_1stpass.c から,初期化,フレームごとの計算, * 終了処理のそれぞれが入力の進行にあわせて個別に呼ばれます. * * 実際の個々の認識処理インスタンスごとの処理は beam.c に記述されています. * * * * * @brief The first pass: frame-synchronous beam search * * These functions perform a frame-synchronous beam search using a static * lexicon tree, as the first pass of Julius/Julian. * * When the whole input is already obtained, get_back_trellis() simply * does all the processing of the 1st pass. When performing online * real-time recognition with concurrent speech input, each function * will be called separately from realtime_1stpass.c according on the * basis of input processing. * * The core recognition processing functions for each recognition * process instances are written in beam.c. * * * * @author Akinobu Lee * @date Fri Oct 12 23:14:13 2007 * * $Revision: 1.9 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #include /********************************************************************/ /* 第1パスを実行するメイン関数 */ /* 入力をパイプライン処理する場合は realtime_1stpass.c を参照のこと */ /* main function to execute 1st pass */ /* the pipeline processing is not here: see realtime_1stpass.c */ /********************************************************************/ /** * * @brief Process one input frame for all recognition process instance. * * This function proceeds the recognition for one frame. All * recognition process instance will be processed synchronously. * The input frame for each instance is stored in mfcc->f, where mfcc * is the MFCC calculation instance assigned to each process instance. * * If an instance's mfcc->invalid is set to TRUE, its processing will * be skipped. * * When using GMM, GMM computation will also be executed here. * If GMM_VAD is defined, GMM-based voice detection will be performed * inside this function, by using a scheme of short-pause segmentation. * * This function also handles segmentation of recognition process. 
A * segmentation will occur when end of speech is detected by level-based * sound detection or GMM-based / decoder-based VAD, or by request from * application. When segmented, it stores current frame and return with * that status. * * The frame-wise callbacks will be executed inside this function, * when at least one valid recognition process instances exists. * * * * @brief 全ての認識処理インスタンス処理を1フレーム分進める. * * 全ての認識処理インスタンスについて,割り付けられているMFCC計算インスタンス * の mfcc->f をカレントフレームとして処理を1フレーム進める. * * なお,mfcc->invalid が TRUE となっている処理インスタンスの処理はスキップ * される. * * GMMの計算もここで呼び出される. GMM_VAD 定義時は,GMM による * 発話区間開始・終了の検出がここで行われる. また,GMMの計算結果, * あるいは認識処理内のショートポーズセグメンテーション判定やデバイス・外部 * からの要求によりセグメンテーションが要求されたかどうかの判定も行う. * * フレーム単位で呼び出されるコールバックが登録されている場合は,それらの * 呼出しも行う. * * * @param recog [in] engine instance * * @return 0 on success, -1 on error, or 1 when an input segmentation * occured/requested inside this function. * * @callgraph * @callergraph * */ int decode_proceed(Recog *recog) { MFCCCalc *mfcc; boolean break_flag; boolean break_decode; RecogProcess *p; boolean ok_p; #ifdef GMM_VAD GMMCalc *gmm; boolean break_gmm; #endif break_decode = FALSE; for(p = recog->process_list; p; p = p->next) { #ifdef DETERMINE p->have_determine = FALSE; #endif p->have_interim = FALSE; } for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { mfcc->segmented = FALSE; } #ifdef POWER_REJECT for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { if (!mfcc->valid) continue; if (mfcc->f == 0) { mfcc->avg_power = 0.0; if (debug2_flag) jlog("STAT: power_reject: reset\n"); } } #endif #ifdef GMM_VAD if (recog->gmm != NULL) { /* reset flags */ break_gmm = FALSE; recog->gc->want_rewind = FALSE; } #endif if (recog->gmm != NULL && recog->gmmmfcc->valid) { /* GMM 計算を行う */ if (recog->gmmmfcc->f == 0) { /* GMM 計算の初期化 */ gmm_prepare(recog); } /* このフレームに対するGMMの尤度を計算 */ gmm_proceed(recog); #ifdef GMM_VAD /* Check for GMM-based VAD */ gmm = recog->gc; gmm_check_trigger(recog); if (gmm->after_trigger) { /* after trigger, in speech area */ if (gmm->down_trigger) { /* down trigger, end segment */ #ifdef GMM_VAD_DEBUG printf("GMM_VAD: %d: down trigger\n", recog->gmmmfcc->f); #endif recog->gmmmfcc->sparea_start = recog->gmmmfcc->f + 1 - recog->jconf->detect.gmm_margin; if (recog->gmmmfcc->sparea_start < 0) recog->gmmmfcc->sparea_start = 0; gmm->after_trigger = FALSE; recog->gmmmfcc->segmented = TRUE; break_gmm = TRUE; } else { /* keep recognition */ } } else { /* before trigger, in noise area */ if (gmm->up_trigger) { /* start recognition */ /* request caller to rewind to the backstep point and re-start with normal search */ if (recog->gmmmfcc->f + 1 < recog->jconf->detect.gmm_margin) { gmm->rewind_frame = 0; } else { gmm->rewind_frame = recog->gmmmfcc->f + 1 - recog->jconf->detect.gmm_margin; } #ifdef GMM_VAD_DEBUG printf("GMM_VAD: %d: up trigger, start recognition with %d frame rewind\n", recog->gmmmfcc->f, recog->gmmmfcc->f - gmm->rewind_frame); #endif gmm->want_rewind = TRUE; gmm->want_rewind_reprocess = TRUE; gmm->after_trigger = TRUE; return 0; } else { /* before trigger, noise continues */ /* if noise goes more than a certain frame, shrink the noise area to avoid unlimited memory usage */ if (recog->gmmmfcc->f + 1 > GMM_VAD_AUTOSHRINK_LIMIT) { gmm->want_rewind = TRUE; gmm->want_rewind_reprocess = FALSE; gmm->rewind_frame = recog->gmmmfcc->f + 1 - recog->jconf->detect.gmm_margin; if (debug2_flag) { jlog("DEBUG: GMM_VAD: pause exceeded %d, rewind\n", GMM_VAD_AUTOSHRINK_LIMIT); } } /* skip recognition processing */ return 0; } } #endif /* GMM_VAD */ } 
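  /* After the GMM stage above, each live recognition process instance
     advances its frame-synchronous beam search by one frame below:
     frame 0 initializes the search with get_back_trellis_init(), later
     frames call get_back_trellis_proceed(), and a FALSE return or a
     detected short-pause segment marks the MFCC instance as segmented. */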
for(p = recog->process_list; p; p = p->next) { if (!p->live) continue; mfcc = p->am->mfcc; if (!mfcc->valid) { /* このフレームの処理をスキップ */ /* skip processing the frame */ continue; } /* mfcc-f のフレームについて認識処理(フレーム同期ビーム探索)を進める */ /* proceed beam search for mfcc->f */ if (mfcc->f == 0) { /* 最初のフレーム: 探索処理を初期化 */ /* initial frame: initialize search process */ if (get_back_trellis_init(mfcc->param, p) == FALSE) { jlog("ERROR: %02d %s: failed to initialize the 1st pass\n", p->config->id, p->config->name); return -1; } } if (mfcc->f > 0 || p->am->hmminfo->multipath) { /* 1フレーム探索を進める */ /* proceed search for 1 frame */ if (get_back_trellis_proceed(mfcc->f, mfcc->param, p, FALSE) == FALSE) { mfcc->segmented = TRUE; break_decode = TRUE; } if (p->config->successive.enabled) { if (detect_end_of_segment(p, mfcc->f - 1)) { /* セグメント終了検知: 第1パスここで中断 */ mfcc->segmented = TRUE; break_decode = TRUE; } } } } /* セグメントすべきかどうか最終的な判定を行う. デコーダベースVADあるいは spsegment の場合,複数インスタンス間で OR を取る.また,GMMなど複数基準がある場合は基準間で AND を取る.*/ /* determine whether to segment at here If multiple segmenter exists, take their AND */ break_flag = FALSE; if (break_decode #ifdef GMM_VAD || (recog->gmm != NULL && break_gmm) #endif ) { break_flag = TRUE; } if (break_flag) { /* 探索処理の終了が発生したのでここで認識を終える. 最初のフレームから [f-1] 番目までが認識されたことになる */ /* the recognition process tells us to stop recognition, so recognition should be terminated here. the recognized data are [0..f-1] */ /* 最終フレームを last_time にセット */ /* set the last frame to last_time */ for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { mfcc->last_time = mfcc->f - 1; } if (! recog->jconf->decodeopt.segment) { /* ショートポーズ以外で切れた場合,残りのサンプルは認識せずに捨てる */ /* drop rest inputs if segmented by error */ for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { mfcc->param->header.samplenum = mfcc->f; mfcc->param->samplenum = mfcc->f; } } return 1; } /* call frame-wise callback for the processing results if any */ #ifdef DETERMINE ok_p = FALSE; for(p=recog->process_list;p;p=p->next) { if (!p->live) continue; if (p->have_determine) { ok_p = TRUE; } } if (ok_p) callback_exec(CALLBACK_RESULT_PASS1_DETERMINED, recog); #endif ok_p = FALSE; for(p=recog->process_list;p;p=p->next) { if (!p->live) continue; if (p->have_interim) { ok_p = TRUE; } } if (ok_p) callback_exec(CALLBACK_RESULT_PASS1_INTERIM, recog); return 0; } #ifdef POWER_REJECT boolean power_reject(Recog *recog) { MFCCCalc *mfcc; for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { /* skip if not realtime and raw file processing */ if (mfcc->avg_power == 0.0) continue; if (debug2_flag) jlog("STAT: power_reject: MFCC%02d: avg_power = %f\n", mfcc->id, mfcc->avg_power / mfcc->param->samplenum); if (mfcc->avg_power / mfcc->param->samplenum < recog->jconf->reject.powerthres) return TRUE; } return FALSE; } #endif /** * * @brief End procedure of the first pass (when segmented) * * This function do things for ending the first pass and prepare for * the next recognition, when the input was segmented at the middle of * recognition by some reason. * * First, the best path at each recognition process instance will be parsed * and stored. In case of recognition error or input rejection, the error * status will be set. * * Then, the last pause segment of the processed input will be cut and saved * to be processed at first in the recognition of the next or remaining input. * * * * @brief 第1パスの終了処理(セグメント時) * * 入力が何らかの事由によって途中でセグメントされた時に,第1パスの認識処理を * 終了して次回再開するための処理を行う. * * まず,各認識処理インスタンスに対して,最尤単語系列を見付け,第1パスの * 認識結果として格納する. また,認識失敗・入力棄却の時はエラーステータスをそ * れぞれセットする. 
* * そして,次回の認識で,次のセグメントの認識を,検出された末尾雑音 * 区間から再開するために,その末尾雑音区間を切り出しておく処理を呼ぶ. * * * * @param recog [in] engine instance * * @callgraph * @callergraph */ void decode_end_segmented(Recog *recog) { boolean ok_p; int mseclen; RecogProcess *p; int last_status; /* rejectshort 指定時, 入力が短ければここで第1パス結果を出力しない */ /* suppress 1st pass output if -rejectshort and input shorter than specified */ ok_p = TRUE; if (recog->jconf->reject.rejectshortlen > 0) { mseclen = (float)recog->mfcclist->last_time * (float)recog->jconf->input.period * (float)recog->jconf->input.frameshift / 10000.0; if (mseclen < recog->jconf->reject.rejectshortlen) { last_status = J_RESULT_STATUS_REJECT_SHORT; ok_p = FALSE; } } #ifdef POWER_REJECT if (ok_p) { if (power_reject(recog)) { last_status = J_RESULT_STATUS_REJECT_POWER; ok_p = FALSE; } } #endif if (ok_p) { for(p=recog->process_list;p;p=p->next) { if (!p->live) continue; finalize_1st_pass(p, p->am->mfcc->last_time); } } else { for(p=recog->process_list;p;p=p->next) { if (!p->live) continue; p->result.status = last_status; } } if (recog->jconf->decodeopt.segment) { finalize_segment(recog); } if (recog->gmm != NULL) { /* GMM 計算の終了 */ gmm_end(recog); } } /** * * @brief End procedure of the first pass * * This function finish the first pass, when the input was fully * processed to the end. * * The best path at each recognition process instance will be parsed * and stored. In case of recognition error or input rejection, the * error status will be set. * * * * @brief 第1パスの終了処理 * * 入力が最後まで処理されて終了したときに,第1パスの認識処理を * 終了させる. * * 各認識処理インスタンスに対して,その時点での第1パスの最尤単語 * 系列を格納する. また,認識失敗・入力棄却の時はエラーステータスをそ * れぞれセットする. * * * * @param recog [in] engine instance * * @callgraph * @callergraph */ void decode_end(Recog *recog) { MFCCCalc *mfcc; int mseclen; boolean ok_p; RecogProcess *p; int last_status; for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { mfcc->segmented = FALSE; } if (recog->gmm != NULL) { /* GMM 計算の終了 */ gmm_end(recog); } #ifdef GMM_VAD /* もしトリガがかからないまま入力終了に達したのなら,そのままエラー終了 */ if (recog->jconf->decodeopt.segment) { if (recog->gmm) { if (recog->gc->after_trigger == FALSE) { for(p=recog->process_list;p;p=p->next) { p->result.status = J_RESULT_STATUS_ONLY_SILENCE; /* reject by decoding */ } /* ショートポーズセグメンテーションの場合, 入力パラメータ分割などの最終処理も行なう */ /* When short-pause segmentation enabled */ finalize_segment(recog); return; } } } #endif /* 第1パスの最後のフレームの認識処理を行う */ /* finalize 1st pass */ for(p=recog->process_list;p;p=p->next) { if (!p->live) continue; #ifdef SPSEGMENT_NAIST if (recog->jconf->decodeopt.segment) { if (p->pass1.after_trigger == FALSE) continue; } #endif mfcc = p->am->mfcc; if (mfcc->f > 0) { get_back_trellis_end(mfcc->param, p); } } /* 終了処理 */ for(p=recog->process_list;p;p=p->next) { if (!p->live) continue; ok_p = TRUE; /* check rejection by no input */ if (ok_p) { mfcc = p->am->mfcc; /* 入力長がデルタの計算に十分でない場合,入力無しとする. 
*/ /* if input is short for compute all the delta coeff., terminate here */ if (mfcc->f == 0) { jlog("STAT: no input frame\n"); last_status = J_RESULT_STATUS_FAIL; ok_p = FALSE; } } /* check rejection by input length */ if (ok_p) { if (recog->jconf->reject.rejectshortlen > 0) { mseclen = (float)mfcc->param->samplenum * (float)recog->jconf->input.period * (float)recog->jconf->input.frameshift / 10000.0; if (mseclen < recog->jconf->reject.rejectshortlen) { last_status = J_RESULT_STATUS_REJECT_SHORT; ok_p = FALSE; } } } #ifdef POWER_REJECT /* check rejection by average power */ if (ok_p) { if (power_reject(recog)) { last_status = J_RESULT_STATUS_REJECT_POWER; ok_p = FALSE; } } #endif #ifdef SPSEGMENT_NAIST /* check rejection non-triggered input segment */ if (ok_p) { if (recog->jconf->decodeopt.segment) { if (p->pass1.after_trigger == FALSE) { last_status = J_RESULT_STATUS_ONLY_SILENCE; /* reject by decoding */ ok_p = FALSE; } } } #endif if (ok_p) { /* valid input segment, finalize it */ finalize_1st_pass(p, mfcc->param->samplenum); } else { /* invalid input segment */ p->result.status = last_status; } } if (recog->jconf->decodeopt.segment) { /* ショートポーズセグメンテーションの場合, 入力パラメータ分割などの最終処理も行なう */ /* When short-pause segmentation enabled */ finalize_segment(recog); } } /** * * @brief フレーム同期ビーム探索メイン関数(バッチ処理用) * * 与えられた入力ベクトル列に対して第1パス(フレーム同期ビーム探索)を * 行い,その結果を出力する. また全フレームに渡る単語終端を,第2パス * のために単語トレリス構造体に格納する. * * この関数は入力ベクトル列があらかじめ得られている場合に用いられる. * 第1パスが入力と並列して実行されるオンライン認識の場合, * この関数は用いられず,代わりにこのファイルで定義されている各サブ関数が * 直接 realtime-1stpass.c 内から呼ばれる. * * @param recog [in] エンジンインスタンス * * * @brief Frame synchronous beam search: the main (for batch mode) * * This function perform the 1st recognition pass of frame-synchronous beam * search and output the result. It also stores all the word ends in every * input frame to word trellis structure. * * This function will be called if the whole input vector is already given * to the end. When online recognition, where the 1st pass will be * processed in parallel with input, this function will not be used. * In that case, functions defined in this file will be directly called * from functions in realtime-1stpass.c. * * @param recog [in] engine instance * * @callgraph * @callergraph */ boolean get_back_trellis(Recog *recog) { boolean ok_p; MFCCCalc *mfcc; int rewind_frame; PROCESS_AM *am; boolean reprocess; /* initialize mfcc instances */ for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) { /* mark all as valid, since all frames are fully prepared beforehand */ if (mfcc->param->samplenum == 0) mfcc->valid = FALSE; else mfcc->valid = TRUE; /* set frame pointers to 0 */ mfcc->f = 0; } /* callback of process start */ #ifdef BACKEND_VAD if (recog->jconf->decodeopt.segment) { /* at first time, recognition does not start yet */ /* reset segmentation flags */ spsegment_init(recog); } else { /* execute callback for pass1 begin here */ callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog); callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog); recog->triggered = TRUE; } #else if (recog->jconf->decodeopt.segment) { if (!recog->process_segment) { callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog); } callback_exec(CALLBACK_EVENT_SEGMENT_BEGIN, recog); } else { callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog); } callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog); recog->triggered = TRUE; #endif while(1) { ok_p = TRUE; for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { if (! 
mfcc->valid) continue; if (mfcc->f < mfcc->param->samplenum) { mfcc->valid = TRUE; ok_p = FALSE; } else { mfcc->valid = FALSE; } } if (ok_p) { /* すべての MFCC が終わりに達したのでループ終了 */ /* all MFCC has been processed, end of loop */ break; } switch (decode_proceed(recog)) { case -1: /* error */ return FALSE; break; case 0: /* success */ break; case 1: /* segmented */ /* 探索中断: 処理された入力は 0 から t-2 まで */ /* search terminated: processed input = [0..t-2] */ /* この時点で第1パスを終了する */ /* end the 1st pass at this point */ decode_end_segmented(recog); /* terminate 1st pass here */ return TRUE; } #ifdef BACKEND_VAD /* check up trigger in case of VAD segmentation */ if (recog->jconf->decodeopt.segment) { if (recog->triggered == FALSE) { if (spsegment_trigger_sync(recog)) { if (!recog->process_segment) { callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog); } callback_exec(CALLBACK_EVENT_SEGMENT_BEGIN, recog); callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog); recog->triggered = TRUE; } } } #endif if (spsegment_need_restart(recog, &rewind_frame, &reprocess) == TRUE) { /* do rewind for all mfcc here */ spsegment_restart_mfccs(recog, rewind_frame, reprocess); /* reset outprob cache for all AM */ for(am=recog->amlist;am;am=am->next) { outprob_prepare(&(am->hmmwrk), am->mfcc->param->samplenum); } } /* call frame-wise callback */ callback_exec(CALLBACK_EVENT_PASS1_FRAME, recog); /* 1フレーム処理が進んだのでポインタを進める */ /* proceed frame pointer */ for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { if (!mfcc->valid) continue; mfcc->f++; } if (recog->process_want_terminate) { /* termination requested */ decode_end_segmented(recog); return TRUE; } } /* 最終フレーム処理を行い,認識の結果出力と終了処理を行う */ decode_end(recog); return TRUE; } /* end of file */ julius-4.2.2/libjulius/src/version.c.in0000644001051700105040000001200012004452401016340 0ustar ritrlab/** * @file version.c * * * @brief バージョンおよびコンパイル時設定の出力 * * * * * @brief Output version and compilation-time configuration. * * * * @author Akinobu Lee * @date Mon Sep 12 01:34:15 2005 * * $Revision: 1.8 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ /* @configure_input@ */ #include #define CC "@CC@" ///< Used compiler #define CFLAGS "@CFLAGS@" ///< Used flags for compilation /** * * ヘッダを出力する. * * @param strm [in] 出力ストリーム * * * Output application header. * * @param strm [in] output stream * */ void j_put_header(FILE *strm){ if (strm == NULL) return; fprintf(strm,"%s rev.%s (%s)\n\n", JULIUS_PRODUCTNAME, JULIUS_VERSION, JULIUS_SETUP); } /** * * バージョン情報を出力する * * @param strm [in] 出力ストリーム * * * Output version information. * * @param strm [in] output stream * */ void j_put_version(FILE *strm){ if (strm == NULL) return; fprintf(strm,"\n%s rev.%s (%s) built for %s\n\n", JULIUS_PRODUCTNAME, JULIUS_VERSION, JULIUS_SETUP, JULIUS_HOSTINFO); fprintf(strm,"Copyright (c) 1991-2012 Kawahara Lab., Kyoto University\n"); fprintf(strm,"Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan\n"); fprintf(strm,"Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology\n"); fprintf(strm,"Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology\n\n"); } /** * * コンパイル時の設定を出力する. * * @param strm [in] 入力ストリーム * * * Output compile-time settings. 
* * @param strm [in] input stream * */ void j_put_compile_defs(FILE *strm){ if (strm == NULL) return; fprintf(strm,"Engine specification:\n"); fprintf(strm," - Base setup : %s\n", JULIUS_SETUP); fprintf(strm," - Supported LM : DFA, N-gram, Word\n"); fprintf(strm," - Extension :"); #ifndef UNIGRAM_FACTORING fprintf(strm, ", 2gramFactoring"); #endif # ifdef GRAPHOUT_DYNAMIC # ifdef GRAPHOUT_SEARCH /* this is default */ //fprintf(strm, " GraphOutSearch"); # else fprintf(strm, " GraphOutNonSearchTermination"); # endif # else fprintf(strm, " GraphOutFromNBest"); # endif # ifndef GRAPHOUT_PRECISE_BOUNDARY fprintf(strm, " DisableGraphOutPostFitting"); # endif #ifdef CM_SEARCH_LIMIT # ifdef CM_SEARCH_LIMIT_AFTER fprintf(strm, " CMPruning_OnlyAfterReached"); # else fprintf(strm, " CMPruning"); # endif # ifdef CM_SEARCH_LIMIT_POP fprintf(strm, " CMPruningOnPOP"); # endif #endif # ifndef LM_FIX_DOUBLE_SCORING fprintf(strm, " NoLMFix"); # endif # ifndef CLASS_NGRAM fprintf(strm, " NoClassNGram"); # endif #ifdef WORDS_INT fprintf(strm, " WordsInt"); #endif # ifdef LOWMEM fprintf(strm, " SingleTree"); # else # ifdef LOWMEM2 /* fprintf(strm, " HiFreqLinearTree");*/ # else fprintf(strm, " ShortWordTree"); # endif # endif # ifndef CATEGORY_TREE //fprintf(strm, " NoCategoryTree"); # endif #ifdef MONOTREE fprintf(strm, " MonoTree1"); #endif #ifndef SCAN_BEAM fprintf(strm, " NoScoreEnvelope"); #endif #ifndef PASS1_IWCD fprintf(strm, " NoIWCD1"); #endif #ifdef PASS2_STRICT_IWCD fprintf(strm, " StrictIWCD2"); #endif #ifdef WPAIR # ifdef WPAIR_KEEP_NLIMIT fprintf(strm, " WordPairNApprox"); # else fprintf(strm, " WordPairApprox"); # endif #endif #ifdef WORD_GRAPH fprintf(strm, " 1stPassWordGraph"); #endif #ifndef CONFIDENCE_MEASURE fprintf(strm, " NoCM"); #else # ifdef CM_NBEST fprintf(strm, " N-bestCM"); # endif # ifdef CM_MULTIPLE_ALPHA fprintf(strm, " MultiCMOutput"); # endif #endif /* CONFIDENCE_MEASURE */ #ifndef USE_MIC fprintf(strm, " NoMic"); #endif #ifdef USE_NETAUDIO fprintf(strm, " NetAudio"); #endif #ifndef HAVE_PTHREAD fprintf(strm, " NoPThread"); #endif #ifdef HAVE_LIBSNDFILE fprintf(strm, " LibSndFile"); #endif #ifdef VISUALIZE fprintf(strm, " Visualize"); #endif #ifdef FORK_ADINNET fprintf(strm, " ForkOnAdinnet"); #endif #ifndef MFCC_SINCOS_TABLE fprintf(strm, " DisableMFCCTable"); #endif #ifndef LM_FIX_DOUBLE_SCORING fprintf(strm, " DisableLMFix3.4"); #endif #ifdef USE_LIBJCODE fprintf(strm, " Libjcode"); #endif #ifdef HAVE_ICONV fprintf(strm, " IconvOutput"); #endif #ifdef GMM_VAD fprintf(strm, " GMMVAD"); #endif #ifdef SPSEGMENT_NAIST fprintf(strm, " DecoderVAD"); #endif #ifdef POWER_REJECT fprintf(strm, " PowerReject"); #endif fprintf(strm, "\n"); fprintf(strm," - Compiled by : %s %s\n", CC, CFLAGS); } /** * * ライブラリの設定を出力する * * @param strm [in] 出力ストリーム * * * Output library configuration. * * @param strm [in] output stream * */ void j_put_library_defs(FILE *strm) { if (strm == NULL) return; fprintf(strm, "Library configuration: "); confout(strm); fprintf(strm, "\n"); } /* end of file */ julius-4.2.2/libjulius/src/confnet.c0000644001051700105040000004713012004452401015716 0ustar ritrlab/** * @file confnet.c * * * @brief Confusion network の生成 * * 認識の結果得られた単語グラフから,confusion network を生成する. * * * * @brief Confusion network generation * * Generate confusion network from the obtained word lattice. 
* * * @author Akinobu Lee * @date Thu Aug 16 00:15:51 2007 * * $Revision: 1.6 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #include /** * Define to enable debug output. * */ #undef CDEBUG /** * Define to enable further debug output. * */ #undef CDEBUG2 /** * Use graph-based CM for confusion network generation. If not * defined search-based CM (default of old julius) will be used. * However, the clustering process does not work properly with this * definition, since sum of the search- based CM for a word set on the * same position is not always 1.0. Thus you'd better always define * this. * */ #define PREFER_GRAPH_CM /** * Julius identify the words by their dictionary IDs, so words with * different entries are treated as a different word. If this is * defined, Julius treat words with the same output string as same * words and bundle them in confusion network generation. * */ #define BUNDLE_WORD_WITH_SAME_OUTPUT /** * Determine whether the two words are idential in confusion network * generation. * * @param w1 [in] first word * @param w2 [in] second word * @param winfo [in] word dictionary * * @return TRUE if they are idential, FALSE if not. */ static boolean is_same_word(WORD_ID w1, WORD_ID w2, WORD_INFO *winfo) { if (w1 == w2 #ifdef BUNDLE_WORD_WITH_SAME_OUTPUT || strmatch(winfo->woutput[w1], winfo->woutput[w2]) #endif ) return TRUE; return FALSE; } /**************************************************************/ /** * Macro to access the order matrix. * */ #define m2i(A, B) (B) * r->order_matrix_count + (A) /** * Judge order between two words by their word graph ID. * * @param i [in] id of left graph word * @param j [in] id of right graph word * * @return TRUE if they are ordered, or FALSE if not. */ static boolean graph_ordered(RecogProcess *r, int i, int j) { if (i != j && r->order_matrix[m2i(i,j)] == 0 && r->order_matrix[m2i(j,i)] == 0) { return FALSE; } return TRUE; } /** * Scan the order matrix to update it at initial step and after word * (set) marging. * */ static void graph_update_order(RecogProcess *r) { int i, j, k; boolean changed; int count; count = r->order_matrix_count; do { changed = FALSE; for(i=0;iorder_matrix[m2i(i, j)] == 1) { for(k=0;korder_matrix[m2i(j, k)] == 1) { if (r->order_matrix[m2i(i, k)] == 0) { r->order_matrix[m2i(i, k)] = 1; changed = TRUE; } } } } } } } while (changed == TRUE); } /** * Extract order relationship between any two words in the word graph * for confusion network generation. 
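 *
 * (Illustrative note: the relation is stored as a count x count byte
 *  matrix addressed by the m2i() macro, where order_matrix[m2i(i,j)] == 1
 *  means graph word i can appear before graph word j; graph_update_order()
 *  then closes the relation transitively in a Warshall-style fixpoint loop.)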
* * @param root [in] root pointer to the word graph * @param r [in] recognition process instance * * @callgraph * @callergraph */ void graph_make_order(WordGraph *root, RecogProcess *r) { int count; WordGraph *wg, *right; int i; /* make sure total num and id are valid */ count = 0; for(wg=root;wg;wg=wg->next) count++; if (count == 0) { r->order_matrix = NULL; return; } if (count != r->graph_totalwordnum) { jlog("Error: graph_make_order: r->graph_totalwordnum differ from actual number?\n"); r->order_matrix = NULL; return; } r->order_matrix_count = count; for(wg=root;wg;wg=wg->next) { if (wg->id >= count) { jlog("Error: graph_make_order: wordgraph id >= count (%d >= %d)\n", wg->id, count); r->order_matrix = NULL; return; } } /* allocate and clear matrix */ r->order_matrix = (char *)mymalloc(count * count); for(i=0;iorder_matrix[i] = 0; /* set initial order info */ for(wg=root;wg;wg=wg->next) { for(i=0;irightwordnum;i++) { right = wg->rightword[i]; r->order_matrix[m2i(wg->id, right->id)] = 1; } } /* right propagate loop */ graph_update_order(r); } /** * Free the order relation data. * * @callgraph * @callergraph */ void graph_free_order(RecogProcess *r) { if (r->order_matrix) { free(r->order_matrix); r->order_matrix = NULL; } } /**************************************************************/ /** * Create a new cluster holder. * * @return the newly allocated cluster holder. */ static CN_CLUSTER * cn_new() { CN_CLUSTER *new; new = (CN_CLUSTER *)mymalloc(sizeof(CN_CLUSTER)); new->wg = (WordGraph **)mymalloc(sizeof(WordGraph *) * CN_CLUSTER_WG_STEP); new->wgnum_alloc = CN_CLUSTER_WG_STEP; new->wgnum = 0; new->words = NULL; new->pp = NULL; new->next = NULL; return new; } /** * Free a cluster holder * * @param c [out] a cluster holder to be released. * */ static void cn_free(CN_CLUSTER *c) { free(c->wg); if (c->words) free(c->words); if (c->pp) free(c->pp); free(c); } /** * Free all cluster holders. * * @param croot [out] pointer to root pointer of cluster holder list. * * @callgraph * @callergraph * */ void cn_free_all(CN_CLUSTER **croot) { CN_CLUSTER *c, *ctmp; c = *croot; while(c) { ctmp = c->next; cn_free(c); c = ctmp; } *croot = NULL; } /** * Add a graph word to a cluster holder. * * @param c [out] cluster holder * @param wg [in] graph word to be added */ static void cn_add_wg(CN_CLUSTER *c, WordGraph *wg) { if (c->wgnum >= c->wgnum_alloc) { c->wgnum_alloc += CN_CLUSTER_WG_STEP; c->wg = (WordGraph **)myrealloc(c->wg, sizeof(WordGraph *) * c->wgnum_alloc); } c->wg[c->wgnum] = wg; c->wgnum++; } /** * Merge a cluster holder into another. * * @param dst [i/o] target cluster holder * @param src [in] source cluster holder. */ static void cn_merge(RecogProcess *r, CN_CLUSTER *dst, CN_CLUSTER *src) { WordGraph *wg; int i, j, n; /* update order matrix */ for(i=0;iwgnum;i++) { wg = src->wg[i]; for(j=0;jwgnum;j++) { for(n=0;nleftwordnum;n++) { r->order_matrix[m2i(wg->leftword[n]->id, dst->wg[j]->id)] = 1; } for(n=0;nrightwordnum;n++) { r->order_matrix[m2i(dst->wg[j]->id, wg->rightword[n]->id)] = 1; } } } graph_update_order(r); /* add words in the source cluster to target cluster */ for(i=0;iwgnum;i++) { cn_add_wg(dst, src->wg[i]); } } /** * Erase a cluster holder and remove it from the list. 
* * @param target [i/o] a cluster holder to be erased * @param root [i/o] pointer to root pointer of cluster holder list */ static void cn_destroy(CN_CLUSTER *target, CN_CLUSTER **root) { CN_CLUSTER *c, *cprev; cprev = NULL; for(c = *root; c; c = c->next) { if (c == target) { if (cprev) { cprev->next = c->next; } else { *root = c->next; } cn_free(c); break; } cprev = c; } } /** * Build / update word list from graph words for a cluster holder. * * @param c [i/o] cluster holder to process * @param winfo [in] word dictionary */ static void cn_build_wordlist(CN_CLUSTER *c, WORD_INFO *winfo) { int i, j; if (c->words) { free(c->words); } c->words = (WORD_ID *)mymalloc(sizeof(WORD_ID) * (c->wgnum + 1)); c->wordsnum = 0; for(i=0;iwgnum;i++) { for(j=0;jwordsnum;j++) { if (is_same_word(c->words[j], c->wg[i]->wid, winfo)) break; } if (j>=c->wordsnum) { c->words[c->wordsnum] = c->wg[i]->wid; c->wordsnum++; } } } /** * qsort_reentrant callback to sort clusters by their time order. * * @param x [in] element 1 * @param y [in] element 2 * @param r [in] recognition process instance * * @return order value */ static int compare_cluster(CN_CLUSTER **x, CN_CLUSTER **y, RecogProcess *r) { //int i, min1, min2; /* * * for(i=0;i<(*x)->wgnum;i++) { * if (i == 0 || min1 > (*x)->wg[i]->lefttime) min1 = (*x)->wg[i]->lefttime; * } * for(i=0;i<(*y)->wgnum;i++) { * if (i == 0 || min2 > (*y)->wg[i]->lefttime) min2 = (*y)->wg[i]->lefttime; * } * if (min1 < min2) return -1; * else if (min1 > min2) return 1; * else return 0; */ int i, j; if (x == y) return 0; for(i=0;i<(*x)->wgnum;i++) { for(j=0;j<(*y)->wgnum;j++) { //if (graph_ordered((*x)->wg[i]->id, (*y)->wg[j]->id)) dir = 1; if (r->order_matrix[m2i((*x)->wg[i]->id, (*y)->wg[j]->id)] == 1) { return -1; } } } return 1; } /** * Compute intra-word similarity of two graph words for confusion network * generation. * * @param w1 [in] graph word 1 * @param w2 [in] graph word 2 * * @return the similarity value. */ static PROB get_intraword_similarity(WordGraph *w1, WordGraph *w2) { PROB overlap; int overlap_frame, sum_len; PROB sim; /* compute overlap_frame */ if (w1->lefttime < w2->lefttime) { if (w1->righttime < w2->lefttime) { overlap_frame = 0; } else if (w1->righttime > w2->righttime) { overlap_frame = w2->righttime - w2->lefttime + 1; } else { overlap_frame = w1->righttime - w2->lefttime + 1; } } else if (w1->lefttime > w2->righttime) { overlap_frame = 0; } else { if (w1->righttime > w2->righttime) { overlap_frame = w2->righttime - w1->lefttime + 1; } else { overlap_frame = w1->righttime - w1->lefttime + 1; } } sum_len = (w1->righttime - w1->lefttime + 1) + (w2->righttime - w2->lefttime + 1); overlap = (PROB)overlap_frame / (PROB)sum_len; #ifdef CDEBUG2 printf("[%d..%d] [%d..%d] overlap = %d / %d = %f", w1->lefttime, w1->righttime, w2->lefttime, w2->righttime, overlap_frame, sum_len, overlap); #endif #ifdef PREFER_GRAPH_CM #ifdef CDEBUG2 printf(" cm=%f, %f", w1->graph_cm, w2->graph_cm); #endif sim = overlap * w1->graph_cm * w2->graph_cm; #else #ifdef CDEBUG2 printf(" cm=%f, %f", w1->cmscore, w2->cmscore); #endif sim = overlap * w1->cmscore * w2->cmscore; #endif #ifdef CDEBUG2 printf(" similarity=%f\n", sim); #endif return sim; } /** * Compute intra-word similarity of two clusters. * * @param c1 [in] cluster 1 * @param c2 [in] cluster 2 * @param winfo [in] word dictionary * * @return the maximum similarity. 
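 *
 * (Sketch: for every pair of graph words sharing the same output word,
 *  the pairwise value is  overlap_frames / (len1 + len2) * cm1 * cm2,
 *  where the cm terms are the graph-based confidences when
 *  PREFER_GRAPH_CM is defined; the maximum over all such pairs is
 *  returned.)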
*/ static PROB get_cluster_intraword_similarity(CN_CLUSTER *c1, CN_CLUSTER *c2, WORD_INFO *winfo) { int i1, i2; PROB simmax, sim; simmax = 0.0; for(i1 = 0; i1 < c1->wgnum; i1++) { for(i2 = 0; i2 < c2->wgnum; i2++) { if (is_same_word(c1->wg[i1]->wid, c2->wg[i2]->wid, winfo)) { //if (graph_ordered(c1->wg[i1]->id, c2->wg[i2]->id)) continue; sim = get_intraword_similarity(c1->wg[i1], c2->wg[i2]); if (simmax < sim) simmax = sim; } } } return(simmax); } #ifdef CDEBUG /** * Output a cluster information. * * @param fp [in] file pointer to output * @param c [in] cluster to output * @param winfo [in] word dictionary */ static void put_cluster(FILE *fp, CN_CLUSTER *c, WORD_INFO *winfo) { int i; for(i=0;iwgnum;i++) { fprintf(fp, "[%d:%s:%d..%d]", c->wg[i]->id, winfo->woutput[c->wg[i]->wid], c->wg[i]->lefttime, c->wg[i]->righttime); } printf("\n"); } #endif /** * Return minimum value of the three arguments. * * @param a [in] value 1 * @param b [in] value 2 * @param c [in] value 3 * * @return the minumum value. */ static int minimum(int a, int b, int c) { int min; min = a; if (b < min) min = b; if (c < min) min = c; return min; } /** * Calculate Levenstein distance (edit distance) of two words. * * @param w1 [in] word ID 1 * @param w2 [in] word ID 2 * @param winfo [in] word dictionary * * @return the distance. */ static int edit_distance(WORD_ID w1, WORD_ID w2, WORD_INFO *winfo, char *b1, char *b2) { int i1, i2; int *d; int len1, len2; int j; int cost; int distance; len1 = winfo->wlen[w1] + 1; len2 = winfo->wlen[w2] + 1; d = (int *)mymalloc(sizeof(int) * len1 * len2); for(j=0;jwseq[w1][i1-1]->name, b1); for(i2=1;i2wseq[w2][i2-1]->name, b2); if (strmatch(b1, b2)) { cost = 0; } else { cost = 1; } d[i2 * len1 + i1] = minimum(d[(i2-1) * len1 + i1] + 1, d[i2 * len1 + (i1-1)] + 1, d[(i2-1) * len1 + (i1-1)] + cost); } } distance = d[len1 * len2 - 1]; free(d); return(distance); } /** * Compute inter-word similarity of two clusters. * * @param c1 [in] cluster 1 * @param c2 [in] cluster 2 * @param winfo [in] word dictionary * * @return the average similarity. 
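 *
 * (Sketch: clusters that are time-ordered with respect to each other are
 *  never merged and yield 0.0; otherwise, for every word pair (w1, w2)
 *  across the two clusters,
 *      sim = 1 - edit_distance(w1, w2) / (wlen(w1) + wlen(w2))
 *  is weighted by the summed posteriors p1 and p2 of each word, and the
 *  average of sim * p1 * p2 over all pairs is returned.)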
*/ static PROB get_cluster_interword_similarity(RecogProcess *r, CN_CLUSTER *c1, CN_CLUSTER *c2, WORD_INFO *winfo, char *buf1, char *buf2) { int i1, i2, j; WORD_ID w1, w2; PROB p1, p2; PROB sim, simsum; int simsum_count; int dist; /* order check */ for(i1 = 0; i1 < c1->wgnum; i1++) { for(i2 = 0; i2 < c2->wgnum; i2++) { if (graph_ordered(r, c1->wg[i1]->id, c2->wg[i2]->id)) { /* ordered clusters should not be merged */ //printf("Ordered:\n"); //printf("c1:\n"); put_cluster(stdout, c1, winfo); //printf("c2:\n"); put_cluster(stdout, c2, winfo); return 0.0; } } } #ifdef CDEBUG2 printf("-----\n"); printf("c1:\n"); put_cluster(stdout, c1, winfo); printf("c2:\n"); put_cluster(stdout, c2, winfo); #endif /* compute similarity */ simsum = 0.0; simsum_count = 0; for(i1 = 0; i1 < c1->wordsnum; i1++) { w1 = c1->words[i1]; p1 = 0.0; for(j = 0; j < c1->wgnum; j++) { if (is_same_word(c1->wg[j]->wid, w1, winfo)) { #ifdef PREFER_GRAPH_CM p1 += c1->wg[j]->graph_cm; #else p1 += c1->wg[j]->cmscore; #endif } } for(i2 = 0; i2 < c2->wordsnum; i2++) { w2 = c2->words[i2]; p2 = 0.0; for(j = 0; j < c2->wgnum; j++) { if (is_same_word(c2->wg[j]->wid, w2, winfo)) { #ifdef PREFER_GRAPH_CM p2 += c2->wg[j]->graph_cm; #else p2 += c2->wg[j]->cmscore; #endif } } dist = edit_distance(w1, w2, winfo, buf1, buf2); #ifdef CDEBUG2 for(j=0;jwlen[w1];j++) { printf("%s ", winfo->wseq[w1][j]->name); } printf("\n"); for(j=0;jwlen[w2];j++) { printf("%s ", winfo->wseq[w2][j]->name); } printf("\n"); printf("distance=%d\n", dist); #endif sim = 1.0 - (float)dist / (float)(winfo->wlen[w1] + winfo->wlen[w2]); #ifdef CDEBUG2 printf("(%s) - (%s): sim = %f, p1 = %f, p2 = %f\n", winfo->woutput[w1], winfo->woutput[w2], sim, p1, p2); #endif simsum += sim * p1 * p2; simsum_count++; } } #ifdef CDEBUG2 printf("SIM=%f\n", simsum / simsum_count); printf("-----\n"); #endif return(simsum / simsum_count); } /** * @brief Create a confusion network from word graph. * * @param root [in] root pointer of word graph * @param r [in] recognition process instance * * @return root pointer to the cluster list. 
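 *
 * (Overview of the procedure below: every graph word first becomes its
 *  own cluster; intra-word clustering then greedily merges the most
 *  similar pair of clusters until no pair has positive similarity, and
 *  inter-word clustering repeats the same greedy merge using the
 *  inter-word similarity; finally word posteriors are summed per
 *  cluster, a NULL-word entry fills any probability mass below 1.0,
 *  words are sorted by posterior and clusters are re-ordered along the
 *  time axis.)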
* * @callgraph * @callergraph * */ CN_CLUSTER * confnet_create(WordGraph *root, RecogProcess *r) { CN_CLUSTER *croot; CN_CLUSTER *c, *cc, *cmax1, *cmax2; WordGraph *wg; PROB sim, max_sim; int wg_totalnum, n, i; char *buf1, *buf2; buf1 = (char *)mymalloc(MAX_HMMNAME_LEN); buf2 = (char *)mymalloc(MAX_HMMNAME_LEN); /* make initial confnet instances from word graph */ croot = NULL; wg_totalnum = 0; for(wg=root;wg;wg=wg->next) { c = cn_new(); cn_add_wg(c, wg); c->next = croot; croot = c; wg_totalnum++; } /* intraword clustering iteration */ do { /* find most similar pair */ max_sim = 0.0; for(c=croot;c;c=c->next) { for(cc=c->next;cc;cc=cc->next) { sim = get_cluster_intraword_similarity(c, cc, r->lm->winfo); if (max_sim < sim) { max_sim = sim; cmax1 = c; cmax2 = cc; } } } /* merge the maximum one if exist */ if (max_sim != 0.0) { #ifdef CDEBUG printf(">>> max_sim = %f\n", max_sim); put_cluster(stdout, cmax1, r->lm->winfo); put_cluster(stdout, cmax2, r->lm->winfo); #endif cn_merge(r, cmax1, cmax2); cn_destroy(cmax2, &croot); } } while (max_sim != 0.0); /* loop until no more similar pair exists */ n = 0; for(c=croot;c;c=c->next) n++; if (verbose_flag) jlog("STAT: confnet: %d words -> %d clusters by intra-word clustering\n", wg_totalnum, n); #ifdef CDEBUG printf("---- result of intra-word clustering ---\n"); i = 0; for(c=croot;c;c=c->next) { printf("%d :", i); put_cluster(stdout, c, r->lm->winfo); #ifdef CDEBUG2 for(i=0;iwgnum;i++) { printf(" "); put_wordgraph(stdout, c->wg[i], r->lm->winfo); } #endif i++; } printf("----------------------------\n"); #endif /* inter-word clustering */ do { /* build word list for each cluster */ for(c=croot;c;c=c->next) cn_build_wordlist(c, r->lm->winfo); /* find most similar pair */ max_sim = 0.0; for(c=croot;c;c=c->next) { for(cc=c->next;cc;cc=cc->next) { sim = get_cluster_interword_similarity(r, c, cc, r->lm->winfo, buf1, buf2); if (max_sim < sim) { max_sim = sim; cmax1 = c; cmax2 = cc; } } } /* merge the maximum one if exist */ if (max_sim != 0.0) { #ifdef CDEBUG printf(">>> max_sim = %f\n", max_sim); put_cluster(stdout, cmax1, r->lm->winfo); put_cluster(stdout, cmax2, r->lm->winfo); #endif cn_merge(r, cmax1, cmax2); cn_destroy(cmax2, &croot); } } while (max_sim != 0.0); /* loop until no more similar pair exists */ n = 0; for(c=croot;c;c=c->next) n++; if (verbose_flag) jlog("STAT: confnet: -> %d clusters by inter-word clustering\n", n); /* compute posterior probabilities and insert NULL entry */ { PROB p, psum; int j; for(c=croot;c;c=c->next) { psum = 0.0; c->pp = (LOGPROB *)mymalloc(sizeof(LOGPROB) * (c->wordsnum + 1)); for(i=0;iwordsnum;i++) { p = 0.0; for(j = 0; j < c->wgnum; j++) { if (is_same_word(c->wg[j]->wid, c->words[i], r->lm->winfo)) { #ifdef PREFER_GRAPH_CM p += c->wg[j]->graph_cm; #else p += c->wg[j]->cmscore; #endif } } c->pp[i] = p; psum += p; } if (psum < 1.0) { c->words[c->wordsnum] = WORD_INVALID; c->pp[c->wordsnum] = 1.0 - psum; c->wordsnum++; } } } /* sort the words in each cluster by their posterior probabilities */ { int j; WORD_ID wtmp; LOGPROB ltmp; for(c=croot;c;c=c->next) { for(i=0;iwordsnum;i++) { for(j=c->wordsnum - 1;j>i;j--) { if (c->pp[j-1] < c->pp[j]) { ltmp = c->pp[j-1]; c->pp[j-1] = c->pp[j]; c->pp[j] = ltmp; wtmp = c->words[j-1]; c->words[j-1] = c->words[j]; c->words[j] = wtmp; } } } } } /* re-order clusters by their beginning frames */ { CN_CLUSTER **clist; int k; /* sort cluster list by the left frame*/ clist = (CN_CLUSTER **)mymalloc(sizeof(CN_CLUSTER *) * n); for(i=0,c=croot;c;c=c->next) { clist[i++] = c; } 
qsort_reentrant(clist, n, sizeof(CN_CLUSTER *), (int (*)(const void *, const void *, void *))compare_cluster, r); croot = NULL; for(k=0;knext = NULL; else clist[k]->next = clist[k+1]; } free(clist); } #if 0 /* output */ printf("---- begin confusion network ---\n"); for(c=croot;c;c=c->next) { for(i=0;iwordsnum;i++) { printf("(%s:%.3f)", (c->words[i] == WORD_INVALID) ? "-" : r->lm->winfo->woutput[c->words[i]], c->pp[i]); if (i == 0) printf(" "); } printf("\n"); } printf("---- end confusion network ---\n"); #endif free(buf2); free(buf1); return(croot); } /* end of file */ julius-4.2.2/libjulius/src/gmm.c0000644001051700105040000005454612004452401015053 0ustar ritrlab/** * @file gmm.c * * * @brief GMM による入力棄却およびVAD * * Gaussian Mixture Model (GMM) が起動時に指定された場合,Julius/Julian は * 入力発話に対してフレームごとにスコアを計算し,その累積スコアを算出する. * これはGMMに基づく入力音声の発話検証および棄却に用いられる. 実際の計算は * 第1パスの認識処理と並行してリアルタイムに行なわれ,第1パス終了と同時に * 結果が出力される. * * GMMのスコア計算には Gaussian pruning の safe algorithm が用いられ, * 各フレームにおいて上位 N 個だけが正しく得られるように計算される. * ただし通常の認識用音響モデルの場合と異なり,直前フレームの順位情報は * 用いていない. * * GMM_VAD 定義時は,上記の入力棄却に加えて,short-pause segmentation と * 同じ枠組にを用いた VAD が行われる. * * * * @brief Input rejection and VAD using GMM * * When a Gaussian Mixture Model (GMM) is specified on startup, Julius/Julian * will compute the frame-wise likelihoods of each GMM for given inputs, * and produces the accumulated scores for each. Then the input rejection is * determined from the value. Actually, the recognition will be computed * on-line concurrently with the 1st pass, and the result will be got as * soon as the 1st pass ends. * * Gaussian pruning is performed using the safe algorithm in the computation * of GMM scores. In each frame, pruning will be done to fully compute only * the top N Gaussians. The algorithm is slightly simpler than AM computation, * i.e. the score order of the previous frame is not used here. * * When GMM_VAD is defined, a GMM-based VAD will be enabled in addition to * the input rejection, using the scheme of short-pause segmentation. * * * @author Akinobu LEE * @date Tue Mar 15 05:14:10 2005 * * $Revision: 1.6 $ * */ /* * Copyright (c) 2003-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #include #undef MES /** * * Gaussianのスコアを計算済みGaussianリストのどの位置に挿入すべきかを返す. * * @param gc [i/o] GMM計算用ワークエリア * @param score [in] 挿入したいスコア * @param len [in] 現在のリストの長さ * * @return リスト内の挿入位置 * * * Return insertion point where a computed Gaussian score should be * inserted in current list of computed Gaussians. * * @param gc [i/o] work area for GMM calculation * @param score [in] a score to be inserted * @param len [in] current length of the list * * @return index to insert the value at the list. * */ static int gmm_find_insert_point(GMMCalc *gc, LOGPROB score, int len) { /* binary search on score */ int left = 0; int right = len - 1; int mid; while (left < right) { mid = (left + right) / 2; if (gc->OP_calced_score[mid] > score) { left = mid + 1; } else { right = mid; } } return(left); } /** * * あるGaussianの計算結果を計算済みGaussianリストに格納する. * * @param gc [i/o] GMM計算用ワークエリア * @param id [in] Gaussian の GMM 内での番号 * @param score [in] その Gaussian の計算された音響尤度 * @param len [in] 現在のリストの長さ(現在格納されている Gaussian の数) * * @return 格納後のリストの長さ. * * * Store a Gaussian likelihood to the list of computed Gaussians. 
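 *
 * The cache is kept sorted in descending score order, so the insertion
 * point returned by gmm_find_insert_point() is the first index whose
 * stored score is not larger than the new one.  A stand-alone sketch of
 * that search over a plain float array (illustration only):
 *
 * @code
 * // binary search: position where "score" should be inserted into the
 * // descending-sorted array a[0..len-1]
 * static int find_insert_point(const float *a, float score, int len)
 * {
 *   int left = 0, right = len - 1, mid;
 *   while (left < right) {
 *     mid = (left + right) / 2;
 *     if (a[mid] > score) left = mid + 1;
 *     else right = mid;
 *   }
 *   return left;
 * }
 * @endcode
 *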
* * @param gc [i/o] work area for GMM calculation * @param id [in] id of a Gaussian in the GMM to be stored * @param score [in] the likelihood of the Gaussian to be stored * @param len [in] current list length (= current number of Gaussians in cache) * * @return the current length of list after the storing. * */ static int gmm_cache_push(GMMCalc *gc, int id, LOGPROB score, int len) { int insertp; if (len == 0) { /* first one */ gc->OP_calced_score[0] = score; gc->OP_calced_id[0] = id; return(1); } if (gc->OP_calced_score[len-1] >= score) { /* bottom */ if (len < gc->OP_gprune_num) { /* append to bottom */ gc->OP_calced_score[len] = score; gc->OP_calced_id[len] = id; len++; } return len; } if (gc->OP_calced_score[0] < score) { insertp = 0; } else { insertp = gmm_find_insert_point(gc, score, len); } if (len < gc->OP_gprune_num) { memmove(&(gc->OP_calced_score[insertp+1]), &(gc->OP_calced_score[insertp]), sizeof(LOGPROB)*(len - insertp)); memmove(&(gc->OP_calced_id[insertp+1]), &(gc->OP_calced_id[insertp]), sizeof(int)*(len - insertp)); } else if (insertp < len - 1) { memmove(&(gc->OP_calced_score[insertp+1]), &(gc->OP_calced_score[insertp]), sizeof(LOGPROB)*(len - insertp - 1)); memmove(&(gc->OP_calced_id[insertp+1]), &(gc->OP_calced_id[insertp]), sizeof(int)*(len - insertp - 1)); } gc->OP_calced_score[insertp] = score; gc->OP_calced_id[insertp] = id; if (len < gc->OP_gprune_num) len++; return(len); } /** * * 現在のフレームの入力ベクトルに対する Gaussian の出力確率を計算する. * Gaussian pruning は行なわない. * * @param gc [i/o] GMM計算用ワークエリア * @param binfo [in] Gaussian * * @return 出力確率の対数値 * * * Compute an output probability of a Gaussian for the input vector of * current frame. No Gaussian pruning is performed in this function. * * @param gc [i/o] work area for GMM calculation * @param binfo [in] Gaussian * * @return the log output probability. * */ static LOGPROB gmm_compute_g_base(GMMCalc *gc, HTK_HMM_Dens *binfo) { VECT tmp, x; VECT *mean; VECT *var; VECT *vec = gc->OP_vec; short veclen = gc->OP_veclen; if (binfo == NULL) return(LOG_ZERO); mean = binfo->mean; var = binfo->var->vec; tmp = 0.0; for (; veclen > 0; veclen--) { x = *(vec++) - *(mean++); tmp += x * x * *(var++); } return((tmp + binfo->gconst) * -0.5); } /** * * 現在のフレームの入力ベクトルに対する Gaussian の出力確率を計算する. * 計算時には固定しきい値による safe pruning を行なう. * * @param gc [i/o] GMM計算用ワークエリア * @param binfo [in] Gaussian * @param thres [in] safe pruning のための枝刈りしきい値 * * @return 出力確率の対数値 * * * Compute an output probability of a Gaussian for the input vector of * current frame. Safe pruning is performed in this function. * * @param gc [i/o] work area for GMM calculation * @param binfo [in] Gaussian * @param thres [in] pruning threshold for safe pruning * * @return the log output probability. * */ static LOGPROB gmm_compute_g_safe(GMMCalc *gc, HTK_HMM_Dens *binfo, LOGPROB thres) { VECT tmp, x; VECT *mean; VECT *var; VECT *vec = gc->OP_vec; short veclen = gc->OP_veclen; VECT fthres = thres * (-2.0); if (binfo == NULL) return(LOG_ZERO); mean = binfo->mean; var = binfo->var->vec; tmp = binfo->gconst; for (; veclen > 0; veclen--) { x = *(vec++) - *(mean++); tmp += x * x * *(var++); if (tmp > fthres) return LOG_ZERO; } return(tmp * -0.5); } /** * * GMM計算における Gaussian pruning のためのワークエリアを確保する * * @param gc [i/o] GMM計算用ワークエリア * @param hmminfo [in] HMM 構造体 * @param prune_num [in] Gaussian pruning において計算する上位ガウス分布数 * * * Allocate work area for Gaussian pruning for GMM calculation. 
* * @param gc [i/o] work area for GMM calculation * @param hmminfo [in] HMM structure * @param prune_num [in] number of top Gaussians to be computed at the pruning * */ static void gmm_gprune_safe_init(GMMCalc *gc, HTK_HMM_INFO *hmminfo, int prune_num) { /* store the pruning num to local area */ gc->OP_gprune_num = prune_num; /* maximum Gaussian set size = maximum mixture size * nstream */ gc->OP_calced_maxnum = hmminfo->maxmixturenum * gc->OP_nstream; /* allocate memory for storing list of currently computed Gaussian in a frame */ gc->OP_calced_score = (LOGPROB *)mymalloc(sizeof(LOGPROB) * gc->OP_calced_maxnum); gc->OP_calced_id = (int *)mymalloc(sizeof(int) * gc->OP_calced_maxnum); } /** * * @brief ガウス分布集合内の各ガウス分布の現フレームに対する出力確率を計算する. * * Gaussian pruning により,実際には上位 N 個のみを保証する枝刈りが行なわれ, * スコアの低いガウス分布は計算されない. * * 計算結果は計算済みGaussianリスト (OP_calced_score, OP_calced_id) に * 格納される. * * @param gc [i/o] GMM計算用ワークエリア * @param g [in] ガウス分布集合 * @param gnum [in] @a g の長さ * * * @brief Compute scores for a set of Gaussians with Gaussian pruning for * the current frame. * * Gaussian pruning will be performed to guarantee only the top N Gaussians * to be fully computed. The results will be stored in the list of * computed Gaussians in OP_calced_score and OP_calced_id. * * @param gc [i/o] work area for GMM calculation * @param g [in] set of Gaussians * @param gnum [in] length of @a g * */ static void gmm_gprune_safe(GMMCalc *gc, HTK_HMM_Dens **g, int gnum) { int i, num = 0; LOGPROB score, thres; thres = LOG_ZERO; for (i = 0; i < gnum; i++) { if (num < gc->OP_gprune_num) { score = gmm_compute_g_base(gc, g[i]); } else { score = gmm_compute_g_safe(gc, g[i], thres); if (score <= thres) continue; } num = gmm_cache_push(gc, i, score, num); thres = gc->OP_calced_score[num-1]; } gc->OP_calced_num = num; } /** * * あるGMM状態の現フレームに対する出力確率を計算する. * * @param gc [i/o] GMM計算用ワークエリア * @param state [in] GMM 状態 * * @return 出力確率の対数スコア * * * Compute the output probability of a GMM state for the current frame. * * @param gc [i/o] work area for GMM calculation * @param state [in] GMM state * * @return the log probability. * */ static LOGPROB gmm_calc_mix(GMMCalc *gc, HTK_HMM_State *state) { int i; LOGPROB logprob, logprobsum; int s; PROB stream_weight; /* compute Gaussian set */ logprobsum = 0.0; for(s=0;sOP_nstream;s++) { /* set stream weight */ if (state->w) stream_weight = state->w->weight[s]; else stream_weight = 1.0; /* setup storage pointer for this mixture pdf */ gc->OP_vec = gc->OP_vec_stream[s]; gc->OP_veclen = gc->OP_veclen_stream[s]; /* compute output probabilities */ gmm_gprune_safe(gc, state->pdf[s]->b, state->pdf[s]->mix_num); /* computed Gaussians will be set in: score ... OP_calced_score[0..OP_calced_num] id ... OP_calced_id[0..OP_calced_num] */ /* sum */ for(i=0;iOP_calced_num;i++) { gc->OP_calced_score[i] += state->pdf[s]->bweight[gc->OP_calced_id[i]]; } /* add log probs */ logprob = addlog_array(gc->OP_calced_score, gc->OP_calced_num); /* if outprob of a stream is zero, skip this stream */ if (logprob <= LOG_ZERO) continue; /* sum all the obtained mixture scores */ logprobsum += logprob * stream_weight; } if (logprobsum == 0.0) return(LOG_ZERO); /* no valid stream */ if (logprobsum <= LOG_ZERO) return(LOG_ZERO); /* lowest == LOG_ZERO */ return (logprob * INV_LOG_TEN); } /** * * 入力の指定フレームにおけるGMM状態のスコアを求めるメイン関数. 
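 *
 * For reference, the Gaussian pruning performed on the way (see
 * gmm_gprune_safe() above) follows the simplified pattern below.  This is
 * an explanatory sketch only: compute_full(), compute_pruned() and
 * push_sorted() are hypothetical stand-ins for gmm_compute_g_base(),
 * gmm_compute_g_safe() and gmm_cache_push().
 *
 * @code
 * extern float compute_full(int i);                  // full likelihood
 * extern float compute_pruned(int i, float thres);   // stops below thres
 * extern int   push_sorted(int i, float score, int num);
 *
 * static int prune_loop(int gnum, int prune_num, const float *calced_score)
 * {
 *   float thres = LOG_ZERO;
 *   int i, num = 0;
 *   for (i = 0; i < gnum; i++) {
 *     float score;
 *     if (num < prune_num) {
 *       score = compute_full(i);           // cache not full: compute fully
 *     } else {
 *       score = compute_pruned(i, thres);  // cut off below the threshold
 *       if (score <= thres) continue;      // pruned
 *     }
 *     num = push_sorted(i, score, num);    // sorted insert, may drop worst
 *     thres = calced_score[num - 1];       // worst kept score = new threshold
 *   }
 *   return num;
 * }
 * @endcode
 *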
* * @param gc [i/o] GMM計算用ワークエリア * @param t [in] 計算するフレーム * @param stateinfo [in] GMM状態 * @param param [in] 入力ベクトル系列 * * @return 出力確率の対数スコア * * * Main function to compute the output probability of a GMM state for * the specified input frame. * * @param gc [i/o] work area for GMM calculation * @param t [in] time frame on which the output probability should be computed * @param stateinfo [in] GMM state * @param param [in] input vector sequence * * @return the log output probability. * */ static LOGPROB outprob_state_nocache(GMMCalc *gc, int t, HTK_HMM_State *stateinfo, HTK_Param *param) { int d, i; /* set global values for outprob functions to access them */ for(d=0,i=0;iOP_nstream;i++) { gc->OP_vec_stream[i] = &(param->parvec[t][d]); d += gc->OP_veclen_stream[i]; } return(gmm_calc_mix(gc, stateinfo)); } /************************************************************************/ /* global functions */ /** * * GMMの計算のための初期化. 起動時に一度だけ呼ばれる. * * @param recog [i/o] エンジンインスタンス * * * Initialization for computing GMM likelihoods. This will be called * once on startup. * * @param recog [i/o] engine instance * * * @callgraph * @callergraph * */ boolean gmm_init(Recog *recog) { HTK_HMM_INFO *gmm; HTK_HMM_Data *d; GMMCalc *gc; int i; gmm = recog->gmm; /* check GMM format */ /* tied-mixture GMM is not supported */ if (gmm->is_tied_mixture) { jlog("ERROR: gmm_init: tied-mixture GMM is not supported\n"); return FALSE; } /* assume 3 state GMM (only one output state) */ for(d=gmm->start;d;d=d->next) { if (d->state_num > 3) { jlog("ERROR: gmm_init: more than three states (one output state) defined in GMM [%s]\n", d->name); return FALSE; } } /* check if CMN needed */ /* allocate work area */ if (recog->gc == NULL) { gc = (GMMCalc *)mymalloc(sizeof(GMMCalc)); recog->gc = gc; } else { gc = recog->gc; } /* allocate buffers */ gc->gmm_score = (LOGPROB *)mymalloc(sizeof(LOGPROB) * gmm->totalhmmnum); #ifdef GMM_VAD gc->nframe = recog->jconf->detect.gmm_margin; gc->rates = (LOGPROB *)mymalloc(sizeof(LOGPROB) * gc->nframe); #endif gc->is_voice = (boolean *)mymalloc(sizeof(boolean) * gmm->totalhmmnum); i = 0; if (recog->jconf->reject.gmm_reject_cmn_string) { for(d=recog->gmm->start;d;d=d->next) { if (strstr(recog->jconf->reject.gmm_reject_cmn_string, d->name)) { gc->is_voice[i] = FALSE; } else { gc->is_voice[i] = TRUE; } i++; } } else { for(d=recog->gmm->start;d;d=d->next) { gc->is_voice[i] = TRUE; i++; } } /* initialize work area */ gc->OP_nstream = gmm->opt.stream_info.num; for(i=0;iOP_nstream;i++) { gc->OP_veclen_stream[i] = gmm->opt.stream_info.vsize[i]; } gmm_gprune_safe_init(gc, gmm, recog->jconf->reject.gmm_gprune_num); /* check if variances are inversed */ if (!gmm->variance_inversed) { /* here, inverse all variance values for faster computation */ htk_hmm_inverse_variances(gmm); gmm->variance_inversed = TRUE; } return TRUE; } /** * * GMM計算のための準備を行なう. 1入力開始ごとに呼ばれる. * * @param recog [i/o] エンジンインスタンス * * * Prepare for the next GMM computation. This will be called just before * an input begins. 
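 *
 * The calling sequence expected around this function is sketched below.
 * has_next_input(), next_frame() and reject_current_input() are
 * hypothetical placeholders for the engine's input loop; the gmm_* calls
 * are the real entry points of this file.
 *
 * @code
 * gmm_init(recog);                  // once at startup
 * while (has_next_input(recog)) {   // hypothetical input loop
 *   gmm_prepare(recog);             // reset accumulated scores
 *   while (next_frame(recog))       // hypothetical per-frame loop
 *     gmm_proceed(recog);           // accumulate frame-wise GMM scores
 *   gmm_end(recog);                 // pick best GMM, compute confidence
 *   if (!gmm_valid_input(recog)) {  // best GMM is a noise model?
 *     reject_current_input(recog);  // hypothetical caller-side action
 *   }
 * }
 * @endcode
 *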
* * @param recog [i/o] engine instance * * * @callgraph * @callergraph */ void gmm_prepare(Recog *recog) { HTK_HMM_Data *d; int i; /* initialize score buffer and frame count */ i = 0; for(d=recog->gmm->start;d;d=d->next) { recog->gc->gmm_score[i] = 0.0; i++; } #ifdef GMM_VAD for(i=0;igc->nframe;i++) recog->gc->rates[i] = 0.0; recog->gc->framep = 0; recog->gc->filled = FALSE; recog->gc->in_voice = FALSE; #endif recog->gc->framecount = 0; #ifdef GMM_VAD_DEBUG printf("GMM_VAD: init\n"); #endif } /** * * 与えられた入力ベクトル列上のあるフレームについて,全GMMのスコアを計算し, * 計算結果を gmm_score に積算する. * * GMM_VAD 定義時は,後で VAD 判定するために,過去 jconf->detect.gmm_margin * フレーム分の VAD スコア (音声GMMの最大スコア - 雑音GMMの最大スコア)が * 保存される. * * @param recog [i/o] エンジンインスタンス * * * Compute output probabilities of all GMM for a given input vector, and * accumulate the results to the gmm_score buffer. * * When GMM_VAD is defined, VAD scores, * "(maximum score of speech GMMs) - (maximum score of noise GMMs)" of * last frames (jconf->detect.gmm_margin) will be stored for later VAD * decision. * * @param recog [i/o] engine instance * * * @callgraph * @callergraph */ void gmm_proceed(Recog *recog) { HTK_HMM_Data *d; GMMCalc *gc; int i; MFCCCalc *mfcc; LOGPROB score; #ifdef GMM_VAD LOGPROB max_n; LOGPROB max_v; #endif mfcc = recog->gmmmfcc; gc = recog->gc; if (!mfcc->valid) return; gc->framecount++; #ifdef GMM_VAD max_n = max_v = LOG_ZERO; #endif i = 0; for(d=recog->gmm->start;d;d=d->next) { score = outprob_state_nocache(gc, mfcc->f, d->s[1], mfcc->param); gc->gmm_score[i] += score; #ifdef GMM_VAD if (gc->is_voice[i]) { if (max_v < score) max_v = score; } else { if (max_n < score) max_n = score; } #endif #ifdef MES jlog("DEBUG: [%s: total=%f avg=%f]\n", d->name, gc->gmm_score[i], gc->gmm_score[i] / (float)gc->framecount); #endif i++; } #ifdef GMM_VAD #ifdef GMM_VAD_DEBUG //printf("GMM_VAD: max_v = %f, max_n = %f, rate = %f\n", max_v, max_n, max_v - max_n, gc->framep); #endif /* set rate of this frame */ gc->rates[gc->framep] = max_v - max_n; #ifdef GMM_VAD_DEBUG printf("GMM_VAD: %f\n", max_v - max_n); #endif /* increment current frame pointer */ gc->framep++; /* if reached end, go to start point */ if (gc->framep >= gc->nframe) { gc->filled = TRUE; gc->framep = 0; } #endif } /** * * @brief GMMの計算を終了し,結果を出力する. * * gmm_proceed() によって累積された各フレームごとのスコアから, * 最大スコアのGMMを決定する. その事後確率に基づく信頼度を計算し * 最終的な結果を result_gmm() によって出力する. * * @param recog [i/o] エンジンインスタンス * * * @brief Finish the GMM computation for an input, and output the result. * * The GMM of the maximum score is finally determined from the accumulated * scores computed by gmm_proceed(), and compute the confidence score of the * maximum GMM using posterior probability. Then the result will be output * using result_gmm(). 
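 *
 * When CONFIDENCE_MEASURE is defined, the confidence value computed here
 * is a posterior-style ratio over the accumulated log10 scores, roughly
 * cm = 1 / sum_i 10^(alpha * (score_i - score_max)).  A stand-alone sketch
 * (the code below uses a fixed alpha of 0.05; pow() is from math.h):
 *
 * @code
 * static float gmm_confidence(const float *score, int n, int best, float alpha)
 * {
 *   float sum = 0.0f;
 *   int i;
 *   for (i = 0; i < n; i++)
 *     sum += (float)pow(10.0, alpha * (score[i] - score[best]));
 *   return 1.0f / sum;   // approaches 1.0 when the best model dominates
 * }
 * @endcode
 *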
* * @param recog [i/o] engine instance * * * @callgraph * @callergraph */ void gmm_end(Recog *recog) { HTK_HMM_INFO *gmm; LOGPROB *score; HTK_HMM_Data *d; LOGPROB maxprob; HTK_HMM_Data *dmax; #ifdef CONFIDENCE_MEASURE LOGPROB sum; #endif int i; int maxid; if (recog->gc->framecount == 0) return; gmm = recog->gmm; score = recog->gc->gmm_score; /* get max score */ i = 0; maxprob = LOG_ZERO; dmax = NULL; maxid = 0; for(d=gmm->start;d;d=d->next) { if (maxprob < score[i]) { dmax = d; maxprob = score[i]; maxid = i; } i++; } recog->gc->max_d = dmax; recog->gc->max_i = maxid; #ifdef CONFIDENCE_MEASURE /* compute CM */ sum = 0.0; i = 0; for(d=gmm->start;d;d=d->next) { //sum += pow(10, recog->jconf->annotate.cm_alpha * (score[i] - maxprob)); sum += pow(10, 0.05 * (score[i] - maxprob)); i++; } recog->gc->gmm_max_cm = 1.0 / sum; #endif /* output result */ callback_exec(CALLBACK_RESULT_GMM, recog); } /** * * GMMの識別結果,最後の入力が音声入力として有効であったか * 無効であったかを返す. * * @param recog [i/o] エンジンインスタンス * * @return 一位のGMMの名前が gmm_reject_cmn_string 内に無ければ valid として * TRUE, あれば invalid として FALSE を返す. * * * Return whether the last input was valid or invalid, from the result of * GMM computation. * * @param recog [i/o] engine instance * * @return TRUE if input is valid, i.e. the name of maximum GMM is not included * in gmm_reject_cmn_string, or FALSE if input is invalid, i.e. the name is * included in that string. * * * @callgraph * @callergraph */ boolean gmm_valid_input(Recog *recog) { if (recog->gc->max_d == NULL) return FALSE; if (recog->gc->is_voice[recog->gc->max_i]) { return TRUE; } return FALSE; } /** * * Free work area used for GMM calculation. * * * GMM計算に用いたワークエリアを開放する. * * * @param recog [i/o] engine instance * * @callgraph * @callergraph * */ void gmm_free(Recog *recog) { if (recog->gc) { free(recog->gc->OP_calced_score); free(recog->gc->OP_calced_id); free(recog->gc->is_voice); #ifdef GMM_VAD free(recog->gc->rates); #endif free(recog->gc->gmm_score); free(recog->gc); recog->gc = NULL; } } #ifdef GMM_VAD /** * * Compute score of voice activity from the last (jconf->detect.gmm_margin) * frames. Positive value designates speech, and negative means noise. * * * 直前の (jconf->detect.gmm_margin) フレーム分のスコアから * voice activity のスコアを計算する. 正の値は音声,負の値は雑音を表す. * * * @param gc [i/o] work area for GMM calculation * @param mean_ret [out] mean value of last (jconf->detect.gmm_margin) frames * @param var_ret [out] variance of last (jconf->detect.gmm_margin) frames * @param count_ret [out] count of speech frames in last (jconf->detect.gmm_margin) frames * */ static void voice_activity_score(GMMCalc *gc, float *mean_ret, float *var_ret, int *count_ret) { int i, len; LOGPROB mean; LOGPROB var; LOGPROB x; int count; if (!gc->filled) { /* cycle buffer not filled yet */ *mean_ret = 0.0; *var_ret = 0.0; *count_ret = 0; return; } if (gc->filled) { len = gc->nframe; } else { len = gc->framep; } mean = 0; count = 0; for(i=0;irates[i]; if (gc->rates[i] > 0.0) count++; } mean /= (float)len; var = 0.0; for(i=0;irates[i]; var += x * x; } var /= (float)len; *mean_ret = mean; *var_ret = var; *count_ret = count; } /** * * Check if trigger of speech / noise segment. If we are in noise segment * and some speech input begins at this frame, recog->gc->up_trigger will * be set to TRUE. If current is in speech segment and it ended at * this frame, recog->gc->down_trigger will be set to FALSE. * * * 音声/非音声区間の区切りを検知する. これまでが非音声区間でこのフレームで * 音声トリガを検知したとき,recog->gc->up_trigger を TRUE にセットする. 現在 * 音声区間で区間終了を検知したとき,recog->gc->down_trigger を TRUE に * セットする. 
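 *
 * The decision is a simple hysteresis on the mean VAD score of the recent
 * frames returned by voice_activity_score() above; schematically (a sketch
 * of the logic implemented below, where the thresholds come from
 * jconf->detect.gmm_uptrigger_thres / gmm_downtrigger_thres):
 *
 * @code
 * if (in_voice) {
 *   if (mean <= down_thres) { down_trigger = TRUE; in_voice = FALSE; }
 * } else {
 *   if (mean >= up_thres)   { up_trigger   = TRUE; in_voice = TRUE;  }
 * }
 * @endcode
 *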
* * * @param recog [i/o] engine instance * * @callgraph * @callergraph */ void gmm_check_trigger(Recog *recog) { GMMCalc *gc; gc = recog->gc; float mean; float var; int count; gc->up_trigger = gc->down_trigger = FALSE; voice_activity_score(gc, &mean, &var, &count); if (gc->in_voice) { if (mean <= recog->jconf->detect.gmm_downtrigger_thres) { gc->down_trigger = TRUE; gc->in_voice = FALSE; } } else { if (mean >= recog->jconf->detect.gmm_uptrigger_thres) { gc->up_trigger = TRUE; gc->in_voice = TRUE; } } #ifdef GMM_VAD_DEBUG printf("GMM_VAD: %s: %f %f %d", gc->in_voice ? "VOICE" : "NOISE", mean, var, count); if (gc->up_trigger) printf(": BEGIN"); if (gc->down_trigger) printf(": END"); printf("\n"); #endif } #endif /* GMM_VAD */ /* end of file */ julius-4.2.2/libjulius/src/m_usage.c0000644001051700105040000005277212004452401015712 0ustar ritrlab/** * @file m_usage.c * * * @brief ヘルプを表示する * * * * @brief Print help. * * * @author Akinobu Lee * @date Fri May 13 15:04:34 2005 * * $Revision: 1.17 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #include /** * * ヘルプを表示する. * * * * Output help document. * * * * @param fp [in] file pointer to output help * * @callgraph * @callergraph * @ingroup engine * */ void j_output_argument_help(FILE *fp) { Jconf *jconf; #ifdef ENABLE_PLUGIN int id; char buf[64]; PLUGIN_ENTRY *p; FUNC_VOID func; #endif /* load default values */ jconf = j_jconf_new(); j_put_header(fp); j_put_compile_defs(fp); fprintf(fp, "\nOptions:\n"); fprintf(fp, "\n--- Global Options -----------------------------------------------\n"); fprintf(fp, "\n Speech Input:\n"); fprintf(fp, " (Can extract only MFCC based features from waveform)\n"); fprintf(fp, " [-input devname] input source (default = htkparam)\n"); fprintf(fp, " htkparam/mfcfile HTK parameter file\n"); fprintf(fp, " file/rawfile waveform file (%s)\n", SUPPORTED_WAVEFILE_FORMAT); #ifdef USE_MIC fprintf(fp, " mic default microphone device\n"); # ifdef HAS_ALSA fprintf(fp, " alsa use ALSA interface\n"); # endif # ifdef HAS_OSS fprintf(fp, " oss use OSS interface\n"); # endif # ifdef HAS_ESD fprintf(fp, " esd use ESounD interface\n"); # endif # ifdef HAS_PULSEAUDIO fprintf(fp, " pulseaudio use PulseAudio interface\n"); # endif #endif #ifdef USE_NETAUDIO fprintf(fp, " netaudio DatLink/NetAudio server\n"); #endif fprintf(fp, " adinnet adinnet client (TCP/IP)\n"); fprintf(fp, " stdin standard input\n"); #ifdef ENABLE_PLUGIN if (global_plugin_list) { if ((id = plugin_get_id("adin_get_optname")) >= 0) { for(p=global_plugin_list[id];p;p=p->next) { func = (FUNC_VOID) p->func; (*func)(buf, (int)64); fprintf(fp, " %-18s(adin plugin #%d)\n", buf, p->source_id); } } if ((id = plugin_get_id("fvin_get_optname")) >= 0) { for(p=global_plugin_list[id];p;p=p->next) { func = (FUNC_VOID) p->func; (*func)(buf, (int)64); fprintf(fp, " %-18s(feature vector input plugin #%d)\n", buf, p->source_id); } } } #endif fprintf(fp, " [-filelist file] filename of input file list\n"); #ifdef USE_NETAUDIO fprintf(fp, " [-NA host:unit] get audio from NetAudio server at host:unit\n"); #endif fprintf(fp, " [-adport portnum] adinnet port number to listen (%d)\n", jconf->input.adinnet_port); fprintf(fp, " [-48] enable 48kHz sampling with internal down sampler (OFF)\n"); fprintf(fp, " [-zmean/-nozmean] enable/disable DC offset removal (OFF)\n"); fprintf(fp, " [-nostrip] disable 
stripping off zero samples\n"); fprintf(fp, " [-record dir] record triggered speech data to dir\n"); fprintf(fp, " [-rejectshort msec] reject an input shorter than specified\n"); #ifdef POWER_REJECT fprintf(fp, " [-powerthres value] rejection threshold of average power (%.1f)\n", jconf->reject.powerthres); #endif fprintf(fp, "\n Speech Detection: (default: on=mic/net off=files)\n"); /*fprintf(fp, " [-pausesegment] turn on (force) pause detection\n");*/ /*fprintf(fp, " [-nopausesegment] turn off (force) pause detection\n");*/ fprintf(fp, " [-cutsilence] turn on (force) skipping long silence\n"); fprintf(fp, " [-nocutsilence] turn off (force) skipping long silence\n"); fprintf(fp, " [-lv unsignedshort] input level threshold (0-32767) (%d)\n", jconf->detect.level_thres); fprintf(fp, " [-zc zerocrossnum] zerocross num threshold per sec. (%d)\n", jconf->detect.zero_cross_num); fprintf(fp, " [-headmargin msec] header margin length in msec. (%d)\n", jconf->detect.head_margin_msec); fprintf(fp, " [-tailmargin msec] tail margin length in msec. (%d)\n", jconf->detect.tail_margin_msec); fprintf(fp, " [-chunksize sample] unit length for processing (%d)\n", jconf->detect.chunk_size); fprintf(fp, "\n GMM utterance verification:\n"); fprintf(fp, " -gmm filename GMM definition file\n"); fprintf(fp, " -gmmnum num GMM Gaussian pruning num (%d)\n", jconf->reject.gmm_gprune_num); fprintf(fp, " -gmmreject string comma-separated list of noise model name to reject\n"); #ifdef GMM_VAD fprintf(fp, "\n GMM-based VAD:\n"); fprintf(fp, " -gmmmargin frames backstep margin on speech trigger (%d)\n", jconf->detect.gmm_margin); fprintf(fp, " -gmmup score up-trigger threshold (%.1f)\n", jconf->detect.gmm_uptrigger_thres); fprintf(fp, " -gmmdown score down-trigger threshold (%.1f)\n", jconf->detect.gmm_downtrigger_thres); #endif fprintf(fp, "\n On-the-fly Decoding: (default: on=mic/net off=files)\n"); fprintf(fp, " [-realtime] turn on, input streamed with MAP-CMN\n"); fprintf(fp, " [-norealtime] turn off, input buffered with sentence CMN\n"); fprintf(fp, "\n Others:\n"); fprintf(fp, " [-C jconffile] load options from jconf file\n"); fprintf(fp, " [-quiet] reduce output to only word string\n"); fprintf(fp, " [-demo] equal to \"-quiet -progout\"\n"); fprintf(fp, " [-debug] (for debug) dump numerous log\n"); fprintf(fp, " [-callbackdebug] (for debug) output message per callback\n"); fprintf(fp, " [-check (wchmm|trellis)] (for debug) check internal structure\n"); fprintf(fp, " [-check triphone] triphone mapping check\n"); fprintf(fp, " [-setting] print engine configuration and exit\n"); fprintf(fp, " [-help] print this message and exit\n"); fprintf(fp, "\n--- Instance Declarations ----------------------------------------\n\n"); fprintf(fp, " [-AM] start a new acoustic model instance\n"); fprintf(fp, " [-LM] start a new language model instance\n"); fprintf(fp, " [-SR] start a new recognizer (search) instance\n"); fprintf(fp, " [-AM_GMM] start an AM feature instance for GMM\n"); fprintf(fp, " [-GLOBAL] start a global section\n"); fprintf(fp, " [-nosectioncheck] disable option location check\n"); fprintf(fp, "\n--- Acoustic Model Options (-AM) ---------------------------------\n"); fprintf(fp, "\n Acoustic analysis:\n"); fprintf(fp, " [-htkconf file] load parameters from the HTK Config file\n"); fprintf(fp, " [-smpFreq freq] sample period (Hz) (%ld)\n", jconf->am_root->analysis.para_default.smp_freq); fprintf(fp, " [-smpPeriod period] sample period (100ns) (%ld)\n", jconf->am_root->analysis.para_default.smp_period); fprintf(fp, " 
[-fsize sample] window size (sample) (%d)\n", jconf->am_root->analysis.para_default.framesize); fprintf(fp, " [-fshift sample] frame shift (sample) (%d)\n", jconf->am_root->analysis.para_default.frameshift); fprintf(fp, " [-preemph] pre-emphasis coef. (%.2f)\n", jconf->am_root->analysis.para_default.preEmph); fprintf(fp, " [-fbank] number of filterbank channels (%d)\n", jconf->am_root->analysis.para_default.fbank_num); fprintf(fp, " [-ceplif] cepstral liftering coef. (%d)\n", jconf->am_root->analysis.para_default.lifter); fprintf(fp, " [-rawe] [-norawe] toggle using raw energy (no)\n"); fprintf(fp, " [-enormal] [-noenormal] toggle normalizing log energy (no)\n"); fprintf(fp, " [-escale] scaling log energy for enormal (%.1f)\n", jconf->am_root->analysis.para_default.escale); fprintf(fp, " [-silfloor] energy silence floor in dB (%.1f)\n", jconf->am_root->analysis.para_default.silFloor); fprintf(fp, " [-delwin frame] delta windows length (frame) (%d)\n", jconf->am_root->analysis.para_default.delWin); fprintf(fp, " [-accwin frame] accel windows length (frame) (%d)\n", jconf->am_root->analysis.para_default.accWin); fprintf(fp, " [-hifreq freq] freq. of upper band limit, off if <0 (%d)\n", jconf->am_root->analysis.para_default.hipass); fprintf(fp, " [-lofreq freq] freq. of lower band limit, off if <0 (%d)\n", jconf->am_root->analysis.para_default.lopass); fprintf(fp, " [-sscalc] do spectral subtraction (file input only)\n"); fprintf(fp, " [-sscalclen msec] length of head silence for SS (msec) (%d)\n", jconf->am_root->frontend.sscalc_len); fprintf(fp, " [-ssload filename] load constant noise spectrum from file for SS\n"); fprintf(fp, " [-ssalpha value] alpha coef. for SS (%f)\n", jconf->am_root->frontend.ss_alpha); fprintf(fp, " [-ssfloor value] spectral floor for SS (%f)\n", jconf->am_root->frontend.ss_floor); fprintf(fp, " [-zmeanframe/-nozmeanframe] frame-wise DC removal like HTK(OFF)\n"); fprintf(fp, " [-usepower/-nousepower] use power in fbank analysis (OFF)\n"); fprintf(fp, " [-cmnload file] load initial CMN param from file on startup\n"); fprintf(fp, " [-cmnsave file] save CMN param to file after each input\n"); fprintf(fp, " [-cmnnoupdate] not update CMN param while recog. (use with -cmnload)\n"); fprintf(fp, " [-cmnmapweight] weight value of initial cm for MAP-CMN (%6.2f)\n", jconf->am_root->analysis.cmn_map_weight); fprintf(fp, " [-cvn] cepstral variance normalisation (%s)\n", jconf->amnow->analysis.para.cvn ? 
"on" : "off"); fprintf(fp, " [-vtln alpha lowcut hicut] enable VTLN (1.0 to disable) (%f)\n", jconf->am_root->analysis.para_default.vtln_alpha); fprintf(fp, "\n Acoustic Model:\n"); fprintf(fp, " -h hmmdefsfile HMM definition file name\n"); fprintf(fp, " [-hlist HMMlistfile] HMMlist filename (must for triphone model)\n"); fprintf(fp, " [-iwcd1 methodname] switch IWCD triphone handling on 1st pass\n"); fprintf(fp, " best N use N best score (default of n-gram, N=%d)\n", jconf->am_root->iwcdmaxn); fprintf(fp, " max use maximum score\n"); fprintf(fp, " avg use average score (default of dfa)\n"); fprintf(fp, " [-force_ccd] force to handle IWCD\n"); fprintf(fp, " [-no_ccd] don't handle IWCD\n"); fprintf(fp, " [-notypecheck] don't check input parameter type\n"); fprintf(fp, " [-spmodel HMMname] name of short pause model (\"%s\")\n", SPMODEL_NAME_DEFAULT); fprintf(fp, " [-multipath] switch decoding for multi-path HMM (auto)\n"); fprintf(fp, "\n Acoustic Model Computation Method:\n"); fprintf(fp, " [-gprune methodname] select Gaussian pruning method:\n"); #ifdef GPRUNE_DEFAULT_SAFE fprintf(fp, " safe safe pruning (default for TM/PTM)\n"); #else fprintf(fp, " safe safe pruning\n"); #endif #if GPRUNE_DEFAULT_HEURISTIC fprintf(fp, " heuristic heuristic pruning (default for TM/PTM)\n"); #else fprintf(fp, " heuristic heuristic pruning\n"); #endif #if GPRUNE_DEFAULT_BEAM fprintf(fp, " beam beam pruning (default for TM/PTM)\n"); #else fprintf(fp, " beam beam pruning\n"); #endif fprintf(fp, " none no pruning (default for non tmix models)\n"); #ifdef ENABLE_PLUGIN if (global_plugin_list) { if ((id = plugin_get_id("calcmix_get_optname")) >= 0) { for(p=global_plugin_list[id];p;p=p->next) { func = (FUNC_VOID) p->func; (*func)(buf, (int)64); fprintf(fp, " %-14s(calculation plugin #%d)\n", buf, p->source_id); } } } #endif fprintf(fp, " [-tmix gaussnum] Gaussian num threshold per mixture for pruning (%d)\n", jconf->am_root->mixnum_thres); fprintf(fp, " [-gshmm hmmdefs] monophone hmmdefs for GS\n"); fprintf(fp, " [-gsnum N] N-best state will be selected (%d)\n", jconf->am_root->gs_statenum); fprintf(fp, "\n--- Language Model Options (-LM) ---------------------------------\n"); fprintf(fp, "\n N-gram:\n"); fprintf(fp, " -d file.bingram n-gram file in Julius binary format\n"); fprintf(fp, " -nlr file.arpa forward n-gram file in ARPA format\n"); fprintf(fp, " -nrl file.arpa backward n-gram file in ARPA format\n"); fprintf(fp, " [-lmp float float] weight and penalty (tri: %.1f %.1f mono: %.1f %1.f)\n", DEFAULT_LM_WEIGHT_TRI_PASS1, DEFAULT_LM_PENALTY_TRI_PASS1, DEFAULT_LM_WEIGHT_MONO_PASS1, DEFAULT_LM_PENALTY_MONO_PASS1); fprintf(fp, " [-lmp2 float float] for 2nd pass (tri: %.1f %.1f mono: %.1f %1.f)\n", DEFAULT_LM_WEIGHT_TRI_PASS2, DEFAULT_LM_PENALTY_TRI_PASS2, DEFAULT_LM_WEIGHT_MONO_PASS2, DEFAULT_LM_PENALTY_MONO_PASS2); fprintf(fp, " [-transp float] penalty for transparent word (%+2.1f)\n", jconf->search_root->lmp.lm_penalty_trans); fprintf(fp, "\n DFA Grammar:\n"); fprintf(fp, " -dfa file.dfa DFA grammar file\n"); fprintf(fp, " -gram file[,file2...] 
(list of) grammar prefix(es)\n"); fprintf(fp, " -gramlist filename filename of grammar list\n"); fprintf(fp, " [-penalty1 float] word insertion penalty (1st pass) (%.1f)\n", jconf->search_root->lmp.penalty1); fprintf(fp, " [-penalty2 float] word insertion penalty (2nd pass) (%.1f)\n", jconf->search_root->lmp.penalty2); fprintf(fp, "\n Word Dictionary for N-gram and DFA:\n"); fprintf(fp, " -v dictfile dictionary file name\n"); fprintf(fp, " [-silhead wordname] (n-gram) beginning-of-sentence word (%s)\n", BEGIN_WORD_DEFAULT); fprintf(fp, " [-siltail wordname] (n-gram) end-of-sentence word (%s)\n", END_WORD_DEFAULT); fprintf(fp, " [-mapunk wordname] (n-gram) map unknown words to this (%s)\n", UNK_WORD_DEFAULT); fprintf(fp, " [-forcedict] ignore error entry and keep running\n"); fprintf(fp, " [-iwspword] (n-gram) add short-pause word for inter-word CD sp\n"); fprintf(fp, " [-iwspentry entry] (n-gram) word entry for \"-iwspword\" (%s)\n", IWSPENTRY_DEFAULT); fprintf(fp, " [-adddict dictfile] (n-gram) load extra dictionary\n"); fprintf(fp, " [-addentry entry] (n-gram) load extra word entry\n"); fprintf(fp, "\n Isolated Word Recognition:\n"); fprintf(fp, " -w file[,file2...] (list of) wordlist file name(s)\n"); fprintf(fp, " -wlist filename file that contains list of wordlists\n"); fprintf(fp, " -wsil head tail sp name of silence/pause model\n"); fprintf(fp, " head - BOS silence model name (%s)\n", jconf->lm_root->wordrecog_head_silence_model_name); fprintf(fp, " tail - EOS silence model name (%s)\n", jconf->lm_root->wordrecog_tail_silence_model_name); fprintf(fp, " sp - their name as context or \"NULL\" (%s)\n", (jconf->lm_root->wordrecog_silence_context_name[0] == '\0') ? "NULL" : jconf->lm_root->wordrecog_silence_context_name); #ifdef DETERMINE fprintf(fp, " -wed float int thresholds for early word determination\n"); fprintf(fp, " float: score threshold (%.1f)\n", jconf->search_root->pass1.determine_score_thres); fprintf(fp, " int: frame duration thres (%d)\n", jconf->search_root->pass1.determine_duration_thres); #endif fprintf(fp, "\n--- Recognizer / Search Options (-SR) ----------------------------\n"); fprintf(fp, "\n Search Parameters for the First Pass:\n"); fprintf(fp, " [-b beamwidth] beam width (by state num) (guessed)\n"); fprintf(fp, " (0: full search, -1: force guess)\n"); #ifdef SCORE_PRUNING fprintf(fp, " [-bs score_width] beam width (by score offset) (disabled)\n"); fprintf(fp, " (-1: disable)\n"); #endif #ifdef WPAIR # ifdef WPAIR_KEEP_NLIMIT fprintf(fp, " [-nlimit N] keeps only N tokens on each state (%d)\n", jconf->search_root->pass1.wpair_keep_nlimit); # endif #endif #ifdef SEPARATE_BY_UNIGRAM fprintf(fp, " [-sepnum wordnum] (n-gram) # of hi-freq word isolated from tree (%d)\n", jconf->lm_root->separate_wnum); #endif #ifdef HASH_CACHE_IW fprintf(fp, " [-iwcache percent] (n-gram) amount of inter-word LM cache (%3d)\n", jconf->search_root->pass1.iw_cache_rate); #endif fprintf(fp, " [-1pass] do 1st pass only, omit 2nd pass\n"); fprintf(fp, " [-inactive] recognition process not active on startup\n"); fprintf(fp, "\n Search Parameters for the Second Pass:\n"); fprintf(fp, " [-b2 hyponum] word envelope beam width (by hypo num) (%d)\n",jconf->search_root->pass2.enveloped_bestfirst_width); fprintf(fp, " [-n N] # of sentence to find (%d)\n", jconf->search_root->pass2.nbest); fprintf(fp, " [-output N] # of sentence to output (%d)\n",jconf->search_root->output.output_hypo_maxnum); #ifdef SCAN_BEAM fprintf(fp, " [-sb score] score beam threshold (by score) (%.1f)\n", 
jconf->search_root->pass2.scan_beam_thres); #endif fprintf(fp, " [-s hyponum] global stack size of hypotheses (%d)\n", jconf->search_root->pass2.stack_size); fprintf(fp, " [-m hyponum] hypotheses overflow threshold num (%d)\n", jconf->search_root->pass2.hypo_overflow); fprintf(fp, " [-lookuprange N] frame lookup range in word expansion (%d)\n", jconf->search_root->pass2.lookup_range); fprintf(fp, " [-looktrellis] (dfa) expand only backtrellis words\n"); fprintf(fp, " [-[no]multigramout] (dfa) output per-grammar results\n"); fprintf(fp, " [-oldtree] (dfa) use old build_wchmm()\n"); #ifdef PASS1_IWCD fprintf(fp, " [-oldiwcd] (dfa) use full lcdset\n"); #endif fprintf(fp, " [-iwsp] insert sp for all word end (multipath)(off)\n"); fprintf(fp, " [-iwsppenalty] trans. penalty for iwsp (multipath) (%.1f)\n", jconf->am_root->iwsp_penalty); fprintf(fp, "\n Short-pause Segmentation:\n"); fprintf(fp, " [-spsegment] enable short-pause segmentation\n"); fprintf(fp, " [-spdur] length threshold of sp frames (%d)\n", jconf->search_root->successive.sp_frame_duration); #ifdef SPSEGMENT_NAIST fprintf(fp, " [-spmargin] backstep margin on speech trigger (%d)\n", jconf->search_root->successive.sp_margin); fprintf(fp, " [-spdelay] delay on speech trigger (%d)\n", jconf->search_root->successive.sp_delay); #endif fprintf(fp, " [-pausemodels str] comma-delimited list of pause models for segment\n"); fprintf(fp, "\n Graph Output with graph-oriented search:\n"); fprintf(fp, " [-lattice] enable word graph (lattice) output\n"); fprintf(fp, " [-confnet] enable confusion network output\n"); fprintf(fp, " [-nolattice]][-noconfnet] disable lattice / confnet output\n"); fprintf(fp, " [-graphrange N] merge same words in graph (%d)\n", jconf->search_root->graph.graph_merge_neighbor_range); fprintf(fp, " -1: not merge, leave same loc. with diff. score\n"); fprintf(fp, " 0: merge same words at same location\n"); fprintf(fp, " >0: merge same words around the margin\n"); #ifdef GRAPHOUT_DEPTHCUT fprintf(fp, " [-graphcut num] graph cut depth at postprocess (-1: disable)(%d)\n", jconf->search_root->graph.graphout_cut_depth); #endif #ifdef GRAPHOUT_LIMIT_BOUNDARY_LOOP fprintf(fp, " [-graphboundloop num] max. num of boundary adjustment loop (%d)\n", jconf->search_root->graph.graphout_limit_boundary_loop_num); #endif #ifdef GRAPHOUT_SEARCH_DELAY_TERMINATION fprintf(fp, " [-graphsearchdelay] inhibit search termination until 1st sent. 
found\n"); fprintf(fp, " [-nographsearchdelay] disable it (default)\n"); #endif fprintf(fp, "\n Forced Alignment:\n"); fprintf(fp, " [-walign] optionally output word alignments\n"); fprintf(fp, " [-palign] optionally output phoneme alignments\n"); fprintf(fp, " [-salign] optionally output state alignments\n"); #ifdef CONFIDENCE_MEASURE fprintf(fp, "\n Confidence Score:\n"); #ifdef CM_MULTIPLE_ALPHA fprintf(fp, " [-cmalpha f t s] CM smoothing factor (from, to, step)\n"); #else fprintf(fp, " [-cmalpha value] CM smoothing factor (%f)\n", jconf->search_root->annotate.cm_alpha); #endif #ifdef CM_SEARCH_LIMIT fprintf(fp, " [-cmthres value] CM threshold to cut hypo on 2nd pass (%f)\n", jconf->search_root->annotate.cm_cut_thres); #endif #endif /* CONFIDENCE_MEASURE */ fprintf(fp, "\n Message Output:\n"); fprintf(fp, " [-fallback1pass] use 1st pass result when search failed\n"); fprintf(fp, " [-progout] progressive output in 1st pass\n"); fprintf(fp, " [-proginterval] interval of progout in msec (%d)\n", jconf->search_root->output.progout_interval); fprintf(fp, "\n-------------------------------------------------\n"); j_jconf_free(jconf); /* output application-side options */ useropt_show_desc(fp); } /* end of file */ julius-4.2.2/libjulius/src/spsegment.c0000644001051700105040000007162012004452401016270 0ustar ritrlab/** * @file spsegment.c * * * @brief Short-pause segmentation and decoder-based VAD * * In short-pause segmentation mode, Julius tries to find a "pause * frame" by watching the word hypotheses at each frame. Julius treat * words with only a silence model as "pause word", and judge whether * the input frame is "pause frame" or not by watching if any of the * pause words gets maximum score at each frame. Then it will segment the * input when the duration of pause frame reaches a limit. * * On normal short-pause segmentation (as of ver.3.x), the pause * frames will not be eliminated. The input will be segment at the * frame where a speech begins after the pause frames, and the next * input will be processed from the beginning of the pause frames. In * other words, the detected area of pause frames are processed twice, * as end-of-segment silence at the former input segment and * beginning-of-segment silence at the latter input segment. * * When SPSEGMENT_NAIST is defined, a long pause area will be dropped * from recognition. When the detecting pause frames gets longer than * threshold, it segments the input at that point and skip the continuing * pauses until a speech frame comes. The recognition process will * be kept with a special status while in the pause segment. This scheme * works as a decoder-driven VAD. * * * * * @brief ショートポーズセグメンテーションおよびデコーダベースVAD * * ショートポーズセグメンテーションでは,第1パスにおいて「無音単語」の * スコアをフレームごとに調べ,それが一位であるフレームを「無音フレーム」 * とします. そして,無音フレームが一定以上のフレーム数にわたったときに, * 入力をそこで区切ります. * * 「無音単語」は,単語辞書において,読みが無音に対応する1モデルのみから * なる単語を指します. 無音モデルは -spmodel で指定されるモデル,および * N-gram モデル使用時は先頭・末尾の無音モデルとされます(明示的に指定 * するには -pausemodels オプションを使用します) * * 通常のショートポーズセグメンテーション(Ver.3.x 以前と同等)では,無 * 音区間の除去は行われません. 入力は,無音フレーム区間が終了してふた * たび音声がトリガした時点で区切られ,次セグメントの認識はその無音フ * レーム区間の開始点から再開されます. すなわち,検出された無音区間は, * 前セグメントの末尾の無音区間かつ次セグメントの開始の無音区間として, * セグメント間でオーバーラップして処理されます. * * SPSEGMENT_NAIST 定義時は,無音フレーム区間が長い場合はそこでいったん * 入力を区切り,次の入力再開までの間の無音区間をスキップするようになります. * 無音区間中も,仮説を生成しない特別な認識状態に入ることで, * 認識状態を保ちます. これによって,より無音時間が長い場合を想定した, * デコーダベースの VAD を行うことが出来ます. 
* * * @author Akinobu Lee * @date Wed Oct 17 12:47:29 2007 * * $Revision: 1.5 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #include /** * * @brief ショートポーズ単語かどうか判定 * * 与えられた単語がショートポーズ単語であるかどうか調べる. * * @param w [in] 単語ID * @param r [in] 音声認識処理インスタンス * * @return ショートポーズ単語であれば TRUE,そうでなければ FALSE. * * * Check if the fiven word is a short-pause word. * * @param w [in] word id * @param r [in] recognition process instance * * @return TRUE if it is short pause word, FALSE if not. * * * @callgraph * @callergraph */ boolean is_sil(WORD_ID w, RecogProcess *r) { WORD_INFO *winfo; HTK_HMM_INFO *hmm; int i; winfo = r->lm->winfo; hmm = r->am->hmminfo; /* num of phones should be 1 */ if (winfo->wlen[w] > 1) return FALSE; if (r->pass1.pausemodel) { /* has pause model list */ for(i=0;ipass1.pausemodelnum;i++) { if (strmatch(winfo->wseq[w][0]->name, r->pass1.pausemodel[i])) { return TRUE; } } } else { /* short pause (specified by "-spmodel") */ if (winfo->wseq[w][0] == hmm->sp) return TRUE; if (r->lmtype == LM_PROB) { /* head/tail sil */ if (w == winfo->head_silwid || w == winfo->tail_silwid) return TRUE; } } return FALSE; } /** * * @brief Split input parameter for segmentation. * * Copy the rest samples in param to rest_param, and shrink the param * in mfcc instance. [start...param->samplenum] will be copied to * rest_param, and [0...end] will be left in param. * * * @brief セグメンテーション時に入力パラメータを分割する. * * 残りのサンプル(現在のフレームから終わりまで)を rest_param に * コピーし,元の param を短くする. [start...param->samplenum] が * rest_param にコピーされ,元の param には [0...end] が残る. * * * @param mfcc [i/o] MFCC calculation instance * @param start [in] copy start frame * @param end [in] original end frame * * @callgraph * @callergraph */ void mfcc_copy_to_rest_and_shrink(MFCCCalc *mfcc, int start, int end) { int t; /* copy rest parameters for next process */ mfcc->rest_param = new_param(); memcpy(&(mfcc->rest_param->header), &(mfcc->param->header), sizeof(HTK_Param_Header)); mfcc->rest_param->samplenum = mfcc->param->samplenum - start; mfcc->rest_param->header.samplenum = mfcc->rest_param->samplenum; mfcc->rest_param->veclen = mfcc->param->veclen; if (param_alloc(mfcc->rest_param, mfcc->rest_param->samplenum, mfcc->rest_param->veclen) == FALSE) { j_internal_error("ERROR: segmented: failed to allocate memory for rest param\n"); } /* copy data */ for(t=start;tparam->samplenum;t++) { memcpy(mfcc->rest_param->parvec[t-start], mfcc->param->parvec[t], sizeof(VECT) * mfcc->rest_param->veclen); } /* shrink original param */ /* just shrink the length */ mfcc->param->samplenum = end; } /** * * Shrink the parameter sequence. Drop the first (p-1) frames and * move [p..samplenum] to 0. * * * パラメータを短くする. 最初の (p-1) フレームを消して,[p..samplenum] * のサンプルを最初に詰める. * * * @param mfcc [i/o] MFCC Calculation instance * @param p [in] frame point to remain * * @callgraph * @callergraph */ void mfcc_shrink(MFCCCalc *mfcc, int p) { int t; int len; if (p > 0) { /* copy data */ for(t=p;tparam->samplenum;t++) { memcpy(mfcc->param->parvec[t-p], mfcc->param->parvec[t], sizeof(VECT) * mfcc->param->veclen); } /* shrink original param */ /* just shrink the length */ len = mfcc->param->samplenum - p; mfcc->param->samplenum = len; mfcc->param->header.samplenum = len; } } /** * * @brief 発話区間終了の検知 * * ショートポーズセグメンテーション指定時, * 発話区間の終了を検出する. 
無音単語が連続して最尤候補となるフレーム数を * カウントし,一定時間持続後にふたたび音声がトリガした時点で入力を * 区切る. * * SPSEGMENT_NAIST 定義時は,よりセグメント前後・間の無音時間が長い場合を * 想定したデコーダベースの VAD に切り替わる. この場合,音声トリガ検出前 * (r->pass1.after_triger == FALSE)では,仮説を生成しない状態で認識処理を * 続ける. 音声開始を検出したら特徴量を一定長 (r->config->successive.sp_margin) * 分だけ巻き戻して,通常の認識を開始する(r->pass1.after_trigger == TRUE). * 通常の認識中に無音区間が長く (r->config->successive.sp_frame_duration 以上) * 続いたら,そこで入力を区切る. * * @param r [i/o] 音声認識処理インスタンス * @param time [in] 現在の入力フレーム * * @return TRUE (このフレームでの終了を検出したら), FALSE (終了でない場合) * * * @brief Speech end point detection. * * Detect end-of-input by duration of short-pause words when short-pause * segmentation is enabled. When a pause word gets maximum score for a * successive frames, the segment will be treated as a pause frames. * When speech re-triggers, the current input will be segmented at that point. * * When SPSEGMENT_NAIST is defined, this function performs extended version * of the short pause segmentation, called "decoder-based VAD". When before * speech trigger (r->pass1.after_trigger == FALSE), it tells the recognition * functions not to generate word trellis and continue calculation. If a * speech trigger is found (not a pause word gets maximum score), the * input frames are 'rewinded' for a certain frame * (r->config->successive.sp_margin) and start the normal recognition * process from the rewinded frames (r->pass1.after_trigger = TRUE). * When a pause frame duration reaches a limit * (r->config->successive.sp_frame_duration), it terminate the search. * * @param r [i/o] recognition process instance * @param time [in] current input frame * * @return TRUE if end-of-input detected at this frame, FALSE if not. * * @callgraph * @callergraph */ boolean detect_end_of_segment(RecogProcess *r, int time) { FSBeam *d; TRELLIS_ATOM *tre; LOGPROB maxscore = LOG_ZERO; TRELLIS_ATOM *tremax = NULL; int count = 0; boolean detected = FALSE; #ifdef SPSEGMENT_NAIST MFCCCalc *mfcc; WORD_ID wid; int j; TOKEN2 *tk; int startframe; #endif d = &(r->pass1); #ifdef SPSEGMENT_NAIST if (! 
d->after_trigger) { /* we are in the first long pause segment before trigger */ /* find word end of maximum score from beam status */ for (j = d->n_start; j <= d->n_end; j++) { tk = &(d->tlist[d->tn][d->tindex[d->tn][j]]); if (r->wchmm->stend[tk->node] != WORD_INVALID) { if (maxscore < tk->score) { maxscore = tk->score; wid = r->wchmm->stend[tk->node]; } } } if (maxscore == LOG_ZERO) detected = TRUE; else if (is_sil(wid, r)) detected = TRUE; if (detected) { /***********************/ /* this is noise frame */ /***********************/ /* reset trigger duration */ d->trigger_duration = 0; /* if noise goes more than a certain frame, shrink the noise area to avoid unlimited memory usage */ if (r->am->mfcc->f > SPSEGMENT_NAIST_AUTOSHRINK_LIMIT) { d->want_rewind = TRUE; d->rewind_frame = r->am->mfcc->f - r->config->successive.sp_margin; d->want_rewind_reprocess = FALSE; if (debug2_flag) { jlog("DEBUG: pause exceeded %d, rewind\n", SPSEGMENT_NAIST_AUTOSHRINK_LIMIT); } return FALSE; } /* keep going */ d->want_rewind = FALSE; } else { /************************/ /* this is speech frame */ /************************/ /* increment trigger duration */ d->trigger_duration++; /* if not enough duration, not treat as up trigger */ if (d->trigger_duration < r->config->successive.sp_delay) { /* just continue detection */ return FALSE; } /***************************/ /* found speech up-trigger */ /***************************/ /* set backstep point */ if (r->am->mfcc->f < r->config->successive.sp_margin) { startframe = 0; } else { startframe = r->am->mfcc->f - r->config->successive.sp_margin; } if (debug2_flag) { jlog("DEBUG: speech triggered\n"); jlog("DEBUG: word=[%s] dur=%d\n", r->lm->winfo->woutput[wid], d->trigger_duration); jlog("DEBUG: backstep behind %d (from %d to %d) frame and start process\n", r->config->successive.sp_margin, r->am->mfcc->f, startframe); } /* if the pause segment was short, keep the context of last segment. else, reset the context */ if (r->lmtype == LM_PROB) { if (startframe > 0) { r->sp_break_last_word = WORD_INVALID; } } /* reset sp duration */ d->sp_duration = 0; /* request the caller to rewind the search to the backstep point and re-start with normal search */ d->want_rewind = TRUE; d->rewind_frame = startframe; d->want_rewind_reprocess = TRUE; /* this will enter to normal search in the next processing */ d->after_trigger = TRUE; } /* tell the caller not to segment */ return FALSE; } #endif /* SPSEGMENT_NAIST */ /* look for the best trellis word on the given time frame */ for(tre = r->backtrellis->list; tre != NULL && tre->endtime == time; tre = tre->next) { if (maxscore < tre->backscore) { maxscore = tre->backscore; tremax = tre; } count++; } if (tremax == NULL) { /* no word end: possible in the very beggining of input*/ detected = TRUE; /* assume it's in the short-pause duration */ } else if (count > 0) { /* many words found --- check if maximum is sp */ if (is_sil(tremax->wid, r)) { detected = TRUE; } } #ifdef SPSEGMENT_NAIST /************************************************************************/ /************************************************************************/ /* detected = TRUE if noise frame, or FALSE if speech frame */ /* sp区間持続チェック */ /* check sp segment duration */ if (d->first_sparea) { /* we are in the first sp segment */ if (d->in_sparea && detected) { /* sp continues */ d->sp_duration++; /* when sp continues more than -spdur plus -spmargin, it means that although a speech trigger has been detected by some reason, no actual speech has been found at first. 
*/ /* in this case we force trigger to end this input */ if (d->sp_duration > r->config->successive.sp_delay + r->config->successive.sp_margin + r->config->successive.sp_frame_duration) { d->in_sparea = FALSE; d->first_sparea = FALSE; if (debug2_flag) { jlog("DEBUG: no valid speech starts, force trigger at %d\n", r->am->mfcc->f); } } } else if (d->in_sparea && !detected) { /* found speech frame */ d->in_sparea = FALSE; d->first_sparea = FALSE; if (debug2_flag) { jlog("DEBUG: speech segment start at %d\n", r->am->mfcc->f); } } } else { /* we are either in speech segment, or trailing sp segment */ if (!d->in_sparea) { /* we are in speech segment */ if (detected) { /* detected end of speech segment (begin of sp segment) */ /* 一時的に開始フレームとしてマーク */ /* mark this frame as "temporal" begging of short-pause segment */ d->tmp_sparea_start = time; #ifdef SP_BREAK_RESUME_WORD_BEGIN if (r->lmtype == LM_PROB) { /* sp 区間開始時点の最尤単語を保存 */ /* store the best word in this frame as resuming word */ d->tmp_sp_break_last_word = tremax ? tremax->wid : WORD_INVALID; } #endif d->in_sparea = TRUE; d->sp_duration = 1; } else { /* speech continues */ /* keep recognizing */ } } else { /* we are in trailing sp segment */ if (detected) { /* short pause frame continues */ d->sp_duration++; /* keep word as the "beggining" of next sp segment */ if (r->lmtype == LM_PROB) { #ifdef SP_BREAK_RESUME_WORD_BEGIN /* if this segment has triggered by (tremax == NULL) (in case the first several frame of input), the sp word (to be used as resuming word in the next segment) is not yet set. it will be detected here */ if (d->tmp_sp_break_last_word == WORD_INVALID) { if (tremax != NULL) d->tmp_sp_break_last_word = tremax->wid; } #else /* resume word at the "end" of sp segment */ /* simply update the best sp word */ if (tremax != NULL) d->last_tre_word = tremax->wid; #endif } if (d->sp_duration >= r->config->successive.sp_frame_duration) { /* silence over, segment the recognition here */ /* store begging frame of the segment */ //d->sparea_start = d->tmp_sparea_start; r->am->mfcc->sparea_start = time - r->config->successive.sp_frame_duration; if (r->lmtype == LM_PROB) { #ifdef SP_BREAK_RESUME_WORD_BEGIN /* resume word = most likely sp word on beginning frame of the segment */ r->sp_break_last_word = d->tmp_sp_break_last_word; #else /* resume word = most likely sp word on end frame of the segment */ r->sp_break_last_word = d->last_tre_word; #endif } if (debug2_flag) { jlog("DEBUG: trailing silence end, end this segment at %d\n", r->am->mfcc->f); } d->after_trigger = FALSE; d->trigger_duration = 0; d->want_rewind = FALSE; /*** segment: [sparea_start - time-1] ***/ return(TRUE); } /* else, keep recognition */ } else { /* speech re-triggered */ /* keep recognition */ d->in_sparea = FALSE; } } } d->want_rewind = FALSE; #else /* ~SPSEGMENT_NAIST */ /************************************************************************/ /************************************************************************/ /* sp区間持続チェック */ /* check sp segment duration */ if (d->in_sparea && detected) { /* we are already in sp segment and sp continues */ d->sp_duration++; /* increment count */ #ifdef SP_BREAK_RESUME_WORD_BEGIN /* resume word at the "beggining" of sp segment */ /* if this segment has triggered by (tremax == NULL) (in case the first several frame of input), the sp word (to be used as resuming word in the next segment) is not yet set. 
it will be detected here */ if (d->tmp_sp_break_last_word == WORD_INVALID) { if (tremax != NULL) d->tmp_sp_break_last_word = tremax->wid; } #else /* resume word at the "end" of sp segment */ /* simply update the best sp word */ if (tremax != NULL) d->last_tre_word = tremax->wid; #endif } /* sp区間開始チェック */ /* check if sp segment begins at this frame */ else if (!d->in_sparea && detected) { /* 一時的に開始フレームとしてマーク */ /* mark this frame as "temporal" begging of short-pause segment */ d->tmp_sparea_start = time; #ifdef SP_BREAK_RESUME_WORD_BEGIN /* sp 区間開始時点の最尤単語を保存 */ /* store the best word in this frame as resuming word */ d->tmp_sp_break_last_word = tremax ? tremax->wid : WORD_INVALID; #endif d->in_sparea = TRUE; /* yes, we are in sp segment */ d->sp_duration = 1; /* initialize duration count */ #ifdef SP_BREAK_DEBUG jlog("DEBUG: sp start %d\n", time); #endif /* SP_BREAK_DEBUG */ } /* sp 区間終了チェック */ /* check if sp segment ends at this frame */ else if (d->in_sparea && !detected) { /* (time-1) is end frame of pause segment */ d->in_sparea = FALSE; /* we are not in sp segment */ #ifdef SP_BREAK_DEBUG jlog("DEBUG: sp end %d\n", time); #endif /* SP_BREAK_DEBUG */ /* sp 区間長チェック */ /* check length of the duration*/ if (d->sp_duration < r->config->successive.sp_frame_duration) { /* 短すぎる: 第1パスを中断せず続行 */ /* too short segment: not break, continue 1st pass */ #ifdef SP_BREAK_DEBUG jlog("DEBUG: too short (%d<%d), ignored\n", d->sp_duration, r->config->successive.sp_frame_duration); #endif /* SP_BREAK_DEBUG */ } else if (d->first_sparea) { /* 最初のsp区間は silB にあたるので,第1パスを中断せず続行 */ /* do not break at first sp segment: they are silB */ d->first_sparea = FALSE; #ifdef SP_BREAK_DEBUG jlog("DEBUG: first silence, ignored\n"); #endif /* SP_BREAK_DEBUG */ } else { /* 区間終了確定, 第1パスを中断して第2パスへ */ /* break 1st pass */ #ifdef SP_BREAK_DEBUG jlog("DEBUG: >> segment [%d..%d]\n", r->am->mfcc->sparea_start, time-1); #endif /* SP_BREAK_DEBUG */ /* store begging frame of the segment */ r->am->mfcc->sparea_start = d->tmp_sparea_start; #ifdef SP_BREAK_RESUME_WORD_BEGIN /* resume word = most likely sp word on beginning frame of the segment */ r->sp_break_last_word = d->tmp_sp_break_last_word; #else /* resume word = most likely sp word on end frame of the segment */ r->sp_break_last_word = d->last_tre_word; #endif /*** segment: [sparea_start - time-1] ***/ return(TRUE); } } #endif /* ~SPSEGMENT_NAIST */ #ifdef SP_BREAK_EVAL jlog("DEBUG: [%d %d %d]\n", time, count, (detected) ? 50 : 0); #endif return (FALSE); } /*******************************************************************/ /* 第1パスセグメント終了処理 (ショートポーズセグメンテーション用) */ /* end of 1st pass for a segment (for short pause segmentation) */ /*******************************************************************/ /** * * @brief 逐次デコーディングのための第1パス終了時の処理 * * 逐次デコーディング使用時,この関数は finalize_1st_pass() 後に呼ばれ, * そのセグメントの第1パスの終了処理を行う. 具体的には, * 続く第2パスのための始終端単語のセット,および * 次回デコーディングを再開するときのために,入力ベクトル列の未処理部分の * コピーを rest_param に残す. * * @param recog [in] エンジンインスタンス * * * @brief Finalize the first pass for successive decoding * * When successive decoding mode is enabled, this function will be * called just after finalize_1st_pass() to finish the beam search * of the last segment. The beginning and ending words for the 2nd pass * will be set according to the 1st pass result. Then the current * input will be shrinked to the segmented length and the unprocessed * region are copied to rest_param for the next decoding. 
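 *
 * The resulting split of the feature sequence is illustrated below (see
 * mfcc_copy_to_rest_and_shrink() above); the trailing pause region is
 * deliberately kept in both halves as overlap:
 *
 * @code
 * //  param (this segment)   : frames [0 .. last_time-1]
 * //  rest_param (next input): frames [sparea_start .. samplenum-1]
 * //  overlap ("tab")        : frames [sparea_start .. last_time-1]
 * mfcc_copy_to_rest_and_shrink(mfcc, sparea_start, last_time);
 * @endcode
 *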
* * @param recog [in] engine instance * * @callgraph * @callergraph */ void finalize_segment(Recog *recog) { int spstart; RecogProcess *r; MFCCCalc *mfcc; boolean ok_p; /* トレリス始終端における最尤単語を第2パスの始終端単語として格納 */ /* fix initial/last word hypothesis of the next 2nd pass to the best word hypothesis at the first/last frame in backtrellis*/ for(r=recog->process_list;r;r=r->next) { if (!r->live) continue; if (r->lmtype == LM_PROB) { set_terminal_words(r); } } /* パラメータを, 今第1パスが終了したセグメント区間と残りの区間に分割する. ただし接続部のsp区間部分(sparea_start..len-1)は「のりしろ」として両方に コピーする */ /* Divide input parameter into two: the last segment and the rest. The short-pause area (sparea_start..len-1) is considered as "tab", copied in both parameters */ /* param[sparea_start..framelen] -> rest_param param[0..len-1] -> param [sparea_start...len-1] overlapped */ ok_p = FALSE; for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { if (mfcc->segmented) { spstart = mfcc->sparea_start; ok_p = TRUE; break; } } if (ok_p) { /* the input was segmented in an instance */ /* shrink all param the len and store restart parameters in rest_param */ /* for each mfcc */ if (verbose_flag) jlog("STAT: segmented: next decoding will restart from %d\n", spstart); for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { if (verbose_flag) jlog("STAT: MFCC%02d: segmented: processed length=%d\n", mfcc->id, mfcc->last_time); /* copy the rest to mfcc->rest_param and shrink mfcc->param */ mfcc_copy_to_rest_and_shrink(mfcc, spstart, mfcc->last_time); } /* reset last_word info */ for(r=recog->process_list;r;r=r->next) { if (!r->live) continue; r->sp_break_last_nword_allow_override = TRUE; } } else { /* last segment is on end of input: no rest parameter */ for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { mfcc->rest_param = NULL; } /* reset last_word info */ for(r=recog->process_list;r;r=r->next) { if (!r->live) continue; r->sp_break_2_begin_word = WORD_INVALID; r->sp_break_last_word = WORD_INVALID; r->sp_break_last_nword = WORD_INVALID; r->sp_break_last_nword_allow_override = FALSE; } } } #ifdef BACKEND_VAD /** * * Initialize parameters for decoder/GMM-based VAD. * This will be called before recognition start for each segment. * * * Decode/GMM-based VAD のためにパラメータを初期化する. * 各入力セグメントの認識処理を始める前に呼ばれる. * * * @param recog [i/o] engine instance * * @callgraph * @callergraph */ void spsegment_init(Recog *recog) { RecogProcess *p; /* at first time, recognition does not start yet */ #ifdef SPSEGMENT_NAIST for(p=recog->process_list;p;p=p->next) { p->pass1.after_trigger = FALSE; p->pass1.trigger_duration = 0; } #endif #ifdef GMM_VAD if (recog->gmm) { recog->gc->after_trigger = FALSE; recog->gc->duration = 0; } #endif recog->triggered = FALSE; } /** * * @brief Detect speech up-trigger and synhronize among instances. * * This function inspects all recognition instancces and gmm components * to see if any of them has detected trigger up (beginning of speech) * at the last recognition process. If trigger has been detected, * set trigger-up status for all the instances. * * * @brief 音声区間開始の検出およびインスタンス間同期. * * 全ての認識処理インスタンスとGMM処理部について,直前の認識処理で * トリガアップ(音声区間開始)が判定されたかどうかを調べる. * 開始された場合は,全ての認識処理インスタンスでアップトリガをマークする. * * * @param recog [in] engine instance * * @return TRUE if triggered, or FALSE if not. 
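 *
 * A minimal usage sketch (an assumed caller, not code from this file):
 * the decoding loop can combine this function with recog->triggered so
 * that a speech-start notification is raised only once per segment.
 *
 * @code
 *   if (recog->triggered == FALSE) {
 *     if (spsegment_trigger_sync(recog)) {
 *       recog->triggered = TRUE;
 *       // notify the application that speech input has started
 *     }
 *   }
 * @endcode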
* * @callgraph * @callergraph */ boolean spsegment_trigger_sync(Recog *recog) { RecogProcess *p; boolean ok_p; ok_p = FALSE; if (recog->jconf->decodeopt.segment) { #ifdef SPSEGMENT_NAIST for(p = recog->process_list; p; p = p->next) { if (!p->live) continue; if (p->pass1.after_trigger) { ok_p = TRUE; break; } } #endif #ifdef GMM_VAD if (recog->gmm) { if (recog->gc->after_trigger) { ok_p = TRUE; } } #endif } if (ok_p) { /* up trigger detected */ #ifdef SPSEGMENT_NAIST for(p = recog->process_list; p; p = p->next) { if (!p->live) continue; p->pass1.after_trigger = TRUE; } #endif #ifdef GMM_VAD if (recog->gmm) { recog->gc->after_trigger = TRUE; } #endif } return ok_p; } #endif /* BACKEND_VAD */ /** * * @brief Check if rewind and restart of recognition is needed. * * This function checks if an instance requires rewinding of input * samples, and if recognition re-processing is needed after rewinding. * * * * @brief 巻き戻しと認識再開の必要性をチェックする. * * 音声認識処理において巻き戻しが必要がどうか調べ,必要な場合は * フレーム数と,巻き戻した後に巻戻し分の認識処理を行うかどうかを返す. * * * @param recog [in] engine instance * @param rf_ret [out] length of frame to rewind * @param repro_ret [out] TRUE if re-process is required after rewinding * * @return TRUE if rewinding is required, or FALSE if not. * * @callgraph * @callergraph */ boolean spsegment_need_restart(Recog *recog, int *rf_ret, boolean *repro_ret) { #ifdef SPSEGMENT_NAIST RecogProcess *p; #endif boolean ok_p; int rewind_frame = 0; boolean reprocess = FALSE; ok_p = FALSE; if (recog->jconf->decodeopt.segment) { #ifdef SPSEGMENT_NAIST /* check for rewind request from each process */ for(p = recog->process_list; p; p = p->next) { if (!p->live) continue; if (p->pass1.want_rewind) { p->pass1.want_rewind = FALSE; rewind_frame = p->pass1.rewind_frame; reprocess = p->pass1.want_rewind_reprocess; ok_p = TRUE; break; } } #endif /* SPSEGMENT_NAIST */ #ifdef GMM_VAD if (recog->gmm) { if (recog->gc->want_rewind) { recog->gc->want_rewind = FALSE; #ifdef SPSEGMENT_NAIST /* set to earlier one */ if (rewind_frame > recog->gc->rewind_frame) rewind_frame = recog->gc->rewind_frame; #else rewind_frame = recog->gc->rewind_frame; #endif reprocess = recog->gc->want_rewind_reprocess; ok_p = TRUE; } } #endif *rf_ret = rewind_frame; *repro_ret = reprocess; } return(ok_p); } /** * * @brief Execute rewinding. * * This function will set re-start point for the following processing, * and shrink the parameters for the rewinded part. The re-start point * is 0 (beginning of rest samples) for recognition restart, or * simply go back to the specified rewind frames for non restart. * * * * @brief 巻き戻し処理 * * 次回の入力処理の開始点を決定し,巻き戻し分パラメータを詰める. * 再開指定の場合開始点はパラメータの先頭に,それ以外の場合は巻戻した * 分だけ戻った位置にセットされる. 
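 *
 * (Illustration with made-up numbers, not from the source: with
 * rewind_frame = 30 and the frame pointer mfcc->f currently at 250,
 * reprocess = FALSE moves the pointer back to frame 220, while
 * reprocess = TRUE resets it to -1 so that recognition restarts from the
 * beginning of the remaining parameter; in both cases mfcc_shrink() is
 * called to shrink the stored parameter for the rewound part.)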
* * * * @param recog [i/o] engine instance * @param rewind_frame [in] frame length to rewind * @param reprocess [in] TRUE if re-processing recognition is required for the following processing * * @callgraph * @callergraph */ void spsegment_restart_mfccs(Recog *recog, int rewind_frame, boolean reprocess) { MFCCCalc *mfcc; for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { if (!mfcc->valid) continue; /* set last segmented time */ mfcc->last_time = mfcc->f - 1; /* reset frame pointers */ if (reprocess) { /* set all mfcc to initial point for re-process the whole frames */ mfcc->f = -1; } else { /* just bring back to the new last point after shrink */ mfcc->f -= rewind_frame; } /* shrink the current mfcc */ mfcc_shrink(mfcc, rewind_frame); } } /* end of file */ julius-4.2.2/libjulius/src/m_options.c0000644001051700105040000016700112004452401016271 0ustar ritrlab/** * @file m_options.c * * * @brief オプション処理 * * ここにある関数は,jconfファイルおよびコマンドラインからのオプション指定を * 順に読み込み,値を格納する. * * * * @brief Option parsing. * * These functions read option strings from jconf file or command line * and set values to the configuration structure. * * * @author Akinobu Lee * @date Thu May 12 18:52:07 2005 * * $Revision: 1.26 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #include /** * * @brief 相対パスをフルパスに変換する. * * ファイルのパス名が相対パスであれば,カレントディレクトリをつけた * フルパスに変換して返す. 絶対パスであれば,そのまま返す. * * @param filename [in] ファイルのパス名 * @param dirname [in] カレントディレクトリのパス名 * * @return 絶対パス名の入った,新たに割り付けられたバッファ * * * @brief Change relative path to full path. * * If the file path is given as relative, prepend the dirname to it. * If the file path is full, just copy it to new buffer and return. * * @param filename [in] file path name * @param dirname [in] full path of current directory * * @return newly malloced buffer holding the full path name. * */ char * filepath(char *filename, char *dirname) { char *p; if (dirname != NULL && filename[0] != '/' #if defined(_WIN32) && filename[0] != '\\' && !(strlen(filename) >= 3 && filename[1] == ':') #endif ) { p = (char *)mymalloc(strlen(filename) + strlen(dirname) + 1); strcpy(p, dirname); strcat(p, filename); } else { p = strcpy((char *)mymalloc(strlen(filename)+1), filename); } return p; } /** * * Returns next argument string. * * * 次の引数の文字列を返す. * * * @param cur [i/o] pointer to current point of the argment array * @param argc [in] total number of argments * @param argv [in] argment array * * @return pointer to the next argument, or NULL if no more argument vailable. * */ static char * next_arg(int *cur, int argc, char *argv[]) { (*cur)++; if (*cur >= argc) { jlog("ERROR: m_options: option requires argument -- %s\n", argv[*cur-1]); return NULL; } return(argv[*cur]); } static boolean check_section(Jconf *jconf, char *optname, short sec) { if (! 
jconf->optsectioning) return TRUE; if (jconf->optsection == sec) return TRUE; if (jconf->optsection == JCONF_OPT_DEFAULT) return TRUE; switch(sec) { case JCONF_OPT_GLOBAL: jlog("ERROR: \"%s\" is global option (should be before any instance declaration)", optname); break; case JCONF_OPT_AM: jlog("ERROR: \"%s\" is AM option", optname); break; case JCONF_OPT_LM: jlog("ERROR: \"%s\" is LM option", optname); break; case JCONF_OPT_SR: jlog("ERROR: \"%s\" is SR (search) option", optname); break; } switch(jconf->optsection) { case JCONF_OPT_GLOBAL: jlog(", but exists at global section (-GLOBAL)\n"); break; case JCONF_OPT_AM: jlog(", but exists at AM section (-AM \"%s\")\n", jconf->amnow->name); break; case JCONF_OPT_LM: jlog(", but exists at LM section (-LM \"%s\")\n", jconf->lmnow->name); break; case JCONF_OPT_SR: jlog(", but exists at recognizer section (-SR \"%s\")\n", jconf->searchnow->name); break; } jlog("ERROR: fix it, or you can disable this check by \"-nosectioncheck\"\n"); return FALSE; } /** * * メモリ領域を解放し NULL で埋める. * @param p [i/o] メモリ領域の先頭を指すポインタ変数へのポインタ * @note @a p が NULL の場合は何も起こらない。 * * * Free memory and fill it with NULL. * @param p [i/o] pointer to pointer that holds allocated address * @note Nothing will happen if @a p equals to NULL. * */ #define FREE_MEMORY(p) \ {if (p) {free(p); p = NULL;}} /** * * オプション解析. * * @param argc [in] @a argv に含まれる引数の数 * @param argv [in] 引数値(文字列)の配列 * @param cwd [in] カレントディレクトリ * @param jconf [out] 値を格納するjconf構造体 * * * * Option parsing. * * @param argc [in] number of elements in @a argv * @param argv [in] array of argument strings * @param cwd [in] current directory * @param jconf [out] jconf structure to store data * * * @return TRUE on success, or FALSE on error. * * @callgraph * @callergraph */ boolean opt_parse(int argc, char *argv[], char *cwd, Jconf *jconf) { char *tmparg; int i; boolean unknown_opt; JCONF_AM *amconf, *atmp; JCONF_LM *lmconf, *ltmp; JCONF_SEARCH *sconf; char sname[JCONF_MODULENAME_MAXLEN]; #ifdef ENABLE_PLUGIN int sid; FUNC_INT func; #endif #define GET_TMPARG if ((tmparg = next_arg(&i, argc, argv)) == NULL) return FALSE for (i=1;i= '0' && tmparg[0] <= '9') { jlog("ERROR: m_options: AM name \"%s\" not acceptable: first character should not be a digit\n", tmparg); return FALSE; } /* if not first time, create new module instance and switch to it */ /* and switch current to this */ amconf = j_jconf_am_new(); if (j_jconf_am_regist(jconf, amconf, tmparg) == FALSE) { jlog("ERROR: failed to add new amconf as \"%s\"\n", tmparg); jlog("ERROR: m_options: failed to create amconf\n"); j_jconf_am_free(amconf); return FALSE; } jconf->amnow = amconf; jconf->optsection = JCONF_OPT_AM; continue; } else if (strmatch(argv[i],"-AM_GMM") || strmatch(argv[i], "[AM_GMM]")) { /* switch current to GMM */ if (jconf->gmm == NULL) { /* if new, allocate jconf for GMM */ jconf->gmm = j_jconf_am_new(); } jconf->amnow = jconf->gmm; jconf->optsection = JCONF_OPT_AM; continue; } else if (strmatch(argv[i],"-LM") || strmatch(argv[i], "[LM]")) { GET_TMPARG; if (tmparg[0] == '-') { jlog("ERROR: m_options: -LM needs an argument as module name\n"); return FALSE; } if (tmparg[0] >= '0' && tmparg[0] <= '9') { jlog("ERROR: m_options: LM name \"%s\" not acceptable: first character should not be a digit\n", tmparg); return FALSE; } /* create new module instance and switch to it */ /* and switch current to this */ lmconf = j_jconf_lm_new(); if (j_jconf_lm_regist(jconf, lmconf, tmparg) == FALSE) { jlog("ERROR: failed to add new lmconf as \"%s\"\n", tmparg); jlog("ERROR: 
m_options: failed to create lmconf\n"); j_jconf_lm_free(lmconf); return FALSE; } jconf->lmnow = lmconf; jconf->optsection = JCONF_OPT_LM; continue; } else if (strmatch(argv[i],"-SR") || strmatch(argv[i], "[SR]")) { GET_TMPARG; if (tmparg[0] == '-') { jlog("ERROR: m_options: -SR needs three arguments: module name, AM name and LM name\n"); return FALSE; } if (tmparg[0] >= '0' && tmparg[0] <= '9') { jlog("ERROR: m_options: SR name \"%s\" not acceptable: first character should not be a digit\n", tmparg); return FALSE; } /* store name temporarly */ strncpy(sname, tmparg, JCONF_MODULENAME_MAXLEN); /* get link to jconf_am and jconf_lm */ GET_TMPARG; if (tmparg[0] == '-') { jlog("ERROR: m_options: -SR needs three arguments: module name, AM name and LM name\n"); return FALSE; } if (tmparg[0] >= '0' && tmparg[0] <= '9') { /* arg is number */ if ((amconf = j_get_amconf_by_id(jconf, atoi(tmparg))) == NULL) return FALSE; } else { /* name string */ if ((amconf = j_get_amconf_by_name(jconf, tmparg)) == NULL) return FALSE; } GET_TMPARG; if (tmparg[0] == '-') { jlog("ERROR: m_options: -SR needs three arguments: module name, AM name and LM name\n"); return FALSE; } if (tmparg[0] >= '0' && tmparg[0] <= '9') { /* arg is number */ if ((lmconf = j_get_lmconf_by_id(jconf, atoi(tmparg))) == NULL) return FALSE; } else { /* name string */ if ((lmconf = j_get_lmconf_by_name(jconf, tmparg)) == NULL) return FALSE; } /* check to avoid assigning an LM for multiple SR */ for(sconf=jconf->search_root;sconf;sconf=sconf->next) { if (sconf->lmconf == lmconf) { jlog("ERROR: you are going to share LM \"%s\" among multiple SRs\n"); jlog("ERROR: current Julius cannot share LM among SRs\n"); jlog("ERROR: you should define LM for each SR\n"); return FALSE; } } /* if not first time, create new module instance and switch to it */ sconf = j_jconf_search_new(); sconf->amconf = amconf; sconf->lmconf = lmconf; if (j_jconf_search_regist(jconf, sconf, sname) == FALSE) { jlog("ERROR: failed to add new amconf as \"%s\"\n", sname); jlog("ERROR: m_options: failed to create search conf\n"); j_jconf_search_free(sconf); return FALSE; } jconf->searchnow = sconf; jconf->optsection = JCONF_OPT_SR; continue; } else if (strmatch(argv[i],"-GLOBAL")) { jconf->optsection = JCONF_OPT_GLOBAL; continue; } else if (strmatch(argv[i],"-sectioncheck")) { /* enable section check */ jconf->optsectioning = TRUE; continue; } else if (strmatch(argv[i],"-nosectioncheck")) { /* disable section check */ jconf->optsectioning = FALSE; continue; } else if (strmatch(argv[i],"-input")) { /* speech input */ if (!check_section(jconf, argv[i], JCONF_OPT_GLOBAL)) return FALSE; GET_TMPARG; jconf->input.plugin_source = -1; if (strmatch(tmparg,"file") || strmatch(tmparg,"rawfile")) { jconf->input.type = INPUT_WAVEFORM; jconf->input.speech_input = SP_RAWFILE; jconf->decodeopt.realtime_flag = FALSE; } else if (strmatch(tmparg,"htkparam") || strmatch(tmparg,"mfcfile") || strmatch(tmparg,"mfc")) { jconf->input.type = INPUT_VECTOR; jconf->input.speech_input = SP_MFCFILE; jconf->decodeopt.realtime_flag = FALSE; } else if (strmatch(tmparg,"stdin")) { jconf->input.type = INPUT_WAVEFORM; jconf->input.speech_input = SP_STDIN; jconf->decodeopt.realtime_flag = FALSE; } else if (strmatch(tmparg,"adinnet")) { jconf->input.type = INPUT_WAVEFORM; jconf->input.speech_input = SP_ADINNET; jconf->decodeopt.realtime_flag = TRUE; #ifdef USE_NETAUDIO } else if (strmatch(tmparg,"netaudio")) { jconf->input.type = INPUT_WAVEFORM; jconf->input.speech_input = SP_NETAUDIO; jconf->decodeopt.realtime_flag = 
TRUE; #endif #ifdef USE_MIC } else if (strmatch(tmparg,"mic")) { jconf->input.type = INPUT_WAVEFORM; jconf->input.speech_input = SP_MIC; jconf->input.device = SP_INPUT_DEFAULT; jconf->decodeopt.realtime_flag = TRUE; } else if (strmatch(tmparg,"alsa")) { #ifdef HAS_ALSA jconf->input.type = INPUT_WAVEFORM; jconf->input.speech_input = SP_MIC; jconf->input.device = SP_INPUT_ALSA; jconf->decodeopt.realtime_flag = TRUE; #else jlog("ERROR: m_options: \"-input alsa\": ALSA support is not built-in\n"); return FALSE; #endif } else if (strmatch(tmparg,"oss")) { #ifdef HAS_OSS jconf->input.type = INPUT_WAVEFORM; jconf->input.speech_input = SP_MIC; jconf->input.device = SP_INPUT_OSS; jconf->decodeopt.realtime_flag = TRUE; #else jlog("ERROR: m_options: \"-input oss\": OSS support is not built-in\n"); return FALSE; #endif } else if (strmatch(tmparg,"esd")) { #ifdef HAS_ESD jconf->input.type = INPUT_WAVEFORM; jconf->input.speech_input = SP_MIC; jconf->input.device = SP_INPUT_ESD; jconf->decodeopt.realtime_flag = TRUE; #else jlog("ERROR: m_options: \"-input esd\": ESounD support is not built-in\n"); return FALSE; #endif } else if (strmatch(tmparg,"pulseaudio")) { #ifdef HAS_PULSEAUDIO jconf->input.type = INPUT_WAVEFORM; jconf->input.speech_input = SP_MIC; jconf->input.device = SP_INPUT_PULSEAUDIO; jconf->decodeopt.realtime_flag = TRUE; #else jlog("ERROR: m_options: \"-input pulseaudio\": PulseAudio support is not built-in\n"); return FALSE; #endif #endif #ifdef ENABLE_PLUGIN } else if ((sid = plugin_find_optname("adin_get_optname", tmparg)) != -1) { /* adin plugin */ jconf->input.plugin_source = sid; jconf->input.type = INPUT_WAVEFORM; jconf->input.speech_input = SP_MIC; func = (FUNC_INT) plugin_get_func(sid, "adin_get_configuration"); if (func == NULL) { jlog("ERROR: invalid plugin: adin_get_configuration() not exist\n"); jlog("ERROR: skip option \"-input %s\"\n", tmparg); continue; } jconf->decodeopt.realtime_flag = (*func)(0); } else if ((sid = plugin_find_optname("fvin_get_optname", tmparg)) != -1) { /* vector input plugin */ jconf->input.plugin_source = sid; jconf->input.type = INPUT_VECTOR; jconf->input.speech_input = SP_MFCMODULE; jconf->decodeopt.realtime_flag = FALSE; #endif } else { jlog("ERROR: m_options: unknown speech input source \"%s\"\n", tmparg); return FALSE; } continue; } else if (strmatch(argv[i],"-filelist")) { /* input file list */ if (!check_section(jconf, argv[i], JCONF_OPT_GLOBAL)) return FALSE; GET_TMPARG; FREE_MEMORY(jconf->input.inputlist_filename); //jconf->input.inputlist_filename = strcpy((char*)mymalloc(strlen(tmparg)+1),tmparg); jconf->input.inputlist_filename = filepath(tmparg, cwd); continue; } else if (strmatch(argv[i],"-rejectshort")) { /* short input rejection */ if (!check_section(jconf, argv[i], JCONF_OPT_GLOBAL)) return FALSE; GET_TMPARG; jconf->reject.rejectshortlen = atoi(tmparg); continue; #ifdef POWER_REJECT } else if (strmatch(argv[i],"-powerthres")) { /* short input rejection */ if (!check_section(jconf, argv[i], JCONF_OPT_GLOBAL)) return FALSE; GET_TMPARG; jconf->reject.powerthres = atoi(tmparg); continue; #endif } else if (strmatch(argv[i],"-force_realtime")) { /* force realtime */ if (!check_section(jconf, argv[i], JCONF_OPT_GLOBAL)) return FALSE; GET_TMPARG; if (strmatch(tmparg, "on")) { jconf->decodeopt.forced_realtime = TRUE; } else if (strmatch(tmparg, "off")) { jconf->decodeopt.forced_realtime = FALSE; } else { jlog("ERROR: m_options: \"-force_realtime\" should be either \"on\" or \"off\"\n"); return FALSE; } jconf->decodeopt.force_realtime_flag = 
TRUE; continue; } else if (strmatch(argv[i],"-realtime")) { /* equal to "-force_realtime on" */ if (!check_section(jconf, argv[i], JCONF_OPT_GLOBAL)) return FALSE; jconf->decodeopt.forced_realtime = TRUE; jconf->decodeopt.force_realtime_flag = TRUE; continue; } else if (strmatch(argv[i], "-norealtime")) { /* equal to "-force_realtime off" */ if (!check_section(jconf, argv[i], JCONF_OPT_GLOBAL)) return FALSE; jconf->decodeopt.forced_realtime = FALSE; jconf->decodeopt.force_realtime_flag = TRUE; continue; } else if (strmatch(argv[i],"-forcedict")) { /* skip dict error */ if (!check_section(jconf, argv[i], JCONF_OPT_LM)) return FALSE; jconf->lmnow->forcedict_flag = TRUE; continue; } else if (strmatch(argv[i],"-check")) { /* interactive model check mode */ if (!check_section(jconf, argv[i], JCONF_OPT_SR)) return FALSE; GET_TMPARG; if (strmatch(tmparg, "wchmm")) { jconf->searchnow->sw.wchmm_check_flag = TRUE; } else if (strmatch(tmparg, "trellis")) { jconf->searchnow->sw.trellis_check_flag = TRUE; } else if (strmatch(tmparg, "triphone")) { jconf->searchnow->sw.triphone_check_flag = TRUE; } else { jlog("ERROR: m_options: invalid argument for \"-check\": %s\n", tmparg); return FALSE; } continue; } else if (strmatch(argv[i],"-notypecheck")) { /* don't check param type */ if (!check_section(jconf, argv[i], JCONF_OPT_GLOBAL)) return FALSE; jconf->input.paramtype_check_flag = FALSE; continue; } else if (strmatch(argv[i],"-nlimit")) { /* limit N token in a node */ #ifdef WPAIR_KEEP_NLIMIT if (!check_section(jconf, argv[i], JCONF_OPT_SR)) return FALSE; GET_TMPARG; jconf->searchnow->pass1.wpair_keep_nlimit = atoi(tmparg); #else jlog("WARNING: m_options: WPAIR_KEEP_NLIMIT disabled, \"-nlimit\" ignored\n"); #endif continue; } else if (strmatch(argv[i],"-lookuprange")) { /* trellis neighbor range */ if (!check_section(jconf, argv[i], JCONF_OPT_SR)) return FALSE; GET_TMPARG; jconf->searchnow->pass2.lookup_range = atoi(tmparg); continue; } else if (strmatch(argv[i],"-graphout")) { /* enable graph output */ if (!check_section(jconf, argv[i], JCONF_OPT_SR)) return FALSE; jconf->searchnow->graph.enabled = TRUE; jconf->searchnow->graph.lattice = TRUE; jconf->searchnow->graph.confnet = FALSE; continue; } else if (strmatch(argv[i],"-lattice")) { /* enable graph output */ if (!check_section(jconf, argv[i], JCONF_OPT_SR)) return FALSE; jconf->searchnow->graph.enabled = TRUE; jconf->searchnow->graph.lattice = TRUE; continue; } else if (strmatch(argv[i],"-nolattice")) { /* disable graph output */ if (!check_section(jconf, argv[i], JCONF_OPT_SR)) return FALSE; jconf->searchnow->graph.enabled = FALSE; jconf->searchnow->graph.lattice = FALSE; continue; } else if (strmatch(argv[i],"-confnet")) { /* enable confusion network */ if (!check_section(jconf, argv[i], JCONF_OPT_SR)) return FALSE; jconf->searchnow->graph.enabled = TRUE; jconf->searchnow->graph.confnet = TRUE; continue; } else if (strmatch(argv[i],"-noconfnet")) { /* disable graph output */ if (!check_section(jconf, argv[i], JCONF_OPT_SR)) return FALSE; jconf->searchnow->graph.enabled = FALSE; jconf->searchnow->graph.confnet = FALSE; continue; } else if (strmatch(argv[i],"-graphrange")) { /* neighbor merge range frame */ if (!check_section(jconf, argv[i], JCONF_OPT_SR)) return FALSE; GET_TMPARG; jconf->searchnow->graph.graph_merge_neighbor_range = atoi(tmparg); continue; #ifdef GRAPHOUT_DEPTHCUT } else if (strmatch(argv[i],"-graphcut")) { /* cut graph word by depth */ if (!check_section(jconf, argv[i], JCONF_OPT_SR)) return FALSE; GET_TMPARG; 
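/* (added note) tmparg now holds the word-depth threshold used to cut
   graph words, as given on the command line, e.g. "-graphcut 80";
   the value 80 is only an illustrative figure, not a documented default.
   This option is compiled in only when GRAPHOUT_DEPTHCUT is defined. */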
jconf->searchnow->graph.graphout_cut_depth = atoi(tmparg); continue; #endif #ifdef GRAPHOUT_LIMIT_BOUNDARY_LOOP } else if (strmatch(argv[i],"-graphboundloop")) { /* neighbor merge range frame */ if (!check_section(jconf, argv[i], JCONF_OPT_SR)) return FALSE; GET_TMPARG; jconf->searchnow->graph.graphout_limit_boundary_loop_num = atoi(tmparg); continue; #endif #ifdef GRAPHOUT_SEARCH_DELAY_TERMINATION } else if (strmatch(argv[i],"-graphsearchdelay")) { /* not do graph search termination before the 1st sentence is found */ if (!check_section(jconf, argv[i], JCONF_OPT_SR)) return FALSE; jconf->searchnow->graph.graphout_search_delay = TRUE; continue; } else if (strmatch(argv[i],"-nographsearchdelay")) { /* not do graph search termination before the 1st sentence is found */ if (!check_section(jconf, argv[i], JCONF_OPT_SR)) return FALSE; jconf->searchnow->graph.graphout_search_delay = FALSE; continue; #endif } else if (strmatch(argv[i],"-looktrellis")) { /* activate loopuprange */ if (!check_section(jconf, argv[i], JCONF_OPT_SR)) return FALSE; jconf->searchnow->pass2.looktrellis_flag = TRUE; continue; } else if (strmatch(argv[i],"-multigramout")) { /* enable per-grammar decoding on 2nd pass */ if (!check_section(jconf, argv[i], JCONF_OPT_SR)) return FALSE; jconf->searchnow->output.multigramout_flag = TRUE; continue; } else if (strmatch(argv[i],"-nomultigramout")) { /* disable per-grammar decoding on 2nd pass */ if (!check_section(jconf, argv[i], JCONF_OPT_SR)) return FALSE; jconf->searchnow->output.multigramout_flag = FALSE; continue; } else if (strmatch(argv[i],"-oldtree")) { /* use old tree function */ if (!check_section(jconf, argv[i], JCONF_OPT_SR)) return FALSE; jconf->searchnow->pass1.old_tree_function_flag = TRUE; continue; } else if (strmatch(argv[i],"-sb")) { /* score envelope width in 2nd pass */ #ifdef SCAN_BEAM if (!check_section(jconf, argv[i], JCONF_OPT_SR)) return FALSE; GET_TMPARG; jconf->searchnow->pass2.scan_beam_thres = atof(tmparg); #else jlog("WARNING: m_options: SCAN_BEAM disabled, \"-sb\" ignored\n"); #endif continue; #ifdef SCORE_PRUNING } else if (strmatch(argv[i],"-bs")) { /* score beam width for 1st pass */ if (!check_section(jconf, argv[i], JCONF_OPT_SR)) return FALSE; GET_TMPARG; jconf->searchnow->pass1.score_pruning_width = atof(tmparg); continue; #endif } else if (strmatch(argv[i],"-discount")) { /* (bogus) */ jlog("WARNING: m_options: option \"-discount\" is now bogus, ignored\n"); continue; } else if (strmatch(argv[i],"-cutsilence")) { /* force (long) silence detection on */ if (!check_section(jconf, argv[i], JCONF_OPT_GLOBAL)) return FALSE; jconf->detect.silence_cut = 1; continue; } else if (strmatch(argv[i],"-nocutsilence")) { /* force (long) silence detection off */ if (!check_section(jconf, argv[i], JCONF_OPT_GLOBAL)) return FALSE; jconf->detect.silence_cut = 0; continue; } else if (strmatch(argv[i],"-pausesegment")) { /* force (long) silence detection on (for backward compatibility) */ if (!check_section(jconf, argv[i], JCONF_OPT_GLOBAL)) return FALSE; jconf->detect.silence_cut = 1; continue; } else if (strmatch(argv[i],"-nopausesegment")) { /* force (long) silence detection off (for backward comatibility) */ if (!check_section(jconf, argv[i], JCONF_OPT_GLOBAL)) return FALSE; jconf->detect.silence_cut = 0; continue; } else if (strmatch(argv[i],"-lv")) { /* silence detection threshold level */ if (!check_section(jconf, argv[i], JCONF_OPT_GLOBAL)) return FALSE; GET_TMPARG; jconf->detect.level_thres = atoi(tmparg); continue; } else if (strmatch(argv[i],"-zc")) { 
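/* (added note) a hedged configuration example for the level/zero-cross
   based input detection options handled around here ("-lv" just above,
   "-zc", "-headmargin" and "-tailmargin" below):
     -lv 2000 -zc 60 -headmargin 300 -tailmargin 400
   the numbers are illustrative only, not recommended defaults. */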
/* silence detection zero cross num */ if (!check_section(jconf, argv[i], JCONF_OPT_GLOBAL)) return FALSE; GET_TMPARG; jconf->detect.zero_cross_num = atoi(tmparg); continue; } else if (strmatch(argv[i],"-headmargin")) { /* head silence length */ if (!check_section(jconf, argv[i], JCONF_OPT_GLOBAL)) return FALSE; GET_TMPARG; jconf->detect.head_margin_msec = atoi(tmparg); continue; } else if (strmatch(argv[i],"-tailmargin")) { /* tail silence length */ if (!check_section(jconf, argv[i], JCONF_OPT_GLOBAL)) return FALSE; GET_TMPARG; jconf->detect.tail_margin_msec = atoi(tmparg); continue; } else if (strmatch(argv[i],"-chunksize")) { /* chunk size for detection */ if (!check_section(jconf, argv[i], JCONF_OPT_GLOBAL)) return FALSE; GET_TMPARG; jconf->detect.chunk_size = atoi(tmparg); continue; } else if (strmatch(argv[i],"-hipass")||strmatch(argv[i],"-hifreq")) { /* frequency of upper band limit */ if (!check_section(jconf, argv[i], JCONF_OPT_AM)) return FALSE; GET_TMPARG; jconf->amnow->analysis.para.hipass = atoi(tmparg); continue; } else if (strmatch(argv[i],"-lopass")||strmatch(argv[i],"-lofreq")) { /* frequency of lower band limit */ if (!check_section(jconf, argv[i], JCONF_OPT_AM)) return FALSE; GET_TMPARG; jconf->amnow->analysis.para.lopass = atoi(tmparg); continue; } else if (strmatch(argv[i],"-smpPeriod")) { /* sample period (ns) */ if (!check_section(jconf, argv[i], JCONF_OPT_AM)) return FALSE; GET_TMPARG; jconf->amnow->analysis.para.smp_period = atoi(tmparg); jconf->amnow->analysis.para.smp_freq = period2freq(jconf->amnow->analysis.para.smp_period); continue; } else if (strmatch(argv[i],"-smpFreq")) { /* sample frequency (Hz) */ if (!check_section(jconf, argv[i], JCONF_OPT_AM)) return FALSE; GET_TMPARG; jconf->amnow->analysis.para.smp_freq = atoi(tmparg); jconf->amnow->analysis.para.smp_period = freq2period(jconf->amnow->analysis.para.smp_freq); continue; } else if (strmatch(argv[i],"-fsize")) { /* Window size */ if (!check_section(jconf, argv[i], JCONF_OPT_AM)) return FALSE; GET_TMPARG; jconf->amnow->analysis.para.framesize = atoi(tmparg); continue; } else if (strmatch(argv[i],"-fshift")) { /* Frame shiht */ if (!check_section(jconf, argv[i], JCONF_OPT_AM)) return FALSE; GET_TMPARG; jconf->amnow->analysis.para.frameshift = atoi(tmparg); continue; } else if (strmatch(argv[i],"-preemph")) { if (!check_section(jconf, argv[i], JCONF_OPT_AM)) return FALSE; GET_TMPARG; jconf->amnow->analysis.para.preEmph = atof(tmparg); continue; } else if (strmatch(argv[i],"-fbank")) { if (!check_section(jconf, argv[i], JCONF_OPT_AM)) return FALSE; GET_TMPARG; jconf->amnow->analysis.para.fbank_num = atoi(tmparg); continue; } else if (strmatch(argv[i],"-ceplif")) { if (!check_section(jconf, argv[i], JCONF_OPT_AM)) return FALSE; GET_TMPARG; jconf->amnow->analysis.para.lifter = atoi(tmparg); continue; } else if (strmatch(argv[i],"-rawe")) { if (!check_section(jconf, argv[i], JCONF_OPT_AM)) return FALSE; jconf->amnow->analysis.para.raw_e = TRUE; continue; } else if (strmatch(argv[i],"-norawe")) { if (!check_section(jconf, argv[i], JCONF_OPT_AM)) return FALSE; jconf->amnow->analysis.para.raw_e = FALSE; continue; } else if (strmatch(argv[i],"-enormal")) { if (!check_section(jconf, argv[i], JCONF_OPT_AM)) return FALSE; jconf->amnow->analysis.para.enormal = TRUE; continue; } else if (strmatch(argv[i],"-noenormal")) { if (!check_section(jconf, argv[i], JCONF_OPT_AM)) return FALSE; jconf->amnow->analysis.para.enormal = FALSE; continue; } else if (strmatch(argv[i],"-escale")) { if (!check_section(jconf, argv[i], 
JCONF_OPT_AM)) return FALSE; GET_TMPARG; jconf->amnow->analysis.para.escale = atof(tmparg); continue; } else if (strmatch(argv[i],"-silfloor")) { if (!check_section(jconf, argv[i], JCONF_OPT_AM)) return FALSE; GET_TMPARG; jconf->amnow->analysis.para.silFloor = atof(tmparg); continue; } else if (strmatch(argv[i],"-delwin")) { /* Delta window length */ if (!check_section(jconf, argv[i], JCONF_OPT_AM)) return FALSE; GET_TMPARG; jconf->amnow->analysis.para.delWin = atoi(tmparg); continue; } else if (strmatch(argv[i],"-accwin")) { /* Acceleration window length */ if (!check_section(jconf, argv[i], JCONF_OPT_AM)) return FALSE; GET_TMPARG; jconf->amnow->analysis.para.accWin = atoi(tmparg); continue; } else if (strmatch(argv[i],"-ssalpha")) { /* alpha coef. for SS */ if (!check_section(jconf, argv[i], JCONF_OPT_AM)) return FALSE; GET_TMPARG; jconf->amnow->frontend.ss_alpha = atof(tmparg); continue; } else if (strmatch(argv[i],"-ssfloor")) { /* spectral floor for SS */ if (!check_section(jconf, argv[i], JCONF_OPT_AM)) return FALSE; GET_TMPARG; jconf->amnow->frontend.ss_floor = atof(tmparg); continue; } else if (strmatch(argv[i],"-cvn")) { if (!check_section(jconf, argv[i], JCONF_OPT_AM)) return FALSE; jconf->amnow->analysis.para.cvn = 1; continue; } else if (strmatch(argv[i],"-nocvn")) { if (!check_section(jconf, argv[i], JCONF_OPT_AM)) return FALSE; jconf->amnow->analysis.para.cvn = 0; continue; } else if (strmatch(argv[i],"-vtln")) { /* VTLN */ if (!check_section(jconf, argv[i], JCONF_OPT_AM)) return FALSE; GET_TMPARG; jconf->amnow->analysis.para.vtln_alpha = (float)atof(tmparg); GET_TMPARG; jconf->amnow->analysis.para.vtln_lower = (float)atof(tmparg); GET_TMPARG; jconf->amnow->analysis.para.vtln_upper = (float)atof(tmparg); continue; } else if (strmatch(argv[i],"-novtln")) { /* disable VTLN */ if (!check_section(jconf, argv[i], JCONF_OPT_AM)) return FALSE; jconf->amnow->analysis.para.vtln_alpha = 1.0; continue; } else if (strmatch(argv[i],"-48")) { /* use 48kHz input and down to 16kHz */ if (!check_section(jconf, argv[i], JCONF_OPT_GLOBAL)) return FALSE; jconf->input.use_ds48to16 = TRUE; continue; } else if (strmatch(argv[i],"-version") || strmatch(argv[i], "--version") || strmatch(argv[i], "-setting") || strmatch(argv[i], "--setting")) { /* print version and exit */ j_put_header(stderr); j_put_compile_defs(stderr); fprintf(stderr, "\n"); j_put_library_defs(stderr); return FALSE; } else if (strmatch(argv[i],"-quiet")) { /* minimum output */ debug2_flag = verbose_flag = FALSE; continue; } else if (strmatch(argv[i],"-debug")) { /* debug mode: output huge log */ debug2_flag = verbose_flag = TRUE; continue; } else if (strmatch(argv[i],"-callbackdebug")) { /* output callback debug message */ callback_debug_flag = TRUE; continue; } else if (strmatch(argv[i],"-progout")) { /* enable progressive output */ if (!check_section(jconf, argv[i], JCONF_OPT_SR)) return FALSE; jconf->searchnow->output.progout_flag = TRUE; continue; } else if (strmatch(argv[i],"-proginterval")) { /* interval for -progout */ if (!check_section(jconf, argv[i], JCONF_OPT_SR)) return FALSE; GET_TMPARG; jconf->searchnow->output.progout_interval = atoi(tmparg); continue; } else if (strmatch(argv[i],"-demo")) { /* quiet + progout */ debug2_flag = verbose_flag = FALSE; jconf->searchnow->output.progout_flag = TRUE; continue; } else if (strmatch(argv[i],"-walign")) { /* do forced alignment by word */ if (!check_section(jconf, argv[i], JCONF_OPT_SR)) return FALSE; jconf->searchnow->annotate.align_result_word_flag = TRUE; continue; } else if 
(strmatch(argv[i],"-palign")) { /* do forced alignment by phoneme */ if (!check_section(jconf, argv[i], JCONF_OPT_SR)) return FALSE; jconf->searchnow->annotate.align_result_phoneme_flag = TRUE; continue; } else if (strmatch(argv[i],"-salign")) { /* do forced alignment by state */ if (!check_section(jconf, argv[i], JCONF_OPT_SR)) return FALSE; jconf->searchnow->annotate.align_result_state_flag = TRUE; continue; } else if (strmatch(argv[i],"-output")) { /* output up to N candidate */ if (!check_section(jconf, argv[i], JCONF_OPT_SR)) return FALSE; GET_TMPARG; jconf->searchnow->output.output_hypo_maxnum = atoi(tmparg); continue; } else if (strmatch(argv[i],"-1pass")) { /* do only 1st pass */ if (!check_section(jconf, argv[i], JCONF_OPT_SR)) return FALSE; jconf->searchnow->compute_only_1pass = TRUE; continue; } else if (strmatch(argv[i],"-hlist")) { /* HMM list file */ if (!check_section(jconf, argv[i], JCONF_OPT_AM)) return FALSE; FREE_MEMORY(jconf->amnow->mapfilename); GET_TMPARG; jconf->amnow->mapfilename = filepath(tmparg, cwd); continue; } else if (strmatch(argv[i],"-nlr")) { /* word LR n-gram (ARPA) */ if (!check_section(jconf, argv[i], JCONF_OPT_LM)) return FALSE; FREE_MEMORY(jconf->lmnow->ngram_filename_lr_arpa); GET_TMPARG; jconf->lmnow->ngram_filename_lr_arpa = filepath(tmparg, cwd); FREE_MEMORY(jconf->lmnow->ngram_filename); continue; } else if (strmatch(argv[i],"-nrl")) { /* word RL n-gram (ARPA) */ if (!check_section(jconf, argv[i], JCONF_OPT_LM)) return FALSE; FREE_MEMORY(jconf->lmnow->ngram_filename_rl_arpa); GET_TMPARG; jconf->lmnow->ngram_filename_rl_arpa = filepath(tmparg, cwd); FREE_MEMORY(jconf->lmnow->ngram_filename); continue; } else if (strmatch(argv[i],"-lmp")) { /* LM weight and penalty (pass1) */ if (!check_section(jconf, argv[i], JCONF_OPT_SR)) return FALSE; GET_TMPARG; jconf->searchnow->lmp.lm_weight = (LOGPROB)atof(tmparg); GET_TMPARG; jconf->searchnow->lmp.lm_penalty = (LOGPROB)atof(tmparg); jconf->searchnow->lmp.lmp_specified = TRUE; continue; } else if (strmatch(argv[i],"-lmp2")) { /* LM weight and penalty (pass2) */ if (!check_section(jconf, argv[i], JCONF_OPT_SR)) return FALSE; GET_TMPARG; jconf->searchnow->lmp.lm_weight2 = (LOGPROB)atof(tmparg); GET_TMPARG; jconf->searchnow->lmp.lm_penalty2 = (LOGPROB)atof(tmparg); jconf->searchnow->lmp.lmp2_specified = TRUE; continue; } else if (strmatch(argv[i],"-transp")) { /* penalty for transparent word */ if (!check_section(jconf, argv[i], JCONF_OPT_SR)) return FALSE; GET_TMPARG; jconf->searchnow->lmp.lm_penalty_trans = (LOGPROB)atof(tmparg); continue; } else if (strmatch(argv[i],"-gram")) { /* comma-separatedlist of grammar prefix */ if (!check_section(jconf, argv[i], JCONF_OPT_LM)) return FALSE; GET_TMPARG; if (multigram_add_prefix_list(tmparg, cwd, jconf->lmnow, LM_DFA_GRAMMAR) == FALSE) { jlog("ERROR: m_options: failed to read some grammars\n"); return FALSE; } continue; } else if (strmatch(argv[i],"-gramlist")) { /* file of grammar prefix list */ if (!check_section(jconf, argv[i], JCONF_OPT_LM)) return FALSE; GET_TMPARG; tmparg = filepath(tmparg, cwd); if (multigram_add_prefix_filelist(tmparg, jconf->lmnow, LM_DFA_GRAMMAR) == FALSE) { jlog("ERROR: m_options: failed to read some grammars\n"); free(tmparg); return FALSE; } free(tmparg); continue; } else if (strmatch(argv[i],"-userlm")) { if (!check_section(jconf, argv[i], JCONF_OPT_LM)) return FALSE; /* just set lm flags here */ if (jconf->lmnow->lmtype != LM_PROB && jconf->lmnow->lmtype != LM_UNDEF) { jlog("ERROR: m_options: LM type conflicts: multiple LM 
specified?\n"); return FALSE; } jconf->lmnow->lmtype = LM_PROB; if (jconf->lmnow->lmvar != LM_UNDEF && jconf->lmnow->lmvar != LM_NGRAM_USER) { jlog("ERROR: m_options: statistical model conflict\n"); return FALSE; } jconf->lmnow->lmvar = LM_NGRAM_USER; continue; } else if (strmatch(argv[i],"-nogram")) { /* remove grammar list */ if (!check_section(jconf, argv[i], JCONF_OPT_LM)) return FALSE; multigram_remove_gramlist(jconf->lmnow); FREE_MEMORY(jconf->lmnow->dfa_filename); FREE_MEMORY(jconf->lmnow->dictfilename); if (jconf->lmnow->lmtype == LM_UNDEF) { jconf->lmnow->lmtype = LM_DFA; jconf->lmnow->lmvar = LM_DFA_GRAMMAR; } continue; } else if (strmatch(argv[i],"-dfa")) { /* DFA filename */ if (!check_section(jconf, argv[i], JCONF_OPT_LM)) return FALSE; FREE_MEMORY(jconf->lmnow->dfa_filename); GET_TMPARG; jconf->lmnow->dfa_filename = filepath(tmparg, cwd); continue; } else if (strmatch(argv[i],"-penalty1")) { /* word insertion penalty (pass1) */ if (!check_section(jconf, argv[i], JCONF_OPT_SR)) return FALSE; GET_TMPARG; jconf->searchnow->lmp.penalty1 = (LOGPROB)atof(tmparg); continue; } else if (strmatch(argv[i],"-penalty2")) { /* word insertion penalty (pass2) */ if (!check_section(jconf, argv[i], JCONF_OPT_SR)) return FALSE; GET_TMPARG; jconf->searchnow->lmp.penalty2 = (LOGPROB)atof(tmparg); continue; } else if (strmatch(argv[i],"-spmodel") || strmatch(argv[i], "-sp")) { /* name of short pause word */ if (!check_section(jconf, argv[i], JCONF_OPT_AM)) return FALSE; FREE_MEMORY(jconf->amnow->spmodel_name); GET_TMPARG; jconf->amnow->spmodel_name = strcpy((char*)mymalloc(strlen(tmparg)+1),tmparg); continue; } else if (strmatch(argv[i],"-multipath")) { /* force multipath mode */ if (!check_section(jconf, argv[i], JCONF_OPT_AM)) return FALSE; jconf->amnow->force_multipath = TRUE; continue; } else if (strmatch(argv[i],"-iwsp")) { /* enable inter-word short pause handing (for multipath) */ if (!check_section(jconf, argv[i], JCONF_OPT_LM)) return FALSE; jconf->lmnow->enable_iwsp = TRUE; continue; } else if (strmatch(argv[i],"-iwsppenalty")) { /* set inter-word short pause transition penalty (for multipath) */ if (!check_section(jconf, argv[i], JCONF_OPT_AM)) return FALSE; GET_TMPARG; jconf->amnow->iwsp_penalty = atof(tmparg); continue; } else if (strmatch(argv[i],"-silhead")) { /* head silence word name */ if (!check_section(jconf, argv[i], JCONF_OPT_LM)) return FALSE; FREE_MEMORY(jconf->lmnow->head_silname); GET_TMPARG; jconf->lmnow->head_silname = strcpy((char*)mymalloc(strlen(tmparg)+1),tmparg); continue; } else if (strmatch(argv[i],"-siltail")) { /* tail silence word name */ if (!check_section(jconf, argv[i], JCONF_OPT_LM)) return FALSE; FREE_MEMORY(jconf->lmnow->tail_silname); GET_TMPARG; jconf->lmnow->tail_silname = strcpy((char*)mymalloc(strlen(tmparg)+1),tmparg); continue; } else if (strmatch(argv[i],"-mapunk")) { /* unknown word */ if (!check_section(jconf, argv[i], JCONF_OPT_LM)) return FALSE; GET_TMPARG; strncpy(jconf->lmnow->unknown_name, tmparg, UNK_WORD_MAXLEN); continue; } else if (strmatch(argv[i],"-iwspword")) { /* add short pause word */ if (!check_section(jconf, argv[i], JCONF_OPT_LM)) return FALSE; jconf->lmnow->enable_iwspword = TRUE; continue; } else if (strmatch(argv[i],"-iwspentry")) { /* content of the iwspword */ if (!check_section(jconf, argv[i], JCONF_OPT_LM)) return FALSE; FREE_MEMORY(jconf->lmnow->iwspentry); GET_TMPARG; jconf->lmnow->iwspentry = strcpy((char*)mymalloc(strlen(tmparg)+1),tmparg); continue; } else if (strmatch(argv[i],"-iwcache")) { /* control cross-word 
LM cache */ #ifdef HASH_CACHE_IW if (!check_section(jconf, argv[i], JCONF_OPT_SR)) return FALSE; GET_TMPARG; jconf->searchnow->pass1.iw_cache_rate = atof(tmparg); if (jconf->searchnow->pass1.iw_cache_rate > 100) jconf->searchnow->pass1.iw_cache_rate = 100; if (jconf->searchnow->pass1.iw_cache_rate < 1) jconf->searchnow->pass1.iw_cache_rate = 1; #else jlog("WARNING: m_options: HASH_CACHE_IW disabled, \"-iwcache\" ignored\n"); #endif continue; } else if (strmatch(argv[i],"-sepnum")) { /* N-best frequent word will be separated from tree */ #ifdef SEPARATE_BY_UNIGRAM if (!check_section(jconf, argv[i], JCONF_OPT_LM)) return FALSE; GET_TMPARG; jconf->lmnow->separate_wnum = atoi(tmparg); #else jlog("WARNING: m_options: SEPARATE_BY_UNIGRAM disabled, \"-sepnum\" ignored\n"); i++; #endif continue; #ifdef USE_NETAUDIO } else if (strmatch(argv[i],"-NA")) { /* netautio device name */ if (!check_section(jconf, argv[i], JCONF_OPT_GLOBAL)) return FALSE; FREE_MEMORY(jconf->input.netaudio_devname); GET_TMPARG; jconf->input.netaudio_devname = strcpy((char*)mymalloc(strlen(tmparg)+1),tmparg); continue; #endif } else if (strmatch(argv[i],"-adport")) { /* adinnet port num */ if (!check_section(jconf, argv[i], JCONF_OPT_GLOBAL)) return FALSE; GET_TMPARG; jconf->input.adinnet_port = atoi(tmparg); continue; } else if (strmatch(argv[i],"-nostrip")) { /* do not strip zero samples */ if (!check_section(jconf, argv[i], JCONF_OPT_GLOBAL)) return FALSE; jconf->preprocess.strip_zero_sample = FALSE; continue; } else if (strmatch(argv[i],"-zmean")) { /* enable DC offset by zero mean */ if (!check_section(jconf, argv[i], JCONF_OPT_GLOBAL)) return FALSE; jconf->preprocess.use_zmean = TRUE; continue; } else if (strmatch(argv[i],"-nozmean")) { /* disable DC offset by zero mean */ if (!check_section(jconf, argv[i], JCONF_OPT_GLOBAL)) return FALSE; jconf->preprocess.use_zmean = FALSE; continue; } else if (strmatch(argv[i],"-zmeanframe")) { /* enable frame-wise DC offset by zero mean */ if (!check_section(jconf, argv[i], JCONF_OPT_AM)) return FALSE; jconf->amnow->analysis.para.zmeanframe = TRUE; continue; } else if (strmatch(argv[i],"-nozmeanframe")) { /* disable frame-wise DC offset by zero mean */ if (!check_section(jconf, argv[i], JCONF_OPT_AM)) return FALSE; jconf->amnow->analysis.para.zmeanframe = FALSE; continue; } else if (strmatch(argv[i],"-usepower")) { /* use power instead of magnitude in filterbank analysis */ if (!check_section(jconf, argv[i], JCONF_OPT_AM)) return FALSE; jconf->amnow->analysis.para.usepower = TRUE; continue; } else if (strmatch(argv[i],"-nousepower")) { /* use magnitude in fbank analysis (default) */ if (!check_section(jconf, argv[i], JCONF_OPT_AM)) return FALSE; jconf->amnow->analysis.para.usepower = FALSE; continue; } else if (strmatch(argv[i],"-spsegment")) { /* enable short-pause segmentation */ if (!check_section(jconf, argv[i], JCONF_OPT_SR)) return FALSE; jconf->searchnow->successive.enabled = TRUE; continue; } else if (strmatch(argv[i],"-spdur")) { /* speech down-trigger duration threshold in frame */ if (!check_section(jconf, argv[i], JCONF_OPT_SR)) return FALSE; GET_TMPARG; jconf->searchnow->successive.sp_frame_duration = atoi(tmparg); continue; #ifdef SPSEGMENT_NAIST } else if (strmatch(argv[i],"-spmargin")) { /* speech up-trigger backstep margin in frame */ if (!check_section(jconf, argv[i], JCONF_OPT_SR)) return FALSE; GET_TMPARG; jconf->searchnow->successive.sp_margin = atoi(tmparg); continue; } else if (strmatch(argv[i],"-spdelay")) { /* speech up-trigger delay frame */ if 
(!check_section(jconf, argv[i], JCONF_OPT_SR)) return FALSE; GET_TMPARG; jconf->searchnow->successive.sp_delay = atoi(tmparg); continue; #endif } else if (strmatch(argv[i],"-pausemodels")) { /* short-pause duration threshold */ if (!check_section(jconf, argv[i], JCONF_OPT_SR)) return FALSE; FREE_MEMORY(jconf->searchnow->successive.pausemodelname); GET_TMPARG; jconf->searchnow->successive.pausemodelname = strcpy((char*)mymalloc(strlen(tmparg)+1),tmparg); continue; } else if (strmatch(argv[i],"-gprune")) { /* select Gaussian pruning method */ if (!check_section(jconf, argv[i], JCONF_OPT_AM)) return FALSE; GET_TMPARG; if (strmatch(tmparg,"safe")) { /* safest, slowest */ jconf->amnow->gprune_method = GPRUNE_SEL_SAFE; } else if (strmatch(tmparg,"heuristic")) { jconf->amnow->gprune_method = GPRUNE_SEL_HEURISTIC; } else if (strmatch(tmparg,"beam")) { /* fastest */ jconf->amnow->gprune_method = GPRUNE_SEL_BEAM; } else if (strmatch(tmparg,"none")) { /* no prune: compute all Gaussian */ jconf->amnow->gprune_method = GPRUNE_SEL_NONE; } else if (strmatch(tmparg,"default")) { jconf->amnow->gprune_method = GPRUNE_SEL_UNDEF; #ifdef ENABLE_PLUGIN } else if ((sid = plugin_find_optname("calcmix_get_optname", tmparg)) != -1) { /* mixture calculation plugin */ jconf->amnow->gprune_method = GPRUNE_SEL_USER; jconf->amnow->gprune_plugin_source = sid; #endif } else { jlog("ERROR: m_options: no such pruning method \"%s\"\n", argv[0], tmparg); return FALSE; } continue; /* * } else if (strmatch(argv[i],"-reorder")) { * result_reorder_flag = TRUE; * continue; */ } else if (strmatch(argv[i],"-no_ccd")) { /* force triphone handling = OFF */ if (!check_section(jconf, argv[i], JCONF_OPT_SR)) return FALSE; jconf->searchnow->ccd_handling = FALSE; jconf->searchnow->force_ccd_handling = TRUE; continue; } else if (strmatch(argv[i],"-force_ccd")) { /* force triphone handling = ON */ if (!check_section(jconf, argv[i], JCONF_OPT_SR)) return FALSE; jconf->searchnow->ccd_handling = TRUE; jconf->searchnow->force_ccd_handling = TRUE; continue; } else if (strmatch(argv[i],"-iwcd1")) { /* select cross-word triphone computation method */ if (!check_section(jconf, argv[i], JCONF_OPT_AM)) return FALSE; GET_TMPARG; if (strmatch(tmparg, "max")) { /* use maximum score in triphone variants */ jconf->amnow->iwcdmethod = IWCD_MAX; } else if (strmatch(tmparg, "avg")) { /* use average in variants */ jconf->amnow->iwcdmethod = IWCD_AVG; } else if (strmatch(tmparg, "best")) { /* use average in variants */ jconf->amnow->iwcdmethod = IWCD_NBEST; GET_TMPARG; jconf->amnow->iwcdmaxn = atoi(tmparg); } else { jlog("ERROR: m_options: -iwcd1: wrong argument (max|avg|best N): %s\n", argv[0], tmparg); return FALSE; } continue; } else if (strmatch(argv[i],"-tmix")) { /* num of mixture to select */ if (!check_section(jconf, argv[i], JCONF_OPT_AM)) return FALSE; if (i + 1 < argc && isdigit(argv[i+1][0])) { jconf->amnow->mixnum_thres = atoi(argv[++i]); } continue; } else if (strmatch(argv[i],"-b2") || strmatch(argv[i],"-bw") || strmatch(argv[i],"-wb")) { /* word beam width in 2nd pass */ if (!check_section(jconf, argv[i], JCONF_OPT_SR)) return FALSE; GET_TMPARG; jconf->searchnow->pass2.enveloped_bestfirst_width = atoi(tmparg); continue; } else if (strmatch(argv[i],"-hgs")) { /* Gaussian selection model file */ if (!check_section(jconf, argv[i], JCONF_OPT_AM)) return FALSE; FREE_MEMORY(jconf->amnow->hmm_gs_filename); GET_TMPARG; jconf->amnow->hmm_gs_filename = filepath(tmparg, cwd); continue; } else if (strmatch(argv[i],"-booknum")) { /* num of state to select in 
GS */ if (!check_section(jconf, argv[i], JCONF_OPT_AM)) return FALSE; GET_TMPARG; jconf->amnow->gs_statenum = atoi(tmparg); continue; } else if (strmatch(argv[i],"-gshmm")) { /* same as "-hgs" */ if (!check_section(jconf, argv[i], JCONF_OPT_AM)) return FALSE; FREE_MEMORY(jconf->amnow->hmm_gs_filename); GET_TMPARG; jconf->amnow->hmm_gs_filename = filepath(tmparg, cwd); continue; } else if (strmatch(argv[i],"-gsnum")) { /* same as "-booknum" */ if (!check_section(jconf, argv[i], JCONF_OPT_AM)) return FALSE; GET_TMPARG; jconf->amnow->gs_statenum = atoi(tmparg); continue; } else if (strmatch(argv[i],"-cmnload")) { /* load CMN parameter from file */ if (!check_section(jconf, argv[i], JCONF_OPT_AM)) return FALSE; FREE_MEMORY(jconf->amnow->analysis.cmnload_filename); GET_TMPARG; jconf->amnow->analysis.cmnload_filename = filepath(tmparg, cwd); continue; } else if (strmatch(argv[i],"-cmnsave")) { /* save CMN parameter to file */ if (!check_section(jconf, argv[i], JCONF_OPT_AM)) return FALSE; FREE_MEMORY(jconf->amnow->analysis.cmnsave_filename); GET_TMPARG; jconf->amnow->analysis.cmnsave_filename = filepath(tmparg, cwd); continue; } else if (strmatch(argv[i],"-cmnupdate")) { /* update CMN parameter */ if (!check_section(jconf, argv[i], JCONF_OPT_AM)) return FALSE; jconf->amnow->analysis.cmn_update = TRUE; continue; } else if (strmatch(argv[i],"-cmnnoupdate")) { /* not update CMN parameter */ if (!check_section(jconf, argv[i], JCONF_OPT_AM)) return FALSE; jconf->amnow->analysis.cmn_update = FALSE; continue; } else if (strmatch(argv[i],"-cmnmapweight")) { /* CMN weight for MAP */ if (!check_section(jconf, argv[i], JCONF_OPT_AM)) return FALSE; GET_TMPARG; jconf->amnow->analysis.cmn_map_weight = (float)atof(tmparg); continue; } else if (strmatch(argv[i],"-sscalc")) { /* do spectral subtraction (SS) for raw file input */ if (!check_section(jconf, argv[i], JCONF_OPT_AM)) return FALSE; jconf->amnow->frontend.sscalc = TRUE; FREE_MEMORY(jconf->amnow->frontend.ssload_filename); continue; } else if (strmatch(argv[i],"-sscalclen")) { /* head silence length used to compute SS (in msec) */ if (!check_section(jconf, argv[i], JCONF_OPT_AM)) return FALSE; GET_TMPARG; jconf->amnow->frontend.sscalc_len = atoi(tmparg); continue; } else if (strmatch(argv[i],"-ssload")) { /* load SS parameter from file */ if (!check_section(jconf, argv[i], JCONF_OPT_AM)) return FALSE; FREE_MEMORY(jconf->amnow->frontend.ssload_filename); GET_TMPARG; jconf->amnow->frontend.ssload_filename = filepath(tmparg, cwd); jconf->amnow->frontend.sscalc = FALSE; continue; #ifdef CONFIDENCE_MEASURE } else if (strmatch(argv[i],"-cmalpha")) { /* CM log score scaling factor */ if (!check_section(jconf, argv[i], JCONF_OPT_SR)) return FALSE; #ifdef CM_MULTIPLE_ALPHA GET_TMPARG; jconf->searchnow->annotate.cm_alpha_bgn = (LOGPROB)atof(tmparg); GET_TMPARG; jconf->searchnow->annotate.cm_alpha_end = (LOGPROB)atof(tmparg); GET_TMPARG; jconf->searchnow->annotate.cm_alpha_step = (LOGPROB)atof(tmparg); jconf->searchnow->annotate.cm_alpha_num = (int)((jconf->searchnow->annotate.cm_alpha_end - jconf->searchnow->annotate.cm_alpha_bgn) / jconf->searchnow->annotate.cm_alpha_step) + 1; if (jconf->searchnow->annotate.cm_alpha_num > 100) { jlog("ERROR: m_option: cm_alpha step num exceeds limit (100)\n"); return FALSE; } #else GET_TMPARG; jconf->searchnow->annotate.cm_alpha = (LOGPROB)atof(tmparg); #endif continue; #ifdef CM_SEARCH_LIMIT } else if (strmatch(argv[i],"-cmthres")) { /* CM cut threshold for CM decoding */ if (!check_section(jconf, argv[i], JCONF_OPT_SR)) return 
FALSE; GET_TMPARG; jconf->searchnow->annotate.cm_cut_thres = (LOGPROB)atof(tmparg); continue; #endif #ifdef CM_SEARCH_LIMIT_POP } else if (strmatch(argv[i],"-cmthres2")) { /* CM cut threshold for CM decoding */ if (!check_section(jconf, argv[i], JCONF_OPT_SR)) return FALSE; GET_TMPARG; jconf->searchnow->annotate.cm_cut_thres_pop = (LOGPROB)atof(tmparg); continue; #endif #endif /* CONFIDENCE_MEASURE */ } else if (strmatch(argv[i],"-gmm")) { /* load SS parameter from file */ if (!check_section(jconf, argv[i], JCONF_OPT_GLOBAL)) return FALSE; FREE_MEMORY(jconf->reject.gmm_filename); GET_TMPARG; jconf->reject.gmm_filename = filepath(tmparg, cwd); continue; } else if (strmatch(argv[i],"-gmmnum")) { /* num of Gaussian pruning for GMM */ if (!check_section(jconf, argv[i], JCONF_OPT_GLOBAL)) return FALSE; GET_TMPARG; jconf->reject.gmm_gprune_num = atoi(tmparg); continue; } else if (strmatch(argv[i],"-gmmreject")) { if (!check_section(jconf, argv[i], JCONF_OPT_GLOBAL)) return FALSE; GET_TMPARG; FREE_MEMORY(jconf->reject.gmm_reject_cmn_string); jconf->reject.gmm_reject_cmn_string = strcpy((char *)mymalloc(strlen(tmparg)+1), tmparg); continue; #ifdef GMM_VAD } else if (strmatch(argv[i],"-gmmmargin")) { /* backstep margin */ if (!check_section(jconf, argv[i], JCONF_OPT_GLOBAL)) return FALSE; GET_TMPARG; jconf->detect.gmm_margin = atoi(tmparg); continue; } else if (strmatch(argv[i],"-gmmup")) { /* uptrigger threshold */ if (!check_section(jconf, argv[i], JCONF_OPT_GLOBAL)) return FALSE; GET_TMPARG; jconf->detect.gmm_uptrigger_thres = atof(tmparg); continue; } else if (strmatch(argv[i],"-gmmdown")) { /* uptrigger threshold */ if (!check_section(jconf, argv[i], JCONF_OPT_GLOBAL)) return FALSE; GET_TMPARG; jconf->detect.gmm_downtrigger_thres = atof(tmparg); continue; #endif } else if (strmatch(argv[i],"-htkconf")) { if (!check_section(jconf, argv[i], JCONF_OPT_AM)) return FALSE; GET_TMPARG; tmparg = filepath(tmparg, cwd); if (htk_config_file_parse(tmparg, &(jconf->amnow->analysis.para_htk)) == FALSE) { jlog("ERROR: m_options: failed to read %s\n", tmparg); free(tmparg); return FALSE; } free(tmparg); continue; } else if (strmatch(argv[i], "-wlist")) { if (!check_section(jconf, argv[i], JCONF_OPT_LM)) return FALSE; GET_TMPARG; tmparg = filepath(tmparg, cwd); if (multigram_add_prefix_filelist(tmparg, jconf->lmnow, LM_DFA_WORD) == FALSE) { jlog("ERROR: m_options: failed to read some word lists\n"); free(tmparg); return FALSE; } free(tmparg); continue; } else if (strmatch(argv[i], "-wsil")) { /* * if (jconf->lmnow->lmvar != LM_UNDEF && jconf->lmnow->lmvar != LM_DFA_WORD) { * jlog("ERROR: \"-wsil\" only valid for isolated word recognition mode\n"); * return FALSE; * } */ if (!check_section(jconf, argv[i], JCONF_OPT_LM)) return FALSE; GET_TMPARG; strncpy(jconf->lmnow->wordrecog_head_silence_model_name, tmparg, MAX_HMMNAME_LEN); GET_TMPARG; strncpy(jconf->lmnow->wordrecog_tail_silence_model_name, tmparg, MAX_HMMNAME_LEN); GET_TMPARG; if (strmatch(tmparg, "NULL")) { jconf->lmnow->wordrecog_silence_context_name[0] = '\0'; } else { strncpy(jconf->lmnow->wordrecog_silence_context_name, tmparg, MAX_HMMNAME_LEN); } continue; #ifdef DETERMINE } else if (strmatch(argv[i], "-wed")) { //if (jconf->lmnow->lmvar != LM_UNDEF && jconf->lmnow->lmvar != LM_DFA_WORD) { //jlog("ERROR: \"-wed\" only valid for isolated word recognition mode\n"); //return FALSE; //} if (!check_section(jconf, argv[i], JCONF_OPT_SR)) return FALSE; GET_TMPARG; jconf->searchnow->pass1.determine_score_thres = atof(tmparg); GET_TMPARG; 
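/* (added note) "-wed" takes two arguments in this order: a score
   threshold (read above as a float) and a duration threshold (read here
   as an integer, presumably in frames).  For illustration only:
   "-wed 10.0 6", where both numbers are invented values.  This option
   exists only in builds with DETERMINE defined. */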
jconf->searchnow->pass1.determine_duration_thres = atoi(tmparg); continue; #endif } else if (strmatch(argv[i], "-inactive")) { /* start inactive */ if (!check_section(jconf, argv[i], JCONF_OPT_SR)) return FALSE; jconf->searchnow->sw.start_inactive = TRUE; continue; } else if (strmatch(argv[i], "-active")) { /* start active (default) */ if (!check_section(jconf, argv[i], JCONF_OPT_SR)) return FALSE; jconf->searchnow->sw.start_inactive = FALSE; continue; } else if (strmatch(argv[i],"-fallback1pass")) { /* use 1st pass result on search failure */ if (!check_section(jconf, argv[i], JCONF_OPT_SR)) return FALSE; jconf->searchnow->sw.fallback_pass1_flag = TRUE; continue; #ifdef ENABLE_PLUGIN } else if (strmatch(argv[i],"-plugindir")) { GET_TMPARG; plugin_load_dirs(tmparg); continue; #endif } else if (strmatch(argv[i],"-adddict")) { if (!check_section(jconf, argv[i], JCONF_OPT_LM)) return FALSE; GET_TMPARG; tmparg = filepath(tmparg, cwd); j_add_dict(jconf->lmnow, tmparg); free(tmparg); continue; } else if (strmatch(argv[i],"-addentry")) { if (!check_section(jconf, argv[i], JCONF_OPT_LM)) return FALSE; GET_TMPARG; j_add_word(jconf->lmnow, tmparg); continue; } if (argv[i][0] == '-' && strlen(argv[i]) == 2) { /* 1-letter options */ switch(argv[i][1]) { case 'h': /* hmmdefs */ if (!check_section(jconf, argv[i], JCONF_OPT_AM)) return FALSE; FREE_MEMORY(jconf->amnow->hmmfilename); GET_TMPARG; jconf->amnow->hmmfilename = filepath(tmparg, cwd); break; case 'v': /* dictionary */ if (!check_section(jconf, argv[i], JCONF_OPT_LM)) return FALSE; FREE_MEMORY(jconf->lmnow->dictfilename); GET_TMPARG; jconf->lmnow->dictfilename = filepath(tmparg, cwd); break; case 'w': /* word list (isolated word recognition) */ if (!check_section(jconf, argv[i], JCONF_OPT_LM)) return FALSE; GET_TMPARG; if (multigram_add_prefix_list(tmparg, cwd, jconf->lmnow, LM_DFA_WORD) == FALSE) { jlog("ERROR: m_options: failed to read some word list\n"); return FALSE; } break; case 'd': /* binary N-gram */ /* lmvar should be overriden by the content of the binary N-gram */ if (!check_section(jconf, argv[i], JCONF_OPT_LM)) return FALSE; FREE_MEMORY(jconf->lmnow->ngram_filename); FREE_MEMORY(jconf->lmnow->ngram_filename_lr_arpa); FREE_MEMORY(jconf->lmnow->ngram_filename_rl_arpa); GET_TMPARG; jconf->lmnow->ngram_filename = filepath(tmparg, cwd); break; case 'b': /* beam width in 1st pass */ if (!check_section(jconf, argv[i], JCONF_OPT_SR)) return FALSE; GET_TMPARG; jconf->searchnow->pass1.specified_trellis_beam_width = atoi(tmparg); break; case 's': /* stack size in 2nd pass */ if (!check_section(jconf, argv[i], JCONF_OPT_SR)) return FALSE; GET_TMPARG; jconf->searchnow->pass2.stack_size = atoi(tmparg); break; case 'n': /* N-best search */ if (!check_section(jconf, argv[i], JCONF_OPT_SR)) return FALSE; GET_TMPARG; jconf->searchnow->pass2.nbest = atoi(tmparg); break; case 'm': /* upper limit of hypothesis generation */ if (!check_section(jconf, argv[i], JCONF_OPT_SR)) return FALSE; GET_TMPARG; jconf->searchnow->pass2.hypo_overflow = atoi(tmparg); break; default: //jlog("ERROR: m_options: wrong argument: %s\n", argv[0], argv[i]); //return FALSE; unknown_opt = TRUE; } } else { /* error */ //jlog("ERROR: m_options: wrong argument: %s\n", argv[0], argv[i]); //return FALSE; unknown_opt = TRUE; } if (unknown_opt) { /* call user-side option processing */ switch(useropt_exec(jconf, argv, argc, &i)) { case 0: /* does not match user-side options */ jlog("ERROR: m_options: wrong argument: \"%s\"\n", argv[i]); return FALSE; case -1: /* Error in user-side 
function */ jlog("ERROR: m_options: error in processing \"%s\"\n", argv[i]); return FALSE; } } } /* set default values if not specified yet */ for(atmp=jconf->am_root;atmp;atmp=atmp->next) { if (!atmp->spmodel_name) { atmp->spmodel_name = strcpy((char*)mymalloc(strlen(SPMODEL_NAME_DEFAULT)+1), SPMODEL_NAME_DEFAULT); } } for(ltmp=jconf->lm_root;ltmp;ltmp=ltmp->next) { if (!ltmp->head_silname) { ltmp->head_silname = strcpy((char*)mymalloc(strlen(BEGIN_WORD_DEFAULT)+1), BEGIN_WORD_DEFAULT); } if (!ltmp->tail_silname) { ltmp->tail_silname = strcpy((char*)mymalloc(strlen(END_WORD_DEFAULT)+1), END_WORD_DEFAULT); } if (!ltmp->iwspentry) { ltmp->iwspentry = strcpy((char*)mymalloc(strlen(IWSPENTRY_DEFAULT)+1), IWSPENTRY_DEFAULT); } } #ifdef USE_NETAUDIO if (!jconf->input.netaudio_devname) { jconf->input.netaudio_devname = strcpy((char*)mymalloc(strlen(NETAUDIO_DEVNAME)+1), NETAUDIO_DEVNAME); } #endif /* USE_NETAUDIO */ return TRUE; } /** * * オプション関連のメモリ領域を解放する. * * * Free memories of variables allocated by option arguments. * * * @param jconf [i/o] jconf configuration data * * @callgraph * @callergraph */ void opt_release(Jconf *jconf) { JCONF_AM *am; JCONF_LM *lm; JCONF_SEARCH *s; FREE_MEMORY(jconf->input.inputlist_filename); #ifdef USE_NETAUDIO FREE_MEMORY(jconf->input.netaudio_devname); #endif /* USE_NETAUDIO */ FREE_MEMORY(jconf->reject.gmm_filename); FREE_MEMORY(jconf->reject.gmm_reject_cmn_string); for(am=jconf->am_root;am;am=am->next) { FREE_MEMORY(am->hmmfilename); FREE_MEMORY(am->mapfilename); FREE_MEMORY(am->spmodel_name); FREE_MEMORY(am->hmm_gs_filename); FREE_MEMORY(am->analysis.cmnload_filename); FREE_MEMORY(am->analysis.cmnsave_filename); FREE_MEMORY(am->frontend.ssload_filename); } for(lm=jconf->lm_root;lm;lm=lm->next) { FREE_MEMORY(lm->ngram_filename); FREE_MEMORY(lm->ngram_filename_lr_arpa); FREE_MEMORY(lm->ngram_filename_rl_arpa); FREE_MEMORY(lm->dfa_filename); FREE_MEMORY(lm->head_silname); FREE_MEMORY(lm->tail_silname); FREE_MEMORY(lm->iwspentry); FREE_MEMORY(lm->dictfilename); multigram_remove_gramlist(lm); } for(s=jconf->search_root;s;s=s->next) { FREE_MEMORY(s->successive.pausemodelname); } } /* end of file */ julius-4.2.2/libjulius/src/m_chkparam.c0000644001051700105040000003277012004452401016370 0ustar ritrlab/** * @file m_chkparam.c * * * @brief パラメータ設定の後処理. * * jconf ファイルおよびコマンドオプションによって与えられた * パラメータについて後処理を行い,最終的に認識処理で使用する値を確定する. * * * * @brief Post processing of parameters for recognition. * * These functions will finalize the parameter values for recognition. * They check for parameters given from jconf file or command line, * set default values if needed, and prepare for recognition. * * * * @author Akinobu LEE * @date Fri Mar 18 16:31:45 2005 * * $Revision: 1.7 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #include /** * * ファイルが存在して読み込み可能かチェックする. * * @param filename [in] ファイルパス名 * * * Check if a file actually exist and is readable. * * @param filename [in] file path name * * */ boolean checkpath(char *filename) { if (access(filename, R_OK) == -1) { jlog("ERROR: m_chkparam: cannot access %s\n", filename); return FALSE; } return TRUE; } /** * * @brief jconf設定パラメータを最終的に決定する * * この関数は,jconf ファイルやコマンドオプションによって与えられた * jconf 内のパラメータについて精査を行う. 具体的には,値の範囲のチェッ * クや,競合のチェック,設定から算出される各種パラメータの計算,使用 * するモデルに対する指定の有効性などをチェックする. 
* * この関数は,アプリケーションによって jconf の各値の指定が終了した直後, * エンジンインスタンスの作成やモデルのロードが行われる前に呼び出される * べきである. * * * * @brief Check and finalize jconf parameters. * * This functions parse through the global jconf configuration parameters. * This function checks for value range of variables, file existence, * competing specifications among variables or between variables and models, * calculate some parameters from the given values, etc. * * This function should be called just after all values are set by * jconf, command argument or by user application, and before creating * engine instance and loading models. * * * * @param jconf [i/o] global jconf configuration structure * * @return TRUE when all check has been passed, or FALSE if not passed. * * @callgraph * @callergraph * @ingroup jconf */ boolean j_jconf_finalize(Jconf *jconf) { boolean ok_p; JCONF_LM *lm; JCONF_AM *am; JCONF_SEARCH *s, *hs; ok_p = TRUE; /* update and tailor configuration */ /* if a search config has progout_flag enabled, set it to all config */ hs = NULL; for(s=jconf->search_root;s;s=s->next) { if (s->output.progout_flag) { hs = s; break; } } if (hs != NULL) { for(s=jconf->search_root;s;s=s->next) { s->output.progout_flag = hs->output.progout_flag; s->output.progout_interval = hs->output.progout_interval; } } /* if an instance has short-pause segmentation enabled, set it to global opt for parameter handling (only a recognizer with this option will decide the segmentation, but the segmentation should be synchronized for all the recognizer) */ for(s=jconf->search_root;s;s=s->next) { if (s->successive.enabled) { jconf->decodeopt.segment = TRUE; break; } } #ifdef GMM_VAD /* if GMM VAD enabled, set it to global */ if (jconf->reject.gmm_filename) { jconf->decodeopt.segment = TRUE; } #endif for(lm = jconf->lm_root; lm; lm = lm->next) { if (lm->lmtype == LM_UNDEF) { /* determine LM type from the specified LM files */ if (lm->ngram_filename_lr_arpa || lm->ngram_filename_rl_arpa || lm->ngram_filename) { /* n-gram specified */ lm->lmtype = LM_PROB; lm->lmvar = LM_NGRAM; } if (lm->gramlist_root) { /* DFA grammar specified */ if (lm->lmtype != LM_UNDEF) { jlog("ERROR: m_chkparam: LM conflicts: several LM of different type specified?\n"); return FALSE; } lm->lmtype = LM_DFA; lm->lmvar = LM_DFA_GRAMMAR; } if (lm->dfa_filename) { /* DFA grammar specified by "-dfa" */ if (lm->lmtype != LM_UNDEF && lm->lmvar != LM_DFA_GRAMMAR) { jlog("ERROR: m_chkparam: LM conflicts: several LM of different type specified?\n"); return FALSE; } lm->lmtype = LM_DFA; lm->lmvar = LM_DFA_GRAMMAR; } if (lm->wordlist_root) { /* word list specified */ if (lm->lmtype != LM_UNDEF) { jlog("ERROR: m_chkparam: LM conflicts: several LM of different type specified?\n"); return FALSE; } lm->lmtype = LM_DFA; lm->lmvar = LM_DFA_WORD; } } if (lm->lmtype == LM_UNDEF) { /* an LM is not specified */ jlog("ERROR: m_chkparam: you should specify at least one LM to run Julius!\n"); return FALSE; } if (lm->lmtype == LM_PROB) { if (lm->dictfilename == NULL) { jlog("ERROR: m_chkparam: needs dictionary file (-v dict_file)\n"); ok_p = FALSE; } } /* file existence check */ if (lm->dictfilename != NULL) if (!checkpath(lm->dictfilename)) ok_p = FALSE; if (lm->ngram_filename != NULL) if (!checkpath(lm->ngram_filename)) ok_p = FALSE; if (lm->ngram_filename_lr_arpa != NULL) if (!checkpath(lm->ngram_filename_lr_arpa)) ok_p = FALSE; if (lm->ngram_filename_rl_arpa != NULL) if (!checkpath(lm->ngram_filename_rl_arpa)) ok_p = FALSE; if (lm->dfa_filename != NULL) if (!checkpath(lm->dfa_filename)) ok_p = FALSE; } 
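#if 0
  /* Illustrative sketch, not part of the original source: dump the LM type
     decided in the loop above for each LM configuration.  Kept disabled;
     only fields and constants referenced elsewhere in this file are used. */
  {
    JCONF_LM *lmtmp;
    int lmcount = 0;
    for (lmtmp = jconf->lm_root; lmtmp; lmtmp = lmtmp->next) {
      lmcount++;
      jlog("DEBUG: m_chkparam: LM #%d decided as %s\n", lmcount,
           (lmtmp->lmtype == LM_PROB) ? "N-gram" :
           (lmtmp->lmvar == LM_DFA_WORD) ? "isolated word recognition" :
           "DFA grammar");
    }
  }
#endif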
for(am = jconf->am_root; am; am = am->next) { /* check if needed files are specified */ if (am->hmmfilename == NULL) { jlog("ERROR: m_chkparam: needs HMM definition file (-h hmmdef_file)\n"); ok_p = FALSE; } /* file existence check */ if (am->hmmfilename != NULL) if (!checkpath(am->hmmfilename)) ok_p = FALSE; if (am->mapfilename != NULL) if (!checkpath(am->mapfilename)) ok_p = FALSE; if (am->hmm_gs_filename != NULL) if (!checkpath(am->hmm_gs_filename)) ok_p = FALSE; /* cmn{save,load}_filename allows missing file (skipped if missing) */ if (am->frontend.ssload_filename != NULL) if (!checkpath(am->frontend.ssload_filename)) ok_p = FALSE; } if (jconf->reject.gmm_filename != NULL) if (!checkpath(jconf->reject.gmm_filename)) ok_p = FALSE; if (jconf->input.inputlist_filename != NULL) { if (jconf->input.speech_input != SP_RAWFILE && jconf->input.speech_input != SP_MFCFILE) { jlog("WARNING: m_chkparam: not file input, \"-filelist %s\" ignored\n", jconf->input.inputlist_filename); } else { if (!checkpath(jconf->input.inputlist_filename)) ok_p = FALSE; } } /* set default realtime flag according to input mode */ if (jconf->decodeopt.force_realtime_flag) { if (jconf->input.type == INPUT_VECTOR) { jlog("WARNING: m_chkparam: real-time concurrent processing is not needed on feature vector input\n"); jlog("WARNING: m_chkparam: real-time flag has turned off\n"); jconf->decodeopt.realtime_flag = FALSE; } else { jconf->decodeopt.realtime_flag = jconf->decodeopt.forced_realtime; } } /* check for cmn */ if (jconf->decodeopt.realtime_flag) { for(am = jconf->am_root; am; am = am->next) { if (am->analysis.cmn_update == FALSE && am->analysis.cmnload_filename == NULL) { jlog("ERROR: m_chkparam: when \"-cmnnoupdate\", initial cepstral normalisation data should be given by \"-cmnload\"\n"); ok_p = FALSE; } } } /* set values for search config */ for(s=jconf->search_root;s;s=s->next) { lm = s->lmconf; am = s->amconf; /* force context dependency handling flag for word-recognition mode */ if (lm->lmtype == LM_DFA && lm->lmvar == LM_DFA_WORD) { /* disable inter-word context dependent handling ("-no_ccd") */ s->ccd_handling = FALSE; s->force_ccd_handling = TRUE; /* force 1pass ("-1pass") */ s->compute_only_1pass = TRUE; } /* set default iwcd1 method from lm */ /* WARNING: THIS WILL BEHAVE WRONG IF MULTIPLE LM TYPE SPECIFIED */ /* RECOMMEND USING EXPLICIT OPTION */ if (am->iwcdmethod == IWCD_UNDEF) { switch(lm->lmtype) { case LM_PROB: am->iwcdmethod = IWCD_NBEST; break; case LM_DFA: am->iwcdmethod = IWCD_AVG; break; } } } /* check option validity with the current lm type */ /* just a warning message for user */ for(s=jconf->search_root;s;s=s->next) { lm = s->lmconf; am = s->amconf; if (lm->lmtype != LM_PROB) { /* in case not a probabilistic model */ if (s->lmp.lmp_specified) { jlog("WARNING: m_chkparam: \"-lmp\" only for N-gram, ignored\n"); } if (s->lmp.lmp2_specified) { jlog("WARNING: m_chkparam: \"-lmp2\" only for N-gram, ignored\n"); } if (s->lmp.lm_penalty_trans != 0.0) { jlog("WARNING: m_chkparam: \"-transp\" only for N-gram, ignored\n"); } if (lm->head_silname && !strmatch(lm->head_silname, BEGIN_WORD_DEFAULT)) { jlog("WARNING: m_chkparam: \"-silhead\" only for N-gram, ignored\n"); } if (lm->tail_silname && !strmatch(lm->tail_silname, END_WORD_DEFAULT)) { jlog("WARNING: m_chkparam: \"-siltail\" only for N-gram, ignored\n"); } if (lm->enable_iwspword) { jlog("WARNING: m_chkparam: \"-iwspword\" only for N-gram, ignored\n"); } if (lm->iwspentry && !strmatch(lm->iwspentry, IWSPENTRY_DEFAULT)) { jlog("WARNING: m_chkparam: 
\"-iwspentry\" only for N-gram, ignored\n"); } #ifdef HASH_CACHE_IW if (s->pass1.iw_cache_rate != 10) { jlog("WARNING: m_chkparam: \"-iwcache\" only for N-gram, ignored\n"); } #endif #ifdef SEPARATE_BY_UNIGRAM if (lm->separate_wnum != 150) { jlog("WARNING: m_chkparam: \"-sepnum\" only for N-gram, ignored\n"); } #endif } if (lm->lmtype != LM_DFA) { /* in case not a deterministic model */ if (s->pass2.looktrellis_flag) { jlog("WARNING: m_chkparam: \"-looktrellis\" only for grammar, ignored\n"); } if (s->output.multigramout_flag) { jlog("WARNING: m_chkparam: \"-multigramout\" only for grammar, ignored\n"); } if (s->lmp.penalty1 != 0.0) { jlog("WARNING: m_chkparam: \"-penalty1\" only for grammar, ignored\n"); } if (s->lmp.penalty2 != 0.0) { jlog("WARNING: m_chkparam: \"-penalty2\" only for grammar, ignored\n"); } } } if (!ok_p) { jlog("ERROR: m_chkparam: could not pass parameter check\n"); } else { jlog("STAT: jconf successfully finalized\n"); } if (debug2_flag) { print_jconf_overview(jconf); } return ok_p; } /** * * @brief あらかじめ定められた第1パスのデフォルトビーム幅を返す. * * デフォルトのビーム幅は,認識エンジンのコンパイル時設定や * 使用する音響モデルに従って選択される. これらの値は,20k の * IPA 評価セットで得られた最適値(精度を保ちつつ最大速度が得られる値) * である. * * @return 実行時の条件によって選択されたビーム幅 * * * @brief Returns the pre-defined default beam width on 1st pass of * beam search. * * The default beam width will be selected from the pre-defined values * according to the compilation-time engine setting and the type of * acoustic model. The pre-defined values were determined from the * development experiments on IPA evaluation testset of Japanese 20k-word * dictation task. * * @return the selected default beam width. * */ static int default_width(HTK_HMM_INFO *hmminfo) { if (strmatch(JULIUS_SETUP, "fast")) { /* for fast setup */ if (hmminfo->is_triphone) { if (hmminfo->is_tied_mixture) { /* tied-mixture triphones (PTM etc.) */ return(600); } else { /* shared-state triphone */ #ifdef PASS1_IWCD return(800); #else /* v2.1 compliant (no IWCD on 1st pass) */ return(1000); #endif } } else { /* monophone */ return(400); } } else { /* for standard / v2.1 setup */ if (hmminfo->is_triphone) { if (hmminfo->is_tied_mixture) { /* tied-mixture triphones (PTM etc.) */ return(800); } else { /* shared-state triphone */ #ifdef PASS1_IWCD return(1500); #else return(1500); /* v2.1 compliant (no IWCD on 1st pass) */ #endif } } else { /* monophone */ return(700); } } } /** * * @brief 第1パスのビーム幅を決定する. * * ユーザが "-b" オプションでビーム幅を指定しなかった場合は, * 下記のうち小さい方がビーム幅として採用される. * - default_width() の値 * - sqrt(語彙数) * 15 * * @param wchmm [in] 木構造化辞書 * @param specified [in] ユーザ指定ビーム幅(0: 全探索 -1: 未指定) * * @return 採用されたビーム幅. * * * @brief Determine beam width on the 1st pass. * * @param wchmm [in] tree lexicon data * @param specified [in] user-specified beam width (0: full search, * -1: not specified) * * @return the final beam width to be used. * * * @callgraph * @callergraph */ int set_beam_width(WCHMM_INFO *wchmm, int specified) { int width; int standard_width; if (specified == 0) { /* full search */ jlog("WARNING: doing full search (can be extremely slow)\n"); width = wchmm->n; } else if (specified == -1) { /* not specified */ standard_width = default_width(wchmm->hmminfo); /* system default */ width = (int)(sqrt(wchmm->winfo->num) * 15.0); /* heuristic value!! 
*/ if (width > standard_width) width = standard_width; /* 2007/1/20 bgn */ if (width < MINIMAL_BEAM_WIDTH) { width = MINIMAL_BEAM_WIDTH; } /* 2007/1/20 end */ } else { /* actual value has been specified */ width = specified; } if (width > wchmm->n) width = wchmm->n; return(width); } /* end of file */ julius-4.2.2/libjulius/src/word_align.c0000644001051700105040000004211112004452401016401 0ustar ritrlab/** * @file word_align.c * * * @brief 単語・音素・状態単位のアラインメント * * ここでは,認識結果に対する入力音声のアラインメントを出力するための * 関数が定義されています. * * Julius/Julian では,認識結果においてその単語や音素,あるいはHMMの状態が * それぞれ入力音声のどの区間にマッチしたのかを知ることができます. * より正確なアラインメントを求めるために,Julius/Julian では認識中の * 近似を含む情報は用いずに,認識が終わった後に得られた認識結果の単語列に * 対して,あらためて forced alignment を実行しています. * * * * @brief Forced alignment by word / phoneme / state unit. * * This file defines functions for performing forced alignment of * recognized words. The forced alignment is implimented in Julius/Julian * to get the best matching segmentation of recognized word sequence * upon input speech. Word-level, phoneme-level and HMM state-level * alignment can be obtained. * * Julius/Julian performs the forced alignment as a post-processing of * recognition process. Recomputation of Viterbi path on the recognized * word sequence toward input speech will be done after the recognition * to get better alignment. * * * * @author Akinobu Lee * @date Sat Sep 24 16:09:46 2005 * * $Revision: 1.7 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #include /** * * 与えられた単語列からHMMを連結して文全体のHMMを構築する. * * @param wseq [in] 単語列 * @param num [in] @a wseq の数 * @param has_sp_ret [out] ショートポーズを後続に挿入しうるユニットの情報 * @param num_ret [out] 構築されたHMMに含まれる音素HMMの数 * @param end_ret [out] アラインメントの区切りとなる状態番号の列 * @param per_what [in] 単語・音素・状態のどの単位でアラインメントを取るかを指定 * @param r [in] 認識処理インスタンス * * @return あらたに割り付けられた文全体をあらわすHMMモデル列へのポインタを返す. * * * Make the whole sentence HMM from given word sequence by connecting * each phoneme HMM. * * @param wseq [in] word sequence to align * @param num [in] number of @a wseq * @param has_sp_ret [out] unit information of whether it can be followed by a short-pause * @param num_ret [out] number of HMM contained in the generated sentence HMM * @param end_ret [out] sequence of state location as alignment unit * @param per_what [in] specify the alignment unit (word / phoneme / state) * @param r [in] recognition process instance * * @return newly malloced HMM sequences. * */ static HMM_Logical ** make_phseq(WORD_ID *wseq, short num, boolean **has_sp_ret, int *num_ret, int **end_ret, int per_what, RecogProcess *r) { HMM_Logical **ph; /* phoneme sequence */ boolean *has_sp; int k; int phnum; /* num of above */ WORD_ID tmpw, w; int i, j, pn, st, endn; HMM_Logical *tmpp, *ret; WORD_INFO *winfo; HTK_HMM_INFO *hmminfo; boolean enable_iwsp; /* for multipath */ winfo = r->lm->winfo; hmminfo = r->am->hmminfo; if (hmminfo->multipath) enable_iwsp = r->lm->config->enable_iwsp; /* make ph[] from wseq[] */ /* 1. calc total phone num and malloc */ phnum = 0; for (w=0;wwlen[wseq[w]]; ph = (HMM_Logical **)mymalloc(sizeof(HMM_Logical *) * phnum); if (hmminfo->multipath && enable_iwsp) { has_sp = (boolean *)mymalloc(sizeof(boolean) * phnum); } else { has_sp = NULL; } /* 2. 
make phoneme sequence */ st = 0; if (hmminfo->multipath) st++; pn = 0; endn = 0; for (w=0;wwlen[tmpw];i++) { tmpp = winfo->wseq[tmpw][i]; /* handle cross-word context dependency */ if (r->ccd_flag) { if (w > 0 && i == 0) { /* word head */ if ((ret = get_left_context_HMM(tmpp, ph[pn-1]->name, hmminfo)) != NULL) { tmpp = ret; } /* if triphone not found, fallback to bi/mono-phone */ /* use pseudo phone when no bi-phone found in alignment... */ } if (w < num-1 && i == winfo->wlen[tmpw] - 1) { /* word tail */ if ((ret = get_right_context_HMM(tmpp, winfo->wseq[wseq[w+1]][0]->name, hmminfo)) != NULL) { tmpp = ret; } } } ph[pn] = tmpp; if (hmminfo->multipath && enable_iwsp) { if (i == winfo->wlen[tmpw] - 1) { has_sp[pn] = TRUE; } else { has_sp[pn] = FALSE; } } if (per_what == PER_STATE) { for (j=0;jmultipath && enable_iwsp && has_sp[pn]) { for (k=0;ksp)-2;k++) { (*end_ret)[endn++] = st + j + k; } } } st += hmm_logical_state_num(tmpp) - 2; if (hmminfo->multipath && enable_iwsp && has_sp[pn]) { st += hmm_logical_state_num(hmminfo->sp) - 2; } if (per_what == PER_PHONEME) (*end_ret)[endn++] = st - 1; pn++; } if (per_what == PER_WORD) (*end_ret)[endn++] = st - 1; } *num_ret = phnum; *has_sp_ret = has_sp; return ph; } /** * * 文全体のHMMを構築し,Viterbiアラインメントを実行し,結果を出力する. * * @param words [in] 文仮説をあらわす単語列 * @param wnum [in] @a words の長さ * @param param [in] 入力特徴パラメータ列 * @param per_what [in] 単語・音素・状態のどの単位でアラインメントを取るかを指定 * @param align [out] アラインメント結果を格納するSentence構造体 * @param r [i/o] 認識処理インスタンス * * * Build sentence HMM, call viterbi_segment() and output result. * * @param words [in] word sequence of the sentence * @param wnum [in] number of words in @a words * @param param [in] input parameter vector * @param per_what [in] specify the alignment unit (word / phoneme / state) * @param s [out] Sentence data area to store the alignment result * @param r [i/o] recognition process instance * */ static void do_align(WORD_ID *words, short wnum, HTK_Param *param, int per_what, SentenceAlign *align, RecogProcess *r) { HMM_Logical **phones; /* phoneme sequence */ boolean *has_sp; /* whether phone can follow short pause */ int k; int phonenum; /* num of above */ HMM *shmm; /* sentence HMM */ int *end_state; /* state number of word ends */ int *end_frame; /* segmented last frame of words */ LOGPROB *end_score; /* normalized score of each words */ LOGPROB allscore; /* total score of this word sequence */ WORD_ID w; int i, rlen; int end_num = 0; int *id_seq, *phloc = NULL, *stloc = NULL; int j,n,p; WORD_INFO *winfo; HTK_HMM_INFO *hmminfo; boolean enable_iwsp; /* for multipath */ winfo = r->lm->winfo; hmminfo = r->am->hmminfo; if (hmminfo->multipath) enable_iwsp = r->lm->config->enable_iwsp; /* initialize result storage buffer */ switch(per_what) { case PER_WORD: jlog("ALIGN: === word alignment begin ===\n"); end_num = wnum; phloc = (int *)mymalloc(sizeof(int)*wnum); i = 0; for(w=0;wwlen[words[w]]; } break; case PER_PHONEME: jlog("ALIGN: === phoneme alignment begin ===\n"); end_num = 0; for(w=0;wwlen[words[w]]; break; case PER_STATE: jlog("ALIGN: === state alignment begin ===\n"); end_num = 0; for(w=0;wwlen[words[w]]; i++) { end_num += hmm_logical_state_num(winfo->wseq[words[w]][i]) - 2; } if (hmminfo->multipath && enable_iwsp) { end_num += hmm_logical_state_num(hmminfo->sp) - 2; } } phloc = (int *)mymalloc(sizeof(int)*end_num); stloc = (int *)mymalloc(sizeof(int)*end_num); { n = 0; p = 0; for(w=0;wwlen[words[w]]; i++) { for(j=0; jwseq[words[w]][i]) - 2; j++) { phloc[n] = p; stloc[n] = j + 1; n++; } if (hmminfo->multipath && 
enable_iwsp && i == winfo->wlen[words[w]] - 1) { for(k=0;ksp)-2;k++) { phloc[n] = p; stloc[n] = j + 1 + k + end_num; n++; } } p++; } } } break; } end_state = (int *)mymalloc(sizeof(int) * end_num); /* make phoneme sequence word sequence */ phones = make_phseq(words, wnum, &has_sp, &phonenum, &end_state, per_what, r); /* build the sentence HMMs */ shmm = new_make_word_hmm(hmminfo, phones, phonenum, has_sp); if (shmm == NULL) { j_internal_error("Error: failed to make word hmm for alignment\n"); } /* call viterbi segmentation function */ allscore = viterbi_segment(shmm, param, r->wchmm->hmmwrk, hmminfo->multipath, end_state, end_num, &id_seq, &end_frame, &end_score, &rlen); /* store result to s */ align->num = rlen; align->unittype = per_what; align->begin_frame = (int *)mymalloc(sizeof(int) * rlen); align->end_frame = (int *)mymalloc(sizeof(int) * rlen); align->avgscore = (LOGPROB *)mymalloc(sizeof(LOGPROB) * rlen); for(i=0;ibegin_frame[i] = (i == 0) ? 0 : end_frame[i-1] + 1; align->end_frame[i] = end_frame[i]; align->avgscore[i] = end_score[i]; } switch(per_what) { case PER_WORD: align->w = (WORD_ID *)mymalloc(sizeof(WORD_ID) * rlen); for(i=0;iw[i] = words[id_seq[i]]; } break; case PER_PHONEME: align->ph = (HMM_Logical **)mymalloc(sizeof(HMM_Logical *) * rlen); for(i=0;iph[i] = phones[id_seq[i]]; } break; case PER_STATE: align->ph = (HMM_Logical **)mymalloc(sizeof(HMM_Logical *) * rlen); align->loc = (short *)mymalloc(sizeof(short) * rlen); if (hmminfo->multipath) align->is_iwsp = (boolean *)mymalloc(sizeof(boolean) * rlen); for(i=0;iph[i] = phones[phloc[id_seq[i]]]; if (hmminfo->multipath) { if (enable_iwsp && stloc[id_seq[i]] > end_num) { align->loc[i] = stloc[id_seq[i]] - end_num; align->is_iwsp[i] = TRUE; } else { align->loc[i] = stloc[id_seq[i]]; align->is_iwsp[i] = FALSE; } } else { align->loc[i] = stloc[id_seq[i]]; } } break; } align->allscore = allscore; free_hmm(shmm); free(id_seq); free(phones); if (has_sp) free(has_sp); free(end_score); free(end_frame); free(end_state); switch(per_what) { case PER_WORD: free(phloc); break; case PER_PHONEME: break; case PER_STATE: free(phloc); free(stloc); } } /** * * 単語ごとの forced alignment を行う. * * @param words [in] 単語列 * @param wnum [in] @a words の単語数 * @param param [in] 入力特徴ベクトル列 * @param align [out] アラインメント結果を格納するSentence構造体 * @param r [i/o] 認識処理インスタンス * * * Do forced alignment per word for the given word sequence. * * @param words [in] word sequence * @param wnum [in] length of @a words * @param param [in] input parameter vectors * @param align [out] Sentence data area to store the alignment result * @param r [i/o] recognition process instance * * @callgraph * @callergraph */ void word_align(WORD_ID *words, short wnum, HTK_Param *param, SentenceAlign *align, RecogProcess *r) { do_align(words, wnum, param, PER_WORD, align, r); } /** * * 単語ごとの forced alignment を行う(単語が逆順で与えられる場合) * * @param revwords [in] 単語列(逆順) * @param wnum [in] @a revwords の単語数 * @param param [in] 入力特徴ベクトル列 * @param align [out] アラインメント結果を格納するSentence構造体 * @param r [i/o] 認識処理インスタンス * * * Do forced alignment per word for the given word sequence (reversed order). 
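 *
 * (Illustrative sketch, not part of the original source: in essence this
 * wrapper reverses @a revwords into a temporary buffer and delegates to
 * word_align(), roughly as follows.)
 * @code
 * WORD_ID *words = (WORD_ID *)mymalloc(sizeof(WORD_ID) * wnum);
 * int w;
 * for (w = 0; w < wnum; w++) words[w] = revwords[wnum - w - 1];
 * word_align(words, wnum, param, align, r);
 * free(words);
 * @endcode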
* * @param revwords [in] word sequence in reversed direction * @param wnum [in] length of @a revwords * @param param [in] input parameter vectors * @param align [out] Sentence data area to store the alignment result * @param r [i/o] recognition process instance * * @callgraph * @callergraph */ void word_rev_align(WORD_ID *revwords, short wnum, HTK_Param *param, SentenceAlign *align, RecogProcess *r) { WORD_ID *words; /* word sequence (true order) */ int w; words = (WORD_ID *)mymalloc(sizeof(WORD_ID) * wnum); for (w=0;w * 音素ごとの forced alignment を行う. * * @param words [in] 単語列 * @param num [in] @a words の単語数 * @param param [in] 入力特徴ベクトル列 * @param align [out] アラインメント結果を格納するSentence構造体 * @param r [i/o] 認識処理インスタンス * * * Do forced alignment per phoneme for the given word sequence. * * @param words [in] word sequence * @param num [in] length of @a words * @param param [in] input parameter vectors * @param align [out] Sentence data area to store the alignment result * @param r [i/o] recognition process instance * * @callgraph * @callergraph */ void phoneme_align(WORD_ID *words, short num, HTK_Param *param, SentenceAlign *align, RecogProcess *r) { do_align(words, num, param, PER_PHONEME, align, r); } /** * * 音素ごとの forced alignment を行う(単語が逆順で与えられる場合) * * @param revwords [in] 単語列(逆順) * @param num [in] @a revwords の単語数 * @param param [in] 入力特徴ベクトル列 * @param align [out] アラインメント結果を格納するSentence構造体 * @param r [i/o] 認識処理インスタンス * * * Do forced alignment per phoneme for the given word sequence (reversed order). * * @param revwords [in] word sequence in reversed direction * @param num [in] length of @a revwords * @param param [in] input parameter vectors * @param align [out] Sentence data area to store the alignment result * @param r [i/o] recognition process instance * * @callgraph * @callergraph */ void phoneme_rev_align(WORD_ID *revwords, short num, HTK_Param *param, SentenceAlign *align, RecogProcess *r) { WORD_ID *words; /* word sequence (true order) */ int p; words = (WORD_ID *)mymalloc(sizeof(WORD_ID) * num); for (p=0;p * HMM状態ごとの forced alignment を行う. * * @param words [in] 単語列 * @param num [in] @a words の単語数 * @param param [in] 入力特徴ベクトル列 * @param align [out] アラインメント結果を格納するSentence構造体 * @param r [i/o] 認識処理インスタンス * * * Do forced alignment per HMM state for the given word sequence. * * @param words [in] word sequence * @param num [in] length of @a words * @param param [in] input parameter vectors * @param align [out] Sentence data area to store the alignment result * @param r [i/o] recognition process instance * * @callgraph * @callergraph */ void state_align(WORD_ID *words, short num, HTK_Param *param, SentenceAlign *align, RecogProcess *r) { do_align(words, num, param, PER_STATE, align, r); } /** * * HMM状態ごとの forced alignment を行う(単語が逆順で与えられる場合) * * @param revwords [in] 単語列(逆順) * @param num [in] @a revwords の単語数 * @param param [in] 入力特徴ベクトル列 * @param align [out] アラインメント結果を格納するSentence構造体 * @param r [i/o] 認識処理インスタンス * * * Do forced alignment per state for the given word sequence (reversed order). 
* * @param revwords [in] word sequence in reversed direction * @param num [in] length of @a revwords * @param param [in] input parameter vectors * @param align [out] Sentence data area to store the alignment result * @param r [i/o] recognition process instance * * @callgraph * @callergraph */ void state_rev_align(WORD_ID *revwords, short num, HTK_Param *param, SentenceAlign *align, RecogProcess *r) { WORD_ID *words; /* word sequence (true order) */ int p; words = (WORD_ID *)mymalloc(sizeof(WORD_ID) * num); for (p=0;p * 認識結果に対して必要なアラインメントを全て実行する. * * @param r [i/o] 認識処理インスタンス * @param param [in] 入力特徴ベクトル列 * * * Do required forced alignment for the recognition results * * @param r [i/o] recognition process instance * @param param [in] input parameter vectors * * @callgraph * @callergraph */ void do_alignment_all(RecogProcess *r, HTK_Param *param) { int n; Sentence *s; SentenceAlign *now, *prev; for(n = 0; n < r->result.sentnum; n++) { s = &(r->result.sent[n]); /* do forced alignment if needed */ if (r->config->annotate.align_result_word_flag) { now = result_align_new(); word_align(s->word, s->word_num, param, now, r); if (s->align == NULL) s->align = now; else prev->next = now; prev = now; } if (r->config->annotate.align_result_phoneme_flag) { now = result_align_new(); phoneme_align(s->word, s->word_num, param, now, r); if (s->align == NULL) s->align = now; else prev->next = now; prev = now; } if (r->config->annotate.align_result_state_flag) { now = result_align_new(); state_align(s->word, s->word_num, param, now, r); if (s->align == NULL) s->align = now; else prev->next = now; prev = now; } } } /* end of file */ julius-4.2.2/libjulius/src/backtrellis.c0000644001051700105040000003413712004452401016564 0ustar ritrlab/** * @file backtrellis.c * * * @brief 単語トレリスの保存・参照 * * 第1パスの結果を単語トレリスとして保存し,第2パスで参照するための関数群 * です. Julius では,第1パスで探索中に終端が生き残っていた単語は全て, * その始終端フレーム,始端からの累積尤度および単語履歴とともに * 保存され,第2パスでその集合の中から再探索が行われます. * この第1パスでフレームごとに残される単語情報のことを「トレリス単語」, * トレリス単語の集合全体を「単語トレリス」と呼びます. * * トレリス単語は,第1パスの認識中に各フレームごとに保存されます. * 第1パス終了後,トレリス全体の整形・再配置とフレームごとのインデックス * を作成します. * * 第2パスでは,この単語トレリスを参照して * 各時間(入力フレーム)における展開可能な仮説のリストを得るとともに, * その第1パスでの(後ろ向きの)累積尤度を,第2パスにおける仮説の未展開部分の * 推定スコアとして用います. このしくみから,単語トレリスは「バックトレリス」 * とも呼ばれています. * * * * @brief Word trellis operations * * Functions to store the result of the 1st pass as "word trellis", * and functions to access them from the 2nd pass are defined in this * file. On the 1st pass of Julius, all the promising words whose * word end has been survived at the 1st pass will be stored as "word * trellis", which consists of surviving words: word boundary, * accumulated score and word history. * * The trellis word will be stored per frame at the 1st pass. After * the 1st pass ended, the word trellis will be re-organized and * indexed by frame to prepare for access at the 2nd pass. * * In the 2nd pass of reverse stack decoding, this word trellis will be * used to constrain the word hypothesis, and also used to estimate * the score of unseen area by the obtained backward scores in the 1st pass. * Thus the word trellis information is also called as "back trellis" in * Julius. 
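 *
 * (Illustrative sketch, not part of the original source: the typical life
 * cycle of a BACKTRELLIS, using only functions defined in this file; the
 * frame length and the atoms themselves are filled in by the 1st pass.)
 * @code
 * BACKTRELLIS *bt = (BACKTRELLIS *)mymalloc(sizeof(BACKTRELLIS));
 * bt_init(bt);                 // once at startup
 * bt_prepare(bt);              // at the beginning of each input segment
 * // ... 1st pass allocates atoms with bt_new() and saves them via bt_store() ...
 * bt_relocate_rw(bt);          // after the 1st pass: index atoms per frame
 * bt_sort_rw(bt);              // sort per-frame atoms by word ID
 * // ... 2nd pass queries entries with bt_binsearch_atom(bt, t, wid) ...
 * bt_free(bt);                 // release everything at the end
 * @endcode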
* * * @author Akinobu LEE * @date Tue Feb 22 15:40:01 2005 * * $Revision: 1.4 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #include /** * * 単語トレリスを保持する 単語トレリス 構造体を初期化する(起動時に1回だけ実行) * * @param bt [in] 初期化する 単語トレリス 構造体へのポインタ * * * Initialize backtrellis that will hold the whole word trellis * (called once on startup). * * @param bt [in] pointer to the backtrellis structure to initialize * * * @callergraph * @callgraph * */ void bt_init(BACKTRELLIS *bt) { bt->num = NULL; bt->rw = NULL; bt->list = NULL; bt->root = NULL; } /** * * 次回の認識用に 単語トレリス 構造体を準備する (認識開始時ごとに実行). * * @param bt [in] 対象とする単語トレリス構造体へのポインタ * * * Prepare backtrellis for the next input (called at beginning of each * speech segment). * * @param bt [in] pointer to the word trellis structure * * @callergraph * @callgraph * */ void bt_prepare(BACKTRELLIS *bt) { /* free previously allocated data */ mybfree2(&(bt->root)); /* reset entry point */ bt->num = NULL; bt->rw = NULL; bt->list = NULL; bt->root = NULL; } /** * * Free memories of backtrellis. * * * 単語トレリスのメモリを開放する. * * * @param bt [out] pointer to the word trellis structure. * * @callergraph * @callgraph * */ void bt_free(BACKTRELLIS *bt) { if (bt->root) mybfree2(&(bt->root)); free(bt); } /** * * Allocate a new trellis word atom. * * * トレリス単語を新たに割り付ける. * * * @param bt [out] pointer to the word trellis structure. * * @return pointer to the newly allocated trellis word. * * @callergraph * @callgraph * */ TRELLIS_ATOM * bt_new(BACKTRELLIS *bt) { TRELLIS_ATOM *new; new = (TRELLIS_ATOM *)mybmalloc2(sizeof(TRELLIS_ATOM), &(bt->root)); return new; } /** * * 第1パスで出現したトレリス単語(単語終端のトレリス情報)を格納する. * * ここでは格納だけ行い,第1パス終了後に bt_relocate_rw() で * フレーム順に再配置する. * * @param bt [i/o] トレリス単語を格納するバックトレリス構造体 * @param tatom [in] 出現したトレリス単語へのポインタ * * * Store a trellis word generated on the 1st pass for the 2nd pass. * * This function just store the new atom into backtrellis. * They will be re-located per frame after 1st pass for quick access * in the 2nd pass. * * @param bt [i/o] backtrellis structure to store the trellis word * @param tatom [in] the trellis word to be stored * * * @callergraph * @callgraph * */ void bt_store(BACKTRELLIS *bt, TRELLIS_ATOM *tatom) { #ifdef WORD_GRAPH tatom->within_context = FALSE; tatom->within_wordgraph = FALSE; #endif tatom->next = bt->list; bt->list = tatom; } /** * * 第1パス終了後, 格納された単語トレリス情報をフレーム順に再配置する. * * @param bt [i/o] 単語トレリス構造体 * * * Re-locate the stored atom lists per frame (will be called after the * 1st pass). 
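 *
 * (Illustrative sketch, not part of the original source: after this call the
 * surviving word ends at frame t can be scanned directly as shown below.)
 * @code
 * int t, i;
 * TRELLIS_ATOM *tre;
 * if (bt->num != NULL) {                 // NULL when no atom survived
 *   for (t = 0; t < bt->framelen; t++) {
 *     for (i = 0; i < bt->num[t]; i++) {
 *       tre = bt->rw[t][i];
 *       // tre->wid, tre->endtime, tre->backscore are available here
 *     }
 *   }
 * }
 * @endcode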
* * @param bt [i/o] word trellis structure * * * @callergraph * @callgraph * */ void bt_relocate_rw(BACKTRELLIS *bt) { TRELLIS_ATOM *tre; int t; int totalnum, n; TRELLIS_ATOM **tmp; if (bt->framelen == 0) { bt->num = NULL; return; } bt->num = (int *)mybmalloc2(sizeof(int) * bt->framelen, &(bt->root)); /* count number of trellis atom (= survived word end) for each frame */ for (t=0;tframelen;t++) bt->num[t] = 0; totalnum = 0; for (tre=bt->list;tre;tre=tre->next) { /* the last frame (when triggered from sp to non-sp) should be discarded */ if (tre->endtime >= bt->framelen) continue; bt->num[tre->endtime]++; totalnum++; } /* if no atom found, return here with all bt->num[t] set to 0 */ if (totalnum <= 0) { bt->num = NULL; return; } /* allocate area */ bt->rw = (TRELLIS_ATOM ***)mybmalloc2(sizeof(TRELLIS_ATOM **) * bt->framelen, &(bt->root)); tmp = (TRELLIS_ATOM **)mybmalloc2(sizeof(TRELLIS_ATOM *) * totalnum, &(bt->root)); n = 0; for (t=0;tframelen;t++) { if (bt->num[t] > 0) { bt->rw[t] = (TRELLIS_ATOM **)&(tmp[n]); n += bt->num[t]; } } /* then store the atoms */ for (t=0;tframelen;t++) bt->num[t] = 0; for (tre=bt->list;tre;tre=tre->next) { /* the last frame (when triggered from sp to non-sp) should be discarded */ if (tre->endtime >= bt->framelen) continue; t = tre->endtime; bt->rw[t][bt->num[t]] = tre; bt->num[t]++; } } /* 以下の関数は bt_relocate_rw 実行後にのみ使用可能となる. */ /* functions below this line should be called after bt_relocate_rw() */ /** * * 逐次デコーディング時, 第1パス終了後に, * 入力セグメントの両端に残った最尤単語仮説を取り出し, それらを * 第2パスにおける初期/最終仮説としてセットする. * * @param r [in] 認識処理インスタンス * * * When using progressive decoding with short pause segmentation, * This function extracts the best word hypothesis on head and tail of * the current input segment just after the 1st pass ends, * and store them as start/end word in the following 2nd pass. * * @param r [in] recognition process instance * * * @callergraph * @callgraph * */ void set_terminal_words(RecogProcess *r) { LOGPROB maxscore; int i,t; BACKTRELLIS *bt; bt = r->backtrellis; if (bt->num == NULL) return; maxscore = LOG_ZERO; /* find last frame where a word exists */ for(t=bt->framelen-1;t>=0;t--) { if (bt->num[t] > 0) break; } /* get maximum word hypothesis at that frame */ for(i=0;inum[t];i++) { if (maxscore < (bt->rw[t][i])->backscore) { maxscore = (bt->rw[t][i])->backscore; r->sp_break_2_begin_word = (bt->rw[t][i])->wid; } } maxscore = LOG_ZERO; /* find first frame where a word exists */ for(t=0;tframelen;t++) { if (bt->num[t] > 0) break; } /* get maximum word hypothesis at that frame */ for(i=0;inum[t];i++) { if (maxscore < (bt->rw[t][i])->backscore) { maxscore = (bt->rw[t][i])->backscore; r->sp_break_2_end_word = (bt->rw[t][i])->wid; } } #ifdef SP_BREAK_DEBUG jlog("DEBUG: 2nd pass begin word: %s\n", (r->sp_break_2_begin_word == WORD_INVALID) ? "WORD_INVALID" : r->lm->winfo->wname[r->sp_break_2_begin_word]); jlog("DEBUG: 2nd pass end word: %s\n", (r->sp_break_2_end_word == WORD_INVALID) ? "WORD_INVALID" : r->lm->winfo->wname[r->sp_break_2_end_word]); #endif } /* the outprob on the trellis connection point should be discounted */ /** * * 第1パス終了後, 第2パスでのトレリス再接続計算のために, * 全時間に渡って各トレリス単語の終端の最終状態の出力尤度を再計算し, * それを累積から差し引いておく. 第2パスでは,仮説接続時には * 接続仮説を考慮して接続点の状態の尤度が再計算される. * * @param wchmm [in] 木構造化辞書 * @param bt [in] 単語トレリス構造体 * @param param [in] 入力パラメータ情報 * * * Discount the output probabilities of the last state from the accumulated * score on word edge for all trellis words survived on the 1st pass, * for the acoustic re-computation on the 2nd pass. 
* The acousitic likelihood of the word edge state will be re-computed * when the next word hypotheses are expanded on the next 2nd pass. * * @param wchmm [in] tree lexicon * @param bt [in] word trellis structure * @param param [in] input parameter * * * @callergraph * @callgraph * */ void bt_discount_pescore(WCHMM_INFO *wchmm, BACKTRELLIS *bt, HTK_Param *param) { int t,i; TRELLIS_ATOM *tre; if (bt->num == NULL) return; for (t=0; tframelen; t++) { for (i=0; inum[t]; i++) { tre = bt->rw[t][i]; /* On normal version, both language score and the output prob. score at the connection point should removed on the trellis for the later connection. On multi-path mode, removing only the language score is enough. */ tre->backscore -= outprob_style(wchmm, wchmm->wordend[tre->wid], tre->last_tre->wid, t, param); } } } /** * * Subtract 2-gram scores at each trellis word for the 2nd pass. * * * 第2パスのために2-gramスコアをトレリス上の単語から差し引く. * * * @param bt [in] word trellis * * @callergraph * @callgraph * */ void bt_discount_lm(BACKTRELLIS *bt) { int t,i; TRELLIS_ATOM *tre; if (bt->num == NULL) return; /* the LM score of the last word should be subtracted, because their LM will be assigned by 3-gram on the 2nd pass. */ for (t=0; tframelen; t++) { for (i=0; inum[t]; i++) { tre = bt->rw[t][i]; tre->backscore -= tre->lscore; } } } /** * * bt_sort_rw()用のqsortコールバック. * * @param a [in] 要素1 * @param b [in] 要素2 * * @return 昇順ソートに必要な値 * * * qsort callback for bt_sort_rw(). * * @param a [in] first element * @param b [in] second element * * @return a value needed to do upward sort. * * * */ static int compare_wid(TRELLIS_ATOM **a, TRELLIS_ATOM **b) { if ((*a)->wid > (*b)->wid) return 1; if ((*a)->wid < (*b)->wid) return -1; return 0; } /** * * bt_relocate_rw() 終了後, 高速アクセスのために * バックトレリス構造体内のトレリス単語をフレームごとに * 単語IDでソートしておく. * * @param bt [i/o] 単語トレリス構造体 * * * * Sort the trellis words in the backtrellis by the word IDs per each frame, * for rapid access on the 2nd pass. This should be called just after * bt_relocate_rw() was called. * * @param bt [i/o] word trellis structure * * * @callergraph * @callgraph * */ void bt_sort_rw(BACKTRELLIS *bt) { int t; if (bt->num == NULL) return; for (t=0;tframelen;t++) { qsort(bt->rw[t], bt->num[t], sizeof(TRELLIS_ATOM *), (int (*)(const void *,const void *))compare_wid); } } /* 以下の関数は事前にbt_sort_rw() が呼ばれていること(第2パス用) */ /* functions below should be called after bt_sort_rw() */ /** * * 単語トレリス内の指定時刻フレーム上に,指定単語の終端があるかどうかを * 検索する. * * @param bt [in] 単語トレリス構造体 * @param t [in] 検索する終端時刻(フレーム) * @param wkey [in] 検索する単語の単語ID * * @return 見つかった場合そのトレリス単語へのポインタ,見つからなければ NULL. * * * Search a word on the specified frame in a word trellis data. * * @param bt [in] word trellis structure * @param t [in] word end frame on which to search * @param wkey [in] word ID to search * * @return pointer to the found trellis word, or NULL if not found. 
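 *
 * (Illustrative sketch, not part of the original source: a lookup on the
 * 2nd pass, valid only after bt_sort_rw() has been called.)
 * @code
 * TRELLIS_ATOM *tre;
 * tre = bt_binsearch_atom(bt, t, wid);
 * if (tre != NULL) {
 *   // tre->backscore is the accumulated backward score of word wid ending at t
 * }
 * @endcode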
* * * @callergraph * @callgraph * */ TRELLIS_ATOM * bt_binsearch_atom(BACKTRELLIS *bt, int t, WORD_ID wkey) { /* do binary search */ /* assume rw are ordered by wid */ int left, right, mid; TRELLIS_ATOM *tmp; #ifdef WPAIR int i; LOGPROB maxscore; TRELLIS_ATOM *maxtre; #endif if (bt->num[t] == 0) return(NULL); left = 0; right = bt->num[t] - 1; while (left < right) { mid = (left + right) / 2; if ((bt->rw[t][mid])->wid < wkey) { left = mid + 1; } else { right = mid; } } tmp = bt->rw[t][left]; if (tmp->wid == wkey) { #ifdef WPAIR /* same word with different context will be found: most likely one will be returned */ maxscore = LOG_ZERO; maxtre = NULL; i = left; while (i >= 0) { tmp = bt->rw[t][i]; if (tmp->wid != wkey) break; #ifdef WORD_GRAPH /* only words on a graph path should be counted */ if (!tmp->within_wordgraph) { i--; continue; } #endif if (maxscore < tmp->backscore) { maxscore = tmp->backscore; maxtre = tmp; } i--; } i = left; while (i < bt->num[t]) { tmp = bt->rw[t][i]; if (tmp->wid != wkey) break; #ifdef WORD_GRAPH /* only words on a graph path should be counted */ if (!tmp->within_wordgraph) { i++; continue; } #endif if (maxscore < tmp->backscore) { maxscore = tmp->backscore; maxtre = tmp; } i++; } tmp = maxtre; #else #ifdef WORD_GRAPH /* treat only words on a graph path */ if (! tmp->within_wordgraph) { return NULL; } #endif #endif /* WPAIR */ return(tmp); } else { return(NULL); } } /* end of file */ julius-4.2.2/libjulius/src/m_jconf.c0000644001051700105040000003326412004452401015700 0ustar ritrlab/** * @file m_jconf.c * * * @brief 設定ファイルの読み込み. * * オプション指定を記述した jconf 設定ファイルを読み込みます. * jconf 設定ファイル内では,ダブルクォーテーションによる文字列の * 指定,バックスラッシュによる文字のエスケープができます. * また,各行において '#' 以降はスキップされます. * * jconf 設定ファイル内では,全ての相対パスは,アプリケーションの * カレントディレクトリではなく,その jconf の存在するディレクトリからの * 相対パスとして解釈されます. * * また,$HOME, ${HOME}, $(HOME), の形で指定された部分について * 環境変数を展開できます. * * * * * @brief Read a configuration file. * * These functions are for reading jconf configuration file and set the * parameters into jconf structure. String bracing by double quotation, * and escaping character with backslash are supproted. * Characters after '#' at each line will be ignored. * * Note that all relative paths in jconf file are treated as relative * to the jconf file, not the run-time current directory. * * You can expand environment variables in a format of $HOME, ${HOME} or * $(HOME) in jconf file. * * * * @author Akinobu Lee * @date Thu May 12 14:16:18 2005 * * $Revision: 1.10 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #include #if defined(_WIN32) && !defined(__CYGWIN32__) #include #endif #define ISTOKEN(A) (A == ' ' || A == '\t' || A == '\n') ///< Determine token characters #define BUFLEN 512 /** * * @brief jconf 用の行読み込みルーチン * * バックスラッシュによるエスケープ処理,および Mac/Win の改行コードに * 対応する. 空行はスキップされ,改行コードは消される. * * @param buf [out] 読み込んだ1行分のテキストを格納するバッファ * @param size [in] @a buf の大きさ(バイト数) * @param fp [in] ファイルポインタ * * @return @a buf を返す. EOF でこれ以上入力がなければ NULL を返す. * * * @brief line reading function for jconf file. * * This function has capability of character escaping and newline codes * on Win/Mac. Blank line will be skipped and newline characters will be * stripped. 
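 *
 * (Illustrative sketch, not part of the original source: a read loop over a
 * jconf file with this reader, assuming conffile holds the path; BUFLEN is
 * the buffer size defined above.)
 * @code
 * char buf[BUFLEN];
 * FILE *fp;
 * if ((fp = fopen(conffile, "r")) != NULL) {
 *   while (fgets_jconf(buf, BUFLEN, fp) != NULL) {
 *     if (buf[0] == '\0') continue;     // blank line
 *     // buf now holds one logical line with escapes already resolved
 *   }
 *   fclose(fp);
 * }
 * @endcode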
* * @param buf [out] buffer to store the read text per line * @param size [in] size of @a buf in bytes * @param fp [in] file pointer * * @return @a buf on success, or NULL when encountered EOF and no further input. * */ /* added by H.Banno for Windows & Mac */ static char * fgets_jconf(char *buf, int size, FILE *fp) { int c, prev_c; int pos; if (fp == NULL) return NULL; pos = 0; c = '\0'; prev_c = '\0'; while (1) { if (pos >= size) { pos--; break; } c = fgetc(fp); if (c == EOF) { buf[pos] = '\0'; if (pos <= 0) { return NULL; } else { return buf; } } else if (c == '\n' || c == '\r') { if (c == '\r' && (c = fgetc(fp)) != '\n') { /* for Mac */ ungetc(c, fp); } if (prev_c == '\\') { pos--; } else { break; } } else { buf[pos] = c; pos++; #if defined(_WIN32) && !defined(__CYGWIN32__) if (c == '\\' && (_ismbblead(prev_c) && _ismbbtrail(c))) { c = '\0'; } #endif } prev_c = c; } buf[pos] = '\0'; return buf; } /** * * @brief ファイルのパス名からディレクトリ名を抜き出す. * * 最後の '/' は残される. * * @param path [i/o] ファイルのパス名(関数内で変更される) * * * @brief Get directory name from a path name of a file. * * The trailing slash will be left, and the given buffer will be modified. * * @param path [i/o] file path name, will be modified to directory name * */ void get_dirname(char *path) { char *p; /* /path/file -> /path/ */ /* path/file -> path/ */ /* /file -> / */ /* file -> */ /* ../file -> ../ */ p = path + strlen(path) - 1; while (*p != '/' #if defined(_WIN32) && !defined(__CYGWIN32__) && *p != '\\' #endif && p != path) p--; if (p == path && *p != '/') *p = '\0'; else *(p+1) = '\0'; } /** * * @brief 環境変数の展開 * * 環境変数を展開する. $HOME の形の文字列を環境変数とみなし,その値で * 置換する. 置換が起こった際には,与えられた文字列バッファを内部で * 解放し,あらたに割り付けられたバッファを返す. * * 変数の指定は $HOME, ${HOME}, $(HOME), の形で指定できる. * $ を展開したくない場合はバックスラッシュ "\" でエスケープできる. * またシングルクォート "'" で括られた範囲は展開を行わない. * * @param str [in] 対象文字列(展開発生時は内部で free されるので注意) * * @return 展開すべき対象がなかった場合,str がそのまま返される. 展開が行われた場合,あらたに割り付けられた展開後の文字列を含むバッファが返される. * * * @brief Envronment valuable expansion for a string * * This function expands environment valuable in a string. When an * expantion occurs, the given buffer will be released inside this * function and newly allocated buffer that holds the resulting string * will be returned. * * Environment valuables should be in a form of $HOME, ${HOME} or $(HOME). * '$' can be escaped by back slash, and strings enbraced by single quote * will be treated as is (no expansion). * * @param str [in] target string * * @return the str itself when no expansion performed, or newly * allocated buffer if expansion occurs. * */ static char * expand_env(char *str) { char *p, *q; char *bgn; char eb; char *target; char *envval; int target_malloclen; int len, n; boolean inbrace; char env[256]; /* check if string contains '$' and return immediately if not */ /* '$' = 36, '\'' = 39 */ p = str; inbrace = FALSE; while (*p != '\0') { if (*p == 39) { if (inbrace == FALSE) { inbrace = TRUE; } else { inbrace = FALSE; } p++; continue; } if (! inbrace) { if (*p == '\\') { p++; if (*p == '\0') break; } else { if (*p == 36) break; } } p++; } if (*p == '\0') return str; /* prepare result buffer */ target_malloclen = strlen(str) * 2; target = (char *)mymalloc(target_malloclen); p = str; q = target; /* parsing */ inbrace = FALSE; while (*p != '\0') { /* look for next '$' */ while (*p != '\0') { if (*p == 39) { if (inbrace == FALSE) { inbrace = TRUE; } else { inbrace = FALSE; } p++; continue; } if (! 
inbrace) { if (*p == '\\') { p++; if (*p == '\0') break; } else { if (*p == 36) break; } } *q = *p; p++; q++; n = q - target; if (n >= target_malloclen) { target_malloclen *= 2; target = myrealloc(target, target_malloclen); q = target + n; } } if (*p == '\0') { /* reached end of string */ *q = '\0'; break; } /* move to next */ p++; /* check for brace */ eb = 0; if (*p == '(') { eb = ')'; } else if (*p == '{') { eb = '}'; } /* proceed to find env end point and set the env string to env[] */ if (eb != 0) { p++; bgn = p; while (*p != '\0' && *p != eb) p++; if (*p == '\0') { jlog("ERROR: failed to expand variable: no end brace: \"%s\"\n", str); free(target); return str; } } else { bgn = p; while (*p == '_' || (*p >= '0' && *p <= '9') || (*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z')) p++; } len = p - bgn; if (len >= 256 - 1) { jlog("ERROR: failed to expand variable: too long env name: \"%s\"\n", str); free(target); return str; } strncpy(env, bgn, len); env[len] = '\0'; /* get value */ if ((envval = getenv(env)) == NULL) { jlog("ERROR: failed to expand variable: no such variable \"%s\"\n", env); free(target); return str; } if (debug2_flag) { /* for debug */ jlog("DEBUG: expand $%s to %s\n", env, envval); } /* paste value to target */ while(*envval != '\0') { *q = *envval; q++; envval++; n = q - target; if (n >= target_malloclen) { target_malloclen *= 2; target = myrealloc(target, target_malloclen); q = target + n; } } /* go on to next */ if (eb != 0) p++; } free(str); return target; } /* read-in and parse jconf file and process those using m_options */ /** * * @brief オプション文字列を分解して追加格納する. * * @param buf [in] 文字列 * @param argv [i/o] オプション列へのポインタ * @param argc [i/o] オプション列の数へのポインタ * @param maxnum [i/o] オプション列の割付最大数 * * * @brief Divide option string into option arguments and append to array. * * @param buf [in] option string * @param argv [i/o] pointer to option array * @param argc [i/o] pointer to the length of option array * @param maxnum [i/o] pointer to the allocated length of option array * */ static void add_to_arglist(char *buf, char ***argv_ret, int *argc_ret, int *maxnum_ret) { char *p = buf; char cpy[BUFLEN]; char *dst, *dst_from; char **argv = *argv_ret; int argc = *argc_ret; int maxnum = *maxnum_ret; dst = cpy; while (1) { while (*p != '\0' && ISTOKEN(*p)) p++; if (*p == '\0') break; dst_from = dst; while (*p != '\0' && (!ISTOKEN(*p))) { #if !defined(_WIN32) if (*p == '\\') { /* escape by '\' */ if (*(++p) == '\0') break; *(dst++) = *(p++); } else { #endif if (*p == '"') { /* quote by "" */ p++; while (*p != '\0' && *p != '"') *(dst++) = *(p++); if (*p == '\0') break; p++; } else if (*p == '\'') { /* quote by '' */ p++; while (*p != '\0' && *p != '\'') *(dst++) = *(p++); if (*p == '\0') break; p++; } else if (*p == '#') { /* comment out by '#' */ *p = '\0'; break; } else { /* other */ *(dst++) = *(p++); } #if !defined(_WIN32) } #endif } if (dst != dst_from) { *dst = '\0'; dst++; if ( argc >= maxnum) { maxnum += 20; argv = (char **)myrealloc(argv, sizeof(char *) * maxnum); } argv[argc++] = strcpy((char*)mymalloc(strlen(dst_from)+1), dst_from); } } *argv_ret = argv; *argc_ret = argc; *maxnum_ret = maxnum; } /** * * オプション指定を含む文字列を解析して値をセットする. * 相対パス名はカレントからの相対として扱われる. * * @param str [in] オプション指定を含む文字列 * @param jconf [out] 値をセットする jconf 設定データ * * * Parse a string and set the specified option values. * Relative paths will be treated as relative to current directory. * * @param str [in] string which contains options * @param jconf [out] global configuration data to be written. 
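 *
 * (Illustrative sketch, not part of the original source: appending options to
 * an already existing Jconf; "-b" and "-n" are options handled in m_options.c.)
 * @code
 * if (config_string_parse("-b 1000 -n 5", jconf) == FALSE) {
 *   jlog("ERROR: failed to parse option string\n");
 * }
 * @endcode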
* * * @callgraph * @callergraph */ boolean config_string_parse(char *str, Jconf *jconf) { int c_argc; char **c_argv; int maxnum; char buf[BUFLEN]; char *cdir; int i; boolean ret; jlog("STAT: parsing option string: \"%s\"\n", str); /* set the content of jconf file into argument list c_argv[1..c_argc-1] */ maxnum = 20; c_argv = (char **)mymalloc(sizeof(char *) * maxnum); c_argv[0] = strcpy((char *)mymalloc(7), "string"); c_argc = 1; add_to_arglist(str, &c_argv, &c_argc, &maxnum); /* env expansion */ for (i=1;i 0) { free(c_argv[c_argc]); } free(c_argv); return(ret); } /** * * jconf 設定ファイルを読み込んで解析し,対応するオプションを設定する. * オプション内の相対パスは、その jconf 設定ファイルからの相対となる. * * @param conffile [in] jconf ファイルのパス名 * @param jconf [out] 値をセットする jconf 設定データ * * * Read and parse a jconf file, and set the specified option values. * Relative paths in the file will be treated as relative to the file, * not the application current. * * @param conffile [in] jconf file path name * @param jconf [out] global configuration data to be written. * * * @callgraph * @callergraph */ boolean config_file_parse(char *conffile, Jconf *jconf) { int c_argc; char **c_argv; FILE *fp; int maxnum; char buf[BUFLEN]; char *cdir; int i; boolean ret; jlog("STAT: include config: %s\n", conffile); /* set the content of jconf file into argument list c_argv[1..c_argc-1] */ /* c_argv[0] will be the original conffile name */ /* inside jconf file, quoting by ", ' and escape by '\' is supported */ if ((fp = fopen(conffile, "r")) == NULL) { jlog("ERROR: m_jconf: failed to open jconf file: %s\n", conffile); return FALSE; } maxnum = 20; c_argv = (char **)mymalloc(sizeof(char *) * maxnum); c_argv[0] = strcpy((char *)mymalloc(strlen(conffile)+1), conffile); c_argc = 1; while (fgets_jconf(buf, BUFLEN, fp) != NULL) { if (buf[0] == '\0') continue; add_to_arglist(buf, &c_argv, &c_argc, &maxnum); } if (fclose(fp) == -1) { jlog("ERROR: m_jconf: cannot close jconf file\n"); return FALSE; } /* env expansion */ for (i=1;i 0) { free(c_argv[c_argc]); } free(c_argv); return(ret); } /* end of file */ julius-4.2.2/libjulius/src/recogmain.c0000644001051700105040000012641112004452401016226 0ustar ritrlab/** * @file recogmain.c * * * @brief 認識メイン関数 * * * * @brief Main function of recognition process. * * * @author Akinobu Lee * @date Wed Aug 8 14:53:53 2007 * * $Revision: 1.20 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ /** * @mainpage * * * This is a source code browser of Julius. * * - Sample code to use JuliusLib: julius-simple.c * - JuliusLib API reference: @ref jfunc * - List of callbacks: libjulius/include/julius/callback.h * * You can access documentation for files, functions and structures * from the tabs at the top of this page. * * * * これは Julius のソースコードのブラウザです. * * - JuliusLibを使用するサンプルコード: julius-simple/julius-simple.c * - JuliusLib API リファレンス: @ref jfunc * - コールバック 一覧: libjulius/include/julius/callback.h * * ページ上部のタブからファイル・関数・構造体等の説明を見ることが出来ます. * * * */ /** * @defgroup jfunc JuliusLib API * * * Here is a reference of all Julius library API functions. * * * Julius ライブラリ API 関数のリファレンスです. * * */ /** * @defgroup engine Basic API * @ingroup jfunc * * * Basic functions to start-up and initialize engines. 
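 *
 * (Illustrative sketch, not part of the original source: a minimal start-up
 * sequence; j_config_load_args_new(), j_create_instance_from_jconf(),
 * j_adin_init() and j_recognize_stream() are assumed here from the
 * julius-simple.c sample referenced above, while j_open_stream() and
 * j_close_stream() are defined in this file.)
 * @code
 * Jconf *jconf = j_config_load_args_new(argc, argv);
 * Recog *recog = j_create_instance_from_jconf(jconf);
 * if (recog == NULL) return -1;
 * if (j_adin_init(recog) == FALSE) return -1;
 * if (j_open_stream(recog, NULL) < 0) return -1;   // NULL = default device
 * j_recognize_stream(recog);                       // recognition main loop
 * j_close_stream(recog);
 * @endcode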
* * * 認識エンジンの設定等 * * */ /** * @defgroup callback Callback API * @ingroup jfunc * * * Functions to add callback to get results and status. * * * 認識結果やエンジン状態を知るためのコールバック * * */ /** * @defgroup pauseresume Pause and Resume API * @ingroup jfunc * * * Functions to pause / resume engine inputs. * * * エンジンの一時停止・再開 * * */ /** * @defgroup userfunc User function API * @ingroup jfunc * * * Functions to register user function to be applied inside Julius. * * * ユーザ関数の登録 * * */ /** * @defgroup jfunc_process Process API * @ingroup jfunc * * * Functions to create / remove / (de)activate recognition process and models * on live. * * * モデルおよび認識プロセスの動的追加・削除・有効化・無効化 * * */ /** * @defgroup grammar Grammar / Dictionary API * @ingroup jfunc * * * Functions to manage grammars or word dictionaries at run time. * * * 文法・単語辞書の操作 * * */ /** * @defgroup jconf Jconf configuration API * @ingroup jfunc * * * Functions to load / create configuration parameters. * * * Jconf 構造体によるパラメータ情報の管理 * * */ /** * @defgroup instance LM/AM/SR instance API * @ingroup jfunc * * * Functions to handle modules and processes directly. * * * モデルモジュールやプロセスを直接扱う関数. * * */ #define GLOBAL_VARIABLE_DEFINE ///< Actually make global vars in global.h #include #include #if defined(_WIN32) && !defined(__CYGWIN32__) #include #include #endif /* ---------- utility functions -----------------------------------------*/ #ifdef REPORT_MEMORY_USAGE /** * * 通常終了時に使用メモリ量を調べて出力する (Linux, sol2) * * * * Get process size and output on normal exit. (Linux, sol2) * * */ static void print_mem() { char buf[200]; sprintf(buf,"ps -o vsz,rss -p %d",getpid()); system(buf); fflush(stdout); fflush(stderr); } #endif /** * * allocate storage of recognition alignment results. * * @return the new pointer * * * アラインメント結果の格納場所を確保 * * @return 確保された領域へのポインタ * * * @callgraph * @callergraph * */ SentenceAlign * result_align_new() { SentenceAlign *new; new = (SentenceAlign *)mymalloc(sizeof(SentenceAlign)); new->w = NULL; new->ph = NULL; new->loc = NULL; new->begin_frame = NULL; new->end_frame = NULL; new->avgscore = NULL; new->is_iwsp = NULL; new->next = NULL; return new; } /** * * free storage of recognition alignment results. * * @param a [i/o] alignment data to be released * * * アラインメント結果の格納場所を確保 * * @param a [i/o] 解放されるアラインメントデータ * * * @callgraph * @callergraph * */ void result_align_free(SentenceAlign *a) { if (a->w) free(a->w); if (a->ph) free(a->ph); if (a->loc) free(a->loc); if (a->begin_frame) free(a->begin_frame); if (a->end_frame) free(a->end_frame); if (a->avgscore) free(a->avgscore); if (a->is_iwsp) free(a->is_iwsp); free(a); } /** * * Allocate storage of recognition results. * * * 認識結果の格納場所を確保する. * * * @param r [out] recognition process instance * @param num [in] number of sentences to be output * * @callgraph * @callergraph * */ void result_sentence_malloc(RecogProcess *r, int num) { int i; r->result.sent = (Sentence *)mymalloc(sizeof(Sentence) * num); for(i=0;iresult.sent[i].align = NULL; r->result.sentnum = 0; } /** * * Free storage of recognition results. * * * 認識結果の格納場所を解放する. * * * @param r [i/o] recognition process instance * * @callgraph * @callergraph */ void result_sentence_free(RecogProcess *r) { int i; SentenceAlign *a, *atmp; if (r->result.sent) { for(i=0;iresult.sentnum;i++) { a = r->result.sent[i].align; while(a) { atmp = a->next; result_align_free(a); a = atmp; } } free(r->result.sent); r->result.sent = NULL; } } /** * * Clear all result storages for next input. * * * 認識結果の格納場所を全てクリアする. * * * @param r [in] recognition process instance. 
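 *
 * (Illustrative sketch, not part of the original source: the storage released
 * here is normally read beforehand, e.g. in a CALLBACK_RESULT handler,
 * roughly as follows.)
 * @code
 * int i, j;
 * Sentence *s;
 * for (i = 0; i < r->result.sentnum; i++) {
 *   s = &(r->result.sent[i]);
 *   for (j = 0; j < s->word_num; j++) {
 *     // s->word[j] is a WORD_ID usable with r->lm->winfo
 *   }
 * }
 * @endcode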
* * @callgraph * @callergraph */ void clear_result(RecogProcess *r) { #ifdef WORD_GRAPH /* clear 1st pass word graph output */ wordgraph_clean(&(r->result.wg1)); #endif if (r->lmvar == LM_DFA_WORD) { if (r->result.status == J_RESULT_STATUS_SUCCESS) { /* clear word recog result of first pass as in final result */ free(r->result.sent); } } else { if (r->graphout) { if (r->config->graph.confnet) { /* free confusion network clusters */ cn_free_all(&(r->result.confnet)); } else if (r->config->graph.lattice) { } /* clear all wordgraph */ wordgraph_clean(&(r->result.wg)); } result_sentence_free(r); } } /* --------------------- speech buffering ------------------ */ /** * * @brief 検出された音をバッファに保存する adin_go() コールバック * * この関数は,検出された音声入力を順次 recog->speech に記録して * いく. バッファ処理モード(=非リアルタイムモード)で認識を行なう * ときに用いられる. * * @param now [in] 検出された音声波形データの断片 * @param len [in] @a now の長さ(サンプル数) * @param recog [i/o] エンジンインスタンス * * @return エラー時 -1 (adin_go は即時中断する),通常時 0 (adin_go は * 続行する),区間終了要求時 1 (adin_go は現在の音声区間を閉じる). * * * * @brief adin_go() callback to score triggered inputs to buffer. * * This function records the incomping speech segments detected in adin_go() * to recog->speech. This function will be used when recognition runs * in buffered mode (= non-realtime mode). * * @param now [in] input speech samples. * @param len [in] length of @a now in samples * @param recog [i/o] engine instance * * @return -1 on error (tell adin_go() to terminate), 0 on success (tell * adin_go() to continue recording), or 1 when this function requires * input segmentation. * */ int adin_cut_callback_store_buffer(SP16 *now, int len, Recog *recog) { if (recog->speechlen == 0) { /* first part of a segment */ if (!recog->process_active) { return(1); } } if (recog->speechlen + len > recog->speechalloclen) { while (recog->speechlen + len > recog->speechalloclen) { recog->speechalloclen += MAX_SPEECH_ALLOC_STEP; } if (recog->speech == NULL) { recog->speech = (SP16 *)mymalloc(sizeof(SP16) * recog->speechalloclen); } else { if (debug2_flag) { jlog("STAT: expanding recog->speech to %d samples\n", recog->speechalloclen); } recog->speech = (SP16 *)myrealloc(recog->speech, sizeof(SP16) * recog->speechalloclen); } } /* store now[0..len] to recog->speech[recog->speechlen] */ memcpy(&(recog->speech[recog->speechlen]), now, len * sizeof(SP16)); recog->speechlen += len; return(0); /* tell adin_go to continue reading */ } /* --------------------- adin check callback --------------- */ /** * * @brief 音声入力中に定期的に実行されるコールバック. * * この関数は,adin_go() にて音声入力待ち,あるいは音声認識中に * 定期的に繰り返し呼び出される関数である. ユーザ定義のコールバック * (CALLBACK_POLL) の呼び出し,および中断判定を行う. * * @param recog [in] エンジンインスタンス * * @return 通常時 0, 即時中断を要求時 -2, 認識中止の要求時は -1 を返す. * * * @brief callback function periodically called while input. * * This function will be called periodically from adin_go() while * waiting input or processing recognition. It will call user-defined * callback registered in CALLBACK_POLL, check for the process * status and issue recognition termination request. * * @param recog [in] engine instance * * @return 0 normally, -2 for immediate termination, and -1 if requesting * recognition stop. * * */ static int callback_check_in_adin(Recog *recog) { /* module: check command and terminate recording when requested */ callback_exec(CALLBACK_POLL, recog); /* With audio input via adinnet, TERMINATE command will issue terminate command to the adinnet client. The client then stops recording immediately and return end-of-segment ack. Then it will cause this process to stop recognition as normal. 
So we need not to perform immediate termination at this callback, but just ignore the results in the main.c. */ #if 1 if (recog->process_want_terminate) { /* TERMINATE ... force termination */ return(-2); } if (recog->process_want_reload) { return(-1); } #else if (recog->process_want_terminate /* TERMINATE ... force termination */ && recog->jconf->input.speech_input != SP_ADINNET) { return(-2); } if (recog->process_want_reload) { return(-1); } #endif return(0); } /*********************/ /* open input stream */ /*********************/ /** * * Open input stream. * * * 音声入力ストリームを開く * * * @param recog [i/o] engine instance * @param file_or_dev_name [in] file or device name of the device * * @return 0 on success, -1 on error, -2 on device initialization error. * * @callgraph * @callergraph * @ingroup engine */ int j_open_stream(Recog *recog, char *file_or_dev_name) { Jconf *jconf; char *p; jconf = recog->jconf; if (jconf->input.type == INPUT_WAVEFORM) { /* begin A/D input */ if (adin_begin(recog->adin, file_or_dev_name) == FALSE) { return -2; } /* create A/D-in thread here */ #ifdef HAVE_PTHREAD if (recog->adin->enable_thread && ! recog->adin->input_side_segment) { if (adin_thread_create(recog) == FALSE) { return -2; } } #endif /* when using adin func, input name should be obtained when called */ } else { switch(jconf->input.speech_input) { case SP_MFCMODULE: param_init_content(recog->mfcclist->param); if (mfc_module_begin(recog->mfcclist) == FALSE) return -2; /* when using mfc module func, input name should be obtained when called */ break; case SP_MFCFILE: /* read parameter file */ param_init_content(recog->mfcclist->param); if (rdparam(file_or_dev_name, recog->mfcclist->param) == FALSE) { jlog("ERROR: error in reading parameter file: %s\n", file_or_dev_name); return -1; } /* check and strip invalid frames */ if (jconf->preprocess.strip_zero_sample) { param_strip_zero(recog->mfcclist->param); } /* output frame length */ callback_exec(CALLBACK_STATUS_PARAM, recog); /* store the input filename here */ strncpy(recog->adin->current_input_name, file_or_dev_name, MAXPATHLEN); break; default: jlog("ERROR: j_open_stream: none of SP_MFC_*??\n"); return -1; } } if (jconf->input.speech_input != SP_MFCFILE) { /* store current input name using input source specific function */ p = j_get_current_filename(recog); if (p) { strncpy(recog->adin->current_input_name, p, MAXPATHLEN); } else { recog->adin->current_input_name[0] = '\0'; } } return 0; } /** * * Close input stream. The main recognition loop will be stopped after * stream has been closed. * * * 音声入力ストリームを閉じる.認識のメインループは閉じられた後終了する. * * * @param recog [i/o] engine instance * * @return 0 on success, -1 on general error, -2 on device error. * * @callgraph * @callergraph * @ingroup engine */ int j_close_stream(Recog *recog) { Jconf *jconf; jconf = recog->jconf; if (jconf->input.type == INPUT_WAVEFORM) { #ifdef HAVE_PTHREAD /* close A/D-in thread here */ if (! recog->adin->input_side_segment) { if (recog->adin->enable_thread) { if (adin_thread_cancel(recog) == FALSE) { return -2; } } else { recog->adin->end_of_stream = TRUE; } } #else if (! 
recog->adin->input_side_segment) { recog->adin->end_of_stream = TRUE; } #endif } else { switch(jconf->input.speech_input) { case SP_MFCMODULE: if (mfc_module_end(recog->mfcclist) == FALSE) return -2; break; case SP_MFCFILE: /* nothing to do */ break; default: jlog("ERROR: j_close_stream: none of SP_MFC_*??\n"); return -1; } } return 0; } /**********************************************************************/ /**********************************************************************/ /**********************************************************************/ /** * * Recognition error handling. * * * エラーによる認識終了時の処理. * * * @param recog [in] engine instance * @param status [in] error status to be set * */ static void result_error(Recog *recog, int status) { MFCCCalc *mfcc; RecogProcess *r; boolean ok_p; for(r=recog->process_list;r;r=r->next) r->result.status = status; ok_p = FALSE; for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) { if (mfcc->f > 0) { ok_p = TRUE; break; } } if (ok_p) { /* had some input */ /* output as rejected */ callback_exec(CALLBACK_RESULT, recog); #ifdef ENABLE_PLUGIN plugin_exec_process_result(recog); #endif } } /** * * @brief Execute recognition. * * This function repeats recognition sequences until the input stream * reached its end. It detects speech segment (if needed), recognize * the detected segment, output result, and go back to the first. * * This function will be stopped and exited if reached end of stream * (mostly in case of file input), some error has been occured, or * termination requested from application by calling * j_request_pause() and j_request_terminate(). * * * * @brief 音声認識の実行. * * この関数は入力ストリームが終わるまで音声認識を繰り返す. * 必要であれば入力待ちを行って区間を検出し,音声認識を行い,結果を * 出力してふたたび入力待ちに戻る. * * 入力ストリームを終わりまで認識するか,エラーが生じたときに終了する. * * あるいは,認識処理中に,j_request_pause() や j_request_terminate() が * アプリから呼ばれた場合,認識処理の切れ目で終了する. * * * * @param recog [i/o] engine instance * * @return 1 when stopped by application request, 0 when reached end of stream, * or -1 when an error occured. Note that the input stream can still continues * when 1 is returned. 
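*
* From the application side, the stream-level loop built on top of this
* function (through the public wrapper j_recognize_stream()) typically looks
* like the following minimal sketch; error handling is trimmed, and passing
* NULL as the input name for live microphone input is an assumption here:
*
* @code
* /* schematic application-side loop, not the exact julius main loop */
* for (;;) {
*   int ret = j_open_stream(recog, filename);   /* filename may be NULL (assumption) */
*   if (ret == -1) continue;   /* failed to begin this input: try the next one */
*   if (ret == -2) break;      /* device initialization error: give up */
*   /* process the whole stream; returns 0 at end of stream, -1 on error */
*   if (j_recognize_stream(recog) == -1) break;
* }
* @endcode
*
* j_request_pause() and j_request_terminate() can be issued from the
* application (for example inside a callback) to stop this loop at the next
* break point of the recognition process.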
* */ static int j_recognize_stream_core(Recog *recog) { Jconf *jconf; int ret; float seclen, mseclen; RecogProcess *r; MFCCCalc *mfcc; PROCESS_AM *am; PROCESS_LM *lm; boolean ok_p; boolean process_segment_last; boolean on_the_fly; boolean pass2_p; jconf = recog->jconf; /* determine whether on-the-fly decoding should be done */ on_the_fly = FALSE; switch(jconf->input.type) { case INPUT_VECTOR: switch(jconf->input.speech_input) { case SP_MFCFILE: on_the_fly = FALSE; break; case SP_MFCMODULE: on_the_fly = TRUE; break; } break; case INPUT_WAVEFORM: if (jconf->decodeopt.realtime_flag) { on_the_fly = TRUE; } else { on_the_fly = FALSE; } break; } if (jconf->input.type == INPUT_WAVEFORM || jconf->input.speech_input == SP_MFCMODULE) { for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) { param_init_content(mfcc->param); } } /* if no process instance exist, start with terminated */ if (recog->process_list == NULL) { jlog("STAT: no recog process, engine inactive\n"); j_request_pause(recog); } /* update initial recognition process status */ for(r=recog->process_list;r;r=r->next) { if (r->active > 0) { r->live = TRUE; } else if (r->active < 0) { r->live = FALSE; } r->active = 0; } /******************************************************************/ /* do recognition for each incoming segment from the input stream */ /******************************************************************/ while (1) { start_recog: /*************************************/ /* Update recognition process status */ /*************************************/ for(r=recog->process_list;r;r=r->next) { if (r->active > 0) { r->live = TRUE; jlog("STAT: SR%02d %s now active\n", r->config->id, r->config->name); } else if (r->active < 0) { r->live = FALSE; jlog("STAT: SR%02d %s now inactive\n", r->config->id, r->config->name); } r->active = 0; } if (debug2_flag) { for(r=recog->process_list;r;r=r->next) { jlog("DEBUG: %s: SR%02d %s\n", r->live ? 
"live" : "dead", r->config->id, r->config->name); } } /* check if any process is live */ if (recog->process_active) { ok_p = FALSE; for(r=recog->process_list;r;r=r->next) { if (r->live) ok_p = TRUE; } if (!ok_p) { /* no process is alive */ /* make whole process as inactive */ jlog("STAT: all recog process inactive, pause engine now\n"); j_request_pause(recog); } } /* Check whether process status was changed while in the last run */ if (recog->process_online != recog->process_active) { recog->process_online = recog->process_active; if (recog->process_online) callback_exec(CALLBACK_EVENT_PROCESS_ONLINE, recog); else callback_exec(CALLBACK_EVENT_PROCESS_OFFLINE, recog); } /* execute poll callback */ if (recog->process_active) { callback_exec(CALLBACK_POLL, recog); } /* reset reload flag here */ j_reset_reload(recog); if (!recog->process_active) { /* now sleeping, return */ /* in the next call, we will resume from here */ return 1; } /* update process status */ if (recog->process_online != recog->process_active) { recog->process_online = recog->process_active; if (recog->process_online) callback_exec(CALLBACK_EVENT_PROCESS_ONLINE, recog); else callback_exec(CALLBACK_EVENT_PROCESS_OFFLINE, recog); } /*********************************************************/ /* check for grammar to change, and rebuild if necessary */ /*********************************************************/ for(lm=recog->lmlist;lm;lm=lm->next) { if (lm->lmtype == LM_DFA) { multigram_update(lm); /* some modification occured if return TRUE*/ } } for(r=recog->process_list;r;r=r->next) { if (!r->live) continue; if (r->lmtype == LM_DFA && r->lm->global_modified) { multigram_build(r); } } for(lm=recog->lmlist;lm;lm=lm->next) { if (lm->lmtype == LM_DFA) lm->global_modified = FALSE; } ok_p = FALSE; for(r=recog->process_list;r;r=r->next) { if (!r->live) continue; if (r->lmtype == LM_DFA) { if (r->lm->winfo == NULL || (r->lmvar == LM_DFA_GRAMMAR && r->lm->dfa == NULL)) { /* make this instance inactive */ r->active = -1; ok_p = TRUE; } } } if (ok_p) { /* at least one instance has no grammar */ goto start_recog; } /******************/ /* start 1st pass */ /******************/ if (on_the_fly) { /********************************************/ /* REALTIME ON-THE-FLY DECODING OF 1ST-PASS */ /********************************************/ /* store, analysis and search in a pipeline */ /* main function is RealTimePipeLine() at realtime-1stpass.c, and it will be periodically called for each incoming input segment from the AD-in function adin_go(). RealTimePipeLine() will be called as a callback function from adin_go() */ /* after this part, directly jump to the beginning of the 2nd pass */ if (recog->process_segment) { /*****************************************************************/ /* short-pause segmentation: process last remaining frames first */ /*****************************************************************/ /* last was segmented by short pause */ /* the margin segment in the last input will be re-processed first, and then the speech input will be processed */ /* process the last remaining parameters */ ret = RealTimeResume(recog); if (ret < 0) { /* error end in the margin */ jlog("ERROR: failed to process last remaining samples on RealTimeResume\n"); /* exit now! 
*/ return -1; } if (ret != 1) { /* if segmented again in the margin, not process the rest */ /* last parameters has been processed, so continue with the current input as normal */ /* process the incoming input */ if (jconf->input.type == INPUT_WAVEFORM) { /* get speech and process it on real-time */ ret = adin_go(RealTimePipeLine, callback_check_in_adin, recog); } else { /* get feature vector and process it */ ret = mfcc_go(recog, callback_check_in_adin); } if (ret < 0) { /* error end in adin_go */ if (ret == -2 || recog->process_want_terminate) { /* terminated by callback */ RealTimeTerminate(recog); /* reset param */ for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) { param_init_content(mfcc->param); } /* execute callback at end of pass1 */ if (recog->triggered) { callback_exec(CALLBACK_EVENT_PASS1_END, recog); /* output result terminate */ result_error(recog, J_RESULT_STATUS_TERMINATE); } goto end_recog; /* cancel this recognition */ } jlog("ERROR: an error occured at on-the-fly 1st pass decoding\n"); /* exit now! */ return(-1); } } } else { /***********************************************************/ /* last was not segmented, process the new incoming input */ /***********************************************************/ /* end of this input will be determined by either end of stream (in case of file input), or silence detection by adin_go(), or 'TERMINATE' command from module (if module mode) */ /* prepare work area for on-the-fly processing */ if (RealTimePipeLinePrepare(recog) == FALSE) { jlog("ERROR: failed to prepare for on-the-fly 1st pass decoding\n"); return (-1); } /* process the incoming input */ if (jconf->input.type == INPUT_WAVEFORM) { /* get speech and process it on real-time */ ret = adin_go(RealTimePipeLine, callback_check_in_adin, recog); } else { /* get feature vector and process it */ ret = mfcc_go(recog, callback_check_in_adin); } if (ret < 0) { /* error end in adin_go */ if (ret == -2 || recog->process_want_terminate) { /* terminated by callback */ RealTimeTerminate(recog); /* reset param */ for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) { param_init_content(mfcc->param); } /* execute callback at end of pass1 */ if (recog->triggered) { callback_exec(CALLBACK_EVENT_PASS1_END, recog); /* output result terminate */ result_error(recog, J_RESULT_STATUS_TERMINATE); } goto end_recog; } jlog("ERROR: an error occured at on-the-fly 1st pass decoding\n"); /* exit now! */ return(-1); } } /******************************************************************/ /* speech stream has been processed on-the-fly, and 1st pass ends */ /******************************************************************/ if (ret == 1 || ret == 2) { /* segmented */ #ifdef HAVE_PTHREAD /* check for audio overflow */ if (recog->adin->enable_thread && recog->adin->adinthread_buffer_overflowed) { jlog("Warning: input buffer overflow: some input may be dropped, so disgard the input\n"); result_error(recog, J_RESULT_STATUS_BUFFER_OVERFLOW); /* skip 2nd pass */ goto end_recog; } #endif /* check for long input */ for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { if (mfcc->f >= recog->real.maxframelen) { jlog("Warning: too long input (> %d frames), segment it now\n", recog->real.maxframelen); } } } /* last procedure of 1st-pass */ if (RealTimeParam(recog) == FALSE) { jlog("ERROR: fatal error occured, program terminates now\n"); return -1; } #ifdef BACKEND_VAD /* if not triggered, skip this segment */ if (recog->jconf->decodeopt.segment && ! 
recog->triggered) { goto end_recog; } #endif /* output segment status */ if (recog->adin->adin_cut_on && (jconf->input.speech_input == SP_RAWFILE || jconf->input.speech_input == SP_STDIN)) { seclen = (float)recog->adin->last_trigger_sample / (float)jconf->input.sfreq; jlog("STAT: triggered: [%d..%d] %.2fs from %02d:%02d:%02.2f\n", recog->adin->last_trigger_sample, recog->adin->last_trigger_sample + recog->adin->last_trigger_len, (float)(recog->adin->last_trigger_len) / (float)jconf->input.sfreq, (int)(seclen / 3600), (int)(seclen / 60), seclen - (int)(seclen / 60) * 60); } /* execute callback for 1st pass result */ /* result.status <0 must be skipped inside callback */ callback_exec(CALLBACK_RESULT_PASS1, recog); #ifdef WORD_GRAPH /* result.wg1 == NULL should be skipped inside callback */ callback_exec(CALLBACK_RESULT_PASS1_GRAPH, recog); #endif /* execute callback at end of pass1 */ callback_exec(CALLBACK_EVENT_PASS1_END, recog); /* output frame length */ callback_exec(CALLBACK_STATUS_PARAM, recog); /* if terminate signal has been received, discard this input */ if (recog->process_want_terminate) { result_error(recog, J_RESULT_STATUS_TERMINATE); goto end_recog; } /* END OF ON-THE-FLY INPUT AND DECODING OF 1ST PASS */ } else { /******************/ /* buffered input */ /******************/ if (jconf->input.type == INPUT_VECTOR) { /***********************/ /* feature vector input */ /************************/ if (jconf->input.speech_input == SP_MFCFILE) { /************************/ /* parameter file input */ /************************/ /* parameter type check --- compare the type to that of HMM, and adjust them if necessary */ if (jconf->input.paramtype_check_flag) { for(am=recog->amlist;am;am=am->next) { /* return param itself or new malloced param */ if (param_check_and_adjust(am->hmminfo, am->mfcc->param, verbose_flag) == -1) { /* failed */ for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) { param_init_content(mfcc->param); } /* tell failure */ result_error(recog, J_RESULT_STATUS_FAIL); goto end_recog; } } } /* whole input is already read, so set input status to end of stream */ /* and jump to the start point of 1st pass */ ret = 0; } } else { /*************************/ /* buffered speech input */ /*************************/ if (!recog->process_segment) { /* no segment left */ /****************************************/ /* store raw speech samples to speech[] */ /****************************************/ recog->speechlen = 0; for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) { param_init_content(mfcc->param); } /* tell module to start recording */ /* the "adin_cut_callback_store_buffer" simply stores the input speech to a buffer "speech[]" */ /* end of this input will be determined by either end of stream (in case of file input), or silence detection by adin_go(), or 'TERMINATE' command from module (if module mode) */ ret = adin_go(adin_cut_callback_store_buffer, callback_check_in_adin, recog); if (ret < 0) { /* error end in adin_go */ if (ret == -2 || recog->process_want_terminate) { /* terminated by module */ /* output fail */ result_error(recog, J_RESULT_STATUS_TERMINATE); goto end_recog; } jlog("ERROR: an error occured while recording input\n"); return -1; } /* output recorded length */ seclen = (float)recog->speechlen / (float)jconf->input.sfreq; jlog("STAT: %d samples (%.2f sec.)\n", recog->speechlen, seclen); /* -rejectshort 指定時, 入力が指定時間以下であれば ここで入力を棄却する */ /* when using "-rejectshort", and input was shorter than specified, reject the input here */ if (jconf->reject.rejectshortlen > 0) { 
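/* note: rejectshortlen ("-rejectshort") is specified in milliseconds while
   seclen above is in seconds, hence the * 1000.0 below; the same threshold
   is checked once more against the frame-based length (mseclen) just before
   the 2nd pass */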
if (seclen * 1000.0 < jconf->reject.rejectshortlen) { result_error(recog, J_RESULT_STATUS_REJECT_SHORT); goto end_recog; } } /**********************************************/ /* acoustic analysis and encoding of speech[] */ /**********************************************/ jlog("STAT: ### speech analysis (waveform -> MFCC)\n"); /* CMN will be computed for the whole buffered input */ if (wav2mfcc(recog->speech, recog->speechlen, recog) == FALSE) { /* error end, end stream */ ret = -1; /* tell failure */ result_error(recog, J_RESULT_STATUS_FAIL); goto end_recog; } /* if terminate signal has been received, cancel this input */ if (recog->process_want_terminate) { result_error(recog, J_RESULT_STATUS_TERMINATE); goto end_recog; } /* output frame length */ callback_exec(CALLBACK_STATUS_PARAM, recog); } } #ifdef ENABLE_PLUGIN /* call post-process plugin if exist */ plugin_exec_vector_postprocess_all(recog->mfcclist->param); #endif /******************************************************/ /* 1st-pass --- backward search to compute heuristics */ /******************************************************/ if (!jconf->decodeopt.realtime_flag) { /* prepare for outprob cache for each HMM state and time frame */ /* assume all MFCCCalc has params of the same sample num */ for(am=recog->amlist;am;am=am->next) { outprob_prepare(&(am->hmmwrk), am->mfcc->param->samplenum); } } /* if terminate signal has been received, cancel this input */ if (recog->process_want_terminate) { result_error(recog, J_RESULT_STATUS_TERMINATE); goto end_recog; } /* execute computation of left-to-right backtrellis */ if (get_back_trellis(recog) == FALSE) { jlog("ERROR: fatal error occured, program terminates now\n"); return -1; } #ifdef BACKEND_VAD /* if not triggered, skip this segment */ if (recog->jconf->decodeopt.segment && ! recog->triggered) { goto end_recog; } #endif /* execute callback for 1st pass result */ /* result.status <0 must be skipped inside callback */ callback_exec(CALLBACK_RESULT_PASS1, recog); #ifdef WORD_GRAPH /* result.wg1 == NULL should be skipped inside callback */ callback_exec(CALLBACK_RESULT_PASS1_GRAPH, recog); #endif /* execute callback at end of pass1 */ if (recog->triggered) { callback_exec(CALLBACK_EVENT_PASS1_END, recog); } /* END OF BUFFERED 1ST PASS */ } /**********************************/ /* end processing of the 1st-pass */ /**********************************/ /* on-the-fly 1st pass processing will join here */ /* -rejectshort 指定時, 入力が指定時間以下であれば探索失敗として */ /* 第2パスを実行せずにここで終了する */ /* when using "-rejectshort", and input was shorter than the specified length, terminate search here and output recognition failure */ if (jconf->reject.rejectshortlen > 0) { mseclen = (float)recog->mfcclist->param->samplenum * (float)jconf->input.period * (float)jconf->input.frameshift / 10000.0; if (mseclen < jconf->reject.rejectshortlen) { result_error(recog, J_RESULT_STATUS_REJECT_SHORT); goto end_recog; } } #ifdef POWER_REJECT if (power_reject(recog)) { result_error(recog, J_RESULT_STATUS_REJECT_POWER); goto end_recog; } #endif /* if terminate signal has been received, cancel this input */ if (recog->process_want_terminate) { result_error(recog, J_RESULT_STATUS_TERMINATE); goto end_recog; } /* if GMM is specified and result are to be rejected, terminate search here */ if (jconf->reject.gmm_reject_cmn_string != NULL) { if (! 
gmm_valid_input(recog)) { result_error(recog, J_RESULT_STATUS_REJECT_GMM); goto end_recog; } } /* for instances with "-1pass", copy 1st pass result as final */ /* execute stack-decoding search */ /* they will be skipepd in the next pass */ for(r=recog->process_list;r;r=r->next) { if (!r->live) continue; /* skip if 1st pass was failed */ if (r->result.status < 0) continue; /* already stored on word recognition, so skip this */ if (r->lmvar == LM_DFA_WORD) continue; if (r->config->compute_only_1pass) { if (verbose_flag) { jlog("%02d %s: \"-1pass\" specified, output 1st pass result as a final result\n", r->config->id, r->config->name); } /* prepare result storage */ result_sentence_malloc(r, 1); /* finalize result when no hypothesis was obtained */ pass2_finalize_on_no_result(r, TRUE); } } /***********************************************/ /* 2nd-pass --- forward search with heuristics */ /***********************************************/ pass2_p = FALSE; for(r=recog->process_list;r;r=r->next) { if (!r->live) continue; /* if [-1pass] is specified, skip 2nd pass */ if (r->config->compute_only_1pass) continue; /* if search already failed on 1st pass, skip 2nd pass */ if (r->result.status < 0) continue; pass2_p = TRUE; } if (pass2_p) callback_exec(CALLBACK_EVENT_PASS2_BEGIN, recog); #if !defined(PASS2_STRICT_IWCD) || defined(FIX_35_PASS2_STRICT_SCORE) /* adjust trellis score not to contain outprob of the last frames */ for(r=recog->process_list;r;r=r->next) { if (!r->live) continue; /* if [-1pass] is specified, skip 2nd pass */ if (r->config->compute_only_1pass) continue; /* if search already failed on 1st pass, skip 2nd pass */ if (r->result.status < 0) continue; if (! r->am->hmminfo->multipath) { bt_discount_pescore(r->wchmm, r->backtrellis, r->am->mfcc->param); } #ifdef LM_FIX_DOUBLE_SCORING if (r->lmtype == LM_PROB) { bt_discount_lm(r->backtrellis); } #endif } #endif /* execute stack-decoding search */ for(r=recog->process_list;r;r=r->next) { if (!r->live) continue; /* if [-1pass] is specified, just copy from 1st pass result */ if (r->config->compute_only_1pass) continue; /* if search already failed on 1st pass, skip 2nd pass */ if (r->result.status < 0) continue; /* prepare result storage */ if (r->lmtype == LM_DFA && r->config->output.multigramout_flag) { result_sentence_malloc(r, r->config->output.output_hypo_maxnum * multigram_get_all_num(r->lm)); } else { result_sentence_malloc(r, r->config->output.output_hypo_maxnum); } /* do 2nd pass */ if (r->lmtype == LM_PROB) { wchmm_fbs(r->am->mfcc->param, r, 0, 0); } else if (r->lmtype == LM_DFA) { if (r->config->output.multigramout_flag) { /* execute 2nd pass multiple times for each grammar sequencially */ /* to output result for each grammar */ MULTIGRAM *m; boolean has_success = FALSE; for(m = r->lm->grammars; m; m = m->next) { if (m->active) { jlog("STAT: execute 2nd pass limiting words for gram #%d\n", m->id); wchmm_fbs(r->am->mfcc->param, r, m->cate_begin, m->dfa->term_num); if (r->result.status == J_RESULT_STATUS_SUCCESS) { has_success = TRUE; } } } r->result.status = (has_success == TRUE) ? 
J_RESULT_STATUS_SUCCESS : J_RESULT_STATUS_FAIL; } else { /* only the best among all grammar will be output */ wchmm_fbs(r->am->mfcc->param, r, 0, r->lm->dfa->term_num); } } } /* do forced alignment if needed */ for(r=recog->process_list;r;r=r->next) { if (!r->live) continue; /* if search failed on 2nd pass, skip this */ if (r->result.status < 0) continue; /* do needed alignment */ do_alignment_all(r, r->am->mfcc->param); } /* output result */ callback_exec(CALLBACK_RESULT, recog); #ifdef ENABLE_PLUGIN plugin_exec_process_result(recog); #endif /* output graph */ /* r->result.wg == NULL should be skipped inside the callback */ ok_p = FALSE; for(r=recog->process_list;r;r=r->next) { if (!r->live) continue; if (r->config->compute_only_1pass) continue; if (r->result.status < 0) continue; if (r->config->graph.lattice) ok_p = TRUE; } if (ok_p) callback_exec(CALLBACK_RESULT_GRAPH, recog); /* output confnet */ /* r->result.confnet == NULL should be skipped inside the callback */ ok_p = FALSE; for(r=recog->process_list;r;r=r->next) { if (!r->live) continue; if (r->config->compute_only_1pass) continue; if (r->result.status < 0) continue; if (r->config->graph.confnet) ok_p = TRUE; } if (ok_p) callback_exec(CALLBACK_RESULT_CONFNET, recog); /* clear work area for output */ for(r=recog->process_list;r;r=r->next) { if (!r->live) continue; clear_result(r); } /* output end of 2nd pass */ if (pass2_p) callback_exec(CALLBACK_EVENT_PASS2_END, recog); #ifdef DEBUG_VTLN_ALPHA_TEST if (r->am->mfcc->para->vtln_alpha == 1.0) { /* if vtln parameter remains default, search for VTLN parameter */ vtln_alpha(recog, r); } #endif end_recog: /**********************/ /* end of recognition */ /**********************/ /* update CMN info for next input (in case of realtime wave input) */ if (jconf->input.type == INPUT_WAVEFORM && jconf->decodeopt.realtime_flag) { for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) { if (mfcc->param->samplenum > 0) { RealTimeCMNUpdate(mfcc, recog); } } } process_segment_last = recog->process_segment; if (jconf->decodeopt.segment) { /* sp-segment mode */ /* param is now shrinked to hold only the processed input, and */ /* the rests are holded in (newly allocated) "rest_param" */ /* if this is the last segment, rest_param is NULL */ /* assume all segmentation are synchronized */ recog->process_segment = FALSE; for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) { if (mfcc->rest_param != NULL) { /* process the rest parameters in the next loop */ recog->process_segment = TRUE; free_param(mfcc->param); mfcc->param = mfcc->rest_param; mfcc->rest_param = NULL; } } } /* callback of recognition end */ if (jconf->decodeopt.segment) { #ifdef BACKEND_VAD if (recog->triggered) callback_exec(CALLBACK_EVENT_SEGMENT_END, recog); if (process_segment_last && !recog->process_segment) callback_exec(CALLBACK_EVENT_RECOGNITION_END, recog); #else callback_exec(CALLBACK_EVENT_SEGMENT_END, recog); if (!recog->process_segment) callback_exec(CALLBACK_EVENT_RECOGNITION_END, recog); #endif } else { callback_exec(CALLBACK_EVENT_RECOGNITION_END, recog); } if (verbose_flag) jlog("\n"); jlog_flush(); if (jconf->decodeopt.segment) { /* sp-segment mode */ if (recog->process_segment == TRUE) { if (verbose_flag) jlog("STAT: <<>>\n\n"); } else { /* input has reached end of stream, terminate program */ if (ret <= 0 && ret != -2) break; } } else { /* not sp-segment mode */ /* input has reached end of stream, terminate program */ if (ret <= 0 && ret != -2) break; } /* recognition continues for next (silence-aparted) segment */ } /* END OF STREAM 
LOOP */ /* close the stream */ if (jconf->input.type == INPUT_WAVEFORM) { if (adin_end(recog->adin) == FALSE) return -1; } if (jconf->input.speech_input == SP_MFCMODULE) { if (mfc_module_end(recog->mfcclist) == FALSE) return -1; } /* return to the opening of input stream */ return(0); } /** * * @brief Recognize an input stream. * * This function repeat recognition process for the whole input stream, * using segmentation and detection if required. It ends when the * whole input has been processed. * * When a recognition stop is requested from application, the following * callbacks will be called in turn: CALLBACK_EVENT_PAUSE, * CALLBACK_PAUSE_FUNCTION, CALLBACK_EVENT_RESUME. After finishing executing * all functions in these callbacks, recognition will restart. * If you have something to be processed while recognition stops, * you should write the function as callback to CALLBACK_PAUSE_FUNCTION. * Note that recognition will restart immediately after all functions * registered in CALLBACK_PAUSE_FUNCTION has been finished. * * * * @brief 入力ストリームの認識を行う * * 入力ストリームに対して * (必要であれば)区間検出やVADを行いながら認識を繰り返し行っていく. * 入力が終端に達するかあるいはエラーで終了する. * * アプリケーションから認識の中断をリクエストされたときは, * CALLBACK_EVENT_PAUSE,CALLBACK_PAUSE_FUNCTION, * CALLBACK_EVENT_RESUME の順に呼んだあと認識に戻る. このため, * 認識を中断させている間に行う処理は,CALLBACK_PAUSE_FUNCTION * に登録しておく必要がある. CALLBACK_PAUSE_FUNCTION に * 登録されている全ての処理が終了したら認識を自動的に再開するので * 注意すること. * * * * @param recog [i/o] engine instance * * @return 0 when finished recognizing all the input stream to the end, * or -1 on error. * * @callgraph * @callergraph * @ingroup engine */ int j_recognize_stream(Recog *recog) { int ret; do { ret = j_recognize_stream_core(recog); switch(ret) { case 1: /* paused by a callback (stream will continue) */ /* call pause event callbacks */ callback_exec(CALLBACK_EVENT_PAUSE, recog); /* call pause functions */ /* block until all pause functions exits */ if (! callback_exist(recog, CALLBACK_PAUSE_FUNCTION)) { jlog("WARNING: pause requested but no pause function specified\n"); jlog("WARNING: engine will resume now immediately\n"); } callback_exec(CALLBACK_PAUSE_FUNCTION, recog); /* after here, recognition will restart for the rest input */ /* call resume event callbacks */ callback_exec(CALLBACK_EVENT_RESUME, recog); break; case 0: /* end of stream */ /* go on to the next input */ break; case -1: /* error */ jlog("ERROR: an error occured while recognition, terminate stream\n"); return -1; } } while (ret == 1); /* loop when paused by callback */ return 0; } /* end of file */ julius-4.2.2/libjulius/src/ngram_decode.c0000644001051700105040000004503012004452401016666 0ustar ritrlab/** * @file ngram_decode.c * * * @brief N-gram確率に基づく次単語予測(第2パス) * * Julius のN-gramを用いたスタックデコーディング(第2パス)において, * 次に接続しうる単語の集合を決定する. * * 与えられた展開元仮説の始端フレームを予測し,単語トレリス上で * その予測フレーム周辺に終端が存在する単語の集合を, * そのN-gram出現確率とともに返す. * * Julius では ngram_firstwords(), ngram_nextwords(), ngram_acceptable() が * それぞれ第2パスのメイン関数 wchmm_fbs() から呼び出される. なお, * Julian ではこれらの関数の代わりに dfa_decode.c の関数が用いられる. * * * * @brief N-gram based word prediction for the 2nd pass. * * These functions returns next word candidates in the 2nd recognition * pass of Julius, i.e. N-gram based stack decoding. * * Given a partial sentence hypothesis, it first estimate the beginning frame * of the hypothesis based on the word trellis. Then the words in the word * trellis around the estimated frame are extracted from the word trellis. * They will be returned with their N-gram probabilities. 
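*
* The candidates are handed back as an array of NEXTWORD entries, each
* carrying (at least) the word ID, the weighted LM score, and the trellis
* word it was extracted from. A minimal sketch of how a caller could inspect
* them (illustrative only; "num" is the value returned by ngram_nextwords()):
*
* @code
* /* illustrative only: dump the extracted next word candidates */
* int i;
* for (i = 0; i < num; i++) {
*   printf("cand %d: word id=%d  lm score=%f\n", i, nw[i]->id, nw[i]->lscore);
*   /* nw[i]->tre points to the trellis word this candidate was taken from */
* }
* @endcode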
* * In Julius, ngram_firstwords(), ngram_nextwords() and ngram_acceptable() * are called from main search function wchmm_fbs(). In Julian, * corresponding functions in dfa_decode.c will be used instead. * * * @author Akinobu Lee * @date Fri Jul 8 14:57:51 2005 * * $Revision: 1.7 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #include /** * * 次単語候補ソート用 qsort コールバック関数. * * @param a [in] 要素1 * @param b [in] 要素2 * * @return aの単語ID > bの単語ID なら1, 逆なら -1, 同じなら 0 を返す. * * * qsort callback function to sort next word candidates by their word ID. * * @param a [in] element 1 * @param b [in] element 2 * * @return 1 if word id of a > that of b, -1 if negative, 0 if equal. * */ static int compare_nw(NEXTWORD **a, NEXTWORD **b) { if ((*a)->id > (*b)->id) return 1; if ((*a)->id < (*b)->id) return -1; return 0; } /** * * 次単語候補リスト内から単語を検索する. * * @param nw [in] 次単語候補リスト * @param w [in] 検索する単語のID * @param num [in] 次単語候補リストの長さ * * @return 見つかった場合その次単語候補構造体へのポインタ,見つからなければ * NULL を返す. * * * Find a word from list of next word candidates. * * @param nw [in] list of next word candidates * @param w [in] word id to search for * @param num [in] length of @a nw * * @return the pointer to the NEXTWORD data if found, or NULL if not found. * */ /* find next word candiate whose id 'w' */ static NEXTWORD * search_nw(NEXTWORD **nw, WORD_ID w, int num) { int left,right,mid; NEXTWORD *tmp; if (num == 0) return NULL; left = 0; right = num - 1; while (left < right) { mid = (left + right) / 2; if ((nw[mid])->id < w) { left = mid + 1; } else { right = mid; } } tmp = nw[left]; if (tmp->id == w) { return tmp; } else { return NULL; } } /** * * Compute backward N-gram score from forward N-gram. * * * 後向きの N-gram スコアを前向き N-gram から算出する. * * * @param ngram [in] N-gram data structure * @param w [in] word sequence * @param wlen [in] length of @a w * * @return the backward probability of the word w[0]. * */ static LOGPROB ngram_forw2back(NGRAM_INFO *ngram, WORD_ID *w, int wlen) { int i; LOGPROB p1, p2; p1 = 0.0; for(i = 1; i < ngram->n; i++) { if (i >= wlen) break; p1 += ngram_prob(ngram, i, &(w[1])); } p2 = 0.0; for(i = 0; i < ngram->n; i++) { if (i >= wlen) break; p2 += ngram_prob(ngram, i+1, w); } return(p2 - p1); } /** * * @brief 単語トレリスから次単語候補を抽出する. * * 単語トレリス上の指定したフレーム上に終端が存在するトレリス単語 * のリストを抽出し,それらの次単語としての N-gram 接続確率を計算する. * そのリストを次単語情報構造体に追加して返す. * * @param r [in] 認識処理インスタンス * @param nw [i/o] 次単語候補リスト(抽出結果は @a oldnum 以降に追加される) * @param oldnum [in] @a nw にすでに格納されている次単語の数 * @param hypo [in] 展開元の文仮説 * @param t [in] 指定フレーム * * @return 抽出リストを追加したあとの @a nw に含まれる次単語の総数. * * * @brief Extract next word candidates from word trellis. * * This function extracts the list of trellis words whose word end * has survived in the word trellis at the specified frame. * The N-gram probabilities of them are then computed and added to * the current next word candidates data. * * @param r [in] recognition process instance * @param nw [in] list of next word candidates (new words will be appended at @a oldnum) * @param oldnum [in] number of words already stored in @a nw * @param hypo [in] the source sentence hypothesis * @param t [in] specified frame * * @return the total number of words currently stored in the @a nw. 
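*
* For reference, the LM score attached to each extracted candidate is
*
*   nw->lscore = rawscore * lm_weight2 + lm_penalty2
*
* (an additional lm_penalty_trans is added when both the candidate and the
* last word of the hypothesis are transparent words), where rawscore is the
* backward N-gram log probability of the candidate given the word context of
* the partial hypothesis. When only a forward N-gram is available,
* ngram_forw2back() derives it as a chain-rule difference inside the N-gram
* window:
*
*   log P(w0 | w1 .. w_{N-1}) = log P(w0 w1 .. w_{N-1}) - log P(w1 .. w_{N-1})
*
* which is exactly the value p2 - p1 computed there.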
* */ static int pick_backtrellis_words(RecogProcess *r, NEXTWORD **nw, int oldnum, NODE *hypo, short t) { int i; WORD_ID w; LOGPROB rawscore; #ifdef WPAIR int w_old = WORD_INVALID; #endif int num; int cnnum; ///< Num of found non-transparent words (<=2) int last_trans; ///< Num of skipped transparent words StackDecode *dwrk; BACKTRELLIS *bt; WORD_INFO *winfo; NGRAM_INFO *ngram; LOGPROB lm_weight2, lm_penalty2, lm_penalty_trans; num = oldnum; bt = r->backtrellis; winfo = r->lm->winfo; ngram = r->lm->ngram; lm_weight2 = r->config->lmp.lm_weight2; lm_penalty2 = r->config->lmp.lm_penalty2; lm_penalty_trans = r->config->lmp.lm_penalty_trans; dwrk = &(r->pass2); /* set word contexts to cnword[] from 1 considering transparent words */ if (ngram) { cnnum = 0; last_trans = 0; for(i=hypo->seqnum-1;i>=0;i--) { if (! winfo->is_transparent[hypo->seq[i]]) { dwrk->cnword[cnnum+1] = hypo->seq[i]; cnnum++; if (cnnum >= ngram->n - 1) break; } else { last_trans++; } } if (ngram->dir == DIR_RL) { for(i=0;icnwordrev[cnnum-1-i] = dwrk->cnword[i+1]; } } /* use ngram id */ if (ngram->dir == DIR_RL) { for(i=0;icnwordrev[i] = winfo->wton[dwrk->cnwordrev[i]]; } else { for(i=0;icnword[i+1] = winfo->wton[dwrk->cnword[i+1]]; } } /* lookup survived words in backtrellis on time frame 't' */ for (i=0;inum[t];i++) { w = (bt->rw[t][i])->wid; #ifdef WORD_GRAPH /* only words on the word graphs are expanded */ if (!(bt->rw[t][i])->within_wordgraph) continue; #endif /* not WORD_GRAPH */ #ifdef WPAIR /* some word have same word ID with different previous word, so only one will be opened (best word will be selected later by next_word() */ if (w == w_old) continue; /* backtrellis is sorted by word ID */ else w_old = w; #endif /* WPAIR */ /* skip if already exist */ if (search_nw(nw, w, oldnum) != NULL) continue; /* compute LM probability of the word */ if (ngram) { /* compute N-gram probability */ if (ngram->dir == DIR_RL) { /* just compute N-gram prob of the word candidate */ dwrk->cnwordrev[cnnum] = winfo->wton[w]; rawscore = ngram_prob(ngram, cnnum + 1, dwrk->cnwordrev); } else { dwrk->cnword[0] = winfo->wton[w]; rawscore = ngram_forw2back(ngram, dwrk->cnword, cnnum + 1); } #ifdef CLASS_NGRAM rawscore += winfo->cprob[w]; #endif } if (r->lmvar == LM_NGRAM_USER) { /* call user-defined function */ /* be careful that the word context is ordered in backward direction */ rawscore = (*(r->lm->lmfunc.lmprob))(winfo, hypo->seq, hypo->seqnum, w, rawscore); } nw[num]->tre = bt->rw[t][i]; nw[num]->id = w; nw[num]->lscore = rawscore * lm_weight2 + lm_penalty2; if (winfo->is_transparent[w]) { /*nw[num]->lscore -= (LOGPROB)last_trans * TRANS_RENZOKU_PENALTY;*/ if (winfo->is_transparent[hypo->seq[hypo->seqnum-1]]) { nw[num]->lscore += lm_penalty_trans; } } /* j_printf("%d: %s added\n", num, winfo->wname[nw[num]->id]); */ num++; } return num; } /** * * @brief 単語トレリスから次単語集合を決定する. * * 指定フレームの前後 lookup_range 分に終端があるトレリス上の単語を集め, * 次単語構造体を構築する. 同じ単語が上記の範囲内に複数ある場合, * 指定フレームにもっとも近いトレリス上の単語が選択される. * * @param r [in] 認識処理インスタンス * @param nw [out] 次単語集合を格納する構造体へのポインタ * @param hypo [in] 展開元の部分文仮説 * @param tm [in] 単語を探す中心となる指定フレーム * @param t_end [in] 単語を探すフレームの右端 * * @return @a nw に格納された次単語候補の数を返す. * * * @brief Determine next word candidates from the word trellis. * * This function builds a list of next word candidates by looking up * the word trellis at specified frame, with lookup_range frame margin. * If the same words exists in the near frames, only the one nearest to the * specified frame will be chosen. 
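*
* When PREFER_CENTER_ON_TRELLIS_LOOKUP is defined, the frames are visited in
* a center-out order so that the word instance nearest to the estimated frame
* is registered first (later duplicates are skipped by search_nw()). A
* minimal sketch of the visiting order, with visit() as an illustrative
* placeholder:
*
* @code
* /* visit() is an illustrative placeholder for pick_backtrellis_words() */
* for (t_step = 0; t_step < lookup_range; t_step++) {
*   visit(tm - t_step);                  /* the center frame, then earlier frames */
*   if (t_step > 0) visit(tm + t_step);  /* then the corresponding later frame */
* }
* @endcode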
* * @param r [in] recognition process instance * @param nw [out] pointer to hold the extracted words as list of next word candidates * @param hypo [in] partial sentence hypothesis from which the words will be expanded * @param tm [in] center time frame to look up the words * @param t_end [in] right frame boundary for the lookup. * * @return the number of next words candidates stored in @a nw. * */ static int get_backtrellis_words(RecogProcess *r, NEXTWORD **nw, NODE *hypo, short tm, short t_end) { int num = 0; int t, t_step; int oldnum=0; BACKTRELLIS *bt; int lookup_range; if (tm < 0) return(0); bt = r->backtrellis; lookup_range = r->config->pass2.lookup_range; #ifdef PREFER_CENTER_ON_TRELLIS_LOOKUP /* fix for 3.2 (01/10/18 by ri) */ /* before and after (one near center frame has high priority) */ for (t_step = 0; t_step < lookup_range; t_step++) { /* before or center */ t = tm - t_step; if (t < 0 || t > bt->framelen - 1 || t >= t_end) continue; num = pick_backtrellis_words(r, nw, oldnum, hypo, t); if (num > oldnum) { qsort(nw, num, sizeof(NEXTWORD *), (int (*)(const void *,const void *))compare_nw); oldnum = num; } if (t_step == 0) continue; /* center */ /* after */ t = tm + t_step; if (t < 0 || t > bt->framelen - 1 || t >= t_end) continue; num = pick_backtrellis_words(r, nw, oldnum, hypo, t); if (num > oldnum) { qsort(nw, num, sizeof(NEXTWORD *), (int (*)(const void *,const void *))compare_nw); oldnum = num; } } #else /* before the center frame */ for(t = tm; t >= tm - lookup_range; t--) { if (t < 0) break; num = pick_backtrellis_words(r, nw, oldnum, hypo, t); if (num > oldnum) { qsort(nw, num, sizeof(NEXTWORD *), (int (*)(const void *,const void *))compare_nw); oldnum = num; } } /* after the center frame */ for(t = tm + 1; t < tm + lookup_range; t++) { if (t > bt->framelen - 1) break; if (t >= t_end) break; num = pick_backtrellis_words(r, nw, oldnum, hypo, t); if (num > oldnum) { qsort(nw, num, sizeof(NEXTWORD *), (int (*)(const void *,const void *))compare_nw); oldnum = num; } } #endif return num; } /** * * @brief 非展開単語を除去. * * 制約により展開対象とならない単語をリストから消去する. * * @param nw [i/o] 次単語集合(集合中の展開できない単語が消去される) * @param hypo [in] 展開元の部分文仮説 * @param num [in] @a nw に現在格納されている単語数 * @param winfo [in] 単語辞書 * * @return 新たに nw に含まれる次単語数 * * * @brief Remove non-expansion word from list. * * Remove words in the nextword list which should not be expanded. * * @param nw [i/o] list of next word candidates (will be shrinked by removing some words) * @param hypo [in] partial sentence hypothesis from which the words will be expanded * @param num [in] current number of next words in @a nw * @param winfo [in] word dictionary * * @return the new number of words in @a nw * */ static int limit_nw(NEXTWORD **nw, NODE *hypo, int num, WORD_INFO *winfo) { int src,dst; int newnum; /* からは何も展開しない */ /* no hypothesis will be generated after "" */ if (hypo->seq[hypo->seqnum-1] == winfo->head_silwid) { return(0); } dst = 0; for (src=0; srcid == winfo->tail_silwid) { /* は展開しない */ /* do not expand (it only appears at start) */ continue; } #ifdef FIX_35_INHIBIT_SAME_WORD_EXPANSION /* 直前単語と同じトレリス単語は展開しない */ /* inhibit expanding the exactly the same trellis word twice */ if (nw[src]->tre == hypo->tre) continue; #endif if (src != dst) memcpy(nw[dst], nw[src], sizeof(NEXTWORD)); dst++; } newnum = dst; return newnum; } /** * * @brief 初期単語仮説集合を求める. * * N-gramベースの探索では,初期仮説は単語末尾の無音単語に固定されている. * ただし,ショートポーズセグメンテーション時は,第1パスで最終フレームに終端が * 残った単語の中で尤度最大の単語となる. 
* * @param nw [out] 次単語候補リスト(得られた初期単語仮説を格納する) * @param peseqlen [in] 入力フレーム長 * @param maxnw [in] @a nw に格納できる単語の最大数 * @param r [in] 認識処理インスタンス * * @return @a nw に格納された単語候補数を返す. * * * @brief Get initial word hypotheses at the beginning. * * on N-gram based recogntion, the initial hypothesis is fixed to the tail * silence word. Exception is that, in short-pause segmentation mode, the * initial hypothesis will be chosen from survived words on the last input * frame in the first pass. * * @param nw [out] pointer to hold the initial word candidates * @param peseqlen [in] input frame length * @param maxnw [in] maximum number of words that can be stored in @a nw * @param r [in] recognition process instance * * @return the number of words extracted and stored to @a nw. * * * @callgraph * @callergraph */ int ngram_firstwords(NEXTWORD **nw, int peseqlen, int maxnw, RecogProcess *r) { if (r->config->successive.enabled) { /* in sp segment mode */ if (r->sp_break_2_begin_word != WORD_INVALID) { /* 初期仮説は 最終フレームに残った単語トレリス上の最尤単語 */ /* the initial hypothesis is the best word survived on the last frame of the segment */ nw[0]->id = r->sp_break_2_begin_word; } else { /* 最終セグメント: 初期仮説は 単語の末尾の無音単語(=winfo->tail_silwid) */ /* we are in the last of sentence: initial hypothesis is word-end silence word */ nw[0]->id = r->lm->winfo->tail_silwid; } } else { /* initial hypothesis should be word-end silence word */ nw[0]->id = r->lm->winfo->tail_silwid; } nw[0]->lscore = uni_prob(r->wchmm->ngram, r->wchmm->winfo->wton[nw[0]->id]); #ifdef CLASS_NGRAM nw[0]->lscore += r->wchmm->winfo->cprob[nw[0]->id]; #endif nw[0]->lscore *= r->config->lmp.lm_weight2; #ifndef FIX_PENALTY nw[0]->lscore += r->config->lmp.lm_penalty2; #endif return 1; /* number of words = 1 */ } /** * * @brief 次単語仮説集合を返す. * * 与えられた部分文仮説から,次に接続しうる単語の集合を返す. 実際には, * 第1パスの結果であるトレリス単語集合 bt 上で,展開元の部分文仮説の最終単語の * (推定された)始端フレーム hypo->estimated_next_t の前後に存在する * 単語集合を取出し,それらの N-gram 接続確率を計算して返す. * 取り出された次単語仮説は,あらかじめ maxnm の長さだけ * 領域が確保されている nw に格納される. * * @param hypo [in] 展開元の文仮説 * @param nw [out] 次単語候補リストを格納する領域へのポインタ * @param maxnw [in] @a nw の最大長 * @param r [in] 認識処理インスタンス * * @return 抽出され nw に格納された次単語仮説の数を返す. * * * @brief Return the list of next word candidate. * * Given a partial sentence hypothesis "hypo", it returns the list of * next word candidates. Actually, it extracts from word trellis the * list of words whose word-end node has survived near the estimated * beginning-of-word frame of last word "hypo->estimated_next_t", and store * them to "nw" with their N-gram probabilities. * * @param hypo [in] source partial sentence hypothesis * @param nw [out] pointer to store the list of next word candidates (should be already allocated) * @param maxnw [in] maximum number of words that can be stored to @a nw * @param r [in] recognition process instance * * @return the number of extracted next word candidates in @a nw. 
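*
* As a schematic sketch of how these three functions cooperate on the 2nd
* pass (the real loop lives in wchmm_fbs(); pop_best(), gen_next_hypo(),
* push() and output_result() are illustrative placeholders, not Julius API):
*
* @code
* /* schematic stack decoding flow; placeholder names, not actual functions */
* num = ngram_firstwords(nw, peseqlen, maxnw, r);    /* seed initial hypotheses */
* while ((hypo = pop_best()) != NULL) {
*   if (ngram_acceptable(hypo, r)) {                 /* reached start of sentence */
*     output_result(hypo);
*     continue;
*   }
*   num = ngram_nextwords(hypo, nw, maxnw, r);       /* expand backward in time */
*   for (i = 0; i < num; i++) {
*     push(gen_next_hypo(hypo, nw[i]));              /* connect word, rescore */
*   }
* }
* @endcode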
* * @callgraph * @callergraph */ int ngram_nextwords(NODE *hypo, NEXTWORD **nw, int maxnw, RecogProcess *r) { int num, num2; if (hypo->seqnum == 0) { j_internal_error("ngram_nextwords: hypo contains no word\n"); } /* 仮説の推定終端時刻において backtrellis内に残っている単語を得る */ /* get survived words on backtrellis at the estimated end frame */ num = get_backtrellis_words(r, nw, hypo, hypo->estimated_next_t, hypo->bestt); /* 展開できない単語をチェックして外す */ /* exclude unallowed words */ num2 = limit_nw(nw, hypo, num, r->lm->winfo); if (debug2_flag) jlog("DEBUG: ngram_decode: %d-%d=%d unfolded\n",num, num-num2,num2); return(num2); } /** * * @brief 受理判定 * * 与えられた部分文仮説が,文(すなわち探索終了)として * 受理可能であるかどうかを返す. N-gram では文頭に対応する無音単語 * (silhead) であれば受理する. * * @param hypo [in] 部分文仮説 * @param r [in] 認識処理インスタンス * * @return 文として受理可能であれば TRUE,不可能なら FALSE を返す. * * * @brief Acceptance check. * * Return whether the given partial hypothesis is acceptable as a sentence * and can be treated as a final search candidate. In N-gram mode, it checks * whether the last word is the beginning-of-sentence silence (silhead). * * @param hypo [in] partial sentence hypothesis to be examined * @param r [in] recognition process instance * * @return TRUE if acceptable as a sentence, or FALSE if not. * * @callgraph * @callergraph */ boolean ngram_acceptable(NODE *hypo, RecogProcess *r) { if (r->config->successive.enabled) { /* 最後の仮説が第1パス最尤仮説の最初の単語と一致しなければならない */ /* the last word should be equal to the first word on the best hypothesis on 1st pass */ if (hypo->seq[hypo->seqnum-1] == r->sp_break_2_end_word) { return TRUE; } } else { /* 最後の仮説が文頭無音単語でなければならない */ /* the last word should be head silence word */ if (hypo->seq[hypo->seqnum-1] == r->lm->winfo->head_silwid) { return TRUE; } } return FALSE; } /* end of file */ julius-4.2.2/libjulius/src/search_bestfirst_v2.c0000644001051700105040000013005612004452401020223 0ustar ritrlab/** * @file search_bestfirst_v2.c * * * @brief 第2パスのViterbi演算および仮説スコア計算 (通常版) * * ここでは,第2パスにおいて探索中の仮説のViterbiスコアの更新演算, * 次単語とのトレリス接続,および仮説のスコア計算を行う関数が定義されて * います. * * 単語接続部の単語間音素環境依存性は,正確な nextscan アルゴリズムを用います. * このファイルで定義されている関数は,config.h において PASS2_STRICT_IWCD * が define であるときに使用されます. 逆に上記が undef であるときは, * search_bestfirst_v1.c の関数が用いられます. * * Backscan では,デコーディングの精度を重視して,次単語とその前の単語に * おける単語間音素コンテキストは仮説展開時にすべて厳密に計算されます. * Backscan を行なう search_bestfirst_v1.c が,仮説の POP 時に行なうのに * 比べて,ここでは仮説生成の時点で正確なスコアを計算するため, * スコア精度は高い. ただし,生成されるすべての仮説に対して * (たとえスタックに入らない仮説であっても)トライフォンの再計算を行なうため, * 計算量は backscan に比べて増大します. * * * * @brief Viterbi path update and scoring on the second pass (standard version) * * This file has functions for score calculations on the 2nd pass. * It includes Viterbi path update calculation of a hypothesis, calculations * of scores and word trellis connection at word expansion. * * The cross-word triphone will be computed just at word expansion time, * for precise scoring. This is called "nextscan" altgorithm. These * functions are enabled when PASS2_STRICT_IWCD is DEFINED in config.h. * If undefined, the "backscan" functions in search_bestfirst_v1.c will be * used instead. * * Here in nextscan algorithm, all cross-word context dependencies between * next word and source hypothesis are computed as soon as a new hypotheses * is expanded. As the precise cross-word triphone score is applied on * hypothesis generation with no delay, more accurate search-time score can * be obtained than the delayed backscan method in search_bestfirst_v1.c. 
* On the other hand, the computational cost grows much by re-calculating * forward score of cross-word triphones for all the generated hypothethes, * even non-promising ones. * * * @author Akinobu Lee * @date Mon Sep 12 00:58:50 2005 * * $Revision: 1.7 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ /* By "fast" setting (default), search_bestfirst_v1.c is used for faster decoding. Please specify option "--enable-setup=standard" or "--enable-strict-iwcd2" at "./configure" to activate this. */ #include #ifdef PASS2_STRICT_IWCD #undef TCD ///< Define if want triphone debug messages /**********************************************************************/ /************ 仮説ノードの基本操作 ************/ /************ Basic functions for hypothesis node handling ************/ /**********************************************************************/ #undef STOCKER_DEBUG #ifdef STOCKER_DEBUG static int stocked_num = 0; static int reused_num = 0; static int new_num = 0; static int request_num = 0; #endif /** * * 仮説ノードを実際にメモリ上から解放する. * * @param node [in] 仮説ノード * * * Free a hypothesis node actually. * * @param node [in] hypothesis node * */ static void free_node_exec(NODE *node) { if (node == NULL) return; free(node->g); #ifdef GRAPHOUT_PRECISE_BOUNDARY if (node->region->graphout) { free(node->wordend_frame); free(node->wordend_gscore); } #endif free(node); } /** * * 仮説ノードの利用を終了してリサイクル用にストックする * * @param node [in] 仮説ノード * * * Stock an unused hypothesis node for recycle. * * @param node [in] hypothesis node * * @callgraph * @callergraph */ void free_node(NODE *node) { if (node == NULL) return; if (node->region->graphout) { if (node->prevgraph != NULL && node->prevgraph->saved == FALSE) { wordgraph_free(node->prevgraph); } } /* save to stocker */ node->next = node->region->pass2.stocker_root; node->region->pass2.stocker_root = node; #ifdef STOCKER_DEBUG stocked_num++; #endif } /** * * リサイクル用ノード格納庫を空にする. * * @param s [in] stack decoding work area * * * * Clear the node stocker for recycle. * * @param s [in] stack decoding work area * * * @callgraph * @callergraph */ void clear_stocker(StackDecode *s) { NODE *node, *tmp; node = s->stocker_root; while(node) { tmp = node->next; free_node_exec(node); node = tmp; } s->stocker_root = NULL; #ifdef STOCKER_DEBUG jlog("DEBUG: %d times requested, %d times newly allocated, %d times reused\n", request_num, new_num, reused_num); stocked_num = 0; reused_num = 0; new_num = 0; request_num = 0; #endif } /** * * 仮説をコピーする. * * @param dst [out] コピー先の仮説 * @param src [in] コピー元の仮説 * * @return @a dst を返す. * * * Copy the content of node to another. * * @param dst [out] target hypothesis * @param src [in] source hypothesis * * @return the value of @a dst. 
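*
* Together with newnode() and free_node(), a typical lifecycle of a
* hypothesis node during expansion looks roughly like the following sketch
* (schematic only; keep_hypothesis() is an illustrative placeholder for the
* stack insertion test):
*
* @code
* /* schematic node lifecycle; keep_hypothesis() is a placeholder */
* NODE *new;
* new = newnode(r);            /* reuses a node from the stocker when available */
* cpy_node(new, src);          /* duplicate the source hypothesis */
* /* ... append the next word and update the scores of "new" ... */
* if (!keep_hypothesis(new))
*   free_node(new);            /* not freed: pushed back onto the stocker */
* @endcode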
* * @callgraph * @callergraph */ NODE * cpy_node(NODE *dst, NODE *src) { int peseqlen; peseqlen = src->region->peseqlen; dst->next = src->next; dst->prev = src->prev; memcpy(dst->g, src->g, sizeof(LOGPROB) * peseqlen); memcpy(dst->seq, src->seq, sizeof(WORD_ID) * MAXSEQNUM); #ifdef CM_SEARCH #ifdef CM_MULTIPLE_ALPHA { int w; for(w=0;wseqnum;w++) { memcpy(dst->cmscore[w], src->cmscore[w], sizeof(LOGPROB) * src->region->config->annotate.cm_alpha_num); } } #else memcpy(dst->cmscore, src->cmscore, sizeof(LOGPROB) * MAXSEQNUM); #endif #endif /* CM_SEARCH */ dst->seqnum = src->seqnum; dst->score = src->score; dst->bestt = src->bestt; dst->estimated_next_t = src->estimated_next_t; dst->endflag = src->endflag; dst->state = src->state; dst->tre = src->tre; if (src->region->ccd_flag) { dst->last_ph = src->last_ph; dst->last_ph_sp_attached = src->last_ph_sp_attached; } dst->totallscore = src->totallscore; dst->final_g = src->final_g; #ifdef VISUALIZE dst->popnode = src->popnode; #endif if (src->region->graphout) { #ifdef GRAPHOUT_PRECISE_BOUNDARY memcpy(dst->wordend_frame, src->wordend_frame, sizeof(short) * peseqlen); memcpy(dst->wordend_gscore, src->wordend_gscore, sizeof(LOGPROB) * peseqlen); #endif dst->prevgraph = src->prevgraph; dst->lastcontext = src->lastcontext; #ifndef GRAPHOUT_PRECISE_BOUNDARY dst->tail_g_score = src->tail_g_score; #endif } return(dst); } /** * * 新たな仮説ノードを割り付ける. もし格納庫に以前試用されなくなった * ノードがある場合はそれを再利用する. なければ新たに割り付ける. * * @param r [in] 認識処理インスタンス * * @return 新たに割り付けられた仮説ノードへのポインタを返す. * * * Allocate a new hypothesis node. If the node stocker is not empty, * the one in the stocker is re-used. Otherwise, allocate as new. * * @param r [in] recognition process instance * * @return pointer to the newly allocated node. * * @callgraph * @callergraph */ NODE * newnode(RecogProcess *r) { NODE *tmp; int i; int peseqlen; peseqlen = r->peseqlen; #ifdef STOCKER_DEBUG request_num++; #endif if ((tmp = r->pass2.stocker_root) != NULL) { /* re-use ones in the stocker */ r->pass2.stocker_root = tmp->next; #ifdef STOCKER_DEBUG stocked_num--; reused_num++; #endif } else { /* allocate new */ tmp = (NODE *)mymalloc(sizeof(NODE)); tmp->g = (LOGPROB *)mymalloc(sizeof(LOGPROB) * peseqlen); #ifdef GRAPHOUT_PRECISE_BOUNDARY if (r->graphout) { tmp->wordend_frame = (short *)mymalloc(sizeof(short) * peseqlen); tmp->wordend_gscore = (LOGPROB *)mymalloc(sizeof(LOGPROB) * peseqlen); } #endif #ifdef STOCKER_DEBUG new_num++; #endif } /* clear the data */ /*bzero(tmp,sizeof(NODE));*/ tmp->next=NULL; tmp->prev=NULL; tmp->last_ph = NULL; tmp->last_ph_sp_attached = FALSE; if (r->ccd_flag) { tmp->totallscore = LOG_ZERO; } tmp->endflag = FALSE; tmp->seqnum = 0; for(i = 0; i < peseqlen; i++) { tmp->g[i] = LOG_ZERO; } tmp->final_g = LOG_ZERO; #ifdef VISUALIZE tmp->popnode = NULL; #endif if (r->graphout) { tmp->prevgraph = NULL; tmp->lastcontext = NULL; } tmp->region = r; return(tmp); } /**********************************************************************/ /************ 前向きトレリス展開と尤度計算 ****************/ /************ Expand trellis and update forward score *****************/ /**********************************************************************/ /** * * 1単語分のトレリス計算用のワークエリアを確保. * * @param r [in] 認識処理インスタンス * * * * Allocate work area for trellis computation of a word. 
* * @param r [in] recognition process instance * * * @callgraph * @callergraph */ void malloc_wordtrellis(RecogProcess *r) { int maxwn; StackDecode *dwrk; maxwn = r->lm->winfo->maxwn + 10; /* CCDによる変動を考慮 */ dwrk = &(r->pass2); dwrk->wordtrellis[0] = (LOGPROB *)mymalloc(sizeof(LOGPROB) * maxwn); dwrk->wordtrellis[1] = (LOGPROB *)mymalloc(sizeof(LOGPROB) * maxwn); dwrk->g = (LOGPROB *)mymalloc(sizeof(LOGPROB) * r->peseqlen); dwrk->phmmlen_max = r->lm->winfo->maxwlen + 2; dwrk->phmmseq = (HMM_Logical **)mymalloc(sizeof(HMM_Logical *) * dwrk->phmmlen_max); if (r->lm->config->enable_iwsp && r->am->hmminfo->multipath) { dwrk->has_sp = (boolean *)mymalloc(sizeof(boolean) * dwrk->phmmlen_max); } else { dwrk->has_sp = NULL; } dwrk->wef = NULL; dwrk->wes = NULL; dwrk->wend_token_frame[0] = NULL; dwrk->wend_token_frame[1] = NULL; dwrk->wend_token_gscore[0] = NULL; dwrk->wend_token_gscore[1] = NULL; #ifdef GRAPHOUT_PRECISE_BOUNDARY if (r->graphout) { dwrk->wef = (short *)mymalloc(sizeof(short) * r->peseqlen); dwrk->wes = (LOGPROB *)mymalloc(sizeof(LOGPROB) * r->peseqlen); dwrk->wend_token_frame[0] = (short *)mymalloc(sizeof(short) * maxwn); dwrk->wend_token_frame[1] = (short *)mymalloc(sizeof(short) * maxwn); dwrk->wend_token_gscore[0] = (LOGPROB *)mymalloc(sizeof(LOGPROB) * maxwn); dwrk->wend_token_gscore[1] = (LOGPROB *)mymalloc(sizeof(LOGPROB) * maxwn); } #endif } /** * * 1単語分のトレリス計算用のワークエアリアを解放 * * * * Free the work area for trellis computation of a word. * * * @callgraph * @callergraph */ void free_wordtrellis(StackDecode *dwrk) { free(dwrk->wordtrellis[0]); free(dwrk->wordtrellis[1]); free(dwrk->g); free(dwrk->phmmseq); if (dwrk->has_sp) { free(dwrk->has_sp); dwrk->has_sp = NULL; } #ifdef GRAPHOUT_PRECISE_BOUNDARY if (dwrk->wef) { free(dwrk->wef); free(dwrk->wes); free(dwrk->wend_token_frame[0]); free(dwrk->wend_token_frame[1]); free(dwrk->wend_token_gscore[0]); free(dwrk->wend_token_gscore[1]); dwrk->wef = NULL; } #endif } /**********************************************************************/ /************ 仮説の前向き尤度計算 *******************/ /************ Compute forward score of a hypothesis *******************/ /**********************************************************************/ /* 与えられた音素のならび phmmseq[0..phmmlen-1]に対してviterbi計算を行う. g[0..framelen-1] のスコアを初期値として g_new[0..framelen-1]に更新値を代入. 最低 least_frame まではscanする. */ /* Viterbi computation for the given phoneme sequence 'phmmseq[0..phmmlen-1]' with g[0..framelen-1] as initial values. The results are stored in g_new[0..framelen-1]. Scan should not terminate at least it reaches 'least_frame'. */ /** * * 与えられた音素の並びに対して Viterbi 計算を行い,前向きスコアを * 更新する汎用関数. * * @param g [in] 現在の時間ごとの前向きスコア * @param g_new [out] 更新後の新たな前向きスコアを格納するバッファ * @param phmmseq [in] 音素HMMの並び * @param has_sp [in] short-pause location * @param phmmlen [in] @a phmmseq の長さ * @param param [in] 入力パラメータ * @param framelen [in] 入力フレーム長 * @param least_frame [in] ビーム設定時,このフレーム数以上は Viterbi計算する * @param final_g [in] final g scores * @param wordend_frame_src [in] 現在の単語終端フレームトークン * @param wordend_frame_dst [out] 更新後の新たな単語終端フレームトークン * @param wordend_gscore_src [in] 現在の単語終端スコアトークン * @param wordend_gscore_dst [out] 更新後の新たな単語終端スコアトークン * @param r [in] recognition process instance * * * Generic function to perform Viterbi path updates for given phoneme * sequence. 
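*
* In essence each trellis node follows the standard Viterbi recurrence,
* written here for the reverse-time scan of the 2nd pass (and ignoring the
* multipath variant's pseudo entry/exit states):
*
*   score[t][i] = max_j ( score[t+1][j] + log a(j,i) ) + log b_i(o_t)
*
* where a(j,i) are the HMM transition probabilities and b_i(o_t) is the
* output probability of state i for the observation at frame t. The scan
* starts at the last frame whose g[] value survives the score envelope
* (framemaxscore[t] - scan_beam_thres) and proceeds down to frame 0; nodes
* falling outside the envelope are pruned to LOG_ZERO.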
* * @param g [in] current forward scores at each input frame * @param g_new [out] buffer to save the resulting score updates * @param phmmseq [in] phoneme sequence to perform Viterbi * @param has_sp [in] short-pause location * @param phmmlen [in] length of @a phmmseq. * @param param [in] input parameter vector * @param framelen [in] input frame length to compute * @param least_frame [in] Least frame length to force viterbi even with beam * @param final_g [in] final g scores * @param wordend_frame_src [in] current word-end frame tokens * @param wordend_frame_dst [out] buffer to store updated word-end frame tokens * @param wordend_gscore_src [in] current word-end score tokens * @param wordend_gscore_dst [out] buffer to store updated word-end score tokens * @param r [in] recognition process instance * * */ static void do_viterbi(LOGPROB *g, LOGPROB *g_new, HMM_Logical **phmmseq, boolean *has_sp, int phmmlen, HTK_Param *param, int framelen, int least_frame, LOGPROB *final_g, short *wordend_frame_src, short *wordend_frame_dst, LOGPROB *wordend_gscore_src, LOGPROB *wordend_gscore_dst, RecogProcess *r) /* has_sp and final_g is for multipath only */ { HMM *whmm; /* HMM */ int wordhmmnum; /* length of above */ int startt; /* scan start frame */ LOGPROB tmpmax,tmpscore; /* variables for Viterbi process */ A_CELL *ac; int t,i,j; boolean node_exist_p; int tn; ///< Temporal pointer to current buffer int tl; ///< Temporal pointer to previous buffer /* store global values to local for rapid access */ StackDecode *dwrk; WORD_INFO *winfo; HTK_HMM_INFO *hmminfo; LOGPROB *framemaxscore; #ifdef SCAN_BEAM LOGPROB scan_beam_thres; #endif dwrk = &(r->pass2); winfo = r->lm->winfo; hmminfo = r->am->hmminfo; framemaxscore = r->pass2.framemaxscore; #ifdef SCAN_BEAM scan_beam_thres = r->config->pass2.scan_beam_thres; #endif #ifdef TCD jlog("DEBUG: scan for:"); for (i=0;iname); } jlog("\n"); #endif /* 単語HMMを作る */ /* make word HMM */ whmm = new_make_word_hmm(hmminfo, phmmseq, phmmlen, has_sp); if (whmm == NULL) { j_internal_error("Error: failed to make word hmm\n"); } wordhmmnum = whmm->len; if (wordhmmnum >= winfo->maxwn + 10) { j_internal_error("do_viterbi: word too long (>%d)\n", winfo->maxwn + 10); } /* scan開始点を検索 -> starttへ*/ /* search for the start frame -> set to startt */ for(t = framelen-1; t >=0 ; t--) { if ( #ifdef SCAN_BEAM g[t] > framemaxscore[t] - scan_beam_thres && #endif g[t] > LOG_ZERO) { break; } } if (t < 0) { /* no node has score > LOG_ZERO */ /* reset all scores and end */ for(t=0;tgraphout) { wordend_frame_dst[t] = -1; wordend_gscore_dst[t] = LOG_ZERO; } #endif } free_hmm(whmm); return; } startt = t; /* 開始点以降[startt+1..framelen-1] の g_new[] をリセット */ /* clear g_new[] for [startt+1..framelen-1] */ for(t=framelen-1;t>startt;t--) { g_new[t] = LOG_ZERO; #ifdef GRAPHOUT_PRECISE_BOUNDARY if (r->graphout) { wordend_frame_dst[t] = -1; wordend_gscore_dst[t] = LOG_ZERO; } #endif } /*****************/ /* viterbi start */ /*****************/ /* set initial swap buffer */ tn = 0; tl = 1; #ifdef GRAPHOUT_PRECISE_BOUNDARY if (r->graphout) { for(i=0;iwend_token_frame[tn][i] = -1; dwrk->wend_token_gscore[tn][i] = LOG_ZERO; } } #endif if (! 
hmminfo->multipath) { /* 時間 [startt] 上の値を初期化 */ /* initialize scores on frame [startt] */ for(i=0;iwordtrellis[tn][i] = LOG_ZERO; dwrk->wordtrellis[tn][wordhmmnum-1] = g[startt] + outprob(&(r->am->hmmwrk), startt, &(whmm->state[wordhmmnum-1]), param); g_new[startt] = dwrk->wordtrellis[tn][0]; #ifdef GRAPHOUT_PRECISE_BOUNDARY if (r->graphout) { dwrk->wend_token_frame[tn][wordhmmnum-1] = wordend_frame_src[startt]; dwrk->wend_token_gscore[tn][wordhmmnum-1] = wordend_gscore_src[startt]; wordend_frame_dst[startt] = dwrk->wend_token_frame[tn][0]; wordend_gscore_dst[startt] = dwrk->wend_token_gscore[tn][0]; } #endif } /* メインループ: startt から始まり 0 に向かって Viterbi 計算 */ /* main loop: start from [startt], and compute Viterbi toward [0] */ for(t = hmminfo->multipath ? startt : startt - 1; t >= 0; t--) { /* wordtrellisのワークエリアをスワップ */ /* swap workarea of wordtrellis */ i = tn; tn = tl; tl = i; node_exist_p = FALSE; /* TRUE if there is at least 1 survived node in this frame */ if (! hmminfo->multipath) { /* 端のノード [t][wordhmmnum-1]は,内部遷移 か g[]の高い方になる */ /* the edge node [t][wordhmmnum-1] is either internal transitin or g[] */ tmpscore = LOG_ZERO; for (ac=whmm->state[wordhmmnum-1].ac;ac;ac=ac->next) { if (tmpscore < dwrk->wordtrellis[tl][ac->arc] + ac->a) { j = ac->arc; tmpscore = dwrk->wordtrellis[tl][ac->arc] + ac->a; } } if (g[t] > tmpscore) { tmpmax = g[t]; #ifdef GRAPHOUT_PRECISE_BOUNDARY if (r->graphout) { dwrk->wend_token_frame[tn][wordhmmnum-1] = wordend_frame_src[t]; dwrk->wend_token_gscore[tn][wordhmmnum-1] = wordend_gscore_src[t]; } #endif } else { tmpmax = tmpscore; #ifdef GRAPHOUT_PRECISE_BOUNDARY if (r->graphout) { dwrk->wend_token_frame[tn][wordhmmnum-1] = dwrk->wend_token_frame[tl][j]; dwrk->wend_token_gscore[tn][wordhmmnum-1] = dwrk->wend_token_gscore[tl][j]; } #endif } /* 端のノードのスコアエンベロープチェック: 一定幅外なら落とす */ /* check if the edge node is within score envelope */ if ( #ifdef SCAN_BEAM tmpmax <= framemaxscore[t] - scan_beam_thres || #endif tmpmax <= LOG_ZERO ) { dwrk->wordtrellis[tn][wordhmmnum-1] = LOG_ZERO; #ifdef GRAPHOUT_PRECISE_BOUNDARY if (r->graphout) { dwrk->wend_token_frame[tn][wordhmmnum-1] = -1; dwrk->wend_token_gscore[tn][wordhmmnum-1] = LOG_ZERO; } #endif } else { node_exist_p = TRUE; dwrk->wordtrellis[tn][wordhmmnum-1] = tmpmax + outprob(&(r->am->hmmwrk), t, &(whmm->state[wordhmmnum-1]), param); } } /* node[wordhmmnum-2..0]についてトレリスを展開 */ /* expand trellis for node [t][wordhmmnum-2..0] */ for(i=wordhmmnum-2;i>=0;i--) { /* 最尤パスと最尤スコア tmpmax を見つける */ /* find most likely path and the max score 'tmpmax' */ tmpmax = LOG_ZERO; for (ac=whmm->state[i].ac;ac;ac=ac->next) { if (hmminfo->multipath) { if (ac->arc == wordhmmnum-1) tmpscore = g[t]; else if (t + 1 > startt) tmpscore = LOG_ZERO; else tmpscore = dwrk->wordtrellis[tl][ac->arc]; tmpscore += ac->a; } else { tmpscore = dwrk->wordtrellis[tl][ac->arc] + ac->a; } if (tmpmax < tmpscore) { tmpmax = tmpscore; j = ac->arc; } } /* スコアエンベロープチェック: 一定幅外なら落とす */ /* check if score of this node is within the score envelope */ if ( #ifdef SCAN_BEAM tmpmax <= framemaxscore[t] - scan_beam_thres || #endif tmpmax <= LOG_ZERO ) { /* invalid node */ dwrk->wordtrellis[tn][i] = LOG_ZERO; #ifdef GRAPHOUT_PRECISE_BOUNDARY if (r->graphout) { dwrk->wend_token_frame[tn][i] = -1; dwrk->wend_token_gscore[tn][i] = LOG_ZERO; } #endif } else { /* survived node */ node_exist_p = TRUE; dwrk->wordtrellis[tn][i] = tmpmax; if (! 
hmminfo->multipath || i > 0) { dwrk->wordtrellis[tn][i] += outprob(&(r->am->hmmwrk), t, &(whmm->state[i]), param); } #ifdef GRAPHOUT_PRECISE_BOUNDARY if (r->graphout) { if (hmminfo->multipath) { if (j == wordhmmnum-1) { dwrk->wend_token_frame[tn][i] = wordend_frame_src[t]; dwrk->wend_token_gscore[tn][i] = wordend_gscore_src[t]; } else { dwrk->wend_token_frame[tn][i] = dwrk->wend_token_frame[tl][j]; dwrk->wend_token_gscore[tn][i] = dwrk->wend_token_gscore[tl][j]; } } else { dwrk->wend_token_frame[tn][i] = dwrk->wend_token_frame[tl][j]; dwrk->wend_token_gscore[tn][i] = dwrk->wend_token_gscore[tl][j]; } } #endif } } /* end of node loop */ /* 時間 t のViterbi計算終了. 新たな前向きスコア g_new[t] をセット */ /* Viterbi end for frame [t]. set the new forward score g_new[t] */ g_new[t] = dwrk->wordtrellis[tn][0]; #ifdef GRAPHOUT_PRECISE_BOUNDARY if (r->graphout) { /* new wordend */ wordend_frame_dst[t] = dwrk->wend_token_frame[tn][0]; wordend_gscore_dst[t] = dwrk->wend_token_gscore[tn][0]; } #endif /* 指定された least_frame より先まで t が進んでおり,かつこの t において スコアエンベロープによって生き残ったノードが一つも無かった場合, このフレームで計算を打ち切りそれ以上先([0..t-1])は計算しない */ /* if frame 't' already reached the 'least_frame' and no node was survived in this frame (all nodes pruned by score envelope), terminate computation at this frame and do not computer further frame ([0..t-1]). */ if (t < least_frame && (!node_exist_p)) { /* crear the rest scores */ for (i=t-1;i>=0;i--) { g_new[i] = LOG_ZERO; #ifdef GRAPHOUT_PRECISE_BOUNDARY if (r->graphout) { wordend_frame_dst[i] = -1; wordend_gscore_dst[i] = LOG_ZERO; } #endif } /* terminate loop */ break; } } /* end of time loop */ if (hmminfo->multipath) { /* 前向きスコアの最終値を計算 (状態 0 から時間 0 への遷移) */ /* compute the total forward score (transition from state 0 to frame 0 */ if (t < 0) { /* computed till the end */ tmpmax = LOG_ZERO; for(ac=whmm->state[0].ac;ac;ac=ac->next) { tmpscore = dwrk->wordtrellis[tn][ac->arc] + ac->a; if (tmpmax < tmpscore) tmpmax = tmpscore; } *final_g = tmpmax; } else { *final_g = LOG_ZERO; } } /* free work area */ free_hmm(whmm); } /** * * 最後の1音素に対して Viterbi 計算を進める. * * @param now [in] 展開元の文仮説. 一音素前の前向きスコアが g[] にあるとする. * @param new [out] 計算後の前向きスコアが g[] に格納される. * @param lastphone [in] Viterbi計算を行う音素HMM * @param sp [in] short-pause insertion * @param param [in] 入力ベクトル列 * @param r [in] 認識処理インスタンス * * * Proceed Viterbi for the last one phoneme. * * @param now [in] source hypothesis where the forward scores prior to the * last one phone is stored at g[] * @param new [out] the resulting updated forward scores will be saved to g[] * @param lastphone [in] phone HMM for the Viterbi processing * @param sp [in] short-pause insertion * @param param [in] input vectors * @param r [in] recognition process instance * */ static void do_viterbi_next_word(NODE *now, NODE *new, HMM_Logical *lastphone, boolean sp, HTK_Param *param, RecogProcess *r) /* sp is for multipath only */ { int t, n; LOGPROB a_value; /* for non multi-path */ int peseqlen; boolean multipath; StackDecode *dwrk; dwrk = &(r->pass2); multipath = r->am->hmminfo->multipath; peseqlen = r->peseqlen; if (! multipath) { /* もし展開元仮説の最後の単語の音素長が 1 であれば,その音素は 直前の scan_word で計算されていない. この場合, now->g[] に以前の 初期値が格納されている. もし音素長が1以上であれば,now->g[] はその手前まで計算した状態 のスコアが入っているので,now->g[t] から初期値を設定する必要がある */ /* If the length of last word is 1, it means the last phone was not scanned in the last call of scan_word(). In this case, now->g[] keeps the previous initial value, so start viterbi with the old scores. 
If the length is more than 1, the now->g[] keeps the values of the scan result till the previous phone, so make initial value considering last transition probability. */ if (r->lm->winfo->wlen[now->seq[now->seqnum-1]] > 1) { n = hmm_logical_state_num(lastphone); a_value = (hmm_logical_trans(lastphone))->a[n-2][n-1]; for(t=0; tg[t] = now->g[t+1] + a_value; dwrk->g[peseqlen-1] = LOG_ZERO; } else { for(t=0; tg[t] = now->g[t]; } } else { for(t=0; tg[t] = now->g[t]; dwrk->phmmseq[0] = lastphone; if (r->lm->config->enable_iwsp) dwrk->has_sp[0] = sp; } do_viterbi(dwrk->g, new->g, multipath ? dwrk->phmmseq : &lastphone, (r->lm->config->enable_iwsp && multipath) ? dwrk->has_sp : NULL, 1, param, peseqlen, now->estimated_next_t, &(new->final_g) #ifdef GRAPHOUT_PRECISE_BOUNDARY , now->wordend_frame, new->wordend_frame , now->wordend_gscore, new->wordend_gscore #else , NULL, NULL , NULL, NULL #endif , r ); #ifdef GRAPHOUT_PRECISE_BOUNDARY if (! multipath) { if (r->graphout) { /* 次回の next_word 用に境界情報を調整 */ /* proceed word boundary for one step for next_word */ new->wordend_frame[r->peseqlen-1] = new->wordend_frame[0]; new->wordend_gscore[r->peseqlen-1] = new->wordend_gscore[0]; for (t=0;tpeseqlen-1;t++) { new->wordend_frame[t] = new->wordend_frame[t+1]; new->wordend_gscore[t] = new->wordend_gscore[t+1]; } } } #endif } /** * * 最後の1単語の前向きトレリスを計算して,文仮説の前向き尤度を更新する. * * @param now [i/o] 文仮説 * @param param [in] 入力パラメータ列 * @param r [in] 認識処理インスタンス * * * Compute the forward viterbi for the last word to update forward scores * and ready for word connection. * * @param now [i/o] hypothesis * @param param [in] input parameter vectors * @param r [in] recognition process instance * * @callgraph * @callergraph */ void scan_word(NODE *now, HTK_Param *param, RecogProcess *r) { int i,t; WORD_ID word; int phmmlen; HMM_Logical *tailph; /* store global values to local for rapid access */ WORD_INFO *winfo; HTK_HMM_INFO *hmminfo; int peseqlen; boolean ccd_flag; boolean enable_iwsp; /* multipath */ StackDecode *dwrk; dwrk = &(r->pass2); winfo = r->lm->winfo; hmminfo = r->am->hmminfo; peseqlen = r->peseqlen; ccd_flag = r->ccd_flag; if (hmminfo->multipath) { enable_iwsp = r->lm->config->enable_iwsp; } #ifndef GRAPHOUT_PRECISE_BOUNDARY if (r->graphout) { if (ccd_flag) { now->tail_g_score = now->g[now->bestt]; } } #endif /* ----------------------- prepare phoneme sequence ------------------ */ /* triphoneなら先頭の1音素はここでは対象外(あとでnext_wordでやる) */ /* 末尾の1音素はコンテキストにしたがって置換 */ /* with triphone, modify the tail phone of the last word according to the previous word, and do not compute the head phone here (that will be computed later in next_word() */ word = now->seq[now->seqnum-1]; #ifdef TCD jlog("DEBUG: w="); for(i=0;iwlen[word];i++) { jlog(" %s",(winfo->wseq[word][i])->name); } if (ccd_flag) { if (now->last_ph != NULL) { jlog(" | %s", (now->last_ph)->name); } } jlog("\n"); #endif /* TCD */ if (ccd_flag) { /* the tail triphone of the last word varies by context */ if (now->last_ph != NULL) { tailph = get_right_context_HMM(winfo->wseq[word][winfo->wlen[word]-1], now->last_ph->name, hmminfo); if (tailph == NULL) { /* fallback to the original bi/mono-phone */ /* error if the original is pseudo phone (not explicitly defined in hmmdefs/hmmlist) */ /* exception: word with 1 phone (triphone may exist in the next expansion */ if (winfo->wlen[word] > 1 && winfo->wseq[word][winfo->wlen[word]-1]->is_pseudo){ error_missing_right_triphone(winfo->wseq[word][winfo->wlen[word]-1], now->last_ph->name); } tailph = winfo->wseq[word][winfo->wlen[word]-1]; } 
} else { tailph = winfo->wseq[word][winfo->wlen[word]-1]; } /* 長さ1の単語は次のnextwordでさらに変化するのでここではscanしない */ /* do not scan word if the length is 1, as it further varies in the following next_word() */ if (winfo->wlen[word] == 1) { now->last_ph = tailph; if (enable_iwsp && hmminfo->multipath) now->last_ph_sp_attached = TRUE; #ifdef GRAPHOUT_PRECISE_BOUNDARY if (r->graphout) { /* 単語境界伝搬情報を初期化 */ /* initialize word boundary propagation info */ for (t=0;twordend_frame[t] = t; now->wordend_gscore[t] = now->g[t]; } } #endif #ifdef TCD jlog("DEBUG: suspended as %s\n", (now->last_ph)->name); #endif return; } /* scan範囲の音素列を準備 */ /* prepare HMM of the scan range */ phmmlen = winfo->wlen[word] - 1; if (phmmlen > dwrk->phmmlen_max) { j_internal_error("scan_word: num of phonemes in a word exceed phmmlenmax (%d) ?\n", dwrk->phmmlen_max); } for (i=0;iphmmseq[i] = winfo->wseq[word][i+1]; } dwrk->phmmseq[phmmlen-1] = tailph; if (enable_iwsp && hmminfo->multipath) { for (i=0;ihas_sp[i] = FALSE; dwrk->has_sp[phmmlen-1] = TRUE; } } else { /* ~ccd_flag */ phmmlen = winfo->wlen[word]; for (i=0;iphmmseq[i] = winfo->wseq[word][i]; if (enable_iwsp && hmminfo->multipath) { for (i=0;ihas_sp[i] = FALSE; dwrk->has_sp[phmmlen-1] = TRUE; } } /* 元のg[]をいったん待避しておく */ /* temporally keeps the original g[] */ for (t=0;tg[t] = now->g[t]; #ifdef GRAPHOUT_PRECISE_BOUNDARY if (r->graphout) { /* 単語境界伝搬情報を初期化 */ /* initialize word boundary propagation info */ for (t=0;twef[t] = t; dwrk->wes[t] = now->g[t]; } } #endif /* viterbiを実行して g[] から now->g[] を更新する */ /* do viterbi computation for phmmseq from g[] to now->g[] */ do_viterbi(dwrk->g, now->g, dwrk->phmmseq, (enable_iwsp && hmminfo->multipath) ? dwrk->has_sp : NULL, phmmlen, param, peseqlen, now->estimated_next_t, &(now->final_g) #ifdef GRAPHOUT_PRECISE_BOUNDARY /* 単語境界情報 we[] から now->wordend_frame[] を更新する */ /* propagate word boundary info from we[] to now->wordend_frame[] */ , dwrk->wef, now->wordend_frame , dwrk->wes, now->wordend_gscore #else , NULL, NULL , NULL, NULL #endif , r ); #ifdef GRAPHOUT_PRECISE_BOUNDARY if (! hmminfo->multipath) { if (r->graphout) { /* 次回の next_word 用に境界情報を調整 */ /* proceed word boundary for one step for next_word */ now->wordend_frame[peseqlen-1] = now->wordend_frame[0]; now->wordend_gscore[peseqlen-1] = now->wordend_gscore[0]; for (t=0;twordend_frame[t] = now->wordend_frame[t+1]; now->wordend_gscore[t] = now->wordend_gscore[t+1]; } } } #endif if (ccd_flag) { /* 次回のために now->last_ph を更新 */ /* update 'now->last_ph' for future scan_word() */ now->last_ph = winfo->wseq[word][0]; if (enable_iwsp && hmminfo->multipath) now->last_ph_sp_attached = FALSE; /* wlen > 1 here */ #ifdef TCD jlog("DEBUG: last_ph = %s\n", (now->last_ph)->name); #endif } } /**************************************************************************/ /*** 新仮説の展開とヒューリスティックを繋いだ全体スコアを計算 ***/ /*** Expand new hypothesis and compute the total score (with heuristic) ***/ /**************************************************************************/ /** * * 展開元仮説に次単語を接続して新しい仮説を生成する. 次単語の単語トレリス上の * スコアから最尤接続点を求め,仮説スコアを計算する. * * @param now [in] 展開元仮説 * @param new [out] 新たに生成された仮説が格納される * @param nword [in] 接続する次単語の情報 * @param param [in] 入力パラメータ列 * @param r [in] 認識処理インスタンス * * * Connect a new word to generate a next hypothesis. The optimal connection * point and new sentence score of the new hypothesis will be estimated by * looking up the corresponding words on word trellis. 
* * @param now [in] source hypothesis * @param new [out] pointer to save the newly generated hypothesis * @param nword [in] next word to be connected * @param param [in] input parameter vector * @param r [in] recognition process instance * * @callgraph * @callergraph */ void next_word(NODE *now, NODE *new, NEXTWORD *nword, HTK_Param *param, RecogProcess *r) { HMM_Logical *lastphone, *newphone; LOGPROB *g_src; int t; int lastword; int i; LOGPROB a_value; LOGPROB tmpp; int startt; int word; TRELLIS_ATOM *tre; LOGPROB totalscore; BACKTRELLIS *backtrellis; WORD_INFO *winfo; HTK_HMM_INFO *hmminfo; int peseqlen; boolean ccd_flag; StackDecode *dwrk; dwrk = &(r->pass2); backtrellis = r->backtrellis; winfo = r->lm->winfo; hmminfo = r->am->hmminfo; peseqlen = r->peseqlen; ccd_flag = r->ccd_flag; word = nword->id; lastword = now->seq[now->seqnum-1]; /* lastphone (直前単語の先頭音素) を準備 */ /* prepare lastphone (head phone of previous word) */ if (ccd_flag) { /* 最終音素 triphone を接続単語に会わせて変化 */ /* modify triphone of last phone according to the next word */ lastphone = get_left_context_HMM(now->last_ph, winfo->wseq[word][winfo->wlen[word]-1]->name, hmminfo); if (lastphone == NULL) { /* fallback to the original bi/mono-phone */ /* error if the original is pseudo phone (not explicitly defined in hmmdefs/hmmlist) */ /* exception: word with 1 phone (triphone may exist in the next expansion */ if (now->last_ph->is_pseudo){ error_missing_left_triphone(now->last_ph, winfo->wseq[word][winfo->wlen[word]-1]->name); } lastphone = now->last_ph; } } /* newphone (接続単語の末尾音素) を準備 */ /* prepare newphone (tail phone of next word) */ if (ccd_flag) { newphone = get_right_context_HMM(winfo->wseq[word][winfo->wlen[word]-1], now->last_ph->name, hmminfo); if (newphone == NULL) { /* fallback to the original bi/mono-phone */ /* error if the original is pseudo phone (not explicitly defined in hmmdefs/hmmlist) */ /* exception: word with 1 phone (triphone may exist in the next expansion */ if (winfo->wlen[word] > 1 && winfo->wseq[word][winfo->wlen[word]-1]->is_pseudo){ error_missing_right_triphone(winfo->wseq[word][winfo->wlen[word]-1], now->last_ph->name); } newphone = winfo->wseq[word][winfo->wlen[word]-1]; } } else { newphone = winfo->wseq[word][winfo->wlen[word]-1]; } /* 単語並び、DFA状態番号、言語スコアを new へ継承・更新 */ /* inherit and update word sequence, DFA state and total LM score to 'new' */ new->score = LOG_ZERO; for (i=0;i< now->seqnum;i++){ new->seq[i] = now->seq[i]; #ifdef CM_SEARCH #ifdef CM_MULTIPLE_ALPHA memcpy(new->cmscore[i], now->cmscore[i], sizeof(LOGPROB) * r->config->annotate.cm_alpha_num); #else new->cmscore[i] = now->cmscore[i]; #endif #endif /* CM_SEARCH */ } new->seq[i] = word; new->seqnum = now->seqnum+1; new->state = nword->next_state; new->totallscore = now->totallscore + nword->lscore; if (ccd_flag) { /* 次仮説の履歴情報として保存 */ /* keep the lastphone for next scan_word() */ new->last_ph = lastphone; new->last_ph_sp_attached = now->last_ph_sp_attached; } if (ccd_flag) { /* 最後の1音素(lastphone)分をscanし,更新したスコアを new に保存 */ /* scan the lastphone and set the updated score to new->g[] */ do_viterbi_next_word(now, new, lastphone, hmminfo->multipath ? 
now->last_ph_sp_attached : FALSE, param, r); g_src = new->g; } else { g_src = now->g; #ifdef GRAPHOUT_PRECISE_BOUNDARY if (r->graphout) { memcpy(new->wordend_frame, now->wordend_frame, sizeof(short)*peseqlen); memcpy(new->wordend_gscore, now->wordend_gscore, sizeof(LOGPROB)*peseqlen); } #endif } /* 次回の scan_word に備えて new->g[] を変更しておく */ /* prepare new->g[] for next scan_word() */ if (hmminfo->multipath) { startt = peseqlen-1; } else { startt = peseqlen-2; } i = hmm_logical_state_num(newphone); a_value = (hmm_logical_trans(newphone))->a[i-2][i-1]; if (hmminfo->multipath) { for(t=0; t <= startt; t++) { new->g[t] = g_src[t] + nword->lscore; } } else { for(t=0; t <= startt; t++) { new->g[t] = g_src[t+1] + a_value + nword->lscore; } } /***************************************************************************/ /* 前向き(第2パス),後ろ向き(第1パス)トレリスを接続し最尤接続点を見つける */ /* connect forward/backward trellis to look for the best connection time */ /***************************************************************************/ /*-----------------------------------------------------------------*/ /* 単語トレリスを探して, 次単語の最尤接続点を発見する */ /* determine the best connection time of the new word, seeking the word trellis */ /*-----------------------------------------------------------------*/ if (r->lmtype == LM_DFA && !r->config->pass2.looktrellis_flag) { /* すべてのフレームにわたって最尤を探す */ /* search for best trellis word throughout all frame */ for(t = startt; t >= 0; t--) { tre = bt_binsearch_atom(backtrellis, t, (WORD_ID) word); if (tre == NULL) continue; totalscore = new->g[t] + tre->backscore; if (! hmminfo->multipath) { if (newphone->is_pseudo) { tmpp = outprob_cd(&(r->am->hmmwrk), t, &(newphone->body.pseudo->stateset[newphone->body.pseudo->state_num-2]), param); } else { tmpp = outprob_state(&(r->am->hmmwrk), t, newphone->body.defined->s[newphone->body.defined->state_num-2], param); } totalscore += tmpp; } if (new->score < totalscore) { new->score = totalscore; new->bestt = t; new->estimated_next_t = tre->begintime - 1; new->tre = tre; } } return; } /* 最後に参照したTRELLIS_ATOMの終端時間の前後 */ /* newの推定時間は,上記で採用したTRELLIS_ATOMの始端時間 */ /* この展開単語のトレリス上の終端時間の前後のみスキャンする 前後に連続して存在するフレームについてのみ計算 */ /* search for best trellis word only around the estimated time */ /* 1. search forward */ for(t = (nword->tre)->endtime; t >= 0; t--) { tre = bt_binsearch_atom(backtrellis, t, (WORD_ID) word); if (tre == NULL) break; /* go to 2 if the trellis word disappear */ totalscore = new->g[t] + tre->backscore; if (! hmminfo->multipath) { if (newphone->is_pseudo) { tmpp = outprob_cd(&(r->am->hmmwrk), t, &(newphone->body.pseudo->stateset[newphone->body.pseudo->state_num-2]), param); } else { tmpp = outprob_state(&(r->am->hmmwrk), t, newphone->body.defined->s[newphone->body.defined->state_num-2], param); } totalscore += tmpp; } if (new->score < totalscore) { new->score = totalscore; new->bestt = t; new->estimated_next_t = tre->begintime - 1; new->tre = tre; } } /* 2. search bckward */ for(t = (nword->tre)->endtime + 1; t <= startt; t++) { tre = bt_binsearch_atom(backtrellis, t, (WORD_ID) word); if (tre == NULL) break; /* end if the trellis word disapper */ totalscore = new->g[t] + tre->backscore; if (! 
hmminfo->multipath) { if (newphone->is_pseudo) { tmpp = outprob_cd(&(r->am->hmmwrk), t, &(newphone->body.pseudo->stateset[newphone->body.pseudo->state_num-2]), param); } else { tmpp = outprob_state(&(r->am->hmmwrk), t, newphone->body.defined->s[newphone->body.defined->state_num-2], param); } totalscore += tmpp; } if (new->score < totalscore) { new->score = totalscore; new->bestt = t; new->estimated_next_t = tre->begintime - 1; new->tre = tre; } } /* set current LM score */ new->lscore = nword->lscore; } /**********************************************************************/ /********** 初期仮説の生成 ****************************/ /********** Generate an initial hypothesis ****************************/ /**********************************************************************/ /** * * 与えられた単語から初期仮説を生成する. * * @param new [out] 新たに生成された仮説が格納される * @param nword [in] 初期仮説単語の情報 * @param param [in] 入力パラメータ列 * @param r [in] 認識処理インスタンス * * * Generate an initial hypothesis from given word. * * @param new [out] pointer to save the newly generated hypothesis * @param nword [in] words of the first candidates * @param param [in] input parameter vector * @param r [in] recognition process instance * * * @callgraph * @callergraph */ void start_word(NODE *new, NEXTWORD *nword, HTK_Param *param, RecogProcess *r) { HMM_Logical *newphone; WORD_ID word; TRELLIS_ATOM *tre = NULL; LOGPROB tmpp; int t; BACKTRELLIS *backtrellis; WORD_INFO *winfo; int peseqlen; boolean ccd_flag; boolean multipath; backtrellis = r->backtrellis; winfo = r->lm->winfo; peseqlen = r->peseqlen; ccd_flag = r->ccd_flag; multipath = r->am->hmminfo->multipath; /* initialize data */ word = nword->id; new->score = LOG_ZERO; new->seqnum = 1; new->seq[0] = word; new->state = nword->next_state; new->totallscore = nword->lscore; /* set current LM score */ new->lscore = nword->lscore; /* cross-word triphone need not be handled on startup */ newphone = winfo->wseq[word][winfo->wlen[word]-1]; if (ccd_flag) { new->last_ph = NULL; } new->g[peseqlen-1] = nword->lscore; for (t=peseqlen-1; t>=0; t--) { tre = bt_binsearch_atom(backtrellis, t, word); if (tre != NULL) { if (r->graphout) { new->bestt = peseqlen-1; } else { new->bestt = t; } new->score = new->g[peseqlen-1] + tre->backscore; if (! multipath) { if (newphone->is_pseudo) { tmpp = outprob_cd(&(r->am->hmmwrk), peseqlen-1, &(newphone->body.pseudo->stateset[newphone->body.pseudo->state_num-2]), param); } else { tmpp = outprob_state(&(r->am->hmmwrk), peseqlen-1, newphone->body.defined->s[newphone->body.defined->state_num-2], param); } new->score += tmpp; } new->estimated_next_t = tre->begintime - 1; new->tre = tre; break; } } if (tre == NULL) { /* no word in backtrellis */ new->score = LOG_ZERO; } } /** * * 終端処理:終端まで達した文仮説の最終的なスコアをセットする. * * @param now [in] 終端まで達した仮説 * @param new [out] 最終的な文仮説のスコアを格納する場所へのポインタ * @param param [in] 入力パラメータ列 * @param r [in] 認識処理インスタンス * * * Hypothesis termination: set the final sentence scores of hypothesis * that has already reached to the end. 
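 *
 * In short, once the last phone has been scanned, the sentence score is
 * the forward score that reaches the beginning of the input; a
 * simplified summary of the branches below:
 *
 * @code
 *   new->score = r->am->hmminfo->multipath ? new->final_g : new->g[0];
 * @endcode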
* * @param now [in] hypothesis that has already reached to the end * @param new [out] pointer to save the final sentence information * @param param [in] input parameter vectors * @param r [in] recognition process instance * * @callgraph * @callergraph */ void last_next_word(NODE *now, NODE *new, HTK_Param *param, RecogProcess *r) { cpy_node(new, now); if (r->ccd_flag) { /* 最終音素分を viterbi して最終スコアを設定 */ /* scan the last phone and update the final score */ if (r->am->hmminfo->multipath) { do_viterbi_next_word(now, new, now->last_ph, now->last_ph_sp_attached, param, r); new->score = new->final_g; } else { do_viterbi_next_word(now, new, now->last_ph, FALSE, param, r); new->score = new->g[0]; } } else { if (r->am->hmminfo->multipath) { new->score = now->final_g; } else { new->score = now->g[0]; } #ifdef GRAPHOUT_PRECISE_BOUNDARY if (r->graphout) { /* last boundary has moved to [peseqlen-1] in last scan_word() */ memcpy(new->wordend_frame, now->wordend_frame, sizeof(short)*r->peseqlen); memcpy(new->wordend_gscore, now->wordend_gscore, sizeof(LOGPROB)*r->peseqlen); } #endif } } #endif /* PASS2_STRICT_IWCD */ /* end of file */ julius-4.2.2/libjulius/src/outprob_style.c0000644001051700105040000004517012004452401017176 0ustar ritrlab/** * @file outprob_style.c * * * @brief 状態の出力確率計算(第1パス) * * 第1パスで,木構造化辞書上のノードの,入力ベクトルに対する HMM の * 出力対数確率を計算する. * * トライフォン使用時は,単語内の音素環境依存については辞書読み込み時に * 考慮されて対応するトライフォンがすでに割り当てられているので,ここで * 特別な処理は行われない. 単語先頭および末尾の音素は,木構造化辞書上では * pseudo triphone が割り当たっており,これらについては,以下のように * 単語間を音素環境依存性を考慮した計算が行われる. * -# 単語内音素: 通常通り計算する. * -# 単語の先頭音素: 直前単語の情報から,pseudo triphone を正しい * トライフォンに動的に切り替えて計算. * -# 単語の末尾音素: その pseudo triphone に含まれる(同じ左コンテキストを * 持つトライフォンの)状態集合中のすべての状態について尤度を計算し, * - "-iwcd1 max" 指定時は最大値 * - "-iwcd1 avg" 指定時は平均値(default) * - "-iwcd1 best N" 指定時は上位N個の平均値 * をその状態の尤度として採用する. (これは outprob_cd() 内で自動的に選択 * され計算される. * -# 1音素からなる単語の場合: 上記を両方とも考慮する. * * 上記の処理を行うには,木構造化辞書の状態ごとに,それぞれが単語内でどの * 位置の音素に属する状態であるかの情報が必要である. 木構造化辞書では, * 状態ごとに上記のどの処理を行えば良いかを AS_Style であらかじめ保持している. * * また,上記の 2 と 4 の状態では,コンテキストに伴うtriphone変化を, * 直前単語ID とともに状態ごとにフレーム単位でキャッシュしている. これにより * 計算量の増大を防ぐ. * * * * @brief Compute output probability of a state (1st pass) * * These functions compute the output probability of an input vector * from a state on the lexicon tree. * * When using triphone acoustic model, the cross-word triphone handling is * done here. The head and tail phoneme of every words has corresponding * pseudo phone set on the tree lexicon, so the actual likelihood computation * will be done as the following: * -# word-internal: compute as normal. * -# Word head phone: the correct triphone phone, according to the last * word information on the passing token, will be dynamically assigned * to compute the cross-word dependency. * -# Word tail phone: all the states in the pseudo phone set (they are * states of triphones that has the same left context as the word end) * will be computed, and use * - maximum value if "-iwcd1 max" specified, or * - average value if "-iwcd1 avg" specified, or * - average of best N states if "-iwcd1 best N" specified (default: 3) * the actual pseudo phoneset computation will be done in outprob_cd(). * -# word with only one state: both of above should be considered. * * To denote which operation to do for a state, AS_Style ID is assigned * to each state. * * The triphone transformation, that will be performed on the state * of 2 and 4 above, will be cached on the tree lxicon by each state * per frame, to suppress computation overhead. 
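 *
 * The cache test itself is cheap: each word-head (and one-phone word)
 * state keeps the state (or pseudo state set) resolved for the last seen
 * left context, together with the word ID that produced it.  A minimal
 * sketch of the pattern used in outprob_style() below, where
 * resolve_left_context() is a hypothetical name standing for the
 * get_left_context_HMM() lookup and its fallback handling:
 *
 * @code
 *   if (rset->cache.state == NULL || rset->lastwid_cache != last_wid) {
 *     resolve_left_context(rset, last_wid);  // cache miss: look up triphone
 *     rset->lastwid_cache = last_wid;        // remember the context word
 *   }
 *   // cache hit: reuse rset->cache.state or rset->cache.lset directly
 * @endcode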
* * * * @author Akinobu Lee * @date Mon Aug 22 17:14:26 2005 * * $Revision: 1.6 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #include #ifdef PASS1_IWCD /** * * 語頭トライフォン変化用キャッシュの初期化 * * @param wchmm [i/o] 木構造化辞書 * * * Initialize cache for triphone changing on every word head. * * @param wchmm [i/o] tree lexicon * * @callgraph * @callergraph */ void outprob_style_cache_init(WCHMM_INFO *wchmm) { int n; for(n=0;nn;n++) { if (wchmm->state[n].out.state == NULL) continue; if (wchmm->outstyle[n] == AS_RSET) { (wchmm->state[n].out.rset)->cache.state = NULL; } else if (wchmm->outstyle[n] == AS_LRSET) { (wchmm->state[n].out.lrset)->cache.state = NULL; } } } /**********************************************************************/ /** * * @brief 単語末尾のトライフォンセット (pseudo phone set) を検索する. * * 文法認識では,各カテゴリごとに独立した pseudo phone set を用いる. * ここでは単語末用カテゴリ付き pseudo phone set を検索する. * * @param wchmm [in] 木構造化辞書 * @param hmm [in] 単語の末尾の HMM * @param category [in] 単語の属するカテゴリ * * @return 該当 set が見つかればそこへのポインタ,あるいは見つからなければ * NULL を返す. * * * Lookup a word-end triphone set (aka pseudo phone set) with * category id for grammar recognition. * * @param wchmm [in] word lexicon tree * @param hmm [in] logical HMM of word end phone * @param category [in] belonging category id of the word * * @return pointer to the corresponding phone set if found, or NULL if * not found. * * @callgraph * @callergraph */ CD_Set * lcdset_lookup_with_category(WCHMM_INFO *wchmm, HMM_Logical *hmm, WORD_ID category) { CD_Set *cd; leftcenter_name(hmm->name, wchmm->lccbuf); sprintf(wchmm->lccbuf2, "%s::%04d", wchmm->lccbuf, category); if (wchmm->lcdset_category_root != NULL) { cd = aptree_search_data(wchmm->lccbuf2, wchmm->lcdset_category_root); if (cd == NULL) return NULL; if (strmatch(wchmm->lccbuf2, cd->name)) { return cd; } } return NULL; } /** * * @brief 単語末用カテゴリ付き pseudo phone set を生成する. * * Julian では,ある単語に後続可能な単語集合は文法によって制限される. よって, * 単語末尾から次に後続しうる単語先頭音素の種類も文法によって限定 * される. そこで,与えられた辞書上で,単語のカテゴリごとに,後続しうる先頭音素 * をカテゴリ対情報から作成し,それらをカテゴリ付き pseudo phone set として * 定義して単語終端に用いることで,Julian における単語間トライフォンの * 近似誤差を小さくすることができる. * * この phone set の名前は通常の "a-k" などと異なり "a-k::38" となる * (数字はカテゴリID). ここでは,辞書を検索して可能なすべてのカテゴリ付き * pseudo phone set を,生成する. これは通常の pseudo phone set とは別に * 保持され,単語末端のみで使用される. * * @param wchmm [i/o] 木構造化辞書 * @param hmm [in] これから登録する単語の終端の論理HMM * @param category [in] これから登録する単語の文法カテゴリID * * * * @brief Make a category-indexed context-dependent (pseudo) state set * for word ends. * * In Julian, the word-end pseudo triphone set can be shrinked by using the * category-pair constraint, since the number of possible right-context * phones on the word end will be smaller than all phone. This shrinking not * only saves computation time but also improves recognition since the * approximated value will be closer to the actual value. * * For example, if a word belongs to category ID 38 and has a phone "a-k" * at word end, CD_Set "a-k::38" is generated and assigned to the * phone instead of normal CD_Set "a-k". The "a-k::38" set consists * of triphones whose right context are the beginning phones within * possibly fllowing categories. These will be separated from the normal * pseudo phone set. * * @param wchmm [i/o] tree lexicon * @param hmm [in] logical HMM at the end of a word, of which the * category-indexed pseudo state set will be generated. 
* @param category [in] category ID of the word. * * */ static void lcdset_register_with_category(WCHMM_INFO *wchmm, HMM_Logical *hmm, WORD_ID category) { WORD_ID c2, i, w; HMM_Logical *ltmp; int cnt_c, cnt_w, cnt_p; if (lcdset_lookup_with_category(wchmm, hmm, category) == NULL) { leftcenter_name(hmm->name, wchmm->lccbuf); sprintf(wchmm->lccbuf2, "%s::%04d", wchmm->lccbuf, category); if (debug2_flag) { jlog("DEBUG: category-aware lcdset {%s}...", wchmm->lccbuf2); } cnt_c = cnt_w = cnt_p = 0; /* search for category that can connect after this category */ for(c2=0;c2dfa->term_num;c2++) { if (! dfa_cp(wchmm->dfa, category, c2)) continue; /* for each word in the category, register triphone whose right context is the beginning phones */ for(i=0;idfa->term.wnum[c2];i++) { w = wchmm->dfa->term.tw[c2][i]; ltmp = get_right_context_HMM(hmm, wchmm->winfo->wseq[w][0]->name, wchmm->hmminfo); if (ltmp == NULL) { ltmp = hmm; if (ltmp->is_pseudo) { error_missing_right_triphone(hmm, wchmm->winfo->wseq[w][0]->name); } } if (! ltmp->is_pseudo) { if (regist_cdset(&(wchmm->lcdset_category_root), ltmp->body.defined, wchmm->lccbuf2, &(wchmm->lcdset_mroot))) { cnt_p++; } } } cnt_c++; cnt_w += wchmm->dfa->term.wnum[c2]; } if (debug2_flag) { jlog("%d categories (%d words) can follow, %d HMMs registered\n", cnt_c, cnt_w, cnt_p); } } } /** * * 全ての単語末用カテゴリ付き pseudo phone set を生成する. * 辞書上のすべての単語について,その末尾に登場しうるカテゴリ付き pseudo phone * set を生成する(文法認識用). * * @param wchmm [i/o] 木構造化辞書情報 * * * Generate all possible category-indexed pseudo phone sets for * grammar recognition. * * @param wchmm [i/o] tree lexicon * * @callgraph * @callergraph */ void lcdset_register_with_category_all(WCHMM_INFO *wchmm) { WORD_INFO *winfo; WORD_ID c1, w, w_prev; int i; HMM_Logical *ltmp; winfo = wchmm->winfo; /* (1) 単語終端の音素について */ /* word end phone */ for(w=0;wnum;w++) { ltmp = winfo->wseq[w][winfo->wlen[w]-1]; lcdset_register_with_category(wchmm, ltmp, winfo->wton[w]); } /* (2)1音素単語の場合, 先行しうる単語の終端音素を考慮 */ /* for one-phoneme word, possible left context should be also considered */ for(w=0;wnum;w++) { if (winfo->wlen[w] > 1) continue; for(c1=0;c1dfa->term_num;c1++) { if (! dfa_cp(wchmm->dfa, c1, winfo->wton[w])) continue; for(i=0;idfa->term.wnum[c1];i++) { w_prev = wchmm->dfa->term.tw[c1][i]; ltmp = get_left_context_HMM(winfo->wseq[w][0], winfo->wseq[w_prev][winfo->wlen[w_prev]-1]->name, wchmm->hmminfo); if (ltmp == NULL) continue; /* 1音素自身のlcd_setは(1)で作成済 */ if (ltmp->is_pseudo) continue; /* pseudo phone ならlcd_setはいらない */ lcdset_register_with_category(wchmm, ltmp, winfo->wton[w]); } } } } /** * * カテゴリ付き pseudo phone set をすべて消去する. この関数は Julian で文法が * 変更された際に,カテゴリ付き pseudo phone set を再構築するのに用いられる. * * @param wchmm [i/o] 木構造化辞書 * * * Remove all the registered category-indexed pseudo state sets. * This function will be called when a grammar is changed to re-build the * state sets. * * @param wchmm [i/o] lexicon tree information * * @callgraph * @callergraph */ void lcdset_remove_with_category_all(WCHMM_INFO *wchmm) { free_cdset(&(wchmm->lcdset_category_root), &(wchmm->lcdset_mroot)); } #endif /* PASS1_IWCD */ /** * * 木構造化辞書上の状態の出力確率を計算する. * * @param wchmm [in] 木構造化辞書情報 * @param node [in] ノード番号 * @param last_wid [in] 直前単語(単語先頭のトライフォン計算に用いる) * @param t [in] 時間フレーム * @param param [in] 特徴量パラメータ構造体 (@a t 番目のベクトルについて計算する) * * @return 出力確率の対数値を返す. * * * Calculate output probability on a tree lexion node. This function * calculates log output probability of an input vector on time frame @a t * in input paramter @a param at a node on tree lexicon. 
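 *
 * The computation branches on the AS_Style assigned to the node.  A
 * skeleton of the dispatch implemented below (simplified; the AS_RSET
 * and AS_LRSET branches additionally resolve the cross-word triphone and
 * cache it per state, and outprob_for_head() is a hypothetical name for
 * that resolution step):
 *
 * @code
 *   switch (wchmm->outstyle[node]) {
 *   case AS_STATE:   // word-internal or context-independent state
 *     return outprob_state(wchmm->hmmwrk, t, wchmm->state[node].out.state, param);
 *   case AS_LSET:    // word-end phone: approximate over the pseudo state set
 *     return outprob_cd(wchmm->hmmwrk, t, wchmm->state[node].out.lset, param);
 *   case AS_RSET:    // word-head phone: depends on the last word
 *   case AS_LRSET:   // one-phoneme word: both word head and word end
 *     return outprob_for_head(wchmm, node, last_wid, t, param);
 *   }
 * @endcode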
* * @param wchmm [in] tree lexicon structure * @param node [in] node ID to compute the output probability * @param last_wid [in] word ID of last word hypothesis (used when the node is * within the word beginning phone and triphone is used. * @param t [in] time frame of input vector in @a param to compute. * @param param [in] input parameter structure * * @return the computed log probability. * * @callgraph * @callergraph */ LOGPROB outprob_style(WCHMM_INFO *wchmm, int node, int last_wid, int t, HTK_Param *param) { char rbuf[MAX_HMMNAME_LEN]; ///< Local workarea for HMM name conversion #ifndef PASS1_IWCD /* if cross-word triphone handling is disabled, we simply compute the output prob of the state */ return(outprob_state(wchmm->hmmwrk, t, wchmm->state[node].out, param)); #else /* PASS1_IWCD */ /* state type and context cache is considered */ HMM_Logical *ohmm, *rhmm; RC_INFO *rset; LRC_INFO *lrset; CD_Set *lcd; WORD_INFO *winfo = wchmm->winfo; HTK_HMM_INFO *hmminfo = wchmm->hmminfo; /* the actual computation is different according to their context dependency handling */ switch(wchmm->outstyle[node]) { case AS_STATE: /* normal state (word-internal or context-independent )*/ /* compute as usual */ return(outprob_state(wchmm->hmmwrk, t, wchmm->state[node].out.state, param)); case AS_LSET: /* node in word end phone */ /* compute approximated value using the state set in pseudo phone */ return(outprob_cd(wchmm->hmmwrk, t, wchmm->state[node].out.lset, param)); case AS_RSET: /* note in the beginning phone of word */ /* depends on the last word hypothesis to compute the actual triphone */ rset = wchmm->state[node].out.rset; /* consult cache */ if (rset->cache.state == NULL || rset->lastwid_cache != last_wid) { /* cache miss...calculate */ /* rset contains either defined biphone or pseudo biphone */ if (last_wid != WORD_INVALID) { /* lookup triphone with left-context (= last phoneme) */ if ((ohmm = get_left_context_HMM(rset->hmm, (winfo->wseq[last_wid][winfo->wlen[last_wid]-1])->name, hmminfo)) != NULL) { rhmm = ohmm; } else { /* if triphone not found, try to use the bi-phone itself */ rhmm = rset->hmm; /* If the bi-phone is explicitly specified in hmmdefs/HMMList, use it. if both triphone and biphone not found in user-given hmmdefs/HMMList, use "pseudo" phone, as same as the end of word */ if (debug2_flag) { if (rhmm->is_pseudo) { error_missing_left_triphone(rset->hmm, (winfo->wseq[last_wid][winfo->wlen[last_wid]-1])->name); } } } } else { /* if last word is WORD_INVALID try to use the bi-phone itself */ rhmm = rset->hmm; /* If the bi-phone is explicitly specified in hmmdefs/HMMList, use it. 
if not, use "pseudo" phone, as same as the end of word */ if (debug2_flag) { if (rhmm->is_pseudo) { error_missing_left_triphone(rset->hmm, (winfo->wseq[last_wid][winfo->wlen[last_wid]-1])->name); } } } /* rhmm may be a pseudo phone */ /* store to cache */ if (rhmm->is_pseudo) { rset->last_is_lset = TRUE; rset->cache.lset = &(rhmm->body.pseudo->stateset[rset->state_loc]); } else { rset->last_is_lset = FALSE; rset->cache.state = rhmm->body.defined->s[rset->state_loc]; } rset->lastwid_cache = last_wid; } /* calculate outprob and return */ if (rset->last_is_lset) { return(outprob_cd(wchmm->hmmwrk, t, rset->cache.lset, param)); } else { return(outprob_state(wchmm->hmmwrk, t, rset->cache.state, param)); } case AS_LRSET: /* node in word with only one phoneme --- both beginning and end */ lrset = wchmm->state[node].out.lrset; if (lrset->cache.state == NULL || lrset->lastwid_cache != last_wid) { /* cache miss...calculate */ rhmm = lrset->hmm; /* lookup cdset for given left context (= last phoneme) */ strcpy(rbuf, rhmm->name); if (last_wid != WORD_INVALID) { add_left_context(rbuf, (winfo->wseq[last_wid][winfo->wlen[last_wid]-1])->name); } if (wchmm->category_tree) { #ifdef USE_OLD_IWCD lcd = lcdset_lookup_by_hmmname(hmminfo, rbuf); #else /* use category-indexed cdset */ if (last_wid != WORD_INVALID && (ohmm = get_left_context_HMM(rhmm, (winfo->wseq[last_wid][winfo->wlen[last_wid]-1])->name, hmminfo)) != NULL) { lcd = lcdset_lookup_with_category(wchmm, ohmm, lrset->category); } else { lcd = lcdset_lookup_with_category(wchmm, rhmm, lrset->category); } #endif } else { lcd = lcdset_lookup_by_hmmname(hmminfo, rbuf); } if (lcd != NULL) { /* found, set to cache */ lrset->last_is_lset = TRUE; lrset->cache.lset = &(lcd->stateset[lrset->state_loc]); lrset->lastwid_cache = last_wid; } else { /* no relating lcdset found, falling to normal state */ if (rhmm->is_pseudo) { lrset->last_is_lset = TRUE; lrset->cache.lset = &(rhmm->body.pseudo->stateset[lrset->state_loc]); lrset->lastwid_cache = last_wid; } else { lrset->last_is_lset = FALSE; lrset->cache.state = rhmm->body.defined->s[lrset->state_loc]; lrset->lastwid_cache = last_wid; } } /*printf("[%s->%s]\n", lrset->hmm->name, rhmm->name);*/ } /* calculate outprob and return */ if (lrset->last_is_lset) { return(outprob_cd(wchmm->hmmwrk, t, lrset->cache.lset, param)); } else { return(outprob_state(wchmm->hmmwrk, t, lrset->cache.state, param)); } default: /* should not happen */ j_internal_error("outprob_style: no outprob style??\n"); return(LOG_ZERO); } #endif /* PASS1_IWCD */ } /** * * @brief トライフォンエラーメッセージ:右コンテキスト用 * * 指定した右コンテキストを持つトライフォンが * 見つからなかった場合にエラーメッセージを出力する関数. * * @param base [in] ベースのトライフォン * @param rc_name [in] 右コンテキストの音素名 * * * @brief Triphone error message for right context. * * Output error message when a triphone with the specified right context is * not defined. * * @param base [in] base triphone * @param rc_name [in] name of right context phone * * @callgraph * @callergraph */ void error_missing_right_triphone(HMM_Logical *base, char *rc_name) { char rbuf[MAX_HMMNAME_LEN]; ///< Local workarea for HMM name conversion /* only output message */ strcpy(rbuf, base->name); add_right_context(rbuf, rc_name); jlog("WARNING: IW-triphone for word end \"%s\" not found, fallback to pseudo {%s}\n", rbuf, base->name); } /** * * @brief トライフォンエラーメッセージ:左コンテキスト用 * * 指定した左コンテキストを持つトライフォンが * 見つからなかった場合にエラーメッセージを出力する関数. * * @param base [in] ベースのトライフォン * @param lc_name [in] 左コンテキストの音素名 * * * @brief Triphone error message for left context. 
* * Output error message when a triphone with the specified right context is * not defined. * * @param base [in] base triphone * @param lc_name [in] name of left context phone * * @callgraph * @callergraph */ void error_missing_left_triphone(HMM_Logical *base, char *lc_name) { char rbuf[MAX_HMMNAME_LEN]; ///< Local workarea for HMM name conversion /* only output message */ strcpy(rbuf, base->name); add_left_context(rbuf, lc_name); jlog("WARNING: IW-triphone for word head \"%s\" not found, fallback to pseudo {%s}\n", rbuf, base->name); } /* end of file */ julius-4.2.2/libjulius/src/search_bestfirst_v1.c0000644001051700105040000014317412004452401020227 0ustar ritrlab/** * @file search_bestfirst_v1.c * * * @brief 第2パスのViterbi演算および仮説スコア計算 (高速版) * * ここでは,第2パスにおいて探索中の仮説のViterbiスコアの更新演算, * 次単語とのトレリス接続,および仮説のスコア計算を行う関数が定義されて * います. * * 単語接続部の単語間音素環境依存性は,高速な backscan アルゴリズムに * よって行われます. このファイルで定義されている関数は,config.h において * PASS2_STRICT_IWCD が undef であるときに使用されます. 逆に上記が define * されているときは,search_bestfirst_v2.c の関数が用いられます. * * Backscan アルゴリズムでは,デコーディングの高速化のため, * 次単語とその前の単語の接続点について,「単語間音素コンテキストの遅延処理」 * を行ないます: * * -# 新仮説の生成(next_word())では,次単語の最後の音素の右コンテキスト * のみが考慮される. * -# その単語間の完全な音素環境依存性は,その仮説がいったんスタックに * 入った後もう一度 POP されたときに scan_word() にて改めて計算する. * * 仮説生成時にはすべての生成仮説に対して依存計算を行なず,あとでスコアが * 高く POP された仮説についてのみ再計算を行ないます. このため処理が * 高速化されますが,仮説スコア計算(next_word())において次単語接続部分の * 環境依存性が考慮されないので, 探索中のスコアに誤差が生じる場合があります. * * 実装について: * * -# next_word() では,次単語の最後の音素のみを右コンテキスト(=展開元 * 単語の最初の音素)を考慮して変化させ,トレリス接続点の出力確率を求める. * -# scan_word() では,新単語部分ともう1つ前の単語の最初の音素を変化 * させ,scan する. そのため新単語部分だけでなく,そのもう一音素前まで * scan の対象となる. この "1-phoneme backscan" を行なうため, * 各仮説ノードは最終HMM状態の前向きスコア (NODEにおける g[]) だけでなく, * その backscan 開始点(もう1つ前の単語の最初の音素の手前)のスコア * も保存しておく必要がある (NODE における g_prev[]). * * なお,1音素のみからなる単語では backscan 開始点と単語境界が重なることを * 考慮する必要があるため,実装はもう少し複雑になる. * * * * @brief Viterbi path update and scoring on the second pass (fast version) * * This file has functions for score calculations on the 2nd pass. * It includes Viterbi path update calculation of a hypothesis, calculations * of scores and word trellis connection at word expansion. * * The cross-word triphone will be computed not at word expansion time, * but at later pop up for rapid decoding. This is called "backscan" * altgorithm. These functions are enabled when PASS2_STRICT_IWCD is * UNDEFINED in config.h. If defined, "nextscan" functions in * search_bestfirst_v2.c are used instead. * * Here we use "delayed cross-word context handling" method * for connection of next word and last word of the hypothesis for * speeding up decoding: * * -# Only right context of the tail phone in the next word is considered * when generating a new hypothesis (next_word()). * * -# The whole context dependency will be fully computed when the * hypothesis is once pushed to stack and later popped in scan_word(). * * This method avoid computing full context-dependency handling for all * generated hypothesis in next_word(), and only re-compute it after * primising ones are popped from stack later. This speeds up decoding. * But since the context dependency is not considered in the total hypothesis * score (computed in next_word()). * * The actual implimentation: * * -# In nextword(), the tail phone in the new word is modified considering * the right context (= head phone in the last word of source hypothesis), * and the outprob on the connection point between backtrellis and forward * trellis is computed using the triphone. 
* * -# In scan_word(), not only the new word but also the head phone in the * previous word should be modified and re-scanned. * To realize this '1-phoneme backscan' procedure, hypothesis nodes * have to keep forward scores not only at the last HMM state (g[] in * NODE), but also at the backscan restart point (= before the head * phone in the previous word, g_prev[] in NODE). * * Note that the actual implementation becomes a little more complicated * to handle 1-phoneme words... * * * @author Akinobu Lee * @date Sun Sep 11 23:54:53 2005 * * $Revision: 1.7 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #include #ifndef PASS2_STRICT_IWCD #undef TCD ///< Define if want triphone debug messages /**********************************************************************/ /************ 仮説ノードの基本操作 ************/ /************ Basic functions for hypothesis node handling ************/ /**********************************************************************/ #undef STOCKER_DEBUG #ifdef STOCKER_DEBUG static int stocked_num = 0; static int reused_num = 0; static int new_num = 0; static int request_num = 0; #endif /** * * 仮説ノードを実際にメモリ上から解放する. * * @param node [in] 仮説ノード * * * Free a hypothesis node actually. * * @param node [in] hypothesis node * */ static void free_node_exec(NODE *node) { if (node == NULL) return; free(node->g); if (node->g_prev != NULL) free(node->g_prev); #ifdef GRAPHOUT_PRECISE_BOUNDARY if (node->region->graphout) { free(node->wordend_frame); free(node->wordend_gscore); } #endif free(node); } /** * * 仮説ノードの利用を終了してリサイクル用にストックする * * @param node [in] 仮説ノード * * * Stock an unused hypothesis node for recycle. * * @param node [in] hypothesis node * * * @callgraph * @callergraph */ void free_node(NODE *node) { if (node == NULL) return; if (node->region->graphout) { if (node->prevgraph != NULL && node->prevgraph->saved == FALSE) { wordgraph_free(node->prevgraph); } } /* save to stocker */ node->next = node->region->pass2.stocker_root; node->region->pass2.stocker_root = node; #ifdef STOCKER_DEBUG stocked_num++; #endif } /** * * リサイクル用ノード格納庫を空にする. * * @param s [in] stack decoding work area * * * * Clear the node stocker for recycle. * * @param s [in] stack decoding work area * * * @callgraph * @callergraph */ void clear_stocker(StackDecode *s) { NODE *node, *tmp; node = s->stocker_root; while(node) { tmp = node->next; free_node_exec(node); node = tmp; } s->stocker_root = NULL; #ifdef STOCKER_DEBUG jlog("DEBUG: %d times requested, %d times newly allocated, %d times reused\n", request_num, new_num, reused_num); stocked_num = 0; reused_num = 0; new_num = 0; request_num = 0; #endif } /** * * 仮説をコピーする. * * @param dst [out] コピー先の仮説 * @param src [in] コピー元の仮説 * * @return @a dst を返す. * * * Copy the content of node to another. * * @param dst [out] target hypothesis * @param src [in] source hypothesis * * @return the value of @a dst. 
* * @callgraph * @callergraph */ NODE * cpy_node(NODE *dst, NODE *src) { int peseqlen; peseqlen = src->region->peseqlen; dst->next = src->next; dst->prev = src->prev; memcpy(dst->g, src->g, sizeof(LOGPROB) * peseqlen); memcpy(dst->seq, src->seq, sizeof(WORD_ID) * MAXSEQNUM); #ifdef CM_SEARCH #ifdef CM_MULTIPLE_ALPHA { int w; for(w=0;wseqnum;w++) { memcpy(dst->cmscore[w], src->cmscore[w], sizeof(LOGPROB) * src->region->config->annotate.cm_alpha_num); } } #else memcpy(dst->cmscore, src->cmscore, sizeof(LOGPROB) * MAXSEQNUM); #endif #endif /* CM_SEARCH */ dst->seqnum = src->seqnum; dst->score = src->score; dst->bestt = src->bestt; dst->estimated_next_t = src->estimated_next_t; dst->endflag = src->endflag; dst->state = src->state; dst->tre = src->tre; if (src->g_prev != NULL) { // ccd_flag == TRUE memcpy(dst->g_prev, src->g_prev, sizeof(LOGPROB) * peseqlen); dst->last_ph = src->last_ph; dst->last_ph_sp_attached = src->last_ph_sp_attached; dst->lscore = src->lscore; } dst->totallscore = src->totallscore; dst->final_g = src->final_g; #ifdef VISUALIZE dst->popnode = src->popnode; #endif if (src->region->graphout) { #ifdef GRAPHOUT_PRECISE_BOUNDARY memcpy(dst->wordend_frame, src->wordend_frame, sizeof(short) * peseqlen); memcpy(dst->wordend_gscore, src->wordend_gscore, sizeof(LOGPROB) * peseqlen); #endif dst->prevgraph = src->prevgraph; dst->lastcontext = src->lastcontext; #ifndef GRAPHOUT_PRECISE_BOUNDARY dst->tail_g_score = src->tail_g_score; #endif } return(dst); } /** * * 新たな仮説ノードを割り付ける. もし格納庫に以前試用されなくなった * ノードがある場合はそれを再利用する. なければ新たに割り付ける. * * @param r [in] 認識処理インスタンス * * @return 新たに割り付けられた仮説ノードへのポインタを返す. * * * Allocate a new hypothesis node. If the node stocker is not empty, * the one in the stocker is re-used. Otherwise, allocate as new. * * @param r [in] recognition process instance * * @return pointer to the newly allocated node. 
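 *
 * The stocker is a simple singly-linked free list threaded through the
 * nodes' next pointers: free_node() pushes unused nodes onto it, and
 * newnode() pops from it before falling back to mymalloc().  A minimal
 * sketch of that pattern (simplified, without the per-field
 * initialization performed below):
 *
 * @code
 *   if ((tmp = r->pass2.stocker_root) != NULL) {
 *     r->pass2.stocker_root = tmp->next;      // pop a recycled node
 *   } else {
 *     tmp = (NODE *)mymalloc(sizeof(NODE));   // nothing stocked: allocate new
 *   }
 * @endcode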
* * @callgraph * @callergraph */ NODE * newnode(RecogProcess *r) { NODE *tmp; int i; int peseqlen; peseqlen = r->peseqlen; #ifdef STOCKER_DEBUG request_num++; #endif if ((tmp = r->pass2.stocker_root) != NULL) { /* re-use ones in the stocker */ r->pass2.stocker_root = tmp->next; #ifdef STOCKER_DEBUG stocked_num--; reused_num++; #endif } else { /* allocate new */ tmp =(NODE *)mymalloc(sizeof(NODE)); tmp->g = (LOGPROB *)mymalloc(sizeof(LOGPROB) * peseqlen); if (r->ccd_flag) { tmp->g_prev = (LOGPROB *)mymalloc(sizeof(LOGPROB) * peseqlen); } else { tmp->g_prev = NULL; } #ifdef GRAPHOUT_PRECISE_BOUNDARY if (r->graphout) { tmp->wordend_frame = (short *)mymalloc(sizeof(short) * peseqlen); tmp->wordend_gscore = (LOGPROB *)mymalloc(sizeof(LOGPROB) * peseqlen); } #endif #ifdef STOCKER_DEBUG new_num++; #endif } /* clear the data */ /*bzero(tmp,sizeof(NODE));*/ tmp->next=NULL; tmp->prev=NULL; tmp->last_ph = NULL; tmp->last_ph_sp_attached = FALSE; if (r->ccd_flag) { if (r->lmtype == LM_PROB) { tmp->lscore = LOG_ZERO; tmp->totallscore = LOG_ZERO; } else if (r->lmtype == LM_DFA) { tmp->lscore = 0.0; tmp->totallscore = 0.0; } } tmp->endflag = FALSE; tmp->seqnum = 0; for(i=0;ig[i] = LOG_ZERO; } if (r->ccd_flag) { for(i=0;ig_prev[i] = LOG_ZERO; } } tmp->final_g = LOG_ZERO; #ifdef VISUALIZE tmp->popnode = NULL; #endif tmp->tre = NULL; if (r->graphout) { tmp->prevgraph = NULL; tmp->lastcontext = NULL; } tmp->region = r; return(tmp); } /**********************************************************************/ /************ 前向きトレリス展開と尤度計算 ***************/ /************ Expand trellis and update forward viterbi ***************/ /**********************************************************************/ /** * * 1単語分のトレリス計算用のワークエリアを確保. * * @param r [in] 認識処理インスタンス * * * * Allocate work area for trellis computation of a word. * * @param r [in] recognition process instance * * * @callgraph * @callergraph */ void malloc_wordtrellis(RecogProcess *r) { int maxwn; StackDecode *dwrk; maxwn = r->lm->winfo->maxwn + 10; dwrk = &(r->pass2); dwrk->wordtrellis[0] = (LOGPROB *)mymalloc(sizeof(LOGPROB) * maxwn); dwrk->wordtrellis[1] = (LOGPROB *)mymalloc(sizeof(LOGPROB) * maxwn); dwrk->g = (LOGPROB *)mymalloc(sizeof(LOGPROB) * r->peseqlen); dwrk->phmmlen_max = r->lm->winfo->maxwlen + 2; dwrk->phmmseq = (HMM_Logical **)mymalloc(sizeof(HMM_Logical *) * dwrk->phmmlen_max); if (r->lm->config->enable_iwsp && r->am->hmminfo->multipath) { dwrk->has_sp = (boolean *)mymalloc(sizeof(boolean) * dwrk->phmmlen_max); } else { dwrk->has_sp = NULL; } dwrk->wend_token_frame[0] = NULL; dwrk->wend_token_frame[1] = NULL; dwrk->wend_token_gscore[0] = NULL; dwrk->wend_token_gscore[1] = NULL; #ifdef GRAPHOUT_PRECISE_BOUNDARY if (r->graphout) { dwrk->wend_token_frame[0] = (short *)mymalloc(sizeof(short) * maxwn); dwrk->wend_token_frame[1] = (short *)mymalloc(sizeof(short) * maxwn); dwrk->wend_token_gscore[0] = (LOGPROB *)mymalloc(sizeof(LOGPROB) * maxwn); dwrk->wend_token_gscore[1] = (LOGPROB *)mymalloc(sizeof(LOGPROB) * maxwn); } #endif } /** * * 1単語分のトレリス計算用のワークエアリアを解放 * * * * Free the work area for trellis computation of a word. 
* * * @callgraph * @callergraph */ void free_wordtrellis(StackDecode *dwrk) { int i; free(dwrk->wordtrellis[0]); free(dwrk->wordtrellis[1]); free(dwrk->g); free(dwrk->phmmseq); if (dwrk->has_sp) { free(dwrk->has_sp); dwrk->has_sp = NULL; } #ifdef GRAPHOUT_PRECISE_BOUNDARY for(i=0;i<2;i++) { if (dwrk->wend_token_frame[i]) { free(dwrk->wend_token_frame[i]); dwrk->wend_token_frame[i] = NULL; } if (dwrk->wend_token_gscore[i]) { free(dwrk->wend_token_gscore[i]); dwrk->wend_token_gscore[i] = NULL; } } #endif } /**********************************************************************/ /************ 仮説の前向き尤度計算 *******************/ /************ Compute forward score of a hypothesis *******************/ /**********************************************************************/ /** * * 最終状態への遷移確率の最大値を求める (multipath) * * @param tr [in] 遷移行列 * @param state_num [in] 状態数 * * @return 最終状態への遷移確率への最大値を返す. * * * Get the maximum transition log probability to final state. (multipath) * * @param tr [in] transition matrix * @param state_num [in] number of states * * @return the maximum log probability of transition to the final state. * */ static LOGPROB get_max_out_arc(HTK_HMM_Trans *tr, int state_num) { LOGPROB max_a; int afrom; LOGPROB a; max_a = LOG_ZERO; for (afrom = 0; afrom < state_num - 1; afrom++) { a = tr->a[afrom][state_num-1]; if (max_a < a) max_a = a; } return(max_a); } /** * * 音素の出力状態への遷移確率の最大値を求める. (multipath) * * @param l [in] 音素 * * @return 出力状態への遷移確率の最大値を返す. * * * Get the maximum transition log probability outside a phone. (multipath) * * @param l [in] phone * * @return the maximum transition log probability outside a phone. * */ static LOGPROB max_out_arc(HMM_Logical *l) { return(get_max_out_arc(hmm_logical_trans(l), hmm_logical_state_num(l))); } /** * * 最後の1単語の前向きトレリスを計算して,文仮説の前向き尤度を更新する. * * @param now [i/o] 文仮説 * @param param [in] 入力パラメータ列 * @param r [in] 認識処理インスタンス * * * * Compute the forward viterbi for the last word to update forward scores * and ready for word connection. 
* * @param now [i/o] hypothesis * @param param [in] input parameter vectors * @param r [in] recognition process instance * * * @callgraph * @callergraph */ void scan_word(NODE *now, HTK_Param *param, RecogProcess *r) { int i,t, j; HMM *whmm; A_CELL *ac; WORD_ID word; LOGPROB tmpmax, tmptmp, score1; int startt = 0, endt; int wordhmmnum; LOGPROB tmpmax_store, store_point_maxarc; /* multipath */ LOGPROB tmpmax2 = LOG_ZERO; int phmmlen; HMM_Logical *ret, *wend; int store_point; int crossword_point = 0; boolean back_rescan = FALSE; boolean node_exist_p; int tn; ///< Temporal pointer to current buffer int tl; ///< Temporal pointer to previous buffer /* store global values to local for rapid access */ WORD_INFO *winfo; HTK_HMM_INFO *hmminfo; LOGPROB *framemaxscore; int peseqlen; boolean ccd_flag; boolean enable_iwsp; #ifdef SCAN_BEAM LOGPROB scan_beam_thres; #endif StackDecode *dwrk; winfo = r->lm->winfo; hmminfo = r->am->hmminfo; dwrk = &(r->pass2); peseqlen = r->peseqlen; framemaxscore = r->pass2.framemaxscore; ccd_flag = r->ccd_flag; enable_iwsp = r->lm->config->enable_iwsp; /* multipath */ #ifdef SCAN_BEAM scan_beam_thres = r->config->pass2.scan_beam_thres; #endif if (hmminfo->multipath) { store_point = -1; } else { store_point = 0; } /* ----------------------- prepare HMM ----------------------- */ if (ccd_flag) { /* 直前の音素があれば,そこまでさかのぼって scan する */ /* if there are any last phone, enable backscan */ if (now->last_ph == NULL) { /* initial score: now->g[] */ /* scan range: phones in now->seq[now->seqnum-1] */ back_rescan = FALSE; } else { /* initial score: now->g_prev[] (1-phone before)*/ /* scan range: phones in now->seq[now->seqnum-1] + now->last_ph */ back_rescan = TRUE; } } #ifdef TCD if (now->last_ph != NULL) { jlog("DEBUG: inherited last_ph: %s\n", (now->last_ph)->name); if (now->last_ph_sp_attached) jlog("DEBUG: (sp attached)\n"); /* multipath */ } else { jlog("DEBUG: no last_ph inherited\n"); } #endif /* scan 範囲分のHMMを準備 */ /* prepare HMM of the scan range */ word = now->seq[now->seqnum-1]; if (ccd_flag) { if (back_rescan) { /* scan range: phones in now->seq[now->seqnum-1] + now->last_ph */ phmmlen = winfo->wlen[word] + 1; if (phmmlen > dwrk->phmmlen_max) { j_internal_error("scan_word: num of phonemes in a word exceed phmmlenmax (%d) ?\n", dwrk->phmmlen_max); } for (i=0;iphmmseq[i] = winfo->wseq[word][i]; if (enable_iwsp && hmminfo->multipath) { for (i=0;ihas_sp[i] = FALSE; } /* 最終単語と last_ph 間の単語間triphoneを考慮 */ /* consider cross-word context dependency between the last word and now->last_ph */ wend = winfo->wseq[word][winfo->wlen[word]-1]; ret = get_right_context_HMM(wend, now->last_ph->name, hmminfo); if (ret == NULL) { /* triphone not found */ /* fallback to the original bi/mono-phone */ /* error if the original is pseudo phone (not explicitly defined in hmmdefs/hmmlist) */ /* exception: word with 1 phone (triphone may exist in the next expansion */ if (winfo->wlen[word] > 1 && wend->is_pseudo) { error_missing_right_triphone(wend, now->last_ph->name); } dwrk->phmmseq[phmmlen-2] = wend; } else { dwrk->phmmseq[phmmlen-2] = ret; } ret = get_left_context_HMM(now->last_ph, wend->name, hmminfo); if (ret == NULL) { /* fallback to the original bi/mono-phone */ /* error if the original is pseudo phone (not explicitly defined in hmmdefs/hmmlist) */ if (now->last_ph->is_pseudo) { error_missing_left_triphone(now->last_ph, wend->name); } dwrk->phmmseq[phmmlen-1] = now->last_ph; } else { dwrk->phmmseq[phmmlen-1] = ret; } if (enable_iwsp && hmminfo->multipath) { dwrk->has_sp[phmmlen-2] = TRUE; 
dwrk->has_sp[phmmlen-1] = now->last_ph_sp_attached; } #ifdef TCD jlog("DEBUG: w="); for(i=0;iwlen[word];i++) { jlog(" %s",(winfo->wseq[word][i])->name); if (enable_iwsp && hmminfo->multipath && dwrk->has_sp[i]) jlog("(sp)"); } jlog(" | %s\n", (now->last_ph)->name); if (hmminfo->multipath && now->last_ph_sp_attached) jlog("DEBUG: (sp)\n"); jlog("DEBUG: scan for:"); for (i=0;iphmmseq[i]->name); if (enable_iwsp && hmminfo->multipath && dwrk->has_sp[i]) jlog("(sp)"); } jlog("\n"); #endif /* 単語HMMを作る */ /* make word HMM */ whmm = new_make_word_hmm(hmminfo, dwrk->phmmseq, phmmlen, (enable_iwsp && hmminfo->multipath) ? dwrk->has_sp : NULL); if (whmm == NULL) { j_internal_error("Error: failed to make word hmm for word #%d \"%s [%s]\"\n", word, winfo->wname[word], winfo->woutput[word]); } /* backscan なので,計算前の g[] 初期値は now->g_prev[] を使用 */ /* As backscan enabled, the initial forward score g[] is set by now->g_prev[] */ for (t=0;tg[t]=now->g_prev[t]; } /* 次段用のg_prevを格納するノード位置を設定 */ /* set where to store scores as new g_prev[] for the next backscan in the HMM */ if (hmminfo->multipath) { store_point = hmm_logical_state_num(dwrk->phmmseq[0]) - 2; store_point_maxarc = max_out_arc(dwrk->phmmseq[0]); if (enable_iwsp && dwrk->has_sp[0]) { store_point += hmm_logical_state_num(hmminfo->sp) - 2; if (store_point_maxarc < max_out_arc(hmminfo->sp)) { store_point_maxarc = max_out_arc(hmminfo->sp); } } } else { store_point = hmm_logical_state_num(dwrk->phmmseq[0]) - 2 - 1; } /* scan中に直前単語とこの単語をまたぐ場所を設定 */ /* set where is the connection point of the last word in the HMM */ if (hmminfo->multipath) { crossword_point = whmm->len - hmm_logical_state_num(dwrk->phmmseq[phmmlen-1]); if (enable_iwsp && dwrk->has_sp[phmmlen-1]) { crossword_point -= hmm_logical_state_num(hmminfo->sp) - 2; } } else { crossword_point = whmm->len - (hmm_logical_state_num(dwrk->phmmseq[phmmlen-1]) - 2) - 1; } } else { /* not backscan mode */ /* scan range: phones in now->seq[now->seqnum-1] */ #ifdef TCD jlog("DEBUG: scan(org):"); for (i=0;iwlen[word];i++) { jlog(" %s", (winfo->wseq[word][i])->name); } jlog("\n"); #endif if (enable_iwsp && hmminfo->multipath) { /* 必要ならばショートポーズを挟み込む位置を指定する */ for(i=0;iwlen[word];i++) { dwrk->has_sp[i] = FALSE; } dwrk->has_sp[winfo->wlen[word]-1] = TRUE; } /* 単語HMMを作る */ /* make word HMM */ whmm = new_make_word_hmm(hmminfo, winfo->wseq[word], winfo->wlen[word], (enable_iwsp && hmminfo->multipath) ? 
dwrk->has_sp : NULL); if (whmm == NULL) { j_internal_error("Error: failed to make word hmm for word #%d \"%s [%s]\"\n", word, winfo->wname[word], winfo->woutput[word]); } /* 計算前の g[] 初期値は now->g[] を使用 */ /* the initial forward score g[] is set by now->g[] */ for (t=0;tg[t]=now->g[t]; } /* 次段用のg_prevを格納するノード位置を設定 */ /* set where to store scores as new g_prev[] for the next backscan in the HMM */ if (hmminfo->multipath) { store_point = hmm_logical_state_num(winfo->wseq[word][0]) - 2; store_point_maxarc = max_out_arc(winfo->wseq[word][0]); if (enable_iwsp && dwrk->has_sp[0]) { store_point += hmm_logical_state_num(hmminfo->sp) - 2; if (store_point_maxarc < max_out_arc(hmminfo->sp)) { store_point_maxarc = max_out_arc(hmminfo->sp); } } } else { store_point = hmm_logical_state_num(winfo->wseq[word][0]) - 2 - 1; } /* scan中に直前単語とこの単語をまたぐ場所は,なし */ /* the connection point of the last word is not exist in the HMM */ crossword_point = -1; } } else { /* ccd_flag == FALSE */ if (enable_iwsp && hmminfo->multipath) { /* 必要ならばショートポーズを挟み込む位置を指定する */ for(i=0;iwlen[word];i++) { dwrk->has_sp[i] = FALSE; } dwrk->has_sp[winfo->wlen[word]-1] = TRUE; } /* 音素環境非依存の場合は単純に最終単語分の HMM を作成 */ /* for monophone: simple make HMM for the last word */ whmm = new_make_word_hmm(hmminfo, winfo->wseq[word], winfo->wlen[word], (enable_iwsp && hmminfo->multipath) ? dwrk->has_sp : NULL); if (whmm == NULL) { j_internal_error("Error: failed to make word hmm for word #%d \"%s [%s]\"\n", word, winfo->wname[word], winfo->woutput[word]); } /* 計算前の g[] 初期値は now->g[] を使用 */ /* the initial forward score g[] is set by now->g[] */ for (t=0;tg[t]=now->g[t]; } } #ifdef TCD jlog("DEBUG: whmm len = %d\n",whmm->len); jlog("DEBUG: crossword_point = %d\n", crossword_point); jlog("DEBUG: g[] store point = %d\n", store_point); #endif wordhmmnum = whmm->len; if (wordhmmnum >= winfo->maxwn + 10) { j_internal_error("scan_word: word too long (>%d)\n", winfo->maxwn + 10); } #ifndef GRAPHOUT_PRECISE_BOUNDARY if (r->graphout) { if (ccd_flag) { now->tail_g_score = now->g[now->bestt]; } } #endif /* ----------------------- do scan ----------------------- */ /* scan開始点を検索 -> starttへ*/ /* search for the start frame -> set to startt */ for(t = peseqlen-1; t >=0 ; t--) { if ( #ifdef SCAN_BEAM dwrk->g[t] > framemaxscore[t] - scan_beam_thres && #endif dwrk->g[t] > LOG_ZERO) { break; } } if (t < 0) { /* no node has score > LOG_ZERO */ for(t=0;tg_prev[t] = LOG_ZERO; now->g[t] = LOG_ZERO; } #ifdef GRAPHOUT_PRECISE_BOUNDARY if (r->graphout) { for(t=0;twordend_frame[t] = -1; now->wordend_gscore[t] = LOG_ZERO; } } #endif goto end_of_scan; } startt = t; /* clear [startt+1..peseqlen-1] */ for(t=peseqlen-1;t>startt;t--) { if (ccd_flag) now->g_prev[t] = LOG_ZERO; now->g[t] = LOG_ZERO; #ifdef GRAPHOUT_PRECISE_BOUNDARY if (r->graphout) { now->wordend_frame[t] = -1; now->wordend_gscore[t] = LOG_ZERO; } #endif } /* バッファポインタ初期化 */ tn = 0; tl = 1; #ifdef GRAPHOUT_PRECISE_BOUNDARY if (r->graphout) { for(i=0;iwend_token_frame[tn][i] = -1; dwrk->wend_token_gscore[tn][i] = LOG_ZERO; } } #endif if (! 
hmminfo->multipath) { /* Below initialization is not needed on multipath version, since the actual viterbi will begin at frame 0 in multipath mode in main loop */ /* 時間 [startt] 上の値を初期化 */ /* initialize scores on frame [startt] */ for(i=0;iwordtrellis[tn][i] = LOG_ZERO; dwrk->wordtrellis[tn][wordhmmnum-1] = dwrk->g[startt] + outprob(&(r->am->hmmwrk), startt, &(whmm->state[wordhmmnum-1]), param); if (ccd_flag) { now->g_prev[startt] = dwrk->wordtrellis[tn][store_point]; } now->g[startt] = dwrk->wordtrellis[tn][0]; #ifdef GRAPHOUT_PRECISE_BOUNDARY if (r->graphout) { if (ccd_flag) { if (back_rescan) { if (wordhmmnum-1 == crossword_point) { dwrk->wend_token_frame[tn][wordhmmnum-1] = startt; dwrk->wend_token_gscore[tn][wordhmmnum-1] = dwrk->g[startt]; } else { dwrk->wend_token_frame[tn][wordhmmnum-1] = -1; dwrk->wend_token_gscore[tn][wordhmmnum-1] = LOG_ZERO; } } else { dwrk->wend_token_frame[tn][wordhmmnum-1] = startt; dwrk->wend_token_gscore[tn][wordhmmnum-1] = dwrk->g[startt]; } } else { dwrk->wend_token_frame[tn][wordhmmnum-1] = startt; dwrk->wend_token_gscore[tn][wordhmmnum-1] = dwrk->g[startt]; } now->wordend_frame[startt] = dwrk->wend_token_frame[tn][0]; now->wordend_gscore[startt] = dwrk->wend_token_gscore[tn][0]; } #endif } /* end of hmminfo->multipath */ endt = startt; /* メインループ: startt から始まり 0 に向かって Viterbi 計算 */ /* main loop: start from [startt], and compute Viterbi toward [0] */ for(t = hmminfo->multipath ? startt : startt - 1; t >= 0; t--) { /* wordtrellisのワークエリアをスワップ */ i = tn; tn = tl; tl = i; node_exist_p = FALSE; /* TRUE if there is at least 1 survived node in this frame */ if (hmminfo->multipath) { /* 端のノード [t][wordhmmnum-1]は g[] を参照する */ /* the edge node [t][wordhmmnum-1] is equal to g[] */ /* ノード [t][wordhmmnum-2..0] についてトレリスを計算 */ /* expand trellis for node [t][wordhmmnum-2..0] */ tmpmax_store = LOG_ZERO; } else { /* 端のノード [t][wordhmmnum-1]は,内部遷移 か g[]の高い方になる */ /* the edge node [t][wordhmmnum-1] is either internal transitin or g[] */ tmptmp = LOG_ZERO; for (ac=whmm->state[wordhmmnum-1].ac;ac;ac=ac->next) { score1 = dwrk->wordtrellis[tl][ac->arc] + ac->a; if (tmptmp < score1) { j = ac->arc; tmptmp = score1; } } if (dwrk->g[t] > tmptmp) { tmpmax = dwrk->g[t]; #ifdef GRAPHOUT_PRECISE_BOUNDARY if (r->graphout) { if (!back_rescan || wordhmmnum-1 == crossword_point) { dwrk->wend_token_frame[tn][wordhmmnum-1] = t; dwrk->wend_token_gscore[tn][wordhmmnum-1] = dwrk->g[t]; } else { dwrk->wend_token_frame[tn][wordhmmnum-1] = dwrk->wend_token_frame[tl][j]; dwrk->wend_token_gscore[tn][wordhmmnum-1] = dwrk->wend_token_gscore[tl][j]; } } #endif } else { tmpmax = tmptmp; #ifdef GRAPHOUT_PRECISE_BOUNDARY if (r->graphout) { dwrk->wend_token_frame[tn][wordhmmnum-1] = dwrk->wend_token_frame[tl][j]; dwrk->wend_token_gscore[tn][wordhmmnum-1] = dwrk->wend_token_gscore[tl][j]; } #endif } /* 端のノードのスコアエンベロープチェック: 一定幅外なら落とす */ /* check if the edge node is within score envelope */ if ( #ifdef SCAN_BEAM tmpmax <= framemaxscore[t] - scan_beam_thres || #endif tmpmax <= LOG_ZERO ) { dwrk->wordtrellis[tn][wordhmmnum-1] = LOG_ZERO; #ifdef GRAPHOUT_PRECISE_BOUNDARY if (r->graphout) { dwrk->wend_token_frame[tn][wordhmmnum-1] = -1; dwrk->wend_token_gscore[tn][wordhmmnum-1] = LOG_ZERO; } #endif } else { node_exist_p = TRUE; dwrk->wordtrellis[tn][wordhmmnum-1] = tmpmax + outprob(&(r->am->hmmwrk), t, &(whmm->state[wordhmmnum-1]), param); } } /* end of ~multipath */ /* ノード [t][wordhmmnum-2..0] についてトレリスを計算 */ /* expand trellis for node [t][wordhmmnum-2..0] */ for(i=wordhmmnum-2;i>=0;i--) { if (ccd_flag) { /* 
最尤パスと最尤スコア tmpmax を見つける */ /* tmpmax2 は次回用 g_prev[] のための最大値(自己遷移を除いた最大値) */ /* find most likely path and the max score 'tmpmax' */ /* 'tmpmax2' is max score excluding self transition, for next g_prev[] */ if (! hmminfo->multipath) { if (i == store_point) { tmpmax2 = LOG_ZERO; } } tmpmax = LOG_ZERO; for (ac=whmm->state[i].ac;ac;ac=ac->next) { if (hmminfo->multipath) { if (ac->arc == wordhmmnum-1) score1 = dwrk->g[t]; else if (t + 1 > startt) score1 = LOG_ZERO; else score1 = dwrk->wordtrellis[tl][ac->arc]; score1 += ac->a; } else { score1 = dwrk->wordtrellis[tl][ac->arc] + ac->a; } if (i <= crossword_point && ac->arc > crossword_point) { /* これは単語を越える遷移 (backscan 実行時) */ /* this is a transition across word (when backscan is enabled) */ score1 += now->lscore; /* add LM score */ } if (hmminfo->multipath) { if (i <= store_point && ac->arc > store_point) { if (tmpmax_store < score1) tmpmax_store = score1; } } else { if (i == store_point && i != ac->arc) { if (tmpmax2 < score1) tmpmax2 = score1; } } if (tmpmax < score1) { tmpmax = score1; j = ac->arc; } } /* スコアエンベロープチェック: 一定幅外なら落とす */ /* check if score of this node is within the score envelope */ if ( #ifdef SCAN_BEAM tmpmax <= framemaxscore[t] - scan_beam_thres || #endif tmpmax <= LOG_ZERO ) { /* invalid node */ dwrk->wordtrellis[tn][i] = LOG_ZERO; #ifdef GRAPHOUT_PRECISE_BOUNDARY if (r->graphout) { dwrk->wend_token_frame[tn][i] = -1; dwrk->wend_token_gscore[tn][i] = LOG_ZERO; } #endif if (! hmminfo->multipath) { if (i == store_point) now->g_prev[t] = LOG_ZERO; } } else { /* survived node */ if (! hmminfo->multipath) { if (i == store_point) now->g_prev[t] = tmpmax2; } #ifdef GRAPHOUT_PRECISE_BOUNDARY if (r->graphout) { if (hmminfo->multipath) { if ((back_rescan && i <= crossword_point && j > crossword_point) || j == wordhmmnum-1) { dwrk->wend_token_frame[tn][i] = t; dwrk->wend_token_gscore[tn][i] = tmpmax; } else { dwrk->wend_token_frame[tn][i] = dwrk->wend_token_frame[tl][j]; dwrk->wend_token_gscore[tn][i] = dwrk->wend_token_gscore[tl][j]; } } else { if (i <= crossword_point && j > crossword_point) { dwrk->wend_token_frame[tn][i] = t; dwrk->wend_token_gscore[tn][i] = tmpmax; } else { dwrk->wend_token_frame[tn][i] = dwrk->wend_token_frame[tl][j]; dwrk->wend_token_gscore[tn][i] = dwrk->wend_token_gscore[tl][j]; } } } #endif node_exist_p = TRUE; /* at least one node survive in this frame */ dwrk->wordtrellis[tn][i] = tmpmax; if (! 
hmminfo->multipath || i > 0) { /* compute output probability */ dwrk->wordtrellis[tn][i] += outprob(&(r->am->hmmwrk), t, &(whmm->state[i]), param); } } } else { /* not triphone */ /* backscan 無し: store_point, crossword_point は無関係 */ /* no backscan: store_point, crossword_point ignored */ tmpmax = LOG_ZERO; if (hmminfo->multipath) { for (ac=whmm->state[i].ac;ac;ac=ac->next) { if (ac->arc == wordhmmnum-1) score1 = dwrk->g[t]; else if (t + 1 > startt) score1 = LOG_ZERO; else score1 = dwrk->wordtrellis[tl][ac->arc]; score1 += ac->a; if (tmpmax < score1) { tmpmax = score1; j = ac->arc; } } } else { for (ac=whmm->state[i].ac;ac;ac=ac->next) { score1 = dwrk->wordtrellis[tl][ac->arc] + ac->a; if (tmpmax < score1) { tmpmax = score1; j = ac->arc; } } } /* スコアエンベロープチェック: 一定幅外なら落とす */ /* check if score of this node is within the score envelope */ if ( #ifdef SCAN_BEAM tmpmax <= framemaxscore[t] - scan_beam_thres || #endif tmpmax <= LOG_ZERO ) { /* invalid node */ dwrk->wordtrellis[tn][i] = LOG_ZERO; #ifdef GRAPHOUT_PRECISE_BOUNDARY if (r->graphout) { dwrk->wend_token_frame[tn][i] = -1; dwrk->wend_token_gscore[tn][i] = LOG_ZERO; } #endif } else { /* survived node */ node_exist_p = TRUE; #ifdef GRAPHOUT_PRECISE_BOUNDARY if (r->graphout) { if (hmminfo->multipath) { if (j == wordhmmnum-1) { dwrk->wend_token_frame[tn][i] = t; dwrk->wend_token_gscore[tn][i] = tmpmax; } else { dwrk->wend_token_frame[tn][i] = dwrk->wend_token_frame[tl][j]; dwrk->wend_token_gscore[tn][i] = dwrk->wend_token_gscore[tl][j]; } } else { dwrk->wend_token_frame[tn][i] = dwrk->wend_token_frame[tl][j]; dwrk->wend_token_gscore[tn][i] = dwrk->wend_token_gscore[tl][j]; } } #endif /* score of node [t][i] has been determined here */ dwrk->wordtrellis[tn][i] = tmpmax; if (! hmminfo->multipath || i > 0) { dwrk->wordtrellis[tn][i] += outprob(&(r->am->hmmwrk), t, &(whmm->state[i]), param); } } } } /* end of node loop */ /* 時間 t のViterbi計算終了. 前向きスコアはscanした単語の始端 */ /* Viterbi end for frame [t]. the forward score is the score of word beginning scanned */ now->g[t] = dwrk->wordtrellis[tn][0]; #ifdef GRAPHOUT_PRECISE_BOUNDARY if (r->graphout) { now->wordend_frame[t] = dwrk->wend_token_frame[tn][0]; now->wordend_gscore[t] = dwrk->wend_token_gscore[tn][0]; } #endif if (hmminfo->multipath) { /* triphone 時, 次段のために store_point のデータをg_prevに保存 */ /* store the scores crossing the store_point to g_prev, for next scan */ if (ccd_flag) { /* the max arc crossing the store_point always selected as tmpmax_score */ tmpmax_store -= store_point_maxarc; if (tmpmax_store < LOG_ZERO) tmpmax_store = LOG_ZERO; now->g_prev[t] = tmpmax_store; } } /* store the number of last computed frame */ if (node_exist_p) endt = t; /* scanした単語の第1パスでの始端時刻より先まで t が進んでおり,かつ この t においてスコアエンベロープによって生き残ったノードが一つも 無かったならば,このフレームで計算を打ち切りそれ以上先([0..t-1])は 計算しない */ /* if frame 't' already reached the beginning frame of scanned word in 1st pass and no node was survived in this frame (all nodes pruned by score envelope), terminate computation at this frame and do not computer further frame ([0..t-1]). 
*/ if (t < now->estimated_next_t && (!node_exist_p)) { /* clear the rest scores */ for (i=t-1;i>=0;i--) { now->g[i] = LOG_ZERO; #ifdef GRAPHOUT_PRECISE_BOUNDARY if (r->graphout) { now->wordend_frame[i] = -1; now->wordend_gscore[i] = LOG_ZERO; } #endif if (ccd_flag) now->g_prev[i] = LOG_ZERO; } /* terminate loop */ break; } } /* end of time loop */ if (debug2_flag) jlog("DEBUG: scanned: [%3d-%3d]\n", endt, startt); end_of_scan: if (hmminfo->multipath) { /* 前向きスコアの最終値を計算 (状態 0 から時間 0 への遷移) */ /* compute the total forward score (transition from state 0 to frame 0 */ if (endt == 0) { tmpmax = LOG_ZERO; for(ac=whmm->state[0].ac;ac;ac=ac->next) { score1 = dwrk->wordtrellis[tn][ac->arc] + ac->a; if (tmpmax < score1) tmpmax = score1; } now->final_g = score1; } else { now->final_g = LOG_ZERO; } } /* 次回 backscan のための情報格納 */ /* store data for next backscan */ if (ccd_flag) { if (store_point == (hmminfo->multipath ? wordhmmnum - 2 : wordhmmnum - 1)) { /* last_ph無し,かつ単語の音素長=1の場合、次回の scan_word() で 単語全体がもう一度再計算される. この場合, g_prev は,このscan_wordを開始する前のスコアを入れておく必要がある */ /* if there was no 'last_ph' and the scanned word consists of only 1 phone, the whole word should be re-computed in the future scan_word(). So the next 'g_prev[]' should be the initial forward scores before we begin Viterbi (= g[t]). */ for (t = startt; t>=0; t--) { now->g_prev[t] = dwrk->g[t]; } } #ifndef GRAPHOUT_PRECISE_BOUNDARY if (r->graphout) { if (now->tail_g_score != LOG_ZERO) { if (now->prevgraph != NULL) { (now->prevgraph)->leftscore = now->tail_g_score; } } } #endif /* 次回のために now->last_ph を更新 */ /* update 'now->last_ph' for future scan_word() */ if (back_rescan) { now->last_ph = dwrk->phmmseq[0]; } else { now->last_ph = winfo->wseq[word][0]; } if (enable_iwsp && hmminfo->multipath) { now->last_ph_sp_attached = dwrk->has_sp[0]; } } #ifdef GRAPHOUT_PRECISE_BOUNDARY if (! hmminfo->multipath) { if (r->graphout) { /* 次回の next_word 用に境界情報を調整 */ /* proceed word boundary for one step for next_word */ now->wordend_frame[peseqlen-1] = now->wordend_frame[0]; now->wordend_gscore[peseqlen-1] = now->wordend_gscore[0]; for (t=0;twordend_frame[t] = now->wordend_frame[t+1]; now->wordend_gscore[t] = now->wordend_gscore[t+1]; } } } #endif /* free work area */ free_hmm(whmm); #ifdef TCD if (hmminfo->multipath) { if (ccd_flag) { jlog("DEBUG: last_ph = %s", (now->last_ph)->name); if (now->last_ph_sp_attached) jlog(" (sp attached)"); jlog("\n"); } } else { jlog("DEBUG: last_ph = %s\n", (now->last_ph)->name); } #endif } /**************************************************************************/ /*** 新仮説の展開とヒューリスティックを繋いだ全体スコアを計算 ***/ /*** Expand new hypothesis and compute the total score (with heuristic) ***/ /**************************************************************************/ /** * * 展開元仮説に次単語を接続して新しい仮説を生成する. 次単語の単語トレリス上の * スコアから最尤接続点を求め,仮説スコアを計算する. * * @param now [in] 展開元仮説 * @param new [out] 新たに生成された仮説が格納される * @param nword [in] 接続する次単語の情報 * @param param [in] 入力パラメータ列 * @param r [in] 認識処理インスタンス * * * * Connect a new word to generate a next hypothesis. The optimal connection * point and new sentence score of the new hypothesis will be estimated by * looking up the corresponding words on word trellis. 
* * @param now [in] source hypothesis * @param new [out] pointer to save the newly generated hypothesis * @param nword [in] next word to be connected * @param param [in] input parameter vector * @param r [in] recognition process instance * * * @callgraph * @callergraph */ void next_word(NODE *now, NODE *new, NEXTWORD *nword, HTK_Param *param, RecogProcess *r) { int t; HMM_Logical *newphone; int lastword; int i; LOGPROB tmpp; LOGPROB a_value; int startt; int word; LOGPROB totalscore; TRELLIS_ATOM *tre; BACKTRELLIS *backtrellis; WORD_INFO *winfo; HTK_HMM_INFO *hmminfo; int peseqlen; boolean ccd_flag; backtrellis = r->backtrellis; winfo = r->lm->winfo; hmminfo = r->am->hmminfo; peseqlen = r->peseqlen; ccd_flag = r->ccd_flag; new->score = LOG_ZERO; word = nword->id; lastword=now->seq[now->seqnum-1]; /* 単語並び、DFA状態番号、言語スコアを継承・更新 */ /* inherit and update word sequence, DFA state and total LM score */ for (i=0;i< now->seqnum;i++){ new->seq[i] = now->seq[i]; #ifdef CM_SEARCH #ifdef CM_MULTIPLE_ALPHA memcpy(new->cmscore[i], now->cmscore[i], sizeof(LOGPROB) * r->config->annotate.cm_alpha_num); #else new->cmscore[i] = now->cmscore[i]; #endif #endif /* CM_SEARCH */ } new->seq[i] = word; new->seqnum = now->seqnum+1; new->state = nword->next_state; new->totallscore = now->totallscore + nword->lscore; if (hmminfo->multipath) new->final_g = now->final_g; if (ccd_flag) { /* 展開単語の接続点の音素HMMをnewphoneにセットする. 元仮説 now との単語間の音素環境依存性を考慮する */ /* set the triphone at the connection point to 'newphone', considering cross-word context dependency to 'now' */ newphone = get_right_context_HMM(winfo->wseq[word][winfo->wlen[word]-1], now->last_ph->name, hmminfo); if (newphone == NULL) { /* triphone not found */ /* fallback to the original bi/mono-phone */ /* error if the original is pseudo phone (not explicitly defined in hmmdefs/hmmlist) */ /* exception: word with 1 phone (triphone may exist in the next expansion */ if (winfo->wlen[word] > 1 && winfo->wseq[word][winfo->wlen[word]-1]->is_pseudo){ error_missing_right_triphone(winfo->wseq[word][winfo->wlen[word]-1], now->last_ph->name); } newphone = winfo->wseq[word][winfo->wlen[word]-1]; } /* 元仮説をscanした時の末端音素HMM -> 新仮説の直前音素HMM */ /* inherit last_ph */ new->last_ph = now->last_ph; if (hmminfo->multipath) { new->last_ph_sp_attached = now->last_ph_sp_attached; } /* backscan用接続ポイントのスコア g_prev[] をコピー */ /* copy g_prev[] that are scores at backscan connection point */ for (t=0;tg_prev[t] = now->g_prev[t]; } } else { /* not triphone */ /* 展開単語の接続(=終端)の音素HMMをnewphoneにセット */ /* set the phone at the connection point to 'newphone' */ newphone = winfo->wseq[word][winfo->wlen[word]-1]; } /* 接続確率を与える */ new->lscore = nword->lscore; if (! 
hmminfo->multipath) { /* a_value: 接続点の遷移確率 */ /* a_value: transition probability of connection point */ i = hmm_logical_state_num(newphone); a_value = (hmm_logical_trans(newphone))->a[i-2][i-1]; } /***************************************************************************/ /* 前向き(第2パス),後ろ向き(第1パス)トレリスを接続し最尤接続点を見つける */ /* connect forward/backward trellis to look for the best connection time */ /***************************************************************************/ if (hmminfo->multipath) { startt = peseqlen-1; } else { startt = peseqlen-2; new->g[startt+1] = LOG_ZERO; } /*-----------------------------------------------------------------*/ /* 単語トレリスを探して, 次単語の最尤接続点を発見する */ /* determine the best connection time of the new word, seeking the word trellis */ /*-----------------------------------------------------------------*/ /* update new->g[t] */ if (hmminfo->multipath) { for(t=startt;t>=0;t--) { new->g[t] = now->g[t] + nword->lscore; } } else { for(t=startt;t>=0;t--) { new->g[t] = now->g[t+1] + a_value + nword->lscore; } } new->tre = NULL; if (r->lmtype == LM_DFA && !r->config->pass2.looktrellis_flag) { /* すべてのフレームにわたって最尤を探す */ /* search for best trellis word throughout all frame */ for(t = startt; t >= 0; t--) { tre = bt_binsearch_atom(backtrellis, t, (WORD_ID) word); if (tre == NULL) continue; totalscore = new->g[t] + tre->backscore; if (! hmminfo->multipath) { if (newphone->is_pseudo) { tmpp = outprob_cd(&(r->am->hmmwrk), t, &(newphone->body.pseudo->stateset[newphone->body.pseudo->state_num-2]), param); } else { tmpp = outprob_state(&(r->am->hmmwrk), t, newphone->body.defined->s[newphone->body.defined->state_num-2], param); } totalscore += tmpp; } if (new->score < totalscore) { new->score = totalscore; new->bestt = t; new->estimated_next_t = tre->begintime - 1; new->tre = tre; } } return; } /* この展開単語のトレリス上の終端時間の前後のみスキャンする 前後に連続して存在するフレームについてのみ計算 */ /* search for best trellis word only around the estimated time */ /* 1. search forward */ for(t = (nword->tre)->endtime; t >= 0; t--) { tre = bt_binsearch_atom(backtrellis, t, (WORD_ID) word); if (tre == NULL) break; /* go to 2 if the trellis word disappear */ totalscore = new->g[t] + tre->backscore; if (! hmminfo->multipath) { if (newphone->is_pseudo) { tmpp = outprob_cd(&(r->am->hmmwrk), t, &(newphone->body.pseudo->stateset[newphone->body.pseudo->state_num-2]), param); } else { tmpp = outprob_state(&(r->am->hmmwrk), t, newphone->body.defined->s[newphone->body.defined->state_num-2], param); } totalscore += tmpp; } if (new->score < totalscore) { new->score = totalscore; new->bestt = t; new->estimated_next_t = tre->begintime - 1; new->tre = tre; } } /* 2. search backward */ for(t = (nword->tre)->endtime + 1; t <= startt; t++) { tre = bt_binsearch_atom(backtrellis, t, (WORD_ID) word); if (tre == NULL) break; /* end if the trellis word disapper */ totalscore = new->g[t] + tre->backscore; if (! 
hmminfo->multipath) { if (newphone->is_pseudo) { tmpp = outprob_cd(&(r->am->hmmwrk), t, &(newphone->body.pseudo->stateset[newphone->body.pseudo->state_num-2]), param); } else { tmpp = outprob_state(&(r->am->hmmwrk), t, newphone->body.defined->s[newphone->body.defined->state_num-2], param); } totalscore += tmpp; } if (new->score < totalscore) { new->score = totalscore; new->bestt = t; new->estimated_next_t = tre->begintime - 1; new->tre = tre; } } } /**********************************************************************/ /********** 初期仮説の生成 ****************************/ /********** Generate an initial hypothesis ****************************/ /**********************************************************************/ /** * * 与えられた単語から初期仮説を生成する. * * @param new [out] 新たに生成された仮説が格納される * @param nword [in] 初期仮説単語の情報 * @param param [in] 入力パラメータ列 * @param r [in] 認識処理インスタンス * * * * Generate an initial hypothesis from given word. * * @param new [out] pointer to save the newly generated hypothesis * @param nword [in] words of the first candidates * @param param [in] input parameter vector * @param r [in] recognition process instance * * * @callgraph * @callergraph */ void start_word(NODE *new, NEXTWORD *nword, HTK_Param *param, RecogProcess *r) { HMM_Logical *newphone; WORD_ID word; LOGPROB tmpp; int t; TRELLIS_ATOM *tre = NULL; BACKTRELLIS *backtrellis; WORD_INFO *winfo; int peseqlen; boolean ccd_flag; backtrellis = r->backtrellis; winfo = r->lm->winfo; peseqlen = r->peseqlen; ccd_flag = r->ccd_flag; /* initialize data */ word = nword->id; new->score = LOG_ZERO; new->seqnum = 1; new->seq[0] = word; new->state = nword->next_state; new->totallscore = nword->lscore; /* cross-word triphone handling is not needed on startup */ newphone = winfo->wseq[word][winfo->wlen[word]-1]; if (ccd_flag) { new->last_ph = NULL; new->last_ph_sp_attached = FALSE; } new->lscore = nword->lscore; new->g[peseqlen-1] = nword->lscore; for (t=peseqlen-1; t>=0; t--) { tre = bt_binsearch_atom(backtrellis, t, word); if (tre != NULL) { if (r->graphout) { new->bestt = peseqlen-1; } else { new->bestt = t; } new->score = new->g[peseqlen-1] + tre->backscore; if (! r->am->hmminfo->multipath) { if (newphone->is_pseudo) { tmpp = outprob_cd(&(r->am->hmmwrk), peseqlen-1, &(newphone->body.pseudo->stateset[newphone->body.pseudo->state_num-2]), param); } else { tmpp = outprob_state(&(r->am->hmmwrk), peseqlen-1, newphone->body.defined->s[newphone->body.defined->state_num-2], param); } new->score += tmpp; } new->estimated_next_t = tre->begintime - 1; new->tre = tre; break; } } if (tre == NULL) { /* no word in backtrellis */ new->score = LOG_ZERO; } } /** * * 終端処理:終端まで達した文仮説の最終的なスコアをセットする. * * @param now [in] 終端まで達した仮説 * @param new [out] 最終的な文仮説のスコアを格納する場所へのポインタ * @param param [in] 入力パラメータ列 * @param r [in] 認識処理インスタンス * * * * Hypothesis termination: set the final sentence scores of hypothesis * that has already reached to the end. 
* * @param now [in] hypothesis that has already reached to the end * @param new [out] pointer to save the final sentence information * @param param [in] input parameter vectors * @param r [in] recognition process instance * * * @callgraph * @callergraph */ void last_next_word(NODE *now, NODE *new, HTK_Param *param, RecogProcess *r) { cpy_node(new, now); /* 最終スコアを設定 */ /* update the final score */ if (r->am->hmminfo->multipath) { new->score = now->final_g; } else { new->score = now->g[0]; } } #endif /* PASS2_STRICT_IWCD */ /* end of file */ julius-4.2.2/libjulius/src/m_info.c0000644001051700105040000007344312004452401015537 0ustar ritrlab/** * @file m_info.c * * * @brief システム情報の出力 * * * * @brief Output system informations. * * * @author Akinobu Lee * @date Thu May 12 14:14:01 2005 * * $Revision: 1.15 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #include /** * * Output module overview in a global configuration variables to log. * * * 全体設定パラメータ内のモジュール構成の概要をログに出力する. * * * @param jconf [in] global configuration variables * * @callgraph * @callergraph * */ void print_jconf_overview(Jconf *jconf) { JCONF_AM *amconf; JCONF_LM *lmconf; JCONF_SEARCH *sconf; GRAMLIST *g; int i, n; jlog("------------------------------------------------------------\n"); jlog("Configuration of Modules\n\n"); jlog(" Number of defined modules:"); i = 0; for(amconf=jconf->am_root;amconf;amconf=amconf->next) i++; jlog(" AM=%d,", i); i = 0; for(lmconf=jconf->lm_root;lmconf;lmconf=lmconf->next) i++; jlog(" LM=%d,", i); i = 0; for(sconf=jconf->search_root;sconf;sconf=sconf->next) i++; jlog(" SR=%d\n", i); jlog("\n"); jlog(" Acoustic Model (with input parameter spec.):\n"); for(amconf=jconf->am_root;amconf;amconf=amconf->next) { if (amconf->name[0] != '\0') { jlog(" - AM%02d \"%s\"\n", amconf->id, amconf->name); } else { jlog(" - AM%02d\n", amconf->id); } jlog("\thmmfilename=%s\n",amconf->hmmfilename); if (amconf->mapfilename != NULL) { jlog("\thmmmapfilename=%s\n",amconf->mapfilename); } if (amconf->hmm_gs_filename != NULL) { jlog("\thmmfile for Gaussian Selection: %s\n", amconf->hmm_gs_filename); } } jlog("\n"); jlog(" Language Model:\n"); for(lmconf=jconf->lm_root;lmconf;lmconf=lmconf->next) { if (lmconf->name[0] != '\0') { jlog(" - LM%02d \"%s\"\n", lmconf->id, lmconf->name); } else { jlog(" - LM%02d\n", lmconf->id); } if (lmconf->lmtype == LM_PROB) { jlog("\tvocabulary filename=%s\n",lmconf->dictfilename); if (lmconf->ngram_filename != NULL) { jlog("\tn-gram filename=%s (binary format)\n", lmconf->ngram_filename); } else { if (lmconf->ngram_filename_rl_arpa != NULL) { jlog("\tbackward n-gram filename=%s\n", lmconf->ngram_filename_rl_arpa); if (lmconf->ngram_filename_lr_arpa != NULL) { jlog("\tforward 2-gram for pass1=%s\n", lmconf->ngram_filename_lr_arpa); } } else if (lmconf->ngram_filename_lr_arpa != NULL) { jlog("\tforward n-gram filename=%s\n", lmconf->ngram_filename_lr_arpa); } } } if (lmconf->lmtype == LM_DFA) { switch(lmconf->lmvar) { case LM_DFA_GRAMMAR: n = 1; for(g = lmconf->gramlist_root; g; g = g->next) { jlog("\tgrammar #%d:\n", n++); jlog("\t dfa = %s\n", g->dfafile); jlog("\t dict = %s\n", g->dictfile); } break; case LM_DFA_WORD: n = 1; for(g = lmconf->wordlist_root; g; g = g->next) { jlog("\twordlist #%d: %s\n", n++, g->dictfile); } break; } } } jlog("\n"); jlog(" Recognizer:\n"); 
for(sconf=jconf->search_root; sconf; sconf=sconf->next) { if (sconf->name[0] != '\0') { jlog(" - SR%02d \"%s\"", sconf->id, sconf->name); } else { jlog(" - SR%02d", sconf->id); } jlog(" (AM%02d, LM%02d)\n", sconf->amconf->id, sconf->lmconf->id); } jlog("\n"); } /** * * エンジンインスタンスの全情報をログに出力する. * * * Output all informations of an engine instance to log. * * * @param recog [in] engine instance * * @callgraph * @callergraph */ void print_engine_info(Recog *recog) { FILE *fp; Jconf *jconf; MFCCCalc *mfcc; PROCESS_AM *am; PROCESS_LM *lm; RecogProcess *r; jconf = recog->jconf; /* set output file pointer to fp */ fp = jlog_get_fp(); if (fp == NULL) return; jlog("----------------------- System Information begin ---------------------\n"); j_put_header(fp); j_put_compile_defs(fp); jlog("\n"); /* print current argument setting to log */ print_jconf_overview(jconf); if (jconf->input.type == INPUT_WAVEFORM) { /* acoustic parameter conditions for this model */ jlog("------------------------------------------------------------\n"); jlog("Speech Analysis Module(s)\n\n"); for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) { jlog("[MFCC%02d] for", mfcc->id); for(am=recog->amlist;am;am=am->next) { if (am->mfcc == mfcc) { jlog(" [AM%02d %s]", am->config->id, am->config->name); } } if (recog->gmm != NULL) { if (recog->gmmmfcc == mfcc) { jlog(" [GMM]"); } } jlog("\n\n"); put_para(fp, mfcc->para); if (jconf->input.type == INPUT_WAVEFORM) { jlog(" spectral subtraction = "); if (mfcc->frontend.ssload_filename || mfcc->frontend.sscalc) { if (mfcc->frontend.sscalc) { jlog("use head silence of each input\n"); jlog("\t head sil length = %d msec\n", mfcc->frontend.sscalc_len); } else { /* ssload_filename != NULL */ jlog("use a constant value from file\n"); jlog(" noise spectrum file = \"%s\"\n", mfcc->frontend.ssload_filename); } jlog("\t alpha coef. = %f\n", mfcc->frontend.ss_alpha); jlog("\t spectral floor = %f\n", mfcc->frontend.ss_floor); } else { jlog("off\n"); } } jlog(" cepstral normalization = "); if (mfcc->para->cmn || mfcc->para->cvn) { if (jconf->decodeopt.realtime_flag) { jlog("real-time MAP-"); } else { jlog("sentence "); } if (mfcc->para->cmn) { jlog("CMN"); } if (mfcc->para->cmn && mfcc->para->cvn) { jlog("+"); } if (mfcc->para->cvn) { jlog("CVN"); } jlog("\n"); } else { jlog("no\n"); } jlog("\t base setup from ="); if (mfcc->htk_loaded == 1 || mfcc->hmm_loaded == 1) { if (mfcc->hmm_loaded == 1) { jlog(" binhmm-embedded"); if (mfcc->htk_loaded == 1) { jlog(", then overridden by HTK Config and defaults"); } } else { if (mfcc->htk_loaded == 1) { jlog(" HTK Config (and HTK defaults)"); } } } else { jlog(" Julius defaults"); } jlog("\n"); jlog("\n"); if (jconf->decodeopt.realtime_flag && (mfcc->para->cmn || mfcc->para->cvn)) { jlog(" MAP-"); if (mfcc->para->cmn) jlog("CMN"); if (mfcc->para->cmn && mfcc->para->cvn) jlog("+"); if (mfcc->para->cvn) jlog("CVN"); jlog(":\n"); jlog(" initial cep. data = "); if (mfcc->cmn.load_filename) { jlog("load from \"%s\"\n", mfcc->cmn.load_filename); } else { jlog("none\n"); } jlog(" beginning data weight = %6.2f\n", mfcc->cmn.map_weight); if (mfcc->cmn.update) { jlog(" beginning data update = yes, from last inputs at each input\n"); } else { jlog(" beginning data update = no, use default as initial at each input\n"); } if (mfcc->cmn.save_filename) { jlog(" save cep. 
data to = file \"%s\" at end of each input\n", mfcc->cmn.save_filename); } jlog("\n"); } } } if (recog->gmm != NULL) { jlog("------------------------------------------------------------\n"); jlog("GMM\n"); jlog("\n"); jlog(" GMM definition file = %s\n", jconf->reject.gmm_filename); jlog(" GMM gprune num = %d\n", jconf->reject.gmm_gprune_num); if (jconf->reject.gmm_reject_cmn_string != NULL) { jlog(" GMM names to reject = %s\n", jconf->reject.gmm_reject_cmn_string); } #ifdef GMM_VAD jlog("\n GMM-based VAD\n\n"); jlog(" backstep on trigger = %d frames\n", jconf->detect.gmm_margin); jlog(" up-trigger thres score = %.1f\n", jconf->detect.gmm_uptrigger_thres); jlog(" down-trigger thres score = %.1f\n", jconf->detect.gmm_downtrigger_thres); #endif jlog("\n GMM"); print_hmmdef_info(fp, recog->gmm); jlog("\n"); } jlog("------------------------------------------------------------\n"); jlog("Acoustic Model(s)\n"); jlog("\n"); for(am = recog->amlist; am; am = am->next) { if (am->config->name[0] != '\0') { jlog("[AM%02d \"%s\"]\n\n", am->config->id, am->config->name); } else { jlog("[AM%02d]\n\n", am->config->id); } print_hmmdef_info(fp, am->hmminfo); jlog("\n"); if (am->config->hmm_gs_filename != NULL) { jlog("GS "); print_hmmdef_info(fp, am->hmm_gs); jlog("\n"); } jlog(" AM Parameters:\n"); jlog(" Gaussian pruning = "); switch(am->config->gprune_method){ case GPRUNE_SEL_NONE: jlog("none (full computation)"); break; case GPRUNE_SEL_BEAM: jlog("beam"); break; case GPRUNE_SEL_HEURISTIC: jlog("heuristic"); break; case GPRUNE_SEL_SAFE: jlog("safe"); break; case GPRUNE_SEL_USER: jlog("(use plugin function)"); break; } jlog(" (-gprune)\n"); if (am->config->gprune_method != GPRUNE_SEL_NONE && am->config->gprune_method != GPRUNE_SEL_USER) { jlog(" top N mixtures to calc = %d / %d (-tmix)\n", am->config->mixnum_thres, am->hmminfo->maxcodebooksize); } if (am->config->hmm_gs_filename != NULL) { jlog(" GS state num thres = %d / %d selected (-gsnum)\n", am->config->gs_statenum, am->hmm_gs->totalstatenum); } jlog(" short pause HMM name = \"%s\" specified", am->config->spmodel_name); if (am->hmminfo->sp != NULL) { jlog(", \"%s\" applied", am->hmminfo->sp->name); if (am->hmminfo->sp->is_pseudo) { jlog(" (pseudo)"); } else { jlog(" (physical)"); } } else { jlog(" but not assigned"); } jlog(" (-sp)\n"); jlog(" cross-word CD on pass1 = "); #ifdef PASS1_IWCD jlog("handle by approx. "); switch(am->hmminfo->cdset_method) { case IWCD_AVG: jlog("(use average prob. of same LC)\n"); break; case IWCD_MAX: jlog("(use max. prob. 
of same LC)\n"); break; case IWCD_NBEST: jlog("(use %d-best of same LC)\n", am->hmminfo->cdmax_num); break; } #else jlog("disabled\n"); #endif if (am->hmminfo->multipath) { jlog(" sp transition penalty = %+2.1f\n", am->config->iwsp_penalty); } jlog("\n"); } jlog("------------------------------------------------------------\n"); jlog("Language Model(s)\n"); for(lm = recog->lmlist; lm; lm = lm->next) { jlog("\n"); if (lm->config->name[0] != '\0') { jlog("[LM%02d \"%s\"]", lm->config->id, lm->config->name); } else { jlog("[LM%02d]", lm->config->id); } if (lm->lmtype == LM_PROB) { if (lm->lmvar == LM_NGRAM) { jlog(" type=n-gram\n\n"); if (lm->ngram) { print_ngram_info(fp, lm->ngram); jlog("\n"); } } else if (lm->lmvar == LM_NGRAM_USER) { if (lm->ngram) { jlog(" type=n-gram + user\n\n"); print_ngram_info(fp, lm->ngram); jlog("\n"); } else { jlog(" type=user\n\n"); } } else { jlog(" type=UNKNOWN??\n\n"); } } else if (lm->lmtype == LM_DFA) { if (lm->lmvar == LM_DFA_GRAMMAR) { jlog(" type=grammar\n\n"); if (lm->dfa) { print_dfa_info(fp, lm->dfa); jlog("\n"); if (debug2_flag) { print_dfa_cp(fp, lm->dfa); jlog("\n"); } } } else if (lm->lmvar == LM_DFA_WORD) { jlog(" type=word\n\n"); } else { jlog(" type=UNKNOWN??\n\n"); } } else { jlog(" type=UNKNOWN??\n\n"); } if (lm->winfo != NULL) { print_voca_info(fp, lm->winfo); jlog("\n"); } jlog(" Parameters:\n"); if (lm->lmtype == LM_DFA && lm->lmvar == LM_DFA_GRAMMAR) { if (lm->dfa != NULL) { int i; jlog(" found sp category IDs ="); for(i=0;idfa->term_num;i++) { if (lm->dfa->is_sp[i]) { jlog(" %d", i); } } jlog("\n"); } } if (lm->lmtype == LM_PROB) { if (lm->config->enable_iwspword) { jlog("\tIW-sp word added to dict= \"%s\"\n", lm->config->iwspentry); } if (lm->config->additional_dict_files) { JCONF_LM_NAMELIST *nl; jlog("\tadditional dictionaries:\n"); for(nl=lm->config->additional_dict_files;nl;nl=nl->next) { jlog("\t\t\t%s\n", nl->name); } jlog("\n"); } if (lm->config->additional_dict_entries) { JCONF_LM_NAMELIST *nl; int n = 0; jlog("\tadditional dict entries:\n"); for(nl=lm->config->additional_dict_entries;nl;nl=nl->next) { jlog("\t\t\t%s\n", nl->name); n++; } jlog("--- total %d entries\n", n); } } if (lm->lmtype == LM_PROB) { jlog("\t(-silhead)head sil word = "); put_voca(fp, lm->winfo, lm->winfo->head_silwid); jlog("\t(-siltail)tail sil word = "); put_voca(fp, lm->winfo, lm->winfo->tail_silwid); } if (lm->lmvar == LM_DFA_WORD) { jlog(" silence model names to add at word head / tail: (-wsil)\n"); jlog("\tword head = \"%s\"\n", lm->config->wordrecog_head_silence_model_name); jlog("\tword tail = \"%s\"\n", lm->config->wordrecog_tail_silence_model_name); jlog("\ttheir context name = \"%s\"\n", (lm->config->wordrecog_silence_context_name[0] == '\0') ? 
"NULL (blank)" : lm->config->wordrecog_silence_context_name); } } jlog("\n"); jlog("------------------------------------------------------------\n"); jlog("Recognizer(s)\n\n"); for(r = recog->process_list; r; r = r->next) { jlog("[SR%02d", r->config->id); if (r->config->name[0] != '\0') { jlog(" \"%s\"", r->config->name); } jlog("] "); if (r->am->config->name[0] != '\0') { jlog("AM%02d \"%s\"", r->am->config->id, r->am->config->name); } else { jlog("AM%02d", r->am->config->id); } jlog(" + "); if (r->lm->config->name[0] != '\0') { jlog("LM%02d \"%s\"", r->lm->config->id, r->lm->config->name); } else { jlog("LM%02d", r->lm->config->id); } jlog("\n\n"); if (r->wchmm != NULL) { print_wchmm_info(r->wchmm); jlog("\n"); } if (r->lmtype == LM_PROB) { jlog(" Inter-word N-gram cache: \n"); { int num, len; #ifdef UNIGRAM_FACTORING len = r->wchmm->isolatenum; jlog("\troot node to be cached = %d / %d (isolated only)\n", len, r->wchmm->startnum); #else len = r->wchmm->startnum; jlog("\troot node to be cached = %d (all)\n", len); #endif #ifdef HASH_CACHE_IW num = (r->config->pass1.iw_cache_rate * r->lm->ngram->max_word_num) / 100; jlog("\tword ends to be cached = %d / %d\n", num, r->lm->ngram->max_word_num); #else num = r->lm->ngram->max_word_num; jlog("\tword ends to be cached = %d (all)\n", num); #endif jlog("\t max. allocation size = %dMB\n", num * len / 1000 * sizeof(LOGPROB) / 1000); } } if (r->lmtype == LM_PROB) { jlog("\t(-lmp) pass1 LM weight = %2.1f ins. penalty = %+2.1f\n", r->config->lmp.lm_weight, r->config->lmp.lm_penalty); jlog("\t(-lmp2) pass2 LM weight = %2.1f ins. penalty = %+2.1f\n", r->config->lmp.lm_weight2, r->config->lmp.lm_penalty2); jlog("\t(-transp)trans. penalty = %+2.1f per word\n", r->config->lmp.lm_penalty_trans); } else if (r->lmtype == LM_DFA && r->lmvar == LM_DFA_GRAMMAR) { jlog("\t(-penalty1) IW penalty1 = %+2.1f\n", r->config->lmp.penalty1); jlog("\t(-penalty2) IW penalty2 = %+2.1f\n", r->config->lmp.penalty2); } #ifdef CONFIDENCE_MEASURE #ifdef CM_MULTIPLE_ALPHA jlog("\t(-cmalpha)CM alpha coef = from %f to %f by step of %f (%d outputs)\n", r->config->annotate.cm_alpha_bgn, r->config->annotate.cm_alpha_end, r->config->annotate.cm_alpha_step, r->config->annotate.cm_alpha_num); #else jlog("\t(-cmalpha)CM alpha coef = %f\n", r->config->annotate.cm_alpha); #endif #ifdef CM_SEARCH_LIMIT jlog("\t(-cmthres) CM cut thres = %f for hypo generation\n", r->config->annotate.cm_cut_thres); #endif #ifdef CM_SEARCH_LIMIT_POP jlog("\t(-cmthres2)CM cut thres = %f for popped hypo\n", r->config->annotate.cm_cut_thres_pop); #endif #endif /* CONFIDENCE_MEASURE */ jlog("\n"); if (r->am->hmminfo->multipath) { if (r->lm->config->enable_iwsp) { jlog("\t inter-word short pause = on (append \"%s\" for each word tail)\n", r->am->hmminfo->sp->name); jlog("\t sp transition penalty = %+2.1f\n", r->am->config->iwsp_penalty); } } if (r->lmvar == LM_DFA_WORD) { #ifdef DETERMINE jlog(" early word determination: (-wed)\n"); jlog("\tscore threshold = %f\n", r->config->pass1.determine_score_thres); jlog("\tframe dur. 
thres = %d\n", r->config->pass1.determine_duration_thres); #endif } jlog(" Search parameters: \n"); jlog("\t multi-path handling = "); if (r->am->hmminfo->multipath) { jlog("yes, multi-path mode enabled\n"); } else { jlog("no\n"); } jlog("\t(-b) trellis beam width = %d", r->trellis_beam_width); if (r->config->pass1.specified_trellis_beam_width == -1) { jlog(" (-1 or not specified - guessed)\n"); } else if (r->config->pass1.specified_trellis_beam_width == 0) { jlog(" (0 - full)\n"); } else { jlog("\n"); } #ifdef SCORE_PRUNING if (r->config->pass1.score_pruning_width < 0.0) { jlog("\t(-bs)score pruning thres= disabled\n"); } else { jlog("\t(-bs)score pruning thres= %f\n", r->config->pass1.score_pruning_width); } #endif jlog("\t(-n)search candidate num= %d\n", r->config->pass2.nbest); jlog("\t(-s) search stack size = %d\n", r->config->pass2.stack_size); jlog("\t(-m) search overflow = after %d hypothesis poped\n", r->config->pass2.hypo_overflow); jlog("\t 2nd pass method = "); if (r->config->graph.enabled) { #ifdef GRAPHOUT_DYNAMIC #ifdef GRAPHOUT_SEARCH jlog("searching graph, generating dynamic graph\n"); #else jlog("searching sentence, generating dynamic graph\n"); #endif /* GRAPHOUT_SEARCH */ #else /* ~GRAPHOUT_DYNAMIC */ jlog("searching sentence, generating static graph from N-best\n"); #endif } else { jlog("searching sentence, generating N-best\n"); } if (r->config->pass2.enveloped_bestfirst_width >= 0) { jlog("\t(-b2) pass2 beam width = %d\n", r->config->pass2.enveloped_bestfirst_width); } jlog("\t(-lookuprange)lookup range= %d (tm-%d <= t config->pass2.lookup_range,r->config->pass2.lookup_range,r->config->pass2.lookup_range); #ifdef SCAN_BEAM jlog("\t(-sb)2nd scan beamthres = %.1f (in logscore)\n", r->config->pass2.scan_beam_thres); #endif jlog("\t(-n) search till = %d candidates found\n", r->config->pass2.nbest); jlog("\t(-output) and output = %d candidates out of above\n", r->config->output.output_hypo_maxnum); if (r->ccd_flag) { jlog("\t IWCD handling:\n"); #ifdef PASS1_IWCD jlog("\t 1st pass: approximation "); switch(r->am->hmminfo->cdset_method) { case IWCD_AVG: jlog("(use average prob. of same LC)\n"); break; case IWCD_MAX: jlog("(use max. prob. of same LC)\n"); break; case IWCD_NBEST: jlog("(use %d-best of same LC)\n", r->am->hmminfo->cdmax_num); break; } #else jlog("\t 1st pass: ignored\n"); #endif #ifdef PASS2_STRICT_IWCD jlog("\t 2nd pass: strict (apply when expanding hypo. )\n"); #else jlog("\t 2nd pass: loose (apply when hypo. is popped and scanned)\n"); #endif } if (r->lmtype == LM_PROB) { jlog("\t factoring score: "); #ifdef UNIGRAM_FACTORING jlog("1-gram prob. (statically assigned beforehand)\n"); #else jlog("2-gram prob. 
(dynamically computed while search)\n"); #endif } if (r->config->annotate.align_result_word_flag) { jlog("\t output word alignments\n"); } if (r->config->annotate.align_result_phoneme_flag) { jlog("\t output phoneme alignments\n"); } if (r->config->annotate.align_result_state_flag) { jlog("\t output state alignments\n"); } if (r->lmtype == LM_DFA && r->lmvar == LM_DFA_GRAMMAR) { if (r->config->pass2.looktrellis_flag) { jlog("\t only words in backtrellis will be expanded in 2nd pass\n"); } else { jlog("\t all possible words will be expanded in 2nd pass\n"); } } if (r->wchmm != NULL) { if (r->wchmm->category_tree) { if (r->config->pass1.old_tree_function_flag) { jlog("\t build_wchmm() used\n"); } else { jlog("\t build_wchmm2() used\n"); } #ifdef PASS1_IWCD #ifdef USE_OLD_IWCD jlog("\t full lcdset used\n"); #else jlog("\t lcdset limited by word-pair constraint\n"); #endif #endif /* PASS1_IWCD */ } } if (r->config->output.progout_flag) { jlog("\tprogressive output on 1st pass\n"); } if (r->config->compute_only_1pass) { jlog("\tCompute only 1-pass\n"); } if (r->config->graph.enabled) { jlog("\n"); jlog("Graph-based output with graph-oriented search:\n"); jlog("\t(-lattice) word lattice = %s\n", r->config->graph.lattice ? "yes" : "no"); jlog("\t(-confnet) confusion network = %s\n", r->config->graph.confnet ? "yes" : "no"); if (r->config->graph.lattice == TRUE) { jlog("\t(-graphrange) margin = %d frames", r->config->graph.graph_merge_neighbor_range); if (r->config->graph.graph_merge_neighbor_range < 0) { jlog(" (all post-marging disabled)\n"); } else if (r->config->graph.graph_merge_neighbor_range == 0) { jlog(" (merge same word with the same boundary)\n"); } else { jlog(" (merge same words around this margin)\n"); } } #ifdef GRAPHOUT_DEPTHCUT jlog("\t(-graphcut)cutoff depth = "); if (r->config->graph.graphout_cut_depth < 0) { jlog("disabled (-1)\n"); } else { jlog("%d words\n",r->config->graph.graphout_cut_depth); } #endif #ifdef GRAPHOUT_LIMIT_BOUNDARY_LOOP jlog("\t(-graphboundloop)loopmax = %d for boundary adjustment\n",r->config->graph.graphout_limit_boundary_loop_num); #endif #ifdef GRAPHOUT_SEARCH_DELAY_TERMINATION jlog("\tInhibit graph search termination before 1st sentence found = "); if (r->config->graph.graphout_search_delay) { jlog("enabled\n"); } else { jlog("disabled\n"); } #endif } if (r->config->successive.enabled) { jlog("\tshort pause segmentation = on\n"); jlog("\t sp duration length = %d frames\n", r->config->successive.sp_frame_duration); #ifdef SPSEGMENT_NAIST jlog(" backstep margin on trigger = %d frames\n", r->config->successive.sp_margin); jlog("\t delay on trigger = %d frames\n", r->config->successive.sp_delay); #endif if (r->config->successive.pausemodelname) { jlog("\t pause models for seg. 
= %s\n", r->config->successive.pausemodelname); } } else { jlog("\tshort pause segmentation = off\n"); } if (r->config->output.progout_flag) { jlog("\t progout interval = %d msec\n", r->config->output.progout_interval); } jlog("\tfall back on search fail = "); if (r->config->sw.fallback_pass1_flag) { jlog("on, adopt 1st pass result as final\n"); } else { jlog("off, returns search failure\n"); } jlog("\n"); } jlog("------------------------------------------------------------\n"); jlog("Decoding algorithm:\n\n"); jlog("\t1st pass input processing = "); if (jconf->decodeopt.force_realtime_flag) jlog("(forced) "); if (jconf->decodeopt.realtime_flag) { jlog("real time, on-the-fly\n"); } else { jlog("buffered, batch\n"); } jlog("\t1st pass method = "); #ifdef WPAIR # ifdef WPAIR_KEEP_NLIMIT jlog("word-pair approx., keeping only N tokens "); # else jlog("word-pair approx. "); # endif #else jlog("1-best approx. "); #endif #ifdef WORD_GRAPH jlog("generating word_graph\n"); #else jlog("generating indexed trellis\n"); #endif #ifdef CONFIDENCE_MEASURE jlog("\toutput word confidence measure "); #ifdef CM_NBEST jlog("based on N-best candidates\n"); #endif #ifdef CM_SEARCH jlog("based on search-time scores\n"); #endif #endif /* CONFIDENCE_MEASURE */ jlog("\n"); jlog("------------------------------------------------------------\n"); jlog("FrontEnd:\n\n"); jlog(" Input stream:\n"); jlog("\t input type = "); switch(jconf->input.type) { case INPUT_WAVEFORM: jlog("waveform\n"); break; case INPUT_VECTOR: jlog("feature vector sequence\n"); break; } jlog("\t input source = "); if (jconf->input.plugin_source != -1) { jlog("plugin\n"); } else if (jconf->input.speech_input == SP_RAWFILE) { jlog("waveform file\n"); jlog("\t input filelist = "); if (jconf->input.inputlist_filename == NULL) { jlog("(none, get file name from stdin)\n"); } else { jlog("%s\n", jconf->input.inputlist_filename); } } else if (jconf->input.speech_input == SP_MFCFILE) { jlog("feature vector file (HTK format)\n"); jlog("\t filelist = "); if (jconf->input.inputlist_filename == NULL) { jlog("(none, get file name from stdin)\n"); } else { jlog("%s\n", jconf->input.inputlist_filename); } } else if (jconf->input.speech_input == SP_STDIN) { jlog("standard input\n"); } else if (jconf->input.speech_input == SP_ADINNET) { jlog("adinnet client\n"); #ifdef USE_NETAUDIO } else if (jconf->input.speech_input == SP_NETAUDIO) { char *p; jlog("NetAudio server on "); if (jconf->input.netaudio_devname != NULL) { jlog("%s\n", jconf->input.netaudio_devname); } else if ((p = getenv("AUDIO_DEVICE")) != NULL) { jlog("%s\n", p); } else { jlog("local port\n"); } #endif } else if (jconf->input.speech_input == SP_MIC) { jlog("microphone\n"); jlog("\t device API = "); switch(jconf->input.device) { case SP_INPUT_DEFAULT: jlog("default\n"); break; case SP_INPUT_ALSA: jlog("alsa\n"); break; case SP_INPUT_OSS: jlog("oss\n"); break; case SP_INPUT_ESD: jlog("esd\n"); break; case SP_INPUT_PULSEAUDIO: jlog("pulseaudio\n"); break; } } if (jconf->input.type == INPUT_WAVEFORM) { if (jconf->input.speech_input == SP_RAWFILE || jconf->input.speech_input == SP_STDIN || jconf->input.speech_input == SP_ADINNET) { if (jconf->input.use_ds48to16) { jlog("\t sampling freq. = assume 48000Hz, then down to %dHz\n", jconf->input.sfreq); } else { jlog("\t sampling freq. = %d Hz required\n", jconf->input.sfreq); } } else { if (jconf->input.use_ds48to16) { jlog("\t sampling freq. = 48000Hz, then down to %d Hz\n", jconf->input.sfreq); } else { jlog("\t sampling freq. 
= %d Hz\n", jconf->input.sfreq); } } } if (jconf->input.type == INPUT_WAVEFORM) { jlog("\t threaded A/D-in = "); #ifdef HAVE_PTHREAD if (recog->adin->enable_thread) { jlog("supported, on\n"); } else { jlog("supported, off\n"); } #else jlog("not supported (live input may be dropped)\n"); #endif } if (jconf->preprocess.strip_zero_sample) { jlog("\t zero frames stripping = on\n"); } else { jlog("\t zero frames stripping = off\n"); } if (jconf->input.type == INPUT_WAVEFORM) { if (recog->adin->adin_cut_on) { jlog("\t silence cutting = on\n"); jlog("\t level thres = %d / 32767\n", jconf->detect.level_thres); jlog("\t zerocross thres = %d / sec.\n", jconf->detect.zero_cross_num); jlog("\t head margin = %d msec.\n", jconf->detect.head_margin_msec); jlog("\t tail margin = %d msec.\n", jconf->detect.tail_margin_msec); jlog("\t chunk size = %d samples\n", jconf->detect.chunk_size); } else { jlog("\t silence cutting = off\n"); } if (jconf->preprocess.use_zmean) { jlog("\t long-term DC removal = on"); if (jconf->input.speech_input == SP_RAWFILE) { jlog(" (will compute for each file)\n"); } else { jlog(" (will compute from first %.1f sec)\n", (float)ZMEANSAMPLES / (float)jconf->input.sfreq); } } else { jlog("\t long-term DC removal = off\n"); } } jlog("\t reject short input = "); if (jconf->reject.rejectshortlen > 0) { jlog("< %d msec\n", jconf->reject.rejectshortlen); } else { jlog("off\n"); } #ifdef POWER_REJECT jlog("\t power rejection thres = %f", jconf->reject.powerthres); #endif jlog("\n"); jlog("----------------------- System Information end -----------------------\n"); #ifdef USE_MIC if (jconf->decodeopt.realtime_flag) { boolean flag; flag = FALSE; for(mfcc=recog->mfcclist; mfcc; mfcc=mfcc->next) { if (mfcc->para->cmn && mfcc->cmn.loaded) { flag = TRUE; break; } } if (flag) { jlog("\n"); jlog("initial CMN parameter loaded from file\nfor"); for(mfcc=recog->mfcclist; mfcc; mfcc=mfcc->next) { if (mfcc->para->cmn && mfcc->cmn.loaded) { jlog(" MFCC%02d", mfcc->id); } } jlog("\n"); } flag = FALSE; for(mfcc=recog->mfcclist; mfcc; mfcc=mfcc->next) { if (mfcc->para->cmn && !mfcc->cmn.loaded) { flag = TRUE; break; } } if (flag) { jlog("\n"); jlog("\t*************************************************************\n"); jlog("\t* NOTICE: The first input may not be recognized, since *\n"); jlog("\t* no initial CMN parameter is available on startup. *\n"); jlog("\t* for"); for(mfcc=recog->mfcclist; mfcc; mfcc=mfcc->next) { if (mfcc->para->cmn && !mfcc->cmn.loaded) { jlog(" MFCC%02d", mfcc->id); } } jlog("*\n"); jlog("\t*************************************************************\n"); } flag = FALSE; for(mfcc=recog->mfcclist; mfcc; mfcc=mfcc->next) { if (mfcc->para->energy && mfcc->para->enormal) { flag = TRUE; break; } } if (flag) { jlog("\t*************************************************************\n"); jlog("\t* NOTICE: Energy normalization is activated on live input: *\n"); jlog("\t* maximum energy of LAST INPUT will be used for it. *\n"); jlog("\t* So, the first input will not be recognized. *\n"); jlog("\t* for"); for(mfcc=recog->mfcclist; mfcc; mfcc=mfcc->next) { if (mfcc->para->energy && mfcc->para->enormal) { jlog(" MFCC%02d", mfcc->id); } } jlog("*\n"); jlog("\t*************************************************************\n"); } } #endif } /* end of file */ julius-4.2.2/libjulius/src/factoring_sub.c0000644001051700105040000010660712004452401017114 0ustar ritrlab/** * @file factoring_sub.c * * * @brief 言語スコアのfactoring計算(第1パス) * * このファイルには,第1パスにおいて言語スコアの factoring を行うための * 関数が含まれています. 
木構造化辞書上でのサブツリー内の単語リスト * (successor list) の構築,および認識中の言語スコア計算ルーチンが * 含まれます. * * successor list は,木構造化辞書の各ノードに割り付けられる, * そのノードを共有する単語のリストです. 木構造化辞書において, * 枝部分の次のノードがこのリストを保持します. 実際にはリストが変化する * 場所,すなわち木構造化辞書の枝の分岐点に割り付けられます. * 例えば,以下のような木構造化辞書の場合,数字の書いてあるノードに * successor list が割り付けられます. *
 *
 *        2-o-o - o-o-o - o-o-o          word "A" 
 *       /
 *  1-o-o
 *       \       4-o-o                   word "B"
 *        \     /   
 *         3-o-o - 5-o-o - 7-o-o         word "C"
 *              \        \ 
 *               \        8-o-o          word "D"
 *                6-o-o                  word "E"
 * 
* * 各 successor list はそのサブツリーに含まれる単語のリストです. * この例では以下のようになります. * *
 *   node  | successor list (wchmm->state[node].sc)
 *   =======================
 *     1   | A B C D E
 *     2   | A
 *     3   |   B C D E
 *     4   |   B
 *     5   |     C D
 *     6   |         E
 *     7   |     C
 *     8   |       D
 * 
* * ある successor list に含まれる単語が1つになったとき,その時点で * 単語が確定する. 上記の場合,単語 "A" はノード 2 の位置ですでに * その後続単語として "A" 以外無いので,そこで確定する. * すなわち,単語 A の正確な言語スコアは,単語終端を待たずノード 2 で決まる. * * 第1パスにおける factoring の計算は,実際には beam.c で行なわれる. * 2-gram factoringの場合,次ノードに successor list が存在すれば, * その successor list の単語の 2-gram の最大値を求め, 伝搬してきている * factoring 値を更新する. successor list に単語が1つのノードでは, * 正しい2-gramが自動的に割り当てられる. * 1-gram factoringの場合,次ノードに successor list が存在する場合, * その successor list の単語の 1-gram の最大値を求め,伝搬してきている * factoring 値を更新する. successor list に単語が1つのノードで,はじめて * 2-gram を計算する. * * 実際では 1-gram factoring では各 successor list における factoring 値 * は単語履歴に非依存なので,successor list 構築時に全てあらかじめ計算して * おく. すなわち,エンジン起動時に木構造化辞書を構築後,successor list * を構築したら,単語を2個以上含む successor list についてはその 1-gram の * 最大値を計算して,それをそのノードの fscore メンバに格納しておき,その * successor list は free してしまえばよい. 単語が1つのみの successor list * についてはその単語IDを残しておき,探索時にパスがそこに到達したら * 正確な2-gramを計算すれば良い. * * DFA文法使用時は,デフォルトでは言語制約(カテゴリ対制約)を * カテゴリ単位で木を構築することで静的に表現する. このため, * これらの factoring 機構は用いられない. ただし, * CATEGORY_TREE が undefined であれば,決定的 factoring を用いた言語制約 * 適用を行うことも可能である. * すなわち,次ノードに successor list が存在すれば, * その successor list 内の各単語と直前単語の単語対制約を調べ, * そのうち一つでも接続可能な単語があれば,その遷移を許し,一つも * なければ遷移させない. この機能は技術参考のために残されているのみである. *
* * * @brief LM factoring on 1st pass. * * * This file contains functions to do language score factoring on the 1st * pass. They build a successor lists which holds the successive words in * each sub tree on the tree lexicon, and also provide a factored LM * probability on each nodes on the tree lexicon. * * The "successor list" will be assigned for each lexicon tree node to * represent a list of words that exist in the sub-tree and share the node. * Actually they will be assigned to the branch node. * Below is the example of successor lists on a tree lexicon, in which * the lists is assigned to the numbered nodes. * *
 *         2-o-o - o-o-o - o-o-o          word "A" 
 *        /
 *   1-o-o
 *        \       4-o-o                   word "B"
 *         \     /   
 *          3-o-o - 5-o-o - 7-o-o         word "C"
 *           \            \ 
 *            \            8-o-o          word "D"
 *             6-o-o                      word "E"
 * 
 *
 * The contents of the successor lists are the following:
 *
 *  node  | successor list (wchmm->state[node].sc)
 *  =======================
 *    1   | A B C D E
 *    2   | A
 *    3   |   B C D E
 *    4   |   B
 *    5   |     C D
 *    6   |         E
 *    7   |     C
 *    8   |       D
 * 
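 *
 * For example, with 2-gram factoring the LM score propagated through node 3
 * above is the maximum of P(B|v), P(C|v), P(D|v) and P(E|v) for the last
 * word v, while node 2 already determines word "A" and receives its exact
 * score.  A minimal sketch of that maximization (illustrative only, not the
 * actual Julius code; the function and argument names are made up):
 *
 *     static LOGPROB factored_score(LOGPROB *lm_score,  // per-word scores P(w|v)
 *                                   WORD_ID *successor, // word IDs in the list
 *                                   int num)            // length of the list
 *     {
 *       LOGPROB best = LOG_ZERO;
 *       int i;
 *       for (i = 0; i < num; i++) {
 *         if (lm_score[successor[i]] > best) best = lm_score[successor[i]];
 *       }
 *       return best;  // replaces the LM part of the propagating score
 *     }
 *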
* * When the 1st pass proceeds, if the next going node has a successor list, * all the word 2-gram scores in the successor list on the next node * will be computed, and the propagating LM value in the token on * the current node will be replaced by the maximum value of the scores * when copied to the next node. Appearently, if the successor list has * only one word, it means that the word can be determined on that point, * and the precise 2-gram value will be assigned as is. * * When using 1-gram factoring, the computation will be slightly different. * Since the factoring value (maximum value of 1-gram scores on each successor * list) is independent of the word context, they can be computed statically * before the search. Thus, for all the successor lists that have more than * two words, the maximum 1-gram value is computed and stored to * "fscore" member in tree lexicon, and the successor lists will be freed. * The successor lists with only one word should still remain in the * tree lexicon, to compute the precise 2-gram scores for the words. * * * When using DFA grammar, Julian builds separated lexicon trees for every * word categories, to statically express the catergory-pair constraint. * Thus these factoring scheme is not used by default. * However you can still force Julian to use the grammar-based * deterministic factoring scheme by undefining CATEGORY_TREE. * If CATEGORY_TREE is undefined, the word connection constraint will be * performed based on the successor list at the middle of tree lexicon. * This enables single tree search on Julian. This function is left * only for technical reference. * * @author Akinobu LEE * @date Mon Mar 7 23:20:26 2005 * * $Revision: 1.6 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #include /*----------------------------------------------------------------------*/ /** * * 木構造化辞書上の全ノードに successor list を構築するメイン関数 * * @param wchmm [i/o] 木構造化辞書 * * * Main function to build whole successor list to lexicon tree. * * @param wchmm [i/o] tree lexicon * * * @callgraph * @callergraph * */ void make_successor_list(WCHMM_INFO *wchmm) { int node; WORD_ID w; int i, j; int s; WORD_ID *scnumlist; WORD_ID *sclen; int scnum, new_scnum; int *scidmap; boolean *freemark; jlog("STAT: make successor lists for factoring\n"); /* 1. initialize */ /* initialize node->sclist index on wchmm tree */ for (node=0;noden;node++) wchmm->state[node].scid = 0; /* parse the tree to assign unique scid and get the maximum size of successor list */ scnum = 1; for (w=0;wwinfo->num;w++) { for (i=0;iwinfo->wlen[w];i++) { if (wchmm->state[wchmm->offset[w][i]].scid == 0) { wchmm->state[wchmm->offset[w][i]].scid = scnum; scnum++; } } if (wchmm->state[wchmm->wordend[w]].scid == 0) { wchmm->state[wchmm->wordend[w]].scid = scnum; scnum++; } } if (debug2_flag) { jlog("DEBUG: initial successor list size = %d\n", scnum); } /* 2. count number of each successor */ sclen = (WORD_ID *)mymalloc(sizeof(WORD_ID) * scnum); for (i=1;iwinfo->num;w++) { for (i=0;iwinfo->wlen[w];i++) { sclen[wchmm->state[wchmm->offset[w][i]].scid]++; } sclen[wchmm->state[wchmm->wordend[w]].scid]++; } /* 3. 
delete bogus successor lists */ freemark = (boolean *)mymalloc(sizeof(boolean) * scnum); for (i=1;iwinfo->num;w++) { node = wchmm->wordend[w]; /* begin from the word end node */ i = wchmm->winfo->wlen[w]-1; while (i >= 0) { /* for each phoneme start node */ if (node == wchmm->offset[w][i]) { /* word with only 1 state: skip */ i--; continue; } if (wchmm->state[node].scid == 0) break; /* already parsed */ if (sclen[wchmm->state[node].scid] == sclen[wchmm->state[wchmm->offset[w][i]].scid]) { freemark[wchmm->state[node].scid] = TRUE; /* mark the node */ wchmm->state[node].scid = 0; } node = wchmm->offset[w][i]; i--; } } /* build compaction map */ scidmap = (int *)mymalloc(sizeof(int) * scnum); scidmap[0] = 0; j = 1; for (i=1;in;node++) { if (wchmm->state[node].scid > 0) { wchmm->state[node].scid = scidmap[wchmm->state[node].scid]; } } wchmm->sclen = (WORD_ID *)mybmalloc2(sizeof(WORD_ID) * new_scnum, &(wchmm->malloc_root)); for (i=1;isclen[scidmap[i]] = sclen[i]; } wchmm->scnum = new_scnum; free(scidmap); free(freemark); free(sclen); /* 5. now index completed, make word list for each list */ wchmm->sclist = (WORD_ID **)mybmalloc2(sizeof(WORD_ID *) * wchmm->scnum, &(wchmm->malloc_root)); scnumlist = (WORD_ID *)mymalloc(sizeof(WORD_ID) * wchmm->scnum); for(i=1;iscnum;i++) { wchmm->sclist[i] = (WORD_ID *)mybmalloc2(sizeof(WORD_ID) * wchmm->sclen[i], &(wchmm->malloc_root)); scnumlist[i] = 0; } { int scid; for (w=0;wwinfo->num;w++) { for (i=0;iwinfo->wlen[w];i++) { scid = wchmm->state[wchmm->offset[w][i]].scid; if (scid != 0) { wchmm->sclist[scid][scnumlist[scid]] = w; scnumlist[scid]++; if (scnumlist[scid] > wchmm->sclen[scid]) { jlog("hogohohoho\n"); exit(1); } } } /* at word end */ scid = wchmm->state[wchmm->wordend[w]].scid; if (scid != 0) { wchmm->sclist[scid][scnumlist[scid]] = w; scnumlist[scid]++; if (scnumlist[scid] > wchmm->sclen[scid]) { jlog("hogohohoho\n"); exit(1); } } } } free(scnumlist); jlog("STAT: done\n"); } #ifdef UNIGRAM_FACTORING /** * * 木構造化辞書上の全ノードに successor list を構築するメイン関数(unigram factoring 用 * * @param wchmm [i/o] 木構造化辞書 * * * Main function to build whole successor list to lexicon tree for unigram factoring * * @param wchmm [i/o] tree lexicon * * * @callgraph * @callergraph * */ void make_successor_list_unigram_factoring(WCHMM_INFO *wchmm) { #ifndef FAST_FACTOR1_SUCCESSOR_LIST /* old way */ make_successor_list(wchmm); calc_all_unigram_factoring_values(wchmm); #else /* ~FAST_FACTOR1_SUCCESSOR_LIST */ /* new way */ int node, node2; WORD_ID w, w2; int i, j, n, f; int s; LOGPROB tmpprob; WORD_ID *mtmp; jlog("STAT: make successor lists for unigram factoring\n"); /* 1. initialize */ /* initialize node->sclist index on wchmm tree */ for (node=0;noden;node++) wchmm->state[node].scid = 0; /* in unigram factoring, number of successor = vocabulary size */ wchmm->scnum = wchmm->winfo->num + 1; if (debug2_flag) { jlog("DEBUG: successor list size = %d\n", wchmm->scnum); } /* allocate successor list for 1-gram factoring */ wchmm->scword = (WORD_ID *)mybmalloc2(sizeof(WORD_ID) * wchmm->scnum, &(wchmm->malloc_root)); /* 2. 
make successor list, and count needed fscore num */ f = 1; s = 1; for (w=0;wwinfo->num;w++) { for (i=0;iwinfo->wlen[w] + 1;i++) { if (i < wchmm->winfo->wlen[w]) { node = wchmm->offset[w][i]; } else { node = wchmm->wordend[w]; } if (wchmm->state[node].scid == 0) { /* not assigned */ /* new node found, assign new and exit here */ wchmm->state[node].scid = s; wchmm->scword[s] = w; s++; if (s > wchmm->scnum) { jlog("InternalError: make_successor_list_unigram_factoring: scid num exceeded?\n"); return; } break; } else if (wchmm->state[node].scid > 0) { /* that node has successor */ /* move it to the current first isolated node in that word */ w2 = wchmm->scword[wchmm->state[node].scid]; for(j=i+1;jwinfo->wlen[w2] + 1;j++) { if (j < wchmm->winfo->wlen[w2]) { node2 = wchmm->offset[w2][j]; } else { node2 = wchmm->wordend[w2]; } if (wchmm->state[node2].scid == 0) { /* not assigned */ /* move successor to there */ wchmm->state[node2].scid = wchmm->state[node].scid; break; } } if (j >= wchmm->winfo->wlen[w2] + 1) { /* not found? */ jlog("InternalError: make_successor_list_unigram_factoring: no isolated word for %d\n", w2); return; } /* make current node as fscore node */ n = f++; wchmm->state[node].scid = -n; /* not compute unigram factoring value yet */ } } } /* 2. allocate fscore buffer */ wchmm->fsnum = f; wchmm->fscore = (LOGPROB *)mymalloc(sizeof(LOGPROB) * wchmm->fsnum); for(n=0;nfsnum;n++) wchmm->fscore[n] = LOG_ZERO; /* 3. parse again to assign fscore values */ for (w=0;wwinfo->num;w++) { for (i=0;iwinfo->wlen[w] + 1;i++) { if (i < wchmm->winfo->wlen[w]) { node = wchmm->offset[w][i]; } else { node = wchmm->wordend[w]; } if (wchmm->state[node].scid < 0) { /* update max */ if (wchmm->ngram) { tmpprob = uni_prob(wchmm->ngram, wchmm->winfo->wton[w]) #ifdef CLASS_NGRAM + wchmm->winfo->cprob[w] #endif ; } else { tmpprob = LOG_ZERO; } if (wchmm->lmvar == LM_NGRAM_USER) { tmpprob = (*(wchmm->uni_prob_user))(wchmm->winfo, w, tmpprob); } n = - wchmm->state[node].scid; if (wchmm->fscore[n] < tmpprob) { wchmm->fscore[n] = tmpprob; } } } } #endif /* ~FAST_FACTOR1_SUCCESSOR_LIST */ jlog("STAT: done\n"); } #endif /* UNIGRAM_FACTORING */ /** * * 構築された factoring 情報を multipath 用に調整する. factoring 情報を, * モデル全体をスキップする遷移がある場合はその先の音素へコピーする. * また,(出力を持たない)文頭文法ノードに単語先頭ノードからコピーする. * * @param wchmm [in] 木構造化辞書 * * * Adjust factoring data in tree lexicon for multipath transition handling. 
* * @param wchmm [in] tree lexicon * * * @callgraph * @callergraph * */ void adjust_sc_index(WCHMM_INFO *wchmm) { WORD_ID w; int i,j,k; HMM_Logical *ltmp; int ltmp_state_num; int ato; LOGPROB prob; int node, scid; A_CELL2 *ac; /* duplicate scid for HMMs with more than one arc from initial state */ for(w=0;wwinfo->num;w++) { for(k=0;kwinfo->wlen[w];k++) { node = wchmm->offset[w][k]; scid = wchmm->state[node].scid; if (scid == 0) continue; ltmp = wchmm->winfo->wseq[w][k]; ltmp_state_num = hmm_logical_state_num(ltmp); if ((hmm_logical_trans(ltmp))->a[0][ltmp_state_num-1] != LOG_ZERO) { j = k + 1; if (j == wchmm->winfo->wlen[w]) { if (wchmm->state[wchmm->wordend[w]].scid == 0) { jlog("STAT: word %d: factoring node copied for skip phone\n", w); wchmm->state[wchmm->wordend[w]].scid = scid; } } else { if (wchmm->state[wchmm->offset[w][j]].scid == 0) { jlog("STAT: word %d: factoring node copied for skip phone\n", w); wchmm->state[wchmm->offset[w][j]].scid = scid; } } } for(ato=1;atoa[0][ato]; if (prob != LOG_ZERO) { wchmm->state[node+ato-1].scid = scid; } } } } /* move scid and fscore on the head state to the head grammar state */ for(i=0;istartnum;i++) { node = wchmm->startnode[i]; if (wchmm->state[node].out.state != NULL) { j_internal_error("adjust_sc_index: outprob exist in word-head node??\n"); } if (wchmm->next_a[node] != LOG_ZERO) { if (wchmm->state[node+1].scid != 0) { if (wchmm->state[node].scid != 0 && wchmm->state[node].scid != wchmm->state[node+1].scid) { j_internal_error("adjust_sc_index: different successor list within word-head phone?\n"); } wchmm->state[node].scid = wchmm->state[node+1].scid; wchmm->state[node+1].scid = 0; } } for(ac=wchmm->ac[node];ac;ac=ac->next) { for(k=0;kn;k++) { if (wchmm->state[ac->arc[k]].scid != 0) { if (wchmm->state[node].scid != 0 && wchmm->state[node].scid != wchmm->state[ac->arc[k]].scid) { j_internal_error("adjust_sc_index: different successor list within word-head phone?\n"); } wchmm->state[node].scid = wchmm->state[ac->arc[k]].scid; wchmm->state[ac->arc[k]].scid = 0; } } } } } /* -------------------------------------------------------------------- */ /* factoring computation */ /** * * 木構造化辞書用の factoring キャッシュをメモリ割り付けして初期化する. * この関数はプログラム開始時に一度だけ呼ばれる. * * @param wchmm [i/o] 木構造化辞書 * * * Initialize factoring cache for a tree lexicon, allocating memory for * cache. This should be called only once on start up. * * @param wchmm [i/o] tree lexicon * * * @callgraph * @callergraph * */ void max_successor_cache_init(WCHMM_INFO *wchmm) { int i; LM_PROB_CACHE *l; WORD_ID wnum; /* for word-internal */ l = &(wchmm->lmcache); l->probcache = (LOGPROB *) mymalloc(sizeof(LOGPROB) * wchmm->scnum); l->lastwcache = (WORD_ID *) mymalloc(sizeof(WORD_ID) * wchmm->scnum); for (i=0;iscnum;i++) { l->lastwcache[i] = WORD_INVALID; } /* for cross-word */ if (wchmm->ngram) { wnum = wchmm->ngram->max_word_num; } else { wnum = wchmm->winfo->num; } #ifdef HASH_CACHE_IW l->iw_cache_num = wnum * jconf.search.pass1.iw_cache_rate / 100; if (l->iw_cache_num < 10) l->iw_cache_num = 10; #else l->iw_cache_num = wnum; #endif /* HASH_CACHE_IW */ l->iw_sc_cache = (LOGPROB **)mymalloc(sizeof(LOGPROB *) * l->iw_cache_num); for (i=0;iiw_cache_num;i++) { l->iw_sc_cache[i] = NULL; } #ifdef HASH_CACHE_IW l->iw_lw_cache = (WORD_ID *)mymalloc(sizeof(WORD_ID) * l->iw_cache_num); for (i=0;iiw_cache_num;i++) { l->iw_lw_cache[i] = WORD_INVALID; } #endif } /** * * 単語間の factoring cache のメモリ領域を解放する. * * @param wchmm [i/o] 木構造化辞書 * * * Free cross-word factoring cache. 
* * @param wchmm [i/o] tree lexicon * */ static void max_successor_prob_iw_free(WCHMM_INFO *wchmm) { int i; LM_PROB_CACHE *l; l = &(wchmm->lmcache); for (i=0;iiw_cache_num;i++) { if (l->iw_sc_cache[i] != NULL) free(l->iw_sc_cache[i]); l->iw_sc_cache[i] = NULL; } } /** * * factoring 用 cache のメモリ領域を全て解放する. * * @param wchmm [i/o] 木構造化辞書 * * * Free all memory for factoring cache. * * @param wchmm [i/o] tree lexicon * * * @callgraph * @callergraph * */ void max_successor_cache_free(WCHMM_INFO *wchmm) { free(wchmm->lmcache.probcache); free(wchmm->lmcache.lastwcache); max_successor_prob_iw_free(wchmm); free(wchmm->lmcache.iw_sc_cache); #ifdef HASH_CACHE_IW free(wchmm->lmcache.iw_lw_cache); #endif } #ifdef UNIGRAM_FACTORING /** * * @brief 単語先頭ノードのうちFactoring においてキャッシュが必要なノードの * リストを作成する. * * 1-gram factoring は,枝ノードにおいて直前単語に依存しない固定値 * (unigramの最大値)を与える. このため,単語間の factoring 計算において, * 木構造化辞書上で複数の単語で共有されている単語先頭ノードについては, * その値は直前単語によらず固定値であり,認識時に単語間キャッシュを保持 * する必要はない. * * この関数では,単語先頭ノードのリストからそのような factoring キャッシュが * 不要なノードを除外して,1-gram factoring 時に単語間キャッシュが必要な * 単語先頭ノード(=他の単語と共有されていない独立した単語先頭ノード)の * リストを作成し,wchmm->start2isolate および wchmm->isolatenum に格納する. * * @param wchmm [i/o] 木構造化辞書 * * * @brief Make a list of word head nodes on which cross-word factoring cache * is needed. * * On 1-gram factoring, the branch nodes on tree lexicon has a fixed * factoring value (maximum 1-gram score of all sub-tree words). Thus, when * computing cross-word factoring at word head nodes on inter-word * transition, such 1-gram factoring nodes on word head, shared by several * words, need not be cached in inter-word factoring cache. * * This function make a list of word-head nodes which requires inter-word * factoring caching (i.e. isolated word head nodes, does not shared by other * words) from the existing list of word head nodes, and set it to * wchmm->start2isolate and wchmm->isolatenum. * * @param wchmm [i/o] tree lexicon * * * @callgraph * @callergraph * */ void make_iwcache_index(WCHMM_INFO *wchmm) { int i, node, num; wchmm->start2isolate = (int *)mymalloc(sizeof(int) * wchmm->startnum); num = 0; for(i=0;istartnum;i++) { node = wchmm->startnode[i]; if (wchmm->state[node].scid >= 0) { /* not a factoring node (isolated node, has no 1-gram factoring value) */ wchmm->start2isolate[i] = num; num++; } else { /* factoring node (shared) */ wchmm->start2isolate[i] = -1; } } wchmm->isolatenum = num; } #ifndef FAST_FACTOR1_SUCCESSOR_LIST /** * * @brief 木構造化辞書上の 1-gram factoring 値を計算して格納する. * * 1-gram factoring では単語間で共有されている枝ノードでは 1-gram の最大値 * を与える. 単語履歴によらないため,その値は認識開始前に * 計算しておくことができる. この関数は木構造化辞書 * 全体について,共有されている(successor list に2つ以上の単語を持つノード) * ノードの 1-gram factoring 値を計算して格納する. 1-gram factoring値を * 計算後は,そのノードの successor list はもはや不要であるため,ここで * 削除する. * * 実際には,factoring 値は wchmm->fscore に順次保存され,ノードの * scid にその保存値へのインデックス(1-)の負の値が格納される. 不要になった * successor list は,実際には compaction_successor 内で,対応するノードの * scid が負になっている successor list を削除することで行なわれる. * * @param wchmm [i/o] 木構造化辞書 * * * @brief Calculate all the 1-gram factoring values on tree lexicon. * * On 1-gram factoring, the shared nodes on branch has fixed factoring score * from 1-gram values, independent of the word context on recognition. So * the values are fixed for all recognition and can be calculated before * search. This function stores all the neede 1-gram factoring value by * traversing tree lexicon with successor lists and compute maximum 1-gram * for each successor lists that has more than two words (=shared). 
* Since a successor list is no more neede after the 1-gram value is computed, * they will be freed. * * Actually, computed factoring scores will be stored in wchmm->fscore * sequencially, and the index value, starting from 1, * to the fscore list is stored in scid of each nodes as a negative value. * The free will be performed in compaction_successor() by checking if a * successor's corresponding scid on tree lexicon has negative value. * * @param wchmm [i/o] tree lexicon * * * @callgraph * @callergraph * */ void calc_all_unigram_factoring_values(WCHMM_INFO *wchmm) { S_CELL *sc, *sctmp; LOGPROB tmpprob, maxprob; int i, n; /* count needed number of 1-gram factoring nodes */ n = 0; for (i=1;iscnum;i++) { sc = wchmm->sclist[i]; if (sc == NULL) { j_internal_error("call_all_unigram_factoring_values: sclist has no sc?\n"); } if (sc->next != NULL) { /* more than two words, so compute maximum 1-gram probability */ n++; } } wchmm->fsnum = n + 1; /* allocate area */ wchmm->fscore = (LOGPROB *)mymalloc(sizeof(LOGPROB) * wchmm->fsnum); /* assign values */ n = 1; for (i=1;iscnum;i++) { sc = wchmm->sclist[i]; if (sc->next != NULL) { maxprob = LOG_ZERO; for (sctmp = sc; sctmp; sctmp = sctmp->next) { if (wchmm->ngram) { tmpprob = uni_prob(wchmm->ngram, wchmm->winfo->wton[sctmp->word]) #ifdef CLASS_NGRAM + wchmm->winfo->cprob[sctmp->word] #endif ; } else { tmpprob = LOG_ZERO; } if (wchmm->lmvar == LM_NGRAM_USER) { tmpprob = (*(wchmm->uni_prob_user))(wchmm->winfo, sctmp->word, tmpprob); } if (maxprob < tmpprob) maxprob = tmpprob; } wchmm->fscore[n] = maxprob; free_successor(wchmm, i); wchmm->state[wchmm->sclist2node[i]].scid = - n; n++; } } /* garbage collection of factored sclist */ compaction_successor(wchmm); } #endif #else /* ~UNIGRAM_FACTORING */ /** * * 木構造化辞書上のあるノードについて,与えられた単語履歴に対する2-gram * スコアを計算する. * * @param wchmm [in] 木構造化辞書 * @param lastword [in] 直前単語 * @param node [in] @a wchmm 上のノード番号 * * @return 2-gram 確率. * * * Compute 2-gram factoring value for the node and return the probability. * * @param wchmm [in] tree lexicon * @param lastword [in] the last context word * @param node [in] node ID on @a wchmm * * @return the log probability of 2-gram on that node. * * */ static LOGPROB calc_successor_prob(WCHMM_INFO *wchmm, WORD_ID lastword, int node) { LOGPROB tmpprob, maxprob; WORD_ID lw, w; int i; int scid; maxprob = LOG_ZERO; if (wchmm->ngram) { lw = wchmm->winfo->wton[lastword]; } scid = wchmm->state[node].scid; for (i = 0; i < wchmm->sclen[scid]; i++) { w = wchmm->sclist[scid][i]; if (wchmm->ngram) { tmpprob = (*(wchmm->ngram->bigram_prob))(wchmm->ngram, lw , wchmm->winfo->wton[w]) #ifdef CLASS_NGRAM + wchmm->winfo->cprob[w] #endif ; } else { tmpprob = LOG_ZERO; } if (wchmm->lmvar == LM_NGRAM_USER) { tmpprob = (*(wchmm->bi_prob_user))(wchmm->winfo, lastword, w, tmpprob); } if (maxprob < tmpprob) maxprob = tmpprob; } return(maxprob); } #endif /* ~UNIGRAM_FACTORING */ /** * * @brief 単語内のあるノードについて factoring 値を計算する. * * 1-gram factoring で固定factoring値がある場合はその値が即座に返される. * 他の場合は,そのノードのサブツリー内の単語の 2-gram確率(の最大値)が * 計算される. * * 単語内 factoring キャッシュが考慮される. すなわち各ノードについて * 直前単語が前回アクセスされたときと同じであれば, * 前回の値が返され,そうでなければ値を計算し,キャッシュが更新される. * * @param wchmm [in] 木構造化辞書 * @param lastword [in] 直前単語のID * @param node [in] ノード番号 * * @return 言語モデルスコア * * * @brief compute factoring LM score for the given word-internal node. * * If it is a shared branch node and 1-gram factoring is used, the * constant factoring value which has already been assigned before search * will be returned immediately. 
Else, the maximum 2-gram probability * of corresponding successor words are computed. * * The word-internal factoring cache is consulted within this function. * If the given last word is the same as the last call on that node, * the last computed value will be returned, else the maximum value * will be computed update the cache with the last word and value. * * @param wchmm [in] tree lexicon * @param lastword [in] word ID of last context word * @param node [in] node ID * * @return the LM factoring score. * * * @callgraph * @callergraph * */ LOGPROB max_successor_prob(WCHMM_INFO *wchmm, WORD_ID lastword, int node) { LOGPROB maxprob; WORD_ID last_nword, w; int scid; LM_PROB_CACHE *l; l = &(wchmm->lmcache); if (lastword != WORD_INVALID) { /* return nothing if no previous word */ if (wchmm->ngram) { last_nword = wchmm->winfo->wton[lastword]; } else { last_nword = lastword; } scid = wchmm->state[node].scid; #ifdef UNIGRAM_FACTORING if (scid < 0) { /* return 1-gram factoring value already calced */ return(wchmm->fscore[(- scid)]); } else { /* this node has only one successor */ /* return precise 2-gram score */ if (last_nword != l->lastwcache[scid]) { /* calc and cache */ w = wchmm->scword[scid]; if (wchmm->ngram) { maxprob = (*(wchmm->ngram->bigram_prob))(wchmm->ngram, last_nword, wchmm->winfo->wton[w]) #ifdef CLASS_NGRAM + wchmm->winfo->cprob[w] #endif ; } else { maxprob = LOG_ZERO; } if (wchmm->lmvar == LM_NGRAM_USER) { maxprob = (*(wchmm->bi_prob_user))(wchmm->winfo, lastword, w, maxprob); } l->lastwcache[scid] = last_nword; l->probcache[scid] = maxprob; return(maxprob); } else { /* return cached */ return (l->probcache[scid]); } } #else /* UNIGRAM_FACTORING */ /* 2-gram */ if (last_nword != l->lastwcache[scid]) { maxprob = calc_successor_prob(wchmm, lastword, node); /* store to cache */ l->lastwcache[scid] = last_nword; l->probcache[scid] = maxprob; return(maxprob); } else { return (l->probcache[scid]); } #endif /* UNIGRAM_FACTORING */ } else { return(0.0); #if 0 maxprob = LOG_ZERO; for (sc=wchmm->state[node].sc;sc;sc=sc->next) { tmpprob = uni_prob(wchmm->ngram, sc->word); if (maxprob < tmpprob) maxprob = tmpprob; } return(maxprob); #endif } } /** * * @brief 単語間の factoring 値のリストを返す. * * 与えられた直前単語に対して,factoring値を計算すべき全ての単語先頭への * factoring 値を計算し,そのリストを返す. このfactoring値は * 直前単語ごとにリスト単位でキャッシュされる. すなわち,その直前単語が * それまでに一度でも直前単語として出現していた場合,そのリストをそのまま * 返す. * * @param wchmm [in] 木構造化辞書 * @param lastword [in] 直前単語 * * @return 全単語先頭ノードへの factoring スコアのリスト * * * @brief Compute cross-word facgtoring values for word head nodes and return * the list. * * Given a last word, this function compute the factoring LM scores for all * the word head node to which the context-dependent (not 1-gram) factoring * values should be computed. The resulting list of factoring values are * cached within this function per the last word. * * @param wchmm [in] tree lexicon * @param lastword [in] last word * * @return the list of factoring LM scores for all the needed word-head nodes. 
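 *
 * The caching strategy is essentially one score vector per last word, filled
 * lazily on first access.  A minimal sketch of the idea (illustrative only;
 * the real code below also handles allocation failure and the HASH_CACHE_IW
 * variant, and compute_all_head_scores() is a hypothetical helper):
 *
 *     // cache[v] == NULL means "not yet computed for last word v"
 *     LOGPROB *cached_head_scores(LOGPROB **cache, WORD_ID last_word)
 *     {
 *       if (cache[last_word] == NULL) {
 *         cache[last_word] = compute_all_head_scores(last_word);
 *       }
 *       return cache[last_word];
 *     }
 *
 * With HASH_CACHE_IW the index is folded as "last_word % cache size", and an
 * entry is recomputed whenever a different last word falls on the same slot.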
* * * @callgraph * @callergraph * */ LOGPROB * max_successor_prob_iw(WCHMM_INFO *wchmm, WORD_ID lastword) { int i, j, x, node; int last_nword; WORD_ID w; LM_PROB_CACHE *l; LOGPROB p; l = &(wchmm->lmcache); if (wchmm->ngram) { last_nword = wchmm->winfo->wton[lastword]; } else { last_nword = lastword; } #ifdef HASH_CACHE_IW x = last_nword % l->iw_cache_num; if (l->iw_lw_cache[x] == last_nword) { /* cache hit */ return(l->iw_sc_cache[x]); } #else /* full cache */ if (l->iw_sc_cache[last_nword] != NULL) { /* cache hit */ return(l->iw_sc_cache[last_nword]); } x = last_nword; /* cache mis-hit, calc probs and cache them as new */ #endif /* allocate cache memory */ if (l->iw_sc_cache[x] == NULL) { #ifdef UNIGRAM_FACTORING l->iw_sc_cache[x] = (LOGPROB *)mymalloc(sizeof(LOGPROB)*wchmm->isolatenum); #else l->iw_sc_cache[x] = (LOGPROB *)mymalloc(sizeof(LOGPROB)*wchmm->startnum); #endif if (l->iw_sc_cache[x] == NULL) { /* malloc failed */ /* clear existing cache, and retry */ max_successor_prob_iw_free(wchmm); jlog("STAT: inter-word LM cache (%dMB) rehashed\n", (l->iw_cache_num * #ifdef UNIGRAM_FACTORING wchmm->isolatenum #else wchmm->startnum #endif ) / 1000 * sizeof(LOGPROB) / 1000); #ifdef UNIGRAM_FACTORING l->iw_sc_cache[x] = (LOGPROB *)mymalloc(sizeof(LOGPROB)*wchmm->isolatenum); #else l->iw_sc_cache[x] = (LOGPROB *)mymalloc(sizeof(LOGPROB)*wchmm->startnum); #endif if (l->iw_sc_cache[x] == NULL) { /* malloc failed again? */ j_internal_error("max_successor_prob_iw: cannot malloc\n"); } } } /* calc prob for all startid */ #ifdef UNIGRAM_FACTORING for (j=0;jstartnum;j++) { i = wchmm->start2isolate[j]; if (i == -1) continue; node = wchmm->startnode[j]; if (wchmm->state[node].scid <= 0) { /* should not happen!!! below is just for debugging */ j_internal_error("max_successor_prob_iw: isolated (not shared) tree root node has unigram factoring value??\n"); } else { w = wchmm->scword[wchmm->state[node].scid]; if (wchmm->ngram) { p = (*(wchmm->ngram->bigram_prob))(wchmm->ngram, last_nword, wchmm->winfo->wton[w]) #ifdef CLASS_NGRAM + wchmm->winfo->cprob[w] #endif ; } else { p = LOG_ZERO; } if (wchmm->lmvar == LM_NGRAM_USER) { p = (*(wchmm->bi_prob_user))(wchmm->winfo, lastword, w, p); } l->iw_sc_cache[x][i] = p; } } #else /* ~UNIGRAM_FACTORING */ for (i=0;istartnum;i++) { node = wchmm->startnode[i]; l->iw_sc_cache[x][i] = calc_successor_prob(wchmm, lastword, node); } #endif #ifdef HASH_CACHE_IW l->iw_lw_cache[x] = last_nword; #endif return(l->iw_sc_cache[x]); } /** * * @brief 文法による単語内決定的 factoring * * Julian において CATEGORY_TREE が定義されているとき(デフォルト), * 木構造化辞書はカテゴリ単位(すなわち構文制約の記述単位)で構築されるため, * 第1パスでの言語モデルであるカテゴリ対制約は単語の始終端で適用できる. * * この CATEGORY_TREE が定義されていない場合,木構造化辞書は * 辞書全体で単一の木が作られるため,カテゴリ対制約は N-gram (Julius) と * 同様に単語内で factoring と同様の機構で適用される必要がある. * * この関数は CATEGORY_TREE が定義されていないときに,上記の factoring * (決定的 factoring と呼ばれる)を行なうために提供されている. * * @param wchmm [in] 木構造化辞書 * @param lastword [in] 直前単語 * @param node [in] ノード番号 * * @return カテゴリ制約上その枝への遷移が許されれば TRUE, 不可能であれば FALSE * * * @brief Deterministic factoring for grammar-based recognition (Julian) * * If CATEGORY_TREE is defined (this is default) on Julian, the tree lexicon * will be organized per category and the category-pair constraint used * in the 1st pass can be applied statically at cross-word transition. * * If the CATEGORY_TREE is not defined, a single tree lexicon will be * constucted for a whole dictionary. 
In this case, the category-pair * constraint should be applied dynamically in the word-internal transition, * like the factoring scheme with N-gram (Julius). * * This function provides such word-internal factoring for grammar-based * recognition (called deterministic factoring) when CATEGORY_TREE is * undefined in Julian. * * @param wchmm [in] tree lexicon * @param lastword [in] last word * @param node [in] node ID to check the constraint * * @return TRUE if the transition to the branch is allowed on the category-pair * constraint, or FALSE if not allowed. * * * @callgraph * @callergraph * */ boolean can_succeed(WCHMM_INFO *wchmm, WORD_ID lastword, int node) { int lc; int i; int s; /* return TRUE if at least one subtree word can connect */ s = wchmm->state[node].scid; if (lastword == WORD_INVALID) { /* case at beginning-of-word */ for (i = 0; i < wchmm->sclen[s]; i++) { if (dfa_cp_begin(wchmm->dfa, wchmm->sclist[s][i]) == TRUE) return(TRUE); } return(FALSE); } else { lc = wchmm->winfo->wton[lastword]; for (i = 0; i < wchmm->sclen[s]; i++) { if (dfa_cp(wchmm->dfa, lc, wchmm->sclist[s][i]) == TRUE) return(TRUE); } return(FALSE); } } /* end of file */ julius-4.2.2/libjulius/src/gramlist.c0000644001051700105040000002266412004452401016111 0ustar ritrlab/** * @file gramlist.c * * * @brief Grammar file list management on startup. * * These functions are for managing list of grammar files to be loaded * at startup. You can also specify (list of) grammars to be included * for recognition at startup by calling these functions. If you want to * add, modify or remove grammars while recognition, you should prepare * grammar data and call functions in multi-gram.c directly. * @sa julius/module.c for the implementation details. * * * * * @brief 起動時に読み込む文法ファイルのリスト管理. * * これらの関数はエンジン起動時に読み込まれる文法ファイルのリストを管理する * 関数です. これらの関数を起動前に呼ぶことで,認識用の文法をアプリケーション * 上で明示的に追加することができます. エンジン起動後に動的に文法の * 追加や削除,変更を行いたい場合は,文法データを自前で用意して,multi-gram.c * 内の関数を直接呼び出す必要があります. その場合は julius/module.c が * 実装の参考になるでしょう. (@sa julius/module.c) * * * * @author Akinobu Lee * @date Tue Oct 30 12:27:53 2007 * * $Revision: 1.3 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #include /** * * 起動時読み込みリストに文法を追加する. * * @param dfafile [in] DFAファイル * @param dictfile [in] 単語辞書 * @param j [in] LM 設定パラメータ * @param lmvar [in] LM 詳細型 id * * * Add a grammar to the grammar list to be read at startup. * * @param dfafile [in] DFA file * @param dictfile [in] dictionary file * @param j [in] LM configuration variables * @param lmvar [in] LM type variant id * * * @callgraph * @callergraph * @ingroup grammar */ void multigram_add_gramlist(char *dfafile, char *dictfile, JCONF_LM *j, int lmvar) { GRAMLIST *new; new = (GRAMLIST *)mymalloc(sizeof(GRAMLIST)); new->dfafile = new->dictfile = NULL; if (dfafile) new->dfafile = strcpy((char *)mymalloc(strlen(dfafile)+1), dfafile); if (dictfile) new->dictfile = strcpy((char *)mymalloc(strlen(dictfile)+1), dictfile); switch(lmvar) { case LM_DFA_GRAMMAR: new->next = j->gramlist_root; j->gramlist_root = new; break; case LM_DFA_WORD: new->next = j->wordlist_root; j->wordlist_root = new; break; } } /** * * 起動時読み込みリストを消す. * * @param j [in] LM 設定パラメータ * * * Remove the grammar list to be read at startup. 
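 *
 * For reference, a typical startup-time sequence pairing this function with
 * multigram_add_gramlist() would look like the following (an illustrative
 * sketch: how the JCONF_LM pointer "lmconf" is obtained from the application
 * configuration is assumed, and the file names are examples only):
 *
 *     // register a .dfa/.dict grammar pair to be loaded at startup
 *     multigram_add_gramlist("sample.dfa", "sample.dict", lmconf, LM_DFA_GRAMMAR);
 *     // register a word list for isolated word recognition
 *     multigram_add_gramlist(NULL, "words.dict", lmconf, LM_DFA_WORD);
 *     // later, discard the whole list
 *     multigram_remove_gramlist(lmconf);
 *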
* * @param j [in] LM configuration variables * * * @callgraph * @callergraph * @ingroup grammar */ void multigram_remove_gramlist(JCONF_LM *j) { GRAMLIST *g; GRAMLIST *tmp; g = j->gramlist_root; while (g) { tmp = g->next; if (g->dfafile) free(g->dfafile); if (g->dictfile) free(g->dictfile); free(g); g = tmp; } j->gramlist_root = NULL; g = j->wordlist_root; while (g) { tmp = g->next; if (g->dfafile) free(g->dfafile); if (g->dictfile) free(g->dictfile); free(g); g = tmp; } j->wordlist_root = NULL; } /** * * @brief プレフィックスから複数の文法を起動時読み込みリストに追加する. * * プレフィックスは "foo", あるいは "foo,bar" のようにコンマ区切りで * 複数与えることができます. 各文字列の後ろに ".dfa", ".dict" をつけた * ファイルを,それぞれ文法ファイル・辞書ファイルとして順次読み込みます. * 読み込まれた文法は順次,文法リストに追加されます. * * @param prefix_list [in] プレフィックスのリスト * @param cwd [in] カレントディレクトリの文字列 * @param j [in] LM 設定パラメータ * @param lmvar [in] LM 詳細型 id * * * @brief Add multiple grammars given by their prefixs to the grammar list. * * This function read in several grammars, given a prefix string that * contains a list of file prefixes separated by comma: "foo" or "foo,bar". * For each prefix, string ".dfa" and ".dict" will be appended to read * dfa file and dict file. The read grammars will be added to the grammar * list. * * @param prefix_list [in] string that contains comma-separated list of grammar path prefixes * @param cwd [in] string of current working directory * @param j [in] LM configuration variables * @param lmvar [in] LM type variant id * * * @callgraph * @callergraph * @ingroup grammar */ boolean multigram_add_prefix_list(char *prefix_list, char *cwd, JCONF_LM *j, int lmvar) { char buf[MAXGRAMNAMELEN], *p, *q; char buf2_d[MAXGRAMNAMELEN], *buf_d; char buf2_v[MAXGRAMNAMELEN], *buf_v; boolean ok_p, ok_p_total; if (prefix_list == NULL) return TRUE; p = &(prefix_list[0]); ok_p_total = TRUE; while(*p != '\0') { /* extract one prefix to buf[] */ q = p; while(*p != '\0' && *p != ',') { buf[p-q] = *p; p++; } buf[p-q] = '\0'; switch(lmvar) { case LM_DFA_GRAMMAR: /* register the new grammar to the grammar list to be read later */ /* making file names from the prefix */ ok_p = TRUE; strcpy(buf2_d, buf); strcat(buf2_d, ".dfa"); buf_d = filepath(buf2_d, cwd); if (!checkpath(buf_d)) { jlog("ERROR: gramlist: cannot read dfa file \"%s\"\n", buf_d); ok_p = FALSE; } strcpy(buf2_v, buf); strcat(buf2_v, ".dict"); buf_v = filepath(buf2_v, cwd); if (!checkpath(buf_v)) { jlog("ERROR: gramlist: cannot read dict file \"%s\"\n", buf_v); ok_p = FALSE; } if (ok_p == TRUE) { multigram_add_gramlist(buf_d, buf_v, j, lmvar); } else { ok_p_total = FALSE; } break; case LM_DFA_WORD: /* register the new word list to the list */ /* treat the file name as a full file path (not prefix) */ buf_v = filepath(buf, cwd); if (!checkpath(buf_v)) { jlog("ERROR: gramlist: cannot read wordlist file \"%s\"\n", buf_v); ok_p_total = FALSE; } else { multigram_add_gramlist(NULL, buf_v, j, lmvar); } break; } /* move to next */ if (*p == ',') p++; } return ok_p_total; } /** * * @brief リストファイルを読み込み複数文法を起動時読み込みリストに追加する. * * ファイル内に1行に1つずつ記述された文法のプレフィックスから, * 対応する文法ファイルを順次読み込みます. * * 各行の文字列の後ろに ".dfa", ".dict" をつけたファイルを, * それぞれ文法ファイル・辞書ファイルとして順次読み込みます. * 読み込まれた文法は順次,文法リストに追加されます. * * @param listfile [in] プレフィックスリストのファイル名 * @param j [in] LM 設定パラメータ * @param lmvar [in] LM 詳細型 id * * * @brief Add multiple grammars from prefix list file to the grammar list. * * This function read in multiple grammars at once, given a file that * contains a list of grammar prefixes, each per line. 
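 *
 * As an illustrative example (the names are hypothetical), a list file
 * containing
 *
 *     # grammars to be loaded at startup
 *     fruit/fruit
 *     date/date
 *
 * makes the engine read fruit/fruit.dfa, fruit/fruit.dict, date/date.dfa and
 * date/date.dict.  The '#' comment handling and the interpretation of
 * relative paths as relative to the list file itself are implemented below.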
* * For each prefix, string ".dfa" and ".dict" will be appended to read the * corresponding dfa and dict file. The read grammars will be added to the * grammar list. * * @param listfile [in] path of the prefix list file * @param j [in] LM configuration variables * @param lmvar [in] LM type variant id * * * @callgraph * @callergraph * @ingroup grammar */ boolean multigram_add_prefix_filelist(char *listfile, JCONF_LM *j, int lmvar) { FILE *fp; char buf[MAXGRAMNAMELEN], *p, *src_bgn, *src_end, *dst; char *cdir; char buf2_d[MAXGRAMNAMELEN], *buf_d; char buf2_v[MAXGRAMNAMELEN], *buf_v; boolean ok_p, ok_p_total; if (listfile == NULL) return FALSE; if ((fp = fopen(listfile, "r")) == NULL) { jlog("ERROR: gramlist: failed to open grammar list file %s\n", listfile); return FALSE; } /* convert relative paths as relative to this list file */ cdir = strcpy((char *)mymalloc(strlen(listfile)+1), listfile); get_dirname(cdir); ok_p_total = TRUE; while(getl_fp(buf, MAXGRAMNAMELEN, fp) != NULL) { /* remove comment */ p = &(buf[0]); while(*p != '\0') { if (*p == '#') { *p = '\0'; break; } p++; } if (buf[0] == '\0') continue; /* trim head/tail blanks */ p = (&buf[0]); while(*p == ' ' || *p == '\t' || *p == '\r') p++; if (*p == '\0') continue; src_bgn = p; p = (&buf[strlen(buf) - 1]); while((*p == ' ' || *p == '\t' || *p == '\r') && p > src_bgn) p--; src_end = p; dst = (&buf[0]); p = src_bgn; while(p <= src_end) *dst++ = *p++; *dst = '\0'; if (buf[0] == '\0') continue; switch(lmvar) { case LM_DFA_GRAMMAR: /* register the new grammar to the grammar list to be read later */ ok_p = TRUE; strcpy(buf2_d, buf); strcat(buf2_d, ".dfa"); buf_d = filepath(buf2_d, cdir); if (!checkpath(buf_d)) { jlog("ERROR: gramlist: cannot read dfa file \"%s\"\n", buf_d); ok_p = FALSE; } strcpy(buf2_v, buf); strcat(buf2_v, ".dict"); buf_v = filepath(buf2_v, cdir); if (!checkpath(buf_v)) { jlog("ERROR: gramlist: cannot read dict file \"%s\"\n", buf_v); ok_p = FALSE; } if (ok_p == TRUE) { multigram_add_gramlist(buf_d, buf_v, j, lmvar); } else { ok_p_total = FALSE; } break; case LM_DFA_WORD: /* register the new word list to the list */ /* treat the file name as a full file path (not prefix) */ buf_v = filepath(buf, cdir); if (!checkpath(buf_v)) { jlog("ERROR: gramlist: cannot read wordlist file \"%s\"\n", buf_v); ok_p_total = FALSE; } else { multigram_add_gramlist(NULL, buf_v, j, lmvar); } break; } } free(cdir); fclose(fp); return ok_p_total; } /* end of file */ julius-4.2.2/libjulius/src/wchmm.c0000644001051700105040000020020312004452401015365 0ustar ritrlab/** * @file wchmm.c * * * @brief 木構造化辞書の構築 * * ここでは,与えられた単語辞書, HMM定義および言語制約から木構造化辞書を * 構築する関数が定義されています. 木構造化辞書は起動時に構築され, * 第1パスの認識に用いられます. 木構造化辞書は状態単位で構成され, * 各状態はHMM出力確率と遷移先の他,および探索のための様々な情報を含みます. * * 開発の経緯上,ソース内では木構造化辞書は wchmm (word-conjunction HMM) と * も表現されています. * * * * * @brief Construction of tree lexicon. * * Functions to build a tree lexicon (or called word-conjunction HMM here) * from word dictionary, HMM and language models are defined here. The * constructed tree lexicon will be used for the recognition of the 1st pass. * The lexicon is composed per HMM state unit, and various informations * about output probabilities, arcs, language model constraints, and others * are assembled in the lexicon. * * Note that the word "wchmm" in the source code is a synonim of * "tree lexicon". 
* * * @author Akinobu Lee * @date Mon Sep 19 23:39:15 2005 * * $Revision: 1.10 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ /* wchmm = word conjunction HMM = lexicon tree */ #include #define WCHMM_SIZE_CHECK ///< If defined, do wchmm size estimation (for debug only) /**************************************************************/ /*********** Initialization of tree lexicon *******************/ /**************************************************************/ /** * * 木構造化辞書構造体を新規に割り付ける. * * @return 新たにメモリ上に割り付けられた木構造化辞書構造体へのポインタを返す. * * * Allocate a new tree lexicon structure. * * @return pointer to the newly allocated tree lexicon structure. * * @callgraph * @callergraph */ WCHMM_INFO * wchmm_new() { WCHMM_INFO *w; w = (WCHMM_INFO *)mymalloc(sizeof(WCHMM_INFO)); w->lmtype = LM_UNDEF; w->lmvar = LM_UNDEF; w->ngram = NULL; w->dfa = NULL; w->winfo = NULL; w->malloc_root = NULL; #ifdef PASS1_IWCD w->lcdset_category_root = NULL; w->lcdset_mroot = NULL; #endif /* PASS1_IWCD */ w->wrk.out_from_len = 0; /* reset user function entry point */ w->uni_prob_user = NULL; w->bi_prob_user = NULL; return w; } /** * * 木構造化辞書の内容を初期化する. * * @param wchmm [out] 木構造化辞書へのポインタ * * * Initialize content of a lexicon tree. * * @param wchmm [out] pointer to the lexicon tree structure * */ static void wchmm_init(WCHMM_INFO *wchmm) { /* the resulting tree size is typically half of total state num */ wchmm->maxwcn = wchmm->winfo->totalstatenum / 2; wchmm->state = (WCHMM_STATE *)mymalloc(sizeof(WCHMM_STATE)*wchmm->maxwcn); wchmm->self_a = (LOGPROB *)mymalloc(sizeof(LOGPROB)*wchmm->maxwcn); wchmm->next_a = (LOGPROB *)mymalloc(sizeof(LOGPROB)*wchmm->maxwcn); wchmm->ac = (A_CELL2 **)mymalloc(sizeof(A_CELL2 *)*wchmm->maxwcn); wchmm->stend = (WORD_ID *)mymalloc(sizeof(WORD_ID)*wchmm->maxwcn); wchmm->offset = (int **)mymalloc(sizeof(int *)*wchmm->winfo->num); wchmm->wordend = (int *)mymalloc(sizeof(int)*wchmm->winfo->num); wchmm->maxstartnum = STARTNODE_STEP; wchmm->startnode = (int *)mymalloc(sizeof(int)*STARTNODE_STEP); wchmm->startnum = 0; if (wchmm->category_tree) { wchmm->start2wid = (WORD_ID *)mymalloc(sizeof(WORD_ID)*STARTNODE_STEP); } if (wchmm->hmminfo->multipath) { wchmm->wordbegin = (int *)mymalloc(sizeof(int)*wchmm->winfo->num); wchmm->wrk.out_from = (int *)mymalloc(sizeof(int) * wchmm->winfo->maxwn); wchmm->wrk.out_from_next = (int *)mymalloc(sizeof(int) * wchmm->winfo->maxwn); wchmm->wrk.out_a = (LOGPROB *)mymalloc(sizeof(LOGPROB) * wchmm->winfo->maxwn); wchmm->wrk.out_a_next = (LOGPROB *)mymalloc(sizeof(LOGPROB) * wchmm->winfo->maxwn); wchmm->wrk.out_from_len = wchmm->winfo->maxwn; } else { wchmm->wordend_a = (LOGPROB *)mymalloc(sizeof(LOGPROB)*wchmm->winfo->num); } #ifdef PASS1_IWCD wchmm->outstyle = (unsigned char *)mymalloc(sizeof(unsigned char)*wchmm->maxwcn); #endif #ifdef UNIGRAM_FACTORING wchmm->start2isolate = NULL; wchmm->isolatenum = 0; #endif if (!wchmm->category_tree) { #ifdef UNIGRAM_FACTORING wchmm->scword = NULL; wchmm->fscore = NULL; #endif wchmm->sclist = NULL; wchmm->sclen = NULL; } wchmm->n = 0; } /** * * 木構造化辞書の状態格納領域を MAXWCNSTEP 分だけ伸長する. * * @param wchmm [i/o] 木構造化辞書 * * * Expand state-related area in a tree lexicon by MAXWCNSTEP. * * @param wchmm [i/o] tree lexicon * */ static void wchmm_extend(WCHMM_INFO *wchmm) { /* practical value! 
*/ wchmm->maxwcn += wchmm->winfo->totalstatenum / 6; wchmm->state = (WCHMM_STATE *)myrealloc(wchmm->state, sizeof(WCHMM_STATE)*wchmm->maxwcn); wchmm->self_a = (LOGPROB *)myrealloc(wchmm->self_a, sizeof(LOGPROB)*wchmm->maxwcn); wchmm->next_a = (LOGPROB *)myrealloc(wchmm->next_a, sizeof(LOGPROB)*wchmm->maxwcn); wchmm->ac = (A_CELL2 **)myrealloc(wchmm->ac, sizeof(A_CELL2 *)*wchmm->maxwcn); wchmm->stend = (WORD_ID *)myrealloc(wchmm->stend, sizeof(WORD_ID)*wchmm->maxwcn); #ifdef PASS1_IWCD wchmm->outstyle = (unsigned char *)myrealloc(wchmm->outstyle, sizeof(unsigned char)*wchmm->maxwcn); #endif } /** * * 木構造化辞書の単語先頭ノード格納領域を STARTNODE_STEP分だけ伸長する. (multipath) * * @param wchmm [i/o] 木構造化辞書 * * * Expand word-start nodes area in a tree lexicon by STARTNODE_STEP. (multipath) * * @param wchmm [i/o] tree lexicon * */ static void wchmm_extend_startnode(WCHMM_INFO *wchmm) { wchmm->maxstartnum += STARTNODE_STEP; wchmm->startnode = (int *)myrealloc(wchmm->startnode, sizeof(int) * wchmm->maxstartnum); if (wchmm->category_tree) { wchmm->start2wid = (WORD_ID *)myrealloc(wchmm->start2wid, sizeof(WORD_ID) * wchmm->maxstartnum); } } /** * * 木構造化辞書およびその内部の割付メモリを全て解放する. * * @param w [in] 木構造化辞書 * * * Free all data in a tree lexicon. * * @param w [in] tree lexicon * * @callgraph * @callergraph */ void wchmm_free(WCHMM_INFO *w) { int i; /* wchmm->state[i].ac malloced by mybmalloc2() */ /* wchmm->offset[][] malloced by mybmalloc2() */ #ifdef PASS1_IWCD /* LRC_INFO, RC_INFO in wchmm->state[i].outsty malloced by mybmalloc2() */ #endif /* wchmm->sclist[][] and wchmm->sclen[] malloced by mybmalloc2() */ /* they all will be freed by a single mybfree2() call */ mybfree2(&(w->malloc_root)); if (!w->category_tree) { #ifdef UNIGRAM_FACTORING if (w->fscore != NULL) free(w->fscore); #endif } #ifdef UNIGRAM_FACTORING if (w->start2isolate != NULL) free(w->start2isolate); #endif #ifdef PASS1_IWCD free(w->outstyle); #endif if (w->hmminfo->multipath) { free(w->wordbegin); } else { free(w->wordend_a); } if (w->category_tree) free(w->start2wid); free(w->startnode); free(w->wordend); free(w->offset); free(w->stend); free(w->ac); free(w->next_a); free(w->self_a); free(w->state); #ifdef PASS1_IWCD if (w->category_tree) lcdset_remove_with_category_all(w); #endif /* PASS1_IWCD */ if (w->wrk.out_from_len != 0) { free(w->wrk.out_from); free(w->wrk.out_from_next); free(w->wrk.out_a); free(w->wrk.out_a_next); w->wrk.out_from_len = 0; } free(w); } /**************************************************************/ /*********** Word sort functions for tree construction ********/ /**************************************************************/ /** * * 単語を音素のならびでソートするqsort_reentrant関数 * * @param widx1 [in] 単語ID 1 へのポインタ * @param widx2 [in] 単語ID 2 へのポインタ * * @return 単語widx2が単語widx1の一部か昇順であれば 1, 単語widx1が単語widx2の一部か昇順であれば -1, 全く同じ音素並びであれば 0 を返す. * * * qsort_reentrant function to sort words by their phoneme sequence. * * @param widx1 [in] pointer to word id #1 * @param widx2 [in] pointer to wrod id #2 * * @return 1 if word[widx2] is part of word[widx1], -1 if word[widx1] is part of word[widx2], or 0 if the two words are equal. 
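 *
 * As a small worked example, words whose phoneme sequences are "a k",
 * "a k a" and "a s a" will be ordered as "a k", "a k a", "a s a" by this
 * comparison, since a word that is a prefix of another is placed before it.
 * Words sharing a phoneme prefix thus become adjacent in the sorted index,
 * which is what allows the tree construction to share their head nodes.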
* */ static int compare_wseq(WORD_ID *widx1, WORD_ID *widx2, WORD_INFO *winfo) { int len1, len2, n; int p=0; len1 = winfo->wlen[*widx1]; len2 = winfo->wlen[*widx2]; n=0; /* while (n < len1 && n < len2 && (p = (int)winfo->wseq[*widx1][n] - (int)winfo->wseq[*widx2][n]) == 0 ) n++;*/ while (n < len1 && n < len2 && (p = strcmp((winfo->wseq[*widx1][n])->name, (winfo->wseq[*widx2][n])->name)) == 0 ) n++; if (n < len1) { if (n < len2) { /* differ */ return(p); } else { /* 2 is part of 1 */ return(1); } } else { if (n < len2) { /* 1 is part of 2 */ return(-1); } else { /* same */ return(0); } } } /** * * 単語IDの集合 windex[bgn..bgn+len-1] を単語の音素ならびでソートする. * * @param winfo [in] 単語辞書 * @param windex [i/o] 単語IDのインデックス列(内部でソートされる) * @param bgn [in] @a windex のソート開始点 * @param len [in] @a windex の @a bgn からのソートする要素数 * * * Sort word IDs in windex[bgn..bgn+len-1] by their phoneme sequence order. * * @param winfo [in] word lexicon * @param windex [i/o] index sequence of word IDs, (will be sorted in this function) * @param bgn [in] start point to sort in @a windex * @param len [in] length of indexes to be sorted from @a bgn * */ static void wchmm_sort_idx_by_wseq(WORD_INFO *winfo, WORD_ID *windex, WORD_ID bgn, WORD_ID len) { qsort_reentrant(&(windex[bgn]), len, sizeof(WORD_ID), (int (*)(const void *, const void *, void *))compare_wseq, winfo); } /** * * 単語をカテゴリIDでソートするqsort関数. * * @param widx1 [in] 要素1へのポインタ * @param widx2 [in] 要素2へのポインタ * * @return * * * qsort function to sort words by their category ID. * * @param widx1 [in] pointer to element #1 * @param widx2 [in] pointer to element #2 * * @return * */ static int compare_category(WORD_ID *widx1, WORD_ID *widx2, WORD_INFO *winfo) { int c1,c2; c1 = winfo->wton[*widx1]; c2 = winfo->wton[*widx2]; return(c1 - c2); } /** * * 単語ID集合 windex[0..len-1] をカテゴリIDでソートする. * * @param winfo [in] 単語辞書 * @param windex [i/o] 単語IDのインデックス列(内部でソートされる) * @param len [in] @a windex の要素数 * * * Sort word IDs in windex[0..len-1] by their category ID. * * @param winfo [in] tree lexicon * @param windex [i/o] index sequence of word IDs, (will be sorted in this function) * @param len [in] number of elements in @a windex * */ static void wchmm_sort_idx_by_category(WORD_INFO *winfo, WORD_ID *windex, WORD_ID len) { qsort_reentrant(windex, len, sizeof(WORD_ID), (int (*)(const void *, const void *, void *))compare_category, winfo); } /**********************************************************************/ /************** Subroutines to link part of words ********************/ /**********************************************************************/ /** * * 2単語間で,単語の先頭から同一で共有可能な音素の数を調べる. * * @param winfo [in] 単語辞書 * @param i [in] 単語1 * @param j [in] 単語2 * * @return 共有可能な先頭からの音素数を返す. * * * Compare two words from word head per phoneme to see how many phones * can be shared among the two. * * @param winfo [in] word dictionary * @param i [in] a word * @param j [in] another word * * @return the number of phonemes to be shared from the head of the words. * */ static int wchmm_check_match(WORD_INFO *winfo, int i, int j) { int k,tmplen; for (tmplen=0,k=0;kwlen[i];k++) { if (k > winfo->wlen[j]-1) break; if (! (strmatch(winfo->wseq[i][k]->name, winfo->wseq[j][k]->name))) break; tmplen++; } return(tmplen); } /** * * Initialize transition information on a node. * * * ノードの遷移情報を初期化する. 
* * * @param wchmm [i/o] tree lexicon * @param node [in] node id * */ static void acc_init(WCHMM_INFO *wchmm, int node) { wchmm->self_a[node] = LOG_ZERO; wchmm->next_a[node] = LOG_ZERO; wchmm->ac[node] = NULL; } /** * * Add an arc to a node. * This function is for transition other than self and next node. * * * ノードに遷移を追加する. * この関数は自己遷移・隣への遷移以外の場合に使用される. * * * @param wchmm [i/o] tree lexicon * @param node [in] node id * @param a [in] transition probability in log10 * @param arc [in] transition destination node id * */ static void add_ac(WCHMM_INFO *wchmm, int node, LOGPROB a, int arc) { A_CELL2 *ac2; for(ac2=wchmm->ac[node];ac2;ac2=ac2->next) { if (ac2->n < A_CELL2_ALLOC_STEP) break; } if (ac2 == NULL) { ac2 = (A_CELL2 *)mybmalloc2(sizeof(A_CELL2), &(wchmm->malloc_root)); ac2->n = 0; ac2->next = wchmm->ac[node]; wchmm->ac[node] = ac2; } ac2->arc[ac2->n] = arc; ac2->a[ac2->n] = a; ac2->n++; } /** * * 木構造化辞書のあるノードに,別のノードへの遷移を追加する * * @param wchmm [i/o] 木構造化辞書 * @param node [in] ノード番号 * @param a [in] 遷移確率(対数) * @param arc [in] 遷移先のノード番号 * * * Add a transition arc between two nodes on the tree lexicon * * @param wchmm [i/o] tree lexicon * @param node [in] node number of source node * @param a [in] transition probability in log scale * @param arc [in] node number of destination node * */ static void add_wacc(WCHMM_INFO *wchmm, int node, LOGPROB a, int arc) { if (arc == node) { wchmm->self_a[node] = a; } else if (arc == node + 1) { wchmm->next_a[node] = a; } else { add_ac(wchmm, node, a, arc); } } /** * * ある単語のある位置の音素から単語末端の外へ出る遷移のリストを得る. (multipath) * * @param wchmm [in] 木構造化辞書 * @param w [in] 単語ID * @param pos [in] 音素位置 * @param node [out] 音素内の,単語末端外への遷移を持つ状態のリスト * @param a [out] @a node の各要素の遷移確率 * @param num [out] @a node の要素数. 発見数だけ増加される. * @param maxnum [in] @a node の格納可能な最大数 * @param insert_sp [in] 単語終端での sp 挟み込みを考慮するならTRUE * * * Make outgoing transition list for given phone position of a word. 
(multipath) * * @param wchmm [in] tree lexicon * @param w [in] word ID * @param pos [in] location of target phone to be inspected in the word @a w * @param node [out] list of wchmm states that possibly has outgoing transition * @param a [out] transition probabilities of the outgoing transitions in @a node * @param num [out] number of elements in @a out (found num will be added) * @param maxnum [in] maximum number of elements that can be stored in @a node * @param insert_sp [in] TRUE if consider short-pause insertion on word end * */ static void get_outtrans_list(WCHMM_INFO *wchmm, WORD_ID w, int pos, int *node, LOGPROB *a, int *num, int maxnum, boolean insert_sp) { HMM_Logical *ltmp; int states; int k; LOGPROB prob; int oldnum; if (pos < 0) { /* set the word-beginning node, and return */ node[*num] = wchmm->wordbegin[w]; a[*num] = 0.0; (*num)++; } else { ltmp = wchmm->winfo->wseq[w][pos]; states = hmm_logical_state_num(ltmp); /* check initial->final state */ if ((hmm_logical_trans(ltmp))->a[0][states-1] != LOG_ZERO) { /* recursive call for previous phone */ oldnum = *num; get_outtrans_list(wchmm, w, pos-1, node, a, num, maxnum, FALSE); /* previous phone should not be an sp-inserted phone */ /* add probability of the skip transition to all the previous ones */ for(k=oldnum;k<*num;k++) { a[k] += (hmm_logical_trans(ltmp))->a[0][states-1]; } } /* add to list the arcs from output state to final state */ for (k = 1; k < states - 1; k++) { prob = (hmm_logical_trans(ltmp))->a[k][states-1]; if (prob != LOG_ZERO) { if (*num >= maxnum) { j_internal_error("get_outtrans_list: maximum outtrans list num exceeded %d\n", maxnum); } node[*num] = wchmm->offset[w][pos] + k - 1; a[*num] = prob; (*num)++; } } /* for -iwsp, add outgoing arc from the tail sp model only if need_sp == TRUE. need_sp should be TRUE only when the connecting [pos] phone is also an end phone of the to-be-added word (i.e. homophone word) */ /* */ if (insert_sp) { /* consider sp */ for (k = 1; k < hmm_logical_state_num(wchmm->hmminfo->sp) - 1; k++) { prob = hmm_logical_trans(wchmm->hmminfo->sp)->a[k][hmm_logical_state_num(wchmm->hmminfo->sp)-1]; if (prob != LOG_ZERO) { if (*num >= maxnum) { j_internal_error("get_outtrans_list: maximum outtrans list num exceeded %d\n", maxnum); } node[*num] = wchmm->offset[w][pos] + (states - 2) + k - 1; a[*num] = prob; (*num)++; } } } } /*printf(" %d(%s)-%d:\"%s\", num=%d\n", w, wchmm->winfo->woutput[w], pos, (pos < 0) ? "BGN" : wchmm->winfo->wseq[w][pos]->name, *num);*/ return; } /** * * ある音素の末尾の状態から,ある音素の先頭状態への遷移を追加する. * * @param wchmm [i/o] 木構造化辞書 * @param from_node [in] ある音素の末尾の状態 * @param to_node [in] ある音素の先頭状態 * @param tinfo [in] @a from_node の属する音素HMMの遷移確率行列 * * * Add a transition from end node of a phone to start node of another phone. * * @param wchmm [i/o] tree lexicon * @param from_node [in] end node of a phone * @param to_node [in] start node of a phone * @param tinfo [in] transition prob. matrix of the @a from_node phone. 
* */ static void wchmm_link_hmm(WCHMM_INFO *wchmm, int from_node, int to_node, HTK_HMM_Trans *tinfo) { A_CELL2 *actmp; LOGPROB a; int i, j; boolean tflag; /* get transition probability to outer state in tinfo */ for(i = tinfo->statenum - 2; i >= 0; i--) { if ((a = tinfo->a[i][tinfo->statenum-1]) != LOG_ZERO) { /* found */ /* check if the arc already exist */ tflag = FALSE; if (to_node == from_node && wchmm->self_a[from_node] == a) { tflag = TRUE; } else if (to_node == from_node + 1 && wchmm->next_a[from_node] == a) { tflag = TRUE; } else { for (actmp = wchmm->ac[from_node]; actmp; actmp = actmp->next) { for(j=0;jn;j++) { if (actmp->arc[j] == to_node && actmp->a[j] == a) { tflag = TRUE; break; } } if (tflag == TRUE) break; } } if (tflag) break; /* add the arc to wchmm */ add_wacc(wchmm, from_node, a, to_node); return; /* exit function here */ } } j_internal_error("wchmm_link_hmm: No arc to endstate?\n"); } /** * * 木構造化辞書中の2単語中のある音素間を接続する. * * @param wchmm [i/o] 木構造化辞書 * @param from_word [in] 遷移元の単語のID * @param from_seq [in] 遷移元の単語中の接続する音素の位置 * @param to_word [in] 遷移先の単語のID * @param to_seq [in] 遷移先の単語中の接続する音素の位置 * * * Connect two phonemes in tree lexicon. * * @param wchmm [i/o] tree lexicon * @param from_word [in] source word ID * @param from_seq [in] index of source phoneme in @a from_word from which the other will be connected * @param to_word [in] destination word ID * @param to_seq [in] index of destination phoneme in @a to_word to which the other will connect * */ static void wchmm_link_subword(WCHMM_INFO *wchmm, int from_word, int from_seq, int to_word, int to_seq) { HMM_Logical *last; int lastp; last = wchmm->winfo->wseq[from_word][from_seq]; lastp = wchmm->offset[from_word][from_seq] + hmm_logical_state_num(last)-2 -1; wchmm_link_hmm(wchmm, lastp, wchmm->offset[to_word][to_seq], hmm_logical_trans(last)); } /**************************************************************/ /******** homophone processing: duplicating leaf nodes ********/ /**************************************************************/ /** * @note * * 同音語処理: * 木構造化辞書においてすべての単語は独立した最終状態を持つ必要があるため, * 同音語は注意深く扱う必要がある. このため,最初の木構造化辞書を構築した後, * 別の単語と完全に共有された単語(同音語), あるいは別の単語の一部として * 埋め込まれてしまっている単語を発見するとともに, その最終ノードを * コピーして新たな単語終端ノードを作る必要がある. * * * Homophones: * As all words need to have an uniq state as a final state in a lexicon tree, * homophones should be handled carefully. After primal tree has been made, * we look through the tree to find the fully shared or embedded words * (homophone or part of other word), and duplicate the last leaf node * to have uniq end state. * */ /** * * 単語終端状態の独立化:与えられた単語の終端ノードをコピーして, * 新たにある単語の最終状態として定義する. * * @param wchmm [i/o] 木構造化辞書 * @param node [in] 同音語の終端ノード番号 * @param word [in] 新たに登録する単語 * * * Isolation of word-end nodes for homophones: duplicate the word-end state, * link as the same as original, and make it the new word-end node of the * given new word. 
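 *
 * As a small illustration: if word "B" has exactly the same phoneme sequence
 * as an already registered word "A", both would otherwise end in the same
 * leaf state, and stend[] could report only one of them.  Duplicating the
 * leaf gives each word its own end node:
 *
 *     before:  1-o-o - o-o-[n]      stend[n]  = A   (B has no end node yet)
 *     after :  1-o-o - o-o-[n]      stend[n]  = A
 *                           \
 *                            [n']   stend[n'] = B   (n' copies the arcs into n)
 *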
* * @param wchmm [i/o] tree lexicon * @param node [in] the word end node of the already existing homophone * @param word [in] word ID to be added to the tree * */ static void wchmm_duplicate_state(WCHMM_INFO *wchmm, int node, int word) /* source node, new word */ { int j, n; int n_src, n_prev; A_CELL2 *ac; HMM_Logical *lastphone; /* 1 state will newly created: expand tree if needed */ if (wchmm->n + 1 >= wchmm->maxwcn) { wchmm_extend(wchmm); } /* n: the target new node to which 'node' is copied */ n = wchmm->n; n_src = node; /* copy output probability info */ #ifdef PASS1_IWCD { RC_INFO *rcnew; LRC_INFO *lrcnew; wchmm->outstyle[n] = wchmm->outstyle[n_src]; if (wchmm->outstyle[n] == AS_RSET) { /* duplicate RC_INFO because it has its own cache */ rcnew = (RC_INFO *)mybmalloc2(sizeof(RC_INFO), &(wchmm->malloc_root)); memcpy(rcnew, wchmm->state[n_src].out.rset, sizeof(RC_INFO)); wchmm->state[n].out.rset = rcnew; } else if (wchmm->outstyle[n] == AS_LRSET) { /* duplicate LRC_INFO because it has its own cache */ lrcnew = (LRC_INFO *)mybmalloc2(sizeof(LRC_INFO), &(wchmm->malloc_root)); memcpy(lrcnew, wchmm->state[n_src].out.lrset, sizeof(LRC_INFO)); wchmm->state[n].out.lrset = lrcnew; } else { /* share same info, simply copy the pointer */ memcpy(&(wchmm->state[n].out), &(wchmm->state[n_src].out), sizeof(ACOUSTIC_SPEC)); } } #else /* ~PASS1_IWCD */ memcpy(&(wchmm->state[n].out), &(wchmm->state[n_src].out), sizeof(HTK_HMM_State *)); #endif lastphone = wchmm->winfo->wseq[word][wchmm->winfo->wlen[word]-1]; acc_init(wchmm, n); /* add self transition arc */ wchmm->self_a[n] = wchmm->self_a[n_src]; /* copy transition arcs whose destination is the source node to new node */ if (hmm_logical_state_num(lastphone) == 3) { /* = 1 state */ /* phone with only 1 state should be treated carefully */ if (wchmm->winfo->wlen[word] == 1) { /* word consists of only this phone */ /* no arcs need to be copied: this is also a start node of a word */ wchmm->offset[word][0] = n; /* index the new word-beginning node as startnode (old ststart) */ if (wchmm->lmtype != LM_PROB || word != wchmm->winfo->head_silwid) { wchmm->startnode[wchmm->startnum] = n; if (wchmm->category_tree) wchmm->start2wid[wchmm->startnum] = word; /* expand data area if necessary */ if (++wchmm->startnum >= wchmm->maxstartnum) wchmm_extend_startnode(wchmm); } } else { /* copy arcs from the last state of the previous phone */ n_prev = wchmm->offset[word][wchmm->winfo->wlen[word]-2] + hmm_logical_state_num(wchmm->winfo->wseq[word][wchmm->winfo->wlen[word]-2]) - 3; if(n_src == n_prev + 1) { add_wacc(wchmm, n_prev, wchmm->next_a[n_prev], n); } else { for(ac=wchmm->ac[n_prev];ac;ac=ac->next) { for(j=0;jn;j++) { if (ac->arc[j] == n_src) { add_wacc(wchmm, n_prev, ac->a[j], n); } } } } /* also update the last offset (== wordend in this case) */ wchmm->offset[word][wchmm->winfo->wlen[word]-1] = n; } } else { /* phone with more than 2 states */ /* copy arcs from/to the source node to new node */ for (n_prev = wchmm->offset[word][wchmm->winfo->wlen[word]-1]; n_prev < n_src; n_prev++) { if (n_src == n_prev + 1) { add_wacc(wchmm, n_prev, wchmm->next_a[n_prev], n); } else { for(ac=wchmm->ac[n_prev];ac;ac=ac->next) { for(j=0;jn;j++) { if (ac->arc[j] == n_src) { add_wacc(wchmm, n_prev, ac->a[j], n); } } } } if (n_prev == n_src + 1) { add_wacc(wchmm, n, wchmm->next_a[n_src], n_prev); } else { for(ac=wchmm->ac[n_src];ac;ac=ac->next) { for(j=0;jn;j++) { if (ac->arc[j] == n_prev) { add_wacc(wchmm, n, ac->a[j], n_prev); } } } } } } /* map word <-> node */ wchmm->stend[n] = 
word; /* 'n' is an end node of word 'word' */ wchmm->wordend[word] = n; /* the word end node of 'word' is 'n' */ /* new state has been created: increment the size */ wchmm->n++; } /** * * 木構造化辞書全体を走査して,すべての同音語について単語終端状態の独立化 * を行う. * * @param wchmm [i/o] 木構造化辞書 * * * Scan the whole lexicon tree to find already registered homophones, and * make word-end nodes of the found homophones isolated from others. * * @param wchmm [i/o] tree lexicon * */ static int wchmm_duplicate_leafnode(WCHMM_INFO *wchmm) { int w, nlast, n, narc, narc_model; boolean *dupw; /* node marker */ A_CELL2 *actmp; int dupcount; dupcount = 0; nlast = wchmm->n; dupw = (boolean *)mymalloc(sizeof(boolean) * nlast); for(n=0;nwinfo->num;w++) { n = wchmm->wordend[w]; if (dupw[n]) { /* if already marked (2nd time or later */ wchmm_duplicate_state(wchmm, n, w); dupcount++; /* duplicate */ } else { /* if not marked yet (1st time) */ /* try to find an arc outside the word */ { /* count number of model-internal arc from the last state */ HMM_Logical *lastphone; HTK_HMM_Trans *tinfo; int laststate, i; lastphone = wchmm->winfo->wseq[w][wchmm->winfo->wlen[w]-1]; laststate = hmm_logical_state_num(lastphone) - 2; tinfo = hmm_logical_trans(lastphone); narc_model=0; for(i=1;ia[laststate][i] != LOG_ZERO) narc_model++; } /* count number of actual arc from the last state in the tree */ narc = 0; if (wchmm->self_a[n] != LOG_ZERO) narc++; if (wchmm->next_a[n] != LOG_ZERO) narc++; for(actmp=wchmm->ac[n];actmp;actmp=actmp->next) narc += actmp->n; } /* if both number does not match, it means it is not a single word tail */ if (narc_model != narc) { /* word 'w' is embedded as part of other words at this node 'n' */ /* duplicate this node now */ wchmm_duplicate_state(wchmm, n, w); dupcount++; /* as new node has been assigned as word end node of word 'w', reset this source node as it is not the word end node */ wchmm->stend[n] = WORD_INVALID; } else { /* no arc to other node found, it means it is a single word tail */ /* as this is first time, only make sure that this node is word end of [w] */ wchmm->stend[n] = w; } /* mark node 'n' */ dupw[n] = TRUE; } } free(dupw); return(dupcount); } /**************************************************************/ /*************** add a word to wchmm lexicon tree *************/ /**************************************************************/ /** * * 木構造化辞書に新たに単語を追加する. 追加場所の情報として,現在の木構造化 * 辞書内で最もその単語と先頭から良くマッチする単語,およびそのマッチする長さ * を指定する. * * @param wchmm [i/o] 木構造化辞書 * @param word [in] 追加する辞書単語のID * @param matchlen [in] @a word と @a matchword の先頭からマッチする音素長 * @param matchword [in] 既存の木構造化辞書中で @a word と最もマッチする単語 * @param enable_iwsp [in] 単語間ショートポーズ機能使用時TRUEを指定 * * * Add a new word to the lexicon tree. The longest matched word in the current * lexicon tree and the length of the matched phoneme from the word head should * be specified to tell where to insert the new word to the tree. * * @param wchmm [i/o] tree lexicon * @param word [in] word id to be added to the lexicon * @param matchlen [in] phoneme match length between @a word and @a matchword. 
* @param matchword [in] the longest matched word with @a word in the current lexicon tree * @param enable_iwsp [in] should be TRUE when using inter-word short pause option * */ static boolean wchmm_add_word(WCHMM_INFO *wchmm, int word, int matchlen, int matchword, boolean enable_iwsp) { boolean ok_p; int j,k,n; int add_head, add_tail, add_to; int word_len, matchword_len; HMM_Logical *ltmp; int ato; LOGPROB prob; int ntmp; int ltmp_state_num; #ifdef PASS1_IWCD CD_Set *lcd = NULL; #endif int *out_from; int *out_from_next; LOGPROB *out_a; LOGPROB *out_a_next; /* for multipath handling */ int out_num_prev, out_num_next; int kkk; ok_p = TRUE; if (wchmm->hmminfo->multipath) { out_from = wchmm->wrk.out_from; out_from_next = wchmm->wrk.out_from_next; out_a = wchmm->wrk.out_a; out_a_next = wchmm->wrk.out_a_next; } /* * if (matchlen > 0) { * printf("--\n"); * put_voca(stdout, wchmm->winfo, word); * put_voca(stdout, wchmm->winfo, matchword); * printf("matchlen=%d\n", matchlen); * } */ /* variable abbreviations */ n = wchmm->n; word_len = wchmm->winfo->wlen[word]; matchword_len = wchmm->winfo->wlen[matchword]; /* malloc phone offset area */ wchmm->offset[word] = (int *)mybmalloc2(sizeof(int)*word_len, &(wchmm->malloc_root)); /* allocate unshared (new) part */ add_head = matchlen; add_tail = word_len - 1; add_to = matchlen - 1; if (wchmm->hmminfo->multipath) { /* make word-beginning node if needed */ if (matchlen == 0) { /* create word-beginning node */ wchmm->wordbegin[word] = n; wchmm->stend[n] = WORD_INVALID; acc_init(wchmm, n); wchmm->state[n].out.state = NULL; /* index the new word-beginning node as startnode (old ststart) */ wchmm->startnode[wchmm->startnum] = n; if (wchmm->category_tree) wchmm->start2wid[wchmm->startnum] = word; /* expand data area if necessary */ if (++wchmm->startnum >= wchmm->maxstartnum) wchmm_extend_startnode(wchmm); if (++n >= wchmm->maxwcn) wchmm_extend(wchmm); } else { wchmm->wordbegin[word] = wchmm->wordbegin[matchword]; } /* now n is at beginning of output state */ /* store the initial outgoing arcs to out_from[] and out_a[] */ out_num_prev = 0; if (matchlen == 0) { /* set the word-beginning node */ out_from[0] = wchmm->wordbegin[word]; out_a[0] = 0.0; out_num_prev = 1; } else { /*printf("%d(%s)\n", word, wchmm->winfo->woutput[word]);*/ /* on -iwsp, trailing sp is needed only when no phone will be created */ get_outtrans_list(wchmm, matchword, add_to, out_from, out_a, &out_num_prev, wchmm->winfo->maxwn, (enable_iwsp && add_tail - add_head + 1 <= 0) ? 
TRUE : FALSE); /*printf("NUM=%d\n", out_num_prev);*/ } } else { /* end of multipath block */ if (matchlen == 0) { if (wchmm->lmtype != LM_PROB || word != wchmm->winfo->head_silwid) { /* index the new word-beginning node as startnode (old ststart) */ wchmm->startnode[wchmm->startnum] = n; if (wchmm->category_tree) wchmm->start2wid[wchmm->startnum] = word; /* expand data area if necessary */ if (++wchmm->startnum >= wchmm->maxstartnum) wchmm_extend_startnode(wchmm); } } } if (add_tail - add_head + 1 > 0) { /* there are new phones to be created */ ntmp = n; for (j=add_head; j <= add_tail; j++) { /* for each new phones */ ltmp = wchmm->winfo->wseq[word][j]; ltmp_state_num = hmm_logical_state_num(ltmp); #ifdef PASS1_IWCD if (wchmm->ccd_flag) { /* in the triphone lexicon tree, the last phone of a word has left-context cdset */ if (wchmm->winfo->wlen[word] > 1 && j == wchmm->winfo->wlen[word] - 1) { if (wchmm->category_tree) { #ifdef USE_OLD_IWCD lcd = lcdset_lookup_by_hmmname(wchmm->hmminfo, ltmp->name); #else lcd = lcdset_lookup_with_category(wchmm, ltmp, wchmm->winfo->wton[word]); if (lcd == NULL) { /* no category-aware cdset found. This is case when no word can follow this word grammatically. so fallback to normal state */ jlog("WARNING: wchmm: no lcdset found for [%s::%04d], fallback to [%s]\n", ltmp->name, wchmm->winfo->wton[word], ltmp->name); lcd = lcdset_lookup_by_hmmname(wchmm->hmminfo, ltmp->name); } #endif } else { lcd = lcdset_lookup_by_hmmname(wchmm->hmminfo, ltmp->name); } if (lcd == NULL) { jlog("ERROR: wchmm: at word #%d: no lcdset found for [%s]\n", word, ltmp->name); ok_p = FALSE; } } } #endif /* PASS1_IWCD */ for (k = 1; k < ltmp_state_num - 1; k++) { /* for each state in the phone */ /* set state output prob info */ #ifdef PASS1_IWCD if (wchmm->ccd_flag) { /* output info of triphones needs special handling */ if (wchmm->winfo->wlen[word] == 1) { /* word with only 1 phone */ wchmm->outstyle[ntmp] = AS_LRSET; wchmm->state[ntmp].out.lrset = (LRC_INFO *)mybmalloc2(sizeof(LRC_INFO), &(wchmm->malloc_root)); (wchmm->state[ntmp].out.lrset)->hmm = ltmp; (wchmm->state[ntmp].out.lrset)->state_loc = k; if (wchmm->category_tree) { (wchmm->state[ntmp].out.lrset)->category = wchmm->winfo->wton[word]; } } else if (j == 0) { /* head phone of a word */ wchmm->outstyle[ntmp] = AS_RSET; wchmm->state[ntmp].out.rset = (RC_INFO *)mybmalloc2(sizeof(RC_INFO), &(wchmm->malloc_root)); (wchmm->state[ntmp].out.rset)->hmm = ltmp; (wchmm->state[ntmp].out.rset)->state_loc = k; } else if (j == wchmm->winfo->wlen[word] - 1) { /* last phone of a word */ wchmm->outstyle[ntmp] = AS_LSET; wchmm->state[ntmp].out.lset = &(lcd->stateset[k]); } else { wchmm->outstyle[ntmp] = AS_STATE; if (ltmp->is_pseudo) { jlog("WARNING: wchmm: word-internal phone should not be pseudo\n"); put_voca(stdout, wchmm->winfo, word); ok_p = FALSE; } wchmm->state[ntmp].out.state = ltmp->body.defined->s[k]; } } else { /* monophone */ if (ltmp->is_pseudo) { j_internal_error("wchmm_add_word: CDSET phoneme exist in monophone?\n"); put_voca(stdout, wchmm->winfo, word); ok_p = FALSE; } wchmm->outstyle[ntmp] = AS_STATE; wchmm->state[ntmp].out.state = ltmp->body.defined->s[k]; } #else /* ~PASS1_IWCD */ if (ltmp->is_pseudo) { j_internal_error("wchmm_add_word: CDSET phoneme exist in monophone?\n"); put_voca(stdout, wchmm->winfo, word); ok_p = FALSE; } wchmm->state[ntmp].out = ltmp->body.defined->s[k]; #endif /* PASS1_IWCD */ /* initialize other info */ acc_init(wchmm, ntmp); wchmm->stend[ntmp] = WORD_INVALID; if (! 
wchmm->hmminfo->multipath) { /* make transition arc from HMM transition info */ for (ato = 1; ato < ltmp_state_num; ato++) { prob = (hmm_logical_trans(ltmp))->a[k][ato]; if (prob != LOG_ZERO) { if (j == add_tail && k == ltmp_state_num - 2 && ato == ltmp_state_num - 1) { /* arc outside new part will be handled later */ } else { add_wacc(wchmm, ntmp, prob, ntmp + ato - k); } } } } ntmp++; /* expand wchmm if neccesary */ if (ntmp >= wchmm->maxwcn) wchmm_extend(wchmm); } /* end of state loop */ } /* end of phone loop */ if (wchmm->hmminfo->multipath) { /* On multipath version, the skip transition should be handled! */ /* make transition arc from HMM transition info */ ntmp = n; for (j = add_head; j <= add_tail; j++) { ltmp = wchmm->winfo->wseq[word][j]; ltmp_state_num = hmm_logical_state_num(ltmp); out_num_next = 0; /* arc from initial state ... need arc expansion from precious phone */ for (ato = 1; ato < ltmp_state_num; ato++) { prob = (hmm_logical_trans(ltmp))->a[0][ato]; if (prob != LOG_ZERO) { /* expand arc from previous HMM */ if (ato == ltmp_state_num - 1) { /* to final state ... just register states for next expansion */ for(kkk=0; kkka[k][ato]; if (prob != LOG_ZERO) { if (ato == ltmp_state_num - 1) { /* to final state ... register states for next expansion */ out_from_next[out_num_next] = ntmp; out_a_next[out_num_next] = prob; out_num_next++; } else { add_wacc(wchmm, ntmp, prob, ntmp + ato - k); } } } ntmp++; } /* end of state loop */ /* swap out list for next phone */ for(kkk=0;kkkhmminfo->multipath && enable_iwsp && add_tail - add_head + 1 > 0) { /* there are new phones to be created */ int ntmp_bak; /* set short pause state info */ ntmp_bak = ntmp; if (wchmm->hmminfo->sp->is_pseudo) { for(k = 1;k < hmm_logical_state_num(wchmm->hmminfo->sp) - 1; k++) { wchmm->outstyle[ntmp] = AS_LSET; wchmm->state[ntmp].out.lset = &(wchmm->hmminfo->sp->body.pseudo->stateset[k]); acc_init(wchmm, ntmp); wchmm->stend[ntmp] = WORD_INVALID; ntmp++; if (ntmp >= wchmm->maxwcn) wchmm_extend(wchmm); } } else { for(k = 1;k < hmm_logical_state_num(wchmm->hmminfo->sp) - 1; k++) { wchmm->outstyle[ntmp] = AS_STATE; wchmm->state[ntmp].out.state = wchmm->hmminfo->sp->body.defined->s[k]; acc_init(wchmm, ntmp); wchmm->stend[ntmp] = WORD_INVALID; ntmp++; if (ntmp >= wchmm->maxwcn) wchmm_extend(wchmm); } } ntmp = ntmp_bak; /* connect incoming arcs from previous phone */ out_num_next = 0; for (ato = 1; ato < hmm_logical_state_num(wchmm->hmminfo->sp); ato++) { prob = hmm_logical_trans(wchmm->hmminfo->sp)->a[0][ato]; if (prob != LOG_ZERO) { /* to control short pause insertion, transition probability toward the word-end short pause will be given a penalty */ prob += wchmm->hmminfo->iwsp_penalty; if (ato == hmm_logical_state_num(wchmm->hmminfo->sp) - 1) { /* model has a model skip transition, just inherit them to next */ for(kkk=0; kkkhmminfo->sp)->a[0][hmm_logical_state_num(wchmm->hmminfo->sp)-1] == LOG_ZERO) { /* to make insertion sp model to have no effect on the original path, the skip transition probability should be 0.0 (=100%) */ prob = 0.0; for(kkk=0; kkkhmminfo->sp) - 1; k++) { for (ato = 1; ato < hmm_logical_state_num(wchmm->hmminfo->sp); ato++) { prob = hmm_logical_trans(wchmm->hmminfo->sp)->a[k][ato]; if (prob != LOG_ZERO) { if (ato == hmm_logical_state_num(wchmm->hmminfo->sp) - 1) { out_from_next[out_num_next] = ntmp; out_a_next[out_num_next] = prob; out_num_next++; } else { add_wacc(wchmm, ntmp, prob, ntmp + ato - k); } } } ntmp++; } /* swap work area for next */ for(kkk=0;kkk node on wchmm */ for 
(j=0;joffset[word][j] = wchmm->offset[matchword][j]; } else if (add_tail < j) { /* shared tail part (should not happen..) */ wchmm->offset[word][j] = wchmm->offset[matchword][j+(matchword_len-word_len)]; } else { /* newly created part */ wchmm->offset[word][j] = n; n += hmm_logical_state_num(wchmm->winfo->wseq[word][j]) - 2; } } if (wchmm->hmminfo->multipath) { /* create word-end node */ /* paranoia check if the short-pause addition has been done well */ if (enable_iwsp && add_tail - add_head + 1 > 0) { n += hmm_logical_state_num(wchmm->hmminfo->sp) - 2; if (n != ntmp) j_internal_error("wchmm_add_word: cannot match\n"); } /* create word-end node */ wchmm->wordend[word] = n; /* tail node of 'word' is 'n' */ wchmm->stend[n] = word; /* node 'k' is a tail node of 'word' */ acc_init(wchmm, n); wchmm->state[n].out.state = NULL; /* connect the final outgoing arcs in out_from[] to the word end node */ for(k = 0; k < out_num_prev; k++) { add_wacc(wchmm, out_from[k], out_a[k], n); } n++; if (n >= wchmm->maxwcn) wchmm_extend(wchmm); if (matchlen == 0) { /* check if the new word has whole word-skipping transition */ /* (use out_from and out_num_prev temporary) */ out_num_prev = 0; get_outtrans_list(wchmm, word, word_len-1, out_from, out_a, &out_num_prev, wchmm->winfo->maxwn, enable_iwsp); for(k=0;kwordbegin[word]) { jlog("ERROR: *** ERROR: WORD SKIPPING TRANSITION NOT ALLOWED ***\n"); jlog("ERROR: Word id=%d (%s[%s]) has \"word skipping transition\".\n", word, wchmm->winfo->wname[word], wchmm->winfo->woutput[word]); jlog("ERROR: All HMMs in the word:\n "); for(kkk=0;kkkwinfo->wlen[word];kkk++) { jlog("%s ", wchmm->winfo->wseq[word][kkk]->name); } jlog("\n"); jlog("ERROR: has transitions from initial state to final state.\n"); jlog("ERROR: This type of word skipping is not supported.\n"); ok_p = FALSE; } } } wchmm->n = n; } else { wchmm->n = n; k = wchmm->offset[word][word_len-1] + hmm_logical_state_num(wchmm->winfo->wseq[word][word_len-1])-2 -1; wchmm->wordend[word] = k; /* tail node of 'word' is 'k' */ wchmm->stend[k] = word; /* node 'k' is a tail node of 'word' */ if (matchlen != 0 && add_tail - add_head + 1 > 0) { /* new part has been created in the above procedure: */ /* now make link from shared part to the new part */ wchmm_link_subword(wchmm, matchword,add_to,word,add_head); } } return(ok_p); } /*************************************************************/ /**** parse whole structure (after wchmm has been built) *****/ /*************************************************************/ /** * * 木構造化辞書を走査し,単語の終端状態から外への次遷移確率のリストを作成する. * (non multipath) * * @param wchmm [i/o] 木構造化辞書 * * * Scan the lexicon tree to make list of emission probability from the word end * state. (non multipath) * * @param wchmm [i/o] tree lexicon * */ static void wchmm_calc_wordend_arc(WCHMM_INFO *wchmm) { WORD_ID w; HTK_HMM_Trans *tr; LOGPROB a; for (w=0;wwinfo->num;w++) { tr = hmm_logical_trans(wchmm->winfo->wseq[w][wchmm->winfo->wlen[w]-1]); a = tr->a[tr->statenum-2][tr->statenum-1]; wchmm->wordend_a[w] = a; } } #ifdef SEPARATE_BY_UNIGRAM /********************************************************************/ /****** for separation (linearization) of high-frequent words *******/ /********************************************************************/ /** * * unigram確率でソートするための qsort コールバック関数. * * @param a [in] 要素1 * @param b [in] 要素2 * * @return 演算の結果の符合を返す. * * * qsort callback function to sort unigram values. * * @param a [in] element #1 * @param b [in] element #2 * * @return the result of comparison. 
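 *
 * Note that the sign is intentionally inverted (a positive value is
 * returned when @a a is smaller), so that qsort() yields a descending
 * order; get_nbest_uniprob() below relies on this to pick the N-th
 * largest unigram log probability.  A minimal illustration, with made-up
 * values used only for this sketch:
 *
 *   LOGPROB u[3] = {-1.0, -5.0, -2.0};
 *   qsort(u, 3, sizeof(LOGPROB),
 *         (int (*)(const void *, const void *))compare_prob);
 *   // u[] is now {-1.0, -2.0, -5.0}; u[n-1] gives the n-th best value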
* */ static int compare_prob(LOGPROB *a, LOGPROB *b) { if (*a < *b) return (1); if (*a > *b) return (-1); return(0); } /** * * 1-gramスコアの上位 N 番目の値を求める. * * @param winfo [in] 単語辞書 * @param n [in] 求める順位 * * @return 上位 N 番目の uni-gram 確率の値を返す. * * * Get the Nth-best unigram probability from all words. * * @param winfo [in] word dictionary * @param n [in] required rank * * @return the Nth-best unigram probability. * */ static LOGPROB get_nbest_uniprob(WCHMM_INFO *wchmm, int n) { LOGPROB *u_p; WORD_ID w; LOGPROB x; WORD_INFO *winfo; NGRAM_INFO *ngram; winfo = wchmm->winfo; ngram = wchmm->ngram; if (n < 1) n = 1; if (n > winfo->num) n = winfo->num; /* store all unigram probability to u_p[] */ u_p = (LOGPROB *)mymalloc(sizeof(LOGPROB) * winfo->num); for(w=0;wnum;w++) { if (ngram) { x = uni_prob(ngram, winfo->wton[w]) #ifdef CLASS_NGRAM + winfo->cprob[w] #endif ; } else { x = LOG_ZERO; } if (wchmm->lmvar == LM_NGRAM_USER) { x = (*(wchmm->uni_prob_user))(wchmm->winfo, w, x); } u_p[w] = x; } /* sort them downward */ qsort(u_p, winfo->num, sizeof(LOGPROB), (int (*)(const void *,const void *))compare_prob); /* return the Nth value */ x = u_p[n-1]; free(u_p); return(x); } #endif /**********************************************************/ /****** MAKE WCHMM (LEXICON TREE) --- main function *******/ /**********************************************************/ #define COUNT_STEP 500 ///< Word count step for debug progress output /** * * 与えられた単語辞書と言語モデルから木構造化辞書を構築する. この関数は * 処理が遅く,Julianで"-oldtree"オプション指定時のみ使用されます. オプション * 非指定時およびJuliusでは代わりに build_wchmm2() が用いられます. * * @param wchmm [i/o] 木構造化辞書 * @param lmconf [in] 言語モデル(LM)設定パラメータ * * * Build a tree lexicon from given word dictionary and language model. * This function is slow and only used when "-oldtree" option is specified * in Julian. Julian without that option and Julius uses build_wchmm2() * instead of this. 
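 *
 * A rough usage sketch follows.  It is not taken from this file; the
 * allocator name wchmm_new() and the exact set of fields to fill are
 * assumptions derived from the checks at the top of this function
 * (the word dictionary and the language model must be attached
 * beforehand; the acoustic model wchmm->hmminfo is also used
 * throughout):
 *
 *   WCHMM_INFO *wchmm = wchmm_new();   // assumed allocator
 *   wchmm->lmtype  = LM_PROB;          // N-gram based LM
 *   wchmm->lmvar   = LM_NGRAM;
 *   wchmm->winfo   = winfo;            // word dictionary
 *   wchmm->hmminfo = hmminfo;          // acoustic model
 *   wchmm->ngram   = ngram;            // N-gram language model
 *   if (build_wchmm(wchmm, lmconf) == FALSE) {
 *     jlog("ERROR: failed to build tree lexicon\n");
 *   }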
* * @param wchmm [i/o] lexicon tree * @param lmconf [in] language model (LM) configuration parameters * * @callgraph * @callergraph */ boolean build_wchmm(WCHMM_INFO *wchmm, JCONF_LM *lmconf) { int i,j; int matchword=0, sharelen=0, maxsharelen=0; int num_duplicated; #ifdef SEPARATE_BY_UNIGRAM LOGPROB separate_thres; LOGPROB p; #endif boolean ok_p; /* lingustic infos must be set before build_wchmm() is called */ /* check if necessary lingustic info is already assigned (for debug) */ if (wchmm->winfo == NULL || (wchmm->lmvar == LM_NGRAM && wchmm->ngram == NULL) || (wchmm->lmvar == LM_DFA_GRAMMAR && wchmm->dfa == NULL) ) { jlog("ERROR: wchmm: linguistic info not available!!\n"); return FALSE; } ok_p = TRUE; #ifdef SEPARATE_BY_UNIGRAM /* 上位[separate_wnum]番目の1-gramスコアを求める */ /* 1-gramスコアがこの値以上のものは木から分ける */ separate_thres = get_nbest_uniprob(wchmm, lmconf->separate_wnum); #endif #ifdef PASS1_IWCD #ifndef USE_OLD_IWCD if (wchmm->category_tree) { if (wchmm->ccd_flag) { /* 全てのカテゴリID付き lcd_set を作成 */ lcdset_register_with_category_all(wchmm); } } #endif #endif /* PASS1_IWCD */ /* wchmmを初期化 */ wchmm_init(wchmm); /* カウンタリセット */ wchmm->separated_word_count=0; jlog("STAT: wchmm: Building HMM lexicon tree (left-to-right)\n"); for (i=0;iwinfo->num;i++) { if (wchmm->lmtype == LM_PROB) { if (i == wchmm->winfo->head_silwid || i == wchmm->winfo->tail_silwid) { /* 先頭/末尾の無音モデルは木構造化せず, * 先頭の無音単語の先頭への遷移,末尾単語の末尾からの遷移は作らない*/ /* sharelen=0でそのまま */ if (wchmm_add_word(wchmm, i, 0, 0, lmconf->enable_iwsp) == FALSE) { jlog("ERROR: wchmm: failed to add word #%d to lexicon tree\n"); ok_p = FALSE; } continue; } #ifndef NO_SEPARATE_SHORT_WORD if (wchmm->winfo->wlen[i] <= SHORT_WORD_LEN) { /* 長さの短い単語を木構造化しない(ここでは1音節) */ /* sharelen=0でそのまま */ if (wchmm_add_word(wchmm, i, 0, 0, lmconf->enable_iwsp) == FALSE) { jlog("ERROR: wchmm: failed to add word #%d to lexicon tree\n"); ok_p = FALSE; } wchmm->separated_word_count++; continue; } #endif #ifdef SEPARATE_BY_UNIGRAM if (wchmm->ngram) { p = uni_prob(wchmm->ngram, wchmm->winfo->wton[i]) #ifdef CLASS_NGRAM + wchmm->winfo->cprob[i] #endif ; } else { p = LOG_ZERO; } if (wchmm->lmvar == LM_NGRAM_USER) { p = (*(wchmm->uni_prob_user))(wchmm->winfo, i, p); } if (p >= separate_thres && wchmm->separated_word_count < lmconf->separate_wnum) { /* 頻度の高い単語を木構造化しない */ /* separate_thres は上位separate_wnum番目のスコア */ if (wchmm_add_word(wchmm, i, 0, 0, lmconf->enable_iwsp) == FALSE) { jlog("ERROR: wchmm: failed to add word #%d to lexicon tree\n"); ok_p = FALSE; } wchmm->separated_word_count++; continue; } #endif } /* 最も長く音素を共有出来る単語を探す */ maxsharelen=0; for (j=0;jcategory_tree && wchmm->lmtype == LM_DFA) { if (wchmm->winfo->wton[i] != wchmm->winfo->wton[j]) continue; } sharelen = wchmm_check_match(wchmm->winfo, i, j); if (sharelen == wchmm->winfo->wlen[i] && sharelen == wchmm->winfo->wlen[j]) { /* word に同音語が存在する */ /* 必ず最大の長さであり,重複カウントを避けるためここで抜ける */ maxsharelen = sharelen; matchword = j; break; } if (sharelen > maxsharelen) { matchword = j; maxsharelen = sharelen; } } if (wchmm_add_word(wchmm, i, maxsharelen, matchword, lmconf->enable_iwsp) == FALSE) { jlog("ERROR: wchmm: failed to add word #%d to lexicon tree\n"); ok_p = FALSE; } } #if 0 /* 木構造を作らない */ for (i=0;iwinfo->num;i++) { if (wchmm_add_word(wchmm, i, 0, 0, lmconf->enable_iwsp) == FALSE) { jlog("ERROR: wchmm: failed to add word #%d to lexicon tree\n"); ok_p = FALSE; } } #endif jlog("STAT: %5d words ended (%6d nodes)\n",i,wchmm->n); if (! 
wchmm->hmminfo->multipath) { /* 同一音素系列を持つ単語同士の leaf node を2重化して区別する */ num_duplicated = wchmm_duplicate_leafnode(wchmm); jlog("STAT: %d leaf nodes are made unshared\n", num_duplicated); /* 単語の終端から外への遷移確率を求めておく */ wchmm_calc_wordend_arc(wchmm); } /* wchmmの整合性をチェックする */ check_wchmm(wchmm); /* factoring用に各状態に後続単語のリストを付加する */ if (!wchmm->category_tree) { #ifdef UNIGRAM_FACTORING if (wchmm->lmtype == LM_PROB) { /* 同時に前もってfactoring値を計算 */ make_successor_list_unigram_factoring(wchmm); jlog("STAT: 1-gram factoring values has been pre-computed\n"); } else { make_successor_list(wchmm); } #else make_successor_list(wchmm); #endif /* UNIGRAM_FACTORING */ if (wchmm->hmminfo->multipath) { /* 構築された factoring 情報をスキップ遷移および文頭文法ノードにコピー */ adjust_sc_index(wchmm); } #ifdef UNIGRAM_FACTORING if (wchmm->lmtype == LM_PROB) { /* 単語間LMキャッシュが必要なノードのリストを作る */ make_iwcache_index(wchmm); } #endif /* UNIGRAM_FACTORING */ } jlog("STAT: done\n"); return ok_p; } /** * * 与えられた単語辞書と言語モデルから木構造化辞書を構築する. * この関数は bulid_wchmm() と同じ処理を行いますが, * 最初に単語を音素列でソートして音素列の似た順に単語を並べるため, * より高速に木構造化を行うことができる. とくにオプション指定をしない * 限り,Julius/Julianではこちらが用いられる. * * @param wchmm [i/o] 木構造化辞書 * @param lmconf [in] 言語モデル(LM)設定パラメータ * * * Build a tree lexicon from given word dictionary and language model. * This function does the same job as build_wchmm(), but it is much * faster because finding of the longest matched word to an adding word * is done by first sorting all the words in the dictoinary by their phoneme * sequence order. This function will be used instead of build_wchmm() * by default. * * @param wchmm [i/o] lexicon tree * @param lmconf [in] language model (LM) configuration parameters * * @callgraph * @callergraph */ boolean build_wchmm2(WCHMM_INFO *wchmm, JCONF_LM *lmconf) { int i,j, last_i; int num_duplicated; WORD_ID *windex; #ifdef SEPARATE_BY_UNIGRAM LOGPROB separate_thres; LOGPROB p; #endif boolean ok_p; boolean ret; /* lingustic infos must be set before build_wchmm() is called */ /* check if necessary lingustic info is already assigned (for debug) */ if (wchmm->winfo == NULL || (wchmm->lmvar == LM_NGRAM && wchmm->ngram == NULL) || (wchmm->lmvar == LM_DFA_GRAMMAR && wchmm->dfa == NULL) ) { jlog("ERROR: wchmm: linguistic info not available!!\n"); return FALSE; } ok_p = TRUE; wchmm->separated_word_count = 0; jlog("STAT: Building HMM lexicon tree\n"); if (wchmm->lmtype == LM_PROB) { #ifdef SEPARATE_BY_UNIGRAM /* compute score threshold beforehand to separate words from tree */ /* here we will separate best [separate_wnum] words from tree */ separate_thres = get_nbest_uniprob(wchmm, lmconf->separate_wnum); #endif } #ifdef PASS1_IWCD #ifndef USE_OLD_IWCD if (wchmm->category_tree) { if (wchmm->ccd_flag) { /* when Julian mode (category-tree) and triphone is used, make all category-indexed context-dependent phone set (cdset) here */ /* these will be assigned on the last phone of each word on tree */ lcdset_register_with_category_all(wchmm); } } #endif #endif /* PASS1_IWCD */ /* initialize wchmm */ wchmm_init(wchmm); /* make sorted word index ordered by phone sequence */ windex = (WORD_ID *)mymalloc(sizeof(WORD_ID) * wchmm->winfo->num); for(i=0;iwinfo->num;i++) windex[i] = i; if (wchmm->category_tree && wchmm->lmtype == LM_DFA) { /* sort by category -> sort by word ID in each category */ wchmm_sort_idx_by_category(wchmm->winfo, windex, wchmm->winfo->num); { int last_cate; last_i = 0; last_cate = wchmm->winfo->wton[windex[0]]; for(i = 1;iwinfo->num;i++) { if (wchmm->winfo->wton[windex[i]] != last_cate) { wchmm_sort_idx_by_wseq(wchmm->winfo, 
windex, last_i, i - last_i); last_cate = wchmm->winfo->wton[windex[i]]; last_i = i; } } wchmm_sort_idx_by_wseq(wchmm->winfo, windex, last_i, wchmm->winfo->num - last_i); } } else { /* sort by word ID for whole vocabulary */ wchmm_sort_idx_by_wseq(wchmm->winfo, windex, 0, wchmm->winfo->num); } /* * { * int i,w; * for(i=0;iwinfo->num;i++) { * w = windex[i]; * printf("%d: cate=%4d wid=%4d %s\n",i, wchmm->winfo->wton[w], w, wchmm->winfo->woutput[w]); * } * } */ /* incrementaly add words to lexicon tree */ /* now for each word, the previous word (last_i) is always the most matched one */ last_i = WORD_INVALID; for (j=0;jwinfo->num;j++) { i = windex[j]; if (wchmm->lmtype == LM_PROB) { /* start/end silence word should not be shared */ if (i == wchmm->winfo->head_silwid || i == wchmm->winfo->tail_silwid) { /* add whole word as new (sharelen=0) */ if (wchmm_add_word(wchmm, i, 0, 0, lmconf->enable_iwsp) == FALSE) { jlog("ERROR: wchmm: failed to add word #%d to lexicon tree\n"); ok_p = FALSE; } continue; } #ifndef NO_SEPARATE_SHORT_WORD /* separate short words from tree */ if (wchmm->winfo->wlen[i] <= SHORT_WORD_LEN) { if (wchmm_add_word(wchmm, i, 0, 0, lmconf->enable_iwsp) == FALSE) { jlog("ERROR: wchmm: failed to add word #%d to lexicon tree\n"); ok_p = FALSE; } wchmm->separated_word_count++; continue; } #endif #ifdef SEPARATE_BY_UNIGRAM if (wchmm->ngram) { p = uni_prob(wchmm->ngram, wchmm->winfo->wton[i]) #ifdef CLASS_NGRAM + wchmm->winfo->cprob[i] #endif ; } else { p = LOG_ZERO; } if (wchmm->lmvar == LM_NGRAM_USER) { p = (*(wchmm->uni_prob_user))(wchmm->winfo, i, p); } /* separate high-frequent words from tree (threshold = separate_thres) */ if (p >= separate_thres && wchmm->separated_word_count < lmconf->separate_wnum) { if (wchmm_add_word(wchmm, i, 0, 0, lmconf->enable_iwsp) == FALSE) { jlog("ERROR: wchmm: failed to add word #%d to lexicon tree\n"); ok_p = FALSE; } wchmm->separated_word_count++; continue; } #endif } if (last_i == WORD_INVALID) { /* first word */ ret = wchmm_add_word(wchmm, i, 0, 0, lmconf->enable_iwsp); } else { /* the previous word (last_i) is always the most matched one */ if (wchmm->category_tree && wchmm->lmtype == LM_DFA) { if (wchmm->winfo->wton[i] != wchmm->winfo->wton[last_i]) { ret = wchmm_add_word(wchmm, i, 0, 0, lmconf->enable_iwsp); } else { ret = wchmm_add_word(wchmm, i, wchmm_check_match(wchmm->winfo, i, last_i), last_i, lmconf->enable_iwsp); } } else { ret = wchmm_add_word(wchmm, i, wchmm_check_match(wchmm->winfo, i, last_i), last_i, lmconf->enable_iwsp); } } if (ret == FALSE) { jlog("ERROR: wchmm: failed to add word #%d to lexicon tree\n"); ok_p = FALSE; } last_i = i; } /* end of add word loop */ /*j_printerr("\r %5d words ended (%6d nodes)\n",j,wchmm->n);*/ /* free work area */ free(windex); if (wchmm->hmminfo->multipath) { jlog("STAT: lexicon size: %d nodes\n", wchmm->n); } else { /* duplicate leaf nodes of homophone/embedded words */ jlog("STAT: lexicon size: %d", wchmm->n); num_duplicated = wchmm_duplicate_leafnode(wchmm); jlog("+%d=%d\n", num_duplicated, wchmm->n); } if (! 
wchmm->hmminfo->multipath) { /* calculate transition probability of word end node to outside */ wchmm_calc_wordend_arc(wchmm); } /* check wchmm coherence (internal debug) */ check_wchmm(wchmm); /* make successor list for all branch nodes for N-gram factoring */ if (!wchmm->category_tree) { #ifdef UNIGRAM_FACTORING if (wchmm->lmtype == LM_PROB) { /* for 1-gram factoring, we can compute the values before search */ make_successor_list_unigram_factoring(wchmm); jlog("STAT: 1-gram factoring values has been pre-computed\n"); } else { make_successor_list(wchmm); } #else make_successor_list(wchmm); #endif /* UNIGRAM_FACTORING */ if (wchmm->hmminfo->multipath) { /* Copy the factoring data according to the skip transitions and startword nodes */ adjust_sc_index(wchmm); } #ifdef UNIGRAM_FACTORING if (wchmm->lmtype == LM_PROB) { /* make list of start nodes that needs inter-word LM cache */ make_iwcache_index(wchmm); } #endif /* UNIGRAM_FACTORING */ } //jlog("STAT: done\n"); #ifdef WCHMM_SIZE_CHECK if (debug2_flag) { /* detailed check of lexicon tree size (inaccurate!) */ jlog("STAT: --- memory size of word lexicon ---\n"); jlog("STAT: wchmm: %d words, %d nodes\n", wchmm->winfo->num, wchmm->n); jlog("STAT: %9d bytes: wchmm->state[node] (exclude ac, sc)\n", sizeof(WCHMM_STATE) * wchmm->n); { int count1 = 0; int count2 = 0; int count3 = 0; for(i=0;in;i++) { if (wchmm->self_a[i] != LOG_ZERO) count1++; if (wchmm->next_a[i] != LOG_ZERO) count2++; if (wchmm->ac[i] != NULL) count3++; } jlog("STAT: %9d bytes: wchmm->self_a[node] (%4.1f%% filled)\n", sizeof(LOGPROB) * wchmm->n, 100.0 * count1 / (float)wchmm->n); jlog("STAT: %9d bytes: wchmm->next_a[node] (%4.1f%% filled)\n", sizeof(LOGPROB) * wchmm->n, 100.0 * count2 / (float)wchmm->n); jlog("STAT: %9d bytes: wchmm->ac[node] (%4.1f%% used)\n", sizeof(A_CELL2 *) * wchmm->n, 100.0 * count3 / (float)wchmm->n); } jlog("STAT: %9d bytes: wchmm->stend[node]\n", sizeof(WORD_ID) * wchmm->n); { int w,count; count = 0; for(w=0;wwinfo->num;w++) { count += wchmm->winfo->wlen[w] * sizeof(int) + sizeof(int *); } jlog("STAT: %9d bytes: wchmm->offset[w][]\n", count); } if (wchmm->hmminfo->multipath) { jlog("STAT: %9d bytes: wchmm->wordbegin[w]\n", wchmm->winfo->num * sizeof(int)); } jlog("STAT: %9d bytes: wchmm->wordend[w]\n", wchmm->winfo->num * sizeof(int)); jlog("STAT: %9d bytes: wchmm->startnode[]\n", wchmm->startnum * sizeof(int)); if (wchmm->category_tree) { jlog("STAT: %9d bytes: wchmm->start2wid[]\n", wchmm->startnum * sizeof(WORD_ID)); } #ifdef UNIGRAM_FACTORING if (wchmm->lmtype == LM_PROB) { jlog("STAT: %9d bytes: wchmm->start2isolate[]\n", wchmm->isolatenum * sizeof(int)); } #endif if (!wchmm->hmminfo->multipath) { jlog("STAT: %9d bytes: wchmm->wordend_a[]\n", wchmm->winfo->num * sizeof(LOGPROB)); } #ifdef PASS1_IWCD jlog("STAT: %9d bytes: wchmm->outstyle[]\n", wchmm->n * sizeof(unsigned char)); { int c; c = 0; for(i=0;in;i++) { switch(wchmm->outstyle[i]) { case AS_RSET: c += sizeof(RC_INFO); break; case AS_LRSET: c += sizeof(LRC_INFO); break; } } if (c > 0) jlog("STAT: %9d bytes: wchmm->out (RC_INFO / LRC_INFO)\n", c); } #endif if (!wchmm->category_tree) { int c = 0; #ifdef UNIGRAM_FACTORING jlog("STAT: %9d bytes: wchmm->scword[]\n", sizeof(WORD_ID) * wchmm->scnum); #else for(i=1;iscnum;i++) { c += wchmm->sclen[i]; } jlog("STAT: %9d bytes: wchmm->sclist[]\n", c * sizeof(WORD_ID) + wchmm->scnum * sizeof(WORD_ID *)); jlog("STAT: %9d bytes: wchmm->sclen[]\n", wchmm->scnum * sizeof(WORD_ID)); #endif #ifdef UNIGRAM_FACTORING if (wchmm->lmtype == LM_PROB) { jlog("STAT: 
%9d bytes: wchmm->fscore[]\n", wchmm->fsnum * sizeof(LOGPROB)); } #endif } { int count, n; A_CELL2 *ac; count = 0; for(n=0;nn;n++) { for(ac=wchmm->ac[n];ac;ac=ac->next) { count += sizeof(A_CELL2); } } jlog("STAT: %9d bytes: A_CELL2\n", count); } } #endif /* WCHMM_SIZE_CHECK */ return ok_p; } /** * * 木構造化辞書のサイズなどの情報を標準出力に出力する. * * @param wchmm [in] 木構造化辞書 * * * Output some specifications of the tree lexicon (size etc.) to stdout. * * @param wchmm [in] tree lexicon already built * * @callgraph * @callergraph */ void print_wchmm_info(WCHMM_INFO *wchmm) { int n,i, rootnum; if (wchmm->hmminfo->multipath) { rootnum = wchmm->startnum; } else { if (wchmm->lmtype == LM_PROB) { rootnum = wchmm->startnum + 1; /* including winfo->head_silwid */ } else if (wchmm->lmtype == LM_DFA) { rootnum = wchmm->startnum; } } jlog(" Lexicon tree:\n"); jlog("\t total node num = %6d\n", wchmm->n); if (wchmm->lmtype == LM_PROB) { jlog("\t root node num = %6d\n", rootnum); #ifdef NO_SEPARATE_SHORT_WORD #ifdef SEPARATE_BY_UNIGRAM jlog("\t(%d hi-freq. words are separated from tree lexicon)\n", wchmm->separated_word_count); #else jlog(" (no words are separated from tree)\n"); #endif /* SEPARATE_BY_UNIGRAM */ #else jlog(" (%d short words (<= %d phonemes) are separated from tree)\n", wchmm->separated_word_count, SHORT_WORD_LEN); #endif /* NO_SEPARATE_SHORT_WORD */ } if (wchmm->lmtype == LM_DFA) { jlog("\t root node num = %6d\n", rootnum); } for(n=0,i=0;in;i++) { if (wchmm->stend[i] != WORD_INVALID) n++; } jlog("\t leaf node num = %6d\n", n); if (!wchmm->category_tree) { jlog("\t fact. node num = %6d\n", wchmm->scnum - 1); } } /* end of file */ julius-4.2.2/libjulius/src/realtime-1stpass.c0000644001051700105040000014121612004452401017460 0ustar ritrlab/** * @file realtime-1stpass.c * * * @brief 第1パス:フレーム同期ビーム探索(実時間処理版) * * 第1パスを入力開始と同時にスタートし,入力と平行して認識処理を行うための * 関数が定義されている. * * バッチ処理の場合,Julius の音声認識処理は以下の手順で * main_recognition_loop() 内で実行される. * * -# 音声入力 adin_go() → 入力音声が speech[] に格納される * -# 特徴量抽出 new_wav2mfcc() →speechから特徴パラメータを param に格納 * -# 第1パス実行 get_back_trellis() →param とモデルから単語トレリスの生成 * -# 第2パス実行 wchmm_fbs() * -# 認識結果出力 * * 第1パスを平行処理する場合,上記の 1 〜 3 が平行して行われる. * Julius では,この並行処理を,音声入力の断片が得られるたびに * 認識処理をその分だけ漸次的に進めることで実装している. * * - 特徴量抽出と第1パス実行を,一つにまとめてコールバック関数として定義. * - 音声入力関数 adin_go() のコールバックとして上記の関数を与える * * 具体的には,ここで定義されている RealTimePipeLine() がコールバックとして * adin_go() に与えられる. adin_go() は音声入力がトリガするとその得られた入力 * 断片ごとに RealTimePipeLine() を呼び出す. RealTimePipeLine() は得られた * 断片分について特徴量抽出と第1パスの計算を進める. * * CMN について注意が必要である. CMN は通常発話単位で行われるが, * マイク入力やネットワーク入力のように,第1パスと平行に認識を行う * 処理時は発話全体のケプストラム平均を得ることができない. バージョン 3.5 * 以前では直前の発話5秒分(棄却された入力を除く)の CMN がそのまま次発話に * 流用されていたが,3.5.1 からは,上記の直前発話 CMN を初期値として * 発話内 CMN を MAP-CMN を持ちいて計算するようになった. なお, * 最初の発話用の初期CMNを "-cmnload" で与えることもでき,また * "-cmnnoupdate" で入力ごとの CMN 更新を行わないようにできる. * "-cmnnoupdate" と "-cmnload" と組み合わせることで, 最初にグローバルな * ケプストラム平均を与え,それを常に初期値として MAP-CMN することができる. * * 主要な関数は以下の通りである. * * - RealTimeInit() - 起動時の初期化 * - RealTimePipeLinePrepare() - 入力ごとの初期化 * - RealTimePipeLine() - 第1パス平行処理用コールバック(上述) * - RealTimeResume() - ショートポーズセグメンテーション時の認識復帰 * - RealTimeParam() - 入力ごとの第1パス終了処理 * - RealTimeCMNUpdate() - CMN の更新 * * * * * @brief The first pass: frame-synchronous beam search (on-the-fly version) * * These are functions to perform on-the-fly decoding of the 1st pass * (frame-synchronous beam search). These function can be used * instead of new_wav2mfcc() and get_back_trellis(). These functions enable * recognition as soon as an input triggers. 
The 1st pass processing * will be done concurrently with the input. * * The basic recognition procedure of Julius in main_recognition_loop() * is as follows: * * -# speech input: (adin_go()) ... buffer `speech' holds the input * -# feature extraction: (new_wav2mfcc()) ... compute feature vector * from `speech' and store the vector sequence to `param'. * -# recognition 1st pass: (get_back_trellis()) ... frame-wise beam decoding * to generate word trellis index from `param' and models. * -# recognition 2nd pass: (wchmm_fbs()) * -# Output result. * * At on-the-fly decoding, procedures from 1 to 3 above will be performed * in parallel. It is implemented by a simple scheme, processing the captured * small speech fragments one by one progressively: * * - Define a callback function that can do feature extraction and 1st pass * processing progressively. * - The callback will be given to A/D-in function adin_go(). * * Actual procedure is as follows. The function RealTimePipeLine() * will be given to adin_go() as callback. Then adin_go() will watch * the input, and if speech input starts, it calls RealTimePipeLine() * for every captured input fragments. RealTimePipeLine() will * compute the feature vector of the given fragment and proceed the * 1st pass processing for them, and return to the capture function. * The current status will be hold to the next call, to perform * inter-frame processing (computing delta coef. etc.). * * Note about CMN: With acoustic models trained with CMN, Julius performs * CMN to the input. On file input, the whole sentence mean will be computed * and subtracted. At the on-the-fly decoding, the ceptral mean will be * performed using the cepstral mean of last 5 second input (excluding * rejected ones). This was a behavier earlier than 3.5, and 3.5.1 now * applies MAP-CMN at on-the-fly decoding, using the last 5 second cepstrum * as initial mean. Initial cepstral mean at start can be given by option * "-cmnload", and you can also prohibit the updates of initial cepstral * mean at each input by "-cmnnoupdate". The last option is useful to always * use static global cepstral mean as initial mean for each input. * * The primary functions in this file are: * - RealTimeInit() - initialization at application startup * - RealTimePipeLinePrepare() - initialization before each input * - RealTimePipeLine() - callback for on-the-fly 1st pass decoding * - RealTimeResume() - recognition resume procedure for short-pause segmentation. * - RealTimeParam() - finalize the on-the-fly 1st pass when input ends. * - RealTimeCMNUpdate() - update CMN data for next input * * * * @author Akinobu Lee * @date Tue Aug 23 11:44:14 2005 * * $Revision: 1.9 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #include #undef RDEBUG ///< Define if you want local debug message /** * * MFCC計算インスタンス内に特徴パラメータベクトル格納エリアを準備する. * * mfcc->para の情報に基づいてヘッダ情報を格納し,初期格納領域を確保する. * 格納領域は,入力時に必要に応じて自動的に伸長されるので,ここでは * その準備だけ行う. すでに格納領域が確保されているときはそれをキープする. * * これは入力/認識1回ごとに繰り返し呼ばれる. * * * * * Prepare parameter holder in MFCC calculation instance to store MFCC * vectors. * * This function will store header information based on the parameters * in mfcc->para, and allocate initial buffer for the incoming * vectors. The vector buffer will be expanded as needed while * recognition, so at this time only the minimal amount is allocated. 
* If the instance already has a certain length of vector buffer, it * will be kept. * * This function will be called each time a new input begins. * * * * @param mfcc [i/o] MFCC calculation instance * */ static void init_param(MFCCCalc *mfcc) { Value *para; para = mfcc->para; /* これから計算されるパラメータの型をヘッダに設定 */ /* set header types */ mfcc->param->header.samptype = F_MFCC; if (para->delta) mfcc->param->header.samptype |= F_DELTA; if (para->acc) mfcc->param->header.samptype |= F_ACCL; if (para->energy) mfcc->param->header.samptype |= F_ENERGY; if (para->c0) mfcc->param->header.samptype |= F_ZEROTH; if (para->absesup) mfcc->param->header.samptype |= F_ENERGY_SUP; if (para->cmn) mfcc->param->header.samptype |= F_CEPNORM; mfcc->param->header.wshift = para->smp_period * para->frameshift; mfcc->param->header.sampsize = para->veclen * sizeof(VECT); /* not compressed */ mfcc->param->veclen = para->veclen; /* 認識処理中/終了後にセットされる変数: param->parvec (パラメータベクトル系列) param->header.samplenum, param->samplenum (全フレーム数) */ /* variables that will be set while/after computation has been done: param->parvec (parameter vector sequence) param->header.samplenum, param->samplenum (total number of frames) */ /* MAP-CMN の初期化 */ /* Prepare for MAP-CMN */ if (mfcc->para->cmn || mfcc->para->cvn) CMN_realtime_prepare(mfcc->cmn.wrk); } /** * * @brief 第1パス平行認識処理の初期化. * * MFCC計算のワークエリア確保を行う. また必要な場合は,スペクトル減算用の * ワークエリア準備,ノイズスペクトルのロード,CMN用の初期ケプストラム * 平均データのロードなども行われる. * * この関数は,システム起動後1回だけ呼ばれる. * * * @brief Initializations for the on-the-fly 1st pass decoding. * * Work areas for all MFCC caculation instances are allocated. * Additionaly, * some initialization will be done such as allocating work area * for spectral subtraction, loading noise spectrum from file, * loading initial ceptral mean data for CMN from file, etc. * * This will be called only once, on system startup. * * * @param recog [i/o] engine instance * * @callgraph * @callergraph */ boolean RealTimeInit(Recog *recog) { Value *para; Jconf *jconf; RealBeam *r; MFCCCalc *mfcc; jconf = recog->jconf; r = &(recog->real); /* 最大フレーム長を最大入力時間数から計算 */ /* set maximum allowed frame length */ r->maxframelen = MAXSPEECHLEN / recog->jconf->input.frameshift; /* -ssload 指定時, SS用のノイズスペクトルをファイルから読み込む */ /* if "-ssload", load noise spectrum for spectral subtraction from file */ for(mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { if (mfcc->frontend.ssload_filename && mfcc->frontend.ssbuf == NULL) { if ((mfcc->frontend.ssbuf = new_SS_load_from_file(mfcc->frontend.ssload_filename, &(mfcc->frontend.sslen))) == NULL) { jlog("ERROR: failed to read \"%s\"\n", mfcc->frontend.ssload_filename); return FALSE; } /* check ssbuf length */ if (mfcc->frontend.sslen != mfcc->wrk->bflen) { jlog("ERROR: noise spectrum length not match\n"); return FALSE; } mfcc->wrk->ssbuf = mfcc->frontend.ssbuf; mfcc->wrk->ssbuflen = mfcc->frontend.sslen; mfcc->wrk->ss_alpha = mfcc->frontend.ss_alpha; mfcc->wrk->ss_floor = mfcc->frontend.ss_floor; } } for(mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { para = mfcc->para; /* 対数エネルギー正規化のための初期値 */ /* set initial value for log energy normalization */ if (para->energy && para->enormal) energy_max_init(&(mfcc->ewrk)); /* デルタ計算のためのサイクルバッファを用意 */ /* initialize cycle buffers for delta and accel coef. 
computation */ if (para->delta) mfcc->db = WMP_deltabuf_new(para->baselen, para->delWin); if (para->acc) mfcc->ab = WMP_deltabuf_new(para->baselen * 2, para->accWin); /* デルタ計算のためのワークエリアを確保 */ /* allocate work area for the delta computation */ mfcc->tmpmfcc = (VECT *)mymalloc(sizeof(VECT) * para->vecbuflen); /* MAP-CMN 用の初期ケプストラム平均を読み込んで初期化する */ /* Initialize the initial cepstral mean data from file for MAP-CMN */ if (para->cmn || para->cvn) mfcc->cmn.wrk = CMN_realtime_new(para, mfcc->cmn.map_weight); /* -cmnload 指定時, CMN用のケプストラム平均の初期値をファイルから読み込む */ /* if "-cmnload", load initial cepstral mean data from file for CMN */ if (mfcc->cmn.load_filename) { if (para->cmn) { if ((mfcc->cmn.loaded = CMN_load_from_file(mfcc->cmn.wrk, mfcc->cmn.load_filename))== FALSE) { jlog("WARNING: failed to read initial cepstral mean from \"%s\", do flat start\n", mfcc->cmn.load_filename); } } else { jlog("WARNING: CMN not required on AM, file \"%s\" ignored\n", mfcc->cmn.load_filename); } } } /* 窓長をセット */ /* set window length */ r->windowlen = recog->jconf->input.framesize + 1; /* 窓かけ用バッファを確保 */ /* set window buffer */ r->window = mymalloc(sizeof(SP16) * r->windowlen); return TRUE; } /** * * Prepare work are a for MFCC calculation. * Reset values in work area for starting the next input. * Output probability cache for each acoustic model will be also * prepared at this function. * * This function will be called before starting each input (segment). * * * MFCC計算を準備する. * いくつかのワークエリアをリセットして認識に備える. * また,音響モデルごとの出力確率計算キャッシュを準備する. * * この関数は,ある入力(あるいはセグメント)の認識が * 始まる前に必ず呼ばれる. * * * * @param recog [i/o] engine instance * * @callgraph * @callergraph */ void reset_mfcc(Recog *recog) { Value *para; MFCCCalc *mfcc; RealBeam *r; r = &(recog->real); /* 特徴抽出モジュールを初期化 */ /* initialize parameter extraction module */ for(mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { para = mfcc->para; /* 対数エネルギー正規化のための初期値をセット */ /* set initial value for log energy normalization */ if (para->energy && para->enormal) energy_max_prepare(&(mfcc->ewrk), para); /* デルタ計算用バッファを準備 */ /* set the delta cycle buffer */ if (para->delta) WMP_deltabuf_prepare(mfcc->db); if (para->acc) WMP_deltabuf_prepare(mfcc->ab); } } /** * * @brief 第1パス平行認識処理の準備 * * 計算用変数をリセットし,各種データを準備する. * この関数は,ある入力(あるいはセグメント)の認識が * 始まる前に呼ばれる. * * * * @brief Preparation for the on-the-fly 1st pass decoding. * * Variables are reset and data are prepared for the next input recognition. * * This function will be called before starting each input (segment). * * * * @param recog [i/o] engine instance * * @return TRUE on success. FALSE on failure. 
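 *
 * As a rough sketch, the per-input calling sequence on the caller side
 * is assumed to look like the following (this is a simplified view
 * based on the file header above, not a copy of the real driver code;
 * the audio capture step that repeatedly calls RealTimePipeLine() is
 * abbreviated to a comment):
 *
 *   RealTimeInit(recog);                   // once, at engine startup
 *   for (;;) {
 *     if (RealTimePipeLinePrepare(recog) == FALSE) break;
 *     // the A/D-in layer (adin_go()) now feeds captured sample
 *     // fragments to RealTimePipeLine() until the input ends or the
 *     // decoder requests segmentation
 *     RealTimeParam(recog);                // finalize the 1st pass
 *     // 2nd pass search and result output follow here
 *   }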
* * @callgraph * @callergraph * */ boolean RealTimePipeLinePrepare(Recog *recog) { RealBeam *r; PROCESS_AM *am; MFCCCalc *mfcc; #ifdef SPSEGMENT_NAIST RecogProcess *p; #endif r = &(recog->real); /* 計算用の変数を初期化 */ /* initialize variables for computation */ r->windownum = 0; /* parameter check */ for(mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { /* パラメータ初期化 */ /* parameter initialization */ if (recog->jconf->input.speech_input == SP_MFCMODULE) { if (mfc_module_set_header(mfcc, recog) == FALSE) return FALSE; } else { init_param(mfcc); } /* フレームごとのパラメータベクトル保存の領域を確保 */ /* あとで必要に応じて伸長される */ if (param_alloc(mfcc->param, 1, mfcc->param->veclen) == FALSE) { j_internal_error("ERROR: segmented: failed to allocate memory for rest param\n"); } /* フレーム数をリセット */ /* reset frame count */ mfcc->f = 0; } /* 準備した param 構造体のデータのパラメータ型を音響モデルとチェックする */ /* check type coherence between param and hmminfo here */ if (recog->jconf->input.paramtype_check_flag) { for(am=recog->amlist;am;am=am->next) { if (!check_param_coherence(am->hmminfo, am->mfcc->param)) { jlog("ERROR: input parameter type does not match AM\n"); return FALSE; } } } /* 計算用のワークエリアを準備 */ /* prepare work area for calculation */ if (recog->jconf->input.type == INPUT_WAVEFORM) { reset_mfcc(recog); } /* 音響尤度計算用キャッシュを準備 */ /* prepare cache area for acoustic computation of HMM states and mixtures */ for(am=recog->amlist;am;am=am->next) { outprob_prepare(&(am->hmmwrk), r->maxframelen); } #ifdef BACKEND_VAD if (recog->jconf->decodeopt.segment) { /* initialize segmentation parameters */ spsegment_init(recog); } #else recog->triggered = FALSE; #endif #ifdef DEBUG_VTLN_ALPHA_TEST /* store speech */ recog->speechlen = 0; #endif return TRUE; } /** * * @brief 音声波形からパラメータベクトルを計算する. * * 窓単位で取り出された音声波形からMFCCベクトルを計算する. * 計算結果は mfcc->tmpmfcc に保存される. * * @param mfcc [i/o] MFCC計算インスタンス * @param window [in] 窓単位で取り出された音声波形データ * @param windowlen [in] @a window の長さ * * @return 計算成功時,TRUE を返す. デルタ計算において入力フレームが * 少ないなど,まだ得られていない場合は FALSE を返す. * * * @brief Compute a parameter vector from a speech window. * * This function calculates an MFCC vector from speech data windowed from * input speech. The obtained MFCC vector will be stored to mfcc->tmpmfcc. * * @param mfcc [i/o] MFCC calculation instance * @param window [in] speech input (windowed from input stream) * @param windowlen [in] length of @a window * * @return TRUE on success (an vector obtained). Returns FALSE if no * parameter vector obtained yet (due to delta delay). * * * @callgraph * @callergraph * */ boolean RealTimeMFCC(MFCCCalc *mfcc, SP16 *window, int windowlen) { int i; boolean ret; VECT *tmpmfcc; Value *para; tmpmfcc = mfcc->tmpmfcc; para = mfcc->para; /* 音声波形から base MFCC を計算 (recog->mfccwrk を利用) */ /* calculate base MFCC from waveform (use recog->mfccwrk) */ for (i=0; i < windowlen; i++) { mfcc->wrk->bf[i+1] = (float) window[i]; } WMP_calc(mfcc->wrk, tmpmfcc, para); if (para->energy && para->enormal) { /* 対数エネルギー項を正規化する */ /* normalize log energy */ /* リアルタイム入力では発話ごとの最大エネルギーが得られないので 直前の発話のパワーで代用する */ /* Since the maximum power of the whole input utterance cannot be obtained at real-time input, the maximum of last input will be used to normalize. 
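   (The running maximum referred to here lives in mfcc->ewrk: it is
   initialized once by energy_max_init() in RealTimeInit() and prepared
   for each new input by energy_max_prepare() in reset_mfcc(), which is
   how the maximum of the previous utterance is carried over to
   normalize the current one.)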
*/ tmpmfcc[para->baselen-1] = energy_max_normalize(&(mfcc->ewrk), tmpmfcc[para->baselen-1], para); } if (para->delta) { /* デルタを計算する */ /* calc delta coefficients */ ret = WMP_deltabuf_proceed(mfcc->db, tmpmfcc); #ifdef RDEBUG printf("DeltaBuf: ret=%d, status=", ret); for(i=0;idb->len;i++) { printf("%d", mfcc->db->is_on[i]); } printf(", nextstore=%d\n", mfcc->db->store); #endif /* ret == FALSE のときはまだディレイ中なので認識処理せず次入力へ */ /* if ret == FALSE, there is no available frame. So just wait for next input */ if (! ret) { return FALSE; } /* db->vec に現在の元データとデルタ係数が入っているので tmpmfcc にコピー */ /* now db->vec holds the current base and full delta, so copy them to tmpmfcc */ memcpy(tmpmfcc, mfcc->db->vec, sizeof(VECT) * para->baselen * 2); } if (para->acc) { /* Accelerationを計算する */ /* calc acceleration coefficients */ /* base+delta をそのまま入れる */ /* send the whole base+delta to the cycle buffer */ ret = WMP_deltabuf_proceed(mfcc->ab, tmpmfcc); #ifdef RDEBUG printf("AccelBuf: ret=%d, status=", ret); for(i=0;iab->len;i++) { printf("%d", mfcc->ab->is_on[i]); } printf(", nextstore=%d\n", mfcc->ab->store); #endif /* ret == FALSE のときはまだディレイ中なので認識処理せず次入力へ */ /* if ret == FALSE, there is no available frame. So just wait for next input */ if (! ret) { return FALSE; } /* ab->vec には,(base+delta) とその差分係数が入っている. [base] [delta] [delta] [acc] の順で入っているので, [base] [delta] [acc] を tmpmfcc にコピーする. */ /* now ab->vec holds the current (base+delta) and their delta coef. it holds a vector in the order of [base] [delta] [delta] [acc], so copy the [base], [delta] and [acc] to tmpmfcc. */ memcpy(tmpmfcc, mfcc->ab->vec, sizeof(VECT) * para->baselen * 2); memcpy(&(tmpmfcc[para->baselen*2]), &(mfcc->ab->vec[para->baselen*3]), sizeof(VECT) * para->baselen); } #ifdef POWER_REJECT if (para->energy || para->c0) { mfcc->avg_power += tmpmfcc[para->baselen-1]; } #endif if (para->delta && (para->energy || para->c0) && para->absesup) { /* 絶対値パワーを除去 */ /* suppress absolute power */ memmove(&(tmpmfcc[para->baselen-1]), &(tmpmfcc[para->baselen]), sizeof(VECT) * (para->vecbuflen - para->baselen)); } /* この時点で tmpmfcc に現時点での最新の特徴ベクトルが格納されている */ /* tmpmfcc[] now holds the latest parameter vector */ /* CMN を計算 */ /* perform CMN */ if (para->cmn || para->cvn) CMN_realtime(mfcc->cmn.wrk, tmpmfcc); return TRUE; } static int proceed_one_frame(Recog *recog) { MFCCCalc *mfcc; RealBeam *r; int maxf; PROCESS_AM *am; int rewind_frame; boolean reprocess; boolean ok_p; r = &(recog->real); /* call recognition start callback */ ok_p = FALSE; maxf = 0; for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { if (!mfcc->valid) continue; if (maxf < mfcc->f) maxf = mfcc->f; if (mfcc->f == 0) { ok_p = TRUE; } } if (ok_p && maxf == 0) { /* call callback when at least one of MFCC has initial frame */ if (recog->jconf->decodeopt.segment) { #ifdef BACKEND_VAD /* not exec pass1 begin callback here */ #else if (!recog->process_segment) { callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog); } callback_exec(CALLBACK_EVENT_SEGMENT_BEGIN, recog); callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog); recog->triggered = TRUE; #endif } else { callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog); callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog); recog->triggered = TRUE; } } /* 各インスタンスについて mfcc->f の認識処理を1フレーム進める */ switch (decode_proceed(recog)) { case -1: /* error */ return -1; break; case 0: /* success */ break; case 1: /* segmented */ /* 認識処理のセグメント要求で終わったことをフラグにセット */ /* set flag which indicates that the input has ended with segmentation request */ r->last_is_segmented = TRUE; /* tell 
the caller to be segmented by this function */ /* 呼び出し元に,ここで入力を切るよう伝える */ return 1; } #ifdef BACKEND_VAD /* check up trigger in case of VAD segmentation */ if (recog->jconf->decodeopt.segment) { if (recog->triggered == FALSE) { if (spsegment_trigger_sync(recog)) { if (!recog->process_segment) { callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog); } callback_exec(CALLBACK_EVENT_SEGMENT_BEGIN, recog); callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog); recog->triggered = TRUE; } } } #endif if (spsegment_need_restart(recog, &rewind_frame, &reprocess) == TRUE) { /* set total length to the current frame */ for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { if (!mfcc->valid) continue; mfcc->param->header.samplenum = mfcc->f + 1; mfcc->param->samplenum = mfcc->f + 1; } /* do rewind for all mfcc here */ spsegment_restart_mfccs(recog, rewind_frame, reprocess); /* also tell adin module to rehash the concurrent audio input */ recog->adin->rehash = TRUE; /* reset outprob cache for all AM */ for(am=recog->amlist;am;am=am->next) { outprob_prepare(&(am->hmmwrk), am->mfcc->param->samplenum); } if (reprocess) { /* process the backstep MFCCs here */ while(1) { ok_p = TRUE; for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { if (! mfcc->valid) continue; mfcc->f++; if (mfcc->f < mfcc->param->samplenum) { mfcc->valid = TRUE; ok_p = FALSE; } else { mfcc->valid = FALSE; } } if (ok_p) { /* すべての MFCC が終わりに達したのでループ終了 */ /* all MFCC has been processed, end of loop */ for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { if (! mfcc->valid) continue; mfcc->f--; } break; } /* 各インスタンスについて mfcc->f の認識処理を1フレーム進める */ switch (decode_proceed(recog)) { case -1: /* error */ return -1; break; case 0: /* success */ break; case 1: /* segmented */ /* ignore segmentation while in the backstep segment */ break; } /* call frame-wise callback */ callback_exec(CALLBACK_EVENT_PASS1_FRAME, recog); } } } /* call frame-wise callback if at least one of MFCC is valid at this frame */ for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { if (mfcc->valid) { callback_exec(CALLBACK_EVENT_PASS1_FRAME, recog); break; } } return 0; } /** * * @brief 第1パス平行音声認識処理のメイン * * この関数内では,漸次的な特徴量抽出および第1パスの認識が行われる. * 入力データに対して窓掛け・シフトを行いMFCC計算を行いながら, * 音声認識を1フレームずつ並列実行する. * * 認識処理(decode_proceed())において,音声区間終了が要求される * ことがある. この場合,未処理の音声を保存して第1パスを終了する * よう呼出元に要求する. * * SPSEGMENT_NAIST あるいは GMM_VAD などのバックエンドVAD定義時は,デコーダベースの * VAD (音声区間開始検出)に伴うデコーディング制御が行われる. * トリガ前は,認識処理が呼ばれるが,実際には各関数内で認識処理は * 行われていない. 開始を検出した時,この関数はそこまでに得られた * MFCC列を一定フレーム長分巻戻し,その巻戻し先から通常の認識処理を * 再開する. なお,複数処理インスタンス間がある場合,開始トリガは * どれかのインスタンスが検出した時点で全ての開始が同期される. * * この関数は,音声入力ルーチンのコールバックとして呼ばれる. * 音声データの数千サンプル録音ごとにこの関数が呼び出される. * * @param Speech [in] 音声データへのバッファへのポインタ * @param nowlen [in] 音声データの長さ * @param recog [i/o] engine instance * * @return エラー時に -1 を,正常時に 0 を返す. また,第1パスを * 終了するよう呼出元に要求するときは 1 を返す. * * * @brief Main function of the on-the-fly 1st pass decoding * * This function performs sucessive MFCC calculation and 1st pass decoding. * The given input data are windowed to a certain length, then converted * to MFCC, and decoding for the input frame will be performed in one * process cycle. The loop cycle will continue with window shift, until * the whole given input has been processed. * * In case of input segment request from decoding process (in * decode_proceed()), this function keeps the rest un-processed speech * to a buffer and tell the caller to stop input and end the 1st pass. 
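 *
 * The following is a minimal, hypothetical illustration of how an audio
 * layer drives this callback (the real driver is adin_go(); the capture
 * function capture_samples() is an assumed placeholder, and error
 * handling is reduced to the bare minimum):
 *
 *   SP16 buf[4096];
 *   int len, ret;
 *   while ((len = capture_samples(buf, 4096)) > 0) {  // assumed helper
 *     ret = RealTimePipeLine(buf, len, recog);
 *     if (ret != 0) break;  // nonzero: stop feeding samples
 *                           // (segmentation request, overflow or error)
 *   }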
* * When back-end VAD such as SPSEGMENT_NAIST or GMM_VAD is defined, Decoder-based * VAD is enabled and its decoding control will be managed here. * In decoder-based VAD mode, the recognition will be processed but * no output will be done at the first un-triggering input area. * when speech input start is detected, this function will rewind the * already obtained MFCC sequence to a certain frames, and re-start * normal recognition at that point. When multiple recognition process * instance is running, their segmentation will be synchronized. * * This function will be called each time a new speech sample comes as * as callback from A/D-in routine. * * @param Speech [in] pointer to the speech sample segments * @param nowlen [in] length of above * @param recog [i/o] engine instance * * @return -1 on error (will close stream and terminate recognition), * 0 on success (allow caller to call me for the next segment). It * returns 1 when telling the caller to segment now at the middle of * input , and 2 when input length overflow is detected. * * * @callgraph * @callergraph * */ int RealTimePipeLine(SP16 *Speech, int nowlen, Recog *recog) /* Speech[0...nowlen] = input */ { int i, now, ret; MFCCCalc *mfcc; RealBeam *r; r = &(recog->real); #ifdef DEBUG_VTLN_ALPHA_TEST /* store speech */ adin_cut_callback_store_buffer(Speech, nowlen, recog); #endif /* window[0..windownum-1] は前回の呼び出しで残った音声データが格納されている */ /* window[0..windownum-1] are speech data left from previous call */ /* 処理用ポインタを初期化 */ /* initialize pointer for local processing */ now = 0; /* 認識処理がセグメント要求で終わったのかどうかのフラグをリセット */ /* reset flag which indicates whether the input has ended with segmentation request */ r->last_is_segmented = FALSE; #ifdef RDEBUG printf("got %d samples\n", nowlen); #endif while (now < nowlen) { /* till whole input is processed */ /* 入力長が maxframelen に達したらここで強制終了 */ /* if input length reaches maximum buffer size, terminate 1st pass here */ for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { if (mfcc->f >= r->maxframelen) return(1); } /* 窓バッファを埋められるだけ埋める */ /* fill window buffer as many as possible */ for(i = min(r->windowlen - r->windownum, nowlen - now); i > 0 ; i--) r->window[r->windownum++] = (float) Speech[now++]; /* もし窓バッファが埋まらなければ, このセグメントの処理はここで終わる. 処理されなかったサンプル (window[0..windownum-1]) は次回に持ち越し. */ /* if window buffer was not filled, end processing here, keeping the rest samples (window[0..windownum-1]) in the window buffer. 
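     (For reference: windowlen is framesize + 1 samples as set in
     RealTimeInit(), so with a typical 16 kHz setup of a 400-sample
     window and 160-sample shift, 401 samples must be available before
     a frame is computed; after each frame the shift below keeps
     401 - 160 = 241 samples for the next frame.)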
*/ if (r->windownum < r->windowlen) break; #ifdef RDEBUG /* printf("%d used, %d rest\n", now, nowlen - now); printf("[f = %d]\n", f);*/ #endif for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { mfcc->valid = FALSE; /* 窓内の音声波形から特徴量を計算して r->tmpmfcc に格納 */ /* calculate a parameter vector from current waveform windows and store to r->tmpmfcc */ if ((*(recog->calc_vector))(mfcc, r->window, r->windowlen)) { #ifdef ENABLE_PLUGIN /* call post-process plugin if exist */ plugin_exec_vector_postprocess(mfcc->tmpmfcc, mfcc->param->veclen, mfcc->f); #endif /* MFCC完成,登録 */ mfcc->valid = TRUE; /* now get the MFCC vector of current frame, now store it to param */ if (param_alloc(mfcc->param, mfcc->f + 1, mfcc->param->veclen) == FALSE) { jlog("ERROR: failed to allocate memory for incoming MFCC vectors\n"); return -1; } memcpy(mfcc->param->parvec[mfcc->f], mfcc->tmpmfcc, sizeof(VECT) * mfcc->param->veclen); #ifdef RDEBUG printf("DeltaBuf: %02d: got frame %d\n", mfcc->id, mfcc->f); #endif } } /* 処理を1フレーム進める */ /* proceed one frame */ ret = proceed_one_frame(recog); if (ret == 1 && recog->jconf->decodeopt.segment) { /* ショートポーズセグメンテーション: バッファに残っているデータを 別に保持して,次回の最初に処理する */ /* short pause segmentation: there is some data left in buffer, so we should keep them for next processing */ r->rest_len = nowlen - now; if (r->rest_len > 0) { /* copy rest samples to rest_Speech */ if (r->rest_Speech == NULL) { r->rest_alloc_len = r->rest_len; r->rest_Speech = (SP16 *)mymalloc(sizeof(SP16)*r->rest_alloc_len); } else if (r->rest_alloc_len < r->rest_len) { r->rest_alloc_len = r->rest_len; r->rest_Speech = (SP16 *)myrealloc(r->rest_Speech, sizeof(SP16)*r->rest_alloc_len); } memcpy(r->rest_Speech, &(Speech[now]), sizeof(SP16) * r->rest_len); } } if (ret != 0) return ret; /* 1フレーム処理が進んだのでポインタを進める */ /* proceed frame pointer */ for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { if (!mfcc->valid) continue; mfcc->f++; } /* 窓バッファを処理が終わった分シフト */ /* shift window */ memmove(r->window, &(r->window[recog->jconf->input.frameshift]), sizeof(SP16) * (r->windowlen - recog->jconf->input.frameshift)); r->windownum -= recog->jconf->input.frameshift; } /* 与えられた音声セグメントに対する認識処理が全て終了 呼び出し元に, 入力を続けるよう伝える */ /* input segment is fully processed tell the caller to continue input */ return(0); } /** * * @brief セグメントの認識再開処理 * * この関数はデコーダベースVADやショートポーズセグメンテーションによって * 入力がセグメントに切られた場合に,その後の認識の再開に関する処理を行う. * 具体的には,入力の認識を開始する前に,前回の入力セグメントにおける * 巻戻し分のMFCC列から認識を開始する. さらに,前回のセグメンテーション時に * 未処理だった残りの音声サンプルがあればそれも処理する. * * @param recog [i/o] エンジンインスタンス * * @return エラー時 -1,正常時 0 を返す. また,この入力断片の処理中に * 文章の区切りが見つかったときは第1パスをここで中断するために 1 を返す. * * * * @brief Resuming recognition for short pause segmentation. * * This function process overlapped data and remaining speech prior * to the next input when input was segmented at last processing. 
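 *
 * Concretely, recognition is first resumed on the MFCC frames that were
 * rewound at the end of the previous input segment, and then any speech
 * samples that were left unprocessed at the last segmentation are fed to
 * RealTimePipeLine().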
* * @param recog [i/o] engine instance * * @return -1 on error (tell caller to terminate), 0 on success (allow caller * to call me for the next segment), or 1 when an end-of-sentence detected * at this point (in that case caller will stop input and go to 2nd pass) * * * @callgraph * @callergraph * */ int RealTimeResume(Recog *recog) { MFCCCalc *mfcc; RealBeam *r; boolean ok_p; #ifdef SPSEGMENT_NAIST RecogProcess *p; #endif PROCESS_AM *am; r = &(recog->real); /* 計算用のワークエリアを準備 */ /* prepare work area for calculation */ if (recog->jconf->input.type == INPUT_WAVEFORM) { reset_mfcc(recog); } /* 音響尤度計算用キャッシュを準備 */ /* prepare cache area for acoustic computation of HMM states and mixtures */ for(am=recog->amlist;am;am=am->next) { outprob_prepare(&(am->hmmwrk), r->maxframelen); } /* param にある全パラメータを処理する準備 */ /* prepare to process all data in param */ for(mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { if (mfcc->param->samplenum == 0) mfcc->valid = FALSE; else mfcc->valid = TRUE; #ifdef RDEBUG printf("Resume: %02d: f=%d\n", mfcc->id, mfcc->mfcc->param->samplenum-1); #endif /* フレーム数をリセット */ /* reset frame count */ mfcc->f = 0; /* MAP-CMN の初期化 */ /* Prepare for MAP-CMN */ if (mfcc->para->cmn || mfcc->para->cvn) CMN_realtime_prepare(mfcc->cmn.wrk); } #ifdef BACKEND_VAD if (recog->jconf->decodeopt.segment) { spsegment_init(recog); } /* not exec pass1 begin callback here */ #else recog->triggered = FALSE; for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { if (!mfcc->valid) continue; callback_exec(CALLBACK_EVENT_SEGMENT_BEGIN, recog); callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog); recog->triggered = TRUE; break; } #endif /* param 内の全フレームについて認識処理を進める */ /* proceed recognition for all frames in param */ while(1) { ok_p = TRUE; for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { if (! mfcc->valid) continue; if (mfcc->f < mfcc->param->samplenum) { mfcc->valid = TRUE; ok_p = FALSE; } else { mfcc->valid = FALSE; } } if (ok_p) { /* すべての MFCC が終わりに達したのでループ終了 */ /* all MFCC has been processed, end of loop */ break; } /* 各インスタンスについて mfcc->f の認識処理を1フレーム進める */ switch (decode_proceed(recog)) { case -1: /* error */ return -1; break; case 0: /* success */ break; case 1: /* segmented */ /* segmented, end procs ([0..f])*/ r->last_is_segmented = TRUE; return 1; /* segmented by this function */ } #ifdef BACKEND_VAD /* check up trigger in case of VAD segmentation */ if (recog->jconf->decodeopt.segment) { if (recog->triggered == FALSE) { if (spsegment_trigger_sync(recog)) { callback_exec(CALLBACK_EVENT_SEGMENT_BEGIN, recog); callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog); recog->triggered = TRUE; } } } #endif /* call frame-wise callback */ callback_exec(CALLBACK_EVENT_PASS1_FRAME, recog); /* 1フレーム処理が進んだのでポインタを進める */ /* proceed frame pointer */ for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { if (!mfcc->valid) continue; mfcc->f++; } } /* 前回のセグメント時に入力をシフトしていない分をシフトする */ /* do the last shift here */ if (recog->jconf->input.type == INPUT_WAVEFORM) { memmove(r->window, &(r->window[recog->jconf->input.frameshift]), sizeof(SP16) * (r->windowlen - recog->jconf->input.frameshift)); r->windownum -= recog->jconf->input.frameshift; /* これで再開の準備が整ったので,まずは前回の処理で残っていた音声データから 処理する */ /* now that the search status has been prepared for the next input, we first process the rest unprocessed samples at the last session */ if (r->rest_len > 0) { return(RealTimePipeLine(r->rest_Speech, r->rest_len, recog)); } } /* 新規の入力に対して認識処理は続く… */ /* the recognition process will continue for the newly incoming samples... 
*/ return 0; } /** * * @brief 第1パス平行認識処理の終了処理を行う. * * この関数は第1パス終了時に呼ばれ,入力長を確定したあと, * decode_end() (セグメントで終了したときは decode_end_segmented())を * 呼び出して第1パス終了処理を行う. * * もし音声入力ストリームの終了によって認識が終わった場合(ファイル入力で * 終端に達した場合など)は,デルタバッファに未処理の入力が残っているので, * それをここで処理する. * * @param recog [i/o] エンジンインスタンス * * @return 処理成功時 TRUE, エラー時 FALSE を返す. * * * @brief Finalize the 1st pass on-the-fly decoding. * * This function will be called after the 1st pass processing ends. * It fix the input length of parameter vector sequence, call * decode_end() (or decode_end_segmented() when last input was ended * by segmentation) to finalize the 1st pass. * * If the last input was ended by end-of-stream (in case input reached * EOF in file input etc.), process the rest samples remaining in the * delta buffers. * * @param recog [i/o] engine instance * * @return TRUE on success, or FALSE on error. * */ boolean RealTimeParam(Recog *recog) { boolean ret1, ret2; RealBeam *r; int ret; int maxf; boolean ok_p; MFCCCalc *mfcc; Value *para; #ifdef RDEBUG int i; #endif r = &(recog->real); if (r->last_is_segmented) { /* RealTimePipeLine で認識処理側の理由により認識が中断した場合, 現状態のMFCC計算データをそのまま次回へ保持する必要があるので, MFCC計算終了処理を行わずに第1パスの結果のみ出力して終わる. */ /* When input segmented by recognition process in RealTimePipeLine(), we have to keep the whole current status of MFCC computation to the next call. So here we only output the 1st pass result. */ for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { mfcc->param->header.samplenum = mfcc->f + 1;/* len = lastid + 1 */ mfcc->param->samplenum = mfcc->f + 1; } decode_end_segmented(recog); /* この区間の param データを第2パスのために返す */ /* return obtained parameter for 2nd pass */ return(TRUE); } if (recog->jconf->input.type == INPUT_VECTOR) { /* finalize real-time 1st pass */ for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { mfcc->param->header.samplenum = mfcc->f; mfcc->param->samplenum = mfcc->f; } /* 最終フレーム処理を行い,認識の結果出力と終了処理を行う */ decode_end(recog); return TRUE; } /* MFCC計算の終了処理を行う: 最後の遅延フレーム分を処理 */ /* finish MFCC computation for the last delayed frames */ for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { if (mfcc->para->delta || mfcc->para->acc) { mfcc->valid = TRUE; } else { mfcc->valid = FALSE; } } /* loop until all data has been flushed */ while (1) { /* if all mfcc became invalid, exit loop here */ ok_p = FALSE; for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { if (mfcc->valid) { ok_p = TRUE; break; } } if (!ok_p) break; /* try to get 1 frame for all mfcc instances */ for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { para = mfcc->para; if (! 
mfcc->valid) continue; /* check if there is data in cycle buffer of delta */ ret1 = WMP_deltabuf_flush(mfcc->db); #ifdef RDEBUG printf("DeltaBufLast: ret=%d, status=", ret1); for(i=0;idb->len;i++) { printf("%d", mfcc->db->is_on[i]); } printf(", nextstore=%d\n", mfcc->db->store); #endif if (ret1) { /* uncomputed delta has flushed, compute it with tmpmfcc */ if (para->energy && para->absesup) { memcpy(mfcc->tmpmfcc, mfcc->db->vec, sizeof(VECT) * (para->baselen - 1)); memcpy(&(mfcc->tmpmfcc[para->baselen-1]), &(mfcc->db->vec[para->baselen]), sizeof(VECT) * para->baselen); } else { memcpy(mfcc->tmpmfcc, mfcc->db->vec, sizeof(VECT) * para->baselen * 2); } if (para->acc) { /* this new delta should be given to the accel cycle buffer */ ret2 = WMP_deltabuf_proceed(mfcc->ab, mfcc->tmpmfcc); #ifdef RDEBUG printf("AccelBuf: ret=%d, status=", ret2); for(i=0;iab->len;i++) { printf("%d", mfcc->ab->is_on[i]); } printf(", nextstore=%d\n", mfcc->ab->store); #endif if (ret2) { /* uncomputed accel was given, compute it with tmpmfcc */ memcpy(mfcc->tmpmfcc, mfcc->ab->vec, sizeof(VECT) * (para->veclen - para->baselen)); memcpy(&(mfcc->tmpmfcc[para->veclen - para->baselen]), &(mfcc->ab->vec[para->veclen - para->baselen]), sizeof(VECT) * para->baselen); } else { /* still no input is given: */ /* in case of very short input: go on to the next input */ continue; } } } else { /* no data left in the delta buffer */ if (para->acc) { /* no new data, just flush the accel buffer */ ret2 = WMP_deltabuf_flush(mfcc->ab); #ifdef RDEBUG printf("AccelBuf: ret=%d, status=", ret2); for(i=0;iab->len;i++) { printf("%d", mfcc->ab->is_on[i]); } printf(", nextstore=%d\n", mfcc->ab->store); #endif if (ret2) { /* uncomputed data has flushed, compute it with tmpmfcc */ memcpy(mfcc->tmpmfcc, mfcc->ab->vec, sizeof(VECT) * (para->veclen - para->baselen)); memcpy(&(mfcc->tmpmfcc[para->veclen - para->baselen]), &(mfcc->ab->vec[para->veclen - para->baselen]), sizeof(VECT) * para->baselen); } else { /* actually no data exists in both delta and accel */ mfcc->valid = FALSE; /* disactivate this instance */ continue; /* end this loop */ } } else { /* only delta: input fully flushed */ mfcc->valid = FALSE; /* disactivate this instance */ continue; /* end this loop */ } } /* a new frame has been obtained from delta buffer to tmpmfcc */ if(para->cmn || para->cvn) CMN_realtime(mfcc->cmn.wrk, mfcc->tmpmfcc); if (param_alloc(mfcc->param, mfcc->f + 1, mfcc->param->veclen) == FALSE) { jlog("ERROR: failed to allocate memory for incoming MFCC vectors\n"); return FALSE; } /* store to mfcc->f */ memcpy(mfcc->param->parvec[mfcc->f], mfcc->tmpmfcc, sizeof(VECT) * mfcc->param->veclen); #ifdef ENABLE_PLUGIN /* call postprocess plugin if any */ plugin_exec_vector_postprocess(mfcc->param->parvec[mfcc->f], mfcc->param->veclen, mfcc->f); #endif } /* call recognition start callback */ ok_p = FALSE; maxf = 0; for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { if (!mfcc->valid) continue; if (maxf < mfcc->f) maxf = mfcc->f; if (mfcc->f == 0) { ok_p = TRUE; } } if (ok_p && maxf == 0) { /* call callback when at least one of MFCC has initial frame */ if (recog->jconf->decodeopt.segment) { #ifdef BACKEND_VAD /* not exec pass1 begin callback here */ #else if (!recog->process_segment) { callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog); } callback_exec(CALLBACK_EVENT_SEGMENT_BEGIN, recog); callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog); recog->triggered = TRUE; #endif } else { callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog); 
callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog); recog->triggered = TRUE; } } /* proceed for the curent frame */ ret = decode_proceed(recog); if (ret == -1) { /* error */ return -1; } else if (ret == 1) { /* segmented */ /* loop out */ break; } /* else no event occured */ #ifdef BACKEND_VAD /* check up trigger in case of VAD segmentation */ if (recog->jconf->decodeopt.segment) { if (recog->triggered == FALSE) { if (spsegment_trigger_sync(recog)) { if (!recog->process_segment) { callback_exec(CALLBACK_EVENT_RECOGNITION_BEGIN, recog); } callback_exec(CALLBACK_EVENT_SEGMENT_BEGIN, recog); callback_exec(CALLBACK_EVENT_PASS1_BEGIN, recog); recog->triggered = TRUE; } } } #endif /* call frame-wise callback */ callback_exec(CALLBACK_EVENT_PASS1_FRAME, recog); /* move to next */ for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { if (! mfcc->valid) continue; mfcc->f++; if (mfcc->f > r->maxframelen) mfcc->valid = FALSE; } } /* finalize real-time 1st pass */ for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { mfcc->param->header.samplenum = mfcc->f; mfcc->param->samplenum = mfcc->f; } /* 最終フレーム処理を行い,認識の結果出力と終了処理を行う */ decode_end(recog); return(TRUE); } /** * * ケプストラム平均の更新. * 次回の認識に備えて,入力データからCMN用のケプストラム平均を更新する. * * @param mfcc [i/o] 計算対象の MFCC計算インスタンス * @param recog [i/o] エンジンインスタンス * * * * Update cepstral mean. * * This function updates the initial cepstral mean for CMN of the next input. * * @param mfcc [i/o] MFCC Calculation instance to update its CMN * @param recog [i/o] engine instance * */ void RealTimeCMNUpdate(MFCCCalc *mfcc, Recog *recog) { boolean cmn_update_p; Value *para; Jconf *jconf; RecogProcess *r; jconf = recog->jconf; para = mfcc->para; /* update CMN vector for next speech */ if(para->cmn) { if (mfcc->cmn.update) { cmn_update_p = TRUE; for(r=recog->process_list;r;r=r->next) { if (!r->live) continue; if (r->am->mfcc != mfcc) continue; if (r->result.status < 0) { /* input rejected */ cmn_update_p = FALSE; break; } } if (cmn_update_p) { /* update last CMN parameter for next spech */ CMN_realtime_update(mfcc->cmn.wrk, mfcc->param); } else { /* do not update, because the last input is bogus */ if (verbose_flag) { #ifdef BACKEND_VAD if (!recog->jconf->decodeopt.segment || recog->triggered) { jlog("STAT: skip CMN parameter update since last input was invalid\n"); } #else jlog("STAT: skip CMN parameter update since last input was invalid\n"); #endif } } } /* if needed, save the updated CMN parameter to a file */ if (mfcc->cmn.save_filename) { if (CMN_save_to_file(mfcc->cmn.wrk, mfcc->cmn.save_filename) == FALSE) { jlog("WARNING: failed to save CMN parameter to \"%s\"\n", mfcc->cmn.save_filename); } } } } /** * * 第1パス平行認識処理を中断する. * * @param recog [i/o] エンジンインスタンス * * * Terminate the 1st pass on-the-fly decoding. 
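 *
 * Unlike RealTimeParam(), the parameter length is simply fixed to the
 * current frame and decode_end() is called directly, without flushing the
 * frames still pending in the delta/accel cycle buffers.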
* * @param recog [i/o] engine instance * */ void RealTimeTerminate(Recog *recog) { MFCCCalc *mfcc; for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { mfcc->param->header.samplenum = mfcc->f; mfcc->param->samplenum = mfcc->f; } /* 最終フレーム処理を行い,認識の結果出力と終了処理を行う */ decode_end(recog); } /** * * Free the whole work area for 1st pass on-the-fly decoding * * * 第1パス並行処理のためのワークエリアを開放する * * * @param recog [in] engine instance * */ void realbeam_free(Recog *recog) { RealBeam *r; r = &(recog->real); if (recog->real.window) { free(recog->real.window); recog->real.window = NULL; } if (recog->real.rest_Speech) { free(recog->real.rest_Speech); recog->real.rest_Speech = NULL; } } /************************************************************************/ /************************************************************************/ /************************************************************************/ /************************************************************************/ /* MFCC realtime input */ /** * * * * * * * * @param recog * @param ad_check * * @return 2 when input termination requested by recognition process, * 1 when segmentation request returned from input module, 0 when end * of input returned from input module, -1 on error, -2 when input * termination requested by ad_check(). * */ int mfcc_go(Recog *recog, int (*ad_check)(Recog *)) { RealBeam *r; MFCCCalc *mfcc; int new_f; int ret, ret3; r = &(recog->real); r->last_is_segmented = FALSE; while(1/*in_data_vec*/) { ret = mfc_module_read(recog->mfcclist, &new_f); if (debug2_flag) { if (recog->mfcclist->f < new_f) { jlog("%d: %d (%d)\n", recog->mfcclist->f, new_f, ret); } } /* callback poll */ if (ad_check != NULL) { if ((ret3 = (*(ad_check))(recog)) < 0) { if ((ret3 == -1 && recog->mfcclist->f == 0) || ret3 == -2) { return(-2); } } } while(recog->mfcclist->f < new_f) { recog->mfcclist->valid = TRUE; #ifdef ENABLE_PLUGIN /* call post-process plugin if exist */ plugin_exec_vector_postprocess(recog->mfcclist->param->parvec[recog->mfcclist->f], recog->mfcclist->param->veclen, recog->mfcclist->f); #endif /* 処理を1フレーム進める */ /* proceed one frame */ switch(proceed_one_frame(recog)) { case -1: /* error */ return -1; case 0: /* normal */ break; case 1: /* segmented by process */ return 2; } /* 1フレーム処理が進んだのでポインタを進める */ /* proceed frame pointer */ for (mfcc = recog->mfcclist; mfcc; mfcc = mfcc->next) { if (!mfcc->valid) continue; mfcc->f++; } } /* check if input end */ switch(ret) { case -1: /* end of input */ return 0; case -2: /* error */ return -1; case -3: /* end of segment request */ return 1; } } /* 与えられた音声セグメントに対する認識処理が全て終了 呼び出し元に, 入力を続けるよう伝える */ /* input segment is fully processed tell the caller to continue input */ return(1); } /* end of file */ julius-4.2.2/libjulius/src/instance.c0000644001051700105040000004505312004452401016070 0ustar ritrlab/** * @file instance.c * * * @brief Allocate/free various instances * * * * @brief 各種インスタンスの割り付けおよび開放 * * * @author Akinobu Lee * @date Sun Oct 28 18:06:20 2007 * * $Revision: 1.8 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #include /** * * Allocate a new MFCC calculation instance * * * MFCC計算インスタンスを新たに割り付ける. * * * @param amconf [in] acoustic model configuration parameters * * @return the newly allocated MFCC calculation instance. 
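 *
 * A minimal usage sketch (illustrative; inside the engine this is normally
 * done by create_mfcc_calc_instances() rather than called directly):
 * @code
 * MFCCCalc *mfcc = j_mfcccalc_new(amconf);  // amconf: a JCONF_AM, may be NULL
 * if (mfcc == NULL) {
 *   // MFCC computation could not be initialized for this configuration
 * }
 * @endcode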
* * @callgraph * @callergraph * */ MFCCCalc * j_mfcccalc_new(JCONF_AM *amconf) { MFCCCalc *mfcc; mfcc = (MFCCCalc *)mymalloc(sizeof(MFCCCalc)); memset(mfcc, 0, sizeof(MFCCCalc)); mfcc->param = NULL; mfcc->rest_param = NULL; mfcc->frontend.ssbuf = NULL; mfcc->cmn.loaded = FALSE; mfcc->plugin_source = -1; if (amconf) { mfcc->para = &(amconf->analysis.para); mfcc->hmm_loaded = (amconf->analysis.para_hmm.loaded == 1) ? TRUE : FALSE; mfcc->htk_loaded = (amconf->analysis.para_htk.loaded == 1) ? TRUE : FALSE; mfcc->wrk = WMP_work_new(mfcc->para); if (mfcc->wrk == NULL) { jlog("ERROR: j_mfcccalc_new: failed to initialize MFCC computation\n"); return NULL; } mfcc->cmn.load_filename = amconf->analysis.cmnload_filename; mfcc->cmn.update = amconf->analysis.cmn_update; mfcc->cmn.save_filename = amconf->analysis.cmnsave_filename; mfcc->cmn.map_weight = amconf->analysis.cmn_map_weight; mfcc->frontend.ss_alpha = amconf->frontend.ss_alpha; mfcc->frontend.ss_floor = amconf->frontend.ss_floor; mfcc->frontend.sscalc = amconf->frontend.sscalc; mfcc->frontend.sscalc_len = amconf->frontend.sscalc_len; mfcc->frontend.ssload_filename = amconf->frontend.ssload_filename; } mfcc->next = NULL; return mfcc; } /** * * Free an MFCC calculation instance. * * * MFCC計算インスタンスを開放する * * * @param mfcc [i/o] MFCC calculation instance * * @callgraph * @callergraph */ void j_mfcccalc_free(MFCCCalc *mfcc) { if (mfcc->rest_param) free_param(mfcc->rest_param); if (mfcc->param) free_param(mfcc->param); if (mfcc->wrk) WMP_free(mfcc->wrk); if (mfcc->tmpmfcc) free(mfcc->tmpmfcc); if (mfcc->db) WMP_deltabuf_free(mfcc->db); if (mfcc->ab) WMP_deltabuf_free(mfcc->ab); if (mfcc->cmn.wrk) CMN_realtime_free(mfcc->cmn.wrk); if (mfcc->frontend.ssbuf) free(mfcc->frontend.ssbuf); if (mfcc->frontend.mfccwrk_ss) WMP_free(mfcc->frontend.mfccwrk_ss); free(mfcc); } /** * * Allocate a new acoustic model processing instance. * * * 音響モデル計算インスタンスを新たに割り付ける. * * * @param recog [i/o] engine instance * @param amconf [in] AM configuration to assign * * @return newly allocated acoustic model processing instance. * * @callgraph * @callergraph */ PROCESS_AM * j_process_am_new(Recog *recog, JCONF_AM *amconf) { PROCESS_AM *new, *atmp; /* allocate memory */ new = (PROCESS_AM *)mymalloc(sizeof(PROCESS_AM)); memset(new, 0, sizeof(PROCESS_AM)); /* assign configuration */ new->config = amconf; /* append to last */ new->next = NULL; if (recog->amlist == NULL) { recog->amlist = new; } else { for(atmp = recog->amlist; atmp->next; atmp = atmp->next); atmp->next = new; } return new; } /** * * Free an acoustic model processing instance. * * * 音響モデル計算インスタンスを開放する. * * * @param am [i/o] AM process instance * * @callgraph * @callergraph */ void j_process_am_free(PROCESS_AM *am) { /* HMMWork hmmwrk */ outprob_free(&(am->hmmwrk)); if (am->hmminfo) hmminfo_free(am->hmminfo); if (am->hmm_gs) hmminfo_free(am->hmm_gs); /* not free am->jconf */ free(am); } /** * * Allocate a new language model processing instance. * * * 言語モデル計算インスタンスを新たに割り付ける. * * * @param recog [i/o] engine instance * @param lmconf [in] LM configuration to assign * * @return newly allocated language model processing instance. 
* * @callgraph * @callergraph */ PROCESS_LM * j_process_lm_new(Recog *recog, JCONF_LM *lmconf) { PROCESS_LM *new, *ltmp; /* allocate memory */ new = (PROCESS_LM *)mymalloc(sizeof(PROCESS_LM)); memset(new, 0, sizeof(PROCESS_LM)); /* assign config */ new->config = lmconf; /* initialize some values */ new->lmtype = lmconf->lmtype; new->lmvar = lmconf->lmvar; new->gram_maxid = 0; new->global_modified = FALSE; /* append to last */ new->next = NULL; if (recog->lmlist == NULL) { recog->lmlist = new; } else { for(ltmp = recog->lmlist; ltmp->next; ltmp = ltmp->next); ltmp->next = new; } return new; } /** * * Free a language model processing instance. * * * 言語モデル計算インスタンスを開放する. * * * @param lm [i/o] LM process instance * * @callgraph * @callergraph */ void j_process_lm_free(PROCESS_LM *lm) { if (lm->winfo) word_info_free(lm->winfo); if (lm->ngram) ngram_info_free(lm->ngram); if (lm->grammars) multigram_free_all(lm->grammars); if (lm->dfa) dfa_info_free(lm->dfa); /* not free lm->jconf */ free(lm); } /** * * Allocate a new recognition process instance. * * * 認識処理インスタンスを新たに生成する. * * * @param recog [i/o] engine instance * @param sconf [in] SEARCH configuration to assign * * @return the newly allocated recognition process instance. * * @callgraph * @callergraph */ RecogProcess * j_recogprocess_new(Recog *recog, JCONF_SEARCH *sconf) { RecogProcess *new, *ptmp; /* allocate memory */ new = (RecogProcess *)mymalloc(sizeof(RecogProcess)); memset(new, 0, sizeof(RecogProcess)); new->live = FALSE; new->active = 0; new->next = NULL; /* assign configuration */ new->config = sconf; /* append to last */ new->next = NULL; if (recog->process_list == NULL) { recog->process_list = new; } else { for(ptmp = recog->process_list; ptmp->next; ptmp = ptmp->next); ptmp->next = new; } return new; } /** * * Free a recognition process instance * * * 認識処理インスタンスを開放する. * * * @param process [i/o] recognition process instance * * @callgraph * @callergraph */ void j_recogprocess_free(RecogProcess *process) { /* not free jconf, am, lm here */ /* free part of StackDecode work area */ wchmm_fbs_free(process); /* free cache */ if (process->lmtype == LM_PROB) { max_successor_cache_free(process->wchmm); } /* free wchmm */ if (process->wchmm) wchmm_free(process->wchmm); /* free backtrellis */ if (process->backtrellis) bt_free(process->backtrellis); /* free pass1 work area */ fsbeam_free(&(process->pass1)); free(process); } /** * * Allocate a new acoustic model (AM) parameter structure. * Default parameter values are set to it. * * * 音響モデル(AM)パラメータ構造体を新たに割り付ける. * 内部メンバにはデフォルト値が格納される. * * * @return the newly allocated AM parameter structure * * @callgraph * @callergraph * @ingroup jconf */ JCONF_AM * j_jconf_am_new() { JCONF_AM *new; new = (JCONF_AM *)mymalloc(sizeof(JCONF_AM)); jconf_set_default_values_am(new); new->next = NULL; return new; } /** * * Release an acoustic model (AM) parameter structure * Default parameter values are set to it. * * * 音響モデル(AM)パラメータ構造体を解放する. * 内部メンバにはデフォルト値が格納される. * * * @param amconf [in] AM configuration * * @callgraph * @callergraph * @ingroup jconf * */ void j_jconf_am_free(JCONF_AM *amconf) { free(amconf); } /** * * Register AM configuration to global jconf. * Returns error if the same name already exist in the jconf. * * * 音響モデル(AM)パラメータ構造体を jconf に登録する. * jconf内に同じ名前のモジュールが既に登録されている場合はエラーとなる. 
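 *
 * A minimal usage sketch (illustrative):
 * @code
 * JCONF_AM *amconf = j_jconf_am_new();
 * // ... fill the amconf members (model file names etc.) ...
 * if (j_jconf_am_regist(jconf, amconf, "am1") == FALSE) {
 *   // no name was given, or a configuration named "am1" already exists
 * }
 * @endcode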
* * * @param jconf [i/o] global jconf * @param amconf [in] AM configuration to register * @param name [in] module name * * @return TRUE on success, FALSE on failure * * @callgraph * @callergraph * @ingroup jconf */ boolean j_jconf_am_regist(Jconf *jconf, JCONF_AM *amconf, char *name) { JCONF_AM *atmp; if (!name) { jlog("ERROR: j_jconf_am_regist: no name specified to register an AM conf\n"); return FALSE; } for(atmp = jconf->am_root; atmp; atmp = atmp->next) { if (strmatch(atmp->name, name)) { jlog("ERROR: j_jconf_am_regist: failed to regist an AM conf: the same name \"%s\" already exist\n", atmp->name); return FALSE; } } /* set name */ strncpy(amconf->name, name, JCONF_MODULENAME_MAXLEN); /* append to last */ amconf->next = NULL; if (jconf->am_root == NULL) { amconf->id = 1; jconf->am_root = amconf; } else { for(atmp = jconf->am_root; atmp->next; atmp = atmp->next); amconf->id = atmp->id + 1; atmp->next = amconf; } return TRUE; } /** * * Allocate a new language model (LM) parameter structure. * Default parameter values are set to it. * * * 言語モデル (LM) パラメータ構造体を新たに割り付ける * 内部メンバにはデフォルト値が格納される. * * * @return the newly allocated LM parameter structure. * * @callgraph * @callergraph * @ingroup jconf */ JCONF_LM * j_jconf_lm_new() { JCONF_LM *new; new = (JCONF_LM *)mymalloc(sizeof(JCONF_LM)); jconf_set_default_values_lm(new); new->next = NULL; return new; } /** * * Release a language model (LM) parameter structure * * * 言語モデル (LM) パラメータ構造体を解放する * * * @param lmconf [in] LM parameter structure * * @callgraph * @callergraph * @ingroup jconf * */ void j_jconf_lm_free(JCONF_LM *lmconf) { JCONF_LM_NAMELIST *nl, *nltmp; nl = lmconf->additional_dict_files; while (nl) { nltmp = nl->next; free(nl->name); free(nl); nl = nltmp; } nl = lmconf->additional_dict_entries; while (nl) { nltmp = nl->next; free(nl->name); free(nl); nl = nltmp; } free(lmconf); } /** * * Register LM configuration to global jconf. * Returns error if the same name already exist in the jconf. * * * 言語モデル(LM)パラメータ構造体を jconf に登録する. * jconf内に同じ名前のモジュールが既に登録されている場合はエラーとなる. * * * @param jconf [i/o] global jconf * @param lmconf [in] LM configuration to register * @param name [in] module name * * @return TRUE on success, FALSE on failure * * @callgraph * @callergraph * @ingroup jconf */ boolean j_jconf_lm_regist(Jconf *jconf, JCONF_LM *lmconf, char *name) { JCONF_LM *ltmp; if (!name) { jlog("ERROR: j_jconf_lm_regist: no name specified to register a LM conf\n"); return FALSE; } for(ltmp = jconf->lm_root; ltmp; ltmp = ltmp->next) { if (strmatch(ltmp->name, name)) { jlog("ERROR: j_jconf_lm_regist: failed to regist a LM conf: the same name \"%s\" already exist\n", ltmp->name); return FALSE; } } /* set name */ strncpy(lmconf->name, name, JCONF_MODULENAME_MAXLEN); /* append to last */ lmconf->next = NULL; if (jconf->lm_root == NULL) { lmconf->id = 1; jconf->lm_root = lmconf; } else { for(ltmp = jconf->lm_root; ltmp->next; ltmp = ltmp->next); lmconf->id = ltmp->id + 1; ltmp->next = lmconf; } return TRUE; } /** * * Allocate a new search (SEARCH) parameter structure. * Default parameter values are set to it. * * * 探索パラメータ(SEARCH)構造体を新たに割り付ける. * 内部メンバにはデフォルト値が格納される. * * * @return the newly allocated SEARCH parameter structure. 
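 *
 * A minimal usage sketch (illustrative; amconf and lmconf are assumed to
 * point to JCONF_AM / JCONF_LM structures already registered to the jconf):
 * @code
 * JCONF_SEARCH *sconf = j_jconf_search_new();
 * sconf->amconf = amconf;    // AM configuration to use for this search
 * sconf->lmconf = lmconf;    // LM configuration to use for this search
 * j_jconf_search_regist(jconf, sconf, "sr1");
 * @endcode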
* * @callgraph * @callergraph * @ingroup jconf */ JCONF_SEARCH * j_jconf_search_new() { JCONF_SEARCH *new; new = (JCONF_SEARCH *)mymalloc(sizeof(JCONF_SEARCH)); jconf_set_default_values_search(new); new->next = NULL; return new; } /** * * Release a search (SEARCH) parameter structure * * * 探索パラメータ(SEARCH)構造体を解放する * * * @param sconf [in] SEARCH parameter structure * * @callgraph * @callergraph * @ingroup jconf * */ void j_jconf_search_free(JCONF_SEARCH *sconf) { free(sconf); } /** * * Register SEARCH configuration to global jconf. * Returns error if the same name already exist in the jconf. * * * 探索(SEARCH)パラメータ構造体を jconf に登録する. * jconf内に同じ名前のモジュールが既に登録されている場合はエラーとなる. * * * @param jconf [i/o] global jconf * @param sconf [in] SEARCH configuration to register * @param name [in] module name * * @return TRUE on success, FALSE on failure * * @callgraph * @callergraph * @ingroup jconf */ boolean j_jconf_search_regist(Jconf *jconf, JCONF_SEARCH *sconf, char *name) { JCONF_SEARCH *stmp; if (!name) { jlog("ERROR: j_jconf_search_regist: no name specified to register a SR conf\n"); return FALSE; } for(stmp = jconf->search_root; stmp; stmp = stmp->next) { if (strmatch(stmp->name, name)) { jlog("ERROR: j_jconf_search_regist: failed to regist an SR conf: the same name \"%s\" already exist\n", stmp->name); return FALSE; } } /* set name */ strncpy(sconf->name, name, JCONF_MODULENAME_MAXLEN); /* append to last */ sconf->next = NULL; if (jconf->search_root == NULL) { sconf->id = 1; jconf->search_root = sconf; } else { for(stmp = jconf->search_root; stmp->next; stmp = stmp->next); sconf->id = stmp->id + 1; stmp->next = sconf; } return TRUE; } /** * * @brief Allocate a new global configuration parameter structure. * * JCONF_AM, JCONF_LM, JCONF_SEARCH are defined one for each, and * assigned to the newly allocated structure as initial instances. * * * * @brief 全体のパラメータ構造体を新たに割り付ける. * * JCONF_AM, JCONF_LM, JCONF_SEARCHも1つづつ割り当てられる. * これらは -AM 等の指定を含まない 3.x 以前の jconf を読み込んだときに, * そのまま用いられる. * * * * @return the newly allocated global configuration parameter structure. * * @callgraph * @callergraph * @ingroup jconf */ Jconf * j_jconf_new() { Jconf *jconf; /* allocate memory */ jconf = (Jconf *)mymalloc(sizeof(Jconf)); /* set default values */ jconf_set_default_values(jconf); /* allocate first one am / lm /search instance with their name left NULL */ jconf->am_root = j_jconf_am_new(); jconf->am_root->id = 0; strcpy(jconf->am_root->name, JCONF_MODULENAME_DEFAULT); jconf->lm_root = j_jconf_lm_new(); jconf->lm_root->id = 0; strcpy(jconf->lm_root->name, JCONF_MODULENAME_DEFAULT); jconf->search_root = j_jconf_search_new(); jconf->search_root->id = 0; strcpy(jconf->search_root->name, JCONF_MODULENAME_DEFAULT); /* assign the am /lm instance to the instance */ jconf->search_root->amconf = jconf->am_root; jconf->search_root->lmconf = jconf->lm_root; /* set current */ jconf->amnow = jconf->am_root; jconf->lmnow = jconf->lm_root; jconf->searchnow = jconf->search_root; /* set gmm am jconf */ jconf->gmm = NULL; return(jconf); } /** * * @brief Free a global configuration parameter structure. * * All JCONF_AM, JCONF_LM, JCONF_SEARCH are also released. * * * * @brief 全体のパラメータ構造体を開放する. * * JCONF_AM, JCONF_LM, JCONF_SEARCHもすべて開放される. 
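 *
 * A minimal lifetime sketch (illustrative):
 * @code
 * Jconf *jconf = j_jconf_new();
 * // ... register configurations, load models, run recognition ...
 * j_jconf_free(jconf);  // releases jconf and all registered JCONF_* structs
 * @endcode
 * Note that when the jconf has been attached to an engine instance,
 * j_recog_free() will call this function on recog->jconf by itself.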
* * * * @param jconf [in] global configuration parameter structure * * @callgraph * @callergraph * @ingroup jconf */ void j_jconf_free(Jconf *jconf) { JCONF_AM *am, *amtmp; JCONF_LM *lm, *lmtmp; JCONF_SEARCH *sc, *sctmp; opt_release(jconf); am = jconf->am_root; while(am) { amtmp = am->next; j_jconf_am_free(am); am = amtmp; } lm = jconf->lm_root; while(lm) { lmtmp = lm->next; j_jconf_lm_free(lm); lm = lmtmp; } sc = jconf->search_root; while(sc) { sctmp = sc->next; j_jconf_search_free(sc); sc = sctmp; } free(jconf); } /** * * Allocate memory for a new engine instance. * * * エンジンインスタンスを新たにメモリ割り付けする. * * * @return the newly allocated engine instance. * * @callgraph * @callergraph * @ingroup instance */ Recog * j_recog_new() { Recog *recog; /* allocate memory */ recog = (Recog *)mymalloc(sizeof(Recog)); /* clear all values to 0 (NULL) */ memset(recog, 0, sizeof(Recog)); /* initialize some values */ recog->jconf = NULL; recog->amlist = NULL; recog->lmlist = NULL; recog->process_list = NULL; recog->process_online = FALSE; recog->process_active = TRUE; recog->process_want_terminate = FALSE; recog->process_want_reload = FALSE; recog->gram_switch_input_method = SM_PAUSE; recog->process_segment = FALSE; /* set default function for vector calculation to RealTimeMFCC() */ recog->calc_vector = RealTimeMFCC; /* clear callback func. */ callback_init(recog); recog->adin = (ADIn *)mymalloc(sizeof(ADIn)); memset(recog->adin, 0, sizeof(ADIn)); return(recog); } /** * * @brief Free an engine instance. * * All allocated memories in the instance will be also released. * * * @brief エンジンインスタンスを開放する * * インスタンス内でこれまでにアロケートされた全てのメモリも開放される. * * * @param recog [in] engine instance. * * @callgraph * @callergraph * @ingroup instance */ void j_recog_free(Recog *recog) { if (recog->gmm) hmminfo_free(recog->gmm); if (recog->speech) free(recog->speech); /* free adin work area */ adin_free_param(recog); /* free GMM calculation work area if any */ gmm_free(recog); /* Output result -> free just after malloced and used */ /* StackDecode pass2 -> allocate and free within search */ /* RealBeam real */ realbeam_free(recog); /* adin */ if (recog->adin) free(recog->adin); /* instances */ { RecogProcess *p, *ptmp; p = recog->process_list; while(p) { ptmp = p->next; j_recogprocess_free(p); p = ptmp; } } { PROCESS_LM *lm, *lmtmp; lm = recog->lmlist; while(lm) { lmtmp = lm->next; j_process_lm_free(lm); lm = lmtmp; } } { PROCESS_AM *am, *amtmp; am = recog->amlist; while(am) { amtmp = am->next; j_process_am_free(am); am = amtmp; } } { MFCCCalc *mfcc, *tmp; mfcc = recog->mfcclist; while(mfcc) { tmp = mfcc->next; j_mfcccalc_free(mfcc); mfcc = tmp; } } /* jconf */ if (recog->jconf) { j_jconf_free(recog->jconf); } free(recog); } /* end of file */ julius-4.2.2/libjulius/src/m_fusion.c0000644001051700105040000013006512004452401016101 0ustar ritrlab/** * @file m_fusion.c * * * @brief 認識の最終準備をする. * * 設定に従い,モデルの読み込み・木構造化辞書などのデータ構造の構築・ * ワークエリアの確保など,認識開始に必要な環境の構築を行なう. * * * * @brief Final set up for recognition. * * These functions build everything needed for recognition: load * models into memory, build data structures such as tree lexicon, and * allocate work area for computation. 
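 *
 * The main entry points defined here are j_load_am(), j_load_lm(),
 * j_load_all(), create_mfcc_calc_instances() and
 * j_launch_recognition_instance().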
* * * * @author Akinobu Lee * @date Thu May 12 13:31:47 2005 * * $Revision: 1.22 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #include /** * * @brief 音響HMMをファイルから読み込み,認識用にセットアップする. * * ファイルからのHMM定義の読み込み,HMMList ファイルの読み込み, * パラメータ型のチェック,マルチパス扱いの on/off, ポーズモデルの設定など * が行われ,認識のための準備が行われる. * * この音響モデルの入力となる音響パラメータの種類やパラメータもここで * 最終決定される. 決定には,音響HMMのヘッダ,(バイナリHMMの場合,存 * 在すれば)バイナリHMMに埋め込まれた特徴量情報,jconf の設定(ばらば * らに,あるいは -htkconf 使用時)などの情報が用いられる. * * * @brief Read in an acoustic HMM from file and setup for recognition. * * This functions reads HMM definitions from file, reads also a * HMMList file, makes logical-to-physical model mapping, determine * required parameter type, determine whether multi-path handling is needed, * and find pause model in the definitions. * * The feature vector extraction parameters are also finally * determined in this function. Informations used for the * determination is (1) the header values in hmmdefs, (2) embedded * parameters in binary HMM if you are reading a binary HMM made with * recent mkbinhmm, (3) user-specified parameters in jconf * configurations (either by separatedly specified or by -htkconf * options). * * * * @param amconf [in] AM configuration variables * @param jconf [i/o] global configuration variables * * @return the newly created HMM information structure, or NULL on failure. * */ static HTK_HMM_INFO * initialize_HMM(JCONF_AM *amconf, Jconf *jconf) { HTK_HMM_INFO *hmminfo; /* at here, global variable "para" holds values specified by user or by user-specified HTK config file */ if (amconf->analysis.para_hmm.loaded == 1) { jlog("Warning: you seems to read more than one acoustic model for recognition, but\n"); jlog("Warning: previous one already has header-embedded acoustic parameters\n"); jlog("Warning: if you have different parameters, result may be wrong!\n"); } /* allocate new hmminfo */ hmminfo = hmminfo_new(); /* load hmmdefs */ if (init_hmminfo(hmminfo, amconf->hmmfilename, amconf->mapfilename, &(amconf->analysis.para_hmm)) == FALSE) { hmminfo_free(hmminfo); return NULL; } /* set multipath mode flag */ if (amconf->force_multipath) { jlog("STAT: m_fusion: force multipath HMM handling by user request\n"); hmminfo->multipath = TRUE; } else { hmminfo->multipath = hmminfo->need_multipath; } /* only MFCC is supported for audio input */ /* MFCC_{0|E}[_D][_A][_Z][_N] is supported */ /* check parameter type of this acoustic HMM */ if (jconf->input.type == INPUT_WAVEFORM) { /* Decode parameter extraction type according to the training parameter type in the header of the given acoustic HMM */ if ((hmminfo->opt.param_type & F_BASEMASK) != F_MFCC) { jlog("ERROR: m_fusion: for direct speech input, only HMM trained by MFCC is supported\n"); hmminfo_free(hmminfo); return NULL; } /* set acoustic analysis parameters from HMM header */ calc_para_from_header(&(amconf->analysis.para), hmminfo->opt.param_type, hmminfo->opt.vec_size); } /* check if tied_mixture */ if (hmminfo->is_tied_mixture && hmminfo->codebooknum <= 0) { jlog("ERROR: m_fusion: this tied-mixture model has no codebook!?\n"); hmminfo_free(hmminfo); return NULL; } #ifdef PASS1_IWCD /* make state clusters of same context for inter-word triphone approx. 
*/ if (hmminfo->is_triphone) { if (hmminfo->cdset_root == NULL) { jlog("STAT: making pseudo bi/mono-phone for IW-triphone\n"); if (make_cdset(hmminfo) == FALSE) { jlog("ERROR: m_fusion: failed to make context-dependent state set\n"); hmminfo_free(hmminfo); return NULL; } } else { jlog("STAT: pseudo phones are loaded from binary hmmlist file\n"); } /* add those `pseudo' biphone and monophone to the logical HMM names */ /* they points not to the defined HMM, but to the CD_Set structure */ hmm_add_pseudo_phones(hmminfo); } #endif /* find short pause model and set to hmminfo->sp */ htk_hmm_set_pause_model(hmminfo, amconf->spmodel_name); hmminfo->cdset_method = amconf->iwcdmethod; hmminfo->cdmax_num = amconf->iwcdmaxn; if (amconf->analysis.para_htk.loaded == 1) apply_para(&(amconf->analysis.para), &(amconf->analysis.para_htk)); if (amconf->analysis.para_hmm.loaded == 1) apply_para(&(amconf->analysis.para), &(amconf->analysis.para_hmm)); apply_para(&(amconf->analysis.para), &(amconf->analysis.para_default)); return(hmminfo); } /** * * Gaussian Mixture Selection のための状態選択用モノフォンHMMを読み込む. * * * Initialize context-independent HMM for state selection with Gaussian * Mixture Selection. * * * @param amconf [in] AM configuratino variables * * @return the newly created HMM information structure, or NULL on failure. */ static HTK_HMM_INFO * initialize_GSHMM(JCONF_AM *amconf) { HTK_HMM_INFO *hmm_gs; Value para_dummy; jlog("STAT: Reading GS HMMs:\n"); hmm_gs = hmminfo_new(); undef_para(¶_dummy); if (init_hmminfo(hmm_gs, amconf->hmm_gs_filename, NULL, ¶_dummy) == FALSE) { hmminfo_free(hmm_gs); return NULL; } return(hmm_gs); } /** * * 発話検証・棄却用の1状態 GMM を読み込んで初期化する. * * * * Read and initialize an 1-state GMM for utterance verification and * rejection. * * * * @param jconf [in] global configuration variables * * @return the newly created GMM information structure in HMM format, * or NULL on failure. 
*/ static HTK_HMM_INFO * initialize_GMM(Jconf *jconf) { HTK_HMM_INFO *gmm; jlog("STAT: reading GMM: %s\n", jconf->reject.gmm_filename); if (jconf->gmm == NULL) { /* no acoustic parameter setting was given for GMM using -AM_GMM, copy the first AM setting */ jlog("STAT: -AM_GMM not used, use parameter of the first AM\n"); jconf->gmm = j_jconf_am_new(); memcpy(jconf->gmm, jconf->am_root, sizeof(JCONF_AM)); jconf->gmm->hmmfilename = NULL; jconf->gmm->mapfilename = NULL; jconf->gmm->spmodel_name = NULL; jconf->gmm->hmm_gs_filename = NULL; if (jconf->am_root->analysis.cmnload_filename) { jconf->gmm->analysis.cmnload_filename = strcpy((char *)mymalloc(strlen(jconf->am_root->analysis.cmnload_filename)+ 1), jconf->am_root->analysis.cmnload_filename); } if (jconf->am_root->analysis.cmnsave_filename) { jconf->gmm->analysis.cmnsave_filename = strcpy((char *)mymalloc(strlen(jconf->am_root->analysis.cmnsave_filename)+ 1), jconf->am_root->analysis.cmnsave_filename); } if (jconf->am_root->frontend.ssload_filename) { jconf->gmm->frontend.ssload_filename = strcpy((char *)mymalloc(strlen(jconf->am_root->frontend.ssload_filename)+ 1), jconf->am_root->frontend.ssload_filename); } } gmm = hmminfo_new(); if (init_hmminfo(gmm, jconf->reject.gmm_filename, NULL, &(jconf->gmm->analysis.para_hmm)) == FALSE) { hmminfo_free(gmm); return NULL; } /* check parameter type of this acoustic HMM */ if (jconf->input.type == INPUT_WAVEFORM) { /* Decode parameter extraction type according to the training parameter type in the header of the given acoustic HMM */ if ((gmm->opt.param_type & F_BASEMASK) != F_MFCC) { jlog("ERROR: m_fusion: for direct speech input, only GMM trained by MFCC is supported\n"); hmminfo_free(gmm); return NULL; } } /* set acoustic analysis parameters from HMM header */ calc_para_from_header(&(jconf->gmm->analysis.para), gmm->opt.param_type, gmm->opt.vec_size); if (jconf->gmm->analysis.para_htk.loaded == 1) apply_para(&(jconf->gmm->analysis.para), &(jconf->gmm->analysis.para_htk)); if (jconf->gmm->analysis.para_hmm.loaded == 1) apply_para(&(jconf->gmm->analysis.para), &(jconf->gmm->analysis.para_hmm)); apply_para(&(jconf->gmm->analysis.para), &(jconf->gmm->analysis.para_default)); return(gmm); } /** * * @brief 単語辞書をファイルから読み込んでセットアップする. * * 辞書上のモノフォン表記からトライフォンへの計算は init_voca() で * 読み込み時に行われる. このため,辞書読み込み時には,認識で使用する * 予定のHMM情報を与える必要がある. * * N-gram 使用時は,文頭無音単語およぶ文末無音単語をここで設定する. * また,"-iwspword" 指定時は,ポーズ単語を辞書の最後に挿入する. * * * * @brief Read in word dictionary from a file and setup for recognition. * * Monophone-to-triphone conversion will be performed inside init_voca(). * So, an HMM definition data that will be used with the LM should also be * specified as an argument. * * When reading dictionary for N-gram, sentence head silence word and * tail silence word will be determined in this function. Also, * when an option "-iwspword" is specified, this will insert a pause * word at the last of the given dictionary. * * * * @param lmconf [in] LM configuration variables * @param hmminfo [in] HMM definition of each phone in dictionary, for * phone checking and monophone-to-triphone conversion. * * @return the newly created word dictionary structure, or NULL on failure. * */ static WORD_INFO * initialize_dict(JCONF_LM *lmconf, HTK_HMM_INFO *hmminfo) { WORD_INFO *winfo; JCONF_LM_NAMELIST *nl; char buf[MAXLINELEN]; int n; /* allocate new word dictionary */ winfo = word_info_new(); /* read in dictinary from file */ if ( ! 
#ifdef MONOTREE /* leave winfo monophone for 1st pass lexicon tree */ init_voca(winfo, lmconf->dictfilename, hmminfo, TRUE, lmconf->forcedict_flag) #else init_voca(winfo, lmconf->dictfilename, hmminfo, FALSE, lmconf->forcedict_flag) #endif ) { jlog("ERROR: m_fusion: failed to read dictionary, terminated\n"); word_info_free(winfo); return NULL; } /* load additional entries */ for (nl = lmconf->additional_dict_files; nl; nl=nl->next) { FILE *fp; if ((fp = fopen(nl->name, "rb")) == NULL) { jlog("ERROR: m_fusion: failed to open %s\n",nl->name); word_info_free(winfo); return NULL; } n = winfo->num; while (getl_fp(buf, MAXLINELEN, fp) != NULL) { if (voca_load_line(buf, winfo, hmminfo) == FALSE) break; } if (voca_load_end(winfo) == FALSE) { if (lmconf->forcedict_flag) { jlog("Warning: m_fusion: the error words above are ignored\n"); } else { jlog("ERROR: m_fusion: error in reading dictionary %s\n", nl->name); fclose(fp); word_info_free(winfo); return NULL; } } if (fclose(fp) == -1) { jlog("ERROR: m_fusion: failed to close %s\n", nl->name); word_info_free(winfo); return NULL; } jlog("STAT: + additional dictionary: %s (%d words)\n", nl->name, winfo->num - n); } n = winfo->num; for (nl = lmconf->additional_dict_entries; nl; nl=nl->next) { if (voca_load_line(nl->name, winfo, hmminfo) == FALSE) { jlog("ERROR: m_fusion: failed to set entry: %s\n", nl->name); } } if (lmconf->additional_dict_entries) { if (voca_load_end(winfo) == FALSE) { jlog("ERROR: m_fusion: failed to read additinoal word entry\n"); word_info_free(winfo); return NULL; } jlog("STAT: + additional entries: %d words\n", winfo->num - n); } if (lmconf->lmtype == LM_PROB) { /* if necessary, append a IW-sp word to the dict if "-iwspword" specified */ if (lmconf->enable_iwspword) { if ( #ifdef MONOTREE voca_append_htkdict(lmconf->iwspentry, winfo, hmminfo, TRUE) #else voca_append_htkdict(lmconf->iwspentry, winfo, hmminfo, FALSE) #endif == FALSE) { jlog("ERROR: m_fusion: failed to make IW-sp word entry \"%s\"\n", lmconf->iwspentry); word_info_free(winfo); return NULL; } else { jlog("STAT: 1 IW-sp word entry added\n"); } } /* set {head,tail}_silwid */ winfo->head_silwid = voca_lookup_wid(lmconf->head_silname, winfo); if (winfo->head_silwid == WORD_INVALID) { /* not exist */ jlog("ERROR: m_fusion: head sil word \"%s\" not exist in voca\n", lmconf->head_silname); word_info_free(winfo); return NULL; } winfo->tail_silwid = voca_lookup_wid(lmconf->tail_silname, winfo); if (winfo->tail_silwid == WORD_INVALID) { /* not exist */ jlog("ERROR: m_fusion: tail sil word \"%s\" not exist in voca\n", lmconf->tail_silname); word_info_free(winfo); return NULL; } } return(winfo); } /** * * @brief 単語N-gramをファイルから読み込んでセットアップする. * * ARPA フォーマットで指定時は,LRファイルと RL ファイルの組合せで * 動作が異なる. LR のみ,あるいは RL のみ指定時は,それをそのまま読み込む. * 双方とも指定されている場合は,RLをまず主モデルとして読み込んだ後, * LR の 2-gram だけを第1パス用に主モデルに追加読み込みする. * * また,読み込み終了後,辞書上のN-gramエントリとのマッチングを取る. * * * * @brief Read in word N-gram from file and setup for recognition. * * When N-gram is specified in ARPA format, the behavior relies on whether * N-grams are specified in "-nlr" and "-nrl". When either of them was * solely specified, this function simply read it. If both are specified, * it will read the RL model fully as a primary model, and additionally * read only the 2-gram part or the LR model as the first pass LM. * * Also, this function create mapping from dictionary words to LM entry. * * * * @param lmconf [in] LM configuration variables * @param winfo [i/o] word dictionary that will be used with this N-gram. 
* each word in the dictionary will be assigned to an N-gram entry here. * * @return the newly created N-gram information data, or NULL on failure. * */ static NGRAM_INFO * initialize_ngram(JCONF_LM *lmconf, WORD_INFO *winfo) { NGRAM_INFO *ngram; boolean ret; /* allocate new */ ngram = ngram_info_new(); /* load LM */ if (lmconf->ngram_filename != NULL) { /* binary format */ ret = init_ngram_bin(ngram, lmconf->ngram_filename); } else { /* ARPA format */ /* if either forward or backward N-gram is specified, read it */ /* if both specified, use backward N-gram as main and use forward 2-gram only for 1st pass (this is an old behavior) */ if (lmconf->ngram_filename_rl_arpa) { ret = init_ngram_arpa(ngram, lmconf->ngram_filename_rl_arpa, DIR_RL); if (ret == FALSE) { ngram_info_free(ngram); return NULL; } if (lmconf->ngram_filename_lr_arpa) { ret = init_ngram_arpa_additional(ngram, lmconf->ngram_filename_lr_arpa); if (ret == FALSE) { ngram_info_free(ngram); return NULL; } } } else if (lmconf->ngram_filename_lr_arpa) { ret = init_ngram_arpa(ngram, lmconf->ngram_filename_lr_arpa, DIR_LR); } } if (ret == FALSE) { ngram_info_free(ngram); return NULL; } /* set unknown (=OOV) word id */ if (strcmp(lmconf->unknown_name, UNK_WORD_DEFAULT)) { set_unknown_id(ngram, lmconf->unknown_name); } /* map dict item to N-gram entry */ if (make_voca_ref(ngram, winfo) == FALSE) { ngram_info_free(ngram); return NULL; } /* post-fix EOS / BOS uni prob for SRILM */ fix_uniprob_srilm(ngram, winfo); return(ngram); } /** * * @brief Load an acoustic model. * * This function will create an AM process instance using the given AM * configuration, and load models specified in the configuration into * the instance. Then the created instance will be installed to the * engine instance. The amconf should be registered to the global * jconf before calling this function. * * * * * @brief 音響モデルを読み込む. * * この関数は,与えられた AM 設定に従って AM 処理インスタンスを生成し, * その中に音響モデルをロードします.その後,そのAM処理インスタンスは * 新たにエンジンインスタンスに登録されます.AM設定はこの関数を * 呼ぶ前にあらかじめ全体設定recog->jconfに登録されている必要があります. * * * * @param recog [i/o] engine instance * @param amconf [in] AM configuration to load * * @return TRUE on success, or FALSE on error. 
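 *
 * A minimal usage sketch (illustrative; the amconf is assumed to be already
 * registered to recog->jconf, e.g. via j_jconf_am_regist()):
 * @code
 * if (j_load_am(recog, amconf) == FALSE) {
 *   jlog("ERROR: failed to load AM \"%s\"\n", amconf->name);
 * }
 * @endcode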
* * @callgraph * @callergraph * @ingroup instance * */ boolean j_load_am(Recog *recog, JCONF_AM *amconf) { PROCESS_AM *am; jlog("STAT: *** loading AM%02d %s\n", amconf->id, amconf->name); /* create AM process instance */ am = j_process_am_new(recog, amconf); /* HMM */ if ((am->hmminfo = initialize_HMM(amconf, recog->jconf)) == NULL) { jlog("ERROR: m_fusion: failed to initialize AM\n"); return FALSE; } if (amconf->hmm_gs_filename != NULL) { if ((am->hmm_gs = initialize_GSHMM(amconf)) == NULL) { jlog("ERROR: m_fusion: failed to initialize GS HMM\n"); return FALSE; } } /* fixate model-specific params */ /* set params whose default will change by models and not specified in arg */ /* select Gaussian pruning function */ if (am->config->gprune_method == GPRUNE_SEL_UNDEF) {/* set default if not specified */ if (am->hmminfo->is_tied_mixture) { /* enabled by default for tied-mixture models */ #if defined(GPRUNE_DEFAULT_SAFE) am->config->gprune_method = GPRUNE_SEL_SAFE; #elif defined(GPRUNE_DEFAULT_HEURISTIC) am->config->gprune_method = GPRUNE_SEL_HEURISTIC; #elif defined(GPRUNE_DEFAULT_BEAM) am->config->gprune_method = GPRUNE_SEL_BEAM; #endif } else { /* disabled by default for non tied-mixture model */ am->config->gprune_method = GPRUNE_SEL_NONE; } } /* fixated analysis.para not uses loaded flag any more, so reset it for binary matching */ amconf->analysis.para.loaded = 0; jlog("STAT: *** AM%02d %s loaded\n", amconf->id, amconf->name); return TRUE; } /** * * @brief Load a language model. * * This function will create an LM process instance using the given LM * configuration, and load models specified in the configuration into * the instance. Then the created instance will be installed to the * engine instance. The lmconf should be registered to the * recog->jconf before calling this function. * * To convert phoneme sequence to triphone at loading, you should * specify which AM to use with this LM by the argument am. * * * * * @brief 言語モデルを読み込む. * * この関数は,与えられた LM 設定に従って LM 処理インスタンスを生成し, * その中に言語モデルをロードします.その後,そのLM処理インスタンスは * 新たにエンジンインスタンスに登録されます.LM設定はこの関数を * 呼ぶ前にあらかじめ全体設定recog->jconfに登録されている必要があります. * * 辞書の読み込み時にトライフォンへの変換および音響モデルとのリンクが * 同時に行われます.このため,この言語モデルが使用する音響モデルの * インスタンスを引数 am として指定する必要があります. * * * * @param recog [i/o] engine instance * @param lmconf [in] LM configuration to load * * @return TRUE on success, or FALSE on error. 
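 *
 * Note that this function finds the AM process instance to pair with the LM
 * by scanning the registered SEARCH configurations, so the AM/LM combination
 * must have been declared (e.g. with "-SR") and the corresponding AM loaded
 * with j_load_am() before this function is called; otherwise it fails.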
* * @callgraph * @callergraph * @ingroup instance * */ boolean j_load_lm(Recog *recog, JCONF_LM *lmconf) { JCONF_SEARCH *sh; PROCESS_LM *lm; PROCESS_AM *am, *atmp; jlog("STAT: *** loading LM%02d %s\n", lmconf->id, lmconf->name); /* find which am process instance to assign to each LM */ am = NULL; for(sh=recog->jconf->search_root;sh;sh=sh->next) { if (sh->lmconf == lmconf) { for(atmp=recog->amlist;atmp;atmp=atmp->next) { if (sh->amconf == atmp->config) { am = atmp; } } } } if (am == NULL) { jlog("ERROR: cannot find corresponding AM for LM%02d %s\n", lmconf->id, lmconf->name); jlog("ERROR: you should write all AM/LM combinations to be used for recognition with \"-SR\"\n"); return FALSE; } /* create LM process instance */ lm = j_process_lm_new(recog, lmconf); /* assign AM process instance to the LM instance */ lm->am = am; /* load language model */ if (lm->lmtype == LM_PROB) { /* LM (N-gram) */ if ((lm->winfo = initialize_dict(lm->config, lm->am->hmminfo)) == NULL) { jlog("ERROR: m_fusion: failed to initialize dictionary\n"); return FALSE; } if (lm->config->ngram_filename_lr_arpa || lm->config->ngram_filename_rl_arpa || lm->config->ngram_filename) { if ((lm->ngram = initialize_ngram(lm->config, lm->winfo)) == NULL) { jlog("ERROR: m_fusion: failed to initialize N-gram\n"); return FALSE; } } } if (lm->lmtype == LM_DFA) { /* DFA */ if (lm->config->dfa_filename != NULL && lm->config->dictfilename != NULL) { /* here add grammar specified by "-dfa" and "-v" to grammar list */ multigram_add_gramlist(lm->config->dfa_filename, lm->config->dictfilename, lm->config, LM_DFA_GRAMMAR); } /* load all the specified grammars */ if (multigram_load_all_gramlist(lm) == FALSE) { jlog("ERROR: m_fusion: some error occured in reading grammars\n"); return FALSE; } /* setup for later wchmm building */ multigram_update(lm); /* the whole lexicon will be forced to built in the boot sequence, so reset the global modification flag here */ lm->global_modified = FALSE; } jlog("STAT: *** LM%02d %s loaded\n", lmconf->id, lmconf->name); return TRUE; } /**********************************************************************/ /** * * @brief 全てのモデルを読み込み,認識の準備を行なう. * * この関数では,jconf 内にある(複数の) AM 設定パラメータ構造体やLM * 設定パラメータ構造体のそれぞれに対して,AM/LM処理インスタンスを生成 * する. そしてそれぞれのインスタンスについてその中にモデルを読み込み, * 認識用にセットアップする. GMMもここで読み込まれる. * * * * @brief Read in all models for recognition. * * This function create AM/LM processing instance for each AM/LM * configurations in jconf. Then the model for each instance will be loaded * into memory and set up for recognition. GMM will also be read here. * * * * @param recog [i/o] engine instance * @param jconf [in] global configuration variables * * @return TRUE on success, FALSE on failure. 
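 *
 * A minimal boot sketch (illustrative; error handling mostly omitted):
 * @code
 * Jconf *jconf = j_jconf_new();
 * // ... fill jconf, e.g. by parsing a jconf file or command line options ...
 * Recog *recog = j_recog_new();
 * if (j_load_all(recog, jconf) == FALSE) {
 *   // loading of some AM / LM / GMM failed
 * }
 * @endcode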
* * @callgraph * @callergraph * @ingroup instance */ boolean j_load_all(Recog *recog, Jconf *jconf) { JCONF_AM *amconf; JCONF_LM *lmconf; /* set global jconf */ recog->jconf = jconf; /* load acoustic models */ for(amconf=jconf->am_root;amconf;amconf=amconf->next) { if (j_load_am(recog, amconf) == FALSE) return FALSE; } /* load language models */ for(lmconf=jconf->lm_root;lmconf;lmconf=lmconf->next) { if (j_load_lm(recog, lmconf) == FALSE) return FALSE; } /* GMM */ if (jconf->reject.gmm_filename != NULL) { jlog("STAT: loading GMM\n"); if ((recog->gmm = initialize_GMM(jconf)) == NULL) { jlog("ERROR: m_fusion: failed to initialize GMM\n"); return FALSE; } } /* check sampling rate requirement on AMs and set it to global jconf */ { boolean ok_p; /* set input sampling rate from an AM */ jconf->input.sfreq = jconf->am_root->analysis.para.smp_freq; jconf->input.period = jconf->am_root->analysis.para.smp_period; jconf->input.frameshift = jconf->am_root->analysis.para.frameshift; jconf->input.framesize = jconf->am_root->analysis.para.framesize; /* check if the value is equal at all AMs */ ok_p = TRUE; for(amconf = jconf->am_root; amconf; amconf = amconf->next) { if (jconf->input.sfreq != amconf->analysis.para.smp_freq) ok_p = FALSE; } if (!ok_p) { jlog("ERROR: required sampling rate differs in AMs!\n"); for(amconf = jconf->am_root; amconf; amconf = amconf->next) { jlog("ERROR: AM%02d %s: %dHz\n", amconf->analysis.para.smp_freq); } return FALSE; } /* also check equality for GMM */ if (recog->gmm) { if (jconf->input.sfreq != jconf->gmm->analysis.para.smp_freq) { jlog("ERROR: required sampling rate differs between AM and GMM!\n"); jlog("ERROR: AM : %dHz\n", jconf->input.sfreq); jlog("ERROR: GMM: %dHz\n", jconf->gmm->analysis.para.smp_freq); return FALSE; } } for(amconf = jconf->am_root; amconf; amconf = amconf->next) { if (jconf->input.frameshift != amconf->analysis.para.frameshift) ok_p = FALSE; } if (!ok_p) { jlog("ERROR: requested frame shift differs in AMs!\n"); for(amconf = jconf->am_root; amconf; amconf = amconf->next) { jlog("ERROR: AM%02d %s: %d samples\n", amconf->analysis.para.frameshift); } return FALSE; } /* also check equality for GMM */ if (recog->gmm) { if (jconf->input.frameshift != jconf->gmm->analysis.para.frameshift) { jlog("ERROR: required frameshift differs between AM and GMM!\n"); jlog("ERROR: AM : %d samples\n", jconf->input.frameshift); jlog("ERROR: GMM: %d samples\n", jconf->gmm->analysis.para.frameshift); return FALSE; } } for(amconf = jconf->am_root; amconf; amconf = amconf->next) { if (jconf->input.framesize != amconf->analysis.para.framesize) ok_p = FALSE; } if (!ok_p) { jlog("ERROR: requested frame size (window length) differs in AMs!\n"); for(amconf = jconf->am_root; amconf; amconf = amconf->next) { jlog("ERROR: AM%02d %s: %d samples\n", amconf->analysis.para.framesize); } return FALSE; } /* also check equality for GMM */ if (recog->gmm) { if (jconf->input.framesize != jconf->gmm->analysis.para.framesize) { jlog("ERROR: requested frame size differs between AM and GMM!\n"); jlog("ERROR: AM : %d samples\n", jconf->input.framesize); jlog("ERROR: GMM: %d samples\n", jconf->gmm->analysis.para.framesize); return FALSE; } } } return TRUE; } /** * * Check if parameter extraction configuration is the same between an AM * configuration and a MFCC instance. * * * AM設定パラメータと既に作られたMFCC計算インスタンス間で,パラメータ抽出の * 設定が同一であるかどうかをチェックする. * * * * @param amconf [in] AM configuration parameters * @param mfcc [in] MFCC calculation instance. * * @return TRUE if exactly the same, or FALSE if not. 
* */ static boolean mfcc_config_is_same(JCONF_AM *amconf, MFCCCalc *mfcc) { char *s1, *s2; /* parameter extraction conditions are the same */ /* check exact match in amconf->analysis.* */ if (&(amconf->analysis.para) == mfcc->para || memcmp(&(amconf->analysis.para), mfcc->para, sizeof(Value)) == 0) { s1 = amconf->analysis.cmnload_filename; s2 = mfcc->cmn.load_filename; if (s1 == s2 || (s1 && s2 && strmatch(s1, s2))) { s1 = amconf->analysis.cmnsave_filename; s2 = mfcc->cmn.save_filename; if (s1 == s2 || (s1 && s2 && strmatch(s1, s2))) { if (amconf->analysis.cmn_update == mfcc->cmn.update && amconf->analysis.cmn_map_weight == mfcc->cmn.map_weight) { if (amconf->frontend.ss_alpha == mfcc->frontend.ss_alpha && amconf->frontend.ss_floor == mfcc->frontend.ss_floor && amconf->frontend.sscalc == mfcc->frontend.sscalc && amconf->frontend.sscalc_len == mfcc->frontend.sscalc_len) { s1 = amconf->frontend.ssload_filename; s2 = mfcc->frontend.ssload_filename; if (s1 == s2 || (s1 && s2 && strmatch(s1, s2))) { return TRUE; } } } } } } return FALSE; } /***************************************************/ /* create MFCC calculation instance from AM config */ /* according to the fixated parameter information */ /***************************************************/ /** * * * @brief Create MFCC calculation instance for AM processing instances and GMM * * If more than one AM processing instance (or GMM) has the same configuration, * the same MFCC calculation instance will be shared among them. * * * * * @brief 全てのAM処理インスタンスおよびGMM用に,MFCC計算インスタンスを生成する. * * 2つ以上のAM処理インスタンス(およびGMM)が同一の特徴量計算条件を持 * つ場合,それらのインスタンスはひとつの MFCC 計算インスタンスを共有する. * * * * @param recog [i/o] engine instance * * @callgraph * @callergraph * */ void create_mfcc_calc_instances(Recog *recog) { PROCESS_AM *am; MFCCCalc *mfcc; int count; jlog("STAT: *** create MFCC calculation modules from AM\n"); count = 0; for(am=recog->amlist;am;am=am->next) { for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) { if (mfcc_config_is_same(am->config, mfcc)) { /* the same */ jlog("STAT: AM%02d %s: share MFCC%02d\n", am->config->id, am->config->name, mfcc->id); am->mfcc = mfcc; break; } } if (!mfcc) { /* the same not found */ /* initialize MFCC calculation work area */ count++; /* create new mfcc instance */ mfcc = j_mfcccalc_new(am->config); mfcc->id = count; /* assign to the am */ am->mfcc = mfcc; /* add to the list of all MFCCCalc */ mfcc->next = recog->mfcclist; recog->mfcclist = mfcc; jlog("STAT: AM%2d %s: create a new module MFCC%02d\n", am->config->id, am->config->name, mfcc->id); } } /* for GMM */ if (recog->gmm) { /* if GMM calculation config found, make MFCC instance for that. */ for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) { if (mfcc_config_is_same(recog->jconf->gmm, mfcc)) { /* the same */ jlog("STAT: GMM: share MFCC%02d\n", mfcc->id); recog->gmmmfcc = mfcc; break; } } if (!mfcc) { /* the same not found */ /* initialize MFCC calculation work area */ count++; /* create new mfcc instance */ mfcc = j_mfcccalc_new(recog->jconf->gmm); mfcc->id = count; /* assign to gmm */ recog->gmmmfcc = mfcc; /* add to the list of all MFCCCalc */ mfcc->next = recog->mfcclist; recog->mfcclist = mfcc; jlog("STAT: GMM: create a new module MFCC%02d\n", mfcc->id); } } jlog("STAT: %d MFCC modules created\n", count); } /** * * @brief Launch a recognition process instance. * * This function will create an recognition process instance * using the given SEARCH configuration, and launch recognizer for * the search. Then the created instance will be installed to the * engine instance. 
The sconf should be registered to the global * jconf before calling this function. * * * * * @brief 認識処理インスタンスを立ち上げる. * * この関数は,与えられた SEARCH 設定に従って 認識処理インスタンスを生成し, * 対応する音声認識器を構築します.その後,その生成された認識処理インスタンスは * 新たにエンジンインスタンスに登録されます.SEARCH設定はこの関数を * 呼ぶ前にあらかじめ全体設定jconfに登録されている必要があります. * * * * @param recog [i/o] engine instance * @param sconf [in] SEARCH configuration to launch * * @return TRUE on success, or FALSE on error. * * @callgraph * @callergraph * @ingroup instance * */ boolean j_launch_recognition_instance(Recog *recog, JCONF_SEARCH *sconf) { RecogProcess *p; PROCESS_AM *am; PROCESS_LM *lm; jlog("STAT: composing recognizer instance SR%02d %s (AM%02d %s, LM%02d %s)\n", sconf->id, sconf->name, sconf->amconf->id, sconf->amconf->name, sconf->lmconf->id, sconf->lmconf->name); /* allocate recognition instance */ p = j_recogprocess_new(recog, sconf); /* assign corresponding AM instance and LM instance to use */ for(lm=recog->lmlist;lm;lm=lm->next) { if (sconf->lmconf == lm->config) { for(am=recog->amlist;am;am=am->next) { if (sconf->amconf == am->config) { p->am = am; p->lm = lm; } } } } if (p->config->sw.triphone_check_flag && p->am->hmminfo->is_triphone) { /* go into interactive triphone HMM check mode */ hmm_check(p); } /******************************************/ /******** set work area and flags *********/ /******************************************/ /* copy values of sub instances for handly access during recognition */ /* set lm type */ p->lmtype = p->lm->lmtype; p->lmvar = p->lm->lmvar; p->graphout = p->config->graph.enabled; /* set flag for context dependent handling */ if (p->config->force_ccd_handling) { p->ccd_flag = p->config->ccd_handling; } else { if (p->am->hmminfo->is_triphone) { p->ccd_flag = TRUE; } else { p->ccd_flag = FALSE; } } /* iwsp prepare */ if (p->lm->config->enable_iwsp) { if (p->am->hmminfo->multipath) { /* find short-pause model */ if (p->am->hmminfo->sp == NULL) { jlog("ERROR: iwsp enabled but no short pause model \"%s\" in hmmdefs\n", p->am->config->spmodel_name); return FALSE; } p->am->hmminfo->iwsp_penalty = p->am->config->iwsp_penalty; } else { jlog("ERROR: \"-iwsp\" needs multi-path mode\n"); jlog("ERROR: you should use multi-path AM, or specify \"-multipath\" with \"-iwsp\"\n"); return FALSE; } } /* for short-pause segmentation */ if (p->config->successive.enabled) { if (p->config->successive.pausemodelname) { /* pause model name string specified, divide it and store to p */ char *s; int n; p->pass1.pausemodelnames = (char*)mymalloc(strlen(p->config->successive.pausemodelname)+1); strcpy(p->pass1.pausemodelnames, p->config->successive.pausemodelname); n = 0; for (s = strtok(p->pass1.pausemodelnames, " ,"); s; s = strtok(NULL, " ,")) { n++; } p->pass1.pausemodelnum = n; p->pass1.pausemodel = (char **)mymalloc(sizeof(char *) * n); strcpy(p->pass1.pausemodelnames, p->config->successive.pausemodelname); n = 0; for (s = strtok(p->pass1.pausemodelnames, " ,"); s; s = strtok(NULL, " ,")) { p->pass1.pausemodel[n++] = s; } } else { p->pass1.pausemodel = NULL; } /* check if pause word exists on dictionary */ { WORD_ID w; boolean ok_p; ok_p = FALSE; for(w=0;wlm->winfo->num;w++) { if (is_sil(w, p)) { ok_p = TRUE; break; } } if (!ok_p) { #ifdef SPSEGMENT_NAIST jlog("Error: no pause word in dictionary needed for decoder-based VAD\n"); #else jlog("Error: no pause word in dictionary needed for short-pause segmentation\n"); #endif jlog("Error: you should have at least one pause word in dictionary\n"); jlog("Error: you can specify pause model names by \"-pausemodels\"\n"); 
return FALSE; } } } /**********************************************/ /******** set model-specific defaults *********/ /**********************************************/ if (p->lmtype == LM_PROB) { /* set default lm parameter if not specified */ if (!p->config->lmp.lmp_specified) { if (p->am->hmminfo->is_triphone) { p->config->lmp.lm_weight = DEFAULT_LM_WEIGHT_TRI_PASS1; p->config->lmp.lm_penalty = DEFAULT_LM_PENALTY_TRI_PASS1; } else { p->config->lmp.lm_weight = DEFAULT_LM_WEIGHT_MONO_PASS1; p->config->lmp.lm_penalty = DEFAULT_LM_PENALTY_MONO_PASS1; } } if (!p->config->lmp.lmp2_specified) { if (p->am->hmminfo->is_triphone) { p->config->lmp.lm_weight2 = DEFAULT_LM_WEIGHT_TRI_PASS2; p->config->lmp.lm_penalty2 = DEFAULT_LM_PENALTY_TRI_PASS2; } else { p->config->lmp.lm_weight2 = DEFAULT_LM_WEIGHT_MONO_PASS2; p->config->lmp.lm_penalty2 = DEFAULT_LM_PENALTY_MONO_PASS2; } } if (p->config->lmp.lmp_specified != p->config->lmp.lmp2_specified) { jlog("WARNING: m_fusion: only -lmp or -lmp2 specified, LM weights may be unbalanced\n"); } } /****************************/ /******* build wchmm ********/ /****************************/ if (p->lmtype == LM_DFA) { /* execute generation of global grammar and build of wchmm */ multigram_build(p); /* some modification occured if return TRUE */ } if (p->lmtype == LM_PROB) { /* build wchmm with N-gram */ p->wchmm = wchmm_new(); p->wchmm->lmtype = p->lmtype; p->wchmm->lmvar = p->lmvar; p->wchmm->ccd_flag = p->ccd_flag; p->wchmm->category_tree = FALSE; p->wchmm->hmmwrk = &(p->am->hmmwrk); /* assign models */ p->wchmm->ngram = p->lm->ngram; if (p->lmvar == LM_NGRAM_USER) { /* register LM functions for 1st pass here */ p->wchmm->uni_prob_user = p->lm->lmfunc.uniprob; p->wchmm->bi_prob_user = p->lm->lmfunc.biprob; } p->wchmm->winfo = p->lm->winfo; p->wchmm->hmminfo = p->am->hmminfo; if (p->wchmm->category_tree) { if (p->config->pass1.old_tree_function_flag) { if (build_wchmm(p->wchmm, p->lm->config) == FALSE) { jlog("ERROR: m_fusion: error in bulding wchmm\n"); return FALSE; } } else { if (build_wchmm2(p->wchmm, p->lm->config) == FALSE) { jlog("ERROR: m_fusion: error in bulding wchmm\n"); return FALSE; } } } else { if (build_wchmm2(p->wchmm, p->lm->config) == FALSE) { jlog("ERROR: m_fusion: error in bulding wchmm\n"); return FALSE; } } /* 起動時 -check でチェックモードへ */ if (p->config->sw.wchmm_check_flag) { wchmm_check_interactive(p->wchmm); } /* set beam width */ /* guess beam width from models, when not specified */ p->trellis_beam_width = set_beam_width(p->wchmm, p->config->pass1.specified_trellis_beam_width); /* initialize cache for factoring */ max_successor_cache_init(p->wchmm); } /* backtrellis initialization */ p->backtrellis = (BACKTRELLIS *)mymalloc(sizeof(BACKTRELLIS)); bt_init(p->backtrellis); /* prepare work area for 2nd pass */ wchmm_fbs_prepare(p); jlog("STAT: SR%02d %s composed\n", sconf->id, sconf->name); if (sconf->sw.start_inactive) { /* start inactive */ p->active = -1; } else { /* book activation for the recognition */ p->active = 1; } if (p->lmtype == LM_DFA) { if (p->lm->winfo == NULL || (p->lmvar == LM_DFA_GRAMMAR && p->lm->dfa == NULL)) { /* make this instance inactive */ p->active = -1; } } return TRUE; } /** * * @brief Combine all loaded models and settings into one engine instance. 
* * This function will finalize preparation of recognition: * * - create required MFCC calculation instances, * - create recognition process instance for specified LM/AM combination, * - set model-specific recognition parameters, * - build tree lexicon for each process instance for the 1st pass, * - prepare work area and cache area for recognition, * - initialize some values / work area for frontend processing. * * After this function, all recognition setup was done and we are ready for * start recognition. * * This should be called after j_jconf_finalize() and j_load_all() has been * completed. You should put the jconf at recog->jconf before calling this * function. * * * @brief 全てのロードされたモデルと設定からエンジンインスタンスを * 最終構成する. * * この関数は,認識準備のための最終処理を行う. 内部では, * * - 必要な MFCC 計算インスタンスの生成 * - 指定された LM/AM の組からの認識処理インスタンス生成 * - モデルに依存する認識用パラメータの設定 * - 第1パス用の木構造化辞書を認識処理インスタンスごとに構築 * - 認識処理用ワークエリアとキャッシュエリアを確保 * - フロントエンド処理のためのいくつかの値とワークエリアの確保 * * を行う. この関数が終了後,エンジンインスタンス内の全てのセットアップ * は終了し,認識が開始できる状態となる. * * この関数は,j_jconf_finalize() と j_load_all() が終わった状態で * 呼び出す必要がある. 呼出し前には,recog->jconf に (j_load_all でともに * 使用した) jconf を格納しておくこと. * * * * @param recog [in] engine instance * * @return TRUE when all initialization successfully done, or FALSE if any * error has been occured. * * @callgraph * @callergraph * @ingroup instance * */ boolean j_final_fusion(Recog *recog) { MFCCCalc *mfcc; JCONF_SEARCH *sconf; PROCESS_AM *am; jlog("STAT: ------\n"); jlog("STAT: All models are ready, go for final fusion\n"); jlog("STAT: [1] create MFCC extraction instance(s)\n"); if (recog->jconf->input.type == INPUT_WAVEFORM) { /***************************************************/ /* create MFCC calculation instance from AM config */ /* according to the fixated parameter information */ /***************************************************/ create_mfcc_calc_instances(recog); } /****************************************/ /* create recognition process instances */ /****************************************/ jlog("STAT: [2] create recognition processing instance(s) with AM and LM\n"); for(sconf=recog->jconf->search_root;sconf;sconf=sconf->next) { if (j_launch_recognition_instance(recog, sconf) == FALSE) return FALSE; } /****************************/ /****** initialize GMM ******/ /****************************/ if (recog->gmm != NULL) { jlog("STAT: [2.5] create GMM instance\n"); if (gmm_init(recog) == FALSE) { jlog("ERROR: m_fusion: error in initializing GMM\n"); return FALSE; } } /* stage 4: setup output probability function for each AM */ jlog("STAT: [3] initialize for acoustic HMM calculation\n"); for(am=recog->amlist;am;am=am->next) { #ifdef ENABLE_PLUGIN /* set plugin function if specified */ if (am->config->gprune_method == GPRUNE_SEL_USER) { am->hmmwrk.compute_gaussset = (void (*)(HMMWork *, HTK_HMM_Dens **, int, int *, int)) plugin_get_func(am->config->gprune_plugin_source, "calcmix"); if (am->hmmwrk.compute_gaussset == NULL) { jlog("ERROR: calcmix plugin has no function \"calcmix\"\n"); return FALSE; } am->hmmwrk.compute_gaussset_init = (boolean (*)(HMMWork *)) plugin_get_func(am->config->gprune_plugin_source, "calcmix_init"); if (am->hmmwrk.compute_gaussset_init == NULL) { jlog("ERROR: calcmix plugin has no function \"calcmix_init\"\n"); return FALSE; } am->hmmwrk.compute_gaussset_free = (void (*)(HMMWork *)) plugin_get_func(am->config->gprune_plugin_source, "calcmix_free"); if (am->hmmwrk.compute_gaussset_free == NULL) { jlog("ERROR: calcmix plugin has no function \"calcmix_free\"\n"); return FALSE; } } #endif if 
(am->config->hmm_gs_filename != NULL) {/* with GMS */ if (outprob_init(&(am->hmmwrk), am->hmminfo, am->hmm_gs, am->config->gs_statenum, am->config->gprune_method, am->config->mixnum_thres) == FALSE) { return FALSE; } } else { if (outprob_init(&(am->hmmwrk), am->hmminfo, NULL, 0, am->config->gprune_method, am->config->mixnum_thres) == FALSE) { return FALSE; } } } /* stage 5: initialize work area for input and realtime decoding */ jlog("STAT: [4] prepare MFCC storage(s)\n"); if (recog->jconf->input.type == INPUT_VECTOR) { /* create an MFCC instance for MFCC input */ /* create new mfcc instance */ recog->mfcclist = j_mfcccalc_new(NULL); recog->mfcclist->id = 1; /* assign to the am */ for(am=recog->amlist;am;am=am->next) { am->mfcc = recog->mfcclist; } if (recog->gmm) recog->gmmmfcc = recog->mfcclist; } /* allocate parameter holders */ for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) { mfcc->param = new_param(); } /* initialize SS calculation work area */ if (recog->jconf->input.type == INPUT_WAVEFORM) { for(mfcc=recog->mfcclist;mfcc;mfcc=mfcc->next) { if (mfcc->frontend.sscalc) { mfcc->frontend.mfccwrk_ss = WMP_work_new(mfcc->para); if (mfcc->frontend.mfccwrk_ss == NULL) { jlog("ERROR: m_fusion: failed to initialize MFCC computation for SS\n"); return FALSE; } if (mfcc->frontend.sscalc_len * recog->jconf->input.sfreq / 1000 < mfcc->para->framesize) { jlog("ERROR: m_fusion: head sil length for SS (%d msec) is shorter than a frame (%d msec)\n", mfcc->frontend.sscalc_len, mfcc->para->framesize * 1000 / recog->jconf->input.sfreq); return FALSE; } } } } if (recog->jconf->decodeopt.realtime_flag) { jlog("STAT: [5] prepare for real-time decoding\n"); /* prepare for 1st pass pipeline processing */ if (recog->jconf->input.type == INPUT_WAVEFORM) { if (RealTimeInit(recog) == FALSE) { jlog("ERROR: m_fusion: failed to initialize recognition process\n"); return FALSE; } } } /* finished! */ jlog("STAT: All init successfully done\n\n"); /* set-up callback plugin if any */ #ifdef ENABLE_PLUGIN if (plugin_exec_engine_startup(recog) == FALSE) { jlog("ERROR: m_fusion: failed to execute callback setup in plugin\n"); return FALSE; } #endif return TRUE; } /* end of file */ julius-4.2.2/libjulius/src/callback.c0000644001051700105040000002510510767521476016043 0ustar ritrlab/** * @file callback.c * * * @brief Regist and execute callback functions. * * This file contains functions for handling callback functions. * User should use callback_add() (and callback_add_adin() for A/D-in * related callbacks) to regist user function to the callback repository. * Then, Julius will call the registered functions at apropriate timimg * while search. * * More than one function can be assigned to a callback, * in which case all functions will be called in turn. * * * * @brief コールバック関数の登録と実行 * * このファイルにはコールバックを扱う関数が含まれています. * ユーザは callback_add() (A/D-in 関連のコールバックでは allback_add_adin()) * を使って,ユーザが作成した関数を,指定のコールバックレポジトリに登録します. * 認識時はJulius は登録された関数を認識処理の各場面で呼び出します. * * あるコールバックについて複数の関数を登録することができます. この場 * 合,コールバック呼出しは,同じコールバックに登録された複数の関数が * 全て呼ばれます. * * * @author Akinobu Lee * @date Fri Oct 26 00:03:18 2007 * * $Revision: 1.3 $ * */ #include /** * * Initialize callback management area. * * * コールバック管理エリアの初期化 * * * @param recog [i/o] engine instance * * @callergraph * @callgraph * */ void callback_init(Recog *recog) { int i; for(i=0;icallback_function_num[i] = 0; recog->callback_num = 0; } /** * * Core function to register a function to a callback registory. 
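 * (Both of the public registration entry points below, callback_add()
 * and callback_add_adin(), delegate to this function.)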
* * * 関数をコールバックレジストリに登録するコア関数 * * * @param recog [i/o] engine instance * @param code [in] code in which the function will be registered * @param func [in] function * @param data [in] user-specified argument to be passed when the function is called inside Julius * * @return global callback ID unique for the whole process, or -1 on error. * */ static int callback_add_core(Recog *recog, int code, void (*func)(), void *data) { int i; int num; int newid; num = recog->callback_function_num[code]; if (num >= MAX_CALLBACK_HOOK) { jlog("ERROR: callback_add: failed to add callback for slot %d\n", code); jlog("ERROR: callback_add: maximum number of callback for a slot is limited to %d\n", MAX_CALLBACK_HOOK); jlog("ERROR: callback_add: please increase the value of MAX_CALLBACK_HOOK\n"); return -1; } for(i=0;icallback_function[code][i] == func && recog->callback_user_data[code][i] == data) { jlog("WARNING: callback_add: the same callback already registered at slot %d\n", code); return -1; } } recog->callback_function[code][num] = func; recog->callback_user_data[code][num] = data; recog->callback_function_num[code]++; newid = recog->callback_num; if (newid >= MAX_CALLBACK_HOOK * SIZEOF_CALLBACK_ID) { jlog("ERROR: callback_add: callback registration count reached maximum limit (%d)!\n", MAX_CALLBACK_HOOK * SIZEOF_CALLBACK_ID); return -1; } recog->callback_list_code[newid] = code; recog->callback_list_loc[newid] = num; recog->callback_num++; return newid; } /** * * Register a function to a callback registory. * * * 関数をコールバックレジストリに登録する. * * * @param recog [i/o] engine instance * @param code [in] code in which the function will be registered * @param func [in] function * @param data [in] user-specified argument to be passed when the function is called inside Julius * * @return global callback ID unique for the whole process, or -1 on error. * * @ingroup callback * @callergraph * @callgraph * */ int callback_add(Recog *recog, int code, void (*func)(Recog *recog, void *data), void *data) { return(callback_add_core(recog, code, func, data)); } /** * * Register a function to the A/D-in type callback registory. * * * 関数をA/D-inタイプのコールバックレジストリに登録する. * * * @param recog [i/o] engine instance * @param code [in] code in which the function will be registered * @param func [in] function * @param data [in] user-specified argument to be passed when the function is called inside Julius * * @return global callback ID unique for the whole process, or -1 on error. 
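 *
 * A minimal registration sketch (illustrative only; my_adin_hook and
 * my_data are application-defined names, not part of the library):
 *
 *   static void my_adin_hook(Recog *recog, SP16 *buf, int len, void *data)
 *   {
 *     ... inspect or store the len captured samples in buf ...
 *   }
 *
 *   callback_add_adin(recog, CALLBACK_ADIN_CAPTURED, my_adin_hook, my_data);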
* * @ingroup callback * @callergraph * @callgraph * */ int callback_add_adin(Recog *recog, int code, void (*func)(Recog *recog, SP16 *buf, int len, void *data), void *data) { return(callback_add_core(recog, code, func, data)); } static void c_out(const char *s, int flag) { if (flag == 0) { jlog("DEBUG: (%s)\n", s); } else { jlog("DEBUG: %s\n", s); } } static void callback_debug_stdout(int code, Recog *recog) { int f = recog->callback_function_num[code]; switch(code) { //case CALLBACK_POLL: c_out("CALLBACK_POLL", f); break; case CALLBACK_EVENT_PROCESS_ONLINE: c_out("CALLBACK_EVENT_PROCESS_ONLINE", f); break; case CALLBACK_EVENT_PROCESS_OFFLINE: c_out("CALLBACK_EVENT_PROCESS_OFFLINE", f); break; case CALLBACK_EVENT_STREAM_BEGIN: c_out("CALLBACK_EVENT_STREAM_BEGIN", f); break; case CALLBACK_EVENT_STREAM_END: c_out("CALLBACK_EVENT_STREAM_END", f); break; case CALLBACK_EVENT_SPEECH_READY: c_out("CALLBACK_EVENT_SPEECH_READY", f); break; case CALLBACK_EVENT_SPEECH_START: c_out("CALLBACK_EVENT_SPEECH_START", f); break; case CALLBACK_EVENT_SPEECH_STOP: c_out("CALLBACK_EVENT_SPEECH_STOP", f); break; case CALLBACK_EVENT_RECOGNITION_BEGIN: c_out("CALLBACK_EVENT_RECOGNITION_BEGIN", f); break; case CALLBACK_EVENT_RECOGNITION_END: c_out("CALLBACK_EVENT_RECOGNITION_END", f); break; case CALLBACK_EVENT_SEGMENT_BEGIN: c_out("CALLBACK_EVENT_SEGMENT_BEGIN", f); break; case CALLBACK_EVENT_SEGMENT_END: c_out("CALLBACK_EVENT_SEGMENT_END", f); break; case CALLBACK_EVENT_PASS1_BEGIN: c_out("CALLBACK_EVENT_PASS1_BEGIN", f); break; //case CALLBACK_EVENT_PASS1_FRAME: c_out("CALLBACK_EVENT_PASS1_FRAME", f); break; case CALLBACK_EVENT_PASS1_END: c_out("CALLBACK_EVENT_PASS1_END", f); break; //case CALLBACK_RESULT_PASS1_INTERIM: c_out("CALLBACK_RESULT_PASS1_INTERIM", f); break; case CALLBACK_RESULT_PASS1: c_out("CALLBACK_RESULT_PASS1", f); break; case CALLBACK_RESULT_PASS1_GRAPH: c_out("CALLBACK_RESULT_PASS1_GRAPH", f); break; case CALLBACK_STATUS_PARAM: c_out("CALLBACK_STATUS_PARAM", f); break; case CALLBACK_EVENT_PASS2_BEGIN: c_out("CALLBACK_EVENT_PASS2_BEGIN", f); break; case CALLBACK_EVENT_PASS2_END: c_out("CALLBACK_EVENT_PASS2_END", f); break; case CALLBACK_RESULT: c_out("CALLBACK_RESULT", f); break; case CALLBACK_RESULT_GMM: c_out("CALLBACK_RESULT_GMM", f); break; case CALLBACK_RESULT_GRAPH: c_out("CALLBACK_RESULT_GRAPH", f); break; case CALLBACK_RESULT_CONFNET: c_out("CALLBACK_RESULT_CONFNET", f); break; //case CALLBACK_ADIN_CAPTURED: c_out("CALLBACK_ADIN_CAPTURED", f); break; //case CALLBACK_ADIN_TRIGGERED: c_out("CALLBACK_ADIN_TRIGGERED", f); break; case CALLBACK_EVENT_PAUSE: c_out("CALLBACK_EVENT_PAUSE", f); break; case CALLBACK_EVENT_RESUME: c_out("CALLBACK_EVENT_RESUME", f); break; case CALLBACK_PAUSE_FUNCTION: c_out("CALLBACK_PAUSE_FUNCTION", f); break; case CALLBACK_DEBUG_PASS2_POP: c_out("CALLBACK_DEBUG_PASS2_POP", f); break; case CALLBACK_DEBUG_PASS2_PUSH: c_out("CALLBACK_DEBUG_PASS2_PUSH", f); break; //case CALLBACK_RESULT_PASS1_DETERMINED: c_out("CALLBACK_RESULT_PASS1_DETERMINED", f); break; } } /** * * Execute all functions assigned to a callback registory. * * * コールバックレジストリに登録されている関数を全て実行する. * * * @param code [in] callback code * @param recog [in] engine instance. 
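 *
 * All functions registered for @a code are invoked in registration
 * order, each receiving the user data pointer that was supplied to
 * callback_add().  The engine calls this at the corresponding events,
 * e.g. callback_exec(CALLBACK_EVENT_SPEECH_START, recog) when an input
 * segment is triggered in adin-cut.c.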
* * @callergraph * @callgraph * */ void callback_exec(int code, Recog *recog) { int i; if (code < 0 || code >= SIZEOF_CALLBACK_ID) { jlog("ERROR: callback_exec: failed to exec callback: invalid code number: %d\n", code); return; } if (callback_debug_flag) callback_debug_stdout(code, recog); for(i=0;icallback_function_num[code];i++) { (*(recog->callback_function[code][i]))(recog, recog->callback_user_data[code][i]); } } /** * * Execute all functions assigned to a A/D-in callback. * * * A/D-in タイプのコールバックに登録された関数を全て実行する. * * * @param code [in] callbcak code * @param recog [in] engine instance * @param buf [in] buffer that holds the current input speech which will be passed to the functions * @param len [in] length of @a buf * * @callergraph * @callgraph * */ void callback_exec_adin(int code, Recog *recog, SP16 *buf, int len) { int i; if (code < 0 || code >= SIZEOF_CALLBACK_ID) { jlog("ERROR: callback_exec_adin: failed to exec callback: invalid code number: %d\n", code); return; } if (callback_debug_flag) callback_debug_stdout(code, recog); for(i=0;icallback_function_num[code];i++) { (*(recog->callback_function[code][i]))(recog, buf, len, recog->callback_user_data[code][i]); } } /** * * Check if at least one function has been registered to a callback repository. * * * コールバックレジストリに1つでも関数が登録されたかどうかを返す. * * * @param recog [in] engine instance * @param code [in] callback code * * @return TRUE when at least one is registered, or FALSE if none. * * @ingroup callback * @callergraph * @callgraph * */ boolean callback_exist(Recog *recog, int code) { if (recog->callback_function_num[code] == 0) return FALSE; return TRUE; } /** * * Delete an already registered function from callback. * * * コールバックから関数を削除する. * * * @param recog [i/o] engine instance * @param id [in] global callback ID to delete * * @return TRUE on success, or FALSE on failure. * * @ingroup callback * @callergraph * @callgraph * */ boolean callback_delete(Recog *recog, int id) { int code; int loc; int i; if (id >= recog->callback_num || id < 0) { jlog("ERROR: callback_delete: callback id #%d not exist!\n", id); return FALSE; } code = recog->callback_list_code[id]; loc = recog->callback_list_loc[id]; if (code == -1) { jlog("WARNING: callback_delete: callback #%d already deleted\n", id); return TRUE; } for(i=loc;icallback_function_num[code]-1;i++) { recog->callback_function[code][i] = recog->callback_function[code][i+1]; recog->callback_function[code][i] = recog->callback_function[code][i+1]; recog->callback_user_data[code][i] = recog->callback_user_data[code][i+1]; } recog->callback_function_num[code]--; recog->callback_list_code[id] = -1; recog->callback_list_loc[id] = -1; jlog("STAT: callback #%d deleted\n", id); return TRUE; } /* end of file */ julius-4.2.2/libjulius/src/m_adin.c0000644001051700105040000002255712004452401015517 0ustar ritrlab/** * @file m_adin.c * * * @brief 音声入力デバイスの初期化 * * * * @brief Initialize audio input device * * * @author Akinobu LEE * @date Fri Mar 18 16:17:23 2005 * * $Revision: 1.14 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #include /** * Set up device-specific parameters and functions to AD-in work area. * * @param a [i/o] AD-in work area * @param source [in] input source ID @sa adin.h * * @return TRUE on success, FALSE if @a source is not available. 
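 *
 * For waveform input sources this fills the device-dependent function
 * pointers in @a a (ad_standby, ad_begin, ad_end, ad_read,
 * ad_input_name, and the optional ad_pause / ad_resume / ad_terminate)
 * together with the silence_cut_default and enable_thread flags;
 * SP_MFCFILE sets none of them since parameter input is handled
 * specially in the main routine.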
*/ static boolean adin_select(ADIn *a, int source, int dev) { switch(source) { case SP_RAWFILE: #ifdef HAVE_LIBSNDFILE /* libsndfile interface */ a->ad_standby = adin_sndfile_standby; a->ad_begin = adin_sndfile_begin; a->ad_end = adin_sndfile_end; a->ad_resume = NULL; a->ad_pause = NULL; a->ad_terminate = NULL; a->ad_read = adin_sndfile_read; a->ad_input_name = adin_sndfile_get_current_filename; a->silence_cut_default = FALSE; a->enable_thread = FALSE; #else /* ~HAVE_LIBSNDFILE */ /* built-in RAW/WAV reader */ a->ad_standby = adin_file_standby; a->ad_begin = adin_file_begin; a->ad_end = adin_file_end; a->ad_resume = NULL; a->ad_pause = NULL; a->ad_terminate = NULL; a->ad_read = adin_file_read; a->ad_input_name = adin_file_get_current_filename; a->silence_cut_default = FALSE; a->enable_thread = FALSE; #endif break; #ifdef USE_MIC case SP_MIC: /* microphone input */ a->ad_resume = NULL; a->ad_pause = NULL; a->ad_terminate = NULL; a->silence_cut_default = TRUE; a->enable_thread = TRUE; switch(dev) { case SP_INPUT_DEFAULT: a->ad_standby = adin_mic_standby; a->ad_begin = adin_mic_begin; a->ad_end = adin_mic_end; a->ad_read = adin_mic_read; a->ad_input_name = adin_mic_input_name; break; #ifdef HAS_ALSA case SP_INPUT_ALSA: a->ad_standby = adin_alsa_standby; a->ad_begin = adin_alsa_begin; a->ad_end = adin_alsa_end; a->ad_read = adin_alsa_read; a->ad_input_name = adin_alsa_input_name; break; #endif #ifdef HAS_OSS case SP_INPUT_OSS: a->ad_standby = adin_oss_standby; a->ad_begin = adin_oss_begin; a->ad_end = adin_oss_end; a->ad_read = adin_oss_read; a->ad_input_name = adin_oss_input_name; break; #endif #ifdef HAS_ESD case SP_INPUT_ESD: a->ad_standby = adin_esd_standby; a->ad_begin = adin_esd_begin; a->ad_end = adin_esd_end; a->ad_read = adin_esd_read; a->ad_input_name = adin_esd_input_name; break; #endif #ifdef HAS_PULSEAUDIO case SP_INPUT_PULSEAUDIO: a->ad_standby = adin_pulseaudio_standby; a->ad_begin = adin_pulseaudio_begin; a->ad_end = adin_pulseaudio_end; a->ad_read = adin_pulseaudio_read; a->ad_input_name = adin_pulseaudio_input_name; break; #endif default: jlog("ERROR: m_adin: invalid input device specified\n"); } break; #endif #ifdef USE_NETAUDIO case SP_NETAUDIO: /* DatLink/NetAudio input */ a->ad_standby = adin_netaudio_standby; a->ad_begin = adin_netaudio_begin; a->ad_end = adin_netaudio_end; a->ad_resume = NULL; a->ad_pause = NULL; a->ad_terminate = NULL; a->ad_read = adin_netaudio_read; a->ad_input_name = adin_netaudio_input_name; a->silence_cut_default = TRUE; a->enable_thread = TRUE; break; #endif case SP_ADINNET: /* adinnet network input */ a->ad_standby = adin_tcpip_standby; a->ad_begin = adin_tcpip_begin; a->ad_end = adin_tcpip_end; a->ad_resume = adin_tcpip_send_resume; a->ad_pause = adin_tcpip_send_pause; a->ad_terminate = adin_tcpip_send_terminate; a->ad_read = adin_tcpip_read; a->ad_input_name = adin_tcpip_input_name; a->silence_cut_default = FALSE; a->enable_thread = FALSE; break; case SP_STDIN: /* standard input */ a->ad_standby = adin_stdin_standby; a->ad_begin = adin_stdin_begin; a->ad_end = NULL; a->ad_resume = NULL; a->ad_pause = NULL; a->ad_terminate = NULL; a->ad_read = adin_stdin_read; a->ad_input_name = adin_stdin_input_name; a->silence_cut_default = FALSE; a->enable_thread = FALSE; break; case SP_MFCFILE: /* MFC_FILE is not waveform, so special handling on main routine should be done */ break; default: jlog("Error: m_adin: unknown input ID\n"); return FALSE; } return TRUE; } /** * * 音声入力デバイスを初期化し,音入力切出用パラメータをセットアップする. 
* * @param adin [in] AD-in ワークエリア * @param jconf [in] 全体設定パラメータ * @param arg [in] デバイス依存引数 * * * Initialize audio device and set up parameters for sound detection. * * @param adin [in] AD-in work area * @param jconf [in] global configuration parameters * @param arg [in] device-specific argument * */ static boolean adin_setup_all(ADIn *adin, Jconf *jconf, void *arg) { if (jconf->input.use_ds48to16) { if (jconf->input.use_ds48to16 && jconf->input.sfreq != 16000) { jlog("ERROR: m_adin: in 48kHz input mode, target sampling rate should be 16k!\n"); return FALSE; } /* setup for 1/3 down sampling */ adin->ds = ds48to16_new(); adin->down_sample = TRUE; /* set device sampling rate to 48kHz */ if (adin_standby(adin, 48000, arg) == FALSE) { /* fail */ jlog("ERROR: m_adin: failed to ready input device\n"); return FALSE; } } else { adin->ds = NULL; adin->down_sample = FALSE; if (adin_standby(adin, jconf->input.sfreq, arg) == FALSE) { /* fail */ jlog("ERROR: m_adin: failed to ready input device\n"); return FALSE; } } /* set parameter for recording/silence detection */ if (adin_setup_param(adin, jconf) == FALSE) { jlog("ERROR: m_adin: failed to set parameter for input device\n"); return FALSE; } adin->input_side_segment = FALSE; return TRUE; } /** * * 設定パラメータに従い音声入力デバイスをセットアップする. * * @param recog [i/o] エンジンインスタンス * * * * Set up audio input device according to the jconf configurations. * * @param recog [i/o] engine instance * * * @callgraph * @callergraph */ boolean adin_initialize(Recog *recog) { char *arg = NULL; ADIn *adin; Jconf *jconf; #ifdef ENABLE_PLUGIN FUNC_INT func; int sid; #endif adin = recog->adin; jconf = recog->jconf; jlog("STAT: ###### initialize input device\n"); /* select input device: file, mic, netaudio, etc... */ #ifdef ENABLE_PLUGIN sid = jconf->input.plugin_source; if (sid >= 0) { /* set plugin properties and functions to adin */ func = (FUNC_INT) plugin_get_func(sid, "adin_get_configuration"); if (func == NULL) { jlog("ERROR: invalid plugin: adin_get_configuration() not exist\n"); return FALSE; } adin->silence_cut_default = (*func)(1); adin->enable_thread = (*func)(2); adin->ad_standby = (boolean (*)(int, void *)) plugin_get_func(sid, "adin_standby"); adin->ad_begin = (boolean (*)(char *)) plugin_get_func(sid, "adin_open"); adin->ad_end = (boolean (*)()) plugin_get_func(sid, "adin_close"); adin->ad_resume = (boolean (*)()) plugin_get_func(sid, "adin_resume"); adin->ad_pause = (boolean (*)()) plugin_get_func(sid, "adin_pause"); adin->ad_terminate = (boolean (*)()) plugin_get_func(sid, "adin_terminate"); adin->ad_read = (int (*)(SP16 *, int)) plugin_get_func(sid, "adin_read"); adin->ad_input_name = (char * (*)()) plugin_get_func(sid, "adin_input_name"); if (adin->ad_read == NULL) { jlog("ERROR: m_adin: selected plugin has no function adin_read()\n"); return FALSE; } } else { #endif /* built-in */ if (adin_select(adin, jconf->input.speech_input, jconf->input.device) == FALSE) { jlog("ERROR: m_adin: failed to select input device\n"); return FALSE; } /* set sampling frequency and device-dependent configuration (argument is device-dependent) */ switch(jconf->input.speech_input) { case SP_ADINNET: /* arg: port number */ arg = mymalloc(100); sprintf(arg, "%d", jconf->input.adinnet_port); break; case SP_RAWFILE: /* arg: filename of file list (if any) */ if (jconf->input.inputlist_filename != NULL) { arg = mymalloc(strlen(jconf->input.inputlist_filename)+1); strcpy(arg, jconf->input.inputlist_filename); } else { arg = NULL; } break; case SP_STDIN: arg = NULL; break; #ifdef USE_NETAUDIO case 
SP_NETAUDIO: /* netaudio server/port name */ arg = mymalloc(strlen(jconf->input.netaudio_devname)+1); strcpy(arg, jconf->input.netaudio_devname); break; #endif } #ifdef ENABLE_PLUGIN } #endif if (adin_setup_all(adin, jconf, arg) == FALSE) { return FALSE; } if (arg != NULL) free(arg); return TRUE; } /* end of file */ julius-4.2.2/libjulius/src/adin-cut.c0000644001051700105040000012776012004452401015776 0ustar ritrlab/** * @file adin-cut.c * * * @brief 音声キャプチャおよび有音区間検出 * * 音声入力デバイスからの音声データの取り込み,および * 音の存在する区間の検出を行ないます. * * 有音区間の検出は,振幅レベルと零交差数を用いて行ないます. * 入力断片ごとに,レベルしきい値を越える振幅について零交差数をカウントし, * それが指定した数以上になれば,音の区間開始検出として * 取り込みを開始します. 取り込み中に零交差数が指定数以下になれば, * 取り込みを停止します. 実際には頑健に切り出しを行なうため,開始部と * 停止部の前後にマージンを持たせて切り出します. * * また,オプション指定 (-zmean)により DC offset の除去をここで行ないます. * offset は最初の @a ZMEANSAMPLES 個のサンプルの平均から計算されます. * * 音声データの取り込みと並行して入力音声の処理を行ないます. このため, * 取り込んだ音声データはその取り込み単位(live入力では一定時間,音声ファイル * ではバッファサイズ)ごとに,それらを引数としてコールバック関数が呼ばれます. * このコールバック関数としてデータの保存や特徴量抽出, * (フレーム同期の)認識処理を進める関数を指定します. * * マイク入力や NetAudio 入力などの Live 入力では, * コールバック内の処理が重く処理が入力の速度に追い付かないと, * デバイスのバッファが溢れ,入力断片がロストする場合があります. * このエラーを防ぐため,実行環境で pthread が使用可能である場合, * 音声取り込み・区間検出部は本体と独立したスレッドで動作します. * この場合,このスレッドは本スレッドとバッファ @a speech を介して * 以下のように協調動作します. * * - Thread 1: 音声取り込み・音区間検出スレッド * - デバイスから音声データを読み込みながら音区間検出を行なう. * 検出した音区間のサンプルはバッファ @a speech の末尾に逐次 * 追加される. * - このスレッドは起動時から本スレッドから独立して動作し, * 上記の動作を行ない続ける. * - Thread 2: 音声処理・認識処理を行なう本スレッド * - バッファ @a speech を一定時間ごとに監視し,新たなサンプルが * Thread 1 によって追加されたらそれらを処理し,処理が終了した * 分バッファを詰める. * * * * @brief Capture audio and detect sound trigger * * This file contains functions to get waveform from an audio device * and detect speech/sound input segment * * Sound detection at this stage is based on level threshold and zero * cross count. The number of zero cross are counted for each * incoming sound fragment. If the number becomes larger than * specified threshold, the fragment is treated as a beginning of * sound/speech input (trigger on). If the number goes below the threshold, * the fragment will be treated as an end of input (trigger * off). In actual detection, margins are considered on the beginning * and ending point, which will be treated as head and tail silence * part. DC offset normalization will be also performed if configured * so (-zmean). * * The triggered input data should be processed concurrently with the * detection for real-time recognition. For this purpose, after the * beginning of input has been detected, the following triggered input * fragments (samples of a certain period in live input, or buffer size in * file input) are passed sequencially in turn to a callback function. * The callback function should be specified by the caller, typicaly to * store the recoded data, or to process them into a frame-synchronous * recognition process. * * When source is a live input such as microphone, the device buffer will * overflow if the processing callback is slow. In that case, some input * fragments may be lost. To prevent this, the A/D-in part together with * sound detection will become an independent thread if @em pthread functions * are supported. The A/D-in and detection thread will cooperate with * the original main thread through @a speech buffer, like the followings: * * - Thread 1: A/D-in and speech detection thread * - reads audio input from source device and perform sound detection. * The detected fragments are immediately appended * to the @a speech buffer. * - will be detached after created, and run forever till the main * thread dies. 
* - Thread 2: Main thread * - performs input processing and recognition. * - watches @a speech buffer, and if detect appendings of new samples * by the Thread 1, proceed the processing for the appended samples * and purge the finished samples from @a speech buffer. * * * * @sa adin.c * * @author Akinobu LEE * @date Sat Feb 12 13:20:53 2005 * * $Revision: 1.18 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #include #ifdef HAVE_PTHREAD #include #endif /// Define this if you want to output a debug message for threading #undef THREAD_DEBUG /// Enable some fixes relating adinnet+module #define TMP_FIX_200602 /** * * @brief Set up parameters for A/D-in and input detection. * * Set variables in work area according to the configuration values. * * * * @brief 音声切り出し用各種パラメータをセット * * 設定を元に切り出し用のパラメータを計算し,ワークエリアにセットします. * * * @param adin [in] AD-in work area * @param jconf [in] configuration data * * @callergraph * @callgraph */ boolean adin_setup_param(ADIn *adin, Jconf *jconf) { float samples_in_msec; int freq; if (jconf->input.sfreq <= 0) { jlog("ERROR: adin_setup_param: going to set smpfreq to %d\n", jconf->input.sfreq); return FALSE; } if (jconf->detect.silence_cut < 2) { adin->adin_cut_on = (jconf->detect.silence_cut == 1) ? TRUE : FALSE; } else { adin->adin_cut_on = adin->silence_cut_default; } adin->strip_flag = jconf->preprocess.strip_zero_sample; adin->thres = jconf->detect.level_thres; #ifdef HAVE_PTHREAD if (adin->enable_thread && jconf->decodeopt.segment) { adin->ignore_speech_while_recog = FALSE; } else { adin->ignore_speech_while_recog = TRUE; } #endif adin->need_zmean = jconf->preprocess.use_zmean; /* calc & set internal parameter from configuration */ freq = jconf->input.sfreq; samples_in_msec = (float) freq / (float)1000.0; adin->chunk_size = jconf->detect.chunk_size; /* cycle buffer length = head margin length */ adin->c_length = (int)((float)jconf->detect.head_margin_msec * samples_in_msec); /* in msec. */ if (adin->chunk_size > adin->c_length) { jlog("ERROR: adin_setup_param: chunk size (%d) > header margin (%d)\n", adin->chunk_size, adin->c_length); return FALSE; } /* compute zerocross trigger count threshold in the cycle buffer */ adin->noise_zerocross = jconf->detect.zero_cross_num * adin->c_length / freq; /* variables that comes from the tail margin length (in wstep) */ adin->nc_max = (int)((float)(jconf->detect.tail_margin_msec * samples_in_msec / (float)adin->chunk_size)) + 2; adin->sbsize = jconf->detect.tail_margin_msec * samples_in_msec + (adin->c_length * jconf->detect.zero_cross_num / 200); adin->c_offset = 0; #ifdef HAVE_PTHREAD adin->transfer_online = FALSE; adin->speech = NULL; #endif /**********************/ /* initialize buffers */ /**********************/ adin->buffer = (SP16 *)mymalloc(sizeof(SP16) * MAXSPEECHLEN); adin->cbuf = (SP16 *)mymalloc(sizeof(SP16) * adin->c_length); adin->swapbuf = (SP16 *)mymalloc(sizeof(SP16) * adin->sbsize); if (adin->down_sample) { adin->io_rate = 3; /* 48 / 16 (fixed) */ adin->buffer48 = (SP16 *)mymalloc(sizeof(SP16) * MAXSPEECHLEN * adin->io_rate); } if (adin->adin_cut_on) { init_count_zc_e(&(adin->zc), adin->c_length); } adin->need_init = TRUE; adin->rehash = FALSE; return TRUE; } /** * * Purge samples already processed in the temporary buffer. * * * テンポラリバッファにある処理されたサンプルをパージする. 
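 *
 * For example (a purely numeric sketch), with current_len = 1000 and
 * from = 600, the 400 unprocessed samples are moved to buffer[0..399]
 * and bp becomes 400, so the next read appends right after them.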
* * * @param a [in] AD-in work area * @param from [in] Purge samples in range [0..from-1]. * */ static void adin_purge(ADIn *a, int from) { if (from > 0 && a->current_len - from > 0) { memmove(a->buffer, &(a->buffer[from]), (a->current_len - from) * sizeof(SP16)); } a->bp = a->current_len - from; } /** * * @brief Main A/D-in and sound detection function * * This function read inputs from device and do sound detection * (both up trigger and down trigger) until end of device. * * In threaded mode, this function will detach and loop forever as * ad-in thread, (adin_thread_create()) storing triggered samples in * speech[], and telling the status to another process thread via @a * transfer_online in work area. The process thread, called from * adin_go(), polls the length of speech[] and transfer_online in work area * and process them if new samples has been stored. * * In non-threaded mode, this function will be called directly from * adin_go(), and triggered samples are immediately processed within here. * * Threaded mode should be used for "live" input such as microphone input * where input is infinite and capture delay is severe. For file input, * adinnet input and other "buffered" input, non-threaded mode will be used. * * Argument "ad_process()" should be a function to process the triggered * input samples. On real-time recognition, a frame-synchronous search * function for the first pass will be specified by the caller. The current * input will be segmented if it returns 1, and will be terminated as error * if it returns -1. * * When the argument "ad_check()" specified, it will be called periodically. * When it returns less than 0, this function will be terminated. * * * * @brief 音声入力と音検出を行うメイン関数 * * ここでは音声入力の取り込み,音区間の開始・終了の検出を行います. * * スレッドモード時,この関数は独立したAD-inスレッドとしてデタッチされます. * (adin_thread_create()), 音入力を検知するとこの関数はワークエリア内の * speech[] にトリガしたサンプルを記録し,かつ transfer_online を TRUE に * セットします. Julius のメイン処理スレッド (adin_go()) は * adin_thread_process() に移行し,そこで transfer_online 時に speech[] を * 参照しながら認識処理を行います. * * 非スレッドモード時は,メイン処理関数 adin_go() は直接この関数を呼び, * 認識処理はこの内部で直接行われます. * * スレッドモードはマイク入力など,入力が無限で処理の遅延がデータの * 取りこぼしを招くような live input で用いられます. 一方,ファイル入力 * やadinnet 入力のような buffered input では非スレッドモードが用いられます. * * 引数の ad_process は,取り込んだサンプルに対して処理を行う関数を * 指定します. リアルタイム認識を行う場合は,ここに第1パスの認識処理を * 行う関数が指定されます. 返り値が 1 であれば,入力をここで区切ります. * -1 であればエラー終了します. * * 引数の ad_check は一定処理ごとに繰り返し呼ばれる関数を指定します. この * 関数の返り値が 0 以下だった場合,入力を即時中断して関数を終了します. * * * @param ad_process [in] function to process triggerted input. * @param ad_check [in] function to be called periodically. * @param recog [in] engine instance * * @return 2 when input termination requested by ad_process(), 1 when * if detect end of an input segment (down trigger detected after up * trigger), 0 when reached end of input device, -1 on error, -2 when * input termination requested by ad_check(). * * @callergraph * @callgraph * */ static int adin_cut(int (*ad_process)(SP16 *, int, Recog *), int (*ad_check)(Recog *), Recog *recog) { ADIn *a; int i; int ad_process_ret; int imax, len, cnt; int wstep; int end_status = 0; /* return value */ boolean transfer_online_local; /* local repository of transfer_online */ int zc; /* count of zero cross */ a = recog->adin; /* * there are 3 buffers: * temporary storage queue: buffer[] * cycle buffer for zero-cross counting: (in zc_e) * swap buffer for re-starting after short tail silence * * Each samples are first read to buffer[], then passed to count_zc_e() * to find trigger. 
Samples between trigger and end of speech are * passed to (*ad_process) with pointer to the first sample and its length. * */ if (a->need_init) { a->bpmax = MAXSPEECHLEN; a->bp = 0; a->is_valid_data = FALSE; /* reset zero-cross status */ if (a->adin_cut_on) { reset_count_zc_e(&(a->zc), a->thres, a->c_length, a->c_offset); } a->end_of_stream = FALSE; a->nc = 0; a->sblen = 0; a->need_init = FALSE; /* for next call */ } /****************/ /* resume input */ /****************/ // if (!a->adin_cut_on && a->is_valid_data == TRUE) { // callback_exec(CALLBACK_EVENT_SPEECH_START, recog); // } /*************/ /* main loop */ /*************/ for (;;) { /****************************/ /* read in new speech input */ /****************************/ if (a->end_of_stream) { /* already reaches end of stream, just process the rest */ a->current_len = a->bp; } else { /*****************************************************/ /* get samples from input device to temporary buffer */ /*****************************************************/ /* buffer[0..bp] is the current remaining samples */ /* mic input - samples exist in a device buffer tcpip input - samples exist in a socket file input - samples in a file Return value is the number of read samples. If no data exists in the device (in case of mic input), ad_read() will return 0. If reached end of stream (in case end of file or receive end ack from tcpip client), it will return -1. If error, returns -2. If the device requests segmentation, returns -3. */ if (a->down_sample) { /* get 48kHz samples to temporal buffer */ cnt = (*(a->ad_read))(a->buffer48, (a->bpmax - a->bp) * a->io_rate); } else { cnt = (*(a->ad_read))(&(a->buffer[a->bp]), a->bpmax - a->bp); } if (cnt < 0) { /* end of stream / segment or error */ /* set the end status */ switch(cnt) { case -1: /* end of stream */ a->input_side_segment = FALSE; end_status = 0; break; case -2: a->input_side_segment = FALSE; end_status = -1; break; case -3: a->input_side_segment = TRUE; end_status = 0; } /* now the input has been ended, we should not get further speech input in the next loop, instead just process the samples in the temporary buffer until the entire data is processed. */ a->end_of_stream = TRUE; cnt = 0; /* no new input */ /* in case the first trial of ad_read() fails, exit this loop */ if (a->bp == 0) break; } if (a->down_sample && cnt != 0) { /* convert to 16kHz */ cnt = ds48to16(&(a->buffer[a->bp]), a->buffer48, cnt, a->bpmax - a->bp, a->ds); if (cnt < 0) { /* conversion error */ jlog("ERROR: adin_cut: error in down sampling\n"); end_status = -1; a->end_of_stream = TRUE; cnt = 0; if (a->bp == 0) break; } } /*************************************************/ /* execute callback here for incoming raw data stream.*/ /* the content of buffer[bp...bp+cnt-1] or the */ /* length can be modified in the functions. 
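        (namely the plugin adin hook and any application callbacks
        registered with callback_add_adin() for CALLBACK_ADIN_CAPTURED,
        which are invoked just below)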
*/ /*************************************************/ if (cnt > 0) { #ifdef ENABLE_PLUGIN plugin_exec_adin_captured(&(a->buffer[a->bp]), cnt); #endif callback_exec_adin(CALLBACK_ADIN_CAPTURED, recog, &(a->buffer[a->bp]), cnt); /* record total number of captured samples */ a->total_captured_len += cnt; } /*************************************************/ /* some speech processing for the incoming input */ /*************************************************/ if (cnt > 0) { if (a->strip_flag) { /* strip off successive zero samples */ len = strip_zero(&(a->buffer[a->bp]), cnt); if (len != cnt) cnt = len; } if (a->need_zmean) { /* remove DC offset */ sub_zmean(&(a->buffer[a->bp]), cnt); } } /* current len = current samples in buffer */ a->current_len = a->bp + cnt; } #ifdef THREAD_DEBUG if (a->end_of_stream) { jlog("DEBUG: adin_cut: stream already ended\n"); } if (cnt > 0) { jlog("DEBUG: adin_cut: get %d samples [%d-%d]\n", a->current_len - a->bp, a->bp, a->current_len); } #endif /**************************************************/ /* call the periodic callback (non threaded mode) */ /*************************************************/ /* this function is mainly for periodic checking of incoming command in module mode */ /* in threaded mode, this will be done in process thread, not here in adin thread */ if (ad_check != NULL #ifdef HAVE_PTHREAD && !a->enable_thread #endif ) { /* if ad_check() returns value < 0, termination of speech input is required */ if ((i = (*ad_check)(recog)) < 0) { /* -1: soft termination -2: hard termination */ // if ((i == -1 && current_len == 0) || i == -2) { if (i == -2 || (i == -1 && a->is_valid_data == FALSE)) { end_status = -2; /* recognition terminated by outer function */ /* execute callback */ if (a->current_len > 0) { callback_exec(CALLBACK_EVENT_SPEECH_STOP, recog); } a->need_init = TRUE; /* bufer status shoule be reset at next call */ goto break_input; } } } /***********************************************************************/ /* if no data has got but not end of stream, repeat next input samples */ /***********************************************************************/ if (a->current_len == 0) continue; /* When not adin_cut mode, all incoming data is valid. So is_valid_data should be set to TRUE when some input first comes till this input ends. So, if some data comes, set is_valid_data to TRUE here. */ if (!a->adin_cut_on && a->is_valid_data == FALSE && a->current_len > 0) { a->is_valid_data = TRUE; callback_exec(CALLBACK_EVENT_SPEECH_START, recog); } /******************************************************/ /* prepare for processing samples in temporary buffer */ /******************************************************/ wstep = a->chunk_size; /* process unit (should be smaller than cycle buffer) */ /* imax: total length that should be processed at one ad_read() call */ /* if in real-time mode and not threaded, recognition process will be called and executed as the ad_process() callback within this function. If the recognition speed is over the real time, processing all the input samples at the loop below may result in the significant delay of getting next input, that may result in the buffer overflow of the device (namely a microphone device will suffer from this). So, in non-threaded mode, in order to avoid buffer overflow and input frame dropping, we will leave here by processing only one segment [0..wstep], and leave the rest in the temporary buffer. 
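	 In short: imax = current_len in threaded mode, and
	 imax = min(current_len, wstep) otherwise, as set just below.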
*/ #ifdef HAVE_PTHREAD if (a->enable_thread) imax = a->current_len; /* process whole */ else imax = (a->current_len < wstep) ? a->current_len : wstep; /* one step */ #else imax = (a->current_len < wstep) ? a->current_len : wstep; /* one step */ #endif /* wstep: unit length for the loop below */ if (wstep > a->current_len) wstep = a->current_len; #ifdef THREAD_DEBUG jlog("DEBUG: process %d samples by %d step\n", imax, wstep); #endif #ifdef HAVE_PTHREAD if (a->enable_thread) { /* get transfer status to local */ pthread_mutex_lock(&(a->mutex)); transfer_online_local = a->transfer_online; pthread_mutex_unlock(&(a->mutex)); } #endif /*********************************************************/ /* start processing buffer[0..current_len] by wstep step */ /*********************************************************/ i = 0; while (i + wstep <= imax) { if (a->adin_cut_on) { /********************/ /* check triggering */ /********************/ /* the cycle buffer in count_zc_e() holds the last samples of (head_margin) miliseconds, and the zerocross over the threshold level are counted within the cycle buffer */ /* store the new data to cycle buffer and update the count */ /* return zero-cross num in the cycle buffer */ zc = count_zc_e(&(a->zc), &(a->buffer[i]), wstep); if (zc > a->noise_zerocross) { /* now triggering */ if (a->is_valid_data == FALSE) { /*****************************************************/ /* process off, trigger on: detect speech triggering */ /*****************************************************/ a->is_valid_data = TRUE; /* start processing */ a->nc = 0; #ifdef THREAD_DEBUG jlog("DEBUG: detect on\n"); #endif /* record time */ a->last_trigger_sample = a->total_captured_len - a->current_len + i + wstep - a->zc.valid_len; callback_exec(CALLBACK_EVENT_SPEECH_START, recog); a->last_trigger_len = 0; if (a->zc.valid_len > wstep) { a->last_trigger_len += a->zc.valid_len - wstep; } /****************************************/ /* flush samples stored in cycle buffer */ /****************************************/ /* (last (head_margin) msec samples */ /* if threaded mode, processing means storing them to speech[]. if ignore_speech_while_recog is on (default), ignore the data if transfer is offline (=while processing second pass). Else, datas are stored even if transfer is offline */ if ( ad_process != NULL #ifdef HAVE_PTHREAD && (!a->enable_thread || !a->ignore_speech_while_recog || transfer_online_local) #endif ) { /* copy content of cycle buffer to cbuf */ zc_copy_buffer(&(a->zc), a->cbuf, &len); /* Note that the last 'wstep' samples are the same as the current samples 'buffer[i..i+wstep]', and they will be processed later. 
So, here only the samples cbuf[0...len-wstep] will be processed */ if (len - wstep > 0) { #ifdef THREAD_DEBUG jlog("DEBUG: callback for buffered samples (%d bytes)\n", len - wstep); #endif #ifdef ENABLE_PLUGIN plugin_exec_adin_triggered(a->cbuf, len - wstep); #endif callback_exec_adin(CALLBACK_ADIN_TRIGGERED, recog, a->cbuf, len - wstep); ad_process_ret = (*ad_process)(a->cbuf, len - wstep, recog); switch(ad_process_ret) { case 1: /* segmentation notification from process callback */ #ifdef HAVE_PTHREAD if (a->enable_thread) { /* in threaded mode, just stop transfer */ pthread_mutex_lock(&(a->mutex)); a->transfer_online = transfer_online_local = FALSE; pthread_mutex_unlock(&(a->mutex)); } else { /* in non-threaded mode, set end status and exit loop */ end_status = 2; adin_purge(a, i); goto break_input; } break; #else /* in non-threaded mode, set end status and exit loop */ end_status = 2; adin_purge(a, i); goto break_input; #endif case -1: /* error occured in callback */ /* set end status and exit loop */ end_status = -1; goto break_input; } } } } else { /* is_valid_data == TRUE */ /******************************************************/ /* process on, trigger on: we are in a speech segment */ /******************************************************/ if (a->nc > 0) { /*************************************/ /* re-triggering in trailing silence */ /*************************************/ #ifdef THREAD_DEBUG jlog("DEBUG: re-triggered\n"); #endif /* reset noise counter */ a->nc = 0; if (a->sblen > 0) { a->last_trigger_len += a->sblen; } #ifdef TMP_FIX_200602 if (ad_process != NULL #ifdef HAVE_PTHREAD && (!a->enable_thread || !a->ignore_speech_while_recog || transfer_online_local) #endif ) { #endif /*************************************************/ /* process swap buffer stored while tail silence */ /*************************************************/ /* In trailing silence, the samples within the tail margin length will be processed immediately, but samples after the tail margin will not be processed, instead stored in swapbuf[]. 
If re-triggering occurs while in the trailing silence, the swapped samples should be processed now to catch up with current input */ if (a->sblen > 0) { #ifdef THREAD_DEBUG jlog("DEBUG: callback for swapped %d samples\n", a->sblen); #endif #ifdef ENABLE_PLUGIN plugin_exec_adin_triggered(a->swapbuf, a->sblen); #endif callback_exec_adin(CALLBACK_ADIN_TRIGGERED, recog, a->swapbuf, a->sblen); ad_process_ret = (*ad_process)(a->swapbuf, a->sblen, recog); a->sblen = 0; switch(ad_process_ret) { case 1: /* segmentation notification from process callback */ #ifdef HAVE_PTHREAD if (a->enable_thread) { /* in threaded mode, just stop transfer */ pthread_mutex_lock(&(a->mutex)); a->transfer_online = transfer_online_local = FALSE; pthread_mutex_unlock(&(a->mutex)); } else { /* in non-threaded mode, set end status and exit loop */ end_status = 2; adin_purge(a, i); goto break_input; } break; #else /* in non-threaded mode, set end status and exit loop */ end_status = 2; adin_purge(a, i); goto break_input; #endif case -1: /* error occured in callback */ /* set end status and exit loop */ end_status = -1; goto break_input; } } #ifdef TMP_FIX_200602 } #endif } } } else if (a->is_valid_data == TRUE) { /*******************************************************/ /* process on, trigger off: processing tailing silence */ /*******************************************************/ #ifdef THREAD_DEBUG jlog("DEBUG: TRAILING SILENCE\n"); #endif if (a->nc == 0) { /* start of tail silence: prepare valiables for start swapbuf[] */ a->rest_tail = a->sbsize - a->c_length; a->sblen = 0; #ifdef THREAD_DEBUG jlog("DEBUG: start tail silence, rest_tail = %d\n", a->rest_tail); #endif } /* increment noise counter */ a->nc++; } } /* end of triggering handlers */ /********************************************************************/ /* process the current segment buffer[i...i+wstep] if process == on */ /********************************************************************/ if (a->adin_cut_on && a->is_valid_data && a->nc > 0 && a->rest_tail == 0) { /* The current trailing silence is now longer than the user- specified tail margin length, so the current samples should not be processed now. But if 're-triggering' occurs in the trailing silence later, they should be processed then. 
So we just store the overed samples in swapbuf[] and not process them now */ #ifdef THREAD_DEBUG jlog("DEBUG: tail silence over, store to swap buffer (nc=%d, rest_tail=%d, sblen=%d-%d)\n", a->nc, a->rest_tail, a->sblen, a->sblen+wstep); #endif if (a->sblen + wstep > a->sbsize) { jlog("ERROR: adin_cut: swap buffer for re-triggering overflow\n"); } memcpy(&(a->swapbuf[a->sblen]), &(a->buffer[i]), wstep * sizeof(SP16)); a->sblen += wstep; } else { /* we are in a normal speech segment (nc == 0), or trailing silence (shorter than tail margin length) (nc>0,rest_tail>0) The current trailing silence is shorter than the user- specified tail margin length, so the current samples should be processed now as same as the normal speech segment */ #ifdef TMP_FIX_200602 if (!a->adin_cut_on || a->is_valid_data == TRUE) { #else if( (!a->adin_cut_on || a->is_valid_data == TRUE) #ifdef HAVE_PTHREAD && (!a->enable_thread || !a->ignore_speech_while_recog || transfer_online_local) #endif ) { #endif if (a->nc > 0) { /* if we are in a trailing silence, decrease the counter to detect start of swapbuf[] above */ if (a->rest_tail < wstep) a->rest_tail = 0; else a->rest_tail -= wstep; #ifdef THREAD_DEBUG jlog("DEBUG: %d processed, rest_tail=%d\n", wstep, a->rest_tail); #endif } a->last_trigger_len += wstep; #ifdef TMP_FIX_200602 if (ad_process != NULL #ifdef HAVE_PTHREAD && (!a->enable_thread || !a->ignore_speech_while_recog || transfer_online_local) #endif ) { #else if ( ad_process != NULL ) { #endif #ifdef THREAD_DEBUG jlog("DEBUG: callback for input sample [%d-%d]\n", i, i+wstep); #endif /* call external function */ #ifdef ENABLE_PLUGIN plugin_exec_adin_triggered(&(a->buffer[i]), wstep); #endif callback_exec_adin(CALLBACK_ADIN_TRIGGERED, recog, &(a->buffer[i]), wstep); ad_process_ret = (*ad_process)(&(a->buffer[i]), wstep, recog); switch(ad_process_ret) { case 1: /* segmentation notification from process callback */ #ifdef HAVE_PTHREAD if (a->enable_thread) { /* in threaded mode, just stop transfer */ pthread_mutex_lock(&(a->mutex)); a->transfer_online = transfer_online_local = FALSE; pthread_mutex_unlock(&(a->mutex)); } else { /* in non-threaded mode, set end status and exit loop */ adin_purge(a, i+wstep); end_status = 2; goto break_input; } break; #else /* in non-threaded mode, set end status and exit loop */ adin_purge(a, i+wstep); end_status = 2; goto break_input; #endif case -1: /* error occured in callback */ /* set end status and exit loop */ end_status = -1; goto break_input; } } } } /* end of current segment processing */ if (a->adin_cut_on && a->is_valid_data && a->nc >= a->nc_max) { /*************************************/ /* process on, trailing silence over */ /* = end of input segment */ /*************************************/ #ifdef THREAD_DEBUG jlog("DEBUG: detect off\n"); #endif /* end input by silence */ a->is_valid_data = FALSE; /* turn off processing */ a->sblen = 0; callback_exec(CALLBACK_EVENT_SPEECH_STOP, recog); #ifdef HAVE_PTHREAD if (a->enable_thread) { /* just stop transfer */ pthread_mutex_lock(&(a->mutex)); a->transfer_online = transfer_online_local = FALSE; pthread_mutex_unlock(&(a->mutex)); } else { adin_purge(a, i+wstep); end_status = 1; goto break_input; } #else adin_purge(a, i+wstep); end_status = 1; goto break_input; #endif } /*********************************************************/ /* end of processing buffer[0..current_len] by wstep step */ /*********************************************************/ i += wstep; /* increment to next wstep samples */ } /* purge processed samples 
and update queue */ adin_purge(a, i); /* end of input by end of stream */ if (a->end_of_stream && a->bp == 0) break; } break_input: /****************/ /* pause input */ /****************/ if (a->end_of_stream) { /* input already ends */ /* execute callback */ callback_exec(CALLBACK_EVENT_SPEECH_STOP, recog); if (a->bp == 0) { /* rest buffer successfully flushed */ /* reset status */ a->need_init = TRUE; /* bufer status shoule be reset at next call */ } if (end_status >= 0) { end_status = (a->bp) ? 1 : 0; } } return(end_status); } #ifdef HAVE_PTHREAD /***********************/ /* threading functions */ /***********************/ /*************************/ /* adin thread functions */ /*************************/ /** * * Callback to store triggered samples within A/D-in thread. * * * A/D-in スレッドにてトリガした入力サンプルを保存するコールバック. * * * @param now [in] triggered fragment * @param len [in] length of above * @param recog [in] engine instance * * @return always 0, to tell caller to just continue the input */ static int adin_store_buffer(SP16 *now, int len, Recog *recog) { ADIn *a; a = recog->adin; if (a->speechlen + len > MAXSPEECHLEN) { /* just mark as overflowed, and continue this thread */ pthread_mutex_lock(&(a->mutex)); a->adinthread_buffer_overflowed = TRUE; pthread_mutex_unlock(&(a->mutex)); return(0); } pthread_mutex_lock(&(a->mutex)); memcpy(&(a->speech[a->speechlen]), now, len * sizeof(SP16)); a->speechlen += len; pthread_mutex_unlock(&(a->mutex)); #ifdef THREAD_DEBUG jlog("DEBUG: input: stored %d samples, total=%d\n", len, a->speechlen); #endif return(0); /* continue */ } /** * * A/D-in thread main function. * * * A/D-in スレッドのメイン関数. * * * @param dummy [in] a dummy data, not used. */ static void adin_thread_input_main(void *dummy) { Recog *recog; int ret; recog = dummy; ret = adin_cut(adin_store_buffer, NULL, recog); if (ret == -2) { /* termination request by ad_check? */ jlog("Error: adin thread exit with termination request by checker\n"); } else if (ret == -1) { /* error */ jlog("Error: adin thread exit with error\n"); } else if (ret == 0) { /* EOF */ jlog("Stat: adin thread end with EOF\n"); } recog->adin->adinthread_ended = TRUE; /* return to end this thread */ } /** * * Start new A/D-in thread, and initialize buffer. * * * バッファを初期化して A/D-in スレッドを開始する. 
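[Editor's note] adin_store_buffer() above is the producer half of the threaded capture: append newly captured samples to a shared array under the mutex, and raise an overflow flag (rather than fail) when the buffer is full. The sketch below reproduces only that locking pattern with a deliberately tiny buffer; MAXLEN, the variable names and the messages are stand-ins, not the Julius structures.

/* build: cc -pthread store_sketch.c */
#include <pthread.h>
#include <stdio.h>
#include <string.h>

#define MAXLEN 16                  /* stand-in for MAXSPEECHLEN */

typedef short SP16;

static SP16 speech[MAXLEN];
static int  speechlen = 0;
static int  overflowed = 0;
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;

/* producer side: append new samples, or set the overflow flag and keep
   running, mirroring adin_store_buffer() */
static int store_samples(const SP16 *now, int len)
{
  pthread_mutex_lock(&mutex);
  if (speechlen + len > MAXLEN) {
    overflowed = 1;                /* consumer will segment and reset */
  } else {
    memcpy(&speech[speechlen], now, len * sizeof(SP16));
    speechlen += len;
  }
  pthread_mutex_unlock(&mutex);
  return 0;                        /* always continue capturing */
}

int main(void)
{
  SP16 block[10] = {0};
  store_samples(block, 10);
  store_samples(block, 10);        /* second call overflows the tiny buffer */
  printf("speechlen=%d overflowed=%d\n", speechlen, overflowed);
  return 0;
}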
* * @param recog [in] engine instance * * @callergraph * @callgraph */ boolean adin_thread_create(Recog *recog) { ADIn *a; a = recog->adin; /* init storing buffer */ a->speech = (SP16 *)mymalloc(sizeof(SP16) * MAXSPEECHLEN); a->speechlen = 0; a->transfer_online = FALSE; /* tell adin-mic thread to wait at initial */ a->adinthread_buffer_overflowed = FALSE; a->adinthread_ended = FALSE; if (pthread_mutex_init(&(a->mutex), NULL) != 0) { /* error */ jlog("ERROR: adin_thread_create: failed to initialize mutex\n"); return FALSE; } if (pthread_create(&(recog->adin->adin_thread), NULL, (void *)adin_thread_input_main, recog) != 0) { jlog("ERROR: adin_thread_create: failed to create AD-in thread\n"); return FALSE; } if (pthread_detach(recog->adin->adin_thread) != 0) { /* not join, run forever */ jlog("ERROR: adin_thread_create: failed to detach AD-in thread\n"); return FALSE; } jlog("STAT: AD-in thread created\n"); return TRUE; } /** * * Delete A/D-in thread * * * A/D-in スレッドを終了する * * @param recog [in] engine instance * * @callergraph * @callgraph */ boolean adin_thread_cancel(Recog *recog) { ADIn *a; int ret; if (recog->adin->adinthread_ended) return TRUE; /* send a cencellation request to the A/D-in thread */ ret = pthread_cancel(recog->adin->adin_thread); if (ret != 0) { if (ret == ESRCH) { jlog("STAT: adin_thread_cancel: no A/D-in thread\n"); recog->adin->adinthread_ended = TRUE; return TRUE; } else { jlog("Error: adin_thread_cancel: failed to cancel A/D-in thread\n"); return FALSE; } } /* wait for the thread to terminate */ ret = pthread_join(recog->adin->adin_thread, NULL); if (ret != 0) { if (ret == EINVAL) { jlog("InternalError: adin_thread_cancel: AD-in thread is invalid\n"); recog->adin->adinthread_ended = TRUE; return FALSE; } else if (ret == ESRCH) { jlog("STAT: adin_thread_cancel: no A/D-in thread\n"); recog->adin->adinthread_ended = TRUE; return TRUE; } else if (ret == EDEADLK) { jlog("InternalError: adin_thread_cancel: dead lock or self thread?\n"); recog->adin->adinthread_ended = TRUE; return FALSE; } else { jlog("Error: adin_thread_cancel: failed to wait end of A/D-in thread\n"); return FALSE; } } jlog("STAT: AD-in thread deleted\n"); recog->adin->adinthread_ended = TRUE; return TRUE; } /****************************/ /* process thread functions */ /****************************/ /** * * @brief Main processing function for thread mode. * * It waits for the new samples to be stored in @a speech by A/D-in thread, * and if found, process them. The interface are the same as adin_cut(). * * * @brief スレッドモード用メイン関数 * * この関数は A/D-in スレッドによってサンプルが保存されるのを待ち, * 保存されたサンプルを順次処理していきます. 引数や返り値は adin_cut() と * 同一です. * * * @param ad_process [in] function to process triggerted input. * @param ad_check [in] function to be called periodically. * @param recog [in] engine instance * * @return 2 when input termination requested by ad_process(), 1 when * if detect end of an input segment (down trigger detected after up * trigger), 0 when reached end of input device, -1 on error, -2 when * input termination requested by ad_check(). 
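[Editor's note] adin_thread_create() and adin_thread_cancel() above rely on the standard pthread create/cancel/join calls and their error codes (ESRCH for a thread that no longer exists, and so on). The standalone sketch below shows that sequence in isolation; the worker function is invented, and unlike adin_thread_create() it does not detach the thread, so the later pthread_join() is straightforwardly valid.

/* build: cc -pthread cancel_sketch.c */
#include <pthread.h>
#include <errno.h>
#include <stdio.h>
#include <unistd.h>

/* hypothetical worker: sleeps until cancelled (sleep() is a cancellation point) */
static void *worker(void *arg)
{
  (void)arg;
  while (1) sleep(1);
  return NULL;
}

int main(void)
{
  pthread_t th;
  int ret;

  if (pthread_create(&th, NULL, worker, NULL) != 0) {
    fprintf(stderr, "failed to create thread\n");
    return 1;
  }
  ret = pthread_cancel(th);              /* request termination */
  if (ret == ESRCH) {
    printf("no such thread (already gone)\n");
  } else if (ret != 0) {
    fprintf(stderr, "cancel failed\n");
    return 1;
  }
  ret = pthread_join(th, NULL);          /* wait until it has actually ended */
  if (ret != 0) {
    fprintf(stderr, "join failed (%d)\n", ret);
    return 1;
  }
  printf("thread cancelled and joined\n");
  return 0;
}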
*/ static int adin_thread_process(int (*ad_process)(SP16 *, int, Recog *), int (*ad_check)(Recog *), Recog *recog) { int prev_len, nowlen; int ad_process_ret; int i; boolean overflowed_p; boolean transfer_online_local; boolean ended_p; ADIn *a; a = recog->adin; /* reset storing buffer --- input while recognition will be ignored */ pthread_mutex_lock(&(a->mutex)); /*if (speechlen == 0) transfer_online = TRUE;*/ /* tell adin-mic thread to start recording */ a->transfer_online = TRUE; #ifdef THREAD_DEBUG jlog("DEBUG: process: reset, speechlen = %d, online=%d\n", a->speechlen, a->transfer_online); #endif a->adinthread_buffer_overflowed = FALSE; pthread_mutex_unlock(&(a->mutex)); /* main processing loop */ prev_len = 0; for(;;) { /* get current length (locking) */ pthread_mutex_lock(&(a->mutex)); nowlen = a->speechlen; overflowed_p = a->adinthread_buffer_overflowed; transfer_online_local = a->transfer_online; ended_p = a->adinthread_ended; pthread_mutex_unlock(&(a->mutex)); /* check if thread is alive */ if (ended_p) { /* adin thread has already exited, so return EOF to stop this input */ return(0); } /* check if other input thread has overflowed */ if (overflowed_p) { jlog("WARNING: adin_thread_process: too long input (> %d samples), segmented now\n", MAXSPEECHLEN); /* segment input here */ pthread_mutex_lock(&(a->mutex)); a->speechlen = 0; a->transfer_online = transfer_online_local = FALSE; pthread_mutex_unlock(&(a->mutex)); return(1); /* return with segmented status */ } /* callback poll */ if (ad_check != NULL) { if ((i = (*(ad_check))(recog)) < 0) { if ((i == -1 && nowlen == 0) || i == -2) { pthread_mutex_lock(&(a->mutex)); a->transfer_online = transfer_online_local = FALSE; a->speechlen = 0; pthread_mutex_unlock(&(a->mutex)); return(-2); } } } if (prev_len < nowlen) { #ifdef THREAD_DEBUG jlog("DEBUG: process: proceed [%d-%d]\n",prev_len, nowlen); #endif /* got new sample, process */ /* As the speech[] buffer is monotonously increase, content of speech buffer [prev_len..nowlen] would not alter in both threads So locking is not needed while processing. 
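[Editor's note] The comment above states the key invariant of adin_thread_process(): take a snapshot of the shared length under the mutex, process the stable region [prev_len..nowlen) without holding the lock, and sleep briefly (0.05 sec in the code below) when nothing new has arrived. Here is a reduced illustration of that polling pattern with made-up sample counts and timings.

/* build: cc -pthread poll_sketch.c */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static int produced = 0;                 /* shared: samples available so far */
static int finished = 0;
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;

static void *producer(void *arg)
{
  int i;
  (void)arg;
  for (i = 0; i < 5; i++) {
    usleep(20000);
    pthread_mutex_lock(&mutex);
    produced += 100;                     /* new samples appended */
    pthread_mutex_unlock(&mutex);
  }
  pthread_mutex_lock(&mutex);
  finished = 1;
  pthread_mutex_unlock(&mutex);
  return NULL;
}

int main(void)
{
  pthread_t th;
  int prev = 0, now, done;

  if (pthread_create(&th, NULL, producer, NULL) != 0) return 1;
  for (;;) {
    pthread_mutex_lock(&mutex);          /* take a snapshot of the shared state */
    now = produced;
    done = finished;
    pthread_mutex_unlock(&mutex);
    if (prev < now) {
      /* the region [prev..now) is stable, so it can be processed unlocked */
      printf("process samples %d-%d\n", prev, now);
      prev = now;
    } else if (done) {
      break;
    } else {
      usleep(50000);                     /* nothing new yet: wait 0.05 sec */
    }
  }
  pthread_join(th, NULL);
  return 0;
}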
*/ /*jlog("DEBUG: main: read %d-%d\n", prev_len, nowlen);*/ if (ad_process != NULL) { ad_process_ret = (*ad_process)(&(a->speech[prev_len]), nowlen - prev_len, recog); #ifdef THREAD_DEBUG jlog("DEBUG: ad_process_ret=%d\n", ad_process_ret); #endif switch(ad_process_ret) { case 1: /* segmented */ /* segmented by callback function */ /* purge processed samples and keep transfering */ pthread_mutex_lock(&(a->mutex)); if(a->speechlen > nowlen) { memmove(a->speech, &(a->speech[nowlen]), (a->speechlen - nowlen) * sizeof(SP16)); a->speechlen -= nowlen; } else { a->speechlen = 0; } a->transfer_online = transfer_online_local = FALSE; pthread_mutex_unlock(&(a->mutex)); /* keep transfering */ return(2); /* return with segmented status */ case -1: /* error */ pthread_mutex_lock(&(a->mutex)); a->transfer_online = transfer_online_local = FALSE; pthread_mutex_unlock(&(a->mutex)); return(-1); /* return with error */ } } if (a->rehash) { /* rehash */ pthread_mutex_lock(&(a->mutex)); if (debug2_flag) jlog("STAT: adin_cut: rehash from %d to %d\n", a->speechlen, a->speechlen - prev_len); a->speechlen -= prev_len; nowlen -= prev_len; memmove(a->speech, &(a->speech[prev_len]), a->speechlen * sizeof(SP16)); pthread_mutex_unlock(&(a->mutex)); a->rehash = FALSE; } prev_len = nowlen; } else { if (transfer_online_local == FALSE) { /* segmented by zero-cross */ /* reset storing buffer for next input */ pthread_mutex_lock(&(a->mutex)); a->speechlen = 0; pthread_mutex_unlock(&(a->mutex)); break; } usleep(50000); /* wait = 0.05sec*/ } } /* as threading assumes infinite input */ /* return value should be 1 (segmented) */ return(1); } #endif /* HAVE_PTHREAD */ /** * * @brief Top function to start input processing * * If threading mode is enabled, this function simply enters to * adin_thread_process() to process triggered samples detected by * another running A/D-in thread. * * If threading mode is not available or disabled by either device requirement * or OS capability, this function simply calls adin_cut() to detect speech * segment from input device and process them concurrently by one process. * * * @brief 入力処理を行うトップ関数 * * スレッドモードでは,この関数は adin_thead_process() を呼び出し, * 非スレッドモードでは adin_cut() を直接呼び出す. 引数や返り値は * adin_cut() と同一である. * * * @param ad_process [in] function to process triggerted input. * @param ad_check [in] function to be called periodically. * @param recog [in] engine instance * * @return 2 when input termination requested by ad_process(), 1 when * if detect end of an input segment (down trigger detected after up * trigger), 0 when reached end of input device, -1 on error, -2 when * input termination requested by ad_check(). * * @callergraph * @callgraph * */ int adin_go(int (*ad_process)(SP16 *, int, Recog *), int (*ad_check)(Recog *), Recog *recog) { /* output listening start message */ callback_exec(CALLBACK_EVENT_SPEECH_READY, recog); #ifdef HAVE_PTHREAD if (recog->adin->enable_thread) { return(adin_thread_process(ad_process, ad_check, recog)); } #endif return(adin_cut(ad_process, ad_check, recog)); } /** * * Call device-specific initialization. * * * デバイス依存の初期化関数を呼び出す. * * * @param a [in] A/D-in work area * @param freq [in] sampling frequency * @param arg [in] device-dependent argument * * @return TRUE if succeeded, FALSE if failed. * * @callergraph * @callgraph * */ boolean adin_standby(ADIn *a, int freq, void *arg) { if (a->need_zmean) zmean_reset(); if (a->ad_standby != NULL) return(a->ad_standby(freq, arg)); return TRUE; } /** * * Call device-specific function to begin capturing of the audio stream. 
* * * 音の取り込みを開始するデバイス依存の関数を呼び出す. * * * @param a [in] A/D-in work area * @param file_or_dev_name [in] device / file path to open or NULL for default * * @return TRUE on success, FALSE on failure. * * @callergraph * @callgraph * */ boolean adin_begin(ADIn *a, char *file_or_dev_name) { if (debug2_flag && a->input_side_segment) jlog("Stat: adin_begin: skip\n"); if (a->input_side_segment == FALSE) { a->total_captured_len = 0; a->last_trigger_len = 0; if (a->need_zmean) zmean_reset(); if (a->ad_begin != NULL) return(a->ad_begin(file_or_dev_name)); } return TRUE; } /** * * Call device-specific function to end capturing of the audio stream. * * * 音の取り込みを終了するデバイス依存の関数を呼び出す. * * * @param a [in] A/D-in work area * * @return TRUE on success, FALSE on failure. * * @callergraph * @callgraph */ boolean adin_end(ADIn *a) { if (debug2_flag && a->input_side_segment) jlog("Stat: adin_end: skip\n"); if (a->input_side_segment == FALSE) { if (a->ad_end != NULL) return(a->ad_end()); } return TRUE; } /** * * Free memories of A/D-in work area. * * * 音取り込み用ワークエリアのメモリを開放する. * * * @param recog [in] engine instance * * @callergraph * @callgraph * */ void adin_free_param(Recog *recog) { ADIn *a; a = recog->adin; if (a->ds) { ds48to16_free(a->ds); a->ds = NULL; } if (a->adin_cut_on) { free_count_zc_e(&(a->zc)); } if (a->down_sample) { free(a->buffer48); } free(a->swapbuf); free(a->cbuf); free(a->buffer); #ifdef HAVE_PTHREAD if (a->speech) free(a->speech); #endif } /* end of file */ julius-4.2.2/libjulius/src/plugin.c0000644001051700105040000003456012004452401015563 0ustar ritrlab/** * @file plugin.c * * * @brief Load plugin * * * * @brief プラグイン読み込み * * * @author Akinobu Lee * @date Sat Aug 2 09:46:09 2008 * * $Revision: 1.7 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #include #ifdef ENABLE_PLUGIN #if defined(_WIN32) && !defined(__CYGWIN32__) && !defined(__MINGW32__) #include #else #include #endif #include /** * Plugin file path suffix * */ static char *plugin_suffix = PLUGIN_SUFFIX; /** * Function names to be loaded * */ static char *plugin_function_namelist[] = PLUGIN_FUNCTION_NAMELIST; /**************************************************************/ #if defined(_WIN32) && !defined(__CYGWIN32__) /** * Return error string. * * @return the error string. */ static const char* dlerror() { static char szMsgBuf[256]; FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, NULL, GetLastError(), MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), szMsgBuf, sizeof szMsgBuf, NULL); return szMsgBuf; } #endif /**************************************************************/ static int plugin_namelist_num() { return(sizeof(plugin_function_namelist) / sizeof(char *)); } static void plugin_free_all() { PLUGIN_ENTRY *p, *ptmp; int i, num; num = plugin_namelist_num(); for(i=0;inext; free(p); p = ptmp; } } free(global_plugin_list); } int plugin_get_id(char *name) { int i, num; num = plugin_namelist_num(); for(i=0;i= plugin_suffix && p >= filename && *x == *p) { x--; p--; } if (x < plugin_suffix) { return TRUE; } return FALSE; } /** * Load a plugin file. * * @param file [in] plugin file path * * @return TRUE on success, FALSE on failure. 
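[Editor's note] plugin_load_file(), whose definition follows, is built on the standard dlopen()/dlsym() calls: open the shared object, look up optional entry points by name, and back out if initialize() returns -1. A stripped-down loader in the same spirit is sketched below; the command-line handling and messages are mine, and only the initialize() convention is taken from the code.

/* build: cc -o plugload plugload.c -ldl */
#include <dlfcn.h>
#include <stdio.h>

typedef int (*FUNC_INT)();

int main(int argc, char *argv[])
{
  void *handle;
  FUNC_INT init;

  if (argc < 2) {
    fprintf(stderr, "usage: %s plugin.so\n", argv[0]);
    return 1;
  }
  handle = dlopen(argv[1], RTLD_LAZY);            /* load the shared object */
  if (handle == NULL) {
    fprintf(stderr, "dlopen: %s\n", dlerror());
    return 1;
  }
  init = (FUNC_INT) dlsym(handle, "initialize");  /* optional entry point */
  if (init != NULL && (*init)() == -1) {
    fprintf(stderr, "plugin refused to initialize, skipped\n");
    dlclose(handle);
    return 1;
  }
  printf("plugin loaded\n");
  dlclose(handle);
  return 0;
}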
*/ boolean plugin_load_file(char *file) { PLUGIN_MODULE handle; FUNC_INT func; FUNC_VOID entfunc; int ret, number, num; char buf[256]; int buflen = 256; PLUGIN_ENTRY *p; int i; if (global_plugin_list == NULL) plugin_init(); /* open file */ handle = dlopen(file, RTLD_LAZY); if (!handle) { jlog("ERROR: plugin_load: failed to open: %s\n", dlerror()); return(FALSE); } /* call initialization function */ func = dlsym(handle, "initialize"); if (func) { ret = (*func)(); if (ret == -1) { jlog("WARNING: plugin_load: %s: initialize() returns no, skip this file\n", file); dlclose(handle); return(FALSE); } } /* call information function */ func = dlsym(handle, "get_plugin_info"); if (func == NULL) { jlog("ERROR: plugin_load: %s: get_plugin_info(): %s\n", file, dlerror()); dlclose(handle); return(FALSE); } number = 0; ret = (*func)(number, buf, buflen); if (ret == -1) { jlog("ERROR: plugin_load: %s: get_plugin_info(0) returns error\n", file); dlclose(handle); return(FALSE); } buf[buflen-1] = '\0'; jlog("#%d [%s]\n", global_plugin_loaded_file_num, buf); /* register plugin functions */ num = plugin_namelist_num(); for(i=0;iid = i; p->source_id = global_plugin_loaded_file_num; p->func = entfunc; p->next = global_plugin_list[i]; global_plugin_list[i] = p; } } /* increment file counter */ global_plugin_loaded_file_num++; return(TRUE); } /** * Search for plugin file in a directory and load them. * * @param dir [in] directory * * @return TRUE on success, FALSE on failure */ boolean plugin_load_dir(char *dir) { #if defined(_WIN32) && !defined(__CYGWIN32__) && !defined(__MINGW32__) WIN32_FIND_DATA FindFileData; HANDLE hFind; static char buf[512]; int cnt; strncpy(buf, dir, 505); strcat(buf, "\\*.dll"); if ((hFind = FindFirstFile(buf, &FindFileData)) == INVALID_HANDLE_VALUE) { jlog("ERROR: plugin_load: cannot open plugins dir \"%s\"\n", dir); return FALSE; } cnt = 0; do { jlog("STAT: file: %-23s ", FindFileData.cFileName); sprintf_s(buf, 512, "%s\\%s", dir, FindFileData.cFileName); if (plugin_load_file(buf)) cnt++; } while (FindNextFile(hFind, &FindFileData)); FindClose(hFind); jlog("STAT: %d files loaded\n", cnt); return TRUE; #else DIR *d; struct dirent *f; static char buf[512]; int cnt; if ((d = opendir(dir)) == NULL) { jlog("ERROR: plugin_load: cannot open plugins dir \"%s\"\n", dir); return FALSE; } cnt = 0; while((f = readdir(d)) != NULL) { if (is_plugin_obj(f->d_name)) { snprintf(buf, 512, "%s/%s", dir, f->d_name); jlog("STAT: file: %-23s ", f->d_name); if (plugin_load_file(buf)) cnt++; } } closedir(d); jlog("STAT: %d files loaded\n", cnt); return TRUE; #endif } /** * read in plugins in multiple directories * * @param dirent [i/o] directory entry in form of * "dir1:dir2:dir3:...". 
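[Editor's note] plugin_load_dirs(), documented above and defined just below, walks a colon-separated directory list in place, temporarily writing a NUL over each ':' while the element is used and restoring it afterwards. The same walk, reduced to a printf and an invented directory list (note that the list must live in writable memory, not in a string literal):

#include <stdio.h>
#include <string.h>

/* walk over a colon-separated list the way plugin_load_dirs() does:
   temporarily terminate each element, use it, then restore the ':' */
static void for_each_dir(char *dirent)
{
  char *p, *s, c;

  if (dirent == NULL) return;
  p = dirent;
  do {
    s = p;
    while (*p != '\0' && *p != ':') p++;
    c = *p;
    *p = '\0';
    printf("would load plugins from \"%s\"\n", s);
    if (c != '\0') { *p = c; p++; }
  } while (*p != '\0');
}

int main(void)
{
  char dirs[] = "plugin:/usr/local/lib/julius/plugin:.";  /* example list only */
  for_each_dir(dirs);
  return 0;
}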
* */ void plugin_load_dirs(char *dirent) { char *p, *s; char c; if (dirent == NULL) return; if (debug2_flag) { jlog("DEBUG: loading dirs: %s\n", dirent); } p = dirent; do { s = p; while(*p != '\0' && *p != ':') p++; c = *p; *p = '\0'; jlog("STAT: loading plugins at \"%s\":\n", dirent); plugin_load_dir(s); if (c != '\0') { *p = c; p++; } } while (*p != '\0'); } /************************************************************************/ int plugin_find_optname(char *optfuncname, char *str) { char buf[64]; int id; PLUGIN_ENTRY *p; FUNC_VOID func; if ((id = plugin_get_id(optfuncname)) < 0) return -1; for(p=global_plugin_list[id];p;p=p->next) { func = (FUNC_VOID) p->func; (*func)(buf, (int)64); if (strmatch(buf, str)) { return p->source_id; } } return -1; } FUNC_VOID plugin_get_func(int sid, char *name) { int id; PLUGIN_ENTRY *p; FUNC_VOID func; if ((id = plugin_get_id(name)) < 0) return NULL; for(p=global_plugin_list[id];p;p=p->next) { if (p->source_id == sid) return p->func; } return NULL; } /************************************************************************/ boolean plugin_exec_engine_startup(Recog *recog) { int id; PLUGIN_ENTRY *p; FUNC_INT func; boolean ok_p; if (global_plugin_list == NULL) return TRUE; if ((id = plugin_get_id("startup")) < 0) return FALSE; ok_p = TRUE; for(p=global_plugin_list[id];p;p=p->next) { func = (FUNC_INT) p->func; if ((*func)(recog) != 0) { jlog("WARNING: plugin #%d: failed in startup()\n", p->source_id); ok_p = FALSE; } } return ok_p; } /************************************************************************/ void plugin_exec_adin_captured(short *buf, int len) { int id; PLUGIN_ENTRY *p; FUNC_VOID adfunc; if (global_plugin_list == NULL) return; if ((id = plugin_get_id("adin_postprocess")) < 0) return; for(p=global_plugin_list[id];p;p=p->next) { adfunc = (FUNC_VOID) p->func; (*adfunc)(buf, len); } } void plugin_exec_adin_triggered(short *buf, int len) { int id; PLUGIN_ENTRY *p; FUNC_VOID adfunc; if (global_plugin_list == NULL) return; if ((id = plugin_get_id("adin_postprocess_triggered")) < 0) return; for(p=global_plugin_list[id];p;p=p->next) { adfunc = (FUNC_VOID) p->func; (*adfunc)(buf, len); } } void plugin_exec_vector_postprocess(VECT *vecbuf, int veclen, int nframe) { int id; PLUGIN_ENTRY *p; FUNC_INT func; if (global_plugin_list == NULL) return; if ((id = plugin_get_id("fvin_postprocess")) < 0) return; for(p=global_plugin_list[id];p;p=p->next) { func = (FUNC_INT) p->func; (*func)(vecbuf, veclen, nframe); } } void plugin_exec_vector_postprocess_all(HTK_Param *param) { int id; PLUGIN_ENTRY *p; FUNC_INT func; int t; if (global_plugin_list == NULL) return; if ((id = plugin_get_id("fvin_postprocess")) < 0) return; for(t=0;tsamplenum;t++) { for(p=global_plugin_list[id];p;p=p->next) { func = (FUNC_INT) p->func; (*func)(param->parvec[t], param->veclen, t); } } } void plugin_exec_process_result(Recog *recog) { int id; PLUGIN_ENTRY *p; FUNC_VOID func; RecogProcess *rtmp, *r; Sentence *s; int i; int len; char *str; if (global_plugin_list == NULL) return; /* for result_str(), return the best sentence string among processes */ s = NULL; for(rtmp=recog->process_list;rtmp;rtmp=rtmp->next) { if (! 
rtmp->live) continue; if (rtmp->result.status >= 0 && rtmp->result.sentnum > 0) { /* recognition succeeded */ if (s == NULL || rtmp->result.sent[0].score > s->score) { r = rtmp; s = &(r->result.sent[0]); } } } if (s == NULL) { str = NULL; } else { len = 0; for(i=0;iword_num;i++) len += strlen(r->lm->winfo->woutput[s->word[i]]) + 1; str = (char *)mymalloc(len); str[0]='\0'; for(i=0;iword_num;i++) { if (strlen(r->lm->winfo->woutput[s->word[i]]) == 0) continue; if (strlen(str) > 0) strcat(str, " "); strcat(str, r->lm->winfo->woutput[s->word[i]]); } } if ((id = plugin_get_id("result_best_str")) < 0) return; for(p=global_plugin_list[id];p;p=p->next) { func = (FUNC_VOID) p->func; (*func)(str); } if (str != NULL) free(str); } #endif /* ENABLE_PLUGIN */ /************************************************************************/ /* assume only one MFCC module! */ /************************************************************************/ boolean mfc_module_init(MFCCCalc *mfcc, Recog *recog) { #ifdef ENABLE_PLUGIN mfcc->plugin_source = recog->jconf->input.plugin_source; if (mfcc->plugin_source < 0) { jlog("ERROR: SP_MDCMODULE selected but plugin is missing?\n"); return FALSE; } mfcc->func.fv_standby = (boolean (*)()) plugin_get_func(mfcc->plugin_source, "fvin_standby"); mfcc->func.fv_begin = (boolean (*)()) plugin_get_func(mfcc->plugin_source, "fvin_open"); mfcc->func.fv_read = (int (*)(VECT *, int)) plugin_get_func(mfcc->plugin_source, "fvin_read"); mfcc->func.fv_end = (boolean (*)()) plugin_get_func(mfcc->plugin_source, "fvin_close"); mfcc->func.fv_resume = (boolean (*)()) plugin_get_func(mfcc->plugin_source, "fvin_resume"); mfcc->func.fv_pause = (boolean (*)()) plugin_get_func(mfcc->plugin_source, "fvin_pause"); mfcc->func.fv_terminate= (boolean (*)()) plugin_get_func(mfcc->plugin_source, "fvin_terminate"); mfcc->func.fv_input_name= (char * (*)()) plugin_get_func(mfcc->plugin_source, "fvin_input_name"); if (mfcc->func.fv_read == NULL) { jlog("ERROR: FEATURE_INPUT plugin: fvin_read() not found!\n"); return FALSE; } #endif return TRUE; } boolean mfc_module_set_header(MFCCCalc *mfcc, Recog *recog) { FUNC_INT func; unsigned int ret; #ifdef ENABLE_PLUGIN func = (FUNC_INT) plugin_get_func(mfcc->plugin_source, "fvin_get_configuration"); if (func == NULL) { jlog("ERROR: feature vector input plugin: fvin_get_configuration() not found\n"); return FALSE; } /* vector length in unit */ mfcc->param->veclen = (*func)(0); mfcc->param->header.sampsize = mfcc->param->veclen * sizeof(VECT); /* frame shift in msec */ mfcc->param->header.wshift = (*func)(1) * 10000.0; /* parameter type for checking (return 0xffff to disable the check) */ ret = (*func)(2); if (ret == 0xffff) { /* disable type checking */ recog->jconf->input.paramtype_check_flag = FALSE; } else { mfcc->param->header.samptype = ret; } #endif return TRUE; } boolean mfc_module_standby(MFCCCalc *mfcc) { #ifdef ENABLE_PLUGIN FUNC_INT func; int ret; if (mfcc->func.fv_standby) ret = mfcc->func.fv_standby(); else ret = TRUE; mfcc->segmented_by_input = FALSE; return ret; #else return TRUE; #endif } boolean mfc_module_begin(MFCCCalc *mfcc) { #ifdef ENABLE_PLUGIN FUNC_INT func; int ret; if (mfcc->segmented_by_input) return TRUE; /* do nothing if last was segmented */ if (mfcc->func.fv_begin) ret = mfcc->func.fv_begin(); else ret = TRUE; return ret; #else return TRUE; #endif } boolean mfc_module_end(MFCCCalc *mfcc) { #ifdef ENABLE_PLUGIN FUNC_INT func; int ret; if (mfcc->segmented_by_input) return TRUE; /* do nothing if last was segmented */ if (mfcc->func.fv_end) 
ret = mfcc->func.fv_end(); else ret = TRUE; return ret; #else return TRUE; #endif } int mfc_module_read(MFCCCalc *mfcc, int *new_t) { #ifdef ENABLE_PLUGIN FUNC_INT func; int ret; /* expand area if needed */ if (param_alloc(mfcc->param, mfcc->f + 1, mfcc->param->veclen) == FALSE) { jlog("ERROR: FEATURE_INPUT plugin: failed to allocate memory\n"); return -2; } /* get data */ ret = mfcc->func.fv_read(mfcc->param->parvec[mfcc->f], mfcc->param->veclen); if (ret == -3) { /* function requests segmentation of the current recognition */ mfcc->segmented_by_input = TRUE; *new_t = mfcc->f; return -3; } else if (ret == -1) { /* end of input */ mfcc->segmented_by_input = FALSE; *new_t = mfcc->f; return -1; } else if (ret == -2) { /* error */ jlog("ERROR: FEATURE_INPUT plugin: fvin_read() returns error (-2)\n"); return -2; } *new_t = mfcc->f + 1; #endif return 0; } char * mfc_module_input_name(MFCCCalc *mfcc) { #ifdef ENABLE_PLUGIN int ret; if (mfcc->func.fv_input_name) return(mfcc->func.fv_input_name()); #endif return NULL; } /* end of file */ julius-4.2.2/libjulius/src/beam.c0000644001051700105040000030255612004452401015174 0ustar ritrlab/** * @file beam.c * * * @brief フレーム同期ビーム探索の実行(第1パス) * * 第1パスのフレーム同期ビーム探索を実際に実行する関数群です. * 認識処理インスタンスごとに実行されます. * 初期化,1フレームの認識処理,終了処理,第1パスの結果決定,セグメント * 終了の検知などの処理が含まれています. * * アルゴリズムについては,単語履歴近似は 1-best 近似がデフォルトです * が,単語対近似も使用可能です. 単語N-gram では単語間の接続制約は 1-gram * factoring (2-gram factoring も選択可)を用いて計算されます. 文法の * 場合,木構造化辞書は文法のカテゴリ単位で作成され,単語間の接続(単語 * 対制約)は単語間遷移で適用されます. 単語認識モードでは単語間接続は * 考慮されません. * * * * @brief Frame-synchronous beam search for the 1st pass * * These are core functions of frame-synchronous beam search using a * static lexicon tree, as the first pass of Julius. These functions * will be called from pass1.c, to execute for each recognition * process instance in turn. Functions for initialization, frame-wise * recognition processing, end procedure, finding best path, detecting * end of segment on short-pause segmentation mode, are defined here. * * About algorithm: 1-best approximation will be performed for word * context approximation, but normal word-pair approximation is also * supported. With word/class N-gram, Julius computes the language * score using 1-gram factoring (can be changed to 2-gram factoring if * you want). With DFA grammar, Julius can compute the connection * constraint of words using the category-pair constraint on the * beginning of the words, since Julian makes a per-category tree * lexicon. On isolated word recognition mode, the cross-word transitions * are ignored. * * * * @author Akinobu LEE * @date Tue Feb 22 17:00:45 2005 * * $Revision: 1.21 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #include #undef DEBUG /* ---------------------------------------------------------- */ /* 第1パスの結果処理 */ /* end procedure to get result of 1st pass */ /* ---------------------------------------------------------- */ #ifdef WORD_GRAPH /** * * @brief 認識結果の単語トレリスから単語グラフを抽出する * * (WORD_GRAPH 指定時) * この関数は第1パスの結果の単語トレリスを終端からバックトレースし, * パス上にあるトレリス単語を単語グラフとして抽出する. 実際には, * 単語トレリス上でグラフ上に残るもののみにマークを付け, * 第2パスでは,マークのついた単語のみを展開する. * * グラフは r->result.wg1 に格納される. 
* * @param frame [in] 単語トレリス上で単語末端を検索するフレーム * @param r [i/o] 認識処理インスタンス * * * @brief Extract word graph from the resulting word trellis * * If WORD_GRAPH is defined, this function trace back through the * word trellis from the end point, to extract the trellis words on * the path as a word graph. Actually, this function only marks * which trellis words are included in the word graph. On the 2nd pass, * only the words in the word graph will be expanded. * * The generated word graph will be stored to r->result.wg1. * * @param frame [in] frame to lookup for word ends in the word trellis * @param r [i/o] recognition process instance * */ static void generate_lattice(int frame, RecogProcess *r) { BACKTRELLIS *bt; WORD_INFO *winfo; TRELLIS_ATOM *ta; int i, j; LOGPROB l; WordGraph *new; bt = r->backtrellis; winfo = r->lm->winfo; if (frame >= 0) { for (i=0;inum[frame];i++) { ta = bt->rw[frame][i]; /* words will be saved as a part of graph only if any of its following word has been survived in a beam */ if (! ta->within_context) continue; /* not a candidate */ if (ta->within_wordgraph) continue; /* already marked */ /* mark */ ta->within_wordgraph = TRUE; new = (WordGraph *)mymalloc(sizeof(WordGraph)); new->wid = ta->wid; new->lefttime = ta->begintime; new->righttime = ta->endtime; new->fscore_head = ta->backscore; new->fscore_tail = 0.0; new->gscore_head = 0.0; new->gscore_tail = 0.0; new->lscore_tmp = ta->lscore; #ifdef CM_SEARCH new->cmscore = 0.0; #endif new->forward_score = new->backward_score = 0.0; new->headphone = winfo->wseq[ta->wid][0]; new->tailphone = winfo->wseq[ta->wid][winfo->wlen[ta->wid]-1]; new->leftwordmaxnum = FANOUTSTEP; new->leftword = (WordGraph **)mymalloc(sizeof(WordGraph *) * new->leftwordmaxnum); new->left_lscore = (LOGPROB *)mymalloc(sizeof(LOGPROB) * new->leftwordmaxnum); new->leftwordnum = 0; new->rightwordmaxnum = FANOUTSTEP; new->rightword = (WordGraph **)mymalloc(sizeof(WordGraph *) * new->rightwordmaxnum); new->right_lscore = (LOGPROB *)mymalloc(sizeof(LOGPROB) * new->rightwordmaxnum); new->rightwordnum = 0; l = ta->backscore; if (ta->last_tre->wid != WORD_INVALID) { l -= ta->last_tre->backscore; } l -= ta->lscore; new->amavg = l / (float)(ta->endtime - ta->begintime + 1); #ifdef GRAPHOUT_DYNAMIC new->purged = FALSE; #endif new->saved = FALSE; new->graph_cm = 0.0; new->mark = FALSE; new->next = r->result.wg1; r->result.wg1 = new; /* recursive call */ generate_lattice(ta->last_tre->endtime, r); } } } /** * * Link all words in 1st pass word graph extracted by * generate_lattice() by their boundary frame. All words with the * same boundary frame will be connected. * * * * generate_lattice() で生成した第1パスグラフ中の単語どうしを境界時間 * に従って連結する. 同じ境界時間を持つすべての単語が接続される. * * * * @param root [in] pointer to the root of lattice words. * */ static void link_lattice_by_time(WordGraph *root) { WordGraph *wg; WordGraph *wtmp; int lefttime, righttime; for(wg=root;wg;wg=wg->next) { for(wtmp=root;wtmp;wtmp=wtmp->next) { if (wg->righttime + 1 == wtmp->lefttime) { wordgraph_check_and_add_leftword(wtmp, wg, wtmp->lscore_tmp); wordgraph_check_and_add_rightword(wg, wtmp, wtmp->lscore_tmp); } if (wtmp->righttime + 1 == wg->lefttime) { wordgraph_check_and_add_leftword(wg, wtmp, wg->lscore_tmp); wordgraph_check_and_add_rightword(wtmp, wg, wg->lscore_tmp); } } } } /** * * re-compute 2-gram prob for all link in 1st pass word graph mode. * * * 第1パスで単語グラフを生成するモードにおいて,生成後に単語グラフ上の * 正確な2-gram言語確立を再計算する. 
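[Editor's note] link_lattice_by_time(), shown above, connects every pair of lattice words whose frame ranges are adjacent (righttime + 1 == lefttime) with a plain O(n^2) double scan over the word list. The sketch below repeats that scan over a stand-in node type; the three words and their frame ranges are fabricated for the example.

#include <stdio.h>

/* stand-in for Julius' WordGraph: only the fields needed for linking */
typedef struct node {
  const char *label;
  int lefttime, righttime;       /* first and last frame covered by the word */
  struct node *next;             /* singly linked list of all lattice words */
} Node;

/* connect every pair of words whose frame ranges are adjacent */
static void link_by_time(Node *root)
{
  Node *a, *b;
  for (a = root; a; a = a->next) {
    for (b = root; b; b = b->next) {
      if (a->righttime + 1 == b->lefttime) {
        printf("edge: %s [%d-%d] -> %s [%d-%d]\n",
               a->label, a->lefttime, a->righttime,
               b->label, b->lefttime, b->righttime);
      }
    }
  }
}

int main(void)
{
  Node w3 = {"norway", 31, 60, NULL};
  Node w2 = {"in",     21, 30, &w3};
  Node w1 = {"skiing",  0, 20, &w2};
  link_by_time(&w1);
  return 0;
}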
* * * @param root [in] pointer to root node of word graph * @param wchmm [in] tree lexicon used for the word graph generation * */ static void re_compute_lattice_lm(WordGraph *root, WCHMM_INFO *wchmm) { WordGraph *wg; int i; for(wg=root;wg;wg=wg->next) { for(i=0;ileftwordnum;i++) { wg->left_lscore[i] = (*(wchmm->ngram->bigram_prob))(wchmm->ngram, wchmm->winfo->wton[wg->leftword[i]->wid], wchmm->winfo->wton[wg->wid]); } for(i=0;irightwordnum;i++) { wg->right_lscore[i] = (*(wchmm->ngram->bigram_prob))(wchmm->ngram, wchmm->winfo->wton[wg->wid], wchmm->winfo->wton[wg->rightword[i]->wid]); } } } #endif /** * * あるトレリス単語の情報をテキストで出力 (デバッグ用) * * @param atom [in] 出力するトレリス単語 * @param winfo [in] 単語辞書 * * * Output a trellis word information in text (for debug) * * @param atom [in] trellis word to output * @param winfo [in] word dictionary * */ static void put_atom(TRELLIS_ATOM *atom, WORD_INFO *winfo) { int i; jlog("DEBUG: %3d,%3d %f %16s (id=%5d)", atom->begintime, atom->endtime, atom->backscore, winfo->wname[atom->wid], atom->wid); for (i=0;iwlen[atom->wid]; i++) { jlog(" %s",winfo->wseq[atom->wid][i]->name); } jlog("\n"); } /** * * @brief 認識結果の単語トレリス上の最尤単語系列を求める * * 与えられたトレリス単語から入力始端に向かって単語トレリス上を * トレースバックし, その最尤単語系列候補およびその言語スコアを返す. * 起点となる最初のトレリス単語が与えられる必要がある. * * @param wordseq_rt [out] 結果の最尤単語系列が格納されるバッファ * @param rt_wordlen [out] @a wordseq_rt の長さ * @param atom [in] バックトレースの起点となるトレリス単語 * @param winfo [in] 単語辞書 * * @return 得られた最尤単語系列の言語スコア. * * * @brief Find the best word sequence in the word trellis * * This function trace back through the word trellis to the beginning * of input, to find the best word sequence. The traceback starting point * should be specified as a trellis word. * * @param wordseq_rt [out] buffer to store the best word sequence as result * @param rt_wordlen [out] length of @a wordseq_rt * @param atom [in] a trellis word as the starting point of the traceback * @param winfo [in] word dictionary * * @return the total N-gram language score of the word sequence. * */ static LOGPROB trace_backptr(WORD_ID wordseq_rt[MAXSEQNUM], int *rt_wordlen, TRELLIS_ATOM *atom, WORD_INFO *winfo) { int wordlen = 0; /* word length of best sentence hypothesis */ TRELLIS_ATOM *tretmp; LOGPROB langscore = 0.0; WORD_ID wordseq[MAXSEQNUM]; /* temporal: in reverse order */ int i; /* initialize */ wordseq[0] = atom->wid; /* start from specified atom */ wordlen = 1; tretmp = atom; langscore += tretmp->lscore; if (debug2_flag) { put_atom(tretmp, winfo); } /* trace the backtrellis */ while (tretmp->begintime > 0) {/* until beginning of input */ tretmp = tretmp->last_tre; /* t = tretmp->boundtime - 1; tretmp = bt_binsearch_atom(backtrellis, tretmp->boundtime-1, tretmp->last_wid);*/ if (tretmp == NULL) { /* should not happen */ j_internal_error("trace_backptr: last trellis missing while backtracking"); } langscore += tretmp->lscore; wordseq[wordlen] = tretmp->wid; wordlen++; if (debug2_flag) { put_atom(tretmp, winfo); } if (wordlen >= MAXSEQNUM) { j_internal_error("trace_backptr: sentence length exceeded ( > %d)\n",MAXSEQNUM); } } *rt_wordlen = wordlen; /* reverse order -> normal order */ for(i=0;i * @brief 第1パスの認識処理結果から認識結果を判定し,最尤単語系列を見つける. * * 第1パスの計算結果である単語トレリスから,第1パスでの最尤候補を求 * め,インスタンス内の result.pass1 に保存する. 候補が得られない場合 * はエラー(探索誤り:コード -1)となる. * * ショートポーズセグメンテーション時は,認識結果が無音単語のみからなる場合, * エラー(デコーダによる棄却:コード -4)となる. * * また,WORD_GRAPH 定義時は,この関数内でさらに generate_lattice() を * 呼び出し,単語グラフの抽出を行う. 
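[Editor's note] trace_backptr() above recovers the best sentence by following last_tre pointers from the final trellis word back to the beginning of input, summing the language scores on the way, and then reversing the collected words into normal order. A self-contained miniature of that traceback, using a stand-in Atom type and made-up word ids and scores:

#include <stdio.h>

#define MAXSEQ 16

/* stand-in for TRELLIS_ATOM: a word hypothesis with a back pointer */
typedef struct atom {
  int wid;                 /* word id */
  int begintime;           /* first frame of the word */
  float lscore;            /* LM score added by this word */
  struct atom *last_tre;   /* best predecessor */
} Atom;

/* walk back from the final atom, then reverse into left-to-right order */
static float trace_back(Atom *last, int seq[], int *len)
{
  float lm = 0.0f;
  int tmp[MAXSEQ], n = 0, i;
  Atom *a = last;

  while (a != NULL && n < MAXSEQ) {
    tmp[n++] = a->wid;
    lm += a->lscore;
    if (a->begintime <= 0) break;        /* reached the beginning of input */
    a = a->last_tre;
  }
  for (i = 0; i < n; i++) seq[i] = tmp[n - 1 - i];   /* reverse order -> normal order */
  *len = n;
  return lm;
}

int main(void)
{
  Atom a1 = {101,  0, -1.2f, NULL};
  Atom a2 = {205, 35, -2.5f, &a1};
  Atom a3 = {309, 80, -0.8f, &a2};
  int seq[MAXSEQ], len, i;
  float lm = trace_back(&a3, seq, &len);
  for (i = 0; i < len; i++) printf("%d ", seq[i]);
  printf(" (LM total %.1f)\n", lm);
  return 0;
}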
* * @param framelen [in] 第1パスで処理が到達したフレーム数 * @param r [in] 認識処理インスタンス * * * * @brief Find best path from the first pass result and set result status. * * This function find the best word sequence from the resulting word * trellis of the 1st pass, and store them to result.pass1 in the * recognition process instance. If no candidate was found, it sets * error code -1 (recognition failure) and exit. * * On short-pause segmentation, it sets error code -4 (decoder rejection) * if the found best path consists of only silence words. * * Also, if WORD_GRAPH is defined, this function also calls * generate_lattice() to extract word graph from the word trellis. * * @param framelen [in] frame length that has been processed * @param r [in] recognition process instance * */ static void find_1pass_result(int framelen, RecogProcess *r) { BACKTRELLIS *backtrellis; WORD_INFO *winfo; WORD_ID wordseq[MAXSEQNUM]; int wordlen; int i; TRELLIS_ATOM *best; int last_time; LOGPROB total_lscore; LOGPROB maxscore; TRELLIS_ATOM *tmp; #ifdef SPSEGMENT_NAIST boolean ok_p; #endif backtrellis = r->backtrellis; winfo = r->lm->winfo; /* look for the last trellis word */ if (r->lmtype == LM_PROB) { for (last_time = framelen - 1; last_time >= 0; last_time--) { maxscore = LOG_ZERO; for (i=0;inum[last_time];i++) { tmp = backtrellis->rw[last_time][i]; #ifdef WORD_GRAPH /* treat only words on a graph path */ if (!tmp->within_context) continue; #endif if (r->config->successive.enabled) { /* short-pause segmentation mode */ /* 最終フレームに残った最大スコアの単語 */ /* it should be the best trellis word on the last frame */ if (maxscore < tmp->backscore) { maxscore = tmp->backscore; best = tmp; } } else { /* not segmentation mode */ /* 最終単語は winfo->tail_silwid に固定 */ /* it is fixed to the tail silence model (winfo->tail_silwid) */ if (tmp->wid == winfo->tail_silwid && maxscore < tmp->backscore) { maxscore = tmp->backscore; best = tmp; break; } } } if (maxscore != LOG_ZERO) break; } if (last_time < 0) { /* not found */ jlog("WARNING: %02d %s: no tail silence word survived on the last frame, search failed\n", r->config->id, r->config->name); r->result.status = J_RESULT_STATUS_FAIL; //callback_exec(CALLBACK_RESULT, r); return; } } if (r->lmtype == LM_DFA) { for (last_time = framelen - 1; last_time >= 0; last_time--) { /* 末尾に残った単語の中で最大スコアの単語(cp_endは使用しない) */ /* the best trellis word on the last frame (not use cp_end[]) */ maxscore = LOG_ZERO; for (i=0;inum[last_time];i++) { tmp = backtrellis->rw[last_time][i]; #ifdef WORD_GRAPH /* treat only words on a graph path */ if (!tmp->within_context) continue; #endif /* if (dfa->cp_end[winfo->wton[tmp->wid]] == TRUE) {*/ if (maxscore < tmp->backscore) { maxscore = tmp->backscore; best = tmp; } /* }*/ } if (maxscore != LOG_ZERO) break; } if (last_time < 0) { /* not found */ jlog("WARNING: %02d %s: no sentence-end word survived on last beam\n", r->config->id, r->config->name); r->result.status = J_RESULT_STATUS_FAIL; //callback_exec(CALLBACK_RESULT, r); return; } } /* traceback word trellis from the best word */ total_lscore = trace_backptr(wordseq, &wordlen, best, r->lm->winfo); #ifdef SPSEGMENT_NAIST if (r->config->successive.enabled) { /* on segmentation mode, recognition result that only consists of short-pause words will be treated as recognition rejection */ ok_p = FALSE; for(i=0;iresult.status = J_RESULT_STATUS_ONLY_SILENCE; return; } } #endif /* just flush last progress output */ /* if (recog->jconf->output.progout_flag) { recog->result.status = 1; recog->result.num_frame = last_time; 
recog->result.pass1.word = wordseq; recog->result.pass1.word_num = wordlen; recog->result.pass1.score = best->backscore; recog->result.pass1.score_lm = total_lscore; recog->result.pass1.score_am = best->backscore - total_lscore; //callback_exec(CALLBACK_RESULT_PASS1_INTERIM, recog); }*/ /* output 1st pass result */ if (verbose_flag || ! r->config->output.progout_flag) { r->result.status = J_RESULT_STATUS_SUCCESS; r->result.num_frame = framelen; for(i=0;iresult.pass1.word[i] = wordseq[i]; r->result.pass1.word_num = wordlen; r->result.pass1.score = best->backscore; r->result.pass1.score_lm = total_lscore; r->result.pass1.score_am = best->backscore - total_lscore; //callback_exec(CALLBACK_RESULT_PASS1, r); } /* store the result to global val (notice: in reverse order) */ for(i=0;ipass1_wseq[i] = wordseq[i]; r->pass1_wnum = wordlen; r->pass1_score = best->backscore; #ifdef WORD_GRAPH /* 単語トレリスから,ラティスを生成する */ /* generate word graph from the word trellis */ r->peseqlen = backtrellis->framelen; r->result.wg1 = NULL; generate_lattice(last_time, r); link_lattice_by_time(r->result.wg1); if (r->lmtype == LM_PROB) re_compute_lattice_lm(r->result.wg1, r->wchmm); r->result.wg1_num = wordgraph_sort_and_annotate_id(&(r->result.wg1), r); /* compute graph CM by forward-backward processing */ graph_forward_backward(r->result.wg1, r); //callback_exec(CALLBACK_RESULT_PASS1_GRAPH, r); //wordgraph_clean(&(r->result.wg1)); #endif } /** * * トレリス単語をスコアでソートするqsort関数. * * @param x1 [in] 要素1へのポインタ * @param x2 [in] 要素2へのポインタ * * @return qsort の値 * * * qsort function to sort trellis words by their score. * * @param x1 [in] pointer to element #1 * @param x2 [in] pointer to element #2 * * @return value required for qsort. * */ static int compare_backscore(TRELLIS_ATOM **x1, TRELLIS_ATOM **x2) { return((*x2)->backscore - (*x1)->backscore); } /** * * find_1pass_result() の単語認識モード版. 単語認識モードでは第1パスで * 認識を終了するので,得られた候補は通常の第2パスと同じ場所に格納する. * * @param framelen [in] 第1パスで処理が到達したフレーム数 * @param r [i/o] 認識処理インスタンス * * * * Isolated word recognition version of find_1pass_result(). * Since Julius executes only the 1st pass on Isolated word recognition * mode, the result candidate will be stored as the final result. 
* * @param framelen [in] frame length that has been processed * @param r [i/o] recognition process instance * * */ static void find_1pass_result_word(int framelen, RecogProcess *r) { BACKTRELLIS *bt; TRELLIS_ATOM *best, *tmp; int last_time; Sentence *s; #ifdef CONFIDENCE_MEASURE LOGPROB sum; #endif LOGPROB maxscore; int i; TRELLIS_ATOM **idx; int num; if (r->lmvar != LM_DFA_WORD) return; bt = r->backtrellis; for (last_time = framelen - 1; last_time >= 0; last_time--) { maxscore = LOG_ZERO; for (i=0;inum[last_time];i++) { tmp = bt->rw[last_time][i]; #ifdef WORD_GRAPH /* treat only words on a graph path */ if (!tmp->within_context) continue; #endif if (maxscore < tmp->backscore) { maxscore = tmp->backscore; best = tmp; } } if (maxscore != LOG_ZERO) break; } if (last_time < 0) { /* not found */ jlog("WARNING: %02d %s: no word survived on the last frame, search failed\n", r->config->id, r->config->name); r->result.status = J_RESULT_STATUS_FAIL; //callback_exec(CALLBACK_RESULT, r); return; } #ifdef CONFIDENCE_MEASURE sum = 0.0; for (i=0;inum[last_time];i++) { tmp = bt->rw[last_time][i]; #ifdef WORD_GRAPH /* treat only words on a graph path */ if (!tmp->within_context) continue; #endif sum += pow(10, r->config->annotate.cm_alpha * (tmp->backscore - maxscore)); } #endif /* set recognition result status to normal */ r->result.status = J_RESULT_STATUS_SUCCESS; if (r->config->output.output_hypo_maxnum > 1) { /* more than one candidate is requested */ /* get actual number of candidates to output */ num = r->config->output.output_hypo_maxnum; if (num > bt->num[last_time]) { num = bt->num[last_time]; } /* prepare result storage */ result_sentence_malloc(r, num); r->result.sentnum = num; /* sort by score */ idx = (TRELLIS_ATOM **)mymalloc(sizeof(TRELLIS_ATOM *)*bt->num[last_time]); for (i=0;inum[last_time];i++) { idx[i] = bt->rw[last_time][i]; } qsort(idx, bt->num[last_time], sizeof(TRELLIS_ATOM *), (int (*)(const void *,const void *))compare_backscore); /* store to result storage */ for(i=0;iresult.sentnum;i++) { s = &(r->result.sent[i]); tmp = idx[i]; s->word_num = 1; s->word[0] = tmp->wid; #ifdef CONFIDENCE_MEASURE s->confidence[0] = pow(10, r->config->annotate.cm_alpha * (tmp->backscore - maxscore)) / sum; #endif s->score = tmp->backscore; s->score_lm = 0.0; s->score_am = tmp->backscore; if (multigram_get_all_num(r->lm) > 0) { s->gram_id = multigram_get_gram_from_wid(s->word[0], r->lm); } else { s->gram_id = 0; } } /* free work area for sort */ free(idx); } else { /* only max is needed */ /* prepare result storage */ result_sentence_malloc(r, 1); r->result.sentnum = 1; s = &(r->result.sent[0]); s->word_num = 1; s->word[0] = best->wid; #ifdef CONFIDENCE_MEASURE s->confidence[0] = 1.0 / sum; #endif s->score = best->backscore; s->score_lm = 0.0; s->score_am = best->backscore; if (multigram_get_all_num(r->lm) > 0) { s->gram_id = multigram_get_gram_from_wid(s->word[0], r->lm); } else { s->gram_id = 0; } } /* copy as 1st pass result */ memcpy(&(r->result.pass1), &(r->result.sent[0]), sizeof(Sentence)); r->result.pass1.align = NULL; //callback_exec(CALLBACK_RESULT, r); //free(r->result.sent); } #ifdef DETERMINE /** * * 第1パスの途中データから早期確定可能かどうか判定する(実験). tremax が * NULL のときは初期化する. 確定時は r->have_determine を TRUE にする. * * @param r [i/o] 音声認識処理インスタンス * @param t [in] フレーム * @param tremax [in] 現在のフレーム上で最尤のトレリス単語 * @param thres [in] 確定用のスコア閾値 * @param countthres [in] 確定用の持続フレーム数の閾値 * * @return 確定時は tremax を返す. 未確定時は NULL を返す. 
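[Editor's note] The confidence value attached to each candidate in find_1pass_result_word() above is a normalized exponential of the score gap to the best hypothesis: pow(10, cm_alpha * (score - best)) divided by the sum of the same quantity over all candidates. The helper below computes exactly that for a few invented scores; the value 0.05 is only a placeholder for the engine's cm_alpha setting. Build with -lm.

#include <math.h>
#include <stdio.h>

/* cm_i = 10^(alpha * (score_i - best)) / sum_j 10^(alpha * (score_j - best)) */
static void confidence(const double *score, int n, double alpha, double *cm)
{
  double best = score[0], sum = 0.0;
  int i;
  for (i = 1; i < n; i++) if (score[i] > best) best = score[i];
  for (i = 0; i < n; i++) sum += pow(10.0, alpha * (score[i] - best));
  for (i = 0; i < n; i++) cm[i] = pow(10.0, alpha * (score[i] - best)) / sum;
}

int main(void)
{
  /* made-up accumulated log scores of three word candidates */
  double score[3] = {-3500.0, -3512.0, -3530.0};
  double cm[3];
  int i;
  confidence(score, 3, 0.05, cm);
  for (i = 0; i < 3; i++) printf("cand %d: CM=%.3f\n", i, cm[i]);
  return 0;
}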
* * * Try to Determine a word hypothesis before end of input on isolated * word recognition mode (EXPERIMENT). Initialize if tremax is NULL. * Set r->have_determine to TRUE if determined. * * @param r [i/o] recognition process instance * @param t [in] current frame * @param tremax [in] maximum scored trellis word on the current frame * @param thres [in] score threshold for determinization * @param countthres [in] frame duration threshold for determinization * * @return the tremax if determined, or NULL if not determined yet., * */ static TRELLIS_ATOM * determine_word(RecogProcess *r, int t, TRELLIS_ATOM *tremax, LOGPROB thres, int countthres) { TRELLIS_ATOM *ret; WORD_ID w; //LOGPROB sum; //LOGPROB cm; int j; FSBeam *d; TOKEN2 *tk; if (tremax == NULL) { /* initialize */ r->determine_count = 0; r->determine_maxnodescore = LOG_ZERO; r->determined = FALSE; r->determine_last_wid = WORD_INVALID; r->have_determine = FALSE; return NULL; } ret = NULL; /* get confidence score of the maximum word hypothesis */ /* * sum = 0.0; * tre = recog->backtrellis->list; * while (tre != NULL && tre->endtime == t) { * sum += pow(10, recog->jconf->annotate.cm_alpha * (tre->backscore - tremax->backscore)); * tre = tre->next; * } * cm = 1.0 / sum; */ /* determinization decision */ w = tremax->wid; r->have_determine = FALSE; /* determine by score threshold from maximum node score to maximum word end node score */ if (r->determine_last_wid == w && r->determine_maxnodescore - tremax->backscore <= thres) { r->determine_count++; if (r->determine_count > countthres) { if (r->determined == FALSE) { ret = tremax; r->determined = TRUE; r->have_determine = TRUE; } } } else { r->determine_count = 0; } //printf("determine: %d: %s: cm=%f, relscore=%f, count=%d, phase=%d\n", t, recog->model->winfo->woutput[w], cm, determine_maxnodescore - tremax->backscore, count, phase); r->determine_last_wid = w; /* update maximum node score here for next call, since the word path determination is always one frame later */ d = &(r->pass1); r->determine_maxnodescore = LOG_ZERO; for (j = d->n_start; j <= d->n_end; j++) { tk = &(d->tlist[d->tn][d->tindex[d->tn][j]]); if (r->determine_maxnodescore < tk->score) r->determine_maxnodescore = tk->score; } return(ret); } /** * * 単語認識時に,第1パスの処理中に早期確定する(実験). 確定できた場合, * 第1パスの結果格納用エリア (r->result.pass1) に確定結果を格納する. * また確定時は r->have_determine に TRUE が入る. * * @param r [in] 認識処理インスタンス * @param t [in] 現在の入力フレーム * * * Determine word hypothesis before end of input (EXPERIMENT). When * determined, the determined word will be stored to the result area * for the 1st pass (r->result.pass1). r->have_determine will be * set to TRUE when determinized. 
* * @param r [in] recognition process instance * @param t [in] current input frame * */ static void check_determine_word(RecogProcess *r, int t) { TRELLIS_ATOM *tre; TRELLIS_ATOM *tremax; LOGPROB maxscore; /* bt->list is ordered by time frame */ maxscore = LOG_ZERO; tremax = NULL; tre = r->backtrellis->list; while (tre != NULL && tre->endtime == t) { if (maxscore < tre->backscore) { maxscore = tre->backscore; tremax = tre; } tre = tre->next; } r->result.status = J_RESULT_STATUS_SUCCESS; r->result.num_frame = t; if (maxscore != LOG_ZERO) { // if ((tre = determine_word(recog, t, tremax, 0.9, 17)) != NULL) { if ((tre = determine_word(r, t, tremax, r->config->pass1.determine_score_thres, r->config->pass1.determine_duration_thres)) != NULL) { r->result.pass1.word[0] = tremax->wid; r->result.pass1.word_num = 1; r->result.pass1.score = tremax->backscore; r->result.pass1.score_lm = 0.0; r->result.pass1.score_am = tremax->backscore; r->result.num_frame = t; //callback_exec(CALLBACK_RESULT_PASS1_DETERMINED, r); } } } #endif /* DETERMINE */ /** * * 第1パスの処理中に,あるフレームまでのベストパスを表示する. * * @param r [i/o] 認識処理インスタンス * @param t [in] 現在の入力フレーム * * * Output the current best word sequence ending * at a specified time frame in the course of the 1st pass. * * @param r [i/o] recognition process instance * @param t [in] current input frame * */ static void bt_current_max(RecogProcess *r, int t) { int wordlen; TRELLIS_ATOM *tre; TRELLIS_ATOM *tremax; LOGPROB maxscore; LOGPROB lscore; /* bt->list is ordered by time frame */ maxscore = LOG_ZERO; tremax = NULL; tre = r->backtrellis->list; while (tre != NULL && tre->endtime == t) { if (maxscore < tre->backscore) { maxscore = tre->backscore; tremax = tre; } tre = tre->next; } r->result.status = J_RESULT_STATUS_SUCCESS; r->result.num_frame = t; if (maxscore == LOG_ZERO) { r->result.pass1.word_num = 0; } else { if (r->lmvar == LM_DFA_WORD) { r->result.pass1.word[0] = tremax->wid; r->result.pass1.word_num = 1; r->result.pass1.score = tremax->backscore; r->result.pass1.score_lm = 0.0; r->result.pass1.score_am = tremax->backscore; } else { lscore = trace_backptr(r->result.pass1.word, &wordlen, tremax, r->lm->winfo); r->result.pass1.word_num = wordlen; r->result.pass1.score = tremax->backscore; r->result.pass1.score_lm = lscore; r->result.pass1.score_am = tremax->backscore; } } //callback_exec(CALLBACK_RESULT_PASS1_INTERIM, r); } /** * * 第1パスの処理中に,あるフレーム上の最尤単語を表示する(デバッグ用) * * @param r [i/o] 認識処理インスタンス * @param t [in] 現在の入力フレーム * * * Output the current best word on a specified time frame in the course * of the 1st pass. 
* * @param r [i/o] recognition process instance * @param t [in] current input frame * */ static void bt_current_max_word(RecogProcess *r, int t) { TRELLIS_ATOM *tre; TRELLIS_ATOM *tremax; LOGPROB maxscore; WORD_ID w; /* bt->list は時間順に格納されている */ /* bt->list is order by time */ maxscore = LOG_ZERO; tremax = NULL; tre = r->backtrellis->list; while (tre != NULL && tre->endtime == t) { if (maxscore < tre->backscore) { maxscore = tre->backscore; tremax = tre; } tre = tre->next; } if (maxscore != LOG_ZERO) { jlog("DEBUG: %3d: ",t); w = tremax->wid; jlog("\"%s [%s]\"(id=%d)", r->lm->winfo->wname[w], r->lm->winfo->woutput[w], w); jlog(" [%d-%d] %f", tremax->begintime, t, tremax->backscore); w = tremax->last_tre->wid; if (w != WORD_INVALID) { jlog(" <- \"%s [%s]\"(id=%d)\n", r->lm->winfo->wname[w], r->lm->winfo->woutput[w], w); } else { jlog(" <- bgn\n"); } } } /* -------------------------------------------------------------------- */ /* ビーム探索中のトークンを扱うサブ関数 */ /* functions to handle hypothesis tokens */ /* -------------------------------------------------------------------- */ /** * * 第1パスのビーム探索用の初期ワークエリアを確保する. * 足りない場合は探索中に動的に伸長される. * * @param d [i/o] 第1パス探索処理用ワークエリア * @param n [in] 木構造化辞書のノード数 * @param ntoken_init [in] 最初に確保するトークンの数 * * * Allocate initial work area for beam search on the 1st pass. * If filled while search, they will be expanded on demand. * * @param d [i/o] work area for 1st pass recognition processing * @param n [in] number of nodes in lexicon tree * @param ntoken_init [in] number of token space to be allocated at first * */ static void malloc_nodes(FSBeam *d, int n, int ntoken_init) { d->totalnodenum = n; d->token = (TOKENID *)mymalloc(sizeof(TOKENID) * d->totalnodenum); //d->maxtnum = ntoken_init; if (d->maxtnum < ntoken_init) d->maxtnum = ntoken_init; d->tlist[0] = (TOKEN2 *)mymalloc(sizeof(TOKEN2) * d->maxtnum); d->tlist[1] = (TOKEN2 *)mymalloc(sizeof(TOKEN2) * d->maxtnum); d->tindex[0] = (TOKENID *)mymalloc(sizeof(TOKENID) * d->maxtnum); d->tindex[1] = (TOKENID *)mymalloc(sizeof(TOKENID) * d->maxtnum); //d->expand_step = ntoken_step; d->nodes_malloced = TRUE; d->expanded = FALSE; } /** * * 第1パスのビーム探索用のワークエリアを伸ばして再確保する. * * @param d [i/o] 第1パス探索処理用ワークエリア * * * Re-allocate work area for beam search on the 1st pass. * * @param d [i/o] work area for 1st pass recognition processing * */ static void expand_tlist(FSBeam *d) { d->maxtnum += d->expand_step; d->tlist[0] = (TOKEN2 *)myrealloc(d->tlist[0],sizeof(TOKEN2) * d->maxtnum); d->tlist[1] = (TOKEN2 *)myrealloc(d->tlist[1],sizeof(TOKEN2) * d->maxtnum); d->tindex[0] = (TOKENID *)myrealloc(d->tindex[0],sizeof(TOKENID) * d->maxtnum); d->tindex[1] = (TOKENID *)myrealloc(d->tindex[1],sizeof(TOKENID) * d->maxtnum); if (debug2_flag) jlog("STAT: token space expanded to %d\n", d->maxtnum); d->expanded = TRUE; } /** * * Clear nodes for the next input. Julius will call this function for * each input to re-set the work area for the beam search. If the size * of tree lexicon has been changed since the last input, Julius will * free and re-allocate the work area. * * * ノード情報を初期化する. Julius は,木構造化辞書のサイズが直前の入力 * 時と変化がないときは,この関数によってノード情報を初期化するだけで * よい. サイズが変更されているときはノードを開放・再確保する. * * * @param d [i/o] work area for 1st pass recognition processing * @param ntoken_step [in] required token step * */ static void prepare_nodes(FSBeam *d, int ntoken_step) { d->tnum[0] = d->tnum[1] = 0; if (d->expand_step < ntoken_step) d->expand_step = ntoken_step; } /** * * 第1パスのビーム探索用のワークエリアを全て解放する. 
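[Editor's note] malloc_nodes() and expand_tlist() above manage the token space as flat arrays that grow by expand_step through realloc() whenever a new token would overflow them (the same grow-on-demand check appears again in create_token() further below). The toy allocator here mimics that pattern with a single array; the record type and sizes are invented.

#include <stdio.h>
#include <stdlib.h>

/* stand-in token record; the real TOKEN2 carries scores and pointers */
typedef struct { float score; } Tok;

static Tok *tlist = NULL;
static int  maxtnum = 0;
static int  tnum = 0;
static const int expand_step = 4;

/* grow the token space by expand_step, as expand_tlist() does */
static void expand(void)
{
  maxtnum += expand_step;
  tlist = (Tok *)realloc(tlist, sizeof(Tok) * maxtnum);
  if (tlist == NULL) { perror("realloc"); exit(1); }
  printf("token space expanded to %d\n", maxtnum);
}

/* hand out a new token id, expanding on demand */
static int new_token(void)
{
  int id = tnum++;
  while (tnum >= maxtnum) expand();
  return id;
}

int main(void)
{
  int i;
  expand();                      /* initial allocation */
  for (i = 0; i < 10; i++) {
    int id = new_token();
    tlist[id].score = (float)-i;
  }
  printf("%d tokens in a space of %d\n", tnum, maxtnum);
  free(tlist);
  return 0;
}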
* * @param d [i/o] 第1パス探索処理用ワークエリア * * * * Free all the work area for beam search on the 1st pass. * * @param d [i/o] work area for 1st pass recognition processing * * */ static void free_nodes(FSBeam *d) { if (d->nodes_malloced) { free(d->token); free(d->tlist[0]); free(d->tlist[1]); free(d->tindex[0]); free(d->tindex[1]); d->nodes_malloced = FALSE; } } /** * * トークンスペースをリセットする. * * @param d [i/o] 第1パス探索処理用ワークエリア * @param tt [in] ワークエリアID (0 または 1) * * * Reset the token space. * * @param d [i/o] work area for 1st pass recognition processing * @param tt [in] work area id (0 or 1) * */ static void clear_tlist(FSBeam *d, int tt) { d->tnum[tt] = 0; } /** * * アクティブトークンリストをクリアする. * * @param d [i/o] 第1パス探索処理用ワークエリア * @param tt [in] 直前のワークエリアID (0 または 1) * * * Clear the active token list. * * @param d [i/o] work area for 1st pass recognition processing * @param tt [in] work area id of previous frame (0 or 1) * */ static void clear_tokens(FSBeam *d, int tt) { int j; /* initialize active token list: only clear ones used in the last call */ for (j=0; jtnum[tt]; j++) { d->token[d->tlist[tt][j].node] = TOKENID_UNDEFINED; } } /** * * トークンスペースから新たなトークンを取りだす. * * @param d [i/o] 第1パス探索処理用ワークエリア * * @return 新たに取り出されたトークンのID * * * Assign a new token from token space. * * @param d [i/o] work area for 1st pass recognition processing * @return the id of the newly assigned token. * */ static TOKENID create_token(FSBeam *d) { TOKENID newid; int tn; tn = d->tn; newid = d->tnum[tn]; d->tnum[tn]++; while (d->tnum[tn]>=d->maxtnum) expand_tlist(d); d->tindex[tn][newid] = newid; #ifdef WPAIR /* initialize link */ d->tlist[tn][newid].next = TOKENID_UNDEFINED; #endif return(newid); } /** * * @brief 木構造化辞書のノードにトークンを割り付ける. * * 木構造化辞書のノードのアクティブトークンリストにトークンを保存する. * またトークンスペースにおいてトークンからノード番号へのリンクを保存する. * * 既にトークンがある場合は,新たなトークンによって上書きされる. なお * WPAIR 指定時はそのリストに新たなトークンを追加する. * * @param d [i/o] 第1パス探索処理用ワークエリア * @param node [in] 木構造化辞書のノード番号 * @param tkid [in] トークン番号 * * * @brief Assign token to a node on tree lexicon * * Save the token id to the specified node in the active token list. * Also saves the link to the node from the token in token space. * * If a token already exist on the node, it will be overridden by the new one. * If WPAIR is defined, the new token will be simply added to the list of * active tokens on the node. * * @param d [i/o] work area for 1st pass recognition processing * @param node [in] node id on the tree lexicon * @param tkid [in] token id to be assigned * */ static void node_assign_token(FSBeam *d, int node, TOKENID tkid) { #ifdef WPAIR /* add to link list */ d->tlist[d->tn][tkid].next = d->token[node]; #endif d->token[node] = tkid; d->tlist[d->tn][tkid].node = node; } /** * * @brief 木構造化辞書上のあるノードが,現在なんらかのトークンを * 保持しているかをチェックする. * * WPAIR が定義されている場合,ノードは直前単語ごとに異なるトークンを複数 * 保持する. この場合, 指定された単語IDを直前単語とするトークンが * そのノードに保持されているかどうかがチェックされる. すなわち,既にトークン * が存在しても,そのトークンの表すパスの直前単語が指定した単語と異なって * いれば未保持 (TOKENID_UNDEFINED) を返す. * * @param d [i/o] 第1パス探索処理用ワークエリア * @param tt [in] 直前のワークエリアID (0 または 1) * @param node [in] ノード番号 * @param wid [in] 直前単語のID (WPAIR定義時のみ有効, 他では無視される) * * @return そのノードが既に保持するトークン番号,無ければ TOKENID_UNDEFINED. * * * @brief Check if a node holds any token * * This function checks if a node on the tree lexicon already holds a token. * * If WPAIR is defined, a node has multiple tokens according to the previous * word context. In this case, only token with the same previous word will be * checked. 
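[Editor's note] node_exist_token(), described above and defined just below, scans the per-node token list (when WPAIR is enabled) for a token whose path has the same previous word; with WPAIR_KEEP_NLIMIT, a node that already keeps its limit of tokens returns the lowest-scoring entry so the caller can overwrite it. The following sketch reproduces that lookup over a small fixed token pool; the word ids, scores and the limit of 2 are invented.

#include <stdio.h>

#define NLIMIT 2                 /* stand-in for wpair_keep_nlimit */
#define UNDEF  (-1)

typedef struct tok {
  int last_wid;                  /* previous word id of the path */
  float score;
  int next;                      /* index of next token on the same node, or UNDEF */
} Tok;

static Tok tokens[16];

/* return the token with the same previous word; when the node is full,
   return the lowest-scoring one so the caller can overwrite it */
static int find_token(int head, int wid)
{
  int i = 0, lowest = UNDEF, t;
  for (t = head; t != UNDEF; t = tokens[t].next) {
    if (tokens[t].last_wid == wid) return t;
    if (lowest == UNDEF || tokens[lowest].score > tokens[t].score) lowest = t;
    if (++i >= NLIMIT) break;
  }
  return (i >= NLIMIT) ? lowest : UNDEF;
}

int main(void)
{
  /* node keeps two tokens: from previous word 7 (score -10) and 9 (score -20) */
  tokens[0].last_wid = 7; tokens[0].score = -10.0f; tokens[0].next = 1;
  tokens[1].last_wid = 9; tokens[1].score = -20.0f; tokens[1].next = UNDEF;

  printf("same context -> token %d\n", find_token(0, 9));  /* found: 1 */
  printf("new context  -> token %d\n", find_token(0, 5));  /* full: overwrite lowest (1) */
  return 0;
}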
* * @param d [i/o] work area for 1st pass recognition processing * @param tt [in] work area id (0 or 1) * @param node [in] node id of lexicon tree * @param wid [in] word id of previous word (ignored if WPAIR is not defined) * * @return the token id on the node, or TOKENID_UNDEFINED if no token has * been assigned in this frame. * */ static TOKENID node_exist_token(FSBeam *d, int tt, int node, WORD_ID wid) { #ifdef WPAIR /* In word-pair mode, multiple tokens are assigned to a node as a list. so we have to search for tokens with same last word ID */ #ifdef WPAIR_KEEP_NLIMIT /* 1ノードごとに保持するtoken数の上限を設定 */ /* tokenが無いが上限に達しているときは一番スコアの低いtokenを上書きする */ /* N-best: limit number of assigned tokens to a node */ int i = 0; TOKENID lowest_token = TOKENID_UNDEFINED; #endif TOKENID tmp; for(tmp=d->token[node]; tmp != TOKENID_UNDEFINED; tmp=d->tlist[tt][tmp].next) { if (d->tlist[tt][tmp].last_tre->wid == wid) { return(tmp); } #ifdef WPAIR_KEEP_NLIMIT if (lowest_token == TOKENID_UNDEFINED || d->tlist[tt][lowest_token].score > d->tlist[tt][tmp].score) lowest_token = tmp; if (++i >= d->wpair_keep_nlimit) break; #endif } #ifdef WPAIR_KEEP_NLIMIT if (i >= d->wpair_keep_nlimit) { /* overflow, overwrite lowest score */ return(lowest_token); } else { return(TOKENID_UNDEFINED); } #else return(TOKENID_UNDEFINED); #endif #else /* not WPAIR */ /* 1つだけ保持,これを常に上書き */ /* Only one token is kept in 1-best mode (default), so simply return the ID */ return(d->token[node]); #endif } #ifdef DEBUG /* tlist と token の対応をチェックする(debug) */ /* for debug: check tlist <-> token correspondence where tlist[tt][tokenID].node = nodeID and token[nodeID] = tokenID */ static void node_check_token(FSBeam *d, int tt) { int i; for(i=0;itnum[tt];i++) { if (node_exist_token(d, tt, d->tlist[tt][i].node, d->tlist[tt][i].last_tre->wid) != i) { jlog("ERROR: token %d not found on node %d\n", i, d->tlist[tt][i].node); } } } #endif /* -------------------------------------------------------------------- */ /* トークンをソートし 上位 N トークンを判別する (heap sort) */ /* Sort generated tokens and get N-best (use heap sort) */ /* -------------------------------------------------------------------- */ /* ビームの閾値として上位 N 番目のスコアが欲しいだけであり,実際にソート される必要はない */ /* we only want to know the N-th score for determining beam threshold, so order is not considered here */ #define SD(A) tindex_local[A-1] ///< Index locater for sort_token_*() #define SCOPY(D,S) D = S ///< Content copier for sort_token_*() #define SVAL(A) (tlist_local[tindex_local[A-1]].score) ///< Score locater for sort_token_*() #define STVAL (tlist_local[s].score) ///< Indexed score locater for sort_token_*() /** * * @brief トークンスペースをスコアの大きい順にソートする. * * heap sort を用いて現在のトークン集合をスコアの大きい順にソートする. * 上位 @a neednum 個のトークンがソートされればそこで処理を終了する. * * @param d [i/o] 第1パス探索処理用ワークエリア * @param neednum [in] 上位 @a neednum 個が得られるまでソートする * @param totalnum [in] トークンスペース内の有効なトークン数 * * * @brief Sort the token space upward by score. * * This function sort the whole token space in upward direction, according * to their accumulated score. * This function terminates sort as soon as the top * @a neednum tokens has been found. 
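 *
 * A stand-alone illustration of this "stop heap sort early" idea, using a
 * plain array of doubles instead of the token space (sketch only; the
 * real routine works through the tindex[] indirection and the SD/SVAL
 * macros defined above):
 *
 * @code
 * #include <stdio.h>
 *
 * // Leave the N largest values of a[0..n-1], in ascending order, at
 * // a[n-N..n-1]; the remaining elements stay unordered.
 * static void top_n_by_heap(double *a, int n, int N)
 * {
 *   int root, parent, child, last;
 *   double s;
 *   for (root = n / 2; root >= 1; root--) {     // build a max-heap
 *     s = a[root-1]; parent = root;
 *     while ((child = parent * 2) <= n) {
 *       if (child < n && a[child-1] < a[child]) child++;
 *       if (s >= a[child-1]) break;
 *       a[parent-1] = a[child-1]; parent = child;
 *     }
 *     a[parent-1] = s;
 *   }
 *   for (last = n; last > n - N; last--) {      // pop only N maxima
 *     s = a[last-1]; a[last-1] = a[0]; parent = 1;
 *     while ((child = parent * 2) <= last - 1) {
 *       if (child < last - 1 && a[child-1] < a[child]) child++;
 *       if (s >= a[child-1]) break;
 *       a[parent-1] = a[child-1]; parent = child;
 *     }
 *     a[parent-1] = s;
 *   }
 * }
 *
 * int main(void)
 * {
 *   double a[] = { -10.0, -3.5, -7.2, -1.0, -6.6, -2.2 };
 *   top_n_by_heap(a, 6, 3);
 *   printf("top-3: %g %g %g\n", a[3], a[4], a[5]);   // -3.5 -2.2 -1.0
 *   return 0;
 * }
 * @endcode
 *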
* * @param d [i/o] work area for 1st pass recognition processing * @param neednum [in] sort until top @a neednum tokens has been found * @param totalnum [in] total number of assigned tokens in the token space * */ static void sort_token_upward(FSBeam *d, int neednum, int totalnum) { int n,root,child,parent; TOKENID s; TOKEN2 *tlist_local; TOKENID *tindex_local; tlist_local = d->tlist[d->tn]; tindex_local = d->tindex[d->tn]; for (root = totalnum/2; root >= 1; root--) { SCOPY(s, SD(root)); parent = root; while ((child = parent * 2) <= totalnum) { if (child < totalnum && SVAL(child) < SVAL(child+1)) { child++; } if (STVAL >= SVAL(child)) { break; } SCOPY(SD(parent), SD(child)); parent = child; } SCOPY(SD(parent), s); } n = totalnum; while ( n > totalnum - neednum) { SCOPY(s, SD(n)); SCOPY(SD(n), SD(1)); n--; parent = 1; while ((child = parent * 2) <= n) { if (child < n && SVAL(child) < SVAL(child+1)) { child++; } if (STVAL >= SVAL(child)) { break; } SCOPY(SD(parent), SD(child)); parent = child; } SCOPY(SD(parent), s); } } /** * * @brief トークンスペースをスコアの小さい順にソートする. * * ビームのしきい値決定のために,heap sort を用いて * 現在のトークン集合をスコアの小さい順にソートする. * 下位 @a neednum 個のトークンがソートされればそこで処理を終了する. * * @param d [i/o] 第1パス探索処理用ワークエリア * @param neednum [in] 下位 @a neednum 個が得られるまでソートする * @param totalnum [in] トークンスペース内の有効なトークン数 * * * @brief Sort the token space downward by score. * * This function sort the whole token space in downward direction, according * to their accumulated score for hypothesis pruning. * * This function terminates sort as soon as the bottom * @a neednum tokens has been found. * * @param d [i/o] work area for 1st pass recognition processing * @param neednum [in] sort until bottom @a neednum tokens has been found * @param totalnum [in] total number of assigned tokens in the token space * */ static void sort_token_downward(FSBeam *d, int neednum, int totalnum) { int n,root,child,parent; TOKENID s; TOKEN2 *tlist_local; TOKENID *tindex_local; tlist_local = d->tlist[d->tn]; tindex_local = d->tindex[d->tn]; for (root = totalnum/2; root >= 1; root--) { SCOPY(s, SD(root)); parent = root; while ((child = parent * 2) <= totalnum) { if (child < totalnum && SVAL(child) > SVAL(child+1)) { child++; } if (STVAL <= SVAL(child)) { break; } SCOPY(SD(parent), SD(child)); parent = child; } SCOPY(SD(parent), s); } n = totalnum; while ( n > totalnum - neednum) { SCOPY(s, SD(n)); SCOPY(SD(n), SD(1)); n--; parent = 1; while ((child = parent * 2) <= n) { if (child < n && SVAL(child) > SVAL(child+1)) { child++; } if (STVAL <= SVAL(child)) { break; } SCOPY(SD(parent), SD(child)); parent = child; } SCOPY(SD(parent), s); } } /** * * @brief トークンスペースをソートしてビーム内に残るトークンを決定する * * heap sort を用いて現在のトークン集合をソートし,上位スコアのトークン * 集合を求める. 上位 @a neednum 個のトークン集合が得られれば良いので, * 全体が完全にソートされている必要はない. よって * 上位 @a neednum 個のトークンのみをソートする. 実際には,全体のトークン * 数と必要なトークン数から sort_token_upward() * と sort_token_downward() の早い方が用いられる. * * @param d [i/o] 第1パス探索処理用ワークエリア * @param neednum [in] 求める上位トークンの数 * @param start [out] 上位 @a neednum のトークンが存在するトークンスペースの最初のインデックス番号 * @param end [out] 上位 @a neednum のトークンが存在するトークンスペースの最後のインデックス番号 * * * @brief Sort the token space to find which tokens to be survived in the beam * * This function sorts the currrent tokens in the token space to find * the tokens to be survived in the current frame. Only getting the top * @a neednum tokens is required, so the sort will be terminate just after * the top @a neednum tokens are determined. 
Actually, either * sort_token_upward() or sort_token_downward() will be used depending of * the number of needed tokens and total tokens. * * @param d [i/o] work area for 1st pass recognition processing * @param neednum [in] number of top tokens to be found * @param start [out] start index of the top @a neednum nodes * @param end [out] end index of the top @a neednum nodes * */ static void sort_token_no_order(FSBeam *d, int neednum, int *start, int *end) { int totalnum; int restnum; totalnum = d->tnum[d->tn]; restnum = totalnum - neednum; if (neednum >= totalnum) { /* no need to sort */ *start = 0; *end = totalnum - 1; } else if (neednum < restnum) { /* needed num is smaller than rest, so sort for the needed tokens */ sort_token_upward(d, neednum,totalnum); *start = totalnum - neednum; *end = totalnum - 1; } else { /* needed num is bigger than rest, so sort for the rest token */ sort_token_downward(d, restnum,totalnum); *start = 0; *end = neednum - 1; } } /* -------------------------------------------------------------------- */ /* 第1パス(フレーム同期ビームサーチ) メイン */ /* main routines of 1st pass (frame-synchronous beam search) */ /* -------------------------------------------------------------------- */ /** * * @brief 初期仮説の生成 * * 初期仮説は,N-gramでは winfo->head_silwid に固定されている. DFA では * 文法上文頭にきうる単語すべてが初期仮説となる. 単語認識モードでは * 全ての単語が初期仮説となる. * * 音響モデルが非multipathの場合,ここで最初のフレームの出力確率 * 計算まで行われる. * * @param param [in] 入力ベクトル列情報(最初のフレームのみ必要) * @param r [in] 音声認識処理インスタンス * * * @brief Generate initial hypotheses * * The initial hypothesis is: 1) winfo->head_silwid for N-gram, 2) all words * that can be at beginning of sentence for DFA, or 3) all words in dictionary * for isolated word recognition mode. * * If acoustic model is NOT a multi-path model, the output probabilities for * the first frame (t=0) will also be computed in this function. 
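 *
 * A compact sketch of the word-selection rule only (stand-in types and
 * names, not the Julius API; the real work of creating tokens on the
 * lexicon nodes is done in the function body below):
 *
 * @code
 * typedef enum { KIND_NGRAM, KIND_GRAMMAR, KIND_WORD } LmKind;
 *
 * // 'startable[w]' is a stand-in flag meaning "word w's category may start
 * // a sentence"; returns the number of initial words written to out[].
 * static int pick_initial_words(LmKind k, int head_silwid, int nword,
 *                               const int *startable, int *out)
 * {
 *   int w, n = 0;
 *   switch (k) {
 *   case KIND_NGRAM:                  // N-gram: fixed to the head silence word
 *     out[n++] = head_silwid; break;
 *   case KIND_GRAMMAR:                // DFA: grammar-initial words only
 *     for (w = 0; w < nword; w++) if (startable[w]) out[n++] = w;
 *     break;
 *   case KIND_WORD:                   // isolated word recognition: all words
 *     for (w = 0; w < nword; w++) out[n++] = w;
 *     break;
 *   }
 *   return n;
 * }
 * @endcode
 *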
* * @param param [in] input vectors (only the first frame will be used) * @param r [in] recognition process instance * */ static boolean init_nodescore(HTK_Param *param, RecogProcess *r) { WCHMM_INFO *wchmm; FSBeam *d; TOKENID newid; TOKEN2 *new; WORD_ID beginword; int node; int i; wchmm = r->wchmm; d = &(r->pass1); /* 初期仮説用単語履歴 */ /* setup initial word context */ if (r->config->successive.enabled) { /* sp segment mode */ /* initial word context = last non-sp word of previous 2nd pass at last segment*/ if (r->lmtype == LM_PROB) { if (r->sp_break_last_nword == wchmm->winfo->tail_silwid) { /* if end with silE, initialize as normal start of sentence */ d->bos.wid = WORD_INVALID; } else { d->bos.wid = r->sp_break_last_nword; } } else { d->bos.wid = WORD_INVALID; } } else { /* not sp segment mode */ d->bos.wid = WORD_INVALID; /* no context */ } d->bos.begintime = d->bos.endtime = -1; /* ノード・トークンを初期化 */ /* clear tree lexicon nodes and tokens */ for(node = 0; node < d->totalnodenum; node++) { d->token[node] = TOKENID_UNDEFINED; } d->tnum[0] = d->tnum[1] = 0; #ifdef PASS1_IWCD /* 出力確率計算キャッシュを初期化 */ /* initialize outprob cache */ outprob_style_cache_init(wchmm); #endif /* 初期仮説の作成: 初期単語の決定と初期トークンの生成 */ /* initial word hypothesis */ if (r->lmtype == LM_PROB) { if (r->config->successive.enabled) { /* sp segment mode */ if (r->sp_break_last_word != WORD_INVALID) { /* last segment exist */ /* 開始単語=前のセグメント計算時の最後の最尤単語 */ /* 文終了単語(silE,句点(IPAモデル))なら,silB で開始 */ /* initial word = best last word hypothesis on the last segment */ /* if silE or sp, begin with silB */ /*if (is_sil(recog.sp_break_last_word, wchmm->winfo, wchmm->hmminfo)) {*/ if (r->sp_break_last_word == wchmm->winfo->tail_silwid) { beginword = wchmm->winfo->head_silwid; d->bos.wid = WORD_INVALID; /* reset initial word context */ } else { beginword = r->sp_break_last_word; } } else { /* initial segment: initial word set to silB */ beginword = wchmm->winfo->head_silwid; } } else { /* not sp segment mode */ /* initial word fixed to silB */ beginword = wchmm->winfo->head_silwid; } #ifdef SP_BREAK_DEBUG jlog("DEBUG: startword=[%s], last_nword=[%s]\n", (beginword == WORD_INVALID) ? "WORD_INVALID" : wchmm->winfo->wname[beginword], (d->bos.wid == WORD_INVALID) ? 
"WORD_INVALID" : wchmm->winfo->wname[d->bos.wid]); #endif /* create the first token at the first node of initial word */ newid = create_token(d); new = &(d->tlist[d->tn][newid]); /* initial node = head node of the beginword */ if (wchmm->hmminfo->multipath) { node = wchmm->wordbegin[beginword]; } else { node = wchmm->offset[beginword][0]; } /* set initial LM score */ if (wchmm->state[node].scid != 0) { /* if initial node is on a factoring branch, use the factored score */ new->last_lscore = max_successor_prob(wchmm, d->bos.wid, node); } else { /* else, set to 0.0 */ new->last_lscore = 0.0; } #ifdef FIX_PENALTY new->last_lscore = new->last_lscore * d->lm_weight; #else new->last_lscore = new->last_lscore * d->lm_weight + d->lm_penalty; #endif /* set initial word history */ new->last_tre = &(d->bos); new->last_cword = d->bos.wid; if (wchmm->hmminfo->multipath) { /* set initial score using the initial LM score */ new->score = new->last_lscore; } else { /* set initial score using the initial LM score and AM score of the first state */ new->score = outprob_style(wchmm, node, d->bos.wid, 0, param) + new->last_lscore; } /* assign the initial node to token list */ node_assign_token(d, node, newid); } if (r->lmtype == LM_DFA && r->lmvar == LM_DFA_GRAMMAR) { /* 初期仮説: 文法上文頭に接続しうる単語集合 */ /* initial words: all words that can be begin of sentence grammatically */ /* アクティブな文法に属する単語のみ許す */ /* only words in active grammars are allowed to be an initial words */ MULTIGRAM *m; int t,tb,te; WORD_ID iw; boolean flag; DFA_INFO *gdfa; gdfa = r->lm->dfa; flag = FALSE; /* for all active grammar */ for(m = r->lm->grammars; m; m = m->next) { if (m->active) { tb = m->cate_begin; te = tb + m->dfa->term_num; for(t=tb;tterm.wnum[t];iw++) { /* create the initial token at the first node of all words that belongs to the category */ i = gdfa->term.tw[t][iw]; if (wchmm->hmminfo->multipath) { node = wchmm->wordbegin[i]; } else { node = wchmm->offset[i][0]; } /* in tree lexicon, words in the same category may share the same root node, so skip it if the node has already existed */ if (node_exist_token(d, d->tn, node, d->bos.wid) != TOKENID_UNDEFINED) continue; newid = create_token(d); new = &(d->tlist[d->tn][newid]); new->last_tre = &(d->bos); #ifdef FIX_PENALTY new->last_lscore = 0.0; #else new->last_lscore = d->penalty1; #ifdef CLASS_NGRAM /* add per-word penalty */ new->last_lscore += wchmm->winfo->cprob[i]; #endif #endif if (wchmm->hmminfo->multipath) { new->score = new->last_lscore; } else { new->score = outprob_style(wchmm, node, d->bos.wid, 0, param) + new->last_lscore; } node_assign_token(d, node, newid); } } } } } if (!flag) { jlog("ERROR: init_nodescore: no initial state found in active DFA grammar\n"); return FALSE; } } if (r->lmtype == LM_DFA && r->lmvar == LM_DFA_WORD) { /* アクティブな文法に属する単語のみ許す */ /* only words in active grammars are allowed to be an initial words */ MULTIGRAM *m; for(m = r->lm->grammars; m; m = m->next) { if (m->active) { for(i = m->word_begin; i < m->word_begin + m->winfo->num; i++) { if (wchmm->hmminfo->multipath) { node = wchmm->wordbegin[i]; } else { node = wchmm->offset[i][0]; } if (node_exist_token(d, d->tn, node, d->bos.wid) != TOKENID_UNDEFINED) continue; newid = create_token(d); new = &(d->tlist[d->tn][newid]); new->last_tre = &(d->bos); new->last_lscore = 0.0; if (wchmm->hmminfo->multipath) { new->score = 0.0; } else { new->score = outprob_style(wchmm, node, d->bos.wid, 0, param); } node_assign_token(d, node, newid); } } } } return TRUE; } 
/******************************************************/ /* フレーム同期ビーム探索の実行 --- 最初のフレーム用 */ /* frame synchronous beam search --- first frame only */ /******************************************************/ /** * * @brief フレーム同期ビーム探索の初期化 * * ここではビームサーチに用いるワークエリアの確保と初期化を行う. * 初期化説の生成は init_nodescore() で行われる. * * @param param [in] 入力ベクトル列情報 (最初の1フレーム目のみ用いられる) * @param r [i/o] 音声認識処理インスタンス * * * @brief Initialization of the frame synchronous beam search * * This function will initialize work area for the 1st pass. * Generation of initial hypotheses will be performed in init_nodescore(). * * @param param [in] input vectors (only the first frame will be used) * @param r [i/o] recognition process instance * * * @callergraph * @callgraph * */ boolean get_back_trellis_init(HTK_Param *param, RecogProcess *r) { WCHMM_INFO *wchmm; BACKTRELLIS *backtrellis; FSBeam *d; wchmm = r->wchmm; backtrellis = r->backtrellis; d = &(r->pass1); /* Viterbi演算用ワークエリアのスイッチャー tl,tn の初期値設定 */ /* tn: このフレーム用ID tl: 1フレーム前のID */ /* initialize switch tl, tn for Viterbi computation */ /* tn: this frame tl: last frame */ d->tn = 0; d->tl = 1; /* 結果の単語トレリスを格納するバックトレリス構造体を初期化 */ /* initialize backtrellis structure to store resulting word trellis */ bt_prepare(backtrellis); /* 計算用ワークエリアを初期化 */ /* initialize some data on work area */ if (r->lmtype == LM_PROB) { d->lm_weight = r->config->lmp.lm_weight; d->lm_penalty = r->config->lmp.lm_penalty; } if (r->lmtype == LM_DFA) { d->penalty1 = r->config->lmp.penalty1; } #if defined(WPAIR) && defined(WPAIR_KEEP_NLIMIT) d->wpair_keep_nlimit = r->config->pass1.wpair_keep_nlimit; #endif /* ワークエリアを確保 */ /* malloc work area */ /* 使用するトークン量 = viterbi時に遷移先となる状態候補の数 * 予測: ビーム数 x 2 (自己遷移+次状態) + 木構造化辞書のルートノード数 */ /* assumed initial number of needed tokens: beam width x 2 (self + next trans.) * + root node on the tree lexicon (for inter-word trans.) 
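
   A worked example of this estimate (numbers are illustrative, not
   defaults): with a trellis beam width of 800 and a tree lexicon having
   2000 root nodes (wchmm->startnum), malloc_nodes() below is called with

       800 * 2 + 2000 = 3600

   initial tokens, and expand_tlist() later grows the space in steps of
   the beam width (see prepare_nodes()) if this estimate turns out to be
   too small during the search.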
*/ if (d->totalnodenum != wchmm->n) { free_nodes(d); } if (d->nodes_malloced == FALSE) { malloc_nodes(d, wchmm->n, r->trellis_beam_width * 2 + wchmm->startnum); } prepare_nodes(d, r->trellis_beam_width); /* 初期スコアを nodescore[tn] にセット */ /* set initial score to nodescore[tn] */ if (init_nodescore(param, r) == FALSE) { jlog("ERROR: get_back_trellis_init: failed to set initial node scores\n"); return FALSE; } sort_token_no_order(d, r->trellis_beam_width, &(d->n_start), &(d->n_end)); /* 漸次出力を行なう場合のインターバルを計算 */ /* set interval frame for progout */ r->config->output.progout_interval_frame = (int)((float)r->config->output.progout_interval / ((float)param->header.wshift / 10000.0)); if (r->config->successive.enabled) { /* ショートポーズセグメンテーション用パラメータの初期化 */ /* initialize parameter for short pause segmentation */ d->in_sparea = TRUE; /* assume beginning is silence */ r->am->mfcc->sparea_start = d->tmp_sparea_start = 0; /* set start frame to 0 */ #ifdef SP_BREAK_RESUME_WORD_BEGIN d->tmp_sp_break_last_word = WORD_INVALID; #endif r->sp_break_last_word = WORD_INVALID; /* 最初のセグメント: 次の非ポーズフレームで第2パスへ移行しない */ /* the first end of pause segment should be always silB, so skip the first segment */ d->first_sparea = TRUE; r->sp_break_2_begin_word = WORD_INVALID; } #ifdef DETERMINE if (r->lmvar == LM_DFA_WORD) { /* initialize */ determine_word(r, 0, NULL, 0, 0); } #endif #ifdef SCORE_PRUNING d->score_pruning_threshold = LOG_ZERO; d->score_pruning_count = 0; #endif return TRUE; } /*****************************************************/ /* frame synchronous beam search --- proceed 1 frame */ /* フレーム同期ビーム探索の実行 --- 1フレーム進める */ /*****************************************************/ /** * * Propagate a token to next node. * * * * トークンを次ノードに伝搬する. * * * * @param d [i/o] work area for 1st pass recognition processing * @param next_node [in] next node id * @param next_score [in] score when transmitted to the next node * @param last_tre [in] previous word context for the next node * @param last_cword [in] previous context-valid word for the next node * @param last_lscore [in] LM score to be propagated * */ static void propagate_token(FSBeam *d, int next_node, LOGPROB next_score, TRELLIS_ATOM *last_tre, WORD_ID last_cword, LOGPROB last_lscore) { TOKEN2 *tknext; TOKENID tknextid; /* does not propagate invalid token */ if (next_score <= LOG_ZERO) return; if ((tknextid = node_exist_token(d, d->tn, next_node, last_tre->wid)) != TOKENID_UNDEFINED) { /* 遷移先ノードには既に他ノードから伝搬済み: スコアが高いほうを残す */ /* the destination node already has a token: compare score */ tknext = &(d->tlist[d->tn][tknextid]); if (tknext->score < next_score) { /* その遷移先ノードが持つトークンの内容を上書きする(新規トークンは作らない) */ /* overwrite the content of existing destination token: not create a new token */ tknext->last_tre = last_tre; /* propagate last word info */ tknext->last_cword = last_cword; /* propagate last context word info */ tknext->last_lscore = last_lscore; /* set new LM score */ tknext->score = next_score; /* set new score */ } } else { /* 遷移先ノードは未伝搬: 新規トークンを作って割り付ける */ /* token unassigned: create new token and assign */ tknextid = create_token(d); /* get new token */ tknext = &(d->tlist[d->tn][tknextid]); tknext->last_tre = last_tre; /* propagate last word info */ tknext->last_cword = last_cword; /* propagate last context word info */ tknext->last_lscore = last_lscore; tknext->score = next_score; /* set new score */ node_assign_token(d, next_node, tknextid); /* assign this new token to the next node */ } } /** * * 単語内のあるノード間の遷移を行う. 
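 *
 * The actual score comparison is done by propagate_token() above; a
 * stand-alone sketch of that Viterbi max-relaxation step (stand-in Tok
 * type, not the real TOKEN2):
 *
 * @code
 * typedef struct { double score; int prev_word; } Tok;
 *
 * // keep, for each destination node, only the best-scored incoming path
 * static void relax(Tok *dst, int *dst_valid, double new_score, int new_prev)
 * {
 *   if (!*dst_valid || new_score > dst->score) {  // first arrival or better path
 *     dst->score = new_score;
 *     dst->prev_word = new_prev;                  // word history follows the winner
 *     *dst_valid = 1;
 *   }
 * }
 * @endcode
 *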
* * @param wchmm [in] 木構造化辞書 * @param d [i/o] 第1パスワークエリア * @param tk_ret [i/o] 伝搬元のトークン(内部でポインタ更新時は上書き) * @param j [in] @a tk_ret の元のトークンリストのID * @param next_node [in] 遷移先のノード番号 * @param next_a [in] 遷移確率 * * * Word-internal transition for a set of nodes. * * @param wchmm [in] tree lexicon * @param d [i/o] work area for the 1st pass * @param tk_ret [in] source token (if pointer updated, overwrite this) * @param j [in] the token ID of @a tk_ret * @param next_node [in] id of next node * @param next_a [in] transition probability * * */ static void beam_intra_word_core(WCHMM_INFO *wchmm, FSBeam *d, TOKEN2 **tk_ret, int j, int next_node, LOGPROB next_a) { int node; ///< Temporal work to hold the current node number on the lexicon tree LOGPROB tmpsum; LOGPROB ngram_score_cache; TOKEN2 *tk; tk = *tk_ret; node = tk->node; /* now, 'node' is the source node, 'next_node' is the destication node, and ac-> holds transition probability */ /* tk->score is the accumulated score at the 'node' on previous frame */ /******************************************************************/ /* 2.1.1 遷移先へのスコア計算(遷移確率+言語スコア) */ /* compute score of destination node (transition prob + LM) */ /******************************************************************/ tmpsum = tk->score + next_a; ngram_score_cache = LOG_ZERO; /* the next score at 'next_node' will be computed on 'tmpsum', and the new LM probability (if updated) will be stored on 'ngram_score_cache' at below */ if (!wchmm->category_tree) { /* 言語スコア factoring: arcが自己遷移でない単語内の遷移で,かつ遷移先にsuccessorリスト があれば,lexicon tree の分岐部分の遷移である */ /* LM factoring: If this is not a self transition and destination node has successor list, this is branching transition */ if (next_node != node) { if (wchmm->state[next_node].scid != 0 #ifdef UNIGRAM_FACTORING /* 1-gram factoring 使用時は, 複数で共有される枝では wchmm->state[node].scid は負の値となり,その絶対値を 添字として wchmm->fscore[] に単語集合の1-gramの最大値が格納 されている. 末端の枝(複数単語で共有されない)では, wchmm->state[node].scid は正の値となり, 1単語を sc として持つのでそこで正確な2-gramを計算する */ /* When uni-gram factoring, wchmm->state[node].scid is below 0 for shared branches. In this case the maximum uni-gram probability for sub-tree is stored in wchmm->fscore[- wchmm->state[node].scid]. Leaf branches (with only one successor word): the scid is larger than 0, and has the word ID in wchmm->sclist[wchmm->state[node].scid]. So precise 2-gram is computed in this point */ #endif ){ if (wchmm->lmtype == LM_PROB) { /* ここで言語モデル確率を更新する */ /* LM value should be update at this transition */ /* N-gram確率からfactoring 値を計算 */ /* compute new factoring value from N-gram probabilities */ #ifdef FIX_PENALTY /* if at the beginning of sentence, not add lm_penalty */ if (tk->last_cword == WORD_INVALID) { ngram_score_cache = max_successor_prob(wchmm, tk->last_cword, next_node) * d->lm_weight; } else { ngram_score_cache = max_successor_prob(wchmm, tk->last_cword, next_node) * d->lm_weight + d->lm_penalty; } #else ngram_score_cache = max_successor_prob(wchmm, tk->last_cword, next_node) * d->lm_weight + d->lm_penalty; #endif /* スコアの更新: tk->last_lscore に単語内での最後のfactoring値が 入っているので, それをスコアから引いてリセットし, 新たなスコアを セットする */ /* update score: since 'tk->last_lscore' holds the last LM factoring value in this word, we first remove the score from the current score, and then add the new LM value computed above. */ tmpsum -= tk->last_lscore; tmpsum += ngram_score_cache; } if (wchmm->lmtype == LM_DFA && wchmm->lmvar == LM_DFA_GRAMMAR) { /* 文法を用いる場合, カテゴリ単位の木構造化がなされていれば, 接続制約は単語間遷移のみで扱われるので,factoring は必要ない. 
カテゴリ単位木構造化が行われない場合, 文法間の接続制約はここ で factoring で行われることになる. */ /* With DFA, we use category-pair constraint extracted from the DFA at this 1st pass. So if we compose a tree lexicon per word's category, the each category tree has the same connection constraint and thus we can apply the constraint on the cross-word transition. This per-category tree lexicon is enabled by default, and in the case the constraint will be applied at the word end. If you disable per-category tree lexicon by undefining 'CATEGORY_TREE', the word-pair contrained will be applied in a factoring style at here. */ /* 決定的factoring: 直前単語に対して,sub-tree内にカテゴリ対制約上 接続しうる単語が1つもなければ, この遷移は不可 */ /* deterministic factoring in grammar mode: transition disabled if there are totally no sub-tree word that can grammatically (in category-pair constraint) connect to the previous word. */ if (!can_succeed(wchmm, tk->last_tre->wid, next_node)) { tmpsum = LOG_ZERO; } } } } } /* factoring not needed when DFA mode and uses category-tree */ /****************************************/ /* 2.1.2 遷移先ノードへトークン伝搬 */ /* pass token to destination node */ /****************************************/ if (ngram_score_cache == LOG_ZERO) ngram_score_cache = tk->last_lscore; propagate_token(d, next_node, tmpsum, tk->last_tre, tk->last_cword, ngram_score_cache); if (d->expanded) { /* if work area has been expanded at 'create_token()' above, the inside 'realloc()' will destroy the pointers. so, reset local pointers from token index */ tk = &(d->tlist[d->tl][d->tindex[d->tl][j]]); d->expanded = FALSE; } *tk_ret = tk; } /** * * 単語内遷移を行う. * * @param wchmm [in] 木構造化辞書 * @param d [i/o] 第1パスワークエリア * @param tk_ret [i/o] 伝搬元のトークン(内部でポインタ更新時は上書き) * @param j [in] @a tk_ret の元のトークンリストのID * * * Word-internal transition. * * @param wchmm [in] tree lexicon * @param d [i/o] work area for the 1st pass * @param tk_ret [in] source token (if pointer updated, overwrite this) * @param j [in] the token ID of @a tk_ret * * */ static void beam_intra_word(WCHMM_INFO *wchmm, FSBeam *d, TOKEN2 **tk_ret, int j) { A_CELL2 *ac; ///< Temporal work to hold the next states of a node TOKEN2 *tk; int node; int k; tk = *tk_ret; node = tk->node; if (wchmm->self_a[node] != LOG_ZERO) { beam_intra_word_core(wchmm, d, tk_ret, j, node, wchmm->self_a[node]); } if (wchmm->next_a[node] != LOG_ZERO) { beam_intra_word_core(wchmm, d, tk_ret, j, node+1, wchmm->next_a[node]); } for(ac=wchmm->ac[node];ac;ac=ac->next) { for(k=0;kn;k++) { beam_intra_word_core(wchmm, d, tk_ret, j, ac->arc[k], ac->a[k]); } } } /**************************/ /* 2.2. トレリス単語保存 */ /* save trellis word */ /**************************/ /** * * トークンからトレリス単語を保存する. * * @param bt [i/o] バックトレリス構造体 * @param wchmm [in] 木構造化辞書 * @param tk [in] 単語末端に到達しているトークン * @param t [in] 現在の時間フレーム * @param final_for_multipath [in] 入力最後の1回処理時 TRUE * * @return 新たに格納されたトレリス単語へのポインタ * * * Store a new trellis word on the token. * * @param bt [i/o] backtrellis data to save it * @param wchmm [in] tree lexicon * @param tk [in] source token at word edge * @param t [in] current time frame * @param final_for_multipath [in] TRUE if this is final frame * * @return pointer to the newly stored trellis word. * */ static TRELLIS_ATOM * save_trellis(BACKTRELLIS *bt, WCHMM_INFO *wchmm, TOKEN2 *tk, int t, boolean final_for_multipath) { TRELLIS_ATOM *tre; int sword; sword = wchmm->stend[tk->node]; /* この遷移元の単語終端ノードは「直前フレームで」生き残ったノード. (「このフレーム」でないことに注意!!) よってここで, 時間(t-1) を単語終端とするトレリス上の単語仮説 (TRELLIS_ATOM)として,単語トレリス構造体に保存する. 
*/ /* This source node (= word end node) has been survived in the "last" frame (notice: not "this" frame!!). So this word end is saved here to the word trellis structure (BACKTRELLIS) as a trellis word (TRELLIS_ATOM) with end frame (t-1). */ tre = bt_new(bt); tre->wid = sword; /* word ID */ tre->backscore = tk->score; /* log score (AM + LM) */ tre->begintime = tk->last_tre->endtime + 1; /* word beginning frame */ tre->endtime = t-1; /* word end frame */ tre->last_tre = tk->last_tre; /* link to previous trellis word */ tre->lscore = tk->last_lscore; /* log LM score */ bt_store(bt, tre); /* save to backtrellis */ #ifdef WORD_GRAPH if (tre->last_tre != NULL) { /* mark to indicate that the following words was survived in beam */ tre->last_tre->within_context = TRUE; } if (final_for_multipath) { /* last node */ if (tre->wid == wchmm->winfo->tail_silwid) { tre->within_context = TRUE; } } #endif /* WORD_GRAPH */ return tre; } /** * * 単語末トークンからの単語間遷移. * * @param wchmm [in] 木構造化辞書 * @param d [i/o] 第1パスワークエリア * @param tk_ret [in] 伝搬元の単語末トークン * @param tre [in] @a tk_ret から生成されたトレリス単語 * @param j [in] @a tk_ret の元のトークンリストのID * * * Cross-word transition processing from word-end token. * * @param wchmm [in] tree lexicon * @param d [i/o] work area for the 1st pass * @param tk_ret [in] source token where the propagation is from * @param tre [in] the trellis word generated from @a tk_ret * @param j [in] the token ID of @a tk_ret * */ static void beam_inter_word(WCHMM_INFO *wchmm, FSBeam *d, TOKEN2 **tk_ret, TRELLIS_ATOM *tre, int j) { A_CELL2 *ac; TOKEN2 *tk; int sword; int node, next_node; LOGPROB *iwparray; ///< Temporal pointer to hold inter-word cache array int stid; #ifdef UNIGRAM_FACTORING int isoid; ///< Temporal work to hold isolated node #endif LOGPROB tmpprob, tmpsum, ngram_score_cache; int k; WORD_ID last_word; tk = *tk_ret; node = tk->node; sword = wchmm->stend[node]; last_word = wchmm->winfo->is_transparent[sword] ? tk->last_cword : sword; if (wchmm->lmtype == LM_PROB) { /* 遷移元単語が末尾単語の終端なら,どこへも遷移させない */ /* do not allow transition if the source word is end-of-sentence word */ if (sword == wchmm->winfo->tail_silwid) return; #ifdef UNIGRAM_FACTORING #ifndef WPAIR /* あとで共有単語先頭ノードに対して単語間遷移をまとめて計算するため,*/ /* このループ内では最大尤度を持つ単語終端ノードを記録しておく */ /* here we will record the best wordend node of maximum likelihood at this frame, to compute later the cross-word transitions toward shared factoring word-head node */ tmpprob = tk->score; if (!wchmm->hmminfo->multipath) tmpprob += wchmm->wordend_a[sword]; if (d->wordend_best_score < tmpprob) { d->wordend_best_score = tmpprob; d->wordend_best_node = node; d->wordend_best_tre = tre; d->wordend_best_last_cword = tk->last_cword; } #endif #endif /* N-gramにおいては常に全単語の接続を考慮する必要があるため, ここで単語間の言語確率値をすべて計算しておく. キャッシュは max_successor_prob_iw() 内で考慮. */ /* As all words are possible to connect in N-gram, we first compute all the inter-word LM probability here. Cache is onsidered in max_successor_prob_iw(). */ if (wchmm->winfo->is_transparent[sword]) { iwparray = max_successor_prob_iw(wchmm, tk->last_cword); } else { iwparray = max_successor_prob_iw(wchmm, sword); } } /* すべての単語始端ノードに対して以下を実行 */ /* for all beginning-of-word nodes, */ /* wchmm->startnode[0..stid-1] ... 単語始端ノードリスト */ /* wchmm->startnode[0..stid-1] ... 
list of word start node (shared) */ for (stid = wchmm->startnum - 1; stid >= 0; stid--) { next_node = wchmm->startnode[stid]; if (wchmm->hmminfo->multipath) { if (wchmm->lmtype == LM_PROB) { /* connection to the head silence word is not allowed */ if (wchmm->wordbegin[wchmm->winfo->head_silwid] == next_node) continue; } } /*****************************************/ /* 2.3.1. 単語間言語制約を適用 */ /* apply cross-word LM constraint */ /*****************************************/ if (wchmm->lmtype == LM_PROB) { /* N-gram確率を計算 */ /* compute N-gram probability */ #ifdef UNIGRAM_FACTORING /* wchmm,start2isolate[0..stid-1] ... ノードを共有しない単語は その通しID, 共有する(キャッシュの必要のない)単語は -1 */ /* wchmm->start2isolate[0..stid-1] ... isolate ID for beginning-of-word state. value: -1 for states that has 1-gram factoring value (share nodes with some other words), and ID for unshared words */ isoid = wchmm->start2isolate[stid]; #ifdef WPAIR /* Efficient cross-word LM handling should be disabled for word-pair approximation */ if (isoid == -1) { tmpprob = wchmm->fscore[- wchmm->state[next_node].scid]; } else { tmpprob = iwparray[isoid]; } #else /* ~WPAIR */ /* 1-gram factoring における単語間言語確率キャッシュの効率化: 1-gram factoring は単語履歴に依存しないので, ここで参照する factoring 値の多くは wchmm->fscore[] に既に格納され, 探索中も不変である. よって計算が必要な単語(どの単語ともノードを共有しない単語) についてのみ iwparray[] で計算・キャッシュする. */ /* Efficient cross-word LM cache: As 1-gram factoring values are independent of word context, they remain unchanged while search. So, in cross-word LM computation, beginning-of-word states which share nodes with others and has factoring value in wchmm does not need cache. So only the unshared beginning-of-word states are computed and cached here in iwparray[]. */ /* 計算が必要でない単語先頭ノードはパスをまとめて後に計算するので ここではスキップ */ /* the shared nodes will be computed afterward, so just skip them here */ if (isoid == -1) continue; tmpprob = iwparray[isoid]; #endif /* ~WPAIR */ #else /* ~UNIGRAM_FACTORING */ tmpprob = iwparray[stid]; #endif } /* 遷移先の単語が先頭単語なら遷移させない. これは wchmm.c で該当単語に stid を割り振らないことで対応 しているので,ここでは何もしなくてよい */ /* do not allow transition if the destination word is beginning-of-sentence word. This limitation is realized by not assigning 'stid' for the word in wchmm.c, so we have nothing to do here. */ if (wchmm->category_tree) { /* 文法の場合, 制約は決定的: カテゴリ対制約上許されない場合は遷移させない */ /* With DFA and per-category tree lexicon, LM constraint is deterministic: do not allow transition if the category connection is not allowed (with category tree, constraint can be determined on top node) */ if (dfa_cp(wchmm->dfa, wchmm->winfo->wton[sword], wchmm->winfo->wton[wchmm->start2wid[stid]]) == FALSE) continue; } /*******************************************************************/ /* 2.3.2. 
遷移先の単語先頭へのスコア計算(遷移確率+言語スコア) */ /* compute score of destination node (transition prob + LM) */ /*******************************************************************/ tmpsum = tk->score; if (!wchmm->hmminfo->multipath) tmpsum += wchmm->wordend_a[sword]; /* 'tmpsum' now holds outgoing score from the wordend node */ if (wchmm->lmtype == LM_PROB) { /* 言語スコアを追加 */ /* add LM score */ ngram_score_cache = tmpprob * d->lm_weight + d->lm_penalty; tmpsum += ngram_score_cache; if (wchmm->winfo->is_transparent[sword] && wchmm->winfo->is_transparent[tk->last_cword]) { tmpsum += d->lm_penalty_trans; } } if (wchmm->lmtype == LM_DFA) { /* grammar: 単語挿入ペナルティを追加 */ /* grammar: add insertion penalty */ ngram_score_cache = d->penalty1; #ifdef CLASS_NGRAM /* add per-word penalty of last word as delayed penalty */ ngram_score_cache += wchmm->winfo->cprob[last_word]; #endif tmpsum += ngram_score_cache; /* grammar: deterministic factoring (in case category-tree not enabled) */ if (!wchmm->category_tree) { if (!can_succeed(wchmm, sword, next_node)) { tmpsum = LOG_ZERO; } } } /*********************************************************************/ /* 2.3.3. 遷移先ノードへトークン伝搬(単語履歴情報は更新) */ /* pass token to destination node (updating word-context info */ /*********************************************************************/ if (wchmm->hmminfo->multipath) { /* since top node has no ouput, we should go one more step further */ if (wchmm->self_a[next_node] != LOG_ZERO) { propagate_token(d, next_node, tmpsum + wchmm->self_a[next_node], tre, last_word, ngram_score_cache); if (d->expanded) { /* if work area has been expanded at 'create_token()' above, the inside 'realloc()' will destroy the pointers. so, reset local pointers from token index */ tk = &(d->tlist[d->tn][d->tindex[d->tn][j]]); d->expanded = FALSE; } } if (wchmm->next_a[next_node] != LOG_ZERO) { propagate_token(d, next_node+1, tmpsum + wchmm->next_a[next_node], tre, last_word, ngram_score_cache); if (d->expanded) { /* if work area has been expanded at 'create_token()' above, the inside 'realloc()' will destroy the pointers. so, reset local pointers from token index */ tk = &(d->tlist[d->tn][d->tindex[d->tn][j]]); d->expanded = FALSE; } } for(ac=wchmm->ac[next_node];ac;ac=ac->next) { for(k=0;kn;k++) { propagate_token(d, ac->arc[k], tmpsum + ac->a[k], tre, last_word, ngram_score_cache); if (d->expanded) { /* if work area has been expanded at 'create_token()' above, the inside 'realloc()' will destroy the pointers. so, reset local pointers from token index */ tk = &(d->tlist[d->tn][d->tindex[d->tn][j]]); d->expanded = FALSE; } } } } else { propagate_token(d, next_node, tmpsum, tre, last_word, ngram_score_cache); if (d->expanded) { /* if work area has been expanded at 'create_token()' above, the inside 'realloc()' will destroy the pointers. so, reset local pointers from token index */ tk = &(d->tlist[d->tl][d->tindex[d->tl][j]]); d->expanded = FALSE; } } } /* end of next word heads */ *tk_ret = tk; } /* end of cross-word processing */ #ifdef UNIGRAM_FACTORING /** * * @brief 1-gram factoring 用単語間遷移の追加処理 * * 1-gram factoring 使用時は、複数の単語間で共有されている * 単語先頭のノード (= factoring されている単語先頭ノード) については、 * すべて、最も尤度の高い単語終端からの遷移が選択される。この性質を * 用いて、この関数ではあらかじめ求められた最も尤度の高い単語終端 * から、ファクタリングされた単語先頭ノードへの遷移計算を一度に行う。 * * @param wchmm [in] 木構造化辞書 * @param d [in] 第1パス用ワークエリア * * * @brief Additional cross-word transition processing for 1-gram factoring. 
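 *
 * Score composition sketch for one factored word-head node (the same
 * composition as in beam_inter_word() above; a stand-alone fragment with
 * log-domain double arguments, not the Julius API):
 *
 * @code
 * static double crossword_score(double wordend_score, double wordend_trans_a,
 *                               double factoring_lm, double lm_weight,
 *                               double lm_penalty)
 * {
 *   // best word-end score + exit transition + weighted LM + insertion penalty;
 *   // lm_penalty_trans would be added on top when both context words are
 *   // transparent (filler) words
 *   return wordend_score + wordend_trans_a
 *          + factoring_lm * lm_weight + lm_penalty;
 * }
 * @endcode
 *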
* * When using 1-gram factoring, The word end of maximum likelihood will be * chosen at cross-word viterbi for factored word-head node, since the * LM factoring value is independent of the context. This function performs * viterbi processing to the factored word-head nodes from the maximum * likelihood word end previously stored. * * @param wchmm [in] tree lexicon * @param d [in] work area for the 1st pass * */ static void beam_inter_word_factoring(WCHMM_INFO *wchmm, FSBeam *d) { int sword; int node, next_node; int stid; LOGPROB tmpprob, tmpsum, ngram_score_cache; A_CELL2 *ac; int j; WORD_ID last_word; node = d->wordend_best_node; sword = wchmm->stend[node]; last_word = wchmm->winfo->is_transparent[sword] ? d->wordend_best_last_cword : sword; for (stid = wchmm->startnum - 1; stid >= 0; stid--) { next_node = wchmm->startnode[stid]; /* compute transition from 'node' at end of word 'sword' to 'next_node' */ /* skip isolated words already calculated in the above main loop */ if (wchmm->start2isolate[stid] != -1) continue; /* rest should have 1-gram factoring score at wchmm->fscore */ if (wchmm->state[next_node].scid >= 0) { j_internal_error("get_back_trellis_proceed: scid mismatch at 1-gram factoring of shared states\n"); } tmpprob = wchmm->fscore[- wchmm->state[next_node].scid]; ngram_score_cache = tmpprob * d->lm_weight + d->lm_penalty; tmpsum = d->wordend_best_score; tmpsum += ngram_score_cache; if (wchmm->winfo->is_transparent[sword] && wchmm->winfo->is_transparent[d->wordend_best_last_cword]) { tmpsum += d->lm_penalty_trans; } #ifdef SCORE_PRUNING if (tmpsum < d->score_pruning_threshold) { d->score_pruning_count++; continue; } #endif if (wchmm->hmminfo->multipath) { /* since top node has no ouput, we should go one more step further */ if (wchmm->self_a[next_node] != LOG_ZERO) { propagate_token(d, next_node, tmpsum + wchmm->self_a[next_node], d->wordend_best_tre, last_word, ngram_score_cache); if (d->expanded) { d->expanded = FALSE; } } if (wchmm->next_a[next_node] != LOG_ZERO) { propagate_token(d, next_node+1, tmpsum + wchmm->next_a[next_node], d->wordend_best_tre, last_word, ngram_score_cache); if (d->expanded) { d->expanded = FALSE; } } for(ac=wchmm->ac[next_node];ac;ac=ac->next) { for(j=0;jn;j++) { propagate_token(d, ac->arc[j], tmpsum + ac->a[j], d->wordend_best_tre, last_word, ngram_score_cache); if (d->expanded) { d->expanded = FALSE; } } } } else { propagate_token(d, next_node, tmpsum, d->wordend_best_tre, last_word, ngram_score_cache); if (d->expanded) { d->expanded = FALSE; } } } } #endif /* UNIGRAM_FACTORING */ /** * * @brief フレーム同期ビーム探索を進行する. * * 与えられた1フレーム分,探索処理を進める. また,フレーム内に残った * 単語を単語トレリス構造体に保存する. ショートポーズセグメンテーション時 * はセグメント終了の判断もこの中から呼び出される. * * @param t [in] 現在のフレーム (このフレームについて計算が進められる) * @param param [in] 入力ベクトル列構造体 (@a t 番目のフレームのみ用いられる) * @param r [in] 認識処理インスタンス * @param final_for_multipath [i/o] 入力最後のフレームを処理するときに TRUE * * @return TRUE (通常どおり終了) あるいは FALSE (ここで探索を中断する * 場合: 逐次デコーディング時にショートポーズ区間を検出したか,ビーム内の * アクティブノード数が0になったとき) * * * @brief Frame synchronous beam search: proceed for 2nd frame and later. * * This is the main function of beam search on the 1st pass. Given a * input vector of a frame, it proceeds the computation for the one frame, * and store the words survived in the beam width to the word trellis * structure. get_back_trellis_init() should be used for the first frame. * For detailed procedure, please see the comments in this * function. 
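 *
 * In outline, each call swaps the two token buffers (tl = survivors of
 * frame t-1, tn = tokens being built for frame t), performs word-internal
 * transitions, saves word ends to the trellis, performs cross-word
 * transitions, adds the acoustic output probabilities of frame t, and
 * finally keeps only the best trellis_beam_width tokens.  The buffer swap
 * at the top of the function corresponds to this small sketch:
 *
 * @code
 * // double-buffering of the token work area
 * static void swap_buffers(int *tl, int *tn)
 * {
 *   *tl = *tn;                    // previous "current" becomes "last"
 *   *tn = (*tn == 0) ? 1 : 0;     // and the other buffer is filled next
 * }
 * @endcode
 *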
* * @param t [in] current frame to be computed in @a param * @param param [in] input vector structure (only the vector at @a t will be used) * @param r [in] recognition process instance * @param final_for_multipath [i/o] TRUE if this is last frame of an input * * @return TRUE if processing ended normally, or FALSE if the search was * terminated (in case of short pause segmentation in successive decoding * mode, or active nodes becomes zero). * * * @callergraph * @callgraph * */ boolean get_back_trellis_proceed(int t, HTK_Param *param, RecogProcess *r, boolean final_for_multipath) { /* local static work area for get_back_trellis_proceed() */ /* these are local work area and need not to be kept for another call */ TRELLIS_ATOM *tre; ///< Local workarea to hold the generated trellis word int node; ///< Temporal work to hold the current node number on the lexicon tree int lmtype, lmvar; WCHMM_INFO *wchmm; FSBeam *d; int j; TOKEN2 *tk; LOGPROB minscore; /* local copied variables */ int tn, tl; /* store pointer to local for rapid access */ wchmm = r->wchmm; d = &(r->pass1); lmtype = r->lmtype; lmvar = r->lmvar; /*********************/ /* 1. 初期化 */ /* initialization */ /*********************/ /* tl と tn を入れ替えて作業領域を切り替え */ /* tl (= 直前の tn) は直前フレームの結果を持つ */ /* swap tl and tn to switch work buffer */ /* tl (= last tn) holds result of the previous frame */ d->tl = d->tn; if (d->tn == 0) d->tn = 1; else d->tn = 0; /* store locally for rapid access */ tl = d->tl; tn = d->tn; #ifdef UNIGRAM_FACTORING #ifndef WPAIR /* 1-gram factoring では単語先頭での言語確率が一定で直前単語に依存しない ため,単語間 Viterbi において選ばれる直前単語は,次単語によらず共通である. よって単語終端からfactoring値のある単語先頭への遷移は1つにまとめられる. ただし,木から独立した単語については, 単語先頭で履歴に依存した2-gramが 与えられるため, 最尤の単語間 Viterbi パスは次単語ごとに異なる. よってそれらについてはまとめずに別に計算する */ /* In 1-gram factoring, the language score on the word-head node is constant and independent of the previous word. So, the same word hypothesis will be selected as the best previous word at the inter-word Viterbi processing. So, in inter-word processing, we can (1) select only the best word-end hypothesis, and then (2) process viterbi from the node to each word-head node. On the other hand, the isolated words, i.e. words not sharing any node with other word, has unique word-head node and the true 2-gram language score is determined at the top node. In such case the best word hypothesis prior to each node will differ according to the language scores. So we have to deal such words separately. */ /* initialize max value to delect best word-end hypothesis */ if (lmtype == LM_PROB) { d->wordend_best_score = LOG_ZERO; } #endif #endif #ifdef DEBUG /* debug */ /* node_check_token(d, tl); */ #endif /* トークンバッファを初期化: 直前フレームで使われた部分だけクリアすればよい */ /* initialize token buffer: for speedup, only ones used in the last call will be cleared */ clear_tokens(d, tl); /**************************/ /* 2. 
Viterbi計算 */ /* Viterbi computation */ /**************************/ /* 直前フレームからこのフレームへの Viterbi 計算を行なう */ /* tindex[tl][n_start..n_end] に直前フレーム上位ノードのIDが格納されている */ /* do one viterbi computation from last frame to this frame */ /* tindex[tl][n_start..n_end] holds IDs of survived nodes in last frame */ if (wchmm->hmminfo->multipath) { /*********************************/ /* MULTIPATH MODE */ /*********************************/ for (j = d->n_start; j <= d->n_end; j++) { /* tk: 対象トークン node: そのトークンを持つ木構造化辞書ノードID */ /* tk: token data node: lexicon tree node ID that holds the 'tk' */ tk = &(d->tlist[tl][d->tindex[tl][j]]); if (tk->score <= LOG_ZERO) continue; /* invalid node */ #ifdef SCORE_PRUNING if (tk->score < d->score_pruning_threshold) { d->score_pruning_count++; continue; } #endif node = tk->node; /*********************************/ /* 2.1. 単語内遷移 */ /* word-internal transition */ /*********************************/ beam_intra_word(wchmm, d, &tk, j); } /*******************************************************/ /* 2.2. スコアでトークンをソートしビーム幅分の上位を決定 */ /* sort tokens by score up to beam width */ /*******************************************************/ sort_token_no_order(d, r->trellis_beam_width, &(d->n_start), &(d->n_end)); /*************************/ /* 2.3. 単語間Viterbi計算 */ /* cross-word viterbi */ /*************************/ for(j = d->n_start; j <= d->n_end; j++) { tk = &(d->tlist[tn][d->tindex[tn][j]]); node = tk->node; #ifdef SCORE_PRUNING if (tk->score < d->score_pruning_threshold) { d->score_pruning_count++; continue; } #endif /* 遷移元ノードが単語終端ならば */ /* if source node is end state of a word, */ if (wchmm->stend[node] != WORD_INVALID) { /**************************/ /* 2.4. トレリス単語保存 */ /* save trellis word */ /**************************/ #ifdef SPSEGMENT_NAIST if (r->config->successive.enabled && !d->after_trigger) { tre = tk->last_tre; /* dummy */ } else { tre = save_trellis(r->backtrellis, wchmm, tk, t, final_for_multipath); } #else tre = save_trellis(r->backtrellis, wchmm, tk, t, final_for_multipath); #endif /* 最終フレームであればここまで:遷移はさせない */ /* If this is a final frame, does not do cross-word transition */ if (final_for_multipath) continue; /* 単語認識モードでは単語間遷移は必要ない */ if (lmvar == LM_DFA_WORD) continue; /******************************/ /* 2.5. 単語間遷移 */ /* cross-word transition */ /******************************/ #ifdef UNIGRAM_FACTORING /* ここで処理されるのは isolated words のみ, shared nodes はまとめてこのループの外で計算する */ /* Only the isolated words will be processed here. The shared nodes with constant factoring values will be computed after this loop */ #endif beam_inter_word(wchmm, d, &tk, tre, j); } /* end of cross-word processing */ } /* end of main viterbi loop */ } else { /*********************************/ /* NORMAL MODE */ /*********************************/ for (j = d->n_start; j <= d->n_end; j++) { /* tk: 対象トークン node: そのトークンを持つ木構造化辞書ノードID */ /* tk: token data node: lexicon tree node ID that holds the 'tk' */ tk = &(d->tlist[tl][d->tindex[tl][j]]); if (tk->score <= LOG_ZERO) continue; /* invalid node */ #ifdef SCORE_PRUNING if (tk->score < d->score_pruning_threshold) { d->score_pruning_count++; continue; } #endif node = tk->node; /*********************************/ /* 2.1. 単語内遷移 */ /* word-internal transition */ /*********************************/ beam_intra_word(wchmm, d, &tk, j); /* 遷移元ノードが単語終端ならば */ /* if source node is end state of a word, */ if (wchmm->stend[node] != WORD_INVALID) { /**************************/ /* 2.2. 
トレリス単語保存 */ /* save trellis word */ /**************************/ #ifdef SPSEGMENT_NAIST if (r->config->successive.enabled && !d->after_trigger) { tre = tk->last_tre; /* dummy */ } else { tre = save_trellis(r->backtrellis, wchmm, tk, t, final_for_multipath); } #else tre = save_trellis(r->backtrellis, wchmm, tk, t, final_for_multipath); #endif /* 単語認識モードでは単語間遷移は必要ない */ if (lmvar == LM_DFA_WORD) continue; /******************************/ /* 2.3. 単語間遷移 */ /* cross-word transition */ /******************************/ #ifdef UNIGRAM_FACTORING /* ここで処理されるのは isolated words のみ, shared nodes はまとめてこのループの外で計算する */ /* Only the isolated words will be processed here. The shared nodes with constant factoring values will be computed after this loop */ #endif beam_inter_word(wchmm, d, &tk, tre, j); } /* end of cross-word processing */ } /* end of main viterbi loop */ } #ifdef UNIGRAM_FACTORING #ifndef WPAIR if (lmtype == LM_PROB) { /***********************************************************/ /* 2.x 単語終端からfactoring付き単語先頭への遷移 ***********/ /* transition from wordend to shared (factorized) nodes */ /***********************************************************/ /* d->wordend_best_* holds the best word ends at this frame. */ if (d->wordend_best_score > LOG_ZERO) { beam_inter_word_factoring(wchmm, d); } } #endif #endif /* UNIGRAM_FACTORING */ /***************************************/ /* 3. 状態の出力確率計算 */ /* compute state output probability */ /***************************************/ /* 次段の有効ノードについて出力確率を計算してスコアに加える */ /* compute outprob for new valid (token assigned) nodes and add to score */ /* 今扱っているのが入力の最終フレームの場合出力確率は計算しない */ /* don't calculate the last frame (transition only) */ #ifdef SCORE_PRUNING d->score_pruning_max = LOG_ZERO; minscore = 0.0; #endif if (wchmm->hmminfo->multipath) { if (! final_for_multipath) { for (j = 0; j < d->tnum[tn]; j++) { tk = &(d->tlist[tn][d->tindex[tn][j]]); /* skip non-output state */ if (wchmm->state[tk->node].out.state == NULL) continue; tk->score += outprob_style(wchmm, tk->node, tk->last_tre->wid, t, param); #ifdef SCORE_PRUNING if (d->score_pruning_max < tk->score) d->score_pruning_max = tk->score; if (minscore > tk->score) minscore = tk->score; #endif } } } else { for (j = 0; j < d->tnum[tn]; j++) { tk = &(d->tlist[tn][d->tindex[tn][j]]); tk->score += outprob_style(wchmm, tk->node, tk->last_tre->wid, t, param); #ifdef SCORE_PRUNING if (d->score_pruning_max < tk->score) d->score_pruning_max = tk->score; if (minscore > tk->score) minscore = tk->score; #endif } } #ifdef SCORE_PRUNING if (r->config->pass1.score_pruning_width >= 0.0) { d->score_pruning_threshold = d->score_pruning_max - r->config->pass1.score_pruning_width; //printf("width=%f, tnum=%d\n", d->score_pruning_max - minscore, d->tnum[tn]); } else { // disable score pruning d->score_pruning_threshold = LOG_ZERO; } #endif /*******************************************************/ /* 4. スコアでトークンをソートしビーム幅分の上位を決定 */ /* sort tokens by score up to beam width */ /*******************************************************/ /* tlist[tl]を次段のためにリセット */ clear_tlist(d, tl); /* ヒープソートを用いてこの段のノード集合から上位(bwidth)個を得ておく */ /* (上位内の順列は必要ない) */ sort_token_no_order(d, r->trellis_beam_width, &(d->n_start), &(d->n_end)); /***************/ /* 5. 
終了処理 */ /* finalize */ /***************/ #ifdef SPSEGMENT_NAIST if (!r->config->successive.enabled || d->after_trigger) { #endif /* call frame-wise callback */ r->have_interim = FALSE; if (t > 0) { if (r->config->output.progout_flag) { /* 漸次出力: 現フレームのベストパスを一定時間おきに上書き出力 */ /* progressive result output: output current best path in certain time interval */ if (((t-1) % r->config->output.progout_interval_frame) == 0) { r->have_interim = TRUE; bt_current_max(r, t-1); } } } /* jlog("DEBUG: %d: %d\n",t,tnum[tn]); */ /* for debug: output current max word */ if (debug2_flag) { bt_current_max_word(r, t-1); } #ifdef DETERMINE if (lmvar == LM_DFA_WORD) { check_determine_word(r, t-1); } #endif #ifdef SPSEGMENT_NAIST } #endif /* ビーム内ノード数が 0 になってしまったら,強制終了 */ if (d->tnum[tn] == 0) { jlog("ERROR: get_back_trellis_proceed: %02d %s: frame %d: no nodes left in beam, now terminates search\n", r->config->id, r->config->name, t); return(FALSE); } return(TRUE); } /*************************************************/ /* frame synchronous beam search --- last frame */ /* フレーム同期ビーム探索の実行 --- 最終フレーム */ /*************************************************/ /** * * @brief フレーム同期ビーム探索:最終フレーム * * 第1パスのフレーム同期ビーム探索を終了するために, * (param->samplenum -1) の最終フレームに対する終了処理を行う. * * * @param param [in] 入力ベクトル列 (param->samplenum の値のみ用いられる) * @param r [in] 音声認識処理インスタンス * * * @brief Frame synchronous beam search: last frame * * This function should be called at the end of the 1st pass. * The last procedure will be done for the (param->samplenum - 1) frame. * * @param param [in] input vectors (only param->samplenum is referred) * @param r [in] recognition process instance * * * @callergraph * @callgraph * */ void get_back_trellis_end(HTK_Param *param, RecogProcess *r) { WCHMM_INFO *wchmm; FSBeam *d; int j; TOKEN2 *tk; wchmm = r->wchmm; d = &(r->pass1); /* 最後にビーム内に残った単語終端トークンを処理する */ /* process the last wordend tokens */ if (r->am->hmminfo->multipath) { /* MULTI-PATH VERSION */ /* 単語末ノードへの遷移のみ計算 */ /* only arcs to word-end node is calculated */ get_back_trellis_proceed(param->samplenum, param, r, TRUE); } else { /* NORMAL VERSION */ /* 最後の遷移のあとの単語終端処理を行う */ /* process the word-ends at the last frame */ d->tl = d->tn; if (d->tn == 0) d->tn = 1; else d->tn = 0; for (j = d->n_start; j <= d->n_end; j++) { tk = &(d->tlist[d->tl][d->tindex[d->tl][j]]); if (wchmm->stend[tk->node] != WORD_INVALID) { save_trellis(r->backtrellis, wchmm, tk, param->samplenum, TRUE); } } } #ifdef SCORE_PRUNING if (debug2_flag) jlog("STAT: %d tokens pruned by score beam\n", d->score_pruning_count); #endif } /*************************/ /* 探索終了 --- 終了処理 */ /* end of search */ /*************************/ /** * * @brief 第1パスの終了処理を行う. * * この関数は get_back_trellis_end() の直後に呼ばれ,第1パスの終了処理を * 行う. 生成した単語トレリス構造体の最終的な後処理を行い第2パスで * アクセス可能な形に内部を変換する. また, * 仮説のバックトレースを行い第1パスのベスト仮説を出力する. * * @param r [in] 認識処理インスタンス * @param len [in] 第1パスで処理された最終的なフレーム長 * * @return 第1パスの最尤仮説の累積尤度,あるいは仮説が見つからない場合 * は LOG_ZERO. * * * @brief Finalize the 1st pass. * * This function will be called just after get_back_trellis_end() to * finalize the 1st pass. It processes the resulting word trellis structure * to be accessible from the 2nd pass, and output the best sentence hypothesis * by backtracing the word trellis. * * @param r [in] recoginirion process instance * @param len [in] total number of processed frames * * @return the maximum score of the best hypothesis, or LOG_ZERO if search * failed. 
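 *
 * Usage sketch of the whole 1st pass in buffered (non on-the-fly) style,
 * using the functions defined in this file (simplified: the real caller
 * in Julius also handles callbacks, MFCC computation and short-pause
 * segmentation):
 *
 * @code
 * static boolean run_first_pass(HTK_Param *param, RecogProcess *r)
 * {
 *   int t;
 *   if (get_back_trellis_init(param, r) == FALSE) return FALSE;  // frame 0
 *   for (t = 1; t < param->samplenum; t++) {
 *     if (get_back_trellis_proceed(t, param, r, FALSE) == FALSE) break;
 *   }
 *   get_back_trellis_end(param, r);          // flush word ends of the last frame
 *   finalize_1st_pass(r, param->samplenum);  // re-arrange trellis, output best path
 *   return TRUE;
 * }
 * @endcode
 *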
* * * @callergraph * @callgraph * */ void finalize_1st_pass(RecogProcess *r, int len) { BACKTRELLIS *backtrellis; backtrellis = r->backtrellis; backtrellis->framelen = len; /* 単語トレリス(backtrellis) を整理: トレリス単語の再配置とソート */ /* re-arrange backtrellis: index them by frame, and sort by word ID */ bt_relocate_rw(backtrellis); bt_sort_rw(backtrellis); if (backtrellis->num == NULL) { if (backtrellis->framelen > 0) { jlog("WARNING: %02d %s: input processed, but no survived word found\n", r->config->id, r->config->name); } /* reognition failed */ r->result.status = J_RESULT_STATUS_FAIL; return; } /* 第1パスのベストパスを結果に格納する */ /* store 1st pass result (best hypothesis) to result */ if (r->lmvar == LM_DFA_WORD) { find_1pass_result_word(len, r); } else { find_1pass_result(len, r); } } /** * * Free work area for the first pass * * * 第1パスのためのワークエリア領域を開放する * * * @param d [in] work are for 1st pass input handling * * @callergraph * @callgraph * */ void fsbeam_free(FSBeam *d) { free_nodes(d); if (d->pausemodelnames != NULL) { free(d->pausemodelnames); free(d->pausemodel); } } /* end of file */ julius-4.2.2/libjulius/src/hmm_check.c0000644001051700105040000002662412004452401016205 0ustar ritrlab/** * @file hmm_check.c * * * @brief トライフォンの辞書上での整合性チェック * * * * @brief Triphone checker on word dictionary * * * @author Akinobu LEE * @date Thu Mar 17 20:50:07 2005 * * $Revision: 1.4 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #include #define PHSTEP 10 ///< Malloc step for phoneme conversion /** * * @brief 音素列からHMM列への変換を行ない,結果を表示する. * * このルーチンは,Julius/Julian に与えられた音響モデルと * HMMList ファイルにおいて,音素列からHMM列への変換をテストする * ための関数である. * * 空白で区切られた音素列の文字列に対して,トライフォンモデル使用時には * コンテキストが考慮され,最終的に対応する HMM 列へ変換される. * その後,変換した結果を, * - 音素列から導かれる本来の適用すべきモデル名 * - 上記を HMMList にしたがって変換した論理 HMM 名 * - 実際に計算で用いられる物理HMM名または pseudo HMM 名 * の順に出力する. * * なお,文字列中に "|" を含めることで,そこを単語区切りとして扱い, * トライフォンにおいて単語間の展開を考慮することができる. * * @param str [i/o] 空白で区切られた音素列の文字列 * @param hmminfo [in] HMM定義構造体 * @param len_ret [out] 返り値の論理 HMM の要素数 * * @return 新たにメモリ割り付けられた変換後の論理HMMのポインタ列 * * * @brief Convert phoneme sequences to logical HMM sequences, and output the * result. * * This function is for testing procedure to convert words in dictionary * to corresponding HMM sequences in Julius/Julian, given an HMMList and * HTK HMM definition. * * Given a space-separated list of phoneme names in a string, each phonemes * will be converted to context-dependent style (if using triphone model), * and then converted to HMM sequence that will finally be used for * recognition. Then, the following data will be output for all HMM: * - Original phone HMM name, * - Logical HMM name that is converted from above, * - Physical or pseudo HMM name that will actually be used. * * Additionally, specifying '|' in the string gives a word boundary between * phonemes, and triphone conversion will consider the cross-word expansion. * * @param str [i/o] string that contains space-saparated phoneme sequence. * @param hmminfo [in] HMM definition structure * @param len_ret [out] num of elements in the return value * * @return the newly allocated pointer array to the converted logical HMMs. 
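 *
 * Usage sketch: for an input line such as "k a | s a" (two words), the
 * conversion is driven by cycle_triphone() as in the code below.  The
 * returned context-dependent names would be roughly as follows (names are
 * illustrative; the mapping to physical models depends on the HMMList and
 * hmmdefs actually loaded):
 *
 * @code
 * cycle_triphone(NULL);      // reset the cycle buffer
 * cycle_triphone("k");       // prime with the first phone
 * cycle_triphone("a");       // returns "k+a"    (word head, no left context yet)
 * cycle_triphone("s");       // returns "k-a+s"
 * cycle_triphone("a");       // returns "a-s+a"
 * cycle_triphone_flush();    // returns "s-a"    (word tail)
 * @endcode
 *
 * When a '|' word boundary is present and the full cross-word triphone is
 * not found in the HMMList, the code below falls back to the boundary
 * forms produced by rightcenter_name(), leftcenter_name() and
 * center_name() before giving up.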
* */ static HMM_Logical ** new_str2phseq(char *str, HTK_HMM_INFO *hmminfo, int *len_ret) { char **tokens; boolean *word_end; int phnum; boolean word_mode = FALSE; HMM_Logical **new; static char buf[MAX_HMMNAME_LEN]; /* read in string and divide into token unit */ { char *p; int tokenmax; tokenmax = PHSTEP; tokens = (char **)mymalloc(sizeof(char *) * tokenmax); word_end = (boolean *)mymalloc(sizeof(boolean) * tokenmax); phnum = 0; for(p = strtok(str, DELM); p; p = strtok(NULL, DELM)) { if (strmatch(p, "|")) { word_mode = TRUE; if (phnum > 0) word_end[phnum-1] = TRUE; continue; } if (phnum >= tokenmax) { tokenmax += PHSTEP; tokens = (char **)myrealloc(tokens, sizeof(char *) * tokenmax); word_end = (boolean *)myrealloc(word_end, sizeof(boolean) * tokenmax); } tokens[phnum] = strcpy((char *)mymalloc(strlen(p)+1), p); word_end[phnum] = FALSE; phnum++; } if (phnum == 0) { jlog("ERROR: hmm_check: no phone specified\n"); printf("ERROR: hmm_check: no phone specified\n"); new = NULL; goto spend; } word_end[phnum-1] = TRUE; } /* check if the phonemes exist in basephone list */ { BASEPHONE *ph; int i; boolean ok_flag = TRUE; for (i=0;ibasephone.root); if (ph == NULL || ! strmatch(ph->name, tokens[i])) { jlog("ERROR: hmm_check: %2d - unknown phone \"%s\"\n", i+1, tokens[i]); printf("ERROR: hmm_check: %2d - unknown phone \"%s\"\n", i+1, tokens[i]); ok_flag = FALSE; continue; } } if (! ok_flag) { jlog("ERROR: hmm_check: unknown phone(s)\n"); printf("ERROR: hmm_check: unknown phone(s)\n"); new = NULL; goto spend; } } /* token -> original logical name -> logical HMM -> physical/pseudo phone */ /* cross-word conversion and fallback to bi/mono-phone is also considered */ { int i; char *hmmstr; HMM_Logical *lg; boolean ok_flag = TRUE; new = (HMM_Logical **)mymalloc(sizeof(HMM_Logical *) * phnum); /* original logical name, applied logical HMM name (defined by HMMList), and the actual physical/pseudo HMM name (defined in hmmdefs) */ printf("\n id original logical physical/pseudo\n"); printf(" -------------------------------------------------\n"); if (hmminfo->is_triphone) { cycle_triphone(NULL); cycle_triphone(tokens[0]); for (i = 0; i < phnum; i++) { if (i < phnum - 1) { hmmstr = cycle_triphone(tokens[i+1]); } else { hmmstr = cycle_triphone_flush(); } lg = htk_hmmdata_lookup_logical(hmminfo, hmmstr); if (lg == NULL) { if (word_mode) { if (i > 0 && word_end[i-1]) { if (word_end[i]) { center_name(hmmstr, buf); } else { rightcenter_name(hmmstr, buf); } } else if (word_end[i]) { leftcenter_name(hmmstr, buf); } lg = htk_hmmdata_lookup_logical(hmminfo, buf); if (lg == NULL) { jlog("ERROR: hmm_check: no defined/pseudo HMM for \"%s\"??\n", buf); printf("ERROR: hmm_check: no defined/pseudo HMM for \"%s\"??\n", buf); ok_flag = FALSE; continue; } if (lg->is_pseudo) { printf(" %2d: %11s -> (pseudo) -> {%s}\n", i+1, hmmstr, lg->body.pseudo->name); } else { printf(" %2d: %11s -> %8s -> [%s]\n", i+1, hmmstr, lg->name, lg->body.defined->name); } } else { jlog("ERROR: hmm_check: UNKNOWN %2d: (%s)\n", i+1, hmmstr); printf("ERROR: hmm_check: UNKNOWN %2d: (%s)\n", i+1, hmmstr); ok_flag = FALSE; continue; } } else { if (lg->is_pseudo) { printf(" %2d: %11s -> (pseudo) -> {%s}\n", i+1, hmmstr, lg->body.pseudo->name); } else { printf(" %2d: %11s -> %8s -> [%s]\n", i+1, hmmstr, " ", lg->body.defined->name); } } new[i] = lg; } } else { for (i = 0; i < phnum; i++) { lg = htk_hmmdata_lookup_logical(hmminfo, tokens[i]); if (lg == NULL) { jlog("ERROR: hmm_check: %2d - unknown logical HMM \"%s\"\n", i+1, tokens[i]); printf("ERROR: 
hmm_check: %2d - unknown logical HMM \"%s\"\n", i+1, tokens[i]); ok_flag = FALSE; continue; } new[i] = lg; } } if (ok_flag) { printf("succeeded\n"); } else { jlog("ERROR: hmm_check: failed\n"); printf("failed\n"); free(new); new = NULL; goto spend; } } spend: { int i; for(i=0;i * 標準入力から1行を音素列表記として読み込み,トライフォンへの変換チェックを * 行なう. * * @param hmminfo [in] HMM定義構造体 * * * Read in line from stdin as phoneme sequence and try convertion to * triphone for checking. * * @param hmminfo [in] HMM definition structure * */ static boolean test_expand_triphone(HTK_HMM_INFO *hmminfo) { char *buf; int newline; HMM_Logical **phseq; int phlen; boolean flag = FALSE; buf = (char *)mymalloc(4096); for(;;) { /* read in phoneme sequence from stdin */ printf(">>> input phone sequence (word delimiter is `|', blank to return)\n"); if (fgets(buf, 4096, stdin) == NULL) { flag = TRUE; break; } newline = strlen(buf)-1; /* chop newline */ if (buf[newline] == '\n') buf[newline] = '\0'; if (buf[0] == '\0') break; /* convert string to phseq and output */ phseq = new_str2phseq(buf, hmminfo, &phlen); free(phseq); } free(buf); return flag; } /** * * コマンドライン上でトライフォンのチェックを行なうモード ("-check triphone"). * * @param r [in] 認識処理インスタンス * * * Mode to do interactive triphone conversion check ("-check triphone"). * * @param r [in] recognition process instance * * * @callgraph * @callergraph */ void hmm_check(RecogProcess *r) { boolean endflag; static char cmd[MAX_HMMNAME_LEN]; int newline; printf("*************************************************\n"); printf("******** TRIPHONE COHERENCE CHECK MODE ********\n"); printf("*************************************************\n"); printf("hmmdefs=%s\n", r->am->config->hmmfilename); if (r->am->config->mapfilename != NULL) { printf("hmmlist=%s\n", r->am->config->mapfilename); } printf("dict=%s\n", r->lm->config->dictfilename); printf("headsil = "); put_voca(stdout, r->lm->winfo, r->lm->winfo->head_silwid); printf("tailsil = "); put_voca(stdout, r->lm->winfo, r->lm->winfo->tail_silwid); if (make_base_phone(r->am->hmminfo, r->lm->winfo) == FALSE) { jlog("ERROR: hmm_check: error in making base phone list\n"); printf("ERROR: hmm_check: error in making base phone list\n"); return; } print_phone_info(stdout, r->am->hmminfo); for(endflag = FALSE; endflag == FALSE;) { printf("===== command (\"H\" for help) > "); if (fgets(cmd, MAX_HMMNAME_LEN, stdin) == NULL) break; newline = strlen(cmd)-1; /* chop newline */ if (cmd[newline] == '\n') cmd[newline] = '\0'; if (cmd[0] == '\0') continue; /* if blank line, read next */ switch(cmd[0]) { case 'a': /* all */ /* check if logical HMMs cover all possible variants */ test_interword_triphone(r->am->hmminfo, r->lm->winfo); break; case 'c': /* conv */ /* try to expand triphone for given phoneme sequence */ endflag = test_expand_triphone(r->am->hmminfo); break; case 'i': /* info */ /* output data source */ printf("hmmdefs=%s\n", r->am->config->hmmfilename); if (r->am->config->mapfilename != NULL) { printf("hmmlist=%s\n", r->am->config->mapfilename); } printf("dict=%s\n", r->lm->config->dictfilename); printf("headsil = "); put_voca(stdout, r->lm->winfo, r->lm->winfo->head_silwid); printf("tailsil = "); put_voca(stdout, r->lm->winfo, r->lm->winfo->tail_silwid); print_phone_info(stdout, r->am->hmminfo); break; case 'p': /* phonelist */ /* output basephone */ print_all_basephone_name(&(r->am->hmminfo->basephone)); break; case 'd': /* phonelist in detail */ /* output basephone */ print_all_basephone_detail(&(r->am->hmminfo->basephone)); break; case 'q': /* quit */ /* quit this check 
mode */ endflag = TRUE; break; default: printf("COMMANDS:\n"); printf(" info --- output HMM information\n"); printf(" conv --- try HMM conversion for given phone sequence\n"); printf(" phonelist --- print base phone list\n"); printf(" all --- check if all possible IW-triphone is covered\n"); printf(" quit --- quit\n"); break; } } printf("*************************************************\n"); printf("***** END OF TRIPHONE COHERENCE CHECK MODE ****\n"); printf("*************************************************\n"); } /* end of file */ julius-4.2.2/libjulius/src/dfa_decode.c0000644001051700105040000002264112004452401016317 0ustar ritrlab/** * @file dfa_decode.c * * * @brief 記述文法に基づく次単語予測(第2パス) * * 与えられた仮説に対して,DFA 文法上接続可能な次単語の集合を決定する. * ただし実際には, 展開元仮説の予測される始端フレーム周辺の単語トレリス * 上に残っている単語のみが展開される. * * 文法制約ではショートポーズは単語として記述し,そのショートポーズ単語の * 出現位置を文法で指定する. ただし,実際の入力ではその想定した位置に * 必ずしもポーズが入らないため,単語展開においては, * 次単語集合にショートポーズがある場合は,さらにその次の単語集合まで見て * 次単語集合に含める. 実際にそこにショートポーズが挿入されるかどうかは, * search_bestfirst_main.c で両者のスコアを比較して判断する. * * 文法を用いる認識処理インスタンスでは,dfa_firstwords(), dfa_nextwords(), * dfa_acceptable(), dfa_eosscore() が第2パスのメイン関数 wchmm_fbs() から * 使用される. なお N-gram を用いる認識処理インスタンスでは, * 代わりに ngram_decode.c 内の関数が使われる. * * * * @brief Grammar-based word prediction (2nd pass) * * Given a part-of-sentence hypothesis, these function determine a set of next * words allowed to be connected by the grammar. Actually, only words in the * word trellis, which exist around the estimated word-end frame will be * expanded. * * When using DFA grammar, the possible (short) pause insertion point * should be explicitly specified in grammar, by defining "short-pause * word" in vocabulary and write its appearance in grammar. Since a * short pause will not always appear on the specified point, Julius * will consider the skipping of such short pause word for the next * word prediction in these functions. Whether short pause was * actually inserted or not in the user input will be determined by * score in search_bestfirst_main.c. * * In recognition process instance with DFA grammar, dfa_firstwords(), * dfa_nextwords(), dfa_acceptable() and dfa_eosscore() will be called * from main search function wchmm_fbs(). When using N-gram, on the * other hand, the corresponding functions in ngram_decode.c will be * used instead. * * @author Akinobu LEE * @date Mon Mar 7 15:31:00 2005 * * $Revision: 1.6 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #include /** * * 文法にしたがって,文頭に接続しうる単語を最初の予測単語群として返す. * * @param nw [out] 次単語集合の格納先へのポインタ * @param peseqlen [in] 入力フレーム長 * @param maxnw [in] @a nw の許される最大長 * @param r [in] 認識プロセスインスタンス * * @return 予測された単語数 (ただしエラー時は -1 を返す) * * * Return initial word set from grammar. * * @param nw [out] pointer to hold the resulting next word set * @param peseqlen [in] input frame length * @param maxnw [in] maximum number of words that can be set in @a nw * @param r [in] recognition process instance * * @return the number of predicted words, or -1 on error. 
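 *
 * Calling sketch (illustrative; in Julius this is invoked from the
 * second-pass search function wchmm_fbs(), with nw being a pre-allocated
 * NEXTWORD array of size maxnw):
 * @code
 * int num = dfa_firstwords(nw, peseqlen, maxnw, r);
 * if (num < 0) {
 *   // NEXTWORD buffer overflow: the caller should enlarge the array
 *   // (cf. nw_expand() in search_bestfirst_main.c) and call again
 * }
 * @endcode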
* * * @callgraph * @callergraph * */ int dfa_firstwords(NEXTWORD **nw, int peseqlen, int maxnw, RecogProcess *r) { DFA_INFO *dfa; DFA_ARC *arc; MULTIGRAM *m; int s, sb, se; int cate, iw, ns; int num = 0; dfa = r->lm->dfa; for (m = r->lm->grammars; m; m = m->next) { if (m->active) { sb = m->state_begin; se = sb + m->dfa->state_num; for(s=sb;sst[s].status & INITIAL_S) != 0) { /* from initial state */ for (arc = dfa->st[s].arc; arc; arc = arc->next) { /* for all arc */ cate = arc->label; /* category ID */ ns = arc->to_state; /* next DFA state ID */ /* all words within the category is expanded */ for (iw=0;iwterm.wnum[cate];iw++) { nw[num]->id = dfa->term.tw[cate][iw]; /* word ID */ nw[num]->next_state = ns; /* next state */ nw[num]->can_insert_sp = FALSE; /* short pause should not inserted before this word */ #ifdef FIX_PENALTY nw[num]->lscore = 0.0; #else nw[num]->lscore = r->config->lmp.penalty2; #ifdef CLASS_NGRAM /* add per-word penalty */ nw[num]->lscore += r->wchmm->winfo->cprob[nw[num]->id]; #endif #endif num++; if (num >= maxnw) return -1; /* buffer overflow */ } } } } } } return num; } /** * * 部分文仮説に対して,文法に従って次に接続しうる単語群を返す. * * @param hypo [in] 展開元の部分文仮説 * @param nw [out] 次単語集合の格納先へのポインタ * @param maxnw [in] @a nw の許される最大長 * @param r [in] 認識プロセスインスタンス * * @return 予測された単語数 (ただしエラー時は -1 を返す) * * * Given a part-of-sentence hypothesis, returns the next word set defined * by DFA grammar. * * @param hypo [in] the source part-of-sentene hypothesis * @param nw [out] pointer to hold the resulting next word set * @param maxnw [in] maximum number of words that can be set in @a nw * @param r [in] recognition process instance * * @return the number of predicted words, or -1 on error. * * * @callgraph * @callergraph * */ int dfa_nextwords(NODE *hypo, NEXTWORD **nw, int maxnw, RecogProcess *r) { DFA_INFO *dfa; DFA_ARC *arc, *arc2; int iw,cate,ns,cate2,ns2; int num = 0; dfa = r->lm->dfa; /* hypo->state: current DFA state ID */ for (arc = dfa->st[hypo->state].arc; arc; arc = arc->next) {/* for all arc */ cate = arc->label; ns = arc->to_state; if (dfa->is_sp[cate]) { /* short pause */ /* expand one more next (not expand the short pause word itself) */ for (arc2 = dfa->st[ns].arc; arc2; arc2 = arc2->next) { cate2 = arc2->label; ns2 = arc2->to_state; for (iw=0;iwterm.wnum[cate2];iw++) { nw[num]->id = dfa->term.tw[cate2][iw]; nw[num]->next_state = ns2; nw[num]->can_insert_sp = TRUE; nw[num]->lscore = r->config->lmp.penalty2; #ifdef CLASS_NGRAM /* add per-word penalty */ nw[num]->lscore += r->wchmm->winfo->cprob[nw[num]->id]; #endif num++; if (num >= maxnw) return -1; /* buffer overflow */ } } } else { /* not short pause */ /* all words within the category is expanded */ for (iw=0;iwterm.wnum[cate];iw++) { nw[num]->id = dfa->term.tw[cate][iw]; nw[num]->next_state = ns; nw[num]->can_insert_sp = FALSE; nw[num]->lscore = r->config->lmp.penalty2; #ifdef CLASS_NGRAM /* add per-word penalty */ nw[num]->lscore += r->wchmm->winfo->cprob[nw[num]->id]; #endif num++; if (num >= maxnw) return -1; /* buffer overflow */ } } } return num; } /** * * 部分文仮説が文法上文として最終(受理可能)状態にあるかどうかを返す. * * @param hypo [in] 部分文仮説 * @param r [in] 認識プロセスインスタンス * * @return 受理可能状態にあるとき TRUE 受理不可能なとき FALSE * * * Return whether the hypothesis is currently on final state * * @param hypo [in] sentence hypothesis * @param r [in] recognition process instance * * @return TRUE when on final state, or FALSE if not acceptable. 
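 *
 * Illustrative check at a sentence-end candidate (variable names follow
 * the surrounding code):
 * @code
 * if (dfa_acceptable(hypo, r)) {
 *   // the DFA state reached by this partial hypothesis is an accept state
 * }
 * @endcode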
* * * @callgraph * @callergraph * */ boolean dfa_acceptable(NODE *hypo, RecogProcess *r) { if (r->lm->dfa->st[hypo->state].status & ACCEPT_S) { return TRUE; } else { return FALSE; } } /* patch by kashima */ /** * * 次単語候補がその推定された接続予測点の前後の単語トレリス上に * あるかどうかをチェックし,もしあればそのトレリス単語へのポインタをセット * する. なお最尤の接続点はあとで決まるので,ここでは最適なトレリス単語 * でなくてよい. * * @param nword [i/o] 次単語候補 (対応するトレリス単語へのポインタが * セットされる) * @param hypo [in] 展開元仮説 * @param r [in] 認識プロセスインスタンス * * @return 単語トレリス上の予測位置付近に次単語が存在すれば TRUE,存在 * しなければ FALSE を返す. * * * Check if the given nextword exists in the word trellis around the * estimated connection time. If exist, set the pointer to the corresponding * trellis word to the nextword. Since the best connection time will be * re-computed later, it need not to be an optimal one. * * @param nword [i/o] next word candidate (pointer to the found trellis word * will be set) * @param hypo [in] source part-of-sentence hypothesis * @param r [in] recognition process instance * * @return TRUE if the nextword exists on the word trellis around the estimated * connection point, or FALSE if not exist. * * * @callgraph * @callergraph * */ boolean dfa_look_around(NEXTWORD *nword, NODE *hypo, RecogProcess *r) { int t,tm; int i; WORD_ID w; BACKTRELLIS *bt; int lookup_range; bt = r->backtrellis; lookup_range = r->config->pass2.lookup_range; tm = hypo->estimated_next_t; /* estimated connection time */ /* look aound [tm-lookup_range..tm+lookup_range] frame */ /* near the center is better: 1. the first half (backward) 2. the second half (forward) */ /* 1. backward */ for(t = tm; t >= tm - lookup_range; t--) { if (t < 0) break; for (i=0;inum[t];i++) { w = (bt->rw[t][i])->wid; if(w == nword->id){ /* found */ nword->tre = bt->rw[t][i]; return TRUE; } } } /* 2. forward */ for(t = tm + 1; t < tm + lookup_range; t++) { if (t > bt->framelen - 1) break; if (t >= hypo->bestt) break; for (i=0;inum[t];i++) { w = (bt->rw[t][i])->wid; if(w == nword->id){ /* found */ nword->tre = bt->rw[t][i]; return TRUE; } } } return FALSE; /* not found */ } /* end of file */ julius-4.2.2/libjulius/src/multi-gram.c0000644001051700105040000010072212004452401016335 0ustar ritrlab/** * @file multi-gram.c * * * @brief 認識用文法の管理 * * このファイルには,認識用文法の読み込みと管理を行う関数が含まれています. * これらの関数は,文法ファイルの読み込み,および各種データの * セットアップを行います. * * 複数文法の同時認識に対応しています. 複数の文法を一度に読み込んで, * 並列に認識を行えます. また,モジュールモードでは,クライアントから * 認識実行中に文法を動的に追加・削除したり,一部分の文法を無効化・ * 有効化したりできます. また与えられた個々の文法ごとに認識結果を * 出すことができます. * * 与えられた(複数の)文法は一つのグローバル文法として結合され, * 文法の読み込みや削除などの状態変更を行ったとき,更新されます. * 結合された構文規則 (DFA) が global_dfa に,語彙辞書が global_winfo に * それぞれローカルに格納されます. これらは適切なタイミングで * multigram_build() が呼び出されたときに,global.h 内の大域変数 dfa * および winfo にコピーされ,認識処理において使用されるようになります. * * * * @brief Management of Recognition grammars * * This file contains functions to read and manage recognition grammar. * These function read in grammar and dictionary, and setup data for * recognition. * * Recognition with multiple grammars are supported. Julian can read * several grammars specified at startup time, and perform recognition * with those grammars simultaneously. In module mode, you can add / * delete / activate / deactivate each grammar while performing recognition, * and also can output optimum results for each grammar. * * Internally, the given grammars are composed to a single Global Grammar. * The global grammar will be updated whenever a new grammar has been read * or deleted. 
The syntax rule (DFA) of the global grammar will be stored * at global_dfa, and the corresponding dictionary will be at global_winfo * locally, independent of the decoding timing. After that, multigram_build() * will be called to make the prepared global grammar to be used in the * actual recognition process, by copying the grammar and the dictionary * to the global variable dfa and winfo. * * @author Akinobu Lee * @date Sat Jun 18 23:45:18 2005 * * $Revision: 1.11 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #include /// For debug: define to enable grammar update messages to stdout #define MDEBUG /** * * @brief グローバル文法から木構造化辞書を構築する. * * 与えられた文法で認識を行うために,認識処理インスタンスが現在持つ * グローバル文法から木構造化辞書を(再)構築します. また, * 起動時にビーム幅が明示的に指示されていない場合やフルサーチの場合, * ビーム幅の再設定も行います. * * @param r [i/o] 認識処理インスタンス * * * @brief Build tree lexicon from global grammar. * * This function will re-construct the tree lexicon using the global grammar * in the recognition process instance. If the beam width was not explicitly * specified on startup, the the beam width will be guessed * according to the size of the new lexicon. * * @param r [i/o] recognition process instance * */ static boolean multigram_rebuild_wchmm(RecogProcess *r) { boolean ret; /* re-build wchmm */ if (r->wchmm != NULL) { wchmm_free(r->wchmm); } r->wchmm = wchmm_new(); r->wchmm->lmtype = r->lmtype; r->wchmm->lmvar = r->lmvar; r->wchmm->ccd_flag = r->ccd_flag; r->wchmm->category_tree = TRUE; r->wchmm->hmmwrk = &(r->am->hmmwrk); /* assign models */ r->wchmm->dfa = r->lm->dfa; r->wchmm->winfo = r->lm->winfo; r->wchmm->hmminfo = r->am->hmminfo; if (r->wchmm->category_tree) { if (r->config->pass1.old_tree_function_flag) { ret = build_wchmm(r->wchmm, r->lm->config); } else { ret = build_wchmm2(r->wchmm, r->lm->config); } } else { ret = build_wchmm2(r->wchmm, r->lm->config); } /* 起動時 -check でチェックモードへ */ if (r->config->sw.wchmm_check_flag) { wchmm_check_interactive(r->wchmm); } if (ret == FALSE) { jlog("ERROR: multi-gram: failed to build (global) lexicon tree for recognition\n"); return FALSE; } /* guess beam width from models, when not specified */ r->trellis_beam_width = set_beam_width(r->wchmm, r->config->pass1.specified_trellis_beam_width); switch(r->config->pass1.specified_trellis_beam_width) { case 0: jlog("STAT: multi-gram: beam width set to %d (full) by lexicon change\n", r->trellis_beam_width); break; case -1: jlog("STAT: multi-gram: beam width set to %d (guess) by lexicon change\n", r->trellis_beam_width); } /* re-allocate factoring cache for the tree lexicon*/ /* for n-gram only?? */ //max_successor_cache_free(recog->wchmm); //max_successor_cache_init(recog->wchmm); /* finished! */ return TRUE; } /** * * @brief Check for global grammar and (re-)build tree lexicon if needed. * * If any modification of the global grammar has been occured, * the tree lexicons and some other data for recognition will be re-constructed * from the updated global grammar. * * * @brief グローバル文法を調べ,必要があれば木構造化辞書を(再)構築する. * * グローバル辞書に変更があれば,その更新されたグローバル * 辞書から木構造化辞書などの音声認識用データ構造を再構築する. * * * * @param r [in] recognition process instance * * @return TRUE on success, FALSE on error. 
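 *
 * Sketch of the typical flow around this function (illustrative; the
 * actual call sequence in Julius may wrap this with additional checks):
 * @code
 * if (multigram_update(r->lm)) {       // global grammar has been modified
 *   if (multigram_build(r) == FALSE) {
 *     jlog("ERROR: failed to rebuild tree lexicon\n");
 *   }
 * }
 * @endcode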
* * @callgraph * @callergraph * @ingroup grammar * */ boolean multigram_build(RecogProcess *r) { if (r->lm->winfo != NULL) { /* re-build tree lexicon for recognition process */ if (multigram_rebuild_wchmm(r) == FALSE) { jlog("ERROR: multi-gram: failed to re-build tree lexicon\n"); return FALSE; } #ifdef MDEBUG jlog("STAT: wchmm (re)build completed\n"); #endif } return(TRUE); } /** * * @brief グローバル文法の末尾に文法を追加する. * * もとの文法構造体には,グローバル文法のどの位置にその文法が追加 * されたか,そのカテゴリ番号と辞書番号の範囲が記録される. * * @param gdfa [i/o] 結合先の文法のDFA情報 * @param gwinfo [i/o] 結合先の文法の辞書情報 * @param m [i/o] 結合する文法情報. * * * @brief Append a grammar to the tail of global grammar. * * The location of the grammar in the global grammar (categories and words) * will be stored to the grammar structure for later access. * * @param gdfa [i/o] DFA information of the global grammar * @param gwinfo [i/o] Dictionary information of the global grammar * @param m [i/o] New grammar information to be installed. * */ static boolean multigram_append_to_global(DFA_INFO *gdfa, WORD_INFO *gwinfo, MULTIGRAM *m) { /* the new grammar 'm' will be appended to the last of gdfa and gwinfo */ m->state_begin = gdfa->state_num; /* initial state ID */ m->cate_begin = gdfa->term_num; /* initial terminal ID */ m->word_begin = gwinfo->num; /* initial word ID */ /* append category ID and node number of src DFA */ /* Julius allow multiple initial states: connect each initial node is not necesarry. */ dfa_append(gdfa, m->dfa, m->state_begin, m->cate_begin); /* append words of src vocabulary to global winfo */ if (voca_append(gwinfo, m->winfo, m->cate_begin, m->word_begin) == FALSE) { return FALSE; } /* append category->word mapping table */ terminfo_append(&(gdfa->term), &(m->dfa->term), m->cate_begin, m->word_begin); /* append catergory-pair information */ /* pause has already been considered on m->dfa, so just append here */ if (cpair_append(gdfa, m->dfa, m->cate_begin) == FALSE) { return FALSE; } /* re-set noise entry by merging */ if (dfa_pause_word_append(gdfa, m->dfa, m->cate_begin) == FALSE) { return FALSE; } jlog("STAT: Gram #%d %s: installed\n", m->id, m->name); return TRUE; } /** * * 新たな文法を,文法リストに追加する. * 現在インスタンスが保持している文法のリストは lm->grammars に保存される. * 追加した文法には,newbie と inactive のフラグがセットされ,次回の * 文法更新チェック時に更新対象となる. * * @param dfa [in] 追加登録する文法のDFA情報 * @param winfo [in] 追加登録する文法の辞書情報 * @param name [in] 追加登録する文法の名称 * @param lm [i/o] 言語処理インスタンス * * @return 文法IDを返す. * * * Add a new grammar to the current list of grammars. * The list of grammars which the LM instance keeps currently is * at lm->grammars. * The new grammar is flagged at "newbie" and "inactive", to be treated * properly at the next grammar update check. * * @param dfa [in] DFA information of the new grammar. * @param winfo [in] dictionary information of the new grammar. * @param name [in] name string of the new grammar. * @param lm [i/o] LM processing instance * * @return the new grammar ID for the given grammar. 
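 *
 * Minimal sketch of registering a grammar read elsewhere (the grammar
 * name and variable names are illustrative):
 * @code
 * int gid = multigram_add(new_dfa, new_winfo, "sample", lm);
 * // the grammar is marked "newbie"/"inactive" and becomes effective
 * // at the next grammar update
 * @endcode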
* * * @callgraph * @callergraph * @ingroup grammar */ int multigram_add(DFA_INFO *dfa, WORD_INFO *winfo, char *name, PROCESS_LM *lm) { MULTIGRAM *new; /* allocate new gram */ new = (MULTIGRAM *)mymalloc(sizeof(MULTIGRAM)); if (name != NULL) { strncpy(new->name, name, MAXGRAMNAMELEN); } else { strncpy(new->name, "(no name)", MAXGRAMNAMELEN); } new->id = lm->gram_maxid; new->dfa = dfa; new->winfo = winfo; /* will be setup and activated after multigram_update() is called once */ new->hook = MULTIGRAM_DEFAULT | MULTIGRAM_ACTIVATE; new->newbie = TRUE; /* need to setup */ new->active = FALSE; /* default: inactive */ /* the new grammar is now added to gramlist */ new->next = lm->grammars; lm->grammars = new; jlog("STAT: Gram #%d %s registered\n", new->id, new->name); lm->gram_maxid++; return new->id; } /** * * 文法を削除する. * * 文法リスト中のある文法について,削除マークを付ける. * 実際の削除は multigram_exec_delete() で行われる. * * @param delid [in] 削除する文法の文法ID * @param lm [i/o] 言語処理インスタンス * * @return 通常時 TRUE を返す. 指定されたIDの文法が無い場合は FALSE を返す. * * * Mark a grammar in the grammar list to be deleted at the next grammar update. * * @param delid [in] grammar id to be deleted * @param lm [i/o] LM processing instance * * @return TRUE on normal exit, or FALSE if the specified grammar is not found * in the grammar list. * * @callgraph * @callergraph * @ingroup grammar */ boolean multigram_delete(int delid, PROCESS_LM *lm) { MULTIGRAM *m; for(m=lm->grammars;m;m=m->next) { if (m->id == delid) { m->hook |= MULTIGRAM_DELETE; jlog("STAT: Gram #%d %s: marked delete\n", m->id, m->name); break; } } if (! m) { jlog("STAT: Gram #%d: not found\n", delid); return FALSE; } return TRUE; } /** * * すべての文法を次回更新時に削除するようマークする. * * @param lm [i/o] 言語処理インスタンス * * * Mark all grammars to be deleted at next grammar update. * * @param lm [i/o] LM processing instance * * @callgraph * @callergraph * @ingroup grammar */ void multigram_delete_all(PROCESS_LM *lm) { MULTIGRAM *m; for(m=lm->grammars;m;m=m->next) { m->hook |= MULTIGRAM_DELETE; } } /** * * 削除マークのついた文法をリストから削除する. * * @param lm [i/o] 言語処理インスタンス * * @return グローバル文法の再構築が必要なときは TRUE を,不必要なときは FALSE を返す. * * * Purge grammars marked as delete. * * @param lm [i/o] LM processing instance * * @return TRUE if the global grammar must be re-constructed, or FALSE if not needed. * */ static boolean multigram_exec_delete(PROCESS_LM *lm) { MULTIGRAM *m, *mtmp, *mprev; boolean ret_flag = FALSE; /* exec delete */ mprev = NULL; m = lm->grammars; while(m) { mtmp = m->next; if (m->hook & MULTIGRAM_DELETE) { /* if any grammar is deleted, we need to rebuild lexicons etc. */ /* so tell it to the caller */ if (! m->newbie) ret_flag = TRUE; if (m->dfa) dfa_info_free(m->dfa); word_info_free(m->winfo); jlog("STAT: Gram #%d %s: purged\n", m->id, m->name); free(m); if (mprev != NULL) { mprev->next = mtmp; } else { lm->grammars = mtmp; } } else { mprev = m; } m = mtmp; } return(ret_flag); } /** * * 文法を有効化する. ここでは次回更新時に * 反映されるようにマークをつけるのみである. * * @param gid [in] 有効化したい文法の ID * @param lm [i/o] 言語処理インスタンス * * * Activate a grammar in the grammar list. The specified grammar * will only be marked as to be activated in the next grammar update timing. 
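 *
 * Example sketch (the grammar name "sample" is illustrative):
 * @code
 * int gid = multigram_get_id_by_name(lm, "sample");
 * if (gid >= 0) multigram_activate(gid, lm);
 * // the change takes effect at the next grammar update timing
 * @endcode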
* * @param gid [in] grammar ID to be activated * @param lm [i/o] LM processing instance * * @return 0 on success, -1 on error (when specified grammar not found), * of 1 if already active * * @callgraph * @callergraph * @ingroup grammar */ int multigram_activate(int gid, PROCESS_LM *lm) /* only mark */ { MULTIGRAM *m; int ret; for(m=lm->grammars;m;m=m->next) { if (m->id == gid) { if (m->hook & MULTIGRAM_DEACTIVATE) { ret = 0; m->hook &= ~(MULTIGRAM_DEACTIVATE); m->hook |= MULTIGRAM_ACTIVATE; jlog("STAT: Gram #%d %s: marked active, superceding deactivate\n", m->id, m->name); } else { if (m->hook & MULTIGRAM_ACTIVATE) { jlog("STAT: Gram #%d %s: already marked active\n", m->id, m->name); ret = 1; } else { ret = 0; m->hook |= MULTIGRAM_ACTIVATE; jlog("STAT: Gram #%d %s: marked activate\n", m->id, m->name); } } break; } } if (! m) { jlog("WARNING: Gram #%d: not found, activation ignored\n", gid); ret = -1; } return(ret); } /** * * 文法を無効化する. 無効化された文法は * 認識において仮説展開されない. これによって,グローバル辞書を * 再構築することなく,一時的に個々の文法をON/OFFできる. 無効化した * 文法は multigram_activate() で再び有効化できる. なおここでは * 次回の文法更新タイミングで反映されるようにマークをつけるのみである. * * @param gid [in] 無効化したい文法のID * @param lm [i/o] 言語処理インスタンス * * * Deactivate a grammar in the grammar list. The words of the de-activated * grammar will not be expanded in the recognition process. This feature * enables rapid switching of grammars without re-building tree lexicon. * The de-activated grammar will again be activated by calling * multigram_activate(). * * @param gid [in] grammar ID to be de-activated * @param lm [i/o] LM processing instance * * @return 0 on success, -1 on error (when specified grammar not found), * of 1 if already inactive * * @callgraph * @callergraph * @ingroup grammar */ int multigram_deactivate(int gid, PROCESS_LM *lm) /* only mark */ { MULTIGRAM *m; int ret; for(m=lm->grammars;m;m=m->next) { if (m->id == gid) { if (m->hook & MULTIGRAM_ACTIVATE) { ret = 0; m->hook &= ~(MULTIGRAM_ACTIVATE); m->hook |= MULTIGRAM_DEACTIVATE; jlog("STAT: Gram #%d %s: marked deactivate, superceding activate\n", m->id, m->name); } else { if (m->hook & MULTIGRAM_DEACTIVATE) { jlog("STAT: Gram #%d %s: already marked deactivate\n", m->id, m->name); ret = 1; } else { ret = 0; m->hook |= MULTIGRAM_DEACTIVATE; jlog("STAT: Gram #%d %s: marked deactivate\n", m->id, m->name); } } break; } } if (! m) { jlog("WARNING: - Gram #%d: not found, deactivation ignored\n", gid); ret = -1; } return(ret); } /** * * 文法の有効化・無効化を実行する. * * @param lm [i/o] 言語処理インスタンス * * @return 無効から有効へ,あるいは有効から無効へ状態が変化した文法が一つでも * あればTRUE, 状態が全く変化しなかった場合は FALSE を返す. * * * Execute (de)activation of grammars. * * @param lm [i/o] LM processing instance * * @return TRUE if at least one grammar has been changed, or FALSE if no * grammar has changed its status. * */ static boolean multigram_exec_activate(PROCESS_LM *lm) { MULTIGRAM *m; boolean modified; modified = FALSE; for(m=lm->grammars;m;m=m->next) { if (m->hook & MULTIGRAM_ACTIVATE) { m->hook &= ~(MULTIGRAM_ACTIVATE); if (!m->active) { jlog("STAT: Gram #%d %s: turn on active\n", m->id, m->name); } m->active = TRUE; modified = TRUE; } else if (m->hook & MULTIGRAM_DEACTIVATE) { m->hook &= ~(MULTIGRAM_DEACTIVATE); if (m->active) { jlog("STAT: Gram #%d %s: turn off inactive\n", m->id, m->name); } m->active = FALSE; modified = TRUE; } } return(modified); } /** * * @brief グローバル文法の更新 * * 前回呼出しからの文法リストの変更をチェックする. * リスト中に削除マークがつけられた文法がある場合は,その文法を削除し, * グローバル辞書を再構築する. 新たに追加された文法がある場合は, * その文法を現在のグローバル辞書の末尾に追加する. * * @param lm [i/o] 言語処理インスタンス * * @return 常に TRUE を返す. 
* * * @brief Update global grammar if needed. * * This function checks for any modification in the grammar list from * previous call, and update the global grammar if needed. * * If there are grammars marked to be deleted in the grammar list, * they will be actually deleted from memory. Then the global grammar is * built from scratch using the rest grammars. * If there are new grammars, they are appended to the current global grammar. * * @param lm [i/o] LM processing instance * * @return TRUE when any of add/delete/active/inactive occurs, or FALSE if * nothing modified. * * @callgraph * @callergraph * @ingroup grammar */ boolean /* return FALSE if no gram */ multigram_update(PROCESS_LM *lm) { MULTIGRAM *m; boolean active_changed = FALSE; boolean rebuild_flag; if (lm->lmvar == LM_DFA_GRAMMAR) { /* setup additional grammar info of new ones */ for(m=lm->grammars;m;m=m->next) { if (m->newbie) { jlog("STAT: Gram #%d %s: new grammar loaded, now mash it up for recognition\n", m->id, m->name); /* map dict item to dfa terminal symbols */ if (make_dfa_voca_ref(m->dfa, m->winfo) == FALSE) { jlog("ERROR: failed to map dict <-> DFA. This grammar will be deleted\n"); /* mark as to be deleted */ m->hook |= MULTIGRAM_DELETE; continue; } /* set dfa->sp_id and dfa->is_sp */ dfa_find_pause_word(m->dfa, m->winfo, lm->am->hmminfo); /* build catergory-pair information */ jlog("STAT: Gram #%d %s: extracting category-pair constraint for the 1st pass\n", m->id, m->name); if (extract_cpair(m->dfa) == FALSE) { jlog("ERROR: failed to extract category pair. This grammar will be deleted\n"); /* mark as to be deleted */ m->hook |= MULTIGRAM_DELETE; } } } } rebuild_flag = FALSE; /* delete grammars marked as "delete" */ if (multigram_exec_delete(lm)) { /* some built grammars deleted */ rebuild_flag = TRUE; /* needs rebuilding global grammar */ } /* find modified grammar */ for(m=lm->grammars;m;m=m->next) { if (m->hook & MULTIGRAM_MODIFIED) { rebuild_flag = TRUE; /* needs rebuilding global grammar */ m->hook &= ~(MULTIGRAM_MODIFIED); } } if (rebuild_flag) { /* rebuild global grammar from scratch (including new) */ /* active status not changed here (inactive grammar will also included) */ /* activate/deactivate hook will be handled later, so just keep it here */ #ifdef MDEBUG jlog("STAT: re-build whole global grammar...\n"); #endif /* free old if not yet */ if (lm->dfa != NULL) { dfa_info_free(lm->dfa); lm->dfa = NULL; } if (lm->winfo != NULL) { word_info_free(lm->winfo); lm->winfo = NULL; } /* concatinate all existing grammars to global */ for(m=lm->grammars;m;m=m->next) { if (lm->lmvar == LM_DFA_GRAMMAR && lm->dfa == NULL) { lm->dfa = dfa_info_new(); dfa_state_init(lm->dfa); } if (lm->winfo == NULL) { lm->winfo = word_info_new(); winfo_init(lm->winfo); } if (m->newbie) m->newbie = FALSE; if (lm->lmvar == LM_DFA_WORD) { /* just append dictionaty */ m->word_begin = lm->winfo->num; if (voca_append(lm->winfo, m->winfo, m->id, m->word_begin) == FALSE) { jlog("ERROR: multi-gram: failed to add dictionary #%d to recognition network\n", m->id); /* mark as delete */ m->hook |= MULTIGRAM_DELETE; } } else { if (multigram_append_to_global(lm->dfa, lm->winfo, m) == FALSE) { jlog("ERROR: multi-gram: failed to add grammar #%d to recognition network\n", m->id); /* mark as delete */ m->hook |= MULTIGRAM_DELETE; } } } /* delete the error grammars if exist */ if (multigram_exec_delete(lm)) { jlog("ERROR: errorous grammar deleted\n"); } lm->global_modified = TRUE; } else { /* global not need changed by the deletion */ /* append only new 
grammars */ for(m=lm->grammars;m;m=m->next) { if (m->newbie) { if (lm->lmvar == LM_DFA_GRAMMAR && lm->dfa == NULL) { lm->dfa = dfa_info_new(); dfa_state_init(lm->dfa); } if (lm->winfo == NULL) { lm->winfo = word_info_new(); winfo_init(lm->winfo); } if (m->newbie) m->newbie = FALSE; if (lm->lmvar == LM_DFA_WORD) { /* just append dictionaty */ m->word_begin = lm->winfo->num; if (voca_append(lm->winfo, m->winfo, m->id, m->word_begin) == FALSE) { jlog("ERROR: multi-gram: failed to add dictionary #%d to recognition network\n", m->id); /* mark as delete */ m->hook |= MULTIGRAM_DELETE; } } else { if (multigram_append_to_global(lm->dfa, lm->winfo, m) == FALSE) { jlog("ERROR: multi-gram: failed to add grammar #%d to recognition network\n", m->id); /* mark as delete */ m->hook |= MULTIGRAM_DELETE; } } lm->global_modified = TRUE; } } } /* process activate/deactivate hook */ active_changed = multigram_exec_activate(lm); if (lm->global_modified) { /* if global lexicon has changed */ /* now global grammar info has been updated */ /* check if no grammar */ if (lm->lmvar == LM_DFA_GRAMMAR) { if (lm->dfa == NULL || lm->winfo == NULL) { if (lm->dfa != NULL) { dfa_info_free(lm->dfa); lm->dfa = NULL; } if (lm->winfo != NULL) { word_info_free(lm->winfo); lm->winfo = NULL; } } } #ifdef MDEBUG jlog("STAT: grammar update completed\n"); #endif } if (lm->global_modified || active_changed) { return (TRUE); } return FALSE; } /** * * dfaファイルとdictファイルを読み込んで文法リストに追加する. * * @param dfa_file [in] dfa ファイル名 * @param dict_file [in] dict ファイル名 * @param lm [i/o] 言語処理インスタンス * * * Add grammar to the grammar list specified by dfa file and dict file. * * @param dfa_file [in] dfa file name * @param dict_file [in] dict file name * @param lm [i/o] LM processing instance * */ static boolean multigram_read_file_and_add(char *dfa_file, char *dict_file, PROCESS_LM *lm) { WORD_INFO *new_winfo; DFA_INFO *new_dfa; char buf[MAXGRAMNAMELEN], *p, *q; boolean ret; if (dfa_file != NULL) { jlog("STAT: reading [%s] and [%s]...\n", dfa_file, dict_file); } else { jlog("STAT: reading [%s]...\n", dict_file); } /* read dict*/ new_winfo = word_info_new(); if (lm->lmvar == LM_DFA_GRAMMAR) { ret = init_voca(new_winfo, dict_file, lm->am->hmminfo, #ifdef MONOTREE TRUE, #else FALSE, #endif lm->config->forcedict_flag); if ( ! ret ) { jlog("ERROR: failed to read dictionary \"%s\"\n", dict_file); word_info_free(new_winfo); return FALSE; } } else if (lm->lmvar == LM_DFA_WORD) { ret = init_wordlist(new_winfo, dict_file, lm->am->hmminfo, lm->config->wordrecog_head_silence_model_name, lm->config->wordrecog_tail_silence_model_name, (lm->config->wordrecog_silence_context_name[0] == '\0') ? NULL : lm->config->wordrecog_silence_context_name, lm->config->forcedict_flag); if ( ! ret ) { jlog("ERROR: failed to read word list \"%s\"\n", dict_file); word_info_free(new_winfo); return FALSE; } } new_dfa = NULL; if (lm->lmvar == LM_DFA_GRAMMAR) { /* read dfa */ new_dfa = dfa_info_new(); if (init_dfa(new_dfa, dfa_file) == FALSE) { jlog("ERROR: multi-gram: error in reading DFA\n"); word_info_free(new_winfo); dfa_info_free(new_dfa); return FALSE; } } jlog("STAT: done\n"); /* extract name */ p = &(dict_file[0]); q = p; while(*p != '\0') { if (*p == '/') q = p + 1; p++; } p = q; while(*p != '\0' && *p != '.') { buf[p-q] = *p; p++; } buf[p-q] = '\0'; /* register the new grammar to multi-gram tree */ multigram_add(new_dfa, new_winfo, buf, lm); return TRUE; } /** * * 起動時に指定されたすべての文法をロードする. * * @param lm [i/o] 言語処理インスタンス * * * * Load all the grammars specified at startup. 
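 *
 * Start-up usage sketch (illustrative; the grammar/word lists are assumed
 * to have been registered to the LM configuration beforehand):
 * @code
 * if (multigram_load_all_gramlist(lm) == FALSE) {
 *   jlog("ERROR: some grammars failed to be loaded\n");
 * }
 * @endcode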
* * @param lm [i/o] LM processing instance * * * @callgraph * @callergraph */ boolean multigram_load_all_gramlist(PROCESS_LM *lm) { GRAMLIST *g; GRAMLIST *groot; boolean ok_p; switch(lm->config->lmvar) { case LM_DFA_GRAMMAR: groot = lm->config->gramlist_root; break; case LM_DFA_WORD: groot = lm->config->wordlist_root; break; } ok_p = TRUE; for(g = groot; g; g = g->next) { if (multigram_read_file_and_add(g->dfafile, g->dictfile, lm) == FALSE) { ok_p = FALSE; } } return(ok_p); } /** * * 現在ある文法の数を得る(active/inactiveとも). * * @param lm [i/o] 言語処理インスタンス * * @return 文法の数を返す. * * * Get the number of current grammars (both active and inactive). * * @param lm [i/o] LM processing instance * * @return the number of grammars. * * @callgraph * @callergraph * @ingroup grammar */ int multigram_get_all_num(PROCESS_LM *lm) { MULTIGRAM *m; int cnt; cnt = 0; for(m=lm->grammars;m;m=m->next) cnt++; return(cnt); } /** * * 単語カテゴリの属する文法を得る. * * @param category 単語カテゴリID * @param lm [i/o] 言語処理インスタンス * * @return 単語カテゴリの属する文法のIDを返す. * * * Get which grammar the given category belongs to. * * @param category word category ID * @param lm [i/o] LM processing instance * * @return the id of the belonging grammar. * * @callgraph * @callergraph * @ingroup grammar */ int multigram_get_gram_from_category(int category, PROCESS_LM *lm) { MULTIGRAM *m; int tb, te; for(m = lm->grammars; m; m = m->next) { if (m->newbie) continue; tb = m->cate_begin; te = tb + m->dfa->term_num; if (tb <= category && category < te) { /* found */ return(m->id); } } return(-1); } /** * * 単語IDから属する文法を得る. * * @param wid 単語ID * @param lm [i/o] 言語処理インスタンス * * @return 単語の属する文法のIDを返す. * * * Get which grammar the given word belongs to. * * @param wid word ID * @param lm [i/o] LM processing instance * * @return the id of the belonging grammar. * * @callgraph * @callergraph * @ingroup grammar */ int multigram_get_gram_from_wid(WORD_ID wid, PROCESS_LM *lm) { MULTIGRAM *m; int wb, we; for(m = lm->grammars; m; m = m->next) { if (m->newbie) continue; wb = m->word_begin; we = wb + m->winfo->num; if (wb <= wid && wid < we) { /* found */ return(m->id); } } return(-1); } /** * * 保持している文法をすべて解放する。 * * @param root [in] root pointer of grammar list * * * Free all grammars. * * @param root [in] root pointer of grammar list * * @callgraph * @callergraph */ void multigram_free_all(MULTIGRAM *root) { MULTIGRAM *m, *mtmp; m = root; while(m) { mtmp = m->next; if (m->dfa) dfa_info_free(m->dfa); word_info_free(m->winfo); free(m); m = mtmp; } } /** * * Return a grammar ID of the given grammar name. * * * LM中の文法を名前で検索し,その文法IDを返す. * * * @param lm [in] LM process instance * @param gramname [in] grammar name * * @return grammar ID, or -1 if not found. * * @callgraph * @callergraph * @ingroup grammar * */ int multigram_get_id_by_name(PROCESS_LM *lm, char *gramname) { MULTIGRAM *m; for(m=lm->grammars;m;m=m->next) { if (strmatch(m->name, gramname)) break; } if (!m) { jlog("ERROR: multigram: cannot find grammar \"%s\"\n", gramname); return -1; } return m->id; } /** * * Find a grammar in LM by its name. * * * LM中の文法を名前で検索する. * * * @param lm [in] LM process instance * @param gramname [in] grammar name * * @return poitner to the grammar, or NULL if not found. 
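 *
 * Usage sketch (the grammar name is illustrative):
 * @code
 * MULTIGRAM *m = multigram_get_grammar_by_name(lm, "sample");
 * if (m != NULL) {
 *   // inspect or modify the grammar, e.g. append words to it
 * }
 * @endcode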
* * @callgraph * @callergraph * @ingroup grammar * */ MULTIGRAM * multigram_get_grammar_by_name(PROCESS_LM *lm, char *gramname) { MULTIGRAM *m; for(m=lm->grammars;m;m=m->next) { if (strmatch(m->name, gramname)) break; } if (!m) { jlog("ERROR: multigram: cannot find grammar \"%s\"\n", gramname); return NULL; } return m; } /** * * Find a grammar in LM by its ID number. * * * LM中の文法を ID 番号で検索する. * * * @param lm [in] LM process instance * @param id [in] ID number * * @return poitner to the grammar, or NULL if not found. * * @callgraph * @callergraph * @ingroup grammar * */ MULTIGRAM * multigram_get_grammar_by_id(PROCESS_LM *lm, unsigned short id) { MULTIGRAM *m; for(m=lm->grammars;m;m=m->next) { if (m->id == id) break; } if (!m) { jlog("ERROR: multi-gram: cannot find grammar id \"%d\"\n", id); return NULL; } return m; } /** * * @brief Append words to a grammar. * * Category IDs of grammar in the adding words will be copied as is to * the target grammar, so they should be set beforehand correctly. * The whole tree lexicon will be rebuilt later. * * Currently adding words to N-gram LM is not supported yet. * * * * @brief 単語集合を文法に追加する. * * 追加する単語の文法カテゴリIDについては,すでにアサインされているものが * そのままコピーされる.よって,それらはこの関数を呼び出す前に, * 追加対象の文法で整合性が取れるよう正しく設定されている必要がある. * 木構造化辞書全体が,後に再構築される. * * 単語N-gram言語モデルへの辞書追加は現在サポートされていない. * * * * @param lm [i/o] LM process instance * @param m [i/o] grammar to which the winfo will be appended * @param winfo [in] words to be added to the grammar * * @return TRUE on success, or FALSE on failure. * * @callgraph * @callergraph * @ingroup grammar * */ boolean multigram_add_words_to_grammar(PROCESS_LM *lm, MULTIGRAM *m, WORD_INFO *winfo) { int offset; if (lm == NULL || m == NULL || winfo == NULL) return FALSE; offset = m->winfo->num; printf("adding %d words to grammar #%d (%d words)\n", winfo->num, m->id, m->winfo->num); /* append to the grammar */ if (voca_append(m->winfo, winfo, m->id, offset) == FALSE) { jlog("ERROR: multi-gram: failed to add words to dict in grammar #%d \"%s\"\n", m->id, m->name); return FALSE; } /* update dictianary info */ if (lm->lmvar == LM_DFA_GRAMMAR) { if (m->dfa->term_num != 0) free_terminfo(&(m->dfa->term)); if (make_dfa_voca_ref(m->dfa, m->winfo) == FALSE) { jlog("ERROR: failed to map dict <-> DFA. This grammar will be deleted\n"); return FALSE; } } /* prepare for update */ m->hook |= MULTIGRAM_MODIFIED; return TRUE; } /** * * @brief Append words to a grammar, given by its name. * * Call multigram_add_words_to_grammar() with target grammar * specified by its name. * * * @brief 名前で指定された文法に単語集合を追加する. * * multigram_add_words_to_grammar() を文法名で指定して実行する. * * * * @param lm [i/o] LM process instance * @param gramname [in] name of the grammar to which the winfo will be appended * @param winfo [in] words to be added to the grammar * * @return TRUE on success, or FALSE on failure. * * @callgraph * @callergraph * @ingroup grammar * */ boolean multigram_add_words_to_grammar_by_name(PROCESS_LM *lm, char *gramname, WORD_INFO *winfo) { return(multigram_add_words_to_grammar(lm, multigram_get_grammar_by_name(lm, gramname), winfo)); } /** * * @brief Append words to a grammar, given by its ID number. * * Call multigram_add_words_to_grammar() with target grammar * specified by its number. * * * @brief 番号で指定された文法に単語集合を追加する. * * multigram_add_words_to_grammar() を番号で指定して実行する. 
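 *
 * Sketch (illustrative; new_words must already carry category IDs that are
 * consistent with the target grammar, as noted for
 * multigram_add_words_to_grammar()):
 * @code
 * if (multigram_add_words_to_grammar_by_id(lm, gid, new_words) == FALSE) {
 *   jlog("ERROR: failed to append words to grammar #%d\n", gid);
 * }
 * @endcode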
* * * * @param lm [i/o] LM process instance * @param id [in] ID number of the grammar to which the winfo will be appended * @param winfo [in] words to be added to the grammar * * @return TRUE on success, or FALSE on failure. * * @callgraph * @callergraph * @ingroup grammar * */ boolean multigram_add_words_to_grammar_by_id(PROCESS_LM *lm, unsigned short id, WORD_INFO *winfo) { return(multigram_add_words_to_grammar(lm, multigram_get_grammar_by_id(lm, id), winfo)); } /* end of file */ julius-4.2.2/libjulius/src/wchmm_check.c0000644001051700105040000003252312004452401016532 0ustar ritrlab/** * @file wchmm_check.c * * * @brief 木構造化辞書のマニュアルチェック * * ここでは,与えられた単語辞書と言語モデルから生成された木構造化辞書の構造を * 対話的にチェックするための関数が定義されています. 起動時に "-check wchmm" * とすることで,木構造化辞書の構築後にプロンプトが表示され,ある単語が * 木構造化辞書のどこに位置するか,あるいはあるノードにどのような情報が * 付与されているかなどを調べることができます. * * * * @brief Manual inspection of tree lexicon * * This file defines some functions to browse and check the structure * of the tree lexicon at startup time. When invoking with "-check wchmm", * it will enter to a prompt mode after tree lexicon is generated, and * you can check its structure, e.g. how the specified word is located in the * tree lexicon, or what kind of information a node has in it. * * * @author Akinobu Lee * @date Sat Sep 24 15:45:06 2005 * * $Revision: 1.6 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #include /** * * 単語の辞書情報を出力する * * @param winfo [in] 単語辞書 * @param word [in] 出力する単語のID * @param ngram_exist [in] 同時に使用する言語制約が存在する場合TRUE * * * Display informations of a word in the dictionary. * * @param winfo [in] word dictionary * @param word [in] ID of a word to be displayed * @param ngram_exist [in] TRUE when an N-gram was tied with this winfo * */ static void print_winfo_w(WORD_INFO *winfo, WORD_ID word, boolean ngram_exist) { int i; if (word >= winfo->num) return; printf("--winfo\n"); printf("wname = %s\n",winfo->wname[word]); printf("woutput = %s\n",winfo->woutput[word]); printf("\ntransp = %s\n", (winfo->is_transparent[word]) ? "yes" : "no"); printf("wlen = %d\n",winfo->wlen[word]); printf("wseq ="); for (i=0;iwlen[word];i++) { printf(" %s",winfo->wseq[word][i]->name); } printf("\nwseq_def="); for (i=0;iwlen[word];i++) { if (winfo->wseq[word][i]->is_pseudo) { printf(" (%s)", winfo->wseq[word][i]->body.pseudo->name); } else { printf(" %s",winfo->wseq[word][i]->body.defined->name); } } if (ngram_exist) { printf("\nwton = %d\n",winfo->wton[word]); #ifdef CLASS_NGRAM printf("cprob = %f(%f)\n", winfo->cprob[word], pow(10.0, winfo->cprob[word])); #endif } } /** * * 木構造化辞書上の単語の位置情報を出力する. * * @param wchmm [in] 木構造化辞書 * @param word [in] 単語ID * * * Display the location of a word in the tree lexicon. * * @param wchmm [in] tree lexicon * @param word [in] word ID * */ static void print_wchmm_w(WCHMM_INFO *wchmm, WORD_ID word) { int i; if (word >= wchmm->winfo->num) return; printf("--wchmm (word)\n"); printf("offset ="); for (i=0;iwinfo->wlen[word];i++) { printf(" %d",wchmm->offset[word][i]); } printf("\n"); if (wchmm->hmminfo->multipath) { printf("wordbegin = %d\n",wchmm->wordbegin[word]); } printf("wordend = %d\n",wchmm->wordend[word]); } /** * * 木構造化辞書上のあるノードの情報を出力する. * * @param wchmm [in] 木構造化辞書 * @param node [in] ノード番号 * * * Display informations assigned to a node in the tree lexicon. 
* * @param wchmm [in] tree lexicon * @param node [in] node id * */ static void print_wchmm_s(WCHMM_INFO *wchmm, int node) { printf("--wchmm (node)\n"); printf("stend = %d\n",wchmm->stend[node]); if (wchmm->hmminfo->multipath) { if (wchmm->state[node].out.state == NULL) { printf("NO OUTPUT\n"); return; } } #ifdef PASS1_IWCD printf("outstyle= "); switch(wchmm->outstyle[node]) { case AS_STATE: printf("AS_STATE (id=%d)\n", (wchmm->state[node].out.state)->id); break; case AS_LSET: printf("AS_LSET (%d variants)\n", (wchmm->state[node].out.lset)->num); break; case AS_RSET: if ((wchmm->state[node].out.rset)->hmm->is_pseudo) { printf("AS_RSET (name=\"%s\", pseudo=\"%s\", loc=%d)\n", (wchmm->state[node].out.rset)->hmm->name, (wchmm->state[node].out.rset)->hmm->body.pseudo->name, (wchmm->state[node].out.rset)->state_loc); } else { printf("AS_RSET (name=\"%s\", defined=\"%s\", loc=%d)\n", (wchmm->state[node].out.rset)->hmm->name, (wchmm->state[node].out.rset)->hmm->body.defined->name, (wchmm->state[node].out.rset)->state_loc); } break; case AS_LRSET: if ((wchmm->state[node].out.rset)->hmm->is_pseudo) { printf("AS_LRSET (name=\"%s\", pseudo=\"%s\", loc=%d)\n", (wchmm->state[node].out.lrset)->hmm->name, (wchmm->state[node].out.lrset)->hmm->body.pseudo->name, (wchmm->state[node].out.lrset)->state_loc); } else { printf("AS_LRSET (name=\"%s\", defined=\"%s\", loc=%d)\n", (wchmm->state[node].out.lrset)->hmm->name, (wchmm->state[node].out.lrset)->hmm->body.defined->name, (wchmm->state[node].out.lrset)->state_loc); } break; default: printf("UNKNOWN???\n"); } #endif /* PASS1_IWCD */ } /** * * 木構造化辞書上のあるノードについて,遷移先のリストを出力する. * * @param wchmm [in] 木構造化辞書 * @param node [in] ノード番号 * * * Display list of transition arcs from a node in the tree lexicon. * * @param wchmm [in] tree lexicon * @param node [in] node ID * */ static void print_wchmm_s_arc(WCHMM_INFO *wchmm, int node) { A_CELL2 *ac; int i = 0; int j; printf("arcs:\n"); if (wchmm->self_a[node] != LOG_ZERO) { printf(" %d %f(%f)\n", node, wchmm->self_a[node], pow(10.0, wchmm->self_a[node])); i++; } if (wchmm->next_a[node] != LOG_ZERO) { printf(" %d %f(%f)\n", node + 1, wchmm->next_a[node], pow(10.0, wchmm->next_a[node])); i++; } for(ac = wchmm->ac[node]; ac; ac = ac->next) { for (j=0;jn;j++) { printf(" %d %f(%f)\n",ac->arc[j],ac->a[j],pow(10.0, ac->a[j])); i++; } } printf(" total %d arcs\n",i); } /** * * 木構造化辞書上のあるノードの持つ factoring 情報を出力する. * * @param wchmm [in] 木構造化辞書 * @param node [in] ノード番号 * * * Display factoring values on a node in the tree lexicon. * * @param wchmm [in] tree lexicon * @param node [in] node ID * */ static void print_wchmm_s_successor(WCHMM_INFO *wchmm, int node) { int i = 0, j; int scid; scid = wchmm->state[node].scid; if (scid == 0) { printf("no successors\n"); } else if (scid < 0) { printf("successor id: %d\n", scid); #ifdef UNIGRAM_FACTORING if (wchmm->lmtype == LM_PROB) { printf("1-gram factoring node: score=%f\n",wchmm->fscore[-scid]); } #endif } else { #ifdef UNIGRAM_FACTORING printf("successor id: %d\n", scid); printf(" %d\n", wchmm->scword[scid]); #else printf("successor id: %d\n", scid); for (j = 0; j < wchmm->sclen[scid]; j++) { printf(" %d\n", wchmm->sclist[scid][j]); i++; } printf(" total %d successors\n",i); #endif } } /** * * 指定された論理名のHMMを検索し,その情報を出力する. * * @param name [in] 論理HMMの名前 * @param hmminfo [in] HMM定義 * * * Lookup an HMM of given name, and display specs of it. 
* * @param name [in] HMM logical name * @param hmminfo [in] HMM definition * */ static void print_hmminfo(char *name, HTK_HMM_INFO *hmminfo) { HMM_Logical *l; l = htk_hmmdata_lookup_logical(hmminfo, name); if (l == NULL) { printf("no HMM named \"%s\"\n", name); } else { put_logical_hmm(stdout, l); } } /** * * 単語N-gramのある単語の情報を出力する. * * @param ngram [in] 単語N-gram * @param nid [in] N-gram単語のID * * * Display specs of a word in the word N-gram * * @param ngram [in] word N-gram * @param nid [in] N-gram word ID * */ static void print_ngraminfo(NGRAM_INFO *ngram, int nid) { printf("-- N-gram entry --\n"); printf("nid = %d\n", nid); printf("name = %s\n", ngram->wname[nid]); } /** * * 木構造化辞書の構造を起動時に対話的にチェックする際のコマンドループ * * @param wchmm [in] 木構造化辞書 * * * Command loop to browse and check the structure of the constructed tree * lexicon on startup. * * @param wchmm [in] tree lexicon * * @callgraph * @callergraph */ void wchmm_check_interactive(WCHMM_INFO *wchmm) /* interactive check */ { #define MAXNAMELEN 24 char buf[MAXNAMELEN], *name; int arg, newline; WORD_ID argw; boolean endflag; printf("\n\n"); printf("********************************************\n"); printf("******** LM & LEXICON CHECK MODE *********\n"); printf("********************************************\n"); printf("\n"); for (endflag = FALSE; endflag == FALSE;) { printf("===== syntax: command arg (\"H\" for help) > "); if (fgets(buf, MAXNAMELEN, stdin) == NULL) break; name = ""; arg = 0; if (isalpha(buf[0]) != 0 && buf[1] == ' ') { newline = strlen(buf)-1; if (buf[newline] == '\n') { buf[newline] = '\0'; } if (buf[2] != '\0') { name = buf + 2; arg = atoi(name); } } switch(buf[0]) { case 'w': /* word info */ argw = arg; print_winfo_w(wchmm->winfo, argw, (wchmm->ngram) ? TRUE : FALSE); print_wchmm_w(wchmm, argw); break; case 'n': /* node info */ print_wchmm_s(wchmm, arg); break; case 'a': /* arc list */ print_wchmm_s_arc(wchmm, arg); break; #if 0 case 'r': /* reverse arc list */ print_wchmm_r_arc(arg); break; #endif case 's': /* successor word list */ if (wchmm->category_tree) { printf("Error: this is category tree (no successor list)\n"); } else { print_wchmm_s_successor(wchmm, arg); } break; case 't': /* node total info of above */ print_wchmm_s(wchmm, arg); print_wchmm_s_arc(wchmm, arg); #if 0 print_wchmm_r_arc(arg); #endif if (!wchmm->category_tree) { print_wchmm_s_successor(wchmm, arg); } break; case 'h': /* hmm state info */ print_hmminfo(name, wchmm->hmminfo); break; case 'l': /* N-gram language model info */ if (wchmm->lmtype == LM_PROB) { print_ngraminfo(wchmm->ngram, arg); } else { printf("Error: this is not an N-gram model\n"); } break; case 'q': /* quit */ endflag = TRUE; break; default: /* help */ printf("syntax: [command_character] [number(#)]\n"); printf(" w [word_id] ... show word info\n"); printf(" n [state] ... show wchmm state info\n"); printf(" a [state] ... show arcs from the state\n"); #if 0 printf(" r [state] ... show arcs to the state\n"); #endif printf(" s [state] ... show successor list of the state\n"); printf(" h [hmmname] ... show HMM info of the name\n"); printf(" l [nwid] ... N-gram entry info\n"); printf(" H ... print this help\n"); printf(" q ... quit\n"); break; } } printf("\n"); printf("********************************************\n"); printf("***** END OF LM & LEXICON CHECK MODE *****\n"); printf("********************************************\n"); printf("\n"); } /** * * 木構造化辞書内のリンク情報の一貫性をチェックする(内部デバッグ用) * * @param wchmm [in] 木構造化辞書 * * * Check coherence of tree lexicon (for internal debug only!) 
* * @param wchmm [in] tree lexicon * * @callgraph * @callergraph */ void check_wchmm(WCHMM_INFO *wchmm) { int i; boolean ok_flag; int node; WORD_ID w; ok_flag = TRUE; if (wchmm->hmminfo->multipath) { /* check word-beginning nodes */ for(i=0;istartnum;i++) { node = wchmm->startnode[i]; if (wchmm->state[node].out.state != NULL) { printf("Error: word-beginning node %d has output function!\n", node); ok_flag = FALSE; } } /* examine if word->state and state->word mapping is correct */ for(w=0;wwinfo->num;w++) { if (wchmm->stend[wchmm->wordend[w]] != w) { printf("Error: no match of word end for word %d!!\n", w); ok_flag = FALSE; } } } else { /* examine if word->state and state->word mapping is correct */ for (i=0;iwinfo->num;i++) { if (wchmm->stend[wchmm->wordend[i]]!=i) { printf("end ga awanai!!!: word=%d, node=%d, value=%d\n", i, wchmm->wordend[i], wchmm->stend[wchmm->wordend[i]]); ok_flag = FALSE; } } } #if 0 /* check if the last state is unique and has only one output arc */ { int n; A_CELL *ac; i = 0; for (n=0;nn;n++) { if (wchmm->stend[n] != WORD_INVALID) { i++; for (ac=wchmm->state[n].ac; ac; ac=ac->next) { if (ac->arc == n) continue; if (!wchmm->hmminfo->multipath && wchmm->ststart[ac->arc] != WORD_INVALID) continue; break; } if (ac != NULL) { printf("node %d is shared?\n",n); ok_flag = FALSE; } } } if (i != wchmm->winfo->num ) { printf("num of heads of words in wchmm not match word num!!\n"); printf("from wchmm->stend:%d != from winfo:%d ?\n",i,wchmm->winfo->num); ok_flag = FALSE; } } #endif /* if check failed, go into interactive mode */ if (!ok_flag) { wchmm_check_interactive(wchmm); } jlog("STAT: coordination check passed\n"); } /* end of file */ julius-4.2.2/libjulius/src/default.c0000644001051700105040000002033012004452401015677 0ustar ritrlab/** * @file default.c * * * @brief 設定のデフォルト値のセット * * 設定可能なパラメータの初期値をセットします. * * * * @brief Set system default values for configuration parameters * * This file contains a function to set system default values for all the * configuration parameters. This will be called at initialization phase. * * * @author Akinobu Lee * @date Fri Feb 16 15:05:43 2007 * * $Revision: 1.15 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #include /** * * @brief パラメータ構造体 Jconf に初期値を代入する. * * ここで値が初期化されるのは,Jconf 自身に格納される値のみである. * 下位の構造(AM, LM, SEARCH)のパラメータはセットしないので, * それぞれ別の関数で初期化する必要が有る. * * @param j [in] パラメータ構造体 * * * @brief Fill in the system default values to a parameter structure Jconf. * * Only values of the Jconf will be set. The parameters in sub-structures * (AM, LM, SEARCH) will not be set in this function: they should be * initialized separatedly at each corresponding functions. 
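 *
 * Rough sketch of how the defaults are laid out (illustrative; applications
 * normally obtain an initialized Jconf through the JuliusLib API such as
 * j_jconf_new() rather than calling this directly):
 * @code
 * Jconf *jconf = (Jconf *)mymalloc(sizeof(Jconf));
 * jconf_set_default_values(jconf);   // top-level parameters only
 * // AM/LM/SEARCH sub-structures are initialized separately by
 * // jconf_set_default_values_am() / _lm() / _search()
 * @endcode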
* * @param j [in] parameter structure * * * @callgraph * @callergraph * */ void jconf_set_default_values(Jconf *j) { j->input.type = INPUT_VECTOR; j->input.speech_input = SP_MFCFILE; j->input.device = SP_INPUT_DEFAULT; j->input.plugin_source = -1; j->input.sfreq = 16000; j->input.period = 625; j->input.framesize = DEF_FRAMESIZE; j->input.frameshift = DEF_FRAMESHIFT; j->input.use_ds48to16 = FALSE; j->input.inputlist_filename = NULL; j->input.adinnet_port = ADINNET_PORT; #ifdef USE_NETAUDIO j->input.netaudio_devname = NULL; #endif j->input.paramtype_check_flag = TRUE; j->detect.level_thres = 2000; j->detect.head_margin_msec = 300; j->detect.tail_margin_msec = 400; j->detect.zero_cross_num = 60; j->detect.silence_cut = 2; /* accept device default */ j->detect.chunk_size = 1000; #ifdef GMM_VAD j->detect.gmm_margin = DEFAULT_GMM_MARGIN; j->detect.gmm_uptrigger_thres = 0.7; j->detect.gmm_downtrigger_thres = -0.2; #endif j->preprocess.strip_zero_sample = TRUE; j->preprocess.use_zmean = FALSE; j->reject.gmm_filename = NULL; j->reject.gmm_gprune_num = 10; j->reject.gmm_reject_cmn_string = NULL; j->reject.rejectshortlen = 0; #ifdef POWER_REJECT j->reject.powerthres = POWER_REJECT_DEFAULT_THRES; #endif j->decodeopt.forced_realtime = FALSE; j->decodeopt.force_realtime_flag = FALSE; j->decodeopt.segment = FALSE; j->optsection = JCONF_OPT_DEFAULT; j->optsectioning = TRUE; } /** * * Fill in system default values to an AM parameter structure. * @param j [in] AM configuration parameter structure * * * AMパラメータ構造体に初期値を代入する. * * @param j [in] AMパラメータ構造体 * * * * @callgraph * @callergraph * */ void jconf_set_default_values_am(JCONF_AM *j) { j->name[0] = '\0'; j->hmmfilename = NULL; j->mapfilename = NULL; j->gprune_method = GPRUNE_SEL_UNDEF; j->mixnum_thres = 2; j->spmodel_name = NULL; j->hmm_gs_filename = NULL; j->gs_statenum = 24; j->iwcdmethod = IWCD_UNDEF; j->iwcdmaxn = 3; j->iwsp_penalty = -1.0; j->force_multipath = FALSE; undef_para(&(j->analysis.para)); undef_para(&(j->analysis.para_hmm)); undef_para(&(j->analysis.para_default)); undef_para(&(j->analysis.para_htk)); make_default_para(&(j->analysis.para_default)); make_default_para_htk(&(j->analysis.para_htk)); j->analysis.cmnload_filename = NULL; j->analysis.cmn_update = TRUE; j->analysis.cmnsave_filename = NULL; j->analysis.cmn_map_weight = 100.0; j->frontend.ss_alpha = DEF_SSALPHA; j->frontend.ss_floor = DEF_SSFLOOR; j->frontend.sscalc = FALSE; j->frontend.sscalc_len = 300; j->frontend.ssload_filename = NULL; } /** * * Fill in system default values to an LM parameter structure. * * @param j [in] LM configuration parameter structure * * * LMパラメータ構造体に初期値を代入する. 
* * @param j [in] LMパラメータ構造体 * * * @callgraph * @callergraph * */ void jconf_set_default_values_lm(JCONF_LM *j) { j->name[0] = '\0'; j->lmtype = LM_UNDEF; j->lmvar = LM_UNDEF; j->dictfilename = NULL; j->head_silname = NULL; j->tail_silname = NULL; j->forcedict_flag = FALSE; j->ngram_filename = NULL; j->ngram_filename_lr_arpa = NULL; j->ngram_filename_rl_arpa = NULL; j->dfa_filename = NULL; j->gramlist_root = NULL; j->wordlist_root = NULL; j->enable_iwsp = FALSE; j->enable_iwspword = FALSE; j->iwspentry = NULL; #ifdef SEPARATE_BY_UNIGRAM j->separate_wnum = 150; #endif strcpy(j->wordrecog_head_silence_model_name, "silB"); strcpy(j->wordrecog_tail_silence_model_name, "silE"); j->wordrecog_silence_context_name[0] = '\0'; strcpy(j->unknown_name, UNK_WORD_DEFAULT); // or UNK_WORD_DEFAULT2 j->additional_dict_files = NULL; j->additional_dict_entries = NULL; } /** * * Fill in system default values to a search parameter structure. * * @param j [in] search configuration parameter structure * * * 探索(SEARCH)パラメータ構造体に初期値を代入する. * * @param j [in] 探索パラメータ構造体 * * * @callgraph * @callergraph * */ void jconf_set_default_values_search(JCONF_SEARCH *j) { j->name[0] = '\0'; j->amconf = NULL; j->lmconf = NULL; j->compute_only_1pass = FALSE; j->force_ccd_handling = FALSE; j->ccd_handling = FALSE; /* default values below are assigned later using HMM information: j->lmp.* */ j->lmp.lm_penalty_trans = 0.0; j->lmp.penalty1 = 0.0; j->lmp.penalty2 = 0.0; j->lmp.lmp2_specified = FALSE; j->lmp.lmp_specified = FALSE; j->pass1.specified_trellis_beam_width = -1; #ifdef SCORE_PRUNING j->pass1.score_pruning_width = -1.0; #endif #if defined(WPAIR) && defined(WPAIR_KEEP_NLIMIT) j->pass1.wpair_keep_nlimit = 3; #endif #ifdef HASH_CACHE_IW j->pass1.iw_cache_rate = 10; #endif j->pass1.old_tree_function_flag = FALSE; #ifdef DETERMINE j->pass1.determine_score_thres = 10.0; j->pass1.determine_duration_thres = 6; #endif if (strmatch(JULIUS_SETUP, "fast")) { j->pass2.nbest = 1; j->pass2.enveloped_bestfirst_width = 30; } else { j->pass2.nbest = 10; j->pass2.enveloped_bestfirst_width = 100; } #ifdef SCAN_BEAM j->pass2.scan_beam_thres = 80.0; #endif j->pass2.hypo_overflow = 2000; j->pass2.stack_size = 500; j->pass2.lookup_range = 5; j->pass2.looktrellis_flag = FALSE; /* dfa */ j->graph.enabled = FALSE; j->graph.lattice = FALSE; j->graph.confnet = FALSE; j->graph.graph_merge_neighbor_range = 0; #ifdef GRAPHOUT_DEPTHCUT j->graph.graphout_cut_depth = 80; #endif #ifdef GRAPHOUT_LIMIT_BOUNDARY_LOOP j->graph.graphout_limit_boundary_loop_num = 20; #endif #ifdef GRAPHOUT_SEARCH_DELAY_TERMINATION j->graph.graphout_search_delay = FALSE; #endif j->successive.enabled = FALSE; j->successive.sp_frame_duration = 10; j->successive.pausemodelname = NULL; #ifdef SPSEGMENT_NAIST j->successive.sp_margin = DEFAULT_SP_MARGIN; j->successive.sp_delay = DEFAULT_SP_DELAY; #endif #ifdef CONFIDENCE_MEASURE j->annotate.cm_alpha = 0.05; #ifdef CM_MULTIPLE_ALPHA j->annotate.cm_alpha_bgn = 0.03; j->annotate.cm_alpha_end = 0.15; j->annotate.cm_alpha_num = 5; j->annotate.cm_alpha_step = 0.03; #endif #ifdef CM_SEARCH_LIMIT j->annotate.cm_cut_thres = 0.03; #endif #ifdef CM_SEARCH_LIMIT_POPO j->annotate.cm_cut_thres_pop = 0.1; #endif #endif /* CONFIDENCE_MEASURE */ j->annotate.align_result_word_flag = FALSE; j->annotate.align_result_phoneme_flag = FALSE; j->annotate.align_result_state_flag = FALSE; j->output.output_hypo_maxnum = 1; j->output.progout_flag = FALSE; j->output.progout_interval = 300; j->output.multigramout_flag = FALSE; /* dfa */ j->sw.trellis_check_flag = FALSE; 
j->sw.triphone_check_flag = FALSE; j->sw.wchmm_check_flag = FALSE; j->sw.start_inactive = FALSE; j->sw.fallback_pass1_flag = FALSE; } /* end of file */ julius-4.2.2/libjulius/src/search_bestfirst_main.c0000644001051700105040000020652612004452401020626 0ustar ritrlab/** * @file search_bestfirst_main.c * * * @brief 第2パス:スタックデコーディング * * Julius の第2パスであるスタックデコーディングアルゴリズムが記述され * ています. 第1パスの結果の単語トレリス情報を元に,第1パスとは逆向き * の right-to-left に探索を行います. 仮説のスコアは、第1パスのトレリ * スとそのスコアを未探索部のヒューリスティックとして接続することで, * 文全体の仮説スコアを考慮しながら探索を行います. * * 次単語集合の取得のために,単語N-gramでは ngram_decode.c 内の関数が, * 文法では dfa_decode.c の関数が用いられます. * * * * * @brief The second pass: stack decoding * * This file implements search algorithm based on best-first stack * decoding on the 2nd pass. The search will be performed on backward * (i.e. right-to-left) direction, using the result of 1st pass (word * trellis) as heuristics of unreached area. Hypothesis are stored * in a global stack, and the best one will be expanded according to * the survived words in the word trellis and language constraint. * * The expanding words will be given by ngram_decode.c for N-gram * based recognition, with their langugage probabilities, or by * dfa_decode.c for grammar-based recognition, with their emitting * DFA state information. * * * * @author Akinobu Lee * @date Thu Sep 08 11:51:12 2005 * * $Revision: 1.12 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #include /* declaration of local functions */ static NODE *get_best_from_stack(NODE **start, int *stacknum); static int put_to_stack(NODE *new, NODE **start, NODE **bottom, int *stacknum, int stacksize); static void put_all_in_stack(NODE **start, int *stacknum, WORD_INFO *winfo); static void free_all_nodes(NODE *node); static void put_hypo_woutput(NODE *hypo, WORD_INFO *winfo); static void put_hypo_wname(NODE *hypo, WORD_INFO *winfo); /**********************************************************************/ /********** 次単語格納領域の割り当て *************************/ /********** allocate memory for nextword data *************************/ /**********************************************************************/ /** * * 次単語の格納領域の割り当て. * 次単語候補を格納するための NEXTWORD 配列にメモリを割り付ける. * * @param maxlen [out] 格納可能な単語数 * @param root [out] 割り付け領域の先頭へのポインタ * @param max [in] 割り付ける領域のサイズ * * @return 割り付けられた次単語配列へのポインタを返す. * * * Allocate memory for next word candidates. * Allocate NEXTWORD array for storing list of candidate next words. * * @param maxlen [out] maximum number of words that can be stored * @param root [out] pointer to the top address of allocated data * @param max [in] number of elementes to be allocated * * @return the newly allocated pointer of NEXTWORD array. * */ static NEXTWORD ** nw_malloc(int *maxlen, NEXTWORD **root, int max) { NEXTWORD *nwtmp; NEXTWORD **nw; int i; nw = (NEXTWORD **)mymalloc(max * sizeof(NEXTWORD *)); nwtmp = (NEXTWORD *)mymalloc(max * sizeof(NEXTWORD)); for (i=0;i * 次単語の格納領域の解放. * * @param nw [in] NEXTWORD配列 * @param root [in] nw_malloc() で与えられた領域先頭へのポインタ * * * Free next word candidate area. * * @param nw [in] pointer to NEXTWORD structure to be free. * @param root [in] pointer to the top address of allocated data previously * returned by nw_malloc() * */ static void nw_free(NEXTWORD **nw, NEXTWORD *root) { free(root); free(nw); } /** * * @brief 次単語候補格納用の NEXTWORD 配列のメモリ領域を伸張する. 
* * この関数は探索中に次単語候補集合が溢れた際に呼ばれ,配列により多くの * 次単語候補を格納できるよう NEXTWORD の中身を realloc() する. * 実際には最初に nw_malloc() で辞書の単語数分だけ領域を確保しており, * 単語N-gram使用時は呼ばれることはない. 文法認識では,ショートポーズの * スキップ処理により状態の異なる候補を同時に展開するので, * 次単語数が語彙数よりも大きいことが起こりうる. * * @param nwold [i/o] NEXTWORD配列 * @param maxlen [i/o] 最大格納数を格納するポインタ. 現在の最大格納数を * 入れて呼び,関数内で新たに確保された数に変更される. * @param root [i/o] 領域先頭へのポインタを格納するアドレス. 関数内で * 書き換えられる. * @param num [in] 伸長する長さ * * @return 伸張された新たな次単語配列へのポインタを返す. * * * @brief expand data area of NEXTWORD. * * In DFA mode, the number of nextwords can exceed the vocabulary size when * more than one DFA states are expanded by short-pause skipping. * In such case, the nextword data area should expanded here. * * @param nwold [i/o] NEXTWORD array * @param maxlen [i/o] pointer to the maximum number of words that can be * stored. The current number should be stored before calling this function, * and the resulting new number will be stored within this function. * @param root [i/o] address to the pointer of the allocated data. The * value will be updated by reallocation in this function. * @param num [in] size to expand * * @return the newlly re-allocated pointer of NEXTWORD array. * */ static NEXTWORD ** nw_expand(NEXTWORD **nwold, int *maxlen, NEXTWORD **root, int num) { NEXTWORD *nwtmp; NEXTWORD **nw; int i; int nwmaxlen; nwmaxlen = *maxlen + num; nwtmp = (NEXTWORD *)myrealloc(*root, nwmaxlen * sizeof(NEXTWORD)); nw = (NEXTWORD **)myrealloc(nwold, nwmaxlen * sizeof(NEXTWORD *)); nw[0] = nwtmp; for (i=1;i * スタックトップの最尤仮説を取り出す. * * @param start [i/o] スタックの先頭ノードへのポインタ(書換えられる場合あり) * @param stacknum [i/o] 現在のスタックサイズへのポインタ(書き換えあり) * * @return 取り出した最尤仮説のポインタを返す. * * * Pop the best hypothesis from stack. * * @param start [i/o] pointer to stack top node (will be modified if necessary) * @param stacknum [i/o] pointer to the current stack size (will be modified * if necessary) * * @return pointer to the popped hypothesis. * */ static NODE * get_best_from_stack(NODE **start, int *stacknum) { NODE *tmp; /* return top */ tmp=(*start); if ((*start)!=NULL) { /* delete it from stack */ (*start)=(*start)->next; if ((*start)!=NULL) (*start)->prev=NULL; (*stacknum)--; return(tmp); } else { return(NULL); } } /** * * ある仮説がスタック内に格納されるかどうかチェックする. * * @param new [in] チェックする仮説 * @param bottom [in] スタックの底ノードへのポインタ * @param stacknum [in] スタックに現在格納されているノード数へのポインタ * @param stacksize [in] スタックのノード数の上限 * * @return スタックのサイズが上限に達していないか,スコアが底ノードよりも * よければ格納されるとして 0 を,それ以外であれば格納できないとして -1 を * 返す. * * * Check whether a hypothesis will be stored in the stack. * * @param new [in] hypothesis to be checked * @param bottom [in] pointer to stack bottom node * @param stacknum [in] pointer to current stack size * @param stacksize [in] pointer to maximum stack size limit * * @return 0 if it will be stored in the stack (in case @a stacknum < * @a stacksize or the score of @a new is better than bottom. Otherwise * returns -1, which means it can not be pushed to the stack. * */ static int can_put_to_stack(NODE *new, NODE **bottom, int *stacknum, int stacksize) { /* stack size check */ if ((*stacknum + 1) > stacksize && (*bottom)->score >= new->score) { /* new node is below the bottom: discard it */ return(-1); } return(0); } /** * * スタックに新たな仮説を格納する. * スタック内のスコア順を考慮した位置に挿入される. * 格納できなかった場合,与えられた仮説は free_node() される. * * @param new [in] チェックする仮説 * @param start [i/o] スタックのトップノードへのポインタ * @param bottom [i/o] スタックの底ノードへのポインタ * @param stacknum [i/o] スタックに現在格納されているノード数へのポインタ * @param stacksize [in] スタックのノード数の上限 * * @return 格納できれば 0 を,できなかった場合は -1 を返す. 
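 *
 * (Illustrative note added for clarity, not part of the original
 *  comment: the hypothesis stack is a doubly linked list kept sorted
 *  by score, with the top node holding the best score.  The insertion
 *  code below compares the new score against the midpoint of the
 *  current top and bottom scores and walks the list from whichever
 *  end is estimated to be closer, so a typical insertion touches only
 *  a few nodes.)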
* * * Push a new hypothesis into the stack, keeping score order. * If not succeeded, the given new hypothesis will be freed by free_node(). * * @param new [in] hypothesis to be checked * @param start [i/o] pointer to stack top node * @param bottom [i/o] pointer to stack bottom node * @param stacknum [i/o] pointer to current stack size * @param stacksize [in] pointer to maximum stack size limit * * @return 0 if succeded, or -1 if failed to push because of number * limitation or too low score. * */ static int put_to_stack(NODE *new, NODE **start, NODE **bottom, int *stacknum, int stacksize) { NODE *tmp; /* stack size is going to increase... */ (*stacknum)++; /* stack size check */ if ((*stacknum)>stacksize) { /* stack size overflow */ (*stacknum)--; if ((*bottom)->score < new->score) { /* new node will be inserted in the stack: free the bottom */ tmp=(*bottom); (*bottom)->prev->next=NULL; (*bottom)=(*bottom)->prev; free_node(tmp); } else { /* new node is below the bottom: discard it */ free_node(new); return(-1); } } /* insert new node on edge */ if ((*start)==NULL) { /* no node in stack */ /* new node is the only node */ (*start)=new; (*bottom)=new; new->next=NULL; new->prev=NULL; return(0); } if ((*start)->score <= new->score) { /* insert on the top */ new->next = (*start); new->next->prev = new; (*start)=new; new->prev=NULL; return(0); } if ((*bottom)->score >= new->score) { /* insert on the bottom */ new->prev = (*bottom); new->prev->next = new; (*bottom)=new; new->next=NULL; return(0); } /* now the new node is between (*start) and (*bottom) */ if (((*start)->score + (*bottom)->score) / 2 > new->score) { /* search from bottom */ tmp=(*bottom); while(tmp->score < new->score) tmp=tmp->prev; new->prev=tmp; new->next=tmp->next; tmp->next->prev=new; tmp->next=new; } else { /* search from start */ tmp=(*start); while(tmp->score > new->score) tmp=tmp->next; new->next=tmp; new->prev=tmp->prev; tmp->prev->next=new; tmp->prev=new; } return(0); } /** * * スタックの中身を全て出力する. スタックの中身は失われる. (デバッグ用) * * @param start [i/o] スタックのトップノードへのポインタ * @param stacknum [i/o] スタックに現在格納されているノード数へのポインタ * @param winfo [in] 単語辞書 * * * Output all nodes in the stack. All nodes will be lost (for debug). * * @param start [i/o] pointer to stack top node * @param stacknum [i/o] pointer to current stack size * @param winfo [in] word dictionary * */ static void put_all_in_stack(NODE **start, int *stacknum, WORD_INFO *winfo) { NODE *ntmp; jlog("DEBUG: hypotheses remained in global stack\n"); while ((ntmp = get_best_from_stack(start, stacknum)) != NULL) { jlog("DEBUG: %3d: s=%f",*stacknum, ntmp->score); put_hypo_woutput(ntmp, winfo); free_node(ntmp); } } /** * * スタック内の全仮説を解放する. * * @param start [i/o] スタックのトップノード * * * Free all nodes in a stack. 
* * @param start [i/o] stack top node * */ static void free_all_nodes(NODE *start) { NODE *tmp; NODE *next; tmp=start; while(tmp) { next=tmp->next; free_node(tmp); tmp=next; } } #ifdef CONFIDENCE_MEASURE /**********************************************************************/ /********** 単語信頼度の計算 ******************************************/ /********** Confidence scoring ****************************************/ /**********************************************************************/ #ifdef CM_SEARCH /**************************************************************/ /**** CM computation method 1(default): ******/ /**** - Use local posterior probabilities while search ******/ /**** - Obtain CM at hypothesis expansion time ******/ /**************************************************************/ /** * * CM計算用のパラメータを初期化する. CM計算の直前に呼び出される. * * @param sd [i/o] 第2パス用ワークエリア * @param wnum [in] スタックサイズ * @param cm_alpha [in] 使用するスケーリング値 * * * * Initialize parameters for confidence scoring (will be called at * each startup of 2nd pass) * * @param sd [i/o] work area for 2nd pass * @param wnum [in] stack size * @param cm_alpha [in] scaling value to use at confidence scoring * */ static void cm_init(StackDecode *sd, int wnum, LOGPROB cm_alpha #ifdef CM_MULTIPLE_ALPHA , cm_alpha_num #endif ) { sd->l_stacksize = wnum; sd->l_start = sd->l_bottom = NULL; sd->l_stacknum = 0; sd->cm_alpha = cm_alpha; #ifdef CM_MULTIPLE_ALPHA if (sd->cmsumlist) { if (sd->cmsumlistlen < cm_alpha_num) { free(sd->cmsumlist); sd->cmsumlist = NULL; } } if (sd->cmsumlist == NULL) { sd->cmsumlist = (LOGPROB *)mymalloc(sizeof(LOGPROB) * cm_alpha_num); sd->cmsumlistlen = cm_alpha_num; } #endif } /** * * CM計算のためにローカルスタックに展開仮説を一時的に保存する. * * @param sd [i/o] 第2パス用ワークエリア * @param new [in] 展開仮説 * * * Store an expanded hypothesis to the local stack for later CM scoring * * @param sd [i/o] work area for 2nd pass * @param new [in] expanded hypothesis * */ static void cm_store(StackDecode *sd, NODE *new) { /* store the generated hypo into local stack */ put_to_stack(new, &(sd->l_start), &(sd->l_bottom), &(sd->l_stacknum), sd->l_stacksize); } /** * * CM計算のためにローカルスタック内の仮説の出現確率の合計を求める. * * @param sd [i/o] 第2パス用ワークエリア * * * * Compute sum of probabilities for hypotheses in the local stack for * CM scoring. * * @param sd [i/o] work area for 2nd pass * * */ static void cm_sum_score(StackDecode *sd #ifdef CM_MULTIPLE_ALPHA , bgn, end, step #endif ) { NODE *node; LOGPROB sum; #ifdef CM_MULTIPLE_ALPHA LOGPROB a; int j; #endif if (sd->l_start == NULL) return; /* no hypo */ sd->cm_tmpbestscore = sd->l_start->score; /* best hypo is at the top of the stack */ #ifdef CM_MULTIPLE_ALPHA for (j = 0, a = bgn; a <= end; a += step) { sum = 0.0; for(node = sd->l_start; node; node = node->next) { sum += pow(10, a * (node->score - sd->cm_tmpbestscore)); } sd->cmsumlist[j++] = sum; /* store sums for each alpha coef. */ } #else sum = 0.0; for(node = sd->l_start; node; node = node->next) { sum += pow(10, sd->cm_alpha * (node->score - sd->cm_tmpbestscore)); } sd->cm_tmpsum = sum; /* store sum */ #endif } /** * * 展開されたある文仮説について,その展開単語の信頼度を,事後確率に * 基づいて計算する. * * @param sd [i/o] 第2パス用ワークエリア * @param node [i/o] 展開されたある文仮説 * * * Compute confidence score of a new word at the end of the given hypothesis, * based on the local posterior probabilities. 
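 *
 * (Illustrative note, not part of the original comment: for the
 *  single-alpha case the value assigned here is
 *
 *      cm = 10^(cm_alpha * (score - best_local_score)) / sum
 *
 *  where "sum" is the total accumulated in cm_sum_score() over all
 *  hypotheses expanded from the same source hypothesis, i.e. the
 *  confidence is a local posterior probability of the expanded word.)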
* * @param sd [i/o] work area for 2nd pass * @param node [i/o] expanded hypothesis * */ static void cm_set_score(StackDecode *sd, NODE *node #ifdef CM_MULTIPLE_ALPHA , bgn, end, step #endif ) { #ifdef CM_MULTIPLE_ALPHA int j; LOGPROB a; #endif #ifdef CM_MULTIPLE_ALPHA for (j = 0, a = bgn; a <= end; a += step) { node->cmscore[node->seqnum-1][j] = pow(10, a * (node->score - sd->cm_tmpbestscore)) / sd->cmsumlist[j]; j++; } #else node->cmscore[node->seqnum-1] = pow(10, sd->cm_alpha * (node->score - sd->cm_tmpbestscore)) / sd->cm_tmpsum; #endif } /** * * CM計算用のローカルスタックから仮説を取り出す. * * @param sd [i/o] 第2パス用ワークエリア * * @return 取り出された文仮説を返す. * * * Pop one node from local stack for confidence scoring. * * @param sd [i/o] work area for 2nd pass * * @return the popped hypothesis. * */ static NODE * cm_get_node(StackDecode *sd) { return(get_best_from_stack(&(sd->l_start), &(sd->l_stacknum))); } #endif /* CM_SEARCH */ #ifdef CM_NBEST /*****************************************************************/ /**** CM computation method 2: conventional N-best scoring *******/ /**** NOTE: enough N-best should be computed (-n 10 ~ -n 100) ****/ /*****************************************************************/ /** * * スタック内にある文候補から単語信頼度を計算する. * * @param sd [i/o] 第2パス用ワークエリア * @param start [in] スタックの先頭ノード * @param stacknum [in] スタックサイズ * @param jconf [in] SEARCH用設定パラメータ * * * Compute confidence scores from N-best sentence candidates in the * given stack. * * @param sd [i/o] work area for 2nd pass * @param start [in] stack top node * @param stacknum [in] current stack size * @param jconf [in] SEARCH configuration parameters * */ static void cm_compute_from_nbest(StackDecode *sd, NODE *start, int stacknum, JCONF_SEARCH *jconf) { NODE *node; LOGPROB bestscore, sum, s; WORD_ID w; int i; LOGPROB cm_alpha; int j; /* prepare buffer */ #ifdef CM_MULTIPLE_ALPHA if (sd->cmsumlist) { if (sd->cmsumlistlen < jconf->annotate.cm_alpha_num) { free(sd->cmsumlist); sd->cmsumlist = NULL; } } if (sd->cmsumlist == NULL) { sd->cmsumlist = (LOGPROB *)mymalloc(sizeof(LOGPROB) * jconf->annotate.cm_alpha_num); sd->cmsumlistlen = cm_alpha_num; } #endif if (sd->sentcm == NULL) { /* not allocated yet */ sd->sentcm = (LOGPROB *)mymalloc(sizeof(LOGPROB)*stacknum); sd->sentnum = stacknum; } else if (sd->sentnum < stacknum) { /* need expanded */ sd->sentcm = (LOGPROB *)myrealloc(sd->sentcm, sizeof(LOGPROB)*stacknum); sd->sentnum = stacknum; } if (sd->wordcm == NULL) { sd->wordcm = (LOGPROB *)mymalloc(sizeof(LOGPROB) * winfo->num); sd->wordnum = winfo->num; } else if (sd->wordnum < winfo->num) { sd->wordcm = (LOGPROB *)myremalloc(sd->wordcm, sizeof(LOGPROB) * winfo->num); sd->wordnum = winfo->num; } cm_alpha = jconf->annotate.cm_alpha; #ifdef CM_MULTIPLE_ALPHA for (j = 0, cm_alpha = jconf->annotate.cm_alpha_bgn; cm_alpha <= jconf->annotate.cm_alpha_end; cm_alpha += jconf->annotate.cm_alpha_step) { #endif /* clear whole word cm buffer */ for(w=0;wwordnum;w++) { sd->wordcm[w] = 0.0; } /* get best score */ bestscore = start->score; /* compute sum score of all hypothesis */ sum = 0.0; for (node = start; node != NULL; node = node->next) { sum += pow(10, cm_alpha * (node->score - bestscore)); } /* compute sentence posteriori probabilities */ i = 0; for (node = start; node != NULL; node = node->next) { sd->sentcm[i] = pow(10, cm_alpha * (node->score - bestscore)) / sum; i++; } /* compute word posteriori probabilities */ i = 0; for (node = start; node != NULL; node = node->next) { for (w=0;wseqnum;w++) { sd->wordcm[node->seq[w]] += sd->sentcm[i]; } i++; } 
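  /* (illustrative recap, not part of the original source) With the
     N-best method the quantities built above are, for sentence i with
     total score s_i and best score s_best:

         sentcm[i] = 10^(alpha * (s_i - s_best)) / sum_j 10^(alpha * (s_j - s_best))

     and the confidence of a word is the sum of sentcm[i] over all
     sentences i in the result stack that contain that word; these
     per-word sums are what gets copied into node->cmscore[] below. */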
/* store the probabilities to node */ for (node = start; node != NULL; node = node->next) { for (w=0;wseqnum;w++) { #ifdef CM_MULTIPLE_ALPHA node->cmscore[w][j] = sd->wordcm[node->seq[w]]; #else node->cmscore[w] = sd->wordcm[node->seq[w]]; #endif } } #ifdef CM_MULTIPLE_ALPHA j++; } #endif } #endif /* CM_NBEST */ #endif /* CONFIDENCE_MEASURE */ /**********************************************************************/ /********** Enveloped best-first search *******************************/ /**********************************************************************/ /* * 1. Word envelope * * 一種の仮説ビーム幅を設定: 展開元となった仮説の数をその仮説長(単語数) * ごとにカウントする. 一定数を越えたらそれより短い仮説は以後展開しない. * * Introduce a kind of beam width to search tree: count the number of * popped hypotheses per the depth of the hypotheses, and when a count * in a certain depth reaches the threshold, all hypotheses shorter than * the depth will be dropped from candidates. * */ /** * * Word envelope 用にカウンタを初期化する. * * @param s [i/o] 第2パス用ワークエリア * * * * Initialize counters fro word enveloping. * * @param s [i/o] work area for 2nd pass * */ static void wb_init(StackDecode *s) { int i; for(i=0;i<=MAXSEQNUM;i++) s->hypo_len_count[i] = 0; s->maximum_filled_length = -1; } /** * * Word envelope を参照して,与えられた仮説を展開してよいかどうかを返す. * また,Word envelope のカウンタを更新する. * * @param s [i/o] 第2パス用ワークエリア * @param now [in] 今から展開しようとしている仮説 * @param width [in] 展開カウントの上限値 * * @return 展開可能(展開カウントが上限に達していない)なら TRUE, * 展開不可能(カウントが上限に達している)なら FALSE を返す. * * * Consult the current word envelope to check if word expansion from * the hypothesis node is allowed or not. Also increment the counter * of word envelope if needed. * * @param s [i/o] work area for 2nd pass * @param now [in] popped hypothesis * @param width [in] maximum limit of expansion count * * @return TRUE if word expansion is allowed (in case word envelope count * of the corresponding hypothesis depth does not reach the limit), or * FALSE if already prohibited. * */ static boolean wb_ok(StackDecode *s, NODE *now, int width) { if (now->seqnum <= s->maximum_filled_length) { /* word expansion is not allowed because a word expansion count at deeper level has already reached the limit */ return FALSE; } else { /* word expansion is possible. Increment the word expansion count of the given depth */ s->hypo_len_count[now->seqnum]++; if (s->hypo_len_count[now->seqnum] > width) { /* the word expansion count of this level has reached the limit, so set the beam-filled depth to this level to inhibit further expansion of shorter hypotheses. */ if (s->maximum_filled_length < now->seqnum) s->maximum_filled_length = now->seqnum; } return TRUE; } } #ifdef SCAN_BEAM /* * 2. Score envelope * * Viterbi計算量の削減: 入力フレームごとの最大尤度 (score envelope) を * 全仮説にわたって記録しておく. 仮説の前向き尤度計算時に,その envelope * から一定幅以上スコアが下回るとき,Viterbi パスの演算を中断する. * * ここでは,取り出した仮説からフレームごとの score envelope を更新する * 部分が記述されている. Envelope を考慮した Viterbi 計算の実際は * scan_word() を参照のこと. * * Reduce computation cost of hypothesis Viterbi processing by setting a * "score envelope" that holds the maximum scores at every frames * throughout the expanded hypotheses. When calculating Viterbi path * on HMM trellis for updating score of popped hypothesis, Viterbi paths * that goes below a certain range from the score envelope are dropped. * * These functions are for updating the score envelope according to the * popped hypothesis. For actual Viterbi process with score envelope, * see scan_word(). * */ /** * * Score envelope を初期化する. 第2パスの開始時に呼ばれる. 
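 *
 * (Illustrative note, not part of the original comment: every frame of
 *  framemaxscore[] starts at LOG_ZERO and is raised by envl_update()
 *  with the forward scores of each popped hypothesis; during
 *  scan_word() a Viterbi path whose partial score falls more than the
 *  scan beam width below framemaxscore[t] at frame t is abandoned.
 *  The width is presumably the scan_beam_thres value configured for
 *  the 2nd pass.)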
* * @param s [i/o] 第2パス用ワークエリア * @param framenum [in] 入力フレーム長 * * * Initialize score envelope. This will be called once at the beginning * of 2nd pass. * * @param s [i/o] work area for 2nd pass * @param framenum [in] input frame length * */ static void envl_init(StackDecode *s, int framenum) { int i; for(i=0;iframemaxscore[i] = LOG_ZERO; } /** * * 仮説の前向きスコアから score envelope を更新する. * * @param s [i/o] 第2パス用ワークエリア * @param n [in] 仮説 * @param framenum [in] 入力フレーム長 * * * Update the score envelope using forward score of the given hypothesis. * * @param s [i/o] work area for 2nd pass * @param n [in] hypothesis * @param framenum [in] input frame length * */ static void envl_update(StackDecode *s, NODE *n, int framenum) { int t; for(t=framenum-1;t>=0;t--) { if (s->framemaxscore[t] < n->g[t]) s->framemaxscore[t] = n->g[t]; } } #endif /* SCAN_BEAM */ /**********************************************************************/ /********** Short pause segmentation **********************************/ /**********************************************************************/ /** * * 認識結果から,次の入力区間の認識を開始する際の初期単語履歴をセットする. * 透過語および仮説の重複を考慮して初期単語履歴が決定される. * * @param hypo [in] 現在の入力区間の認識結果としての文候補 * @param r [in] 認識処理インスタンス * * * Set the previous word context for the recognition of the next input * segment from the current recognition result. The initial context word * will be chosen from the current recognition result skipping transparent * word and multiplied words. * * @param hypo [in] sentence candidate as a recognition result of current * input segment * @param r [in] recognition process instance * * * @callgraph * @callergraph * */ void segment_set_last_nword(NODE *hypo, RecogProcess *r) { int i; WORD_ID w; if (r->sp_break_last_nword_allow_override) { for(i=0;iseqnum;i++) { w = hypo->seq[i]; if (w != r->sp_break_last_word && !is_sil(w, r) && !r->lm->winfo->is_transparent[w] ) { r->sp_break_last_nword = w; break; } } #ifdef SP_BREAK_DEBUG printf("sp_break_last_nword=%d[%s]\n", r->sp_break_last_nword, r->lm->winfo->woutput[r->sp_break_last_nword]); #endif } else { r->sp_break_last_nword = WORD_INVALID; } } /**********************************************************************/ /********* Debug output of hypothesis while search ********************/ /**********************************************************************/ /** * * デバッグ用に仮説の単語列を表示する. * * @param hypo [in] 仮説 * @param winfo [in] 単語辞書 * * * Output word sequence of a hypothesis for debug. * * @param hypo [in] hypothesis * @param winfo [in] word dictionary * */ static void put_hypo_woutput(NODE *hypo, WORD_INFO *winfo) { int i,w; if (hypo != NULL) { for (i=hypo->seqnum-1;i>=0;i--) { w = hypo->seq[i]; jlog(" %s", winfo->woutput[w]); } } jlog("\n"); } /** * * デバッグ用に仮説の単語N-gramエントリ名(Julianではカテゴリ番号)を出力する. * * @param hypo [in] 仮説 * @param winfo [in] 単語辞書 * * * Output N-gram entries (or DFA category IDs) of a hypothesis for debug. * * @param hypo [in] hypothesis * @param winfo [in] word dictionary * */ static void put_hypo_wname(NODE *hypo, WORD_INFO *winfo) { int i,w; if (hypo != NULL) { for (i=hypo->seqnum-1;i>=0;i--) { w = hypo->seq[i]; jlog(" %s", winfo->wname[w]); } } jlog("\n"); } /** * * Save a hypothesis as a recognition result f 2nd pass. * * * 第2パスの結果として仮説を保存する. 
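 *
 * (Illustrative note, not part of the original comment: because the
 *  2nd pass searches right-to-left, hypo->seq[] holds the words in
 *  reverse order; the copy loop below stores seq[seqnum-1-i] so that
 *  Sentence.word[] ends up in normal left-to-right order, and the
 *  same reversal is applied to the per-word confidence scores.)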
* * * @param hypo [in] hypothesis to save * @param r [in] recognition process instance * */ static void store_result_pass2(NODE *hypo, RecogProcess *r) { int i; Sentence *s; s = &(r->result.sent[r->result.sentnum]); s->word_num = hypo->seqnum; for (i = 0; i < hypo->seqnum; i++) { s->word[i] = hypo->seq[hypo->seqnum - 1 - i]; } #ifdef CONFIDENCE_MEASURE for (i = 0; i < hypo->seqnum; i++) { s->confidence[i] = hypo->cmscore[hypo->seqnum - 1 - i]; } #endif s->score = hypo->score; s->score_lm = hypo->totallscore; s->score_am = hypo->score - hypo->totallscore; if (r->lmtype == LM_DFA) { /* output which grammar the hypothesis belongs to on multiple grammar */ /* determine only by the last word */ if (multigram_get_all_num(r->lm) > 0) { s->gram_id = multigram_get_gram_from_category(r->lm->winfo->wton[hypo->seq[0]], r->lm); } else { s->gram_id = 0; } } r->result.sentnum++; /* add to tail */ } /**********************************************************************/ /******** Output top 'ncan' hypotheses in a stack and free all ********/ /**********************************************************************/ /** * * スタックから上位の仮説を取り出し,認識結果として出力する. さらに, * スタックに格納されている全ての仮説を解放する. * * 得られた文候補は,いったん結果格納用のスタックに格納される. 探索終 * 了("-n" の数だけ文候補が見つかるか,探索が中断される)の後,結果的 * に得られた文候補の中から上位N個("-output" で指定された数)の仮説を * 出力する. * * 指定があればアラインメントもここで実行する. * * @param r_start [i/o] 結果格納用スタックの先頭ノードへのポインタ * @param r_bottom [i/o] 結果格納用スタックの底ノードへのポインタ * @param r_stacknum [i/o] スタックに格納されているノード数へのポインタ * @param ncan [in] 出力する上位仮説数 * @param r [in] 認識処理インスタンス * @param param [in] 入力パラメータ * * * Output top N-best hypotheses in a stack as a recognition result, and * free all hypotheses. * * The sentence candidates found at the 2nd pass will be pushed to the * "result stack" instead of immediate output. After recognition is * over (in case the number of found sentences reaches the number * specified by "-n", or search has been terminated in other reason), * the top N sentence candidates in the stack will be output as a * final result (where N should be specified by "-output"). After * then, all the hypotheses in the stack will be freed. * * Additionally, forced alignment for the recognized sentence * will be executed here if required. * * @param r_start [i/o] pointer to the top node of the result stack * @param r_bottom [i/o] pointer to the bottom node of the result stack * @param r_stacknum [i/o] number of candidates in the current result stack * @param ncan [in] number of sentence candidates to be output * @param r [in] recognition process instance * @param param [in] input parameter * */ static void result_reorder_and_output(NODE **r_start, NODE **r_bottom, int *r_stacknum, int ncan, RecogProcess *r, HTK_Param *param) { NODE *now; int num; #ifdef CM_NBEST /* compute CM from the N-best sentence candidates */ cm_compute_from_nbest(&(r->pass2), *r_start, *r_stacknum, r->config); #endif num = 0; while ((now = get_best_from_stack(r_start,r_stacknum)) != NULL && num < ncan) { num++; /* output result */ store_result_pass2(now, r); /* if in sp segmentation mode, */ /* set the last context-aware word for next recognition */ if (r->lmtype == LM_PROB && r->config->successive.enabled && num == 1) segment_set_last_nword(now, r); free_node(now); } /* free the rest */ if (now != NULL) free_node(now); free_all_nodes(*r_start); } /** * * @brief Post-process of 2nd pass when no result is obtained. * * This is a post-process for the 2nd pass which should be called when * the 2nd pass has no result. 
This will occur when the 2nd pass was * executed but failed with no sentence candidate, or skipped by * an option. * * When the 2nd argument is set to TRUE, the result of the 1st pass * will be copied as final result of 2nd pass and the recognition * status flag is set to SUCCESS. If FALSE, recognition status will * be set to FAILED. On sp-segment decoding, the initial hypothesis * marker for the next input segment will be set up from the 1st pass * result also. * * @param r [in] recognition process instance * @param use_1pass_as_final [in] when TRUE the 1st pass result will be used as final recognition result of 2nd pass. * * * * @brief 第2パスの解が得られない場合の終了処理 * * 第2パスが失敗した場合や第2パスが実行されない設定の場合の * 認識終了処理を行う.use_1pass_as_final が TRUE のとき, * 第1パスの結果を第2パスの結果としてコピーして格納し,認識成功とする. * FALSE時は認識失敗とする. * また,sp-segment 時は,次の認識区間用の初期仮説設定も第1パスの * 結果から行う. * * @param r [in] 認識処理インスタンス * @param use_1pass_as_final [in] TRUE 時第1パスの結果を第2パス結果に格納する * * */ void pass2_finalize_on_no_result(RecogProcess *r, boolean use_1pass_as_final) { NODE *now; int i, j; /* 探索失敗 */ /* search failed */ /* make temporal hypothesis data from the result of previous 1st pass */ now = newnode(r); for (i=0;ipass1_wnum;i++) { now->seq[i] = r->pass1_wseq[r->pass1_wnum-1-i]; } now->seqnum = r->pass1_wnum; now->score = r->pass1_score; #ifdef CONFIDENCE_MEASURE /* fill in null values */ #ifdef CM_MULTIPLE_ALPHA for(j=0;jannotate.cm_alpha_num;j++) { for(i=0;iseqnum;i++) now->cmscore[i][j] = 0.0; } #else for(i=0;iseqnum;i++) now->cmscore[i] = 0.0; #endif #endif /* CONFIDENCE_MEASURE */ if (r->lmtype == LM_PROB && r->config->successive.enabled) { /* if in sp segment mode, */ /* find segment restart words from 1st pass result */ segment_set_last_nword(now, r); } if (use_1pass_as_final) { /* 第1パスの結果をそのまま出力する */ /* output the result of the previous 1st pass as a final result. */ store_result_pass2(now, r); r->result.status = J_RESULT_STATUS_SUCCESS; } else { /* store output as failure */ r->result.status = J_RESULT_STATUS_FAIL; //callback_exec(CALLBACK_RESULT, r); } free_node(now); } /**********************************************************************/ /********* Main stack decoding function *******************************/ /**********************************************************************/ /** * * 第2探索パスであるスタックデコーディングを行うメイン関数 * * 引数のうち cate_bgn, cate_num は単語N-gramでは無視される. * * @param param [in] 入力パラメータベクトル列 * @param r [i/o] 認識処理インスタンス * @param cate_bgn [in] 展開対象とすべきカテゴリの開始番号 * @param cate_num [in] 展開対象とすべきカテゴリの数 * * * Main function to perform stack decoding of the 2nd search pass. * * The cate_bgn and cate_num (third and fourth argument) will have * no effect when N-gram is used. 
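 *
 * (Illustrative outline added for readability, not part of the
 *  original comment; it paraphrases the code below:
 *
 *      push initial word hypotheses taken from the 1st-pass trellis
 *      loop:
 *        now = pop the best hypothesis from the stack
 *        check the word envelope and CM threshold, update the score envelope
 *        if now is already accepted as a full sentence -> store it as a
 *          result, and stop once enough sentences ("-n") have been found
 *        scan_word(now): recompute the forward score of its last word
 *        if now is acceptable and has (almost) reached frame 0 -> push a
 *          finalized copy with its acceptance flag set
 *        get next-word candidates (ngram_nextwords() or dfa_nextwords())
 *        expand, score and push the new hypotheses
 *      finally re-sort the result stack and output the top sentences)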
* * @param param [in] input parameter vector * @param r [i/o] recognition process instance * @param cate_bgn [in] category id to allow word expansion from (ignored in Julius) * @param cate_num [in] num of category to allow word expansion from (ignored in Julius) * * * @callgraph * @callergraph * */ void wchmm_fbs(HTK_Param *param, RecogProcess *r, int cate_bgn, int cate_num) { /* 文仮説スタック */ /* hypothesis stack (double-linked list) */ int stacknum; /* current stack size */ NODE *start = NULL; /* top node */ NODE *bottom = NULL; /* bottom node */ /* 認識結果格納スタック(結果はここへいったん集められる) */ /* result sentence stack (found results will be stored here and then re-ordered) */ int r_stacksize; int r_stacknum; NODE *r_start = NULL; NODE *r_bottom = NULL; /* ワークエリア */ /* work area */ NEXTWORD fornoise; /* Dummy NEXTWORD data for short-pause insertion handling */ NEXTWORD **nextword, *nwroot; /* buffer to store predicted words */ int maxnwnum; /* current allocated number of words in nextword */ int nwnum; /* current number of words in nextword */ NODE *now, *new; /* popped current hypo., expanded new hypo. */ NODE *now_noise; /* for inserting/deleting noise word */ boolean now_noise_calced; boolean acc; int t; int w,i,j; LOGPROB last_score = LOG_ZERO; /* for graph generation */ LOGPROB prev_score; WordGraph *wordgraph_root = NULL; boolean merged_p; #ifdef GRAPHOUT_DYNAMIC int dynamic_merged_num = 0; WordGraph *wtmp; LOGPROB lscore_prev; #endif #ifdef GRAPHOUT_SEARCH int terminate_search_num = 0; #endif /* local temporal parameter */ int stacksize, ncan, maxhypo, peseqlen; JCONF_SEARCH *jconf; WORD_INFO *winfo; NGRAM_INFO *ngram; DFA_INFO *gdfa; BACKTRELLIS *backtrellis; StackDecode *dwrk; if (r->lmtype == LM_DFA) { if (debug2_flag) jlog("DEBUG: only words in these categories will be expanded: %d-%d\n", cate_bgn, cate_bgn + cate_num-1); } /* * 初期化 * Initialize */ /* just for quick access */ jconf = r->config; winfo = r->lm->winfo; if (r->lmtype == LM_PROB) { ngram = r->lm->ngram; } else if (r->lmtype == LM_DFA) { gdfa = r->lm->dfa; } backtrellis = r->backtrellis; dwrk = &(r->pass2); stacksize = jconf->pass2.stack_size; ncan = jconf->pass2.nbest; maxhypo = jconf->pass2.hypo_overflow; peseqlen = backtrellis->framelen; /* store data for sub routines */ r->peseqlen = backtrellis->framelen; //recog->ccd_flag = recog->jconf->am.ccd_flag; /* 予測単語格納領域を確保 */ /* malloc area for word prediction */ /* the initial maximum number of nextwords is the size of vocabulary */ nextword = nw_malloc(&maxnwnum, &nwroot, winfo->num); /* 前向きスコア計算用の領域を確保 */ /* malloc are for forward viterbi (scan_word()) */ malloc_wordtrellis(r); /* scan_word用領域 */ /* 仮説スタック初期化 */ /* initialize hypothesis stack */ start = bottom = NULL; stacknum = 0; /* 結果格納スタック初期化 */ /* initialize result stack */ r_stacksize = ncan; r_start = r_bottom = NULL; r_stacknum = 0; /* カウンタ初期化 */ /* initialize counter */ dwrk->popctr = 0; dwrk->genectr = 0; dwrk->pushctr = 0; dwrk->finishnum = 0; #ifdef CM_SEARCH /* initialize local stack */ cm_init(dwrk, winfo->num, jconf->annotate.cm_alpha #ifdef CM_MULTIPLE_ALPHA , jconf->annotate.cm_alpha_num #endif ); #endif #ifdef SCAN_BEAM /* prepare and initialize score envelope */ dwrk->framemaxscore = (LOGPROB *)mymalloc(sizeof(LOGPROB)*peseqlen); envl_init(dwrk, peseqlen); #endif /* SCAN_BEAM */ /* エンベロープ探索用の単語長別展開数カウンタを初期化 */ /* initialize counters for envelope search */ if (jconf->pass2.enveloped_bestfirst_width >= 0) wb_init(dwrk); if (jconf->graph.enabled) { wordgraph_init(r->wchmm); } /* * 初期仮説(1単語からなる)を得, 文仮説スタックにいれる * get a 
set of initial words from LM function and push them as initial * hypotheses */ /* the first words will be stored in nextword[] */ if (r->lmtype == LM_PROB) { nwnum = ngram_firstwords(nextword, peseqlen, maxnwnum, r); } else if (r->lmtype == LM_DFA) { nwnum = dfa_firstwords(nextword, peseqlen, maxnwnum, r); /* 溢れたら、バッファを増やして再チャレンジ */ /* If the number of nextwords can exceed the buffer size, expand the nextword data area */ while (nwnum < 0) { nextword = nw_expand(nextword, &maxnwnum, &nwroot, winfo->num); nwnum = dfa_firstwords(nextword, peseqlen, maxnwnum, r); } } if (debug2_flag) { jlog("DEBUG: %d words in wordtrellis as first hypothesis\n", nwnum); } /* store them to stack */ for (w = 0; w < nwnum; w++) { if (r->lmtype == LM_DFA) { /* limit word hypothesis */ if (! (winfo->wton[nextword[w]->id] >= cate_bgn && winfo->wton[nextword[w]->id] < cate_bgn + cate_num)) { continue; } } /* generate new hypothesis */ new = newnode(r); start_word(new, nextword[w], param, r); if (r->lmtype == LM_DFA) { if (new->score <= LOG_ZERO) { /* not on trellis */ free_node(new); continue; } } dwrk->genectr++; #ifdef CM_SEARCH /* store the local hypothesis to temporal stack */ cm_store(dwrk, new); #else /* put to stack */ if (put_to_stack(new, &start, &bottom, &stacknum, stacksize) != -1) { dwrk->current = new; //callback_exec(CALLBACK_DEBUG_PASS2_PUSH, r); if (jconf->graph.enabled) { new->prevgraph = NULL; new->lastcontext = NULL; } dwrk->pushctr++; } #endif } #ifdef CM_SEARCH /* compute score sum */ cm_sum_score(dwrk #ifdef CM_MULTIPLE_ALPHA , jconf->annotate.cm_alpha_bgn , jconf->annotate.cm_alpha_end , jconf->annotate.cm_alpha_step #endif ); /* compute CM and put the generated hypotheses to global stack */ while ((new = cm_get_node(dwrk)) != NULL) { cm_set_score(dwrk, new #ifdef CM_MULTIPLE_ALPHA , jconf->annotate.cm_alpha_bgn , jconf->annotate.cm_alpha_end , jconf->annotate.cm_alpha_step #endif ); #ifdef CM_SEARCH_LIMIT if (new->cmscore[new->seqnum-1] < jconf->annotate.cm_cut_thres #ifdef CM_SEARCH_LIMIT_AFTER && dwrk->finishnum > 0 #endif ) { free_node(new); continue; } #endif /* CM_SEARCH_LIMIT */ if (put_to_stack(new, &start, &bottom, &stacknum, stacksize) != -1) { dwrk->current = new; //callback_exec(CALLBACK_DEBUG_PASS2_PUSH, r); if (r->graphout) { new->prevgraph = NULL; new->lastcontext = NULL; } dwrk->pushctr++; } } #endif if (debug2_flag) { jlog("DEBUG: %d pushed\n", dwrk->pushctr); } /********************/ /* main search loop */ /********************/ for (;;) { /* if terminate signal has been received, cancel this input */ /* * if (recog->process_want_terminate) { * jlog("DEBUG: process terminated by request\n"); * break; * } */ /* * 仮説スタックから最もスコアの高い仮説を取り出す * pop the top hypothesis from stack */ #ifdef DEBUG jlog("DEBUG: get one hypothesis\n"); #endif now = get_best_from_stack(&start,&stacknum); if (now == NULL) { /* stack empty ---> 探索終了*/ jlog("WARNING: %02d %s: hypothesis stack exhausted, terminate search now\n", r->config->id, r->config->name); jlog("STAT: %02d %s: %d sentences have been found\n", r->config->id, r->config->name, dwrk->finishnum); break; } /* (bogus score check) */ if (now->score <= LOG_ZERO) { free_node(now); continue; } /* 単語グラフ用に pop 仮説の f スコアを一時保存 */ if (r->graphout) { prev_score = now->score; } /* word envelope チェック */ /* consult word envelope */ if (jconf->pass2.enveloped_bestfirst_width >= 0) { if (!wb_ok(dwrk, now, jconf->pass2.enveloped_bestfirst_width)) { /* この仮説長における展開元仮説数の累計数は既に閾値を越えている. そのため,この仮説は捨てる. 
*/ /* the number of popped hypotheses at the length already reaches its limit, so the current popped hypothesis should be discarded here with no expansion */ if (debug2_flag) { jlog("DEBUG: popped but pruned by word envelope:"); put_hypo_woutput(now, r->lm->winfo); } free_node(now); continue; } } #ifdef CM_SEARCH_LIMIT_POP if (now->cmscore[now->seqnum-1] < jconf->annotate.cm_cut_thres_pop) { free_node(now); continue; } #endif /* CM_SEARCH_LIMIT_POP */ dwrk->popctr++; /* (for debug) 取り出した仮説とそのスコアを出力 */ /* output information of the popped hypothesis to stdout */ if (debug2_flag) { jlog("DEBUG: --- pop %d:\n", dwrk->popctr); jlog("DEBUG: "); put_hypo_woutput(now, r->lm->winfo); jlog("DEBUG: "); put_hypo_wname(now, r->lm->winfo); jlog("DEBUG: %d words, f=%f, g=%f\n", now->seqnum, now->score, now->g[now->bestt]); jlog("DEBUG: last word on trellis: [%d-%d]\n", now->estimated_next_t + 1, now->bestt); } dwrk->current = now; //callback_exec(CALLBACK_DEBUG_PASS2_POP, r); if (r->graphout) { #ifdef GRAPHOUT_DYNAMIC /* merge last word in popped hypo if possible */ wtmp = wordgraph_check_merge(now->prevgraph, &wordgraph_root, now->seq[now->seqnum-1], &merged_p, jconf); if (wtmp != NULL) { /* wtmp holds merged word */ dynamic_merged_num++; lscore_prev = (now->prevgraph) ? now->prevgraph->lscore_tmp : 0.0; if (now->prevgraph != NULL) { if (now->prevgraph->saved) { j_internal_error("wchmm_fbs: already saved??\n"); } wordgraph_free(now->prevgraph); } if (now->lastcontext != NULL && now->lastcontext != wtmp /* avoid self loop */ ) { wordgraph_check_and_add_leftword(now->lastcontext, wtmp, lscore_prev); #ifdef GRAPHOUT_SEARCH_CONSIDER_RIGHT if (merged_p) { if (wordgraph_check_and_add_rightword(wtmp, now->lastcontext, lscore_prev) == FALSE) { merged_p = TRUE; } else { merged_p = FALSE; } } else { wordgraph_check_and_add_rightword(wtmp, now->lastcontext, lscore_prev); } #else wordgraph_check_and_add_rightword(wtmp, now->lastcontext, lscore_prev); #endif } now->prevgraph = wtmp; /* change word to the merged one */ /*printf("last word merged\n");*/ /* previous still remains at memory here... (will be purged later) */ } else { wordgraph_save(now->prevgraph, now->lastcontext, &wordgraph_root); } #ifdef GRAPHOUT_SEARCH /* if recent hypotheses are included in the existing graph, terminate */ if (merged_p && now->endflag == FALSE #ifdef GRAPHOUT_SEARCH_DELAY_TERMINATION /* Do not apply search termination by graph merging until the first sentence candidate is found. */ && (jconf->graph.graphout_search_delay == FALSE || dwrk->finishnum > 0) #endif ) { terminate_search_num++; free_node(now); continue; } #endif #else /* ~GRAPHOUT_DYNAMIC */ /* always save */ wordgraph_save(now->prevgraph, now->lastcontext, &wordgraph_root); #endif /* ~GRAPHOUT_DYNAMIC */ } /* 取り出した仮説のスコアを元に score envelope を更新 */ /* update score envelope using the popped hypothesis */ envl_update(dwrk, now, peseqlen); /* * 取り出した仮説の受理フラグが既に立っていれば, * その仮説は探索終了とみなし,結果として出力して次のループへ. * * If the popped hypothesis already reached to the end, * we can treat it as a recognition result. 
*/ #ifdef DEBUG VERMES("endflag check\n"); #endif if (now->endflag) { if (debug2_flag) { jlog("DEBUG: This is a full sentence candidate\n"); } /* quick, dirty hack */ if (now->score == last_score) { free_node(now); continue; } else { last_score = now->score; } dwrk->finishnum++; if (debug2_flag) { jlog("DEBUG: %d-th sentence found\n", dwrk->finishnum); } /* 一定数の仮説が得られたあとスコアでソートするため, 一時的に別のスタックに格納しておく */ /* store the result to result stack after search is finished, they will be re-ordered and output */ put_to_stack(now, &r_start, &r_bottom, &r_stacknum, r_stacksize); /* 指定数の文仮説が得られたなら探索を終了する */ /* finish search if specified number of results are found */ if (dwrk->finishnum >= ncan) { break; } else { continue; } } /* end of now->endflag */ /* * 探索失敗を検出する. * 仮説数が maxhypo 以上展開されたら, もうこれ以上は探索しない * * detecting search failure: * if the number of expanded hypotheses reaches maxhypo, giveup further search */ #ifdef DEBUG jlog("DEBUG: loop end check\n"); #endif if (dwrk->popctr >= maxhypo) { jlog("WARNING: %02d %s: num of popped hypotheses reached the limit (%d)\n", r->config->id, r->config->name, maxhypo); /* (for debug) 探索失敗時に、スタックに残った情報を吐き出す */ /* (for debug) output all hypothesis remaining in the stack */ if (debug2_flag) put_all_in_stack(&start, &stacknum, r->lm->winfo); free_node(now); break; /* end of search */ } /* 仮説長が一定値を越えたとき,その仮説を破棄する */ /* check hypothesis word length overflow */ if (now->seqnum >= MAXSEQNUM) { jlog("ERROR: sentence length exceeded system limit ( > %d)\n", MAXSEQNUM); free_node(now); continue; } #ifndef GRAPHOUT_PRECISE_BOUNDARY if (r->graphout) { /* if monophone (= no backscan), the tail g score should be kept here */ /* else, updated tail g score will be computed in scan_word() */ if(!jconf->am.ccd_flag) { now->tail_g_score = now->g[now->bestt]; } } #endif /* * 前向きスコアを更新する: 最後の単語の部分の前向きスコアを計算する. * update forward score: compute forward trellis for the last word */ #ifdef DEBUG jlog("DEBUG: scan_word\n"); #endif scan_word(now, param, r); if (now->score < LOG_ZERO) { /* another end-of-search detecter */ jlog("WARNING: too low score, ignore: score=%f",now->score); put_hypo_woutput(now, r->lm->winfo); free_node(now); continue; } /* * 取り出した仮説が文として受理可能であれば, * 受理フラグを立ててをスタックにいれ直しておく. * (次に取り出されたら解となる) * * if the current popped hypothesis is acceptable, set endflag * and return it to stack: it will become the recognition result * when popped again. */ #ifdef DEBUG jlog("DEBUG: accept check\n"); #endif if (r->lmtype == LM_PROB) { acc = ngram_acceptable(now, r); } else if (r->lmtype == LM_DFA) { acc = dfa_acceptable(now, r); } if (acc && now->estimated_next_t <= 5) { new = newnode(r); /* new に now の中身をコピーして,最終的なスコアを計算 */ /* copy content of 'now' to 'new', and compute the final score */ last_next_word(now, new, param, r); if (debug2_flag) { jlog("DEBUG: This is acceptable as a sentence candidate\n"); } /* g[] が入力始端に達していなければ棄却 */ /* reject this sentence candidate if g[] does not reach the end */ if (new->score <= LOG_ZERO) { if (debug2_flag) { jlog("DEBUG: But invalid because Viterbi pass does not reach the 0th frame\n"); } free_node(new); free_node(now); continue; } /* 受理フラグを立てて入れ直す */ /* set endflag and push again */ if (debug2_flag) { jlog("DEBUG This hypo itself was pushed with final score=%f\n", new->score); } new->endflag = TRUE; if (put_to_stack(new, &start, &bottom, &stacknum, stacksize) != -1) { if (r->graphout) { if (new->score > LOG_ZERO) { new->lastcontext = now->prevgraph; new->prevgraph = wordgraph_assign(new->seq[new->seqnum-1], WORD_INVALID, (new->seqnum >= 2) ? 
new->seq[new->seqnum-2] : WORD_INVALID, 0, #ifdef GRAPHOUT_PRECISE_BOUNDARY /* wordend are shifted to the last */ #ifdef PASS2_STRICT_IWCD new->wordend_frame[0], #else now->wordend_frame[0], #endif #else now->bestt, #endif new->score, prev_score, now->g[0], #ifdef GRAPHOUT_PRECISE_BOUNDARY #ifdef PASS2_STRICT_IWCD new->wordend_gscore[0], #else now->wordend_gscore[0], #endif #else now->tail_g_score, #endif now->lscore, #ifdef CM_SEARCH new->cmscore[new->seqnum-1], #else LOG_ZERO, #endif r ); } else { new->lastcontext = now->lastcontext; new->prevgraph = now->prevgraph; } } /* put_to_stack() != -1 */ } /* recog->graphout */ /* この仮説はここで終わらずに, ここからさらに単語展開する */ /* continue with the 'now' hypothesis, not terminate here */ } /* * この仮説から,次単語集合を決定する. * 次単語集合は, この仮説の推定始端フレーム周辺に存在した * 第1パスのトレリス単語集合. * * N-gramの場合は各単語の n-gram 接続確率が含まれる. * DFA の場合は, その中でさらに DFA 上で接続可能なもののみが返ってくる */ /* * Determine next word set that can connect to this hypothesis. * They come from the trellis word that has been survived at near the * beginning of the last word. * * In N-gram mode, they also contain N-gram probabilities toward the * source hypothesis. In DFA mode, the word set is further reduced * by the grammatical constraint */ #ifdef DEBUG jlog("DEBUG: get next words\n"); #endif if (r->lmtype == LM_PROB) { nwnum = ngram_nextwords(now, nextword, maxnwnum, r); } else if (r->lmtype == LM_DFA) { nwnum = dfa_nextwords(now, nextword, maxnwnum, r); /* nextword が溢れたら、バッファを増やして再チャレンジ */ /* If the number of nextwords can exceed the buffer size, expand the nextword data area */ while (nwnum < 0) { nextword = nw_expand(nextword, &maxnwnum, &nwroot, winfo->num); nwnum = dfa_nextwords(now, nextword, maxnwnum, r); } } if (debug2_flag) { jlog("DEBUG: %d words extracted from wordtrellis\n", nwnum); } /* * 仮説と次単語集合から新たな文仮説を生成し,スタックにいれる. */ /* * generate new hypotheses from 'now' and 'nextword', * and push them to stack */ #ifdef DEBUG jlog("DEBUG: generate hypo\n"); #endif if (r->lmtype == LM_DFA) { now_noise_calced = FALSE; /* TRUE is noise-inserted score has been calculated */ } i = dwrk->pushctr; /* store old value */ #ifdef CM_SEARCH /* initialize local stack */ cm_init(dwrk, winfo->num, jconf->annotate.cm_alpha #ifdef CM_MULTIPLE_ALPHA , jconf->annotate.cm_alpha_num #endif ); #endif /* for each nextword, generate a new hypothesis */ for (w = 0; w < nwnum; w++) { if (r->lmtype == LM_DFA) { /* limit word hypothesis */ if (! 
(winfo->wton[nextword[w]->id] >= cate_bgn && winfo->wton[nextword[w]->id] < cate_bgn + cate_num)) { continue; } } new = newnode(r); if (r->lmtype == LM_DFA) { if (nextword[w]->can_insert_sp == TRUE) { /* ノイズを挟んだトレリススコアを計算し,挟まない場合との最大値を取る */ /* compute hypothesis score with noise inserted */ if (now_noise_calced == FALSE) { /* now に sp をつけた仮説 now_noise を作り,そのスコアを計算 */ /* generate temporal hypothesis 'now_noise' which has short-pause word after the original 'now' */ fornoise.id = gdfa->sp_id; now_noise = newnode(r); cpy_node(now_noise, now); #if 0 now_noise_tmp = newnode(r); next_word(now, now_noise_tmp, &fornoise, param, r); scan_word(now_noise_tmp, param, r); for(t=0;tg[t] = max(now_noise_tmp->g[t], now->g[t]); } free_node(now_noise_tmp); #else /* expand NOISE only if it exists in backward trellis */ /* begin patch by kashima */ if (jconf->pass2.looktrellis_flag) { if(!dfa_look_around(&fornoise, now, r)){ free_node(now_noise); free_node(new); continue; } } /* end patch by kashima */ /* now_nosie の スコア g[] を計算し,元の now の g[] と比較して 高い方を採用 */ /* compute trellis score g[], and adopt the maximum score for each frame compared with now->g[] */ next_word(now, now_noise, &fornoise, param, r); scan_word(now_noise, param, r); for(t=0;tg[t] = max(now_noise->g[t], now->g[t]); } /* ノイズを挟んだ際を考慮したスコアを計算したので, ここで最後のノイズ単語を now_noise から消す */ /* now that score has been computed considering pause insertion, we can delete the last noise word from now_noise here */ now_noise->seqnum--; #endif now_noise_calced = TRUE; } /* expand word only if it exists in backward trellis */ /* begin patch by kashima */ if (jconf->pass2.looktrellis_flag) { if(!dfa_look_around(nextword[w], now_noise, r)){ free_node(new); continue; } } /* end patch by kashima */ /* 新しい仮説' new' を 'now_noise' から生成 */ /* generate a new hypothesis 'new' from 'now_noise' */ next_word(now_noise, new, nextword[w], param, r); } else { /* expand word only if it exists in backward trellis */ /* begin patch by kashima */ if (jconf->pass2.looktrellis_flag) { if(!dfa_look_around(nextword[w], now, r)){ free_node(new); continue; } } /* end patch by kashima */ /* 新しい仮説' new' を 'now_noise' から生成 */ /* generate a new hypothesis 'new' from 'now_noise' */ next_word(now, new, nextword[w], param, r); } } if (r->lmtype == LM_PROB) { /* 新しい仮説' new' を 'now_noise' から生成 N-gram の場合はノイズを特別扱いしない */ /* generate a new hypothesis 'new' from 'now'. pause insertion is treated as same as normal words in N-gram mode. */ next_word(now, new, nextword[w], param, r); } if (new->score <= LOG_ZERO) { /* not on trellis */ free_node(new); continue; } dwrk->genectr++; #ifdef CM_SEARCH /* store the local hypothesis to temporal stack */ cm_store(dwrk, new); #else /* 生成した仮説 'new' をスタックに入れる */ /* push the generated hypothesis 'new' to stack */ /* stack overflow */ if (can_put_to_stack(new, &bottom, &stacknum, stacksize) == -1) { free_node(new); continue; } if (r->graphout) { /* assign a word arc to the last fixed word */ new->lastcontext = now->prevgraph; new->prevgraph = wordgraph_assign(new->seq[new->seqnum-2], new->seq[new->seqnum-1], (new->seqnum >= 3) ? 
new->seq[new->seqnum-3] : WORD_INVALID, new->bestt + 1, #ifdef GRAPHOUT_PRECISE_BOUNDARY #ifdef PASS2_STRICT_IWCD /* most up-to-date wordend_gscore is on new, because the last phone of 'now' will be computed at next_word() */ new->wordend_frame[new->bestt], #else now->wordend_frame[new->bestt], #endif #else now->bestt, #endif new->score, prev_score, #ifdef PASS2_STRICT_IWCD new->g[new->bestt] - new->lscore, #else now->g[new->bestt+1], #endif #ifdef GRAPHOUT_PRECISE_BOUNDARY #ifdef PASS2_STRICT_IWCD /* most up-to-date wordend_gscore is on new, because the last phone of 'now' will be computed at next_word() */ new->wordend_gscore[new->bestt], #else now->wordend_gscore[new->bestt], #endif #else now->tail_g_score, #endif now->lscore, #ifdef CM_SEARCH new->cmscore[new->seqnum-2], #else LOG_ZERO, #endif r ); } /* recog->graphout */ put_to_stack(new, &start, &bottom, &stacknum, stacksize); if (debug2_flag) { j = new->seq[new->seqnum-1]; jlog("DEBUG: %15s [%15s](id=%5d)(%f) [%d-%d] pushed\n",winfo->wname[j], winfo->woutput[j], j, new->score, new->estimated_next_t + 1, new->bestt); } dwrk->current = new; //callback_exec(CALLBACK_DEBUG_PASS2_PUSH, r); dwrk->pushctr++; #endif } /* end of nextword loop */ #ifdef CM_SEARCH /* compute score sum */ cm_sum_score(dwrk #ifdef CM_MULTIPLE_ALPHA , jconf->annotate.cm_alpha_bgn , jconf->annotate.cm_alpha_end , jconf->annotate.cm_alpha_step #endif ); /* compute CM and put the generated hypotheses to global stack */ while ((new = cm_get_node(dwrk)) != NULL) { cm_set_score(dwrk, new #ifdef CM_MULTIPLE_ALPHA , jconf->annotate.cm_alpha_bgn , jconf->annotate.cm_alpha_end , jconf->annotate.cm_alpha_step #endif ); #ifdef CM_SEARCH_LIMIT if (new->cmscore[new->seqnum-1] < jconf->annotate.cm_cut_thres #ifdef CM_SEARCH_LIMIT_AFTER && dwrk->finishnum > 0 #endif ) { free_node(new); continue; } #endif /* CM_SEARCH_LIMIT */ /* j = new->seq[new->seqnum-1]; printf(" %15s [%15s](id=%5d)(%f) [%d-%d] cm=%f\n",winfo->wname[j], winfo->woutput[j], j, new->score, new->estimated_next_t + 1, new->bestt, new->cmscore[new->seqnum-1]);*/ /* stack overflow */ if (can_put_to_stack(new, &bottom, &stacknum, stacksize) == -1) { free_node(new); continue; } if (r->graphout) { /* assign a word arc to the last fixed word */ new->lastcontext = now->prevgraph; new->prevgraph = wordgraph_assign(new->seq[new->seqnum-2], new->seq[new->seqnum-1], (new->seqnum >= 3) ? 
new->seq[new->seqnum-3] : WORD_INVALID, new->bestt + 1, #ifdef GRAPHOUT_PRECISE_BOUNDARY #ifdef PASS2_STRICT_IWCD new->wordend_frame[new->bestt], #else now->wordend_frame[new->bestt], #endif #else now->bestt, #endif new->score, prev_score, #ifdef PASS2_STRICT_IWCD new->g[new->bestt] - new->lscore, #else now->g[new->bestt+1], #endif #ifdef GRAPHOUT_PRECISE_BOUNDARY #ifdef PASS2_STRICT_IWCD new->wordend_gscore[new->bestt], #else now->wordend_gscore[new->bestt], #endif #else now->tail_g_score, #endif now->lscore, #ifdef CM_SEARCH new->cmscore[new->seqnum-2], #else LOG_ZERO, #endif r ); } /* recog->graphout */ put_to_stack(new, &start, &bottom, &stacknum, stacksize); if (debug2_flag) { j = new->seq[new->seqnum-1]; jlog("DEBUG: %15s [%15s](id=%5d)(%f) [%d-%d] pushed\n",winfo->wname[j], winfo->woutput[j], j, new->score, new->estimated_next_t + 1, new->bestt); } dwrk->current = new; //callback_exec(CALLBACK_DEBUG_PASS2_PUSH, r); dwrk->pushctr++; } #endif if (debug2_flag) { jlog("DEBUG: %d pushed\n",dwrk->pushctr-i); } if (r->lmtype == LM_DFA) { if (now_noise_calced == TRUE) free_node(now_noise); } /* * 取り出した仮説を捨てる * free the source hypothesis */ free_node(now); } /***************/ /* End of Loop */ /***************/ /* output */ if (dwrk->finishnum == 0) { /* if search failed */ /* finalize result when no hypothesis was obtained */ if (verbose_flag) { if (r->config->sw.fallback_pass1_flag) { jlog("%02d %s: got no candidates, output 1st pass result as a final result\n", r->config->id, r->config->name); } else { jlog("WARNING: %02d %s: got no candidates, search failed\n", r->config->id, r->config->name); } } pass2_finalize_on_no_result(r, r->config->sw.fallback_pass1_flag); } else { /* if at least 1 candidate found */ if (debug2_flag) { jlog("STAT: %02d %s: got %d candidates\n", r->config->id, r->config->name, dwrk->finishnum); } /* 結果はまだ出力されていないので,文候補用スタック内をソートして ここで出力する */ /* As all of the found candidate are in result stack, we sort them and output them here */ if (debug2_flag) jlog("DEBUG: done\n"); result_reorder_and_output(&r_start, &r_bottom, &r_stacknum, jconf->output.output_hypo_maxnum, r, param); r->result.status = J_RESULT_STATUS_SUCCESS; //callback_exec(CALLBACK_RESULT, r); //callback_exec(CALLBACK_EVENT_PASS2_END, r); } /* 各種カウンタを出力 */ /* output counters */ if (verbose_flag) { jlog("STAT: %02d %s: %d generated, %d pushed, %d nodes popped in %d\n", r->config->id, r->config->name, dwrk->genectr, dwrk->pushctr, dwrk->popctr, backtrellis->framelen); jlog_flush(); #ifdef GRAPHOUT_DYNAMIC if (r->graphout) { jlog("STAT: %02d %s: graph: %d merged", r->config->id, r->config->name, dynamic_merged_num); #ifdef GRAPHOUT_SEARCH jlog("S, %d terminated", terminate_search_num); #endif jlog(" in %d\n", dwrk->popctr); } #endif } if (dwrk->finishnum > 0 && r->graphout) { if (verbose_flag) jlog("STAT: ------ wordgraph post-processing begin ------\n"); /* garbage collection and word merging */ /* words with no following word (except end of sentence) will be erased */ wordgraph_purge_leaf_nodes(&wordgraph_root, r); #ifdef GRAPHOUT_DEPTHCUT /* perform word graph depth cutting here */ wordgraph_depth_cut(&wordgraph_root, r); #endif /* if GRAPHOUT_PRECISE_BOUNDARY defined, propagate exact time boundary to the right context. words of different boundary will be duplicated here. 
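     (illustrative summary, not part of the original comment: the word
     graph post-processing applied from here on is
       1. wordgraph_purge_leaf_nodes()  - drop words with no following word
       2. wordgraph_depth_cut()         - optional depth-based pruning
       3. wordgraph_adjust_boundary()   - propagate exact word boundaries
       4. either confusion network generation via confnet_create() after
          forward-backward CM computation, or lattice compaction by
          same-score / exact-time / neighbor-time merging, selected by
          jconf->graph.confnet and jconf->graph.lattice)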
*/ wordgraph_adjust_boundary(&wordgraph_root, r); if (jconf->graph.confnet) { /* CONFUSION NETWORK GENERATION */ /* old merging functions should be skipped */ /* finalize: sort and annotate ID */ r->graph_totalwordnum = wordgraph_sort_and_annotate_id(&wordgraph_root, r); /* check coherence */ wordgraph_check_coherence(wordgraph_root, r); /* compute graph CM by forward-backward processing */ graph_forward_backward(wordgraph_root, r); if (verbose_flag) jlog("STAT: ------ wordgraph post-processing end ------\n"); r->result.wg = wordgraph_root; /* * if (jconf->graph.lattice) { * callback_exec(CALLBACK_RESULT_GRAPH, r); * } */ /* parse the graph to extract order relationship */ graph_make_order(wordgraph_root, r); /* create confusion network */ r->result.confnet = confnet_create(wordgraph_root, r); /* output confusion network */ //callback_exec(CALLBACK_RESULT_CONFNET, r); /* free area for order relationship */ graph_free_order(r); /* free confusion network clusters */ //cn_free_all(&(r->result.confnet)); } else if (jconf->graph.lattice) { /* WORD LATTICE POSTPROCESSING */ /* merge words with the same time and same score */ wordgraph_compaction_thesame(&wordgraph_root); /* merge words with the same time (skip if "-graphrange -1") */ wordgraph_compaction_exacttime(&wordgraph_root, r); /* merge words of near time (skip if "-graphrange" value <= 0 */ wordgraph_compaction_neighbor(&wordgraph_root, r); /* finalize: sort and annotate ID */ r->graph_totalwordnum = wordgraph_sort_and_annotate_id(&wordgraph_root, r); /* check coherence */ wordgraph_check_coherence(wordgraph_root, r); /* compute graph CM by forward-backward processing */ graph_forward_backward(wordgraph_root, r); if (verbose_flag) jlog("STAT: ------ wordgraph post-processing end ------\n"); /* output graph */ r->result.wg = wordgraph_root; //callback_exec(CALLBACK_RESULT_GRAPH, r); } else { j_internal_error("InternalError: graph generation specified but no output format specified?\n"); } /* clear all wordgraph */ //wordgraph_clean(&(r->result.wg)); } /* r->graphout */ /* 終了処理 */ /* finalize */ nw_free(nextword, nwroot); free_all_nodes(start); free_wordtrellis(dwrk); #ifdef SCAN_BEAM free(dwrk->framemaxscore); #endif //result_sentence_free(r); clear_stocker(dwrk); } /** * * 第2パス用のワークエリアを確保・初期化する. * * ここで確保されるのは認識・パラメータに依らない値のみ. * * @param r [i/o] 認識処理インスタンス * * * Initialize and allocate work area for 2nd pass. * * This function only contains input / parameter dependent initialization. * * @param r [in] recognition process instance * */ void wchmm_fbs_prepare(RecogProcess *r) { StackDecode *dwrk; dwrk = &(r->pass2); /* N-gram 用ワークエリアを確保 */ /* malloc work area for N-gram */ if (r->lmtype == LM_PROB && r->lm->ngram) { dwrk->cnword = (WORD_ID *)mymalloc(sizeof(WORD_ID) * r->lm->ngram->n); dwrk->cnwordrev = (WORD_ID *)mymalloc(sizeof(WORD_ID) * r->lm->ngram->n); } else { dwrk->cnword = dwrk->cnwordrev = NULL; } dwrk->stocker_root = NULL; #ifdef CONFIDENVE_MEASURE #ifdef CM_MULTIPLE_ALPHA dwrk->cmsumlist = NULL; #endif #ifdef CM_NBEST; dwrk->sentcm = NULL; dwrk->wordcm = NULL; #endif #endif } /** * * 第2パス用のワークエリアを解放する. * * ここで解放されるのは認識・パラメータに依らない値のみ. * * @param r [i/o] 認識処理インスタンス * * * Free the work area for 2nd pass. * * This function only concerns input / parameter dependent work area. 
* * @param r [in] recognition process instance * */ void wchmm_fbs_free(RecogProcess *r) { StackDecode *dwrk; dwrk = &(r->pass2); if (r->lmtype == LM_PROB && r->lm->ngram) { free(dwrk->cnword); free(dwrk->cnwordrev); dwrk->cnword = dwrk->cnwordrev = NULL; } #ifdef CONFIDENVE_MEASURE #ifdef CM_MULTIPLE_ALPHA if (dwrk->cmsumlist) { free(dwrk->cmsumlist); dwrk->cmsumlist = NULL; } #endif #ifdef CM_NBEST; if (dwrk->sentcm) { free(dwrk->sentcm); dwrk->sentcm = NULL; } if (dwrk->wordcm) { free(dwrk->wordcm); dwrk->wordcm = NULL; } #endif #endif } /* end of file */ julius-4.2.2/libjulius/00readme-jconf.txt0000644001051700105040000012713310731752223016576 0ustar ritrlabjconf (5 ) jconf (5 ) NAME jconf - Jconf configuration file specification DESCRIPTION The variables that can be written in Jconf file are organized as fol- lows. o Global options o Instance declaration o Language model instance o Acoustic model and speech analysis instance o Recognizer and search instance The details are described in the followings. EXAMPLE These are examples of jconf file. First example is a simple one with no instance declaration. When no instance declaration is found, Julius assumes there are only one AM, LM and recognition process instance. In this case, the default instance will be named "_default", and option order does not matter. This is equivalent to older version of Julius, except for GMM handling (see below). Example of Jconf file: no instance declaration -C jconffile (Other global options...) (AM and analysis options...) (LM options...) (Search options...) This is an example using two acoustic models and three language models of different types. Three recognition process instance is defined for each combination of AM and LM. The LM type (ngram / grammar / word) is determined by the arguments. The Global options are placed at the top in the example, but actually it can be placed anywhere in the file. Example of Jconf file: multi model decoding -C jconffile (Other global options...) -AM am1 (AM and analysis options for am1...) -AM am2 (AM and analysis options for am2...) -LM lm_ngram -d ngram -v dictfile (LM options for lm1...) -LM lm_grammar -gram grammarprefix (LM options for lm2...) -LM lm_word -w dictfile (LM options for lm3...) -SR recog_ngram am1 lm_ngram (Search options for recog_ngram...) -SR recog_grammar am1 lm_grammar (Search options for recog_ngram...) -SR recog_word am2 lm_word (Search options for recog_ngram...) This is another example using GMM for frontend processing. Note that from Rev.4.0 Julius has independent MFCC calculation scheme for GMM. This means that you should explicitly specify the acoustic analysis condition for GMM, not only the AM. Option -AM_GMM switch the current AM configuration to the one prepared internally for GMM. You can place AM configuration after the option to specify MFCC computation parameter for GMM. If you define exactly the same condition as AM for recognition, the same MFCC calculation instance will be shared among AM and GMM. Else, each MFCC will be com- puted independently. Example with GMM -C jconffile (Other global options...) -gmm gmmdefs -gmmreject noise -AM_GMM (analysis options for GMM...) -AM am1 (AM and analysis options for am1...) -LM lm_ngram -d ngram -v dictfile (LM options for lm1...) -SR recog_ngram am1 lm_ngram JCONF VARIABLES The full list of options and variables that can be specified in jconf file is listed below. GLOBAL OPTIONS Misc. options -C jconffile Load a jconf file. The options written in the file are expanded at the point. 
This option can be used within other jconf file. -version Print version information to standard error, and exit. -setting Print engine setting information to standard error, and exit. -quiet Output less log. For result, only the best word sequence will be printed. -debug (For debug) output enoumous internal message and debug informa- tion to log. -check {wchmm|trellis|triphone} For debug, enter interactive check mode. Audio input -input {mic|rawfile|mfcfile|adinnet|stdin|netaudio} Choose speech input source. 'file' or 'rawfile' for waveform file, 'htkparam' or 'mfcfile' for HTK parameter file. Users will be prompted to enter the file name from stdin, or you can use "-filelist" option to specify list of files to process. 'mic' is to get audio input from live microphone device, and 'adinnet' means receiving waveform data via tcpip network from an adinnet client. 'netaudio' is from DatLink/NetAudio input, and 'stdin' means data input from standard input. For waveform file input, only WAV (no compression) and RAW (noheader, 16bit, big endian) are supported by default. Other format can be read when compiled with libsnd library. To see what format is actually supported, see the help message using option "-help". For stdin input, only WAV and RAW is supported. (default: mfcfile) -filelist filename (With -input rawfile|mfcfile) perform recognition on all files listed in the file. The file should contain an input file per line. Engine ends when all of the files are processed. -notypecheck By default, Julius checks the input parameter type whether it matches the AM or not. This option will disable the check and use the input vector as is. -48 Record input with 48kHz sampling, and down-sample it to 16kHz on-the-fly. This option is valid for 16kHz model only. The down-sampling routine was ported from sptk. (Rev. 4.0) -NA devicename Host name for DatLink server input (-input netaudio). -adport port_number With -input adinnet, specify adinnet port number to listen. (default: 5530) -nostrip Julius by default removes successive zero samples in input speech data. This option inhibits this removal. -zmean , -nozmean This option enables/disables DC offset removal of input wave- form. Offset will be estimated from the whole input. For micro- phone / network input, zero mean of the first 48000 samples (3 seconds in 16kHz sampling) will be used for the estimation. (default: disabled) This option uses static offset for the channel. See also -zmean- source for frame-wise offset removal. Speech segment detection by level and zero-cross -cutsilence , -nocutsilence Turn on / off the speech detection by level and zero-cross. Default is on for mic / adinnet input, off for files. -lv thres Level threshold for speech input detection. Values should be from 0 to 32767. -zc thres Zero crossing threshold per second. Only waves over the level threshold (-lv) will be counted. (default: 60) -headmargin msec Silence margin at the start of speech segment in milliseconds. (default: 300) -tailmargin msec Silence margin at the end of speech segment in milliseconds. (default: 400) -rejectshort msec Reject input shorter than specified milliseconds. Search will be terminated and no result will be output. Input rejection by average power This feature will be enabled by --enable-power-reject on compilation. Should be used with Decoder VAD or GMM VAD. Valid for real-time input only. -powerthres thres Reject the inputted segment by its average energy. 
If the aver- age energy of the last recognized input is below the threshold, Julius will reject the input. (Rev.4.0) This option is valid when --enable-power-reject is specified at compilation time. Gaussian mixture model GMM will be used for input rejection by accumurated score, or for GMM-based frontend VAD when --enable-gmm-vad is specified. NOTE: You should also set the proper MFCC parameters required for the GMM, specifying the acoustic parameters described in AM section -AM_GMM. -gmm hmmdefs_file GMM definition file in HTK format. If specified, GMM-based input verification will be performed concurrently with the 1st pass, and you can reject the input according to the result as speci- fied by -gmmreject. The GMM should be defined as one-state HMMs. -gmmnum number Number of Gaussian components to be computed per frame on GMM calculation. Only the N-best Gaussians will be computed for rapid calculation. The default is 10 and specifying smaller value will speed up GMM calculation, but too small value (1 or 2) may cause degradation of identification performance. -gmmreject string Comma-separated list of GMM names to be rejected as invalid input. When recognition, the log likelihoods of GMMs accumulated for the entire input will be computed concurrently with the 1st pass. If the GMM name of the maximum score is within this string, the 2nd pass will not be executed and the input will be rejected. -gmmmargin frames Head margin for GMM-based VAD in frames. (Rev.4.0) This option will be valid only if compiled with --enable-gmm-vad. Decoding option Real-time processing means concurrent processing of MFCC computation 1st pass decoding. By default, real-time processing on the pass is on for microphone / adinnet / netaudio input, and for others. -realtime , -norealtime Explicitly switch on / off real-time (pipe-line) processing on the first pass. The default is off for file input, and on for microphone, adinnet and NetAudio input. This option relates to the way CMN and energy normalization is performed: if off, they will be done using average features of whole input. If on, MAP-CMN and energy normalization to do rea-time processing. INSTANCE DECLARATION FOR MULTI DECODING The following arguments will create a new configuration set with default parameters, and switch current set to it. Jconf parameters specified after the option will be set into the current set. To do multi-model decoding, these argument should be specified at the first of each model / search instances with different names. Any options before the first instance definition will be IGNORED. When no instance definition is found (as older version of Julius), all the options are assigned to a default instance named "_default". Please note that decoding with a single LM and multiple AMs is not fully supported. For example, you may want to construct the jconf file as following. -AM am_1 -AM am_2 -LM lm (LM spec..) -SR search1 am_1 lm -SR search2 am_2 lm This type of model sharing is not supported yet, since some part of LM processing depends on the assigned AM. Instead, you can get the same result by defining the same LMs for each AM, like this: -AM am_1 -AM am_2 -LM lm_1 (LM spec..) -LM lm_2 (same LM spec..) -SR search1 am_1 lm_1 -SR search2 am_2 lm_2 -AM name Create a new AM configuration set, and switch current to the new one. You should give a unique name. (Rev.4.0) -LM name Create a new LM configuration set, and switch current to the new one. You should give a unique name. 
(Rev.4.0) -SR name am_name lm_name Create a new search configuration set, and switch current to the new one. The specified AM and LM will be assigned to it. The am_name and lm_name can be either name or ID number. You should give a unique name. (Rev.4.0) -AM_GMM A special command to switch AM configuration set for specifying speech analysis parameters of GMM. The current AM will be switched to the GMM specific one already reserved, so be careful not to confuse with normal AM configurations. (Rev.4.0) LANGUAGE MODEL (-LM) Only one type of LM can be specified for a LM configuration. If you want to use multi model, you should define them one by one, each as a new LM. N-gram -d bingram_file Use binary format N-gram. An ARPA N-gram file can be converted to Julius binary format by mkbingram. -nlr arpa_ngram_file A forward, left-to-right N-gram language model in standard ARPA format. When both a forward N-gram and backward N-gram are spec- ified, Julius uses this forward 2-gram for the 1st pass, and the backward N-gram for the 2nd pass. Since ARPA file often gets huge and requires a lot of time to load, it may be better to convert the ARPA file to Julius binary format by mkbingram. Note that if both forward and backward N-gram is used for recognition, they together should be con- verted to a single binary. When only a forward N-gram is specified by this option and no backward N-gram specified by -nrl, Julius performs recognition with only the forward N-gram. The 1st pass will use the 2-gram entry in the given N-gram, and The 2nd pass will use the given N-gram, with converting forward probabilities to backward proba- bilities by Bayes rule. (Rev.4.0) -nrl arpa_ngram_file A backward, right-to-left N-gram language model in standard ARPA format. When both a forward N-gram and backward N-gram are spec- ified, Julius uses the forward 2-gram for the 1st pass, and this backward N-gram for the 2nd pass. Since ARPA file often gets huge and requires a lot of time to load, it may be better to convert the ARPA file to Julius binary format by mkbingram. Note that if both forward and backward N-gram is used for recognition, they together should be con- verted to a single binary. When only a backward N-gram is specified by this option and no forward N-gram specified by -nlr, Julius performs recognition with only the backward N-gram. The 1st pass will use the forward 2-gram probability computed from the backward 2-gram using Bayes rule. The 2nd pass fully use the given backward N-gram. (Rev.4.0) -v dict_file Word dictionary file. -silhead word_string -siltail word_string Silence word defined in the dictionary, for silences at the beginning of sentence and end of sentence. (default: "", "") -iwspword Add a word entry to the dictionary that should correspond to inter-word pauses. This may improve recognition accuracy in some language model that has no explicit inter-word pause modeling. The word entry to be added can be changed by -iwspentry. -iwspentry word_entry_string Specify the word entry that will be added by -iwspword. (default: " [sp] sp sp") -sepnum number Number of high frequency words to be isolated from the lexicon tree, to ease approximation error that may be caused by the one-best approximation on 1st pass. (default: 150) Grammar Multiple grammars can be specified by using -gram and -gramlist. When you specify grammars using these options multiple times, all of them will be read at startup. 
Note that this is unusual behavior from other options (in normal Julius option, last one override previous ones). You can use -nogram to reset the already specified grammars at that point. -gram gramprefix1[,gramprefix2[,gramprefix3,...]] Comma-separated list of grammars to be used. the argument should be prefix of a grammar, i.e. if you have foo.dfa and foo.dict, you can specify them by single argument foo. Multiple grammars can be specified at a time as a comma-separated list. -gramlist list_file Specify a grammar list file that contains list of grammars to be used. The list file should contain the prefixes of grammars, each per line. A relative path in the list file will be treated as relative to the list file, not the current path or configura- tion file. -dfa dfa_file -v dict_file An old way of specifying grammar files separately. -nogram Remove the current list of grammars already specified by -gram, -gramlist, -dfa and -v. Isolated word Multiple dictionary can be specified by using -w and -wlist. When you specify multiple times, all of them will be read at startup. You can use -nogram to reset the already specified dictionaries at that point. -w dict_file Word dictionary for isolated word recognition. File format is the same as other LM. (Rev.4.0) -wlist list_file Specify a dictionary list file that contains list of dictionar- ies to be used. The list file should contain the file name of dictionaries, each per line. A relative path in the list file will be treated as relative to the list file, not the current path or configuration file. (Rev.4.0) -nogram Remove the current list of dictionaries already specified by -w and -wlist. -wsil head_sil_model_name tail_sil_model_name sil_context_name On isolated word recognition, silence models will be appended to the head and tail of each word at recognition. This option spec- ifies the silence models to be appended. sil_context_name is the name of the head sil model and tail sil model as a context of word head phone and tail phone. For example, if you specify -wsil silB silE sp, a word with phone sequence b eh t will be translated as silB sp-b+eh b-eh+t eh-t+sp silE. (Rev.4.0) User-defined LM -userlm Declare to use user LM defined in program. This option should be specified if you use user-defined LM function. (Rev.4.0) Misc LM options -forcedict Ignore dictionary errors and force running. Words with errors will be skipped at startup. ACOUSTIC MODEL AND SPEECH ANALYSIS (-AM) (-AM_GMM) Acoustic analysis parameters are included in this section, since the AM defines the required parameter. You can use different MFCC type for each AM. For GMM, the same parameter should be specified after -AM_GMM When using multiple AM, the values of -smpPeriod, -smpFreq, -fsize and -fshift should have the same value among all AMs. acoustic HMM and parameters -h hmmdef_file Acoustic HMM definition file. File should be in HTK ascii for- mat, or Julius binary format. You can convert HTK ascii hmmdefs to Julius binary format by mkbinhmm. -hlist hmmlist_file HMMList file for phone mapping. This options is required when using a triphone model. This file provides a mapping between logical triphone names genertated from the dictionary and defined HMM names in hmmdefs. -tmix number Specify the number of top Gaussians to be calculted in a mixture codebook. Small number will speed up the acoustic computation namely in a tied-mixture model, but AM accuracy may get worse on too small value. 
(default: 2) -spmodel name Specify an HMM name that corresponds to short-pause model in HMM. This option will affect various aspects in recognition: short-pause skipping process on grammar recognition, word-end short-pause model insertion with -iwsp on N-gram recognition, or short-pause segmentation (-spsegment). (default: "sp") -multipath Enable multi-path mode. Multi-path mode expand state transition availability to allow model-skipping, or multiple output/input transitions in HMMs. However, since defining additional word begin / end node and perform extra transition check on decoding, the beam width may be required to set larger and recognition becomes a bit slower. By default (without this option), Julius automatically check the transition type of specified HMMs, and enable the multi-path mode if required. You can force Julius to enable multi-path mode with this option. (rev.4.0) -gprune {safe|heuristic|beam|none|default} Set Gaussian pruning algotrihm to use. The default setting will be set according to the model type and engine setting. "default" will force accepting the default setting. Set this to "none" to disable pruning and perform full computation. "safe" gualantees the top N Gaussians to be computed. "heuristic" and "beam" do more aggressive computational cosst reduction, but may result in small loss of accuracy model (default: 'safe' (stan- dard), 'beam' (fast) for tied mixture model, 'none' for non tied-mixture model). -iwcd1 {max|avg|best number} Select method to approximate inter-word triphone on the head and tail of a word in the first pass. "max" will apply the maximum likelihood of the same context tri- phones. "avg" will apply the average likelihood of the same con- text triphones. "best number" will apply the average of top N-best likelihoods of the same context triphone. Default is "best 3" for use with N-gram, and "avg" for grammar and word. When this AM is shared by LMs of both type, latter one will be chosen. -iwsppenalty float Short pause insertion penalty for appended short pauses by -iwsp. -gshmm hmmdef_file If this option is specified, Julius performs Gaussian Mixture Selection for efficient decoding. The hmmdefs should be a mono- phone model generated from an ordinary monophone HMM model, using mkgshmm. -gsnum number On GMS, specify number of monophone state from top to compute the detailed corresponding triphones. (default: 24) Speech analysis parameters -smpPeriod period Set sampling frequency of input speech by its sampling period, in unit of 100 nanoseconds. Sampling rate can also be specified by -smpFreq. Please note that the input frequency should be the same as trained conditions of acoustic model you use. (default: 625 = 16000Hz) This option corresponds to the HTK Option "SOURCERATE". The same value can be given to this option. When using multiple AM, this value should be the same among all AMs. -smpFreq Hz Set sampling frequency of input speech in Hz. Sampling rate can also be specified using "-smpPeriod". Please note that this fre- quency should be the same as the trained conditions of acoustic model you use. (default: 16000) When using multiple AM, this value should be the same among all AMs. -fsize sample_num Window size in number of samples. (default: 400) This option corresponds to the HTK Option "WINDOWSIZE", but value should be in samples (HTK value / smpPeriod). When using multiple AM, this value should be the same among all AMs. -fshift sample_num Frame shift in number of samples. 
(default: 160) This option corresponds to the HTK Option "TARGETRATE", but value should be in samples (HTK value / smpPeriod). When using multiple AM, this value should be the same among all AMs. -preemph float Pre-emphasis coefficient. (default: 0.97) This option corresponds to the HTK Option "PREEMCOEF". The same value can be given to this option. -fbank num Number of filterbank channels. (default: 24) This option corresponds to the HTK Option "NUMCHANS". The same value can be given to this option. Be aware that the default value differs from HTK (22). -ceplif num Cepstral liftering coefficient. (default: 22) This option corresponds to the HTK Option "CEPLIFTER". The same value can be given to this option. -rawe , -norawe Enable/disable using raw energy before pre-emphasis (default: disabled) This option corresponds to the HTK Option "RAWENERGY". Be aware that the default value differs from HTK (enabled at HTK, dis- abled at Julius). -enormal , -noenormal Enable/disable normalizing log energy. On live input, this nor- malization will be approximated from the average of last input. (default: disabled) This option corresponds to the HTK Option "ENORMALISE". Be aware that the default value differs from HTK (enabled at HTK, disabled at Julius). -escale float_scale Scaling factor of log energy when normalizing log energy. (default: 1.0) This option corresponds to the HTK Option "ESCALE". Be aware that the default value differs from HTK (0.1). -silfloor float Energy silence floor in dB when normalizing log energy. (default: 50.0) This option corresponds to the HTK Option "SILFLOOR". -delwin frame Delta window size in number of frames. (default: 2) This option corresponds to the HTK Option "DELTAWINDOW". The same value can be given to this option. -accwin frame Acceleration window size in number of frames. (default: 2) This option corresponds to the HTK Option "ACCWINDOW". The same value can be given to this option. -hifreq Hz Enable band-limiting for MFCC filterbank computation: set upper frequency cut-off. Value of -1 will disable it. (default: -1) This option corresponds to the HTK Option "HIFREQ". The same value can be given to this option. -lofreq Hz Enable band-limiting for MFCC filterbank computation: set lower frequency cut-off. Value of -1 will disable it. (default: -1) This option corresponds to the HTK Option "LOFREQ". The same value can be given to this option. -zmeanframe , -nozmeanframe With speech input, this option enables/disables frame-wise DC offset removal. This corresponds to HTK configuration ZMEAN- SOURCE. This cannot be used with "-zmean". (default: disabled) Real-time cepstral mean normalization -cmnload file Load initial cepstral mean vector from file on startup. The file shoudld be one saved by -cmnsave. Loading an initial cepstral mean enables Julius to better recognize the first utterance on a microphone / network input. -cmnsave file Save cepstral mean vector at each input. The parameters will be saved to the file at each input end, so the output file always keeps the last cepstral mean. If output file already exist, it will be overridden. -cmnupdate -cmnnoupdate Control whether to update the cepstral mean at each input on microphone / network input. Disabling this and specifying -cmn- load will make engine to use the initial cepstral mean parma- nently. -cmnmapweight float Specify weight of initial cepstral mean for MAP-CMN. Specify larger value to retain the initial cepstral mean for a longer period, and smaller value to rely more on the current input. 
(default: 100.0) Spectral subtraction -sscalc Perform spectral subtraction using head part of each file. Valid only for raw speech file input. Conflict with -ssload. -sscalclen msec With -sscalc, specify the length of head part silence in mil- liseconds. (default: 300) -ssload file Perform spectral subtraction for speech input using pre-esti- mated noise spectrum from file. The noise spectrum should be computed beforehand by mkss. Valid for all speech input. Con- flict with -sscalc. -ssalpha float Alpha coefficient of spectral subtraction for -sscalc and -ssload. Noise will be subtracted stronger as this value gets larger, but distortion of the resulting signal also becomes remarkable. (default: 2.0) -ssfloor float Flooring coefficient of spectral subtraction. The spectral power that goes below zero after subtraction will be substituted by the source signal with this coefficient multiplied. (default: 0.5) Misc AM options -htkconf file Parse the given HTK Config file, and set corresponding parame- ters to Julius. When using this option, the default parameter values are switched from Julius defaults to HTK defaults. RECOGNIZER AND SEARCH (-SR) Default values for beam width and LM weights will change according to compile-time setup of JuliusLib and model specification. Please see the startup log for the actual values. General parameters -inactive Start this recognition process instance with inactive state. (Rev.4.0) -1pass Perform only the first pass. This mode is automatically set at isolated word recognition. -no_ccd , -force_ccd Normally Julius determines whether the specified acoustic model is a context-dependent model from the model names, i.e., whether the model names contain character + and -. You can explicitly specify by these options to avoid mis-detection. These option will override automatic detection. -cmalpha float Smoothing patemeter for confidence scoring. (default: 0.05) -iwsp (Multi-path mode only) Enable inter-word context-free short pause handling. This option appends a skippable short pause model for every word end. The added model will be skipped on inter-word context handling. The HMM model to be appended can be specified by -spmodel. -transp float Additional insertion penalty for transparent words. (default: 0.0) -demo Equivalent to -progout -quiet. 1st pass parameters -lmp weight penalty (N-gram) Language model weights and word insertion penalties for the first pass. -penalty1 penalty (Grammar) word insertion penalty for the first pass. (default: 0.0) -b width Beam width for rank beam in number of HMM nodes on the first pass. This value defines search width on the 1st pass, and has great effect on the total processing time. Smaller width will speed up the decoding, but too small value will result in a sub- stantial increase of recognition errors due to search failure. Larger value will make the search stable and will lead to fail- ure-free search, but processing time and memory usage will grow in proportion to the width. The default value is dependent on acoustic model type: 400 (monophone), 800 (triphone), or 1000 (triphone, setup=v2.1) -nlimit num Upper limit of token per node. This option is valid when --enable-wpair and --enable-wpair-nlimit are enabled at compila- tion time. -progout Enable progressive output of the partial results on the first pass. -proginterval msec Set the output time interval of -progout in milliseconds. 2nd pass parameters -lmp2 weight penalty (N-gram) Language model weights and word insertion penalties for the second pass. 
-penalty2 penalty (Grammar) word insertion penalty for the second pass. (default: 0.0) -b2 width Envelope beam width (number of hypothesis) in second pass. If the count of word expantion at a certain length of hypothesis reaches this limit while search, shorter hypotheses are not expanded further. This prevents search to fall in breadth-first-like status stacking on the same position, and improve search failure. (default: 30) -sb float Score envelope width for enveloped scoring. When calculating hypothesis score for each generated hypothesis, its trellis expansion and viterbi operation will be pruned in the middle of the speech if score on a frame goes under the width. Giving small value makes the second pass faster, but computation error may occur. (default: 80.0) -s num Stack size, i.e. the maximum number of hypothesis that can be stored on the stack during the search. A larger value may give more stable results, but increases the amount of memory required. (default: 500) -m count Number of expanded hypotheses required to discontinue the search. If the number of expanded hypotheses is greater then this threshold then, the search is discontinued at that point. The larger this value is, The longer Julius gets to give up search. (default: 2000) -n num The number of candidates Julius tries to find. The search con- tinues till this number of sentence hypotheses have been found. The obtained sentence hypotheses are sorted by score, and final result is displayed in the order (see also the -output). The possibility that the optimum hypothesis is correctly found increases as this value gets increased, but the processing time also becomes longer. The default value depends on the engine setup on compilation time: 10 (standard) or 1 (fast or v2.1) -output num The top N sentence hypothesis to be output at the end of search. Use with -n (default: 1) -lookuprange frame When performing word expansion on the second pass, this option sets the number of frames before and after to look up next word hypotheses in the word trellis. This prevents the omission of short words, but with a large value, the number of expanded hypotheses increases and system becomes slow. (default: 5) -looktrellis (Grammar) Expand only the words survived on the first pass instead of expanding all the words predicted by grammar. This option makes second pass decoding slightly faster especially for large vocabulary condition, but may increase deletion error of short words. (default: disabled) Short-pause segmentation When compiled with --enable-decoder-vad, the short-pause segmentation will be extended to support decoder-based VAD. -spsegment Enable short-pause segmentation mode. Input will be segmented when a short pause word (word with only silence model in pronun- ciation) gets the highest likelihood at certain successive frames on the first pass. When detected segment end, Julius stop the 1st pass at the point, perform 2nd pass, and continue with next segment. The word context will be considered among seg- ments. (Rev.4.0) When compiled with --enable-decoder-vad, this option enables decoder-based VAD, to skip long silence. -spdur frame Short pause duration length to detect end of input segment, in number of frames. (default: 10) -pausemodels string A comma-separated list of pause model names to be used at short-pause segmentation. The word with only the pause models will be treated as "pause word" for pause detectionin. If not specified, name of -spmodel, -silhead and -siltail will be used. 
(Rev.4.0) -spmargin frame Backstep margin at trigger up for decoder-based VAD. (Rev.4.0) This option will be valid only if compiled with --enable-decoder-vad. -spdelay frame Trigger decision delay frame at trigger up for decoder-based VAD. (Rev.4.0) This option will be valid only if compiled with --enable-decoder-vad. Lattice / confusion network output -lattice , -nolattice Enable / disable generation of word graph. Search algorithm also has changed to optimize for better word graph generation, so the sentence result may not be the same as normal N-best recogni- tion. (Rev.4.0) -confnet , -noconfnet Enable / disable generation of confusion network. Enabling this will also activates -lattice internally. (Rev.4.0) -graphrange frame Merge same words at neighbor position at graph generation. If the position of same words differs smaller than this value, they will be merged. The default is 0 (allow merging on exactly the same location) and specifying larger value will result in smaller graph output. Setting to -1 will disable merging, in that case same words on the same location of different scores will be left as they are. (default: 0) -graphcut depth Cut the resulting graph by its word depth at post-processing stage. The depth value is the number of words to be allowed at a frame. Setting to -1 disables this feature. (default: 80) -graphboundloop count Limit the number of boundary adjustment loop at post-processing stage. This parameter prevents Julius from blocking by infinite adjustment loop by short word oscillation. (default: 20) -graphsearchdelay , -nographsearchdelay When "-graphsearchdelay" option is set, Julius modifies its graph generation alogrithm on the 2nd pass not to terminate search by graph merging, until the first sentence candidate is found. This option may improve graph accuracy, especially when you are going to generate a huge word graph by setting broad search. Namely, it may result in better graph accuracy when you set wide beams on both 1st pass -b and 2nd pass -b2, and large number for -n. (default: disabled) Multi-gram / multi-dic output -multigramout , -nomultigramout On grammar recognition using multiple grammars, Julius will out- put only the best result among all grammars. Enabling this option will make Julius to output result for each grammar. (default: disabled) Forced alignment -walign Do viterbi alignment per word units for the recognition result. The word boundary frames and the average acoustic scores per frame will be calculated. -palign Do viterbi alignment per phone units for the recognition result. The phone boundary frames and the average acoustic scores per frame will be calculated. -salign Do viterbi alignment per state for the recognition result. The state boundary frames and the average acoustic scores per frame will be calculated. 
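   As a closing illustration, below is a minimal jconf sketch that puts a
   typical subset of the above options together. Since it contains no
   instance declaration, all options are assigned to the single default
   instance "_default" as described in the EXAMPLE section. The model file
   names (hmmdefs.binhmm, logicalTri, fruit) are placeholders, and the
   numeric values simply restate the defaults quoted in this document;
   adjust them for your own models.

   Example of Jconf file: grammar recognition from microphone
        -input mic
        -headmargin 300
        -tailmargin 400
        -h hmmdefs.binhmm
        -hlist logicalTri
        -smpFreq 16000
        -gram fruit
        -b 800
        -b2 30
        -output 1
        -walign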
jconf (5 ) julius-4.2.2/libjulius/doxygen.conf.ver.in0000644001051700105040000000004010731704275017056 0ustar ritrlabPROJECT_NUMBER=@JULIUS_VERSION@ julius-4.2.2/Makefile.in0000644001051700105040000000320212004452377013376 0ustar ritrlab# Copyright (c) 1991-2012 Kawahara Lab., Kyoto University # Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology # Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology # All rights reserved # # Makefile.in --- Makefile Template for configure # # $Id: Makefile.in,v 1.5 2012/07/27 08:44:47 sumomo Exp $ # SHELL=/bin/sh SUBDIRS=libsent libjulius julius mkbingram mkbinhmm adinrec adintool mkgshmm mkss jcontrol gramtools generate-ngram jclient-perl man CONFIG_SUBDIRS=mkgshmm gramtools jcontrol julius libjulius libsent RM=@RM@ -f prefix=@prefix@ exec_prefix=@exec_prefix@ INSTALL=@INSTALL@ all: for d in $(SUBDIRS); do \ (cd $$d; $(MAKE)); \ done install: for d in $(SUBDIRS); do \ (cd $$d; $(MAKE) install); \ done install.bin: for d in $(SUBDIRS); do \ (cd $$d; $(MAKE) install.bin); \ done install.man: (cd man; $(MAKE) install) clean: for d in $(SUBDIRS); do \ (cd $$d; $(MAKE) clean); \ done $(RM) config.log config.cache distclean: for d in $(SUBDIRS); do \ if test -f $$d/Makefile; then \ (cd $$d; $(MAKE) distclean); \ fi; \ done $(RM) config.log config.cache $(RM) config.status $(RM) Makefile configure: for d in $(CONFIG_SUBDIRS); do \ (cd $$d; autoconf); \ done doxygen: mkdir -p doxygen/en mkdir -p doxygen/ja cat support/doxygen.conf.en libjulius/doxygen.conf.ver > doxygen/doxygen.conf.en cat support/doxygen.conf.ja libjulius/doxygen.conf.ver > doxygen/doxygen.conf.ja doxygen doxygen/doxygen.conf.en > doxygen/doxygen-en.log 2> doxygen/doxygen-en-err.log doxygen doxygen/doxygen.conf.ja > doxygen/doxygen-ja.log 2> doxygen/doxygen-ja-err.log julius-4.2.2/man/0000755001051700105040000000000012004463507012104 5ustar ritrlabjulius-4.2.2/man/dfa_minimize.10000644001051700105040000000274511071102423014617 0ustar ritrlab.\" Title: dfa_minimize .\" Author: .\" Generator: DocBook XSL Stylesheets v1.71.0 .\" Date: 10/02/2008 .\" Manual: .\" Source: .\" .TH "DFA_MINIMIZE" "1" "10/02/2008" "" "" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) .ad l .SH "NAME" dfa_minimize \- Minimize a DFA grammar network .SH "SYNOPSIS" .HP 13 \fBdfa_minimize\fR [\-o\ \fIoutfile\fR] {dfafile} .SH "DESCRIPTION" .PP \fBdfa_minimize\fR will convert an .dfa file to an equivalent minimal form. Output to standard output, or to a file specified by "\fB\-o\fR" option. .PP On version 3.5.3 and later, \fBmkdfa.pl\fR invokes this tool inside, and the output .dfa file will be always minimized, so you do not need to use this manually. .SH "OPTIONS" .PP \fB \-o \fR \fIoutfile\fR .RS 3n Output file. If not specified output to standard output. .RE .SH "EXAMPLES" .PP Minimize \fIfoo.dfa\fR to \fIbar.dfa\fR: .sp .RS 3n .nf % \fBdfa_minimize\fR \-o bar.dfa foo.dfa .fi .RE Another way: .sp .RS 3n .nf % \fBdfa_minimize\fR < foo.dfa > bar.dfa .fi .RE .SH "SEE ALSO" .PP \fB mkdfa.pl \fR( 1 ) .SH "COPYRIGHT" .PP Copyright (c) 1997\-2000 Information\-technology Promotion Agency, Japan .PP Copyright (c) 1991\-2008 Kawahara Lab., Kyoto University .PP Copyright (c) 2000\-2005 Shikano Lab., Nara Institute of Science and Technology .PP Copyright (c) 2005\-2008 Julius project team, Nagoya Institute of Technology .SH "LICENSE" .PP The same as Julius. 
julius-4.2.2/man/mkbinhmm.10000644001051700105040000000526611071102423013767 0ustar ritrlab.\" Title: mkbinhmm .\" Author: .\" Generator: DocBook XSL Stylesheets v1.71.0 .\" Date: 10/02/2008 .\" Manual: .\" Source: .\" .TH "MKBINHMM" "1" "10/02/2008" "" "" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) .ad l .SH "NAME" mkbinhmm \- convert HMM definition file in HTK ascii format to Julius binary format .SH "SYNOPSIS" .HP 9 \fBmkbinhmm\fR [\-htkconf\ \fIHTKConfigFile\fR] {hmmdefs_file} {binhmm_file} .SH "DESCRIPTION" .PP \fBmkbinhmm\fR convert an HMM definition file in HTK ascii format into a binary HMM file for Julius. It will greatly speed up the launch process. .PP You can also embed acoustic analysis condition parameters needed for recognition into the output file. To embed the parameters, specify the HTK Config file you have used to extract acoustic features for training the HMM by the optione "\fB\-htkconf\fR". .PP The embedded parameters in a binary HMM format will be loaded into Julius automatically, so you do not need to specify the acoustic feature options at run time. It will be convenient when you deliver an acoustic model. .PP You can also specify binary file as the input. This can be used to update the old binary format into new one, or to embed the config parameters into the already existing binary files. If the input binhmm already has acoustic analysis parameters embedded, they will be overridden by the specified values. .PP \fBmkbinhmm\fR can read gzipped file as input. .SH "OPTIONS" .PP \fB \-htkconf \fR \fIHTKConfigFile\fR .RS 3n HTK Config file you used at training time. If specified, the values are embedded to the output file. .RE .PP \fIhmmdefs_file\fR .RS 3n The source HMm definitino file in HTK ascii format or Julius binary format. .RE .PP \fIhmmdefs_file\fR .RS 3n Output file. .RE .SH "EXAMPLES" .PP Convert HTK ascii format HMM definition file into Julius binary file: .sp .RS 3n .nf % \fBmkbinhmm\fR hmmdefs.ascii binhmm .fi .RE Furthermore, embed acoustic feature parameters as specified by Config file .sp .RS 3n .nf % \fBmkbinhmm\fR \-htkconf Config hmmdefs.ascii binhmm .fi .RE Embed the acoustic parameters into an existing binary file .sp .RS 3n .nf % \fBmkbingram\fR \-htkconf Config old_binhmm new_binhmm .fi .RE .SH "SEE ALSO" .PP \fB julius \fR( 1 ) , \fB mkbingram \fR( 1 ) , \fB mkbinhmmlist \fR( 1 ) .SH "COPYRIGHT" .PP Copyright (c) 1997\-2000 Information\-technology Promotion Agency, Japan .PP Copyright (c) 1991\-2008 Kawahara Lab., Kyoto University .PP Copyright (c) 2000\-2005 Shikano Lab., Nara Institute of Science and Technology .PP Copyright (c) 2005\-2008 Julius project team, Nagoya Institute of Technology .SH "LICENSE" .PP The same as Julius. julius-4.2.2/man/nextword.10000644001051700105040000000426311071102423014033 0ustar ritrlab.\" Title: nextword .\" Author: .\" Generator: DocBook XSL Stylesheets v1.71.0 .\" Date: 10/02/2008 .\" Manual: .\" Source: .\" .TH "NEXTWORD" "1" "10/02/2008" "" "" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) .ad l .SH "NAME" nextword \- display next predicted words (in reverse order) .SH "SYNOPSIS" .HP 9 \fBnextword\fR [\-t] [\-r] [\-s\ \fIspname\fR] [\-v] {prefix} .SH "DESCRIPTION" .PP Given a partial (part of) sentence from the end, it outputs the next words allowed in the specified grammar. .PP \fI.dfa\fR, \fI.dict\fR and \fI.term\fR files are needed to execute. 
They can be generated from \fI.grammar\fR and \fI.voca\fR file by \fBmkdfa.pl\fR. .PP Please note that the latter part of sentence should be given, since the main 2nd pass does a right\-to\-left parsing. .SH "OPTIONS" .PP \fB \-t \fR .RS 3n Input / Output in category name. (default: word) .RE .PP \fB \-r \fR .RS 3n Enter in reverse order .RE .PP \fB \-s \fR \fIspname\fR .RS 3n the name string of short\-pause word to be supressed (default: "sp") .RE .PP \fB \-v \fR .RS 3n Debug output. .RE .SH "EXAMPLES" .PP Exmple output of a sample grammar "fruit": .sp .RS 3n .nf % \fBnextword\fR fruit Stat: init_voca: read 36 words Reading in term file (optional)...done 15 categories, 36 words DFA has 26 nodes and 42 arcs \-\-\-\-\- command completion is disabled \-\-\-\-\- wseq > A BANANA [wseq: A BANANA ] [cate: (NUM_1|NUM_1|A|A) FRUIT_SINGULAR NS_E] PREDICTED CATEGORIES/WORDS: NS_B ( ) HAVE (HAVE ) WANT (WANT ) NS_B ( ) HAVE (HAVE ) WANT (WANT ) .fi .RE .sp .SH "SEE ALSO" .PP \fB mkdfa.pl \fR( 1 ) , \fB generate \fR( 1 ) , \fB accept_check \fR( 1 ) .SH "COPYRIGHT" .PP Copyright (c) 1997\-2000 Information\-technology Promotion Agency, Japan .PP Copyright (c) 1991\-2008 Kawahara Lab., Kyoto University .PP Copyright (c) 2000\-2005 Shikano Lab., Nara Institute of Science and Technology .PP Copyright (c) 2005\-2008 Julius project team, Nagoya Institute of Technology .SH "LICENSE" .PP The same as Julius. julius-4.2.2/man/Makefile.in0000644001051700105040000000156312004452411014146 0ustar ritrlab# Copyright (c) 1991-2012 Kawahara Lab., Kyoto University # Copyright (c) 2001-2005 Shikano Lab., Nara Institute of Science and Technology # Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology # All rights reserved # # $Id: Makefile.in,v 1.5 2012/07/27 08:44:57 sumomo Exp $ # SHELL=/bin/sh RM=@RM@ -f prefix=@prefix@ exec_prefix=@exec_prefix@ INSTALL=@INSTALL@ ############################################################ all: @echo Nothing to build install: install.man install.man.ja install.man: ${INSTALL} -d @mandir@/man1 for f in *.1; do \ @INSTALL_DATA@ $$f @mandir@/man1/$$f; \ done install.man.ja: ${INSTALL} -d @mandir@/ja/man1 cd ja; for f in *.1; do \ @INSTALL_DATA@ $$f @mandir@/ja/man1/$$f; \ done cd .. ############################################################ clean: @echo Nothing to do distclean: $(RM) Makefile julius-4.2.2/man/accept_check.10000644001051700105040000000463611071102423014561 0ustar ritrlab.\" Title: accept_check .\" Author: .\" Generator: DocBook XSL Stylesheets v1.71.0 .\" Date: 10/02/2008 .\" Manual: .\" Source: .\" .TH "ACCEPT_CHECK" "1" "10/02/2008" "" "" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) .ad l .SH "NAME" accept_check \- Check whether a grammar accept / reject given word sequences .SH "SYNOPSIS" .HP 13 \fBaccept_check\fR [\-t] [\-s\ \fIspname\fR] [\-v] {prefix} .SH "DESCRIPTION" .PP \fBaccept_check\fR is a tool to check whether a sentence can be accepted or rejected on a grammar (\fIprefix.dfa\fR and \fIprefix.dict\fR). The sentence should be given from standard input. You can do a batch check by preparing all test sentence at each line of a text file, and give it as standard input of \fBaccept_check\fR. .PP This tool needs .dfa, .dict and .term files. You should convert a written grammar file to generate them by \fBmkdfa.pl\fR. .PP A sentence should be given as space\-separated word sequence. It may be required to add head / tail silence word like sil, depending on your grammar. 
And should not contain a short\-pause word. .PP When a word belongs to various category in a grammar, \fBaccept_check\fR will check all the possible sentence patterns, and accept it if any of those is acceptable. .SH "OPTIONS" .PP \fB \-t \fR .RS 3n Use category name as input instead of word. .RE .PP \fB \-s \fR \fIspname\fR .RS 3n Short\-pause word name to be skipped. (default: "sp") .RE .PP \fB \-v \fR .RS 3n Debug output. .RE .SH "EXAMPLES" .PP An output for "date" grammar: .sp .RS 3n .nf % echo ' NEXT SUNDAY ' | accept_check date Reading in dictionary... 143 words...done Reading in DFA grammar...done Mapping dict item <\-> DFA terminal (category)...done Reading in term file (optional)...done 27 categories, 143 words DFA has 35 nodes and 71 arcs \-\-\-\-\- wseq: NEXT SUNDAY cate: NS_B (NEXT|NEXT) (DAYOFWEEK|DAYOFWEEK|DAY|DAY) NS_E accepted .fi .RE .sp .SH "SEE ALSO" .PP \fB mkdfa.pl \fR( 1 ) , \fB generate \fR( 1 ) , \fB nextword \fR( 1 ) .SH "COPYRIGHT" .PP Copyright (c) 1997\-2000 Information\-technology Promotion Agency, Japan .PP Copyright (c) 1991\-2008 Kawahara Lab., Kyoto University .PP Copyright (c) 2000\-2005 Shikano Lab., Nara Institute of Science and Technology .PP Copyright (c) 2005\-2008 Julius project team, Nagoya Institute of Technology .SH "LICENSE" .PP The same as Julius. julius-4.2.2/man/mkgshmm.10000644001051700105040000000301611071102423013617 0ustar ritrlab.\" Title: mkgshmm .\" Author: .\" Generator: DocBook XSL Stylesheets v1.71.0 .\" Date: 10/02/2008 .\" Manual: .\" Source: .\" .TH "MKGSHMM" "1" "10/02/2008" "" "" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) .ad l .SH "NAME" mkgshmm \- convert monophone HMM to GS HMM for Julius .SH "SYNOPSIS" .HP 8 \fBmkgshmm\fR {monophone_hmmdefs} > {outputfile} .SH "DESCRIPTION" .PP mkgshmm converts monophone HMM definition file in HTK format into a special format for Gaussian Mixture Selection (GMS) in Julius. .PP GMS is an algorithm to reduce the amount of acoustic computation with triphone HMM, by pre\-selection of promising gaussian mixtures using likelihoods of corresponding monophone mixtures. .SH "EXAMPLES" .PP (1) Prepare a monophone model which was trained by the same corpus as target triphone model. .PP (2) Convert the monophone model using mkgshmm. .sp .RS 3n .nf % \fBmkgshmm\fR monophone > gshmmfile .fi .RE (3) Specify the output file in Julius with option "\-gshmm" .sp .RS 3n .nf % \fBjulius\fR \-C ... \-gshmm gshmmfile .fi .RE .SH "SEE ALSO" .PP \fB julius \fR( 1 ) .SH "COPYRIGHT" .PP Copyright (c) 1997\-2000 Information\-technology Promotion Agency, Japan .PP Copyright (c) 1991\-2008 Kawahara Lab., Kyoto University .PP Copyright (c) 2000\-2005 Shikano Lab., Nara Institute of Science and Technology .PP Copyright (c) 2005\-2008 Julius project team, Nagoya Institute of Technology .SH "LICENSE" .PP The same as Julius. julius-4.2.2/man/adintool.10000644001051700105040000002155111666612225014011 0ustar ritrlab.\" Title: adintool .\" Author: .\" Generator: DocBook XSL Stylesheets v1.71.0 .\" Date: 10/02/2008 .\" Manual: .\" Source: .\" .TH "ADINTOOL" "1" "10/02/2008" "" "" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) .ad l .SH "NAME" adintool \- a tool to record / split / send / receive audio streams .SH "SYNOPSIS" .HP 9 \fBadintool\fR {\-in\ \fIinputdev\fR} {\-out\ \fIoutputdev\fR} [\fIoptions\fR...] 
.SH "DESCRIPTION" .PP \fBadintool\fR analyzes speech input, finds speech segments skipping silence, and records the detected segments in various ways. It performs speech detection based on zerocross number and power (level), and records the detected parts to files or other output devices sucessively. .PP \fBadintool\fR is a upper version of adinrec with various functions. Supported input device are: microphone input, a speech file, standard tty input, and network socket (called adin\-net server mode). Julius plugin can be also used. Detected speech segments will be saved to output devices: speech files, standard tty output, and network socket (called adin\-net client mode). For example, you can split the incoming speech to segments and send them to Julius to be recognized. .PP Output format is WAV, 16bit (signed short), monoral. If the file already exist, it will be overridden. .SH "OPTIONS" .PP All Julius options can be set. Only audio input related options are treated and others are silently skipped. Below is a list of options. .SS "adintool specific options" .PP \fB \-freq \fR \fIHz\fR .RS 3n Set sampling rate in Hz. (default: 16,000) .RE .PP \fB \-in \fR \fIinputdev\fR .RS 3n Audio input device. "mic" to capture via microphone input, "file" for audio file input, and "stdin" to read raw data from standard\-input. For file input, file name prompt will appear after startup. Use "adinnet" to make \fBadintool\fR as "adinnet server", receiving data from client via network socket. Default port number is 5530, which can be altered by option "\fB\-inport\fR". .sp Alternatively, input device can be set by "\fB\-input\fR" option, in which case you can use plugin input. .RE .PP \fB \-out \fR \fIoutputdev\fR .RS 3n Audio output device store the data. Specify "file" to save to file, in which the output filename should be given by "\fB\-filename\fR". Use "stdout" to standard out. "adinnet" will make \fBadintool\fR to be an adinnet client, sending speech data to a server via tcp/ip socket. When using "adinnet" output, the server name to send data should be specified by "\fB\-server\fR". The default port number is 5530, which can be changed by "\fB\-port\fR" option. .RE .PP \fB \-inport \fR \fInum\fR .RS 3n When adintool becomes adinnet server to receive data (\-in adinnet), set the port number to listen. (default: 5530) .RE .PP \fB \-server \fR [host] [,host...] .RS 3n When output to adinnet server (\-out adinnet), set the hostname. You can send to multiple hosts by specifying their hostnames as comma\-delimited list like "host1,host2,host3". .RE .PP \fB \-port \fR [num] [,num...] .RS 3n When adintool send a data to adinnet server (\-out adinnet), set the port number to connect. (default: 5530) For multiple servers, specify port numbers for all servers like "5530,5530,5531". .RE .PP \fB \-filename \fR \fIfile\fR .RS 3n When output to file (\fB\-out file\fR), set the output filename. The actual file name will be as "\fIfile.0000.wav\fR" , "\fIfile.0001.wav\fR" and so on, where the four digit number increases as speech segment detected. The initial number will be set to 0 by default, which can be changed by "\fB\-startid\fR" option. When using "\fB\-oneshot\fR" option to save only the first segment, the input will be saved as "\fIfile\fR". .RE .PP \fB \-startid \fR \fInumber\fR .RS 3n At file output, set the initial file number. (default: 0) .RE .PP \fB \-oneshot \fR .RS 3n Exit after the end of first speech segment. 
.RE .PP \fB \-nosegment \fR .RS 3n Do not perform speech detection for input, just treat all the input as a single valid segment. .RE .PP \fB \-raw \fR .RS 3n Output as RAW file (no header). .RE .PP \fB \-autopause \fR .RS 3n When output to adinnet server, \fBadintool\fR enter pause state at every end of speech segment. It will restart when the destination adinnet server sends it a resume signal. .RE .PP \fB \-loosesync \fR .RS 3n When output to multiple adinnet server, not to do strict synchronization for restart. By default, when \fBadintool\fR has entered pause state, it will not restart until resume commands are received from all servers. This option will allow restart at least one restart command has arrived. .RE .PP \fB \-rewind \fR \fImsec\fR .RS 3n When input is a live microphone device, and there has been some continuing input at the moment \fBadintool\fR resumes, it start recording backtracking by the specified milliseconds. .RE .SS "Concerning Julius options" .PP \fB \-input \fR {mic|rawfile|adinnet|stdin|netaudio|esd|alsa|oss} .RS 3n Choose speech input source. Specify 'file' or 'rawfile' for waveform file. On file input, users will be prompted to enter the file name from stdin. .sp \'mic' is to get audio input from a default live microphone device, and 'adinnet' means receiving waveform data via tcpip network from an adinnet client. 'netaudio' is from DatLink/NetAudio input, and 'stdin' means data input from standard input. .sp At Linux, you can choose API at run time by specifying alsa, oss and esd. .RE .PP \fB \-chunk_size \fR \fIsamples\fR .RS 3n Audio fragment size in number of samples. (default: 1000) .RE .PP \fB \-lv \fR \fIthres\fR .RS 3n Level threshold for speech input detection. Values should be in range from 0 to 32767. (default: 2000) .RE .PP \fB \-zc \fR \fIthres\fR .RS 3n Zero crossing threshold per second. Only input that goes over the level threshold (\fB\-lv\fR) will be counted. (default: 60) .RE .PP \fB \-headmargin \fR \fImsec\fR .RS 3n Silence margin at the start of speech segment in milliseconds. (default: 300) .RE .PP \fB \-tailmargin \fR \fImsec\fR .RS 3n Silence margin at the end of speech segment in milliseconds. (default: 400) .RE .PP \fB \-zmean \fR .RS 3n This option enables DC offset removal. .RE .PP \fB \-smpFreq \fR \fIHz\fR .RS 3n Set sampling rate in Hz. (default: 16,000) .RE .PP \fB \-48 \fR .RS 3n Record input with 48kHz sampling, and down\-sample it to 16kHz on\-the\-fly. This option is valid for 16kHz model only. The down\-sampling routine was ported from sptk. (Rev. 4.0) .RE .PP \fB \-NA \fR \fIdevicename\fR .RS 3n Host name for DatLink server input (\fB\-input netaudio\fR). .RE .PP \fB \-adport \fR \fIport_number\fR .RS 3n With \fB\-input adinnet\fR, specify adinnet port number to listen. (default: 5530) .RE .PP \fB \-nostrip \fR .RS 3n Julius by default removes successive zero samples in input speech data. This option stop it. .RE .PP \fB \-C \fR \fIjconffile\fR .RS 3n Load a jconf file at here. The content of the jconffile will be expanded at this point. .RE .PP \fB \-plugindir \fR \fIdirlist\fR .RS 3n Specify which directories to load plugin. If several direcotries exist, specify them by colon\-separated list. .RE .SH "ENVIRONMENT VARIABLES" .PP \fB \fR\fB\fBALSADEV\fR\fR\fB \fR .RS 3n (using mic input with alsa device) specify a capture device name. If not specified, "default" will be used. .RE .PP \fB \fR\fB\fBAUDIODEV\fR\fR\fB \fR .RS 3n (using mic input with oss device) specify a capture device path. 
If not specified, "\fI/dev/dsp\fR" will be used. .RE .PP \fB \fR\fB\fBLATENCY_MSEC\fR\fR\fB \fR .RS 3n Try to set input latency of microphone input in milliseconds. Smaller value will shorten latency but sometimes make process unstable. Default value will depend on the running OS. .RE .SH "EXAMPLES" .PP Record microphone input to files: "\fIdata.0000.wav\fR", "\fIdata.0001.wav\fR" and so on: .sp .RS 3n .nf % \fBadintool\fR \-in mic \-out file \-filename data .fi .RE Split a long speech file "\fIfoobar.raw\fR" into "\fIfoobar.1500.wav\fR", "\fIfoobar.1501.wav\fR" ...: .sp .RS 3n .nf % \fBadintool\fR \-in file \-out file \-filename foobar \-startid 1500 % enter filename\->foobar.raw .fi .RE Copy an entire audio file via network socket. .sp .RS 3n .nf (sender) % \fBadintool\fR \-in file \-out adinnet \-server \fIreceiver_hostname\fR \-nosegment (receiver) % \fBadintool\fR \-in adinnet \-out file \-nosegment .fi .RE Detect speech segment, send to Julius via network and recognize it: .sp .RS 3n .nf (sender) % \fBadintool\fR \-in mic \-out adinnet \-server \fIreceiver_hostname\fR (receiver) % \fBjulius\fR \-C ... \-input adinnet .fi .RE .SH "SEE ALSO" .PP \fB julius \fR( 1 ) , \fB adinrec \fR( 1 ) .SH "COPYRIGHT" .PP Copyright (c) 1997\-2000 Information\-technology Promotion Agency, Japan .PP Copyright (c) 1991\-2008 Kawahara Lab., Kyoto University .PP Copyright (c) 2000\-2005 Shikano Lab., Nara Institute of Science and Technology .PP Copyright (c) 2005\-2008 Julius project team, Nagoya Institute of Technology .SH "LICENSE" .PP The same as Julius. julius-4.2.2/man/gram2sapixml.pl.10000644001051700105040000000314511556444000015207 0ustar ritrlab.\" Title: gram2sapixml.pl .\" Author: .\" Generator: DocBook XSL Stylesheets v1.71.0 .\" Date: 10/02/2008 .\" Manual: .\" Source: .\" .TH "GRAM2SAPIXML.PL" "1" "10/02/2008" "" "" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) .ad l .SH "NAME" gram2sapixml.pl \- convert Julius grammar to SAPI XML grammar format .SH "SYNOPSIS" .HP 16 \fBgram2sapixml.pl\fR [\fIprefix\fR...] .SH "DESCRIPTION" .PP \fBgram2sapixml.pl\fR converts a recognition grammar file of Julius (.grammar, .voca) to Microsoft SAPI XML grammar format. \fIprefix\fR should be a file name of target grammar, excluding suffixes. If multiple argument is given, each will be process sequencialy in turn. .PP The internal character set should be in UTF\-8 format. By default \fBgram2sapixml.pl\fR assume input in EUC\-JP encoding and tries to convert it to UTF\-8 using \fBiconv\fR. You may want to disable this feature within the script. .PP It will fail to convert a left recursive rule in the grammar. When fails, it will leave the source rules in the target .xml file, so you should modify the output manually to solve it. .SH "SEE ALSO" .PP \fB mkdfa.pl \fR( 1 ) .SH "DIAGNOSTICS" .PP The conversion procedure is somewhat dumb one, only converting the non\-terminal symbols and terminal symbols (=word category name) into corresponding rules one by one. This is only a help tool, and you will need a manual inspection and editing to use it on a real SAPI application. .SH "COPYRIGHT" .PP Copyright (c) 2002 Takashi Sumiyoshi .SH "LICENSE" .PP The same as Julius. 
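.SH "EXAMPLES"
.PP
The following invocations are illustrative sketches; the grammar prefixes \fIfoo\fR and \fIbar\fR are placeholders. Convert a single grammar, reading \fIfoo.grammar\fR and \fIfoo.voca\fR and writing the converted rules to the corresponding .xml file:
.sp
.RS 3n
.nf
% \fBgram2sapixml.pl\fR foo
.fi
.RE
Convert several grammars in turn by giving multiple prefixes:
.sp
.RS 3n
.nf
% \fBgram2sapixml.pl\fR foo bar
.fi
.RE
.sp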
julius-4.2.2/man/ja/0000755001051700105040000000000012004463507012476 5ustar ritrlabjulius-4.2.2/man/ja/dfa_minimize.10000644001051700105040000000306611071102424015207 0ustar ritrlab.\" Title: dfa_minimize .\" Author: .\" Generator: DocBook XSL Stylesheets v1.71.0 .\" Date: 10/02/2008 .\" Manual: .\" Source: .\" .TH "DFA_MINIMIZE" "1" "10/02/2008" "" "" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) .ad l .SH "名前" dfa_minimize \- 有限オートマトン文法を最小化する .SH "概要" .HP 13 \fBdfa_minimize\fR [\-o\ \fIoutfile\fR] {dfafile} .SH "DESCRIPTION" .PP dfa_minimize は,.dfa ファイルを等価な最小化の .dfa ファイルに変換し, 標準出力に出力します.オプション \fB\-o\fR で出力先を 指定することもできます. .PP バージョン 3.5.3 以降の Julius に付属の \fBmkdfa.pl\fR は, このツールを内部で自動的に呼び出すので,出力される .dfa は常に最小化 されており,これを単体で実行する必要はありません.バージョン 3.5.2 以前の \fBmkdfa.pl\fR で出力された .dfa は最小化されていないので, このツールで最小化するとサイズを最適化することができます. .SH "OPTIONS" .PP \fB \-o \fR \fIoutfile\fR .RS 3n 出力ファイル名を指定する. .RE .SH "EXAMPLES" .PP \fIfoo.dfa\fR を最小化して \fIbar.dfa\fR に 保存する. .sp .RS 3n .nf % \fBdfa_minimize\fR \-o bar.dfa foo.dfa .fi .RE 別の方法: .sp .RS 3n .nf % \fBdfa_minimize\fR < foo.dfa > bar.dfa .fi .RE .SH "SEE ALSO" .PP \fB mkdfa.pl \fR( 1 ) .SH "COPYRIGHT" .PP Copyright (c) 1991\-2008 京都大学 河原研究室 .PP Copyright (c) 1997\-2000 情報処理振興事業協会(IPA) .PP Copyright (c) 2000\-2008 奈良先端科学技術大学院大学 鹿野研究室 .PP Copyright (c) 2005\-2008 名古屋工業大学 Julius開発チーム .SH "LICENSE" .PP Julius の使用許諾に準じます. julius-4.2.2/man/ja/mkbinhmm.10000644001051700105040000000447611071102424014364 0ustar ritrlab.\" Title: mkbinhmm .\" Author: .\" Generator: DocBook XSL Stylesheets v1.71.0 .\" Date: 10/02/2008 .\" Manual: .\" Source: .\" .TH "MKBINHMM" "1" "10/02/2008" "" "" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) .ad l .SH "名前" mkbinhmm \- バイナリ HMM 変換 .SH "概要" .HP 9 \fBmkbinhmm\fR [\-htkconf\ \fIHTKConfigFile\fR] {hmmdefs_file} {binhmm_file} .SH "DESCRIPTION" .PP mkbinhmm は,HTKのアスキー形式のHMM定義ファイルを,Julius用のバイナ リ形式へ変換します.これを使うことで Juliusの起動を高速化することができます. .PP この音響モデルの特徴抽出条件を出力ファイルのヘッダに埋め込むことができ ます.埋め込むには,学習時に特徴量抽出に用いた HTK Config ファイルを "\fB\-htkconf\fR" で指定します.ヘッダに抽出条件を埋め込むことで, 認識時に自動的に必要な特徴抽出パラメータがセットされるので,便利です. .PP 入力として,HTKアスキー形式のほかに,既に変換済みのJulius用バイナリHMM を与えることもできます.\fB\-htkconf\fR と併用すれば, 既存のバイナリHMMに特徴量抽出条件パラメータを埋め込むことができます. .PP mkbinhmm は gzip 圧縮されたHMM定義ファイルをそのまま読み込めます. .SH "OPTIONS" .PP \fB \-htkconf \fR \fIHTKConfigFile\fR .RS 3n 学習時に特徴量抽出に使用したHTK Configファイルを指定する.指定さ れた場合,その中の設定値が出力ファイルのヘッダに埋め込まれる. 入力に既にヘッダがある場合上書きされる. .RE .PP \fIhmmdefs_file\fR .RS 3n 変換元の音響モデル定義ファイル (MMF).HTK ASCII 形式,あるいは Julius バイナリ形式. .RE .PP \fIbinhmm_file\fR .RS 3n Julius用バイナリ形式ファイルの出力先. .RE .SH "EXAMPLES" .PP HTK ASCII 形式の HMM 定義をバイナリ形式に変換する: .sp .RS 3n .nf % \fBmkbinhmm\fR hmmdefs.ascii binhmm .fi .RE HTKの設定ファイル Config の内容をヘッダに書き込んで出力: .sp .RS 3n .nf % \fBmkbinhmm\fR \-htkconf Config hmmdefs.ascii binhmm .fi .RE 古いバイナリ形式ファイルにヘッダ情報だけ追加する: .sp .RS 3n .nf % \fBmkbinhmm\fR \-htkconf Config old_binhmm new_binhmm .fi .RE .SH "SEE ALSO" .PP \fB julius \fR( 1 ) , \fB mkbingram \fR( 1 ) .SH "COPYRIGHT" .PP Copyright (c) 1991\-2008 京都大学 河原研究室 .PP Copyright (c) 1997\-2000 情報処理振興事業協会(IPA) .PP Copyright (c) 2000\-2008 奈良先端科学技術大学院大学 鹿野研究室 .PP Copyright (c) 2005\-2008 名古屋工業大学 Julius開発チーム .SH "LICENSE" .PP Julius の使用許諾に準じます.
julius-4.2.2/man/ja/nextword.10000644001051700105040000000444611071102424014431 0ustar ritrlab.\" Title: nextword .\" Author: .\" Generator: DocBook XSL Stylesheets v1.71.0 .\" Date: 10/02/2008 .\" Manual: .\" Source: .\" .TH "NEXTWORD" "1" "10/02/2008" "" "" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) .ad l .SH "名前" nextword \- DFA 文法で(逆向きに)次単語を予測するツール .SH "概要" .HP 9 \fBnextword\fR [\-t] [\-r] [\-s\ \fIspname\fR] [\-v] {prefix} .SH "DESCRIPTION" .PP nextword は,\fBmkdfa.pl\fR によって変換された DFA 文法 上で,与えられた部分文に対して接続しうる次単語の集合を出力します. .PP 実行には .dfa, .dict, .term の各ファイルが必要です. あらかじめ \fBmkdfa.pl\fR で生成しておいて下さい. .PP !注意! \fBmkdfa.pl\fR で出力される文法は,元の 文法と異なり,文の後ろから前に向かう逆向きの文法となっています. これは,Julius の第2パスで後ろ向きの探索を行うためです. このため,nextword で与える部分文も逆向きとなります. .SH "OPTIONS" .PP \fB \-t \fR .RS 3n 単語ではなくカテゴリ名で入力・出力する. .RE .PP \fB \-r \fR .RS 3n 単語を逆順に入力する. .RE .PP \fB \-s \fR \fIspname\fR .RS 3n スキップすべきショートポーズ単語の名前を指定する. (default: "sp") .RE .PP \fB \-v \fR .RS 3n デバッグ出力. .RE .SH "EXAMPLES" .PP vfr (フィッティングタスク用文法) での実行例: .sp .RS 3n .nf % nextword vfr Reading in dictionary...done Reading in DFA grammar...done Mapping dict item <\-> DFA terminal (category)...done Reading in term file (optional)...done 42 categories, 99 words DFA has 135 nodes and 198 arcs \-\-\-\-\- wseq > に して 下さい silE [wseq: に して 下さい silE] [cate: (NI|NI_AT) SURU_V KUDASAI_V NS_E] PREDICTED CATEGORIES/WORDS: KEIDOU_A (派手 地味 ) BANGOU_N (番 ) HUKU_N (服 服装 服装 ) PATTERN_N (チェック 縦縞 横縞 ...) GARA_N (柄 ) KANZI_N (感じ ) IRO_N (色 ) COLOR_N (赤 橙 黄 ...) wseq > .fi .RE .sp .SH "SEE ALSO" .PP \fB mkdfa.pl \fR( 1 ) , \fB generate \fR( 1 ) , \fB accept_check \fR( 1 ) .SH "COPYRIGHT" .PP Copyright (c) 1991\-2008 京都大学 河原研究室 .PP Copyright (c) 1997\-2000 情報処理振興事業協会(IPA) .PP Copyright (c) 2000\-2008 奈良先端科学技術大学院大学 鹿野研究室 .PP Copyright (c) 2005\-2008 名古屋工業大学 Julius開発チーム .SH "LICENSE" .PP Julius の使用許諾に準じます. julius-4.2.2/man/ja/accept_check.10000644001051700105040000000462311071102424015150 0ustar ritrlab.\" Title: accept_check .\" Author: .\" Generator: DocBook XSL Stylesheets v1.71.0 .\" Date: 10/02/2008 .\" Manual: .\" Source: .\" .TH "ACCEPT_CHECK" "1" "10/02/2008" "" "" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) .ad l .SH "名前" accept_check \- 文法における単語列の受理/非受理チェック .SH "概要" .HP 13 \fBaccept_check\fR [\-t] [\-s\ \fIspname\fR] [\-v] {prefix} .SH "DESCRIPTION" .PP accept_check は,文法で文の受理・非受理を判定するツールです.文は標準 入力から与えます.受理すべき文を一行ずつテキストファイルにまとめて書い ておき,それをaccept_check の標準入力に与えることで,その文法 (\fIprefix.dfa\fR および \fIprefix.dict\fR) において目的の文が受理されるかどうかをバッチ的にチェックできます. .PP 実行には .dfa, .dict, .term の各ファイルが必要です. あらかじめ \fBmkdfa.pl\fR で生成しておいて下さい. .PP 対象とする文は,文法の語彙単位(.vocaの第1フィールド)で空白で区切って与 えます.最初と最後には多くの場合 silB, silE が必要であることに気をつけて下さい.また, ショートポーズ単語は文に含めないでください. .PP 同一表記の単語が複数ある場合,accept_check はその可能な解釈の全ての組 み合わせについて調べ,どれか1つのパターンでも受理可能であれば受理,す べてのパターンで受理不可能であれば受理不可能とします. .SH "OPTIONS" .PP \fB \-t \fR .RS 3n 単語ではなくカテゴリ名で入力・出力する. .RE .PP \fB \-s \fR \fIspname\fR .RS 3n スキップすべきショートポーズ単語の名前を指定する. (default: "sp") .RE .PP \fB \-v \fR .RS 3n デバッグ出力. 
.RE .SH "EXAMPLES" .PP vfr (フィッティングタスク用文法) での実行例: .sp .RS 3n .nf % accept_check vfr Reading in dictionary...done Reading in DFA grammar...done Mapping dict item <\-> DFA terminal (category)...done Reading in term file (optional)...done 42 categories, 99 words DFA has 135 nodes and 198 arcs \-\-\-\-\- please input word sequence>silB 白 に して 下さい silE wseq: silB 白 に して 下さい silE cate: NS_B COLOR_N (NI|NI_AT) SURU_V KUDASAI_V NS_E accepted please input word sequence> .fi .RE .sp .SH "SEE ALSO" .PP \fB mkdfa.pl \fR( 1 ) , \fB generate \fR( 1 ) , \fB nextword \fR( 1 ) .SH "COPYRIGHT" .PP Copyright (c) 1991\-2008 京都大学 河原研究室 .PP Copyright (c) 1997\-2000 情報処理振興事業協会(IPA) .PP Copyright (c) 2000\-2008 奈良先端科学技術大学院大学 鹿野研究室 .PP Copyright (c) 2005\-2008 名古屋工業大学 Julius開発チーム .SH "LICENSE" .PP Julius の使用許諾に準じます. julius-4.2.2/man/ja/mkgshmm.10000644001051700105040000000310711071102424014213 0ustar ritrlab.\" Title: mkgshmm .\" Author: .\" Generator: DocBook XSL Stylesheets v1.71.0 .\" Date: 10/02/2008 .\" Manual: .\" Source: .\" .TH "MKGSHMM" "1" "10/02/2008" "" "" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) .ad l .SH "名前" mkgshmm \- モノフォンHMMを GMS 用に変換する .SH "概要" .HP 8 \fBmkgshmm\fR {monophone_hmmdefs} > {outputfile} .SH "DESCRIPTION" .PP mkgshmm はHTK形式のmonophone HMMを Julius の Gaussian Mixture Selection (GMS) 用に変換するperlスクリプトです. .PP GMSはJulius\-3.2からサポートされている音響尤度計算の高速化手法です. フレームごとに monophone の状態尤度に基づいてtriphoneやPTMの状態を予 備選択することで,音響尤度計算が高速化されます. .SH "EXAMPLES" .PP まずターゲットとするtriphoneやPTMに対して,同じコーパスで学習した monophone モデルを用意します. .PP 次にそのmonophoneモデルを mkgshmm を用いて GMS 用に変換します. .sp .RS 3n .nf % \fBmkgshmm\fR monophone > gshmmfile .fi .RE これを Julius で "\fB\-gshmm\fR" で指定します. .sp .RS 3n .nf % \fBjulius\fR \-C ... \-gshmm gshmmfile .fi .RE GMS用モデルはtriphoneやPTMと同一のコーパスから作成する必要がある点に注 意してください.gshmm がミスマッチだと選択誤りが生じ,性能が劣化します. .SH "SEE ALSO" .PP \fB julius \fR( 1 ) .SH "COPYRIGHT" .PP Copyright (c) 1991\-2008 京都大学 河原研究室 .PP Copyright (c) 1997\-2000 情報処理振興事業協会(IPA) .PP Copyright (c) 2000\-2008 奈良先端科学技術大学院大学 鹿野研究室 .PP Copyright (c) 2005\-2008 名古屋工業大学 Julius開発チーム .SH "LICENSE" .PP Julius の使用許諾に準じます. julius-4.2.2/man/ja/adintool.10000644001051700105040000002141511666612225014402 0ustar ritrlab.\" Title: adintool .\" Author: .\" Generator: DocBook XSL Stylesheets v1.71.0 .\" Date: 10/02/2008 .\" Manual: .\" Source: .\" .TH "ADINTOOL" "1" "10/02/2008" "" "" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) .ad l .SH "名前" adintool \- 音声波形データの記録・分割・送信・受信ツール .SH "概要" .HP 9 \fBadintool\fR {\-in\ \fIinputdev\fR} {\-out\ \fIoutputdev\fR} [\fIoptions\fR...] .SH "DESCRIPTION" .PP adintool は,音声波形データ中の音声区間の検出および記録を連続的に行 うツールです.入力音声に対して零交差数と振幅レベルに基づく音声区間検 出を逐次行い,音声区間部分を連続出力します. .PP adintool は adinrec の高機能版です.音声データの入力元として,マイク 入力・ 音声波形ファイル・標準入力・ネットワーク入力(adinnet サーバー モード)が選択できます.Julius の \fB\-input\fR オプションも 使用可能で,プラグイン入力も選択できます. .PP 出力先として,音声波形ファイル・標準出力・ネットワーク出力(adinnet ク ライアントモード)が選択できます.特にネットワーク出力(adinnet クライ アントモード)では, julius へネットワーク経由で音声を送信して音声認識 させることができます. .PP 入力音声は音声区間ごとに自動分割され,逐次出力されます.音声区間の切 り出しには adinrec と同じ,一定時間内の零交差数とパワー(振幅レベル) のしきい値を用います.音声区間開始と同時に音声出力が開始されます.出 力としてファイル出力を選んだ場合は,連番ファイル名で検出された区間ごと に保存します. .PP サンプリング周波数は任意に設定可能です.録音形式は 16bit, 1 channel で,書き出されるファイル形式は Microsoft WAV 形式です. 既に同じ名前のファイルが存在する場合は上書きされます. .SH "OPTIONS" .PP Julius の全てのオプションが指定可能である.指定されたもののうち, 音声入力に関係するオプションのみ扱われる.以下に,adintool の オプション,および有効な Julius オプションを解説する. 
.SS "adintool specific options" .PP \fB \-freq \fR \fIHz\fR .RS 3n 音声のサンプリング周波数 (Hz) を指定する.(default: 16,000) .RE .PP \fB \-in \fR \fIinputdev\fR .RS 3n 音声を読み込む入力デバイスを指定する."mic" でマイク入力, "file" でファイル入力, "stdin" で標準入力から音声を読み込む. ファイル入力の場合,ファイル名は起動後に出てくるプロンプトに対 して指定する.また,"adinnet" で adintool は adinnet サーバー となり,adinnet クライアントから音声データを tcp/ip 経由で 受け取る.ポート番号は 5530 である("\fB\-inport\fR" で変更可能). .sp 入力デバイスは,そのほか Julius の "\-input" オプションでも指定可能 である.その場合,プラグインからの入力も可能である. .RE .PP \fB \-out \fR \fIoutputdev\fR .RS 3n 音声を出力するデバイスを指定する."file" でファイル出力, stdout で標準出力へ出力する.ファイルの場合,出力ファイル名は オプション "\fB\-filename\fR" で与える.出力ファイル 形式は 16bit WAV 形式である. また,"adinnet" で adintool は adinnet クライアント となり,adinnet サーバへ取り込んだ音声データを tcp/ip 経由で 送信できる.送信先ホストは "\fB\-server\fR" で指定する. ポート番号は 5530 である("\fB\-port\fR" で変更可能). .RE .PP \fB \-inport \fR \fInum\fR .RS 3n 入力が adinnet の場合 (\-in adinnet),接続を受けるポート番号 を指定する.指定しない場合のデフォルトは 5530 である. .RE .PP \fB \-server \fR [host] [,host...] .RS 3n 出力が adinnet の場合 (\-out adinnet),送信先のサーバ名を指定する. 複数ある場合は,カンマで区切って指定する. .RE .PP \fB \-port \fR [num] [,num...] .RS 3n 出力が adinnet の場合 (\-out adinnet),送信先の各サーバのポート番号 を指定する.指定しない場合のデフォルトは 5530 である. \fB\-server\fR で複数のサーバを指定している場合, 全てについて明示的にポート番号を指定する必要がある. .RE .PP \fB \-filename \fR \fIfile\fR .RS 3n ファイル出力 (\fB\-out file\fR) 時,出力ファイル名を 与える.デフォルトでは,検出された音声区間検出ごとに, "\fIfile.0000.wav\fR" , "\fIfile.0001.wav\fR" ... のように区間ごとに連番で 記録される.番号の初期値は 0 である(\fB\-startid\fRで 変更可能).なお,オプション \fB\-oneshot\fR 指定時は 最初の区間だけが "\fIfile\fR" の名前で保存される. .RE .PP \fB \-startid \fR \fInumber\fR .RS 3n ファイル出力時,記録を開始する連番番号の初期値を指定する.( デフォルト:0) .RE .PP \fB \-oneshot \fR .RS 3n 最初の音声区間が終了したら終了する. .RE .PP \fB \-nosegment \fR .RS 3n 入力音声の音声区間検出(無音による区切りと無音区間のスキップ)を 行わない. .RE .PP \fB \-raw \fR .RS 3n RAWファイル形式で出力する. .RE .PP \fB \-autopause \fR .RS 3n 出力が adinnet の場合(\-out adinnet),音声区間が終了するたび に入力停止・動作停止状態に移行する.出力先の adinnet サーバか ら動作再開信号がくると音声入力を再開する. .RE .PP \fB \-loosesync \fR .RS 3n 出力が adinnet (\-out adinnet)で複数の出力先サーバへ出力している 場合,動作停止状態から動作再開信号によって動作を再開する際,adintool は すべてのサーバから動作再開信号を受けるまで動作を再開しない. このオプションを指定すると,少なくとも1つのサーバから再開信号 がくれば動作を再開するようになる. .RE .PP \fB \-rewind \fR \fImsec\fR .RS 3n 入力がマイクのとき,停止状態から動作を再開するとき,停止中から 持続して音声入力中だった場合,指定されたミリ秒分だけさかのぼって 録音を開始する. .RE .SS "Concerning Julius options" .PP \fB \-input \fR {mic|rawfile|adinnet|stdin|netaudio|esd|alsa|oss} .RS 3n 音声入力ソースを選択する."\-in" の代わりにこちらを使うことも できる(最後に指定したほうが優先される).esd やプラグイン入力が 指定可能である. .RE .PP \fB \-chunk_size \fR \fIsamples\fR .RS 3n 音声入力の処理バッファ(フラグメント)のサイズ.値はサンプ ル数.(default: 1000) .RE .PP \fB \-lv \fR \fIthres\fR .RS 3n 振幅レベルのしきい値.値は 0 から 32767 の範囲で指定する. (default: 2000) .RE .PP \fB \-zc \fR \fIthres\fR .RS 3n 零交差数のしきい値.値は1秒あたりの交差数で指定する. (default: 60) .RE .PP \fB \-headmargin \fR \fImsec\fR .RS 3n 音声区間開始部のマージン.単位はミリ秒. (default: 300) .RE .PP \fB \-tailmargin \fR \fImsec\fR .RS 3n 音声区間終了部のマージン.単位はミリ秒. (default: 400) .RE .PP \fB \-zmean \fR .RS 3n 入力音声ストリームに対して直流成分除去を行う.全ての音声処理の の前段として処理される. .RE .PP \fB \-smpFreq \fR \fIHz\fR .RS 3n 音声のサンプリング周波数 (Hz) を指定する.(default: 16,000) .RE .PP \fB \-48 \fR .RS 3n 48kHzで入力を行い,16kHzにダウンサンプリングする. これは 16kHz のモデルを使用しているときのみ有効である. ダウンダンプリングの内部機能は sptk から 移植された. (Rev. 4.0) .RE .PP \fB \-NA \fR \fIdevicename\fR .RS 3n DatLink サーバのデバイス名 (\fB\-input netaudio\fR). .RE .PP \fB \-adport \fR \fIport_number\fR .RS 3n \fB\-input adinnet\fR 使用時,接続を受け付ける adinnet のボート番号を指定する.(default: 5530) .RE .PP \fB \-nostrip \fR .RS 3n 音声取り込み時,デバイスやファイルによっては,音声波形中に振幅 が "0" となるフレームが存在することがある.Julius は通常,音声 入力に含まれるそのようなフレームを除去する.この零サンプル除去が うまく動かない場合,このオプションを指定することで自動消去を 無効化することができる. 
.RE .PP \fB \-C \fR \fIjconffile\fR .RS 3n jconf設定ファイルを読み込む.ファイルの内容がこの場所に展開される. .RE .PP \fB \-plugindir \fR \fIdirlist\fR .RS 3n プラグインを読み込むディレクトリを指定する.複数の場合は コロンで区切って並べて指定する. .RE .SH "ENVIRONMENT VARIABLES" .PP \fB \fR\fB\fBALSADEV\fR\fR\fB \fR .RS 3n (マイク入力で alsa デバイス使用時) 録音デバイス名を指定する. 指定がない場合は "default". .RE .PP \fB \fR\fB\fBAUDIODEV\fR\fR\fB \fR .RS 3n (マイク入力で oss デバイス使用時) 録音デバイス名を指定する. 指定がない場合は "\fI/dev/dsp\fR". .RE .PP \fB \fR\fB\fBLATENCY_MSEC\fR\fR\fB \fR .RS 3n Linux (alsa/oss) および Windows で,マイク入力時の遅延時間をミ リ秒単位で指定する.短い値を設定することで入力遅延を小さくでき るが,CPU の負荷が大きくなり,また環境によってはプロセスやOSの 挙動が不安定になることがある.最適な値はOS やデバイスに大きく 依存する.デフォルト値は動作環境に依存する. .RE .SH "EXAMPLES" .PP マイクからの音声入力を,発話ごとに "data.0000.wav" から順に記録する: .sp .RS 3n .nf % \fBadintool\fR \-in mic \-out file \-filename data .fi .RE 巨大な収録音声ファイル "foobar.raw" を,音声区間ごとに "foobar.1500.wav" "foobar.1501.wav" ... に分割する: .sp .RS 3n .nf % \fBadintool\fR \-in file \-out file \-filename foobar \-startid 1500 % enter filename\->foobar.raw .fi .RE ネットワーク経由で音声ファイルを転送する(区間検出なし): .sp .RS 3n .nf (sender) % \fBadintool\fR \-in file \-out adinnet \-server \fIreceiver_hostname\fR \-nosegment (receiver) % \fBadintool\fR \-in adinnet \-out file \-nosegment .fi .RE マイクからの入力音声を Julius へ送信して認識: .sp .RS 3n .nf (sender) % \fBadintool\fR \-in mic \-out adinnet \-server \fIreceiver_hostname\fR (receiver) % \fBjulius\fR \-C ... \-input adinnet .fi .RE .SH "SEE ALSO" .PP \fB julius \fR( 1 ) , \fB adinrec \fR( 1 ) .SH "COPYRIGHT" .PP Copyright (c) 1991\-2008 京都大学 河原研究室 .PP Copyright (c) 1997\-2000 情報処理振興事業協会(IPA) .PP Copyright (c) 2000\-2008 奈良先端科学技術大学院大学 鹿野研究室 .PP Copyright (c) 2005\-2008 名古屋工業大学 Julius開発チーム .SH "LICENSE" .PP Julius の使用許諾に準じます. julius-4.2.2/man/ja/gram2sapixml.pl.10000644001051700105040000000314511556444000015601 0ustar ritrlab.\" Title: gram2sapixml.pl .\" Author: .\" Generator: DocBook XSL Stylesheets v1.71.0 .\" Date: 10/02/2008 .\" Manual: .\" Source: .\" .TH "GRAM2SAPIXML.PL" "1" "10/02/2008" "" "" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) .ad l .SH "名前" gram2sapixml.pl \- 認識用文法を SAPI XML 文法に変換するスクリプト .SH "概要" .HP 16 \fBgram2sapixml.pl\fR [\fIprefix\fR...] .SH "DESCRIPTION" .PP gram2sapixml.pl は,Julius の認識用文法ファイル (.grammar, .voca) から Microsoft SAPI XML 形式へ変換するスクリプトです. \fIprefix\fR には,変換する .grammar, .voca ファ イルのファイル名から拡張子を除外したものを指定します.複数指定した場合, それらは逐次変換されます. .PP 入力文字コードは EUC\-JPを想定しています.出力ファイルは UTF\-8 エンコー ディングです.コード変換のため内部で \fBiconv\fR を使用 しています. .PP 左再帰性については手作業による修正が必要です.元ファイルの .grammar の 構造をそのまま保持するため,.grammar における正順での左再帰記述がその まま .xml に反映されます.したがって,変換後 .xml に含まれる左再帰性の 解決は手作業で行わなければいけません. .SH "SEE ALSO" .PP \fB mkdfa.pl \fR( 1 ) .SH "DIAGNOSTICS" .PP 変換は,元ファイルの文法の非終端記号と終端記号(単語カテゴリ名)をルール に変換するという単純なものです.実際にSAPIアプリケーションで使う場合に は,プロパティを指定するなど,手作業での修正が必要です. .PP 内部でコード変換に \fBiconv\fR を使用しています. 実行パス上に iconv が無い場合,エラーとなります. .SH "COPYRIGHT" .PP Copyright (c) 2002 Takashi Sumiyoshi .SH "LICENSE" .PP Julius の使用許諾に準じます. julius-4.2.2/man/ja/dfa_determinize.10000644001051700105040000000266411071102424015710 0ustar ritrlab.\" Title: dfa_determinize .\" Author: .\" Generator: DocBook XSL Stylesheets v1.71.0 .\" Date: 10/02/2008 .\" Manual: .\" Source: .\" .TH "DFA_DETERMINIZE" "1" "10/02/2008" "" "" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) .ad l .SH "名前" dfa_determinize \- 有限オートマトン文法を決定化する .SH "概要" .HP 16 \fBdfa_determinize\fR [\-o\ \fIoutfile\fR] {dfafile} .SH "DESCRIPTION" .PP dfa_determinize は,.dfa ファイルを等価な決定性 .dfa ファイルに変換し, 標準出力に出力します.オプション \fB\-o\fR で出力先を 指定することもできます. 
.PP \fBmkdfa.pl\fR が生成するDFAは常に決定化されており, 通常,\fBmkdfa.pl\fR で作成された .dfa ファイルに対して このツールを使う必要はありません. .SH "OPTIONS" .PP \fB \-o \fR \fIoutfile\fR .RS 3n 出力ファイル名を指定する. .RE .SH "EXAMPLES" .PP \fIfoo.dfa\fR を決定化して \fIbar.dfa\fR に 保存する. .sp .RS 3n .nf % \fBdfa_determinize\fR \-o bar.dfa foo.dfa .fi .RE 別の方法: .sp .RS 3n .nf % \fBdfa_determinize\fR < foo.dfa > bar.dfa .fi .RE .SH "SEE ALSO" .PP \fB mkdfa.pl \fR( 1 ) , \fB dfa_minimize \fR( 1 ) .SH "COPYRIGHT" .PP Copyright (c) 1991\-2008 京都大学 河原研究室 .PP Copyright (c) 1997\-2000 情報処理振興事業協会(IPA) .PP Copyright (c) 2000\-2008 奈良先端科学技術大学院大学 鹿野研究室 .PP Copyright (c) 2005\-2008 名古屋工業大学 Julius開発チーム .SH "LICENSE" .PP Julius の使用許諾に準じます. julius-4.2.2/man/ja/generate.10000644001051700105040000000371611071102424014350 0ustar ritrlab.\" Title: generate .\" Author: .\" Generator: DocBook XSL Stylesheets v1.71.0 .\" Date: 10/02/2008 .\" Manual: .\" Source: .\" .TH "GENERATE" "1" "10/02/2008" "" "" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) .ad l .SH "名前" generate \- 文法から文をランダム生成する .SH "概要" .HP 9 \fBgenerate\fR [\-v] [\-t] [\-n\ \fInum\fR] [\-s\ \fIspname\fR] {prefix} .SH "DESCRIPTION" .PP generate は文法に従って文をランダムに生成します. .PP 実行には .dfa, .dict, .term の各ファイルが必要です. あらかじめ \fBmkdfa.pl\fR で生成しておいて下さい. .SH "OPTIONS" .PP \fB \-t \fR .RS 3n 単語ではなくカテゴリ名で出力する. .RE .PP \fB \-n \fR \fInum\fR .RS 3n 生成する文の数を指定する (default: 10) .RE .PP \fB \-s \fR \fIspname\fR .RS 3n 生成においてスキップすべきショートポーズ単語の名前を指定する. (default: "sp") .RE .PP \fB \-v \fR .RS 3n デバッグ出力. .RE .SH "EXAMPLES" .PP vfr (フィッティングタスク用文法) での実行例: .sp .RS 3n .nf % generate vfr Reading in dictionary...done Reading in DFA grammar...done Mapping dict item <\-> DFA terminal (category)...done Reading in term file (optional)...done 42 categories, 99 words DFA has 135 nodes and 198 arcs \-\-\-\-\- silB やめます silE silB 終了します silE silB シャツ を スーツ と 統一して 下さい silE silB スーツ を カッター と 同じ 色 に 統一して 下さい silE silB 交換して 下さい silE silB これ を 覚えておいて 下さい silE silB 覚えておいて 下さい silE silB 戻って 下さい silE silB スーツ を シャツ と 統一して 下さい silE silB 上着 を 橙 に して 下さい silE .fi .RE .sp .SH "SEE ALSO" .PP \fB mkdfa.pl \fR( 1 ) , \fB generate\-ngram \fR( 1 ) .SH "COPYRIGHT" .PP Copyright (c) 1991\-2008 京都大学 河原研究室 .PP Copyright (c) 1997\-2000 情報処理振興事業協会(IPA) .PP Copyright (c) 2000\-2008 奈良先端科学技術大学院大学 鹿野研究室 .PP Copyright (c) 2005\-2008 名古屋工業大学 Julius開発チーム .SH "LICENSE" .PP Julius の使用許諾に準じます. julius-4.2.2/man/ja/mkbingram.10000644001051700105040000000667611144475314014551 0ustar ritrlab.\" Title: mkbingram .\" Author: .\" Generator: DocBook XSL Stylesheets v1.71.0 .\" Date: 02/11/2009 .\" Manual: .\" Source: .\" .TH "MKBINGRAM" "1" "02/11/2009" "" "" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) .ad l .SH "名前" mkbingram \- バイナリ N\-gram 変換 .SH "概要" .HP 10 \fBmkbingram\fR [\-nlr\ \fIforward_ngram.arpa\fR] [\-nrl\ \fIbackward_ngram.arpa\fR] [\-d\ \fIold_bingram_file\fR] {output_bingram_file} .SH "DESCRIPTION" .PP mkbingram は,ARPA形式の N\-gram 定義ファイルをJulius用のバイナリN\-gram ファイルに変換するツールです.あらかじめ変換しておくことで,Juliusの起 動を大幅に高速化できます. .PP Julius\-4より,N\-gram は前向き,後ろ向き,あるいは両方を指定できるよう になりました.mkbingram でも,どちらか一方だけでバイナリN\-gramを作成するこ とができます.また,両方を指定した場合は,それら2つのN\-gramは一つのバ イナリN\-gramに結合されます. .PP 前向きN\-gramのみが指定されたとき,mkbingram は 前向きN\-gramだけからバ イナリN\-gramを生成します.このバイナリN\-gramを使うとき,Julius はその 中の 2\-gram を使って第1パスを行い,第2 パ スではその前向き確率から後向 きの確率を,ベイズ則に従って算出しながら認識を行います. 
.PP 後向きN\-gramのみが指定されたとき,mkbingramは後ろ向きN\-gramだけからバ イナリN\-gramを生成します.このバイナリN\-gramを使うとき,Julius はその 中の後向き 2\-gram からベイズ則に従って算出しながら第1パスの認識を行い, 第2パスでは後向き N\-gramを使った認識を行います. .PP 両方が指定されたときは,前向きN\-gram中の2\-gramと後向きN\-gramが統合され たバイナリN\-gramが生成されます.Juliusではその前向き2\-gramで第1パスを 行い,後向きN\-gramで第2パスを行います.なお両 N\-gram は同一のコーパス から同 一の条件(カットオフ値,バックオフ計算方法等)で学習されてあり, 同一の語彙を持っている必要があります. .PP なお,mkbingram は gzip 圧縮された ARPA ファイルもそのまま読み込めます. .PP バージョン 3.x 以前で作成したバイナリN\-gramは,そのまま 4.0 でも読めま す.mkbingram に \fB\-d\fR で与えることで,古いバイナリ形式 を新しいバイナリ形式に変換することもできます.なお,4.0 以降の mkbingram で作成したバイナリN\-gramファイルは3.x 以前のバージョンでは 使えませんのでご注意ください. .SH "OPTIONS" .PP \fB \-nlr \fR \fIforward_ngram.arpa\fR .RS 3n 前向き(left\-to\-right)のARPA形式 N\-gram ファイルを読み込む .RE .PP \fB \-nrl \fR \fIbackward_ngram.arpa\fR .RS 3n 後ろ向き(right\-to\-left)のARPA形式 N\-gram ファイルを読み込む .RE .PP \fB \-d \fR \fIold_bingram_file\fR .RS 3n バイナリN\-gramを読み込む(古いバイナリ形式の変換用) .RE .PP \fB \-swap \fR .RS 3n 文頭記号 と文末記号 を入れ替える. .RE .PP \fIoutput_bingram_file\fR .RS 3n 出力先のバイナリN\-gramファイル名 .RE .SH "EXAMPLES" .PP ARPA形式の N\-gram をバイナリ形式に変換する(前向き+後ろ向き): .sp .RS 3n .nf % \fBmkbingram\fR \-nlr 2gram.arpa \-nrl rev\-Ngram.arpa outfile .fi .RE ARPA形式の前向き 4\-gram をバイナリ形式に変換する(前向きのみ): .sp .RS 3n .nf % \fBmkbingram\fR \-nlr 4gram.arpa outfile .fi .RE 古いバイナリN\-gramファイルを現在の形式に変換する: .sp .RS 3n .nf % \fBmkbingram\fR \-d old_bingram new_bingram .fi .RE .SH "SEE ALSO" .PP \fB julius \fR( 1 ) , \fB mkbinhmm \fR( 1 ) .SH "COPYRIGHT" .PP Copyright (c) 1991\-2008 京都大学 河原研究室 .PP Copyright (c) 1997\-2000 情報処理振興事業協会(IPA) .PP Copyright (c) 2000\-2008 奈良先端科学技術大学院大学 鹿野研究室 .PP Copyright (c) 2005\-2008 名古屋工業大学 Julius開発チーム .SH "LICENSE" .PP Julius の使用許諾に準じます. julius-4.2.2/man/ja/jcontrol.10000644001051700105040000001430111071102424014400 0ustar ritrlab.\" Title: jcontrol .\" Author: .\" Generator: DocBook XSL Stylesheets v1.71.0 .\" Date: 10/02/2008 .\" Manual: .\" Source: .\" .TH "JCONTROL" "1" "10/02/2008" "" "" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) .ad l .SH "名前" jcontrol \- Juliusモジュールモード用のサンプルクライアント .SH "概要" .HP 9 \fBjcontrol\fR {\fIhostname\fR} [\fIportnum\fR] .SH "DESCRIPTION" .PP jcontrol は,モジュールモードで動作している julius に接続し,APIを介し てコントロールする簡単なコンソールプログラムです.Julius への一時停止 や再開などのコマンドの送信,および Julius からの認識結果や音声イベントの メッセージ受信を行うことができます. .PP 起動後,jcontrol は,指定ホスト上において「モジュールモード」で動作中 のJulius に対し,接続を試みます.接続確立後,jcontrol はユーザーからの コマンド入力およびメッセージ受信待ち状態となります. .PP jcontrol は ユーザーが入力したコマンドを解釈し,対応するAPIコマンドを Julius へ送信します.また,Julius から認識結果や入力トリガ情報 など のメッセージが送信されてきたときは,その内容を標準出力へ書き出します. .PP モジュールモードの仕様については,関連文書をご覧下さい. .SH "OPTIONS" .PP \fB hostname \fR .RS 3n 接続先のホスト名 .RE .PP \fB portnum \fR .RS 3n ポート番号(デフォルト:10500) .RE .SH "COMMANDS" .PP jcontrol は標準入力から1行ずつコマンド文字列を受け取る. コマンドの一覧は以下の通り. .SS "動作制御" .PP pause .RS 3n Juliusの認識動作を中断させ,一時停止状態に移行させる.一時停止状 態にあるJuliusは,たとえ音声入力があっても認識処理を行わない. ある区間の音声認識処理の途中でこのコマンドを受け取った場合, Julius はその認識処理が終了した後,一時停止状態に移行する. .RE .PP terminate .RS 3n pauseと同じく,Juliusの認識動作を中断させ, 一時停止状態に移行させる.ある区間の音声認識処理の途中でこのコ マンドを受け取った場合,その入力を破棄して即座に一時停止状態に 移行する. .RE .PP resume .RS 3n Julius を一時停止状態から通常状態へ移行させ,認識を再開させる. .RE .PP inputparam \fIarg\fR .RS 3n 文法切り替え時に音声入力であった場合の入力中音声の扱いを指定. "TERMINATE", "PAUSE", "WAIT"のうちいずれかを指定. .RE .PP version .RS 3n Julius にバージョン文字列を返させる. .RE .PP status .RS 3n Julius からシステムの状態 (active / sleep) を報告させる. .RE .SS "文法・単語認識関連" .PP changegram \fIprefix\fR .RS 3n 認識文法を "\fIprefix.dfa\fR" と "\fIprefix.dict\fR" に切り替える.カレントプロ セス内の文法は全て消去され,指定された文法に置き換わる. .RE .PP addgram \fIprefix\fR .RS 3n 認識文法として "\fIprefix.dfa\fR" と "\fIprefix.dict\fR" をカレントプロセスに追加する. 
.RE .PP deletegram \fIgramlist\fR .RS 3n カレントプロセスから指定された文法を削除する.文法の指定は,文 法名(追加時の \fIprefix\fR)か,あるいは Julius から送られる GRAMINFO内にある文法 ID で指定する.複数の文法を削除したい場合は,文法名もしくはIDをカ ンマで区切って複数指定する(IDと文法名が混在してもよい). .RE .PP deactivategram \fIgramlist\fR .RS 3n カレントプロセスの指定された文法を一時的に無効にする.無効にされた 文法は,エンジン内に保持されたまま,認識処理からは一時的に除外される. 無効化された文法は activategram で再び有効化できる. .sp 文法の指定は,文法名(追加時の \fIprefix\fR)か,あるいはJulius から送ら れる GRAMINFO内にある文法 ID で指定する.複 数の文法を指定したい場合は,文法名もしくはIDをカンマで区切って 複数指定する(IDと文法名が混在してもよい). .RE .PP activategram \fIgramlist\fR .RS 3n カレントプロセスで無効化されている文法を有効化する. 文法の指定は,文法名(追加時の \fIprefix\fR)か,あるいはJulius から送ら れる GRAMINFO内にある文法 ID で指定する.複 数の文法を指定したい場合は,文法名もしくはIDをカンマで区切って 複数指定する(IDと文法名が混在してもよい). .RE .PP addword \fIgrammar_name_or_id\fR \fIdictfile\fR .RS 3n dictfile の中身を,カレントプロセスの指定された文法に追加する. .RE .PP syncgram .RS 3n addgram や deletegram などによる文法の更新を即時に行う. 同期確認用である. .RE .SS "プロセス関連のコマンド" .PP Julius\-4 では複数モデルの同時認識が行える.この場合, 認識プロセス ("\fB\-SR\fR" で指定された認識処理インスタンス) ごとにモジュールクライアントから操作を行うことができる. .PP クライアントからはどれか一つのプロセスが「カレントプロセス」として 割り当てられる.文法関連の命令はカレントプロセスに対して行われる. .PP listprocess .RS 3n Julius に現在エンジンにある認識プロセスの一覧を送信させる. .RE .PP currentprocess \fIprocname\fR .RS 3n カレントプロセスを指定された名前のプロセスに切り替える. .RE .PP shiftprocess .RS 3n カレントプロセスを循環切り替えする.呼ばれるたびにその次のプロセスに カレントプロセスが切り替わる. .RE .PP addprocess \fIjconffile\fR .RS 3n エンジンに認識プロセスを新たに追加する.与える \fIjconffile\fR は,通常のものと違い, ただ一種類の LM 設定を含むものである必要がある.また,実際に送られる のはパス名のみであり,ファイル読み込みはJulius側で行われるため, ファイルパスは Julius から見える場所を指定する必要が有る. .sp 追加された LM および認識プロセスは,jconffile の名前が プロセス名となる. .RE .PP delprocess \fIprocname\fR .RS 3n 指定された名前の認識プロセスをエンジンから削除する. .RE .PP deactivateprocess \fIprocname\fR .RS 3n 指定された名前の認識プロセスを,一時的に無効化する.無効化され たプロセスは次回以降の入力に対して認識処理からスキップされる. 無効化されたプロセスは activateprocess で 再び有効化できる. .RE .PP activateprocess \fIprocname\fR .RS 3n 指定された名前の認識プロセスを有効化する. .RE .SH "EXAMPLES" .PP Julius からのメッセージは "> " を行の先頭につけてそのまま標準出力に出力 されます.以下は実行例です. .sp .RS 3n .nf % \fBjulius\fR \-C ... \-module .fi .RE 上記のようにして Julius をモジュールモードで起動した後, jcontrol をそのホスト名を指定して起動します. .sp .RS 3n .nf % \fBjcontrol\fR hostname .fi .RE 音声入力を行えば,イベント内容や結果が jcontrol 側に送信されます. jcontrol に対してコマンドを入力する(最後に Enter を押す)と, Julius にコマンドが送信され,Julius が制御されます. .PP 詳しいプロトコルについては,関連文書を参照してください. .SH "SEE ALSO" .PP \fB julius \fR( 1 ) .SH "COPYRIGHT" .PP Copyright (c) 1991\-2008 京都大学 河原研究室 .PP Copyright (c) 1997\-2000 情報処理振興事業協会(IPA) .PP Copyright (c) 2000\-2008 奈良先端科学技術大学院大学 鹿野研究室 .PP Copyright (c) 2005\-2008 名古屋工業大学 Julius開発チーム .SH "LICENSE" .PP Julius の使用許諾に準じます. julius-4.2.2/man/ja/mkss.10000644001051700105040000000332211071102424013524 0ustar ritrlab.\" Title: mkss .\" Author: .\" Generator: DocBook XSL Stylesheets v1.71.0 .\" Date: 10/02/2008 .\" Manual: .\" Source: .\" .TH "MKSS" "1" "10/02/2008" "" "" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) .ad l .SH "名前" mkss \- スペクトルサブトラクション用のノイズスペクトル計算 .SH "概要" .HP 5 \fBmkss\fR [\fIoptions\fR...] {filename} .SH "DESCRIPTION" .PP mkss は,スペクトルサブトラクション用のノイズスペクトル計算ツールです. 指定時間分の音声のない雑音音声をマイク入力から録音し, その短時間スペクトラムの平均を ファイルに出力します.出力されたファイルは,Julius でスペクトル サブトラクションのためのノイズスペクトルファイル(オプション "\fB\-ssload\fR")として使用できます. .PP 録音は起動と同時に開始します.サンプリング条件は16bit signed short (big endian), monoral で固定です.既に同じ名前のファイルが存在する場合 は上書きします.また,ファイル名に "\-" を指定するこ とで標準出力へ出力できます. .SH "OPTIONS" .PP \fB \-freq \fR \fIHz\fR .RS 3n 音声のサンプリング周波数 (Hz) を指定する.(default: 16,000) .RE .PP \fB \-len \fR \fImsec\fR .RS 3n 録音する時間長をミリ秒単位で指定する(default: 3000) .RE .PP \fB \-fsize \fR \fIsample_num\fR .RS 3n 窓サイズをサンプル数で指定 (default: 400). 
.RE .PP \fB \-fshift \fR \fIsample_num\fR .RS 3n フレームシフト幅をサンプル数で指定 (default: 160). .RE .SH "SEE ALSO" .PP \fB julius \fR( 1 ) .SH "COPYRIGHT" .PP Copyright (c) 1991\-2008 京都大学 河原研究室 .PP Copyright (c) 1997\-2000 情報処理振興事業協会(IPA) .PP Copyright (c) 2000\-2008 奈良先端科学技術大学院大学 鹿野研究室 .PP Copyright (c) 2005\-2008 名古屋工業大学 Julius開発チーム .SH "LICENSE" .PP Julius の使用許諾に準じます. julius-4.2.2/man/ja/generate-ngram.10000644001051700105040000000304411071102424015444 0ustar ritrlab.\" Title: generate\-ngram .\" Author: .\" Generator: DocBook XSL Stylesheets v1.71.0 .\" Date: 10/02/2008 .\" Manual: .\" Source: .\" .TH "GENERATE\-NGRAM" "1" "10/02/2008" "" "" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) .ad l .SH "名前" generate\-ngram \- N\-gram に従って文をランダム生成する .SH "概要" .HP 15 \fBgenerate\-ngram\fR [\fIoptions\fR...] {\fIbinary_ngram\fR} .SH "DESCRIPTION" .PP generate\-ngram は,与えられた N\-gram 確率に従って文をランダム生成する ツールです.\fIbinary_ngram\fR には, バイナリ形式の N\-gram ファイルを指定します. .SH "OPTIONS" .PP \fB \-n \fR \fInum\fR .RS 3n 生成する文数を指定する(デフォルト:10) .RE .PP \fB \-N \fR .RS 3n 使用する N\-gram の長さを制限する(デフォルト:与えられたモデルで定義されている最大値,3\-gram なら 3). .RE .PP \fB \-bos \fR .RS 3n 文開始記号を指定する(デフォルト:) .RE .PP \fB \-eos \fR .RS 3n 文終了記号を指定する(デフォルト:) .RE .PP \fB \-ignore \fR .RS 3n 出力してほしくない単語を指定する(デフォルト:) .RE .PP \fB \-v \fR .RS 3n 冗長な出力を行う. .RE .PP \fB \-debug \fR .RS 3n デバッグ用出力を行う. .RE .SH "SEE ALSO" .PP \fB julius \fR( 1 ) , \fB mkbingram \fR( 1 ) .SH "COPYRIGHT" .PP Copyright (c) 1991\-2008 京都大学 河原研究室 .PP Copyright (c) 1997\-2000 情報処理振興事業協会(IPA) .PP Copyright (c) 2000\-2008 奈良先端科学技術大学院大学 鹿野研究室 .PP Copyright (c) 2005\-2008 名古屋工業大学 Julius開発チーム .SH "LICENSE" .PP Julius の使用許諾に準じます. julius-4.2.2/man/ja/adinrec.10000644001051700105040000001127311666612225014177 0ustar ritrlab.\" Title: adinrec .\" Author: .\" Generator: DocBook XSL Stylesheets v1.71.0 .\" Date: 10/02/2008 .\" Manual: .\" Source: .\" .TH "ADINREC" "1" "10/02/2008" "" "" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) .ad l .SH "名前" adinrec \- 1発話の音声入力データをファイルに記録する .SH "概要" .HP 8 \fBadinrec\fR [\fIoptions\fR...] {\fIfilename\fR} .SH "DESCRIPTION" .PP adinrec は,音声区間を一定時間内の零交差数とパワー(振幅レベル)のしき い値に基づいて切り出し,ファイルに記録する.デフォルトでは標準デバイス を用いてマイク入力から録音するが,\fB\-input\fR オプションで デバイスを選択可能である.またプラグイン入力も選択できる. .PP サンプリング周波数は任意に設定可能である.録音形式は 16bit, 1 channel であり,書き出されるファイル形式は Microsoft WAV 形式である. 既に同じ名前のファイルが存在する場合は上書きされる. .PP ファイル名に "\-" を指定すると取り込んだ音声データを標準出力へ出 力する.この場合データ形式は RAW 形式になる. .SH "OPTIONS" .PP Julius の全てのオプションが指定可能である.指定されたもののうち, 音声入力に関係するオプションのみ扱われる.以下に,adinrec 独自の オプションと関係する Julius オプションに分けて解説する. .SS "adinrec specific options" .PP \fB \-freq \fR \fIHz\fR .RS 3n 音声のサンプリング周波数 (Hz) を指定する.(default: 16,000) .RE .PP \fB \-raw \fR .RS 3n RAWファイル形式で出力する. .RE .SS "Concerning Julius options" .PP \fB \-input \fR {mic|rawfile|adinnet|stdin|netaudio|esd|alsa|oss} .RS 3n 音声入力ソースを選択する.音声波形ファイルの場合は fileあるいはrawfileを指 定する.起動後にプロンプトが表れるので,それに対してファイ ル名を入力する.adinnet では, adintool などのクライアントプロセスから音声 データをネットワーク経由で受け取ることができる. netaudio はDatLinkのサーバから, stdinは標準入力から音声入力を行う. esdは,音声デバイスの共有手段として多くの Linuxのデスクトップ環境で利用されている EsounD daemon から入力する. .RE .PP \fB \-chunk_size \fR \fIsamples\fR .RS 3n 音声入力の処理バッファ(フラグメント)のサイズ.値はサンプ ル数.(default: 1000) .RE .PP \fB \-lv \fR \fIthres\fR .RS 3n 振幅レベルのしきい値.値は 0 から 32767 の範囲で指定する. (default: 2000) .RE .PP \fB \-zc \fR \fIthres\fR .RS 3n 零交差数のしきい値.値は1秒あたりの交差数で指定する. (default: 60) .RE .PP \fB \-headmargin \fR \fImsec\fR .RS 3n 音声区間開始部のマージン.単位はミリ秒. 
(default: 300) .RE .PP \fB \-tailmargin \fR \fImsec\fR .RS 3n 音声区間終了部のマージン.単位はミリ秒. (default: 400) .RE .PP \fB \-zmean \fR .RS 3n 入力音声ストリームに対して直流成分除去を行う.全ての音声処理の 前段として処理される. .RE .PP \fB \-smpFreq \fR \fIHz\fR .RS 3n 音声のサンプリング周波数 (Hz) を指定する.(default: 16,000) .RE .PP \fB \-48 \fR .RS 3n 48kHzで入力を行い,16kHzにダウンサンプリングする. これは 16kHz のモデルを使用しているときのみ有効である. ダウンサンプリングの内部機能は sptk から 移植された. (Rev. 4.0) .RE .PP \fB \-NA \fR \fIdevicename\fR .RS 3n DatLink サーバのデバイス名 (\fB\-input netaudio\fR). .RE .PP \fB \-adport \fR \fIport_number\fR .RS 3n \fB\-input adinnet\fR 使用時,接続を受け付ける adinnet のポート番号を指定する.(default: 5530) .RE .PP \fB \-nostrip \fR .RS 3n 音声取り込み時,デバイスやファイルによっては,音声波形中に振幅 が "0" となるフレームが存在することがある.Julius は通常,音声 入力に含まれるそのようなフレームを除去する.この零サンプル除去が うまく動かない場合,このオプションを指定することで自動消去を 無効化することができる. .RE .PP \fB \-C \fR \fIjconffile\fR .RS 3n jconf設定ファイルを読み込む.ファイルの内容がこの場所に展開される. .RE .PP \fB \-plugindir \fR \fIdirlist\fR .RS 3n プラグインを読み込むディレクトリを指定する.複数の場合は コロンで区切って並べて指定する. .RE .SH "ENVIRONMENT VARIABLES" .PP \fB \fR\fB\fBALSADEV\fR\fR\fB \fR .RS 3n (マイク入力で alsa デバイス使用時) 録音デバイス名を指定する. 指定がない場合は "default". .RE .PP \fB \fR\fB\fBAUDIODEV\fR\fR\fB \fR .RS 3n (マイク入力で oss デバイス使用時) 録音デバイス名を指定する. 指定がない場合は "\fI/dev/dsp\fR". .RE .PP \fB \fR\fB\fBLATENCY_MSEC\fR\fR\fB \fR .RS 3n Linux (alsa/oss) および Windows で,マイク入力時の遅延時間をミ リ秒単位で指定する.短い値を設定することで入力遅延を小さくでき るが,CPU の負荷が大きくなり,また環境によってはプロセスやOSの 挙動が不安定になることがある.最適な値はOS やデバイスに大きく 依存する.デフォルト値は動作環境に依存する. .RE .SH "SEE ALSO" .PP \fB julius \fR( 1 ) , \fB adintool \fR( 1 ) .SH "COPYRIGHT" .PP Copyright (c) 1991\-2008 京都大学 河原研究室 .PP Copyright (c) 1997\-2000 情報処理振興事業協会(IPA) .PP Copyright (c) 2000\-2008 奈良先端科学技術大学院大学 鹿野研究室 .PP Copyright (c) 2005\-2008 名古屋工業大学 Julius開発チーム .SH "LICENSE" .PP Julius の使用許諾に準じます. julius-4.2.2/man/ja/mkdfa.pl.10000644001051700105040000000461511071102424014251 0ustar ritrlab.\" Title: mkdfa.pl .\" Author: .\" Generator: DocBook XSL Stylesheets v1.71.0 .\" Date: 10/02/2008 .\" Manual: .\" Source: .\" .TH "MKDFA.PL" "1" "10/02/2008" "" "" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) .ad l .SH "名前" mkdfa.pl \- Julius 形式の文法をオートマトンに変換するコンパイラ .SH "概要" .HP 9 \fBmkdfa.pl\fR [\fIoptions\fR...] {prefix} .SH "DESCRIPTION" .PP mkdfa.pl は Julius の文法コンパイラです.記述された文法ファイル (.grammar) と語彙ファイル (.voca) から,Julius用の有限状態オートマトン ファイル (.dfa) および認識辞書 (.dict) を生成します.カテゴリ名と生成 後の各ファイルで用いられるカテゴリ ID 番号との対応が .term ファイルと して出力されます. .PP 各ファイル形式の詳細については,別途ドキュメントをご覧下さい. .PP prefix は,.grammar ファイルおよび .vocaファイルの プレフィックスを引数として与えます.prefix.grammarと prefix.vocaからprefix.dfa, prefix.dictおよび prefix.termが生成されます. .PP バージョン 3.5.3 以降の Julius に付属の \fBmkdfa.pl\fR は, \fBdfa_minimize\fR を内部で自動的に呼び出すので, 出力される .dfa は常に最小化されています. .SH "OPTIONS" .PP \fB \-n \fR .RS 3n 辞書を出力しない..voca 無しで .grammar のみを .dfa に変換する ことができる. .RE .SH "ENVIRONMENT VARIABLES" .PP \fB \fR\fB\fBTMP\fR\fR\fB または \fR\fB\fBTEMP\fR\fR\fB \fR .RS 3n 変換中に一時ファイルを置くディレクトリを指定する. 指定が無い場合, \fI/tmp\fR, \fI/var/tmp\fR, \fI/WINDOWS/Temp\fR, \fI/WINNT/Temp\fR の順で最初に見つかった場所が使用される. .RE .SH "EXAMPLES" .PP 文法ファイル \fIfoo.grammar\fR, \fIfoo.voca\fRに 対して以下を実行することで \fIfoo.dfa\fRと \fIfoo.dict\fRおよび\fIfoo.term\fRが出力される. .sp .RS 3n .nf % \fBmkdfa.pl\fR foo .fi .RE .SH "SEE ALSO" .PP \fB julius \fR( 1 ) , \fB generate \fR( 1 ) , \fB nextword \fR( 1 ) , \fB accept_check \fR( 1 ) , \fB dfa_minimize \fR( 1 ) .SH "DIAGNOSTICS" .PP mkdfa.pl は内部で \fBmkfa\fR および \fBdfa_minimize\fR を呼び出します.実行時,これらの実行ファ イルが,この mkdfa.pl と同じディレクトリに置いてある必要があります. これらはJulius に同梱されています.
.SH "COPYRIGHT" .PP Copyright (c) 1991\-2008 京都大学 河原研究室 .PP Copyright (c) 1997\-2000 情報処理振興事業協会(IPA) .PP Copyright (c) 2000\-2008 奈良先端科学技術大学院大学 鹿野研究室 .PP Copyright (c) 2005\-2008 名古屋工業大学 Julius開発チーム .SH "LICENSE" .PP Julius の使用許諾に準じます. julius-4.2.2/man/ja/julius.10000644001051700105040000012057411666612225014112 0ustar ritrlab.\" Title: julius .\" Author: .\" Generator: DocBook XSL Stylesheets v1.71.0 .\" Date: 02/11/2009 .\" Manual: .\" Source: .\" .TH "JULIUS" "1" "02/11/2009" "" "" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) .ad l .SH "名前" julius \- 大語彙連続音声認識エンジン .SH "概要" .HP 7 \fBjulius\fR [\-C\ \fIjconffile\fR] [\fIoptions\fR...] .SH "内容" .PP Julius は数万語を対象とした大語彙連続音声認識を行うことのできるフリー の認識エンジンです.単語N\-gramを用いた2パス構成の段階的探索により高精 度な認識を行うことができます.また,小規模語彙のための文法ベースの認識 や単単語認識も行うことができます.認識対象としてマイク入力,録音済みの 音声波形ファイル,特徴抽出したパラメータファイルなどに対応しています. .PP コアの認識処理は,全て JuliusLib ライブラリとして提供されています. JuliusはJuliusLibを用いる音声アプリケーションの一つです. .PP Julius を用いて音声認識を行うには,音響モデル,単語辞書,および言語モ デルが必要です. .SH "設定" .PP Julius および JuliusLib コアエンジンの設定(動作選択,設定,モデル指定, パラメータ変更など)は,すべてここで説明する「オプション」で指定する. Julius に対しては,これらのオプションをコマンドライン引数として直接指 定するか,あるいはテキストファイル内に記述したものを "\fB\-C\fR" につづけて指定する.このオプションを記述したテキストファイルは "jconf 設定ファイル" と呼ばれる. .PP JuliusLib を用いる他のアプリケーションにおいても,JuliusLib内の認識 エンジンへの動作指定は,同様にこのオプション群で行う.jconf 設定ファイル に設定内容を記述して,それをメイン関数の最初で \fBj_config_load_file_new(char *jconffile);\fR で呼び出 すことで,JuliusLib 内の認識エンジンに設定をセットすることができる. .PP なお,jconf 設定ファイル内では,相対ファイルパスはその jconf ファイル の位置からの相対パスとして解釈される(カレントディレクトリではない). 注意されたい. .PP 以下に各オプションを解説する. .SS "Julius アプリケーションオプション" .PP .PP JuliusLib とは独立した,アプリケーションとしての Julius の機能に関する オプションである.認識結果の出力,文字コード変換,ログの設定,モジュー ルモードなどを含む.これらのオプションは,JuliusLib を組み込んだ他のア プリケーションでは使用できないので注意すること. .PP \fB \-outfile \fR .RS 3n 認識結果を個別のファイルに出力する.入力ファイルごとの認識結果を, その拡張子を ".out" に変えたファイルに保存する. (rev. 4.0) .RE .PP \fB \-separatescore \fR .RS 3n 認識結果で言語スコアと音響スコアを個別に出力する.指定しない場 合,和の値が認識結果のスコアとして出力される. .RE .PP \fB \-callbackdebug \fR .RS 3n コールバックがJuliusLibから呼ばれたときにコールバック名を 標準出力に出力する.デバッグ用である.(rev.4.0) .RE .PP \fB \-charconv \fR \fIfrom\fR \fIto\fR .RS 3n 出力で文字コードの変換を行う.\fIfrom\fR は言語モデルの文字セットを,\fIto\fR は出 力の文字セットを指定する.文字セットは,Linux では iconv で用いられるコード名である.Windows では,コードページ番号あるいはいかに示すコード名のどれかである: ansi, mac, oem, utf\-7, utf\-8, sjis, euc. .RE .PP \fB \-nocharconv \fR .RS 3n 文字コード変換を行わない.\fB\-charconv\fR の指定を リセットする. .RE .PP \fB \-module \fR [port] .RS 3n Julius を「サーバモジュールモード」で起動する.TCP/IP 経由でク ライアンとやりとりし,処理の制御や認識結果・イベントの通知が行 える.port はポート番号であり,省略時は 10500 が用いられる. .RE .PP \fB \-record \fR \fIdir\fR .RS 3n 区間検出された入力音声をファイルに逐次保存する. \fIdir\fRは保存するディレクトリを指定する. ファイル名は,それぞれの処理終了時のシステム時間から YYYY.MMDD.HHMMSS.wavの形で保存される.ファ イル形式は16bit, 1チャンネルのWAV形式である. なお,入力がGMM等によって棄却された場合も記録される. .RE .PP \fB \-logfile \fR \fIfile\fR .RS 3n 通常 Julius は全てのログ出力を標準出力に出力する. このオプションを指定することで,それらの出力を指定ファイルに 切替えることができる.(Rev.4.0) .RE .PP \fB \-nolog \fR .RS 3n ログ出力を禁止する.標準出力には何も出力されなくなる. (Rev.4.0) .RE .PP \fB \-help \fR .RS 3n エンジン設定,オプション一覧などのヘルプを出力して終了する. .RE .SS "全体オプション" .PP 全体オプションは,モデルや探索以外のオプションであり, 音声入力・音検出・GMM・プラグイン・その他の設定を含む. 全体オプションは, 音響モデル(\fB\-AM\fR)・言語モデル(\fB\-LM\fR)・デ コーダ(\fB\-SR\fR)などのセクション定義の前に定義するか, \fB\-GLOBAL\fR のあとに指定する. .sp .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 .br \fBオーディオ入力\fR .RS .PP \fB \-input \fR {mic|rawfile|mfcfile|adinnet|stdin|netaudio|esd|alsa|oss} .RS 3n 音声入力ソースを選択する.音声波形ファイルの場合は fileあるいはrawfileを指 定する.HTK 形式の特徴量ファイルを認識する場合は htkparamあるいはmfcfile を指定する.起動後にプロンプトが表れるので,それに対してファイ ル名を入力する.adinnet では, adintool などのクライアントプロセスから音声 データをネットワーク経由で受け取ることができる. 
netaudio はDatLinkのサーバから, stdinは標準入力からの音声入力を認識する. esdは,音声デバイスの共有手段として多くの Linuxのデスクトップ環境で利用されている EsounD daemon からの入 力を認識する. .RE .PP \fB \-chunk_size \fR \fIsamples\fR .RS 3n 音声入力の処理バッファ(フラグメント)のサイズ.値はサンプ ル数.(default: 1000) .RE .PP \fB \-filelist \fR \fIfilename\fR .RS 3n (\fB\-input rawfile|mfcfile\fR 時) \fIfilename\fR内に列挙されている全てのファ イルについて認識を順次行う. \fIfilename\fRには認識する入力ファイル名 を1行に1つずつ記述する. .RE .PP \fB \-notypecheck \fR .RS 3n 入力の特徴量ベクトルの型チェックを無効にする.通常 Julius は入 力の型が音響モデルとマッチするかどうかをチェックし,マッチしない とエラー終了する.このオプションはそのチェックを回避する.なんらかの 理由で型チェックがうまく動作しないときに使用する. .RE .PP \fB \-48 \fR .RS 3n 48kHzで入力を行い,16kHzにダウンサンプリングしながら認識する. これは 16kHz のモデルを使用しているときのみ有効である. ダウンダンプリングの内部機能は sptk から 移植された. (Rev. 4.0) .RE .PP \fB \-NA \fR \fIdevicename\fR .RS 3n DatLink サーバのデバイス名 (\fB\-input netaudio\fR). .RE .PP \fB \-adport \fR \fIport_number\fR .RS 3n \fB\-input adinnet\fR 使用時,接続を受け付ける adinnet のボート番号を指定する.(default: 5530) .RE .PP \fB \-nostrip \fR .RS 3n 音声取り込み時,デバイスやファイルによっては,音声波形中に振幅 が "0" となるフレームが存在することがある.Julius は通常,音声 入力に含まれるそのようなフレームを除去する.この零サンプル除去が うまく動かない場合,このオプションを指定することで自動消去を 無効化することができる. .RE .PP \fB \-zmean \fR, \fB \-nozmean \fR .RS 3n 入力音声ストリームに対して直流成分除去を行う.全ての音声処理の の前段として処理される. \fB\-zmeansource\fRオプションも見よ. .RE .RE .sp .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 .br \fBレベルと零交差による入力検知\fR .RS .PP \fB \-cutsilence \fR, \fB \-nocutsilence \fR .RS 3n レベルと零交差による入力検知を行うかどうかを指定する.デフォル トは,リアルタイム認識(デバイス直接入力およびネットワーク入力) では on, ファイル入力では off である.このオプションを指定する ことで,例えば長時間録音された音声ファイルに対して音声区間検出 を行いながら認識を行うこともできる. .RE .PP \fB \-lv \fR \fIthres\fR .RS 3n 振幅レベルのしきい値.値は 0 から 32767 の範囲で指定する. (default: 2000) .RE .PP \fB \-zc \fR \fIthres\fR .RS 3n 零交差数のしきい値.値は1秒あたりの交差数で指定する. (default: 60) .RE .PP \fB \-headmargin \fR \fImsec\fR .RS 3n 音声区間開始部のマージン.単位はミリ秒. (default: 300) .RE .PP \fB \-tailmargin \fR \fImsec\fR .RS 3n 音声区間終了部のマージン.単位はミリ秒. (default: 400) .RE .RE .sp .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 .br \fB入力棄却\fR .RS .PP 入力長,あるいは平均パワーによる入力の事後棄却が行える. 平均パワーによる棄却は,デフォルトでは無効化されており,ソースからコンパイ ルする際に configureに \-\-enable\-power\-reject を指定することで有効となる. リアルタイム認識時で,かつ特徴量でパワー項を持つ場合のみ使用できる. .PP \fB \-rejectshort \fR \fImsec\fR .RS 3n 検出された区間長が\fImsec\fR以下の入力を 棄却する.その区間の認識は中断・破棄される. .RE .PP \fB \-powerthres \fR \fIthres\fR .RS 3n 切り出し区間の平均パワーのしきい値.(Rev.4.0) .sp このオプションはコンパイル時に \-\-enable\-power\-rejectが指定されたときに 有効となる. .RE .RE .sp .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 .br \fBGMM / GMM\-VAD\fR .RS .PP \fB \-gmm \fR \fIhmmdefs_file\fR .RS 3n GMM定義ファイル.3状態(出力状態が1つのみ)のHMMとして定義す る.形式はHTK形式で与える.形式や使用できる特徴量の制限は音響 モデルと同じである. なお,GMMで用いるMFCC特徴量の設定は, \fB\-AM_GMM\fRのあとに音響モデルと同様に指定する.こ の特徴量設定は音響モデルと別に,明示的に指定する必要があること に注意が必要である. .RE .PP \fB \-gmmnum \fR \fInumber\fR .RS 3n GMM指定時,計算するガウス分布数を指定する.フレームごとにGMMの 出力確率を求める際,各モデルで定義されている混合ガウス分布のう ち,この\fInumber\fRで指定した数の上位ガ ウス分布の確率のみを計算する.小さな値を指定するほどGMMの計算 量を削減できるが,計算精度が悪くなる.(default: 10) .RE .PP \fB \-gmmreject \fR \fIstring\fR .RS 3n GMMで定義されているモデル名のうち,非音声として棄却すべきモデ ルの名称を指定する.モデル名を複数指定することができる.複数指 定する場合は,空白を入れずコンマで区切って一つの \fIstring\fRとして指定する. .RE .PP \fB \-gmmmargin \fR \fIframes\fR .RS 3n (GMM_VAD) GMM VAD による区間検出の開始部マージン.単位はフレー ム数で指定する.(default: 20) (Rev. 4.0) .sp このオプションは\-\-enable\-gmm\-vad付きでコンパイル されたときに有効となる. .RE .PP \fB \-gmmup \fR \fIvalue\fR .RS 3n (GMM_VAD) 音声区間の開始とみなす VAD スコアの閾値.VADスコアは (音声GMMの最大尤度 \- 非音声HMMの最大尤度) で表される. (Default: 0.7) (Rev.4.1) .sp このオプションは\-\-enable\-gmm\-vad付きでコンパイル されたときに有効となる. .RE .PP \fB \-gmmdown \fR \fIvalue\fR .RS 3n (GMM_VAD) 音声区間の終了とみなす VAD スコアの閾値.VADスコアは (音声GMMの最大尤度 \- 非音声HMMの最大尤度) で表される. 
(Default: \-0.2) (Rev.4.1) .sp このオプションは\-\-enable\-gmm\-vad付きでコンパイル されたときに有効となる. .RE .RE .sp .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 .br \fBデコーディングオプション\fR .RS .PP デコーディングオプションは,使用する認識アルゴリズムに関する設定を行う オプションである.この設定はエンジン全体に対する設定であり,全ての認識 処理インスタンスで共通の設定となる.探索の幅や言語重みなどの個々のデコー ディング設定については,認識処理インスタンスごとに指定する. .PP \fB \-realtime \fR, \fB \-norealtime \fR .RS 3n 入力と並行してリアルタイム認識を行うかどうかを明示的に指定する. デフォルトの設定は入力デバイスに依存し,マイクロフォン等のデバ イス直接認識,ネットワーク入力,および DatLink/NetAudio 入力の 場合は ON, ファイル入力や特徴量入力についてはOFFとなっている. .RE .RE .sp .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 .br \fBその他\fR .RS .PP \fB \-C \fR \fIjconffile\fR .RS 3n jconf設定ファイルを読み込む.ファイルの内容がこの場所に展開される. .RE .PP \fB \-version \fR .RS 3n バージョン情報を標準エラー出力に出力して終了する. .RE .PP \fB \-setting \fR .RS 3n エンジン設定情報を標準エラー出力に出力して終了する. .RE .PP \fB \-quiet \fR .RS 3n 出力を抑制する.認識結果は単語列のみが出力される. .RE .PP \fB \-debug \fR .RS 3n (デバッグ用) モデルの詳細や探索過程の記録など,様々な デバッグ情報をログに出力する. .RE .PP \fB \-check \fR {wchmm|trellis|triphone} .RS 3n デバッグ用のチェックモードに入る. .RE .PP \fB \-plugindir \fR \fIdirlist\fR .RS 3n プラグインを読み込むディレクトリを指定する.複数の場合は コロンで区切って並べて指定する. .RE .RE .SS "複数モデル認識のためのインスタンス宣言" .PP \fB \-AM \fR \fIname\fR .RS 3n 音響モデルインスタンスを新たに宣言する.以降の音響モデルに関す る設定はこのインスタンスに対するものと解釈される. \fIname\fR にはインスタンスにつける名前を 指定する(既にある音響モデルインスタンスと同じ名前であってはい けない). (Rev.4.0) .RE .PP \fB \-LM \fR \fIname\fR .RS 3n 言語モデルインスタンスを新たに宣言する.以降の言語モデルに関す る設定はこのインスタンスに対するものと解釈される. \fIname\fR にはインスタンスにつける名前を 指定する(既にある言語モデルインスタンスと同じ名前であってはい けない). (Rev.4.0) .RE .PP \fB \-SR \fR \fIname\fR \fIam_name\fR \fIlm_name\fR .RS 3n 認識処理インスタンスを新たに宣言する.以降の認識処理や探索に関 する設定はこのインスタンスに対するものと解釈される. \fIname\fR にはインスタンスにつける名前を 指定する(既にある認識処理インスタンスと同じ名前であってはいけ ない).\fIam_name\fR, \fIlm_name\fR にはそれぞれこのインスタン スが使用する音響モデルと言語モデルのインスタンスを名前,あるい は ID 番号で指定する.(Rev.4.0) .RE .PP \fB \-AM_GMM \fR .RS 3n GMM使用時に,GMM計算のための特徴量抽出パラメータを,この宣言の あとに指定する.もし GMM 使用時にこのオプションでGMM用の特徴量 パラメータを指定しなかった場合,最後に指定した音響モデル用の特 徴量がそのまま用いられる. (Rev.4.0) .RE .PP \fB \-GLOBAL \fR .RS 3n 全体オプション用のセクションを開始する.\fB\-AM\fR, \fB\-LM\fR, \fB\-SR\fR などのインスタンス 宣言を用いる場合,音声入力設定などの全体オプションは,これらの 全てのインスタンス定義よりも前か,あるいはこのオプションの あとに指定する必要がある.この全体オプション用のセクションは, jconf 内で何回現れても良い. (Rev.4.1) .RE .PP \fB \-nosectioncheck \fR, \fB \-sectioncheck \fR .RS 3n 複数インスタンスを用いる jconf において,オプションの位置チェッ クの有効・無効を指定する.有効である場合,ある種類のインスタン スの宣言がされたあとは,他のインスタンス宣言が現れるまで,その インスタンスのオプションしか指定できない(例: \fB\-AM\fR のあと,他の \fB\-AM\fRや \fB\-LM\fRなどが現れるまで,音響モデルオプションしか 指定できない.他のオプションがあらわれた場合はエラーとなる). また,全体オプションは全てのモデルインスタンスの前に指定する必 要がある.デフォルトでは有効になっている.(Rev.4.1) .RE .SS "言語モデル (\fB\-LM\fR)" .PP このグループには,各モデルタイプごとに指定するオプションが含まれている. 一つのインスタンスには一つのモデルタイプだけが指定可能である. .sp .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 .br \fBN\-gram\fR .RS .PP \fB \-d \fR \fIbingram_file\fR .RS 3n 使用するN\-gramをバイナリファイル形式で指定する. バイナリ形式への変換は mkbingram を 使用する. .RE .PP \fB \-nlr \fR \fIarpa_ngram_file\fR .RS 3n 前向き (left\-to\-right) のN\-gram 言語モデルを指定する. \fIarpa_ngram_file\fR はARPA標準形式のファ イルである必要がある. .RE .PP \fB \-nrl \fR \fIarpa_ngram_file\fR .RS 3n 後ろ向き (right\-to\-left) のN\-gram 言語モデルを指定する. \fIarpa_ngram_file\fR はARPA標準形式のファ イルである必要がある. .RE .PP \fB \-v \fR \fIdict_file\fR .RS 3n N\-gram,または文法用の単語辞書ファイルを指定する. .RE .PP \fB \-silhead \fR \fIword_string\fR \fB \-siltail \fR \fIword_string\fR .RS 3n 音声入力両端の無音区間に相当する「無音単語」エントリを指定する. 単語の読み(N\-gramエントリ名),あるいは"#"+単語番号(辞書ファ イルの行番号\-1)で指定する.デフォルトはそれぞれ "", "" である. .RE .PP \fB \-mapunk \fR \fIword_string\fR .RS 3n unknown に対応する単語名を指定する.デフォルトは, "" あるいは "" である.この単語は, 認識辞書において N\-gram にない単語を指定した場合にマッピングされ る単語である. 
.RE .PP \fB \-iwspword \fR .RS 3n ポーズに対応する無音単語を辞書に追加する.追加される単語の内容は オプション\fB\-iwspentry\fRで変更できる. .RE .PP \fB \-iwspentry \fR \fIword_entry_string\fR .RS 3n \fB\-iwspword\fR指定時に追加される単語エントリの内容 を指定する.辞書エントリと同じ形式で指定する.(default: " [sp] sp sp") .RE .PP \fB \-sepnum \fR \fInumber\fR .RS 3n 木構造化辞書の構築時に線形登録する単語数を指定する.(default: 150) .RE .RE .sp .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 .br \fB文法\fR .RS .PP \fB\-gram\fRや\fB\-gramlist\fRで文法を複数回指定す ることで,一つのインスタンス内で複数の文法を用いることができる. (旧Juliusのオプション \fB\-dfa\fR, \fB\-v\fR の 組合せは単一の文法のみ指定可能である) .PP \fB \-gram \fR gramprefix1[,gramprefix2[,gramprefix3,...]] .RS 3n 認識に使用する文法を指定する.文法はファイル(辞書および構文制約 有限オートマトン)のプレフィックスで指定する.すなわち,ある認 識用文法が\fIdir/foo.dict\fRと \fIdir/foo.dfa\fR としてあるとき, dir/fooのように拡張子を省いた名前で指定する. 文法はコンマで区切って複数指定することができる.また繰り返し 使用することでも複数指定できる. .RE .PP \fB \-gramlist \fR \fIlist_file\fR .RS 3n 認識に使用する文法のリストをファイルで指定する. \fIlist_file\fRには, \fB\-gram\fR と同様の文法プレフィックスを1行に1つず つ記述する.また,このオプションを繰り返し使用することで,複数 のリストファイルを指定できる.なお,リスト内で文法を相対パスで 指定した場合,それは,そのリストファイルからの相対パスとして解 釈されることに注意が必要である. .RE .PP \fB \-dfa \fR \fIdfa_file\fR \fB \-v \fR \fIdict_file\fR .RS 3n 認識に使用する文法の構文制約オートマトンと辞書をそれぞれ指定する. (Julius\-3.x との互換性のための古いオプションであり,使用すべきでない) .RE .PP \fB \-nogram \fR .RS 3n それまでに \fB\-gram\fR,\fB\-gramlist\fR, \fB\-dfa\fR および \fB\-v\fR で 指定された文法のリストをクリアし,文法の指定なしの状態 にする. .RE .RE .sp .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 .br \fB単単語\fR .RS .PP \fB \-w \fR \fIdict_file\fR .RS 3n 単単語認識で用いる単語辞書を指定する.ファイル形式は単語N\-gram や文法と同一である.辞書上の全ての単語が認識対象となる. (Rev.4.0) .RE .PP \fB \-wlist \fR \fIlist_file\fR .RS 3n 単語辞書のリストを指定する.\fIlist_file\fR には1行に一つずつ,使用する単語辞書のパスを記述する.相対パスを 用いた場合,それはその\fIlist_file\fRから の相対パスとして解釈される. (Rev.4.0) .RE .PP \fB \-nogram \fR .RS 3n それまでに \fB\-w\fR あるいは \fB\-wlist\fRで 指定された辞書のリストをクリアし,指定なしの状態に戻す. .RE .PP \fB \-wsil \fR \fIhead_sil_model_name\fR \fItail_sil_model_name\fR \fIsil_context_name\fR .RS 3n 単単語認識時,音声入力の両端の無音モデルおよびそのコンテキスト 名を指定する. \fIsil_context_name\fRとして NULLを指定した場合,各モデル名がそのまま コンテキストとして用いられる. .RE .RE .sp .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 .br \fBUser\-defined LM\fR .RS .PP \fB \-userlm \fR .RS 3n プログラム中のユーザ定義言語スコア計算関数を使用することを宣言 する.(Rev.4.0) .RE .RE .sp .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 .br \fBその他の言語モデル関連\fR .RS .PP \fB \-forcedict \fR .RS 3n 単語辞書読み込み時のエラーを無視する.通常Juliusは単語辞書内に エラーがあった場合そこで動作を停止するが,このオプションを 指定することで,エラーの生じる単語をスキップして処理を続行する ことができる. .RE .RE .SS "音響モデル・特徴量抽出 (\fB\-AM\fR) (\fB\-AM_GMM\fR)" .PP 音響モデルオプションは,音響モデルおよび特徴量抽出・フロントエンド処理 に関する設定を行う.特徴量抽出,正規化処理,スペクトルサブトラクションの 指定もここで行う. .sp .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 .br \fB音響HMM関連\fR .RS .PP \fB \-h \fR \fIhmmdef_file\fR .RS 3n 使用するHMM定義ファイル. HTK の ASCII 形 式ファイル,あるいはJulius バイナリ形式のファイルのどちらかを 指定する.バイナリ形式へは mkbinhmm で 変換できる. .RE .PP \fB \-hlist \fR \fIhmmlist_file\fR .RS 3n HMMlistファイルを指定する.テキスト形式,あるいはバイナリ形式 のどちらかを指定する.バイナリ形式へは mkbinhmmlist で変換できる. .RE .PP \fB \-tmix \fR \fInumber\fR .RS 3n Gaussianpruning の計算状態数を指定する.小さ いほど計算が速くなるが,音響尤度の誤差が大きくなる.See also \fB\-gprune\fR. (default: 2) .RE .PP \fB \-spmodel \fR \fIname\fR .RS 3n 文中のショートポーズに対応する音韻HMMの名前を指定する.このポーズ モデル名は,\fB\-iwsp\fR, \fB\-spsegment\fR, \fB\-pausemodels\fRに関係する.また,文法使用時に スキップ可能なポーズ単語エントリの識別にも用いられる. (default: "sp") .RE .PP \fB \-multipath \fR \fB \-nomultipath \fR .RS 3n 状態間遷移を拡張するマルチパスモードを有効にする.オプション指 定がない場合,Julius は音響モデルの遷移をチェックし,必要であ れば自動的にマルチパスモードを有効にする.このオプションは,ユー ザが明示的にモードを指定したい場合に使用する. 
.sp この機能は 3.x ではコンパイル時オプションであったが,4.0 より 実行時オプションとなった.(rev.4.0) .RE .PP \fB \-gprune \fR {safe|heuristic|beam|none|default} .RS 3n 使用する Gaussian pruning アルゴリズムを選択する. noneを指定すると Gaussian pruning を無効化 しすべてのガウス分布について厳密に計算する. safe は上位 N 個を計算する. heuristic と beam はsafe に比べてより積極的な枝刈りを行うため計算量削減の効果が大きいが, 認識精度の低下を招く可能性がある.defaultが 指定された場合,デフォルトの手法を使う.(default: tied\-mixture model の場合,standard 設定ではsafe,fast設 定ではbeam.tied\-mixture でない場合 none). .RE .PP \fB \-iwcd1 \fR {max|avg|best number} .RS 3n 第1パスの単語間トライフォン計算法を指定する. max 指定時,同じコンテキストのトライフォン集合の 全尤度の最大値を近似尤度として用いる.avg は 全尤度の平均値を用いる.best number は上位 N 個の トライフォンの平均値を用いる. デフォルトは,一緒に使用される言語モデルに依存する.N\-gram使用 時には best 3,文法使用時は avgとなる.もしこの音響モデルが異なるタイプの 複数の言語モデルで共有される場合は,後に定義されたほうのデフォルトが デフォルト値として用いられる. .RE .PP \fB \-iwsppenalty \fR \fIfloat\fR .RS 3n \fB\-iwsp\fRによって末尾に付加される単語末ショートポー ズの挿入ペナルティ.ここで指定した値が,通常単語の末尾から単語 末ショートポーズへの遷移に追加される. .RE .PP \fB \-gshmm \fR \fIhmmdef_file\fR .RS 3n Gaussian Mixture Selection 用のモノフォン音響モデルを指定する. GMS用モノフォンは通常のモノフォンから mkgshmm によって生成できる. .RE .PP \fB \-gsnum \fR \fInumber\fR .RS 3n GMS 使用時,対応するトライフォンを詳細計算するモノフォンの 状態の数を指定する. (default: 24) .RE .RE .sp .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 .br \fB特徴量抽出パラメータ\fR .RS .PP \fB \-smpPeriod \fR \fIperiod\fR .RS 3n 音声のサンプリング周期を指定する.単位は,100ナノ秒の単位で指 定する.サンプリング周期は \fB\-smpFreq\fR でも指定 可能.(default: 625) .sp このオプションは HTK の SOURCERATE に対応する.同じ値が指定できる. .sp 複数の音響モデルを用いる場合,全インスタンスで共通の値を指定する必要 がある. .RE .PP \fB \-smpFreq \fR \fIHz\fR .RS 3n 音声のサンプリング周波数 (Hz) を指定する.(default: 16,000) .sp 複数の音響モデルを用いる場合,全インスタンスで共通の値を指定する必要 がある. .RE .PP \fB \-fsize \fR \fIsample_num\fR .RS 3n 窓サイズをサンプル数で指定 (default: 400). .sp このオプションは HTK の WINDOWSIZE に対応する.ただし値はHTKと異なり,(HTKの値 / smpPeriod) となる. .sp 複数の音響モデルを用いる場合,全インスタンスで共通の値を指定する必要 がある. .RE .PP \fB \-fshift \fR \fIsample_num\fR .RS 3n フレームシフト幅をサンプル数で指定 (default: 160). .sp このオプションは HTK の TARGETRATE に対応する.ただし値はHTKと異なり,(HTKの値 / smpPeriod) となる. .sp 複数の音響モデルを用いる場合,全インスタンスで共通の値を指定する必要 がある. .RE .PP \fB \-preemph \fR \fIfloat\fR .RS 3n プリエンファシス係数 (default: 0.97) .sp このオプションは HTK の PREEMCOEF に対応する.同じ値が指定できる. .RE .PP \fB \-fbank \fR \fInum\fR .RS 3n フィルタバンクチャンネル数.(default: 24) .sp このオプションは HTK の NUMCHANS に対応する.同じ値が指定できる.指定しないときのデフォルト値が HTKと異なっていることに注意(HTKでは22). .RE .PP \fB \-ceplif \fR \fInum\fR .RS 3n ケプストラムのリフタリング係数. (default: 22) .sp このオプションは HTK の CEPLIFTER に対応する.同じ値が指定できる. .RE .PP \fB \-rawe \fR, \fB \-norawe \fR .RS 3n エネルギー項の値として,プリエンファシス前の raw energy を使用 する / しない (default: disabled=使用しない) .sp このオプションは HTK の RAWENERGY に対応する. 指定しないときのデフォルトがHTKと異なっていることに注意(HTKで はenabled). .RE .PP \fB \-enormal \fR, \fB \-noenormal \fR .RS 3n エネルギー項の値として,発話全体の平均で正規化した正規化エネルギー を用いるかどうかを指定する.(default: \-noenormal) .sp このオプションは HTK の ENORMALISE に対応する. 指定しないときのデフォルトがHTKと異なっていることに注意(HTKで はenabled). .RE .PP \fB \-escale \fR \fIfloat_scale\fR .RS 3n エネルギー正規化時の,対数エネルギー項のスケーリング係数. (default: 1.0) .sp このオプションは HTK の ESCALE に対応する.デフォルト値がHTKと異なっていることに注意(HTKでは 0.1). .RE .PP \fB \-silfloor \fR \fIfloat\fR .RS 3n エネルギー正規化時の,無音部のエネルギーのフロアリング値. (default: 50.0) .sp このオプションは HTK の SILFLOOR に対応する.同じ値が指定できる. .RE .PP \fB \-delwin \fR \fIframe\fR .RS 3n 一次差分計算用のウィンドウフレーム幅.(default: 2) .sp このオプションは HTK の DELTAWINDOW に対応する.同じ値が指定できる. .RE .PP \fB \-accwin \fR \fIframe\fR .RS 3n 二次差分計算用のウィンドウフレーム幅.(default: 2) .sp このオプションは HTK の ACCWINDOW に対応する.同じ値が指定できる. .RE .PP \fB \-hifreq \fR \fIHz\fR .RS 3n MFCCのフィルタバンク計算時におけるバンド制限を有効化する.この オプションではカットオフ周波数の上限値を指定する. \-1 を指定することで無効化できる.(default: \-1) .sp このオプションは HTK の HIFREQ に対応する.同じ値が指定できる. 
.RE .PP \fB \-lofreq \fR \fIHz\fR .RS 3n MFCCのフィルタバンク計算時におけるバンド制限を有効化する.この オプションではカットオフ周波数の下限値を指定する. \-1 を指定することで無効化できる.(default: \-1) .sp このオプションは HTK の LOFREQ に対応する.同じ値が指定できる. .RE .PP \fB \-zmeanframe \fR, \fB \-nozmeanframe \fR .RS 3n 窓単位の直流成分除去を有効化/無効化する. (default: disabled) .sp このオプションは HTK の ZMEANSOURCE に対応する.\fB\-zmean\fR も参照のこと. .RE .PP \fB \-usepower \fR .RS 3n フィルタバンク解析で振幅の代わりにパワーを使う.(default: disabled) .RE .RE .sp .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 .br \fB正規化処理\fR .RS .PP \fB \-cvn \fR .RS 3n ケプストラム分散正規化 (cepstral variance normalization; CVN) を有効にする.ファイル入力では,入力全体の分散に基づいて正規化 が行われる.直接入力ではあらかじめ分散が得られないため,最後の 入力の分散で代用される.音声信号入力でのみ有効である. .RE .PP \fB \-vtln \fR \fIalpha\fR \fIlowcut\fR \fIhicut\fR .RS 3n 周波数ワーピングを行う.声道長正規化 (vocal tract length normalization; VTLN) に使用できる.引数はそれぞれワーピング 係数,周波数上端,周波数下端であり,HTK設定の WARPFREQ,WARPHCUTOFF および WARPLCUTOFF に対応する. .RE .PP \fB \-cmnload \fR \fIfile\fR .RS 3n 起動時にケプストラム平均ベクトルを \fIfile\fRから読み込む.ファイルは \fB\-cmnsave\fR で保存されたファイルを指定する.これ は MAP\-CMN において,起動後最初の発話においてケプストラム平均 の初期値として用いられる.通常,2発話目以降は初期値は,直前の 入力の平均に更新されるが,\fB\-cmnnoupdate\fRを指定 された場合,常にこのファイルの値が各発話の初期値として用いられ る. .RE .PP \fB \-cmnsave \fR \fIfile\fR .RS 3n 認識中に計算したケプストラム平均ベクトルを \fIfile\fRへ保存する.すでにファイルがあ る場合は上書きされる.この保存は音声入力が行われるたびに上書きで 行われる. .RE .PP \fB \-cmnupdate \fR \fB \-cmnnoupdate \fR .RS 3n 実時間認識時,初期ケプストラム平均を入力ごとに更新するかどうか を指定する.通常は有効 (\fB\-cmnupdate\fR) であり, 過去5秒間の入力の平均を初期値として更新する. \fB\-cmnnoupdate\fR が指定された場合,更新は行われず, 初期値は起動時の値に固定される.\fB\-cmnload\fR で初期値 を指定することで,常に同じ初期値を使うようにすることができる. .RE .PP \fB \-cmnmapweight \fR \fIfloat\fR .RS 3n MAP\-CMN の初期ケプストラム平均への重みを指定する.値が大きいほ ど初期値に長時間依存し,小さいほど早く現入力のケプストラム平均 を用いるようになる.(default: 100.0) .RE .RE .sp .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 .br \fBフロントエンド処理\fR .RS .PP \fB \-sscalc \fR .RS 3n 入力先頭の無音部を用いて,入力全体に対してスペクトルサブトラク ションを行う.先頭部の長さは\fB\-sscalclen\fRで指定する. ファイル入力に対してのみ有効である.\fB\-ssload\fR と 同時に指定できない. .RE .PP \fB \-sscalclen \fR \fImsec\fR .RS 3n \fB\-sscalc\fRオプション指定時,各ファイルにおいて ノイズスペクトルの推定に用いる長さをミリ秒で指定する.(default: 300) .RE .PP \fB \-ssload \fR \fIfile\fR .RS 3n ノイズスペクトルを\fIfile\fRから読み込ん でスペクトルサブトラクションを行う. \fIfile\fRはあらかじめ mkssで作成する.マイク入力・ネットワーク入 力などのオンライン入力でも適用できる.\fB\-sscalc\fRと 同時に指定できない. .RE .PP \fB \-ssalpha \fR \fIfloat\fR .RS 3n \fB\-sscalc\fRおよび\fB\-ssload\fR用の 減算係数を指定する.この値が大きいほど強くスペクトル減算を行うが, 減算後の信号の歪も大きくなる.(default: 2.0) .RE .PP \fB \-ssfloor \fR \fIfloat\fR .RS 3n スペクトルサブトラクションのフロアリング係数を指定する.スペク トル減算時,計算の結果パワースペクトルが負となってしまう帯域に 対しては,原信号にこの係数を乗じたスペクトルが割り当てられる. (default: 0.5) .RE .RE .sp .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 .br \fBその他の音響モデル関連オプション\fR .RS .PP \fB \-htkconf \fR \fIfile\fR .RS 3n HTK Config ファイルを解析して,対応する特徴量抽出オプションを Julius に自動設定する.\fIfile\fR は HTK で音響モデル学習時に使用した Config ファイルを指定する.なお, Julius と HTK ではパラメータのデフォルト値が一部異なるが, このオプションを使用する場合,デフォルト値も HTK のデフォルト に切替えれられる. .RE .RE .SS "認識処理・探索 (\fB\-SR\fR)" .PP 認識処理・探索オプションは,第1パス・第2パス用のビーム幅や言語重みのパラメータ,ショートポーズセグメンテーションの設定,単語ラティス・CN 出力用設定,forced alignment の指定,その他の認識処理と結果出力に関するパラメータを含む. .sp .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 .br \fB第1パスパラメータ\fR .RS .PP \fB \-lmp \fR \fIweight\fR \fIpenalty\fR .RS 3n (N\-gram使用時) 第1パス用の言語スコア重みおよび挿入ペナルティ. ペナルティは負であれば単語挿入を抑制し,正であれば単語挿入を促 進する. .RE .PP \fB \-penalty1 \fR \fIpenalty\fR .RS 3n (文法使用時) 第1パス用の単語挿入ペナルティ. (default: 0.0) .RE .PP \fB \-b \fR \fIwidth\fR .RS 3n 第1パス探索の枝刈り (rank pruning) のビーム幅を指定する.単位 は HMM ノード数である. デフォルト値は音響モデルやエンジンの設定による.モノフォン 使用時は400, トライフォン使用時は800,トライフォンでかつ setup=v2.1 のときは 1000 となる. 
.RE .PP \fB \-nlimit \fR \fInum\fR .RS 3n 第1パスでノードごとに保持する仮説トークンの最大数.通常は 1 で 固定されており変更できない.コンパイル時に configureで \-\-enable\-wpairおよび \-\-enable\-wpair\-nlimit が指定されているとき のみ変更できる. .RE .PP \fB \-progout \fR .RS 3n 第1パスで,一定時間おきにその時点での最尤仮説系列を出力する. .RE .PP \fB \-proginterval \fR \fImsec\fR .RS 3n \fB\-progout\fRの出力インターバルをミリ秒で指定する. (default: 300) .RE .RE .sp .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 .br \fB第2パスパラメータ\fR .RS .PP \fB \-lmp2 \fR \fIweight\fR \fIpenalty\fR .RS 3n (N\-gram使用時) 第2パス用の言語スコア重みおよび挿入ペナルティ. ペナルティは負であれば単語挿入を抑制し,正であれば単語挿入を促 進する. .RE .PP \fB \-penalty2 \fR \fIpenalty\fR .RS 3n (文法使用時) 第2パス用の単語挿入ペナルティ. (default: 0.0) .RE .PP \fB \-b2 \fR \fIwidth\fR .RS 3n 第2パス探索における仮説展開回数の上限を指定する.単位は 仮説数.(default: 30) .RE .PP \fB \-sb \fR \fIfloat\fR .RS 3n 第2パスの仮説尤度計算時のスコア幅を指定する.単位は対数尤度差 である.(default: 80.0) .RE .PP \fB \-s \fR \fInum\fR .RS 3n 仮説のスタックサイズを指定する.(default: 500) .RE .PP \fB \-n \fR \fInum\fR .RS 3n \fInum\fR個の文仮説数が見付かるまで探索を 行う.得られた仮説はスコアでソートされて出力される (\fB\-output\fRも見よ).デフォルト値はコンパイル時 のエンジン設定によって変わり,fast 版では 1, standard版では10 である. .RE .PP \fB \-output \fR \fInum\fR .RS 3n 見つかったN\-best候補のうち,結果として出力する文仮説の数を 指定する.\fB\-n\fRも参照のこと.(default: 1) .RE .PP \fB \-m \fR \fIcount\fR .RS 3n 探索打ち切りのための仮説展開回数のしきい値を指定する. (default: 2000) .RE .PP \fB \-lookuprange \fR \fIframe\fR .RS 3n 第2パスの単語展開時に,接続しうる次単語候補を見付けるための 終端時刻の許容幅をフレーム数で指定する.値を大きくするほど その周辺の多くの仮説を次単語候補として仮説展開が行われるように なるが,探索が前に進みにくくなることがある.(default: 5) .RE .PP \fB \-looktrellis \fR .RS 3n 仮説展開を第1パスの結果単語トレリス上に絞る. .RE .RE .sp .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 .br \fBショートポーズセグメンテーション\fR .RS .PP \fB \-spsegment \fR .RS 3n ショートポーズセグメンテーションを有効にする. (Rev.4.0) .RE .PP \fB \-spdur \fR \fIframe\fR .RS 3n 無音区間判定のためのしきい値を指定する.無音単語が一位仮説とな るフレームがこの値以上続いたとき,無音区間として入力が区切られ る.(default: 10) .RE .PP \fB \-pausemodels \fR \fIstring\fR .RS 3n 「無音単語」を定義するための音響モデルにおける無音モデルの名前 を指定する.コンマで区切って複数の名前を指定できる. このオプションが指定されない場合,文法を用いた認識では \fB\-spmodel\fR で指定されるモデルのみを読みとする単 語が無音単語とされる.また,N\-gramではこれに加えて \fB\-silhead\fR および \fB\-siltail\fR で 指定される単語も無音単語として扱われる.(Rev.4.0) .RE .PP \fB \-spmargin \fR \fIframe\fR .RS 3n デコーダベースVADにおいて,アップトリガ時の巻戻し幅をフレーム 数で指定する.(Rev.4.0) .sp このオプションはconfigureに \-\-enable\-decoder\-vadを付けてコンパイルしたとき のみ有効である. .RE .PP \fB \-spdelay \fR \fIframe\fR .RS 3n デコーダベースVADにおいて,アップトリガ判定の遅延幅をフレーム 数で指定する.(Rev.4.0) .sp このオプションはconfigureに \-\-enable\-decoder\-vadを付けてコンパイルしたとき のみ有効である. .RE .RE .sp .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 .br \fB単語ラティス / confusion network 出力\fR .RS .PP \fB \-lattice \fR, \fB \-nolattice \fR .RS 3n 単語グラフ(ラティス)の出力を有効化/無効化する. .RE .PP \fB \-confnet \fR, \fB \-noconfnet \fR .RS 3n Confusion network の出力を有効化/無効化する.confusion network は単語グラフから生成されるため,有効時は同時に \fB\-lattice\fR も有効化される.(Rev.4.0) .RE .PP \fB \-graphrange \fR \fIframe\fR .RS 3n グラフ生成において近傍の同一単語仮説をマージする.開始フレーム および終了フレームの位置の差がそれぞれ \fIframe\fR以下の同一単語仮説についてマー ジする.その際,スコアは高いほうのものが残される.値が \-1 の場 合,マージは一切行われない.値を大きくするほどコンパクトなグラ フが生成されるが,スコアの誤差が大きくなる.このオプションは \fB\-confnet\fRにも影響する.(default: 0) .RE .PP \fB \-graphcut \fR \fIdepth\fR .RS 3n 生成されたグラフに対して,深さによるカットオフを行う. 
\fIdepth\fRは,あるフレームにおいて存在可 能な単語数の上限を指定する.Julius では,第2パスの探索が不安定 な場合,一部分が極端に深いグラフが生成されることが稀にあり,こ のオプションによってそれを抑制することができる.\-1 を指定する ことでこの機能は無効化される.(default: 80) .RE .PP \fB \-graphboundloop \fR \fIcount\fR .RS 3n 事後的に行われる単語グラフの境界時間調整において,振動による 無限ループを防ぐための打ち切り値を指定する.(default: 20) .RE .PP \fB \-graphsearchdelay \fR, \fB \-nographsearchdelay \fR .RS 3n 巨大グラフ生成用にアルゴリズムをチューニングする.このオプショ ンが有効時,Julius は第1文仮説が見つかる前のグラフ生成時の仮説 中断を行わないように,グラフ生成アルゴリズムを変更する.これは, ビーム幅や探索範囲を極端に大きくして巨大なワードグラフを生成し ようとするときに,グラフの精度を改善することがある.(default: disabled) .RE .RE .sp .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 .br \fB複数文法/複数辞書認識\fR .RS .PP 文法や単単語認識において,一つのインスタンスで複数の文法や辞書を用いる 場合に指定できるオプションである. .PP \fB \-multigramout \fR, \fB \-nomultigramout \fR .RS 3n 複数文法あるいは複数辞書を用いて認識を行う場合,通常の Julius は全ての文法/辞書の中から最尤仮説を出力する.このオプションを 指定することで,与えられた個々の文法/辞書ごとに一位仮説を 出力することができる.(default: disabled) .RE .RE .sp .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 .br \fBForced alignment\fR .RS .PP \fB \-walign \fR .RS 3n 認識結果を用いて,入力に対する単語単位の forced alignment を行 う.単語の境界フレームと平均音響尤度が出力される. .RE .PP \fB \-palign \fR .RS 3n 認識結果を用いて,入力に対する音素単位の forced alignment を行 う.音素ごとの境界フレームと平均音響尤度が出力される. .RE .PP \fB \-salign \fR .RS 3n 認識結果を用いて,入力に対するHMMの状態単位の forced alignment を行う.状態ごとの境界フレームと平均音響尤度が出力される. .RE .RE .sp .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 .br \fBその他\fR .RS .PP \fB \-inactive \fR .RS 3n 認識処理インスタンスを一時停止状態 (inactive state) で起動する. (Rev.4.0) .RE .PP \fB \-1pass \fR .RS 3n 第1パスのみを実行する.このオプションを指定した場合,第2パスは 実行されない. .RE .PP \fB \-fallback1pass \fR .RS 3n 通常,第2パスの探索が失敗したとき,Julius は認識結果無しで終了 する.このオプションを指定することで,そのような第2パスの失敗時に, 第1パスの最尤仮説を最終結果として出力することができる. (これはJulius\-3.xでのデフォルトの振る舞いである) .RE .PP \fB \-no_ccd \fR, \fB \-force_ccd \fR .RS 3n 音響モデルを音素コンテキスト依存モデルとして扱うかどうかを明示 的に指定する.デフォルトはHMM中のモデル名から自動判断される. .sp .RE .PP \fB \-cmalpha \fR \fIfloat\fR .RS 3n 確信度計算のためのスコアのスムージング係数.(default: 0.05) .RE .PP \fB \-iwsp \fR .RS 3n (マルチパスモード時のみ有効)単語間にショートポーズモデルを 挟み込んだ認識処理を行う.このオプションを指定すると,辞書上の 全単語の末尾に,スキップ可能なショートポーズモデルが付加される. このショートポーズモデルはコンテキストを考慮せず,また前後の 音素のコンテキストにも表れない.付加するショートポーズモデルは \fB\-spmodel\fR で指定できる. .RE .PP \fB \-transp \fR \fIfloat\fR .RS 3n 透過単語に対する追加の挿入ペナルティを指定する.(default: 0.0) .RE .PP \fB \-demo \fR .RS 3n \fB\-progout \-quiet\fRと同等. .RE .RE .SH "ENVIRONMENT VARIABLES" .PP \fB \fR\fB\fBALSADEV\fR\fR\fB \fR .RS 3n (マイク入力で alsa デバイス使用時) 録音デバイス名を指定する. 指定がない場合は "default". .RE .PP \fB \fR\fB\fBAUDIODEV\fR\fR\fB \fR .RS 3n (マイク入力で oss デバイス使用時) 録音デバイス名を指定する. 指定がない場合は "\fI/dev/dsp\fR". .RE .PP \fB \fR\fB\fBLATENCY_MSEC\fR\fR\fB \fR .RS 3n Linux (alsa/oss) および Windows で,マイク入力時の遅延時間をミ リ秒単位で指定する.短い値を設定することで入力遅延を小さくでき るが,CPU の負荷が大きくなり,また環境によってはプロセスやOSの 挙動が不安定になることがある.最適な値はOS やデバイスに大きく 依存する.デフォルト値は動作環境に依存する. .RE .SH "EXAMPLES" .PP 使用例については付属のチュートリアルをご覧下さい.
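.PP 参考として,最小限の起動例を以下に示す.ここで挙げる jconf ファイル名 (main.jconf) や ALSA デバイス名 (plughw:1,0) は説明のための仮の値であり,実際には使用するモデルおよび環境に合わせて指定する必要がある. .sp .RS 3n .nf % \fBjulius\fR \-C main.jconf \-input mic % env ALSADEV=plughw:1,0 \fBjulius\fR \-C main.jconf \-input alsa .fi .RE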
.SH "SEE ALSO" .PP \fBjulian\fR(1), \fBjcontrol\fR(1), \fBadinrec\fR(1), \fBadintool\fR(1), \fBmkbingram\fR(1), \fBmkbinhmm\fR(1), \fBmkgsmm\fR(1), wav2\fBmfcc\fR(1), \fBmkss\fR(1) .PP [1]\&\fIhttp://julius.sourceforge.jp/\fR .SH "DIAGNOSTICS" .PP 正常終了した場合,Julius は exit status として 0 を返します.エラーが見付かった場合は異常終了し, exist status として 1 を返します. 入力ファイルが見つからない場合やうまく読み込めなかった場合は,そのファ イルに対する処理をスキップします. .SH "BUGS" .PP 使用できるモデルにはサイズやタイプに若干の制限があります.詳しく はパッケージに付属のドキュメントを参照してください. バグ報告・問い合わせ・コメントなどは julius\-info at lists.sourceforge.jp までお願いします. .SH "COPYRIGHT" .PP Copyright (c) 1991\-2008 京都大学 河原研究室 .PP Copyright (c) 1997\-2000 情報処理振興事業協会(IPA) .PP Copyright (c) 2000\-2008 奈良先端科学技術大学院大学 鹿野研究室 .PP Copyright (c) 2005\-2008 名古屋工業大学 Julius開発チーム .SH "AUTHORS" .PP Rev.1.0 (1998/02/20) .RS 3n 設計:河原達也と李 晃伸 (京都大学) .sp 実装:李 晃伸 (京都大学) .RE .PP Rev.1.1 (1998/04/14), Rev.1.2 (1998/10/31), Rev.2.0 (1999/02/20), Rev.2.1 (1999/04/20), Rev.2.2 (1999/10/04), Rev.3.0 (2000/02/14), Rev.3.1 (2000/05/11) .RS 3n 実装:李 晃伸 (京都大学) .RE .PP Rev.3.2 (2001/08/15), Rev.3.3 (2002/09/11), Rev.3.4 (2003/10/01), Rev.3.4.1 (2004/02/25), Rev.3.4.2 (2004/04/30) .RS 3n 実装:李 晃伸 (奈良先端科学技術大学院大学) .RE .PP Rev.3.5 (2005/11/11), Rev.3.5.1 (2006/03/31), Rev.3.5.2 (2006/07/31), Rev.3.5.3 (2006/12/29), Rev.4.0 (2007/12/19), Rev.4.1 (2008/09) .RS 3n 実装:李 晃伸 (名古屋工業大学) .RE .SH "THANKS TO" .PP このプログラムは Rev.3.1 まで,情報処理振興事業協会(IPA)独創的情報技術育 成事業「日本語ディクテーションの基本ソフトウェアの開発」(代表者:鹿野 清宏 奈良先端科学技術大学院大学教授)の援助を受けて行われました. Rev.3.4.2までは「情報処理学会 連続音声認識コンソーシアム」において公開さ れました. .PP 3.x 時代のマルチプラットフォーム DLL版 は,板野秀樹氏(現名城大学)の手 によって作成・公開されました.また,Windows Microsoft Speech API対応版は 住吉貴志氏(京都大学・当時)の手によるものです. .PP そのほか,上記の協力・貢献してくださった方々,およびさまざまな助言・コ メントをいただく関係者各位に深く感謝いたします. .SH "REFERENCES" .TP 3 1.\ http://julius.sourceforge.jp/ \%http://julius.sourceforge.jp/en/ julius-4.2.2/man/ja/mkbinhmmlist.10000644001051700105040000000362011071102424015246 0ustar ritrlab.\" Title: mkbinhmmlist .\" Author: .\" Generator: DocBook XSL Stylesheets v1.71.0 .\" Date: 10/02/2008 .\" Manual: .\" Source: .\" .TH "MKBINHMMLIST" "1" "10/02/2008" "" "" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) .ad l .SH "名前" mkbinhmmlist \- HMMList ファイルをバイナリ形式に変換 .SH "概要" .HP 13 \fBmkbinhmmlist\fR {hmmdefs_file} {HMMList_file} {output_binhmmlist_file} .SH "DESCRIPTION" .PP mkbinhmmlist は,主にトライフォンとともに使用される HMMList ファイルを バイナリ形式に変換します.通常のテキスト形式の代わりにこれを使うことで Juliusの起動を高速化することができます. .PP 変換には,HMMList ファイルのほかに,一緒に使う音響モデル定義ファイル \fIhmmdefs_file\fR が必要です(HTK ASCII形式 / Juliusバイナリ形式のどちらも可). .PP Julius で使用する際には,通常のテキスト形式と同じく "\fB\-hlist\fR" オプションで指定します. テキスト形式かバイナリ形式かの判定は Julius 側で自動的に行われます. .PP mkbinhmmlist は gzip 圧縮されたファイルをそのまま読み込めます. .SH "OPTIONS" .PP \fIhmmdefs_file\fR .RS 3n 音響モデル定義ファイル.HTK ASCII 形式,あるいはJulius バイナ リ形式. .RE .PP \fIHMMList_file\fR .RS 3n 変換対象の HMMList ファイル. .RE .PP \fIoutput_binhmmlist_file\fR .RS 3n 出力先となるJulius用バイナリ形式HMMListファイル.すでに ある場合は上書きされる. .RE .SH "EXAMPLES" .PP HMMList ファイル \fIlogicalTri\fRをバイナリ形式に変換して \fIlogicalTri.bin\fR に保存する: .sp .RS 3n .nf % \fBmkbinhmmlist\fR binhmm logicalTri logicalTri.bin .fi .RE .SH "SEE ALSO" .PP \fB julius \fR( 1 ) , \fB mkbinhmm \fR( 1 ) .SH "COPYRIGHT" .PP Copyright (c) 1991\-2008 京都大学 河原研究室 .PP Copyright (c) 1997\-2000 情報処理振興事業協会(IPA) .PP Copyright (c) 2000\-2008 奈良先端科学技術大学院大学 鹿野研究室 .PP Copyright (c) 2005\-2008 名古屋工業大学 Julius開発チーム .SH "LICENSE" .PP Julius の使用許諾に準じます. 
julius-4.2.2/man/ja/jclient.pl.10000644001051700105040000000330611071102424014613 0ustar ritrlab.\" Title: jclient.pl .\" Author: .\" Generator: DocBook XSL Stylesheets v1.71.0 .\" Date: 10/02/2008 .\" Manual: .\" Source: .\" .TH "JCLIENT.PL" "1" "10/02/2008" "" "" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) .ad l .SH "名前" jclient.pl \- perl 版サンプルクライアント .SH "概要" .HP 11 \fBjclient.pl\fR .SH "DESCRIPTION" .PP Julius に付属のサンプルクライアント "jcontrol" の Perl 版です. モジュール(サーバ)モードで動く Julius から認識結果を受け取ったり, Julius を制御したりできます. .PP わずか 57 行の簡単なプログラムです.アプリケーションへ Julius を組み込 む際の参考になれば幸いです.ご自由にご利用ください。 .SH "EXAMPLES" .PP .RS 3n .nf % \fBjulius\fR \-C ... \-module .fi .RE 上記のようにして Julius をモジュールモードで起動した後,jclient.pl を 起動します.接続するホストのデフォルトは localhost, ポート番号は 10500 です.変えたい場合はスクリプトの冒頭を書き換えてください. .sp .RS 3n .nf % \fBjclient.pl\fR .fi .RE 音声入力を行えば,イベント内容や結果が jclient.pl 側に送信され, 標準出力に出力されます.また,jclient.pl に対してコマンドを入力する (最後に Enter を押す)と,Julius にコマンドが送信され,Julius が制御されます. コマンドは,仕様書にあるモジュールコマンドを生のまま記述します. .SH "SEE ALSO" .PP \fB julius \fR( 1 ) , \fB jcontrol \fR( 1 ) .SH "COPYRIGHT" .PP jclient.pl は 西村竜一 さん (nisimura@sys.wakayama\-u.ac.jp) によって作 成されました.本プログラムのご利用に関しては,作者は一切の保証をしませ ん.各自の責任のもとでご利用ください. .PP 感想、御意見、御要望などのフィードバックは歓迎いたしますので, 上記メールアドレス,または下記ホームページへ御連絡ください. .PP http://w3voice.jp/ julius-4.2.2/man/dfa_determinize.10000644001051700105040000000302311071102423015303 0ustar ritrlab.\" Title: dfa_determinize .\" Author: .\" Generator: DocBook XSL Stylesheets v1.71.0 .\" Date: 10/02/2008 .\" Manual: .\" Source: .\" .TH "DFA_DETERMINIZE" "1" "10/02/2008" "" "" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) .ad l .SH "NAME" dfa_determinize \- Determinize NFA grammar network. .SH "SYNOPSIS" .HP 16 \fBdfa_determinize\fR [\-o\ \fIoutfile\fR] {dfafile} .SH "DESCRIPTION" .PP \fBdfa_determinize\fR converts a non\-deterministic .dfa file into deterministic DFA. Output to standard output, or file specified by "\fB\-o\fR" option. .PP This additional tool is not necessary on a grammar building procedure in Julius, since the grammar network generated by \fBmkdfa.pl\fR is always determinized. .SH "OPTIONS" .PP \fB \-o \fR \fIoutfile\fR .RS 3n Outout file. If not specified, output to stdout. .RE .SH "EXAMPLES" .PP Determinize \fIfoo.dfa\fR to \fIbar.dfa\fR: .sp .RS 3n .nf % \fBdfa_determinize\fR \-o bar.dfa foo.dfa .fi .RE Another way: .sp .RS 3n .nf % \fBdfa_determinize\fR < foo.dfa > bar.dfa .fi .RE .SH "SEE ALSO" .PP \fB mkdfa.pl \fR( 1 ) , \fB dfa_minimize \fR( 1 ) .SH "COPYRIGHT" .PP Copyright (c) 1997\-2000 Information\-technology Promotion Agency, Japan .PP Copyright (c) 1991\-2008 Kawahara Lab., Kyoto University .PP Copyright (c) 2000\-2005 Shikano Lab., Nara Institute of Science and Technology .PP Copyright (c) 2005\-2008 Julius project team, Nagoya Institute of Technology .SH "LICENSE" .PP The same as Julius. julius-4.2.2/man/generate.10000644001051700105040000000375411071102423013757 0ustar ritrlab.\" Title: generate .\" Author: .\" Generator: DocBook XSL Stylesheets v1.71.0 .\" Date: 10/02/2008 .\" Manual: .\" Source: .\" .TH "GENERATE" "1" "10/02/2008" "" "" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) .ad l .SH "NAME" generate \- random sentence generator from a grammar .SH "SYNOPSIS" .HP 9 \fBgenerate\fR [\-v] [\-t] [\-n\ \fInum\fR] [\-s\ \fIspname\fR] {prefix} .SH "DESCRIPTION" .PP This small program randomly generates sentences that are acceptable by the given grammar. 
.PP \fI.dfa\fR, \fI.dict\fR and \fI.term\fR files are needed to execute. They can be generated from the \fI.grammar\fR and \fI.voca\fR files by \fBmkdfa.pl\fR. .SH "OPTIONS" .PP \fB \-t \fR .RS 3n Output category names instead of words. .RE .PP \fB \-n \fR \fInum\fR .RS 3n Set number of sentences to be generated (default: 10) .RE .PP \fB \-s \fR \fIspname\fR .RS 3n Name string of the short\-pause word to be suppressed (default: "sp") .RE .PP \fB \-v \fR .RS 3n Debug output mode. .RE .SH "EXAMPLES" .PP Example output for the sample grammar "fruit": .sp .RS 3n .nf % \fBgenerate\fR fruit Stat: init_voca: read 36 words Reading in term file (optional)...done 15 categories, 36 words DFA has 26 nodes and 42 arcs \-\-\-\-\- I WANT ONE APPLE I WANT TEN PEARS CAN I HAVE A PINEAPPLE I WANT ONE PEAR COULD I HAVE A BANANA I WANT ONE APPLE PLEASE I WANT NINE APPLES NINE APPLES I WANT ONE PINEAPPLE I WANT A PEAR .fi .RE .sp .SH "SEE ALSO" .PP \fB mkdfa.pl \fR( 1 ) , \fB generate\-ngram \fR( 1 ) .SH "COPYRIGHT" .PP Copyright (c) 1997\-2000 Information\-technology Promotion Agency, Japan .PP Copyright (c) 1991\-2008 Kawahara Lab., Kyoto University .PP Copyright (c) 2000\-2005 Shikano Lab., Nara Institute of Science and Technology .PP Copyright (c) 2005\-2008 Julius project team, Nagoya Institute of Technology .SH "LICENSE" .PP The same as Julius. julius-4.2.2/man/mkbingram.10000644001051700105040000000743011144475314014142 0ustar ritrlab.\" Title: mkbingram .\" Author: .\" Generator: DocBook XSL Stylesheets v1.71.0 .\" Date: 02/11/2009 .\" Manual: .\" Source: .\" .TH "MKBINGRAM" "1" "02/11/2009" "" "" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) .ad l .SH "NAME" mkbingram \- make binary N\-gram from ARPA N\-gram file .SH "SYNOPSIS" .HP 10 \fBmkbingram\fR [\-nlr\ \fIforward_ngram.arpa\fR] [\-nrl\ \fIbackward_ngram.arpa\fR] [\-d\ \fIold_bingram_file\fR] {output_bingram_file} .SH "DESCRIPTION" .PP \fBmkbingram\fR is a tool to convert N\-gram definition file(s) in ARPA standard format to a compact Julius binary format. It greatly speeds up the initial loading time of the N\-gram. It can read gzipped files directly. .PP From rev.4.0, Julius can deal with forward N\-gram, backward N\-gram and their combinations. So \fBmkbingram\fR now generates a binary N\-gram file from either of them, or combines the two to produce one binary N\-gram. .PP When only a forward N\-gram is specified, \fBmkbingram\fR generates a binary N\-gram from only the forward N\-gram. When using this binary N\-gram at Julius, it performs the 1st pass with the 2\-gram probabilities in the N\-gram, and runs the 2nd pass with the given N\-gram fully, converting forward probabilities to backward probabilities by Bayes rule. .PP When only a backward N\-gram is specified, \fBmkbingram\fR generates a binary N\-gram file that contains only the backward N\-gram. The 1st pass will use forward 2\-gram probabilities that can be computed from the backward 2\-gram using Bayes rule, and the 2nd pass uses the given backward N\-gram fully. .PP When both forward and backward N\-grams are specified, the 2\-gram part of the forward N\-gram and the whole backward N\-gram will be combined into a single bingram file. The forward 2\-gram will be applied for the 1st pass and the backward N\-gram for the 2nd pass. Note that both N\-grams should be trained on the same corpus with the same parameters (i.e. cut\-off thresholds) and the same vocabulary.
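.PP As a supplementary illustration (the file names below are placeholders), a binary N\-gram generated by \fBmkbingram\fR is then passed to \fBjulius\fR with its "\fB\-d\fR" option: .sp .RS 3n .nf % \fBmkbingram\fR \-nlr forward.arpa \-nrl backward.arpa model.bingram % \fBjulius\fR \-C main.jconf \-d model.bingram .fi .RE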
.PP The old binary N\-gram produced by \fBmkbingram\fR of version 3.x and earlier can be used in Julius\-4, but you can convert the old version to the new version by specifying it as input to the current \fBmkbingram\fR with the option "\fB\-d\fR". .PP Please note that a binary N\-gram file converted by \fBmkbingram\fR of version 4.0 and later cannot be read by older Julius 3.x. .SH "OPTIONS" .PP \fB \-nlr \fR \fIforward_ngram.arpa\fR .RS 3n Read in a forward (left\-to\-right) word N\-gram file in ARPA standard format. .RE .PP \fB \-nrl \fR \fIbackward_ngram.arpa\fR .RS 3n Read in a backward (right\-to\-left) word N\-gram file in ARPA standard format. .RE .PP \fB \-d \fR \fIold_bingram_file\fR .RS 3n Read in a binary N\-gram file. .RE .PP \fB \-swap \fR .RS 3n Swap BOS word and EOS word in N\-gram. .RE .PP \fIoutput_bingram_file\fR .RS 3n Binary N\-gram file name to output. .RE .SH "EXAMPLES" .PP Convert a set of forward and backward N\-gram in ARPA format into Julius binary form: .sp .RS 3n .nf % \fBmkbingram\fR \-nlr 2gram.arpa \-nrl rev\-Ngram.arpa outfile .fi .RE Convert a single forward 4\-gram in ARPA format into a binary file: .sp .RS 3n .nf % \fBmkbingram\fR \-nlr 4gram.arpa outfile .fi .RE Convert old binary N\-gram file to current format: .sp .RS 3n .nf % \fBmkbingram\fR \-d old_bingram new_bingram .fi .RE .SH "SEE ALSO" .PP \fB julius \fR( 1 ) , \fB mkbinhmm \fR( 1 ) , \fB mkbinhmmlist \fR( 1 ) .SH "COPYRIGHT" .PP Copyright (c) 1997\-2000 Information\-technology Promotion Agency, Japan .PP Copyright (c) 1991\-2008 Kawahara Lab., Kyoto University .PP Copyright (c) 2000\-2005 Shikano Lab., Nara Institute of Science and Technology .PP Copyright (c) 2005\-2008 Julius project team, Nagoya Institute of Technology .SH "LICENSE" .PP The same as Julius. julius-4.2.2/man/jcontrol.10000644001051700105040000001452511071102423014015 0ustar ritrlab.\" Title: jcontrol .\" Author: .\" Generator: DocBook XSL Stylesheets v1.71.0 .\" Date: 10/02/2008 .\" Manual: .\" Source: .\" .TH "JCONTROL" "1" "10/02/2008" "" "" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) .ad l .SH "NAME" jcontrol \- a sample module client written in C .SH "SYNOPSIS" .HP 9 \fBjcontrol\fR {\fIhostname\fR} [\fIportnum\fR] .SH "DESCRIPTION" .PP \fBjcontrol\fR is a simple console program to control julius running on another host via the network API. It can send commands to Julius, and receive messages from Julius. .PP When invoked, jcontrol tries to connect to Julius running in "module mode" on the specified hostname. After the connection is established, jcontrol waits for user commands from standard input. .PP When the user types a command to jcontrol, it will be interpreted and the corresponding API command will be sent to Julius. When a message is received from Julius, its content will be output to standard output. .PP For the details about the API, see the related documents. .SH "OPTIONS" .PP \fB hostname \fR .RS 3n Host name where Julius is running in module mode. .RE .PP \fB portnum \fR .RS 3n Port number (default: 10500) .RE .SH "COMMANDS" .PP \fBjcontrol\fR interprets commands from standard input. Below is a list of all commands. .SS "Engine control" .PP pause .RS 3n Stop Julius and enter paused status. In paused status, Julius will not run recognition even if speech input occurs. When this command is issued while recognition is running, Julius will stop after the recognition has been finished.
.RE .PP terminate .RS 3n Same as pause, but discards the current speech input when the command is received in the middle of a recognition process. .RE .PP resume .RS 3n Restart Julius that has been paused or terminated. .RE .PP inputparam \fIarg\fR .RS 3n Tell Julius how to deal with speech input in case the grammar is changed just when recognition is running. Specify one: "TERMINATE", "PAUSE" or "WAIT". .RE .PP version .RS 3n Tell Julius to send the version description string. .RE .PP status .RS 3n Tell Julius to send the system status (active / sleep). .RE .SS "Grammar handling" .PP changegram \fIprefix\fR .RS 3n Send a new grammar "\fIprefix.dfa\fR" and "\fIprefix.dict\fR", and tell julius to use it as a new grammar. All the grammars used in the current process of Julius will be deleted and replaced by the specified grammar. .RE .PP addgram \fIprefix\fR .RS 3n Send a new grammar "\fIprefix.dfa\fR" and "\fIprefix.dict\fR" and add it to the current grammar. .RE .PP deletegram \fIgramlist\fR .RS 3n Tell Julius to delete an existing grammar. The grammar can be specified by either prefix name or number ID. The number ID can be determined from the message sent from Julius each time the grammar information has changed. When you want to delete more than one grammar, specify all of them as a comma\-separated list. .RE .PP deactivategram \fIgramlist\fR .RS 3n Tell Julius to de\-activate a specified grammar. The specified grammar will still be kept but will not be used for recognition. .sp The target grammar can be specified by either prefix name or number ID. The number ID can be determined from the message sent from Julius each time the grammar information has changed. When you want to de\-activate more than one grammar, specify all of them as a comma\-separated list. .RE .PP activategram \fIgramlist\fR .RS 3n Tell Julius to activate a previously de\-activated grammar. The target grammar can be specified by either prefix name or number ID. The number ID can be determined from the message sent from Julius each time the grammar information has changed. When you want to activate more than one grammar, specify all of them as a comma\-separated list. .RE .PP addword \fIgrammar_name_or_id\fR \fIdictfile\fR .RS 3n Add the recognition word entries in the specified \fIdictfile\fR to the specified grammar on the current process. .RE .PP syncgram .RS 3n Force synchronization of grammar status, like the unix command "sync". .RE .SS "Process management" .PP Julius\-4 supports multi\-model recognition and multi decoding. In this case it is possible to control each recognition process, as defined by the "\fB\-SR\fR" option, from the module client. .PP In multi decoding mode, the module client holds a "current process", and the process commands and grammar related commands will be issued toward the current process. .PP listprocess .RS 3n Tell Julius to send the list of existing recognition processes. .RE .PP currentprocess \fIprocname\fR .RS 3n Switch the current process to the process specified by the name. .RE .PP shiftprocess .RS 3n Rotate the current process. At each call the current process will be changed to the next one. .RE .PP addprocess \fIjconffile\fR .RS 3n Tell Julius to load a new recognition process into the engine. The argument \fIjconffile\fR should be a jconf file that contains only one set of LM options and one SR definition. Note that the file should be visible on the running Julius, since \fBjcontrol\fR only sends the path name and Julius actually reads the jconf file. .sp The new LM and SR process will have the name of the jconffile.
.RE .PP delprocess \fIprocname\fR .RS 3n Delete the specified recognition process from the engine. .RE .PP deactivateprocess \fIprocname\fR .RS 3n Tell Julius to temporary stop the specified recognition process. The stopped process will not be executed for the input until activated again. .RE .PP activateprocess \fIprocname\fR .RS 3n Tell Julius to activate the temporarily stopped process. .RE .SH "EXAMPLES" .PP The dump messages from Julius are output to tty with prefix ">" appended to each line. Julius can be started in module mode like this: .sp .RS 3n .nf % \fBjulius\fR \-C ... \-module .fi .RE \fBjcontrol\fRcan be launched with the host name: .sp .RS 3n .nf % \fBjcontrol\fR hostname .fi .RE It will then receive the outputs of Julius and output the raw message to standard out. Also, by inputting the commands above to the standard input of \fBjcontrol\fR, it will be sent to Julius. See manuals for the specification of module mode. .SH "SEE ALSO" .PP \fB julius \fR( 1 ) .SH "COPYRIGHT" .PP Copyright (c) 1997\-2000 Information\-technology Promotion Agency, Japan .PP Copyright (c) 1991\-2008 Kawahara Lab., Kyoto University .PP Copyright (c) 2000\-2005 Shikano Lab., Nara Institute of Science and Technology .PP Copyright (c) 2005\-2008 Julius project team, Nagoya Institute of Technology .SH "LICENSE" .PP The same as Julius. julius-4.2.2/man/mkss.10000644001051700105040000000321211071102423013127 0ustar ritrlab.\" Title: mkss .\" Author: .\" Generator: DocBook XSL Stylesheets v1.71.0 .\" Date: 10/02/2008 .\" Manual: .\" Source: .\" .TH "MKSS" "1" "10/02/2008" "" "" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) .ad l .SH "NAME" mkss \- calculate average spectrum for spectral subtraction .SH "SYNOPSIS" .HP 5 \fBmkss\fR [\fIoptions\fR...] {filename} .SH "DESCRIPTION" .PP \fBmkss\fR is a tool to estimate noise spectrum for spectral subtraction on Julius. It reads a few seconds of sound data from microphone input, calculate the average spectrum and save it to a file. The output file can be used as a noise spectrum data in Julius (option "\fB\-ssload\fR"). .PP The recording will start immediately after startup. Sampling format is 16bit, monoral. If outpue file already exist, it will be overridden. .SH "OPTIONS" .PP \fB \-freq \fR \fIHz\fR .RS 3n Sampling frequency in Hz (default: 16,000) .RE .PP \fB \-len \fR \fImsec\fR .RS 3n capture length in milliseconds (default: 3000) .RE .PP \fB \-fsize \fR \fIsample_num\fR .RS 3n frame size in number of samples (default: 400) .RE .PP \fB \-fshift \fR \fIsample_num\fR .RS 3n frame shift in number of samples (default: 160) .RE .SH "SEE ALSO" .PP \fB julius \fR( 1 ) .SH "COPYRIGHT" .PP Copyright (c) 1997\-2000 Information\-technology Promotion Agency, Japan .PP Copyright (c) 1991\-2008 Kawahara Lab., Kyoto University .PP Copyright (c) 2000\-2005 Shikano Lab., Nara Institute of Science and Technology .PP Copyright (c) 2005\-2008 Julius project team, Nagoya Institute of Technology .SH "LICENSE" .PP The same as Julius. julius-4.2.2/man/generate-ngram.10000644001051700105040000000315711071102423015056 0ustar ritrlab.\" Title: generate\-ngram .\" Author: .\" Generator: DocBook XSL Stylesheets v1.71.0 .\" Date: 10/02/2008 .\" Manual: .\" Source: .\" .TH "GENERATE\-NGRAM" "1" "10/02/2008" "" "" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) .ad l .SH "NAME" generate\-ngram \- random sentence generator from N\-gram .SH "SYNOPSIS" .HP 15 \fBgenerate\-ngram\fR [\fIoptions\fR...] 
{\fIbinary_ngram\fR} .SH "DESCRIPTION" .PP \fBgenerate\-ngram\fR is a tool to generate sentences randomly according to the given N\-gram language model. The N\-gram model file \fIbinary_ngram\fR should be an binary format. .SH "OPTIONS" .PP \fB \-n \fR \fInum\fR .RS 3n Number of sentences to generate (default: 10) .RE .PP \fB \-N \fR .RS 3n Specify which length of N\-gram to use (default: available max in the given model) .RE .PP \fB \-bos \fR .RS 3n Beginning\-of\-sentence word (default: "") .RE .PP \fB \-eos \fR .RS 3n End\-of\-sentence word (default: "") .RE .PP \fB \-ignore \fR .RS 3n Specify a word to be supressed from output (default: " .\" Date: 10/02/2008 .\" Manual: .\" Source: .\" .TH "ADINREC" "1" "10/02/2008" "" "" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) .ad l .SH "NAME" adinrec \- record audio device and save one utterance to a file .SH "SYNOPSIS" .HP 8 \fBadinrec\fR [\fIoptions\fR...] {\fIfilename\fR} .SH "DESCRIPTION" .PP \fBadinrec\fR opens an audio stream, detects an utterance input and store it to a specified file. The utterance detection is done by level and zero\-cross thresholds. Default input device is microphone, but other audio input source, including Julius A/D\-in plugin, can be used by using "\fB\-input\fR" option. .PP The audio format is 16 bit, 1 channel, in Microsoft WAV format. If the given filename already exists, it will be overridden. .PP If filename is "\-" , the captured data will be streamed into standard out, with no header (raw format). .SH "OPTIONS" .PP \fBadinrec\fR uses JuliusLib and adopts Julius options. Below is a list of valid options. .SS "adinrec specific options" .PP \fB \-freq \fR \fIHz\fR .RS 3n Set sampling rate in Hz. (default: 16,000) .RE .PP \fB \-raw \fR .RS 3n Output in raw file format. .RE .SS "JuliusLib options" .PP \fB \-input \fR {mic|rawfile|adinnet|stdin|netaudio|esd|alsa|oss} .RS 3n Choose speech input source. Specify 'file' or 'rawfile' for waveform file. On file input, users will be prompted to enter the file name from stdin. .sp \'mic' is to get audio input from a default live microphone device, and 'adinnet' means receiving waveform data via tcpip network from an adinnet client. 'netaudio' is from DatLink/NetAudio input, and 'stdin' means data input from standard input. .sp At Linux, you can choose API at run time by specifying alsa, oss and esd. .RE .PP \fB \-chunk_size \fR \fIsamples\fR .RS 3n Audio fragment size in number of samples. (default: 1000) .RE .PP \fB \-lv \fR \fIthres\fR .RS 3n Level threshold for speech input detection. Values should be in range from 0 to 32767. (default: 2000) .RE .PP \fB \-zc \fR \fIthres\fR .RS 3n Zero crossing threshold per second. Only input that goes over the level threshold (\fB\-lv\fR) will be counted. (default: 60) .RE .PP \fB \-headmargin \fR \fImsec\fR .RS 3n Silence margin at the start of speech segment in milliseconds. (default: 300) .RE .PP \fB \-tailmargin \fR \fImsec\fR .RS 3n Silence margin at the end of speech segment in milliseconds. (default: 400) .RE .PP \fB \-zmean \fR .RS 3n This option enables DC offset removal. .RE .PP \fB \-smpFreq \fR \fIHz\fR .RS 3n Set sampling rate in Hz. (default: 16,000) .RE .PP \fB \-48 \fR .RS 3n Record input with 48kHz sampling, and down\-sample it to 16kHz on\-the\-fly. This option is valid for 16kHz model only. The down\-sampling routine was ported from sptk. (Rev. 4.0) .RE .PP \fB \-NA \fR \fIdevicename\fR .RS 3n Host name for DatLink server input (\fB\-input netaudio\fR). 
.RE .PP \fB \-adport \fR \fIport_number\fR .RS 3n With \fB\-input adinnet\fR, specify adinnet port number to listen. (default: 5530) .RE .PP \fB \-nostrip \fR .RS 3n Julius by default removes successive zero samples in input speech data. This option stops it. .RE .PP \fB \-C \fR \fIjconffile\fR .RS 3n Load a jconf file here. The content of the jconffile will be expanded at this point. .RE .PP \fB \-plugindir \fR \fIdirlist\fR .RS 3n Specify which directories to load plugins from. If several directories exist, specify them as a colon\-separated list. .RE .SH "ENVIRONMENT VARIABLES" .PP \fB \fR\fB\fBALSADEV\fR\fR\fB \fR .RS 3n Device name string for ALSA. (default: "default") .RE .PP \fB \fR\fB\fBAUDIODEV\fR\fR\fB \fR .RS 3n Device name string for OSS. (default: "\fI/dev/dsp\fR") .RE .PP \fB \fR\fB\fBLATENCY_MSEC\fR\fR\fB \fR .RS 3n Input latency of microphone input in milliseconds. A smaller value will shorten latency but sometimes makes the process unstable. The default value will depend on the running OS. .RE .SH "SEE ALSO" .PP \fB julius \fR( 1 ) , \fB adintool \fR( 1 ) .SH "COPYRIGHT" .PP Copyright (c) 1997\-2000 Information\-technology Promotion Agency, Japan .PP Copyright (c) 1991\-2008 Kawahara Lab., Kyoto University .PP Copyright (c) 2000\-2005 Shikano Lab., Nara Institute of Science and Technology .PP Copyright (c) 2005\-2008 Julius project team, Nagoya Institute of Technology .SH "LICENSE" .PP The same as Julius. julius-4.2.2/man/mkdfa.pl.10000644001051700105040000000416711071102423013660 0ustar ritrlab.\" Title: mkdfa.pl .\" Author: .\" Generator: DocBook XSL Stylesheets v1.71.0 .\" Date: 10/02/2008 .\" Manual: .\" Source: .\" .TH "MKDFA.PL" "1" "10/02/2008" "" "" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) .ad l .SH "NAME" mkdfa.pl \- grammar compiler .SH "SYNOPSIS" .HP 9 \fBmkdfa.pl\fR [\fIoptions\fR...] {prefix} .SH "DESCRIPTION" .PP \fBmkdfa.pl\fR compiles the Julian format grammar (\fI.grammar\fR and \fI.voca\fR) to Julian native formats (\fI.dfa\fR and \fI.dict\fR). In addition, a "\fI.term\fR" file will also be generated, which stores the correspondence of the category IDs used in the output files to the source category names. .PP prefix should be the common file name prefix of the "\fI.grammar\fR" and "\fI.voca\fR" files. From the prefix.grammar and prefix.voca files, prefix.dfa, prefix.dict and prefix.term will be output. .SH "OPTIONS" .PP \fB \-n \fR .RS 3n Do not process the dictionary. You can convert only the \fI.grammar\fR file to the \fI.dfa\fR file without a \fI.voca\fR file. .RE .SH "ENVIRONMENT VARIABLES" .PP \fB \fR\fB\fBTMP\fR\fR\fB or \fR\fB\fBTEMP\fR\fR\fB \fR .RS 3n Set the directory to store temporary files. If not specified, one of the following will be used: \fI/tmp\fR, \fI/var/tmp\fR, \fI/WINDOWS/Temp\fR, \fI/WINNT/Temp\fR. .RE .SH "EXAMPLES" .PP Convert a grammar \fIfoo.grammar\fR and \fIfoo.voca\fR to \fIfoo.dfa\fR, \fIfoo.dict\fR and \fIfoo.term\fR: .sp .RS 3n .nf % \fBmkdfa.pl\fR foo .fi .RE .SH "SEE ALSO" .PP \fB julius \fR( 1 ) , \fB generate \fR( 1 ) , \fB nextword \fR( 1 ) , \fB accept_check \fR( 1 ) , \fB dfa_minimize \fR( 1 ) .SH "DIAGNOSTICS" .PP \fBmkdfa.pl\fR invokes \fBmkfa\fR and \fBdfa_minimize\fR internally. They should be placed in the same directory as \fBmkdfa.pl\fR.
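.PP As a supplementary illustration (the grammar prefix foo is the same placeholder as in EXAMPLES above, and main.jconf is a hypothetical jconf file), a compiled grammar is typically passed to \fBjulius\fR by its prefix: .sp .RS 3n .nf % \fBmkdfa.pl\fR foo % \fBjulius\fR \-C main.jconf \-gram foo .fi .RE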
.SH "COPYRIGHT" .PP Copyright (c) 1997\-2000 Information\-technology Promotion Agency, Japan .PP Copyright (c) 1991\-2008 Kawahara Lab., Kyoto University .PP Copyright (c) 2000\-2005 Shikano Lab., Nara Institute of Science and Technology .PP Copyright (c) 2005\-2008 Julius project team, Nagoya Institute of Technology .SH "LICENSE" .PP The same as Julius. julius-4.2.2/man/julius.10000644001051700105040000015522111666612225013515 0ustar ritrlab.\" Title: julius .\" Author: .\" Generator: DocBook XSL Stylesheets v1.71.0 .\" Date: 02/11/2009 .\" Manual: .\" Source: .\" .TH "JULIUS" "1" "02/11/2009" "" "" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) .ad l .SH "NAME" julius \- open source multi\-purpose LVCSR engine .SH "SYNOPSIS" .HP 7 \fBjulius\fR [\-C\ \fIjconffile\fR] [\fIoptions\fR...] .SH "DESCRIPTION" .PP \fBjulius\fR is a high\-performance, multi\-purpose, open\-source speech recognition engine for researchers and developers. It is capable of performing almost real\-time recognition of continuous speech with over 60k\-word 3\-gram language model and triphone HMM model, on most current PCs. \fBjulius\fR can perform recognition on audio files, live microphone input, network input and feature parameter files. .PP The core recognition module is implemented as C library called "JuliusLib". It can also be extended by plug\-in facility. .SS "Supported Models" .PP \fBjulius\fR needs a language model and an acoustic model to run as a speech recognizer. \fBjulius\fR supports the following models. .sp .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 .br \fBAcoustic model\fR .RS .PP Sub\-word HMM (Hidden Markov Model) in HTK ascii format are supported. Phoneme models (monophone), context dependent phoneme models (triphone), tied\-mixture and phonetic tied\-mixture models of any unit can be used. When using context dependent models, inter\-word context dependency is also handled. Multi\-stream feature and MSD\-HMM is also supported. You can further use a tool \fBmkbinhmm\fR to convert the ascii HMM file to a compact binary format for faster loading. .PP Note that \fBjulius\fR itself can only extract MFCC features from speech data. If you use acoustic HMM trained for other feature, you should give the input in HTK parameter file of the same feature type. .RE .sp .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 .br \fBLanguage model: word N\-gram\fR .RS .PP Word N\-gram language model, up to 10\-gram, is supported. Julius uses different N\-gram for each pass: left\-to\-right 2\-gram on 1st pass, and right\-to\-left N\-gram on 2nd pass. It is recommended to use both LR 2\-gram and RL N\-gram for Julius. However, you can use only single LR N\-gram or RL N\-gram. In such case, approximated LR 2\-gram computed from the given N\-gram will be applied at the first pass. .PP The Standard ARPA format is supported. In addition, a binary format is also supported for efficiency. The tool \fBmkbingram\fR(1) can convert ARPA format N\-gram to binary format. .RE .sp .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 .br \fBLanguage model: grammar\fR .RS .PP The grammar format is an original one, and tools to create a recognirion grammar are included in the distribution. A grammar consists of two files: one is a 'grammar' file that describes sentence structures in a BNF style, using word 'category' name as terminate symbols. Another is a 'voca' file that defines words with its pronunciations (i.e. phoneme sequences) for each category. 
They should be converted by \fBmkdfa.pl\fR(1) to a deterministic finite automaton file (.dfa) and a dictionary file (.dict), respectively. You can also use multiple grammars. .RE .sp .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 .br \fBLanguage model: isolated word\fR .RS .PP You can perform isolated word recognition using only a word dictionary. With this model type, Julius will perform rapid one pass recognition with static context handling. Silence models will be added at both head and tail of each word. You can also use multiple dictionaries in a process. .RE .SS "Search Algorithm" .PP The recognition algorithm of \fBjulius\fR is based on a two\-pass strategy. Word 2\-gram and reverse word 3\-gram are used on the respective passes. The entire input is processed on the first pass, and then the final search is performed again over the input, using the result of the first pass to narrow the search space. Specifically, the recognition algorithm is based on a tree\-trellis heuristic search combined with left\-to\-right frame\-synchronous beam search and right\-to\-left stack decoding search. .PP When using context dependent phones (triphones), interword contexts are taken into consideration. For tied\-mixture and phonetic tied\-mixture models, high\-speed acoustic likelihood calculation is possible using gaussian pruning. .PP For more details, see the related documents. .SH "OPTIONS" .PP These options specify the models, system behaviors and various search parameters to Julius. These options can be set on the command line, but it is recommended that you write them in a text file as a "jconf file", and specify it with the "\-C" option. .PP Applications incorporating JuliusLib also use these options to set the parameters of the core recognition engine. For example, a jconf file can be loaded into the engine by calling \fBj_config_load_file_new()\fR with the jconf file name as argument. .PP Please note that relative paths in a jconf file should be relative to the jconf file itself, not the current working directory. .PP Below are the details of all options, gathered by group. .SS "Julius application option" .PP These are application options of Julius, outside of JuliusLib. They contain parameters and switches for result output, character set conversion, log level, and module mode options. These options are specific to Julius, and cannot be used by applications using JuliusLib other than Julius. .PP \fB \-outfile \fR .RS 3n On file input, this option writes the recognition result of each file to a separate file. The output file will have the same name as the input file, but with the suffix changed to ".out". (rev.4.0) .RE .PP \fB \-separatescore \fR .RS 3n Output the language and acoustic scores separately. .RE .PP \fB \-callbackdebug \fR .RS 3n Print the callback names at each call for debugging. (rev.4.0) .RE .PP \fB \-charconv \fR \fIfrom\fR \fIto\fR .RS 3n Print with character set conversion. \fIfrom\fR is the source character set used in the language model, and \fIto\fR is the target character set you want to get. .sp On Linux, the arguments should be a code name. You can obtain the list of available code names by invoking the command "iconv \-\-list". On Windows, the arguments should be a code name or codepage number. Code name should be one of "ansi", "mac", "oem", "utf\-7", "utf\-8", "sjis", "euc". Or you can specify any codepage number supported in your environment. .RE .PP \fB \-nocharconv \fR .RS 3n Disable character conversion.
.RE .PP \fB \-module \fR [port] .RS 3n Run Julius on "Server Module Mode". After startup, Julius waits for tcp/ip connection from client. Once connection is established, Julius start communication with the client to process incoming commands from the client, or to output recognition results, input trigger information and other system status to the client. The default port number is 10500. .RE .PP \fB \-record \fR \fIdir\fR .RS 3n Auto\-save all input speech data into the specified directory. Each segmented inputs are recorded each by one. The file name of the recorded data is generated from system time when the input ends, in a style of YYYY.MMDD.HHMMSS.wav. File format is 16bit monoral WAV. Invalid for mfcfile input. .sp With input rejection by \fB\-rejectshort\fR, the rejected input will also be recorded even if they are rejected. .RE .PP \fB \-logfile \fR \fIfile\fR .RS 3n Save all log output to a file instead of standard output. (Rev.4.0) .RE .PP \fB \-nolog \fR .RS 3n Disable all log output. (Rev.4.0) .RE .PP \fB \-help \fR .RS 3n Output help message and exit. .RE .SS "Global options" .PP These are model\-/search\-dependent options relating audio input, sound detection, GMM, decoding algorithm, plugin facility, and others. Global options should be placed before any instance declaration (\fB\-AM\fR, \fB\-LM\fR, or \fB\-SR\fR), or just after "\fB\-GLOBAL\fR" option. .sp .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 .br \fBAudio input\fR .RS .PP \fB \-input \fR {mic|rawfile|mfcfile|adinnet|stdin|netaudio|alsa|oss|esd} .RS 3n Choose speech input source. Specify 'file' or 'rawfile' for waveform file, 'htkparam' or 'mfcfile' for HTK parameter file. On file input, users will be prompted to enter the file name from stdin, or you can use \fB\-filelist\fR option to specify list of files to process. .sp \'mic' is to get audio input from a default live microphone device, and 'adinnet' means receiving waveform data via tcpip network from an adinnet client. 'netaudio' is from DatLink/NetAudio input, and 'stdin' means data input from standard input. .sp For waveform file input, only WAV (no compression) and RAW (noheader, 16bit, big endian) are supported by default. Other format can be read when compiled with libsnd library. To see what format is actually supported, see the help message using option \fB\-help\fR. For stdin input, only WAV and RAW is supported. (default: mfcfile) .sp At Linux, you can choose API at run time by specifying alsa, oss and esd. .RE .PP \fB \-chunk_size \fR \fIsamples\fR .RS 3n Audio fragment size in number of samples. (default: 1000) .RE .PP \fB \-filelist \fR \fIfilename\fR .RS 3n (With \fB\-input rawfile|mfcfile\fR) perform recognition on all files listed in the file. The file should contain input file per line. Engine will end when all of the files are processed. .RE .PP \fB \-notypecheck \fR .RS 3n By default, Julius checks the input parameter type whether it matches the AM or not. This option will disable the check and force engine to use the input vector as is. .RE .PP \fB \-48 \fR .RS 3n Record input with 48kHz sampling, and down\-sample it to 16kHz on\-the\-fly. This option is valid for 16kHz model only. The down\-sampling routine was ported from sptk. (Rev. 4.0) .RE .PP \fB \-NA \fR \fIdevicename\fR .RS 3n Host name for DatLink server input (\fB\-input netaudio\fR). .RE .PP \fB \-adport \fR \fIport_number\fR .RS 3n With \fB\-input adinnet\fR, specify adinnet port number to listen. 
(default: 5530) .RE .PP \fB \-nostrip \fR .RS 3n Julius by default removes successive zero samples in input speech data. This option inhibits the removal. .RE .PP \fB \-zmean \fR, \fB \-nozmean \fR .RS 3n This option enables/disables DC offset removal of input waveform. Offset will be estimated from the whole input. For microphone / network input, zero mean of the first 48000 samples (3 seconds in 16kHz sampling) will be used for the estimation. (default: disabled) .sp This option uses static offset for the channel. See also \fB\-zmeansource\fR for frame\-wise offset removal. .RE .RE .sp .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 .br \fBSpeech detection by level and zero\-cross\fR .RS .PP \fB \-cutsilence \fR, \fB \-nocutsilence \fR .RS 3n Turn on / off the speech detection by level and zero\-cross. Default is on for mic / adinnet input, and off for files. .RE .PP \fB \-lv \fR \fIthres\fR .RS 3n Level threshold for speech input detection. Values should be in range from 0 to 32767. (default: 2000) .RE .PP \fB \-zc \fR \fIthres\fR .RS 3n Zero crossing threshold per second. Only input that goes over the level threshold (\fB\-lv\fR) will be counted. (default: 60) .RE .PP \fB \-headmargin \fR \fImsec\fR .RS 3n Silence margin at the start of speech segment in milliseconds. (default: 300) .RE .PP \fB \-tailmargin \fR \fImsec\fR .RS 3n Silence margin at the end of speech segment in milliseconds. (default: 400) .RE .RE .sp .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 .br \fBInput rejection\fR .RS .PP Two simple front\-end input rejection methods are implemented, based on input length and average power of detected segment. The rejection by average power is experimental, and can be enabled by \-\-enable\-power\-reject on compilation. Valid for MFCC feature with power coefficient and real\-time input only. .PP For GMM\-based input rejection see the GMM section below. .PP \fB \-rejectshort \fR \fImsec\fR .RS 3n Reject input shorter than specified milliseconds. Search will be terminated and no result will be output. .RE .PP \fB \-powerthres \fR \fIthres\fR .RS 3n Reject the inputted segment by its average energy. If the average energy of the last recognized input is below the threshold, Julius will reject the input. (Rev.4.0) .sp This option is valid when \-\-enable\-power\-reject is specified at compilation time. .RE .RE .sp .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 .br \fBGaussian mixture model / GMM\-VAD\fR .RS .PP GMM will be used for input rejection by accumulated score, or for front\-end GMM\-based VAD when \-\-enable\-gmm\-vad is specified. .PP NOTE: You should also set the proper MFCC parameters required for the GMM, specifying the acoustic parameters described in AM section \fB\-AM_GMM\fR. .PP When GMM\-based VAD is enabled, the voice activity score will be calculated at each frame as front\-end processing. The value will be computed as \\[ \\max_{m \\in M_v} p(x|m) \- \\max_{m \\in M_n} p(x|m) \\] where $M_v$ is a set of voice GMM, and $M_n$ is a set of noise GMM whose names should be specified by \fB\-gmmreject\fR. The activity score will be then averaged for the last N frames, where N is specified by \fB\-gmmmargin\fR. Julius updates the averaged activity score at each frame, and detect speech up\-trigger when the value gets higher than a value specified by \fB\-gmmup\fR, and detecgt down\-trigger when it gets lower than a value of \fB\-gmmdown\fR. .PP \fB \-gmm \fR \fIhmmdefs_file\fR .RS 3n GMM definition file in HTK format. 
If specified, GMM\-based input verification will be performed concurrently with the 1st pass, and you can reject the input according to the result as specified by \fB\-gmmreject\fR. The GMM should be defined as one\-state HMMs. .RE .PP \fB \-gmmnum \fR \fInumber\fR .RS 3n Number of Gaussian components to be computed per frame on GMM calculation. Only the N\-best Gaussians will be computed for rapid calculation. The default is 10; specifying a smaller value will speed up GMM calculation, but a too small value (1 or 2) may cause degradation of identification performance. .RE .PP \fB \-gmmreject \fR \fIstring\fR .RS 3n Comma\-separated list of GMM names to be rejected as invalid input. During recognition, the log likelihoods of the GMMs accumulated over the entire input will be computed concurrently with the 1st pass. If the GMM name of the maximum score is within this string, the 2nd pass will not be executed and the input will be rejected. .RE .PP \fB \-gmmmargin \fR \fIframes\fR .RS 3n (GMM_VAD) Head margin in frames. When a speech trigger is detected by the GMM, recognition will start from the current frame minus this value. (Rev.4.0) .sp This option will be valid only if compiled with \-\-enable\-gmm\-vad. .RE .PP \fB \-gmmup \fR \fIvalue\fR .RS 3n (GMM_VAD) Up trigger threshold of voice activity score. (Rev.4.1) .sp This option will be valid only if compiled with \-\-enable\-gmm\-vad. .RE .PP \fB \-gmmdown \fR \fIvalue\fR .RS 3n (GMM_VAD) Down trigger threshold of voice activity score. (Rev.4.1) .sp This option will be valid only if compiled with \-\-enable\-gmm\-vad. .RE .RE .sp .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 .br \fBDecoding option\fR .RS .PP Real\-time processing means concurrent processing of MFCC computation and 1st pass decoding. By default, real\-time processing on the pass is on for microphone / adinnet / netaudio input, and off for others. .PP \fB \-realtime \fR, \fB \-norealtime \fR .RS 3n Explicitly switch on / off real\-time (pipe\-line) processing on the first pass. The default is off for file input, and on for microphone, adinnet and NetAudio input. This option relates to the way CMN and energy normalization are performed: if off, they will be done using the average features of the whole input. If on, MAP\-CMN and energy normalization suited to real\-time processing are used. .RE .RE .sp .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 .br \fBMisc. options\fR .RS .PP \fB \-C \fR \fIjconffile\fR .RS 3n Load a jconf file here. The content of the jconffile will be expanded at this point. .RE .PP \fB \-version \fR .RS 3n Print version information to standard error, and exit. .RE .PP \fB \-setting \fR .RS 3n Print engine setting information to standard error, and exit. .RE .PP \fB \-quiet \fR .RS 3n Output fewer log messages. For the result, only the best word sequence will be printed. .RE .PP \fB \-debug \fR .RS 3n (For debugging) output enormous internal messages and debug information to the log. .RE .PP \fB \-check \fR {wchmm|trellis|triphone} .RS 3n For debugging, enter interactive check mode. .RE .PP \fB \-plugindir \fR \fIdirlist\fR .RS 3n Specify which directories to load plugins from. If several directories exist, specify them as a colon\-separated list. .RE .RE .SS "Instance declaration for multi decoding" .PP The following arguments will create a new configuration set with default parameters, and switch the current set to it. Jconf parameters specified after the option will be set into the current set.
.PP To do multi\-model decoding, these arguments should be specified at the beginning of each model / search instance, with different names. Any options before the first instance definition will be IGNORED. .PP When no instance definition is found (as in older versions of Julius), all the options are assigned to a default instance named _default. .PP Please note that decoding with a single LM and multiple AMs is not fully supported. For example, you may want to construct the jconf file as follows: .sp .RS 3n .nf \-AM am_1 \-AM am_2 \-LM lm (LM spec..) \-SR search1 am_1 lm \-SR search2 am_2 lm .fi .RE This type of model sharing is not supported yet, since some part of LM processing depends on the assigned AM. Instead, you can get the same result by defining the same LMs for each AM, like this: .sp .RS 3n .nf \-AM am_1 \-AM am_2 \-LM lm_1 (LM spec..) \-LM lm_2 (same LM spec..) \-SR search1 am_1 lm_1 \-SR search2 am_2 lm_2 .fi .RE .PP \fB \-AM \fR \fIname\fR .RS 3n Create a new AM configuration set, and switch current to the new one. You should give a unique name. (Rev.4.0) .RE .PP \fB \-LM \fR \fIname\fR .RS 3n Create a new LM configuration set, and switch current to the new one. You should give a unique name. (Rev.4.0) .RE .PP \fB \-SR \fR \fIname\fR \fIam_name\fR \fIlm_name\fR .RS 3n Create a new search configuration set, and switch current to the new one. The specified AM and LM will be assigned to it. The \fIam_name\fR and \fIlm_name\fR can be either name or ID number. You should give a unique name. (Rev.4.0) .RE .PP \fB \-AM_GMM \fR .RS 3n When using GMM for front\-end processing, you can specify GMM\-specific acoustic parameters after this option. If you do not specify \fB\-AM_GMM\fR with GMM, the GMM will share the same parameter vector as the last AM. The current AM will be switched to the GMM one, so be careful not to confuse it with normal AM configurations. (Rev.4.0) .RE .PP \fB \-GLOBAL \fR .RS 3n Start a global section. The global options should be placed before any instance declaration, or after this option on multiple model recognition. This can be used multiple times. (Rev.4.1) .RE .PP \fB \-nosectioncheck \fR, \fB \-sectioncheck \fR .RS 3n Disable / enable option location check in multi\-model decoding. When enabled, the options between instance declarations are treated as "sections" and only the belonging option types can be written. For example, when an option \fB\-AM\fR is specified, only AM\-related options can be placed after the option until another declaration is found. Also, global options should be placed at the top, before any instance declaration. This is enabled by default. (Rev.4.1) .RE .SS "Language model (\fB\-LM\fR)" .PP This group contains options for model definition of each language model type. When using multiple LMs, each instance can have only one LM. .PP Only one type of LM can be specified for an LM configuration. If you want to use multiple models, you should define each of them as a new LM. .sp .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 .br \fBN\-gram\fR .RS .PP \fB \-d \fR \fIbingram_file\fR .RS 3n Use binary format N\-gram. An ARPA N\-gram file can be converted to Julius binary format by mkbingram. .RE .PP \fB \-nlr \fR \fIarpa_ngram_file\fR .RS 3n A forward, left\-to\-right N\-gram language model in standard ARPA format. When both a forward N\-gram and backward N\-gram are specified, Julius uses this forward 2\-gram for the 1st pass, and the backward N\-gram for the 2nd pass.
.sp Since an ARPA file often gets huge and requires a lot of time to load, it may be better to convert the ARPA file to Julius binary format by mkbingram. Note that if both forward and backward N\-grams are used for recognition, they together will be converted to a single binary. .sp When only a forward N\-gram is specified by this option and no backward N\-gram is specified by \fB\-nrl\fR, Julius performs recognition with only the forward N\-gram. The 1st pass will use the 2\-gram entry in the given N\-gram, and the 2nd pass will use the given N\-gram, converting forward probabilities to backward probabilities by Bayes rule. (Rev.4.0) .RE .PP \fB \-nrl \fR \fIarpa_ngram_file\fR .RS 3n A backward, right\-to\-left N\-gram language model in standard ARPA format. When both a forward N\-gram and backward N\-gram are specified, Julius uses the forward 2\-gram for the 1st pass, and this backward N\-gram for the 2nd pass. .sp Since an ARPA file often gets huge and requires a lot of time to load, it may be better to convert the ARPA file to Julius binary format by mkbingram. Note that if both forward and backward N\-grams are used for recognition, they together will be converted to a single binary. .sp When only a backward N\-gram is specified by this option and no forward N\-gram is specified by \fB\-nlr\fR, Julius performs recognition with only the backward N\-gram. The 1st pass will use the forward 2\-gram probability computed from the backward 2\-gram using Bayes rule. The 2nd pass fully uses the given backward N\-gram. (Rev.4.0) .RE .PP \fB \-v \fR \fIdict_file\fR .RS 3n Word dictionary file. .RE .PP \fB \-silhead \fR \fIword_string\fR \fB \-siltail \fR \fIword_string\fR .RS 3n Silence word defined in the dictionary, for silences at the beginning of sentence and end of sentence. (default: "<s>", "</s>") .RE .PP \fB \-mapunk \fR \fIword_string\fR .RS 3n Specify the unknown word. Default is "<unk>" or "<UNK>". This will be used to assign word probability to unknown words, i.e. words in the dictionary that are not in the N\-gram vocabulary. .RE .PP \fB \-iwspword \fR .RS 3n Add a word entry to the dictionary that should correspond to inter\-word pauses. This may improve recognition accuracy in some language models that have no explicit inter\-word pause modeling. The word entry to be added can be changed by \fB\-iwspentry\fR. .RE .PP \fB \-iwspentry \fR \fIword_entry_string\fR .RS 3n Specify the word entry that will be added by \fB\-iwspword\fR. (default: "<UNK> [sp] sp sp") .RE .PP \fB \-sepnum \fR \fInumber\fR .RS 3n Number of high frequency words to be isolated from the lexicon tree, to ease approximation errors that may be caused by the one\-best approximation on the 1st pass. (default: 150) .RE .RE .sp .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 .br \fBGrammar\fR .RS .PP Multiple grammars can be specified by repeating \fB\-gram\fR and \fB\-gramlist\fR. Note that this behavior is unusual compared with other options (for normal Julius options, the last one overrides previous ones). You can use \fB\-nogram\fR to reset the grammars already specified before that point. .PP \fB \-gram \fR gramprefix1[,gramprefix2[,gramprefix3,...]] .RS 3n Comma\-separated list of grammars to be used. The argument should be a prefix of a grammar, i.e. if you have \fIfoo.dfa\fR and \fIfoo.dict\fR, you should specify them with a single argument foo. Multiple grammars can be specified at a time as a comma\-separated list. .RE .PP \fB \-gramlist \fR \fIlist_file\fR .RS 3n Specify a grammar list file that contains a list of grammars to be used.
The list file should contain the prefixes of grammars, each per line. A relative path in the list file will be treated as relative to the file, not the current path or configuration file. .RE .PP \fB \-dfa \fR \fIdfa_file\fR \fB \-v \fR \fIdict_file\fR .RS 3n An old way of specifying grammar files separately. This is bogus, and should not be used any more. .RE .PP \fB \-nogram \fR .RS 3n Remove the current list of grammars already specified by \fB\-gram\fR, \fB\-gramlist\fR, \fB\-dfa\fR and \fB\-v\fR. .RE .RE .sp .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 .br \fBIsolated word\fR .RS .PP Dictionary can be specified by using \fB\-w\fR and \fB\-wlist\fR. When you specify multiple times, all of them will be read at startup. You can use \fB\-nogram\fR to reset the already specified dictionaries at that point. .PP \fB \-w \fR \fIdict_file\fR .RS 3n Word dictionary for isolated word recognition. File format is the same as other LM. (Rev.4.0) .RE .PP \fB \-wlist \fR \fIlist_file\fR .RS 3n Specify a dictionary list file that contains list of dictionaries to be used. The list file should contain the file name of dictionaries, each per line. A relative path in the list file will be treated as relative to the list file, not the current path or configuration file. (Rev.4.0) .RE .PP \fB \-nogram \fR .RS 3n Remove the current list of dictionaries already specified by \fB\-w\fR and \fB\-wlist\fR. .RE .PP \fB \-wsil \fR \fIhead_sil_model_name\fR \fItail_sil_model_name\fR \fIsil_context_name\fR .RS 3n On isolated word recognition, silence models will be appended to the head and tail of each word at recognition. This option specifies the silence models to be appended. \fIsil_context_name\fR is the name of the head sil model and tail sil model as a context of word head phone and tail phone. For example, if you specify \-wsil silB silE sp, a word with phone sequence b eh t will be translated as silB sp\-b+eh b\-eh+t eh\-t+sp silE. (Rev.4.0) .RE .RE .sp .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 .br \fBUser\-defined LM\fR .RS .PP \fB \-userlm \fR .RS 3n Declare to use user LM functions in the program. This option should be specified if you use user\-defined LM functions. (Rev.4.0) .RE .RE .sp .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 .br \fBMisc. LM options\fR .RS .PP \fB \-forcedict \fR .RS 3n Skip error words in dictionary and force running. .RE .RE .SS "Acoustic model and feature analysis (\fB\-AM\fR) (\fB\-AM_GMM\fR)" .PP This section is about options for acoustic model, feature extraction, feature normalizations and spectral subtraction. .PP After \-AM name, an acoustic model and related specification should be written. You can use multiple AMs trained with different MFCC types. For GMM, the required parameter condition should be specified just as same as AMs after \fB\-AM_GMM\fR. .PP When using multiple AMs, the values of \fB\-smpPeriod\fR, \fB\-smpFreq\fR, \fB\-fsize\fR and \fB\-fshift\fR should be the same among all AMs. .sp .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 .br \fBAcoustic HMM\fR .RS .PP \fB \-h \fR \fIhmmdef_file\fR .RS 3n Acoustic HMM definition file. It should be in HTK ascii format, or Julius binary format. You can convert HTK ascii format to Julius binary format using mkbinhmm. .RE .PP \fB \-hlist \fR \fIhmmlist_file\fR .RS 3n HMMList file for phone mapping. This file provides mapping between logical triphone names generated in the dictionary and the defined HMM names in hmmdefs. 
This option should be specified for context\-dependent model. .RE .PP \fB \-tmix \fR \fInumber\fR .RS 3n Specify the number of top Gaussians to be calculated in a mixture codebook. Small number will speed up the acoustic computation, but AM accuracy may get worse with too small value. See also \fB\-gprune\fR. (default: 2) .RE .PP \fB \-spmodel \fR \fIname\fR .RS 3n Specify HMM model name that corresponds to short\-pause in an utterance. The short\-pause model name will be used in recognition: short\-pause skipping on grammar recognition, word\-end short\-pause model insertion with \fB\-iwsp\fR on N\-gram, or short\-pause segmentation (\fB\-spsegment\fR). (default: "sp") .RE .PP \fB \-multipath \fR .RS 3n Enable multi\-path mode. To make decoding faster, Julius by default impose a limit on HMM transitions that each model should have only one transition from initial state and to end state. On multi\-path mode, Julius does extra handling on inter\-model transition to allows model\-skipping transition and multiple output/input transitions. Note that specifying this option will make Julius a bit slower, and the larger beam width may be required. .sp This function was a compilation\-time option on Julius 3.x, and now becomes a run\-time option. By default (without this option), Julius checks the transition type of specified HMMs, and enable the multi\-path mode if required. You can force multi\-path mode with this option. (rev.4.0) .RE .PP \fB \-gprune \fR {safe|heuristic|beam|none|default} .RS 3n Set Gaussian pruning algorithm to use. For tied\-mixture model, Julius performs Gaussian pruning to reduce acoustic computation, by calculating only the top N Gaussians in each codebook at each frame. The default setting will be set according to the model type and engine setting. default will force accepting the default setting. Set this to none to disable pruning and perform full computation. safe guarantees the top N Gaussians to be computed. heuristic and beam do more aggressive computational cost reduction, but may result in small loss of accuracy model (default: safe (standard), beam (fast) for tied mixture model, none for non tied\-mixture model). .RE .PP \fB \-iwcd1 \fR {max|avg|best number} .RS 3n Select method to approximate inter\-word triphone on the head and tail of a word in the first pass. .sp max will apply the maximum likelihood of the same context triphones. avg will apply the average likelihood of the same context triphones. best number will apply the average of top N\-best likelihoods of the same context triphone. .sp Default is best 3 for use with N\-gram, and avg for grammar and word. When this AM is shared by LMs of both type, latter one will be chosen. .RE .PP \fB \-iwsppenalty \fR \fIfloat\fR .RS 3n Insertion penalty for word\-end short pauses appended by \fB\-iwsp\fR. .RE .PP \fB \-gshmm \fR \fIhmmdef_file\fR .RS 3n If this option is specified, Julius performs Gaussian Mixture Selection for efficient decoding. The hmmdefs should be a monophone model generated from an ordinary monophone HMM model, using mkgshmm. .RE .PP \fB \-gsnum \fR \fInumber\fR .RS 3n On GMS, specify number of monophone states to compute corresponding triphones in detail. (default: 24) .RE .RE .sp .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 .br \fBSpeech analysis\fR .RS .PP Only MFCC feature extraction is supported in current Julius. Thus when recognizing a waveform input from file or microphone, AM must be trained by MFCC. 
The parameter condition should also be set as exactly the same as the training condition by the options below. .PP When you give an input in HTK Parameter file, you can use any parameter type for AM. In this case Julius does not care about the type of input feature and AM, just read them as vector sequence and match them to the given AM. Julius only checks whether the parameter types are the same. If it does not work well, you can disable this checking by \fB\-notypecheck\fR. .PP In Julius, the parameter kind and qualifiers (as TARGETKIND in HTK) and the number of cepstral parameters (NUMCEPS) will be set automatically from the content of the AM header, so you need not specify them by options. .PP Other parameters should be set exactly the same as training condition. You can also give a HTK Config file which you used to train AM to Julius by \fB\-htkconf\fR. When this option is applied, Julius will parse the Config file and set appropriate parameter. .PP You can further embed those analysis parameter settings to a binary HMM file using mkbinhmm. .PP If options specified in several ways, they will be evaluated in the order below. The AM embedded parameter will be loaded first if any. Then, the HTK config file given by \fB\-htkconf\fR will be parsed. If a value already set by AM embedded value, HTK config will override them. At last, the direct options will be loaded, which will override settings loaded before. Note that, when the same options are specified several times, later will override previous, except that \fB\-htkconf\fR will be evaluated first as described above. .PP \fB \-smpPeriod \fR \fIperiod\fR .RS 3n Sampling period of input speech, in unit of 100 nanoseconds. Sampling rate can also be specified by \fB\-smpFreq\fR. Please note that the input frequency should be set equal to the training conditions of AM. (default: 625, corresponds to 16,000Hz) .sp This option corresponds to the HTK Option SOURCERATE. The same value can be given to this option. .sp When using multiple AM, this value should be the same among all AMs. .RE .PP \fB \-smpFreq \fR \fIHz\fR .RS 3n Set sampling frequency of input speech in Hz. Sampling rate can also be specified using \fB\-smpPeriod\fR. Please note that this frequency should be set equal to the training conditions of AM. (default: 16,000) .sp When using multiple AM, this value should be the same among all AMs. .RE .PP \fB \-fsize \fR \fIsample_num\fR .RS 3n Window size in number of samples. (default: 400) .sp This option corresponds to the HTK Option WINDOWSIZE, but value should be in samples (HTK value / smpPeriod). .sp When using multiple AM, this value should be the same among all AMs. .RE .PP \fB \-fshift \fR \fIsample_num\fR .RS 3n Frame shift in number of samples. (default: 160) .sp This option corresponds to the HTK Option TARGETRATE, but value should be in samples (HTK value / smpPeriod). .sp When using multiple AM, this value should be the same among all AMs. .RE .PP \fB \-preemph \fR \fIfloat\fR .RS 3n Pre\-emphasis coefficient. (default: 0.97) .sp This option corresponds to the HTK Option PREEMCOEF. The same value can be given to this option. .RE .PP \fB \-fbank \fR \fInum\fR .RS 3n Number of filterbank channels. (default: 24) .sp This option corresponds to the HTK Option NUMCHANS. The same value can be given to this option. Be aware that the default value not the same as in HTK (22). .RE .PP \fB \-ceplif \fR \fInum\fR .RS 3n Cepstral liftering coefficient. (default: 22) .sp This option corresponds to the HTK Option CEPLIFTER. 
The same value can be given to this option. .RE .PP \fB \-rawe \fR, \fB \-norawe \fR .RS 3n Enable/disable using raw energy before pre\-emphasis (default: disabled) .sp This option corresponds to the HTK Option RAWENERGY. Be aware that the default value differs from HTK (enabled at HTK, disabled at Julius). .RE .PP \fB \-enormal \fR, \fB \-noenormal \fR .RS 3n Enable/disable normalizing log energy. On live input, this normalization will be approximated from the average of last input. (default: disabled) .sp This option corresponds to the HTK Option ENORMALISE. Be aware that the default value differs from HTK (enabled at HTK, disabled at Julius). .RE .PP \fB \-escale \fR \fIfloat_scale\fR .RS 3n Scaling factor of log energy when normalizing log energy. (default: 1.0) .sp This option corresponds to the HTK Option ESCALE. Be aware that the default value differs from HTK (0.1). .RE .PP \fB \-silfloor \fR \fIfloat\fR .RS 3n Energy silence floor in dB when normalizing log energy. (default: 50.0) .sp This option corresponds to the HTK Option SILFLOOR. .RE .PP \fB \-delwin \fR \fIframe\fR .RS 3n Delta window size in number of frames. (default: 2) .sp This option corresponds to the HTK Option DELTAWINDOW. The same value can be given to this option. .RE .PP \fB \-accwin \fR \fIframe\fR .RS 3n Acceleration window size in number of frames. (default: 2) .sp This option corresponds to the HTK Option ACCWINDOW. The same value can be given to this option. .RE .PP \fB \-hifreq \fR \fIHz\fR .RS 3n Enable band\-limiting for MFCC filterbank computation: set upper frequency cut\-off. Value of \-1 will disable it. (default: \-1) .sp This option corresponds to the HTK Option HIFREQ. The same value can be given to this option. .RE .PP \fB \-lofreq \fR \fIHz\fR .RS 3n Enable band\-limiting for MFCC filterbank computation: set lower frequency cut\-off. Value of \-1 will disable it. (default: \-1) .sp This option corresponds to the HTK Option LOFREQ. The same value can be given to this option. .RE .PP \fB \-zmeanframe \fR, \fB \-nozmeanframe \fR .RS 3n With speech input, this option enables/disables frame\-wise DC offset removal. This corresponds to HTK configuration ZMEANSOURCE. This cannot be used together with \fB\-zmean\fR. (default: disabled) .RE .PP \fB \-usepower \fR .RS 3n Use power instead of magnitude on filterbank analysis. (default: disabled) .RE .RE .sp .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 .br \fBNormalization\fR .RS .PP Julius can perform cepstral mean normalization (CMN) for inputs. CMN will be activated when the given AM was trained with CMN (i.e. has "_Z" qualifier in the header). .PP The cepstral mean will be estimated in different way according to the input type. On file input, the mean will be computed from the whole input. On live input such as microphone and network input, the ceptral mean of the input is unknown at the start. So MAP\-CMN will be used. On MAP\-CMN, an initial mean vector will be applied at the beginning, and the mean vector will be smeared to the mean of the incrementing input vector as input goes. Options below can control the behavior of MAP\-CMN. .PP \fB \-cvn \fR .RS 3n Enable cepstral variance normalization. At file input, the variance of whole input will be calculated and then applied. At live microphone input, variance of the last input will be applied. CVN is only supported for an audio input. .RE .PP \fB \-vtln \fR \fIalpha\fR \fIlowcut\fR \fIhicut\fR .RS 3n Do frequency warping, typically for a vocal tract length normalization (VTLN). 
Arguments are warping factor, high frequency cut\-off and low freq. cut\-off. They correspond to HTK Config values, WARPFREQ, WARPHCUTOFF and WARPLCUTOFF. .RE .PP \fB \-cmnload \fR \fIfile\fR .RS 3n Load initial cepstral mean vector from file on startup. The \fIfile\fR should be one saved by \fB\-cmnsave\fR. Loading an initial cepstral mean enables Julius to better recognize the first utterance on a real\-time input. When used together with \fB\-cmnnoupdate\fR, this initial value will be used for all input. .RE .PP \fB \-cmnsave \fR \fIfile\fR .RS 3n Save the calculated cepstral mean vector into \fIfile\fR. The parameters will be saved at each input end. If the output file already exists, it will be overridden. .RE .PP \fB \-cmnupdate \fR \fB \-cmnnoupdate \fR .RS 3n Control whether to update the cepstral mean at each input on real\-time input. Disabling this and specifying \fB\-cmnload\fR will make engine to always use the loaded static initial cepstral mean. .RE .PP \fB \-cmnmapweight \fR \fIfloat\fR .RS 3n Specify the weight of initial cepstral mean for MAP\-CMN. Specify larger value to retain the initial cepstral mean for a longer period, and smaller value to make the cepstral mean rely more on the current input. (default: 100.0) .RE .RE .sp .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 .br \fBFront\-end processing\fR .RS .PP Julius can perform spectral subtraction to reduce some stationary noise from audio input. Though it is not a powerful method, but it may work on some situation. Julius has two ways to estimate noise spectrum. One way is to assume that the first short segment of an speech input is noise segment, and estimate the noise spectrum as the average of the segment. Another way is to calculate average spectrum from noise\-only input using other tool mkss, and load it in Julius. The former one is popular for speech file input, and latter should be used in live input. The options below will switch / control the behavior. .PP \fB \-sscalc \fR .RS 3n Perform spectral subtraction using head part of each file as silence part. The head part length should be specified by \fB\-sscalclen\fR. Valid only for file input. Conflict with \fB\-ssload\fR. .RE .PP \fB \-sscalclen \fR \fImsec\fR .RS 3n With \fB\-sscalc\fR, specify the length of head silence for noise spectrum estimation in milliseconds. (default: 300) .RE .PP \fB \-ssload \fR \fIfile\fR .RS 3n Perform spectral subtraction for speech input using pre\-estimated noise spectrum loaded from \fIfile\fR. The noise spectrum file can be made by mkss. Valid for all speech input. Conflict with \fB\-sscalc\fR. .RE .PP \fB \-ssalpha \fR \fIfloat\fR .RS 3n Alpha coefficient of spectral subtraction for \fB\-sscalc\fR and \fB\-ssload\fR. Noise will be subtracted stronger as this value gets larger, but distortion of the resulting signal also becomes remarkable. (default: 2.0) .RE .PP \fB \-ssfloor \fR \fIfloat\fR .RS 3n Flooring coefficient of spectral subtraction. The spectral power that goes below zero after subtraction will be substituted by the source signal with this coefficient multiplied. (default: 0.5) .RE .RE .sp .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 .br \fBMisc. AM options\fR .RS .PP .PP \fB \-htkconf \fR \fIfile\fR .RS 3n Parse the given HTK Config file, and set corresponding parameters to Julius. When using this option, the default parameter values are switched from Julius defaults to HTK defaults. 
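.sp As a rough illustrative sketch (the file names below are placeholders, and the second command assumes the \fB\-htkconf\fR option of mkbinhmm described in its own manual), the training\-time settings can either be given to Julius at run time, or embedded into a binary HMM beforehand so that no extra option is needed at recognition time: .sp .RS 3n .nf % julius \-C main.jconf \-htkconf Config.train .br % mkbinhmm \-htkconf Config.train hmmdefs binhmm .fi .RE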
.RE .RE .SS "Recognition process and search (\fB\-SR\fR)" .PP This section contains options for search parameters on the 1st / 2nd pass such as beam width and LM weights, configurations for short\-pause segmentation, switches for word lattice output and confusion network output, forced alignments, and other options relating to the recognition process and result output. .PP Default values for beam width and LM weights will change according to the compile\-time setup of JuliusLib, AM model type, and LM size. Please see the startup log for the actual values. .sp .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 .br \fB1st pass parameters\fR .RS .PP \fB \-lmp \fR \fIweight\fR \fIpenalty\fR .RS 3n (N\-gram) Language model weights and word insertion penalties for the first pass. .RE .PP \fB \-penalty1 \fR \fIpenalty\fR .RS 3n (Grammar) word insertion penalty for the first pass. (default: 0.0) .RE .PP \fB \-b \fR \fIwidth\fR .RS 3n Beam width in number of HMM nodes for rank beaming on the first pass. This value defines the search width on the 1st pass, and has a dominant effect on the total processing time. A smaller width will speed up the decoding, but too small a value will result in a substantial increase of recognition errors due to search failure. A larger value will make the search stable and will lead to failure\-free search, but processing time will grow in proportion to the width. .sp The default value is dependent on the acoustic model type: 400 (monophone), 800 (triphone), or 1000 (triphone, setup=v2.1) .RE .PP \fB \-nlimit \fR \fInum\fR .RS 3n Upper limit of tokens per node. This option is valid only when \-\-enable\-wpair and \-\-enable\-wpair\-nlimit are enabled at compilation time. .RE .PP \fB \-progout \fR .RS 3n Enable progressive output of the partial results on the first pass. .RE .PP \fB \-proginterval \fR \fImsec\fR .RS 3n Set the time interval for \fB\-progout\fR in milliseconds. (default: 300) .RE .RE .sp .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 .br \fB2nd pass parameters\fR .RS .PP \fB \-lmp2 \fR \fIweight\fR \fIpenalty\fR .RS 3n (N\-gram) Language model weights and word insertion penalties for the second pass. .RE .PP \fB \-penalty2 \fR \fIpenalty\fR .RS 3n (Grammar) word insertion penalty for the second pass. (default: 0.0) .RE .PP \fB \-b2 \fR \fIwidth\fR .RS 3n Envelope beam width (number of hypotheses) at the second pass. If the count of word expansion at a certain hypothesis length reaches this limit during the search, shorter hypotheses are not expanded further. This prevents the search from falling into a breadth\-first\-like situation stacking on the same position, and reduces search failures mostly for large vocabulary conditions. (default: 30) .RE .PP \fB \-sb \fR \fIfloat\fR .RS 3n Score envelope width for enveloped scoring. When calculating the hypothesis score for each generated hypothesis, its trellis expansion and Viterbi operation will be pruned in the middle of the speech if the score on a frame goes under the width. Giving a small value makes the second pass faster, but computation errors may occur. (default: 80.0) .RE .PP \fB \-s \fR \fInum\fR .RS 3n Stack size, i.e. the maximum number of hypotheses that can be stored on the stack during the search. A larger value may give more stable results, but increases the amount of memory required. (default: 500) .RE .PP \fB \-m \fR \fIcount\fR .RS 3n Number of expanded hypotheses required to discontinue the search. If the number of expanded hypotheses is greater than this threshold, the search is discontinued at that point.
The larger this value is, the longer it takes for Julius to give up the search. (default: 2000) .RE .PP \fB \-n \fR \fInum\fR .RS 3n The number of candidates Julius tries to find. The search continues until this number of sentence hypotheses have been found. The obtained sentence hypotheses are sorted by score, and the final result is displayed in that order (see also \fB\-output\fR). The possibility that the optimum hypothesis is correctly found increases as this value increases, but the processing time also becomes longer. The default value depends on the engine setup at compilation time: 10 (standard) or 1 (fast or v2.1) .RE .PP \fB \-output \fR \fInum\fR .RS 3n The number of top sentence hypotheses to be output at the end of search. Use with \fB\-n\fR. (default: 1) .RE .PP \fB \-lookuprange \fR \fIframe\fR .RS 3n Set the number of frames before and after to look up next word hypotheses in the word trellis on the second pass. This prevents the omission of short words, but with a large value, the number of expanded hypotheses increases and the system becomes slow. (default: 5) .RE .PP \fB \-looktrellis \fR .RS 3n (Grammar) Expand only the words that survived the first pass instead of expanding all the words predicted by the grammar. This option makes the second pass decoding faster, especially for large vocabulary conditions, but may increase deletion errors of short words. (default: disabled) .RE .RE .sp .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 .br \fBShort\-pause segmentation / decoder\-VAD\fR .RS .PP When compiled with \-\-enable\-decoder\-vad, the short\-pause segmentation will be extended to support decoder\-based VAD. .PP \fB \-spsegment \fR .RS 3n Enable short\-pause segmentation mode. Input will be segmented when a short pause word (a word with only silence models in its pronunciation) gets the highest likelihood for a certain number of successive frames on the first pass. When a segment end is detected, Julius stops the 1st pass at that point, performs the 2nd pass, and continues with the next segment. The word context will be considered among segments. (Rev.4.0) .sp When compiled with \-\-enable\-decoder\-vad, this option enables decoder\-based VAD, to skip long silence. .RE .PP \fB \-spdur \fR \fIframe\fR .RS 3n Short pause duration length to detect the end of an input segment, in number of frames. (default: 10) .RE .PP \fB \-pausemodels \fR \fIstring\fR .RS 3n A comma\-separated list of pause model names to be used at short\-pause segmentation. A word whose pronunciation consists of only the pause models will be treated as a "pause word" and used for pause detection. If not specified, the names of \fB\-spmodel\fR, \fB\-silhead\fR and \fB\-siltail\fR will be used. (Rev.4.0) .RE .PP \fB \-spmargin \fR \fIframe\fR .RS 3n Back step margin at trigger up for decoder\-based VAD. When a speech up\-trigger is found by decoder\-VAD, Julius will rewind the input parameter by this value, and start recognition at that point. (Rev.4.0) .sp This option will be valid only if compiled with \-\-enable\-decoder\-vad. .RE .PP \fB \-spdelay \fR \fIframe\fR .RS 3n Trigger decision delay frame at trigger up for decoder\-based VAD. (Rev.4.0) .sp This option will be valid only if compiled with \-\-enable\-decoder\-vad. .RE .RE .sp .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 .br \fBWord lattice / confusion network output\fR .RS .PP \fB \-lattice \fR, \fB \-nolattice \fR .RS 3n Enable / disable generation of word graph.
The search algorithm is also changed to optimize for better word graph generation, so the sentence result may not be the same as normal N\-best recognition. (Rev.4.0) .RE .PP \fB \-confnet \fR, \fB \-noconfnet \fR .RS 3n Enable / disable generation of confusion network. Enabling this will also activate \fB\-lattice\fR internally. (Rev.4.0) .RE .PP \fB \-graphrange \fR \fIframe\fR .RS 3n Merge the same words at neighboring positions at graph generation. If the beginning time and ending time of two word candidates of the same word are within the specified range, they will be merged. The default is 0 (allow merging of the same words at exactly the same location), and specifying a larger value will result in smaller graph output. Setting this value to \-1 will disable merging; in that case the same words at the same location with different scores will be left as they are. (default: 0) .RE .PP \fB \-graphcut \fR \fIdepth\fR .RS 3n Cut the resulting graph by its word depth at the post\-processing stage. The depth value is the number of words to be allowed at a frame. Setting to \-1 disables this feature. (default: 80) .RE .PP \fB \-graphboundloop \fR \fIcount\fR .RS 3n Limit the number of boundary adjustment loops at the post\-processing stage. This parameter prevents Julius from being blocked by an infinite adjustment loop caused by short word oscillation. (default: 20) .RE .PP \fB \-graphsearchdelay \fR, \fB \-nographsearchdelay \fR .RS 3n When this option is enabled, Julius modifies its graph generation algorithm on the 2nd pass not to terminate the search by graph merging until the first sentence candidate is found. This option may improve graph accuracy, especially when you are going to generate a huge word graph by setting a broad search. Namely, it may result in better graph accuracy when you set wide beams on both the 1st pass \fB\-b\fR and the 2nd pass \fB\-b2\fR, and a large number for \fB\-n\fR. (default: disabled) .RE .RE .sp .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 .br \fBMulti\-gram / multi\-dic recognition\fR .RS .PP \fB \-multigramout \fR, \fB \-nomultigramout \fR .RS 3n On grammar recognition using multiple grammars, Julius will output only the best result among all grammars. Enabling this option will make Julius output a result for each grammar. (default: disabled) .RE .RE .sp .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 .br \fBForced alignment\fR .RS .PP \fB \-walign \fR .RS 3n Do Viterbi alignment per word unit for the recognition result. The word boundary frames and the average acoustic scores per frame will be calculated. .RE .PP \fB \-palign \fR .RS 3n Do Viterbi alignment per phone unit for the recognition result. The phone boundary frames and the average acoustic scores per frame will be calculated. .RE .PP \fB \-salign \fR .RS 3n Do Viterbi alignment per state for the recognition result. The state boundary frames and the average acoustic scores per frame will be calculated. .RE .RE .sp .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 .br \fBMisc. search options\fR .RS .PP \fB \-inactive \fR .RS 3n Start this recognition process instance in an inactive state. (Rev.4.0) .RE .PP \fB \-1pass \fR .RS 3n Perform only the first pass. .RE .PP \fB \-fallback1pass \fR .RS 3n By default, when the 2nd pass fails, Julius finishes the recognition with no result. This option tells Julius to output the 1st pass result as a final result when the 2nd pass fails. Note that some score outputs (confidence etc.) may not be useful. This was the default behavior of Julius\-3.x.
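.sp For illustration (the jconf file name is a placeholder), the option is simply added to an ordinary invocation: .sp .RS 3n .nf % julius \-C main.jconf \-fallback1pass .fi .RE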
.RE .PP \fB \-no_ccd \fR, \fB \-force_ccd \fR .RS 3n Explicitly switch phone context handling at search. Normally Julius determines whether the AM in use is a context\-dependent model or not from the model names, i.e., whether the names contain the characters + and \-. This option will override the automatic detection. .RE .PP \fB \-cmalpha \fR \fIfloat\fR .RS 3n Smoothing parameter for confidence scoring. (default: 0.05) .RE .PP \fB \-iwsp \fR .RS 3n (Multi\-path mode only) Enable inter\-word context\-free short pause insertion. This option appends a skippable short pause model to every word end. The short\-pause model can be specified by \fB\-spmodel\fR. .RE .PP \fB \-transp \fR \fIfloat\fR .RS 3n Additional insertion penalty for transparent words. (default: 0.0) .RE .PP \fB \-demo \fR .RS 3n Equivalent to \fB\-progout \-quiet\fR. .RE .RE .SH "ENVIRONMENT VARIABLES" .PP \fB \fR\fB\fBALSADEV\fR\fR\fB \fR .RS 3n (using mic input with alsa device) specify a capture device name. If not specified, "default" will be used. .RE .PP \fB \fR\fB\fBAUDIODEV\fR\fR\fB \fR .RS 3n (using mic input with oss device) specify a capture device path. If not specified, "\fI/dev/dsp\fR" will be used. .RE .PP \fB \fR\fB\fBLATENCY_MSEC\fR\fR\fB \fR .RS 3n Try to set the input latency of microphone input in milliseconds. A smaller value will shorten latency but may sometimes make the process unstable. The default value depends on the running OS. .RE .SH "EXAMPLES" .PP For examples of system usage, refer to the tutorial section in the Julius documents. .SH "NOTICE" .PP Note about jconf files: relative paths in a jconf file are interpreted as relative to the jconf file itself, not to the current directory. .SH "SEE ALSO" .PP \fBjulian\fR(1), \fBjcontrol\fR(1), \fBadinrec\fR(1), \fBadintool\fR(1), \fBmkbingram\fR(1), \fBmkbinhmm\fR(1), \fBmkgshmm\fR(1), \fBwav2mfcc\fR(1), \fBmkss\fR(1) .PP \fIhttp://julius.sourceforge.jp/en/\fR .SH "DIAGNOSTICS" .PP Julius will normally return exit status 0. If an error occurs, Julius exits abnormally with exit status 1. If an input file cannot be found or cannot be loaded for some reason, Julius will skip processing for that file. .SH "BUGS" .PP There are some restrictions on the type and size of the models Julius can use. For a detailed explanation refer to the Julius documentation. For bug reports, inquiries and comments, please contact julius\-info at lists.sourceforge.jp.
.SH "COPYRIGHT" .PP Copyright (c) 1991\-2008 Kawahara Lab., Kyoto University .PP Copyright (c) 1997\-2000 Information\-technology Promotion Agency, Japan .PP Copyright (c) 2000\-2008 Shikano Lab., Nara Institute of Science and Technology .PP Copyright (c) 2005\-2008 Julius project team, Nagoya Institute of Technology .SH "AUTHORS" .PP Rev.1.0 (1998/02/20) .RS 3n Designed by Tatsuya KAWAHARA and Akinobu LEE (Kyoto University) .sp Development by Akinobu LEE (Kyoto University) .RE .PP Rev.1.1 (1998/04/14), Rev.1.2 (1998/10/31), Rev.2.0 (1999/02/20), Rev.2.1 (1999/04/20), Rev.2.2 (1999/10/04), Rev.3.0 (2000/02/14), Rev.3.1 (2000/05/11) .RS 3n Development of above versions by Akinobu LEE (Kyoto University) .RE .PP Rev.3.2 (2001/08/15), Rev.3.3 (2002/09/11), Rev.3.4 (2003/10/01), Rev.3.4.1 (2004/02/25), Rev.3.4.2 (2004/04/30) .RS 3n Development of above versions by Akinobu LEE (Nara Institute of Science and Technology) .RE .PP Rev.3.5 (2005/11/11), Rev.3.5.1 (2006/03/31), Rev.3.5.2 (2006/07/31), Rev.3.5.3 (2006/12/29), Rev.4.0 (2007/12/19), Rev.4.1 (2008/10/03) .RS 3n Development of above versions by Akinobu LEE (Nagoya Institute of Technology) .RE .SH "THANKS TO" .PP From rev.3.2, Julius is released by the "Information Processing Society, Continuous Speech Consortium". .PP The Windows DLL version was developed and released by Hideki BANNO (Nagoya University). .PP The Windows Microsoft Speech API compatible version was developed by Takashi SUMIYOSHI (Kyoto University). julius-4.2.2/man/mkbinhmmlist.10000644001051700105040000000367511071102423014665 0ustar ritrlab.\" Title: mkbinhmmlist .\" Author: .\" Generator: DocBook XSL Stylesheets v1.71.0 .\" Date: 10/02/2008 .\" Manual: .\" Source: .\" .TH "MKBINHMMLIST" "1" "10/02/2008" "" "" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) .ad l .SH "NAME" mkbinhmmlist \- convert HMMList file into binary format .SH "SYNOPSIS" .HP 13 \fBmkbinhmmlist\fR {hmmdefs_file} {HMMList_file} {output_binhmmlist_file} .SH "DESCRIPTION" .PP \fBmkbinhmmlist\fR converts a HMMList file to binary format. Since the index trees for lookup are also stored in the binary format, it will speed up the startup of Julius, namely when using big HMMList file. .PP For conversion, HMM definition file \fIhmmdefs_file\fR that will be used together at Julius needs to be specified. The format of the HMM definition file can be either ascii or Julius binary format. .PP The output binary file can be used in Julius as the same by "\fB\-hlist\fR". The format wil be auto\-detected by Julius. .PP \fBmkbinhmmlist\fR can read gzipped file. .SH "OPTIONS" .PP \fIhmmdefs_file\fR .RS 3n Acoustic HMM definition file, in HMM ascii format or Julius binary format. .RE .PP \fIHMMList_file\fR .RS 3n Source HMMList file .RE .PP \fIoutput_binhmmlist_file\fR .RS 3n Output file, will be overwritten if already exist. .RE .SH "EXAMPLES" .PP Convert a HMMList file \fIlogicalTri\fR into binary format and store to \fIlogicalTri.bin\fR: .sp .RS 3n .nf % \fBmkbinhmmlist\fR binhmm logicalTri logicalTri.bin .fi .RE .SH "SEE ALSO" .PP \fB julius \fR( 1 ) , \fB mkbinhmm \fR( 1 ) .SH "COPYRIGHT" .PP Copyright (c) 1997\-2000 Information\-technology Promotion Agency, Japan .PP Copyright (c) 1991\-2008 Kawahara Lab., Kyoto University .PP Copyright (c) 2000\-2005 Shikano Lab., Nara Institute of Science and Technology .PP Copyright (c) 2005\-2008 Julius project team, Nagoya Institute of Technology .SH "LICENSE" .PP The same as Julius. 
julius-4.2.2/man/jclient.pl.10000644001051700105040000000323211071102423014216 0ustar ritrlab.\" Title: jclient.pl .\" Author: .\" Generator: DocBook XSL Stylesheets v1.71.0 .\" Date: 10/02/2008 .\" Manual: .\" Source: .\" .TH "JCLIENT.PL" "1" "10/02/2008" "" "" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) .ad l .SH "NAME" jclient.pl \- sample client for module mode (perl version) .SH "SYNOPSIS" .HP 11 \fBjclient.pl\fR .SH "DESCRIPTION" .PP This is yet another sample client written in perl. It will connect to Julius running in module mode, receive recognition results from Julius, and cna send commands to control Julius. .PP This is a tiny program with only 57 lines. You can use it for free. .SH "EXAMPLES" .PP Invoke Julius with module mode by specifying "\fB\-module\fR" option: .sp .RS 3n .nf % \fBjulius\fR \-C ... \-module .fi .RE Then, at other terminal or other host, invoke \fBjclient.pl\fR like below. The default hostname is "localhost", and port number is 10500. You can change them by editing the top part of the script. .sp .RS 3n .nf % \fBjclient.pl\fR .fi .RE It will then receive the outputs of Julius and output the raw message to standard out. Also, by inputting a raw module command to the standard input of \fBjclient.pl\fR, it will be sent to Julius. See manuals for the specification of module mode. .SH "SEE ALSO" .PP \fB julius \fR( 1 ) , \fB jcontrol \fR( 1 ) .SH "COPYRIGHT" .PP "\fBjclient.pl\fR" has been developed by Dr. Ryuichi Nisimura (nisimura@sys.wakayama\-u.ac.jp). Use at your own risk. .PP If you have any feedback, comment or request, please contact the E\-mail address above, or look at the Web page below. .PP http://w3voice.jp/ julius-4.2.2/00readme.txt0000644001051700105040000001074512004452377013501 0ustar ritrlab====================================================================== Large Vocabulary Continuous Speech Recognition Engine Julius (Rev 4.2.2 2012/08/01) (Rev 4.2.1 2011/12/25) (Rev 4.2 2011/05/01) (Rev 4.1.5 2010/06/04) (Rev 4.1 2008/10/03) (Rev 4.0.2 2008/05/27) (Rev 4.0 2007/12/19) (Rev 3.5.3 2006/12/29) (Rev 3.4.2 2004/04/30) (Rev 2.0 1999/02/20) (Rev 1.0 1998/02/20) Copyright (c) 1991-2012 Kawahara Lab., Kyoto University Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology All rights reserved ====================================================================== About Julius ============= "Julius" is an open-source high-performance large vocabulary continuous speech recognition (LVCSR) decoder software for speech-related researchers and developers. Based on word N-gram and triphone context-dependent HMM, it can perform almost real-time decoding on most current PCs with small amount of memory. It also has high vesatility. The acoustic models and language models are pluggable, and you can build various types of speech recognition system by building your own models and modules to be suitable for your task. It also adopts standard formats to cope with other toolkit such as HTK, CMU-Cam SLM toolkit, etc. The core engine is implemented as embeddable library, to aim to offer speech recognition capability to various applications. The recent version supports plug-in capability so that the engine can be extended by user. The main platform is Linux and other Unix workstations, and also works on Windows (SAPI/console). 
Julius is distributed with open license together with source codes. What's new in Julius-4.2.2 =========================== Version 4.2.2 is a bug fix release. Several bugs has been fixed. The grammar compiler (mkfa) now does not link flex library, so you can compile without flex. See the "Release.txt" file for the full list of updates. Contents of Julius-4.2.2 ========================= (Documents with suffix "ja" are written in Japanese) 00readme.txt ReadMe (This file) LICENSE.txt Terms and conditions of use Release.txt Release note / ChangeLog configure configure script configure.in Sample.jconf Sample configuration file julius/ Julius sources libjulius/ JuliusLib core engine library sources libsent/ JuliusLib low-level library sources adinrec/ Record one sentence utterance to a file adintool/ Record/split/send/receive speech data generate-ngram/ Tool to generate random sentences from N-gram gramtools/ Tools to build and test recognition grammar jcontrol/ A sample network client module mkbingram/ Convert N-gram to binary format mkbinhmm/ Convert ascii hmmdefs to binary format mkgshmm/ Model conversion for Gaussian Mixture Selection mkss/ Estimate noise spectrum from mic input support/ some tools to compile julius/julian from source jclient-perl/ A simple perl version of module mode client plugin/ Several plugin source codes and documentation man/ Unix online manuals msvc/ Files to compile on Microsoft VC++ 2008 Documentation =============== The up-to-date documentations are available at the Julius Web site: http://julius.sourceforge.jp/en/ License ======== Julius is an open-source software provided as is. For more information about the license, please refer to the "LICENSE.txt" file included in this archive. Contact Us =========== For QA, discussion and development information, please see and join the Julius web forum at: http://julius.sourceforge.jp/forum/ The contact address of Julius/Julian development team is: (please replace 'at' with '@') "julius-info at lists.sourceforge.jp" EOF julius-4.2.2/LICENSE.txt0000644001051700105040000002155312004452377013165 0ustar ritrlab*** English translation is available in the latter of this file *** ubAFッGW Juliusv p Copyright (c) 1991-2012 sw コ Copyright (c) 1997-2000 U(IPA) Copyright (c) 2000-2005 [wZpw@w ュコ Copyright (c) 2005-2012 Hw JuliusJ`[ ---------------------------------------------------------------------------- ubAFッGW JuliusviJulianjAsw コA[wZpw@w ュコAyHw JuliusJ`[JB1997Nx3NAU (IPA)タ{unIZpvB sw コAIPAA[wZpw@w ュコAy Hw JuliusJ`[iuメvjA メA{pS A\[XR[h{vOyhLe[Vi u{\tgEFAvjB{\ tgEFApA{pS B ypz 1. A{pSA{\tgEFAS gpAA|AXAgA A|AXzzAMBA {\tgEFApメA{\tgEFAS XzzAMOメ2 \ヲL{\tgEFAX|AXメyXm \ヲB 2. AgpAA|AXAgA{\tgEFA pl@AA|AXS OメA{\tgEFAL\ヲyJ |{pSit@CjX \ヲYtB L Copyright (c) 1991-2012 sw コ Copyright (c) 1997-2000 U(IPA) Copyright (c) 2000-2005 [wZpw@w ュコ Copyright (c) 2005-2012 Hw JuliusJ`[ 3. {\tgEFApm\sA ubAFッGW JuliusvpLB 4. {\tgEFAAJJ A{\tgEFAAヲAヲA pA@ AsBA{\tgEFAi ソA\AiAIKAOメL AAWYcvNQ AB {\tgEFApメA{\tgEFA A{\tgEFAXNpメゥg B`{p ]s\A{\tgEFAp B{\tgEFAppyOメ QAQAIAIAtIAhIQiク vjA_As@sCA琲rSCA C@AB 5. {\tgEFApA{@@Asn RB 6. 
{\tgEFAeiXT|[gALOp AHw JuliusJ`[Asw コ B *** This is English translation of the Japanese original for reference *** Large Vocabulary Continuous Speech Recognition Engine Julius Copyright (c) 1991-2012 Kawahara Lab., Kyoto University Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology "Large Vocabulary Continuous Speech Recognition Engine Julius", including Julian, is being developed at Kawahara Lab., Kyoto University, Shikano Lab., Nara Institute of Science and Technology, and Julius project team, Nagoya Institute of Technology (collectively referred to herein as the "Licensers"). Julius was funded by the Advanced Information Technology Program Project of Information-technology Promotion Agency (IPA), Japan for three years since 1997. The Licensers reserve the copyright thereto. However, as long as you accept and remain in strict compliance with the terms and conditions of the license set forth herein, you are hereby granted a royalty-free license to use "Large Vocabulary Continuous Speech Recognition Engine Julius" including the source code thereof and the documentation thereto (collectively referred to herein as the "Software"). Use by you of the Software shall constitute acceptance by you of all terms and conditions of the license set forth herein. TERMS AND CONDITIONS OF LICENSE 1. So long as you accept and strictly comply with the terms and conditions of the license set forth herein, the Licensers will not enforce the copyright or moral rights in respect of the Software, in connection with the use, copying, duplication, adaptation, modification, preparation of a derivative work, aggregation with another program, or insertion into another program of the Software or the distribution or transmission of the Software. However, in the event you or any other user of the Software revises all or any portion of the Software, and such revision is distributed, then, in addition to the notice required to be affixed pursuant to paragraph 2 below, a notice shall be affixed indicating that the Software has been revised, and indicating the date of such revision and the name of the person or entity that made the revision. 2. In the event you provide to any third party all or any portion of the Software, whether for copying, duplication, adaptation, modification, preparation of a derivative work, aggregation with another program, insertion into another program, or other use, you shall affix the following copyright notice and all terms and conditions of this license (both the Japanese original and English translation) as set forth herein, without any revision or change whatsoever. Form of copyright notice: Copyright (c) 1991-2012 Kawahara Lab., Kyoto University Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology 3. When you publish or present any results by using the Software, you must explicitly mention your use of "Large Vocabulary Continuous Speech Recognition Engine Julius". 4. 
The Licensers are licensing the Software, which is the trial product of research and project, on an "as is" and royalty-free basis, and makes no warranty or guaranty whatsoever with respect to the Software, whether express or implied, irrespective of the nation where used, and whether or not arising out of statute or otherwise, including but not limited to any warranty or guaranty with respect to quality, performance, merchantability, fitness for a particular purpose, absence of defects, or absence of infringement of copyright, patent rights, trademark rights or other intellectual property rights, trade secrets or proprietary rights of any third party. You and every other user of the Software hereby acknowledge that the Software is licensed without any warranty or guaranty, and assume all risks arising out of the absence of any warranty or guaranty. In the event that obligations imposed upon you by judgment of a court would make it impossible for you to comply with the conditions of this license, you may not use the Software. The Licensers shall not have any liability to you or to any third party for damages or liabilities of any nature whatsoever arising out of your use of or inability to use the Software, whether of an ordinary, special, direct, indirect, consequential or incidental nature (including without limitation lost profits) or otherwise, and whether arising out of contract, negligence, tortuous conduct, product liability or any other legal theory or reason whatsoever of any nation or jurisdiction. 5. This license of use of the Software shall be governed by the laws of Japan, and the Kyoto District Court shall have exclusive primary jurisdiction with respect to all disputes arising with respect thereto. 6. Inquiries for support or maintenance of the Software, or inquiries concerning this license of use besides the conditions above, may be sent to Julius project team, Nagoya Institute of Technology, or Kawahara Lab., Kyoto University. julius-4.2.2/configure.in0000644001051700105040000000202712004452377013646 0ustar ritrlabdnl Copyright (c) 1991-2012 Kawahara Lab., Kyoto University dnl Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology dnl Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology dnl All rights reserved dnl dnl $Id: configure.in,v 1.6 2012/07/27 08:44:47 sumomo Exp $ dnl AC_INIT(Sample.jconf) AC_CONFIG_AUX_DIR(support) AC_CONFIG_SUBDIRS(mkgshmm gramtools jcontrol julius libjulius libsent) dnl all configure options will be passed to all subdirectries. AC_CANONICAL_HOST AC_MSG_CHECKING([host specific optimization flag]) if test -z "$CFLAGS" ; then OPTFLAG=support/cflags.${host_cpu}-${host_vendor}-${host_os} if test -f "$OPTFLAG" ; then . 
$OPTFLAG AC_MSG_RESULT([$OPTFLAG]) else AC_MSG_RESULT([no]) fi else AC_MSG_RESULT([skipped]) fi AC_PROG_CC AC_PROG_CPP AC_PROG_INSTALL AC_PATH_PROG(RM, rm) AC_EXEEXT AC_OUTPUT(Makefile mkbingram/Makefile mkbinhmm/Makefile adinrec/Makefile adintool/Makefile mkss/Makefile generate-ngram/Makefile jclient-perl/Makefile man/Makefile) julius-4.2.2/mkss/0000755001051700105040000000000012004463507012306 5ustar ritrlabjulius-4.2.2/mkss/Makefile.in0000644001051700105040000000254412004452411014350 0ustar ritrlab# Copyright (c) 1991-2012 Kawahara Lab., Kyoto University # Copyright (c) 2001-2005 Shikano Lab., Nara Institute of Science and Technology # Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology # All rights reserved # # $Id: Makefile.in,v 1.7 2012/07/27 08:44:57 sumomo Exp $ # SHELL=/bin/sh .SUFFIXES: .SUFFIXES: .c .o .c.o: $(CC) $(CFLAGS) $(CPPFLAGS) -o $@ -c $< LIBSENT=../libsent LIBJULIUS=../libjulius CC=@CC@ CFLAGS=@CFLAGS@ CPPFLAGS=-I. -I$(LIBJULIUS)/include -I$(LIBSENT)/include @CPPFLAGS@ `$(LIBSENT)/libsent-config --cflags` `$(LIBJULIUS)/libjulius-config --cflags` LDFLAGS=@LDFLAGS@ -L$(LIBJULIUS) `$(LIBJULIUS)/libjulius-config --libs` -L$(LIBSENT) `$(LIBSENT)/libsent-config --libs` RM=@RM@ -f prefix=@prefix@ exec_prefix=@exec_prefix@ INSTALL=@INSTALL@ ############################################################ TARGET=mkss@EXEEXT@ all: $(TARGET) $(TARGET): mkss.c $(LIBSENT)/libsent.a $(LIBJULIUS)/libjulius.a $(CC) $(CFLAGS) $(CPPFLAGS) -o $@ mkss.c $(LDFLAGS) ############################################################ install: install.bin install.bin: $(TARGET) ${INSTALL} -d @bindir@ @INSTALL_PROGRAM@ $(TARGET) @bindir@ ############################################################ clean: $(RM) *.o *~ core $(RM) $(TARGET) $(TARGET).exe distclean: $(RM) *.o *~ core $(RM) $(TARGET) $(TARGET).exe $(RM) Makefile julius-4.2.2/mkss/00readme.txt0000644001051700105040000000303511071102425014435 0ustar ritrlab mkss MKSS(1) MKSS(1) NAME mkss - calculate average spectrum for spectral subtraction SYNOPSIS mkss [options...] {filename} DESCRIPTION mkss is a tool to estimate noise spectrum for spectral subtraction on Julius. It reads a few seconds of sound data from microphone input, calculate the average spectrum and save it to a file. The output file can be used as a noise spectrum data in Julius (option "-ssload"). The recording will start immediately after startup. Sampling format is 16bit, monoral. If outpue file already exist, it will be overridden. OPTIONS -freq Hz Sampling frequency in Hz (default: 16,000) -len msec capture length in milliseconds (default: 3000) -fsize sample_num frame size in number of samples (default: 400) -fshift sample_num frame shift in number of samples (default: 160) SEE ALSO julius ( 1 ) COPYRIGHT Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan Copyright (c) 1991-2008 Kawahara Lab., Kyoto University Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology Copyright (c) 2005-2008 Julius project team, Nagoya Institute of Technology LICENSE The same as Julius. 
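EXAMPLE As a rough illustration (the output file name "noise.ss" is just a placeholder), the following records about 3 seconds of noise-only input from the microphone at 16kHz and writes the averaged spectrum: % mkss -freq 16000 -len 3000 noise.ss The resulting file can then be given to Julius with the option "-ssload noise.ss".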
10/02/2008 MKSS(1) julius-4.2.2/mkss/mkss.c0000644001051700105040000001572312004452411013427 0ustar ritrlab/* * Copyright (c) 2002-2012 Kawahara Lab., Kyoto University * Copyright (c) 2002-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ /* * mkss --- compute average spectrum of mic input for SS in Julius * * $Id: mkss.c,v 1.6 2012/07/27 08:44:57 sumomo Exp $ * */ #include #include static int fd = -1; /* file descriptor for output */ static char *filename = NULL; /* output file name */ static boolean stout = FALSE; /* true if output to stdout ("-") */ static int sfreq; /* sampling frequency */ static int slen = 3000; /* record length in msec */ /* parameter for SS */ static SP16 *speech; static int speechnum; static int samples; static boolean opt_help(Jconf *jconf, char *arg[], int argnum) { fprintf(stderr, "mkss --- compute averate spectrum of mic input for SS\n"); fprintf(stderr, "Usage: mkss [options..] filename\n"); fprintf(stderr, " [-freq frequency] sampling freq in Hz (%ld)\n", jconf->am_root->analysis.para_default.smp_freq); fprintf(stderr, " [-len msec] record length in msec (%d)\n", slen); fprintf(stderr, " [-fsize samplenum] window size (%d)\n", jconf->am_root->analysis.para_default.framesize); fprintf(stderr, " [-fshift samplenum] frame shift (%d)\n", jconf->am_root->analysis.para_default.frameshift); fprintf(stderr, " [-zmean] enable zmean (off)\n"); fprintf(stderr, " [-zmeanframe] frame-wise zmean (off)\n"); fprintf(stderr, "Library configuration: "); confout_version(stderr); confout_audio(stderr); confout_process(stderr); fprintf(stderr, "\n"); exit(1); /* exit here */ return TRUE; } static boolean opt_freq(Jconf *jconf, char *arg[], int argnum) { jconf->amnow->analysis.para.smp_freq = atoi(arg[0]); jconf->amnow->analysis.para.smp_period = freq2period(jconf->amnow->analysis.para.smp_freq); return TRUE; } static boolean opt_len(Jconf *jconf, char *arg[], int argnum) { slen = atoi(arg[0]); return TRUE; } static int adin_callback(SP16 *now, int len, Recog *recog) { int num; int ret; /* store recorded data up to samples */ if (speechnum + len > samples) { num = samples - speechnum; /* stop recording */ ret = 1; } else { num = len; /* continue recording */ ret = 0; } memcpy(&(speech[speechnum]), now, num * sizeof(SP16)); if (speechnum / sfreq != (speechnum + num) / sfreq) { fprintf(stderr, "|"); } else { fprintf(stderr, "."); } speechnum += num; return(ret); } static int x; static int sslen; int main(int argc, char *argv[]) { Recog *recog; Jconf *jconf; float *ss; MFCCWork *wrk; /* create instance */ recog = j_recog_new(); jconf = j_jconf_new(); recog->jconf = jconf; /* set application-specific additional options */ j_add_option("-freq", 1, 1, "sampling freq in Hz", opt_freq); j_add_option("-len", 1, 1, "record length in msec", opt_len); j_add_option("-h", 0, 0, "display this help", opt_help); j_add_option("-help", 0, 0, "display this help", opt_help); j_add_option("--help", 0, 0, "display this help", opt_help); /* when no argument, output help and exit */ if (argc <= 1) { opt_help(jconf, NULL, 0); return 0; } /* regard last arg as filename */ if (strmatch(argv[argc-1], "-")) { stout = TRUE; } else { filename = argv[argc-1]; } /* set default as same as "-input mic" */ jconf->input.type = INPUT_WAVEFORM; jconf->input.speech_input = SP_MIC; jconf->input.device = SP_INPUT_DEFAULT; /* process config and load them */ if (j_config_load_args(jconf, argc-1, argv) == -1) { 
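/* j_config_load_args() parses the remaining command line options into jconf; argc-1 excludes the trailing output file name already handled above. A return value of -1 means parsing/configuration failed, so report it and exit. */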
fprintf(stderr, "Error reading arguments\n"); return -1; } /* force some default values */ jconf->detect.silence_cut = 0; /* disable silence cut */ jconf->preprocess.strip_zero_sample = TRUE; /* strip zero samples */ jconf->detect.level_thres = 0; /* no VAD, record all */ /* set Julius default parameters for unspecified acoustic parameters */ apply_para(&(jconf->am_root->analysis.para), &(jconf->am_root->analysis.para_default)); /* set some values */ jconf->input.sfreq = jconf->am_root->analysis.para.smp_freq; jconf->input.period = jconf->am_root->analysis.para.smp_period; jconf->input.frameshift = jconf->am_root->analysis.para.frameshift; jconf->input.framesize = jconf->am_root->analysis.para.framesize; sfreq = jconf->am_root->analysis.para.smp_freq; /* output file check */ if (!stout) { if (access(filename, F_OK) == 0) { if (access(filename, W_OK) == 0) { fprintf(stderr, "Warning: overwriting file \"%s\"\n", filename); } else { perror("mkss"); return(1); } } } /* allocate speech store buffer */ samples = sfreq * slen / 1000; speech = (SP16 *)mymalloc(sizeof(SP16) * samples); /* allocate work area to compute spectrum */ wrk = WMP_work_new(&(jconf->am_root->analysis.para)); if (wrk == NULL) { jlog("ERROR: m_fusion: failed to initialize MFCC computation for SS\n"); return -1; } /* initialize input device */ if (j_adin_init(recog) == FALSE) { fprintf(stderr, "Error in initializing adin device\n"); return -1; } /* open device */ if (j_open_stream(recog, NULL) < 0) { fprintf(stderr, "Error in opening adin device\n"); } /* record mic input */ fprintf(stderr, "%dHz recording for %.2f seconds of noise\n", sfreq, (float)slen /(float)1000); speechnum = 0; adin_go(adin_callback, NULL, recog); /* close device */ adin_end(recog->adin); fprintf(stderr, "\n%d samples (%d bytes, %.1f sec) recorded\n", samples, samples * sizeof(SP16), (float)samples / (float)sfreq); /* compute SS */ fprintf(stderr, "compute SS:\n"); fprintf(stderr, " fsize : %4d samples (%.1f msec)\n", jconf->input.framesize, (float)jconf->input.framesize * 1000.0/ (float)sfreq); fprintf(stderr, " fshift: %4d samples (%.1f msec)\n", jconf->input.frameshift, (float)jconf->input.frameshift * 1000.0/ (float)sfreq); ss = new_SS_calculate(speech, samples, &sslen, wrk, &(jconf->am_root->analysis.para)); fprintf(stderr, " points: %4d\n", sslen); fprintf(stderr, "noise spectrum was measured\n"); /* open file for recording */ fprintf(stderr, "writing average noise spectrum to [%s]...", filename); if (stout) { fd = 1; } else { if ((fd = open(filename, O_CREAT | O_RDWR #ifdef O_BINARY | O_BINARY #endif , 0644)) == -1) { perror("mkss"); return(1); } } x = sslen; #ifndef WORDS_BIGENDIAN swap_bytes((char *)&x, sizeof(int), 1); #endif if (write(fd, &x, sizeof(int)) < sizeof(int)) { perror("mkss"); return(1); } #ifndef WORDS_BIGENDIAN swap_bytes((char *)ss, sizeof(float), sslen); #endif if (write(fd, ss, sslen * sizeof(float)) < sslen * sizeof(float)) { perror("mkss"); return(1); } if (!stout) { if (close(fd) < 0) { perror("mkss"); return(1); } } fprintf(stderr, "done\n"); WMP_free(wrk); return 0; } julius-4.2.2/mkss/00readme-ja.txt0000644001051700105040000000317011071102425015025 0ustar ritrlab mkss MKSS(1) MKSS(1) O mkss - XyNgTugNVpmCYXyNgvZ Tv mkss [options...] 
{filename} DESCRIPTION mkss CXyNgTugNVpmCYXyNgvZc[D wG}CN^C ZXyN g t@CoDot@CCJulius X yNg TugNVmCYXyNgt@CiIvV "-ssload"jgpD ^NJnDTvO16bit signed short (big endian), monoral DOt@C DCt@C "-" w Woo D OPTIONS -freq Hz TvOg (Hz) wD(default: 16,000) -len msec ^~bPwidefault: 3000j -fsize sample_num TCYTvw (default: 400)D -fshift sample_num t[VtgTvw (default: 160)D SEE ALSO julius ( 1 ) COPYRIGHT Copyright (c) 1991-2008 sw コ Copyright (c) 1997-2000 U(IPA) Copyright (c) 2000-2008 [wZpw@w ュコ Copyright (c) 2005-2008 Hw JuliusJ`[ LICENSE Julius gpD 10/02/2008 MKSS(1) julius-4.2.2/Release.txt0000644001051700105040000007147612001663600013462 0ustar ritrlab4.2.2 (2012.08.01) ================== Fixes: - Now can be compiled without flex library - Fix failure of reading binary N-gram when compiled with "--enable-words-int" - Fix incorrect handling of file paths with backslash in jconf file at Windows - Fix segfault when reading an errorous word dictionary. - Fix occasional segfault which may occur while search. 4.2.1 (2011.12.25) =================== New features: - Add support for per-word insertion penalty setting at grammar recognition. You can set different word insertion score for each word entry at .dict file. For example, if you have an entry 15 [a] a in .dict file and want to assign word insertion score of "-2.0" to this word, you can write like this: 15 @-2.0 15 [a] a The figure after "@" is the insertion penalty. The third element should be the same as the first element. - New option "-chunk_size" can specify the audio fragment size in number of samples. The default value is 1000. - At "adintool", enable input detection by default for standard input. Fixed bugs: - (IMPORTANT) CMN is not performed for C0 coef. This bug exists in the versions from 4.1.3 to 4.2. - "-forcedict" won't work for additional dictionaries given by "-adddict". - Corrupted header of recorded WAV file when interrupted by CTRL+C. - Occasional segfault when reading a wrongly formatted dictionary. - Won't compile with configure option "--enable-word-graph". - Segfault of "mkbingram" and "generate-ngram" at cygwin. 4.2 (2011.05.01) ================= New features: - Additional score-based pruning at the 1st pass. It is disabled by default, you can enable by using an option "-bs arg". The argument is score range. - New support for PulseAudio (--with-mictype=pulseaudio) - New Option "-adddict", "-addword" to read additional dictionaries / words. - Portaudio library updated to V19. Audio capture device can be changed by env. "PORTAUDIO_DEV_NUM". The device list will be output at start up. Changed behavior: - "mkbinhmmlist" now saves pseudo phone list extracted from AM for faster start up. The output should be used with the same AM specified at generation. Note that the converted binhmmlist file can not be used with older Julius. - Audio library linking was modified at configure script. When "--with-mictype=..." is explicitly specified, Julius will link ONLY the audio library. If not specified, Julius will link all the audio devices whose development file was detected by the configure. Library functions: - j_config_load_string_new(char *str): like j_config_load_file(), but parse the given string to set parameters. - add_dict(), add_word(): the same as "-adddict" and "-addword". (They should be called at start up before starting engine) - (portaudio/Windows) j_open_stream(recog, NUMSTR) to choose device NUM. ex. 'j_open_stream(recog, "1")' will open device number one. 
- (portaudio/Windows) get_device_list(): obtain list of available devices. Fixes: - Improved tree lexicon structure for better memory management. - Reduce malloc calls at reading N-gram. - Eliminated memory leaks using Valgrind. - Workarounds to avoid crash with j_close_stream(). - Now allow "-iwsp" only with multi-path acoustic model. 4.1.5.1 (2010.12.25) ===================== Modified: - Fixed problem related to the license. 4.1.5 (2010.06.04) =================== Bug fixes: - Language model / decoding (these bugs may affect the ASR performance): - Several wrong word insertion penalty handling on grammar was found and fixed. - Now correctly add the prob. of the first word at the second pass. - MFCC computation: - Support MFCC computation when liftering parameter (CEPLIFTER) = 0. - Compilation: - Fixes to build Julius on cygwin and MSVC. - Supports "gcc -mno-cygwin" on cygwin. - Compilation error with configure "--disable-plugin" - Module mode: - Unable to send grammar from jcontrol. - Not working "DELPROCESS" command when SR and LM has different names. - Other fixed bugs: - wrong parsing of "-mapunk" option. - "-htkconf" in a jconf file now correctly handles the file path as relative to the jconf file. - "-input stdin" now supports WAV format. - not working "-plugin DIRNAME" on Win32/MSVC. 4.1.4 (2009.12.25) =================== New feature: - added function to choose input audio device on MSVC compiled Julius, by specifying a device ID with env. var. "PORTAUDIO_DEV_NUM". The available device IDs will be listed in the system log at start up. - You can now set a locale for a LM in Julius.cpp. Bug fixes: - now can be compield on Mac OS X (OS X 10.6 SDK). - fixes around portaudio for smaller latency and compatibility (Windows). 4.1.3 (2009.11.02) =================== New features: - new MSVC support: please read "msvc/00README.txt" - extended N-gram to support arbitrary N - portaudio external library (V19) can be used instead of internal V18. When configure detects portaudio library installed in your system, Julius will use it instead of internal V18. You can also choose input device by "PORTAUDIO_DEV" env. var. at V19library. See the log text at start up to know how to set it. - allow word alignment output (-walign) in module mode Modified: - ! now Julius do not perform CMN on 0'th cepstral coefficients, which is the same as the old 4.0.x versions. - j_get_current_filename() added on JuliusLib - improved "--enable-wpair" handling Bug fixes: - many bugs around audio open/close API on JuliusLib - fail to do make in julius-simple - unable to record inputs at cygwin - segfault on adintool with "-server" - occasional segfault at grammar recognition 4.1.2 (2009.02.12) =================== [SRILM support] - Added swapping "" and "" when reading BACKWARD ARPA file trained by SRILM. It will be automatically detected. If detection fails, you can specify an option "-swap" in mkbingram to do that. - Internally modify the unigram probability of "" or "", since they may be set to "-99" in SRILM model. The same value as opposite will be assigned. [N-gram] - Size limit extended from 2GB to 4GB for big N-gram. - "" and "" can be changed by "-mapunk". - More strict check for unknown words: Julius now terminates with error when dictionary has OOV words and N-gram is not open (no unk word). [Improvements] - Faster successor list building algorithm - Update yomi2voca.pl to cover more minor Japanese pronunciation. 
- Workaround for audio buffer overrun in ALSA [JuliusLib] - Added API function "j_close_stream()" to exit main recognition loop. [Bug Fixes] - Fixed segfault on adintool when specifying multiple servers. - Fixed compilation error on cygwin (libesd) - Fixed segfault when not specifying "-input" option. 4.1.1 (2008.12.13) =================== Bug fixes: [N-gram] - sometimes could not read an ARPA N-gram file trained by SRILM. [A/D-in] - "-input stdin" does not work. - "SOURCERATE" at "-htkconf" is ignored. [Forced alignments] - now can be used in isolated word recognition and with "-1pass". - "-palign", "-walign" and "-salign" can not be run together at a time. [Module mode] - freezes when a grammar is specified by its ID number. - wrong grammar ID in recognition result (GRAM=.. always 0) - "SYNCGRAM" will cause crash at isolated word recognition. - unable to receive/activate/dactivate on isolated word recognition. [Others] - fails to compile on several OS (needs "-ldl"). - does not handle backslash escaping correctly in Jconf file. - does not output the 1st pass result as a final result with "-1pass". [Tools] Jcontrol - does not support "graminfo" command. - can not send a dictionary to Julius running isolated word recognition. mkdfa - segfault on mkfa - fails to read a grammar file on DOS format. adintool - wrong behavior when splitting a long audio file. - now output time of each segment. 4.1 (2008.10.3) ================ New plugin extension: - supported types: - A/D-in plugin - feature vector input plugin - audio input monitor / postprocess plugin - feature vector monitor / postprocess plugin - result plugin - can add arbitrary JuliusLib callback via plugin - sample codes is included, with full documentation of function spec. - run on Linux, Windows and other unix variants with dlopen() capability Newly supported features: - multi-stream feature input - MSD-HMM (compatible with "HTS" toolkit) - CVN - frequency warping for VTLN (no estimation yet) - "-input alsa", "-input oss" and "-input esd" - perl version of jcontrol client "jclient-perl" Modified: - Restrict option orders when multiple instances defined (-AM, -LM, -SR): - Option should be just after correspondence instance declaration. (ex. LM options should be placed after "-LM" and before other instance declaration.) - Global option should be before any instance declaration, or just after "-GLOBAL" option. This new restriction can be removed by "-nosectioncheck" option. Fixed bugs: - "-record" fails to record the first silence part! - Not working "-multigramout" - environment variable expansion sometimes fail within jconf file. - limits extended: maximum HMM name length = 256 char, Number of HMM states unlimited. - Module mode error message on grammar command. Documents: - Alpha version of "Juliusbook" (contains only manuals at this time) - Unix manuals are moved to "man" directory. 4.0.2 (2008.5.27) ================== New features: - New option "-fallback1pass" will output 1st pass result as final result when the 2nd pass fails. - Added support for "USEPOWER=T" on feature extraction. Modified: - "-AM_GMM" becomes optional: GMM will share AM params if not specified. Fixed: - GMM rejection does not work (since 4.0.1) - Cannot specify other A/D device on Linux/ALSA correctly. - Sometimes fails to read a big N-gram. - Sometimes crush with "-record" option. - Callback timing modified on real-time input with sp-segment/GMM/VAD. - Other minor fixes. 
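A minimal shell sketch of the 4.1 option-ordering rule noted above
(multiple instances declared with -AM, -LM and -SR).  All file and
instance names are placeholders, only options mentioned in these
release notes appear in the jconf, and the three-argument form of
"-SR" (search name, AM name, LM name) as well as "-C" to load the
jconf are assumed from the usual Julius-4 command line -- check the
Juliusbook for the exact syntax:

    # sketch only: model files referenced here are hypothetical
    cat > ordering-example.jconf << 'EOF'
    # global options: before any instance declaration (or after -GLOBAL)
    -input mic
    -record /tmp/inputs

    # acoustic model instance: AM options right after its own -AM
    -AM am1
    -htkconf wav_config
    -iwsp

    # language model instance: LM options right after -LM
    -LM lm1
    -gram sample

    # search instance binding the AM and LM; search/output options here
    -SR sr1 am1 lm1
    -fallback1pass
    -walign
    EOF

    # the section check itself can be relaxed with -nosectioncheck
    julius -C ordering-example.jconf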
4.0.1 (2008.3.12) ================== New features: A/D-in - ALSA now become default on Linux instead of OSS. Module mode - "ACTIVATEGRAM", "DEACTIVATEGRAM" and "DELGRAM" now accepts grammar name as arguments in addition to grammar ID number. - new command "GRAMINFO" to get list of current grammars. Fixed bugs: A/D-in - ALSA codes updated to work on 1.x drivers. - segfault with "-48". - segfault on MFCC input with zero frames with "-spsegment". VAD - CMN not working on spsegment/GMM-VAD/decoder-VAD with microphone input. Acoustic model - Error when no short-pause model defined in multi-path mode. N-gram - incorrect 2-gram prob on 1st pass with backward N-gram only. - incorrect 1-gram prob for unknown words. - fail to read some ARPA files with no back-off compaction. - read failure or segfault on big N-gram with over 24bit entries. - redundant index for back-off weights in some case. Word recognition - incorrect N-best output with "-output N" on word recognition. Installation - "make install" fails on cygwin. Source code - Static variables in functions that are not meant to be static are made local. - Global variables in search are moved to StackDecode. 4.0 (2007.12.19) ================= For more detail about new features in 4.0, please see other document. - Re-constructed all data structures and re-organize source code. - Core engine now becomes a library called JuliusLib, with API and callbacks. - Multi-model decoding now available. - Modularize language model handling, and merge Julian to JuliusLib. - Support longer N-gram (N > 3). - User-defined LM function support. - Handy isolated word recognition mode. - Confusion network output. - Improvements in short-pause segmentation, especially for live input. - GMM-based VAD. - Decoder-based VAD. - Integrated many compile-time options. - Reduce memory usage. - Sample application to use the JuliusLib is included: "julius-simple". - Update tools: - "adintool" supports multi-server mode. - "generate-ngram" newly added to generate sentences from N-gram 3.5.3 (2006.12.29) =================== o Improved Performance: - acoustic computation optimized: now becomes 20%-40% faster! - optimize memory access: re-use work area of deleted hypothesis in the 2nd pass. - some memory allocation improvement on dictionary and word trellis. o New Grammar Tools: - "dfa_minimize", "dfa_determinize" will minimize/determinize DFA. mkdfa.pl now calls dfa_mimize in it. - "slf2dfa": a toolkit to convert HTK slf to Julian dfa (separate kit) o Embedding HTK Acoustic Parameters: - add option to load HTK Config file to set correct acoustic parameter configuration at recognition time. - the acoustic parameter configuration can be embedded into header of a binary HMM file. o Improved Word Graph: - add an option to completely separate graph words: words with different phone contexts can be output separatedly by "-graphrange -1". o Support for online energy normalization: - Preliminary support for live recognition using acoustic model with energy normalization. (approximate with maximum energy of last input) o Code refinements: - re-organize libsent/src/wav2mfcc. - modularize acoustic parameter (Value) handling. - output compile-time configuration of libsent with "--setting" option. - Doxygen 1.5.0 support. - "julius-info@lists.sourceforge.jp" becomes the official contact address. - fixed typo on copyright notice. o Fixed bugs: - sometimes unable to read a binary LM on "--enable-words-int". - memory leaks around option handling, global variables and local buffers. 
- segmentation fault on very long input. - doublely counted initial state of DFA. - mkdfa.pl: unable to find mkfa on some OS. - adintool: makes empty output file on termination. - adintool: miss last inputs when killed. - other small changes. 3.5.2 (2006.07.31) =================== o Speed-up and improvement on Windows console: - Support DirectSound for better input handling - Support input threading utilizing callback API on portaudio. - Support newest MinGW (tested on 5.0.2) o More accurate word graph output: - Add option to cut the resulting graph by its depth (option -graphcut, and enabled by default!) - Set limit for post-processing loop to avoid infinite loop (option -graphboundloop, and set by default) - Refine graph generation algorithm concerning dynamic word merging and search termination on the second pass. o Add capability to output word graph instead of trellis on 1st pass: - 1st pass generates word graph instead of word trellis as intermediate result by specifying "--enable-word-graph". In that case, the 2nd pass will be restricted on the graph, not on the whole trellis. - With "--enable-word-graph" and "--enable-wpair" option, the first pass of Julius can perform 1-pass graph generation based on 2-gram with basically the same algorithm as other popular word graph based decoders. o Bug fixes: - configure script did not work on Solaris 8/9 - "-gprune none" did not work on tied-mixture AM - Incorrect error message for AM with duration header other than "NULLD" - Always warns abount zero frame stripping upon MFCC o Imprementation improvements: - bmalloc2-based AM memory management 3.5.1 (2006.03.31) =================== o Wider MFCC types support: - Added extraction of acceleration coefficients (_A). Now you can recognize waveform or microphone input with AM trained with _A. - Support all MFCC qualifiers (_0, _E, _N, _D, _A, _N, _Z) and their combination - Support for any vector lenth (will be guessed from AM header) - New option: "-accwin" - New option "-zmeanframe": frame-wise DC offset removal, like HTK - New options to specify detailed analysis parameters (see manual): -preemph, -fbank, -ceplif, -rawe / -norawe, -enormal / -noenormal, -escale, -silfloor o Improved microphone / network recognition by MAP-CMN: - New option "-cmnmapweight" to change MAP weight - Option "-cmnload" can be used to specify the initial cepstral mean at startup - Cepstral mean of last 5 second input is used as an initial mean for each input. You can inhibit updating of the initial mean and keep the value loaded by "-cmnload" by option "-cmnnoupdate". o Module issue: - Julius now outputs "" when recognition starts, and "" after recognition stopped by module command. Use this for safer server-client synchronization. - now can specify grammar name from client by specifying a name after a command like "ADDGRAM name" or "CHANGEGRAM name". o Bug fixes: - Sometimes segfault on pause/resume command on module mode while input. - Can not read N-gram with tuples > 2^24. - Can not read HMM with 3-state (1 output state) model on multi-path. - Sometimes omit the last transition definition in DFA file. - Sometimes fails to compile the gramtools on MacOSX. 
3.5 (2005.11.11) ================= o New features: - Input verification / rejection using GMM (-gmm, -gmmnum, -gmmreject) - Word graph output (--enable-graphout, --enable-graphout-nbest) - Pruning on 2nd pass based on local posterior CM (--enable-cmthres) - Multiple/per-grammar recognition (-gram, -gramlist, -multigramout) - Can specify multiple grammars at startup: "-gram prefix1,prefix2,..." or "-gramlist listfile" where listfile contains list of prefixes. - General output character set conversion "-charconv from to" based on iconv (Linux) or Win32API+libjcode (Windows) o Improved audio inputs on Linux: - ALSA-1.x support. (--with-mictype=alsa) - EsounD daemon input support. (--with-mictype=esd) - Fixed some bugs on USB audio input. - Audio capturing device can be specified via env. "AUDIODEV". - Extra microphone API support using portaudio and spLib API. o Performance improvements: - Reduced memory size for beam operation on the 1st pass. - Slightly optimized tree lexicon by removing redundant data. - Reduced size of word N-gram index (reduced from 32 bit to 24 bit). o Fixed bugs: - Not working spectral subtraction. - Memory leak when stack exhausted ("stack empty") on 2nd pass. - Segmentation fault on a very short input of 1 to 4 frames. - AM trained with no CMN cannot be used with waveform/mic input. - Wrong short-pause word handling on successive decoding mode. (--enable-sp-segment) - No output of "maxcodebooksize" at startup. - No output of the number of sentences found when stack exhausted. - No output of "-separatescore" on module mode. - Beam width does not adjusted when grammar has been changed and full beam options (-b 0) is specified in Julian. - Wrong update of category-aware cross-word triphones when dynamically switching grammar on Julian. - No output of grammar to stdout on multiple grammar mode. - Unable to send/receive audio data between different endian machines. - (Linux) crash when compiled with icc. - (Linux) some strange behavior on USB audio. - (Windows) confuse with CR/LF newline inputs in several text inputs. - (Windows) mkdfa.pl could not work on cygwin. - (Windows) sometimes fails to read a file when not using zlib. - (Windows) wrong file suffix when recording with "-record" (.raw->.wav) o Unified source code: - Linux and Windows version are integrated into one source. - Multi-path version has been integrated with the normal version into one source. The multi-path version of Julius/Julian, that allows any transitions of HMMs including model skip transition, can be compiled by "--enable-multipath" option. The part of source codes for the multi-path version can be identified by the definition "MULTIPATH_VERSION". o Other improvements: - Now can be compiled on MinGW/MSYS on Windows - Totally rewritten comments in entire source in Doxygen format. You can generate fully browsable source documents in English. Try "make doxygen" at the top directory (you need doxygen installed) - Install additional executables of julius/julian with version and setting names like "julius-3.5-fast" when "make install" is invoked. - Updated LICENSE.txt with English translation for reference. o Changed behaviors: - Binary N-gram file format has been changed for smaller size. The old files can still be read directly by julius, in which case on-line conversion will be performed at startup. 
You can convert the old files (3.4.2 and earlier) to the new format with the new mkbingram by involing the command below: "mkbingram -d oldbinary newbinary" Please note that since mkbingram now output the new format file, it can not be read by older Julius. The binary N-gram file version can be detected by the first 17 bytes of the file: old format should be "julius_bingram_v3" and new format should be "julius_bingram_v4". - Byte order of audio stream via tcpip fixed to LITTLE ENDIAN. - Now use built-in zlib by default for compressed files. This may make the engine startup slower, and if you prefer, you can still use the previous method using external gzip command by specifying "--disable-zlib". - (Windows) Changed the compilation procedure on VC++. You can build Julian by only specifying "-DBUILD_JULIAN" at compiler option, and do not need to alter "julius.h". 3.4.2 (2004.03.31) =================== - New option "-rejectshort msec" to reject short input. - More stable PAUSE/RESUME on module mode with adinnet input. - Bug fixes: - Memory leak on very short input. - Missing Nth result when small vocabulary is used. - Hang up of "generate" on small grammar. - Cosmetic changes: - Cleanup codes to confirm for 'gcc -Wall'. - Update of config.guess and config.sub. - Update of copyright to 2004. 3.4.1 (2004.02.25) =================== - AM and LM computation method is slightly modified to improve search stability of 2nd pass. These modification are enabled by default, and MAY IMPROVE THE RECOGNITION ACCURACY as compared with older versions. - fixed overcounting of LM score for the expanded word. - new inter-word triphone approximation (-iwcd1 best #) on 1st pass. This new algorithm now becomes default. - Newly supports binary HMM (original format, not compatible with HTK). A tool "mkbinhmm" converts a hmmdefs(ascii) file to the binary format. - MFCC computation becomes faster by sin/cos table lookup. - Bugs below have been fixed: - (-input adinnet) recognition does not start immediately after speech inputs begin when using adinnet client. - (-input adinnet) together with module mode, speech input cannot stop by pause/terminate command. - (-input adinnet) unneccesary fork when connecting with adinnet client. - (-input rawfile) error in reading wave files created by Windows sound recorder. - (CMN) CMN was applied any time even when acoustic models does not want. - (AM) numerous messages in case of missing triphone errors at startup. - (adintool) immediately exit after single file input. - (sp-segment) fixed many bugs relating short pause word and LM - (sp-segment) wow it works with microphone input. - (-[wps]align) memory leak on continuous input. - Add option to remove DC offset from speech input (option -zmean). - (-module) new output message: '' - Optional feature "Search Space Visualization" is added (--enable-visualize) - HTML documentations greatly revised in doc. New argument: "-iwcd1 best #" "-zmean" New configure option: "--disable-lmfix", "--enable-visualize" 3.4 (2003.10.01) =================== - Confidence measure support - New parameter "-cmalpha" as smoothing coef. 
- New command "-outcode C" to output CM in module output - Can be disabled by configure option "--disbale-cm" - Can use an alternate CM algorithm by configure option "--enable-cm-nbest" - Class N-gram support - Can be disabled by configure option "--disable-class-ngram" - Factoring basis changed from N-gram entry to dictionary word - WAV format recording in "adinrec", "adintool" and "-record" option - Modified output message startup messages, engine configuration message in --version and --help, - Fixes: some outputs in module mode, bug in only several frame input (realtime-1stpass.c), long silence at end of segmented speech miscompilation with NetAudio, word size check in binary N-gram, bug in acoustic computation (gprune_none.c). "-version" -> "-setting", "-hipass" -> "-hifreq", "-lopass" -> "-lofreq" 3.3p4 (2003.05.06) =================== - Fixes for audio input: - Fix segfault/hangup with continuous microphone input. - Fix client hangup when input speech too long in module mode. (now send an buffer overflow message to the client) - Fix audio input buffering for very short input (<1000 samples). - Fix blocking handling in tcpip adin. - Some cosmetic changes (jcontrol, LOG_TEN, etc.) 3.3p3 (2003.01.08) =================== - New inter-word short pause handling: - [Julius] New option added for short pause handling. Specifying "-iwspword" adds a short-pause word entry, namely " [sp] sp sp", to the dictionary. The entry content to be changed by using "-iwspentry". - [multi-path] Supports inter-word context-free short pause handling. "-iwsp" option automatically appends a skippable short pause model at every word end. The added model will also be ignored in context modeling. The short pause model to be appended by "-iwsp" can be specified by "-spmodel" options. See documents for details. - Fixes for audio input: - Input delay improved: the initial response to mic input now becomes much faster than previous versions (200ms -> 50ms approx.). - Would not block when other process is using the audio device, but just output error and exit. - Update support for libsndfile-1.0.x. - Update support for ALSA-0.9.x (to use this, add "--with-mictype=alsa" to configure option.) 3.3p2 (2002.11.18) =================== - [multi-path version] Supports model-skip transition. From this version, you can use "any" type of state transition in HTK format acoustic model. - New feature: "-record dir" records speech inputs sucessively into the specified directory with time-stamp file names. - fix segfault on Solaris with "-input mfcfile". - fix blocking command input when using module mode and adinnet together. - modified the output flush timing to make sure the last recognition result will be output immediately. 3.3p1 (2002.10.15) =================== Following bugs are fixed: - Fixed incorrect default value of language weights for second pass (-lmp2). - Fixed sometimes read failure of dictionary file (double space enabled). - Fixed wrong output of "-separatescore" together with monophone model. 3.3 (2002.09.12) ================== The updates and new features from rev.3.2 is shown below. - New features added: - Server module mode - control Julius (input on/off, grammar switching) from other client process via network. - Online grammar changing and multi-grammar recognition supported. - Noise robustness: - Spectral subtraction incorporated. - Support more variety of acoustic models: - "multi-path version" is available that allows any transition including loop, skip and parallel transition. 
- A little improvement of recognition performance by bug fixes - Other minor extensions (CMN parameter saving, etc.) - Many bug fixes English documents are available in o online manuals (will be installed by default), and o Translated full documentation in PDF format: Julius-3.2-book-e.pdf. We are sorry that current release contains only documents for old rev.3.2. We are now working to update it to catch up with the current rev.3.3 version. julius-4.2.2/mkgshmm/0000755001051700105040000000000012004463507012774 5ustar ritrlabjulius-4.2.2/mkgshmm/configure0000755001051700105040000010301411556416023014704 0ustar ritrlab#! /bin/sh # Guess values for system-dependent variables and create Makefiles. # Generated automatically using autoconf version 2.13 # Copyright (C) 1992, 93, 94, 95, 96 Free Software Foundation, Inc. # # This configure script is free software; the Free Software Foundation # gives unlimited permission to copy, distribute and modify it. # Defaults: ac_help= ac_default_prefix=/usr/local # Any additions from configure.in: # Initialize some variables set by options. # The variables have the same names as the options, with # dashes changed to underlines. build=NONE cache_file=./config.cache exec_prefix=NONE host=NONE no_create= nonopt=NONE no_recursion= prefix=NONE program_prefix=NONE program_suffix=NONE program_transform_name=s,x,x, silent= site= srcdir= target=NONE verbose= x_includes=NONE x_libraries=NONE bindir='${exec_prefix}/bin' sbindir='${exec_prefix}/sbin' libexecdir='${exec_prefix}/libexec' datadir='${prefix}/share' sysconfdir='${prefix}/etc' sharedstatedir='${prefix}/com' localstatedir='${prefix}/var' libdir='${exec_prefix}/lib' includedir='${prefix}/include' oldincludedir='/usr/include' infodir='${prefix}/info' mandir='${prefix}/man' # Initialize some other variables. subdirs= MFLAGS= MAKEFLAGS= SHELL=${CONFIG_SHELL-/bin/sh} # Maximum number of lines to put in a shell here document. ac_max_here_lines=12 ac_prev= for ac_option do # If the previous option needs an argument, assign it. if test -n "$ac_prev"; then eval "$ac_prev=\$ac_option" ac_prev= continue fi case "$ac_option" in -*=*) ac_optarg=`echo "$ac_option" | sed 's/[-_a-zA-Z0-9]*=//'` ;; *) ac_optarg= ;; esac # Accept the important Cygnus configure options, so we can diagnose typos. case "$ac_option" in -bindir | --bindir | --bindi | --bind | --bin | --bi) ac_prev=bindir ;; -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) bindir="$ac_optarg" ;; -build | --build | --buil | --bui | --bu) ac_prev=build ;; -build=* | --build=* | --buil=* | --bui=* | --bu=*) build="$ac_optarg" ;; -cache-file | --cache-file | --cache-fil | --cache-fi \ | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) ac_prev=cache_file ;; -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) cache_file="$ac_optarg" ;; -datadir | --datadir | --datadi | --datad | --data | --dat | --da) ac_prev=datadir ;; -datadir=* | --datadir=* | --datadi=* | --datad=* | --data=* | --dat=* \ | --da=*) datadir="$ac_optarg" ;; -disable-* | --disable-*) ac_feature=`echo $ac_option|sed -e 's/-*disable-//'` # Reject names that are not valid shell variable names. 
if test -n "`echo $ac_feature| sed 's/[-a-zA-Z0-9_]//g'`"; then { echo "configure: error: $ac_feature: invalid feature name" 1>&2; exit 1; } fi ac_feature=`echo $ac_feature| sed 's/-/_/g'` eval "enable_${ac_feature}=no" ;; -enable-* | --enable-*) ac_feature=`echo $ac_option|sed -e 's/-*enable-//' -e 's/=.*//'` # Reject names that are not valid shell variable names. if test -n "`echo $ac_feature| sed 's/[-_a-zA-Z0-9]//g'`"; then { echo "configure: error: $ac_feature: invalid feature name" 1>&2; exit 1; } fi ac_feature=`echo $ac_feature| sed 's/-/_/g'` case "$ac_option" in *=*) ;; *) ac_optarg=yes ;; esac eval "enable_${ac_feature}='$ac_optarg'" ;; -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ | --exec | --exe | --ex) ac_prev=exec_prefix ;; -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ | --exec=* | --exe=* | --ex=*) exec_prefix="$ac_optarg" ;; -gas | --gas | --ga | --g) # Obsolete; use --with-gas. with_gas=yes ;; -help | --help | --hel | --he) # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat << EOF Usage: configure [options] [host] Options: [defaults in brackets after descriptions] Configuration: --cache-file=FILE cache test results in FILE --help print this message --no-create do not create output files --quiet, --silent do not print \`checking...' messages --version print the version of autoconf that created configure Directory and file names: --prefix=PREFIX install architecture-independent files in PREFIX [$ac_default_prefix] --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX [same as prefix] --bindir=DIR user executables in DIR [EPREFIX/bin] --sbindir=DIR system admin executables in DIR [EPREFIX/sbin] --libexecdir=DIR program executables in DIR [EPREFIX/libexec] --datadir=DIR read-only architecture-independent data in DIR [PREFIX/share] --sysconfdir=DIR read-only single-machine data in DIR [PREFIX/etc] --sharedstatedir=DIR modifiable architecture-independent data in DIR [PREFIX/com] --localstatedir=DIR modifiable single-machine data in DIR [PREFIX/var] --libdir=DIR object code libraries in DIR [EPREFIX/lib] --includedir=DIR C header files in DIR [PREFIX/include] --oldincludedir=DIR C header files for non-gcc in DIR [/usr/include] --infodir=DIR info documentation in DIR [PREFIX/info] --mandir=DIR man documentation in DIR [PREFIX/man] --srcdir=DIR find the sources in DIR [configure dir or ..] 
--program-prefix=PREFIX prepend PREFIX to installed program names --program-suffix=SUFFIX append SUFFIX to installed program names --program-transform-name=PROGRAM run sed PROGRAM on installed program names EOF cat << EOF Host type: --build=BUILD configure for building on BUILD [BUILD=HOST] --host=HOST configure for HOST [guessed] --target=TARGET configure for TARGET [TARGET=HOST] Features and packages: --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no) --enable-FEATURE[=ARG] include FEATURE [ARG=yes] --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) --x-includes=DIR X include files are in DIR --x-libraries=DIR X library files are in DIR EOF if test -n "$ac_help"; then echo "--enable and --with options recognized:$ac_help" fi exit 0 ;; -host | --host | --hos | --ho) ac_prev=host ;; -host=* | --host=* | --hos=* | --ho=*) host="$ac_optarg" ;; -includedir | --includedir | --includedi | --included | --include \ | --includ | --inclu | --incl | --inc) ac_prev=includedir ;; -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ | --includ=* | --inclu=* | --incl=* | --inc=*) includedir="$ac_optarg" ;; -infodir | --infodir | --infodi | --infod | --info | --inf) ac_prev=infodir ;; -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) infodir="$ac_optarg" ;; -libdir | --libdir | --libdi | --libd) ac_prev=libdir ;; -libdir=* | --libdir=* | --libdi=* | --libd=*) libdir="$ac_optarg" ;; -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \ | --libexe | --libex | --libe) ac_prev=libexecdir ;; -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ | --libexe=* | --libex=* | --libe=*) libexecdir="$ac_optarg" ;; -localstatedir | --localstatedir | --localstatedi | --localstated \ | --localstate | --localstat | --localsta | --localst \ | --locals | --local | --loca | --loc | --lo) ac_prev=localstatedir ;; -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ | --localstate=* | --localstat=* | --localsta=* | --localst=* \ | --locals=* | --local=* | --loca=* | --loc=* | --lo=*) localstatedir="$ac_optarg" ;; -mandir | --mandir | --mandi | --mand | --man | --ma | --m) ac_prev=mandir ;; -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*) mandir="$ac_optarg" ;; -nfp | --nfp | --nf) # Obsolete; use --without-fp. 
with_fp=no ;; -no-create | --no-create | --no-creat | --no-crea | --no-cre \ | --no-cr | --no-c) no_create=yes ;; -no-recursion | --no-recursion | --no-recursio | --no-recursi \ | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) no_recursion=yes ;; -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ | --oldin | --oldi | --old | --ol | --o) ac_prev=oldincludedir ;; -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) oldincludedir="$ac_optarg" ;; -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) ac_prev=prefix ;; -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) prefix="$ac_optarg" ;; -program-prefix | --program-prefix | --program-prefi | --program-pref \ | --program-pre | --program-pr | --program-p) ac_prev=program_prefix ;; -program-prefix=* | --program-prefix=* | --program-prefi=* \ | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) program_prefix="$ac_optarg" ;; -program-suffix | --program-suffix | --program-suffi | --program-suff \ | --program-suf | --program-su | --program-s) ac_prev=program_suffix ;; -program-suffix=* | --program-suffix=* | --program-suffi=* \ | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) program_suffix="$ac_optarg" ;; -program-transform-name | --program-transform-name \ | --program-transform-nam | --program-transform-na \ | --program-transform-n | --program-transform- \ | --program-transform | --program-transfor \ | --program-transfo | --program-transf \ | --program-trans | --program-tran \ | --progr-tra | --program-tr | --program-t) ac_prev=program_transform_name ;; -program-transform-name=* | --program-transform-name=* \ | --program-transform-nam=* | --program-transform-na=* \ | --program-transform-n=* | --program-transform-=* \ | --program-transform=* | --program-transfor=* \ | --program-transfo=* | --program-transf=* \ | --program-trans=* | --program-tran=* \ | --progr-tra=* | --program-tr=* | --program-t=*) program_transform_name="$ac_optarg" ;; -q | -quiet | --quiet | --quie | --qui | --qu | --q \ | -silent | --silent | --silen | --sile | --sil) silent=yes ;; -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) ac_prev=sbindir ;; -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ | --sbi=* | --sb=*) sbindir="$ac_optarg" ;; -sharedstatedir | --sharedstatedir | --sharedstatedi \ | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ | --sharedst | --shareds | --shared | --share | --shar \ | --sha | --sh) ac_prev=sharedstatedir ;; -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \ | --sha=* | --sh=*) sharedstatedir="$ac_optarg" ;; -site | --site | --sit) ac_prev=site ;; -site=* | --site=* | --sit=*) site="$ac_optarg" ;; -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) ac_prev=srcdir ;; -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) srcdir="$ac_optarg" ;; -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ | --syscon | --sysco | --sysc | --sys | --sy) ac_prev=sysconfdir ;; -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*) 
sysconfdir="$ac_optarg" ;; -target | --target | --targe | --targ | --tar | --ta | --t) ac_prev=target ;; -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) target="$ac_optarg" ;; -v | -verbose | --verbose | --verbos | --verbo | --verb) verbose=yes ;; -version | --version | --versio | --versi | --vers) echo "configure generated by autoconf version 2.13" exit 0 ;; -with-* | --with-*) ac_package=`echo $ac_option|sed -e 's/-*with-//' -e 's/=.*//'` # Reject names that are not valid shell variable names. if test -n "`echo $ac_package| sed 's/[-_a-zA-Z0-9]//g'`"; then { echo "configure: error: $ac_package: invalid package name" 1>&2; exit 1; } fi ac_package=`echo $ac_package| sed 's/-/_/g'` case "$ac_option" in *=*) ;; *) ac_optarg=yes ;; esac eval "with_${ac_package}='$ac_optarg'" ;; -without-* | --without-*) ac_package=`echo $ac_option|sed -e 's/-*without-//'` # Reject names that are not valid shell variable names. if test -n "`echo $ac_package| sed 's/[-a-zA-Z0-9_]//g'`"; then { echo "configure: error: $ac_package: invalid package name" 1>&2; exit 1; } fi ac_package=`echo $ac_package| sed 's/-/_/g'` eval "with_${ac_package}=no" ;; --x) # Obsolete; use --with-x. with_x=yes ;; -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ | --x-incl | --x-inc | --x-in | --x-i) ac_prev=x_includes ;; -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) x_includes="$ac_optarg" ;; -x-libraries | --x-libraries | --x-librarie | --x-librari \ | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) ac_prev=x_libraries ;; -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) x_libraries="$ac_optarg" ;; -*) { echo "configure: error: $ac_option: invalid option; use --help to show usage" 1>&2; exit 1; } ;; *) if test -n "`echo $ac_option| sed 's/[-a-z0-9.]//g'`"; then echo "configure: warning: $ac_option: invalid host type" 1>&2 fi if test "x$nonopt" != xNONE; then { echo "configure: error: can only configure for one host and one target at a time" 1>&2; exit 1; } fi nonopt="$ac_option" ;; esac done if test -n "$ac_prev"; then { echo "configure: error: missing argument to --`echo $ac_prev | sed 's/_/-/g'`" 1>&2; exit 1; } fi trap 'rm -fr conftest* confdefs* core core.* *.core $ac_clean_files; exit 1' 1 2 15 # File descriptor usage: # 0 standard input # 1 file creation # 2 errors and warnings # 3 some systems may open it to /dev/tty # 4 used on the Kubota Titan # 6 checking for... messages and results # 5 compiler messages saved in config.log if test "$silent" = yes; then exec 6>/dev/null else exec 6>&1 fi exec 5>./config.log echo "\ This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. " 1>&5 # Strip out --no-create and --no-recursion so they do not pile up. # Also quote any args containing shell metacharacters. ac_configure_args= for ac_arg do case "$ac_arg" in -no-create | --no-create | --no-creat | --no-crea | --no-cre \ | --no-cr | --no-c) ;; -no-recursion | --no-recursion | --no-recursio | --no-recursi \ | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) ;; *" "*|*" "*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?]*) ac_configure_args="$ac_configure_args '$ac_arg'" ;; *) ac_configure_args="$ac_configure_args $ac_arg" ;; esac done # NLS nuisances. # Only set these to C if already set. 
These must not be set unconditionally # because not all systems understand e.g. LANG=C (notably SCO). # Fixing LC_MESSAGES prevents Solaris sh from translating var values in `set'! # Non-C LC_CTYPE values break the ctype check. if test "${LANG+set}" = set; then LANG=C; export LANG; fi if test "${LC_ALL+set}" = set; then LC_ALL=C; export LC_ALL; fi if test "${LC_MESSAGES+set}" = set; then LC_MESSAGES=C; export LC_MESSAGES; fi if test "${LC_CTYPE+set}" = set; then LC_CTYPE=C; export LC_CTYPE; fi # confdefs.h avoids OS command line length limits that DEFS can exceed. rm -rf conftest* confdefs.h # AIX cpp loses on an empty file, so make sure it contains at least a newline. echo > confdefs.h # A filename unique to this package, relative to the directory that # configure is in, which we can look for to find out if srcdir is correct. ac_unique_file=mkgshmm.in # Find the source files, if location was not specified. if test -z "$srcdir"; then ac_srcdir_defaulted=yes # Try the directory containing this script, then its parent. ac_prog=$0 ac_confdir=`echo $ac_prog|sed 's%/[^/][^/]*$%%'` test "x$ac_confdir" = "x$ac_prog" && ac_confdir=. srcdir=$ac_confdir if test ! -r $srcdir/$ac_unique_file; then srcdir=.. fi else ac_srcdir_defaulted=no fi if test ! -r $srcdir/$ac_unique_file; then if test "$ac_srcdir_defaulted" = yes; then { echo "configure: error: can not find sources in $ac_confdir or .." 1>&2; exit 1; } else { echo "configure: error: can not find sources in $srcdir" 1>&2; exit 1; } fi fi srcdir=`echo "${srcdir}" | sed 's%\([^/]\)/*$%\1%'` # Prefer explicitly selected file to automatically selected ones. if test -z "$CONFIG_SITE"; then if test "x$prefix" != xNONE; then CONFIG_SITE="$prefix/share/config.site $prefix/etc/config.site" else CONFIG_SITE="$ac_default_prefix/share/config.site $ac_default_prefix/etc/config.site" fi fi for ac_site_file in $CONFIG_SITE; do if test -r "$ac_site_file"; then echo "loading site script $ac_site_file" . "$ac_site_file" fi done if test -r "$cache_file"; then echo "loading cache $cache_file" . $cache_file else echo "creating cache $cache_file" > $cache_file fi ac_ext=c # CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options. ac_cpp='$CPP $CPPFLAGS' ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5' ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5' cross_compiling=$ac_cv_prog_cc_cross ac_exeext= ac_objext=o if (echo "testing\c"; echo 1,2,3) | grep c >/dev/null; then # Stardent Vistra SVR4 grep lacks -e, says ghazi@caip.rutgers.edu. if (echo -n testing; echo 1,2,3) | sed s/-n/xn/ | grep xn >/dev/null; then ac_n= ac_c=' ' ac_t=' ' else ac_n=-n ac_c= ac_t= fi else ac_n= ac_c='\c' ac_t= fi ac_aux_dir= for ac_dir in ../support $srcdir/../support; do if test -f $ac_dir/install-sh; then ac_aux_dir=$ac_dir ac_install_sh="$ac_aux_dir/install-sh -c" break elif test -f $ac_dir/install.sh; then ac_aux_dir=$ac_dir ac_install_sh="$ac_aux_dir/install.sh -c" break fi done if test -z "$ac_aux_dir"; then { echo "configure: error: can not find install-sh or install.sh in ../support $srcdir/../support" 1>&2; exit 1; } fi ac_config_guess=$ac_aux_dir/config.guess ac_config_sub=$ac_aux_dir/config.sub ac_configure=$ac_aux_dir/configure # This should be Cygnus configure. # Find a good install program. We prefer a C program (faster), # so one script is as good as another. 
But avoid the broken or # incompatible versions: # SysV /etc/install, /usr/sbin/install # SunOS /usr/etc/install # IRIX /sbin/install # AIX /bin/install # AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag # AFS /usr/afsws/bin/install, which mishandles nonexistent args # SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff" # ./install, which can be erroneously created by make from ./install.sh. echo $ac_n "checking for a BSD compatible install""... $ac_c" 1>&6 echo "configure:556: checking for a BSD compatible install" >&5 if test -z "$INSTALL"; then if eval "test \"`echo '$''{'ac_cv_path_install'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else IFS="${IFS= }"; ac_save_IFS="$IFS"; IFS=":" for ac_dir in $PATH; do # Account for people who put trailing slashes in PATH elements. case "$ac_dir/" in /|./|.//|/etc/*|/usr/sbin/*|/usr/etc/*|/sbin/*|/usr/afsws/bin/*|/usr/ucb/*) ;; *) # OSF1 and SCO ODT 3.0 have their own names for install. # Don't use installbsd from OSF since it installs stuff as root # by default. for ac_prog in ginstall scoinst install; do if test -f $ac_dir/$ac_prog; then if test $ac_prog = install && grep dspmsg $ac_dir/$ac_prog >/dev/null 2>&1; then # AIX install. It has an incompatible calling convention. : else ac_cv_path_install="$ac_dir/$ac_prog -c" break 2 fi fi done ;; esac done IFS="$ac_save_IFS" fi if test "${ac_cv_path_install+set}" = set; then INSTALL="$ac_cv_path_install" else # As a last resort, use the slow shell script. We don't cache a # path for INSTALL within a source directory, because that will # break other packages using the cache if that directory is # removed, or if the path is relative. INSTALL="$ac_install_sh" fi fi echo "$ac_t""$INSTALL" 1>&6 # Use test -z because SunOS4 sh mishandles braces in ${var-val}. # It thinks the first close brace ends the variable substitution. test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}' test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL_PROGRAM}' test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644' # Extract the first word of "rm", so it can be a program name with args. set dummy rm; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 echo "configure:611: checking for $ac_word" >&5 if eval "test \"`echo '$''{'ac_cv_path_RM'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else case "$RM" in /*) ac_cv_path_RM="$RM" # Let the user override the test with a path. ;; ?:/*) ac_cv_path_RM="$RM" # Let the user override the test with a dos path. ;; *) IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" ac_dummy="$PATH" for ac_dir in $ac_dummy; do test -z "$ac_dir" && ac_dir=. if test -f $ac_dir/$ac_word; then ac_cv_path_RM="$ac_dir/$ac_word" break fi done IFS="$ac_save_ifs" ;; esac fi RM="$ac_cv_path_RM" if test -n "$RM"; then echo "$ac_t""$RM" 1>&6 else echo "$ac_t""no" 1>&6 fi # Extract the first word of "perl", so it can be a program name with args. set dummy perl; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 echo "configure:646: checking for $ac_word" >&5 if eval "test \"`echo '$''{'ac_cv_path_PERL'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else case "$PERL" in /*) ac_cv_path_PERL="$PERL" # Let the user override the test with a path. ;; ?:/*) ac_cv_path_PERL="$PERL" # Let the user override the test with a dos path. ;; *) IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" ac_dummy="$PATH" for ac_dir in $ac_dummy; do test -z "$ac_dir" && ac_dir=. 
if test -f $ac_dir/$ac_word; then ac_cv_path_PERL="$ac_dir/$ac_word" break fi done IFS="$ac_save_ifs" ;; esac fi PERL="$ac_cv_path_PERL" if test -n "$PERL"; then echo "$ac_t""$PERL" 1>&6 else echo "$ac_t""no" 1>&6 fi echo $ac_n "checking for Cygwin environment""... $ac_c" 1>&6 echo "configure:679: checking for Cygwin environment" >&5 if eval "test \"`echo '$''{'ac_cv_cygwin'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext <&5; (eval $ac_compile) 2>&5; }; then rm -rf conftest* ac_cv_cygwin=yes else echo "configure: failed program was:" >&5 cat conftest.$ac_ext >&5 rm -rf conftest* ac_cv_cygwin=no fi rm -f conftest* rm -f conftest* fi echo "$ac_t""$ac_cv_cygwin" 1>&6 CYGWIN= test "$ac_cv_cygwin" = yes && CYGWIN=yes echo $ac_n "checking for mingw32 environment""... $ac_c" 1>&6 echo "configure:712: checking for mingw32 environment" >&5 if eval "test \"`echo '$''{'ac_cv_mingw32'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext <&5; (eval $ac_compile) 2>&5; }; then rm -rf conftest* ac_cv_mingw32=yes else echo "configure: failed program was:" >&5 cat conftest.$ac_ext >&5 rm -rf conftest* ac_cv_mingw32=no fi rm -f conftest* rm -f conftest* fi echo "$ac_t""$ac_cv_mingw32" 1>&6 MINGW32= test "$ac_cv_mingw32" = yes && MINGW32=yes echo $ac_n "checking for executable suffix""... $ac_c" 1>&6 echo "configure:743: checking for executable suffix" >&5 if eval "test \"`echo '$''{'ac_cv_exeext'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else if test "$CYGWIN" = yes || test "$MINGW32" = yes; then ac_cv_exeext=.exe else rm -f conftest* echo 'int main () { return 0; }' > conftest.$ac_ext ac_cv_exeext= if { (eval echo configure:753: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; }; then for file in conftest.*; do case $file in *.c | *.o | *.obj) ;; *) ac_cv_exeext=`echo $file | sed -e s/conftest//` ;; esac done else { echo "configure: error: installation or configuration problem: compiler cannot create executables." 1>&2; exit 1; } fi rm -f conftest* test x"${ac_cv_exeext}" = x && ac_cv_exeext=no fi fi EXEEXT="" test x"${ac_cv_exeext}" != xno && EXEEXT=${ac_cv_exeext} echo "$ac_t""${ac_cv_exeext}" 1>&6 ac_exeext=$EXEEXT trap '' 1 2 15 cat > confcache <<\EOF # This file is a shell script that caches the results of configure # tests run on this system so they can be shared between configure # scripts and configure runs. It is not useful on other systems. # If it contains results you don't want to keep, you may remove or edit it. # # By default, configure uses ./config.cache as the cache file, # creating it if it does not exist already. You can give configure # the --cache-file=FILE option to use a different cache file; that is # what configure does when it calls configure scripts in # subdirectories, so they share the cache. # Giving --cache-file=/dev/null disables caching, for debugging configure. # config.status only pays attention to the cache file if you give it the # --recheck option to rerun configure. # EOF # The following way of writing the cache mishandles newlines in values, # but we know of no workaround that is simple, portable, and efficient. # So, don't put newlines in cache variables' values. # Ultrix sh set writes to stderr and can't be redirected directly, # and sets the high bit in the cache file unless we assign to the vars. 
(set) 2>&1 | case `(ac_space=' '; set | grep ac_space) 2>&1` in *ac_space=\ *) # `set' does not quote correctly, so add quotes (double-quote substitution # turns \\\\ into \\, and sed turns \\ into \). sed -n \ -e "s/'/'\\\\''/g" \ -e "s/^\\([a-zA-Z0-9_]*_cv_[a-zA-Z0-9_]*\\)=\\(.*\\)/\\1=\${\\1='\\2'}/p" ;; *) # `set' quotes correctly as required by POSIX, so do not add quotes. sed -n -e 's/^\([a-zA-Z0-9_]*_cv_[a-zA-Z0-9_]*\)=\(.*\)/\1=${\1=\2}/p' ;; esac >> confcache if cmp -s $cache_file confcache; then : else if test -w $cache_file; then echo "updating cache $cache_file" cat confcache > $cache_file else echo "not updating unwritable cache $cache_file" fi fi rm -f confcache trap 'rm -fr conftest* confdefs* core core.* *.core $ac_clean_files; exit 1' 1 2 15 test "x$prefix" = xNONE && prefix=$ac_default_prefix # Let make expand exec_prefix. test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' # Any assignment to VPATH causes Sun make to only execute # the first set of double-colon rules, so remove it if not needed. # If there is a colon in the path, we need to keep it. if test "x$srcdir" = x.; then ac_vpsub='/^[ ]*VPATH[ ]*=[^:]*$/d' fi trap 'rm -f $CONFIG_STATUS conftest*; exit 1' 1 2 15 # Transform confdefs.h into DEFS. # Protect against shell expansion while executing Makefile rules. # Protect against Makefile macro expansion. cat > conftest.defs <<\EOF s%#define \([A-Za-z_][A-Za-z0-9_]*\) *\(.*\)%-D\1=\2%g s%[ `~#$^&*(){}\\|;'"<>?]%\\&%g s%\[%\\&%g s%\]%\\&%g s%\$%$$%g EOF DEFS=`sed -f conftest.defs confdefs.h | tr '\012' ' '` rm -f conftest.defs # Without the "./", some shells look in PATH for config.status. : ${CONFIG_STATUS=./config.status} echo creating $CONFIG_STATUS rm -f $CONFIG_STATUS cat > $CONFIG_STATUS </dev/null | sed 1q`: # # $0 $ac_configure_args # # Compiler output produced by configure, useful for debugging # configure, is in ./config.log if it exists. 
ac_cs_usage="Usage: $CONFIG_STATUS [--recheck] [--version] [--help]" for ac_option do case "\$ac_option" in -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) echo "running \${CONFIG_SHELL-/bin/sh} $0 $ac_configure_args --no-create --no-recursion" exec \${CONFIG_SHELL-/bin/sh} $0 $ac_configure_args --no-create --no-recursion ;; -version | --version | --versio | --versi | --vers | --ver | --ve | --v) echo "$CONFIG_STATUS generated by autoconf version 2.13" exit 0 ;; -help | --help | --hel | --he | --h) echo "\$ac_cs_usage"; exit 0 ;; *) echo "\$ac_cs_usage"; exit 1 ;; esac done ac_given_srcdir=$srcdir ac_given_INSTALL="$INSTALL" trap 'rm -fr `echo "Makefile mkgshmm" | sed "s/:[^ ]*//g"` conftest*; exit 1' 1 2 15 EOF cat >> $CONFIG_STATUS < conftest.subs <<\\CEOF $ac_vpsub $extrasub s%@SHELL@%$SHELL%g s%@CFLAGS@%$CFLAGS%g s%@CPPFLAGS@%$CPPFLAGS%g s%@CXXFLAGS@%$CXXFLAGS%g s%@FFLAGS@%$FFLAGS%g s%@DEFS@%$DEFS%g s%@LDFLAGS@%$LDFLAGS%g s%@LIBS@%$LIBS%g s%@exec_prefix@%$exec_prefix%g s%@prefix@%$prefix%g s%@program_transform_name@%$program_transform_name%g s%@bindir@%$bindir%g s%@sbindir@%$sbindir%g s%@libexecdir@%$libexecdir%g s%@datadir@%$datadir%g s%@sysconfdir@%$sysconfdir%g s%@sharedstatedir@%$sharedstatedir%g s%@localstatedir@%$localstatedir%g s%@libdir@%$libdir%g s%@includedir@%$includedir%g s%@oldincludedir@%$oldincludedir%g s%@infodir@%$infodir%g s%@mandir@%$mandir%g s%@INSTALL_PROGRAM@%$INSTALL_PROGRAM%g s%@INSTALL_SCRIPT@%$INSTALL_SCRIPT%g s%@INSTALL_DATA@%$INSTALL_DATA%g s%@RM@%$RM%g s%@PERL@%$PERL%g s%@EXEEXT@%$EXEEXT%g CEOF EOF cat >> $CONFIG_STATUS <<\EOF # Split the substitutions into bite-sized pieces for seds with # small command number limits, like on Digital OSF/1 and HP-UX. ac_max_sed_cmds=90 # Maximum number of lines to put in a sed script. ac_file=1 # Number of current file. ac_beg=1 # First line for current file. ac_end=$ac_max_sed_cmds # Line after last line for current file. ac_more_lines=: ac_sed_cmds="" while $ac_more_lines; do if test $ac_beg -gt 1; then sed "1,${ac_beg}d; ${ac_end}q" conftest.subs > conftest.s$ac_file else sed "${ac_end}q" conftest.subs > conftest.s$ac_file fi if test ! -s conftest.s$ac_file; then ac_more_lines=false rm -f conftest.s$ac_file else if test -z "$ac_sed_cmds"; then ac_sed_cmds="sed -f conftest.s$ac_file" else ac_sed_cmds="$ac_sed_cmds | sed -f conftest.s$ac_file" fi ac_file=`expr $ac_file + 1` ac_beg=$ac_end ac_end=`expr $ac_end + $ac_max_sed_cmds` fi done if test -z "$ac_sed_cmds"; then ac_sed_cmds=cat fi EOF cat >> $CONFIG_STATUS <> $CONFIG_STATUS <<\EOF for ac_file in .. $CONFIG_FILES; do if test "x$ac_file" != x..; then # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in". case "$ac_file" in *:*) ac_file_in=`echo "$ac_file"|sed 's%[^:]*:%%'` ac_file=`echo "$ac_file"|sed 's%:.*%%'` ;; *) ac_file_in="${ac_file}.in" ;; esac # Adjust a relative srcdir, top_srcdir, and INSTALL for subdirectories. # Remove last slash and all that follows it. Not all systems have dirname. ac_dir=`echo $ac_file|sed 's%/[^/][^/]*$%%'` if test "$ac_dir" != "$ac_file" && test "$ac_dir" != .; then # The file is in a subdirectory. test ! -d "$ac_dir" && mkdir "$ac_dir" ac_dir_suffix="/`echo $ac_dir|sed 's%^\./%%'`" # A "../" for each directory in $ac_dir_suffix. ac_dots=`echo $ac_dir_suffix|sed 's%/[^/]*%../%g'` else ac_dir_suffix= ac_dots= fi case "$ac_given_srcdir" in .) srcdir=. if test -z "$ac_dots"; then top_srcdir=. 
else top_srcdir=`echo $ac_dots|sed 's%/$%%'`; fi ;; /*) srcdir="$ac_given_srcdir$ac_dir_suffix"; top_srcdir="$ac_given_srcdir" ;; *) # Relative path. srcdir="$ac_dots$ac_given_srcdir$ac_dir_suffix" top_srcdir="$ac_dots$ac_given_srcdir" ;; esac case "$ac_given_INSTALL" in [/$]*) INSTALL="$ac_given_INSTALL" ;; *) INSTALL="$ac_dots$ac_given_INSTALL" ;; esac echo creating "$ac_file" rm -f "$ac_file" configure_input="Generated automatically from `echo $ac_file_in|sed 's%.*/%%'` by configure." case "$ac_file" in *Makefile*) ac_comsub="1i\\ # $configure_input" ;; *) ac_comsub= ;; esac ac_file_inputs=`echo $ac_file_in|sed -e "s%^%$ac_given_srcdir/%" -e "s%:% $ac_given_srcdir/%g"` sed -e "$ac_comsub s%@configure_input@%$configure_input%g s%@srcdir@%$srcdir%g s%@top_srcdir@%$top_srcdir%g s%@INSTALL@%$INSTALL%g " $ac_file_inputs | (eval "$ac_sed_cmds") > $ac_file fi; done rm -f conftest.s* EOF cat >> $CONFIG_STATUS <> $CONFIG_STATUS <<\EOF chmod +x mkgshmm exit 0 EOF chmod +x $CONFIG_STATUS rm -fr confdefs* $ac_clean_files test "$no_create" = yes || ${CONFIG_SHELL-/bin/sh} $CONFIG_STATUS || exit 1 julius-4.2.2/mkgshmm/Makefile.in0000644001051700105040000000157312004452411015037 0ustar ritrlab# Copyright (c) 2001-2012 Kawahara Lab., Kyoto University # Copyright (c) 2001-2005 Shikano Lab., Nara Institute of Science and Technology # Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology # All rights reserved SHELL=/bin/sh RM=@RM@ -f prefix=@prefix@ exec_prefix=@exec_prefix@ INSTALL=@INSTALL@ ############################################################ TARGET=mkgshmm all: $(TARGET) $(TARGET): @echo 'nothing to do. just do "make install"' ############################################################ install: install.bin install.bin: $(TARGET) ${INSTALL} -d @bindir@ @INSTALL_PROGRAM@ $(TARGET) @bindir@ ############################################################ clean: $(RM) *.o *~ core $(RM) $(TARGET) $(TARGET).exe distclean: $(RM) *.o *~ core $(RM) $(TARGET) $(TARGET).exe $(RM) config.cache config.log config.status $(RM) Makefile julius-4.2.2/mkgshmm/00readme.txt0000644001051700105040000000260311071102425015123 0ustar ritrlab mkgshmm MKGSHMM(1) MKGSHMM(1) NAME mkgshmm - convert monophone HMM to GS HMM for Julius SYNOPSIS mkgshmm {monophone_hmmdefs} > {outputfile} DESCRIPTION mkgshmm converts monophone HMM definition file in HTK format into a special format for Gaussian Mixture Selection (GMS) in Julius. GMS is an algorithm to reduce the amount of acoustic computation with triphone HMM, by pre-selection of promising gaussian mixtures using likelihoods of corresponding monophone mixtures. EXAMPLES (1) Prepare a monophone model which was trained by the same corpus as target triphone model. (2) Convert the monophone model using mkgshmm. (3) Specify the output file in Julius with option "-gshmm" SEE ALSO julius ( 1 ) COPYRIGHT Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan Copyright (c) 1991-2008 Kawahara Lab., Kyoto University Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology Copyright (c) 2005-2008 Julius project team, Nagoya Institute of Technology LICENSE The same as Julius. 
10/02/2008 MKGSHMM(1) julius-4.2.2/mkgshmm/configure.in0000644001051700105040000000066512004452411015304 0ustar ritrlabdnl Copyright (c) 2001-2012 Kawahara Lab., Kyoto University dnl Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology dnl Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology dnl All rights reserved AC_INIT(mkgshmm.in) AC_CONFIG_AUX_DIR(../support) AC_PROG_INSTALL AC_PATH_PROG(RM, rm) AC_PATH_PROG(PERL, perl) AC_EXEEXT AC_OUTPUT_COMMANDS([chmod +x mkgshmm]) AC_OUTPUT(Makefile mkgshmm) julius-4.2.2/mkgshmm/00readme-ja.txt0000644001051700105040000000270211071102425015513 0ustar ritrlab mkgshmm MKGSHMM(1) MKGSHMM(1) O mkgshmm - mtHHMM GMS p Tv mkgshmm {monophone_hmmdefs} > {outputfile} DESCRIPTION mkgshmm HTK`ョmonophone HMM Julius Gaussian Mixture Selection (GMS) pperlXNvgD GMSJulius-3.2T|[gxvZ@D t [ monophone xtriphonePTM\ I CxvZD EXAMPLES ^[QbgtriphonePTMCR[pXwK monophone fpD monophonef mkgshmm p GMS pD Julius "-gshmm" wD GMSpftriphonePTMR[pXKv_ Dgshmm ~X}b`IC\ D SEE ALSO julius ( 1 ) COPYRIGHT Copyright (c) 1991-2008 sw コ Copyright (c) 1997-2000 U(IPA) Copyright (c) 2000-2008 [wZpw@w ュコ Copyright (c) 2005-2008 Hw JuliusJ`[ LICENSE Julius gpD 10/02/2008 MKGSHMM(1) julius-4.2.2/mkgshmm/mkgshmm.in0000755001051700105040000000325212004452411014764 0ustar ritrlab#!@PERL@ # Copyright (c) 2001-2012 Kawahara Lab., Kyoto University # Copyright (c) 2001-2005 Shikano Lab., Nara Institute of Science and Technology # Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology # All rights reserved # # mkgshmm.pl --- output Gaussian Mixture Selection model for Julius-3.2 # from monophone HMM (gziped file allowed) # # # ver0.1 2001/06/13 # if ($#ARGV < 0) { print "convert monophone HMM to GS HMM for Julius\n"; print "usage: $0 monophone_hmmdefs > outputfile\n"; exit; } # # 1) extract states and output as macros # open(SRC, "gzip -dcf $ARGV[0] |") || die "cannot open $ARGV[0]\n"; $sw = 0; ## output till a "~h" appears while () { if (/^\~h \"(.*)\"/) { $name = $1; last; } print; } ## extract states as macros while () { if (/^\~h \"(.*)\"/) { $name = $1; next; } if (/^ ([0-9]*)/i) { $stateloc = $1; $sw = 1; print "~s \"${name}${stateloc}m\"\n"; # 'm' is needed next; } if (/^/i) { $sw = 0; } if ($sw == 1) { print; } } close(SRC); # # 2) reopen source and output the rest # open(SRC, "gzip -dcf $ARGV[0] |") || die "cannot open $ARGV[0]\n"; $sw = 0; ## skip till a "~h" appears while () { if (/^\~h \"(.*)\"/) { $name = $1; print; last; } } ## output HMMs with refering to the macros while () { if (/^\~h \"(.*)\"/) { $name = $1; print; next; } if (/^ ([0-9]*)/i) { $stateloc = $1; $sw = 1; print; print "~s \"${name}${stateloc}m\"\n"; # 'm' is needed next; } if (/^/i) { $sw = 0; } if ($sw == 0) { print; } } close(SRC); ##################### end of program julius-4.2.2/gramtools/0000755001051700105040000000000012004463507013340 5ustar ritrlabjulius-4.2.2/gramtools/configure0000755001051700105040000014554212001663601015254 0ustar ritrlab#! /bin/sh # Guess values for system-dependent variables and create Makefiles. # Generated automatically using autoconf version 2.13 # Copyright (C) 1992, 93, 94, 95, 96 Free Software Foundation, Inc. # # This configure script is free software; the Free Software Foundation # gives unlimited permission to copy, distribute and modify it. # Defaults: ac_help= ac_default_prefix=/usr/local # Any additions from configure.in: # Initialize some variables set by options. 
# The variables have the same names as the options, with # dashes changed to underlines. build=NONE cache_file=./config.cache exec_prefix=NONE host=NONE no_create= nonopt=NONE no_recursion= prefix=NONE program_prefix=NONE program_suffix=NONE program_transform_name=s,x,x, silent= site= srcdir= target=NONE verbose= x_includes=NONE x_libraries=NONE bindir='${exec_prefix}/bin' sbindir='${exec_prefix}/sbin' libexecdir='${exec_prefix}/libexec' datadir='${prefix}/share' sysconfdir='${prefix}/etc' sharedstatedir='${prefix}/com' localstatedir='${prefix}/var' libdir='${exec_prefix}/lib' includedir='${prefix}/include' oldincludedir='/usr/include' infodir='${prefix}/info' mandir='${prefix}/man' # Initialize some other variables. subdirs= MFLAGS= MAKEFLAGS= SHELL=${CONFIG_SHELL-/bin/sh} # Maximum number of lines to put in a shell here document. ac_max_here_lines=12 ac_prev= for ac_option do # If the previous option needs an argument, assign it. if test -n "$ac_prev"; then eval "$ac_prev=\$ac_option" ac_prev= continue fi case "$ac_option" in -*=*) ac_optarg=`echo "$ac_option" | sed 's/[-_a-zA-Z0-9]*=//'` ;; *) ac_optarg= ;; esac # Accept the important Cygnus configure options, so we can diagnose typos. case "$ac_option" in -bindir | --bindir | --bindi | --bind | --bin | --bi) ac_prev=bindir ;; -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) bindir="$ac_optarg" ;; -build | --build | --buil | --bui | --bu) ac_prev=build ;; -build=* | --build=* | --buil=* | --bui=* | --bu=*) build="$ac_optarg" ;; -cache-file | --cache-file | --cache-fil | --cache-fi \ | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) ac_prev=cache_file ;; -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) cache_file="$ac_optarg" ;; -datadir | --datadir | --datadi | --datad | --data | --dat | --da) ac_prev=datadir ;; -datadir=* | --datadir=* | --datadi=* | --datad=* | --data=* | --dat=* \ | --da=*) datadir="$ac_optarg" ;; -disable-* | --disable-*) ac_feature=`echo $ac_option|sed -e 's/-*disable-//'` # Reject names that are not valid shell variable names. if test -n "`echo $ac_feature| sed 's/[-a-zA-Z0-9_]//g'`"; then { echo "configure: error: $ac_feature: invalid feature name" 1>&2; exit 1; } fi ac_feature=`echo $ac_feature| sed 's/-/_/g'` eval "enable_${ac_feature}=no" ;; -enable-* | --enable-*) ac_feature=`echo $ac_option|sed -e 's/-*enable-//' -e 's/=.*//'` # Reject names that are not valid shell variable names. if test -n "`echo $ac_feature| sed 's/[-_a-zA-Z0-9]//g'`"; then { echo "configure: error: $ac_feature: invalid feature name" 1>&2; exit 1; } fi ac_feature=`echo $ac_feature| sed 's/-/_/g'` case "$ac_option" in *=*) ;; *) ac_optarg=yes ;; esac eval "enable_${ac_feature}='$ac_optarg'" ;; -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ | --exec | --exe | --ex) ac_prev=exec_prefix ;; -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ | --exec=* | --exe=* | --ex=*) exec_prefix="$ac_optarg" ;; -gas | --gas | --ga | --g) # Obsolete; use --with-gas. with_gas=yes ;; -help | --help | --hel | --he) # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. 
cat << EOF Usage: configure [options] [host] Options: [defaults in brackets after descriptions] Configuration: --cache-file=FILE cache test results in FILE --help print this message --no-create do not create output files --quiet, --silent do not print \`checking...' messages --version print the version of autoconf that created configure Directory and file names: --prefix=PREFIX install architecture-independent files in PREFIX [$ac_default_prefix] --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX [same as prefix] --bindir=DIR user executables in DIR [EPREFIX/bin] --sbindir=DIR system admin executables in DIR [EPREFIX/sbin] --libexecdir=DIR program executables in DIR [EPREFIX/libexec] --datadir=DIR read-only architecture-independent data in DIR [PREFIX/share] --sysconfdir=DIR read-only single-machine data in DIR [PREFIX/etc] --sharedstatedir=DIR modifiable architecture-independent data in DIR [PREFIX/com] --localstatedir=DIR modifiable single-machine data in DIR [PREFIX/var] --libdir=DIR object code libraries in DIR [EPREFIX/lib] --includedir=DIR C header files in DIR [PREFIX/include] --oldincludedir=DIR C header files for non-gcc in DIR [/usr/include] --infodir=DIR info documentation in DIR [PREFIX/info] --mandir=DIR man documentation in DIR [PREFIX/man] --srcdir=DIR find the sources in DIR [configure dir or ..] --program-prefix=PREFIX prepend PREFIX to installed program names --program-suffix=SUFFIX append SUFFIX to installed program names --program-transform-name=PROGRAM run sed PROGRAM on installed program names EOF cat << EOF Host type: --build=BUILD configure for building on BUILD [BUILD=HOST] --host=HOST configure for HOST [guessed] --target=TARGET configure for TARGET [TARGET=HOST] Features and packages: --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no) --enable-FEATURE[=ARG] include FEATURE [ARG=yes] --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) --x-includes=DIR X include files are in DIR --x-libraries=DIR X library files are in DIR EOF if test -n "$ac_help"; then echo "--enable and --with options recognized:$ac_help" fi exit 0 ;; -host | --host | --hos | --ho) ac_prev=host ;; -host=* | --host=* | --hos=* | --ho=*) host="$ac_optarg" ;; -includedir | --includedir | --includedi | --included | --include \ | --includ | --inclu | --incl | --inc) ac_prev=includedir ;; -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ | --includ=* | --inclu=* | --incl=* | --inc=*) includedir="$ac_optarg" ;; -infodir | --infodir | --infodi | --infod | --info | --inf) ac_prev=infodir ;; -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) infodir="$ac_optarg" ;; -libdir | --libdir | --libdi | --libd) ac_prev=libdir ;; -libdir=* | --libdir=* | --libdi=* | --libd=*) libdir="$ac_optarg" ;; -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \ | --libexe | --libex | --libe) ac_prev=libexecdir ;; -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ | --libexe=* | --libex=* | --libe=*) libexecdir="$ac_optarg" ;; -localstatedir | --localstatedir | --localstatedi | --localstated \ | --localstate | --localstat | --localsta | --localst \ | --locals | --local | --loca | --loc | --lo) ac_prev=localstatedir ;; -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ | --localstate=* | --localstat=* | --localsta=* | --localst=* \ | --locals=* | --local=* | --loca=* | --loc=* | --lo=*) localstatedir="$ac_optarg" ;; 
-mandir | --mandir | --mandi | --mand | --man | --ma | --m) ac_prev=mandir ;; -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*) mandir="$ac_optarg" ;; -nfp | --nfp | --nf) # Obsolete; use --without-fp. with_fp=no ;; -no-create | --no-create | --no-creat | --no-crea | --no-cre \ | --no-cr | --no-c) no_create=yes ;; -no-recursion | --no-recursion | --no-recursio | --no-recursi \ | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) no_recursion=yes ;; -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ | --oldin | --oldi | --old | --ol | --o) ac_prev=oldincludedir ;; -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) oldincludedir="$ac_optarg" ;; -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) ac_prev=prefix ;; -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) prefix="$ac_optarg" ;; -program-prefix | --program-prefix | --program-prefi | --program-pref \ | --program-pre | --program-pr | --program-p) ac_prev=program_prefix ;; -program-prefix=* | --program-prefix=* | --program-prefi=* \ | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) program_prefix="$ac_optarg" ;; -program-suffix | --program-suffix | --program-suffi | --program-suff \ | --program-suf | --program-su | --program-s) ac_prev=program_suffix ;; -program-suffix=* | --program-suffix=* | --program-suffi=* \ | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) program_suffix="$ac_optarg" ;; -program-transform-name | --program-transform-name \ | --program-transform-nam | --program-transform-na \ | --program-transform-n | --program-transform- \ | --program-transform | --program-transfor \ | --program-transfo | --program-transf \ | --program-trans | --program-tran \ | --progr-tra | --program-tr | --program-t) ac_prev=program_transform_name ;; -program-transform-name=* | --program-transform-name=* \ | --program-transform-nam=* | --program-transform-na=* \ | --program-transform-n=* | --program-transform-=* \ | --program-transform=* | --program-transfor=* \ | --program-transfo=* | --program-transf=* \ | --program-trans=* | --program-tran=* \ | --progr-tra=* | --program-tr=* | --program-t=*) program_transform_name="$ac_optarg" ;; -q | -quiet | --quiet | --quie | --qui | --qu | --q \ | -silent | --silent | --silen | --sile | --sil) silent=yes ;; -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) ac_prev=sbindir ;; -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ | --sbi=* | --sb=*) sbindir="$ac_optarg" ;; -sharedstatedir | --sharedstatedir | --sharedstatedi \ | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ | --sharedst | --shareds | --shared | --share | --shar \ | --sha | --sh) ac_prev=sharedstatedir ;; -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \ | --sha=* | --sh=*) sharedstatedir="$ac_optarg" ;; -site | --site | --sit) ac_prev=site ;; -site=* | --site=* | --sit=*) site="$ac_optarg" ;; -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) ac_prev=srcdir ;; -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) srcdir="$ac_optarg" ;; -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | 
--sysconf \ | --syscon | --sysco | --sysc | --sys | --sy) ac_prev=sysconfdir ;; -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*) sysconfdir="$ac_optarg" ;; -target | --target | --targe | --targ | --tar | --ta | --t) ac_prev=target ;; -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) target="$ac_optarg" ;; -v | -verbose | --verbose | --verbos | --verbo | --verb) verbose=yes ;; -version | --version | --versio | --versi | --vers) echo "configure generated by autoconf version 2.13" exit 0 ;; -with-* | --with-*) ac_package=`echo $ac_option|sed -e 's/-*with-//' -e 's/=.*//'` # Reject names that are not valid shell variable names. if test -n "`echo $ac_package| sed 's/[-_a-zA-Z0-9]//g'`"; then { echo "configure: error: $ac_package: invalid package name" 1>&2; exit 1; } fi ac_package=`echo $ac_package| sed 's/-/_/g'` case "$ac_option" in *=*) ;; *) ac_optarg=yes ;; esac eval "with_${ac_package}='$ac_optarg'" ;; -without-* | --without-*) ac_package=`echo $ac_option|sed -e 's/-*without-//'` # Reject names that are not valid shell variable names. if test -n "`echo $ac_package| sed 's/[-a-zA-Z0-9_]//g'`"; then { echo "configure: error: $ac_package: invalid package name" 1>&2; exit 1; } fi ac_package=`echo $ac_package| sed 's/-/_/g'` eval "with_${ac_package}=no" ;; --x) # Obsolete; use --with-x. with_x=yes ;; -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ | --x-incl | --x-inc | --x-in | --x-i) ac_prev=x_includes ;; -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) x_includes="$ac_optarg" ;; -x-libraries | --x-libraries | --x-librarie | --x-librari \ | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) ac_prev=x_libraries ;; -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) x_libraries="$ac_optarg" ;; -*) { echo "configure: error: $ac_option: invalid option; use --help to show usage" 1>&2; exit 1; } ;; *) if test -n "`echo $ac_option| sed 's/[-a-z0-9.]//g'`"; then echo "configure: warning: $ac_option: invalid host type" 1>&2 fi if test "x$nonopt" != xNONE; then { echo "configure: error: can only configure for one host and one target at a time" 1>&2; exit 1; } fi nonopt="$ac_option" ;; esac done if test -n "$ac_prev"; then { echo "configure: error: missing argument to --`echo $ac_prev | sed 's/_/-/g'`" 1>&2; exit 1; } fi trap 'rm -fr conftest* confdefs* core core.* *.core $ac_clean_files; exit 1' 1 2 15 # File descriptor usage: # 0 standard input # 1 file creation # 2 errors and warnings # 3 some systems may open it to /dev/tty # 4 used on the Kubota Titan # 6 checking for... messages and results # 5 compiler messages saved in config.log if test "$silent" = yes; then exec 6>/dev/null else exec 6>&1 fi exec 5>./config.log echo "\ This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. " 1>&5 # Strip out --no-create and --no-recursion so they do not pile up. # Also quote any args containing shell metacharacters. 
ac_configure_args= for ac_arg do case "$ac_arg" in -no-create | --no-create | --no-creat | --no-crea | --no-cre \ | --no-cr | --no-c) ;; -no-recursion | --no-recursion | --no-recursio | --no-recursi \ | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) ;; *" "*|*" "*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?]*) ac_configure_args="$ac_configure_args '$ac_arg'" ;; *) ac_configure_args="$ac_configure_args $ac_arg" ;; esac done # NLS nuisances. # Only set these to C if already set. These must not be set unconditionally # because not all systems understand e.g. LANG=C (notably SCO). # Fixing LC_MESSAGES prevents Solaris sh from translating var values in `set'! # Non-C LC_CTYPE values break the ctype check. if test "${LANG+set}" = set; then LANG=C; export LANG; fi if test "${LC_ALL+set}" = set; then LC_ALL=C; export LC_ALL; fi if test "${LC_MESSAGES+set}" = set; then LC_MESSAGES=C; export LC_MESSAGES; fi if test "${LC_CTYPE+set}" = set; then LC_CTYPE=C; export LC_CTYPE; fi # confdefs.h avoids OS command line length limits that DEFS can exceed. rm -rf conftest* confdefs.h # AIX cpp loses on an empty file, so make sure it contains at least a newline. echo > confdefs.h # A filename unique to this package, relative to the directory that # configure is in, which we can look for to find out if srcdir is correct. ac_unique_file=00readme.txt # Find the source files, if location was not specified. if test -z "$srcdir"; then ac_srcdir_defaulted=yes # Try the directory containing this script, then its parent. ac_prog=$0 ac_confdir=`echo $ac_prog|sed 's%/[^/][^/]*$%%'` test "x$ac_confdir" = "x$ac_prog" && ac_confdir=. srcdir=$ac_confdir if test ! -r $srcdir/$ac_unique_file; then srcdir=.. fi else ac_srcdir_defaulted=no fi if test ! -r $srcdir/$ac_unique_file; then if test "$ac_srcdir_defaulted" = yes; then { echo "configure: error: can not find sources in $ac_confdir or .." 1>&2; exit 1; } else { echo "configure: error: can not find sources in $srcdir" 1>&2; exit 1; } fi fi srcdir=`echo "${srcdir}" | sed 's%\([^/]\)/*$%\1%'` # Prefer explicitly selected file to automatically selected ones. if test -z "$CONFIG_SITE"; then if test "x$prefix" != xNONE; then CONFIG_SITE="$prefix/share/config.site $prefix/etc/config.site" else CONFIG_SITE="$ac_default_prefix/share/config.site $ac_default_prefix/etc/config.site" fi fi for ac_site_file in $CONFIG_SITE; do if test -r "$ac_site_file"; then echo "loading site script $ac_site_file" . "$ac_site_file" fi done if test -r "$cache_file"; then echo "loading cache $cache_file" . $cache_file else echo "creating cache $cache_file" > $cache_file fi ac_ext=c # CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options. ac_cpp='$CPP $CPPFLAGS' ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5' ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5' cross_compiling=$ac_cv_prog_cc_cross ac_exeext= ac_objext=o if (echo "testing\c"; echo 1,2,3) | grep c >/dev/null; then # Stardent Vistra SVR4 grep lacks -e, says ghazi@caip.rutgers.edu. 
if (echo -n testing; echo 1,2,3) | sed s/-n/xn/ | grep xn >/dev/null; then ac_n= ac_c=' ' ac_t=' ' else ac_n=-n ac_c= ac_t= fi else ac_n= ac_c='\c' ac_t= fi ac_aux_dir= for ac_dir in ../support $srcdir/../support; do if test -f $ac_dir/install-sh; then ac_aux_dir=$ac_dir ac_install_sh="$ac_aux_dir/install-sh -c" break elif test -f $ac_dir/install.sh; then ac_aux_dir=$ac_dir ac_install_sh="$ac_aux_dir/install.sh -c" break fi done if test -z "$ac_aux_dir"; then { echo "configure: error: can not find install-sh or install.sh in ../support $srcdir/../support" 1>&2; exit 1; } fi ac_config_guess=$ac_aux_dir/config.guess ac_config_sub=$ac_aux_dir/config.sub ac_configure=$ac_aux_dir/configure # This should be Cygnus configure. # Make sure we can run config.sub. if ${CONFIG_SHELL-/bin/sh} $ac_config_sub sun4 >/dev/null 2>&1; then : else { echo "configure: error: can not run $ac_config_sub" 1>&2; exit 1; } fi echo $ac_n "checking host system type""... $ac_c" 1>&6 echo "configure:551: checking host system type" >&5 host_alias=$host case "$host_alias" in NONE) case $nonopt in NONE) if host_alias=`${CONFIG_SHELL-/bin/sh} $ac_config_guess`; then : else { echo "configure: error: can not guess host type; you must specify one" 1>&2; exit 1; } fi ;; *) host_alias=$nonopt ;; esac ;; esac host=`${CONFIG_SHELL-/bin/sh} $ac_config_sub $host_alias` host_cpu=`echo $host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\1/'` host_vendor=`echo $host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\2/'` host_os=`echo $host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\3/'` echo "$ac_t""$host" 1>&6 echo $ac_n "checking host-specific optimization flag""... $ac_c" 1>&6 echo "configure:572: checking host-specific optimization flag" >&5 if test -z "$CFLAGS" ; then OPTFLAG=../support/cflags.${host_cpu}-${host_vendor}-${host_os} if test -f "$OPTFLAG" ; then . $OPTFLAG echo "$ac_t""$OPTFLAG" 1>&6 else echo "$ac_t""no" 1>&6 fi else echo "$ac_t""skipped" 1>&6 fi # Extract the first word of "gcc", so it can be a program name with args. set dummy gcc; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 echo "configure:587: checking for $ac_word" >&5 if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" ac_dummy="$PATH" for ac_dir in $ac_dummy; do test -z "$ac_dir" && ac_dir=. if test -f $ac_dir/$ac_word; then ac_cv_prog_CC="gcc" break fi done IFS="$ac_save_ifs" fi fi CC="$ac_cv_prog_CC" if test -n "$CC"; then echo "$ac_t""$CC" 1>&6 else echo "$ac_t""no" 1>&6 fi if test -z "$CC"; then # Extract the first word of "cc", so it can be a program name with args. set dummy cc; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 echo "configure:617: checking for $ac_word" >&5 if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" ac_prog_rejected=no ac_dummy="$PATH" for ac_dir in $ac_dummy; do test -z "$ac_dir" && ac_dir=. if test -f $ac_dir/$ac_word; then if test "$ac_dir/$ac_word" = "/usr/ucb/cc"; then ac_prog_rejected=yes continue fi ac_cv_prog_CC="cc" break fi done IFS="$ac_save_ifs" if test $ac_prog_rejected = yes; then # We found a bogon in the path, so make sure we never use it. set dummy $ac_cv_prog_CC shift if test $# -gt 0; then # We chose a different compiler from the bogus one. 
# However, it has the same basename, so the bogon will be chosen # first if we set CC to just the basename; use the full file name. shift set dummy "$ac_dir/$ac_word" "$@" shift ac_cv_prog_CC="$@" fi fi fi fi CC="$ac_cv_prog_CC" if test -n "$CC"; then echo "$ac_t""$CC" 1>&6 else echo "$ac_t""no" 1>&6 fi if test -z "$CC"; then case "`uname -s`" in *win32* | *WIN32*) # Extract the first word of "cl", so it can be a program name with args. set dummy cl; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 echo "configure:668: checking for $ac_word" >&5 if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" ac_dummy="$PATH" for ac_dir in $ac_dummy; do test -z "$ac_dir" && ac_dir=. if test -f $ac_dir/$ac_word; then ac_cv_prog_CC="cl" break fi done IFS="$ac_save_ifs" fi fi CC="$ac_cv_prog_CC" if test -n "$CC"; then echo "$ac_t""$CC" 1>&6 else echo "$ac_t""no" 1>&6 fi ;; esac fi test -z "$CC" && { echo "configure: error: no acceptable cc found in \$PATH" 1>&2; exit 1; } fi echo $ac_n "checking whether the C compiler ($CC $CFLAGS $LDFLAGS) works""... $ac_c" 1>&6 echo "configure:700: checking whether the C compiler ($CC $CFLAGS $LDFLAGS) works" >&5 ac_ext=c # CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options. ac_cpp='$CPP $CPPFLAGS' ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5' ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5' cross_compiling=$ac_cv_prog_cc_cross cat > conftest.$ac_ext << EOF #line 711 "configure" #include "confdefs.h" main(){return(0);} EOF if { (eval echo configure:716: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then ac_cv_prog_cc_works=yes # If we can't run a trivial program, we are probably using a cross compiler. if (./conftest; exit) 2>/dev/null; then ac_cv_prog_cc_cross=no else ac_cv_prog_cc_cross=yes fi else echo "configure: failed program was:" >&5 cat conftest.$ac_ext >&5 ac_cv_prog_cc_works=no fi rm -fr conftest* ac_ext=c # CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options. ac_cpp='$CPP $CPPFLAGS' ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5' ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5' cross_compiling=$ac_cv_prog_cc_cross echo "$ac_t""$ac_cv_prog_cc_works" 1>&6 if test $ac_cv_prog_cc_works = no; then { echo "configure: error: installation or configuration problem: C compiler cannot create executables." 1>&2; exit 1; } fi echo $ac_n "checking whether the C compiler ($CC $CFLAGS $LDFLAGS) is a cross-compiler""... $ac_c" 1>&6 echo "configure:742: checking whether the C compiler ($CC $CFLAGS $LDFLAGS) is a cross-compiler" >&5 echo "$ac_t""$ac_cv_prog_cc_cross" 1>&6 cross_compiling=$ac_cv_prog_cc_cross echo $ac_n "checking whether we are using GNU C""... $ac_c" 1>&6 echo "configure:747: checking whether we are using GNU C" >&5 if eval "test \"`echo '$''{'ac_cv_prog_gcc'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.c <&5; (eval $ac_try) 2>&5; }; } | egrep yes >/dev/null 2>&1; then ac_cv_prog_gcc=yes else ac_cv_prog_gcc=no fi fi echo "$ac_t""$ac_cv_prog_gcc" 1>&6 if test $ac_cv_prog_gcc = yes; then GCC=yes else GCC= fi ac_test_CFLAGS="${CFLAGS+set}" ac_save_CFLAGS="$CFLAGS" CFLAGS= echo $ac_n "checking whether ${CC-cc} accepts -g""... 
$ac_c" 1>&6 echo "configure:775: checking whether ${CC-cc} accepts -g" >&5 if eval "test \"`echo '$''{'ac_cv_prog_cc_g'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else echo 'void f(){}' > conftest.c if test -z "`${CC-cc} -g -c conftest.c 2>&1`"; then ac_cv_prog_cc_g=yes else ac_cv_prog_cc_g=no fi rm -f conftest* fi echo "$ac_t""$ac_cv_prog_cc_g" 1>&6 if test "$ac_test_CFLAGS" = set; then CFLAGS="$ac_save_CFLAGS" elif test $ac_cv_prog_cc_g = yes; then if test "$GCC" = yes; then CFLAGS="-g -O2" else CFLAGS="-g" fi else if test "$GCC" = yes; then CFLAGS="-O2" else CFLAGS= fi fi echo $ac_n "checking how to run the C preprocessor""... $ac_c" 1>&6 echo "configure:807: checking how to run the C preprocessor" >&5 # On Suns, sometimes $CPP names a directory. if test -n "$CPP" && test -d "$CPP"; then CPP= fi if test -z "$CPP"; then if eval "test \"`echo '$''{'ac_cv_prog_CPP'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else # This must be in double quotes, not single quotes, because CPP may get # substituted into the Makefile and "${CC-cc}" will confuse make. CPP="${CC-cc} -E" # On the NeXT, cc -E runs the code through the compiler's parser, # not just through cpp. cat > conftest.$ac_ext < Syntax Error EOF ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" { (eval echo configure:828: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` if test -z "$ac_err"; then : else echo "$ac_err" >&5 echo "configure: failed program was:" >&5 cat conftest.$ac_ext >&5 rm -rf conftest* CPP="${CC-cc} -E -traditional-cpp" cat > conftest.$ac_ext < Syntax Error EOF ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" { (eval echo configure:845: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` if test -z "$ac_err"; then : else echo "$ac_err" >&5 echo "configure: failed program was:" >&5 cat conftest.$ac_ext >&5 rm -rf conftest* CPP="${CC-cc} -nologo -E" cat > conftest.$ac_ext < Syntax Error EOF ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" { (eval echo configure:862: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` if test -z "$ac_err"; then : else echo "$ac_err" >&5 echo "configure: failed program was:" >&5 cat conftest.$ac_ext >&5 rm -rf conftest* CPP=/lib/cpp fi rm -f conftest* fi rm -f conftest* fi rm -f conftest* ac_cv_prog_CPP="$CPP" fi CPP="$ac_cv_prog_CPP" else ac_cv_prog_CPP="$CPP" fi echo "$ac_t""$CPP" 1>&6 # Find a good install program. We prefer a C program (faster), # so one script is as good as another. But avoid the broken or # incompatible versions: # SysV /etc/install, /usr/sbin/install # SunOS /usr/etc/install # IRIX /sbin/install # AIX /bin/install # AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag # AFS /usr/afsws/bin/install, which mishandles nonexistent args # SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff" # ./install, which can be erroneously created by make from ./install.sh. echo $ac_n "checking for a BSD compatible install""... $ac_c" 1>&6 echo "configure:898: checking for a BSD compatible install" >&5 if test -z "$INSTALL"; then if eval "test \"`echo '$''{'ac_cv_path_install'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else IFS="${IFS= }"; ac_save_IFS="$IFS"; IFS=":" for ac_dir in $PATH; do # Account for people who put trailing slashes in PATH elements. 
case "$ac_dir/" in /|./|.//|/etc/*|/usr/sbin/*|/usr/etc/*|/sbin/*|/usr/afsws/bin/*|/usr/ucb/*) ;; *) # OSF1 and SCO ODT 3.0 have their own names for install. # Don't use installbsd from OSF since it installs stuff as root # by default. for ac_prog in ginstall scoinst install; do if test -f $ac_dir/$ac_prog; then if test $ac_prog = install && grep dspmsg $ac_dir/$ac_prog >/dev/null 2>&1; then # AIX install. It has an incompatible calling convention. : else ac_cv_path_install="$ac_dir/$ac_prog -c" break 2 fi fi done ;; esac done IFS="$ac_save_IFS" fi if test "${ac_cv_path_install+set}" = set; then INSTALL="$ac_cv_path_install" else # As a last resort, use the slow shell script. We don't cache a # path for INSTALL within a source directory, because that will # break other packages using the cache if that directory is # removed, or if the path is relative. INSTALL="$ac_install_sh" fi fi echo "$ac_t""$INSTALL" 1>&6 # Use test -z because SunOS4 sh mishandles braces in ${var-val}. # It thinks the first close brace ends the variable substitution. test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}' test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL_PROGRAM}' test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644' echo $ac_n "checking for Cygwin environment""... $ac_c" 1>&6 echo "configure:951: checking for Cygwin environment" >&5 if eval "test \"`echo '$''{'ac_cv_cygwin'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext <&5; (eval $ac_compile) 2>&5; }; then rm -rf conftest* ac_cv_cygwin=yes else echo "configure: failed program was:" >&5 cat conftest.$ac_ext >&5 rm -rf conftest* ac_cv_cygwin=no fi rm -f conftest* rm -f conftest* fi echo "$ac_t""$ac_cv_cygwin" 1>&6 CYGWIN= test "$ac_cv_cygwin" = yes && CYGWIN=yes echo $ac_n "checking for mingw32 environment""... $ac_c" 1>&6 echo "configure:984: checking for mingw32 environment" >&5 if eval "test \"`echo '$''{'ac_cv_mingw32'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext <&5; (eval $ac_compile) 2>&5; }; then rm -rf conftest* ac_cv_mingw32=yes else echo "configure: failed program was:" >&5 cat conftest.$ac_ext >&5 rm -rf conftest* ac_cv_mingw32=no fi rm -f conftest* rm -f conftest* fi echo "$ac_t""$ac_cv_mingw32" 1>&6 MINGW32= test "$ac_cv_mingw32" = yes && MINGW32=yes echo $ac_n "checking for executable suffix""... $ac_c" 1>&6 echo "configure:1015: checking for executable suffix" >&5 if eval "test \"`echo '$''{'ac_cv_exeext'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else if test "$CYGWIN" = yes || test "$MINGW32" = yes; then ac_cv_exeext=.exe else rm -f conftest* echo 'int main () { return 0; }' > conftest.$ac_ext ac_cv_exeext= if { (eval echo configure:1025: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; }; then for file in conftest.*; do case $file in *.c | *.o | *.obj) ;; *) ac_cv_exeext=`echo $file | sed -e s/conftest//` ;; esac done else { echo "configure: error: installation or configuration problem: compiler cannot create executables." 1>&2; exit 1; } fi rm -f conftest* test x"${ac_cv_exeext}" = x && ac_cv_exeext=no fi fi EXEEXT="" test x"${ac_cv_exeext}" != xno && EXEEXT=${ac_cv_exeext} echo "$ac_t""${ac_cv_exeext}" 1>&6 ac_exeext=$EXEEXT echo $ac_n "checking host specific optimization flag""... $ac_c" 1>&6 echo "configure:1047: checking host specific optimization flag" >&5 if test -z "$CFLAGS" ; then OPTFLAG=../support/cflags.${host_cpu}-${host_vendor}-${host_os} if test -f "$OPTFLAG" ; then . 
$OPTFLAG echo "$ac_t""$OPTFLAG" 1>&6 else echo "$ac_t""no" 1>&6 fi else echo "$ac_t""skipped" 1>&6 fi # Extract the first word of "rm", so it can be a program name with args. set dummy rm; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 echo "configure:1063: checking for $ac_word" >&5 if eval "test \"`echo '$''{'ac_cv_path_RM'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else case "$RM" in /*) ac_cv_path_RM="$RM" # Let the user override the test with a path. ;; ?:/*) ac_cv_path_RM="$RM" # Let the user override the test with a dos path. ;; *) IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" ac_dummy="$PATH" for ac_dir in $ac_dummy; do test -z "$ac_dir" && ac_dir=. if test -f $ac_dir/$ac_word; then ac_cv_path_RM="$ac_dir/$ac_word" break fi done IFS="$ac_save_ifs" ;; esac fi RM="$ac_cv_path_RM" if test -n "$RM"; then echo "$ac_t""$RM" 1>&6 else echo "$ac_t""no" 1>&6 fi # Extract the first word of "perl", so it can be a program name with args. set dummy perl; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 echo "configure:1098: checking for $ac_word" >&5 if eval "test \"`echo '$''{'ac_cv_path_PERL'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else case "$PERL" in /*) ac_cv_path_PERL="$PERL" # Let the user override the test with a path. ;; ?:/*) ac_cv_path_PERL="$PERL" # Let the user override the test with a dos path. ;; *) IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" ac_dummy="$PATH" for ac_dir in $ac_dummy; do test -z "$ac_dir" && ac_dir=. if test -f $ac_dir/$ac_word; then ac_cv_path_PERL="$ac_dir/$ac_word" break fi done IFS="$ac_save_ifs" ;; esac fi PERL="$ac_cv_path_PERL" if test -n "$PERL"; then echo "$ac_t""$PERL" 1>&6 else echo "$ac_t""no" 1>&6 fi if test -z "$PERL"; then { echo "configure: error: perl not found! installation terminated" 1>&2; exit 1; } fi # Extract the first word of "iconv", so it can be a program name with args. set dummy iconv; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 echo "configure:1137: checking for $ac_word" >&5 if eval "test \"`echo '$''{'ac_cv_path_ICONV'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else case "$ICONV" in /*) ac_cv_path_ICONV="$ICONV" # Let the user override the test with a path. ;; ?:/*) ac_cv_path_ICONV="$ICONV" # Let the user override the test with a dos path. ;; *) IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" ac_dummy="$PATH" for ac_dir in $ac_dummy; do test -z "$ac_dir" && ac_dir=. if test -f $ac_dir/$ac_word; then ac_cv_path_ICONV="$ac_dir/$ac_word" break fi done IFS="$ac_save_ifs" ;; esac fi ICONV="$ac_cv_path_ICONV" if test -n "$ICONV"; then echo "$ac_t""$ICONV" 1>&6 else echo "$ac_t""no" 1>&6 fi if test -z "$ICONV"; then echo "configure: warning: no iconv, gram2sapixml.pl may not work" 1>&2 else echo $ac_n "checking for Jcode module in perl""... $ac_c" 1>&6 echo "configure:1173: checking for Jcode module in perl" >&5 if $PERL -mJcode -e "print;"; then echo "$ac_t""yes" 1>&6 else echo "configure: warning: no Jcode module in perl, gram2sapixml.pl may not work" 1>&2 fi fi have_readline=no READLINE_LIBS="" ac_safe=`echo "readline/readline.h" | sed 'y%./+-%__p_%'` echo $ac_n "checking for readline/readline.h""... 
$ac_c" 1>&6 echo "configure:1184: checking for readline/readline.h" >&5 if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext < EOF ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" { (eval echo configure:1194: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` if test -z "$ac_err"; then rm -rf conftest* eval "ac_cv_header_$ac_safe=yes" else echo "$ac_err" >&5 echo "configure: failed program was:" >&5 cat conftest.$ac_ext >&5 rm -rf conftest* eval "ac_cv_header_$ac_safe=no" fi rm -f conftest* fi if eval "test \"`echo '$ac_cv_header_'$ac_safe`\" = yes"; then echo "$ac_t""yes" 1>&6 echo $ac_n "checking for readline in -lreadline""... $ac_c" 1>&6 echo "configure:1211: checking for readline in -lreadline" >&5 ac_lib_var=`echo readline'_'readline | sed 'y%./+-%__p_%'` if eval "test \"`echo '$''{'ac_cv_lib_$ac_lib_var'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else ac_save_LIBS="$LIBS" LIBS="-lreadline $LIBS" cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then rm -rf conftest* eval "ac_cv_lib_$ac_lib_var=yes" else echo "configure: failed program was:" >&5 cat conftest.$ac_ext >&5 rm -rf conftest* eval "ac_cv_lib_$ac_lib_var=no" fi rm -f conftest* LIBS="$ac_save_LIBS" fi if eval "test \"`echo '$ac_cv_lib_'$ac_lib_var`\" = yes"; then echo "$ac_t""yes" 1>&6 have_readline=yes READLINE_LIBS="-lreadline" else echo "$ac_t""no" 1>&6 echo $ac_n "checking for readline in -lreadline with -ltermcap""... $ac_c" 1>&6 echo "configure:1250: checking for readline in -lreadline with -ltermcap" >&5 ac_save_LIBS="$LIBS" LIBS="$ac_save_LIBS -lreadline -ltermcap" cat > conftest.$ac_ext < #include int main() { readline(""); ; return 0; } EOF if { (eval echo configure:1262: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then rm -rf conftest* echo "$ac_t""yes" 1>&6 READLINE_LIBS="-lreadline -ltermcap" LIBS="$ac_save_LIBS" have_readline=yes else echo "configure: failed program was:" >&5 cat conftest.$ac_ext >&5 rm -rf conftest* echo "$ac_t""no" 1>&6 echo $ac_n "checking for readline in -lreadline with -lncurses""... $ac_c" 1>&6 echo "configure:1274: checking for readline in -lreadline with -lncurses" >&5 LIBS="$ac_save_LIBS -lreadline -lncurses" cat > conftest.$ac_ext < #include int main() { readline(""); ; return 0; } EOF if { (eval echo configure:1285: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then rm -rf conftest* echo "$ac_t""yes" 1>&6 READLINE_LIBS="-lreadline -lncurses" LIBS="$ac_save_LIBS" have_readline=yes else echo "configure: failed program was:" >&5 cat conftest.$ac_ext >&5 rm -rf conftest* echo "$ac_t""no" 1>&6 READLINE_LIBS="" LIBS="$ac_save_LIBS" fi rm -f conftest* fi rm -f conftest* fi else echo "$ac_t""no" 1>&6 fi if test "$have_readline" = yes; then cat >> confdefs.h <<\EOF #define HAVE_READLINE 1 EOF echo $ac_n "checking for readline verion > 4.1""... $ac_c" 1>&6 echo "configure:1318: checking for readline verion > 4.1" >&5 cat > conftest.$ac_ext < EOF if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | egrep "rl_compentry_func_t" >/dev/null 2>&1; then rm -rf conftest* echo "$ac_t""yes" 1>&6 else rm -rf conftest* echo "$ac_t""no - use old func" 1>&6 cat >> confdefs.h <<\EOF #define HAVE_READLINE_4_1_OLDER 1 EOF fi rm -f conftest* fi for ac_hdr in malloc.h do ac_safe=`echo "$ac_hdr" | sed 'y%./+-%__p_%'` echo $ac_n "checking for $ac_hdr""... 
$ac_c" 1>&6 echo "configure:1346: checking for $ac_hdr" >&5 if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then echo $ac_n "(cached) $ac_c" 1>&6 else cat > conftest.$ac_ext < EOF ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" { (eval echo configure:1356: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` if test -z "$ac_err"; then rm -rf conftest* eval "ac_cv_header_$ac_safe=yes" else echo "$ac_err" >&5 echo "configure: failed program was:" >&5 cat conftest.$ac_ext >&5 rm -rf conftest* eval "ac_cv_header_$ac_safe=no" fi rm -f conftest* fi if eval "test \"`echo '$ac_cv_header_'$ac_safe`\" = yes"; then echo "$ac_t""yes" 1>&6 ac_tr_hdr=HAVE_`echo $ac_hdr | sed 'y%abcdefghijklmnopqrstuvwxyz./-%ABCDEFGHIJKLMNOPQRSTUVWXYZ___%'` cat >> confdefs.h <&6 fi done trap '' 1 2 15 cat > confcache <<\EOF # This file is a shell script that caches the results of configure # tests run on this system so they can be shared between configure # scripts and configure runs. It is not useful on other systems. # If it contains results you don't want to keep, you may remove or edit it. # # By default, configure uses ./config.cache as the cache file, # creating it if it does not exist already. You can give configure # the --cache-file=FILE option to use a different cache file; that is # what configure does when it calls configure scripts in # subdirectories, so they share the cache. # Giving --cache-file=/dev/null disables caching, for debugging configure. # config.status only pays attention to the cache file if you give it the # --recheck option to rerun configure. # EOF # The following way of writing the cache mishandles newlines in values, # but we know of no workaround that is simple, portable, and efficient. # So, don't put newlines in cache variables' values. # Ultrix sh set writes to stderr and can't be redirected directly, # and sets the high bit in the cache file unless we assign to the vars. (set) 2>&1 | case `(ac_space=' '; set | grep ac_space) 2>&1` in *ac_space=\ *) # `set' does not quote correctly, so add quotes (double-quote substitution # turns \\\\ into \\, and sed turns \\ into \). sed -n \ -e "s/'/'\\\\''/g" \ -e "s/^\\([a-zA-Z0-9_]*_cv_[a-zA-Z0-9_]*\\)=\\(.*\\)/\\1=\${\\1='\\2'}/p" ;; *) # `set' quotes correctly as required by POSIX, so do not add quotes. sed -n -e 's/^\([a-zA-Z0-9_]*_cv_[a-zA-Z0-9_]*\)=\(.*\)/\1=${\1=\2}/p' ;; esac >> confcache if cmp -s $cache_file confcache; then : else if test -w $cache_file; then echo "updating cache $cache_file" cat confcache > $cache_file else echo "not updating unwritable cache $cache_file" fi fi rm -f confcache trap 'rm -fr conftest* confdefs* core core.* *.core $ac_clean_files; exit 1' 1 2 15 test "x$prefix" = xNONE && prefix=$ac_default_prefix # Let make expand exec_prefix. test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' # Any assignment to VPATH causes Sun make to only execute # the first set of double-colon rules, so remove it if not needed. # If there is a colon in the path, we need to keep it. if test "x$srcdir" = x.; then ac_vpsub='/^[ ]*VPATH[ ]*=[^:]*$/d' fi trap 'rm -f $CONFIG_STATUS conftest*; exit 1' 1 2 15 # Transform confdefs.h into DEFS. # Protect against shell expansion while executing Makefile rules. # Protect against Makefile macro expansion. 
cat > conftest.defs <<\EOF s%#define \([A-Za-z_][A-Za-z0-9_]*\) *\(.*\)%-D\1=\2%g s%[ `~#$^&*(){}\\|;'"<>?]%\\&%g s%\[%\\&%g s%\]%\\&%g s%\$%$$%g EOF DEFS=`sed -f conftest.defs confdefs.h | tr '\012' ' '` rm -f conftest.defs # Without the "./", some shells look in PATH for config.status. : ${CONFIG_STATUS=./config.status} echo creating $CONFIG_STATUS rm -f $CONFIG_STATUS cat > $CONFIG_STATUS </dev/null | sed 1q`: # # $0 $ac_configure_args # # Compiler output produced by configure, useful for debugging # configure, is in ./config.log if it exists. ac_cs_usage="Usage: $CONFIG_STATUS [--recheck] [--version] [--help]" for ac_option do case "\$ac_option" in -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) echo "running \${CONFIG_SHELL-/bin/sh} $0 $ac_configure_args --no-create --no-recursion" exec \${CONFIG_SHELL-/bin/sh} $0 $ac_configure_args --no-create --no-recursion ;; -version | --version | --versio | --versi | --vers | --ver | --ve | --v) echo "$CONFIG_STATUS generated by autoconf version 2.13" exit 0 ;; -help | --help | --hel | --he | --h) echo "\$ac_cs_usage"; exit 0 ;; *) echo "\$ac_cs_usage"; exit 1 ;; esac done ac_given_srcdir=$srcdir ac_given_INSTALL="$INSTALL" trap 'rm -fr `echo "Makefile mkdfa/Makefile mkdfa/mkdfa.pl mkdfa/mkfa-1.44-flex/Makefile dfa_minimize/Makefile generate/Makefile accept_check/Makefile nextword/Makefile yomi2voca/Makefile yomi2voca/yomi2voca.pl gram2sapixml/Makefile gram2sapixml/gram2sapixml.pl dfa_determinize/Makefile" | sed "s/:[^ ]*//g"` conftest*; exit 1' 1 2 15 EOF cat >> $CONFIG_STATUS < conftest.subs <<\\CEOF $ac_vpsub $extrasub s%@SHELL@%$SHELL%g s%@CFLAGS@%$CFLAGS%g s%@CPPFLAGS@%$CPPFLAGS%g s%@CXXFLAGS@%$CXXFLAGS%g s%@FFLAGS@%$FFLAGS%g s%@DEFS@%$DEFS%g s%@LDFLAGS@%$LDFLAGS%g s%@LIBS@%$LIBS%g s%@exec_prefix@%$exec_prefix%g s%@prefix@%$prefix%g s%@program_transform_name@%$program_transform_name%g s%@bindir@%$bindir%g s%@sbindir@%$sbindir%g s%@libexecdir@%$libexecdir%g s%@datadir@%$datadir%g s%@sysconfdir@%$sysconfdir%g s%@sharedstatedir@%$sharedstatedir%g s%@localstatedir@%$localstatedir%g s%@libdir@%$libdir%g s%@includedir@%$includedir%g s%@oldincludedir@%$oldincludedir%g s%@infodir@%$infodir%g s%@mandir@%$mandir%g s%@host@%$host%g s%@host_alias@%$host_alias%g s%@host_cpu@%$host_cpu%g s%@host_vendor@%$host_vendor%g s%@host_os@%$host_os%g s%@CC@%$CC%g s%@CPP@%$CPP%g s%@INSTALL_PROGRAM@%$INSTALL_PROGRAM%g s%@INSTALL_SCRIPT@%$INSTALL_SCRIPT%g s%@INSTALL_DATA@%$INSTALL_DATA%g s%@EXEEXT@%$EXEEXT%g s%@RM@%$RM%g s%@PERL@%$PERL%g s%@ICONV@%$ICONV%g s%@READLINE_LIBS@%$READLINE_LIBS%g CEOF EOF cat >> $CONFIG_STATUS <<\EOF # Split the substitutions into bite-sized pieces for seds with # small command number limits, like on Digital OSF/1 and HP-UX. ac_max_sed_cmds=90 # Maximum number of lines to put in a sed script. ac_file=1 # Number of current file. ac_beg=1 # First line for current file. ac_end=$ac_max_sed_cmds # Line after last line for current file. ac_more_lines=: ac_sed_cmds="" while $ac_more_lines; do if test $ac_beg -gt 1; then sed "1,${ac_beg}d; ${ac_end}q" conftest.subs > conftest.s$ac_file else sed "${ac_end}q" conftest.subs > conftest.s$ac_file fi if test ! 
-s conftest.s$ac_file; then ac_more_lines=false rm -f conftest.s$ac_file else if test -z "$ac_sed_cmds"; then ac_sed_cmds="sed -f conftest.s$ac_file" else ac_sed_cmds="$ac_sed_cmds | sed -f conftest.s$ac_file" fi ac_file=`expr $ac_file + 1` ac_beg=$ac_end ac_end=`expr $ac_end + $ac_max_sed_cmds` fi done if test -z "$ac_sed_cmds"; then ac_sed_cmds=cat fi EOF cat >> $CONFIG_STATUS <> $CONFIG_STATUS <<\EOF for ac_file in .. $CONFIG_FILES; do if test "x$ac_file" != x..; then # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in". case "$ac_file" in *:*) ac_file_in=`echo "$ac_file"|sed 's%[^:]*:%%'` ac_file=`echo "$ac_file"|sed 's%:.*%%'` ;; *) ac_file_in="${ac_file}.in" ;; esac # Adjust a relative srcdir, top_srcdir, and INSTALL for subdirectories. # Remove last slash and all that follows it. Not all systems have dirname. ac_dir=`echo $ac_file|sed 's%/[^/][^/]*$%%'` if test "$ac_dir" != "$ac_file" && test "$ac_dir" != .; then # The file is in a subdirectory. test ! -d "$ac_dir" && mkdir "$ac_dir" ac_dir_suffix="/`echo $ac_dir|sed 's%^\./%%'`" # A "../" for each directory in $ac_dir_suffix. ac_dots=`echo $ac_dir_suffix|sed 's%/[^/]*%../%g'` else ac_dir_suffix= ac_dots= fi case "$ac_given_srcdir" in .) srcdir=. if test -z "$ac_dots"; then top_srcdir=. else top_srcdir=`echo $ac_dots|sed 's%/$%%'`; fi ;; /*) srcdir="$ac_given_srcdir$ac_dir_suffix"; top_srcdir="$ac_given_srcdir" ;; *) # Relative path. srcdir="$ac_dots$ac_given_srcdir$ac_dir_suffix" top_srcdir="$ac_dots$ac_given_srcdir" ;; esac case "$ac_given_INSTALL" in [/$]*) INSTALL="$ac_given_INSTALL" ;; *) INSTALL="$ac_dots$ac_given_INSTALL" ;; esac echo creating "$ac_file" rm -f "$ac_file" configure_input="Generated automatically from `echo $ac_file_in|sed 's%.*/%%'` by configure." case "$ac_file" in *Makefile*) ac_comsub="1i\\ # $configure_input" ;; *) ac_comsub= ;; esac ac_file_inputs=`echo $ac_file_in|sed -e "s%^%$ac_given_srcdir/%" -e "s%:% $ac_given_srcdir/%g"` sed -e "$ac_comsub s%@configure_input@%$configure_input%g s%@srcdir@%$srcdir%g s%@top_srcdir@%$top_srcdir%g s%@INSTALL@%$INSTALL%g " $ac_file_inputs | (eval "$ac_sed_cmds") > $ac_file fi; done rm -f conftest.s* EOF cat >> $CONFIG_STATUS <> $CONFIG_STATUS <<\EOF exit 0 EOF chmod +x $CONFIG_STATUS rm -fr confdefs* $ac_clean_files test "$no_create" = yes || ${CONFIG_SHELL-/bin/sh} $CONFIG_STATUS || exit 1 julius-4.2.2/gramtools/Makefile.in0000644001051700105040000000175212004452400015400 0ustar ritrlab# Copyright (c) 1991-2012 Kawahara Lab., Kyoto University # Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology # Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology # All rights reserved # # $Id: Makefile.in,v 1.5 2012/07/27 08:44:48 sumomo Exp $ # SHELL=/bin/sh RM=@RM@ -f prefix=@prefix@ exec_prefix=@exec_prefix@ INSTALL=@INSTALL@ SUBDIRS=mkdfa dfa_minimize generate accept_check nextword yomi2voca gram2sapixml dfa_determinize all: for d in $(SUBDIRS); do \ (cd $$d; $(MAKE)); \ done install: for d in $(SUBDIRS); do \ (cd $$d; $(MAKE) install); \ done install.bin: for d in $(SUBDIRS); do \ (cd $$d; $(MAKE) install.bin); \ done clean: for d in $(SUBDIRS); do \ (cd $$d; $(MAKE) clean); \ done $(RM) config.log config.cache distclean: for d in $(SUBDIRS); do \ if test -f $$d/Makefile; then \ (cd $$d; $(MAKE) distclean); \ fi; \ done $(RM) config.log config.cache $(RM) config.status $(RM) Makefile julius-4.2.2/gramtools/gram2sapixml/0000755001051700105040000000000012004463507015746 5ustar 
ritrlabjulius-4.2.2/gramtools/gram2sapixml/Makefile.in0000644001051700105040000000104212004452400017776 0ustar ritrlab# Copyright (c) 1991-2012 Kawahara Lab., Kyoto University # Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology # Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology # All rights reserved SHELL=/bin/sh RM=@RM@ -f prefix=@prefix@ exec_prefix=@exec_prefix@ INSTALL=@INSTALL@ TARGET=gram2sapixml.pl all: chmod +x $(TARGET) install: install.bin install.bin: ${INSTALL} -d @bindir@ @INSTALL_PROGRAM@ $(TARGET) @bindir@ clean: echo nothing to do distclean: $(RM) $(TARGET) $(RM) Makefile julius-4.2.2/gramtools/gram2sapixml/00readme.txt0000644001051700105040000000305111556443771020117 0ustar ritrlab gram2sapixml.pl GRAM2SAPIXML.PL(1) GRAM2SAPIXML.PL(1) NAME gram2sapixml.pl - convert Julius grammar to SAPI XML grammar format SYNOPSIS gram2sapixml.pl [prefix...] DESCRIPTION gram2sapixml.pl converts a recognition grammar file of Julius (.grammar, .voca) to Microsoft SAPI XML grammar format. prefix should be a file name of target grammar, excluding suffixes. If multiple argument is given, each will be process sequencialy in turn. The internal character set should be in UTF-8 format. By default gram2sapixml.pl assume input in EUC-JP encoding and tries to convert it to UTF-8 using iconv. You may want to disable this feature within the script. It will fail to convert a left recursive rule in the grammar. When fails, it will leave the source rules in the target .xml file, so you should modify the output manually to solve it. SEE ALSO mkdfa.pl ( 1 ) DIAGNOSTICS The conversion procedure is somewhat dumb one, only converting the non-terminal symbols and terminal symbols (=word category name) into corresponding rules one by one. This is only a help tool, and you will need a manual inspection and editing to use it on a real SAPI application. COPYRIGHT Copyright (c) 2002 Takashi Sumiyoshi LICENSE The same as Julius. 
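       To make the SYNOPSIS above concrete, here is a minimal usage sketch.
       The grammar prefix "fruit" is a placeholder for your own
       .grammar/.voca pair; note that the script also needs the Perl Jcode
       module and the external iconv command at run time (both are checked
       for by the gramtools configure script).

           # convert fruit.grammar + fruit.voca into fruit.xml
           # (SAPI XML grammar in UTF-8)
           gram2sapixml.pl fruit
           # several grammars may be given at once; each prefix is
           # processed sequentially in turn
           gram2sapixml.pl gram1 gram2 gram3
           # inspect the resulting .xml by hand: left-recursive rules are
           # not converted and are left in the output for manual rewriting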
10/02/2008 GRAM2SAPIXML.PL(1) julius-4.2.2/gramtools/gram2sapixml/gram2sapixml.txt0000644001051700105040000000252010677066017021125 0ustar ritrlabJulian `ョ@ SAPI XML @XNvg 2002/09/12 Takashi Sumiyoshi (sumiyosi@kuis.kyoto-u.ac.jp) vO Perl XNvgAJulian p .grammar, .voca `ョt@CASAPI XML `ョXNvgB タs@ vOAiconv vOpA r iconv vOKvB タsAPg\ヲB % ./gram2sapixml.pl .grammar, .voca t@Ct@Cgq OwB ../sample_grammars/vfr/vfr.{grammar,voca} Ro[g AB % ./gram2sapixml.pl ../sample_grammars/vfr/vfr Avfr.xml t@C ../sample_grammars/vfr/ B t@CwAB t@C UTF-8 GR[fBOB At@C@I[LI[L(PJeS) [PB t@C .grammar \A.grammar ALq .xml fB A .xml As B AタSAPIAvP[VgAvpeBw ACKvB julius-4.2.2/gramtools/gram2sapixml/gram2sapixml.pl.in0000755001051700105040000002220410677066017021332 0ustar ritrlab#!@PERL@ ## Copyright (c) 2002 Takashi Sumiyoshi # ------------------------------------------------------------ # Julian 形式の文法 (.grammar, .voca) を SAPI XML 文法に変換します。 # 引数なし起動で使い方が表示されます。 # 実行には Jcode モジュールが必要です。 # 出力を UTF-8 形式に変換するのに外部コマンドとして iconv を使用しています。 # ------------------------------------------------------------ # 注意: Julian 形式の文法では、右再帰が使えません。逆に SAPI XML 形式で # は左再帰が使えません。このツールはその変換まではしないので、左再帰を # 含む文法は、変換後に手作業で修正する必要があります。 # ------------------------------------------------------------ # 出力される SAPI XML 文法ファイルは、元ファイルの文法の非終端記号、終端記号 # をルールに変換するという単純な変換であるため、よりエレガントにするには # 手作業で修正する必要があります。 # ------------------------------------------------------------ use strict; use Jcode; my $iconv = "iconv -f eucJP -t UTF-8"; # iconv command line ############################################################ # convertphone で使用する ############################################################ sub vowel { if ($_[0] eq "a") { return $_[1];} if ($_[0] eq "i") { return $_[2];} if ($_[0] eq "u") { return $_[3];} if ($_[0] eq "e") { return $_[4];} if ($_[0] eq "o") { return $_[5];} if ($_[0] eq "a:") { return $_[1]."ー";} if ($_[0] eq "i:") { return $_[2]."ー";} if ($_[0] eq "u:") { return $_[3]."ー";} if ($_[0] eq "e:") { return $_[4]."ー";} if ($_[0] eq "o:") { return $_[5]."ー";} return 0; } ############################################################ # サブルーティン: 入力音素配列からカナ文字列を生成。入力は正しいと仮定 ############################################################ sub convertphone { my $rval = ""; my $c; my $d; my $r; while($c = shift @_) { if ($c eq "k") { $d = shift @_; if ($r = vowel($d,"か","き","く","け","こ")) { $rval .= $r; } } if ($c eq "ky") { $d = shift @_; if ($r = vowel($d,"きゃ","kyi?","きゅ","kye?","きょ")) { $rval .= $r; } } if ($c eq "s") { $d = shift @_; if ($r = vowel($d,"さ","し","す","せ","そ")) { $rval .= $r; } } if ($c eq "sy") { $d = shift @_; if ($r = vowel($d,"しゃ","syi?","しゅ","しぇ","しょ")) { $rval .= $r; } } if ($c eq "sh") { $d = shift @_; if ($r = vowel($d,"しゃ","し","しゅ","しぇ","しょ")) { $rval .= $r; } } if ($c eq "t") { $d = shift @_; if ($r = vowel($d,"た","ち","つ","て","と")) { $rval .= $r; } } if ($c eq "ts") { $d = shift @_; if ($r = vowel($d,"た","ち","つ","て","と")) { $rval .= $r; } } if ($c eq "ty") { $d = shift @_; if ($r = vowel($d,"ちゃ","tyi?","ちゅ","ちぇ","ちょ")) { $rval .= $r; } } if ($c eq "ch") { $d = shift @_; if ($r = vowel($d,"ちゃ","ち","ちゅ","ちぇ","ちょ")) { $rval .= $r; } } if ($c eq "n") { $d = shift @_; if ($r = vowel($d,"な","に","ぬ","ね","の")) { $rval .= $r; } } if ($c eq "ny") { $d = shift @_; if ($r = vowel($d,"にゃ","nyi?","にゅ","にぇ","にょ")) { $rval .= $r; } } if ($c eq "h") { $d = shift @_; if ($r = vowel($d,"は","ひ","ふ","へ","ほ")) { $rval .= $r; } } if ($c eq "hy") { $d = shift @_; if ($r = vowel($d,"ひゃ","hyi?","ひゅ","ひぇ","ひょ")) { $rval .= $r; } } if ($c eq "f") { $d = shift @_; if 
($r = vowel($d,"は","ひ","ふ","へ","ほ")) { $rval .= $r; } } if ($c eq "m") { $d = shift @_; if ($r = vowel($d,"ま","み","む","め","も")) { $rval .= $r; } } if ($c eq "my") { $d = shift @_; if ($r = vowel($d,"みゃ","myi?","みゅ","みぇ","みょ")) { $rval .= $r; } } if ($c eq "y") { $d = shift @_; if ($r = vowel($d,"や","い","ゆ","え","よ")) { $rval .= $r; } } if ($c eq "r") { $d = shift @_; if ($r = vowel($d,"ら","り","る","れ","ろ")) { $rval .= $r; } } if ($c eq "ry") { $d = shift @_; if ($r = vowel($d,"りゃ","ryi?","りゅ","りぇ","りょ")) { $rval .= $r; } } if ($c eq "w") { $d = shift @_; if ($r = vowel($d,"わ","うぃ","wu?","うぇ","を")) { $rval .= $r; } } if ($c eq "g") { $d = shift @_; if ($r = vowel($d,"が","ぎ","ぐ","げ","ご")) { $rval .= $r; } } if ($c eq "gy") { $d = shift @_; if ($r = vowel($d,"ぎゃ","gyi?","ぎゅ","ぎぇ","ぎょ")) { $rval .= $r; } } if ($c eq "z") { $d = shift @_; if ($r = vowel($d,"ざ","じ","ず","ぜ","ぞ")) { $rval .= $r; } } if ($c eq "zy") { $d = shift @_; if ($r = vowel($d,"じゃ","zyi?","じゅ","じぇ","じょ")) { $rval .= $r; } } if ($c eq "j") { $d = shift @_; if ($r = vowel($d,"じゃ","じ","じゅ","じぇ","じょ")) { $rval .= $r; } } if ($c eq "d") { $d = shift @_; if ($r = vowel($d,"だ","ぢ","づ","で","ど")) { $rval .= $r; } } if ($c eq "dy") { $d = shift @_; if ($r = vowel($d,"ぢゃ","dyi?","ぢゅ","ぢぇ","ぢょ")) { $rval .= $r; } } if ($c eq "b") { $d = shift @_; if ($r = vowel($d,"ば","び","ぶ","べ","ぼ")) { $rval .= $r; } } if ($c eq "by") { $d = shift @_; if ($r = vowel($d,"びゃ","byi?","びゅ","びぇ","びょ")) { $rval .= $r; } } if ($c eq "p") { $d = shift @_; if ($r = vowel($d,"ぱ","ぴ","ぷ","ぺ","ぽ")) { $rval .= $r; } } if ($c eq "py") { $d = shift @_; if ($r = vowel($d,"ぴゃ","pyi?","ぴゅ","ぴぇ","ぴょ")) { $rval .= $r; } } if ($c eq "N") { $rval .= "ん" } if ($c eq "q") { $rval .= "っ" } if ($c eq "sp") { $rval .= '@sp' } if ($c eq "silB") { $rval .= '@silB' } if ($c eq "silE") { $rval .= '@silE' } if ($r = vowel($c,"あ","い","う","え","お")) { $rval .= $r; } } return $rval; } ############################################################ # メイン関数 ############################################################ if (@ARGV == 0) { print STDERR << "EOF"; gram2sapixml.pl by Takashi Sumiyoshi 2002 usage: gram2sapixml.pl [basename] ... input files: .grammar (Julian grammar file) .voca (Julian voca file) output file: .xml (SAPI Grammar XML file in UTF-8 Format) This script uses the iconv command to convert the encoding. EOF exit; } my $removesps = 1; # sp, silB, silE を除く while(@ARGV) { my $filebase = shift @ARGV; my $grammarfile = $filebase . ".grammar"; my $vocafile = $filebase . ".voca"; my $sapixmlfile = $filebase . 
".xml"; print STDERR "Processing $vocafile, $grammarfile...\n"; my $vocaword = ""; my %lexicon_disp; my %lexicon_yomi; my %grammar_left; my @input; my $disp; my $yomi; my $hiragana; ### ### load voca file ### open (VOCA, $vocafile) or die "Cannot open $vocafile"; while() { chomp; next if /^#/; @input = split (/[ \t]+/, $_); if (/^\%/) { s/#.*$//; $vocaword = substr($_, 1); # 先頭の % を抜く $vocaword =~ s/^[ \t]+//g; $vocaword =~ s/[ \t]+$//g; } else { $disp = shift @input; $disp = Jcode->new($disp)->euc; if ($disp ne "") { if ($removesps == 1 && ($disp eq "sp" || $disp eq "silB" || $disp eq "silE")) { } else { # 音素表記をかな文字列に変換 $hiragana = convertphone(@input); # print "voca [$vocaword] in $disp,$hiragana\n"; # lexicon_disp, lexicon_yomi に格納 push @{$lexicon_disp{$vocaword}}, $disp; push @{$lexicon_yomi{$vocaword}}, $hiragana; } } } } close (VOCA); ### ### load grammar file ### open (GRAMMAR, $grammarfile) or die "Cannot open $grammarfile"; my $left; while() { chomp; next if /^#/; s/#.*$//; next if $_ eq ""; @input = split (/[ \t:]+/, $_); $left = shift @input; # grammar_left は配列へのリファレンスの配列を要素にもつ連想配列 # MEMO: [@input] を \@input とかすると実体がすべて同じになりまずい push @{$grammar_left{$left}}, [@input]; } close (GRAMMAR); ### ### save sapixml file ### ### ### convert by iconv ### open (SAPIXML, "| $iconv > $sapixmlfile") or die "Cannot open $sapixmlfile or cannot exec iconv"; print SAPIXML "\n"; print SAPIXML "\n"; # まずは grammar ファイルのルール # RULEREF を並べる my $i; my $n; my $a; my @b; foreach $i (keys %grammar_left) { if ($i eq "S") { print SAPIXML "\n"; } else { print SAPIXML "\n"; } print SAPIXML " \n"; while ($a = shift @{$grammar_left{$i}}) { print SAPIXML "

\n"; @b = @{$a}; while ($n = shift @b) { if ($removesps == 1 && ! exists $lexicon_disp{$n} && ! exists $grammar_left{$n}) { # print SAPIXML "# \n"; } else { print SAPIXML " \n"; } } print SAPIXML "

\n"; } print SAPIXML "
\n"; print SAPIXML "
\n"; } # そして voca ファイルのカテゴリ名→単語 foreach $i (keys %lexicon_disp) { print SAPIXML "\n"; print SAPIXML " \n"; while ($disp = shift @{$lexicon_disp{$i}}) { $yomi = shift @{$lexicon_yomi{$i}}; if ($disp eq $yomi) { print SAPIXML "

 <P>$yomi</P>
\n"; } else { print SAPIXML "

 <P>/$disp/$yomi;</P>
\n"; } } print SAPIXML "
\n"; print SAPIXML "
\n"; } print SAPIXML "
\n"; } julius-4.2.2/gramtools/gram2sapixml/00readme-ja.txt0000644001051700105040000000305111556443771020507 0ustar ritrlab gram2sapixml.pl GRAM2SAPIXML.PL(1) GRAM2SAPIXML.PL(1) O gram2sapixml.pl - Fッp@ SAPI XML @XNvg Tv gram2sapixml.pl [prefix...] DESCRIPTION gram2sapixml.pl CJulius Fッp@t@C (.grammar, .voca) Microsoft SAPI XML `ョXNvgD prefix C .grammar, .voca t@ Ct@CgqOw DwC D R[h EUC-JPzDot@C UTF-8 GR[ fBODR[h iconv gp D ACKvDt@C .grammar \C.grammar ALq .xml fDC .xml A sD SEE ALSO mkdfa.pl ( 1 ) DIAGNOSTICS Ct@C@I[LI[L(PJeS)[ PDタSAPIAvP[Vg CvpeBwCCKvD R[h iconv gpD タspX iconv CG[D COPYRIGHT Copyright (c) 2002 Takashi Sumiyoshi LICENSE Julius gpD 10/02/2008 GRAM2SAPIXML.PL(1) julius-4.2.2/gramtools/00readme.txt0000644001051700105040000000123711071102421015465 0ustar ritrlabThese are tools for building recognition grammar for Julian. Julian uses original grammar format. mkdfa (mkdfa.pl) grammar compiler dfa_minimize minimize DFA grammar generate randam sentence generation tool accept_check tool to check acception/rejection of input sentence nextword display next predicted words of given partial sentence gram2sapixml perl script to convert Julian grammar to SAPI XML format dfa_determinize DFA determinizer Please see online manual or "00readme.txt" file under each directory. Other document in Juliusbook or on the Web site will also helps you. http://julius.sourceforge.jp/en/ julius-4.2.2/gramtools/generate/0000755001051700105040000000000012004463507015132 5ustar ritrlabjulius-4.2.2/gramtools/generate/generate.c0000644001051700105040000001400112004452400017052 0ustar ritrlab/* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ /* generate --- generate random sentences acceptable by given grammar */ #include "common.h" #include "gen_next.h" #if defined(_WIN32) && !defined(__CYGWIN32__) #include "process.h" #endif #define MAXHYPO 300 WORD_INFO *winfo; DFA_INFO *dfa; char **termname; boolean verbose_flag = FALSE; boolean term_mode = FALSE; boolean no_term_file; NODE * new_generate() { NEXTWORD **nw; NODE *now; int i,j,num,selected; /* init */ nw = nw_malloc(); now = (NODE *)mymalloc(sizeof(NODE)); now->endflag = FALSE; now->seqnum = 0; /* set init hypo */ if (term_mode) { num = dfa_firstterms(nw); } else { num = dfa_firstwords(nw); } for (;;) { if (verbose_flag) { if (no_term_file) { for(i=0;i %s\t%s\n",winfo->wname[nw[i]->id],winfo->woutput[nw[i]->id]); } else { for(i=0;i %s\t%s\n",termname[winfo->wton[nw[i]->id]],winfo->woutput[nw[i]->id]); } } /* select random one */ if (num == 1) { selected = 0; } else { j = abs(rand()) % num; for(i=0;i= num) selected = num - 1; now->seq[now->seqnum++] = nw[selected]->id; now->state = nw[selected]->next_state; if (now->seqnum >= MAXSEQNUM) { printf("word num exceeded %d\n", MAXSEQNUM); nw_free(nw); return(now); } /* output */ if (verbose_flag) { printf("(%3d) %s\n", now->state, winfo->woutput[now->seq[now->seqnum-1]]); } /* end check */ if (dfa_acceptable(now)) break; /* get next words */ if (term_mode) { num = dfa_nextterms(now, nw); } else { num = dfa_nextwords(now, nw); } } nw_free(nw); return(now); } static boolean match_node(NODE *a, NODE *b) { int i; if (a->seqnum != b->seqnum) return(FALSE); for (i=0;iseqnum;i++) { if (a->seq[i] != b->seq[i]) return(FALSE); } return(TRUE); } static void generate_main(int num) { NODE *sent; NODE **stock; int i,n,c; /* avoid 
generating same sentence */ stock = (NODE **)mymalloc(sizeof(NODE *)*num); n = 0; c = 0; while (n < num) { sent = new_generate(); for (i=0;i= n) { /* no match, store as new */ stock[n++] = sent; for (i=sent->seqnum-1;i>=0;i--) { if (term_mode) { if (no_term_file) { printf(" %s", winfo->wname[sent->seq[i]]); } else { printf(" %s", termname[winfo->wton[sent->seq[i]]]); } } else { printf(" %s", winfo->woutput[sent->seq[i]]); } } printf("\n"); c = 0; } else { /* same, ignored */ c++; if (c >= MAXHYPO) { printf("no further sentence in the last %d trial\n", c); break; } free(sent); } } for(i=0;iterm_num,winfo->num); printf("DFA has %d nodes and %d arcs\n", dfa->state_num, dfa->arc_num); } int main(int argc, char *argv[]) { int i, len; char *prefix = NULL; char *dfafile, *dictfile, *termfile; int gnum = 10; char *spname_default = SPNAME_DEF; char *spname = NULL; #define NEXTARG (++i >= argc) ? (char *)usage(argv[0]) : argv[i] /* argument */ for(i=1;i= argc) { usage(argv[0]); } spname = argv[i]; break; default: fprintf(stderr, "no such option: %s\n",argv[i]); usage(argv[0]); } } else { prefix = argv[i]; } } if (prefix == NULL) usage(argv[0]); if (spname == NULL) spname = spname_default; len = strlen(prefix) + 10; dfafile = (char *)mymalloc(len); dictfile = (char *)mymalloc(len); termfile = (char *)mymalloc(len); strcpy(dfafile, prefix); strcat(dfafile, ".dfa"); strcpy(dictfile, prefix); strcat(dictfile, ".dict"); strcpy(termfile, prefix); strcat(termfile, ".term"); /* start init */ winfo = word_info_new(); init_voca(winfo, dictfile, NULL, TRUE, FALSE); dfa = dfa_info_new(); init_dfa(dfa, dfafile); make_dfa_voca_ref(dfa, winfo); termname = (char **)mymalloc(sizeof(char *) * dfa->term_num); init_term(termfile, termname); if (termname[0] == NULL) { /* no .term file */ no_term_file = TRUE; } else { no_term_file = FALSE; } /* output info */ put_dfainfo(); /* set dfa->sp_id and dfa->is_sp[cid] from name "sp" */ { int t, i; WORD_ID w; dfa->sp_id = WORD_INVALID; dfa->is_sp = (boolean *)mymalloc(sizeof(boolean) * dfa->term_num); for(t=0;tterm_num;t++) { dfa->is_sp[t] = FALSE; for(i=0;iterm.wnum[t]; i++) { w = dfa->term.tw[t][i]; if (strcmp(winfo->woutput[w], spname) == 0) { if (dfa->sp_id == WORD_INVALID) dfa->sp_id = w; dfa->is_sp[t] = TRUE; break; } } } } if (verbose_flag) { if (dfa->sp_id != WORD_INVALID) { printf("skippable word for NOISE: %s\t%s\n", winfo->wname[dfa->sp_id], winfo->woutput[dfa->sp_id]); } } printf("----- \n"); /* random seed */ srand(getpid()); /* main loop */ generate_main(gnum); free(dfafile); free(dictfile); return 0; } julius-4.2.2/gramtools/generate/Makefile.in0000644001051700105040000000224412004452400017167 0ustar ritrlab# Copyright (c) 1991-2012 Kawahara Lab., Kyoto University # Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology # Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology # All rights reserved SHELL=/bin/sh .SUFFIXES: .SUFFIXES: .c .o .c.o: $(CC) $(CFLAGS) $(CPPFLAGS) -o $@ -c $< LIBSENT=../../libsent CC=@CC@ CFLAGS=@CFLAGS@ CPPFLAGS=@CPPFLAGS@ @DEFS@ -I$(LIBSENT)/include `$(LIBSENT)/libsent-config --cflags` LDFLAGS=@LDFLAGS@ @LIBS@ -L$(LIBSENT) `$(LIBSENT)/libsent-config --libs` RM=@RM@ -f prefix=@prefix@ exec_prefix=@exec_prefix@ INSTALL=@INSTALL@ ############################################################ OBJ = generate.o gen_next.o rdterm.o TARGET = generate@EXEEXT@ all: $(TARGET) $(TARGET): $(OBJ) $(CC) $(CFLAGS) $(CPPFLAGS) -o $@ $(OBJ) $(LDFLAGS) ############################################################ 
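# Install rules ("make install" / "make install.bin"): create @bindir@ if
# needed and copy the built generate binary there.  INSTALL, INSTALL_PROGRAM
# and bindir are substituted by configure.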
install: install.bin install.bin: $(TARGET) ${INSTALL} -d @bindir@ @INSTALL_PROGRAM@ $(TARGET) @bindir@ ############################################################ clean: $(RM) *.o *~ core $(RM) $(TARGET) $(TARGET).exe distclean: $(RM) *.o *~ core $(RM) $(TARGET) $(TARGET).exe $(RM) Makefile julius-4.2.2/gramtools/generate/00readme.txt0000644001051700105040000000374311071102422017264 0ustar ritrlab generate GENERATE(1) GENERATE(1) NAME generate - random sentence generator from a grammar SYNOPSIS generate [-v] [-t] [-n num] [-s spname] {prefix} DESCRIPTION This small program randomly generates sentences that are acceptable by the given grammar. .dfa, .dict and .term files are needed to execute. They can be generated from .grammar and .voca file by mkdfa.pl. OPTIONS -t Output in word's category name. -n num Set number of sentences to be generated (default: 10) -s spname the name string of short-pause word to be supressed (default: "sp") -v Debug output mode. EXAMPLES Exmple output of a sample grammar "fruit": % generate fruit Stat: init_voca: read 36 words Reading in term file (optional)...done 15 categories, 36 words DFA has 26 nodes and 42 arcs ----- I WANT ONE APPLE I WANT TEN PEARS CAN I HAVE A PINEAPPLE I WANT ONE PEAR COULD I HAVE A BANANA I WANT ONE APPLE PLEASE I WANT NINE APPLES NINE APPLES I WANT ONE PINEAPPLE I WANT A PEAR SEE ALSO mkdfa.pl ( 1 ) , generate-ngram ( 1 ) COPYRIGHT Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan Copyright (c) 1991-2008 Kawahara Lab., Kyoto University Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology Copyright (c) 2005-2008 Julius project team, Nagoya Institute of Technology LICENSE The same as Julius. 10/02/2008 GENERATE(1) julius-4.2.2/gramtools/generate/common.h0000644001051700105040000000050412004452400016560 0ustar ritrlab/* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #define SPNAME_DEF "sp" void init_term(char *filename, char **termname); julius-4.2.2/gramtools/generate/gen_next.h0000644001051700105040000000236012004452400017101 0ustar ritrlab/* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #ifndef __NEXT__H__ #define __NEXT__H__ #include #include #include #include /* ported from julian/dfa_decode.c */ /* $BuBVHV9f(B */ boolean can_insert_sp; /* $B2>@b$H$3$NC18l$N4V$K(Bsp$B$,F~$k2DG=@-$,$"$k>l9g(B TRUE */ } NEXTWORD; /* $BItJ,J82>@b(B */ typedef struct __node__ { boolean endflag; /* $BC5:w=*N;%U%i%0(B */ WORD_ID seq[MAXSEQNUM]; /* $B2>@b$NC18l7ONs(B */ short seqnum; /* $B2>@b$NC18l$N?t(B */ int state; /* $B8=:_$N(BDFA$B>uBVHV9f(B */ } NODE; NEXTWORD **nw_malloc(); void nw_free(NEXTWORD **nw); int dfa_firstwords(NEXTWORD **nw); int dfa_nextwords(NODE *hypo, NEXTWORD **nw); int dfa_firstterms(NEXTWORD **nw); int dfa_nextterms(NODE *hypo, NEXTWORD **nw); boolean dfa_acceptable(NODE *hypo); #endif /* __NEXT__H__ */ julius-4.2.2/gramtools/generate/rdterm.c0000644001051700105040000000225112004452400016561 0ustar ritrlab/* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya 
Institute of Technology * All rights reserved */ #include #include #include #include #include "common.h" void init_term(char *filename, char **termname) { FILE *fd; int n; static char buf[512]; char *p; fprintf(stderr, "Reading in term file (optional)..."); if ((fd = fopen_readfile(filename)) == NULL) { termname[0] = NULL; fprintf(stderr, "not found\n"); return; } while (getl(buf, sizeof(buf), fd) != NULL) { if ((p = strtok(buf, DELM)) == NULL) { fprintf(stderr, "Error: term file failed to parse, corrupted or invalid data?\n"); return; } n = atoi(p); if ((p = strtok(NULL, DELM)) == NULL) { fprintf(stderr, "Error: term file failed to parse, corrupted or invalid data?\n"); return; } termname[n] = strdup(p); } if (fclose_readfile(fd) == -1) { fprintf(stderr, "close error\n"); exit(1); } fprintf(stderr, "done\n"); } julius-4.2.2/gramtools/generate/gen_next.c0000644001051700105040000001006612004452400017076 0ustar ritrlab/* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ /* next_word functions */ #include "common.h" #include "gen_next.h" extern WORD_INFO *winfo; extern DFA_INFO *dfa; NEXTWORD ** nw_malloc() { NEXTWORD **nw; NEXTWORD *nwtmp; int i; int maxnw; maxnw = winfo->num * 2; /* NOISE$B$rHt$P$9J,(B */ /* $BO"B3NN0h$rG[Ns$K3d$jEv$F$k(B */ nw = (NEXTWORD **)malloc(maxnw * sizeof(NEXTWORD *)); nwtmp = (NEXTWORD *)malloc(maxnw * sizeof(NEXTWORD)); nw[0] = nwtmp; for (i=1;iuBV$+$iA+0\$7$&$kC18l=89g$rJV$9(B */ /* $BJV$jCM(B: $BC18l?t(B*/ /* NOISE: $B$3$3$K$OMh$J$$;EMM(B */ int dfa_firstwords(NEXTWORD **nw) { DFA_ARC *arc; int i, cate, iw, ns; int num = 0; for (i=0;istate_num;i++) { if ((dfa->st[i].status & INITIAL_S) != 0) { /* $B=i4|>uBV$+$i(B */ for (arc = dfa->st[i].arc; arc; arc = arc->next) { /* $BA4$F$NA+0\(B */ cate = arc->label; ns = arc->to_state; /* $BA+0\$KBP1~$9$k%+%F%4%jFb$NA4C18l$rE83+(B */ for (iw=0;iwterm.wnum[cate];iw++) { nw[num]->id = dfa->term.tw[cate][iw]; nw[num]->next_state = ns; nw[num]->can_insert_sp = FALSE; num++; } } } } return num; } int dfa_firstterms(NEXTWORD **nw) { DFA_ARC *arc; int i, cate, ns; int num = 0; for (i=0;istate_num;i++) { if ((dfa->st[i].status & INITIAL_S) != 0) { /* $B=i4|>uBV$+$i(B */ for (arc = dfa->st[i].arc; arc; arc = arc->next) { /* $BA4$F$NA+0\(B */ cate = arc->label; ns = arc->to_state; /* $BA+0\$KBP1~$9$k%+%F%4%jFb$N(B1$BC18l$rE83+(B */ if (dfa->term.wnum[cate] == 0) continue; nw[num]->id = dfa->term.tw[cate][0]; nw[num]->next_state = ns; nw[num]->can_insert_sp = FALSE; num++; } } } return num; } /* $Bst[hypo->state].arc; arc; arc = arc->next) { cate = arc->label; ns = arc->to_state; if (dfa->is_sp[cate]) { /* $B@h$^$G8+$k!#<+J,$OE83+$7$J$$(B */ for (arc2 = dfa->st[ns].arc; arc2; arc2 = arc2->next) { cate2 = arc2->label; ns2 = arc2->to_state; for (iw=0;iwterm.wnum[cate2];iw++) { nw[num]->id = dfa->term.tw[cate2][iw]; nw[num]->next_state = ns2; nw[num]->can_insert_sp = TRUE; num++; } } } else { /* $BA+0\$KBP1~$9$k%+%F%4%jFb$NA4C18l$rE83+(B */ for (iw=0;iwterm.wnum[cate];iw++) { nw[num]->id = dfa->term.tw[cate][iw]; nw[num]->next_state = ns; nw[num]->can_insert_sp = FALSE; num++; } } } return num; } int dfa_nextterms(NODE *hypo, NEXTWORD **nw) { DFA_ARC *arc, *arc2; int cate,ns,cate2,ns2; int num = 0; for (arc = dfa->st[hypo->state].arc; arc; arc = arc->next) { cate = arc->label; ns = arc->to_state; if (dfa->is_sp[cate]) { /* 
$B@h$^$G8+$k!#<+J,$OE83+$7$J$$(B */ for (arc2 = dfa->st[ns].arc; arc2; arc2 = arc2->next) { cate2 = arc2->label; ns2 = arc2->to_state; if (dfa->term.wnum[cate2] == 0) continue; nw[num]->id = dfa->term.tw[cate2][0]; nw[num]->next_state = ns2; nw[num]->can_insert_sp = TRUE; num++; } } else { /* $BA+0\$KBP1~$9$k%+%F%4%jFb$NA4C18l$rE83+(B */ if (dfa->term.wnum[cate] == 0) continue; nw[num]->id = dfa->term.tw[cate][0]; nw[num]->next_state = ns; nw[num]->can_insert_sp = FALSE; num++; } } return num; } /* $B2>@b$,J8$H$7$FuBV$J$i(B */ if (dfa->st[hypo->state].status & ACCEPT_S) { return TRUE; } else { return FALSE; } } julius-4.2.2/gramtools/generate/00readme-ja.txt0000644001051700105040000000376111071102422017654 0ustar ritrlab generate GENERATE(1) GENERATE(1) O generate - @_ Tv generate [-v] [-t] [-n num] [-s spname] {prefix} DESCRIPTION generate @]_D タs .dfa, .dict, .term et@CKvD mkdfa.pl D OPTIONS -t PJeSoD -n num w (default: 10) -s spname XLbvV[g|[YPOwD (default: "sp") -v fobOoD EXAMPLES vfr (tBbeBO^XNp@) タsF % generate vfr Reading in dictionary...done Reading in DFA grammar...done Mapping dict item <-> DFA terminal (category)...done Reading in term file (optional)...done 42 categories, 99 words DFA has 135 nodes and 198 arcs ----- silB silE silB I silE silB Vc X[c silE silB X[c Jb^[ F silE silB silE silB o silE silB o silE silB silE silB X[c Vc silE silB silE SEE ALSO mkdfa.pl ( 1 ) , generate-ngram ( 1 ) COPYRIGHT Copyright (c) 1991-2008 sw コ Copyright (c) 1997-2000 U(IPA) Copyright (c) 2000-2008 [wZpw@w ュコ Copyright (c) 2005-2008 Hw JuliusJ`[ LICENSE Julius gpD 10/02/2008 GENERATE(1) julius-4.2.2/gramtools/yomi2voca/0000755001051700105040000000000012004463507015250 5ustar ritrlabjulius-4.2.2/gramtools/yomi2voca/Makefile.in0000644001051700105040000000104212004452401017301 0ustar ritrlab# Copyright (c) 1991-2012 Kawahara Lab., Kyoto University # Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology # Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology # All rights reserved SHELL=/bin/sh RM=@RM@ -f prefix=@prefix@ exec_prefix=@exec_prefix@ INSTALL=@INSTALL@ TARGET=yomi2voca.pl all: chmod +x $(TARGET) install: install.bin install.bin: ${INSTALL} -d @bindir@ @INSTALL_PROGRAM@ $(TARGET) @bindir@ clean: echo nothing to do distclean: $(RM) yomi2voca.pl $(RM) Makefile julius-4.2.2/gramtools/yomi2voca/yomi2voca.pl.in0000755001051700105040000001515512004452401020123 0ustar ritrlab#!@PERL@ # Copyright (c) 1991-2012 Kawahara Lab., Kyoto University # Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology # Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology # All rights reserved # # @configure_input@ # # ひらがな -> Julius 標準モデル用変換スクリプト # 第2フィールドのひらがなを変換する. # # .yomi -> .dict # # 助詞の「は」「へ」「を」→「w a」「e」「o」は変換後に手動で直すこと. # # ver2: 小さい「ぁぃぅぇぉ」や「う゛」などに対応 # # $error = 0; $lineno = 0; while (<>) { # 文法用に,"%" で始まる行はそのまま出力する. 
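	# (Category-definition lines for grammar use begin with "%"; the check
	#  below echoes them unchanged, and only ordinary word lines have the
	#  hiragana reading in their second field converted to phonemes.)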
if (/^%/){ print; next; } chomp; # 表記とひらがな読みを分離 @a = split; $_ = $a[1]; # ひらがな,長音以外はそのまま # 3文字以上からなる変換規則(v a) s/う゛ぁ/ b a/g; s/う゛ぃ/ b i/g; s/う゛ぇ/ b e/g; s/う゛ぉ/ b o/g; s/う゛ゅ/ by u/g; # 2文字からなる変換規則 s/ぅ゛/ b u/g; s/あぁ/ a a/g; s/いぃ/ i i/g; s/いぇ/ i e/g; s/いゃ/ y a/g; s/うぅ/ u:/g; s/えぇ/ e e/g; s/おぉ/ o:/g; s/かぁ/ k a:/g; s/きぃ/ k i:/g; s/くぅ/ k u:/g; s/くゃ/ ky a/g; s/くゅ/ ky u/g; s/くょ/ ky o/g; s/けぇ/ k e:/g; s/こぉ/ k o:/g; s/がぁ/ g a:/g; s/ぎぃ/ g i:/g; s/ぐぅ/ g u:/g; s/ぐゃ/ gy a/g; s/ぐゅ/ gy u/g; s/ぐょ/ gy o/g; s/げぇ/ g e:/g; s/ごぉ/ g o:/g; s/さぁ/ s a:/g; s/しぃ/ sh i:/g; s/すぅ/ s u:/g; s/すゃ/ sh a/g; s/すゅ/ sh u/g; s/すょ/ sh o/g; s/せぇ/ s e:/g; s/そぉ/ s o:/g; s/ざぁ/ z a:/g; s/じぃ/ j i:/g; s/ずぅ/ z u:/g; s/ずゃ/ zy a/g; s/ずゅ/ zy u/g; s/ずょ/ zy o/g; s/ぜぇ/ z e:/g; s/ぞぉ/ z o:/g; s/たぁ/ t a:/g; s/ちぃ/ ch i:/g; s/つぁ/ ts a/g; s/つぃ/ ts i/g; s/つぅ/ ts u:/g; s/つゃ/ ch a/g; s/つゅ/ ch u/g; s/つょ/ ch o/g; s/つぇ/ ts e/g; s/つぉ/ ts o/g; s/てぇ/ t e:/g; s/とぉ/ t o:/g; s/だぁ/ d a:/g; s/ぢぃ/ j i:/g; s/づぅ/ d u:/g; s/づゃ/ zy a/g; s/づゅ/ zy u/g; s/づょ/ zy o/g; s/でぇ/ d e:/g; s/どぉ/ d o:/g; s/なぁ/ n a:/g; s/にぃ/ n i:/g; s/ぬぅ/ n u:/g; s/ぬゃ/ ny a/g; s/ぬゅ/ ny u/g; s/ぬょ/ ny o/g; s/ねぇ/ n e:/g; s/のぉ/ n o:/g; s/はぁ/ h a:/g; s/ひぃ/ h i:/g; s/ふぅ/ f u:/g; s/ふゃ/ hy a/g; s/ふゅ/ hy u/g; s/ふょ/ hy o/g; s/へぇ/ h e:/g; s/ほぉ/ h o:/g; s/ばぁ/ b a:/g; s/びぃ/ b i:/g; s/ぶぅ/ b u:/g; s/ふゃ/ hy a/g; s/ぶゅ/ by u/g; s/ふょ/ hy o/g; s/べぇ/ b e:/g; s/ぼぉ/ b o:/g; s/ぱぁ/ p a:/g; s/ぴぃ/ p i:/g; s/ぷぅ/ p u:/g; s/ぷゃ/ py a/g; s/ぷゅ/ py u/g; s/ぷょ/ py o/g; s/ぺぇ/ p e:/g; s/ぽぉ/ p o:/g; s/まぁ/ m a:/g; s/みぃ/ m i:/g; s/むぅ/ m u:/g; s/むゃ/ my a/g; s/むゅ/ my u/g; s/むょ/ my o/g; s/めぇ/ m e:/g; s/もぉ/ m o:/g; s/やぁ/ y a:/g; s/ゆぅ/ y u:/g; s/ゆゃ/ y a:/g; s/ゆゅ/ y u:/g; s/ゆょ/ y o:/g; s/よぉ/ y o:/g; s/らぁ/ r a:/g; s/りぃ/ r i:/g; s/るぅ/ r u:/g; s/るゃ/ ry a/g; s/るゅ/ ry u/g; s/るょ/ ry o/g; s/れぇ/ r e:/g; s/ろぉ/ r o:/g; s/わぁ/ w a:/g; s/をぉ/ o:/g; s/う゛/ b u/g; s/でぃ/ d i/g; s/でぇ/ d e:/g; s/でゃ/ dy a/g; s/でゅ/ dy u/g; s/でょ/ dy o/g; s/てぃ/ t i/g; s/てぇ/ t e:/g; s/てゃ/ ty a/g; s/てゅ/ ty u/g; s/てょ/ ty o/g; s/すぃ/ s i/g; s/ずぁ/ z u a/g; s/ずぃ/ z i/g; s/ずぅ/ z u/g; s/ずゃ/ zy a/g; s/ずゅ/ zy u/g; s/ずょ/ zy o/g; s/ずぇ/ z e/g; s/ずぉ/ z o/g; s/きゃ/ ky a/g; s/きゅ/ ky u/g; s/きょ/ ky o/g; s/しゃ/ sh a/g; s/しゅ/ sh u/g; s/しぇ/ sh e/g; s/しょ/ sh o/g; s/ちゃ/ ch a/g; s/ちゅ/ ch u/g; s/ちぇ/ ch e/g; s/ちょ/ ch o/g; s/とぅ/ t u/g; s/とゃ/ ty a/g; s/とゅ/ ty u/g; s/とょ/ ty o/g; s/どぁ/ d o a/g; s/どぅ/ d u/g; s/どゃ/ dy a/g; s/どゅ/ dy u/g; s/どょ/ dy o/g; s/どぉ/ d o:/g; s/にゃ/ ny a/g; s/にゅ/ ny u/g; s/にょ/ ny o/g; s/ひゃ/ hy a/g; s/ひゅ/ hy u/g; s/ひょ/ hy o/g; s/みゃ/ my a/g; s/みゅ/ my u/g; s/みょ/ my o/g; s/りゃ/ ry a/g; s/りゅ/ ry u/g; s/りょ/ ry o/g; s/ぎゃ/ gy a/g; s/ぎゅ/ gy u/g; s/ぎょ/ gy o/g; s/ぢぇ/ j e/g; s/ぢゃ/ j a/g; s/ぢゅ/ j u/g; s/ぢょ/ j o/g; s/じぇ/ j e/g; s/じゃ/ j a/g; s/じゅ/ j u/g; s/じょ/ j o/g; s/びゃ/ by a/g; s/びゅ/ by u/g; s/びょ/ by o/g; s/ぴゃ/ py a/g; s/ぴゅ/ py u/g; s/ぴょ/ py o/g; s/うぁ/ u a/g; s/うぃ/ w i/g; s/うぇ/ w e/g; s/うぉ/ w o/g; s/ふぁ/ f a/g; s/ふぃ/ f i/g; s/ふぅ/ f u/g; s/ふゃ/ hy a/g; s/ふゅ/ hy u/g; s/ふょ/ hy o/g; s/ふぇ/ f e/g; s/ふぉ/ f o/g; # 1音からなる変換規則 s/あ/ a/g; s/い/ i/g; s/う/ u/g; s/え/ e/g; s/お/ o/g; s/か/ k a/g; s/き/ k i/g; s/く/ k u/g; s/け/ k e/g; s/こ/ k o/g; s/さ/ s a/g; s/し/ sh i/g; s/す/ s u/g; s/せ/ s e/g; s/そ/ s o/g; s/た/ t a/g; s/ち/ ch i/g; s/つ/ ts u/g; s/て/ t e/g; s/と/ t o/g; s/な/ n a/g; s/に/ n i/g; s/ぬ/ n u/g; s/ね/ n e/g; s/の/ n o/g; s/は/ h a/g; s/ひ/ h i/g; s/ふ/ f u/g; s/へ/ h e/g; s/ほ/ h o/g; s/ま/ m a/g; s/み/ m i/g; s/む/ m u/g; s/め/ m e/g; s/も/ m o/g; s/ら/ r a/g; s/り/ r i/g; s/る/ r u/g; s/れ/ r e/g; s/ろ/ r o/g; s/が/ g a/g; s/ぎ/ g i/g; s/ぐ/ g u/g; s/げ/ g e/g; s/ご/ g o/g; s/ざ/ z a/g; s/じ/ j i/g; s/ず/ z u/g; s/ぜ/ z e/g; s/ぞ/ z o/g; s/だ/ d a/g; 
s/ぢ/ j i/g; s/づ/ z u/g; s/で/ d e/g; s/ど/ d o/g; s/ば/ b a/g; s/び/ b i/g; s/ぶ/ b u/g; s/べ/ b e/g; s/ぼ/ b o/g; s/ぱ/ p a/g; s/ぴ/ p i/g; s/ぷ/ p u/g; s/ぺ/ p e/g; s/ぽ/ p o/g; s/や/ y a/g; s/ゆ/ y u/g; s/よ/ y o/g; s/わ/ w a/g; s/ゐ/ i/g; s/ゑ/ e/g; s/ん/ N/g; s/っ/ q/g; s/ー/:/g; # ここまでに処理されてない ぁぃぅぇぉ はそのまま大文字扱い s/ぁ/ a/g; s/ぃ/ i/g; s/ぅ/ u/g; s/ぇ/ e/g; s/ぉ/ o/g; s/ゎ/ w a/g; s/ぉ/ o/g; # その他特別なルール s/を/ o/g; # 最初の空白を削る s/^ ([a-z])/$1/g; # 変換の結果長音記号が続くことがまれにあるので一つにまとめる s/:+/:/g; # アルファベット列になっていない場合,変換に失敗しているので # 標準エラー出力に出力する. $lineno++; if (! /^[ a-zA-Z:]+$/) { if ($error == 0) { $error = 1; print STDERR "Error: (they were also printed to stdout)\n"; } print STDERR "line " , $lineno , ": " , @a[0], "\t", $_,"\n"; } print @a[0], "\t", $_,"\n"; } julius-4.2.2/gramtools/dfa_determinize/0000755001051700105040000000000012004463507016471 5ustar ritrlabjulius-4.2.2/gramtools/dfa_determinize/dfa_determinize.c0000644001051700105040000002303012004452400021752 0ustar ritrlab/** * @file dfa_determinize.c * * @brief Determinize DFA for Julian grammar. * * @author Akinobu Lee * @date Wed Oct 4 17:42:16 2006 * * $Revision: 1.4 $ * */ /* * Copyright (c) 2006-2012 Kawahara Lab., Kyoto University * Copyright (c) 2006-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #include #include #undef DEBUG ///< Define this to enable debug output static DFA_INFO *dfa; ///< Input DFA info static char buf[MAXLINELEN]; ///< Local text buffer to read in /** * Get one line, stripping carriage return and newline. * * @param buf [in] text buffer * @param maxlen [in] maximum length of @a buf * @param fp [in] file pointer * * @return pointer to the given buffer, or NULL when failed. *
*/ static char * mygetl(char *buf, int maxlen, FILE *fp) { int newline; while(fgets(buf, maxlen, fp) != NULL) { newline = strlen(buf)-1; /* chop newline */ if (buf[newline] == '\n') { buf[newline] = '\0'; newline--; } if (newline >= 0 && buf[newline] == '\r') buf[newline] = '\0'; if (buf[0] == '\0') continue; /* if blank line, read next */ return buf; } return NULL; } /** * Read in DFA file, line by line. Actual parser is in libsent library. * * @param fp [in] file pointer * @param dinfo [out] DFA info * * @return TRUE if succeeded. */ static boolean myrddfa(FILE *fp, DFA_INFO *dinfo) { int state_max, arc_num, terminal_max; dfa_state_init(dinfo); state_max = 0; arc_num = 0; terminal_max = 0; while (mygetl(buf, MAXLINELEN, fp) != NULL) { if (rddfa_line(buf, dinfo, &state_max, &arc_num, &terminal_max) == FALSE) { break; } } dinfo->state_num = state_max + 1; dinfo->arc_num = arc_num; dinfo->term_num = terminal_max + 1; return(TRUE); } /** * Output usage. * */ static void usage() { fprintf(stderr, "usage: dfa_determinize [dfafile] [-o outfile]\n"); } /************************************************************************/ /** * Structure to hold state set * */ typedef struct __stateq__ { char *s; ///< State index (if 1, the state is included) int len; ///< Buffer length of above. int checked; ///< flag to check if the outgoing arcs of this set is already examined void *ac; ///< Root pointer to the list of outgoing arcs. int start; ///< if 1, this should be a begin node int end; ///< if 1, this should eb an accept node int id; ///< assigned ID struct __stateq__ *next; ///< Pointer to the next state set. } STATEQ; /** * Structure to hold outgoing arcs from / to the stateset * */ typedef struct __arc__ { int label; ///< Input label ID STATEQ *to; ///< Destination state set struct __arc__ *next; ///< Pointer to the next arc } STATEQ_ARC; /** * Output information of a state set to stdout, for debug * * @param sq [in] state set */ void sput(STATEQ *sq) { int i; STATEQ_ARC *ac; for(i=0;ilen;i++) { if (sq->s[i] == 1) printf("-%d", i); } printf("\n"); printf("checked: %d\n", sq->checked); printf("to:\n"); for(ac=sq->ac;ac;ac=ac->next) { printf("\t(%d) ", ac->label); for(i=0;ito->len;i++) { if (ac->to->s[i] == 1) printf("-%d", i); } printf("\n"); } } /** * Create a new state set. * * @param num [in] number of possible states * * @return pointer to the newly assigned state set. */ STATEQ * snew(int num) { STATEQ *new; int i; new = (STATEQ *)malloc(sizeof(STATEQ)); new->s = (char *)malloc(sizeof(char)*num); new->len = num; new->ac = NULL; new->next = NULL; for(i=0;is[i] = 0; new->checked = 0; new->start = 0; new->end = 0; return new; } /** * Free the state set. * * @param sq */void sfree(STATEQ *sq) { STATEQ_ARC *sac, *atmp; sac=sq->ac; while(sac) { atmp = sac->next; free(sac); sac = atmp; } free(sq->s); free(sq); } static STATEQ *root = NULL; ///< root node of current list of state set /** * @brief Perform determinization. * * The result will be output in DFA format, to the specified file pointer. 
* * * @param dfa [in] original DFA info * @param fpout [in] output file pointer */ boolean determinize(DFA_INFO *dfa, FILE *fpout) { STATEQ *src, *stmp, *stest; STATEQ_ARC *sac; int i, t, tnum; DFA_ARC *ac; int *tlist; int modified; int arcnum, nodenum; STATEQ **slist; /* allocate work area */ tlist = (int *)malloc(sizeof(int) * dfa->state_num); /* set initial node (a state set with single initial state) */ src = NULL; for(i=0;istate_num;i++) { if (dfa->st[i].status & INITIAL_S) { if (src == NULL) { src = snew(dfa->state_num); src->s[i] = 1; src->start = 1; root = src; } else { printf("Error: more than one initial node??\n"); return FALSE; } } } /* loop until no more state set is generated */ do { #ifdef DEBUG printf("---\n"); #endif modified = 0; for(src=root;src;src=src->next) { if (src->checked == 1) continue; #ifdef DEBUG printf("===checking===\n"); sput(src); printf("==============\n"); #endif for(t=0;tterm_num;t++) { /* examining an input label "t" on state set "src" */ /* get list of outgoing states from this state set by the input label "t", and set to tlist[0..tnum-1] */ tnum = 0; for(i=0;ilen;i++) { if (src->s[i] == 1) { for(ac=dfa->st[i].arc;ac;ac=ac->next) { if (ac->label == t) { tlist[tnum] = ac->to_state; tnum++; } } } } /* if no output with this label, skip it */ if (tnum == 0) continue; /* build the destination state set */ stest = snew(dfa->state_num); for(i=0;is[tlist[i]] = 1; } #ifdef DEBUG printf("\tinput (%d) -> states: ", t); for(i=0;ilen;i++) { if (stest->s[i] == 1) printf("-%d", i); } printf("\n"); #endif /* find if the destination state set is already generated */ for(stmp=root;stmp;stmp=stmp->next) { if (memcmp(stmp->s, stest->s, sizeof(char) * stest->len) ==0) { break; } } if (stmp == NULL) { /* not yet generated, register it as new */ #ifdef DEBUG printf("\tNEW\n"); #endif stest->next = root; root = stest; stmp = stest; } else { /* already generated, just point to it */ #ifdef DEBUG printf("\tFOUND\n"); #endif sfree(stest); } /* add arc to the destination state set to "src" */ sac = (STATEQ_ARC *)malloc(sizeof(STATEQ_ARC)); sac->label = t; sac->to = stmp; sac->next = src->ac; src->ac = sac; } src->checked = 1; modified = 1; #ifdef DEBUG printf("====result====\n"); sput(src); printf("==============\n"); #endif } } while (modified == 1); /* annotate ID and count number of nodes */ /* Also, force the state number of initial nodes to 0 by Julian requirement */ nodenum = 1; for(src=root;src;src=src->next) { if (src->start == 1) { src->id = 0; } else { src->id = nodenum++; } for(i=0;ilen;i++) { if (src->s[i] == 1) { if (dfa->st[i].status & ACCEPT_S) { src->end = 1; } } } } /* output the result in DFA form */ slist = (STATEQ **)malloc(sizeof(STATEQ *) * nodenum); for(src=root;src;src=src->next) slist[src->id] = src; arcnum = 0; for(i=0;iend == 1) t = 1; for(sac=src->ac;sac;sac=sac->next) { if (t == 1) { fprintf(fpout, "%d %d %d 1 0\n", src->id, sac->label, sac->to->id); t = 0; } else { fprintf(fpout, "%d %d %d 0 0\n", src->id, sac->label, sac->to->id); } arcnum++; } if (t == 1) { fprintf(fpout, "%d -1 -1 1 0\n", src->id); } } free(slist); /* output status to stderr */ fprintf(stderr, "-> determinized: %d nodes, %d arcs\n", nodenum, arcnum); /* free work area */ src = root; while(src) { stmp = src->next; sfree(src); src = stmp; } free(tlist); return TRUE; } /************************************************************************/ /** * Main function. 
* * @param argc [in] number of command argument * @param argv [in] array of command arguments * * @return -1 on failure, 0 on success */ int main(int argc, char *argv[]) { FILE *fp, *fpout; char *infile, *outfile; int i; /* option parsing */ infile = NULL; outfile = NULL; for(i=1;i= argc) { usage(); return -1; } outfile = argv[i]; break; default: fprintf(stderr, "invalid option: %s\n", argv[i]); usage(); return -1; } } else { infile = argv[i]; } } /* open files */ if (infile != NULL) { if ((fp = fopen(infile, "r")) == NULL) { fprintf(stderr, "Error: cannot open \"%s\"\n", infile); return -1; } } else { fp = stdin; } if (outfile != NULL) { if ((fpout = fopen(outfile, "w")) == NULL) { fprintf(stderr, "Error: cannot open \"%s\" for writing\n", outfile); return -1; } } else { fpout = stdout; } /* read in a DFA file */ dfa = dfa_info_new(); if (!myrddfa(fp, dfa)) { fprintf(stderr, "Failed to read DFA from "); if (infile) printf("\"%s\"\n", infile); else printf("stdin\n"); } if (fp != stdin) fclose(fp); fprintf(stderr, "%d categories, %d nodes, %d arcs\n", dfa->term_num, dfa->state_num, dfa->arc_num); /* do determinization */ if (determinize(dfa, fpout) == FALSE) { fprintf(stderr, "Error in determinization\n"); return -1; } if (fpout != stdout) { fclose(fpout); } return 0; } julius-4.2.2/gramtools/dfa_determinize/Makefile.in0000644001051700105040000000223412004452400020525 0ustar ritrlab# Copyright (c) 1991-2012 Kawahara Lab., Kyoto University # Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology # Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology # All rights reserved SHELL=/bin/sh .SUFFIXES: .SUFFIXES: .c .o .c.o: $(CC) $(CFLAGS) $(CPPFLAGS) -o $@ -c $< LIBSENT=../../libsent CC=@CC@ CFLAGS=@CFLAGS@ CPPFLAGS=@CPPFLAGS@ @DEFS@ -I$(LIBSENT)/include `$(LIBSENT)/libsent-config --cflags` LDFLAGS=@LDFLAGS@ @LIBS@ -L$(LIBSENT) `$(LIBSENT)/libsent-config --libs` RM=@RM@ -f prefix=@prefix@ exec_prefix=@exec_prefix@ INSTALL=@INSTALL@ ############################################################ OBJ = dfa_determinize.o TARGET=dfa_determinize@EXEEXT@ all: $(TARGET) $(TARGET): $(OBJ) $(CC) $(CFLAGS) $(CPPFLAGS) -o $@ $(OBJ) $(LDFLAGS) ############################################################ install: install.bin install.bin: $(TARGET) ${INSTALL} -d @bindir@ @INSTALL_PROGRAM@ $(TARGET) @bindir@ ############################################################ clean: $(RM) *.o *~ core $(RM) $(TARGET) $(TARGET).exe distclean: $(RM) *.o *~ core $(RM) $(TARGET) $(TARGET).exe $(RM) Makefile julius-4.2.2/gramtools/dfa_determinize/00readme.txt0000644001051700105040000000243711071102422020622 0ustar ritrlab dfa_determinize DFA_DETERMINIZE(1) DFA_DETERMINIZE(1) NAME dfa_determinize - Determinize NFA grammar network. SYNOPSIS dfa_determinize [-o outfile] {dfafile} DESCRIPTION dfa_determinize converts a non-deterministic .dfa file into deterministic DFA. Output to standard output, or file specified by "-o" option. This additional tool is not necessary on a grammar building procedure in Julius, since the grammar network generated by mkdfa.pl is always determinized. OPTIONS -o outfile Outout file. If not specified, output to stdout. 
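       For instance, the following command lines (illustrative only; they
       simply follow the SYNOPSIS above and assume the dfa_determinize
       binary is on the PATH) determinize foo.dfa into bar.dfa:

           % dfa_determinize foo.dfa -o bar.dfa

       or, reading from standard input and writing to standard output:

           % dfa_determinize < foo.dfa > bar.dfa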
EXAMPLES Determinize foo.dfa to bar.dfa: Another way: SEE ALSO mkdfa.pl ( 1 ) , dfa_minimize ( 1 ) COPYRIGHT Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan Copyright (c) 1991-2008 Kawahara Lab., Kyoto University Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology Copyright (c) 2005-2008 Julius project team, Nagoya Institute of Technology LICENSE The same as Julius. 10/02/2008 DFA_DETERMINIZE(1) julius-4.2.2/gramtools/dfa_determinize/00readme-ja.txt0000644001051700105040000000224411071102422021206 0ustar ritrlab dfa_determinize DFA_DETERMINIZE(1) DFA_DETERMINIZE(1) O dfa_determinize - LI[g}g@ Tv dfa_determinize [-o outfile] {dfafile} DESCRIPTION dfa_determinize C.dfa t@C .dfa t@CC WooDIvV -o o wD mkdfa.pl DFAC Cmkdfa.pl .dfa t@C c[gKvD OPTIONS -o outfile ot@CwD EXAMPLES foo.dfa bar.dfa D @F SEE ALSO mkdfa.pl ( 1 ) , dfa_minimize ( 1 ) COPYRIGHT Copyright (c) 1991-2008 sw コ Copyright (c) 1997-2000 U(IPA) Copyright (c) 2000-2008 [wZpw@w ュコ Copyright (c) 2005-2008 Hw JuliusJ`[ LICENSE Julius gpD 10/02/2008 DFA_DETERMINIZE(1) julius-4.2.2/gramtools/nextword/0000755001051700105040000000000012004463507015212 5ustar ritrlabjulius-4.2.2/gramtools/nextword/Makefile.in0000644001051700105040000000230712004452401017250 0ustar ritrlab# Copyright (c) 1991-2012 Kawahara Lab., Kyoto University # Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology # Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology # All rights reserved SHELL=/bin/sh .SUFFIXES: .SUFFIXES: .c .o .c.o: $(CC) $(CFLAGS) $(CPPFLAGS) -o $@ -c $< LIBSENT=../../libsent CC=@CC@ CFLAGS=@CFLAGS@ CPPFLAGS=@CPPFLAGS@ @DEFS@ -I$(LIBSENT)/include `$(LIBSENT)/libsent-config --cflags` LDFLAGS=@LDFLAGS@ @LIBS@ @READLINE_LIBS@ -L$(LIBSENT) `$(LIBSENT)/libsent-config --libs` RM=@RM@ -f prefix=@prefix@ exec_prefix=@exec_prefix@ INSTALL=@INSTALL@ ############################################################ OBJ = nextword.o get_next.o wtoken.o rdterm.o readline.o TARGET=nextword@EXEEXT@ all: $(TARGET) $(TARGET): $(OBJ) $(CC) $(CFLAGS) $(CPPFLAGS) -o $@ $(OBJ) $(LDFLAGS) ############################################################ install: install.bin install.bin: $(TARGET) ${INSTALL} -d @bindir@ @INSTALL_PROGRAM@ $(TARGET) @bindir@ ############################################################ clean: $(RM) *.o *~ core $(RM) $(TARGET) $(TARGET).exe distclean: $(RM) *.o *~ core $(RM) $(TARGET) $(TARGET).exe $(RM) Makefile julius-4.2.2/gramtools/nextword/get_next.c0000644001051700105040000000261012004452401017161 0ustar ritrlab/* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #include #include #include #include #include "nextword.h" extern WORD_INFO *winfo; extern DFA_INFO *dfa; extern char **termname; /* $B%U%i%0C#(B */ extern boolean no_term_file; extern boolean verbose_flag; extern boolean term_mode; /* $BuBV$N=89g$rJV$9(B */ int next_terms(int stateid, int *termbuf, int *nextstatebuf) { DFA_ARC *arc, *arc2; int cate, ns; int cnum; cnum = 0; for (arc = dfa->st[stateid].arc; arc; arc = arc->next) { cate = arc->label; ns = arc->to_state; if (dfa->is_sp[cate]) { for (arc2 = dfa->st[ns].arc; arc2; arc2 = arc2->next) { termbuf[cnum] = arc2->label; nextstatebuf[cnum] = arc2->to_state; cnum++; } } else { /* not noise */ termbuf[cnum] = cate; nextstatebuf[cnum] = ns; cnum++; } } 
return cnum; } /* $B%+%F%4%jHV9f$N=EJ#$rHr$1$k(B */ int compaction_int(int *a, int num) { int i,j,d; d = 0; for(i=0;i A BANANA [wseq: A BANANA ] [cate: (NUM_1|NUM_1|A|A) FRUIT_SINGULAR NS_E] PREDICTED CATEGORIES/WORDS: NS_B ( ) HAVE (HAVE ) WANT (WANT ) NS_B ( ) HAVE (HAVE ) WANT (WANT ) SEE ALSO mkdfa.pl ( 1 ) , generate ( 1 ) , accept_check ( 1 ) COPYRIGHT Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan Copyright (c) 1991-2008 Kawahara Lab., Kyoto University Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology Copyright (c) 2005-2008 Julius project team, Nagoya Institute of Technology LICENSE The same as Julius. 10/02/2008 NEXTWORD(1) julius-4.2.2/gramtools/nextword/common.h0000644001051700105040000000050412004452401016641 0ustar ritrlab/* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #define SPNAME_DEF "sp" void init_term(char *filename, char **termname); julius-4.2.2/gramtools/nextword/rdterm.c0000644001051700105040000000225112004452401016642 0ustar ritrlab/* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #include #include #include #include #include "common.h" void init_term(char *filename, char **termname) { FILE *fd; int n; static char buf[512]; char *p; fprintf(stderr, "Reading in term file (optional)..."); if ((fd = fopen_readfile(filename)) == NULL) { termname[0] = NULL; fprintf(stderr, "not found\n"); return; } while (getl(buf, sizeof(buf), fd) != NULL) { if ((p = strtok(buf, DELM)) == NULL) { fprintf(stderr, "Error: term file failed to parse, corrupted or invalid data?\n"); return; } n = atoi(p); if ((p = strtok(NULL, DELM)) == NULL) { fprintf(stderr, "Error: term file failed to parse, corrupted or invalid data?\n"); return; } termname[n] = strdup(p); } if (fclose_readfile(fd) == -1) { fprintf(stderr, "close error\n"); exit(1); } fprintf(stderr, "done\n"); } julius-4.2.2/gramtools/nextword/nextword.h0000644001051700105040000000152112004452401017223 0ustar ritrlab/* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ typedef struct __wtoken__ { WORD_ID wid; struct __wtoken__ *next; } WTOKEN; typedef struct __state_chain__ { int state; struct __state_chain__ *next; } STATECHAIN; void put_wtoken(WTOKEN **wseq, int nseq); boolean new_get_wtoken(char *buf, WTOKEN **wseq, int *nseq_ret); void free_wtoken(WTOKEN **wseq, int nseq); int next_terms(int stateid, int *termbuf, int *nextstatebuf); int compaction_int(int *a, int num); char *rl_gets (char *prompt); char *dfaterm_generator(char *text, int state); char *dfaword_generator(char *text, int state); void init_term(char *filename, char **termname); julius-4.2.2/gramtools/nextword/00readme-ja.txt0000644001051700105040000000461411071102423017733 0ustar ritrlab nextword NEXTWORD(1) NEXTWORD(1) O nextword - DFA @itjP\c[ Tv nextword [-t] [-r] [-s spname] [-v] {prefix} DESCRIPTION nextword Cmkdfa.pl DFA @ C^ PWoD タs .dfa, .dict, .term et@CKvD mkdfa.pl D II mkdfa.pl o@C @CO t@D CJulius 2pX TsD Cnextword ^t D OPTIONS -t 
PJeSEoD -r PtD -s spname XLbvV[g|[YPOwD (default: "sp") -v fobOoD EXAMPLES vfr (tBbeBO^XNp@) タsF % nextword vfr Reading in dictionary...done Reading in DFA grammar...done Mapping dict item <-> DFA terminal (category)...done Reading in term file (optional)...done 42 categories, 99 words DFA has 135 nodes and 198 arcs ----- wseq > silE [wseq: silE] [cate: (NI|NI_AT) SURU_V KUDASAI_V NS_E] PREDICTED CATEGORIES/WORDS: KEIDOU_A (h n ) BANGOU_N ( ) HUKU_N ( ) PATTERN_N (`FbN cネ ネ ...) GARA_N ( ) KANZI_N ( ) IRO_N (F ) COLOR_N ( ...) wseq > SEE ALSO mkdfa.pl ( 1 ) , generate ( 1 ) , accept_check ( 1 ) COPYRIGHT Copyright (c) 1991-2008 sw コ Copyright (c) 1997-2000 U(IPA) Copyright (c) 2000-2008 [wZpw@w ュコ Copyright (c) 2005-2008 Hw JuliusJ`[ LICENSE Julius gpD 10/02/2008 NEXTWORD(1) julius-4.2.2/gramtools/nextword/wtoken.c0000644001051700105040000000646212004452401016664 0ustar ritrlab/* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #include #include #include #include #include "nextword.h" extern WORD_INFO *winfo; extern DFA_INFO *dfa; extern char **termname; /* $B%U%i%0C#(B */ extern boolean no_term_file; extern boolean verbose_flag; extern boolean term_mode; extern boolean reverse_mode; /* $BA4(Bwtoken$B$r=PNO(B */ void put_wtoken(WTOKEN **wseq, int nseq) { int i; WTOKEN *tok; printf("[wseq:"); for (i=0;iwoutput[wseq[i]->wid]); } printf("]\n"); printf("[cate:"); for (i=0;inext != NULL) { /* more than one */ printf(" ("); } else { printf(" "); } if (no_term_file) { printf("%s", winfo->wname[wseq[i]->wid]); } else { printf("%s", termname[winfo->wton[wseq[i]->wid]]); } if (wseq[i]->next != NULL) { /* more than one */ for(tok = wseq[i]->next; tok; tok = tok->next) { if (no_term_file) { printf("|%s", winfo->wname[tok->wid]); } else { printf("|%s", termname[winfo->wton[tok->wid]]); } } printf(")"); } } printf("]\n"); } /* buf $B$+$i(B wseq $B$r@8@.(B */ boolean new_get_wtoken(char *buf, WTOKEN **wseq, int *nseq_ret) { char *p; int i,it; WTOKEN *new, *prev; int nseq; /* decode string -> wid */ nseq = 0; for(p = strtok(buf, " "); p; p = strtok(NULL, " ")) { it = 0; prev = NULL; if (term_mode) { if (no_term_file) { if (atoi(p) >= 0 && atoi(p) < dfa->term_num) { new = (WTOKEN *)mymalloc(sizeof(WTOKEN)); if (dfa->term.wnum[atoi(p)] == 0) { printf("word %d: category \"%s\" has no word\n", nseq+1, p); return(FALSE); } new->wid = dfa->term.tw[atoi(p)][0]; new->next = prev; prev = new; it++; } } else { /* termname exist */ for (i=0;iterm_num;i++) { if (strmatch(p, termname[i])) { if (dfa->term.wnum[i] == 0) { printf("word %d: category \"%s\" has no word\n", nseq+1, p); return(FALSE); } new = (WTOKEN *)mymalloc(sizeof(WTOKEN)); new->wid = dfa->term.tw[i][0]; new->next = prev; prev = new; it++; } } } if (prev == NULL) { /* not found */ printf("word %d: category \"%s\" not exist\n", nseq+1, p); return(FALSE); } } else { /* normal word mode */ for (i=0;inum;i++) { if (strmatch(p, winfo->woutput[i])) { new = (WTOKEN *)mymalloc(sizeof(WTOKEN)); new->wid = i; new->next = prev; prev = new; it++; } } if (prev == NULL) { /* not found */ printf("word %d: word \"%s\" not in voca\n", nseq+1, p); return(FALSE); } } wseq[nseq++] = new; } if (reverse_mode) { for (i=0;i < nseq / 2;i++) { new = wseq[i]; wseq[i] = wseq[nseq - 1 - i]; wseq[nseq - 1 - i] = new; } } *nseq_ret = nseq; return(TRUE); } /* $B%a%b%j2rJ|(B */ void 
free_wtoken(WTOKEN **wseq, int nseq) { int i; WTOKEN *tok, *tmp; for (i=0;inext; free(tok); tok = tmp; } } } julius-4.2.2/gramtools/nextword/nextword.c0000644001051700105040000002106212004452401017220 0ustar ritrlab/* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ /* nextword --- interactive gramamr checker */ #include #include #include #include #ifdef HAVE_READLINE #include #include #endif #include "nextword.h" #include "common.h" WORD_INFO *winfo; /* $BC18l>pJs(B */ DFA_INFO *dfa; /* DFA */ char **termname; /* $B%+%F%4%jL>(B from .term */ /* $B%U%i%0C#(B */ boolean no_term_file; boolean verbose_flag = FALSE; boolean term_mode = FALSE; boolean reverse_mode = FALSE; /* $BF~NO(B */ static WTOKEN *wseq[MAXSEQNUM]; /* $BF~NOC18l7ONs(B */ static int nseq; /* $BF~NOC18l?t(B */ /* $B%Q!<%8%s%07k2L(B */ static STATECHAIN *reach_state; /* $B:GBgE~C#>uBV(B */ static boolean can_accept; /* $BuBV$O%9%?%C%/$K$?$a$F$*$/(B */ static void push_state(int stateid) { STATECHAIN *new; new = (STATECHAIN *)mymalloc(sizeof(STATECHAIN)); new->state = stateid; new->next = reach_state; reach_state = new; } static void free_reachstate() { STATECHAIN *st, *tmp; st = reach_state; while (st) { tmp = st->next; free(st); st = tmp; } } static void put_state(int s, int l) { int i; for (i=0;i<=l;i++) printf(" "); printf("[%d]\n",s); } /* $B>uBV(Bstateid$B$K$F(Biseq$BHVL\$NF~NO$,term_num * 2); nstate = (int *)mymalloc(sizeof(int)*dfa->term_num * 2); if (verbose_flag) put_state(stateid, iseq); /* end process */ if (nseq_reached > iseq) nseq_reached = iseq; if (iseq < 0) { /* reaches last */ /* push current status */ push_state(stateid); if (dfa->st[stateid].status & ACCEPT_S) { can_accept = TRUE; } free(nterms); free(nstate); return; } for (token = wseq[iseq]; token; token = token->next) { if (verbose_flag) { for(i=0;i<=iseq;i++) printf(" "); if (no_term_file) { printf("%s(%s)\n",winfo->woutput[token->wid], winfo->wname[token->wid]); } else { printf("%s(%s:%s)\n",winfo->woutput[token->wid], termname[winfo->wton[token->wid]], winfo->wname[token->wid]); } } cnum = next_terms(stateid, nterms, nstate); for (i=0;iwton[token->wid]) { /* found */ can_accept_recursive(nstate[i], iseq - 1); } } } free(nterms); free(nstate); return; } #if 0 /* $BL52;%b%G%k(B silB, silE $B$,$J$1$l$PA^F~(B */ static char * pad_sil(char *s) { char *name; char *p; name = s; if (strncmp(name, "silB",4) != 0) { p = (char *)mymalloc(strlen(name)+6); strcpy(p, "silB "); strcat(p, name); free(name); name = p; } if (strncmp(&(name[strlen(name)-4]), "silE",4) != 0) { p = (char *)mymalloc(strlen(name)+6); strcpy(p, name); strcat(p, " silE"); free(name); name = p; } return(name); } #endif static void put_nextword() { STATECHAIN *tmp; int state; int *nterms, *nstate; int cnum; int i,j; #define NW_PUT_LIMIT 3 nterms = (int *)mymalloc(sizeof(int)*dfa->term_num * 2); nstate = (int *)mymalloc(sizeof(int)*dfa->term_num * 2); for(tmp=reach_state; tmp; tmp=tmp->next) { state = tmp->state; cnum = next_terms(state, nterms, nstate); cnum = compaction_int(nterms, cnum); for (i=0;iterm.wnum[nterms[i]] > NW_PUT_LIMIT) { for(j=0;j < NW_PUT_LIMIT;j++) { printf("%s ",winfo->woutput[dfa->term.tw[nterms[i]][j]]); } printf("...)\n"); } else { for(j=0;jterm.wnum[nterms[i]];j++) { printf("%s ",winfo->woutput[dfa->term.tw[nterms[i]][j]]); } printf(")\n"); } } } free(nterms); free(nstate); } 
static void nextword_main() { int i; char *buf; /* get word sequence */ do { if (term_mode) buf = rl_gets("cate > "); else buf = rl_gets("wseq > "); } while (buf == NULL || new_get_wtoken(buf, wseq, &nseq) == FALSE); put_wtoken(wseq, nseq); reach_state = NULL; nseq_reached = nseq; can_accept = FALSE; for (i=0;istate_num;i++) { if ((dfa->st[i].status & INITIAL_S) != 0) { /* $B=i4|>uBV$+$i(B */ can_accept_recursive(i, nseq-1); } } /* results stored in can_accept and reach_state */ if (reach_state == NULL) { /* rejected */ printf("REJECTED at %d\n", nseq_reached + 1); } else { if (can_accept) printf("ACCEPTABLE\n"); printf("PREDICTED CATEGORIES/WORDS:\n"); put_nextword(); } free_wtoken(wseq, nseq); free_reachstate(); } static char * usage(char *s) { fprintf(stderr, "nextword --- tty-based interactive grammar checker\n"); fprintf(stderr, "usage: %s prefix\n",s); fprintf(stderr, " -t ... use category symbols instead of words (needs .term)\n"); fprintf(stderr, " -s string ... specify short-pause model\n"); fprintf(stderr, " -r ... reverse order input\n"); fprintf(stderr, " -v ... verbose output\n"); #ifndef HAVE_READLINE fprintf(stderr, "(READLINE feature disabled)\n"); #endif exit(1); } static void put_dfainfo() { printf("%d categories, %d words\n",dfa->term_num,winfo->num); printf("DFA has %d nodes and %d arcs\n", dfa->state_num, dfa->arc_num); } int main(int argc, char *argv[]) { int i, len; char *prefix; char *dfafile, *dictfile, *termfile; char *spname_default = SPNAME_DEF; char *spname = NULL; #define NEXTARG (++i >= argc) ? (char *)usage(argv[0]) : argv[i] /* argument */ if (argc == 1) usage(argv[0]); for(i=1;i= argc) { usage(argv[0]); } spname = argv[i]; default: fprintf(stderr, "no such option: %s\n",argv[i]); usage(argv[0]); } } else { prefix = argv[i]; } } if (spname == NULL) spname = spname_default; len = strlen(prefix) + 10; dfafile = (char *)mymalloc(len); dictfile = (char *)mymalloc(len); termfile = (char *)mymalloc(len); strcpy(dfafile, prefix); strcat(dfafile, ".dfa"); strcpy(dictfile, prefix); strcat(dictfile, ".dict"); strcpy(termfile, prefix); strcat(termfile, ".term"); /* start init */ winfo = word_info_new(); init_voca(winfo, dictfile, NULL, TRUE, FALSE); dfa = dfa_info_new(); init_dfa(dfa, dfafile); make_dfa_voca_ref(dfa, winfo); termname = (char **)mymalloc(sizeof(char *) * dfa->term_num); init_term(termfile, termname); if (termname[0] == NULL) { /* no .term file */ no_term_file = TRUE; } else { no_term_file = FALSE; } /* output info */ put_dfainfo(); /* set dfa->sp_id and dfa->is_sp[cid] from name "sp" */ { int t, i; WORD_ID w; dfa->sp_id = WORD_INVALID; dfa->is_sp = (boolean *)mymalloc(sizeof(boolean) * dfa->term_num); for(t=0;tterm_num;t++) { dfa->is_sp[t] = FALSE; for(i=0;iterm.wnum[t]; i++) { w = dfa->term.tw[t][i]; if (strcmp(winfo->woutput[w], spname) == 0) { if (dfa->sp_id == WORD_INVALID) dfa->sp_id = w; dfa->is_sp[t] = TRUE; break; } } } } if (verbose_flag) { if (dfa->sp_id != WORD_INVALID) { printf("skippable word for NOISE: %s\t%s\n", winfo->wname[dfa->sp_id], winfo->woutput[dfa->sp_id]); } } printf("----- \n"); #ifdef HAVE_READLINE printf("command completion is built-in\n----- \n"); #else printf("command completion is disabled\n----- \n"); #endif /* initialize readline */ #ifdef HAVE_READLINE /* rl_bind_key(PAGE, rl_menu_complete); */ if (term_mode && !no_term_file) { #ifdef HAVE_READLINE_4_1_OLDER rl_completion_entry_function = (Function *)dfaterm_generator; #else rl_completion_entry_function = (rl_compentry_func_t *)dfaterm_generator; #endif } else { #ifdef 
HAVE_READLINE_4_1_OLDER rl_completion_entry_function = (Function *)dfaword_generator; #else rl_completion_entry_function = (rl_compentry_func_t *)dfaword_generator; #endif } #endif /* main loop */ for (;;) { nextword_main(); } free(dfafile); free(dictfile); return 0; } julius-4.2.2/gramtools/nextword/readline.c0000644001051700105040000000526112004452401017134 0ustar ritrlab/* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #include #include #include #include #ifdef HAVE_READLINE #include #include #endif #include "nextword.h" extern WORD_INFO *winfo; extern DFA_INFO *dfa; extern char **termname; /* $B%U%i%0C#(B */ extern boolean no_term_file; extern boolean verbose_flag; extern boolean term_mode; static char *line_read = (char *)NULL; /* $BFI$_9~$s$@J8;zNs(B */ #ifdef HAVE_READLINE /* readline$B4X78(B */ /* Read a string, and return a pointer to it. Returns NULL on EOF. */ char * rl_gets (char *prompt) { /* If the buffer has already been allocated, return the memory to the free pool. */ if (line_read) { free (line_read); line_read = (char *)NULL; } /* Get a line from the user. */ line_read = readline (prompt); /* If the line has any text in it, save it on the history. */ if (line_read && *line_read) { /*line_read = pad_sil(line_read); printf("%s\n",line_read);*/ add_history (line_read); } return (line_read); } char * dfaterm_generator(char *text, int state) { static int list_index, len; char *name; if (!state) { list_index = 0; len = strlen(text); } while (list_index < dfa->term_num) { name = termname[list_index++]; if (strncmp(name, text, len) == 0) { return(strdup(name)); } } return((char *)NULL); } char * dfaword_generator(char *text, int state) { static int list_index, len; char *name; if (!state) { list_index = 0; len = strlen(text); } while (list_index < winfo->num) { name = winfo->woutput[list_index++]; if (strncmp(name, text, len) == 0) { return(strdup(name)); } } return((char *)NULL); } #else /* ~HAVE_READLINE */ /* Read a string, and return a pointer to it. Returns NULL on EOF. */ char * rl_gets (char *prompt) { char *p; /* allocate buffer if not yet */ if (! line_read) { if ((line_read = malloc(MAXLINELEN)) == NULL) { fprintf(stderr, "memory exceeded\n"); exit(1); } } /* Get a line from the user. */ fprintf(stderr, "%s", prompt); if (fgets(line_read, MAXLINELEN, stdin) == NULL) { /* input error */ return NULL; } /* strip last newline */ p = line_read + strlen(line_read) - 1; while (p >= line_read && *p == '\n') { *p = '\0'; p--; } if (*line_read == '\0') { /* no input */ return NULL; } return (line_read); } #endif /* HAVE_READLINE */ julius-4.2.2/gramtools/configure.in0000644001051700105040000000674112004452400015647 0ustar ritrlabdnl Copyright (c) 1991-2012 Kawahara Lab., Kyoto University dnl Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology dnl Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology dnl All rights reserved AC_INIT(00readme.txt) AC_CONFIG_AUX_DIR(../support) dnl Checks for optimization flag AC_CANONICAL_HOST AC_MSG_CHECKING([host-specific optimization flag]) if test -z "$CFLAGS" ; then OPTFLAG=../support/cflags.${host_cpu}-${host_vendor}-${host_os} if test -f "$OPTFLAG" ; then . 
$OPTFLAG AC_MSG_RESULT([$OPTFLAG]) else AC_MSG_RESULT([no]) fi else AC_MSG_RESULT([skipped]) fi AC_PROG_CC AC_PROG_CPP AC_PROG_INSTALL AC_EXEEXT dnl Checks for optimization flag AC_MSG_CHECKING([host specific optimization flag]) if test -z "$CFLAGS" ; then OPTFLAG=../support/cflags.${host_cpu}-${host_vendor}-${host_os} if test -f "$OPTFLAG" ; then . $OPTFLAG AC_MSG_RESULT([$OPTFLAG]) else AC_MSG_RESULT([no]) fi else AC_MSG_RESULT([skipped]) fi AC_PATH_PROG(RM,rm) AC_PATH_PROG(PERL,perl) if test -z "$PERL"; then AC_MSG_ERROR([perl not found! installation terminated]) fi dnl AC_PATH_PROG(NKF, nkf) dnl if test -z "$NKF"; then dnl AC_MSG_ERROR([nkf not found! installation terminated]) dnl fi dnl not more use of bison and flex: use pre-compiled source instead dnl AC_PATH_PROG(BISON, bison) dnl if test -z "$BISON"; then dnl AC_MSG_ERROR([bison not found! installation terminated]) dnl fi dnl AC_PATH_PROG(FLEX, flex) dnl if test -z "$FLEX"; then dnl AC_MSG_ERROR([flex not found! installation terminated]) dnl fi AC_PATH_PROG(ICONV,iconv) if test -z "$ICONV"; then AC_MSG_WARN([no iconv, gram2sapixml.pl may not work]) else AC_MSG_CHECKING([for Jcode module in perl]) if $PERL -mJcode -e "print;"; then AC_MSG_RESULT([yes]) else AC_MSG_WARN([no Jcode module in perl, gram2sapixml.pl may not work]) fi fi dnl check for readline library for nextword have_readline=no READLINE_LIBS="" AC_CHECK_HEADER(readline/readline.h, AC_CHECK_LIB(readline, readline, have_readline=yes READLINE_LIBS="-lreadline", AC_MSG_CHECKING([for readline in -lreadline with -ltermcap]) ac_save_LIBS="$LIBS" LIBS="$ac_save_LIBS -lreadline -ltermcap" AC_TRY_LINK([#include #include ],[readline("");], AC_MSG_RESULT([yes]) READLINE_LIBS="-lreadline -ltermcap" LIBS="$ac_save_LIBS" have_readline=yes, AC_MSG_RESULT([no]) AC_MSG_CHECKING([for readline in -lreadline with -lncurses]) LIBS="$ac_save_LIBS -lreadline -lncurses" AC_TRY_LINK([#include #include ],[readline("");], AC_MSG_RESULT([yes]) READLINE_LIBS="-lreadline -lncurses" LIBS="$ac_save_LIBS" have_readline=yes, AC_MSG_RESULT([no]) READLINE_LIBS="" LIBS="$ac_save_LIBS" ) ) ) ) if test "$have_readline" = yes; then AC_DEFINE(HAVE_READLINE) dnl check some definition AC_MSG_CHECKING([for readline verion > 4.1]) AC_EGREP_HEADER(rl_compentry_func_t, readline/readline.h, AC_MSG_RESULT([yes]), AC_MSG_RESULT([no - use old func]) AC_DEFINE(HAVE_READLINE_4_1_OLDER) ) fi AC_SUBST(READLINE_LIBS) dnl check for malloc.h AC_CHECK_HEADERS(malloc.h) AC_OUTPUT(Makefile mkdfa/Makefile mkdfa/mkdfa.pl mkdfa/mkfa-1.44-flex/Makefile dfa_minimize/Makefile generate/Makefile accept_check/Makefile nextword/Makefile yomi2voca/Makefile yomi2voca/yomi2voca.pl gram2sapixml/Makefile gram2sapixml/gram2sapixml.pl dfa_determinize/Makefile) julius-4.2.2/gramtools/mkdfa/0000755001051700105040000000000012004463507014422 5ustar ritrlabjulius-4.2.2/gramtools/mkdfa/Makefile.in0000644001051700105040000000143712004452400016462 0ustar ritrlab# Copyright (c) 1991-2012 Kawahara Lab., Kyoto University # Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology # Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology # All rights reserved SHELL=/bin/sh RM=@RM@ -f prefix=@prefix@ exec_prefix=@exec_prefix@ INSTALL=@INSTALL@ TARGET=mkdfa.pl SUBDIR=mkfa-1.44-flex all: (cd $(SUBDIR); $(MAKE)) chmod +x $(TARGET) install: (cd $(SUBDIR); $(MAKE) install) ${INSTALL} -d @bindir@ @INSTALL_PROGRAM@ $(TARGET) @bindir@ install.bin: (cd $(SUBDIR); $(MAKE) install.bin) ${INSTALL} -d @bindir@ @INSTALL_PROGRAM@ 
$(TARGET) @bindir@ clean: (cd $(SUBDIR); $(MAKE) clean) distclean: if test -f $(SUBDIR)/Makefile; then \ (cd $(SUBDIR); $(MAKE) distclean); \ fi $(RM) mkdfa.pl $(RM) Makefile julius-4.2.2/gramtools/mkdfa/00readme.txt0000644001051700105040000000362011071102422016546 0ustar ritrlab mkdfa.pl MKDFA.PL(1) MKDFA.PL(1) NAME mkdfa.pl - grammar compiler SYNOPSIS mkdfa.pl [options...] {prefix} DESCRIPTION mkdfa.pl compiles the Julian format grammar (.grammar and .voca) to Julian native formats (.dfa and .dict). In addition, ".term" will be also generated that stores correspondence of category ID used in the output files to the source category name. prefix should be the common file name prefix of ".grammar" and "voca" file. From prefix.grammar and prefix.voca file, prefix.dfa, prefix.dict and prefix.term will be output. OPTIONS -n Not process dictionary. You can only convert .grammar file to .dfa file without .voca file. ENVIRONMENT VARIABLES TMP or TEMP Set directory to store temporal file. If not specified, one of them on the following list will be used: /tmp, /var/tmp, /WINDOWS/Temp, /WINNT/Temp. EXAMPLES Convert a grammar foo.grammar and foo.voca to foo.dfa, foo.voca and foo.term. SEE ALSO julius ( 1 ) , generate ( 1 ) , nextword ( 1 ) , accept_check ( 1 ) , dfa_minimize ( 1 ) DIAGNOSTICS mkdfa.pl invokes mkfa and dfa_minimize internally. They should be placed at the same directory as mkdfa.pl. COPYRIGHT Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan Copyright (c) 1991-2008 Kawahara Lab., Kyoto University Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology Copyright (c) 2005-2008 Julius project team, Nagoya Institute of Technology LICENSE The same as Julius. 10/02/2008 MKDFA.PL(1) julius-4.2.2/gramtools/mkdfa/mkfa-1.44-flex/0000755001051700105040000000000012004463507016660 5ustar ritrlabjulius-4.2.2/gramtools/mkdfa/mkfa-1.44-flex/gram.y0000644001051700105040000002075012004452400017772 0ustar ritrlab%{ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #include "mkfa.h" #define YYSTYPE char * #define CLASS_NUM 100 void appendNonTerm( char *name, int modeAssign ); BODY *setNonTerm( void ); CLASS *entryNonTerm( char *name, BODY *body, int modeAccpt, int start, int member, int tmp ); void pushBody( CLASS *class, BODY *newbody ); int unifyBody( char *name, BODY *body, BODY *newbody ); char *getNewClassName( char *keyname ); void outputHeader( char *name ); char *chkNoInstantClass( void ); extern CLASS *ClassList; extern CLASS *ClassListTail; extern CLASS *StartSymbol; extern int NoNewLine; extern char GramFile[ 1024 ]; extern char HeaderFile[ 1024 ]; extern int SW_Compati; extern int SW_Quiet; extern int SW_SemiQuiet; extern char VerNo[]; static char HeadName[ SYMBOL_LEN ]; static char BodyName[ CLASS_NUM ][ SYMBOL_LEN ]; static int BodyNo = 0; static int ClassNo = 0; static int ModeAssignAccptFlag = 1; static int BlockReverseSw; static int ModeBlock = 0; static int CurClassNo = 0; static int StartFlag = 0; static FILE *FPheader; static int ErrParse = 0; static int GramModifyNum = 0; %} %token CTRL_ASSIGN %token CTRL_IGNORE %token OPEN %token CLOSE %token REVERSE %token STARTCLASS %token LET %token TAG %token SYMBOL %token REMARK %token NL %% src : statement | statement src; statement : block | single | contol | remark | error NL { yyerrok; }; block : tag OPEN 
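/* (editor's note) Input format handled by this parser: a plain definition is
   "classname : symbol symbol ..." (rules "single"/"define" below), and
   definitions can be grouped into a block "@TAG { ... }" (this rule).
   "%ASSIGN" / "%IGNORE" switch whether accept flags are assigned, a leading
   "!" reverses that for one definition or block tag, "*" before the head
   symbol marks the start class, and "#" begins a comment -- see the token
   patterns in gram.l. */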
remark members CLOSE remark; tag : TAG { BlockReverseSw = 0; if( ModeAssignAccptFlag ) outputHeader( $1 ); } | REVERSE TAG { BlockReverseSw = 1; if( !ModeAssignAccptFlag ) outputHeader( $2 ); }; members : member | member members; member : define { appendNonTerm( HeadName, ModeAssignAccptFlag ^ BlockReverseSw ); } | head remark { entryNonTerm( HeadName, NULL, ModeAssignAccptFlag ^ BlockReverseSw, 0, 1, 0 ); /*$B6uEPO?(B*/ } | remark; single : define { appendNonTerm( HeadName, ModeAssignAccptFlag ); } | REVERSE define { appendNonTerm( HeadName, !ModeAssignAccptFlag ); }; define : head LET bodies remark; bodies : body | body bodies; head : SYMBOL { strcpy( HeadName, $1 ); } | STARTCLASS SYMBOL { StartFlag = 1; strcpy( HeadName, $2 ); }; body : SYMBOL { strcpy( BodyName[ BodyNo++ ], $1 ); }; contol : CTRL_ASSIGN remark { ModeAssignAccptFlag = 1; } | CTRL_IGNORE { ModeAssignAccptFlag = 0; }; remark : REMARK | NL; %% #include "lex.yy.c" void appendNonTerm( char *name, int modeAssign ) { BODY *body; body = setNonTerm(); entryNonTerm( name, body, modeAssign, StartFlag, ModeBlock, 0 ); BodyNo = 0; } BODY *setNonTerm( void ) { int i; BODY *body; BODY *top = NULL, *prev = NULL; for( i = 0; i < BodyNo; i++ ){ if( (body = malloc( sizeof(BODY) )) == NULL ){ errMes( "Can't alloc nonterminal list buffer" ); } strcpy( body->name, BodyName[ i ] ); body->abort = 0; if( prev != NULL ){ prev->next = body; } else { top = body; } prev = body; } body->next = NULL; return( top ); } CLASS *entryNonTerm( char *name, BODY *body, int modeAccpt, int start, int member, int tmp ) { CLASS *class; class = getClass( name ); if( class != NULL ){ if( member ){ errMes("Accepted flag of class \"%s\" is re-assigned", HeadName ); ErrParse++; } } else { if( (class = malloc( sizeof(CLASS) )) == NULL ){ errMes( "Can't alloc memory for Class Finite Automaton." ); } strcpy( class->name, name ); if( modeAccpt ){ if( member ){ class->no = CurClassNo; } else { if( !tmp ){ outputHeader( name ); class->no = CurClassNo; } } } else { class->no = -1; } class->branch = 0; class->usedFA = 0; class->used = 1; /* non-terminal does not appear in voca */ class->bodyList = NULL; class->tmp = tmp; class->next = NULL; if( ClassListTail == NULL ){ ClassList = class; } else { ClassListTail->next = class; } ClassListTail = class; } if( body != NULL ) pushBody( class, body ); if( start ){ StartFlag = 0; if( StartSymbol == NULL ){ StartSymbol = class; } else { errMes("Start symbol is redifined as \"%s\"", class->name ); ErrParse++; } } return( class ); } void pushBody( CLASS *class, BODY *newbody ) { BODYLIST *bodyList = class->bodyList; BODYLIST *preBodyList = NULL; BODYLIST *newBodyList; BODY *body; int cmp; int defineNo = 1; while( bodyList != NULL ){ body = bodyList->body; cmp = strcmp( body->name, newbody->name ); if( cmp > 0 ) break; if( cmp == 0 ){ if( unifyBody( class->name, body, newbody ) ){ warnMes( "Class \"%s\" is defined as \"%s..\" again.", class->name, body->name ); } return; } preBodyList = bodyList; bodyList = bodyList->next; defineNo++; } if( (newBodyList = malloc( sizeof(BODYLIST) )) == NULL ){ errMes( "Can't alloc class body buffer." 
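/* (note) pushBody() links each new rule body into the class's body list,
   which the loop above keeps sorted by the body's first symbol name; an
   exact first-symbol match is merged through unifyBody() instead of being
   added again, and class->branch counts the resulting alternatives. */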
); } newBodyList->body = newbody; if( preBodyList != NULL ){ preBodyList->next = newBodyList; } else { class->bodyList = newBodyList; } newBodyList->next = bodyList; class->branch++; } int unifyBody( char *className, BODY *body, BODY *newbody ) { BODY *bodyNext, *newbodyNext; char *newClassName; BODY *newBody; CLASS *class; bodyNext = body->next; newbodyNext = newbody->next; while( 1 ){ if( bodyNext == NULL && newbodyNext == NULL ){ return( -1 ); } if( newbodyNext == NULL ){ if( body->abort ){ return( -1 ); } else { body->abort = 1; return( 0 ); } } if( bodyNext == NULL ){ body->abort = 1; body->next = newbodyNext; return( 0 ); } if( strcmp( bodyNext->name, newbodyNext->name ) ) break; body = bodyNext; newbody = newbodyNext; bodyNext = body->next; newbodyNext = newbody->next; } class = getClass( body->name ); if( class != NULL && class->tmp ){ entryNonTerm( body->name, newbodyNext, 0, 0, 0, 1 ); } else { newClassName = getNewClassName( className ); entryNonTerm( newClassName, bodyNext, 0, 0, 0, 1 ); entryNonTerm( newClassName, newbodyNext, 0, 0, 0, 1 ); if( (newBody = malloc( sizeof(BODY) )) == NULL ){ errMes( "Can't alloc body buffer of tmp class, \"%s\".", newClassName ); } strcpy( newBody->name, newClassName ); newBody->abort = 0; newBody->next = NULL; body->next = newBody; newbody->next = newBody; } return( 0 ); } char *getNewClassName( char *keyname ) { static char classname[ SYMBOL_LEN ]; static int tmpClassNo = 0; sprintf( classname, "%s#%d", keyname , tmpClassNo++ ); if( !SW_SemiQuiet ){ fprintf( stderr, "\rNow modifying grammar to minimize states[%d]", GramModifyNum ); NoNewLine = 1; } GramModifyNum++; return( classname ); } void setGram( void ) { char *name; if( (yyin = fopen( GramFile, "r" )) == NULL ){ errMes( "Can't open grammar file \"%s\"", GramFile ); } if( SW_Compati ){ strcpy( HeaderFile, "/dev/null" ); } if( (FPheader = fopen( HeaderFile, "w" )) == NULL ){ errMes( "Can't open Header File for writting\"%s\"", HeaderFile ); } fprintf( FPheader, "/* Header of class reduction flag for finite automaton parser\n" " made with mkfa %s\n\n" " Do logicalAND between label and FA's field #4,#5.\n" "*/\n\n", VerNo ); if( !SW_Quiet ) fputs( "Now parsing grammar file\n", stderr ); yyparse(); if( !SW_Quiet ){ fprintf( stderr, "\rNow modifying grammar to minimize states[%d]\n", GramModifyNum - 1 ); NoNewLine = 0; } if( StartSymbol == NULL ) StartSymbol = ClassList; fprintf( FPheader, "/* Start Symbol: %s */\n", StartSymbol->name ); fclose( FPheader ); if( (name = chkNoInstantClass()) != NULL ){ errMes( "Prototype-declared Class \"%s\" has no instant definitions", name ); } if( ErrParse ) errMes( "%d fatal errors exist", ErrParse ); } void outputHeader( char *name ) { if( ClassNo >= CLASSFLAG_MAX ){ if( !SW_Compati ){ warnMes( "Class accepted flag overflow.\"%s\"", name ); CurClassNo = -1; } } else { if( !SW_Compati ){ fprintf( FPheader, "#define ACCEPT_%s 0x%08x\n", name, 1 << ClassNo ); } CurClassNo = ClassNo++; } } char *chkNoInstantClass( void ) { CLASS *class = ClassList; while( class != NULL ){ if( !class->branch ) return( class->name ); class = class->next; } return( NULL ); } int yyerror( char *mes ) { errMes(mes ); ErrParse++; return( 0 ); } julius-4.2.2/gramtools/mkdfa/mkfa-1.44-flex/dfa.c0000644001051700105040000003113612004452400017550 0ustar ritrlab/* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All 
rights reserved */ #include "mkfa.h" #include "dfa.h" #include "nfa.h" void r_makeDFA( FA *fa ); ARC *unifyFA( FA *dstFA, ARC *prevarc, ARC *curarc, FA *prevFA ); void usArc2nsArc( FA *fa ); void connectUnifyFA( FA *fa, int inp, FA *nextFA, FLAG reserved, CLASSFLAGS accpt, CLASSFLAGS start ); ARC *unconnectFA( FA *srcFA, ARC *arcPrev, ARC *arc ); void killFA( FA *fa ); void killIsolatedLoop( FA *vanishFA, FA *curFA ); int chkIsolatedLoop( FA *vanishFA, FA *curFA ); UNIFYARC *appendUnifyArc( UNIFYARC *top, int inp, FA *fa, int reserved ); FALIST *appendGroup( FALIST *groupTop, FA *fa ); FALIST *insertFAlist( FALIST *top, FALIST *preAtom, FALIST *nextAtom, FA *fa ); FA *chkGroup( FALIST *group, CLASSFLAGS accptFlag, CLASSFLAGS startFlag, FLAG *newFlag ); int cmpFAlist( FALIST *group1, FALIST *group2 ); FALIST *volatileFA( FALIST *volatileList, FA *fa ); void unvolatileFA( FALIST *volatileList ); void verboseGroup( FALIST *group ); static FALIST *GroupList = NULL; /* $B>uBVM;9g$5$l$??7>uBV$N%j%9%H(B */ static int DFAtravTotal = 0; /* DFA$B:n@.;~$KN)$A4s$C$?%N!<%I?t(B */ static int DFAtravSuccess = 0; /* $B$=$N$&$A:#$^$G$KN)$A4s$C$F$$$J$+$C$??t(B */ static int FAprocessed = 0; /* $B8=:_$N%9%F%C%W$K$*$$$F=hM}$5$l$?(BFA$B$N?t(B */ extern int FAtotal; /* FA$B$NAm?t(B */ extern FA *FAlist; /* FA$B%M%C%H%o!<%/$K$*$1$k3+;O(BFA$B$N%]%$%s%?(B */ extern char FAfile[ 1024 ]; /* FA$B%U%!%$%kL>(B(DFAorNFA) */ extern int SW_Verbose; extern int SW_Quiet; extern int SW_SemiQuiet; extern int SW_Compati; extern int NoNewLine; /* $BJ#?t$NI=<(%b!<%I$G2~9TLdBj$r2r7h$9$k(B */ extern char Clipboard[ 1024 ]; /* sprintf$BMQ$N0l;~=q$-9~$_%P%C%U%!(B */ void makeDFA( void ) { if( !SW_Quiet ){ fprintf( stderr, "Now making deterministic finite automaton" ); NoNewLine = 1; } r_makeDFA( FAlist ); if( !SW_Quiet ){ fprintf( stderr, "\rNow making deterministic finite automaton[%d/%d] \n", FAprocessed, FAtotal ); if( FAtotal != FAprocessed ){ fprintf( stderr, "* %d released FA nodes are left on isolated loop\n", FAtotal - FAprocessed ); } NoNewLine = 0; } /* $B2?$+%P%0$,$"$C$?$H$-62$$$,8IN)%k!<%W$N%A%'%C%/$,(B $BIT2DG=$J$N$G$7$g$&$,$J$$(B */ FAtotal = FAprocessed; if( SW_Verbose ){ verboseMes( "** traversing efficiency ( success/total )" ); verboseMes( "r_makeDFA: %d/%d(%d%%)", DFAtravSuccess, DFAtravTotal, 100*DFAtravSuccess/DFAtravTotal); } newLineAdjust(); freeFAlist( GroupList ); } void r_makeDFA( FA *fa ) { ARC *prevarc = NULL; ARC *curarc; int inp; int bundleNum; FLAG reserved = 0; int i; FLAG newFlag; FALIST *volatileList = NULL; CLASSFLAGS unifyAccptFlag; CLASSFLAGS unifyStartFlag; verboseMes( "[func]r_makeDFA(FA %08x)", (long)fa ); DFAtravTotal++; if( fa->traversed == 1 ){ verboseMes( "traversed..." 
); return; } fa->traversed = 1; DFAtravSuccess++; FAprocessed++; if( !SW_SemiQuiet ){ fprintf( stderr, "\rNow making deterministic finite automaton[%d/%d] ", FAprocessed, FAtotal ); NoNewLine = 1; } curarc = fa->nsList; while( curarc != NULL ){ FA *unifyingDstFA = NULL; { ARC *arc = curarc; int inp = arc->inp; FALIST *group = NULL; CLASSFLAGS accptFlag = 0; CLASSFLAGS startFlag = 0; bundleNum = 0; while( 1 ){ if( arc == NULL || arc->inp != inp ) break; group = appendGroup( group, arc->fa ); accptFlag |= arc->fa->accpt; startFlag |= arc->fa->start; arc = arc->next; bundleNum++; } if( bundleNum > 1 ){ unifyingDstFA = chkGroup( group, accptFlag, startFlag,&newFlag ); } else { /* $B$3$N2<(B4$B9T$O%V%m%C%/30$N(Bwhile$B$KBP$7$F$N$b$N(B */ freeFAlist( group ); prevarc = curarc; curarc = curarc->next; continue; } } inp = curarc->inp; unifyAccptFlag = 0; unifyStartFlag = 0; for( i = 0; i < bundleNum; i++ ){ unifyAccptFlag |= curarc->accpt; unifyStartFlag |= curarc->start; if( !newFlag ){ /* volatileList = volatileFA( volatileList, curarc->ns );*/ curarc = unconnectFA( fa, prevarc, curarc ); } else { if( curarc->fa == fa /* self-loop */ ){ reserved = 1; /* volatileList = volatileFA( volatileList, fa );*/ curarc = unconnectFA( fa, prevarc, curarc ); } else { curarc = unifyFA( unifyingDstFA, prevarc, curarc, fa ); } } } connectUnifyFA( fa, inp, unifyingDstFA, reserved, unifyAccptFlag, unifyStartFlag ); reserved = 0; } usArc2nsArc( fa ); /* unvolatileFA( volatileList );*/ curarc = fa->nsList; while( curarc != NULL ){ r_makeDFA( curarc->fa ); curarc = curarc->next; } } void connectUnifyFA( FA *fa, int inp, FA *nextFA, FLAG reserved, CLASSFLAGS accpt, CLASSFLAGS start ) { /* unifyFA$B$X$N%"!<%/$N%j%9%H$KF~NO$N<-=q=g$GE,@Z0LCV$KA^F~(B $B$^$?F1$8$b$N$,$"$k>l9gEPO?$7$J$$(B */ /* nextFA $B$N(BpsNum$B$r%$%s%/%j%a%s%H$7$J$$(B */ UNIFYARC *newArc; UNIFYARC *curArc = NULL; UNIFYARC *nextArc; UNIFYARC *top = fa->usList; if( (newArc = calloc( 1, sizeof(UNIFYARC) )) == NULL ){ errMes( "Can't alloc forward arc buffer of finite automaton." ); } newArc->inp = inp; newArc->us = nextFA; newArc->reserved = reserved; newArc->accpt = accpt; newArc->start = start; if( (nextArc = top) != NULL ){ while( 1 ){ if( nextArc->inp > inp ) break; if( nextArc->inp == inp && nextArc->us == nextFA ) return; curArc = nextArc; if( (nextArc = nextArc->next) == NULL ) break; } } if( curArc == NULL ){ newArc->next = top; fa->usList = newArc; } else { newArc->next = nextArc; curArc->next = newArc; } } void usArc2nsArc( FA *fa ) { UNIFYARC *uptr; UNIFYARC *disused_uptr; ARC *nptr; ARC *newarc; uptr = fa->usList; while( uptr != NULL ){ if( (newarc = calloc( 1, sizeof(ARC) )) == NULL ){ errMes( "Can't alloc forward arc buffer of finite automaton." ); } connectFA( fa, uptr->inp, uptr->us, uptr->accpt, uptr->start ); uptr = uptr->next; } uptr = fa->usList; while( uptr != NULL ){ if( uptr->reserved ){ uptr->us->accpt |= fa->accpt; nptr = fa->nsList; while( nptr != NULL ){ connectFA( uptr->us, nptr->inp, nptr->fa, nptr->accpt, nptr->start ); nptr = nptr->next; } } disused_uptr = uptr; uptr = uptr->next; free( disused_uptr ); } } FALIST *volatileFA( FALIST *volatileList, FA *fa ) { FALIST *atom; if( (atom = malloc( sizeof(FALIST) )) == NULL ){ errMes( "Can't alloc FA list buffer." 
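/* the statements below flag fa as "volatiled" (so chkIsolatedLoop() and
   killIsolatedLoop() leave it alone) and push it onto the volatile list;
   note that the calls to volatileFA() in r_makeDFA() are currently
   commented out. */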
); } fa->volatiled = 1; atom->fa = fa; atom->next = volatileList; return( atom ); } void unvolatileFA( FALIST *volatileList ) { FALIST *atom; FA *fa; while( volatileList != NULL ){ atom = volatileList; fa = atom->fa; fa->volatiled = 0; /* if( chkIsolatedLoop( fa, fa ) ){ killIsolatedLoop( fa, fa ); }*/ volatileList = volatileList->next; free( atom ); } } ARC *unifyFA( FA *dstFA, ARC *prevarc, ARC *curarc, FA *prevFA ) { FA *srcFA = curarc->fa; ARC *arc = srcFA->nsList; dstFA->accpt |= srcFA->accpt; while( arc != NULL ){ connectFA( dstFA, arc->inp, arc->fa, arc->accpt, arc->start ); arc = arc->next; } return( unconnectFA( prevFA, prevarc, curarc ) ); } ARC *unconnectFA( FA *srcFA, ARC *arcPrev, ARC *arc ) /* $B@Z$C$?%"!<%/$NCLG$9$Y$-$J$iCLG$5$;$k!#(B*/ ARC *arcNext = arc->next; FA *vanishFA; if( arcPrev == NULL ){ srcFA->nsList = arcNext; } else { arcPrev->next = arcNext; } vanishFA = arc->fa; free( arc ); if( --vanishFA->psNum == 0 ){ killFA( vanishFA ); }/* else if( chkIsolatedLoop( vanishFA, vanishFA ) ){ killIsolatedLoop( vanishFA, vanishFA ); }*/ return( arcNext ); } void killFA( FA *fa ) { ARC *arc = fa->nsList; verboseMes( "a FA node is vanished" ); while( arc != NULL ){ arc = unconnectFA( fa, NULL, arc ); } free( fa ); FAtotal--; } int chkIsolatedLoop( FA *vanishFA, FA *curFA ) /* $B$b$7<+J,$,>CLG$9$k$H2>Dj$7$?$i<+J,$X$N%"!<%/$,L5$/$J$k$+$r%A%'%C%/(B $B$9$J$o$A%k!<%W$K$h$k@8$-;D$j$r6n=|$9$k(B */ { ARC *arc; int result; if( curFA->volatiled ) return( 0 ); if( curFA->psNum > 1 ) return( 0 ); arc = curFA->nsList; while( arc != NULL ){ FA *nextFA = arc->fa; if( nextFA == vanishFA ) return( 1 ); result = chkIsolatedLoop( vanishFA, nextFA ); if( result ) return( 1 ); arc = arc->next; } return( 0 ); } void killIsolatedLoop( FA *vanishFA, FA *curFA ) /* $B$b$7<+J,$,>CLG$9$k$H2>Dj$7$?$i<+J,$X$N%"!<%/$,L5$/$J$k$+$r%A%'%C%/(B $B$9$J$o$A%k!<%W$K$h$k@8$-;D$j$r6n=|$9$k(B */ { ARC *arc; ARC *prevarc = NULL; if( curFA->volatiled ) return; if( curFA->psNum > 1 ) return; arc = curFA->nsList; while( arc != NULL ){ FA *nextFA = arc->fa; if( nextFA != vanishFA ){ unconnectFA( curFA, prevarc, arc ); } prevarc = arc; arc = arc->next; } free( curFA ); FAtotal--; } FALIST *appendGroup( FALIST *groupTop, FA *fa ) { /* fa$B$,M;9g>uBV$G$J$$$J$i(BFA$B$N%]%$%s%?$r%=!<%H$7$F%0%k!<%W%j%9%H$X(B $BM;9g>uBV$J$i$=$N9=@.%j%9%H$H%0%k!<%W%j%9%H$r9g$o$;$F%=!<%H$9$k(B */ FALIST *preAtom = NULL; FALIST *curAtom = groupTop; FALIST *srcCurAtom = NULL; long cmp; if( fa->group == NULL ){ while( curAtom != NULL ){ cmp = (long)fa - (long)curAtom->fa; if( cmp == 0 ) return( groupTop ); if( cmp < 0 ) break; preAtom = curAtom; curAtom = curAtom->next; } return( insertFAlist( groupTop, preAtom, curAtom, fa ) ); } else { /* srcCurAtom$B$,%=!<%H$5$l$F$$$k$3$H$rMxMQ$9$l$P$b$C$H=hM}$,B.$/$J$k$,(B $B$=$&$9$k$H$J$<$+>uBV?t$,B?>/A}$($F$7$^$&$N$GI,$:$7$bJ]>Z$5$l$F$$$J$$$+$b(B "for"$B$NCmgroup; srcCurAtom != NULL; srcCurAtom = srcCurAtom->next ){ if( curAtom == NULL ){ groupTop = insertFAlist( groupTop, preAtom, curAtom, srcCurAtom->fa ); preAtom = NULL; curAtom = groupTop; } /* for( ; srcCurAtom != NULL; srcCurAtom = srcCurAtom->next ){ groupTop = insertFAlist( groupTop, preAtom, NULL, srcCurAtom->fa ); if( preAtom == NULL ){ preAtom = groupTop->next; } else { preAtom = preAtom->next; } } break; }*/ cmp = (long)srcCurAtom->fa - (long)curAtom->fa; if( cmp == 0 ) continue; if( cmp < 0 ){ groupTop = insertFAlist( groupTop, preAtom, curAtom, srcCurAtom->fa ); preAtom = NULL; curAtom = groupTop; } else { preAtom = curAtom; curAtom = 
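/* advance along the destination list: the merged group list is kept sorted
   by FA pointer value so that cmpFAlist() can test two state groups for
   identity with a single element-by-element scan */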
curAtom->next; } } return( groupTop ); } } FALIST *insertFAlist( FALIST *top, FALIST *preAtom, FALIST *nextAtom, FA *fa ) { FALIST *atom; if( (atom = malloc( sizeof(FALIST) )) == NULL ){ errMes( "Can't alloc group buffer for unifying FA" ); } atom->fa = fa; if( preAtom == NULL ){ atom->next = nextAtom; return( atom ); } else { preAtom->next = atom; atom->next = nextAtom; return( top ); } } FA *chkGroup( FALIST *group, CLASSFLAGS accptFlag , CLASSFLAGS startFlag, FLAG *newFlag ) { FALIST *curGroupList = GroupList; FALIST *preGroupList = NULL; int cmp; FA *fa; while( curGroupList != NULL ){ cmp = cmpFAlist( curGroupList->fa->group, group ); if( cmp == 0 ){ if( SW_Compati || (accptFlag == curGroupList->fa->accpt || startFlag == curGroupList->fa->start) ){ freeFAlist( group ); *newFlag = 0; return( curGroupList->fa ); } } if( cmp < 0 ) break; preGroupList = curGroupList; curGroupList = curGroupList->next; } if( SW_Verbose ){ verboseGroup( group ); } fa = makeNewFA(); GroupList = insertFAlist( GroupList, preGroupList, curGroupList, fa ); fa->group = group; fa->accpt = accptFlag; fa->start = startFlag; *newFlag = 1; return( fa ); } void verboseGroup( FALIST *group ) { verboseMes( "Created New Group" ); while( group != NULL ){ verboseMes( " FAadr: %08x", (long)group->fa ); group = group->next; } } int cmpFAlist( FALIST *group1, FALIST *group2 ) { long cmp; while( 1 ){ if( group1 == NULL && group2 == NULL ) return( 0 ); if( group1 == NULL ) return( -1 ); if( group2 == NULL ) return( 1 ); cmp = (long)group1->fa - (long)group2->fa; if( cmp != 0 ) return( cmp ); group1 = group1->next; group2 = group2->next; } } julius-4.2.2/gramtools/mkdfa/mkfa-1.44-flex/Makefile.in0000644001051700105040000000235412004452400020717 0ustar ritrlab# Copyright (c) 1991-2012 Kawahara Lab., Kyoto University # Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology # Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology # All rights reserved ## Makefile for mkfa, DFA compiler ## *** Needs GNU bison and flex *** SHELL=/bin/sh .SUFFIXES: .SUFFIXES: .c .o .c.o: $(CC) $(CFLAGS) $(CPPFLAGS) -o $@ -c $< CC=@CC@ CFLAGS=@CFLAGS@ CPPFLAGS=@CPPFLAGS@ @DEFS@ LDFLAGS=@LDFLAGS@ @LIBS@ RM=@RM@ -f #BISON=@BISON@ #FLEX=@FLEX@ prefix=@prefix@ exec_prefix=@exec_prefix@ INSTALL=@INSTALL@ ###################################################################### #YACS = gram.y #FLEXS = gram.l OBJS = gram.tab.o main.o voca.o nfa.o dfa.o triplet.o TARGET=mkfa@EXEEXT@ all: $(TARGET) $(TARGET): $(OBJS) $(CC) $(CFLAGS) $(CPPFLAGS) -o $(TARGET) $(OBJS) $(LDFLAGS) #gram.tab.c: $(YACS) # $(BISON) $(YACS) # #lex.yy.c: $(FLEXS) # $(FLEX) $(FLEXS) install: install.bin install.bin: ${INSTALL} -d @bindir@ @INSTALL_PROGRAM@ $(TARGET) @bindir@ clean: $(RM) *.o *~ core $(RM) $(TARGET) $(TARGET).exe distclean: $(RM) *.o *~ core $(RM) $(TARGET) $(TARGET).exe $(RM) Makefile # dependencies gram.tab.o : gram.tab.c lex.yy.c mkfa.h gram.tab.c : gram.y $(OBJS) : mkfa.h julius-4.2.2/gramtools/mkdfa/mkfa-1.44-flex/gram.l0000644001051700105040000000072410677066017017777 0ustar ritrlab%% "@"[a-zA-Z0-9_]+ { yylval = yytext + 1; return( TAG ); } [a-zA-Z0-9_]+ { yylval = yytext; return( SYMBOL ); } "{" { ModeBlock = 1; return( OPEN ); } "}" { ModeBlock = 0; return( CLOSE ); } "%ASSIGN" return( CTRL_ASSIGN ); "%IGNORE" return( CTRL_IGNORE ); "!" return( REVERSE ); "*" return( STARTCLASS ); ":" return( LET ); \n return( NL ); "#".*\n return( REMARK ); [ \t] {}; . 
{ errMes("Lexical mistake \"%s\"", yytext ); exit( 1 ); } julius-4.2.2/gramtools/mkdfa/mkfa-1.44-flex/dfa.h0000644001051700105040000000042112004452400017546 0ustar ritrlab/* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ void makeDFA( void ); julius-4.2.2/gramtools/mkdfa/mkfa-1.44-flex/gram.tab.c0000644001051700105040000011463010677066017020535 0ustar ritrlab/* A Bison parser, made from gram.y by GNU bison 1.35. */ #define YYBISON 1 /* Identify Bison output. */ # define CTRL_ASSIGN 257 # define CTRL_IGNORE 258 # define OPEN 259 # define CLOSE 260 # define REVERSE 261 # define STARTCLASS 262 # define LET 263 # define TAG 264 # define SYMBOL 265 # define REMARK 266 # define NL 267 #line 1 "gram.y" #include "mkfa.h" #define YYSTYPE char * #define CLASS_NUM 100 void appendNonTerm( char *name, int modeAssign ); BODY *setNonTerm( void ); CLASS *entryNonTerm( char *name, BODY *body, int modeAccpt, int start, int member, int tmp ); void pushBody( CLASS *class, BODY *newbody ); int unifyBody( char *name, BODY *body, BODY *newbody ); char *getNewClassName( char *keyname ); void outputHeader( char *name ); char *chkNoInstantClass( void ); extern CLASS *ClassList; extern CLASS *ClassListTail; extern CLASS *StartSymbol; extern int NoNewLine; extern char GramFile[ 1024 ]; extern char HeaderFile[ 1024 ]; extern int SW_Compati; extern int SW_Quiet; extern int SW_SemiQuiet; extern char VerNo[]; static char HeadName[ SYMBOL_LEN ]; static char BodyName[ CLASS_NUM ][ SYMBOL_LEN ]; static int BodyNo = 0; static int ClassNo = 0; static int ModeAssignAccptFlag = 1; static int BlockReverseSw; static int ModeBlock = 0; static int CurClassNo = 0; static int StartFlag = 0; static FILE *FPheader; static int ErrParse = 0; static int GramModifyNum = 0; #ifndef YYSTYPE # define YYSTYPE int # define YYSTYPE_IS_TRIVIAL 1 #endif #ifndef YYDEBUG # define YYDEBUG 0 #endif #define YYFINAL 43 #define YYFLAG -32768 #define YYNTBASE 14 /* YYTRANSLATE(YYLEX) -- Bison token number corresponding to YYLEX. */ #define YYTRANSLATE(x) ((unsigned)(x) <= 267 ? yytranslate[x] : 27) /* YYTRANSLATE[YYLEX] -- Bison token number corresponding to YYLEX. 
*/ static const char yytranslate[] = { 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 }; #if YYDEBUG static const short yyprhs[] = { 0, 0, 2, 5, 7, 9, 11, 13, 16, 23, 25, 28, 30, 33, 35, 38, 40, 42, 45, 50, 52, 55, 57, 60, 62, 65, 67, 69 }; static const short yyrhs[] = { 15, 0, 15, 14, 0, 16, 0, 20, 0, 25, 0, 26, 0, 1, 13, 0, 17, 5, 26, 18, 6, 26, 0, 10, 0, 7, 10, 0, 19, 0, 19, 18, 0, 21, 0, 23, 26, 0, 26, 0, 21, 0, 7, 21, 0, 23, 9, 22, 26, 0, 24, 0, 24, 22, 0, 11, 0, 8, 11, 0, 11, 0, 3, 26, 0, 4, 0, 12, 0, 13, 0 }; #endif #if YYDEBUG /* YYRLINE[YYN] -- source line where rule number YYN was defined. */ static const short yyrline[] = { 0, 55, 55, 57, 57, 57, 57, 58, 63, 65, 70, 76, 76, 78, 82, 86, 88, 92, 97, 99, 99, 101, 105, 111, 116, 120, 125, 125 }; #endif #if (YYDEBUG) || defined YYERROR_VERBOSE /* YYTNAME[TOKEN_NUM] -- String name of the token TOKEN_NUM. */ static const char *const yytname[] = { "$", "error", "$undefined.", "CTRL_ASSIGN", "CTRL_IGNORE", "OPEN", "CLOSE", "REVERSE", "STARTCLASS", "LET", "TAG", "SYMBOL", "REMARK", "NL", "src", "statement", "block", "tag", "members", "member", "single", "define", "bodies", "head", "body", "contol", "remark", 0 }; #endif /* YYR1[YYN] -- Symbol number of symbol that rule YYN derives. */ static const short yyr1[] = { 0, 14, 14, 15, 15, 15, 15, 15, 16, 17, 17, 18, 18, 19, 19, 19, 20, 20, 21, 22, 22, 23, 23, 24, 25, 25, 26, 26 }; /* YYR2[YYN] -- Number of symbols composing right hand side of rule YYN. */ static const short yyr2[] = { 0, 1, 2, 1, 1, 1, 1, 2, 6, 1, 2, 1, 2, 1, 2, 1, 1, 2, 4, 1, 2, 1, 2, 1, 2, 1, 1, 1 }; /* YYDEFACT[S] -- default rule to reduce with in state S when YYTABLE doesn't specify something else to do. Zero means the default is an error. 
*/ static const short yydefact[] = { 0, 0, 0, 25, 0, 0, 9, 21, 26, 27, 0, 3, 0, 4, 16, 0, 5, 6, 7, 24, 10, 17, 22, 2, 0, 0, 0, 23, 0, 19, 0, 11, 13, 0, 15, 18, 20, 0, 12, 14, 8, 0, 0, 0 }; static const short yydefgoto[] = { 23, 10, 11, 12, 30, 31, 13, 14, 28, 15, 29, 16, 17 }; static const short yypact[] = { 29, 14, 5,-32768, 36, 0,-32768,-32768,-32768,-32768, 2,-32768, 20,-32768,-32768, 25,-32768,-32768,-32768,-32768, -32768,-32768,-32768,-32768, 5, 34, 8,-32768, 5, 34, 42, 8,-32768, -5,-32768,-32768,-32768, 5,-32768,-32768, -32768, 49, 50,-32768 }; static const short yypgoto[] = { 51,-32768,-32768,-32768, 21,-32768,-32768, -3, 24, 12, -32768,-32768, -2 }; #define YYLAST 53 static const short yytable[] = { 19, 21, -1, 1, 25, 2, 3, 8, 9, 4, 5, 22, 6, 7, 8, 9, 5, 8, 9, 7, 8, 9, 26, 32, 34, 24, 35, 18, 32, 34, 1, 39, 2, 3, 25, 40, 4, 5, 33, 6, 7, 8, 9, 33, 5, 27, 20, 7, 37, 42, 43, 41, 38, 36 }; static const short yycheck[] = { 2, 4, 0, 1, 9, 3, 4, 12, 13, 7, 8, 11, 10, 11, 12, 13, 8, 12, 13, 11, 12, 13, 24, 26, 26, 5, 28, 13, 31, 31, 1, 33, 3, 4, 9, 37, 7, 8, 26, 10, 11, 12, 13, 31, 8, 11, 10, 11, 6, 0, 0, 0, 31, 29 }; /* -*-C-*- Note some compilers choke on comments on `#line' lines. */ #line 3 "/usr/share/bison/bison.simple" /* Skeleton output parser for bison, Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ /* As a special exception, when this file is copied by Bison into a Bison output file, you may use that output file without restriction. This special exception was added by the Free Software Foundation in version 1.24 of Bison. */ /* This is the parser code that is written into each bison parser when the %semantic_parser declaration is not specified in the grammar. It was written by Richard Stallman by simplifying the hairy parser used when %semantic_parser is specified. */ /* All symbols defined below should begin with yy or YY, to avoid infringing on user name space. This should be done even for local variables, as they might otherwise be expanded by user macros. There are some unavoidable exceptions within include files to define necessary library symbols; they are noted "INFRINGES ON USER NAME SPACE" below. */ #if ! defined (yyoverflow) || defined (YYERROR_VERBOSE) /* The parser invokes alloca or malloc; define the necessary symbols. */ # if YYSTACK_USE_ALLOCA # define YYSTACK_ALLOC alloca # else # ifndef YYSTACK_USE_ALLOCA # if defined (alloca) || defined (_ALLOCA_H) # define YYSTACK_ALLOC alloca # else # ifdef __GNUC__ # define YYSTACK_ALLOC __builtin_alloca # endif # endif # endif # endif # ifdef YYSTACK_ALLOC /* Pacify GCC's `empty if-body' warning. 
*/ # define YYSTACK_FREE(Ptr) do { /* empty */; } while (0) # else # if defined (__STDC__) || defined (__cplusplus) # include /* INFRINGES ON USER NAME SPACE */ # define YYSIZE_T size_t # endif # define YYSTACK_ALLOC malloc # define YYSTACK_FREE free # endif #endif /* ! defined (yyoverflow) || defined (YYERROR_VERBOSE) */ #if (! defined (yyoverflow) \ && (! defined (__cplusplus) \ || (YYLTYPE_IS_TRIVIAL && YYSTYPE_IS_TRIVIAL))) /* A type that is properly aligned for any stack member. */ union yyalloc { short yyss; YYSTYPE yyvs; # if YYLSP_NEEDED YYLTYPE yyls; # endif }; /* The size of the maximum gap between one aligned stack and the next. */ # define YYSTACK_GAP_MAX (sizeof (union yyalloc) - 1) /* The size of an array large to enough to hold all stacks, each with N elements. */ # if YYLSP_NEEDED # define YYSTACK_BYTES(N) \ ((N) * (sizeof (short) + sizeof (YYSTYPE) + sizeof (YYLTYPE)) \ + 2 * YYSTACK_GAP_MAX) # else # define YYSTACK_BYTES(N) \ ((N) * (sizeof (short) + sizeof (YYSTYPE)) \ + YYSTACK_GAP_MAX) # endif /* Copy COUNT objects from FROM to TO. The source and destination do not overlap. */ # ifndef YYCOPY # if 1 < __GNUC__ # define YYCOPY(To, From, Count) \ __builtin_memcpy (To, From, (Count) * sizeof (*(From))) # else # define YYCOPY(To, From, Count) \ do \ { \ register YYSIZE_T yyi; \ for (yyi = 0; yyi < (Count); yyi++) \ (To)[yyi] = (From)[yyi]; \ } \ while (0) # endif # endif /* Relocate STACK from its old location to the new one. The local variables YYSIZE and YYSTACKSIZE give the old and new number of elements in the stack, and YYPTR gives the new location of the stack. Advance YYPTR to a properly aligned location for the next stack. */ # define YYSTACK_RELOCATE(Stack) \ do \ { \ YYSIZE_T yynewbytes; \ YYCOPY (&yyptr->Stack, Stack, yysize); \ Stack = &yyptr->Stack; \ yynewbytes = yystacksize * sizeof (*Stack) + YYSTACK_GAP_MAX; \ yyptr += yynewbytes / sizeof (*yyptr); \ } \ while (0) #endif #if ! defined (YYSIZE_T) && defined (__SIZE_TYPE__) # define YYSIZE_T __SIZE_TYPE__ #endif #if ! defined (YYSIZE_T) && defined (size_t) # define YYSIZE_T size_t #endif #if ! defined (YYSIZE_T) # if defined (__STDC__) || defined (__cplusplus) # include /* INFRINGES ON USER NAME SPACE */ # define YYSIZE_T size_t # endif #endif #if ! defined (YYSIZE_T) # define YYSIZE_T unsigned int #endif #define yyerrok (yyerrstatus = 0) #define yyclearin (yychar = YYEMPTY) #define YYEMPTY -2 #define YYEOF 0 #define YYACCEPT goto yyacceptlab #define YYABORT goto yyabortlab #define YYERROR goto yyerrlab1 /* Like YYERROR except do call yyerror. This remains here temporarily to ease the transition to the new meaning of YYERROR, for GCC. Once GCC version 2 has supplanted version 1, this can go. */ #define YYFAIL goto yyerrlab #define YYRECOVERING() (!!yyerrstatus) #define YYBACKUP(Token, Value) \ do \ if (yychar == YYEMPTY && yylen == 1) \ { \ yychar = (Token); \ yylval = (Value); \ yychar1 = YYTRANSLATE (yychar); \ YYPOPSTACK; \ goto yybackup; \ } \ else \ { \ yyerror ("syntax error: cannot back up"); \ YYERROR; \ } \ while (0) #define YYTERROR 1 #define YYERRCODE 256 /* YYLLOC_DEFAULT -- Compute the default location (before the actions are run). When YYLLOC_DEFAULT is run, CURRENT is set the location of the first token. By default, to implement support for ranges, extend its range to the last symbol. */ #ifndef YYLLOC_DEFAULT # define YYLLOC_DEFAULT(Current, Rhs, N) \ Current.last_line = Rhs[N].last_line; \ Current.last_column = Rhs[N].last_column; #endif /* YYLEX -- calling `yylex' with the right arguments. 
*/ #if YYPURE # if YYLSP_NEEDED # ifdef YYLEX_PARAM # define YYLEX yylex (&yylval, &yylloc, YYLEX_PARAM) # else # define YYLEX yylex (&yylval, &yylloc) # endif # else /* !YYLSP_NEEDED */ # ifdef YYLEX_PARAM # define YYLEX yylex (&yylval, YYLEX_PARAM) # else # define YYLEX yylex (&yylval) # endif # endif /* !YYLSP_NEEDED */ #else /* !YYPURE */ # define YYLEX yylex () #endif /* !YYPURE */ /* Enable debugging if requested. */ #if YYDEBUG # ifndef YYFPRINTF # include /* INFRINGES ON USER NAME SPACE */ # define YYFPRINTF fprintf # endif # define YYDPRINTF(Args) \ do { \ if (yydebug) \ YYFPRINTF Args; \ } while (0) /* Nonzero means print parse trace. It is left uninitialized so that multiple parsers can coexist. */ int yydebug; #else /* !YYDEBUG */ # define YYDPRINTF(Args) #endif /* !YYDEBUG */ /* YYINITDEPTH -- initial size of the parser's stacks. */ #ifndef YYINITDEPTH # define YYINITDEPTH 200 #endif /* YYMAXDEPTH -- maximum size the stacks can grow to (effective only if the built-in stack extension method is used). Do not make this value too large; the results are undefined if SIZE_MAX < YYSTACK_BYTES (YYMAXDEPTH) evaluated with infinite-precision integer arithmetic. */ #if YYMAXDEPTH == 0 # undef YYMAXDEPTH #endif #ifndef YYMAXDEPTH # define YYMAXDEPTH 10000 #endif #ifdef YYERROR_VERBOSE # ifndef yystrlen # if defined (__GLIBC__) && defined (_STRING_H) # define yystrlen strlen # else /* Return the length of YYSTR. */ static YYSIZE_T # if defined (__STDC__) || defined (__cplusplus) yystrlen (const char *yystr) # else yystrlen (yystr) const char *yystr; # endif { register const char *yys = yystr; while (*yys++ != '\0') continue; return yys - yystr - 1; } # endif # endif # ifndef yystpcpy # if defined (__GLIBC__) && defined (_STRING_H) && defined (_GNU_SOURCE) # define yystpcpy stpcpy # else /* Copy YYSRC to YYDEST, returning the address of the terminating '\0' in YYDEST. */ static char * # if defined (__STDC__) || defined (__cplusplus) yystpcpy (char *yydest, const char *yysrc) # else yystpcpy (yydest, yysrc) char *yydest; const char *yysrc; # endif { register char *yyd = yydest; register const char *yys = yysrc; while ((*yyd++ = *yys++) != '\0') continue; return yyd - 1; } # endif # endif #endif #line 315 "/usr/share/bison/bison.simple" /* The user can define YYPARSE_PARAM as the name of an argument to be passed into yyparse. The argument should have type void *. It should actually point to an object. Grammar actions can access the variable by casting it to the proper pointer type. */ #ifdef YYPARSE_PARAM # if defined (__STDC__) || defined (__cplusplus) # define YYPARSE_PARAM_ARG void *YYPARSE_PARAM # define YYPARSE_PARAM_DECL # else # define YYPARSE_PARAM_ARG YYPARSE_PARAM # define YYPARSE_PARAM_DECL void *YYPARSE_PARAM; # endif #else /* !YYPARSE_PARAM */ # define YYPARSE_PARAM_ARG # define YYPARSE_PARAM_DECL #endif /* !YYPARSE_PARAM */ /* Prevent warning if -Wstrict-prototypes. */ #ifdef __GNUC__ # ifdef YYPARSE_PARAM int yyparse (void *); # else int yyparse (void); # endif #endif /* YY_DECL_VARIABLES -- depending whether we use a pure parser, variables are global, or local to YYPARSE. */ #define YY_DECL_NON_LSP_VARIABLES \ /* The lookahead symbol. */ \ int yychar; \ \ /* The semantic value of the lookahead symbol. */ \ YYSTYPE yylval; \ \ /* Number of parse errors so far. */ \ int yynerrs; #if YYLSP_NEEDED # define YY_DECL_VARIABLES \ YY_DECL_NON_LSP_VARIABLES \ \ /* Location data for the lookahead symbol. 
*/ \ YYLTYPE yylloc; #else # define YY_DECL_VARIABLES \ YY_DECL_NON_LSP_VARIABLES #endif /* If nonreentrant, generate the variables here. */ #if !YYPURE YY_DECL_VARIABLES #endif /* !YYPURE */ int yyparse (YYPARSE_PARAM_ARG) YYPARSE_PARAM_DECL { /* If reentrant, generate the variables here. */ #if YYPURE YY_DECL_VARIABLES #endif /* !YYPURE */ register int yystate; register int yyn; int yyresult; /* Number of tokens to shift before error messages enabled. */ int yyerrstatus; /* Lookahead token as an internal (translated) token number. */ int yychar1 = 0; /* Three stacks and their tools: `yyss': related to states, `yyvs': related to semantic values, `yyls': related to locations. Refer to the stacks thru separate pointers, to allow yyoverflow to reallocate them elsewhere. */ /* The state stack. */ short yyssa[YYINITDEPTH]; short *yyss = yyssa; register short *yyssp; /* The semantic value stack. */ YYSTYPE yyvsa[YYINITDEPTH]; YYSTYPE *yyvs = yyvsa; register YYSTYPE *yyvsp; #if YYLSP_NEEDED /* The location stack. */ YYLTYPE yylsa[YYINITDEPTH]; YYLTYPE *yyls = yylsa; YYLTYPE *yylsp; #endif #if YYLSP_NEEDED # define YYPOPSTACK (yyvsp--, yyssp--, yylsp--) #else # define YYPOPSTACK (yyvsp--, yyssp--) #endif YYSIZE_T yystacksize = YYINITDEPTH; /* The variables used to return semantic value and location from the action routines. */ YYSTYPE yyval; #if YYLSP_NEEDED YYLTYPE yyloc; #endif /* When reducing, the number of symbols on the RHS of the reduced rule. */ int yylen; YYDPRINTF ((stderr, "Starting parse\n")); yystate = 0; yyerrstatus = 0; yynerrs = 0; yychar = YYEMPTY; /* Cause a token to be read. */ /* Initialize stack pointers. Waste one element of value and location stack so that they stay on the same level as the state stack. The wasted elements are never initialized. */ yyssp = yyss; yyvsp = yyvs; #if YYLSP_NEEDED yylsp = yyls; #endif goto yysetstate; /*------------------------------------------------------------. | yynewstate -- Push a new state, which is found in yystate. | `------------------------------------------------------------*/ yynewstate: /* In all cases, when you get here, the value and location stacks have just been pushed. so pushing a state here evens the stacks. */ yyssp++; yysetstate: *yyssp = yystate; if (yyssp >= yyss + yystacksize - 1) { /* Get the current used size of the three stacks, in elements. */ YYSIZE_T yysize = yyssp - yyss + 1; #ifdef yyoverflow { /* Give user a chance to reallocate the stack. Use copies of these so that the &'s don't force the real ones into memory. */ YYSTYPE *yyvs1 = yyvs; short *yyss1 = yyss; /* Each stack pointer address is followed by the size of the data in use in that stack, in bytes. */ # if YYLSP_NEEDED YYLTYPE *yyls1 = yyls; /* This used to be a conditional around just the two extra args, but that might be undefined if yyoverflow is a macro. */ yyoverflow ("parser stack overflow", &yyss1, yysize * sizeof (*yyssp), &yyvs1, yysize * sizeof (*yyvsp), &yyls1, yysize * sizeof (*yylsp), &yystacksize); yyls = yyls1; # else yyoverflow ("parser stack overflow", &yyss1, yysize * sizeof (*yyssp), &yyvs1, yysize * sizeof (*yyvsp), &yystacksize); # endif yyss = yyss1; yyvs = yyvs1; } #else /* no yyoverflow */ # ifndef YYSTACK_RELOCATE goto yyoverflowlab; # else /* Extend the stack our own way. */ if (yystacksize >= YYMAXDEPTH) goto yyoverflowlab; yystacksize *= 2; if (yystacksize > YYMAXDEPTH) yystacksize = YYMAXDEPTH; { short *yyss1 = yyss; union yyalloc *yyptr = (union yyalloc *) YYSTACK_ALLOC (YYSTACK_BYTES (yystacksize)); if (! 
yyptr) goto yyoverflowlab; YYSTACK_RELOCATE (yyss); YYSTACK_RELOCATE (yyvs); # if YYLSP_NEEDED YYSTACK_RELOCATE (yyls); # endif # undef YYSTACK_RELOCATE if (yyss1 != yyssa) YYSTACK_FREE (yyss1); } # endif #endif /* no yyoverflow */ yyssp = yyss + yysize - 1; yyvsp = yyvs + yysize - 1; #if YYLSP_NEEDED yylsp = yyls + yysize - 1; #endif YYDPRINTF ((stderr, "Stack size increased to %lu\n", (unsigned long int) yystacksize)); if (yyssp >= yyss + yystacksize - 1) YYABORT; } YYDPRINTF ((stderr, "Entering state %d\n", yystate)); goto yybackup; /*-----------. | yybackup. | `-----------*/ yybackup: /* Do appropriate processing given the current state. */ /* Read a lookahead token if we need one and don't already have one. */ /* yyresume: */ /* First try to decide what to do without reference to lookahead token. */ yyn = yypact[yystate]; if (yyn == YYFLAG) goto yydefault; /* Not known => get a lookahead token if don't already have one. */ /* yychar is either YYEMPTY or YYEOF or a valid token in external form. */ if (yychar == YYEMPTY) { YYDPRINTF ((stderr, "Reading a token: ")); yychar = YYLEX; } /* Convert token to internal form (in yychar1) for indexing tables with */ if (yychar <= 0) /* This means end of input. */ { yychar1 = 0; yychar = YYEOF; /* Don't call YYLEX any more */ YYDPRINTF ((stderr, "Now at end of input.\n")); } else { yychar1 = YYTRANSLATE (yychar); #if YYDEBUG /* We have to keep this `#if YYDEBUG', since we use variables which are defined only if `YYDEBUG' is set. */ if (yydebug) { YYFPRINTF (stderr, "Next token is %d (%s", yychar, yytname[yychar1]); /* Give the individual parser a way to print the precise meaning of a token, for further debugging info. */ # ifdef YYPRINT YYPRINT (stderr, yychar, yylval); # endif YYFPRINTF (stderr, ")\n"); } #endif } yyn += yychar1; if (yyn < 0 || yyn > YYLAST || yycheck[yyn] != yychar1) goto yydefault; yyn = yytable[yyn]; /* yyn is what to do for this token type in this state. Negative => reduce, -yyn is rule number. Positive => shift, yyn is new state. New state is final state => don't bother to shift, just return success. 0, or most negative number => error. */ if (yyn < 0) { if (yyn == YYFLAG) goto yyerrlab; yyn = -yyn; goto yyreduce; } else if (yyn == 0) goto yyerrlab; if (yyn == YYFINAL) YYACCEPT; /* Shift the lookahead token. */ YYDPRINTF ((stderr, "Shifting token %d (%s), ", yychar, yytname[yychar1])); /* Discard the token being shifted unless it is eof. */ if (yychar != YYEOF) yychar = YYEMPTY; *++yyvsp = yylval; #if YYLSP_NEEDED *++yylsp = yylloc; #endif /* Count tokens shifted since error; after three, turn off error status. */ if (yyerrstatus) yyerrstatus--; yystate = yyn; goto yynewstate; /*-----------------------------------------------------------. | yydefault -- do the default action for the current state. | `-----------------------------------------------------------*/ yydefault: yyn = yydefact[yystate]; if (yyn == 0) goto yyerrlab; goto yyreduce; /*-----------------------------. | yyreduce -- Do a reduction. | `-----------------------------*/ yyreduce: /* yyn is the number of a rule to reduce with. */ yylen = yyr2[yyn]; /* If YYLEN is nonzero, implement the default value of the action: `$$ = $1'. Otherwise, the following line sets YYVAL to the semantic value of the lookahead token. This behavior is undocumented and Bison users should not rely upon it. Assigning to YYVAL unconditionally makes the parser a bit smaller, and it avoids a GCC warning that YYVAL may be used uninitialized. 
*/ yyval = yyvsp[1-yylen]; #if YYLSP_NEEDED /* Similarly for the default location. Let the user run additional commands if for instance locations are ranges. */ yyloc = yylsp[1-yylen]; YYLLOC_DEFAULT (yyloc, (yylsp - yylen), yylen); #endif #if YYDEBUG /* We have to keep this `#if YYDEBUG', since we use variables which are defined only if `YYDEBUG' is set. */ if (yydebug) { int yyi; YYFPRINTF (stderr, "Reducing via rule %d (line %d), ", yyn, yyrline[yyn]); /* Print the symbols being reduced, and their result. */ for (yyi = yyprhs[yyn]; yyrhs[yyi] > 0; yyi++) YYFPRINTF (stderr, "%s ", yytname[yyrhs[yyi]]); YYFPRINTF (stderr, " -> %s\n", yytname[yyr1[yyn]]); } #endif switch (yyn) { case 7: #line 59 "gram.y" { yyerrok; ; break;} case 9: #line 66 "gram.y" { BlockReverseSw = 0; if( ModeAssignAccptFlag ) outputHeader( yyvsp[0] ); ; break;} case 10: #line 71 "gram.y" { BlockReverseSw = 1; if( !ModeAssignAccptFlag ) outputHeader( yyvsp[0] ); ; break;} case 13: #line 79 "gram.y" { appendNonTerm( HeadName, ModeAssignAccptFlag ^ BlockReverseSw ); ; break;} case 14: #line 83 "gram.y" { entryNonTerm( HeadName, NULL, ModeAssignAccptFlag ^ BlockReverseSw, 0, 1, 0 ); /*$B6uEPO?(B*/ ; break;} case 16: #line 89 "gram.y" { appendNonTerm( HeadName, ModeAssignAccptFlag ); ; break;} case 17: #line 93 "gram.y" { appendNonTerm( HeadName, !ModeAssignAccptFlag ); ; break;} case 21: #line 102 "gram.y" { strcpy( HeadName, yyvsp[0] ); ; break;} case 22: #line 106 "gram.y" { StartFlag = 1; strcpy( HeadName, yyvsp[0] ); ; break;} case 23: #line 112 "gram.y" { strcpy( BodyName[ BodyNo++ ], yyvsp[0] ); ; break;} case 24: #line 117 "gram.y" { ModeAssignAccptFlag = 1; ; break;} case 25: #line 121 "gram.y" { ModeAssignAccptFlag = 0; ; break;} } #line 705 "/usr/share/bison/bison.simple" yyvsp -= yylen; yyssp -= yylen; #if YYLSP_NEEDED yylsp -= yylen; #endif #if YYDEBUG if (yydebug) { short *yyssp1 = yyss - 1; YYFPRINTF (stderr, "state stack now"); while (yyssp1 != yyssp) YYFPRINTF (stderr, " %d", *++yyssp1); YYFPRINTF (stderr, "\n"); } #endif *++yyvsp = yyval; #if YYLSP_NEEDED *++yylsp = yyloc; #endif /* Now `shift' the result of the reduction. Determine what state that goes to, based on the state we popped back to and the rule number reduced by. */ yyn = yyr1[yyn]; yystate = yypgoto[yyn - YYNTBASE] + *yyssp; if (yystate >= 0 && yystate <= YYLAST && yycheck[yystate] == *yyssp) yystate = yytable[yystate]; else yystate = yydefgoto[yyn - YYNTBASE]; goto yynewstate; /*------------------------------------. | yyerrlab -- here on detecting error | `------------------------------------*/ yyerrlab: /* If not already recovering from an error, report this error. */ if (!yyerrstatus) { ++yynerrs; #ifdef YYERROR_VERBOSE yyn = yypact[yystate]; if (yyn > YYFLAG && yyn < YYLAST) { YYSIZE_T yysize = 0; char *yymsg; int yyx, yycount; yycount = 0; /* Start YYX at -YYN if negative to avoid negative indexes in YYCHECK. */ for (yyx = yyn < 0 ? -yyn : 0; yyx < (int) (sizeof (yytname) / sizeof (char *)); yyx++) if (yycheck[yyx + yyn] == yyx) yysize += yystrlen (yytname[yyx]) + 15, yycount++; yysize += yystrlen ("parse error, unexpected ") + 1; yysize += yystrlen (yytname[YYTRANSLATE (yychar)]); yymsg = (char *) YYSTACK_ALLOC (yysize); if (yymsg != 0) { char *yyp = yystpcpy (yymsg, "parse error, unexpected "); yyp = yystpcpy (yyp, yytname[YYTRANSLATE (yychar)]); if (yycount < 5) { yycount = 0; for (yyx = yyn < 0 ? -yyn : 0; yyx < (int) (sizeof (yytname) / sizeof (char *)); yyx++) if (yycheck[yyx + yyn] == yyx) { const char *yyq = ! yycount ? 
", expecting " : " or "; yyp = yystpcpy (yyp, yyq); yyp = yystpcpy (yyp, yytname[yyx]); yycount++; } } yyerror (yymsg); YYSTACK_FREE (yymsg); } else yyerror ("parse error; also virtual memory exhausted"); } else #endif /* defined (YYERROR_VERBOSE) */ yyerror ("parse error"); } goto yyerrlab1; /*--------------------------------------------------. | yyerrlab1 -- error raised explicitly by an action | `--------------------------------------------------*/ yyerrlab1: if (yyerrstatus == 3) { /* If just tried and failed to reuse lookahead token after an error, discard it. */ /* return failure if at end of input */ if (yychar == YYEOF) YYABORT; YYDPRINTF ((stderr, "Discarding token %d (%s).\n", yychar, yytname[yychar1])); yychar = YYEMPTY; } /* Else will try to reuse lookahead token after shifting the error token. */ yyerrstatus = 3; /* Each real token shifted decrements this */ goto yyerrhandle; /*-------------------------------------------------------------------. | yyerrdefault -- current state does not do anything special for the | | error token. | `-------------------------------------------------------------------*/ yyerrdefault: #if 0 /* This is wrong; only states that explicitly want error tokens should shift them. */ /* If its default is to accept any token, ok. Otherwise pop it. */ yyn = yydefact[yystate]; if (yyn) goto yydefault; #endif /*---------------------------------------------------------------. | yyerrpop -- pop the current state because it cannot handle the | | error token | `---------------------------------------------------------------*/ yyerrpop: if (yyssp == yyss) YYABORT; yyvsp--; yystate = *--yyssp; #if YYLSP_NEEDED yylsp--; #endif #if YYDEBUG if (yydebug) { short *yyssp1 = yyss - 1; YYFPRINTF (stderr, "Error: state stack now"); while (yyssp1 != yyssp) YYFPRINTF (stderr, " %d", *++yyssp1); YYFPRINTF (stderr, "\n"); } #endif /*--------------. | yyerrhandle. | `--------------*/ yyerrhandle: yyn = yypact[yystate]; if (yyn == YYFLAG) goto yyerrdefault; yyn += YYTERROR; if (yyn < 0 || yyn > YYLAST || yycheck[yyn] != YYTERROR) goto yyerrdefault; yyn = yytable[yyn]; if (yyn < 0) { if (yyn == YYFLAG) goto yyerrpop; yyn = -yyn; goto yyreduce; } else if (yyn == 0) goto yyerrpop; if (yyn == YYFINAL) YYACCEPT; YYDPRINTF ((stderr, "Shifting error token, ")); *++yyvsp = yylval; #if YYLSP_NEEDED *++yylsp = yylloc; #endif yystate = yyn; goto yynewstate; /*-------------------------------------. | yyacceptlab -- YYACCEPT comes here. | `-------------------------------------*/ yyacceptlab: yyresult = 0; goto yyreturn; /*-----------------------------------. | yyabortlab -- YYABORT comes here. | `-----------------------------------*/ yyabortlab: yyresult = 1; goto yyreturn; /*---------------------------------------------. | yyoverflowab -- parser overflow comes here. | `---------------------------------------------*/ yyoverflowlab: yyerror ("parser stack overflow"); yyresult = 2; /* Fall through. 
*/ yyreturn: #ifndef yyoverflow if (yyss != yyssa) YYSTACK_FREE (yyss); #endif return yyresult; } #line 127 "gram.y" #include "lex.yy.c" void appendNonTerm( char *name, int modeAssign ) { BODY *body; body = setNonTerm(); entryNonTerm( name, body, modeAssign, StartFlag, ModeBlock, 0 ); BodyNo = 0; } BODY *setNonTerm( void ) { int i; BODY *body; BODY *top = NULL, *prev = NULL; for( i = 0; i < BodyNo; i++ ){ if( (body = malloc( sizeof(BODY) )) == NULL ){ errMes( "Can't alloc nonterminal list buffer" ); } strcpy( body->name, BodyName[ i ] ); body->abort = 0; if( prev != NULL ){ prev->next = body; } else { top = body; } prev = body; } body->next = NULL; return( top ); } CLASS *entryNonTerm( char *name, BODY *body, int modeAccpt, int start, int member, int tmp ) { CLASS *class; class = getClass( name ); if( class != NULL ){ if( member ){ errMes("Accepted flag of class \"%s\" is re-assigned", HeadName ); ErrParse++; } } else { if( (class = malloc( sizeof(CLASS) )) == NULL ){ errMes( "Can't alloc memory for Class Finite Automaton." ); } strcpy( class->name, name ); if( modeAccpt ){ if( member ){ class->no = CurClassNo; } else { if( !tmp ){ outputHeader( name ); class->no = CurClassNo; } } } else { class->no = -1; } class->branch = 0; class->usedFA = 0; class->used = 1; /* non-terminal does not appear in voca */ class->bodyList = NULL; class->tmp = tmp; class->next = NULL; if( ClassListTail == NULL ){ ClassList = class; } else { ClassListTail->next = class; } ClassListTail = class; } if( body != NULL ) pushBody( class, body ); if( start ){ StartFlag = 0; if( StartSymbol == NULL ){ StartSymbol = class; } else { errMes("Start symbol is redifined as \"%s\"", class->name ); ErrParse++; } } return( class ); } void pushBody( CLASS *class, BODY *newbody ) { BODYLIST *bodyList = class->bodyList; BODYLIST *preBodyList = NULL; BODYLIST *newBodyList; BODY *body; int cmp; int defineNo = 1; while( bodyList != NULL ){ body = bodyList->body; cmp = strcmp( body->name, newbody->name ); if( cmp > 0 ) break; if( cmp == 0 ){ if( unifyBody( class->name, body, newbody ) ){ warnMes( "Class \"%s\" is defined as \"%s..\" again.", class->name, body->name ); } return; } preBodyList = bodyList; bodyList = bodyList->next; defineNo++; } if( (newBodyList = malloc( sizeof(BODYLIST) )) == NULL ){ errMes( "Can't alloc class body buffer." 
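/* (note) appendNonTerm() through yyerror() in this region are the user-code
   section of gram.y (everything after the second "%%"), inlined verbatim by
   bison as marked by the #line 127 "gram.y" directive above; changes belong
   in gram.y rather than in this generated file. */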
); } newBodyList->body = newbody; if( preBodyList != NULL ){ preBodyList->next = newBodyList; } else { class->bodyList = newBodyList; } newBodyList->next = bodyList; class->branch++; } int unifyBody( char *className, BODY *body, BODY *newbody ) { BODY *bodyNext, *newbodyNext; char *newClassName; BODY *newBody; CLASS *class; bodyNext = body->next; newbodyNext = newbody->next; while( 1 ){ if( bodyNext == NULL && newbodyNext == NULL ){ return( -1 ); } if( newbodyNext == NULL ){ if( body->abort ){ return( -1 ); } else { body->abort = 1; return( 0 ); } } if( bodyNext == NULL ){ body->abort = 1; body->next = newbodyNext; return( 0 ); } if( strcmp( bodyNext->name, newbodyNext->name ) ) break; body = bodyNext; newbody = newbodyNext; bodyNext = body->next; newbodyNext = newbody->next; } class = getClass( body->name ); if( class != NULL && class->tmp ){ entryNonTerm( body->name, newbodyNext, 0, 0, 0, 1 ); } else { newClassName = getNewClassName( className ); entryNonTerm( newClassName, bodyNext, 0, 0, 0, 1 ); entryNonTerm( newClassName, newbodyNext, 0, 0, 0, 1 ); if( (newBody = malloc( sizeof(BODY) )) == NULL ){ errMes( "Can't alloc body buffer of tmp class, \"%s\".", newClassName ); } strcpy( newBody->name, newClassName ); newBody->abort = 0; newBody->next = NULL; body->next = newBody; newbody->next = newBody; } return( 0 ); } char *getNewClassName( char *keyname ) { static char classname[ SYMBOL_LEN ]; static int tmpClassNo = 0; sprintf( classname, "%s#%d", keyname , tmpClassNo++ ); if( !SW_SemiQuiet ){ fprintf( stderr, "\rNow modifying grammar to minimize states[%d]", GramModifyNum ); NoNewLine = 1; } GramModifyNum++; return( classname ); } void setGram( void ) { char *name; if( (yyin = fopen( GramFile, "r" )) == NULL ){ errMes( "Can't open grammar file \"%s\"", GramFile ); } if( SW_Compati ){ strcpy( HeaderFile, "/dev/null" ); } if( (FPheader = fopen( HeaderFile, "w" )) == NULL ){ errMes( "Can't open Header File for writting\"%s\"", HeaderFile ); } fprintf( FPheader, "/* Header of class reduction flag for finite automaton parser\n" " made with mkfa %s\n\n" " Do logicalAND between label and FA's field #4,#5.\n" "*/\n\n", VerNo ); if( !SW_Quiet ) fputs( "Now parsing grammar file\n", stderr ); yyparse(); if( !SW_Quiet ){ fprintf( stderr, "\rNow modifying grammar to minimize states[%d]\n", GramModifyNum - 1 ); NoNewLine = 0; } if( StartSymbol == NULL ) StartSymbol = ClassList; fprintf( FPheader, "/* Start Symbol: %s */\n", StartSymbol->name ); fclose( FPheader ); if( (name = chkNoInstantClass()) != NULL ){ errMes( "Prototype-declared Class \"%s\" has no instant definitions", name ); } if( ErrParse ) errMes( "%d fatal errors exist", ErrParse ); } void outputHeader( char *name ) { if( ClassNo >= CLASSFLAG_MAX ){ if( !SW_Compati ){ warnMes( "Class accepted flag overflow.\"%s\"", name ); CurClassNo = -1; } } else { if( !SW_Compati ){ fprintf( FPheader, "#define ACCEPT_%s 0x%08x\n", name, 1 << ClassNo ); } CurClassNo = ClassNo++; } } char *chkNoInstantClass( void ) { CLASS *class = ClassList; while( class != NULL ){ if( !class->branch ) return( class->name ); class = class->next; } return( NULL ); } int yyerror( char *mes ) { errMes(mes ); ErrParse++; return( 0 ); } julius-4.2.2/gramtools/mkdfa/mkfa-1.44-flex/voca.c0000644001051700105040000000656012004452400017751 0ustar ritrlab/* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All 
rights reserved */ #include "mkfa.h" extern char VocaFile[ 1024 ]; extern CLASS *ClassList; extern CLASS *ClassListTail; extern int SW_Quiet; extern int SW_Verbose; extern int NoNewLine; char *gettoken( char *line, char *dst ); BODY *appendTerm( BODY *list, char *name ); BODY *entryTerm( char *name, BODY *body, int listLen ); void setVoca( void ) { char token1[ SYMBOL_LEN ]; char token2[ SYMBOL_LEN ]; int virgin = 1; int bodynum = 0; BODY *bodyList = NULL; FILE *fp; char identifier[ SYMBOL_LEN ] = ""; if( (fp = fopen( VocaFile, "r" )) == NULL ){ errMes( "Can't open vocabulary file\"%s\"", VocaFile ); } if( !SW_Quiet ){ newLineAdjust(); fputs( "Now parsing vocabulary file\n", stderr ); } while( 1 ){ static char line[ 1000 ]; char *ptr = line; if( fgets( line, 1000, fp ) == NULL ){ entryTerm( identifier, bodyList, bodynum ); break; } if( line[ 0 ] == '\0' ) continue; if( line[ 0 ] == '#' ){ if( (ptr = gettoken( ptr, token1 )) == NULL ) continue; if( !virgin ){ entryTerm( identifier, bodyList, bodynum ); bodyList = NULL; bodynum = 0; } else { virgin = 0; } strcpy( identifier, token1 + 1 ); continue; } else { ptr = gettoken( ptr, token1 ); if( ptr == NULL ) continue; ptr = gettoken( ptr, token2 ); if( ptr == NULL ){ bodyList = appendTerm( bodyList, token1 ); } else { bodyList = appendTerm( bodyList, token2 ); } bodynum++; } } } char *gettoken( char *line, char *dst ) { char *ptr = dst; char ch; do{ ch = *line++; if( ch == '\0' ) return( NULL ); } while( ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n' ); while( 1 ){ *ptr++ = ch; ch = *line++; if( ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n' ){ *ptr = '\0'; return( line ); } if( ch == '\0' ){ *ptr = '\0'; return( line - 1 ); } } } BODY *appendTerm( BODY *list, char *name ) { BODY *newTerm; if( (newTerm = malloc( sizeof(BODY) )) == NULL ){ errMes( "Can't alloc term list buffer" ); } strcpy( newTerm->name, name ); newTerm->abort = 0; newTerm->next = list; return( newTerm ); } BODY *entryTerm( char *name, BODY *body, int listLen ) { CLASS *class; static int InputNo = 0; BODYLIST *bodyList; if( getClass( name ) != NULL ){ errMes( "Class redefined \"%s\"", name ); } if( (class = malloc( sizeof(CLASS) )) == NULL ){ errMes( "Can't alloc memory for Class Finite Automaton." 
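/* entryTerm() registers one terminal class collected by setVoca() above:
 * a "#NAME" line in the vocabulary file opens a class, and each following
 * word line contributes one terminal (the second whitespace-separated
 * token when present, otherwise the first).  The class is given a
 * sequential number (InputNo), its branch field is set to the negative of
 * the word count, and it is appended to the global ClassList.
 */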
); } class->no = InputNo++; strcpy( class->name, name ); class->branch = -listLen; class->usedFA = 0; class->used=0; class->tmp = 0; if( ClassListTail == NULL ){ ClassList = class; } else { ClassListTail->next = class; } ClassListTail = class; if( (bodyList = malloc( sizeof(BODYLIST) )) == NULL ){ errMes( "Can't alloc nonterminal list buffer" ); } bodyList->body = body; bodyList->next = NULL; /* for test nt = class->nt[ 0 ]; fprintf( stderr, "class: %s\n", class->name ); while( 1 ){ if( *nt->name == '\0' ) break; fprintf( stderr, "%s\n", nt->name ); nt++; }*/ return( NULL ); } julius-4.2.2/gramtools/mkdfa/mkfa-1.44-flex/doc/0000755001051700105040000000000012004463507017425 5ustar ritrlabjulius-4.2.2/gramtools/mkdfa/mkfa-1.44-flex/doc/mkfa.doc0000644001051700105040000006350510677066017021054 0ustar ritrlab finite automaton generator, mkfa reference document programmed and written by 1995-1996 S.Hamada $B"";EMM@bL@(B A g2fa$B$H$N5!G=>e$NAj0cE@(B n 1 $BKvHx:F5"$N@55,8@8l(B($B@5B'(B)$B$r07$&$3$H$,$G$-$k!#$3$l$K$h$jNc$($P%U%l!<(B $B%:$NM-8B7ONs$,07$($k!#:8:F5"$O07$($J$$!#&KA+0\$r=hM}$7$J$$$?$a$G$"(B $B$k!#$7$+$78=:_I,MW$H$5$l$F$$$kJ8K!$G$OA4$/LdBj$,$J$$!#$b$7$b$C$H0l(B $BHL@-$r$b$?$;$?$$$H$9$l$P!"$=$l$KBP1~$9$k$3$H$r9M$($k$h$j(BLR$B$rMQ$$$F(B CFL$B$r$l$N%/%i%9$N4T85$K%"%/%7%g%s$r;\$9$3$H(B $B$,$G$-$k!#uBV(B1$B$,:G=*>uBV$HJ]>Z$5$l$F$$$?$,!"(Bmkfa$B$G$O7h$^$C$F$$$J$$!#(B B $BF~=PNO%U%!%$%k$N;EMM(B $B!v(B $BJ8K!%U%!%$%k(B BNF$B%i%$%/$K=q$-49$(5,B'$r5-=R$9$k$3$H$O(Bg2fa$B$HF1$8!#$?$@$7e$+$i?7$7$$%/%i%9$,H/8+$5$l$k$?$S$K3dIU$i(B $B$l$k!#3+;O%U%i%0$bF1MM!#0J2<3+;O%U%i%0$O>JN,!#(B 2 $B{\n}\n$B$G=q$-49$(5,B'Dj5A9T(B($B$3$l$r$N$_=q$$$F$*$-!"%V%m%C(B $B%/$N8eB39T$G%/%i%9$rDj5A$r$9$k$3$H$b2DG=(B($B$3$l$r%W%m%H%?%$%W@k8@$H(B $B8F$V(B)$B$G$"$k!#%V%m%C%/$N%a%s%P!<$H$7$F3d$jEv$F;XDj$9$k%/%i%9$O!"$=(B $B$l$h$j0JA0$K3d$jEv$F$,9T$J$o$l$F$$$J$$$b$N$K8B$i$l$k(B(2$B=E3d$jEv$F%A%'%C(B $B%/$r(B1$B%Q%9$G:Q$^$;$k$?$a$NET9g(B)$B!#$^$?J#?t$NDj5A$N$"$k%/%i%9$r%a%s%P!<(B $B$H$7$F$/$/$k$H(Bflag-reassigned$B$N%(%i!<$,=P$k$N$G!"$=$N$H$-$O%W%m%H(B $B%?%$%W@k8@$rMQ$$$k$G$-$G$"$k!#(B 3 $B$5$;$k$3$H$,$G$-$k!#$^$?(B $B=i$a$F$NDj5A$G$J$$9T$G(B!$B$rIU$1$?>l9g$OL5;k$5$l$k!#%V%m%C%/Fb$N%/%i(B $B%9$K$OIU$1$i$l$J$$!#$^$?(BIGNORE$B$5$l$?%V%m%C%/Fb$N%W%m%H%?%$%W@k8@$5(B $B$l$?%a%s%P!<$,!"l9g$O(BASSIGN$B$5$l$k!#(B 4 $B%/%i%9L>$O1Q?t;z$H%"%s%@!<%9%3%"$N$_5v$5$l$k!#(B 5 $B%/%i%9L>$ND>A0$K(B*$B$rIU$1$k$H!"$=$N%/%i%9$r3+;O5-9f$H$9$k!#(B 6 #$B0J9_9TKv$^$G$OCmuBV!"F~NO!"A+0\@h!"uBV(B1$B$OI,$:uBV$@$H8GDj$5$l$F$$$?$,!"(Bmkfa$B$G$OJ]>Z$5$l(B $B$J$$!#$=$3$GuBV$O(B{$BuBV(B:? 
$BF~NO(B:-1 $BA+0\@h(B:-1 $BJ}=PNO$9$k$,!"(Bmkfa$B$G$O(BNFA->DFA $BJQ49$r%a%b%j>e$G9T$J$&$N$G!"(B-NFA,-DFA$B%*%W%7%g%s$GA*BrE*$K=PNO$9$k!#(B $B!v(B $B%X%C%@%U%!%$%k(B mkfa$B$O"$B$H$$$&%i%Y%k$HO@M}OB$9$k$3$H$K$h$j$O1Q?t;z$H%"%s%@!<%9%3%"$N$_5v$5$l$k!#$?$@$7%V%m%C%/(B $B2=$5$l$?%/%i%9$K4X$7$F$O%?%0L>$,%i%Y%k$KM?$($i$l$k!#(B C $B;Y1g5!G=(B mkfa$B$K$OJ8K!:n@.$N$?$a$N;Y1g5!G=$,6/2=$5$l$F$$$k!#$=$l$i$K$O(B g2fa$B$HF~NO%U%!%$%k$N8_49@-$rJ]$A$?$+$C$?$N$G!"$"$-$i$a$?J8K!;EMM$,$"(B $B$k!#0l$D$O!"J8K!%U%!%$%k$r2~9T%U%j!<$G(B";"$B$G6h@Z$i$;$k$3$H!"%/%i%9Dj5A(B $B$O(B"|"$B$rMQ$$$F(B1$B%9%F!<%H%a%s%H$K8GDj$5$;$k$3$H$H!"(BFA$B%U%!%$%k$NBh(B4$B%U%#!<(B $B%k%I$OA+0\@h$N$N%/%i%9$N%j%9%H$X$b$0(B $B$j9~$s$G9T$-!"=*C<$r8+$D$1$?$i(BFA$B$N>uBVA+0\$r$9$k$H$$$&$h$&$K$7$F(BNFA$B$r(B $B:n@.$9$k!#DFA$B$G$"$k$,!"3+;O5-9f$+$i%H%]%m%8!<=g$K%3%s%U%j%/(B $B%H$7$F$$$k%"!<%/$rM;9g$7$F!"$=$N@h$OM;9gBP>]@h$N>uBV$=$l$>$l$+$i=P$F(B $B$$$k%"!<%/$r$9$Y$F%3%T!<$9$k!#$^$?$=$N85$NM;9gBP>]$G$"$k%"!<%/$r>C5n(B $B$9$k!#$3$N;~%"!<%/$r>C$9$3$H$K$h$j$=$NM;9gBP>]@h$r;X$9%"!<%/$,$J$/$J$C(B $B$?$i!"$=$N>uBV$O>CLG$5$;$k!#$=$N8e!";0$DAH%j%9%H$r:n@.$9$k!#(B v.1.01(95.12.31) +> 0 -(a)-> 1* | | | (a) +--+ $B$H$$$&(BNFA$B$+$i(BDFA$B$KJQ49$9$k;~!":#$^$G$O2>$K?7$7$$M;9g>uBV(B2$B$r:n$j!"M;9g(B $BBP>]$=$l$>$l=g$K!"$=$3$+$i$NA+0\$rM;9g>uBV$X%3%T!<$7$F$O(B0$B$+$i$NA+0\$r(B $B@Z$C$F>CLGH=Dj$r$7$F$$$?!#$=$7$F$9$Y$F=*$C$?$iM;9g>uBV72$r%j%9%H$K2C(B $B$($k!#(B $BBgDq$N%k!<%W$G$O$3$N=hM}$G9=$o$J$$$,!">e5-$N$h$&$J<+8J%k!<%W>e$G!"$+(B $B$D<+8J%k!<%W$,M;9g$5$l$k$H$-!"M;9g$NCf4V=hM}Cf$K%3%T!<$,9T$J$o$l$k$?(B $B$aIT6q9g$,5/$3$k!#(B $B6qBNE*$K$O2<$NDL$j!#(B $B1&@~7A$N$_$H$$$&@)Ls$+$i(B1$B$,@h8=$l$k!#$=$7$F(B1$B$+$i$NA+0\$r%3%T!<$7$?8e!"(B 1$B$O(B0$B$+$i$NA+0\$7$+$J$$$+$i>CLG$9$k!#$=$N]$G$"$k(B0$B$+$i$N(B $BA+0\$N%3%T!<$H$J$k$,!"8=:_$N>uBV$O(B0$B$+$i(B0$B$X$N%"!<%/$N$_B8:_$9$k$?$a!"(B $B$=$l$,%3%T!<$5$l$k!#(B 0 -(a)-> 2* 2 -(a)-> 0 $B$H$J$k$o$1$G$"$k!#(B $B$=$3$G(B0$B$N%"!<%/$N=hM}$,40A4$K=*$C$F$$$k$+!"$^$@A4$/;O$a$i$l$F$$$J$$>u(B $BBV$N;~$K%3%T!<$r$9$kI,MW$,$"$k$N$@$,!"%"!<%/$N=hM}$r40A4$K=*$i$;$F$+(B $B$i%3%T!<$r$9$k$h$&$K$7$?!#6qBNE*$K$OM;9gBP>]$NCf$K<+J,$,$$$?$i(B($B$9$J$o(B $B$A<+8J%k!<%W(B)$B!"(Breserved$B$H$$$&%U%i%0$rN)$F$F;^$r@Z$k$N$_$K$7$F$*$/!#<+(B $BJ,$N%"!<%/$N=hM}$,40A4$K=*$C$F$+$i!"(Breserved$B%U%i%0$,N)$C$F$$$kA4$F$N(B $BM;9g>uBV$r=g$K=hM}$7$F$$$/!#(B $B$3$l$K$h$j>e5-$NLdBj$O2r7h$5$l$?!#(B v.1.02(96.1.1) $B:#$^$G$O!"(BNFA$B?75,:n@.;~$K(BFA$B$KHV9f$r?6$j!"(BDFA$B:n@.;~$K>CLG$7$?>uBV$KBP(B $B$7$F$O7gHVEPO?$r$7$?>e$G!"(B3$B$DAH:n@.;~$K7gHV$N$J$$HV9fNs$KuBVHV9f$OITMW$G$"$k!#$=$3$G(BNFA$B:n@.;~$O>uBV$r(B-1$B$H$7$F$*$-!"(B3$B$DAH(B $B:n@.;~$K;2>H$5$l$?%N!<%I$KBP$7$FHV9f$r?6$C$F$$$/$H$$$&=hM}$KJQ99$7$?!#(B $B$3$l$K$h$j(Bg2fa$B$N0EL[%k!<%k$G$"$C$?>uBV(B#1$B$OuBV$H$$$&5,B'$OeCY$+$C$?$N$,!"(B4$BG\0J>eB.$/$J$C$?(B($BE,@50LCV$X$NA^F~$O%G!<%?%Y!<%9(B $B$K$*$1$k%$%s%G%C%/%9%U%!%$%k$_$?$$$J$3$H$r$7$F$$$F7k9=BgJQ$@$C$?$N$K!D(B)$B!#(B $Be$7$F$-$?N'B.CJ3,$O(B18$B!s$r@j$a$k(BappendNextArc$B$@!#$=$NB>!"2hLLI=(B $B<($G$N2~9TLdBj$J$I$r2r7h$7$?!#(B v.1.04(96.1.2) $B%Q!<%6$O(Bscanf$B$r;H$C$F(BDFA$B%U%!%$%k$rFI$_9~$s$G$$$k$N$GBh(B4$B%U%#!<%k%I$N(B0x $B$H$$$&J8;zNs$r=PNO$7$J$$$h$&$KJQ99$7$?!#$^$?%F%9%H$N$?$a(Bg2fa$B$H8_49$N(B $B=PNO$,=PMh$k%*%W%7%g%s$r?7@_$7$?!#$3$N=PNO%U%!%$%k$G%Q!<%8%s%0$7$?7k(B $B2L!"%*%j%8%J%k$N(BDFA$B$N7k2L$HA4$/F1$8$K$J$C$?$N$G(B($B$?$@$7>uBV(B1$B$O$N%b%8%e!<%k$GJ8;zG[(B $BNs$H$7$FDj5A$5$l$?$b$N$r(Bextern$B$G%]%$%s%?$H$7$F;2>H$7$?$iMn$A$k$H$$$&(B $B%P%0$KD9;~4VG:$^$5$l$?!#$J$<$@$a$J$N$+J,$+$i$J$$(B($B%l%Y%k$H$7$F$OF1$8$J(B 
$B$N$K(B)$B!#$^$?$3$N2~JQ$N:]!":#$^$G$O(BNFA$B:n@.;~$K%X%C%@%U%!%$%k$r:n@.$7$F(B $B$$$?$,!"J8K!%U%!%$%kFI$_9~;~$K9T$J$&$3$H$K$7$?!#$^$?L$;HMQ%/%i%9%A%'%C(B $B%/;~$K%/%i%9>pJs$r$9$Y$F%U%j!<$9$k$3$H$K$7$?!#(B v.1.11(96.1.8) $B3+;O5-9f$r;XDj$9$k(B*$B$r?7@_!#$=$NB>!"%W%m%0%i%`$r5!G=Kh$K%b%8%e!<%kJ,$1(B $B$7$?!#(B v.1.11a(96.1.11) v.1.11$B$G%(%s%P%0$7$F$$$?(B-c$B$N=PNO!JBh(B5$B%U%#!<%k%I$,=PNO$5$l$k(B)$B$rD{@5!#(B v.1.11b(96.1.11) $B:F5"Dj5A$N:]!"%k!<%WFb$G$N(Breserved$B%U%i%0$NN)$FJ}$,$*$+$7$+$C$?$N$r=$(B $B@5$7$?!#(B v.1.11c(96.1.11) DFA$B$G:F5"$,?<$/$J$k$HMn$A$k$N$,(Bstack overflow$B$K$h$k$b$N$HH=L@!#:F5"4X(B $B?tFb$NJ8;zNs$r(Bstatic$B$KJQ99$7!"$"$kDxEY?<$/$^$G$NC5:w$,2DG=$K$J$C$?!#(B $B>-MhE*$K$=$l$G$bITB-$J$i$P(Bgcc$B$N%3%s%Q%$%k%*%W%7%g%s$J$I$G2sHr$9$kI,MW(B $B$,$"$k!#(B v.1.20(96.1.13) BNF$B$r=*C<%l%Y%k$^$GMn$H$7$F(BNFA$B$r:n@.$7$?8e(BDFA$B$r:n$k$H>uBV?t$,GzH/$9$k(B $B$3$H$,$"$k!#$=$N0l$D$H$7$F$"$k%/%i%9$NDj5A$,F1$8%/%i%9$+$i;O$^$k>l9g!"(B $B$=$N%/%i%9$=$l$>$l$N=*CuBV?t$r:n$k$H$$$&$3$H$,$"$k!#$3$N$3$H$O$O$8$a$+$iJ,$C$F$$$?$,!":G(B $B=*E*$K:G>.2=$9$l$PF1$8$K$J$k$N$G5$$K$7$J$+$C$?!#$7$+$7$3$l$,860x$G(B96M $B$r?)$$IU$/$7$FMn$A$k$H$$$&$3$H$,5/$3$C$?$N$GBP=h$9$k$3$H$K$7$?!#(B TOP$B%l%Y%k(B($BJ8K!%l%Y%k(B)$B$G>uBV$r2!$5$($i$l$k$J$i$PJ8K!$r=q$-49$($k=hM}$r(B $B9T$&$3$H$K$7$?!#6qBNE*$K$Ol9g!"%$%W%7%m%sDj5A$,8=$l$k(B($B$9$J$o$A%i%`%@A+0\(B $B$r=hM}$9$kI,MW$,$"$k!K$N$G!"$=$l$rHr$1$k$?$a$K(Babort$B%U%i%0$rMQ0U$7$?!#(B s : a b c s : a b $B6qBNE*$K$O(Bb$B$K(Babort$B%U%i%0$rN)$F$F$*$-!"(BNFA$B:n@.Cf$KCmL\Cf$N%H!<%/%s$K(B abort$B%U%i%0$,8=$l$?$i!"8=:_=hM}Cf$N%/%i%9$N=P8}(BFA$B$KD>@\$D$J$0$H$$$&$b(B $B$N$G$"$k!#$9$J$o$A!">e$G8@$($P!"(Bb$B$N=P8}$,Fs$DMQ0U$5$l$?$H9M$($FNI$$(B(c $B$NA0$H(Bc$B$N8e$m(B)$B!#(B $B$7$+$78=:_$3$l$r9T$&$?$a$KH/@8$9$k%P%0$,J,$C$F$$$k!#$9$J$o$A!"(Bs$B$N=P8}(B $B$G$"$k(B"..c"$B$K(B"..b"$B$r@\B3$9$k$h$&$K:n$C$F$$$k$?$a!"$=$N=P8}(BFA$B$G$O7PO)(B $B$,0c$&$N$K(Bc$B$b(Bd$B$b$N2sHrJ}K!$bL5$/$O$J$$!K!#$A$J$_$K%/%i(B $B%9Dj5A$OJ8;zNs$NG[Ns$H$7$FI=8=$5$l$F$$$?$N$r!"J8;zNs$N%j%9%H$KJQ99$7(B $B$F$$$k(B($B3Z$9$k$?$a$KG[Ns$K$7$F$$$?$b$N$O$3$l$GA4LG$7$?(B)$B!#(B $BCm(B:$B$3$l$O8e$+$i9M$($k$H%"!<%/>e$Ke$K%U%i%0$r>h$;$l$PLdBj$O2r7h$9$k!#(B v.1.21(96.1.13) DFA$B$,40A4$K(BDFA$B$K$J$C$F$$$J$$%P%0$,A0$+$i3NG'$5$l$F$$$?!#$3$N860x$O!"(B NFA$B$+$i(BDFA$B$KJQ49$9$k:]!"$I$NA+0\85$HA+0\@h$G%"!<%/$r;XDj$9$k$h$&$K$J$C(B $B$F$$$?$?$a!"Nc$($P(BA$B$+$i(BB$B$X(Ba,b$B$NF~NO$GA+0\$9$k>l9g6hJL$,IU$+$J$+$C$?$?(B $B$a!"ITDj$N7k2L$H$J$C$F$$$?$3$H$G$"$k!#$^$?8e$m8~$-(B($BA0>uBV$X(B)$B$N%"!<%/(B $B$OuBVGzH/$7$F$$$?$h$&$G!"$3$l$K$h$C$F>uBV$,$"$kDx(B $BEYM^@)$5$l$k$h$&$K$J$C$?!#(B v.1.22(96.1.15) NFA$B$+$i(BDFA$B$X$NJQ49$K$*$$$FF~NO%3%s%U%j%/%H$r2r7h$9$k:]$K!"LdBj$H$J$k(B $BF~NO$NA+0\@h$,A4$/F1$8$G$"$k>l9g!"JL!9$K?75,$N>uBV$r:n$k$N$G$O$J$/!"(B $BF1$8>uBV$XA+0\$9$k$h$&$K$7$?!#Nc$($P$D$.$N>l9g!"(B 0 -(a,b)->1 +--(a,b)->2 $B$N>l9g(Ba$B$b(Bb$B$b(B1,2$B$XA+0\$9$k$N$G!"(B 0-(a,b)->(1,2) $B$H$9$k!#:#$^$G$O!"(B 0 -a->(1,2) +--b->(1,2') $B$H$7$F$$$?!#(B(1,2)$B$b(B(1,2')$B$bA4$/F1$8$G$"$k$,!".2=$r9T$&$H$7$?$i7k2L$OF1$8$J$N$GITMW$H$J$C$F$7$^$&$+$i(B)$B!#(B $B$^$?(Bv.1.21$B$^$G$O!"(B A : a A : a A ($B$?$@$7(Ba$B$O=*C<(B) $B$H$$$&J8K!$rL58B%k!<%W$9$k$b$N$H$7$F$O$M$k%P%0$,$"$C$?$N$r=$@5$7$?!#(B v.1.30(96.1.15) $B:#$^$G$O!"(BDFA$B$N:n@.$N%"%k%4%j%:%`<+BN$r4*0c$$$7$F$$$F!"$"$k>l=j$G$NNc(B $B$($P(B0$B$H(B1$B$NM;9g>uBV$H!"JL$N>l=j$G$N(B0$B$H(B1$B$rM;9g>uBV$OJLJ*$H$7$F07$C$F$$(B $B$?!#$3$l$K$h$j2<5-$NJ8K!$G$OL58B$K>uBV$r:n@.$9$k$3$H$,3NG'$5$l$F$$$?!#(B S : A S : B A : C D B : C D C : a C : b D : c D : c D 
$B$=$3$GM;9g>uBV<-=q$r!"%m!<%+%k$J$b$N$+$i%0%m!<%P%k$KJQ$($?$i(B($B$A$g$C$H(B $B$NJQ99$G:Q$s$@(B)$B!"2?$HN^$,=P$k$[$I>uBV?t$O8:$j(B(NFA$B$h$j(BDFA$B$NJ}$,>uBV?t(B $B$,>/$J$/$J$k(B!)$B!">e5-J8K!$b%3%s%Q%$%k$G$-$k$h$&$K$J$C$?!#(B v.1.31(96.1.16) v.1.30$B$h$j(BFA$B$NAm?t$H(BFA$B$N=hM}?t$,:G=*E*$K0lCW$7$J$/$J$C$?!#$3$l$OJ#9g(B $BE*$J860x$r;}$C$F$$$F!"C5n$9(B $B$k$H$$$&%N!<%I>C5nH=Dj$K$+$+$i$J$+$C$?(B($B$?$@$7$3$l$O=PNO7k2L$K;Y>c$r(B $BMh$?$5$J$$!#C1$KITMW$H$J$C$?%N!<%I$r(Bfree$B$G$-$J$$$@$1$G$"$k(B) $B>e(B2$B$D$O860x$,J,$C$?;~E@$G$9$0$KD>$;$?$,!"(B3$B$D$a$O9=B$E*$KFq$7$+$C$?!#(B $B%"!<%/$r@ZCG$9$k$H$-$K$3$N>uBV$KMn$A$k$+$r%A%'%C%/$9$k4X?t$r:n$C$?$N(B $B$@$,(B($B;X$5$l$F$$$k%"!<%/$,0l$D$K$J$C$?$J$i!"$=$N%N!<%I$,>CLG$9$k$H2>Dj(B $B$7$F<+J,$K$=$N1F6A$,JV$C$F$/$k$+$H$9$l$P$h$$(B)$B!"$3$l$rMQ$$$k$H!">uBVM;(B $B9g=hM}2aDx$J$I$K0lC6$3$N>uBV$K4Y$k$3$H$,$"$j!"$3$N%A%'%C%/$K$+$+$k$?(B $B$a$K>CLG$9$Y$-$G$J$$%N!<%I$^$G$,>C5n$5$l$k>l9g$,$"$k$N$G$"$k(B($B$3$l$K5$(B $BIU$/$N$K$b$?$$$X$s;~4V$,$+$+$C$?(B)$B!#$=$3$G:n6HCf$N%N!<%I$KBP$7$F(B volatiled$BB0@-$rN)$F>C5nITG=$H8+$J$9$3$H$K$h$C$F2r7h$r;n$_$?$H$3$m!"D"(B $B$8$j$N9g$o$J$$J8K!%U%!%$%k(B(loop_recurse2$B$J$I(B)$B$O9g$&$h$&$K$J$C$?$,!"(Bg1 $B$J$I$G$O%A%'%C%/4X?t$NCf$G(BBusError$B$r$I$&$7$F$b5/$3$7$F$7$^$&!#$H$$$&(B $B$3$H$G$3$N7o$K4X$7$F$O(B1$BF|4hD%$C$F$@$a$@$C$?$N$GJ]N1$H$9$k$3$H$K$7$?!#(B $B$^$?(Bv.1.30$B$G$O>uBVM;9g$K$*$$$F!"$9$G$KB8:_$9$kM;9g>uBV$X$N%j%s%/$N=h(B $BM}$r%(%s%P%0$5$;$F$$$?$N$G!"$3$l$r=$@5$7$?!#(B v.1.32(96.1.16) v.1.20$B$GJ8K!=q$-49$($r9T$&$h$&$K$J$C$F$+$i:#$^$G!"(BDFA$BJQ49$K$*$$$F(B abortFA$B$H(BexitFA$B$rEy2A$K07$C$F$$$J$+$C$?!#$=$N$?$a!"(BabortFA$B$,$A$c$s$H(B $B@\B3$5$l$J$$$H$$$&8=>]$,5/$-$F$$$?!#$=$3$GJQ49$N:F5"4X?t$,8F$P$l$?$H(B $B$-!"(BexitFA$B$,(BNULL$B$J$i(BabortFA$B$r8+$F!"F~$C$F$$$k$J$i$=$NCM$r(BexitFA$B$X0\$9(B $B$H$$$&$h$&$K$7$?$H$3$m@5>oF0:n$7$?!#$3$l$K$H$b$J$$!"(B{exitFA,abortFA} $B$r(B{exitFA1,exitFA2}$B$HL>>NJQ99$7$?!#(B v.1.33(96.1.17) v.1.30$B$+$i!XN^$,=P$k$[$I!Y>uBV?t$,>/$J$/$J$C$?$,!">/$7LdBj$,$"$C$?!#(B $B$=$l$OIaDL$N(BNFA->DFA$BJQ49$HF1$8$h$&$KM;9g$5$l$?85$N>uBVMWAG$,F1$8$G$"(B $B$l$PF1$8>uBV$H$9$k$H$$$&%"%k%4%j%:%`$@$C$?$,!"7PO)$K$h$C$FuBV$,KD$i$`$H$$(B $B$&$3$H$O$J$+$C$?!#(B v.1.34(96.1.18) $B%k!<%W$9$k;~!"$=$N@h$N%N!<%I$X(B $B$7$?!#$^$?>uBV$r8:$i$9$?$a$NJ8K!=q$-49$(;~$K$G$-$k>uBV$NL>A0$r(B# $B$+$i(B#$B$KJQ99$7$?!#J8K!%A%'%C%/;~$N%(%i!<%a%C%;!<(B $B%8$K4^$^$l$k2DG=@-$,$"$j!"$=$N>l9gJ,$+$j$K$/$$$+$i$G$"$k!#(B v.1.35(96.1.19) v.1.31$B$G%k!<%W$H%"%\!<%H$rF1$8$HJa$i$($?$,!"$=$l$O4V0c$$$@$C$?!#$=$l(B $B$i$OJLJ*$G!":F5"$O@5<0$N=P8}$HEy2A$G$"$j!"CV$-49$o$k$b$N$G$"$k!#$^$?(B $B%"%\!<%H$O;R6!$K5$5$l$F$$$/$?$aC_@Q$7$F$$$/$b$N$J$N$G!"%j%9%H(B $B$G07$&$h$&JQ99$5$l$?!#:#$^$G$OB9$K%"%\!<%H$,7Q>5$5$l$F$$$J$+$C$?$3$H(B $B$K$J$k!#$3$N%j%9%H$N;H$$J}$OIaDL$N%*!<%HJQ?t$H$O0[$J$k$?$aCm0U$,I,MW!#(B $B?F$N%j%9%H$O$=$N$^$^;R6!$KEO$5$l!";R6!$O$=$l$rKAF,$G%3%T!<$7$F;HMQ$9(B $B$k$H$J$C$F$$$k$N$G0z?t$H$7$F$b$i$C$?%j%9%H$O(Bfree$B$7$F$O$J$i$J$$!#IaDL(B $B$N%*!<%HJQ?t$HF1$8$l%3%T!<$r$9$k$N$,$a$s$I$/$5$$$N$?(B $B$a!"$3$N$h$&$J7A$H$J$C$?!#(B v.1.36(96.1.20) $B:#$^$G$N%"%k%4%j%:%`$G$O!"$?$^$K1J5W%k!<%W$9$kJ8K!$,$"$C$?!#$=$NM}M3(B $B$OM;9g>uBV<-=q(B($B%W%m%0%i%`$G$O(BGROUP$B$H8F$s$G$$$k(B)$B$rC1=c$K9=@.$5$l$k(BFA$B$X(B $B$N%]%$%s%?$H$7$F$$$?$+$i$G$"$k!#$7$+$7Nc$($P>uBV(B1$B$H>uBV(B2$B$,M;9g$5$l$?(B $B>uBV(B(1,2)$B$H$5$i$K$=$l$K>uBV(B1$B$rM;9g$7$?>uBV(B(1,(1,2))$B$OA4$/F1$8$b$N$G$"(B $B$k!#$=$3$G<-=q$K=q$-9~$`(BFA$B$N%]%$%s%?$OM;9g$5$l$?>uBV$G$"$k>l9g$O$=$N(B $B$b$H$N9=@.(BFA$B$X$N%]%$%s%?$N%j%9%H$r=q$-9~$`$h$&$K$7$?!#$3$l$K$h$j40A4(B 
$B$K(BNFA$B$N%Q%o!<%;%C%H$G$"$k$3$H$,J]>Z$5$l$k$?$a!"M}O@E*$K1J5W%k!<%W$O$J(B $B$/$J$C$?!#$^$?(Bv.1.35$B$^$G$O<-=qFb%=!<%H$N0LCVH=Dj$N>r7o$,%?%$%W%_%9$7(B $B$F$$$?$N$rD>$7$?$N$G!"$=$l$K$h$j>uBV$,$5$i$KH>J,$K$J$C$?!#8e!":#$^$G(B $BM;9g$7$?$H$-%U%i%0$rIU$1K:$l$F$$$k8D=j$,$"$C$?$N$b=$@5$5$l$?!#(B v.1.37(96.1.22) g2fa$B8_49$N>l9gJ8uBVM;9gH=Dj$N(Bif$BJ8Fb$G%U%i%0(B $B$,0lCW$7$F$$$k$+$I$&$+$NH=Dj$r$7$J$$$H$7$F$$$?$D$b$j$,$&$C$+$j4V0c$C(B $B$F$$$?$?$a$K(B-c$B$G$O$$$D$^$G$b?7$7$$>uBV$r:n$k$H$J$C$F$$$?$N$r=$@5!#(B v.1.40(96.1.23) $B;EMM>e$N%P%0$H9M$($i$l$F$$$k%U%i%0$N;EMM$rJQ99$7$?!#$9$J$o$A%N!<%I>e(B $B$K5-O?$9$k$N$G$O$J$/%"!<%/>e$K5-O?$9$k!#$3$l$K$h$j!"J,4t$=$l$>$l$G$3(B $B$H$J$k%U%i%0$rI=8=$G$-$k$7!"$^$?%k!<%W$J$I$K$h$k%U%i%0$N:.Mp$b$J$/$J(B $B$k!#$3$N%U%i%0$N5-O?J}<0$O!"3+;O%U%i%0$l$K4X$7$F;XDj(B $B$G$-$k!#(Bfbs$B$G$NMxMQ$H$7$F$OJ8e$N%U%i%0$G9T$C$F$$$k$?(B $B$ae$G9T$$!"3+;O%U%i%0$OC18l%7!<%1%s%9>e$Ke$K5-O?$9$kM=Dj$G$"$k!#(B $B$3$N%U%i%0=hM}$N%U%i%05-O?%P%C(B $B%U%!$K=q$-9~$s$G$*$/$H$7$F$*$-%"!<%/@\B3;~$K@\B385$N(BFA$B$N2>%U%i%05-21(B $B%P%C%U%!$NFbMF$r%3%T!<$7$F(BFA$B$N2>%U%i%0%P%C%U%!$r>C$9J}K!$rl9gJ#?t$N%"!<%/$K<+J,$N(B $B%U%i%0$rN)$F$J$1$l$P$J$i$J$$$,$I$N%"!<%/$+$H$$$&>pJs$r$I$&pJsEAHB$O(BexitFA$B$rMQ(B $B0U$9$k$H$-$KMzNr>pJsFb$K%U%i%0$rN)$F$k$3$H$K$h$C$F9T$($P$h$$!#(B $By(B $B$k$3$H$K$7$?!#(B v.1.41(96.2.1) $B:#$^$GKvHx:F5"$,$"$C$?>l9g$OC1=c$K!"$=$N%/%i%9$N3+;O%N!<%I$X@\B3$9$k(B $B$H$7$F$$$?$,!"$3$l$G$ONc$($Pe$G!":F5"8D(B $B=j$KC#$7$?$H$-$K$O!"3+;O%N!<%I$K$D$J$0$N$G$O$J$/?7$?$J%N!<%I(B($B%W%m%0%i(B $B%`$G$O(BcloneFA$B$H8F$s$G$$$k(B)$B$KHt$V$h$&$K$7$F!"$=$N%N!<%I$+$i$O5-O?$7$F(B $B$"$C$?A+0\$r7Q>5$9$k$h$&$K$9$k$N$,pJs$N5-O?$Nl9g$O!"$=$3$X=*C<5-9f$HA+0\(B $B@h(BFA$B$N%j%9%H$KDI2C$9$k$H$9$k!#$3$&$9$l$PNc$($P>e5-$N>l9g!"(BB$B$r=hM}$7$F(B $B$$$k:]$K$O(BB$B$NMzNr$K$O(Bb$B$N$_$,EPO?$5$l$k$?$a(Ba$B$G$NA+0\$O$J$/$J$k$N$G!"(BA $B$H(BB$B$H$G6hJL$5$l$F$$$k$3$H$K$J$k!#(B $B$3$l$K$h$j:F5"$NLdBj$O2r7h$5$l$?!#$7$+$7%j%s%/$N9=B$>e!"(BDFA$B:n@.;~$KBg(B $BNL$K8IN)%k!<%W$,$G$-$F$7$^$&$h$&$K$J$C$?!#$9$J$o$A(BcloneFA$B$N$b$H$N(BFA$B$,(B $B>CLG$7$F$b$=$N@h$N(BFA$B$O(BcloneFA$B$+$i;X$5$l$F$$$k$N$G>CLG$7$J$$$o$1$G$"$k!#(B $B$^$?$b$&0l$DBg$-$J%P%0%U%#%C%/%9$,$"$k!#(Bv.1.20$B0JMhJ8K!=q$-49$($N$?$a(B $B$KF3F~$5$l$?(Babort$B%U%i%0$N07$$$,(BNFA$B=hM}4X?t$NCf$G0l%+=j4V0c$C$F$$$?!#(B $B6qBNE*$K$O!"$"$k%/%i%9$N%\%G%#$KHs=*C<$,8=$l$k$H!"$=$N%/%i%9$,;}$C$F(B $B$$$kI{=P8}(B(extraFAs)$B$rI,$:$=$N$^$^EO$7$F$$$?!#$7$+$7$=$N%\%G%#Fb$NHs(B $B=*C<$,%\%G%#$N:G8eHx$G$J$1$l$PC&=P$9$k$o$1$G$O$J$$$N$GEO$7$F$O$@$a$G(B $B$"$k!#$3$l$K$h$j:dAR$N=IGq0FFb$NJ8K!$N(BDFA$B$,@5>o$K$J$C$?!#(B v.1.42(96.2.1) $BMzNr%P%C%U%!$+$i$N%"!<%/$r7k$V:]$NA+0\@h$N(BFA$B$N(BpsNum$B$NIU$1J}$,>/$7$*$+(B $B$7$+$C$?$N$G=$@5$7$?!#$?$@(BDFA$B=hM}Cf$I$&$;(B0$B$K$J$i$J$$$G8IN)%k!<%W$K$J$C(B $B$F$7$^$&$N$G$"$^$j1F6A$O$J$$$7LdBj$b$J$+$C$?$N$@$,!#(B v.1.43(96.2.1) abort$B%U%i%0$K$h$kI{=P8}$K$7$?!#(B v.1.44(96.2.3) $B:8:F5"@-$K$h$C$F=hM}$N1J5W%k!<%W$rKI$0$?$a$N%A%'%C%/$,:#$^$G2a>j$@$C(B $B$?$N$rE,@Z$K$7$?!#$^$?$3$l$KH<$$!"$=$N%(%i!<%a%C%;!<%8$H%/%i%9$,L58B(B $BDj5A$K$J$k$H$-$N%(%i!<%a%C%;!<%8$r2~A1$7$F!"4XM?$9$k%/%i%9$N%j%9%H$r(B $B4^$a$k$h$&$K$7$?!#$^$?(Byacc$BFb$GH/3P$7$?%(%i!<$G9T$,$:$l$k$b$N$N$&$A!"(B $B4JC1$KD>$;$k$b$N$N$_Jd@5$7$FI=<($9$k$h$&$K$7$?!#$^$?:#$^$G%/%i%9$N%S%C(B $B%H%U%#!<%k%I$O(Bunsigned int$B$H$7$F$$$?$,!"(Bgcc$B$N(Blong long$B7?$K>-MhE*$KJQ(B $B99$9$k2DG=@-$,$"$k$N$G!"(Btypedef$B$GDj5A$7$F$=$l$rMQ$$$k$h$&$K$7$?!#$^$?(B r_makeNFA$B$N=hM}$r(B2$B=E%k!<%W$GI=8=$9$k$3$H$K$h$j-Mh$N3HD%(B 1 
$B=PMh>e$,$C$?(BDFA$B$K:G=*E*$J>uBV:G>.2=$r$9$k!#>uBV:G>.2=$K$O(BO(nlog(n)) $B$G=hM}$G$-$k(BHopcroft$B$N%"%k%4%j%:%`$H$$$&$b$N$,$"$k$i$7$$!#(B 1. Hopcroft $B$N86Cx(B J.~Hopcroft, An $n \log n$ Algorithm for Minimizing States in a Finite Automaton, Theory of Machines and Computations, Academic Press, New York, pp.189--196, 1971. 2. Hopcroft $B$N%"%k%4%j%:%`$r2r@b$7$?J88%(B D.~Gries, Describing an Algorithm by Hopcroft, Acta Informatica 2, pp.97--109, 1973. 2 $B>uBV?t$,Bg$-$/$J$k$H!"M;9g(BFA$B$N<-=q$r0z$/$N$K$H$F$b;~4V$,$+$+$k$h(B $B$&$K$J$C$?$N$r9bB.2=$9$k!#$3$l$K$O%O%C%7%e%F!<%V%k$rMQ$$$k$N$,:GE,(B $B$G$"$m$&!#(B 3 $Be$K>h$;$i$l$k$h$&$K$9$k!#(B 4 $B3+;O5-9f$N%U%i%0$r:G2<0L%S%C%H$K8GDj$9$k!#(B 5 $B:8:F5"$NJ8K!$rFI$_9~$_;~$KKvHx:F5"$K=q49$($F=hM}2DG=$K$9$k!#(B $B""%P%0$=$NB>LdBjE@(B $Be$NLdBjE@$,$"$k!#(B S : P P : A P : A B $B$H$"$C$?$H$-$K!"(BDFA$B$G$OF~NO(BAB$B$KBP$7$F(BP$B$Nl9g$bF1MM!"(BA$B$NA0$K3+;O%U%i%0$,!"(BA$B$N8e$KI,$:pJs$H$7$J$$(BDFA$B$K$*$$$F2sHr:$Fq$JLdBj$G$"$k!#F~NO$r(B $B7hDjE*$K$9$k$K$OM;9g$,I,MW$@$,!"M;9g$9$k$H%U%i%0$N6hJL$,$G$-$J$/$J$k(B $B$+$i$G$"$k!#(BNFA$B$J$i$P(BBNF$B$r$=$N$^$^%M%C%H%o!<%/$K$7$?7A>u$K$J$C$F$*$j!"(B $B0c$&4T857PO)F1;N$,0[$J$k%N!<%I$N.2=$KBP$9$kEXNO$OL5BL$@$C$?(B!?)$B!#(B julius-4.2.2/gramtools/mkdfa/mkfa-1.44-flex/mkfa.h0000644001051700105040000000760412004452400017744 0ustar ritrlab/* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #include #include #include #ifdef HAVE_MALLOC_H #include #endif #include #define VER_NO "ver.1.44-flex-p1" #define SYMBOL_LEN 256 typedef short FLAG; typedef unsigned int CLASSFLAGS; #define CLASSFLAG_MAX sizeof(CLASSFLAGS)*8 typedef struct _BODY{ char name[ SYMBOL_LEN ]; /* $B9=@.$9$k%/%i%9$NL>A0(B */ FLAG abort; /* BODY$B$NESCf=*N;%U%i%0(B */ struct _BODY *next; } BODY; typedef struct _BODYLIST{ BODY *body; struct _BODYLIST *next; } BODYLIST; typedef struct _CLASS{ short no; /* $B%/%i%9HV9f(B $BHs=*C<$H=*C<$GFHN)3dEv(B $BHs=*C<$O(B#31$B$^$G$NEPO?!"$=$NB>$O(B#-1 */ char name[ SYMBOL_LEN ]; /* $B%/%i%9$NL>A0(B */ struct _CLASS *next; /* $B.2=$N$?$a0l;~E*$K$G$-$?%/%i%9$+(B */ } CLASS; /* bodyList, branch $B$O=*C<$HHs=*C<$G0UL#$,A4$/0c$&(B non-terminal: bodyList: $B9=@.$9$k%/%i%9L>$N%j%9%H$N%j%9%H$X$N%]%$%s%?(B branch: $BG[Ns$N?t(B($BDj5A$N?t$@$1B8:_$9$k(B) terminal: bodyList: $B$=$N=*C<5-9f$K3:Ev$9$kuBV(B */ CLASSFLAGS start; /* $B%/%i%93+;O%U%i%0(B */ CLASSFLAGS accpt; /* $B%/%i%9uBVHV9f(B(3$B$DAH:n@.;~$K?6$i$l$k(B) */ ARC *nsList; /* $BF~NO$HuBV%j%9%H(B */ CLASSFLAGS start; /* $B%/%i%93+;O%U%i%0(B($BA4$F$N%"!<%/$N(Bor) */ CLASSFLAGS accpt; /* $B%/%i%9DFA 2:3$B$DAH:n@.;~(B */ /* for DFA */ int psNum; /* ARC$B$G;X$5$l$F$$$k%"!<%/?t(B */ /* connectUnifyFA$B$G$O(Bincrement$B$5$l$J$$$3$H$KCm0U!#(B */ UNIFYARC *usList; /* NFA->DFA$B$GM;9g$5$l$?uBV(B */ FALIST *group; /* $BM;9g$7$?$H$-$N9=@.$9$k>uBV(B */ FLAG volatiled; /* $B%"!<%/JQ99Cf$N$?$a8IN)H=Dj$r #include /* cfront 1.2 defines "c_plusplus" instead of "__cplusplus" */ #ifdef c_plusplus #ifndef __cplusplus #define __cplusplus #endif #endif #ifdef __cplusplus #include #ifndef _WIN32 #include #else #ifndef YY_ALWAYS_INTERACTIVE #ifndef YY_NEVER_INTERACTIVE extern int isatty YY_PROTO(( int )); #endif #endif #endif /* Use prototypes in function declarations. */ #define YY_USE_PROTOS /* The "const" storage-class-modifier is valid. */ #define YY_USE_CONST #else /* ! 
__cplusplus */ #if __STDC__ #define YY_USE_PROTOS #define YY_USE_CONST #endif /* __STDC__ */ #endif /* ! __cplusplus */ #ifdef __TURBOC__ #pragma warn -rch #pragma warn -use #include #include #define YY_USE_CONST #define YY_USE_PROTOS #endif #ifdef YY_USE_CONST #define yyconst const #else #define yyconst #endif #ifdef YY_USE_PROTOS #define YY_PROTO(proto) proto #else #define YY_PROTO(proto) () #endif #define YY_SKIP_YYWRAP int yywrap(void){ return 1; } /* Returned upon end-of-file. */ #define YY_NULL 0 /* Promotes a possibly negative, possibly signed char to an unsigned * integer for use as an array index. If the signed char is negative, * we want to instead treat it as an 8-bit unsigned char, hence the * double cast. */ #define YY_SC_TO_UI(c) ((unsigned int) (unsigned char) c) /* Enter a start condition. This macro really ought to take a parameter, * but we do it the disgusting crufty way forced on us by the ()-less * definition of BEGIN. */ #define BEGIN yy_start = 1 + 2 * /* Translate the current start state into a value that can be later handed * to BEGIN to return to the state. The YYSTATE alias is for lex * compatibility. */ #define YY_START ((yy_start - 1) / 2) #define YYSTATE YY_START /* Action number for EOF rule of a given start state. */ #define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1) /* Special action meaning "start processing a new file". */ #define YY_NEW_FILE yyrestart( yyin ) #define YY_END_OF_BUFFER_CHAR 0 /* Size of default input buffer. */ #define YY_BUF_SIZE 16384 typedef struct yy_buffer_state *YY_BUFFER_STATE; extern int yyleng; extern FILE *yyin, *yyout; #define EOB_ACT_CONTINUE_SCAN 0 #define EOB_ACT_END_OF_FILE 1 #define EOB_ACT_LAST_MATCH 2 /* The funky do-while in the following #define is used to turn the definition * int a single C statement (which needs a semi-colon terminator). This * avoids problems with code like: * * if ( condition_holds ) * yyless( 5 ); * else * do_something_else(); * * Prior to using the do-while the compiler would get upset at the * "else" because it interpreted the "if" statement as being all * done when it reached the ';' after the yyless() call. */ /* Return all but the first 'n' matched characters back to the input stream. */ #define yyless(n) \ do \ { \ /* Undo effects of setting up yytext. */ \ *yy_cp = yy_hold_char; \ YY_RESTORE_YY_MORE_OFFSET \ yy_c_buf_p = yy_cp = yy_bp + n - YY_MORE_ADJ; \ YY_DO_BEFORE_ACTION; /* set up yytext again */ \ } \ while ( 0 ) #define unput(c) yyunput( c, yytext_ptr ) /* The following is because we cannot portably get our hands on size_t * (without autoconf's help, which isn't available because we want * flex-generated scanners to compile on their own). */ typedef unsigned int yy_size_t; struct yy_buffer_state { FILE *yy_input_file; char *yy_ch_buf; /* input buffer */ char *yy_buf_pos; /* current position in input buffer */ /* Size of input buffer in bytes, not including room for EOB * characters. */ yy_size_t yy_buf_size; /* Number of characters read into yy_ch_buf, not including EOB * characters. */ int yy_n_chars; /* Whether we "own" the buffer - i.e., we know we created it, * and can realloc() it to grow it, and should free() it to * delete it. */ int yy_is_our_buffer; /* Whether this is an "interactive" input source; if so, and * if we're using stdio for input, then we want to use getc() * instead of fread(), to make sure we stop fetching input after * each newline. */ int yy_is_interactive; /* Whether we're considered to be at the beginning of a line. 
* If so, '^' rules will be active on the next match, otherwise * not. */ int yy_at_bol; /* Whether to try to fill the input buffer when we reach the * end of it. */ int yy_fill_buffer; int yy_buffer_status; #define YY_BUFFER_NEW 0 #define YY_BUFFER_NORMAL 1 /* When an EOF's been seen but there's still some text to process * then we mark the buffer as YY_EOF_PENDING, to indicate that we * shouldn't try reading from the input source any more. We might * still have a bunch of tokens to match, though, because of * possible backing-up. * * When we actually see the EOF, we change the status to "new" * (via yyrestart()), so that the user can continue scanning by * just pointing yyin at a new input file. */ #define YY_BUFFER_EOF_PENDING 2 }; static YY_BUFFER_STATE yy_current_buffer = 0; /* We provide macros for accessing buffer states in case in the * future we want to put the buffer states in a more general * "scanner state". */ #define YY_CURRENT_BUFFER yy_current_buffer /* yy_hold_char holds the character lost when yytext is formed. */ static char yy_hold_char; static int yy_n_chars; /* number of characters read into yy_ch_buf */ int yyleng; /* Points to current character in buffer. */ static char *yy_c_buf_p = (char *) 0; static int yy_init = 1; /* whether we need to initialize */ static int yy_start = 0; /* start state number */ /* Flag which is used to allow yywrap()'s to do buffer switches * instead of setting up a fresh yyin. A bit of a hack ... */ static int yy_did_buffer_switch_on_eof; void yyrestart YY_PROTO(( FILE *input_file )); void yy_switch_to_buffer YY_PROTO(( YY_BUFFER_STATE new_buffer )); void yy_load_buffer_state YY_PROTO(( void )); YY_BUFFER_STATE yy_create_buffer YY_PROTO(( FILE *file, int size )); void yy_delete_buffer YY_PROTO(( YY_BUFFER_STATE b )); void yy_init_buffer YY_PROTO(( YY_BUFFER_STATE b, FILE *file )); void yy_flush_buffer YY_PROTO(( YY_BUFFER_STATE b )); #define YY_FLUSH_BUFFER yy_flush_buffer( yy_current_buffer ) YY_BUFFER_STATE yy_scan_buffer YY_PROTO(( char *base, yy_size_t size )); YY_BUFFER_STATE yy_scan_string YY_PROTO(( yyconst char *yy_str )); YY_BUFFER_STATE yy_scan_bytes YY_PROTO(( yyconst char *bytes, int len )); static void *yy_flex_alloc YY_PROTO(( yy_size_t )); static void *yy_flex_realloc YY_PROTO(( void *, yy_size_t )); static void yy_flex_free YY_PROTO(( void * )); #define yy_new_buffer yy_create_buffer #define yy_set_interactive(is_interactive) \ { \ if ( ! yy_current_buffer ) \ yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); \ yy_current_buffer->yy_is_interactive = is_interactive; \ } #define yy_set_bol(at_bol) \ { \ if ( ! yy_current_buffer ) \ yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); \ yy_current_buffer->yy_at_bol = at_bol; \ } #define YY_AT_BOL() (yy_current_buffer->yy_at_bol) typedef unsigned char YY_CHAR; FILE *yyin = (FILE *) 0, *yyout = (FILE *) 0; typedef int yy_state_type; extern char *yytext; #define yytext_ptr yytext static yy_state_type yy_get_previous_state YY_PROTO(( void )); static yy_state_type yy_try_NUL_trans YY_PROTO(( yy_state_type current_state )); static int yy_get_next_buffer YY_PROTO(( void )); static void yy_fatal_error YY_PROTO(( yyconst char msg[] )); /* Done after the current pattern has been matched and before the * corresponding action - sets up yytext. 
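 * It points yytext at the start of the match, records the match length in
 * yyleng, and temporarily NUL-terminates the token in place, saving the
 * overwritten character in yy_hold_char so it can be restored afterwards.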
*/ #define YY_DO_BEFORE_ACTION \ yytext_ptr = yy_bp; \ yyleng = (int) (yy_cp - yy_bp); \ yy_hold_char = *yy_cp; \ *yy_cp = '\0'; \ yy_c_buf_p = yy_cp; #define YY_NUM_RULES 14 #define YY_END_OF_BUFFER 15 static yyconst short int yy_accept[33] = { 0, 0, 0, 15, 13, 12, 10, 7, 13, 13, 8, 2, 9, 13, 3, 4, 0, 11, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 0 } ; static yyconst int yy_ec[256] = { 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 4, 1, 5, 1, 6, 1, 1, 1, 1, 7, 1, 1, 1, 1, 1, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 1, 1, 1, 1, 1, 10, 11, 8, 8, 8, 12, 8, 13, 8, 14, 8, 8, 8, 8, 15, 16, 8, 8, 17, 18, 8, 8, 8, 8, 8, 8, 8, 1, 1, 1, 1, 8, 1, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 19, 1, 20, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 } ; static yyconst int yy_meta[21] = { 0, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1 } ; static yyconst short int yy_base[36] = { 0, 0, 0, 39, 40, 40, 40, 40, 35, 10, 40, 0, 40, 0, 40, 40, 34, 40, 18, 22, 0, 0, 16, 18, 18, 15, 17, 12, 13, 15, 40, 40, 40, 24, 21, 20 } ; static yyconst short int yy_def[36] = { 0, 32, 1, 32, 32, 32, 32, 32, 33, 32, 32, 34, 32, 35, 32, 32, 33, 32, 32, 32, 34, 35, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 0, 32, 32, 32 } ; static yyconst short int yy_nxt[61] = { 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 11, 11, 11, 11, 11, 11, 11, 11, 14, 15, 18, 21, 20, 19, 16, 16, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 17, 17, 32, 3, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32 } ; static yyconst short int yy_chk[61] = { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 35, 34, 9, 33, 33, 29, 28, 27, 26, 25, 24, 23, 22, 19, 18, 16, 8, 3, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32 } ; static yy_state_type yy_last_accepting_state; static char *yy_last_accepting_cpos; /* The intent behind this definition is that it'll catch * any uses of REJECT which flex missed. */ #define REJECT reject_used_but_not_detected #define yymore() yymore_used_but_not_detected #define YY_MORE_ADJ 0 #define YY_RESTORE_YY_MORE_OFFSET char *yytext; #line 1 "gram.l" #define INITIAL 0 #line 391 "lex.yy.c" /* Macros after this point can all be overridden by user definitions in * section 1. 
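 * This scanner already defines YY_SKIP_YYWRAP and a trivial yywrap()
 * returning 1, so the declaration guarded below is skipped and scanning
 * simply stops at the first end of input.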
*/ #ifndef YY_SKIP_YYWRAP #ifdef __cplusplus extern "C" int yywrap YY_PROTO(( void )); #else extern int yywrap YY_PROTO(( void )); #endif #endif #ifndef YY_NO_UNPUT static void yyunput YY_PROTO(( int c, char *buf_ptr )); #endif #ifndef yytext_ptr static void yy_flex_strncpy YY_PROTO(( char *, yyconst char *, int )); #endif #ifdef YY_NEED_STRLEN static int yy_flex_strlen YY_PROTO(( yyconst char * )); #endif #ifndef YY_NO_INPUT #ifdef __cplusplus static int yyinput YY_PROTO(( void )); #else static int input YY_PROTO(( void )); #endif #endif #if YY_STACK_USED static int yy_start_stack_ptr = 0; static int yy_start_stack_depth = 0; static int *yy_start_stack = 0; #ifndef YY_NO_PUSH_STATE static void yy_push_state YY_PROTO(( int new_state )); #endif #ifndef YY_NO_POP_STATE static void yy_pop_state YY_PROTO(( void )); #endif #ifndef YY_NO_TOP_STATE static int yy_top_state YY_PROTO(( void )); #endif #else #define YY_NO_PUSH_STATE 1 #define YY_NO_POP_STATE 1 #define YY_NO_TOP_STATE 1 #endif #ifdef YY_MALLOC_DECL YY_MALLOC_DECL #else #if __STDC__ #ifndef __cplusplus #include #endif #else /* Just try to get by without declaring the routines. This will fail * miserably on non-ANSI systems for which sizeof(size_t) != sizeof(int) * or sizeof(void*) != sizeof(int). */ #endif #endif /* Amount of stuff to slurp up with each read. */ #ifndef YY_READ_BUF_SIZE #define YY_READ_BUF_SIZE 8192 #endif /* Copy whatever the last rule matched to the standard output. */ #ifndef ECHO /* This used to be an fputs(), but since the string might contain NUL's, * we now use fwrite(). */ #define ECHO (void) fwrite( yytext, yyleng, 1, yyout ) #endif /* Gets input and stuffs it into "buf". number of characters read, or YY_NULL, * is returned in "result". */ #ifndef YY_INPUT #define YY_INPUT(buf,result,max_size) \ if ( yy_current_buffer->yy_is_interactive ) \ { \ int c = '*', n; \ for ( n = 0; n < max_size && \ (c = getc( yyin )) != EOF && c != '\n'; ++n ) \ buf[n] = (char) c; \ if ( c == '\n' ) { \ if (n >= 1 && buf[n-1] == '\r') { \ buf[n-1] = (char) c; \ } else { \ buf[n++] = (char) c; \ } \ } \ if ( c == EOF && ferror( yyin ) ) \ YY_FATAL_ERROR( "input in flex scanner failed" ); \ result = n; \ } \ else \ { \ errno=0; \ while ( (result = fread(buf, 1, max_size, yyin))==0 && ferror(yyin)) \ { \ if( errno != EINTR) \ { \ YY_FATAL_ERROR( "input in flex scanner failed" ); \ break; \ } \ errno=0; \ clearerr(yyin); \ } \ int n; \ for(n = 0; n < result; n++) { \ if (buf[n] == '\n') { \ if (n >= 1 && buf[n-1] == '\r') { \ buf[n-1] = '\n'; \ } \ } \ } \ } #endif /* No semi-colon after return; correct usage is to write "yyterminate();" - * we don't want an extra ';' after the "return" because that will cause * some compilers to complain about unreachable statements. */ #ifndef yyterminate #define yyterminate() return YY_NULL #endif /* Number of entries by which start-condition stack grows. */ #ifndef YY_START_STACK_INCR #define YY_START_STACK_INCR 25 #endif /* Report a fatal error. */ #ifndef YY_FATAL_ERROR #define YY_FATAL_ERROR(msg) yy_fatal_error( msg ) #endif /* Default declaration of generated scanner - a define so the user can * easily add parameters. */ #ifndef YY_DECL #define YY_DECL int yylex YY_PROTO(( void )) #endif /* Code executed at the beginning of each rule, after yytext and yyleng * have been set up. */ #ifndef YY_USER_ACTION #define YY_USER_ACTION #endif /* Code executed at the end of each rule. 
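 * By default this is a plain "break;", which ends the case of the matched
 * rule inside the large action switch of yylex() below.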
*/ #ifndef YY_BREAK #define YY_BREAK break; #endif #define YY_RULE_SETUP \ YY_USER_ACTION YY_DECL { register yy_state_type yy_current_state; register char *yy_cp, *yy_bp; register int yy_act; #line 1 "gram.l" #line 555 "lex.yy.c" if ( yy_init ) { yy_init = 0; #ifdef YY_USER_INIT YY_USER_INIT; #endif if ( ! yy_start ) yy_start = 1; /* first start state */ if ( ! yyin ) yyin = stdin; if ( ! yyout ) yyout = stdout; if ( ! yy_current_buffer ) yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); yy_load_buffer_state(); } while ( 1 ) /* loops until end-of-file is reached */ { yy_cp = yy_c_buf_p; /* Support of yytext. */ *yy_cp = yy_hold_char; /* yy_bp points to the position in yy_ch_buf of the start of * the current run. */ yy_bp = yy_cp; yy_current_state = yy_start; yy_match: do { register YY_CHAR yy_c = yy_ec[YY_SC_TO_UI(*yy_cp)]; if ( yy_accept[yy_current_state] ) { yy_last_accepting_state = yy_current_state; yy_last_accepting_cpos = yy_cp; } while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) { yy_current_state = (int) yy_def[yy_current_state]; if ( yy_current_state >= 33 ) yy_c = yy_meta[(unsigned int) yy_c]; } yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c]; ++yy_cp; } while ( yy_base[yy_current_state] != 40 ); yy_find_action: yy_act = yy_accept[yy_current_state]; if ( yy_act == 0 ) { /* have to back up */ yy_cp = yy_last_accepting_cpos; yy_current_state = yy_last_accepting_state; yy_act = yy_accept[yy_current_state]; } YY_DO_BEFORE_ACTION; do_action: /* This label is used only to access EOF actions. */ switch ( yy_act ) { /* beginning of action switch */ case 0: /* must back up */ /* undo the effects of YY_DO_BEFORE_ACTION */ *yy_cp = yy_hold_char; yy_cp = yy_last_accepting_cpos; yy_current_state = yy_last_accepting_state; goto yy_find_action; case 1: YY_RULE_SETUP #line 2 "gram.l" { yylval = yytext + 1; return( TAG ); } YY_BREAK case 2: YY_RULE_SETUP #line 7 "gram.l" { yylval = yytext; return( SYMBOL ); } YY_BREAK case 3: YY_RULE_SETUP #line 12 "gram.l" { ModeBlock = 1; return( OPEN ); } YY_BREAK case 4: YY_RULE_SETUP #line 17 "gram.l" { ModeBlock = 0; return( CLOSE ); } YY_BREAK case 5: YY_RULE_SETUP #line 22 "gram.l" return( CTRL_ASSIGN ); YY_BREAK case 6: YY_RULE_SETUP #line 23 "gram.l" return( CTRL_IGNORE ); YY_BREAK case 7: YY_RULE_SETUP #line 24 "gram.l" return( REVERSE ); YY_BREAK case 8: YY_RULE_SETUP #line 25 "gram.l" return( STARTCLASS ); YY_BREAK case 9: YY_RULE_SETUP #line 26 "gram.l" return( LET ); YY_BREAK case 10: YY_RULE_SETUP #line 27 "gram.l" return( NL ); YY_BREAK case 11: YY_RULE_SETUP #line 28 "gram.l" return( REMARK ); YY_BREAK case 12: YY_RULE_SETUP #line 29 "gram.l" {}; YY_BREAK case 13: YY_RULE_SETUP #line 31 "gram.l" { errMes("Lexical mistake \"%s\"", yytext ); exit( 1 ); } YY_BREAK case 14: YY_RULE_SETUP #line 35 "gram.l" ECHO; YY_BREAK #line 723 "lex.yy.c" case YY_STATE_EOF(INITIAL): yyterminate(); case YY_END_OF_BUFFER: { /* Amount of text matched not including the EOB char. */ int yy_amount_of_matched_text = (int) (yy_cp - yytext_ptr) - 1; /* Undo the effects of YY_DO_BEFORE_ACTION. */ *yy_cp = yy_hold_char; YY_RESTORE_YY_MORE_OFFSET if ( yy_current_buffer->yy_buffer_status == YY_BUFFER_NEW ) { /* We're scanning a new file or input source. It's * possible that this happened because the user * just pointed yyin at a new source and called * yylex(). If so, then we have to assure * consistency between yy_current_buffer and our * globals. 
Here is the right place to do so, because * this is the first action (other than possibly a * back-up) that will match for the new input source. */ yy_n_chars = yy_current_buffer->yy_n_chars; yy_current_buffer->yy_input_file = yyin; yy_current_buffer->yy_buffer_status = YY_BUFFER_NORMAL; } /* Note that here we test for yy_c_buf_p "<=" to the position * of the first EOB in the buffer, since yy_c_buf_p will * already have been incremented past the NUL character * (since all states make transitions on EOB to the * end-of-buffer state). Contrast this with the test * in input(). */ if ( yy_c_buf_p <= &yy_current_buffer->yy_ch_buf[yy_n_chars] ) { /* This was really a NUL. */ yy_state_type yy_next_state; yy_c_buf_p = yytext_ptr + yy_amount_of_matched_text; yy_current_state = yy_get_previous_state(); /* Okay, we're now positioned to make the NUL * transition. We couldn't have * yy_get_previous_state() go ahead and do it * for us because it doesn't know how to deal * with the possibility of jamming (and we don't * want to build jamming into it because then it * will run more slowly). */ yy_next_state = yy_try_NUL_trans( yy_current_state ); yy_bp = yytext_ptr + YY_MORE_ADJ; if ( yy_next_state ) { /* Consume the NUL. */ yy_cp = ++yy_c_buf_p; yy_current_state = yy_next_state; goto yy_match; } else { yy_cp = yy_c_buf_p; goto yy_find_action; } } else switch ( yy_get_next_buffer() ) { case EOB_ACT_END_OF_FILE: { yy_did_buffer_switch_on_eof = 0; if ( yywrap() ) { /* Note: because we've taken care in * yy_get_next_buffer() to have set up * yytext, we can now set up * yy_c_buf_p so that if some total * hoser (like flex itself) wants to * call the scanner after we return the * YY_NULL, it'll still work - another * YY_NULL will get returned. */ yy_c_buf_p = yytext_ptr + YY_MORE_ADJ; yy_act = YY_STATE_EOF(YY_START); goto do_action; } else { if ( ! yy_did_buffer_switch_on_eof ) YY_NEW_FILE; } break; } case EOB_ACT_CONTINUE_SCAN: yy_c_buf_p = yytext_ptr + yy_amount_of_matched_text; yy_current_state = yy_get_previous_state(); yy_cp = yy_c_buf_p; yy_bp = yytext_ptr + YY_MORE_ADJ; goto yy_match; case EOB_ACT_LAST_MATCH: yy_c_buf_p = &yy_current_buffer->yy_ch_buf[yy_n_chars]; yy_current_state = yy_get_previous_state(); yy_cp = yy_c_buf_p; yy_bp = yytext_ptr + YY_MORE_ADJ; goto yy_find_action; } break; } default: YY_FATAL_ERROR( "fatal flex scanner internal error--no action found" ); } /* end of action switch */ } /* end of scanning one token */ } /* end of yylex */ /* yy_get_next_buffer - try to read in a new buffer * * Returns a code representing an action: * EOB_ACT_LAST_MATCH - * EOB_ACT_CONTINUE_SCAN - continue scanning from current position * EOB_ACT_END_OF_FILE - end of file */ static int yy_get_next_buffer() { register char *dest = yy_current_buffer->yy_ch_buf; register char *source = yytext_ptr; register int number_to_move, i; int ret_val; if ( yy_c_buf_p > &yy_current_buffer->yy_ch_buf[yy_n_chars + 1] ) YY_FATAL_ERROR( "fatal flex scanner internal error--end of buffer missed" ); if ( yy_current_buffer->yy_fill_buffer == 0 ) { /* Don't try to fill the buffer, so this is an EOF. */ if ( yy_c_buf_p - yytext_ptr - YY_MORE_ADJ == 1 ) { /* We matched a single character, the EOB, so * treat this as a final EOF. */ return EOB_ACT_END_OF_FILE; } else { /* We matched some text prior to the EOB, first * process it. */ return EOB_ACT_LAST_MATCH; } } /* Try to read more data. */ /* First move last chars to start of buffer. 
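 * Any partially matched text is first copied to the front of yy_ch_buf;
 * if no room is left the buffer is grown (roughly doubled through
 * yy_flex_realloc) before more input is requested via YY_INPUT.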
*/ number_to_move = (int) (yy_c_buf_p - yytext_ptr) - 1; for ( i = 0; i < number_to_move; ++i ) *(dest++) = *(source++); if ( yy_current_buffer->yy_buffer_status == YY_BUFFER_EOF_PENDING ) /* don't do the read, it's not guaranteed to return an EOF, * just force an EOF */ yy_current_buffer->yy_n_chars = yy_n_chars = 0; else { int num_to_read = yy_current_buffer->yy_buf_size - number_to_move - 1; while ( num_to_read <= 0 ) { /* Not enough room in the buffer - grow it. */ #ifdef YY_USES_REJECT YY_FATAL_ERROR( "input buffer overflow, can't enlarge buffer because scanner uses REJECT" ); #else /* just a shorter name for the current buffer */ YY_BUFFER_STATE b = yy_current_buffer; int yy_c_buf_p_offset = (int) (yy_c_buf_p - b->yy_ch_buf); if ( b->yy_is_our_buffer ) { int new_size = b->yy_buf_size * 2; if ( new_size <= 0 ) b->yy_buf_size += b->yy_buf_size / 8; else b->yy_buf_size *= 2; b->yy_ch_buf = (char *) /* Include room in for 2 EOB chars. */ yy_flex_realloc( (void *) b->yy_ch_buf, b->yy_buf_size + 2 ); } else /* Can't grow it, we don't own it. */ b->yy_ch_buf = 0; if ( ! b->yy_ch_buf ) YY_FATAL_ERROR( "fatal error - scanner input buffer overflow" ); yy_c_buf_p = &b->yy_ch_buf[yy_c_buf_p_offset]; num_to_read = yy_current_buffer->yy_buf_size - number_to_move - 1; #endif } if ( num_to_read > YY_READ_BUF_SIZE ) num_to_read = YY_READ_BUF_SIZE; /* Read in more data. */ YY_INPUT( (&yy_current_buffer->yy_ch_buf[number_to_move]), yy_n_chars, num_to_read ); yy_current_buffer->yy_n_chars = yy_n_chars; } if ( yy_n_chars == 0 ) { if ( number_to_move == YY_MORE_ADJ ) { ret_val = EOB_ACT_END_OF_FILE; yyrestart( yyin ); } else { ret_val = EOB_ACT_LAST_MATCH; yy_current_buffer->yy_buffer_status = YY_BUFFER_EOF_PENDING; } } else ret_val = EOB_ACT_CONTINUE_SCAN; yy_n_chars += number_to_move; yy_current_buffer->yy_ch_buf[yy_n_chars] = YY_END_OF_BUFFER_CHAR; yy_current_buffer->yy_ch_buf[yy_n_chars + 1] = YY_END_OF_BUFFER_CHAR; yytext_ptr = &yy_current_buffer->yy_ch_buf[0]; return ret_val; } /* yy_get_previous_state - get the state just before the EOB char was reached */ static yy_state_type yy_get_previous_state() { register yy_state_type yy_current_state; register char *yy_cp; yy_current_state = yy_start; for ( yy_cp = yytext_ptr + YY_MORE_ADJ; yy_cp < yy_c_buf_p; ++yy_cp ) { register YY_CHAR yy_c = (*yy_cp ? 
yy_ec[YY_SC_TO_UI(*yy_cp)] : 1); if ( yy_accept[yy_current_state] ) { yy_last_accepting_state = yy_current_state; yy_last_accepting_cpos = yy_cp; } while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) { yy_current_state = (int) yy_def[yy_current_state]; if ( yy_current_state >= 33 ) yy_c = yy_meta[(unsigned int) yy_c]; } yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c]; } return yy_current_state; } /* yy_try_NUL_trans - try to make a transition on the NUL character * * synopsis * next_state = yy_try_NUL_trans( current_state ); */ #ifdef YY_USE_PROTOS static yy_state_type yy_try_NUL_trans( yy_state_type yy_current_state ) #else static yy_state_type yy_try_NUL_trans( yy_current_state ) yy_state_type yy_current_state; #endif { register int yy_is_jam; register char *yy_cp = yy_c_buf_p; register YY_CHAR yy_c = 1; if ( yy_accept[yy_current_state] ) { yy_last_accepting_state = yy_current_state; yy_last_accepting_cpos = yy_cp; } while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) { yy_current_state = (int) yy_def[yy_current_state]; if ( yy_current_state >= 33 ) yy_c = yy_meta[(unsigned int) yy_c]; } yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c]; yy_is_jam = (yy_current_state == 32); return yy_is_jam ? 0 : yy_current_state; } #ifndef YY_NO_UNPUT #ifdef YY_USE_PROTOS static void yyunput( int c, register char *yy_bp ) #else static void yyunput( c, yy_bp ) int c; register char *yy_bp; #endif { register char *yy_cp = yy_c_buf_p; /* undo effects of setting up yytext */ *yy_cp = yy_hold_char; if ( yy_cp < yy_current_buffer->yy_ch_buf + 2 ) { /* need to shift things up to make room */ /* +2 for EOB chars. */ register int number_to_move = yy_n_chars + 2; register char *dest = &yy_current_buffer->yy_ch_buf[ yy_current_buffer->yy_buf_size + 2]; register char *source = &yy_current_buffer->yy_ch_buf[number_to_move]; while ( source > yy_current_buffer->yy_ch_buf ) *--dest = *--source; yy_cp += (int) (dest - source); yy_bp += (int) (dest - source); yy_current_buffer->yy_n_chars = yy_n_chars = yy_current_buffer->yy_buf_size; if ( yy_cp < yy_current_buffer->yy_ch_buf + 2 ) YY_FATAL_ERROR( "flex scanner push-back overflow" ); } *--yy_cp = (char) c; yytext_ptr = yy_bp; yy_hold_char = *yy_cp; yy_c_buf_p = yy_cp; } #endif /* ifndef YY_NO_UNPUT */ #ifdef __cplusplus static int yyinput() #else static int input() #endif { int c; *yy_c_buf_p = yy_hold_char; if ( *yy_c_buf_p == YY_END_OF_BUFFER_CHAR ) { /* yy_c_buf_p now points to the character we want to return. * If this occurs *before* the EOB characters, then it's a * valid NUL; if not, then we've hit the end of the buffer. */ if ( yy_c_buf_p < &yy_current_buffer->yy_ch_buf[yy_n_chars] ) /* This was really a NUL. */ *yy_c_buf_p = '\0'; else { /* need more input */ int offset = yy_c_buf_p - yytext_ptr; ++yy_c_buf_p; switch ( yy_get_next_buffer() ) { case EOB_ACT_LAST_MATCH: /* This happens because yy_g_n_b() * sees that we've accumulated a * token and flags that we need to * try matching the token before * proceeding. But for input(), * there's no matching to consider. * So convert the EOB_ACT_LAST_MATCH * to EOB_ACT_END_OF_FILE. */ /* Reset buffer status. */ yyrestart( yyin ); /* fall through */ case EOB_ACT_END_OF_FILE: { if ( yywrap() ) return EOF; if ( ! 
yy_did_buffer_switch_on_eof ) YY_NEW_FILE; #ifdef __cplusplus return yyinput(); #else return input(); #endif } case EOB_ACT_CONTINUE_SCAN: yy_c_buf_p = yytext_ptr + offset; break; } } } c = *(unsigned char *) yy_c_buf_p; /* cast for 8-bit char's */ *yy_c_buf_p = '\0'; /* preserve yytext */ yy_hold_char = *++yy_c_buf_p; return c; } #ifdef YY_USE_PROTOS void yyrestart( FILE *input_file ) #else void yyrestart( input_file ) FILE *input_file; #endif { if ( ! yy_current_buffer ) yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); yy_init_buffer( yy_current_buffer, input_file ); yy_load_buffer_state(); } #ifdef YY_USE_PROTOS void yy_switch_to_buffer( YY_BUFFER_STATE new_buffer ) #else void yy_switch_to_buffer( new_buffer ) YY_BUFFER_STATE new_buffer; #endif { if ( yy_current_buffer == new_buffer ) return; if ( yy_current_buffer ) { /* Flush out information for old buffer. */ *yy_c_buf_p = yy_hold_char; yy_current_buffer->yy_buf_pos = yy_c_buf_p; yy_current_buffer->yy_n_chars = yy_n_chars; } yy_current_buffer = new_buffer; yy_load_buffer_state(); /* We don't actually know whether we did this switch during * EOF (yywrap()) processing, but the only time this flag * is looked at is after yywrap() is called, so it's safe * to go ahead and always set it. */ yy_did_buffer_switch_on_eof = 1; } #ifdef YY_USE_PROTOS void yy_load_buffer_state( void ) #else void yy_load_buffer_state() #endif { yy_n_chars = yy_current_buffer->yy_n_chars; yytext_ptr = yy_c_buf_p = yy_current_buffer->yy_buf_pos; yyin = yy_current_buffer->yy_input_file; yy_hold_char = *yy_c_buf_p; } #ifdef YY_USE_PROTOS YY_BUFFER_STATE yy_create_buffer( FILE *file, int size ) #else YY_BUFFER_STATE yy_create_buffer( file, size ) FILE *file; int size; #endif { YY_BUFFER_STATE b; b = (YY_BUFFER_STATE) yy_flex_alloc( sizeof( struct yy_buffer_state ) ); if ( ! b ) YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); b->yy_buf_size = size; /* yy_ch_buf has to be 2 characters longer than the size given because * we need to put in 2 end-of-buffer characters. */ b->yy_ch_buf = (char *) yy_flex_alloc( b->yy_buf_size + 2 ); if ( ! b->yy_ch_buf ) YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); b->yy_is_our_buffer = 1; yy_init_buffer( b, file ); return b; } #ifdef YY_USE_PROTOS void yy_delete_buffer( YY_BUFFER_STATE b ) #else void yy_delete_buffer( b ) YY_BUFFER_STATE b; #endif { if ( ! b ) return; if ( b == yy_current_buffer ) yy_current_buffer = (YY_BUFFER_STATE) 0; if ( b->yy_is_our_buffer ) yy_flex_free( (void *) b->yy_ch_buf ); yy_flex_free( (void *) b ); } #ifndef _WIN32 #include #else #ifndef YY_ALWAYS_INTERACTIVE #ifndef YY_NEVER_INTERACTIVE extern int isatty YY_PROTO(( int )); #endif #endif #endif #ifdef YY_USE_PROTOS void yy_init_buffer( YY_BUFFER_STATE b, FILE *file ) #else void yy_init_buffer( b, file ) YY_BUFFER_STATE b; FILE *file; #endif { yy_flush_buffer( b ); b->yy_input_file = file; b->yy_fill_buffer = 1; #if YY_ALWAYS_INTERACTIVE b->yy_is_interactive = 1; #else #if YY_NEVER_INTERACTIVE b->yy_is_interactive = 0; #else b->yy_is_interactive = file ? (isatty( fileno(file) ) > 0) : 0; #endif #endif } #ifdef YY_USE_PROTOS void yy_flush_buffer( YY_BUFFER_STATE b ) #else void yy_flush_buffer( b ) YY_BUFFER_STATE b; #endif { if ( ! b ) return; b->yy_n_chars = 0; /* We always need two end-of-buffer characters. The first causes * a transition to the end-of-buffer state. The second causes * a jam in that state. 
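 * YY_END_OF_BUFFER_CHAR is the NUL byte defined near the top of this
 * file; yy_init_buffer() calls this flush to put a fresh buffer into a
 * consistent empty state.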
*/ b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR; b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR; b->yy_buf_pos = &b->yy_ch_buf[0]; b->yy_at_bol = 1; b->yy_buffer_status = YY_BUFFER_NEW; if ( b == yy_current_buffer ) yy_load_buffer_state(); } #ifndef YY_NO_SCAN_BUFFER #ifdef YY_USE_PROTOS YY_BUFFER_STATE yy_scan_buffer( char *base, yy_size_t size ) #else YY_BUFFER_STATE yy_scan_buffer( base, size ) char *base; yy_size_t size; #endif { YY_BUFFER_STATE b; if ( size < 2 || base[size-2] != YY_END_OF_BUFFER_CHAR || base[size-1] != YY_END_OF_BUFFER_CHAR ) /* They forgot to leave room for the EOB's. */ return 0; b = (YY_BUFFER_STATE) yy_flex_alloc( sizeof( struct yy_buffer_state ) ); if ( ! b ) YY_FATAL_ERROR( "out of dynamic memory in yy_scan_buffer()" ); b->yy_buf_size = size - 2; /* "- 2" to take care of EOB's */ b->yy_buf_pos = b->yy_ch_buf = base; b->yy_is_our_buffer = 0; b->yy_input_file = 0; b->yy_n_chars = b->yy_buf_size; b->yy_is_interactive = 0; b->yy_at_bol = 1; b->yy_fill_buffer = 0; b->yy_buffer_status = YY_BUFFER_NEW; yy_switch_to_buffer( b ); return b; } #endif #ifndef YY_NO_SCAN_STRING #ifdef YY_USE_PROTOS YY_BUFFER_STATE yy_scan_string( yyconst char *yy_str ) #else YY_BUFFER_STATE yy_scan_string( yy_str ) yyconst char *yy_str; #endif { int len; for ( len = 0; yy_str[len]; ++len ) ; return yy_scan_bytes( yy_str, len ); } #endif #ifndef YY_NO_SCAN_BYTES #ifdef YY_USE_PROTOS YY_BUFFER_STATE yy_scan_bytes( yyconst char *bytes, int len ) #else YY_BUFFER_STATE yy_scan_bytes( bytes, len ) yyconst char *bytes; int len; #endif { YY_BUFFER_STATE b; char *buf; yy_size_t n; int i; /* Get memory for full buffer, including space for trailing EOB's. */ n = len + 2; buf = (char *) yy_flex_alloc( n ); if ( ! buf ) YY_FATAL_ERROR( "out of dynamic memory in yy_scan_bytes()" ); for ( i = 0; i < len; ++i ) buf[i] = bytes[i]; buf[len] = buf[len+1] = YY_END_OF_BUFFER_CHAR; b = yy_scan_buffer( buf, n ); if ( ! b ) YY_FATAL_ERROR( "bad buffer in yy_scan_bytes()" ); /* It's okay to grow etc. this buffer, and we should throw it * away when we're done. */ b->yy_is_our_buffer = 1; return b; } #endif #ifndef YY_NO_PUSH_STATE #ifdef YY_USE_PROTOS static void yy_push_state( int new_state ) #else static void yy_push_state( new_state ) int new_state; #endif { if ( yy_start_stack_ptr >= yy_start_stack_depth ) { yy_size_t new_size; yy_start_stack_depth += YY_START_STACK_INCR; new_size = yy_start_stack_depth * sizeof( int ); if ( ! yy_start_stack ) yy_start_stack = (int *) yy_flex_alloc( new_size ); else yy_start_stack = (int *) yy_flex_realloc( (void *) yy_start_stack, new_size ); if ( ! yy_start_stack ) YY_FATAL_ERROR( "out of memory expanding start-condition stack" ); } yy_start_stack[yy_start_stack_ptr++] = YY_START; BEGIN(new_state); } #endif #ifndef YY_NO_POP_STATE static void yy_pop_state() { if ( --yy_start_stack_ptr < 0 ) YY_FATAL_ERROR( "start-condition stack underflow" ); BEGIN(yy_start_stack[yy_start_stack_ptr]); } #endif #ifndef YY_NO_TOP_STATE static int yy_top_state() { return yy_start_stack[yy_start_stack_ptr - 1]; } #endif #ifndef YY_EXIT_FAILURE #define YY_EXIT_FAILURE 2 #endif #ifdef YY_USE_PROTOS static void yy_fatal_error( yyconst char msg[] ) #else static void yy_fatal_error( msg ) char msg[]; #endif { (void) fprintf( stderr, "%s\n", msg ); exit( YY_EXIT_FAILURE ); } /* Redefine yyless() so it works in section 3 code. */ #undef yyless #define yyless(n) \ do \ { \ /* Undo effects of setting up yytext. 
*/ \ yytext[yyleng] = yy_hold_char; \ yy_c_buf_p = yytext + n; \ yy_hold_char = *yy_c_buf_p; \ *yy_c_buf_p = '\0'; \ yyleng = n; \ } \ while ( 0 ) /* Internal utility routines. */ #ifndef yytext_ptr #ifdef YY_USE_PROTOS static void yy_flex_strncpy( char *s1, yyconst char *s2, int n ) #else static void yy_flex_strncpy( s1, s2, n ) char *s1; yyconst char *s2; int n; #endif { register int i; for ( i = 0; i < n; ++i ) s1[i] = s2[i]; } #endif #ifdef YY_NEED_STRLEN #ifdef YY_USE_PROTOS static int yy_flex_strlen( yyconst char *s ) #else static int yy_flex_strlen( s ) yyconst char *s; #endif { register int n; for ( n = 0; s[n]; ++n ) ; return n; } #endif #ifdef YY_USE_PROTOS static void *yy_flex_alloc( yy_size_t size ) #else static void *yy_flex_alloc( size ) yy_size_t size; #endif { return (void *) malloc( size ); } #ifdef YY_USE_PROTOS static void *yy_flex_realloc( void *ptr, yy_size_t size ) #else static void *yy_flex_realloc( ptr, size ) void *ptr; yy_size_t size; #endif { /* The cast to (char *) in the following accommodates both * implementations that use char* generic pointers, and those * that use void* generic pointers. It works with the latter * because both ANSI C and C++ allow castless assignment from * any pointer type to void*, and deal with argument conversions * as though doing an assignment. */ return (void *) realloc( (char *) ptr, size ); } #ifdef YY_USE_PROTOS static void yy_flex_free( void *ptr ) #else static void yy_flex_free( ptr ) void *ptr; #endif { free( ptr ); } #if YY_MAIN int main() { yylex(); return 0; } #endif #line 35 "gram.l" julius-4.2.2/gramtools/mkdfa/mkfa-1.44-flex/main.c0000644001051700105040000001441712004452400017745 0ustar ritrlab/* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology * All rights reserved */ #include "mkfa.h" #include "nfa.h" #include "dfa.h" #include "triplet.h" void getSwitch( int argc, char *argv[] ); int setSwitch( char *sw ); int setFilename( char *fname, int kind ); void usage( void ); char VerNo[] = VER_NO; CLASS *ClassList = NULL; /* $B%/%i%9$N@~7A%j%9%H(B */ CLASS *ClassListTail = NULL; /* $B%/%i%9$N@~7A%j%9%H$N:G8eHx$N%N!<%I(B */ CLASS *StartSymbol = NULL; /* $B3+;O5-9f$N%/%i%9(B */ char GramFile[ 1024 ]; /* $BJ8K!%U%!%$%kL>(B */ char VocaFile[ 1024 ]; /* $B8lWC%U%!%$%kL>(B */ char FAfile[ 1024 ]; /* FA$B%U%!%$%kL>(B(DFAorNFA) */ char HeaderFile[ 1024 ]; /* $B%X%C%@%U%!%$%kL>(B */ int NoNewLine = 0; /* $BJ#?t$NI=<(%b!<%I$G2~9TLdBj$r2r7h$9$k(B */ FA *FAlist = NULL; /* FA$B%M%C%H%o!<%/$K$*$1$k3+;O(BFA$B$N%]%$%s%?(B */ char Clipboard[ 1024 ]; /* sprintf$BMQ$N0l;~=q$-9~$_%P%C%U%!(B */ static int optF = 0; /* -f $B$,;XDj$5$l$?(B(-dfa$B$H$NLdBj2r7h$K(B) */ int SW_SentList = 0; int SW_NoWarning = 0; int SW_Compati = 0; int SW_Quiet = 0; int SW_SemiQuiet = 0; int SW_Debug = 0; int SW_NFAoutput = 0; int SW_Verbose = 0; int SW_EdgeStart; int SW_EdgeAccpt; int main( int argc, char *argv[] ) { #ifdef YYDEBUG extern int yydebug; yydebug = 1; #endif getSwitch( argc, argv ); if( SW_EdgeAccpt ){ errMes( "I'm sorry. AcceptFlag on edge is under construction." 
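/* Overall flow of mkfa (the calls just below): setGram() parses the
 * grammar file, setVoca() the vocabulary file, makeNFA() builds the class
 * automaton from them, makeDFA() determinizes it unless NFA output was
 * requested (-nfa), and makeTriplet() emits the result.
 */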
); } setGram(); setVoca(); makeNFA(); if( !SW_NFAoutput ) makeDFA(); makeTriplet(); return( 0 ); } void getSwitch( int argc, char *argv[] ) { int i; int filemode = 0; int filefinish = 0; for( i = 1; i < argc; i++ ){ if( filemode == 0 ){ if( argv[ i ][ 0 ] == '-' ){ filemode = setSwitch( &argv[ i ][ 1 ] ); } else { usage(); } } else { filefinish = setFilename( argv[ i ], filemode ); filemode = 0; } } if( !filefinish ) usage(); } int setSwitch( char *sw ) { char *sname[] = { "l", "nw", "c", "db", "dfa", "nfa", "fg", "fv", "fo", "fh", "f", "v", "c", "e", "e0", "e1", "q0", "q", "q1", NULL }; int swNo; for( swNo = 0; ; swNo++ ){ if( sname[ swNo ] == NULL ) break; if( strcmp( sw, sname[ swNo ] ) == 0 ) break; } switch( swNo ){ case 0: SW_SentList = 1; break; case 1: SW_NoWarning = 1; break; case 2: SW_Compati = 1; break; case 3: SW_Debug = 1; break; case 4: if( optF ) usage(); SW_NFAoutput = 0; break; case 5: if( optF ) usage(); SW_NFAoutput = 1; break; case 6: return( 1 ); case 7: return( 2 ); case 8: return( 3 ); case 9: return( 4 ); case 10: return( 5 ); case 11: SW_Verbose = 1; break; case 12: SW_Compati = 1; break; case 13: SW_EdgeAccpt = 1; SW_EdgeStart = 1; break; case 14: SW_EdgeAccpt = 1; break; case 15: SW_EdgeStart = 1; break; case 16: SW_Quiet = 1; case 17: case 18: SW_SemiQuiet = 1; break; default: usage(); } return( 0 ); } int setFilename( char *fname, int kind ) { static int f_gram = 0; static int f_voca = 0; static int f_out = 0; static int f_header = 0; switch( kind ){ case 1: strcpy( GramFile, fname ); f_gram = 1; break; case 2: strcpy( VocaFile, fname ); f_voca = 1; break; case 3: strcpy( FAfile, fname ); f_out = 1; break; case 4: strcpy( HeaderFile, fname ); f_header = 1; break; case 5: sprintf( GramFile, "%s.grammar", fname ); sprintf( VocaFile, "%s.voca", fname ); if( SW_NFAoutput ){ sprintf( FAfile, "%s.nfa", fname ); } else { sprintf( FAfile, "%s.dfa", fname ); } optF = 1; sprintf( HeaderFile, "%s.h", fname ); f_gram = f_voca = f_out = f_header = 1; return( 1 ); } if( f_gram && f_voca && f_out && f_header ){ return( 1 ); } else { return( 0 ); } } void errMes( char *fmt, ... ) { va_list argp; if( NoNewLine ) putc( '\n', stderr ); va_start( argp, fmt ); vsprintf( Clipboard, fmt, argp ); va_end( argp ); fprintf( stderr, "Error: %s\n", Clipboard ); exit( 1 ); } void warnMes( char *fmt, ... ) { va_list argp; if( SW_NoWarning ) return; if( NoNewLine ) putc( '\n', stderr ); va_start( argp, fmt ); vsprintf( Clipboard, fmt, argp ); va_end( argp ); fprintf( stderr, "Warning: %s\n", Clipboard ); NoNewLine = 0; } void verboseMes( char *fmt, ... 
) { va_list argp; if( !SW_Verbose ) return; if( NoNewLine ) putc( '\n', stderr ); va_start( argp, fmt ); vsprintf( Clipboard, fmt, argp ); va_end( argp ); fprintf( stderr, "[verbose] %s\n", Clipboard ); NoNewLine = 0; } void usage( void ) { fprintf( stderr, "finite automaton generator, mkfa %s programmed by 1995-1996 S.Hamada\n" "function: grammar & vocabulary -> FA & header for parsing\n" "usage: mkfa " 1-gramC EMFCCvZ @- Liftering p[^iCEPLIFTERj 0 vZo OC ERpC - cygwin, MSVC RpCP - cygwin "gcc -mno-cygwin" T|[g - "--disable-plugin" configure wG[C EW[[h - jcontrol addgram changegram @Eォ @@C - "DELPROCESS" R}hASR LM CX^X @@CBiSR\j EC - "-mapunk" IvVC - jconf t@C "-htkconf" t@C jconf pX ゚oOC - "-input stdin" W^t@C WAV `ョ gC - vOCw "-plugin fBNg" MSVC C 4.1.4 (2009.12.25) =================== VK - MSVC RpCJuliusfoCXI@\ D PORTAUDIO_DEV_NUM wDfoCX NOoIwD - MSVC Julius fP[wD C - Mac OS X (OS X 10.6 SDK) RpC - x portaudio (Windowsp) C 4.1.3 (2009.11.02) =================== VK - V MSVC T|[gF"msvc/00README.txt" B - C N-gram T|[g - VXe portaudio Cuwb_AJulius portaudio gpBOCu V19 BA^foCX PORTAUDIO_DEV w\B ョNOB - W[[hACg (-walign) o\ XEC - 4.1.x 0PvXgCMNA4.0.x Ols dlXB - j_get_current_filename() JuliusLib - "--enable-wpair" ASYP oOC - Xg[JnEIoOC - julius-simple make oOC - cygwin ^oOC - adintool "-server" oOC - @Fッ@oOC 4.1.2 (2009.02.12) =================== [N-gram] - TCY 2GB 4GB g - "-mapunk" unknown word w\DftHg "" "" - `FbNFォmCN-gram "" G[ [SRILM] - backward ARPA ゥ - mkbingram backward ARPA "-swap" ヲI - 1-gramK -99 ゥC [@\P] - successor list \zASYD - yomi2voca.pl XV - ALSA audio buffer overrun - generate-ngram -debug wX [JuliusLib] - j_close_stream() [oOC] - adintool T[owoOC - cygwin RpCG[C (libesd) - "-input" woOC 4.1.1 (2008.12.13) =================== oOCF [N-gram] - SRILM wKfoOC [] - "-htkconf ConfigFile" ConfigFile "SOURCERATE" C - "-input stdin" C [ACg] - PPFッ "-1pass" wACgo - "-palign", "-walign" "-salign" w タsoOC [Module mode] - PPFッ receive/activate/dactivate oOC - NCAg@IDwt[YoOC - Fッ@ (GRAM=..) 
o 0 oOC - PPFッ "SYNCGRAM" JuliusoOC [Others] - OS RpCCi"-ldl" j - Jconf obNXbVoOC - 1pXタs1pXIoC [Tools] Jcontrol - "graminfo" R}hVKT|[g - PPFッ@iォjNCAgoOC mkdfa - mkfa C - DOS `ョoOC adintool - nI[oD - I[fBIt@CoOC 4.1 (2008.10.3) ================ vOC@\gF - vOC^ - M - xNg - - xNg - o - JuliusLib R[obNvOC\ - vOCTv\[XR[hDedlRgtD - FLinux, Windows dlopen() OS V@\F - }`Xg[T|[g - MSD-HMM T|[giHTS c[LbgwKfgp\j - CVN T|[g - VTLN p frequency warping ip[^j - "-input alsa", "-input oss" "-input esd" - W[[hpNCAg perl "jclient-perl" dlXF - fFッIvVu - IvVCX^X`^KvD @iFfIvV "-LM" CCX^X` @@OKvDfIvV "-AM" @@G[j - SIvVCCX^X`OC"-GLOBAL" IvV @w @CV "-nosectioncheck" wD oOCF - "-record" eot@C - "-multigramout" - jconf t@CWJ - HMM aFHMM 256, Pp - W[@WG[bZ[W hLg - "Juliusbook" [X (pdf / html) - }jAVCman fBNgWD ftHgCXg[D 4.0.2 (2008.5.27) ================== V@\F - 2pXTクsCo[W (3.x O) 1pX IoC4.x FックsD VIvV "-fallback1pass" wCo[WlC 2pXクs1pXIoD - o USEPOWER=T T|[g dlXF - GMM gpC"-AM_GMM" wCfL D oOCF - GMM p 4.0.1 C - Linux/ALSA foCXwD - N-gram クsoOC - "-record" IvVwsoOC - }CN sp-segment / GMM / VAD タs callback タsC - CoOC 4.0.1 (2008.3.12) ================== dlXF - Linux ftHgI[fBIAPICOSS ALSA XD oOCF - - ALSA lCeBu - "-48" IvVw segfault - "-spsegment" wMFCC[t[ segfault - VADA - V[g|[YZOe[VCGMM VAD, fR[_x[ XVADCZOg CMN - fA - }`pX[hCV[g|[Yf`N - N-gramA - backward N-gram ^1pX2-graml - mGgmD - back-off `クsD - 24bitGgN-gramクs - obNItWi[GA] malloc D - PPFッ - PPFッ[h N xXgo ("-output N") - CXg[ - cygwin "make install" クs - \[XR[hB - sv static [JC - Tc[NGAp StackDecode V@\F - W[[h - "ACTIVATEGRAM", "DEACTIVATEGRAM", "DELGRAM" @wC @ ID @wD - VR}h "GRAMINFO": @XgD 4.0 (2007.12.19) ================= i4.0 V@\CQlj - \ - GW{Cu - }`fR[fBOT|[g - fW[EJulian - C N-gram T|[g - [U` - PFッT|[g - confusion network o - V[g|[YZOe[VP - GMM x[X VAD - fR[_x[X VAD - RpCIvV - - TvAv julius-simple - c[ (generate-ngram) - adintool }`T[oC@\Ac[ 3.5.3 (2006.12.29) =================== - \P - xvZKF 20% 40% - ANZXKFC2pXp - ォEPgXtP - V@Ac[ - "dfa_minimize", "dfa_determinize" c DFA /D - mkdfa.pl dfa_minimize ゥIoD - "slf2dfa" c HTK @t@C DFA `ョc[J - HTKop[^ - HTK wKgo Config t@C p[^D - mkbinhmm LoCiHMMwb_D - POtP - gCtHXRAlOtIvVF PCPOfReLXg o ("-graphrange -1") - ICGlM[KT|[g - GlM[K (ENORMAL) wKfp ICFッbT|[giOp[Kj - \[XR[hC - libsent/src/wav2mfcc \ - p[^W[ - "--setting" IvV libsent RpCo - Doxygen-1.5.0 T|[g - "julius-info@lists.sourceforge.jp" ItBVAAhX - Copyright typoC - oOC - "--enable-words-int" fクs - C[Jobt@CIvV[N - z segmentation fault - DFA dJEg - mkdfa.pl: mkfa N - adintool: fCt@Cc - adintool: fCObt@CL^ - CC 3.5.2 (2006.07.31) =================== - Windows R\[P - DirectSound T|[g (cygwin / mingw) - Xbh (portaudio API gp) - V MinGW (5.0.2) - POtxP - POt[Jbg@\ (-graphcut) - [vp (-graphboundloop) - OtP}[WTfASYP - 1pXPgXPOto\ - "--enable-word-graph" POtoC2pXOtT - "--enable-wpair" wCLp 2-gram g1pXOtASYD - oOC - configure XNvg Solaris 8/9 - tied-mixture f -gprune none - f duration wb_G[bZ[W - MFCC strip_zero [jOo - タP - f hmmdefs 3.5.1 (2006.03.31) =================== - (MFCC)og - W(Accel: _A)D - _A pfFッ - VIvV "-accwin" - C^wg(_0, _E, _N, _D, _A, _N) - C - o^CvCfwb_ゥf - op[^w\F -preemph, -fbank, -ceplif, -rawe / -norawe, -enormal / -noenormal, -escale, -silfloor - VIvV "-zmeanframe": t[ DC offset (HTK) - MAP-CMN - }CNElbg[NL - VIvV"-cmnmapweight"FdWXiftHg 100.0j - IvV "-cmnload" bPvXgw\ - 2b~PvXg5bpD VIvV "-cmnnoupdate" PvXgXVD - W[[h - Fッ~EJ "" "" Vo - "ADDGRAM @" "CHANGEGRAM @" @O - oOC - W[[hPAUSE/RESUME^C~OoOC - vf24bitN-gramoOC - }`pX3(o1)foOC - @JEgoOC - 
gramtools MacOSX RpCoOC 3.5 (2005.11.11) ================= - VT|[g@\ - GMM ッsvp(-gmm, -gmmnum -gmmreject) - POto (--enable-graphout, --enable-graphout-nbest) - PMx} (--enable-cmthres) - Julian@Fッ@oIvV (-multigramout, g README-multigramout-ja.txt Qlj - N@t@Cw "-gram prefix1,prefix2,..." "-gramlist listfile" prefix .dfa, .dict t@CvtBbNXw - eLXgoGR[fBOF"-charconv from to" Linux iconv, Win32 Win32API + libjcode gpD - Linux P - ALSA-1.0.0 ~ (--with-mictype=alsa) - EsounD I[fBIT[o[T|[g (--with-mictype=esd) - USBI[fBIgp}CNoOC - foCX(ftHg:/dev/dsp) AUDIODEV w\ - portaudio, spLib w\ - gpP - 1pXr[[NGAgp - \ォ(outstyle, factoringAK) i20k JNAS fast 1.5MBj - PN-gramgpP (1)2-gramobNIt\Ki20k JNAS 8MB, 3-gramj (2)2-gramN-gramCfbNX32bit24bitk - dlX - N-gramCfbNXkCoCiN-gramt@C`ョV DiV`ョゥj CCV`ョD "mkbingram -d oldbinary newbinary" C{o[W mkbingram ooCiN-gramV`ョ C3.4.2OD iQlF`ョt@C17oCgF "julius_bingram_v3" o[WC "julius_bingram_v4" Vo[Wj - adintool Flbg[NoCgI[_gGfBA D - ftHgkWJ zlib CugpD ]O gzip gpD"--disable-zlib" OFF (]) X\ - Win: oR[hftHguvXD 3.4.2ftHg SJIS D - Win: VC++RpC@X @Julian -DBUILD_JULIAN Djulius.h XD 00HowToCompile-win.txtQ - oOC - XyNgTugNVoOC - 2pX stack empty I[NoOC - Z(t[)oOC - CMNgpfgpt@CE}CNoOC - fR[fBO(--enable-sp-segment)spPoOC - maxcodebooksize \ヲoOC - stack empty Tクs\ヲ - W[[h -separatescore (AMSCORE=..., LMSCORE=...j - STIvV "-b 0" r[@XVoOC - @XPgCtHvZvoOC - @gpCFッ@IDWooC - GfBA}Vf[^M - Linux: USBI[fBIgp}CNoOC - Linux: iccRpCJuliusNoOC - Win: sR[h(@t@CE-filelist) - Win: cygwin mkdfa.pl e|JoOC - Win: ZLIBgpfopenクsoOC - Win: -record ^t@Cgq .raw .wav C - \[X - Linux Windows R\[\[X - Multipath \[X "--enable-multipath" CfJ multipath RpCD\[X "MULTIPATH_VERSION" `D - - Win: MinGW RpCD - vORgKXVFDoxygen D "make doxygen" HTML `ョNXt@XD - make install C"julius-3.5-fast" o[W (--enable-setup l)タsoCiCXg[ - "./support/build-all.sh" e julius, julian SタsoCi RpC build-bin/ D - Updated LICENSE.txt with English translation for reference. 
3.4.2 (2004.03.31) =================== - wZp -rejectshort msec - (-input adinnet) W[[hpPAUSE/TERMINATE - oOC - t[[NC (beam.c) - PFッN-bestosC - generate [vC - ` - 'gcc -Wall' \[X` - copyright \ヲ 2004 NXV - config.guess, config.sub XV (for MacOSX) 3.4.1 (2004.02.25) =================== - TASYP - PWJXRAdvZC - PgCtHVXRAvZ@タ(-iwcd1 best #) o[W3.4 OFッxP\I - oCiHMM`ョiJuliuspjT|[g hmmdefsoCi`ョc[ "mkbinhmm" - MFCCoisin/cose[ugpj - oOC - (-input adinnet) MJnFッJn - (-input adinnet) W[[hpPAUSE/TERMINATE - (-input adinnet) vZXsKv fork - (-input rawfile) WindowsTEhR[_[^ wav t@C - (f) CMNsvfCMNs - (f) triphone G[bZ[Wf - (adintool) At@CM - (sp-segment) V[g|[YTJEfoOCC }CNV[g|[YZOe[Vタ - (-[wps]align) A[NC - DC@\ (-zmean) - (-module) Vo '' - @\uTv - HTMLhLgXV - rev.3.3 X whatsnew/ Lq VIvVF"-iwcd1 best #" "-zmean" VconfigureIvVF"--disable-lmfix", "--enable-visualize" 3.4 (2003.10.01) =================== - mMxvZT|[g - "-cmalpha" IvVX[WOWw - W[[hmMxo "-outcode" 'C' w - configure "--disable-cm" - "--enable-cm-nbest"N-bestpvZ@X\ - NXN-gramT|[g - configure "--disable-class-ngram" - factoringm[hN-gramPx[XォPx[XX - adinrec, adintoolIvV"-record"L^`ョ RAW`ョWAV`ョX - obZ[WXF System Info bZ[W\ GWbZ[W - CF W[[hooOC t[oOC (realtime-1stpass.c) osC NetAudiogpRpCクsoOC oCiN-gram[hTCY`FbNC mixturexvZC (gprune_none.c) "-version"->"-setting", "-hipass"->"-hifreq", "-lopass"->"-lofreq" 3.3p4 (2003.05.06) =================== - sP - }CNRnOAbvsC - W[[hCNCAgnOAb vsCiNCAgG[bZ[WM j - 1000TvZobt@OD - lbg[NoRubLOD - \[XhC ijcontroltypo, LOG_TEN`Cj 3.3p3 (2003.01.08) =================== - PV[g|[YVD - [Julius] "-iwspword" V[g|[YPGgFッ ォゥDPN-gramGg oCHMMftHg" [sp] sp sp"CIvV "-iwspentry" w\D - [}`pX] Zcontext-freePD IvV "-iwsp" CォSPXLbv\ V[g|[YftDtf "-spmodel" X \DhLgQD - P: - xPFC}CNxC ] 200ms O 50ms OPD - foCXvZXgpCubNG[ IXD - VfoCXF - libsndfile-1.0.x D - ALSA-0.9.x D ゥo OSS DCgp configure "--with-mictype=alsa" ヲIwD 3.3p2 (2002.11.18) =================== - [}`pX] fXLbvJDf IionjJT|[gD C~o[W HTK uSvJT|[g D - V@\: "-record dir" CJuliusf[^SwfB NgL^@\D - oOC - Solaris MFCC t@CD - "-module" "-input adinnet" pCNCAgR}h tD - Fッobt@O^C~O[oD 3.3p1 (2002.10.15) =================== oOCF - QpXfdyieBftHglOD - ォXy[XクsD - monophone gp -separatescore oD julius-4.2.2/plugin/0000755001051700105040000000000012004463507012627 5ustar ritrlabjulius-4.2.2/plugin/Makefile.darwin0000644001051700105040000000137411224410060015544 0ustar ritrlabSHELL=/bin/sh .SUFFIXES: .SUFFIXES: .c .jpi .c.jpi: $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< CC=gcc CFLAGS=-bundle -flat_namespace -undefined suppress LDFLAGS= J_CFLAGS=-I../libsent/include -I../libjulius/include `../libsent/libsent-config --cflags` `../libjulius/libjulius-config --cflags` J_LDFLAGS=-L../libjulius `../libjulius/libjulius-config --libs` -L../libsent `../libsent/libsent-config --libs` all: audio_postprocess.jpi fvin.jpi feature_postprocess.jpi calcmix.jpi generic_callback.jpi result.jpi calcmix.jpi: calcmix.c $(CC) $(CFLAGS) $(J_CFLAGS) -o calcmix.jpi $(LDFLAGS) $(J_LDFLAGS) calcmix.c generic_callback.jpi: generic_callback.c $(CC) $(CFLAGS) $(J_CFLAGS) -o generic_callback.jpi generic_callback.c $(LDFLAGS) $(J_LDFLAGS) clean: rm *.jpi julius-4.2.2/plugin/00readme.txt0000644001051700105040000000356011224410060014756 0ustar ritrlabPlugin samples ================ From rev.4.1, plugin is supported. This directory contains exmaple sample source codes of plugins. Function specifications of plugin are fully documented within the source. See the instruction below. 
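As a quick orientation, the following is a minimal sketch of the two
functions that are common to every sample in this directory
(initialize() is optional, get_plugin_info() is required).  The title
string here is an arbitrary example; the exact specifications,
including the plugin-type specific functions, are documented in the
sample sources listed below.

    #include <string.h>

    #define PLUGIN_TITLE "minimal plugin example"

    /* optional: called once when this file is loaded; return -1 to
       tell Julius not to load this plugin */
    int initialize()
    {
      return 0;
    }

    /* required: return plugin information; opcode 0 asks for the
       description string of this plugin file */
    int get_plugin_info(int opcode, char *buf, int buflen)
    {
      switch(opcode) {
      case 0:
        strncpy(buf, PLUGIN_TITLE, buflen);
        break;
      }
      return 0;
    }
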
Files ====== 00readme.txt This file plugin_defs.h Plugin related typedefs for C. adin_oss.c A/D-in plugin example: OSS mic input audio_postprocess.c A/D-in postprocess plugin fvin.c Feature vector input plugin feature_postprocess.c Feature vector postprocess plugin calcmix.c AM Mixture calculation plugin Makefile Makefile for Linux / mingw How to compile =============== The source should be compiled into a dynamic shared object. The object file should have a suffix of ".jpi". On Linux and cygwin, you can compile with gcc like this: % gcc -shared -o result.jpi result.c If you compile on cygwin and want it to run without cygwin, you can do % gcc -shared -mno-cygwin -o result.jpi result.c On Mac OS X: % gcc -bundle -flat_namespace -undefined suppress -o result.jpi result.c How to use =========== Add option "-plugindir dirname" to Julius. The "dirname" should be a directory (or colon-separated list of directories). All the .jpi files in the specified directory will be loaded into Julius at startup. How to test ============ You can test the OSS API audio input plugin written at "adin_oss.c". The loaded plugin component "adin_oss.jpi" will be selected as input by specifying "-input myadin", where the string "myadin" is the string which the function "adin_get_optname()" returns in adin_oss.c. % cd plugin % make adin_oss.jpi % cd .. % ./julius/julius -plugindir plugin -input myadin This adin plugin can be used from adintool and adinrec like this: % ./adinrec/adinrec -plugindir plugin -input myadin julius-4.2.2/plugin/fvin.c0000644001051700105040000004074211277435630013753 0ustar ritrlab/** * @file fvin.c * * * @brief A skeleton code of feature input plugin * * * * @brief 特徴量入力プラグインのひな形 * * * @author Akinobu Lee * @date Mon Aug 11 17:05:17 2008 * * $Revision: 1.3 $ * */ /** * Required for a file * - get_plugin_info() * * Optional for a file * - initialize() * */ /** * feature input plugin functions * * Required: * - fvin_get_optname() * - fvin_get_configuration() * - fvin_standby() * - fvin_open() * - fvin_read() * - fvin_close() * * Optional: * - fvin_terminate() * - fvin_pause() * - fvin_resume() * - fvin_input_name() * */ #include #include #include "plugin_defs.h" #define PLUGIN_TITLE "Feature vector input plugin for Julius" #define INPUT_OPT "myfvin" /************************************************************************/ /** * * @brief Initialization at loading time (optional) * * If defined, this will be called just before this plugin is loaded to Julius. * if this returns -1, the whole functions in this file will not be loaded. * * This function is OPTIONAL. * * * @brief 読み込み時の初期化(任意) * * 起動時,Julius がこのプラグインを読み込む際に最初に呼ばれる. * -1 を返すと,このプラグイン全体が読み込まれなくなる. * 実行可能性のチェックに使える. * * * * * @return 0 on success, -1 on failure. * */ int initialize() { return 0; } /** * * @brief Get information of this plugin (required) * * This function should return informations of this plugin file. * The required info will be specified by opcode: * - 0: return description string of this file into buf * * This will be called just after Julius find this file and after * initialize(). * * @param opcode [in] requested operation code * @param buf [out] buffer to store the return string * @param buflen [in] maximum length of buf * * @return 0 on success, -1 on failure. On failure, Julius will ignore this * plugin. * * * * @brief プラグイン情報取得(必須) * * このプラグインに関する情報を返す.与えられた opcode によって動作する. * - 0 の場合,このプラグインファイルの名称を与えられたバッファに格納する * * この関数は,Julius がこのプラグインを読み込んだ直後に呼ばれる. 
* * @param opcode [in] 要求動作コード (現在 0 のみ実装) * @param buf [out] 値を格納するバッファ * @param buflen [in] buf の最大長 * * @return エラー時 -1, 成功時 0 を返す.エラーとして -1 を返した場合, * このプラグイン全体は読み込まれない. * * */ int get_plugin_info(int opcode, char *buf, int buflen) { switch(opcode) { case 0: /* plugin description string */ strncpy(buf, PLUGIN_TITLE, buflen); break; } return 0; } /************************************************************************/ /************************************************************************/ /* Feature-vector input plugin functions */ /** * * @brief Return option string to select this input at option. (required) * * This function should return option string which should be specified * as an argument "-input" option, to be used on Julius. The returning * string should not be the same with any existing value. * * This function will be called several times at option parsing at startup. * * @param buf [out] buffer to store the return string * @param buflen [in] maximum length of buf * * * @brief 入力選択用のオプション文字列を返す(必須) * * このプラグインを入力として選択する際に,"-input" オプションで指定す * べき文字列を格納して返す.返す文字は,システムにすでにあるものや, * 他のプラグインが使用しているものと同じでないこと. * (もし同じだった場合システム側が優先される) * * この関数は,起動時のオプション解析時に何度か呼ばれる. * * @param buf [out] 値を格納して返すバッファ * @param buflen [in] buf の最大長 * * */ void fvin_get_optname(char *buf, int buflen) { strncpy(buf, INPUT_OPT, buflen); } /** * * @brief Return configuration parameters for this input (required) * * This function should return configuration parameters about the input. * * When opcode = 0, return the dimension (length) of input vector. * * When opcode = 1, return the frame interval (time between frames) in * milliseconds. * * When opcode = 2, parameter type code can be returned. The code should * the same format as used in HTK parameter file header. This is for * checking the input parameter type against acousitc model, and * you can disable the checking by returning "0xffff" to this opcode. * * @param opcode [in] requested operation code * * @return values required for the opcode as described. * * * * @brief 特徴量のパラメータを返す(必須) * * この入力プラグインがJuliusに渡す特徴量に関するパラメータを返す. * 与えられた以下の opcode ごとに,値を返す. * * opcode = 0: ベクトルの次元数 * opcode = 1: 1フレームあたりの時間幅(単位:ミリ秒) * opcode = 2: パラメータの型 * * opcode = 2 のパラメータの型は,音響モデルの特徴量との型整合性 * チェックに使われる.値は,HTK の特徴量ファイルのヘッダ形式で * エンコードされた値を返す.型チェックを行わない場合は, * 0xffff を返すこと. * * @param opcode [in] 要求動作コード (現在 0 のみ実装) * * @return opcode ごとに要求された値を返す. * */ int fvin_get_configuration(int opcode) { switch(opcode) { case 0: /* return number of elements in a vector */ return(25); case 1:/* return msec per frame */ return(10); case 2:/* return parameter type specification in HTK format */ /* return 0xffff to disable checking */ return(0xffff); } } /************************************************************************/ /** * * @brief Initialize input device (required) * * This will be called only once at start up of Julius. You can * check if the input file exists or prepare a socket for connection. * * If this function returns FALSE, Julius will exit. * * JuliusLib: this function will be called at j_adin_init(). * * @return TRUE on success, FALSE on failure. * * * @brief デバイスを初期化する(必須) * * この関数は起動時に一回だけ呼ばれる.ここでは入力ファイルの準備や * ソケットの用意といった,入力のための準備を行うのに使う. * * FALSE を返した場合,Julius は終了する. * * JuliusLib: この関数は j_adin_init() で呼ばれる. * * @return 成功時 TRUE,失敗時 FALSE を返す. * */ boolean fvin_standby() { /* sever socket ready etc... */ return TRUE; } /** * * @brief Open an input (required) * * This function should open a new input. 
You may open a feature * vector file, or wait for connection at this function. * * If this function returns FALSE, Julius will exit recognition loop. * * JuliusLib: this will be called at j_open_stream(). * * @return TRUE on success, FALSE on failure. * * * @brief 入力を開く(必須) * * 入力を新規に開く.ファイルのオープン,ネットワーククライアントからの * 接続などはここで行う. * * FALSE を返したとき,Julius は認識ループを抜ける. * * JuliusLib: この関数は j_open_stream() 内で呼ばれる. * * @return 成功時 TRUE,失敗時 FALSE を返す. * */ boolean fvin_open() { /* listen and accept socket, or open a file */ return TRUE; } /** * * @brief Read a vector from input (required) * * This will be called repeatedly at each frame, and the read vector * will be processed immediately, and then this function is called again. * * Return value of ADIN_EOF tells end of stream to Julius, which * causes Julius to finish current recognition and close stream. * ADIN_SEGMENT requests Julius to segment the current input. The * current recognition will be stopped at this point, recognition * result will be output, and then Julius continues to the next input. * The behavior of ADIN_SEGMENT is similar to ADIN_EOF except that * ADIN_SEGMENT does not close/open input, but just stop and restart * the recognition. At last, return value should be ADIN_ERROR on * error, in which Julius exits itself immediately. * * @param vecbuf [out] store a vector obtained in this function * @param veclen [in] vector length * * @return 0 on success, ADIN_EOF on end of stream, ADIN_SEGMENT to * request segmentation to Julius, or ADIN_ERROR on error. * * * @brief 入力からベクトルを読み込む(必須) * * この関数は入力からベクトルを1つだけ読み込む.この関数は * フレームごとに呼ばれ,読み込まれたベクトルはこのあとすぐに認識処理され, * また次のフレームのデータを読むためにこの関数が呼ばれる. * * 入力が終端まで達したとき,ADIN_EOF を返す.このとき,Julius は現在 * の認識処理を終了させ,入力を閉じる. * * ADIN_ERROR はこの関数内で深刻なエラーが生じた場合に返す.これが返さ * れた場合,Julius はその場で異常終了する. * * ADIN_SEGMENT を返すことで,Julius に現在の認識を現時点で区切ること * を要求することができる.現在の認識処理はこの時点でいったん区切られ, * そこまでの認識結果が確定・出力されたあと,次の認識処理が始まりこの * 関数が呼ばれる.ADIN_SEGMENT は ADIN_EOF と動作が似ているが, * ADIN_EOF が adin_close(), adin_open() を呼んで入力を終了させ * るのに対して,ADIN_SEGMENT はこれらを呼ばずに入力を続行する. * * @param vecbuf [out] 得られたベクトルを格納するバッファ * @param veclen [in] ベクトル長 * * @return 成功時 0 あるいは end of stream 時に ADIN_EOF, Julius に区 * 切り要求を出すときには ADIN_SEGMENT, エラー時はADIN_ERROR を返す. * */ int fvin_read(float *vecbuf, int veclen) { /* read one vector from the input */ if (0/* error */) return ADIN_ERROR; if (0/* input should be segmented here */) return ADIN_SEGMENT; if (0/* EOF */) return ADIN_EOF; return(0); /* success */ } /** * * @brief Close the current input (required) * * This function will be called when the input has reached end of file * (i.e. the last call of fvin_read() returns ADIN_EOF) * * You may close a file or disconnect network client here. * * If this function returns TRUE, Julius will go again to adin_open() * to open another stream. If returns FALSE, Julius will exit * the recognition loop. * * JuliusLib: This will be called at the end of j_recognize_stream(). * * @return TRUE on success, FALSE on failure. * * * @brief 入力を閉じる(必須) * * 現在の入力を閉じる.この関数は,入力が終端(EOF)に達したとき(すな * わち fvin_read() が ADIN_EOF を返したとき)に呼ばれる.通常,ここでは * ファイルを閉じる,ネットワーク接続を切断するなどの処理を行う. * * 正常終了としてTRUEを返したとき,Julius は adin_open() に戻って * 他のストリームを開こうとする. FALSE を返したときは,Julius は * 認識ループを抜ける. * * JuliusLib: この関数は j_recognize_stream() の最後で呼ばれる. * * @return 成功時 TRUE,失敗時 FALSE を返す. * */ boolean fvin_close() { /* file close, connection close, etc.. 
*/ return TRUE; } /************************************************************************/ /** * * @brief A hook for Termination request (optional) * * This function will be called when Julius receives a Termination * request to stop running. This can be used to synchronize input * facility with Julius's running status. * * Termination will occur when Julius is running on module mode and * received TERMINATE command from client, or j_request_terminate() * is called inside application. On termination, Julius will stop * recognition immediately (discard current input if in process), * and wait until received RESUME command or call of j_request_resume(). * * This hook function will be called just after a Termination request. * Please note that this will be called when Julius receives request, * not on actual termination. * * @return TRUE on success, FALSE on failure. * * * @brief 中断要求用フック(任意) * * この関数を定義すると,Julius は中断要求を受け取った際にこの関数を呼び出す. * これを使って,Julius の中断・再開と同期した入力同期処理を実装することが * できる.(例:入力送信元に対して送信中断要求を出すなど) * * 中断要求は,Julius がアプリケーションやクライアントより受け取る * 認識中断の要求である.具体的には,Julius がモジュールモードで動作して * いる時に TERMINATE コマンドをクライアントから受け取ったときや, * JuliusLibを組み込んだアプリケーションが j_request_terminate() を * 呼んだときに発生する. * * 中断要求を受け取ると,Julius は現在の認識処理を中断する. * 認識途中であった場合,その入力を破棄して即時中断する. * 処理の再開は,RESUME コマンドか j_request_resume() の呼び出しで行われる. * * この関数は中断要求を Julius が受け取った時点で呼ばれる. * 実際に処理が中断した後で呼ばれるのではないことに注意されたい. * * @return 成功時 TRUE, エラー時 FALSE を返す. * * */ boolean fvin_terminate() { printf("terminate request\n"); return TRUE; } /** * * @brief A hook for Pause request (optional) * * This function will be called when Julius receives a Pause request * to stop running. This can be used to synchronize input facility * with Julius's running status. * * Pause will occur when Julius is running on module mode and * received PAUSE command from client, or j_request_pause() * is called inside application. On pausing, Julius will * stop recognition and then wait until it receives RESUME command * or j_request_resume() is called. When pausing occurs while recognition is * running, Julius will process it to the end before stops. * * This hook function will be called just after a Pause request. * Please note that this will be called when Julius receives request, * not on actual pause. * * @return TRUE on success, FALSE on failure. * * * @brief 停止要求用フック(任意) * * この関数を定義すると,Julius は停止要求を受け取った際にこの関数を呼び出す. * これを使って,Julius の中断・再開と同期した入力同期処理を実装することが * できる.(例:入力送信元に対して送信中断要求を出すなど) * * 停止要求は,Julius がアプリケーションやクライアントより受け取る, * 認識の一時停止の要求である.具体的には,Julius がモジュールモードで動作して * いる時に PAUSE コマンドをクライアントから受け取ったときや, * JuliusLibを組み込んだアプリケーションが j_request_pause() を * 呼んだときに発生する. * * 停止要求を受け取ると,Julius は現在の認識処理を中断する. * 認識途中であった場合,その認識が終わるまで待ってから中断する. * 処理の再開は,RESUME コマンドか j_request_resume() の呼び出しで行われる. * * 中断要求 (fvin_terminate) との違いは,認識途中に要求を受けたときの動作が * 異なる.中断要求では強制中断するが,停止要求ではその認識が終わるまで * 待ってから停止する. * * この関数は停止要求を Julius が受け取った時点で呼ばれる. * 実際に処理が停止した後で呼ばれるのではないことに注意されたい. * * @return 成功時 TRUE, エラー時 FALSE を返す. * * */ boolean fvin_pause() { printf("pause request\n"); return TRUE; } /** * * @brief A hook for Resume request (optional) * * This function will be called when Julius received a resume request * to recover from pause/termination status. * * Resume will occur when Julius has been stopped by receiving RESUME * command from client on module mode, or j_request_resume() is called * inside application. * * This hook function will be called just after a resume request. 
* This can be used to make this A/D-in plugin cooperate with the * pause/resume status, for example to tell audio client to restart * audio streaming. * * This function is totally optional. * * @return TRUE on success, FALSE on failure. * * * @brief 認識再開要求用フック(任意) * * この関数を定義すると,Julius は停止状態からの認識再開要求の際に * この関数を呼び出す. * * 認識再開要求は,Julius がモジュールモードで動作して RESUME コマンドを * クライアントから受け取ったときや,JuliusLibを組み込んだアプリケーション * が j_request_resume() を呼んだときに発生する.この再開要求が発生 * すると,Julius は停止していた認識を再開する. * * 注意:この関数は,実際に停止したときに呼ばれるのではなく,Julius が * 要求を受け取った時点で,そのたびに呼ばれる.複数回呼ばれることや, * すでに動作中である場合にさらにこのコマンドを受け取ったときにも呼ば * れることがあることに注意されたい. * * @return 成功時 TRUE, エラー時 FALSE を返す. * * */ boolean fvin_resume() { printf("resume request\n"); return TRUE; } /** * * @brief A function to return current device name for information (optional) * * This function is totally optional. * * @return pointer to the device name string * * * @brief 入力ファイル・デバイス名を返す関数(任意) * * @return 入力ファイルあるいはデバイス名の文字列へのポインタ * * */ char * fvin_input_name() { printf("input name function was called\n"); return("default"); } /* end of file */ julius-4.2.2/plugin/result.c0000644001051700105040000000770511066615330014322 0ustar ritrlab/** * @file result.c * * * @brief Plugin to process recognition result * * * * @brief 認識結果を処理するプラグイン * * * @author Akinobu Lee * @date Fri Aug 22 15:17:59 2008 * * $Revision: 1.1 $ * */ /** * Required for a file * - get_plugin_info() * * Optional for a file * - initialize() * */ /** * Result processing function * * - result_str() * */ /***************************************************************************/ #include #include #define PLUGIN_TITLE "result process plugin for Julius" /** * * @brief Initialization at loading time (optional) * * If defined, this will be called just before this plugin is loaded to Julius. * if this returns -1, the whole functions in this file will not be loaded. * * This function is OPTIONAL. * * * @brief 読み込み時の初期化(任意) * * 起動時,Julius がこのプラグインを読み込む際に最初に呼ばれる. * -1 を返すと,このプラグイン全体が読み込まれなくなる. * 実行可能性のチェックに使える. * * * * * @return 0 on success, -1 on failure. * */ int initialize() { return 0; } /** * * @brief Get information of this plugin (required) * * This function should return informations of this plugin file. * The required info will be specified by opcode: * - 0: return description string of this file into buf * * This will be called just after Julius find this file and after * initialize(). * * @param opcode [in] requested operation code * @param buf [out] buffer to store the return string * @param buflen [in] maximum length of buf * * @return 0 on success, -1 on failure. On failure, Julius will ignore this * plugin. * * * * @brief プラグイン情報取得(必須) * * このプラグインに関する情報を返す.与えられた opcode によって動作する. * - 0 の場合,このプラグインファイルの名称を与えられたバッファに格納する * * この関数は,Julius がこのプラグインを読み込んだ直後に呼ばれる. * * @param opcode [in] 要求動作コード (現在 0 のみ実装) * @param buf [out] 値を格納するバッファ * @param buflen [in] buf の最大長 * * @return エラー時 -1, 成功時 0 を返す.エラーとして -1 を返した場合, * このプラグイン全体は読み込まれない. * * */ int get_plugin_info(int opcode, char *buf, int buflen) { switch(opcode) { case 0: /* plugin description string */ strncpy(buf, PLUGIN_TITLE, buflen); break; } return 0; } /** * * @brief Process a recognition result (best string) * * This function will be called each time after recognition of an * utterance is finished. The best recognition result for the * utterance will be passed to this function, as a string in which * words are separated by white space. When the recognition was failed * or rejected, string will be NULL. 
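 *
 * As an illustration only (not part of this sample, which simply
 * prints the string), the whitespace-separated result could be split
 * into individual words inside result_best_str() like this; the
 * buffer size and the use of strtok_r() are arbitrary choices:
 * @code
 * char tmp[2048];
 * char *tok, *save;
 * if (result_str != NULL) {
 *   strncpy(tmp, result_str, sizeof(tmp) - 1);
 *   tmp[sizeof(tmp) - 1] = '\0';
 *   for (tok = strtok_r(tmp, " ", &save); tok != NULL;
 *        tok = strtok_r(NULL, " ", &save)) {
 *     printf("word: %s\n", tok);
 *   }
 * }
 * @endcode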
* * On short-pause segmentation mode or GMM/Decoder-VAD mode, where * an input utterance may be segmented into pieces, this funtion will be * called for each segment. On multi decoding, the best hypothesis among * all the recognition instance will be given. * * @param result_str [in] recognition result, words separated by whitespace, * or NULL on failure * * * * @brief 認識結果の処理(最尤文字列) * * この関数は入力の認識が終わるたびに呼び出され, * 入力に対する認識結果(最も確率の高い候補)の文字列が渡される. * 与えられる文字列は,単語毎にスペースで区切られる. * 認識が失敗した場合は, 文字列に NULL が渡される. * * ショートポーズセグメンテーションや GMM/Decoder ベースのVADを * 行う場合,入力は小単位に分割される.この場合,この関数は * その分割された小単位ごとに呼ばれる.また,複数モデル認識の場合, * 全認識処理中で最もスコアの高い仮説が渡される. * * @param result_str [in] 認識結果(単語は空白で区切られている)NULLの * 場合,認識失敗. * * * */ void result_best_str(char *result_str) { if (result_str == NULL) { printf("[failed]\n"); } else { printf(" <<%s>>\n", result_str); } } julius-4.2.2/plugin/feature_postprocess.c0000644001051700105040000001016711066615330017077 0ustar ritrlab/** * @file feature_postprocess.c * * * @brief A sample plugin for feature vector postprocessing * * * * @brief 特徴量の後処理プラグインのサンプル * * * @author Akinobu Lee * @date Sun Aug 10 15:14:19 2008 * * $Revision: 1.1 $ * */ /** * Required for a file * - get_plugin_info() * * Optional for a file * - initialize() * */ /** * Feature vector input postprocessing functions * * Required: * - fvin_postprocess() * */ /***************************************************************************/ #include #include #include "plugin_defs.h" #define PLUGIN_TITLE "feature vector postprocess plugin for Julius" /** * * @brief Initialization at loading time (optional) * * If defined, this will be called just before this plugin is loaded to Julius. * if this returns -1, the whole functions in this file will not be loaded. * * This function is OPTIONAL. * * * @brief 読み込み時の初期化(任意) * * 起動時,Julius がこのプラグインを読み込む際に最初に呼ばれる. * -1 を返すと,このプラグイン全体が読み込まれなくなる. * 実行可能性のチェックに使える. * * * * * @return 0 on success, -1 on failure. * */ int initialize() { return 0; } /** * * @brief Get information of this plugin (required) * * This function should return informations of this plugin file. * The required info will be specified by opcode: * - 0: return description string of this file into buf * * This will be called just after Julius find this file and after * initialize(). * * @param opcode [in] requested operation code * @param buf [out] buffer to store the return string * @param buflen [in] maximum length of buf * * @return 0 on success, -1 on failure. On failure, Julius will ignore this * plugin. * * * * @brief プラグイン情報取得(必須) * * このプラグインに関する情報を返す.与えられた opcode によって動作する. * - 0 の場合,このプラグインファイルの名称を与えられたバッファに格納する * * この関数は,Julius がこのプラグインを読み込んだ直後に呼ばれる. * * @param opcode [in] 要求動作コード (現在 0 のみ実装) * @param buf [out] 値を格納するバッファ * @param buflen [in] buf の最大長 * * @return エラー時 -1, 成功時 0 を返す.エラーとして -1 を返した場合, * このプラグイン全体は読み込まれない. * * */ int get_plugin_info(int opcode, char *buf, int buflen) { switch(opcode) { case 0: /* plugin description string */ strncpy(buf, PLUGIN_TITLE, buflen); break; } return 0; } /** * * @brief Post-processing function for a feature vector * * When defined, this function will be called at every input vector * before recognition. This will be called successively for every input * at each frame. * * You can monitor the feature vector to be recognized, and also can * modify or overwrite the content to do some transformation like a * feature-space adaptation. * * If multiple plugins have this functions, they are all executed in order * of loading. 
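 *
 * As a purely illustrative sketch (not part of this sample, which
 * only prints the vector), a plugin could shift the feature space by
 * subtracting a hypothetical bias vector prepared elsewhere:
 * @code
 * extern float bias[];  /* hypothetical bias vector, >= veclen elements */
 * int i;
 * for (i = 0; i < veclen; i++) {
 *   vecbuf[i] -= bias[i];
 * }
 * @endcode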
* * @param vecbuf [i/o] a feature vector * @param veclen [in] length of @a vecbuf * @param nframe [in] frame number in a recognition, staring with 0. * * * * @brief 特徴量ベクトルに対する後処理関数 * * この関数が定義された場合,Julius は個々の特徴量ベクトルについて, * 認識が行われる前にこの関数を呼び出す.この関数は,入力が進むたびに * その各フレームの特徴量ベクトルについて呼ばれる. * * この関数を使って入力の特徴量ベクトルをモニタできるほか,バッファ上の * データを直接書き換えることもできる.音声認識はこの関数が終わったあとの * データに対して行われるので,例えば話者適応や話者正規化のような処理を * ここで行うことも可能である. * * 複数のプラグインでこの関数が指定されている場合,それらは読み込み順に * 実行される. * * @param vecbuf [i/o] 特徴量ベクトル * @param veclen [in] @a vecbuf の長さ * @param nframe [in] フレーム番号 * * * */ void fvin_postprocess(float *vecbuf, int veclen, int nframe) { int i; /* just output the vectors to stdout */ printf("%d:", nframe); for(i=0;i * @brief A sample plugin for calculating Gaussians * * This sample uses Julius libraries. * * * * @brief ガウス分布計算プラグインのサンプル * * このサンプルは julius のライブラリを使用します. * * * @author Akinobu Lee * @date Mon Aug 11 15:29:45 2008 * * $Revision: 1.1 $ * */ /** * Required for a file * - get_plugin_info() * * Optional for a file * - initialize() * */ /** * Gaussian mixture calculation plugin: * * Required: * - calcmix_get_optname() * - calcmix() * - calcmix_init() * - calcmix_free() * */ /***************************************************************************/ /* we refer to Julius libsent header */ #include #include //#include "plugin_defs.h" #define PLUGIN_TITLE "Gaussian calculation plugin for Julius" #define GPRUNE_OPT "mycalc" /** * * @brief Initialization at loading time (optional) * * If defined, this will be called just before this plugin is loaded to Julius. * if this returns -1, the whole functions in this file will not be loaded. * * This function is OPTIONAL. * * * @brief 読み込み時の初期化(任意) * * 起動時,Julius がこのプラグインを読み込む際に最初に呼ばれる. * -1 を返すと,このプラグイン全体が読み込まれなくなる. * 実行可能性のチェックに使える. * * * * * @return 0 on success, -1 on failure. * */ int initialize() { return 0; } /** * * @brief Get information of this plugin (required) * * This function should return informations of this plugin file. * The required info will be specified by opcode: * - 0: return description string of this file into buf * * This will be called just after Julius find this file and after * initialize(). * * @param opcode [in] requested operation code * @param buf [out] buffer to store the return string * @param buflen [in] maximum length of buf * * @return 0 on success, -1 on failure. On failure, Julius will ignore this * plugin. * * * * @brief プラグイン情報取得(必須) * * このプラグインに関する情報を返す.与えられた opcode によって動作する. * - 0 の場合,このプラグインファイルの名称を与えられたバッファに格納する * * この関数は,Julius がこのプラグインを読み込んだ直後に呼ばれる. * * @param opcode [in] 要求動作コード (現在 0 のみ実装) * @param buf [out] 値を格納するバッファ * @param buflen [in] buf の最大長 * * @return エラー時 -1, 成功時 0 を返す.エラーとして -1 を返した場合, * このプラグイン全体は読み込まれない. * * */ int get_plugin_info(int opcode, char *buf, int buflen) { switch(opcode) { case 0: /* plugin description string */ strncpy(buf, PLUGIN_TITLE, buflen); break; } return 0; } /************************************************************************/ /************************************************************************/ /** * * @brief Return option string to select at option. (required) * * This function should return option string which should be specified * as an argument "-gprune" option, to be used on Julius. The returning * string should not be the same with any existing value. * * This function will be called several times at option parsing at startup. 
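 *
 * For example, with the GPRUNE_OPT value defined in this file
 * ("mycalc"), this plugin would be selected by starting Julius
 * roughly as follows (illustrative command line only, other options
 * omitted):
 * @code
 * % ./julius/julius -plugindir plugin -gprune mycalc
 * @endcode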
* * @param buf [out] buffer to store the return string * @param buflen [in] maximum length of buf * * * @brief 計算方法選択用オプションのための文字列を返す(必須) * * Julius で起動時に "-gprune ここで返す値" と指定するとこのプラグイン * が使用される.この関数では,上記の "-gprune" に与えるべき文字列を格 * 納して返す.返す文字は,システムの "-gprune" オプションにすでにある * ものや,他のプラグインが使用しているものと同じでないこと.(もし同 * じだった場合システム側が優先される) * * この関数は,起動時のオプション解析時に何度か呼ばれる. * * @param buf [out] 値を格納して返すバッファ * @param buflen [in] buf の最大長 * * */ void calcmix_get_optname(char *buf, int buflen) { strncpy(buf, GPRUNE_OPT, buflen); } /** * * @brief A basic implementaion of computing Gaussians * * This function should compute output probabilities for each * Gaussians. after this function returns, Julius will do addlog to * get the final output log probability. * * The input vector to be computed is located at wrk->OP_vec[], at a * length of wrk->OP_veclen. Gaussians are given by g[], at a number * of num. last_id and lnum is for internal use for pruning, just ignore * them. * * The scores for each Gaussians computed in this function should be * stored in OP_calced_score[], with their corresponding Gaussian ids * to OP_calced_id. The total number of calculated mixtures shuold * also stored in OP_calced_num. * * @param wrk [i/o] HMM computation work area to store data * @param g [in] set of Gaussian densities to compute the output probability. * @param num [in] length of above * @param last_id [in] ID list of N-best mixture in previous input frame, * or NULL if not exist * @param lnum [in] length of last_id * * * @brief ガウス分布計算関数 * * この関数では,与えられた複数のガウス分布に対して入力ベクトルの * 出力確率を求める.この関数が行うのは,複数のガウス分布それぞれの * 出力確率の算出と格納のみであり,混合分布としての重み計算や addlog * はこの関数が返ったあとに Julius 側で行われる. * * 入力ベクトルは wrk->OP_vec[] に格納されており,長さは wrk->OP_veclen * である.ガウス分布定義は g[] に配列として複数渡され,その数は num である. * * なお,last_id と lnum はこのガウス分布集合 g[] において直前の入力フ * レームで計算されたものの id が入っている.Julius の内部処理用なので, * 使わなくても差し支えない. * * 各ガウス分布に対する入力ベクトルの対数出力確率は,そのガウス分布の * ID (0 から始まる配列の添え字) を wrk->OP_calced_id に,値を * wrk->OP_calced_score に格納すること.また,実際に計算された * ガウス分布の数を wrk->OP_calced_num に格納すること. * (これは Gaussian pruning を想定した実装である) * * 以下は,pruning 等を行わない単純な出力確率計算を実装したものである. * ガウス分布は対角共分散を仮定している.なお Julius では読み込み時に * HTK でいうところの gconst 値はあらかじめ計算される.このため,計算時に * 下記の dens->gconst のように利用できる. * * @param wrk [i/o] HMM計算用ワークエリア * @param g [in] 出力確率を計算するガウス分布の列 * @param num [in] @a g のガウス分布の数 * @param last_id [in] 直前入力フレームで上位だったガウス分布のIDリスト, * または内場合は NULL * @param lnum [in] @a last_id の長さ * */ void calcmix(HMMWork *wrk, HTK_HMM_Dens **g, int num, int *last_id, int lnum) { int i; HTK_HMM_Dens *dens; LOGPROB *prob = wrk->OP_calced_score; int *id = wrk->OP_calced_id; VECT tmp, x; VECT *mean; VECT *var; VECT *vec; short veclen; for(i=0; imean; var = dens->var->vec; tmp = dens->gconst; vec = wrk->OP_vec; veclen = wrk->OP_veclen; for (; veclen > 0; veclen--) { x = *(vec++) - *(mean++); tmp += x * x * *(var++); } tmp *= -0.5; /* store it */ *(prob++) = tmp; } wrk->OP_calced_num = num; } /** * * Free work area. * You should free all allocated at clacmix_init(). * * @param wrk [i/o] HMM computation work area * * * calcmix_init() で確保されたワークエリアを開放する. * * @param wrk [i/o] HMM 計算用ワークエリア * * */ void calcmix_free(HMMWork *wrk) { free(wrk->OP_calced_score); free(wrk->OP_calced_id); } /** * * @brief Initialize and setup work area for Gaussian computation. * * You should set value for OP_calced_maxnum, and allocate OP_calced_score * and OP_calced_id. Remaining the content below is safe. * * This will be called once on instance initialization at startup. 
* * @param wrk [i/o] HMM computation work area * * @return TRUE on success, FALSE on failure. * * * @brief 計算用のワークエリアを確保する. * * ガウス分布計算用のワークエリアを確保する.下記にすでに書いてある分は, * そのまま Julius の内部でも使用しているので,削らないこと. * * この関数は最初に音響尤度計算インスタンスが作成されるときに呼び出される. * * @param wrk [i/o] HMM 計算用ワークエリア * * @return 成功時 TRUE,失敗時 FALSE を返す. * */ boolean calcmix_init(HMMWork *wrk) { /* maximum Gaussian set size = maximum mixture size * nstream */ wrk->OP_calced_maxnum = wrk->OP_hmminfo->maxmixturenum * wrk->OP_nstream; wrk->OP_calced_score = (LOGPROB *)malloc(sizeof(LOGPROB) * wrk->OP_calced_maxnum); wrk->OP_calced_id = (int *)malloc(sizeof(int) * wrk->OP_calced_maxnum); /* force gprune_num to the max number */ wrk->OP_gprune_num = wrk->OP_calced_maxnum; return TRUE; } /* end of file */ julius-4.2.2/plugin/adin_oss.c0000644001051700105040000006325011223164320014571 0ustar ritrlab/** * @file adin_oss.c * * * @brief A reference sample of A/D-in plugin * * This file describes the specifications of plugin functions to be * defined to make an A/D-in plugin. An A/D-in plugin will extend a * new audio sream input into Julius by addin a new choice to the * "-input" option. * * The recording format should be 16 bit (signed short), and sampling * rate should be set to the given value at adin_standby(). * * * * * @brief オーディオ入力プラグインのひな形 * * このファイルは,オーディオ入力プラグインを作成する際に定義すべきプ * ラグイン関数について解説している.オーディオ入力プラグインは, * Julius に新たな音声入力デバイスを追加する."-input" に新たな選択肢 * が追加され,実行時に Julius に対してそれを指定することで,このプラ * グイン経由で音声を取り込み認識することができる. * * オーディオ入力プラグインで取り込むべきデータのフォーマットは 16bit で * あること.さらに,サンプリングレートを adin_standby() 呼び出し時に * 与えられるレートに合わせること. * * * * Common functions that can be defined in any type of plugin: * - get_plugin_info() * - initialize() * * A/D-in plugin functions: * * Required: * - adin_get_optname() * - adin_get_configuration() * - adin_standby() * - adin_open() * - adin_read() * - adin_close() * * Optional: * - adin_terminate() * - adin_pause() * - adin_resume() * - adin_input_name() * * * @author Akinobu Lee * @date Thu Aug 7 14:28:37 2008 * * $Revision: 1.3 $ * */ /***************************************************************************/ #include #include #include #include "plugin_defs.h" /** * * Description string of this plugin file. * * * このプラグインファイルの説明文字列. * * */ #define PLUGIN_TITLE "A/D-in plugin for Julius" /** * * string to be specified at "-input" option at Julius to use this plugin * as input module. * * * このプラグインを使用して音声入力を行う際に,Juliusの "-input" オプション * に与えるべき文字列. * * */ #define INPUT_OPT "myadin" /** * * @brief Initialization at loading time (optional) * * If defined, this will be called just before this plugin is loaded to Julius. * if this returns -1, the whole functions in this file will not be loaded. * * This function is OPTIONAL. * * * @brief 読み込み時の初期化(任意) * * 起動時,Julius がこのプラグインを読み込む際に最初に呼ばれる. * -1 を返すと,このプラグイン全体が読み込まれなくなる. * 実行可能性のチェックに使える. * * * * * @return 0 on success, -1 on failure. * */ int initialize() { return 0; } /** * * @brief Get information of this plugin (required) * * This function should return informations of this plugin file. * The required info will be specified by opcode: * - 0: return description string of this file into buf * * This will be called just after Julius find this file and after * initialize(). * * @param opcode [in] requested operation code * @param buf [out] buffer to store the return string * @param buflen [in] maximum length of buf * * @return 0 on success, -1 on failure. On failure, Julius will ignore this * plugin. 
* * * * @brief プラグイン情報取得(必須) * * このプラグインに関する情報を返す.与えられた opcode によって動作する. * - 0 の場合,このプラグインファイルの名称を与えられたバッファに格納する * * この関数は,Julius がこのプラグインを読み込んだ直後に呼ばれる. * * @param opcode [in] 要求動作コード (現在 0 のみ実装) * @param buf [out] 値を格納するバッファ * @param buflen [in] buf の最大長 * * @return エラー時 -1, 成功時 0 を返す.エラーとして -1 を返した場合, * このプラグイン全体は読み込まれない. * * */ int get_plugin_info(int opcode, char *buf, int buflen) { switch(opcode) { case 0: /* plugin description string */ strncpy(buf, PLUGIN_TITLE, buflen); break; } return 0; } /************************************************************************/ /************************************************************************/ /* A/D-in plugin functions */ /** * * @brief Return option string to select at option. (required) * * This function should return option string which should be specified * as an argument "-input" option, to be used on Julius. The returning * string should not be the same with any existing value. * * This function will be called several times at option parsing at startup. * * @param buf [out] buffer to store the return string * @param buflen [in] maximum length of buf * * * @brief 音声入力選択用のオプション文字列を返す(必須) * * このプラグインを入力として選択する際に,"-input" オプションで指定す * べき文字列を格納して返す.返す文字は,システムにすでにあるものや, * 他のプラグインが使用しているものと同じでないこと. * (もし同じだった場合システム側が優先される) * * この関数は,起動時のオプション解析時に何度か呼ばれる. * * @param buf [out] 値を格納して返すバッファ * @param buflen [in] buf の最大長 * * */ void adin_get_optname(char *buf, int buflen) { strncpy(buf, INPUT_OPT, buflen); } /** * * @brief Return decoder parameter values related to this adin plugin (required) * * This function should return configuration values about how to set up * decoder to use this adin plugin. The return value is dependent on * the given opcode, as described below: * * opcode = 0: return whether real-time processing of 1st pass * should be enabled by default. * * if returns 0 (disabled) , Julius will do buffered input, spooling * the incoming input till EOF or silence cut segmentation, extract * feature vector, then recognize the whole. If returns 1 (enabled), * on-the-fly decoding will be performed, reading input and decoding * it concurrently. * * A real-time decoding uses some approximation on feature extraction * related to sentence-based normalization i.e. CMN or energy normalization. * This value is typically 0 on off-line recognition, and 1 for on-line * recognition. * * This value is device-dependent default value, and can be overridden by * user option "-realtime" and "-norealtime". * * opcode = 1: return whether silence cut segmentation should be * enabled by default * * return 0 to disable, 1 to enable. * * On file input, you can choose whether silence detection and * segmentation should be performed before recognition. On live input * like microphone, where input stream is infinite, you would perfer * choose 1 to enable it. * * This value is device-dependent default value, and can be overridden by * user option "-cutsilence" and "-nocutsilence". * * opcode = 2: return whether input threading is necessary or not. * * On Unix, when set to 1, Julius forks a separate thread for A/D-in * input. It can be useful when recognition is slow and some audio * inputs are dropped. Note that this should be enabled only for * infinite input like microphone or line input, since EOF handling on * threaded mode is not supported yet. Recommended value is 1 for * microphone input, 0 for file and network (tcp/ip) input. * Ignored on Win32. * * @param opcode [in] requested operation code * * @return values required for the opcode as described. 
* * * * @brief 入力の扱いに関するパラメータ設定を返す(必須) * * Julius がこの入力プラグインをどう扱うべきかについて,設定パラメータを * 返す.与えられた以下の opcode ごとに,値を返す. * * opcode = 0: リアルタイム認識を行うかどうかのデフォルト値 * * 1 を返すと,Julius は入力に対して特徴抽出と認識処理を平行して行う * リアルタイム認識を行う.0 の場合,いったん入力を終端(あるいは区切り) * まで受け取ってから,特徴抽出を行い,その後認識を開始する. * リアルタイム処理では,CMN やエネルギー平均など,発話全体を用いた * 特徴量の正規化が近似される. * * 通常,マイク入力などリアルタイムな結果が欲しい場合は 1, * ファイル入力などオフライン認識の場合は 0 を返すことが多い. * * なお,ここの値は,この入力が規定するデフォルト値であり, * Juliusの実行時オプション "-realtime", "-norealtime" でも変更できる. * オプションが指定された場合はその指定が優先される. * * opcode = 1: 無音区間検出による入力区切りのデフォルト値 * * Julius は入力音声に対して振幅と零交差による入力判定を行い,振幅が一 * 定レベル以下の部分をスキップし,そこで区切って入力とすることができ * る.この無音での自動区切りのデフォルトを,返値 1 で有効, 0 で無効 * とできる. * * 通常,マイクなどの直接入力では 1,1発話ごとの音声ファイルでは 0 を * 返すことが多い. * * なお,ここの値は,この入力が規定するデフォルト値であり, * Juliusの実行時オプション "-cutsilence", "-nocutsilence" でも変更できる. * オプションが指定された場合はその指定が優先される. * * opcode = 2: 音声入力をスレッド化するかのデフォルト値 * * 音声入力取り込み部を別スレッドにするかどうかを選択する. * 音声認識の速度が遅く,音声データの取りこぼしが発生する場合に有効である. * ただし,現在のJuliusでは,EOF による認識終了を正しく扱えないので, * マイク入力などの入力長が有限でない入力についてのみスレッド化を有効に * すべきである. * * 通常,マイク UDP などでは 1 にし,ファイルや TCP/IP ソケットでは * 0 にする. * * @param opcode [in] 要求動作コード (現在 0 のみ実装) * * @return opcode ごとに要求された値を返す. * */ int adin_get_configuration(int opcode) { /* For your convenience, UNCOMMENT ONE OF THEM BELOW that match your needs */ /* typical values for live microphone/line input */ switch(opcode) { case 0: return 1; case 1: return 1; case 2: return 1; } /* typical values for offline file input */ /* * switch(opcode) { * case 0: * return 0; * case 1: * return 0; * case 2: * return 0; * } */ /* typical setting for tcpip input */ /* assuming speech to be segmented at sender */ /* * switch(opcode) { * case 0: * return 1; * case 1: * return 0; * case 2: * return 0; * } */ /* typical setting for tcpip input */ /* assuming receiving continous speech stream and segmented should be done at Julius side */ /* * switch(opcode) { * case 0: * return 1; * case 1: * return 1; * case 2: * return 0; * } */ } /************************************************************************/ #include #include #include #include static int audio_fd; static int freq; /** * * @brief Initialize input device (required) * * This will be called only once at start up of Julius. You can * initialize the device, check if the device exists or prepare a socket * for connection. * * If this function returns FALSE, Julius will exit. * * JuliusLib: this function will be called at j_adin_init(). * * @param sfreq [in] required sampling frequency * @param dummy [in] a dummy data * * @return TRUE on success, FALSE on failure. * * * @brief デバイスを初期化する(必須) * * この関数は起動時に一回だけ呼ばれる.ここではデバイスのチェックや * ソケットの用意といった,音声入力のための準備を行うのに使う. * * FALSE を返した場合,Julius は終了する. * * JuliusLib: この関数は j_adin_init() で呼ばれる. * * @param sfreq [in] サンプリングレート * @param dummy [in] ダミーデータ(未使用) * * @return 成功時 TRUE,失敗時 FALSE を返す. * */ boolean adin_standby(int sfreq, void *dummy) { /* store the frequency */ freq = sfreq; return TRUE; } /** * * @brief Open an input stream (required) * * This function should open a new audio stream for input. * You may open a capture device, open an audio file, or wait for * connection with other network client at this function. * * If this function returns FALSE, Julius will exit recognition loop. * * JuliusLib: this will be called at j_open_stream(). * * @param pathname [in] file / device name to open or NULL for default * * @return TRUE on success, FALSE on failure. * * * @brief 入力音声ストリームを開く(必須) * * 入力音声ストリームを新規に開く.通常,デバイスやファイルのオープン, * ネットワーククライアントからの接続などをここで行う. * * FALSE を返したとき,Julius は認識ループを抜ける. 
* * JuliusLib: この関数は j_open_stream() 内で呼ばれる. * * @param pathname [in] 開くファイルあるいはデバイス名,NULL ならデフォルト * * @return 成功時 TRUE,失敗時 FALSE を返す. * */ boolean adin_open(char *pathname) { /* do open the device */ int fmt; int stereo; int ret; int s; char buf[2]; if ((audio_fd = open(pathname ? pathname : "/dev/dsp", O_RDONLY)) == -1) { printf("Error: cannot open %s\n", pathname ? pathname : "/dev/dsp"); return FALSE; } fmt = AFMT_S16_LE; /* 16bit signed (little endian) */ if (ioctl(audio_fd, SNDCTL_DSP_SETFMT, &fmt) == -1) { printf("Error: failed set format to 16bit signed\n"); return FALSE; } stereo = 0; /* mono */ ret = ioctl(audio_fd, SNDCTL_DSP_STEREO, &stereo); if (ret == -1 || stereo != 0) { stereo = 1; ret = ioctl(audio_fd, SNDCTL_DSP_CHANNELS, &stereo); if (ret == -1 || stereo != 1) { printf("Error: failed to set monoral channel\n"); return FALSE; } } s = freq; if (ioctl(audio_fd, SNDCTL_DSP_SPEED, &s) == -1) { printf("Erorr: failed to set sample rate to %dHz\n", freq); return FALSE; } /* start recording... */ read(audio_fd, buf, 2); return(TRUE); } /** * * @brief Read samples from device (required) * * This function is for reading samples to be recognized from input stream. * This will be called repeatedly at each time the read samples are fully * processed. * * The sampling format should be 16bit, 1 channel. * * @a sampnum is the maximum number of samples that can be read into @a buf. * The actual number of read samples should be returned. * * Impotant notes about I/O blocking: * - Do not wait until all the @a sampnum samples are read. * Blocking inside this function will block the whole recognition process. * If device allows, it is better to read only the available data * in the stream and return immediately. * - Avoid returning value of 0 when no data is available, wait for some * data to come inside this function. When you are using non-blocking * operation, you may want to return 0 when no data is available. * However, returning 0 will cause Julius to call again this function * immediately, and cause busy loop to make CPU load to reach 100%. * * So the ideal operation will be first wait_for_some_data_to_come, and * if any data becomes available, read them at most @a sampnum samples * and return the number of read samples. * * Positive return value should be the number of read samples, or one * of ADIN_EOF, ADIN_SEGMENT or ADIN_ERROR. Return value of ADIN_EOF * tells end of stream, which causes Julius to finish current * recognition and close stream. ADIN_SEGMENT requests Julius to * segment the current input. The current recognition will be stopped * at this point, recognition result will be output, and then Julius * continues to the next input. The behavior of ADIN_SEGMENT is * similar to ADIN_EOF except that ADIN_SEGMENT does not close/open * stream, but just stop and restart the recognition. At last, return * value should be ADIN_ERROR on error, in which Julius exits itself * immediately. * * @param buf [out] output buffer to store samples obtained. * @param sampnum [in] maximum number of samples that can be stored in @a buf. * * @return actural number of read samples, ADIN_EOF on end of stream, * ADIN_SEGMENT to request segmentation to Julius, or ADIN_ERROR on error. * * * @brief デバイスからサンプルを読み込む(必須) * * この関数は入力ストリームから音声サンプルを読み込む. * * バッファに格納して返す音声データの形式は 16bit, 1 チャンネルであること. * * @a sampnum は @a buf に格納することのできる最大のサンプル数である. * 返り値として,実際に読み込まれたサンプル数,あるいは以下で説明する * エラーコードを返す. * * この関数は認識中に何度も呼ばれ,ここで読まれたデータが Julius によっ * て 認識処理される.読み込んだ分の処理が終了すると,次の入力を読み込 * むためにこの関数が再度呼ばれる. 
* * この関数内での I/O blocking については以下の注意が必要である: * * - 長時間のブロックは避けること(@a sampnum は要求サンプル数ではな * く@a buf に格納可能な最大数である).この関数内でブロックすると認 * 識処理全体がブロックする.読み込みが長時間ブロックしないよう,数百 * サンプル程度だけ読み込んで返すか,あるいは最初にバッファ内にあるブ * ロックせずに読み込み可能なデータサンプル数を取得し,その分だけ読み * 込むようにするのがよい. * * - non-blocking モードを用いる場合, 0 を返さないこと. * バッファにデータが存在しないとき,0 を返すと Julius はサンプル * 無しのためまた即座にこの関数を呼び出す.これがビジーウェイトを * 発生させ,CPUロードがあがってしまう.バッファにデータが無いとき, * 即座に 0 を返さず,数十msec でよいのでこの関数内で待つ * ことが望ましい. * * 返り値は,実際に読み込んだサンプル数を正の値として返すか,あるいは * ADIN_EOF, ADIN_SEGMENT, ADIN_ERROR のどれかを返す.ADIN_EOF はスト * リームが終端まで達したことを表す,これを返すと,Julius は現在の認識 * 処理を終了させ,ストリームを閉じる.ADIN_ERROR はこの関数内で深刻な * エラーが生じた場合に返す.これが返された場合,Julius はその場で異常 * 終了する. * * ADIN_SEGMENT を返すことで,Julius に現在の認識を現時点で区切ること * を要求することができる.現在の認識処理はこの時点でいったん区切られ, * そこまでの認識結果が確定・出力されたあと,次の認識処理が始まりこの * 関数が呼ばれる.ADIN_SEGMENT は ADIN_EOF と動作が似ているが, * ADIN_EOF が adin_close(), adin_open() を呼んでストリームを終了させ * るのに対して,ADIN_SEGMENT はこれらを呼ばずに入力を続行する.この機 * 能は,たとえばネットワーク経由で音声データを受信しているときに,送 * 信側から音声認識のON/OFFやVADをコントロールしたい場合などに * 使うことができる. * * @param buf [out] 得られたサンプルを格納するバッファ * @param sampnum [in] @a buf 内に格納できる最大サンプル数 * * @return 実際に読み込まれたサンプル数,あるいは end of stream 時に ADIN_EOF, * Julius に区切り要求を出すときには ADIN_SEGMENT, エラー時はADIN_ERROR を * 返す. * */ int adin_read(SP16 *buf, int sampnum) { audio_buf_info info; int size, cnt; /* get sample num that can be read without blocking */ if (ioctl(audio_fd, SNDCTL_DSP_GETISPACE, &info) == -1) { printf("Error: adin_oss: failed to get number of samples in the buffer\n"); return(ADIN_ERROR); } /* get them as much as possible */ size = sampnum * sizeof(SP16); if (size > info.bytes) size = info.bytes; size &= ~ 1; /* Force 16bit alignment */ cnt = read(audio_fd, buf, size); if ( cnt < 0 ) { printf("Error: adin_oss: failed to read samples\n"); return (ADIN_ERROR); } cnt /= sizeof(short); return(cnt); } /** * * @brief Close the current input stream (required) * * This function will be called when the input stream has reached * end of file (i.e. the last call of adin_read() returns ADIN_EOF) * * You may close a capture device, close an audio file, or * disconnect network client. * * If this function returns TRUE, Julius will go again to adin_open() * to open another stream. If returns FALSE, Julius will exit * the recognition loop. * * JuliusLib: This will be called at the end of j_recognize_stream(). * * @return TRUE on success, FALSE on failure. * * * @brief 入力ストリームを閉じる(必須) * * 現在のストリームを閉じる.この関数は,入力ストリームが終端(EOF) * に達したとき(すなわち adin_read() が ADIN_EOF を返したとき)に * 呼ばれる.デバイスを閉じる,ファイルを閉じる,あるいはネットワーク接続を * 切断するのに使うことができる. * * 正常終了としてTRUEを返したとき,Julius は adin_open() に戻って * 他のストリームを開こうとする. FALSE を返したときは,Julius は * 認識ループを抜ける. * * JuliusLib: この関数は j_recognize_stream() の最後で呼ばれる. * * @return 成功時 TRUE,失敗時 FALSE を返す. * */ boolean adin_close() { close(audio_fd); return TRUE; } /************************************************************************/ /** * * @brief A hook for Termination request (optional) * * This function will be called when Julius receives a Termination * request to stop running. This can be used to synchronize input * facility with Julius's running status. * * Termination will occur when Julius is running on module mode and * received TERMINATE command from client, or j_request_terminate() * is called inside application. On termination, Julius will stop * recognition immediately (discard current input if in process), * and wait until received RESUME command or call of j_request_resume(). * * This hook function will be called just after a Termination request. 
* Please note that this will be called when Julius receives request, * not on actual termination. * * @return TRUE on success, FALSE on failure. * * * @brief 中断要求用フック(任意) * * この関数を定義すると,Julius は中断要求を受け取った際にこの関数を呼び出す. * これを使って,Julius の中断・再開と同期した入力同期処理を実装することが * できる.(例:入力送信元に対して送信中断要求を出すなど) * * 中断要求は,Julius がアプリケーションやクライアントより受け取る * 認識中断の要求である.具体的には,Julius がモジュールモードで動作して * いる時に TERMINATE コマンドをクライアントから受け取ったときや, * JuliusLibを組み込んだアプリケーションが j_request_terminate() を * 呼んだときに発生する. * * 中断要求を受け取ると,Julius は現在の認識処理を中断する. * 認識途中であった場合,その入力を破棄して即時中断する. * 処理の再開は,RESUME コマンドか j_request_resume() の呼び出しで行われる. * * この関数は中断要求を Julius が受け取った時点で呼ばれる. * 実際に処理が中断した後で呼ばれるのではないことに注意されたい. * * @return 成功時 TRUE, エラー時 FALSE を返す. * * */ boolean adin_terminate() { printf("terminate request\n"); return TRUE; } /** * * @brief A hook for Pause request (optional) * * This function will be called when Julius receives a Pause request * to stop running. This can be used to synchronize input facility * with Julius's running status. * * Pause will occur when Julius is running on module mode and * received PAUSE command from client, or j_request_pause() * is called inside application. On pausing, Julius will * stop recognition and then wait until it receives RESUME command * or j_request_resume() is called. When pausing occurs while recognition is * running, Julius will process it to the end before stops. * * This hook function will be called just after a Pause request. * Please note that this will be called when Julius receives request, * not on actual pause. * * @return TRUE on success, FALSE on failure. * * * @brief 停止要求用フック(任意) * * この関数を定義すると,Julius は停止要求を受け取った際にこの関数を呼び出す. * これを使って,Julius の中断・再開と同期した入力同期処理を実装することが * できる.(例:入力送信元に対して送信中断要求を出すなど) * * 停止要求は,Julius がアプリケーションやクライアントより受け取る, * 認識の一時停止の要求である.具体的には,Julius がモジュールモードで動作して * いる時に PAUSE コマンドをクライアントから受け取ったときや, * JuliusLibを組み込んだアプリケーションが j_request_pause() を * 呼んだときに発生する. * * 停止要求を受け取ると,Julius は現在の認識処理を中断する. * 認識途中であった場合,その認識が終わるまで待ってから中断する. * 処理の再開は,RESUME コマンドか j_request_resume() の呼び出しで行われる. * * 中断要求 (adin_terminate) との違いは,認識途中に要求を受けたときの動作が * 異なる.中断要求では強制中断するが,停止要求ではその認識が終わるまで * 待ってから停止する. * * この関数は停止要求を Julius が受け取った時点で呼ばれる. * 実際に処理が停止した後で呼ばれるのではないことに注意されたい. * * @return 成功時 TRUE, エラー時 FALSE を返す. * * */ boolean adin_pause() { printf("pause request\n"); return TRUE; } /** * * @brief A hook for Resume request (optional) * * This function will be called when Julius received a resume request * to recover from pause/termination status. * * Resume will occur when Julius has been stopped by receiving RESUME * command from client on module mode, or j_request_resume() is called * inside application. * * This hook function will be called just after a resume request. * This can be used to make this A/D-in plugin cooperate with the * pause/resume status, for example to tell audio client to restart * audio streaming. * * This function is totally optional. * * @return TRUE on success, FALSE on failure. * * * @brief 認識再開要求用フック(任意) * * この関数を定義すると,Julius は停止状態からの認識再開要求の際に * この関数を呼び出す. * * 認識再開要求は,Julius がモジュールモードで動作して RESUME コマンドを * クライアントから受け取ったときや,JuliusLibを組み込んだアプリケーション * が j_request_resume() を呼んだときに発生する.この再開要求が発生 * すると,Julius は停止していた認識を再開する. * * 注意:この関数は,実際に停止したときに呼ばれるのではなく,Julius が * 要求を受け取った時点で,そのたびに呼ばれる.複数回呼ばれることや, * すでに動作中である場合にさらにこのコマンドを受け取ったときにも呼ば * れることがあることに注意されたい. * * @return 成功時 TRUE, エラー時 FALSE を返す. 
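 *
 * For illustration only (a sketch, not part of this sample plugin): the
 * three hooks adin_terminate(), adin_pause() and adin_resume() can share a
 * flag so that adin_read() knows whether its source should currently be
 * delivering data.  The flag name my_input_enabled is hypothetical.
 *
 * @code
 * static boolean my_input_enabled = TRUE;   // hypothetical shared flag
 *
 * boolean adin_terminate() { my_input_enabled = FALSE; return TRUE; }
 * boolean adin_pause()     { my_input_enabled = FALSE; return TRUE; }
 * boolean adin_resume()    { my_input_enabled = TRUE;  return TRUE; }
 * @endcode
 *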
* * */ boolean adin_resume() { printf("resume request\n"); return TRUE; } /** * * @brief A function to return current device name for information (optional) * * This function is totally optional. * * @return pointer to the device name string * * * @brief 入力ファイル・デバイス名を返す関数(任意) * * @return 入力ファイルあるいはデバイス名の文字列へのポインタ * * */ char * adin_input_name() { printf("input name function was called\n"); return("default"); } /* end of file */ julius-4.2.2/plugin/Makefile0000644001051700105040000000134611224410060014260 0ustar ritrlabSHELL=/bin/sh .SUFFIXES: .SUFFIXES: .c .jpi .c.jpi: $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< CC=gcc CFLAGS=-shared LDFLAGS= J_CFLAGS=-I../libsent/include -I../libjulius/include `../libsent/libsent-config --cflags` `../libjulius/libjulius-config --cflags` J_LDFLAGS=-L../libjulius `../libjulius/libjulius-config --libs` -L../libsent `../libsent/libsent-config --libs` all: adin_oss.jpi audio_postprocess.jpi fvin.jpi feature_postprocess.jpi calcmix.jpi generic_callback.jpi result.jpi calcmix.jpi: calcmix.c $(CC) $(CFLAGS) $(J_CFLAGS) -o calcmix.jpi $(LDFLAGS) $(J_LDFLAGS) calcmix.c generic_callback.jpi: generic_callback.c $(CC) $(CFLAGS) $(J_CFLAGS) -o generic_callback.jpi generic_callback.c $(LDFLAGS) $(J_LDFLAGS) clean: rm *.jpi julius-4.2.2/plugin/generic_callback.c0000644001051700105040000001037311066615330016227 0ustar ritrlab/** * @file generic_callback.c * * * @brief An example plugin using callback. * * * * @brief コールバックを使うプラグインのサンプル * * * @author Akinobu Lee * @date Wed Aug 13 23:50:27 2008 * * $Revision: 1.1 $ * */ /** * Required for a file * - get_plugin_info() * * Optional for a file * - initialize() * */ /** * using plugin function: * * - engine_startup() * */ // #include "plugin_defs.h" #include #define PLUGIN_TITLE "An example plugin using callback" /** * * @brief Initialization at loading time (optional) * * If defined, this will be called just before this plugin is loaded to Julius. * if this returns -1, the whole functions in this file will not be loaded. * * This function is OPTIONAL. * * * @brief 読み込み時の初期化(任意) * * 起動時,Julius がこのプラグインを読み込む際に最初に呼ばれる. * -1 を返すと,このプラグイン全体が読み込まれなくなる. * 実行可能性のチェックに使える. * * * * * @return 0 on success, -1 on failure. * */ int initialize() { return 0; } /** * * @brief Get information of this plugin (required) * * This function should return informations of this plugin file. * The required info will be specified by opcode: * - 0: return description string of this file into buf * * This will be called just after Julius find this file and after * initialize(). * * @param opcode [in] requested operation code * @param buf [out] buffer to store the return string * @param buflen [in] maximum length of buf * * @return 0 on success, -1 on failure. On failure, Julius will ignore this * plugin. * * * * @brief プラグイン情報取得(必須) * * このプラグインに関する情報を返す.与えられた opcode によって動作する. * - 0 の場合,このプラグインファイルの名称を与えられたバッファに格納する * * この関数は,Julius がこのプラグインを読み込んだ直後に呼ばれる. * * @param opcode [in] 要求動作コード (現在 0 のみ実装) * @param buf [out] 値を格納するバッファ * @param buflen [in] buf の最大長 * * @return エラー時 -1, 成功時 0 を返す.エラーとして -1 を返した場合, * このプラグイン全体は読み込まれない. * * */ int get_plugin_info(int opcode, char *buf, int buflen) { switch(opcode) { case 0: /* plugin description string */ strncpy(buf, PLUGIN_TITLE, buflen); break; } return 0; } /** * * A sample callback function to output RECREADY message. 
* * @param recog [in] engine instance * @param dummy [in] callback argument (dummy) * * * RECREADY を出力するコールバック用関数(サンプル) * * @param recog [in] エンジンインスタンス * @param dummy [in] コールバック引数(ダミー) * * */ static void status_recready(Recog *recog, void *dummy) { printf("<<<RECREADY>>>\n"); } /** * * @brief plugin function that will be called after engine startup. * * When a function of this name is defined in a plugin, it will * be called just after Julius has finished its whole startup sequence and before * input and recognition start. * * In this example, this function registers the local function * status_recready() as a CALLBACK_EVENT_SPEECH_READY callback. * This callback will be called every time Julius becomes ready for * recognition of the next incoming input. * * @param data [in] a data pointer, actually a pointer to an engine instance. * * @return 0 on success, -1 on error. On error, Julius will exit immediately. * * * @brief 認識エンジン起動完了時に呼び出されるプラグイン関数 * * この名前の関数が定義された場合,その関数は,Julius が全ての初期化を * 完了して起動プロセスを終えた直後,実際に音声入力を開いて認識が始ま * る前に呼ばれます. * * ここでは,この関数を使って,上記の関数 status_recready() を * CALLBACK_EVENT_SPEECH_READY コールバックとして登録しています. * このコールバックは Julius が入力ストリームからの次の音声入力待ち * 状態になったときに呼ばれます. * * @param data [in] データへのポインタ.実体はエンジンインスタンスへの * ポインタが渡される. * * @return 成功時 0 ,エラー時 -1 を返す.エラーの場合 Julius は異常終了する. * * */ int startup(void *data) { Recog *recog = data; callback_add(recog, CALLBACK_EVENT_SPEECH_READY, status_recready, NULL); return 0; } julius-4.2.2/plugin/00readme-ja.txt0000644001051700105040000000473111224410060015349 0ustar ritrlabSample plugins for Julius.  Julius-4.1 and later can be extended with plugins, and this directory contains the sample sources of such plugins.  For the full specification of the plugin interface see the Juliusbook; the sample sources also contain comments describing the specification.  Files: 00readme.txt  readme (English) / plugin_defs.h  common definitions for plugins / adin_oss.c  A/D-in plugin sample: OSS microphone input / audio_postprocess.c  audio postprocessing plugin template / fvin.c  feature vector input plugin template / feature_postprocess.c  feature postprocessing plugin template / calcmix.c  Gaussian mixture calculation plugin sample / Makefile  Makefile for Linux.  Compiling a plugin: a plugin file has the extension ".jpi" and is actually a shared object file.  On Linux and cygwin it can be compiled like this: % gcc -shared -o result.jpi result.c  When compiling on cygwin, add "-mno-cygwin": % gcc -shared -mno-cygwin -o result.jpi result.c  On Mac OS X (darwin): % gcc -bundle -flat_namespace -undefined suppress -o result.jpi result.c  Loading plugins into Julius: use the Julius option "-plugindir dirname" to specify the plugin directory; all ".jpi" files found in the specified directory will be loaded.  When a plugin adds its own options, specify "-plugindir" before those options.  Test 1 (result.jpi): result.c is a simple plugin that outputs recognition results.  Compile it and run Julius as follows: % cd plugin (this directory) % make result.jpi % cd .. % ./julius/julius ... -plugindir plugin  On Mac OS X, use Makefile.darwin instead: % make -f Makefile.darwin result.jpi  Test 2 (audio input plugin): adin_oss.c is an A/D-in plugin that captures audio with the OSS API.  Give "-input myadin" to Julius to use it: % cd plugin (this directory) % make adin_oss.jpi % cd .. % ./julius/julius -plugindir plugin -input myadin  The plugin also works with adintool and adinrec: % ./adinrec/adinrec -plugindir plugin -input myadin julius-4.2.2/plugin/audio_postprocess.c0000644001051700105040000001001011066615330016530 0ustar ritrlab/** * @file audio_postprocess.c * * * @brief A sample audio postprocessing plugin * * * * @brief オーディオ入力の後処理プラグインのサンプル * * * @author Akinobu Lee * @date Sun Aug 10 15:12:50 2008 * * $Revision: 1.1 $ * */ /** * Required for a file * - get_plugin_info() * * Optional for a file * - initialize() * */ /** * A/D-in postprocessing functions * * Required: * - adin_postprocess() * */ /***************************************************************************/ #include <stdio.h> #include <string.h> #include "plugin_defs.h" #define PLUGIN_TITLE "audio postprocess plugin for Julius" /** * * @brief Initialization at loading time (optional) * * If defined, this will be called just before this plugin is loaded to Julius. * If this returns -1, none of the functions in this file will be loaded. * * This function is OPTIONAL.
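 *
 * As an illustration only (not part of this sample): a plugin that depends
 * on some external prerequisite could refuse to be loaded here.  The
 * environment variable name below is purely hypothetical, and getenv()
 * requires <stdlib.h>.
 *
 * @code
 * int initialize()
 * {
 *   // refuse to load this plugin when a required setting is missing
 *   if (getenv("MYPLUGIN_CONFIG") == NULL) return -1;
 *   return 0;
 * }
 * @endcode
 *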
* * * @brief 読み込み時の初期化(任意) * * 起動時,Julius がこのプラグインを読み込む際に最初に呼ばれる. * -1 を返すと,このプラグイン全体が読み込まれなくなる. * 実行可能性のチェックに使える. * * * * * @return 0 on success, -1 on failure. * */ int initialize() { return 0; } /** * * @brief Get information of this plugin (required) * * This function should return informations of this plugin file. * The required info will be specified by opcode: * - 0: return description string of this file into buf * * This will be called just after Julius find this file and after * initialize(). * * @param opcode [in] requested operation code * @param buf [out] buffer to store the return string * @param buflen [in] maximum length of buf * * @return 0 on success, -1 on failure. On failure, Julius will ignore this * plugin. * * * * @brief プラグイン情報取得(必須) * * このプラグインに関する情報を返す.与えられた opcode によって動作する. * - 0 の場合,このプラグインファイルの名称を与えられたバッファに格納する * * この関数は,Julius がこのプラグインを読み込んだ直後に呼ばれる. * * @param opcode [in] 要求動作コード (現在 0 のみ実装) * @param buf [out] 値を格納するバッファ * @param buflen [in] buf の最大長 * * @return エラー時 -1, 成功時 0 を返す.エラーとして -1 を返した場合, * このプラグイン全体は読み込まれない. * * */ int get_plugin_info(int opcode, char *buf, int buflen) { switch(opcode) { case 0: /* plugin description string */ strncpy(buf, PLUGIN_TITLE, buflen); break; } return 0; } /** * * @brief Post-processing function for captured audio * * When defined, this function will be called at every audio input * fragments before any feature analysis. @a buf contains the small * fragment of captured audio input at a length of @a len, and this * will be called successively as input goes. * * You can monitor the incoming audio stream, and also can modify or * overwrite the content of @a buf to do some audio processing for the * incoming data like noise supression etc. * * If multiple plugins have this functions, they are all executed in order * of loading. * * @param buf [i/o] a fragment of audio inputs * @param len [in] length of @a buf (in samples) * * * * @brief 音声入力に対する後処理 * * この関数が定義された場合,Julius は入力された音声データに対して,特 * 徴量抽出を行う前にこの関数を呼び出す.@a buf には @a len の長さの音 * 声入力データ断片が入っている.この関数は,入力が進むたびにその短い * 断片ごとに繰り返し呼ばれる. * * この関数を使って入力音声データをモニタできるほかに,バッファ上の * データを直接書き換えることもできる.音声認識はこの関数が終わったあとの * データに対して行われるので,例えば雑音抑圧処理などをここで行う * ことも可能である. * * 複数のプラグインでこの関数が指定されている場合,それらは読み込み順に * 実行される. * * @param buf [i/o] 音声入力データ断片の入ったバッファ * @param len [in] @a buf の長さ(サンプル数) * * * */ void adin_postprocess(SP16 *buf, int len) { //printf("%d\n", len); } /* end of file */ julius-4.2.2/plugin/plugin_defs.h0000644001051700105040000000151212004452412015267 0ustar ritrlab/** * @file plugin_defs.h * * * @brief Definitions for JPI Plugin * * * * @brief JPI プラグイン用定義 * * * @author Akinobu Lee * @date Sat Aug 9 23:46:32 2008 * * $Revision: 1.3 $ * */ /* * Copyright (c) 1991-2012 Kawahara Lab., Kyoto University * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology * Copyright (c) 2005-2012 Julius project team, Nagoya Institute of Technology, Nagoya Institute of Technology * All rights reserved */ #ifndef __JULIUS_PLUGIN_DEFS__ #define __JULIUS_PLUGIN_DEFS__ typedef unsigned char boolean; typedef short SP16; #ifndef TRUE #define TRUE 1 #endif #ifndef FALSE #define FALSE 0 #endif /// Return code of adin_read() #define ADIN_EOF -1 #define ADIN_ERROR -2 #define ADIN_SEGMENT -3 #endif /* __JULIUS_PLUGIN_DEFS__ */
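As a closing illustration of the adin_postprocess() hook documented in audio_postprocess.c above, the following sketch (not part of the Julius distribution) modifies the captured samples in place by applying a fixed gain with saturation to the signed 16bit range.  It relies only on the SP16 type from plugin_defs.h; the gain factor of 2 is arbitrary and purely illustrative.

#include "plugin_defs.h"

/* Example adin_postprocess() variant: amplify each captured sample by a
   fixed factor and clip to the 16bit range before the data is passed on
   to feature extraction. */
void adin_postprocess(SP16 *buf, int len)
{
  int i;
  long v;
  for (i = 0; i < len; i++) {
    v = (long)buf[i] * 2;            /* arbitrary fixed gain */
    if (v > 32767) v = 32767;        /* saturate instead of wrapping */
    else if (v < -32768) v = -32768;
    buf[i] = (SP16)v;
  }
}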