einspline-0.9.2/0000777000113000011300000000000011311505425010472 500000000000000einspline-0.9.2/ltmain.sh0000644000113000011300000060446510665263176012264 00000000000000# ltmain.sh - Provide generalized library-building support services. # NOTE: Changing this file will not affect anything until you rerun configure. # # Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005, 2006, # 2007 Free Software Foundation, Inc. # Originally by Gordon Matzigkeit , 1996 # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # # As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a # configuration script generated by Autoconf, you may include it under # the same distribution terms that you use for the rest of that program. basename="s,^.*/,,g" # Work around backward compatibility issue on IRIX 6.5. On IRIX 6.4+, sh # is ksh but when the shell is invoked as "sh" and the current value of # the _XPG environment variable is not equal to 1 (one), the special # positional parameter $0, within a function call, is the name of the # function. 
progpath="$0" # The name of this program: progname=`echo "$progpath" | $SED $basename` modename="$progname" # Global variables: EXIT_SUCCESS=0 EXIT_FAILURE=1 PROGRAM=ltmain.sh PACKAGE=libtool VERSION=1.5.24 TIMESTAMP=" (1.1220.2.456 2007/06/24 02:25:32)" # Be Bourne compatible (taken from Autoconf:_AS_BOURNE_COMPATIBLE). if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then emulate sh NULLCMD=: # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which # is contrary to our usage. Disable this feature. alias -g '${1+"$@"}'='"$@"' setopt NO_GLOB_SUBST else case `(set -o) 2>/dev/null` in *posix*) set -o posix;; esac fi BIN_SH=xpg4; export BIN_SH # for Tru64 DUALCASE=1; export DUALCASE # for MKS sh # Check that we have a working $echo. if test "X$1" = X--no-reexec; then # Discard the --no-reexec flag, and continue. shift elif test "X$1" = X--fallback-echo; then # Avoid inline document here, it may be left over : elif test "X`($echo '\t') 2>/dev/null`" = 'X\t'; then # Yippee, $echo works! : else # Restart under the correct shell, and then maybe $echo will work. exec $SHELL "$progpath" --no-reexec ${1+"$@"} fi if test "X$1" = X--fallback-echo; then # used as fallback echo shift cat <&2 $echo "Fatal configuration error. See the $PACKAGE docs for more information." 1>&2 exit $EXIT_FAILURE fi # Global variables. mode=$default_mode nonopt= prev= prevopt= run= show="$echo" show_help= execute_dlfiles= duplicate_deps=no preserve_args= lo2o="s/\\.lo\$/.${objext}/" o2lo="s/\\.${objext}\$/.lo/" extracted_archives= extracted_serial=0 ##################################### # Shell function definitions: # This seems to be the best place for them # func_mktempdir [string] # Make a temporary directory that won't clash with other running # libtool processes, and avoids race conditions if possible. If # given, STRING is the basename for that directory. 
# Output: the directory name on stdout.  In dry-run mode ($run is ":")
# the name is only computed and nothing is created.
# Exits with $EXIT_FAILURE when no directory could be created.
# Globals read: TMPDIR, progname, run, mkdir, echo, Xsed, EXIT_FAILURE.
func_mktempdir ()
{
    my_template="${TMPDIR-/tmp}/${1-$progname}"

    if test "$run" = ":"; then
      # Return a directory name, but don't create it in dry-run mode
      my_tmpdir="${my_template}-$$"
    else

      # If mktemp works, use that first and foremost
      my_tmpdir=`mktemp -d "${my_template}-XXXXXXXX" 2>/dev/null`
      my_tmpdir_created=yes

      if test ! -d "$my_tmpdir"; then
        # Failing that, at least try and use $RANDOM to avoid a race
        my_tmpdir="${my_template}-${RANDOM-0}$$"

        save_mktempdir_umask=`umask`
        umask 0077
        # Remember whether *we* created the directory: a directory that
        # already exists under this predictable name must not be
        # accepted by the `test -d' check below.
        $mkdir "$my_tmpdir" || my_tmpdir_created=no
        umask "$save_mktempdir_umask"
      fi

      # If we're not in dry-run mode, bomb out on failure
      if test "$my_tmpdir_created" != yes || test ! -d "$my_tmpdir"; then
        $echo "cannot create temporary directory \`$my_tmpdir'" 1>&2
        exit $EXIT_FAILURE
      fi
    fi

    $echo "X$my_tmpdir" | $Xsed
}


# func_win32_libid arg
# return the library type of file 'arg'
#
# Need a lot of goo to handle *both* DLLs and import libs
# Has to be a shell function in order to 'eat' the argument
# that is supplied when $file_magic_command is called.
#
# Prints exactly one of: "unknown", "x86 archive import",
# "x86 archive static", "x86 DLL".
# Globals read: OBJDUMP, NM, SED, EGREP, echo.
func_win32_libid ()
{
    win32_libid_type="unknown"
    # "$1" is quoted everywhere so that a path containing blanks is
    # handed to file/objdump/nm as a single operand.
    win32_fileres=`file -L "$1" 2>/dev/null`
    case $win32_fileres in
    *ar\ archive\ import\ library*) # definitely import
      win32_libid_type="x86 archive import"
      ;;
    *ar\ archive*) # could be an import, or static
      # NOTE(review): the 2>/dev/null below belongs to $SED, not to
      # $OBJDUMP, exactly as before; objdump diagnostics stay visible.
      if eval "$OBJDUMP -f \"\$1\"" | $SED -e '10q' 2>/dev/null | \
        $EGREP -e 'file format pe-i386(.*architecture: i386)?' >/dev/null ; then
        # An " I " symbol within the first 100 lines of nm output marks
        # the archive as an import library.
        win32_nmres=`eval "$NM -f posix -A \"\\$1\"" | \
          $SED -n -e '1,100{
            / I /{
              s,.*,import,
              p
              q
            }
          }'`
        case $win32_nmres in
        import*) win32_libid_type="x86 archive import";;
        *)       win32_libid_type="x86 archive static";;
        esac
      fi
      ;;
    *DLL*)
      win32_libid_type="x86 DLL"
      ;;
    *executable*) # but shell scripts are "executable" too...
      case $win32_fileres in
      *MS\ Windows\ PE\ Intel*)
        win32_libid_type="x86 DLL"
        ;;
      esac
      ;;
    esac
    $echo "$win32_libid_type"
}


# func_infer_tag arg
# Infer tagged configuration to use if any are available and
# if one wasn't chosen via the "--tag" command line option.
# Only attempt this if the compiler in the base compile # command doesn't match the default compiler. # arg is usually of the form 'gcc ...' func_infer_tag () { if test -n "$available_tags" && test -z "$tagname"; then CC_quoted= for arg in $CC; do case $arg in *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") arg="\"$arg\"" ;; esac CC_quoted="$CC_quoted $arg" done case $@ in # Blanks in the command may have been stripped by the calling shell, # but not from the CC environment variable when configure was run. " $CC "* | "$CC "* | " `$echo $CC` "* | "`$echo $CC` "* | " $CC_quoted"* | "$CC_quoted "* | " `$echo $CC_quoted` "* | "`$echo $CC_quoted` "*) ;; # Blanks at the start of $base_compile will cause this to fail # if we don't check for them as well. *) for z in $available_tags; do if grep "^# ### BEGIN LIBTOOL TAG CONFIG: $z$" < "$progpath" > /dev/null; then # Evaluate the configuration. eval "`${SED} -n -e '/^# ### BEGIN LIBTOOL TAG CONFIG: '$z'$/,/^# ### END LIBTOOL TAG CONFIG: '$z'$/p' < $progpath`" CC_quoted= for arg in $CC; do # Double-quote args containing other shell metacharacters. case $arg in *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") arg="\"$arg\"" ;; esac CC_quoted="$CC_quoted $arg" done case "$@ " in " $CC "* | "$CC "* | " `$echo $CC` "* | "`$echo $CC` "* | " $CC_quoted"* | "$CC_quoted "* | " `$echo $CC_quoted` "* | "`$echo $CC_quoted` "*) # The compiler in the base compile command matches # the one in the tagged configuration. # Assume this is the tagged configuration we want. tagname=$z break ;; esac fi done # If $tagname still isn't set, then no tagged configuration # was found and let the user know that the "--tag" command # line option must be used. 
if test -z "$tagname"; then $echo "$modename: unable to infer tagged configuration" $echo "$modename: specify a tag with \`--tag'" 1>&2 exit $EXIT_FAILURE # else # $echo "$modename: using $tagname tagged configuration" fi ;; esac fi } # func_extract_an_archive dir oldlib func_extract_an_archive () { f_ex_an_ar_dir="$1"; shift f_ex_an_ar_oldlib="$1" $show "(cd $f_ex_an_ar_dir && $AR x $f_ex_an_ar_oldlib)" $run eval "(cd \$f_ex_an_ar_dir && $AR x \$f_ex_an_ar_oldlib)" || exit $? if ($AR t "$f_ex_an_ar_oldlib" | sort | sort -uc >/dev/null 2>&1); then : else $echo "$modename: ERROR: object name conflicts: $f_ex_an_ar_dir/$f_ex_an_ar_oldlib" 1>&2 exit $EXIT_FAILURE fi } # func_extract_archives gentop oldlib ... func_extract_archives () { my_gentop="$1"; shift my_oldlibs=${1+"$@"} my_oldobjs="" my_xlib="" my_xabs="" my_xdir="" my_status="" $show "${rm}r $my_gentop" $run ${rm}r "$my_gentop" $show "$mkdir $my_gentop" $run $mkdir "$my_gentop" my_status=$? if test "$my_status" -ne 0 && test ! -d "$my_gentop"; then exit $my_status fi for my_xlib in $my_oldlibs; do # Extract the objects. case $my_xlib in [\\/]* | [A-Za-z]:[\\/]*) my_xabs="$my_xlib" ;; *) my_xabs=`pwd`"/$my_xlib" ;; esac my_xlib=`$echo "X$my_xlib" | $Xsed -e 's%^.*/%%'` my_xlib_u=$my_xlib while :; do case " $extracted_archives " in *" $my_xlib_u "*) extracted_serial=`expr $extracted_serial + 1` my_xlib_u=lt$extracted_serial-$my_xlib ;; *) break ;; esac done extracted_archives="$extracted_archives $my_xlib_u" my_xdir="$my_gentop/$my_xlib_u" $show "${rm}r $my_xdir" $run ${rm}r "$my_xdir" $show "$mkdir $my_xdir" $run $mkdir "$my_xdir" exit_status=$? if test "$exit_status" -ne 0 && test ! -d "$my_xdir"; then exit $exit_status fi case $host in *-darwin*) $show "Extracting $my_xabs" # Do not bother doing anything if just a dry run if test -z "$run"; then darwin_orig_dir=`pwd` cd $my_xdir || exit $? 
darwin_archive=$my_xabs darwin_curdir=`pwd` darwin_base_archive=`$echo "X$darwin_archive" | $Xsed -e 's%^.*/%%'` darwin_arches=`lipo -info "$darwin_archive" 2>/dev/null | $EGREP Architectures 2>/dev/null` if test -n "$darwin_arches"; then darwin_arches=`echo "$darwin_arches" | $SED -e 's/.*are://'` darwin_arch= $show "$darwin_base_archive has multiple architectures $darwin_arches" for darwin_arch in $darwin_arches ; do mkdir -p "unfat-$$/${darwin_base_archive}-${darwin_arch}" lipo -thin $darwin_arch -output "unfat-$$/${darwin_base_archive}-${darwin_arch}/${darwin_base_archive}" "${darwin_archive}" cd "unfat-$$/${darwin_base_archive}-${darwin_arch}" func_extract_an_archive "`pwd`" "${darwin_base_archive}" cd "$darwin_curdir" $rm "unfat-$$/${darwin_base_archive}-${darwin_arch}/${darwin_base_archive}" done # $darwin_arches ## Okay now we have a bunch of thin objects, gotta fatten them up :) darwin_filelist=`find unfat-$$ -type f -name \*.o -print -o -name \*.lo -print| xargs basename | sort -u | $NL2SP` darwin_file= darwin_files= for darwin_file in $darwin_filelist; do darwin_files=`find unfat-$$ -name $darwin_file -print | $NL2SP` lipo -create -output "$darwin_file" $darwin_files done # $darwin_filelist ${rm}r unfat-$$ cd "$darwin_orig_dir" else cd "$darwin_orig_dir" func_extract_an_archive "$my_xdir" "$my_xabs" fi # $darwin_arches fi # $run ;; *) func_extract_an_archive "$my_xdir" "$my_xabs" ;; esac my_oldobjs="$my_oldobjs "`find $my_xdir -name \*.$objext -print -o -name \*.lo -print | $NL2SP` done func_extract_archives_result="$my_oldobjs" } # End of Shell function definitions ##################################### # Darwin sucks eval std_shrext=\"$shrext_cmds\" disable_libs=no # Parse our command line options once, thoroughly. while test "$#" -gt 0 do arg="$1" shift case $arg in -*=*) optarg=`$echo "X$arg" | $Xsed -e 's/[-_a-zA-Z0-9]*=//'` ;; *) optarg= ;; esac # If the previous option needs an argument, assign it. 
if test -n "$prev"; then case $prev in execute_dlfiles) execute_dlfiles="$execute_dlfiles $arg" ;; tag) tagname="$arg" preserve_args="${preserve_args}=$arg" # Check whether tagname contains only valid characters case $tagname in *[!-_A-Za-z0-9,/]*) $echo "$progname: invalid tag name: $tagname" 1>&2 exit $EXIT_FAILURE ;; esac case $tagname in CC) # Don't test for the "default" C tag, as we know, it's there, but # not specially marked. ;; *) if grep "^# ### BEGIN LIBTOOL TAG CONFIG: $tagname$" < "$progpath" > /dev/null; then taglist="$taglist $tagname" # Evaluate the configuration. eval "`${SED} -n -e '/^# ### BEGIN LIBTOOL TAG CONFIG: '$tagname'$/,/^# ### END LIBTOOL TAG CONFIG: '$tagname'$/p' < $progpath`" else $echo "$progname: ignoring unknown tag $tagname" 1>&2 fi ;; esac ;; *) eval "$prev=\$arg" ;; esac prev= prevopt= continue fi # Have we seen a non-optional argument yet? case $arg in --help) show_help=yes ;; --version) echo "\ $PROGRAM (GNU $PACKAGE) $VERSION$TIMESTAMP Copyright (C) 2007 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." exit $? ;; --config) ${SED} -e '1,/^# ### BEGIN LIBTOOL CONFIG/d' -e '/^# ### END LIBTOOL CONFIG/,$d' $progpath # Now print the configurations for the tags. for tagname in $taglist; do ${SED} -n -e "/^# ### BEGIN LIBTOOL TAG CONFIG: $tagname$/,/^# ### END LIBTOOL TAG CONFIG: $tagname$/p" < "$progpath" done exit $? ;; --debug) $echo "$progname: enabling shell trace mode" set -x preserve_args="$preserve_args $arg" ;; --dry-run | -n) run=: ;; --features) $echo "host: $host" if test "$build_libtool_libs" = yes; then $echo "enable shared libraries" else $echo "disable shared libraries" fi if test "$build_old_libs" = yes; then $echo "enable static libraries" else $echo "disable static libraries" fi exit $? 
;; --finish) mode="finish" ;; --mode) prevopt="--mode" prev=mode ;; --mode=*) mode="$optarg" ;; --preserve-dup-deps) duplicate_deps="yes" ;; --quiet | --silent) show=: preserve_args="$preserve_args $arg" ;; --tag) prevopt="--tag" prev=tag preserve_args="$preserve_args --tag" ;; --tag=*) set tag "$optarg" ${1+"$@"} shift prev=tag preserve_args="$preserve_args --tag" ;; -dlopen) prevopt="-dlopen" prev=execute_dlfiles ;; -*) $echo "$modename: unrecognized option \`$arg'" 1>&2 $echo "$help" 1>&2 exit $EXIT_FAILURE ;; *) nonopt="$arg" break ;; esac done if test -n "$prevopt"; then $echo "$modename: option \`$prevopt' requires an argument" 1>&2 $echo "$help" 1>&2 exit $EXIT_FAILURE fi case $disable_libs in no) ;; shared) build_libtool_libs=no build_old_libs=yes ;; static) build_old_libs=`case $build_libtool_libs in yes) echo no;; *) echo yes;; esac` ;; esac # If this variable is set in any of the actions, the command in it # will be execed at the end. This prevents here-documents from being # left over by shells. exec_cmd= if test -z "$show_help"; then # Infer the operation mode. if test -z "$mode"; then $echo "*** Warning: inferring the mode of operation is deprecated." 1>&2 $echo "*** Future versions of Libtool will require --mode=MODE be specified." 1>&2 case $nonopt in *cc | cc* | *++ | gcc* | *-gcc* | g++* | xlc*) mode=link for arg do case $arg in -c) mode=compile break ;; esac done ;; *db | *dbx | *strace | *truss) mode=execute ;; *install*|cp|mv) mode=install ;; *rm) mode=uninstall ;; *) # If we have no mode, but dlfiles were specified, then do execute mode. test -n "$execute_dlfiles" && mode=execute # Just use the default operation mode. if test -z "$mode"; then if test -n "$nonopt"; then $echo "$modename: warning: cannot infer operation mode from \`$nonopt'" 1>&2 else $echo "$modename: warning: cannot infer operation mode without MODE-ARGS" 1>&2 fi fi ;; esac fi # Only execute mode is allowed to have -dlopen flags. 
if test -n "$execute_dlfiles" && test "$mode" != execute; then $echo "$modename: unrecognized option \`-dlopen'" 1>&2 $echo "$help" 1>&2 exit $EXIT_FAILURE fi # Change the help message to a mode-specific one. generic_help="$help" help="Try \`$modename --help --mode=$mode' for more information." # These modes are in order of execution frequency so that they run quickly. case $mode in # libtool compile mode compile) modename="$modename: compile" # Get the compilation command and the source file. base_compile= srcfile="$nonopt" # always keep a non-empty value in "srcfile" suppress_opt=yes suppress_output= arg_mode=normal libobj= later= for arg do case $arg_mode in arg ) # do not "continue". Instead, add this to base_compile lastarg="$arg" arg_mode=normal ;; target ) libobj="$arg" arg_mode=normal continue ;; normal ) # Accept any command-line options. case $arg in -o) if test -n "$libobj" ; then $echo "$modename: you cannot specify \`-o' more than once" 1>&2 exit $EXIT_FAILURE fi arg_mode=target continue ;; -static | -prefer-pic | -prefer-non-pic) later="$later $arg" continue ;; -no-suppress) suppress_opt=no continue ;; -Xcompiler) arg_mode=arg # the next one goes into the "base_compile" arg list continue # The current "srcfile" will either be retained or ;; # replaced later. I would guess that would be a bug. -Wc,*) args=`$echo "X$arg" | $Xsed -e "s/^-Wc,//"` lastarg= save_ifs="$IFS"; IFS=',' for arg in $args; do IFS="$save_ifs" # Double-quote args containing other shell metacharacters. # Many Bourne shells cannot handle close brackets correctly # in scan sets, so we specify it separately. case $arg in *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") arg="\"$arg\"" ;; esac lastarg="$lastarg $arg" done IFS="$save_ifs" lastarg=`$echo "X$lastarg" | $Xsed -e "s/^ //"` # Add the arguments to base_compile. base_compile="$base_compile $lastarg" continue ;; * ) # Accept the current argument as the source file. # The previous "srcfile" becomes the current argument. 
# lastarg="$srcfile" srcfile="$arg" ;; esac # case $arg ;; esac # case $arg_mode # Aesthetically quote the previous argument. lastarg=`$echo "X$lastarg" | $Xsed -e "$sed_quote_subst"` case $lastarg in # Double-quote args containing other shell metacharacters. # Many Bourne shells cannot handle close brackets correctly # in scan sets, and some SunOS ksh mistreat backslash-escaping # in scan sets (worked around with variable expansion), # and furthermore cannot handle '|' '&' '(' ')' in scan sets # at all, so we specify them separately. *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") lastarg="\"$lastarg\"" ;; esac base_compile="$base_compile $lastarg" done # for arg case $arg_mode in arg) $echo "$modename: you must specify an argument for -Xcompile" exit $EXIT_FAILURE ;; target) $echo "$modename: you must specify a target with \`-o'" 1>&2 exit $EXIT_FAILURE ;; *) # Get the name of the library object. [ -z "$libobj" ] && libobj=`$echo "X$srcfile" | $Xsed -e 's%^.*/%%'` ;; esac # Recognize several different file suffixes. # If the user specifies -o file.o, it is replaced with file.lo xform='[cCFSifmso]' case $libobj in *.ada) xform=ada ;; *.adb) xform=adb ;; *.ads) xform=ads ;; *.asm) xform=asm ;; *.c++) xform=c++ ;; *.cc) xform=cc ;; *.ii) xform=ii ;; *.class) xform=class ;; *.cpp) xform=cpp ;; *.cxx) xform=cxx ;; *.[fF][09]?) xform=[fF][09]. 
;; *.for) xform=for ;; *.java) xform=java ;; *.obj) xform=obj ;; esac libobj=`$echo "X$libobj" | $Xsed -e "s/\.$xform$/.lo/"` case $libobj in *.lo) obj=`$echo "X$libobj" | $Xsed -e "$lo2o"` ;; *) $echo "$modename: cannot determine name of library object from \`$libobj'" 1>&2 exit $EXIT_FAILURE ;; esac func_infer_tag $base_compile for arg in $later; do case $arg in -static) build_old_libs=yes continue ;; -prefer-pic) pic_mode=yes continue ;; -prefer-non-pic) pic_mode=no continue ;; esac done qlibobj=`$echo "X$libobj" | $Xsed -e "$sed_quote_subst"` case $qlibobj in *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") qlibobj="\"$qlibobj\"" ;; esac test "X$libobj" != "X$qlibobj" \ && $echo "X$libobj" | grep '[]~#^*{};<>?"'"'"' &()|`$[]' \ && $echo "$modename: libobj name \`$libobj' may not contain shell special characters." objname=`$echo "X$obj" | $Xsed -e 's%^.*/%%'` xdir=`$echo "X$obj" | $Xsed -e 's%/[^/]*$%%'` if test "X$xdir" = "X$obj"; then xdir= else xdir=$xdir/ fi lobj=${xdir}$objdir/$objname if test -z "$base_compile"; then $echo "$modename: you must specify a compilation command" 1>&2 $echo "$help" 1>&2 exit $EXIT_FAILURE fi # Delete any leftover library objects. 
if test "$build_old_libs" = yes; then removelist="$obj $lobj $libobj ${libobj}T" else removelist="$lobj $libobj ${libobj}T" fi $run $rm $removelist trap "$run $rm $removelist; exit $EXIT_FAILURE" 1 2 15 # On Cygwin there's no "real" PIC flag so we must build both object types case $host_os in cygwin* | mingw* | pw32* | os2*) pic_mode=default ;; esac if test "$pic_mode" = no && test "$deplibs_check_method" != pass_all; then # non-PIC code in shared libraries is not supported pic_mode=default fi # Calculate the filename of the output object if compiler does # not support -o with -c if test "$compiler_c_o" = no; then output_obj=`$echo "X$srcfile" | $Xsed -e 's%^.*/%%' -e 's%\.[^.]*$%%'`.${objext} lockfile="$output_obj.lock" removelist="$removelist $output_obj $lockfile" trap "$run $rm $removelist; exit $EXIT_FAILURE" 1 2 15 else output_obj= need_locks=no lockfile= fi # Lock this critical section if it is needed # We use this script file to make the link, it avoids creating a new file if test "$need_locks" = yes; then until $run ln "$progpath" "$lockfile" 2>/dev/null; do $show "Waiting for $lockfile to be removed" sleep 2 done elif test "$need_locks" = warn; then if test -f "$lockfile"; then $echo "\ *** ERROR, $lockfile exists and contains: `cat $lockfile 2>/dev/null` This indicates that another process is trying to use the same temporary object file, and libtool could not work around it because your compiler does not support \`-c' and \`-o' together. If you repeat this compilation, it may succeed, by chance, but you had better avoid parallel builds (make -j) in this platform, or get a better compiler." 
$run $rm $removelist exit $EXIT_FAILURE fi $echo "$srcfile" > "$lockfile" fi if test -n "$fix_srcfile_path"; then eval srcfile=\"$fix_srcfile_path\" fi qsrcfile=`$echo "X$srcfile" | $Xsed -e "$sed_quote_subst"` case $qsrcfile in *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") qsrcfile="\"$qsrcfile\"" ;; esac $run $rm "$libobj" "${libobj}T" # Create a libtool object file (analogous to a ".la" file), # but don't create it if we're doing a dry run. test -z "$run" && cat > ${libobj}T </dev/null`" != "X$srcfile"; then $echo "\ *** ERROR, $lockfile contains: `cat $lockfile 2>/dev/null` but it should contain: $srcfile This indicates that another process is trying to use the same temporary object file, and libtool could not work around it because your compiler does not support \`-c' and \`-o' together. If you repeat this compilation, it may succeed, by chance, but you had better avoid parallel builds (make -j) in this platform, or get a better compiler." $run $rm $removelist exit $EXIT_FAILURE fi # Just move the object if needed, then go on to compile the next one if test -n "$output_obj" && test "X$output_obj" != "X$lobj"; then $show "$mv $output_obj $lobj" if $run $mv $output_obj $lobj; then : else error=$? $run $rm $removelist exit $error fi fi # Append the name of the PIC object to the libtool object file. test -z "$run" && cat >> ${libobj}T <> ${libobj}T </dev/null`" != "X$srcfile"; then $echo "\ *** ERROR, $lockfile contains: `cat $lockfile 2>/dev/null` but it should contain: $srcfile This indicates that another process is trying to use the same temporary object file, and libtool could not work around it because your compiler does not support \`-c' and \`-o' together. If you repeat this compilation, it may succeed, by chance, but you had better avoid parallel builds (make -j) in this platform, or get a better compiler." 
$run $rm $removelist exit $EXIT_FAILURE fi # Just move the object if needed if test -n "$output_obj" && test "X$output_obj" != "X$obj"; then $show "$mv $output_obj $obj" if $run $mv $output_obj $obj; then : else error=$? $run $rm $removelist exit $error fi fi # Append the name of the non-PIC object the libtool object file. # Only append if the libtool object file exists. test -z "$run" && cat >> ${libobj}T <> ${libobj}T <&2 fi if test -n "$link_static_flag"; then dlopen_self=$dlopen_self_static fi prefer_static_libs=yes ;; -static) if test -z "$pic_flag" && test -n "$link_static_flag"; then dlopen_self=$dlopen_self_static fi prefer_static_libs=built ;; -static-libtool-libs) if test -z "$pic_flag" && test -n "$link_static_flag"; then dlopen_self=$dlopen_self_static fi prefer_static_libs=yes ;; esac build_libtool_libs=no build_old_libs=yes break ;; esac done # See if our shared archives depend on static archives. test -n "$old_archive_from_new_cmds" && build_old_libs=yes # Go through the arguments, transforming them on the way. while test "$#" -gt 0; do arg="$1" shift case $arg in *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") qarg=\"`$echo "X$arg" | $Xsed -e "$sed_quote_subst"`\" ### testsuite: skip nested quoting test ;; *) qarg=$arg ;; esac libtool_args="$libtool_args $qarg" # If the previous option needs an argument, assign it. if test -n "$prev"; then case $prev in output) compile_command="$compile_command @OUTPUT@" finalize_command="$finalize_command @OUTPUT@" ;; esac case $prev in dlfiles|dlprefiles) if test "$preload" = no; then # Add the symbol object into the linking commands. compile_command="$compile_command @SYMFILE@" finalize_command="$finalize_command @SYMFILE@" preload=yes fi case $arg in *.la | *.lo) ;; # We handle these cases below. 
force) if test "$dlself" = no; then dlself=needless export_dynamic=yes fi prev= continue ;; self) if test "$prev" = dlprefiles; then dlself=yes elif test "$prev" = dlfiles && test "$dlopen_self" != yes; then dlself=yes else dlself=needless export_dynamic=yes fi prev= continue ;; *) if test "$prev" = dlfiles; then dlfiles="$dlfiles $arg" else dlprefiles="$dlprefiles $arg" fi prev= continue ;; esac ;; expsyms) export_symbols="$arg" if test ! -f "$arg"; then $echo "$modename: symbol file \`$arg' does not exist" exit $EXIT_FAILURE fi prev= continue ;; expsyms_regex) export_symbols_regex="$arg" prev= continue ;; inst_prefix) inst_prefix_dir="$arg" prev= continue ;; precious_regex) precious_files_regex="$arg" prev= continue ;; release) release="-$arg" prev= continue ;; objectlist) if test -f "$arg"; then save_arg=$arg moreargs= for fil in `cat $save_arg` do # moreargs="$moreargs $fil" arg=$fil # A libtool-controlled object. # Check to see that this really is a libtool object. if (${SED} -e '2q' $arg | grep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then pic_object= non_pic_object= # Read the .lo file # If there is no directory component, then add one. case $arg in */* | *\\*) . $arg ;; *) . ./$arg ;; esac if test -z "$pic_object" || \ test -z "$non_pic_object" || test "$pic_object" = none && \ test "$non_pic_object" = none; then $echo "$modename: cannot find name of object for \`$arg'" 1>&2 exit $EXIT_FAILURE fi # Extract subdirectory from the argument. xdir=`$echo "X$arg" | $Xsed -e 's%/[^/]*$%%'` if test "X$xdir" = "X$arg"; then xdir= else xdir="$xdir/" fi if test "$pic_object" != none; then # Prepend the subdirectory the object is found in. pic_object="$xdir$pic_object" if test "$prev" = dlfiles; then if test "$build_libtool_libs" = yes && test "$dlopen_support" = yes; then dlfiles="$dlfiles $pic_object" prev= continue else # If libtool objects are unsupported, then we need to preload. prev=dlprefiles fi fi # CHECK ME: I think I busted this. 
-Ossama if test "$prev" = dlprefiles; then # Preload the old-style object. dlprefiles="$dlprefiles $pic_object" prev= fi # A PIC object. libobjs="$libobjs $pic_object" arg="$pic_object" fi # Non-PIC object. if test "$non_pic_object" != none; then # Prepend the subdirectory the object is found in. non_pic_object="$xdir$non_pic_object" # A standard non-PIC object non_pic_objects="$non_pic_objects $non_pic_object" if test -z "$pic_object" || test "$pic_object" = none ; then arg="$non_pic_object" fi else # If the PIC object exists, use it instead. # $xdir was prepended to $pic_object above. non_pic_object="$pic_object" non_pic_objects="$non_pic_objects $non_pic_object" fi else # Only an error if not doing a dry-run. if test -z "$run"; then $echo "$modename: \`$arg' is not a valid libtool object" 1>&2 exit $EXIT_FAILURE else # Dry-run case. # Extract subdirectory from the argument. xdir=`$echo "X$arg" | $Xsed -e 's%/[^/]*$%%'` if test "X$xdir" = "X$arg"; then xdir= else xdir="$xdir/" fi pic_object=`$echo "X${xdir}${objdir}/${arg}" | $Xsed -e "$lo2o"` non_pic_object=`$echo "X${xdir}${arg}" | $Xsed -e "$lo2o"` libobjs="$libobjs $pic_object" non_pic_objects="$non_pic_objects $non_pic_object" fi fi done else $echo "$modename: link input file \`$save_arg' does not exist" exit $EXIT_FAILURE fi arg=$save_arg prev= continue ;; rpath | xrpath) # We need an absolute path. 
case $arg in [\\/]* | [A-Za-z]:[\\/]*) ;; *) $echo "$modename: only absolute run-paths are allowed" 1>&2 exit $EXIT_FAILURE ;; esac if test "$prev" = rpath; then case "$rpath " in *" $arg "*) ;; *) rpath="$rpath $arg" ;; esac else case "$xrpath " in *" $arg "*) ;; *) xrpath="$xrpath $arg" ;; esac fi prev= continue ;; xcompiler) compiler_flags="$compiler_flags $qarg" prev= compile_command="$compile_command $qarg" finalize_command="$finalize_command $qarg" continue ;; xlinker) linker_flags="$linker_flags $qarg" compiler_flags="$compiler_flags $wl$qarg" prev= compile_command="$compile_command $wl$qarg" finalize_command="$finalize_command $wl$qarg" continue ;; xcclinker) linker_flags="$linker_flags $qarg" compiler_flags="$compiler_flags $qarg" prev= compile_command="$compile_command $qarg" finalize_command="$finalize_command $qarg" continue ;; shrext) shrext_cmds="$arg" prev= continue ;; darwin_framework|darwin_framework_skip) test "$prev" = "darwin_framework" && compiler_flags="$compiler_flags $arg" compile_command="$compile_command $arg" finalize_command="$finalize_command $arg" prev= continue ;; *) eval "$prev=\"\$arg\"" prev= continue ;; esac fi # test -n "$prev" prevarg="$arg" case $arg in -all-static) if test -n "$link_static_flag"; then compile_command="$compile_command $link_static_flag" finalize_command="$finalize_command $link_static_flag" fi continue ;; -allow-undefined) # FIXME: remove this flag sometime in the future. 
$echo "$modename: \`-allow-undefined' is deprecated because it is the default" 1>&2 continue ;; -avoid-version) avoid_version=yes continue ;; -dlopen) prev=dlfiles continue ;; -dlpreopen) prev=dlprefiles continue ;; -export-dynamic) export_dynamic=yes continue ;; -export-symbols | -export-symbols-regex) if test -n "$export_symbols" || test -n "$export_symbols_regex"; then $echo "$modename: more than one -exported-symbols argument is not allowed" exit $EXIT_FAILURE fi if test "X$arg" = "X-export-symbols"; then prev=expsyms else prev=expsyms_regex fi continue ;; -framework|-arch|-isysroot) case " $CC " in *" ${arg} ${1} "* | *" ${arg} ${1} "*) prev=darwin_framework_skip ;; *) compiler_flags="$compiler_flags $arg" prev=darwin_framework ;; esac compile_command="$compile_command $arg" finalize_command="$finalize_command $arg" continue ;; -inst-prefix-dir) prev=inst_prefix continue ;; # The native IRIX linker understands -LANG:*, -LIST:* and -LNO:* # so, if we see these flags be careful not to treat them like -L -L[A-Z][A-Z]*:*) case $with_gcc/$host in no/*-*-irix* | /*-*-irix*) compile_command="$compile_command $arg" finalize_command="$finalize_command $arg" ;; esac continue ;; -L*) dir=`$echo "X$arg" | $Xsed -e 's/^-L//'` # We need an absolute path. 
case $dir in [\\/]* | [A-Za-z]:[\\/]*) ;; *) absdir=`cd "$dir" && pwd` if test -z "$absdir"; then $echo "$modename: cannot determine absolute directory name of \`$dir'" 1>&2 absdir="$dir" notinst_path="$notinst_path $dir" fi dir="$absdir" ;; esac case "$deplibs " in *" -L$dir "*) ;; *) deplibs="$deplibs -L$dir" lib_search_path="$lib_search_path $dir" ;; esac case $host in *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2*) testbindir=`$echo "X$dir" | $Xsed -e 's*/lib$*/bin*'` case :$dllsearchpath: in *":$dir:"*) ;; *) dllsearchpath="$dllsearchpath:$dir";; esac case :$dllsearchpath: in *":$testbindir:"*) ;; *) dllsearchpath="$dllsearchpath:$testbindir";; esac ;; esac continue ;; -l*) if test "X$arg" = "X-lc" || test "X$arg" = "X-lm"; then case $host in *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-beos*) # These systems don't actually have a C or math library (as such) continue ;; *-*-os2*) # These systems don't actually have a C library (as such) test "X$arg" = "X-lc" && continue ;; *-*-openbsd* | *-*-freebsd* | *-*-dragonfly*) # Do not include libc due to us having libc/libc_r. test "X$arg" = "X-lc" && continue ;; *-*-rhapsody* | *-*-darwin1.[012]) # Rhapsody C and math libraries are in the System framework deplibs="$deplibs -framework System" continue ;; *-*-sco3.2v5* | *-*-sco5v6*) # Causes problems with __ctype test "X$arg" = "X-lc" && continue ;; *-*-sysv4.2uw2* | *-*-sysv5* | *-*-unixware* | *-*-OpenUNIX*) # Compiler inserts libc in the correct place for threads to work test "X$arg" = "X-lc" && continue ;; esac elif test "X$arg" = "X-lc_r"; then case $host in *-*-openbsd* | *-*-freebsd* | *-*-dragonfly*) # Do not include libc_r directly, use -pthread flag. continue ;; esac fi deplibs="$deplibs $arg" continue ;; # Tru64 UNIX uses -model [arg] to determine the layout of C++ # classes, name mangling, and exception handling. 
-model)
  compile_command="$compile_command $arg"
  compiler_flags="$compiler_flags $arg"
  finalize_command="$finalize_command $arg"
  # The value of -model is the next argument.
  prev=xcompiler
  continue
  ;;

-mt|-mthreads|-kthread|-Kthread|-pthread|-pthreads|--thread-safe|-threads)
  compiler_flags="$compiler_flags $arg"
  compile_command="$compile_command $arg"
  finalize_command="$finalize_command $arg"
  continue
  ;;

-module)
  module=yes
  continue
  ;;

# -64, -mips[0-9] enable 64-bit mode on the SGI compiler
# -r[0-9][0-9]* specifies the processor on the SGI compiler
# -xarch=*, -xtarget=* enable 64-bit mode on the Sun compiler
# +DA*, +DD* enable 64-bit mode on the HP compiler
# -q* pass through compiler args for the IBM compiler
# -m* pass through architecture-specific compiler args for GCC
# -m*, -t[45]*, -txscale* pass through architecture-specific
# compiler args for GCC
# -p, -pg, --coverage, -fprofile-* pass through profiling flag for GCC
# -F/path gives path to uninstalled frameworks, gcc on darwin
# @file GCC response files
-64|-mips[0-9]|-r[0-9][0-9]*|-xarch=*|-xtarget=*|+DA*|+DD*|-q*|-m*| \
-t[45]*|-txscale*|-p|-pg|--coverage|-fprofile-*|-F*|@*)

  # Unknown arguments in both finalize_command and compile_command need
  # to be aesthetically quoted because they are evaled later.
  arg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"`
  case $arg in
  *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"")
    arg="\"$arg\""
    ;;
  esac
  compile_command="$compile_command $arg"
  finalize_command="$finalize_command $arg"
  compiler_flags="$compiler_flags $arg"
  continue
  ;;

-shrext)
  prev=shrext
  continue
  ;;

-no-fast-install)
  fast_install=no
  continue
  ;;

-no-install)
  case $host in
  *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-*-darwin*)
    # The PATH hackery in wrapper scripts is required on Windows
    # and Darwin in order for the loader to find any dlls it needs.
$echo "$modename: warning: \`-no-install' is ignored for $host" 1>&2
$echo "$modename: warning: assuming \`-no-fast-install' instead" 1>&2
fast_install=no
;;
*) no_install=yes ;;
esac
continue
;;

-no-undefined)
  allow_undefined=no
  continue
  ;;

-objectlist)
  prev=objectlist
  continue
  ;;

# No `continue' here: $arg stays set to "-o" when this case statement ends.
-o) prev=output ;;

-precious-files-regex)
  prev=precious_regex
  continue
  ;;

-release)
  prev=release
  continue
  ;;

-rpath)
  prev=rpath
  continue
  ;;

-R)
  prev=xrpath
  continue
  ;;

-R*)
  dir=`$echo "X$arg" | $Xsed -e 's/^-R//'`
  # We need an absolute path.
  case $dir in
  [\\/]* | [A-Za-z]:[\\/]*) ;;
  *)
    $echo "$modename: only absolute run-paths are allowed" 1>&2
    exit $EXIT_FAILURE
    ;;
  esac
  # Keep each run-path directory only once.
  case "$xrpath " in
  *" $dir "*) ;;
  *) xrpath="$xrpath $dir" ;;
  esac
  continue
  ;;

-static | -static-libtool-libs)
  # The effects of -static are defined in a previous loop.
  # We used to do the same as -all-static on platforms that
  # didn't have a PIC flag, but the assumption that the effects
  # would be equivalent was wrong. It would break on at least
  # Digital Unix and AIX.
continue
;;

-thread-safe)
  thread_safe=yes
  continue
  ;;

-version-info)
  prev=vinfo
  continue
  ;;
-version-number)
  prev=vinfo
  vinfo_number=yes
  continue
  ;;

-Wc,*)
  # Split the comma-separated list and hand each flag to the compiler.
  args=`$echo "X$arg" | $Xsed -e "$sed_quote_subst" -e 's/^-Wc,//'`
  arg=
  save_ifs="$IFS"; IFS=','
  for flag in $args; do
    IFS="$save_ifs"
    case $flag in
    *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"")
      flag="\"$flag\""
      ;;
    esac
    # These are compiler flags: they must NOT get the linker prefix $wl
    # (only the -Wl, arm below adds it).
    arg="$arg $flag"
    compiler_flags="$compiler_flags $flag"
  done
  IFS="$save_ifs"
  arg=`$echo "X$arg" | $Xsed -e "s/^ //"`
  ;;

-Wl,*)
  # Split the comma-separated list; each flag is passed through the
  # compiler driver with the $wl prefix and also recorded raw.
  args=`$echo "X$arg" | $Xsed -e "$sed_quote_subst" -e 's/^-Wl,//'`
  arg=
  save_ifs="$IFS"; IFS=','
  for flag in $args; do
    IFS="$save_ifs"
    case $flag in
    *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"")
      flag="\"$flag\""
      ;;
    esac
    arg="$arg $wl$flag"
    compiler_flags="$compiler_flags $wl$flag"
    linker_flags="$linker_flags $flag"
  done
  IFS="$save_ifs"
  arg=`$echo "X$arg" | $Xsed -e "s/^ //"`
  ;;

-Xcompiler)
  prev=xcompiler
  continue
  ;;

-Xlinker)
  prev=xlinker
  continue
  ;;

-XCClinker)
  prev=xcclinker
  continue
  ;;

# Some other compiler flag.
-* | +*)
  # Unknown arguments in both finalize_command and compile_command need
  # to be aesthetically quoted because they are evaled later.
  arg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"`
  case $arg in
  *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"")
    arg="\"$arg\""
    ;;
  esac
  ;;

*.$objext)
  # A standard object.
  objs="$objs $arg"
  ;;

*.lo)
  # A libtool-controlled object.

  # Check to see that this really is a libtool object.
  if (${SED} -e '2q' $arg | grep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then
    pic_object=
    non_pic_object=

    # Read the .lo file
    # If there is no directory component, then add one.
    case $arg in
    */* | *\\*) . $arg ;;
    *) . ./$arg ;;
    esac

    if test -z "$pic_object" || \
       test -z "$non_pic_object" ||
       test "$pic_object" = none && \
       test "$non_pic_object" = none; then
      $echo "$modename: cannot find name of object for \`$arg'" 1>&2
      exit $EXIT_FAILURE
    fi

    # Extract subdirectory from the argument.
xdir=`$echo "X$arg" | $Xsed -e 's%/[^/]*$%%'` if test "X$xdir" = "X$arg"; then xdir= else xdir="$xdir/" fi if test "$pic_object" != none; then # Prepend the subdirectory the object is found in. pic_object="$xdir$pic_object" if test "$prev" = dlfiles; then if test "$build_libtool_libs" = yes && test "$dlopen_support" = yes; then dlfiles="$dlfiles $pic_object" prev= continue else # If libtool objects are unsupported, then we need to preload. prev=dlprefiles fi fi # CHECK ME: I think I busted this. -Ossama if test "$prev" = dlprefiles; then # Preload the old-style object. dlprefiles="$dlprefiles $pic_object" prev= fi # A PIC object. libobjs="$libobjs $pic_object" arg="$pic_object" fi # Non-PIC object. if test "$non_pic_object" != none; then # Prepend the subdirectory the object is found in. non_pic_object="$xdir$non_pic_object" # A standard non-PIC object non_pic_objects="$non_pic_objects $non_pic_object" if test -z "$pic_object" || test "$pic_object" = none ; then arg="$non_pic_object" fi else # If the PIC object exists, use it instead. # $xdir was prepended to $pic_object above. non_pic_object="$pic_object" non_pic_objects="$non_pic_objects $non_pic_object" fi else # Only an error if not doing a dry-run. if test -z "$run"; then $echo "$modename: \`$arg' is not a valid libtool object" 1>&2 exit $EXIT_FAILURE else # Dry-run case. # Extract subdirectory from the argument. xdir=`$echo "X$arg" | $Xsed -e 's%/[^/]*$%%'` if test "X$xdir" = "X$arg"; then xdir= else xdir="$xdir/" fi pic_object=`$echo "X${xdir}${objdir}/${arg}" | $Xsed -e "$lo2o"` non_pic_object=`$echo "X${xdir}${arg}" | $Xsed -e "$lo2o"` libobjs="$libobjs $pic_object" non_pic_objects="$non_pic_objects $non_pic_object" fi fi ;; *.$libext) # An archive. deplibs="$deplibs $arg" old_deplibs="$old_deplibs $arg" continue ;; *.la) # A libtool-controlled library. if test "$prev" = dlfiles; then # This library was specified with -dlopen. 
dlfiles="$dlfiles $arg" prev= elif test "$prev" = dlprefiles; then # The library was specified with -dlpreopen. dlprefiles="$dlprefiles $arg" prev= else deplibs="$deplibs $arg" fi continue ;; # Some other compiler argument. *) # Unknown arguments in both finalize_command and compile_command need # to be aesthetically quoted because they are evaled later. arg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"` case $arg in *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") arg="\"$arg\"" ;; esac ;; esac # arg # Now actually substitute the argument into the commands. if test -n "$arg"; then compile_command="$compile_command $arg" finalize_command="$finalize_command $arg" fi done # argument parsing loop if test -n "$prev"; then $echo "$modename: the \`$prevarg' option requires an argument" 1>&2 $echo "$help" 1>&2 exit $EXIT_FAILURE fi if test "$export_dynamic" = yes && test -n "$export_dynamic_flag_spec"; then eval arg=\"$export_dynamic_flag_spec\" compile_command="$compile_command $arg" finalize_command="$finalize_command $arg" fi oldlibs= # calculate the name of the file, without its directory outputname=`$echo "X$output" | $Xsed -e 's%^.*/%%'` libobjs_save="$libobjs" if test -n "$shlibpath_var"; then # get the directories listed in $shlibpath_var eval shlib_search_path=\`\$echo \"X\${$shlibpath_var}\" \| \$Xsed -e \'s/:/ /g\'\` else shlib_search_path= fi eval sys_lib_search_path=\"$sys_lib_search_path_spec\" eval sys_lib_dlsearch_path=\"$sys_lib_dlsearch_path_spec\" output_objdir=`$echo "X$output" | $Xsed -e 's%/[^/]*$%%'` if test "X$output_objdir" = "X$output"; then output_objdir="$objdir" else output_objdir="$output_objdir/$objdir" fi # Create the object directory. if test ! -d "$output_objdir"; then $show "$mkdir $output_objdir" $run $mkdir $output_objdir exit_status=$? if test "$exit_status" -ne 0 && test ! 
-d "$output_objdir"; then exit $exit_status fi fi # Determine the type of output case $output in "") $echo "$modename: you must specify an output file" 1>&2 $echo "$help" 1>&2 exit $EXIT_FAILURE ;; *.$libext) linkmode=oldlib ;; *.lo | *.$objext) linkmode=obj ;; *.la) linkmode=lib ;; *) linkmode=prog ;; # Anything else should be a program. esac case $host in *cygwin* | *mingw* | *pw32*) # don't eliminate duplications in $postdeps and $predeps duplicate_compiler_generated_deps=yes ;; *) duplicate_compiler_generated_deps=$duplicate_deps ;; esac specialdeplibs= libs= # Find all interdependent deplibs by searching for libraries # that are linked more than once (e.g. -la -lb -la) for deplib in $deplibs; do if test "X$duplicate_deps" = "Xyes" ; then case "$libs " in *" $deplib "*) specialdeplibs="$specialdeplibs $deplib" ;; esac fi libs="$libs $deplib" done if test "$linkmode" = lib; then libs="$predeps $libs $compiler_lib_search_path $postdeps" # Compute libraries that are listed more than once in $predeps # $postdeps and mark them as special (i.e., whose duplicates are # not to be eliminated). 
pre_post_deps= if test "X$duplicate_compiler_generated_deps" = "Xyes" ; then for pre_post_dep in $predeps $postdeps; do case "$pre_post_deps " in *" $pre_post_dep "*) specialdeplibs="$specialdeplibs $pre_post_deps" ;; esac pre_post_deps="$pre_post_deps $pre_post_dep" done fi pre_post_deps= fi deplibs= newdependency_libs= newlib_search_path= need_relink=no # whether we're linking any uninstalled libtool libraries notinst_deplibs= # not-installed libtool libraries case $linkmode in lib) passes="conv link" for file in $dlfiles $dlprefiles; do case $file in *.la) ;; *) $echo "$modename: libraries can \`-dlopen' only libtool libraries: $file" 1>&2 exit $EXIT_FAILURE ;; esac done ;; prog) compile_deplibs= finalize_deplibs= alldeplibs=no newdlfiles= newdlprefiles= passes="conv scan dlopen dlpreopen link" ;; *) passes="conv" ;; esac for pass in $passes; do if test "$linkmode,$pass" = "lib,link" || test "$linkmode,$pass" = "prog,scan"; then libs="$deplibs" deplibs= fi if test "$linkmode" = prog; then case $pass in dlopen) libs="$dlfiles" ;; dlpreopen) libs="$dlprefiles" ;; link) libs="$deplibs %DEPLIBS% $dependency_libs" ;; esac fi if test "$pass" = dlopen; then # Collect dlpreopened libraries save_deplibs="$deplibs" deplibs= fi for deplib in $libs; do lib= found=no case $deplib in -mt|-mthreads|-kthread|-Kthread|-pthread|-pthreads|--thread-safe|-threads) if test "$linkmode,$pass" = "prog,link"; then compile_deplibs="$deplib $compile_deplibs" finalize_deplibs="$deplib $finalize_deplibs" else compiler_flags="$compiler_flags $deplib" fi continue ;; -l*) if test "$linkmode" != lib && test "$linkmode" != prog; then $echo "$modename: warning: \`-l' is ignored for archives/objects" 1>&2 continue fi name=`$echo "X$deplib" | $Xsed -e 's/^-l//'` for searchdir in $newlib_search_path $lib_search_path $sys_lib_search_path $shlib_search_path; do for search_ext in .la $std_shrext .so .a; do # Search the libtool library lib="$searchdir/lib${name}${search_ext}" if test -f "$lib"; then if 
test "$search_ext" = ".la"; then found=yes else found=no fi break 2 fi done done if test "$found" != yes; then # deplib doesn't seem to be a libtool library if test "$linkmode,$pass" = "prog,link"; then compile_deplibs="$deplib $compile_deplibs" finalize_deplibs="$deplib $finalize_deplibs" else deplibs="$deplib $deplibs" test "$linkmode" = lib && newdependency_libs="$deplib $newdependency_libs" fi continue else # deplib is a libtool library # If $allow_libtool_libs_with_static_runtimes && $deplib is a stdlib, # We need to do some special things here, and not later. if test "X$allow_libtool_libs_with_static_runtimes" = "Xyes" ; then case " $predeps $postdeps " in *" $deplib "*) if (${SED} -e '2q' $lib | grep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then library_names= old_library= case $lib in */* | *\\*) . $lib ;; *) . ./$lib ;; esac for l in $old_library $library_names; do ll="$l" done if test "X$ll" = "X$old_library" ; then # only static version available found=no ladir=`$echo "X$lib" | $Xsed -e 's%/[^/]*$%%'` test "X$ladir" = "X$lib" && ladir="." 
lib=$ladir/$old_library if test "$linkmode,$pass" = "prog,link"; then compile_deplibs="$deplib $compile_deplibs" finalize_deplibs="$deplib $finalize_deplibs" else deplibs="$deplib $deplibs" test "$linkmode" = lib && newdependency_libs="$deplib $newdependency_libs" fi continue fi fi ;; *) ;; esac fi fi ;; # -l -L*) case $linkmode in lib) deplibs="$deplib $deplibs" test "$pass" = conv && continue newdependency_libs="$deplib $newdependency_libs" newlib_search_path="$newlib_search_path "`$echo "X$deplib" | $Xsed -e 's/^-L//'` ;; prog) if test "$pass" = conv; then deplibs="$deplib $deplibs" continue fi if test "$pass" = scan; then deplibs="$deplib $deplibs" else compile_deplibs="$deplib $compile_deplibs" finalize_deplibs="$deplib $finalize_deplibs" fi newlib_search_path="$newlib_search_path "`$echo "X$deplib" | $Xsed -e 's/^-L//'` ;; *) $echo "$modename: warning: \`-L' is ignored for archives/objects" 1>&2 ;; esac # linkmode continue ;; # -L -R*) if test "$pass" = link; then dir=`$echo "X$deplib" | $Xsed -e 's/^-R//'` # Make sure the xrpath contains only unique directories. case "$xrpath " in *" $dir "*) ;; *) xrpath="$xrpath $dir" ;; esac fi deplibs="$deplib $deplibs" continue ;; *.la) lib="$deplib" ;; *.$libext) if test "$pass" = conv; then deplibs="$deplib $deplibs" continue fi case $linkmode in lib) valid_a_lib=no case $deplibs_check_method in match_pattern*) set dummy $deplibs_check_method match_pattern_regex=`expr "$deplibs_check_method" : "$2 \(.*\)"` if eval $echo \"$deplib\" 2>/dev/null \ | $SED 10q \ | $EGREP "$match_pattern_regex" > /dev/null; then valid_a_lib=yes fi ;; pass_all) valid_a_lib=yes ;; esac if test "$valid_a_lib" != yes; then $echo $echo "*** Warning: Trying to link with static lib archive $deplib." $echo "*** I have the capability to make that library automatically link in when" $echo "*** you link to this library. 
But I can only do this if you have a" $echo "*** shared version of the library, which you do not appear to have" $echo "*** because the file extensions .$libext of this argument makes me believe" $echo "*** that it is just a static archive that I should not used here." else $echo $echo "*** Warning: Linking the shared library $output against the" $echo "*** static library $deplib is not portable!" deplibs="$deplib $deplibs" fi continue ;; prog) if test "$pass" != link; then deplibs="$deplib $deplibs" else compile_deplibs="$deplib $compile_deplibs" finalize_deplibs="$deplib $finalize_deplibs" fi continue ;; esac # linkmode ;; # *.$libext *.lo | *.$objext) if test "$pass" = conv; then deplibs="$deplib $deplibs" elif test "$linkmode" = prog; then if test "$pass" = dlpreopen || test "$dlopen_support" != yes || test "$build_libtool_libs" = no; then # If there is no dlopen support or we're linking statically, # we need to preload. newdlprefiles="$newdlprefiles $deplib" compile_deplibs="$deplib $compile_deplibs" finalize_deplibs="$deplib $finalize_deplibs" else newdlfiles="$newdlfiles $deplib" fi fi continue ;; %DEPLIBS%) alldeplibs=yes continue ;; esac # case $deplib if test "$found" = yes || test -f "$lib"; then : else $echo "$modename: cannot find the library \`$lib' or unhandled argument \`$deplib'" 1>&2 exit $EXIT_FAILURE fi # Check to see that this really is a libtool archive. if (${SED} -e '2q' $lib | grep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then : else $echo "$modename: \`$lib' is not a valid libtool archive" 1>&2 exit $EXIT_FAILURE fi ladir=`$echo "X$lib" | $Xsed -e 's%/[^/]*$%%'` test "X$ladir" = "X$lib" && ladir="." dlname= dlopen= dlpreopen= libdir= library_names= old_library= # If the library was installed with an old release of libtool, # it will not redefine variables installed, or shouldnotlink installed=yes shouldnotlink=no avoidtemprpath= # Read the .la file case $lib in */* | *\\*) . $lib ;; *) . 
./$lib ;; esac if test "$linkmode,$pass" = "lib,link" || test "$linkmode,$pass" = "prog,scan" || { test "$linkmode" != prog && test "$linkmode" != lib; }; then test -n "$dlopen" && dlfiles="$dlfiles $dlopen" test -n "$dlpreopen" && dlprefiles="$dlprefiles $dlpreopen" fi if test "$pass" = conv; then # Only check for convenience libraries deplibs="$lib $deplibs" if test -z "$libdir"; then if test -z "$old_library"; then $echo "$modename: cannot find name of link library for \`$lib'" 1>&2 exit $EXIT_FAILURE fi # It is a libtool convenience library, so add in its objects. convenience="$convenience $ladir/$objdir/$old_library" old_convenience="$old_convenience $ladir/$objdir/$old_library" tmp_libs= for deplib in $dependency_libs; do deplibs="$deplib $deplibs" if test "X$duplicate_deps" = "Xyes" ; then case "$tmp_libs " in *" $deplib "*) specialdeplibs="$specialdeplibs $deplib" ;; esac fi tmp_libs="$tmp_libs $deplib" done elif test "$linkmode" != prog && test "$linkmode" != lib; then $echo "$modename: \`$lib' is not a convenience library" 1>&2 exit $EXIT_FAILURE fi continue fi # $pass = conv # Get the name of the library we link against. linklib= for l in $old_library $library_names; do linklib="$l" done if test -z "$linklib"; then $echo "$modename: cannot find name of link library for \`$lib'" 1>&2 exit $EXIT_FAILURE fi # This library was specified with -dlopen. if test "$pass" = dlopen; then if test -z "$libdir"; then $echo "$modename: cannot -dlopen a convenience library: \`$lib'" 1>&2 exit $EXIT_FAILURE fi if test -z "$dlname" || test "$dlopen_support" != yes || test "$build_libtool_libs" = no; then # If there is no dlname, no dlopen support or we're linking # statically, we need to preload. We also need to preload any # dependent libraries so libltdl's deplib preloader doesn't # bomb out in the load deplibs phase. dlprefiles="$dlprefiles $lib $dependency_libs" else newdlfiles="$newdlfiles $lib" fi continue fi # $pass = dlopen # We need an absolute path. 
case $ladir in [\\/]* | [A-Za-z]:[\\/]*) abs_ladir="$ladir" ;; *) abs_ladir=`cd "$ladir" && pwd` if test -z "$abs_ladir"; then $echo "$modename: warning: cannot determine absolute directory name of \`$ladir'" 1>&2 $echo "$modename: passing it literally to the linker, although it might fail" 1>&2 abs_ladir="$ladir" fi ;; esac laname=`$echo "X$lib" | $Xsed -e 's%^.*/%%'` # Find the relevant object directory and library name. if test "X$installed" = Xyes; then if test ! -f "$libdir/$linklib" && test -f "$abs_ladir/$linklib"; then $echo "$modename: warning: library \`$lib' was moved." 1>&2 dir="$ladir" absdir="$abs_ladir" libdir="$abs_ladir" else dir="$libdir" absdir="$libdir" fi test "X$hardcode_automatic" = Xyes && avoidtemprpath=yes else if test ! -f "$ladir/$objdir/$linklib" && test -f "$abs_ladir/$linklib"; then dir="$ladir" absdir="$abs_ladir" # Remove this search path later notinst_path="$notinst_path $abs_ladir" else dir="$ladir/$objdir" absdir="$abs_ladir/$objdir" # Remove this search path later notinst_path="$notinst_path $abs_ladir" fi fi # $installed = yes name=`$echo "X$laname" | $Xsed -e 's/\.la$//' -e 's/^lib//'` # This library was specified with -dlpreopen. if test "$pass" = dlpreopen; then if test -z "$libdir"; then $echo "$modename: cannot -dlpreopen a convenience library: \`$lib'" 1>&2 exit $EXIT_FAILURE fi # Prefer using a static library (so that no silly _DYNAMIC symbols # are required to link). if test -n "$old_library"; then newdlprefiles="$newdlprefiles $dir/$old_library" # Otherwise, use the dlname, so that lt_dlopen finds it. 
elif test -n "$dlname"; then
  newdlprefiles="$newdlprefiles $dir/$dlname"
else
  newdlprefiles="$newdlprefiles $dir/$linklib"
fi
fi # $pass = dlpreopen

# An empty $libdir marks a convenience library.
if test -z "$libdir"; then
  # Link the convenience library
  if test "$linkmode" = lib; then
    deplibs="$dir/$old_library $deplibs"
  elif test "$linkmode,$pass" = "prog,link"; then
    compile_deplibs="$dir/$old_library $compile_deplibs"
    finalize_deplibs="$dir/$old_library $finalize_deplibs"
  else
    deplibs="$lib $deplibs" # used for prog,scan pass
  fi
  continue
fi


if test "$linkmode" = prog && test "$pass" != link; then
  newlib_search_path="$newlib_search_path $ladir"
  deplibs="$lib $deplibs"

  linkalldeplibs=no
  if test "$link_all_deplibs" != no || test -z "$library_names" ||
     test "$build_libtool_libs" = no; then
    linkalldeplibs=yes
  fi

  tmp_libs=
  for deplib in $dependency_libs; do
    case $deplib in
    -L*) newlib_search_path="$newlib_search_path "`$echo "X$deplib" | $Xsed -e 's/^-L//'`;; ### testsuite: skip nested quoting test
    esac
    # Need to link against all dependency_libs?
    if test "$linkalldeplibs" = yes; then
      deplibs="$deplib $deplibs"
    else
      # Need to hardcode shared library paths
      # or/and link against static libraries
      newdependency_libs="$deplib $newdependency_libs"
    fi
    if test "X$duplicate_deps" = "Xyes" ; then
      case "$tmp_libs " in
      *" $deplib "*) specialdeplibs="$specialdeplibs $deplib" ;;
      esac
    fi
    tmp_libs="$tmp_libs $deplib"
  done # for deplib
  continue
fi # $linkmode = prog...

if test "$linkmode,$pass" = "prog,link"; then
  if test -n "$library_names" &&
     { { test "$prefer_static_libs" = no ||
         test "$prefer_static_libs,$installed" = "built,yes"; } ||
       test -z "$old_library"; }; then
    # We need to hardcode the library path
    if test -n "$shlibpath_var" && test -z "$avoidtemprpath" ; then
      # Make sure the rpath contains only unique directories.
      case "$temp_rpath " in
      *" $dir "*) ;;
      *" $absdir "*) ;;
      *) temp_rpath="$temp_rpath $absdir" ;;
      esac
    fi

    # Hardcode the library path.
    # Skip directories that are in the system default run-time
    # search path.
case " $sys_lib_dlsearch_path " in *" $absdir "*) ;; *) case "$compile_rpath " in *" $absdir "*) ;; *) compile_rpath="$compile_rpath $absdir" esac ;; esac case " $sys_lib_dlsearch_path " in *" $libdir "*) ;; *) case "$finalize_rpath " in *" $libdir "*) ;; *) finalize_rpath="$finalize_rpath $libdir" esac ;; esac fi # $linkmode,$pass = prog,link... if test "$alldeplibs" = yes && { test "$deplibs_check_method" = pass_all || { test "$build_libtool_libs" = yes && test -n "$library_names"; }; }; then # We only need to search for static libraries continue fi fi link_static=no # Whether the deplib will be linked statically use_static_libs=$prefer_static_libs if test "$use_static_libs" = built && test "$installed" = yes ; then use_static_libs=no fi if test -n "$library_names" && { test "$use_static_libs" = no || test -z "$old_library"; }; then if test "$installed" = no; then notinst_deplibs="$notinst_deplibs $lib" need_relink=yes fi # This is a shared library # Warn about portability, can't link against -module's on # some systems (darwin) if test "$shouldnotlink" = yes && test "$pass" = link ; then $echo if test "$linkmode" = prog; then $echo "*** Warning: Linking the executable $output against the loadable module" else $echo "*** Warning: Linking the shared library $output against the loadable module" fi $echo "*** $linklib is not portable!" fi if test "$linkmode" = lib && test "$hardcode_into_libs" = yes; then # Hardcode the library path. # Skip directories that are in the system default run-time # search path. 
case " $sys_lib_dlsearch_path " in *" $absdir "*) ;; *) case "$compile_rpath " in *" $absdir "*) ;; *) compile_rpath="$compile_rpath $absdir" esac ;; esac case " $sys_lib_dlsearch_path " in *" $libdir "*) ;; *) case "$finalize_rpath " in *" $libdir "*) ;; *) finalize_rpath="$finalize_rpath $libdir" esac ;; esac fi if test -n "$old_archive_from_expsyms_cmds"; then # figure out the soname set dummy $library_names realname="$2" shift; shift libname=`eval \\$echo \"$libname_spec\"` # use dlname if we got it. it's perfectly good, no? if test -n "$dlname"; then soname="$dlname" elif test -n "$soname_spec"; then # bleh windows case $host in *cygwin* | mingw*) major=`expr $current - $age` versuffix="-$major" ;; esac eval soname=\"$soname_spec\" else soname="$realname" fi # Make a new name for the extract_expsyms_cmds to use soroot="$soname" soname=`$echo $soroot | ${SED} -e 's/^.*\///'` newlib="libimp-`$echo $soname | ${SED} 's/^lib//;s/\.dll$//'`.a" # If the library has no export list, then create one now if test -f "$output_objdir/$soname-def"; then : else $show "extracting exported symbol list from \`$soname'" save_ifs="$IFS"; IFS='~' cmds=$extract_expsyms_cmds for cmd in $cmds; do IFS="$save_ifs" eval cmd=\"$cmd\" $show "$cmd" $run eval "$cmd" || exit $? done IFS="$save_ifs" fi # Create $newlib if test -f "$output_objdir/$newlib"; then :; else $show "generating import library for \`$soname'" save_ifs="$IFS"; IFS='~' cmds=$old_archive_from_expsyms_cmds for cmd in $cmds; do IFS="$save_ifs" eval cmd=\"$cmd\" $show "$cmd" $run eval "$cmd" || exit $? 
done
IFS="$save_ifs"
fi
# make sure the library variables are pointing to the new library
dir=$output_objdir
linklib=$newlib
fi # test -n "$old_archive_from_expsyms_cmds"

if test "$linkmode" = prog || test "$mode" != relink; then
  # Work out what to add to the link line for this library:
  #   add           the library itself (a path or -lNAME)
  #   add_dir       an accompanying -L flag, if any
  #   add_shlibpath a directory for $compile_shlibpath, if any
  add_shlibpath=
  add_dir=
  add=
  lib_linked=yes
  case $hardcode_action in
  immediate | unsupported)
    if test "$hardcode_direct" = no; then
      add="$dir/$linklib"
      case $host in
      *-*-sco3.2v5.0.[024]*) add_dir="-L$dir" ;;
      *-*-sysv4*uw2*) add_dir="-L$dir" ;;
      *-*-sysv5OpenUNIX* | *-*-sysv5UnixWare7.[01].[10]* | \
      *-*-unixware7*) add_dir="-L$dir" ;;
      *-*-darwin* )
        # if the lib is a module then we can not link against
        # it, someone is ignoring the new warnings I added
        if /usr/bin/file -L $add 2> /dev/null | $EGREP ": [^:]* bundle" >/dev/null ; then
          $echo "** Warning, lib $linklib is a module, not a shared library"
          if test -z "$old_library" ; then
            $echo
            $echo "** And there doesn't seem to be a static archive available"
            $echo "** The link will probably fail, sorry"
          else
            add="$dir/$old_library"
          fi
        fi
      esac
    elif test "$hardcode_minus_L" = no; then
      case $host in
      *-*-sunos*) add_shlibpath="$dir" ;;
      esac
      add_dir="-L$dir"
      add="-l$name"
    elif test "$hardcode_shlibpath_var" = no; then
      add_shlibpath="$dir"
      add="-l$name"
    else
      lib_linked=no
    fi
    ;;
  relink)
    if test "$hardcode_direct" = yes; then
      add="$dir/$linklib"
    elif test "$hardcode_minus_L" = yes; then
      add_dir="-L$dir"
      # Try looking first in the location we're being installed to.
if test -n "$inst_prefix_dir"; then case $libdir in [\\/]*) add_dir="$add_dir -L$inst_prefix_dir$libdir" ;; esac fi add="-l$name" elif test "$hardcode_shlibpath_var" = yes; then add_shlibpath="$dir" add="-l$name" else lib_linked=no fi ;; *) lib_linked=no ;; esac if test "$lib_linked" != yes; then $echo "$modename: configuration error: unsupported hardcode properties" exit $EXIT_FAILURE fi if test -n "$add_shlibpath"; then case :$compile_shlibpath: in *":$add_shlibpath:"*) ;; *) compile_shlibpath="$compile_shlibpath$add_shlibpath:" ;; esac fi if test "$linkmode" = prog; then test -n "$add_dir" && compile_deplibs="$add_dir $compile_deplibs" test -n "$add" && compile_deplibs="$add $compile_deplibs" else test -n "$add_dir" && deplibs="$add_dir $deplibs" test -n "$add" && deplibs="$add $deplibs" if test "$hardcode_direct" != yes && \ test "$hardcode_minus_L" != yes && \ test "$hardcode_shlibpath_var" = yes; then case :$finalize_shlibpath: in *":$libdir:"*) ;; *) finalize_shlibpath="$finalize_shlibpath$libdir:" ;; esac fi fi fi if test "$linkmode" = prog || test "$mode" = relink; then add_shlibpath= add_dir= add= # Finalize command for both is simple: just hardcode it. if test "$hardcode_direct" = yes; then add="$libdir/$linklib" elif test "$hardcode_minus_L" = yes; then add_dir="-L$libdir" add="-l$name" elif test "$hardcode_shlibpath_var" = yes; then case :$finalize_shlibpath: in *":$libdir:"*) ;; *) finalize_shlibpath="$finalize_shlibpath$libdir:" ;; esac add="-l$name" elif test "$hardcode_automatic" = yes; then if test -n "$inst_prefix_dir" && test -f "$inst_prefix_dir$libdir/$linklib" ; then add="$inst_prefix_dir$libdir/$linklib" else add="$libdir/$linklib" fi else # We cannot seem to hardcode it, guess we'll fake it. add_dir="-L$libdir" # Try looking first in the location we're being installed to. 
if test -n "$inst_prefix_dir"; then case $libdir in [\\/]*) add_dir="$add_dir -L$inst_prefix_dir$libdir" ;; esac fi add="-l$name" fi if test "$linkmode" = prog; then test -n "$add_dir" && finalize_deplibs="$add_dir $finalize_deplibs" test -n "$add" && finalize_deplibs="$add $finalize_deplibs" else test -n "$add_dir" && deplibs="$add_dir $deplibs" test -n "$add" && deplibs="$add $deplibs" fi fi elif test "$linkmode" = prog; then # Here we assume that one of hardcode_direct or hardcode_minus_L # is not unsupported. This is valid on all known static and # shared platforms. if test "$hardcode_direct" != unsupported; then test -n "$old_library" && linklib="$old_library" compile_deplibs="$dir/$linklib $compile_deplibs" finalize_deplibs="$dir/$linklib $finalize_deplibs" else compile_deplibs="-l$name -L$dir $compile_deplibs" finalize_deplibs="-l$name -L$dir $finalize_deplibs" fi elif test "$build_libtool_libs" = yes; then # Not a shared library if test "$deplibs_check_method" != pass_all; then # We're trying link a shared library against a static one # but the system doesn't support it. # Just print a warning and add the library to dependency_libs so # that the program can be linked against the static library. $echo $echo "*** Warning: This system can not link to static lib archive $lib." $echo "*** I have the capability to make that library automatically link in when" $echo "*** you link to this library. But I can only do this if you have a" $echo "*** shared version of the library, which you do not appear to have." if test "$module" = yes; then $echo "*** But as you try to build a module library, libtool will still create " $echo "*** a static module, that should work as long as the dlopening application" $echo "*** is linked with the -dlopen flag to resolve symbols at runtime." 
if test -z "$global_symbol_pipe"; then $echo $echo "*** However, this would only work if libtool was able to extract symbol" $echo "*** lists from a program, using \`nm' or equivalent, but libtool could" $echo "*** not find such a program. So, this module is probably useless." $echo "*** \`nm' from GNU binutils and a full rebuild may help." fi if test "$build_old_libs" = no; then build_libtool_libs=module build_old_libs=yes else build_libtool_libs=no fi fi else deplibs="$dir/$old_library $deplibs" link_static=yes fi fi # link shared/static library? if test "$linkmode" = lib; then if test -n "$dependency_libs" && { test "$hardcode_into_libs" != yes || test "$build_old_libs" = yes || test "$link_static" = yes; }; then # Extract -R from dependency_libs temp_deplibs= for libdir in $dependency_libs; do case $libdir in -R*) temp_xrpath=`$echo "X$libdir" | $Xsed -e 's/^-R//'` case " $xrpath " in *" $temp_xrpath "*) ;; *) xrpath="$xrpath $temp_xrpath";; esac;; *) temp_deplibs="$temp_deplibs $libdir";; esac done dependency_libs="$temp_deplibs" fi newlib_search_path="$newlib_search_path $absdir" # Link against this library test "$link_static" = no && newdependency_libs="$abs_ladir/$laname $newdependency_libs" # ... and its dependency_libs tmp_libs= for deplib in $dependency_libs; do newdependency_libs="$deplib $newdependency_libs" if test "X$duplicate_deps" = "Xyes" ; then case "$tmp_libs " in *" $deplib "*) specialdeplibs="$specialdeplibs $deplib" ;; esac fi tmp_libs="$tmp_libs $deplib" done if test "$link_all_deplibs" != no; then # Add the search paths of all dependency libraries for deplib in $dependency_libs; do case $deplib in -L*) path="$deplib" ;; *.la) dir=`$echo "X$deplib" | $Xsed -e 's%/[^/]*$%%'` test "X$dir" = "X$deplib" && dir="." # We need an absolute path. 
case $dir in [\\/]* | [A-Za-z]:[\\/]*) absdir="$dir" ;; *) absdir=`cd "$dir" && pwd` if test -z "$absdir"; then $echo "$modename: warning: cannot determine absolute directory name of \`$dir'" 1>&2 absdir="$dir" fi ;; esac if grep "^installed=no" $deplib > /dev/null; then path="$absdir/$objdir" else eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $deplib` if test -z "$libdir"; then $echo "$modename: \`$deplib' is not a valid libtool archive" 1>&2 exit $EXIT_FAILURE fi if test "$absdir" != "$libdir"; then $echo "$modename: warning: \`$deplib' seems to be moved" 1>&2 fi path="$absdir" fi depdepl= case $host in *-*-darwin*) # we do not want to link against static libs, # but need to link against shared eval deplibrary_names=`${SED} -n -e 's/^library_names=\(.*\)$/\1/p' $deplib` if test -n "$deplibrary_names" ; then for tmp in $deplibrary_names ; do depdepl=$tmp done if test -f "$path/$depdepl" ; then depdepl="$path/$depdepl" fi # do not add paths which are already there case " $newlib_search_path " in *" $path "*) ;; *) newlib_search_path="$newlib_search_path $path";; esac fi path="" ;; *) path="-L$path" ;; esac ;; -l*) case $host in *-*-darwin*) # Again, we only want to link against shared libraries eval tmp_libs=`$echo "X$deplib" | $Xsed -e "s,^\-l,,"` for tmp in $newlib_search_path ; do if test -f "$tmp/lib$tmp_libs.dylib" ; then eval depdepl="$tmp/lib$tmp_libs.dylib" break fi done path="" ;; *) continue ;; esac ;; *) continue ;; esac case " $deplibs " in *" $path "*) ;; *) deplibs="$path $deplibs" ;; esac case " $deplibs " in *" $depdepl "*) ;; *) deplibs="$depdepl $deplibs" ;; esac done fi # link_all_deplibs != no fi # linkmode = lib done # for deplib in $libs dependency_libs="$newdependency_libs" if test "$pass" = dlpreopen; then # Link the dlpreopened libraries before other libraries for deplib in $save_deplibs; do deplibs="$deplib $deplibs" done fi if test "$pass" != dlopen; then if test "$pass" != conv; then # Make sure lib_search_path contains only unique 
directories. lib_search_path= for dir in $newlib_search_path; do case "$lib_search_path " in *" $dir "*) ;; *) lib_search_path="$lib_search_path $dir" ;; esac done newlib_search_path= fi if test "$linkmode,$pass" != "prog,link"; then vars="deplibs" else vars="compile_deplibs finalize_deplibs" fi for var in $vars dependency_libs; do # Add libraries to $var in reverse order eval tmp_libs=\"\$$var\" new_libs= for deplib in $tmp_libs; do # FIXME: Pedantically, this is the right thing to do, so # that some nasty dependency loop isn't accidentally # broken: #new_libs="$deplib $new_libs" # Pragmatically, this seems to cause very few problems in # practice: case $deplib in -L*) new_libs="$deplib $new_libs" ;; -R*) ;; *) # And here is the reason: when a library appears more # than once as an explicit dependence of a library, or # is implicitly linked in more than once by the # compiler, it is considered special, and multiple # occurrences thereof are not removed. Compare this # with having the same library being listed as a # dependency of multiple other libraries: in this case, # we know (pedantically, we assume) the library does not # need to be listed more than once, so we keep only the # last copy. This is not always right, but it is rare # enough that we require users that really mean to play # such unportable linking tricks to link the library # using -Wl,-lname, so that libtool does not consider it # for duplicate removal. 
case " $specialdeplibs " in *" $deplib "*) new_libs="$deplib $new_libs" ;; *) case " $new_libs " in *" $deplib "*) ;; *) new_libs="$deplib $new_libs" ;; esac ;; esac ;; esac done tmp_libs= for deplib in $new_libs; do case $deplib in -L*) case " $tmp_libs " in *" $deplib "*) ;; *) tmp_libs="$tmp_libs $deplib" ;; esac ;; *) tmp_libs="$tmp_libs $deplib" ;; esac done eval $var=\"$tmp_libs\" done # for var fi # Last step: remove runtime libs from dependency_libs # (they stay in deplibs) tmp_libs= for i in $dependency_libs ; do case " $predeps $postdeps $compiler_lib_search_path " in *" $i "*) i="" ;; esac if test -n "$i" ; then tmp_libs="$tmp_libs $i" fi done dependency_libs=$tmp_libs done # for pass if test "$linkmode" = prog; then dlfiles="$newdlfiles" dlprefiles="$newdlprefiles" fi case $linkmode in oldlib) if test -n "$deplibs"; then $echo "$modename: warning: \`-l' and \`-L' are ignored for archives" 1>&2 fi if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then $echo "$modename: warning: \`-dlopen' is ignored for archives" 1>&2 fi if test -n "$rpath"; then $echo "$modename: warning: \`-rpath' is ignored for archives" 1>&2 fi if test -n "$xrpath"; then $echo "$modename: warning: \`-R' is ignored for archives" 1>&2 fi if test -n "$vinfo"; then $echo "$modename: warning: \`-version-info/-version-number' is ignored for archives" 1>&2 fi if test -n "$release"; then $echo "$modename: warning: \`-release' is ignored for archives" 1>&2 fi if test -n "$export_symbols" || test -n "$export_symbols_regex"; then $echo "$modename: warning: \`-export-symbols' is ignored for archives" 1>&2 fi # Now set the variables for building old libraries. build_libtool_libs=no oldlibs="$output" objs="$objs$old_deplibs" ;; lib) # Make sure we only generate libraries of the form `libNAME.la'. 
case $outputname in lib*) name=`$echo "X$outputname" | $Xsed -e 's/\.la$//' -e 's/^lib//'` eval shared_ext=\"$shrext_cmds\" eval libname=\"$libname_spec\" ;; *) if test "$module" = no; then $echo "$modename: libtool library \`$output' must begin with \`lib'" 1>&2 $echo "$help" 1>&2 exit $EXIT_FAILURE fi if test "$need_lib_prefix" != no; then # Add the "lib" prefix for modules if required name=`$echo "X$outputname" | $Xsed -e 's/\.la$//'` eval shared_ext=\"$shrext_cmds\" eval libname=\"$libname_spec\" else libname=`$echo "X$outputname" | $Xsed -e 's/\.la$//'` fi ;; esac if test -n "$objs"; then if test "$deplibs_check_method" != pass_all; then $echo "$modename: cannot build libtool library \`$output' from non-libtool objects on this host:$objs" 2>&1 exit $EXIT_FAILURE else $echo $echo "*** Warning: Linking the shared library $output against the non-libtool" $echo "*** objects $objs is not portable!" libobjs="$libobjs $objs" fi fi if test "$dlself" != no; then $echo "$modename: warning: \`-dlopen self' is ignored for libtool libraries" 1>&2 fi set dummy $rpath if test "$#" -gt 2; then $echo "$modename: warning: ignoring multiple \`-rpath's for a libtool library" 1>&2 fi install_libdir="$2" oldlibs= if test -z "$rpath"; then if test "$build_libtool_libs" = yes; then # Building a libtool convenience library. # Some compilers have problems with a `.al' extension so # convenience libraries should have the same extension an # archive normally would. oldlibs="$output_objdir/$libname.$libext $oldlibs" build_libtool_libs=convenience build_old_libs=yes fi if test -n "$vinfo"; then $echo "$modename: warning: \`-version-info/-version-number' is ignored for convenience libraries" 1>&2 fi if test -n "$release"; then $echo "$modename: warning: \`-release' is ignored for convenience libraries" 1>&2 fi else # Parse the version information argument. 
save_ifs="$IFS"; IFS=':' set dummy $vinfo 0 0 0 IFS="$save_ifs" if test -n "$8"; then $echo "$modename: too many parameters to \`-version-info'" 1>&2 $echo "$help" 1>&2 exit $EXIT_FAILURE fi # convert absolute version numbers to libtool ages # this retains compatibility with .la files and attempts # to make the code below a bit more comprehensible case $vinfo_number in yes) number_major="$2" number_minor="$3" number_revision="$4" # # There are really only two kinds -- those that # use the current revision as the major version # and those that subtract age and use age as # a minor version. But, then there is irix # which has an extra 1 added just for fun # case $version_type in darwin|linux|osf|windows|none) current=`expr $number_major + $number_minor` age="$number_minor" revision="$number_revision" ;; freebsd-aout|freebsd-elf|sunos) current="$number_major" revision="$number_minor" age="0" ;; irix|nonstopux) current=`expr $number_major + $number_minor` age="$number_minor" revision="$number_minor" lt_irix_increment=no ;; esac ;; no) current="$2" revision="$3" age="$4" ;; esac # Check that each of the things are valid numbers. 
case $current in 0|[1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-9][0-9][0-9][0-9][0-9]) ;; *) $echo "$modename: CURRENT \`$current' must be a nonnegative integer" 1>&2 $echo "$modename: \`$vinfo' is not valid version information" 1>&2 exit $EXIT_FAILURE ;; esac case $revision in 0|[1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-9][0-9][0-9][0-9][0-9]) ;; *) $echo "$modename: REVISION \`$revision' must be a nonnegative integer" 1>&2 $echo "$modename: \`$vinfo' is not valid version information" 1>&2 exit $EXIT_FAILURE ;; esac case $age in 0|[1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-9][0-9][0-9][0-9][0-9]) ;; *) $echo "$modename: AGE \`$age' must be a nonnegative integer" 1>&2 $echo "$modename: \`$vinfo' is not valid version information" 1>&2 exit $EXIT_FAILURE ;; esac if test "$age" -gt "$current"; then $echo "$modename: AGE \`$age' is greater than the current interface number \`$current'" 1>&2 $echo "$modename: \`$vinfo' is not valid version information" 1>&2 exit $EXIT_FAILURE fi # Calculate the version variables. major= versuffix= verstring= case $version_type in none) ;; darwin) # Like Linux, but with the current version available in # verstring for coding it into the library header major=.`expr $current - $age` versuffix="$major.$age.$revision" # Darwin ld doesn't like 0 for these options... 
minor_current=`expr $current + 1` xlcverstring="${wl}-compatibility_version ${wl}$minor_current ${wl}-current_version ${wl}$minor_current.$revision" verstring="-compatibility_version $minor_current -current_version $minor_current.$revision" ;; freebsd-aout) major=".$current" versuffix=".$current.$revision"; ;; freebsd-elf) major=".$current" versuffix=".$current"; ;; irix | nonstopux) if test "X$lt_irix_increment" = "Xno"; then major=`expr $current - $age` else major=`expr $current - $age + 1` fi case $version_type in nonstopux) verstring_prefix=nonstopux ;; *) verstring_prefix=sgi ;; esac verstring="$verstring_prefix$major.$revision" # Add in all the interfaces that we are compatible with. loop=$revision while test "$loop" -ne 0; do iface=`expr $revision - $loop` loop=`expr $loop - 1` verstring="$verstring_prefix$major.$iface:$verstring" done # Before this point, $major must not contain `.'. major=.$major versuffix="$major.$revision" ;; linux) major=.`expr $current - $age` versuffix="$major.$age.$revision" ;; osf) major=.`expr $current - $age` versuffix=".$current.$age.$revision" verstring="$current.$age.$revision" # Add in all the interfaces that we are compatible with. loop=$age while test "$loop" -ne 0; do iface=`expr $current - $loop` loop=`expr $loop - 1` verstring="$verstring:${iface}.0" done # Make executables depend on our current version. verstring="$verstring:${current}.0" ;; sunos) major=".$current" versuffix=".$current.$revision" ;; windows) # Use '-' rather than '.', since we only want one # extension on DOS 8.3 filesystems. major=`expr $current - $age` versuffix="-$major" ;; *) $echo "$modename: unknown library version type \`$version_type'" 1>&2 $echo "Fatal configuration error. See the $PACKAGE docs for more information." 1>&2 exit $EXIT_FAILURE ;; esac # Clear the version info if we defaulted, and they specified a release. 
if test -z "$vinfo" && test -n "$release"; then major= case $version_type in darwin) # we can't check for "0.0" in archive_cmds due to quoting # problems, so we reset it completely verstring= ;; *) verstring="0.0" ;; esac if test "$need_version" = no; then versuffix= else versuffix=".0.0" fi fi # Remove version info from name if versioning should be avoided if test "$avoid_version" = yes && test "$need_version" = no; then major= versuffix= verstring="" fi # Check to see if the archive will have undefined symbols. if test "$allow_undefined" = yes; then if test "$allow_undefined_flag" = unsupported; then $echo "$modename: warning: undefined symbols not allowed in $host shared libraries" 1>&2 build_libtool_libs=no build_old_libs=yes fi else # Don't allow undefined symbols. allow_undefined_flag="$no_undefined_flag" fi fi if test "$mode" != relink; then # Remove our outputs, but don't remove object files since they # may have been created when compiling PIC objects. removelist= tempremovelist=`$echo "$output_objdir/*"` for p in $tempremovelist; do case $p in *.$objext) ;; $output_objdir/$outputname | $output_objdir/$libname.* | $output_objdir/${libname}${release}.*) if test "X$precious_files_regex" != "X"; then if echo $p | $EGREP -e "$precious_files_regex" >/dev/null 2>&1 then continue fi fi removelist="$removelist $p" ;; *) ;; esac done if test -n "$removelist"; then $show "${rm}r $removelist" $run ${rm}r $removelist fi fi # Now set the variables for building old libraries. if test "$build_old_libs" = yes && test "$build_libtool_libs" != convenience ; then oldlibs="$oldlibs $output_objdir/$libname.$libext" # Transform .lo files to .o files. oldobjs="$objs "`$echo "X$libobjs" | $SP2NL | $Xsed -e '/\.'${libext}'$/d' -e "$lo2o" | $NL2SP` fi # Eliminate all temporary directories. 
#for path in $notinst_path; do # lib_search_path=`$echo "$lib_search_path " | ${SED} -e "s% $path % %g"` # deplibs=`$echo "$deplibs " | ${SED} -e "s% -L$path % %g"` # dependency_libs=`$echo "$dependency_libs " | ${SED} -e "s% -L$path % %g"` #done if test -n "$xrpath"; then # If the user specified any rpath flags, then add them. temp_xrpath= for libdir in $xrpath; do temp_xrpath="$temp_xrpath -R$libdir" case "$finalize_rpath " in *" $libdir "*) ;; *) finalize_rpath="$finalize_rpath $libdir" ;; esac done if test "$hardcode_into_libs" != yes || test "$build_old_libs" = yes; then dependency_libs="$temp_xrpath $dependency_libs" fi fi # Make sure dlfiles contains only unique files that won't be dlpreopened old_dlfiles="$dlfiles" dlfiles= for lib in $old_dlfiles; do case " $dlprefiles $dlfiles " in *" $lib "*) ;; *) dlfiles="$dlfiles $lib" ;; esac done # Make sure dlprefiles contains only unique files old_dlprefiles="$dlprefiles" dlprefiles= for lib in $old_dlprefiles; do case "$dlprefiles " in *" $lib "*) ;; *) dlprefiles="$dlprefiles $lib" ;; esac done if test "$build_libtool_libs" = yes; then if test -n "$rpath"; then case $host in *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-*-beos*) # these systems don't actually have a c library (as such)! ;; *-*-rhapsody* | *-*-darwin1.[012]) # Rhapsody C library is in the System framework deplibs="$deplibs -framework System" ;; *-*-netbsd*) # Don't link with libc until the a.out ld.so is fixed. ;; *-*-openbsd* | *-*-freebsd* | *-*-dragonfly*) # Do not include libc due to us having libc/libc_r. ;; *-*-sco3.2v5* | *-*-sco5v6*) # Causes problems with __ctype ;; *-*-sysv4.2uw2* | *-*-sysv5* | *-*-unixware* | *-*-OpenUNIX*) # Compiler inserts libc in the correct place for threads to work ;; *) # Add libc to deplibs on all other systems if necessary. if test "$build_libtool_need_lc" = "yes"; then deplibs="$deplibs -lc" fi ;; esac fi # Transform deplibs into only deplibs that can be linked in shared. 
name_save=$name libname_save=$libname release_save=$release versuffix_save=$versuffix major_save=$major # I'm not sure if I'm treating the release correctly. I think # release should show up in the -l (ie -lgmp5) so we don't want to # add it in twice. Is that correct? release="" versuffix="" major="" newdeplibs= droppeddeps=no case $deplibs_check_method in pass_all) # Don't check for shared/static. Everything works. # This might be a little naive. We might want to check # whether the library exists or not. But this is on # osf3 & osf4 and I'm not really sure... Just # implementing what was already the behavior. newdeplibs=$deplibs ;; test_compile) # This code stresses the "libraries are programs" paradigm to its # limits. Maybe even breaks it. We compile a program, linking it # against the deplibs as a proxy for the library. Then we can check # whether they linked in statically or dynamically with ldd. $rm conftest.c cat > conftest.c </dev/null` for potent_lib in $potential_libs; do # Follow soft links. if ls -lLd "$potent_lib" 2>/dev/null \ | grep " -> " >/dev/null; then continue fi # The statement above tries to avoid entering an # endless loop below, in case of cyclic links. # We might still enter an endless loop, since a link # loop can be closed while we follow links, # but so what? potlib="$potent_lib" while test -h "$potlib" 2>/dev/null; do potliblink=`ls -ld $potlib | ${SED} 's/.* -> //'` case $potliblink in [\\/]* | [A-Za-z]:[\\/]*) potlib="$potliblink";; *) potlib=`$echo "X$potlib" | $Xsed -e 's,[^/]*$,,'`"$potliblink";; esac done if eval $file_magic_cmd \"\$potlib\" 2>/dev/null \ | ${SED} 10q \ | $EGREP "$file_magic_regex" > /dev/null; then newdeplibs="$newdeplibs $a_deplib" a_deplib="" break 2 fi done done fi if test -n "$a_deplib" ; then droppeddeps=yes $echo $echo "*** Warning: linker path does not have real file for library $a_deplib." 
$echo "*** I have the capability to make that library automatically link in when" $echo "*** you link to this library. But I can only do this if you have a" $echo "*** shared version of the library, which you do not appear to have" $echo "*** because I did check the linker path looking for a file starting" if test -z "$potlib" ; then $echo "*** with $libname but no candidates were found. (...for file magic test)" else $echo "*** with $libname and none of the candidates passed a file format test" $echo "*** using a file magic. Last file checked: $potlib" fi fi else # Add a -L argument. newdeplibs="$newdeplibs $a_deplib" fi done # Gone through all deplibs. ;; match_pattern*) set dummy $deplibs_check_method match_pattern_regex=`expr "$deplibs_check_method" : "$2 \(.*\)"` for a_deplib in $deplibs; do name=`expr $a_deplib : '-l\(.*\)'` # If $name is empty we are operating on a -L argument. if test -n "$name" && test "$name" != "0"; then if test "X$allow_libtool_libs_with_static_runtimes" = "Xyes" ; then case " $predeps $postdeps " in *" $a_deplib "*) newdeplibs="$newdeplibs $a_deplib" a_deplib="" ;; esac fi if test -n "$a_deplib" ; then libname=`eval \\$echo \"$libname_spec\"` for i in $lib_search_path $sys_lib_search_path $shlib_search_path; do potential_libs=`ls $i/$libname[.-]* 2>/dev/null` for potent_lib in $potential_libs; do potlib="$potent_lib" # see symlink-check above in file_magic test if eval $echo \"$potent_lib\" 2>/dev/null \ | ${SED} 10q \ | $EGREP "$match_pattern_regex" > /dev/null; then newdeplibs="$newdeplibs $a_deplib" a_deplib="" break 2 fi done done fi if test -n "$a_deplib" ; then droppeddeps=yes $echo $echo "*** Warning: linker path does not have real file for library $a_deplib." $echo "*** I have the capability to make that library automatically link in when" $echo "*** you link to this library. 
But I can only do this if you have a" $echo "*** shared version of the library, which you do not appear to have" $echo "*** because I did check the linker path looking for a file starting" if test -z "$potlib" ; then $echo "*** with $libname but no candidates were found. (...for regex pattern test)" else $echo "*** with $libname and none of the candidates passed a file format test" $echo "*** using a regex pattern. Last file checked: $potlib" fi fi else # Add a -L argument. newdeplibs="$newdeplibs $a_deplib" fi done # Gone through all deplibs. ;; none | unknown | *) newdeplibs="" tmp_deplibs=`$echo "X $deplibs" | $Xsed -e 's/ -lc$//' \ -e 's/ -[LR][^ ]*//g'` if test "X$allow_libtool_libs_with_static_runtimes" = "Xyes" ; then for i in $predeps $postdeps ; do # can't use Xsed below, because $i might contain '/' tmp_deplibs=`$echo "X $tmp_deplibs" | ${SED} -e "1s,^X,," -e "s,$i,,"` done fi if $echo "X $tmp_deplibs" | $Xsed -e 's/[ ]//g' \ | grep . >/dev/null; then $echo if test "X$deplibs_check_method" = "Xnone"; then $echo "*** Warning: inter-library dependencies are not supported in this platform." else $echo "*** Warning: inter-library dependencies are not known to be supported." fi $echo "*** All declared inter-library dependencies are being dropped." droppeddeps=yes fi ;; esac versuffix=$versuffix_save major=$major_save release=$release_save libname=$libname_save name=$name_save case $host in *-*-rhapsody* | *-*-darwin1.[012]) # On Rhapsody replace the C library is the System framework newdeplibs=`$echo "X $newdeplibs" | $Xsed -e 's/ -lc / -framework System /'` ;; esac if test "$droppeddeps" = yes; then if test "$module" = yes; then $echo $echo "*** Warning: libtool could not satisfy all declared inter-library" $echo "*** dependencies of module $libname. Therefore, libtool will create" $echo "*** a static module, that should work as long as the dlopening" $echo "*** application is linked with the -dlopen flag." 
if test -z "$global_symbol_pipe"; then $echo $echo "*** However, this would only work if libtool was able to extract symbol" $echo "*** lists from a program, using \`nm' or equivalent, but libtool could" $echo "*** not find such a program. So, this module is probably useless." $echo "*** \`nm' from GNU binutils and a full rebuild may help." fi if test "$build_old_libs" = no; then oldlibs="$output_objdir/$libname.$libext" build_libtool_libs=module build_old_libs=yes else build_libtool_libs=no fi else $echo "*** The inter-library dependencies that have been dropped here will be" $echo "*** automatically added whenever a program is linked with this library" $echo "*** or is declared to -dlopen it." if test "$allow_undefined" = no; then $echo $echo "*** Since this library must not contain undefined symbols," $echo "*** because either the platform does not support them or" $echo "*** it was explicitly requested with -no-undefined," $echo "*** libtool will only create a static version of it." if test "$build_old_libs" = no; then oldlibs="$output_objdir/$libname.$libext" build_libtool_libs=module build_old_libs=yes else build_libtool_libs=no fi fi fi fi # Done checking deplibs! deplibs=$newdeplibs fi # move library search paths that coincide with paths to not yet # installed libraries to the beginning of the library search list new_libs= for path in $notinst_path; do case " $new_libs " in *" -L$path/$objdir "*) ;; *) case " $deplibs " in *" -L$path/$objdir "*) new_libs="$new_libs -L$path/$objdir" ;; esac ;; esac done for deplib in $deplibs; do case $deplib in -L*) case " $new_libs " in *" $deplib "*) ;; *) new_libs="$new_libs $deplib" ;; esac ;; *) new_libs="$new_libs $deplib" ;; esac done deplibs="$new_libs" # All the library-specific variables (install_libdir is set above). 
library_names= old_library= dlname= # Test again, we may have decided not to build it any more if test "$build_libtool_libs" = yes; then if test "$hardcode_into_libs" = yes; then # Hardcode the library paths hardcode_libdirs= dep_rpath= rpath="$finalize_rpath" test "$mode" != relink && rpath="$compile_rpath$rpath" for libdir in $rpath; do if test -n "$hardcode_libdir_flag_spec"; then if test -n "$hardcode_libdir_separator"; then if test -z "$hardcode_libdirs"; then hardcode_libdirs="$libdir" else # Just accumulate the unique libdirs. case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*) ;; *) hardcode_libdirs="$hardcode_libdirs$hardcode_libdir_separator$libdir" ;; esac fi else eval flag=\"$hardcode_libdir_flag_spec\" dep_rpath="$dep_rpath $flag" fi elif test -n "$runpath_var"; then case "$perm_rpath " in *" $libdir "*) ;; *) perm_rpath="$perm_rpath $libdir" ;; esac fi done # Substitute the hardcoded libdirs into the rpath. if test -n "$hardcode_libdir_separator" && test -n "$hardcode_libdirs"; then libdir="$hardcode_libdirs" if test -n "$hardcode_libdir_flag_spec_ld"; then case $archive_cmds in *\$LD*) eval dep_rpath=\"$hardcode_libdir_flag_spec_ld\" ;; *) eval dep_rpath=\"$hardcode_libdir_flag_spec\" ;; esac else eval dep_rpath=\"$hardcode_libdir_flag_spec\" fi fi if test -n "$runpath_var" && test -n "$perm_rpath"; then # We should set the runpath_var. rpath= for dir in $perm_rpath; do rpath="$rpath$dir:" done eval "$runpath_var='$rpath\$$runpath_var'; export $runpath_var" fi test -n "$dep_rpath" && deplibs="$dep_rpath $deplibs" fi shlibpath="$finalize_shlibpath" test "$mode" != relink && shlibpath="$compile_shlibpath$shlibpath" if test -n "$shlibpath"; then eval "$shlibpath_var='$shlibpath\$$shlibpath_var'; export $shlibpath_var" fi # Get the real and link names of the library. 
eval shared_ext=\"$shrext_cmds\" eval library_names=\"$library_names_spec\" set dummy $library_names realname="$2" shift; shift if test -n "$soname_spec"; then eval soname=\"$soname_spec\" else soname="$realname" fi if test -z "$dlname"; then dlname=$soname fi lib="$output_objdir/$realname" linknames= for link do linknames="$linknames $link" done # Use standard objects if they are pic test -z "$pic_flag" && libobjs=`$echo "X$libobjs" | $SP2NL | $Xsed -e "$lo2o" | $NL2SP` # Prepare the list of exported symbols if test -z "$export_symbols"; then if test "$always_export_symbols" = yes || test -n "$export_symbols_regex"; then $show "generating symbol list for \`$libname.la'" export_symbols="$output_objdir/$libname.exp" $run $rm $export_symbols cmds=$export_symbols_cmds save_ifs="$IFS"; IFS='~' for cmd in $cmds; do IFS="$save_ifs" eval cmd=\"$cmd\" if len=`expr "X$cmd" : ".*"` && test "$len" -le "$max_cmd_len" || test "$max_cmd_len" -le -1; then $show "$cmd" $run eval "$cmd" || exit $? skipped_export=false else # The command line is too long to execute in one step. $show "using reloadable object file for export list..." skipped_export=: # Break out early, otherwise skipped_export may be # set to false by a later but shorter cmd. 
break fi done IFS="$save_ifs" if test -n "$export_symbols_regex"; then $show "$EGREP -e \"$export_symbols_regex\" \"$export_symbols\" > \"${export_symbols}T\"" $run eval '$EGREP -e "$export_symbols_regex" "$export_symbols" > "${export_symbols}T"' $show "$mv \"${export_symbols}T\" \"$export_symbols\"" $run eval '$mv "${export_symbols}T" "$export_symbols"' fi fi fi if test -n "$export_symbols" && test -n "$include_expsyms"; then $run eval '$echo "X$include_expsyms" | $SP2NL >> "$export_symbols"' fi tmp_deplibs= for test_deplib in $deplibs; do case " $convenience " in *" $test_deplib "*) ;; *) tmp_deplibs="$tmp_deplibs $test_deplib" ;; esac done deplibs="$tmp_deplibs" if test -n "$convenience"; then if test -n "$whole_archive_flag_spec"; then save_libobjs=$libobjs eval libobjs=\"\$libobjs $whole_archive_flag_spec\" else gentop="$output_objdir/${outputname}x" generated="$generated $gentop" func_extract_archives $gentop $convenience libobjs="$libobjs $func_extract_archives_result" fi fi if test "$thread_safe" = yes && test -n "$thread_safe_flag_spec"; then eval flag=\"$thread_safe_flag_spec\" linker_flags="$linker_flags $flag" fi # Make a backup of the uninstalled library when relinking if test "$mode" = relink; then $run eval '(cd $output_objdir && $rm ${realname}U && $mv $realname ${realname}U)' || exit $? fi # Do each of the archive commands. 
if test "$module" = yes && test -n "$module_cmds" ; then if test -n "$export_symbols" && test -n "$module_expsym_cmds"; then eval test_cmds=\"$module_expsym_cmds\" cmds=$module_expsym_cmds else eval test_cmds=\"$module_cmds\" cmds=$module_cmds fi else if test -n "$export_symbols" && test -n "$archive_expsym_cmds"; then eval test_cmds=\"$archive_expsym_cmds\" cmds=$archive_expsym_cmds else eval test_cmds=\"$archive_cmds\" cmds=$archive_cmds fi fi if test "X$skipped_export" != "X:" && len=`expr "X$test_cmds" : ".*" 2>/dev/null` && test "$len" -le "$max_cmd_len" || test "$max_cmd_len" -le -1; then : else # The command line is too long to link in one step, link piecewise. $echo "creating reloadable object files..." # Save the value of $output and $libobjs because we want to # use them later. If we have whole_archive_flag_spec, we # want to use save_libobjs as it was before # whole_archive_flag_spec was expanded, because we can't # assume the linker understands whole_archive_flag_spec. # This may have to be revisited, in case too many # convenience libraries get linked in and end up exceeding # the spec. if test -z "$convenience" || test -z "$whole_archive_flag_spec"; then save_libobjs=$libobjs fi save_output=$output output_la=`$echo "X$output" | $Xsed -e "$basename"` # Clear the reloadable object creation command queue and # initialize k to one. test_cmds= concat_cmds= objlist= delfiles= last_robj= k=1 output=$output_objdir/$output_la-${k}.$objext # Loop over the list of objects to be linked. for obj in $save_libobjs do eval test_cmds=\"$reload_cmds $objlist $last_robj\" if test "X$objlist" = X || { len=`expr "X$test_cmds" : ".*" 2>/dev/null` && test "$len" -le "$max_cmd_len"; }; then objlist="$objlist $obj" else # The command $test_cmds is almost too long, add a # command to the queue. if test "$k" -eq 1 ; then # The first file doesn't have a previous command to add. 
eval concat_cmds=\"$reload_cmds $objlist $last_robj\" else # All subsequent reloadable object files will link in # the last one created. eval concat_cmds=\"\$concat_cmds~$reload_cmds $objlist $last_robj\" fi last_robj=$output_objdir/$output_la-${k}.$objext k=`expr $k + 1` output=$output_objdir/$output_la-${k}.$objext objlist=$obj len=1 fi done # Handle the remaining objects by creating one last # reloadable object file. All subsequent reloadable object # files will link in the last one created. test -z "$concat_cmds" || concat_cmds=$concat_cmds~ eval concat_cmds=\"\${concat_cmds}$reload_cmds $objlist $last_robj\" if ${skipped_export-false}; then $show "generating symbol list for \`$libname.la'" export_symbols="$output_objdir/$libname.exp" $run $rm $export_symbols libobjs=$output # Append the command to create the export file. eval concat_cmds=\"\$concat_cmds~$export_symbols_cmds\" fi # Set up a command to remove the reloadable object files # after they are used. i=0 while test "$i" -lt "$k" do i=`expr $i + 1` delfiles="$delfiles $output_objdir/$output_la-${i}.$objext" done $echo "creating a temporary reloadable object file: $output" # Loop through the commands generated above and execute them. save_ifs="$IFS"; IFS='~' for cmd in $concat_cmds; do IFS="$save_ifs" $show "$cmd" $run eval "$cmd" || exit $? done IFS="$save_ifs" libobjs=$output # Restore the value of output. output=$save_output if test -n "$convenience" && test -n "$whole_archive_flag_spec"; then eval libobjs=\"\$libobjs $whole_archive_flag_spec\" fi # Expand the library linking commands again to reset the # value of $libobjs for piecewise linking. # Do each of the archive commands. 
if test "$module" = yes && test -n "$module_cmds" ; then if test -n "$export_symbols" && test -n "$module_expsym_cmds"; then cmds=$module_expsym_cmds else cmds=$module_cmds fi else if test -n "$export_symbols" && test -n "$archive_expsym_cmds"; then cmds=$archive_expsym_cmds else cmds=$archive_cmds fi fi # Append the command to remove the reloadable object files # to the just-reset $cmds. eval cmds=\"\$cmds~\$rm $delfiles\" fi save_ifs="$IFS"; IFS='~' for cmd in $cmds; do IFS="$save_ifs" eval cmd=\"$cmd\" $show "$cmd" $run eval "$cmd" || { lt_exit=$? # Restore the uninstalled library and exit if test "$mode" = relink; then $run eval '(cd $output_objdir && $rm ${realname}T && $mv ${realname}U $realname)' fi exit $lt_exit } done IFS="$save_ifs" # Restore the uninstalled library and exit if test "$mode" = relink; then $run eval '(cd $output_objdir && $rm ${realname}T && $mv $realname ${realname}T && $mv "$realname"U $realname)' || exit $? if test -n "$convenience"; then if test -z "$whole_archive_flag_spec"; then $show "${rm}r $gentop" $run ${rm}r "$gentop" fi fi exit $EXIT_SUCCESS fi # Create links to the real library. for linkname in $linknames; do if test "$realname" != "$linkname"; then $show "(cd $output_objdir && $rm $linkname && $LN_S $realname $linkname)" $run eval '(cd $output_objdir && $rm $linkname && $LN_S $realname $linkname)' || exit $? fi done # If -module or -export-dynamic was specified, set the dlname. if test "$module" = yes || test "$export_dynamic" = yes; then # On all known operating systems, these are identical. 
dlname="$soname" fi fi ;; obj) if test -n "$deplibs"; then $echo "$modename: warning: \`-l' and \`-L' are ignored for objects" 1>&2 fi if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then $echo "$modename: warning: \`-dlopen' is ignored for objects" 1>&2 fi if test -n "$rpath"; then $echo "$modename: warning: \`-rpath' is ignored for objects" 1>&2 fi if test -n "$xrpath"; then $echo "$modename: warning: \`-R' is ignored for objects" 1>&2 fi if test -n "$vinfo"; then $echo "$modename: warning: \`-version-info' is ignored for objects" 1>&2 fi if test -n "$release"; then $echo "$modename: warning: \`-release' is ignored for objects" 1>&2 fi case $output in *.lo) if test -n "$objs$old_deplibs"; then $echo "$modename: cannot build library object \`$output' from non-libtool objects" 1>&2 exit $EXIT_FAILURE fi libobj="$output" obj=`$echo "X$output" | $Xsed -e "$lo2o"` ;; *) libobj= obj="$output" ;; esac # Delete the old objects. $run $rm $obj $libobj # Objects from convenience libraries. This assumes # single-version convenience libraries. Whenever we create # different ones for PIC/non-PIC, this we'll have to duplicate # the extraction. reload_conv_objs= gentop= # reload_cmds runs $LD directly, so let us get rid of # -Wl from whole_archive_flag_spec and hope we can get by with # turning comma into space.. wl= if test -n "$convenience"; then if test -n "$whole_archive_flag_spec"; then eval tmp_whole_archive_flags=\"$whole_archive_flag_spec\" reload_conv_objs=$reload_objs\ `$echo "X$tmp_whole_archive_flags" | $Xsed -e 's|,| |g'` else gentop="$output_objdir/${obj}x" generated="$generated $gentop" func_extract_archives $gentop $convenience reload_conv_objs="$reload_objs $func_extract_archives_result" fi fi # Create the old-style object. 
reload_objs="$objs$old_deplibs "`$echo "X$libobjs" | $SP2NL | $Xsed -e '/\.'${libext}$'/d' -e '/\.lib$/d' -e "$lo2o" | $NL2SP`" $reload_conv_objs" ### testsuite: skip nested quoting test output="$obj" cmds=$reload_cmds save_ifs="$IFS"; IFS='~' for cmd in $cmds; do IFS="$save_ifs" eval cmd=\"$cmd\" $show "$cmd" $run eval "$cmd" || exit $? done IFS="$save_ifs" # Exit if we aren't doing a library object file. if test -z "$libobj"; then if test -n "$gentop"; then $show "${rm}r $gentop" $run ${rm}r $gentop fi exit $EXIT_SUCCESS fi if test "$build_libtool_libs" != yes; then if test -n "$gentop"; then $show "${rm}r $gentop" $run ${rm}r $gentop fi # Create an invalid libtool object if no PIC, so that we don't # accidentally link it into a program. # $show "echo timestamp > $libobj" # $run eval "echo timestamp > $libobj" || exit $? exit $EXIT_SUCCESS fi if test -n "$pic_flag" || test "$pic_mode" != default; then # Only do commands if we really have different PIC objects. reload_objs="$libobjs $reload_conv_objs" output="$libobj" cmds=$reload_cmds save_ifs="$IFS"; IFS='~' for cmd in $cmds; do IFS="$save_ifs" eval cmd=\"$cmd\" $show "$cmd" $run eval "$cmd" || exit $? done IFS="$save_ifs" fi if test -n "$gentop"; then $show "${rm}r $gentop" $run ${rm}r $gentop fi exit $EXIT_SUCCESS ;; prog) case $host in *cygwin*) output=`$echo $output | ${SED} -e 's,.exe$,,;s,$,.exe,'` ;; esac if test -n "$vinfo"; then $echo "$modename: warning: \`-version-info' is ignored for programs" 1>&2 fi if test -n "$release"; then $echo "$modename: warning: \`-release' is ignored for programs" 1>&2 fi if test "$preload" = yes; then if test "$dlopen_support" = unknown && test "$dlopen_self" = unknown && test "$dlopen_self_static" = unknown; then $echo "$modename: warning: \`AC_LIBTOOL_DLOPEN' not used. Assuming no dlopen support." 
fi fi case $host in *-*-rhapsody* | *-*-darwin1.[012]) # On Rhapsody replace the C library is the System framework compile_deplibs=`$echo "X $compile_deplibs" | $Xsed -e 's/ -lc / -framework System /'` finalize_deplibs=`$echo "X $finalize_deplibs" | $Xsed -e 's/ -lc / -framework System /'` ;; esac case $host in *darwin*) # Don't allow lazy linking, it breaks C++ global constructors if test "$tagname" = CXX ; then compile_command="$compile_command ${wl}-bind_at_load" finalize_command="$finalize_command ${wl}-bind_at_load" fi ;; esac # move library search paths that coincide with paths to not yet # installed libraries to the beginning of the library search list new_libs= for path in $notinst_path; do case " $new_libs " in *" -L$path/$objdir "*) ;; *) case " $compile_deplibs " in *" -L$path/$objdir "*) new_libs="$new_libs -L$path/$objdir" ;; esac ;; esac done for deplib in $compile_deplibs; do case $deplib in -L*) case " $new_libs " in *" $deplib "*) ;; *) new_libs="$new_libs $deplib" ;; esac ;; *) new_libs="$new_libs $deplib" ;; esac done compile_deplibs="$new_libs" compile_command="$compile_command $compile_deplibs" finalize_command="$finalize_command $finalize_deplibs" if test -n "$rpath$xrpath"; then # If the user specified any rpath flags, then add them. for libdir in $rpath $xrpath; do # This is the magic to use -rpath. case "$finalize_rpath " in *" $libdir "*) ;; *) finalize_rpath="$finalize_rpath $libdir" ;; esac done fi # Now hardcode the library paths rpath= hardcode_libdirs= for libdir in $compile_rpath $finalize_rpath; do if test -n "$hardcode_libdir_flag_spec"; then if test -n "$hardcode_libdir_separator"; then if test -z "$hardcode_libdirs"; then hardcode_libdirs="$libdir" else # Just accumulate the unique libdirs. 
case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*) ;; *) hardcode_libdirs="$hardcode_libdirs$hardcode_libdir_separator$libdir" ;; esac fi else eval flag=\"$hardcode_libdir_flag_spec\" rpath="$rpath $flag" fi elif test -n "$runpath_var"; then case "$perm_rpath " in *" $libdir "*) ;; *) perm_rpath="$perm_rpath $libdir" ;; esac fi case $host in *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2*) testbindir=`$echo "X$libdir" | $Xsed -e 's*/lib$*/bin*'` case :$dllsearchpath: in *":$libdir:"*) ;; *) dllsearchpath="$dllsearchpath:$libdir";; esac case :$dllsearchpath: in *":$testbindir:"*) ;; *) dllsearchpath="$dllsearchpath:$testbindir";; esac ;; esac done # Substitute the hardcoded libdirs into the rpath. if test -n "$hardcode_libdir_separator" && test -n "$hardcode_libdirs"; then libdir="$hardcode_libdirs" eval rpath=\" $hardcode_libdir_flag_spec\" fi compile_rpath="$rpath" rpath= hardcode_libdirs= for libdir in $finalize_rpath; do if test -n "$hardcode_libdir_flag_spec"; then if test -n "$hardcode_libdir_separator"; then if test -z "$hardcode_libdirs"; then hardcode_libdirs="$libdir" else # Just accumulate the unique libdirs. case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*) ;; *) hardcode_libdirs="$hardcode_libdirs$hardcode_libdir_separator$libdir" ;; esac fi else eval flag=\"$hardcode_libdir_flag_spec\" rpath="$rpath $flag" fi elif test -n "$runpath_var"; then case "$finalize_perm_rpath " in *" $libdir "*) ;; *) finalize_perm_rpath="$finalize_perm_rpath $libdir" ;; esac fi done # Substitute the hardcoded libdirs into the rpath. 
# ---------------------------------------------------------------------
# Program link, final stages.
#
# 1. Collapse the accumulated $hardcode_libdirs into the finalize rpath.
# 2. If dlopen/dlpreopen files were requested, generate, compile and
#    substitute the lt_preloaded_symbols table ($dlsyms).
# 3. Link immediately when no relink is needed (or when -no-install was
#    given); otherwise link the real binary into $output_objdir and
#    prepare $relink_command for the wrapper script created afterwards.
# ---------------------------------------------------------------------
if test -n "$hardcode_libdir_separator" &&
   test -n "$hardcode_libdirs"; then
  libdir="$hardcode_libdirs"
  eval rpath=\" $hardcode_libdir_flag_spec\"
fi
finalize_rpath="$rpath"

if test -n "$libobjs" && test "$build_old_libs" = yes; then
  # Transform all the library objects into standard objects.
  compile_command=`$echo "X$compile_command" | $SP2NL | $Xsed -e "$lo2o" | $NL2SP`
  finalize_command=`$echo "X$finalize_command" | $SP2NL | $Xsed -e "$lo2o" | $NL2SP`
fi

# Decide whether a dlpreopen symbol table source file is needed.
dlsyms=
if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then
  if test -n "$NM" && test -n "$global_symbol_pipe"; then
    dlsyms="${outputname}S.c"
  else
    $echo "$modename: not configured to extract global symbols from dlpreopened files" 1>&2
  fi
fi

if test -n "$dlsyms"; then
  case $dlsyms in
  "") ;;
  *.c)
    # Discover the nlist of each of the dlfiles.
    nlist="$output_objdir/${outputname}.nm"

    $show "$rm $nlist ${nlist}S ${nlist}T"
    $run $rm "$nlist" "${nlist}S" "${nlist}T"

    # Parse the name list into a source file.
    $show "creating $output_objdir/$dlsyms"

    test -z "$run" && $echo > "$output_objdir/$dlsyms" "\
/* $dlsyms - symbol resolution table for \`$outputname' dlsym emulation. */
/* Generated by $PROGRAM - GNU $PACKAGE $VERSION$TIMESTAMP */

#ifdef __cplusplus
extern \"C\" {
#endif

/* Prevent the only kind of declaration conflicts we can make. */
#define lt_preloaded_symbols some_other_symbol

/* External symbol declarations for the compiler. */\
"

    if test "$dlself" = yes; then
      $show "generating symbol list for \`$output'"

      test -z "$run" && $echo ': @PROGRAM@ ' > "$nlist"

      # Add our own program objects to the symbol list.
      progfiles=`$echo "X$objs$old_deplibs" | $SP2NL | $Xsed -e "$lo2o" | $NL2SP`
      for arg in $progfiles; do
        $show "extracting global C symbols from \`$arg'"
        $run eval "$NM $arg | $global_symbol_pipe >> '$nlist'"
      done

      if test -n "$exclude_expsyms"; then
        $run eval '$EGREP -v " ($exclude_expsyms)$" "$nlist" > "$nlist"T'
        $run eval '$mv "$nlist"T "$nlist"'
      fi

      if test -n "$export_symbols_regex"; then
        $run eval '$EGREP -e "$export_symbols_regex" "$nlist" > "$nlist"T'
        $run eval '$mv "$nlist"T "$nlist"'
      fi

      # Prepare the list of exported symbols
      if test -z "$export_symbols"; then
        export_symbols="$output_objdir/$outputname.exp"
        $run $rm $export_symbols
        $run eval "${SED} -n -e '/^: @PROGRAM@ $/d' -e 's/^.* \(.*\)$/\1/p' "'< "$nlist" > "$export_symbols"'
        case $host in
        *cygwin* | *mingw* )
          $run eval "echo EXPORTS "'> "$output_objdir/$outputname.def"'
          $run eval 'cat "$export_symbols" >> "$output_objdir/$outputname.def"'
          ;;
        esac
      else
        # Turn the user's symbol list into anchored patterns and keep
        # only the matching entries of the name list.
        $run eval "${SED} -e 's/\([].[*^$]\)/\\\\\1/g' -e 's/^/ /' -e 's/$/$/'"' < "$export_symbols" > "$output_objdir/$outputname.exp"'
        $run eval 'grep -f "$output_objdir/$outputname.exp" < "$nlist" > "$nlist"T'
        # Use $mv like every other rename in this section.
        $run eval '$mv "$nlist"T "$nlist"'
        case $host in
        *cygwin* | *mingw* )
          $run eval "echo EXPORTS "'> "$output_objdir/$outputname.def"'
          $run eval 'cat "$nlist" >> "$output_objdir/$outputname.def"'
          ;;
        esac
      fi
    fi

    for arg in $dlprefiles; do
      $show "extracting global C symbols from \`$arg'"
      name=`$echo "$arg" | ${SED} -e 's%^.*/%%'`
      $run eval '$echo ": $name " >> "$nlist"'
      $run eval "$NM $arg | $global_symbol_pipe >> '$nlist'"
    done

    if test -z "$run"; then
      # Make sure we have at least an empty file.
      test -f "$nlist" || : > "$nlist"

      if test -n "$exclude_expsyms"; then
        $EGREP -v " ($exclude_expsyms)$" "$nlist" > "$nlist"T
        $mv "$nlist"T "$nlist"
      fi

      # Try sorting and uniquifying the output.
      # The inner `sort -k 3' call is only a capability probe: it must
      # read from /dev/null and discard everything it prints, otherwise
      # its output (or its error message on systems without -k) would be
      # piped into the symbol list.
      if grep -v "^: " < "$nlist" |
          if sort -k 3 </dev/null >/dev/null 2>&1; then
            sort -k 3
          else
            sort +2
          fi |
          uniq > "$nlist"S; then
        :
      else
        grep -v "^: " < "$nlist" > "$nlist"S
      fi

      if test -f "$nlist"S; then
        eval "$global_symbol_to_cdecl"' < "$nlist"S >> "$output_objdir/$dlsyms"'
      else
        $echo '/* NONE */' >> "$output_objdir/$dlsyms"
      fi

      $echo >> "$output_objdir/$dlsyms" "\

#undef lt_preloaded_symbols

#if defined (__STDC__) && __STDC__
# define lt_ptr void *
#else
# define lt_ptr char *
# define const
#endif

/* The mapping between symbol names and symbols. */
"

      case $host in
      *cygwin* | *mingw* )
        $echo >> "$output_objdir/$dlsyms" "\
/* DATA imports from DLLs on WIN32 can't be const, because
   runtime relocations are performed -- see ld's documentation
   on pseudo-relocs */
struct {
"
        ;;
      * )
        $echo >> "$output_objdir/$dlsyms" "\
const struct {
"
        ;;
      esac

      $echo >> "$output_objdir/$dlsyms" "\
  const char *name;
  lt_ptr address;
}
lt_preloaded_symbols[] =
{\
"

      eval "$global_symbol_to_c_name_address" < "$nlist" >> "$output_objdir/$dlsyms"

      $echo >> "$output_objdir/$dlsyms" "\
  {0, (lt_ptr) 0}
};

/* This works around a problem in FreeBSD linker */
#ifdef FREEBSD_WORKAROUND
static const void *lt_preloaded_setup() {
  return lt_preloaded_symbols;
}
#endif

#ifdef __cplusplus
}
#endif\
"
    fi

    pic_flag_for_symtable=
    case $host in
    # compiling the symbol table file with pic_flag works around
    # a FreeBSD bug that causes programs to crash when -lm is
    # linked before any other PIC object. But we must not use
    # pic_flag when linking with -static. The problem exists in
    # FreeBSD 2.2.6 and is fixed in FreeBSD 3.1.
    *-*-freebsd2*|*-*-freebsd3.0*|*-*-freebsdelf3.0*)
      case "$compile_command " in
      *" -static "*) ;;
      *) pic_flag_for_symtable=" $pic_flag -DFREEBSD_WORKAROUND";;
      esac;;
    *-*-hpux*)
      case "$compile_command " in
      *" -static "*) ;;
      *) pic_flag_for_symtable=" $pic_flag";;
      esac
    esac

    # Now compile the dynamic symbol file.
    $show "(cd $output_objdir && $LTCC $LTCFLAGS -c$no_builtin_flag$pic_flag_for_symtable \"$dlsyms\")"
    $run eval '(cd $output_objdir && $LTCC $LTCFLAGS -c$no_builtin_flag$pic_flag_for_symtable "$dlsyms")' || exit $?

    # Clean up the generated files.
    $show "$rm $output_objdir/$dlsyms $nlist ${nlist}S ${nlist}T"
    $run $rm "$output_objdir/$dlsyms" "$nlist" "${nlist}S" "${nlist}T"

    # Transform the symbol file into the correct name.
    case $host in
    *cygwin* | *mingw* )
      if test -f "$output_objdir/${outputname}.def" ; then
        compile_command=`$echo "X$compile_command" | $SP2NL | $Xsed -e "s%@SYMFILE@%$output_objdir/${outputname}.def $output_objdir/${outputname}S.${objext}%" | $NL2SP`
        finalize_command=`$echo "X$finalize_command" | $SP2NL | $Xsed -e "s%@SYMFILE@%$output_objdir/${outputname}.def $output_objdir/${outputname}S.${objext}%" | $NL2SP`
      else
        compile_command=`$echo "X$compile_command" | $SP2NL | $Xsed -e "s%@SYMFILE@%$output_objdir/${outputname}S.${objext}%" | $NL2SP`
        finalize_command=`$echo "X$finalize_command" | $SP2NL | $Xsed -e "s%@SYMFILE@%$output_objdir/${outputname}S.${objext}%" | $NL2SP`
      fi
      ;;
    * )
      compile_command=`$echo "X$compile_command" | $SP2NL | $Xsed -e "s%@SYMFILE@%$output_objdir/${outputname}S.${objext}%" | $NL2SP`
      finalize_command=`$echo "X$finalize_command" | $SP2NL | $Xsed -e "s%@SYMFILE@%$output_objdir/${outputname}S.${objext}%" | $NL2SP`
      ;;
    esac
    ;;
  *)
    $echo "$modename: unknown suffix for \`$dlsyms'" 1>&2
    exit $EXIT_FAILURE
    ;;
  esac
else
  # We keep going just in case the user didn't refer to
  # lt_preloaded_symbols. The linker will fail if global_symbol_pipe
  # really was required.

  # Nullify the symbol file.
  compile_command=`$echo "X$compile_command" | $SP2NL | $Xsed -e "s% @SYMFILE@%%" | $NL2SP`
  finalize_command=`$echo "X$finalize_command" | $SP2NL | $Xsed -e "s% @SYMFILE@%%" | $NL2SP`
fi

if test "$need_relink" = no || test "$build_libtool_libs" != yes; then
  # Replace the output file specification.
  compile_command=`$echo "X$compile_command" | $SP2NL | $Xsed -e 's%@OUTPUT@%'"$output"'%g' | $NL2SP`
  link_command="$compile_command$compile_rpath"

  # We have no uninstalled library dependencies, so finalize right now.
  $show "$link_command"
  $run eval "$link_command"
  # Remember the link status so the cleanup below cannot mask it.
  exit_status=$?

  # Delete the generated files.
  if test -n "$dlsyms"; then
    $show "$rm $output_objdir/${outputname}S.${objext}"
    $run $rm "$output_objdir/${outputname}S.${objext}"
  fi

  exit $exit_status
fi

if test -n "$shlibpath_var"; then
  # We should set the shlibpath_var
  rpath=
  for dir in $temp_rpath; do
    case $dir in
    [\\/]* | [A-Za-z]:[\\/]*)
      # Absolute path.
      rpath="$rpath$dir:"
      ;;
    *)
      # Relative path: add a thisdir entry.
      rpath="$rpath\$thisdir/$dir:"
      ;;
    esac
  done
  temp_rpath="$rpath"
fi

if test -n "$compile_shlibpath$finalize_shlibpath"; then
  compile_command="$shlibpath_var=\"$compile_shlibpath$finalize_shlibpath\$$shlibpath_var\" $compile_command"
fi
if test -n "$finalize_shlibpath"; then
  finalize_command="$shlibpath_var=\"$finalize_shlibpath\$$shlibpath_var\" $finalize_command"
fi

compile_var=
finalize_var=
if test -n "$runpath_var"; then
  if test -n "$perm_rpath"; then
    # We should set the runpath_var.
    rpath=
    for dir in $perm_rpath; do
      rpath="$rpath$dir:"
    done
    compile_var="$runpath_var=\"$rpath\$$runpath_var\" "
  fi
  if test -n "$finalize_perm_rpath"; then
    # We should set the runpath_var.
    rpath=
    for dir in $finalize_perm_rpath; do
      rpath="$rpath$dir:"
    done
    finalize_var="$runpath_var=\"$rpath\$$runpath_var\" "
  fi
fi

if test "$no_install" = yes; then
  # We don't need to create a wrapper script.
  link_command="$compile_var$compile_command$compile_rpath"
  # Replace the output file specification.
  link_command=`$echo "X$link_command" | $Xsed -e 's%@OUTPUT@%'"$output"'%g'`
  # Delete the old output file.
  $run $rm $output
  # Link the executable and exit
  $show "$link_command"
  $run eval "$link_command" || exit $?
  exit $EXIT_SUCCESS
fi

if test "$hardcode_action" = relink; then
  # Fast installation is not supported
  link_command="$compile_var$compile_command$compile_rpath"
  relink_command="$finalize_var$finalize_command$finalize_rpath"

  $echo "$modename: warning: this platform does not like uninstalled shared libraries" 1>&2
  $echo "$modename: \`$output' will be relinked during installation" 1>&2
else
  if test "$fast_install" != no; then
    link_command="$finalize_var$compile_command$finalize_rpath"
    if test "$fast_install" = yes; then
      relink_command=`$echo "X$compile_var$compile_command$compile_rpath" | $SP2NL | $Xsed -e 's%@OUTPUT@%\$progdir/\$file%g' | $NL2SP`
    else
      # fast_install is set to needless
      relink_command=
    fi
  else
    link_command="$compile_var$compile_command$compile_rpath"
    relink_command="$finalize_var$finalize_command$finalize_rpath"
  fi
fi

# Replace the output file specification.
link_command=`$echo "X$link_command" | $Xsed -e 's%@OUTPUT@%'"$output_objdir/$outputname"'%g'`

# Delete the old output files.
$run $rm $output $output_objdir/$outputname $output_objdir/lt-$outputname

$show "$link_command"
$run eval "$link_command" || exit $?

# Now create the wrapper script.
$show "creating $output"

# Quote the relink command for shipping.
if test -n "$relink_command"; then
  # Preserve any variables that may affect compiler behavior
  for var in $variables_saved_for_relink; do
    if eval test -z \"\${$var+set}\"; then
      # Variable is unset now: make the relink command unset it too.
      relink_command="{ test -z \"\${$var+set}\" || unset $var || { $var=; export $var; }; }; $relink_command"
    elif eval var_value=\$$var; test -z "$var_value"; then
      relink_command="$var=; export $var; $relink_command"
    else
      var_value=`$echo "X$var_value" | $Xsed -e "$sed_quote_subst"`
      relink_command="$var=\"$var_value\"; export $var; $relink_command"
    fi
  done
  relink_command="(cd `pwd`; $relink_command)"
  relink_command=`$echo "X$relink_command" | $SP2NL | $Xsed -e "$sed_quote_subst" | $NL2SP`
fi

# Quote $echo for shipping.
if test "X$echo" = "X$SHELL $progpath --fallback-echo"; then case $progpath in [\\/]* | [A-Za-z]:[\\/]*) qecho="$SHELL $progpath --fallback-echo";; *) qecho="$SHELL `pwd`/$progpath --fallback-echo";; esac qecho=`$echo "X$qecho" | $Xsed -e "$sed_quote_subst"` else qecho=`$echo "X$echo" | $Xsed -e "$sed_quote_subst"` fi # Only actually do things if our run command is non-null. if test -z "$run"; then # win32 will think the script is a binary if it has # a .exe suffix, so we strip it off here. case $output in *.exe) output=`$echo $output|${SED} 's,.exe$,,'` ;; esac # test for cygwin because mv fails w/o .exe extensions case $host in *cygwin*) exeext=.exe outputname=`$echo $outputname|${SED} 's,.exe$,,'` ;; *) exeext= ;; esac case $host in *cygwin* | *mingw* ) output_name=`basename $output` output_path=`dirname $output` cwrappersource="$output_path/$objdir/lt-$output_name.c" cwrapper="$output_path/$output_name.exe" $rm $cwrappersource $cwrapper trap "$rm $cwrappersource $cwrapper; exit $EXIT_FAILURE" 1 2 15 cat > $cwrappersource <> $cwrappersource<<"EOF" #include #include #include #include #include #include #include #include #include #if defined(PATH_MAX) # define LT_PATHMAX PATH_MAX #elif defined(MAXPATHLEN) # define LT_PATHMAX MAXPATHLEN #else # define LT_PATHMAX 1024 #endif #ifndef DIR_SEPARATOR # define DIR_SEPARATOR '/' # define PATH_SEPARATOR ':' #endif #if defined (_WIN32) || defined (__MSDOS__) || defined (__DJGPP__) || \ defined (__OS2__) # define HAVE_DOS_BASED_FILE_SYSTEM # ifndef DIR_SEPARATOR_2 # define DIR_SEPARATOR_2 '\\' # endif # ifndef PATH_SEPARATOR_2 # define PATH_SEPARATOR_2 ';' # endif #endif #ifndef DIR_SEPARATOR_2 # define IS_DIR_SEPARATOR(ch) ((ch) == DIR_SEPARATOR) #else /* DIR_SEPARATOR_2 */ # define IS_DIR_SEPARATOR(ch) \ (((ch) == DIR_SEPARATOR) || ((ch) == DIR_SEPARATOR_2)) #endif /* DIR_SEPARATOR_2 */ #ifndef PATH_SEPARATOR_2 # define IS_PATH_SEPARATOR(ch) ((ch) == PATH_SEPARATOR) #else /* PATH_SEPARATOR_2 */ # define 
IS_PATH_SEPARATOR(ch) ((ch) == PATH_SEPARATOR_2) #endif /* PATH_SEPARATOR_2 */ #define XMALLOC(type, num) ((type *) xmalloc ((num) * sizeof(type))) #define XFREE(stale) do { \ if (stale) { free ((void *) stale); stale = 0; } \ } while (0) /* -DDEBUG is fairly common in CFLAGS. */ #undef DEBUG #if defined DEBUGWRAPPER # define DEBUG(format, ...) fprintf(stderr, format, __VA_ARGS__) #else # define DEBUG(format, ...) #endif const char *program_name = NULL; void * xmalloc (size_t num); char * xstrdup (const char *string); const char * base_name (const char *name); char * find_executable(const char *wrapper); int check_executable(const char *path); char * strendzap(char *str, const char *pat); void lt_fatal (const char *message, ...); int main (int argc, char *argv[]) { char **newargz; int i; program_name = (char *) xstrdup (base_name (argv[0])); DEBUG("(main) argv[0] : %s\n",argv[0]); DEBUG("(main) program_name : %s\n",program_name); newargz = XMALLOC(char *, argc+2); EOF cat >> $cwrappersource <> $cwrappersource <<"EOF" newargz[1] = find_executable(argv[0]); if (newargz[1] == NULL) lt_fatal("Couldn't find %s", argv[0]); DEBUG("(main) found exe at : %s\n",newargz[1]); /* we know the script has the same name, without the .exe */ /* so make sure newargz[1] doesn't end in .exe */ strendzap(newargz[1],".exe"); for (i = 1; i < argc; i++) newargz[i+1] = xstrdup(argv[i]); newargz[argc+1] = NULL; for (i=0; i> $cwrappersource <> $cwrappersource <> $cwrappersource <<"EOF" return 127; } void * xmalloc (size_t num) { void * p = (void *) malloc (num); if (!p) lt_fatal ("Memory exhausted"); return p; } char * xstrdup (const char *string) { return string ? strcpy ((char *) xmalloc (strlen (string) + 1), string) : NULL ; } const char * base_name (const char *name) { const char *base; #if defined (HAVE_DOS_BASED_FILE_SYSTEM) /* Skip over the disk name in MSDOS pathnames. 
*/ if (isalpha ((unsigned char)name[0]) && name[1] == ':') name += 2; #endif for (base = name; *name; name++) if (IS_DIR_SEPARATOR (*name)) base = name + 1; return base; } int check_executable(const char * path) { struct stat st; DEBUG("(check_executable) : %s\n", path ? (*path ? path : "EMPTY!") : "NULL!"); if ((!path) || (!*path)) return 0; if ((stat (path, &st) >= 0) && ( /* MinGW & native WIN32 do not support S_IXOTH or S_IXGRP */ #if defined (S_IXOTH) ((st.st_mode & S_IXOTH) == S_IXOTH) || #endif #if defined (S_IXGRP) ((st.st_mode & S_IXGRP) == S_IXGRP) || #endif ((st.st_mode & S_IXUSR) == S_IXUSR)) ) return 1; else return 0; } /* Searches for the full path of the wrapper. Returns newly allocated full path name if found, NULL otherwise */ char * find_executable (const char* wrapper) { int has_slash = 0; const char* p; const char* p_next; /* static buffer for getcwd */ char tmp[LT_PATHMAX + 1]; int tmp_len; char* concat_name; DEBUG("(find_executable) : %s\n", wrapper ? (*wrapper ? wrapper : "EMPTY!") : "NULL!"); if ((wrapper == NULL) || (*wrapper == '\0')) return NULL; /* Absolute path? */ #if defined (HAVE_DOS_BASED_FILE_SYSTEM) if (isalpha ((unsigned char)wrapper[0]) && wrapper[1] == ':') { concat_name = xstrdup (wrapper); if (check_executable(concat_name)) return concat_name; XFREE(concat_name); } else { #endif if (IS_DIR_SEPARATOR (wrapper[0])) { concat_name = xstrdup (wrapper); if (check_executable(concat_name)) return concat_name; XFREE(concat_name); } #if defined (HAVE_DOS_BASED_FILE_SYSTEM) } #endif for (p = wrapper; *p; p++) if (*p == '/') { has_slash = 1; break; } if (!has_slash) { /* no slashes; search PATH */ const char* path = getenv ("PATH"); if (path != NULL) { for (p = path; *p; p = p_next) { const char* q; size_t p_len; for (q = p; *q; q++) if (IS_PATH_SEPARATOR(*q)) break; p_len = q - p; p_next = (*q == '\0' ? 
q : q + 1); if (p_len == 0) { /* empty path: current directory */ if (getcwd (tmp, LT_PATHMAX) == NULL) lt_fatal ("getcwd failed"); tmp_len = strlen(tmp); concat_name = XMALLOC(char, tmp_len + 1 + strlen(wrapper) + 1); memcpy (concat_name, tmp, tmp_len); concat_name[tmp_len] = '/'; strcpy (concat_name + tmp_len + 1, wrapper); } else { concat_name = XMALLOC(char, p_len + 1 + strlen(wrapper) + 1); memcpy (concat_name, p, p_len); concat_name[p_len] = '/'; strcpy (concat_name + p_len + 1, wrapper); } if (check_executable(concat_name)) return concat_name; XFREE(concat_name); } } /* not found in PATH; assume curdir */ } /* Relative path | not found in path: prepend cwd */ if (getcwd (tmp, LT_PATHMAX) == NULL) lt_fatal ("getcwd failed"); tmp_len = strlen(tmp); concat_name = XMALLOC(char, tmp_len + 1 + strlen(wrapper) + 1); memcpy (concat_name, tmp, tmp_len); concat_name[tmp_len] = '/'; strcpy (concat_name + tmp_len + 1, wrapper); if (check_executable(concat_name)) return concat_name; XFREE(concat_name); return NULL; } char * strendzap(char *str, const char *pat) { size_t len, patlen; assert(str != NULL); assert(pat != NULL); len = strlen(str); patlen = strlen(pat); if (patlen <= len) { str += len - patlen; if (strcmp(str, pat) == 0) *str = '\0'; } return str; } static void lt_error_core (int exit_status, const char * mode, const char * message, va_list ap) { fprintf (stderr, "%s: %s: ", program_name, mode); vfprintf (stderr, message, ap); fprintf (stderr, ".\n"); if (exit_status >= 0) exit (exit_status); } void lt_fatal (const char *message, ...) { va_list ap; va_start (ap, message); lt_error_core (EXIT_FAILURE, "FATAL", message, ap); va_end (ap); } EOF # we should really use a build-platform specific compiler # here, but OTOH, the wrappers (shell script and this C one) # are only useful if you want to execute the "real" binary. # Since the "real" binary is built for $host, then this # wrapper might as well be built for $host, too. 
$run $LTCC $LTCFLAGS -s -o $cwrapper $cwrappersource ;; esac $rm $output trap "$rm $output; exit $EXIT_FAILURE" 1 2 15 $echo > $output "\ #! $SHELL # $output - temporary wrapper script for $objdir/$outputname # Generated by $PROGRAM - GNU $PACKAGE $VERSION$TIMESTAMP # # The $output program cannot be directly executed until all the libtool # libraries that it depends on are installed. # # This wrapper script should never be moved out of the build directory. # If it is, it will not operate correctly. # Sed substitution that helps us do robust quoting. It backslashifies # metacharacters that are still active within double-quoted strings. Xsed='${SED} -e 1s/^X//' sed_quote_subst='$sed_quote_subst' # Be Bourne compatible (taken from Autoconf:_AS_BOURNE_COMPATIBLE). if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then emulate sh NULLCMD=: # Zsh 3.x and 4.x performs word splitting on \${1+\"\$@\"}, which # is contrary to our usage. Disable this feature. alias -g '\${1+\"\$@\"}'='\"\$@\"' setopt NO_GLOB_SUBST else case \`(set -o) 2>/dev/null\` in *posix*) set -o posix;; esac fi BIN_SH=xpg4; export BIN_SH # for Tru64 DUALCASE=1; export DUALCASE # for MKS sh # The HP-UX ksh and POSIX shell print the target directory to stdout # if CDPATH is set. (unset CDPATH) >/dev/null 2>&1 && unset CDPATH relink_command=\"$relink_command\" # This environment variable determines our operation mode. if test \"\$libtool_install_magic\" = \"$magic\"; then # install mode needs the following variable: notinst_deplibs='$notinst_deplibs' else # When we are sourced in execute mode, \$file and \$echo are already set. if test \"\$libtool_execute_magic\" != \"$magic\"; then echo=\"$qecho\" file=\"\$0\" # Make sure echo works. if test \"X\$1\" = X--no-reexec; then # Discard the --no-reexec flag, and continue. shift elif test \"X\`(\$echo '\t') 2>/dev/null\`\" = 'X\t'; then # Yippee, \$echo works! : else # Restart under the correct shell, and then maybe \$echo will work. 
exec $SHELL \"\$0\" --no-reexec \${1+\"\$@\"} fi fi\ " $echo >> $output "\ # Find the directory that this script lives in. thisdir=\`\$echo \"X\$file\" | \$Xsed -e 's%/[^/]*$%%'\` test \"x\$thisdir\" = \"x\$file\" && thisdir=. # Follow symbolic links until we get to the real thisdir. file=\`ls -ld \"\$file\" | ${SED} -n 's/.*-> //p'\` while test -n \"\$file\"; do destdir=\`\$echo \"X\$file\" | \$Xsed -e 's%/[^/]*\$%%'\` # If there was a directory component, then change thisdir. if test \"x\$destdir\" != \"x\$file\"; then case \"\$destdir\" in [\\\\/]* | [A-Za-z]:[\\\\/]*) thisdir=\"\$destdir\" ;; *) thisdir=\"\$thisdir/\$destdir\" ;; esac fi file=\`\$echo \"X\$file\" | \$Xsed -e 's%^.*/%%'\` file=\`ls -ld \"\$thisdir/\$file\" | ${SED} -n 's/.*-> //p'\` done # Try to get the absolute directory name. absdir=\`cd \"\$thisdir\" && pwd\` test -n \"\$absdir\" && thisdir=\"\$absdir\" " if test "$fast_install" = yes; then $echo >> $output "\ program=lt-'$outputname'$exeext progdir=\"\$thisdir/$objdir\" if test ! -f \"\$progdir/\$program\" || \\ { file=\`ls -1dt \"\$progdir/\$program\" \"\$progdir/../\$program\" 2>/dev/null | ${SED} 1q\`; \\ test \"X\$file\" != \"X\$progdir/\$program\"; }; then file=\"\$\$-\$program\" if test ! -d \"\$progdir\"; then $mkdir \"\$progdir\" else $rm \"\$progdir/\$file\" fi" $echo >> $output "\ # relink executable if necessary if test -n \"\$relink_command\"; then if relink_command_output=\`eval \$relink_command 2>&1\`; then : else $echo \"\$relink_command_output\" >&2 $rm \"\$progdir/\$file\" exit $EXIT_FAILURE fi fi $mv \"\$progdir/\$file\" \"\$progdir/\$program\" 2>/dev/null || { $rm \"\$progdir/\$program\"; $mv \"\$progdir/\$file\" \"\$progdir/\$program\"; } $rm \"\$progdir/\$file\" fi" else $echo >> $output "\ program='$outputname' progdir=\"\$thisdir/$objdir\" " fi $echo >> $output "\ if test -f \"\$progdir/\$program\"; then" # Export our shlibpath_var if we have one. 
if test "$shlibpath_overrides_runpath" = yes && test -n "$shlibpath_var" && test -n "$temp_rpath"; then $echo >> $output "\ # Add our own library path to $shlibpath_var $shlibpath_var=\"$temp_rpath\$$shlibpath_var\" # Some systems cannot cope with colon-terminated $shlibpath_var # The second colon is a workaround for a bug in BeOS R4 sed $shlibpath_var=\`\$echo \"X\$$shlibpath_var\" | \$Xsed -e 's/::*\$//'\` export $shlibpath_var " fi # fixup the dll searchpath if we need to. if test -n "$dllsearchpath"; then $echo >> $output "\ # Add the dll search path components to the executable PATH PATH=$dllsearchpath:\$PATH " fi $echo >> $output "\ if test \"\$libtool_execute_magic\" != \"$magic\"; then # Run the actual program with our arguments. " case $host in # Backslashes separate directories on plain windows *-*-mingw | *-*-os2*) $echo >> $output "\ exec \"\$progdir\\\\\$program\" \${1+\"\$@\"} " ;; *) $echo >> $output "\ exec \"\$progdir/\$program\" \${1+\"\$@\"} " ;; esac $echo >> $output "\ \$echo \"\$0: cannot exec \$program \$*\" exit $EXIT_FAILURE fi else # The program doesn't exist. \$echo \"\$0: error: \\\`\$progdir/\$program' does not exist\" 1>&2 \$echo \"This script is just a wrapper for \$program.\" 1>&2 $echo \"See the $PACKAGE documentation for more information.\" 1>&2 exit $EXIT_FAILURE fi fi\ " chmod +x $output fi exit $EXIT_SUCCESS ;; esac # See if we need to build an old-fashioned archive. for oldlib in $oldlibs; do if test "$build_libtool_libs" = convenience; then oldobjs="$libobjs_save" addlibs="$convenience" build_libtool_libs=no else if test "$build_libtool_libs" = module; then oldobjs="$libobjs_save" build_libtool_libs=no else oldobjs="$old_deplibs $non_pic_objects" fi addlibs="$old_convenience" fi if test -n "$addlibs"; then gentop="$output_objdir/${outputname}x" generated="$generated $gentop" func_extract_archives $gentop $addlibs oldobjs="$oldobjs $func_extract_archives_result" fi # Do each command in the archive commands. 
if test -n "$old_archive_from_new_cmds" && test "$build_libtool_libs" = yes; then cmds=$old_archive_from_new_cmds else # POSIX demands no paths to be encoded in archives. We have # to avoid creating archives with duplicate basenames if we # might have to extract them afterwards, e.g., when creating a # static archive out of a convenience library, or when linking # the entirety of a libtool archive into another (currently # not supported by libtool). if (for obj in $oldobjs do $echo "X$obj" | $Xsed -e 's%^.*/%%' done | sort | sort -uc >/dev/null 2>&1); then : else $echo "copying selected object files to avoid basename conflicts..." if test -z "$gentop"; then gentop="$output_objdir/${outputname}x" generated="$generated $gentop" $show "${rm}r $gentop" $run ${rm}r "$gentop" $show "$mkdir $gentop" $run $mkdir "$gentop" exit_status=$? if test "$exit_status" -ne 0 && test ! -d "$gentop"; then exit $exit_status fi fi save_oldobjs=$oldobjs oldobjs= counter=1 for obj in $save_oldobjs do objbase=`$echo "X$obj" | $Xsed -e 's%^.*/%%'` case " $oldobjs " in " ") oldobjs=$obj ;; *[\ /]"$objbase "*) while :; do # Make sure we don't pick an alternate name that also # overlaps. newobj=lt$counter-$objbase counter=`expr $counter + 1` case " $oldobjs " in *[\ /]"$newobj "*) ;; *) if test ! -f "$gentop/$newobj"; then break; fi ;; esac done $show "ln $obj $gentop/$newobj || cp $obj $gentop/$newobj" $run ln "$obj" "$gentop/$newobj" || $run cp "$obj" "$gentop/$newobj" oldobjs="$oldobjs $gentop/$newobj" ;; *) oldobjs="$oldobjs $obj" ;; esac done fi eval cmds=\"$old_archive_cmds\" if len=`expr "X$cmds" : ".*"` && test "$len" -le "$max_cmd_len" || test "$max_cmd_len" -le -1; then cmds=$old_archive_cmds else # the command line is too long to link in one step, link in parts $echo "using piecewise archive linking..." save_RANLIB=$RANLIB RANLIB=: objlist= concat_cmds= save_oldobjs=$oldobjs # Is there a better way of finding the last object in the list? 
for obj in $save_oldobjs do last_oldobj=$obj done for obj in $save_oldobjs do oldobjs="$objlist $obj" objlist="$objlist $obj" eval test_cmds=\"$old_archive_cmds\" if len=`expr "X$test_cmds" : ".*" 2>/dev/null` && test "$len" -le "$max_cmd_len"; then : else # the above command should be used before it gets too long oldobjs=$objlist if test "$obj" = "$last_oldobj" ; then RANLIB=$save_RANLIB fi test -z "$concat_cmds" || concat_cmds=$concat_cmds~ eval concat_cmds=\"\${concat_cmds}$old_archive_cmds\" objlist= fi done RANLIB=$save_RANLIB oldobjs=$objlist if test "X$oldobjs" = "X" ; then eval cmds=\"\$concat_cmds\" else eval cmds=\"\$concat_cmds~\$old_archive_cmds\" fi fi fi save_ifs="$IFS"; IFS='~' for cmd in $cmds; do eval cmd=\"$cmd\" IFS="$save_ifs" $show "$cmd" $run eval "$cmd" || exit $? done IFS="$save_ifs" done if test -n "$generated"; then $show "${rm}r$generated" $run ${rm}r$generated fi # Now create the libtool archive. case $output in *.la) old_library= test "$build_old_libs" = yes && old_library="$libname.$libext" $show "creating $output" # Preserve any variables that may affect compiler behavior for var in $variables_saved_for_relink; do if eval test -z \"\${$var+set}\"; then relink_command="{ test -z \"\${$var+set}\" || unset $var || { $var=; export $var; }; }; $relink_command" elif eval var_value=\$$var; test -z "$var_value"; then relink_command="$var=; export $var; $relink_command" else var_value=`$echo "X$var_value" | $Xsed -e "$sed_quote_subst"` relink_command="$var=\"$var_value\"; export $var; $relink_command" fi done # Quote the link command for shipping. relink_command="(cd `pwd`; $SHELL $progpath $preserve_args --mode=relink $libtool_args @inst_prefix_dir@)" relink_command=`$echo "X$relink_command" | $SP2NL | $Xsed -e "$sed_quote_subst" | $NL2SP` if test "$hardcode_automatic" = yes ; then relink_command= fi # Only create the output if not a dry run. 
if test -z "$run"; then for installed in no yes; do if test "$installed" = yes; then if test -z "$install_libdir"; then break fi output="$output_objdir/$outputname"i # Replace all uninstalled libtool libraries with the installed ones newdependency_libs= for deplib in $dependency_libs; do case $deplib in *.la) name=`$echo "X$deplib" | $Xsed -e 's%^.*/%%'` eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $deplib` if test -z "$libdir"; then $echo "$modename: \`$deplib' is not a valid libtool archive" 1>&2 exit $EXIT_FAILURE fi newdependency_libs="$newdependency_libs $libdir/$name" ;; *) newdependency_libs="$newdependency_libs $deplib" ;; esac done dependency_libs="$newdependency_libs" newdlfiles= for lib in $dlfiles; do name=`$echo "X$lib" | $Xsed -e 's%^.*/%%'` eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $lib` if test -z "$libdir"; then $echo "$modename: \`$lib' is not a valid libtool archive" 1>&2 exit $EXIT_FAILURE fi newdlfiles="$newdlfiles $libdir/$name" done dlfiles="$newdlfiles" newdlprefiles= for lib in $dlprefiles; do name=`$echo "X$lib" | $Xsed -e 's%^.*/%%'` eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $lib` if test -z "$libdir"; then $echo "$modename: \`$lib' is not a valid libtool archive" 1>&2 exit $EXIT_FAILURE fi newdlprefiles="$newdlprefiles $libdir/$name" done dlprefiles="$newdlprefiles" else newdlfiles= for lib in $dlfiles; do case $lib in [\\/]* | [A-Za-z]:[\\/]*) abs="$lib" ;; *) abs=`pwd`"/$lib" ;; esac newdlfiles="$newdlfiles $abs" done dlfiles="$newdlfiles" newdlprefiles= for lib in $dlprefiles; do case $lib in [\\/]* | [A-Za-z]:[\\/]*) abs="$lib" ;; *) abs=`pwd`"/$lib" ;; esac newdlprefiles="$newdlprefiles $abs" done dlprefiles="$newdlprefiles" fi $rm $output # place dlname in correct position for cygwin tdlname=$dlname case $host,$output,$installed,$module,$dlname in *cygwin*,*lai,yes,no,*.dll | *mingw*,*lai,yes,no,*.dll) tdlname=../bin/$dlname ;; esac $echo > $output "\ # $outputname - a libtool library file # Generated by 
$PROGRAM - GNU $PACKAGE $VERSION$TIMESTAMP # # Please DO NOT delete this file! # It is necessary for linking the library. # The name that we can dlopen(3). dlname='$tdlname' # Names of this library. library_names='$library_names' # The name of the static archive. old_library='$old_library' # Libraries that this one depends upon. dependency_libs='$dependency_libs' # Version information for $libname. current=$current age=$age revision=$revision # Is this an already installed library? installed=$installed # Should we warn about portability when linking against -modules? shouldnotlink=$module # Files to dlopen/dlpreopen dlopen='$dlfiles' dlpreopen='$dlprefiles' # Directory that this library needs to be installed in: libdir='$install_libdir'" if test "$installed" = no && test "$need_relink" = yes; then $echo >> $output "\ relink_command=\"$relink_command\"" fi done fi # Do a symbolic link so that the libtool archive can be found in # LD_LIBRARY_PATH before the program is installed. $show "(cd $output_objdir && $rm $outputname && $LN_S ../$outputname $outputname)" $run eval '(cd $output_objdir && $rm $outputname && $LN_S ../$outputname $outputname)' || exit $? ;; esac exit $EXIT_SUCCESS ;; # libtool install mode install) modename="$modename: install" # There may be an optional sh(1) argument at the beginning of # install_prog (especially on Windows NT). if test "$nonopt" = "$SHELL" || test "$nonopt" = /bin/sh || # Allow the use of GNU shtool's install command. $echo "X$nonopt" | grep shtool > /dev/null; then # Aesthetically quote it. arg=`$echo "X$nonopt" | $Xsed -e "$sed_quote_subst"` case $arg in *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") arg="\"$arg\"" ;; esac install_prog="$arg " arg="$1" shift else install_prog= arg=$nonopt fi # The real first argument should be the name of the installation program. # Aesthetically quote it. 
arg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"` case $arg in *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") arg="\"$arg\"" ;; esac install_prog="$install_prog$arg" # We need to accept at least all the BSD install flags. dest= files= opts= prev= install_type= isdir=no stripme= for arg do if test -n "$dest"; then files="$files $dest" dest=$arg continue fi case $arg in -d) isdir=yes ;; -f) case " $install_prog " in *[\\\ /]cp\ *) ;; *) prev=$arg ;; esac ;; -g | -m | -o) prev=$arg ;; -s) stripme=" -s" continue ;; -*) ;; *) # If the previous option needed an argument, then skip it. if test -n "$prev"; then prev= else dest=$arg continue fi ;; esac # Aesthetically quote the argument. arg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"` case $arg in *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") arg="\"$arg\"" ;; esac install_prog="$install_prog $arg" done if test -z "$install_prog"; then $echo "$modename: you must specify an install program" 1>&2 $echo "$help" 1>&2 exit $EXIT_FAILURE fi if test -n "$prev"; then $echo "$modename: the \`$prev' option requires an argument" 1>&2 $echo "$help" 1>&2 exit $EXIT_FAILURE fi if test -z "$files"; then if test -z "$dest"; then $echo "$modename: no file or destination specified" 1>&2 else $echo "$modename: you must specify a destination" 1>&2 fi $echo "$help" 1>&2 exit $EXIT_FAILURE fi # Strip any trailing slash from the destination. dest=`$echo "X$dest" | $Xsed -e 's%/$%%'` # Check to see that the destination is a directory. test -d "$dest" && isdir=yes if test "$isdir" = yes; then destdir="$dest" destname= else destdir=`$echo "X$dest" | $Xsed -e 's%/[^/]*$%%'` test "X$destdir" = "X$dest" && destdir=. destname=`$echo "X$dest" | $Xsed -e 's%^.*/%%'` # Not a directory, so check to see that there is only one file specified. 
set dummy $files if test "$#" -gt 2; then $echo "$modename: \`$dest' is not a directory" 1>&2 $echo "$help" 1>&2 exit $EXIT_FAILURE fi fi case $destdir in [\\/]* | [A-Za-z]:[\\/]*) ;; *) for file in $files; do case $file in *.lo) ;; *) $echo "$modename: \`$destdir' must be an absolute directory name" 1>&2 $echo "$help" 1>&2 exit $EXIT_FAILURE ;; esac done ;; esac # This variable tells wrapper scripts just to set variables rather # than running their programs. libtool_install_magic="$magic" staticlibs= future_libdirs= current_libdirs= for file in $files; do # Do each installation. case $file in *.$libext) # Do the static libraries later. staticlibs="$staticlibs $file" ;; *.la) # Check to see that this really is a libtool archive. if (${SED} -e '2q' $file | grep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then : else $echo "$modename: \`$file' is not a valid libtool archive" 1>&2 $echo "$help" 1>&2 exit $EXIT_FAILURE fi library_names= old_library= relink_command= # If there is no directory component, then add one. case $file in */* | *\\*) . $file ;; *) . ./$file ;; esac # Add the libdir to current_libdirs if it is the destination. if test "X$destdir" = "X$libdir"; then case "$current_libdirs " in *" $libdir "*) ;; *) current_libdirs="$current_libdirs $libdir" ;; esac else # Note the libdir as a future libdir. case "$future_libdirs " in *" $libdir "*) ;; *) future_libdirs="$future_libdirs $libdir" ;; esac fi dir=`$echo "X$file" | $Xsed -e 's%/[^/]*$%%'`/ test "X$dir" = "X$file/" && dir= dir="$dir$objdir" if test -n "$relink_command"; then # Determine the prefix the user has applied to our future dir. inst_prefix_dir=`$echo "$destdir" | $SED "s%$libdir\$%%"` # Don't allow the user to place us outside of our expected # location b/c this prevents finding dependent libraries that # are installed to the same prefix. 
# At present, this check doesn't affect windows .dll's that # are installed into $libdir/../bin (currently, that works fine) # but it's something to keep an eye on. if test "$inst_prefix_dir" = "$destdir"; then $echo "$modename: error: cannot install \`$file' to a directory not ending in $libdir" 1>&2 exit $EXIT_FAILURE fi if test -n "$inst_prefix_dir"; then # Stick the inst_prefix_dir data into the link command. relink_command=`$echo "$relink_command" | $SP2NL | $SED "s%@inst_prefix_dir@%-inst-prefix-dir $inst_prefix_dir%" | $NL2SP` else relink_command=`$echo "$relink_command" | $SP2NL | $SED "s%@inst_prefix_dir@%%" | $NL2SP` fi $echo "$modename: warning: relinking \`$file'" 1>&2 $show "$relink_command" if $run eval "$relink_command"; then : else $echo "$modename: error: relink \`$file' with the above command before installing it" 1>&2 exit $EXIT_FAILURE fi fi # See the names of the shared library. set dummy $library_names if test -n "$2"; then realname="$2" shift shift srcname="$realname" test -n "$relink_command" && srcname="$realname"T # Install the shared library and build the symlinks. $show "$install_prog $dir/$srcname $destdir/$realname" $run eval "$install_prog $dir/$srcname $destdir/$realname" || exit $? if test -n "$stripme" && test -n "$striplib"; then $show "$striplib $destdir/$realname" $run eval "$striplib $destdir/$realname" || exit $? fi if test "$#" -gt 0; then # Delete the old symlinks, and create new ones. # Try `ln -sf' first, because the `ln' binary might depend on # the symlink we replace! Solaris /bin/ln does not understand -f, # so we also need to try rm && ln -s. for linkname do if test "$linkname" != "$realname"; then $show "(cd $destdir && { $LN_S -f $realname $linkname || { $rm $linkname && $LN_S $realname $linkname; }; })" $run eval "(cd $destdir && { $LN_S -f $realname $linkname || { $rm $linkname && $LN_S $realname $linkname; }; })" fi done fi # Do each command in the postinstall commands. 
lib="$destdir/$realname" cmds=$postinstall_cmds save_ifs="$IFS"; IFS='~' for cmd in $cmds; do IFS="$save_ifs" eval cmd=\"$cmd\" $show "$cmd" $run eval "$cmd" || { lt_exit=$? # Restore the uninstalled library and exit if test "$mode" = relink; then $run eval '(cd $output_objdir && $rm ${realname}T && $mv ${realname}U $realname)' fi exit $lt_exit } done IFS="$save_ifs" fi # Install the pseudo-library for information purposes. name=`$echo "X$file" | $Xsed -e 's%^.*/%%'` instname="$dir/$name"i $show "$install_prog $instname $destdir/$name" $run eval "$install_prog $instname $destdir/$name" || exit $? # Maybe install the static library, too. test -n "$old_library" && staticlibs="$staticlibs $dir/$old_library" ;; *.lo) # Install (i.e. copy) a libtool object. # Figure out destination file name, if it wasn't already specified. if test -n "$destname"; then destfile="$destdir/$destname" else destfile=`$echo "X$file" | $Xsed -e 's%^.*/%%'` destfile="$destdir/$destfile" fi # Deduce the name of the destination old-style object file. case $destfile in *.lo) staticdest=`$echo "X$destfile" | $Xsed -e "$lo2o"` ;; *.$objext) staticdest="$destfile" destfile= ;; *) $echo "$modename: cannot copy a libtool object to \`$destfile'" 1>&2 $echo "$help" 1>&2 exit $EXIT_FAILURE ;; esac # Install the libtool object if requested. if test -n "$destfile"; then $show "$install_prog $file $destfile" $run eval "$install_prog $file $destfile" || exit $? fi # Install the old object if enabled. if test "$build_old_libs" = yes; then # Deduce the name of the old-style object file. staticobj=`$echo "X$file" | $Xsed -e "$lo2o"` $show "$install_prog $staticobj $staticdest" $run eval "$install_prog \$staticobj \$staticdest" || exit $? fi exit $EXIT_SUCCESS ;; *) # Figure out destination file name, if it wasn't already specified. 
if test -n "$destname"; then destfile="$destdir/$destname" else destfile=`$echo "X$file" | $Xsed -e 's%^.*/%%'` destfile="$destdir/$destfile" fi # If the file is missing, and there is a .exe on the end, strip it # because it is most likely a libtool script we actually want to # install stripped_ext="" case $file in *.exe) if test ! -f "$file"; then file=`$echo $file|${SED} 's,.exe$,,'` stripped_ext=".exe" fi ;; esac # Do a test to see if this is really a libtool program. case $host in *cygwin*|*mingw*) wrapper=`$echo $file | ${SED} -e 's,.exe$,,'` ;; *) wrapper=$file ;; esac if (${SED} -e '4q' $wrapper | grep "^# Generated by .*$PACKAGE")>/dev/null 2>&1; then notinst_deplibs= relink_command= # Note that it is not necessary on cygwin/mingw to append a dot to # foo even if both foo and FILE.exe exist: automatic-append-.exe # behavior happens only for exec(3), not for open(2)! Also, sourcing # `FILE.' does not work on cygwin managed mounts. # # If there is no directory component, then add one. case $wrapper in */* | *\\*) . ${wrapper} ;; *) . ./${wrapper} ;; esac # Check the variables that should have been set. if test -z "$notinst_deplibs"; then $echo "$modename: invalid libtool wrapper script \`$wrapper'" 1>&2 exit $EXIT_FAILURE fi finalize=yes for lib in $notinst_deplibs; do # Check to see that each library is installed. libdir= if test -f "$lib"; then # If there is no directory component, then add one. case $lib in */* | *\\*) . $lib ;; *) . ./$lib ;; esac fi libfile="$libdir/"`$echo "X$lib" | $Xsed -e 's%^.*/%%g'` ### testsuite: skip nested quoting test if test -n "$libdir" && test ! -f "$libfile"; then $echo "$modename: warning: \`$lib' has not been installed in \`$libdir'" 1>&2 finalize=no fi done relink_command= # Note that it is not necessary on cygwin/mingw to append a dot to # foo even if both foo and FILE.exe exist: automatic-append-.exe # behavior happens only for exec(3), not for open(2)! Also, sourcing # `FILE.' does not work on cygwin managed mounts. 
# # If there is no directory component, then add one. case $wrapper in */* | *\\*) . ${wrapper} ;; *) . ./${wrapper} ;; esac outputname= if test "$fast_install" = no && test -n "$relink_command"; then if test "$finalize" = yes && test -z "$run"; then tmpdir=`func_mktempdir` file=`$echo "X$file$stripped_ext" | $Xsed -e 's%^.*/%%'` outputname="$tmpdir/$file" # Replace the output file specification. relink_command=`$echo "X$relink_command" | $SP2NL | $Xsed -e 's%@OUTPUT@%'"$outputname"'%g' | $NL2SP` $show "$relink_command" if $run eval "$relink_command"; then : else $echo "$modename: error: relink \`$file' with the above command before installing it" 1>&2 ${rm}r "$tmpdir" continue fi file="$outputname" else $echo "$modename: warning: cannot relink \`$file'" 1>&2 fi else # Install the binary that we compiled earlier. file=`$echo "X$file$stripped_ext" | $Xsed -e "s%\([^/]*\)$%$objdir/\1%"` fi fi # remove .exe since cygwin /usr/bin/install will append another # one anyway case $install_prog,$host in */usr/bin/install*,*cygwin*) case $file:$destfile in *.exe:*.exe) # this is ok ;; *.exe:*) destfile=$destfile.exe ;; *:*.exe) destfile=`$echo $destfile | ${SED} -e 's,.exe$,,'` ;; esac ;; esac $show "$install_prog$stripme $file $destfile" $run eval "$install_prog\$stripme \$file \$destfile" || exit $? test -n "$outputname" && ${rm}r "$tmpdir" ;; esac done for file in $staticlibs; do name=`$echo "X$file" | $Xsed -e 's%^.*/%%'` # Set up the ranlib parameters. oldlib="$destdir/$name" $show "$install_prog $file $oldlib" $run eval "$install_prog \$file \$oldlib" || exit $? if test -n "$stripme" && test -n "$old_striplib"; then $show "$old_striplib $oldlib" $run eval "$old_striplib $oldlib" || exit $? fi # Do each command in the postinstall commands. cmds=$old_postinstall_cmds save_ifs="$IFS"; IFS='~' for cmd in $cmds; do IFS="$save_ifs" eval cmd=\"$cmd\" $show "$cmd" $run eval "$cmd" || exit $? 
done IFS="$save_ifs" done if test -n "$future_libdirs"; then $echo "$modename: warning: remember to run \`$progname --finish$future_libdirs'" 1>&2 fi if test -n "$current_libdirs"; then # Maybe just do a dry run. test -n "$run" && current_libdirs=" -n$current_libdirs" exec_cmd='$SHELL $progpath $preserve_args --finish$current_libdirs' else exit $EXIT_SUCCESS fi ;; # libtool finish mode finish) modename="$modename: finish" libdirs="$nonopt" admincmds= if test -n "$finish_cmds$finish_eval" && test -n "$libdirs"; then for dir do libdirs="$libdirs $dir" done for libdir in $libdirs; do if test -n "$finish_cmds"; then # Do each command in the finish commands. cmds=$finish_cmds save_ifs="$IFS"; IFS='~' for cmd in $cmds; do IFS="$save_ifs" eval cmd=\"$cmd\" $show "$cmd" $run eval "$cmd" || admincmds="$admincmds $cmd" done IFS="$save_ifs" fi if test -n "$finish_eval"; then # Do the single finish_eval. eval cmds=\"$finish_eval\" $run eval "$cmds" || admincmds="$admincmds $cmds" fi done fi # Exit here if they wanted silent mode. 
test "$show" = : && exit $EXIT_SUCCESS $echo "X----------------------------------------------------------------------" | $Xsed $echo "Libraries have been installed in:" for libdir in $libdirs; do $echo " $libdir" done $echo $echo "If you ever happen to want to link against installed libraries" $echo "in a given directory, LIBDIR, you must either use libtool, and" $echo "specify the full pathname of the library, or use the \`-LLIBDIR'" $echo "flag during linking and do at least one of the following:" if test -n "$shlibpath_var"; then $echo " - add LIBDIR to the \`$shlibpath_var' environment variable" $echo " during execution" fi if test -n "$runpath_var"; then $echo " - add LIBDIR to the \`$runpath_var' environment variable" $echo " during linking" fi if test -n "$hardcode_libdir_flag_spec"; then libdir=LIBDIR eval flag=\"$hardcode_libdir_flag_spec\" $echo " - use the \`$flag' linker flag" fi if test -n "$admincmds"; then $echo " - have your system administrator run these commands:$admincmds" fi if test -f /etc/ld.so.conf; then $echo " - have your system administrator add LIBDIR to \`/etc/ld.so.conf'" fi $echo $echo "See any operating system documentation about shared libraries for" $echo "more information, such as the ld(1) and ld.so(8) manual pages." $echo "X----------------------------------------------------------------------" | $Xsed exit $EXIT_SUCCESS ;; # libtool execute mode execute) modename="$modename: execute" # The first argument is the command name. cmd="$nonopt" if test -z "$cmd"; then $echo "$modename: you must specify a COMMAND" 1>&2 $echo "$help" exit $EXIT_FAILURE fi # Handle -dlopen flags immediately. for file in $execute_dlfiles; do if test ! -f "$file"; then $echo "$modename: \`$file' is not a file" 1>&2 $echo "$help" 1>&2 exit $EXIT_FAILURE fi dir= case $file in *.la) # Check to see that this really is a libtool archive. 
if (${SED} -e '2q' $file | grep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then : else $echo "$modename: \`$lib' is not a valid libtool archive" 1>&2 $echo "$help" 1>&2 exit $EXIT_FAILURE fi # Read the libtool library. dlname= library_names= # If there is no directory component, then add one. case $file in */* | *\\*) . $file ;; *) . ./$file ;; esac # Skip this library if it cannot be dlopened. if test -z "$dlname"; then # Warn if it was a shared library. test -n "$library_names" && $echo "$modename: warning: \`$file' was not linked with \`-export-dynamic'" continue fi dir=`$echo "X$file" | $Xsed -e 's%/[^/]*$%%'` test "X$dir" = "X$file" && dir=. if test -f "$dir/$objdir/$dlname"; then dir="$dir/$objdir" else if test ! -f "$dir/$dlname"; then $echo "$modename: cannot find \`$dlname' in \`$dir' or \`$dir/$objdir'" 1>&2 exit $EXIT_FAILURE fi fi ;; *.lo) # Just add the directory containing the .lo file. dir=`$echo "X$file" | $Xsed -e 's%/[^/]*$%%'` test "X$dir" = "X$file" && dir=. ;; *) $echo "$modename: warning \`-dlopen' is ignored for non-libtool libraries and objects" 1>&2 continue ;; esac # Get the absolute pathname. absdir=`cd "$dir" && pwd` test -n "$absdir" && dir="$absdir" # Now add the directory to shlibpath_var. if eval "test -z \"\$$shlibpath_var\""; then eval "$shlibpath_var=\"\$dir\"" else eval "$shlibpath_var=\"\$dir:\$$shlibpath_var\"" fi done # This variable tells wrapper scripts just to set shlibpath_var # rather than running their programs. libtool_execute_magic="$magic" # Check if any of the arguments is a wrapper script. args= for file do case $file in -*) ;; *) # Do a test to see if this is really a libtool program. if (${SED} -e '4q' $file | grep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then # If there is no directory component, then add one. case $file in */* | *\\*) . $file ;; *) . ./$file ;; esac # Transform arg to wrapped name. file="$progdir/$program" fi ;; esac # Quote arguments (to preserve shell metacharacters). 
file=`$echo "X$file" | $Xsed -e "$sed_quote_subst"` args="$args \"$file\"" done if test -z "$run"; then if test -n "$shlibpath_var"; then # Export the shlibpath_var. eval "export $shlibpath_var" fi # Restore saved environment variables for lt_var in LANG LC_ALL LC_CTYPE LC_COLLATE LC_MESSAGES do eval "if test \"\${save_$lt_var+set}\" = set; then $lt_var=\$save_$lt_var; export $lt_var fi" done # Now prepare to actually exec the command. exec_cmd="\$cmd$args" else # Display what would be done. if test -n "$shlibpath_var"; then eval "\$echo \"\$shlibpath_var=\$$shlibpath_var\"" $echo "export $shlibpath_var" fi $echo "$cmd$args" exit $EXIT_SUCCESS fi ;; # libtool clean and uninstall mode clean | uninstall) modename="$modename: $mode" rm="$nonopt" files= rmforce= exit_status=0 # This variable tells wrapper scripts just to set variables rather # than running their programs. libtool_install_magic="$magic" for arg do case $arg in -f) rm="$rm $arg"; rmforce=yes ;; -*) rm="$rm $arg" ;; *) files="$files $arg" ;; esac done if test -z "$rm"; then $echo "$modename: you must specify an RM program" 1>&2 $echo "$help" 1>&2 exit $EXIT_FAILURE fi rmdirs= origobjdir="$objdir" for file in $files; do dir=`$echo "X$file" | $Xsed -e 's%/[^/]*$%%'` if test "X$dir" = "X$file"; then dir=. objdir="$origobjdir" else objdir="$dir/$origobjdir" fi name=`$echo "X$file" | $Xsed -e 's%^.*/%%'` test "$mode" = uninstall && objdir="$dir" # Remember objdir for removal later, being careful to avoid duplicates if test "$mode" = clean; then case " $rmdirs " in *" $objdir "*) ;; *) rmdirs="$rmdirs $objdir" ;; esac fi # Don't error if the file doesn't exist and rm -f was used. if (test -L "$file") >/dev/null 2>&1 \ || (test -h "$file") >/dev/null 2>&1 \ || test -f "$file"; then : elif test -d "$file"; then exit_status=1 continue elif test "$rmforce" = yes; then continue fi rmfiles="$file" case $name in *.la) # Possibly a libtool archive, so verify it. 
if (${SED} -e '2q' $file | grep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then . $dir/$name # Delete the libtool libraries and symlinks. for n in $library_names; do rmfiles="$rmfiles $objdir/$n" done test -n "$old_library" && rmfiles="$rmfiles $objdir/$old_library" case "$mode" in clean) case " $library_names " in # " " in the beginning catches empty $dlname *" $dlname "*) ;; *) rmfiles="$rmfiles $objdir/$dlname" ;; esac test -n "$libdir" && rmfiles="$rmfiles $objdir/$name $objdir/${name}i" ;; uninstall) if test -n "$library_names"; then # Do each command in the postuninstall commands. cmds=$postuninstall_cmds save_ifs="$IFS"; IFS='~' for cmd in $cmds; do IFS="$save_ifs" eval cmd=\"$cmd\" $show "$cmd" $run eval "$cmd" if test "$?" -ne 0 && test "$rmforce" != yes; then exit_status=1 fi done IFS="$save_ifs" fi if test -n "$old_library"; then # Do each command in the old_postuninstall commands. cmds=$old_postuninstall_cmds save_ifs="$IFS"; IFS='~' for cmd in $cmds; do IFS="$save_ifs" eval cmd=\"$cmd\" $show "$cmd" $run eval "$cmd" if test "$?" -ne 0 && test "$rmforce" != yes; then exit_status=1 fi done IFS="$save_ifs" fi # FIXME: should reinstall the best remaining shared library. ;; esac fi ;; *.lo) # Possibly a libtool object, so verify it. if (${SED} -e '2q' $file | grep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then # Read the .lo file . $dir/$name # Add PIC object to the list of files to remove. if test -n "$pic_object" \ && test "$pic_object" != none; then rmfiles="$rmfiles $dir/$pic_object" fi # Add non-PIC object to the list of files to remove. 
if test -n "$non_pic_object" \ && test "$non_pic_object" != none; then rmfiles="$rmfiles $dir/$non_pic_object" fi fi ;; *) if test "$mode" = clean ; then noexename=$name case $file in *.exe) file=`$echo $file|${SED} 's,.exe$,,'` noexename=`$echo $name|${SED} 's,.exe$,,'` # $file with .exe has already been added to rmfiles, # add $file without .exe rmfiles="$rmfiles $file" ;; esac # Do a test to see if this is a libtool program. if (${SED} -e '4q' $file | grep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then relink_command= . $dir/$noexename # note $name still contains .exe if it was in $file originally # as does the version of $file that was added into $rmfiles rmfiles="$rmfiles $objdir/$name $objdir/${name}S.${objext}" if test "$fast_install" = yes && test -n "$relink_command"; then rmfiles="$rmfiles $objdir/lt-$name" fi if test "X$noexename" != "X$name" ; then rmfiles="$rmfiles $objdir/lt-${noexename}.c" fi fi fi ;; esac $show "$rm $rmfiles" $run $rm $rmfiles || exit_status=1 done objdir="$origobjdir" # Try to remove the ${objdir}s in the directories where we deleted files for dir in $rmdirs; do if test -d "$dir"; then $show "rmdir $dir" $run rmdir $dir >/dev/null 2>&1 fi done exit $exit_status ;; "") $echo "$modename: you must specify a MODE" 1>&2 $echo "$generic_help" 1>&2 exit $EXIT_FAILURE ;; esac if test -z "$exec_cmd"; then $echo "$modename: invalid operation mode \`$mode'" 1>&2 $echo "$generic_help" 1>&2 exit $EXIT_FAILURE fi fi # test -z "$show_help" if test -n "$exec_cmd"; then eval exec $exec_cmd exit $EXIT_FAILURE fi # We need to display help for each of the modes. case $mode in "") $echo \ "Usage: $modename [OPTION]... [MODE-ARG]... Provide generalized library-building support services. 
--config show all configuration variables --debug enable verbose shell tracing -n, --dry-run display commands without modifying any files --features display basic configuration information and exit --finish same as \`--mode=finish' --help display this help message and exit --mode=MODE use operation mode MODE [default=inferred from MODE-ARGS] --quiet same as \`--silent' --silent don't print informational messages --tag=TAG use configuration variables from tag TAG --version print version information MODE must be one of the following: clean remove files from the build directory compile compile a source file into a libtool object execute automatically set library path, then run a program finish complete the installation of libtool libraries install install libraries or executables link create a library or an executable uninstall remove libraries from an installed directory MODE-ARGS vary depending on the MODE. Try \`$modename --help --mode=MODE' for a more detailed description of MODE. Report bugs to ." exit $EXIT_SUCCESS ;; clean) $echo \ "Usage: $modename [OPTION]... --mode=clean RM [RM-OPTION]... FILE... Remove files from the build directory. RM is the name of the program to use to delete files associated with each FILE (typically \`/bin/rm'). RM-OPTIONS are options (such as \`-f') to be passed to RM. If FILE is a libtool library, object or program, all the files associated with it are deleted. Otherwise, only FILE itself is deleted using RM." ;; compile) $echo \ "Usage: $modename [OPTION]... --mode=compile COMPILE-COMMAND... SOURCEFILE Compile a source file into a libtool library object. This mode accepts the following additional options: -o OUTPUT-FILE set the output file name to OUTPUT-FILE -prefer-pic try to building PIC objects only -prefer-non-pic try to building non-PIC objects only -static always build a \`.o' file suitable for static linking COMPILE-COMMAND is a command to be used in creating a \`standard' object file from the given SOURCEFILE. 
The output file name is determined by removing the directory component from SOURCEFILE, then substituting the C source code suffix \`.c' with the library object suffix, \`.lo'." ;; execute) $echo \ "Usage: $modename [OPTION]... --mode=execute COMMAND [ARGS]... Automatically set library path, then run a program. This mode accepts the following additional options: -dlopen FILE add the directory containing FILE to the library path This mode sets the library path environment variable according to \`-dlopen' flags. If any of the ARGS are libtool executable wrappers, then they are translated into their corresponding uninstalled binary, and any of their required library directories are added to the library path. Then, COMMAND is executed, with ARGS as arguments." ;; finish) $echo \ "Usage: $modename [OPTION]... --mode=finish [LIBDIR]... Complete the installation of libtool libraries. Each LIBDIR is a directory that contains libtool libraries. The commands that this mode executes may require superuser privileges. Use the \`--dry-run' option if you just want to see what would be executed." ;; install) $echo \ "Usage: $modename [OPTION]... --mode=install INSTALL-COMMAND... Install executables or libraries. INSTALL-COMMAND is the installation command. The first component should be either the \`install' or \`cp' program. The rest of the components are interpreted as arguments to that command (only BSD-compatible install options are recognized)." ;; link) $echo \ "Usage: $modename [OPTION]... --mode=link LINK-COMMAND... Link object files or libraries together to form another library, or to create an executable program. LINK-COMMAND is a command using the C compiler that you would use to create a program from several object files. 
The following components of LINK-COMMAND are treated specially: -all-static do not do any dynamic linking at all -avoid-version do not add a version suffix if possible -dlopen FILE \`-dlpreopen' FILE if it cannot be dlopened at runtime -dlpreopen FILE link in FILE and add its symbols to lt_preloaded_symbols -export-dynamic allow symbols from OUTPUT-FILE to be resolved with dlsym(3) -export-symbols SYMFILE try to export only the symbols listed in SYMFILE -export-symbols-regex REGEX try to export only the symbols matching REGEX -LLIBDIR search LIBDIR for required installed libraries -lNAME OUTPUT-FILE requires the installed library libNAME -module build a library that can dlopened -no-fast-install disable the fast-install mode -no-install link a not-installable executable -no-undefined declare that a library does not refer to external symbols -o OUTPUT-FILE create OUTPUT-FILE from the specified objects -objectlist FILE Use a list of object files found in FILE to specify objects -precious-files-regex REGEX don't remove output files matching REGEX -release RELEASE specify package release information -rpath LIBDIR the created library will eventually be installed in LIBDIR -R[ ]LIBDIR add LIBDIR to the runtime path of programs and libraries -static do not do any dynamic linking of uninstalled libtool libraries -static-libtool-libs do not do any dynamic linking of libtool libraries -version-info CURRENT[:REVISION[:AGE]] specify library version info [each variable defaults to 0] All other options (arguments beginning with \`-') are ignored. Every other argument is treated as a filename. Files ending in \`.la' are treated as uninstalled libtool libraries, other files are standard or library object files. If the OUTPUT-FILE ends in \`.la', then a libtool library is created, only library objects (\`.lo' files) may be specified, and \`-rpath' is required, except when creating a convenience library. 
If OUTPUT-FILE ends in \`.a' or \`.lib', then a standard library is created using \`ar' and \`ranlib', or on Windows using \`lib'. If OUTPUT-FILE ends in \`.lo' or \`.${objext}', then a reloadable object file is created, otherwise an executable program is created." ;; uninstall) $echo \ "Usage: $modename [OPTION]... --mode=uninstall RM [RM-OPTION]... FILE... Remove libraries from an installation directory. RM is the name of the program to use to delete files associated with each FILE (typically \`/bin/rm'). RM-OPTIONS are options (such as \`-f') to be passed to RM. If FILE is a libtool library, all the files associated with it are deleted. Otherwise, only FILE itself is deleted using RM." ;; *) $echo "$modename: invalid operation mode \`$mode'" 1>&2 $echo "$help" 1>&2 exit $EXIT_FAILURE ;; esac $echo $echo "Try \`$modename --help' for more information about other modes." exit $? # The TAGs below are defined such that we never get into a situation # in which we disable both kinds of libraries. Given conflicting # choices, we go for a static library, that is the most portable, # since we can't tell whether shared libraries were disabled because # the user asked for that or because the platform doesn't support # them. This is particularly important on AIX, because we don't # support having both static and shared libraries enabled at the same # time on that platform, so we default to a shared-only configuration. # If a disable-shared tag is given, we'll fallback to a static-only # configuration. But we'll never go from static-only to shared-only. 
# ### BEGIN LIBTOOL TAG CONFIG: disable-shared disable_libs=shared # ### END LIBTOOL TAG CONFIG: disable-shared # ### BEGIN LIBTOOL TAG CONFIG: disable-static disable_libs=static # ### END LIBTOOL TAG CONFIG: disable-static # Local Variables: # mode:shell-script # sh-indentation:2 # End: einspline-0.9.2/ChangeLog0000664000113000011300000005630711012400563012170 00000000000000------------------------------------------------------------------------ r129 | kpesler | 2007-05-09 17:32:00 -0400 (Wed, 09 May 2007) | 1 line Added F77 wrappers for 1D nonuniform spline creation. ------------------------------------------------------------------------ r128 | kpesler | 2007-05-09 17:18:30 -0400 (Wed, 09 May 2007) | 1 line Starting to add F77 wrappers for nonuniform splines. ------------------------------------------------------------------------ r127 | kpesler | 2007-05-09 16:43:55 -0400 (Wed, 09 May 2007) | 1 line Adding some docs for Fortran wrappers. ------------------------------------------------------------------------ r126 | kpesler | 2007-05-09 16:18:41 -0400 (Wed, 09 May 2007) | 1 line Added some more background explanation. ------------------------------------------------------------------------ r125 | kpesler | 2007-05-09 15:48:01 -0400 (Wed, 09 May 2007) | 1 line Some more fixes to point to sourceforge download area. ------------------------------------------------------------------------ r124 | kpesler | 2007-05-09 15:18:40 -0400 (Wed, 09 May 2007) | 1 line Adding SF logo to web pages. ------------------------------------------------------------------------ r123 | esler | 2007-05-09 14:05:19 -0400 (Wed, 09 May 2007) | 1 line Added F77 wrappers for all uniform B-spline evaluation routines. ------------------------------------------------------------------------ r122 | esler | 2007-05-09 12:18:59 -0400 (Wed, 09 May 2007) | 1 line Wrapped all uniform spline creating routines in F77. 
------------------------------------------------------------------------ r121 | esler | 2007-05-09 11:49:48 -0400 (Wed, 09 May 2007) | 1 line Need to use F77_FUNC_ instead of F77_FUNC. ------------------------------------------------------------------------ r120 | esler | 2007-05-08 16:39:00 -0400 (Tue, 08 May 2007) | 1 line Adding FORTRAN test program. ------------------------------------------------------------------------ r119 | esler | 2007-05-08 15:17:50 -0400 (Tue, 08 May 2007) | 1 line Starting to add F77 interface. ------------------------------------------------------------------------ r118 | esler | 2007-05-07 17:30:44 -0400 (Mon, 07 May 2007) | 1 line Added a few questions and answers to the FAQ. ------------------------------------------------------------------------ r117 | esler | 2007-05-07 13:13:37 -0400 (Mon, 07 May 2007) | 1 line Adding more explicit documentation. ------------------------------------------------------------------------ r116 | esler | 2007-05-07 10:57:21 -0400 (Mon, 07 May 2007) | 1 line Adding more explicit documentation. ------------------------------------------------------------------------ r115 | esler | 2007-05-04 16:45:02 -0400 (Fri, 04 May 2007) | 1 line Added destroy_grid function. ------------------------------------------------------------------------ r114 | esler | 2007-05-04 15:02:51 -0400 (Fri, 04 May 2007) | 1 line Added feature to list. ------------------------------------------------------------------------ r113 | esler | 2007-05-02 16:06:41 -0400 (Wed, 02 May 2007) | 1 line Make sure we don't have symbol conflicts. ------------------------------------------------------------------------ r112 | esler | 2007-05-02 12:04:13 -0400 (Wed, 02 May 2007) | 1 line Added a note about bug in gcc for SSE. 
------------------------------------------------------------------------ r111 | esler | 2007-05-02 11:59:06 -0400 (Wed, 02 May 2007) | 1 line Adding inclusion of string.h ------------------------------------------------------------------------ r110 | esler | 2007-05-01 17:08:06 -0400 (Tue, 01 May 2007) | 1 line Trying a few more fixes. ------------------------------------------------------------------------ r109 | esler | 2007-05-01 17:07:38 -0400 (Tue, 01 May 2007) | 1 line Trying a few more fixes. ------------------------------------------------------------------------ r108 | esler | 2007-05-01 16:48:20 -0400 (Tue, 01 May 2007) | 1 line Fixed a couple of bugs in SSE macros. ------------------------------------------------------------------------ r107 | esler | 2007-05-01 13:33:11 -0400 (Tue, 01 May 2007) | 1 line A few minor fixes. ------------------------------------------------------------------------ r106 | esler | 2007-05-01 11:51:39 -0400 (Tue, 01 May 2007) | 1 line Migrated complex double-precision SSE routines to new A matrices. ------------------------------------------------------------------------ r105 | esler | 2007-05-01 11:06:12 -0400 (Tue, 01 May 2007) | 1 line Migrated double-precision real to new A data. ------------------------------------------------------------------------ r104 | esler | 2007-05-01 10:52:23 -0400 (Tue, 01 May 2007) | 1 line Single-precision uses new A matrix data. ------------------------------------------------------------------------ r103 | esler | 2007-05-01 08:55:19 -0400 (Tue, 01 May 2007) | 1 line Transitioning to dynamically allocated A matrices. ------------------------------------------------------------------------ r102 | esler | 2007-04-30 22:09:20 -0400 (Mon, 30 Apr 2007) | 1 line Putting A matrices in dynamically allocated memory because global variables do not remain 16-byte aligned in shared libraries on 32-bit machines. 
------------------------------------------------------------------------ r101 | esler | 2007-04-30 12:50:54 -0400 (Mon, 30 Apr 2007) | 1 line We still need SSE2 even for single-precision routines right now. ------------------------------------------------------------------------ r100 | esler | 2007-04-30 12:43:48 -0400 (Mon, 30 Apr 2007) | 1 line Minor changes to allow compilation on procs with SSE, but not SSE2. ------------------------------------------------------------------------ r99 | esler | 2007-04-19 16:38:56 -0400 (Thu, 19 Apr 2007) | 1 line Adding (hopefully) last SSE routines. ------------------------------------------------------------------------ r98 | esler | 2007-04-19 16:27:21 -0400 (Thu, 19 Apr 2007) | 1 line Finished NUB SSE 3d_z. ------------------------------------------------------------------------ r97 | esler | 2007-04-19 16:03:30 -0400 (Thu, 19 Apr 2007) | 1 line Optimizing SSE version of 3d_z nonuniform evaluation. ------------------------------------------------------------------------ r96 | esler | 2007-04-19 14:26:59 -0400 (Thu, 19 Apr 2007) | 1 line Double precision real nonuniform SSE version seems to be done. ------------------------------------------------------------------------ r95 | esler | 2007-04-19 14:16:31 -0400 (Thu, 19 Apr 2007) | 1 line Working on NUB 3d_d sse version. Fixed typo in get_NUBasis_d2funcs_sse_d and get_NUBasis_dfuncs_sse_d. ------------------------------------------------------------------------ r94 | esler | 2007-04-19 13:24:10 -0400 (Thu, 19 Apr 2007) | 1 line Added 2d_d NUB SSE routines. ------------------------------------------------------------------------ r93 | esler | 2007-04-19 12:55:30 -0400 (Thu, 19 Apr 2007) | 1 line A little more clean up. ------------------------------------------------------------------------ r92 | esler | 2007-04-19 12:49:38 -0400 (Thu, 19 Apr 2007) | 1 line Cleaned up NUB SSE version a little.
------------------------------------------------------------------------ r91 | esler | 2007-04-19 12:37:32 -0400 (Thu, 19 Apr 2007) | 1 line Made parameter to destroy_Bspline void* ------------------------------------------------------------------------ r90 | esler | 2007-04-19 12:31:36 -0400 (Thu, 19 Apr 2007) | 1 line Fixed base structures that were causing destroy_Bspline to segfault. ------------------------------------------------------------------------ r89 | esler | 2007-04-19 12:03:35 -0400 (Thu, 19 Apr 2007) | 1 line Fixed type in evaluation function. ------------------------------------------------------------------------ r88 | esler | 2007-04-19 11:28:45 -0400 (Thu, 19 Apr 2007) | 1 line Added SSE version of get_NUBasis functions. ------------------------------------------------------------------------ r87 | esler | 2007-04-18 18:10:38 -0400 (Wed, 18 Apr 2007) | 1 line Added several links. ------------------------------------------------------------------------ r86 | esler | 2007-04-18 17:37:51 -0400 (Wed, 18 Apr 2007) | 1 line Changed www menu a bit. ------------------------------------------------------------------------ r85 | esler | 2007-04-18 16:47:03 -0400 (Wed, 18 Apr 2007) | 1 line All necessary SSE routines for NUBspline complex single-precision are done. ------------------------------------------------------------------------ r84 | esler | 2007-04-18 16:00:17 -0400 (Wed, 18 Apr 2007) | 1 line 3D complex NUB SSE version seems to be working. ------------------------------------------------------------------------ r83 | esler | 2007-04-18 15:28:35 -0400 (Wed, 18 Apr 2007) | 1 line Adding single-precision complex NUB spline SSE version. ------------------------------------------------------------------------ r82 | esler | 2007-04-18 14:03:02 -0400 (Wed, 18 Apr 2007) | 1 line Added configure.ac flags to enable SSE2 and SSE3 instruction use.
------------------------------------------------------------------------ r81 | esler | 2007-04-18 13:27:52 -0400 (Wed, 18 Apr 2007) | 1 line Now calling NUBasis destructor properly. ------------------------------------------------------------------------ r80 | esler | 2007-04-18 13:24:36 -0400 (Wed, 18 Apr 2007) | 1 line destroy_Bspline now available to everyone. ------------------------------------------------------------------------ r79 | esler | 2007-04-18 13:23:26 -0400 (Wed, 18 Apr 2007) | 1 line Added destroy_Bspline function. ------------------------------------------------------------------------ r78 | esler | 2007-04-18 13:02:20 -0400 (Wed, 18 Apr 2007) | 1 line Added storage of spline and type codes. ------------------------------------------------------------------------ r77 | esler | 2007-04-18 11:48:26 -0400 (Wed, 18 Apr 2007) | 1 line Adding two more equations from LaTeX ------------------------------------------------------------------------ r76 | esler | 2007-04-18 11:43:26 -0400 (Wed, 18 Apr 2007) | 1 line Adding more background. ------------------------------------------------------------------------ r75 | esler | 2007-04-18 11:17:54 -0400 (Wed, 18 Apr 2007) | 1 line Adding more testing framework. ------------------------------------------------------------------------ r74 | esler | 2007-04-17 18:36:19 -0400 (Tue, 17 Apr 2007) | 1 line Going home. ------------------------------------------------------------------------ r73 | esler | 2007-04-17 18:34:02 -0400 (Tue, 17 Apr 2007) | 1 line Slightly sped up evaluation again. ------------------------------------------------------------------------ r72 | esler | 2007-04-17 18:22:37 -0400 (Tue, 17 Apr 2007) | 1 line Removed cruft. ------------------------------------------------------------------------ r71 | esler | 2007-04-17 18:17:50 -0400 (Tue, 17 Apr 2007) | 1 line Sped up SSE evaluation of 3d_s NUBspline slightly.
------------------------------------------------------------------------ r70 | esler | 2007-04-17 17:46:19 -0400 (Tue, 17 Apr 2007) | 1 line Adding nubspline.h ------------------------------------------------------------------------ r69 | esler | 2007-04-17 17:42:09 -0400 (Tue, 17 Apr 2007) | 1 line Added nonuniform speed test. ------------------------------------------------------------------------ r68 | esler | 2007-04-17 17:06:20 -0400 (Tue, 17 Apr 2007) | 1 line Finished routines for SSE version of NUBspline_s. ------------------------------------------------------------------------ r67 | esler | 2007-04-17 16:22:11 -0400 (Tue, 17 Apr 2007) | 1 line Starting SSE version. ------------------------------------------------------------------------ r66 | esler | 2007-04-17 16:07:29 -0400 (Tue, 17 Apr 2007) | 1 line Added test for FFTW library. ------------------------------------------------------------------------ r65 | esler | 2007-04-17 15:36:22 -0400 (Tue, 17 Apr 2007) | 1 line Fixed up some autoconf stuff. ------------------------------------------------------------------------ r64 | esler | 2007-04-17 15:20:33 -0400 (Tue, 17 Apr 2007) | 1 line Adding GPL license to all source files. ------------------------------------------------------------------------ r63 | esler | 2007-04-17 14:03:07 -0400 (Tue, 17 Apr 2007) | 1 line Adding missing file. ------------------------------------------------------------------------ r62 | esler | 2007-04-17 14:02:04 -0400 (Tue, 17 Apr 2007) | 1 line Adding missing file. ------------------------------------------------------------------------ r61 | esler | 2007-04-17 14:01:43 -0400 (Tue, 17 Apr 2007) | 1 line Adding missing Makefile.am files. ------------------------------------------------------------------------ r60 | esler | 2007-04-17 13:59:54 -0400 (Tue, 17 Apr 2007) | 1 line Added stdlib.h to list of includes.
------------------------------------------------------------------------ r59 | esler | 2007-04-17 13:54:39 -0400 (Tue, 17 Apr 2007) | 1 line Renamed library to libeinspline. ------------------------------------------------------------------------ r58 | esler | 2007-04-16 10:27:15 -0400 (Mon, 16 Apr 2007) | 1 line Adding more documentation. ------------------------------------------------------------------------ r57 | esler | 2007-04-16 09:05:02 -0400 (Mon, 16 Apr 2007) | 1 line Added documentation for nonuniform B-splines. ------------------------------------------------------------------------ r56 | esler | 2007-04-14 12:47:50 -0400 (Sat, 14 Apr 2007) | 1 line Adding documentation for nonuniform splines. ------------------------------------------------------------------------ r55 | esler | 2007-04-12 21:45:59 -0400 (Thu, 12 Apr 2007) | 1 line Adding NUBspline evaluators for double-precision complex. ------------------------------------------------------------------------ r54 | esler | 2007-04-12 21:41:34 -0400 (Thu, 12 Apr 2007) | 1 line Adding complex single-precision NUB evaluations. ------------------------------------------------------------------------ r53 | esler | 2007-04-12 17:22:11 -0400 (Thu, 12 Apr 2007) | 1 line Added NUB complex_double creation routines. ------------------------------------------------------------------------ r52 | esler | 2007-04-12 17:20:11 -0400 (Thu, 12 Apr 2007) | 1 line Added NUB complex_float creation routines. ------------------------------------------------------------------------ r51 | esler | 2007-04-12 15:47:15 -0400 (Thu, 12 Apr 2007) | 1 line Propagated forgotten fix from NUB_1d_s to NUB_1d_d. ------------------------------------------------------------------------ r50 | esler | 2007-04-12 15:43:12 -0400 (Thu, 12 Apr 2007) | 1 line Adding forgotten files. 
------------------------------------------------------------------------ r49 | esler | 2007-04-12 15:40:32 -0400 (Thu, 12 Apr 2007) | 1 line Fixed off-by-one error in solve_NUB_periodic_interp_1d_d. ------------------------------------------------------------------------ r48 | esler | 2007-04-12 14:44:37 -0400 (Thu, 12 Apr 2007) | 1 line Bug in 2D and 3D splines tracked to underallocation in create_nubasis. ------------------------------------------------------------------------ r47 | esler | 2007-04-12 13:42:12 -0400 (Thu, 12 Apr 2007) | 1 line There is still an error in NUB_3d_s functions somewhere. ------------------------------------------------------------------------ r46 | esler | 2007-04-11 15:25:07 -0400 (Wed, 11 Apr 2007) | 1 line Propagated fixes to other routines. ------------------------------------------------------------------------ r45 | esler | 2007-04-11 15:11:59 -0400 (Wed, 11 Apr 2007) | 1 line Fixed off-by-one error in solve_NUB_periodic_interp_1d_s and find_NUBcoefs_1d_s for PBC. ------------------------------------------------------------------------ r44 | esler | 2007-04-11 13:40:22 -0400 (Wed, 11 Apr 2007) | 1 line Added 3D nonuniform creation routine. ------------------------------------------------------------------------ r43 | esler | 2007-04-11 13:01:58 -0400 (Wed, 11 Apr 2007) | 1 line Added structures for nonuniform 3D basis splines. ------------------------------------------------------------------------ r42 | esler | 2007-04-11 12:40:03 -0400 (Wed, 11 Apr 2007) | 1 line Added 2d nonuniform structures and some evaluation functions. ------------------------------------------------------------------------ r41 | esler | 2007-04-11 11:51:42 -0400 (Wed, 11 Apr 2007) | 1 line 1D nonuniform bspline appears to be working. ------------------------------------------------------------------------ r40 | esler | 2007-04-11 11:21:08 -0400 (Wed, 11 Apr 2007) | 1 line Nonuniform basis appears to be working.
------------------------------------------------------------------------ r39 | esler | 2007-04-11 11:12:34 -0400 (Wed, 11 Apr 2007) | 1 line Moved code into new files for better organization. ------------------------------------------------------------------------ r38 | esler | 2007-04-11 10:02:01 -0400 (Wed, 11 Apr 2007) | 1 line Adding forgotten file containing NUB structure definitions. ------------------------------------------------------------------------ r37 | esler | 2007-04-11 07:42:19 -0400 (Wed, 11 Apr 2007) | 1 line Starting creating of 1D spline. ------------------------------------------------------------------------ r36 | esler | 2007-04-10 17:44:14 -0400 (Tue, 10 Apr 2007) | 1 line Added Solve routines for nonuniform case. ------------------------------------------------------------------------ r35 | esler | 2007-04-10 16:08:16 -0400 (Tue, 10 Apr 2007) | 1 line Fixed return types. ------------------------------------------------------------------------ r34 | esler | 2007-04-10 16:07:22 -0400 (Tue, 10 Apr 2007) | 1 line Added all get_NUBasis routines. ------------------------------------------------------------------------ r33 | esler | 2007-04-10 15:44:21 -0400 (Tue, 10 Apr 2007) | 1 line Adding initialization of basis. ------------------------------------------------------------------------ r32 | esler | 2007-04-10 15:29:09 -0400 (Tue, 10 Apr 2007) | 1 line Adding some basic code for storing nonuniform spline basis. ------------------------------------------------------------------------ r31 | esler | 2007-04-09 17:37:13 -0400 (Mon, 09 Apr 2007) | 1 line CenterGrid and GeneralGrid work. ------------------------------------------------------------------------ r30 | esler | 2007-04-09 17:22:56 -0400 (Mon, 09 Apr 2007) | 1 line Adding tests for nonuniform case. ------------------------------------------------------------------------ r29 | esler | 2007-04-09 17:02:33 -0400 (Mon, 09 Apr 2007) | 1 line Adding grid creation routines. 
------------------------------------------------------------------------ r28 | esler | 2007-04-09 16:39:48 -0400 (Mon, 09 Apr 2007) | 1 line Starting work on nonuniform case. ------------------------------------------------------------------------ r27 | esler | 2007-04-03 21:19:02 -0400 (Tue, 03 Apr 2007) | 1 line Saving the logo. ------------------------------------------------------------------------ r26 | esler | 2007-04-03 21:17:50 -0400 (Tue, 03 Apr 2007) | 1 line Changed background color and prettied up graphics. ------------------------------------------------------------------------ r25 | esler | 2007-04-03 17:59:15 -0400 (Tue, 03 Apr 2007) | 1 line A little prettier. ------------------------------------------------------------------------ r24 | esler | 2007-04-03 17:46:05 -0400 (Tue, 03 Apr 2007) | 1 line Added some more documentation. ------------------------------------------------------------------------ r23 | esler | 2007-04-03 12:18:17 -0400 (Tue, 03 Apr 2007) | 1 line Finished all SSE evaluation routines. ------------------------------------------------------------------------ r22 | esler | 2007-04-03 12:16:29 -0400 (Tue, 03 Apr 2007) | 1 line Fixed 2D SSE complex double routines. ------------------------------------------------------------------------ r21 | esler | 2007-04-03 12:13:07 -0400 (Tue, 03 Apr 2007) | 1 line Fixed typos. ------------------------------------------------------------------------ r20 | esler | 2007-04-03 12:12:11 -0400 (Tue, 03 Apr 2007) | 1 line Added 1d routines including Hessians, which just call the Laplacian routines. ------------------------------------------------------------------------ r19 | esler | 2007-04-02 10:00:28 -0400 (Mon, 02 Apr 2007) | 1 line Some web site updates. ------------------------------------------------------------------------ r18 | esler | 2007-04-01 17:51:51 -0400 (Sun, 01 Apr 2007) | 1 line Adding einspline logo.
------------------------------------------------------------------------ r17 | esler | 2007-04-01 17:04:58 -0400 (Sun, 01 Apr 2007) | 1 line Adding missing files for web page. ------------------------------------------------------------------------ r16 | esler | 2007-04-01 17:04:18 -0400 (Sun, 01 Apr 2007) | 1 line Some changes to the web pages ------------------------------------------------------------------------ r15 | esler | 2007-03-30 16:28:03 -0400 (Fri, 30 Mar 2007) | 1 line Adding documentation file for C interface. ------------------------------------------------------------------------ r14 | esler | 2007-03-29 15:05:04 -0400 (Thu, 29 Mar 2007) | 1 line Data prefetching doesn't seem to do a lick of good. ------------------------------------------------------------------------ r13 | esler | 2007-03-29 00:46:27 -0400 (Thu, 29 Mar 2007) | 1 line Really time for bed. ------------------------------------------------------------------------ r12 | esler | 2007-03-29 00:37:03 -0400 (Thu, 29 Mar 2007) | 1 line Time for bed. ------------------------------------------------------------------------ r11 | esler | 2007-03-28 23:50:32 -0400 (Wed, 28 Mar 2007) | 1 line Altivec seems to be working. ------------------------------------------------------------------------ r10 | esler | 2007-03-28 20:33:44 -0400 (Wed, 28 Mar 2007) | 1 line Adding altivec version -- still buggy. ------------------------------------------------------------------------ r9 | esler | 2007-03-28 13:32:35 -0400 (Wed, 28 Mar 2007) | 1 line Adding web page logos. ------------------------------------------------------------------------ r8 | esler | 2007-03-28 13:32:07 -0400 (Wed, 28 Mar 2007) | 1 line Adding a couple of web pages. 
------------------------------------------------------------------------ r7 | esler | 2007-03-28 13:31:42 -0400 (Wed, 28 Mar 2007) | 1 line Removed ref to bspline_structs_sse.h and bspline_structs_std.h ------------------------------------------------------------------------ r6 | esler | 2007-03-27 13:30:12 -0400 (Tue, 27 Mar 2007) | 1 line Autoconf 2.59 had obsoleted AC_PROG_CC_STDC, which we need. It is unobsoleted in 2.61. ------------------------------------------------------------------------ r5 | esler | 2007-03-27 13:17:09 -0400 (Tue, 27 Mar 2007) | 1 line Adding forgotten files. ------------------------------------------------------------------------ r4 | esler | 2007-03-27 13:16:20 -0400 (Tue, 27 Mar 2007) | 1 line Adding forgotten files. ------------------------------------------------------------------------ r3 | esler | 2007-03-27 13:15:31 -0400 (Tue, 27 Mar 2007) | 1 line Adding forgotten files. ------------------------------------------------------------------------ r2 | esler | 2007-03-27 13:09:55 -0400 (Tue, 27 Mar 2007) | 1 line Adding the rest of the files to the repository. ------------------------------------------------------------------------ r1 | esler | 2007-03-27 13:09:20 -0400 (Tue, 27 Mar 2007) | 1 line Replace bspline_structs_sse.h and bspline_structs_std.h with just bspline_structs.h ------------------------------------------------------------------------ einspline-0.9.2/README0000664000113000011300000000237311012400563011270 00000000000000Einspline B-spline library -------------------------- libeinspline is intended to be a simple, fast, and accurate library for creating and evaluating interpolating B-splines. It supports splines in 1D, 2D, and 3D, with real and complex datatypes in single or double precision. It supports splines with uniform or nonuniform knot spacing. When many splines need to be evaluated at a single point, it has specialized routines to speed the calculations.
Building -------- einspline is built with the standard autotools sequence: 1) ./configure --option1 ... 2) make 3) make install Options ------- Einspline contains hand-coded routines to take advantage of SSE, SSE2, and SSE3 instructions. To enable their use, use ./configure --enable-sse Some routines make optional use of software prefetching, which helps on some processors and may hurt on others. It is enabled by passing "--enable-prefetch" to configure. Known issues: GCC 4.0.2 (and perhaps earlier versions) has a known bug in generating optimized SSE code on 32-bit machines. The results of using the SSE version may not be correct. We suggest upgrading to the 4.1 or 4.2 line of gcc or using the Intel compiler, which is free on Linux for noncommercial use. Alternatively, disable the SSE routines. einspline-0.9.2/Makefile.in0000664000113000011300000005153011273633721012471 00000000000000# Makefile.in generated by automake 1.10 from Makefile.am. # @configure_input@ # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, # 2003, 2004, 2005, 2006 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. 
@SET_MAKE@ VPATH = @srcdir@ pkgdatadir = $(datadir)/@PACKAGE@ pkglibdir = $(libdir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ subdir = . DIST_COMMON = README $(am__configure_deps) $(srcdir)/Makefile.am \ $(srcdir)/Makefile.in $(srcdir)/einspline.pc.in \ $(top_srcdir)/configure AUTHORS COPYING ChangeLog INSTALL NEWS \ config.guess config.sub depcomp install-sh ltmain.sh missing ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \ $(top_srcdir)/m4/acx_pthread.m4 \ $(top_srcdir)/m4/ax_cc_maxopt.m4 \ $(top_srcdir)/m4/ax_cxx_maxopt.m4 \ $(top_srcdir)/m4/ax_f77_maxopt.m4 \ $(top_srcdir)/m4/ax_check_compiler_flags.m4 \ $(top_srcdir)/m4/ax_compiler_vendor.m4 \ $(top_srcdir)/m4/ax_cxx_compiler_vendor.m4 \ $(top_srcdir)/m4/ax_c_compiler_vendor.m4 \ $(top_srcdir)/m4/ax_f77_compiler_vendor.m4 \ $(top_srcdir)/m4/ax_gcc_aligns_stack.m4 \ $(top_srcdir)/m4/ax_gcc_archflag.m4 \ $(top_srcdir)/m4/ax_gxx_archflag.m4 \ $(top_srcdir)/m4/ax_gcc_version.m4 \ $(top_srcdir)/m4/ax_gcc_x86_cpuid.m4 \ $(top_srcdir)/m4/ax_ext.m4 $(top_srcdir)/m4/ac_cxx_restrict.m4 \ $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \ configure.lineno config.status.lineno mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/config.h CONFIG_CLEAN_FILES = einspline.pc SOURCES = DIST_SOURCES = RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \ html-recursive info-recursive install-data-recursive \ install-dvi-recursive 
install-exec-recursive \ install-html-recursive install-info-recursive \ install-pdf-recursive install-ps-recursive install-recursive \ installcheck-recursive installdirs-recursive pdf-recursive \ ps-recursive uninstall-recursive am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = `echo $$p | sed -e 's|^.*/||'`; am__installdirs = "$(DESTDIR)$(pkgconfigdir)" pkgconfigDATA_INSTALL = $(INSTALL_DATA) DATA = $(pkgconfig_DATA) RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ distclean-recursive maintainer-clean-recursive ETAGS = etags CTAGS = ctags DIST_SUBDIRS = $(SUBDIRS) DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) distdir = $(PACKAGE)-$(VERSION) top_distdir = $(distdir) am__remove_distdir = \ { test ! -d $(distdir) \ || { find $(distdir) -type d ! -perm -200 -exec chmod u+w {} ';' \ && rm -fr $(distdir); }; } DIST_ARCHIVES = $(distdir).tar.gz GZIP_ENV = --best distuninstallcheck_listfiles = find . -type f -print distcleancheck_listfiles = find . 
-type f -print ACLOCAL = @ACLOCAL@ ALL_STATIC = @ALL_STATIC@ AMTAR = @AMTAR@ AR = @AR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CUDA_CFLAGS = @CUDA_CFLAGS@ CUDA_LIBS = @CUDA_LIBS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ ECHO = @ECHO@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ F77 = @F77@ FFLAGS = @FFLAGS@ FFTW3F_CFLAGS = @FFTW3F_CFLAGS@ FFTW3F_LIBS = @FFTW3F_LIBS@ FFTW3_CFLAGS = @FFTW3_CFLAGS@ FFTW3_LIBS = @FFTW3_LIBS@ FLIBS = @FLIBS@ GREP = @GREP@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LIBTOOL = @LIBTOOL@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ MAKEINFO = @MAKEINFO@ MKDIR_P = @MKDIR_P@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ OBJEXT = @OBJEXT@ OPENMP_FLAG = @OPENMP_FLAG@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_VERSION = @PACKAGE_VERSION@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKGDATADEF = @PKGDATADEF@ PKG_CONFIG = @PKG_CONFIG@ POW_LIB = @POW_LIB@ PRTDIAG = @PRTDIAG@ PTHREAD_FLAG = @PTHREAD_FLAG@ RANLIB = @RANLIB@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMD_FLAGS = @SIMD_FLAGS@ STRIP = @STRIP@ VERSION = @VERSION@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_F77 = @ac_ct_F77@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ 
build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ dvidir = @dvidir@ exec_prefix = @exec_prefix@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ includedir = $(prefix)/include/einspline infodir = @infodir@ install_sh = @install_sh@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target_alias = @target_alias@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ SUBDIRS = src www doc pkgconfig_DATA = einspline.pc EXTRA_DIST = einspline.pc.in m4/*.m4 cudalt.py pkgconfigdir = $(libdir)/pkgconfig/ all: all-recursive .SUFFIXES: am--refresh: @: $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ echo ' cd $(srcdir) && $(AUTOMAKE) --gnu '; \ cd $(srcdir) && $(AUTOMAKE) --gnu \ && exit 0; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu Makefile'; \ cd $(top_srcdir) && \ $(AUTOMAKE) --gnu Makefile .PRECIOUS: Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' 
in \ *config.status*) \ echo ' $(SHELL) ./config.status'; \ $(SHELL) ./config.status;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe)'; \ cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe);; \ esac; $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) $(SHELL) ./config.status --recheck $(top_srcdir)/configure: $(am__configure_deps) cd $(srcdir) && $(AUTOCONF) $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(srcdir) && $(ACLOCAL) $(ACLOCAL_AMFLAGS) einspline.pc: $(top_builddir)/config.status $(srcdir)/einspline.pc.in cd $(top_builddir) && $(SHELL) ./config.status $@ mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs distclean-libtool: -rm -f libtool install-pkgconfigDATA: $(pkgconfig_DATA) @$(NORMAL_INSTALL) test -z "$(pkgconfigdir)" || $(MKDIR_P) "$(DESTDIR)$(pkgconfigdir)" @list='$(pkgconfig_DATA)'; for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ f=$(am__strip_dir) \ echo " $(pkgconfigDATA_INSTALL) '$$d$$p' '$(DESTDIR)$(pkgconfigdir)/$$f'"; \ $(pkgconfigDATA_INSTALL) "$$d$$p" "$(DESTDIR)$(pkgconfigdir)/$$f"; \ done uninstall-pkgconfigDATA: @$(NORMAL_UNINSTALL) @list='$(pkgconfig_DATA)'; for p in $$list; do \ f=$(am__strip_dir) \ echo " rm -f '$(DESTDIR)$(pkgconfigdir)/$$f'"; \ rm -f "$(DESTDIR)$(pkgconfigdir)/$$f"; \ done # This directory's subdirectories are mostly independent; you can cd # into them and run `make' without going through this Makefile. # To change the values of `make' variables: instead of editing Makefiles, # (1) if the variable is set in `config.status', edit `config.status' # (which will cause the Makefiles to be regenerated when you run `make'); # (2) otherwise, pass the desired values on the `make' command line. 
$(RECURSIVE_TARGETS): @failcom='exit 1'; \ for f in x $$MAKEFLAGS; do \ case $$f in \ *=* | --[!k]*);; \ *k*) failcom='fail=yes';; \ esac; \ done; \ dot_seen=no; \ target=`echo $@ | sed s/-recursive//`; \ list='$(SUBDIRS)'; for subdir in $$list; do \ echo "Making $$target in $$subdir"; \ if test "$$subdir" = "."; then \ dot_seen=yes; \ local_target="$$target-am"; \ else \ local_target="$$target"; \ fi; \ (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ || eval $$failcom; \ done; \ if test "$$dot_seen" = "no"; then \ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ fi; test -z "$$fail" $(RECURSIVE_CLEAN_TARGETS): @failcom='exit 1'; \ for f in x $$MAKEFLAGS; do \ case $$f in \ *=* | --[!k]*);; \ *k*) failcom='fail=yes';; \ esac; \ done; \ dot_seen=no; \ case "$@" in \ distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ *) list='$(SUBDIRS)' ;; \ esac; \ rev=''; for subdir in $$list; do \ if test "$$subdir" = "."; then :; else \ rev="$$subdir $$rev"; \ fi; \ done; \ rev="$$rev ."; \ target=`echo $@ | sed s/-recursive//`; \ for subdir in $$rev; do \ echo "Making $$target in $$subdir"; \ if test "$$subdir" = "."; then \ local_target="$$target-am"; \ else \ local_target="$$target"; \ fi; \ (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ || eval $$failcom; \ done && test -z "$$fail" tags-recursive: list='$(SUBDIRS)'; for subdir in $$list; do \ test "$$subdir" = . || (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \ done ctags-recursive: list='$(SUBDIRS)'; for subdir in $$list; do \ test "$$subdir" = . 
|| (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \ done ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | \ $(AWK) ' { files[$$0] = 1; } \ END { for (i in files) print i; }'`; \ mkid -fID $$unique tags: TAGS TAGS: tags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ $(TAGS_FILES) $(LISP) tags=; \ here=`pwd`; \ if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ include_option=--etags-include; \ empty_fix=.; \ else \ include_option=--include; \ empty_fix=; \ fi; \ list='$(SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ test ! -f $$subdir/TAGS || \ tags="$$tags $$include_option=$$here/$$subdir/TAGS"; \ fi; \ done; \ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | \ $(AWK) ' { files[$$0] = 1; } \ END { for (i in files) print i; }'`; \ if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ $$tags $$unique; \ fi ctags: CTAGS CTAGS: ctags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ $(TAGS_FILES) $(LISP) tags=; \ here=`pwd`; \ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | \ $(AWK) ' { files[$$0] = 1; } \ END { for (i in files) print i; }'`; \ test -z "$(CTAGS_ARGS)$$tags$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$tags $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && cd $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) $$here distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags distdir: $(DISTFILES) $(am__remove_distdir) test -d $(distdir) || mkdir $(distdir) @srcdirstrip=`echo "$(srcdir)" | sed 
's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \ fi; \ cp -pR $$d/$$file $(distdir)$$dir || exit 1; \ else \ test -f $(distdir)/$$file \ || cp -p $$d/$$file $(distdir)/$$file \ || exit 1; \ fi; \ done list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ if test "$$subdir" = .; then :; else \ test -d "$(distdir)/$$subdir" \ || $(MKDIR_P) "$(distdir)/$$subdir" \ || exit 1; \ distdir=`$(am__cd) $(distdir) && pwd`; \ top_distdir=`$(am__cd) $(top_distdir) && pwd`; \ (cd $$subdir && \ $(MAKE) $(AM_MAKEFLAGS) \ top_distdir="$$top_distdir" \ distdir="$$distdir/$$subdir" \ am__remove_distdir=: \ am__skip_length_check=: \ distdir) \ || exit 1; \ fi; \ done -find $(distdir) -type d ! -perm -777 -exec chmod a+rwx {} \; -o \ ! -type d ! -perm -444 -links 1 -exec chmod a+r {} \; -o \ ! -type d ! -perm -400 -exec chmod a+r {} \; -o \ ! -type d ! 
-perm -444 -exec $(install_sh) -c -m a+r {} {} \; \ || chmod -R a+r $(distdir) dist-gzip: distdir tardir=$(distdir) && $(am__tar) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).tar.gz $(am__remove_distdir) dist-bzip2: distdir tardir=$(distdir) && $(am__tar) | bzip2 -9 -c >$(distdir).tar.bz2 $(am__remove_distdir) dist-tarZ: distdir tardir=$(distdir) && $(am__tar) | compress -c >$(distdir).tar.Z $(am__remove_distdir) dist-shar: distdir shar $(distdir) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).shar.gz $(am__remove_distdir) dist-zip: distdir -rm -f $(distdir).zip zip -rq $(distdir).zip $(distdir) $(am__remove_distdir) dist dist-all: distdir tardir=$(distdir) && $(am__tar) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).tar.gz $(am__remove_distdir) # This target untars the dist file and tries a VPATH configuration. Then # it guarantees that the distribution is self-contained by making another # tarfile. distcheck: dist case '$(DIST_ARCHIVES)' in \ *.tar.gz*) \ GZIP=$(GZIP_ENV) gunzip -c $(distdir).tar.gz | $(am__untar) ;;\ *.tar.bz2*) \ bunzip2 -c $(distdir).tar.bz2 | $(am__untar) ;;\ *.tar.Z*) \ uncompress -c $(distdir).tar.Z | $(am__untar) ;;\ *.shar.gz*) \ GZIP=$(GZIP_ENV) gunzip -c $(distdir).shar.gz | unshar ;;\ *.zip*) \ unzip $(distdir).zip ;;\ esac chmod -R a-w $(distdir); chmod a+w $(distdir) mkdir $(distdir)/_build mkdir $(distdir)/_inst chmod a-w $(distdir) dc_install_base=`$(am__cd) $(distdir)/_inst && pwd | sed -e 's,^[^:\\/]:[\\/],/,'` \ && dc_destdir="$${TMPDIR-/tmp}/am-dc-$$$$/" \ && cd $(distdir)/_build \ && ../configure --srcdir=.. --prefix="$$dc_install_base" \ $(DISTCHECK_CONFIGURE_FLAGS) \ && $(MAKE) $(AM_MAKEFLAGS) \ && $(MAKE) $(AM_MAKEFLAGS) dvi \ && $(MAKE) $(AM_MAKEFLAGS) check \ && $(MAKE) $(AM_MAKEFLAGS) install \ && $(MAKE) $(AM_MAKEFLAGS) installcheck \ && $(MAKE) $(AM_MAKEFLAGS) uninstall \ && $(MAKE) $(AM_MAKEFLAGS) distuninstallcheck_dir="$$dc_install_base" \ distuninstallcheck \ && chmod -R a-w "$$dc_install_base" \ && ({ \ (cd ../.. 
&& umask 077 && mkdir "$$dc_destdir") \ && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" install \ && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" uninstall \ && $(MAKE) $(AM_MAKEFLAGS) DESTDIR="$$dc_destdir" \ distuninstallcheck_dir="$$dc_destdir" distuninstallcheck; \ } || { rm -rf "$$dc_destdir"; exit 1; }) \ && rm -rf "$$dc_destdir" \ && $(MAKE) $(AM_MAKEFLAGS) dist \ && rm -rf $(DIST_ARCHIVES) \ && $(MAKE) $(AM_MAKEFLAGS) distcleancheck $(am__remove_distdir) @(echo "$(distdir) archives ready for distribution: "; \ list='$(DIST_ARCHIVES)'; for i in $$list; do echo $$i; done) | \ sed -e 1h -e 1s/./=/g -e 1p -e 1x -e '$$p' -e '$$x' distuninstallcheck: @cd $(distuninstallcheck_dir) \ && test `$(distuninstallcheck_listfiles) | wc -l` -le 1 \ || { echo "ERROR: files left after uninstall:" ; \ if test -n "$(DESTDIR)"; then \ echo " (check DESTDIR support)"; \ fi ; \ $(distuninstallcheck_listfiles) ; \ exit 1; } >&2 distcleancheck: distclean @if test '$(srcdir)' = . ; then \ echo "ERROR: distcleancheck can only run from a VPATH build" ; \ exit 1 ; \ fi @test `$(distcleancheck_listfiles) | wc -l` -eq 0 \ || { echo "ERROR: files left in build directory after distclean:" ; \ $(distcleancheck_listfiles) ; \ exit 1; } >&2 check-am: all-am check: check-recursive all-am: Makefile $(DATA) installdirs: installdirs-recursive installdirs-am: for dir in "$(DESTDIR)$(pkgconfigdir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: install-recursive install-exec: install-exec-recursive install-data: install-data-recursive uninstall: uninstall-recursive install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-recursive install-strip: $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ `test -z '$(STRIP)' || \ echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install mostlyclean-generic: clean-generic: distclean-generic: -test -z 
"$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." clean: clean-recursive clean-am: clean-generic clean-libtool mostlyclean-am distclean: distclean-recursive -rm -f $(am__CONFIG_DISTCLEAN_FILES) -rm -f Makefile distclean-am: clean-am distclean-generic distclean-libtool \ distclean-tags dvi: dvi-recursive dvi-am: html: html-recursive info: info-recursive info-am: install-data-am: install-pkgconfigDATA install-dvi: install-dvi-recursive install-exec-am: install-html: install-html-recursive install-info: install-info-recursive install-man: install-pdf: install-pdf-recursive install-ps: install-ps-recursive installcheck-am: maintainer-clean: maintainer-clean-recursive -rm -f $(am__CONFIG_DISTCLEAN_FILES) -rm -rf $(top_srcdir)/autom4te.cache -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-recursive mostlyclean-am: mostlyclean-generic mostlyclean-libtool pdf: pdf-recursive pdf-am: ps: ps-recursive ps-am: uninstall-am: uninstall-pkgconfigDATA .MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) install-am \ install-strip .PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \ all all-am am--refresh check check-am clean clean-generic \ clean-libtool ctags ctags-recursive dist dist-all dist-bzip2 \ dist-gzip dist-shar dist-tarZ dist-zip distcheck distclean \ distclean-generic distclean-libtool distclean-tags \ distcleancheck distdir distuninstallcheck dvi dvi-am html \ html-am info info-am install install-am install-data \ install-data-am install-dvi install-dvi-am install-exec \ install-exec-am install-html install-html-am install-info \ install-info-am install-man install-pdf install-pdf-am \ install-pkgconfigDATA install-ps install-ps-am install-strip \ installcheck installcheck-am installdirs installdirs-am \ maintainer-clean maintainer-clean-generic 
mostlyclean \ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ tags tags-recursive uninstall uninstall-am \ uninstall-pkgconfigDATA # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: einspline-0.9.2/AUTHORS0000664000113000011300000000051111012400563011450 00000000000000libbspline was written by Kenneth P. Esler Jr. during his postdoctoral appointment at the Geophysical Laboratory of the Carnegie Institution of Washington, in the District of Columbia. Bug reports can be sent to kesler@ciw.edu until at least September 2008. After that date, please try kpesler@gmail.com. 3/27/07 einspline-0.9.2/config.sub0000755000113000011300000007772610665263176012433 00000000000000#! /bin/sh # Configuration validation subroutine script. # Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, # 2000, 2001, 2002, 2003, 2004, 2005, 2006 Free Software Foundation, # Inc. timestamp='2007-04-29' # This file is (in principle) common to ALL GNU software. # The presence of a machine in this file suggests that SOME GNU software # can handle that machine. It does not imply ALL GNU software can. # # This file is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA # 02110-1301, USA.
# # As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a # configuration script generated by Autoconf, you may include it under # the same distribution terms that you use for the rest of that program. # Please send patches to . Submit a context # diff and a properly formatted ChangeLog entry. # # Configuration subroutine to validate and canonicalize a configuration type. # Supply the specified configuration type as an argument. # If it is invalid, we print an error message on stderr and exit with code 1. # Otherwise, we print the canonical config type on stdout and succeed. # This file is supposed to be the same for all GNU packages # and recognize all the CPU types, system types and aliases # that are meaningful with *any* GNU software. # Each package is responsible for reporting which valid configurations # it does not support. The user should be able to distinguish # a failure to support a valid configuration from a meaningless # configuration. # The goal of this file is to map all the various variations of a given # machine specification into a single specification in the form: # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM # or in some cases, the newer four-part form: # CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM # It is wrong to echo any other type of specification. me=`echo "$0" | sed -e 's,.*/,,'` usage="\ Usage: $0 [OPTION] CPU-MFR-OPSYS $0 [OPTION] ALIAS Canonicalize a configuration name. Operation modes: -h, --help print this help, then exit -t, --time-stamp print date of last modification, then exit -v, --version print version number, then exit Report bugs and patches to ." version="\ GNU config.sub ($timestamp) Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." 
help=" Try \`$me --help' for more information." # Parse command line while test $# -gt 0 ; do case $1 in --time-stamp | --time* | -t ) echo "$timestamp" ; exit ;; --version | -v ) echo "$version" ; exit ;; --help | --h* | -h ) echo "$usage"; exit ;; -- ) # Stop option processing shift; break ;; - ) # Use stdin as input. break ;; -* ) echo "$me: invalid option $1$help" exit 1 ;; *local*) # First pass through any local machine types. echo $1 exit ;; * ) break ;; esac done case $# in 0) echo "$me: missing argument$help" >&2 exit 1;; 1) ;; *) echo "$me: too many arguments$help" >&2 exit 1;; esac # Separate what the user gave into CPU-COMPANY and OS or KERNEL-OS (if any). # Here we must recognize all the valid KERNEL-OS combinations. maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'` case $maybe_os in nto-qnx* | linux-gnu* | linux-dietlibc | linux-newlib* | linux-uclibc* | \ uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* | \ storm-chaos* | os2-emx* | rtmk-nova*) os=-$maybe_os basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'` ;; *) basic_machine=`echo $1 | sed 's/-[^-]*$//'` if [ $basic_machine != $1 ] then os=`echo $1 | sed 's/.*-/-/'` else os=; fi ;; esac ### Let's recognize common machines as not being operating systems so ### that things like config.sub decstation-3100 work. We also ### recognize some manufacturers as not being operating systems, so we ### can provide default operating systems below. case $os in -sun*os*) # Prevent following clause from handling this invalid input. 
;; -dec* | -mips* | -sequent* | -encore* | -pc532* | -sgi* | -sony* | \ -att* | -7300* | -3300* | -delta* | -motorola* | -sun[234]* | \ -unicom* | -ibm* | -next | -hp | -isi* | -apollo | -altos* | \ -convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\ -c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \ -harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \ -apple | -axis | -knuth | -cray) os= basic_machine=$1 ;; -sim | -cisco | -oki | -wec | -winbond) os= basic_machine=$1 ;; -scout) ;; -wrs) os=-vxworks basic_machine=$1 ;; -chorusos*) os=-chorusos basic_machine=$1 ;; -chorusrdb) os=-chorusrdb basic_machine=$1 ;; -hiux*) os=-hiuxwe2 ;; -sco6) os=-sco5v6 basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` ;; -sco5) os=-sco3.2v5 basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` ;; -sco4) os=-sco3.2v4 basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` ;; -sco3.2.[4-9]*) os=`echo $os | sed -e 's/sco3.2./sco3.2v/'` basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` ;; -sco3.2v[4-9]*) # Don't forget version if it is 3.2v4 or newer. basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` ;; -sco5v6*) # Don't forget version if it is 3.2v4 or newer. basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` ;; -sco*) os=-sco3.2v2 basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` ;; -udk*) basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` ;; -isc) os=-isc2.2 basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` ;; -clix*) basic_machine=clipper-intergraph ;; -isc*) basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` ;; -lynx*) os=-lynxos ;; -ptx*) basic_machine=`echo $1 | sed -e 's/86-.*/86-sequent/'` ;; -windowsnt*) os=`echo $os | sed -e 's/windowsnt/winnt/'` ;; -psos*) os=-psos ;; -mint | -mint[0-9]*) basic_machine=m68k-atari os=-mint ;; esac # Decode aliases for certain CPU-COMPANY combinations. case $basic_machine in # Recognize the basic CPU types without company name. # Some are omitted here because they have special meanings below. 
1750a | 580 \ | a29k \ | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \ | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \ | am33_2.0 \ | arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr | avr32 \ | bfin \ | c4x | clipper \ | d10v | d30v | dlx | dsp16xx \ | fido | fr30 | frv \ | h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \ | i370 | i860 | i960 | ia64 \ | ip2k | iq2000 \ | m32c | m32r | m32rle | m68000 | m68k | m88k \ | maxq | mb | microblaze | mcore | mep \ | mips | mipsbe | mipseb | mipsel | mipsle \ | mips16 \ | mips64 | mips64el \ | mips64vr | mips64vrel \ | mips64orion | mips64orionel \ | mips64vr4100 | mips64vr4100el \ | mips64vr4300 | mips64vr4300el \ | mips64vr5000 | mips64vr5000el \ | mips64vr5900 | mips64vr5900el \ | mipsisa32 | mipsisa32el \ | mipsisa32r2 | mipsisa32r2el \ | mipsisa64 | mipsisa64el \ | mipsisa64r2 | mipsisa64r2el \ | mipsisa64sb1 | mipsisa64sb1el \ | mipsisa64sr71k | mipsisa64sr71kel \ | mipstx39 | mipstx39el \ | mn10200 | mn10300 \ | mt \ | msp430 \ | nios | nios2 \ | ns16k | ns32k \ | or32 \ | pdp10 | pdp11 | pj | pjl \ | powerpc | powerpc64 | powerpc64le | powerpcle | ppcbe \ | pyramid \ | score \ | sh | sh[1234] | sh[24]a | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \ | sh64 | sh64le \ | sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \ | sparcv8 | sparcv9 | sparcv9b | sparcv9v \ | spu | strongarm \ | tahoe | thumb | tic4x | tic80 | tron \ | v850 | v850e \ | we32k \ | x86 | xc16x | xscale | xscalee[bl] | xstormy16 | xtensa \ | z8k) basic_machine=$basic_machine-unknown ;; m6811 | m68hc11 | m6812 | m68hc12) # Motorola 68HC11/12. 
basic_machine=$basic_machine-unknown os=-none ;; m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | v70 | w65 | z8k) ;; ms1) basic_machine=mt-unknown ;; # We use `pc' rather than `unknown' # because (1) that's what they normally are, and # (2) the word "unknown" tends to confuse beginning users. i*86 | x86_64) basic_machine=$basic_machine-pc ;; # Object if more than one company name word. *-*-*) echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 exit 1 ;; # Recognize the basic CPU types with company name. 580-* \ | a29k-* \ | alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \ | alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \ | alphapca5[67]-* | alpha64pca5[67]-* | arc-* \ | arm-* | armbe-* | armle-* | armeb-* | armv*-* \ | avr-* | avr32-* \ | bfin-* | bs2000-* \ | c[123]* | c30-* | [cjt]90-* | c4x-* | c54x-* | c55x-* | c6x-* \ | clipper-* | craynv-* | cydra-* \ | d10v-* | d30v-* | dlx-* \ | elxsi-* \ | f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \ | h8300-* | h8500-* \ | hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \ | i*86-* | i860-* | i960-* | ia64-* \ | ip2k-* | iq2000-* \ | m32c-* | m32r-* | m32rle-* \ | m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \ | m88110-* | m88k-* | maxq-* | mcore-* \ | mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \ | mips16-* \ | mips64-* | mips64el-* \ | mips64vr-* | mips64vrel-* \ | mips64orion-* | mips64orionel-* \ | mips64vr4100-* | mips64vr4100el-* \ | mips64vr4300-* | mips64vr4300el-* \ | mips64vr5000-* | mips64vr5000el-* \ | mips64vr5900-* | mips64vr5900el-* \ | mipsisa32-* | mipsisa32el-* \ | mipsisa32r2-* | mipsisa32r2el-* \ | mipsisa64-* | mipsisa64el-* \ | mipsisa64r2-* | mipsisa64r2el-* \ | mipsisa64sb1-* | mipsisa64sb1el-* \ | mipsisa64sr71k-* | mipsisa64sr71kel-* \ | mipstx39-* | mipstx39el-* \ | mmix-* \ | mt-* \ | msp430-* \ | nios-* | nios2-* \ | none-* | np1-* | ns16k-* | ns32k-* \ | orion-* \ | pdp10-* | pdp11-* | pj-* | 
pjl-* | pn-* | power-* \ | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* | ppcbe-* \ | pyramid-* \ | romp-* | rs6000-* \ | sh-* | sh[1234]-* | sh[24]a-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \ | shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \ | sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \ | sparclite-* \ | sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | strongarm-* | sv1-* | sx?-* \ | tahoe-* | thumb-* \ | tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \ | tron-* \ | v850-* | v850e-* | vax-* \ | we32k-* \ | x86-* | x86_64-* | xc16x-* | xps100-* | xscale-* | xscalee[bl]-* \ | xstormy16-* | xtensa-* \ | ymp-* \ | z8k-*) ;; # Recognize the various machine names and aliases which stand # for a CPU type and a company and sometimes even an OS. 386bsd) basic_machine=i386-unknown os=-bsd ;; 3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc) basic_machine=m68000-att ;; 3b*) basic_machine=we32k-att ;; a29khif) basic_machine=a29k-amd os=-udi ;; abacus) basic_machine=abacus-unknown ;; adobe68k) basic_machine=m68010-adobe os=-scout ;; alliant | fx80) basic_machine=fx80-alliant ;; altos | altos3068) basic_machine=m68k-altos ;; am29k) basic_machine=a29k-none os=-bsd ;; amd64) basic_machine=x86_64-pc ;; amd64-*) basic_machine=x86_64-`echo $basic_machine | sed 's/^[^-]*-//'` ;; amdahl) basic_machine=580-amdahl os=-sysv ;; amiga | amiga-*) basic_machine=m68k-unknown ;; amigaos | amigados) basic_machine=m68k-unknown os=-amigaos ;; amigaunix | amix) basic_machine=m68k-unknown os=-sysv4 ;; apollo68) basic_machine=m68k-apollo os=-sysv ;; apollo68bsd) basic_machine=m68k-apollo os=-bsd ;; aux) basic_machine=m68k-apple os=-aux ;; balance) basic_machine=ns32k-sequent os=-dynix ;; c90) basic_machine=c90-cray os=-unicos ;; convex-c1) basic_machine=c1-convex os=-bsd ;; convex-c2) basic_machine=c2-convex os=-bsd ;; convex-c32) basic_machine=c32-convex os=-bsd ;; convex-c34) basic_machine=c34-convex os=-bsd ;; convex-c38) 
basic_machine=c38-convex os=-bsd ;; cray | j90) basic_machine=j90-cray os=-unicos ;; craynv) basic_machine=craynv-cray os=-unicosmp ;; cr16c) basic_machine=cr16c-unknown os=-elf ;; crds | unos) basic_machine=m68k-crds ;; crisv32 | crisv32-* | etraxfs*) basic_machine=crisv32-axis ;; cris | cris-* | etrax*) basic_machine=cris-axis ;; crx) basic_machine=crx-unknown os=-elf ;; da30 | da30-*) basic_machine=m68k-da30 ;; decstation | decstation-3100 | pmax | pmax-* | pmin | dec3100 | decstatn) basic_machine=mips-dec ;; decsystem10* | dec10*) basic_machine=pdp10-dec os=-tops10 ;; decsystem20* | dec20*) basic_machine=pdp10-dec os=-tops20 ;; delta | 3300 | motorola-3300 | motorola-delta \ | 3300-motorola | delta-motorola) basic_machine=m68k-motorola ;; delta88) basic_machine=m88k-motorola os=-sysv3 ;; djgpp) basic_machine=i586-pc os=-msdosdjgpp ;; dpx20 | dpx20-*) basic_machine=rs6000-bull os=-bosx ;; dpx2* | dpx2*-bull) basic_machine=m68k-bull os=-sysv3 ;; ebmon29k) basic_machine=a29k-amd os=-ebmon ;; elxsi) basic_machine=elxsi-elxsi os=-bsd ;; encore | umax | mmax) basic_machine=ns32k-encore ;; es1800 | OSE68k | ose68k | ose | OSE) basic_machine=m68k-ericsson os=-ose ;; fx2800) basic_machine=i860-alliant ;; genix) basic_machine=ns32k-ns ;; gmicro) basic_machine=tron-gmicro os=-sysv ;; go32) basic_machine=i386-pc os=-go32 ;; h3050r* | hiux*) basic_machine=hppa1.1-hitachi os=-hiuxwe2 ;; h8300hms) basic_machine=h8300-hitachi os=-hms ;; h8300xray) basic_machine=h8300-hitachi os=-xray ;; h8500hms) basic_machine=h8500-hitachi os=-hms ;; harris) basic_machine=m88k-harris os=-sysv3 ;; hp300-*) basic_machine=m68k-hp ;; hp300bsd) basic_machine=m68k-hp os=-bsd ;; hp300hpux) basic_machine=m68k-hp os=-hpux ;; hp3k9[0-9][0-9] | hp9[0-9][0-9]) basic_machine=hppa1.0-hp ;; hp9k2[0-9][0-9] | hp9k31[0-9]) basic_machine=m68000-hp ;; hp9k3[2-9][0-9]) basic_machine=m68k-hp ;; hp9k6[0-9][0-9] | hp6[0-9][0-9]) basic_machine=hppa1.0-hp ;; hp9k7[0-79][0-9] | hp7[0-79][0-9]) basic_machine=hppa1.1-hp 
;; hp9k78[0-9] | hp78[0-9]) # FIXME: really hppa2.0-hp basic_machine=hppa1.1-hp ;; hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893) # FIXME: really hppa2.0-hp basic_machine=hppa1.1-hp ;; hp9k8[0-9][13679] | hp8[0-9][13679]) basic_machine=hppa1.1-hp ;; hp9k8[0-9][0-9] | hp8[0-9][0-9]) basic_machine=hppa1.0-hp ;; hppa-next) os=-nextstep3 ;; hppaosf) basic_machine=hppa1.1-hp os=-osf ;; hppro) basic_machine=hppa1.1-hp os=-proelf ;; i370-ibm* | ibm*) basic_machine=i370-ibm ;; # I'm not sure what "Sysv32" means. Should this be sysv3.2? i*86v32) basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` os=-sysv32 ;; i*86v4*) basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` os=-sysv4 ;; i*86v) basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` os=-sysv ;; i*86sol2) basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` os=-solaris2 ;; i386mach) basic_machine=i386-mach os=-mach ;; i386-vsta | vsta) basic_machine=i386-unknown os=-vsta ;; iris | iris4d) basic_machine=mips-sgi case $os in -irix*) ;; *) os=-irix4 ;; esac ;; isi68 | isi) basic_machine=m68k-isi os=-sysv ;; m88k-omron*) basic_machine=m88k-omron ;; magnum | m3230) basic_machine=mips-mips os=-sysv ;; merlin) basic_machine=ns32k-utek os=-sysv ;; mingw32) basic_machine=i386-pc os=-mingw32 ;; mingw32ce) basic_machine=arm-unknown os=-mingw32ce ;; miniframe) basic_machine=m68000-convergent ;; *mint | -mint[0-9]* | *MiNT | *MiNT[0-9]*) basic_machine=m68k-atari os=-mint ;; mips3*-*) basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'` ;; mips3*) basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`-unknown ;; monitor) basic_machine=m68k-rom68k os=-coff ;; morphos) basic_machine=powerpc-unknown os=-morphos ;; msdos) basic_machine=i386-pc os=-msdos ;; ms1-*) basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'` ;; mvs) basic_machine=i370-ibm os=-mvs ;; ncr3000) basic_machine=i486-ncr os=-sysv4 ;; netbsd386) basic_machine=i386-unknown os=-netbsd ;; netwinder) 
basic_machine=armv4l-rebel os=-linux ;; news | news700 | news800 | news900) basic_machine=m68k-sony os=-newsos ;; news1000) basic_machine=m68030-sony os=-newsos ;; news-3600 | risc-news) basic_machine=mips-sony os=-newsos ;; necv70) basic_machine=v70-nec os=-sysv ;; next | m*-next ) basic_machine=m68k-next case $os in -nextstep* ) ;; -ns2*) os=-nextstep2 ;; *) os=-nextstep3 ;; esac ;; nh3000) basic_machine=m68k-harris os=-cxux ;; nh[45]000) basic_machine=m88k-harris os=-cxux ;; nindy960) basic_machine=i960-intel os=-nindy ;; mon960) basic_machine=i960-intel os=-mon960 ;; nonstopux) basic_machine=mips-compaq os=-nonstopux ;; np1) basic_machine=np1-gould ;; nsr-tandem) basic_machine=nsr-tandem ;; op50n-* | op60c-*) basic_machine=hppa1.1-oki os=-proelf ;; openrisc | openrisc-*) basic_machine=or32-unknown ;; os400) basic_machine=powerpc-ibm os=-os400 ;; OSE68000 | ose68000) basic_machine=m68000-ericsson os=-ose ;; os68k) basic_machine=m68k-none os=-os68k ;; pa-hitachi) basic_machine=hppa1.1-hitachi os=-hiuxwe2 ;; paragon) basic_machine=i860-intel os=-osf ;; pbd) basic_machine=sparc-tti ;; pbb) basic_machine=m68k-tti ;; pc532 | pc532-*) basic_machine=ns32k-pc532 ;; pc98) basic_machine=i386-pc ;; pc98-*) basic_machine=i386-`echo $basic_machine | sed 's/^[^-]*-//'` ;; pentium | p5 | k5 | k6 | nexgen | viac3) basic_machine=i586-pc ;; pentiumpro | p6 | 6x86 | athlon | athlon_*) basic_machine=i686-pc ;; pentiumii | pentium2 | pentiumiii | pentium3) basic_machine=i686-pc ;; pentium4) basic_machine=i786-pc ;; pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*) basic_machine=i586-`echo $basic_machine | sed 's/^[^-]*-//'` ;; pentiumpro-* | p6-* | 6x86-* | athlon-*) basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'` ;; pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*) basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'` ;; pentium4-*) basic_machine=i786-`echo $basic_machine | sed 's/^[^-]*-//'` ;; pn) basic_machine=pn-gould ;; power) basic_machine=power-ibm 
;; ppc) basic_machine=powerpc-unknown ;; ppc-*) basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'` ;; ppcle | powerpclittle | ppc-le | powerpc-little) basic_machine=powerpcle-unknown ;; ppcle-* | powerpclittle-*) basic_machine=powerpcle-`echo $basic_machine | sed 's/^[^-]*-//'` ;; ppc64) basic_machine=powerpc64-unknown ;; ppc64-*) basic_machine=powerpc64-`echo $basic_machine | sed 's/^[^-]*-//'` ;; ppc64le | powerpc64little | ppc64-le | powerpc64-little) basic_machine=powerpc64le-unknown ;; ppc64le-* | powerpc64little-*) basic_machine=powerpc64le-`echo $basic_machine | sed 's/^[^-]*-//'` ;; ps2) basic_machine=i386-ibm ;; pw32) basic_machine=i586-unknown os=-pw32 ;; rdos) basic_machine=i386-pc os=-rdos ;; rom68k) basic_machine=m68k-rom68k os=-coff ;; rm[46]00) basic_machine=mips-siemens ;; rtpc | rtpc-*) basic_machine=romp-ibm ;; s390 | s390-*) basic_machine=s390-ibm ;; s390x | s390x-*) basic_machine=s390x-ibm ;; sa29200) basic_machine=a29k-amd os=-udi ;; sb1) basic_machine=mipsisa64sb1-unknown ;; sb1el) basic_machine=mipsisa64sb1el-unknown ;; sde) basic_machine=mipsisa32-sde os=-elf ;; sei) basic_machine=mips-sei os=-seiux ;; sequent) basic_machine=i386-sequent ;; sh) basic_machine=sh-hitachi os=-hms ;; sh5el) basic_machine=sh5le-unknown ;; sh64) basic_machine=sh64-unknown ;; sparclite-wrs | simso-wrs) basic_machine=sparclite-wrs os=-vxworks ;; sps7) basic_machine=m68k-bull os=-sysv2 ;; spur) basic_machine=spur-unknown ;; st2000) basic_machine=m68k-tandem ;; stratus) basic_machine=i860-stratus os=-sysv4 ;; sun2) basic_machine=m68000-sun ;; sun2os3) basic_machine=m68000-sun os=-sunos3 ;; sun2os4) basic_machine=m68000-sun os=-sunos4 ;; sun3os3) basic_machine=m68k-sun os=-sunos3 ;; sun3os4) basic_machine=m68k-sun os=-sunos4 ;; sun4os3) basic_machine=sparc-sun os=-sunos3 ;; sun4os4) basic_machine=sparc-sun os=-sunos4 ;; sun4sol2) basic_machine=sparc-sun os=-solaris2 ;; sun3 | sun3-*) basic_machine=m68k-sun ;; sun4) basic_machine=sparc-sun ;; sun386 | 
sun386i | roadrunner) basic_machine=i386-sun ;; sv1) basic_machine=sv1-cray os=-unicos ;; symmetry) basic_machine=i386-sequent os=-dynix ;; t3e) basic_machine=alphaev5-cray os=-unicos ;; t90) basic_machine=t90-cray os=-unicos ;; tic54x | c54x*) basic_machine=tic54x-unknown os=-coff ;; tic55x | c55x*) basic_machine=tic55x-unknown os=-coff ;; tic6x | c6x*) basic_machine=tic6x-unknown os=-coff ;; tx39) basic_machine=mipstx39-unknown ;; tx39el) basic_machine=mipstx39el-unknown ;; toad1) basic_machine=pdp10-xkl os=-tops20 ;; tower | tower-32) basic_machine=m68k-ncr ;; tpf) basic_machine=s390x-ibm os=-tpf ;; udi29k) basic_machine=a29k-amd os=-udi ;; ultra3) basic_machine=a29k-nyu os=-sym1 ;; v810 | necv810) basic_machine=v810-nec os=-none ;; vaxv) basic_machine=vax-dec os=-sysv ;; vms) basic_machine=vax-dec os=-vms ;; vpp*|vx|vx-*) basic_machine=f301-fujitsu ;; vxworks960) basic_machine=i960-wrs os=-vxworks ;; vxworks68) basic_machine=m68k-wrs os=-vxworks ;; vxworks29k) basic_machine=a29k-wrs os=-vxworks ;; w65*) basic_machine=w65-wdc os=-none ;; w89k-*) basic_machine=hppa1.1-winbond os=-proelf ;; xbox) basic_machine=i686-pc os=-mingw32 ;; xps | xps100) basic_machine=xps100-honeywell ;; ymp) basic_machine=ymp-cray os=-unicos ;; z8k-*-coff) basic_machine=z8k-unknown os=-sim ;; none) basic_machine=none-none os=-none ;; # Here we handle the default manufacturer of certain CPU types. It is in # some cases the only manufacturer, in others, it is the most popular. 
w89k) basic_machine=hppa1.1-winbond ;; op50n) basic_machine=hppa1.1-oki ;; op60c) basic_machine=hppa1.1-oki ;; romp) basic_machine=romp-ibm ;; mmix) basic_machine=mmix-knuth ;; rs6000) basic_machine=rs6000-ibm ;; vax) basic_machine=vax-dec ;; pdp10) # there are many clones, so DEC is not a safe bet basic_machine=pdp10-unknown ;; pdp11) basic_machine=pdp11-dec ;; we32k) basic_machine=we32k-att ;; sh[1234] | sh[24]a | sh[34]eb | sh[1234]le | sh[23]ele) basic_machine=sh-unknown ;; sparc | sparcv8 | sparcv9 | sparcv9b | sparcv9v) basic_machine=sparc-sun ;; cydra) basic_machine=cydra-cydrome ;; orion) basic_machine=orion-highlevel ;; orion105) basic_machine=clipper-highlevel ;; mac | mpw | mac-mpw) basic_machine=m68k-apple ;; pmac | pmac-mpw) basic_machine=powerpc-apple ;; *-unknown) # Make sure to match an already-canonicalized machine name. ;; *) echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 exit 1 ;; esac # Here we canonicalize certain aliases for manufacturers. case $basic_machine in *-digital*) basic_machine=`echo $basic_machine | sed 's/digital.*/dec/'` ;; *-commodore*) basic_machine=`echo $basic_machine | sed 's/commodore.*/cbm/'` ;; *) ;; esac # Decode manufacturer-specific aliases for certain operating systems. if [ x"$os" != x"" ] then case $os in # First match some system type aliases # that might get confused with valid system types. # -solaris* is a basic system type, with this one exception. -solaris1 | -solaris1.*) os=`echo $os | sed -e 's|solaris1|sunos4|'` ;; -solaris) os=-solaris2 ;; -svr4*) os=-sysv4 ;; -unixware*) os=-sysv4.2uw ;; -gnu/linux*) os=`echo $os | sed -e 's|gnu/linux|linux-gnu|'` ;; # First accept the basic system types. # The portable systems comes first. # Each alternative MUST END IN A *, to match a version number. # -sysv* is not here because it comes later, after sysvr4. 
-gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \ | -*vms* | -sco* | -esix* | -isc* | -aix* | -sunos | -sunos[34]*\ | -hpux* | -unos* | -osf* | -luna* | -dgux* | -solaris* | -sym* \ | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \ | -aos* \ | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \ | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \ | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \ | -openbsd* | -solidbsd* \ | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \ | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \ | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \ | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \ | -chorusos* | -chorusrdb* \ | -cygwin* | -pe* | -psos* | -moss* | -proelf* | -rtems* \ | -mingw32* | -linux-gnu* | -linux-newlib* | -linux-uclibc* \ | -uxpv* | -beos* | -mpeix* | -udk* \ | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \ | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \ | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \ | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \ | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \ | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \ | -skyos* | -haiku* | -rdos* | -toppers* | -drops*) # Remember, each alternative MUST END IN *, to match a version number. 
;; -qnx*) case $basic_machine in x86-* | i*86-*) ;; *) os=-nto$os ;; esac ;; -nto-qnx*) ;; -nto*) os=`echo $os | sed -e 's|nto|nto-qnx|'` ;; -sim | -es1800* | -hms* | -xray | -os68k* | -none* | -v88r* \ | -windows* | -osx | -abug | -netware* | -os9* | -beos* | -haiku* \ | -macos* | -mpw* | -magic* | -mmixware* | -mon960* | -lnews*) ;; -mac*) os=`echo $os | sed -e 's|mac|macos|'` ;; -linux-dietlibc) os=-linux-dietlibc ;; -linux*) os=`echo $os | sed -e 's|linux|linux-gnu|'` ;; -sunos5*) os=`echo $os | sed -e 's|sunos5|solaris2|'` ;; -sunos6*) os=`echo $os | sed -e 's|sunos6|solaris3|'` ;; -opened*) os=-openedition ;; -os400*) os=-os400 ;; -wince*) os=-wince ;; -osfrose*) os=-osfrose ;; -osf*) os=-osf ;; -utek*) os=-bsd ;; -dynix*) os=-bsd ;; -acis*) os=-aos ;; -atheos*) os=-atheos ;; -syllable*) os=-syllable ;; -386bsd) os=-bsd ;; -ctix* | -uts*) os=-sysv ;; -nova*) os=-rtmk-nova ;; -ns2 ) os=-nextstep2 ;; -nsk*) os=-nsk ;; # Preserve the version number of sinix5. -sinix5.*) os=`echo $os | sed -e 's|sinix|sysv|'` ;; -sinix*) os=-sysv4 ;; -tpf*) os=-tpf ;; -triton*) os=-sysv3 ;; -oss*) os=-sysv3 ;; -svr4) os=-sysv4 ;; -svr3) os=-sysv3 ;; -sysvr4) os=-sysv4 ;; # This must come after -sysvr4. -sysv*) ;; -ose*) os=-ose ;; -es1800*) os=-ose ;; -xenix) os=-xenix ;; -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*) os=-mint ;; -aros*) os=-aros ;; -kaos*) os=-kaos ;; -zvmoe) os=-zvmoe ;; -none) ;; *) # Get rid of the `-' at the beginning of $os. os=`echo $os | sed 's/[^-]*-//'` echo Invalid configuration \`$1\': system \`$os\' not recognized 1>&2 exit 1 ;; esac else # Here we handle the default operating systems that come with various machines. # The value should be what the vendor currently ships out the door with their # machine or put another way, the most popular os provided with the machine. 
# Note that if you're going to try to match "-MANUFACTURER" here (say, # "-sun"), then you have to tell the case statement up towards the top # that MANUFACTURER isn't an operating system. Otherwise, code above # will signal an error saying that MANUFACTURER isn't an operating # system, and we'll never get to this point. case $basic_machine in score-*) os=-elf ;; spu-*) os=-elf ;; *-acorn) os=-riscix1.2 ;; arm*-rebel) os=-linux ;; arm*-semi) os=-aout ;; c4x-* | tic4x-*) os=-coff ;; # This must come before the *-dec entry. pdp10-*) os=-tops20 ;; pdp11-*) os=-none ;; *-dec | vax-*) os=-ultrix4.2 ;; m68*-apollo) os=-domain ;; i386-sun) os=-sunos4.0.2 ;; m68000-sun) os=-sunos3 # This also exists in the configure program, but was not the # default. # os=-sunos4 ;; m68*-cisco) os=-aout ;; mep-*) os=-elf ;; mips*-cisco) os=-elf ;; mips*-*) os=-elf ;; or32-*) os=-coff ;; *-tti) # must be before sparc entry or we get the wrong os. os=-sysv3 ;; sparc-* | *-sun) os=-sunos4.1.1 ;; *-be) os=-beos ;; *-haiku) os=-haiku ;; *-ibm) os=-aix ;; *-knuth) os=-mmixware ;; *-wec) os=-proelf ;; *-winbond) os=-proelf ;; *-oki) os=-proelf ;; *-hp) os=-hpux ;; *-hitachi) os=-hiux ;; i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent) os=-sysv ;; *-cbm) os=-amigaos ;; *-dg) os=-dgux ;; *-dolphin) os=-sysv3 ;; m68k-ccur) os=-rtu ;; m88k-omron*) os=-luna ;; *-next ) os=-nextstep ;; *-sequent) os=-ptx ;; *-crds) os=-unos ;; *-ns) os=-genix ;; i370-*) os=-mvs ;; *-next) os=-nextstep3 ;; *-gould) os=-sysv ;; *-highlevel) os=-bsd ;; *-encore) os=-bsd ;; *-sgi) os=-irix ;; *-siemens) os=-sysv4 ;; *-masscomp) os=-rtu ;; f30[01]-fujitsu | f700-fujitsu) os=-uxpv ;; *-rom68k) os=-coff ;; *-*bug) os=-coff ;; *-apple) os=-macos ;; *-atari*) os=-mint ;; *) os=-none ;; esac fi # Here we handle the case where we know the os, and the CPU type, but not the # manufacturer. We pick the logical manufacturer. 
vendor=unknown case $basic_machine in *-unknown) case $os in -riscix*) vendor=acorn ;; -sunos*) vendor=sun ;; -aix*) vendor=ibm ;; -beos*) vendor=be ;; -hpux*) vendor=hp ;; -mpeix*) vendor=hp ;; -hiux*) vendor=hitachi ;; -unos*) vendor=crds ;; -dgux*) vendor=dg ;; -luna*) vendor=omron ;; -genix*) vendor=ns ;; -mvs* | -opened*) vendor=ibm ;; -os400*) vendor=ibm ;; -ptx*) vendor=sequent ;; -tpf*) vendor=ibm ;; -vxsim* | -vxworks* | -windiss*) vendor=wrs ;; -aux*) vendor=apple ;; -hms*) vendor=hitachi ;; -mpw* | -macos*) vendor=apple ;; -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*) vendor=atari ;; -vos*) vendor=stratus ;; esac basic_machine=`echo $basic_machine | sed "s/unknown/$vendor/"` ;; esac echo $basic_machine$os exit # Local variables: # eval: (add-hook 'write-file-hooks 'time-stamp) # time-stamp-start: "timestamp='" # time-stamp-format: "%:y-%02m-%02d" # time-stamp-end: "'" # End: einspline-0.9.2/missing0000755000113000011300000002557711012400653012020 00000000000000#! /bin/sh # Common stub for a few missing GNU programs while installing. scriptversion=2006-05-10.23 # Copyright (C) 1996, 1997, 1999, 2000, 2002, 2003, 2004, 2005, 2006 # Free Software Foundation, Inc. # Originally by Fran,cois Pinard , 1996. # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2, or (at your option) # any later version. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. 
# As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a # configuration script generated by Autoconf, you may include it under # the same distribution terms that you use for the rest of that program. if test $# -eq 0; then echo 1>&2 "Try \`$0 --help' for more information" exit 1 fi run=: sed_output='s/.* --output[ =]\([^ ]*\).*/\1/p' sed_minuso='s/.* -o \([^ ]*\).*/\1/p' # In the cases where this matters, `missing' is being run in the # srcdir already. if test -f configure.ac; then configure_ac=configure.ac else configure_ac=configure.in fi msg="missing on your system" case $1 in --run) # Try to run requested program, and just exit if it succeeds. run= shift "$@" && exit 0 # Exit code 63 means version mismatch. This often happens # when the user try to use an ancient version of a tool on # a file that requires a minimum version. In this case we # we should proceed has if the program had been absent, or # if --run hadn't been passed. if test $? = 63; then run=: msg="probably too old" fi ;; -h|--h|--he|--hel|--help) echo "\ $0 [OPTION]... PROGRAM [ARGUMENT]... Handle \`PROGRAM [ARGUMENT]...' for when PROGRAM is missing, or return an error status if there is no known handling for PROGRAM. 
Options: -h, --help display this help and exit -v, --version output version information and exit --run try to run the given command, and emulate it if it fails Supported PROGRAM values: aclocal touch file \`aclocal.m4' autoconf touch file \`configure' autoheader touch file \`config.h.in' autom4te touch the output file, or create a stub one automake touch all \`Makefile.in' files bison create \`y.tab.[ch]', if possible, from existing .[ch] flex create \`lex.yy.c', if possible, from existing .c help2man touch the output file lex create \`lex.yy.c', if possible, from existing .c makeinfo touch the output file tar try tar, gnutar, gtar, then tar without non-portable flags yacc create \`y.tab.[ch]', if possible, from existing .[ch] Send bug reports to ." exit $? ;; -v|--v|--ve|--ver|--vers|--versi|--versio|--version) echo "missing $scriptversion (GNU Automake)" exit $? ;; -*) echo 1>&2 "$0: Unknown \`$1' option" echo 1>&2 "Try \`$0 --help' for more information" exit 1 ;; esac # Now exit if we have it, but it failed. Also exit now if we # don't have it and --version was passed (most likely to detect # the program). case $1 in lex|yacc) # Not GNU programs, they don't have --version. ;; tar) if test -n "$run"; then echo 1>&2 "ERROR: \`tar' requires --run" exit 1 elif test "x$2" = "x--version" || test "x$2" = "x--help"; then exit 1 fi ;; *) if test -z "$run" && ($1 --version) > /dev/null 2>&1; then # We have it, but it failed. exit 1 elif test "x$2" = "x--version" || test "x$2" = "x--help"; then # Could not run --version or --help. This is probably someone # running `$TOOL --version' or `$TOOL --help' to check whether # $TOOL exists and not knowing $TOOL uses missing. exit 1 fi ;; esac # If it does not exist, or fails to run (possibly an outdated version), # try to emulate it. case $1 in aclocal*) echo 1>&2 "\ WARNING: \`$1' is $msg. You should only need it if you modified \`acinclude.m4' or \`${configure_ac}'. You might want to install the \`Automake' and \`Perl' packages. 
Grab them from any GNU archive site." touch aclocal.m4 ;; autoconf) echo 1>&2 "\ WARNING: \`$1' is $msg. You should only need it if you modified \`${configure_ac}'. You might want to install the \`Autoconf' and \`GNU m4' packages. Grab them from any GNU archive site." touch configure ;; autoheader) echo 1>&2 "\ WARNING: \`$1' is $msg. You should only need it if you modified \`acconfig.h' or \`${configure_ac}'. You might want to install the \`Autoconf' and \`GNU m4' packages. Grab them from any GNU archive site." files=`sed -n 's/^[ ]*A[CM]_CONFIG_HEADER(\([^)]*\)).*/\1/p' ${configure_ac}` test -z "$files" && files="config.h" touch_files= for f in $files; do case $f in *:*) touch_files="$touch_files "`echo "$f" | sed -e 's/^[^:]*://' -e 's/:.*//'`;; *) touch_files="$touch_files $f.in";; esac done touch $touch_files ;; automake*) echo 1>&2 "\ WARNING: \`$1' is $msg. You should only need it if you modified \`Makefile.am', \`acinclude.m4' or \`${configure_ac}'. You might want to install the \`Automake' and \`Perl' packages. Grab them from any GNU archive site." find . -type f -name Makefile.am -print | sed 's/\.am$/.in/' | while read f; do touch "$f"; done ;; autom4te) echo 1>&2 "\ WARNING: \`$1' is needed, but is $msg. You might have modified some files without having the proper tools for further handling them. You can get \`$1' as part of \`Autoconf' from any GNU archive site." file=`echo "$*" | sed -n "$sed_output"` test -z "$file" && file=`echo "$*" | sed -n "$sed_minuso"` if test -f "$file"; then touch $file else test -z "$file" || exec >$file echo "#! /bin/sh" echo "# Created by GNU Automake missing as a replacement of" echo "# $ $@" echo "exit 0" chmod +x $file exit 1 fi ;; bison|yacc) echo 1>&2 "\ WARNING: \`$1' $msg. You should only need it if you modified a \`.y' file. You may need the \`Bison' package in order for those modifications to take effect. You can get \`Bison' from any GNU archive site." 
rm -f y.tab.c y.tab.h if test $# -ne 1; then eval LASTARG="\${$#}" case $LASTARG in *.y) SRCFILE=`echo "$LASTARG" | sed 's/y$/c/'` if test -f "$SRCFILE"; then cp "$SRCFILE" y.tab.c fi SRCFILE=`echo "$LASTARG" | sed 's/y$/h/'` if test -f "$SRCFILE"; then cp "$SRCFILE" y.tab.h fi ;; esac fi if test ! -f y.tab.h; then echo >y.tab.h fi if test ! -f y.tab.c; then echo 'main() { return 0; }' >y.tab.c fi ;; lex|flex) echo 1>&2 "\ WARNING: \`$1' is $msg. You should only need it if you modified a \`.l' file. You may need the \`Flex' package in order for those modifications to take effect. You can get \`Flex' from any GNU archive site." rm -f lex.yy.c if test $# -ne 1; then eval LASTARG="\${$#}" case $LASTARG in *.l) SRCFILE=`echo "$LASTARG" | sed 's/l$/c/'` if test -f "$SRCFILE"; then cp "$SRCFILE" lex.yy.c fi ;; esac fi if test ! -f lex.yy.c; then echo 'main() { return 0; }' >lex.yy.c fi ;; help2man) echo 1>&2 "\ WARNING: \`$1' is $msg. You should only need it if you modified a dependency of a manual page. You may need the \`Help2man' package in order for those modifications to take effect. You can get \`Help2man' from any GNU archive site." file=`echo "$*" | sed -n "$sed_output"` test -z "$file" && file=`echo "$*" | sed -n "$sed_minuso"` if test -f "$file"; then touch $file else test -z "$file" || exec >$file echo ".ab help2man is required to generate this page" exit 1 fi ;; makeinfo) echo 1>&2 "\ WARNING: \`$1' is $msg. You should only need it if you modified a \`.texi' or \`.texinfo' file, or any other file indirectly affecting the aspect of the manual. The spurious call might also be the consequence of using a buggy \`make' (AIX, DU, IRIX). You might want to install the \`Texinfo' package or the \`GNU make' package. Grab either from any GNU archive site." # The file to touch is that specified with -o ... file=`echo "$*" | sed -n "$sed_output"` test -z "$file" && file=`echo "$*" | sed -n "$sed_minuso"` if test -z "$file"; then # ... 
or it is the one specified with @setfilename ... infile=`echo "$*" | sed 's/.* \([^ ]*\) *$/\1/'` file=`sed -n ' /^@setfilename/{ s/.* \([^ ]*\) *$/\1/ p q }' $infile` # ... or it is derived from the source name (dir/f.texi becomes f.info) test -z "$file" && file=`echo "$infile" | sed 's,.*/,,;s,.[^.]*$,,'`.info fi # If the file does not exist, the user really needs makeinfo; # let's fail without touching anything. test -f $file || exit 1 touch $file ;; tar) shift # We have already tried tar in the generic part. # Look for gnutar/gtar before invocation to avoid ugly error # messages. if (gnutar --version > /dev/null 2>&1); then gnutar "$@" && exit 0 fi if (gtar --version > /dev/null 2>&1); then gtar "$@" && exit 0 fi firstarg="$1" if shift; then case $firstarg in *o*) firstarg=`echo "$firstarg" | sed s/o//` tar "$firstarg" "$@" && exit 0 ;; esac case $firstarg in *h*) firstarg=`echo "$firstarg" | sed s/h//` tar "$firstarg" "$@" && exit 0 ;; esac fi echo 1>&2 "\ WARNING: I can't seem to be able to run \`tar' with the given arguments. You may want to install GNU tar or Free paxutils, or check the command line arguments." exit 1 ;; *) echo 1>&2 "\ WARNING: \`$1' is needed, and is $msg. You might have modified some files without having the proper tools for further handling them. Check the \`README' file, it often tells you about the needed prerequisites for installing this package. You may also peek at any GNU archive site, in case some other package would contain this missing \`$1' program." 
exit 1 ;; esac exit 0 # Local variables: # eval: (add-hook 'write-file-hooks 'time-stamp) # time-stamp-start: "scriptversion=" # time-stamp-format: "%:y-%02m-%02d.%02H" # time-stamp-end: "$" # End: einspline-0.9.2/src/0000777000113000011300000000000011311505425011261 500000000000000einspline-0.9.2/src/test_bspline_s.c0000664000113000011300000001445211012400563014361 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. // // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. 
// // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #include "bspline.h" #include #include #include #include #ifndef M_PI #define M_PI 3.1415926535897932384626433 #endif double drand48(); void sincos (double phi, double *s, double *c); typedef struct { double kcut; double *Gvecs; float *coefs; int numG; } periodic_func_s; void int_periodic_func (periodic_func_s *func, double kcut) { func->kcut = kcut; func->numG = 0; int imax = (int) ceil (kcut/(2.0*M_PI)); for (int ix=-imax; ix<=imax; ix++) { double kx = 2.0*M_PI * ix; for (int iy=-imax; iy<=imax; iy++) { double ky = 2.0*M_PI * iy; for (int iz=-imax; iz<=imax; iz++) { double kz = 2.0*M_PI * iz; if ((kx*kx + ky*ky + kz*kz) < (kcut*kcut)) func->numG++; } } } func->Gvecs = (double*) malloc (3*sizeof(double)*func->numG); func->coefs = (float*) malloc (2*sizeof(float) *func->numG); int iG = 0; for (int ix=-imax; ix<=imax; ix++) { double kx = 2.0*M_PI * ix; for (int iy=-imax; iy<=imax; iy++) { double ky = 2.0*M_PI * iy; for (int iz=-imax; iz<=imax; iz++) { double kz = 2.0*M_PI * iz; if ((kx*kx + ky*ky + kz*kz) < (kcut*kcut)) { func->Gvecs[3*iG+0] = kx; func->Gvecs[3*iG+1] = ky; func->Gvecs[3*iG+2] = kz; func->coefs[2*iG+0] = 2.0*(drand48()-0.5); func->coefs[2*iG+1] = 2.0*(drand48()-0.5); iG++; } } } } } void eval_periodic_func_s (periodic_func_s* restrict func, double x, double y, double z, float *restrict val, float *restrict grad, float *restrict hess) { *val = 0.0; for (int i=0; i<3; i++) grad[i] = 0.0; for (int i=0; i<9; i++) hess[i] = 0.0; for (int iG=0; iGnumG; iG++) { double kx = func->Gvecs[3*iG+0]; double ky = func->Gvecs[3*iG+1]; double kz = func->Gvecs[3*iG+2]; double phase = x*kx + y*ky + z*kz; double re, im; sincos(phase, &im, &re); double c_re = 
func->coefs[2*iG+0]; double c_im = func->coefs[2*iG+1]; *val += re*c_re - im*c_im; grad[0] += -kx*(re*c_im + im*c_re); grad[1] += -ky*(re*c_im + im*c_re); grad[2] += -kz*(re*c_im + im*c_re); hess[0] += -kx*kx*(re*c_re - im*c_im); hess[1] += -kx*ky*(re*c_re - im*c_im); hess[2] += -kx*kz*(re*c_re - im*c_im); hess[3] += -ky*kx*(re*c_re - im*c_im); hess[4] += -ky*ky*(re*c_re - im*c_im); hess[5] += -ky*kz*(re*c_re - im*c_im); hess[6] += -kz*kx*(re*c_re - im*c_im); hess[7] += -kz*ky*(re*c_re - im*c_im); hess[8] += -kz*kz*(re*c_re - im*c_im); } } void test_bspline_3d_s() { double kcut = 2.0*M_PI * 5.0; int Nspline = 100; Ugrid x_grid, y_grid, z_grid; x_grid.start = 0.0; x_grid.end = 1.0; x_grid.num = Nspline; y_grid.start = 0.0; y_grid.end = 1.0; y_grid.num = Nspline; z_grid.start = 0.0; z_grid.end = 1.0; z_grid.num = Nspline; double dx = 1.0/(double)(Nspline); double dy = 1.0/(double)(Nspline); double dz = 1.0/(double)(Nspline); BCtype_s xBC, yBC, zBC; xBC.lCode = xBC.rCode = PERIODIC; yBC.lCode = yBC.rCode = PERIODIC; zBC.lCode = zBC.rCode = PERIODIC; float *data = malloc (sizeof(float)*Nspline*Nspline*Nspline); periodic_func_s func; int_periodic_func (&func, kcut); for (int ix=0; ix < x_grid.num; ix++) { double x = (double) ix * dx; for (int iy=0; iy < y_grid.num; iy++) { double y = (double) iy * dy; for (int iz=0; iz < z_grid.num; iz++) { double z = (double) iz * dz; float val, grad[3], hess[9]; eval_periodic_func_s (&func, x, y, z, &val, grad, hess); data[(ix*Nspline+iy)*Nspline+iz] = val; } } } UBspline_3d_s *spline = create_UBspline_3d_s (x_grid, y_grid, z_grid, xBC, yBC, zBC, data); int numTest = 10000; double valerror = 0.0; double graderror = 0.0; double hesserror = 0.0; double valsum=0.0, gradsum=0.0, hesssum=0.0; for (int i=0; i #include "config.h" #ifdef HAVE_POSIX_MEMALIGN inline void * aligned_alloc (size_t size, size_t alignment) { void *ptr; posix_memalign (&ptr, alignment, size); return ptr; } inline void aligned_free (void *ptr) { free (ptr); } #else 
inline void * aligned_alloc (size_t size, size_t alignment) { size += (alignment-1)+sizeof(void*); void *ptr = malloc (size); if (ptr == NULL) return NULL; else { void *shifted = ptr + sizeof(void*); size_t offset = alignment - (size_t)shifted%(size_t)alignment; void *aligned = shifted + offset; *((void**)aligned-1) = ptr; return aligned; } } inline void aligned_free (void *aligned) { void *ptr = *((void**)aligned-1); free (ptr); } #endif #endif einspline-0.9.2/src/multi_bspline_eval_std_d_cpp.cc0000664000113000011300000000276611015564403017417 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. // // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. // // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #include "multi_bspline_eval_std_d_impl.h" einspline-0.9.2/src/nubasis.c0000664000113000011300000006453211035744660013032 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. 
// // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. // // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #include "nubasis.h" #include NUBasis* create_NUBasis (NUgrid *grid, bool periodic) { NUBasis* restrict basis = malloc (sizeof(NUBasis)); basis->grid = grid; basis->periodic = periodic; int N = grid->num_points; basis->xVals = malloc ((N+5)*sizeof(double)); basis->dxInv = malloc (3*(N+2)*sizeof(double)); for (int i=0; ixVals[i+2] = grid->points[i]; double* restrict g = grid->points; // Extend grid points on either end to provide enough points to // construct a full basis set if (!periodic) { basis->xVals[0] = g[ 0 ] - 2.0*(g[1]-g[0]); basis->xVals[1] = g[ 0 ] - 1.0*(g[1]-g[0]); basis->xVals[N+2] = g[N-1] + 1.0*(g[N-1]-g[N-2]); basis->xVals[N+3] = g[N-1] + 2.0*(g[N-1]-g[N-2]); basis->xVals[N+4] = g[N-1] + 3.0*(g[N-1]-g[N-2]); } else { basis->xVals[1] = g[ 0 ] - (g[N-1] - g[N-2]); basis->xVals[0] = g[ 0 ] - (g[N-1] - g[N-3]); basis->xVals[N+2] = g[N-1] + (g[ 1 ] - g[ 0 ]); basis->xVals[N+3] = g[N-1] + (g[ 2 ] - g[ 0 ]); basis->xVals[N+4] = g[N-1] + (g[ 3 ] - g[ 0 ]); } for (int i=0; idxInv[3*i+j] = 1.0/(basis->xVals[i+j+1]-basis->xVals[i]); return basis; } void destroy_NUBasis (NUBasis *basis) { free (basis->xVals); free (basis->dxInv); 
free (basis); } int get_NUBasis_funcs_s (NUBasis* restrict basis, double x, float bfuncs[4]) { double b1[2], b2[3]; int i = (*basis->grid->reverse_map)(basis->grid, x); int i2 = i+2; double* restrict dxInv = basis->dxInv; double* restrict xVals = basis->xVals; b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0]; b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0]; b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0]; b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+ (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]); b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1]; bfuncs[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0]; bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] + (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]); bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] + (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]); bfuncs[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2]; return i; } void get_NUBasis_funcs_si (NUBasis* restrict basis, int i, float bfuncs[4]) { int i2 = i+2; double b1[2], b2[3]; double x = basis->grid->points[i]; double* restrict dxInv = basis->dxInv; double* restrict xVals = basis->xVals; b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0]; b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0]; b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0]; b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+ (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]); b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1]; bfuncs[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0]; bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] + (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]); bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] + (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]); bfuncs[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2]; } int get_NUBasis_dfuncs_s (NUBasis* restrict basis, double x, float bfuncs[4], float dbfuncs[4]) { double b1[2], b2[3]; int i = (*basis->grid->reverse_map)(basis->grid, x); int i2 = i+2; double* restrict dxInv = basis->dxInv; double* restrict xVals = basis->xVals; b1[0] = (xVals[i2+1]-x) * 
dxInv[3*(i+2)+0]; b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0]; b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0]; b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+ (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]); b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1]; bfuncs[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0]; bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] + (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]); bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] + (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]); bfuncs[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2]; dbfuncs[0] = -3.0 * (dxInv[3*(i )+2] * b2[0]); dbfuncs[1] = 3.0 * (dxInv[3*(i )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]); dbfuncs[2] = 3.0 * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]); dbfuncs[3] = 3.0 * (dxInv[3*(i+2)+2] * b2[2]); return i; } void get_NUBasis_dfuncs_si (NUBasis* restrict basis, int i, float bfuncs[4], float dbfuncs[4]) { double b1[2], b2[3]; double x = basis->grid->points[i]; int i2 = i+2; double* restrict dxInv = basis->dxInv; double* restrict xVals = basis->xVals; b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0]; b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0]; b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0]; b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+ (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]); b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1]; bfuncs[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0]; bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] + (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]); bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] + (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]); bfuncs[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2]; dbfuncs[0] = -3.0 * (dxInv[3*(i )+2] * b2[0]); dbfuncs[1] = 3.0 * (dxInv[3*(i )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]); dbfuncs[2] = 3.0 * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]); dbfuncs[3] = 3.0 * (dxInv[3*(i+2)+2] * b2[2]); } int get_NUBasis_d2funcs_s (NUBasis* restrict basis, double x, float bfuncs[4], float 
dbfuncs[4], float d2bfuncs[4]) { double b1[2], b2[3]; int i = (*basis->grid->reverse_map)(basis->grid, x); int i2 = i+2; double* restrict dxInv = basis->dxInv; double* restrict xVals = basis->xVals; b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0]; b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0]; b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0]; b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+ (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]); b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1]; bfuncs[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0]; bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] + (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]); bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] + (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]); bfuncs[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2]; dbfuncs[0] = -3.0 * (dxInv[3*(i )+2] * b2[0]); dbfuncs[1] = 3.0 * (dxInv[3*(i )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]); dbfuncs[2] = 3.0 * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]); dbfuncs[3] = 3.0 * (dxInv[3*(i+2)+2] * b2[2]); d2bfuncs[0] = 6.0 * (+dxInv[3*(i+0)+2]* dxInv[3*(i+1)+1]*b1[0]); d2bfuncs[1] = 6.0 * (-dxInv[3*(i+1)+1]*(dxInv[3*(i+0)+2]+dxInv[3*(i+1)+2])*b1[0] + dxInv[3*(i+1)+2]* dxInv[3*(i+2)+1]*b1[1]); d2bfuncs[2] = 6.0 * (+dxInv[3*(i+1)+2]* dxInv[3*(i+1)+1]*b1[0] - dxInv[3*(i+2)+1]*(dxInv[3*(i+1)+2] + dxInv[3*(i+2)+2])*b1[1]); d2bfuncs[3] = 6.0 * (+dxInv[3*(i+2)+2]* dxInv[3*(i+2)+1]*b1[1]); return i; } void get_NUBasis_d2funcs_si (NUBasis* restrict basis, int i, float bfuncs[4], float dbfuncs[4], float d2bfuncs[4]) { double b1[2], b2[3]; double x = basis->grid->points[i]; int i2 = i+2; double* restrict dxInv = basis->dxInv; double* restrict xVals = basis->xVals; b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0]; b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0]; b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0]; b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+ (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]); b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1]; bfuncs[0] = 
(xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0]; bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] + (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]); bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] + (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]); bfuncs[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2]; dbfuncs[0] = -3.0 * (dxInv[3*(i )+2] * b2[0]); dbfuncs[1] = 3.0 * (dxInv[3*(i )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]); dbfuncs[2] = 3.0 * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]); dbfuncs[3] = 3.0 * (dxInv[3*(i+2)+2] * b2[2]); d2bfuncs[0] = 6.0 * (+dxInv[3*(i+0)+2]* dxInv[3*(i+1)+1]*b1[0]); d2bfuncs[1] = 6.0 * (-dxInv[3*(i+1)+1]*(dxInv[3*(i+0)+2]+dxInv[3*(i+1)+2])*b1[0] + dxInv[3*(i+1)+2]* dxInv[3*(i+2)+1]*b1[1]); d2bfuncs[2] = 6.0 * (+dxInv[3*(i+1)+2]* dxInv[3*(i+1)+1]*b1[0] - dxInv[3*(i+2)+1]*(dxInv[3*(i+1)+2] + dxInv[3*(i+2)+2])*b1[1]); d2bfuncs[3] = 6.0 * (+dxInv[3*(i+2)+2]* dxInv[3*(i+2)+1]*b1[1]); } ////////////////////////////// // Double-precision version // ////////////////////////////// int get_NUBasis_funcs_d (NUBasis* restrict basis, double x, double bfuncs[4]) { double b1[2], b2[3]; int i = (*basis->grid->reverse_map)(basis->grid, x); int i2 = i+2; double* restrict dxInv = basis->dxInv; double* restrict xVals = basis->xVals; b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0]; b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0]; b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0]; b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+ (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]); b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1]; bfuncs[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0]; bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] + (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]); bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] + (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]); bfuncs[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2]; return i; } void get_NUBasis_funcs_di (NUBasis* restrict basis, int i, double bfuncs[4]) { int i2 = i+2; double b1[2], b2[3]; 
double x = basis->grid->points[i]; double* restrict dxInv = basis->dxInv; double* restrict xVals = basis->xVals; b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0]; b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0]; b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0]; b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+ (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]); b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1]; bfuncs[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0]; bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] + (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]); bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] + (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]); bfuncs[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2]; } int get_NUBasis_dfuncs_d (NUBasis* restrict basis, double x, double bfuncs[4], double dbfuncs[4]) { double b1[2], b2[3]; int i = (*basis->grid->reverse_map)(basis->grid, x); int i2 = i+2; double* restrict dxInv = basis->dxInv; double* restrict xVals = basis->xVals; b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0]; b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0]; b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0]; b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+ (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]); b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1]; bfuncs[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0]; bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] + (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]); bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] + (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]); bfuncs[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2]; dbfuncs[0] = -3.0 * (dxInv[3*(i )+2] * b2[0]); dbfuncs[1] = 3.0 * (dxInv[3*(i )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]); dbfuncs[2] = 3.0 * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]); dbfuncs[3] = 3.0 * (dxInv[3*(i+2)+2] * b2[2]); return i; } void get_NUBasis_dfuncs_di (NUBasis* restrict basis, int i, double bfuncs[4], double dbfuncs[4]) { double b1[2], b2[3]; double x = basis->grid->points[i]; int i2 = 
i+2; double* restrict dxInv = basis->dxInv; double* restrict xVals = basis->xVals; b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0]; b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0]; b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0]; b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+ (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]); b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1]; bfuncs[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0]; bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] + (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]); bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] + (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]); bfuncs[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2]; dbfuncs[0] = -3.0 * (dxInv[3*(i )+2] * b2[0]); dbfuncs[1] = 3.0 * (dxInv[3*(i )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]); dbfuncs[2] = 3.0 * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]); dbfuncs[3] = 3.0 * (dxInv[3*(i+2)+2] * b2[2]); } int get_NUBasis_d2funcs_d (NUBasis* restrict basis, double x, double bfuncs[4], double dbfuncs[4], double d2bfuncs[4]) { double b1[2], b2[3]; int i = (*basis->grid->reverse_map)(basis->grid, x); int i2 = i+2; double* restrict dxInv = basis->dxInv; double* restrict xVals = basis->xVals; b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0]; b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0]; b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0]; b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+ (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]); b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1]; bfuncs[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0]; bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] + (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]); bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] + (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]); bfuncs[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2]; dbfuncs[0] = -3.0 * (dxInv[3*(i )+2] * b2[0]); dbfuncs[1] = 3.0 * (dxInv[3*(i )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]); dbfuncs[2] = 3.0 * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * 
b2[2]); dbfuncs[3] = 3.0 * (dxInv[3*(i+2)+2] * b2[2]); d2bfuncs[0] = 6.0 * (+dxInv[3*(i+0)+2]* dxInv[3*(i+1)+1]*b1[0]); d2bfuncs[1] = 6.0 * (-dxInv[3*(i+1)+1]*(dxInv[3*(i+0)+2]+dxInv[3*(i+1)+2])*b1[0] + dxInv[3*(i+1)+2]* dxInv[3*(i+2)+1]*b1[1]); d2bfuncs[2] = 6.0 * (+dxInv[3*(i+1)+2]* dxInv[3*(i+1)+1]*b1[0] - dxInv[3*(i+2)+1]*(dxInv[3*(i+1)+2] + dxInv[3*(i+2)+2])*b1[1]); d2bfuncs[3] = 6.0 * (+dxInv[3*(i+2)+2]* dxInv[3*(i+2)+1]*b1[1]); return i; } void get_NUBasis_d2funcs_di (NUBasis* restrict basis, int i, double bfuncs[4], double dbfuncs[4], double d2bfuncs[4]) { double b1[2], b2[3]; double x = basis->grid->points[i]; int i2 = i+2; double* restrict dxInv = basis->dxInv; double* restrict xVals = basis->xVals; b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0]; b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0]; b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0]; b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+ (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]); b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1]; bfuncs[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0]; bfuncs[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] + (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]); bfuncs[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] + (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]); bfuncs[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2]; dbfuncs[0] = -3.0 * (dxInv[3*(i )+2] * b2[0]); dbfuncs[1] = 3.0 * (dxInv[3*(i )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]); dbfuncs[2] = 3.0 * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]); dbfuncs[3] = 3.0 * (dxInv[3*(i+2)+2] * b2[2]); d2bfuncs[0] = 6.0 * (+dxInv[3*(i+0)+2]* dxInv[3*(i+1)+1]*b1[0]); d2bfuncs[1] = 6.0 * (-dxInv[3*(i+1)+1]*(dxInv[3*(i+0)+2]+dxInv[3*(i+1)+2])*b1[0] + dxInv[3*(i+1)+2]* dxInv[3*(i+2)+1]*b1[1]); d2bfuncs[2] = 6.0 * (+dxInv[3*(i+1)+2]* dxInv[3*(i+1)+1]*b1[0] - dxInv[3*(i+2)+1]*(dxInv[3*(i+1)+2] + dxInv[3*(i+2)+2])*b1[1]); d2bfuncs[3] = 6.0 * (+dxInv[3*(i+2)+2]* dxInv[3*(i+2)+1]*b1[1]); } #ifdef HAVE_SSE2 typedef union { float s[4]; __m128 
v; } uvec4; typedef union { double s[2]; __m128d v; } uvec2; int get_NUBasis_funcs_sse_s (NUBasis* restrict basis, double x, __m128 *restrict funcs) { double b1[2], b2[3]; int i = (*basis->grid->reverse_map)(basis->grid, x); int i2 = i+2; double* restrict dxInv = basis->dxInv; double* restrict xVals = basis->xVals; uvec4 bfuncs; b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0]; b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0]; b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0]; b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+ (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]); b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1]; bfuncs.s[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0]; bfuncs.s[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] + (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]); bfuncs.s[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] + (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]); bfuncs.s[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2]; *funcs = bfuncs.v; return i; } int get_NUBasis_dfuncs_sse_s (NUBasis* restrict basis, double x, __m128 *restrict funcs, __m128 *restrict dfuncs) { double b1[2], b2[3]; int i = (*basis->grid->reverse_map)(basis->grid, x); int i2 = i+2; double* restrict dxInv = basis->dxInv; double* restrict xVals = basis->xVals; uvec4 bfuncs, dbfuncs; b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0]; b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0]; b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0]; b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+ (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]); b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1]; bfuncs.s[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0]; bfuncs.s[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] + (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]); bfuncs.s[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] + (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]); bfuncs.s[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2]; dbfuncs.s[0] = -3.0 * (dxInv[3*(i )+2] * b2[0]); dbfuncs.s[1] = 3.0 * (dxInv[3*(i )+2] * b2[0] - dxInv[3*(i+1)+2] * 
b2[1]); dbfuncs.s[2] = 3.0 * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]); dbfuncs.s[3] = 3.0 * (dxInv[3*(i+2)+2] * b2[2]); *funcs = bfuncs.v; *dfuncs = dbfuncs.v; return i; } int get_NUBasis_d2funcs_sse_s (NUBasis* restrict basis, double x, __m128 *restrict funcs, __m128 *restrict dfuncs, __m128 *restrict d2funcs) { double b1[2], b2[3]; int i = (*basis->grid->reverse_map)(basis->grid, x); int i2 = i+2; double* restrict dxInv = basis->dxInv; double* restrict xVals = basis->xVals; uvec4 bfuncs, dbfuncs, d2bfuncs; b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0]; b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0]; b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0]; b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+ (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]); b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1]; bfuncs.s[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0]; bfuncs.s[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] + (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]); bfuncs.s[2] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] + (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]); bfuncs.s[3] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2]; dbfuncs.s[0] = -3.0 * (dxInv[3*(i )+2] * b2[0]); dbfuncs.s[1] = 3.0 * (dxInv[3*(i )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]); dbfuncs.s[2] = 3.0 * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]); dbfuncs.s[3] = 3.0 * (dxInv[3*(i+2)+2] * b2[2]); d2bfuncs.s[0] = 6.0 * (+dxInv[3*(i+0)+2]* dxInv[3*(i+1)+1]*b1[0]); d2bfuncs.s[1] = 6.0 * (-dxInv[3*(i+1)+1]*(dxInv[3*(i+0)+2]+dxInv[3*(i+1)+2])*b1[0] + dxInv[3*(i+1)+2]* dxInv[3*(i+2)+1]*b1[1]); d2bfuncs.s[2] = 6.0 * (+dxInv[3*(i+1)+2]* dxInv[3*(i+1)+1]*b1[0] - dxInv[3*(i+2)+1]*(dxInv[3*(i+1)+2] + dxInv[3*(i+2)+2])*b1[1]); d2bfuncs.s[3] = 6.0 * (+dxInv[3*(i+2)+2]* dxInv[3*(i+2)+1]*b1[1]); *funcs = bfuncs.v; *dfuncs = dbfuncs.v; *d2funcs = d2bfuncs.v; return i; } ////////////////////////////// // Double-precision version // ////////////////////////////// int get_NUBasis_funcs_sse_d (NUBasis* restrict basis, 
double x, __m128d *restrict f01, __m128d *restrict f23) { double b1[2], b2[3]; int i = (*basis->grid->reverse_map)(basis->grid, x); int i2 = i+2; double* restrict dxInv = basis->dxInv; double* restrict xVals = basis->xVals; uvec2 bf01, bf23, dbf01, dbf23; b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0]; b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0]; b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0]; b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+ (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]); b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1]; bf01.s[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0]; bf01.s[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] + (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]); bf23.s[0] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] + (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]); bf23.s[1] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2]; *f01 = bf01.v; *f23 = bf23.v; return i; } int get_NUBasis_dfuncs_sse_d (NUBasis* restrict basis, double x, __m128d *restrict f01, __m128d *restrict f23, __m128d *restrict df01, __m128d *restrict df23) { double b1[2], b2[3]; int i = (*basis->grid->reverse_map)(basis->grid, x); int i2 = i+2; double* restrict dxInv = basis->dxInv; double* restrict xVals = basis->xVals; uvec2 bf01, bf23, dbf01, dbf23; b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0]; b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0]; b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0]; b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+ (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]); b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1]; bf01.s[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0]; bf01.s[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] + (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]); bf23.s[0] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] + (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]); bf23.s[1] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2]; dbf01.s[0] = -3.0 * (dxInv[3*(i )+2] * b2[0]); dbf01.s[1] = 3.0 * (dxInv[3*(i )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]); dbf23.s[0] = 3.0 * 
(dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]); dbf23.s[1] = 3.0 * (dxInv[3*(i+2)+2] * b2[2]); *f01 = bf01.v; *f23 = bf23.v; *df01 = dbf01.v; *df23 = dbf23.v; return i; } int get_NUBasis_d2funcs_sse_d (NUBasis* restrict basis, double x, __m128d *restrict f01, __m128d *restrict f23, __m128d *restrict df01, __m128d *restrict df23, __m128d *restrict d2f01, __m128d *restrict d2f23) { double b1[2], b2[3]; int i = (*basis->grid->reverse_map)(basis->grid, x); int i2 = i+2; double* restrict dxInv = basis->dxInv; double* restrict xVals = basis->xVals; uvec2 bf01, bf23, dbf01, dbf23, d2bf01, d2bf23; b1[0] = (xVals[i2+1]-x) * dxInv[3*(i+2)+0]; b1[1] = (x-xVals[i2]) * dxInv[3*(i+2)+0]; b2[0] = (xVals[i2+1]-x) * dxInv[3*(i+1)+1] * b1[0]; b2[1] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+1] * b1[0]+ (xVals[i2+2]-x) * dxInv[3*(i+2)+1] * b1[1]); b2[2] = (x-xVals[i2]) * dxInv[3*(i+2)+1] * b1[1]; bf01.s[0] = (xVals[i2+1]-x) * dxInv[3*(i )+2] * b2[0]; bf01.s[1] = ((x-xVals[i2-2]) * dxInv[3*(i )+2] * b2[0] + (xVals[i2+2]-x) * dxInv[3*(i+1)+2] * b2[1]); bf23.s[0] = ((x-xVals[i2-1]) * dxInv[3*(i+1)+2] * b2[1] + (xVals[i2+3]-x) * dxInv[3*(i+2)+2] * b2[2]); bf23.s[1] = (x-xVals[i2]) * dxInv[3*(i+2)+2] * b2[2]; dbf01.s[0] = -3.0 * (dxInv[3*(i )+2] * b2[0]); dbf01.s[1] = 3.0 * (dxInv[3*(i )+2] * b2[0] - dxInv[3*(i+1)+2] * b2[1]); dbf23.s[0] = 3.0 * (dxInv[3*(i+1)+2] * b2[1] - dxInv[3*(i+2)+2] * b2[2]); dbf23.s[1] = 3.0 * (dxInv[3*(i+2)+2] * b2[2]); d2bf01.s[0] = 6.0 * (+dxInv[3*(i+0)+2]* dxInv[3*(i+1)+1]*b1[0]); d2bf01.s[1] = 6.0 * (-dxInv[3*(i+1)+1]*(dxInv[3*(i+0)+2]+dxInv[3*(i+1)+2])*b1[0] + dxInv[3*(i+1)+2]* dxInv[3*(i+2)+1]*b1[1]); d2bf23.s[0] = 6.0 * (+dxInv[3*(i+1)+2]* dxInv[3*(i+1)+1]*b1[0] - dxInv[3*(i+2)+1]*(dxInv[3*(i+1)+2] + dxInv[3*(i+2)+2])*b1[1]); d2bf23.s[1] = 6.0 * (+dxInv[3*(i+2)+2]* dxInv[3*(i+2)+1]*b1[1]); *f01 = bf01.v; *f23 = bf23.v; *df01 = dbf01.v; *df23 = dbf23.v; *d2f01 = d2bf01.v; *d2f23 = d2bf23.v; return i; } #endif 
einspline-0.9.2/src/multi_bspline_create.h0000664000113000011300000001446711035047173015561 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. // // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. // // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #ifndef MULTI_BSPLINE_CREATE_H #define MULTI_BSPLINE_CREATE_H #include "bspline_base.h" #include "multi_bspline_structs.h" #ifdef __cplusplus extern "C" { #endif //////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////// //// Spline creation functions //// //////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////// ///////////////////////////////////// // Uniform, single precision, real // ///////////////////////////////////// // Create 1D uniform single-precision, real Bspline multi_UBspline_1d_s * create_multi_UBspline_1d_s (Ugrid x_grid, BCtype_s xBC, int num_splines); // Create 2D uniform single-precision, real Bspline multi_UBspline_2d_s * create_multi_UBspline_2d_s (Ugrid x_grid, Ugrid y_grid, BCtype_s xBC, BCtype_s 
yBC, int num_splines); // Create 3D uniform single-precision, real Bspline multi_UBspline_3d_s * create_multi_UBspline_3d_s (Ugrid x_grid, Ugrid y_grid, Ugrid z_grid, BCtype_s xBC, BCtype_s yBC, BCtype_s zBC, int num_splines); // Set the data for the splines, and compute spline coefficients void set_multi_UBspline_1d_s (multi_UBspline_1d_s *spline, int spline_num, float *data); void set_multi_UBspline_2d_s (multi_UBspline_2d_s *spline, int spline_num, float *data); void set_multi_UBspline_3d_s (multi_UBspline_3d_s *spline, int spline_num, float *data); ///////////////////////////////////// // Uniform, double precision, real // ///////////////////////////////////// // Create 1D uniform single-precision, real Bspline multi_UBspline_1d_d * create_multi_UBspline_1d_d (Ugrid x_grid, BCtype_d xBC, int num_splines); // Create 2D uniform single-precision, real Bspline multi_UBspline_2d_d * create_multi_UBspline_2d_d (Ugrid x_grid, Ugrid y_grid, BCtype_d xBC, BCtype_d yBC, int num_splines); // Create 3D uniform single-precision, real Bspline multi_UBspline_3d_d * create_multi_UBspline_3d_d (Ugrid x_grid, Ugrid y_grid, Ugrid z_grid, BCtype_d xBC, BCtype_d yBC, BCtype_d zBC, int num_splines); // Set the data for the splines, and compute spline coefficients void set_multi_UBspline_1d_d (multi_UBspline_1d_d *spline, int spline_num, double *data); void set_multi_UBspline_1d_d_BC (multi_UBspline_1d_d *spline, int spline_num, double *data, BCtype_d xBC); void set_multi_UBspline_2d_d (multi_UBspline_2d_d *spline, int spline_num, double *data); void set_multi_UBspline_3d_d (multi_UBspline_3d_d *spline, int spline_num, double *data); /////////////////////////////////////// // Uniform, single precision, complex// /////////////////////////////////////// // Create 1D uniform single-precision, real Bspline multi_UBspline_1d_c * create_multi_UBspline_1d_c (Ugrid x_grid, BCtype_c xBC, int num_splines); // Create 2D uniform single-precision, real Bspline multi_UBspline_2d_c * 
create_multi_UBspline_2d_c (Ugrid x_grid, Ugrid y_grid, BCtype_c xBC, BCtype_c yBC, int num_splines); // Create 3D uniform single-precision, real Bspline multi_UBspline_3d_c * create_multi_UBspline_3d_c (Ugrid x_grid, Ugrid y_grid, Ugrid z_grid, BCtype_c xBC, BCtype_c yBC, BCtype_c zBC, int num_splines); // Set the data for the splines, and compute spline coefficients void set_multi_UBspline_1d_c (multi_UBspline_1d_c *spline, int spline_num, complex_float *data); void set_multi_UBspline_2d_c (multi_UBspline_2d_c *spline, int spline_num, complex_float *data); void set_multi_UBspline_3d_c (multi_UBspline_3d_c *spline, int spline_num, complex_float *data); /////////////////////////////////////// // Uniform, double precision, complex// /////////////////////////////////////// // Create 1D uniform double-precision, complex Bspline multi_UBspline_1d_z * create_multi_UBspline_1d_z (Ugrid x_grid, BCtype_z xBC, int num_splines); // Create 2D uniform double-precision, complex Bspline multi_UBspline_2d_z * create_multi_UBspline_2d_z (Ugrid x_grid, Ugrid y_grid, BCtype_z xBC, BCtype_z yBC, int num_splines); // Create 3D uniform double-precision, complex Bspline multi_UBspline_3d_z * create_multi_UBspline_3d_z (Ugrid x_grid, Ugrid y_grid, Ugrid z_grid, BCtype_z xBC, BCtype_z yBC, BCtype_z zBC, int num_splines); // Set the data for the splines, and compute spline coefficients void set_multi_UBspline_1d_z (multi_UBspline_1d_z *spline, int spline_num, complex_double *data); void set_multi_UBspline_1d_z_BC (multi_UBspline_1d_z *spline, int spline_num, complex_double *data, BCtype_z xBC); void set_multi_UBspline_2d_z (multi_UBspline_2d_z *spline, int spline_num, complex_double *data); void set_multi_UBspline_3d_z (multi_UBspline_3d_z *spline, int spline_num, complex_double *data); #ifdef __cplusplus } #endif #endif einspline-0.9.2/src/config.h.in0000664000113000011300000000775011273633731013244 00000000000000/* src/config.h.in. Generated from configure.ac by autoheader. 
*/ /* Define to dummy `main' function (if any) required to link to the Fortran libraries. */ #undef F77_DUMMY_MAIN /* Define to a macro mangling the given C identifier (in lower and upper case), which must not contain underscores, for linking with Fortran. */ #undef F77_FUNC /* As F77_FUNC, but for C identifiers containing underscores. */ #undef F77_FUNC_ /* Define if F77 and FC dummy `main' functions are identical. */ #undef FC_DUMMY_MAIN_EQ_F77 /* Define to 1 if you have the `clock_gettime' function. */ #undef HAVE_CLOCK_GETTIME /* Define to 1 if C supports variable-length arrays. */ #undef HAVE_C_VARARRAYS /* Define to 1 if you have the <dlfcn.h> header file. */ #undef HAVE_DLFCN_H /* FFTW3 library is available */ #undef HAVE_FFTW3 /* FFTW3F library is available */ #undef HAVE_FFTW3F /* Define to 1 if you have the `floor' function. */ #undef HAVE_FLOOR /* Define to 1 if you have the <inttypes.h> header file. */ #undef HAVE_INTTYPES_H /* Define to 1 if you have the `m' library (-lm). */ #undef HAVE_LIBM /* Define to 1 if you have the <memory.h> header file. */ #undef HAVE_MEMORY_H /* Support mmx instructions */ #undef HAVE_MMX /* Define to 1 if you have the `posix_memalign' function. */ #undef HAVE_POSIX_MEMALIGN /* Define to 1 if you have the `pow' function. */ #undef HAVE_POW /* Define to 1 if you have the `sqrt' function. */ #undef HAVE_SQRT /* Support SSE (Streaming SIMD Extensions) instructions */ #undef HAVE_SSE /* Support SSE2 (Streaming SIMD Extensions 2) instructions */ #undef HAVE_SSE2 /* Support SSE3 (Streaming SIMD Extensions 3) instructions */ #undef HAVE_SSE3 /* Support SSE4.1 (Streaming SIMD Extensions 4.1) instructions */ #undef HAVE_SSE4_1 /* Support SSE4.2 (Streaming SIMD Extensions 4.2) instructions */ #undef HAVE_SSE4_2 /* Support SSSE3 (Supplemental Streaming SIMD Extensions 3) instructions */ #undef HAVE_SSSE3 /* Define to 1 if stdbool.h conforms to C99. */ #undef HAVE_STDBOOL_H /* Define to 1 if you have the <stdint.h> header file.
*/ #undef HAVE_STDINT_H /* Define to 1 if you have the <stdlib.h> header file. */ #undef HAVE_STDLIB_H /* Define to 1 if you have the <strings.h> header file. */ #undef HAVE_STRINGS_H /* Define to 1 if you have the <string.h> header file. */ #undef HAVE_STRING_H /* Define to 1 if you have the `strtol' function. */ #undef HAVE_STRTOL /* Define to 1 if you have the <sys/stat.h> header file. */ #undef HAVE_SYS_STAT_H /* Define to 1 if you have the <sys/time.h> header file. */ #undef HAVE_SYS_TIME_H /* Define to 1 if you have the <sys/types.h> header file. */ #undef HAVE_SYS_TYPES_H /* Define to 1 if you have the <unistd.h> header file. */ #undef HAVE_UNISTD_H /* xmmintrin.h is available. */ #undef HAVE_XMMINTRIN_H /* Define to 1 if the system has the type `_Bool'. */ #undef HAVE__BOOL /* Use double-precision to solve for single-precision splines */ #undef HIGH_PRECISION /* Name of package */ #undef PACKAGE /* Define to the address where bug reports for this package should be sent. */ #undef PACKAGE_BUGREPORT /* Define to the full name of this package. */ #undef PACKAGE_NAME /* Define to the full name and version of this package. */ #undef PACKAGE_STRING /* Define to the one symbol short name of this package. */ #undef PACKAGE_TARNAME /* Define to the version of this package. */ #undef PACKAGE_VERSION /* Prefetch loop lead distance */ #undef PREFETCH_AHEAD /* Define to 1 if you have the ANSI C header files. */ #undef STDC_HEADERS /* Use SSE prefetch */ #undef USE_PREFETCH /* Version number of package */ #undef VERSION /* Define to empty if `const' does not conform to ANSI C. */ #undef const /* Define to `__inline__' or `__inline' if that's what the C compiler calls it, or to nothing if 'inline' is not supported under any name. */ #ifndef __cplusplus #undef inline #endif /* Define to empty if the C99 keyword for C++ does not work. */ #undef restrict /* Define to `unsigned int' if <sys/types.h> does not define.
*/ #undef size_t einspline-0.9.2/src/multi_nubspline_eval_sse_z_cpp.cc0000664000113000011300000000277011036110175017777 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. // // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. // // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #include "multi_nubspline_eval_sse_z_impl.h" einspline-0.9.2/src/multi_bspline_eval_sse_d_impl.h0000664000113000011300000016600411302247653017440 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. // // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the // // GNU General Public License for more details. // // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #ifndef MULTI_BSPLINE_EVAL_SSE_D_IMPL_H #define MULTI_BSPLINE_EVAL_SSE_D_IMPL_H #include "config.h" #include #include #ifdef HAVE_SSE3 #include #endif #include #include "bspline_base.h" #include "multi_bspline_structs.h" extern __m128d *restrict A_d; extern double *restrict Ad, *restrict dAd, *restrict d2Ad; #ifndef _MM_DDOT4_PD #ifdef HAVE_SSE3 #define _MM_DDOT4_PD(a0, a1, a2, a3, b0, b1, b2, b3, r) \ do { \ __m128d t0 = _mm_add_pd(_mm_mul_pd (a0, b0),_mm_mul_pd (a1, b1)); \ __m128d t1 = _mm_add_pd(_mm_mul_pd (a2, b2),_mm_mul_pd (a3, b3)); \ r = _mm_hadd_pd (t0, t1); \ } while(0); #define _MM_DOT4_PD(a0, a1, b0, b1, p) \ do { \ __m128d t0 = _mm_add_pd(_mm_mul_pd (a0, b0),_mm_mul_pd (a1, b1)); \ __m128d t1 = _mm_hadd_pd (t0,t0); \ _mm_store_sd (&(p), t1); \ } while (0); #else #define _MM_DDOT4_PD(a0, a1, a2, a3, b0, b1, b2, b3, r) \ do { \ __m128d t0 = _mm_add_pd(_mm_mul_pd (a0, b0),_mm_mul_pd (a1, b1)); \ __m128d t1 = _mm_add_pd(_mm_mul_pd (a2, b2),_mm_mul_pd (a3, b3)); \ r = _mm_add_pd(_mm_unpacklo_pd(t0,t1),_mm_unpackhi_pd(t0,t1)); \ } while(0); #define _MM_DOT4_PD(a0, a1, b0, b1, p) \ do { \ __m128d t0 = _mm_add_pd(_mm_mul_pd (a0, b0),_mm_mul_pd (a1, b1)); \ __m128d t1 = \ _mm_add_pd (_mm_unpacklo_pd(t0,t0), _mm_unpackhi_pd(t0,t0)); \ _mm_store_d (&(p), t1); \ } while (0); #endif #endif /*********************************************************/ /* 1D double-precision, real evaulation functions */ /*********************************************************/ #include void eval_multi_UBspline_1d_d (multi_UBspline_1d_d *spline, double x, double* restrict vals) { x -= spline->x_grid.start; double ux 
= x*spline->x_grid.delta_inv; double ipartx, tx; tx = modf (ux, &ipartx); int ix = (int) ipartx; double tpx[4], a[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; double* restrict coefs = spline->coefs; a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]); a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]); a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]); a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]); // fprintf (stderr, "ux = %12.8f\n", ux); // fprintf (stderr, "ipart = %ix tx = %12.7f\n", ix, tx); // fprintf (stderr, "a[0] = %1.8e\n", a[0]); // fprintf (stderr, "a[1] = %1.8e\n", a[1]); // fprintf (stderr, "a[2] = %1.8e\n", a[2]); // fprintf (stderr, "a[3] = %1.8e\n", a[3]); // fprintf (stderr, "tpx[0] = %1.8e\n", tpx[0]); intptr_t xs = spline->x_stride; for (int n=0; nnum_splines; n++) vals[n] = 0.0; for (int i=0; i<4; i++) { double* restrict coefs = spline->coefs + ((ix+i)*xs); for (int n=0; nnum_splines; n++) vals[n] += a[i] * coefs[n]; } } void eval_multi_UBspline_1d_d_vg (multi_UBspline_1d_d *spline, double x, double* restrict vals, double* restrict grads) { x -= spline->x_grid.start; double ux = x*spline->x_grid.delta_inv; double ipartx, tx; tx = modf (ux, &ipartx); int ix = (int) ipartx; double tpx[4], a[4], da[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; double* restrict coefs = spline->coefs; a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]); a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]); a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]); a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]); da[0] = (dAd[ 0]*tpx[0] + dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]); da[1] = (dAd[ 4]*tpx[0] + dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]); da[2] = (dAd[ 8]*tpx[0] + dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]); da[3] = (dAd[12]*tpx[0] + 
dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]); intptr_t xs = spline->x_stride; for (int n=0; nnum_splines; n++) { vals[n] = 0.0; grads[n] = 0.0; } for (int i=0; i<4; i++) { double* restrict coefs = spline->coefs + ((ix+i)*xs); for (int n=0; nnum_splines; n++) { vals[n] += a[i] * coefs[n]; grads[n] += da[i] * coefs[n]; } } double dxInv = spline->x_grid.delta_inv; for (int n=0; nnum_splines; n++) grads[n] *= dxInv; } void eval_multi_UBspline_1d_d_vgl (multi_UBspline_1d_d *spline, double x, double* restrict vals, double* restrict grads, double* restrict lapl) { x -= spline->x_grid.start; double ux = x*spline->x_grid.delta_inv; double ipartx, tx; tx = modf (ux, &ipartx); int ix = (int) ipartx; double tpx[4], a[4], da[4], d2a[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; double* restrict coefs = spline->coefs; a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]); a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]); a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]); a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]); da[0] = (dAd[ 0]*tpx[0] + dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]); da[1] = (dAd[ 4]*tpx[0] + dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]); da[2] = (dAd[ 8]*tpx[0] + dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]); da[3] = (dAd[12]*tpx[0] + dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]); d2a[0] = (d2Ad[ 0]*tpx[0] + d2Ad[ 1]*tpx[1] + d2Ad[ 2]*tpx[2] + d2Ad[ 3]*tpx[3]); d2a[1] = (d2Ad[ 4]*tpx[0] + d2Ad[ 5]*tpx[1] + d2Ad[ 6]*tpx[2] + d2Ad[ 7]*tpx[3]); d2a[2] = (d2Ad[ 8]*tpx[0] + d2Ad[ 9]*tpx[1] + d2Ad[10]*tpx[2] + d2Ad[11]*tpx[3]); d2a[3] = (d2Ad[12]*tpx[0] + d2Ad[13]*tpx[1] + d2Ad[14]*tpx[2] + d2Ad[15]*tpx[3]); intptr_t xs = spline->x_stride; for (int n=0; nnum_splines; n++) { vals[n] = 0.0; grads[n] = 0.0; lapl[n] = 0.0; } for (int i=0; i<4; i++) { double* restrict coefs = spline->coefs + ((ix+i)*xs); for (int n=0; nnum_splines; n++) { 
vals[n] += a[i] * coefs[n]; grads[n] += da[i] * coefs[n]; lapl[n] += d2a[i] * coefs[n]; } } double dxInv = spline->x_grid.delta_inv; for (int n=0; nnum_splines; n++) { grads[n] *= dxInv; lapl [n] *= dxInv*dxInv; } } void eval_multi_UBspline_1d_d_vgh (multi_UBspline_1d_d *spline, double x, double* restrict vals, double* restrict grads, double* restrict hess) { eval_multi_UBspline_1d_d_vgl (spline, x, vals, grads, hess); } /*********************************************************/ /* 2D double-precision, real evaulation functions */ /*********************************************************/ void eval_multi_UBspline_2d_d(multi_UBspline_2d_d *spline, double x, double y, double* restrict vals) { _mm_prefetch ((const char*) &A_d[ 0],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 1],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 2],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 3],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 4],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 5],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 6],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 7],_MM_HINT_T0); x -= spline->x_grid.start; y -= spline->y_grid.start; double ux = x*spline->x_grid.delta_inv; double uy = y*spline->y_grid.delta_inv; ux = fmin (ux, (double)(spline->x_grid.num)-1.0e-5); uy = fmin (uy, (double)(spline->y_grid.num)-1.0e-5); double ipartx, iparty, tx, ty; tx = modf (ux, &ipartx); int ix = (int) ipartx; ty = modf (uy, &iparty); int iy = (int) iparty; intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; int N = spline->num_splines; // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // a = A * tpx, b = A * tpy, c = A * tpz // A is 4x4 matrix given by the rows A0, A1, A2, A3 __m128d tpx01, tpx23, tpy01, tpy23, a01 , b01, a23, b23; tpx01 = _mm_set_pd (tx*tx*tx, tx*tx); tpx23 = _mm_set_pd (tx, 1.0); tpy01 = _mm_set_pd (ty*ty*ty, ty*ty); tpy23 = _mm_set_pd (ty, 1.0); // x-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], 
A_d[ 2], A_d[ 3], tpx01, tpx23, tpx01, tpx23, a01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpx01, tpx23, tpx01, tpx23, a23); // y-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpy01, tpy23, tpy01, tpy23, b01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpy01, tpy23, tpy01, tpy23, b23); // Zero-out values int Nh = (N+1)/2; __m128d mvals[Nh]; for (int n=0; ncoefs + (ix+i)*xs + (iy+j)*ys); for (int n=0; nx_grid.start; y -= spline->y_grid.start; double ux = x*spline->x_grid.delta_inv; double uy = y*spline->y_grid.delta_inv; ux = fmin (ux, (double)(spline->x_grid.num)-1.0e-5); uy = fmin (uy, (double)(spline->y_grid.num)-1.0e-5); double ipartx, iparty, tx, ty; tx = modf (ux, &ipartx); int ix = (int) ipartx; ty = modf (uy, &iparty); int iy = (int) iparty; intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; int N = spline->num_splines; // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // a = A * tpx, b = A * tpy, c = A * tpz // A is 4x4 matrix given by the rows A0, A1, A2, A3 __m128d tpx01, tpx23, tpy01, tpy23, a01 , b01, a23, b23, da01 , db01, da23, db23; tpx01 = _mm_set_pd (tx*tx*tx, tx*tx); tpx23 = _mm_set_pd (tx, 1.0); tpy01 = _mm_set_pd (ty*ty*ty, ty*ty); tpy23 = _mm_set_pd (ty, 1.0); // x-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpx01, tpx23, tpx01, tpx23, a01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpx01, tpx23, tpx01, tpx23, a23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpx01, tpx23, tpx01, tpx23, da01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpx01, tpx23, tpx01, tpx23, da23); // y-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpy01, tpy23, tpy01, tpy23, b01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpy01, tpy23, tpy01, tpy23, b23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpy01, tpy23, tpy01, tpy23, db01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpy01, tpy23, tpy01, 
tpy23, db23); // Zero-out values int Nh = (N+1)/2; __m128d mvals[Nh], mgrads[2*Nh]; for (int n=0; ncoefs + (ix+i)*xs + (iy+j)*ys); for (int n=0; nx_grid.delta_inv; double dyInv = spline->y_grid.delta_inv; for (int n=0; nx_grid.start; y -= spline->y_grid.start; double ux = x*spline->x_grid.delta_inv; double uy = y*spline->y_grid.delta_inv; ux = fmin (ux, (double)(spline->x_grid.num)-1.0e-5); uy = fmin (uy, (double)(spline->y_grid.num)-1.0e-5); double ipartx, iparty, tx, ty; tx = modf (ux, &ipartx); int ix = (int) ipartx; ty = modf (uy, &iparty); int iy = (int) iparty; intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; int N = spline->num_splines; // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // a = A * tpx, b = A * tpy, c = A * tpz // A is 4x4 matrix given by the rows A0, A1, A2, A3 __m128d tpx01, tpx23, tpy01, tpy23, a01 , b01, a23, b23, da01 , db01, da23, db23, d2a01, d2b01, d2a23, d2b23; tpx01 = _mm_set_pd (tx*tx*tx, tx*tx); tpx23 = _mm_set_pd (tx, 1.0); tpy01 = _mm_set_pd (ty*ty*ty, ty*ty); tpy23 = _mm_set_pd (ty, 1.0); // x-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpx01, tpx23, tpx01, tpx23, a01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpx01, tpx23, tpx01, tpx23, a23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpx01, tpx23, tpx01, tpx23, da01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpx01, tpx23, tpx01, tpx23, da23); _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpx01, tpx23, tpx01, tpx23, d2a01); _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpx01, tpx23, tpx01, tpx23, d2a23); // y-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpy01, tpy23, tpy01, tpy23, b01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpy01, tpy23, tpy01, tpy23, b23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpy01, tpy23, tpy01, tpy23, db01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpy01, tpy23, tpy01, tpy23, db23); _MM_DDOT4_PD 
(A_d[16], A_d[17], A_d[18], A_d[19], tpy01, tpy23, tpy01, tpy23, d2b01); _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpy01, tpy23, tpy01, tpy23, d2b23); // Zero-out values int Nh = (N+1)/2; __m128d mvals[Nh], mgrads[2*Nh], mlapl[2*Nh]; for (int n=0; ncoefs + (ix+i)*xs + (iy+j)*ys); for (int n=0; nx_grid.delta_inv; double dyInv = spline->y_grid.delta_inv; double lapl2[2*N]; for (int n=0; nx_grid.start; y -= spline->y_grid.start; double ux = x*spline->x_grid.delta_inv; double uy = y*spline->y_grid.delta_inv; ux = fmin (ux, (double)(spline->x_grid.num)-1.0e-5); uy = fmin (uy, (double)(spline->y_grid.num)-1.0e-5); double ipartx, iparty, tx, ty; tx = modf (ux, &ipartx); int ix = (int) ipartx; ty = modf (uy, &iparty); int iy = (int) iparty; intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; int N = spline->num_splines; // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // a = A * tpx, b = A * tpy, c = A * tpz // A is 4x4 matrix given by the rows A0, A1, A2, A3 __m128d tpx01, tpx23, tpy01, tpy23, a01 , b01, a23, b23, da01 , db01, da23, db23, d2a01, d2b01, d2a23, d2b23; tpx01 = _mm_set_pd (tx*tx*tx, tx*tx); tpx23 = _mm_set_pd (tx, 1.0); tpy01 = _mm_set_pd (ty*ty*ty, ty*ty); tpy23 = _mm_set_pd (ty, 1.0); // x-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpx01, tpx23, tpx01, tpx23, a01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpx01, tpx23, tpx01, tpx23, a23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpx01, tpx23, tpx01, tpx23, da01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpx01, tpx23, tpx01, tpx23, da23); _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpx01, tpx23, tpx01, tpx23, d2a01); _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpx01, tpx23, tpx01, tpx23, d2a23); // y-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpy01, tpy23, tpy01, tpy23, b01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpy01, tpy23, tpy01, tpy23, b23); 
_MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpy01, tpy23, tpy01, tpy23, db01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpy01, tpy23, tpy01, tpy23, db23); _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpy01, tpy23, tpy01, tpy23, d2b01); _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpy01, tpy23, tpy01, tpy23, d2b23); // Zero-out values int Nh = (N+1)/2; __m128d mvals[Nh], mgrads[2*Nh], mhess[3*Nh]; for (int n=0; ncoefs + (ix+i)*xs + (iy+j)*ys); for (int n=0; nx_grid.delta_inv; double dyInv = spline->y_grid.delta_inv; for (int n=0; nx_grid.start; y -= spline->y_grid.start; z -= spline->z_grid.start; double ux = x*spline->x_grid.delta_inv; double uy = y*spline->y_grid.delta_inv; double uz = z*spline->z_grid.delta_inv; ux = fmin (ux, (double)(spline->x_grid.num)-1.0e-5); uy = fmin (uy, (double)(spline->y_grid.num)-1.0e-5); uz = fmin (uz, (double)(spline->z_grid.num)-1.0e-5); double ipartx, iparty, ipartz, tx, ty, tz; tx = modf (ux, &ipartx); int ix = (int) ipartx; ty = modf (uy, &iparty); int iy = (int) iparty; tz = modf (uz, &ipartz); int iz = (int) ipartz; intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; intptr_t zs = spline->z_stride; int N = spline->num_splines; // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // tpz = [t_z^3 t_z^2 t_z 1] // a = A * tpx, b = A * tpy, c = A * tpz // A is 4x4 matrix given by the rows A0, A1, A2, A3 __m128d tpx01, tpx23, tpy01, tpy23, tpz01, tpz23, a01, b01, c01, a23, b23, c23, tmp0, tmp1, r0, r1, i0, i1, val_r, val_i; tpx01 = _mm_set_pd (tx*tx*tx, tx*tx); tpx23 = _mm_set_pd (tx, 1.0); tpy01 = _mm_set_pd (ty*ty*ty, ty*ty); tpy23 = _mm_set_pd (ty, 1.0); tpz01 = _mm_set_pd (tz*tz*tz, tz*tz); tpz23 = _mm_set_pd (tz, 1.0); // x-dependent vectors _MM_DDOT4_PD (A_d[0], A_d[1], A_d[2], A_d[3], tpx01, tpx23, tpx01, tpx23, a01); _MM_DDOT4_PD (A_d[4], A_d[5], A_d[6], A_d[7], tpx01, tpx23, tpx01, tpx23, a23); // y-dependent vectors _MM_DDOT4_PD (A_d[0], A_d[1], A_d[2], 
A_d[3], tpy01, tpy23, tpy01, tpy23, b01); _MM_DDOT4_PD (A_d[4], A_d[5], A_d[6], A_d[7], tpy01, tpy23, tpy01, tpy23, b23); // z-dependent vectors _MM_DDOT4_PD (A_d[0], A_d[1], A_d[2], A_d[3], tpz01, tpz23, tpz01, tpz23, c01); _MM_DDOT4_PD (A_d[4], A_d[5], A_d[6], A_d[7], tpz01, tpz23, tpz01, tpz23, c23); // Zero-out values int Nh = (N+1)/2; __m128d mvals[Nh]; for (int n=0; ncoefs + (ix+i)*xs + (iy+j)*ys + (iz+k)*zs); for (int n=0; nx_grid.start; y -= spline->y_grid.start; z -= spline->z_grid.start; double ux = x*spline->x_grid.delta_inv; double uy = y*spline->y_grid.delta_inv; double uz = z*spline->z_grid.delta_inv; ux = fmin (ux, (double)(spline->x_grid.num)-1.0e-5); uy = fmin (uy, (double)(spline->y_grid.num)-1.0e-5); uz = fmin (uz, (double)(spline->z_grid.num)-1.0e-5); double ipartx, iparty, ipartz, tx, ty, tz; tx = modf (ux, &ipartx); int ix = (int) ipartx; ty = modf (uy, &iparty); int iy = (int) iparty; tz = modf (uz, &ipartz); int iz = (int) ipartz; intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; intptr_t zs = spline->z_stride; int N = spline->num_splines; // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // tpz = [t_z^3 t_z^2 t_z 1] // a = A * tpx, b = A * tpy, c = A * tpz // A is 4x4 matrix given by the rows A0, A1, A2, A3 __m128d tpx01, tpx23, tpy01, tpy23, tpz01, tpz23, a01 , b01, c01, a23, b23, c23, da01 , db01, dc01, da23, db23, dc23; tpx01 = _mm_set_pd (tx*tx*tx, tx*tx); tpx23 = _mm_set_pd (tx, 1.0); tpy01 = _mm_set_pd (ty*ty*ty, ty*ty); tpy23 = _mm_set_pd (ty, 1.0); tpz01 = _mm_set_pd (tz*tz*tz, tz*tz); tpz23 = _mm_set_pd (tz, 1.0); // x-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpx01, tpx23, tpx01, tpx23, a01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpx01, tpx23, tpx01, tpx23, a23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpx01, tpx23, tpx01, tpx23, da01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpx01, tpx23, tpx01, tpx23, da23); // y-dependent 
vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpy01, tpy23, tpy01, tpy23, b01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpy01, tpy23, tpy01, tpy23, b23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpy01, tpy23, tpy01, tpy23, db01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpy01, tpy23, tpy01, tpy23, db23); // z-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpz01, tpz23, tpz01, tpz23, c01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpz01, tpz23, tpz01, tpz23, c23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpz01, tpz23, tpz01, tpz23, dc01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpz01, tpz23, tpz01, tpz23, dc23); // Zero-out values int Nh = (N+1)/2; __m128d mvals[Nh], mgrads[3*Nh]; for (int n=0; ncoefs + (ix+i)*xs + (iy+j)*ys + (iz+k)*zs); for (int n=0; nx_grid.delta_inv; double dyInv = spline->y_grid.delta_inv; double dzInv = spline->z_grid.delta_inv; for (int n=0; nx_grid.start; y -= spline->y_grid.start; z -= spline->z_grid.start; double ux = x*spline->x_grid.delta_inv; double uy = y*spline->y_grid.delta_inv; double uz = z*spline->z_grid.delta_inv; ux = fmin (ux, (double)(spline->x_grid.num)-1.0e-5); uy = fmin (uy, (double)(spline->y_grid.num)-1.0e-5); uz = fmin (uz, (double)(spline->z_grid.num)-1.0e-5); double ipartx, iparty, ipartz, tx, ty, tz; tx = modf (ux, &ipartx); int ix = (int) ipartx; ty = modf (uy, &iparty); int iy = (int) iparty; tz = modf (uz, &ipartz); int iz = (int) ipartz; intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; intptr_t zs = spline->z_stride; int N = spline->num_splines; // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // tpz = [t_z^3 t_z^2 t_z 1] // a = A * tpx, b = A * tpy, c = A * tpz // A is 4x4 matrix given by the rows A0, A1, A2, A3 __m128d tpx01, tpx23, tpy01, tpy23, tpz01, tpz23, a01 , b01, c01, a23, b23, c23, da01 , db01, dc01, da23, db23, dc23, d2a01, d2b01, d2c01, d2a23, d2b23, d2c23; tpx01 
= _mm_set_pd (tx*tx*tx, tx*tx); tpx23 = _mm_set_pd (tx, 1.0); tpy01 = _mm_set_pd (ty*ty*ty, ty*ty); tpy23 = _mm_set_pd (ty, 1.0); tpz01 = _mm_set_pd (tz*tz*tz, tz*tz); tpz23 = _mm_set_pd (tz, 1.0); // x-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpx01, tpx23, tpx01, tpx23, a01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpx01, tpx23, tpx01, tpx23, a23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpx01, tpx23, tpx01, tpx23, da01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpx01, tpx23, tpx01, tpx23, da23); _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpx01, tpx23, tpx01, tpx23, d2a01); _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpx01, tpx23, tpx01, tpx23, d2a23); // y-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpy01, tpy23, tpy01, tpy23, b01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpy01, tpy23, tpy01, tpy23, b23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpy01, tpy23, tpy01, tpy23, db01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpy01, tpy23, tpy01, tpy23, db23); _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpy01, tpy23, tpy01, tpy23, d2b01); _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpy01, tpy23, tpy01, tpy23, d2b23); // z-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpz01, tpz23, tpz01, tpz23, c01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpz01, tpz23, tpz01, tpz23, c23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpz01, tpz23, tpz01, tpz23, dc01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpz01, tpz23, tpz01, tpz23, dc23); _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpz01, tpz23, tpz01, tpz23, d2c01); _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpz01, tpz23, tpz01, tpz23, d2c23); // Zero-out values int Nh = (N+1)/2; __m128d mvals[Nh], mgrads[3*Nh], mlapl[3*Nh]; for (int n=0; ncoefs + (ix+i)*xs + (iy+j)*ys + (iz+k)*zs); for (int n=0; nx_grid.delta_inv; double dyInv = 
spline->y_grid.delta_inv; double dzInv = spline->z_grid.delta_inv; double lapl3[3*N]; for (int n=0; nx_grid.start; y -= spline->y_grid.start; z -= spline->z_grid.start; double ux = x*spline->x_grid.delta_inv; double uy = y*spline->y_grid.delta_inv; double uz = z*spline->z_grid.delta_inv; ux = fmin (ux, (double)(spline->x_grid.num)-1.0e-5); uy = fmin (uy, (double)(spline->y_grid.num)-1.0e-5); uz = fmin (uz, (double)(spline->z_grid.num)-1.0e-5); double ipartx, iparty, ipartz, tx, ty, tz; tx = modf (ux, &ipartx); int ix = (int) ipartx; ty = modf (uy, &iparty); int iy = (int) iparty; tz = modf (uz, &ipartz); int iz = (int) ipartz; intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; intptr_t zs = spline->z_stride; int N = spline->num_splines; // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // tpz = [t_z^3 t_z^2 t_z 1] // a = A * tpx, b = A * tpy, c = A * tpz // A is 4x4 matrix given by the rows A0, A1, A2, A3 __m128d tpx01, tpx23, tpy01, tpy23, tpz01, tpz23, a01 , b01, c01, a23, b23, c23, da01 , db01, dc01, da23, db23, dc23, d2a01, d2b01, d2c01, d2a23, d2b23, d2c23; tpx01 = _mm_set_pd (tx*tx*tx, tx*tx); tpx23 = _mm_set_pd (tx, 1.0); tpy01 = _mm_set_pd (ty*ty*ty, ty*ty); tpy23 = _mm_set_pd (ty, 1.0); tpz01 = _mm_set_pd (tz*tz*tz, tz*tz); tpz23 = _mm_set_pd (tz, 1.0); // x-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpx01, tpx23, tpx01, tpx23, a01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpx01, tpx23, tpx01, tpx23, a23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpx01, tpx23, tpx01, tpx23, da01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpx01, tpx23, tpx01, tpx23, da23); _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpx01, tpx23, tpx01, tpx23, d2a01); _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpx01, tpx23, tpx01, tpx23, d2a23); // y-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpy01, tpy23, tpy01, tpy23, b01); _MM_DDOT4_PD (A_d[ 4], A_d[ 
5], A_d[ 6], A_d[ 7], tpy01, tpy23, tpy01, tpy23, b23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpy01, tpy23, tpy01, tpy23, db01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpy01, tpy23, tpy01, tpy23, db23); _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpy01, tpy23, tpy01, tpy23, d2b01); _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpy01, tpy23, tpy01, tpy23, d2b23); // z-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpz01, tpz23, tpz01, tpz23, c01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpz01, tpz23, tpz01, tpz23, c23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpz01, tpz23, tpz01, tpz23, dc01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpz01, tpz23, tpz01, tpz23, dc23); _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpz01, tpz23, tpz01, tpz23, d2c01); _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpz01, tpz23, tpz01, tpz23, d2c23); // Zero-out values int Nh = (N+1)/2; __m128d mvals[Nh], mgrads[3*Nh], mhess[6*Nh]; for (int n=0; ncoefs + (ix+i)*xs + (iy+j)*ys + (iz+k)*zs); for (int n=0; nx_grid.delta_inv; double dyInv = spline->y_grid.delta_inv; double dzInv = spline->z_grid.delta_inv; for (int n=0; n header file. */ #define HAVE_DLFCN_H 1 /* FFTW3 library is available */ /* #undef HAVE_FFTW3 */ /* FFTW3F library is available */ /* #undef HAVE_FFTW3F */ /* Define to 1 if you have the `floor' function. */ #define HAVE_FLOOR 1 /* Define to 1 if you have the header file. */ #define HAVE_INTTYPES_H 1 /* Define to 1 if you have the `m' library (-lm). */ #define HAVE_LIBM 1 /* Define to 1 if you have the header file. */ #define HAVE_MEMORY_H 1 /* Support mmx instructions */ #define HAVE_MMX /* Define to 1 if you have the `posix_memalign' function. */ #define HAVE_POSIX_MEMALIGN 1 /* Define to 1 if you have the `pow' function. */ #define HAVE_POW 1 /* Define to 1 if you have the `sqrt' function. 
*/ #define HAVE_SQRT 1 /* Support SSE (Streaming SIMD Extensions) instructions */ #define HAVE_SSE /* Support SSE2 (Streaming SIMD Extensions 2) instructions */ #define HAVE_SSE2 /* Support SSE3 (Streaming SIMD Extensions 3) instructions */ #define HAVE_SSE3 /* Support SSE4.1 (Streaming SIMD Extensions 4.1) instructions */ #define HAVE_SSE4_1 /* Support SSE4.2 (Streaming SIMD Extensions 4.2) instructions */ /* #undef HAVE_SSE4_2 */ /* Support SSSE3 (Supplemental Streaming SIMD Extensions 3) instructions */ #define HAVE_SSSE3 /* Define to 1 if stdbool.h conforms to C99. */ #define HAVE_STDBOOL_H 1 /* Define to 1 if you have the header file. */ #define HAVE_STDINT_H 1 /* Define to 1 if you have the header file. */ #define HAVE_STDLIB_H 1 /* Define to 1 if you have the header file. */ #define HAVE_STRINGS_H 1 /* Define to 1 if you have the header file. */ #define HAVE_STRING_H 1 /* Define to 1 if you have the `strtol' function. */ #define HAVE_STRTOL 1 /* Define to 1 if you have the header file. */ #define HAVE_SYS_STAT_H 1 /* Define to 1 if you have the header file. */ #define HAVE_SYS_TIME_H 1 /* Define to 1 if you have the header file. */ #define HAVE_SYS_TYPES_H 1 /* Define to 1 if you have the header file. */ #define HAVE_UNISTD_H 1 /* xmmintrin.h is available. */ #define HAVE_XMMINTRIN_H 1 /* Define to 1 if the system has the type `_Bool'. */ #define HAVE__BOOL 1 /* Use double-precision to solve for single-precision splines */ /* #undef HIGH_PRECISION */ /* Name of package */ #define PACKAGE "einspline" /* Define to the address where bug reports for this package should be sent. */ #define PACKAGE_BUGREPORT "esler@uiuc.edu" /* Define to the full name of this package. */ #define PACKAGE_NAME "einspline" /* Define to the full name and version of this package. */ #define PACKAGE_STRING "einspline 0.9.2" /* Define to the one symbol short name of this package. */ #define PACKAGE_TARNAME "einspline" /* Define to the version of this package. 
*/ #define PACKAGE_VERSION "0.9.2" /* Prefetch loop lead distance */ /* #undef PREFETCH_AHEAD */ /* Define to 1 if you have the ANSI C header files. */ #define STDC_HEADERS 1 /* Use SSE prefetch */ /* #undef USE_PREFETCH */ /* Version number of package */ #define VERSION "0.9.2" /* Define to empty if `const' does not conform to ANSI C. */ /* #undef const */ /* Define to `__inline__' or `__inline' if that's what the C compiler calls it, or to nothing if 'inline' is not supported under any name. */ #ifndef __cplusplus /* #undef inline */ #endif /* Define to empty if the C99 keyword for C++ does not work. */ #define restrict __restrict__ /* Define to `unsigned int' if does not define. */ /* #undef size_t */ einspline-0.9.2/src/test_multi_cpp.cc0000664000113000011300000021736711021540436014560 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. // // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. 
// // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #include "multi_bspline.h" #include "bspline.h" #include #include #include #include using namespace std; inline double diff (double a, double b, double tol) { if (fabs(a-b) > tol) return 1; else return 0; } ////////////////////////////////////////// // Single-precision real test functions // ////////////////////////////////////////// int test_1d_float_all() { int Nx=73; int num_splines = 21; Ugrid x_grid; x_grid.start = 3.1; x_grid.end = 9.1; x_grid.num = Nx; BCtype_s xBC; xBC.lCode = xBC.rCode = PERIODIC; // First, create splines the normal way UBspline_1d_s* norm_splines[num_splines]; multi_UBspline_1d_s *multi_spline; // First, create multispline multi_spline = create_multi_UBspline_1d_s (x_grid, xBC, num_splines); float data[Nx]; // Now, create normal splines and set multispline data for (int i=0; icoefs[27]); // fprintf (stderr, "multi coef = %1.14e\n", // multi_spline->coefs[19+27*multi_spline->x_stride]); // Now, test random values int num_vals = 100; float multi_vals[num_splines], norm_vals [num_splines]; float multi_grads[num_splines], norm_grads[num_splines]; float multi_lapl[num_splines], norm_lapl [num_splines]; for (int i=0; icoefs[227]), // imag(norm_splines[19]->coefs[227])); // fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", // real(multi_spline->coefs[19+227*multi_spline->z_stride]), // imag(multi_spline->coefs[19+227*multi_spline->z_stride])); // Now, test random values int num_vals = 100; float multi_vals[num_splines], norm_vals[num_splines]; float multi_grads[2*num_splines], norm_grads[2*num_splines]; float multi_lapl[num_splines], norm_lapl[num_splines]; float multi_hess[4*num_splines], norm_hess[4*num_splines]; for (int i=0; 
icoefs[227]), // imag(norm_splines[19]->coefs[227])); // fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", // real(multi_spline->coefs[19+227*multi_spline->z_stride]), // imag(multi_spline->coefs[19+227*multi_spline->z_stride])); // Now, test random values int num_vals = 100; float multi_vals[num_splines], norm_vals[num_splines]; float multi_grads[3*num_splines], norm_grads[3*num_splines]; float multi_lapl[num_splines], norm_lapl[num_splines]; float multi_hess[9*num_splines], norm_hess[9*num_splines]; for (int i=0; icoefs[227]), // imag(norm_splines[19]->coefs[227])); // fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", // real(multi_spline->coefs[19+227*multi_spline->z_stride]), // imag(multi_spline->coefs[19+227*multi_spline->z_stride])); // Now, test random values int num_vals = 100; double multi_vals[num_splines], norm_vals[num_splines]; double multi_grads[2*num_splines], norm_grads[2*num_splines]; double multi_lapl[num_splines], norm_lapl[num_splines]; double multi_hess[4*num_splines], norm_hess[4*num_splines]; for (int i=0; icoefs[227]), // imag(norm_splines[19]->coefs[227])); // fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", // real(multi_spline->coefs[19+227*multi_spline->z_stride]), // imag(multi_spline->coefs[19+227*multi_spline->z_stride])); // Now, test random values int num_vals = 100; double multi_vals[num_splines], norm_vals[num_splines]; double multi_grads[3*num_splines], norm_grads[3*num_splines]; double multi_lapl[num_splines], norm_lapl[num_splines]; double multi_hess[9*num_splines], norm_hess[9*num_splines]; for (int i=0; i tol || idiff > tol) return 1; else return 0; } int test_1d_complex_float_all() { int Nx=73; int num_splines = 21; Ugrid x_grid; x_grid.start = 3.1; x_grid.end = 9.1; x_grid.num = Nx; BCtype_c xBC; xBC.lCode = xBC.rCode = PERIODIC; // First, create splines the normal way UBspline_1d_c* norm_splines[num_splines]; multi_UBspline_1d_c *multi_spline; // First, create multispline multi_spline = create_multi_UBspline_1d_c 
(x_grid, xBC, num_splines); complex_float data[Nx]; // Now, create normal splines and set multispline data for (int i=0; i((drand48()-0.5),(drand48()-0.5)); norm_splines[i] = create_UBspline_1d_c (x_grid, xBC, data); set_multi_UBspline_1d_c (multi_spline, i, data); } // fprintf (stderr, "\nnorm coef = %1.14e + %1.14ei\n", // real(norm_splines[19]->coefs[27]), // imag(norm_splines[19]->coefs[27])); // fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", // real(multi_spline->coefs[19+27*multi_spline->x_stride]), // imag(multi_spline->coefs[19+27*multi_spline->x_stride])); // Now, test random values int num_vals = 100; complex_float multi_vals[num_splines], norm_vals [num_splines]; complex_float multi_grads[num_splines], norm_grads[num_splines]; complex_float multi_lapl[num_splines], norm_lapl [num_splines]; for (int i=0; i((drand48()-0.5),(drand48()-0.5)); norm_splines[i] = create_UBspline_2d_c (x_grid, y_grid, xBC, yBC, data); set_multi_UBspline_2d_c (multi_spline, i, data); } // fprintf (stderr, "norm coef = %1.14e + %1.14ei\n", // real(norm_splines[19]->coefs[2127]), // imag(norm_splines[19]->coefs[2127])); // fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", // real(multi_spline->coefs[19+2127*multi_spline->y_stride]), // imag(multi_spline->coefs[19+2127*multi_spline->y_stride])); // Now, test random values int num_vals = 100; complex_float multi_vals[num_splines], norm_vals[num_splines]; complex_float multi_grads[2*num_splines], norm_grads[2*num_splines]; complex_float multi_lapl[num_splines], norm_lapl[num_splines]; complex_float multi_hess[4*num_splines], norm_hess[4*num_splines]; for (int i=0; i((drand48()-0.5), (drand48()-0.5)); norm_splines[i] = create_UBspline_3d_c (x_grid, y_grid, z_grid, xBC, yBC, zBC, data); set_multi_UBspline_3d_c (multi_spline, i, data); } // fprintf (stderr, "norm coef = %1.14e + %1.14ei\n", // real(norm_splines[19]->coefs[227]), // imag(norm_splines[19]->coefs[227])); // fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", // 
real(multi_spline->coefs[19+227*multi_spline->z_stride]), // imag(multi_spline->coefs[19+227*multi_spline->z_stride])); // Now, test random values int num_vals = 100; complex_float multi_vals[num_splines], norm_vals[num_splines]; complex_float multi_grads[3*num_splines], norm_grads[3*num_splines]; complex_float multi_lapl[num_splines], norm_lapl[num_splines]; complex_float multi_hess[9*num_splines], norm_hess[9*num_splines]; for (int i=0; i((drand48()-0.5),(drand48()-0.5)); norm_splines[i] = create_UBspline_3d_z (x_grid, y_grid, z_grid, xBC, yBC, zBC, data); set_multi_UBspline_3d_z (multi_spline, i, data); } fprintf (stderr, "norm coef = %1.14e + %1.14ei\n", real(norm_splines[19]->coefs[227]), imag(norm_splines[19]->coefs[227])); fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", real(multi_spline->coefs[19+227*multi_spline->z_stride]), imag(multi_spline->coefs[19+227*multi_spline->z_stride])); //return; // Now, test random values int num_vals = 100; complex_double multi_vals[num_splines], norm_vals[num_splines]; for (int i=0; i 1.0e-12 || fabs(idiff) > 1.0e-12) { fprintf (stderr, "Error! 
norm_vals[j] = %1.14e + %1.14ei\n", real(norm_vals[j]), imag(norm_vals[j])); fprintf (stderr, " multi_vals[j] = %1.14e + %1.14ei\n", real(multi_vals[j]), imag(multi_vals[j])); } } } num_vals = 100000; // Now do timing clock_t norm_start, norm_end, multi_start, multi_end, rand_start, rand_end; rand_start = clock(); for (int i=0; i tol || idiff > tol) return 1; else return 0; } int test_1d_complex_double_all() { int Nx=73; int num_splines = 21; Ugrid x_grid; x_grid.start = 3.1; x_grid.end = 9.1; x_grid.num = Nx; BCtype_z xBC; xBC.lCode = xBC.rCode = PERIODIC; // First, create splines the normal way UBspline_1d_z* norm_splines[num_splines]; multi_UBspline_1d_z *multi_spline; // First, create multispline multi_spline = create_multi_UBspline_1d_z (x_grid, xBC, num_splines); complex_double data[Nx]; // Now, create normal splines and set multispline data for (int i=0; i((drand48()-0.5), (drand48()-0.5)); norm_splines[i] = create_UBspline_1d_z (x_grid, xBC, data); set_multi_UBspline_1d_z (multi_spline, i, data); } // fprintf (stderr, "\nnorm coef = %1.14e + %1.14ei\n", // real(norm_splines[19]->coefs[27]), // imag(norm_splines[19]->coefs[27])); // fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", // real(multi_spline->coefs[19+27*multi_spline->x_stride]), // imag(multi_spline->coefs[19+27*multi_spline->x_stride])); // Now, test random values int num_vals = 100; complex_double multi_vals[num_splines], norm_vals [num_splines]; complex_double multi_grads[num_splines], norm_grads[num_splines]; complex_double multi_lapl[num_splines], norm_lapl [num_splines]; for (int i=0; i((drand48()-0.5),(drand48()-0.5)); norm_splines[i] = create_UBspline_2d_z (x_grid, y_grid, xBC, yBC, data); set_multi_UBspline_2d_z (multi_spline, i, data); } // fprintf (stderr, "norm coef = %1.14e + %1.14ei\n", // real(norm_splines[19]->coefs[227]), // imag(norm_splines[19]->coefs[227])); // fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", // real(multi_spline->coefs[19+227*multi_spline->y_stride]), // 
imag(multi_spline->coefs[19+227*multi_spline->y_stride])); // Now, test random values int num_vals = 100; complex_double multi_vals[num_splines], norm_vals[num_splines]; complex_double multi_grads[2*num_splines], norm_grads[2*num_splines]; complex_double multi_lapl[num_splines], norm_lapl[num_splines]; complex_double multi_hess[4*num_splines], norm_hess[4*num_splines]; for (int i=0; i((drand48()-0.5),(drand48()-0.5)); norm_splines[i] = create_UBspline_3d_z (x_grid, y_grid, z_grid, xBC, yBC, zBC, data); set_multi_UBspline_3d_z (multi_spline, i, data); } // fprintf (stderr, "norm coef = %1.14e + %1.14ei\n", // real(norm_splines[19]->coefs[227]), // imag(norm_splines[19]->coefs[227])); // fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", // real(multi_spline->coefs[19+227*multi_spline->z_stride]), // imag(multi_spline->coefs[19+227*multi_spline->z_stride])); // Now, test random values int num_vals = 100; complex_double multi_vals[num_splines], norm_vals[num_splines]; complex_double multi_grads[3*num_splines], norm_grads[3*num_splines]; complex_double multi_lapl[num_splines], norm_lapl[num_splines]; complex_double multi_hess[9*num_splines], norm_hess[9*num_splines]; for (int i=0; i((drand48()-0.5), (drand48()-0.5)); norm_splines[i] = create_UBspline_3d_z (x_grid, y_grid, z_grid, xBC, yBC, zBC, data); set_multi_UBspline_3d_z (multi_spline, i, data); } fprintf (stderr, "norm coef = %1.14e + %1.14ei\n", real(norm_splines[19]->coefs[227]), imag(norm_splines[19]->coefs[227])); fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", real(multi_spline->coefs[19+227*multi_spline->z_stride]), imag(multi_spline->coefs[19+227*multi_spline->z_stride])); // Now, test random values int num_vals = 100; complex_double multi_vals[num_splines], norm_vals[num_splines]; complex_double multi_grads[3*num_splines], norm_grads[3*num_splines]; complex_double multi_lapl[num_splines], norm_lapl[num_splines]; complex_double multi_hess[9*num_splines], norm_hess[9*num_splines]; for (int i=0; 
icoefs[227]); fprintf (stderr, "multi coef = %1.14e\n", multi_spline->coefs[19+227*multi_spline->z_stride]); // Now, test random values int num_vals = 100; double multi_vals[num_splines], norm_vals[num_splines]; for (int i=0; i 1.0e-12) { fprintf (stderr, "Error! norm_vals[j] = %1.14e\n", norm_vals[j]); fprintf (stderr, " multi_vals[j] = %1.14e\n", multi_vals[j]); } } } num_vals = 100000; // Now do timing clock_t norm_start, norm_end, multi_start, multi_end, rand_start, rand_end; rand_start = clock(); for (int i=0; icoefs[227]); fprintf (stderr, "multi coef = %1.14e\n", multi_spline->coefs[19+227*multi_spline->z_stride]); // Now, test random values int num_vals = 100; double multi_vals[num_splines], norm_vals[num_splines]; double multi_grads[3*num_splines], norm_grads[3*num_splines]; double multi_hess[9*num_splines], norm_hess[9*num_splines]; for (int i=0; i 1.0e-12) { fprintf (stderr, "j = %d\n", j); fprintf (stderr, "Error! norm_vals[j] = %1.14e\n", norm_vals[j]); fprintf (stderr, " multi_vals[j] = %1.14e\n", multi_vals[j]); } // Check gradients for (int n=0; n<3; n++) { diff = norm_grads[3*j+n] - multi_grads[3*j+n]; if (fabs(diff) > 1.0e-12) { fprintf (stderr, "n=%d\n", n); fprintf (stderr, "Error! norm_grads[j] = %1.14e\n", norm_grads[3*j+n]); fprintf (stderr, " multi_grads[j] = %1.14e\n", multi_grads[3*j+n]); } } // Check hessian for (int n=0; n<9; n++) { diff = norm_hess[9*j+n] - multi_hess[9*j+n]; if (fabs(diff) > 1.0e-10) { fprintf (stderr, "Error! 
norm_hess[j] = %1.14e\n", norm_hess[9*j+n]); fprintf (stderr, " multi_hess[j] = %1.14e\n", multi_hess[9*j+n]); } } } } num_vals = 100000; // Now do timing clock_t norm_start, norm_end, multi_start, multi_end, rand_start, rand_end; rand_start = clock(); for (int i=0; i #include #include "bspline_base.h" #include "multi_bspline_structs.h" extern const float* restrict Af; extern const float* restrict dAf; extern const float* restrict d2Af; /************************************************************/ /* 1D float-precision, complex evaulation functions */ /************************************************************/ void eval_multi_UBspline_1d_c (multi_UBspline_1d_c *spline, double x, complex_float* restrict vals) { x -= spline->x_grid.start; float ux = x*spline->x_grid.delta_inv; float ipartx, tx; tx = modff (ux, &ipartx); int ix = (int) ipartx; float tpx[4], a[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; complex_float* restrict coefs = spline->coefs; a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]); a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]); intptr_t xs = spline->x_stride; for (int n=0; nnum_splines; n++) vals[n] = 0.0; for (int i=0; i<4; i++) { complex_float* restrict coefs = spline->coefs + ((ix+i)*xs); for (int n=0; nnum_splines; n++) vals[n] += a[i] * coefs[n]; } } void eval_multi_UBspline_1d_c_vg (multi_UBspline_1d_c *spline, double x, complex_float* restrict vals, complex_float* restrict grads) { x -= spline->x_grid.start; float ux = x*spline->x_grid.delta_inv; float ipartx, tx; tx = modff (ux, &ipartx); int ix = (int) ipartx; float tpx[4], a[4], da[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; complex_float* restrict coefs = spline->coefs; a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); a[1] = (Af[ 
4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]); a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]); da[0] = (dAf[ 0]*tpx[0] + dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]); da[1] = (dAf[ 4]*tpx[0] + dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]); da[2] = (dAf[ 8]*tpx[0] + dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]); da[3] = (dAf[12]*tpx[0] + dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]); intptr_t xs = spline->x_stride; for (int n=0; nnum_splines; n++) { vals[n] = 0.0; grads[n] = 0.0; } for (int i=0; i<4; i++) { complex_float* restrict coefs = spline->coefs + ((ix+i)*xs); for (int n=0; nnum_splines; n++) { vals[n] += a[i] * coefs[n]; grads[n] += da[i] * coefs[n]; } } float dxInv = spline->x_grid.delta_inv; for (int n=0; nnum_splines; n++) grads[n] *= dxInv; } void eval_multi_UBspline_1d_c_vgl (multi_UBspline_1d_c *spline, double x, complex_float* restrict vals, complex_float* restrict grads, complex_float* restrict lapl) { x -= spline->x_grid.start; float ux = x*spline->x_grid.delta_inv; float ipartx, tx; tx = modff (ux, &ipartx); int ix = (int) ipartx; float tpx[4], a[4], da[4], d2a[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; complex_float* restrict coefs = spline->coefs; a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]); a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]); da[0] = (dAf[ 0]*tpx[0] + dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]); da[1] = (dAf[ 4]*tpx[0] + dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]); da[2] = (dAf[ 8]*tpx[0] + dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]); da[3] = (dAf[12]*tpx[0] + dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]); d2a[0] = (d2Af[ 0]*tpx[0] + d2Af[ 1]*tpx[1] + d2Af[ 2]*tpx[2] + d2Af[ 
3]*tpx[3]); d2a[1] = (d2Af[ 4]*tpx[0] + d2Af[ 5]*tpx[1] + d2Af[ 6]*tpx[2] + d2Af[ 7]*tpx[3]); d2a[2] = (d2Af[ 8]*tpx[0] + d2Af[ 9]*tpx[1] + d2Af[10]*tpx[2] + d2Af[11]*tpx[3]); d2a[3] = (d2Af[12]*tpx[0] + d2Af[13]*tpx[1] + d2Af[14]*tpx[2] + d2Af[15]*tpx[3]); intptr_t xs = spline->x_stride; for (int n=0; nnum_splines; n++) { vals[n] = 0.0; grads[n] = 0.0; lapl[n] = 0.0; } for (int i=0; i<4; i++) { complex_float* restrict coefs = spline->coefs + ((ix+i)*xs); for (int n=0; nnum_splines; n++) { vals[n] += a[i] * coefs[n]; grads[n] += da[i] * coefs[n]; lapl[n] += d2a[i] * coefs[n]; } } float dxInv = spline->x_grid.delta_inv; for (int n=0; nnum_splines; n++) { grads[n] *= dxInv; lapl [n] *= dxInv*dxInv; } } void eval_multi_UBspline_1d_c_vgh (multi_UBspline_1d_c *spline, double x, complex_float* restrict vals, complex_float* restrict grads, complex_float* restrict hess) { eval_multi_UBspline_1d_c_vgl (spline, x, vals, grads, hess); } /************************************************************/ /* 2D float-precision, complex evaulation functions */ /************************************************************/ void eval_multi_UBspline_2d_c (multi_UBspline_2d_c *spline, double x, double y, complex_float* restrict vals) { x -= spline->x_grid.start; y -= spline->y_grid.start; float ux = x*spline->x_grid.delta_inv; float uy = y*spline->y_grid.delta_inv; float ipartx, iparty, tx, ty; tx = modff (ux, &ipartx); int ix = (int) ipartx; ty = modff (uy, &iparty); int iy = (int) iparty; float tpx[4], tpy[4], a[4], b[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; complex_float* restrict coefs = spline->coefs; a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]); a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]); b[0] = (Af[ 
0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]); b[1] = (Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]); b[2] = (Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]); b[3] = (Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]); intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; for (int n=0; nnum_splines; n++) vals[n] = 0.0; for (int i=0; i<4; i++) for (int j=0; j<4; j++) { float prefactor = a[i]*b[j]; complex_float* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys); for (int n=0; nnum_splines; n++) vals[n] += prefactor*coefs[n]; } } void eval_multi_UBspline_2d_c_vg (multi_UBspline_2d_c *spline, double x, double y, complex_float* restrict vals, complex_float* restrict grads) { x -= spline->x_grid.start; y -= spline->y_grid.start; float ux = x*spline->x_grid.delta_inv; float uy = y*spline->y_grid.delta_inv; float ipartx, iparty, tx, ty; tx = modff (ux, &ipartx); int ix = (int) ipartx; ty = modff (uy, &iparty); int iy = (int) iparty; float tpx[4], tpy[4], a[4], b[4], da[4], db[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; complex_float* restrict coefs = spline->coefs; a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]); a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]); da[0] = (dAf[ 0]*tpx[0] + dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]); da[1] = (dAf[ 4]*tpx[0] + dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]); da[2] = (dAf[ 8]*tpx[0] + dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]); da[3] = (dAf[12]*tpx[0] + dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]); b[0] = (Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]); b[1] = (Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]); b[2] = (Af[ 8]*tpy[0] + 
Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]); b[3] = (Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]); db[0] = (dAf[ 0]*tpy[0] + dAf[ 1]*tpy[1] + dAf[ 2]*tpy[2] + dAf[ 3]*tpy[3]); db[1] = (dAf[ 4]*tpy[0] + dAf[ 5]*tpy[1] + dAf[ 6]*tpy[2] + dAf[ 7]*tpy[3]); db[2] = (dAf[ 8]*tpy[0] + dAf[ 9]*tpy[1] + dAf[10]*tpy[2] + dAf[11]*tpy[3]); db[3] = (dAf[12]*tpy[0] + dAf[13]*tpy[1] + dAf[14]*tpy[2] + dAf[15]*tpy[3]); intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; for (int n=0; nnum_splines; n++) { vals[n] = 0.0; grads[2*n+0] = grads[2*n+1] = grads[2*n+2] = 0.0; } for (int i=0; i<4; i++) for (int j=0; j<4; j++) { float ab = a[i]*b[j]; float dab[2]; dab[0] = da[i]* b[j]; dab[1] = a[i]*db[j]; complex_float* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys); for (int n=0; nnum_splines; n++) { vals [n] += ab *coefs[n]; grads[2*n+0] += dab[0]*coefs[n]; grads[2*n+1] += dab[1]*coefs[n]; } } float dxInv = spline->x_grid.delta_inv; float dyInv = spline->y_grid.delta_inv; for (int n=0; nnum_splines; n++) { grads[2*n+0] *= dxInv; grads[2*n+1] *= dyInv; } } void eval_multi_UBspline_2d_c_vgl (multi_UBspline_2d_c *spline, double x, double y, complex_float* restrict vals, complex_float* restrict grads, complex_float* restrict lapl) { x -= spline->x_grid.start; y -= spline->y_grid.start; float ux = x*spline->x_grid.delta_inv; float uy = y*spline->y_grid.delta_inv; float ipartx, iparty, tx, ty; tx = modff (ux, &ipartx); int ix = (int) ipartx; ty = modff (uy, &iparty); int iy = (int) iparty; float tpx[4], tpy[4], a[4], b[4], da[4], db[4], d2a[4], d2b[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; complex_float* restrict coefs = spline->coefs; a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]); a[3] = 
(Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]); da[0] = (dAf[ 0]*tpx[0] + dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]); da[1] = (dAf[ 4]*tpx[0] + dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]); da[2] = (dAf[ 8]*tpx[0] + dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]); da[3] = (dAf[12]*tpx[0] + dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]); d2a[0] = (d2Af[ 0]*tpx[0] + d2Af[ 1]*tpx[1] + d2Af[ 2]*tpx[2] + d2Af[ 3]*tpx[3]); d2a[1] = (d2Af[ 4]*tpx[0] + d2Af[ 5]*tpx[1] + d2Af[ 6]*tpx[2] + d2Af[ 7]*tpx[3]); d2a[2] = (d2Af[ 8]*tpx[0] + d2Af[ 9]*tpx[1] + d2Af[10]*tpx[2] + d2Af[11]*tpx[3]); d2a[3] = (d2Af[12]*tpx[0] + d2Af[13]*tpx[1] + d2Af[14]*tpx[2] + d2Af[15]*tpx[3]); b[0] = (Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]); b[1] = (Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]); b[2] = (Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]); b[3] = (Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]); db[0] = (dAf[ 0]*tpy[0] + dAf[ 1]*tpy[1] + dAf[ 2]*tpy[2] + dAf[ 3]*tpy[3]); db[1] = (dAf[ 4]*tpy[0] + dAf[ 5]*tpy[1] + dAf[ 6]*tpy[2] + dAf[ 7]*tpy[3]); db[2] = (dAf[ 8]*tpy[0] + dAf[ 9]*tpy[1] + dAf[10]*tpy[2] + dAf[11]*tpy[3]); db[3] = (dAf[12]*tpy[0] + dAf[13]*tpy[1] + dAf[14]*tpy[2] + dAf[15]*tpy[3]); d2b[0] = (d2Af[ 0]*tpy[0] + d2Af[ 1]*tpy[1] + d2Af[ 2]*tpy[2] + d2Af[ 3]*tpy[3]); d2b[1] = (d2Af[ 4]*tpy[0] + d2Af[ 5]*tpy[1] + d2Af[ 6]*tpy[2] + d2Af[ 7]*tpy[3]); d2b[2] = (d2Af[ 8]*tpy[0] + d2Af[ 9]*tpy[1] + d2Af[10]*tpy[2] + d2Af[11]*tpy[3]); d2b[3] = (d2Af[12]*tpy[0] + d2Af[13]*tpy[1] + d2Af[14]*tpy[2] + d2Af[15]*tpy[3]); intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; // complex_float lapl2[2*spline->num_splines]; complex_float* restrict lapl2 = spline->lapl2; for (int n=0; nnum_splines; n++) { vals[n] = 0.0; grads[2*n+0] = grads[2*n+1] = 0.0; lapl2[2*n+0] = lapl2[2*n+1] = 0.0; } for (int i=0; i<4; i++) for (int j=0; j<4; j++) { float ab = a[i]*b[j]; float dab[2], d2ab[2]; 
dab[0] = da[i]* b[j]; dab[1] = a[i]*db[j]; d2ab[0] = d2a[i]* b[j]; d2ab[1] = a[i]*d2b[j]; complex_float* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys); for (int n=0; nnum_splines; n++) { vals[n] += ab *coefs[n]; grads[2*n+0] += dab[0]*coefs[n]; grads[2*n+1] += dab[1]*coefs[n]; lapl2[2*n+0] += d2ab[0]*coefs[n]; lapl2[2*n+1] += d2ab[1]*coefs[n]; } } float dxInv = spline->x_grid.delta_inv; float dyInv = spline->y_grid.delta_inv; for (int n=0; nnum_splines; n++) { grads[2*n+0] *= dxInv; grads[2*n+1] *= dyInv; lapl2[2*n+0] *= dxInv*dxInv; lapl2[2*n+1] *= dyInv*dyInv; lapl[n] = lapl2[2*n+0] + lapl2[2*n+1]; } } void eval_multi_UBspline_2d_c_vgh (multi_UBspline_2d_c *spline, double x, double y, complex_float* restrict vals, complex_float* restrict grads, complex_float* restrict hess) { x -= spline->x_grid.start; y -= spline->y_grid.start; float ux = x*spline->x_grid.delta_inv; float uy = y*spline->y_grid.delta_inv; float ipartx, iparty, tx, ty; tx = modff (ux, &ipartx); int ix = (int) ipartx; ty = modff (uy, &iparty); int iy = (int) iparty; float tpx[4], tpy[4], a[4], b[4], da[4], db[4], d2a[4], d2b[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; complex_float* restrict coefs = spline->coefs; a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]); a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]); da[0] = (dAf[ 0]*tpx[0] + dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]); da[1] = (dAf[ 4]*tpx[0] + dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]); da[2] = (dAf[ 8]*tpx[0] + dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]); da[3] = (dAf[12]*tpx[0] + dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]); d2a[0] = (d2Af[ 0]*tpx[0] + d2Af[ 1]*tpx[1] + d2Af[ 2]*tpx[2] + d2Af[ 3]*tpx[3]); d2a[1] = (d2Af[ 4]*tpx[0] 
+ d2Af[ 5]*tpx[1] + d2Af[ 6]*tpx[2] + d2Af[ 7]*tpx[3]); d2a[2] = (d2Af[ 8]*tpx[0] + d2Af[ 9]*tpx[1] + d2Af[10]*tpx[2] + d2Af[11]*tpx[3]); d2a[3] = (d2Af[12]*tpx[0] + d2Af[13]*tpx[1] + d2Af[14]*tpx[2] + d2Af[15]*tpx[3]); b[0] = (Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]); b[1] = (Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]); b[2] = (Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]); b[3] = (Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]); db[0] = (dAf[ 0]*tpy[0] + dAf[ 1]*tpy[1] + dAf[ 2]*tpy[2] + dAf[ 3]*tpy[3]); db[1] = (dAf[ 4]*tpy[0] + dAf[ 5]*tpy[1] + dAf[ 6]*tpy[2] + dAf[ 7]*tpy[3]); db[2] = (dAf[ 8]*tpy[0] + dAf[ 9]*tpy[1] + dAf[10]*tpy[2] + dAf[11]*tpy[3]); db[3] = (dAf[12]*tpy[0] + dAf[13]*tpy[1] + dAf[14]*tpy[2] + dAf[15]*tpy[3]); d2b[0] = (d2Af[ 0]*tpy[0] + d2Af[ 1]*tpy[1] + d2Af[ 2]*tpy[2] + d2Af[ 3]*tpy[3]); d2b[1] = (d2Af[ 4]*tpy[0] + d2Af[ 5]*tpy[1] + d2Af[ 6]*tpy[2] + d2Af[ 7]*tpy[3]); d2b[2] = (d2Af[ 8]*tpy[0] + d2Af[ 9]*tpy[1] + d2Af[10]*tpy[2] + d2Af[11]*tpy[3]); d2b[3] = (d2Af[12]*tpy[0] + d2Af[13]*tpy[1] + d2Af[14]*tpy[2] + d2Af[15]*tpy[3]); intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; for (int n=0; nnum_splines; n++) { vals[n] = 0.0; grads[2*n+0] = grads[2*n+1] = 0.0; for (int i=0; i<4; i++) hess[4*n+i] = 0.0; } for (int i=0; i<4; i++) for (int j=0; j<4; j++){ float ab = a[i]*b[j]; float dab[2], d2ab[3]; dab[0] = da[i]* b[j]; dab[1] = a[i]*db[j]; d2ab[0] = d2a[i] * b[j]; d2ab[1] = da[i] * db[j]; d2ab[2] = a[i] * d2b[j]; complex_float* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys); for (int n=0; nnum_splines; n++) { vals[n] += ab *coefs[n]; grads[2*n+0] += dab[0]*coefs[n]; grads[2*n+1] += dab[1]*coefs[n]; hess [4*n+0] += d2ab[0]*coefs[n]; hess [4*n+1] += d2ab[1]*coefs[n]; hess [4*n+3] += d2ab[2]*coefs[n]; } } float dxInv = spline->x_grid.delta_inv; float dyInv = spline->y_grid.delta_inv; for (int n=0; nnum_splines; n++) { grads[2*n+0] *= dxInv; 
grads[2*n+1] *= dyInv; hess[4*n+0] *= dxInv*dxInv; hess[4*n+1] *= dxInv*dyInv; hess[4*n+3] *= dyInv*dyInv; // Copy hessian elements into lower half of 3x3 matrix hess[4*n+2] = hess[4*n+1]; } } /************************************************************/ /* 3D float-precision, complex evaulation functions */ /************************************************************/ void eval_multi_UBspline_3d_c (multi_UBspline_3d_c *spline, double x, double y, double z, complex_float* restrict vals) { x -= spline->x_grid.start; y -= spline->y_grid.start; z -= spline->z_grid.start; float ux = x*spline->x_grid.delta_inv; float uy = y*spline->y_grid.delta_inv; float uz = z*spline->z_grid.delta_inv; float ipartx, iparty, ipartz, tx, ty, tz; tx = modff (ux, &ipartx); int ix = (int) ipartx; ty = modff (uy, &iparty); int iy = (int) iparty; tz = modff (uz, &ipartz); int iz = (int) ipartz; float tpx[4], tpy[4], tpz[4], a[4], b[4], c[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; tpz[0] = tz*tz*tz; tpz[1] = tz*tz; tpz[2] = tz; tpz[3] = 1.0; complex_float* restrict coefs = spline->coefs; a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]); a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]); b[0] = (Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]); b[1] = (Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]); b[2] = (Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]); b[3] = (Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]); c[0] = (Af[ 0]*tpz[0] + Af[ 1]*tpz[1] + Af[ 2]*tpz[2] + Af[ 3]*tpz[3]); c[1] = (Af[ 4]*tpz[0] + Af[ 5]*tpz[1] + Af[ 6]*tpz[2] + Af[ 7]*tpz[3]); c[2] = (Af[ 8]*tpz[0] + Af[ 9]*tpz[1] + Af[10]*tpz[2] + Af[11]*tpz[3]); c[3] = (Af[12]*tpz[0] + Af[13]*tpz[1] + 
Af[14]*tpz[2] + Af[15]*tpz[3]); intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; intptr_t zs = spline->z_stride; for (int n=0; nnum_splines; n++) vals[n] = 0.0; for (int i=0; i<4; i++) for (int j=0; j<4; j++) for (int k=0; k<4; k++) { float prefactor = a[i]*b[j]*c[k]; complex_float* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys + (iz+k)*zs); for (int n=0; nnum_splines; n++) vals[n] += prefactor*coefs[n]; } } void eval_multi_UBspline_3d_c_vg (multi_UBspline_3d_c *spline, double x, double y, double z, complex_float* restrict vals, complex_float* restrict grads) { x -= spline->x_grid.start; y -= spline->y_grid.start; z -= spline->z_grid.start; float ux = x*spline->x_grid.delta_inv; float uy = y*spline->y_grid.delta_inv; float uz = z*spline->z_grid.delta_inv; float ipartx, iparty, ipartz, tx, ty, tz; tx = modff (ux, &ipartx); int ix = (int) ipartx; ty = modff (uy, &iparty); int iy = (int) iparty; tz = modff (uz, &ipartz); int iz = (int) ipartz; float tpx[4], tpy[4], tpz[4], a[4], b[4], c[4], da[4], db[4], dc[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; tpz[0] = tz*tz*tz; tpz[1] = tz*tz; tpz[2] = tz; tpz[3] = 1.0; complex_float* restrict coefs = spline->coefs; a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]); a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]); da[0] = (dAf[ 0]*tpx[0] + dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]); da[1] = (dAf[ 4]*tpx[0] + dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]); da[2] = (dAf[ 8]*tpx[0] + dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]); da[3] = (dAf[12]*tpx[0] + dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]); b[0] = (Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]); b[1] = (Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 
6]*tpy[2] + Af[ 7]*tpy[3]); b[2] = (Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]); b[3] = (Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]); db[0] = (dAf[ 0]*tpy[0] + dAf[ 1]*tpy[1] + dAf[ 2]*tpy[2] + dAf[ 3]*tpy[3]); db[1] = (dAf[ 4]*tpy[0] + dAf[ 5]*tpy[1] + dAf[ 6]*tpy[2] + dAf[ 7]*tpy[3]); db[2] = (dAf[ 8]*tpy[0] + dAf[ 9]*tpy[1] + dAf[10]*tpy[2] + dAf[11]*tpy[3]); db[3] = (dAf[12]*tpy[0] + dAf[13]*tpy[1] + dAf[14]*tpy[2] + dAf[15]*tpy[3]); c[0] = (Af[ 0]*tpz[0] + Af[ 1]*tpz[1] + Af[ 2]*tpz[2] + Af[ 3]*tpz[3]); c[1] = (Af[ 4]*tpz[0] + Af[ 5]*tpz[1] + Af[ 6]*tpz[2] + Af[ 7]*tpz[3]); c[2] = (Af[ 8]*tpz[0] + Af[ 9]*tpz[1] + Af[10]*tpz[2] + Af[11]*tpz[3]); c[3] = (Af[12]*tpz[0] + Af[13]*tpz[1] + Af[14]*tpz[2] + Af[15]*tpz[3]); dc[0] = (dAf[ 0]*tpz[0] + dAf[ 1]*tpz[1] + dAf[ 2]*tpz[2] + dAf[ 3]*tpz[3]); dc[1] = (dAf[ 4]*tpz[0] + dAf[ 5]*tpz[1] + dAf[ 6]*tpz[2] + dAf[ 7]*tpz[3]); dc[2] = (dAf[ 8]*tpz[0] + dAf[ 9]*tpz[1] + dAf[10]*tpz[2] + dAf[11]*tpz[3]); dc[3] = (dAf[12]*tpz[0] + dAf[13]*tpz[1] + dAf[14]*tpz[2] + dAf[15]*tpz[3]); intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; intptr_t zs = spline->z_stride; for (int n=0; nnum_splines; n++) { vals[n] = 0.0; grads[3*n+0] = grads[3*n+1] = grads[3*n+2] = 0.0; } for (int i=0; i<4; i++) for (int j=0; j<4; j++) for (int k=0; k<4; k++) { float abc = a[i]*b[j]*c[k]; float dabc[3]; dabc[0] = da[i]* b[j]* c[k]; dabc[1] = a[i]*db[j]* c[k]; dabc[2] = a[i]* b[j]*dc[k]; complex_float* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys + (iz+k)*zs); for (int n=0; nnum_splines; n++) { vals[n] += abc *coefs[n]; grads[3*n+0] += dabc[0]*coefs[n]; grads[3*n+1] += dabc[1]*coefs[n]; grads[3*n+2] += dabc[2]*coefs[n]; } } float dxInv = spline->x_grid.delta_inv; float dyInv = spline->y_grid.delta_inv; float dzInv = spline->z_grid.delta_inv; for (int n=0; nnum_splines; n++) { grads[3*n+0] *= dxInv; grads[3*n+1] *= dyInv; grads[3*n+2] *= dzInv; } } void eval_multi_UBspline_3d_c_vgl 
(multi_UBspline_3d_c *spline, double x, double y, double z, complex_float* restrict vals, complex_float* restrict grads, complex_float* restrict lapl) { x -= spline->x_grid.start; y -= spline->y_grid.start; z -= spline->z_grid.start; float ux = x*spline->x_grid.delta_inv; float uy = y*spline->y_grid.delta_inv; float uz = z*spline->z_grid.delta_inv; float ipartx, iparty, ipartz, tx, ty, tz; tx = modff (ux, &ipartx); int ix = (int) ipartx; ty = modff (uy, &iparty); int iy = (int) iparty; tz = modff (uz, &ipartz); int iz = (int) ipartz; float tpx[4], tpy[4], tpz[4], a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; tpz[0] = tz*tz*tz; tpz[1] = tz*tz; tpz[2] = tz; tpz[3] = 1.0; complex_float* restrict coefs = spline->coefs; a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]); a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]); da[0] = (dAf[ 0]*tpx[0] + dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]); da[1] = (dAf[ 4]*tpx[0] + dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]); da[2] = (dAf[ 8]*tpx[0] + dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]); da[3] = (dAf[12]*tpx[0] + dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]); d2a[0] = (d2Af[ 0]*tpx[0] + d2Af[ 1]*tpx[1] + d2Af[ 2]*tpx[2] + d2Af[ 3]*tpx[3]); d2a[1] = (d2Af[ 4]*tpx[0] + d2Af[ 5]*tpx[1] + d2Af[ 6]*tpx[2] + d2Af[ 7]*tpx[3]); d2a[2] = (d2Af[ 8]*tpx[0] + d2Af[ 9]*tpx[1] + d2Af[10]*tpx[2] + d2Af[11]*tpx[3]); d2a[3] = (d2Af[12]*tpx[0] + d2Af[13]*tpx[1] + d2Af[14]*tpx[2] + d2Af[15]*tpx[3]); b[0] = (Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]); b[1] = (Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]); b[2] = (Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + 
Af[11]*tpy[3]); b[3] = (Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]); db[0] = (dAf[ 0]*tpy[0] + dAf[ 1]*tpy[1] + dAf[ 2]*tpy[2] + dAf[ 3]*tpy[3]); db[1] = (dAf[ 4]*tpy[0] + dAf[ 5]*tpy[1] + dAf[ 6]*tpy[2] + dAf[ 7]*tpy[3]); db[2] = (dAf[ 8]*tpy[0] + dAf[ 9]*tpy[1] + dAf[10]*tpy[2] + dAf[11]*tpy[3]); db[3] = (dAf[12]*tpy[0] + dAf[13]*tpy[1] + dAf[14]*tpy[2] + dAf[15]*tpy[3]); d2b[0] = (d2Af[ 0]*tpy[0] + d2Af[ 1]*tpy[1] + d2Af[ 2]*tpy[2] + d2Af[ 3]*tpy[3]); d2b[1] = (d2Af[ 4]*tpy[0] + d2Af[ 5]*tpy[1] + d2Af[ 6]*tpy[2] + d2Af[ 7]*tpy[3]); d2b[2] = (d2Af[ 8]*tpy[0] + d2Af[ 9]*tpy[1] + d2Af[10]*tpy[2] + d2Af[11]*tpy[3]); d2b[3] = (d2Af[12]*tpy[0] + d2Af[13]*tpy[1] + d2Af[14]*tpy[2] + d2Af[15]*tpy[3]); c[0] = (Af[ 0]*tpz[0] + Af[ 1]*tpz[1] + Af[ 2]*tpz[2] + Af[ 3]*tpz[3]); c[1] = (Af[ 4]*tpz[0] + Af[ 5]*tpz[1] + Af[ 6]*tpz[2] + Af[ 7]*tpz[3]); c[2] = (Af[ 8]*tpz[0] + Af[ 9]*tpz[1] + Af[10]*tpz[2] + Af[11]*tpz[3]); c[3] = (Af[12]*tpz[0] + Af[13]*tpz[1] + Af[14]*tpz[2] + Af[15]*tpz[3]); dc[0] = (dAf[ 0]*tpz[0] + dAf[ 1]*tpz[1] + dAf[ 2]*tpz[2] + dAf[ 3]*tpz[3]); dc[1] = (dAf[ 4]*tpz[0] + dAf[ 5]*tpz[1] + dAf[ 6]*tpz[2] + dAf[ 7]*tpz[3]); dc[2] = (dAf[ 8]*tpz[0] + dAf[ 9]*tpz[1] + dAf[10]*tpz[2] + dAf[11]*tpz[3]); dc[3] = (dAf[12]*tpz[0] + dAf[13]*tpz[1] + dAf[14]*tpz[2] + dAf[15]*tpz[3]); d2c[0] = (d2Af[ 0]*tpz[0] + d2Af[ 1]*tpz[1] + d2Af[ 2]*tpz[2] + d2Af[ 3]*tpz[3]); d2c[1] = (d2Af[ 4]*tpz[0] + d2Af[ 5]*tpz[1] + d2Af[ 6]*tpz[2] + d2Af[ 7]*tpz[3]); d2c[2] = (d2Af[ 8]*tpz[0] + d2Af[ 9]*tpz[1] + d2Af[10]*tpz[2] + d2Af[11]*tpz[3]); d2c[3] = (d2Af[12]*tpz[0] + d2Af[13]*tpz[1] + d2Af[14]*tpz[2] + d2Af[15]*tpz[3]); intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; intptr_t zs = spline->z_stride; //complex_float lapl3[3*spline->num_splines]; complex_float* restrict lapl3 = spline->lapl3; for (int n=0; nnum_splines; n++) { vals[n] = 0.0; grads[3*n+0] = grads[3*n+1] = grads[3*n+2] = 0.0; lapl3[3*n+0] = lapl3[3*n+1] = lapl3[3*n+2] = 0.0; } for (int 
i=0; i<4; i++) for (int j=0; j<4; j++) for (int k=0; k<4; k++) { float abc = a[i]*b[j]*c[k]; float dabc[3], d2abc[3]; dabc[0] = da[i]* b[j]* c[k]; dabc[1] = a[i]*db[j]* c[k]; dabc[2] = a[i]* b[j]*dc[k]; d2abc[0] = d2a[i]* b[j]* c[k]; d2abc[1] = a[i]*d2b[j]* c[k]; d2abc[2] = a[i]* b[j]*d2c[k]; complex_float* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys + (iz+k)*zs); for (int n=0; nnum_splines; n++) { vals[n] += abc *coefs[n]; grads[3*n+0] += dabc[0]*coefs[n]; grads[3*n+1] += dabc[1]*coefs[n]; grads[3*n+2] += dabc[2]*coefs[n]; lapl3[3*n+0] += d2abc[0]*coefs[n]; lapl3[3*n+1] += d2abc[1]*coefs[n]; lapl3[3*n+2] += d2abc[2]*coefs[n]; } } float dxInv = spline->x_grid.delta_inv; float dyInv = spline->y_grid.delta_inv; float dzInv = spline->z_grid.delta_inv; for (int n=0; nnum_splines; n++) { grads[3*n+0] *= dxInv; grads[3*n+1] *= dyInv; grads[3*n+2] *= dzInv; lapl3[3*n+0] *= dxInv*dxInv; lapl3[3*n+1] *= dyInv*dyInv; lapl3[3*n+2] *= dzInv*dzInv; lapl[n] = lapl3[3*n+0] + lapl3[3*n+1] + lapl3[3*n+2]; } } void eval_multi_UBspline_3d_c_vgh (multi_UBspline_3d_c *spline, double x, double y, double z, complex_float* restrict vals, complex_float* restrict grads, complex_float* restrict hess) { x -= spline->x_grid.start; y -= spline->y_grid.start; z -= spline->z_grid.start; float ux = x*spline->x_grid.delta_inv; float uy = y*spline->y_grid.delta_inv; float uz = z*spline->z_grid.delta_inv; float ipartx, iparty, ipartz, tx, ty, tz; tx = modff (ux, &ipartx); int ix = (int) ipartx; ty = modff (uy, &iparty); int iy = (int) iparty; tz = modff (uz, &ipartz); int iz = (int) ipartz; float tpx[4], tpy[4], tpz[4], a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; tpz[0] = tz*tz*tz; tpz[1] = tz*tz; tpz[2] = tz; tpz[3] = 1.0; complex_float* restrict coefs = spline->coefs; a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); a[1] = 
(Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]); a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]); da[0] = (dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]); da[1] = (dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]); da[2] = (dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]); da[3] = (dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]); d2a[0] = (d2Af[ 2]*tpx[2] + d2Af[ 3]*tpx[3]); d2a[1] = (d2Af[ 6]*tpx[2] + d2Af[ 7]*tpx[3]); d2a[2] = (d2Af[10]*tpx[2] + d2Af[11]*tpx[3]); d2a[3] = (d2Af[14]*tpx[2] + d2Af[15]*tpx[3]); b[0] = (Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]); b[1] = (Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]); b[2] = (Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]); b[3] = (Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]); db[0] = (dAf[ 1]*tpy[1] + dAf[ 2]*tpy[2] + dAf[ 3]*tpy[3]); db[1] = (dAf[ 5]*tpy[1] + dAf[ 6]*tpy[2] + dAf[ 7]*tpy[3]); db[2] = (dAf[ 9]*tpy[1] + dAf[10]*tpy[2] + dAf[11]*tpy[3]); db[3] = (dAf[13]*tpy[1] + dAf[14]*tpy[2] + dAf[15]*tpy[3]); d2b[0] = (d2Af[ 2]*tpy[2] + d2Af[ 3]*tpy[3]); d2b[1] = (d2Af[ 6]*tpy[2] + d2Af[ 7]*tpy[3]); d2b[2] = (d2Af[10]*tpy[2] + d2Af[11]*tpy[3]); d2b[3] = (d2Af[14]*tpy[2] + d2Af[15]*tpy[3]); c[0] = (Af[ 0]*tpz[0] + Af[ 1]*tpz[1] + Af[ 2]*tpz[2] + Af[ 3]*tpz[3]); c[1] = (Af[ 4]*tpz[0] + Af[ 5]*tpz[1] + Af[ 6]*tpz[2] + Af[ 7]*tpz[3]); c[2] = (Af[ 8]*tpz[0] + Af[ 9]*tpz[1] + Af[10]*tpz[2] + Af[11]*tpz[3]); c[3] = (Af[12]*tpz[0] + Af[13]*tpz[1] + Af[14]*tpz[2] + Af[15]*tpz[3]); dc[0] = (dAf[ 1]*tpz[1] + dAf[ 2]*tpz[2] + dAf[ 3]*tpz[3]); dc[1] = (dAf[ 5]*tpz[1] + dAf[ 6]*tpz[2] + dAf[ 7]*tpz[3]); dc[2] = (dAf[ 9]*tpz[1] + dAf[10]*tpz[2] + dAf[11]*tpz[3]); dc[3] = (dAf[13]*tpz[1] + dAf[14]*tpz[2] + dAf[15]*tpz[3]); d2c[0] = (d2Af[ 2]*tpz[2] + d2Af[ 3]*tpz[3]); d2c[1] = (d2Af[ 6]*tpz[2] + d2Af[ 7]*tpz[3]); d2c[2] = (d2Af[10]*tpz[2] + 
d2Af[11]*tpz[3]); d2c[3] = (d2Af[14]*tpz[2] + d2Af[15]*tpz[3]); intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; intptr_t zs = spline->z_stride; for (int n=0; nnum_splines; n++) { vals[n] = 0.0; grads[3*n+0] = grads[3*n+1] = grads[3*n+2] = 0.0; for (int i=0; i<9; i++) hess[9*n+i] = 0.0; } for (int i=0; i<4; i++) for (int j=0; j<4; j++) for (int k=0; k<4; k++) { float abc = a[i]*b[j]*c[k]; float dabc[3], d2abc[6]; dabc[0] = da[i]* b[j]* c[k]; dabc[1] = a[i]*db[j]* c[k]; dabc[2] = a[i]* b[j]*dc[k]; d2abc[0] = d2a[i]* b[j]* c[k]; d2abc[1] = da[i]* db[j]* c[k]; d2abc[2] = da[i]* b[j]* dc[k]; d2abc[3] = a[i]*d2b[j]* c[k]; d2abc[4] = a[i]* db[j]* dc[k]; d2abc[5] = a[i]* b[j]*d2c[k]; complex_float* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys + (iz+k)*zs); for (int n=0; nnum_splines; n++) { vals[n] += abc *coefs[n]; grads[3*n+0] += dabc[0]*coefs[n]; grads[3*n+1] += dabc[1]*coefs[n]; grads[3*n+2] += dabc[2]*coefs[n]; hess [9*n+0] += d2abc[0]*coefs[n]; hess [9*n+1] += d2abc[1]*coefs[n]; hess [9*n+2] += d2abc[2]*coefs[n]; hess [9*n+4] += d2abc[3]*coefs[n]; hess [9*n+5] += d2abc[4]*coefs[n]; hess [9*n+8] += d2abc[5]*coefs[n]; } } float dxInv = spline->x_grid.delta_inv; float dyInv = spline->y_grid.delta_inv; float dzInv = spline->z_grid.delta_inv; for (int n=0; nnum_splines; n++) { grads[3*n+0] *= dxInv; grads[3*n+1] *= dyInv; grads[3*n+2] *= dzInv; hess[9*n+0] *= dxInv*dxInv; hess[9*n+4] *= dyInv*dyInv; hess[9*n+8] *= dzInv*dzInv; hess[9*n+1] *= dxInv*dyInv; hess[9*n+2] *= dxInv*dzInv; hess[9*n+5] *= dyInv*dzInv; // Copy hessian elements into lower half of 3x3 matrix hess[9*n+3] = hess[9*n+1]; hess[9*n+6] = hess[9*n+2]; hess[9*n+7] = hess[9*n+5]; } } #endif einspline-0.9.2/src/bspline_eval_std_d.h0000664000113000011300000013362111012400563015171 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. 
// // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. // // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #ifndef BSPLINE_EVAL_STD_D_H #define BSPLINE_EVAL_STD_D_H #include #include extern const double* restrict Ad; extern const double* restrict dAd; extern const double* restrict d2Ad; /************************************************************/ /* 1D double-precision, real evaulation functions */ /************************************************************/ /* Value only */ inline void eval_UBspline_1d_d (UBspline_1d_d * restrict spline, double x, double* restrict val) { x -= spline->x_grid.start; double u = x*spline->x_grid.delta_inv; double ipart, t; t = modf (u, &ipart); int i = (int) ipart; double tp[4]; tp[0] = t*t*t; tp[1] = t*t; tp[2] = t; tp[3] = 1.0; double* restrict coefs = spline->coefs; *val = (coefs[i+0]*(Ad[ 0]*tp[0] + Ad[ 1]*tp[1] + Ad[ 2]*tp[2] + Ad[ 3]*tp[3])+ coefs[i+1]*(Ad[ 4]*tp[0] + Ad[ 5]*tp[1] + Ad[ 6]*tp[2] + Ad[ 7]*tp[3])+ coefs[i+2]*(Ad[ 8]*tp[0] + Ad[ 9]*tp[1] + Ad[10]*tp[2] + Ad[11]*tp[3])+ coefs[i+3]*(Ad[12]*tp[0] + Ad[13]*tp[1] + Ad[14]*tp[2] + Ad[15]*tp[3])); } /* Value and first derivative */ inline void eval_UBspline_1d_d_vg (UBspline_1d_d * restrict spline, double x, double* restrict val, double* 
restrict grad) { x -= spline->x_grid.start; double u = x*spline->x_grid.delta_inv; double ipart, t; t = modf (u, &ipart); int i = (int) ipart; double tp[4]; tp[0] = t*t*t; tp[1] = t*t; tp[2] = t; tp[3] = 1.0; double* restrict coefs = spline->coefs; *val = (coefs[i+0]*(Ad[ 0]*tp[0] + Ad[ 1]*tp[1] + Ad[ 2]*tp[2] + Ad[ 3]*tp[3])+ coefs[i+1]*(Ad[ 4]*tp[0] + Ad[ 5]*tp[1] + Ad[ 6]*tp[2] + Ad[ 7]*tp[3])+ coefs[i+2]*(Ad[ 8]*tp[0] + Ad[ 9]*tp[1] + Ad[10]*tp[2] + Ad[11]*tp[3])+ coefs[i+3]*(Ad[12]*tp[0] + Ad[13]*tp[1] + Ad[14]*tp[2] + Ad[15]*tp[3])); *grad = spline->x_grid.delta_inv * (coefs[i+0]*(dAd[ 1]*tp[1] + dAd[ 2]*tp[2] + dAd[ 3]*tp[3])+ coefs[i+1]*(dAd[ 5]*tp[1] + dAd[ 6]*tp[2] + dAd[ 7]*tp[3])+ coefs[i+2]*(dAd[ 9]*tp[1] + dAd[10]*tp[2] + dAd[11]*tp[3])+ coefs[i+3]*(dAd[13]*tp[1] + dAd[14]*tp[2] + dAd[15]*tp[3])); } /* Value, first derivative, and second derivative */ inline void eval_UBspline_1d_d_vgl (UBspline_1d_d * restrict spline, double x, double* restrict val, double* restrict grad, double* restrict lapl) { x -= spline->x_grid.start; double u = x*spline->x_grid.delta_inv; double ipart, t; t = modf (u, &ipart); int i = (int) ipart; double tp[4]; tp[0] = t*t*t; tp[1] = t*t; tp[2] = t; tp[3] = 1.0; double* restrict coefs = spline->coefs; *val = (coefs[i+0]*(Ad[ 0]*tp[0] + Ad[ 1]*tp[1] + Ad[ 2]*tp[2] + Ad[ 3]*tp[3])+ coefs[i+1]*(Ad[ 4]*tp[0] + Ad[ 5]*tp[1] + Ad[ 6]*tp[2] + Ad[ 7]*tp[3])+ coefs[i+2]*(Ad[ 8]*tp[0] + Ad[ 9]*tp[1] + Ad[10]*tp[2] + Ad[11]*tp[3])+ coefs[i+3]*(Ad[12]*tp[0] + Ad[13]*tp[1] + Ad[14]*tp[2] + Ad[15]*tp[3])); *grad = spline->x_grid.delta_inv * (coefs[i+0]*(dAd[ 1]*tp[1] + dAd[ 2]*tp[2] + dAd[ 3]*tp[3])+ coefs[i+1]*(dAd[ 5]*tp[1] + dAd[ 6]*tp[2] + dAd[ 7]*tp[3])+ coefs[i+2]*(dAd[ 9]*tp[1] + dAd[10]*tp[2] + dAd[11]*tp[3])+ coefs[i+3]*(dAd[13]*tp[1] + dAd[14]*tp[2] + dAd[15]*tp[3])); *lapl = spline->x_grid.delta_inv * spline->x_grid.delta_inv * (coefs[i+0]*(d2Ad[ 2]*tp[2] + d2Ad[ 3]*tp[3])+ coefs[i+1]*(d2Ad[ 6]*tp[2] + d2Ad[ 7]*tp[3])+ 
coefs[i+2]*(d2Ad[10]*tp[2] + d2Ad[11]*tp[3])+ coefs[i+3]*(d2Ad[14]*tp[2] + d2Ad[15]*tp[3])); } inline void eval_UBspline_1d_d_vgh (UBspline_1d_d * restrict spline, double x, double* restrict val, double* restrict grad, double* restrict hess) { eval_UBspline_1d_d_vgl (spline, x, val, grad, hess); } /************************************************************/ /* 2D double-precision, real evaulation functions */ /************************************************************/ /* Value only */ inline void eval_UBspline_2d_d (UBspline_2d_d * restrict spline, double x, double y, double* restrict val) { x -= spline->x_grid.start; y -= spline->y_grid.start; double ux = x*spline->x_grid.delta_inv; double uy = y*spline->y_grid.delta_inv; double ipartx, iparty, tx, ty; tx = modf (ux, &ipartx); ty = modf (uy, &iparty); int ix = (int) ipartx; int iy = (int) iparty; double tpx[4], tpy[4], a[4], b[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; double* restrict coefs = spline->coefs; a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]); a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]); a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]); a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]); b[0] = (Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]); b[1] = (Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]); b[2] = (Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]); b[3] = (Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]); int xs = spline->x_stride; #define C(i,j) coefs[(ix+(i))*xs+iy+(j)] *val = (a[0]*(C(0,0)*b[0]+C(0,1)*b[1]+C(0,2)*b[2]+C(0,3)*b[3])+ a[1]*(C(1,0)*b[0]+C(1,1)*b[1]+C(1,2)*b[2]+C(1,3)*b[3])+ a[2]*(C(2,0)*b[0]+C(2,1)*b[1]+C(2,2)*b[2]+C(2,3)*b[3])+ a[3]*(C(3,0)*b[0]+C(3,1)*b[1]+C(3,2)*b[2]+C(3,3)*b[3])); #undef C } /* Value and gradient */ inline void 
eval_UBspline_2d_d_vg (UBspline_2d_d * restrict spline, double x, double y, double* restrict val, double* restrict grad) { x -= spline->x_grid.start; y -= spline->y_grid.start; double ux = x*spline->x_grid.delta_inv; double uy = y*spline->y_grid.delta_inv; double ipartx, iparty, tx, ty; tx = modf (ux, &ipartx); ty = modf (uy, &iparty); int ix = (int) ipartx; int iy = (int) iparty; double tpx[4], tpy[4], a[4], b[4], da[4], db[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; double* restrict coefs = spline->coefs; a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]); a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]); a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]); a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]); da[0] = (dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]); da[1] = (dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]); da[2] = (dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]); da[3] = (dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]); b[0] = (Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]); b[1] = (Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]); b[2] = (Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]); b[3] = (Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]); db[0] = (dAd[ 1]*tpy[1] + dAd[ 2]*tpy[2] + dAd[ 3]*tpy[3]); db[1] = (dAd[ 5]*tpy[1] + dAd[ 6]*tpy[2] + dAd[ 7]*tpy[3]); db[2] = (dAd[ 9]*tpy[1] + dAd[10]*tpy[2] + dAd[11]*tpy[3]); db[3] = (dAd[13]*tpy[1] + dAd[14]*tpy[2] + dAd[15]*tpy[3]); int xs = spline->x_stride; #define C(i,j) coefs[(ix+(i))*xs+iy+(j)] *val = (a[0]*(C(0,0)*b[0]+C(0,1)*b[1]+C(0,2)*b[2]+C(0,3)*b[3])+ a[1]*(C(1,0)*b[0]+C(1,1)*b[1]+C(1,2)*b[2]+C(1,3)*b[3])+ a[2]*(C(2,0)*b[0]+C(2,1)*b[1]+C(2,2)*b[2]+C(2,3)*b[3])+ a[3]*(C(3,0)*b[0]+C(3,1)*b[1]+C(3,2)*b[2]+C(3,3)*b[3])); grad[0] = spline->x_grid.delta_inv * 
(da[0]*(C(0,0)*b[0]+C(0,1)*b[1]+C(0,2)*b[2]+C(0,3)*b[3])+ da[1]*(C(1,0)*b[0]+C(1,1)*b[1]+C(1,2)*b[2]+C(1,3)*b[3])+ da[2]*(C(2,0)*b[0]+C(2,1)*b[1]+C(2,2)*b[2]+C(2,3)*b[3])+ da[3]*(C(3,0)*b[0]+C(3,1)*b[1]+C(3,2)*b[2]+C(3,3)*b[3])); grad[1] = spline->y_grid.delta_inv * (a[0]*(C(0,0)*db[0]+C(0,1)*db[1]+C(0,2)*db[2]+C(0,3)*db[3])+ a[1]*(C(1,0)*db[0]+C(1,1)*db[1]+C(1,2)*db[2]+C(1,3)*db[3])+ a[2]*(C(2,0)*db[0]+C(2,1)*db[1]+C(2,2)*db[2]+C(2,3)*db[3])+ a[3]*(C(3,0)*db[0]+C(3,1)*db[1]+C(3,2)*db[2]+C(3,3)*db[3])); #undef C } /* Value, gradient, and laplacian */ inline void eval_UBspline_2d_d_vgl (UBspline_2d_d * restrict spline, double x, double y, double* restrict val, double* restrict grad, double* restrict lapl) { x -= spline->x_grid.start; y -= spline->y_grid.start; double ux = x*spline->x_grid.delta_inv; double uy = y*spline->y_grid.delta_inv; double ipartx, iparty, tx, ty; tx = modf (ux, &ipartx); ty = modf (uy, &iparty); int ix = (int) ipartx; int iy = (int) iparty; double tpx[4], tpy[4], a[4], b[4], da[4], db[4], d2a[4], d2b[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; double* restrict coefs = spline->coefs; a[0] = ( Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]); a[1] = ( Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]); a[2] = ( Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]); a[3] = ( Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]); da[0] = ( dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]); da[1] = ( dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]); da[2] = ( dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]); da[3] = ( dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]); d2a[0] = (d2Ad[ 2]*tpx[2] + d2Ad[ 3]*tpx[3]); d2a[1] = (d2Ad[ 6]*tpx[2] + d2Ad[ 7]*tpx[3]); d2a[2] = (d2Ad[10]*tpx[2] + d2Ad[11]*tpx[3]); d2a[3] = (d2Ad[14]*tpx[2] + d2Ad[15]*tpx[3]); b[0] = ( Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]); 
b[1] = ( Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]); b[2] = ( Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]); b[3] = ( Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]); db[0] = (dAd[ 1]*tpy[1] + dAd[ 2]*tpy[2] + dAd[ 3]*tpy[3]); db[1] = (dAd[ 5]*tpy[1] + dAd[ 6]*tpy[2] + dAd[ 7]*tpy[3]); db[2] = (dAd[ 9]*tpy[1] + dAd[10]*tpy[2] + dAd[11]*tpy[3]); db[3] = (dAd[13]*tpy[1] + dAd[14]*tpy[2] + dAd[15]*tpy[3]); d2b[0] = (d2Ad[ 2]*tpy[2] + d2Ad[ 3]*tpy[3]); d2b[1] = (d2Ad[ 6]*tpy[2] + d2Ad[ 7]*tpy[3]); d2b[2] = (d2Ad[10]*tpy[2] + d2Ad[11]*tpy[3]); d2b[3] = (d2Ad[14]*tpy[2] + d2Ad[15]*tpy[3]); int xs = spline->x_stride; #define C(i,j) coefs[(ix+(i))*xs+iy+(j)] *val = (a[0]*(C(0,0)*b[0]+C(0,1)*b[1]+C(0,2)*b[2]+C(0,3)*b[3])+ a[1]*(C(1,0)*b[0]+C(1,1)*b[1]+C(1,2)*b[2]+C(1,3)*b[3])+ a[2]*(C(2,0)*b[0]+C(2,1)*b[1]+C(2,2)*b[2]+C(2,3)*b[3])+ a[3]*(C(3,0)*b[0]+C(3,1)*b[1]+C(3,2)*b[2]+C(3,3)*b[3])); grad[0] = spline->x_grid.delta_inv * (da[0]*(C(0,0)*b[0]+C(0,1)*b[1]+C(0,2)*b[2]+C(0,3)*b[3])+ da[1]*(C(1,0)*b[0]+C(1,1)*b[1]+C(1,2)*b[2]+C(1,3)*b[3])+ da[2]*(C(2,0)*b[0]+C(2,1)*b[1]+C(2,2)*b[2]+C(2,3)*b[3])+ da[3]*(C(3,0)*b[0]+C(3,1)*b[1]+C(3,2)*b[2]+C(3,3)*b[3])); grad[1] = spline->y_grid.delta_inv * (a[0]*(C(0,0)*db[0]+C(0,1)*db[1]+C(0,2)*db[2]+C(0,3)*db[3])+ a[1]*(C(1,0)*db[0]+C(1,1)*db[1]+C(1,2)*db[2]+C(1,3)*db[3])+ a[2]*(C(2,0)*db[0]+C(2,1)*db[1]+C(2,2)*db[2]+C(2,3)*db[3])+ a[3]*(C(3,0)*db[0]+C(3,1)*db[1]+C(3,2)*db[2]+C(3,3)*db[3])); *lapl = spline->y_grid.delta_inv * spline->y_grid.delta_inv * (a[0]*(C(0,0)*d2b[0]+C(0,1)*d2b[1]+C(0,2)*d2b[2]+C(0,3)*d2b[3])+ a[1]*(C(1,0)*d2b[0]+C(1,1)*d2b[1]+C(1,2)*d2b[2]+C(1,3)*d2b[3])+ a[2]*(C(2,0)*d2b[0]+C(2,1)*d2b[1]+C(2,2)*d2b[2]+C(2,3)*d2b[3])+ a[3]*(C(3,0)*d2b[0]+C(3,1)*d2b[1]+C(3,2)*d2b[2]+C(3,3)*d2b[3])) + spline->x_grid.delta_inv * spline->x_grid.delta_inv * (d2a[0]*(C(0,0)*b[0]+C(0,1)*b[1]+C(0,2)*b[2]+C(0,3)*b[3])+ d2a[1]*(C(1,0)*b[0]+C(1,1)*b[1]+C(1,2)*b[2]+C(1,3)*b[3])+ 
d2a[2]*(C(2,0)*b[0]+C(2,1)*b[1]+C(2,2)*b[2]+C(2,3)*b[3])+ d2a[3]*(C(3,0)*b[0]+C(3,1)*b[1]+C(3,2)*b[2]+C(3,3)*b[3])); #undef C } /* Value, gradient, and Hessian */ inline void eval_UBspline_2d_d_vgh (UBspline_2d_d * restrict spline, double x, double y, double* restrict val, double* restrict grad, double* restrict hess) { x -= spline->x_grid.start; y -= spline->y_grid.start; double ux = x*spline->x_grid.delta_inv; double uy = y*spline->y_grid.delta_inv; double ipartx, iparty, tx, ty; tx = modf (ux, &ipartx); ty = modf (uy, &iparty); int ix = (int) ipartx; int iy = (int) iparty; double tpx[4], tpy[4], a[4], b[4], da[4], db[4], d2a[4], d2b[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; double* restrict coefs = spline->coefs; a[0] = ( Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]); a[1] = ( Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]); a[2] = ( Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]); a[3] = ( Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]); da[0] = ( dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]); da[1] = ( dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]); da[2] = ( dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]); da[3] = ( dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]); d2a[0] = (d2Ad[ 2]*tpx[2] + d2Ad[ 3]*tpx[3]); d2a[1] = (d2Ad[ 6]*tpx[2] + d2Ad[ 7]*tpx[3]); d2a[2] = (d2Ad[10]*tpx[2] + d2Ad[11]*tpx[3]); d2a[3] = (d2Ad[14]*tpx[2] + d2Ad[15]*tpx[3]); b[0] = ( Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]); b[1] = ( Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]); b[2] = ( Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]); b[3] = ( Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]); db[0] = ( dAd[ 1]*tpy[1] + dAd[ 2]*tpy[2] + dAd[ 3]*tpy[3]); db[1] = ( dAd[ 5]*tpy[1] + dAd[ 6]*tpy[2] + dAd[ 7]*tpy[3]); db[2] = ( dAd[ 9]*tpy[1] + dAd[10]*tpy[2] + 
dAd[11]*tpy[3]); db[3] = ( dAd[13]*tpy[1] + dAd[14]*tpy[2] + dAd[15]*tpy[3]); d2b[0] = (d2Ad[ 2]*tpy[2] + d2Ad[ 3]*tpy[3]); d2b[1] = (d2Ad[ 6]*tpy[2] + d2Ad[ 7]*tpy[3]); d2b[2] = (d2Ad[10]*tpy[2] + d2Ad[11]*tpy[3]); d2b[3] = (d2Ad[14]*tpy[2] + d2Ad[15]*tpy[3]); int xs = spline->x_stride; #define C(i,j) coefs[(ix+(i))*xs+iy+(j)] *val = ( a[0]*(C(0,0)* b[0]+C(0,1)* b[1]+C(0,2)* b[2]+C(0,3)* b[3])+ a[1]*(C(1,0)* b[0]+C(1,1)* b[1]+C(1,2)* b[2]+C(1,3)* b[3])+ a[2]*(C(2,0)* b[0]+C(2,1)* b[1]+C(2,2)* b[2]+C(2,3)* b[3])+ a[3]*(C(3,0)* b[0]+C(3,1)* b[1]+C(3,2)* b[2]+C(3,3)* b[3])); grad[0] = spline->x_grid.delta_inv * ( da[0]*(C(0,0)* b[0]+C(0,1)* b[1]+C(0,2)* b[2]+C(0,3)* b[3])+ da[1]*(C(1,0)* b[0]+C(1,1)* b[1]+C(1,2)* b[2]+C(1,3)* b[3])+ da[2]*(C(2,0)* b[0]+C(2,1)* b[1]+C(2,2)* b[2]+C(2,3)* b[3])+ da[3]*(C(3,0)* b[0]+C(3,1)* b[1]+C(3,2)* b[2]+C(3,3)* b[3])); grad[1] = spline->y_grid.delta_inv * ( a[0]*(C(0,0)* db[0]+C(0,1)* db[1]+C(0,2)* db[2]+C(0,3)* db[3])+ a[1]*(C(1,0)* db[0]+C(1,1)* db[1]+C(1,2)* db[2]+C(1,3)* db[3])+ a[2]*(C(2,0)* db[0]+C(2,1)* db[1]+C(2,2)* db[2]+C(2,3)* db[3])+ a[3]*(C(3,0)* db[0]+C(3,1)* db[1]+C(3,2)* db[2]+C(3,3)* db[3])); hess[0] = spline->x_grid.delta_inv * spline->x_grid.delta_inv * (d2a[0]*(C(0,0)* b[0]+C(0,1)* b[1]+C(0,2)* b[2]+C(0,3)* b[3])+ d2a[1]*(C(1,0)* b[0]+C(1,1)* b[1]+C(1,2)* b[2]+C(1,3)* b[3])+ d2a[2]*(C(2,0)* b[0]+C(2,1)* b[1]+C(2,2)* b[2]+C(2,3)* b[3])+ d2a[3]*(C(3,0)* b[0]+C(3,1)* b[1]+C(3,2)* b[2]+C(3,3)* b[3])); hess[1] = spline->x_grid.delta_inv * spline->y_grid.delta_inv * ( da[0]*(C(0,0)* db[0]+C(0,1)* db[1]+C(0,2)* db[2]+C(0,3)* db[3])+ da[1]*(C(1,0)* db[0]+C(1,1)* db[1]+C(1,2)* db[2]+C(1,3)* db[3])+ da[2]*(C(2,0)* db[0]+C(2,1)* db[1]+C(2,2)* db[2]+C(2,3)* db[3])+ da[3]*(C(3,0)* db[0]+C(3,1)* db[1]+C(3,2)* db[2]+C(3,3)* db[3])); hess[3] = spline->y_grid.delta_inv * spline->y_grid.delta_inv * ( a[0]*(C(0,0)*d2b[0]+C(0,1)*d2b[1]+C(0,2)*d2b[2]+C(0,3)*d2b[3])+ a[1]*(C(1,0)*d2b[0]+C(1,1)*d2b[1]+C(1,2)*d2b[2]+C(1,3)*d2b[3])+ 
a[2]*(C(2,0)*d2b[0]+C(2,1)*d2b[1]+C(2,2)*d2b[2]+C(2,3)*d2b[3])+ a[3]*(C(3,0)*d2b[0]+C(3,1)*d2b[1]+C(3,2)*d2b[2]+C(3,3)*d2b[3])); hess[2] = hess[1]; #undef C } /************************************************************/ /* 3D double-precision, real evaulation functions */ /************************************************************/ /* Value only */ inline void eval_UBspline_3d_d (UBspline_3d_d * restrict spline, double x, double y, double z, double* restrict val) { x -= spline->x_grid.start; y -= spline->y_grid.start; z -= spline->z_grid.start; double ux = x*spline->x_grid.delta_inv; double uy = y*spline->y_grid.delta_inv; double uz = z*spline->z_grid.delta_inv; double ipartx, iparty, ipartz, tx, ty, tz; tx = modf (ux, &ipartx); int ix = (int) ipartx; ty = modf (uy, &iparty); int iy = (int) iparty; tz = modf (uz, &ipartz); int iz = (int) ipartz; double tpx[4], tpy[4], tpz[4], a[4], b[4], c[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; tpz[0] = tz*tz*tz; tpz[1] = tz*tz; tpz[2] = tz; tpz[3] = 1.0; double* restrict coefs = spline->coefs; a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]); a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]); a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]); a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]); b[0] = (Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]); b[1] = (Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]); b[2] = (Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]); b[3] = (Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]); c[0] = (Ad[ 0]*tpz[0] + Ad[ 1]*tpz[1] + Ad[ 2]*tpz[2] + Ad[ 3]*tpz[3]); c[1] = (Ad[ 4]*tpz[0] + Ad[ 5]*tpz[1] + Ad[ 6]*tpz[2] + Ad[ 7]*tpz[3]); c[2] = (Ad[ 8]*tpz[0] + Ad[ 9]*tpz[1] + Ad[10]*tpz[2] + Ad[11]*tpz[3]); c[3] = (Ad[12]*tpz[0] + Ad[13]*tpz[1] + Ad[14]*tpz[2] + 
Ad[15]*tpz[3]); int xs = spline->x_stride; int ys = spline->y_stride; #define P(i,j,k) coefs[(ix+(i))*xs+(iy+(j))*ys+(iz+(k))] *val = (a[0]*(b[0]*(P(0,0,0)*c[0]+P(0,0,1)*c[1]+P(0,0,2)*c[2]+P(0,0,3)*c[3])+ b[1]*(P(0,1,0)*c[0]+P(0,1,1)*c[1]+P(0,1,2)*c[2]+P(0,1,3)*c[3])+ b[2]*(P(0,2,0)*c[0]+P(0,2,1)*c[1]+P(0,2,2)*c[2]+P(0,2,3)*c[3])+ b[3]*(P(0,3,0)*c[0]+P(0,3,1)*c[1]+P(0,3,2)*c[2]+P(0,3,3)*c[3]))+ a[1]*(b[0]*(P(1,0,0)*c[0]+P(1,0,1)*c[1]+P(1,0,2)*c[2]+P(1,0,3)*c[3])+ b[1]*(P(1,1,0)*c[0]+P(1,1,1)*c[1]+P(1,1,2)*c[2]+P(1,1,3)*c[3])+ b[2]*(P(1,2,0)*c[0]+P(1,2,1)*c[1]+P(1,2,2)*c[2]+P(1,2,3)*c[3])+ b[3]*(P(1,3,0)*c[0]+P(1,3,1)*c[1]+P(1,3,2)*c[2]+P(1,3,3)*c[3]))+ a[2]*(b[0]*(P(2,0,0)*c[0]+P(2,0,1)*c[1]+P(2,0,2)*c[2]+P(2,0,3)*c[3])+ b[1]*(P(2,1,0)*c[0]+P(2,1,1)*c[1]+P(2,1,2)*c[2]+P(2,1,3)*c[3])+ b[2]*(P(2,2,0)*c[0]+P(2,2,1)*c[1]+P(2,2,2)*c[2]+P(2,2,3)*c[3])+ b[3]*(P(2,3,0)*c[0]+P(2,3,1)*c[1]+P(2,3,2)*c[2]+P(2,3,3)*c[3]))+ a[3]*(b[0]*(P(3,0,0)*c[0]+P(3,0,1)*c[1]+P(3,0,2)*c[2]+P(3,0,3)*c[3])+ b[1]*(P(3,1,0)*c[0]+P(3,1,1)*c[1]+P(3,1,2)*c[2]+P(3,1,3)*c[3])+ b[2]*(P(3,2,0)*c[0]+P(3,2,1)*c[1]+P(3,2,2)*c[2]+P(3,2,3)*c[3])+ b[3]*(P(3,3,0)*c[0]+P(3,3,1)*c[1]+P(3,3,2)*c[2]+P(3,3,3)*c[3]))); #undef P } /* Value and gradient */ inline void eval_UBspline_3d_d_vg (UBspline_3d_d * restrict spline, double x, double y, double z, double* restrict val, double* restrict grad) { x -= spline->x_grid.start; y -= spline->y_grid.start; z -= spline->z_grid.start; double ux = x*spline->x_grid.delta_inv; double uy = y*spline->y_grid.delta_inv; double uz = z*spline->z_grid.delta_inv; double ipartx, iparty, ipartz, tx, ty, tz; tx = modf (ux, &ipartx); int ix = (int) ipartx; ty = modf (uy, &iparty); int iy = (int) iparty; tz = modf (uz, &ipartz); int iz = (int) ipartz; double tpx[4], tpy[4], tpz[4], a[4], b[4], c[4], da[4], db[4], dc[4], cP[16], bcP[4], dbcP[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; tpz[0] = tz*tz*tz; 
tpz[1] = tz*tz; tpz[2] = tz; tpz[3] = 1.0; double* restrict coefs = spline->coefs; a[0] = ( Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]); a[1] = ( Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]); a[2] = ( Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]); a[3] = ( Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]); da[0] = ( dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]); da[1] = ( dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]); da[2] = ( dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]); da[3] = ( dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]); b[0] = ( Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]); b[1] = ( Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]); b[2] = ( Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]); b[3] = ( Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]); db[0] = (dAd[ 1]*tpy[1] + dAd[ 2]*tpy[2] + dAd[ 3]*tpy[3]); db[1] = (dAd[ 5]*tpy[1] + dAd[ 6]*tpy[2] + dAd[ 7]*tpy[3]); db[2] = (dAd[ 9]*tpy[1] + dAd[10]*tpy[2] + dAd[11]*tpy[3]); db[3] = (dAd[13]*tpy[1] + dAd[14]*tpy[2] + dAd[15]*tpy[3]); c[0] = ( Ad[ 0]*tpz[0] + Ad[ 1]*tpz[1] + Ad[ 2]*tpz[2] + Ad[ 3]*tpz[3]); c[1] = ( Ad[ 4]*tpz[0] + Ad[ 5]*tpz[1] + Ad[ 6]*tpz[2] + Ad[ 7]*tpz[3]); c[2] = ( Ad[ 8]*tpz[0] + Ad[ 9]*tpz[1] + Ad[10]*tpz[2] + Ad[11]*tpz[3]); c[3] = ( Ad[12]*tpz[0] + Ad[13]*tpz[1] + Ad[14]*tpz[2] + Ad[15]*tpz[3]); dc[0] = (dAd[ 1]*tpz[1] + dAd[ 2]*tpz[2] + dAd[ 3]*tpz[3]); dc[1] = (dAd[ 5]*tpz[1] + dAd[ 6]*tpz[2] + dAd[ 7]*tpz[3]); dc[2] = (dAd[ 9]*tpz[1] + dAd[10]*tpz[2] + dAd[11]*tpz[3]); dc[3] = (dAd[13]*tpz[1] + dAd[14]*tpz[2] + dAd[15]*tpz[3]); int xs = spline->x_stride; int ys = spline->y_stride; #define P(i,j,k) coefs[(ix+(i))*xs+(iy+(j))*ys+(iz+(k))] cP[ 0] = (P(0,0,0)*c[0]+P(0,0,1)*c[1]+P(0,0,2)*c[2]+P(0,0,3)*c[3]); cP[ 1] = (P(0,1,0)*c[0]+P(0,1,1)*c[1]+P(0,1,2)*c[2]+P(0,1,3)*c[3]); cP[ 2] = (P(0,2,0)*c[0]+P(0,2,1)*c[1]+P(0,2,2)*c[2]+P(0,2,3)*c[3]); 
cP[ 3] = (P(0,3,0)*c[0]+P(0,3,1)*c[1]+P(0,3,2)*c[2]+P(0,3,3)*c[3]); cP[ 4] = (P(1,0,0)*c[0]+P(1,0,1)*c[1]+P(1,0,2)*c[2]+P(1,0,3)*c[3]); cP[ 5] = (P(1,1,0)*c[0]+P(1,1,1)*c[1]+P(1,1,2)*c[2]+P(1,1,3)*c[3]); cP[ 6] = (P(1,2,0)*c[0]+P(1,2,1)*c[1]+P(1,2,2)*c[2]+P(1,2,3)*c[3]); cP[ 7] = (P(1,3,0)*c[0]+P(1,3,1)*c[1]+P(1,3,2)*c[2]+P(1,3,3)*c[3]); cP[ 8] = (P(2,0,0)*c[0]+P(2,0,1)*c[1]+P(2,0,2)*c[2]+P(2,0,3)*c[3]); cP[ 9] = (P(2,1,0)*c[0]+P(2,1,1)*c[1]+P(2,1,2)*c[2]+P(2,1,3)*c[3]); cP[10] = (P(2,2,0)*c[0]+P(2,2,1)*c[1]+P(2,2,2)*c[2]+P(2,2,3)*c[3]); cP[11] = (P(2,3,0)*c[0]+P(2,3,1)*c[1]+P(2,3,2)*c[2]+P(2,3,3)*c[3]); cP[12] = (P(3,0,0)*c[0]+P(3,0,1)*c[1]+P(3,0,2)*c[2]+P(3,0,3)*c[3]); cP[13] = (P(3,1,0)*c[0]+P(3,1,1)*c[1]+P(3,1,2)*c[2]+P(3,1,3)*c[3]); cP[14] = (P(3,2,0)*c[0]+P(3,2,1)*c[1]+P(3,2,2)*c[2]+P(3,2,3)*c[3]); cP[15] = (P(3,3,0)*c[0]+P(3,3,1)*c[1]+P(3,3,2)*c[2]+P(3,3,3)*c[3]); bcP[0] = ( b[0]*cP[ 0] + b[1]*cP[ 1] + b[2]*cP[ 2] + b[3]*cP[ 3]); bcP[1] = ( b[0]*cP[ 4] + b[1]*cP[ 5] + b[2]*cP[ 6] + b[3]*cP[ 7]); bcP[2] = ( b[0]*cP[ 8] + b[1]*cP[ 9] + b[2]*cP[10] + b[3]*cP[11]); bcP[3] = ( b[0]*cP[12] + b[1]*cP[13] + b[2]*cP[14] + b[3]*cP[15]); dbcP[0] = ( db[0]*cP[ 0] + db[1]*cP[ 1] + db[2]*cP[ 2] + db[3]*cP[ 3]); dbcP[1] = ( db[0]*cP[ 4] + db[1]*cP[ 5] + db[2]*cP[ 6] + db[3]*cP[ 7]); dbcP[2] = ( db[0]*cP[ 8] + db[1]*cP[ 9] + db[2]*cP[10] + db[3]*cP[11]); dbcP[3] = ( db[0]*cP[12] + db[1]*cP[13] + db[2]*cP[14] + db[3]*cP[15]); *val = ( a[0]*bcP[0] + a[1]*bcP[1] + a[2]*bcP[2] + a[3]*bcP[3]); grad[0] = spline->x_grid.delta_inv * (da[0]*bcP[0] + da[1]*bcP[1] + da[2]*bcP[2] + da[3]*bcP[3]); grad[1] = spline->y_grid.delta_inv * (a[0]*dbcP[0] + a[1]*dbcP[1] + a[2]*dbcP[2] + a[3]*dbcP[3]); grad[2] = spline->z_grid.delta_inv * (a[0]*(b[0]*(P(0,0,0)*dc[0]+P(0,0,1)*dc[1]+P(0,0,2)*dc[2]+P(0,0,3)*dc[3])+ b[1]*(P(0,1,0)*dc[0]+P(0,1,1)*dc[1]+P(0,1,2)*dc[2]+P(0,1,3)*dc[3])+ b[2]*(P(0,2,0)*dc[0]+P(0,2,1)*dc[1]+P(0,2,2)*dc[2]+P(0,2,3)*dc[3])+ 
b[3]*(P(0,3,0)*dc[0]+P(0,3,1)*dc[1]+P(0,3,2)*dc[2]+P(0,3,3)*dc[3]))+ a[1]*(b[0]*(P(1,0,0)*dc[0]+P(1,0,1)*dc[1]+P(1,0,2)*dc[2]+P(1,0,3)*dc[3])+ b[1]*(P(1,1,0)*dc[0]+P(1,1,1)*dc[1]+P(1,1,2)*dc[2]+P(1,1,3)*dc[3])+ b[2]*(P(1,2,0)*dc[0]+P(1,2,1)*dc[1]+P(1,2,2)*dc[2]+P(1,2,3)*dc[3])+ b[3]*(P(1,3,0)*dc[0]+P(1,3,1)*dc[1]+P(1,3,2)*dc[2]+P(1,3,3)*dc[3]))+ a[2]*(b[0]*(P(2,0,0)*dc[0]+P(2,0,1)*dc[1]+P(2,0,2)*dc[2]+P(2,0,3)*dc[3])+ b[1]*(P(2,1,0)*dc[0]+P(2,1,1)*dc[1]+P(2,1,2)*dc[2]+P(2,1,3)*dc[3])+ b[2]*(P(2,2,0)*dc[0]+P(2,2,1)*dc[1]+P(2,2,2)*dc[2]+P(2,2,3)*dc[3])+ b[3]*(P(2,3,0)*dc[0]+P(2,3,1)*dc[1]+P(2,3,2)*dc[2]+P(2,3,3)*dc[3]))+ a[3]*(b[0]*(P(3,0,0)*dc[0]+P(3,0,1)*dc[1]+P(3,0,2)*dc[2]+P(3,0,3)*dc[3])+ b[1]*(P(3,1,0)*dc[0]+P(3,1,1)*dc[1]+P(3,1,2)*dc[2]+P(3,1,3)*dc[3])+ b[2]*(P(3,2,0)*dc[0]+P(3,2,1)*dc[1]+P(3,2,2)*dc[2]+P(3,2,3)*dc[3])+ b[3]*(P(3,3,0)*dc[0]+P(3,3,1)*dc[1]+P(3,3,2)*dc[2]+P(3,3,3)*dc[3]))); #undef P } /* Value, gradient, and laplacian */ inline void eval_UBspline_3d_d_vgl (UBspline_3d_d * restrict spline, double x, double y, double z, double* restrict val, double* restrict grad, double* restrict lapl) { x -= spline->x_grid.start; y -= spline->y_grid.start; z -= spline->z_grid.start; double ux = x*spline->x_grid.delta_inv; double uy = y*spline->y_grid.delta_inv; double uz = z*spline->z_grid.delta_inv; double ipartx, iparty, ipartz, tx, ty, tz; tx = modf (ux, &ipartx); int ix = (int) ipartx; ty = modf (uy, &iparty); int iy = (int) iparty; tz = modf (uz, &ipartz); int iz = (int) ipartz; double tpx[4], tpy[4], tpz[4], a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4], cP[16], dcP[16], bcP[4], dbcP[4], d2bcP[4], bdcP[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; tpz[0] = tz*tz*tz; tpz[1] = tz*tz; tpz[2] = tz; tpz[3] = 1.0; double* restrict coefs = spline->coefs; a[0] = ( Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]); a[1] = ( Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + 
Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]); a[2] = ( Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]); a[3] = ( Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]); da[0] = ( dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]); da[1] = ( dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]); da[2] = ( dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]); da[3] = ( dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]); d2a[0] = (d2Ad[ 2]*tpx[2] + d2Ad[ 3]*tpx[3]); d2a[1] = (d2Ad[ 6]*tpx[2] + d2Ad[ 7]*tpx[3]); d2a[2] = (d2Ad[10]*tpx[2] + d2Ad[11]*tpx[3]); d2a[3] = (d2Ad[14]*tpx[2] + d2Ad[15]*tpx[3]); b[0] = ( Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]); b[1] = ( Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]); b[2] = ( Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]); b[3] = ( Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]); db[0] = (dAd[ 1]*tpy[1] + dAd[ 2]*tpy[2] + dAd[ 3]*tpy[3]); db[1] = (dAd[ 5]*tpy[1] + dAd[ 6]*tpy[2] + dAd[ 7]*tpy[3]); db[2] = (dAd[ 9]*tpy[1] + dAd[10]*tpy[2] + dAd[11]*tpy[3]); db[3] = (dAd[13]*tpy[1] + dAd[14]*tpy[2] + dAd[15]*tpy[3]); d2b[0] = (d2Ad[ 2]*tpy[2] + d2Ad[ 3]*tpy[3]); d2b[1] = (d2Ad[ 6]*tpy[2] + d2Ad[ 7]*tpy[3]); d2b[2] = (d2Ad[10]*tpy[2] + d2Ad[11]*tpy[3]); d2b[3] = (d2Ad[14]*tpy[2] + d2Ad[15]*tpy[3]); c[0] = ( Ad[ 0]*tpz[0] + Ad[ 1]*tpz[1] + Ad[ 2]*tpz[2] + Ad[ 3]*tpz[3]); c[1] = ( Ad[ 4]*tpz[0] + Ad[ 5]*tpz[1] + Ad[ 6]*tpz[2] + Ad[ 7]*tpz[3]); c[2] = ( Ad[ 8]*tpz[0] + Ad[ 9]*tpz[1] + Ad[10]*tpz[2] + Ad[11]*tpz[3]); c[3] = ( Ad[12]*tpz[0] + Ad[13]*tpz[1] + Ad[14]*tpz[2] + Ad[15]*tpz[3]); dc[0] = (dAd[ 1]*tpz[1] + dAd[ 2]*tpz[2] + dAd[ 3]*tpz[3]); dc[1] = (dAd[ 5]*tpz[1] + dAd[ 6]*tpz[2] + dAd[ 7]*tpz[3]); dc[2] = (dAd[ 9]*tpz[1] + dAd[10]*tpz[2] + dAd[11]*tpz[3]); dc[3] = (dAd[13]*tpz[1] + dAd[14]*tpz[2] + dAd[15]*tpz[3]); d2c[0] = (d2Ad[ 2]*tpz[2] + d2Ad[ 3]*tpz[3]); d2c[1] = (d2Ad[ 6]*tpz[2] + d2Ad[ 7]*tpz[3]); d2c[2] = (d2Ad[10]*tpz[2] + d2Ad[11]*tpz[3]); d2c[3] 
= (d2Ad[14]*tpz[2] + d2Ad[15]*tpz[3]); int xs = spline->x_stride; int ys = spline->y_stride; #define P(i,j,k) coefs[(ix+(i))*xs+(iy+(j))*ys+(iz+(k))] cP[ 0] = (P(0,0,0)*c[0]+P(0,0,1)*c[1]+P(0,0,2)*c[2]+P(0,0,3)*c[3]); cP[ 1] = (P(0,1,0)*c[0]+P(0,1,1)*c[1]+P(0,1,2)*c[2]+P(0,1,3)*c[3]); cP[ 2] = (P(0,2,0)*c[0]+P(0,2,1)*c[1]+P(0,2,2)*c[2]+P(0,2,3)*c[3]); cP[ 3] = (P(0,3,0)*c[0]+P(0,3,1)*c[1]+P(0,3,2)*c[2]+P(0,3,3)*c[3]); cP[ 4] = (P(1,0,0)*c[0]+P(1,0,1)*c[1]+P(1,0,2)*c[2]+P(1,0,3)*c[3]); cP[ 5] = (P(1,1,0)*c[0]+P(1,1,1)*c[1]+P(1,1,2)*c[2]+P(1,1,3)*c[3]); cP[ 6] = (P(1,2,0)*c[0]+P(1,2,1)*c[1]+P(1,2,2)*c[2]+P(1,2,3)*c[3]); cP[ 7] = (P(1,3,0)*c[0]+P(1,3,1)*c[1]+P(1,3,2)*c[2]+P(1,3,3)*c[3]); cP[ 8] = (P(2,0,0)*c[0]+P(2,0,1)*c[1]+P(2,0,2)*c[2]+P(2,0,3)*c[3]); cP[ 9] = (P(2,1,0)*c[0]+P(2,1,1)*c[1]+P(2,1,2)*c[2]+P(2,1,3)*c[3]); cP[10] = (P(2,2,0)*c[0]+P(2,2,1)*c[1]+P(2,2,2)*c[2]+P(2,2,3)*c[3]); cP[11] = (P(2,3,0)*c[0]+P(2,3,1)*c[1]+P(2,3,2)*c[2]+P(2,3,3)*c[3]); cP[12] = (P(3,0,0)*c[0]+P(3,0,1)*c[1]+P(3,0,2)*c[2]+P(3,0,3)*c[3]); cP[13] = (P(3,1,0)*c[0]+P(3,1,1)*c[1]+P(3,1,2)*c[2]+P(3,1,3)*c[3]); cP[14] = (P(3,2,0)*c[0]+P(3,2,1)*c[1]+P(3,2,2)*c[2]+P(3,2,3)*c[3]); cP[15] = (P(3,3,0)*c[0]+P(3,3,1)*c[1]+P(3,3,2)*c[2]+P(3,3,3)*c[3]); dcP[ 0] = (P(0,0,0)*dc[0]+P(0,0,1)*dc[1]+P(0,0,2)*dc[2]+P(0,0,3)*dc[3]); dcP[ 1] = (P(0,1,0)*dc[0]+P(0,1,1)*dc[1]+P(0,1,2)*dc[2]+P(0,1,3)*dc[3]); dcP[ 2] = (P(0,2,0)*dc[0]+P(0,2,1)*dc[1]+P(0,2,2)*dc[2]+P(0,2,3)*dc[3]); dcP[ 3] = (P(0,3,0)*dc[0]+P(0,3,1)*dc[1]+P(0,3,2)*dc[2]+P(0,3,3)*dc[3]); dcP[ 4] = (P(1,0,0)*dc[0]+P(1,0,1)*dc[1]+P(1,0,2)*dc[2]+P(1,0,3)*dc[3]); dcP[ 5] = (P(1,1,0)*dc[0]+P(1,1,1)*dc[1]+P(1,1,2)*dc[2]+P(1,1,3)*dc[3]); dcP[ 6] = (P(1,2,0)*dc[0]+P(1,2,1)*dc[1]+P(1,2,2)*dc[2]+P(1,2,3)*dc[3]); dcP[ 7] = (P(1,3,0)*dc[0]+P(1,3,1)*dc[1]+P(1,3,2)*dc[2]+P(1,3,3)*dc[3]); dcP[ 8] = (P(2,0,0)*dc[0]+P(2,0,1)*dc[1]+P(2,0,2)*dc[2]+P(2,0,3)*dc[3]); dcP[ 9] = (P(2,1,0)*dc[0]+P(2,1,1)*dc[1]+P(2,1,2)*dc[2]+P(2,1,3)*dc[3]); dcP[10] = 
(P(2,2,0)*dc[0]+P(2,2,1)*dc[1]+P(2,2,2)*dc[2]+P(2,2,3)*dc[3]); dcP[11] = (P(2,3,0)*dc[0]+P(2,3,1)*dc[1]+P(2,3,2)*dc[2]+P(2,3,3)*dc[3]); dcP[12] = (P(3,0,0)*dc[0]+P(3,0,1)*dc[1]+P(3,0,2)*dc[2]+P(3,0,3)*dc[3]); dcP[13] = (P(3,1,0)*dc[0]+P(3,1,1)*dc[1]+P(3,1,2)*dc[2]+P(3,1,3)*dc[3]); dcP[14] = (P(3,2,0)*dc[0]+P(3,2,1)*dc[1]+P(3,2,2)*dc[2]+P(3,2,3)*dc[3]); dcP[15] = (P(3,3,0)*dc[0]+P(3,3,1)*dc[1]+P(3,3,2)*dc[2]+P(3,3,3)*dc[3]); bcP[0] = ( b[0]*cP[ 0] + b[1]*cP[ 1] + b[2]*cP[ 2] + b[3]*cP[ 3]); bcP[1] = ( b[0]*cP[ 4] + b[1]*cP[ 5] + b[2]*cP[ 6] + b[3]*cP[ 7]); bcP[2] = ( b[0]*cP[ 8] + b[1]*cP[ 9] + b[2]*cP[10] + b[3]*cP[11]); bcP[3] = ( b[0]*cP[12] + b[1]*cP[13] + b[2]*cP[14] + b[3]*cP[15]); dbcP[0] = ( db[0]*cP[ 0] + db[1]*cP[ 1] + db[2]*cP[ 2] + db[3]*cP[ 3]); dbcP[1] = ( db[0]*cP[ 4] + db[1]*cP[ 5] + db[2]*cP[ 6] + db[3]*cP[ 7]); dbcP[2] = ( db[0]*cP[ 8] + db[1]*cP[ 9] + db[2]*cP[10] + db[3]*cP[11]); dbcP[3] = ( db[0]*cP[12] + db[1]*cP[13] + db[2]*cP[14] + db[3]*cP[15]); bdcP[0] = ( b[0]*dcP[ 0] + b[1]*dcP[ 1] + b[2]*dcP[ 2] + b[3]*dcP[ 3]); bdcP[1] = ( b[0]*dcP[ 4] + b[1]*dcP[ 5] + b[2]*dcP[ 6] + b[3]*dcP[ 7]); bdcP[2] = ( b[0]*dcP[ 8] + b[1]*dcP[ 9] + b[2]*dcP[10] + b[3]*dcP[11]); bdcP[3] = ( b[0]*dcP[12] + b[1]*dcP[13] + b[2]*dcP[14] + b[3]*dcP[15]); d2bcP[0] = ( d2b[0]*cP[ 0] + d2b[1]*cP[ 1] + d2b[2]*cP[ 2] + d2b[3]*cP[ 3]); d2bcP[1] = ( d2b[0]*cP[ 4] + d2b[1]*cP[ 5] + d2b[2]*cP[ 6] + d2b[3]*cP[ 7]); d2bcP[2] = ( d2b[0]*cP[ 8] + d2b[1]*cP[ 9] + d2b[2]*cP[10] + d2b[3]*cP[11]); d2bcP[3] = ( d2b[0]*cP[12] + d2b[1]*cP[13] + d2b[2]*cP[14] + d2b[3]*cP[15]); *val = ( a[0]*bcP[0] + a[1]*bcP[1] + a[2]*bcP[2] + a[3]*bcP[3]); grad[0] = spline->x_grid.delta_inv * (da[0]*bcP[0] + da[1]*bcP[1] + da[2]*bcP[2] + da[3]*bcP[3]); grad[1] = spline->y_grid.delta_inv * (a[0]*dbcP[0] + a[1]*dbcP[1] + a[2]*dbcP[2] + a[3]*dbcP[3]); grad[2] = spline->z_grid.delta_inv * (a[0]*bdcP[0] + a[1]*bdcP[1] + a[2]*bdcP[2] + a[3]*bdcP[3]); *lapl = spline->x_grid.delta_inv * spline->x_grid.delta_inv 
* (d2a[0]*bcP[0] + d2a[1]*bcP[1] + d2a[2]*bcP[2] + d2a[3]*bcP[3]) + spline->y_grid.delta_inv * spline->y_grid.delta_inv * (a[0]*d2bcP[0] + a[1]*d2bcP[1] + a[2]*d2bcP[2] + a[3]*d2bcP[3]) + + spline->z_grid.delta_inv * spline->z_grid.delta_inv * (a[0]*(b[0]*(P(0,0,0)*d2c[0]+P(0,0,1)*d2c[1]+P(0,0,2)*d2c[2]+P(0,0,3)*d2c[3])+ b[1]*(P(0,1,0)*d2c[0]+P(0,1,1)*d2c[1]+P(0,1,2)*d2c[2]+P(0,1,3)*d2c[3])+ b[2]*(P(0,2,0)*d2c[0]+P(0,2,1)*d2c[1]+P(0,2,2)*d2c[2]+P(0,2,3)*d2c[3])+ b[3]*(P(0,3,0)*d2c[0]+P(0,3,1)*d2c[1]+P(0,3,2)*d2c[2]+P(0,3,3)*d2c[3]))+ a[1]*(b[0]*(P(1,0,0)*d2c[0]+P(1,0,1)*d2c[1]+P(1,0,2)*d2c[2]+P(1,0,3)*d2c[3])+ b[1]*(P(1,1,0)*d2c[0]+P(1,1,1)*d2c[1]+P(1,1,2)*d2c[2]+P(1,1,3)*d2c[3])+ b[2]*(P(1,2,0)*d2c[0]+P(1,2,1)*d2c[1]+P(1,2,2)*d2c[2]+P(1,2,3)*d2c[3])+ b[3]*(P(1,3,0)*d2c[0]+P(1,3,1)*d2c[1]+P(1,3,2)*d2c[2]+P(1,3,3)*d2c[3]))+ a[2]*(b[0]*(P(2,0,0)*d2c[0]+P(2,0,1)*d2c[1]+P(2,0,2)*d2c[2]+P(2,0,3)*d2c[3])+ b[1]*(P(2,1,0)*d2c[0]+P(2,1,1)*d2c[1]+P(2,1,2)*d2c[2]+P(2,1,3)*d2c[3])+ b[2]*(P(2,2,0)*d2c[0]+P(2,2,1)*d2c[1]+P(2,2,2)*d2c[2]+P(2,2,3)*d2c[3])+ b[3]*(P(2,3,0)*d2c[0]+P(2,3,1)*d2c[1]+P(2,3,2)*d2c[2]+P(2,3,3)*d2c[3]))+ a[3]*(b[0]*(P(3,0,0)*d2c[0]+P(3,0,1)*d2c[1]+P(3,0,2)*d2c[2]+P(3,0,3)*d2c[3])+ b[1]*(P(3,1,0)*d2c[0]+P(3,1,1)*d2c[1]+P(3,1,2)*d2c[2]+P(3,1,3)*d2c[3])+ b[2]*(P(3,2,0)*d2c[0]+P(3,2,1)*d2c[1]+P(3,2,2)*d2c[2]+P(3,2,3)*d2c[3])+ b[3]*(P(3,3,0)*d2c[0]+P(3,3,1)*d2c[1]+P(3,3,2)*d2c[2]+P(3,3,3)*d2c[3]))); #undef P } /* Value, gradient, and Hessian */ inline void eval_UBspline_3d_d_vgh (UBspline_3d_d * restrict spline, double x, double y, double z, double* restrict val, double* restrict grad, double* restrict hess) { x -= spline->x_grid.start; y -= spline->y_grid.start; z -= spline->z_grid.start; double ux = x*spline->x_grid.delta_inv; double uy = y*spline->y_grid.delta_inv; double uz = z*spline->z_grid.delta_inv; ux = fmin (ux, (double)(spline->x_grid.num)-1.0e-5); uy = fmin (uy, (double)(spline->y_grid.num)-1.0e-5); uz = fmin (uz, 
(double)(spline->z_grid.num)-1.0e-5); double ipartx, iparty, ipartz, tx, ty, tz; tx = modf (ux, &ipartx); int ix = (int) ipartx; ty = modf (uy, &iparty); int iy = (int) iparty; tz = modf (uz, &ipartz); int iz = (int) ipartz; // if ((ix >= spline->x_grid.num)) x = spline->x_grid.num; // if ((ix < 0)) x = 0; // if ((iy >= spline->y_grid.num)) y = spline->y_grid.num; // if ((iy < 0)) y = 0; // if ((iz >= spline->z_grid.num)) z = spline->z_grid.num; // if ((iz < 0)) z = 0; double tpx[4], tpy[4], tpz[4], a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4], cP[16], dcP[16], d2cP[16], bcP[4], dbcP[4], d2bcP[4], dbdcP[4], bd2cP[4], bdcP[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; tpz[0] = tz*tz*tz; tpz[1] = tz*tz; tpz[2] = tz; tpz[3] = 1.0; double* restrict coefs = spline->coefs; a[0] = ( Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]); a[1] = ( Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]); a[2] = ( Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]); a[3] = ( Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]); da[0] = ( dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]); da[1] = ( dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]); da[2] = ( dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]); da[3] = ( dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]); d2a[0] = (d2Ad[ 2]*tpx[2] + d2Ad[ 3]*tpx[3]); d2a[1] = (d2Ad[ 6]*tpx[2] + d2Ad[ 7]*tpx[3]); d2a[2] = (d2Ad[10]*tpx[2] + d2Ad[11]*tpx[3]); d2a[3] = (d2Ad[14]*tpx[2] + d2Ad[15]*tpx[3]); b[0] = ( Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]); b[1] = ( Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]); b[2] = ( Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]); b[3] = ( Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]); db[0] = (dAd[ 1]*tpy[1] + dAd[ 2]*tpy[2] + dAd[ 3]*tpy[3]); db[1] = (dAd[ 5]*tpy[1] + dAd[ 6]*tpy[2] + dAd[ 
7]*tpy[3]); db[2] = (dAd[ 9]*tpy[1] + dAd[10]*tpy[2] + dAd[11]*tpy[3]); db[3] = (dAd[13]*tpy[1] + dAd[14]*tpy[2] + dAd[15]*tpy[3]); d2b[0] = (d2Ad[ 2]*tpy[2] + d2Ad[ 3]*tpy[3]); d2b[1] = (d2Ad[ 6]*tpy[2] + d2Ad[ 7]*tpy[3]); d2b[2] = (d2Ad[10]*tpy[2] + d2Ad[11]*tpy[3]); d2b[3] = (d2Ad[14]*tpy[2] + d2Ad[15]*tpy[3]); c[0] = ( Ad[ 0]*tpz[0] + Ad[ 1]*tpz[1] + Ad[ 2]*tpz[2] + Ad[ 3]*tpz[3]); c[1] = ( Ad[ 4]*tpz[0] + Ad[ 5]*tpz[1] + Ad[ 6]*tpz[2] + Ad[ 7]*tpz[3]); c[2] = ( Ad[ 8]*tpz[0] + Ad[ 9]*tpz[1] + Ad[10]*tpz[2] + Ad[11]*tpz[3]); c[3] = ( Ad[12]*tpz[0] + Ad[13]*tpz[1] + Ad[14]*tpz[2] + Ad[15]*tpz[3]); dc[0] = (dAd[ 1]*tpz[1] + dAd[ 2]*tpz[2] + dAd[ 3]*tpz[3]); dc[1] = (dAd[ 5]*tpz[1] + dAd[ 6]*tpz[2] + dAd[ 7]*tpz[3]); dc[2] = (dAd[ 9]*tpz[1] + dAd[10]*tpz[2] + dAd[11]*tpz[3]); dc[3] = (dAd[13]*tpz[1] + dAd[14]*tpz[2] + dAd[15]*tpz[3]); d2c[0] = (d2Ad[ 2]*tpz[2] + d2Ad[ 3]*tpz[3]); d2c[1] = (d2Ad[ 6]*tpz[2] + d2Ad[ 7]*tpz[3]); d2c[2] = (d2Ad[10]*tpz[2] + d2Ad[11]*tpz[3]); d2c[3] = (d2Ad[14]*tpz[2] + d2Ad[15]*tpz[3]); int xs = spline->x_stride; int ys = spline->y_stride; int offmax = (ix+3)*xs + (iy+3)*ys + iz+3; // if (offmax > spline->coef_size) { // fprintf (stderr, "Outside bounds in spline evalutation.\n" // "offmax = %d csize = %d\n", offmax, spline->csize); // fprintf (stderr, "ix=%d iy=%d iz=%d\n", ix,iy,iz); // } #define P(i,j,k) coefs[(ix+(i))*xs+(iy+(j))*ys+(iz+(k))] cP[ 0] = (P(0,0,0)*c[0]+P(0,0,1)*c[1]+P(0,0,2)*c[2]+P(0,0,3)*c[3]); cP[ 1] = (P(0,1,0)*c[0]+P(0,1,1)*c[1]+P(0,1,2)*c[2]+P(0,1,3)*c[3]); cP[ 2] = (P(0,2,0)*c[0]+P(0,2,1)*c[1]+P(0,2,2)*c[2]+P(0,2,3)*c[3]); cP[ 3] = (P(0,3,0)*c[0]+P(0,3,1)*c[1]+P(0,3,2)*c[2]+P(0,3,3)*c[3]); cP[ 4] = (P(1,0,0)*c[0]+P(1,0,1)*c[1]+P(1,0,2)*c[2]+P(1,0,3)*c[3]); cP[ 5] = (P(1,1,0)*c[0]+P(1,1,1)*c[1]+P(1,1,2)*c[2]+P(1,1,3)*c[3]); cP[ 6] = (P(1,2,0)*c[0]+P(1,2,1)*c[1]+P(1,2,2)*c[2]+P(1,2,3)*c[3]); cP[ 7] = (P(1,3,0)*c[0]+P(1,3,1)*c[1]+P(1,3,2)*c[2]+P(1,3,3)*c[3]); cP[ 8] = 
(P(2,0,0)*c[0]+P(2,0,1)*c[1]+P(2,0,2)*c[2]+P(2,0,3)*c[3]); cP[ 9] = (P(2,1,0)*c[0]+P(2,1,1)*c[1]+P(2,1,2)*c[2]+P(2,1,3)*c[3]); cP[10] = (P(2,2,0)*c[0]+P(2,2,1)*c[1]+P(2,2,2)*c[2]+P(2,2,3)*c[3]); cP[11] = (P(2,3,0)*c[0]+P(2,3,1)*c[1]+P(2,3,2)*c[2]+P(2,3,3)*c[3]); cP[12] = (P(3,0,0)*c[0]+P(3,0,1)*c[1]+P(3,0,2)*c[2]+P(3,0,3)*c[3]); cP[13] = (P(3,1,0)*c[0]+P(3,1,1)*c[1]+P(3,1,2)*c[2]+P(3,1,3)*c[3]); cP[14] = (P(3,2,0)*c[0]+P(3,2,1)*c[1]+P(3,2,2)*c[2]+P(3,2,3)*c[3]); cP[15] = (P(3,3,0)*c[0]+P(3,3,1)*c[1]+P(3,3,2)*c[2]+P(3,3,3)*c[3]); dcP[ 0] = (P(0,0,0)*dc[0]+P(0,0,1)*dc[1]+P(0,0,2)*dc[2]+P(0,0,3)*dc[3]); dcP[ 1] = (P(0,1,0)*dc[0]+P(0,1,1)*dc[1]+P(0,1,2)*dc[2]+P(0,1,3)*dc[3]); dcP[ 2] = (P(0,2,0)*dc[0]+P(0,2,1)*dc[1]+P(0,2,2)*dc[2]+P(0,2,3)*dc[3]); dcP[ 3] = (P(0,3,0)*dc[0]+P(0,3,1)*dc[1]+P(0,3,2)*dc[2]+P(0,3,3)*dc[3]); dcP[ 4] = (P(1,0,0)*dc[0]+P(1,0,1)*dc[1]+P(1,0,2)*dc[2]+P(1,0,3)*dc[3]); dcP[ 5] = (P(1,1,0)*dc[0]+P(1,1,1)*dc[1]+P(1,1,2)*dc[2]+P(1,1,3)*dc[3]); dcP[ 6] = (P(1,2,0)*dc[0]+P(1,2,1)*dc[1]+P(1,2,2)*dc[2]+P(1,2,3)*dc[3]); dcP[ 7] = (P(1,3,0)*dc[0]+P(1,3,1)*dc[1]+P(1,3,2)*dc[2]+P(1,3,3)*dc[3]); dcP[ 8] = (P(2,0,0)*dc[0]+P(2,0,1)*dc[1]+P(2,0,2)*dc[2]+P(2,0,3)*dc[3]); dcP[ 9] = (P(2,1,0)*dc[0]+P(2,1,1)*dc[1]+P(2,1,2)*dc[2]+P(2,1,3)*dc[3]); dcP[10] = (P(2,2,0)*dc[0]+P(2,2,1)*dc[1]+P(2,2,2)*dc[2]+P(2,2,3)*dc[3]); dcP[11] = (P(2,3,0)*dc[0]+P(2,3,1)*dc[1]+P(2,3,2)*dc[2]+P(2,3,3)*dc[3]); dcP[12] = (P(3,0,0)*dc[0]+P(3,0,1)*dc[1]+P(3,0,2)*dc[2]+P(3,0,3)*dc[3]); dcP[13] = (P(3,1,0)*dc[0]+P(3,1,1)*dc[1]+P(3,1,2)*dc[2]+P(3,1,3)*dc[3]); dcP[14] = (P(3,2,0)*dc[0]+P(3,2,1)*dc[1]+P(3,2,2)*dc[2]+P(3,2,3)*dc[3]); dcP[15] = (P(3,3,0)*dc[0]+P(3,3,1)*dc[1]+P(3,3,2)*dc[2]+P(3,3,3)*dc[3]); d2cP[ 0] = (P(0,0,0)*d2c[0]+P(0,0,1)*d2c[1]+P(0,0,2)*d2c[2]+P(0,0,3)*d2c[3]); d2cP[ 1] = (P(0,1,0)*d2c[0]+P(0,1,1)*d2c[1]+P(0,1,2)*d2c[2]+P(0,1,3)*d2c[3]); d2cP[ 2] = (P(0,2,0)*d2c[0]+P(0,2,1)*d2c[1]+P(0,2,2)*d2c[2]+P(0,2,3)*d2c[3]); d2cP[ 3] = 
(P(0,3,0)*d2c[0]+P(0,3,1)*d2c[1]+P(0,3,2)*d2c[2]+P(0,3,3)*d2c[3]); d2cP[ 4] = (P(1,0,0)*d2c[0]+P(1,0,1)*d2c[1]+P(1,0,2)*d2c[2]+P(1,0,3)*d2c[3]); d2cP[ 5] = (P(1,1,0)*d2c[0]+P(1,1,1)*d2c[1]+P(1,1,2)*d2c[2]+P(1,1,3)*d2c[3]); d2cP[ 6] = (P(1,2,0)*d2c[0]+P(1,2,1)*d2c[1]+P(1,2,2)*d2c[2]+P(1,2,3)*d2c[3]); d2cP[ 7] = (P(1,3,0)*d2c[0]+P(1,3,1)*d2c[1]+P(1,3,2)*d2c[2]+P(1,3,3)*d2c[3]); d2cP[ 8] = (P(2,0,0)*d2c[0]+P(2,0,1)*d2c[1]+P(2,0,2)*d2c[2]+P(2,0,3)*d2c[3]); d2cP[ 9] = (P(2,1,0)*d2c[0]+P(2,1,1)*d2c[1]+P(2,1,2)*d2c[2]+P(2,1,3)*d2c[3]); d2cP[10] = (P(2,2,0)*d2c[0]+P(2,2,1)*d2c[1]+P(2,2,2)*d2c[2]+P(2,2,3)*d2c[3]); d2cP[11] = (P(2,3,0)*d2c[0]+P(2,3,1)*d2c[1]+P(2,3,2)*d2c[2]+P(2,3,3)*d2c[3]); d2cP[12] = (P(3,0,0)*d2c[0]+P(3,0,1)*d2c[1]+P(3,0,2)*d2c[2]+P(3,0,3)*d2c[3]); d2cP[13] = (P(3,1,0)*d2c[0]+P(3,1,1)*d2c[1]+P(3,1,2)*d2c[2]+P(3,1,3)*d2c[3]); d2cP[14] = (P(3,2,0)*d2c[0]+P(3,2,1)*d2c[1]+P(3,2,2)*d2c[2]+P(3,2,3)*d2c[3]); d2cP[15] = (P(3,3,0)*d2c[0]+P(3,3,1)*d2c[1]+P(3,3,2)*d2c[2]+P(3,3,3)*d2c[3]); bcP[0] = ( b[0]*cP[ 0] + b[1]*cP[ 1] + b[2]*cP[ 2] + b[3]*cP[ 3]); bcP[1] = ( b[0]*cP[ 4] + b[1]*cP[ 5] + b[2]*cP[ 6] + b[3]*cP[ 7]); bcP[2] = ( b[0]*cP[ 8] + b[1]*cP[ 9] + b[2]*cP[10] + b[3]*cP[11]); bcP[3] = ( b[0]*cP[12] + b[1]*cP[13] + b[2]*cP[14] + b[3]*cP[15]); dbcP[0] = ( db[0]*cP[ 0] + db[1]*cP[ 1] + db[2]*cP[ 2] + db[3]*cP[ 3]); dbcP[1] = ( db[0]*cP[ 4] + db[1]*cP[ 5] + db[2]*cP[ 6] + db[3]*cP[ 7]); dbcP[2] = ( db[0]*cP[ 8] + db[1]*cP[ 9] + db[2]*cP[10] + db[3]*cP[11]); dbcP[3] = ( db[0]*cP[12] + db[1]*cP[13] + db[2]*cP[14] + db[3]*cP[15]); bdcP[0] = ( b[0]*dcP[ 0] + b[1]*dcP[ 1] + b[2]*dcP[ 2] + b[3]*dcP[ 3]); bdcP[1] = ( b[0]*dcP[ 4] + b[1]*dcP[ 5] + b[2]*dcP[ 6] + b[3]*dcP[ 7]); bdcP[2] = ( b[0]*dcP[ 8] + b[1]*dcP[ 9] + b[2]*dcP[10] + b[3]*dcP[11]); bdcP[3] = ( b[0]*dcP[12] + b[1]*dcP[13] + b[2]*dcP[14] + b[3]*dcP[15]); bd2cP[0] = ( b[0]*d2cP[ 0] + b[1]*d2cP[ 1] + b[2]*d2cP[ 2] + b[3]*d2cP[ 3]); bd2cP[1] = ( b[0]*d2cP[ 4] + b[1]*d2cP[ 5] + b[2]*d2cP[ 6] + b[3]*d2cP[ 
7]); bd2cP[2] = ( b[0]*d2cP[ 8] + b[1]*d2cP[ 9] + b[2]*d2cP[10] + b[3]*d2cP[11]); bd2cP[3] = ( b[0]*d2cP[12] + b[1]*d2cP[13] + b[2]*d2cP[14] + b[3]*d2cP[15]); d2bcP[0] = ( d2b[0]*cP[ 0] + d2b[1]*cP[ 1] + d2b[2]*cP[ 2] + d2b[3]*cP[ 3]); d2bcP[1] = ( d2b[0]*cP[ 4] + d2b[1]*cP[ 5] + d2b[2]*cP[ 6] + d2b[3]*cP[ 7]); d2bcP[2] = ( d2b[0]*cP[ 8] + d2b[1]*cP[ 9] + d2b[2]*cP[10] + d2b[3]*cP[11]); d2bcP[3] = ( d2b[0]*cP[12] + d2b[1]*cP[13] + d2b[2]*cP[14] + d2b[3]*cP[15]); dbdcP[0] = ( db[0]*dcP[ 0] + db[1]*dcP[ 1] + db[2]*dcP[ 2] + db[3]*dcP[ 3]); dbdcP[1] = ( db[0]*dcP[ 4] + db[1]*dcP[ 5] + db[2]*dcP[ 6] + db[3]*dcP[ 7]); dbdcP[2] = ( db[0]*dcP[ 8] + db[1]*dcP[ 9] + db[2]*dcP[10] + db[3]*dcP[11]); dbdcP[3] = ( db[0]*dcP[12] + db[1]*dcP[13] + db[2]*dcP[14] + db[3]*dcP[15]); *val = a[0]*bcP[0] + a[1]*bcP[1] + a[2]*bcP[2] + a[3]*bcP[3]; grad[0] = spline->x_grid.delta_inv * (da[0] *bcP[0] + da[1]*bcP[1] + da[2]*bcP[2] + da[3]*bcP[3]); grad[1] = spline->y_grid.delta_inv * (a[0]*dbcP[0] + a[1]*dbcP[1] + a[2]*dbcP[2] + a[3]*dbcP[3]); grad[2] = spline->z_grid.delta_inv * (a[0]*bdcP[0] + a[1]*bdcP[1] + a[2]*bdcP[2] + a[3]*bdcP[3]); // d2x hess[0] = spline->x_grid.delta_inv * spline->x_grid.delta_inv * (d2a[0]*bcP[0] + d2a[1]*bcP[1] + d2a[2]*bcP[2] + d2a[3]*bcP[3]); // dx dy hess[1] = spline->x_grid.delta_inv * spline->y_grid.delta_inv * (da[0]*dbcP[0] + da[1]*dbcP[1] + da[2]*dbcP[2] + da[3]*dbcP[3]); hess[3] = hess[1]; // dx dz; hess[2] = spline->x_grid.delta_inv * spline->z_grid.delta_inv * (da[0]*bdcP[0] + da[1]*bdcP[1] + da[2]*bdcP[2] + da[3]*bdcP[3]); hess[6] = hess[2]; // d2y hess[4] = spline->y_grid.delta_inv * spline->y_grid.delta_inv * (a[0]*d2bcP[0] + a[1]*d2bcP[1] + a[2]*d2bcP[2] + a[3]*d2bcP[3]); // dy dz hess[5] = spline->y_grid.delta_inv * spline->z_grid.delta_inv * (a[0]*dbdcP[0] + a[1]*dbdcP[1] + a[2]*dbdcP[2] + a[3]*dbdcP[3]); hess[7] = hess[5]; // d2z hess[8] = spline->z_grid.delta_inv * spline->z_grid.delta_inv * (a[0]*bd2cP[0] + a[1]*bd2cP[1] + a[2]*bd2cP[2] + 
a[3]*bd2cP[3]); #undef P } #endif einspline-0.9.2/src/nugrid.c0000664000113000011300000001174611041141642012641 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. // // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. // // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #include "nugrid.h" #include #include #include int center_grid_reverse_map (void* gridptr, double x) { center_grid *grid = (center_grid *)gridptr; x -= grid->center; double index = copysign (log1p(fabs(x)*grid->aInv)*grid->bInv, x); return (int)floor(grid->half_points + index - grid->even_half); } int log_grid_reverse_map (void *gridptr, double x) { log_grid *grid = (log_grid *)gridptr; int index = (int) floor(grid->ainv*log(x*grid->startinv)); if (index < 0) return 0; else return index; } int general_grid_reverse_map (void* gridptr, double x) { NUgrid* grid = (NUgrid*) gridptr; int N = grid->num_points; double *points = grid->points; if (x <= points[0]) return (0); else if (x >= points[N-1]) return (N-1); else { int hi = N-1; int lo = 0; bool done = false; while (!done) { int i = (hi+lo)>>1; if (points[i] > x) hi = i; else lo 
= i; done = (hi-lo)<2; } return (lo); } } NUgrid* create_center_grid (double start, double end, double ratio, int num_points) { center_grid *grid = malloc (sizeof (center_grid)); if (grid != NULL) { assert (ratio > 1.0); grid->start = start; grid->end = end; grid->center = 0.5*(start + end); grid->num_points = num_points; grid->half_points = num_points/2; grid->odd = ((num_points % 2) == 1); grid->b = log(ratio) / (double)(grid->half_points-1); grid->bInv = 1.0/grid->b; grid->points = malloc (num_points * sizeof(double)); if (grid->odd) { grid->even_half = 0.0; grid->odd_one = 1; grid->a = 0.5*(end-start)/expm1(grid->b*grid->half_points); grid->aInv = 1.0/grid->a; for (int i=-grid->half_points; i<=grid->half_points; i++) { double sign; if (i<0) sign = -1.0; else sign = 1.0; grid->points[i+grid->half_points] = sign * grid->a*expm1(grid->b*abs(i))+grid->center; } } else { grid->even_half = 0.5; grid->odd_one = 0; grid->a = 0.5*(end-start)/expm1(grid->b*(-0.5+grid->half_points)); grid->aInv = 1.0/grid->a; for (int i=-grid->half_points; ihalf_points; i++) { double sign; if (i<0) sign = -1.0; else sign = 1.0; grid->points[i+grid->half_points] = sign * grid->a*expm1(grid->b*fabs(0.5+i)) + grid->center; } } grid->reverse_map = center_grid_reverse_map; grid->code = CENTER; } return (NUgrid*) grid; } NUgrid* create_log_grid (double start, double end, int num_points) { log_grid *grid = malloc (sizeof (log_grid)); grid->code = LOG; grid->start = start; grid->end = end; grid->num_points = num_points; grid->points = malloc(num_points*sizeof(double)); grid->a = 1.0/(double)(num_points-1)*log(end/start); grid->ainv = 1.0/grid->a; grid->startinv = 1.0/start; for (int i=0; ipoints[i] = start*exp(grid->a*(double)i); grid->reverse_map = log_grid_reverse_map; return (NUgrid*) grid; } NUgrid* create_general_grid (double *points, int num_points) { NUgrid* grid = malloc (sizeof(NUgrid)); if (grid != NULL) { grid->reverse_map = general_grid_reverse_map; grid->code = GENERAL; grid->points 
= malloc (num_points*sizeof(double)); grid->start = points[0]; grid->end = points[num_points-1]; grid->num_points = num_points; for (int i=0; ipoints[i] = points[i]; grid->code = GENERAL; } return grid; } void destroy_grid (NUgrid *grid) { free (grid->points); free (grid); } einspline-0.9.2/src/multi_bspline.h0000664000113000011300000000420311015564036014221 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. // // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. 
// // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #ifndef MULTI_BSPLINE_H #define MULTI_BSPLINE_H #include "bspline_base.h" //////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////// //// Bspline structure definitions //// //////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////// #include "multi_bspline_structs.h" // Currently, some of the single-precision routines use SSE2 instructions #include "multi_bspline_eval_s.h" #include "multi_bspline_eval_c.h" #include "multi_bspline_eval_d.h" #include "multi_bspline_eval_z.h" #include "bspline_create.h" #include "multi_bspline_create.h" #endif einspline-0.9.2/src/multi_bspline_eval_sse_d.c0000664000113000011300000000276611015564526016420 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. // // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. 
// // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #include "multi_bspline_eval_sse_d_impl.h" einspline-0.9.2/src/multi_bspline_eval_sse_c.c0000664000113000011300000000276611015566710016414 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. // // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. // // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #include "multi_bspline_eval_sse_c_impl.h" einspline-0.9.2/src/multi_bspline_eval_sse_c_impl.h0000664000113000011300000014407511235572631017445 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. 
// // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. // // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #ifndef MULTI_BSPLINE_EVAL_SSE_C_IMPL_H #define MULTI_BSPLINE_EVAL_SSE_C_IMPL_H #include #include #ifdef HAVE_SSE3 #include #endif #include #include "bspline_base.h" #include "multi_bspline_structs.h" extern __m128 *restrict A_s; extern const float* restrict Af; extern const float* restrict dAf; extern const float* restrict d2Af; // Use plain-old SSE instructions #define _MM_MATVEC4_PS(M0, M1, M2, M3, v, r) \ do { \ __m128 _r0 = _mm_mul_ps (M0, v); \ __m128 _r1 = _mm_mul_ps (M1, v); \ __m128 _r2 = _mm_mul_ps (M2, v); \ __m128 _r3 = _mm_mul_ps (M3, v); \ _MM_TRANSPOSE4_PS (_r0, _r1, _r2, _r3); \ r = _mm_add_ps (_mm_add_ps (_r0, _r1), _mm_add_ps (_r2, _r3)); \ } while (0); #define _MM_DOT4_PS(A, B, p) \ do { \ __m128 t = _mm_mul_ps (A, B); \ __m128 alo = _mm_shuffle_ps (t, t, _MM_SHUFFLE(0,1,0,1)); \ __m128 ahi = _mm_shuffle_ps (t, t, _MM_SHUFFLE(2,3,2,3)); \ __m128 _a = _mm_add_ps (alo, ahi); \ __m128 rlo = _mm_shuffle_ps (_a, _a, _MM_SHUFFLE(0,0,0,0)); \ __m128 rhi = _mm_shuffle_ps (_a, _a, _MM_SHUFFLE(1,1,1,1)); \ __m128 _r = _mm_add_ps (rlo, rhi); \ _mm_store_ss (&(p), _r); \ } while(0); 
/************************************************************/ /* 1D single-precision, complex evaulation functions */ /************************************************************/ void eval_multi_UBspline_1d_c (multi_UBspline_1d_c *spline, double x, complex_float* restrict vals) { x -= spline->x_grid.start; float ux = x*spline->x_grid.delta_inv; float ipartx, tx; tx = modff (ux, &ipartx); int ix = (int) ipartx; float tpx[4], a[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; complex_float* restrict coefs = spline->coefs; a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]); a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]); intptr_t xs = spline->x_stride; for (int n=0; nnum_splines; n++) vals[n] = 0.0; for (int i=0; i<4; i++) { complex_float* restrict coefs = spline->coefs + ((ix+i)*xs); for (int n=0; nnum_splines; n++) vals[n] += a[i] * coefs[n]; } } void eval_multi_UBspline_1d_c_vg (multi_UBspline_1d_c *spline, double x, complex_float* restrict vals, complex_float* restrict grads) { x -= spline->x_grid.start; float ux = x*spline->x_grid.delta_inv; float ipartx, tx; tx = modff (ux, &ipartx); int ix = (int) ipartx; float tpx[4], a[4], da[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; complex_float* restrict coefs = spline->coefs; a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]); a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]); da[0] = (dAf[ 0]*tpx[0] + dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]); da[1] = (dAf[ 4]*tpx[0] + dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]); da[2] = (dAf[ 8]*tpx[0] + dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]); da[3] = 
(dAf[12]*tpx[0] + dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]); intptr_t xs = spline->x_stride; for (int n=0; nnum_splines; n++) { vals[n] = 0.0; grads[n] = 0.0; } for (int i=0; i<4; i++) { complex_float* restrict coefs = spline->coefs + ((ix+i)*xs); for (int n=0; nnum_splines; n++) { vals[n] += a[i] * coefs[n]; grads[n] += da[i] * coefs[n]; } } float dxInv = spline->x_grid.delta_inv; for (int n=0; nnum_splines; n++) grads[n] *= dxInv; } void eval_multi_UBspline_1d_c_vgl (multi_UBspline_1d_c *spline, double x, complex_float* restrict vals, complex_float* restrict grads, complex_float* restrict lapl) { x -= spline->x_grid.start; float ux = x*spline->x_grid.delta_inv; float ipartx, tx; tx = modff (ux, &ipartx); int ix = (int) ipartx; float tpx[4], a[4], da[4], d2a[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; complex_float* restrict coefs = spline->coefs; a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]); a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]); da[0] = (dAf[ 0]*tpx[0] + dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]); da[1] = (dAf[ 4]*tpx[0] + dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]); da[2] = (dAf[ 8]*tpx[0] + dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]); da[3] = (dAf[12]*tpx[0] + dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]); d2a[0] = (d2Af[ 0]*tpx[0] + d2Af[ 1]*tpx[1] + d2Af[ 2]*tpx[2] + d2Af[ 3]*tpx[3]); d2a[1] = (d2Af[ 4]*tpx[0] + d2Af[ 5]*tpx[1] + d2Af[ 6]*tpx[2] + d2Af[ 7]*tpx[3]); d2a[2] = (d2Af[ 8]*tpx[0] + d2Af[ 9]*tpx[1] + d2Af[10]*tpx[2] + d2Af[11]*tpx[3]); d2a[3] = (d2Af[12]*tpx[0] + d2Af[13]*tpx[1] + d2Af[14]*tpx[2] + d2Af[15]*tpx[3]); intptr_t xs = spline->x_stride; for (int n=0; nnum_splines; n++) { vals[n] = 0.0; grads[n] = 0.0; lapl[n] = 0.0; } for (int i=0; i<4; i++) { complex_float* restrict coefs = 
spline->coefs + ((ix+i)*xs); for (int n=0; nnum_splines; n++) { vals[n] += a[i] * coefs[n]; grads[n] += da[i] * coefs[n]; lapl[n] += d2a[i] * coefs[n]; } } float dxInv = spline->x_grid.delta_inv; for (int n=0; nnum_splines; n++) { grads[n] *= dxInv; lapl [n] *= dxInv*dxInv; } } void eval_multi_UBspline_1d_c_vgh (multi_UBspline_1d_c *spline, double x, complex_float* restrict vals, complex_float* restrict grads, complex_float* restrict hess) { eval_multi_UBspline_1d_c_vgl (spline, x, vals, grads, hess); } /************************************************************/ /* 2D single-precision, complex evaulation functions */ /************************************************************/ void eval_multi_UBspline_2d_c (multi_UBspline_2d_c *spline, double x, double y, complex_float* restrict vals) { _mm_prefetch ((const char*) &A_s[ 0],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 1],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 2],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 3],_MM_HINT_T0); x -= spline->x_grid.start; y -= spline->y_grid.start; float ux = x*spline->x_grid.delta_inv; float uy = y*spline->y_grid.delta_inv; float ipartx, iparty, tx, ty; tx = modff (ux, &ipartx); ty = modff (uy, &iparty); int ix = (int) ipartx; int iy = (int) iparty; // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] __m128 tpx = _mm_set_ps (tx*tx*tx, tx*tx, tx, 1.0); __m128 tpy = _mm_set_ps (ty*ty*ty, ty*ty, ty, 1.0); // a = A * tpx, b = A * tpy, c = A * tpz // A is 4x4 matrix given by the rows A_s[0], A_s[1], A_s[ 2], A_s[ 3] __m128 a4, b4; // x-dependent vectors _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpx, a4); // y-dependent vectors _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpy, b4); __m128 a[4], b[4]; __m128 tmp; // Unpack a values tmp=_mm_unpacklo_ps( a4, a4); a[0]=_mm_unpacklo_ps(tmp, tmp); a[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( a4, a4); a[2]=_mm_unpacklo_ps(tmp, tmp); a[3]=_mm_unpackhi_ps(tmp, tmp); // 
Unpack b values tmp=_mm_unpacklo_ps( b4, b4); b[0]=_mm_unpacklo_ps(tmp, tmp); b[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( b4, b4); b[2]=_mm_unpacklo_ps(tmp, tmp); b[3]=_mm_unpackhi_ps(tmp, tmp); int N = spline->num_splines; int Nm = (N+1)/2; __m128 mvals[Nm]; // Zero out values; for (int n=0; nx_stride; intptr_t ys = spline->y_stride; // Main compute loop __m128 ab; for (int i=0; i<4; i++) for (int j=0; j<4; j++){ ab = _mm_mul_ps ( a[i], b[j]); __m128* restrict coefs = (__m128*)(spline->coefs + (ix+i)*xs + (iy+j)*ys); for (int n=0; nx_grid.start; y -= spline->y_grid.start; float ux = x*spline->x_grid.delta_inv; float uy = y*spline->y_grid.delta_inv; float ipartx, iparty, tx, ty; tx = modff (ux, &ipartx); ty = modff (uy, &iparty); int ix = (int) ipartx; int iy = (int) iparty; // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] __m128 tpx = _mm_set_ps (tx*tx*tx, tx*tx, tx, 1.0); __m128 tpy = _mm_set_ps (ty*ty*ty, ty*ty, ty, 1.0); // a = A * tpx, b = A * tpy, c = A * tpz // da = dA * tpx, db = dA * tpy, dc = dA * tpz, etc. 
// A is 4x4 matrix given by the rows A_s[0], A_s[1], A_s[ 2], A_s[ 3] __m128 a4, b4, da4, db4; // x-dependent vectors _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpx, a4); _MM_MATVEC4_PS (A_s[ 4], A_s[ 5], A_s[ 6], A_s[ 7], tpx, da4); // y-dependent vectors _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpy, b4); _MM_MATVEC4_PS (A_s[ 4], A_s[ 5], A_s[ 6], A_s[ 7], tpy, db4); __m128 a[4], b[4], da[4], db[4]; __m128 tmp; // Unpack a values tmp=_mm_unpacklo_ps( a4, a4); a[0]=_mm_unpacklo_ps(tmp, tmp); a[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( a4, a4); a[2]=_mm_unpacklo_ps(tmp, tmp); a[3]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpacklo_ps( da4, da4); da[0]=_mm_unpacklo_ps(tmp, tmp); da[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( da4, da4); da[2]=_mm_unpacklo_ps(tmp, tmp); da[3]=_mm_unpackhi_ps(tmp, tmp); // Unpack b values tmp=_mm_unpacklo_ps( b4, b4); b[0]=_mm_unpacklo_ps(tmp, tmp); b[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( b4, b4); b[2]=_mm_unpacklo_ps(tmp, tmp); b[3]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpacklo_ps( db4, db4); db[0]=_mm_unpacklo_ps(tmp, tmp); db[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( db4, db4); db[2]=_mm_unpacklo_ps(tmp, tmp); db[3]=_mm_unpackhi_ps(tmp, tmp); int N = spline->num_splines; int Nm = (N+1)/2; __m128 mvals[Nm], mgrad[2*Nm]; // Zero out values; for (int n=0; nx_stride; intptr_t ys = spline->y_stride; // Main compute loop __m128 ab, dab[2]; for (int i=0; i<4; i++) for (int j=0; j<4; j++){ ab = _mm_mul_ps ( a[i], b[j]); dab[0] = _mm_mul_ps ( da[i], b[j]); dab[1] = _mm_mul_ps ( a[i], db[j]); __m128* restrict coefs = (__m128*)(spline->coefs + (ix+i)*xs + (iy+j)*ys); for (int n=0; nx_grid.delta_inv; float dyInv = spline->y_grid.delta_inv; for (int n=0; n>1; int nm2 = n & 1; vals[n] = ((complex_float*)mvals)[n]; grads[2*n+0] = ((complex_float*)mgrad)[nd2*4 + 2*0 + nm2] * dxInv; grads[2*n+1] = ((complex_float*)mgrad)[nd2*4 + 2*1 + nm2] * dyInv; } } void eval_multi_UBspline_2d_c_vgl 
(multi_UBspline_2d_c *spline, double x, double y, complex_float* restrict vals, complex_float* restrict grads, complex_float* restrict lapl) { _mm_prefetch ((const char*) &A_s[ 0],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 1],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 2],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 3],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 4],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 5],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 6],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 7],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 8],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 9],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[10],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[11],_MM_HINT_T0); x -= spline->x_grid.start; y -= spline->y_grid.start; float ux = x*spline->x_grid.delta_inv; float uy = y*spline->y_grid.delta_inv; float ipartx, iparty, tx, ty; tx = modff (ux, &ipartx); ty = modff (uy, &iparty); int ix = (int) ipartx; int iy = (int) iparty; // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] __m128 tpx = _mm_set_ps (tx*tx*tx, tx*tx, tx, 1.0); __m128 tpy = _mm_set_ps (ty*ty*ty, ty*ty, ty, 1.0); // a = A * tpx, b = A * tpy, c = A * tpz // da = dA * tpx, db = dA * tpy, dc = dA * tpz, etc. 
// A is 4x4 matrix given by the rows A_s[0], A_s[1], A_s[ 2], A_s[ 3] __m128 a4, b4, da4, db4, d2a4, d2b4; // x-dependent vectors _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpx, a4); _MM_MATVEC4_PS (A_s[ 4], A_s[ 5], A_s[ 6], A_s[ 7], tpx, da4); _MM_MATVEC4_PS (A_s[ 8], A_s[ 9], A_s[10], A_s[11], tpx, d2a4); // y-dependent vectors _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpy, b4); _MM_MATVEC4_PS (A_s[ 4], A_s[ 5], A_s[ 6], A_s[ 7], tpy, db4); _MM_MATVEC4_PS (A_s[ 8], A_s[ 9], A_s[10], A_s[11], tpy, d2b4); __m128 a[4], b[4], da[4], db[4], d2a[4], d2b[4]; __m128 tmp; // Unpack a values tmp=_mm_unpacklo_ps( a4, a4); a[0]=_mm_unpacklo_ps(tmp, tmp); a[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( a4, a4); a[2]=_mm_unpacklo_ps(tmp, tmp); a[3]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpacklo_ps( da4, da4); da[0]=_mm_unpacklo_ps(tmp, tmp); da[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( da4, da4); da[2]=_mm_unpacklo_ps(tmp, tmp); da[3]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpacklo_ps(d2a4, d2a4); d2a[0]=_mm_unpacklo_ps(tmp, tmp); d2a[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps(d2a4, d2a4); d2a[2]=_mm_unpacklo_ps(tmp, tmp); d2a[3]=_mm_unpackhi_ps(tmp, tmp); // Unpack b values tmp=_mm_unpacklo_ps( b4, b4); b[0]=_mm_unpacklo_ps(tmp, tmp); b[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( b4, b4); b[2]=_mm_unpacklo_ps(tmp, tmp); b[3]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpacklo_ps( db4, db4); db[0]=_mm_unpacklo_ps(tmp, tmp); db[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( db4, db4); db[2]=_mm_unpacklo_ps(tmp, tmp); db[3]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpacklo_ps(d2b4, d2b4); d2b[0]=_mm_unpacklo_ps(tmp, tmp); d2b[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps(d2b4, d2b4); d2b[2]=_mm_unpacklo_ps(tmp, tmp); d2b[3]=_mm_unpackhi_ps(tmp, tmp); int N = spline->num_splines; int Nm = (N+1)/2; __m128 mvals[Nm], mgrad[2*Nm], mlapl[2*Nm]; // Zero out values; __m128 mzero = _mm_set_ps(0.0, 0.0, 0.0, 0.0); for (int n=0; nx_stride; intptr_t ys = 
spline->y_stride; // Main compute loop __m128 ab, dab[2], d2ab[2]; for (int i=0; i<4; i++) for (int j=0; j<4; j++){ ab = _mm_mul_ps ( a[i], b[j]); dab[0] = _mm_mul_ps ( da[i], b[j]); dab[1] = _mm_mul_ps ( a[i], db[j]); d2ab[0] = _mm_mul_ps (d2a[i], b[j]); d2ab[1] = _mm_mul_ps ( a[i],d2b[j]); __m128* restrict coefs = (__m128*)(spline->coefs + (ix+i)*xs + (iy+j)*ys); for (int n=0; nx_grid.delta_inv; float dyInv = spline->y_grid.delta_inv; for (int n=0; n>1; int nm2 = n & 1; vals[n] = ((complex_float*)mvals)[n]; grads[2*n+0] = ((complex_float*)mgrad)[nd2*4 + 2*0 + nm2] * dxInv; grads[2*n+1] = ((complex_float*)mgrad)[nd2*4 + 2*1 + nm2] * dyInv; lapl [n] = (((complex_float*)mlapl)[nd2*4 + 2*0 + nm2] * dxInv*dxInv + ((complex_float*)mlapl)[nd2*4 + 2*1 + nm2] * dyInv*dyInv); } } void eval_multi_UBspline_2d_c_vgh (multi_UBspline_2d_c *spline, double x, double y, complex_float* restrict vals, complex_float* restrict grads, complex_float* restrict hess) { _mm_prefetch ((const char*) &A_s[ 0],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 1],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 2],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 3],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 4],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 5],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 6],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 7],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 8],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 9],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[10],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[11],_MM_HINT_T0); // /// SSE mesh point determination // __m128 xy = _mm_set_ps (x, y, 0.0, 0.0); // __m128 x0y0 = _mm_set_ps (spline->x_grid.start, spline->y_grid.start, 0.0, 0.0); // __m128 delta_inv = _mm_set_ps (spline->x_grid.delta_inv,spline->y_grid.delta_inv, 0.0, 0.0); // xy = _mm_sub_ps (xy, x0y0); // // ux = (x - x0)/delta_x and same for y // __m128 uxuy = _mm_mul_ps (xy, delta_inv); // // intpart = trunc (ux, uy) // __m128i 
intpart = _mm_cvttps_epi32(uxuy); // __m128i ixiy; // _mm_storeu_si128 (&ixiy, intpart); // // Store to memory for use in C expressions // // xmm registers are stored to memory in reverse order // int ix = ((int *)&ixiy)[3]; // int iy = ((int *)&ixiy)[2]; // // Now compute the vectors: // // tpx = [t_x^3 t_x^2 t_x 1] // // tpy = [t_y^3 t_y^2 t_y 1] // // tpz = [t_z^3 t_z^2 t_z 1] // __m128 ipart = _mm_cvtepi32_ps (intpart); // __m128 txty = _mm_sub_ps (uxuy, ipart); // __m128 one = _mm_set_ps (1.0, 1.0, 1.0, 1.0); // __m128 t2 = _mm_mul_ps (txty, txty); // __m128 t3 = _mm_mul_ps (t2, txty); // __m128 tpx = t3; // __m128 tpy = t2; // __m128 tpz = txty; // __m128 zero = one; // _MM_TRANSPOSE4_PS(zero, tpz, tpy, tpx); x -= spline->x_grid.start; y -= spline->y_grid.start; float ux = x*spline->x_grid.delta_inv; float uy = y*spline->y_grid.delta_inv; float ipartx, iparty, tx, ty; tx = modff (ux, &ipartx); ty = modff (uy, &iparty); int ix = (int) ipartx; int iy = (int) iparty; // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] __m128 tpx = _mm_set_ps (tx*tx*tx, tx*tx, tx, 1.0); __m128 tpy = _mm_set_ps (ty*ty*ty, ty*ty, ty, 1.0); // a = A * tpx, b = A * tpy, c = A * tpz // da = dA * tpx, db = dA * tpy, dc = dA * tpz, etc. 
// A is 4x4 matrix given by the rows A_s[0], A_s[1], A_s[ 2], A_s[ 3] __m128 a4, b4, da4, db4, d2a4, d2b4; // x-dependent vectors _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpx, a4); _MM_MATVEC4_PS (A_s[ 4], A_s[ 5], A_s[ 6], A_s[ 7], tpx, da4); _MM_MATVEC4_PS (A_s[ 8], A_s[ 9], A_s[10], A_s[11], tpx, d2a4); // y-dependent vectors _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpy, b4); _MM_MATVEC4_PS (A_s[ 4], A_s[ 5], A_s[ 6], A_s[ 7], tpy, db4); _MM_MATVEC4_PS (A_s[ 8], A_s[ 9], A_s[10], A_s[11], tpy, d2b4); __m128 a[4], b[4], da[4], db[4], d2a[4], d2b[4]; __m128 tmp; // Unpack a values tmp=_mm_unpacklo_ps( a4, a4); a[0]=_mm_unpacklo_ps(tmp, tmp); a[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( a4, a4); a[2]=_mm_unpacklo_ps(tmp, tmp); a[3]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpacklo_ps( da4, da4); da[0]=_mm_unpacklo_ps(tmp, tmp); da[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( da4, da4); da[2]=_mm_unpacklo_ps(tmp, tmp); da[3]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpacklo_ps(d2a4, d2a4); d2a[0]=_mm_unpacklo_ps(tmp, tmp); d2a[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps(d2a4, d2a4); d2a[2]=_mm_unpacklo_ps(tmp, tmp); d2a[3]=_mm_unpackhi_ps(tmp, tmp); // Unpack b values tmp=_mm_unpacklo_ps( b4, b4); b[0]=_mm_unpacklo_ps(tmp, tmp); b[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( b4, b4); b[2]=_mm_unpacklo_ps(tmp, tmp); b[3]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpacklo_ps( db4, db4); db[0]=_mm_unpacklo_ps(tmp, tmp); db[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( db4, db4); db[2]=_mm_unpacklo_ps(tmp, tmp); db[3]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpacklo_ps(d2b4, d2b4); d2b[0]=_mm_unpacklo_ps(tmp, tmp); d2b[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps(d2b4, d2b4); d2b[2]=_mm_unpacklo_ps(tmp, tmp); d2b[3]=_mm_unpackhi_ps(tmp, tmp); int N = spline->num_splines; int Nm = (N+1)/2; __m128 mvals[Nm], mgrad[2*Nm], mhess[3*Nm]; // Zero out values; __m128 mzero = _mm_set_ps(0.0, 0.0, 0.0, 0.0); for (int n=0; nx_stride; intptr_t ys = 
spline->y_stride; // Main compute loop __m128 ab, dab[2], d2ab[3]; for (int i=0; i<4; i++) for (int j=0; j<4; j++){ ab = _mm_mul_ps ( a[i], b[j]); dab[0] = _mm_mul_ps ( da[i], b[j]); dab[1] = _mm_mul_ps ( a[i], db[j]); d2ab[0] = _mm_mul_ps (d2a[i], b[j]); d2ab[1] = _mm_mul_ps ( da[i], db[j]); d2ab[2] = _mm_mul_ps ( a[i],d2b[j]); __m128* restrict coefs = (__m128*)(spline->coefs + (ix+i)*xs + (iy+j)*ys); for (int n=0; nx_grid.delta_inv; float dyInv = spline->y_grid.delta_inv; for (int n=0; n>1; int nm2 = n & 1; vals[n] = ((complex_float*)mvals)[n]; grads[2*n+0] = ((complex_float*)mgrad)[nd2*4 + 2*0 + nm2] * dxInv; grads[2*n+1] = ((complex_float*)mgrad)[nd2*4 + 2*1 + nm2] * dyInv; hess [4*n+0] = ((complex_float*)mhess)[nd2*6 + 2*0 + nm2] * dxInv*dxInv; hess [4*n+1] = hess[4*n+2] = ((complex_float*)mhess)[nd2*6 + 2*1 + nm2] * dxInv*dyInv; hess [4*n+3] = ((complex_float*)mhess)[nd2*6 + 2*2 + nm2] * dyInv*dyInv; } } /************************************************************/ /* 3D single-precision, complex evaulation functions */ /************************************************************/ void eval_multi_UBspline_3d_c (multi_UBspline_3d_c *spline, double x, double y, double z, complex_float* restrict vals) { _mm_prefetch ((const char*) &A_s[ 0],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 1],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 2],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 3],_MM_HINT_T0); /// SSE mesh point determination __m128 xyz = _mm_set_ps (x, y, z, 0.0); __m128 x0y0z0 = _mm_set_ps (spline->x_grid.start, spline->y_grid.start, spline->z_grid.start, 0.0); __m128 delta_inv = _mm_set_ps (spline->x_grid.delta_inv,spline->y_grid.delta_inv, spline->z_grid.delta_inv, 0.0); xyz = _mm_sub_ps (xyz, x0y0z0); // ux = (x - x0)/delta_x and same for y and z __m128 uxuyuz = _mm_mul_ps (xyz, delta_inv); // intpart = trunc (ux, uy, uz) __m128i intpart = _mm_cvttps_epi32(uxuyuz); __m128i ixiyiz; _mm_storeu_si128 (&ixiyiz, intpart); // Store to memory for use 
in C expressions // xmm registers are stored to memory in reverse order int ix = ((int *)&ixiyiz)[3]; int iy = ((int *)&ixiyiz)[2]; int iz = ((int *)&ixiyiz)[1]; // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // tpz = [t_z^3 t_z^2 t_z 1] __m128 ipart = _mm_cvtepi32_ps (intpart); __m128 txtytz = _mm_sub_ps (uxuyuz, ipart); __m128 one = _mm_set_ps (1.0, 1.0, 1.0, 1.0); __m128 t2 = _mm_mul_ps (txtytz, txtytz); __m128 t3 = _mm_mul_ps (t2, txtytz); __m128 tpx = t3; __m128 tpy = t2; __m128 tpz = txtytz; __m128 zero = one; _MM_TRANSPOSE4_PS(zero, tpz, tpy, tpx); // a = A * tpx, b = A * tpy, c = A * tpz // da = dA * tpx, db = dA * tpy, dc = dA * tpz, etc. // A is 4x4 matrix given by the rows A_s[0], A_s[1], A_s[ 2], A_s[ 3] __m128 a4, b4, c4; // x-dependent vectors _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpx, a4); // y-dependent vectors _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpy, b4); // z-dependent vectors _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpz, c4); __m128 a[4], b[4], c[4]; __m128 tmp; // Unpack a values tmp=_mm_unpacklo_ps( a4, a4); a[0]=_mm_unpacklo_ps(tmp, tmp); a[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( a4, a4); a[2]=_mm_unpacklo_ps(tmp, tmp); a[3]=_mm_unpackhi_ps(tmp, tmp); // Unpack b values tmp=_mm_unpacklo_ps( b4, b4); b[0]=_mm_unpacklo_ps(tmp, tmp); b[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( b4, b4); b[2]=_mm_unpacklo_ps(tmp, tmp); b[3]=_mm_unpackhi_ps(tmp, tmp); // Unpack c values tmp=_mm_unpacklo_ps( c4, c4); c[0]=_mm_unpacklo_ps(tmp, tmp); c[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( c4, c4); c[2]=_mm_unpacklo_ps(tmp, tmp); c[3]=_mm_unpackhi_ps(tmp, tmp); int N = spline->num_splines; int Nm = (N+1)/2; __m128 mvals[Nm]; // Zero out values; for (int n=0; nx_stride; intptr_t ys = spline->y_stride; intptr_t zs = spline->z_stride; __m128 abc; for (int i=0; i<4; i++) for (int j=0; j<4; j++) for (int k=0; k<4; k++) { abc = _mm_mul_ps ( a[i], _mm_mul_ps( b[j], 
c[k])); __m128* restrict coefs = (__m128*)(spline->coefs + (ix+i)*xs + (iy+j)*ys + (iz+k)*zs); for (int n=0; nx_grid.start, spline->y_grid.start, spline->z_grid.start, 0.0); __m128 delta_inv = _mm_set_ps (spline->x_grid.delta_inv,spline->y_grid.delta_inv, spline->z_grid.delta_inv, 0.0); xyz = _mm_sub_ps (xyz, x0y0z0); // ux = (x - x0)/delta_x and same for y and z __m128 uxuyuz = _mm_mul_ps (xyz, delta_inv); // intpart = trunc (ux, uy, uz) __m128i intpart = _mm_cvttps_epi32(uxuyuz); __m128i ixiyiz; _mm_storeu_si128 (&ixiyiz, intpart); // Store to memory for use in C expressions // xmm registers are stored to memory in reverse order int ix = ((int *)&ixiyiz)[3]; int iy = ((int *)&ixiyiz)[2]; int iz = ((int *)&ixiyiz)[1]; intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; intptr_t zs = spline->z_stride; // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // tpz = [t_z^3 t_z^2 t_z 1] __m128 ipart = _mm_cvtepi32_ps (intpart); __m128 txtytz = _mm_sub_ps (uxuyuz, ipart); __m128 one = _mm_set_ps (1.0, 1.0, 1.0, 1.0); __m128 t2 = _mm_mul_ps (txtytz, txtytz); __m128 t3 = _mm_mul_ps (t2, txtytz); __m128 tpx = t3; __m128 tpy = t2; __m128 tpz = txtytz; __m128 zero = one; _MM_TRANSPOSE4_PS(zero, tpz, tpy, tpx); // a = A * tpx, b = A * tpy, c = A * tpz // da = dA * tpx, db = dA * tpy, dc = dA * tpz, etc. 
// A is 4x4 matrix given by the rows A_s[0], A_s[1], A_s[ 2], A_s[ 3] __m128 a4, b4, c4, da4, db4, dc4; // x-dependent vectors _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpx, a4); _MM_MATVEC4_PS (A_s[ 4], A_s[ 5], A_s[ 6], A_s[ 7], tpx, da4); // y-dependent vectors _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpy, b4); _MM_MATVEC4_PS (A_s[ 4], A_s[ 5], A_s[ 6], A_s[ 7], tpy, db4); // z-dependent vectors _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpz, c4); _MM_MATVEC4_PS (A_s[ 4], A_s[ 5], A_s[ 6], A_s[ 7], tpz, dc4); __m128 a[4], b[4], c[4], da[4], db[4], dc[4]; __m128 tmp; // Unpack a values tmp=_mm_unpacklo_ps( a4, a4); a[0]=_mm_unpacklo_ps(tmp, tmp); a[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( a4, a4); a[2]=_mm_unpacklo_ps(tmp, tmp); a[3]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpacklo_ps( da4, da4); da[0]=_mm_unpacklo_ps(tmp, tmp); da[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( da4, da4); da[2]=_mm_unpacklo_ps(tmp, tmp); da[3]=_mm_unpackhi_ps(tmp, tmp); // Unpack b values tmp=_mm_unpacklo_ps( b4, b4); b[0]=_mm_unpacklo_ps(tmp, tmp); b[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( b4, b4); b[2]=_mm_unpacklo_ps(tmp, tmp); b[3]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpacklo_ps( db4, db4); db[0]=_mm_unpacklo_ps(tmp, tmp); db[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( db4, db4); db[2]=_mm_unpacklo_ps(tmp, tmp); db[3]=_mm_unpackhi_ps(tmp, tmp); // Unpack c values tmp=_mm_unpacklo_ps( c4, c4); c[0]=_mm_unpacklo_ps(tmp, tmp); c[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( c4, c4); c[2]=_mm_unpacklo_ps(tmp, tmp); c[3]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpacklo_ps( dc4, dc4); dc[0]=_mm_unpacklo_ps(tmp, tmp); dc[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( dc4, dc4); dc[2]=_mm_unpacklo_ps(tmp, tmp); dc[3]=_mm_unpackhi_ps(tmp, tmp); int N = spline->num_splines; int Nm = (N+1)/2; __m128 mvals[Nm], mgrad[3*Nm]; // Zero out values; __m128 mzero = _mm_set_ps(0.0, 0.0, 0.0, 0.0); for (int n=0; ncoefs + (ix+i)*xs + 
(iy+j)*ys + (iz+k)*zs); for (int n=0; nx_grid.delta_inv; float dyInv = spline->y_grid.delta_inv; float dzInv = spline->z_grid.delta_inv; for (int n=0; n>1; int nm2 = n & 1; vals[n] = ((complex_float*)mvals)[n]; grads[3*n+0] = ((complex_float*)mgrad)[nd2*6 + 2*0 + nm2] * dxInv; grads[3*n+1] = ((complex_float*)mgrad)[nd2*6 + 2*1 + nm2] * dyInv; grads[3*n+2] = ((complex_float*)mgrad)[nd2*6 + 2*2 + nm2] * dzInv; } } void eval_multi_UBspline_3d_c_vgl (multi_UBspline_3d_c *spline, double x, double y, double z, complex_float* restrict vals, complex_float* restrict grads, complex_float* restrict lapl) { _mm_prefetch ((const char*) &A_s[ 0],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 1],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 2],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 3],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 4],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 5],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 6],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 7],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 8],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 9],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[10],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[11],_MM_HINT_T0); /// SSE mesh point determination __m128 xyz = _mm_set_ps (x, y, z, 0.0); __m128 x0y0z0 = _mm_set_ps (spline->x_grid.start, spline->y_grid.start, spline->z_grid.start, 0.0); __m128 delta_inv = _mm_set_ps (spline->x_grid.delta_inv,spline->y_grid.delta_inv, spline->z_grid.delta_inv, 0.0); xyz = _mm_sub_ps (xyz, x0y0z0); // ux = (x - x0)/delta_x and same for y and z __m128 uxuyuz = _mm_mul_ps (xyz, delta_inv); // intpart = trunc (ux, uy, uz) __m128i intpart = _mm_cvttps_epi32(uxuyuz); __m128i ixiyiz; _mm_storeu_si128 (&ixiyiz, intpart); // Store to memory for use in C expressions // xmm registers are stored to memory in reverse order int ix = ((int *)&ixiyiz)[3]; int iy = ((int *)&ixiyiz)[2]; int iz = ((int *)&ixiyiz)[1]; intptr_t xs = spline->x_stride; intptr_t ys = 
spline->y_stride; intptr_t zs = spline->z_stride; // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // tpz = [t_z^3 t_z^2 t_z 1] __m128 ipart = _mm_cvtepi32_ps (intpart); __m128 txtytz = _mm_sub_ps (uxuyuz, ipart); __m128 one = _mm_set_ps (1.0, 1.0, 1.0, 1.0); __m128 t2 = _mm_mul_ps (txtytz, txtytz); __m128 t3 = _mm_mul_ps (t2, txtytz); __m128 tpx = t3; __m128 tpy = t2; __m128 tpz = txtytz; __m128 zero = one; _MM_TRANSPOSE4_PS(zero, tpz, tpy, tpx); // a = A * tpx, b = A * tpy, c = A * tpz // da = dA * tpx, db = dA * tpy, dc = dA * tpz, etc. // A is 4x4 matrix given by the rows A_s[0], A_s[1], A_s[ 2], A_s[ 3] __m128 a4, b4, c4, da4, db4, dc4, d2a4, d2b4, d2c4; // x-dependent vectors _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpx, a4); _MM_MATVEC4_PS (A_s[ 4], A_s[ 5], A_s[ 6], A_s[ 7], tpx, da4); _MM_MATVEC4_PS (A_s[ 8], A_s[ 9], A_s[10], A_s[11], tpx, d2a4); // y-dependent vectors _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpy, b4); _MM_MATVEC4_PS (A_s[ 4], A_s[ 5], A_s[ 6], A_s[ 7], tpy, db4); _MM_MATVEC4_PS (A_s[ 8], A_s[ 9], A_s[10], A_s[11], tpy, d2b4); // z-dependent vectors _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpz, c4); _MM_MATVEC4_PS (A_s[ 4], A_s[ 5], A_s[ 6], A_s[ 7], tpz, dc4); _MM_MATVEC4_PS (A_s[ 8], A_s[ 9], A_s[10], A_s[11], tpz, d2c4); __m128 a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4]; __m128 tmp; // Unpack a values tmp=_mm_unpacklo_ps( a4, a4); a[0]=_mm_unpacklo_ps(tmp, tmp); a[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( a4, a4); a[2]=_mm_unpacklo_ps(tmp, tmp); a[3]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpacklo_ps( da4, da4); da[0]=_mm_unpacklo_ps(tmp, tmp); da[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( da4, da4); da[2]=_mm_unpacklo_ps(tmp, tmp); da[3]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpacklo_ps(d2a4, d2a4); d2a[0]=_mm_unpacklo_ps(tmp, tmp); d2a[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps(d2a4, d2a4); d2a[2]=_mm_unpacklo_ps(tmp, tmp); 
d2a[3]=_mm_unpackhi_ps(tmp, tmp); // Unpack b values tmp=_mm_unpacklo_ps( b4, b4); b[0]=_mm_unpacklo_ps(tmp, tmp); b[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( b4, b4); b[2]=_mm_unpacklo_ps(tmp, tmp); b[3]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpacklo_ps( db4, db4); db[0]=_mm_unpacklo_ps(tmp, tmp); db[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( db4, db4); db[2]=_mm_unpacklo_ps(tmp, tmp); db[3]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpacklo_ps(d2b4, d2b4); d2b[0]=_mm_unpacklo_ps(tmp, tmp); d2b[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps(d2b4, d2b4); d2b[2]=_mm_unpacklo_ps(tmp, tmp); d2b[3]=_mm_unpackhi_ps(tmp, tmp); // Unpack c values tmp=_mm_unpacklo_ps( c4, c4); c[0]=_mm_unpacklo_ps(tmp, tmp); c[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( c4, c4); c[2]=_mm_unpacklo_ps(tmp, tmp); c[3]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpacklo_ps( dc4, dc4); dc[0]=_mm_unpacklo_ps(tmp, tmp); dc[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( dc4, dc4); dc[2]=_mm_unpacklo_ps(tmp, tmp); dc[3]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpacklo_ps(d2c4, d2c4); d2c[0]=_mm_unpacklo_ps(tmp, tmp); d2c[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps(d2c4, d2c4); d2c[2]=_mm_unpacklo_ps(tmp, tmp); d2c[3]=_mm_unpackhi_ps(tmp, tmp); int N = spline->num_splines; int Nm = (N+1)/2; __m128 mvals[Nm], mgrad[3*Nm], mlapl[3*Nm]; // Zero out values; __m128 mzero = _mm_set_ps(0.0, 0.0, 0.0, 0.0); for (int n=0; ncoefs + (ix+i)*xs + (iy+j)*ys + (iz+k)*zs); for (int n=0; nx_grid.delta_inv; float dyInv = spline->y_grid.delta_inv; float dzInv = spline->z_grid.delta_inv; for (int n=0; n>1; int nm2 = n & 1; vals[n] = ((complex_float*)mvals)[n]; grads[3*n+0] = ((complex_float*)mgrad)[nd2*6 + 2*0 + nm2] * dxInv; grads[3*n+1] = ((complex_float*)mgrad)[nd2*6 + 2*1 + nm2] * dyInv; grads[3*n+2] = ((complex_float*)mgrad)[nd2*6 + 2*2 + nm2] * dzInv; lapl [n] = (((complex_float*)mlapl)[nd2*6 + 2*0 + nm2] * dxInv*dxInv + ((complex_float*)mlapl)[nd2*6 + 2*1 + nm2] * dyInv*dyInv + 
((complex_float*)mlapl)[nd2*6 + 2*2 + nm2] * dzInv*dzInv); } } void eval_multi_UBspline_3d_c_vgh (multi_UBspline_3d_c *spline, double x, double y, double z, complex_float* restrict vals, complex_float* restrict grads, complex_float* restrict hess) { _mm_prefetch ((const char*) &A_s[ 0],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 1],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 2],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 3],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 4],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 5],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 6],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 7],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 8],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 9],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[10],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[11],_MM_HINT_T0); /// SSE mesh point determination __m128 xyz = _mm_set_ps (x, y, z, 0.0); __m128 x0y0z0 = _mm_set_ps (spline->x_grid.start, spline->y_grid.start, spline->z_grid.start, 0.0); __m128 delta_inv = _mm_set_ps (spline->x_grid.delta_inv,spline->y_grid.delta_inv, spline->z_grid.delta_inv, 0.0); xyz = _mm_sub_ps (xyz, x0y0z0); // ux = (x - x0)/delta_x and same for y and z __m128 uxuyuz = _mm_mul_ps (xyz, delta_inv); // intpart = trunc (ux, uy, uz) __m128i intpart = _mm_cvttps_epi32(uxuyuz); __m128i ixiyiz; _mm_storeu_si128 (&ixiyiz, intpart); // Store to memory for use in C expressions // xmm registers are stored to memory in reverse order int ix = ((int *)&ixiyiz)[3]; int iy = ((int *)&ixiyiz)[2]; int iz = ((int *)&ixiyiz)[1]; intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; intptr_t zs = spline->z_stride; // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // tpz = [t_z^3 t_z^2 t_z 1] __m128 ipart = _mm_cvtepi32_ps (intpart); __m128 txtytz = _mm_sub_ps (uxuyuz, ipart); __m128 one = _mm_set_ps (1.0, 1.0, 1.0, 1.0); __m128 t2 = _mm_mul_ps (txtytz, txtytz); __m128 t3 = _mm_mul_ps 
(t2, txtytz); __m128 tpx = t3; __m128 tpy = t2; __m128 tpz = txtytz; __m128 zero = one; _MM_TRANSPOSE4_PS(zero, tpz, tpy, tpx); // a = A * tpx, b = A * tpy, c = A * tpz // da = dA * tpx, db = dA * tpy, dc = dA * tpz, etc. // A is 4x4 matrix given by the rows A_s[0], A_s[1], A_s[ 2], A_s[ 3] __m128 a4, b4, c4, da4, db4, dc4, d2a4, d2b4, d2c4; // x-dependent vectors _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpx, a4); _MM_MATVEC4_PS (A_s[ 4], A_s[ 5], A_s[ 6], A_s[ 7], tpx, da4); _MM_MATVEC4_PS (A_s[ 8], A_s[ 9], A_s[10], A_s[11], tpx, d2a4); // y-dependent vectors _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpy, b4); _MM_MATVEC4_PS (A_s[ 4], A_s[ 5], A_s[ 6], A_s[ 7], tpy, db4); _MM_MATVEC4_PS (A_s[ 8], A_s[ 9], A_s[10], A_s[11], tpy, d2b4); // z-dependent vectors _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpz, c4); _MM_MATVEC4_PS (A_s[ 4], A_s[ 5], A_s[ 6], A_s[ 7], tpz, dc4); _MM_MATVEC4_PS (A_s[ 8], A_s[ 9], A_s[10], A_s[11], tpz, d2c4); __m128 a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4]; __m128 tmp; // Unpack a values tmp=_mm_unpacklo_ps( a4, a4); a[0]=_mm_unpacklo_ps(tmp, tmp); a[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( a4, a4); a[2]=_mm_unpacklo_ps(tmp, tmp); a[3]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpacklo_ps( da4, da4); da[0]=_mm_unpacklo_ps(tmp, tmp); da[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( da4, da4); da[2]=_mm_unpacklo_ps(tmp, tmp); da[3]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpacklo_ps(d2a4, d2a4); d2a[0]=_mm_unpacklo_ps(tmp, tmp); d2a[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps(d2a4, d2a4); d2a[2]=_mm_unpacklo_ps(tmp, tmp); d2a[3]=_mm_unpackhi_ps(tmp, tmp); // Unpack b values tmp=_mm_unpacklo_ps( b4, b4); b[0]=_mm_unpacklo_ps(tmp, tmp); b[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( b4, b4); b[2]=_mm_unpacklo_ps(tmp, tmp); b[3]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpacklo_ps( db4, db4); db[0]=_mm_unpacklo_ps(tmp, tmp); db[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( db4, 
db4); db[2]=_mm_unpacklo_ps(tmp, tmp); db[3]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpacklo_ps(d2b4, d2b4); d2b[0]=_mm_unpacklo_ps(tmp, tmp); d2b[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps(d2b4, d2b4); d2b[2]=_mm_unpacklo_ps(tmp, tmp); d2b[3]=_mm_unpackhi_ps(tmp, tmp); // Unpack c values tmp=_mm_unpacklo_ps( c4, c4); c[0]=_mm_unpacklo_ps(tmp, tmp); c[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( c4, c4); c[2]=_mm_unpacklo_ps(tmp, tmp); c[3]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpacklo_ps( dc4, dc4); dc[0]=_mm_unpacklo_ps(tmp, tmp); dc[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( dc4, dc4); dc[2]=_mm_unpacklo_ps(tmp, tmp); dc[3]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpacklo_ps(d2c4, d2c4); d2c[0]=_mm_unpacklo_ps(tmp, tmp); d2c[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps(d2c4, d2c4); d2c[2]=_mm_unpacklo_ps(tmp, tmp); d2c[3]=_mm_unpackhi_ps(tmp, tmp); int N = spline->num_splines; int Nm = (N+1)/2; __m128 mvals[Nm], mgrad[3*Nm], mhess[6*Nm]; // Zero out values; __m128 mzero = _mm_set_ps(0.0, 0.0, 0.0, 0.0); for (int n=0; ncoefs + (ix+i)*xs + (iy+j)*ys + (iz+k)*zs); for (int n=0; nx_grid.delta_inv; float dyInv = spline->y_grid.delta_inv; float dzInv = spline->z_grid.delta_inv; for (int n=0; n>1; int nm2 = n & 1; vals[n] = ((complex_float*)mvals)[n]; grads[3*n+0] = ((complex_float*)mgrad)[nd2*6 + 2*0 + nm2] * dxInv; grads[3*n+1] = ((complex_float*)mgrad)[nd2*6 + 2*1 + nm2] * dyInv; grads[3*n+2] = ((complex_float*)mgrad)[nd2*6 + 2*2 + nm2] * dzInv; hess [9*n+0] = ((complex_float*)mhess)[nd2*12 + 2*0 + nm2] * dxInv*dxInv; hess [9*n+1] = hess[9*n+3] = ((complex_float*)mhess)[nd2*12 + 2*1 + nm2] * dxInv*dyInv; hess [9*n+2] = hess[9*n+6] = ((complex_float*)mhess)[nd2*12 + 2*2 + nm2] * dxInv*dzInv; hess [9*n+4] = ((complex_float*)mhess)[nd2*12 + 2*3 + nm2] * dyInv*dyInv; hess [9*n+5] = hess[9*n+7] = ((complex_float*)mhess)[nd2*12 + 2*4 + nm2] * dyInv*dzInv; hess [9*n+8] = ((complex_float*)mhess)[nd2*12 + 2*5 + nm2] * dzInv*dzInv; } } #endif 
einspline-0.9.2/src/Makefile.in0000664000113000011300000012051211273633721013255 00000000000000# Makefile.in generated by automake 1.10 from Makefile.am. # @configure_input@ # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, # 2003, 2004, 2005, 2006 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ VPATH = @srcdir@ pkgdatadir = $(datadir)/@PACKAGE@ pkglibdir = $(libdir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ check_PROGRAMS = TestBspline$(EXEEXT) TestNUBspline$(EXEEXT) \ test_fbspline$(EXEEXT) test_bspline_s$(EXEEXT) \ test_bspline_d$(EXEEXT) test_multi$(EXEEXT) \ test_fmulti_bspline$(EXEEXT) test_multi_cpp$(EXEEXT) \ $(am__EXEEXT_2) bin_PROGRAMS = time_multi$(EXEEXT) $(am__EXEEXT_1) subdir = src DIST_COMMON = $(include_HEADERS) $(srcdir)/Makefile.am \ $(srcdir)/Makefile.in $(srcdir)/config.h.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \ $(top_srcdir)/m4/acx_pthread.m4 \ $(top_srcdir)/m4/ax_cc_maxopt.m4 \ $(top_srcdir)/m4/ax_cxx_maxopt.m4 \ $(top_srcdir)/m4/ax_f77_maxopt.m4 \ $(top_srcdir)/m4/ax_check_compiler_flags.m4 \ $(top_srcdir)/m4/ax_compiler_vendor.m4 \ 
$(top_srcdir)/m4/ax_cxx_compiler_vendor.m4 \ $(top_srcdir)/m4/ax_c_compiler_vendor.m4 \ $(top_srcdir)/m4/ax_f77_compiler_vendor.m4 \ $(top_srcdir)/m4/ax_gcc_aligns_stack.m4 \ $(top_srcdir)/m4/ax_gcc_archflag.m4 \ $(top_srcdir)/m4/ax_gxx_archflag.m4 \ $(top_srcdir)/m4/ax_gcc_version.m4 \ $(top_srcdir)/m4/ax_gcc_x86_cpuid.m4 \ $(top_srcdir)/m4/ax_ext.m4 $(top_srcdir)/m4/ac_cxx_restrict.m4 \ $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = config.h CONFIG_CLEAN_FILES = am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = `echo $$p | sed -e 's|^.*/||'`; am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(bindir)" \ "$(DESTDIR)$(includedir)" libLTLIBRARIES_INSTALL = $(INSTALL) LTLIBRARIES = $(lib_LTLIBRARIES) am__DEPENDENCIES_1 = libeinspline_la_DEPENDENCIES = $(am__DEPENDENCIES_1) \ $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) am__libeinspline_la_SOURCES_DIST = aligned_alloc.h bspline_base.h \ bspline_create.c bspline_create.h bspline_structs.h \ bspline_data.c bspline_eval_std_s.h bspline_eval_sse_s.h \ bspline_eval_std_c.h bspline_eval_sse_c.h bspline_eval_std_d.h \ bspline_eval_sse_d.h bspline_eval_std_z.h bspline_eval_sse_z.h \ multi_bspline.h multi_bspline_create.c multi_bspline_create.h \ multi_bspline_eval_c.h multi_bspline_eval_d.h \ multi_bspline_eval_s.h multi_bspline_eval_z.h \ multi_bspline_structs.h multi_nubspline_create.c \ nubspline_base.h nubspline_create.c nubspline_create.h \ nubspline_eval_sse_s.h nubspline_eval_std_s.h \ nubspline_eval_sse_c.h nubspline_eval_std_c.h \ nubspline_eval_sse_d.h nubspline_eval_std_d.h \ nubspline_eval_sse_z.h nubspline_eval_std_z.h \ nubspline_structs.h nubasis.h nubasis.c nugrid.h nugrid.c \ multi_bspline_eval_std_s.c multi_bspline_eval_std_s_cpp.cc \ 
multi_bspline_eval_std_c.c multi_bspline_eval_std_c_cpp.cc \ multi_bspline_eval_std_s_impl.h \ multi_bspline_eval_std_c_impl.h multi_bspline_eval_sse_s.c \ multi_bspline_eval_sse_s_cpp.cc multi_bspline_eval_sse_c.c \ multi_bspline_eval_sse_c_cpp.cc \ multi_bspline_eval_sse_s_impl.h \ multi_bspline_eval_sse_c_impl.h multi_bspline_eval_std_d.c \ multi_bspline_eval_std_d_cpp.cc multi_bspline_eval_std_z.c \ multi_bspline_eval_std_z_cpp.cc \ multi_bspline_eval_std_d_impl.h \ multi_bspline_eval_std_z_impl.h multi_nubspline_eval_std_z.c \ multi_nubspline_eval_std_z_cpp.cc \ multi_nubspline_eval_std_z_impl.h multi_bspline_eval_sse_d.c \ multi_bspline_eval_sse_d_cpp.cc multi_bspline_eval_sse_z.c \ multi_bspline_eval_sse_z_cpp.cc \ multi_bspline_eval_sse_d_impl.h \ multi_bspline_eval_sse_z_impl.h multi_nubspline_eval_sse_z.c \ multi_nubspline_eval_sse_z_cpp.cc \ multi_nubspline_eval_sse_z_impl.h blip_create.c blip_create.h \ multi_bspline_create_cuda.cu multi_bspline_cuda_s_impl.h \ multi_bspline_cuda_c_impl.h multi_bspline_cuda_d_impl.h \ multi_bspline_cuda_z_impl.h multi_bspline_eval_cuda.h \ bspline_structs_cuda.h bspline_create_cuda.cu fbspline.c \ fbspline.h fmulti_bspline.c fmulti_bspline.h fnubspline.c \ fnubspline.h @HAVE_SSE_FALSE@am__objects_1 = multi_bspline_eval_std_s.lo \ @HAVE_SSE_FALSE@ multi_bspline_eval_std_s_cpp.lo \ @HAVE_SSE_FALSE@ multi_bspline_eval_std_c.lo \ @HAVE_SSE_FALSE@ multi_bspline_eval_std_c_cpp.lo @HAVE_SSE_TRUE@am__objects_1 = multi_bspline_eval_sse_s.lo \ @HAVE_SSE_TRUE@ multi_bspline_eval_sse_s_cpp.lo \ @HAVE_SSE_TRUE@ multi_bspline_eval_sse_c.lo \ @HAVE_SSE_TRUE@ multi_bspline_eval_sse_c_cpp.lo @HAVE_SSE2_FALSE@am__objects_2 = multi_bspline_eval_std_d.lo \ @HAVE_SSE2_FALSE@ multi_bspline_eval_std_d_cpp.lo \ @HAVE_SSE2_FALSE@ multi_bspline_eval_std_z.lo \ @HAVE_SSE2_FALSE@ multi_bspline_eval_std_z_cpp.lo \ @HAVE_SSE2_FALSE@ multi_nubspline_eval_std_z.lo \ @HAVE_SSE2_FALSE@ multi_nubspline_eval_std_z_cpp.lo @HAVE_SSE2_TRUE@am__objects_2 = 
multi_bspline_eval_sse_d.lo \ @HAVE_SSE2_TRUE@ multi_bspline_eval_sse_d_cpp.lo \ @HAVE_SSE2_TRUE@ multi_bspline_eval_sse_z.lo \ @HAVE_SSE2_TRUE@ multi_bspline_eval_sse_z_cpp.lo \ @HAVE_SSE2_TRUE@ multi_nubspline_eval_sse_z.lo \ @HAVE_SSE2_TRUE@ multi_nubspline_eval_sse_z_cpp.lo @WANT_BLIPS_TRUE@am__objects_3 = blip_create.lo @HAVE_CUDA_TRUE@am__objects_4 = multi_bspline_create_cuda.lo \ @HAVE_CUDA_TRUE@ bspline_create_cuda.lo @WANT_FORTRAN_TRUE@am__objects_5 = fbspline.lo fmulti_bspline.lo \ @WANT_FORTRAN_TRUE@ fnubspline.lo am_libeinspline_la_OBJECTS = bspline_create.lo bspline_data.lo \ multi_bspline_create.lo multi_nubspline_create.lo \ nubspline_create.lo nubasis.lo nugrid.lo $(am__objects_1) \ $(am__objects_2) $(am__objects_3) $(am__objects_4) \ $(am__objects_5) libeinspline_la_OBJECTS = $(am_libeinspline_la_OBJECTS) @HAVE_CUDA_TRUE@am__EXEEXT_1 = test_multi_cuda$(EXEEXT) binPROGRAMS_INSTALL = $(INSTALL_PROGRAM) @WANT_BLIPS_TRUE@am__EXEEXT_2 = test_blip$(EXEEXT) PROGRAMS = $(bin_PROGRAMS) am_TestBspline_OBJECTS = TestBspline.$(OBJEXT) TestBspline_OBJECTS = $(am_TestBspline_OBJECTS) TestBspline_DEPENDENCIES = libeinspline.la $(am__DEPENDENCIES_1) \ $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) am_TestNUBspline_OBJECTS = TestNUBspline.$(OBJEXT) TestNUBspline_OBJECTS = $(am_TestNUBspline_OBJECTS) TestNUBspline_DEPENDENCIES = libeinspline.la $(am__DEPENDENCIES_1) \ $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) am__test_blip_SOURCES_DIST = test_blip.c @WANT_BLIPS_TRUE@am_test_blip_OBJECTS = test_blip.$(OBJEXT) test_blip_OBJECTS = $(am_test_blip_OBJECTS) test_blip_DEPENDENCIES = libeinspline.la $(am__DEPENDENCIES_1) \ $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) am_test_bspline_d_OBJECTS = test_bspline_d.$(OBJEXT) test_bspline_d_OBJECTS = $(am_test_bspline_d_OBJECTS) test_bspline_d_DEPENDENCIES = libeinspline.la $(am__DEPENDENCIES_1) \ $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) am_test_bspline_s_OBJECTS = test_bspline_s.$(OBJEXT) test_bspline_s_OBJECTS = 
$(am_test_bspline_s_OBJECTS) test_bspline_s_DEPENDENCIES = libeinspline.la $(am__DEPENDENCIES_1) \ $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) am_test_fbspline_OBJECTS = test_fbspline.$(OBJEXT) test_fbspline_OBJECTS = $(am_test_fbspline_OBJECTS) test_fbspline_DEPENDENCIES = libeinspline.la $(am__DEPENDENCIES_1) \ $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) am_test_fmulti_bspline_OBJECTS = test_fmulti_bspline.$(OBJEXT) test_fmulti_bspline_OBJECTS = $(am_test_fmulti_bspline_OBJECTS) test_fmulti_bspline_DEPENDENCIES = libeinspline.la \ $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ $(am__DEPENDENCIES_1) am_test_multi_OBJECTS = test_multi.$(OBJEXT) test_multi_OBJECTS = $(am_test_multi_OBJECTS) test_multi_DEPENDENCIES = libeinspline.la $(am__DEPENDENCIES_1) \ $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) am_test_multi_cpp_OBJECTS = test_multi_cpp.$(OBJEXT) test_multi_cpp_OBJECTS = $(am_test_multi_cpp_OBJECTS) test_multi_cpp_DEPENDENCIES = libeinspline.la $(am__DEPENDENCIES_1) \ $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) am__test_multi_cuda_SOURCES_DIST = test_multi_cuda.cu @HAVE_CUDA_TRUE@am_test_multi_cuda_OBJECTS = \ @HAVE_CUDA_TRUE@ test_multi_cuda.$(OBJEXT) test_multi_cuda_OBJECTS = $(am_test_multi_cuda_OBJECTS) @HAVE_CUDA_TRUE@test_multi_cuda_DEPENDENCIES = libeinspline.la \ @HAVE_CUDA_TRUE@ $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ @HAVE_CUDA_TRUE@ $(am__DEPENDENCIES_1) am_time_multi_OBJECTS = time_multi.$(OBJEXT) time_multi_OBJECTS = $(am_time_multi_OBJECTS) time_multi_DEPENDENCIES = libeinspline.la $(am__DEPENDENCIES_1) \ $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) DEFAULT_INCLUDES = -I.@am__isrc@ depcomp = $(SHELL) $(top_srcdir)/depcomp am__depfiles_maybe = depfiles COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) CCLD = 
$(CC) LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ $(LDFLAGS) -o $@ CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) LTCXXCOMPILE = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) CXXLD = $(CXX) CXXLINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \ $(LDFLAGS) -o $@ F77COMPILE = $(F77) $(AM_FFLAGS) $(FFLAGS) LTF77COMPILE = $(LIBTOOL) --tag=F77 $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=compile $(F77) $(AM_FFLAGS) $(FFLAGS) F77LD = $(F77) F77LINK = $(LIBTOOL) --tag=F77 $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(F77LD) $(AM_FFLAGS) $(FFLAGS) $(AM_LDFLAGS) \ $(LDFLAGS) -o $@ SOURCES = $(libeinspline_la_SOURCES) $(TestBspline_SOURCES) \ $(TestNUBspline_SOURCES) $(test_blip_SOURCES) \ $(test_bspline_d_SOURCES) $(test_bspline_s_SOURCES) \ $(test_fbspline_SOURCES) $(test_fmulti_bspline_SOURCES) \ $(test_multi_SOURCES) $(test_multi_cpp_SOURCES) \ $(test_multi_cuda_SOURCES) $(time_multi_SOURCES) DIST_SOURCES = $(am__libeinspline_la_SOURCES_DIST) \ $(TestBspline_SOURCES) $(TestNUBspline_SOURCES) \ $(am__test_blip_SOURCES_DIST) $(test_bspline_d_SOURCES) \ $(test_bspline_s_SOURCES) $(test_fbspline_SOURCES) \ $(test_fmulti_bspline_SOURCES) $(test_multi_SOURCES) \ $(test_multi_cpp_SOURCES) $(am__test_multi_cuda_SOURCES_DIST) \ $(time_multi_SOURCES) includeHEADERS_INSTALL = $(INSTALL_HEADER) HEADERS = $(include_HEADERS) ETAGS = etags CTAGS = ctags DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) ACLOCAL = @ACLOCAL@ ALL_STATIC = @ALL_STATIC@ AMTAR = @AMTAR@ AR = @AR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ CPP = 
@CPP@ CPPFLAGS = @CPPFLAGS@ CUDA_CFLAGS = @CUDA_CFLAGS@ CUDA_LIBS = @CUDA_LIBS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ ECHO = @ECHO@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ F77 = @F77@ FFLAGS = @FFLAGS@ FFTW3F_CFLAGS = @FFTW3F_CFLAGS@ FFTW3F_LIBS = @FFTW3F_LIBS@ FFTW3_CFLAGS = @FFTW3_CFLAGS@ FFTW3_LIBS = @FFTW3_LIBS@ FLIBS = @FLIBS@ GREP = @GREP@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LIBTOOL = @LIBTOOL@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ MAKEINFO = @MAKEINFO@ MKDIR_P = @MKDIR_P@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ OBJEXT = @OBJEXT@ OPENMP_FLAG = @OPENMP_FLAG@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_VERSION = @PACKAGE_VERSION@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKGDATADEF = @PKGDATADEF@ PKG_CONFIG = @PKG_CONFIG@ POW_LIB = @POW_LIB@ PRTDIAG = @PRTDIAG@ PTHREAD_FLAG = @PTHREAD_FLAG@ RANLIB = @RANLIB@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMD_FLAGS = @SIMD_FLAGS@ STRIP = @STRIP@ VERSION = @VERSION@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_F77 = @ac_ct_F77@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ dvidir = @dvidir@ exec_prefix = @exec_prefix@ host = @host@ 
host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ includedir = $(prefix)/include/einspline infodir = @infodir@ install_sh = @install_sh@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target_alias = @target_alias@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ AM_LDFLAGS = $(ALL_STATIC) @WANT_FORTRAN_TRUE@MAYBE_FORTRAN = fbspline.c \ @WANT_FORTRAN_TRUE@ fbspline.h \ @WANT_FORTRAN_TRUE@ fmulti_bspline.c \ @WANT_FORTRAN_TRUE@ fmulti_bspline.h \ @WANT_FORTRAN_TRUE@ fnubspline.c \ @WANT_FORTRAN_TRUE@ fnubspline.h @WANT_BLIPS_TRUE@MAYBE_BLIPS = blip_create.c blip_create.h @WANT_BLIPS_TRUE@CHECK_BLIPS = test_blip @HAVE_CUDA_TRUE@MAYBE_CUDA = multi_bspline_create_cuda.cu \ @HAVE_CUDA_TRUE@ multi_bspline_cuda_s_impl.h \ @HAVE_CUDA_TRUE@ multi_bspline_cuda_c_impl.h \ @HAVE_CUDA_TRUE@ multi_bspline_cuda_d_impl.h \ @HAVE_CUDA_TRUE@ multi_bspline_cuda_z_impl.h \ @HAVE_CUDA_TRUE@ multi_bspline_eval_cuda.h \ @HAVE_CUDA_TRUE@ bspline_structs_cuda.h \ @HAVE_CUDA_TRUE@ bspline_create_cuda.cu @HAVE_CUDA_TRUE@CHECK_CUDA = test_multi_cuda @HAVE_CUDA_TRUE@test_multi_cuda_SOURCES = test_multi_cuda.cu @HAVE_CUDA_TRUE@test_multi_cuda_LDADD = libeinspline.la $(FLIBS) $(FFTW3_LIBS) $(FFTW3F_LIB) $(CUDA_LIBS) @HAVE_SSE_FALSE@SINGLE_SRC = multi_bspline_eval_std_s.c multi_bspline_eval_std_s_cpp.cc \ @HAVE_SSE_FALSE@ multi_bspline_eval_std_c.c multi_bspline_eval_std_c_cpp.cc \ @HAVE_SSE_FALSE@ multi_bspline_eval_std_s_impl.h multi_bspline_eval_std_c_impl.h @HAVE_SSE_TRUE@SINGLE_SRC = multi_bspline_eval_sse_s.c multi_bspline_eval_sse_s_cpp.cc \ @HAVE_SSE_TRUE@ multi_bspline_eval_sse_c.c 
multi_bspline_eval_sse_c_cpp.cc \ @HAVE_SSE_TRUE@ multi_bspline_eval_sse_s_impl.h multi_bspline_eval_sse_c_impl.h @HAVE_SSE2_FALSE@DOUBLE_SRC = multi_bspline_eval_std_d.c multi_bspline_eval_std_d_cpp.cc \ @HAVE_SSE2_FALSE@ multi_bspline_eval_std_z.c multi_bspline_eval_std_z_cpp.cc \ @HAVE_SSE2_FALSE@ multi_bspline_eval_std_d_impl.h multi_bspline_eval_std_z_impl.h \ @HAVE_SSE2_FALSE@ multi_nubspline_eval_std_z.c multi_nubspline_eval_std_z_cpp.cc \ @HAVE_SSE2_FALSE@ multi_nubspline_eval_std_z_impl.h @HAVE_SSE2_TRUE@DOUBLE_SRC = multi_bspline_eval_sse_d.c multi_bspline_eval_sse_d_cpp.cc \ @HAVE_SSE2_TRUE@ multi_bspline_eval_sse_z.c multi_bspline_eval_sse_z_cpp.cc \ @HAVE_SSE2_TRUE@ multi_bspline_eval_sse_d_impl.h multi_bspline_eval_sse_z_impl.h \ @HAVE_SSE2_TRUE@ multi_nubspline_eval_sse_z.c multi_nubspline_eval_sse_z_cpp.cc \ @HAVE_SSE2_TRUE@ multi_nubspline_eval_sse_z_impl.h TestBspline_SOURCES = TestBspline.c TestNUBspline_SOURCES = TestNUBspline.c test_fbspline_SOURCES = test_fbspline.f test_fmulti_bspline_SOURCES = test_fmulti_bspline.f @WANT_BLIPS_TRUE@test_blip_SOURCES = test_blip.c test_bspline_s_SOURCES = test_bspline_s.c test_bspline_d_SOURCES = test_bspline_d.c test_multi_SOURCES = test_multi.c test_multi_cpp_SOURCES = test_multi_cpp.cc time_multi_SOURCES = time_multi.c LDADD = AM_CCFLAGS = -g TestBspline_LDADD = libeinspline.la $(FLIBS) $(FFTW3_LIBS) $(FFTW3F_LIBS) TestNUBspline_LDADD = libeinspline.la $(FLIBS) $(FFTW3_LIBS) $(FFTW3F_LIBS) test_fbspline_LDADD = libeinspline.la $(FLIBS) $(FFTW3_LIBS) $(FFTW3F_LIBS) test_fmulti_bspline_LDADD = libeinspline.la $(FLIBS) $(FFTW3_LIBS) $(FFTW3F_LIBS) test_blip_LDADD = libeinspline.la $(FLIBS) $(FFTW3_LIBS) $(FFTW3F_LIBS) test_bspline_s_LDADD = libeinspline.la $(FLIBS) $(FFTW3_LIBS) $(FFTW3F_LIBS) test_bspline_d_LDADD = libeinspline.la $(FLIBS) $(FFTW3_LIBS) $(FFTW3F_LIBS) test_multi_LDADD = libeinspline.la $(FLIBS) $(FFTW3_LIBS) $(FFTW3F_LIBS) test_multi_cpp_LDADD = libeinspline.la $(FLIBS) $(FFTW3_LIBS) 
$(FFTW3F_LIBS) time_multi_LDADD = libeinspline.la $(FLIBS) $(FFTW3_LIBS) $(FFTW3F_LIBS) lib_LTLIBRARIES = libeinspline.la libeinspline_la_SOURCES = \ aligned_alloc.h \ bspline_base.h \ bspline_create.c \ bspline_create.h \ bspline_structs.h \ bspline_data.c \ bspline_eval_std_s.h \ bspline_eval_sse_s.h \ bspline_eval_std_c.h \ bspline_eval_sse_c.h \ bspline_eval_std_d.h \ bspline_eval_sse_d.h \ bspline_eval_std_z.h \ bspline_eval_sse_z.h \ multi_bspline.h \ multi_bspline_create.c \ multi_bspline_create.h \ multi_bspline_eval_c.h \ multi_bspline_eval_d.h \ multi_bspline_eval_s.h \ multi_bspline_eval_z.h \ multi_bspline_structs.h \ multi_nubspline_create.c \ nubspline_base.h \ nubspline_create.c \ nubspline_create.h \ nubspline_eval_sse_s.h \ nubspline_eval_std_s.h \ nubspline_eval_sse_c.h \ nubspline_eval_std_c.h \ nubspline_eval_sse_d.h \ nubspline_eval_std_d.h \ nubspline_eval_sse_z.h \ nubspline_eval_std_z.h \ nubspline_structs.h \ nubasis.h \ nubasis.c \ nugrid.h \ nugrid.c \ $(SINGLE_SRC) \ $(DOUBLE_SRC) \ $(MAYBE_BLIPS) \ $(MAYBE_CUDA) \ $(MAYBE_FORTRAN) libeinspline_la_LIBADD = $(FLIBS) $(FFTW3_LIBS) $(FFTW3F_LIBS) include_HEADERS = bspline_base.h bspline_create.h bspline_eval_sse_c.h \ bspline_eval_sse_d.h bspline_eval_sse_s.h \ bspline_eval_sse_z.h bspline_eval_std_c.h \ bspline_eval_std_d.h bspline_eval_std_s.h \ bspline_eval_std_z.h bspline.h bspline_structs.h \ fbspline.h fmulti_bspline.h fnubspline.h \ multi_bspline.h \ multi_bspline_create.h multi_bspline_structs.h \ multi_bspline_eval_c.h multi_bspline_eval_d.h \ multi_bspline_eval_s.h multi_bspline_eval_z.h \ multi_nubspline.h \ multi_nubspline_create.h multi_nubspline_structs.h \ multi_nubspline_eval_c.h multi_nubspline_eval_d.h \ multi_nubspline_eval_s.h multi_nubspline_eval_z.h \ nubspline_base.h nubspline_create.h \ nubspline_eval_sse_s.h nubspline_eval_std_s.h \ nubspline_eval_sse_c.h nubspline_eval_std_c.h \ nubspline_eval_sse_d.h nubspline_eval_std_d.h \ nubspline_eval_sse_z.h 
nubspline_eval_std_z.h \ nubspline_structs.h nubasis.h nubspline.h nugrid.h \ multi_bspline_structs_cuda.h multi_bspline_create_cuda.h \ multi_bspline_eval_cuda.h bspline_eval_cuda.h \ bspline_structs_cuda.h bspline_create_cuda.h \ config.h EXTRA_DIST = multi_bspline_eval_cuda_c.cu all: config.h $(MAKE) $(AM_MAKEFLAGS) all-am .SUFFIXES: .SUFFIXES: .c .cc .cu .f .lo .o .obj $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh \ && exit 0; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu src/Makefile'; \ cd $(top_srcdir) && \ $(AUTOMAKE) --gnu src/Makefile .PRECIOUS: Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ esac; $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh config.h: stamp-h1 @if test ! 
-f $@; then \ rm -f stamp-h1; \ $(MAKE) $(AM_MAKEFLAGS) stamp-h1; \ else :; fi stamp-h1: $(srcdir)/config.h.in $(top_builddir)/config.status @rm -f stamp-h1 cd $(top_builddir) && $(SHELL) ./config.status src/config.h $(srcdir)/config.h.in: $(am__configure_deps) cd $(top_srcdir) && $(AUTOHEADER) rm -f stamp-h1 touch $@ distclean-hdr: -rm -f config.h stamp-h1 install-libLTLIBRARIES: $(lib_LTLIBRARIES) @$(NORMAL_INSTALL) test -z "$(libdir)" || $(MKDIR_P) "$(DESTDIR)$(libdir)" @list='$(lib_LTLIBRARIES)'; for p in $$list; do \ if test -f $$p; then \ f=$(am__strip_dir) \ echo " $(LIBTOOL) --mode=install $(libLTLIBRARIES_INSTALL) $(INSTALL_STRIP_FLAG) '$$p' '$(DESTDIR)$(libdir)/$$f'"; \ $(LIBTOOL) --mode=install $(libLTLIBRARIES_INSTALL) $(INSTALL_STRIP_FLAG) "$$p" "$(DESTDIR)$(libdir)/$$f"; \ else :; fi; \ done uninstall-libLTLIBRARIES: @$(NORMAL_UNINSTALL) @list='$(lib_LTLIBRARIES)'; for p in $$list; do \ p=$(am__strip_dir) \ echo " $(LIBTOOL) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$p'"; \ $(LIBTOOL) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$p"; \ done clean-libLTLIBRARIES: -test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES) @list='$(lib_LTLIBRARIES)'; for p in $$list; do \ dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \ test "$$dir" != "$$p" || dir=.; \ echo "rm -f \"$${dir}/so_locations\""; \ rm -f "$${dir}/so_locations"; \ done libeinspline.la: $(libeinspline_la_OBJECTS) $(libeinspline_la_DEPENDENCIES) $(CXXLINK) -rpath $(libdir) $(libeinspline_la_OBJECTS) $(libeinspline_la_LIBADD) $(LIBS) install-binPROGRAMS: $(bin_PROGRAMS) @$(NORMAL_INSTALL) test -z "$(bindir)" || $(MKDIR_P) "$(DESTDIR)$(bindir)" @list='$(bin_PROGRAMS)'; for p in $$list; do \ p1=`echo $$p|sed 's/$(EXEEXT)$$//'`; \ if test -f $$p \ || test -f $$p1 \ ; then \ f=`echo "$$p1" | sed 's,^.*/,,;$(transform);s/$$/$(EXEEXT)/'`; \ echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) --mode=install $(binPROGRAMS_INSTALL) '$$p' '$(DESTDIR)$(bindir)/$$f'"; \ $(INSTALL_PROGRAM_ENV) $(LIBTOOL) --mode=install 
$(binPROGRAMS_INSTALL) "$$p" "$(DESTDIR)$(bindir)/$$f" || exit 1; \ else :; fi; \ done uninstall-binPROGRAMS: @$(NORMAL_UNINSTALL) @list='$(bin_PROGRAMS)'; for p in $$list; do \ f=`echo "$$p" | sed 's,^.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/'`; \ echo " rm -f '$(DESTDIR)$(bindir)/$$f'"; \ rm -f "$(DESTDIR)$(bindir)/$$f"; \ done clean-binPROGRAMS: @list='$(bin_PROGRAMS)'; for p in $$list; do \ f=`echo $$p|sed 's/$(EXEEXT)$$//'`; \ echo " rm -f $$p $$f"; \ rm -f $$p $$f ; \ done clean-checkPROGRAMS: @list='$(check_PROGRAMS)'; for p in $$list; do \ f=`echo $$p|sed 's/$(EXEEXT)$$//'`; \ echo " rm -f $$p $$f"; \ rm -f $$p $$f ; \ done TestBspline$(EXEEXT): $(TestBspline_OBJECTS) $(TestBspline_DEPENDENCIES) @rm -f TestBspline$(EXEEXT) $(LINK) $(TestBspline_OBJECTS) $(TestBspline_LDADD) $(LIBS) TestNUBspline$(EXEEXT): $(TestNUBspline_OBJECTS) $(TestNUBspline_DEPENDENCIES) @rm -f TestNUBspline$(EXEEXT) $(LINK) $(TestNUBspline_OBJECTS) $(TestNUBspline_LDADD) $(LIBS) test_blip$(EXEEXT): $(test_blip_OBJECTS) $(test_blip_DEPENDENCIES) @rm -f test_blip$(EXEEXT) $(LINK) $(test_blip_OBJECTS) $(test_blip_LDADD) $(LIBS) test_bspline_d$(EXEEXT): $(test_bspline_d_OBJECTS) $(test_bspline_d_DEPENDENCIES) @rm -f test_bspline_d$(EXEEXT) $(LINK) $(test_bspline_d_OBJECTS) $(test_bspline_d_LDADD) $(LIBS) test_bspline_s$(EXEEXT): $(test_bspline_s_OBJECTS) $(test_bspline_s_DEPENDENCIES) @rm -f test_bspline_s$(EXEEXT) $(LINK) $(test_bspline_s_OBJECTS) $(test_bspline_s_LDADD) $(LIBS) test_fbspline$(EXEEXT): $(test_fbspline_OBJECTS) $(test_fbspline_DEPENDENCIES) @rm -f test_fbspline$(EXEEXT) $(F77LINK) $(test_fbspline_OBJECTS) $(test_fbspline_LDADD) $(LIBS) test_fmulti_bspline$(EXEEXT): $(test_fmulti_bspline_OBJECTS) $(test_fmulti_bspline_DEPENDENCIES) @rm -f test_fmulti_bspline$(EXEEXT) $(F77LINK) $(test_fmulti_bspline_OBJECTS) $(test_fmulti_bspline_LDADD) $(LIBS) test_multi$(EXEEXT): $(test_multi_OBJECTS) $(test_multi_DEPENDENCIES) @rm -f test_multi$(EXEEXT) $(LINK) 
$(test_multi_OBJECTS) $(test_multi_LDADD) $(LIBS) test_multi_cpp$(EXEEXT): $(test_multi_cpp_OBJECTS) $(test_multi_cpp_DEPENDENCIES) @rm -f test_multi_cpp$(EXEEXT) $(CXXLINK) $(test_multi_cpp_OBJECTS) $(test_multi_cpp_LDADD) $(LIBS) test_multi_cuda$(EXEEXT): $(test_multi_cuda_OBJECTS) $(test_multi_cuda_DEPENDENCIES) @rm -f test_multi_cuda$(EXEEXT) $(LINK) $(test_multi_cuda_OBJECTS) $(test_multi_cuda_LDADD) $(LIBS) time_multi$(EXEEXT): $(time_multi_OBJECTS) $(time_multi_DEPENDENCIES) @rm -f time_multi$(EXEEXT) $(LINK) $(time_multi_OBJECTS) $(time_multi_LDADD) $(LIBS) mostlyclean-compile: -rm -f *.$(OBJEXT) distclean-compile: -rm -f *.tab.c @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/TestBspline.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/TestNUBspline.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/blip_create.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bspline_create.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bspline_data.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fbspline.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fmulti_bspline.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/fnubspline.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/multi_bspline_create.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/multi_bspline_eval_sse_c.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/multi_bspline_eval_sse_c_cpp.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/multi_bspline_eval_sse_d.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/multi_bspline_eval_sse_d_cpp.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/multi_bspline_eval_sse_s.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/multi_bspline_eval_sse_s_cpp.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/multi_bspline_eval_sse_z.Plo@am__quote@ 
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/multi_bspline_eval_sse_z_cpp.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/multi_bspline_eval_std_c.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/multi_bspline_eval_std_c_cpp.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/multi_bspline_eval_std_d.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/multi_bspline_eval_std_d_cpp.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/multi_bspline_eval_std_s.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/multi_bspline_eval_std_s_cpp.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/multi_bspline_eval_std_z.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/multi_bspline_eval_std_z_cpp.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/multi_nubspline_create.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/multi_nubspline_eval_sse_z.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/multi_nubspline_eval_sse_z_cpp.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/multi_nubspline_eval_std_z.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/multi_nubspline_eval_std_z_cpp.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nubasis.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nubspline_create.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/nugrid.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/test_blip.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/test_bspline_d.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/test_bspline_s.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/test_multi.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/test_multi_cpp.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/time_multi.Po@am__quote@ .c.o: @am__fastdepCC_TRUE@ $(COMPILE) 
-MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< @am__fastdepCC_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(COMPILE) -c $< .c.obj: @am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` @am__fastdepCC_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'` .c.lo: @am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< @am__fastdepCC_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $< .cc.o: @am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< @am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ $< .cc.obj: @am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` @am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po @AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'` .cc.lo: @am__fastdepCXX_TRUE@ $(LTCXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< 
@am__fastdepCXX_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo @AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(LTCXXCOMPILE) -c -o $@ $< .f.o: $(F77COMPILE) -c -o $@ $< .f.obj: $(F77COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` .f.lo: $(LTF77COMPILE) -c -o $@ $< mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs install-includeHEADERS: $(include_HEADERS) @$(NORMAL_INSTALL) test -z "$(includedir)" || $(MKDIR_P) "$(DESTDIR)$(includedir)" @list='$(include_HEADERS)'; for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ f=$(am__strip_dir) \ echo " $(includeHEADERS_INSTALL) '$$d$$p' '$(DESTDIR)$(includedir)/$$f'"; \ $(includeHEADERS_INSTALL) "$$d$$p" "$(DESTDIR)$(includedir)/$$f"; \ done uninstall-includeHEADERS: @$(NORMAL_UNINSTALL) @list='$(include_HEADERS)'; for p in $$list; do \ f=$(am__strip_dir) \ echo " rm -f '$(DESTDIR)$(includedir)/$$f'"; \ rm -f "$(DESTDIR)$(includedir)/$$f"; \ done ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | \ $(AWK) ' { files[$$0] = 1; } \ END { for (i in files) print i; }'`; \ mkid -fID $$unique tags: TAGS TAGS: $(HEADERS) $(SOURCES) config.h.in $(TAGS_DEPENDENCIES) \ $(TAGS_FILES) $(LISP) tags=; \ here=`pwd`; \ list='$(SOURCES) $(HEADERS) config.h.in $(LISP) $(TAGS_FILES)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | \ $(AWK) ' { files[$$0] = 1; } \ END { for (i in files) print i; }'`; \ if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ $$tags $$unique; \ fi ctags: CTAGS CTAGS: $(HEADERS) $(SOURCES) config.h.in $(TAGS_DEPENDENCIES) \ 
$(TAGS_FILES) $(LISP) tags=; \ here=`pwd`; \ list='$(SOURCES) $(HEADERS) config.h.in $(LISP) $(TAGS_FILES)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | \ $(AWK) ' { files[$$0] = 1; } \ END { for (i in files) print i; }'`; \ test -z "$(CTAGS_ARGS)$$tags$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$tags $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && cd $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) $$here distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags distdir: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \ fi; \ cp -pR $$d/$$file $(distdir)$$dir || exit 1; \ else \ test -f $(distdir)/$$file \ || cp -p $$d/$$file $(distdir)/$$file \ || exit 1; \ fi; \ done check-am: all-am $(MAKE) $(AM_MAKEFLAGS) $(check_PROGRAMS) check: check-am all-am: Makefile $(LTLIBRARIES) $(PROGRAMS) $(HEADERS) config.h install-binPROGRAMS: install-libLTLIBRARIES installdirs: for dir in "$(DESTDIR)$(libdir)" "$(DESTDIR)$(bindir)" "$(DESTDIR)$(includedir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: install-am install-exec: install-exec-am install-data: install-data-am uninstall: uninstall-am install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: 
installcheck-am install-strip: $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ `test -z '$(STRIP)' || \ echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install mostlyclean-generic: clean-generic: distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." clean: clean-am clean-am: clean-binPROGRAMS clean-checkPROGRAMS clean-generic \ clean-libLTLIBRARIES clean-libtool mostlyclean-am distclean: distclean-am -rm -rf ./$(DEPDIR) -rm -f Makefile distclean-am: clean-am distclean-compile distclean-generic \ distclean-hdr distclean-tags dvi: dvi-am dvi-am: html: html-am info: info-am info-am: install-data-am: install-includeHEADERS install-dvi: install-dvi-am install-exec-am: install-binPROGRAMS install-libLTLIBRARIES install-html: install-html-am install-info: install-info-am install-man: install-pdf: install-pdf-am install-ps: install-ps-am installcheck-am: maintainer-clean: maintainer-clean-am -rm -rf ./$(DEPDIR) -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-am mostlyclean-am: mostlyclean-compile mostlyclean-generic \ mostlyclean-libtool pdf: pdf-am pdf-am: ps: ps-am ps-am: uninstall-am: uninstall-binPROGRAMS uninstall-includeHEADERS \ uninstall-libLTLIBRARIES .MAKE: install-am install-strip .PHONY: CTAGS GTAGS all all-am check check-am clean clean-binPROGRAMS \ clean-checkPROGRAMS clean-generic clean-libLTLIBRARIES \ clean-libtool ctags distclean distclean-compile \ distclean-generic distclean-hdr distclean-libtool \ distclean-tags distdir dvi dvi-am html html-am info info-am \ install install-am install-binPROGRAMS install-data \ install-data-am install-dvi install-dvi-am install-exec \ install-exec-am install-html install-html-am \ 
install-includeHEADERS install-info install-info-am \ install-libLTLIBRARIES install-man install-pdf install-pdf-am \ install-ps install-ps-am install-strip installcheck \ installcheck-am installdirs maintainer-clean \ maintainer-clean-generic mostlyclean mostlyclean-compile \ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ tags uninstall uninstall-am uninstall-binPROGRAMS \ uninstall-includeHEADERS uninstall-libLTLIBRARIES .cu.o: $(NVCC) -c $(NVCCFLAGS) $< .cu.lo: $(top_builddir)/cudalt.py $@ $(NVCC) -c $(NVCCFLAGS) $< # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: einspline-0.9.2/src/test_multi.c0000664000113000011300000022726611037743553013566 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. // // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. 
// // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #include "multi_bspline.h" #include "multi_nubspline.h" #include "bspline.h" #include "nubspline.h" #include #include #include #include double drand48(); inline double diff (double a, double b, double tol) { if (fabs(a-b) > tol) return 1; else return 0; } ////////////////////////////////////////// // Single-precision real test functions // ////////////////////////////////////////// int test_1d_float_all() { int Nx=73; int num_splines = 21; Ugrid x_grid; x_grid.start = 3.1; x_grid.end = 9.1; x_grid.num = Nx; BCtype_s xBC; xBC.lCode = xBC.rCode = PERIODIC; // First, create splines the normal way UBspline_1d_s* norm_splines[num_splines]; multi_UBspline_1d_s *multi_spline; // First, create multispline multi_spline = create_multi_UBspline_1d_s (x_grid, xBC, num_splines); float data[Nx]; // Now, create normal splines and set multispline data for (int i=0; icoefs[27]); // fprintf (stderr, "multi coef = %1.14e\n", // multi_spline->coefs[19+27*multi_spline->x_stride]); // Now, test random values int num_vals = 100; float multi_vals[num_splines], norm_vals [num_splines]; float multi_grads[num_splines], norm_grads[num_splines]; float multi_lapl[num_splines], norm_lapl [num_splines]; for (int i=0; icoefs[227]), // cimag(norm_splines[19]->coefs[227])); // fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", // creal(multi_spline->coefs[19+227*multi_spline->z_stride]), // cimag(multi_spline->coefs[19+227*multi_spline->z_stride])); // Now, test random values int num_vals = 100; float multi_vals[num_splines], norm_vals[num_splines]; float multi_grads[2*num_splines], norm_grads[2*num_splines]; float multi_lapl[num_splines], norm_lapl[num_splines]; float 
multi_hess[4*num_splines], norm_hess[4*num_splines]; for (int i=0; icoefs[227]), // cimag(norm_splines[19]->coefs[227])); // fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", // creal(multi_spline->coefs[19+227*multi_spline->z_stride]), // cimag(multi_spline->coefs[19+227*multi_spline->z_stride])); // Now, test random values int num_vals = 100; float multi_vals[num_splines], norm_vals[num_splines]; float multi_grads[3*num_splines], norm_grads[3*num_splines]; float multi_lapl[num_splines], norm_lapl[num_splines]; float multi_hess[9*num_splines], norm_hess[9*num_splines]; for (int i=0; icoefs[227]), // cimag(norm_splines[19]->coefs[227])); // fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", // creal(multi_spline->coefs[19+227*multi_spline->z_stride]), // cimag(multi_spline->coefs[19+227*multi_spline->z_stride])); // Now, test random values int num_vals = 100; double multi_vals[num_splines], norm_vals[num_splines]; double multi_grads[2*num_splines], norm_grads[2*num_splines]; double multi_lapl[num_splines], norm_lapl[num_splines]; double multi_hess[4*num_splines], norm_hess[4*num_splines]; for (int i=0; icoefs[227]), // cimag(norm_splines[19]->coefs[227])); // fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", // creal(multi_spline->coefs[19+227*multi_spline->z_stride]), // cimag(multi_spline->coefs[19+227*multi_spline->z_stride])); // Now, test random values int num_vals = 100; double multi_vals[num_splines], norm_vals[num_splines]; double multi_grads[3*num_splines], norm_grads[3*num_splines]; double multi_lapl[num_splines], norm_lapl[num_splines]; double multi_hess[9*num_splines], norm_hess[9*num_splines]; for (int i=0; i tol || idiff > tol) return 1; else return 0; } int test_1d_complex_float_all() { int Nx=73; int num_splines = 21; Ugrid x_grid; x_grid.start = 3.1; x_grid.end = 9.1; x_grid.num = Nx; BCtype_c xBC; xBC.lCode = xBC.rCode = PERIODIC; // First, create splines the normal way UBspline_1d_c* norm_splines[num_splines]; multi_UBspline_1d_c 
*multi_spline; // First, create multispline multi_spline = create_multi_UBspline_1d_c (x_grid, xBC, num_splines); complex_float data[Nx]; // Now, create normal splines and set multispline data for (int i=0; icoefs[27]), // cimagf(norm_splines[19]->coefs[27])); // fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", // crealf(multi_spline->coefs[19+27*multi_spline->x_stride]), // cimagf(multi_spline->coefs[19+27*multi_spline->x_stride])); // Now, test random values int num_vals = 100; complex_float multi_vals[num_splines], norm_vals [num_splines]; complex_float multi_grads[num_splines], norm_grads[num_splines]; complex_float multi_lapl[num_splines], norm_lapl [num_splines]; for (int i=0; icoefs[2127]), // cimag(norm_splines[19]->coefs[2127])); // fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", // creal(multi_spline->coefs[19+2127*multi_spline->y_stride]), // cimag(multi_spline->coefs[19+2127*multi_spline->y_stride])); // Now, test random values int num_vals = 100; complex_float multi_vals[num_splines], norm_vals[num_splines]; complex_float multi_grads[2*num_splines], norm_grads[2*num_splines]; complex_float multi_lapl[num_splines], norm_lapl[num_splines]; complex_float multi_hess[4*num_splines], norm_hess[4*num_splines]; for (int i=0; icoefs[227]), // cimag(norm_splines[19]->coefs[227])); // fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", // creal(multi_spline->coefs[19+227*multi_spline->z_stride]), // cimag(multi_spline->coefs[19+227*multi_spline->z_stride])); // Now, test random values int num_vals = 100; complex_float multi_vals[num_splines], norm_vals[num_splines]; complex_float multi_grads[3*num_splines], norm_grads[3*num_splines]; complex_float multi_lapl[num_splines], norm_lapl[num_splines]; complex_float multi_hess[9*num_splines], norm_hess[9*num_splines]; for (int i=0; icoefs[227]), cimag(norm_splines[19]->coefs[227])); fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", creal(multi_spline->coefs[19+227*multi_spline->z_stride]), 
cimag(multi_spline->coefs[19+227*multi_spline->z_stride])); //return; // Now, test random values int num_vals = 100; complex_double multi_vals[num_splines], norm_vals[num_splines]; for (int i=0; i 1.0e-12 || fabs(idiff) > 1.0e-12) { fprintf (stderr, "Error! norm_vals[j] = %1.14e + %1.14ei\n", creal(norm_vals[j]), cimag(norm_vals[j])); fprintf (stderr, " multi_vals[j] = %1.14e + %1.14ei\n", creal(multi_vals[j]), cimag(multi_vals[j])); } } } num_vals = 100000; // Now do timing clock_t norm_start, norm_end, multi_start, multi_end, rand_start, rand_end; rand_start = clock(); for (int i=0; i tol || idiff > tol) return 1; else return 0; } int test_1d_complex_double_all() { int Nx=73; int num_splines = 21; Ugrid x_grid; x_grid.start = 3.1; x_grid.end = 9.1; x_grid.num = Nx; BCtype_z xBC; xBC.lCode = xBC.rCode = PERIODIC; // First, create splines the normal way UBspline_1d_z* norm_splines[num_splines]; multi_UBspline_1d_z *multi_spline; // First, create multispline multi_spline = create_multi_UBspline_1d_z (x_grid, xBC, num_splines); complex_double data[Nx]; // Now, create normal splines and set multispline data for (int i=0; icoefs[27]), // cimag(norm_splines[19]->coefs[27])); // fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", // creal(multi_spline->coefs[19+27*multi_spline->x_stride]), // cimag(multi_spline->coefs[19+27*multi_spline->x_stride])); // Now, test random values int num_vals = 100; complex_double multi_vals[num_splines], norm_vals [num_splines]; complex_double multi_grads[num_splines], norm_grads[num_splines]; complex_double multi_lapl[num_splines], norm_lapl [num_splines]; for (int i=0; ipoints[i]); BCtype_z xBC; // xBC.lCode = xBC.rCode = NATURAL; xBC.lCode = DERIV1; xBC.lVal_r = 2.3; xBC.lVal_i = 1.1; xBC.rCode = DERIV1; xBC.rVal_r = -2.3; xBC.rVal_i = -1.1; // First, create splines the normal way NUBspline_1d_z* norm_splines[num_splines]; multi_NUBspline_1d_z *multi_spline; // First, create multispline multi_spline = create_multi_NUBspline_1d_z 
(x_grid, xBC, num_splines); complex_double data[Nx]; // Now, create normal splines and set multispline data for (int i=0; icoefs[27]), // cimag(norm_splines[19]->coefs[27])); // fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", // creal(multi_spline->coefs[19+27*multi_spline->x_stride]), // cimag(multi_spline->coefs[19+27*multi_spline->x_stride])); // Now, test random values int num_vals = 100; complex_double multi_vals[num_splines], norm_vals [num_splines]; complex_double multi_grads[num_splines], norm_grads[num_splines]; complex_double multi_lapl[num_splines], norm_lapl [num_splines]; for (int i=0; istart + (1.0-rx)*x_grid->end; ////////////////////////// // Check value routine // ////////////////////////// eval_multi_NUBspline_1d_z (multi_spline, x, multi_vals); for (int j=0; jcoefs[227]), // cimag(norm_splines[19]->coefs[227])); // fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", // creal(multi_spline->coefs[19+227*multi_spline->y_stride]), // cimag(multi_spline->coefs[19+227*multi_spline->y_stride])); // Now, test random values int num_vals = 100; complex_double multi_vals[num_splines], norm_vals[num_splines]; complex_double multi_grads[2*num_splines], norm_grads[2*num_splines]; complex_double multi_lapl[num_splines], norm_lapl[num_splines]; complex_double multi_hess[4*num_splines], norm_hess[4*num_splines]; for (int i=0; icoefs[227]), // cimag(norm_splines[19]->coefs[227])); // fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", // creal(multi_spline->coefs[19+227*multi_spline->z_stride]), // cimag(multi_spline->coefs[19+227*multi_spline->z_stride])); // Now, test random values int num_vals = 100; complex_double multi_vals[num_splines], norm_vals[num_splines]; complex_double multi_grads[3*num_splines], norm_grads[3*num_splines]; complex_double multi_lapl[num_splines], norm_lapl[num_splines]; complex_double multi_hess[9*num_splines], norm_hess[9*num_splines]; for (int i=0; icoefs[227]), cimag(norm_splines[19]->coefs[227])); fprintf (stderr, "multi coef = 
%1.14e + %1.14ei\n", creal(multi_spline->coefs[19+227*multi_spline->z_stride]), cimag(multi_spline->coefs[19+227*multi_spline->z_stride])); // Now, test random values int num_vals = 100; complex_double multi_vals[num_splines], norm_vals[num_splines]; complex_double multi_grads[3*num_splines], norm_grads[3*num_splines]; complex_double multi_lapl[num_splines], norm_lapl[num_splines]; complex_double multi_hess[9*num_splines], norm_hess[9*num_splines]; for (int i=0; icoefs[227]); fprintf (stderr, "multi coef = %1.14e\n", multi_spline->coefs[19+227*multi_spline->z_stride]); // Now, test random values int num_vals = 100; double multi_vals[num_splines], norm_vals[num_splines]; for (int i=0; i 1.0e-12) { fprintf (stderr, "Error! norm_vals[j] = %1.14e\n", norm_vals[j]); fprintf (stderr, " multi_vals[j] = %1.14e\n", multi_vals[j]); } } } num_vals = 100000; // Now do timing clock_t norm_start, norm_end, multi_start, multi_end, rand_start, rand_end; rand_start = clock(); for (int i=0; icoefs[227]); fprintf (stderr, "multi coef = %1.14e\n", multi_spline->coefs[19+227*multi_spline->z_stride]); // Now, test random values int num_vals = 100; double multi_vals[num_splines], norm_vals[num_splines]; double multi_grads[3*num_splines], norm_grads[3*num_splines]; double multi_hess[9*num_splines], norm_hess[9*num_splines]; for (int i=0; i 1.0e-12) { fprintf (stderr, "j = %d\n", j); fprintf (stderr, "Error! norm_vals[j] = %1.14e\n", norm_vals[j]); fprintf (stderr, " multi_vals[j] = %1.14e\n", multi_vals[j]); } // Check gradients for (int n=0; n<3; n++) { diff = norm_grads[3*j+n] - multi_grads[3*j+n]; if (fabs(diff) > 1.0e-12) { fprintf (stderr, "n=%d\n", n); fprintf (stderr, "Error! norm_grads[j] = %1.14e\n", norm_grads[3*j+n]); fprintf (stderr, " multi_grads[j] = %1.14e\n", multi_grads[3*j+n]); } } // Check hessian for (int n=0; n<9; n++) { diff = norm_hess[9*j+n] - multi_hess[9*j+n]; if (fabs(diff) > 1.0e-10) { fprintf (stderr, "Error! 
norm_hess[j] = %1.14e\n", norm_hess[9*j+n]); fprintf (stderr, " multi_hess[j] = %1.14e\n", multi_hess[9*j+n]); } } } } num_vals = 100000; // Now do timing clock_t norm_start, norm_end, multi_start, multi_end, rand_start, rand_end; rand_start = clock(); for (int i=0; i #include #include int posix_memalign(void **memptr, size_t alignment, size_t size); //////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////// //// Helper functions for spline creation //// //////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////// void init_sse_data(); void find_coefs_1d_d (Ugrid grid, BCtype_d bc, double *data, intptr_t dstride, double *coefs, intptr_t cstride); void solve_deriv_interp_1d_s (float bands[], float coefs[], int M, int cstride) { // Solve interpolating equations // First and last rows are different bands[4*(0)+1] /= bands[4*(0)+0]; bands[4*(0)+2] /= bands[4*(0)+0]; bands[4*(0)+3] /= bands[4*(0)+0]; bands[4*(0)+0] = 1.0; bands[4*(1)+1] -= bands[4*(1)+0]*bands[4*(0)+1]; bands[4*(1)+2] -= bands[4*(1)+0]*bands[4*(0)+2]; bands[4*(1)+3] -= bands[4*(1)+0]*bands[4*(0)+3]; bands[4*(0)+0] = 0.0; bands[4*(1)+2] /= bands[4*(1)+1]; bands[4*(1)+3] /= bands[4*(1)+1]; bands[4*(1)+1] = 1.0; // Now do rows 2 through M+1 for (int row=2; row < (M+1); row++) { bands[4*(row)+1] -= bands[4*(row)+0]*bands[4*(row-1)+2]; bands[4*(row)+3] -= bands[4*(row)+0]*bands[4*(row-1)+3]; bands[4*(row)+2] /= bands[4*(row)+1]; bands[4*(row)+3] /= bands[4*(row)+1]; bands[4*(row)+0] = 0.0; bands[4*(row)+1] = 1.0; } // Do last row bands[4*(M+1)+1] -= bands[4*(M+1)+0]*bands[4*(M-1)+2]; bands[4*(M+1)+3] -= bands[4*(M+1)+0]*bands[4*(M-1)+3]; bands[4*(M+1)+2] -= bands[4*(M+1)+1]*bands[4*(M)+2]; bands[4*(M+1)+3] -= bands[4*(M+1)+1]*bands[4*(M)+3]; bands[4*(M+1)+3] /= bands[4*(M+1)+2]; bands[4*(M+1)+2] = 1.0; coefs[(M+1)*cstride] = bands[4*(M+1)+3]; // Now back substitute up for (int row=M; 
row>0; row--) coefs[row*cstride] = bands[4*(row)+3] - bands[4*(row)+2]*coefs[cstride*(row+1)]; // Finish with first row coefs[0] = bands[4*(0)+3] - bands[4*(0)+1]*coefs[1*cstride] - bands[4*(0)+2]*coefs[2*cstride]; } // On input, bands should be filled with: // row 0 : abcdInitial from boundary conditions // rows 1:M: basis functions in first 3 cols, data in last // row M+1 : abcdFinal from boundary conditions // cstride gives the stride between values in coefs. // On exit, coefs with contain interpolating B-spline coefs void solve_periodic_interp_1d_s (float bands[], float coefs[], int M, int cstride) { float lastCol[M]; // Now solve: // First and last rows are different bands[4*(0)+2] /= bands[4*(0)+1]; bands[4*(0)+0] /= bands[4*(0)+1]; bands[4*(0)+3] /= bands[4*(0)+1]; bands[4*(0)+1] = 1.0; bands[4*(M-1)+1] -= bands[4*(M-1)+2]*bands[4*(0)+0]; bands[4*(M-1)+3] -= bands[4*(M-1)+2]*bands[4*(0)+3]; bands[4*(M-1)+2] = -bands[4*(M-1)+2]*bands[4*(0)+2]; lastCol[0] = bands[4*(0)+0]; for (int row=1; row < (M-1); row++) { bands[4*(row)+1] -= bands[4*(row)+0] * bands[4*(row-1)+2]; bands[4*(row)+3] -= bands[4*(row)+0] * bands[4*(row-1)+3]; lastCol[row] = -bands[4*(row)+0] * lastCol[row-1]; bands[4*(row)+0] = 0.0; bands[4*(row)+2] /= bands[4*(row)+1]; bands[4*(row)+3] /= bands[4*(row)+1]; lastCol[row] /= bands[4*(row)+1]; bands[4*(row)+1] = 1.0; if (row < (M-2)) { bands[4*(M-1)+3] -= bands[4*(M-1)+2]*bands[4*(row)+3]; bands[4*(M-1)+1] -= bands[4*(M-1)+2]*lastCol[row]; bands[4*(M-1)+2] = -bands[4*(M-1)+2]*bands[4*(row)+2]; } } // Now do last row // The [2] element and [0] element are now on top of each other bands[4*(M-1)+0] += bands[4*(M-1)+2]; bands[4*(M-1)+1] -= bands[4*(M-1)+0] * (bands[4*(M-2)+2]+lastCol[M-2]); bands[4*(M-1)+3] -= bands[4*(M-1)+0] * bands[4*(M-2)+3]; bands[4*(M-1)+3] /= bands[4*(M-1)+1]; coefs[M*cstride] = bands[4*(M-1)+3]; for (int row=M-2; row>=0; row--) coefs[(row+1)*cstride] = bands[4*(row)+3] - bands[4*(row)+2]*coefs[(row+2)*cstride] - 
lastCol[row]*coefs[M*cstride]; coefs[0*cstride] = coefs[M*cstride]; coefs[(M+1)*cstride] = coefs[1*cstride]; coefs[(M+2)*cstride] = coefs[2*cstride]; } // On input, bands should be filled with: // row 0 : abcdInitial from boundary conditions // rows 1:M: basis functions in first 3 cols, data in last // row M+1 : abcdFinal from boundary conditions // cstride gives the stride between values in coefs. // On exit, coefs with contain interpolating B-spline coefs void solve_antiperiodic_interp_1d_s (float bands[], float coefs[], int M, int cstride) { bands[4*0+0] *= -1.0; bands[4*(M-1)+2] *= -1.0; float lastCol[M]; // Now solve: // First and last rows are different bands[4*(0)+2] /= bands[4*(0)+1]; bands[4*(0)+0] /= bands[4*(0)+1]; bands[4*(0)+3] /= bands[4*(0)+1]; bands[4*(0)+1] = 1.0; bands[4*(M-1)+1] -= bands[4*(M-1)+2]*bands[4*(0)+0]; bands[4*(M-1)+3] -= bands[4*(M-1)+2]*bands[4*(0)+3]; bands[4*(M-1)+2] = -bands[4*(M-1)+2]*bands[4*(0)+2]; lastCol[0] = bands[4*(0)+0]; for (int row=1; row < (M-1); row++) { bands[4*(row)+1] -= bands[4*(row)+0] * bands[4*(row-1)+2]; bands[4*(row)+3] -= bands[4*(row)+0] * bands[4*(row-1)+3]; lastCol[row] = -bands[4*(row)+0] * lastCol[row-1]; bands[4*(row)+0] = 0.0; bands[4*(row)+2] /= bands[4*(row)+1]; bands[4*(row)+3] /= bands[4*(row)+1]; lastCol[row] /= bands[4*(row)+1]; bands[4*(row)+1] = 1.0; if (row < (M-2)) { bands[4*(M-1)+3] -= bands[4*(M-1)+2]*bands[4*(row)+3]; bands[4*(M-1)+1] -= bands[4*(M-1)+2]*lastCol[row]; bands[4*(M-1)+2] = -bands[4*(M-1)+2]*bands[4*(row)+2]; } } // Now do last row // The [2] element and [0] element are now on top of each other bands[4*(M-1)+0] += bands[4*(M-1)+2]; bands[4*(M-1)+1] -= bands[4*(M-1)+0] * (bands[4*(M-2)+2]+lastCol[M-2]); bands[4*(M-1)+3] -= bands[4*(M-1)+0] * bands[4*(M-2)+3]; bands[4*(M-1)+3] /= bands[4*(M-1)+1]; coefs[M*cstride] = bands[4*(M-1)+3]; for (int row=M-2; row>=0; row--) coefs[(row+1)*cstride] = bands[4*(row)+3] - bands[4*(row)+2]*coefs[(row+2)*cstride] - 
lastCol[row]*coefs[M*cstride]; coefs[0*cstride] = -coefs[M*cstride]; coefs[(M+1)*cstride] = -coefs[1*cstride]; coefs[(M+2)*cstride] = -coefs[2*cstride]; } #ifdef HIGH_PRECISION void find_coefs_1d_s (Ugrid grid, BCtype_s bc, float *data, intptr_t dstride, float *coefs, intptr_t cstride) { BCtype_d d_bc; double *d_data, *d_coefs; d_bc.lCode = bc.lCode; d_bc.rCode = bc.rCode; d_bc.lVal = bc.lVal; d_bc.rVal = bc.rVal; int M = grid.num, N; if (bc.lCode == PERIODIC || bc.lCode == ANTIPERIODIC) N = M+3; else N = M+2; d_data = malloc (N*sizeof(double)); d_coefs = malloc (N*sizeof(double)); for (int i=0; ispcode = U1D; spline->tcode = SINGLE_REAL; spline->xBC = xBC; spline->x_grid = x_grid; // Setup internal variables int M = x_grid.num; int N; if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) { x_grid.delta = (x_grid.end-x_grid.start)/(double)(x_grid.num); N = M+3; } else { x_grid.delta = (x_grid.end-x_grid.start)/(double)(x_grid.num-1); N = M+2; } x_grid.delta_inv = 1.0/x_grid.delta; spline->x_grid = x_grid; #ifndef HAVE_SSE2 spline->coefs = malloc (sizeof(float)*N); #else posix_memalign ((void**)&spline->coefs, 16, (sizeof(float)*N)); #endif find_coefs_1d_s (spline->x_grid, xBC, data, 1, spline->coefs, 1); init_sse_data(); return spline; } void recompute_UBspline_1d_s (UBspline_1d_s* spline, float *data) { find_coefs_1d_s (spline->x_grid, spline->xBC, data, 1, spline->coefs, 1); } UBspline_2d_s* create_UBspline_2d_s (Ugrid x_grid, Ugrid y_grid, BCtype_s xBC, BCtype_s yBC, float *data) { // Create new spline UBspline_2d_s* restrict spline = malloc (sizeof(UBspline_2d_s)); spline->spcode = U2D; spline->tcode = SINGLE_REAL; spline->xBC = xBC; spline->yBC = yBC; // Setup internal variables int Mx = x_grid.num; int My = y_grid.num; int Nx, Ny; if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) Nx = Mx+3; else Nx = Mx+2; x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3); x_grid.delta_inv = 1.0/x_grid.delta; spline->x_grid = x_grid; if (yBC.lCode == PERIODIC 
|| yBC.lCode == ANTIPERIODIC) Ny = My+3; else Ny = My+2; y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3); y_grid.delta_inv = 1.0/y_grid.delta; spline->y_grid = y_grid; spline->x_stride = Ny; #ifndef HAVE_SSE2 spline->coefs = malloc (sizeof(float)*Nx*Ny); #else posix_memalign ((void**)&spline->coefs, 16, sizeof(float)*Nx*Ny); #endif // First, solve in the X-direction for (int iy=0; iyx_grid, spline->xBC, data+doffset, My, spline->coefs+coffset, Ny); } // Now, solve in the Y-direction for (int ix=0; ixy_grid, spline->yBC, spline->coefs+doffset, 1, spline->coefs+coffset, 1); } init_sse_data(); return spline; } void recompute_UBspline_2d_s (UBspline_2d_s* spline, float *data) { int Mx = spline->x_grid.num; int My = spline->y_grid.num; int Nx, Ny; if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC) Nx = Mx+3; else Nx = Mx+2; if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC) Ny = My+3; else Ny = My+2; // First, solve in the X-direction for (int iy=0; iyx_grid, spline->xBC, data+doffset, My, spline->coefs+coffset, Ny); } // Now, solve in the Y-direction for (int ix=0; ixy_grid, spline->yBC, spline->coefs+doffset, 1, spline->coefs+coffset, 1); } } UBspline_3d_s* create_UBspline_3d_s (Ugrid x_grid, Ugrid y_grid, Ugrid z_grid, BCtype_s xBC, BCtype_s yBC, BCtype_s zBC, float *data) { // Create new spline UBspline_3d_s* restrict spline = malloc (sizeof(UBspline_3d_s)); spline->spcode = U3D; spline->tcode = SINGLE_REAL; spline->xBC = xBC; spline->yBC = yBC; spline->zBC = zBC; // Setup internal variables int Mx = x_grid.num; int My = y_grid.num; int Mz = z_grid.num; int Nx, Ny, Nz; if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) Nx = Mx+3; else Nx = Mx+2; x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3); x_grid.delta_inv = 1.0/x_grid.delta; spline->x_grid = x_grid; if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC) Ny = My+3; else Ny = My+2; y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3); 
y_grid.delta_inv = 1.0/y_grid.delta; spline->y_grid = y_grid; if (zBC.lCode == PERIODIC || zBC.lCode == ANTIPERIODIC) Nz = Mz+3; else Nz = Mz+2; z_grid.delta = (z_grid.end - z_grid.start)/(double)(Nz-3); z_grid.delta_inv = 1.0/z_grid.delta; spline->z_grid = z_grid; spline->x_stride = Ny*Nz; spline->y_stride = Nz; #ifndef HAVE_SSE2 spline->coefs = malloc (sizeof(float)*Nx*Ny*Nz); #else posix_memalign ((void**)&spline->coefs, 16, (sizeof(float)*Nx*Ny*Nz)); #endif // First, solve in the X-direction for (int iy=0; iyx_grid, xBC, data+doffset, My*Mz, spline->coefs+coffset, Ny*Nz); } // Now, solve in the Y-direction for (int ix=0; ixy_grid, yBC, spline->coefs+doffset, Nz, spline->coefs+coffset, Nz); } // Now, solve in the Z-direction for (int ix=0; ixz_grid, zBC, spline->coefs+doffset, 1, spline->coefs+coffset, 1); } init_sse_data(); return spline; } void recompute_UBspline_3d_s (UBspline_3d_s* spline, float *data) { int Mx = spline->x_grid.num; int My = spline->y_grid.num; int Mz = spline->z_grid.num; int Nx, Ny, Nz; if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC) Nx = Mx+3; else Nx = Mx+2; if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC) Ny = My+3; else Ny = My+2; if (spline->zBC.lCode == PERIODIC || spline->zBC.lCode == ANTIPERIODIC) Nz = Mz+3; else Nz = Mz+2; // First, solve in the X-direction for (int iy=0; iyx_grid, spline->xBC, data+doffset, My*Mz, spline->coefs+coffset, Ny*Nz); } // Now, solve in the Y-direction for (int ix=0; ixy_grid, spline->yBC, spline->coefs+doffset, Nz, spline->coefs+coffset, Nz); } // Now, solve in the Z-direction for (int ix=0; ixz_grid, spline->zBC, spline->coefs+doffset, 1, spline->coefs+coffset, 1); } } //////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////// //// Single-Precision, Complex Creation Routines //// //////////////////////////////////////////////////////////// 
//////////////////////////////////////////////////////////// // On input, bands should be filled with: // row 0 : abcdInitial from boundary conditions // rows 1:M: basis functions in first 3 cols, data in last // row M+1 : abcdFinal from boundary conditions // cstride gives the stride between values in coefs. // On exit, coefs with contain interpolating B-spline coefs UBspline_1d_c* create_UBspline_1d_c (Ugrid x_grid, BCtype_c xBC, complex_float *data) { // Create new spline UBspline_1d_c* restrict spline = malloc (sizeof(UBspline_1d_c)); spline->spcode = U1D; spline->tcode = SINGLE_COMPLEX; spline->xBC = xBC; // Setup internal variables int M = x_grid.num; int N; if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) { x_grid.delta = (x_grid.end-x_grid.start)/(double)(x_grid.num); N = M+3; } else { x_grid.delta = (x_grid.end-x_grid.start)/(double)(x_grid.num-1); N = M+2; } x_grid.delta_inv = 1.0/x_grid.delta; spline->x_grid = x_grid; #ifndef HAVE_SSE2 spline->coefs = malloc (2*sizeof(float)*N); #else posix_memalign ((void**)&spline->coefs, 16, 2*sizeof(float)*N); #endif BCtype_s xBC_r, xBC_i; xBC_r.lCode = xBC.lCode; xBC_r.rCode = xBC.rCode; xBC_r.lVal = xBC.lVal_r; xBC_r.rVal = xBC.rVal_r; xBC_i.lCode = xBC.lCode; xBC_i.rCode = xBC.rCode; xBC_i.lVal = xBC.lVal_i; xBC_i.rVal = xBC.rVal_i; // Real part find_coefs_1d_s (spline->x_grid, xBC_r, (float*)data, 2, (float*)spline->coefs, 2); // Imaginarty part find_coefs_1d_s (spline->x_grid, xBC_i, ((float*)data)+1, 2, ((float*)spline->coefs+1), 2); init_sse_data(); return spline; } void recompute_UBspline_1d_c (UBspline_1d_c* spline, complex_float *data) { BCtype_s xBC_r, xBC_i; xBC_r.lCode = spline->xBC.lCode; xBC_r.rCode = spline->xBC.rCode; xBC_r.lVal = spline->xBC.lVal_r; xBC_r.rVal = spline->xBC.rVal_r; xBC_i.lCode = spline->xBC.lCode; xBC_i.rCode = spline->xBC.rCode; xBC_i.lVal = spline->xBC.lVal_i; xBC_i.rVal = spline->xBC.rVal_i; // Real part find_coefs_1d_s (spline->x_grid, xBC_r, (float*)data, 2, 
(float*)spline->coefs, 2); // Imaginarty part find_coefs_1d_s (spline->x_grid, xBC_i, ((float*)data)+1, 2, ((float*)spline->coefs+1), 2); } UBspline_2d_c* create_UBspline_2d_c (Ugrid x_grid, Ugrid y_grid, BCtype_c xBC, BCtype_c yBC, complex_float *data) { // Create new spline UBspline_2d_c* restrict spline = malloc (sizeof(UBspline_2d_c)); spline->spcode = U2D; spline->tcode = SINGLE_COMPLEX; spline->xBC = xBC; spline->yBC = yBC; // Setup internal variables int Mx = x_grid.num; int My = y_grid.num; int Nx, Ny; if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) Nx = Mx+3; else Nx = Mx+2; x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3); x_grid.delta_inv = 1.0/x_grid.delta; spline->x_grid = x_grid; if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC) Ny = My+3; else Ny = My+2; y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3); y_grid.delta_inv = 1.0/y_grid.delta; spline->y_grid = y_grid; spline->x_stride = Ny; #ifndef HAVE_SSE2 spline->coefs = malloc (2*sizeof(float)*Nx*Ny); #else posix_memalign ((void**)&spline->coefs, 16, 2*sizeof(float)*Nx*Ny); #endif BCtype_s xBC_r, xBC_i, yBC_r, yBC_i; xBC_r.lCode = xBC.lCode; xBC_r.rCode = xBC.rCode; xBC_r.lVal = xBC.lVal_r; xBC_r.rVal = xBC.rVal_r; xBC_i.lCode = xBC.lCode; xBC_i.rCode = xBC.rCode; xBC_i.lVal = xBC.lVal_i; xBC_i.rVal = xBC.rVal_i; yBC_r.lCode = yBC.lCode; yBC_r.rCode = yBC.rCode; yBC_r.lVal = yBC.lVal_r; yBC_r.rVal = yBC.rVal_r; yBC_i.lCode = yBC.lCode; yBC_i.rCode = yBC.rCode; yBC_i.lVal = yBC.lVal_i; yBC_i.rVal = yBC.rVal_i; // First, solve in the X-direction for (int iy=0; iyx_grid, xBC_r, ((float*)data)+doffset, 2*My, (float*)spline->coefs+coffset, 2*Ny); // Imag part find_coefs_1d_s (spline->x_grid, xBC_i, ((float*)data)+doffset+1, 2*My, ((float*)spline->coefs)+coffset+1, 2*Ny); } // Now, solve in the Y-direction for (int ix=0; ixy_grid, yBC_r, ((float*)spline->coefs)+doffset, 2, ((float*)spline->coefs)+coffset, 2); // Imag part find_coefs_1d_s (spline->y_grid, yBC_i, 
((float*)spline->coefs)+doffset+1, 2, ((float*)spline->coefs)+coffset+1, 2); } init_sse_data(); return spline; } void recompute_UBspline_2d_c (UBspline_2d_c* spline, complex_float *data) { // Setup internal variables int Mx = spline->x_grid.num; int My = spline->y_grid.num; int Nx, Ny; if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC) Nx = Mx+3; else Nx = Mx+2; if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC) Ny = My+3; else Ny = My+2; BCtype_s xBC_r, xBC_i, yBC_r, yBC_i; xBC_r.lCode = spline->xBC.lCode; xBC_r.rCode = spline->xBC.rCode; xBC_r.lVal = spline->xBC.lVal_r; xBC_r.rVal = spline->xBC.rVal_r; xBC_i.lCode = spline->xBC.lCode; xBC_i.rCode = spline->xBC.rCode; xBC_i.lVal = spline->xBC.lVal_i; xBC_i.rVal = spline->xBC.rVal_i; yBC_r.lCode = spline->yBC.lCode; yBC_r.rCode = spline->yBC.rCode; yBC_r.lVal = spline->yBC.lVal_r; yBC_r.rVal = spline->yBC.rVal_r; yBC_i.lCode = spline->yBC.lCode; yBC_i.rCode = spline->yBC.rCode; yBC_i.lVal = spline->yBC.lVal_i; yBC_i.rVal = spline->yBC.rVal_i; // First, solve in the X-direction for (int iy=0; iyx_grid, xBC_r, ((float*)data)+doffset, 2*My, (float*)spline->coefs+coffset, 2*Ny); // Imag part find_coefs_1d_s (spline->x_grid, xBC_i, ((float*)data)+doffset+1, 2*My, ((float*)spline->coefs)+coffset+1, 2*Ny); } // Now, solve in the Y-direction for (int ix=0; ixy_grid, yBC_r, ((float*)spline->coefs)+doffset, 2, ((float*)spline->coefs)+coffset, 2); // Imag part find_coefs_1d_s (spline->y_grid, yBC_i, ((float*)spline->coefs)+doffset+1, 2, ((float*)spline->coefs)+coffset+1, 2); } } UBspline_3d_c* create_UBspline_3d_c (Ugrid x_grid, Ugrid y_grid, Ugrid z_grid, BCtype_c xBC, BCtype_c yBC, BCtype_c zBC, complex_float *data) { // Create new spline UBspline_3d_c* restrict spline = malloc (sizeof(UBspline_3d_c)); spline->spcode = U3D; spline->tcode = SINGLE_COMPLEX; spline->xBC = xBC; spline->yBC = yBC; spline->zBC = zBC; // Setup internal variables int Mx = x_grid.num; int My = y_grid.num; 
int Mz = z_grid.num; int Nx, Ny, Nz; if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) Nx = Mx+3; else Nx = Mx+2; x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3); x_grid.delta_inv = 1.0/x_grid.delta; spline->x_grid = x_grid; if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC) Ny = My+3; else Ny = My+2; y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3); y_grid.delta_inv = 1.0/y_grid.delta; spline->y_grid = y_grid; if (zBC.lCode == PERIODIC || zBC.lCode == ANTIPERIODIC) Nz = Mz+3; else Nz = Mz+2; z_grid.delta = (z_grid.end - z_grid.start)/(double)(Nz-3); z_grid.delta_inv = 1.0/z_grid.delta; spline->z_grid = z_grid; spline->x_stride = Ny*Nz; spline->y_stride = Nz; #ifndef HAVE_SSE2 spline->coefs = malloc (2*sizeof(float)*Nx*Ny*Nz); #else posix_memalign ((void**)&spline->coefs, 16, 2*sizeof(float)*Nx*Ny*Nz); #endif BCtype_s xBC_r, xBC_i, yBC_r, yBC_i, zBC_r, zBC_i; xBC_r.lCode = xBC.lCode; xBC_r.rCode = xBC.rCode; xBC_r.lVal = xBC.lVal_r; xBC_r.rVal = xBC.rVal_r; xBC_i.lCode = xBC.lCode; xBC_i.rCode = xBC.rCode; xBC_i.lVal = xBC.lVal_i; xBC_i.rVal = xBC.rVal_i; yBC_r.lCode = yBC.lCode; yBC_r.rCode = yBC.rCode; yBC_r.lVal = yBC.lVal_r; yBC_r.rVal = yBC.rVal_r; yBC_i.lCode = yBC.lCode; yBC_i.rCode = yBC.rCode; yBC_i.lVal = yBC.lVal_i; yBC_i.rVal = yBC.rVal_i; zBC_r.lCode = zBC.lCode; zBC_r.rCode = zBC.rCode; zBC_r.lVal = zBC.lVal_r; zBC_r.rVal = zBC.rVal_r; zBC_i.lCode = zBC.lCode; zBC_i.rCode = zBC.rCode; zBC_i.lVal = zBC.lVal_i; zBC_i.rVal = zBC.rVal_i; // First, solve in the X-direction for (int iy=0; iyx_grid, xBC_r, ((float*)data)+doffset, 2*My*Mz, ((float*)spline->coefs)+coffset, 2*Ny*Nz); // Imag part find_coefs_1d_s (spline->x_grid, xBC_i, ((float*)data)+doffset+1, 2*My*Mz, ((float*)spline->coefs)+coffset+1, 2*Ny*Nz); } // Now, solve in the Y-direction for (int ix=0; ixy_grid, yBC_r, ((float*)spline->coefs)+doffset, 2*Nz, ((float*)spline->coefs)+coffset, 2*Nz); // Imag part find_coefs_1d_s (spline->y_grid, yBC_i, 
((float*)spline->coefs)+doffset+1, 2*Nz, ((float*)spline->coefs)+coffset+1, 2*Nz); } // Now, solve in the Z-direction for (int ix=0; ixz_grid, zBC_r, ((float*)spline->coefs)+doffset, 2, ((float*)spline->coefs)+coffset, 2); // Imag part find_coefs_1d_s (spline->z_grid, zBC_i, ((float*)spline->coefs)+doffset+1, 2, ((float*)spline->coefs)+coffset+1, 2); } init_sse_data(); return spline; } void recompute_UBspline_3d_c (UBspline_3d_c* spline, complex_float *data) { // Setup internal variables int Mx = spline->x_grid.num; int My = spline->y_grid.num; int Mz = spline->z_grid.num; int Nx, Ny, Nz; if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC) Nx = Mx+3; else Nx = Mx+2; if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC) Ny = My+3; else Ny = My+2; if (spline->zBC.lCode == PERIODIC || spline->zBC.lCode == ANTIPERIODIC) Nz = Mz+3; else Nz = Mz+2; BCtype_s xBC_r, xBC_i, yBC_r, yBC_i, zBC_r, zBC_i; xBC_r.lCode = spline->xBC.lCode; xBC_r.rCode = spline->xBC.rCode; xBC_r.lVal = spline->xBC.lVal_r; xBC_r.rVal = spline->xBC.rVal_r; xBC_i.lCode = spline->xBC.lCode; xBC_i.rCode = spline->xBC.rCode; xBC_i.lVal = spline->xBC.lVal_i; xBC_i.rVal = spline->xBC.rVal_i; yBC_r.lCode = spline->yBC.lCode; yBC_r.rCode = spline->yBC.rCode; yBC_r.lVal = spline->yBC.lVal_r; yBC_r.rVal = spline->yBC.rVal_r; yBC_i.lCode = spline->yBC.lCode; yBC_i.rCode = spline->yBC.rCode; yBC_i.lVal = spline->yBC.lVal_i; yBC_i.rVal = spline->yBC.rVal_i; zBC_r.lCode = spline->zBC.lCode; zBC_r.rCode = spline->zBC.rCode; zBC_r.lVal = spline->zBC.lVal_r; zBC_r.rVal = spline->zBC.rVal_r; zBC_i.lCode = spline->zBC.lCode; zBC_i.rCode = spline->zBC.rCode; zBC_i.lVal = spline->zBC.lVal_i; zBC_i.rVal = spline->zBC.rVal_i; // First, solve in the X-direction for (int iy=0; iyx_grid, xBC_r, ((float*)data)+doffset, 2*My*Mz, ((float*)spline->coefs)+coffset, 2*Ny*Nz); // Imag part find_coefs_1d_s (spline->x_grid, xBC_i, ((float*)data)+doffset+1, 2*My*Mz, 
((float*)spline->coefs)+coffset+1, 2*Ny*Nz); } // Now, solve in the Y-direction for (int ix=0; ixy_grid, yBC_r, ((float*)spline->coefs)+doffset, 2*Nz, ((float*)spline->coefs)+coffset, 2*Nz); // Imag part find_coefs_1d_s (spline->y_grid, yBC_i, ((float*)spline->coefs)+doffset+1, 2*Nz, ((float*)spline->coefs)+coffset+1, 2*Nz); } // Now, solve in the Z-direction for (int ix=0; ixz_grid, zBC_r, ((float*)spline->coefs)+doffset, 2, ((float*)spline->coefs)+coffset, 2); // Imag part find_coefs_1d_s (spline->z_grid, zBC_i, ((float*)spline->coefs)+doffset+1, 2, ((float*)spline->coefs)+coffset+1, 2); } } //////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////// //// Double-Precision, Real Creation Routines //// //////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////// // On input, bands should be filled with: // row 0 : abcdInitial from boundary conditions // rows 1:M: basis functions in first 3 cols, data in last // row M+1 : abcdFinal from boundary conditions // cstride gives the stride between values in coefs. 
// On exit, coefs will contain interpolating B-spline coefs
//
// bands is accessed as a row-major table of (M+2) rows by 4 columns and is
// overwritten in place while the system is reduced.  The M+2 results are
// written to coefs[0], coefs[cstride], ..., coefs[(M+1)*cstride].
void
solve_deriv_interp_1d_d (double bands[], double coefs[],
                         int M, int cstride)
{
  double *first  = bands;        // row 0
  double *second = bands + 4;    // row 1

  // Scale row 0 so that its leading entry becomes one.
  first[1] /= first[0];
  first[2] /= first[0];
  first[3] /= first[0];
  first[0]  = 1.0;

  // Remove row 1's leading entry using row 0, then normalise row 1.
  second[1] -= second[0]*first[1];
  second[2] -= second[0]*first[2];
  second[3] -= second[0]*first[3];
  // NOTE(review): this store targets row 0, not the entry of row 1 that was
  // just eliminated.  Neither location is read again in this function, so
  // the result is unaffected; kept as-is to leave bands in the same state.
  first[0]   = 0.0;
  second[2] /= second[1];
  second[3] /= second[1];
  second[1]  = 1.0;

  // Forward elimination for rows 2..M: each row only needs the row above.
  for (int row = 2; row <= M; ++row) {
    double       *cur  = bands + 4*row;
    const double *prev = cur - 4;

    cur[1] -= cur[0]*prev[2];
    cur[3] -= cur[0]*prev[3];
    cur[2] /= cur[1];
    cur[3] /= cur[1];
    cur[0]  = 0.0;
    cur[1]  = 1.0;
  }

  // Row M+1 has three entries, so it is reduced against rows M-1 and M.
  {
    double *last = bands + 4*(M+1);

    last[1] -= last[0]*bands[4*(M-1)+2];
    last[3] -= last[0]*bands[4*(M-1)+3];
    last[2] -= last[1]*bands[4*(M)+2];
    last[3] -= last[1]*bands[4*(M)+3];
    last[3] /= last[2];
    last[2]  = 1.0;

    coefs[(M+1)*cstride] = last[3];
  }

  // Back substitution from row M down to row 1.
  int row = M;
  while (row > 0) {
    const double *cur = bands + 4*row;
    coefs[row*cstride] = cur[3] - cur[2]*coefs[cstride*(row+1)];
    --row;
  }

  // Row 0 still couples to the next two coefficients.
  coefs[0] = first[3] - first[1]*coefs[1*cstride] - first[2]*coefs[2*cstride];
}

// On input, bands should be filled with:
// row 0 : abcdInitial from boundary conditions
// rows 1:M: basis functions in first 3 cols, data in last
// row M+1 : abcdFinal from boundary conditions
// cstride gives the stride between values in coefs.
// On exit, coefs will contain interpolating B-spline coefs
//
// This routine reads and overwrites rows 0..M-1 of bands (4 doubles per
// row).  The M solved values land in coefs[1*cstride] .. coefs[M*cstride];
// afterwards coefs[0], coefs[(M+1)*cstride] and coefs[(M+2)*cstride] are
// filled with copies of coefs[M], coefs[1] and coefs[2] (times cstride).
void
solve_periodic_interp_1d_d (double bands[], double coefs[],
                            int M, int cstride)
{
  // lastCol[row] is the factor each reduced row applies to the final
  // unknown, coefs[M*cstride], during back substitution.
  double lastCol[M];

  double *head = bands;              // row 0
  double *tail = bands + 4*(M-1);    // row M-1

  // Normalise row 0 by its diagonal entry.
  head[2] /= head[1];
  head[0] /= head[1];
  head[3] /= head[1];
  head[1]  = 1.0;

  // Start folding row 0 into the last row.
  tail[1] -= tail[2]*head[0];
  tail[3] -= tail[2]*head[3];
  tail[2]  = -tail[2]*head[2];
  lastCol[0] = head[0];

  for (int row = 1; row < M-1; ++row) {
    double       *cur  = bands + 4*row;
    const double *prev = cur - 4;

    // Eliminate against the row above, tracking the last-column factor.
    cur[1] -= cur[0] * prev[2];
    cur[3] -= cur[0] * prev[3];
    lastCol[row] = -cur[0] * lastCol[row-1];
    cur[0]  = 0.0;

    // Normalise this row.
    cur[2] /= cur[1];
    cur[3] /= cur[1];
    lastCol[row] /= cur[1];
    cur[1]  = 1.0;

    // Keep folding into the last row, except for the row directly above it.
    if (row < M-2) {
      tail[3] -= tail[2]*cur[3];
      tail[1] -= tail[2]*lastCol[row];
      tail[2]  = -tail[2]*cur[2];
    }
  }

  // Now do last row
  // The [2] element and [0] element are now on top of each other
  tail[0] += tail[2];
  tail[1] -= tail[0] * (bands[4*(M-2)+2]+lastCol[M-2]);
  tail[3] -= tail[0] * bands[4*(M-2)+3];
  tail[3] /= tail[1];
  coefs[M*cstride] = tail[3];

  // Back substitution from row M-2 down to row 0.
  int row = M-2;
  while (row >= 0) {
    const double *cur = bands + 4*row;
    coefs[(row+1)*cstride] =
      cur[3] - cur[2]*coefs[(row+2)*cstride] - lastCol[row]*coefs[M*cstride];
    --row;
  }

  // Wrap-around copies.
  coefs[0*cstride]     = coefs[M*cstride];
  coefs[(M+1)*cstride] = coefs[1*cstride];
  coefs[(M+2)*cstride] = coefs[2*cstride];
}

// On input, bands should be filled with:
// row 0 : abcdInitial from boundary conditions
// rows 1:M: basis functions in first 3 cols, data in last
// row M+1 : abcdFinal from boundary conditions
// cstride gives the stride between values in coefs.
// On exit, coefs with contain interpolating B-spline coefs void solve_antiperiodic_interp_1d_d (double bands[], double coefs[], int M, int cstride) { double lastCol[M]; bands[4*0+0] *= -1.0; bands[4*(M-1)+2] *= -1.0; // Now solve: // First and last rows are different bands[4*(0)+2] /= bands[4*(0)+1]; bands[4*(0)+0] /= bands[4*(0)+1]; bands[4*(0)+3] /= bands[4*(0)+1]; bands[4*(0)+1] = 1.0; bands[4*(M-1)+1] -= bands[4*(M-1)+2]*bands[4*(0)+0]; bands[4*(M-1)+3] -= bands[4*(M-1)+2]*bands[4*(0)+3]; bands[4*(M-1)+2] = -bands[4*(M-1)+2]*bands[4*(0)+2]; lastCol[0] = bands[4*(0)+0]; for (int row=1; row < (M-1); row++) { bands[4*(row)+1] -= bands[4*(row)+0] * bands[4*(row-1)+2]; bands[4*(row)+3] -= bands[4*(row)+0] * bands[4*(row-1)+3]; lastCol[row] = -bands[4*(row)+0] * lastCol[row-1]; bands[4*(row)+0] = 0.0; bands[4*(row)+2] /= bands[4*(row)+1]; bands[4*(row)+3] /= bands[4*(row)+1]; lastCol[row] /= bands[4*(row)+1]; bands[4*(row)+1] = 1.0; if (row < (M-2)) { bands[4*(M-1)+3] -= bands[4*(M-1)+2]*bands[4*(row)+3]; bands[4*(M-1)+1] -= bands[4*(M-1)+2]*lastCol[row]; bands[4*(M-1)+2] = -bands[4*(M-1)+2]*bands[4*(row)+2]; } } // Now do last row // The [2] element and [0] element are now on top of each other bands[4*(M-1)+0] += bands[4*(M-1)+2]; bands[4*(M-1)+1] -= bands[4*(M-1)+0] * (bands[4*(M-2)+2]+lastCol[M-2]); bands[4*(M-1)+3] -= bands[4*(M-1)+0] * bands[4*(M-2)+3]; bands[4*(M-1)+3] /= bands[4*(M-1)+1]; coefs[M*cstride] = bands[4*(M-1)+3]; for (int row=M-2; row>=0; row--) coefs[(row+1)*cstride] = bands[4*(row)+3] - bands[4*(row)+2]*coefs[(row+2)*cstride] - lastCol[row]*coefs[M*cstride]; coefs[0*cstride] = -coefs[M*cstride]; coefs[(M+1)*cstride] = -coefs[1*cstride]; coefs[(M+2)*cstride] = -coefs[2*cstride]; } void find_coefs_1d_d (Ugrid grid, BCtype_d bc, double *data, intptr_t dstride, double *coefs, intptr_t cstride) { int M = grid.num; double basis[4] = {1.0/6.0, 2.0/3.0, 1.0/6.0, 0.0}; if (bc.lCode == PERIODIC || bc.lCode == ANTIPERIODIC) { #ifdef HAVE_C_VARARRAYS double 
bands[M*4]; #else double *bands = malloc (4*M*sizeof(double)); #endif for (int i=0; ispcode = U1D; spline->tcode = DOUBLE_REAL; spline->xBC = xBC; // Setup internal variables int M = x_grid.num; int N; if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) { x_grid.delta = (x_grid.end-x_grid.start)/(double)(x_grid.num); N = M+3; } else { x_grid.delta = (x_grid.end-x_grid.start)/(double)(x_grid.num-1); N = M+2; } x_grid.delta_inv = 1.0/x_grid.delta; spline->x_grid = x_grid; #ifndef HAVE_SSE2 spline->coefs = malloc (sizeof(double)*N); #else posix_memalign ((void**)&spline->coefs, 16, sizeof(double)*N); #endif find_coefs_1d_d (spline->x_grid, xBC, data, 1, spline->coefs, 1); init_sse_data(); return spline; } void recompute_UBspline_1d_d (UBspline_1d_d* spline, double *data) { find_coefs_1d_d (spline->x_grid, spline->xBC, data, 1, spline->coefs, 1); } UBspline_2d_d* create_UBspline_2d_d (Ugrid x_grid, Ugrid y_grid, BCtype_d xBC, BCtype_d yBC, double *data) { // Create new spline UBspline_2d_d* restrict spline = malloc (sizeof(UBspline_2d_d)); spline->spcode = U2D; spline->tcode = DOUBLE_REAL; spline->xBC = xBC; spline->yBC = yBC; // Setup internal variables int Mx = x_grid.num; int My = y_grid.num; int Nx, Ny; if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) Nx = Mx+3; else Nx = Mx+2; x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3); x_grid.delta_inv = 1.0/x_grid.delta; spline->x_grid = x_grid; if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC) Ny = My+3; else Ny = My+2; y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3); y_grid.delta_inv = 1.0/y_grid.delta; spline->y_grid = y_grid; spline->x_stride = Ny; #ifndef HAVE_SSE2 spline->coefs = malloc (sizeof(double)*Nx*Ny); #else posix_memalign ((void**)&spline->coefs, 16, (sizeof(double)*Nx*Ny)); #endif // First, solve in the X-direction for (int iy=0; iyx_grid, xBC, data+doffset, My, spline->coefs+coffset, Ny); } // Now, solve in the Y-direction for (int ix=0; ixy_grid, yBC, 
spline->coefs+doffset, 1, spline->coefs+coffset, 1); } init_sse_data(); return spline; } void recompute_UBspline_2d_d (UBspline_2d_d* spline, double *data) { int Mx = spline->x_grid.num; int My = spline->y_grid.num; int Nx, Ny; if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC) Nx = Mx+3; else Nx = Mx+2; if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC) Ny = My+3; else Ny = My+2; // First, solve in the X-direction for (int iy=0; iyx_grid, spline->xBC, data+doffset, My, spline->coefs+coffset, Ny); } // Now, solve in the Y-direction for (int ix=0; ixy_grid, spline->yBC, spline->coefs+doffset, 1, spline->coefs+coffset, 1); } } UBspline_3d_d* create_UBspline_3d_d (Ugrid x_grid, Ugrid y_grid, Ugrid z_grid, BCtype_d xBC, BCtype_d yBC, BCtype_d zBC, double *data) { // Create new spline UBspline_3d_d* restrict spline = malloc (sizeof(UBspline_3d_d)); spline->spcode = U3D; spline->tcode = DOUBLE_REAL; spline->xBC = xBC; spline->yBC = yBC; spline->zBC = zBC; // Setup internal variables int Mx = x_grid.num; int My = y_grid.num; int Mz = z_grid.num; int Nx, Ny, Nz; if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) Nx = Mx+3; else Nx = Mx+2; x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3); x_grid.delta_inv = 1.0/x_grid.delta; spline->x_grid = x_grid; if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC) Ny = My+3; else Ny = My+2; y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3); y_grid.delta_inv = 1.0/y_grid.delta; spline->y_grid = y_grid; if (zBC.lCode == PERIODIC || zBC.lCode == ANTIPERIODIC) Nz = Mz+3; else Nz = Mz+2; z_grid.delta = (z_grid.end - z_grid.start)/(double)(Nz-3); z_grid.delta_inv = 1.0/z_grid.delta; spline->z_grid = z_grid; spline->x_stride = Ny*Nz; spline->y_stride = Nz; #ifndef HAVE_SSE2 spline->coefs = malloc (sizeof(double)*Nx*Ny*Nz); #else posix_memalign ((void**)&spline->coefs, 16, (sizeof(double)*Nx*Ny*Nz)); #endif // First, solve in the X-direction for (int iy=0; iyx_grid, xBC, 
data+doffset, My*Mz, spline->coefs+coffset, Ny*Nz); } // Now, solve in the Y-direction for (int ix=0; ixy_grid, yBC, spline->coefs+doffset, Nz, spline->coefs+coffset, Nz); } // Now, solve in the Z-direction for (int ix=0; ixz_grid, zBC, spline->coefs+doffset, 1, spline->coefs+coffset, 1); } init_sse_data(); return spline; } void recompute_UBspline_3d_d (UBspline_3d_d* spline, double *data) { int Mx = spline->x_grid.num; int My = spline->y_grid.num; int Mz = spline->z_grid.num; int Nx, Ny, Nz; if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC) Nx = Mx+3; else Nx = Mx+2; if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC) Ny = My+3; else Ny = My+2; if (spline->zBC.lCode == PERIODIC || spline->zBC.lCode == ANTIPERIODIC) Nz = Mz+3; else Nz = Mz+2; // First, solve in the X-direction for (int iy=0; iyx_grid, spline->xBC, data+doffset, My*Mz, spline->coefs+coffset, Ny*Nz); } // Now, solve in the Y-direction for (int ix=0; ixy_grid, spline->yBC, spline->coefs+doffset, Nz, spline->coefs+coffset, Nz); } // Now, solve in the Z-direction for (int ix=0; ixz_grid, spline->zBC, spline->coefs+doffset, 1, spline->coefs+coffset, 1); } } //////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////// //// Double-Precision, Complex Creation Routines //// //////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////// // On input, bands should be filled with: // row 0 : abcdInitial from boundary conditions // rows 1:M: basis functions in first 3 cols, data in last // row M+1 : abcdFinal from boundary conditions // cstride gives the stride between values in coefs. 
// On exit, coefs with contain interpolating B-spline coefs UBspline_1d_z* create_UBspline_1d_z (Ugrid x_grid, BCtype_z xBC, complex_double *data) { // Create new spline UBspline_1d_z* restrict spline = malloc (sizeof(UBspline_1d_z)); spline->spcode = U1D; spline->tcode = DOUBLE_COMPLEX; spline->xBC = xBC; // Setup internal variables int M = x_grid.num; int N; if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) { x_grid.delta = (x_grid.end-x_grid.start)/(double)(x_grid.num); N = M+3; } else { x_grid.delta = (x_grid.end-x_grid.start)/(double)(x_grid.num-1); N = M+2; } x_grid.delta_inv = 1.0/x_grid.delta; spline->x_grid = x_grid; #ifndef HAVE_SSE2 spline->coefs = malloc (2*sizeof(double)*N); #else posix_memalign ((void**)&spline->coefs, 16, 2*sizeof(double)*N); #endif BCtype_d xBC_r, xBC_i; xBC_r.lCode = xBC.lCode; xBC_r.rCode = xBC.rCode; xBC_r.lVal = xBC.lVal_r; xBC_r.rVal = xBC.rVal_r; xBC_i.lCode = xBC.lCode; xBC_i.rCode = xBC.rCode; xBC_i.lVal = xBC.lVal_i; xBC_i.rVal = xBC.rVal_i; // Real part find_coefs_1d_d (spline->x_grid, xBC_r, (double*)data, 2, (double*)spline->coefs, 2); // Imaginarty part find_coefs_1d_d (spline->x_grid, xBC_i, ((double*)data)+1, 2, ((double*)spline->coefs)+1, 2); init_sse_data(); return spline; } void recompute_UBspline_1d_z (UBspline_1d_z* spline, complex_double *data) { int M = spline->x_grid.num; int N; if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC) N = M+3; else N = M+2; BCtype_d xBC_r, xBC_i; xBC_r.lCode = spline->xBC.lCode; xBC_r.rCode = spline->xBC.rCode; xBC_r.lVal = spline->xBC.lVal_r; xBC_r.rVal = spline->xBC.rVal_r; xBC_i.lCode = spline->xBC.lCode; xBC_i.rCode = spline->xBC.rCode; xBC_i.lVal = spline->xBC.lVal_i; xBC_i.rVal = spline->xBC.rVal_i; // Real part find_coefs_1d_d (spline->x_grid, xBC_r, (double*)data, 2, (double*)spline->coefs, 2); // Imaginarty part find_coefs_1d_d (spline->x_grid, xBC_i, ((double*)data)+1, 2, ((double*)spline->coefs)+1, 2); } UBspline_2d_z* create_UBspline_2d_z 
(Ugrid x_grid, Ugrid y_grid, BCtype_z xBC, BCtype_z yBC, complex_double *data) { // Create new spline UBspline_2d_z* restrict spline = malloc (sizeof(UBspline_2d_z)); spline->spcode = U2D; spline->tcode = DOUBLE_COMPLEX; spline->xBC = xBC; spline->yBC = yBC; // Setup internal variables int Mx = x_grid.num; int My = y_grid.num; int Nx, Ny; if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) Nx = Mx+3; else Nx = Mx+2; x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3); x_grid.delta_inv = 1.0/x_grid.delta; spline->x_grid = x_grid; if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC) Ny = My+3; else Ny = My+2; y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3); y_grid.delta_inv = 1.0/y_grid.delta; spline->y_grid = y_grid; spline->x_stride = Ny; #ifndef HAVE_SSE2 spline->coefs = malloc (2*sizeof(double)*Nx*Ny); #else posix_memalign ((void**)&spline->coefs, 16, 2*sizeof(double)*Nx*Ny); #endif BCtype_d xBC_r, xBC_i, yBC_r, yBC_i; xBC_r.lCode = xBC.lCode; xBC_r.rCode = xBC.rCode; xBC_r.lVal = xBC.lVal_r; xBC_r.rVal = xBC.rVal_r; xBC_i.lCode = xBC.lCode; xBC_i.rCode = xBC.rCode; xBC_i.lVal = xBC.lVal_i; xBC_i.rVal = xBC.rVal_i; yBC_r.lCode = yBC.lCode; yBC_r.rCode = yBC.rCode; yBC_r.lVal = yBC.lVal_r; yBC_r.rVal = yBC.rVal_r; yBC_i.lCode = yBC.lCode; yBC_i.rCode = yBC.rCode; yBC_i.lVal = yBC.lVal_i; yBC_i.rVal = yBC.rVal_i; // First, solve in the X-direction for (int iy=0; iyx_grid, xBC_r, ((double*)data+doffset), 2*My, (double*)spline->coefs+coffset, 2*Ny); // Imag part find_coefs_1d_d (spline->x_grid, xBC_i, ((double*)data)+doffset+1, 2*My, ((double*)spline->coefs)+coffset+1, 2*Ny); } // Now, solve in the Y-direction for (int ix=0; ixy_grid, yBC_r, ((double*)spline->coefs)+doffset, 2, (double*)spline->coefs+coffset, 2); // Imag part find_coefs_1d_d (spline->y_grid, yBC_i, (double*)spline->coefs+doffset+1, 2, ((double*)spline->coefs)+coffset+1, 2); } init_sse_data(); return spline; } void recompute_UBspline_2d_z (UBspline_2d_z* spline, complex_double 
*data) { int Mx = spline->x_grid.num; int My = spline->y_grid.num; int Nx, Ny; if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC) Nx = Mx+3; else Nx = Mx+2; if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC) Ny = My+3; else Ny = My+2; BCtype_d xBC_r, xBC_i, yBC_r, yBC_i; xBC_r.lCode = spline->xBC.lCode; xBC_r.rCode = spline->xBC.rCode; xBC_r.lVal = spline->xBC.lVal_r; xBC_r.rVal = spline->xBC.rVal_r; xBC_i.lCode = spline->xBC.lCode; xBC_i.rCode = spline->xBC.rCode; xBC_i.lVal = spline->xBC.lVal_i; xBC_i.rVal = spline->xBC.rVal_i; yBC_r.lCode = spline->yBC.lCode; yBC_r.rCode = spline->yBC.rCode; yBC_r.lVal = spline->yBC.lVal_r; yBC_r.rVal = spline->yBC.rVal_r; yBC_i.lCode = spline->yBC.lCode; yBC_i.rCode = spline->yBC.rCode; yBC_i.lVal = spline->yBC.lVal_i; yBC_i.rVal = spline->yBC.rVal_i; // First, solve in the X-direction for (int iy=0; iyx_grid, xBC_r, ((double*)data+doffset), 2*My, (double*)spline->coefs+coffset, 2*Ny); // Imag part find_coefs_1d_d (spline->x_grid, xBC_i, ((double*)data)+doffset+1, 2*My, ((double*)spline->coefs)+coffset+1, 2*Ny); } // Now, solve in the Y-direction for (int ix=0; ixy_grid, yBC_r, ((double*)spline->coefs)+doffset, 2, (double*)spline->coefs+coffset, 2); // Imag part find_coefs_1d_d (spline->y_grid, yBC_i, (double*)spline->coefs+doffset+1, 2, ((double*)spline->coefs)+coffset+1, 2); } } UBspline_3d_z* create_UBspline_3d_z (Ugrid x_grid, Ugrid y_grid, Ugrid z_grid, BCtype_z xBC, BCtype_z yBC, BCtype_z zBC, complex_double *data) { // Create new spline UBspline_3d_z* restrict spline = malloc (sizeof(UBspline_3d_z)); spline->spcode = U3D; spline->tcode = DOUBLE_COMPLEX; spline->xBC = xBC; spline->yBC = yBC; spline->zBC = zBC; // Setup internal variables int Mx = x_grid.num; int My = y_grid.num; int Mz = z_grid.num; int Nx, Ny, Nz; if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) Nx = Mx+3; else Nx = Mx+2; x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3); x_grid.delta_inv = 
1.0/x_grid.delta; spline->x_grid = x_grid; if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC) Ny = My+3; else Ny = My+2; y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3); y_grid.delta_inv = 1.0/y_grid.delta; spline->y_grid = y_grid; if (zBC.lCode == PERIODIC || zBC.lCode == ANTIPERIODIC) Nz = Mz+3; else Nz = Mz+2; z_grid.delta = (z_grid.end - z_grid.start)/(double)(Nz-3); z_grid.delta_inv = 1.0/z_grid.delta; spline->z_grid = z_grid; spline->x_stride = Ny*Nz; spline->y_stride = Nz; #ifndef HAVE_SSE2 spline->coefs = malloc (2*sizeof(double)*Nx*Ny*Nz); #else posix_memalign ((void**)&spline->coefs, 16, 2*sizeof(double)*Nx*Ny*Nz); #endif BCtype_d xBC_r, xBC_i, yBC_r, yBC_i, zBC_r, zBC_i; xBC_r.lCode = xBC.lCode; xBC_r.rCode = xBC.rCode; xBC_r.lVal = xBC.lVal_r; xBC_r.rVal = xBC.rVal_r; xBC_i.lCode = xBC.lCode; xBC_i.rCode = xBC.rCode; xBC_i.lVal = xBC.lVal_i; xBC_i.rVal = xBC.rVal_i; yBC_r.lCode = yBC.lCode; yBC_r.rCode = yBC.rCode; yBC_r.lVal = yBC.lVal_r; yBC_r.rVal = yBC.rVal_r; yBC_i.lCode = yBC.lCode; yBC_i.rCode = yBC.rCode; yBC_i.lVal = yBC.lVal_i; yBC_i.rVal = yBC.rVal_i; zBC_r.lCode = zBC.lCode; zBC_r.rCode = zBC.rCode; zBC_r.lVal = zBC.lVal_r; zBC_r.rVal = zBC.rVal_r; zBC_i.lCode = zBC.lCode; zBC_i.rCode = zBC.rCode; zBC_i.lVal = zBC.lVal_i; zBC_i.rVal = zBC.rVal_i; // First, solve in the X-direction for (int iy=0; iyx_grid, xBC_r, ((double*)data)+doffset, 2*My*Mz, ((double*)spline->coefs)+coffset, 2*Ny*Nz); // Imag part find_coefs_1d_d (spline->x_grid, xBC_i, ((double*)data)+doffset+1, 2*My*Mz, ((double*)spline->coefs)+coffset+1, 2*Ny*Nz); } // Now, solve in the Y-direction for (int ix=0; ixy_grid, yBC_r, ((double*)spline->coefs)+doffset, 2*Nz, ((double*)spline->coefs)+coffset, 2*Nz); // Imag part find_coefs_1d_d (spline->y_grid, yBC_i, ((double*)spline->coefs)+doffset+1, 2*Nz, ((double*)spline->coefs)+coffset+1, 2*Nz); } // Now, solve in the Z-direction for (int ix=0; ixz_grid, zBC_r, ((double*)spline->coefs)+doffset, 2, 
((double*)spline->coefs)+coffset, 2); // Imag part find_coefs_1d_d (spline->z_grid, zBC_i, ((double*)spline->coefs)+doffset+1, 2, ((double*)spline->coefs)+coffset+1, 2); } init_sse_data(); return spline; } void recompute_UBspline_3d_z (UBspline_3d_z* spline, complex_double *data) { // Setup internal variables int Mx = spline->x_grid.num; int My = spline->y_grid.num; int Mz = spline->z_grid.num; int Nx, Ny, Nz; if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC) Nx = Mx+3; else Nx = Mx+2; if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC) Ny = My+3; else Ny = My+2; if (spline->zBC.lCode == PERIODIC || spline->zBC.lCode == ANTIPERIODIC) Nz = Mz+3; else Nz = Mz+2; BCtype_d xBC_r, xBC_i, yBC_r, yBC_i, zBC_r, zBC_i; xBC_r.lCode = spline->xBC.lCode; xBC_r.rCode = spline->xBC.rCode; xBC_r.lVal = spline->xBC.lVal_r; xBC_r.rVal = spline->xBC.rVal_r; xBC_i.lCode = spline->xBC.lCode; xBC_i.rCode = spline->xBC.rCode; xBC_i.lVal = spline->xBC.lVal_i; xBC_i.rVal = spline->xBC.rVal_i; yBC_r.lCode = spline->yBC.lCode; yBC_r.rCode = spline->yBC.rCode; yBC_r.lVal = spline->yBC.lVal_r; yBC_r.rVal = spline->yBC.rVal_r; yBC_i.lCode = spline->yBC.lCode; yBC_i.rCode = spline->yBC.rCode; yBC_i.lVal = spline->yBC.lVal_i; yBC_i.rVal = spline->yBC.rVal_i; zBC_r.lCode = spline->zBC.lCode; zBC_r.rCode = spline->zBC.rCode; zBC_r.lVal = spline->zBC.lVal_r; zBC_r.rVal = spline->zBC.rVal_r; zBC_i.lCode = spline->zBC.lCode; zBC_i.rCode = spline->zBC.rCode; zBC_i.lVal = spline->zBC.lVal_i; zBC_i.rVal = spline->zBC.rVal_i; // First, solve in the X-direction for (int iy=0; iyx_grid, xBC_r, ((double*)data)+doffset, 2*My*Mz, ((double*)spline->coefs)+coffset, 2*Ny*Nz); // Imag part find_coefs_1d_d (spline->x_grid, xBC_i, ((double*)data)+doffset+1, 2*My*Mz, ((double*)spline->coefs)+coffset+1, 2*Ny*Nz); } // Now, solve in the Y-direction for (int ix=0; ixy_grid, yBC_r, ((double*)spline->coefs)+doffset, 2*Nz, ((double*)spline->coefs)+coffset, 2*Nz); // Imag part 
find_coefs_1d_d (spline->y_grid, yBC_i, ((double*)spline->coefs)+doffset+1, 2*Nz, ((double*)spline->coefs)+coffset+1, 2*Nz); } // Now, solve in the Z-direction for (int ix=0; ixz_grid, zBC_r, ((double*)spline->coefs)+doffset, 2, ((double*)spline->coefs)+coffset, 2); // Imag part find_coefs_1d_d (spline->z_grid, zBC_i, ((double*)spline->coefs)+doffset+1, 2, ((double*)spline->coefs)+coffset+1, 2); } } void destroy_UBspline (Bspline *spline) { free (spline->coefs); free (spline); } void destroy_NUBspline (Bspline *spline); void destroy_multi_UBspline (Bspline *spline); void destroy_Bspline (void *spline) { Bspline *sp = (Bspline *)spline; if (sp->sp_code <= U3D) destroy_UBspline (sp); else if (sp->sp_code <= NU3D) destroy_NUBspline (sp); else if (sp->sp_code <= MULTI_U3D) destroy_multi_UBspline (sp); else fprintf (stderr, "Error in destroy_Bspline: invalide spline code %d.\n", sp->sp_code); } einspline-0.9.2/src/multi_bspline_eval_std_z.c0000664000113000011300000000276611015560006016433 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. // // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. 
// // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #include "multi_bspline_eval_std_z_impl.h" einspline-0.9.2/src/multi_bspline_cuda_c_impl.h0000664000113000011300000006502211302247653016550 00000000000000#ifndef MULTI_BSPLINE_CUDA_C_IMPL_H #define MULTI_BSPLINE_CUDA_C_IMPL_H #include "multi_bspline.h" #include "multi_bspline_create_cuda.h" __global__ static void eval_multi_multi_UBspline_1d_c_kernel (float *pos, float drInv, float *coefs, float **vals, uint dim, uint stride, int N) { int tid = threadIdx.x; int ir = blockIdx.x; __shared__ float *ourval; __shared__ float r; if (tid == 0) { r = pos[ir]; ourval = vals[ir]; } __syncthreads(); int index; float t; float s, sf; float4 tp; s = r * drInv; sf = floor(s); index = min(max(0,(int)sf), dim-1); t = s - sf; tp = make_float4(t*t*t, t*t, t, 1.0); __shared__ float a[4]; if (tid < 4) a[tid] = Acuda[4*tid+0]*tp.x + Acuda[4*tid+1]*tp.y + Acuda[4*tid+2]*tp.z + Acuda[4*tid+3]*tp.w; __syncthreads(); int numBlocks = 2*N / SPLINE_BLOCK_SIZE; float *c = coefs + index*stride + tid; float *myval = ourval + tid; int stride2 = 2*stride; int stride3 = 3*stride; for (int block=0; block < numBlocks; block++) { *myval = (a[0] * c[0] + a[1] * c[stride] + a[2] * c[stride2] + a[3] * c[stride3]); myval += SPLINE_BLOCK_SIZE; c += SPLINE_BLOCK_SIZE; } int remainder = 2*N - numBlocks*SPLINE_BLOCK_SIZE; if (tid < remainder) { *myval = (a[0] * c[0] + a[1] * c[stride] + a[2] * c[stride2] + a[3] * c[stride3]); } } extern "C" void eval_multi_multi_UBspline_1d_c_cuda (multi_UBspline_1d_c_cuda *spline, float *pos_d, float *vals_d[], int num) { dim3 dimBlock(SPLINE_BLOCK_SIZE); dim3 dimGrid(num); eval_multi_multi_UBspline_1d_c_kernel<<>> (pos_d, spline->gridInv, (float*)spline->coefs, 
vals_d, spline->dim, spline->stride, spline->num_splines); cudaThreadSynchronize(); cudaError_t err = cudaGetLastError(); if (err != cudaSuccess) { fprintf (stderr, "CUDA error in eval_multi_multi_UBspline_1d_c_cuda:\n %s\n", cudaGetErrorString(err)); abort(); } } __global__ static void eval_multi_multi_UBspline_1d_c_vgl_kernel (float *pos, float drInv, float *coefs, float **vals, float **grads, float **lapl, uint dim, uint stride, int N) { int tid = threadIdx.x; int ir = blockIdx.x; __shared__ float *ourval, *ourgrad, *ourlapl; __shared__ float r; if (tid == 0) { r = pos[ir]; ourval = vals[ir]; ourgrad = grads[ir]; ourlapl = lapl[ir]; } __syncthreads(); int index; float t; float s, sf; float4 tp; s = r * drInv; sf = floor(s); index = min(max(0,(int)sf), dim-1); t = s - sf; tp = make_float4(t*t*t, t*t, t, 1.0); __shared__ float a[12]; if (tid < 12) a[tid] = Acuda[4*tid+0]*tp.x + Acuda[4*tid+1]*tp.y + Acuda[4*tid+2]*tp.z + Acuda[4*tid+3]*tp.w; __syncthreads(); int numBlocks = 2*N / SPLINE_BLOCK_SIZE; float *c = coefs + index*stride + tid; float *myval = ourval + tid; float *mygrad = ourgrad + tid; float *mylapl = ourlapl + tid; int stride2 = 2*stride; int stride3 = 3*stride; __shared__ float coef[SPLINE_BLOCK_SIZE][5]; for (int block=0; block < numBlocks; block++) { coef[tid][0] = c[0]; coef[tid][1] = c[stride]; coef[tid][2] = c[stride2]; coef[tid][3] = c[stride3]; *myval = (a[0] * coef[tid][0] + a[1] * coef[tid][1] + a[2] * coef[tid][2] + a[3] * coef[tid][3]); *mygrad = (a[4] * coef[tid][0] + a[5] * coef[tid][1] + a[6] * coef[tid][2] + a[7] * coef[tid][3]); *mylapl = (a[8] * coef[tid][0] + a[9] * coef[tid][1] + a[10] * coef[tid][2] + a[11]* coef[tid][3]); myval += SPLINE_BLOCK_SIZE; mygrad += SPLINE_BLOCK_SIZE; mylapl += SPLINE_BLOCK_SIZE; c += SPLINE_BLOCK_SIZE; } int remainder = 2*N - numBlocks*SPLINE_BLOCK_SIZE; if (tid < remainder) { *myval = (a[0] * c[0] + a[1] * c[stride] + a[2] * c[stride2] + a[3] * c[stride3]); } } extern "C" void 
eval_multi_multi_UBspline_1d_c_vgl_cuda (multi_UBspline_1d_c_cuda *spline, float *pos_d, float *vals_d[], float *grads_d[], float *lapl_d[], int num) { dim3 dimBlock(SPLINE_BLOCK_SIZE); dim3 dimGrid(num); eval_multi_multi_UBspline_1d_c_vgl_kernel<<>> (pos_d, spline->gridInv, (float*)spline->coefs, vals_d, grads_d, lapl_d, spline->dim, spline->stride, spline->num_splines); cudaThreadSynchronize(); cudaError_t err = cudaGetLastError(); if (err != cudaSuccess) { fprintf (stderr, "CUDA error in eval_multi_multi_UBspline_1d_c_cuda:\n %s\n", cudaGetErrorString(err)); abort(); } } __global__ static void eval_multi_multi_UBspline_3d_c_kernel (float *pos, float3 drInv, float *coefs, float *vals[], uint3 dim, uint3 strides, int N) { int block = blockIdx.x; int thr = threadIdx.x; int ir = blockIdx.y; int off = block*SPLINE_BLOCK_SIZE+thr; __shared__ float *myval; __shared__ float abc[64]; __shared__ float3 r; if (thr == 0) { r.x = pos[3*ir+0]; r.y = pos[3*ir+1]; r.z = pos[3*ir+2]; myval = vals[ir]; } __syncthreads(); int3 index; float3 t; float s, sf; float4 tp[3]; s = r.x * drInv.x; sf = floor(s); index.x = min(max(0,(int)sf), dim.x-1); //index.x = (int)sf; t.x = s - sf; s = r.y * drInv.y; sf = floor(s); index.y = min(max(0,(int)sf), dim.y-1); //index.y = (int)sf; t.y = s - sf; s = r.z * drInv.z; sf = floor(s); index.z = min(max(0,(int)sf), dim.z-1); //index.z = (int)sf; t.z = s - sf; tp[0] = make_float4(t.x*t.x*t.x, t.x*t.x, t.x, 1.0); tp[1] = make_float4(t.y*t.y*t.y, t.y*t.y, t.y, 1.0); tp[2] = make_float4(t.z*t.z*t.z, t.z*t.z, t.z, 1.0); __shared__ float a[4], b[4], c[4]; if (thr < 4) { a[thr] = Acuda[4*thr+0]*tp[0].x + Acuda[4*thr+1]*tp[0].y + Acuda[4*thr+2]*tp[0].z + Acuda[4*thr+3]*tp[0].w; b[thr] = Acuda[4*thr+0]*tp[1].x + Acuda[4*thr+1]*tp[1].y + Acuda[4*thr+2]*tp[1].z + Acuda[4*thr+3]*tp[1].w; c[thr] = Acuda[4*thr+0]*tp[2].x + Acuda[4*thr+1]*tp[2].y + Acuda[4*thr+2]*tp[2].z + Acuda[4*thr+3]*tp[2].w; } __syncthreads(); int i = (thr>>4)&3; int j = (thr>>2)&3; int k = 
(thr & 3); if (thr < 64) abc[thr] = a[i]*b[j]*c[k]; __syncthreads(); if (off < 2*N) { float val = 0.0; for (int i=0; i<4; i++) { for (int j=0; j<4; j++) { float *base = coefs + (index.x+i)*strides.x + (index.y+j)*strides.y + index.z*strides.z; for (int k=0; k<4; k++) val += abc[16*i+4*j+k] * base[off+k*strides.z]; } } myval[off] = val; } } __global__ static void eval_multi_multi_UBspline_3d_c_vgh_kernel (float *pos, float3 drInv, float *coefs, float *vals[], float *grads[], float *hess[], uint3 dim, uint3 strides, int N) { int block = blockIdx.x; int thr = threadIdx.x; int ir = blockIdx.y; int off = block*SPLINE_BLOCK_SIZE+threadIdx.x; __shared__ float *myval, *mygrad, *myhess; __shared__ float3 r; if (thr == 0) { r.x = pos[3*ir+0]; r.y = pos[3*ir+1]; r.z = pos[3*ir+2]; myval = vals[ir]; mygrad = grads[ir]; myhess = hess[ir]; } __syncthreads(); int3 index; float3 t; float s, sf; float4 tp[3]; s = r.x * drInv.x; sf = floor(s); index.x = min(max(0,(int)sf), dim.x-1); t.x = s - sf; s = r.y * drInv.y; sf = floor(s); index.y = min(max(0,(int)sf), dim.y-1); t.y = s - sf; s = r.z * drInv.z; sf = floor(s); index.z = min(max(0,(int)sf), dim.z-1); t.z = s - sf; tp[0] = make_float4(t.x*t.x*t.x, t.x*t.x, t.x, 1.0); tp[1] = make_float4(t.y*t.y*t.y, t.y*t.y, t.y, 1.0); tp[2] = make_float4(t.z*t.z*t.z, t.z*t.z, t.z, 1.0); // First 4 of a are value, second 4 are derivative, last four are // second derivative. 
__shared__ float a[12], b[12], c[12]; if (thr < 12) { a[thr] = Acuda[4*thr+0]*tp[0].x + Acuda[4*thr+1]*tp[0].y + Acuda[4*thr+2]*tp[0].z + Acuda[4*thr+3]*tp[0].w; b[thr] = Acuda[4*thr+0]*tp[1].x + Acuda[4*thr+1]*tp[1].y + Acuda[4*thr+2]*tp[1].z + Acuda[4*thr+3]*tp[1].w; c[thr] = Acuda[4*thr+0]*tp[2].x + Acuda[4*thr+1]*tp[2].y + Acuda[4*thr+2]*tp[2].z + Acuda[4*thr+3]*tp[2].w; } __syncthreads(); __shared__ float abc[640]; int i = (thr>>4)&3; int j = (thr>>2)&3; int k = (thr & 3); abc[(16*i+4*j+k)+0] = a[i+0]*b[j+0]*c[k+0]; // val abc[(16*i+4*j+k)+64] = a[i+4]*b[j+0]*c[k+0]; // d/dx abc[(16*i+4*j+k)+128] = a[i+0]*b[j+4]*c[k+0]; // d/dy abc[(16*i+4*j+k)+192] = a[i+0]*b[j+0]*c[k+4]; // d/dz abc[(16*i+4*j+k)+256] = a[i+8]*b[j+0]*c[k+0]; // d2/dx2 abc[(16*i+4*j+k)+320] = a[i+4]*b[j+4]*c[k+0]; // d2/dxdy abc[(16*i+4*j+k)+384] = a[i+4]*b[j+0]*c[k+4]; // d2/dxdz abc[(16*i+4*j+k)+448] = a[i+0]*b[j+8]*c[k+0]; // d2/dy2 abc[(16*i+4*j+k)+512] = a[i+0]*b[j+4]*c[k+4]; // d2/dydz abc[(16*i+4*j+k)+576] = a[i+0]*b[j+0]*c[k+8]; // d2/dz2 __syncthreads(); float v = 0.0, g0=0.0, g1=0.0, g2=0.0, h00=0.0, h01=0.0, h02=0.0, h11=0.0, h12=0.0, h22=0.0; int n = 0; float *b0 = coefs + index.x*strides.x + index.y*strides.y + index.z*strides.z + off; if (off < 2*N) { for (int i=0; i<4; i++) { for (int j=0; j<4; j++) { float *base = b0 + i*strides.x + j*strides.y; float c0 = base[0*strides.z]; float c1 = base[1*strides.z]; float c2 = base[2*strides.z]; float c3 = base[3*strides.z]; v += abc[n+ 0]*c0 + abc[n+ 1]*c1 + abc[n+ 2]*c2 + abc[n+ 3]*c3; g0 += abc[n+ 64]*c0 + abc[n+ 65]*c1 + abc[n+ 66]*c2 + abc[n+ 67]*c3; g1 += abc[n+128]*c0 + abc[n+129]*c1 + abc[n+130]*c2 + abc[n+131]*c3; g2 += abc[n+192]*c0 + abc[n+193]*c1 + abc[n+194]*c2 + abc[n+195]*c3; h00 += abc[n+256]*c0 + abc[n+257]*c1 + abc[n+258]*c2 + abc[n+259]*c3; h01 += abc[n+320]*c0 + abc[n+321]*c1 + abc[n+322]*c2 + abc[n+323]*c3; h02 += abc[n+384]*c0 + abc[n+385]*c1 + abc[n+386]*c2 + abc[n+387]*c3; h11 += abc[n+448]*c0 + abc[n+449]*c1 + 
abc[n+450]*c2 + abc[n+451]*c3; h12 += abc[n+512]*c0 + abc[n+513]*c1 + abc[n+514]*c2 + abc[n+515]*c3; h22 += abc[n+576]*c0 + abc[n+577]*c1 + abc[n+578]*c2 + abc[n+579]*c3; n += 4; // for (int k=0; k<4; k++) { // float c = base[k*strides.z]; // v += abc[n+0] * c; // g0 += abc[n+64] * c; // g1 += abc[n+128] * c; // g2 += abc[n+192] * c; // h00 += abc[n+256] * c; // h01 += abc[n+320] * c; // h02 += abc[n+384] * c; // h11 += abc[n+448] * c; // h12 += abc[n+512] * c; // h22 += abc[n+576] * c; // n += 1; // } } } g0 *= drInv.x; g1 *= drInv.y; g2 *= drInv.z; h00 *= drInv.x * drInv.x; h01 *= drInv.x * drInv.y; h02 *= drInv.x * drInv.z; h11 *= drInv.y * drInv.y; h12 *= drInv.y * drInv.z; h22 *= drInv.z * drInv.z; // __shared__ float buff[6*SPLINE_BLOCK_SIZE]; // Note, we can reuse abc, by replacing buff with abc. myval[off] = v; } abc[3*thr+0] = g0; abc[3*thr+1] = g1; abc[3*thr+2] = g2; __syncthreads(); for (int i=0; i<3; i++) { int myoff = (3*block+i)*SPLINE_BLOCK_SIZE + thr; if (myoff < 6*N) mygrad[myoff] = abc[i*SPLINE_BLOCK_SIZE+thr]; } __syncthreads(); // Write Hessians abc[6*thr+0] = h00; abc[6*thr+1] = h01; abc[6*thr+2] = h02; abc[6*thr+3] = h11; abc[6*thr+4] = h12; abc[6*thr+5] = h22; __syncthreads(); for (int i=0; i<6; i++) { int myoff = (6*block+i)*SPLINE_BLOCK_SIZE + thr; if (myoff < 12*N) myhess[myoff] = abc[i*SPLINE_BLOCK_SIZE+thr]; } } extern "C" void eval_multi_multi_UBspline_3d_c_cuda (multi_UBspline_3d_c_cuda *spline, float *pos_d, complex_float *vals_d[], int num) { dim3 dimBlock(SPLINE_BLOCK_SIZE); dim3 dimGrid(2*spline->num_splines/SPLINE_BLOCK_SIZE, num); if (2*spline->num_splines % SPLINE_BLOCK_SIZE) dimGrid.x++; eval_multi_multi_UBspline_3d_c_kernel<<>> (pos_d, spline->gridInv, (float*)spline->coefs, (float**)vals_d, spline->dim, spline->stride, spline->num_splines); cudaThreadSynchronize(); cudaError_t err = cudaGetLastError(); if (err != cudaSuccess) { fprintf (stderr, "CUDA error in eval_multi_multi_UBspline_3d_c_cuda:\n %s\n", 
cudaGetErrorString(err)); abort(); } } extern "C" void eval_multi_multi_UBspline_3d_c_vgh_cuda (multi_UBspline_3d_c_cuda *spline, float *pos_d, complex_float *vals_d[], complex_float *grads_d[], complex_float *hess_d[], int num) { dim3 dimBlock(SPLINE_BLOCK_SIZE); dim3 dimGrid(2*spline->num_splines/SPLINE_BLOCK_SIZE, num); if ((2*spline->num_splines) % SPLINE_BLOCK_SIZE) dimGrid.x++; eval_multi_multi_UBspline_3d_c_vgh_kernel<<>> (pos_d, spline->gridInv, (float*)spline->coefs, (float**)vals_d, (float**)grads_d, (float**)hess_d, spline->dim, spline->stride, spline->num_splines); cudaThreadSynchronize(); cudaError_t err = cudaGetLastError(); if (err != cudaSuccess) { fprintf (stderr, "CUDA error in eval_multi_multi_UBspline_3d_c_vgh_cuda:\n %s\n", cudaGetErrorString(err)); abort(); } } __global__ static void eval_multi_multi_UBspline_3d_c_vgl_kernel (float *pos, float3 drInv, float *coefs, float Linv[], float *vals[], float *grad_lapl[], uint3 dim, uint3 strides, int N, int row_stride) { int block = blockIdx.x; int thr = threadIdx.x; int ir = blockIdx.y; int off = block*SPLINE_BLOCK_SIZE+threadIdx.x; __shared__ float *myval, *mygrad_lapl; __shared__ float3 r; if (thr == 0) { r.x = pos[3*ir+0]; r.y = pos[3*ir+1]; r.z = pos[3*ir+2]; myval = vals[ir]; mygrad_lapl = grad_lapl[ir]; } __syncthreads(); int3 index; float3 t; float s, sf; float4 tp[3]; s = r.x * drInv.x; sf = floor(s); index.x = min(max(0,(int)sf), dim.x-1); t.x = s - sf; s = r.y * drInv.y; sf = floor(s); index.y = min(max(0,(int)sf), dim.y-1); t.y = s - sf; s = r.z * drInv.z; sf = floor(s); index.z = min(max(0,(int)sf), dim.z-1); t.z = s - sf; tp[0] = make_float4(t.x*t.x*t.x, t.x*t.x, t.x, 1.0); tp[1] = make_float4(t.y*t.y*t.y, t.y*t.y, t.y, 1.0); tp[2] = make_float4(t.z*t.z*t.z, t.z*t.z, t.z, 1.0); // First 4 of a are value, second 4 are derivative, last four are // second derivative. 
__shared__ float a[12], b[12], c[12]; if (thr < 12) { a[thr] = Acuda[4*thr+0]*tp[0].x + Acuda[4*thr+1]*tp[0].y + Acuda[4*thr+2]*tp[0].z + Acuda[4*thr+3]*tp[0].w; b[thr] = Acuda[4*thr+0]*tp[1].x + Acuda[4*thr+1]*tp[1].y + Acuda[4*thr+2]*tp[1].z + Acuda[4*thr+3]*tp[1].w; c[thr] = Acuda[4*thr+0]*tp[2].x + Acuda[4*thr+1]*tp[2].y + Acuda[4*thr+2]*tp[2].z + Acuda[4*thr+3]*tp[2].w; } __syncthreads(); __shared__ float abc[640]; int i = (thr>>4)&3; int j = (thr>>2)&3; int k = (thr & 3); abc[(16*i+4*j+k)+0] = a[i+0]*b[j+0]*c[k+0]; // val abc[(16*i+4*j+k)+64] = a[i+4]*b[j+0]*c[k+0]; // d/dx abc[(16*i+4*j+k)+128] = a[i+0]*b[j+4]*c[k+0]; // d/dy abc[(16*i+4*j+k)+192] = a[i+0]*b[j+0]*c[k+4]; // d/dz abc[(16*i+4*j+k)+256] = a[i+8]*b[j+0]*c[k+0]; // d2/dx2 abc[(16*i+4*j+k)+320] = a[i+4]*b[j+4]*c[k+0]; // d2/dxdy abc[(16*i+4*j+k)+384] = a[i+4]*b[j+0]*c[k+4]; // d2/dxdz abc[(16*i+4*j+k)+448] = a[i+0]*b[j+8]*c[k+0]; // d2/dy2 abc[(16*i+4*j+k)+512] = a[i+0]*b[j+4]*c[k+4]; // d2/dydz abc[(16*i+4*j+k)+576] = a[i+0]*b[j+0]*c[k+8]; // d2/dz2 __syncthreads(); float v = 0.0, g0=0.0, g1=0.0, g2=0.0, h00=0.0, h01=0.0, h02=0.0, h11=0.0, h12=0.0, h22=0.0; int n = 0; float *b0 = coefs + index.x*strides.x + index.y*strides.y + index.z*strides.z + off; if (off < 2*N) { for (int i=0; i<4; i++) { for (int j=0; j<4; j++) { float *base = b0 + i*strides.x + j*strides.y; float c0 = base[0*strides.z]; float c1 = base[1*strides.z]; float c2 = base[2*strides.z]; float c3 = base[3*strides.z]; v += abc[n+ 0]*c0 + abc[n+ 1]*c1 + abc[n+ 2]*c2 + abc[n+ 3]*c3; g0 += abc[n+ 64]*c0 + abc[n+ 65]*c1 + abc[n+ 66]*c2 + abc[n+ 67]*c3; g1 += abc[n+128]*c0 + abc[n+129]*c1 + abc[n+130]*c2 + abc[n+131]*c3; g2 += abc[n+192]*c0 + abc[n+193]*c1 + abc[n+194]*c2 + abc[n+195]*c3; h00 += abc[n+256]*c0 + abc[n+257]*c1 + abc[n+258]*c2 + abc[n+259]*c3; h01 += abc[n+320]*c0 + abc[n+321]*c1 + abc[n+322]*c2 + abc[n+323]*c3; h02 += abc[n+384]*c0 + abc[n+385]*c1 + abc[n+386]*c2 + abc[n+387]*c3; h11 += abc[n+448]*c0 + abc[n+449]*c1 + 
abc[n+450]*c2 + abc[n+451]*c3; h12 += abc[n+512]*c0 + abc[n+513]*c1 + abc[n+514]*c2 + abc[n+515]*c3; h22 += abc[n+576]*c0 + abc[n+577]*c1 + abc[n+578]*c2 + abc[n+579]*c3; n += 4; // for (int k=0; k<4; k++) { // float c = base[k*strides.z]; // v += abc[n+ 0] * c; // g0 += abc[n+ 64] * c; // g1 += abc[n+128] * c; // g2 += abc[n+192] * c; // h00 += abc[n+256] * c; // h01 += abc[n+320] * c; // h02 += abc[n+384] * c; // h11 += abc[n+448] * c; // h12 += abc[n+512] * c; // h22 += abc[n+576] * c; // n += 1; // } } } g0 *= drInv.x; g1 *= drInv.y; g2 *= drInv.z; h00 *= drInv.x * drInv.x; h01 *= drInv.x * drInv.y; h02 *= drInv.x * drInv.z; h11 *= drInv.y * drInv.y; h12 *= drInv.y * drInv.z; h22 *= drInv.z * drInv.z; // __shared__ float buff[6*SPLINE_BLOCK_SIZE]; // Note, we can reuse abc, by replacing buff with abc. myval[off] = v; } __shared__ float G[3][3], GGt[3][3]; int i0 = threadIdx.x/3; int i1 = threadIdx.x - 3*i0; if (threadIdx.x < 9) G[i0][i1] = Linv[threadIdx.x]; __syncthreads(); if (threadIdx.x < 9) GGt[i0][i1] = (G[0][i0]*G[0][i1] + G[1][i0]*G[1][i1] + G[2][i0]*G[2][i1]); __syncthreads(); if (off < 2*N) { // Store gradients back to global memory mygrad_lapl[off+0*row_stride] = G[0][0]*g0 + G[0][1]*g1 + G[0][2]*g2; mygrad_lapl[off+2*row_stride] = G[1][0]*g0 + G[1][1]*g1 + G[1][2]*g2; mygrad_lapl[off+4*row_stride] = G[2][0]*g0 + G[2][1]*g1 + G[2][2]*g2; // Store laplacians back to global memory // Hessian = H00 H01 H02 H11 H12 H22 // Matrix = [0 1 2] // [1 3 4] // [2 4 5] // laplacian = Trace(GGt*Hessian) mygrad_lapl[off+6*row_stride] = (GGt[0][0]*h00 + GGt[1][0]*h01 + GGt[2][0]*h02 + GGt[0][1]*h01 + GGt[1][1]*h11 + GGt[2][1]*h12 + GGt[0][2]*h02 + GGt[1][2]*h12 + GGt[2][2]*h22); } } extern "C" void eval_multi_multi_UBspline_3d_c_vgl_cuda (multi_UBspline_3d_c_cuda *spline, float *pos_d, float *Linv_d, float *vals_d[], float *grad_lapl_d[], int num, int row_stride) { dim3 dimBlock(SPLINE_BLOCK_SIZE); dim3 dimGrid(2*spline->num_splines/SPLINE_BLOCK_SIZE, num); if 
((2*spline->num_splines) % SPLINE_BLOCK_SIZE) dimGrid.x++; eval_multi_multi_UBspline_3d_c_vgl_kernel<<>> (pos_d, spline->gridInv, (float*)spline->coefs, Linv_d, (float**)vals_d, (float**)grad_lapl_d, spline->dim, spline->stride, spline->num_splines, row_stride); cudaThreadSynchronize(); cudaError_t err = cudaGetLastError(); if (err != cudaSuccess) { fprintf (stderr, "CUDA error in eval_multi_multi_UBspline_3d_c_vgl_cuda:\n %s\n", cudaGetErrorString(err)); abort(); } } /* __global__ static void eval_multi_multi_UBspline_3d_c_cuda (float *pos, float3 drInv, float *coefs_real, float *coefs_imag, float *vals[], uint3 strides) { int block = blockIdx.x; int thr = threadIdx.x; int ir = blockIdx.y; int off = block*SPLINE_BLOCK_SIZE+thr; __shared__ float *myval; __shared__ float abc[64]; // __shared__ float pos_s[SPLINE_BLOCK_SIZE]; // int ir1 = (ir >> 4)*64; // int ir2 = (ir & 15)*4; // pos_s[thr] = pos[ir1+thr]; // __syncthreads(); // float3 r; // r.x = pos_s[ir2+0]; // r.y = pos_s[ir2+1]; // r.z = pos_s[ir2+2]; __shared__ float3 r; if (thr == 0) { r.x = pos[4*ir+0]; r.y = pos[4*ir+1]; r.z = pos[4*ir+2]; myval = vals[ir]; } __syncthreads(); int3 index; float3 t; float s, sf; float4 tp[3]; s = r.x * drInv.x; sf = floor(s); index.x = (int)sf; t.x = s - sf; s = r.y * drInv.y; sf = floor(s); index.y = (int)sf; t.y = s - sf; s = r.z * drInv.z; sf = floor(s); index.z = (int)sf; t.z = s - sf; tp[0] = make_float4(t.x*t.x*t.x, t.x*t.x, t.x, 1.0); tp[1] = make_float4(t.y*t.y*t.y, t.y*t.y, t.y, 1.0); tp[2] = make_float4(t.z*t.z*t.z, t.z*t.z, t.z, 1.0); __shared__ float a[4], b[4], c[4]; if (thr < 4) { a[thr] = Acuda[4*thr+0]*tp[0].x + Acuda[4*thr+1]*tp[0].y + Acuda[4*thr+2]*tp[0].z + Acuda[4*thr+3]*tp[0].w; b[thr] = Acuda[4*thr+0]*tp[1].x + Acuda[4*thr+1]*tp[1].y + Acuda[4*thr+2]*tp[1].z + Acuda[4*thr+3]*tp[1].w; c[thr] = Acuda[4*thr+0]*tp[2].x + Acuda[4*thr+1]*tp[2].y + Acuda[4*thr+2]*tp[2].z + Acuda[4*thr+3]*tp[2].w; } __syncthreads(); int i = (thr>>4)&3; int j = (thr>>2)&3; int k 
= (thr & 3); abc[thr] = a[i]*b[j]*c[k]; __syncthreads(); float val_real = 0.0; float val_imag = 0.0; val_real = val_imag = 0.0; for (int i=0; i<4; i++) { for (int j=0; j<4; j++) { float *base_real = coefs_real + (index.x+i)*strides.x + (index.y+j)*strides.y + index.z*strides.z; float *base_imag = coefs_imag + (index.x+i)*strides.x + (index.y+j)*strides.y + index.z*strides.z; for (int k=0; k<4; k++) { val_real += abc[16*i+4*j+k] * base_real[off+k*strides.z]; val_imag += abc[16*i+4*j+k] * base_imag[off+k*strides.z]; } } } __shared__ float buff[2*SPLINE_BLOCK_SIZE]; buff[2*thr+0] = val_real; buff[2*thr+1] = val_imag; __syncthreads(); myval[off] = buff[thr]; myval[off+SPLINE_BLOCK_SIZE] = buff[thr+SPLINE_BLOCK_SIZE]; } __global__ static void eval_multi_multi_UBspline_3d_c_vgh_cuda (float *pos, float3 drInv, float *coefs_real, float *coefs_imag, float *vals[], float *grads[], float *hess[], uint3 strides) { int block = blockIdx.x; int thr = threadIdx.x; int ir = blockIdx.y; int off = block*SPLINE_BLOCK_SIZE+thr; __shared__ float *myval, *mygrad, *myhess; __shared__ float3 r; if (thr == 0) { r.x = pos[4*ir+0]; r.y = pos[4*ir+1]; r.z = pos[4*ir+2]; myval = vals[ir]; mygrad = grads[ir]; myhess = hess[ir]; } __syncthreads(); int3 index; float3 t; float s, sf; float4 tp[3]; s = r.x * drInv.x; sf = floor(s); index.x = (int)sf; t.x = s - sf; s = r.y * drInv.y; sf = floor(s); index.y = (int)sf; t.y = s - sf; s = r.z * drInv.z; sf = floor(s); index.z = (int)sf; t.z = s - sf; tp[0] = make_float4(t.x*t.x*t.x, t.x*t.x, t.x, 1.0); tp[1] = make_float4(t.y*t.y*t.y, t.y*t.y, t.y, 1.0); tp[2] = make_float4(t.z*t.z*t.z, t.z*t.z, t.z, 1.0); // First 4 of a are value, second 4 are derivative, last four are // second derivative. 
__shared__ float a[12], b[12], c[12]; if (thr < 12) { a[thr] = Acuda[4*thr+0]*tp[0].x + Acuda[4*thr+1]*tp[0].y + Acuda[4*thr+2]*tp[0].z + Acuda[4*thr+3]*tp[0].w; b[thr] = Acuda[4*thr+0]*tp[1].x + Acuda[4*thr+1]*tp[1].y + Acuda[4*thr+2]*tp[1].z + Acuda[4*thr+3]*tp[1].w; c[thr] = Acuda[4*thr+0]*tp[2].x + Acuda[4*thr+1]*tp[2].y + Acuda[4*thr+2]*tp[2].z + Acuda[4*thr+3]*tp[2].w; } __syncthreads(); __shared__ float abc[640]; int i = (thr>>4)&3; int j = (thr>>2)&3; int k = (thr & 3); abc[10*(16*i+4*j+k)+0] = a[i+0]*b[j+0]*c[k+0]; // val abc[10*(16*i+4*j+k)+1] = a[i+4]*b[j+0]*c[k+0]; // d/dx abc[10*(16*i+4*j+k)+2] = a[i+0]*b[j+4]*c[k+0]; // d/dy abc[10*(16*i+4*j+k)+3] = a[i+0]*b[j+0]*c[k+4]; // d/dz abc[10*(16*i+4*j+k)+4] = a[i+8]*b[j+0]*c[k+0]; // d2/dx2 abc[10*(16*i+4*j+k)+5] = a[i+4]*b[j+4]*c[k+0]; // d2/dxdy abc[10*(16*i+4*j+k)+6] = a[i+4]*b[j+0]*c[k+4]; // d2/dxdz abc[10*(16*i+4*j+k)+7] = a[i+0]*b[j+8]*c[k+0]; // d2/dy2 abc[10*(16*i+4*j+k)+8] = a[i+0]*b[j+4]*c[k+4]; // d2/dydz abc[10*(16*i+4*j+k)+9] = a[i+0]*b[j+0]*c[k+8]; // d2/dz2 __syncthreads(); float v_r = 0.0; float v_i = 0.0; float g0_r=0.0, g0_i=0.0, g1_r=0.0, g1_i=0.0, g2_r=0.0, g2_i=0.0, h00_r=0.0, h00_i=0.0, h01_r=0.0, h01_i=0.0, h02_r=0.0, h02_i=0.0, h11_r=0.0, h11_i=0.0, h12_r=0.0, h12_i=0.0, h22_r=0.0, h22_i=0.0; int n = 0; for (int i=0; i<4; i++) { for (int j=0; j<4; j++) { float *base_real = coefs_real + (index.x+i)*strides.x + (index.y+j)*strides.y + index.z*strides.z; float *base_imag = coefs_imag + (index.x+i)*strides.x + (index.y+j)*strides.y + index.z*strides.z; for (int k=0; k<4; k++) { float cr = base_real[off+k*strides.z]; float ci = base_imag[off+k*strides.z]; v_r += abc[n+0] * cr; v_i += abc[n+0] * ci; g0_r += abc[n+1] * cr; g0_i += abc[n+1] * ci; g1_r += abc[n+2] * cr; g1_i += abc[n+2] * ci; g2_r += abc[n+3] * cr; g2_i += abc[n+3] * ci; h00_r += abc[n+4] * cr; h00_i += abc[n+4] * ci; h01_r += abc[n+5] * cr; h01_i += abc[n+5] * ci; h02_r += abc[n+6] * cr; h02_i += abc[n+6] * ci; h11_r += 
abc[n+7] * cr; h11_i += abc[n+7] * ci; h12_r += abc[n+8] * cr; h12_i += abc[n+8] * ci; h22_r += abc[n+9] * cr; h22_i += abc[n+9] * ci; n += 10; } } } g0_r *= drInv.x; g0_i *= drInv.x; g1_r *= drInv.y; g1_i *= drInv.y; g2_r *= drInv.z; g2_i *= drInv.z; h00_r *= drInv.x * drInv.x; h00_i *= drInv.x * drInv.x; h01_r *= drInv.x * drInv.y; h01_i *= drInv.x * drInv.y; h02_r *= drInv.x * drInv.z; h02_i *= drInv.x * drInv.z; h11_r *= drInv.y * drInv.y; h11_i *= drInv.y * drInv.y; h12_r *= drInv.y * drInv.z; h12_i *= drInv.y * drInv.z; h22_r *= drInv.z * drInv.z; h22_i *= drInv.z * drInv.z; __shared__ float buff[6*SPLINE_BLOCK_SIZE]; // Note, we can reuse abc, by replacing buff with abc. buff[2*thr+0] = v_r; buff[2*thr+1] = v_i; __syncthreads(); myval[off] = buff[thr]; myval[off+SPLINE_BLOCK_SIZE] = buff[thr+SPLINE_BLOCK_SIZE]; buff[6*thr+0] = g0_r; buff[6*thr+1] = g0_i; buff[6*thr+2] = g1_r; buff[6*thr+3] = g1_i; buff[6*thr+4] = g2_r; buff[6*thr+5] = g2_i; __syncthreads(); for (int i=0; i<6; i++) mygrad[(6*block+i)*SPLINE_BLOCK_SIZE+thr] = buff[i*SPLINE_BLOCK_SIZE+thr]; __syncthreads(); // Write first half of Hessians if (thr < 32) { buff[12*thr+0] = h00_r; buff[12*thr+1] = h00_i; buff[12*thr+2] = h01_r; buff[12*thr+3] = h01_i; buff[12*thr+4] = h02_r; buff[12*thr+5] = h02_i; buff[12*thr+6] = h11_r; buff[12*thr+7] = h11_i; buff[12*thr+8] = h12_r; buff[12*thr+9] = h12_i; buff[12*thr+10] = h22_r; buff[12*thr+11] = h22_i; } __syncthreads(); if (thr < 32) for (int i=0; i<6; i++) myhess[(12*block+i)*SPLINE_BLOCK_SIZE+thr] = buff[i*SPLINE_BLOCK_SIZE+thr]; __syncthreads(); int th2 = thr-32; if (thr >= 32) { buff[12*th2+0] = h00_r; buff[12*th2+1] = h00_i; buff[12*th2+2] = h01_r; buff[12*th2+3] = h01_i; buff[12*th2+4] = h02_r; buff[12*th2+5] = h02_i; buff[12*th2+6] = h11_r; buff[12*th2+7] = h11_i; buff[12*th2+8] = h12_r; buff[12*th2+9] = h12_i; buff[12*th2+10] = h22_r; buff[12*th2+11] = h22_i; } __syncthreads(); if (thr >= 32) { for (int i=0; i<6; i++) 
myhess[(12*block+i+6)*SPLINE_BLOCK_SIZE+th2] = buff[i*SPLINE_BLOCK_SIZE+th2]; } } */ #endif einspline-0.9.2/src/multi_nubspline_structs.h0000664000113000011300000001237611035727230016364 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. // // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. // // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #ifndef MULTI_NUBSPLINE_STRUCTS_STD_H #define MULTI_NUBSPLINE_STRUCTS_STD_H #include #include "bspline_base.h" #include "nubasis.h" /////////////////////////// // Single precision real // /////////////////////////// typedef struct { spline_code spcode; type_code tcode; float* restrict coefs; intptr_t x_stride; BCtype_s xBC; int num_splines; NUgrid *restrict x_grid; NUBasis *restrict x_basis; } multi_NUBspline_1d_s; typedef struct { spline_code spcode; type_code tcode; float* restrict coefs; intptr_t x_stride, y_stride; BCtype_s xBC, yBC; int num_splines; NUgrid *restrict x_grid, *restrict y_grid; NUBasis *restrict x_basis, *restrict y_basis; } multi_NUBspline_2d_s; typedef struct { spline_code spcode; type_code tcode; float* restrict coefs; intptr_t 
x_stride, y_stride, z_stride; BCtype_s xBC, yBC, zBC; int num_splines; NUgrid *restrict x_grid, *restrict y_grid, *restrict z_grid; NUBasis *restrict x_basis, *restrict y_basis, *restrict z_basis; } multi_NUBspline_3d_s; /////////////////////////// // Double precision real // /////////////////////////// typedef struct { spline_code spcode; type_code tcode; double* restrict coefs; intptr_t x_stride; BCtype_d xBC; int num_splines; NUgrid *restrict x_grid; NUBasis *restrict x_basis; } multi_NUBspline_1d_d; typedef struct { spline_code spcode; type_code tcode; double* restrict coefs; intptr_t x_stride, y_stride; BCtype_d xBC, yBC; int num_splines; NUgrid *restrict x_grid, *restrict y_grid; NUBasis *restrict x_basis, *restrict y_basis; } multi_NUBspline_2d_d; typedef struct { spline_code spcode; type_code tcode; double* restrict coefs; intptr_t x_stride, y_stride, z_stride; BCtype_d xBC, yBC, zBC; int num_splines; NUgrid *restrict x_grid, *restrict y_grid, *restrict z_grid; NUBasis *restrict x_basis, *restrict y_basis, *restrict z_basis; } multi_NUBspline_3d_d; ////////////////////////////// // Single precision complex // ////////////////////////////// typedef struct { spline_code spcode; type_code tcode; complex_float* restrict coefs; intptr_t x_stride; BCtype_c xBC; int num_splines; NUgrid *restrict x_grid; NUBasis *restrict x_basis; } multi_NUBspline_1d_c; typedef struct { spline_code spcode; type_code tcode; complex_float* restrict coefs; intptr_t x_stride, y_stride; BCtype_c xBC, yBC; int num_splines; NUgrid *restrict x_grid, *restrict y_grid; NUBasis *restrict x_basis, *restrict y_basis; } multi_NUBspline_2d_c; typedef struct { spline_code spcode; type_code tcode; complex_float* restrict coefs; intptr_t x_stride, y_stride, z_stride; BCtype_c xBC, yBC, zBC; int num_splines; NUgrid *restrict x_grid, *restrict y_grid, *restrict z_grid; NUBasis *restrict x_basis, *restrict y_basis, *restrict z_basis; } multi_NUBspline_3d_c; ////////////////////////////// // Double 
precision complex // ////////////////////////////// typedef struct { spline_code spcode; type_code tcode; complex_double* restrict coefs; intptr_t x_stride; BCtype_z xBC; int num_splines; NUgrid *restrict x_grid; NUBasis *restrict x_basis; } multi_NUBspline_1d_z; typedef struct { spline_code spcode; type_code tcode; complex_double* restrict coefs; intptr_t x_stride, y_stride; BCtype_z xBC, yBC; int num_splines; NUgrid *restrict x_grid, *restrict y_grid; NUBasis *restrict x_basis, *restrict y_basis; } multi_NUBspline_2d_z; typedef struct { spline_code spcode; type_code tcode; complex_double* restrict coefs; intptr_t x_stride, y_stride, z_stride; BCtype_z xBC, yBC, zBC; int num_splines; NUgrid *restrict x_grid, *restrict y_grid, *restrict z_grid; NUBasis *restrict x_basis, *restrict y_basis, *restrict z_basis; } multi_NUBspline_3d_z; #endif einspline-0.9.2/src/multi_bspline_eval_std_d_impl.h0000664000113000011300000010574511062522456017445 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. // // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. 
// // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #ifndef MULTI_BSPLINE_EVAL_STD_D_H #define MULTI_BSPLINE_EVAL_STD_D_H #include #include #include "bspline_base.h" #include "multi_bspline_structs.h" extern const double* restrict Ad; extern const double* restrict dAd; extern const double* restrict d2Ad; /************************************************************/ /* 1D double-precision, real evaulation functions */ /************************************************************/ void eval_multi_UBspline_1d_d (multi_UBspline_1d_d *spline, double x, double* restrict vals) { x -= spline->x_grid.start; double ux = x*spline->x_grid.delta_inv; double ipartx, tx; tx = modf (ux, &ipartx); int ix = (int) ipartx; double tpx[4], a[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; double* restrict coefs = spline->coefs; a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]); a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]); a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]); a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]); intptr_t xs = spline->x_stride; for (int n=0; nnum_splines; n++) vals[n] = 0.0; for (int i=0; i<4; i++) { double* restrict coefs = spline->coefs + ((ix+i)*xs); for (int n=0; nnum_splines; n++) vals[n] += a[i] * coefs[n]; } } void eval_multi_UBspline_1d_d_vg (multi_UBspline_1d_d *spline, double x, double* restrict vals, double* restrict grads) { x -= spline->x_grid.start; double ux = x*spline->x_grid.delta_inv; double ipartx, tx; tx = modf (ux, &ipartx); int ix = (int) ipartx; double tpx[4], a[4], da[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; double* restrict coefs = spline->coefs; 
a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]); a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]); a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]); a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]); da[0] = (dAd[ 0]*tpx[0] + dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]); da[1] = (dAd[ 4]*tpx[0] + dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]); da[2] = (dAd[ 8]*tpx[0] + dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]); da[3] = (dAd[12]*tpx[0] + dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]); intptr_t xs = spline->x_stride; for (int n=0; nnum_splines; n++) { vals[n] = 0.0; grads[n] = 0.0; } for (int i=0; i<4; i++) { double* restrict coefs = spline->coefs + ((ix+i)*xs); for (int n=0; nnum_splines; n++) { vals[n] += a[i] * coefs[n]; grads[n] += da[i] * coefs[n]; } } double dxInv = spline->x_grid.delta_inv; for (int n=0; nnum_splines; n++) grads[n] *= dxInv; } void eval_multi_UBspline_1d_d_vgl (multi_UBspline_1d_d *spline, double x, double* restrict vals, double* restrict grads, double* restrict lapl) { x -= spline->x_grid.start; double ux = x*spline->x_grid.delta_inv; double ipartx, tx; tx = modf (ux, &ipartx); int ix = (int) ipartx; double tpx[4], a[4], da[4], d2a[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; double* restrict coefs = spline->coefs; a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]); a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]); a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]); a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]); da[0] = (dAd[ 0]*tpx[0] + dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]); da[1] = (dAd[ 4]*tpx[0] + dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]); da[2] = (dAd[ 8]*tpx[0] + dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]); da[3] = (dAd[12]*tpx[0] + dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]); d2a[0] = (d2Ad[ 
0]*tpx[0] + d2Ad[ 1]*tpx[1] + d2Ad[ 2]*tpx[2] + d2Ad[ 3]*tpx[3]); d2a[1] = (d2Ad[ 4]*tpx[0] + d2Ad[ 5]*tpx[1] + d2Ad[ 6]*tpx[2] + d2Ad[ 7]*tpx[3]); d2a[2] = (d2Ad[ 8]*tpx[0] + d2Ad[ 9]*tpx[1] + d2Ad[10]*tpx[2] + d2Ad[11]*tpx[3]); d2a[3] = (d2Ad[12]*tpx[0] + d2Ad[13]*tpx[1] + d2Ad[14]*tpx[2] + d2Ad[15]*tpx[3]); intptr_t xs = spline->x_stride; for (int n=0; nnum_splines; n++) { vals[n] = 0.0; grads[n] = 0.0; lapl[n] = 0.0; } for (int i=0; i<4; i++) { double* restrict coefs = spline->coefs + ((ix+i)*xs); for (int n=0; nnum_splines; n++) { vals[n] += a[i] * coefs[n]; grads[n] += da[i] * coefs[n]; lapl[n] += d2a[i] * coefs[n]; } } double dxInv = spline->x_grid.delta_inv; for (int n=0; nnum_splines; n++) { grads[n] *= dxInv; lapl [n] *= dxInv*dxInv; } } void eval_multi_UBspline_1d_d_vgh (multi_UBspline_1d_d *spline, double x, double* restrict vals, double* restrict grads, double* restrict hess) { eval_multi_UBspline_1d_d_vgl (spline, x, vals, grads, hess); } /************************************************************/ /* 2D double-precision, real evaulation functions */ /************************************************************/ void eval_multi_UBspline_2d_d (multi_UBspline_2d_d *spline, double x, double y, double* restrict vals) { x -= spline->x_grid.start; y -= spline->y_grid.start; double ux = x*spline->x_grid.delta_inv; double uy = y*spline->y_grid.delta_inv; double ipartx, iparty, tx, ty; tx = modf (ux, &ipartx); int ix = (int) ipartx; ty = modf (uy, &iparty); int iy = (int) iparty; double tpx[4], tpy[4], a[4], b[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; double* restrict coefs = spline->coefs; a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]); a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]); a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]); a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]); 
b[0] = (Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]); b[1] = (Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]); b[2] = (Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]); b[3] = (Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]); intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; for (int n=0; nnum_splines; n++) vals[n] = 0.0; for (int i=0; i<4; i++) for (int j=0; j<4; j++) { double prefactor = a[i]*b[j]; double* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys); for (int n=0; nnum_splines; n++) vals[n] += prefactor*coefs[n]; } } void eval_multi_UBspline_2d_d_vg (multi_UBspline_2d_d *spline, double x, double y, double* restrict vals, double* restrict grads) { x -= spline->x_grid.start; y -= spline->y_grid.start; double ux = x*spline->x_grid.delta_inv; double uy = y*spline->y_grid.delta_inv; double ipartx, iparty, tx, ty; tx = modf (ux, &ipartx); int ix = (int) ipartx; ty = modf (uy, &iparty); int iy = (int) iparty; double tpx[4], tpy[4], a[4], b[4], da[4], db[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; double* restrict coefs = spline->coefs; a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]); a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]); a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]); a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]); da[0] = (dAd[ 0]*tpx[0] + dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]); da[1] = (dAd[ 4]*tpx[0] + dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]); da[2] = (dAd[ 8]*tpx[0] + dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]); da[3] = (dAd[12]*tpx[0] + dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]); b[0] = (Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]); b[1] = (Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]); b[2] = (Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + 
Ad[10]*tpy[2] + Ad[11]*tpy[3]); b[3] = (Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]); db[0] = (dAd[ 0]*tpy[0] + dAd[ 1]*tpy[1] + dAd[ 2]*tpy[2] + dAd[ 3]*tpy[3]); db[1] = (dAd[ 4]*tpy[0] + dAd[ 5]*tpy[1] + dAd[ 6]*tpy[2] + dAd[ 7]*tpy[3]); db[2] = (dAd[ 8]*tpy[0] + dAd[ 9]*tpy[1] + dAd[10]*tpy[2] + dAd[11]*tpy[3]); db[3] = (dAd[12]*tpy[0] + dAd[13]*tpy[1] + dAd[14]*tpy[2] + dAd[15]*tpy[3]); intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; for (int n=0; nnum_splines; n++) { vals[n] = 0.0; grads[2*n+0] = grads[2*n+1] = grads[2*n+2] = 0.0; } for (int i=0; i<4; i++) for (int j=0; j<4; j++) { double ab = a[i]*b[j]; double dab[2]; dab[0] = da[i]* b[j]; dab[1] = a[i]*db[j]; double* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys); for (int n=0; nnum_splines; n++) { vals [n] += ab *coefs[n]; grads[2*n+0] += dab[0]*coefs[n]; grads[2*n+1] += dab[1]*coefs[n]; } } double dxInv = spline->x_grid.delta_inv; double dyInv = spline->y_grid.delta_inv; for (int n=0; nnum_splines; n++) { grads[2*n+0] *= dxInv; grads[2*n+1] *= dyInv; } } void eval_multi_UBspline_2d_d_vgl (multi_UBspline_2d_d *spline, double x, double y, double* restrict vals, double* restrict grads, double* restrict lapl) { x -= spline->x_grid.start; y -= spline->y_grid.start; double ux = x*spline->x_grid.delta_inv; double uy = y*spline->y_grid.delta_inv; double ipartx, iparty, tx, ty; tx = modf (ux, &ipartx); int ix = (int) ipartx; ty = modf (uy, &iparty); int iy = (int) iparty; double tpx[4], tpy[4], a[4], b[4], da[4], db[4], d2a[4], d2b[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; double* restrict coefs = spline->coefs; a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]); a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]); a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]); a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + 
Ad[15]*tpx[3]); da[0] = (dAd[ 0]*tpx[0] + dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]); da[1] = (dAd[ 4]*tpx[0] + dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]); da[2] = (dAd[ 8]*tpx[0] + dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]); da[3] = (dAd[12]*tpx[0] + dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]); d2a[0] = (d2Ad[ 0]*tpx[0] + d2Ad[ 1]*tpx[1] + d2Ad[ 2]*tpx[2] + d2Ad[ 3]*tpx[3]); d2a[1] = (d2Ad[ 4]*tpx[0] + d2Ad[ 5]*tpx[1] + d2Ad[ 6]*tpx[2] + d2Ad[ 7]*tpx[3]); d2a[2] = (d2Ad[ 8]*tpx[0] + d2Ad[ 9]*tpx[1] + d2Ad[10]*tpx[2] + d2Ad[11]*tpx[3]); d2a[3] = (d2Ad[12]*tpx[0] + d2Ad[13]*tpx[1] + d2Ad[14]*tpx[2] + d2Ad[15]*tpx[3]); b[0] = (Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]); b[1] = (Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]); b[2] = (Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]); b[3] = (Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]); db[0] = (dAd[ 0]*tpy[0] + dAd[ 1]*tpy[1] + dAd[ 2]*tpy[2] + dAd[ 3]*tpy[3]); db[1] = (dAd[ 4]*tpy[0] + dAd[ 5]*tpy[1] + dAd[ 6]*tpy[2] + dAd[ 7]*tpy[3]); db[2] = (dAd[ 8]*tpy[0] + dAd[ 9]*tpy[1] + dAd[10]*tpy[2] + dAd[11]*tpy[3]); db[3] = (dAd[12]*tpy[0] + dAd[13]*tpy[1] + dAd[14]*tpy[2] + dAd[15]*tpy[3]); d2b[0] = (d2Ad[ 0]*tpy[0] + d2Ad[ 1]*tpy[1] + d2Ad[ 2]*tpy[2] + d2Ad[ 3]*tpy[3]); d2b[1] = (d2Ad[ 4]*tpy[0] + d2Ad[ 5]*tpy[1] + d2Ad[ 6]*tpy[2] + d2Ad[ 7]*tpy[3]); d2b[2] = (d2Ad[ 8]*tpy[0] + d2Ad[ 9]*tpy[1] + d2Ad[10]*tpy[2] + d2Ad[11]*tpy[3]); d2b[3] = (d2Ad[12]*tpy[0] + d2Ad[13]*tpy[1] + d2Ad[14]*tpy[2] + d2Ad[15]*tpy[3]); intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; double lapl2[2*spline->num_splines]; for (int n=0; nnum_splines; n++) { vals[n] = 0.0; grads[2*n+0] = grads[2*n+1] = 0.0; lapl2[2*n+0] = lapl2[2*n+1] = 0.0; } for (int i=0; i<4; i++) for (int j=0; j<4; j++) { double ab = a[i]*b[j]; double dab[2], d2ab[2]; dab[0] = da[i]* b[j]; dab[1] = a[i]*db[j]; d2ab[0] = d2a[i]* b[j]; d2ab[1] = a[i]*d2b[j]; double* 
restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys); for (int n=0; nnum_splines; n++) { vals[n] += ab *coefs[n]; grads[2*n+0] += dab[0]*coefs[n]; grads[2*n+1] += dab[1]*coefs[n]; lapl2[2*n+0] += d2ab[0]*coefs[n]; lapl2[2*n+1] += d2ab[1]*coefs[n]; } } double dxInv = spline->x_grid.delta_inv; double dyInv = spline->y_grid.delta_inv; for (int n=0; nnum_splines; n++) { grads[2*n+0] *= dxInv; grads[2*n+1] *= dyInv; lapl2[2*n+0] *= dxInv*dxInv; lapl2[2*n+1] *= dyInv*dyInv; lapl[n] = lapl2[2*n+0] + lapl2[2*n+1]; } } void eval_multi_UBspline_2d_d_vgh (multi_UBspline_2d_d *spline, double x, double y, double* restrict vals, double* restrict grads, double* restrict hess) { x -= spline->x_grid.start; y -= spline->y_grid.start; double ux = x*spline->x_grid.delta_inv; double uy = y*spline->y_grid.delta_inv; double ipartx, iparty, tx, ty; tx = modf (ux, &ipartx); int ix = (int) ipartx; ty = modf (uy, &iparty); int iy = (int) iparty; double tpx[4], tpy[4], a[4], b[4], da[4], db[4], d2a[4], d2b[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; double* restrict coefs = spline->coefs; a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]); a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]); a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]); a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]); da[0] = (dAd[ 0]*tpx[0] + dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]); da[1] = (dAd[ 4]*tpx[0] + dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]); da[2] = (dAd[ 8]*tpx[0] + dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]); da[3] = (dAd[12]*tpx[0] + dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]); d2a[0] = (d2Ad[ 0]*tpx[0] + d2Ad[ 1]*tpx[1] + d2Ad[ 2]*tpx[2] + d2Ad[ 3]*tpx[3]); d2a[1] = (d2Ad[ 4]*tpx[0] + d2Ad[ 5]*tpx[1] + d2Ad[ 6]*tpx[2] + d2Ad[ 7]*tpx[3]); d2a[2] = (d2Ad[ 8]*tpx[0] + d2Ad[ 9]*tpx[1] + d2Ad[10]*tpx[2] + 
d2Ad[11]*tpx[3]); d2a[3] = (d2Ad[12]*tpx[0] + d2Ad[13]*tpx[1] + d2Ad[14]*tpx[2] + d2Ad[15]*tpx[3]); b[0] = (Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]); b[1] = (Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]); b[2] = (Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]); b[3] = (Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]); db[0] = (dAd[ 0]*tpy[0] + dAd[ 1]*tpy[1] + dAd[ 2]*tpy[2] + dAd[ 3]*tpy[3]); db[1] = (dAd[ 4]*tpy[0] + dAd[ 5]*tpy[1] + dAd[ 6]*tpy[2] + dAd[ 7]*tpy[3]); db[2] = (dAd[ 8]*tpy[0] + dAd[ 9]*tpy[1] + dAd[10]*tpy[2] + dAd[11]*tpy[3]); db[3] = (dAd[12]*tpy[0] + dAd[13]*tpy[1] + dAd[14]*tpy[2] + dAd[15]*tpy[3]); d2b[0] = (d2Ad[ 0]*tpy[0] + d2Ad[ 1]*tpy[1] + d2Ad[ 2]*tpy[2] + d2Ad[ 3]*tpy[3]); d2b[1] = (d2Ad[ 4]*tpy[0] + d2Ad[ 5]*tpy[1] + d2Ad[ 6]*tpy[2] + d2Ad[ 7]*tpy[3]); d2b[2] = (d2Ad[ 8]*tpy[0] + d2Ad[ 9]*tpy[1] + d2Ad[10]*tpy[2] + d2Ad[11]*tpy[3]); d2b[3] = (d2Ad[12]*tpy[0] + d2Ad[13]*tpy[1] + d2Ad[14]*tpy[2] + d2Ad[15]*tpy[3]); intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; for (int n=0; nnum_splines; n++) { vals[n] = 0.0; grads[2*n+0] = grads[2*n+1] = 0.0; for (int i=0; i<4; i++) hess[4*n+i] = 0.0; } for (int i=0; i<4; i++) for (int j=0; j<4; j++){ double ab = a[i]*b[j]; double dab[2], d2ab[3]; dab[0] = da[i]* b[j]; dab[1] = a[i]*db[j]; d2ab[0] = d2a[i] * b[j]; d2ab[1] = da[i] * db[j]; d2ab[2] = a[i] * d2b[j]; double* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys); for (int n=0; nnum_splines; n++) { vals[n] += ab *coefs[n]; grads[2*n+0] += dab[0]*coefs[n]; grads[2*n+1] += dab[1]*coefs[n]; hess [4*n+0] += d2ab[0]*coefs[n]; hess [4*n+1] += d2ab[1]*coefs[n]; hess [4*n+3] += d2ab[2]*coefs[n]; } } double dxInv = spline->x_grid.delta_inv; double dyInv = spline->y_grid.delta_inv; for (int n=0; nnum_splines; n++) { grads[2*n+0] *= dxInv; grads[2*n+1] *= dyInv; hess[4*n+0] *= dxInv*dxInv; hess[4*n+1] *= dxInv*dyInv; hess[4*n+3] *= dyInv*dyInv; // Copy hessian 
elements into lower half of 3x3 matrix hess[4*n+2] = hess[4*n+1]; } } /************************************************************/ /* 3D double-precision, real evaulation functions */ /************************************************************/ void eval_multi_UBspline_3d_d (multi_UBspline_3d_d *spline, double x, double y, double z, double* restrict vals) { x -= spline->x_grid.start; y -= spline->y_grid.start; z -= spline->z_grid.start; double ux = x*spline->x_grid.delta_inv; double uy = y*spline->y_grid.delta_inv; double uz = z*spline->z_grid.delta_inv; double ipartx, iparty, ipartz, tx, ty, tz; tx = modf (ux, &ipartx); int ix = (int) ipartx; ty = modf (uy, &iparty); int iy = (int) iparty; tz = modf (uz, &ipartz); int iz = (int) ipartz; double tpx[4], tpy[4], tpz[4], a[4], b[4], c[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; tpz[0] = tz*tz*tz; tpz[1] = tz*tz; tpz[2] = tz; tpz[3] = 1.0; double* restrict coefs = spline->coefs; a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]); a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]); a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]); a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]); b[0] = (Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]); b[1] = (Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]); b[2] = (Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]); b[3] = (Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]); c[0] = (Ad[ 0]*tpz[0] + Ad[ 1]*tpz[1] + Ad[ 2]*tpz[2] + Ad[ 3]*tpz[3]); c[1] = (Ad[ 4]*tpz[0] + Ad[ 5]*tpz[1] + Ad[ 6]*tpz[2] + Ad[ 7]*tpz[3]); c[2] = (Ad[ 8]*tpz[0] + Ad[ 9]*tpz[1] + Ad[10]*tpz[2] + Ad[11]*tpz[3]); c[3] = (Ad[12]*tpz[0] + Ad[13]*tpz[1] + Ad[14]*tpz[2] + Ad[15]*tpz[3]); intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; intptr_t zs = spline->z_stride; for (int 
n=0; nnum_splines; n++) vals[n] = 0.0; for (int i=0; i<4; i++) for (int j=0; j<4; j++) for (int k=0; k<4; k++) { double prefactor = a[i]*b[j]*c[k]; double* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys + (iz+k)*zs); for (int n=0; nnum_splines; n++) vals[n] += prefactor*coefs[n]; } } void eval_multi_UBspline_3d_d_vg (multi_UBspline_3d_d *spline, double x, double y, double z, double* restrict vals, double* restrict grads) { x -= spline->x_grid.start; y -= spline->y_grid.start; z -= spline->z_grid.start; double ux = x*spline->x_grid.delta_inv; double uy = y*spline->y_grid.delta_inv; double uz = z*spline->z_grid.delta_inv; double ipartx, iparty, ipartz, tx, ty, tz; tx = modf (ux, &ipartx); int ix = (int) ipartx; ty = modf (uy, &iparty); int iy = (int) iparty; tz = modf (uz, &ipartz); int iz = (int) ipartz; double tpx[4], tpy[4], tpz[4], a[4], b[4], c[4], da[4], db[4], dc[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; tpz[0] = tz*tz*tz; tpz[1] = tz*tz; tpz[2] = tz; tpz[3] = 1.0; double* restrict coefs = spline->coefs; a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]); a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]); a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]); a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]); da[0] = (dAd[ 0]*tpx[0] + dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]); da[1] = (dAd[ 4]*tpx[0] + dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]); da[2] = (dAd[ 8]*tpx[0] + dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]); da[3] = (dAd[12]*tpx[0] + dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]); b[0] = (Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]); b[1] = (Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]); b[2] = (Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]); b[3] = (Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + 
Ad[15]*tpy[3]); db[0] = (dAd[ 0]*tpy[0] + dAd[ 1]*tpy[1] + dAd[ 2]*tpy[2] + dAd[ 3]*tpy[3]); db[1] = (dAd[ 4]*tpy[0] + dAd[ 5]*tpy[1] + dAd[ 6]*tpy[2] + dAd[ 7]*tpy[3]); db[2] = (dAd[ 8]*tpy[0] + dAd[ 9]*tpy[1] + dAd[10]*tpy[2] + dAd[11]*tpy[3]); db[3] = (dAd[12]*tpy[0] + dAd[13]*tpy[1] + dAd[14]*tpy[2] + dAd[15]*tpy[3]); c[0] = (Ad[ 0]*tpz[0] + Ad[ 1]*tpz[1] + Ad[ 2]*tpz[2] + Ad[ 3]*tpz[3]); c[1] = (Ad[ 4]*tpz[0] + Ad[ 5]*tpz[1] + Ad[ 6]*tpz[2] + Ad[ 7]*tpz[3]); c[2] = (Ad[ 8]*tpz[0] + Ad[ 9]*tpz[1] + Ad[10]*tpz[2] + Ad[11]*tpz[3]); c[3] = (Ad[12]*tpz[0] + Ad[13]*tpz[1] + Ad[14]*tpz[2] + Ad[15]*tpz[3]); dc[0] = (dAd[ 0]*tpz[0] + dAd[ 1]*tpz[1] + dAd[ 2]*tpz[2] + dAd[ 3]*tpz[3]); dc[1] = (dAd[ 4]*tpz[0] + dAd[ 5]*tpz[1] + dAd[ 6]*tpz[2] + dAd[ 7]*tpz[3]); dc[2] = (dAd[ 8]*tpz[0] + dAd[ 9]*tpz[1] + dAd[10]*tpz[2] + dAd[11]*tpz[3]); dc[3] = (dAd[12]*tpz[0] + dAd[13]*tpz[1] + dAd[14]*tpz[2] + dAd[15]*tpz[3]); intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; intptr_t zs = spline->z_stride; for (int n=0; nnum_splines; n++) { vals[n] = 0.0; grads[3*n+0] = grads[3*n+1] = grads[3*n+2] = 0.0; } for (int i=0; i<4; i++) for (int j=0; j<4; j++) for (int k=0; k<4; k++) { double abc = a[i]*b[j]*c[k]; double dabc[3]; dabc[0] = da[i]* b[j]* c[k]; dabc[1] = a[i]*db[j]* c[k]; dabc[2] = a[i]* b[j]*dc[k]; double* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys + (iz+k)*zs); for (int n=0; nnum_splines; n++) { vals[n] += abc *coefs[n]; grads[3*n+0] += dabc[0]*coefs[n]; grads[3*n+1] += dabc[1]*coefs[n]; grads[3*n+2] += dabc[2]*coefs[n]; } } double dxInv = spline->x_grid.delta_inv; double dyInv = spline->y_grid.delta_inv; double dzInv = spline->z_grid.delta_inv; for (int n=0; nnum_splines; n++) { grads[3*n+0] *= dxInv; grads[3*n+1] *= dyInv; grads[3*n+2] *= dzInv; } } void eval_multi_UBspline_3d_d_vgl (multi_UBspline_3d_d *spline, double x, double y, double z, double* restrict vals, double* restrict grads, double* restrict lapl) { x -= spline->x_grid.start; y -= 
spline->y_grid.start; z -= spline->z_grid.start; double ux = x*spline->x_grid.delta_inv; double uy = y*spline->y_grid.delta_inv; double uz = z*spline->z_grid.delta_inv; double ipartx, iparty, ipartz, tx, ty, tz; tx = modf (ux, &ipartx); int ix = (int) ipartx; ty = modf (uy, &iparty); int iy = (int) iparty; tz = modf (uz, &ipartz); int iz = (int) ipartz; double tpx[4], tpy[4], tpz[4], a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; tpz[0] = tz*tz*tz; tpz[1] = tz*tz; tpz[2] = tz; tpz[3] = 1.0; double* restrict coefs = spline->coefs; a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]); a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]); a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]); a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]); da[0] = (dAd[ 0]*tpx[0] + dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]); da[1] = (dAd[ 4]*tpx[0] + dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]); da[2] = (dAd[ 8]*tpx[0] + dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]); da[3] = (dAd[12]*tpx[0] + dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]); d2a[0] = (d2Ad[ 0]*tpx[0] + d2Ad[ 1]*tpx[1] + d2Ad[ 2]*tpx[2] + d2Ad[ 3]*tpx[3]); d2a[1] = (d2Ad[ 4]*tpx[0] + d2Ad[ 5]*tpx[1] + d2Ad[ 6]*tpx[2] + d2Ad[ 7]*tpx[3]); d2a[2] = (d2Ad[ 8]*tpx[0] + d2Ad[ 9]*tpx[1] + d2Ad[10]*tpx[2] + d2Ad[11]*tpx[3]); d2a[3] = (d2Ad[12]*tpx[0] + d2Ad[13]*tpx[1] + d2Ad[14]*tpx[2] + d2Ad[15]*tpx[3]); b[0] = (Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]); b[1] = (Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]); b[2] = (Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]); b[3] = (Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]); db[0] = (dAd[ 0]*tpy[0] + dAd[ 1]*tpy[1] + dAd[ 2]*tpy[2] + dAd[ 3]*tpy[3]); db[1] = (dAd[ 4]*tpy[0] + dAd[ 
5]*tpy[1] + dAd[ 6]*tpy[2] + dAd[ 7]*tpy[3]); db[2] = (dAd[ 8]*tpy[0] + dAd[ 9]*tpy[1] + dAd[10]*tpy[2] + dAd[11]*tpy[3]); db[3] = (dAd[12]*tpy[0] + dAd[13]*tpy[1] + dAd[14]*tpy[2] + dAd[15]*tpy[3]); d2b[0] = (d2Ad[ 0]*tpy[0] + d2Ad[ 1]*tpy[1] + d2Ad[ 2]*tpy[2] + d2Ad[ 3]*tpy[3]); d2b[1] = (d2Ad[ 4]*tpy[0] + d2Ad[ 5]*tpy[1] + d2Ad[ 6]*tpy[2] + d2Ad[ 7]*tpy[3]); d2b[2] = (d2Ad[ 8]*tpy[0] + d2Ad[ 9]*tpy[1] + d2Ad[10]*tpy[2] + d2Ad[11]*tpy[3]); d2b[3] = (d2Ad[12]*tpy[0] + d2Ad[13]*tpy[1] + d2Ad[14]*tpy[2] + d2Ad[15]*tpy[3]); c[0] = (Ad[ 0]*tpz[0] + Ad[ 1]*tpz[1] + Ad[ 2]*tpz[2] + Ad[ 3]*tpz[3]); c[1] = (Ad[ 4]*tpz[0] + Ad[ 5]*tpz[1] + Ad[ 6]*tpz[2] + Ad[ 7]*tpz[3]); c[2] = (Ad[ 8]*tpz[0] + Ad[ 9]*tpz[1] + Ad[10]*tpz[2] + Ad[11]*tpz[3]); c[3] = (Ad[12]*tpz[0] + Ad[13]*tpz[1] + Ad[14]*tpz[2] + Ad[15]*tpz[3]); dc[0] = (dAd[ 0]*tpz[0] + dAd[ 1]*tpz[1] + dAd[ 2]*tpz[2] + dAd[ 3]*tpz[3]); dc[1] = (dAd[ 4]*tpz[0] + dAd[ 5]*tpz[1] + dAd[ 6]*tpz[2] + dAd[ 7]*tpz[3]); dc[2] = (dAd[ 8]*tpz[0] + dAd[ 9]*tpz[1] + dAd[10]*tpz[2] + dAd[11]*tpz[3]); dc[3] = (dAd[12]*tpz[0] + dAd[13]*tpz[1] + dAd[14]*tpz[2] + dAd[15]*tpz[3]); d2c[0] = (d2Ad[ 0]*tpz[0] + d2Ad[ 1]*tpz[1] + d2Ad[ 2]*tpz[2] + d2Ad[ 3]*tpz[3]); d2c[1] = (d2Ad[ 4]*tpz[0] + d2Ad[ 5]*tpz[1] + d2Ad[ 6]*tpz[2] + d2Ad[ 7]*tpz[3]); d2c[2] = (d2Ad[ 8]*tpz[0] + d2Ad[ 9]*tpz[1] + d2Ad[10]*tpz[2] + d2Ad[11]*tpz[3]); d2c[3] = (d2Ad[12]*tpz[0] + d2Ad[13]*tpz[1] + d2Ad[14]*tpz[2] + d2Ad[15]*tpz[3]); intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; intptr_t zs = spline->z_stride; double lapl3[3*spline->num_splines]; for (int n=0; nnum_splines; n++) { vals[n] = 0.0; grads[3*n+0] = grads[3*n+1] = grads[3*n+2] = 0.0; lapl3[3*n+0] = lapl3[3*n+1] = lapl3[3*n+2] = 0.0; } for (int i=0; i<4; i++) for (int j=0; j<4; j++) for (int k=0; k<4; k++) { double abc = a[i]*b[j]*c[k]; double dabc[3], d2abc[3]; dabc[0] = da[i]* b[j]* c[k]; dabc[1] = a[i]*db[j]* c[k]; dabc[2] = a[i]* b[j]*dc[k]; d2abc[0] = d2a[i]* b[j]* c[k]; d2abc[1] = 
a[i]*d2b[j]* c[k]; d2abc[2] = a[i]* b[j]*d2c[k]; double* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys + (iz+k)*zs); for (int n=0; nnum_splines; n++) { vals[n] += abc *coefs[n]; grads[3*n+0] += dabc[0]*coefs[n]; grads[3*n+1] += dabc[1]*coefs[n]; grads[3*n+2] += dabc[2]*coefs[n]; lapl3[3*n+0] += d2abc[0]*coefs[n]; lapl3[3*n+1] += d2abc[1]*coefs[n]; lapl3[3*n+2] += d2abc[2]*coefs[n]; } } double dxInv = spline->x_grid.delta_inv; double dyInv = spline->y_grid.delta_inv; double dzInv = spline->z_grid.delta_inv; for (int n=0; nnum_splines; n++) { grads[3*n+0] *= dxInv; grads[3*n+1] *= dyInv; grads[3*n+2] *= dzInv; lapl3[3*n+0] *= dxInv*dxInv; lapl3[3*n+1] *= dyInv*dyInv; lapl3[3*n+2] *= dzInv*dzInv; lapl[n] = lapl3[3*n+0] + lapl3[3*n+1] + lapl3[3*n+2]; } } void eval_multi_UBspline_3d_d_vgh (multi_UBspline_3d_d *spline, double x, double y, double z, double* restrict vals, double* restrict grads, double* restrict hess) { x -= spline->x_grid.start; y -= spline->y_grid.start; z -= spline->z_grid.start; double ux = x*spline->x_grid.delta_inv; double uy = y*spline->y_grid.delta_inv; double uz = z*spline->z_grid.delta_inv; double ipartx, iparty, ipartz, tx, ty, tz; tx = modf (ux, &ipartx); int ix = (int) ipartx; ty = modf (uy, &iparty); int iy = (int) iparty; tz = modf (uz, &ipartz); int iz = (int) ipartz; double tpx[4], tpy[4], tpz[4], a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; tpz[0] = tz*tz*tz; tpz[1] = tz*tz; tpz[2] = tz; tpz[3] = 1.0; double* restrict coefs = spline->coefs; a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]); a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]); a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]); a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]); da[0] = (dAd[ 0]*tpx[0] + dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 
3]*tpx[3]); da[1] = (dAd[ 4]*tpx[0] + dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]); da[2] = (dAd[ 8]*tpx[0] + dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]); da[3] = (dAd[12]*tpx[0] + dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]); d2a[0] = (d2Ad[ 0]*tpx[0] + d2Ad[ 1]*tpx[1] + d2Ad[ 2]*tpx[2] + d2Ad[ 3]*tpx[3]); d2a[1] = (d2Ad[ 4]*tpx[0] + d2Ad[ 5]*tpx[1] + d2Ad[ 6]*tpx[2] + d2Ad[ 7]*tpx[3]); d2a[2] = (d2Ad[ 8]*tpx[0] + d2Ad[ 9]*tpx[1] + d2Ad[10]*tpx[2] + d2Ad[11]*tpx[3]); d2a[3] = (d2Ad[12]*tpx[0] + d2Ad[13]*tpx[1] + d2Ad[14]*tpx[2] + d2Ad[15]*tpx[3]); b[0] = (Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]); b[1] = (Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]); b[2] = (Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]); b[3] = (Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]); db[0] = (dAd[ 0]*tpy[0] + dAd[ 1]*tpy[1] + dAd[ 2]*tpy[2] + dAd[ 3]*tpy[3]); db[1] = (dAd[ 4]*tpy[0] + dAd[ 5]*tpy[1] + dAd[ 6]*tpy[2] + dAd[ 7]*tpy[3]); db[2] = (dAd[ 8]*tpy[0] + dAd[ 9]*tpy[1] + dAd[10]*tpy[2] + dAd[11]*tpy[3]); db[3] = (dAd[12]*tpy[0] + dAd[13]*tpy[1] + dAd[14]*tpy[2] + dAd[15]*tpy[3]); d2b[0] = (d2Ad[ 0]*tpy[0] + d2Ad[ 1]*tpy[1] + d2Ad[ 2]*tpy[2] + d2Ad[ 3]*tpy[3]); d2b[1] = (d2Ad[ 4]*tpy[0] + d2Ad[ 5]*tpy[1] + d2Ad[ 6]*tpy[2] + d2Ad[ 7]*tpy[3]); d2b[2] = (d2Ad[ 8]*tpy[0] + d2Ad[ 9]*tpy[1] + d2Ad[10]*tpy[2] + d2Ad[11]*tpy[3]); d2b[3] = (d2Ad[12]*tpy[0] + d2Ad[13]*tpy[1] + d2Ad[14]*tpy[2] + d2Ad[15]*tpy[3]); c[0] = (Ad[ 0]*tpz[0] + Ad[ 1]*tpz[1] + Ad[ 2]*tpz[2] + Ad[ 3]*tpz[3]); c[1] = (Ad[ 4]*tpz[0] + Ad[ 5]*tpz[1] + Ad[ 6]*tpz[2] + Ad[ 7]*tpz[3]); c[2] = (Ad[ 8]*tpz[0] + Ad[ 9]*tpz[1] + Ad[10]*tpz[2] + Ad[11]*tpz[3]); c[3] = (Ad[12]*tpz[0] + Ad[13]*tpz[1] + Ad[14]*tpz[2] + Ad[15]*tpz[3]); dc[0] = (dAd[ 0]*tpz[0] + dAd[ 1]*tpz[1] + dAd[ 2]*tpz[2] + dAd[ 3]*tpz[3]); dc[1] = (dAd[ 4]*tpz[0] + dAd[ 5]*tpz[1] + dAd[ 6]*tpz[2] + dAd[ 7]*tpz[3]); dc[2] = (dAd[ 8]*tpz[0] + dAd[ 9]*tpz[1] + dAd[10]*tpz[2] + 
dAd[11]*tpz[3]); dc[3] = (dAd[12]*tpz[0] + dAd[13]*tpz[1] + dAd[14]*tpz[2] + dAd[15]*tpz[3]); d2c[0] = (d2Ad[ 0]*tpz[0] + d2Ad[ 1]*tpz[1] + d2Ad[ 2]*tpz[2] + d2Ad[ 3]*tpz[3]); d2c[1] = (d2Ad[ 4]*tpz[0] + d2Ad[ 5]*tpz[1] + d2Ad[ 6]*tpz[2] + d2Ad[ 7]*tpz[3]); d2c[2] = (d2Ad[ 8]*tpz[0] + d2Ad[ 9]*tpz[1] + d2Ad[10]*tpz[2] + d2Ad[11]*tpz[3]); d2c[3] = (d2Ad[12]*tpz[0] + d2Ad[13]*tpz[1] + d2Ad[14]*tpz[2] + d2Ad[15]*tpz[3]); intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; intptr_t zs = spline->z_stride; for (int n=0; nnum_splines; n++) { vals[n] = 0.0; grads[3*n+0] = grads[3*n+1] = grads[3*n+2] = 0.0; for (int i=0; i<9; i++) hess[9*n+i] = 0.0; } for (int i=0; i<4; i++) for (int j=0; j<4; j++) for (int k=0; k<4; k++) { double abc = a[i]*b[j]*c[k]; double dabc[3], d2abc[6]; dabc[0] = da[i]* b[j]* c[k]; dabc[1] = a[i]*db[j]* c[k]; dabc[2] = a[i]* b[j]*dc[k]; d2abc[0] = d2a[i]* b[j]* c[k]; d2abc[1] = da[i]* db[j]* c[k]; d2abc[2] = da[i]* b[j]* dc[k]; d2abc[3] = a[i]*d2b[j]* c[k]; d2abc[4] = a[i]* db[j]* dc[k]; d2abc[5] = a[i]* b[j]*d2c[k]; double* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys + (iz+k)*zs); for (int n=0; nnum_splines; n++) { vals[n] += abc *coefs[n]; grads[3*n+0] += dabc[0]*coefs[n]; grads[3*n+1] += dabc[1]*coefs[n]; grads[3*n+2] += dabc[2]*coefs[n]; hess [9*n+0] += d2abc[0]*coefs[n]; hess [9*n+1] += d2abc[1]*coefs[n]; hess [9*n+2] += d2abc[2]*coefs[n]; hess [9*n+4] += d2abc[3]*coefs[n]; hess [9*n+5] += d2abc[4]*coefs[n]; hess [9*n+8] += d2abc[5]*coefs[n]; } } double dxInv = spline->x_grid.delta_inv; double dyInv = spline->y_grid.delta_inv; double dzInv = spline->z_grid.delta_inv; for (int n=0; nnum_splines; n++) { grads[3*n+0] *= dxInv; grads[3*n+1] *= dyInv; grads[3*n+2] *= dzInv; hess [9*n+0] *= dxInv*dxInv; hess [9*n+4] *= dyInv*dyInv; hess [9*n+8] *= dzInv*dzInv; hess [9*n+1] *= dxInv*dyInv; hess [9*n+2] *= dxInv*dzInv; hess [9*n+5] *= dyInv*dzInv; // Copy hessian elements into lower half of 3x3 matrix hess [9*n+3] = hess[9*n+1]; 
hess [9*n+6] = hess[9*n+2]; hess [9*n+7] = hess[9*n+5]; } } #endif einspline-0.9.2/src/multi_nubspline_create.h0000664000113000011300000001470611035743400016114 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. // // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. // // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #ifndef MULTI_NUBBSPLINE_CREATE_H #define MULTI_NUBBSPLINE_CREATE_H #include "bspline_base.h" #include "multi_nubspline_structs.h" #ifdef __cplusplus extern "C" { #endif //////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////// //// Spline creation functions //// //////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////// ///////////////////////////////////// // Uniform, single precision, real // ///////////////////////////////////// // Create 1D uniform single-precision, real Bspline multi_NUBspline_1d_s * create_multi_NUBspline_1d_s (NUgrid* x_grid, BCtype_s xBC, int num_splines); // Create 2D uniform single-precision, real Bspline multi_NUBspline_2d_s * 
create_multi_NUBspline_2d_s (NUgrid* x_grid, NUgrid* y_grid, BCtype_s xBC, BCtype_s yBC, int num_splines); // Create 3D uniform single-precision, real Bspline multi_NUBspline_3d_s * create_multi_NUBspline_3d_s (NUgrid* x_grid, NUgrid* y_grid, NUgrid* z_grid, BCtype_s xBC, BCtype_s yBC, BCtype_s zBC, int num_splines); // Set the data for the splines, and compute spline coefficients void set_multi_NUBspline_1d_s (multi_NUBspline_1d_s *spline, int spline_num, float *data); void set_multi_NUBspline_2d_s (multi_NUBspline_2d_s *spline, int spline_num, float *data); void set_multi_NUBspline_3d_s (multi_NUBspline_3d_s *spline, int spline_num, float *data); ///////////////////////////////////// // Uniform, double precision, real // ///////////////////////////////////// // Create 1D uniform single-precision, real Bspline multi_NUBspline_1d_d * create_multi_NUBspline_1d_d (NUgrid* x_grid, BCtype_d xBC, int num_splines); // Create 2D uniform single-precision, real Bspline multi_NUBspline_2d_d * create_multi_NUBspline_2d_d (NUgrid* x_grid, NUgrid* y_grid, BCtype_d xBC, BCtype_d yBC, int num_splines); // Create 3D uniform single-precision, real Bspline multi_NUBspline_3d_d * create_multi_NUBspline_3d_d (NUgrid* x_grid, NUgrid* y_grid, NUgrid* z_grid, BCtype_d xBC, BCtype_d yBC, BCtype_d zBC, int num_splines); // Set the data for the splines, and compute spline coefficients void set_multi_NUBspline_1d_d (multi_NUBspline_1d_d *spline, int spline_num, double *data); void set_multi_NUBspline_1d_d_BC (multi_NUBspline_1d_d *spline, int spline_num, double *data, BCtype_d xBC); void set_multi_NUBspline_2d_d (multi_NUBspline_2d_d *spline, int spline_num, double *data); void set_multi_NUBspline_3d_d (multi_NUBspline_3d_d *spline, int spline_num, double *data); /////////////////////////////////////// // Uniform, single precision, complex// /////////////////////////////////////// // Create 1D uniform single-precision, real Bspline multi_NUBspline_1d_c * create_multi_NUBspline_1d_c (NUgrid* 
x_grid, BCtype_c xBC, int num_splines); // Create 2D uniform single-precision, real Bspline multi_NUBspline_2d_c * create_multi_NUBspline_2d_c (NUgrid* x_grid, NUgrid* y_grid, BCtype_c xBC, BCtype_c yBC, int num_splines); // Create 3D uniform single-precision, real Bspline multi_NUBspline_3d_c * create_multi_NUBspline_3d_c (NUgrid* x_grid, NUgrid* y_grid, NUgrid* z_grid, BCtype_c xBC, BCtype_c yBC, BCtype_c zBC, int num_splines); // Set the data for the splines, and compute spline coefficients void set_multi_NUBspline_1d_c (multi_NUBspline_1d_c *spline, int spline_num, complex_float *data); void set_multi_NUBspline_2d_c (multi_NUBspline_2d_c *spline, int spline_num, complex_float *data); void set_multi_NUBspline_3d_c (multi_NUBspline_3d_c *spline, int spline_num, complex_float *data); /////////////////////////////////////// // Uniform, double precision, complex// /////////////////////////////////////// // Create 1D uniform double-precision, complex Bspline multi_NUBspline_1d_z * create_multi_NUBspline_1d_z (NUgrid* x_grid, BCtype_z xBC, int num_splines); // Create 2D uniform double-precision, complex Bspline multi_NUBspline_2d_z * create_multi_NUBspline_2d_z (NUgrid* x_grid, NUgrid* y_grid, BCtype_z xBC, BCtype_z yBC, int num_splines); // Create 3D uniform double-precision, complex Bspline multi_NUBspline_3d_z * create_multi_NUBspline_3d_z (NUgrid* x_grid, NUgrid* y_grid, NUgrid* z_grid, BCtype_z xBC, BCtype_z yBC, BCtype_z zBC, int num_splines); // Set the data for the splines, and compute spline coefficients void set_multi_NUBspline_1d_z (multi_NUBspline_1d_z *spline, int spline_num, complex_double *data); void set_multi_NUBspline_1d_z_BC (multi_NUBspline_1d_z *spline, int spline_num, complex_double *data, BCtype_z xBC); void set_multi_NUBspline_2d_z (multi_NUBspline_2d_z *spline, int spline_num, complex_double *data); void set_multi_NUBspline_3d_z (multi_NUBspline_3d_z *spline, int spline_num, complex_double *data); #ifdef __cplusplus } #endif #endif 
einspline-0.9.2/src/blip_create.h0000664000113000011300000000461211012400563013617 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. // // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. // // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #ifndef BLIP_CREATE_H #define BLIP_CREATE_H #include "bspline_base.h" #include "bspline_structs.h" #include //////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////// //// Blip creation functions //// //////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////// UBspline_3d_s* create_blip_3d_s (double *lattice, double *Gvecs, complex_float *coefs, int numG, double factor, bool useReal); UBspline_3d_d* create_blip_3d_d (double *lattice, double *Gvecs, complex_double *coefs, int numG, double factor, bool useReal); UBspline_3d_c* create_blip_3d_c (double *lattice, double *Gvecs, complex_float *coefs, int numG, double factor); UBspline_3d_z* create_blip_3d_z (double *lattice, double *Gvecs, complex_double *coefs, int numG, double factor); 
#endif einspline-0.9.2/src/bspline_create.h0000664000113000011300000001256011155575325014347 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. // // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. // // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #ifndef BSPLINE_CREATE_H #define BSPLINE_CREATE_H #include "bspline_base.h" #include "bspline_structs.h" #ifdef __cplusplus extern "C" { #endif //////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////// //// Spline creation functions //// //////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////// ///////////////////////////////////// // Uniform, single precision, real // ///////////////////////////////////// // Create 1D uniform single-precision, real Bspline UBspline_1d_s * create_UBspline_1d_s (Ugrid x_grid, BCtype_s xBC, float *data); // Create 2D uniform single-precision, real Bspline UBspline_2d_s * create_UBspline_2d_s (Ugrid x_grid, Ugrid y_grid, BCtype_s xBC, BCtype_s yBC, float *data); // Create 3D uniform 
single-precision, real Bspline UBspline_3d_s * create_UBspline_3d_s (Ugrid x_grid, Ugrid y_grid, Ugrid z_grid, BCtype_s xBC, BCtype_s yBC, BCtype_s zBC, float *data); void recompute_UBspline_1d_s (UBspline_1d_s* spline, float *data); void recompute_UBspline_2d_s (UBspline_2d_s* spline, float *data); void recompute_UBspline_3d_s (UBspline_3d_s* spline, float *data); ///////////////////////////////////// // Uniform, double precision, real // ///////////////////////////////////// // Create 1D uniform single-precision, real Bspline UBspline_1d_d * create_UBspline_1d_d (Ugrid x_grid, BCtype_d xBC, double *data); // Create 2D uniform single-precision, real Bspline UBspline_2d_d * create_UBspline_2d_d (Ugrid x_grid, Ugrid y_grid, BCtype_d xBC, BCtype_d yBC, double *data); // Create 3D uniform single-precision, real Bspline UBspline_3d_d * create_UBspline_3d_d (Ugrid x_grid, Ugrid y_grid, Ugrid z_grid, BCtype_d xBC, BCtype_d yBC, BCtype_d zBC, double *data); void recompute_UBspline_1d_d (UBspline_1d_d* spline, double *data); void recompute_UBspline_2d_d (UBspline_2d_d* spline, double *data); void recompute_UBspline_3d_d (UBspline_3d_d* spline, double *data); /////////////////////////////////////// // Uniform, single precision, complex// /////////////////////////////////////// // Create 1D uniform single-precision, real Bspline UBspline_1d_c * create_UBspline_1d_c (Ugrid x_grid, BCtype_c xBC, complex_float *data); // Create 2D uniform single-precision, real Bspline UBspline_2d_c * create_UBspline_2d_c (Ugrid x_grid, Ugrid y_grid, BCtype_c xBC, BCtype_c yBC, complex_float *data); // Create 3D uniform single-precision, real Bspline UBspline_3d_c * create_UBspline_3d_c (Ugrid x_grid, Ugrid y_grid, Ugrid z_grid, BCtype_c xBC, BCtype_c yBC, BCtype_c zBC, complex_float *data); void recompute_UBspline_1d_c (UBspline_1d_c* spline, complex_float *data); void recompute_UBspline_2d_c (UBspline_2d_c* spline, complex_float *data); void recompute_UBspline_3d_c (UBspline_3d_c* spline, 
complex_float *data); /////////////////////////////////////// // Uniform, double precision, complex// /////////////////////////////////////// // Create 1D uniform double-precision, complex Bspline UBspline_1d_z * create_UBspline_1d_z (Ugrid x_grid, BCtype_z xBC, complex_double *data); // Create 2D uniform double-precision, complex Bspline UBspline_2d_z * create_UBspline_2d_z (Ugrid x_grid, Ugrid y_grid, BCtype_z xBC, BCtype_z yBC, complex_double *data); // Create 3D uniform double-precision, complex Bspline UBspline_3d_z * create_UBspline_3d_z (Ugrid x_grid, Ugrid y_grid, Ugrid z_grid, BCtype_z xBC, BCtype_z yBC, BCtype_z zBC, complex_double *data); void recompute_UBspline_1d_z (UBspline_1d_z* spline, complex_double *data); void recompute_UBspline_2d_z (UBspline_2d_z* spline, complex_double *data); void recompute_UBspline_3d_z (UBspline_3d_z* spline, complex_double *data); #ifdef __cplusplus } #endif #endif einspline-0.9.2/src/multi_bspline_eval_d.h0000664000113000011300000001006711021344427015535 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. // // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. 
// // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #ifndef MULTI_BSPLINE_EVAL_D_H #define MULTI_BSPLINE_EVAL_D_H #include #include #include "multi_bspline_structs.h" /************************************************************/ /* 1D double-precision, real evaulation functions */ /************************************************************/ void eval_multi_UBspline_1d_d (multi_UBspline_1d_d *spline, double x, double* restrict vals); void eval_multi_UBspline_1d_d_vg (multi_UBspline_1d_d *spline, double x, double* restrict vals, double* restrict grads); void eval_multi_UBspline_1d_d_vgl (multi_UBspline_1d_d *spline, double x, double* restrict vals, double* restrict grads, double* restrict lapl); void eval_multi_UBspline_1d_d_vgh (multi_UBspline_1d_d *spline, double x, double* restrict vals, double* restrict grads, double* restrict hess); /************************************************************/ /* 2D double-precision, real evaulation functions */ /************************************************************/ void eval_multi_UBspline_2d_d (multi_UBspline_2d_d *spline, double x, double y, double* restrict vals); void eval_multi_UBspline_2d_d_vg (multi_UBspline_2d_d *spline, double x, double y, double* restrict vals, double* restrict grads); void eval_multi_UBspline_2d_d_vgl (multi_UBspline_2d_d *spline, double x, double y, double* restrict vals, double* restrict grads, double* restrict lapl); void eval_multi_UBspline_2d_d_vgh (multi_UBspline_2d_d *spline, double x, double y, double* restrict vals, double* restrict grads, double* restrict hess); /************************************************************/ /* 3D double-precision, real evaulation functions */ 
/************************************************************/ void eval_multi_UBspline_3d_d (multi_UBspline_3d_d *spline, double x, double y, double z, double* restrict vals); void eval_multi_UBspline_3d_d_vg (multi_UBspline_3d_d *spline, double x, double y, double z, double* restrict vals, double* restrict grads); void eval_multi_UBspline_3d_d_vgl (multi_UBspline_3d_d *spline, double x, double y, double z, double* restrict vals, double* restrict grads, double* restrict lapl); void eval_multi_UBspline_3d_d_vgh (multi_UBspline_3d_d *spline, double x, double y, double z, double* restrict vals, double* restrict grads, double* restrict hess); #endif einspline-0.9.2/src/multi_nubspline_create.c0000664000113000011300000011554011035746454016121 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. // // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. 
// // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #include "multi_nubspline_create.h" #ifndef _XOPEN_SOURCE #define _XOPEN_SOURCE 600 #endif #ifndef __USE_XOPEN2K #define __USE_XOPEN2K #endif #include #include #include int posix_memalign(void **memptr, size_t alignment, size_t size); //////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////// //// Helper functions for spline creation //// //////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////// void init_sse_data(); //////////////////////////////////////////////////////////// // Single-precision creation routines // //////////////////////////////////////////////////////////// void solve_NUB_deriv_interp_1d_s (NUBasis* restrict basis, float* restrict data, int datastride, float* restrict p, int pstride, float abcdInitial[4], float abcdFinal[4]); void solve_NUB_periodic_interp_1d_s (NUBasis* restrict basis, float* restrict data, int datastride, float* restrict p, int pstride); void find_NUBcoefs_1d_s (NUBasis* restrict basis, BCtype_s bc, float *data, int dstride, float *coefs, int cstride); //////////////////////////////////////////////////////////// // Double-precision creation routines // //////////////////////////////////////////////////////////// void solve_NUB_deriv_interp_1d_d (NUBasis* restrict basis, double* restrict data, int datastride, double* restrict p, int pstride, double abcdInitial[4], double abcdFinal[4]); void solve_NUB_periodic_interp_1d_d (NUBasis* restrict basis, double* restrict data, int datastride, double* restrict p, int pstride); void find_NUBcoefs_1d_d (NUBasis* restrict basis, BCtype_d bc, double *data, 
int dstride, double *coefs, int cstride); //////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////// //// Single-Precision, Real Creation Routines //// //////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////// // On input, bands should be filled with: // row 0 : abcdInitial from boundary conditions // rows 1:M: basis functions in first 3 cols, data in last // row M+1 : abcdFinal from boundary conditions // cstride gives the stride between values in coefs. // On exit, coefs with contain interpolating B-spline coefs multi_NUBspline_1d_s* create_multi_NUBspline_1d_s (NUgrid* x_grid, BCtype_s xBC, int num_splines) { // Create new spline multi_NUBspline_1d_s* restrict spline = malloc (sizeof(multi_NUBspline_1d_s)); if (spline == NULL) return spline; spline->spcode = MULTI_NU1D; spline->tcode = SINGLE_REAL; // Next, create the basis spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC); spline->xBC = xBC; spline->x_grid = x_grid; spline->num_splines = num_splines; // Setup internal variables int Mx, Nx; if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1; else Mx = x_grid->num_points; Nx = x_grid->num_points + 2; int N = num_splines; #ifdef HAVE_SSE if (N % 4) N += 4 - (N % 4); #endif spline->x_stride = N; spline->x_grid = x_grid; #ifndef HAVE_SSE spline->coefs = malloc (sizeof(float)*Nx*N); #else posix_memalign ((void**)&spline->coefs, 64, (sizeof(float)*Nx*N)); init_sse_data(); #endif return spline; } void set_multi_NUBspline_1d_s (multi_NUBspline_1d_s *spline, int num, float *data) { float *coefs = spline->coefs + num; int xs = spline->x_stride; find_NUBcoefs_1d_s (spline->x_basis, spline->xBC, data, 1, coefs, xs); } multi_NUBspline_2d_s* create_multi_NUBspline_2d_s (NUgrid* x_grid, NUgrid* y_grid, BCtype_s xBC, BCtype_s yBC, int num_splines) { // Create new spline multi_NUBspline_2d_s* restrict spline = malloc 
(sizeof(multi_NUBspline_2d_s)); spline->spcode = MULTI_NU2D; spline->tcode = SINGLE_REAL; spline->xBC = xBC; spline->yBC = yBC; spline->x_grid = x_grid; spline->y_grid = y_grid; spline->num_splines = num_splines; // Next, create the bases spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC); spline->y_basis = create_NUBasis (y_grid, yBC.lCode==PERIODIC); int Mx, My, Nx, Ny; if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1; else Mx = x_grid->num_points; if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1; else My = y_grid->num_points; Nx = x_grid->num_points + 2; Ny = y_grid->num_points + 2; int N = num_splines; #ifdef HAVE_SSE if (N % 4) N += 4 - (N % 4); #endif spline->x_stride = Ny*N; spline->y_stride = N; #ifndef HAVE_SSE spline->coefs = malloc ((size_t)sizeof(float)*Nx*Ny*N); #else posix_memalign ((void**)&spline->coefs, 64, sizeof(float)*Nx*Ny*N); init_sse_data(); #endif return spline; } void set_multi_NUBspline_2d_s (multi_NUBspline_2d_s* spline, int num, float *data) { int Mx, My, Nx, Ny; if (spline->xBC.lCode == PERIODIC) Mx = spline->x_grid->num_points - 1; else Mx = spline->x_grid->num_points; if (spline->yBC.lCode == PERIODIC) My = spline->y_grid->num_points - 1; else My = spline->y_grid->num_points; Nx = spline->x_grid->num_points + 2; Ny = spline->y_grid->num_points + 2; float *coefs = spline->coefs + num; int ys = spline->y_stride; // First, solve in the X-direction for (int iy=0; iyx_basis, spline->xBC, data+doffset, My, coefs+coffset, Ny*ys); } // Now, solve in the Y-direction for (int ix=0; ixy_basis, spline->yBC, coefs+doffset, ys, coefs+coffset, ys); } } multi_NUBspline_3d_s* create_multi_NUBspline_3d_s (NUgrid* x_grid, NUgrid* y_grid, NUgrid* z_grid, BCtype_s xBC, BCtype_s yBC, BCtype_s zBC, int num_splines) { // Create new spline multi_NUBspline_3d_s* restrict spline = malloc (sizeof(multi_NUBspline_3d_s)); if (spline == NULL) return spline; spline->spcode = MULTI_NU3D; spline->tcode = SINGLE_REAL; spline->xBC = xBC; 
spline->yBC = yBC; spline->zBC = zBC; spline->x_grid = x_grid; spline->y_grid = y_grid; spline->z_grid = z_grid; spline->num_splines = num_splines; // Next, create the bases spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC); spline->y_basis = create_NUBasis (y_grid, yBC.lCode==PERIODIC); spline->z_basis = create_NUBasis (z_grid, zBC.lCode==PERIODIC); int Mx, My, Mz, Nx, Ny, Nz; if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1; else Mx = x_grid->num_points; if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1; else My = y_grid->num_points; if (zBC.lCode == PERIODIC) Mz = z_grid->num_points - 1; else Mz = z_grid->num_points; Nx = x_grid->num_points + 2; Ny = y_grid->num_points + 2; Nz = z_grid->num_points + 2; int N = num_splines; #ifdef HAVE_SSE if (N % 4) N += 4 - (N % 4); #endif spline->x_stride = Ny*Nz*N; spline->y_stride = Nz*N; spline->z_stride = N; #ifndef HAVE_SSE spline->coefs = malloc (sizeof(float)*Nx*Ny*Nz*N); #else posix_memalign ((void**)&spline->coefs, 64, ((size_t)sizeof(float)*Nx*Ny*Nz*N)); init_sse_data(); #endif return spline; } void set_multi_NUBspline_3d_s (multi_NUBspline_3d_s* spline, int num, float *data) { int Mx, My, Mz, Nx, Ny, Nz; if (spline->xBC.lCode == PERIODIC) Mx = spline->x_grid->num_points - 1; else Mx = spline->x_grid->num_points; if (spline->yBC.lCode == PERIODIC) My = spline->y_grid->num_points - 1; else My = spline->y_grid->num_points; if (spline->zBC.lCode == PERIODIC) Mz = spline->z_grid->num_points - 1; else Mz = spline->z_grid->num_points; Nx = spline->x_grid->num_points + 2; Ny = spline->y_grid->num_points + 2; Nz = spline->z_grid->num_points + 2; float *coefs = spline->coefs + num; int zs = spline->z_stride; // First, solve in the X-direction for (int iy=0; iyx_basis, spline->xBC, data+doffset, My*Mz, coefs+coffset, Ny*Nz*zs); } // Now, solve in the Y-direction for (int ix=0; ixy_basis, spline->yBC, coefs+doffset, Nz*zs, coefs+coffset, Nz*zs); } // Now, solve in the Z-direction for (int ix=0; 
ixz_basis, spline->zBC, coefs+doffset, zs, coefs+coffset, zs); } } //////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////// //// Single-Precision, Complex Creation Routines //// //////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////// // On input, bands should be filled with: // row 0 : abcdInitial from boundary conditions // rows 1:M: basis functions in first 3 cols, data in last // row M+1 : abcdFinal from boundary conditions // cstride gives the stride between values in coefs. // On exit, coefs with contain interpolating B-spline coefs multi_NUBspline_1d_c* create_multi_NUBspline_1d_c (NUgrid* x_grid, BCtype_c xBC, int num_splines) { // Create new spline multi_NUBspline_1d_c* restrict spline = malloc (sizeof(multi_NUBspline_1d_c)); if (spline == NULL) return spline; spline->spcode = MULTI_NU1D; spline->tcode = SINGLE_COMPLEX; // Next, create the basis spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC); spline->xBC = xBC; spline->num_splines = num_splines; // Setup internal variables int Mx, Nx; if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1; else Mx = x_grid->num_points; Nx = x_grid->num_points + 2; int N = num_splines; #ifdef HAVE_SSE if (N % 2) N += 2 - (N % 2); #endif spline->x_stride = N; spline->x_grid = x_grid; #ifndef HAVE_SSE spline->coefs = malloc (2*sizeof(float)*Nx*N); #else posix_memalign ((void**)&spline->coefs, 64, 2*sizeof(float)*Nx*N); init_sse_data(); #endif return spline; } void set_multi_NUBspline_1d_c (multi_NUBspline_1d_c* spline, int num, complex_float *data) { complex_float *coefs = spline->coefs + num; BCtype_s xBC_r, xBC_i; xBC_r.lCode = spline->xBC.lCode; xBC_r.rCode = spline->xBC.rCode; xBC_r.lVal = spline->xBC.lVal_r; xBC_r.rVal = spline->xBC.rVal_r; xBC_i.lCode = spline->xBC.lCode; xBC_i.rCode = spline->xBC.rCode; xBC_i.lVal = spline->xBC.lVal_i; xBC_i.rVal = spline->xBC.rVal_i; int xs 
= spline->x_stride; // Real part find_NUBcoefs_1d_s (spline->x_basis, xBC_r, (float*)data, 2, (float*)coefs, 2*xs); // Imaginarty part find_NUBcoefs_1d_s (spline->x_basis, xBC_i, ((float*)data)+1, 2, ((float*)coefs+1), 2*xs); } multi_NUBspline_2d_c* create_multi_NUBspline_2d_c (NUgrid* x_grid, NUgrid* y_grid, BCtype_c xBC, BCtype_c yBC, int num_splines) { // Create new spline multi_NUBspline_2d_c* restrict spline = malloc (sizeof(multi_NUBspline_2d_c)); spline->spcode = MULTI_NU2D; spline->tcode = SINGLE_COMPLEX; spline->xBC = xBC; spline->yBC = yBC; spline->x_grid = x_grid; spline->y_grid = y_grid; spline->num_splines = num_splines; // Next, create the bases spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC); spline->y_basis = create_NUBasis (y_grid, yBC.lCode==PERIODIC); // Setup internal variables int Mx, My, Nx, Ny; if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1; else Mx = x_grid->num_points; if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1; else My = y_grid->num_points; Nx = x_grid->num_points + 2; Ny = y_grid->num_points + 2; int N = num_splines; #ifdef HAVE_SSE if (N % 2) N++; #endif spline->x_stride = Ny*N; spline->y_stride = N; #ifndef HAVE_SSE spline->coefs = malloc (2*sizeof(float)*Nx*Ny*N); #else posix_memalign ((void**)&spline->coefs, 64, 2*sizeof(float)*Nx*Ny*N); #endif init_sse_data(); return spline; } void set_multi_NUBspline_2d_c (multi_NUBspline_2d_c* spline, int num, complex_float *data) { // Setup internal variables int Mx, My, Nx, Ny; if (spline->xBC.lCode == PERIODIC) Mx = spline->x_grid->num_points - 1; else Mx = spline->x_grid->num_points; if (spline->yBC.lCode == PERIODIC) My = spline->y_grid->num_points - 1; else My = spline->y_grid->num_points; Nx = spline->x_grid->num_points + 2; Ny = spline->y_grid->num_points + 2; complex_float* coefs = spline->coefs + num; BCtype_s xBC_r, xBC_i, yBC_r, yBC_i; xBC_r.lCode = spline->xBC.lCode; xBC_r.rCode = spline->xBC.rCode; xBC_r.lVal = spline->xBC.lVal_r; xBC_r.rVal = 
spline->xBC.rVal_r; xBC_i.lCode = spline->xBC.lCode; xBC_i.rCode = spline->xBC.rCode; xBC_i.lVal = spline->xBC.lVal_i; xBC_i.rVal = spline->xBC.rVal_i; yBC_r.lCode = spline->yBC.lCode; yBC_r.rCode = spline->yBC.rCode; yBC_r.lVal = spline->yBC.lVal_r; yBC_r.rVal = spline->yBC.rVal_r; yBC_i.lCode = spline->yBC.lCode; yBC_i.rCode = spline->yBC.rCode; yBC_i.lVal = spline->yBC.lVal_i; yBC_i.rVal = spline->yBC.rVal_i; int ys = spline->y_stride; // First, solve in the X-direction for (int iy=0; iyx_basis, xBC_r, ((float*)data)+doffset, 2*My, (float*)coefs+coffset, 2*Ny*ys); // Imag part find_NUBcoefs_1d_s (spline->x_basis, xBC_i, ((float*)data)+doffset+1, 2*My, ((float*)coefs)+coffset+1, 2*Ny*ys); } // Now, solve in the Y-direction for (int ix=0; ixy_basis, yBC_r, ((float*)coefs)+doffset, 2*ys, ((float*)coefs)+coffset, 2*ys); // Imag part find_NUBcoefs_1d_s (spline->y_basis, yBC_i, ((float*)coefs)+doffset+1, 2*ys, ((float*)coefs)+coffset+1, 2*ys); } } multi_NUBspline_3d_c* create_multi_NUBspline_3d_c (NUgrid* x_grid, NUgrid* y_grid, NUgrid* z_grid, BCtype_c xBC, BCtype_c yBC, BCtype_c zBC, int num_splines) { // Create new spline multi_NUBspline_3d_c* restrict spline = malloc (sizeof(multi_NUBspline_3d_c)); spline->spcode = MULTI_NU3D; spline->tcode = SINGLE_COMPLEX; spline->xBC = xBC; spline->yBC = yBC; spline->zBC = zBC; spline->x_grid = x_grid; spline->y_grid = y_grid; spline->z_grid = z_grid; spline->num_splines = num_splines; // Next, create the bases spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC); spline->y_basis = create_NUBasis (y_grid, yBC.lCode==PERIODIC); spline->z_basis = create_NUBasis (z_grid, zBC.lCode==PERIODIC); int Mx, My, Mz, Nx, Ny, Nz; if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1; else Mx = x_grid->num_points; if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1; else My = y_grid->num_points; if (zBC.lCode == PERIODIC) Mz = z_grid->num_points - 1; else Mz = z_grid->num_points; Nx = x_grid->num_points + 2; Ny = 
y_grid->num_points + 2; Nz = z_grid->num_points + 2; int N = spline->num_splines; #ifdef HAVE_SSE if (N % 2) N++; #endif spline->x_stride = Ny*Nz*N; spline->y_stride = Nz*N; spline->z_stride = N; #ifndef HAVE_SSE spline->coefs = malloc ((size_t)2*sizeof(float)*Nx*Ny*Nz*N); #else posix_memalign ((void**)&spline->coefs, 64, (size_t)2*sizeof(float)*Nx*Ny*Nz*N); init_sse_data(); #endif return spline; } void set_multi_NUBspline_3d_c (multi_NUBspline_3d_c* spline, int num, complex_float *data) { int Mx, My, Mz, Nx, Ny, Nz; if (spline->xBC.lCode == PERIODIC) Mx = spline->x_grid->num_points - 1; else Mx = spline->x_grid->num_points; if (spline->yBC.lCode == PERIODIC) My = spline->y_grid->num_points - 1; else My = spline->y_grid->num_points; if (spline->zBC.lCode == PERIODIC) Mz = spline->z_grid->num_points - 1; else Mz = spline->z_grid->num_points; Nx = spline->x_grid->num_points + 2; Ny = spline->y_grid->num_points + 2; Nz = spline->z_grid->num_points + 2; BCtype_s xBC_r, xBC_i, yBC_r, yBC_i, zBC_r, zBC_i; xBC_r.lCode = spline->xBC.lCode; xBC_r.rCode = spline->xBC.rCode; xBC_r.lVal = spline->xBC.lVal_r; xBC_r.rVal = spline->xBC.rVal_r; xBC_i.lCode = spline->xBC.lCode; xBC_i.rCode = spline->xBC.rCode; xBC_i.lVal = spline->xBC.lVal_i; xBC_i.rVal = spline->xBC.rVal_i; yBC_r.lCode = spline->yBC.lCode; yBC_r.rCode = spline->yBC.rCode; yBC_r.lVal = spline->yBC.lVal_r; yBC_r.rVal = spline->yBC.rVal_r; yBC_i.lCode = spline->yBC.lCode; yBC_i.rCode = spline->yBC.rCode; yBC_i.lVal = spline->yBC.lVal_i; yBC_i.rVal = spline->yBC.rVal_i; zBC_r.lCode = spline->zBC.lCode; zBC_r.rCode = spline->zBC.rCode; zBC_r.lVal = spline->zBC.lVal_r; zBC_r.rVal = spline->zBC.rVal_r; zBC_i.lCode = spline->zBC.lCode; zBC_i.rCode = spline->zBC.rCode; zBC_i.lVal = spline->zBC.lVal_i; zBC_i.rVal = spline->zBC.rVal_i; complex_float *coefs = spline->coefs + num; int zs = spline->z_stride; // First, solve in the X-direction for (int iy=0; iyx_basis, xBC_r, ((float*)data)+doffset, 2*My*Mz, 
((float*)coefs)+coffset, 2*Ny*Nz*zs); // Imag part find_NUBcoefs_1d_s (spline->x_basis, xBC_i, ((float*)data)+doffset+1, 2*My*Mz, ((float*)coefs)+coffset+1, 2*Ny*Nz*zs); } // Now, solve in the Y-direction for (int ix=0; ixy_basis, yBC_r, ((float*)coefs)+doffset, 2*Nz*zs, ((float*)coefs)+coffset, 2*Nz*zs); // Imag part find_NUBcoefs_1d_s (spline->y_basis, yBC_i, ((float*)coefs)+doffset+1, 2*Nz*zs, ((float*)coefs)+coffset+1, 2*Nz*zs); } // Now, solve in the Z-direction for (int ix=0; ixz_basis, zBC_r, ((float*)coefs)+doffset, 2*zs, ((float*)coefs)+coffset, 2*zs); // Imag part find_NUBcoefs_1d_s (spline->z_basis, zBC_i, ((float*)coefs)+doffset+1, 2*zs, ((float*)coefs)+coffset+1, 2*zs); } } //////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////// //// Double-Precision, Real Creation Routines //// //////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////// multi_NUBspline_1d_d* create_multi_NUBspline_1d_d (NUgrid* x_grid, BCtype_d xBC, int num_splines) { // Create new spline multi_NUBspline_1d_d* restrict spline = malloc (sizeof(multi_NUBspline_1d_d)); if (spline == NULL) return spline; spline->spcode = MULTI_NU1D; spline->tcode = DOUBLE_REAL; spline->xBC = xBC; spline->x_grid = x_grid; spline->num_splines = num_splines; // Next, create the basis spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC); // Setup internal variables int Mx, Nx; if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1; else Mx = x_grid->num_points; Nx = x_grid->num_points + 2; int N = num_splines; #ifdef HAVE_SSE2 // We must pad to keep data aligned for SSE operations if (N & 1) N++; #endif spline->x_stride = N; #ifndef HAVE_SSE2 spline->coefs = malloc (sizeof(double)*Nx*N); #else posix_memalign ((void**)&spline->coefs, 64, sizeof(double)*Nx*N); init_sse_data(); #endif return spline; } void set_multi_NUBspline_1d_d (multi_NUBspline_1d_d* spline, int num, double 
*data) { double *coefs = spline->coefs + num; int xs = spline->x_stride; find_NUBcoefs_1d_d (spline->x_basis, spline->xBC, data, 1, coefs, xs); } void set_multi_NUBspline_1d_d_BC (multi_NUBspline_1d_d* spline, int num, double *data, BCtype_d xBC) { double *coefs = spline->coefs + num; int xs = spline->x_stride; find_NUBcoefs_1d_d (spline->x_basis, xBC, data, 1, coefs, xs); } multi_NUBspline_2d_d* create_multi_NUBspline_2d_d (NUgrid* x_grid, NUgrid* y_grid, BCtype_d xBC, BCtype_d yBC, int num_splines) { // Create new spline multi_NUBspline_2d_d* restrict spline = malloc (sizeof(multi_NUBspline_2d_d)); spline->spcode = MULTI_NU2D; spline->tcode = DOUBLE_REAL; spline->xBC = xBC; spline->yBC = yBC; spline->x_grid = x_grid; spline->y_grid = y_grid; spline->num_splines = num_splines; // Next, create the bases spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC); spline->y_basis = create_NUBasis (y_grid, yBC.lCode==PERIODIC); int Mx, My, Nx, Ny; if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1; else Mx = x_grid->num_points; if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1; else My = y_grid->num_points; Nx = x_grid->num_points + 2; Ny = y_grid->num_points + 2; int N = num_splines; #ifdef HAVE_SSE2 // We must pad to keep data align for SSE operations if (num_splines & 1) N++; #endif spline->x_stride = Ny*N; spline->y_stride = N; #ifndef HAVE_SSE2 spline->coefs = malloc (sizeof(double)*Nx*Ny*N); #else posix_memalign ((void**)&spline->coefs, 64, (sizeof(double)*Nx*Ny*N)); init_sse_data(); #endif return spline; } void set_multi_NUBspline_2d_d (multi_NUBspline_2d_d* spline, int num, double *data) { int Mx, My, Nx, Ny; if (spline->xBC.lCode == PERIODIC) Mx = spline->x_grid->num_points - 1; else Mx = spline->x_grid->num_points; if (spline->yBC.lCode == PERIODIC) My = spline->y_grid->num_points - 1; else My = spline->y_grid->num_points; Nx = spline->x_grid->num_points + 2; Ny = spline->y_grid->num_points + 2; double *coefs = spline->coefs + num; int ys = 
spline->y_stride; // First, solve in the X-direction for (int iy=0; iyx_basis, spline->xBC, data+doffset, My, coefs+coffset, Ny*ys); } // Now, solve in the Y-direction for (int ix=0; ixy_basis, spline->yBC, coefs+doffset, ys, coefs+coffset, ys); } } multi_NUBspline_3d_d* create_multi_NUBspline_3d_d (NUgrid* x_grid, NUgrid* y_grid, NUgrid* z_grid, BCtype_d xBC, BCtype_d yBC, BCtype_d zBC, int num_splines) { // Create new spline multi_NUBspline_3d_d* restrict spline = malloc (sizeof(multi_NUBspline_3d_d)); if (spline == NULL) return spline; spline->spcode = MULTI_NU3D; spline->tcode = DOUBLE_REAL; spline->xBC = xBC; spline->yBC = yBC; spline->zBC = zBC; spline->x_grid = x_grid; spline->y_grid = y_grid; spline->z_grid = z_grid; spline->num_splines = num_splines; // Next, create the bases spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC); spline->y_basis = create_NUBasis (y_grid, yBC.lCode==PERIODIC); spline->z_basis = create_NUBasis (z_grid, zBC.lCode==PERIODIC); int Mx, My, Mz, Nx, Ny, Nz; if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1; else Mx = x_grid->num_points; if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1; else My = y_grid->num_points; if (zBC.lCode == PERIODIC) Mz = z_grid->num_points - 1; else Mz = z_grid->num_points; Nx = x_grid->num_points + 2; Ny = y_grid->num_points + 2; Nz = z_grid->num_points + 2; int N = num_splines; #ifdef HAVE_SSE2 // We must pad to keep data align for SSE operations if (N & 1) N++; #endif spline->x_stride = Ny*Nz*N; spline->y_stride = Nz*N; spline->z_stride = N; #ifndef HAVE_SSE2 spline->coefs = malloc ((size_t)sizeof(double)*Nx*Ny*Nz*N); #else posix_memalign ((void**)&spline->coefs, 64, ((size_t)sizeof(double)*Nx*Ny*Nz*N)); init_sse_data(); #endif return spline; } void set_multi_NUBspline_3d_d (multi_NUBspline_3d_d* spline, int num, double *data) { int Mx, My, Mz, Nx, Ny, Nz; if (spline->xBC.lCode == PERIODIC) Mx = spline->x_grid->num_points - 1; else Mx = spline->x_grid->num_points; if 
(spline->yBC.lCode == PERIODIC) My = spline->y_grid->num_points - 1; else My = spline->y_grid->num_points; if (spline->zBC.lCode == PERIODIC) Mz = spline->z_grid->num_points - 1; else Mz = spline->z_grid->num_points; Nx = spline->x_grid->num_points + 2; Ny = spline->y_grid->num_points + 2; Nz = spline->z_grid->num_points + 2; double *coefs = spline->coefs + num; intptr_t zs = spline->z_stride; // First, solve in the X-direction for (int iy=0; iyx_basis, spline->xBC, data+doffset, My*Mz, coefs+coffset, Ny*Nz*zs); } // Now, solve in the Y-direction for (int ix=0; ixy_basis, spline->yBC, coefs+doffset, Nz*zs, coefs+coffset, Nz*zs); } // Now, solve in the Z-direction for (int ix=0; ixz_basis, spline->zBC, coefs+doffset, zs, coefs+coffset, zs); } } //////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////// //// Double-Precision, Complex Creation Routines //// //////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////// // On input, bands should be filled with: // row 0 : abcdInitial from boundary conditions // rows 1:M: basis functions in first 3 cols, data in last // row M+1 : abcdFinal from boundary conditions // cstride gives the stride between values in coefs. 
// On exit, coefs with contain interpolating B-spline coefs multi_NUBspline_1d_z* create_multi_NUBspline_1d_z (NUgrid* x_grid, BCtype_z xBC, int num_splines) { // Create new spline multi_NUBspline_1d_z* restrict spline = malloc (sizeof(multi_NUBspline_1d_z)); spline->spcode = MULTI_NU1D; spline->tcode = DOUBLE_COMPLEX; spline->xBC = xBC; spline->x_grid = x_grid; spline->num_splines = num_splines; // Next, create the basis spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC); if (spline->x_basis->grid != x_grid) { fprintf (stderr, "Error in basis creation.\n"); abort(); } if (spline->x_basis == NULL) { fprintf (stderr, "Error creating basis in create_multi_NUBspline_1d_z.\n"); abort(); } // Setup internal variables int Mx, Nx; if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1; else Mx = x_grid->num_points; Nx = x_grid->num_points + 2; int N = num_splines; #ifdef HAVE_SSE if (N % 2) N ++; #endif spline->x_stride = N; #ifndef HAVE_SSE2 spline->coefs = malloc (2*sizeof(double)*Nx*N); #else posix_memalign ((void**)&spline->coefs, 64, 2*sizeof(double)*Nx*N); init_sse_data(); #endif return spline; } void set_multi_NUBspline_1d_z (multi_NUBspline_1d_z* spline, int num, complex_double *data) { complex_double *coefs = spline->coefs + num; BCtype_d xBC_r, xBC_i; xBC_r.lCode = spline->xBC.lCode; xBC_r.rCode = spline->xBC.rCode; xBC_r.lVal = spline->xBC.lVal_r; xBC_r.rVal = spline->xBC.rVal_r; xBC_i.lCode = spline->xBC.lCode; xBC_i.rCode = spline->xBC.rCode; xBC_i.lVal = spline->xBC.lVal_i; xBC_i.rVal = spline->xBC.rVal_i; int xs = spline->x_stride; // Real part find_NUBcoefs_1d_d (spline->x_basis, xBC_r, (double*)data, 2, ((double*)coefs), 2*xs); // Imaginary part find_NUBcoefs_1d_d (spline->x_basis, xBC_i, ((double*)data)+1, 2, ((double*)coefs)+1, 2*xs); } void set_multi_NUBspline_1d_z_BC (multi_NUBspline_1d_z *spline, int num, complex_double *data, BCtype_z xBC) { complex_double *coefs = spline->coefs + num; BCtype_d xBC_r, xBC_i; xBC_r.lCode = xBC.lCode; 
xBC_r.rCode = xBC.rCode; xBC_r.lVal = xBC.lVal_r; xBC_r.rVal = xBC.rVal_r; xBC_i.lCode = xBC.lCode; xBC_i.rCode = xBC.rCode; xBC_i.lVal = xBC.lVal_i; xBC_i.rVal = xBC.rVal_i; int xs = spline->x_stride; // Real part find_NUBcoefs_1d_d (spline->x_basis, xBC_r, (double*)data, 2, ((double*)coefs), 2*xs); // Imaginary part find_NUBcoefs_1d_d (spline->x_basis, xBC_i, ((double*)data)+1, 2, ((double*)coefs)+1, 2*xs); } multi_NUBspline_2d_z* create_multi_NUBspline_2d_z (NUgrid* x_grid, NUgrid* y_grid, BCtype_z xBC, BCtype_z yBC, int num_splines) { // Create new spline multi_NUBspline_2d_z* restrict spline = malloc (sizeof(multi_NUBspline_2d_z)); spline->spcode = MULTI_NU2D; spline->tcode = DOUBLE_COMPLEX; spline->xBC = xBC; spline->yBC = yBC; spline->x_grid = x_grid; spline->y_grid = y_grid; spline->num_splines = num_splines; // Next, create the bases spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC); spline->y_basis = create_NUBasis (y_grid, yBC.lCode==PERIODIC); int Mx, My, Nx, Ny; if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1; else Mx = x_grid->num_points; if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1; else My = y_grid->num_points; Nx = x_grid->num_points + 2; Ny = y_grid->num_points + 2; int N = num_splines; #ifdef HAVE_SSE if (N % 4) N += 4 - (N % 4); #endif spline->x_stride = Ny*N; spline->y_stride = N; #ifndef HAVE_SSE2 spline->coefs = malloc (2*sizeof(double)*Nx*Ny*N); #else posix_memalign ((void**)&spline->coefs, 64, 2*sizeof(double)*Nx*Ny*N); init_sse_data(); #endif return spline; } void set_multi_NUBspline_2d_z (multi_NUBspline_2d_z* spline, int num, complex_double *data) { int Mx, My, Nx, Ny; if (spline->xBC.lCode == PERIODIC) Mx = spline->x_grid->num_points - 1; else Mx = spline->x_grid->num_points; if (spline->yBC.lCode == PERIODIC) My = spline->y_grid->num_points - 1; else My = spline->y_grid->num_points; Nx = spline->x_grid->num_points + 2; Ny = spline->y_grid->num_points + 2; BCtype_d xBC_r, xBC_i, yBC_r, yBC_i; xBC_r.lCode = 
spline->xBC.lCode; xBC_r.rCode = spline->xBC.rCode; xBC_r.lVal = spline->xBC.lVal_r; xBC_r.rVal = spline->xBC.rVal_r; xBC_i.lCode = spline->xBC.lCode; xBC_i.rCode = spline->xBC.rCode; xBC_i.lVal = spline->xBC.lVal_i; xBC_i.rVal = spline->xBC.rVal_i; yBC_r.lCode = spline->yBC.lCode; yBC_r.rCode = spline->yBC.rCode; yBC_r.lVal = spline->yBC.lVal_r; yBC_r.rVal = spline->yBC.rVal_r; yBC_i.lCode = spline->yBC.lCode; yBC_i.rCode = spline->yBC.rCode; yBC_i.lVal = spline->yBC.lVal_i; yBC_i.rVal = spline->yBC.rVal_i; complex_double *coefs = spline->coefs + num; int ys = spline->y_stride; // First, solve in the X-direction for (int iy=0; iyx_basis, xBC_r, ((double*)data+doffset), 2*My, (double*)coefs+coffset, 2*Ny*ys); // Imag part find_NUBcoefs_1d_d (spline->x_basis, xBC_i, ((double*)data)+doffset+1, 2*My, ((double*)coefs)+coffset+1, 2*Ny*ys); } // Now, solve in the Y-direction for (int ix=0; ixy_basis, yBC_r, ((double*)coefs)+doffset, 2*ys, (double*)coefs+coffset, 2*ys); // Imag part find_NUBcoefs_1d_d (spline->y_basis, yBC_i, (double*)coefs+doffset+1, 2*ys, ((double*)coefs)+coffset+1, 2*ys); } } multi_NUBspline_3d_z* create_multi_NUBspline_3d_z (NUgrid* x_grid, NUgrid* y_grid, NUgrid* z_grid, BCtype_z xBC, BCtype_z yBC, BCtype_z zBC, int num_splines) { // Create new spline multi_NUBspline_3d_z* restrict spline = malloc (sizeof(multi_NUBspline_3d_z)); spline->spcode = MULTI_NU3D; spline->tcode = DOUBLE_COMPLEX; spline->xBC = xBC; spline->yBC = yBC; spline->zBC = zBC; spline->x_grid = x_grid; spline->y_grid = y_grid; spline->z_grid = z_grid; spline->num_splines = num_splines; // Next, create the bases spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC); spline->y_basis = create_NUBasis (y_grid, yBC.lCode==PERIODIC); spline->z_basis = create_NUBasis (z_grid, zBC.lCode==PERIODIC); int Mx, My, Mz, Nx, Ny, Nz; if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1; else Mx = x_grid->num_points; if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1; else My = 
y_grid->num_points; if (zBC.lCode == PERIODIC) Mz = z_grid->num_points - 1; else Mz = z_grid->num_points; Nx = x_grid->num_points + 2; Ny = y_grid->num_points + 2; Nz = z_grid->num_points + 2; int N = num_splines; #ifdef HAVE_SSE2 if (N & 3) N += 4-(N & 3); #endif spline->x_stride = Ny*Nz*N; spline->y_stride = Nz*N; spline->z_stride = N; #ifndef HAVE_SSE2 spline->coefs = malloc ((size_t)2*sizeof(double)*Nx*Ny*Nz*N); #else posix_memalign ((void**)&spline->coefs, 64, (size_t)2*sizeof(double)*Nx*Ny*Nz*N); init_sse_data(); #endif return spline; } void set_multi_NUBspline_3d_z (multi_NUBspline_3d_z* spline, int num, complex_double *data) { int Mx, My, Mz, Nx, Ny, Nz; if (spline->xBC.lCode == PERIODIC) Mx = spline->x_grid->num_points - 1; else Mx = spline->x_grid->num_points; if (spline->yBC.lCode == PERIODIC) My = spline->y_grid->num_points - 1; else My = spline->y_grid->num_points; if (spline->zBC.lCode == PERIODIC) Mz = spline->z_grid->num_points - 1; else Mz = spline->z_grid->num_points; Nx = spline->x_grid->num_points + 2; Ny = spline->y_grid->num_points + 2; Nz = spline->z_grid->num_points + 2; BCtype_d xBC_r, xBC_i, yBC_r, yBC_i, zBC_r, zBC_i; xBC_r.lCode = spline->xBC.lCode; xBC_r.rCode = spline->xBC.rCode; xBC_r.lVal = spline->xBC.lVal_r; xBC_r.rVal = spline->xBC.rVal_r; xBC_i.lCode = spline->xBC.lCode; xBC_i.rCode = spline->xBC.rCode; xBC_i.lVal = spline->xBC.lVal_i; xBC_i.rVal = spline->xBC.rVal_i; yBC_r.lCode = spline->yBC.lCode; yBC_r.rCode = spline->yBC.rCode; yBC_r.lVal = spline->yBC.lVal_r; yBC_r.rVal = spline->yBC.rVal_r; yBC_i.lCode = spline->yBC.lCode; yBC_i.rCode = spline->yBC.rCode; yBC_i.lVal = spline->yBC.lVal_i; yBC_i.rVal = spline->yBC.rVal_i; zBC_r.lCode = spline->zBC.lCode; zBC_r.rCode = spline->zBC.rCode; zBC_r.lVal = spline->zBC.lVal_r; zBC_r.rVal = spline->zBC.rVal_r; zBC_i.lCode = spline->zBC.lCode; zBC_i.rCode = spline->zBC.rCode; zBC_i.lVal = spline->zBC.lVal_i; zBC_i.rVal = spline->zBC.rVal_i; complex_double *coefs = spline->coefs + num; 
int N = spline->num_splines; int zs = spline->z_stride; // First, solve in the X-direction for (int iy=0; iyx_basis, xBC_r, ((double*)data)+doffset, 2*My*Mz, ((double*)coefs)+coffset, 2*Ny*Nz*zs); // Imag part find_NUBcoefs_1d_d (spline->x_basis, xBC_i, ((double*)data)+doffset+1, 2*My*Mz, ((double*)coefs)+coffset+1, 2*Ny*Nz*zs); } // Now, solve in the Y-direction for (int ix=0; ixy_basis, yBC_r, ((double*)coefs)+doffset, 2*Nz*zs, ((double*)coefs)+coffset, 2*Nz*zs); // Imag part find_NUBcoefs_1d_d (spline->y_basis, yBC_i, ((double*)coefs)+doffset+1, 2*Nz*zs, ((double*)coefs)+coffset+1, 2*Nz*zs); } // Now, solve in the Z-direction for (int ix=0; ixz_basis, zBC_r, ((double*)coefs)+doffset, 2*zs, ((double*)coefs)+coffset, 2*zs); // Imag part find_NUBcoefs_1d_d (spline->z_basis, zBC_i, ((double*)coefs)+doffset+1, 2*zs, ((double*)coefs)+coffset+1, 2*zs); } } void destroy_multi_NUBspline (Bspline *spline) { free (spline->coefs); free (spline); } einspline-0.9.2/src/multi_bspline_cuda_d_impl.h0000664000113000011300000003077511302247653016560 00000000000000#ifndef MULTI_BSPLINE_CUDA_D_IMPL_H #define MULTI_BSPLINE_CUDA_D_IMPL_H #include "multi_bspline.h" #include "multi_bspline_create_cuda.h" __global__ static void eval_multi_multi_UBspline_3d_d_kernel (double *pos, double3 drInv, double *coefs, double *vals[], uint3 dim, uint3 strides, int N) { int block = blockIdx.x; int thr = threadIdx.x; int ir = blockIdx.y; int off = block*SPLINE_BLOCK_SIZE+thr; __shared__ double *myval; __shared__ double abc[64]; __shared__ double3 r; if (thr == 0) { r.x = pos[3*ir+0]; r.y = pos[3*ir+1]; r.z = pos[3*ir+2]; myval = vals[ir]; } __syncthreads(); int3 index; double3 t; double s, sf; double4 tp[3]; s = r.x * drInv.x; sf = floor(s); index.x = min(max(0,(int)sf), dim.x-1); t.x = s - sf; s = r.y * drInv.y; sf = floor(s); index.y = min(max(0,(int)sf), dim.y-1); t.y = s - sf; s = r.z * drInv.z; sf = floor(s); index.z = min(max(0,(int)sf), dim.z-1); t.z = s - sf; tp[0].x =t.x*t.x*t.x; 
tp[0].y=t.x*t.x; tp[0].z=t.x; tp[0].w=1.0; tp[1].x =t.y*t.y*t.y; tp[1].y=t.y*t.y; tp[1].z=t.y; tp[1].w=1.0; tp[2].x =t.z*t.z*t.z; tp[2].y=t.z*t.z; tp[2].z=t.z; tp[2].w=1.0; __shared__ double a[4], b[4], c[4]; if (thr < 4) { a[thr] = Bcuda[4*thr+0]*tp[0].x + Bcuda[4*thr+1]*tp[0].y + Bcuda[4*thr+2]*tp[0].z + Bcuda[4*thr+3]*tp[0].w; b[thr] = Bcuda[4*thr+0]*tp[1].x + Bcuda[4*thr+1]*tp[1].y + Bcuda[4*thr+2]*tp[1].z + Bcuda[4*thr+3]*tp[1].w; c[thr] = Bcuda[4*thr+0]*tp[2].x + Bcuda[4*thr+1]*tp[2].y + Bcuda[4*thr+2]*tp[2].z + Bcuda[4*thr+3]*tp[2].w; } __syncthreads(); int i = (thr>>4)&3; int j = (thr>>2)&3; int k = (thr & 3); if (thr < 64) abc[thr] = a[i]*b[j]*c[k]; __syncthreads(); if (off < N) { double val = 0.0; for (int i=0; i<4; i++) { for (int j=0; j<4; j++) { double *base = coefs + (index.x+i)*strides.x + (index.y+j)*strides.y + index.z*strides.z; for (int k=0; k<4; k++) val += abc[16*i+4*j+k] * base[off+k*strides.z]; } } myval[off] = val; } } __global__ static void eval_multi_multi_UBspline_3d_d_vgh_kernel (double *pos, double3 drInv, double *coefs, double *vals[], double *grads[], double *hess[], uint3 dim, uint3 strides, int N) { int block = blockIdx.x; int thr = threadIdx.x; int ir = blockIdx.y; int off = block*SPLINE_BLOCK_SIZE+threadIdx.x; __shared__ double *myval, *mygrad, *myhess; __shared__ double3 r; if (thr == 0) { r.x = pos[3*ir+0]; r.y = pos[3*ir+1]; r.z = pos[3*ir+2]; myval = vals[ir]; mygrad = grads[ir]; myhess = hess[ir]; } __syncthreads(); int3 index; double3 t; double s, sf; double4 tp[3]; s = r.x * drInv.x; sf = floor(s); index.x = min(max(0,(int)sf), dim.x-1); t.x = s - sf; s = r.y * drInv.y; sf = floor(s); index.y = min(max(0,(int)sf), dim.y-1); t.y = s - sf; s = r.z * drInv.z; sf = floor(s); index.z = min(max(0,(int)sf), dim.z-1); t.z = s - sf; tp[0].x =t.x*t.x*t.x; tp[0].y=t.x*t.x; tp[0].z=t.x; tp[0].w=1.0; tp[1].x =t.y*t.y*t.y; tp[1].y=t.y*t.y; tp[1].z=t.y; tp[1].w=1.0; tp[2].x =t.z*t.z*t.z; tp[2].y=t.z*t.z; tp[2].z=t.z; tp[2].w=1.0; // First 
4 of a are value, second 4 are derivative, last four are // second derivative. __shared__ double a[12], b[12], c[12]; if (thr < 12) { a[thr] = Bcuda[4*thr+0]*tp[0].x + Bcuda[4*thr+1]*tp[0].y + Bcuda[4*thr+2]*tp[0].z + Bcuda[4*thr+3]*tp[0].w; b[thr] = Bcuda[4*thr+0]*tp[1].x + Bcuda[4*thr+1]*tp[1].y + Bcuda[4*thr+2]*tp[1].z + Bcuda[4*thr+3]*tp[1].w; c[thr] = Bcuda[4*thr+0]*tp[2].x + Bcuda[4*thr+1]*tp[2].y + Bcuda[4*thr+2]*tp[2].z + Bcuda[4*thr+3]*tp[2].w; } __syncthreads(); __shared__ double abc[640]; int i = (thr>>4)&3; int j = (thr>>2)&3; int k = (thr & 3); abc[(16*i+4*j+k)+0] = a[i+0]*b[j+0]*c[k+0]; // val abc[(16*i+4*j+k)+64] = a[i+4]*b[j+0]*c[k+0]; // d/dx abc[(16*i+4*j+k)+128] = a[i+0]*b[j+4]*c[k+0]; // d/dy abc[(16*i+4*j+k)+192] = a[i+0]*b[j+0]*c[k+4]; // d/dz abc[(16*i+4*j+k)+256] = a[i+8]*b[j+0]*c[k+0]; // d2/dx2 abc[(16*i+4*j+k)+320] = a[i+4]*b[j+4]*c[k+0]; // d2/dxdy abc[(16*i+4*j+k)+384] = a[i+4]*b[j+0]*c[k+4]; // d2/dxdz abc[(16*i+4*j+k)+448] = a[i+0]*b[j+8]*c[k+0]; // d2/dy2 abc[(16*i+4*j+k)+512] = a[i+0]*b[j+4]*c[k+4]; // d2/dydz abc[(16*i+4*j+k)+576] = a[i+0]*b[j+0]*c[k+8]; // d2/dz2 __syncthreads(); double v = 0.0, g0=0.0, g1=0.0, g2=0.0, h00=0.0, h01=0.0, h02=0.0, h11=0.0, h12=0.0, h22=0.0; int n = 0; double *b0 = coefs + index.x*strides.x + index.y*strides.y + index.z*strides.z + off; if (off < N) { for (int i=0; i<4; i++) { for (int j=0; j<4; j++) { double *base = b0 + i*strides.x + j*strides.y; for (int k=0; k<4; k++) { double c = base[k*strides.z]; v += abc[n+0] * c; g0 += abc[n+64] * c; g1 += abc[n+128] * c; g2 += abc[n+192] * c; h00 += abc[n+256] * c; h01 += abc[n+320] * c; h02 += abc[n+384] * c; h11 += abc[n+448] * c; h12 += abc[n+512] * c; h22 += abc[n+576] * c; n += 1; } } } g0 *= drInv.x; g1 *= drInv.y; g2 *= drInv.z; h00 *= drInv.x * drInv.x; h01 *= drInv.x * drInv.y; h02 *= drInv.x * drInv.z; h11 *= drInv.y * drInv.y; h12 *= drInv.y * drInv.z; h22 *= drInv.z * drInv.z; // __shared__ double buff[6*SPLINE_BLOCK_SIZE]; // Note, we can reuse 
abc, by replacing buff with abc. myval[off] = v; } abc[3*thr+0] = g0; abc[3*thr+1] = g1; abc[3*thr+2] = g2; __syncthreads(); for (int i=0; i<3; i++) { int myoff = (3*block+i)*SPLINE_BLOCK_SIZE + thr; if (myoff < 3*N) mygrad[myoff] = abc[i*SPLINE_BLOCK_SIZE+thr]; } __syncthreads(); // Write Hessians abc[6*thr+0] = h00; abc[6*thr+1] = h01; abc[6*thr+2] = h02; abc[6*thr+3] = h11; abc[6*thr+4] = h12; abc[6*thr+5] = h22; __syncthreads(); for (int i=0; i<6; i++) { int myoff = (6*block+i)*SPLINE_BLOCK_SIZE + thr; if (myoff < 6*N) myhess[myoff] = abc[i*SPLINE_BLOCK_SIZE+thr]; } } extern "C" void eval_multi_multi_UBspline_3d_d_cuda (multi_UBspline_3d_d_cuda *spline, double *pos_d, double *vals_d[], int num) { dim3 dimBlock(SPLINE_BLOCK_SIZE); dim3 dimGrid(spline->num_splines/SPLINE_BLOCK_SIZE, num); if (spline->num_splines % SPLINE_BLOCK_SIZE) dimGrid.x++; eval_multi_multi_UBspline_3d_d_kernel<<>> (pos_d, spline->gridInv, spline->coefs, vals_d, spline->dim, spline->stride, spline->num_splines); cudaThreadSynchronize(); cudaError_t err = cudaGetLastError(); if (err != cudaSuccess) { fprintf (stderr, "CUDA error in eval_multi_multi_UBspline_3d_d_cuda:\n %s\n", cudaGetErrorString(err)); abort(); } } extern "C" void eval_multi_multi_UBspline_3d_d_vgh_cuda (multi_UBspline_3d_d_cuda *spline, double *pos_d, double *vals_d[], double *grads_d[], double *hess_d[], int num) { dim3 dimBlock(SPLINE_BLOCK_SIZE); dim3 dimGrid(spline->num_splines/SPLINE_BLOCK_SIZE, num); if (spline->num_splines % SPLINE_BLOCK_SIZE) dimGrid.x++; eval_multi_multi_UBspline_3d_d_vgh_kernel<<>> (pos_d, spline->gridInv, spline->coefs, vals_d, grads_d, hess_d, spline->dim, spline->stride, spline->num_splines); cudaThreadSynchronize(); cudaError_t err = cudaGetLastError(); if (err != cudaSuccess) { fprintf (stderr, "CUDA error in eval_multi_multi_UBspline_3d_d_vgh_cuda:\n %s\n", cudaGetErrorString(err)); abort(); } } __global__ static void eval_multi_multi_UBspline_3d_d_vgl_kernel (double *pos, double3 drInv, 
double *coefs, double Linv[], double *vals[], double *grad_lapl[], uint3 dim, uint3 strides, int N, int row_stride) { int block = blockIdx.x; int thr = threadIdx.x; int ir = blockIdx.y; int off = block*SPLINE_BLOCK_SIZE+threadIdx.x; __shared__ double *myval, *mygrad_lapl; __shared__ double3 r; if (thr == 0) { r.x = pos[3*ir+0]; r.y = pos[3*ir+1]; r.z = pos[3*ir+2]; myval = vals[ir]; mygrad_lapl = grad_lapl[ir]; } __syncthreads(); int3 index; double3 t; double s, sf; double4 tp[3]; s = r.x * drInv.x; sf = floor(s); index.x = min(max(0,(int)sf), dim.x-1); t.x = s - sf; s = r.y * drInv.y; sf = floor(s); index.y = min(max(0,(int)sf), dim.y-1); t.y = s - sf; s = r.z * drInv.z; sf = floor(s); index.z = min(max(0,(int)sf), dim.z-1); t.z = s - sf; tp[0].x =t.x*t.x*t.x; tp[0].y=t.x*t.x; tp[0].z=t.x; tp[0].w=1.0; tp[1].x =t.y*t.y*t.y; tp[1].y=t.y*t.y; tp[1].z=t.y; tp[1].w=1.0; tp[2].x =t.z*t.z*t.z; tp[2].y=t.z*t.z; tp[2].z=t.z; tp[2].w=1.0; // First 4 of a are value, second 4 are derivative, last four are // second derivative. 
__shared__ double a[12], b[12], c[12]; if (thr < 12) { a[thr] = Bcuda[4*thr+0]*tp[0].x + Bcuda[4*thr+1]*tp[0].y + Bcuda[4*thr+2]*tp[0].z + Bcuda[4*thr+3]*tp[0].w; b[thr] = Bcuda[4*thr+0]*tp[1].x + Bcuda[4*thr+1]*tp[1].y + Bcuda[4*thr+2]*tp[1].z + Bcuda[4*thr+3]*tp[1].w; c[thr] = Bcuda[4*thr+0]*tp[2].x + Bcuda[4*thr+1]*tp[2].y + Bcuda[4*thr+2]*tp[2].z + Bcuda[4*thr+3]*tp[2].w; } __syncthreads(); __shared__ double abc[640]; int i = (thr>>4)&3; int j = (thr>>2)&3; int k = (thr & 3); abc[(16*i+4*j+k)+0] = a[i+0]*b[j+0]*c[k+0]; // val abc[(16*i+4*j+k)+64] = a[i+4]*b[j+0]*c[k+0]; // d/dx abc[(16*i+4*j+k)+128] = a[i+0]*b[j+4]*c[k+0]; // d/dy abc[(16*i+4*j+k)+192] = a[i+0]*b[j+0]*c[k+4]; // d/dz abc[(16*i+4*j+k)+256] = a[i+8]*b[j+0]*c[k+0]; // d2/dx2 abc[(16*i+4*j+k)+320] = a[i+4]*b[j+4]*c[k+0]; // d2/dxdy abc[(16*i+4*j+k)+384] = a[i+4]*b[j+0]*c[k+4]; // d2/dxdz abc[(16*i+4*j+k)+448] = a[i+0]*b[j+8]*c[k+0]; // d2/dy2 abc[(16*i+4*j+k)+512] = a[i+0]*b[j+4]*c[k+4]; // d2/dydz abc[(16*i+4*j+k)+576] = a[i+0]*b[j+0]*c[k+8]; // d2/dz2 __syncthreads(); double v = 0.0, g0=0.0, g1=0.0, g2=0.0, h00=0.0, h01=0.0, h02=0.0, h11=0.0, h12=0.0, h22=0.0; int n = 0; double *b0 = coefs + index.x*strides.x + index.y*strides.y + index.z*strides.z + off; if (off < N) { for (int i=0; i<4; i++) { for (int j=0; j<4; j++) { double *base = b0 + i*strides.x + j*strides.y; for (int k=0; k<4; k++) { double c = base[k*strides.z]; v += abc[n+ 0] * c; g0 += abc[n+ 64] * c; g1 += abc[n+128] * c; g2 += abc[n+192] * c; h00 += abc[n+256] * c; h01 += abc[n+320] * c; h02 += abc[n+384] * c; h11 += abc[n+448] * c; h12 += abc[n+512] * c; h22 += abc[n+576] * c; n += 1; } } } g0 *= drInv.x; g1 *= drInv.y; g2 *= drInv.z; h00 *= drInv.x * drInv.x; h01 *= drInv.x * drInv.y; h02 *= drInv.x * drInv.z; h11 *= drInv.y * drInv.y; h12 *= drInv.y * drInv.z; h22 *= drInv.z * drInv.z; // __shared__ double buff[6*SPLINE_BLOCK_SIZE]; // Note, we can reuse abc, by replacing buff with abc. 
myval[off] = v; } __shared__ double G[3][3], GGt[3][3]; int i0 = threadIdx.x/3; int i1 = threadIdx.x - 3*i0; if (threadIdx.x < 9) G[i0][i1] = Linv[threadIdx.x]; __syncthreads(); if (threadIdx.x < 9) GGt[i0][i1] = (G[0][i0]*G[0][i1] + G[1][i0]*G[1][i1] + G[2][i0]*G[2][i1]); __syncthreads(); if (off < N) { // Store gradients back to global memory mygrad_lapl[off+0*row_stride] = G[0][0]*g0 + G[0][1]*g1 + G[0][2]*g2; mygrad_lapl[off+1*row_stride] = G[1][0]*g0 + G[1][1]*g1 + G[1][2]*g2; mygrad_lapl[off+2*row_stride] = G[2][0]*g0 + G[2][1]*g1 + G[2][2]*g2; // Store laplacians back to global memory // Hessian = H00 H01 H02 H11 H12 H22 // Matrix = [0 1 2] // [1 3 4] // [2 4 5] // laplacian = Trace(GGt*Hessian) mygrad_lapl[off+3*row_stride] = (GGt[0][0]*h00 + GGt[1][0]*h01 + GGt[2][0]*h02 + GGt[0][1]*h01 + GGt[1][1]*h11 + GGt[2][1]*h12 + GGt[0][2]*h02 + GGt[1][2]*h12 + GGt[2][2]*h22); } } extern "C" void eval_multi_multi_UBspline_3d_d_vgl_cuda (multi_UBspline_3d_d_cuda *spline, double *pos_d, double *Linv_d, double *vals_d[], double *grad_lapl_d[], int num, int row_stride) { dim3 dimBlock(SPLINE_BLOCK_SIZE); dim3 dimGrid(spline->num_splines/SPLINE_BLOCK_SIZE, num); if (spline->num_splines % SPLINE_BLOCK_SIZE) dimGrid.x++; eval_multi_multi_UBspline_3d_d_vgl_kernel<<>> (pos_d, spline->gridInv, spline->coefs, Linv_d, vals_d, grad_lapl_d, spline->dim, spline->stride, spline->num_splines, row_stride); cudaThreadSynchronize(); cudaError_t err = cudaGetLastError(); if (err != cudaSuccess) { fprintf (stderr, "CUDA error in eval_multi_multi_UBspline_3d_d_vgl_cuda:\n %s\n", cudaGetErrorString(err)); abort(); } } #endif einspline-0.9.2/src/time_multi.c0000664000113000011300000027252411217254723013536 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. 
// // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. // // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #include "multi_bspline.h" #include "bspline.h" #include "multi_nubspline.h" #include "nubspline.h" #include #include #include #include double drand48(); inline double diff (double a, double b, double tol) { if (fabs(a-b) > tol) return 1; else return 0; } ////////////////////////////////////////// // Single-precision real test functions // ////////////////////////////////////////// int test_1d_float_all() { int Nx=73; int num_splines = 21; Ugrid x_grid; x_grid.start = 3.1; x_grid.end = 9.1; x_grid.num = Nx; BCtype_s xBC; xBC.lCode = xBC.rCode = PERIODIC; // First, create splines the normal way UBspline_1d_s* norm_splines[num_splines]; multi_UBspline_1d_s *multi_spline; // First, create multispline multi_spline = create_multi_UBspline_1d_s (x_grid, xBC, num_splines); float data[Nx]; // Now, create normal splines and set multispline data for (int i=0; icoefs[27]); // fprintf (stderr, "multi coef = %1.14e\n", // multi_spline->coefs[19+27*multi_spline->x_stride]); // Now, test random values int num_vals = 100; float multi_vals[num_splines], norm_vals [num_splines]; float multi_grads[num_splines], norm_grads[num_splines]; float 
multi_lapl[num_splines], norm_lapl [num_splines]; for (int i=0; icoefs[227]), // cimag(norm_splines[19]->coefs[227])); // fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", // creal(multi_spline->coefs[19+227*multi_spline->z_stride]), // cimag(multi_spline->coefs[19+227*multi_spline->z_stride])); // Now, test random values int num_vals = 100; float multi_vals[num_splines], norm_vals[num_splines]; float multi_grads[2*num_splines], norm_grads[2*num_splines]; float multi_lapl[num_splines], norm_lapl[num_splines]; float multi_hess[4*num_splines], norm_hess[4*num_splines]; for (int i=0; icoefs[227]), // cimag(norm_splines[19]->coefs[227])); // fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", // creal(multi_spline->coefs[19+227*multi_spline->z_stride]), // cimag(multi_spline->coefs[19+227*multi_spline->z_stride])); // Now, test random values int num_vals = 100; float multi_vals[num_splines], norm_vals[num_splines]; float multi_grads[3*num_splines], norm_grads[3*num_splines]; float multi_lapl[num_splines], norm_lapl[num_splines]; float multi_hess[9*num_splines], norm_hess[9*num_splines]; for (int i=0; icoefs[227]), // cimag(norm_splines[19]->coefs[227])); // fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", // creal(multi_spline->coefs[19+227*multi_spline->z_stride]), // cimag(multi_spline->coefs[19+227*multi_spline->z_stride])); // Now, test random values int num_vals = 100; double multi_vals[num_splines], norm_vals[num_splines]; double multi_grads[2*num_splines], norm_grads[2*num_splines]; double multi_lapl[num_splines], norm_lapl[num_splines]; double multi_hess[4*num_splines], norm_hess[4*num_splines]; for (int i=0; icoefs[227]), // cimag(norm_splines[19]->coefs[227])); // fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", // creal(multi_spline->coefs[19+227*multi_spline->z_stride]), // cimag(multi_spline->coefs[19+227*multi_spline->z_stride])); // Now, test random values int num_vals = 100; double multi_vals[num_splines], norm_vals[num_splines]; double 
multi_grads[3*num_splines], norm_grads[3*num_splines]; double multi_lapl[num_splines], norm_lapl[num_splines]; double multi_hess[9*num_splines], norm_hess[9*num_splines]; for (int i=0; i tol || idiff > tol) return 1; else return 0; } int test_1d_complex_float_all() { int Nx=73; int num_splines = 21; Ugrid x_grid; x_grid.start = 3.1; x_grid.end = 9.1; x_grid.num = Nx; BCtype_c xBC; xBC.lCode = xBC.rCode = PERIODIC; // First, create splines the normal way UBspline_1d_c* norm_splines[num_splines]; multi_UBspline_1d_c *multi_spline; // First, create multispline multi_spline = create_multi_UBspline_1d_c (x_grid, xBC, num_splines); complex_float data[Nx]; // Now, create normal splines and set multispline data for (int i=0; icoefs[27]), // cimagf(norm_splines[19]->coefs[27])); // fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", // crealf(multi_spline->coefs[19+27*multi_spline->x_stride]), // cimagf(multi_spline->coefs[19+27*multi_spline->x_stride])); // Now, test random values int num_vals = 100; complex_float multi_vals[num_splines], norm_vals [num_splines]; complex_float multi_grads[num_splines], norm_grads[num_splines]; complex_float multi_lapl[num_splines], norm_lapl [num_splines]; for (int i=0; icoefs[2127]), // cimag(norm_splines[19]->coefs[2127])); // fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", // creal(multi_spline->coefs[19+2127*multi_spline->y_stride]), // cimag(multi_spline->coefs[19+2127*multi_spline->y_stride])); // Now, test random values int num_vals = 100; complex_float multi_vals[num_splines], norm_vals[num_splines]; complex_float multi_grads[2*num_splines], norm_grads[2*num_splines]; complex_float multi_lapl[num_splines], norm_lapl[num_splines]; complex_float multi_hess[4*num_splines], norm_hess[4*num_splines]; for (int i=0; icoefs[227]), // cimag(norm_splines[19]->coefs[227])); // fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", // creal(multi_spline->coefs[19+227*multi_spline->z_stride]), // 
cimag(multi_spline->coefs[19+227*multi_spline->z_stride])); // Now, test random values int num_vals = 100; complex_float multi_vals[num_splines], norm_vals[num_splines]; complex_float multi_grads[3*num_splines], norm_grads[3*num_splines]; complex_float multi_lapl[num_splines], norm_lapl[num_splines]; complex_float multi_hess[9*num_splines], norm_hess[9*num_splines]; for (int i=0; icoefs[227]), cimag(norm_splines[19]->coefs[227])); fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", creal(multi_spline->coefs[19+227*multi_spline->z_stride]), cimag(multi_spline->coefs[19+227*multi_spline->z_stride])); //return; // Now, test random values int num_vals = 100; complex_double multi_vals[num_splines], norm_vals[num_splines]; for (int i=0; i 1.0e-12 || fabs(idiff) > 1.0e-12) { fprintf (stderr, "Error! norm_vals[j] = %1.14e + %1.14ei\n", creal(norm_vals[j]), cimag(norm_vals[j])); fprintf (stderr, " multi_vals[j] = %1.14e + %1.14ei\n", creal(multi_vals[j]), cimag(multi_vals[j])); } } } num_vals = 100000; // Now do timing clock_t norm_start, norm_end, multi_start, multi_end, rand_start, rand_end; rand_start = clock(); for (int i=0; i tol || idiff > tol) return 1; else return 0; } int test_1d_complex_double_all() { int Nx=73; int num_splines = 21; Ugrid x_grid; x_grid.start = 3.1; x_grid.end = 9.1; x_grid.num = Nx; BCtype_z xBC; xBC.lCode = xBC.rCode = PERIODIC; // First, create splines the normal way UBspline_1d_z* norm_splines[num_splines]; multi_UBspline_1d_z *multi_spline; // First, create multispline multi_spline = create_multi_UBspline_1d_z (x_grid, xBC, num_splines); complex_double data[Nx]; // Now, create normal splines and set multispline data for (int i=0; icoefs[27]), // cimag(norm_splines[19]->coefs[27])); // fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", // creal(multi_spline->coefs[19+27*multi_spline->x_stride]), // cimag(multi_spline->coefs[19+27*multi_spline->x_stride])); // Now, test random values int num_vals = 100; complex_double multi_vals[num_splines], 
norm_vals [num_splines]; complex_double multi_grads[num_splines], norm_grads[num_splines]; complex_double multi_lapl[num_splines], norm_lapl [num_splines]; for (int i=0; icoefs[227]), // cimag(norm_splines[19]->coefs[227])); // fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", // creal(multi_spline->coefs[19+227*multi_spline->y_stride]), // cimag(multi_spline->coefs[19+227*multi_spline->y_stride])); // Now, test random values int num_vals = 100; complex_double multi_vals[num_splines], norm_vals[num_splines]; complex_double multi_grads[2*num_splines], norm_grads[2*num_splines]; complex_double multi_lapl[num_splines], norm_lapl[num_splines]; complex_double multi_hess[4*num_splines], norm_hess[4*num_splines]; for (int i=0; icoefs[227]), cimag(norm_splines[19]->coefs[227])); fprintf (stderr, "multi coef = %1.14e + %1.14ei\n", creal(multi_spline->coefs[19+227*multi_spline->z_stride]), cimag(multi_spline->coefs[19+227*multi_spline->z_stride])); // Now, test random values int num_vals = 100; complex_double multi_vals[num_splines], norm_vals[num_splines]; complex_double multi_grads[3*num_splines], norm_grads[3*num_splines]; complex_double multi_lapl[num_splines], norm_lapl[num_splines]; complex_double multi_hess[9*num_splines], norm_hess[9*num_splines]; for (int i=0; icoefs[227]); fprintf (stderr, "multi coef = %1.14e\n", multi_spline->coefs[19+227*multi_spline->z_stride]); // Now, test random values int num_vals = 100; double multi_vals[num_splines], norm_vals[num_splines]; for (int i=0; i 1.0e-12) { fprintf (stderr, "Error! 
norm_vals[j] = %1.14e\n", norm_vals[j]); fprintf (stderr, " multi_vals[j] = %1.14e\n", multi_vals[j]); } } } num_vals = 100000; // Now do timing clock_t norm_start, norm_end, multi_start, multi_end, rand_start, rand_end; rand_start = clock(); for (int i=0; icoefs[227]); fprintf (stderr, "multi coef = %1.14e\n", multi_spline->coefs[19+227*multi_spline->z_stride]); // Now, test random values int num_vals = 100; double multi_vals[num_splines], norm_vals[num_splines]; double multi_grads[3*num_splines], norm_grads[3*num_splines]; double multi_hess[9*num_splines], norm_hess[9*num_splines]; for (int i=0; i 1.0e-12) { fprintf (stderr, "j = %d\n", j); fprintf (stderr, "Error! norm_vals[j] = %1.14e\n", norm_vals[j]); fprintf (stderr, " multi_vals[j] = %1.14e\n", multi_vals[j]); } // Check gradients for (int n=0; n<3; n++) { diff = norm_grads[3*j+n] - multi_grads[3*j+n]; if (fabs(diff) > 1.0e-12) { fprintf (stderr, "n=%d\n", n); fprintf (stderr, "Error! norm_grads[j] = %1.14e\n", norm_grads[3*j+n]); fprintf (stderr, " multi_grads[j] = %1.14e\n", multi_grads[3*j+n]); } } // Check hessian for (int n=0; n<9; n++) { diff = norm_hess[9*j+n] - multi_hess[9*j+n]; if (fabs(diff) > 1.0e-10) { fprintf (stderr, "Error! 
norm_hess[j] = %1.14e\n", norm_hess[9*j+n]); fprintf (stderr, " multi_hess[j] = %1.14e\n", multi_hess[9*j+n]); } } } } num_vals = 100000; // Now do timing clock_t norm_start, norm_end, multi_start, multi_end, rand_start, rand_end; rand_start = clock(); for (int i=0; ipoints[i]); BCtype_z xBC; // xBC.lCode = xBC.rCode = NATURAL; xBC.lCode = DERIV1; xBC.lVal_r = 2.3; xBC.lVal_i = 1.1; xBC.rCode = DERIV1; xBC.rVal_r = -2.3; xBC.rVal_i = -1.1; // First, create splines the normal way NUBspline_1d_z* norm_splines[num_splines]; multi_NUBspline_1d_z *multi_spline; // First, create multispline multi_spline = create_multi_NUBspline_1d_z (x_grid, xBC, num_splines); complex_double data[Nx]; // Now, create normal splines and set multispline data for (int i=0; istart + (1.0-rx)*x_grid->end; eval_multi_NUBspline_1d_z (multi_spline, x, multi_vals); } multi_end = clock(); norm_start = clock(); for (int i=0; istart + (1.0-rx)*x_grid->end; for (int j=0; jstart + (1.0-rx)*x_grid->end; eval_multi_NUBspline_1d_z_vgl (multi_spline, x, multi_vals, multi_grads, multi_lapl); } multi_end = clock(); /* norm_start = clock(); */ /* for (int i=0; istart + (1.0-rx)*x_grid->end; */ /* for (int j=0; j #include #ifdef HAVE_SSE3 #include #endif #include #include "bspline_base.h" #include "multi_bspline_structs.h" extern __m128d *restrict A_d; extern double *restrict Ad, *restrict dAd, *restrict d2Ad; #ifndef _MM_DDOT4_PD #ifdef HAVE_SSE3 #define _MM_DDOT4_PD(a0, a1, a2, a3, b0, b1, b2, b3, r) \ do { \ __m128d t0 = _mm_add_pd(_mm_mul_pd (a0, b0),_mm_mul_pd (a1, b1)); \ __m128d t1 = _mm_add_pd(_mm_mul_pd (a2, b2),_mm_mul_pd (a3, b3)); \ r = _mm_hadd_pd (t0, t1); \ } while(0); #define _MM_DOT4_PD(a0, a1, b0, b1, p) \ do { \ __m128d t0 = _mm_add_pd(_mm_mul_pd (a0, b0),_mm_mul_pd (a1, b1)); \ __m128d t1 = _mm_hadd_pd (t0,t0); \ _mm_store_sd (&(p), t1); \ } while (0); #else #define _MM_DDOT4_PD(a0, a1, a2, a3, b0, b1, b2, b3, r) \ do { \ __m128d t0 = _mm_add_pd(_mm_mul_pd (a0, b0),_mm_mul_pd (a1, b1)); \ 
__m128d t1 = _mm_add_pd(_mm_mul_pd (a2, b2),_mm_mul_pd (a3, b3)); \ r = _mm_add_pd(_mm_unpacklo_pd(t0,t1),_mm_unpackhi_pd(t0,t1)); \ } while(0); #define _MM_DOT4_PD(a0, a1, b0, b1, p) \ do { \ __m128d t0 = _mm_add_pd(_mm_mul_pd (a0, b0),_mm_mul_pd (a1, b1)); \ __m128d t1 = \ _mm_add_pd (_mm_unpacklo_pd(t0,t0), _mm_unpackhi_pd(t0,t0)); \ _mm_store_d (&(p), t1); \ } while (0); #endif #endif /************************************************************/ /* 1D double-precision, complex evaulation functions */ /************************************************************/ void eval_multi_UBspline_1d_z (multi_UBspline_1d_z *spline, double x, complex_double* restrict vals) { x -= spline->x_grid.start; double ux = x*spline->x_grid.delta_inv; double ipartx, tx; tx = modf (ux, &ipartx); int ix = (int) ipartx; double tpx[4], a[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; complex_double* restrict coefs = spline->coefs; a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]); a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]); a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]); a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]); intptr_t xs = spline->x_stride; complex_double* restrict coefs0 = spline->coefs +(ix+0)*xs; complex_double* restrict coefs1 = spline->coefs +(ix+1)*xs; complex_double* restrict coefs2 = spline->coefs +(ix+2)*xs; complex_double* restrict coefs3 = spline->coefs +(ix+3)*xs; for (int n=0; nnum_splines; n++) vals[n] = a[0]*coefs0[n] + a[1]*coefs1[n] + a[2]*coefs2[n] + a[3]*coefs3[n]; // for (int n=0; nnum_splines; n++) // vals[n] = 0.0; // for (int i=0; i<4; i++) { // complex_double* restrict coefs = spline->coefs + ((ix+i)*xs); // for (int n=0; nnum_splines; n++) // vals[n] += a[i] * coefs[n]; // } } void eval_multi_UBspline_1d_z_vg (multi_UBspline_1d_z *spline, double x, complex_double* restrict vals, complex_double* restrict grads) { x -= spline->x_grid.start; 
double ux = x*spline->x_grid.delta_inv; double ipartx, tx; tx = modf (ux, &ipartx); int ix = (int) ipartx; double tpx[4], a[4], da[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; complex_double* restrict coefs = spline->coefs; a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]); a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]); a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]); a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]); da[0] = (dAd[ 0]*tpx[0] + dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]); da[1] = (dAd[ 4]*tpx[0] + dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]); da[2] = (dAd[ 8]*tpx[0] + dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]); da[3] = (dAd[12]*tpx[0] + dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]); intptr_t xs = spline->x_stride; for (int n=0; nnum_splines; n++) { vals[n] = 0.0; grads[n] = 0.0; } for (int i=0; i<4; i++) { complex_double* restrict coefs = spline->coefs + ((ix+i)*xs); for (int n=0; nnum_splines; n++) { vals[n] += a[i] * coefs[n]; grads[n] += da[i] * coefs[n]; } } double dxInv = spline->x_grid.delta_inv; for (int n=0; nnum_splines; n++) grads[n] *= dxInv; } void eval_multi_UBspline_1d_z_vgl (multi_UBspline_1d_z *spline, double x, complex_double* restrict vals, complex_double* restrict grads, complex_double* restrict lapl) { x -= spline->x_grid.start; double ux = x*spline->x_grid.delta_inv; double ipartx, tx; tx = modf (ux, &ipartx); int ix = (int) ipartx; double tpx[4], a[4], da[4], d2a[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; complex_double* restrict coefs = spline->coefs; a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]); a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]); a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]); a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]); da[0] = (dAd[ 0]*tpx[0] + dAd[ 1]*tpx[1] 
+ dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]); da[1] = (dAd[ 4]*tpx[0] + dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]); da[2] = (dAd[ 8]*tpx[0] + dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]); da[3] = (dAd[12]*tpx[0] + dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]); d2a[0] = (d2Ad[ 0]*tpx[0] + d2Ad[ 1]*tpx[1] + d2Ad[ 2]*tpx[2] + d2Ad[ 3]*tpx[3]); d2a[1] = (d2Ad[ 4]*tpx[0] + d2Ad[ 5]*tpx[1] + d2Ad[ 6]*tpx[2] + d2Ad[ 7]*tpx[3]); d2a[2] = (d2Ad[ 8]*tpx[0] + d2Ad[ 9]*tpx[1] + d2Ad[10]*tpx[2] + d2Ad[11]*tpx[3]); d2a[3] = (d2Ad[12]*tpx[0] + d2Ad[13]*tpx[1] + d2Ad[14]*tpx[2] + d2Ad[15]*tpx[3]); intptr_t xs = spline->x_stride; for (int n=0; nnum_splines; n++) { vals[n] = 0.0; grads[n] = 0.0; lapl[n] = 0.0; } for (int i=0; i<4; i++) { complex_double* restrict coefs = spline->coefs + ((ix+i)*xs); for (int n=0; nnum_splines; n++) { vals[n] += a[i] * coefs[n]; grads[n] += da[i] * coefs[n]; lapl[n] += d2a[i] * coefs[n]; } } double dxInv = spline->x_grid.delta_inv; for (int n=0; nnum_splines; n++) { grads[n] *= dxInv; lapl [n] *= dxInv*dxInv; } } void eval_multi_UBspline_1d_z_vgh (multi_UBspline_1d_z *spline, double x, complex_double* restrict vals, complex_double* restrict grads, complex_double* restrict hess) { eval_multi_UBspline_1d_z_vgl (spline, x, vals, grads, hess); } /************************************************************/ /* 2D double-precision, complex evaulation functions */ /************************************************************/ void eval_multi_UBspline_2d_z (multi_UBspline_2d_z *spline, double x, double y, complex_double* restrict vals) { _mm_prefetch ((const char*) &A_d[ 0],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 1],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 2],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 3],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 4],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 5],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 6],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 7],_MM_HINT_T0); x -= 
spline->x_grid.start; y -= spline->y_grid.start; double ux = x*spline->x_grid.delta_inv; double uy = y*spline->y_grid.delta_inv; ux = fmin (ux, (double)(spline->x_grid.num)-1.0e-5); uy = fmin (uy, (double)(spline->y_grid.num)-1.0e-5); double ipartx, iparty, tx, ty; tx = modf (ux, &ipartx); int ix = (int) ipartx; ty = modf (uy, &iparty); int iy = (int) iparty; intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; int N = spline->num_splines; // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // a = A * tpx, b = A * tpy, c = A * tpz // A is 4x4 matrix given by the rows A0, A1, A2, A3 __m128d tpx01, tpx23, tpy01, tpy23, a01 , b01, a23, b23; tpx01 = _mm_set_pd (tx*tx*tx, tx*tx); tpx23 = _mm_set_pd (tx, 1.0); tpy01 = _mm_set_pd (ty*ty*ty, ty*ty); tpy23 = _mm_set_pd (ty, 1.0); // x-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpx01, tpx23, tpx01, tpx23, a01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpx01, tpx23, tpx01, tpx23, a23); // y-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpy01, tpy23, tpy01, tpy23, b01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpy01, tpy23, tpy01, tpy23, b23); // Zero-out values __m128d mvals[N]; for (int n=0; ncoefs + (ix+i)*xs + (iy+j)*ys); for (int n=0; nx_grid.start; y -= spline->y_grid.start; double ux = x*spline->x_grid.delta_inv; double uy = y*spline->y_grid.delta_inv; ux = fmin (ux, (double)(spline->x_grid.num)-1.0e-5); uy = fmin (uy, (double)(spline->y_grid.num)-1.0e-5); double ipartx, iparty, tx, ty; tx = modf (ux, &ipartx); int ix = (int) ipartx; ty = modf (uy, &iparty); int iy = (int) iparty; intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; int N = spline->num_splines; // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // a = A * tpx, b = A * tpy, c = A * tpz // A is 4x4 matrix given by the rows A0, A1, A2, A3 __m128d tpx01, tpx23, tpy01, tpy23, a01 , b01, a23, b23, da01 , 
db01, da23, db23; tpx01 = _mm_set_pd (tx*tx*tx, tx*tx); tpx23 = _mm_set_pd (tx, 1.0); tpy01 = _mm_set_pd (ty*ty*ty, ty*ty); tpy23 = _mm_set_pd (ty, 1.0); // x-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpx01, tpx23, tpx01, tpx23, a01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpx01, tpx23, tpx01, tpx23, a23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpx01, tpx23, tpx01, tpx23, da01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpx01, tpx23, tpx01, tpx23, da23); // y-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpy01, tpy23, tpy01, tpy23, b01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpy01, tpy23, tpy01, tpy23, b23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpy01, tpy23, tpy01, tpy23, db01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpy01, tpy23, tpy01, tpy23, db23); // Zero-out values __m128d mvals[N], mgrads[2*N]; for (int n=0; ncoefs + (ix+i)*xs + (iy+j)*ys); for (int n=0; nx_grid.delta_inv; double dyInv = spline->y_grid.delta_inv; complex_double lapl2[2*N]; for (int n=0; nx_grid.start; y -= spline->y_grid.start; double ux = x*spline->x_grid.delta_inv; double uy = y*spline->y_grid.delta_inv; ux = fmin (ux, (double)(spline->x_grid.num)-1.0e-5); uy = fmin (uy, (double)(spline->y_grid.num)-1.0e-5); double ipartx, iparty, tx, ty; tx = modf (ux, &ipartx); int ix = (int) ipartx; ty = modf (uy, &iparty); int iy = (int) iparty; intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; int N = spline->num_splines; // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // a = A * tpx, b = A * tpy, c = A * tpz // A is 4x4 matrix given by the rows A0, A1, A2, A3 __m128d tpx01, tpx23, tpy01, tpy23, a01 , b01, a23, b23, da01 , db01, da23, db23, d2a01, d2b01, d2a23, d2b23; tpx01 = _mm_set_pd (tx*tx*tx, tx*tx); tpx23 = _mm_set_pd (tx, 1.0); tpy01 = _mm_set_pd (ty*ty*ty, ty*ty); tpy23 = _mm_set_pd (ty, 1.0); // x-dependent vectors _MM_DDOT4_PD 
(A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpx01, tpx23, tpx01, tpx23, a01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpx01, tpx23, tpx01, tpx23, a23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpx01, tpx23, tpx01, tpx23, da01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpx01, tpx23, tpx01, tpx23, da23); _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpx01, tpx23, tpx01, tpx23, d2a01); _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpx01, tpx23, tpx01, tpx23, d2a23); // y-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpy01, tpy23, tpy01, tpy23, b01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpy01, tpy23, tpy01, tpy23, b23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpy01, tpy23, tpy01, tpy23, db01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpy01, tpy23, tpy01, tpy23, db23); _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpy01, tpy23, tpy01, tpy23, d2b01); _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpy01, tpy23, tpy01, tpy23, d2b23); // Zero-out values __m128d mvals[N], mgrads[2*N], mlapl[2*N]; for (int n=0; ncoefs + (ix+i)*xs + (iy+j)*ys); for (int n=0; nx_grid.delta_inv; double dyInv = spline->y_grid.delta_inv; complex_double lapl2[2*N]; for (int n=0; nx_grid.start; y -= spline->y_grid.start; double ux = x*spline->x_grid.delta_inv; double uy = y*spline->y_grid.delta_inv; ux = fmin (ux, (double)(spline->x_grid.num)-1.0e-5); uy = fmin (uy, (double)(spline->y_grid.num)-1.0e-5); double ipartx, iparty, tx, ty; tx = modf (ux, &ipartx); int ix = (int) ipartx; ty = modf (uy, &iparty); int iy = (int) iparty; intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; int N = spline->num_splines; // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // a = A * tpx, b = A * tpy, c = A * tpz // A is 4x4 matrix given by the rows A0, A1, A2, A3 __m128d tpx01, tpx23, tpy01, tpy23, a01 , b01, a23, b23, da01 , db01, da23, db23, d2a01, d2b01, d2a23, d2b23; tpx01 = 
_mm_set_pd (tx*tx*tx, tx*tx); tpx23 = _mm_set_pd (tx, 1.0); tpy01 = _mm_set_pd (ty*ty*ty, ty*ty); tpy23 = _mm_set_pd (ty, 1.0); // x-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpx01, tpx23, tpx01, tpx23, a01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpx01, tpx23, tpx01, tpx23, a23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpx01, tpx23, tpx01, tpx23, da01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpx01, tpx23, tpx01, tpx23, da23); _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpx01, tpx23, tpx01, tpx23, d2a01); _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpx01, tpx23, tpx01, tpx23, d2a23); // y-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpy01, tpy23, tpy01, tpy23, b01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpy01, tpy23, tpy01, tpy23, b23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpy01, tpy23, tpy01, tpy23, db01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpy01, tpy23, tpy01, tpy23, db23); _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpy01, tpy23, tpy01, tpy23, d2b01); _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpy01, tpy23, tpy01, tpy23, d2b23); // Zero-out values __m128d mvals[N], mgrads[2*N], mhess[3*N]; for (int n=0; ncoefs + (ix+i)*xs + (iy+j)*ys); for (int n=0; nx_grid.delta_inv; double dyInv = spline->y_grid.delta_inv; for (int n=0; nx_grid.start; y -= spline->y_grid.start; z -= spline->z_grid.start; double ux = x*spline->x_grid.delta_inv; double uy = y*spline->y_grid.delta_inv; double uz = z*spline->z_grid.delta_inv; ux = fmin (ux, (double)(spline->x_grid.num)-1.0e-5); uy = fmin (uy, (double)(spline->y_grid.num)-1.0e-5); uz = fmin (uz, (double)(spline->z_grid.num)-1.0e-5); double ipartx, iparty, ipartz, tx, ty, tz; tx = modf (ux, &ipartx); int ix = (int) ipartx; ty = modf (uy, &iparty); int iy = (int) iparty; tz = modf (uz, &ipartz); int iz = (int) ipartz; intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; intptr_t 
zs = spline->z_stride; int N = spline->num_splines; // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // tpz = [t_z^3 t_z^2 t_z 1] // a = A * tpx, b = A * tpy, c = A * tpz // A is 4x4 matrix given by the rows A0, A1, A2, A3 __m128d tpx01, tpx23, tpy01, tpy23, tpz01, tpz23, a01, b01, c01, a23, b23, c23, tmp0, tmp1, r0, r1, i0, i1, val_r, val_i; tpx01 = _mm_set_pd (tx*tx*tx, tx*tx); tpx23 = _mm_set_pd (tx, 1.0); tpy01 = _mm_set_pd (ty*ty*ty, ty*ty); tpy23 = _mm_set_pd (ty, 1.0); tpz01 = _mm_set_pd (tz*tz*tz, tz*tz); tpz23 = _mm_set_pd (tz, 1.0); // x-dependent vectors _MM_DDOT4_PD (A_d[0], A_d[1], A_d[2], A_d[3], tpx01, tpx23, tpx01, tpx23, a01); _MM_DDOT4_PD (A_d[4], A_d[5], A_d[6], A_d[7], tpx01, tpx23, tpx01, tpx23, a23); // y-dependent vectors _MM_DDOT4_PD (A_d[0], A_d[1], A_d[2], A_d[3], tpy01, tpy23, tpy01, tpy23, b01); _MM_DDOT4_PD (A_d[4], A_d[5], A_d[6], A_d[7], tpy01, tpy23, tpy01, tpy23, b23); // z-dependent vectors _MM_DDOT4_PD (A_d[0], A_d[1], A_d[2], A_d[3], tpz01, tpz23, tpz01, tpz23, c01); _MM_DDOT4_PD (A_d[4], A_d[5], A_d[6], A_d[7], tpz01, tpz23, tpz01, tpz23, c23); // Zero-out values __m128d mvals[N]; for (int n=0; ncoefs + (ix+i)*xs + (iy+j)*ys + (iz+0)*zs); __m128d* restrict coefs1 = (__m128d*)(spline->coefs + (ix+i)*xs + (iy+j)*ys + (iz+1)*zs); __m128d* restrict coefs2 = (__m128d*)(spline->coefs + (ix+i)*xs + (iy+j)*ys + (iz+2)*zs); __m128d* restrict coefs3 = (__m128d*)(spline->coefs + (ix+i)*xs + (iy+j)*ys + (iz+3)*zs); for (int n=0; ncoefs + (ix+i)*xs + (iy+j)*ys + (iz+k)*zs); for (int n=0; nx_grid.start; y -= spline->y_grid.start; z -= spline->z_grid.start; double ux = x*spline->x_grid.delta_inv; double uy = y*spline->y_grid.delta_inv; double uz = z*spline->z_grid.delta_inv; ux = fmin (ux, (double)(spline->x_grid.num)-1.0e-5); uy = fmin (uy, (double)(spline->y_grid.num)-1.0e-5); uz = fmin (uz, (double)(spline->z_grid.num)-1.0e-5); double ipartx, iparty, ipartz, tx, ty, tz; tx = modf (ux, &ipartx); int ix = 
(int) ipartx; ty = modf (uy, &iparty); int iy = (int) iparty; tz = modf (uz, &ipartz); int iz = (int) ipartz; intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; intptr_t zs = spline->z_stride; int N = spline->num_splines; // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // tpz = [t_z^3 t_z^2 t_z 1] // a = A * tpx, b = A * tpy, c = A * tpz // A is 4x4 matrix given by the rows A0, A1, A2, A3 __m128d tpx01, tpx23, tpy01, tpy23, tpz01, tpz23, a01 , b01, c01, a23, b23, c23, da01 , db01, dc01, da23, db23, dc23; tpx01 = _mm_set_pd (tx*tx*tx, tx*tx); tpx23 = _mm_set_pd (tx, 1.0); tpy01 = _mm_set_pd (ty*ty*ty, ty*ty); tpy23 = _mm_set_pd (ty, 1.0); tpz01 = _mm_set_pd (tz*tz*tz, tz*tz); tpz23 = _mm_set_pd (tz, 1.0); // x-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpx01, tpx23, tpx01, tpx23, a01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpx01, tpx23, tpx01, tpx23, a23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpx01, tpx23, tpx01, tpx23, da01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpx01, tpx23, tpx01, tpx23, da23); // y-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpy01, tpy23, tpy01, tpy23, b01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpy01, tpy23, tpy01, tpy23, b23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpy01, tpy23, tpy01, tpy23, db01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpy01, tpy23, tpy01, tpy23, db23); // z-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpz01, tpz23, tpz01, tpz23, c01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpz01, tpz23, tpz01, tpz23, c23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpz01, tpz23, tpz01, tpz23, dc01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpz01, tpz23, tpz01, tpz23, dc23); // Zero-out values __m128d mvals[N], mgrads[3*N]; for (int n=0; ncoefs + (ix+i)*xs + (iy+j)*ys + (iz+k)*zs); for (int n=0; nx_grid.delta_inv; double 
dyInv = spline->y_grid.delta_inv; double dzInv = spline->z_grid.delta_inv; for (int n=0; nx_grid.start; y -= spline->y_grid.start; z -= spline->z_grid.start; double ux = x*spline->x_grid.delta_inv; double uy = y*spline->y_grid.delta_inv; double uz = z*spline->z_grid.delta_inv; ux = fmin (ux, (double)(spline->x_grid.num)-1.0e-5); uy = fmin (uy, (double)(spline->y_grid.num)-1.0e-5); uz = fmin (uz, (double)(spline->z_grid.num)-1.0e-5); double ipartx, iparty, ipartz, tx, ty, tz; tx = modf (ux, &ipartx); int ix = (int) ipartx; ty = modf (uy, &iparty); int iy = (int) iparty; tz = modf (uz, &ipartz); int iz = (int) ipartz; intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; intptr_t zs = spline->z_stride; int N = spline->num_splines; // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // tpz = [t_z^3 t_z^2 t_z 1] // a = A * tpx, b = A * tpy, c = A * tpz // A is 4x4 matrix given by the rows A0, A1, A2, A3 __m128d tpx01, tpx23, tpy01, tpy23, tpz01, tpz23, a01 , b01, c01, a23, b23, c23, da01 , db01, dc01, da23, db23, dc23, d2a01, d2b01, d2c01, d2a23, d2b23, d2c23; tpx01 = _mm_set_pd (tx*tx*tx, tx*tx); tpx23 = _mm_set_pd (tx, 1.0); tpy01 = _mm_set_pd (ty*ty*ty, ty*ty); tpy23 = _mm_set_pd (ty, 1.0); tpz01 = _mm_set_pd (tz*tz*tz, tz*tz); tpz23 = _mm_set_pd (tz, 1.0); // x-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpx01, tpx23, tpx01, tpx23, a01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpx01, tpx23, tpx01, tpx23, a23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpx01, tpx23, tpx01, tpx23, da01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpx01, tpx23, tpx01, tpx23, da23); _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpx01, tpx23, tpx01, tpx23, d2a01); _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpx01, tpx23, tpx01, tpx23, d2a23); // y-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpy01, tpy23, tpy01, tpy23, b01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 
6], A_d[ 7], tpy01, tpy23, tpy01, tpy23, b23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpy01, tpy23, tpy01, tpy23, db01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpy01, tpy23, tpy01, tpy23, db23); _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpy01, tpy23, tpy01, tpy23, d2b01); _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpy01, tpy23, tpy01, tpy23, d2b23); // z-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpz01, tpz23, tpz01, tpz23, c01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpz01, tpz23, tpz01, tpz23, c23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpz01, tpz23, tpz01, tpz23, dc01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpz01, tpz23, tpz01, tpz23, dc23); _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpz01, tpz23, tpz01, tpz23, d2c01); _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpz01, tpz23, tpz01, tpz23, d2c23); // Zero-out values __m128d mvals[N], mgrads[3*N], mlapl[3*N]; for (int n=0; ncoefs + (ix+i)*xs + (iy+j)*ys + (iz+k)*zs); for (int n=0; nx_grid.delta_inv; double dyInv = spline->y_grid.delta_inv; double dzInv = spline->z_grid.delta_inv; for (int n=0; nx_grid.start; y -= spline->y_grid.start; z -= spline->z_grid.start; double ux = x*spline->x_grid.delta_inv; double uy = y*spline->y_grid.delta_inv; double uz = z*spline->z_grid.delta_inv; ux = fmin (ux, (double)(spline->x_grid.num)-1.0e-5); uy = fmin (uy, (double)(spline->y_grid.num)-1.0e-5); uz = fmin (uz, (double)(spline->z_grid.num)-1.0e-5); double ipartx, iparty, ipartz, tx, ty, tz; tx = modf (ux, &ipartx); int ix = (int) ipartx; ty = modf (uy, &iparty); int iy = (int) iparty; tz = modf (uz, &ipartz); int iz = (int) ipartz; intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; intptr_t zs = spline->z_stride; int N = spline->num_splines; // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // tpz = [t_z^3 t_z^2 t_z 1] // a = A * tpx, b = A * tpy, c = A * tpz // A is 4x4 
matrix given by the rows A0, A1, A2, A3 __m128d tpx01, tpx23, tpy01, tpy23, tpz01, tpz23, a01 , b01, c01, a23, b23, c23, da01 , db01, dc01, da23, db23, dc23, d2a01, d2b01, d2c01, d2a23, d2b23, d2c23; tpx01 = _mm_set_pd (tx*tx*tx, tx*tx); tpx23 = _mm_set_pd (tx, 1.0); tpy01 = _mm_set_pd (ty*ty*ty, ty*ty); tpy23 = _mm_set_pd (ty, 1.0); tpz01 = _mm_set_pd (tz*tz*tz, tz*tz); tpz23 = _mm_set_pd (tz, 1.0); // x-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpx01, tpx23, tpx01, tpx23, a01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpx01, tpx23, tpx01, tpx23, a23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpx01, tpx23, tpx01, tpx23, da01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpx01, tpx23, tpx01, tpx23, da23); _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpx01, tpx23, tpx01, tpx23, d2a01); _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpx01, tpx23, tpx01, tpx23, d2a23); // y-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpy01, tpy23, tpy01, tpy23, b01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpy01, tpy23, tpy01, tpy23, b23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpy01, tpy23, tpy01, tpy23, db01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpy01, tpy23, tpy01, tpy23, db23); _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpy01, tpy23, tpy01, tpy23, d2b01); _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpy01, tpy23, tpy01, tpy23, d2b23); // z-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpz01, tpz23, tpz01, tpz23, c01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpz01, tpz23, tpz01, tpz23, c23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpz01, tpz23, tpz01, tpz23, dc01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpz01, tpz23, tpz01, tpz23, dc23); _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpz01, tpz23, tpz01, tpz23, d2c01); _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpz01, tpz23, tpz01, tpz23, 
d2c23); // Zero-out values //__m128d mvals[N], mgrads[3*N], mhess[6*N]; __m128d mpack[10*N]; for (int n=0; n<10*N; n++) mpack[n] = _mm_setzero_pd(); __m128d a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4]; a[0]=_mm_unpacklo_pd(a01,a01); da[0]=_mm_unpacklo_pd(da01,da01); d2a[0]=_mm_unpacklo_pd(d2a01,d2a01); a[1]=_mm_unpackhi_pd(a01,a01); da[1]=_mm_unpackhi_pd(da01,da01); d2a[1]=_mm_unpackhi_pd(d2a01,d2a01); a[2]=_mm_unpacklo_pd(a23,a23); da[2]=_mm_unpacklo_pd(da23,da23); d2a[2]=_mm_unpacklo_pd(d2a23,d2a23); a[3]=_mm_unpackhi_pd(a23,a23); da[3]=_mm_unpackhi_pd(da23,da23); d2a[3]=_mm_unpackhi_pd(d2a23,d2a23); b[0]=_mm_unpacklo_pd(b01,b01); db[0]=_mm_unpacklo_pd(db01,db01); d2b[0]=_mm_unpacklo_pd(d2b01,d2b01); b[1]=_mm_unpackhi_pd(b01,b01); db[1]=_mm_unpackhi_pd(db01,db01); d2b[1]=_mm_unpackhi_pd(d2b01,d2b01); b[2]=_mm_unpacklo_pd(b23,b23); db[2]=_mm_unpacklo_pd(db23,db23); d2b[2]=_mm_unpacklo_pd(d2b23,d2b23); b[3]=_mm_unpackhi_pd(b23,b23); db[3]=_mm_unpackhi_pd(db23,db23); d2b[3]=_mm_unpackhi_pd(d2b23,d2b23); c[0]=_mm_unpacklo_pd(c01,c01); dc[0]=_mm_unpacklo_pd(dc01,dc01); d2c[0]=_mm_unpacklo_pd(d2c01,d2c01); c[1]=_mm_unpackhi_pd(c01,c01); dc[1]=_mm_unpackhi_pd(dc01,dc01); d2c[1]=_mm_unpackhi_pd(d2c01,d2c01); c[2]=_mm_unpacklo_pd(c23,c23); dc[2]=_mm_unpacklo_pd(dc23,dc23); d2c[2]=_mm_unpacklo_pd(d2c23,d2c23); c[3]=_mm_unpackhi_pd(c23,c23); dc[3]=_mm_unpackhi_pd(dc23,dc23); d2c[3]=_mm_unpackhi_pd(d2c23,d2c23); // Main computation loop const int bs = 32; for (int nstart=0; nstartcoefs + (ix+i)*xs + (iy+j)*ys + (iz+0)*zs); __m128d* restrict c1 = (__m128d*)(spline->coefs + (ix+i)*xs + (iy+j)*ys + (iz+1)*zs); __m128d* restrict c2 = (__m128d*)(spline->coefs + (ix+i)*xs + (iy+j)*ys + (iz+2)*zs); __m128d* restrict c3 = (__m128d*)(spline->coefs + (ix+i)*xs + (iy+j)*ys + (iz+3)*zs); #ifdef USE_PREFETCH_VGH int nextIndex = i<<4 + j<<2 + k + 1; int iNext = nextIndex >> 4; int jNext = (nextIndex >> 2) & 3; int kNext = nextIndex & 3; if (nextIndex < 64) { __m128d* 
restrict nextCoefs = (__m128d*)(spline->coefs + (ix+iNext)*xs + (iy +jNext)*ys + (iz+kNext)*zs); for (int i=0,n=nstart; (nx_grid.delta_inv; double dyInv = spline->y_grid.delta_inv; double dzInv = spline->z_grid.delta_inv; for (int n=0; n #include #ifdef HAVE_SSE3 #include #endif #include #include "bspline_base.h" #include "multi_bspline_structs.h" extern __m128 *restrict A_s; extern const float* restrict Af; extern const float* restrict dAf; extern const float* restrict d2Af; // Use plain-old SSE instructions #define _MM_MATVEC4_PS(M0, M1, M2, M3, v, r) \ do { \ __m128 _r0 = _mm_mul_ps (M0, v); \ __m128 _r1 = _mm_mul_ps (M1, v); \ __m128 _r2 = _mm_mul_ps (M2, v); \ __m128 _r3 = _mm_mul_ps (M3, v); \ _MM_TRANSPOSE4_PS (_r0, _r1, _r2, _r3); \ r = _mm_add_ps (_mm_add_ps (_r0, _r1), _mm_add_ps (_r2, _r3)); \ } while (0); #define _MM_DOT4_PS(A, B, p) \ do { \ __m128 t = _mm_mul_ps (A, B); \ __m128 alo = _mm_shuffle_ps (t, t, _MM_SHUFFLE(0,1,0,1)); \ __m128 ahi = _mm_shuffle_ps (t, t, _MM_SHUFFLE(2,3,2,3)); \ __m128 _a = _mm_add_ps (alo, ahi); \ __m128 rlo = _mm_shuffle_ps (_a, _a, _MM_SHUFFLE(0,0,0,0)); \ __m128 rhi = _mm_shuffle_ps (_a, _a, _MM_SHUFFLE(1,1,1,1)); \ __m128 _r = _mm_add_ps (rlo, rhi); \ _mm_store_ss (&(p), _r); \ } while(0); /************************************************************/ /* 1D single-precision, complex evaulation functions */ /************************************************************/ void eval_multi_UBspline_1d_s (multi_UBspline_1d_s *spline, double x, float* restrict vals) { x -= spline->x_grid.start; float ux = x*spline->x_grid.delta_inv; float ipartx, tx; tx = modff (ux, &ipartx); int ix = (int) ipartx; float tpx[4], a[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; float* restrict coefs = spline->coefs; a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + 
Af[11]*tpx[3]); a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]); intptr_t xs = spline->x_stride; for (int n=0; nnum_splines; n++) vals[n] = 0.0; for (int i=0; i<4; i++) { float* restrict coefs = spline->coefs + ((ix+i)*xs); for (int n=0; nnum_splines; n++) vals[n] += a[i] * coefs[n]; } } void eval_multi_UBspline_1d_s_vg (multi_UBspline_1d_s *spline, double x, float* restrict vals, float* restrict grads) { x -= spline->x_grid.start; float ux = x*spline->x_grid.delta_inv; float ipartx, tx; tx = modff (ux, &ipartx); int ix = (int) ipartx; float tpx[4], a[4], da[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; float* restrict coefs = spline->coefs; a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]); a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]); da[0] = (dAf[ 0]*tpx[0] + dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]); da[1] = (dAf[ 4]*tpx[0] + dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]); da[2] = (dAf[ 8]*tpx[0] + dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]); da[3] = (dAf[12]*tpx[0] + dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]); intptr_t xs = spline->x_stride; for (int n=0; nnum_splines; n++) { vals[n] = 0.0; grads[n] = 0.0; } for (int i=0; i<4; i++) { float* restrict coefs = spline->coefs + ((ix+i)*xs); for (int n=0; nnum_splines; n++) { vals[n] += a[i] * coefs[n]; grads[n] += da[i] * coefs[n]; } } float dxInv = spline->x_grid.delta_inv; for (int n=0; nnum_splines; n++) grads[n] *= dxInv; } void eval_multi_UBspline_1d_s_vgl (multi_UBspline_1d_s *spline, double x, float* restrict vals, float* restrict grads, float* restrict lapl) { x -= spline->x_grid.start; float ux = x*spline->x_grid.delta_inv; float ipartx, tx; tx = modff (ux, &ipartx); int ix = (int) ipartx; float tpx[4], a[4], da[4], d2a[4]; tpx[0] = tx*tx*tx; tpx[1] = 
tx*tx; tpx[2] = tx; tpx[3] = 1.0; float* restrict coefs = spline->coefs; a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]); a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]); da[0] = (dAf[ 0]*tpx[0] + dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]); da[1] = (dAf[ 4]*tpx[0] + dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]); da[2] = (dAf[ 8]*tpx[0] + dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]); da[3] = (dAf[12]*tpx[0] + dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]); d2a[0] = (d2Af[ 0]*tpx[0] + d2Af[ 1]*tpx[1] + d2Af[ 2]*tpx[2] + d2Af[ 3]*tpx[3]); d2a[1] = (d2Af[ 4]*tpx[0] + d2Af[ 5]*tpx[1] + d2Af[ 6]*tpx[2] + d2Af[ 7]*tpx[3]); d2a[2] = (d2Af[ 8]*tpx[0] + d2Af[ 9]*tpx[1] + d2Af[10]*tpx[2] + d2Af[11]*tpx[3]); d2a[3] = (d2Af[12]*tpx[0] + d2Af[13]*tpx[1] + d2Af[14]*tpx[2] + d2Af[15]*tpx[3]); intptr_t xs = spline->x_stride; for (int n=0; nnum_splines; n++) { vals[n] = 0.0; grads[n] = 0.0; lapl[n] = 0.0; } for (int i=0; i<4; i++) { float* restrict coefs = spline->coefs + ((ix+i)*xs); for (int n=0; nnum_splines; n++) { vals[n] += a[i] * coefs[n]; grads[n] += da[i] * coefs[n]; lapl[n] += d2a[i] * coefs[n]; } } float dxInv = spline->x_grid.delta_inv; for (int n=0; nnum_splines; n++) { grads[n] *= dxInv; lapl [n] *= dxInv*dxInv; } } void eval_multi_UBspline_1d_s_vgh (multi_UBspline_1d_s *spline, double x, float* restrict vals, float* restrict grads, float* restrict hess) { eval_multi_UBspline_1d_s_vgl (spline, x, vals, grads, hess); } /************************************************************/ /* 2D single-precision, complex evaulation functions */ /************************************************************/ void eval_multi_UBspline_2d_s(multi_UBspline_2d_s *spline, double x, double y, float* restrict vals) { _mm_prefetch ((const char*) &A_s[ 0],_MM_HINT_T0); _mm_prefetch 
((const char*) &A_s[ 1],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 2],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 3],_MM_HINT_T0); /// SSE mesh point determination __m128 xy = _mm_set_ps (x, y, 0.0, 0.0); __m128 x0y0 = _mm_set_ps (spline->x_grid.start, spline->y_grid.start, 0.0, 0.0); __m128 delta_inv = _mm_set_ps (spline->x_grid.delta_inv,spline->y_grid.delta_inv, 0.0, 0.0); xy = _mm_sub_ps (xy, x0y0); // ux = (x - x0)/delta_x and same for y __m128 uxuy = _mm_mul_ps (xy, delta_inv); // intpart = trunc (ux, uy) __m128i intpart = _mm_cvttps_epi32(uxuy); __m128i ixiy; _mm_storeu_si128 (&ixiy, intpart); // Store to memory for use in C expressions // xmm registers are stored to memory in reverse order int ix = ((int *)&ixiy)[3]; int iy = ((int *)&ixiy)[2]; intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // tpz = [t_z^3 t_z^2 t_z 1] __m128 ipart = _mm_cvtepi32_ps (intpart); __m128 txty = _mm_sub_ps (uxuy, ipart); __m128 one = _mm_set_ps (1.0, 1.0, 1.0, 1.0); __m128 t2 = _mm_mul_ps (txty, txty); __m128 t3 = _mm_mul_ps (t2, txty); __m128 tpx = t3; __m128 tpy = t2; __m128 tpz = txty; __m128 zero = one; _MM_TRANSPOSE4_PS(zero, tpz, tpy, tpx); // a = A * tpx, b = A * tpy, c = A * tpz // da = dA * tpx, db = dA * tpy, dc = dA * tpz, etc. 
// A is 4x4 matrix given by the rows A_s[0], A_s[1], A_s[ 2], A_s[ 3] __m128 a4, b4, c4, da4, db4, dc4; // x-dependent vectors _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpx, a4); _MM_MATVEC4_PS (A_s[ 4], A_s[ 5], A_s[ 6], A_s[ 7], tpx, da4); // y-dependent vectors _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpy, b4); _MM_MATVEC4_PS (A_s[ 4], A_s[ 5], A_s[ 6], A_s[ 7], tpy, db4); __m128 a[4], b[4]; __m128 tmp; // Unpack a values tmp=_mm_unpacklo_ps( a4, a4); a[0]=_mm_unpacklo_ps(tmp, tmp); a[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( a4, a4); a[2]=_mm_unpacklo_ps(tmp, tmp); a[3]=_mm_unpackhi_ps(tmp, tmp); // Unpack b values tmp=_mm_unpacklo_ps( b4, b4); b[0]=_mm_unpacklo_ps(tmp, tmp); b[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( b4, b4); b[2]=_mm_unpacklo_ps(tmp, tmp); b[3]=_mm_unpackhi_ps(tmp, tmp); int N = spline->num_splines; int Nm = (N+3)/4; __m128 mvals[Nm]; // Zero out values; for (int n=0; ncoefs + (ix+i)*xs + (iy+j)*ys); for (int n=0; nx_grid.start, spline->y_grid.start, 0.0, 0.0); __m128 delta_inv = _mm_set_ps (spline->x_grid.delta_inv,spline->y_grid.delta_inv, 0.0, 0.0); xy = _mm_sub_ps (xy, x0y0); // ux = (x - x0)/delta_x and same for y __m128 uxuy = _mm_mul_ps (xy, delta_inv); // intpart = trunc (ux, uy) __m128i intpart = _mm_cvttps_epi32(uxuy); __m128i ixiy; _mm_storeu_si128 (&ixiy, intpart); // Store to memory for use in C expressions // xmm registers are stored to memory in reverse order int ix = ((int *)&ixiy)[3]; int iy = ((int *)&ixiy)[2]; intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // tpz = [t_z^3 t_z^2 t_z 1] __m128 ipart = _mm_cvtepi32_ps (intpart); __m128 txty = _mm_sub_ps (uxuy, ipart); __m128 one = _mm_set_ps (1.0, 1.0, 1.0, 1.0); __m128 t2 = _mm_mul_ps (txty, txty); __m128 t3 = _mm_mul_ps (t2, txty); __m128 tpx = t3; __m128 tpy = t2; __m128 tpz = txty; __m128 zero = one; _MM_TRANSPOSE4_PS(zero, tpz, 
tpy, tpx); // a = A * tpx, b = A * tpy, c = A * tpz // da = dA * tpx, db = dA * tpy, dc = dA * tpz, etc. // A is 4x4 matrix given by the rows A_s[0], A_s[1], A_s[ 2], A_s[ 3] __m128 a4, b4, c4, da4, db4, dc4; // x-dependent vectors _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpx, a4); _MM_MATVEC4_PS (A_s[ 4], A_s[ 5], A_s[ 6], A_s[ 7], tpx, da4); // y-dependent vectors _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpy, b4); _MM_MATVEC4_PS (A_s[ 4], A_s[ 5], A_s[ 6], A_s[ 7], tpy, db4); __m128 a[4], b[4], da[4], db[4]; __m128 tmp; // Unpack a values tmp=_mm_unpacklo_ps( a4, a4); a[0]=_mm_unpacklo_ps(tmp, tmp); a[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( a4, a4); a[2]=_mm_unpacklo_ps(tmp, tmp); a[3]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpacklo_ps( da4, da4); da[0]=_mm_unpacklo_ps(tmp, tmp); da[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( da4, da4); da[2]=_mm_unpacklo_ps(tmp, tmp); da[3]=_mm_unpackhi_ps(tmp, tmp); // Unpack b values tmp=_mm_unpacklo_ps( b4, b4); b[0]=_mm_unpacklo_ps(tmp, tmp); b[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( b4, b4); b[2]=_mm_unpacklo_ps(tmp, tmp); b[3]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpacklo_ps( db4, db4); db[0]=_mm_unpacklo_ps(tmp, tmp); db[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( db4, db4); db[2]=_mm_unpacklo_ps(tmp, tmp); db[3]=_mm_unpackhi_ps(tmp, tmp); int N = spline->num_splines; int Nm = (N+3)/4; __m128 mvals[Nm], mgrad[2*Nm]; // Zero out values; __m128 mzero = _mm_set_ps(0.0, 0.0, 0.0, 0.0); for (int n=0; ncoefs + (ix+i)*xs + (iy+j)*ys); for (int n=0; nx_grid.delta_inv; float dyInv = spline->y_grid.delta_inv; for (int n=0; n>2; int nm4 = n & 3; vals[n] = ((float*)mvals)[n]; grads[2*n+0] = ((float*)mgrad)[nd4*8 + 4*0 + nm4] * dxInv; grads[2*n+1] = ((float*)mgrad)[nd4*8 + 4*1 + nm4] * dyInv; } } void eval_multi_UBspline_2d_s_vgl (multi_UBspline_2d_s *spline, double x, double y, float* restrict vals, float* restrict grads, float* restrict lapl) { _mm_prefetch ((const char*) &A_s[ 
0],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 1],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 2],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 3],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 4],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 5],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 6],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 7],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 8],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 9],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[10],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[11],_MM_HINT_T0); /// SSE mesh point determination __m128 xy = _mm_set_ps (x, y, 0.0, 0.0); __m128 x0y0 = _mm_set_ps (spline->x_grid.start, spline->y_grid.start, 0.0, 0.0); __m128 delta_inv = _mm_set_ps (spline->x_grid.delta_inv,spline->y_grid.delta_inv, 0.0, 0.0); xy = _mm_sub_ps (xy, x0y0); // ux = (x - x0)/delta_x and same for y __m128 uxuy = _mm_mul_ps (xy, delta_inv); // intpart = trunc (ux, uy) __m128i intpart = _mm_cvttps_epi32(uxuy); __m128i ixiy; _mm_storeu_si128 (&ixiy, intpart); // Store to memory for use in C expressions // xmm registers are stored to memory in reverse order int ix = ((int *)&ixiy)[3]; int iy = ((int *)&ixiy)[2]; intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // tpz = [t_z^3 t_z^2 t_z 1] __m128 ipart = _mm_cvtepi32_ps (intpart); __m128 txty = _mm_sub_ps (uxuy, ipart); __m128 one = _mm_set_ps (1.0, 1.0, 1.0, 1.0); __m128 t2 = _mm_mul_ps (txty, txty); __m128 t3 = _mm_mul_ps (t2, txty); __m128 tpx = t3; __m128 tpy = t2; __m128 tpz = txty; __m128 zero = one; _MM_TRANSPOSE4_PS(zero, tpz, tpy, tpx); // a = A * tpx, b = A * tpy, c = A * tpz // da = dA * tpx, db = dA * tpy, dc = dA * tpz, etc. 
// A is 4x4 matrix given by the rows A_s[0], A_s[1], A_s[ 2], A_s[ 3] __m128 a4, b4, c4, da4, db4, dc4, d2a4, d2b4, d2c4; // x-dependent vectors _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpx, a4); _MM_MATVEC4_PS (A_s[ 4], A_s[ 5], A_s[ 6], A_s[ 7], tpx, da4); _MM_MATVEC4_PS (A_s[ 8], A_s[ 9], A_s[10], A_s[11], tpx, d2a4); // y-dependent vectors _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpy, b4); _MM_MATVEC4_PS (A_s[ 4], A_s[ 5], A_s[ 6], A_s[ 7], tpy, db4); _MM_MATVEC4_PS (A_s[ 8], A_s[ 9], A_s[10], A_s[11], tpy, d2b4); __m128 a[4], b[4], da[4], db[4], d2a[4], d2b[4]; __m128 tmp; // Unpack a values tmp=_mm_unpacklo_ps( a4, a4); a[0]=_mm_unpacklo_ps(tmp, tmp); a[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( a4, a4); a[2]=_mm_unpacklo_ps(tmp, tmp); a[3]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpacklo_ps( da4, da4); da[0]=_mm_unpacklo_ps(tmp, tmp); da[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( da4, da4); da[2]=_mm_unpacklo_ps(tmp, tmp); da[3]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpacklo_ps(d2a4, d2a4); d2a[0]=_mm_unpacklo_ps(tmp, tmp); d2a[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps(d2a4, d2a4); d2a[2]=_mm_unpacklo_ps(tmp, tmp); d2a[3]=_mm_unpackhi_ps(tmp, tmp); // Unpack b values tmp=_mm_unpacklo_ps( b4, b4); b[0]=_mm_unpacklo_ps(tmp, tmp); b[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( b4, b4); b[2]=_mm_unpacklo_ps(tmp, tmp); b[3]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpacklo_ps( db4, db4); db[0]=_mm_unpacklo_ps(tmp, tmp); db[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( db4, db4); db[2]=_mm_unpacklo_ps(tmp, tmp); db[3]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpacklo_ps(d2b4, d2b4); d2b[0]=_mm_unpacklo_ps(tmp, tmp); d2b[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps(d2b4, d2b4); d2b[2]=_mm_unpacklo_ps(tmp, tmp); d2b[3]=_mm_unpackhi_ps(tmp, tmp); int N = spline->num_splines; int Nm = (N+3)/4; __m128 mvals[Nm], mgrad[2*Nm], mlapl[2*Nm]; // Zero out values; __m128 mzero = _mm_set_ps(0.0, 0.0, 0.0, 0.0); for (int n=0; ncoefs + 
(ix+i)*xs + (iy+j)*ys); for (int n=0; nx_grid.delta_inv; float dyInv = spline->y_grid.delta_inv; for (int n=0; n>2; int nm4 = n & 3; vals[n] = ((float*)mvals)[n]; grads[2*n+0] = ((float*)mgrad)[nd4*8 + 4*0 + nm4] * dxInv; grads[2*n+1] = ((float*)mgrad)[nd4*8 + 4*1 + nm4] * dyInv; lapl [n] = (((float*)mlapl)[nd4*8 + 4*0 + nm4] * dxInv*dxInv + ((float*)mlapl)[nd4*8 + 4*1 + nm4] * dyInv*dyInv); } } void eval_multi_UBspline_2d_s_vgh (multi_UBspline_2d_s *spline, double x, double y, float* restrict vals, float* restrict grads, float* restrict hess) { _mm_prefetch ((const char*) &A_s[ 0],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 1],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 2],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 3],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 4],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 5],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 6],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 7],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 8],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 9],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[10],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[11],_MM_HINT_T0); /// SSE mesh point determination __m128 xy = _mm_set_ps (x, y, 0.0, 0.0); __m128 x0y0 = _mm_set_ps (spline->x_grid.start, spline->y_grid.start, 0.0, 0.0); __m128 delta_inv = _mm_set_ps (spline->x_grid.delta_inv,spline->y_grid.delta_inv, 0.0, 0.0); xy = _mm_sub_ps (xy, x0y0); // ux = (x - x0)/delta_x and same for y __m128 uxuy = _mm_mul_ps (xy, delta_inv); // intpart = trunc (ux, uy) __m128i intpart = _mm_cvttps_epi32(uxuy); __m128i ixiy; _mm_storeu_si128 (&ixiy, intpart); // Store to memory for use in C expressions // xmm registers are stored to memory in reverse order int ix = ((int *)&ixiy)[3]; int iy = ((int *)&ixiy)[2]; intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // tpz = [t_z^3 t_z^2 t_z 1] __m128 ipart = 
_mm_cvtepi32_ps (intpart); __m128 txty = _mm_sub_ps (uxuy, ipart); __m128 one = _mm_set_ps (1.0, 1.0, 1.0, 1.0); __m128 t2 = _mm_mul_ps (txty, txty); __m128 t3 = _mm_mul_ps (t2, txty); __m128 tpx = t3; __m128 tpy = t2; __m128 tpz = txty; __m128 zero = one; _MM_TRANSPOSE4_PS(zero, tpz, tpy, tpx); // a = A * tpx, b = A * tpy, c = A * tpz // da = dA * tpx, db = dA * tpy, dc = dA * tpz, etc. // A is 4x4 matrix given by the rows A_s[0], A_s[1], A_s[ 2], A_s[ 3] __m128 a4, b4, c4, da4, db4, dc4, d2a4, d2b4, d2c4; // x-dependent vectors _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpx, a4); _MM_MATVEC4_PS (A_s[ 4], A_s[ 5], A_s[ 6], A_s[ 7], tpx, da4); _MM_MATVEC4_PS (A_s[ 8], A_s[ 9], A_s[10], A_s[11], tpx, d2a4); // y-dependent vectors _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpy, b4); _MM_MATVEC4_PS (A_s[ 4], A_s[ 5], A_s[ 6], A_s[ 7], tpy, db4); _MM_MATVEC4_PS (A_s[ 8], A_s[ 9], A_s[10], A_s[11], tpy, d2b4); __m128 a[4], b[4], da[4], db[4], d2a[4], d2b[4]; __m128 tmp; // Unpack a values tmp=_mm_unpacklo_ps( a4, a4); a[0]=_mm_unpacklo_ps(tmp, tmp); a[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( a4, a4); a[2]=_mm_unpacklo_ps(tmp, tmp); a[3]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpacklo_ps( da4, da4); da[0]=_mm_unpacklo_ps(tmp, tmp); da[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( da4, da4); da[2]=_mm_unpacklo_ps(tmp, tmp); da[3]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpacklo_ps(d2a4, d2a4); d2a[0]=_mm_unpacklo_ps(tmp, tmp); d2a[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps(d2a4, d2a4); d2a[2]=_mm_unpacklo_ps(tmp, tmp); d2a[3]=_mm_unpackhi_ps(tmp, tmp); // Unpack b values tmp=_mm_unpacklo_ps( b4, b4); b[0]=_mm_unpacklo_ps(tmp, tmp); b[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( b4, b4); b[2]=_mm_unpacklo_ps(tmp, tmp); b[3]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpacklo_ps( db4, db4); db[0]=_mm_unpacklo_ps(tmp, tmp); db[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( db4, db4); db[2]=_mm_unpacklo_ps(tmp, tmp); db[3]=_mm_unpackhi_ps(tmp, 
tmp); tmp=_mm_unpacklo_ps(d2b4, d2b4); d2b[0]=_mm_unpacklo_ps(tmp, tmp); d2b[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps(d2b4, d2b4); d2b[2]=_mm_unpacklo_ps(tmp, tmp); d2b[3]=_mm_unpackhi_ps(tmp, tmp); int N = spline->num_splines; int Nm = (N+3)/4; __m128 mvals[Nm], mgrad[2*Nm], mhess[3*Nm]; // Zero out values; __m128 mzero = _mm_set_ps(0.0, 0.0, 0.0, 0.0); for (int n=0; ncoefs + (ix+i)*xs + (iy+j)*ys); for (int n=0; nx_grid.delta_inv; float dyInv = spline->y_grid.delta_inv; for (int n=0; n>2; int nm4 = n & 3; vals[n] = ((float*)mvals)[n]; grads[2*n+0] = ((float*)mgrad)[nd4*8 + 4*0 + nm4] * dxInv; grads[2*n+1] = ((float*)mgrad)[nd4*8 + 4*1 + nm4] * dyInv; hess [4*n+0] = ((float*)mhess)[nd4*12 + 4*0 + nm4] * dxInv*dxInv; hess [4*n+1] = hess[4*n+2] = ((float*)mhess)[nd4*12 + 4*1 + nm4] * dxInv*dyInv; hess [4*n+3] = ((float*)mhess)[nd4*12 + 4*2 + nm4] * dyInv*dyInv; } } /************************************************************/ /* 3D single-precision, complex evaulation functions */ /************************************************************/ void eval_multi_UBspline_3d_s (multi_UBspline_3d_s *spline, double x, double y, double z, float* restrict vals) { _mm_prefetch ((const char*) &A_s[ 0],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 1],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 2],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 3],_MM_HINT_T0); /// SSE mesh point determination __m128 xyz = _mm_set_ps (x, y, z, 0.0); __m128 x0y0z0 = _mm_set_ps (spline->x_grid.start, spline->y_grid.start, spline->z_grid.start, 0.0); __m128 delta_inv = _mm_set_ps (spline->x_grid.delta_inv,spline->y_grid.delta_inv, spline->z_grid.delta_inv, 0.0); xyz = _mm_sub_ps (xyz, x0y0z0); // ux = (x - x0)/delta_x and same for y and z __m128 uxuyuz = _mm_mul_ps (xyz, delta_inv); // intpart = trunc (ux, uy, uz) __m128i intpart = _mm_cvttps_epi32(uxuyuz); __m128i ixiyiz; _mm_storeu_si128 (&ixiyiz, intpart); // Store to memory for use in C expressions // xmm registers are stored to 
memory in reverse order int ix = ((int *)&ixiyiz)[3]; int iy = ((int *)&ixiyiz)[2]; int iz = ((int *)&ixiyiz)[1]; intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; intptr_t zs = spline->z_stride; // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // tpz = [t_z^3 t_z^2 t_z 1] __m128 ipart = _mm_cvtepi32_ps (intpart); __m128 txtytz = _mm_sub_ps (uxuyuz, ipart); __m128 one = _mm_set_ps (1.0, 1.0, 1.0, 1.0); __m128 t2 = _mm_mul_ps (txtytz, txtytz); __m128 t3 = _mm_mul_ps (t2, txtytz); __m128 tpx = t3; __m128 tpy = t2; __m128 tpz = txtytz; __m128 zero = one; _MM_TRANSPOSE4_PS(zero, tpz, tpy, tpx); // a = A * tpx, b = A * tpy, c = A * tpz // A is 4x4 matrix given by the rows A_s[0], A_s[1], A_s[ 2], A_s[ 3] __m128 a4, b4, c4; // x-dependent vectors _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpx, a4); // y-dependent vectors _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpy, b4); // z-dependent vectors _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpz, c4); __m128 a[4], b[4], c[4]; __m128 tmp; // Unpack a values tmp=_mm_unpacklo_ps( a4, a4); a[0]=_mm_unpacklo_ps(tmp, tmp); a[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( a4, a4); a[2]=_mm_unpacklo_ps(tmp, tmp); a[3]=_mm_unpackhi_ps(tmp, tmp); // Unpack b values tmp=_mm_unpacklo_ps( b4, b4); b[0]=_mm_unpacklo_ps(tmp, tmp); b[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( b4, b4); b[2]=_mm_unpacklo_ps(tmp, tmp); b[3]=_mm_unpackhi_ps(tmp, tmp); // Unpack c values tmp=_mm_unpacklo_ps( c4, c4); c[0]=_mm_unpacklo_ps(tmp, tmp); c[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( c4, c4); c[2]=_mm_unpacklo_ps(tmp, tmp); c[3]=_mm_unpackhi_ps(tmp, tmp); int N = spline->num_splines; int Nm = (N+3)/4; __m128 mvals[Nm]; // Zero out values; __m128 mzero = _mm_set_ps(0.0, 0.0, 0.0, 0.0); for (int n=0; ncoefs + (ix+i)*xs + (iy+j)*ys + (iz+k)*zs); for (int n=0; nx_grid.start, spline->y_grid.start, spline->z_grid.start, 0.0); __m128 delta_inv = _mm_set_ps 
(spline->x_grid.delta_inv,spline->y_grid.delta_inv, spline->z_grid.delta_inv, 0.0); xyz = _mm_sub_ps (xyz, x0y0z0); // ux = (x - x0)/delta_x and same for y and z __m128 uxuyuz = _mm_mul_ps (xyz, delta_inv); // intpart = trunc (ux, uy, uz) __m128i intpart = _mm_cvttps_epi32(uxuyuz); __m128i ixiyiz; _mm_storeu_si128 (&ixiyiz, intpart); // Store to memory for use in C expressions // xmm registers are stored to memory in reverse order int ix = ((int *)&ixiyiz)[3]; int iy = ((int *)&ixiyiz)[2]; int iz = ((int *)&ixiyiz)[1]; intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; intptr_t zs = spline->z_stride; // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // tpz = [t_z^3 t_z^2 t_z 1] __m128 ipart = _mm_cvtepi32_ps (intpart); __m128 txtytz = _mm_sub_ps (uxuyuz, ipart); __m128 one = _mm_set_ps (1.0, 1.0, 1.0, 1.0); __m128 t2 = _mm_mul_ps (txtytz, txtytz); __m128 t3 = _mm_mul_ps (t2, txtytz); __m128 tpx = t3; __m128 tpy = t2; __m128 tpz = txtytz; __m128 zero = one; _MM_TRANSPOSE4_PS(zero, tpz, tpy, tpx); // a = A * tpx, b = A * tpy, c = A * tpz // da = dA * tpx, db = dA * tpy, dc = dA * tpz, etc. 
// A is 4x4 matrix given by the rows A_s[0], A_s[1], A_s[ 2], A_s[ 3] __m128 a4, b4, c4, da4, db4, dc4; // x-dependent vectors _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpx, a4); _MM_MATVEC4_PS (A_s[ 4], A_s[ 5], A_s[ 6], A_s[ 7], tpx, da4); // y-dependent vectors _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpy, b4); _MM_MATVEC4_PS (A_s[ 4], A_s[ 5], A_s[ 6], A_s[ 7], tpy, db4); // z-dependent vectors _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpz, c4); _MM_MATVEC4_PS (A_s[ 4], A_s[ 5], A_s[ 6], A_s[ 7], tpz, dc4); __m128 a[4], b[4], c[4], da[4], db[4], dc[4]; __m128 tmp; // Unpack a values tmp=_mm_unpacklo_ps( a4, a4); a[0]=_mm_unpacklo_ps(tmp, tmp); a[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( a4, a4); a[2]=_mm_unpacklo_ps(tmp, tmp); a[3]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpacklo_ps( da4, da4); da[0]=_mm_unpacklo_ps(tmp, tmp); da[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( da4, da4); da[2]=_mm_unpacklo_ps(tmp, tmp); da[3]=_mm_unpackhi_ps(tmp, tmp); // Unpack b values tmp=_mm_unpacklo_ps( b4, b4); b[0]=_mm_unpacklo_ps(tmp, tmp); b[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( b4, b4); b[2]=_mm_unpacklo_ps(tmp, tmp); b[3]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpacklo_ps( db4, db4); db[0]=_mm_unpacklo_ps(tmp, tmp); db[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( db4, db4); db[2]=_mm_unpacklo_ps(tmp, tmp); db[3]=_mm_unpackhi_ps(tmp, tmp); // Unpack c values tmp=_mm_unpacklo_ps( c4, c4); c[0]=_mm_unpacklo_ps(tmp, tmp); c[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( c4, c4); c[2]=_mm_unpacklo_ps(tmp, tmp); c[3]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpacklo_ps( dc4, dc4); dc[0]=_mm_unpacklo_ps(tmp, tmp); dc[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( dc4, dc4); dc[2]=_mm_unpacklo_ps(tmp, tmp); dc[3]=_mm_unpackhi_ps(tmp, tmp); int N = spline->num_splines; int Nm = (N+3)/4; __m128 mvals[Nm], mgrad[3*Nm]; // Zero out values; __m128 mzero = _mm_set_ps(0.0, 0.0, 0.0, 0.0); for (int n=0; ncoefs + (ix+i)*xs + 
(iy+j)*ys + (iz+k)*zs); for (int n=0; nx_grid.delta_inv; float dyInv = spline->y_grid.delta_inv; float dzInv = spline->z_grid.delta_inv; for (int n=0; n>2; int nm4 = n & 3; vals[n] = ((float*)mvals)[n]; grads[3*n+0] = ((float*)mgrad)[nd4*12 + 4*0 + nm4] * dxInv; grads[3*n+1] = ((float*)mgrad)[nd4*12 + 4*1 + nm4] * dyInv; grads[3*n+2] = ((float*)mgrad)[nd4*12 + 4*2 + nm4] * dzInv; } } void eval_multi_UBspline_3d_s_vgl (multi_UBspline_3d_s *spline, double x, double y, double z, float* restrict vals, float* restrict grads, float* restrict lapl) { _mm_prefetch ((const char*) &A_s[ 0],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 1],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 2],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 3],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 4],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 5],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 6],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 7],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 8],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 9],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[10],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[11],_MM_HINT_T0); /// SSE mesh point determination __m128 xyz = _mm_set_ps (x, y, z, 0.0); __m128 x0y0z0 = _mm_set_ps (spline->x_grid.start, spline->y_grid.start, spline->z_grid.start, 0.0); __m128 delta_inv = _mm_set_ps (spline->x_grid.delta_inv,spline->y_grid.delta_inv, spline->z_grid.delta_inv, 0.0); xyz = _mm_sub_ps (xyz, x0y0z0); // ux = (x - x0)/delta_x and same for y and z __m128 uxuyuz = _mm_mul_ps (xyz, delta_inv); // intpart = trunc (ux, uy, uz) __m128i intpart = _mm_cvttps_epi32(uxuyuz); __m128i ixiyiz; _mm_storeu_si128 (&ixiyiz, intpart); // Store to memory for use in C expressions // xmm registers are stored to memory in reverse order int ix = ((int *)&ixiyiz)[3]; int iy = ((int *)&ixiyiz)[2]; int iz = ((int *)&ixiyiz)[1]; intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; intptr_t zs = spline->z_stride; // Now 
compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // tpz = [t_z^3 t_z^2 t_z 1] __m128 ipart = _mm_cvtepi32_ps (intpart); __m128 txtytz = _mm_sub_ps (uxuyuz, ipart); __m128 one = _mm_set_ps (1.0, 1.0, 1.0, 1.0); __m128 t2 = _mm_mul_ps (txtytz, txtytz); __m128 t3 = _mm_mul_ps (t2, txtytz); __m128 tpx = t3; __m128 tpy = t2; __m128 tpz = txtytz; __m128 zero = one; _MM_TRANSPOSE4_PS(zero, tpz, tpy, tpx); // a = A * tpx, b = A * tpy, c = A * tpz // da = dA * tpx, db = dA * tpy, dc = dA * tpz, etc. // A is 4x4 matrix given by the rows A_s[0], A_s[1], A_s[ 2], A_s[ 3] __m128 a4, b4, c4, da4, db4, dc4, d2a4, d2b4, d2c4, cP[4], dcP[4], d2cP[4], bcP, dbcP, bdcP, d2bcP, dbdcP, bd2cP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; // x-dependent vectors _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpx, a4); _MM_MATVEC4_PS (A_s[ 4], A_s[ 5], A_s[ 6], A_s[ 7], tpx, da4); _MM_MATVEC4_PS (A_s[ 8], A_s[ 9], A_s[10], A_s[11], tpx, d2a4); // y-dependent vectors _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpy, b4); _MM_MATVEC4_PS (A_s[ 4], A_s[ 5], A_s[ 6], A_s[ 7], tpy, db4); _MM_MATVEC4_PS (A_s[ 8], A_s[ 9], A_s[10], A_s[11], tpy, d2b4); // z-dependent vectors _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpz, c4); _MM_MATVEC4_PS (A_s[ 4], A_s[ 5], A_s[ 6], A_s[ 7], tpz, dc4); _MM_MATVEC4_PS (A_s[ 8], A_s[ 9], A_s[10], A_s[11], tpz, d2c4); __m128 a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4]; __m128 tmp; // Unpack a values tmp=_mm_unpacklo_ps( a4, a4); a[0]=_mm_unpacklo_ps(tmp, tmp); a[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( a4, a4); a[2]=_mm_unpacklo_ps(tmp, tmp); a[3]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpacklo_ps( da4, da4); da[0]=_mm_unpacklo_ps(tmp, tmp); da[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( da4, da4); da[2]=_mm_unpacklo_ps(tmp, tmp); da[3]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpacklo_ps(d2a4, d2a4); d2a[0]=_mm_unpacklo_ps(tmp, tmp); d2a[1]=_mm_unpackhi_ps(tmp, tmp); 
tmp=_mm_unpackhi_ps(d2a4, d2a4); d2a[2]=_mm_unpacklo_ps(tmp, tmp); d2a[3]=_mm_unpackhi_ps(tmp, tmp); // Unpack b values tmp=_mm_unpacklo_ps( b4, b4); b[0]=_mm_unpacklo_ps(tmp, tmp); b[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( b4, b4); b[2]=_mm_unpacklo_ps(tmp, tmp); b[3]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpacklo_ps( db4, db4); db[0]=_mm_unpacklo_ps(tmp, tmp); db[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( db4, db4); db[2]=_mm_unpacklo_ps(tmp, tmp); db[3]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpacklo_ps(d2b4, d2b4); d2b[0]=_mm_unpacklo_ps(tmp, tmp); d2b[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps(d2b4, d2b4); d2b[2]=_mm_unpacklo_ps(tmp, tmp); d2b[3]=_mm_unpackhi_ps(tmp, tmp); // Unpack c values tmp=_mm_unpacklo_ps( c4, c4); c[0]=_mm_unpacklo_ps(tmp, tmp); c[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( c4, c4); c[2]=_mm_unpacklo_ps(tmp, tmp); c[3]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpacklo_ps( dc4, dc4); dc[0]=_mm_unpacklo_ps(tmp, tmp); dc[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( dc4, dc4); dc[2]=_mm_unpacklo_ps(tmp, tmp); dc[3]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpacklo_ps(d2c4, d2c4); d2c[0]=_mm_unpacklo_ps(tmp, tmp); d2c[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps(d2c4, d2c4); d2c[2]=_mm_unpacklo_ps(tmp, tmp); d2c[3]=_mm_unpackhi_ps(tmp, tmp); int N = spline->num_splines; int Nm = (N+3)/4; __m128 mvals[Nm], mgrad[3*Nm], mlapl[3*Nm]; // Zero out values; __m128 mzero = _mm_set_ps(0.0, 0.0, 0.0, 0.0); for (int n=0; ncoefs + (ix+i)*xs + (iy+j)*ys + (iz+k)*zs); for (int n=0; nx_grid.delta_inv; float dyInv = spline->y_grid.delta_inv; float dzInv = spline->z_grid.delta_inv; for (int n=0; n>2; int nm4 = n & 3; vals[n] = ((float*)mvals)[n]; grads[3*n+0] = ((float*)mgrad)[nd4*12 + 4*0 + nm4] * dxInv; grads[3*n+1] = ((float*)mgrad)[nd4*12 + 4*1 + nm4] * dyInv; grads[3*n+2] = ((float*)mgrad)[nd4*12 + 4*2 + nm4] * dzInv; lapl [n] = (((float*)mlapl)[nd4*12 + 4*0 + nm4] * dxInv*dxInv + ((float*)mlapl)[nd4*12 + 4*1 + nm4] * 
dyInv*dyInv + ((float*)mlapl)[nd4*12 + 4*2 + nm4] * dzInv*dzInv); } } void eval_multi_UBspline_3d_s_vgh (multi_UBspline_3d_s *spline, double x, double y, double z, float* restrict vals, float* restrict grads, float* restrict hess) { _mm_prefetch ((const char*) &A_s[ 0],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 1],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 2],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 3],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 4],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 5],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 6],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 7],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 8],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 9],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[10],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[11],_MM_HINT_T0); /// SSE mesh point determination __m128 xyz = _mm_set_ps (x, y, z, 0.0); __m128 x0y0z0 = _mm_set_ps (spline->x_grid.start, spline->y_grid.start, spline->z_grid.start, 0.0); __m128 delta_inv = _mm_set_ps (spline->x_grid.delta_inv,spline->y_grid.delta_inv, spline->z_grid.delta_inv, 0.0); xyz = _mm_sub_ps (xyz, x0y0z0); // ux = (x - x0)/delta_x and same for y and z __m128 uxuyuz = _mm_mul_ps (xyz, delta_inv); // intpart = trunc (ux, uy, uz) __m128i intpart = _mm_cvttps_epi32(uxuyuz); __m128i ixiyiz; _mm_storeu_si128 (&ixiyiz, intpart); // Store to memory for use in C expressions // xmm registers are stored to memory in reverse order int ix = ((int *)&ixiyiz)[3]; int iy = ((int *)&ixiyiz)[2]; int iz = ((int *)&ixiyiz)[1]; intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; intptr_t zs = spline->z_stride; // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // tpz = [t_z^3 t_z^2 t_z 1] __m128 ipart = _mm_cvtepi32_ps (intpart); __m128 txtytz = _mm_sub_ps (uxuyuz, ipart); __m128 one = _mm_set_ps (1.0, 1.0, 1.0, 1.0); __m128 t2 = _mm_mul_ps (txtytz, txtytz); __m128 t3 = _mm_mul_ps (t2, txtytz); 
__m128 tpx = t3; __m128 tpy = t2; __m128 tpz = txtytz; __m128 zero = one; _MM_TRANSPOSE4_PS(zero, tpz, tpy, tpx); // a = A * tpx, b = A * tpy, c = A * tpz // da = dA * tpx, db = dA * tpy, dc = dA * tpz, etc. // A is 4x4 matrix given by the rows A_s[0], A_s[1], A_s[ 2], A_s[ 3] __m128 a4, b4, c4, da4, db4, dc4, d2a4, d2b4, d2c4; // x-dependent vectors _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpx, a4); _MM_MATVEC4_PS (A_s[ 4], A_s[ 5], A_s[ 6], A_s[ 7], tpx, da4); _MM_MATVEC4_PS (A_s[ 8], A_s[ 9], A_s[10], A_s[11], tpx, d2a4); // y-dependent vectors _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpy, b4); _MM_MATVEC4_PS (A_s[ 4], A_s[ 5], A_s[ 6], A_s[ 7], tpy, db4); _MM_MATVEC4_PS (A_s[ 8], A_s[ 9], A_s[10], A_s[11], tpy, d2b4); // z-dependent vectors _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpz, c4); _MM_MATVEC4_PS (A_s[ 4], A_s[ 5], A_s[ 6], A_s[ 7], tpz, dc4); _MM_MATVEC4_PS (A_s[ 8], A_s[ 9], A_s[10], A_s[11], tpz, d2c4); __m128 a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4]; __m128 tmp; // Unpack a values tmp=_mm_unpacklo_ps( a4, a4); a[0]=_mm_unpacklo_ps(tmp, tmp); a[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( a4, a4); a[2]=_mm_unpacklo_ps(tmp, tmp); a[3]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpacklo_ps( da4, da4); da[0]=_mm_unpacklo_ps(tmp, tmp); da[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( da4, da4); da[2]=_mm_unpacklo_ps(tmp, tmp); da[3]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpacklo_ps(d2a4, d2a4); d2a[0]=_mm_unpacklo_ps(tmp, tmp); d2a[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps(d2a4, d2a4); d2a[2]=_mm_unpacklo_ps(tmp, tmp); d2a[3]=_mm_unpackhi_ps(tmp, tmp); // Unpack b values tmp=_mm_unpacklo_ps( b4, b4); b[0]=_mm_unpacklo_ps(tmp, tmp); b[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( b4, b4); b[2]=_mm_unpacklo_ps(tmp, tmp); b[3]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpacklo_ps( db4, db4); db[0]=_mm_unpacklo_ps(tmp, tmp); db[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( db4, db4); 
db[2]=_mm_unpacklo_ps(tmp, tmp); db[3]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpacklo_ps(d2b4, d2b4); d2b[0]=_mm_unpacklo_ps(tmp, tmp); d2b[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps(d2b4, d2b4); d2b[2]=_mm_unpacklo_ps(tmp, tmp); d2b[3]=_mm_unpackhi_ps(tmp, tmp); // Unpack c values tmp=_mm_unpacklo_ps( c4, c4); c[0]=_mm_unpacklo_ps(tmp, tmp); c[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( c4, c4); c[2]=_mm_unpacklo_ps(tmp, tmp); c[3]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpacklo_ps( dc4, dc4); dc[0]=_mm_unpacklo_ps(tmp, tmp); dc[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps( dc4, dc4); dc[2]=_mm_unpacklo_ps(tmp, tmp); dc[3]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpacklo_ps(d2c4, d2c4); d2c[0]=_mm_unpacklo_ps(tmp, tmp); d2c[1]=_mm_unpackhi_ps(tmp, tmp); tmp=_mm_unpackhi_ps(d2c4, d2c4); d2c[2]=_mm_unpacklo_ps(tmp, tmp); d2c[3]=_mm_unpackhi_ps(tmp, tmp); int N = spline->num_splines; int Nm = (N+3)/4; __m128 mvals[Nm], mgrad[3*Nm], mhess[6*Nm]; // Zero out values; __m128 mzero = _mm_set_ps(0.0, 0.0, 0.0, 0.0); for (int n=0; ncoefs + (ix+i)*xs + (iy+j)*ys + (iz+k)*zs); for (int n=0; nx_grid.delta_inv; float dyInv = spline->y_grid.delta_inv; float dzInv = spline->z_grid.delta_inv; for (int n=0; n>2; int nm4 = n & 3; vals[n] = ((float*)mvals)[n]; grads[3*n+0] = ((float*)mgrad)[nd4*12 + 4*0 + nm4] * dxInv; grads[3*n+1] = ((float*)mgrad)[nd4*12 + 4*1 + nm4] * dyInv; grads[3*n+2] = ((float*)mgrad)[nd4*12 + 4*2 + nm4] * dzInv; hess [9*n+0] = ((float*)mhess)[nd4*24 + 4*0 + nm4] * dxInv*dxInv; hess [9*n+1] = hess[9*n+3] = ((float*)mhess)[nd4*24 + 4*1 + nm4] * dxInv*dyInv; hess [9*n+2] = hess[9*n+6] = ((float*)mhess)[nd4*24 + 4*2 + nm4] * dxInv*dzInv; hess [9*n+4] = ((float*)mhess)[nd4*24 + 4*3 + nm4] * dyInv*dyInv; hess [9*n+5] = hess[9*n+7] = ((float*)mhess)[nd4*24 + 4*4 + nm4] * dyInv*dzInv; hess [9*n+8] = ((float*)mhess)[nd4*24 + 4*5 + nm4] * dzInv*dzInv; } // for (int n=0; n #include #ifdef HAVE_SSE3 #include #endif #include extern __m128d *restrict A_d; // extern 
__m128d // A0_01, A0_23, A1_01, A1_23, A2_01, A2_23, A3_01, A3_23, // dA0_01, dA0_23, dA1_01, dA1_23, dA2_01, dA2_23, dA3_01, dA3_23, // d2A0_01, d2A0_23, d2A1_01, d2A1_23, d2A2_01, d2A2_23, d2A3_01, d2A3_23; // This returns, pack in r, the two four-element dot products given // by, r = [dot([a0,a1],[b0,b1], dot([a2,a3],[b2,b3]). Specifically // r_l = a0_l*b0_l + a0_h+b0_h + a1_l*b1_l + a1_h*b1_h // r_h = a2_l*b2_l + a2_h+b2_h + a3_l*b1_l + a3_h*b1_h #ifdef HAVE_SSE3 #define _MM_DDOT4_PD(a0, a1, a2, a3, b0, b1, b2, b3, r) \ do { \ __m128d t0 = _mm_add_pd(_mm_mul_pd (a0, b0),_mm_mul_pd (a1, b1)); \ __m128d t1 = _mm_add_pd(_mm_mul_pd (a2, b2),_mm_mul_pd (a3, b3)); \ r = _mm_hadd_pd (t0, t1); \ } while(0); #define _MM_DOT4_PD(a0, a1, b0, b1, p) \ do { \ __m128d t0 = _mm_add_pd(_mm_mul_pd (a0, b0),_mm_mul_pd (a1, b1)); \ __m128d t1 = _mm_hadd_pd (t0,t0); \ _mm_store_sd (&(p), t1); \ } while (0); #else #define _MM_DDOT4_PD(a0, a1, a2, a3, b0, b1, b2, b3, r) \ do { \ __m128d t0 = _mm_add_pd(_mm_mul_pd (a0, b0),_mm_mul_pd (a1, b1)); \ __m128d t1 = _mm_add_pd(_mm_mul_pd (a2, b2),_mm_mul_pd (a3, b3)); \ r = _mm_add_pd(_mm_unpacklo_pd(t0,t1),_mm_unpackhi_pd(t0,t1)); \ } while(0); #define _MM_DOT4_PD(a0, a1, b0, b1, p) \ do { \ __m128d t0 = _mm_add_pd(_mm_mul_pd (a0, b0),_mm_mul_pd (a1, b1)); \ __m128d t1 = \ _mm_add_pd (_mm_unpacklo_pd(t0,t0), _mm_unpackhi_pd(t0,t0)); \ _mm_store_sd (&(p), t1); \ } while (0); #endif /************************************************************/ /* 1D single-precision, real evaulation functions */ /************************************************************/ /* Value only */ inline void eval_UBspline_1d_z (UBspline_1d_z * restrict spline, double x, complex_double* restrict val) { x -= spline->x_grid.start; double u = x*spline->x_grid.delta_inv; double ipart, t; t = modf (u, &ipart); int i = (int) ipart; double tp[4]; tp[0] = t*t*t; tp[1] = t*t; tp[2] = t; tp[3] = 1.0; complex_double* restrict coefs = spline->coefs; *val = (coefs[i+0]*(Ad[ 
0]*tp[0] + Ad[ 1]*tp[1] + Ad[ 2]*tp[2] + Ad[ 3]*tp[3])+ coefs[i+1]*(Ad[ 4]*tp[0] + Ad[ 5]*tp[1] + Ad[ 6]*tp[2] + Ad[ 7]*tp[3])+ coefs[i+2]*(Ad[ 8]*tp[0] + Ad[ 9]*tp[1] + Ad[10]*tp[2] + Ad[11]*tp[3])+ coefs[i+3]*(Ad[12]*tp[0] + Ad[13]*tp[1] + Ad[14]*tp[2] + Ad[15]*tp[3])); } /* Value and first derivative */ inline void eval_UBspline_1d_z_vg (UBspline_1d_z * restrict spline, double x, complex_double* restrict val, complex_double* restrict grad) { x -= spline->x_grid.start; double u = x*spline->x_grid.delta_inv; double ipart, t; t = modf (u, &ipart); int i = (int) ipart; double tp[4]; tp[0] = t*t*t; tp[1] = t*t; tp[2] = t; tp[3] = 1.0; complex_double* restrict coefs = spline->coefs; *val = (coefs[i+0]*(Ad[ 0]*tp[0] + Ad[ 1]*tp[1] + Ad[ 2]*tp[2] + Ad[ 3]*tp[3])+ coefs[i+1]*(Ad[ 4]*tp[0] + Ad[ 5]*tp[1] + Ad[ 6]*tp[2] + Ad[ 7]*tp[3])+ coefs[i+2]*(Ad[ 8]*tp[0] + Ad[ 9]*tp[1] + Ad[10]*tp[2] + Ad[11]*tp[3])+ coefs[i+3]*(Ad[12]*tp[0] + Ad[13]*tp[1] + Ad[14]*tp[2] + Ad[15]*tp[3])); *grad = spline->x_grid.delta_inv * (coefs[i+0]*(dAd[ 1]*tp[1] + dAd[ 2]*tp[2] + dAd[ 3]*tp[3])+ coefs[i+1]*(dAd[ 5]*tp[1] + dAd[ 6]*tp[2] + dAd[ 7]*tp[3])+ coefs[i+2]*(dAd[ 9]*tp[1] + dAd[10]*tp[2] + dAd[11]*tp[3])+ coefs[i+3]*(dAd[13]*tp[1] + dAd[14]*tp[2] + dAd[15]*tp[3])); } /* Value, first derivative, and second derivative */ inline void eval_UBspline_1d_z_vgl (UBspline_1d_z * restrict spline, double x, complex_double* restrict val, complex_double* restrict grad, complex_double* restrict lapl) { x -= spline->x_grid.start; double u = x*spline->x_grid.delta_inv; double ipart, t; t = modf (u, &ipart); int i = (int) ipart; double tp[4]; tp[0] = t*t*t; tp[1] = t*t; tp[2] = t; tp[3] = 1.0; complex_double* restrict coefs = spline->coefs; *val = (coefs[i+0]*(Ad[ 0]*tp[0] + Ad[ 1]*tp[1] + Ad[ 2]*tp[2] + Ad[ 3]*tp[3])+ coefs[i+1]*(Ad[ 4]*tp[0] + Ad[ 5]*tp[1] + Ad[ 6]*tp[2] + Ad[ 7]*tp[3])+ coefs[i+2]*(Ad[ 8]*tp[0] + Ad[ 9]*tp[1] + Ad[10]*tp[2] + Ad[11]*tp[3])+ coefs[i+3]*(Ad[12]*tp[0] + Ad[13]*tp[1] + 
Ad[14]*tp[2] + Ad[15]*tp[3])); *grad = spline->x_grid.delta_inv * (coefs[i+0]*(dAd[ 1]*tp[1] + dAd[ 2]*tp[2] + dAd[ 3]*tp[3])+ coefs[i+1]*(dAd[ 5]*tp[1] + dAd[ 6]*tp[2] + dAd[ 7]*tp[3])+ coefs[i+2]*(dAd[ 9]*tp[1] + dAd[10]*tp[2] + dAd[11]*tp[3])+ coefs[i+3]*(dAd[13]*tp[1] + dAd[14]*tp[2] + dAd[15]*tp[3])); *lapl = spline->x_grid.delta_inv * spline->x_grid.delta_inv * (coefs[i+0]*(d2Ad[ 2]*tp[2] + d2Ad[ 3]*tp[3])+ coefs[i+1]*(d2Ad[ 6]*tp[2] + d2Ad[ 7]*tp[3])+ coefs[i+2]*(d2Ad[10]*tp[2] + d2Ad[11]*tp[3])+ coefs[i+3]*(d2Ad[14]*tp[2] + d2Ad[15]*tp[3])); } inline void eval_UBspline_1d_z_vgh (UBspline_1d_z * restrict spline, double x, complex_double* restrict val, complex_double* restrict grad, complex_double* restrict hess) { eval_UBspline_1d_z_vgl (spline, x, val, grad, hess); } /************************************************************/ /* 2D single-precision, real evaulation functions */ /************************************************************/ /* Value only */ inline void eval_UBspline_2d_z (UBspline_2d_z * restrict spline, double x, double y, complex_double* restrict val) { _mm_prefetch ((const char*) &A_d[ 0],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 1],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 2],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 3],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 4],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 5],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 6],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 7],_MM_HINT_T0); x -= spline->x_grid.start; y -= spline->y_grid.start; double ux = x*spline->x_grid.delta_inv; double uy = y*spline->y_grid.delta_inv; ux = fmin (ux, (double)(spline->x_grid.num)-1.0e-5); uy = fmin (uy, (double)(spline->y_grid.num)-1.0e-5); double ipartx, iparty, tx, ty; tx = modf (ux, &ipartx); int ix = (int) ipartx; ty = modf (uy, &iparty); int iy = (int) iparty; int xs = spline->x_stride; // This macro is used to give the pointer to coefficient data. 
// i and j should be in the range [0,3]. Coefficients are read four // at a time, so no k value is needed. #define P(i,j) (const double*)(spline->coefs+(ix+(i))*xs+(iy+(j))) // Prefetch the data from main memory into cache so it's available // when we need to use it. _mm_prefetch ((const char*)P(0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,2), _MM_HINT_T0); // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // tpz = [t_z^3 t_z^2 t_z 1] // a = A * tpx, b = A * tpy, c = A * tpz // da = dA * tpx, db = dA * tpy, dc = dA * tpz, etc. // A is 4x4 matrix given by the rows A0, A1, A2, A3 __m128d tpx01, tpx23, tpy01, tpy23, a01, b01, a23, b23, bP01r, bP23r, bP01i, bP23i, tmp0, tmp1, tmp2, tmp3, r0, r1, r2, r3, i0, i1, i2, i3; tpx01 = _mm_set_pd (tx*tx*tx, tx*tx); tpx23 = _mm_set_pd (tx, 1.0); tpy01 = _mm_set_pd (ty*ty*ty, ty*ty); tpy23 = _mm_set_pd (ty, 1.0); // x-dependent vectors _MM_DDOT4_PD (A_d[0], A_d[1], A_d[2], A_d[3], tpx01, tpx23, tpx01, tpx23, a01); _MM_DDOT4_PD (A_d[4], A_d[5], A_d[6], A_d[7], tpx01, tpx23, tpx01, tpx23, a23); // y-dependent vectors _MM_DDOT4_PD (A_d[0], A_d[1], A_d[2], A_d[3], tpy01, tpy23, tpy01, tpy23, b01); _MM_DDOT4_PD (A_d[4], A_d[5], A_d[6], A_d[7], tpy01, tpy23, tpy01, tpy23, b23); // Now compute bP, dbP, d2bP products tmp0 = _mm_load_pd (P(0,0)); tmp1 = _mm_load_pd (P(0,1)); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(0,2)); tmp1 = _mm_load_pd (P(0,3)); r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(1,0)); tmp1 = _mm_load_pd (P(1,1)); r2 = 
_mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(1,2)); tmp1 = _mm_load_pd (P(1,3)); r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, b01, b23, b01, b23, bP01r); _MM_DDOT4_PD(i0, i1, i2, i3, b01, b23, b01, b23, bP01i); tmp0 = _mm_load_pd (P(2,0)); tmp1 = _mm_load_pd (P(2,1)); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(2,2)); tmp1 = _mm_load_pd (P(2,3)); r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(3,0)); tmp1 = _mm_load_pd (P(3,1)); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(3,2)); tmp1 = _mm_load_pd (P(3,3)); r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, b01, b23, b01, b23, bP23r); _MM_DDOT4_PD(i0, i1, i2, i3, b01, b23, b01, b23, bP23i); // Compute value _MM_DOT4_PD (a01, a23, bP01r, bP23r, *((double*)val+0)); _MM_DOT4_PD (a01, a23, bP01i, bP23i, *((double*)val+1)); #undef P } /* Value and gradient */ inline void eval_UBspline_2d_z_vg (UBspline_2d_z * restrict spline, double x, double y, complex_double* restrict val, complex_double* restrict grad) { _mm_prefetch ((const char*) &A_d[ 0],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 1],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 2],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 3],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 4],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 5],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 6],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 7],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 8],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 9],_MM_HINT_T0); 
_mm_prefetch ((const char*) &A_d[10],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[11],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[12],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[13],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[14],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[15],_MM_HINT_T0); x -= spline->x_grid.start; y -= spline->y_grid.start; double ux = x*spline->x_grid.delta_inv; double uy = y*spline->y_grid.delta_inv; ux = fmin (ux, (double)(spline->x_grid.num)-1.0e-5); uy = fmin (uy, (double)(spline->y_grid.num)-1.0e-5); double ipartx, iparty, tx, ty; tx = modf (ux, &ipartx); int ix = (int) ipartx; ty = modf (uy, &iparty); int iy = (int) iparty; int xs = spline->x_stride; // This macro is used to give the pointer to coefficient data. // i and j should be in the range [0,3]. Coefficients are read four // at a time, so no k value is needed. #define P(i,j) (const double*)(spline->coefs+(ix+(i))*xs+(iy+(j))) // Prefetch the data from main memory into cache so it's available // when we need to use it. _mm_prefetch ((const char*)P(0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,2), _MM_HINT_T0); // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // tpz = [t_z^3 t_z^2 t_z 1] // a = A * tpx, b = A * tpy, c = A * tpz // da = dA * tpx, db = dA * tpy, dc = dA * tpz, etc. 
// A is 4x4 matrix given by the rows A0, A1, A2, A3 __m128d tpx01, tpx23, tpy01, tpy23, a01, b01, da01, db01, a23, b23, da23, db23, bP01r, dbP01r, bP23r, dbP23r, bP01i, dbP01i, bP23i, dbP23i, tmp0, tmp1, tmp2, tmp3, r0, r1, r2, r3, i0, i1, i2, i3; tpx01 = _mm_set_pd (tx*tx*tx, tx*tx); tpx23 = _mm_set_pd (tx, 1.0); tpy01 = _mm_set_pd (ty*ty*ty, ty*ty); tpy23 = _mm_set_pd (ty, 1.0); // x-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpx01, tpx23, tpx01, tpx23, a01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpx01, tpx23, tpx01, tpx23, a23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpx01, tpx23, tpx01, tpx23, da01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpx01, tpx23, tpx01, tpx23, da23); // y-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpy01, tpy23, tpy01, tpy23, b01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpy01, tpy23, tpy01, tpy23, b23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpy01, tpy23, tpy01, tpy23, db01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpy01, tpy23, tpy01, tpy23, db23); tmp0 = _mm_load_pd (P(0,0)); tmp1 = _mm_load_pd (P(0,1)); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(0,2)); tmp1 = _mm_load_pd (P(0,3)); r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(1,0)); tmp1 = _mm_load_pd (P(1,1)); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(1,2)); tmp1 = _mm_load_pd (P(1,3)); r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, b01, b23, b01, b23, bP01r); _MM_DDOT4_PD(i0, i1, i2, i3, b01, b23, b01, b23, bP01i); _MM_DDOT4_PD(r0, r1, r2, r3, db01, db23, db01, db23, dbP01r); _MM_DDOT4_PD(i0, i1, i2, i3, db01, db23, db01, db23, 
dbP01i); tmp0 = _mm_load_pd (P(2,0)); tmp1 = _mm_load_pd (P(2,1)); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(2,2)); tmp1 = _mm_load_pd (P(2,3)); r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(3,0)); tmp1 = _mm_load_pd (P(3,1)); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(3,2)); tmp1 = _mm_load_pd (P(3,3)); r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, b01, b23, b01, b23, bP23r); _MM_DDOT4_PD(i0, i1, i2, i3, b01, b23, b01, b23, bP23i); _MM_DDOT4_PD(r0, r1, r2, r3, db01, db23, db01, db23, dbP23r); _MM_DDOT4_PD(i0, i1, i2, i3, db01, db23, db01, db23, dbP23i); // Compute value _MM_DOT4_PD (a01, a23, bP01r, bP23r, *((double*)val+0)); _MM_DOT4_PD (a01, a23, bP01i, bP23i, *((double*)val+1)); double *dgrad = (double*) grad; // Compute gradient _MM_DOT4_PD (da01, da23, bP01r, bP23r, dgrad[0]); _MM_DOT4_PD (da01, da23, bP01i, bP23i, dgrad[1]); _MM_DOT4_PD ( a01, a23, dbP01r, dbP23r, dgrad[2]); _MM_DOT4_PD ( a01, a23, dbP01i, dbP23i, dgrad[3]); // Multiply gradients and hessians by appropriate grid inverses double dxInv = spline->x_grid.delta_inv; double dyInv = spline->y_grid.delta_inv; grad[0] *= dxInv; grad[1] *= dyInv; #undef P } /* Value, gradient, and laplacian */ inline void eval_UBspline_2d_z_vgl (UBspline_2d_z * restrict spline, double x, double y, complex_double* restrict val, complex_double* restrict grad, complex_double* restrict lapl) { _mm_prefetch ((const char*) &A_d[ 0],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 1],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 2],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 3],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 4],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 
5],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 6],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 7],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 8],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 9],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[10],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[11],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[12],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[13],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[14],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[15],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[16],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[17],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[18],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[19],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[20],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[21],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[22],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[23],_MM_HINT_T0); x -= spline->x_grid.start; y -= spline->y_grid.start; double ux = x*spline->x_grid.delta_inv; double uy = y*spline->y_grid.delta_inv; ux = fmin (ux, (double)(spline->x_grid.num)-1.0e-5); uy = fmin (uy, (double)(spline->y_grid.num)-1.0e-5); double ipartx, iparty, tx, ty; tx = modf (ux, &ipartx); int ix = (int) ipartx; ty = modf (uy, &iparty); int iy = (int) iparty; int xs = spline->x_stride; // This macro is used to give the pointer to coefficient data. // i and j should be in the range [0,3]. Coefficients are read four // at a time, so no k value is needed. #define P(i,j) (const double*)(spline->coefs+(ix+(i))*xs+(iy+(j))) // Prefetch the data from main memory into cache so it's available // when we need to use it. 
_mm_prefetch ((const char*)P(0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,2), _MM_HINT_T0); // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // tpz = [t_z^3 t_z^2 t_z 1] // a = A * tpx, b = A * tpy, c = A * tpz // da = dA * tpx, db = dA * tpy, dc = dA * tpz, etc. // A is 4x4 matrix given by the rows A0, A1, A2, A3 __m128d tpx01, tpx23, tpy01, tpy23, a01, b01, da01, db01, d2a01, d2b01, a23, b23, da23, db23, d2a23, d2b23, bP01r, dbP01r, d2bP01r, bP23r, dbP23r, d2bP23r, bP01i, dbP01i, d2bP01i, bP23i, dbP23i, d2bP23i, tmp0, tmp1, tmp2, tmp3, r0, r1, r2, r3, i0, i1, i2, i3; tpx01 = _mm_set_pd (tx*tx*tx, tx*tx); tpx23 = _mm_set_pd (tx, 1.0); tpy01 = _mm_set_pd (ty*ty*ty, ty*ty); tpy23 = _mm_set_pd (ty, 1.0); // x-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpx01, tpx23, tpx01, tpx23, a01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpx01, tpx23, tpx01, tpx23, a23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpx01, tpx23, tpx01, tpx23, da01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpx01, tpx23, tpx01, tpx23, da23); _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpx01, tpx23, tpx01, tpx23, d2a01); _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpx01, tpx23, tpx01, tpx23, d2a23); // y-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpy01, tpy23, tpy01, tpy23, b01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpy01, tpy23, tpy01, tpy23, b23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpy01, tpy23, tpy01, tpy23, db01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpy01, tpy23, tpy01, tpy23, db23); _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpy01, 
tpy23, tpy01, tpy23, d2b01); _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpy01, tpy23, tpy01, tpy23, d2b23); tmp0 = _mm_load_pd (P(0,0)); tmp1 = _mm_load_pd (P(0,1)); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(0,2)); tmp1 = _mm_load_pd (P(0,3)); r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(1,0)); tmp1 = _mm_load_pd (P(1,1)); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(1,2)); tmp1 = _mm_load_pd (P(1,3)); r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, b01, b23, b01, b23, bP01r); _MM_DDOT4_PD(i0, i1, i2, i3, b01, b23, b01, b23, bP01i); _MM_DDOT4_PD(r0, r1, r2, r3, db01, db23, db01, db23, dbP01r); _MM_DDOT4_PD(i0, i1, i2, i3, db01, db23, db01, db23, dbP01i); _MM_DDOT4_PD(r0, r1, r2, r3, d2b01, d2b23, d2b01, d2b23, d2bP01r); _MM_DDOT4_PD(i0, i1, i2, i3, d2b01, d2b23, d2b01, d2b23, d2bP01i); tmp0 = _mm_load_pd (P(2,0)); tmp1 = _mm_load_pd (P(2,1)); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(2,2)); tmp1 = _mm_load_pd (P(2,3)); r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(3,0)); tmp1 = _mm_load_pd (P(3,1)); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(3,2)); tmp1 = _mm_load_pd (P(3,3)); r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, b01, b23, b01, b23, bP23r); _MM_DDOT4_PD(i0, i1, i2, i3, b01, b23, b01, b23, bP23i); _MM_DDOT4_PD(r0, r1, r2, r3, db01, db23, db01, db23, dbP23r); 
_MM_DDOT4_PD(i0, i1, i2, i3, db01, db23, db01, db23, dbP23i); _MM_DDOT4_PD(r0, r1, r2, r3, d2b01, d2b23, d2b01, d2b23, d2bP23r); _MM_DDOT4_PD(i0, i1, i2, i3, d2b01, d2b23, d2b01, d2b23, d2bP23i); // Compute value _MM_DOT4_PD (a01, a23, bP01r, bP23r, *((double*)val+0)); _MM_DOT4_PD (a01, a23, bP01i, bP23i, *((double*)val+1)); double *dgrad = (double*) grad; // Compute gradient _MM_DOT4_PD (da01, da23, bP01r, bP23r, dgrad[0]); _MM_DOT4_PD (da01, da23, bP01i, bP23i, dgrad[1]); _MM_DOT4_PD ( a01, a23, dbP01r, dbP23r, dgrad[2]); _MM_DOT4_PD ( a01, a23, dbP01i, dbP23i, dgrad[3]); // Compute Laplacian double d2x_r, d2x_i, d2y_r, d2y_i; _MM_DOT4_PD (d2a01, d2a23, bP01r, bP23r, d2x_r); _MM_DOT4_PD (d2a01, d2a23, bP01i, bP23i, d2x_i); _MM_DOT4_PD (a01, a23, d2bP01r, d2bP23r, d2y_r); _MM_DOT4_PD (a01, a23, d2bP01i, d2bP23i, d2y_i); // Multiply gradients and hessians by appropriate grid inverses double dxInv = spline->x_grid.delta_inv; double dyInv = spline->y_grid.delta_inv; grad[0] *= dxInv; grad[1] *= dyInv; d2x_r *= dxInv*dxInv; d2x_i *= dxInv*dxInv; d2y_r *= dyInv*dyInv; d2y_i *= dyInv*dyInv; #ifdef __cplusplus *lapl = std::complex(d2x_r + d2y_r, d2x_i + d2y_i); #else *lapl = (d2x_r + d2y_r) + 1.0I*(d2x_i + d2y_i); #endif #undef P } /* Value, gradient, and Hessian */ inline void eval_UBspline_2d_z_vgh (UBspline_2d_z * restrict spline, double x, double y, complex_double* restrict val, complex_double* restrict grad, complex_double* restrict hess) { _mm_prefetch ((const char*) &A_d[ 0],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 1],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 2],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 3],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 4],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 5],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 6],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 7],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 8],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 9],_MM_HINT_T0); _mm_prefetch ((const 
char*) &A_d[10],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[11],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[12],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[13],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[14],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[15],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[16],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[17],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[18],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[19],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[20],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[21],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[22],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[23],_MM_HINT_T0); x -= spline->x_grid.start; y -= spline->y_grid.start; double ux = x*spline->x_grid.delta_inv; double uy = y*spline->y_grid.delta_inv; ux = fmin (ux, (double)(spline->x_grid.num)-1.0e-5); uy = fmin (uy, (double)(spline->y_grid.num)-1.0e-5); double ipartx, iparty, tx, ty; tx = modf (ux, &ipartx); int ix = (int) ipartx; ty = modf (uy, &iparty); int iy = (int) iparty; int xs = spline->x_stride; // This macro is used to give the pointer to coefficient data. // i and j should be in the range [0,3]. Coefficients are read four // at a time, so no k value is needed. #define P(i,j) (const double*)(spline->coefs+(ix+(i))*xs+(iy+(j))) // Prefetch the data from main memory into cache so it's available // when we need to use it. 
_mm_prefetch ((const char*)P(0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,2), _MM_HINT_T0); // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // tpz = [t_z^3 t_z^2 t_z 1] // a = A * tpx, b = A * tpy, c = A * tpz // da = dA * tpx, db = dA * tpy, dc = dA * tpz, etc. // A is 4x4 matrix given by the rows A0, A1, A2, A3 __m128d tpx01, tpx23, tpy01, tpy23, a01, b01, da01, db01, d2a01, d2b01, a23, b23, da23, db23, d2a23, d2b23, bP01r, dbP01r, d2bP01r, bP23r, dbP23r, d2bP23r, bP01i, dbP01i, d2bP01i, bP23i, dbP23i, d2bP23i, tmp0, tmp1, tmp2, tmp3, r0, r1, r2, r3, i0, i1, i2, i3; tpx01 = _mm_set_pd (tx*tx*tx, tx*tx); tpx23 = _mm_set_pd (tx, 1.0); tpy01 = _mm_set_pd (ty*ty*ty, ty*ty); tpy23 = _mm_set_pd (ty, 1.0); // x-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpx01, tpx23, tpx01, tpx23, a01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpx01, tpx23, tpx01, tpx23, a23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpx01, tpx23, tpx01, tpx23, da01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpx01, tpx23, tpx01, tpx23, da23); _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpx01, tpx23, tpx01, tpx23, d2a01); _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpx01, tpx23, tpx01, tpx23, d2a23); // y-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpy01, tpy23, tpy01, tpy23, b01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpy01, tpy23, tpy01, tpy23, b23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpy01, tpy23, tpy01, tpy23, db01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpy01, tpy23, tpy01, tpy23, db23); _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpy01, 
tpy23, tpy01, tpy23, d2b01); _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpy01, tpy23, tpy01, tpy23, d2b23); tmp0 = _mm_load_pd (P(0,0)); tmp1 = _mm_load_pd (P(0,1)); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(0,2)); tmp1 = _mm_load_pd (P(0,3)); r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(1,0)); tmp1 = _mm_load_pd (P(1,1)); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(1,2)); tmp1 = _mm_load_pd (P(1,3)); r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, b01, b23, b01, b23, bP01r); _MM_DDOT4_PD(i0, i1, i2, i3, b01, b23, b01, b23, bP01i); _MM_DDOT4_PD(r0, r1, r2, r3, db01, db23, db01, db23, dbP01r); _MM_DDOT4_PD(i0, i1, i2, i3, db01, db23, db01, db23, dbP01i); _MM_DDOT4_PD(r0, r1, r2, r3, d2b01, d2b23, d2b01, d2b23, d2bP01r); _MM_DDOT4_PD(i0, i1, i2, i3, d2b01, d2b23, d2b01, d2b23, d2bP01i); tmp0 = _mm_load_pd (P(2,0)); tmp1 = _mm_load_pd (P(2,1)); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(2,2)); tmp1 = _mm_load_pd (P(2,3)); r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(3,0)); tmp1 = _mm_load_pd (P(3,1)); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(3,2)); tmp1 = _mm_load_pd (P(3,3)); r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, b01, b23, b01, b23, bP23r); _MM_DDOT4_PD(i0, i1, i2, i3, b01, b23, b01, b23, bP23i); _MM_DDOT4_PD(r0, r1, r2, r3, db01, db23, db01, db23, dbP23r); 
_MM_DDOT4_PD(i0, i1, i2, i3, db01, db23, db01, db23, dbP23i); _MM_DDOT4_PD(r0, r1, r2, r3, d2b01, d2b23, d2b01, d2b23, d2bP23r); _MM_DDOT4_PD(i0, i1, i2, i3, d2b01, d2b23, d2b01, d2b23, d2bP23i); // Compute value _MM_DOT4_PD (a01, a23, bP01r, bP23r, *((double*)val+0)); _MM_DOT4_PD (a01, a23, bP01i, bP23i, *((double*)val+1)); double *dgrad = (double*) grad; double *dhess = (double*) hess; // Compute gradient _MM_DOT4_PD (da01, da23, bP01r, bP23r, dgrad[0]); _MM_DOT4_PD (da01, da23, bP01i, bP23i, dgrad[1]); _MM_DOT4_PD ( a01, a23, dbP01r, dbP23r, dgrad[2]); _MM_DOT4_PD ( a01, a23, dbP01i, dbP23i, dgrad[3]); // Compute Hessian _MM_DOT4_PD (d2a01, d2a23, bP01r, bP23r, dhess[0]); _MM_DOT4_PD (d2a01, d2a23, bP01i, bP23i, dhess[1]); _MM_DOT4_PD (a01, a23, d2bP01r, d2bP23r, dhess[6]); _MM_DOT4_PD (a01, a23, d2bP01i, d2bP23i, dhess[7]); _MM_DOT4_PD (da01, da23, dbP01r, dbP23r, dhess[2]); _MM_DOT4_PD (da01, da23, dbP01i, dbP23i, dhess[3]); _MM_DOT4_PD (da01, da23, dbP01r, dbP23r, dhess[4]); _MM_DOT4_PD (da01, da23, dbP01i, dbP23i, dhess[5]); // Multiply gradients and hessians by appropriate grid inverses double dxInv = spline->x_grid.delta_inv; double dyInv = spline->y_grid.delta_inv; grad[0] *= dxInv; grad[1] *= dyInv; hess[0] *= dxInv*dxInv; hess[1] *= dxInv*dyInv; hess[2] *= dxInv*dyInv; hess[3] *= dyInv*dyInv; #undef P } /************************************************************/ /* 3D single-precision, real evaulation functions */ /************************************************************/ /* Value only */ inline void eval_UBspline_3d_z (UBspline_3d_z * restrict spline, double x, double y, double z, complex_double* restrict val) { _mm_prefetch ((const char*) &A_d[0],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[1],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[2],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[3],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[4],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[5],_MM_HINT_T0); _mm_prefetch ((const char*) 
&A_d[6],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[7],_MM_HINT_T0); x -= spline->x_grid.start; y -= spline->y_grid.start; z -= spline->z_grid.start; double ux = x*spline->x_grid.delta_inv; double uy = y*spline->y_grid.delta_inv; double uz = z*spline->z_grid.delta_inv; ux = fmin (ux, (double)(spline->x_grid.num)-1.0e-5); uy = fmin (uy, (double)(spline->y_grid.num)-1.0e-5); uz = fmin (uz, (double)(spline->z_grid.num)-1.0e-5); double ipartx, iparty, ipartz, tx, ty, tz; tx = modf (ux, &ipartx); int ix = (int) ipartx; ty = modf (uy, &iparty); int iy = (int) iparty; tz = modf (uz, &ipartz); int iz = (int) ipartz; int xs = spline->x_stride; int ys = spline->y_stride; // This macro is used to give the pointer to coefficient data. // i and j should be in the range [0,3]. Coefficients are read four // at a time, so no k value is needed. #define P(i,j,k) (const double*)(spline->coefs+(ix+(i))*xs+(iy+(j))*ys+(iz+k)) // Prefetch the data from main memory into cache so it's available // when we need to use it. 
_mm_prefetch ((const char*)P(0,0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,1,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,2,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,3,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,1,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,2,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,3,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,1,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,2,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,3,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,1,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,2,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,3,2), _MM_HINT_T0); // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // tpz = [t_z^3 t_z^2 t_z 1] // a = A * tpx, b = A * tpy, c = A * tpz // A is 4x4 matrix given by the rows A0, A1, A2, A3 __m128d tpx01, tpx23, tpy01, tpy23, tpz01, tpz23, a01, b01, c01, cPr[8], dcPr[8], bcP01r, bcP23r, a23, b23, c23, cPi[8], dcPi[8], bcP01i, bcP23i, tmp0, tmp1, 
tmp2, tmp3, r0, r1, r2, r3, i0, i1, i2, i3; tpx01 = _mm_set_pd (tx*tx*tx, tx*tx); tpx23 = _mm_set_pd (tx, 1.0); tpy01 = _mm_set_pd (ty*ty*ty, ty*ty); tpy23 = _mm_set_pd (ty, 1.0); tpz01 = _mm_set_pd (tz*tz*tz, tz*tz); tpz23 = _mm_set_pd (tz, 1.0); // x-dependent vectors _MM_DDOT4_PD (A_d[0], A_d[1], A_d[2], A_d[3], tpx01, tpx23, tpx01, tpx23, a01); _MM_DDOT4_PD (A_d[4], A_d[5], A_d[6], A_d[7], tpx01, tpx23, tpx01, tpx23, a23); // y-dependent vectors _MM_DDOT4_PD (A_d[0], A_d[1], A_d[2], A_d[3], tpy01, tpy23, tpy01, tpy23, b01); _MM_DDOT4_PD (A_d[4], A_d[5], A_d[6], A_d[7], tpy01, tpy23, tpy01, tpy23, b23); // z-dependent vectors _MM_DDOT4_PD (A_d[0], A_d[1], A_d[2], A_d[3], tpz01, tpz23, tpz01, tpz23, c01); _MM_DDOT4_PD (A_d[4], A_d[5], A_d[6], A_d[7], tpz01, tpz23, tpz01, tpz23, c23); // Compute cP, dcP, and d2cP products 1/8 at a time to maximize // register reuse and avoid rerereading from memory or cache. // Complex values are read in, then shuffled such that 4 registers // hold the read parts and 4 register hold the imaginary parts. 
// 1st eighth tmp0 = _mm_load_pd (P(0,0,0)); tmp1 = _mm_load_pd (P(0,0,1)); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(0,0,2)); tmp1 = _mm_load_pd (P(0,0,3)); r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(0,1,0)); tmp1 = _mm_load_pd (P(0,1,1)); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(0,1,2)); tmp1 = _mm_load_pd (P(0,1,3)); r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[0]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[0]); // 2nd eighth tmp0 = _mm_load_pd (P(0,2,0)); tmp1 = _mm_load_pd (P(0,2,1)); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(0,2,2)); tmp1 = _mm_load_pd (P(0,2,3)); r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(0,3,0)); tmp1 = _mm_load_pd (P(0,3,1)); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(0,3,2)); tmp1 = _mm_load_pd (P(0,3,3)); r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[1]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[1]); // 3rd eighth tmp0 = _mm_load_pd (P(1,0,0)); tmp1 = _mm_load_pd (P(1,0,1)); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(1,0,2)); tmp1 = _mm_load_pd (P(1,0,3)); r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd 
(P(1,1,0)); tmp1 = _mm_load_pd (P(1,1,1)); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(1,1,2)); tmp1 = _mm_load_pd (P(1,1,3)); r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[2]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[2]); // 4th eighth tmp0 = _mm_load_pd (P(1,2,0)); tmp1 = _mm_load_pd (P(1,2,1)); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(1,2,2)); tmp1 = _mm_load_pd (P(1,2,3)); r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(1,3,0)); tmp1 = _mm_load_pd (P(1,3,1)); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(1,3,2)); tmp1 = _mm_load_pd (P(1,3,3)); r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[3]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[3]); // 5th eighth tmp0 = _mm_load_pd (P(2,0,0)); tmp1 = _mm_load_pd (P(2,0,1)); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(2,0,2)); tmp1 = _mm_load_pd (P(2,0,3)); r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(2,1,0)); tmp1 = _mm_load_pd (P(2,1,1)); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(2,1,2)); tmp1 = _mm_load_pd (P(2,1,3)); r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[4]); 
_MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[4]); // 6th eighth tmp0 = _mm_load_pd (P(2,2,0)); tmp1 = _mm_load_pd (P(2,2,1)); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(2,2,2)); tmp1 = _mm_load_pd (P(2,2,3)); r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(2,3,0)); tmp1 = _mm_load_pd (P(2,3,1)); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(2,3,2)); tmp1 = _mm_load_pd (P(2,3,3)); r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[5]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[5]); // 7th eighth tmp0 = _mm_load_pd (P(3,0,0)); tmp1 = _mm_load_pd (P(3,0,1)); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(3,0,2)); tmp1 = _mm_load_pd (P(3,0,3)); r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(3,1,0)); tmp1 = _mm_load_pd (P(3,1,1)); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(3,1,2)); tmp1 = _mm_load_pd (P(3,1,3)); r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[6]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[6]); // 8th eighth tmp0 = _mm_load_pd (P(3,2,0)); tmp1 = _mm_load_pd (P(3,2,1)); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(3,2,2)); tmp1 = _mm_load_pd (P(3,2,3)); r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd 
(tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(3,3,0)); tmp1 = _mm_load_pd (P(3,3,1)); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(3,3,2)); tmp1 = _mm_load_pd (P(3,3,3)); r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[7]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[7]); // Now compute bcP, dbcP, bdcP, d2bcP, bd2cP, and dbdc products _MM_DDOT4_PD (b01, b23, b01, b23, cPr[0], cPr[1], cPr[2], cPr[3], bcP01r); _MM_DDOT4_PD (b01, b23, b01, b23, cPi[0], cPi[1], cPi[2], cPi[3], bcP01i); _MM_DDOT4_PD (b01, b23, b01, b23, cPr[4], cPr[5], cPr[6], cPr[7], bcP23r); _MM_DDOT4_PD (b01, b23, b01, b23, cPi[4], cPi[5], cPi[6], cPi[7], bcP23i); // Compute value _MM_DOT4_PD (a01, a23, bcP01r, bcP23r, *((double*)val+0)); _MM_DOT4_PD (a01, a23, bcP01i, bcP23i, *((double*)val+1)); #undef P } /* Value and gradient */ inline void eval_UBspline_3d_z_vg (UBspline_3d_z * restrict spline, double x, double y, double z, complex_double* restrict val, complex_double* restrict grad) { _mm_prefetch((const char*) &A_d[ 0],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 1],_MM_HINT_T0); _mm_prefetch((const char*) &A_d[ 2],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 3],_MM_HINT_T0); _mm_prefetch((const char*) &A_d[ 4],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 5],_MM_HINT_T0); _mm_prefetch((const char*) &A_d[ 6],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 7],_MM_HINT_T0); _mm_prefetch((const char*) &A_d[ 8],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 9],_MM_HINT_T0); _mm_prefetch((const char*) &A_d[10],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[11],_MM_HINT_T0); _mm_prefetch((const char*) &A_d[12],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[13],_MM_HINT_T0); _mm_prefetch((const char*) &A_d[14],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[15],_MM_HINT_T0); x -= 
spline->x_grid.start; y -= spline->y_grid.start; z -= spline->z_grid.start; double ux = x*spline->x_grid.delta_inv; double uy = y*spline->y_grid.delta_inv; double uz = z*spline->z_grid.delta_inv; ux = fmin (ux, (double)(spline->x_grid.num)-1.0e-5); uy = fmin (uy, (double)(spline->y_grid.num)-1.0e-5); uz = fmin (uz, (double)(spline->z_grid.num)-1.0e-5); double ipartx, iparty, ipartz, tx, ty, tz; tx = modf (ux, &ipartx); int ix = (int) ipartx; ty = modf (uy, &iparty); int iy = (int) iparty; tz = modf (uz, &ipartz); int iz = (int) ipartz; int xs = spline->x_stride; int ys = spline->y_stride; // This macro is used to give the pointer to coefficient data. // i and j should be in the range [0,3]. Coefficients are read four // at a time, so no k value is needed. #define P(i,j,k) (const double*)(spline->coefs+(ix+(i))*xs+(iy+(j))*ys+(iz+k)) // Prefetch the data from main memory into cache so it's available // when we need to use it. _mm_prefetch ((const char*)P(0,0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,1,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,2,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,3,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,1,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,2,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,3,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,1,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,2,0), 
_MM_HINT_T0); _mm_prefetch ((const char*)P(2,2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,3,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,1,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,2,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,3,2), _MM_HINT_T0); // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // tpz = [t_z^3 t_z^2 t_z 1] // a = A * tpx, b = A * tpy, c = A * tpz // da = dA * tpx, db = dA * tpy, dc = dA * tpz, etc. // A is 4x4 matrix given by the rows A0, A1, A2, A3 __m128d tpx01, tpx23, tpy01, tpy23, tpz01, tpz23, a01, b01, c01, da01, db01, dc01, a23, b23, c23, da23, db23, dc23, cPr[8], dcPr[8], cPi[8], dcPi[8], bcP01r, dbcP01r, bdcP01r, bcP23r, dbcP23r, bdcP23r, bcP01i, dbcP01i, bdcP01i, bcP23i, dbcP23i, bdcP23i, tmp0, tmp1, tmp2, tmp3, r0, r1, r2, r3, i0, i1, i2, i3; tpx01 = _mm_set_pd (tx*tx*tx, tx*tx); tpx23 = _mm_set_pd (tx, 1.0); tpy01 = _mm_set_pd (ty*ty*ty, ty*ty); tpy23 = _mm_set_pd (ty, 1.0); tpz01 = _mm_set_pd (tz*tz*tz, tz*tz); tpz23 = _mm_set_pd (tz, 1.0); // x-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpx01, tpx23, tpx01, tpx23, a01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpx01, tpx23, tpx01, tpx23, a23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpx01, tpx23, tpx01, tpx23, da01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpx01, tpx23, tpx01, tpx23, da23); // y-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpy01, tpy23, tpy01, tpy23, b01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpy01, tpy23, tpy01, tpy23, b23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpy01, tpy23, tpy01, tpy23, db01); _MM_DDOT4_PD 
(A_d[12], A_d[13], A_d[14], A_d[15], tpy01, tpy23, tpy01, tpy23, db23); // z-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpz01, tpz23, tpz01, tpz23, c01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpz01, tpz23, tpz01, tpz23, c23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpz01, tpz23, tpz01, tpz23, dc01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpz01, tpz23, tpz01, tpz23, dc23); // Compute cP, dcP, and d2cP products 1/8 at a time to maximize // register reuse and avoid rerereading from memory or cache. // Complex values are read in, then shuffled such that 4 registers // hold the read parts and 4 register hold the imaginary parts. // 1st eighth tmp0 = _mm_load_pd (P(0,0,0)); tmp1 = _mm_load_pd (P(0,0,1)); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(0,0,2)); tmp1 = _mm_load_pd (P(0,0,3)); r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(0,1,0)); tmp1 = _mm_load_pd (P(0,1,1)); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(0,1,2)); tmp1 = _mm_load_pd (P(0,1,3)); r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[0]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[0]); _MM_DDOT4_PD(r0, r1, r2, r3, dc01, dc23, dc01, dc23, dcPr[0]); _MM_DDOT4_PD(i0, i1, i2, i3, dc01, dc23, dc01, dc23, dcPi[0]); // 2nd eighth tmp0 = _mm_load_pd (P(0,2,0)); tmp1 = _mm_load_pd (P(0,2,1)); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(0,2,2)); tmp1 = _mm_load_pd (P(0,2,3)); r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd 
(P(0,3,0)); tmp1 = _mm_load_pd (P(0,3,1)); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(0,3,2)); tmp1 = _mm_load_pd (P(0,3,3)); r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[1]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[1]); _MM_DDOT4_PD(r0, r1, r2, r3, dc01, dc23, dc01, dc23, dcPr[1]); _MM_DDOT4_PD(i0, i1, i2, i3, dc01, dc23, dc01, dc23, dcPi[1]); // 3rd eighth tmp0 = _mm_load_pd (P(1,0,0)); tmp1 = _mm_load_pd (P(1,0,1)); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(1,0,2)); tmp1 = _mm_load_pd (P(1,0,3)); r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(1,1,0)); tmp1 = _mm_load_pd (P(1,1,1)); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(1,1,2)); tmp1 = _mm_load_pd (P(1,1,3)); r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[2]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[2]); _MM_DDOT4_PD(r0, r1, r2, r3, dc01, dc23, dc01, dc23, dcPr[2]); _MM_DDOT4_PD(i0, i1, i2, i3, dc01, dc23, dc01, dc23, dcPi[2]); // 4th eighth tmp0 = _mm_load_pd (P(1,2,0)); tmp1 = _mm_load_pd (P(1,2,1)); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(1,2,2)); tmp1 = _mm_load_pd (P(1,2,3)); r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(1,3,0)); tmp1 = _mm_load_pd (P(1,3,1)); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, 
tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(1,3,2)); tmp1 = _mm_load_pd (P(1,3,3)); r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[3]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[3]); _MM_DDOT4_PD(r0, r1, r2, r3, dc01, dc23, dc01, dc23, dcPr[3]); _MM_DDOT4_PD(i0, i1, i2, i3, dc01, dc23, dc01, dc23, dcPi[3]); // 5th eighth tmp0 = _mm_load_pd (P(2,0,0)); tmp1 = _mm_load_pd (P(2,0,1)); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(2,0,2)); tmp1 = _mm_load_pd (P(2,0,3)); r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(2,1,0)); tmp1 = _mm_load_pd (P(2,1,1)); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(2,1,2)); tmp1 = _mm_load_pd (P(2,1,3)); r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[4]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[4]); _MM_DDOT4_PD(r0, r1, r2, r3, dc01, dc23, dc01, dc23, dcPr[4]); _MM_DDOT4_PD(i0, i1, i2, i3, dc01, dc23, dc01, dc23, dcPi[4]); // 6th eighth tmp0 = _mm_load_pd (P(2,2,0)); tmp1 = _mm_load_pd (P(2,2,1)); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(2,2,2)); tmp1 = _mm_load_pd (P(2,2,3)); r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(2,3,0)); tmp1 = _mm_load_pd (P(2,3,1)); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(2,3,2)); tmp1 = _mm_load_pd (P(2,3,3)); r3 = _mm_shuffle_pd (tmp0, tmp1, 
_MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[5]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[5]); _MM_DDOT4_PD(r0, r1, r2, r3, dc01, dc23, dc01, dc23, dcPr[5]); _MM_DDOT4_PD(i0, i1, i2, i3, dc01, dc23, dc01, dc23, dcPi[5]); // 7th eighth tmp0 = _mm_load_pd (P(3,0,0)); tmp1 = _mm_load_pd (P(3,0,1)); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(3,0,2)); tmp1 = _mm_load_pd (P(3,0,3)); r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(3,1,0)); tmp1 = _mm_load_pd (P(3,1,1)); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(3,1,2)); tmp1 = _mm_load_pd (P(3,1,3)); r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[6]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[6]); _MM_DDOT4_PD(r0, r1, r2, r3, dc01, dc23, dc01, dc23, dcPr[6]); _MM_DDOT4_PD(i0, i1, i2, i3, dc01, dc23, dc01, dc23, dcPi[6]); // 8th eighth tmp0 = _mm_load_pd (P(3,2,0)); tmp1 = _mm_load_pd (P(3,2,1)); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(3,2,2)); tmp1 = _mm_load_pd (P(3,2,3)); r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(3,3,0)); tmp1 = _mm_load_pd (P(3,3,1)); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(3,3,2)); tmp1 = _mm_load_pd (P(3,3,3)); r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, 
cPr[7]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[7]); _MM_DDOT4_PD(r0, r1, r2, r3, dc01, dc23, dc01, dc23, dcPr[7]); _MM_DDOT4_PD(i0, i1, i2, i3, dc01, dc23, dc01, dc23, dcPi[7]); // Now compute bcP, dbcP, bdcP, d2bcP, bd2cP, and dbdc products _MM_DDOT4_PD (b01, b23, b01, b23, cPr[0], cPr[1], cPr[2], cPr[3], bcP01r); _MM_DDOT4_PD (b01, b23, b01, b23, cPi[0], cPi[1], cPi[2], cPi[3], bcP01i); _MM_DDOT4_PD (b01, b23, b01, b23, cPr[4], cPr[5], cPr[6], cPr[7], bcP23r); _MM_DDOT4_PD (b01, b23, b01, b23, cPi[4], cPi[5], cPi[6], cPi[7], bcP23i); _MM_DDOT4_PD (db01, db23, db01, db23, cPr[0], cPr[1], cPr[2], cPr[3], dbcP01r); _MM_DDOT4_PD (db01, db23, db01, db23, cPi[0], cPi[1], cPi[2], cPi[3], dbcP01i); _MM_DDOT4_PD (db01, db23, db01, db23, cPr[4], cPr[5], cPr[6], cPr[7], dbcP23r); _MM_DDOT4_PD (db01, db23, db01, db23, cPi[4], cPi[5], cPi[6], cPi[7], dbcP23i); _MM_DDOT4_PD (b01, b23, b01, b23, dcPr[0], dcPr[1], dcPr[2], dcPr[3], bdcP01r); _MM_DDOT4_PD (b01, b23, b01, b23, dcPi[0], dcPi[1], dcPi[2], dcPi[3], bdcP01i); _MM_DDOT4_PD (b01, b23, b01, b23, dcPr[4], dcPr[5], dcPr[6], dcPr[7], bdcP23r); _MM_DDOT4_PD (b01, b23, b01, b23, dcPi[4], dcPi[5], dcPi[6], dcPi[7], bdcP23i); // Compute value _MM_DOT4_PD (a01, a23, bcP01r, bcP23r, *((double*)val+0)); _MM_DOT4_PD (a01, a23, bcP01i, bcP23i, *((double*)val+1)); double *dgrad = (double*) grad; // Compute gradient _MM_DOT4_PD (da01, da23, bcP01r, bcP23r, dgrad[0]); _MM_DOT4_PD (da01, da23, bcP01i, bcP23i, dgrad[1]); _MM_DOT4_PD ( a01, a23, dbcP01r, dbcP23r, dgrad[2]); _MM_DOT4_PD ( a01, a23, dbcP01i, dbcP23i, dgrad[3]); _MM_DOT4_PD ( a01, a23, bdcP01r, bdcP23r, dgrad[4]); _MM_DOT4_PD ( a01, a23, bdcP01i, bdcP23i, dgrad[5]); // Multiply gradients and hessians by appropriate grid inverses double dxInv = spline->x_grid.delta_inv; double dyInv = spline->y_grid.delta_inv; double dzInv = spline->z_grid.delta_inv; grad[0] *= dxInv; grad[1] *= dyInv; grad[2] *= dzInv; #undef P } /* Value, gradient, and laplacian */ inline 
void eval_UBspline_3d_z_vgl (UBspline_3d_z * restrict spline, double x, double y, double z, complex_double* restrict val, complex_double* restrict grad, complex_double* restrict lapl) { _mm_prefetch ((const char*) &A_d[ 0],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 1],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 2],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 3],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 4],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 5],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 6],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 7],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 8],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 9],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[10],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[11],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[12],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[13],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[14],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[15],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[16],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[17],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[18],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[19],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[20],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[21],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[22],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[23],_MM_HINT_T0); x -= spline->x_grid.start; y -= spline->y_grid.start; z -= spline->z_grid.start; double ux = x*spline->x_grid.delta_inv; double uy = y*spline->y_grid.delta_inv; double uz = z*spline->z_grid.delta_inv; ux = fmin (ux, (double)(spline->x_grid.num)-1.0e-5); uy = fmin (uy, (double)(spline->y_grid.num)-1.0e-5); uz = fmin (uz, (double)(spline->z_grid.num)-1.0e-5); double ipartx, iparty, ipartz, tx, ty, tz; tx = modf (ux, &ipartx); int ix = (int) ipartx; ty = modf (uy, &iparty); int iy = (int) iparty; tz = modf (uz, &ipartz); int iz = (int) ipartz; int xs = spline->x_stride; int ys = 
spline->y_stride; // This macro is used to give the pointer to coefficient data. // i and j should be in the range [0,3]. Coefficients are read four // at a time, so no k value is needed. #define P(i,j,k) (const double*)(spline->coefs+(ix+(i))*xs+(iy+(j))*ys+(iz+k)) // Prefetch the data from main memory into cache so it's available // when we need to use it. _mm_prefetch ((const char*)P(0,0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,0,1), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,0,3), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,1,1), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,1,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,1,3), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,2,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,2,1), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,2,3), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,3,1), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,3,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,3,3), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,0,1), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,0,3), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,1,1), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,1,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,1,3), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,2,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,2,1), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,2,3), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,3,1), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,3,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,3,3), _MM_HINT_T0); 
_mm_prefetch ((const char*)P(2,0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,0,1), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,0,3), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,1,1), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,1,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,1,3), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,2,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,2,1), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,2,3), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,3,1), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,3,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,3,3), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,0,1), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,0,3), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,1,1), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,1,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,1,3), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,2,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,2,1), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,2,3), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,3,1), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,3,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,3,3), _MM_HINT_T0); // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // tpz = [t_z^3 t_z^2 t_z 1] // a = A * tpx, b = A * tpy, c = A * tpz // da = dA * tpx, db = dA * tpy, dc = dA * tpz, etc. 
// A is 4x4 matrix given by the rows A0, A1, A2, A3 __m128d tpx01, tpx23, tpy01, tpy23, tpz01, tpz23, a01, b01, c01, da01, db01, dc01, d2a01, d2b01, d2c01, a23, b23, c23, da23, db23, dc23, d2a23, d2b23, d2c23, cPr[8], dcPr[8], d2cPr[8], cPi[8], dcPi[8], d2cPi[8], bcP01r, dbcP01r, bdcP01r, d2bcP01r, bd2cP01r, bcP23r, dbcP23r, bdcP23r, d2bcP23r, bd2cP23r, bcP01i, dbcP01i, bdcP01i, d2bcP01i, bd2cP01i, bcP23i, dbcP23i, bdcP23i, d2bcP23i, bd2cP23i, tmp0, tmp1, tmp2, tmp3, r0, r1, r2, r3, i0, i1, i2, i3; tpx01 = _mm_set_pd (tx*tx*tx, tx*tx); tpx23 = _mm_set_pd (tx, 1.0); tpy01 = _mm_set_pd (ty*ty*ty, ty*ty); tpy23 = _mm_set_pd (ty, 1.0); tpz01 = _mm_set_pd (tz*tz*tz, tz*tz); tpz23 = _mm_set_pd (tz, 1.0); // x-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpx01, tpx23, tpx01, tpx23, a01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpx01, tpx23, tpx01, tpx23, a23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpx01, tpx23, tpx01, tpx23, da01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpx01, tpx23, tpx01, tpx23, da23); _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpx01, tpx23, tpx01, tpx23, d2a01); _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpx01, tpx23, tpx01, tpx23, d2a23); // y-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpy01, tpy23, tpy01, tpy23, b01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpy01, tpy23, tpy01, tpy23, b23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpy01, tpy23, tpy01, tpy23, db01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpy01, tpy23, tpy01, tpy23, db23); _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpy01, tpy23, tpy01, tpy23, d2b01); _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpy01, tpy23, tpy01, tpy23, d2b23); // z-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpz01, tpz23, tpz01, tpz23, c01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpz01, tpz23, tpz01, tpz23, c23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], 
A_d[10], A_d[11], tpz01, tpz23, tpz01, tpz23, dc01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpz01, tpz23, tpz01, tpz23, dc23); _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpz01, tpz23, tpz01, tpz23, d2c01); _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpz01, tpz23, tpz01, tpz23, d2c23); // Compute cP, dcP, and d2cP products 1/8 at a time to maximize // register reuse and avoid rerereading from memory or cache. // Complex values are read in, then shuffled such that 4 registers // hold the read parts and 4 register hold the imaginary parts. // 1st eighth tmp0 = _mm_load_pd (P(0,0,0)); tmp1 = _mm_load_pd (P(0,0,1)); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(0,0,2)); tmp1 = _mm_load_pd (P(0,0,3)); r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(0,1,0)); tmp1 = _mm_load_pd (P(0,1,1)); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(0,1,2)); tmp1 = _mm_load_pd (P(0,1,3)); r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[0]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[0]); _MM_DDOT4_PD(r0, r1, r2, r3, dc01, dc23, dc01, dc23, dcPr[0]); _MM_DDOT4_PD(i0, i1, i2, i3, dc01, dc23, dc01, dc23, dcPi[0]); _MM_DDOT4_PD(r0, r1, r2, r3, d2c01,d2c23,d2c01,d2c23,d2cPr[0]); _MM_DDOT4_PD(i0, i1, i2, i3, d2c01,d2c23,d2c01,d2c23,d2cPi[0]); // 2nd eighth tmp0 = _mm_load_pd (P(0,2,0)); tmp1 = _mm_load_pd (P(0,2,1)); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(0,2,2)); tmp1 = _mm_load_pd (P(0,2,3)); r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = 
_mm_load_pd (P(0,3,0)); tmp1 = _mm_load_pd (P(0,3,1)); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(0,3,2)); tmp1 = _mm_load_pd (P(0,3,3)); r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[1]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[1]); _MM_DDOT4_PD(r0, r1, r2, r3, dc01, dc23, dc01, dc23, dcPr[1]); _MM_DDOT4_PD(i0, i1, i2, i3, dc01, dc23, dc01, dc23, dcPi[1]); _MM_DDOT4_PD(r0, r1, r2, r3, d2c01,d2c23,d2c01,d2c23,d2cPr[1]); _MM_DDOT4_PD(i0, i1, i2, i3, d2c01,d2c23,d2c01,d2c23,d2cPi[1]); // 3rd eighth tmp0 = _mm_load_pd (P(1,0,0)); tmp1 = _mm_load_pd (P(1,0,1)); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(1,0,2)); tmp1 = _mm_load_pd (P(1,0,3)); r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(1,1,0)); tmp1 = _mm_load_pd (P(1,1,1)); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(1,1,2)); tmp1 = _mm_load_pd (P(1,1,3)); r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[2]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[2]); _MM_DDOT4_PD(r0, r1, r2, r3, dc01, dc23, dc01, dc23, dcPr[2]); _MM_DDOT4_PD(i0, i1, i2, i3, dc01, dc23, dc01, dc23, dcPi[2]); _MM_DDOT4_PD(r0, r1, r2, r3, d2c01,d2c23,d2c01,d2c23,d2cPr[2]); _MM_DDOT4_PD(i0, i1, i2, i3, d2c01,d2c23,d2c01,d2c23,d2cPi[2]); // 4th eighth tmp0 = _mm_load_pd (P(1,2,0)); tmp1 = _mm_load_pd (P(1,2,1)); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(1,2,2)); tmp1 = 
_mm_load_pd (P(1,2,3)); r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(1,3,0)); tmp1 = _mm_load_pd (P(1,3,1)); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(1,3,2)); tmp1 = _mm_load_pd (P(1,3,3)); r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[3]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[3]); _MM_DDOT4_PD(r0, r1, r2, r3, dc01, dc23, dc01, dc23, dcPr[3]); _MM_DDOT4_PD(i0, i1, i2, i3, dc01, dc23, dc01, dc23, dcPi[3]); _MM_DDOT4_PD(r0, r1, r2, r3, d2c01,d2c23,d2c01,d2c23,d2cPr[3]); _MM_DDOT4_PD(i0, i1, i2, i3, d2c01,d2c23,d2c01,d2c23,d2cPi[3]); // 5th eighth tmp0 = _mm_load_pd (P(2,0,0)); tmp1 = _mm_load_pd (P(2,0,1)); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(2,0,2)); tmp1 = _mm_load_pd (P(2,0,3)); r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(2,1,0)); tmp1 = _mm_load_pd (P(2,1,1)); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(2,1,2)); tmp1 = _mm_load_pd (P(2,1,3)); r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[4]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[4]); _MM_DDOT4_PD(r0, r1, r2, r3, dc01, dc23, dc01, dc23, dcPr[4]); _MM_DDOT4_PD(i0, i1, i2, i3, dc01, dc23, dc01, dc23, dcPi[4]); _MM_DDOT4_PD(r0, r1, r2, r3, d2c01,d2c23,d2c01,d2c23,d2cPr[4]); _MM_DDOT4_PD(i0, i1, i2, i3, d2c01,d2c23,d2c01,d2c23,d2cPi[4]); // 6th eighth tmp0 = _mm_load_pd (P(2,2,0)); tmp1 = _mm_load_pd (P(2,2,1)); r0 = 
_mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(2,2,2)); tmp1 = _mm_load_pd (P(2,2,3)); r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(2,3,0)); tmp1 = _mm_load_pd (P(2,3,1)); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(2,3,2)); tmp1 = _mm_load_pd (P(2,3,3)); r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[5]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[5]); _MM_DDOT4_PD(r0, r1, r2, r3, dc01, dc23, dc01, dc23, dcPr[5]); _MM_DDOT4_PD(i0, i1, i2, i3, dc01, dc23, dc01, dc23, dcPi[5]); _MM_DDOT4_PD(r0, r1, r2, r3, d2c01,d2c23,d2c01,d2c23,d2cPr[5]); _MM_DDOT4_PD(i0, i1, i2, i3, d2c01,d2c23,d2c01,d2c23,d2cPi[5]); // 7th eighth tmp0 = _mm_load_pd (P(3,0,0)); tmp1 = _mm_load_pd (P(3,0,1)); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(3,0,2)); tmp1 = _mm_load_pd (P(3,0,3)); r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(3,1,0)); tmp1 = _mm_load_pd (P(3,1,1)); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(3,1,2)); tmp1 = _mm_load_pd (P(3,1,3)); r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[6]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[6]); _MM_DDOT4_PD(r0, r1, r2, r3, dc01, dc23, dc01, dc23, dcPr[6]); _MM_DDOT4_PD(i0, i1, i2, i3, dc01, dc23, dc01, dc23, dcPi[6]); _MM_DDOT4_PD(r0, r1, r2, r3, d2c01,d2c23,d2c01,d2c23,d2cPr[6]); _MM_DDOT4_PD(i0, 
i1, i2, i3, d2c01,d2c23,d2c01,d2c23,d2cPi[6]); // 8th eighth tmp0 = _mm_load_pd (P(3,2,0)); tmp1 = _mm_load_pd (P(3,2,1)); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(3,2,2)); tmp1 = _mm_load_pd (P(3,2,3)); r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(3,3,0)); tmp1 = _mm_load_pd (P(3,3,1)); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(3,3,2)); tmp1 = _mm_load_pd (P(3,3,3)); r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[7]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[7]); _MM_DDOT4_PD(r0, r1, r2, r3, dc01, dc23, dc01, dc23, dcPr[7]); _MM_DDOT4_PD(i0, i1, i2, i3, dc01, dc23, dc01, dc23, dcPi[7]); _MM_DDOT4_PD(r0, r1, r2, r3, d2c01,d2c23,d2c01,d2c23,d2cPr[7]); _MM_DDOT4_PD(i0, i1, i2, i3, d2c01,d2c23,d2c01,d2c23,d2cPi[7]); // Now compute bcP, dbcP, bdcP, d2bcP, bd2cP, and dbdc products _MM_DDOT4_PD (b01, b23, b01, b23, cPr[0], cPr[1], cPr[2], cPr[3], bcP01r); _MM_DDOT4_PD (b01, b23, b01, b23, cPi[0], cPi[1], cPi[2], cPi[3], bcP01i); _MM_DDOT4_PD (b01, b23, b01, b23, cPr[4], cPr[5], cPr[6], cPr[7], bcP23r); _MM_DDOT4_PD (b01, b23, b01, b23, cPi[4], cPi[5], cPi[6], cPi[7], bcP23i); _MM_DDOT4_PD (db01, db23, db01, db23, cPr[0], cPr[1], cPr[2], cPr[3], dbcP01r); _MM_DDOT4_PD (db01, db23, db01, db23, cPi[0], cPi[1], cPi[2], cPi[3], dbcP01i); _MM_DDOT4_PD (db01, db23, db01, db23, cPr[4], cPr[5], cPr[6], cPr[7], dbcP23r); _MM_DDOT4_PD (db01, db23, db01, db23, cPi[4], cPi[5], cPi[6], cPi[7], dbcP23i); _MM_DDOT4_PD (b01, b23, b01, b23, dcPr[0], dcPr[1], dcPr[2], dcPr[3], bdcP01r); _MM_DDOT4_PD (b01, b23, b01, b23, dcPi[0], dcPi[1], dcPi[2], dcPi[3], bdcP01i); _MM_DDOT4_PD (b01, b23, b01, b23, dcPr[4], 
dcPr[5], dcPr[6], dcPr[7], bdcP23r); _MM_DDOT4_PD (b01, b23, b01, b23, dcPi[4], dcPi[5], dcPi[6], dcPi[7], bdcP23i); _MM_DDOT4_PD (d2b01, d2b23, d2b01, d2b23, cPr[0], cPr[1], cPr[2], cPr[3], d2bcP01r); _MM_DDOT4_PD (d2b01, d2b23, d2b01, d2b23, cPi[0], cPi[1], cPi[2], cPi[3], d2bcP01i); _MM_DDOT4_PD (d2b01, d2b23, d2b01, d2b23, cPr[4], cPr[5], cPr[6], cPr[7], d2bcP23r); _MM_DDOT4_PD (d2b01, d2b23, d2b01, d2b23, cPi[4], cPi[5], cPi[6], cPi[7], d2bcP23i); _MM_DDOT4_PD (b01, b23, b01, b23, d2cPr[0], d2cPr[1], d2cPr[2], d2cPr[3], bd2cP01r); _MM_DDOT4_PD (b01, b23, b01, b23, d2cPi[0], d2cPi[1], d2cPi[2], d2cPi[3], bd2cP01i); _MM_DDOT4_PD (b01, b23, b01, b23, d2cPr[4], d2cPr[5], d2cPr[6], d2cPr[7], bd2cP23r); _MM_DDOT4_PD (b01, b23, b01, b23, d2cPi[4], d2cPi[5], d2cPi[6], d2cPi[7], bd2cP23i); // Compute value _MM_DOT4_PD (a01, a23, bcP01r, bcP23r, *((double*)val+0)); _MM_DOT4_PD (a01, a23, bcP01i, bcP23i, *((double*)val+1)); double *dgrad = (double*) grad; // Compute gradient _MM_DOT4_PD (da01, da23, bcP01r, bcP23r, dgrad[0]); _MM_DOT4_PD (da01, da23, bcP01i, bcP23i, dgrad[1]); _MM_DOT4_PD ( a01, a23, dbcP01r, dbcP23r, dgrad[2]); _MM_DOT4_PD ( a01, a23, dbcP01i, dbcP23i, dgrad[3]); _MM_DOT4_PD ( a01, a23, bdcP01r, bdcP23r, dgrad[4]); _MM_DOT4_PD ( a01, a23, bdcP01i, bdcP23i, dgrad[5]); double sec_derivs[6]; // Compute laplacian // d2x _MM_DOT4_PD (d2a01, d2a23, bcP01r, bcP23r, sec_derivs[0]); _MM_DOT4_PD (d2a01, d2a23, bcP01i, bcP23i, sec_derivs[1]); // d2y _MM_DOT4_PD (a01, a23, d2bcP01r, d2bcP23r, sec_derivs[2]); _MM_DOT4_PD (a01, a23, d2bcP01i, d2bcP23i, sec_derivs[3]); // d2z _MM_DOT4_PD (a01, a23, bd2cP01r, bd2cP23r, sec_derivs[4]); _MM_DOT4_PD (a01, a23, bd2cP01i, bd2cP23i, sec_derivs[5]); // Multiply gradients and hessians by appropriate grid inverses double dxInv = spline->x_grid.delta_inv; double dyInv = spline->y_grid.delta_inv; double dzInv = spline->z_grid.delta_inv; grad[0] *= dxInv; grad[1] *= dyInv; grad[2] *= dzInv; sec_derivs[0] *= dxInv*dxInv; 
sec_derivs[1] *= dxInv*dxInv; sec_derivs[2] *= dyInv*dyInv; sec_derivs[3] *= dyInv*dyInv; sec_derivs[4] *= dzInv*dzInv; sec_derivs[5] *= dzInv*dzInv; #ifdef __cplusplus *lapl = std::complex (sec_derivs[0] + sec_derivs[2] + sec_derivs[4], sec_derivs[1] + sec_derivs[3] + sec_derivs[5]); #else *lapl = (sec_derivs[0] + sec_derivs[2] + sec_derivs[4]) + 1.0I*(sec_derivs[1] + sec_derivs[3] + sec_derivs[5]); #endif #undef P } /* Value, gradient, and Hessian */ inline void eval_UBspline_3d_z_vgh (UBspline_3d_z * restrict spline, double x, double y, double z, complex_double* restrict val, complex_double* restrict grad, complex_double* restrict hess) { _mm_prefetch ((const char*) &A_d[ 0],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 1],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 2],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 3],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 4],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 5],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 6],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 7],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 8],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 9],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[10],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[11],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[12],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[13],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[14],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[15],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[16],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[17],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[18],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[19],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[20],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[21],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[22],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[23],_MM_HINT_T0); x -= spline->x_grid.start; y -= spline->y_grid.start; z -= spline->z_grid.start; double ux = x*spline->x_grid.delta_inv; 
double uy = y*spline->y_grid.delta_inv; double uz = z*spline->z_grid.delta_inv; ux = fmin (ux, (double)(spline->x_grid.num)-1.0e-5); uy = fmin (uy, (double)(spline->y_grid.num)-1.0e-5); uz = fmin (uz, (double)(spline->z_grid.num)-1.0e-5); double ipartx, iparty, ipartz, tx, ty, tz; tx = modf (ux, &ipartx); int ix = (int) ipartx; ty = modf (uy, &iparty); int iy = (int) iparty; tz = modf (uz, &ipartz); int iz = (int) ipartz; int xs = spline->x_stride; int ys = spline->y_stride; // This macro is used to give the pointer to coefficient data. // i and j should be in the range [0,3]. Coefficients are read four // at a time, so no k value is needed. #define P(i,j,k) (const double*)(spline->coefs+(ix+(i))*xs+(iy+(j))*ys+(iz+k)) // Prefetch the data from main memory into cache so it's available // when we need to use it. _mm_prefetch ((const char*)P(0,0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,0,1), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,0,3), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,1,1), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,1,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,1,3), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,2,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,2,1), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,2,3), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,3,1), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,3,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,3,3), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,0,1), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,0,3), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,1,1), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,1,2), _MM_HINT_T0); 
_mm_prefetch ((const char*)P(1,1,3), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,2,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,2,1), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,2,3), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,3,1), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,3,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,3,3), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,0,1), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,0,3), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,1,1), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,1,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,1,3), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,2,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,2,1), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,2,3), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,3,1), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,3,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,3,3), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,0,1), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,0,3), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,1,1), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,1,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,1,3), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,2,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,2,1), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,2,3), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,3,1), _MM_HINT_T0); 
_mm_prefetch ((const char*)P(3,3,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,3,3), _MM_HINT_T0); // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // tpz = [t_z^3 t_z^2 t_z 1] // a = A * tpx, b = A * tpy, c = A * tpz // da = dA * tpx, db = dA * tpy, dc = dA * tpz, etc. // A is 4x4 matrix given by the rows A0, A1, A2, A3 __m128d tpx01, tpx23, tpy01, tpy23, tpz01, tpz23, a01, b01, c01, da01, db01, dc01, d2a01, d2b01, d2c01, a23, b23, c23, da23, db23, dc23, d2a23, d2b23, d2c23, cPr[8], dcPr[8], d2cPr[8], cPi[8], dcPi[8], d2cPi[8], bcP01r, dbcP01r, bdcP01r, d2bcP01r, dbdcP01r, bd2cP01r, bcP23r, dbcP23r, bdcP23r, d2bcP23r, dbdcP23r, bd2cP23r, bcP01i, dbcP01i, bdcP01i, d2bcP01i, dbdcP01i, bd2cP01i, bcP23i, dbcP23i, bdcP23i, d2bcP23i, dbdcP23i, bd2cP23i, tmp0, tmp1, tmp2, tmp3, r0, r1, r2, r3, i0, i1, i2, i3; tpx01 = _mm_set_pd (tx*tx*tx, tx*tx); tpx23 = _mm_set_pd (tx, 1.0); tpy01 = _mm_set_pd (ty*ty*ty, ty*ty); tpy23 = _mm_set_pd (ty, 1.0); tpz01 = _mm_set_pd (tz*tz*tz, tz*tz); tpz23 = _mm_set_pd (tz, 1.0); // x-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpx01, tpx23, tpx01, tpx23, a01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpx01, tpx23, tpx01, tpx23, a23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpx01, tpx23, tpx01, tpx23, da01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpx01, tpx23, tpx01, tpx23, da23); _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpx01, tpx23, tpx01, tpx23, d2a01); _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpx01, tpx23, tpx01, tpx23, d2a23); // y-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpy01, tpy23, tpy01, tpy23, b01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpy01, tpy23, tpy01, tpy23, b23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpy01, tpy23, tpy01, tpy23, db01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpy01, tpy23, tpy01, tpy23, db23); _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], 
A_d[19], tpy01, tpy23, tpy01, tpy23, d2b01); _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpy01, tpy23, tpy01, tpy23, d2b23); // z-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpz01, tpz23, tpz01, tpz23, c01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpz01, tpz23, tpz01, tpz23, c23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpz01, tpz23, tpz01, tpz23, dc01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpz01, tpz23, tpz01, tpz23, dc23); _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpz01, tpz23, tpz01, tpz23, d2c01); _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpz01, tpz23, tpz01, tpz23, d2c23); // Compute cP, dcP, and d2cP products 1/8 at a time to maximize // register reuse and avoid rerereading from memory or cache. // Complex values are read in, then shuffled such that 4 registers // hold the read parts and 4 register hold the imaginary parts. // 1st eighth tmp0 = _mm_load_pd (P(0,0,0)); tmp1 = _mm_load_pd (P(0,0,1)); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(0,0,2)); tmp1 = _mm_load_pd (P(0,0,3)); r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(0,1,0)); tmp1 = _mm_load_pd (P(0,1,1)); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(0,1,2)); tmp1 = _mm_load_pd (P(0,1,3)); r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[0]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[0]); _MM_DDOT4_PD(r0, r1, r2, r3, dc01, dc23, dc01, dc23, dcPr[0]); _MM_DDOT4_PD(i0, i1, i2, i3, dc01, dc23, dc01, dc23, dcPi[0]); _MM_DDOT4_PD(r0, r1, r2, r3, d2c01,d2c23,d2c01,d2c23,d2cPr[0]); _MM_DDOT4_PD(i0, i1, i2, i3, d2c01,d2c23,d2c01,d2c23,d2cPi[0]); // 2nd eighth 
tmp0 = _mm_load_pd (P(0,2,0)); tmp1 = _mm_load_pd (P(0,2,1)); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(0,2,2)); tmp1 = _mm_load_pd (P(0,2,3)); r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(0,3,0)); tmp1 = _mm_load_pd (P(0,3,1)); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(0,3,2)); tmp1 = _mm_load_pd (P(0,3,3)); r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[1]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[1]); _MM_DDOT4_PD(r0, r1, r2, r3, dc01, dc23, dc01, dc23, dcPr[1]); _MM_DDOT4_PD(i0, i1, i2, i3, dc01, dc23, dc01, dc23, dcPi[1]); _MM_DDOT4_PD(r0, r1, r2, r3, d2c01,d2c23,d2c01,d2c23,d2cPr[1]); _MM_DDOT4_PD(i0, i1, i2, i3, d2c01,d2c23,d2c01,d2c23,d2cPi[1]); // 3rd eighth tmp0 = _mm_load_pd (P(1,0,0)); tmp1 = _mm_load_pd (P(1,0,1)); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(1,0,2)); tmp1 = _mm_load_pd (P(1,0,3)); r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(1,1,0)); tmp1 = _mm_load_pd (P(1,1,1)); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(1,1,2)); tmp1 = _mm_load_pd (P(1,1,3)); r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[2]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[2]); _MM_DDOT4_PD(r0, r1, r2, r3, dc01, dc23, dc01, dc23, dcPr[2]); _MM_DDOT4_PD(i0, i1, i2, i3, dc01, dc23, dc01, dc23, dcPi[2]); 
_MM_DDOT4_PD(r0, r1, r2, r3, d2c01,d2c23,d2c01,d2c23,d2cPr[2]); _MM_DDOT4_PD(i0, i1, i2, i3, d2c01,d2c23,d2c01,d2c23,d2cPi[2]); // 4th eighth tmp0 = _mm_load_pd (P(1,2,0)); tmp1 = _mm_load_pd (P(1,2,1)); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(1,2,2)); tmp1 = _mm_load_pd (P(1,2,3)); r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(1,3,0)); tmp1 = _mm_load_pd (P(1,3,1)); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(1,3,2)); tmp1 = _mm_load_pd (P(1,3,3)); r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[3]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[3]); _MM_DDOT4_PD(r0, r1, r2, r3, dc01, dc23, dc01, dc23, dcPr[3]); _MM_DDOT4_PD(i0, i1, i2, i3, dc01, dc23, dc01, dc23, dcPi[3]); _MM_DDOT4_PD(r0, r1, r2, r3, d2c01,d2c23,d2c01,d2c23,d2cPr[3]); _MM_DDOT4_PD(i0, i1, i2, i3, d2c01,d2c23,d2c01,d2c23,d2cPi[3]); // 5th eighth tmp0 = _mm_load_pd (P(2,0,0)); tmp1 = _mm_load_pd (P(2,0,1)); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(2,0,2)); tmp1 = _mm_load_pd (P(2,0,3)); r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(2,1,0)); tmp1 = _mm_load_pd (P(2,1,1)); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(2,1,2)); tmp1 = _mm_load_pd (P(2,1,3)); r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[4]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, 
cPi[4]); _MM_DDOT4_PD(r0, r1, r2, r3, dc01, dc23, dc01, dc23, dcPr[4]); _MM_DDOT4_PD(i0, i1, i2, i3, dc01, dc23, dc01, dc23, dcPi[4]); _MM_DDOT4_PD(r0, r1, r2, r3, d2c01,d2c23,d2c01,d2c23,d2cPr[4]); _MM_DDOT4_PD(i0, i1, i2, i3, d2c01,d2c23,d2c01,d2c23,d2cPi[4]); // 6th eighth tmp0 = _mm_load_pd (P(2,2,0)); tmp1 = _mm_load_pd (P(2,2,1)); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(2,2,2)); tmp1 = _mm_load_pd (P(2,2,3)); r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(2,3,0)); tmp1 = _mm_load_pd (P(2,3,1)); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(2,3,2)); tmp1 = _mm_load_pd (P(2,3,3)); r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[5]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[5]); _MM_DDOT4_PD(r0, r1, r2, r3, dc01, dc23, dc01, dc23, dcPr[5]); _MM_DDOT4_PD(i0, i1, i2, i3, dc01, dc23, dc01, dc23, dcPi[5]); _MM_DDOT4_PD(r0, r1, r2, r3, d2c01,d2c23,d2c01,d2c23,d2cPr[5]); _MM_DDOT4_PD(i0, i1, i2, i3, d2c01,d2c23,d2c01,d2c23,d2cPi[5]); // 7th eighth tmp0 = _mm_load_pd (P(3,0,0)); tmp1 = _mm_load_pd (P(3,0,1)); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(3,0,2)); tmp1 = _mm_load_pd (P(3,0,3)); r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(3,1,0)); tmp1 = _mm_load_pd (P(3,1,1)); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(3,1,2)); tmp1 = _mm_load_pd (P(3,1,3)); r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, 
_MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[6]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[6]); _MM_DDOT4_PD(r0, r1, r2, r3, dc01, dc23, dc01, dc23, dcPr[6]); _MM_DDOT4_PD(i0, i1, i2, i3, dc01, dc23, dc01, dc23, dcPi[6]); _MM_DDOT4_PD(r0, r1, r2, r3, d2c01,d2c23,d2c01,d2c23,d2cPr[6]); _MM_DDOT4_PD(i0, i1, i2, i3, d2c01,d2c23,d2c01,d2c23,d2cPi[6]); // 8th eighth tmp0 = _mm_load_pd (P(3,2,0)); tmp1 = _mm_load_pd (P(3,2,1)); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(3,2,2)); tmp1 = _mm_load_pd (P(3,2,3)); r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(3,3,0)); tmp1 = _mm_load_pd (P(3,3,1)); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(3,3,2)); tmp1 = _mm_load_pd (P(3,3,3)); r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[7]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[7]); _MM_DDOT4_PD(r0, r1, r2, r3, dc01, dc23, dc01, dc23, dcPr[7]); _MM_DDOT4_PD(i0, i1, i2, i3, dc01, dc23, dc01, dc23, dcPi[7]); _MM_DDOT4_PD(r0, r1, r2, r3, d2c01,d2c23,d2c01,d2c23,d2cPr[7]); _MM_DDOT4_PD(i0, i1, i2, i3, d2c01,d2c23,d2c01,d2c23,d2cPi[7]); // Now compute bcP, dbcP, bdcP, d2bcP, bd2cP, and dbdc products _MM_DDOT4_PD (b01, b23, b01, b23, cPr[0], cPr[1], cPr[2], cPr[3], bcP01r); _MM_DDOT4_PD (b01, b23, b01, b23, cPi[0], cPi[1], cPi[2], cPi[3], bcP01i); _MM_DDOT4_PD (b01, b23, b01, b23, cPr[4], cPr[5], cPr[6], cPr[7], bcP23r); _MM_DDOT4_PD (b01, b23, b01, b23, cPi[4], cPi[5], cPi[6], cPi[7], bcP23i); _MM_DDOT4_PD (db01, db23, db01, db23, cPr[0], cPr[1], cPr[2], cPr[3], dbcP01r); _MM_DDOT4_PD (db01, db23, db01, db23, cPi[0], cPi[1], cPi[2], cPi[3], dbcP01i); _MM_DDOT4_PD (db01, 
db23, db01, db23, cPr[4], cPr[5], cPr[6], cPr[7], dbcP23r); _MM_DDOT4_PD (db01, db23, db01, db23, cPi[4], cPi[5], cPi[6], cPi[7], dbcP23i); _MM_DDOT4_PD (b01, b23, b01, b23, dcPr[0], dcPr[1], dcPr[2], dcPr[3], bdcP01r); _MM_DDOT4_PD (b01, b23, b01, b23, dcPi[0], dcPi[1], dcPi[2], dcPi[3], bdcP01i); _MM_DDOT4_PD (b01, b23, b01, b23, dcPr[4], dcPr[5], dcPr[6], dcPr[7], bdcP23r); _MM_DDOT4_PD (b01, b23, b01, b23, dcPi[4], dcPi[5], dcPi[6], dcPi[7], bdcP23i); _MM_DDOT4_PD (d2b01, d2b23, d2b01, d2b23, cPr[0], cPr[1], cPr[2], cPr[3], d2bcP01r); _MM_DDOT4_PD (d2b01, d2b23, d2b01, d2b23, cPi[0], cPi[1], cPi[2], cPi[3], d2bcP01i); _MM_DDOT4_PD (d2b01, d2b23, d2b01, d2b23, cPr[4], cPr[5], cPr[6], cPr[7], d2bcP23r); _MM_DDOT4_PD (d2b01, d2b23, d2b01, d2b23, cPi[4], cPi[5], cPi[6], cPi[7], d2bcP23i); _MM_DDOT4_PD (b01, b23, b01, b23, d2cPr[0], d2cPr[1], d2cPr[2], d2cPr[3], bd2cP01r); _MM_DDOT4_PD (b01, b23, b01, b23, d2cPi[0], d2cPi[1], d2cPi[2], d2cPi[3], bd2cP01i); _MM_DDOT4_PD (b01, b23, b01, b23, d2cPr[4], d2cPr[5], d2cPr[6], d2cPr[7], bd2cP23r); _MM_DDOT4_PD (b01, b23, b01, b23, d2cPi[4], d2cPi[5], d2cPi[6], d2cPi[7], bd2cP23i); _MM_DDOT4_PD (db01, db23, db01, db23, dcPr[0], dcPr[1], dcPr[2], dcPr[3], dbdcP01r); _MM_DDOT4_PD (db01, db23, db01, db23, dcPi[0], dcPi[1], dcPi[2], dcPi[3], dbdcP01i); _MM_DDOT4_PD (db01, db23, db01, db23, dcPr[4], dcPr[5], dcPr[6], dcPr[7], dbdcP23r); _MM_DDOT4_PD (db01, db23, db01, db23, dcPi[4], dcPi[5], dcPi[6], dcPi[7], dbdcP23i); // Compute value _MM_DOT4_PD (a01, a23, bcP01r, bcP23r, *((double*)val+0)); _MM_DOT4_PD (a01, a23, bcP01i, bcP23i, *((double*)val+1)); double *dgrad = (double*) grad; // Compute gradient _MM_DOT4_PD (da01, da23, bcP01r, bcP23r, dgrad[0]); _MM_DOT4_PD (da01, da23, bcP01i, bcP23i, dgrad[1]); _MM_DOT4_PD ( a01, a23, dbcP01r, dbcP23r, dgrad[2]); _MM_DOT4_PD ( a01, a23, dbcP01i, dbcP23i, dgrad[3]); _MM_DOT4_PD ( a01, a23, bdcP01r, bdcP23r, dgrad[4]); _MM_DOT4_PD ( a01, a23, bdcP01i, bdcP23i, dgrad[5]); // Compute 
hessian // d2x _MM_DOT4_PD (d2a01, d2a23, bcP01r, bcP23r, ((double*)hess)[0]); _MM_DOT4_PD (d2a01, d2a23, bcP01i, bcP23i, ((double*)hess)[1]); // d2y _MM_DOT4_PD (a01, a23, d2bcP01r, d2bcP23r, ((double*)hess)[8]); _MM_DOT4_PD (a01, a23, d2bcP01i, d2bcP23i, ((double*)hess)[9]); // d2z _MM_DOT4_PD (a01, a23, bd2cP01r, bd2cP23r, ((double*)hess)[16]); _MM_DOT4_PD (a01, a23, bd2cP01i, bd2cP23i, ((double*)hess)[17]); // dx dy _MM_DOT4_PD (da01, da23, dbcP01r, dbcP23r, ((double*)hess)[2]); _MM_DOT4_PD (da01, da23, dbcP01i, dbcP23i, ((double*)hess)[3]); _MM_DOT4_PD (da01, da23, dbcP01r, dbcP23r, ((double*)hess)[6]); _MM_DOT4_PD (da01, da23, dbcP01i, dbcP23i, ((double*)hess)[7]); // dx dz _MM_DOT4_PD (da01, da23, bdcP01r, bdcP23r, ((double*)hess)[4]); _MM_DOT4_PD (da01, da23, bdcP01i, bdcP23i, ((double*)hess)[5]); _MM_DOT4_PD (da01, da23, bdcP01r, bdcP23r, ((double*)hess)[12]); _MM_DOT4_PD (da01, da23, bdcP01i, bdcP23i, ((double*)hess)[13]); // dy dz _MM_DOT4_PD (a01, a23, dbdcP01r, dbdcP23r, ((double*)hess)[10]); _MM_DOT4_PD (a01, a23, dbdcP01i, dbdcP23i, ((double*)hess)[11]); _MM_DOT4_PD (a01, a23, dbdcP01r, dbdcP23r, ((double*)hess)[14]); _MM_DOT4_PD (a01, a23, dbdcP01i, dbdcP23i, ((double*)hess)[15]); // Multiply gradients and hessians by appropriate grid inverses double dxInv = spline->x_grid.delta_inv; double dyInv = spline->y_grid.delta_inv; double dzInv = spline->z_grid.delta_inv; grad[0] *= dxInv; grad[1] *= dyInv; grad[2] *= dzInv; hess[0] *= dxInv*dxInv; hess[4] *= dyInv*dyInv; hess[8] *= dzInv*dzInv; hess[1] *= dxInv*dyInv; hess[3] *= dxInv*dyInv; hess[2] *= dxInv*dzInv; hess[6] *= dxInv*dzInv; hess[5] *= dyInv*dzInv; hess[7] *= dyInv*dzInv; // Copy hessian elements into lower half of 3x3 matrix // hess[3] = hess[1]; // hess[6] = hess[2]; // hess[7] = hess[5]; #undef P } #endif einspline-0.9.2/src/bspline_base.h0000664000113000011300000000642211262712545014012 00000000000000///////////////////////////////////////////////////////////////////////////// // 
einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. // // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. // // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #ifndef BSPLINE_BASE_H #define BSPLINE_BASE_H #include "config.h" #ifdef __cplusplus #include typedef std::complex complex_float; typedef std::complex complex_double; #else #include typedef complex float complex_float; typedef complex double complex_double; #endif // Conventions: // Postfixes: // s: single precision real // d: double precision real // c: single precision complex // z: double precision complex //////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////// //// Basic type declarations //// //////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////// typedef enum { PERIODIC, DERIV1, DERIV2, FLAT, NATURAL, ANTIPERIODIC } bc_code; typedef enum { U1D , U2D , U3D , NU1D , NU2D , NU3D , MULTI_U1D , MULTI_U2D , MULTI_U3D, MULTI_NU1D, MULTI_NU2D, MULTI_NU3D } spline_code; typedef enum { SINGLE_REAL, DOUBLE_REAL, SINGLE_COMPLEX, DOUBLE_COMPLEX } type_code; typedef struct { double x,y,z; } double3; typedef 
struct { double x,y,z,w; } double4; typedef struct { bc_code lCode, rCode; float lVal, rVal; } BCtype_s; typedef struct { bc_code lCode, rCode; double lVal, rVal; } BCtype_d; typedef struct { bc_code lCode, rCode; float lVal_r, lVal_i, rVal_r, rVal_i; } BCtype_c; typedef struct { bc_code lCode, rCode; double lVal_r, lVal_i, rVal_r, rVal_i; } BCtype_z; typedef struct { double start, end; int num; // private double delta, delta_inv; } Ugrid; typedef struct { spline_code sp_code; type_code t_code; void *restrict coefs; } Bspline; #ifdef __cplusplus extern "C" #endif void destroy_Bspline (void *spline); #endif einspline-0.9.2/src/bspline_structs_cuda.h0000664000113000011300000000211411262713035015570 00000000000000#ifndef BSPLINE_STRUCTS_CUDA_H #define BSPLINE_STRUCTS_CUDA_H #define SPLINE_BLOCK_SIZE 64 //////// // 2D // //////// // typedef struct // { // double x,y,z; // } double3; // typedef struct // { // double x,y,z,w; // } double4; typedef struct { float *coefs; uint2 stride; float2 gridInv; } UBspline_2d_s_cuda; typedef struct { float *coefs_real, *coefs_imag; uint2 stride; float2 gridInv; } UBspline_2d_c_cuda; typedef struct { double *coefs; uint2 stride; double gridInv[2]; } UBspline_2d_d_cuda; typedef struct { complex_double *coefs; uint2 stride; double gridInv[2]; } UBspline_2d_z_cuda; //////// // 3D // //////// typedef struct { float *coefs; uint3 stride; float3 gridInv; uint3 dim; } UBspline_3d_s_cuda; typedef struct { complex_float *coefs; uint3 stride; float3 gridInv; uint3 dim; } UBspline_3d_c_cuda; typedef struct { double *coefs; uint3 stride; double3 gridInv; uint3 dim; } UBspline_3d_d_cuda; typedef struct { complex_double *coefs; uint3 stride; double3 gridInv; uint3 dim; } UBspline_3d_z_cuda; #endif einspline-0.9.2/src/multi_bspline_eval_std_s.c0000664000113000011300000000276611015561465016436 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // 
Copyright (C) 2007 Kenneth P. Esler, Jr. // // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. // // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #include "multi_bspline_eval_std_s_impl.h" einspline-0.9.2/src/nubspline_eval_sse_z.h0000664000113000011300000024563211012400563015570 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. // // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. 
// // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #ifndef NUBSPLINE_EVAL_SSE_Z_H #define NUBSPLINE_EVAL_SSE_Z_H #include #include #include "nubspline_structs.h" #ifdef HAVE_SSE2 #include #include #endif #ifdef HAVE_SSE3 #include #define _MM_DDOT4_PD(a0, a1, a2, a3, b0, b1, b2, b3, r) \ do { \ __m128d t0 = _mm_add_pd(_mm_mul_pd (a0, b0),_mm_mul_pd (a1, b1)); \ __m128d t1 = _mm_add_pd(_mm_mul_pd (a2, b2),_mm_mul_pd (a3, b3)); \ r = _mm_hadd_pd (t0, t1); \ } while(0); #define _MM_DOT4_PD(a0, a1, b0, b1, p) \ do { \ __m128d t0 = _mm_add_pd(_mm_mul_pd (a0, b0),_mm_mul_pd (a1, b1)); \ __m128d t1 = _mm_hadd_pd (t0,t0); \ _mm_store_sd (&(p), t1); \ } while (0); #else #define _MM_DDOT4_PD(a0, a1, a2, a3, b0, b1, b2, b3, r) \ do { \ __m128d t0 = _mm_add_pd(_mm_mul_pd (a0, b0),_mm_mul_pd (a1, b1)); \ __m128d t1 = _mm_add_pd(_mm_mul_pd (a2, b2),_mm_mul_pd (a3, b3)); \ r = _mm_add_pd(_mm_unpacklo_pd(t0,t1),_mm_unpackhi_pd(t0,t1)); \ } while(0); #define _MM_DOT4_PD(a0, a1, b0, b1, p) \ do { \ __m128d t0 = _mm_add_pd(_mm_mul_pd (a0, b0),_mm_mul_pd (a1, b1)); \ __m128d t1 = \ _mm_add_pd (_mm_unpacklo_pd(t0,t0), _mm_unpackhi_pd(t0,t0)); \ _mm_store_sd (&(p), t1); \ } while (0); #endif /************************************************************/ /* 1D single-precision, real evaulation functions */ /************************************************************/ /* Value only */ inline void eval_NUBspline_1d_z (NUBspline_1d_z * restrict spline, double x, complex_double* restrict val) { double bfuncs[4]; int i = get_NUBasis_funcs_d (spline->x_basis, x, bfuncs); complex_double* restrict coefs = spline->coefs; *val = (coefs[i+0]*bfuncs[0] +coefs[i+1]*bfuncs[1] + coefs[i+2]*bfuncs[2] +coefs[i+3]*bfuncs[3]); } /* Value and first 
derivative */ inline void eval_NUBspline_1d_z_vg (NUBspline_1d_z * restrict spline, double x, complex_double* restrict val, complex_double* restrict grad) { double bfuncs[4], dbfuncs[4]; int i = get_NUBasis_dfuncs_d (spline->x_basis, x, bfuncs, dbfuncs); complex_double* restrict coefs = spline->coefs; *val = (coefs[i+0]* bfuncs[0] + coefs[i+1]* bfuncs[1] + coefs[i+2]* bfuncs[2] + coefs[i+3]* bfuncs[3]); *grad = (coefs[i+0]*dbfuncs[0] + coefs[i+1]*dbfuncs[1] + coefs[i+2]*dbfuncs[2] + coefs[i+3]*dbfuncs[3]); } /* Value, first derivative, and second derivative */ inline void eval_NUBspline_1d_z_vgl (NUBspline_1d_z * restrict spline, double x, complex_double* restrict val, complex_double* restrict grad, complex_double* restrict lapl) { double bfuncs[4], dbfuncs[4], d2bfuncs[4]; int i = get_NUBasis_d2funcs_d (spline->x_basis, x, bfuncs, dbfuncs, d2bfuncs); complex_double* restrict coefs = spline->coefs; *val = (coefs[i+0]* bfuncs[0] + coefs[i+1]* bfuncs[1] + coefs[i+2]* bfuncs[2] + coefs[i+3]* bfuncs[3]); *grad = (coefs[i+0]* dbfuncs[0] + coefs[i+1]* dbfuncs[1] + coefs[i+2]* dbfuncs[2] + coefs[i+3]* dbfuncs[3]); *lapl = (coefs[i+0]*d2bfuncs[0] + coefs[i+1]*d2bfuncs[1] + coefs[i+2]*d2bfuncs[2] + coefs[i+3]*d2bfuncs[3]); } inline void eval_NUBspline_1d_z_vgh (NUBspline_1d_z * restrict spline, double x, complex_double* restrict val, complex_double* restrict grad, complex_double* restrict hess) { eval_NUBspline_1d_z_vgl (spline, x, val, grad, hess); } /************************************************************/ /* 2D single-precision, real evaulation functions */ /************************************************************/ /* Value only */ inline void eval_NUBspline_2d_z (NUBspline_2d_z * restrict spline, double x, double y, complex_double* restrict val) { __m128d a01, b01, a23, b23, bP01r, bP23r, bP01i, bP23i, tmp0, tmp1, tmp2, tmp3, r0, r1, r2, r3, i0, i1, i2, i3; int ix = get_NUBasis_funcs_sse_d (spline->x_basis, x, &a01, &a23); int iy = get_NUBasis_funcs_sse_d 
(spline->y_basis, y, &b01, &b23); int xs = spline->x_stride; // This macro is used to give the pointer to coefficient data. // i and j should be in the range [0,3]. Coefficients are read four // at a time, so no k value is needed. #define P(i,j) (const double*)(spline->coefs+(ix+(i))*xs+(iy+(j))) // Prefetch the data from main memory into cache so it's available // when we need to use it. double *restrict p = (double*)P(0,0); _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p+= xs; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p+= xs; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p+= xs; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p+= xs; tmp0 = _mm_load_pd (P(0,0)); tmp1 = _mm_load_pd (P(0,1)); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(0,2)); tmp1 = _mm_load_pd (P(0,3)); r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(1,0)); tmp1 = _mm_load_pd (P(1,1)); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(1,2)); tmp1 = _mm_load_pd (P(1,3)); r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, b01, b23, b01, b23, bP01r); _MM_DDOT4_PD(i0, i1, i2, i3, b01, b23, b01, b23, 
bP01i); tmp0 = _mm_load_pd (P(2,0)); tmp1 = _mm_load_pd (P(2,1)); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(2,2)); tmp1 = _mm_load_pd (P(2,3)); r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(3,0)); tmp1 = _mm_load_pd (P(3,1)); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(3,2)); tmp1 = _mm_load_pd (P(3,3)); r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, b01, b23, b01, b23, bP23r); _MM_DDOT4_PD(i0, i1, i2, i3, b01, b23, b01, b23, bP23i); // Compute value _MM_DOT4_PD (a01, a23, bP01r, bP23r, *((double*)val+0)); _MM_DOT4_PD (a01, a23, bP01i, bP23i, *((double*)val+1)); #undef P } /* Value and gradient */ inline void eval_NUBspline_2d_z_vg (NUBspline_2d_z * restrict spline, double x, double y, complex_double* restrict val, complex_double* restrict grad) { __m128d a01, b01, da01, db01, a23, b23, da23, db23, bP01r, dbP01r, bP23r, dbP23r, bP01i, dbP01i, bP23i, dbP23i, tmp0, tmp1, tmp2, tmp3, r0, r1, r2, r3, i0, i1, i2, i3; int ix = get_NUBasis_dfuncs_sse_d (spline->x_basis, x, &a01, &a23, &da01, &da23); int iy = get_NUBasis_dfuncs_sse_d (spline->y_basis, y, &b01, &b23, &db01, &db23); int xs = spline->x_stride; // This macro is used to give the pointer to coefficient data. // i and j should be in the range [0,3]. Coefficients are read four // at a time, so no k value is needed. #define P(i,j) (const double*)(spline->coefs+(ix+(i))*xs+(iy+(j))) // Prefetch the data from main memory into cache so it's available // when we need to use it. 
double *restrict p = (double*)P(0,0); _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p+= xs; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p+= xs; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p+= xs; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p+= xs; tmp0 = _mm_load_pd (P(0,0)); tmp1 = _mm_load_pd (P(0,1)); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(0,2)); tmp1 = _mm_load_pd (P(0,3)); r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(1,0)); tmp1 = _mm_load_pd (P(1,1)); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(1,2)); tmp1 = _mm_load_pd (P(1,3)); r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, b01, b23, b01, b23, bP01r); _MM_DDOT4_PD(i0, i1, i2, i3, b01, b23, b01, b23, bP01i); _MM_DDOT4_PD(r0, r1, r2, r3, db01, db23, db01, db23, dbP01r); _MM_DDOT4_PD(i0, i1, i2, i3, db01, db23, db01, db23, dbP01i); tmp0 = _mm_load_pd (P(2,0)); tmp1 = _mm_load_pd (P(2,1)); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(2,2)); tmp1 = _mm_load_pd (P(2,3)); r1 = _mm_shuffle_pd (tmp0, tmp1, 
_MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(3,0)); tmp1 = _mm_load_pd (P(3,1)); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(3,2)); tmp1 = _mm_load_pd (P(3,3)); r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, b01, b23, b01, b23, bP23r); _MM_DDOT4_PD(i0, i1, i2, i3, b01, b23, b01, b23, bP23i); _MM_DDOT4_PD(r0, r1, r2, r3, db01, db23, db01, db23, dbP23r); _MM_DDOT4_PD(i0, i1, i2, i3, db01, db23, db01, db23, dbP23i); // Compute value _MM_DOT4_PD (a01, a23, bP01r, bP23r, *((double*)val+0)); _MM_DOT4_PD (a01, a23, bP01i, bP23i, *((double*)val+1)); double *dgrad = (double*) grad; // Compute gradient _MM_DOT4_PD (da01, da23, bP01r, bP23r, dgrad[0]); _MM_DOT4_PD (da01, da23, bP01i, bP23i, dgrad[1]); _MM_DOT4_PD ( a01, a23, dbP01r, dbP23r, dgrad[2]); _MM_DOT4_PD ( a01, a23, dbP01i, dbP23i, dgrad[3]); #undef P } /* Value, gradient, and laplacian */ inline void eval_NUBspline_2d_z_vgl (NUBspline_2d_z * restrict spline, double x, double y, complex_double* restrict val, complex_double* restrict grad, complex_double* restrict lapl) { __m128d a01, b01, da01, db01, d2a01, d2b01, a23, b23, da23, db23, d2a23, d2b23, bP01r, dbP01r, d2bP01r, bP23r, dbP23r, d2bP23r, bP01i, dbP01i, d2bP01i, bP23i, dbP23i, d2bP23i, tmp0, tmp1, tmp2, tmp3, r0, r1, r2, r3, i0, i1, i2, i3; int ix = get_NUBasis_d2funcs_sse_d (spline->x_basis, x, &a01, &a23, &da01, &da23, &d2a01, &d2a23); int iy = get_NUBasis_d2funcs_sse_d (spline->y_basis, y, &b01, &b23, &db01, &db23, &d2b01, &d2b23); int xs = spline->x_stride; // This macro is used to give the pointer to coefficient data. // i and j should be in the range [0,3]. Coefficients are read four // at a time, so no k value is needed. 
#define P(i,j) (const double*)(spline->coefs+(ix+(i))*xs+(iy+(j))) // Prefetch the data from main memory into cache so it's available // when we need to use it. double *restrict p = (double*)P(0,0); _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p+= xs; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p+= xs; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p+= xs; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p+= xs; tmp0 = _mm_load_pd (P(0,0)); tmp1 = _mm_load_pd (P(0,1)); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(0,2)); tmp1 = _mm_load_pd (P(0,3)); r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(1,0)); tmp1 = _mm_load_pd (P(1,1)); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(1,2)); tmp1 = _mm_load_pd (P(1,3)); r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, b01, b23, b01, b23, bP01r); _MM_DDOT4_PD(i0, i1, i2, i3, b01, b23, b01, b23, bP01i); _MM_DDOT4_PD(r0, r1, r2, r3, db01, db23, db01, db23, dbP01r); _MM_DDOT4_PD(i0, i1, i2, i3, db01, db23, db01, db23, dbP01i); _MM_DDOT4_PD(r0, r1, r2, r3, d2b01, d2b23, d2b01, d2b23, d2bP01r); _MM_DDOT4_PD(i0, i1, i2, i3, 
d2b01, d2b23, d2b01, d2b23, d2bP01i); tmp0 = _mm_load_pd (P(2,0)); tmp1 = _mm_load_pd (P(2,1)); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(2,2)); tmp1 = _mm_load_pd (P(2,3)); r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(3,0)); tmp1 = _mm_load_pd (P(3,1)); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(3,2)); tmp1 = _mm_load_pd (P(3,3)); r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, b01, b23, b01, b23, bP23r); _MM_DDOT4_PD(i0, i1, i2, i3, b01, b23, b01, b23, bP23i); _MM_DDOT4_PD(r0, r1, r2, r3, db01, db23, db01, db23, dbP23r); _MM_DDOT4_PD(i0, i1, i2, i3, db01, db23, db01, db23, dbP23i); _MM_DDOT4_PD(r0, r1, r2, r3, d2b01, d2b23, d2b01, d2b23, d2bP23r); _MM_DDOT4_PD(i0, i1, i2, i3, d2b01, d2b23, d2b01, d2b23, d2bP23i); // Compute value _MM_DOT4_PD (a01, a23, bP01r, bP23r, *((double*)val+0)); _MM_DOT4_PD (a01, a23, bP01i, bP23i, *((double*)val+1)); double *dgrad = (double*) grad; // Compute gradient _MM_DOT4_PD (da01, da23, bP01r, bP23r, dgrad[0]); _MM_DOT4_PD (da01, da23, bP01i, bP23i, dgrad[1]); _MM_DOT4_PD ( a01, a23, dbP01r, dbP23r, dgrad[2]); _MM_DOT4_PD ( a01, a23, dbP01i, dbP23i, dgrad[3]); // Compute Laplacian double d2x_r, d2x_i, d2y_r, d2y_i; _MM_DOT4_PD (d2a01, d2a23, bP01r, bP23r, d2x_r); _MM_DOT4_PD (d2a01, d2a23, bP01i, bP23i, d2x_i); _MM_DOT4_PD (a01, a23, d2bP01r, d2bP23r, d2y_r); _MM_DOT4_PD (a01, a23, d2bP01i, d2bP23i, d2y_i); #ifdef __cplusplus *lapl = std::complex(d2x_r + d2y_r, d2x_i + d2y_i); #else *lapl = (d2x_r + d2y_r) + 1.0i*(d2x_i + d2y_i); #endif #undef P } /* Value, gradient, and Hessian */ inline void eval_NUBspline_2d_z_vgh (NUBspline_2d_z * restrict spline, double x, double y, complex_double* 
restrict val,
                         complex_double* restrict grad,
                         complex_double* restrict hess)
{
  // a*/b*: x/y basis values; da*/db*: first derivatives; d2a*/d2b*: second
  // derivatives (two per register).  r*/i*: de-interleaved real/imaginary
  // coefficient parts.
  __m128d
    a01, b01, da01, db01, d2a01, d2b01,
    a23, b23, da23, db23, d2a23, d2b23,
    bP01r, dbP01r, d2bP01r,
    bP23r, dbP23r, d2bP23r,
    bP01i, dbP01i, d2bP01i,
    bP23i, dbP23i, d2bP23i,
    tmp0, tmp1, r0, r1, r2, r3, i0, i1, i2, i3;

  int ix = get_NUBasis_d2funcs_sse_d (spline->x_basis, x, &a01, &a23, &da01, &da23, &d2a01, &d2a23);
  int iy = get_NUBasis_d2funcs_sse_d (spline->y_basis, y, &b01, &b23, &db01, &db23, &d2b01, &d2b23);

  int xs = spline->x_stride;

  // P(i,j) points at complex coefficient (ix+i, iy+j), viewed as two
  // consecutive doubles (real, imaginary).  i and j should be in the
  // range [0,3].  xs is a stride in complex elements, because it is
  // added to spline->coefs before the cast to double*.
#define P(i,j) (const double*)(spline->coefs+(ix+(i))*xs+(iy+(j)))
  // Prefetch the data from main memory into cache so it's available
  // when we need to use it.  Rows are addressed through P(row,0) so the
  // stride is applied in complex units.
  for (int row = 0; row < 4; row++) {
    const double *p = P(row,0);
    for (int k = 0; k < 8; k += 2)   // 4 complex values = 8 doubles per row
      _mm_prefetch ((const char*)(p+k), _MM_HINT_T0);
  }

  tmp0 = _mm_load_pd (P(0,0));
  tmp1 = _mm_load_pd (P(0,1));
  r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0));
  i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1));
  tmp0 = _mm_load_pd (P(0,2));
  tmp1 = _mm_load_pd (P(0,3));
  r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0));
  i1 =
_mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(1,0)); tmp1 = _mm_load_pd (P(1,1)); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(1,2)); tmp1 = _mm_load_pd (P(1,3)); r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, b01, b23, b01, b23, bP01r); _MM_DDOT4_PD(i0, i1, i2, i3, b01, b23, b01, b23, bP01i); _MM_DDOT4_PD(r0, r1, r2, r3, db01, db23, db01, db23, dbP01r); _MM_DDOT4_PD(i0, i1, i2, i3, db01, db23, db01, db23, dbP01i); _MM_DDOT4_PD(r0, r1, r2, r3, d2b01, d2b23, d2b01, d2b23, d2bP01r); _MM_DDOT4_PD(i0, i1, i2, i3, d2b01, d2b23, d2b01, d2b23, d2bP01i); tmp0 = _mm_load_pd (P(2,0)); tmp1 = _mm_load_pd (P(2,1)); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(2,2)); tmp1 = _mm_load_pd (P(2,3)); r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(3,0)); tmp1 = _mm_load_pd (P(3,1)); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (P(3,2)); tmp1 = _mm_load_pd (P(3,3)); r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, b01, b23, b01, b23, bP23r); _MM_DDOT4_PD(i0, i1, i2, i3, b01, b23, b01, b23, bP23i); _MM_DDOT4_PD(r0, r1, r2, r3, db01, db23, db01, db23, dbP23r); _MM_DDOT4_PD(i0, i1, i2, i3, db01, db23, db01, db23, dbP23i); _MM_DDOT4_PD(r0, r1, r2, r3, d2b01, d2b23, d2b01, d2b23, d2bP23r); _MM_DDOT4_PD(i0, i1, i2, i3, d2b01, d2b23, d2b01, d2b23, d2bP23i); // Compute value _MM_DOT4_PD (a01, a23, bP01r, bP23r, *((double*)val+0)); _MM_DOT4_PD (a01, a23, bP01i, bP23i, *((double*)val+1)); double *dgrad = (double*) grad; double *dhess = (double*) hess; // Compute 
gradient _MM_DOT4_PD (da01, da23, bP01r, bP23r, dgrad[0]); _MM_DOT4_PD (da01, da23, bP01i, bP23i, dgrad[1]); _MM_DOT4_PD ( a01, a23, dbP01r, dbP23r, dgrad[2]); _MM_DOT4_PD ( a01, a23, dbP01i, dbP23i, dgrad[3]); // Compute Hessian _MM_DOT4_PD (d2a01, d2a23, bP01r, bP23r, dhess[0]); _MM_DOT4_PD (d2a01, d2a23, bP01i, bP23i, dhess[1]); _MM_DOT4_PD (a01, a23, d2bP01r, d2bP23r, dhess[6]); _MM_DOT4_PD (a01, a23, d2bP01i, d2bP23i, dhess[7]); _MM_DOT4_PD (da01, da23, dbP01r, dbP23r, dhess[2]); _MM_DOT4_PD (da01, da23, dbP01i, dbP23i, dhess[3]); hess[2] = hess[1]; #undef P } /************************************************************/ /* 3D single-precision, real evaulation functions */ /************************************************************/ /* Value only */ inline void eval_NUBspline_3d_z (NUBspline_3d_z * restrict spline, double x, double y, double z, complex_double* restrict val) { __m128d a01, b01, c01, a23, b23, c23, cPr[8], cPi[8], bcP01r, bcP23r, bcP01i, bcP23i, tmp0, tmp1, tmp2, tmp3, r0, r1, r2, r3, i0, i1, i2, i3; int ix = get_NUBasis_funcs_sse_d (spline->x_basis, x, &a01, &a23); int iy = get_NUBasis_funcs_sse_d (spline->y_basis, y, &b01, &b23); int iz = get_NUBasis_funcs_sse_d (spline->z_basis, z, &c01, &c23); int xs = spline->x_stride; int ys = spline->y_stride; int xs2 = 2*xs; int ys2 = 2*ys; int delta = xs2-3*ys2; // This macro is used to give the pointer to coefficient data. // i and j should be in the range [0,3]. Coefficients are read four // at a time, so no k value is needed. #define P(i,j,k) (const double*)(spline->coefs+(ix+(i))*xs+(iy+(j))*ys+(iz+k)) // Prefetch the data from main memory into cache so it's available // when we need to use it. 
double *restrict p = (double*) P(0,0,0); _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += ys2; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += ys2; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += ys2; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += delta; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += ys2; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += ys2; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += ys2; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += delta; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += ys2; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), 
_MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += ys2; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += ys2; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += delta; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += ys2; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += ys2; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += ys2; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); // Compute cP, dcP, and d2cP products 1/8 at a time to maximize // register reuse and avoid rereading from memory or cache. // Complex values are read in, then shuffled such that 4 registers // hold the real parts and 4 registers hold the imaginary parts.
// 1st eighth p = (double*) P(0,0,0); tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += ys2; r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += ys2; r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[0]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[0]); // 2nd eighth tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += ys2; r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += delta; r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[1]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[1]); // 3rd eighth p = (double*) P(1,0,0); tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += ys2; r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+0); 
tmp1 = _mm_load_pd (p+2); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += ys2; r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[2]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[2]); // 4th eighth tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += ys2; r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += delta; r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[3]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[3]); // 5th eighth p = (double*) P(2,0,0); tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += ys2; r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += ys2; r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[4]); _MM_DDOT4_PD(i0, i1, i2, i3, 
c01, c23, c01, c23, cPi[4]); // 6th eighth tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += ys2; r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += delta; r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[5]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[5]); // 7th eighth p = (double*) P(3,0,0); tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += ys2; r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += ys2; r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[6]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[6]); // 8th eighth tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += ys2; r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd 
(p+0); tmp1 = _mm_load_pd (p+2); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += delta; r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[7]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[7]); // Now compute bcP, dbcP, bdcP, d2bcP, bd2cP, and dbdc products _MM_DDOT4_PD (b01, b23, b01, b23, cPr[0], cPr[1], cPr[2], cPr[3], bcP01r); _MM_DDOT4_PD (b01, b23, b01, b23, cPi[0], cPi[1], cPi[2], cPi[3], bcP01i); _MM_DDOT4_PD (b01, b23, b01, b23, cPr[4], cPr[5], cPr[6], cPr[7], bcP23r); _MM_DDOT4_PD (b01, b23, b01, b23, cPi[4], cPi[5], cPi[6], cPi[7], bcP23i); // Compute value _MM_DOT4_PD (a01, a23, bcP01r, bcP23r, *((double*)val+0)); _MM_DOT4_PD (a01, a23, bcP01i, bcP23i, *((double*)val+1)); #undef P } /* Value and gradient */ inline void eval_NUBspline_3d_z_vg (NUBspline_3d_z * restrict spline, double x, double y, double z, complex_double* restrict val, complex_double* restrict grad) { __m128d a01, b01, c01, da01, db01, dc01, a23, b23, c23, da23, db23, dc23, cPr[8], dcPr[8], cPi[8], dcPi[8], bcP01r, dbcP01r, bdcP01r, bcP23r, dbcP23r, bdcP23r, bcP01i, dbcP01i, bdcP01i, bcP23i, dbcP23i, bdcP23i, tmp0, tmp1, tmp2, tmp3, r0, r1, r2, r3, i0, i1, i2, i3; int ix = get_NUBasis_dfuncs_sse_d (spline->x_basis, x, &a01, &a23, &da01, &da23); int iy = get_NUBasis_dfuncs_sse_d (spline->y_basis, y, &b01, &b23, &db01, &db23); int iz = get_NUBasis_dfuncs_sse_d (spline->z_basis, z, &c01, &c23, &dc01, &dc23); int xs = spline->x_stride; int ys = spline->y_stride; int xs2 = 2*xs; int ys2 = 2*ys; int delta = xs2-3*ys2; // This macro is used to give the pointer to coefficient data. // i and j should be in the range [0,3]. Coefficients are read four // at a time, so no k value is needed. 
#define P(i,j,k) (const double*)(spline->coefs+(ix+(i))*xs+(iy+(j))*ys+(iz+k)) // Prefetch the data from main memory into cache so it's available // when we need to use it. double *restrict p = (double*) P(0,0,0); _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += ys2; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += ys2; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += ys2; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += delta; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += ys2; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += ys2; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += ys2; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += delta; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const 
char*)(p+6), _MM_HINT_T0); p += ys2; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += ys2; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += ys2; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += delta; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += ys2; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += ys2; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += ys2; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); // Compute cP, dcP, and d2cP products 1/8 at a time to maximize // register reuse and avoid rerereading from memory or cache. // Complex values are read in, then shuffled such that 4 registers // hold the read parts and 4 register hold the imaginary parts. 
// 1st eighth p = (double*) P(0,0,0); tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += ys2; r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += ys2; r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[0]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[0]); _MM_DDOT4_PD(r0, r1, r2, r3, dc01, dc23, dc01, dc23, dcPr[0]); _MM_DDOT4_PD(i0, i1, i2, i3, dc01, dc23, dc01, dc23, dcPi[0]); // 2nd eighth tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += ys2; r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += delta; r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[1]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[1]); _MM_DDOT4_PD(r0, r1, r2, r3, dc01, dc23, dc01, dc23, dcPr[1]); _MM_DDOT4_PD(i0, i1, i2, i3, dc01, dc23, dc01, dc23, dcPi[1]); // 3rd eighth p = (double*) P(1,0,0); tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 
0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += ys2; r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += ys2; r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[2]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[2]); _MM_DDOT4_PD(r0, r1, r2, r3, dc01, dc23, dc01, dc23, dcPr[2]); _MM_DDOT4_PD(i0, i1, i2, i3, dc01, dc23, dc01, dc23, dcPi[2]); // 4th eighth tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += ys2; r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += delta; r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[3]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[3]); _MM_DDOT4_PD(r0, r1, r2, r3, dc01, dc23, dc01, dc23, dcPr[3]); _MM_DDOT4_PD(i0, i1, i2, i3, dc01, dc23, dc01, dc23, dcPi[3]); // 5th eighth p = (double*) P(2,0,0); tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += ys2; r1 = _mm_shuffle_pd 
(tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += ys2; r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[4]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[4]); _MM_DDOT4_PD(r0, r1, r2, r3, dc01, dc23, dc01, dc23, dcPr[4]); _MM_DDOT4_PD(i0, i1, i2, i3, dc01, dc23, dc01, dc23, dcPi[4]); // 6th eighth tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += ys2; r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += delta; r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[5]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[5]); _MM_DDOT4_PD(r0, r1, r2, r3, dc01, dc23, dc01, dc23, dcPr[5]); _MM_DDOT4_PD(i0, i1, i2, i3, dc01, dc23, dc01, dc23, dcPi[5]); // 7th eighth p = (double*) P(3,0,0); tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += ys2; r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); 
r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += ys2; r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[6]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[6]); _MM_DDOT4_PD(r0, r1, r2, r3, dc01, dc23, dc01, dc23, dcPr[6]); _MM_DDOT4_PD(i0, i1, i2, i3, dc01, dc23, dc01, dc23, dcPi[6]); // 8th eighth tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += ys2; r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += delta; r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[7]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[7]); _MM_DDOT4_PD(r0, r1, r2, r3, dc01, dc23, dc01, dc23, dcPr[7]); _MM_DDOT4_PD(i0, i1, i2, i3, dc01, dc23, dc01, dc23, dcPi[7]); // Now compute bcP, dbcP, bdcP, d2bcP, bd2cP, and dbdc products _MM_DDOT4_PD (b01, b23, b01, b23, cPr[0], cPr[1], cPr[2], cPr[3], bcP01r); _MM_DDOT4_PD (b01, b23, b01, b23, cPi[0], cPi[1], cPi[2], cPi[3], bcP01i); _MM_DDOT4_PD (b01, b23, b01, b23, cPr[4], cPr[5], cPr[6], cPr[7], bcP23r); _MM_DDOT4_PD (b01, b23, b01, b23, cPi[4], cPi[5], cPi[6], cPi[7], bcP23i); _MM_DDOT4_PD (db01, db23, db01, db23, cPr[0], cPr[1], cPr[2], cPr[3], dbcP01r); _MM_DDOT4_PD (db01, db23, db01, db23, cPi[0], cPi[1], cPi[2], cPi[3], dbcP01i); _MM_DDOT4_PD (db01, db23, db01, db23, 
cPr[4], cPr[5], cPr[6], cPr[7], dbcP23r); _MM_DDOT4_PD (db01, db23, db01, db23, cPi[4], cPi[5], cPi[6], cPi[7], dbcP23i); _MM_DDOT4_PD (b01, b23, b01, b23, dcPr[0], dcPr[1], dcPr[2], dcPr[3], bdcP01r); _MM_DDOT4_PD (b01, b23, b01, b23, dcPi[0], dcPi[1], dcPi[2], dcPi[3], bdcP01i); _MM_DDOT4_PD (b01, b23, b01, b23, dcPr[4], dcPr[5], dcPr[6], dcPr[7], bdcP23r); _MM_DDOT4_PD (b01, b23, b01, b23, dcPi[4], dcPi[5], dcPi[6], dcPi[7], bdcP23i); // Compute value _MM_DOT4_PD (a01, a23, bcP01r, bcP23r, *((double*)val+0)); _MM_DOT4_PD (a01, a23, bcP01i, bcP23i, *((double*)val+1)); double *dgrad = (double*) grad; // Compute gradient _MM_DOT4_PD (da01, da23, bcP01r, bcP23r, dgrad[0]); _MM_DOT4_PD (da01, da23, bcP01i, bcP23i, dgrad[1]); _MM_DOT4_PD ( a01, a23, dbcP01r, dbcP23r, dgrad[2]); _MM_DOT4_PD ( a01, a23, dbcP01i, dbcP23i, dgrad[3]); _MM_DOT4_PD ( a01, a23, bdcP01r, bdcP23r, dgrad[4]); _MM_DOT4_PD ( a01, a23, bdcP01i, bdcP23i, dgrad[5]); #undef P } /* Value, gradient, and laplacian */ inline void eval_NUBspline_3d_z_vgl (NUBspline_3d_z * restrict spline, double x, double y, double z, complex_double* restrict val, complex_double* restrict grad, complex_double* restrict lapl) { __m128d a01, b01, c01, da01, db01, dc01, d2a01, d2b01, d2c01, a23, b23, c23, da23, db23, dc23, d2a23, d2b23, d2c23, cPr[8], dcPr[8], d2cPr[8], cPi[8], dcPi[8], d2cPi[8], bcP01r, dbcP01r, bdcP01r, d2bcP01r, bd2cP01r, bcP23r, dbcP23r, bdcP23r, d2bcP23r, bd2cP23r, bcP01i, dbcP01i, bdcP01i, d2bcP01i, bd2cP01i, bcP23i, dbcP23i, bdcP23i, d2bcP23i, bd2cP23i, tmp0, tmp1, tmp2, tmp3, r0, r1, r2, r3, i0, i1, i2, i3; int ix = get_NUBasis_d2funcs_sse_d (spline->x_basis, x, &a01, &a23, &da01, &da23, &d2a01, &d2a23); int iy = get_NUBasis_d2funcs_sse_d (spline->y_basis, y, &b01, &b23, &db01, &db23, &d2b01, &d2b23); int iz = get_NUBasis_d2funcs_sse_d (spline->z_basis, z, &c01, &c23, &dc01, &dc23, &d2c01, &d2c23); int xs = spline->x_stride; int ys = spline->y_stride; int xs2 = 2*xs; int ys2 = 2*ys; int delta = 
xs2-3*ys2; // This macro is used to give the pointer to coefficient data. // i and j should be in the range [0,3]. Coefficients are read four // at a time, so no k value is needed. #define P(i,j,k) (const double*)(spline->coefs+(ix+(i))*xs+(iy+(j))*ys+(iz+k)) // Prefetch the data from main memory into cache so it's available // when we need to use it. double *restrict p = (double*) P(0,0,0); _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += ys2; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += ys2; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += ys2; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += delta; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += ys2; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += ys2; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += ys2; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), 
_MM_HINT_T0); p += delta; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += ys2; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += ys2; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += ys2; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += delta; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += ys2; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += ys2; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += ys2; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); // Compute cP, dcP, and d2cP products 1/8 at a time to maximize // register reuse and avoid rerereading from memory or cache. // Complex values are read in, then shuffled such that 4 registers // hold the read parts and 4 register hold the imaginary parts. 
// 1st eighth p = (double*) P(0,0,0); tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += ys2; r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += ys2; r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[0]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[0]); _MM_DDOT4_PD(r0, r1, r2, r3, dc01, dc23, dc01, dc23, dcPr[0]); _MM_DDOT4_PD(i0, i1, i2, i3, dc01, dc23, dc01, dc23, dcPi[0]); _MM_DDOT4_PD(r0, r1, r2, r3, d2c01,d2c23,d2c01,d2c23,d2cPr[0]); _MM_DDOT4_PD(i0, i1, i2, i3, d2c01,d2c23,d2c01,d2c23,d2cPi[0]); // 2nd eighth tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += ys2; r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += delta; r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[1]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[1]); _MM_DDOT4_PD(r0, r1, r2, r3, dc01, dc23, dc01, dc23, dcPr[1]); _MM_DDOT4_PD(i0, i1, i2, i3, dc01, dc23, dc01, dc23, dcPi[1]); 
_MM_DDOT4_PD(r0, r1, r2, r3, d2c01,d2c23,d2c01,d2c23,d2cPr[1]); _MM_DDOT4_PD(i0, i1, i2, i3, d2c01,d2c23,d2c01,d2c23,d2cPi[1]); // 3rd eighth p = (double*) P(1,0,0); tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += ys2; r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += ys2; r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[2]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[2]); _MM_DDOT4_PD(r0, r1, r2, r3, dc01, dc23, dc01, dc23, dcPr[2]); _MM_DDOT4_PD(i0, i1, i2, i3, dc01, dc23, dc01, dc23, dcPi[2]); _MM_DDOT4_PD(r0, r1, r2, r3, d2c01,d2c23,d2c01,d2c23,d2cPr[2]); _MM_DDOT4_PD(i0, i1, i2, i3, d2c01,d2c23,d2c01,d2c23,d2cPi[2]); // 4th eighth tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += ys2; r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += delta; r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[3]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[3]); 
_MM_DDOT4_PD(r0, r1, r2, r3, dc01, dc23, dc01, dc23, dcPr[3]); _MM_DDOT4_PD(i0, i1, i2, i3, dc01, dc23, dc01, dc23, dcPi[3]); _MM_DDOT4_PD(r0, r1, r2, r3, d2c01,d2c23,d2c01,d2c23,d2cPr[3]); _MM_DDOT4_PD(i0, i1, i2, i3, d2c01,d2c23,d2c01,d2c23,d2cPi[3]); // 5th eighth p = (double*) P(2,0,0); tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += ys2; r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += ys2; r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[4]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[4]); _MM_DDOT4_PD(r0, r1, r2, r3, dc01, dc23, dc01, dc23, dcPr[4]); _MM_DDOT4_PD(i0, i1, i2, i3, dc01, dc23, dc01, dc23, dcPi[4]); _MM_DDOT4_PD(r0, r1, r2, r3, d2c01,d2c23,d2c01,d2c23,d2cPr[4]); _MM_DDOT4_PD(i0, i1, i2, i3, d2c01,d2c23,d2c01,d2c23,d2cPi[4]); // 6th eighth tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += ys2; r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += delta; r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); 
_MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[5]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[5]); _MM_DDOT4_PD(r0, r1, r2, r3, dc01, dc23, dc01, dc23, dcPr[5]); _MM_DDOT4_PD(i0, i1, i2, i3, dc01, dc23, dc01, dc23, dcPi[5]); _MM_DDOT4_PD(r0, r1, r2, r3, d2c01,d2c23,d2c01,d2c23,d2cPr[5]); _MM_DDOT4_PD(i0, i1, i2, i3, d2c01,d2c23,d2c01,d2c23,d2cPi[5]); // 7th eighth p = (double*) P(3,0,0); tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += ys2; r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += ys2; r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[6]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[6]); _MM_DDOT4_PD(r0, r1, r2, r3, dc01, dc23, dc01, dc23, dcPr[6]); _MM_DDOT4_PD(i0, i1, i2, i3, dc01, dc23, dc01, dc23, dcPi[6]); _MM_DDOT4_PD(r0, r1, r2, r3, d2c01,d2c23,d2c01,d2c23,d2cPr[6]); _MM_DDOT4_PD(i0, i1, i2, i3, d2c01,d2c23,d2c01,d2c23,d2cPi[6]); // 8th eighth tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += ys2; r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += 
delta; r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[7]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[7]); _MM_DDOT4_PD(r0, r1, r2, r3, dc01, dc23, dc01, dc23, dcPr[7]); _MM_DDOT4_PD(i0, i1, i2, i3, dc01, dc23, dc01, dc23, dcPi[7]); _MM_DDOT4_PD(r0, r1, r2, r3, d2c01,d2c23,d2c01,d2c23,d2cPr[7]); _MM_DDOT4_PD(i0, i1, i2, i3, d2c01,d2c23,d2c01,d2c23,d2cPi[7]); // Now compute bcP, dbcP, bdcP, d2bcP, bd2cP, and dbdc products _MM_DDOT4_PD (b01, b23, b01, b23, cPr[0], cPr[1], cPr[2], cPr[3], bcP01r); _MM_DDOT4_PD (b01, b23, b01, b23, cPi[0], cPi[1], cPi[2], cPi[3], bcP01i); _MM_DDOT4_PD (b01, b23, b01, b23, cPr[4], cPr[5], cPr[6], cPr[7], bcP23r); _MM_DDOT4_PD (b01, b23, b01, b23, cPi[4], cPi[5], cPi[6], cPi[7], bcP23i); _MM_DDOT4_PD (db01, db23, db01, db23, cPr[0], cPr[1], cPr[2], cPr[3], dbcP01r); _MM_DDOT4_PD (db01, db23, db01, db23, cPi[0], cPi[1], cPi[2], cPi[3], dbcP01i); _MM_DDOT4_PD (db01, db23, db01, db23, cPr[4], cPr[5], cPr[6], cPr[7], dbcP23r); _MM_DDOT4_PD (db01, db23, db01, db23, cPi[4], cPi[5], cPi[6], cPi[7], dbcP23i); _MM_DDOT4_PD (b01, b23, b01, b23, dcPr[0], dcPr[1], dcPr[2], dcPr[3], bdcP01r); _MM_DDOT4_PD (b01, b23, b01, b23, dcPi[0], dcPi[1], dcPi[2], dcPi[3], bdcP01i); _MM_DDOT4_PD (b01, b23, b01, b23, dcPr[4], dcPr[5], dcPr[6], dcPr[7], bdcP23r); _MM_DDOT4_PD (b01, b23, b01, b23, dcPi[4], dcPi[5], dcPi[6], dcPi[7], bdcP23i); _MM_DDOT4_PD (d2b01, d2b23, d2b01, d2b23, cPr[0], cPr[1], cPr[2], cPr[3], d2bcP01r); _MM_DDOT4_PD (d2b01, d2b23, d2b01, d2b23, cPi[0], cPi[1], cPi[2], cPi[3], d2bcP01i); _MM_DDOT4_PD (d2b01, d2b23, d2b01, d2b23, cPr[4], cPr[5], cPr[6], cPr[7], d2bcP23r); _MM_DDOT4_PD (d2b01, d2b23, d2b01, d2b23, cPi[4], cPi[5], cPi[6], cPi[7], d2bcP23i); _MM_DDOT4_PD (b01, b23, b01, b23, d2cPr[0], d2cPr[1], d2cPr[2], d2cPr[3], bd2cP01r); _MM_DDOT4_PD (b01, b23, b01, b23, d2cPi[0], d2cPi[1], d2cPi[2], d2cPi[3], bd2cP01i); 
_MM_DDOT4_PD (b01, b23, b01, b23, d2cPr[4], d2cPr[5], d2cPr[6], d2cPr[7], bd2cP23r); _MM_DDOT4_PD (b01, b23, b01, b23, d2cPi[4], d2cPi[5], d2cPi[6], d2cPi[7], bd2cP23i); // Compute value _MM_DOT4_PD (a01, a23, bcP01r, bcP23r, *((double*)val+0)); _MM_DOT4_PD (a01, a23, bcP01i, bcP23i, *((double*)val+1)); double *dgrad = (double*) grad; // Compute gradient _MM_DOT4_PD (da01, da23, bcP01r, bcP23r, dgrad[0]); _MM_DOT4_PD (da01, da23, bcP01i, bcP23i, dgrad[1]); _MM_DOT4_PD ( a01, a23, dbcP01r, dbcP23r, dgrad[2]); _MM_DOT4_PD ( a01, a23, dbcP01i, dbcP23i, dgrad[3]); _MM_DOT4_PD ( a01, a23, bdcP01r, bdcP23r, dgrad[4]); _MM_DOT4_PD ( a01, a23, bdcP01i, bdcP23i, dgrad[5]); // Compute Laplacian double d2x_r, d2x_i, d2y_r, d2y_i, d2z_r, d2z_i; // d2x _MM_DOT4_PD (d2a01, d2a23, bcP01r, bcP23r, d2x_r); _MM_DOT4_PD (d2a01, d2a23, bcP01i, bcP23i, d2x_i); // d2y _MM_DOT4_PD (a01, a23, d2bcP01r, d2bcP23r, d2y_r); _MM_DOT4_PD (a01, a23, d2bcP01i, d2bcP23i, d2y_i); // d2z _MM_DOT4_PD (a01, a23, bd2cP01r, bd2cP23r, d2z_r); _MM_DOT4_PD (a01, a23, bd2cP01i, bd2cP23i, d2z_i); #ifdef __cplusplus *lapl = std::complex(d2x_r + d2y_r + d2z_r, d2x_i + d2y_i + d2z_i); #else *lapl = (d2x_r + d2y_r + d2z_r) + 1.0i*(d2x_i + d2y_i + d2z_i); #endif #undef P } /* Value, gradient, and Hessian */ inline void eval_NUBspline_3d_z_vgh (NUBspline_3d_z * restrict spline, double x, double y, double z, complex_double* restrict val, complex_double* restrict grad, complex_double* restrict hess) { __m128d a01, b01, c01, da01, db01, dc01, d2a01, d2b01, d2c01, a23, b23, c23, da23, db23, dc23, d2a23, d2b23, d2c23, cPr[8], dcPr[8], d2cPr[8], cPi[8], dcPi[8], d2cPi[8], bcP01r, dbcP01r, bdcP01r, d2bcP01r, dbdcP01r, bd2cP01r, bcP23r, dbcP23r, bdcP23r, d2bcP23r, dbdcP23r, bd2cP23r, bcP01i, dbcP01i, bdcP01i, d2bcP01i, dbdcP01i, bd2cP01i, bcP23i, dbcP23i, bdcP23i, d2bcP23i, dbdcP23i, bd2cP23i, tmp0, tmp1, tmp2, tmp3, r0, r1, r2, r3, i0, i1, i2, i3; int ix = get_NUBasis_d2funcs_sse_d (spline->x_basis, x, &a01, &a23, &da01, 
&da23, &d2a01, &d2a23); int iy = get_NUBasis_d2funcs_sse_d (spline->y_basis, y, &b01, &b23, &db01, &db23, &d2b01, &d2b23); int iz = get_NUBasis_d2funcs_sse_d (spline->z_basis, z, &c01, &c23, &dc01, &dc23, &d2c01, &d2c23); int xs = spline->x_stride; int ys = spline->y_stride; int xs2 = 2*xs; int ys2 = 2*ys; int delta = xs2-3*ys2; // This macro is used to give the pointer to coefficient data. // i and j should be in the range [0,3]. Coefficients are read four // at a time, so no k value is needed. #define P(i,j,k) (const double*)(spline->coefs+(ix+(i))*xs+(iy+(j))*ys+(iz+k)) // Prefetch the data from main memory into cache so it's available // when we need to use it. double *restrict p = (double*) P(0,0,0); _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += ys2; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += ys2; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += ys2; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += delta; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += ys2; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += ys2; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const 
char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += ys2; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += delta; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += ys2; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += ys2; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += ys2; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += delta; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += ys2; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += ys2; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); p += ys2; _mm_prefetch ((const char*)(p+0), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); _mm_prefetch ((const char*)(p+6), _MM_HINT_T0); // Compute cP, dcP, and d2cP products 1/8 at a time to 
maximize // register reuse and avoid rerereading from memory or cache. // Complex values are read in, then shuffled such that 4 registers // hold the read parts and 4 register hold the imaginary parts. // 1st eighth p = (double*) P(0,0,0); tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += ys2; r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += ys2; r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[0]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[0]); _MM_DDOT4_PD(r0, r1, r2, r3, dc01, dc23, dc01, dc23, dcPr[0]); _MM_DDOT4_PD(i0, i1, i2, i3, dc01, dc23, dc01, dc23, dcPi[0]); _MM_DDOT4_PD(r0, r1, r2, r3, d2c01,d2c23,d2c01,d2c23,d2cPr[0]); _MM_DDOT4_PD(i0, i1, i2, i3, d2c01,d2c23,d2c01,d2c23,d2cPi[0]); // 2nd eighth tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += ys2; r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += delta; r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, 
cPr[1]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[1]); _MM_DDOT4_PD(r0, r1, r2, r3, dc01, dc23, dc01, dc23, dcPr[1]); _MM_DDOT4_PD(i0, i1, i2, i3, dc01, dc23, dc01, dc23, dcPi[1]); _MM_DDOT4_PD(r0, r1, r2, r3, d2c01,d2c23,d2c01,d2c23,d2cPr[1]); _MM_DDOT4_PD(i0, i1, i2, i3, d2c01,d2c23,d2c01,d2c23,d2cPi[1]); // 3rd eighth p = (double*) P(1,0,0); tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += ys2; r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += ys2; r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[2]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[2]); _MM_DDOT4_PD(r0, r1, r2, r3, dc01, dc23, dc01, dc23, dcPr[2]); _MM_DDOT4_PD(i0, i1, i2, i3, dc01, dc23, dc01, dc23, dcPi[2]); _MM_DDOT4_PD(r0, r1, r2, r3, d2c01,d2c23,d2c01,d2c23,d2cPr[2]); _MM_DDOT4_PD(i0, i1, i2, i3, d2c01,d2c23,d2c01,d2c23,d2cPi[2]); // 4th eighth tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += ys2; r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += delta; r3 = _mm_shuffle_pd (tmp0, tmp1, 
_MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[3]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[3]); _MM_DDOT4_PD(r0, r1, r2, r3, dc01, dc23, dc01, dc23, dcPr[3]); _MM_DDOT4_PD(i0, i1, i2, i3, dc01, dc23, dc01, dc23, dcPi[3]); _MM_DDOT4_PD(r0, r1, r2, r3, d2c01,d2c23,d2c01,d2c23,d2cPr[3]); _MM_DDOT4_PD(i0, i1, i2, i3, d2c01,d2c23,d2c01,d2c23,d2cPi[3]); // 5th eighth p = (double*) P(2,0,0); tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += ys2; r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += ys2; r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[4]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[4]); _MM_DDOT4_PD(r0, r1, r2, r3, dc01, dc23, dc01, dc23, dcPr[4]); _MM_DDOT4_PD(i0, i1, i2, i3, dc01, dc23, dc01, dc23, dcPi[4]); _MM_DDOT4_PD(r0, r1, r2, r3, d2c01,d2c23,d2c01,d2c23,d2cPr[4]); _MM_DDOT4_PD(i0, i1, i2, i3, d2c01,d2c23,d2c01,d2c23,d2cPi[4]); // 6th eighth tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += ys2; r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, 
_MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += delta; r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[5]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[5]); _MM_DDOT4_PD(r0, r1, r2, r3, dc01, dc23, dc01, dc23, dcPr[5]); _MM_DDOT4_PD(i0, i1, i2, i3, dc01, dc23, dc01, dc23, dcPi[5]); _MM_DDOT4_PD(r0, r1, r2, r3, d2c01,d2c23,d2c01,d2c23,d2cPr[5]); _MM_DDOT4_PD(i0, i1, i2, i3, d2c01,d2c23,d2c01,d2c23,d2cPi[5]); // 7th eighth p = (double*) P(3,0,0); tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += ys2; r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += ys2; r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[6]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[6]); _MM_DDOT4_PD(r0, r1, r2, r3, dc01, dc23, dc01, dc23, dcPr[6]); _MM_DDOT4_PD(i0, i1, i2, i3, dc01, dc23, dc01, dc23, dcPi[6]); _MM_DDOT4_PD(r0, r1, r2, r3, d2c01,d2c23,d2c01,d2c23,d2cPr[6]); _MM_DDOT4_PD(i0, i1, i2, i3, d2c01,d2c23,d2c01,d2c23,d2cPi[6]); // 8th eighth tmp0 = _mm_load_pd (p+0); tmp1 = _mm_load_pd (p+2); r0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i0 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += ys2; r1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i1 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd 
(p+0); tmp1 = _mm_load_pd (p+2); r2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i2 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); tmp0 = _mm_load_pd (p+4); tmp1 = _mm_load_pd (p+6); p += delta; r3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(0, 0)); i3 = _mm_shuffle_pd (tmp0, tmp1, _MM_SHUFFLE2(1, 1)); _MM_DDOT4_PD(r0, r1, r2, r3, c01, c23, c01, c23, cPr[7]); _MM_DDOT4_PD(i0, i1, i2, i3, c01, c23, c01, c23, cPi[7]); _MM_DDOT4_PD(r0, r1, r2, r3, dc01, dc23, dc01, dc23, dcPr[7]); _MM_DDOT4_PD(i0, i1, i2, i3, dc01, dc23, dc01, dc23, dcPi[7]); _MM_DDOT4_PD(r0, r1, r2, r3, d2c01,d2c23,d2c01,d2c23,d2cPr[7]); _MM_DDOT4_PD(i0, i1, i2, i3, d2c01,d2c23,d2c01,d2c23,d2cPi[7]); // Now compute bcP, dbcP, bdcP, d2bcP, bd2cP, and dbdc products _MM_DDOT4_PD (b01, b23, b01, b23, cPr[0], cPr[1], cPr[2], cPr[3], bcP01r); _MM_DDOT4_PD (b01, b23, b01, b23, cPi[0], cPi[1], cPi[2], cPi[3], bcP01i); _MM_DDOT4_PD (b01, b23, b01, b23, cPr[4], cPr[5], cPr[6], cPr[7], bcP23r); _MM_DDOT4_PD (b01, b23, b01, b23, cPi[4], cPi[5], cPi[6], cPi[7], bcP23i); _MM_DDOT4_PD (db01, db23, db01, db23, cPr[0], cPr[1], cPr[2], cPr[3], dbcP01r); _MM_DDOT4_PD (db01, db23, db01, db23, cPi[0], cPi[1], cPi[2], cPi[3], dbcP01i); _MM_DDOT4_PD (db01, db23, db01, db23, cPr[4], cPr[5], cPr[6], cPr[7], dbcP23r); _MM_DDOT4_PD (db01, db23, db01, db23, cPi[4], cPi[5], cPi[6], cPi[7], dbcP23i); _MM_DDOT4_PD (b01, b23, b01, b23, dcPr[0], dcPr[1], dcPr[2], dcPr[3], bdcP01r); _MM_DDOT4_PD (b01, b23, b01, b23, dcPi[0], dcPi[1], dcPi[2], dcPi[3], bdcP01i); _MM_DDOT4_PD (b01, b23, b01, b23, dcPr[4], dcPr[5], dcPr[6], dcPr[7], bdcP23r); _MM_DDOT4_PD (b01, b23, b01, b23, dcPi[4], dcPi[5], dcPi[6], dcPi[7], bdcP23i); _MM_DDOT4_PD (d2b01, d2b23, d2b01, d2b23, cPr[0], cPr[1], cPr[2], cPr[3], d2bcP01r); _MM_DDOT4_PD (d2b01, d2b23, d2b01, d2b23, cPi[0], cPi[1], cPi[2], cPi[3], d2bcP01i); _MM_DDOT4_PD (d2b01, d2b23, d2b01, d2b23, cPr[4], cPr[5], cPr[6], cPr[7], d2bcP23r); _MM_DDOT4_PD (d2b01, d2b23, d2b01, d2b23, cPi[4], cPi[5], 
cPi[6], cPi[7], d2bcP23i); _MM_DDOT4_PD (b01, b23, b01, b23, d2cPr[0], d2cPr[1], d2cPr[2], d2cPr[3], bd2cP01r); _MM_DDOT4_PD (b01, b23, b01, b23, d2cPi[0], d2cPi[1], d2cPi[2], d2cPi[3], bd2cP01i); _MM_DDOT4_PD (b01, b23, b01, b23, d2cPr[4], d2cPr[5], d2cPr[6], d2cPr[7], bd2cP23r); _MM_DDOT4_PD (b01, b23, b01, b23, d2cPi[4], d2cPi[5], d2cPi[6], d2cPi[7], bd2cP23i); _MM_DDOT4_PD (db01, db23, db01, db23, dcPr[0], dcPr[1], dcPr[2], dcPr[3], dbdcP01r); _MM_DDOT4_PD (db01, db23, db01, db23, dcPi[0], dcPi[1], dcPi[2], dcPi[3], dbdcP01i); _MM_DDOT4_PD (db01, db23, db01, db23, dcPr[4], dcPr[5], dcPr[6], dcPr[7], dbdcP23r); _MM_DDOT4_PD (db01, db23, db01, db23, dcPi[4], dcPi[5], dcPi[6], dcPi[7], dbdcP23i); // Compute value _MM_DOT4_PD (a01, a23, bcP01r, bcP23r, *((double*)val+0)); _MM_DOT4_PD (a01, a23, bcP01i, bcP23i, *((double*)val+1)); double *dgrad = (double*) grad; // Compute gradient _MM_DOT4_PD (da01, da23, bcP01r, bcP23r, dgrad[0]); _MM_DOT4_PD (da01, da23, bcP01i, bcP23i, dgrad[1]); _MM_DOT4_PD ( a01, a23, dbcP01r, dbcP23r, dgrad[2]); _MM_DOT4_PD ( a01, a23, dbcP01i, dbcP23i, dgrad[3]); _MM_DOT4_PD ( a01, a23, bdcP01r, bdcP23r, dgrad[4]); _MM_DOT4_PD ( a01, a23, bdcP01i, bdcP23i, dgrad[5]); double *dhess = (double*) hess; // Compute hessian // d2x _MM_DOT4_PD (d2a01, d2a23, bcP01r, bcP23r, dhess[0]); _MM_DOT4_PD (d2a01, d2a23, bcP01i, bcP23i, dhess[1]); // d2y _MM_DOT4_PD (a01, a23, d2bcP01r, d2bcP23r, dhess[8]); _MM_DOT4_PD (a01, a23, d2bcP01i, d2bcP23i, dhess[9]); // d2z _MM_DOT4_PD (a01, a23, bd2cP01r, bd2cP23r, dhess[16]); _MM_DOT4_PD (a01, a23, bd2cP01i, bd2cP23i, dhess[17]); // dx dy _MM_DOT4_PD (da01, da23, dbcP01r, dbcP23r, dhess[2]); _MM_DOT4_PD (da01, da23, dbcP01i, dbcP23i, dhess[3]); // dx dz _MM_DOT4_PD (da01, da23, bdcP01r, bdcP23r, dhess[4]); _MM_DOT4_PD (da01, da23, bdcP01i, bdcP23i, dhess[5]); // dy dz _MM_DOT4_PD (a01, a23, dbdcP01r, dbdcP23r, dhess[10]); _MM_DOT4_PD (a01, a23, dbdcP01i, dbdcP23i, dhess[11]); // Copy hessian elements into lower 
half of 3x3 matrix hess[3] = hess[1]; hess[6] = hess[2]; hess[7] = hess[5]; #undef P } #undef _MM_DDOT4_PD #undef _MM_DOT4_PD #endif einspline-0.9.2/src/nubasis.h0000664000113000011300000001134411012400563013012 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. // // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. // // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #ifndef NUBASIS_H #define NUBASIS_H #include "nugrid.h" #include "config.h" #include typedef struct { NUgrid* restrict grid; // xVals is just the grid points, augmented by two extra points on // either side. These are necessary to generate enough basis // functions. 
double* restrict xVals; // dxInv[3*i+j] = 1.0/(grid(i+j-1)-grid(i-2)) double* restrict dxInv; bool periodic; } NUBasis; #ifdef __cplusplus extern "C" { #endif ///////////////// // Constructor // ///////////////// NUBasis* create_NUBasis (NUgrid *grid, bool periodic); //////////////// // Destructor // //////////////// void destroy_NUBasis (NUBasis *basis); //////////////////////////////////////////////// // Single-precision basis function evaluation // //////////////////////////////////////////////// int get_NUBasis_funcs_s (NUBasis* restrict basis, double x, float bfuncs[4]); void get_NUBasis_funcs_si (NUBasis* restrict basis, int i, float bfuncs[4]); int get_NUBasis_dfuncs_s (NUBasis* restrict basis, double x, float bfuncs[4], float dbfuncs[4]); void get_NUBasis_dfuncs_si (NUBasis* restrict basis, int i, float bfuncs[4], float dbfuncs[4]); int get_NUBasis_d2funcs_s (NUBasis* restrict basis, double x, float bfuncs[4], float dbfuncs[4], float d2bfuncs[4]); void get_NUBasis_d2funcs_si (NUBasis* restrict basis, int i, float bfuncs[4], float dbfuncs[4], float d2bfuncs[4]); //////////////////////////////////////////////// // Double-precision basis function evaluation // //////////////////////////////////////////////// int get_NUBasis_funcs_d (NUBasis* restrict basis, double x, double bfuncs[4]); void get_NUBasis_funcs_di (NUBasis* restrict basis, int i, double bfuncs[4]); int get_NUBasis_dfuncs_d (NUBasis* restrict basis, double x, double bfuncs[4], double dbfuncs[4]); void get_NUBasis_dfuncs_di (NUBasis* restrict basis, int i, double bfuncs[4], double dbfuncs[4]); int get_NUBasis_d2funcs_d (NUBasis* restrict basis, double x, double bfuncs[4], double dbfuncs[4], double d2bfuncs[4]); void get_NUBasis_d2funcs_di (NUBasis* restrict basis, int i, double bfuncs[4], double dbfuncs[4], double d2bfuncs[4]); #ifdef __cplusplus } #endif #ifdef HAVE_SSE2 #include #include #ifdef __cplusplus extern "C" { #endif int get_NUBasis_funcs_sse_s (NUBasis* restrict basis, double x, __m128 
*restrict funcs); int get_NUBasis_dfuncs_sse_s (NUBasis* restrict basis, double x, __m128 *restrict funcs, __m128 *restrict dfuncs); int get_NUBasis_d2funcs_sse_s (NUBasis* restrict basis, double x, __m128 *restrict funcs, __m128 *restrict dfuncs, __m128 *restrict d2funcs); int get_NUBasis_funcs_sse_d (NUBasis* restrict basis, double x, __m128d *restrict f01, __m128d *restrict f23); int get_NUBasis_dfuncs_sse_d (NUBasis* restrict basis, double x, __m128d *restrict f01, __m128d *restrict f23, __m128d *restrict df01, __m128d *restrict df23); int get_NUBasis_d2funcs_sse_d (NUBasis* restrict basis, double x, __m128d *restrict f01, __m128d *restrict f23, __m128d *restrict df01, __m128d *restrict df23, __m128d *restrict d2f01, __m128d *restrict d2f23); #ifdef __cplusplus } #endif #endif // #ifdef HAVE_SSE2 #endif // #ifdef NUBASIS_H einspline-0.9.2/src/bspline_eval_std_s.h0000664000113000011300000013351711012400563015214 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. // // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. 
// // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #ifndef BSPLINE_EVAL_STD_S_H #define BSPLINE_EVAL_STD_S_H #include #include extern const float* restrict Af; extern const float* restrict dAf; extern const float* restrict d2Af; /************************************************************/ /* 1D single-precision, real evaulation functions */ /************************************************************/ /* Value only */ inline void eval_UBspline_1d_s (UBspline_1d_s * restrict spline, double x, float* restrict val) { x -= spline->x_grid.start; float u = x*spline->x_grid.delta_inv; float ipart, t; t = modff (u, &ipart); int i = (int) ipart; float tp[4]; tp[0] = t*t*t; tp[1] = t*t; tp[2] = t; tp[3] = 1.0; float* restrict coefs = spline->coefs; *val = (coefs[i+0]*(Af[ 0]*tp[0] + Af[ 1]*tp[1] + Af[ 2]*tp[2] + Af[ 3]*tp[3])+ coefs[i+1]*(Af[ 4]*tp[0] + Af[ 5]*tp[1] + Af[ 6]*tp[2] + Af[ 7]*tp[3])+ coefs[i+2]*(Af[ 8]*tp[0] + Af[ 9]*tp[1] + Af[10]*tp[2] + Af[11]*tp[3])+ coefs[i+3]*(Af[12]*tp[0] + Af[13]*tp[1] + Af[14]*tp[2] + Af[15]*tp[3])); } /* Value and first derivative */ inline void eval_UBspline_1d_s_vg (UBspline_1d_s * restrict spline, double x, float* restrict val, float* restrict grad) { x -= spline->x_grid.start; float u = x*spline->x_grid.delta_inv; float ipart, t; t = modff (u, &ipart); int i = (int) ipart; float tp[4]; tp[0] = t*t*t; tp[1] = t*t; tp[2] = t; tp[3] = 1.0; float* restrict coefs = spline->coefs; *val = (coefs[i+0]*(Af[ 0]*tp[0] + Af[ 1]*tp[1] + Af[ 2]*tp[2] + Af[ 3]*tp[3])+ coefs[i+1]*(Af[ 4]*tp[0] + Af[ 5]*tp[1] + Af[ 6]*tp[2] + Af[ 7]*tp[3])+ coefs[i+2]*(Af[ 8]*tp[0] + Af[ 9]*tp[1] + Af[10]*tp[2] + Af[11]*tp[3])+ coefs[i+3]*(Af[12]*tp[0] + Af[13]*tp[1] + Af[14]*tp[2] + Af[15]*tp[3])); 
*grad = spline->x_grid.delta_inv * (coefs[i+0]*(dAf[ 1]*tp[1] + dAf[ 2]*tp[2] + dAf[ 3]*tp[3])+ coefs[i+1]*(dAf[ 5]*tp[1] + dAf[ 6]*tp[2] + dAf[ 7]*tp[3])+ coefs[i+2]*(dAf[ 9]*tp[1] + dAf[10]*tp[2] + dAf[11]*tp[3])+ coefs[i+3]*(dAf[13]*tp[1] + dAf[14]*tp[2] + dAf[15]*tp[3])); } /* Value, first derivative, and second derivative */ inline void eval_UBspline_1d_s_vgl (UBspline_1d_s * restrict spline, double x, float* restrict val, float* restrict grad, float* restrict lapl) { x -= spline->x_grid.start; float u = x*spline->x_grid.delta_inv; float ipart, t; t = modff (u, &ipart); int i = (int) ipart; float tp[4]; tp[0] = t*t*t; tp[1] = t*t; tp[2] = t; tp[3] = 1.0; float* restrict coefs = spline->coefs; *val = (coefs[i+0]*(Af[ 0]*tp[0] + Af[ 1]*tp[1] + Af[ 2]*tp[2] + Af[ 3]*tp[3])+ coefs[i+1]*(Af[ 4]*tp[0] + Af[ 5]*tp[1] + Af[ 6]*tp[2] + Af[ 7]*tp[3])+ coefs[i+2]*(Af[ 8]*tp[0] + Af[ 9]*tp[1] + Af[10]*tp[2] + Af[11]*tp[3])+ coefs[i+3]*(Af[12]*tp[0] + Af[13]*tp[1] + Af[14]*tp[2] + Af[15]*tp[3])); *grad = spline->x_grid.delta_inv * (coefs[i+0]*(dAf[ 1]*tp[1] + dAf[ 2]*tp[2] + dAf[ 3]*tp[3])+ coefs[i+1]*(dAf[ 5]*tp[1] + dAf[ 6]*tp[2] + dAf[ 7]*tp[3])+ coefs[i+2]*(dAf[ 9]*tp[1] + dAf[10]*tp[2] + dAf[11]*tp[3])+ coefs[i+3]*(dAf[13]*tp[1] + dAf[14]*tp[2] + dAf[15]*tp[3])); *lapl = spline->x_grid.delta_inv * spline->x_grid.delta_inv * (coefs[i+0]*(d2Af[ 2]*tp[2] + d2Af[ 3]*tp[3])+ coefs[i+1]*(d2Af[ 6]*tp[2] + d2Af[ 7]*tp[3])+ coefs[i+2]*(d2Af[10]*tp[2] + d2Af[11]*tp[3])+ coefs[i+3]*(d2Af[14]*tp[2] + d2Af[15]*tp[3])); } inline void eval_UBspline_1d_s_vgh (UBspline_1d_s * restrict spline, double x, float* restrict val, float* restrict grad, float* restrict hess) { eval_UBspline_1d_s_vgl (spline, x, val, grad, hess); } /************************************************************/ /* 2D single-precision, real evaulation functions */ /************************************************************/ /* Value only */ inline void eval_UBspline_2d_s (UBspline_2d_s * restrict spline, double 
x, double y, float* restrict val) { x -= spline->x_grid.start; y -= spline->y_grid.start; float ux = x*spline->x_grid.delta_inv; float uy = y*spline->y_grid.delta_inv; float ipartx, iparty, tx, ty; tx = modff (ux, &ipartx); ty = modff (uy, &iparty); int ix = (int) ipartx; int iy = (int) iparty; float tpx[4], tpy[4], a[4], b[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; float* restrict coefs = spline->coefs; a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]); a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]); b[0] = (Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]); b[1] = (Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]); b[2] = (Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]); b[3] = (Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]); int xs = spline->x_stride; #define C(i,j) coefs[(ix+(i))*xs+iy+(j)] *val = (a[0]*(C(0,0)*b[0]+C(0,1)*b[1]+C(0,2)*b[2]+C(0,3)*b[3])+ a[1]*(C(1,0)*b[0]+C(1,1)*b[1]+C(1,2)*b[2]+C(1,3)*b[3])+ a[2]*(C(2,0)*b[0]+C(2,1)*b[1]+C(2,2)*b[2]+C(2,3)*b[3])+ a[3]*(C(3,0)*b[0]+C(3,1)*b[1]+C(3,2)*b[2]+C(3,3)*b[3])); #undef C } /* Value and gradient */ inline void eval_UBspline_2d_s_vg (UBspline_2d_s * restrict spline, double x, double y, float* restrict val, float* restrict grad) { x -= spline->x_grid.start; y -= spline->y_grid.start; float ux = x*spline->x_grid.delta_inv; float uy = y*spline->y_grid.delta_inv; float ipartx, iparty, tx, ty; tx = modff (ux, &ipartx); ty = modff (uy, &iparty); int ix = (int) ipartx; int iy = (int) iparty; float tpx[4], tpy[4], a[4], b[4], da[4], db[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; float* restrict coefs = 
spline->coefs; a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]); a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]); da[0] = (dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]); da[1] = (dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]); da[2] = (dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]); da[3] = (dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]); b[0] = (Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]); b[1] = (Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]); b[2] = (Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]); b[3] = (Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]); db[0] = (dAf[ 1]*tpy[1] + dAf[ 2]*tpy[2] + dAf[ 3]*tpy[3]); db[1] = (dAf[ 5]*tpy[1] + dAf[ 6]*tpy[2] + dAf[ 7]*tpy[3]); db[2] = (dAf[ 9]*tpy[1] + dAf[10]*tpy[2] + dAf[11]*tpy[3]); db[3] = (dAf[13]*tpy[1] + dAf[14]*tpy[2] + dAf[15]*tpy[3]); int xs = spline->x_stride; #define C(i,j) coefs[(ix+(i))*xs+iy+(j)] *val = (a[0]*(C(0,0)*b[0]+C(0,1)*b[1]+C(0,2)*b[2]+C(0,3)*b[3])+ a[1]*(C(1,0)*b[0]+C(1,1)*b[1]+C(1,2)*b[2]+C(1,3)*b[3])+ a[2]*(C(2,0)*b[0]+C(2,1)*b[1]+C(2,2)*b[2]+C(2,3)*b[3])+ a[3]*(C(3,0)*b[0]+C(3,1)*b[1]+C(3,2)*b[2]+C(3,3)*b[3])); grad[0] = spline->x_grid.delta_inv * (da[0]*(C(0,0)*b[0]+C(0,1)*b[1]+C(0,2)*b[2]+C(0,3)*b[3])+ da[1]*(C(1,0)*b[0]+C(1,1)*b[1]+C(1,2)*b[2]+C(1,3)*b[3])+ da[2]*(C(2,0)*b[0]+C(2,1)*b[1]+C(2,2)*b[2]+C(2,3)*b[3])+ da[3]*(C(3,0)*b[0]+C(3,1)*b[1]+C(3,2)*b[2]+C(3,3)*b[3])); grad[1] = spline->y_grid.delta_inv * (a[0]*(C(0,0)*db[0]+C(0,1)*db[1]+C(0,2)*db[2]+C(0,3)*db[3])+ a[1]*(C(1,0)*db[0]+C(1,1)*db[1]+C(1,2)*db[2]+C(1,3)*db[3])+ a[2]*(C(2,0)*db[0]+C(2,1)*db[1]+C(2,2)*db[2]+C(2,3)*db[3])+ a[3]*(C(3,0)*db[0]+C(3,1)*db[1]+C(3,2)*db[2]+C(3,3)*db[3])); #undef C } /* Value, gradient, and laplacian */ inline void 
eval_UBspline_2d_s_vgl (UBspline_2d_s * restrict spline, double x, double y, float* restrict val, float* restrict grad, float* restrict lapl) { x -= spline->x_grid.start; y -= spline->y_grid.start; float ux = x*spline->x_grid.delta_inv; float uy = y*spline->y_grid.delta_inv; float ipartx, iparty, tx, ty; tx = modff (ux, &ipartx); ty = modff (uy, &iparty); int ix = (int) ipartx; int iy = (int) iparty; float tpx[4], tpy[4], a[4], b[4], da[4], db[4], d2a[4], d2b[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; float* restrict coefs = spline->coefs; a[0] = ( Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); a[1] = ( Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); a[2] = ( Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]); a[3] = ( Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]); da[0] = ( dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]); da[1] = ( dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]); da[2] = ( dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]); da[3] = ( dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]); d2a[0] = (d2Af[ 2]*tpx[2] + d2Af[ 3]*tpx[3]); d2a[1] = (d2Af[ 6]*tpx[2] + d2Af[ 7]*tpx[3]); d2a[2] = (d2Af[10]*tpx[2] + d2Af[11]*tpx[3]); d2a[3] = (d2Af[14]*tpx[2] + d2Af[15]*tpx[3]); b[0] = ( Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]); b[1] = ( Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]); b[2] = ( Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]); b[3] = ( Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]); db[0] = (dAf[ 1]*tpy[1] + dAf[ 2]*tpy[2] + dAf[ 3]*tpy[3]); db[1] = (dAf[ 5]*tpy[1] + dAf[ 6]*tpy[2] + dAf[ 7]*tpy[3]); db[2] = (dAf[ 9]*tpy[1] + dAf[10]*tpy[2] + dAf[11]*tpy[3]); db[3] = (dAf[13]*tpy[1] + dAf[14]*tpy[2] + dAf[15]*tpy[3]); d2b[0] = (d2Af[ 2]*tpy[2] + d2Af[ 3]*tpy[3]); d2b[1] = (d2Af[ 6]*tpy[2] + d2Af[ 7]*tpy[3]); d2b[2] = 
(d2Af[10]*tpy[2] + d2Af[11]*tpy[3]); d2b[3] = (d2Af[14]*tpy[2] + d2Af[15]*tpy[3]); int xs = spline->x_stride; #define C(i,j) coefs[(ix+(i))*xs+iy+(j)] *val = (a[0]*(C(0,0)*b[0]+C(0,1)*b[1]+C(0,2)*b[2]+C(0,3)*b[3])+ a[1]*(C(1,0)*b[0]+C(1,1)*b[1]+C(1,2)*b[2]+C(1,3)*b[3])+ a[2]*(C(2,0)*b[0]+C(2,1)*b[1]+C(2,2)*b[2]+C(2,3)*b[3])+ a[3]*(C(3,0)*b[0]+C(3,1)*b[1]+C(3,2)*b[2]+C(3,3)*b[3])); grad[0] = spline->x_grid.delta_inv * (da[0]*(C(0,0)*b[0]+C(0,1)*b[1]+C(0,2)*b[2]+C(0,3)*b[3])+ da[1]*(C(1,0)*b[0]+C(1,1)*b[1]+C(1,2)*b[2]+C(1,3)*b[3])+ da[2]*(C(2,0)*b[0]+C(2,1)*b[1]+C(2,2)*b[2]+C(2,3)*b[3])+ da[3]*(C(3,0)*b[0]+C(3,1)*b[1]+C(3,2)*b[2]+C(3,3)*b[3])); grad[1] = spline->y_grid.delta_inv * (a[0]*(C(0,0)*db[0]+C(0,1)*db[1]+C(0,2)*db[2]+C(0,3)*db[3])+ a[1]*(C(1,0)*db[0]+C(1,1)*db[1]+C(1,2)*db[2]+C(1,3)*db[3])+ a[2]*(C(2,0)*db[0]+C(2,1)*db[1]+C(2,2)*db[2]+C(2,3)*db[3])+ a[3]*(C(3,0)*db[0]+C(3,1)*db[1]+C(3,2)*db[2]+C(3,3)*db[3])); *lapl = spline->y_grid.delta_inv * spline->y_grid.delta_inv * (a[0]*(C(0,0)*d2b[0]+C(0,1)*d2b[1]+C(0,2)*d2b[2]+C(0,3)*d2b[3])+ a[1]*(C(1,0)*d2b[0]+C(1,1)*d2b[1]+C(1,2)*d2b[2]+C(1,3)*d2b[3])+ a[2]*(C(2,0)*d2b[0]+C(2,1)*d2b[1]+C(2,2)*d2b[2]+C(2,3)*d2b[3])+ a[3]*(C(3,0)*d2b[0]+C(3,1)*d2b[1]+C(3,2)*d2b[2]+C(3,3)*d2b[3])) + spline->x_grid.delta_inv * spline->x_grid.delta_inv * (d2a[0]*(C(0,0)*b[0]+C(0,1)*b[1]+C(0,2)*b[2]+C(0,3)*b[3])+ d2a[1]*(C(1,0)*b[0]+C(1,1)*b[1]+C(1,2)*b[2]+C(1,3)*b[3])+ d2a[2]*(C(2,0)*b[0]+C(2,1)*b[1]+C(2,2)*b[2]+C(2,3)*b[3])+ d2a[3]*(C(3,0)*b[0]+C(3,1)*b[1]+C(3,2)*b[2]+C(3,3)*b[3])); #undef C } /* Value, gradient, and Hessian */ inline void eval_UBspline_2d_s_vgh (UBspline_2d_s * restrict spline, double x, double y, float* restrict val, float* restrict grad, float* restrict hess) { x -= spline->x_grid.start; y -= spline->y_grid.start; float ux = x*spline->x_grid.delta_inv; float uy = y*spline->y_grid.delta_inv; float ipartx, iparty, tx, ty; tx = modff (ux, &ipartx); ty = modff (uy, &iparty); int ix = (int) ipartx; int iy = (int) 
iparty; float tpx[4], tpy[4], a[4], b[4], da[4], db[4], d2a[4], d2b[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; float* restrict coefs = spline->coefs; a[0] = ( Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); a[1] = ( Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); a[2] = ( Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]); a[3] = ( Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]); da[0] = ( dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]); da[1] = ( dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]); da[2] = ( dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]); da[3] = ( dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]); d2a[0] = (d2Af[ 2]*tpx[2] + d2Af[ 3]*tpx[3]); d2a[1] = (d2Af[ 6]*tpx[2] + d2Af[ 7]*tpx[3]); d2a[2] = (d2Af[10]*tpx[2] + d2Af[11]*tpx[3]); d2a[3] = (d2Af[14]*tpx[2] + d2Af[15]*tpx[3]); b[0] = ( Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]); b[1] = ( Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]); b[2] = ( Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]); b[3] = ( Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]); db[0] = ( dAf[ 1]*tpy[1] + dAf[ 2]*tpy[2] + dAf[ 3]*tpy[3]); db[1] = ( dAf[ 5]*tpy[1] + dAf[ 6]*tpy[2] + dAf[ 7]*tpy[3]); db[2] = ( dAf[ 9]*tpy[1] + dAf[10]*tpy[2] + dAf[11]*tpy[3]); db[3] = ( dAf[13]*tpy[1] + dAf[14]*tpy[2] + dAf[15]*tpy[3]); d2b[0] = (d2Af[ 2]*tpy[2] + d2Af[ 3]*tpy[3]); d2b[1] = (d2Af[ 6]*tpy[2] + d2Af[ 7]*tpy[3]); d2b[2] = (d2Af[10]*tpy[2] + d2Af[11]*tpy[3]); d2b[3] = (d2Af[14]*tpy[2] + d2Af[15]*tpy[3]); int xs = spline->x_stride; #define C(i,j) coefs[(ix+(i))*xs+iy+(j)] *val = ( a[0]*(C(0,0)* b[0]+C(0,1)* b[1]+C(0,2)* b[2]+C(0,3)* b[3])+ a[1]*(C(1,0)* b[0]+C(1,1)* b[1]+C(1,2)* b[2]+C(1,3)* b[3])+ a[2]*(C(2,0)* b[0]+C(2,1)* b[1]+C(2,2)* b[2]+C(2,3)* b[3])+ a[3]*(C(3,0)* b[0]+C(3,1)* b[1]+C(3,2)* b[2]+C(3,3)* b[3])); 
grad[0] = spline->x_grid.delta_inv * ( da[0]*(C(0,0)* b[0]+C(0,1)* b[1]+C(0,2)* b[2]+C(0,3)* b[3])+ da[1]*(C(1,0)* b[0]+C(1,1)* b[1]+C(1,2)* b[2]+C(1,3)* b[3])+ da[2]*(C(2,0)* b[0]+C(2,1)* b[1]+C(2,2)* b[2]+C(2,3)* b[3])+ da[3]*(C(3,0)* b[0]+C(3,1)* b[1]+C(3,2)* b[2]+C(3,3)* b[3])); grad[1] = spline->y_grid.delta_inv * ( a[0]*(C(0,0)* db[0]+C(0,1)* db[1]+C(0,2)* db[2]+C(0,3)* db[3])+ a[1]*(C(1,0)* db[0]+C(1,1)* db[1]+C(1,2)* db[2]+C(1,3)* db[3])+ a[2]*(C(2,0)* db[0]+C(2,1)* db[1]+C(2,2)* db[2]+C(2,3)* db[3])+ a[3]*(C(3,0)* db[0]+C(3,1)* db[1]+C(3,2)* db[2]+C(3,3)* db[3])); hess[0] = spline->x_grid.delta_inv * spline->x_grid.delta_inv * (d2a[0]*(C(0,0)* b[0]+C(0,1)* b[1]+C(0,2)* b[2]+C(0,3)* b[3])+ d2a[1]*(C(1,0)* b[0]+C(1,1)* b[1]+C(1,2)* b[2]+C(1,3)* b[3])+ d2a[2]*(C(2,0)* b[0]+C(2,1)* b[1]+C(2,2)* b[2]+C(2,3)* b[3])+ d2a[3]*(C(3,0)* b[0]+C(3,1)* b[1]+C(3,2)* b[2]+C(3,3)* b[3])); hess[1] = spline->x_grid.delta_inv * spline->y_grid.delta_inv * ( da[0]*(C(0,0)* db[0]+C(0,1)* db[1]+C(0,2)* db[2]+C(0,3)* db[3])+ da[1]*(C(1,0)* db[0]+C(1,1)* db[1]+C(1,2)* db[2]+C(1,3)* db[3])+ da[2]*(C(2,0)* db[0]+C(2,1)* db[1]+C(2,2)* db[2]+C(2,3)* db[3])+ da[3]*(C(3,0)* db[0]+C(3,1)* db[1]+C(3,2)* db[2]+C(3,3)* db[3])); hess[3] = spline->y_grid.delta_inv * spline->y_grid.delta_inv * ( a[0]*(C(0,0)*d2b[0]+C(0,1)*d2b[1]+C(0,2)*d2b[2]+C(0,3)*d2b[3])+ a[1]*(C(1,0)*d2b[0]+C(1,1)*d2b[1]+C(1,2)*d2b[2]+C(1,3)*d2b[3])+ a[2]*(C(2,0)*d2b[0]+C(2,1)*d2b[1]+C(2,2)*d2b[2]+C(2,3)*d2b[3])+ a[3]*(C(3,0)*d2b[0]+C(3,1)*d2b[1]+C(3,2)*d2b[2]+C(3,3)*d2b[3])); hess[2] = hess[1]; #undef C } /************************************************************/ /* 3D single-precision, real evaulation functions */ /************************************************************/ /* Value only */ inline void eval_UBspline_3d_s (UBspline_3d_s * restrict spline, double x, double y, double z, float* restrict val) { x -= spline->x_grid.start; y -= spline->y_grid.start; z -= spline->z_grid.start; float ux = 
x*spline->x_grid.delta_inv; float uy = y*spline->y_grid.delta_inv; float uz = z*spline->z_grid.delta_inv; float ipartx, iparty, ipartz, tx, ty, tz; tx = modff (ux, &ipartx); int ix = (int) ipartx; ty = modff (uy, &iparty); int iy = (int) iparty; tz = modff (uz, &ipartz); int iz = (int) ipartz; float tpx[4], tpy[4], tpz[4], a[4], b[4], c[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; tpz[0] = tz*tz*tz; tpz[1] = tz*tz; tpz[2] = tz; tpz[3] = 1.0; float* restrict coefs = spline->coefs; a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]); a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]); b[0] = (Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]); b[1] = (Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]); b[2] = (Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]); b[3] = (Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]); c[0] = (Af[ 0]*tpz[0] + Af[ 1]*tpz[1] + Af[ 2]*tpz[2] + Af[ 3]*tpz[3]); c[1] = (Af[ 4]*tpz[0] + Af[ 5]*tpz[1] + Af[ 6]*tpz[2] + Af[ 7]*tpz[3]); c[2] = (Af[ 8]*tpz[0] + Af[ 9]*tpz[1] + Af[10]*tpz[2] + Af[11]*tpz[3]); c[3] = (Af[12]*tpz[0] + Af[13]*tpz[1] + Af[14]*tpz[2] + Af[15]*tpz[3]); int xs = spline->x_stride; int ys = spline->y_stride; #define P(i,j,k) coefs[(ix+(i))*xs+(iy+(j))*ys+(iz+(k))] *val = (a[0]*(b[0]*(P(0,0,0)*c[0]+P(0,0,1)*c[1]+P(0,0,2)*c[2]+P(0,0,3)*c[3])+ b[1]*(P(0,1,0)*c[0]+P(0,1,1)*c[1]+P(0,1,2)*c[2]+P(0,1,3)*c[3])+ b[2]*(P(0,2,0)*c[0]+P(0,2,1)*c[1]+P(0,2,2)*c[2]+P(0,2,3)*c[3])+ b[3]*(P(0,3,0)*c[0]+P(0,3,1)*c[1]+P(0,3,2)*c[2]+P(0,3,3)*c[3]))+ a[1]*(b[0]*(P(1,0,0)*c[0]+P(1,0,1)*c[1]+P(1,0,2)*c[2]+P(1,0,3)*c[3])+ b[1]*(P(1,1,0)*c[0]+P(1,1,1)*c[1]+P(1,1,2)*c[2]+P(1,1,3)*c[3])+ 
b[2]*(P(1,2,0)*c[0]+P(1,2,1)*c[1]+P(1,2,2)*c[2]+P(1,2,3)*c[3])+ b[3]*(P(1,3,0)*c[0]+P(1,3,1)*c[1]+P(1,3,2)*c[2]+P(1,3,3)*c[3]))+ a[2]*(b[0]*(P(2,0,0)*c[0]+P(2,0,1)*c[1]+P(2,0,2)*c[2]+P(2,0,3)*c[3])+ b[1]*(P(2,1,0)*c[0]+P(2,1,1)*c[1]+P(2,1,2)*c[2]+P(2,1,3)*c[3])+ b[2]*(P(2,2,0)*c[0]+P(2,2,1)*c[1]+P(2,2,2)*c[2]+P(2,2,3)*c[3])+ b[3]*(P(2,3,0)*c[0]+P(2,3,1)*c[1]+P(2,3,2)*c[2]+P(2,3,3)*c[3]))+ a[3]*(b[0]*(P(3,0,0)*c[0]+P(3,0,1)*c[1]+P(3,0,2)*c[2]+P(3,0,3)*c[3])+ b[1]*(P(3,1,0)*c[0]+P(3,1,1)*c[1]+P(3,1,2)*c[2]+P(3,1,3)*c[3])+ b[2]*(P(3,2,0)*c[0]+P(3,2,1)*c[1]+P(3,2,2)*c[2]+P(3,2,3)*c[3])+ b[3]*(P(3,3,0)*c[0]+P(3,3,1)*c[1]+P(3,3,2)*c[2]+P(3,3,3)*c[3]))); #undef P } /* Value and gradient */ inline void eval_UBspline_3d_s_vg (UBspline_3d_s * restrict spline, double x, double y, double z, float* restrict val, float* restrict grad) { x -= spline->x_grid.start; y -= spline->y_grid.start; z -= spline->z_grid.start; float ux = x*spline->x_grid.delta_inv; float uy = y*spline->y_grid.delta_inv; float uz = z*spline->z_grid.delta_inv; float ipartx, iparty, ipartz, tx, ty, tz; tx = modff (ux, &ipartx); int ix = (int) ipartx; ty = modff (uy, &iparty); int iy = (int) iparty; tz = modff (uz, &ipartz); int iz = (int) ipartz; float tpx[4], tpy[4], tpz[4], a[4], b[4], c[4], da[4], db[4], dc[4], cP[16], bcP[4], dbcP[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; tpz[0] = tz*tz*tz; tpz[1] = tz*tz; tpz[2] = tz; tpz[3] = 1.0; float* restrict coefs = spline->coefs; a[0] = ( Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); a[1] = ( Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); a[2] = ( Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]); a[3] = ( Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]); da[0] = ( dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]); da[1] = ( dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]); da[2] = ( dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + 
dAf[11]*tpx[3]); da[3] = ( dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]); b[0] = ( Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]); b[1] = ( Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]); b[2] = ( Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]); b[3] = ( Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]); db[0] = (dAf[ 1]*tpy[1] + dAf[ 2]*tpy[2] + dAf[ 3]*tpy[3]); db[1] = (dAf[ 5]*tpy[1] + dAf[ 6]*tpy[2] + dAf[ 7]*tpy[3]); db[2] = (dAf[ 9]*tpy[1] + dAf[10]*tpy[2] + dAf[11]*tpy[3]); db[3] = (dAf[13]*tpy[1] + dAf[14]*tpy[2] + dAf[15]*tpy[3]); c[0] = ( Af[ 0]*tpz[0] + Af[ 1]*tpz[1] + Af[ 2]*tpz[2] + Af[ 3]*tpz[3]); c[1] = ( Af[ 4]*tpz[0] + Af[ 5]*tpz[1] + Af[ 6]*tpz[2] + Af[ 7]*tpz[3]); c[2] = ( Af[ 8]*tpz[0] + Af[ 9]*tpz[1] + Af[10]*tpz[2] + Af[11]*tpz[3]); c[3] = ( Af[12]*tpz[0] + Af[13]*tpz[1] + Af[14]*tpz[2] + Af[15]*tpz[3]); dc[0] = (dAf[ 1]*tpz[1] + dAf[ 2]*tpz[2] + dAf[ 3]*tpz[3]); dc[1] = (dAf[ 5]*tpz[1] + dAf[ 6]*tpz[2] + dAf[ 7]*tpz[3]); dc[2] = (dAf[ 9]*tpz[1] + dAf[10]*tpz[2] + dAf[11]*tpz[3]); dc[3] = (dAf[13]*tpz[1] + dAf[14]*tpz[2] + dAf[15]*tpz[3]); int xs = spline->x_stride; int ys = spline->y_stride; #define P(i,j,k) coefs[(ix+(i))*xs+(iy+(j))*ys+(iz+(k))] cP[ 0] = (P(0,0,0)*c[0]+P(0,0,1)*c[1]+P(0,0,2)*c[2]+P(0,0,3)*c[3]); cP[ 1] = (P(0,1,0)*c[0]+P(0,1,1)*c[1]+P(0,1,2)*c[2]+P(0,1,3)*c[3]); cP[ 2] = (P(0,2,0)*c[0]+P(0,2,1)*c[1]+P(0,2,2)*c[2]+P(0,2,3)*c[3]); cP[ 3] = (P(0,3,0)*c[0]+P(0,3,1)*c[1]+P(0,3,2)*c[2]+P(0,3,3)*c[3]); cP[ 4] = (P(1,0,0)*c[0]+P(1,0,1)*c[1]+P(1,0,2)*c[2]+P(1,0,3)*c[3]); cP[ 5] = (P(1,1,0)*c[0]+P(1,1,1)*c[1]+P(1,1,2)*c[2]+P(1,1,3)*c[3]); cP[ 6] = (P(1,2,0)*c[0]+P(1,2,1)*c[1]+P(1,2,2)*c[2]+P(1,2,3)*c[3]); cP[ 7] = (P(1,3,0)*c[0]+P(1,3,1)*c[1]+P(1,3,2)*c[2]+P(1,3,3)*c[3]); cP[ 8] = (P(2,0,0)*c[0]+P(2,0,1)*c[1]+P(2,0,2)*c[2]+P(2,0,3)*c[3]); cP[ 9] = (P(2,1,0)*c[0]+P(2,1,1)*c[1]+P(2,1,2)*c[2]+P(2,1,3)*c[3]); cP[10] = 
(P(2,2,0)*c[0]+P(2,2,1)*c[1]+P(2,2,2)*c[2]+P(2,2,3)*c[3]); cP[11] = (P(2,3,0)*c[0]+P(2,3,1)*c[1]+P(2,3,2)*c[2]+P(2,3,3)*c[3]); cP[12] = (P(3,0,0)*c[0]+P(3,0,1)*c[1]+P(3,0,2)*c[2]+P(3,0,3)*c[3]); cP[13] = (P(3,1,0)*c[0]+P(3,1,1)*c[1]+P(3,1,2)*c[2]+P(3,1,3)*c[3]); cP[14] = (P(3,2,0)*c[0]+P(3,2,1)*c[1]+P(3,2,2)*c[2]+P(3,2,3)*c[3]); cP[15] = (P(3,3,0)*c[0]+P(3,3,1)*c[1]+P(3,3,2)*c[2]+P(3,3,3)*c[3]); bcP[0] = ( b[0]*cP[ 0] + b[1]*cP[ 1] + b[2]*cP[ 2] + b[3]*cP[ 3]); bcP[1] = ( b[0]*cP[ 4] + b[1]*cP[ 5] + b[2]*cP[ 6] + b[3]*cP[ 7]); bcP[2] = ( b[0]*cP[ 8] + b[1]*cP[ 9] + b[2]*cP[10] + b[3]*cP[11]); bcP[3] = ( b[0]*cP[12] + b[1]*cP[13] + b[2]*cP[14] + b[3]*cP[15]); dbcP[0] = ( db[0]*cP[ 0] + db[1]*cP[ 1] + db[2]*cP[ 2] + db[3]*cP[ 3]); dbcP[1] = ( db[0]*cP[ 4] + db[1]*cP[ 5] + db[2]*cP[ 6] + db[3]*cP[ 7]); dbcP[2] = ( db[0]*cP[ 8] + db[1]*cP[ 9] + db[2]*cP[10] + db[3]*cP[11]); dbcP[3] = ( db[0]*cP[12] + db[1]*cP[13] + db[2]*cP[14] + db[3]*cP[15]); *val = ( a[0]*bcP[0] + a[1]*bcP[1] + a[2]*bcP[2] + a[3]*bcP[3]); grad[0] = spline->x_grid.delta_inv * (da[0]*bcP[0] + da[1]*bcP[1] + da[2]*bcP[2] + da[3]*bcP[3]); grad[1] = spline->y_grid.delta_inv * (a[0]*dbcP[0] + a[1]*dbcP[1] + a[2]*dbcP[2] + a[3]*dbcP[3]); grad[2] = spline->z_grid.delta_inv * (a[0]*(b[0]*(P(0,0,0)*dc[0]+P(0,0,1)*dc[1]+P(0,0,2)*dc[2]+P(0,0,3)*dc[3])+ b[1]*(P(0,1,0)*dc[0]+P(0,1,1)*dc[1]+P(0,1,2)*dc[2]+P(0,1,3)*dc[3])+ b[2]*(P(0,2,0)*dc[0]+P(0,2,1)*dc[1]+P(0,2,2)*dc[2]+P(0,2,3)*dc[3])+ b[3]*(P(0,3,0)*dc[0]+P(0,3,1)*dc[1]+P(0,3,2)*dc[2]+P(0,3,3)*dc[3]))+ a[1]*(b[0]*(P(1,0,0)*dc[0]+P(1,0,1)*dc[1]+P(1,0,2)*dc[2]+P(1,0,3)*dc[3])+ b[1]*(P(1,1,0)*dc[0]+P(1,1,1)*dc[1]+P(1,1,2)*dc[2]+P(1,1,3)*dc[3])+ b[2]*(P(1,2,0)*dc[0]+P(1,2,1)*dc[1]+P(1,2,2)*dc[2]+P(1,2,3)*dc[3])+ b[3]*(P(1,3,0)*dc[0]+P(1,3,1)*dc[1]+P(1,3,2)*dc[2]+P(1,3,3)*dc[3]))+ a[2]*(b[0]*(P(2,0,0)*dc[0]+P(2,0,1)*dc[1]+P(2,0,2)*dc[2]+P(2,0,3)*dc[3])+ b[1]*(P(2,1,0)*dc[0]+P(2,1,1)*dc[1]+P(2,1,2)*dc[2]+P(2,1,3)*dc[3])+ 
b[2]*(P(2,2,0)*dc[0]+P(2,2,1)*dc[1]+P(2,2,2)*dc[2]+P(2,2,3)*dc[3])+ b[3]*(P(2,3,0)*dc[0]+P(2,3,1)*dc[1]+P(2,3,2)*dc[2]+P(2,3,3)*dc[3]))+ a[3]*(b[0]*(P(3,0,0)*dc[0]+P(3,0,1)*dc[1]+P(3,0,2)*dc[2]+P(3,0,3)*dc[3])+ b[1]*(P(3,1,0)*dc[0]+P(3,1,1)*dc[1]+P(3,1,2)*dc[2]+P(3,1,3)*dc[3])+ b[2]*(P(3,2,0)*dc[0]+P(3,2,1)*dc[1]+P(3,2,2)*dc[2]+P(3,2,3)*dc[3])+ b[3]*(P(3,3,0)*dc[0]+P(3,3,1)*dc[1]+P(3,3,2)*dc[2]+P(3,3,3)*dc[3]))); #undef P } /* Value, gradient, and laplacian */ inline void eval_UBspline_3d_s_vgl (UBspline_3d_s * restrict spline, double x, double y, double z, float* restrict val, float* restrict grad, float* restrict lapl) { x -= spline->x_grid.start; y -= spline->y_grid.start; z -= spline->z_grid.start; float ux = x*spline->x_grid.delta_inv; float uy = y*spline->y_grid.delta_inv; float uz = z*spline->z_grid.delta_inv; float ipartx, iparty, ipartz, tx, ty, tz; tx = modff (ux, &ipartx); int ix = (int) ipartx; ty = modff (uy, &iparty); int iy = (int) iparty; tz = modff (uz, &ipartz); int iz = (int) ipartz; float tpx[4], tpy[4], tpz[4], a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4], cP[16], dcP[16], bcP[4], dbcP[4], d2bcP[4], bdcP[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; tpz[0] = tz*tz*tz; tpz[1] = tz*tz; tpz[2] = tz; tpz[3] = 1.0; float* restrict coefs = spline->coefs; a[0] = ( Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); a[1] = ( Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); a[2] = ( Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]); a[3] = ( Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]); da[0] = ( dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]); da[1] = ( dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]); da[2] = ( dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]); da[3] = ( dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]); d2a[0] = (d2Af[ 2]*tpx[2] + d2Af[ 3]*tpx[3]); d2a[1] = (d2Af[ 6]*tpx[2] + 
d2Af[ 7]*tpx[3]); d2a[2] = (d2Af[10]*tpx[2] + d2Af[11]*tpx[3]); d2a[3] = (d2Af[14]*tpx[2] + d2Af[15]*tpx[3]); b[0] = ( Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]); b[1] = ( Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]); b[2] = ( Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]); b[3] = ( Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]); db[0] = (dAf[ 1]*tpy[1] + dAf[ 2]*tpy[2] + dAf[ 3]*tpy[3]); db[1] = (dAf[ 5]*tpy[1] + dAf[ 6]*tpy[2] + dAf[ 7]*tpy[3]); db[2] = (dAf[ 9]*tpy[1] + dAf[10]*tpy[2] + dAf[11]*tpy[3]); db[3] = (dAf[13]*tpy[1] + dAf[14]*tpy[2] + dAf[15]*tpy[3]); d2b[0] = (d2Af[ 2]*tpy[2] + d2Af[ 3]*tpy[3]); d2b[1] = (d2Af[ 6]*tpy[2] + d2Af[ 7]*tpy[3]); d2b[2] = (d2Af[10]*tpy[2] + d2Af[11]*tpy[3]); d2b[3] = (d2Af[14]*tpy[2] + d2Af[15]*tpy[3]); c[0] = ( Af[ 0]*tpz[0] + Af[ 1]*tpz[1] + Af[ 2]*tpz[2] + Af[ 3]*tpz[3]); c[1] = ( Af[ 4]*tpz[0] + Af[ 5]*tpz[1] + Af[ 6]*tpz[2] + Af[ 7]*tpz[3]); c[2] = ( Af[ 8]*tpz[0] + Af[ 9]*tpz[1] + Af[10]*tpz[2] + Af[11]*tpz[3]); c[3] = ( Af[12]*tpz[0] + Af[13]*tpz[1] + Af[14]*tpz[2] + Af[15]*tpz[3]); dc[0] = (dAf[ 1]*tpz[1] + dAf[ 2]*tpz[2] + dAf[ 3]*tpz[3]); dc[1] = (dAf[ 5]*tpz[1] + dAf[ 6]*tpz[2] + dAf[ 7]*tpz[3]); dc[2] = (dAf[ 9]*tpz[1] + dAf[10]*tpz[2] + dAf[11]*tpz[3]); dc[3] = (dAf[13]*tpz[1] + dAf[14]*tpz[2] + dAf[15]*tpz[3]); d2c[0] = (d2Af[ 2]*tpz[2] + d2Af[ 3]*tpz[3]); d2c[1] = (d2Af[ 6]*tpz[2] + d2Af[ 7]*tpz[3]); d2c[2] = (d2Af[10]*tpz[2] + d2Af[11]*tpz[3]); d2c[3] = (d2Af[14]*tpz[2] + d2Af[15]*tpz[3]); int xs = spline->x_stride; int ys = spline->y_stride; #define P(i,j,k) coefs[(ix+(i))*xs+(iy+(j))*ys+(iz+(k))] cP[ 0] = (P(0,0,0)*c[0]+P(0,0,1)*c[1]+P(0,0,2)*c[2]+P(0,0,3)*c[3]); cP[ 1] = (P(0,1,0)*c[0]+P(0,1,1)*c[1]+P(0,1,2)*c[2]+P(0,1,3)*c[3]); cP[ 2] = (P(0,2,0)*c[0]+P(0,2,1)*c[1]+P(0,2,2)*c[2]+P(0,2,3)*c[3]); cP[ 3] = (P(0,3,0)*c[0]+P(0,3,1)*c[1]+P(0,3,2)*c[2]+P(0,3,3)*c[3]); cP[ 4] = (P(1,0,0)*c[0]+P(1,0,1)*c[1]+P(1,0,2)*c[2]+P(1,0,3)*c[3]); cP[ 5] 
= (P(1,1,0)*c[0]+P(1,1,1)*c[1]+P(1,1,2)*c[2]+P(1,1,3)*c[3]); cP[ 6] = (P(1,2,0)*c[0]+P(1,2,1)*c[1]+P(1,2,2)*c[2]+P(1,2,3)*c[3]); cP[ 7] = (P(1,3,0)*c[0]+P(1,3,1)*c[1]+P(1,3,2)*c[2]+P(1,3,3)*c[3]); cP[ 8] = (P(2,0,0)*c[0]+P(2,0,1)*c[1]+P(2,0,2)*c[2]+P(2,0,3)*c[3]); cP[ 9] = (P(2,1,0)*c[0]+P(2,1,1)*c[1]+P(2,1,2)*c[2]+P(2,1,3)*c[3]); cP[10] = (P(2,2,0)*c[0]+P(2,2,1)*c[1]+P(2,2,2)*c[2]+P(2,2,3)*c[3]); cP[11] = (P(2,3,0)*c[0]+P(2,3,1)*c[1]+P(2,3,2)*c[2]+P(2,3,3)*c[3]); cP[12] = (P(3,0,0)*c[0]+P(3,0,1)*c[1]+P(3,0,2)*c[2]+P(3,0,3)*c[3]); cP[13] = (P(3,1,0)*c[0]+P(3,1,1)*c[1]+P(3,1,2)*c[2]+P(3,1,3)*c[3]); cP[14] = (P(3,2,0)*c[0]+P(3,2,1)*c[1]+P(3,2,2)*c[2]+P(3,2,3)*c[3]); cP[15] = (P(3,3,0)*c[0]+P(3,3,1)*c[1]+P(3,3,2)*c[2]+P(3,3,3)*c[3]); dcP[ 0] = (P(0,0,0)*dc[0]+P(0,0,1)*dc[1]+P(0,0,2)*dc[2]+P(0,0,3)*dc[3]); dcP[ 1] = (P(0,1,0)*dc[0]+P(0,1,1)*dc[1]+P(0,1,2)*dc[2]+P(0,1,3)*dc[3]); dcP[ 2] = (P(0,2,0)*dc[0]+P(0,2,1)*dc[1]+P(0,2,2)*dc[2]+P(0,2,3)*dc[3]); dcP[ 3] = (P(0,3,0)*dc[0]+P(0,3,1)*dc[1]+P(0,3,2)*dc[2]+P(0,3,3)*dc[3]); dcP[ 4] = (P(1,0,0)*dc[0]+P(1,0,1)*dc[1]+P(1,0,2)*dc[2]+P(1,0,3)*dc[3]); dcP[ 5] = (P(1,1,0)*dc[0]+P(1,1,1)*dc[1]+P(1,1,2)*dc[2]+P(1,1,3)*dc[3]); dcP[ 6] = (P(1,2,0)*dc[0]+P(1,2,1)*dc[1]+P(1,2,2)*dc[2]+P(1,2,3)*dc[3]); dcP[ 7] = (P(1,3,0)*dc[0]+P(1,3,1)*dc[1]+P(1,3,2)*dc[2]+P(1,3,3)*dc[3]); dcP[ 8] = (P(2,0,0)*dc[0]+P(2,0,1)*dc[1]+P(2,0,2)*dc[2]+P(2,0,3)*dc[3]); dcP[ 9] = (P(2,1,0)*dc[0]+P(2,1,1)*dc[1]+P(2,1,2)*dc[2]+P(2,1,3)*dc[3]); dcP[10] = (P(2,2,0)*dc[0]+P(2,2,1)*dc[1]+P(2,2,2)*dc[2]+P(2,2,3)*dc[3]); dcP[11] = (P(2,3,0)*dc[0]+P(2,3,1)*dc[1]+P(2,3,2)*dc[2]+P(2,3,3)*dc[3]); dcP[12] = (P(3,0,0)*dc[0]+P(3,0,1)*dc[1]+P(3,0,2)*dc[2]+P(3,0,3)*dc[3]); dcP[13] = (P(3,1,0)*dc[0]+P(3,1,1)*dc[1]+P(3,1,2)*dc[2]+P(3,1,3)*dc[3]); dcP[14] = (P(3,2,0)*dc[0]+P(3,2,1)*dc[1]+P(3,2,2)*dc[2]+P(3,2,3)*dc[3]); dcP[15] = (P(3,3,0)*dc[0]+P(3,3,1)*dc[1]+P(3,3,2)*dc[2]+P(3,3,3)*dc[3]); bcP[0] = ( b[0]*cP[ 0] + b[1]*cP[ 1] + b[2]*cP[ 2] + b[3]*cP[ 3]); bcP[1] = ( b[0]*cP[ 4] 
+ b[1]*cP[ 5] + b[2]*cP[ 6] + b[3]*cP[ 7]); bcP[2] = ( b[0]*cP[ 8] + b[1]*cP[ 9] + b[2]*cP[10] + b[3]*cP[11]); bcP[3] = ( b[0]*cP[12] + b[1]*cP[13] + b[2]*cP[14] + b[3]*cP[15]); dbcP[0] = ( db[0]*cP[ 0] + db[1]*cP[ 1] + db[2]*cP[ 2] + db[3]*cP[ 3]); dbcP[1] = ( db[0]*cP[ 4] + db[1]*cP[ 5] + db[2]*cP[ 6] + db[3]*cP[ 7]); dbcP[2] = ( db[0]*cP[ 8] + db[1]*cP[ 9] + db[2]*cP[10] + db[3]*cP[11]); dbcP[3] = ( db[0]*cP[12] + db[1]*cP[13] + db[2]*cP[14] + db[3]*cP[15]); bdcP[0] = ( b[0]*dcP[ 0] + b[1]*dcP[ 1] + b[2]*dcP[ 2] + b[3]*dcP[ 3]); bdcP[1] = ( b[0]*dcP[ 4] + b[1]*dcP[ 5] + b[2]*dcP[ 6] + b[3]*dcP[ 7]); bdcP[2] = ( b[0]*dcP[ 8] + b[1]*dcP[ 9] + b[2]*dcP[10] + b[3]*dcP[11]); bdcP[3] = ( b[0]*dcP[12] + b[1]*dcP[13] + b[2]*dcP[14] + b[3]*dcP[15]); d2bcP[0] = ( d2b[0]*cP[ 0] + d2b[1]*cP[ 1] + d2b[2]*cP[ 2] + d2b[3]*cP[ 3]); d2bcP[1] = ( d2b[0]*cP[ 4] + d2b[1]*cP[ 5] + d2b[2]*cP[ 6] + d2b[3]*cP[ 7]); d2bcP[2] = ( d2b[0]*cP[ 8] + d2b[1]*cP[ 9] + d2b[2]*cP[10] + d2b[3]*cP[11]); d2bcP[3] = ( d2b[0]*cP[12] + d2b[1]*cP[13] + d2b[2]*cP[14] + d2b[3]*cP[15]); *val = ( a[0]*bcP[0] + a[1]*bcP[1] + a[2]*bcP[2] + a[3]*bcP[3]); grad[0] = spline->x_grid.delta_inv * (da[0]*bcP[0] + da[1]*bcP[1] + da[2]*bcP[2] + da[3]*bcP[3]); grad[1] = spline->y_grid.delta_inv * (a[0]*dbcP[0] + a[1]*dbcP[1] + a[2]*dbcP[2] + a[3]*dbcP[3]); grad[2] = spline->z_grid.delta_inv * (a[0]*bdcP[0] + a[1]*bdcP[1] + a[2]*bdcP[2] + a[3]*bdcP[3]); *lapl = spline->x_grid.delta_inv * spline->x_grid.delta_inv * (d2a[0]*bcP[0] + d2a[1]*bcP[1] + d2a[2]*bcP[2] + d2a[3]*bcP[3]) + spline->y_grid.delta_inv * spline->y_grid.delta_inv * (a[0]*d2bcP[0] + a[1]*d2bcP[1] + a[2]*d2bcP[2] + a[3]*d2bcP[3]) + + spline->z_grid.delta_inv * spline->z_grid.delta_inv * (a[0]*(b[0]*(P(0,0,0)*d2c[0]+P(0,0,1)*d2c[1]+P(0,0,2)*d2c[2]+P(0,0,3)*d2c[3])+ b[1]*(P(0,1,0)*d2c[0]+P(0,1,1)*d2c[1]+P(0,1,2)*d2c[2]+P(0,1,3)*d2c[3])+ b[2]*(P(0,2,0)*d2c[0]+P(0,2,1)*d2c[1]+P(0,2,2)*d2c[2]+P(0,2,3)*d2c[3])+ 
b[3]*(P(0,3,0)*d2c[0]+P(0,3,1)*d2c[1]+P(0,3,2)*d2c[2]+P(0,3,3)*d2c[3]))+ a[1]*(b[0]*(P(1,0,0)*d2c[0]+P(1,0,1)*d2c[1]+P(1,0,2)*d2c[2]+P(1,0,3)*d2c[3])+ b[1]*(P(1,1,0)*d2c[0]+P(1,1,1)*d2c[1]+P(1,1,2)*d2c[2]+P(1,1,3)*d2c[3])+ b[2]*(P(1,2,0)*d2c[0]+P(1,2,1)*d2c[1]+P(1,2,2)*d2c[2]+P(1,2,3)*d2c[3])+ b[3]*(P(1,3,0)*d2c[0]+P(1,3,1)*d2c[1]+P(1,3,2)*d2c[2]+P(1,3,3)*d2c[3]))+ a[2]*(b[0]*(P(2,0,0)*d2c[0]+P(2,0,1)*d2c[1]+P(2,0,2)*d2c[2]+P(2,0,3)*d2c[3])+ b[1]*(P(2,1,0)*d2c[0]+P(2,1,1)*d2c[1]+P(2,1,2)*d2c[2]+P(2,1,3)*d2c[3])+ b[2]*(P(2,2,0)*d2c[0]+P(2,2,1)*d2c[1]+P(2,2,2)*d2c[2]+P(2,2,3)*d2c[3])+ b[3]*(P(2,3,0)*d2c[0]+P(2,3,1)*d2c[1]+P(2,3,2)*d2c[2]+P(2,3,3)*d2c[3]))+ a[3]*(b[0]*(P(3,0,0)*d2c[0]+P(3,0,1)*d2c[1]+P(3,0,2)*d2c[2]+P(3,0,3)*d2c[3])+ b[1]*(P(3,1,0)*d2c[0]+P(3,1,1)*d2c[1]+P(3,1,2)*d2c[2]+P(3,1,3)*d2c[3])+ b[2]*(P(3,2,0)*d2c[0]+P(3,2,1)*d2c[1]+P(3,2,2)*d2c[2]+P(3,2,3)*d2c[3])+ b[3]*(P(3,3,0)*d2c[0]+P(3,3,1)*d2c[1]+P(3,3,2)*d2c[2]+P(3,3,3)*d2c[3]))); #undef P } /* Value, gradient, and Hessian */ inline void eval_UBspline_3d_s_vgh (UBspline_3d_s * restrict spline, double x, double y, double z, float* restrict val, float* restrict grad, float* restrict hess) { x -= spline->x_grid.start; y -= spline->y_grid.start; z -= spline->z_grid.start; float ux = x*spline->x_grid.delta_inv; float uy = y*spline->y_grid.delta_inv; float uz = z*spline->z_grid.delta_inv; ux = fmin (ux, (double)(spline->x_grid.num)-1.0e-5); uy = fmin (uy, (double)(spline->y_grid.num)-1.0e-5); uz = fmin (uz, (double)(spline->z_grid.num)-1.0e-5); float ipartx, iparty, ipartz, tx, ty, tz; tx = modff (ux, &ipartx); int ix = (int) ipartx; ty = modff (uy, &iparty); int iy = (int) iparty; tz = modff (uz, &ipartz); int iz = (int) ipartz; // if ((ix >= spline->x_grid.num)) x = spline->x_grid.num; // if ((ix < 0)) x = 0; // if ((iy >= spline->y_grid.num)) y = spline->y_grid.num; // if ((iy < 0)) y = 0; // if ((iz >= spline->z_grid.num)) z = spline->z_grid.num; // if ((iz < 0)) z = 0; float tpx[4], tpy[4], tpz[4], 
a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4], cP[16], dcP[16], d2cP[16], bcP[4], dbcP[4], d2bcP[4], dbdcP[4], bd2cP[4], bdcP[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; tpz[0] = tz*tz*tz; tpz[1] = tz*tz; tpz[2] = tz; tpz[3] = 1.0; float* restrict coefs = spline->coefs; a[0] = ( Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); a[1] = ( Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); a[2] = ( Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]); a[3] = ( Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]); da[0] = ( dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]); da[1] = ( dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]); da[2] = ( dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]); da[3] = ( dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]); d2a[0] = (d2Af[ 2]*tpx[2] + d2Af[ 3]*tpx[3]); d2a[1] = (d2Af[ 6]*tpx[2] + d2Af[ 7]*tpx[3]); d2a[2] = (d2Af[10]*tpx[2] + d2Af[11]*tpx[3]); d2a[3] = (d2Af[14]*tpx[2] + d2Af[15]*tpx[3]); b[0] = ( Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]); b[1] = ( Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]); b[2] = ( Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]); b[3] = ( Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]); db[0] = (dAf[ 1]*tpy[1] + dAf[ 2]*tpy[2] + dAf[ 3]*tpy[3]); db[1] = (dAf[ 5]*tpy[1] + dAf[ 6]*tpy[2] + dAf[ 7]*tpy[3]); db[2] = (dAf[ 9]*tpy[1] + dAf[10]*tpy[2] + dAf[11]*tpy[3]); db[3] = (dAf[13]*tpy[1] + dAf[14]*tpy[2] + dAf[15]*tpy[3]); d2b[0] = (d2Af[ 2]*tpy[2] + d2Af[ 3]*tpy[3]); d2b[1] = (d2Af[ 6]*tpy[2] + d2Af[ 7]*tpy[3]); d2b[2] = (d2Af[10]*tpy[2] + d2Af[11]*tpy[3]); d2b[3] = (d2Af[14]*tpy[2] + d2Af[15]*tpy[3]); c[0] = ( Af[ 0]*tpz[0] + Af[ 1]*tpz[1] + Af[ 2]*tpz[2] + Af[ 3]*tpz[3]); c[1] = ( Af[ 4]*tpz[0] + Af[ 5]*tpz[1] + Af[ 6]*tpz[2] + Af[ 7]*tpz[3]); c[2] = ( Af[ 8]*tpz[0] + Af[ 9]*tpz[1] + 
Af[10]*tpz[2] + Af[11]*tpz[3]); c[3] = ( Af[12]*tpz[0] + Af[13]*tpz[1] + Af[14]*tpz[2] + Af[15]*tpz[3]); dc[0] = (dAf[ 1]*tpz[1] + dAf[ 2]*tpz[2] + dAf[ 3]*tpz[3]); dc[1] = (dAf[ 5]*tpz[1] + dAf[ 6]*tpz[2] + dAf[ 7]*tpz[3]); dc[2] = (dAf[ 9]*tpz[1] + dAf[10]*tpz[2] + dAf[11]*tpz[3]); dc[3] = (dAf[13]*tpz[1] + dAf[14]*tpz[2] + dAf[15]*tpz[3]); d2c[0] = (d2Af[ 2]*tpz[2] + d2Af[ 3]*tpz[3]); d2c[1] = (d2Af[ 6]*tpz[2] + d2Af[ 7]*tpz[3]); d2c[2] = (d2Af[10]*tpz[2] + d2Af[11]*tpz[3]); d2c[3] = (d2Af[14]*tpz[2] + d2Af[15]*tpz[3]); int xs = spline->x_stride; int ys = spline->y_stride; int offmax = (ix+3)*xs + (iy+3)*ys + iz+3; // if (offmax > spline->coef_size) { // fprintf (stderr, "Outside bounds in spline evalutation.\n" // "offmax = %d csize = %d\n", offmax, spline->csize); // fprintf (stderr, "ix=%d iy=%d iz=%d\n", ix,iy,iz); // } #define P(i,j,k) coefs[(ix+(i))*xs+(iy+(j))*ys+(iz+(k))] cP[ 0] = (P(0,0,0)*c[0]+P(0,0,1)*c[1]+P(0,0,2)*c[2]+P(0,0,3)*c[3]); cP[ 1] = (P(0,1,0)*c[0]+P(0,1,1)*c[1]+P(0,1,2)*c[2]+P(0,1,3)*c[3]); cP[ 2] = (P(0,2,0)*c[0]+P(0,2,1)*c[1]+P(0,2,2)*c[2]+P(0,2,3)*c[3]); cP[ 3] = (P(0,3,0)*c[0]+P(0,3,1)*c[1]+P(0,3,2)*c[2]+P(0,3,3)*c[3]); cP[ 4] = (P(1,0,0)*c[0]+P(1,0,1)*c[1]+P(1,0,2)*c[2]+P(1,0,3)*c[3]); cP[ 5] = (P(1,1,0)*c[0]+P(1,1,1)*c[1]+P(1,1,2)*c[2]+P(1,1,3)*c[3]); cP[ 6] = (P(1,2,0)*c[0]+P(1,2,1)*c[1]+P(1,2,2)*c[2]+P(1,2,3)*c[3]); cP[ 7] = (P(1,3,0)*c[0]+P(1,3,1)*c[1]+P(1,3,2)*c[2]+P(1,3,3)*c[3]); cP[ 8] = (P(2,0,0)*c[0]+P(2,0,1)*c[1]+P(2,0,2)*c[2]+P(2,0,3)*c[3]); cP[ 9] = (P(2,1,0)*c[0]+P(2,1,1)*c[1]+P(2,1,2)*c[2]+P(2,1,3)*c[3]); cP[10] = (P(2,2,0)*c[0]+P(2,2,1)*c[1]+P(2,2,2)*c[2]+P(2,2,3)*c[3]); cP[11] = (P(2,3,0)*c[0]+P(2,3,1)*c[1]+P(2,3,2)*c[2]+P(2,3,3)*c[3]); cP[12] = (P(3,0,0)*c[0]+P(3,0,1)*c[1]+P(3,0,2)*c[2]+P(3,0,3)*c[3]); cP[13] = (P(3,1,0)*c[0]+P(3,1,1)*c[1]+P(3,1,2)*c[2]+P(3,1,3)*c[3]); cP[14] = (P(3,2,0)*c[0]+P(3,2,1)*c[1]+P(3,2,2)*c[2]+P(3,2,3)*c[3]); cP[15] = (P(3,3,0)*c[0]+P(3,3,1)*c[1]+P(3,3,2)*c[2]+P(3,3,3)*c[3]); dcP[ 0] = 
(P(0,0,0)*dc[0]+P(0,0,1)*dc[1]+P(0,0,2)*dc[2]+P(0,0,3)*dc[3]); dcP[ 1] = (P(0,1,0)*dc[0]+P(0,1,1)*dc[1]+P(0,1,2)*dc[2]+P(0,1,3)*dc[3]); dcP[ 2] = (P(0,2,0)*dc[0]+P(0,2,1)*dc[1]+P(0,2,2)*dc[2]+P(0,2,3)*dc[3]); dcP[ 3] = (P(0,3,0)*dc[0]+P(0,3,1)*dc[1]+P(0,3,2)*dc[2]+P(0,3,3)*dc[3]); dcP[ 4] = (P(1,0,0)*dc[0]+P(1,0,1)*dc[1]+P(1,0,2)*dc[2]+P(1,0,3)*dc[3]); dcP[ 5] = (P(1,1,0)*dc[0]+P(1,1,1)*dc[1]+P(1,1,2)*dc[2]+P(1,1,3)*dc[3]); dcP[ 6] = (P(1,2,0)*dc[0]+P(1,2,1)*dc[1]+P(1,2,2)*dc[2]+P(1,2,3)*dc[3]); dcP[ 7] = (P(1,3,0)*dc[0]+P(1,3,1)*dc[1]+P(1,3,2)*dc[2]+P(1,3,3)*dc[3]); dcP[ 8] = (P(2,0,0)*dc[0]+P(2,0,1)*dc[1]+P(2,0,2)*dc[2]+P(2,0,3)*dc[3]); dcP[ 9] = (P(2,1,0)*dc[0]+P(2,1,1)*dc[1]+P(2,1,2)*dc[2]+P(2,1,3)*dc[3]); dcP[10] = (P(2,2,0)*dc[0]+P(2,2,1)*dc[1]+P(2,2,2)*dc[2]+P(2,2,3)*dc[3]); dcP[11] = (P(2,3,0)*dc[0]+P(2,3,1)*dc[1]+P(2,3,2)*dc[2]+P(2,3,3)*dc[3]); dcP[12] = (P(3,0,0)*dc[0]+P(3,0,1)*dc[1]+P(3,0,2)*dc[2]+P(3,0,3)*dc[3]); dcP[13] = (P(3,1,0)*dc[0]+P(3,1,1)*dc[1]+P(3,1,2)*dc[2]+P(3,1,3)*dc[3]); dcP[14] = (P(3,2,0)*dc[0]+P(3,2,1)*dc[1]+P(3,2,2)*dc[2]+P(3,2,3)*dc[3]); dcP[15] = (P(3,3,0)*dc[0]+P(3,3,1)*dc[1]+P(3,3,2)*dc[2]+P(3,3,3)*dc[3]); d2cP[ 0] = (P(0,0,0)*d2c[0]+P(0,0,1)*d2c[1]+P(0,0,2)*d2c[2]+P(0,0,3)*d2c[3]); d2cP[ 1] = (P(0,1,0)*d2c[0]+P(0,1,1)*d2c[1]+P(0,1,2)*d2c[2]+P(0,1,3)*d2c[3]); d2cP[ 2] = (P(0,2,0)*d2c[0]+P(0,2,1)*d2c[1]+P(0,2,2)*d2c[2]+P(0,2,3)*d2c[3]); d2cP[ 3] = (P(0,3,0)*d2c[0]+P(0,3,1)*d2c[1]+P(0,3,2)*d2c[2]+P(0,3,3)*d2c[3]); d2cP[ 4] = (P(1,0,0)*d2c[0]+P(1,0,1)*d2c[1]+P(1,0,2)*d2c[2]+P(1,0,3)*d2c[3]); d2cP[ 5] = (P(1,1,0)*d2c[0]+P(1,1,1)*d2c[1]+P(1,1,2)*d2c[2]+P(1,1,3)*d2c[3]); d2cP[ 6] = (P(1,2,0)*d2c[0]+P(1,2,1)*d2c[1]+P(1,2,2)*d2c[2]+P(1,2,3)*d2c[3]); d2cP[ 7] = (P(1,3,0)*d2c[0]+P(1,3,1)*d2c[1]+P(1,3,2)*d2c[2]+P(1,3,3)*d2c[3]); d2cP[ 8] = (P(2,0,0)*d2c[0]+P(2,0,1)*d2c[1]+P(2,0,2)*d2c[2]+P(2,0,3)*d2c[3]); d2cP[ 9] = (P(2,1,0)*d2c[0]+P(2,1,1)*d2c[1]+P(2,1,2)*d2c[2]+P(2,1,3)*d2c[3]); d2cP[10] = 
(P(2,2,0)*d2c[0]+P(2,2,1)*d2c[1]+P(2,2,2)*d2c[2]+P(2,2,3)*d2c[3]); d2cP[11] = (P(2,3,0)*d2c[0]+P(2,3,1)*d2c[1]+P(2,3,2)*d2c[2]+P(2,3,3)*d2c[3]); d2cP[12] = (P(3,0,0)*d2c[0]+P(3,0,1)*d2c[1]+P(3,0,2)*d2c[2]+P(3,0,3)*d2c[3]); d2cP[13] = (P(3,1,0)*d2c[0]+P(3,1,1)*d2c[1]+P(3,1,2)*d2c[2]+P(3,1,3)*d2c[3]); d2cP[14] = (P(3,2,0)*d2c[0]+P(3,2,1)*d2c[1]+P(3,2,2)*d2c[2]+P(3,2,3)*d2c[3]); d2cP[15] = (P(3,3,0)*d2c[0]+P(3,3,1)*d2c[1]+P(3,3,2)*d2c[2]+P(3,3,3)*d2c[3]); bcP[0] = ( b[0]*cP[ 0] + b[1]*cP[ 1] + b[2]*cP[ 2] + b[3]*cP[ 3]); bcP[1] = ( b[0]*cP[ 4] + b[1]*cP[ 5] + b[2]*cP[ 6] + b[3]*cP[ 7]); bcP[2] = ( b[0]*cP[ 8] + b[1]*cP[ 9] + b[2]*cP[10] + b[3]*cP[11]); bcP[3] = ( b[0]*cP[12] + b[1]*cP[13] + b[2]*cP[14] + b[3]*cP[15]); dbcP[0] = ( db[0]*cP[ 0] + db[1]*cP[ 1] + db[2]*cP[ 2] + db[3]*cP[ 3]); dbcP[1] = ( db[0]*cP[ 4] + db[1]*cP[ 5] + db[2]*cP[ 6] + db[3]*cP[ 7]); dbcP[2] = ( db[0]*cP[ 8] + db[1]*cP[ 9] + db[2]*cP[10] + db[3]*cP[11]); dbcP[3] = ( db[0]*cP[12] + db[1]*cP[13] + db[2]*cP[14] + db[3]*cP[15]); bdcP[0] = ( b[0]*dcP[ 0] + b[1]*dcP[ 1] + b[2]*dcP[ 2] + b[3]*dcP[ 3]); bdcP[1] = ( b[0]*dcP[ 4] + b[1]*dcP[ 5] + b[2]*dcP[ 6] + b[3]*dcP[ 7]); bdcP[2] = ( b[0]*dcP[ 8] + b[1]*dcP[ 9] + b[2]*dcP[10] + b[3]*dcP[11]); bdcP[3] = ( b[0]*dcP[12] + b[1]*dcP[13] + b[2]*dcP[14] + b[3]*dcP[15]); bd2cP[0] = ( b[0]*d2cP[ 0] + b[1]*d2cP[ 1] + b[2]*d2cP[ 2] + b[3]*d2cP[ 3]); bd2cP[1] = ( b[0]*d2cP[ 4] + b[1]*d2cP[ 5] + b[2]*d2cP[ 6] + b[3]*d2cP[ 7]); bd2cP[2] = ( b[0]*d2cP[ 8] + b[1]*d2cP[ 9] + b[2]*d2cP[10] + b[3]*d2cP[11]); bd2cP[3] = ( b[0]*d2cP[12] + b[1]*d2cP[13] + b[2]*d2cP[14] + b[3]*d2cP[15]); d2bcP[0] = ( d2b[0]*cP[ 0] + d2b[1]*cP[ 1] + d2b[2]*cP[ 2] + d2b[3]*cP[ 3]); d2bcP[1] = ( d2b[0]*cP[ 4] + d2b[1]*cP[ 5] + d2b[2]*cP[ 6] + d2b[3]*cP[ 7]); d2bcP[2] = ( d2b[0]*cP[ 8] + d2b[1]*cP[ 9] + d2b[2]*cP[10] + d2b[3]*cP[11]); d2bcP[3] = ( d2b[0]*cP[12] + d2b[1]*cP[13] + d2b[2]*cP[14] + d2b[3]*cP[15]); dbdcP[0] = ( db[0]*dcP[ 0] + db[1]*dcP[ 1] + db[2]*dcP[ 2] + db[3]*dcP[ 3]); 
dbdcP[1] = ( db[0]*dcP[ 4] + db[1]*dcP[ 5] + db[2]*dcP[ 6] + db[3]*dcP[ 7]); dbdcP[2] = ( db[0]*dcP[ 8] + db[1]*dcP[ 9] + db[2]*dcP[10] + db[3]*dcP[11]); dbdcP[3] = ( db[0]*dcP[12] + db[1]*dcP[13] + db[2]*dcP[14] + db[3]*dcP[15]); *val = a[0]*bcP[0] + a[1]*bcP[1] + a[2]*bcP[2] + a[3]*bcP[3]; grad[0] = spline->x_grid.delta_inv * (da[0] *bcP[0] + da[1]*bcP[1] + da[2]*bcP[2] + da[3]*bcP[3]); grad[1] = spline->y_grid.delta_inv * (a[0]*dbcP[0] + a[1]*dbcP[1] + a[2]*dbcP[2] + a[3]*dbcP[3]); grad[2] = spline->z_grid.delta_inv * (a[0]*bdcP[0] + a[1]*bdcP[1] + a[2]*bdcP[2] + a[3]*bdcP[3]); // d2x hess[0] = spline->x_grid.delta_inv * spline->x_grid.delta_inv * (d2a[0]*bcP[0] + d2a[1]*bcP[1] + d2a[2]*bcP[2] + d2a[3]*bcP[3]); // dx dy hess[1] = spline->x_grid.delta_inv * spline->y_grid.delta_inv * (da[0]*dbcP[0] + da[1]*dbcP[1] + da[2]*dbcP[2] + da[3]*dbcP[3]); hess[3] = hess[1]; // dx dz; hess[2] = spline->x_grid.delta_inv * spline->z_grid.delta_inv * (da[0]*bdcP[0] + da[1]*bdcP[1] + da[2]*bdcP[2] + da[3]*bdcP[3]); hess[6] = hess[2]; // d2y hess[4] = spline->y_grid.delta_inv * spline->y_grid.delta_inv * (a[0]*d2bcP[0] + a[1]*d2bcP[1] + a[2]*d2bcP[2] + a[3]*d2bcP[3]); // dy dz hess[5] = spline->y_grid.delta_inv * spline->z_grid.delta_inv * (a[0]*dbdcP[0] + a[1]*dbdcP[1] + a[2]*dbdcP[2] + a[3]*dbdcP[3]); hess[7] = hess[5]; // d2z hess[8] = spline->z_grid.delta_inv * spline->z_grid.delta_inv * (a[0]*bd2cP[0] + a[1]*bd2cP[1] + a[2]*bd2cP[2] + a[3]*bd2cP[3]); #undef P } #endif einspline-0.9.2/src/test_bspline_d.c0000664000113000011300000001446511012400563014346 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. 
// // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. // // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #include "bspline.h" #include #include #include #include #ifndef M_PI #define M_PI 3.1415926535897932384626433 #endif double drand48(); void sincos (double phi, double *s, double *c); typedef struct { double kcut; double *Gvecs; double *coefs; int numG; } periodic_func_d; void int_periodic_func (periodic_func_d *func, double kcut) { func->kcut = kcut; func->numG = 0; int imax = (int) ceil (kcut/(2.0*M_PI)); for (int ix=-imax; ix<=imax; ix++) { double kx = 2.0*M_PI * ix; for (int iy=-imax; iy<=imax; iy++) { double ky = 2.0*M_PI * iy; for (int iz=-imax; iz<=imax; iz++) { double kz = 2.0*M_PI * iz; if ((kx*kx + ky*ky + kz*kz) < (kcut*kcut)) func->numG++; } } } func->Gvecs = (double*) malloc (3*sizeof(double)*func->numG); func->coefs = (double*) malloc (2*sizeof(double) *func->numG); int iG = 0; for (int ix=-imax; ix<=imax; ix++) { double kx = 2.0*M_PI * ix; for (int iy=-imax; iy<=imax; iy++) { double ky = 2.0*M_PI * iy; for (int iz=-imax; iz<=imax; iz++) { double kz = 2.0*M_PI * iz; if ((kx*kx + ky*ky + kz*kz) < (kcut*kcut)) { func->Gvecs[3*iG+0] = kx; func->Gvecs[3*iG+1] = ky; func->Gvecs[3*iG+2] = kz; func->coefs[2*iG+0] = 
2.0*(drand48()-0.5); func->coefs[2*iG+1] = 2.0*(drand48()-0.5); iG++; } } } } } void eval_periodic_func_d (periodic_func_d* restrict func, double x, double y, double z, double *restrict val, double *restrict grad, double *restrict hess) { *val = 0.0; for (int i=0; i<3; i++) grad[i] = 0.0; for (int i=0; i<9; i++) hess[i] = 0.0; for (int iG=0; iGnumG; iG++) { double kx = func->Gvecs[3*iG+0]; double ky = func->Gvecs[3*iG+1]; double kz = func->Gvecs[3*iG+2]; double phase = x*kx + y*ky + z*kz; double re, im; sincos(phase, &im, &re); double c_re = func->coefs[2*iG+0]; double c_im = func->coefs[2*iG+1]; *val += re*c_re - im*c_im; grad[0] += -kx*(re*c_im + im*c_re); grad[1] += -ky*(re*c_im + im*c_re); grad[2] += -kz*(re*c_im + im*c_re); hess[0] += -kx*kx*(re*c_re - im*c_im); hess[1] += -kx*ky*(re*c_re - im*c_im); hess[2] += -kx*kz*(re*c_re - im*c_im); hess[3] += -ky*kx*(re*c_re - im*c_im); hess[4] += -ky*ky*(re*c_re - im*c_im); hess[5] += -ky*kz*(re*c_re - im*c_im); hess[6] += -kz*kx*(re*c_re - im*c_im); hess[7] += -kz*ky*(re*c_re - im*c_im); hess[8] += -kz*kz*(re*c_re - im*c_im); } } void test_bspline_3d_d() { double kcut = 2.0*M_PI * 5.0; int Nspline = 100; Ugrid x_grid, y_grid, z_grid; x_grid.start = 0.0; x_grid.end = 1.0; x_grid.num = Nspline; y_grid.start = 0.0; y_grid.end = 1.0; y_grid.num = Nspline; z_grid.start = 0.0; z_grid.end = 1.0; z_grid.num = Nspline; double dx = 1.0/(double)(Nspline); double dy = 1.0/(double)(Nspline); double dz = 1.0/(double)(Nspline); BCtype_d xBC, yBC, zBC; xBC.lCode = xBC.rCode = PERIODIC; yBC.lCode = yBC.rCode = PERIODIC; zBC.lCode = zBC.rCode = PERIODIC; double *data = malloc (sizeof(double)*Nspline*Nspline*Nspline); periodic_func_d func; int_periodic_func (&func, kcut); for (int ix=0; ix < x_grid.num; ix++) { double x = (double) ix * dx; for (int iy=0; iy < y_grid.num; iy++) { double y = (double) iy * dy; for (int iz=0; iz < z_grid.num; iz++) { double z = (double) iz * dz; double val, grad[3], hess[9]; eval_periodic_func_d (&func, x, 
y, z, &val, grad, hess); data[(ix*Nspline+iy)*Nspline+iz] = val; } } } UBspline_3d_d *spline = create_UBspline_3d_d (x_grid, y_grid, z_grid, xBC, yBC, zBC, data); int numTest = 10000; double valerror = 0.0; double graderror = 0.0; double hesserror = 0.0; double valsum=0.0, gradsum=0.0, hesssum=0.0; for (int i=0; i #include #include "bspline_base.h" #include "multi_bspline_structs.h" extern const float* restrict Af; extern const float* restrict dAf; extern const float* restrict d2Af; /************************************************************/ /* 1D double-precision, real evaulation functions */ /************************************************************/ void eval_multi_UBspline_1d_s (multi_UBspline_1d_s *spline, double x, float* restrict vals) { x -= spline->x_grid.start; float ux = x*spline->x_grid.delta_inv; float ipartx, tx; tx = modff (ux, &ipartx); int ix = (int) ipartx; float tpx[4], a[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; float* restrict coefs = spline->coefs; a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]); a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]); intptr_t xs = spline->x_stride; for (int n=0; nnum_splines; n++) vals[n] = 0.0; for (int i=0; i<4; i++) { float* restrict coefs = spline->coefs + ((ix+i)*xs); for (int n=0; nnum_splines; n++) vals[n] += a[i] * coefs[n]; } } void eval_multi_UBspline_1d_s_vg (multi_UBspline_1d_s *spline, double x, float* restrict vals, float* restrict grads) { x -= spline->x_grid.start; float ux = x*spline->x_grid.delta_inv; float ipartx, tx; tx = modff (ux, &ipartx); int ix = (int) ipartx; float tpx[4], a[4], da[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; float* restrict coefs = spline->coefs; a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); a[1] = (Af[ 
4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]); a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]); da[0] = (dAf[ 0]*tpx[0] + dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]); da[1] = (dAf[ 4]*tpx[0] + dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]); da[2] = (dAf[ 8]*tpx[0] + dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]); da[3] = (dAf[12]*tpx[0] + dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]); intptr_t xs = spline->x_stride; for (int n=0; nnum_splines; n++) { vals[n] = 0.0; grads[n] = 0.0; } for (int i=0; i<4; i++) { float* restrict coefs = spline->coefs + ((ix+i)*xs); for (int n=0; nnum_splines; n++) { vals[n] += a[i] * coefs[n]; grads[n] += da[i] * coefs[n]; } } float dxInv = spline->x_grid.delta_inv; for (int n=0; nnum_splines; n++) grads[n] *= dxInv; } void eval_multi_UBspline_1d_s_vgl (multi_UBspline_1d_s *spline, double x, float* restrict vals, float* restrict grads, float* restrict lapl) { x -= spline->x_grid.start; float ux = x*spline->x_grid.delta_inv; float ipartx, tx; tx = modff (ux, &ipartx); int ix = (int) ipartx; float tpx[4], a[4], da[4], d2a[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; float* restrict coefs = spline->coefs; a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]); a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]); da[0] = (dAf[ 0]*tpx[0] + dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]); da[1] = (dAf[ 4]*tpx[0] + dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]); da[2] = (dAf[ 8]*tpx[0] + dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]); da[3] = (dAf[12]*tpx[0] + dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]); d2a[0] = (d2Af[ 0]*tpx[0] + d2Af[ 1]*tpx[1] + d2Af[ 2]*tpx[2] + d2Af[ 3]*tpx[3]); d2a[1] = (d2Af[ 4]*tpx[0] + 
d2Af[ 5]*tpx[1] + d2Af[ 6]*tpx[2] + d2Af[ 7]*tpx[3]); d2a[2] = (d2Af[ 8]*tpx[0] + d2Af[ 9]*tpx[1] + d2Af[10]*tpx[2] + d2Af[11]*tpx[3]); d2a[3] = (d2Af[12]*tpx[0] + d2Af[13]*tpx[1] + d2Af[14]*tpx[2] + d2Af[15]*tpx[3]); intptr_t xs = spline->x_stride; for (int n=0; nnum_splines; n++) { vals[n] = 0.0; grads[n] = 0.0; lapl[n] = 0.0; } for (int i=0; i<4; i++) { float* restrict coefs = spline->coefs + ((ix+i)*xs); for (int n=0; nnum_splines; n++) { vals[n] += a[i] * coefs[n]; grads[n] += da[i] * coefs[n]; lapl[n] += d2a[i] * coefs[n]; } } float dxInv = spline->x_grid.delta_inv; for (int n=0; nnum_splines; n++) { grads[n] *= dxInv; lapl [n] *= dxInv*dxInv; } } void eval_multi_UBspline_1d_s_vgh (multi_UBspline_1d_s *spline, double x, float* restrict vals, float* restrict grads, float* restrict hess) { eval_multi_UBspline_1d_s_vgl (spline, x, vals, grads, hess); } /************************************************************/ /* 2D double-precision, real evaulation functions */ /************************************************************/ void eval_multi_UBspline_2d_s (multi_UBspline_2d_s *spline, double x, double y, float* restrict vals) { x -= spline->x_grid.start; y -= spline->y_grid.start; float ux = x*spline->x_grid.delta_inv; float uy = y*spline->y_grid.delta_inv; float ipartx, iparty, tx, ty; tx = modff (ux, &ipartx); int ix = (int) ipartx; ty = modff (uy, &iparty); int iy = (int) iparty; float tpx[4], tpy[4], a[4], b[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; float* restrict coefs = spline->coefs; a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]); a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]); b[0] = (Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]); b[1] = (Af[ 4]*tpy[0] + Af[ 
5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]); b[2] = (Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]); b[3] = (Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]); intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; for (int n=0; nnum_splines; n++) vals[n] = 0.0; for (int i=0; i<4; i++) for (int j=0; j<4; j++) { float prefactor = a[i]*b[j]; float* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys); for (int n=0; nnum_splines; n++) vals[n] += prefactor*coefs[n]; } } void eval_multi_UBspline_2d_s_vg (multi_UBspline_2d_s *spline, double x, double y, float* restrict vals, float* restrict grads) { x -= spline->x_grid.start; y -= spline->y_grid.start; float ux = x*spline->x_grid.delta_inv; float uy = y*spline->y_grid.delta_inv; float ipartx, iparty, tx, ty; tx = modff (ux, &ipartx); int ix = (int) ipartx; ty = modff (uy, &iparty); int iy = (int) iparty; float tpx[4], tpy[4], a[4], b[4], da[4], db[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; float* restrict coefs = spline->coefs; a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]); a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]); da[0] = (dAf[ 0]*tpx[0] + dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]); da[1] = (dAf[ 4]*tpx[0] + dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]); da[2] = (dAf[ 8]*tpx[0] + dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]); da[3] = (dAf[12]*tpx[0] + dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]); b[0] = (Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]); b[1] = (Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]); b[2] = (Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]); b[3] = (Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]); 
db[0] = (dAf[ 0]*tpy[0] + dAf[ 1]*tpy[1] + dAf[ 2]*tpy[2] + dAf[ 3]*tpy[3]); db[1] = (dAf[ 4]*tpy[0] + dAf[ 5]*tpy[1] + dAf[ 6]*tpy[2] + dAf[ 7]*tpy[3]); db[2] = (dAf[ 8]*tpy[0] + dAf[ 9]*tpy[1] + dAf[10]*tpy[2] + dAf[11]*tpy[3]); db[3] = (dAf[12]*tpy[0] + dAf[13]*tpy[1] + dAf[14]*tpy[2] + dAf[15]*tpy[3]); intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; for (int n=0; nnum_splines; n++) { vals[n] = 0.0; grads[2*n+0] = grads[2*n+1] = grads[2*n+2] = 0.0; } for (int i=0; i<4; i++) for (int j=0; j<4; j++) { float ab = a[i]*b[j]; float dab[2]; dab[0] = da[i]* b[j]; dab[1] = a[i]*db[j]; float* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys); for (int n=0; nnum_splines; n++) { vals [n] += ab *coefs[n]; grads[2*n+0] += dab[0]*coefs[n]; grads[2*n+1] += dab[1]*coefs[n]; } } float dxInv = spline->x_grid.delta_inv; float dyInv = spline->y_grid.delta_inv; for (int n=0; nnum_splines; n++) { grads[2*n+0] *= dxInv; grads[2*n+1] *= dyInv; } } void eval_multi_UBspline_2d_s_vgl (multi_UBspline_2d_s *spline, double x, double y, float* restrict vals, float* restrict grads, float* restrict lapl) { x -= spline->x_grid.start; y -= spline->y_grid.start; float ux = x*spline->x_grid.delta_inv; float uy = y*spline->y_grid.delta_inv; float ipartx, iparty, tx, ty; tx = modff (ux, &ipartx); int ix = (int) ipartx; ty = modff (uy, &iparty); int iy = (int) iparty; float tpx[4], tpy[4], a[4], b[4], da[4], db[4], d2a[4], d2b[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; float* restrict coefs = spline->coefs; a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]); a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]); da[0] = (dAf[ 0]*tpx[0] + dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]); da[1] = (dAf[ 4]*tpx[0] + dAf[ 
5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]); da[2] = (dAf[ 8]*tpx[0] + dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]); da[3] = (dAf[12]*tpx[0] + dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]); d2a[0] = (d2Af[ 0]*tpx[0] + d2Af[ 1]*tpx[1] + d2Af[ 2]*tpx[2] + d2Af[ 3]*tpx[3]); d2a[1] = (d2Af[ 4]*tpx[0] + d2Af[ 5]*tpx[1] + d2Af[ 6]*tpx[2] + d2Af[ 7]*tpx[3]); d2a[2] = (d2Af[ 8]*tpx[0] + d2Af[ 9]*tpx[1] + d2Af[10]*tpx[2] + d2Af[11]*tpx[3]); d2a[3] = (d2Af[12]*tpx[0] + d2Af[13]*tpx[1] + d2Af[14]*tpx[2] + d2Af[15]*tpx[3]); b[0] = (Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]); b[1] = (Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]); b[2] = (Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]); b[3] = (Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]); db[0] = (dAf[ 0]*tpy[0] + dAf[ 1]*tpy[1] + dAf[ 2]*tpy[2] + dAf[ 3]*tpy[3]); db[1] = (dAf[ 4]*tpy[0] + dAf[ 5]*tpy[1] + dAf[ 6]*tpy[2] + dAf[ 7]*tpy[3]); db[2] = (dAf[ 8]*tpy[0] + dAf[ 9]*tpy[1] + dAf[10]*tpy[2] + dAf[11]*tpy[3]); db[3] = (dAf[12]*tpy[0] + dAf[13]*tpy[1] + dAf[14]*tpy[2] + dAf[15]*tpy[3]); d2b[0] = (d2Af[ 0]*tpy[0] + d2Af[ 1]*tpy[1] + d2Af[ 2]*tpy[2] + d2Af[ 3]*tpy[3]); d2b[1] = (d2Af[ 4]*tpy[0] + d2Af[ 5]*tpy[1] + d2Af[ 6]*tpy[2] + d2Af[ 7]*tpy[3]); d2b[2] = (d2Af[ 8]*tpy[0] + d2Af[ 9]*tpy[1] + d2Af[10]*tpy[2] + d2Af[11]*tpy[3]); d2b[3] = (d2Af[12]*tpy[0] + d2Af[13]*tpy[1] + d2Af[14]*tpy[2] + d2Af[15]*tpy[3]); intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; float lapl2[2*spline->num_splines]; for (int n=0; nnum_splines; n++) { vals[n] = 0.0; grads[2*n+0] = grads[2*n+1] = 0.0; lapl2[2*n+0] = lapl2[2*n+1] = 0.0; } for (int i=0; i<4; i++) for (int j=0; j<4; j++) { float ab = a[i]*b[j]; float dab[2], d2ab[2]; dab[0] = da[i]* b[j]; dab[1] = a[i]*db[j]; d2ab[0] = d2a[i]* b[j]; d2ab[1] = a[i]*d2b[j]; float* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys); for (int n=0; nnum_splines; n++) { vals[n] += ab *coefs[n]; grads[2*n+0] += 
dab[0]*coefs[n]; grads[2*n+1] += dab[1]*coefs[n]; lapl2[2*n+0] += d2ab[0]*coefs[n]; lapl2[2*n+1] += d2ab[1]*coefs[n]; } } float dxInv = spline->x_grid.delta_inv; float dyInv = spline->y_grid.delta_inv; for (int n=0; nnum_splines; n++) { grads[2*n+0] *= dxInv; grads[2*n+1] *= dyInv; lapl2[2*n+0] *= dxInv*dxInv; lapl2[2*n+1] *= dyInv*dyInv; lapl[n] = lapl2[2*n+0] + lapl2[2*n+1]; } } void eval_multi_UBspline_2d_s_vgh (multi_UBspline_2d_s *spline, double x, double y, float* restrict vals, float* restrict grads, float* restrict hess) { x -= spline->x_grid.start; y -= spline->y_grid.start; float ux = x*spline->x_grid.delta_inv; float uy = y*spline->y_grid.delta_inv; float ipartx, iparty, tx, ty; tx = modff (ux, &ipartx); int ix = (int) ipartx; ty = modff (uy, &iparty); int iy = (int) iparty; float tpx[4], tpy[4], a[4], b[4], da[4], db[4], d2a[4], d2b[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; float* restrict coefs = spline->coefs; a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]); a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]); da[0] = (dAf[ 0]*tpx[0] + dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]); da[1] = (dAf[ 4]*tpx[0] + dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]); da[2] = (dAf[ 8]*tpx[0] + dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]); da[3] = (dAf[12]*tpx[0] + dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]); d2a[0] = (d2Af[ 0]*tpx[0] + d2Af[ 1]*tpx[1] + d2Af[ 2]*tpx[2] + d2Af[ 3]*tpx[3]); d2a[1] = (d2Af[ 4]*tpx[0] + d2Af[ 5]*tpx[1] + d2Af[ 6]*tpx[2] + d2Af[ 7]*tpx[3]); d2a[2] = (d2Af[ 8]*tpx[0] + d2Af[ 9]*tpx[1] + d2Af[10]*tpx[2] + d2Af[11]*tpx[3]); d2a[3] = (d2Af[12]*tpx[0] + d2Af[13]*tpx[1] + d2Af[14]*tpx[2] + d2Af[15]*tpx[3]); b[0] = (Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 
2]*tpy[2] + Af[ 3]*tpy[3]); b[1] = (Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]); b[2] = (Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]); b[3] = (Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]); db[0] = (dAf[ 0]*tpy[0] + dAf[ 1]*tpy[1] + dAf[ 2]*tpy[2] + dAf[ 3]*tpy[3]); db[1] = (dAf[ 4]*tpy[0] + dAf[ 5]*tpy[1] + dAf[ 6]*tpy[2] + dAf[ 7]*tpy[3]); db[2] = (dAf[ 8]*tpy[0] + dAf[ 9]*tpy[1] + dAf[10]*tpy[2] + dAf[11]*tpy[3]); db[3] = (dAf[12]*tpy[0] + dAf[13]*tpy[1] + dAf[14]*tpy[2] + dAf[15]*tpy[3]); d2b[0] = (d2Af[ 0]*tpy[0] + d2Af[ 1]*tpy[1] + d2Af[ 2]*tpy[2] + d2Af[ 3]*tpy[3]); d2b[1] = (d2Af[ 4]*tpy[0] + d2Af[ 5]*tpy[1] + d2Af[ 6]*tpy[2] + d2Af[ 7]*tpy[3]); d2b[2] = (d2Af[ 8]*tpy[0] + d2Af[ 9]*tpy[1] + d2Af[10]*tpy[2] + d2Af[11]*tpy[3]); d2b[3] = (d2Af[12]*tpy[0] + d2Af[13]*tpy[1] + d2Af[14]*tpy[2] + d2Af[15]*tpy[3]); intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; for (int n=0; nnum_splines; n++) { vals[n] = 0.0; grads[2*n+0] = grads[2*n+1] = 0.0; for (int i=0; i<4; i++) hess[4*n+i] = 0.0; } for (int i=0; i<4; i++) for (int j=0; j<4; j++){ float ab = a[i]*b[j]; float dab[2], d2ab[3]; dab[0] = da[i]* b[j]; dab[1] = a[i]*db[j]; d2ab[0] = d2a[i] * b[j]; d2ab[1] = da[i] * db[j]; d2ab[2] = a[i] * d2b[j]; float* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys); for (int n=0; nnum_splines; n++) { vals[n] += ab *coefs[n]; grads[2*n+0] += dab[0]*coefs[n]; grads[2*n+1] += dab[1]*coefs[n]; hess [4*n+0] += d2ab[0]*coefs[n]; hess [4*n+1] += d2ab[1]*coefs[n]; hess [4*n+3] += d2ab[2]*coefs[n]; } } float dxInv = spline->x_grid.delta_inv; float dyInv = spline->y_grid.delta_inv; for (int n=0; nnum_splines; n++) { grads[2*n+0] *= dxInv; grads[2*n+1] *= dyInv; hess[4*n+0] *= dxInv*dxInv; hess[4*n+1] *= dxInv*dyInv; hess[4*n+3] *= dyInv*dyInv; // Copy hessian elements into lower half of 3x3 matrix hess[4*n+2] = hess[4*n+1]; } } /************************************************************/ /* 3D 
double-precision, real evaulation functions */ /************************************************************/ void eval_multi_UBspline_3d_s (multi_UBspline_3d_s *spline, double x, double y, double z, float* restrict vals) { x -= spline->x_grid.start; y -= spline->y_grid.start; z -= spline->z_grid.start; float ux = x*spline->x_grid.delta_inv; float uy = y*spline->y_grid.delta_inv; float uz = z*spline->z_grid.delta_inv; float ipartx, iparty, ipartz, tx, ty, tz; tx = modff (ux, &ipartx); int ix = (int) ipartx; ty = modff (uy, &iparty); int iy = (int) iparty; tz = modff (uz, &ipartz); int iz = (int) ipartz; float tpx[4], tpy[4], tpz[4], a[4], b[4], c[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; tpz[0] = tz*tz*tz; tpz[1] = tz*tz; tpz[2] = tz; tpz[3] = 1.0; float* restrict coefs = spline->coefs; a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]); a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]); b[0] = (Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]); b[1] = (Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]); b[2] = (Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]); b[3] = (Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]); c[0] = (Af[ 0]*tpz[0] + Af[ 1]*tpz[1] + Af[ 2]*tpz[2] + Af[ 3]*tpz[3]); c[1] = (Af[ 4]*tpz[0] + Af[ 5]*tpz[1] + Af[ 6]*tpz[2] + Af[ 7]*tpz[3]); c[2] = (Af[ 8]*tpz[0] + Af[ 9]*tpz[1] + Af[10]*tpz[2] + Af[11]*tpz[3]); c[3] = (Af[12]*tpz[0] + Af[13]*tpz[1] + Af[14]*tpz[2] + Af[15]*tpz[3]); intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; intptr_t zs = spline->z_stride; for (int n=0; nnum_splines; n++) vals[n] = 0.0; for (int i=0; i<4; i++) for (int j=0; j<4; j++) for (int k=0; k<4; k++) { float abc = a[i]*b[j]*c[k]; 
float* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys + (iz+k)*zs); for (int n=0; nnum_splines; n++) vals[n] += abc*coefs[n]; } } void eval_multi_UBspline_3d_s_vg (multi_UBspline_3d_s *spline, double x, double y, double z, float* restrict vals, float* restrict grads) { x -= spline->x_grid.start; y -= spline->y_grid.start; z -= spline->z_grid.start; float ux = x*spline->x_grid.delta_inv; float uy = y*spline->y_grid.delta_inv; float uz = z*spline->z_grid.delta_inv; float ipartx, iparty, ipartz, tx, ty, tz; tx = modff (ux, &ipartx); int ix = (int) ipartx; ty = modff (uy, &iparty); int iy = (int) iparty; tz = modff (uz, &ipartz); int iz = (int) ipartz; float tpx[4], tpy[4], tpz[4], a[4], b[4], c[4], da[4], db[4], dc[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; tpz[0] = tz*tz*tz; tpz[1] = tz*tz; tpz[2] = tz; tpz[3] = 1.0; float* restrict coefs = spline->coefs; a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]); a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]); da[0] = (dAf[ 0]*tpx[0] + dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]); da[1] = (dAf[ 4]*tpx[0] + dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]); da[2] = (dAf[ 8]*tpx[0] + dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]); da[3] = (dAf[12]*tpx[0] + dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]); b[0] = (Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]); b[1] = (Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]); b[2] = (Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]); b[3] = (Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]); db[0] = (dAf[ 0]*tpy[0] + dAf[ 1]*tpy[1] + dAf[ 2]*tpy[2] + dAf[ 3]*tpy[3]); db[1] = (dAf[ 4]*tpy[0] + dAf[ 5]*tpy[1] + dAf[ 6]*tpy[2] + dAf[ 7]*tpy[3]); 
db[2] = (dAf[ 8]*tpy[0] + dAf[ 9]*tpy[1] + dAf[10]*tpy[2] + dAf[11]*tpy[3]); db[3] = (dAf[12]*tpy[0] + dAf[13]*tpy[1] + dAf[14]*tpy[2] + dAf[15]*tpy[3]); c[0] = (Af[ 0]*tpz[0] + Af[ 1]*tpz[1] + Af[ 2]*tpz[2] + Af[ 3]*tpz[3]); c[1] = (Af[ 4]*tpz[0] + Af[ 5]*tpz[1] + Af[ 6]*tpz[2] + Af[ 7]*tpz[3]); c[2] = (Af[ 8]*tpz[0] + Af[ 9]*tpz[1] + Af[10]*tpz[2] + Af[11]*tpz[3]); c[3] = (Af[12]*tpz[0] + Af[13]*tpz[1] + Af[14]*tpz[2] + Af[15]*tpz[3]); dc[0] = (dAf[ 0]*tpz[0] + dAf[ 1]*tpz[1] + dAf[ 2]*tpz[2] + dAf[ 3]*tpz[3]); dc[1] = (dAf[ 4]*tpz[0] + dAf[ 5]*tpz[1] + dAf[ 6]*tpz[2] + dAf[ 7]*tpz[3]); dc[2] = (dAf[ 8]*tpz[0] + dAf[ 9]*tpz[1] + dAf[10]*tpz[2] + dAf[11]*tpz[3]); dc[3] = (dAf[12]*tpz[0] + dAf[13]*tpz[1] + dAf[14]*tpz[2] + dAf[15]*tpz[3]); intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; intptr_t zs = spline->z_stride; for (int n=0; nnum_splines; n++) { vals[n] = 0.0; grads[3*n+0] = grads[3*n+1] = grads[3*n+2] = 0.0; } for (int i=0; i<4; i++) for (int j=0; j<4; j++) for (int k=0; k<4; k++) { float abc = a[i]*b[j]*c[k]; float dabc[3]; dabc[0] = da[i]* b[j]* c[k]; dabc[1] = a[i]*db[j]* c[k]; dabc[2] = a[i]* b[j]*dc[k]; float* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys + (iz+k)*zs); for (int n=0; nnum_splines; n++) { vals[n] += abc *coefs[n]; grads[3*n+0] += dabc[0]*coefs[n]; grads[3*n+1] += dabc[1]*coefs[n]; grads[3*n+2] += dabc[2]*coefs[n]; } } float dxInv = spline->x_grid.delta_inv; float dyInv = spline->y_grid.delta_inv; float dzInv = spline->z_grid.delta_inv; for (int n=0; nnum_splines; n++) { grads[3*n+0] *= dxInv; grads[3*n+1] *= dyInv; grads[3*n+2] *= dzInv; } } void eval_multi_UBspline_3d_s_vgl (multi_UBspline_3d_s *spline, double x, double y, double z, float* restrict vals, float* restrict grads, float* restrict lapl) { x -= spline->x_grid.start; y -= spline->y_grid.start; z -= spline->z_grid.start; float ux = x*spline->x_grid.delta_inv; float uy = y*spline->y_grid.delta_inv; float uz = z*spline->z_grid.delta_inv; float ipartx, 
iparty, ipartz, tx, ty, tz; tx = modff (ux, &ipartx); int ix = (int) ipartx; ty = modff (uy, &iparty); int iy = (int) iparty; tz = modff (uz, &ipartz); int iz = (int) ipartz; float tpx[4], tpy[4], tpz[4], a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; tpz[0] = tz*tz*tz; tpz[1] = tz*tz; tpz[2] = tz; tpz[3] = 1.0; float* restrict coefs = spline->coefs; a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]); a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]); da[0] = (dAf[ 0]*tpx[0] + dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]); da[1] = (dAf[ 4]*tpx[0] + dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]); da[2] = (dAf[ 8]*tpx[0] + dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]); da[3] = (dAf[12]*tpx[0] + dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]); d2a[0] = (d2Af[ 0]*tpx[0] + d2Af[ 1]*tpx[1] + d2Af[ 2]*tpx[2] + d2Af[ 3]*tpx[3]); d2a[1] = (d2Af[ 4]*tpx[0] + d2Af[ 5]*tpx[1] + d2Af[ 6]*tpx[2] + d2Af[ 7]*tpx[3]); d2a[2] = (d2Af[ 8]*tpx[0] + d2Af[ 9]*tpx[1] + d2Af[10]*tpx[2] + d2Af[11]*tpx[3]); d2a[3] = (d2Af[12]*tpx[0] + d2Af[13]*tpx[1] + d2Af[14]*tpx[2] + d2Af[15]*tpx[3]); b[0] = (Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]); b[1] = (Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]); b[2] = (Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]); b[3] = (Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]); db[0] = (dAf[ 0]*tpy[0] + dAf[ 1]*tpy[1] + dAf[ 2]*tpy[2] + dAf[ 3]*tpy[3]); db[1] = (dAf[ 4]*tpy[0] + dAf[ 5]*tpy[1] + dAf[ 6]*tpy[2] + dAf[ 7]*tpy[3]); db[2] = (dAf[ 8]*tpy[0] + dAf[ 9]*tpy[1] + dAf[10]*tpy[2] + dAf[11]*tpy[3]); db[3] = (dAf[12]*tpy[0] + dAf[13]*tpy[1] + dAf[14]*tpy[2] + 
dAf[15]*tpy[3]); d2b[0] = (d2Af[ 0]*tpy[0] + d2Af[ 1]*tpy[1] + d2Af[ 2]*tpy[2] + d2Af[ 3]*tpy[3]); d2b[1] = (d2Af[ 4]*tpy[0] + d2Af[ 5]*tpy[1] + d2Af[ 6]*tpy[2] + d2Af[ 7]*tpy[3]); d2b[2] = (d2Af[ 8]*tpy[0] + d2Af[ 9]*tpy[1] + d2Af[10]*tpy[2] + d2Af[11]*tpy[3]); d2b[3] = (d2Af[12]*tpy[0] + d2Af[13]*tpy[1] + d2Af[14]*tpy[2] + d2Af[15]*tpy[3]); c[0] = (Af[ 0]*tpz[0] + Af[ 1]*tpz[1] + Af[ 2]*tpz[2] + Af[ 3]*tpz[3]); c[1] = (Af[ 4]*tpz[0] + Af[ 5]*tpz[1] + Af[ 6]*tpz[2] + Af[ 7]*tpz[3]); c[2] = (Af[ 8]*tpz[0] + Af[ 9]*tpz[1] + Af[10]*tpz[2] + Af[11]*tpz[3]); c[3] = (Af[12]*tpz[0] + Af[13]*tpz[1] + Af[14]*tpz[2] + Af[15]*tpz[3]); dc[0] = (dAf[ 0]*tpz[0] + dAf[ 1]*tpz[1] + dAf[ 2]*tpz[2] + dAf[ 3]*tpz[3]); dc[1] = (dAf[ 4]*tpz[0] + dAf[ 5]*tpz[1] + dAf[ 6]*tpz[2] + dAf[ 7]*tpz[3]); dc[2] = (dAf[ 8]*tpz[0] + dAf[ 9]*tpz[1] + dAf[10]*tpz[2] + dAf[11]*tpz[3]); dc[3] = (dAf[12]*tpz[0] + dAf[13]*tpz[1] + dAf[14]*tpz[2] + dAf[15]*tpz[3]); d2c[0] = (d2Af[ 0]*tpz[0] + d2Af[ 1]*tpz[1] + d2Af[ 2]*tpz[2] + d2Af[ 3]*tpz[3]); d2c[1] = (d2Af[ 4]*tpz[0] + d2Af[ 5]*tpz[1] + d2Af[ 6]*tpz[2] + d2Af[ 7]*tpz[3]); d2c[2] = (d2Af[ 8]*tpz[0] + d2Af[ 9]*tpz[1] + d2Af[10]*tpz[2] + d2Af[11]*tpz[3]); d2c[3] = (d2Af[12]*tpz[0] + d2Af[13]*tpz[1] + d2Af[14]*tpz[2] + d2Af[15]*tpz[3]); intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; intptr_t zs = spline->z_stride; float lapl3[3*spline->num_splines]; for (int n=0; nnum_splines; n++) { vals[n] = 0.0; grads[3*n+0] = grads[3*n+1] = grads[3*n+2] = 0.0; lapl3[3*n+0] = lapl3[3*n+1] = lapl3[3*n+2] = 0.0; } for (int i=0; i<4; i++) for (int j=0; j<4; j++) for (int k=0; k<4; k++) { float abc = a[i]*b[j]*c[k]; float dabc[3], d2abc[3]; dabc[0] = da[i]* b[j]* c[k]; dabc[1] = a[i]*db[j]* c[k]; dabc[2] = a[i]* b[j]*dc[k]; d2abc[0] = d2a[i]* b[j]* c[k]; d2abc[1] = a[i]*d2b[j]* c[k]; d2abc[2] = a[i]* b[j]*d2c[k]; float* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys + (iz+k)*zs); for (int n=0; nnum_splines; n++) { vals[n] += abc *coefs[n]; 
grads[3*n+0] += dabc[0]*coefs[n]; grads[3*n+1] += dabc[1]*coefs[n]; grads[3*n+2] += dabc[2]*coefs[n]; lapl3[3*n+0] += d2abc[0]*coefs[n]; lapl3[3*n+1] += d2abc[1]*coefs[n]; lapl3[3*n+2] += d2abc[2]*coefs[n]; } } float dxInv = spline->x_grid.delta_inv; float dyInv = spline->y_grid.delta_inv; float dzInv = spline->z_grid.delta_inv; for (int n=0; nnum_splines; n++) { grads[3*n+0] *= dxInv; grads[3*n+1] *= dyInv; grads[3*n+2] *= dzInv; lapl3[3*n+0] *= dxInv*dxInv; lapl3[3*n+1] *= dyInv*dyInv; lapl3[3*n+2] *= dzInv*dzInv; lapl[n] = lapl3[3*n+0] + lapl3[3*n+1] + lapl3[3*n+2]; } } void eval_multi_UBspline_3d_s_vgh (multi_UBspline_3d_s *spline, double x, double y, double z, float* restrict vals, float* restrict grads, float* restrict hess) { x -= spline->x_grid.start; y -= spline->y_grid.start; z -= spline->z_grid.start; float ux = x*spline->x_grid.delta_inv; float uy = y*spline->y_grid.delta_inv; float uz = z*spline->z_grid.delta_inv; float ipartx, iparty, ipartz, tx, ty, tz; tx = modff (ux, &ipartx); int ix = (int) ipartx; ty = modff (uy, &iparty); int iy = (int) iparty; tz = modff (uz, &ipartz); int iz = (int) ipartz; float tpx[4], tpy[4], tpz[4], a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; tpz[0] = tz*tz*tz; tpz[1] = tz*tz; tpz[2] = tz; tpz[3] = 1.0; float* restrict coefs = spline->coefs; a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]); a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]); da[0] = (dAf[ 0]*tpx[0] + dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]); da[1] = (dAf[ 4]*tpx[0] + dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]); da[2] = (dAf[ 8]*tpx[0] + dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]); da[3] = (dAf[12]*tpx[0] + 
dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]); d2a[0] = (d2Af[ 0]*tpx[0] + d2Af[ 1]*tpx[1] + d2Af[ 2]*tpx[2] + d2Af[ 3]*tpx[3]); d2a[1] = (d2Af[ 4]*tpx[0] + d2Af[ 5]*tpx[1] + d2Af[ 6]*tpx[2] + d2Af[ 7]*tpx[3]); d2a[2] = (d2Af[ 8]*tpx[0] + d2Af[ 9]*tpx[1] + d2Af[10]*tpx[2] + d2Af[11]*tpx[3]); d2a[3] = (d2Af[12]*tpx[0] + d2Af[13]*tpx[1] + d2Af[14]*tpx[2] + d2Af[15]*tpx[3]); b[0] = (Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]); b[1] = (Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]); b[2] = (Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]); b[3] = (Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]); db[0] = (dAf[ 0]*tpy[0] + dAf[ 1]*tpy[1] + dAf[ 2]*tpy[2] + dAf[ 3]*tpy[3]); db[1] = (dAf[ 4]*tpy[0] + dAf[ 5]*tpy[1] + dAf[ 6]*tpy[2] + dAf[ 7]*tpy[3]); db[2] = (dAf[ 8]*tpy[0] + dAf[ 9]*tpy[1] + dAf[10]*tpy[2] + dAf[11]*tpy[3]); db[3] = (dAf[12]*tpy[0] + dAf[13]*tpy[1] + dAf[14]*tpy[2] + dAf[15]*tpy[3]); d2b[0] = (d2Af[ 0]*tpy[0] + d2Af[ 1]*tpy[1] + d2Af[ 2]*tpy[2] + d2Af[ 3]*tpy[3]); d2b[1] = (d2Af[ 4]*tpy[0] + d2Af[ 5]*tpy[1] + d2Af[ 6]*tpy[2] + d2Af[ 7]*tpy[3]); d2b[2] = (d2Af[ 8]*tpy[0] + d2Af[ 9]*tpy[1] + d2Af[10]*tpy[2] + d2Af[11]*tpy[3]); d2b[3] = (d2Af[12]*tpy[0] + d2Af[13]*tpy[1] + d2Af[14]*tpy[2] + d2Af[15]*tpy[3]); c[0] = (Af[ 0]*tpz[0] + Af[ 1]*tpz[1] + Af[ 2]*tpz[2] + Af[ 3]*tpz[3]); c[1] = (Af[ 4]*tpz[0] + Af[ 5]*tpz[1] + Af[ 6]*tpz[2] + Af[ 7]*tpz[3]); c[2] = (Af[ 8]*tpz[0] + Af[ 9]*tpz[1] + Af[10]*tpz[2] + Af[11]*tpz[3]); c[3] = (Af[12]*tpz[0] + Af[13]*tpz[1] + Af[14]*tpz[2] + Af[15]*tpz[3]); dc[0] = (dAf[ 0]*tpz[0] + dAf[ 1]*tpz[1] + dAf[ 2]*tpz[2] + dAf[ 3]*tpz[3]); dc[1] = (dAf[ 4]*tpz[0] + dAf[ 5]*tpz[1] + dAf[ 6]*tpz[2] + dAf[ 7]*tpz[3]); dc[2] = (dAf[ 8]*tpz[0] + dAf[ 9]*tpz[1] + dAf[10]*tpz[2] + dAf[11]*tpz[3]); dc[3] = (dAf[12]*tpz[0] + dAf[13]*tpz[1] + dAf[14]*tpz[2] + dAf[15]*tpz[3]); d2c[0] = (d2Af[ 0]*tpz[0] + d2Af[ 1]*tpz[1] + d2Af[ 2]*tpz[2] + d2Af[ 3]*tpz[3]); d2c[1] = (d2Af[ 
4]*tpz[0] + d2Af[ 5]*tpz[1] + d2Af[ 6]*tpz[2] + d2Af[ 7]*tpz[3]); d2c[2] = (d2Af[ 8]*tpz[0] + d2Af[ 9]*tpz[1] + d2Af[10]*tpz[2] + d2Af[11]*tpz[3]); d2c[3] = (d2Af[12]*tpz[0] + d2Af[13]*tpz[1] + d2Af[14]*tpz[2] + d2Af[15]*tpz[3]); intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; intptr_t zs = spline->z_stride; for (int n=0; nnum_splines; n++) { vals[n] = 0.0; grads[3*n+0] = grads[3*n+1] = grads[3*n+2] = 0.0; for (int i=0; i<9; i++) hess[9*n+i] = 0.0; } for (int i=0; i<4; i++) for (int j=0; j<4; j++) for (int k=0; k<4; k++) { float abc = a[i]*b[j]*c[k]; float dabc[3], d2abc[6]; dabc[0] = da[i]* b[j]* c[k]; dabc[1] = a[i]*db[j]* c[k]; dabc[2] = a[i]* b[j]*dc[k]; d2abc[0] = d2a[i]* b[j]* c[k]; d2abc[1] = da[i]* db[j]* c[k]; d2abc[2] = da[i]* b[j]* dc[k]; d2abc[3] = a[i]*d2b[j]* c[k]; d2abc[4] = a[i]* db[j]* dc[k]; d2abc[5] = a[i]* b[j]*d2c[k]; float* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys + (iz+k)*zs); for (int n=0; nnum_splines; n++) { vals[n] += abc *coefs[n]; grads[3*n+0] += dabc[0]*coefs[n]; grads[3*n+1] += dabc[1]*coefs[n]; grads[3*n+2] += dabc[2]*coefs[n]; hess [9*n+0] += d2abc[0]*coefs[n]; hess [9*n+1] += d2abc[1]*coefs[n]; hess [9*n+2] += d2abc[2]*coefs[n]; hess [9*n+4] += d2abc[3]*coefs[n]; hess [9*n+5] += d2abc[4]*coefs[n]; hess [9*n+8] += d2abc[5]*coefs[n]; } } float dxInv = spline->x_grid.delta_inv; float dyInv = spline->y_grid.delta_inv; float dzInv = spline->z_grid.delta_inv; for (int n=0; nnum_splines; n++) { grads[3*n+0] *= dxInv; grads[3*n+1] *= dyInv; grads[3*n+2] *= dzInv; hess [9*n+0] *= dxInv*dxInv; hess [9*n+4] *= dyInv*dyInv; hess [9*n+8] *= dzInv*dzInv; hess [9*n+1] *= dxInv*dyInv; hess [9*n+2] *= dxInv*dzInv; hess [9*n+5] *= dyInv*dzInv; // Copy hessian elements into lower half of 3x3 matrix hess [9*n+3] = hess[9*n+1]; hess [9*n+6] = hess[9*n+2]; hess [9*n+7] = hess[9*n+5]; } } #endif einspline-0.9.2/src/nubspline_create.h0000664000113000011300000001010111012400563014656 
00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. // // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. // // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #ifndef NUBSPLINE_CREATE_H #define NUBSPLINE_CREATE_H #include "nubspline_structs.h" #ifdef __cplusplus extern "C" { #endif NUgrid* create_center_grid (double start, double end, double ratio, int num_points); NUgrid* create_general_grid (double *points, int num_points); //////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////// //// Nonuniform spline creation routines //// //////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////// //////////////////////////////////////// // Nonuniform, single precision, real // //////////////////////////////////////// NUBspline_1d_s * create_NUBspline_1d_s (NUgrid* x_grid, BCtype_s xBC, float *data); NUBspline_2d_s * create_NUBspline_2d_s (NUgrid* x_grid, NUgrid* y_grid, BCtype_s xBC, BCtype_s yBC, float *data); NUBspline_3d_s * create_NUBspline_3d_s (NUgrid* x_grid, NUgrid* 
y_grid, NUgrid* z_grid, BCtype_s xBC, BCtype_s yBC, BCtype_s zBC, float *data); //////////////////////////////////////// // Nonuniform, double precision, real // //////////////////////////////////////// NUBspline_1d_d * create_NUBspline_1d_d (NUgrid* x_grid, BCtype_d xBC, double *data); NUBspline_2d_d * create_NUBspline_2d_d (NUgrid* x_grid, NUgrid* y_grid, BCtype_d xBC, BCtype_d yBC, double *data); NUBspline_3d_d * create_NUBspline_3d_d (NUgrid* x_grid, NUgrid* y_grid, NUgrid* z_grid, BCtype_d xBC, BCtype_d yBC, BCtype_d zBC, double *data); /////////////////////////////////////////// // Nonuniform, single precision, complex // /////////////////////////////////////////// NUBspline_1d_c * create_NUBspline_1d_c (NUgrid* x_grid, BCtype_c xBC, complex_float *data); NUBspline_2d_c * create_NUBspline_2d_c (NUgrid* x_grid, NUgrid* y_grid, BCtype_c xBC, BCtype_c yBC, complex_float *data); NUBspline_3d_c * create_NUBspline_3d_c (NUgrid* x_grid, NUgrid* y_grid, NUgrid* z_grid, BCtype_c xBC, BCtype_c yBC, BCtype_c zBC, complex_float *data); /////////////////////////////////////////// // Nonuniform, double precision, complex // /////////////////////////////////////////// NUBspline_1d_z * create_NUBspline_1d_z (NUgrid* x_grid, BCtype_z xBC, complex_double *data); NUBspline_2d_z * create_NUBspline_2d_z (NUgrid* x_grid, NUgrid* restrict y_grid, BCtype_z xBC, BCtype_z yBC, complex_double *data); NUBspline_3d_z * create_NUBspline_3d_z (NUgrid* x_grid, NUgrid* y_grid, NUgrid* z_grid, BCtype_z xBC, BCtype_z yBC, BCtype_z zBC, complex_double *data); #ifdef __cplusplus } #endif #endif einspline-0.9.2/src/bspline_eval_sse_s.h0000664000113000011300000013525311235572632015231 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. 
// // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. // // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #ifndef BSPLINE_EVAL_SSE_S_H #define BSPLINE_EVAL_SSE_S_H #include #include #ifdef HAVE_SSE3 #include #endif #include #include // extern __m128 A0, A1, A2, A3; // extern __m128 dA0, dA1, dA2, dA3; // extern __m128 d2A0, d2A1, d2A2, d2A3; extern __m128* restrict A_s; extern const float* restrict Af; extern const float* restrict dAf; extern const float* restrict d2Af; /// SSE3 add "horizontal add" instructions, which makes things /// simpler and faster #ifdef HAVE_SSE9 #define _MM_MATVEC4_PS(M0, M1, M2, M3, v, r) \ do { \ __m128 r0 = _mm_hadd_ps (_mm_mul_ps (M0, v), _mm_mul_ps (M1, v)); \ __m128 r1 = _mm_hadd_ps (_mm_mul_ps (M2, v), _mm_mul_ps (M3, v)); \ r = _mm_hadd_ps (r0, r1); \ } while (0); #define _MM_DOT4_PS(A, B, _p) \ do { \ __m128 t = _mm_mul_ps (A, B); \ __m128 t1 = _mm_hadd_ps (t,t); \ __m128 r = _mm_hadd_ps (t1, t1); \ _mm_store_ss (&(_p), r); \ } while(0); #else // Use plain-old SSE instructions #define _MM_MATVEC4_PS(M0, M1, M2, M3, v, r) \ do { \ __m128 _r0 = _mm_mul_ps (M0, v); \ __m128 _r1 = _mm_mul_ps (M1, v); \ __m128 _r2 = _mm_mul_ps (M2, v); \ __m128 _r3 = _mm_mul_ps (M3, v); \ _MM_TRANSPOSE4_PS (_r0, _r1, _r2, 
_r3); \ r = _mm_add_ps (_mm_add_ps (_r0, _r1), _mm_add_ps (_r2, _r3)); \ } while (0); #define _MM_DOT4_PS(A, B, p) \ do { \ __m128 t = _mm_mul_ps (A, B); \ __m128 alo = _mm_shuffle_ps (t, t, _MM_SHUFFLE(0,1,0,1)); \ __m128 ahi = _mm_shuffle_ps (t, t, _MM_SHUFFLE(2,3,2,3)); \ __m128 _a = _mm_add_ps (alo, ahi); \ __m128 rlo = _mm_shuffle_ps (_a, _a, _MM_SHUFFLE(0,0,0,0)); \ __m128 rhi = _mm_shuffle_ps (_a, _a, _MM_SHUFFLE(1,1,1,1)); \ __m128 _r = _mm_add_ps (rlo, rhi); \ _mm_store_ss (&(p), _r); \ } while(0); #endif /************************************************************/ /* 1D single-precision, real evaulation functions */ /************************************************************/ /* Value only */ inline void eval_UBspline_1d_s (UBspline_1d_s * restrict spline, double x, float* restrict val) { x -= spline->x_grid.start; float u = x*spline->x_grid.delta_inv; float ipart, t; t = modff (u, &ipart); int i = (int) ipart; float tp[4]; tp[0] = t*t*t; tp[1] = t*t; tp[2] = t; tp[3] = 1.0; float* restrict coefs = spline->coefs; *val = (coefs[i+0]*(Af[ 0]*tp[0] + Af[ 1]*tp[1] + Af[ 2]*tp[2] + Af[ 3]*tp[3])+ coefs[i+1]*(Af[ 4]*tp[0] + Af[ 5]*tp[1] + Af[ 6]*tp[2] + Af[ 7]*tp[3])+ coefs[i+2]*(Af[ 8]*tp[0] + Af[ 9]*tp[1] + Af[10]*tp[2] + Af[11]*tp[3])+ coefs[i+3]*(Af[12]*tp[0] + Af[13]*tp[1] + Af[14]*tp[2] + Af[15]*tp[3])); } /* Value and first derivative */ inline void eval_UBspline_1d_s_vg (UBspline_1d_s * restrict spline, double x, float* restrict val, float* restrict grad) { x -= spline->x_grid.start; float u = x*spline->x_grid.delta_inv; float ipart, t; t = modff (u, &ipart); int i = (int) ipart; float tp[4]; tp[0] = t*t*t; tp[1] = t*t; tp[2] = t; tp[3] = 1.0; float* restrict coefs = spline->coefs; *val = (coefs[i+0]*(Af[ 0]*tp[0] + Af[ 1]*tp[1] + Af[ 2]*tp[2] + Af[ 3]*tp[3])+ coefs[i+1]*(Af[ 4]*tp[0] + Af[ 5]*tp[1] + Af[ 6]*tp[2] + Af[ 7]*tp[3])+ coefs[i+2]*(Af[ 8]*tp[0] + Af[ 9]*tp[1] + Af[10]*tp[2] + Af[11]*tp[3])+ coefs[i+3]*(Af[12]*tp[0] + Af[13]*tp[1] + 
Af[14]*tp[2] + Af[15]*tp[3])); *grad = spline->x_grid.delta_inv * (coefs[i+0]*(dAf[ 1]*tp[1] + dAf[ 2]*tp[2] + dAf[ 3]*tp[3])+ coefs[i+1]*(dAf[ 5]*tp[1] + dAf[ 6]*tp[2] + dAf[ 7]*tp[3])+ coefs[i+2]*(dAf[ 9]*tp[1] + dAf[10]*tp[2] + dAf[11]*tp[3])+ coefs[i+3]*(dAf[13]*tp[1] + dAf[14]*tp[2] + dAf[15]*tp[3])); } /* Value, first derivative, and second derivative */ inline void eval_UBspline_1d_s_vgl (UBspline_1d_s * restrict spline, double x, float* restrict val, float* restrict grad, float* restrict lapl) { x -= spline->x_grid.start; float u = x*spline->x_grid.delta_inv; float ipart, t; t = modff (u, &ipart); int i = (int) ipart; float* restrict coefs = spline->coefs; float tp[4]; tp[0] = t*t*t; tp[1] = t*t; tp[2] = t; tp[3] = 1.0; // It turns out that std version is faster than SSE in 1D // __m128 tp = _mm_set_ps (t*t*t, t*t, t, 1.0); // __m128 a, da, d2a; // _MM_MATVEC4_PS ( A_s[0], A_s[1], A_s[ 2], A_s[ 3], tp, a); // _MM_MATVEC4_PS ( A_s[4], A_s[5], A_s[6], A_s[7], tp, da); // _MM_MATVEC4_PS (A_s[8], A_s[9], A_s[10], A_s[11], tp, d2a); // __m128 cf = _mm_loadu_ps (&(coefs[i])); // _MM_DOT4_PS ( a, cf, *val); // _MM_DOT4_PS ( da, cf, *grad); // _MM_DOT4_PS (d2a, cf, *lapl); *val = (coefs[i+0]*(Af[ 0]*tp[0] + Af[ 1]*tp[1] + Af[ 2]*tp[2] + Af[ 3]*tp[3])+ coefs[i+1]*(Af[ 4]*tp[0] + Af[ 5]*tp[1] + Af[ 6]*tp[2] + Af[ 7]*tp[3])+ coefs[i+2]*(Af[ 8]*tp[0] + Af[ 9]*tp[1] + Af[10]*tp[2] + Af[11]*tp[3])+ coefs[i+3]*(Af[12]*tp[0] + Af[13]*tp[1] + Af[14]*tp[2] + Af[15]*tp[3])); *grad = spline->x_grid.delta_inv * (coefs[i+0]*(dAf[ 1]*tp[1] + dAf[ 2]*tp[2] + dAf[ 3]*tp[3])+ coefs[i+1]*(dAf[ 5]*tp[1] + dAf[ 6]*tp[2] + dAf[ 7]*tp[3])+ coefs[i+2]*(dAf[ 9]*tp[1] + dAf[10]*tp[2] + dAf[11]*tp[3])+ coefs[i+3]*(dAf[13]*tp[1] + dAf[14]*tp[2] + dAf[15]*tp[3])); *lapl = spline->x_grid.delta_inv * spline->x_grid.delta_inv * (coefs[i+0]*(d2Af[ 2]*tp[2] + d2Af[ 3]*tp[3])+ coefs[i+1]*(d2Af[ 6]*tp[2] + d2Af[ 7]*tp[3])+ coefs[i+2]*(d2Af[10]*tp[2] + d2Af[11]*tp[3])+ coefs[i+3]*(d2Af[14]*tp[2] + 
d2Af[15]*tp[3])); } inline void eval_UBspline_1d_s_vgh (UBspline_1d_s * restrict spline, double x, float* restrict val, float* restrict grad, float* restrict hess) { eval_UBspline_1d_s_vgl (spline, x, val, grad, hess); } /************************************************************/ /* 2D single-precision, real evaulation functions */ /************************************************************/ /* Value only */ inline void eval_UBspline_2d_s (UBspline_2d_s * restrict spline, double x, double y, float* restrict val) { _mm_prefetch ((const char*) &A_s[0],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[1],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[2],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[3],_MM_HINT_T0); /// SSE mesh point determination __m128 xy = _mm_set_ps (x, y, 0.0, 0.0); __m128 x0y0 = _mm_set_ps (spline->x_grid.start, spline->y_grid.start, 0.0, 0.0); __m128 delta_inv = _mm_set_ps (spline->x_grid.delta_inv,spline->y_grid.delta_inv, 0.0, 0.0); xy = _mm_sub_ps (xy, x0y0); // ux = (x - x0)/delta_x and same for y __m128 uxuy = _mm_mul_ps (xy, delta_inv); // intpart = trunc (ux, uy) __m128i intpart = _mm_cvttps_epi32(uxuy); __m128i ixiy; _mm_storeu_si128 (&ixiy, intpart); // Store to memory for use in C expressions // xmm registers are stored to memory in reverse order int ix = ((int *)&ixiy)[3]; int iy = ((int *)&ixiy)[2]; int xs = spline->x_stride; // This macro is used to give the pointer to coefficient data. // i and j should be in the range [0,3]. Coefficients are read four // at a time, so no j value is needed. #define P(i) (spline->coefs+(ix+(i))*xs+iy) // Prefetch the data from main memory into cache so it's available // when we need to use it. 
_mm_prefetch ((const char*)P(0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1), _MM_HINT_T0); _mm_prefetch ((const char*)P(2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3), _MM_HINT_T0); // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // tpz = [t_z^3 t_z^2 t_z 1] __m128 ipart = _mm_cvtepi32_ps (intpart); __m128 txty = _mm_sub_ps (uxuy, ipart); __m128 one = _mm_set_ps (1.0, 1.0, 1.0, 1.0); __m128 t2 = _mm_mul_ps (txty, txty); __m128 t3 = _mm_mul_ps (t2, txty); __m128 tpx = t3; __m128 tpy = t2; __m128 tpz = txty; __m128 zero = one; _MM_TRANSPOSE4_PS(zero, tpz, tpy, tpx); // a = A * tpx, b = A * tpy, c = A * tpz // da = dA * tpx, db = dA * tpy, dc = dA * tpz, etc. // A is 4x4 matrix given by the rows A_s[0], A_s[1], A_s[ 2], A_s[ 3] __m128 a, b, bP, tmp0, tmp1, tmp2, tmp3; // x-dependent vectors _MM_MATVEC4_PS (A_s[0], A_s[1], A_s[2], A_s[3], tpx, a); // y-dependent vectors _MM_MATVEC4_PS (A_s[0], A_s[1], A_s[2], A_s[3], tpy, b); // Compute cP, dcP, and d2cP products 1/4 at a time to maximize // register reuse and avoid rerereading from memory or cache. 
// 1st quarter tmp0 = _mm_loadu_ps (P(0)); tmp1 = _mm_loadu_ps (P(1)); tmp2 = _mm_loadu_ps (P(2)); tmp3 = _mm_loadu_ps (P(3)); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, b, bP); // Compute value _MM_DOT4_PS (a, bP, *val); #undef P } /* Value and gradient */ inline void eval_UBspline_2d_s_vg (UBspline_2d_s * restrict spline, double x, double y, float* restrict val, float* restrict grad) { _mm_prefetch ((const char*) &A_s[0],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[1],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[2],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[3],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[4],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[5],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[6],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[7],_MM_HINT_T0); /// SSE mesh point determination __m128 xy = _mm_set_ps (x, y, 0.0, 0.0); __m128 x0y0 = _mm_set_ps (spline->x_grid.start, spline->y_grid.start, 0.0, 0.0); __m128 delta_inv = _mm_set_ps (spline->x_grid.delta_inv,spline->y_grid.delta_inv, 0.0, 0.0); xy = _mm_sub_ps (xy, x0y0); // ux = (x - x0)/delta_x and same for y __m128 uxuy = _mm_mul_ps (xy, delta_inv); // intpart = trunc (ux, uy) __m128i intpart = _mm_cvttps_epi32(uxuy); __m128i ixiy; _mm_storeu_si128 (&ixiy, intpart); // Store to memory for use in C expressions // xmm registers are stored to memory in reverse order int ix = ((int *)&ixiy)[3]; int iy = ((int *)&ixiy)[2]; int xs = spline->x_stride; // This macro is used to give the pointer to coefficient data. // i and j should be in the range [0,3]. Coefficients are read four // at a time, so no j value is needed. #define P(i) (spline->coefs+(ix+(i))*xs+iy) // Prefetch the data from main memory into cache so it's available // when we need to use it. 
_mm_prefetch ((const char*)P(0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1), _MM_HINT_T0); _mm_prefetch ((const char*)P(2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3), _MM_HINT_T0); // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // tpz = [t_z^3 t_z^2 t_z 1] __m128 ipart = _mm_cvtepi32_ps (intpart); __m128 txty = _mm_sub_ps (uxuy, ipart); __m128 one = _mm_set_ps (1.0, 1.0, 1.0, 1.0); __m128 t2 = _mm_mul_ps (txty, txty); __m128 t3 = _mm_mul_ps (t2, txty); __m128 tpx = t3; __m128 tpy = t2; __m128 tpz = txty; __m128 zero = one; _MM_TRANSPOSE4_PS(zero, tpz, tpy, tpx); // a = A * tpx, b = A * tpy, c = A * tpz // da = dA * tpx, db = dA * tpy, dc = dA * tpz, etc. // A is 4x4 matrix given by the rows A_s[0], A_s[1], A_s[ 2], A_s[ 3] __m128 a, b, da, db, bP, dbP, tmp0, tmp1, tmp2, tmp3; // x-dependent vectors _MM_MATVEC4_PS (A_s[0], A_s[1], A_s[2], A_s[3], tpx, a); _MM_MATVEC4_PS (A_s[4], A_s[5], A_s[6], A_s[7], tpx, da); // y-dependent vectors _MM_MATVEC4_PS (A_s[0], A_s[1], A_s[2], A_s[3], tpy, b); _MM_MATVEC4_PS (A_s[4], A_s[5], A_s[6], A_s[7], tpy, db); // Compute cP, dcP, and d2cP products 1/4 at a time to maximize // register reuse and avoid rerereading from memory or cache. 
// 1st quarter tmp0 = _mm_loadu_ps (P(0)); tmp1 = _mm_loadu_ps (P(1)); tmp2 = _mm_loadu_ps (P(2)); tmp3 = _mm_loadu_ps (P(3)); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, b, bP); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, db, dbP); // Compute value _MM_DOT4_PS (a, bP, *val); // Compute gradient _MM_DOT4_PS (da, bP, grad[0]); _MM_DOT4_PS (a, dbP, grad[1]); // Multiply gradients and hessians by appropriate grid inverses float dxInv = spline->x_grid.delta_inv; float dyInv = spline->y_grid.delta_inv; grad[0] *= dxInv; grad[1] *= dyInv; #undef P } /* Value, gradient, and laplacian */ inline void eval_UBspline_2d_s_vgl (UBspline_2d_s * restrict spline, double x, double y, float* restrict val, float* restrict grad, float* restrict lapl) { _mm_prefetch ((const char*) &A_s[ 0],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 1],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 2],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 3],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 4],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 5],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 6],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 7],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 8],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 9],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[10],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[11],_MM_HINT_T0); /// SSE mesh point determination __m128 xy = _mm_set_ps (x, y, 0.0, 0.0); __m128 x0y0 = _mm_set_ps (spline->x_grid.start, spline->y_grid.start, 0.0, 0.0); __m128 delta_inv = _mm_set_ps (spline->x_grid.delta_inv,spline->y_grid.delta_inv, 0.0, 0.0); xy = _mm_sub_ps (xy, x0y0); // ux = (x - x0)/delta_x and same for y __m128 uxuy = _mm_mul_ps (xy, delta_inv); // intpart = trunc (ux, uy) __m128i intpart = _mm_cvttps_epi32(uxuy); __m128i ixiy; _mm_storeu_si128 (&ixiy, intpart); // Store to memory for use in C expressions // xmm registers are stored to memory in reverse order int ix = ((int *)&ixiy)[3]; int iy = ((int *)&ixiy)[2]; int xs = 
spline->x_stride; // This macro is used to give the pointer to coefficient data. // i and j should be in the range [0,3]. Coefficients are read four // at a time, so no j value is needed. #define P(i) (spline->coefs+(ix+(i))*xs+iy) // Prefetch the data from main memory into cache so it's available // when we need to use it. _mm_prefetch ((const char*)P(0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1), _MM_HINT_T0); _mm_prefetch ((const char*)P(2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3), _MM_HINT_T0); // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // tpz = [t_z^3 t_z^2 t_z 1] __m128 ipart = _mm_cvtepi32_ps (intpart); __m128 txty = _mm_sub_ps (uxuy, ipart); __m128 one = _mm_set_ps (1.0, 1.0, 1.0, 1.0); __m128 t2 = _mm_mul_ps (txty, txty); __m128 t3 = _mm_mul_ps (t2, txty); __m128 tpx = t3; __m128 tpy = t2; __m128 tpz = txty; __m128 zero = one; _MM_TRANSPOSE4_PS(zero, tpz, tpy, tpx); // a = A * tpx, b = A * tpy, c = A * tpz // da = dA * tpx, db = dA * tpy, dc = dA * tpz, etc. // A is 4x4 matrix given by the rows A_s[0], A_s[1], A_s[ 2], A_s[ 3] __m128 a, b, da, db, d2a, d2b, bP, dbP, d2bP, tmp0, tmp1, tmp2, tmp3; // x-dependent vectors _MM_MATVEC4_PS (A_s[0], A_s[1], A_s[ 2], A_s[ 3], tpx, a); _MM_MATVEC4_PS (A_s[4], A_s[5], A_s[ 6], A_s[ 7], tpx, da); _MM_MATVEC4_PS (A_s[8], A_s[9], A_s[10], A_s[11], tpx, d2a); // y-dependent vectors _MM_MATVEC4_PS (A_s[0], A_s[1], A_s[ 2], A_s[ 3], tpy, b); _MM_MATVEC4_PS (A_s[4], A_s[5], A_s[ 6], A_s[ 7], tpy, db); _MM_MATVEC4_PS (A_s[8], A_s[9], A_s[10], A_s[11], tpy, d2b); // Compute cP, dcP, and d2cP products 1/4 at a time to maximize // register reuse and avoid rerereading from memory or cache. 
// 1st quarter tmp0 = _mm_loadu_ps (P(0)); tmp1 = _mm_loadu_ps (P(1)); tmp2 = _mm_loadu_ps (P(2)); tmp3 = _mm_loadu_ps (P(3)); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, b, bP); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, db, dbP); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, d2b, d2bP); // Compute value _MM_DOT4_PS (a, bP, *val); // Compute gradient _MM_DOT4_PS (da, bP, grad[0]); _MM_DOT4_PS (a, dbP, grad[1]); float sec_derivs[2]; // Compute laplacian _MM_DOT4_PS (d2a, bP, sec_derivs[0]); _MM_DOT4_PS (a, d2bP, sec_derivs[1]); // Multiply gradients and hessians by appropriate grid inverses float dxInv = spline->x_grid.delta_inv; float dyInv = spline->y_grid.delta_inv; grad[0] *= dxInv; grad[1] *= dyInv; sec_derivs[0] *= dxInv*dxInv; sec_derivs[1] *= dyInv*dyInv; // Copy hessian elements into lower half of 2x2 matrix *lapl = sec_derivs[0] + sec_derivs[1]; #undef P } /* Value, gradient, and Hessian */ inline void eval_UBspline_2d_s_vgh (UBspline_2d_s * restrict spline, double x, double y, float* restrict val, float* restrict grad, float* restrict hess) { _mm_prefetch ((const char*) &A_s[ 0],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 1],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 2],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 3],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 4],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 5],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 6],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 7],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 8],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 9],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[10],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[11],_MM_HINT_T0); /// SSE mesh point determination __m128 xy = _mm_set_ps (x, y, 0.0, 0.0); __m128 x0y0 = _mm_set_ps (spline->x_grid.start, spline->y_grid.start, 0.0, 0.0); __m128 delta_inv = _mm_set_ps (spline->x_grid.delta_inv,spline->y_grid.delta_inv, 0.0, 0.0); xy = _mm_sub_ps (xy, x0y0); // ux = (x - x0)/delta_x and same for y __m128 uxuy = 
_mm_mul_ps (xy, delta_inv); // intpart = trunc (ux, uy) __m128i intpart = _mm_cvttps_epi32(uxuy); __m128i ixiy; _mm_storeu_si128 (&ixiy, intpart); // Store to memory for use in C expressions // xmm registers are stored to memory in reverse order int ix = ((int *)&ixiy)[3]; int iy = ((int *)&ixiy)[2]; int xs = spline->x_stride; // This macro is used to give the pointer to coefficient data. // i and j should be in the range [0,3]. Coefficients are read four // at a time, so no j value is needed. #define P(i) (spline->coefs+(ix+(i))*xs+iy) // Prefetch the data from main memory into cache so it's available // when we need to use it. _mm_prefetch ((const char*)P(0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1), _MM_HINT_T0); _mm_prefetch ((const char*)P(2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3), _MM_HINT_T0); // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // tpz = [t_z^3 t_z^2 t_z 1] __m128 ipart = _mm_cvtepi32_ps (intpart); __m128 txty = _mm_sub_ps (uxuy, ipart); __m128 one = _mm_set_ps (1.0, 1.0, 1.0, 1.0); __m128 t2 = _mm_mul_ps (txty, txty); __m128 t3 = _mm_mul_ps (t2, txty); __m128 tpx = t3; __m128 tpy = t2; __m128 tpz = txty; __m128 zero = one; _MM_TRANSPOSE4_PS(zero, tpz, tpy, tpx); // a = A * tpx, b = A * tpy, c = A * tpz // da = dA * tpx, db = dA * tpy, dc = dA * tpz, etc. 
// A is 4x4 matrix given by the rows A_s[0], A_s[1], A_s[ 2], A_s[ 3] __m128 a, b, da, db, d2a, d2b, bP, dbP, d2bP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; // x-dependent vectors _MM_MATVEC4_PS (A_s[0], A_s[1], A_s[ 2], A_s[ 3], tpx, a); _MM_MATVEC4_PS (A_s[4], A_s[5], A_s[ 6], A_s[ 7], tpx, da); _MM_MATVEC4_PS (A_s[8], A_s[9], A_s[10], A_s[11], tpx, d2a); // y-dependent vectors _MM_MATVEC4_PS (A_s[0], A_s[1], A_s[ 2], A_s[ 3], tpy, b); _MM_MATVEC4_PS (A_s[4], A_s[5], A_s[ 6], A_s[ 7], tpy, db); _MM_MATVEC4_PS (A_s[8], A_s[9], A_s[10], A_s[11], tpy, d2b); // Compute cP, dcP, and d2cP products 1/4 at a time to maximize // register reuse and avoid rerereading from memory or cache. // 1st quarter tmp0 = _mm_loadu_ps (P(0)); tmp1 = _mm_loadu_ps (P(1)); tmp2 = _mm_loadu_ps (P(2)); tmp3 = _mm_loadu_ps (P(3)); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, b, bP); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, db, dbP); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, d2b, d2bP); // Compute value _MM_DOT4_PS (a, bP, *val); // Compute gradient _MM_DOT4_PS (da, bP, grad[0]); _MM_DOT4_PS (a, dbP, grad[1]); // Compute hessian _MM_DOT4_PS (d2a, bP, hess[0]); _MM_DOT4_PS (a, d2bP, hess[3]); _MM_DOT4_PS (da, dbP, hess[1]); // Multiply gradients and hessians by appropriate grid inverses float dxInv = spline->x_grid.delta_inv; float dyInv = spline->y_grid.delta_inv; grad[0] *= dxInv; grad[1] *= dyInv; hess[0] *= dxInv*dxInv; hess[3] *= dyInv*dyInv; hess[1] *= dxInv*dyInv; // Copy hessian elements into lower half of 2x2 matrix hess[2] = hess[1]; #undef P } /************************************************************/ /* 3D single-precision, real evaulation functions */ /************************************************************/ /* Value only */ inline void eval_UBspline_3d_s (UBspline_3d_s * restrict spline, double x, double y, double z, float* restrict val) { _mm_prefetch ((const char*) &A_s[ 0],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 1],_MM_HINT_T0); _mm_prefetch ((const char*) 
&A_s[ 2],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 3],_MM_HINT_T0); /// SSE mesh point determination __m128 xyz = _mm_set_ps (x, y, z, 0.0); __m128 x0y0z0 = _mm_set_ps (spline->x_grid.start, spline->y_grid.start, spline->z_grid.start, 0.0); __m128 delta_inv = _mm_set_ps (spline->x_grid.delta_inv,spline->y_grid.delta_inv, spline->z_grid.delta_inv, 0.0); xyz = _mm_sub_ps (xyz, x0y0z0); // ux = (x - x0)/delta_x and same for y and z __m128 uxuyuz = _mm_mul_ps (xyz, delta_inv); // intpart = trunc (ux, uy, uz) __m128i intpart = _mm_cvttps_epi32(uxuyuz); __m128i ixiyiz; _mm_storeu_si128 (&ixiyiz, intpart); // Store to memory for use in C expressions // xmm registers are stored to memory in reverse order int ix = ((int *)&ixiyiz)[3]; int iy = ((int *)&ixiyiz)[2]; int iz = ((int *)&ixiyiz)[1]; int xs = spline->x_stride; int ys = spline->y_stride; // This macro is used to give the pointer to coefficient data. // i and j should be in the range [0,3]. Coefficients are read four // at a time, so no k value is needed. #define P(i,j) (spline->coefs+(ix+(i))*xs+(iy+(j))*ys+(iz)) // Prefetch the data from main memory into cache so it's available // when we need to use it. 
_mm_prefetch ((const char*)P(0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,1), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,3), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,1), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,3), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,1), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,3), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,1), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,3), _MM_HINT_T0); // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // tpz = [t_z^3 t_z^2 t_z 1] __m128 ipart = _mm_cvtepi32_ps (intpart); __m128 txtytz = _mm_sub_ps (uxuyuz, ipart); __m128 one = _mm_set_ps (1.0, 1.0, 1.0, 1.0); __m128 t2 = _mm_mul_ps (txtytz, txtytz); __m128 t3 = _mm_mul_ps (t2, txtytz); __m128 tpx = t3; __m128 tpy = t2; __m128 tpz = txtytz; __m128 zero = one; _MM_TRANSPOSE4_PS(zero, tpz, tpy, tpx); // a = A * tpx, b = A * tpy, c = A * tpz // da = dA * tpx, db = dA * tpy, dc = dA * tpz, etc. // A is 4x4 matrix given by the rows A_s[0], A_s[1], A_s[ 2], A_s[ 3] __m128 a, b, c, cP[4],bcP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; // x-dependent vectors _MM_MATVEC4_PS (A_s[0], A_s[1], A_s[2], A_s[3], tpx, a); // y-dependent vectors _MM_MATVEC4_PS (A_s[0], A_s[1], A_s[2], A_s[3], tpy, b); // z-dependent vectors _MM_MATVEC4_PS (A_s[0], A_s[1], A_s[2], A_s[3], tpz, c); // Compute cP, dcP, and d2cP products 1/4 at a time to maximize // register reuse and avoid rerereading from memory or cache. 
// 1st quarter tmp0 = _mm_loadu_ps (P(0,0)); tmp1 = _mm_loadu_ps (P(0,1)); tmp2 = _mm_loadu_ps (P(0,2)); tmp3 = _mm_loadu_ps (P(0,3)); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, c, cP[0]); // 2nd quarter tmp0 = _mm_loadu_ps (P(1,0)); tmp1 = _mm_loadu_ps (P(1,1)); tmp2 = _mm_loadu_ps (P(1,2)); tmp3 = _mm_loadu_ps (P(1,3)); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, c, cP[1]); // 3rd quarter tmp0 = _mm_loadu_ps (P(2,0)); tmp1 = _mm_loadu_ps (P(2,1)); tmp2 = _mm_loadu_ps (P(2,2)); tmp3 = _mm_loadu_ps (P(2,3)); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, c, cP[2]); // 4th quarter tmp0 = _mm_loadu_ps (P(3,0)); tmp1 = _mm_loadu_ps (P(3,1)); tmp2 = _mm_loadu_ps (P(3,2)); tmp3 = _mm_loadu_ps (P(3,3)); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, c, cP[3]); // Now compute bcP, dbcP, bdcP, d2bcP, bd2cP, and dbdc products _MM_MATVEC4_PS ( cP[0], cP[1], cP[2], cP[3], b, bcP); // Compute value _MM_DOT4_PS (a, bcP, *val); #undef P } /* Value and gradient */ inline void eval_UBspline_3d_s_vg (UBspline_3d_s * restrict spline, double x, double y, double z, float* restrict val, float* restrict grad) { _mm_prefetch ((const char*) &A_s[ 0],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 1],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 2],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 3],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 4],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 5],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 6],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 7],_MM_HINT_T0); /// SSE mesh point determination __m128 xyz = _mm_set_ps (x, y, z, 0.0); __m128 x0y0z0 = _mm_set_ps (spline->x_grid.start, spline->y_grid.start, spline->z_grid.start, 0.0); __m128 delta_inv = _mm_set_ps (spline->x_grid.delta_inv,spline->y_grid.delta_inv, spline->z_grid.delta_inv, 0.0); xyz = _mm_sub_ps (xyz, x0y0z0); // ux = (x - x0)/delta_x and same for y and z __m128 uxuyuz = _mm_mul_ps (xyz, delta_inv); // intpart = trunc (ux, uy, uz) __m128i intpart = _mm_cvttps_epi32(uxuyuz); __m128i ixiyiz; 
_mm_storeu_si128 (&ixiyiz, intpart); // Store to memory for use in C expressions // xmm registers are stored to memory in reverse order int ix = ((int *)&ixiyiz)[3]; int iy = ((int *)&ixiyiz)[2]; int iz = ((int *)&ixiyiz)[1]; int xs = spline->x_stride; int ys = spline->y_stride; // This macro is used to give the pointer to coefficient data. // i and j should be in the range [0,3]. Coefficients are read four // at a time, so no k value is needed. #define P(i,j) (spline->coefs+(ix+(i))*xs+(iy+(j))*ys+(iz)) // Prefetch the data from main memory into cache so it's available // when we need to use it. _mm_prefetch ((const char*)P(0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,1), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,3), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,1), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,3), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,1), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,3), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,1), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,3), _MM_HINT_T0); // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // tpz = [t_z^3 t_z^2 t_z 1] __m128 ipart = _mm_cvtepi32_ps (intpart); __m128 txtytz = _mm_sub_ps (uxuyuz, ipart); __m128 one = _mm_set_ps (1.0, 1.0, 1.0, 1.0); __m128 t2 = _mm_mul_ps (txtytz, txtytz); __m128 t3 = _mm_mul_ps (t2, txtytz); __m128 tpx = t3; __m128 tpy = t2; __m128 tpz = txtytz; __m128 zero = one; _MM_TRANSPOSE4_PS(zero, tpz, tpy, tpx); // a = A * tpx, b = A * tpy, c = A * tpz // da = dA * tpx, db = dA * tpy, dc = dA * tpz, etc. 
// A is 4x4 matrix given by the rows A_s[0], A_s[1], A_s[ 2], A_s[ 3] __m128 a, b, c, da, db, dc, cP[4], dcP[4], d2cP[4], bcP, dbcP, bdcP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; // x-dependent vectors _MM_MATVEC4_PS (A_s[0], A_s[1], A_s[2], A_s[3], tpx, a); _MM_MATVEC4_PS (A_s[4], A_s[5], A_s[6], A_s[7], tpx, da); // y-dependent vectors _MM_MATVEC4_PS (A_s[0], A_s[1], A_s[2], A_s[3], tpy, b); _MM_MATVEC4_PS (A_s[4], A_s[5], A_s[6], A_s[7], tpy, db); // z-dependent vectors _MM_MATVEC4_PS (A_s[0], A_s[1], A_s[2], A_s[3], tpz, c); _MM_MATVEC4_PS (A_s[4], A_s[5], A_s[6], A_s[7], tpz, dc); // Compute cP, dcP, and d2cP products 1/4 at a time to maximize // register reuse and avoid rerereading from memory or cache. // 1st quarter tmp0 = _mm_loadu_ps (P(0,0)); tmp1 = _mm_loadu_ps (P(0,1)); tmp2 = _mm_loadu_ps (P(0,2)); tmp3 = _mm_loadu_ps (P(0,3)); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, c, cP[0]); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, dc, dcP[0]); // 2nd quarter tmp0 = _mm_loadu_ps (P(1,0)); tmp1 = _mm_loadu_ps (P(1,1)); tmp2 = _mm_loadu_ps (P(1,2)); tmp3 = _mm_loadu_ps (P(1,3)); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, c, cP[1]); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, dc, dcP[1]); // 3rd quarter tmp0 = _mm_loadu_ps (P(2,0)); tmp1 = _mm_loadu_ps (P(2,1)); tmp2 = _mm_loadu_ps (P(2,2)); tmp3 = _mm_loadu_ps (P(2,3)); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, c, cP[2]); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, dc, dcP[2]); // 4th quarter tmp0 = _mm_loadu_ps (P(3,0)); tmp1 = _mm_loadu_ps (P(3,1)); tmp2 = _mm_loadu_ps (P(3,2)); tmp3 = _mm_loadu_ps (P(3,3)); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, c, cP[3]); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, dc, dcP[3]); // Now compute bcP, dbcP, bdcP, d2bcP, bd2cP, and dbdc products _MM_MATVEC4_PS ( cP[0], cP[1], cP[2], cP[3], b, bcP); _MM_MATVEC4_PS ( cP[0], cP[1], cP[2], cP[3], db, dbcP); _MM_MATVEC4_PS ( dcP[0], dcP[1], dcP[2], dcP[3], b, bdcP); // Compute value _MM_DOT4_PS (a, bcP, *val); // Compute gradient _MM_DOT4_PS (da, 
bcP, grad[0]); _MM_DOT4_PS (a, dbcP, grad[1]); _MM_DOT4_PS (a, bdcP, grad[2]); // Multiply gradients and hessians by appropriate grid inverses float dxInv = spline->x_grid.delta_inv; float dyInv = spline->y_grid.delta_inv; float dzInv = spline->z_grid.delta_inv; grad[0] *= dxInv; grad[1] *= dyInv; grad[2] *= dzInv; #undef P } /* Value, gradient, and laplacian */ inline void eval_UBspline_3d_s_vgl (UBspline_3d_s * restrict spline, double x, double y, double z, float* restrict val, float* restrict grad, float* restrict lapl) { _mm_prefetch ((const char*) &A_s[ 0],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 1],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 2],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 3],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 4],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 5],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 6],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 7],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 8],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 9],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[10],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[11],_MM_HINT_T0); /// SSE mesh point determination __m128 xyz = _mm_set_ps (x, y, z, 0.0); __m128 x0y0z0 = _mm_set_ps (spline->x_grid.start, spline->y_grid.start, spline->z_grid.start, 0.0); __m128 delta_inv = _mm_set_ps (spline->x_grid.delta_inv,spline->y_grid.delta_inv, spline->z_grid.delta_inv, 0.0); xyz = _mm_sub_ps (xyz, x0y0z0); // ux = (x - x0)/delta_x and same for y and z __m128 uxuyuz = _mm_mul_ps (xyz, delta_inv); // intpart = trunc (ux, uy, uz) __m128i intpart = _mm_cvttps_epi32(uxuyuz); __m128i ixiyiz; _mm_storeu_si128 (&ixiyiz, intpart); // Store to memory for use in C expressions // xmm registers are stored to memory in reverse order int ix = ((int *)&ixiyiz)[3]; int iy = ((int *)&ixiyiz)[2]; int iz = ((int *)&ixiyiz)[1]; int xs = spline->x_stride; int ys = spline->y_stride; // This macro is used to give the pointer to coefficient data. 
// i and j should be in the range [0,3]. Coefficients are read four // at a time, so no k value is needed. #define P(i,j) (spline->coefs+(ix+(i))*xs+(iy+(j))*ys+(iz)) // Prefetch the data from main memory into cache so it's available // when we need to use it. _mm_prefetch ((const char*)P(0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,1), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,3), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,1), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,3), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,1), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,3), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,1), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,3), _MM_HINT_T0); // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // tpz = [t_z^3 t_z^2 t_z 1] __m128 ipart = _mm_cvtepi32_ps (intpart); __m128 txtytz = _mm_sub_ps (uxuyuz, ipart); __m128 one = _mm_set_ps (1.0, 1.0, 1.0, 1.0); __m128 t2 = _mm_mul_ps (txtytz, txtytz); __m128 t3 = _mm_mul_ps (t2, txtytz); __m128 tpx = t3; __m128 tpy = t2; __m128 tpz = txtytz; __m128 zero = one; _MM_TRANSPOSE4_PS(zero, tpz, tpy, tpx); // a = A * tpx, b = A * tpy, c = A * tpz // da = dA * tpx, db = dA * tpy, dc = dA * tpz, etc. 
// A is 4x4 matrix given by the rows A_s[0], A_s[1], A_s[ 2], A_s[ 3] __m128 a, b, c, da, db, dc, d2a, d2b, d2c, cP[4], dcP[4], d2cP[4], bcP, dbcP, bdcP, d2bcP, dbdcP, bd2cP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; // x-dependent vectors _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpx, a); _MM_MATVEC4_PS (A_s[ 4], A_s[ 5], A_s[ 6], A_s[ 7], tpx, da); _MM_MATVEC4_PS (A_s[ 8], A_s[ 9], A_s[10], A_s[11], tpx, d2a); // y-dependent vectors _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpy, b); _MM_MATVEC4_PS (A_s[ 4], A_s[ 5], A_s[ 6], A_s[ 7], tpy, db); _MM_MATVEC4_PS (A_s[ 8], A_s[ 9], A_s[10], A_s[11], tpy, d2b); // z-dependent vectors _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpz, c); _MM_MATVEC4_PS (A_s[ 4], A_s[ 5], A_s[ 6], A_s[ 7], tpz, dc); _MM_MATVEC4_PS (A_s[ 8], A_s[ 9], A_s[10], A_s[11], tpz, d2c); // Compute cP, dcP, and d2cP products 1/4 at a time to maximize // register reuse and avoid rerereading from memory or cache. // 1st quarter tmp0 = _mm_loadu_ps (P(0,0)); tmp1 = _mm_loadu_ps (P(0,1)); tmp2 = _mm_loadu_ps (P(0,2)); tmp3 = _mm_loadu_ps (P(0,3)); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, c, cP[0]); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, dc, dcP[0]); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, d2c, d2cP[0]); // 2nd quarter tmp0 = _mm_loadu_ps (P(1,0)); tmp1 = _mm_loadu_ps (P(1,1)); tmp2 = _mm_loadu_ps (P(1,2)); tmp3 = _mm_loadu_ps (P(1,3)); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, c, cP[1]); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, dc, dcP[1]); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, d2c, d2cP[1]); // 3rd quarter tmp0 = _mm_loadu_ps (P(2,0)); tmp1 = _mm_loadu_ps (P(2,1)); tmp2 = _mm_loadu_ps (P(2,2)); tmp3 = _mm_loadu_ps (P(2,3)); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, c, cP[2]); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, dc, dcP[2]); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, d2c, d2cP[2]); // 4th quarter tmp0 = _mm_loadu_ps (P(3,0)); tmp1 = _mm_loadu_ps (P(3,1)); tmp2 = _mm_loadu_ps (P(3,2)); tmp3 = _mm_loadu_ps (P(3,3)); 
_MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, c, cP[3]); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, dc, dcP[3]); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, d2c, d2cP[3]); // Now compute bcP, dbcP, bdcP, d2bcP, bd2cP, and dbdc products _MM_MATVEC4_PS ( cP[0], cP[1], cP[2], cP[3], b, bcP); _MM_MATVEC4_PS ( cP[0], cP[1], cP[2], cP[3], db, dbcP); _MM_MATVEC4_PS ( dcP[0], dcP[1], dcP[2], dcP[3], b, bdcP); _MM_MATVEC4_PS ( cP[0], cP[1], cP[2], cP[3], d2b, d2bcP); _MM_MATVEC4_PS (d2cP[0], d2cP[1], d2cP[2], d2cP[3], b, bd2cP); _MM_MATVEC4_PS ( dcP[0], dcP[1], dcP[2], dcP[3], db, dbdcP); // Compute value _MM_DOT4_PS (a, bcP, *val); // Compute gradient _MM_DOT4_PS (da, bcP, grad[0]); _MM_DOT4_PS (a, dbcP, grad[1]); _MM_DOT4_PS (a, bdcP, grad[2]); // Compute laplacian float lx, ly, lz; _MM_DOT4_PS (d2a, bcP, lx); _MM_DOT4_PS (a, d2bcP, ly); _MM_DOT4_PS (a, bd2cP, lz); // Multiply gradients and hessians by appropriate grid inverses float dxInv = spline->x_grid.delta_inv; float dyInv = spline->y_grid.delta_inv; float dzInv = spline->z_grid.delta_inv; grad[0] *= dxInv; grad[1] *= dyInv; grad[2] *= dzInv; lx *= dxInv*dxInv; ly *= dyInv*dyInv; lz *= dzInv*dzInv; *lapl = lx + ly + lz; #undef P } /* Value, gradient, and Hessian */ inline void eval_UBspline_3d_s_vgh (UBspline_3d_s * restrict spline, double x, double y, double z, float* restrict val, float* restrict grad, float* restrict hess) { _mm_prefetch ((const char*) &A_s[ 0],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 1],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 2],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 3],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 4],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 5],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 6],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 7],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 8],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[ 9],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[10],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[11],_MM_HINT_T0); 
/// SSE mesh point determination __m128 xyz = _mm_set_ps (x, y, z, 0.0); __m128 x0y0z0 = _mm_set_ps (spline->x_grid.start, spline->y_grid.start, spline->z_grid.start, 0.0); __m128 delta_inv = _mm_set_ps (spline->x_grid.delta_inv,spline->y_grid.delta_inv, spline->z_grid.delta_inv, 0.0); xyz = _mm_sub_ps (xyz, x0y0z0); // ux = (x - x0)/delta_x and same for y and z __m128 uxuyuz = _mm_mul_ps (xyz, delta_inv); // intpart = trunc (ux, uy, uz) __m128i intpart = _mm_cvttps_epi32(uxuyuz); __m128i ixiyiz; _mm_storeu_si128 (&ixiyiz, intpart); // Store to memory for use in C expressions // xmm registers are stored to memory in reverse order int ix = ((int *)&ixiyiz)[3]; int iy = ((int *)&ixiyiz)[2]; int iz = ((int *)&ixiyiz)[1]; int xs = spline->x_stride; int ys = spline->y_stride; // This macro is used to give the pointer to coefficient data. // i and j should be in the range [0,3]. Coefficients are read four // at a time, so no k value is needed. #define P(i,j) (spline->coefs+(ix+(i))*xs+(iy+(j))*ys+(iz)) // Prefetch the data from main memory into cache so it's available // when we need to use it. 
_mm_prefetch ((const char*)P(0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,1), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,3), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,1), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,3), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,1), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,3), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,1), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,3), _MM_HINT_T0); // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // tpz = [t_z^3 t_z^2 t_z 1] __m128 ipart = _mm_cvtepi32_ps (intpart); __m128 txtytz = _mm_sub_ps (uxuyuz, ipart); __m128 one = _mm_set_ps (1.0, 1.0, 1.0, 1.0); __m128 t2 = _mm_mul_ps (txtytz, txtytz); __m128 t3 = _mm_mul_ps (t2, txtytz); __m128 tpx = t3; __m128 tpy = t2; __m128 tpz = txtytz; __m128 zero = one; _MM_TRANSPOSE4_PS(zero, tpz, tpy, tpx); // a = A * tpx, b = A * tpy, c = A * tpz // da = dA * tpx, db = dA * tpy, dc = dA * tpz, etc. 
// A is 4x4 matrix given by the rows A_s[0], A_s[1], A_s[ 2], A_s[ 3] __m128 a, b, c, da, db, dc, d2a, d2b, d2c, cP[4], dcP[4], d2cP[4], bcP, dbcP, bdcP, d2bcP, dbdcP, bd2cP, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; // x-dependent vectors _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpx, a); _MM_MATVEC4_PS (A_s[ 4], A_s[ 5], A_s[ 6], A_s[ 7], tpx, da); _MM_MATVEC4_PS (A_s[ 8], A_s[ 9], A_s[10], A_s[11], tpx, d2a); // y-dependent vectors _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpy, b); _MM_MATVEC4_PS (A_s[ 4], A_s[ 5], A_s[ 6], A_s[ 7], tpy, db); _MM_MATVEC4_PS (A_s[ 8], A_s[ 9], A_s[10], A_s[11], tpy, d2b); // z-dependent vectors _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpz, c); _MM_MATVEC4_PS (A_s[ 4], A_s[ 5], A_s[ 6], A_s[ 7], tpz, dc); _MM_MATVEC4_PS (A_s[ 8], A_s[ 9], A_s[10], A_s[11], tpz, d2c); // Compute cP, dcP, and d2cP products 1/4 at a time to maximize // register reuse and avoid rerereading from memory or cache. // 1st quarter tmp0 = _mm_loadu_ps (P(0,0)); tmp1 = _mm_loadu_ps (P(0,1)); tmp2 = _mm_loadu_ps (P(0,2)); tmp3 = _mm_loadu_ps (P(0,3)); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, c, cP[0]); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, dc, dcP[0]); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, d2c, d2cP[0]); // 2nd quarter tmp0 = _mm_loadu_ps (P(1,0)); tmp1 = _mm_loadu_ps (P(1,1)); tmp2 = _mm_loadu_ps (P(1,2)); tmp3 = _mm_loadu_ps (P(1,3)); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, c, cP[1]); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, dc, dcP[1]); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, d2c, d2cP[1]); // 3rd quarter tmp0 = _mm_loadu_ps (P(2,0)); tmp1 = _mm_loadu_ps (P(2,1)); tmp2 = _mm_loadu_ps (P(2,2)); tmp3 = _mm_loadu_ps (P(2,3)); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, c, cP[2]); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, dc, dcP[2]); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, d2c, d2cP[2]); // 4th quarter tmp0 = _mm_loadu_ps (P(3,0)); tmp1 = _mm_loadu_ps (P(3,1)); tmp2 = _mm_loadu_ps (P(3,2)); tmp3 = _mm_loadu_ps (P(3,3)); 
_MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, c, cP[3]); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, dc, dcP[3]); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, d2c, d2cP[3]); // Now compute bcP, dbcP, bdcP, d2bcP, bd2cP, and dbdc products _MM_MATVEC4_PS ( cP[0], cP[1], cP[2], cP[3], b, bcP); _MM_MATVEC4_PS ( cP[0], cP[1], cP[2], cP[3], db, dbcP); _MM_MATVEC4_PS ( dcP[0], dcP[1], dcP[2], dcP[3], b, bdcP); _MM_MATVEC4_PS ( cP[0], cP[1], cP[2], cP[3], d2b, d2bcP); _MM_MATVEC4_PS (d2cP[0], d2cP[1], d2cP[2], d2cP[3], b, bd2cP); _MM_MATVEC4_PS ( dcP[0], dcP[1], dcP[2], dcP[3], db, dbdcP); // Compute value _MM_DOT4_PS (a, bcP, *val); // Compute gradient _MM_DOT4_PS (da, bcP, grad[0]); _MM_DOT4_PS (a, dbcP, grad[1]); _MM_DOT4_PS (a, bdcP, grad[2]); // Compute hessian _MM_DOT4_PS (d2a, bcP, hess[0]); _MM_DOT4_PS (a, d2bcP, hess[4]); _MM_DOT4_PS (a, bd2cP, hess[8]); _MM_DOT4_PS (da, dbcP, hess[1]); _MM_DOT4_PS (da, bdcP, hess[2]); _MM_DOT4_PS (a, dbdcP, hess[5]); // Multiply gradients and hessians by appropriate grid inverses float dxInv = spline->x_grid.delta_inv; float dyInv = spline->y_grid.delta_inv; float dzInv = spline->z_grid.delta_inv; grad[0] *= dxInv; grad[1] *= dyInv; grad[2] *= dzInv; hess[0] *= dxInv*dxInv; hess[4] *= dyInv*dyInv; hess[8] *= dzInv*dzInv; hess[1] *= dxInv*dyInv; hess[2] *= dxInv*dzInv; hess[5] *= dyInv*dzInv; // Copy hessian elements into lower half of 3x3 matrix hess[3] = hess[1]; hess[6] = hess[2]; hess[7] = hess[5]; #undef P } #undef _MM_MATVEC4_PS #undef _MM_DOT4_PS #endif einspline-0.9.2/src/multi_bspline_eval_cuda.h0000664000113000011300000001001411217254173016223 00000000000000#ifndef MULTI_BSPLINE_EVAL_CUDA_H #define MULTI_BSPLINE_EVAL_CUDA_H #include "multi_bspline_structs_cuda.h" //////// // 1D // //////// // Single-precision real extern "C" void eval_multi_multi_UBspline_1d_s_cuda (multi_UBspline_1d_s_cuda *spline, float *pos_d, float *vals_d[], int num); extern "C" void eval_multi_multi_UBspline_1d_s_vgl_cuda (multi_UBspline_1d_s_cuda *spline, float 
*pos_d, float *vals_d[], float *grads_d[], float *lapl_d[], int num); // Double-precision real extern "C" void eval_multi_multi_UBspline_1d_d_cuda (multi_UBspline_1d_d_cuda *spline, double *pos_d, double *vals_d[], int num); extern "C" void eval_multi_multi_UBspline_1d_d_vgl_cuda (multi_UBspline_1d_d_cuda *spline, double *pos_d, double *vals_d[], double *grad_lapl_d[], int num, int row_stride); // Single-precision complex extern "C" void eval_multi_multi_UBspline_1d_c_cuda (multi_UBspline_1d_c_cuda *spline, float *pos_d, complex_float *vals_d[], int num); extern "C" void eval_multi_multi_UBspline_1d_c_vgl_cuda (multi_UBspline_1d_c_cuda *spline, float *pos_d, complex_float *vals_d[], complex_float *grad_lapl_d[], int num, int row_stride); // Doublele-precision complex extern "C" void eval_multi_multi_UBspline_1d_z_cuda (multi_UBspline_1d_z_cuda *spline, double *pos_d, complex_double *vals_d[], int num); extern "C" void eval_multi_multi_UBspline_1d_z_vgl_cuda (multi_UBspline_1d_z_cuda *spline, double *pos_d, complex_double *vals_d[], complex_double *grad_lapl_d[], int num, int row_stride); //////// // 3D // //////// // Single-precision real extern "C" void eval_multi_multi_UBspline_3d_s_cuda (multi_UBspline_3d_s_cuda *spline, float *pos_d, float *vals_d[], int num); extern "C" void eval_multi_multi_UBspline_3d_s_sign_cuda (multi_UBspline_3d_s_cuda *spline, float *pos_d, float *sign_d, float *vals_d[], int num); extern "C" void eval_multi_multi_UBspline_3d_s_vgh_cuda (multi_UBspline_3d_s_cuda *spline, float *pos_d, float *vals_d[], float *grads_d[], float *hess_d[], int num); extern "C" void eval_multi_multi_UBspline_3d_s_vgl_cuda (multi_UBspline_3d_s_cuda *spline, float *pos_d, float *Linv_d, float *vals_d[], float *grad_lapl_d[], int num, int row_stride); extern "C" void eval_multi_multi_UBspline_3d_s_vgl_sign_cuda (multi_UBspline_3d_s_cuda *spline, float *pos_d, float *sign_d, float *Linv_d, float *vals_d[], float *grad_lapl_d[], int num, int row_stride); // 
Double-precision real extern "C" void eval_multi_multi_UBspline_3d_d_cuda (multi_UBspline_3d_d_cuda *spline, double *pos_d, double *vals_d[], int num); extern "C" void eval_multi_multi_UBspline_3d_d_vgh_cuda (multi_UBspline_3d_d_cuda *spline, double *pos_d, double *vals_d[], double *grads_d[], double *hess_d[], int num); extern "C" void eval_multi_multi_UBspline_3d_d_vgl_cuda (multi_UBspline_3d_d_cuda *spline, double *pos_d, double *Linv_d, double *vals_d[], double *grad_lapl_d[], int num, int row_stride); // Single-precision complex extern "C" void eval_multi_multi_UBspline_3d_c_cuda (multi_UBspline_3d_c_cuda *spline, float *pos_d, complex_float *vals_d[], int num); extern "C" void eval_multi_multi_UBspline_3d_c_vgh_cuda (multi_UBspline_3d_c_cuda *spline, float *pos_d, complex_float *vals_d[], complex_float *grads_d[], complex_float *hess_d[], int num); extern "C" void eval_multi_multi_UBspline_3d_c_vgl_cuda (multi_UBspline_3d_c_cuda *spline, float *pos_d, float *Linv_d, complex_float *vals_d[], complex_float *grad_lapl_d[], int num, int row_stride); // Doublele-precision complex extern "C" void eval_multi_multi_UBspline_3d_z_cuda (multi_UBspline_3d_z_cuda *spline, double *pos_d, complex_double *vals_d[], int num); extern "C" void eval_multi_multi_UBspline_3d_z_vgh_cuda (multi_UBspline_3d_z_cuda *spline, double *pos_d, complex_double *vals_d[], complex_double *grads_d[], complex_double *hess_d[], int num); extern "C" void eval_multi_multi_UBspline_3d_z_vgl_cuda (multi_UBspline_3d_z_cuda *spline, double *pos_d, double *Linv_d, complex_double *vals_d[], complex_double *grad_lapl_d[], int num, int row_stride); #endif einspline-0.9.2/src/nubspline_eval_sse_d.h0000664000113000011300000011326311012400563015534 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. 
// // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. // // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #ifndef NUBSPLINE_EVAL_SSE_D_H #define NUBSPLINE_EVAL_SSE_D_H #include #include #include "nubspline_structs.h" #ifdef HAVE_SSE2 #include #include #endif #ifdef HAVE_SSE3 #define _MM_DDOT4_PD(a0, a1, a2, a3, b0, b1, b2, b3, r) \ do { \ __m128d t0 = _mm_add_pd(_mm_mul_pd (a0, b0),_mm_mul_pd (a1, b1)); \ __m128d t1 = _mm_add_pd(_mm_mul_pd (a2, b2),_mm_mul_pd (a3, b3)); \ r = _mm_hadd_pd (t0, t1); \ } while(0); #define _MM_DOT4_PD(a0, a1, b0, b1, p) \ do { \ __m128d t0 = _mm_add_pd(_mm_mul_pd (a0, b0),_mm_mul_pd (a1, b1)); \ __m128d t1 = _mm_hadd_pd (t0,t0); \ _mm_store_sd (&(p), t1); \ } while (0); #else #define _MM_DDOT4_PD(a0, a1, a2, a3, b0, b1, b2, b3, r) \ do { \ __m128d t0 = _mm_add_pd(_mm_mul_pd (a0, b0),_mm_mul_pd (a1, b1)); \ __m128d t1 = _mm_add_pd(_mm_mul_pd (a2, b2),_mm_mul_pd (a3, b3)); \ r = _mm_add_pd(_mm_unpacklo_pd(t0,t1),_mm_unpackhi_pd(t0,t1)); \ } while(0); #define _MM_DOT4_PD(a0, a1, b0, b1, p) \ do { \ __m128d t0 = _mm_add_pd(_mm_mul_pd (a0, b0),_mm_mul_pd (a1, b1)); \ __m128d t1 = \ _mm_add_pd (_mm_unpacklo_pd(t0,t0), _mm_unpackhi_pd(t0,t0)); \ _mm_store_sd (&(p), t1); \ } while (0); #endif 
/************************************************************/ /* 1D single-precision, real evaulation functions */ /************************************************************/ /* Value only */ inline void eval_NUBspline_1d_d (NUBspline_1d_d * restrict spline, double x, double* restrict val) { double bfuncs[4]; int i = get_NUBasis_funcs_d (spline->x_basis, x, bfuncs); double* restrict coefs = spline->coefs; *val = (coefs[i+0]*bfuncs[0] +coefs[i+1]*bfuncs[1] + coefs[i+2]*bfuncs[2] +coefs[i+3]*bfuncs[3]); } /* Value and first derivative */ inline void eval_NUBspline_1d_d_vg (NUBspline_1d_d * restrict spline, double x, double* restrict val, double* restrict grad) { double bfuncs[4], dbfuncs[4]; int i = get_NUBasis_dfuncs_d (spline->x_basis, x, bfuncs, dbfuncs); double* restrict coefs = spline->coefs; *val = (coefs[i+0]* bfuncs[0] + coefs[i+1]* bfuncs[1] + coefs[i+2]* bfuncs[2] + coefs[i+3]* bfuncs[3]); *grad = (coefs[i+0]*dbfuncs[0] + coefs[i+1]*dbfuncs[1] + coefs[i+2]*dbfuncs[2] + coefs[i+3]*dbfuncs[3]); } /* Value, first derivative, and second derivative */ inline void eval_NUBspline_1d_d_vgl (NUBspline_1d_d * restrict spline, double x, double* restrict val, double* restrict grad, double* restrict lapl) { double bfuncs[4], dbfuncs[4], d2bfuncs[4]; int i = get_NUBasis_d2funcs_d (spline->x_basis, x, bfuncs, dbfuncs, d2bfuncs); double* restrict coefs = spline->coefs; *val = (coefs[i+0]* bfuncs[0] + coefs[i+1]* bfuncs[1] + coefs[i+2]* bfuncs[2] + coefs[i+3]* bfuncs[3]); *grad = (coefs[i+0]* dbfuncs[0] + coefs[i+1]* dbfuncs[1] + coefs[i+2]* dbfuncs[2] + coefs[i+3]* dbfuncs[3]); *lapl = (coefs[i+0]*d2bfuncs[0] + coefs[i+1]*d2bfuncs[1] + coefs[i+2]*d2bfuncs[2] + coefs[i+3]*d2bfuncs[3]); } inline void eval_NUBspline_1d_d_vgh (NUBspline_1d_d * restrict spline, double x, double* restrict val, double* restrict grad, double* restrict hess) { eval_NUBspline_1d_d_vgl (spline, x, val, grad, hess); } /************************************************************/ /* 2D 
single-precision, real evaulation functions */ /************************************************************/ /* Value only */ inline void eval_NUBspline_2d_d (NUBspline_2d_d * restrict spline, double x, double y, double* restrict val) { __m128d a01, b01, bP01, a23, b23, bP23, tmp0, tmp1, tmp2, tmp3; int ix = get_NUBasis_funcs_sse_d (spline->x_basis, x, &a01, &a23); int iy = get_NUBasis_funcs_sse_d (spline->y_basis, y, &b01, &b23); int xs = spline->x_stride; #define P(i,j) (spline->coefs+(ix+(i))*xs+(iy+(j))) // Now compute bP, dbP, d2bP products tmp0 = _mm_loadu_pd (P(0,0)); tmp1 = _mm_loadu_pd(P(0,2)); tmp2 = _mm_loadu_pd (P(1,0)); tmp3 = _mm_loadu_pd(P(1,2)); _MM_DDOT4_PD (tmp0, tmp1, tmp2, tmp3, b01, b23, b01, b23, bP01); tmp0 = _mm_loadu_pd (P(2,0)); tmp1 = _mm_loadu_pd(P(2,2)); tmp2 = _mm_loadu_pd (P(3,0)); tmp3 = _mm_loadu_pd(P(3,2)); _MM_DDOT4_PD (tmp0, tmp1, tmp2, tmp3, b01, b23, b01, b23, bP23); // Compute value _MM_DOT4_PD (a01, a23, bP01, bP23, *val); #undef P } /* Value and gradient */ inline void eval_NUBspline_2d_d_vg (NUBspline_2d_d * restrict spline, double x, double y, double* restrict val, double* restrict grad) { __m128d a01, b01, da01, db01, bP01, dbP01, a23, b23, da23, db23, bP23, dbP23, tmp0, tmp1, tmp2, tmp3; int ix = get_NUBasis_dfuncs_sse_d (spline->x_basis, x, &a01, &a23, &da01, &da23); int iy = get_NUBasis_dfuncs_sse_d (spline->y_basis, y, &b01, &b23, &db01, &db23); int xs = spline->x_stride; #define P(i,j) (spline->coefs+(ix+(i))*xs+(iy+(j))) // Now compute bP, dbP, d2bP products tmp0 = _mm_loadu_pd (P(0,0)); tmp1 = _mm_loadu_pd(P(0,2)); tmp2 = _mm_loadu_pd (P(1,0)); tmp3 = _mm_loadu_pd(P(1,2)); _MM_DDOT4_PD (tmp0, tmp1, tmp2, tmp3, b01, b23, b01, b23, bP01); _MM_DDOT4_PD (tmp0, tmp1, tmp2, tmp3, db01, db23, db01, db23, dbP01); tmp0 = _mm_loadu_pd (P(2,0)); tmp1 = _mm_loadu_pd(P(2,2)); tmp2 = _mm_loadu_pd (P(3,0)); tmp3 = _mm_loadu_pd(P(3,2)); _MM_DDOT4_PD (tmp0, tmp1, tmp2, tmp3, b01, b23, b01, b23, bP23); _MM_DDOT4_PD (tmp0, tmp1, 
tmp2, tmp3, db01, db23, db01, db23, dbP23); // Compute value _MM_DOT4_PD (a01, a23, bP01, bP23, *val); // Compute gradient _MM_DOT4_PD (da01, da23, bP01, bP23, grad[0]); _MM_DOT4_PD (a01, a23, dbP01, dbP23, grad[1]); #undef P } /* Value, gradient, and laplacian */ inline void eval_NUBspline_2d_d_vgl (NUBspline_2d_d * restrict spline, double x, double y, double* restrict val, double* restrict grad, double* restrict lapl) { __m128d a01, b01, da01, db01, d2a01, d2b01, a23, b23, da23, db23, d2a23, d2b23, bP01, dbP01, d2bP01, bP23, dbP23, d2bP23, tmp0, tmp1, tmp2, tmp3; int ix = get_NUBasis_d2funcs_sse_d (spline->x_basis, x, &a01, &a23, &da01, &da23, &d2a01, &d2a23); int iy = get_NUBasis_d2funcs_sse_d (spline->y_basis, y, &b01, &b23, &db01, &db23, &d2b01, &d2b23); int xs = spline->x_stride; #define P(i,j) (spline->coefs+(ix+(i))*xs+(iy+(j))) // Now compute bP, dbP, d2bP products tmp0 = _mm_loadu_pd (P(0,0)); tmp1 = _mm_loadu_pd(P(0,2)); tmp2 = _mm_loadu_pd (P(1,0)); tmp3 = _mm_loadu_pd(P(1,2)); _MM_DDOT4_PD (tmp0, tmp1, tmp2, tmp3, b01, b23, b01, b23, bP01); _MM_DDOT4_PD (tmp0, tmp1, tmp2, tmp3, db01, db23, db01, db23, dbP01); _MM_DDOT4_PD (tmp0, tmp1, tmp2, tmp3, d2b01, d2b23, d2b01, d2b23, d2bP01); tmp0 = _mm_loadu_pd (P(2,0)); tmp1 = _mm_loadu_pd(P(2,2)); tmp2 = _mm_loadu_pd (P(3,0)); tmp3 = _mm_loadu_pd(P(3,2)); _MM_DDOT4_PD (tmp0, tmp1, tmp2, tmp3, b01, b23, b01, b23, bP23); _MM_DDOT4_PD (tmp0, tmp1, tmp2, tmp3, db01, db23, db01, db23, dbP23); _MM_DDOT4_PD (tmp0, tmp1, tmp2, tmp3, d2b01, d2b23, d2b01, d2b23, d2bP23); // Compute value _MM_DOT4_PD (a01, a23, bP01, bP23, *val); // Compute gradient _MM_DOT4_PD (da01, da23, bP01, bP23, grad[0]); _MM_DOT4_PD (a01, a23, dbP01, dbP23, grad[1]); // Compute laplacian double d2x, d2y; _MM_DOT4_PD (d2a01, d2a23, bP01, bP23, d2x); _MM_DOT4_PD ( a01, a23, d2bP01, d2bP23, d2y); *lapl = d2x + d2y; #undef P } /* Value, gradient, and Hessian */ inline void eval_NUBspline_2d_d_vgh (NUBspline_2d_d * restrict spline, double x, double 
y, double* restrict val, double* restrict grad, double* restrict hess) { __m128d a01, b01, da01, db01, d2a01, d2b01, a23, b23, da23, db23, d2a23, d2b23, bP01, dbP01, d2bP01, bP23, dbP23, d2bP23, tmp0, tmp1, tmp2, tmp3; int ix = get_NUBasis_d2funcs_sse_d (spline->x_basis, x, &a01, &a23, &da01, &da23, &d2a01, &d2a23); int iy = get_NUBasis_d2funcs_sse_d (spline->y_basis, y, &b01, &b23, &db01, &db23, &d2b01, &d2b23); int xs = spline->x_stride; #define P(i,j) (spline->coefs+(ix+(i))*xs+(iy+(j))) // Now compute bP, dbP, d2bP products tmp0 = _mm_loadu_pd (P(0,0)); tmp1 = _mm_loadu_pd(P(0,2)); tmp2 = _mm_loadu_pd (P(1,0)); tmp3 = _mm_loadu_pd(P(1,2)); _MM_DDOT4_PD (tmp0, tmp1, tmp2, tmp3, b01, b23, b01, b23, bP01); _MM_DDOT4_PD (tmp0, tmp1, tmp2, tmp3, db01, db23, db01, db23, dbP01); _MM_DDOT4_PD (tmp0, tmp1, tmp2, tmp3, d2b01, d2b23, d2b01, d2b23, d2bP01); tmp0 = _mm_loadu_pd (P(2,0)); tmp1 = _mm_loadu_pd(P(2,2)); tmp2 = _mm_loadu_pd (P(3,0)); tmp3 = _mm_loadu_pd(P(3,2)); _MM_DDOT4_PD (tmp0, tmp1, tmp2, tmp3, b01, b23, b01, b23, bP23); _MM_DDOT4_PD (tmp0, tmp1, tmp2, tmp3, db01, db23, db01, db23, dbP23); _MM_DDOT4_PD (tmp0, tmp1, tmp2, tmp3, d2b01, d2b23, d2b01, d2b23, d2bP23); // Compute value _MM_DOT4_PD (a01, a23, bP01, bP23, *val); // Compute gradient _MM_DOT4_PD (da01, da23, bP01, bP23, grad[0]); _MM_DOT4_PD (a01, a23, dbP01, dbP23, grad[1]); // Compute hessian _MM_DOT4_PD (d2a01, d2a23, bP01, bP23, hess[0]); _MM_DOT4_PD ( a01, a23, d2bP01, d2bP23, hess[3]); _MM_DOT4_PD ( da01, da23, dbP01, dbP23, hess[1]); hess[2] = hess[1]; #undef P } /************************************************************/ /* 3D single-precision, real evaulation functions */ /************************************************************/ /* Value only */ inline void eval_NUBspline_3d_d (NUBspline_3d_d * restrict spline, double x, double y, double z, double* restrict val) { __m128d a01, b01, c01, a23, b23, c23, cP[8], dcP[8], d2cP[8], bcP01, dbcP01, bdcP01, bcP23, dbcP23, bdcP23, tmp0, tmp1, 
tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; int ix = get_NUBasis_funcs_sse_d (spline->x_basis, x, &a01, &a23); int iy = get_NUBasis_funcs_sse_d (spline->y_basis, y, &b01, &b23); int iz = get_NUBasis_funcs_sse_d (spline->z_basis, z, &c01, &c23); int xs = spline->x_stride; int ys = spline->y_stride; int ysb = ys+2; int ys2 = 2*ys; int ys2b = 2*ys + 2; int ys3 = 3*ys; int ys3b = 3*ys + 2; // This macro is used to give the pointer to coefficient data. // i and j should be in the range [0,3]. Coefficients are read four // at a time, so no k value is needed. #define P(i,j,k) (spline->coefs+(ix+(i))*xs+(iy+(j))*ys+(iz+k)) double *p = P(0,0,0); // Prefetch the data from main memory into cache so it's available // when we need to use it. _mm_prefetch ((const char*)(p ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2 ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ysb ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2 ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2b), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3 ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3b), _MM_HINT_T0); p += xs; _mm_prefetch ((const char*)(p ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2 ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ysb ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2 ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2b), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3 ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3b), _MM_HINT_T0); p += xs; _mm_prefetch ((const char*)(p ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2 ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ysb ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2 ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2b), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3 ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3b), _MM_HINT_T0); p += xs; _mm_prefetch ((const char*)(p ), 
_MM_HINT_T0); _mm_prefetch ((const char*)(p+2 ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ysb ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2 ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2b), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3 ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3b), _MM_HINT_T0); // Compute cP, dcP, and d2cP products 1/8 at a time to maximize // register reuse and avoid rerereading from memory or cache. p = P(0,0,0); // 1st eighth tmp0 = _mm_loadu_pd (p ); tmp1 = _mm_loadu_pd (p+2 ); tmp2 = _mm_loadu_pd (p+ys ); tmp3 = _mm_loadu_pd (p+ysb); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[0]); // 2nd eighth tmp0 = _mm_loadu_pd (p+ys2 ); tmp1 = _mm_loadu_pd (p+ys2b); tmp2 = _mm_loadu_pd (p+ys3 ); tmp3 = _mm_loadu_pd (p+ys3b); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[1]); p += xs; // 3rd eighth tmp0 = _mm_loadu_pd (p ); tmp1 = _mm_loadu_pd (p+2 ); tmp2 = _mm_loadu_pd (p+ys ); tmp3 = _mm_loadu_pd (p+ysb); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[2]); // 4th eighth tmp0 = _mm_loadu_pd (p+ys2 ); tmp1 = _mm_loadu_pd (p+ys2b); tmp2 = _mm_loadu_pd (p+ys3 ); tmp3 = _mm_loadu_pd (p+ys3b); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[3]); p += xs; // 5th eighth tmp0 = _mm_loadu_pd (p ); tmp1 = _mm_loadu_pd (p+2 ); tmp2 = _mm_loadu_pd (p+ys ); tmp3 = _mm_loadu_pd (p+ysb); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[4]); // 6th eighth tmp0 = _mm_loadu_pd (p+ys2 ); tmp1 = _mm_loadu_pd (p+ys2b); tmp2 = _mm_loadu_pd (p+ys3 ); tmp3 = _mm_loadu_pd (p+ys3b); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[5]); p += xs; // 7th eighth tmp0 = _mm_loadu_pd (p ); tmp1 = _mm_loadu_pd (p+2 ); tmp2 = _mm_loadu_pd (p+ys ); tmp3 = _mm_loadu_pd (p+ysb); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[6]); // 8th eighth tmp0 = _mm_loadu_pd (p+ys2 ); tmp1 = _mm_loadu_pd (p+ys2b); tmp2 = _mm_loadu_pd (p+ys3 ); tmp3 = _mm_loadu_pd 
(p+ys3b); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[7]); // Now compute bcP, dbcP, bdcP and products _MM_DDOT4_PD (b01, b23, b01, b23, cP[0], cP[1], cP[2], cP[3], bcP01); _MM_DDOT4_PD (b01, b23, b01, b23, cP[4], cP[5], cP[6], cP[7], bcP23); // Compute value _MM_DOT4_PD (a01, a23, bcP01, bcP23, *val); #undef P } /* Value and gradient */ inline void eval_NUBspline_3d_d_vg (NUBspline_3d_d * restrict spline, double x, double y, double z, double* restrict val, double* restrict grad) { __m128d a01, b01, c01, da01, db01, dc01, a23, b23, c23, da23, db23, dc23, cP[8], dcP[8], d2cP[8], bcP01, dbcP01, bdcP01, bcP23, dbcP23, bdcP23, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; int ix = get_NUBasis_dfuncs_sse_d (spline->x_basis, x, &a01, &a23, &da01, &da23); int iy = get_NUBasis_dfuncs_sse_d (spline->y_basis, y, &b01, &b23, &db01, &db23); int iz = get_NUBasis_dfuncs_sse_d (spline->z_basis, z, &c01, &c23, &dc01, &dc23); int xs = spline->x_stride; int ys = spline->y_stride; int ysb = ys+2; int ys2 = 2*ys; int ys2b = 2*ys + 2; int ys3 = 3*ys; int ys3b = 3*ys + 2; // This macro is used to give the pointer to coefficient data. // i and j should be in the range [0,3]. Coefficients are read four // at a time, so no k value is needed. #define P(i,j,k) (spline->coefs+(ix+(i))*xs+(iy+(j))*ys+(iz+k)) double *p = P(0,0,0); // Prefetch the data from main memory into cache so it's available // when we need to use it. 
_mm_prefetch ((const char*)(p ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2 ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ysb ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2 ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2b), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3 ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3b), _MM_HINT_T0); p += xs; _mm_prefetch ((const char*)(p ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2 ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ysb ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2 ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2b), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3 ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3b), _MM_HINT_T0); p += xs; _mm_prefetch ((const char*)(p ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2 ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ysb ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2 ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2b), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3 ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3b), _MM_HINT_T0); p += xs; _mm_prefetch ((const char*)(p ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2 ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ysb ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2 ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2b), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3 ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3b), _MM_HINT_T0); // Compute cP, dcP, and d2cP products 1/8 at a time to maximize // register reuse and avoid rerereading from memory or cache. 
p = P(0,0,0); // 1st eighth tmp0 = _mm_loadu_pd (p ); tmp1 = _mm_loadu_pd (p+2 ); tmp2 = _mm_loadu_pd (p+ys ); tmp3 = _mm_loadu_pd (p+ysb); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[0]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, dc01, dc23, dc01, dc23, dcP[0]); // 2nd eighth tmp0 = _mm_loadu_pd (p+ys2 ); tmp1 = _mm_loadu_pd (p+ys2b); tmp2 = _mm_loadu_pd (p+ys3 ); tmp3 = _mm_loadu_pd (p+ys3b); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[1]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, dc01, dc23, dc01, dc23, dcP[1]); p += xs; // 3rd eighth tmp0 = _mm_loadu_pd (p ); tmp1 = _mm_loadu_pd (p+2 ); tmp2 = _mm_loadu_pd (p+ys ); tmp3 = _mm_loadu_pd (p+ysb); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[2]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, dc01, dc23, dc01, dc23, dcP[2]); // 4th eighth tmp0 = _mm_loadu_pd (p+ys2 ); tmp1 = _mm_loadu_pd (p+ys2b); tmp2 = _mm_loadu_pd (p+ys3 ); tmp3 = _mm_loadu_pd (p+ys3b); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[3]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, dc01, dc23, dc01, dc23, dcP[3]); p += xs; // 5th eighth tmp0 = _mm_loadu_pd (p ); tmp1 = _mm_loadu_pd (p+2 ); tmp2 = _mm_loadu_pd (p+ys ); tmp3 = _mm_loadu_pd (p+ysb); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[4]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, dc01, dc23, dc01, dc23, dcP[4]); // 6th eighth tmp0 = _mm_loadu_pd (p+ys2 ); tmp1 = _mm_loadu_pd (p+ys2b); tmp2 = _mm_loadu_pd (p+ys3 ); tmp3 = _mm_loadu_pd (p+ys3b); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[5]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, dc01, dc23, dc01, dc23, dcP[5]); p += xs; // 7th eighth tmp0 = _mm_loadu_pd (p ); tmp1 = _mm_loadu_pd (p+2 ); tmp2 = _mm_loadu_pd (p+ys ); tmp3 = _mm_loadu_pd (p+ysb); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[6]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, dc01, dc23, dc01, dc23, dcP[6]); // 8th eighth tmp0 = _mm_loadu_pd (p+ys2 ); tmp1 = _mm_loadu_pd (p+ys2b); tmp2 = _mm_loadu_pd (p+ys3 ); tmp3 = _mm_loadu_pd (p+ys3b); 
_MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[7]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, dc01, dc23, dc01, dc23, dcP[7]); // Now compute bcP, dbcP, bdcP and products _MM_DDOT4_PD (b01, b23, b01, b23, cP[0], cP[1], cP[2], cP[3], bcP01); _MM_DDOT4_PD (b01, b23, b01, b23, cP[4], cP[5], cP[6], cP[7], bcP23); _MM_DDOT4_PD (db01, db23, db01, db23, cP[0], cP[1], cP[2], cP[3], dbcP01); _MM_DDOT4_PD (db01, db23, db01, db23, cP[4], cP[5], cP[6], cP[7], dbcP23); _MM_DDOT4_PD (b01, b23, b01, b23, dcP[0], dcP[1], dcP[2], dcP[3], bdcP01); _MM_DDOT4_PD (b01, b23, b01, b23, dcP[4], dcP[5], dcP[6], dcP[7], bdcP23); // Compute value _MM_DOT4_PD (a01, a23, bcP01, bcP23, *val); // Compute gradient _MM_DOT4_PD (da01, da23, bcP01, bcP23, grad[0]); _MM_DOT4_PD (a01, a23, dbcP01, dbcP23, grad[1]); _MM_DOT4_PD (a01, a23, bdcP01, bdcP23, grad[2]); #undef P } /* Value, gradient, and laplacian */ inline void eval_NUBspline_3d_d_vgl (NUBspline_3d_d * restrict spline, double x, double y, double z, double* restrict val, double* restrict grad, double* restrict lapl) { __m128d a01, b01, c01, da01, db01, dc01, d2a01, d2b01, d2c01, a23, b23, c23, da23, db23, dc23, d2a23, d2b23, d2c23, cP[8], dcP[8], d2cP[8], bcP01, dbcP01, bdcP01, d2bcP01, dbdcP01, bd2cP01, bcP23, dbcP23, bdcP23, d2bcP23, dbdcP23, bd2cP23, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; int ix = get_NUBasis_d2funcs_sse_d (spline->x_basis, x, &a01, &a23, &da01, &da23, &d2a01, &d2a23); int iy = get_NUBasis_d2funcs_sse_d (spline->y_basis, y, &b01, &b23, &db01, &db23, &d2b01, &d2b23); int iz = get_NUBasis_d2funcs_sse_d (spline->z_basis, z, &c01, &c23, &dc01, &dc23, &d2c01, &d2c23); int xs = spline->x_stride; int ys = spline->y_stride; int ysb = ys+2; int ys2 = 2*ys; int ys2b = 2*ys + 2; int ys3 = 3*ys; int ys3b = 3*ys + 2; // This macro is used to give the pointer to coefficient data. // i and j should be in the range [0,3]. Coefficients are read four // at a time, so no k value is needed. 
#define P(i,j,k) (spline->coefs+(ix+(i))*xs+(iy+(j))*ys+(iz+k)) double *p = P(0,0,0); // Prefetch the data from main memory into cache so it's available // when we need to use it. _mm_prefetch ((const char*)(p ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2 ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ysb ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2 ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2b), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3 ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3b), _MM_HINT_T0); p += xs; _mm_prefetch ((const char*)(p ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2 ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ysb ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2 ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2b), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3 ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3b), _MM_HINT_T0); p += xs; _mm_prefetch ((const char*)(p ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2 ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ysb ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2 ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2b), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3 ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3b), _MM_HINT_T0); p += xs; _mm_prefetch ((const char*)(p ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2 ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ysb ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2 ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2b), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3 ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3b), _MM_HINT_T0); // Compute cP, dcP, and d2cP products 1/8 at a time to maximize // register reuse and avoid rerereading from memory or cache. 
p = P(0,0,0); // 1st eighth tmp0 = _mm_loadu_pd (p ); tmp1 = _mm_loadu_pd (p+2 ); tmp2 = _mm_loadu_pd (p+ys ); tmp3 = _mm_loadu_pd (p+ysb); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[0]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, dc01, dc23, dc01, dc23, dcP[0]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3,d2c01,d2c23,d2c01,d2c23,d2cP[0]); // 2nd eighth tmp0 = _mm_loadu_pd (p+ys2 ); tmp1 = _mm_loadu_pd (p+ys2b); tmp2 = _mm_loadu_pd (p+ys3 ); tmp3 = _mm_loadu_pd (p+ys3b); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[1]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, dc01, dc23, dc01, dc23, dcP[1]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3,d2c01,d2c23,d2c01,d2c23,d2cP[1]); p += xs; // 3rd eighth tmp0 = _mm_loadu_pd (p ); tmp1 = _mm_loadu_pd (p+2 ); tmp2 = _mm_loadu_pd (p+ys ); tmp3 = _mm_loadu_pd (p+ysb); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[2]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, dc01, dc23, dc01, dc23, dcP[2]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3,d2c01,d2c23,d2c01,d2c23,d2cP[2]); // 4th eighth tmp0 = _mm_loadu_pd (p+ys2 ); tmp1 = _mm_loadu_pd (p+ys2b); tmp2 = _mm_loadu_pd (p+ys3 ); tmp3 = _mm_loadu_pd (p+ys3b); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[3]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, dc01, dc23, dc01, dc23, dcP[3]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3,d2c01,d2c23,d2c01,d2c23,d2cP[3]); p += xs; // 5th eighth tmp0 = _mm_loadu_pd (p ); tmp1 = _mm_loadu_pd (p+2 ); tmp2 = _mm_loadu_pd (p+ys ); tmp3 = _mm_loadu_pd (p+ysb); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[4]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, dc01, dc23, dc01, dc23, dcP[4]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3,d2c01,d2c23,d2c01,d2c23,d2cP[4]); // 6th eighth tmp0 = _mm_loadu_pd (p+ys2 ); tmp1 = _mm_loadu_pd (p+ys2b); tmp2 = _mm_loadu_pd (p+ys3 ); tmp3 = _mm_loadu_pd (p+ys3b); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[5]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, dc01, dc23, dc01, dc23, dcP[5]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3,d2c01,d2c23,d2c01,d2c23,d2cP[5]); p += xs; // 
7th eighth tmp0 = _mm_loadu_pd (p ); tmp1 = _mm_loadu_pd (p+2 ); tmp2 = _mm_loadu_pd (p+ys ); tmp3 = _mm_loadu_pd (p+ysb); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[6]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, dc01, dc23, dc01, dc23, dcP[6]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3,d2c01,d2c23,d2c01,d2c23,d2cP[6]); // 8th eighth tmp0 = _mm_loadu_pd (p+ys2 ); tmp1 = _mm_loadu_pd (p+ys2b); tmp2 = _mm_loadu_pd (p+ys3 ); tmp3 = _mm_loadu_pd (p+ys3b); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[7]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, dc01, dc23, dc01, dc23, dcP[7]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3,d2c01,d2c23,d2c01,d2c23,d2cP[7]); // Now compute bcP, dbcP, bdcP, d2bcP, bd2cP, and dbdc products _MM_DDOT4_PD (b01, b23, b01, b23, cP[0], cP[1], cP[2], cP[3], bcP01); _MM_DDOT4_PD (b01, b23, b01, b23, cP[4], cP[5], cP[6], cP[7], bcP23); _MM_DDOT4_PD (db01, db23, db01, db23, cP[0], cP[1], cP[2], cP[3], dbcP01); _MM_DDOT4_PD (db01, db23, db01, db23, cP[4], cP[5], cP[6], cP[7], dbcP23); _MM_DDOT4_PD (b01, b23, b01, b23, dcP[0], dcP[1], dcP[2], dcP[3], bdcP01); _MM_DDOT4_PD (b01, b23, b01, b23, dcP[4], dcP[5], dcP[6], dcP[7], bdcP23); _MM_DDOT4_PD (d2b01, d2b23, d2b01, d2b23, cP[0], cP[1], cP[2], cP[3], d2bcP01); _MM_DDOT4_PD (d2b01, d2b23, d2b01, d2b23, cP[4], cP[5], cP[6], cP[7], d2bcP23); _MM_DDOT4_PD (b01, b23, b01, b23, d2cP[0], d2cP[1], d2cP[2], d2cP[3], bd2cP01); _MM_DDOT4_PD (b01, b23, b01, b23, d2cP[4], d2cP[5], d2cP[6], d2cP[7], bd2cP23); _MM_DDOT4_PD (db01, db23, db01, db23, dcP[0], dcP[1], dcP[2], dcP[3], dbdcP01); _MM_DDOT4_PD (db01, db23, db01, db23, dcP[4], dcP[5], dcP[6], dcP[7], dbdcP23); // Compute value _MM_DOT4_PD (a01, a23, bcP01, bcP23, *val); // Compute gradient _MM_DOT4_PD (da01, da23, bcP01, bcP23, grad[0]); _MM_DOT4_PD (a01, a23, dbcP01, dbcP23, grad[1]); _MM_DOT4_PD (a01, a23, bdcP01, bdcP23, grad[2]); // Compute Laplacian double d2x, d2y, d2z; _MM_DOT4_PD (d2a01, d2a23, bcP01, bcP23, d2x); _MM_DOT4_PD (a01, a23, d2bcP01, d2bcP23, d2y); 
_MM_DOT4_PD (a01, a23, bd2cP01, bd2cP23, d2z); *lapl = d2x + d2y + d2z; #undef P } /* Value, gradient, and Hessian */ inline void eval_NUBspline_3d_d_vgh (NUBspline_3d_d * restrict spline, double x, double y, double z, double* restrict val, double* restrict grad, double* restrict hess) { __m128d a01, b01, c01, da01, db01, dc01, d2a01, d2b01, d2c01, a23, b23, c23, da23, db23, dc23, d2a23, d2b23, d2c23, cP[8], dcP[8], d2cP[8], bcP01, dbcP01, bdcP01, d2bcP01, dbdcP01, bd2cP01, bcP23, dbcP23, bdcP23, d2bcP23, dbdcP23, bd2cP23, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; int ix = get_NUBasis_d2funcs_sse_d (spline->x_basis, x, &a01, &a23, &da01, &da23, &d2a01, &d2a23); int iy = get_NUBasis_d2funcs_sse_d (spline->y_basis, y, &b01, &b23, &db01, &db23, &d2b01, &d2b23); int iz = get_NUBasis_d2funcs_sse_d (spline->z_basis, z, &c01, &c23, &dc01, &dc23, &d2c01, &d2c23); int xs = spline->x_stride; int ys = spline->y_stride; int ysb = ys+2; int ys2 = 2*ys; int ys2b = 2*ys + 2; int ys3 = 3*ys; int ys3b = 3*ys + 2; // This macro is used to give the pointer to coefficient data. // i and j should be in the range [0,3]. Coefficients are read four // at a time, so no k value is needed. #define P(i,j,k) (spline->coefs+(ix+(i))*xs+(iy+(j))*ys+(iz+k)) double *p = P(0,0,0); // Prefetch the data from main memory into cache so it's available // when we need to use it. 
_mm_prefetch ((const char*)(p ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2 ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ysb ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2 ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2b), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3 ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3b), _MM_HINT_T0); p += xs; _mm_prefetch ((const char*)(p ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2 ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ysb ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2 ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2b), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3 ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3b), _MM_HINT_T0); p += xs; _mm_prefetch ((const char*)(p ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2 ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ysb ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2 ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2b), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3 ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3b), _MM_HINT_T0); p += xs; _mm_prefetch ((const char*)(p ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2 ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ysb ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2 ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2b), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3 ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3b), _MM_HINT_T0); // Compute cP, dcP, and d2cP products 1/8 at a time to maximize // register reuse and avoid rerereading from memory or cache. 
p = P(0,0,0); // 1st eighth tmp0 = _mm_loadu_pd (p ); tmp1 = _mm_loadu_pd (p+2 ); tmp2 = _mm_loadu_pd (p+ys ); tmp3 = _mm_loadu_pd (p+ysb); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[0]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, dc01, dc23, dc01, dc23, dcP[0]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3,d2c01,d2c23,d2c01,d2c23,d2cP[0]); // 2nd eighth tmp0 = _mm_loadu_pd (p+ys2 ); tmp1 = _mm_loadu_pd (p+ys2b); tmp2 = _mm_loadu_pd (p+ys3 ); tmp3 = _mm_loadu_pd (p+ys3b); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[1]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, dc01, dc23, dc01, dc23, dcP[1]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3,d2c01,d2c23,d2c01,d2c23,d2cP[1]); p += xs; // 3rd eighth tmp0 = _mm_loadu_pd (p ); tmp1 = _mm_loadu_pd (p+2 ); tmp2 = _mm_loadu_pd (p+ys ); tmp3 = _mm_loadu_pd (p+ysb); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[2]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, dc01, dc23, dc01, dc23, dcP[2]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3,d2c01,d2c23,d2c01,d2c23,d2cP[2]); // 4th eighth tmp0 = _mm_loadu_pd (p+ys2 ); tmp1 = _mm_loadu_pd (p+ys2b); tmp2 = _mm_loadu_pd (p+ys3 ); tmp3 = _mm_loadu_pd (p+ys3b); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[3]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, dc01, dc23, dc01, dc23, dcP[3]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3,d2c01,d2c23,d2c01,d2c23,d2cP[3]); p += xs; // 5th eighth tmp0 = _mm_loadu_pd (p ); tmp1 = _mm_loadu_pd (p+2 ); tmp2 = _mm_loadu_pd (p+ys ); tmp3 = _mm_loadu_pd (p+ysb); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[4]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, dc01, dc23, dc01, dc23, dcP[4]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3,d2c01,d2c23,d2c01,d2c23,d2cP[4]); // 6th eighth tmp0 = _mm_loadu_pd (p+ys2 ); tmp1 = _mm_loadu_pd (p+ys2b); tmp2 = _mm_loadu_pd (p+ys3 ); tmp3 = _mm_loadu_pd (p+ys3b); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[5]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, dc01, dc23, dc01, dc23, dcP[5]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3,d2c01,d2c23,d2c01,d2c23,d2cP[5]); p += xs; // 
7th eighth tmp0 = _mm_loadu_pd (p ); tmp1 = _mm_loadu_pd (p+2 ); tmp2 = _mm_loadu_pd (p+ys ); tmp3 = _mm_loadu_pd (p+ysb); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[6]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, dc01, dc23, dc01, dc23, dcP[6]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3,d2c01,d2c23,d2c01,d2c23,d2cP[6]); // 8th eighth tmp0 = _mm_loadu_pd (p+ys2 ); tmp1 = _mm_loadu_pd (p+ys2b); tmp2 = _mm_loadu_pd (p+ys3 ); tmp3 = _mm_loadu_pd (p+ys3b); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[7]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, dc01, dc23, dc01, dc23, dcP[7]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3,d2c01,d2c23,d2c01,d2c23,d2cP[7]); // Now compute bcP, dbcP, bdcP, d2bcP, bd2cP, and dbdc products _MM_DDOT4_PD (b01, b23, b01, b23, cP[0], cP[1], cP[2], cP[3], bcP01); _MM_DDOT4_PD (b01, b23, b01, b23, cP[4], cP[5], cP[6], cP[7], bcP23); _MM_DDOT4_PD (db01, db23, db01, db23, cP[0], cP[1], cP[2], cP[3], dbcP01); _MM_DDOT4_PD (db01, db23, db01, db23, cP[4], cP[5], cP[6], cP[7], dbcP23); _MM_DDOT4_PD (b01, b23, b01, b23, dcP[0], dcP[1], dcP[2], dcP[3], bdcP01); _MM_DDOT4_PD (b01, b23, b01, b23, dcP[4], dcP[5], dcP[6], dcP[7], bdcP23); _MM_DDOT4_PD (d2b01, d2b23, d2b01, d2b23, cP[0], cP[1], cP[2], cP[3], d2bcP01); _MM_DDOT4_PD (d2b01, d2b23, d2b01, d2b23, cP[4], cP[5], cP[6], cP[7], d2bcP23); _MM_DDOT4_PD (b01, b23, b01, b23, d2cP[0], d2cP[1], d2cP[2], d2cP[3], bd2cP01); _MM_DDOT4_PD (b01, b23, b01, b23, d2cP[4], d2cP[5], d2cP[6], d2cP[7], bd2cP23); _MM_DDOT4_PD (db01, db23, db01, db23, dcP[0], dcP[1], dcP[2], dcP[3], dbdcP01); _MM_DDOT4_PD (db01, db23, db01, db23, dcP[4], dcP[5], dcP[6], dcP[7], dbdcP23); // Compute value _MM_DOT4_PD (a01, a23, bcP01, bcP23, *val); // Compute gradient _MM_DOT4_PD (da01, da23, bcP01, bcP23, grad[0]); _MM_DOT4_PD (a01, a23, dbcP01, dbcP23, grad[1]); _MM_DOT4_PD (a01, a23, bdcP01, bdcP23, grad[2]); // Compute hessian // d2x _MM_DOT4_PD (d2a01, d2a23, bcP01, bcP23, hess[0]); // d2y _MM_DOT4_PD (a01, a23, d2bcP01, d2bcP23, hess[4]); // 
d2z _MM_DOT4_PD (a01, a23, bd2cP01, bd2cP23, hess[8]); // dx dy _MM_DOT4_PD (da01, da23, dbcP01, dbcP23, hess[1]); // dx dz _MM_DOT4_PD (da01, da23, bdcP01, bdcP23, hess[2]); // dy dz _MM_DOT4_PD (a01, a23, dbdcP01, dbdcP23, hess[5]); // Copy hessian elements into lower half of 3x3 matrix hess[3] = hess[1]; hess[6] = hess[2]; hess[7] = hess[5]; #undef P } #undef _MM_DDOT4_PD #undef _MM_DOT4_PD #endif einspline-0.9.2/src/multi_bspline_eval_std_s_cpp.cc0000664000113000011300000000276611015561410017431 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. // // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. 
// // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #include "multi_bspline_eval_std_s_impl.h" einspline-0.9.2/src/multi_bspline_eval_cuda_c.cu0000664000113000011300000003115711067570416016724 00000000000000#define BLOCK_SIZE 64 #include #include #include #include #include __global__ void eval_multi_UBspline_3d_cuda_c (float *coefs, float *abc, float *vals, int ix, int iy, int iz, int xs, int ys, int zs, int N) { int block = blockIdx.x; int thr = threadIdx.x; int offset = block*BLOCK_SIZE+thr; __shared__ float abcs[64]; abcs[thr] = abc[thr]; __syncthreads(); float val= 0.0; //int index=0; for (int i=0; i<4; i++) for (int j=0; j<4; j++) { for (int k=0; k<4; k++) { float *base_addr = coefs + (ix+i)*xs + (iy+j)*ys + (iz+k)*zs; //val += abc[(16*i+4*j+k)*BLOCK_SIZE + thr] * base_addr[offset]; val += abcs[16*i+4*j+k] * base_addr[offset]; //index++; } } vals[offset] = val; } __constant__ float A[16], dA[16], d2A[16]; __global__ static void eval_multi_multi_UBspline_3d_cuda_c (float *pos, float3 drInv, float *coefs_real, float *coefs_imag, float *vals_real, float *vals_imag, int3 strides) { int block = blockIdx.x; int thr = threadIdx.x; int ir = blockIdx.y; int offset = block*BLOCK_SIZE+thr; __shared__ float abc[64]; __shared__ float pos_s[BLOCK_SIZE]; int ir1 = (ir >> 4)*64; int ir2 = (ir & 15)*4; pos_s[thr] = pos[ir1+thr]; __syncthreads(); float3 r; r.x = pos_s[ir2+0]; r.y = pos_s[ir2+1]; r.z = pos_s[ir2+2]; int3 index; float3 t; float s, sf; float4 tp[3]; s = r.x * drInv.x; sf = floor(s); index.x = (int)sf; t.x = s - sf; s = r.y * drInv.y; sf = floor(s); index.y = (int)sf; t.y = s - sf; s = r.z * drInv.z; sf = floor(s); index.z = (int)sf; t.z = s - sf; tp[0] = make_float4(1.0, t.x, t.x*t.x, t.x*t.x*t.x); tp[1] = 
make_float4(1.0, t.y, t.y*t.y, t.y*t.y*t.y); tp[2] = make_float4(1.0, t.z, t.z*t.z, t.z*t.z*t.z); __shared__ float a[4], b[4], c[4]; if (thr == 0) { a[0] = A[ 0]*tp[0].x + A[ 1]*tp[0].y + A[ 2]*tp[0].z + A[ 3]*tp[0].w; a[1] = A[ 4]*tp[0].x + A[ 5]*tp[0].y + A[ 6]*tp[0].z + A[ 7]*tp[0].w; a[2] = A[ 8]*tp[0].x + A[ 9]*tp[0].y + A[10]*tp[0].z + A[11]*tp[0].w; a[3] = A[12]*tp[0].x + A[13]*tp[0].y + A[14]*tp[0].z + A[15]*tp[0].w; b[0] = A[ 0]*tp[1].x + A[ 1]*tp[1].y + A[ 2]*tp[1].z + A[ 3]*tp[1].w; b[1] = A[ 4]*tp[1].x + A[ 5]*tp[1].y + A[ 6]*tp[1].z + A[ 7]*tp[1].w; b[2] = A[ 8]*tp[1].x + A[ 9]*tp[1].y + A[10]*tp[1].z + A[11]*tp[1].w; b[3] = A[12]*tp[1].x + A[13]*tp[1].y + A[14]*tp[1].z + A[15]*tp[1].w; c[0] = A[ 0]*tp[2].x + A[ 1]*tp[2].y + A[ 2]*tp[2].z + A[ 3]*tp[2].w; c[1] = A[ 4]*tp[2].x + A[ 5]*tp[2].y + A[ 6]*tp[2].z + A[ 7]*tp[2].w; c[2] = A[ 8]*tp[2].x + A[ 9]*tp[2].y + A[10]*tp[2].z + A[11]*tp[2].w; c[3] = A[12]*tp[2].x + A[13]*tp[2].y + A[14]*tp[2].z + A[15]*tp[2].w; } int i = (thr>>4)&3; int j = (thr>>2)&3; int k = (thr & 3); abc[thr] = a[i]*b[j]*c[k]; __syncthreads(); float val_real = 0.0; float val_imag = 0.0; //int index=0; val_real = val_imag = 0.0; // int di = strides.x - 4*strides.y; // int dj = strides.y - 4*strides.z; for (int i=0; i<4; i++) { for (int j=0; j<4; j++) { float *base_real = coefs_real + (index.x+i)*strides.x + (index.y+j)*strides.y + index.z*strides.z; float *base_imag = coefs_imag + (index.x+i)*strides.x + (index.y+j)*strides.y + index.z*strides.z; for (int k=0; k<4; k++) { // float *base_real = coefs_real + (index.x+i)*strides.x + (index.y+j)*strides.y + (index.z+k)*strides.z; // float *base_imag = coefs_imag + (index.x+i)*strides.x + (index.y+j)*strides.y + (index.z+k)*strides.z; val_real += abc[16*i+4*j+k] * base_real[offset+k*strides.z]; val_imag += abc[16*i+4*j+k] * base_imag[offset+k*strides.z]; // base_real += strides.z; // base_imag += strides.z; } // base_real += dj; // base_imag += dj; } // base_real += di; // base_imag += 
di; } vals_real[offset+ir*128] = val_real; vals_imag[offset+ir*128] = val_imag; //vals_real[ir][offset] = val_real; // vals_imag[ir][offset] = val_imag; } // __global__ void // eval_multi_UBspline_3d_cuda_c2 (float3 r, // float *coefs, float *vals, // int xs, int ys, int zs, int N) // { // int block = blockIdx.x; // int thr = threadIdx.x; // __shared__ float abcs[64]; // abcs[thr] = abc[thr]; // float dxInv = 0.0625f; // float v, dv; // v = floor(dxInv*r.x); // dv = dxInv*r.x - v; // int ix = (int) v; // v = floor(dxInv*r.x); // dv = dxInv*r.x - v; // int iy = (int) v; // v = floor(dxInv*r.y); // dv = dxInv*r.y - v; // int iz = (int) v; // int offset = block*BLOCK_SIZE+thr; // __shared__ float abcs[64]; // abcs[thr] = abc[thr]; // float val= 0.0; // //int index=0; // val = 0.0; // for (int i=0; i<4; i++) // for (int j=0; j<4; j++) // for (int k=0; k<4; k++) { // float *base_addr = coefs + (ix+i)*xs + (iy+j)*ys + (iz+k)*zs; // //val += abc[(16*i+4*j+k)*BLOCK_SIZE + thr] * base_addr[offset]; // val += abcs[16*i+4*j+k] * base_addr[offset]; // //index++; // } // vals[offset] = val; // } void test_cuda() { float *coefs , *abc , *abc2, *vals; float *coefs_d, *abc_d, *vals_d; int xs, ys, zs, N; int Nx, Ny, Nz; N = 4096; Nx = Ny = Nz = 16; xs = Nx*Ny*Nz; ys = Ny*Nz; zs = Nz; int size = Nx*Ny*Nz*N*sizeof(float); posix_memalign((void**)&coefs, 16, size); cudaMalloc((void**)&coefs_d, size); for (int ix=0; ix>> (coefs_d, abc_d, vals_d, ix, iy, iz, xs, ys, zs, N); } end = clock(); double time = (double)(end-start)/(double)(CLOCKS_PER_SEC*100000*N); fprintf (stderr, "Evals per second = %1.8e\n", 1.0/time); cudaMemcpy (vals, vals_d, N*sizeof(float), cudaMemcpyDeviceToHost); float vals2[N]; for (int n=0; n>> // (r_d, drInv, coefs_real_d, coefs_imag_d, // vals_real_d, vals_imag_d, strides); eval_multi_multi_UBspline_3d_cuda_c<<>> (r_d, drInv, coefs_real_d, coefs_imag_d, valBlock_d, valBlock_d+numVals/2, strides); //cudaMemcpy(valBlock_h, valBlock_d, numVals*sizeof(float), 
cudaMemcpyDeviceToHost); } end = clock(); double time = (double)(end-start)/(double)((double)CLOCKS_PER_SEC*(double)10000*N*numWalkers); fprintf (stderr, "Evals per second = %1.8e\n", 1.0/time); cudaFree (valBlock_d); cudaFree (vals_real_d); cudaFree (vals_imag_d); cudaFree (coefs_real_d); cudaFree (coefs_imag_d); cudaFree (r_d); return NULL; // cudaMemcpy (vals, vals_d, N*sizeof(float), cudaMemcpyDeviceToHost); // float vals2[N]; // for (int n=0; n #include #include "multi_bspline_structs.h" /************************************************************/ /* 1D float-precision, complex evaulation functions */ /************************************************************/ void eval_multi_UBspline_1d_c (multi_UBspline_1d_c *spline, double x, complex_float* restrict vals); void eval_multi_UBspline_1d_c_vg (multi_UBspline_1d_c *spline, double x, complex_float* restrict vals, complex_float* restrict grads); void eval_multi_UBspline_1d_c_vgl (multi_UBspline_1d_c *spline, double x, complex_float* restrict vals, complex_float* restrict grads, complex_float* restrict lapl); void eval_multi_UBspline_1d_c_vgh (multi_UBspline_1d_c *spline, double x, complex_float* restrict vals, complex_float* restrict grads, complex_float* restrict hess); /************************************************************/ /* 2D float-precision, complex evaulation functions */ /************************************************************/ void eval_multi_UBspline_2d_c (multi_UBspline_2d_c *spline, double x, double y, complex_float* restrict vals); void eval_multi_UBspline_2d_c_vg (multi_UBspline_2d_c *spline, double x, double y, complex_float* restrict vals, complex_float* restrict grads); void eval_multi_UBspline_2d_c_vgl (multi_UBspline_2d_c *spline, double x, double y, complex_float* restrict vals, complex_float* restrict grads, complex_float* restrict lapl); void eval_multi_UBspline_2d_c_vgh (multi_UBspline_2d_c *spline, double x, double y, complex_float* restrict vals, complex_float* restrict 
grads, complex_float* restrict hess); /************************************************************/ /* 3D float-precision, complex evaulation functions */ /************************************************************/ void eval_multi_UBspline_3d_c (multi_UBspline_3d_c *spline, double x, double y, double z, complex_float* restrict vals); void eval_multi_UBspline_3d_c_vg (multi_UBspline_3d_c *spline, double x, double y, double z, complex_float* restrict vals, complex_float* restrict grads); void eval_multi_UBspline_3d_c_vgl (multi_UBspline_3d_c *spline, double x, double y, double z, complex_float* restrict vals, complex_float* restrict grads, complex_float* restrict lapl); void eval_multi_UBspline_3d_c_vgh (multi_UBspline_3d_c *spline, double x, double y, double z, complex_float* restrict vals, complex_float* restrict grads, complex_float* restrict hess); #endif einspline-0.9.2/src/bspline_create_cuda.cu0000664000113000011300000003027211262471516015517 00000000000000#include #include "bspline_base.h" #include "bspline_structs.h" #include "bspline_structs_cuda.h" __device__ double Bcuda[48]; __constant__ float Acuda[48]; // #include "bspline_cuda_s_impl.h" // #include "bspline_cuda_c_impl.h" // #include "bspline_cuda_d_impl.h" // #include "bspline_cuda_z_impl.h" extern "C" UBspline_3d_c_cuda* create_UBspline_3d_c_cuda (UBspline_3d_c* spline) { float A_h[48] = { -1.0/6.0, 3.0/6.0, -3.0/6.0, 1.0/6.0, 3.0/6.0, -6.0/6.0, 0.0/6.0, 4.0/6.0, -3.0/6.0, 3.0/6.0, 3.0/6.0, 1.0/6.0, 1.0/6.0, 0.0/6.0, 0.0/6.0, 0.0/6.0, 0.0, -0.5, 1.0, -0.5, 0.0, 1.5, -2.0, 0.0, 0.0, -1.5, 1.0, 0.5, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, -1.0, 1.0, 0.0, 0.0, 3.0, -2.0, 0.0, 0.0, -3.0, 1.0, 0.0, 0.0, 1.0, 0.0 }; cudaMemcpyToSymbol(Acuda, A_h, 48*sizeof(float), 0, cudaMemcpyHostToDevice); UBspline_3d_c_cuda *cuda_spline = (UBspline_3d_c_cuda*) malloc (sizeof (UBspline_3d_c_cuda)); int Nx = spline->x_grid.num+3; int Ny = spline->y_grid.num+3; int Nz = spline->z_grid.num+3; int N = ((Nz+31)/32)*32; 
cuda_spline->stride.x = Ny*N; cuda_spline->stride.y = N; cuda_spline->gridInv.x = spline->x_grid.delta_inv; cuda_spline->gridInv.y = spline->y_grid.delta_inv; cuda_spline->gridInv.z = spline->z_grid.delta_inv; cuda_spline->dim.x = spline->x_grid.num; cuda_spline->dim.y = spline->y_grid.num; cuda_spline->dim.z = spline->z_grid.num; size_t size = Nx*Ny*N*sizeof(std::complex); cudaMalloc((void**)&(cuda_spline->coefs), size); std::complex *spline_buff = (std::complex*)malloc(size); for (int ix=0; ixstride.x + iy*cuda_spline->stride.y + iz] = spline->coefs[ix*spline->x_stride + iy*spline->y_stride +iz]; for (int isp=Nz; isp < N; isp++) { spline_buff[ix*cuda_spline->stride.x + iy*cuda_spline->stride.y + isp] = 0.0; } } cudaMemcpy(cuda_spline->coefs, spline_buff, size, cudaMemcpyHostToDevice); free(spline_buff); cuda_spline->stride.x = 2*Ny*N; cuda_spline->stride.y = 2*N; return cuda_spline; } extern "C" UBspline_3d_c_cuda* create_UBspline_3d_c_cuda_conv (UBspline_3d_z* spline) { float A_h[48] = { -1.0/6.0, 3.0/6.0, -3.0/6.0, 1.0/6.0, 3.0/6.0, -6.0/6.0, 0.0/6.0, 4.0/6.0, -3.0/6.0, 3.0/6.0, 3.0/6.0, 1.0/6.0, 1.0/6.0, 0.0/6.0, 0.0/6.0, 0.0/6.0, 0.0, -0.5, 1.0, -0.5, 0.0, 1.5, -2.0, 0.0, 0.0, -1.5, 1.0, 0.5, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, -1.0, 1.0, 0.0, 0.0, 3.0, -2.0, 0.0, 0.0, -3.0, 1.0, 0.0, 0.0, 1.0, 0.0 }; cudaMemcpyToSymbol(Acuda, A_h, 48*sizeof(float), 0, cudaMemcpyHostToDevice); UBspline_3d_c_cuda *cuda_spline = (UBspline_3d_c_cuda*) malloc (sizeof (UBspline_3d_c_cuda)); int Nx = spline->x_grid.num+3; int Ny = spline->y_grid.num+3; int Nz = spline->z_grid.num+3; int N = ((Nz+31)/32) * 32; cuda_spline->stride.x = Ny*N; cuda_spline->stride.y = N; cuda_spline->gridInv.x = spline->x_grid.delta_inv; cuda_spline->gridInv.y = spline->y_grid.delta_inv; cuda_spline->gridInv.z = spline->z_grid.delta_inv; cuda_spline->dim.x = spline->x_grid.num; cuda_spline->dim.y = spline->y_grid.num; cuda_spline->dim.z = spline->z_grid.num; size_t size = Nx*Ny*N*sizeof(std::complex); 
cudaMalloc((void**)&(cuda_spline->coefs), size); std::complex *spline_buff = (std::complex*)malloc(size); for (int ix=0; ix z = spline->coefs[ix*spline->x_stride + iy*spline->y_stride + iz]; spline_buff[ix*cuda_spline->stride.x + iy*cuda_spline->stride.y + iz] = std::complex(z.real(), z.imag()); for (int iz=Nz; iz < N; iz++) spline_buff[ix*cuda_spline->stride.x + iy*cuda_spline->stride.y + iz] = 0.0; } cudaMemcpy(cuda_spline->coefs, spline_buff, size, cudaMemcpyHostToDevice); free(spline_buff); cuda_spline->stride.x = 2*Ny*N; cuda_spline->stride.y = 2*N; return cuda_spline; } extern "C" UBspline_3d_s_cuda* create_UBspline_3d_s_cuda (UBspline_3d_s* spline) { float A_h[48] = { -1.0/6.0, 3.0/6.0, -3.0/6.0, 1.0/6.0, 3.0/6.0, -6.0/6.0, 0.0/6.0, 4.0/6.0, -3.0/6.0, 3.0/6.0, 3.0/6.0, 1.0/6.0, 1.0/6.0, 0.0/6.0, 0.0/6.0, 0.0/6.0, 0.0, -0.5, 1.0, -0.5, 0.0, 1.5, -2.0, 0.0, 0.0, -1.5, 1.0, 0.5, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, -1.0, 1.0, 0.0, 0.0, 3.0, -2.0, 0.0, 0.0, -3.0, 1.0, 0.0, 0.0, 1.0, 0.0 }; cudaMemcpyToSymbol(Acuda, A_h, 48*sizeof(float), 0, cudaMemcpyHostToDevice); UBspline_3d_s_cuda *cuda_spline = (UBspline_3d_s_cuda*) malloc (sizeof (UBspline_3d_s_cuda)); int Nx = spline->x_grid.num+3; int Ny = spline->y_grid.num+3; int Nz = spline->z_grid.num+3; int N = ((Nz+31)/32)*32; cuda_spline->stride.x = Ny*N; cuda_spline->stride.y = N; cuda_spline->stride.z = 1; cuda_spline->gridInv.x = spline->x_grid.delta_inv; cuda_spline->gridInv.y = spline->y_grid.delta_inv; cuda_spline->gridInv.z = spline->z_grid.delta_inv; cuda_spline->dim.x = spline->x_grid.num; cuda_spline->dim.y = spline->y_grid.num; cuda_spline->dim.z = spline->z_grid.num; size_t size = Nx*Ny*N*sizeof(float); cudaMalloc((void**)&(cuda_spline->coefs), size); float *spline_buff = (float*)malloc(size); for (int ix=0; ixstride.x + iy*cuda_spline->stride.y + iz] = spline->coefs[ix*spline->x_stride + iy*spline->y_stride + iz]; cudaMemcpy(cuda_spline->coefs, spline_buff, size, cudaMemcpyHostToDevice); free(spline_buff); 
return cuda_spline; } extern "C" UBspline_3d_s_cuda* create_UBspline_3d_s_cuda_conv (UBspline_3d_d* spline) { float A_h[48] = { -1.0/6.0, 3.0/6.0, -3.0/6.0, 1.0/6.0, 3.0/6.0, -6.0/6.0, 0.0/6.0, 4.0/6.0, -3.0/6.0, 3.0/6.0, 3.0/6.0, 1.0/6.0, 1.0/6.0, 0.0/6.0, 0.0/6.0, 0.0/6.0, 0.0, -0.5, 1.0, -0.5, 0.0, 1.5, -2.0, 0.0, 0.0, -1.5, 1.0, 0.5, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, -1.0, 1.0, 0.0, 0.0, 3.0, -2.0, 0.0, 0.0, -3.0, 1.0, 0.0, 0.0, 1.0, 0.0 }; cudaMemcpyToSymbol(Acuda, A_h, 48*sizeof(float), 0, cudaMemcpyHostToDevice); UBspline_3d_s_cuda *cuda_spline = (UBspline_3d_s_cuda*) malloc (sizeof (UBspline_3d_s_cuda)); int Nx = spline->x_grid.num+3; int Ny = spline->y_grid.num+3; int Nz = spline->z_grid.num+3; int N = ((Nz+31)/32)*32; cuda_spline->stride.x = Ny*N; cuda_spline->stride.y = N; cuda_spline->stride.z = 1; cuda_spline->gridInv.x = spline->x_grid.delta_inv; cuda_spline->gridInv.y = spline->y_grid.delta_inv; cuda_spline->gridInv.z = spline->z_grid.delta_inv; cuda_spline->dim.x = spline->x_grid.num; cuda_spline->dim.y = spline->y_grid.num; cuda_spline->dim.z = spline->z_grid.num; size_t size = Nx*Ny*N*sizeof(float); cudaMalloc((void**)&(cuda_spline->coefs), size); cudaError_t err = cudaGetLastError(); if (err != cudaSuccess) { fprintf (stderr, "Failed to allocate %ld memory for GPU spline coefficients. Error %s\n", size, cudaGetErrorString(err)); abort(); } float *spline_buff = (float*)malloc(size); for (int ix=0; ixstride.x + iy*cuda_spline->stride.y + iz] = spline->coefs[ix*spline->x_stride + iy*spline->y_stride + iz]; cudaMemcpy(cuda_spline->coefs, spline_buff, size, cudaMemcpyHostToDevice); err = cudaGetLastError(); if (err != cudaSuccess) { fprintf (stderr, "Failed to copy spline to GPU memory. 
Error: %s\n", cudaGetErrorString(err)); abort(); } free(spline_buff); return cuda_spline; } extern "C" UBspline_3d_d_cuda* create_UBspline_3d_d_cuda (UBspline_3d_d* spline) { double B_h[48] = { -1.0/6.0, 3.0/6.0, -3.0/6.0, 1.0/6.0, 3.0/6.0, -6.0/6.0, 0.0/6.0, 4.0/6.0, -3.0/6.0, 3.0/6.0, 3.0/6.0, 1.0/6.0, 1.0/6.0, 0.0/6.0, 0.0/6.0, 0.0/6.0, 0.0, -0.5, 1.0, -0.5, 0.0, 1.5, -2.0, 0.0, 0.0, -1.5, 1.0, 0.5, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, -1.0, 1.0, 0.0, 0.0, 3.0, -2.0, 0.0, 0.0, -3.0, 1.0, 0.0, 0.0, 1.0, 0.0 }; cudaMemcpyToSymbol(Bcuda, B_h, 48*sizeof(double), 0, cudaMemcpyHostToDevice); UBspline_3d_d_cuda *cuda_spline = (UBspline_3d_d_cuda*) malloc (sizeof (UBspline_3d_d_cuda)); int Nx = spline->x_grid.num+3; int Ny = spline->y_grid.num+3; int Nz = spline->z_grid.num+3; int N = ((Nz+31)/32)*32; cuda_spline->stride.x = Ny*N; cuda_spline->stride.y = N; cuda_spline->stride.z = 1; cuda_spline->gridInv.x = spline->x_grid.delta_inv; cuda_spline->gridInv.y = spline->y_grid.delta_inv; cuda_spline->gridInv.z = spline->z_grid.delta_inv; cuda_spline->dim.x = spline->x_grid.num; cuda_spline->dim.y = spline->y_grid.num; cuda_spline->dim.z = spline->z_grid.num; size_t size = Nx*Ny*N*sizeof(double); cudaMalloc((void**)&(cuda_spline->coefs), size); double *spline_buff = (double*)malloc(size); for (int ix=0; ixstride.x + iy*cuda_spline->stride.y + iz] = spline->coefs[ix*spline->x_stride + iy*spline->y_stride + iz]; cudaMemcpy(cuda_spline->coefs, spline_buff, size, cudaMemcpyHostToDevice); free(spline_buff); return cuda_spline; } extern "C" UBspline_3d_z_cuda* create_UBspline_3d_z_cuda (UBspline_3d_z* spline) { double B_h[48] = { -1.0/6.0, 3.0/6.0, -3.0/6.0, 1.0/6.0, 3.0/6.0, -6.0/6.0, 0.0/6.0, 4.0/6.0, -3.0/6.0, 3.0/6.0, 3.0/6.0, 1.0/6.0, 1.0/6.0, 0.0/6.0, 0.0/6.0, 0.0/6.0, 0.0, -0.5, 1.0, -0.5, 0.0, 1.5, -2.0, 0.0, 0.0, -1.5, 1.0, 0.5, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, -1.0, 1.0, 0.0, 0.0, 3.0, -2.0, 0.0, 0.0, -3.0, 1.0, 0.0, 0.0, 1.0, 0.0 }; cudaMemcpyToSymbol(Bcuda, B_h, 
48*sizeof(double), 0, cudaMemcpyHostToDevice); UBspline_3d_z_cuda *cuda_spline = (UBspline_3d_z_cuda*) malloc (sizeof (UBspline_3d_z_cuda)); int Nx = spline->x_grid.num+3; int Ny = spline->y_grid.num+3; int Nz = spline->z_grid.num+3; int N = ((Nz+31)/32)*32; cuda_spline->stride.x = Ny*N; cuda_spline->stride.y = N; cuda_spline->stride.z = 1; cuda_spline->gridInv.x = spline->x_grid.delta_inv; cuda_spline->gridInv.y = spline->y_grid.delta_inv; cuda_spline->gridInv.z = spline->z_grid.delta_inv; cuda_spline->dim.x = spline->x_grid.num; cuda_spline->dim.y = spline->y_grid.num; cuda_spline->dim.z = spline->z_grid.num; size_t size = Nx*Ny*N*sizeof(std::complex); cudaMalloc((void**)&(cuda_spline->coefs), size); std::complex *spline_buff = (std::complex*)malloc(size); for (int ix=0; ixstride.x + iy*cuda_spline->stride.y + iz] = spline->coefs[ix*spline->x_stride + iy*spline->y_stride + iz]; cudaMemcpy(cuda_spline->coefs, spline_buff, size, cudaMemcpyHostToDevice); cuda_spline->stride.x = 2*Ny*N; cuda_spline->stride.y = 2*N; cuda_spline->stride.z = 2; free(spline_buff); return cuda_spline; } einspline-0.9.2/src/multi_nubspline_eval_sse_z.c0000664000113000011300000000277011035737370017005 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. // // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. 
// // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #include "multi_nubspline_eval_sse_z_impl.h" einspline-0.9.2/src/bspline_structs.h0000664000113000011300000000752111012400563014573 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. // // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. 
// // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #ifndef BSPLINE_STRUCTS_STD_H #define BSPLINE_STRUCTS_STD_H /////////////////////////// // Single precision real // /////////////////////////// typedef struct { spline_code spcode; type_code tcode; float* restrict coefs; Ugrid x_grid; BCtype_s xBC; } UBspline_1d_s; typedef struct { spline_code spcode; type_code tcode; float* restrict coefs; int x_stride; Ugrid x_grid, y_grid; BCtype_s xBC, yBC; } UBspline_2d_s; typedef struct { spline_code spcode; type_code tcode; float* restrict coefs; int x_stride, y_stride; Ugrid x_grid, y_grid, z_grid; BCtype_s xBC, yBC, zBC; } UBspline_3d_s; /////////////////////////// // Double precision real // /////////////////////////// typedef struct { spline_code spcode; type_code tcode; double* restrict coefs; Ugrid x_grid; BCtype_d xBC; } UBspline_1d_d; typedef struct { spline_code spcode; type_code tcode; double* restrict coefs; int x_stride; Ugrid x_grid, y_grid; BCtype_d xBC, yBC; } UBspline_2d_d; typedef struct { spline_code spcode; type_code tcode; double* restrict coefs; int x_stride, y_stride; Ugrid x_grid, y_grid, z_grid; BCtype_d xBC, yBC, zBC; } UBspline_3d_d; ////////////////////////////// // Single precision complex // ////////////////////////////// typedef struct { spline_code spcode; type_code tcode; complex_float* restrict coefs; Ugrid x_grid; BCtype_c xBC; } UBspline_1d_c; typedef struct { spline_code spcode; type_code tcode; complex_float* restrict coefs; int x_stride; Ugrid x_grid, y_grid; BCtype_c xBC, yBC; } UBspline_2d_c; typedef struct { spline_code spcode; type_code tcode; complex_float* restrict coefs; int x_stride, y_stride; Ugrid x_grid, y_grid, z_grid; BCtype_c xBC, yBC, zBC; } UBspline_3d_c; 
////////////////////////////// // Double precision complex // ////////////////////////////// typedef struct { spline_code spcode; type_code tcode; complex_double* restrict coefs; Ugrid x_grid; BCtype_z xBC; } UBspline_1d_z; typedef struct { spline_code spcode; type_code tcode; complex_double* restrict coefs; int x_stride; Ugrid x_grid, y_grid; BCtype_z xBC, yBC; } UBspline_2d_z; typedef struct { spline_code spcode; type_code tcode; complex_double* restrict coefs; int x_stride, y_stride; Ugrid x_grid, y_grid, z_grid; BCtype_z xBC, yBC, zBC; } UBspline_3d_z; #endif einspline-0.9.2/src/nubspline_create.c0000664000113000011300000010455111012400563014666 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. // // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. 
// // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #include "nubspline_create.h" #include #include #ifndef _XOPEN_SOURCE #define _XOPEN_SOURCE 600 #endif #ifndef __USE_XOPEN2K #define __USE_XOPEN2K #endif #include #include //////////////////////////////////////////////////////// // Notes on conventions: // // Below, M (and Mx, My, Mz) represent the number of // // data points to be interpolated. With derivative // // boundary conditions, it is equal to the number of // // grid points. With periodic boundary conditions, // // it is one less than the number of grid points. // // N (and Nx, Ny, Nz) is the number of B-spline // // coefficients, which is #(grid points)+2 for all // // boundary conditions. // //////////////////////////////////////////////////////// //////////////////////////////////////////////////////// //////////////////////////////////////////////////////// //// Single-precision real creation routines //// //////////////////////////////////////////////////////// //////////////////////////////////////////////////////// void solve_NUB_deriv_interp_1d_s (NUBasis* restrict basis, float* restrict data, int datastride, float* restrict p, int pstride, float abcdInitial[4], float abcdFinal[4]) { int M = basis->grid->num_points; int N = M+2; // Banded matrix storage. The first three elements in the // tinyvector store the tridiagonal coefficients. The last element // stores the RHS data. 
#ifdef HAVE_C_VARARRAYS float bands[4*N]; #else float *bands = malloc (4*N*sizeof(float)); #endif // Fill up bands for (int i=0; i<4; i++) { bands[i] = abcdInitial[i]; bands[4*(N-1)+i] = abcdFinal[i]; } for (int i=0; i0; row--) p[pstride*(row)] = bands[4*(row)+3] - bands[4*(row)+2]*p[pstride*(row+1)]; // Finish with first row p[0] = bands[4*(0)+3] - bands[4*(0)+1]*p[pstride*1] - bands[4*(0)+2]*p[pstride*2]; #ifndef HAVE_C_VARARRAYS free (bands); #endif } // The number of elements in data should be one less than the number // of grid points void solve_NUB_periodic_interp_1d_s (NUBasis* restrict basis, float* restrict data, int datastride, float* restrict p, int pstride) { int M = basis->grid->num_points-1; // Banded matrix storage. The first three elements in each row // store the tridiagonal coefficients. The last element // stores the RHS data. #ifdef HAVE_C_VARARRAYS float bands[4*M], lastCol[M]; #else float *bands = malloc (4*M*sizeof(float)); float *lastCol = malloc ( M*sizeof(float)); #endif // Fill up bands for (int i=0; i=0; row--) p[pstride*(row+1)] = bands[4*(row)+3] - bands[4*(row)+2]*p[pstride*(row+2)] - lastCol[row]*p[pstride*M]; p[pstride* 0 ] = p[pstride*M]; p[pstride*(M+1)] = p[pstride*1]; p[pstride*(M+2)] = p[pstride*2]; #ifndef HAVE_C_VARARRAYS free (bands); free (lastCol); #endif } void find_NUBcoefs_1d_s (NUBasis* restrict basis, BCtype_s bc, float *data, int dstride, float *coefs, int cstride) { if (bc.lCode == PERIODIC) solve_NUB_periodic_interp_1d_s (basis, data, dstride, coefs, cstride); else { int M = basis->grid->num_points; // Setup boundary conditions float bfuncs[4], dbfuncs[4], abcd_left[4], abcd_right[4]; // Left boundary if (bc.lCode == FLAT || bc.lCode == NATURAL) bc.lVal = 0.0; if (bc.lCode == FLAT || bc.lCode == DERIV1) { get_NUBasis_dfuncs_si (basis, 0, bfuncs, abcd_left); abcd_left[3] = bc.lVal; } if (bc.lCode == NATURAL || bc.lCode == DERIV2) { get_NUBasis_d2funcs_si (basis, 0, bfuncs, dbfuncs, abcd_left); abcd_left[3] = 
bc.lVal; } // Right boundary if (bc.rCode == FLAT || bc.rCode == NATURAL) bc.rVal = 0.0; if (bc.rCode == FLAT || bc.rCode == DERIV1) { get_NUBasis_dfuncs_si (basis, M-1, bfuncs, abcd_right); abcd_right[3] = bc.rVal; } if (bc.rCode == NATURAL || bc.rCode == DERIV2) { get_NUBasis_d2funcs_si (basis, M-1, bfuncs, dbfuncs, abcd_right); abcd_right[3] = bc.rVal; } // Now, solve for coefficients solve_NUB_deriv_interp_1d_s (basis, data, dstride, coefs, cstride, abcd_left, abcd_right); } } NUBspline_1d_s * create_NUBspline_1d_s (NUgrid* x_grid, BCtype_s xBC, float *data) { // First, create the spline structure NUBspline_1d_s* spline = malloc (sizeof(NUBspline_1d_s)); if (spline == NULL) return spline; spline->sp_code = NU1D; spline->t_code = SINGLE_REAL; // Next, create the basis spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC); // M is the number of data points int M; if (xBC.lCode == PERIODIC) M = x_grid->num_points - 1; else M = x_grid->num_points; int N = x_grid->num_points + 2; // Allocate coefficients and solve spline->coefs = malloc(N*sizeof(float)); find_NUBcoefs_1d_s (spline->x_basis, xBC, data, 1, spline->coefs, 1); return spline; } NUBspline_2d_s * create_NUBspline_2d_s (NUgrid* x_grid, NUgrid* y_grid, BCtype_s xBC, BCtype_s yBC, float *data) { // First, create the spline structure NUBspline_2d_s* spline = malloc (sizeof(NUBspline_2d_s)); if (spline == NULL) return spline; spline->sp_code = NU2D; spline->t_code = SINGLE_REAL; // Next, create the bases spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC); spline->y_basis = create_NUBasis (y_grid, yBC.lCode==PERIODIC); int Mx, My, Nx, Ny; if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1; else Mx = x_grid->num_points; if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1; else My = y_grid->num_points; Nx = x_grid->num_points + 2; Ny = y_grid->num_points + 2; spline->x_stride = Ny; #ifndef HAVE_SSE2 spline->coefs = malloc (sizeof(float)*Nx*Ny); #else posix_memalign 
((void**)&spline->coefs, 16, sizeof(float)*Nx*Ny); #endif // First, solve in the X-direction for (int iy=0; iyx_basis, xBC, data+doffset, My, spline->coefs+coffset, Ny); } // Now, solve in the Y-direction for (int ix=0; ixy_basis, yBC, spline->coefs+doffset, 1, spline->coefs+coffset, 1); } return spline; } NUBspline_3d_s * create_NUBspline_3d_s (NUgrid* x_grid, NUgrid* y_grid, NUgrid* z_grid, BCtype_s xBC, BCtype_s yBC, BCtype_s zBC, float *data) { // First, create the spline structure NUBspline_3d_s* spline = malloc (sizeof(NUBspline_3d_s)); if (spline == NULL) return spline; spline->sp_code = NU3D; spline->t_code = SINGLE_REAL; // Next, create the bases spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC); spline->y_basis = create_NUBasis (y_grid, yBC.lCode==PERIODIC); spline->z_basis = create_NUBasis (z_grid, zBC.lCode==PERIODIC); int Mx, My, Mz, Nx, Ny, Nz; if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1; else Mx = x_grid->num_points; if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1; else My = y_grid->num_points; if (zBC.lCode == PERIODIC) Mz = z_grid->num_points - 1; else Mz = z_grid->num_points; Nx = x_grid->num_points + 2; Ny = y_grid->num_points + 2; Nz = z_grid->num_points + 2; // Allocate coefficients and solve spline->x_stride = Ny*Nz; spline->y_stride = Nz; #ifndef HAVE_SSE2 spline->coefs = malloc (sizeof(float)*Nx*Ny*Nz); #else posix_memalign ((void**)&spline->coefs, 16, sizeof(float)*Nx*Ny*Nz); #endif // First, solve in the X-direction for (int iy=0; iyx_basis, xBC, data+doffset, My*Mz, spline->coefs+coffset, Ny*Nz); } // Now, solve in the Y-direction for (int ix=0; ixy_basis, yBC, spline->coefs+doffset, Nz, spline->coefs+coffset, Nz); } // Now, solve in the Z-direction for (int ix=0; ixz_basis, zBC, spline->coefs+doffset, 1, spline->coefs+coffset, 1); } return spline; } //////////////////////////////////////////////////////// //////////////////////////////////////////////////////// //// Double-precision real creation routines 
//// //////////////////////////////////////////////////////// //////////////////////////////////////////////////////// void solve_NUB_deriv_interp_1d_d (NUBasis* restrict basis, double* restrict data, int datastride, double* restrict p, int pstride, double abcdInitial[4], double abcdFinal[4]) { int M = basis->grid->num_points; int N = M+2; // Banded matrix storage. The first three elements in the // tinyvector store the tridiagonal coefficients. The last element // stores the RHS data. #ifdef HAVE_C_VARARRAYS double bands[4*N]; #else double *bands = malloc (4*N*sizeof(double)); #endif // Fill up bands for (int i=0; i<4; i++) { bands[i] = abcdInitial[i]; bands[4*(N-1)+i] = abcdFinal[i]; } for (int i=0; i0; row--) p[pstride*(row)] = bands[4*(row)+3] - bands[4*(row)+2]*p[pstride*(row+1)]; // Finish with first row p[0] = bands[4*(0)+3] - bands[4*(0)+1]*p[pstride*1] - bands[4*(0)+2]*p[pstride*2]; #ifndef HAVE_C_VARARRAYS free (bands); #endif } void solve_NUB_periodic_interp_1d_d (NUBasis* restrict basis, double* restrict data, int datastride, double* restrict p, int pstride) { int M = basis->grid->num_points-1; // Banded matrix storage. The first three elements in the // tinyvector store the tridiagonal coefficients. The last element // stores the RHS data. 
#ifdef HAVE_C_VARARRAYS double bands[4*M], lastCol[M]; #else double *bands = malloc (4*M*sizeof(double)); double *lastCol = malloc ( M*sizeof(double)); #endif // Fill up bands for (int i=0; i=0; row--) p[pstride*(row+1)] = bands[4*(row)+3] - bands[4*(row)+2]*p[pstride*(row+2)] - lastCol[row]*p[pstride*M]; p[pstride* 0 ] = p[pstride*M]; p[pstride*(M+1)] = p[pstride*1]; p[pstride*(M+2)] = p[pstride*2]; #ifndef HAVE_C_VARARRAYS free (bands); free (lastCol); #endif } void find_NUBcoefs_1d_d (NUBasis* restrict basis, BCtype_d bc, double *data, int dstride, double *coefs, int cstride) { if (bc.lCode == PERIODIC) solve_NUB_periodic_interp_1d_d (basis, data, dstride, coefs, cstride); else { int M = basis->grid->num_points; // Setup boundary conditions double bfuncs[4], dbfuncs[4], abcd_left[4], abcd_right[4]; // Left boundary if (bc.lCode == FLAT || bc.lCode == NATURAL) bc.lVal = 0.0; if (bc.lCode == FLAT || bc.lCode == DERIV1) { get_NUBasis_dfuncs_di (basis, 0, bfuncs, abcd_left); abcd_left[3] = bc.lVal; } if (bc.lCode == NATURAL || bc.lCode == DERIV2) { get_NUBasis_d2funcs_di (basis, 0, bfuncs, dbfuncs, abcd_left); abcd_left[3] = bc.lVal; } // Right boundary if (bc.rCode == FLAT || bc.rCode == NATURAL) bc.rVal = 0.0; if (bc.rCode == FLAT || bc.rCode == DERIV1) { get_NUBasis_dfuncs_di (basis, M-1, bfuncs, abcd_right); abcd_right[3] = bc.rVal; } if (bc.rCode == NATURAL || bc.rCode == DERIV2) { get_NUBasis_d2funcs_di (basis, M-1, bfuncs, dbfuncs, abcd_right); abcd_right[3] = bc.rVal; } // Now, solve for coefficients solve_NUB_deriv_interp_1d_d (basis, data, dstride, coefs, cstride, abcd_left, abcd_right); } } NUBspline_1d_d * create_NUBspline_1d_d (NUgrid* x_grid, BCtype_d xBC, double *data) { // First, create the spline structure NUBspline_1d_d* spline = malloc (sizeof(NUBspline_1d_d)); if (spline == NULL) return spline; spline->sp_code = NU1D; spline->t_code = DOUBLE_REAL; // Next, create the basis spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC); // M is 
the number of data points int M; if (xBC.lCode == PERIODIC) M = x_grid->num_points - 1; else M = x_grid->num_points; int N = x_grid->num_points + 2; // Allocate coefficients and solve spline->coefs = malloc(N*sizeof(double)); find_NUBcoefs_1d_d (spline->x_basis, xBC, data, 1, spline->coefs, 1); return spline; } NUBspline_2d_d * create_NUBspline_2d_d (NUgrid* x_grid, NUgrid* y_grid, BCtype_d xBC, BCtype_d yBC, double *data) { // First, create the spline structure NUBspline_2d_d* spline = malloc (sizeof(NUBspline_2d_d)); if (spline == NULL) return spline; spline->sp_code = NU2D; spline->t_code = DOUBLE_REAL; // Next, create the bases spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC); spline->y_basis = create_NUBasis (y_grid, yBC.lCode==PERIODIC); int Mx, My, Nx, Ny; if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1; else Mx = x_grid->num_points; if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1; else My = y_grid->num_points; Nx = x_grid->num_points + 2; Ny = y_grid->num_points + 2; spline->x_stride = Ny; #ifndef HAVE_SSE2 spline->coefs = malloc (sizeof(double)*Nx*Ny); #else posix_memalign ((void**)&spline->coefs, 16, sizeof(double)*Nx*Ny); #endif // First, solve in the X-direction for (int iy=0; iyx_basis, xBC, data+doffset, My, spline->coefs+coffset, Ny); } // Now, solve in the Y-direction for (int ix=0; ixy_basis, yBC, spline->coefs+doffset, 1, spline->coefs+coffset, 1); } return spline; } NUBspline_3d_d * create_NUBspline_3d_d (NUgrid* x_grid, NUgrid* y_grid, NUgrid* z_grid, BCtype_d xBC, BCtype_d yBC, BCtype_d zBC, double *data) { // First, create the spline structure NUBspline_3d_d* spline = malloc (sizeof(NUBspline_3d_d)); if (spline == NULL) return spline; spline->sp_code = NU3D; spline->t_code = DOUBLE_REAL; // Next, create the bases spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC); spline->y_basis = create_NUBasis (y_grid, yBC.lCode==PERIODIC); spline->z_basis = create_NUBasis (z_grid, zBC.lCode==PERIODIC); int Mx, My, 
Mz, Nx, Ny, Nz; if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1; else Mx = x_grid->num_points; if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1; else My = y_grid->num_points; if (zBC.lCode == PERIODIC) Mz = z_grid->num_points - 1; else Mz = z_grid->num_points; Nx = x_grid->num_points + 2; Ny = y_grid->num_points + 2; Nz = z_grid->num_points + 2; spline->x_stride = Ny*Nz; spline->y_stride = Nz; #ifndef HAVE_SSE2 spline->coefs = malloc (sizeof(double)*Nx*Ny*Nz); #else posix_memalign ((void**)&spline->coefs, 16, sizeof(double)*Nx*Ny*Nz); #endif // First, solve in the X-direction for (int iy=0; iyx_basis, xBC, data+doffset, My*Mz, spline->coefs+coffset, Ny*Nz); } // Now, solve in the Y-direction for (int ix=0; ixy_basis, yBC, spline->coefs+doffset, Nz, spline->coefs+coffset, Nz); } // Now, solve in the Z-direction for (int ix=0; ixz_basis, zBC, spline->coefs+doffset, 1, spline->coefs+coffset, 1); } return spline; } //////////////////////////////////////////////////////// //////////////////////////////////////////////////////// //// Single-precision complex creation routines //// //////////////////////////////////////////////////////// //////////////////////////////////////////////////////// void find_NUBcoefs_1d_c (NUBasis* restrict basis, BCtype_c bc, complex_float *data, int dstride, complex_float *coefs, int cstride) { BCtype_s bc_r, bc_i; bc_r.lCode = bc.lCode; bc_i.lCode = bc.lCode; bc_r.rCode = bc.rCode; bc_i.rCode = bc.rCode; bc_r.lVal = bc.lVal_r; bc_r.rVal = bc.rVal_r; bc_i.lVal = bc.lVal_i; bc_i.rVal = bc.rVal_i; float *data_r = ((float*)data ); float *data_i = ((float*)data )+1; float *coefs_r = ((float*)coefs); float *coefs_i = ((float*)coefs)+1; find_NUBcoefs_1d_s (basis, bc_r, data_r, 2*dstride, coefs_r, 2*cstride); find_NUBcoefs_1d_s (basis, bc_i, data_i, 2*dstride, coefs_i, 2*cstride); } NUBspline_1d_c * create_NUBspline_1d_c (NUgrid* x_grid, BCtype_c xBC, complex_float *data) { // First, create the spline structure NUBspline_1d_c* spline = 
malloc (sizeof(NUBspline_1d_c)); if (spline == NULL) return spline; spline->sp_code = NU1D; spline->t_code = SINGLE_COMPLEX; // Next, create the basis spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC); // M is the number of data points int M; if (xBC.lCode == PERIODIC) M = x_grid->num_points - 1; else M = x_grid->num_points; int N = x_grid->num_points + 2; // Allocate coefficients and solve spline->coefs = malloc(N*sizeof(complex_float)); find_NUBcoefs_1d_c (spline->x_basis, xBC, data, 1, spline->coefs, 1); return spline; } NUBspline_2d_c * create_NUBspline_2d_c (NUgrid* x_grid, NUgrid* y_grid, BCtype_c xBC, BCtype_c yBC, complex_float *data) { // First, create the spline structure NUBspline_2d_c* spline = malloc (sizeof(NUBspline_2d_c)); if (spline == NULL) return spline; spline->sp_code = NU2D; spline->t_code = SINGLE_COMPLEX; // Next, create the bases spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC); spline->y_basis = create_NUBasis (y_grid, yBC.lCode==PERIODIC); int Mx, My, Nx, Ny; if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1; else Mx = x_grid->num_points; if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1; else My = y_grid->num_points; Nx = x_grid->num_points + 2; Ny = y_grid->num_points + 2; spline->x_stride = Ny; #ifndef HAVE_SSE2 spline->coefs = malloc (sizeof(complex_float)*Nx*Ny); #else posix_memalign ((void**)&spline->coefs, 16, sizeof(complex_float)*Nx*Ny); #endif // First, solve in the X-direction for (int iy=0; iyx_basis, xBC, data+doffset, My, spline->coefs+coffset, Ny); } // Now, solve in the Y-direction for (int ix=0; ixy_basis, yBC, spline->coefs+doffset, 1, spline->coefs+coffset, 1); } return spline; } NUBspline_3d_c * create_NUBspline_3d_c (NUgrid* x_grid, NUgrid* y_grid, NUgrid* z_grid, BCtype_c xBC, BCtype_c yBC, BCtype_c zBC, complex_float *data) { // First, create the spline structure NUBspline_3d_c* spline = malloc (sizeof(NUBspline_3d_c)); if (spline == NULL) return spline; spline->sp_code = NU3D; 
spline->t_code = SINGLE_COMPLEX; // Next, create the bases spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC); spline->y_basis = create_NUBasis (y_grid, yBC.lCode==PERIODIC); spline->z_basis = create_NUBasis (z_grid, zBC.lCode==PERIODIC); int Mx, My, Mz, Nx, Ny, Nz; if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1; else Mx = x_grid->num_points; if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1; else My = y_grid->num_points; if (zBC.lCode == PERIODIC) Mz = z_grid->num_points - 1; else Mz = z_grid->num_points; Nx = x_grid->num_points + 2; Ny = y_grid->num_points + 2; Nz = z_grid->num_points + 2; // Allocate coefficients and solve spline->x_stride = Ny*Nz; spline->y_stride = Nz; #ifndef HAVE_SSE2 spline->coefs = malloc (sizeof(complex_float)*Nx*Ny*Nz); #else posix_memalign ((void**)&spline->coefs, 16, sizeof(complex_float)*Nx*Ny*Nz); #endif // First, solve in the X-direction for (int iy=0; iyx_basis, xBC, data+doffset, My*Mz, spline->coefs+coffset, Ny*Nz); } // Now, solve in the Y-direction for (int ix=0; ixy_basis, yBC, spline->coefs+doffset, Nz, spline->coefs+coffset, Nz); } // Now, solve in the Z-direction for (int ix=0; ixz_basis, zBC, spline->coefs+doffset, 1, spline->coefs+coffset, 1); } return spline; } //////////////////////////////////////////////////////// //////////////////////////////////////////////////////// //// Double-precision complex creation routines //// //////////////////////////////////////////////////////// //////////////////////////////////////////////////////// void find_NUBcoefs_1d_z (NUBasis* restrict basis, BCtype_z bc, complex_double *data, int dstride, complex_double *coefs, int cstride) { BCtype_d bc_r, bc_i; bc_r.lCode = bc.lCode; bc_i.lCode = bc.lCode; bc_r.rCode = bc.rCode; bc_i.rCode = bc.rCode; bc_r.lVal = bc.lVal_r; bc_r.rVal = bc.rVal_r; bc_i.lVal = bc.lVal_i; bc_i.rVal = bc.rVal_i; double *data_r = ((double*)data ); double *data_i = ((double*)data )+1; double *coefs_r = ((double*)coefs); double *coefs_i = 
((double*)coefs)+1; find_NUBcoefs_1d_d (basis, bc_r, data_r, 2*dstride, coefs_r, 2*cstride); find_NUBcoefs_1d_d (basis, bc_i, data_i, 2*dstride, coefs_i, 2*cstride); } NUBspline_1d_z * create_NUBspline_1d_z (NUgrid* x_grid, BCtype_z xBC, complex_double *data) { // First, create the spline structure NUBspline_1d_z* spline = malloc (sizeof(NUBspline_1d_z)); if (spline == NULL) return spline; spline->sp_code = NU1D; spline->t_code = DOUBLE_COMPLEX; // Next, create the basis spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC); // M is the number of data points int M; if (xBC.lCode == PERIODIC) M = x_grid->num_points - 1; else M = x_grid->num_points; int N = x_grid->num_points + 2; // Allocate coefficients and solve spline->coefs = malloc(N*sizeof(complex_double)); find_NUBcoefs_1d_z (spline->x_basis, xBC, data, 1, spline->coefs, 1); return spline; } NUBspline_2d_z * create_NUBspline_2d_z (NUgrid* x_grid, NUgrid* y_grid, BCtype_z xBC, BCtype_z yBC, complex_double *data) { // First, create the spline structure NUBspline_2d_z* spline = malloc (sizeof(NUBspline_2d_z)); if (spline == NULL) return spline; spline->sp_code = NU2D; spline->t_code = DOUBLE_COMPLEX; // Next, create the bases spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC); spline->y_basis = create_NUBasis (y_grid, yBC.lCode==PERIODIC); int Mx, My, Nx, Ny; if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1; else Mx = x_grid->num_points; if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1; else My = y_grid->num_points; Nx = x_grid->num_points + 2; Ny = y_grid->num_points + 2; spline->x_stride = Ny; #ifndef HAVE_SSE2 spline->coefs = malloc (sizeof(complex_double)*Nx*Ny); #else posix_memalign ((void**)&spline->coefs, 16, sizeof(complex_double)*Nx*Ny); #endif // First, solve in the X-direction for (int iy=0; iyx_basis, xBC, data+doffset, My, spline->coefs+coffset, Ny); } // Now, solve in the Y-direction for (int ix=0; ixy_basis, yBC, spline->coefs+doffset, 1, spline->coefs+coffset, 1); 
} return spline; } NUBspline_3d_z * create_NUBspline_3d_z (NUgrid* x_grid, NUgrid* y_grid, NUgrid* z_grid, BCtype_z xBC, BCtype_z yBC, BCtype_z zBC, complex_double *data) { // First, create the spline structure NUBspline_3d_z* spline = malloc (sizeof(NUBspline_3d_z)); if (spline == NULL) return spline; spline->sp_code = NU3D; spline->t_code = DOUBLE_COMPLEX; spline->x_grid = x_grid; spline->y_grid = y_grid; spline->z_grid = z_grid; // Next, create the bases spline->x_basis = create_NUBasis (x_grid, xBC.lCode==PERIODIC); spline->y_basis = create_NUBasis (y_grid, yBC.lCode==PERIODIC); spline->z_basis = create_NUBasis (z_grid, zBC.lCode==PERIODIC); int Mx, My, Mz, Nx, Ny, Nz; if (xBC.lCode == PERIODIC) Mx = x_grid->num_points - 1; else Mx = x_grid->num_points; if (yBC.lCode == PERIODIC) My = y_grid->num_points - 1; else My = y_grid->num_points; if (zBC.lCode == PERIODIC) Mz = z_grid->num_points - 1; else Mz = z_grid->num_points; Nx = x_grid->num_points + 2; Ny = y_grid->num_points + 2; Nz = z_grid->num_points + 2; // Allocate coefficients and solve spline->x_stride = Ny*Nz; spline->y_stride = Nz; #ifndef HAVE_SSE2 spline->coefs = malloc (sizeof(complex_double)*Nx*Ny*Nz); #else posix_memalign ((void**)&spline->coefs, 16, sizeof(complex_double)*Nx*Ny*Nz); #endif // First, solve in the X-direction for (int iy=0; iyx_basis, xBC, data+doffset, My*Mz, spline->coefs+coffset, Ny*Nz); /* for (int ix=0; ixcoefs[coffset+ix*spline->x_stride]; if (isnan(creal(z))) fprintf (stderr, "NAN encountered in create_NUBspline_3d_z at real part of (%d,%d,%d)\n", ix,iy,iz); if (isnan(cimag(z))) fprintf (stderr, "NAN encountered in create_NUBspline_3d_z at imag part of (%d,%d,%d)\n", ix,iy,iz); } */ } // Now, solve in the Y-direction for (int ix=0; ixy_basis, yBC, spline->coefs+doffset, Nz, spline->coefs+coffset, Nz); } // Now, solve in the Z-direction for (int ix=0; ixz_basis, zBC, spline->coefs+doffset, 1, spline->coefs+coffset, 1); } return spline; } void destroy_NUBspline(Bspline *spline) 
{ free (spline->coefs); switch (spline->sp_code) { case NU1D: destroy_NUBasis (((NUBspline_1d*)spline)->x_basis); break; case NU2D: destroy_NUBasis (((NUBspline_2d*)spline)->x_basis); destroy_NUBasis (((NUBspline_2d*)spline)->y_basis); break; case NU3D: destroy_NUBasis (((NUBspline_3d*)spline)->x_basis); destroy_NUBasis (((NUBspline_3d*)spline)->y_basis); destroy_NUBasis (((NUBspline_3d*)spline)->z_basis); break; } } einspline-0.9.2/src/multi_nubspline.h0000664000113000011300000000354411035737074014601 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. // // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. 
// // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #ifndef MULTI_NUBSPLINE_H #define MULTI_NUBSPLINE_H #include "bspline_base.h" #include "multi_nubspline_structs.h" // Currently, some of the single-precision routines use SSE2 instructions #include "multi_nubspline_eval_s.h" #include "multi_nubspline_eval_c.h" #include "multi_nubspline_eval_d.h" #include "multi_nubspline_eval_z.h" #include "nubspline_create.h" #include "multi_nubspline_create.h" #endif einspline-0.9.2/src/nubspline_eval_std_z.h0000664000113000011300000006654311012400563015572 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. // // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. 
// // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #ifndef NUBSPLINE_EVAL_STD_Z_H #define NUBSPLINE_EVAL_STD_Z_H #include #include #include "nubspline_structs.h" /************************************************************/ /* 1D single-precision, real evaulation functions */ /************************************************************/ /* Value only */ inline void eval_NUBspline_1d_z (NUBspline_1d_z * restrict spline, double x, complex_double* restrict val) { double bfuncs[4]; int i = get_NUBasis_funcs_d (spline->x_basis, x, bfuncs); complex_double* restrict coefs = spline->coefs; *val = (coefs[i+0]*bfuncs[0] +coefs[i+1]*bfuncs[1] + coefs[i+2]*bfuncs[2] +coefs[i+3]*bfuncs[3]); } /* Value and first derivative */ inline void eval_NUBspline_1d_z_vg (NUBspline_1d_z * restrict spline, double x, complex_double* restrict val, complex_double* restrict grad) { double bfuncs[4], dbfuncs[4]; int i = get_NUBasis_dfuncs_d (spline->x_basis, x, bfuncs, dbfuncs); complex_double* restrict coefs = spline->coefs; *val = (coefs[i+0]* bfuncs[0] + coefs[i+1]* bfuncs[1] + coefs[i+2]* bfuncs[2] + coefs[i+3]* bfuncs[3]); *grad = (coefs[i+0]*dbfuncs[0] + coefs[i+1]*dbfuncs[1] + coefs[i+2]*dbfuncs[2] + coefs[i+3]*dbfuncs[3]); } /* Value, first derivative, and second derivative */ inline void eval_NUBspline_1d_z_vgl (NUBspline_1d_z * restrict spline, double x, complex_double* restrict val, complex_double* restrict grad, complex_double* restrict lapl) { double bfuncs[4], dbfuncs[4], d2bfuncs[4]; int i = get_NUBasis_d2funcs_d (spline->x_basis, x, bfuncs, dbfuncs, d2bfuncs); complex_double* restrict coefs = spline->coefs; *val = (coefs[i+0]* bfuncs[0] + coefs[i+1]* bfuncs[1] + coefs[i+2]* bfuncs[2] + coefs[i+3]* bfuncs[3]); *grad = 
(coefs[i+0]* dbfuncs[0] + coefs[i+1]* dbfuncs[1] + coefs[i+2]* dbfuncs[2] + coefs[i+3]* dbfuncs[3]); *lapl = (coefs[i+0]*d2bfuncs[0] + coefs[i+1]*d2bfuncs[1] + coefs[i+2]*d2bfuncs[2] + coefs[i+3]*d2bfuncs[3]); } inline void eval_NUBspline_1d_z_vgh (NUBspline_1d_z * restrict spline, double x, complex_double* restrict val, complex_double* restrict grad, complex_double* restrict hess) { eval_NUBspline_1d_z_vgl (spline, x, val, grad, hess); } /************************************************************/ /* 2D single-precision, real evaulation functions */ /************************************************************/ /* Value only */ inline void eval_NUBspline_2d_z (NUBspline_2d_z * restrict spline, double x, double y, complex_double* restrict val) { double a[4], b[4]; int ix = get_NUBasis_funcs_d (spline->x_basis, x, a); int iy = get_NUBasis_funcs_d (spline->y_basis, y, b); complex_double* restrict coefs = spline->coefs; int xs = spline->x_stride; #define C(i,j) coefs[(ix+(i))*xs+iy+(j)] *val = (a[0]*(C(0,0)*b[0]+C(0,1)*b[1]+C(0,2)*b[2]+C(0,3)*b[3])+ a[1]*(C(1,0)*b[0]+C(1,1)*b[1]+C(1,2)*b[2]+C(1,3)*b[3])+ a[2]*(C(2,0)*b[0]+C(2,1)*b[1]+C(2,2)*b[2]+C(2,3)*b[3])+ a[3]*(C(3,0)*b[0]+C(3,1)*b[1]+C(3,2)*b[2]+C(3,3)*b[3])); #undef C } /* Value and gradient */ inline void eval_NUBspline_2d_z_vg (NUBspline_2d_z * restrict spline, double x, double y, complex_double* restrict val, complex_double* restrict grad) { double a[4], b[4], da[4], db[4]; int ix = get_NUBasis_dfuncs_d (spline->x_basis, x, a, da); int iy = get_NUBasis_dfuncs_d (spline->y_basis, y, b, db); complex_double* restrict coefs = spline->coefs; int xs = spline->x_stride; #define C(i,j) coefs[(ix+(i))*xs+iy+(j)] *val = (a[0]*(C(0,0)*b[0]+C(0,1)*b[1]+C(0,2)*b[2]+C(0,3)*b[3])+ a[1]*(C(1,0)*b[0]+C(1,1)*b[1]+C(1,2)*b[2]+C(1,3)*b[3])+ a[2]*(C(2,0)*b[0]+C(2,1)*b[1]+C(2,2)*b[2]+C(2,3)*b[3])+ a[3]*(C(3,0)*b[0]+C(3,1)*b[1]+C(3,2)*b[2]+C(3,3)*b[3])); grad[0] = (da[0]*(C(0,0)*b[0]+C(0,1)*b[1]+C(0,2)*b[2]+C(0,3)*b[3])+ 
da[1]*(C(1,0)*b[0]+C(1,1)*b[1]+C(1,2)*b[2]+C(1,3)*b[3])+ da[2]*(C(2,0)*b[0]+C(2,1)*b[1]+C(2,2)*b[2]+C(2,3)*b[3])+ da[3]*(C(3,0)*b[0]+C(3,1)*b[1]+C(3,2)*b[2]+C(3,3)*b[3])); grad[1] = (a[0]*(C(0,0)*db[0]+C(0,1)*db[1]+C(0,2)*db[2]+C(0,3)*db[3])+ a[1]*(C(1,0)*db[0]+C(1,1)*db[1]+C(1,2)*db[2]+C(1,3)*db[3])+ a[2]*(C(2,0)*db[0]+C(2,1)*db[1]+C(2,2)*db[2]+C(2,3)*db[3])+ a[3]*(C(3,0)*db[0]+C(3,1)*db[1]+C(3,2)*db[2]+C(3,3)*db[3])); #undef C } /* Value, gradient, and laplacian */ inline void eval_NUBspline_2d_z_vgl (NUBspline_2d_z * restrict spline, double x, double y, complex_double* restrict val, complex_double* restrict grad, complex_double* restrict lapl) { double a[4], b[4], da[4], db[4], d2a[4], d2b[4]; complex_double bc[4]; int ix = get_NUBasis_d2funcs_d (spline->x_basis, x, a, da, d2a); int iy = get_NUBasis_d2funcs_d (spline->y_basis, y, b, db, d2b); complex_double* restrict coefs = spline->coefs; int xs = spline->x_stride; #define C(i,j) coefs[(ix+(i))*xs+iy+(j)] bc[0] = (C(0,0)*b[0]+C(0,1)*b[1]+C(0,2)*b[2]+C(0,3)*b[3]); bc[1] = (C(1,0)*b[0]+C(1,1)*b[1]+C(1,2)*b[2]+C(1,3)*b[3]); bc[2] = (C(2,0)*b[0]+C(2,1)*b[1]+C(2,2)*b[2]+C(2,3)*b[3]); bc[3] = (C(3,0)*b[0]+C(3,1)*b[1]+C(3,2)*b[2]+C(3,3)*b[3]); *val = (a[0]*bc[0] + a[1]*bc[1] + a[2]*bc[2] + a[3]*bc[3]); grad[0] = (da[0]*bc[0] + da[1]*bc[1] + da[2]*bc[2] + da[3]*bc[3]); grad[1] = (a[0]*(C(0,0)*db[0]+C(0,1)*db[1]+C(0,2)*db[2]+C(0,3)*db[3])+ a[1]*(C(1,0)*db[0]+C(1,1)*db[1]+C(1,2)*db[2]+C(1,3)*db[3])+ a[2]*(C(2,0)*db[0]+C(2,1)*db[1]+C(2,2)*db[2]+C(2,3)*db[3])+ a[3]*(C(3,0)*db[0]+C(3,1)*db[1]+C(3,2)*db[2]+C(3,3)*db[3])); *lapl = (d2a[0]*bc[0] + d2a[1]*bc[1] + d2a[2]*bc[2] + d2a[3]*bc[3]+ a[0]*(C(0,0)*d2b[0]+C(0,1)*d2b[1]+C(0,2)*d2b[2]+C(0,3)*d2b[3])+ a[1]*(C(1,0)*d2b[0]+C(1,1)*d2b[1]+C(1,2)*d2b[2]+C(1,3)*d2b[3])+ a[2]*(C(2,0)*d2b[0]+C(2,1)*d2b[1]+C(2,2)*d2b[2]+C(2,3)*d2b[3])+ a[3]*(C(3,0)*d2b[0]+C(3,1)*d2b[1]+C(3,2)*d2b[2]+C(3,3)*d2b[3])); #undef C } /* Value, gradient, and Hessian */ inline void eval_NUBspline_2d_z_vgh 
(NUBspline_2d_z * restrict spline, double x, double y, complex_double* restrict val, complex_double* restrict grad, complex_double* restrict hess) { double a[4], b[4], da[4], db[4], d2a[4], d2b[4]; complex_double bc[4]; int ix = get_NUBasis_d2funcs_d (spline->x_basis, x, a, da, d2a); int iy = get_NUBasis_d2funcs_d (spline->y_basis, y, b, db, d2b); complex_double* restrict coefs = spline->coefs; int xs = spline->x_stride; #define C(i,j) coefs[(ix+(i))*xs+iy+(j)] bc[0] = (C(0,0)*b[0]+C(0,1)*b[1]+C(0,2)*b[2]+C(0,3)*b[3]); bc[1] = (C(1,0)*b[0]+C(1,1)*b[1]+C(1,2)*b[2]+C(1,3)*b[3]); bc[2] = (C(2,0)*b[0]+C(2,1)*b[1]+C(2,2)*b[2]+C(2,3)*b[3]); bc[3] = (C(3,0)*b[0]+C(3,1)*b[1]+C(3,2)*b[2]+C(3,3)*b[3]); *val = (a[0]*bc[0] + a[1]*bc[1] + a[2]*bc[2] + a[3]*bc[3]); grad[0] = (da[0]*bc[0] + da[1]*bc[1] + da[2]*bc[2] + da[3]*bc[3]); grad[1] = (a[0]*(C(0,0)*db[0]+C(0,1)*db[1]+C(0,2)*db[2]+C(0,3)*db[3])+ a[1]*(C(1,0)*db[0]+C(1,1)*db[1]+C(1,2)*db[2]+C(1,3)*db[3])+ a[2]*(C(2,0)*db[0]+C(2,1)*db[1]+C(2,2)*db[2]+C(2,3)*db[3])+ a[3]*(C(3,0)*db[0]+C(3,1)*db[1]+C(3,2)*db[2]+C(3,3)*db[3])); hess[0] = (d2a[0]*bc[0] + d2a[1]*bc[1] + d2a[2]*bc[2] + d2a[3]*bc[3]); hess[1] = (da[0]*(C(0,0)*db[0]+C(0,1)*db[1]+C(0,2)*db[2]+C(0,3)*db[3])+ da[1]*(C(1,0)*db[0]+C(1,1)*db[1]+C(1,2)*db[2]+C(1,3)*db[3])+ da[2]*(C(2,0)*db[0]+C(2,1)*db[1]+C(2,2)*db[2]+C(2,3)*db[3])+ da[3]*(C(3,0)*db[0]+C(3,1)*db[1]+C(3,2)*db[2]+C(3,3)*db[3])); hess[3] = (a[0]*(C(0,0)*d2b[0]+C(0,1)*d2b[1]+C(0,2)*d2b[2]+C(0,3)*d2b[3])+ a[1]*(C(1,0)*d2b[0]+C(1,1)*d2b[1]+C(1,2)*d2b[2]+C(1,3)*d2b[3])+ a[2]*(C(2,0)*d2b[0]+C(2,1)*d2b[1]+C(2,2)*d2b[2]+C(2,3)*d2b[3])+ a[3]*(C(3,0)*d2b[0]+C(3,1)*d2b[1]+C(3,2)*d2b[2]+C(3,3)*d2b[3])); hess[2] = hess[1]; #undef C } /************************************************************/ /* 3D single-precision, real evaulation functions */ /************************************************************/ /* Value only */ inline void eval_NUBspline_3d_z (NUBspline_3d_z * restrict spline, double x, double y, double z, 
complex_double* restrict val) { double a[4], b[4], c[4]; int ix = get_NUBasis_funcs_d (spline->x_basis, x, a); int iy = get_NUBasis_funcs_d (spline->y_basis, y, b); int iz = get_NUBasis_funcs_d (spline->z_basis, z, c); complex_double* restrict coefs = spline->coefs; int xs = spline->x_stride; int ys = spline->y_stride; #define P(i,j,k) coefs[(ix+(i))*xs+(iy+(j))*ys+(iz+(k))] *val = (a[0]*(b[0]*(P(0,0,0)*c[0]+P(0,0,1)*c[1]+P(0,0,2)*c[2]+P(0,0,3)*c[3])+ b[1]*(P(0,1,0)*c[0]+P(0,1,1)*c[1]+P(0,1,2)*c[2]+P(0,1,3)*c[3])+ b[2]*(P(0,2,0)*c[0]+P(0,2,1)*c[1]+P(0,2,2)*c[2]+P(0,2,3)*c[3])+ b[3]*(P(0,3,0)*c[0]+P(0,3,1)*c[1]+P(0,3,2)*c[2]+P(0,3,3)*c[3]))+ a[1]*(b[0]*(P(1,0,0)*c[0]+P(1,0,1)*c[1]+P(1,0,2)*c[2]+P(1,0,3)*c[3])+ b[1]*(P(1,1,0)*c[0]+P(1,1,1)*c[1]+P(1,1,2)*c[2]+P(1,1,3)*c[3])+ b[2]*(P(1,2,0)*c[0]+P(1,2,1)*c[1]+P(1,2,2)*c[2]+P(1,2,3)*c[3])+ b[3]*(P(1,3,0)*c[0]+P(1,3,1)*c[1]+P(1,3,2)*c[2]+P(1,3,3)*c[3]))+ a[2]*(b[0]*(P(2,0,0)*c[0]+P(2,0,1)*c[1]+P(2,0,2)*c[2]+P(2,0,3)*c[3])+ b[1]*(P(2,1,0)*c[0]+P(2,1,1)*c[1]+P(2,1,2)*c[2]+P(2,1,3)*c[3])+ b[2]*(P(2,2,0)*c[0]+P(2,2,1)*c[1]+P(2,2,2)*c[2]+P(2,2,3)*c[3])+ b[3]*(P(2,3,0)*c[0]+P(2,3,1)*c[1]+P(2,3,2)*c[2]+P(2,3,3)*c[3]))+ a[3]*(b[0]*(P(3,0,0)*c[0]+P(3,0,1)*c[1]+P(3,0,2)*c[2]+P(3,0,3)*c[3])+ b[1]*(P(3,1,0)*c[0]+P(3,1,1)*c[1]+P(3,1,2)*c[2]+P(3,1,3)*c[3])+ b[2]*(P(3,2,0)*c[0]+P(3,2,1)*c[1]+P(3,2,2)*c[2]+P(3,2,3)*c[3])+ b[3]*(P(3,3,0)*c[0]+P(3,3,1)*c[1]+P(3,3,2)*c[2]+P(3,3,3)*c[3]))); #undef P } /* Value and gradient */ inline void eval_NUBspline_3d_z_vg (NUBspline_3d_z * restrict spline, double x, double y, double z, complex_double* restrict val, complex_double* restrict grad) { double a[4], b[4], c[4], da[4], db[4], dc[4]; complex_double cP[16], bcP[4], dbcP[4]; int ix = get_NUBasis_dfuncs_d (spline->x_basis, x, a, da); int iy = get_NUBasis_dfuncs_d (spline->y_basis, y, b, db); int iz = get_NUBasis_dfuncs_d (spline->z_basis, z, c, dc); complex_double* restrict coefs = spline->coefs; int xs = spline->x_stride; int ys = 
spline->y_stride; #define P(i,j,k) coefs[(ix+(i))*xs+(iy+(j))*ys+(iz+(k))] cP[ 0] = (P(0,0,0)*c[0]+P(0,0,1)*c[1]+P(0,0,2)*c[2]+P(0,0,3)*c[3]); cP[ 1] = (P(0,1,0)*c[0]+P(0,1,1)*c[1]+P(0,1,2)*c[2]+P(0,1,3)*c[3]); cP[ 2] = (P(0,2,0)*c[0]+P(0,2,1)*c[1]+P(0,2,2)*c[2]+P(0,2,3)*c[3]); cP[ 3] = (P(0,3,0)*c[0]+P(0,3,1)*c[1]+P(0,3,2)*c[2]+P(0,3,3)*c[3]); cP[ 4] = (P(1,0,0)*c[0]+P(1,0,1)*c[1]+P(1,0,2)*c[2]+P(1,0,3)*c[3]); cP[ 5] = (P(1,1,0)*c[0]+P(1,1,1)*c[1]+P(1,1,2)*c[2]+P(1,1,3)*c[3]); cP[ 6] = (P(1,2,0)*c[0]+P(1,2,1)*c[1]+P(1,2,2)*c[2]+P(1,2,3)*c[3]); cP[ 7] = (P(1,3,0)*c[0]+P(1,3,1)*c[1]+P(1,3,2)*c[2]+P(1,3,3)*c[3]); cP[ 8] = (P(2,0,0)*c[0]+P(2,0,1)*c[1]+P(2,0,2)*c[2]+P(2,0,3)*c[3]); cP[ 9] = (P(2,1,0)*c[0]+P(2,1,1)*c[1]+P(2,1,2)*c[2]+P(2,1,3)*c[3]); cP[10] = (P(2,2,0)*c[0]+P(2,2,1)*c[1]+P(2,2,2)*c[2]+P(2,2,3)*c[3]); cP[11] = (P(2,3,0)*c[0]+P(2,3,1)*c[1]+P(2,3,2)*c[2]+P(2,3,3)*c[3]); cP[12] = (P(3,0,0)*c[0]+P(3,0,1)*c[1]+P(3,0,2)*c[2]+P(3,0,3)*c[3]); cP[13] = (P(3,1,0)*c[0]+P(3,1,1)*c[1]+P(3,1,2)*c[2]+P(3,1,3)*c[3]); cP[14] = (P(3,2,0)*c[0]+P(3,2,1)*c[1]+P(3,2,2)*c[2]+P(3,2,3)*c[3]); cP[15] = (P(3,3,0)*c[0]+P(3,3,1)*c[1]+P(3,3,2)*c[2]+P(3,3,3)*c[3]); bcP[0] = ( b[0]*cP[ 0] + b[1]*cP[ 1] + b[2]*cP[ 2] + b[3]*cP[ 3]); bcP[1] = ( b[0]*cP[ 4] + b[1]*cP[ 5] + b[2]*cP[ 6] + b[3]*cP[ 7]); bcP[2] = ( b[0]*cP[ 8] + b[1]*cP[ 9] + b[2]*cP[10] + b[3]*cP[11]); bcP[3] = ( b[0]*cP[12] + b[1]*cP[13] + b[2]*cP[14] + b[3]*cP[15]); dbcP[0] = ( db[0]*cP[ 0] + db[1]*cP[ 1] + db[2]*cP[ 2] + db[3]*cP[ 3]); dbcP[1] = ( db[0]*cP[ 4] + db[1]*cP[ 5] + db[2]*cP[ 6] + db[3]*cP[ 7]); dbcP[2] = ( db[0]*cP[ 8] + db[1]*cP[ 9] + db[2]*cP[10] + db[3]*cP[11]); dbcP[3] = ( db[0]*cP[12] + db[1]*cP[13] + db[2]*cP[14] + db[3]*cP[15]); *val = ( a[0]*bcP[0] + a[1]*bcP[1] + a[2]*bcP[2] + a[3]*bcP[3]); grad[0] = (da[0]*bcP[0] + da[1]*bcP[1] + da[2]*bcP[2] + da[3]*bcP[3]); grad[1] = (a[0]*dbcP[0] + a[1]*dbcP[1] + a[2]*dbcP[2] + a[3]*dbcP[3]); grad[2] = 
(a[0]*(b[0]*(P(0,0,0)*dc[0]+P(0,0,1)*dc[1]+P(0,0,2)*dc[2]+P(0,0,3)*dc[3])+ b[1]*(P(0,1,0)*dc[0]+P(0,1,1)*dc[1]+P(0,1,2)*dc[2]+P(0,1,3)*dc[3])+ b[2]*(P(0,2,0)*dc[0]+P(0,2,1)*dc[1]+P(0,2,2)*dc[2]+P(0,2,3)*dc[3])+ b[3]*(P(0,3,0)*dc[0]+P(0,3,1)*dc[1]+P(0,3,2)*dc[2]+P(0,3,3)*dc[3]))+ a[1]*(b[0]*(P(1,0,0)*dc[0]+P(1,0,1)*dc[1]+P(1,0,2)*dc[2]+P(1,0,3)*dc[3])+ b[1]*(P(1,1,0)*dc[0]+P(1,1,1)*dc[1]+P(1,1,2)*dc[2]+P(1,1,3)*dc[3])+ b[2]*(P(1,2,0)*dc[0]+P(1,2,1)*dc[1]+P(1,2,2)*dc[2]+P(1,2,3)*dc[3])+ b[3]*(P(1,3,0)*dc[0]+P(1,3,1)*dc[1]+P(1,3,2)*dc[2]+P(1,3,3)*dc[3]))+ a[2]*(b[0]*(P(2,0,0)*dc[0]+P(2,0,1)*dc[1]+P(2,0,2)*dc[2]+P(2,0,3)*dc[3])+ b[1]*(P(2,1,0)*dc[0]+P(2,1,1)*dc[1]+P(2,1,2)*dc[2]+P(2,1,3)*dc[3])+ b[2]*(P(2,2,0)*dc[0]+P(2,2,1)*dc[1]+P(2,2,2)*dc[2]+P(2,2,3)*dc[3])+ b[3]*(P(2,3,0)*dc[0]+P(2,3,1)*dc[1]+P(2,3,2)*dc[2]+P(2,3,3)*dc[3]))+ a[3]*(b[0]*(P(3,0,0)*dc[0]+P(3,0,1)*dc[1]+P(3,0,2)*dc[2]+P(3,0,3)*dc[3])+ b[1]*(P(3,1,0)*dc[0]+P(3,1,1)*dc[1]+P(3,1,2)*dc[2]+P(3,1,3)*dc[3])+ b[2]*(P(3,2,0)*dc[0]+P(3,2,1)*dc[1]+P(3,2,2)*dc[2]+P(3,2,3)*dc[3])+ b[3]*(P(3,3,0)*dc[0]+P(3,3,1)*dc[1]+P(3,3,2)*dc[2]+P(3,3,3)*dc[3]))); #undef P } /* Value, gradient, and laplacian */ inline void eval_NUBspline_3d_z_vgl (NUBspline_3d_z * restrict spline, double x, double y, double z, complex_double* restrict val, complex_double* restrict grad, complex_double* restrict lapl) { double a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4]; complex_double cP[16], dcP[16], bcP[4], dbcP[4], d2bcP[4], bdcP[4]; int ix = get_NUBasis_d2funcs_d (spline->x_basis, x, a, da, d2a); int iy = get_NUBasis_d2funcs_d (spline->y_basis, y, b, db, d2b); int iz = get_NUBasis_d2funcs_d (spline->z_basis, z, c, dc, d2c); complex_double* restrict coefs = spline->coefs; int xs = spline->x_stride; int ys = spline->y_stride; #define P(i,j,k) coefs[(ix+(i))*xs+(iy+(j))*ys+(iz+(k))] cP[ 0] = (P(0,0,0)*c[0]+P(0,0,1)*c[1]+P(0,0,2)*c[2]+P(0,0,3)*c[3]); cP[ 1] = (P(0,1,0)*c[0]+P(0,1,1)*c[1]+P(0,1,2)*c[2]+P(0,1,3)*c[3]); cP[ 2] = 
(P(0,2,0)*c[0]+P(0,2,1)*c[1]+P(0,2,2)*c[2]+P(0,2,3)*c[3]); cP[ 3] = (P(0,3,0)*c[0]+P(0,3,1)*c[1]+P(0,3,2)*c[2]+P(0,3,3)*c[3]); cP[ 4] = (P(1,0,0)*c[0]+P(1,0,1)*c[1]+P(1,0,2)*c[2]+P(1,0,3)*c[3]); cP[ 5] = (P(1,1,0)*c[0]+P(1,1,1)*c[1]+P(1,1,2)*c[2]+P(1,1,3)*c[3]); cP[ 6] = (P(1,2,0)*c[0]+P(1,2,1)*c[1]+P(1,2,2)*c[2]+P(1,2,3)*c[3]); cP[ 7] = (P(1,3,0)*c[0]+P(1,3,1)*c[1]+P(1,3,2)*c[2]+P(1,3,3)*c[3]); cP[ 8] = (P(2,0,0)*c[0]+P(2,0,1)*c[1]+P(2,0,2)*c[2]+P(2,0,3)*c[3]); cP[ 9] = (P(2,1,0)*c[0]+P(2,1,1)*c[1]+P(2,1,2)*c[2]+P(2,1,3)*c[3]); cP[10] = (P(2,2,0)*c[0]+P(2,2,1)*c[1]+P(2,2,2)*c[2]+P(2,2,3)*c[3]); cP[11] = (P(2,3,0)*c[0]+P(2,3,1)*c[1]+P(2,3,2)*c[2]+P(2,3,3)*c[3]); cP[12] = (P(3,0,0)*c[0]+P(3,0,1)*c[1]+P(3,0,2)*c[2]+P(3,0,3)*c[3]); cP[13] = (P(3,1,0)*c[0]+P(3,1,1)*c[1]+P(3,1,2)*c[2]+P(3,1,3)*c[3]); cP[14] = (P(3,2,0)*c[0]+P(3,2,1)*c[1]+P(3,2,2)*c[2]+P(3,2,3)*c[3]); cP[15] = (P(3,3,0)*c[0]+P(3,3,1)*c[1]+P(3,3,2)*c[2]+P(3,3,3)*c[3]); dcP[ 0] = (P(0,0,0)*dc[0]+P(0,0,1)*dc[1]+P(0,0,2)*dc[2]+P(0,0,3)*dc[3]); dcP[ 1] = (P(0,1,0)*dc[0]+P(0,1,1)*dc[1]+P(0,1,2)*dc[2]+P(0,1,3)*dc[3]); dcP[ 2] = (P(0,2,0)*dc[0]+P(0,2,1)*dc[1]+P(0,2,2)*dc[2]+P(0,2,3)*dc[3]); dcP[ 3] = (P(0,3,0)*dc[0]+P(0,3,1)*dc[1]+P(0,3,2)*dc[2]+P(0,3,3)*dc[3]); dcP[ 4] = (P(1,0,0)*dc[0]+P(1,0,1)*dc[1]+P(1,0,2)*dc[2]+P(1,0,3)*dc[3]); dcP[ 5] = (P(1,1,0)*dc[0]+P(1,1,1)*dc[1]+P(1,1,2)*dc[2]+P(1,1,3)*dc[3]); dcP[ 6] = (P(1,2,0)*dc[0]+P(1,2,1)*dc[1]+P(1,2,2)*dc[2]+P(1,2,3)*dc[3]); dcP[ 7] = (P(1,3,0)*dc[0]+P(1,3,1)*dc[1]+P(1,3,2)*dc[2]+P(1,3,3)*dc[3]); dcP[ 8] = (P(2,0,0)*dc[0]+P(2,0,1)*dc[1]+P(2,0,2)*dc[2]+P(2,0,3)*dc[3]); dcP[ 9] = (P(2,1,0)*dc[0]+P(2,1,1)*dc[1]+P(2,1,2)*dc[2]+P(2,1,3)*dc[3]); dcP[10] = (P(2,2,0)*dc[0]+P(2,2,1)*dc[1]+P(2,2,2)*dc[2]+P(2,2,3)*dc[3]); dcP[11] = (P(2,3,0)*dc[0]+P(2,3,1)*dc[1]+P(2,3,2)*dc[2]+P(2,3,3)*dc[3]); dcP[12] = (P(3,0,0)*dc[0]+P(3,0,1)*dc[1]+P(3,0,2)*dc[2]+P(3,0,3)*dc[3]); dcP[13] = (P(3,1,0)*dc[0]+P(3,1,1)*dc[1]+P(3,1,2)*dc[2]+P(3,1,3)*dc[3]); dcP[14] = 
(P(3,2,0)*dc[0]+P(3,2,1)*dc[1]+P(3,2,2)*dc[2]+P(3,2,3)*dc[3]); dcP[15] = (P(3,3,0)*dc[0]+P(3,3,1)*dc[1]+P(3,3,2)*dc[2]+P(3,3,3)*dc[3]); bcP[0] = ( b[0]*cP[ 0] + b[1]*cP[ 1] + b[2]*cP[ 2] + b[3]*cP[ 3]); bcP[1] = ( b[0]*cP[ 4] + b[1]*cP[ 5] + b[2]*cP[ 6] + b[3]*cP[ 7]); bcP[2] = ( b[0]*cP[ 8] + b[1]*cP[ 9] + b[2]*cP[10] + b[3]*cP[11]); bcP[3] = ( b[0]*cP[12] + b[1]*cP[13] + b[2]*cP[14] + b[3]*cP[15]); dbcP[0] = ( db[0]*cP[ 0] + db[1]*cP[ 1] + db[2]*cP[ 2] + db[3]*cP[ 3]); dbcP[1] = ( db[0]*cP[ 4] + db[1]*cP[ 5] + db[2]*cP[ 6] + db[3]*cP[ 7]); dbcP[2] = ( db[0]*cP[ 8] + db[1]*cP[ 9] + db[2]*cP[10] + db[3]*cP[11]); dbcP[3] = ( db[0]*cP[12] + db[1]*cP[13] + db[2]*cP[14] + db[3]*cP[15]); bdcP[0] = ( b[0]*dcP[ 0] + b[1]*dcP[ 1] + b[2]*dcP[ 2] + b[3]*dcP[ 3]); bdcP[1] = ( b[0]*dcP[ 4] + b[1]*dcP[ 5] + b[2]*dcP[ 6] + b[3]*dcP[ 7]); bdcP[2] = ( b[0]*dcP[ 8] + b[1]*dcP[ 9] + b[2]*dcP[10] + b[3]*dcP[11]); bdcP[3] = ( b[0]*dcP[12] + b[1]*dcP[13] + b[2]*dcP[14] + b[3]*dcP[15]); d2bcP[0] = ( d2b[0]*cP[ 0] + d2b[1]*cP[ 1] + d2b[2]*cP[ 2] + d2b[3]*cP[ 3]); d2bcP[1] = ( d2b[0]*cP[ 4] + d2b[1]*cP[ 5] + d2b[2]*cP[ 6] + d2b[3]*cP[ 7]); d2bcP[2] = ( d2b[0]*cP[ 8] + d2b[1]*cP[ 9] + d2b[2]*cP[10] + d2b[3]*cP[11]); d2bcP[3] = ( d2b[0]*cP[12] + d2b[1]*cP[13] + d2b[2]*cP[14] + d2b[3]*cP[15]); *val = ( a[0]*bcP[0] + a[1]*bcP[1] + a[2]*bcP[2] + a[3]*bcP[3]); grad[0] = (da[0]*bcP[0] + da[1]*bcP[1] + da[2]*bcP[2] + da[3]*bcP[3]); grad[1] = (a[0]*dbcP[0] + a[1]*dbcP[1] + a[2]*dbcP[2] + a[3]*dbcP[3]); grad[2] = (a[0]*bdcP[0] + a[1]*bdcP[1] + a[2]*bdcP[2] + a[3]*bdcP[3]); *lapl = (d2a[0]*bcP[0] + d2a[1]*bcP[1] + d2a[2]*bcP[2] + d2a[3]*bcP[3]) + (a[0]*d2bcP[0] + a[1]*d2bcP[1] + a[2]*d2bcP[2] + a[3]*d2bcP[3]) + (a[0]*(b[0]*(P(0,0,0)*d2c[0]+P(0,0,1)*d2c[1]+P(0,0,2)*d2c[2]+P(0,0,3)*d2c[3])+ b[1]*(P(0,1,0)*d2c[0]+P(0,1,1)*d2c[1]+P(0,1,2)*d2c[2]+P(0,1,3)*d2c[3])+ b[2]*(P(0,2,0)*d2c[0]+P(0,2,1)*d2c[1]+P(0,2,2)*d2c[2]+P(0,2,3)*d2c[3])+ 
b[3]*(P(0,3,0)*d2c[0]+P(0,3,1)*d2c[1]+P(0,3,2)*d2c[2]+P(0,3,3)*d2c[3]))+ a[1]*(b[0]*(P(1,0,0)*d2c[0]+P(1,0,1)*d2c[1]+P(1,0,2)*d2c[2]+P(1,0,3)*d2c[3])+ b[1]*(P(1,1,0)*d2c[0]+P(1,1,1)*d2c[1]+P(1,1,2)*d2c[2]+P(1,1,3)*d2c[3])+ b[2]*(P(1,2,0)*d2c[0]+P(1,2,1)*d2c[1]+P(1,2,2)*d2c[2]+P(1,2,3)*d2c[3])+ b[3]*(P(1,3,0)*d2c[0]+P(1,3,1)*d2c[1]+P(1,3,2)*d2c[2]+P(1,3,3)*d2c[3]))+ a[2]*(b[0]*(P(2,0,0)*d2c[0]+P(2,0,1)*d2c[1]+P(2,0,2)*d2c[2]+P(2,0,3)*d2c[3])+ b[1]*(P(2,1,0)*d2c[0]+P(2,1,1)*d2c[1]+P(2,1,2)*d2c[2]+P(2,1,3)*d2c[3])+ b[2]*(P(2,2,0)*d2c[0]+P(2,2,1)*d2c[1]+P(2,2,2)*d2c[2]+P(2,2,3)*d2c[3])+ b[3]*(P(2,3,0)*d2c[0]+P(2,3,1)*d2c[1]+P(2,3,2)*d2c[2]+P(2,3,3)*d2c[3]))+ a[3]*(b[0]*(P(3,0,0)*d2c[0]+P(3,0,1)*d2c[1]+P(3,0,2)*d2c[2]+P(3,0,3)*d2c[3])+ b[1]*(P(3,1,0)*d2c[0]+P(3,1,1)*d2c[1]+P(3,1,2)*d2c[2]+P(3,1,3)*d2c[3])+ b[2]*(P(3,2,0)*d2c[0]+P(3,2,1)*d2c[1]+P(3,2,2)*d2c[2]+P(3,2,3)*d2c[3])+ b[3]*(P(3,3,0)*d2c[0]+P(3,3,1)*d2c[1]+P(3,3,2)*d2c[2]+P(3,3,3)*d2c[3]))); #undef P } /* Value, gradient, and Hessian */ inline void eval_NUBspline_3d_z_vgh (NUBspline_3d_z * restrict spline, double x, double y, double z, complex_double* restrict val, complex_double* restrict grad, complex_double* restrict hess) { double a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4]; complex_double cP[16], dcP[16], d2cP[16], bcP[4], dbcP[4], d2bcP[4], dbdcP[4], bd2cP[4], bdcP[4]; int ix = get_NUBasis_d2funcs_d (spline->x_basis, x, a, da, d2a); int iy = get_NUBasis_d2funcs_d (spline->y_basis, y, b, db, d2b); int iz = get_NUBasis_d2funcs_d (spline->z_basis, z, c, dc, d2c); int xs = spline->x_stride; int ys = spline->y_stride; complex_double* restrict coefs = spline->coefs; #define P(i,j,k) coefs[(ix+(i))*xs+(iy+(j))*ys+(iz+(k))] cP[ 0] = (P(0,0,0)*c[0]+P(0,0,1)*c[1]+P(0,0,2)*c[2]+P(0,0,3)*c[3]); cP[ 1] = (P(0,1,0)*c[0]+P(0,1,1)*c[1]+P(0,1,2)*c[2]+P(0,1,3)*c[3]); cP[ 2] = (P(0,2,0)*c[0]+P(0,2,1)*c[1]+P(0,2,2)*c[2]+P(0,2,3)*c[3]); cP[ 3] = (P(0,3,0)*c[0]+P(0,3,1)*c[1]+P(0,3,2)*c[2]+P(0,3,3)*c[3]); cP[ 
4] = (P(1,0,0)*c[0]+P(1,0,1)*c[1]+P(1,0,2)*c[2]+P(1,0,3)*c[3]); cP[ 5] = (P(1,1,0)*c[0]+P(1,1,1)*c[1]+P(1,1,2)*c[2]+P(1,1,3)*c[3]); cP[ 6] = (P(1,2,0)*c[0]+P(1,2,1)*c[1]+P(1,2,2)*c[2]+P(1,2,3)*c[3]); cP[ 7] = (P(1,3,0)*c[0]+P(1,3,1)*c[1]+P(1,3,2)*c[2]+P(1,3,3)*c[3]); cP[ 8] = (P(2,0,0)*c[0]+P(2,0,1)*c[1]+P(2,0,2)*c[2]+P(2,0,3)*c[3]); cP[ 9] = (P(2,1,0)*c[0]+P(2,1,1)*c[1]+P(2,1,2)*c[2]+P(2,1,3)*c[3]); cP[10] = (P(2,2,0)*c[0]+P(2,2,1)*c[1]+P(2,2,2)*c[2]+P(2,2,3)*c[3]); cP[11] = (P(2,3,0)*c[0]+P(2,3,1)*c[1]+P(2,3,2)*c[2]+P(2,3,3)*c[3]); cP[12] = (P(3,0,0)*c[0]+P(3,0,1)*c[1]+P(3,0,2)*c[2]+P(3,0,3)*c[3]); cP[13] = (P(3,1,0)*c[0]+P(3,1,1)*c[1]+P(3,1,2)*c[2]+P(3,1,3)*c[3]); cP[14] = (P(3,2,0)*c[0]+P(3,2,1)*c[1]+P(3,2,2)*c[2]+P(3,2,3)*c[3]); cP[15] = (P(3,3,0)*c[0]+P(3,3,1)*c[1]+P(3,3,2)*c[2]+P(3,3,3)*c[3]); dcP[ 0] = (P(0,0,0)*dc[0]+P(0,0,1)*dc[1]+P(0,0,2)*dc[2]+P(0,0,3)*dc[3]); dcP[ 1] = (P(0,1,0)*dc[0]+P(0,1,1)*dc[1]+P(0,1,2)*dc[2]+P(0,1,3)*dc[3]); dcP[ 2] = (P(0,2,0)*dc[0]+P(0,2,1)*dc[1]+P(0,2,2)*dc[2]+P(0,2,3)*dc[3]); dcP[ 3] = (P(0,3,0)*dc[0]+P(0,3,1)*dc[1]+P(0,3,2)*dc[2]+P(0,3,3)*dc[3]); dcP[ 4] = (P(1,0,0)*dc[0]+P(1,0,1)*dc[1]+P(1,0,2)*dc[2]+P(1,0,3)*dc[3]); dcP[ 5] = (P(1,1,0)*dc[0]+P(1,1,1)*dc[1]+P(1,1,2)*dc[2]+P(1,1,3)*dc[3]); dcP[ 6] = (P(1,2,0)*dc[0]+P(1,2,1)*dc[1]+P(1,2,2)*dc[2]+P(1,2,3)*dc[3]); dcP[ 7] = (P(1,3,0)*dc[0]+P(1,3,1)*dc[1]+P(1,3,2)*dc[2]+P(1,3,3)*dc[3]); dcP[ 8] = (P(2,0,0)*dc[0]+P(2,0,1)*dc[1]+P(2,0,2)*dc[2]+P(2,0,3)*dc[3]); dcP[ 9] = (P(2,1,0)*dc[0]+P(2,1,1)*dc[1]+P(2,1,2)*dc[2]+P(2,1,3)*dc[3]); dcP[10] = (P(2,2,0)*dc[0]+P(2,2,1)*dc[1]+P(2,2,2)*dc[2]+P(2,2,3)*dc[3]); dcP[11] = (P(2,3,0)*dc[0]+P(2,3,1)*dc[1]+P(2,3,2)*dc[2]+P(2,3,3)*dc[3]); dcP[12] = (P(3,0,0)*dc[0]+P(3,0,1)*dc[1]+P(3,0,2)*dc[2]+P(3,0,3)*dc[3]); dcP[13] = (P(3,1,0)*dc[0]+P(3,1,1)*dc[1]+P(3,1,2)*dc[2]+P(3,1,3)*dc[3]); dcP[14] = (P(3,2,0)*dc[0]+P(3,2,1)*dc[1]+P(3,2,2)*dc[2]+P(3,2,3)*dc[3]); dcP[15] = (P(3,3,0)*dc[0]+P(3,3,1)*dc[1]+P(3,3,2)*dc[2]+P(3,3,3)*dc[3]); d2cP[ 0] = 
(P(0,0,0)*d2c[0]+P(0,0,1)*d2c[1]+P(0,0,2)*d2c[2]+P(0,0,3)*d2c[3]); d2cP[ 1] = (P(0,1,0)*d2c[0]+P(0,1,1)*d2c[1]+P(0,1,2)*d2c[2]+P(0,1,3)*d2c[3]); d2cP[ 2] = (P(0,2,0)*d2c[0]+P(0,2,1)*d2c[1]+P(0,2,2)*d2c[2]+P(0,2,3)*d2c[3]); d2cP[ 3] = (P(0,3,0)*d2c[0]+P(0,3,1)*d2c[1]+P(0,3,2)*d2c[2]+P(0,3,3)*d2c[3]); d2cP[ 4] = (P(1,0,0)*d2c[0]+P(1,0,1)*d2c[1]+P(1,0,2)*d2c[2]+P(1,0,3)*d2c[3]); d2cP[ 5] = (P(1,1,0)*d2c[0]+P(1,1,1)*d2c[1]+P(1,1,2)*d2c[2]+P(1,1,3)*d2c[3]); d2cP[ 6] = (P(1,2,0)*d2c[0]+P(1,2,1)*d2c[1]+P(1,2,2)*d2c[2]+P(1,2,3)*d2c[3]); d2cP[ 7] = (P(1,3,0)*d2c[0]+P(1,3,1)*d2c[1]+P(1,3,2)*d2c[2]+P(1,3,3)*d2c[3]); d2cP[ 8] = (P(2,0,0)*d2c[0]+P(2,0,1)*d2c[1]+P(2,0,2)*d2c[2]+P(2,0,3)*d2c[3]); d2cP[ 9] = (P(2,1,0)*d2c[0]+P(2,1,1)*d2c[1]+P(2,1,2)*d2c[2]+P(2,1,3)*d2c[3]); d2cP[10] = (P(2,2,0)*d2c[0]+P(2,2,1)*d2c[1]+P(2,2,2)*d2c[2]+P(2,2,3)*d2c[3]); d2cP[11] = (P(2,3,0)*d2c[0]+P(2,3,1)*d2c[1]+P(2,3,2)*d2c[2]+P(2,3,3)*d2c[3]); d2cP[12] = (P(3,0,0)*d2c[0]+P(3,0,1)*d2c[1]+P(3,0,2)*d2c[2]+P(3,0,3)*d2c[3]); d2cP[13] = (P(3,1,0)*d2c[0]+P(3,1,1)*d2c[1]+P(3,1,2)*d2c[2]+P(3,1,3)*d2c[3]); d2cP[14] = (P(3,2,0)*d2c[0]+P(3,2,1)*d2c[1]+P(3,2,2)*d2c[2]+P(3,2,3)*d2c[3]); d2cP[15] = (P(3,3,0)*d2c[0]+P(3,3,1)*d2c[1]+P(3,3,2)*d2c[2]+P(3,3,3)*d2c[3]); bcP[0] = ( b[0]*cP[ 0] + b[1]*cP[ 1] + b[2]*cP[ 2] + b[3]*cP[ 3]); bcP[1] = ( b[0]*cP[ 4] + b[1]*cP[ 5] + b[2]*cP[ 6] + b[3]*cP[ 7]); bcP[2] = ( b[0]*cP[ 8] + b[1]*cP[ 9] + b[2]*cP[10] + b[3]*cP[11]); bcP[3] = ( b[0]*cP[12] + b[1]*cP[13] + b[2]*cP[14] + b[3]*cP[15]); dbcP[0] = ( db[0]*cP[ 0] + db[1]*cP[ 1] + db[2]*cP[ 2] + db[3]*cP[ 3]); dbcP[1] = ( db[0]*cP[ 4] + db[1]*cP[ 5] + db[2]*cP[ 6] + db[3]*cP[ 7]); dbcP[2] = ( db[0]*cP[ 8] + db[1]*cP[ 9] + db[2]*cP[10] + db[3]*cP[11]); dbcP[3] = ( db[0]*cP[12] + db[1]*cP[13] + db[2]*cP[14] + db[3]*cP[15]); bdcP[0] = ( b[0]*dcP[ 0] + b[1]*dcP[ 1] + b[2]*dcP[ 2] + b[3]*dcP[ 3]); bdcP[1] = ( b[0]*dcP[ 4] + b[1]*dcP[ 5] + b[2]*dcP[ 6] + b[3]*dcP[ 7]); bdcP[2] = ( b[0]*dcP[ 8] + b[1]*dcP[ 9] + b[2]*dcP[10] + 
b[3]*dcP[11]); bdcP[3] = ( b[0]*dcP[12] + b[1]*dcP[13] + b[2]*dcP[14] + b[3]*dcP[15]); bd2cP[0] = ( b[0]*d2cP[ 0] + b[1]*d2cP[ 1] + b[2]*d2cP[ 2] + b[3]*d2cP[ 3]); bd2cP[1] = ( b[0]*d2cP[ 4] + b[1]*d2cP[ 5] + b[2]*d2cP[ 6] + b[3]*d2cP[ 7]); bd2cP[2] = ( b[0]*d2cP[ 8] + b[1]*d2cP[ 9] + b[2]*d2cP[10] + b[3]*d2cP[11]); bd2cP[3] = ( b[0]*d2cP[12] + b[1]*d2cP[13] + b[2]*d2cP[14] + b[3]*d2cP[15]); d2bcP[0] = ( d2b[0]*cP[ 0] + d2b[1]*cP[ 1] + d2b[2]*cP[ 2] + d2b[3]*cP[ 3]); d2bcP[1] = ( d2b[0]*cP[ 4] + d2b[1]*cP[ 5] + d2b[2]*cP[ 6] + d2b[3]*cP[ 7]); d2bcP[2] = ( d2b[0]*cP[ 8] + d2b[1]*cP[ 9] + d2b[2]*cP[10] + d2b[3]*cP[11]); d2bcP[3] = ( d2b[0]*cP[12] + d2b[1]*cP[13] + d2b[2]*cP[14] + d2b[3]*cP[15]); dbdcP[0] = ( db[0]*dcP[ 0] + db[1]*dcP[ 1] + db[2]*dcP[ 2] + db[3]*dcP[ 3]); dbdcP[1] = ( db[0]*dcP[ 4] + db[1]*dcP[ 5] + db[2]*dcP[ 6] + db[3]*dcP[ 7]); dbdcP[2] = ( db[0]*dcP[ 8] + db[1]*dcP[ 9] + db[2]*dcP[10] + db[3]*dcP[11]); dbdcP[3] = ( db[0]*dcP[12] + db[1]*dcP[13] + db[2]*dcP[14] + db[3]*dcP[15]); *val = a[0]*bcP[0] + a[1]*bcP[1] + a[2]*bcP[2] + a[3]*bcP[3]; grad[0] = (da[0] *bcP[0] + da[1]*bcP[1] + da[2]*bcP[2] + da[3]*bcP[3]); grad[1] = (a[0]*dbcP[0] + a[1]*dbcP[1] + a[2]*dbcP[2] + a[3]*dbcP[3]); grad[2] = (a[0]*bdcP[0] + a[1]*bdcP[1] + a[2]*bdcP[2] + a[3]*bdcP[3]); // d2x hess[0] = (d2a[0]*bcP[0] + d2a[1]*bcP[1] + d2a[2]*bcP[2] + d2a[3]*bcP[3]); // dx dy hess[1] = (da[0]*dbcP[0] + da[1]*dbcP[1] + da[1]*dbcP[1] + da[1]*dbcP[1]); hess[3] = hess[1]; // dx dz; hess[2] = (da[0]*bdcP[0] + da[1]*bdcP[1] + da[1]*bdcP[1] + da[1]*bdcP[1]); hess[6] = hess[2]; // d2y hess[4] = (a[0]*d2bcP[0] + a[1]*d2bcP[1] + a[2]*d2bcP[2] + a[3]*d2bcP[3]); // dy dz hess[5] = (a[0]*dbdcP[0] + a[1]*dbdcP[1] + a[2]*dbdcP[2] + a[3]*dbdcP[3]); hess[7] = hess[5]; // d2z hess[8] = (a[0]*bd2cP[0] + a[1]*bd2cP[1] + a[2]*bd2cP[2] + a[3]*bd2cP[3]); #undef P } #endif einspline-0.9.2/src/multi_bspline_create_cuda.cu0000664000113000011300000005513611266201270016727 00000000000000#include #include 
"multi_bspline.h" #include "multi_bspline_structs_cuda.h" __device__ double Bcuda[48]; __constant__ float Acuda[48]; #include "multi_bspline_cuda_s_impl.h" #include "multi_bspline_cuda_c_impl.h" #include "multi_bspline_cuda_d_impl.h" #include "multi_bspline_cuda_z_impl.h" extern "C" multi_UBspline_1d_s_cuda* create_multi_UBspline_1d_s_cuda (multi_UBspline_1d_s* spline) { float A_h[48] = { -1.0/6.0, 3.0/6.0, -3.0/6.0, 1.0/6.0, 3.0/6.0, -6.0/6.0, 0.0/6.0, 4.0/6.0, -3.0/6.0, 3.0/6.0, 3.0/6.0, 1.0/6.0, 1.0/6.0, 0.0/6.0, 0.0/6.0, 0.0/6.0, 0.0, -0.5, 1.0, -0.5, 0.0, 1.5, -2.0, 0.0, 0.0, -1.5, 1.0, 0.5, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, -1.0, 1.0, 0.0, 0.0, 3.0, -2.0, 0.0, 0.0, -3.0, 1.0, 0.0, 0.0, 1.0, 0.0 }; cudaMemcpyToSymbol(Acuda, A_h, 48*sizeof(float), 0, cudaMemcpyHostToDevice); multi_UBspline_1d_s_cuda *cuda_spline = (multi_UBspline_1d_s_cuda*) malloc (sizeof (multi_UBspline_1d_s_cuda)); cuda_spline->num_splines = spline->num_splines; int Nx = spline->x_grid.num+3; int N = spline->num_splines; if ((N%SPLINE_BLOCK_SIZE) != 0) N += 64 - (N%SPLINE_BLOCK_SIZE); cuda_spline->stride = N; cuda_spline->gridInv = spline->x_grid.delta_inv; cuda_spline->dim = spline->x_grid.num; size_t size = Nx*N*sizeof(float); cudaMalloc((void**)&(cuda_spline->coefs), size); float *spline_buff = (float*)malloc(size); if (!spline_buff) { fprintf (stderr, "Failed to allocate memory for temporary spline buffer.\n"); abort(); } for (int ix=0; ixnum_splines; isp++) spline_buff[ix*cuda_spline->stride + isp] = spline->coefs[ix*spline->x_stride + isp]; cudaMemcpy(cuda_spline->coefs, spline_buff, size, cudaMemcpyHostToDevice); free(spline_buff); return cuda_spline; } extern "C" multi_UBspline_1d_s_cuda* create_multi_UBspline_1d_s_cuda_conv (multi_UBspline_1d_d* spline) { float A_h[48] = { -1.0/6.0, 3.0/6.0, -3.0/6.0, 1.0/6.0, 3.0/6.0, -6.0/6.0, 0.0/6.0, 4.0/6.0, -3.0/6.0, 3.0/6.0, 3.0/6.0, 1.0/6.0, 1.0/6.0, 0.0/6.0, 0.0/6.0, 0.0/6.0, 0.0, -0.5, 1.0, -0.5, 0.0, 1.5, -2.0, 0.0, 0.0, -1.5, 1.0, 0.5, 0.0, 
0.5, 0.0, 0.0, 0.0, 0.0, -1.0, 1.0, 0.0, 0.0, 3.0, -2.0, 0.0, 0.0, -3.0, 1.0, 0.0, 0.0, 1.0, 0.0 }; cudaMemcpyToSymbol(Acuda, A_h, 48*sizeof(float), 0, cudaMemcpyHostToDevice); multi_UBspline_1d_s_cuda *cuda_spline = (multi_UBspline_1d_s_cuda*) malloc (sizeof (multi_UBspline_1d_s_cuda)); cuda_spline->num_splines = spline->num_splines; int Nx = spline->x_grid.num+3; int N = spline->num_splines; if ((N%SPLINE_BLOCK_SIZE) != 0) N += 64 - (N%SPLINE_BLOCK_SIZE); cuda_spline->stride = N; cuda_spline->gridInv = spline->x_grid.delta_inv; cuda_spline->dim = spline->x_grid.num; size_t size = Nx*N*sizeof(float); cudaMalloc((void**)&(cuda_spline->coefs), size); float *spline_buff = (float*)malloc(size); if (!spline_buff) { fprintf (stderr, "Failed to allocate memory for temporary spline buffer.\n"); abort(); } for (int ix=0; ixnum_splines; isp++) spline_buff[ix*cuda_spline->stride + isp] = (float)spline->coefs[ix*spline->x_stride + isp]; cudaMemcpy(cuda_spline->coefs, spline_buff, size, cudaMemcpyHostToDevice); free(spline_buff); return cuda_spline; } extern "C" multi_UBspline_1d_c_cuda* create_multi_UBspline_1d_c_cuda (multi_UBspline_1d_c* spline) { float A_h[48] = { -1.0/6.0, 3.0/6.0, -3.0/6.0, 1.0/6.0, 3.0/6.0, -6.0/6.0, 0.0/6.0, 4.0/6.0, -3.0/6.0, 3.0/6.0, 3.0/6.0, 1.0/6.0, 1.0/6.0, 0.0/6.0, 0.0/6.0, 0.0/6.0, 0.0, -0.5, 1.0, -0.5, 0.0, 1.5, -2.0, 0.0, 0.0, -1.5, 1.0, 0.5, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, -1.0, 1.0, 0.0, 0.0, 3.0, -2.0, 0.0, 0.0, -3.0, 1.0, 0.0, 0.0, 1.0, 0.0 }; cudaMemcpyToSymbol(Acuda, A_h, 48*sizeof(float), 0, cudaMemcpyHostToDevice); multi_UBspline_1d_c_cuda *cuda_spline = (multi_UBspline_1d_c_cuda*) malloc (sizeof (multi_UBspline_1d_c_cuda)); cuda_spline->num_splines = spline->num_splines; int Nx = spline->x_grid.num+3; int N = spline->num_splines; if ((N%SPLINE_BLOCK_SIZE) != 0) N += 64 - (N%SPLINE_BLOCK_SIZE); cuda_spline->stride = N; cuda_spline->gridInv = spline->x_grid.delta_inv; cuda_spline->dim = spline->x_grid.num; size_t size = 
Nx*N*sizeof(complex_float); cudaMalloc((void**)&(cuda_spline->coefs), size); complex_float *spline_buff = (complex_float*)malloc(size); if (!spline_buff) { fprintf (stderr, "Failed to allocate memory for temporary spline buffer.\n"); abort(); } for (int ix=0; ixnum_splines; isp++) spline_buff[ix*cuda_spline->stride + isp] = spline->coefs[ix*spline->x_stride + isp]; cudaMemcpy(cuda_spline->coefs, spline_buff, size, cudaMemcpyHostToDevice); free(spline_buff); return cuda_spline; } extern "C" multi_UBspline_1d_c_cuda* create_multi_UBspline_1d_c_cuda_conv (multi_UBspline_1d_z* spline) { float A_h[48] = { -1.0/6.0, 3.0/6.0, -3.0/6.0, 1.0/6.0, 3.0/6.0, -6.0/6.0, 0.0/6.0, 4.0/6.0, -3.0/6.0, 3.0/6.0, 3.0/6.0, 1.0/6.0, 1.0/6.0, 0.0/6.0, 0.0/6.0, 0.0/6.0, 0.0, -0.5, 1.0, -0.5, 0.0, 1.5, -2.0, 0.0, 0.0, -1.5, 1.0, 0.5, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, -1.0, 1.0, 0.0, 0.0, 3.0, -2.0, 0.0, 0.0, -3.0, 1.0, 0.0, 0.0, 1.0, 0.0 }; cudaMemcpyToSymbol(Acuda, A_h, 48*sizeof(float), 0, cudaMemcpyHostToDevice); cudaError_t err = cudaGetLastError(); if (err != cudaSuccess) { fprintf (stderr, "Error copying A matrix to GPU constant memory: Erorr = %s\n", cudaGetErrorString(err)); abort(); } multi_UBspline_1d_c_cuda *cuda_spline = (multi_UBspline_1d_c_cuda*) malloc (sizeof (multi_UBspline_1d_c_cuda)); cuda_spline->num_splines = spline->num_splines; int Nx = spline->x_grid.num+3; int N = spline->num_splines; if ((N%SPLINE_BLOCK_SIZE) != 0) N += 64 - (N%SPLINE_BLOCK_SIZE); cuda_spline->stride = N; cuda_spline->gridInv = spline->x_grid.delta_inv; cuda_spline->dim = spline->x_grid.num; size_t size = Nx*N*sizeof(complex_float); cudaMalloc((void**)&(cuda_spline->coefs), size); complex_float *spline_buff = (complex_float*)malloc(size); if (!spline_buff) { fprintf (stderr, "Failed to allocate memory for temporary spline buffer.\n"); abort(); } for (int ix=0; ixnum_splines; isp++) spline_buff[ix*cuda_spline->stride + isp] = spline->coefs[ix*spline->x_stride + isp]; cudaMemcpy(cuda_spline->coefs, 
spline_buff, size, cudaMemcpyHostToDevice); free(spline_buff); return cuda_spline; } extern "C" multi_UBspline_3d_c_cuda* create_multi_UBspline_3d_c_cuda (multi_UBspline_3d_c* spline) { float A_h[48] = { -1.0/6.0, 3.0/6.0, -3.0/6.0, 1.0/6.0, 3.0/6.0, -6.0/6.0, 0.0/6.0, 4.0/6.0, -3.0/6.0, 3.0/6.0, 3.0/6.0, 1.0/6.0, 1.0/6.0, 0.0/6.0, 0.0/6.0, 0.0/6.0, 0.0, -0.5, 1.0, -0.5, 0.0, 1.5, -2.0, 0.0, 0.0, -1.5, 1.0, 0.5, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, -1.0, 1.0, 0.0, 0.0, 3.0, -2.0, 0.0, 0.0, -3.0, 1.0, 0.0, 0.0, 1.0, 0.0 }; cudaMemcpyToSymbol(Acuda, A_h, 48*sizeof(float), 0, cudaMemcpyHostToDevice); multi_UBspline_3d_c_cuda *cuda_spline = (multi_UBspline_3d_c_cuda*) malloc (sizeof (multi_UBspline_3d_c_cuda)); cuda_spline->num_splines = spline->num_splines; int Nx = spline->x_grid.num+3; int Ny = spline->y_grid.num+3; int Nz = spline->z_grid.num+3; int N = spline->num_splines; if ((N%SPLINE_BLOCK_SIZE) != 0) N += 64 - (N%SPLINE_BLOCK_SIZE); cuda_spline->stride.x = Ny*Nz*N; cuda_spline->stride.y = Nz*N; cuda_spline->stride.z = N; cuda_spline->gridInv.x = spline->x_grid.delta_inv; cuda_spline->gridInv.y = spline->y_grid.delta_inv; cuda_spline->gridInv.z = spline->z_grid.delta_inv; cuda_spline->dim.x = spline->x_grid.num; cuda_spline->dim.y = spline->y_grid.num; cuda_spline->dim.z = spline->z_grid.num; size_t size = Nx*Ny*Nz*N*sizeof(std::complex); cudaMalloc((void**)&(cuda_spline->coefs), size); std::complex *spline_buff = (std::complex*)malloc(size); if (!spline_buff) { fprintf (stderr, "Failed to allocate memory for temporary spline buffer.\n"); abort(); } for (int ix=0; ixnum_splines; isp++) { spline_buff[ix*cuda_spline->stride.x + iy*cuda_spline->stride.y + iz*cuda_spline->stride.z + isp] = spline->coefs[ix*spline->x_stride + iy*spline->y_stride + iz*spline->z_stride + isp]; } for (int isp=spline->num_splines; isp < N; isp++) { spline_buff[ix*cuda_spline->stride.x + iy*cuda_spline->stride.y + iz*cuda_spline->stride.z + isp] = 0.0; } } cudaMemcpy(cuda_spline->coefs, 
spline_buff, size, cudaMemcpyHostToDevice); free(spline_buff); cuda_spline->stride.x = 2*Ny*Nz*N; cuda_spline->stride.y = 2*Nz*N; cuda_spline->stride.z = 2*N; return cuda_spline; } extern "C" multi_UBspline_3d_c_cuda* create_multi_UBspline_3d_c_cuda_conv (multi_UBspline_3d_z* spline) { float A_h[48] = { -1.0/6.0, 3.0/6.0, -3.0/6.0, 1.0/6.0, 3.0/6.0, -6.0/6.0, 0.0/6.0, 4.0/6.0, -3.0/6.0, 3.0/6.0, 3.0/6.0, 1.0/6.0, 1.0/6.0, 0.0/6.0, 0.0/6.0, 0.0/6.0, 0.0, -0.5, 1.0, -0.5, 0.0, 1.5, -2.0, 0.0, 0.0, -1.5, 1.0, 0.5, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, -1.0, 1.0, 0.0, 0.0, 3.0, -2.0, 0.0, 0.0, -3.0, 1.0, 0.0, 0.0, 1.0, 0.0 }; cudaMemcpyToSymbol(Acuda, A_h, 48*sizeof(float), 0, cudaMemcpyHostToDevice); multi_UBspline_3d_c_cuda *cuda_spline = (multi_UBspline_3d_c_cuda*) malloc (sizeof (multi_UBspline_3d_c_cuda)); cuda_spline->num_splines = spline->num_splines; int Nx = spline->x_grid.num+3; int Ny = spline->y_grid.num+3; int Nz = spline->z_grid.num+3; int N = spline->num_splines; if ((N%SPLINE_BLOCK_SIZE) != 0) N += 64 - (N%SPLINE_BLOCK_SIZE); cuda_spline->stride.x = Ny*Nz*N; cuda_spline->stride.y = Nz*N; cuda_spline->stride.z = N; cuda_spline->gridInv.x = spline->x_grid.delta_inv; cuda_spline->gridInv.y = spline->y_grid.delta_inv; cuda_spline->gridInv.z = spline->z_grid.delta_inv; cuda_spline->dim.x = spline->x_grid.num; cuda_spline->dim.y = spline->y_grid.num; cuda_spline->dim.z = spline->z_grid.num; size_t size = Nx*Ny*Nz*N*sizeof(std::complex); cudaMalloc((void**)&(cuda_spline->coefs), size); cudaError_t err = cudaGetLastError(); if (err != cudaSuccess) { fprintf (stderr, "Failed to allocate %ld memory for GPU spline coefficients. 
Error %s\n", size, cudaGetErrorString(err)); abort(); } std::complex *spline_buff = (std::complex*)malloc(size); if (!spline_buff) { fprintf (stderr, "Failed to allocate memory for temporary spline buffer.\n"); abort(); } for (int ix=0; ixnum_splines; isp++) { std::complex z = spline->coefs[ix*spline->x_stride + iy*spline->y_stride + iz*spline->z_stride + isp]; spline_buff[ix*cuda_spline->stride.x + iy*cuda_spline->stride.y + iz*cuda_spline->stride.z + isp] = std::complex(z.real(), z.imag()); } for (int isp=spline->num_splines; isp < N; isp++) spline_buff[ix*cuda_spline->stride.x + iy*cuda_spline->stride.y + iz*cuda_spline->stride.z + isp] = 0.0; } cudaMemcpy(cuda_spline->coefs, spline_buff, size, cudaMemcpyHostToDevice); cudaThreadSynchronize(); err = cudaGetLastError(); if (err != cudaSuccess) { fprintf (stderr, "Failed to copy spline to GPU memory. Error: %s\n", cudaGetErrorString(err)); abort(); } free(spline_buff); cuda_spline->stride.x = 2*Ny*Nz*N; cuda_spline->stride.y = 2*Nz*N; cuda_spline->stride.z = 2*N; return cuda_spline; } extern "C" multi_UBspline_3d_s_cuda* create_multi_UBspline_3d_s_cuda (multi_UBspline_3d_s* spline) { float A_h[48] = { -1.0/6.0, 3.0/6.0, -3.0/6.0, 1.0/6.0, 3.0/6.0, -6.0/6.0, 0.0/6.0, 4.0/6.0, -3.0/6.0, 3.0/6.0, 3.0/6.0, 1.0/6.0, 1.0/6.0, 0.0/6.0, 0.0/6.0, 0.0/6.0, 0.0, -0.5, 1.0, -0.5, 0.0, 1.5, -2.0, 0.0, 0.0, -1.5, 1.0, 0.5, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, -1.0, 1.0, 0.0, 0.0, 3.0, -2.0, 0.0, 0.0, -3.0, 1.0, 0.0, 0.0, 1.0, 0.0 }; cudaMemcpyToSymbol(Acuda, A_h, 48*sizeof(float), 0, cudaMemcpyHostToDevice); multi_UBspline_3d_s_cuda *cuda_spline = (multi_UBspline_3d_s_cuda*) malloc (sizeof (multi_UBspline_3d_s_cuda)); cuda_spline->num_splines = spline->num_splines; int Nx = spline->x_grid.num+3; int Ny = spline->y_grid.num+3; int Nz = spline->z_grid.num+3; int N = spline->num_splines; if ((N%SPLINE_BLOCK_SIZE) != 0) N += 64 - (N%SPLINE_BLOCK_SIZE); cuda_spline->stride.x = Ny*Nz*N; cuda_spline->stride.y = Nz*N; cuda_spline->stride.z = 
N; cuda_spline->gridInv.x = spline->x_grid.delta_inv; cuda_spline->gridInv.y = spline->y_grid.delta_inv; cuda_spline->gridInv.z = spline->z_grid.delta_inv; cuda_spline->dim.x = spline->x_grid.num; cuda_spline->dim.y = spline->y_grid.num; cuda_spline->dim.z = spline->z_grid.num; size_t size = Nx*Ny*Nz*N*sizeof(float); cudaMalloc((void**)&(cuda_spline->coefs), size); float *spline_buff = (float*)malloc(size); if (!spline_buff) { fprintf (stderr, "Failed to allocate memory for temporary spline buffer.\n"); abort(); } for (int ix=0; ixnum_splines; isp++) { spline_buff[ix*cuda_spline->stride.x + iy*cuda_spline->stride.y + iz*cuda_spline->stride.z + isp] = spline->coefs[ix*spline->x_stride + iy*spline->y_stride + iz*spline->z_stride + isp]; } cudaMemcpy(cuda_spline->coefs, spline_buff, size, cudaMemcpyHostToDevice); free(spline_buff); return cuda_spline; } extern "C" multi_UBspline_3d_s_cuda* create_multi_UBspline_3d_s_cuda_conv (multi_UBspline_3d_d* spline) { fprintf (stderr, "In create_multi_UBspline_3d_s_cuda_conv.\n"); float A_h[48] = { -1.0/6.0, 3.0/6.0, -3.0/6.0, 1.0/6.0, 3.0/6.0, -6.0/6.0, 0.0/6.0, 4.0/6.0, -3.0/6.0, 3.0/6.0, 3.0/6.0, 1.0/6.0, 1.0/6.0, 0.0/6.0, 0.0/6.0, 0.0/6.0, 0.0, -0.5, 1.0, -0.5, 0.0, 1.5, -2.0, 0.0, 0.0, -1.5, 1.0, 0.5, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, -1.0, 1.0, 0.0, 0.0, 3.0, -2.0, 0.0, 0.0, -3.0, 1.0, 0.0, 0.0, 1.0, 0.0 }; cudaMemcpyToSymbol(Acuda, A_h, 48*sizeof(float), 0, cudaMemcpyHostToDevice); multi_UBspline_3d_s_cuda *cuda_spline = (multi_UBspline_3d_s_cuda*) malloc (sizeof (multi_UBspline_3d_s_cuda)); cuda_spline->num_splines = spline->num_splines; int Nx = spline->x_grid.num+3; int Ny = spline->y_grid.num+3; int Nz = spline->z_grid.num+3; int N = spline->num_splines; if ((N%SPLINE_BLOCK_SIZE) != 0) N += SPLINE_BLOCK_SIZE - (N%SPLINE_BLOCK_SIZE); cuda_spline->stride.x = Ny*Nz*N; cuda_spline->stride.y = Nz*N; cuda_spline->stride.z = N; cuda_spline->gridInv.x = spline->x_grid.delta_inv; cuda_spline->gridInv.y = spline->y_grid.delta_inv; 
cuda_spline->gridInv.z = spline->z_grid.delta_inv; cuda_spline->dim.x = spline->x_grid.num; cuda_spline->dim.y = spline->y_grid.num; cuda_spline->dim.z = spline->z_grid.num; size_t size = Nx*Ny*Nz*N*sizeof(float); cudaMalloc((void**)&(cuda_spline->coefs), size); cudaError_t err = cudaGetLastError(); if (err != cudaSuccess) { fprintf (stderr, "Failed to allocate %ld memory for GPU spline coefficients. Error %s\n", size, cudaGetErrorString(err)); abort(); } float *spline_buff = (float*)malloc(size); if (!spline_buff) { fprintf (stderr, "Failed to allocate memory for temporary spline buffer.\n"); abort(); } for (int ix=0; ixnum_splines; isp++) { spline_buff[ix*cuda_spline->stride.x + iy*cuda_spline->stride.y + iz*cuda_spline->stride.z + isp] = spline->coefs[ix*spline->x_stride + iy*spline->y_stride + iz*spline->z_stride + isp]; // if (isnan (spline->coefs[ix*spline->x_stride + // iy*spline->y_stride + // iz*spline->z_stride + isp])) // fprintf (stderr, "NAN at ix=%d iy=%d iz=%d isp=%d\n", // ix,iy,iz,isp); } cudaMemcpy(cuda_spline->coefs, spline_buff, size, cudaMemcpyHostToDevice); cudaThreadSynchronize(); err = cudaGetLastError(); if (err != cudaSuccess) { fprintf (stderr, "Failed to copy spline to GPU memory. 
Error: %s\n", cudaGetErrorString(err)); abort(); } free(spline_buff); return cuda_spline; } extern "C" multi_UBspline_3d_d_cuda* create_multi_UBspline_3d_d_cuda (multi_UBspline_3d_d* spline) { double B_h[48] = { -1.0/6.0, 3.0/6.0, -3.0/6.0, 1.0/6.0, 3.0/6.0, -6.0/6.0, 0.0/6.0, 4.0/6.0, -3.0/6.0, 3.0/6.0, 3.0/6.0, 1.0/6.0, 1.0/6.0, 0.0/6.0, 0.0/6.0, 0.0/6.0, 0.0, -0.5, 1.0, -0.5, 0.0, 1.5, -2.0, 0.0, 0.0, -1.5, 1.0, 0.5, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, -1.0, 1.0, 0.0, 0.0, 3.0, -2.0, 0.0, 0.0, -3.0, 1.0, 0.0, 0.0, 1.0, 0.0 }; cudaMemcpyToSymbol(Bcuda, B_h, 48*sizeof(double), 0, cudaMemcpyHostToDevice); multi_UBspline_3d_d_cuda *cuda_spline = (multi_UBspline_3d_d_cuda*) malloc (sizeof (multi_UBspline_3d_d_cuda)); cuda_spline->num_splines = spline->num_splines; int Nx = spline->x_grid.num+3; int Ny = spline->y_grid.num+3; int Nz = spline->z_grid.num+3; int N = spline->num_splines; if ((N%SPLINE_BLOCK_SIZE) != 0) N += SPLINE_BLOCK_SIZE - (N%SPLINE_BLOCK_SIZE); cuda_spline->stride.x = Ny*Nz*N; cuda_spline->stride.y = Nz*N; cuda_spline->stride.z = N; cuda_spline->gridInv.x = spline->x_grid.delta_inv; cuda_spline->gridInv.y = spline->y_grid.delta_inv; cuda_spline->gridInv.z = spline->z_grid.delta_inv; cuda_spline->dim.x = spline->x_grid.num; cuda_spline->dim.y = spline->y_grid.num; cuda_spline->dim.z = spline->z_grid.num; size_t size = Nx*Ny*Nz*N*sizeof(double); cudaMalloc((void**)&(cuda_spline->coefs), size); double *spline_buff = (double*)malloc(size); if (!spline_buff) { fprintf (stderr, "Failed to allocate memory for temporary spline buffer.\n"); abort(); } for (int ix=0; ixnum_splines; isp++) { spline_buff[ix*cuda_spline->stride.x + iy*cuda_spline->stride.y + iz*cuda_spline->stride.z + isp] = spline->coefs[ix*spline->x_stride + iy*spline->y_stride + iz*spline->z_stride + isp]; } cudaMemcpy(cuda_spline->coefs, spline_buff, size, cudaMemcpyHostToDevice); free(spline_buff); return cuda_spline; } extern "C" multi_UBspline_3d_z_cuda* create_multi_UBspline_3d_z_cuda 
(multi_UBspline_3d_z* spline) { double B_h[48] = { -1.0/6.0, 3.0/6.0, -3.0/6.0, 1.0/6.0, 3.0/6.0, -6.0/6.0, 0.0/6.0, 4.0/6.0, -3.0/6.0, 3.0/6.0, 3.0/6.0, 1.0/6.0, 1.0/6.0, 0.0/6.0, 0.0/6.0, 0.0/6.0, 0.0, -0.5, 1.0, -0.5, 0.0, 1.5, -2.0, 0.0, 0.0, -1.5, 1.0, 0.5, 0.0, 0.5, 0.0, 0.0, 0.0, 0.0, -1.0, 1.0, 0.0, 0.0, 3.0, -2.0, 0.0, 0.0, -3.0, 1.0, 0.0, 0.0, 1.0, 0.0 }; cudaMemcpyToSymbol(Bcuda, B_h, 48*sizeof(double), 0, cudaMemcpyHostToDevice); multi_UBspline_3d_z_cuda *cuda_spline = (multi_UBspline_3d_z_cuda*) malloc (sizeof (multi_UBspline_3d_z_cuda)); cuda_spline->num_splines = spline->num_splines; int Nx = spline->x_grid.num+3; int Ny = spline->y_grid.num+3; int Nz = spline->z_grid.num+3; int N = spline->num_splines; if ((N%SPLINE_BLOCK_SIZE) != 0) N += 64 - (N%SPLINE_BLOCK_SIZE); cuda_spline->stride.x = Ny*Nz*N; cuda_spline->stride.y = Nz*N; cuda_spline->stride.z = N; cuda_spline->gridInv.x = spline->x_grid.delta_inv; cuda_spline->gridInv.y = spline->y_grid.delta_inv; cuda_spline->gridInv.z = spline->z_grid.delta_inv; cuda_spline->dim.x = spline->x_grid.num; cuda_spline->dim.y = spline->y_grid.num; cuda_spline->dim.z = spline->z_grid.num; size_t size = Nx*Ny*Nz*N*sizeof(std::complex); cudaMalloc((void**)&(cuda_spline->coefs), size); std::complex *spline_buff = (std::complex*)malloc(size); if (!spline_buff) { fprintf (stderr, "Failed to allocate memory for temporary spline buffer.\n"); abort(); } for (int ix=0; ixnum_splines; isp++) { spline_buff[ix*cuda_spline->stride.x + iy*cuda_spline->stride.y + iz*cuda_spline->stride.z + isp] = spline->coefs[ix*spline->x_stride + iy*spline->y_stride + iz*spline->z_stride + isp]; } cudaMemcpy(cuda_spline->coefs, spline_buff, size, cudaMemcpyHostToDevice); cuda_spline->stride.x = 2*Ny*Nz*N; cuda_spline->stride.y = 2*Nz*N; cuda_spline->stride.z = 2*N; free(spline_buff); return cuda_spline; } einspline-0.9.2/src/nubspline_base.h0000664000113000011300000000306511012400563014340 
00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. // // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. // // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #ifndef NUBSPLINE_BASH_H #define NUBSPLINE_BASH_H #include "bspline_base.h" #include "nubasis.h" #endif einspline-0.9.2/src/fbspline.c0000664000113000011300000006065111012400563013150 00000000000000#include "bspline_create.h" #include "bspline.h" #include "fbspline.h" #include "config.h" #ifdef __cplusplus #define CFUNC "C" /* Avoid name mangling in C++ */ #else #define CFUNC #endif /////////////////////// // Creation routines // /////////////////////// //////// // 1D // //////// CFUNC void F77_FUNC_(fcreate_ubspline_1d_s,FCREATE_UBSPLINE_1D_S) (double *x0, double *x1, int *num_x, int *x0_code, float *x0_val, int *x1_code, float *x1_val, float *data, UBspline_1d_s **spline) { Ugrid xgrid; BCtype_s xBC; xgrid.start = *x0; xgrid.end = *x1; xgrid.num = *num_x; xBC.lCode = (bc_code) *x0_code; xBC.rCode = (bc_code) *x1_code; xBC.lVal = *x0_val; xBC.rVal = *x1_val; *spline = create_UBspline_1d_s (xgrid, xBC, data); } CFUNC void 
F77_FUNC_(fcreate_ubspline_1d_d,FCREATE_UBSPLINE_1D_D) (double *x0, double *x1, int *num_x, int *x0_code, double *x0_val, int *x1_code, double *x1_val, double *data, UBspline_1d_d **spline) { Ugrid xgrid; BCtype_d xBC; xgrid.start = *x0; xgrid.end = *x1; xgrid.num = *num_x; xBC.lCode = (bc_code) *x0_code; xBC.rCode = (bc_code) *x1_code; xBC.lVal = *x0_val; xBC.rVal = *x1_val; *spline = create_UBspline_1d_d (xgrid, xBC, data); } CFUNC void F77_FUNC_(fcreate_ubspline_1d_c,FCREATE_UBSPLINE_1D_C) (double *x0, double *x1, int *num_x, int *x0_code, complex_float *x0_val, int *x1_code, complex_float *x1_val, complex_float *data, UBspline_1d_c **spline) { Ugrid xgrid; BCtype_c xBC; xgrid.start = *x0; xgrid.end = *x1; xgrid.num = *num_x; xBC.lCode = (bc_code) *x0_code; xBC.rCode = (bc_code) *x1_code; xBC.lVal_r = crealf(*x0_val); xBC.lVal_i = cimagf(*x0_val); xBC.rVal_r = crealf(*x1_val); xBC.rVal_i = cimagf(*x1_val); *spline = create_UBspline_1d_c (xgrid, xBC, data); } CFUNC void F77_FUNC_(fcreate_ubspline_1d_z,FCREATE_UBSPLINE_1D_Z) (double *x0, double *x1, int *num_x, int *x0_code, complex_double *x0_val, int *x1_code, complex_double *x1_val, complex_double *data, UBspline_1d_z **spline) { Ugrid xgrid; BCtype_z xBC; xgrid.start = *x0; xgrid.end = *x1; xgrid.num = *num_x; xBC.lCode = (bc_code) *x0_code; xBC.rCode = (bc_code) *x1_code; xBC.lVal_r = creal(*x0_val); xBC.lVal_i = cimag(*x0_val); xBC.rVal_r = creal(*x1_val); xBC.rVal_i = cimag(*x1_val); *spline = create_UBspline_1d_z (xgrid, xBC, data); } CFUNC void F77_FUNC_(frecompute_ubspline_1d_s,FRECOMPUTE_UBSPLINE_1D_S) (UBspline_1d_s **spline, float *data) { recompute_UBspline_1d_s (*spline, data); } CFUNC void F77_FUNC_(frecompute_ubspline_1d_d,FRECOMPUTE_UBSPLINE_1D_D) (UBspline_1d_d **spline, double *data) { recompute_UBspline_1d_d (*spline, data); } CFUNC void F77_FUNC_(frecompute_ubspline_1d_c,FRECOMPUTE_UBSPLINE_1D_C) (UBspline_1d_c **spline, complex_float *data) { recompute_UBspline_1d_c (*spline, data); } CFUNC 
void F77_FUNC_(frecompute_ubspline_1d_z,FRECOMPUTE_UBSPLINE_1D_Z) (UBspline_1d_z **spline, complex_double *data) { recompute_UBspline_1d_z (*spline, data); } //////// // 2D // //////// CFUNC void F77_FUNC_(fcreate_ubspline_2d_s,FCREATE_UBSPLINE_2D_S) (double *x0, double *x1, int *num_x, double *y0, double *y1, int *num_y, int *x0_code, float *x0_val, int *x1_code, float *x1_val, int *y0_code, float *y0_val, int *y1_code, float *y1_val, float *data, UBspline_2d_s **spline) { Ugrid xgrid, ygrid; BCtype_s xBC, yBC; xgrid.start = *x0; xgrid.end = *x1; xgrid.num = *num_x; ygrid.start = *y0; ygrid.end = *y1; ygrid.num = *num_y; xBC.lCode = (bc_code) *x0_code; xBC.rCode = (bc_code) *x1_code; xBC.lVal = *x0_val; xBC.rVal = *x1_val; yBC.lCode = (bc_code) *y0_code; yBC.rCode = (bc_code) *y1_code; yBC.lVal = *y0_val; yBC.rVal = *y1_val; *spline = create_UBspline_2d_s (xgrid, ygrid, xBC, yBC, data); } CFUNC void F77_FUNC_(fcreate_ubspline_2d_d,FCREATE_UBSPLINE_2D_D) (double *x0, double *x1, int *num_x, double *y0, double *y1, int *num_y, int *x0_code, double *x0_val, int *x1_code, double *x1_val, int *y0_code, double *y0_val, int *y1_code, double *y1_val, double *data, UBspline_2d_d **spline) { Ugrid xgrid, ygrid; BCtype_d xBC, yBC; xgrid.start = *x0; xgrid.end = *x1; xgrid.num = *num_x; ygrid.start = *y0; ygrid.end = *y1; ygrid.num = *num_y; xBC.lCode = (bc_code) *x0_code; xBC.rCode = (bc_code) *x1_code; xBC.lVal = *x0_val; xBC.rVal = *x1_val; yBC.lCode = (bc_code) *y0_code; yBC.rCode = (bc_code) *y1_code; yBC.lVal = *y0_val; yBC.rVal = *y1_val; *spline = create_UBspline_2d_d (xgrid, ygrid, xBC, yBC, data); } CFUNC void F77_FUNC_(fcreate_ubspline_2d_c,FCREATE_UBSPLINE_2D_C) (double *x0, double *x1, int *num_x, double *y0, double *y1, int *num_y, int *x0_code, complex_float *x0_val, int *x1_code, complex_float *x1_val, int *y0_code, complex_float *y0_val, int *y1_code, complex_float *y1_val, complex_float *data, UBspline_2d_c **spline) { Ugrid xgrid, ygrid; BCtype_c xBC, yBC; 
xgrid.start = *x0; xgrid.end = *x1; xgrid.num = *num_x; ygrid.start = *y0; ygrid.end = *y1; ygrid.num = *num_y; xBC.lCode = (bc_code) *x0_code; xBC.rCode = (bc_code) *x1_code; xBC.lVal_r = crealf(*x0_val); xBC.lVal_i = cimagf(*x0_val); xBC.rVal_r = crealf(*x1_val); xBC.rVal_i = cimagf(*x1_val); yBC.lCode = (bc_code) *y0_code; yBC.rCode = (bc_code) *y1_code; yBC.lVal_r = crealf(*y0_val); yBC.lVal_i = cimagf(*y0_val); yBC.rVal_r = crealf(*y1_val); yBC.rVal_i = cimagf(*y1_val); *spline = create_UBspline_2d_c (xgrid, ygrid, xBC, yBC, data); } CFUNC void F77_FUNC_(fcreate_ubspline_2d_z,FCREATE_UBSPLINE_2D_Z) (double *x0, double *x1, int *num_x, double *y0, double *y1, int *num_y, int *x0_code, complex_double *x0_val, int *x1_code, complex_double *x1_val, int *y0_code, complex_double *y0_val, int *y1_code, complex_double *y1_val, complex_double *data, UBspline_2d_z **spline) { Ugrid xgrid, ygrid; BCtype_z xBC, yBC; xgrid.start = *x0; xgrid.end = *x1; xgrid.num = *num_x; ygrid.start = *y0; ygrid.end = *y1; ygrid.num = *num_y; xBC.lCode = (bc_code) *x0_code; xBC.rCode = (bc_code) *x1_code; xBC.lVal_r = crealf(*x0_val); xBC.lVal_i = cimagf(*x0_val); xBC.rVal_r = crealf(*x1_val); xBC.rVal_i = cimagf(*x1_val); yBC.lCode = (bc_code) *y0_code; yBC.rCode = (bc_code) *y1_code; yBC.lVal_r = creal(*y0_val); yBC.lVal_i = cimag(*y0_val); yBC.rVal_r = creal(*y1_val); yBC.rVal_i = cimag(*y1_val); *spline = create_UBspline_2d_z (xgrid, ygrid, xBC, yBC, data); } CFUNC void F77_FUNC_(frecompute_ubspline_2d_s,FRECOMPUTE_UBSPLINE_2D_S) (UBspline_2d_s **spline, float *data) { recompute_UBspline_2d_s (*spline, data); } CFUNC void F77_FUNC_(frecompute_ubspline_2d_d,FRECOMPUTE_UBSPLINE_2D_D) (UBspline_2d_d **spline, double *data) { recompute_UBspline_2d_d (*spline, data); } CFUNC void F77_FUNC_(frecompute_ubspline_2d_c,FRECOMPUTE_UBSPLINE_2D_C) (UBspline_2d_c **spline, complex_float *data) { recompute_UBspline_2d_c (*spline, data); } CFUNC void 
F77_FUNC_(frecompute_ubspline_2d_z,FRECOMPUTE_UBSPLINE_2D_Z) (UBspline_2d_z **spline, complex_double *data) { recompute_UBspline_2d_z (*spline, data); } //////// // 3D // //////// CFUNC void F77_FUNC_(fcreate_ubspline_3d_s,FCREATE_UBSPLINE_3D_S) (double *x0, double *x1, int *num_x, double *y0, double *y1, int *num_y, double *z0, double *z1, int *num_z, int *x0_code, float *x0_val, int *x1_code, float *x1_val, int *y0_code, float *y0_val, int *y1_code, float *y1_val, int *z0_code, float *z0_val, int *z1_code, float *z1_val, float *data, UBspline_3d_s **spline) { Ugrid xgrid, ygrid, zgrid; BCtype_s xBC, yBC, zBC; xgrid.start = *x0; xgrid.end = *x1; xgrid.num = *num_x; ygrid.start = *y0; ygrid.end = *y1; ygrid.num = *num_y; zgrid.start = *z0; zgrid.end = *z1; zgrid.num = *num_z; xBC.lCode = (bc_code) *x0_code; xBC.rCode = (bc_code) *x1_code; xBC.lVal = *x0_val; xBC.rVal = *x1_val; yBC.lCode = (bc_code) *y0_code; yBC.rCode = (bc_code) *y1_code; yBC.lVal = *y0_val; yBC.rVal = *y1_val; zBC.lCode = (bc_code) *z0_code; zBC.rCode = (bc_code) *z1_code; zBC.lVal = *z0_val; zBC.rVal = *z1_val; *spline = create_UBspline_3d_s (xgrid, ygrid, zgrid, xBC, yBC, zBC, data); } CFUNC void F77_FUNC_(fcreate_ubspline_3d_d,FCREATE_UBSPLINE_3D_D) (double *x0, double *x1, int *num_x, double *y0, double *y1, int *num_y, double *z0, double *z1, int *num_z, int *x0_code, double *x0_val, int *x1_code, double *x1_val, int *y0_code, double *y0_val, int *y1_code, double *y1_val, int *z0_code, double *z0_val, int *z1_code, double *z1_val, double *data, UBspline_3d_d **spline) { Ugrid xgrid, ygrid, zgrid; BCtype_d xBC, yBC, zBC; xgrid.start = *x0; xgrid.end = *x1; xgrid.num = *num_x; ygrid.start = *y0; ygrid.end = *y1; ygrid.num = *num_y; zgrid.start = *z0; zgrid.end = *z1; zgrid.num = *num_z; xBC.lCode = (bc_code) *x0_code; xBC.rCode = (bc_code) *x1_code; xBC.lVal = *x0_val; xBC.rVal = *x1_val; yBC.lCode = (bc_code) *y0_code; yBC.rCode = (bc_code) *y1_code; yBC.lVal = *y0_val; yBC.rVal = *y1_val; 
zBC.lCode = (bc_code) *z0_code; zBC.rCode = (bc_code) *z1_code; zBC.lVal = *z0_val; zBC.rVal = *z1_val; *spline = create_UBspline_3d_d (xgrid, ygrid, zgrid, xBC, yBC, zBC, data); } CFUNC void F77_FUNC_(fcreate_ubspline_3d_c,FCREATE_UBSPLINE_3D_C) (double *x0, double *x1, int *num_x, double *y0, double *y1, int *num_y, double *z0, double *z1, int *num_z, int *x0_code, complex_float *x0_val, int *x1_code, complex_float *x1_val, int *y0_code, complex_float *y0_val, int *y1_code, complex_float *y1_val, int *z0_code, complex_float *z0_val, int *z1_code, complex_float *z1_val, complex_float *data, UBspline_3d_c **spline) { Ugrid xgrid, ygrid, zgrid; BCtype_c xBC, yBC, zBC; xgrid.start = *x0; xgrid.end = *x1; xgrid.num = *num_x; ygrid.start = *y0; ygrid.end = *y1; ygrid.num = *num_y; zgrid.start = *z0; zgrid.end = *z1; zgrid.num = *num_z; xBC.lCode = (bc_code) *x0_code; xBC.rCode = (bc_code) *x1_code; xBC.lVal_r = crealf(*x0_val); xBC.lVal_i = cimagf(*x0_val); xBC.rVal_r = crealf(*x1_val); xBC.rVal_i = cimagf(*x1_val); yBC.lCode = (bc_code) *y0_code; yBC.rCode = (bc_code) *y1_code; yBC.lVal_r = crealf(*y0_val); yBC.lVal_i = cimagf(*y0_val); yBC.rVal_r = crealf(*y1_val); yBC.rVal_i = cimagf(*y1_val); zBC.lCode = (bc_code) *z0_code; zBC.rCode = (bc_code) *z1_code; zBC.lVal_r = crealf(*z0_val); zBC.lVal_i = cimagf(*z0_val); zBC.rVal_r = crealf(*z1_val); zBC.rVal_i = cimagf(*z1_val); *spline = create_UBspline_3d_c (xgrid, ygrid, zgrid, xBC, yBC, zBC, data); } CFUNC void F77_FUNC_(fcreate_ubspline_3d_z,FCREATE_UBSPLINE_3D_Z) (double *x0, double *x1, int *num_x, double *y0, double *y1, int *num_y, double *z0, double *z1, int *num_z, int *x0_code, complex_double *x0_val, int *x1_code, complex_double *x1_val, int *y0_code, complex_double *y0_val, int *y1_code, complex_double *y1_val, int *z0_code, complex_double *z0_val, int *z1_code, complex_double *z1_val, complex_double *data, UBspline_3d_z **spline) { Ugrid xgrid, ygrid, zgrid; BCtype_z xBC, yBC, zBC; xgrid.start = *x0; 
xgrid.end = *x1; xgrid.num = *num_x; ygrid.start = *y0; ygrid.end = *y1; ygrid.num = *num_y; zgrid.start = *z0; zgrid.end = *z1; zgrid.num = *num_z; xBC.lCode = (bc_code) *x0_code; xBC.rCode = (bc_code) *x1_code; xBC.lVal_r = creal(*x0_val); xBC.lVal_i = cimag(*x0_val); xBC.rVal_r = creal(*x1_val); xBC.rVal_i = cimag(*x1_val); yBC.lCode = (bc_code) *y0_code; yBC.rCode = (bc_code) *y1_code; yBC.lVal_r = creal(*y0_val); yBC.lVal_i = cimag(*y0_val); yBC.rVal_r = creal(*y1_val); yBC.rVal_i = cimag(*y1_val); zBC.lCode = (bc_code) *z0_code; zBC.rCode = (bc_code) *z1_code; zBC.lVal_r = creal(*z0_val); zBC.lVal_i = cimag(*z0_val); zBC.rVal_r = creal(*z1_val); zBC.rVal_i = cimag(*z1_val); *spline = create_UBspline_3d_z (xgrid, ygrid, zgrid, xBC, yBC, zBC, data); } CFUNC void F77_FUNC_(frecompute_ubspline_3d_s,FRECOMPUTE_UBSPLINE_3D_S) (UBspline_3d_s **spline, float *data) { recompute_UBspline_3d_s (*spline, data); } CFUNC void F77_FUNC_(frecompute_ubspline_3d_d,FRECOMPUTE_UBSPLINE_3D_D) (UBspline_3d_d **spline, double *data) { recompute_UBspline_3d_d (*spline, data); } CFUNC void F77_FUNC_(frecompute_ubspline_3d_c,FRECOMPUTE_UBSPLINE_3D_C) (UBspline_3d_c **spline, complex_float *data) { recompute_UBspline_3d_c (*spline, data); } CFUNC void F77_FUNC_(frecompute_ubspline_3d_z,FRECOMPUTE_UBSPLINE_3D_Z) (UBspline_3d_z **spline, complex_double *data) { recompute_UBspline_3d_z (*spline, data); } CFUNC void F77_FUNC_(fdestroy_bspline,FDESTROY_BSPLINE) (Bspline **spline) { destroy_Bspline (*spline); } ///////////////////////// // Evaluation routines // ///////////////////////// ////////////////////////////// // 1D single-precision real // ////////////////////////////// CFUNC void F77_FUNC_(feval_ubspline_1d_s,FEVAL_UBSPLINE_1D_S) (UBspline_1d_s **spline, double *x, float *val) { eval_UBspline_1d_s (*spline, *x, val); } CFUNC void F77_FUNC_(feval_ubspline_1d_s_vg,FEVAL_UBSPLINE_1D_S_VG) (UBspline_1d_s **spline, double *x, float *val, float *grad) { eval_UBspline_1d_s_vg (*spline, 
*x, val, grad); } CFUNC void F77_FUNC_(feval_ubspline_1d_s_vgl,FEVAL_UBSPLINE_1D_S_VGL) (UBspline_1d_s **spline, double *x, float *val, float *grad, float *lapl) { eval_UBspline_1d_s_vgl (*spline, *x, val, grad, lapl); } CFUNC void F77_FUNC_(feval_ubspline_1d_s_vgh,FEVAL_UBSPLINE_1D_S_VGH) (UBspline_1d_s **spline, double *x, float *val, float *grad, float *hess) { eval_UBspline_1d_s_vgh (*spline, *x, val, grad, hess); } ////////////////////////////// // 1D double-precision real // ////////////////////////////// CFUNC void F77_FUNC_(feval_ubspline_1d_d,FEVAL_UBSPLINE_1D_D) (UBspline_1d_d **spline, double *x, double *val) { eval_UBspline_1d_d (*spline, *x, val); } CFUNC void F77_FUNC_(feval_ubspline_1d_d_vg,FEVAL_UBSPLINE_1D_D_VG) (UBspline_1d_d **spline, double *x, double *val, double *grad) { eval_UBspline_1d_d_vg (*spline, *x, val, grad); } CFUNC void F77_FUNC_(feval_ubspline_1d_d_vgl,FEVAL_UBSPLINE_1D_D_VGL) (UBspline_1d_d **spline, double *x, double *val, double *grad, double *lapl) { eval_UBspline_1d_d_vgl (*spline, *x, val, grad, lapl); } CFUNC void F77_FUNC_(feval_ubspline_1d_d_vgh,FEVAL_UBSPLINE_1D_D_VGH) (UBspline_1d_d **spline, double *x, double *val, double *grad, double *hess) { eval_UBspline_1d_d_vgh (*spline, *x, val, grad, hess); } ///////////////////////////////// // 1D single-precision complex // ///////////////////////////////// CFUNC void F77_FUNC_(feval_ubspline_1d_c,FEVAL_UBSPLINE_1D_C) (UBspline_1d_c **spline, double *x, complex_float *val) { eval_UBspline_1d_c (*spline, *x, val); } CFUNC void F77_FUNC_(feval_ubspline_1d_c_vg,FEVAL_UBSPLINE_1D_C_VG) (UBspline_1d_c **spline, double *x, complex_float *val, complex_float *grad) { eval_UBspline_1d_c_vg (*spline, *x, val, grad); } CFUNC void F77_FUNC_(feval_ubspline_1d_c_vgl,FEVAL_UBSPLINE_1D_C_VGL) (UBspline_1d_c **spline, double *x, complex_float *val, complex_float *grad, complex_float *lapl) { eval_UBspline_1d_c_vgl (*spline, *x, val, grad, lapl); } CFUNC void 
F77_FUNC_(feval_ubspline_1d_c_vgh,FEVAL_UBSPLINE_1D_C_VGH) (UBspline_1d_c **spline, double *x, complex_float *val, complex_float *grad, complex_float *hess) { eval_UBspline_1d_c_vgh (*spline, *x, val, grad, hess); } ///////////////////////////////// // 1D double-precision complex // ///////////////////////////////// CFUNC void F77_FUNC_(feval_ubspline_1d_z,FEVAL_UBSPLINE_1D_Z) (UBspline_1d_z **spline, double *x, complex_double *val) { eval_UBspline_1d_z (*spline, *x, val); } CFUNC void F77_FUNC_(feval_ubspline_1d_z_vg,FEVAL_UBSPLINE_1D_Z_VG) (UBspline_1d_z **spline, double *x, complex_double *val, complex_double *grad) { eval_UBspline_1d_z_vg (*spline, *x, val, grad); } CFUNC void F77_FUNC_(feval_ubspline_1d_z_vgl,FEVAL_UBSPLINE_1D_Z_VGL) (UBspline_1d_z **spline, double *x, complex_double *val, complex_double *grad, complex_double *lapl) { eval_UBspline_1d_z_vgl (*spline, *x, val, grad, lapl); } CFUNC void F77_FUNC_(feval_ubspline_1d_z_vgh,FEVAL_UBSPLINE_1D_Z_VGH) (UBspline_1d_z **spline, double *x, complex_double *val, complex_double *grad, complex_double *hess) { eval_UBspline_1d_z_vgh (*spline, *x, val, grad, hess); } ////////////////////////////// // 2D single-precision real // ////////////////////////////// CFUNC void F77_FUNC_(feval_ubspline_2d_s,FEVAL_UBSPLINE_2D_S) (UBspline_2d_s **spline, double *x, double *y, float *val) { eval_UBspline_2d_s (*spline, *x, *y, val); } CFUNC void F77_FUNC_(feval_ubspline_2d_s_vg,FEVAL_UBSPLINE_2D_S_VG) (UBspline_2d_s **spline, double *x, double *y, float *val, float *grad) { eval_UBspline_2d_s_vg (*spline, *x, *y, val, grad); } CFUNC void F77_FUNC_(feval_ubspline_2d_s_vgl,FEVAL_UBSPLINE_2D_S_VGL) (UBspline_2d_s **spline, double *x, double *y, float *val, float *grad, float* lapl) { eval_UBspline_2d_s_vgl (*spline, *x, *y, val, grad, lapl); } CFUNC void F77_FUNC_(feval_ubspline_2d_s_vgh,FEVAL_UBSPLINE_2D_S_VGH) (UBspline_2d_s **spline, double *x, double *y, float *val, float *grad, float *hess) { eval_UBspline_2d_s_vgh 
(*spline, *x, *y, val, grad, hess); } ////////////////////////////// // 2D double-precision real // ////////////////////////////// CFUNC void F77_FUNC_(feval_ubspline_2d_d,FEVAL_UBSPLINE_2D_D) (UBspline_2d_d **spline, double *x, double *y, double *val) { eval_UBspline_2d_d (*spline, *x, *y, val); } CFUNC void F77_FUNC_(feval_ubspline_2d_d_vg,FEVAL_UBSPLINE_2D_D_VG) (UBspline_2d_d **spline, double *x, double *y, double *val, double *grad) { eval_UBspline_2d_d_vg (*spline, *x, *y, val, grad); } CFUNC void F77_FUNC_(feval_ubspline_2d_d_vgl,FEVAL_UBSPLINE_2D_D_VGL) (UBspline_2d_d **spline, double *x, double *y, double *val, double *grad, double *lapl) { eval_UBspline_2d_d_vgl (*spline, *x, *y, val, grad, lapl); } CFUNC void F77_FUNC_(feval_ubspline_2d_d_vgh,FEVAL_UBSPLINE_2D_D_VGH) (UBspline_2d_d **spline, double *x, double *y, double *val, double *grad, double *hess) { eval_UBspline_2d_d_vgl (*spline, *x, *y, val, grad, hess); } ///////////////////////////////// // 2D single-precision complex // ///////////////////////////////// CFUNC void F77_FUNC_(feval_ubspline_2d_c,FEVAL_UBSPLINE_2D_C) (UBspline_2d_c **spline, double *x, double *y, complex_float *val) { eval_UBspline_2d_c (*spline, *x, *y, val); } CFUNC void F77_FUNC_(feval_ubspline_2d_c_vg,FEVAL_UBSPLINE_2D_C_VG) (UBspline_2d_c **spline, double *x, double *y, complex_float *val, complex_float *grad) { eval_UBspline_2d_c_vg (*spline, *x, *y, val, grad); } CFUNC void F77_FUNC_(feval_ubspline_2d_c_vgl,FEVAL_UBSPLINE_2D_C_VGL) (UBspline_2d_c **spline, double *x, double *y, complex_float *val, complex_float *grad, complex_float *lapl) { eval_UBspline_2d_c_vgl (*spline, *x, *y, val, grad, lapl); } CFUNC void F77_FUNC_(feval_ubspline_2d_c_vgh,FEVAL_UBSPLINE_2D_C_VGH) (UBspline_2d_c **spline, double *x, double *y, complex_float *val, complex_float *grad, complex_float *hess) { eval_UBspline_2d_c_vgh (*spline, *x, *y, val, grad, hess); } ///////////////////////////////// // 2D double-precision complex // 
///////////////////////////////// CFUNC void F77_FUNC_(feval_ubspline_2d_z,FEVAL_UBSPLINE_2D_Z) (UBspline_2d_z **spline, double *x, double *y, complex_double *val) { eval_UBspline_2d_z (*spline, *x, *y, val); } CFUNC void F77_FUNC_(feval_ubspline_2d_z_vg,FEVAL_UBSPLINE_2D_Z_VG) (UBspline_2d_z **spline, double *x, double *y, complex_double *val, complex_double *grad) { eval_UBspline_2d_z_vg (*spline, *x, *y, val, grad); } CFUNC void F77_FUNC_(feval_ubspline_2d_z_vgl,FEVAL_UBSPLINE_2D_Z_VGL) (UBspline_2d_z **spline, double *x, double *y, complex_double *val, complex_double *grad, complex_double *lapl) { eval_UBspline_2d_z_vgl (*spline, *x, *y, val, grad, lapl); } CFUNC void F77_FUNC_(feval_ubspline_2d_z_vgh,FEVAL_UBSPLINE_2D_Z_VGH) (UBspline_2d_z **spline, double *x, double *y, complex_double *val, complex_double *grad, complex_double *hess) { eval_UBspline_2d_z_vgh (*spline, *x, *y, val, grad, hess); } ////////////////////////////// // 3D single-precision real // ////////////////////////////// CFUNC void F77_FUNC_(feval_ubspline_3d_s,FEVAL_UBSPLINE_3D_S) (UBspline_3d_s **spline, double *x, double *y, double *z, float *val) { eval_UBspline_3d_s (*spline, *x, *y, *z, val); } CFUNC void F77_FUNC_(feval_ubspline_3d_s_vg,FEVAL_UBSPLINE_3D_S_VG) (UBspline_3d_s **spline, double *x, double *y, double *z, float *val, float *grad) { eval_UBspline_3d_s_vg (*spline, *x, *y, *z, val, grad); } CFUNC void F77_FUNC_(feval_ubspline_3d_s_vgl,FEVAL_UBSPLINE_3D_S_VGL) (UBspline_3d_s **spline, double *x, double *y, double *z, float *val, float *grad, float* lapl) { eval_UBspline_3d_s_vgl (*spline, *x, *y, *z, val, grad, lapl); } CFUNC void F77_FUNC_(feval_ubspline_3d_s_vgh,FEVAL_UBSPLINE_3D_S_VGH) (UBspline_3d_s **spline, double *x, double *y, double *z, float *val, float *grad, float *hess) { eval_UBspline_3d_s_vgh (*spline, *x, *y, *z, val, grad, hess); } ////////////////////////////// // 3D double-precision real // ////////////////////////////// CFUNC void 
F77_FUNC_(feval_ubspline_3d_d,FEVAL_UBSPLINE_3D_D) (UBspline_3d_d **spline, double *x, double *y, double *z, double *val) { eval_UBspline_3d_d (*spline, *x, *y, *z, val); } CFUNC void F77_FUNC_(feval_ubspline_3d_d_vg,FEVAL_UBSPLINE_3D_D_VG) (UBspline_3d_d **spline, double *x, double *y, double *z, double *val, double *grad) { eval_UBspline_3d_d_vg (*spline, *x, *y, *z, val, grad); } CFUNC void F77_FUNC_(feval_ubspline_3d_d_vgl,FEVAL_UBSPLINE_3D_D_VGL) (UBspline_3d_d **spline, double *x, double *y, double *z, double *val, double *grad, double *lapl) { eval_UBspline_3d_d_vgl (*spline, *x, *y, *z, val, grad, lapl); } CFUNC void F77_FUNC_(feval_ubspline_3d_d_vgh,FEVAL_UBSPLINE_3D_D_VGH) (UBspline_3d_d **spline, double *x, double *y, double *z, double *val, double *grad, double *hess) { eval_UBspline_3d_d_vgh (*spline, *x, *y, *z, val, grad, hess); } ///////////////////////////////// // 3D single-precision complex // ///////////////////////////////// CFUNC void F77_FUNC_(feval_ubspline_3d_c,FEVAL_UBSPLINE_3D_C) (UBspline_3d_c **spline, double *x, double *y, double *z, complex_float *val) { eval_UBspline_3d_c (*spline, *x, *y, *z, val); } CFUNC void F77_FUNC_(feval_ubspline_3d_c_vg,FEVAL_UBSPLINE_3D_C_VG) (UBspline_3d_c **spline, double *x, double *y, double *z, complex_float *val, complex_float *grad) { eval_UBspline_3d_c_vg (*spline, *x, *y, *z, val, grad); } CFUNC void F77_FUNC_(feval_ubspline_3d_c_vgl,FEVAL_UBSPLINE_3D_C_VGL) (UBspline_3d_c **spline, double *x, double *y, double *z, complex_float *val, complex_float *grad, complex_float *lapl) { eval_UBspline_3d_c_vgl (*spline, *x, *y, *z, val, grad, lapl); } CFUNC void F77_FUNC_(feval_ubspline_3d_c_vgh,FEVAL_UBSPLINE_3D_C_VGH) (UBspline_3d_c **spline, double *x, double *y, double *z, complex_float *val, complex_float *grad, complex_float *hess) { eval_UBspline_3d_c_vgh (*spline, *x, *y, *z, val, grad, hess); } ///////////////////////////////// // 3D double-precision complex // ///////////////////////////////// CFUNC 
void F77_FUNC_(feval_ubspline_3d_z,FEVAL_UBSPLINE_3D_Z) (UBspline_3d_z **spline, double *x, double *y, double *z, complex_double *val) { eval_UBspline_3d_z (*spline, *x, *y, *z, val); } CFUNC void F77_FUNC_(feval_ubspline_3d_z_vg,FEVAL_UBSPLINE_3D_Z_VG) (UBspline_3d_z **spline, double *x, double *y, double *z, complex_double *val, complex_double *grad) { eval_UBspline_3d_z_vg (*spline, *x, *y, *z, val, grad); } CFUNC void F77_FUNC_(feval_ubspline_3d_z_vgl,FEVAL_UBSPLINE_3D_Z_VGL) (UBspline_3d_z **spline, double *x, double *y, double *z, complex_double *val, complex_double *grad, complex_double *lapl) { eval_UBspline_3d_z_vgl (*spline, *x, *y, *z, val, grad, lapl); } CFUNC void F77_FUNC_(feval_ubspline_3d_z_vgh,FEVAL_UBSPLINE_3D_Z_VGH) (UBspline_3d_z **spline, double *x, double *y, double *z, complex_double *val, complex_double *grad, complex_double *hess) { eval_UBspline_3d_z_vgh (*spline, *x, *y, *z, val, grad, hess); } einspline-0.9.2/src/fnubspline.h0000664000113000011300000003367011012400563013521 00000000000000#ifndef F_NUBSPLINE_H #define F_NUBSPLINE_H #include "config.h" #include "nugrid.h" #include "nubspline_structs.h" #ifdef __cplusplus #define CFUNC extern "C" /* Avoid name mangling in C++ */ #else #define CFUNC #endif ////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////// //// Grid Creation routines //// ////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////// CFUNC void F77_FUNC_(fcreate_general_grid,FCREATE_GENERAL_GRID) (double *points, int *num_points, NUgrid **grid); CFUNC void F77_FUNC_(fcreate_center_grid,FCREATE_CENTER_GRID) (double *start, double *end, double *ratio, int *num_points, NUgrid **grid); CFUNC void F77_FUNC_(fdestroy_grid,FDESTROY_GRID) (NUgrid **grid); ////////////////////////////////////////////////////////////////////// 
////////////////////////////////////////////////////////////////////// //// Nonuniform spline creation routines //// ////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////// //////// // 1D // //////// CFUNC void F77_FUNC_(fcreate_nubspline_1d_s,FCREATE_NUBSPLINE_1D_S) (NUgrid **x_grid, int* x0_code, float *x0_val, int *x1_code, float *x1_val, float *data, NUBspline_1d_s **spline); CFUNC void F77_FUNC_(fcreate_nubspline_1d_d,FCREATE_NUBSPLINE_1D_D) (NUgrid **x_grid, int *x0_code, double *x0_val, int *x1_code, double *x1_val, double *data, NUBspline_1d_d **spline); CFUNC void F77_FUNC_(fcreate_nubspline_1d_c,FCREATE_NUBSPLINE_1D_C) (NUgrid **x_grid, int *x0_code, complex_float *x0_val, int *x1_code, complex_float *x1_val, complex_float *data, NUBspline_1d_c **spline); CFUNC void F77_FUNC_(fcreate_nubspline_1d_z,FCREATE_NUBSPLINE_1D_Z) (NUgrid **x_grid, int *x0_code, complex_double *x0_val, int *x1_code, complex_double *x1_val, complex_double *data, NUBspline_1d_z **spline); //////// // 2D // //////// CFUNC void F77_FUNC_(fcreate_nubspline_2d_s,FCREATE_NUBSPLINE_2D_S) (NUgrid **x_grid, NUgrid **y_grid, int* x0_code, float *x0_val, int *x1_code, float *x1_val, int* y0_code, float *y0_val, int *y1_code, float *y1_val, float *data, NUBspline_2d_s **spline); CFUNC void F77_FUNC_(fcreate_nubspline_2d_d,FCREATE_NUBSPLINE_2D_D) (NUgrid **x_grid, NUgrid **y_grid, int *x0_code, double *x0_val, int *x1_code, double *x1_val, int *y0_code, double *y0_val, int *y1_code, double *y1_val, double *data, NUBspline_2d_d **spline); CFUNC void F77_FUNC_(fcreate_nubspline_2d_c,FCREATE_NUBSPLINE_2D_C) (NUgrid **x_grid, NUgrid **y_grid, int *x0_code, complex_float *x0_val, int *x1_code, complex_float *x1_val, int *y0_code, complex_float *y0_val, int *y1_code, complex_float *y1_val, complex_float *data, NUBspline_2d_c **spline); CFUNC void F77_FUNC_(fcreate_nubspline_2d_z,FCREATE_NUBSPLINE_2D_Z) (NUgrid 
**x_grid, NUgrid **y_grid, int *x0_code, complex_double *x0_val, int *x1_code, complex_double *x1_val, int *y0_code, complex_double *y0_val, int *y1_code, complex_double *y1_val, complex_double *data, NUBspline_2d_z **spline); //////// // 3D // //////// CFUNC void F77_FUNC_(fcreate_nubspline_3d_s,FCREATE_NUBSPLINE_3D_S) (NUgrid **x_grid, NUgrid **y_grid, NUgrid **z_grid, int* x0_code, float *x0_val, int *x1_code, float *x1_val, int* y0_code, float *y0_val, int *y1_code, float *y1_val, int* z0_code, float *z0_val, int *z1_code, float *z1_val, float *data, NUBspline_3d_s **spline); CFUNC void F77_FUNC_(fcreate_nubspline_3d_d,FCREATE_NUBSPLINE_3D_D) (NUgrid **x_grid, NUgrid **y_grid, NUgrid **z_grid, int *x0_code, double *x0_val, int *x1_code, double *x1_val, int *y0_code, double *y0_val, int *y1_code, double *y1_val, int* z0_code, float *z0_val, int *z1_code, float *z1_val, double *data, NUBspline_3d_d **spline); CFUNC void F77_FUNC_(fcreate_nubspline_3d_c,FCREATE_NUBSPLINE_3D_C) (NUgrid **x_grid, NUgrid **y_grid, NUgrid **z_grid, int *x0_code, complex_float *x0_val, int *x1_code, complex_float *x1_val, int *y0_code, complex_float *y0_val, int *y1_code, complex_float *y1_val, int *z0_code, complex_float *z0_val, int *z1_code, complex_float *z1_val, complex_float *data, NUBspline_3d_c **spline); CFUNC void F77_FUNC_(fcreate_nubspline_3d_z,FCREATE_NUBSPLINE_3D_Z) (NUgrid **x_grid, NUgrid **y_grid, NUgrid **z_grid, int *x0_code, complex_double *x0_val, int *x1_code, complex_double *x1_val, int *y0_code, complex_double *y0_val, int *y1_code, complex_double *y1_val, int *z0_code, complex_float *z0_val, int *z1_code, complex_float *z1_val, complex_double *data, NUBspline_3d_z **spline); ////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////// //// Nonuniform spline evaluation routines //// ////////////////////////////////////////////////////////////////////// 
////////////////////////////////////////////////////////////////////// ////////////////////////////// // 1D single-precision real // ////////////////////////////// CFUNC void F77_FUNC_(feval_nubspline_1d_s,FEVAL_NUBSPLINE_1D_S) (NUBspline_1d_s **spline, double *x, float *val); CFUNC void F77_FUNC_(feval_nubspline_1d_s_vg,FEVAL_NUBSPLINE_1D_S_VG) (NUBspline_1d_s **spline, double *x, float *val, float *grad); CFUNC void F77_FUNC_(feval_nubspline_1d_s_vgl,FEVAL_NUBSPLINE_1D_S_VGL) (NUBspline_1d_s **spline, double *x, float *val, float *grad, float *lapl); CFUNC void F77_FUNC_(feval_nubspline_1d_s_vgh,FEVAL_NUBSPLINE_1D_S_VGH) (NUBspline_1d_s **spline, double *x, float *val, float *grad, float *hess); ////////////////////////////// // 1D double-precision real // ////////////////////////////// CFUNC void F77_FUNC_(feval_nubspline_1d_d,FEVAL_NUBSPLINE_1D_D) (NUBspline_1d_d **spline, double *x, double *val); CFUNC void F77_FUNC_(feval_nubspline_1d_d_vg,FEVAL_NUBSPLINE_1D_D_VG) (NUBspline_1d_d **spline, double *x, double *val, double *grad); CFUNC void F77_FUNC_(feval_nubspline_1d_d_vgl,FEVAL_NUBSPLINE_1D_D_VGL) (NUBspline_1d_d **spline, double *x, double *val, double *grad, double *lapl); CFUNC void F77_FUNC_(feval_nubspline_1d_d_vgh,FEVAL_NUBSPLINE_1D_D_VGH) (NUBspline_1d_d **spline, double *x, double *val, double *grad, double *hess); ///////////////////////////////// // 1D single-precision complex // ///////////////////////////////// CFUNC void F77_FUNC_(feval_nubspline_1d_c,FEVAL_NUBSPLINE_1D_C) (NUBspline_1d_c **spline, double *x, complex_float *val); CFUNC void F77_FUNC_(feval_nubspline_1d_c_vg,FEVAL_NUBSPLINE_1D_C_VG) (NUBspline_1d_c **spline, double *x, complex_float *val, complex_float *grad); CFUNC void F77_FUNC_(feval_nubspline_1d_c_vgl,FEVAL_NUBSPLINE_1D_C_VGL) (NUBspline_1d_c **spline, double *x, complex_float *val, complex_float *grad, complex_float *lapl); CFUNC void F77_FUNC_(feval_nubspline_1d_c_vgh,FEVAL_NUBSPLINE_1D_C_VGH) (NUBspline_1d_c **spline, 
double *x, complex_float *val, complex_float *grad, complex_float *hess); ///////////////////////////////// // 1D double-precision complex // ///////////////////////////////// CFUNC void F77_FUNC_(feval_nnubspline_1d_z,FEVAL_NNUBSPLINE_1D_Z) (NUBspline_1d_z **spline, double *x, complex_double *val); CFUNC void F77_FUNC_(feval_nubspline_1d_z_vg,FEVAL_NUBSPLINE_1D_Z_VG) (NUBspline_1d_z **spline, double *x, complex_double *val, complex_double *grad); CFUNC void F77_FUNC_(feval_nubspline_1d_z_vgl,FEVAL_NUBSPLINE_1D_Z_VGL) (NUBspline_1d_z **spline, double *x, complex_double *val, complex_double *grad, complex_double *lapl); CFUNC void F77_FUNC_(feval_nubspline_1d_z_vgh,FEVAL_NUBSPLINE_1D_Z_VGH) (NUBspline_1d_z **spline, double *x, complex_double *val, complex_double *grad, complex_double *hess); ////////////////////////////// // 2D single-precision real // ////////////////////////////// CFUNC void F77_FUNC_(feval_nubspline_2d_s,FEVAL_NUBSPLINE_2D_S) (NUBspline_2d_s **spline, double *x, double *y, float *val); CFUNC void F77_FUNC_(feval_nubspline_2d_s_vg,FEVAL_NUBSPLINE_2D_S_VG) (NUBspline_2d_s **spline, double *x, double *y, float *val, float *grad); CFUNC void F77_FUNC_(feval_nubspline_2d_s_vgl,FEVAL_NUBSPLINE_2D_S_VGL) (NUBspline_2d_s **spline, double *x, double *y, float *val, float *grad, float* lapl); CFUNC void F77_FUNC_(feval_nubspline_2d_s_vgh,FEVAL_NUBSPLINE_2D_S_VGH) (NUBspline_2d_s **spline, double *x, double *y, float *val, float *grad, float *hess); ////////////////////////////// // 2D double-precision real // ////////////////////////////// CFUNC void F77_FUNC_(feval_nubspline_2d_d,FEVAL_NUBSPLINE_2D_D) (NUBspline_2d_d **spline, double *x, double *y, double *val); CFUNC void F77_FUNC_(feval_nubspline_2d_d_vg,FEVAL_NUBSPLINE_2D_D_VG) (NUBspline_2d_d **spline, double *x, double *y, double *val, double *grad); CFUNC void F77_FUNC_(feval_nubspline_2d_d_vgl,FEVAL_NUBSPLINE_2D_D_VGL) (NUBspline_2d_d **spline, double *x, double *y, double *val, double *grad, 
double *lapl); CFUNC void F77_FUNC_(feval_nubspline_2d_d_vgh,FEVAL_NUBSPLINE_2D_D_VGH) (NUBspline_2d_d **spline, double *x, double *y, double *val, double *grad, double *hess); ///////////////////////////////// // 2D single-precision complex // ///////////////////////////////// CFUNC void F77_FUNC_(feval_nubspline_2d_c,FEVAL_NUBSPLINE_2D_C) (NUBspline_2d_c **spline, double *x, double *y, complex_float *val); CFUNC void F77_FUNC_(feval_nubspline_2d_c_vg,FEVAL_NUBSPLINE_2D_C_VG) (NUBspline_2d_c **spline, double *x, double *y, complex_float *val, complex_float *grad); CFUNC void F77_FUNC_(feval_nubspline_2d_c_vgl,FEVAL_NUBSPLINE_2D_C_VGL) (NUBspline_2d_c **spline, double *x, double *y, complex_float *val, complex_float *grad, complex_float *lapl); CFUNC void F77_FUNC_(feval_nubspline_2d_c_vgh,FEVAL_NUBSPLINE_2D_C_VGH) (NUBspline_2d_c **spline, double *x, double *y, complex_float *val, complex_float *grad, complex_float *hess); ///////////////////////////////// // 2D double-precision complex // ///////////////////////////////// CFUNC void F77_FUNC_(feval_nubspline_2d_z,FEVAL_NUBSPLINE_2D_Z) (NUBspline_2d_z **spline, double *x, double *y, complex_double *val); CFUNC void F77_FUNC_(feval_nubspline_2d_z_vg,FEVAL_NUBSPLINE_2D_Z_VG) (NUBspline_2d_z **spline, double *x, double *y, complex_double *val, complex_double *grad); CFUNC void F77_FUNC_(feval_nubspline_2d_z_vgl,FEVAL_NUBSPLINE_2D_Z_VGL) (NUBspline_2d_z **spline, double *x, double *y, complex_double *val, complex_double *grad, complex_double *lapl); CFUNC void F77_FUNC_(feval_nubspline_2d_z_vgh,FEVAL_NUBSPLINE_2D_Z_VGH) (NUBspline_2d_z **spline, double *x, double *y, complex_double *val, complex_double *grad, complex_double *hess); ////////////////////////////// // 3D single-precision real // ////////////////////////////// CFUNC void F77_FUNC_(feval_nubspline_3d_s,FEVAL_NUBSPLINE_3D_S) (NUBspline_3d_s **spline, double *x, double *y, double *z, float *val); CFUNC void 
F77_FUNC_(feval_nubspline_3d_s_vg,FEVAL_NUBSPLINE_3D_S_VG) (NUBspline_3d_s **spline, double *x, double *y, double *z, float *val, float *grad); CFUNC void F77_FUNC_(feval_nubspline_3d_s_vgl,FEVAL_NUBSPLINE_3D_S_VGL) (NUBspline_3d_s **spline, double *x, double *y, double *z, float *val, float *grad, float* lapl); CFUNC void F77_FUNC_(feval_nubspline_3d_s_vgh,FEVAL_NUBSPLINE_3D_S_VGH) (NUBspline_3d_s **spline, double *x, double *y, double *z, float *val, float *grad, float *hess); ////////////////////////////// // 3D double-precision real // ////////////////////////////// CFUNC void F77_FUNC_(feval_nubspline_3d_d,FEVAL_NUBSPLINE_3D_D) (NUBspline_3d_d **spline, double *x, double *y, double *z, double *val); CFUNC void F77_FUNC_(feval_nubspline_3d_d_vg,FEVAL_NUBSPLINE_3D_D_VG) (NUBspline_3d_d **spline, double *x, double *y, double *z, double *val, double *grad); CFUNC void F77_FUNC_(feval_nubspline_3d_d_vgl,FEVAL_NUBSPLINE_3D_D_VGL) (NUBspline_3d_d **spline, double *x, double *y, double *z, double *val, double *grad, double *lapl); CFUNC void F77_FUNC_(feval_nubspline_3d_d_vgh,FEVAL_NUBSPLINE_3D_D_VGH) (NUBspline_3d_d **spline, double *x, double *y, double *z, double *val, double *grad, double *hess); ///////////////////////////////// // 3D single-precision complex // ///////////////////////////////// CFUNC void F77_FUNC_(feval_nubspline_3d_c,FEVAL_NUBSPLINE_3D_C) (NUBspline_3d_c **spline, double *x, double *y, double *z, complex_float *val); CFUNC void F77_FUNC_(feval_nubspline_3d_c_vg,FEVAL_NUBSPLINE_3D_C_VG) (NUBspline_3d_c **spline, double *x, double *y, double *z, complex_float *val, complex_float *grad); CFUNC void F77_FUNC_(feval_nubspline_3d_c_vgl,FEVAL_NUBSPLINE_3D_C_VGL) (NUBspline_3d_c **spline, double *x, double *y, double *z, complex_float *val, complex_float *grad, complex_float *lapl); CFUNC void F77_FUNC_(feval_nubspline_3d_c_vgh,FEVAL_NUBSPLINE_3D_C_VGH) (NUBspline_3d_c **spline, double *x, double *y, double *z, complex_float *val, complex_float *grad, 
complex_float *hess); ///////////////////////////////// // 3D double-precision complex // ///////////////////////////////// CFUNC void F77_FUNC_(feval_nubspline_3d_z,FEVAL_NUBSPLINE_3D_Z) (NUBspline_3d_z **spline, double *x, double *y, double *z, complex_double *val); CFUNC void F77_FUNC_(feval_nubspline_3d_z_vg,FEVAL_NUBSPLINE_3D_Z_VG) (NUBspline_3d_z **spline, double *x, double *y, double *z, complex_double *val, complex_double *grad); CFUNC void F77_FUNC_(feval_nubspline_3d_z_vgl,FEVAL_NUBSPLINE_3D_Z_VGL) (NUBspline_3d_z **spline, double *x, double *y, double *z, complex_double *val, complex_double *grad, complex_double *lapl); CFUNC void F77_FUNC_(feval_nubspline_3d_z_vgh,FEVAL_NUBSPLINE_3D_Z_VGH) (NUBspline_3d_z **spline, double *x, double *y, double *z, complex_double *val, complex_double *grad, complex_double *hess); #endif einspline-0.9.2/src/multi_nubspline_eval_d.h0000664000113000011300000001017411035721725016104 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. // // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. 
// // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #ifndef MULTI_NUBSPLINE_EVAL_D_H #define MULTI_NUBSPLINE_EVAL_D_H #include #include #include "multi_nubspline_structs.h" /************************************************************/ /* 1D double-precision, real evaulation functions */ /************************************************************/ void eval_multi_NUBspline_1d_d (multi_NUBspline_1d_d *spline, double x, double* restrict vals); void eval_multi_NUBspline_1d_d_vg (multi_NUBspline_1d_d *spline, double x, double* restrict vals, double* restrict grads); void eval_multi_NUBspline_1d_d_vgl (multi_NUBspline_1d_d *spline, double x, double* restrict vals, double* restrict grads, double* restrict lapl); void eval_multi_NUBspline_1d_d_vgh (multi_NUBspline_1d_d *spline, double x, double* restrict vals, double* restrict grads, double* restrict hess); /************************************************************/ /* 2D double-precision, real evaulation functions */ /************************************************************/ void eval_multi_NUBspline_2d_d (multi_NUBspline_2d_d *spline, double x, double y, double* restrict vals); void eval_multi_NUBspline_2d_d_vg (multi_NUBspline_2d_d *spline, double x, double y, double* restrict vals, double* restrict grads); void eval_multi_NUBspline_2d_d_vgl (multi_NUBspline_2d_d *spline, double x, double y, double* restrict vals, double* restrict grads, double* restrict lapl); void eval_multi_NUBspline_2d_d_vgh (multi_NUBspline_2d_d *spline, double x, double y, double* restrict vals, double* restrict grads, double* restrict hess); /************************************************************/ /* 3D double-precision, real evaulation functions */ 
/************************************************************/ void eval_multi_NUBspline_3d_d (multi_NUBspline_3d_d *spline, double x, double y, double z, double* restrict vals); void eval_multi_NUBspline_3d_d_vg (multi_NUBspline_3d_d *spline, double x, double y, double z, double* restrict vals, double* restrict grads); void eval_multi_NUBspline_3d_d_vgl (multi_NUBspline_3d_d *spline, double x, double y, double z, double* restrict vals, double* restrict grads, double* restrict lapl); void eval_multi_NUBspline_3d_d_vgh (multi_NUBspline_3d_d *spline, double x, double y, double z, double* restrict vals, double* restrict grads, double* restrict hess); #endif einspline-0.9.2/src/nubspline_eval_std_c.h0000664000113000011300000006645511012400563015545 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. // // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. 
// // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #ifndef NUBSPLINE_EVAL_STD_C_H #define NUBSPLINE_EVAL_STD_C_H #include #include #include "nubspline_structs.h" /************************************************************/ /* 1D single-precision, real evaulation functions */ /************************************************************/ /* Value only */ inline void eval_NUBspline_1d_c (NUBspline_1d_c * restrict spline, double x, complex_float* restrict val) { float bfuncs[4]; int i = get_NUBasis_funcs_s (spline->x_basis, x, bfuncs); complex_float* restrict coefs = spline->coefs; *val = (coefs[i+0]*bfuncs[0] +coefs[i+1]*bfuncs[1] + coefs[i+2]*bfuncs[2] +coefs[i+3]*bfuncs[3]); } /* Value and first derivative */ inline void eval_NUBspline_1d_c_vg (NUBspline_1d_c * restrict spline, double x, complex_float* restrict val, complex_float* restrict grad) { float bfuncs[4], dbfuncs[4]; int i = get_NUBasis_dfuncs_s (spline->x_basis, x, bfuncs, dbfuncs); complex_float* restrict coefs = spline->coefs; *val = (coefs[i+0]* bfuncs[0] + coefs[i+1]* bfuncs[1] + coefs[i+2]* bfuncs[2] + coefs[i+3]* bfuncs[3]); *grad = (coefs[i+0]*dbfuncs[0] + coefs[i+1]*dbfuncs[1] + coefs[i+2]*dbfuncs[2] + coefs[i+3]*dbfuncs[3]); } /* Value, first derivative, and second derivative */ inline void eval_NUBspline_1d_c_vgl (NUBspline_1d_c * restrict spline, double x, complex_float* restrict val, complex_float* restrict grad, complex_float* restrict lapl) { float bfuncs[4], dbfuncs[4], d2bfuncs[4]; int i = get_NUBasis_d2funcs_s (spline->x_basis, x, bfuncs, dbfuncs, d2bfuncs); complex_float* restrict coefs = spline->coefs; *val = (coefs[i+0]* bfuncs[0] + coefs[i+1]* bfuncs[1] + coefs[i+2]* bfuncs[2] + coefs[i+3]* bfuncs[3]); *grad = (coefs[i+0]* 
dbfuncs[0] + coefs[i+1]* dbfuncs[1] + coefs[i+2]* dbfuncs[2] + coefs[i+3]* dbfuncs[3]); *lapl = (coefs[i+0]*d2bfuncs[0] + coefs[i+1]*d2bfuncs[1] + coefs[i+2]*d2bfuncs[2] + coefs[i+3]*d2bfuncs[3]); } inline void eval_NUBspline_1d_c_vgh (NUBspline_1d_c * restrict spline, double x, complex_float* restrict val, complex_float* restrict grad, complex_float* restrict hess) { eval_NUBspline_1d_c_vgl (spline, x, val, grad, hess); } /************************************************************/ /* 2D single-precision, real evaulation functions */ /************************************************************/ /* Value only */ inline void eval_NUBspline_2d_c (NUBspline_2d_c * restrict spline, double x, double y, complex_float* restrict val) { float a[4], b[4]; int ix = get_NUBasis_funcs_s (spline->x_basis, x, a); int iy = get_NUBasis_funcs_s (spline->y_basis, y, b); complex_float* restrict coefs = spline->coefs; int xs = spline->x_stride; #define C(i,j) coefs[(ix+(i))*xs+iy+(j)] *val = (a[0]*(C(0,0)*b[0]+C(0,1)*b[1]+C(0,2)*b[2]+C(0,3)*b[3])+ a[1]*(C(1,0)*b[0]+C(1,1)*b[1]+C(1,2)*b[2]+C(1,3)*b[3])+ a[2]*(C(2,0)*b[0]+C(2,1)*b[1]+C(2,2)*b[2]+C(2,3)*b[3])+ a[3]*(C(3,0)*b[0]+C(3,1)*b[1]+C(3,2)*b[2]+C(3,3)*b[3])); #undef C } /* Value and gradient */ inline void eval_NUBspline_2d_c_vg (NUBspline_2d_c * restrict spline, double x, double y, complex_float* restrict val, complex_float* restrict grad) { float a[4], b[4], da[4], db[4]; int ix = get_NUBasis_dfuncs_s (spline->x_basis, x, a, da); int iy = get_NUBasis_dfuncs_s (spline->y_basis, y, b, db); complex_float* restrict coefs = spline->coefs; int xs = spline->x_stride; #define C(i,j) coefs[(ix+(i))*xs+iy+(j)] *val = (a[0]*(C(0,0)*b[0]+C(0,1)*b[1]+C(0,2)*b[2]+C(0,3)*b[3])+ a[1]*(C(1,0)*b[0]+C(1,1)*b[1]+C(1,2)*b[2]+C(1,3)*b[3])+ a[2]*(C(2,0)*b[0]+C(2,1)*b[1]+C(2,2)*b[2]+C(2,3)*b[3])+ a[3]*(C(3,0)*b[0]+C(3,1)*b[1]+C(3,2)*b[2]+C(3,3)*b[3])); grad[0] = (da[0]*(C(0,0)*b[0]+C(0,1)*b[1]+C(0,2)*b[2]+C(0,3)*b[3])+ 
da[1]*(C(1,0)*b[0]+C(1,1)*b[1]+C(1,2)*b[2]+C(1,3)*b[3])+ da[2]*(C(2,0)*b[0]+C(2,1)*b[1]+C(2,2)*b[2]+C(2,3)*b[3])+ da[3]*(C(3,0)*b[0]+C(3,1)*b[1]+C(3,2)*b[2]+C(3,3)*b[3])); grad[1] = (a[0]*(C(0,0)*db[0]+C(0,1)*db[1]+C(0,2)*db[2]+C(0,3)*db[3])+ a[1]*(C(1,0)*db[0]+C(1,1)*db[1]+C(1,2)*db[2]+C(1,3)*db[3])+ a[2]*(C(2,0)*db[0]+C(2,1)*db[1]+C(2,2)*db[2]+C(2,3)*db[3])+ a[3]*(C(3,0)*db[0]+C(3,1)*db[1]+C(3,2)*db[2]+C(3,3)*db[3])); #undef C } /* Value, gradient, and laplacian */ inline void eval_NUBspline_2d_c_vgl (NUBspline_2d_c * restrict spline, double x, double y, complex_float* restrict val, complex_float* restrict grad, complex_float* restrict lapl) { float a[4], b[4], da[4], db[4], d2a[4], d2b[4]; complex_float bc[4]; int ix = get_NUBasis_d2funcs_s (spline->x_basis, x, a, da, d2a); int iy = get_NUBasis_d2funcs_s (spline->y_basis, y, b, db, d2b); complex_float* restrict coefs = spline->coefs; int xs = spline->x_stride; #define C(i,j) coefs[(ix+(i))*xs+iy+(j)] bc[0] = (C(0,0)*b[0]+C(0,1)*b[1]+C(0,2)*b[2]+C(0,3)*b[3]); bc[1] = (C(1,0)*b[0]+C(1,1)*b[1]+C(1,2)*b[2]+C(1,3)*b[3]); bc[2] = (C(2,0)*b[0]+C(2,1)*b[1]+C(2,2)*b[2]+C(2,3)*b[3]); bc[3] = (C(3,0)*b[0]+C(3,1)*b[1]+C(3,2)*b[2]+C(3,3)*b[3]); *val = (a[0]*bc[0] + a[1]*bc[1] + a[2]*bc[2] + a[3]*bc[3]); grad[0] = (da[0]*bc[0] + da[1]*bc[1] + da[2]*bc[2] + da[3]*bc[3]); grad[1] = (a[0]*(C(0,0)*db[0]+C(0,1)*db[1]+C(0,2)*db[2]+C(0,3)*db[3])+ a[1]*(C(1,0)*db[0]+C(1,1)*db[1]+C(1,2)*db[2]+C(1,3)*db[3])+ a[2]*(C(2,0)*db[0]+C(2,1)*db[1]+C(2,2)*db[2]+C(2,3)*db[3])+ a[3]*(C(3,0)*db[0]+C(3,1)*db[1]+C(3,2)*db[2]+C(3,3)*db[3])); *lapl = (d2a[0]*bc[0] + d2a[1]*bc[1] + d2a[2]*bc[2] + d2a[3]*bc[3]+ a[0]*(C(0,0)*d2b[0]+C(0,1)*d2b[1]+C(0,2)*d2b[2]+C(0,3)*d2b[3])+ a[1]*(C(1,0)*d2b[0]+C(1,1)*d2b[1]+C(1,2)*d2b[2]+C(1,3)*d2b[3])+ a[2]*(C(2,0)*d2b[0]+C(2,1)*d2b[1]+C(2,2)*d2b[2]+C(2,3)*d2b[3])+ a[3]*(C(3,0)*d2b[0]+C(3,1)*d2b[1]+C(3,2)*d2b[2]+C(3,3)*d2b[3])); #undef C } /* Value, gradient, and Hessian */ inline void eval_NUBspline_2d_c_vgh 
(NUBspline_2d_c * restrict spline, double x, double y, complex_float* restrict val, complex_float* restrict grad, complex_float* restrict hess) { float a[4], b[4], da[4], db[4], d2a[4], d2b[4]; complex_float bc[4]; int ix = get_NUBasis_d2funcs_s (spline->x_basis, x, a, da, d2a); int iy = get_NUBasis_d2funcs_s (spline->y_basis, y, b, db, d2b); complex_float* restrict coefs = spline->coefs; int xs = spline->x_stride; #define C(i,j) coefs[(ix+(i))*xs+iy+(j)] bc[0] = (C(0,0)*b[0]+C(0,1)*b[1]+C(0,2)*b[2]+C(0,3)*b[3]); bc[1] = (C(1,0)*b[0]+C(1,1)*b[1]+C(1,2)*b[2]+C(1,3)*b[3]); bc[2] = (C(2,0)*b[0]+C(2,1)*b[1]+C(2,2)*b[2]+C(2,3)*b[3]); bc[3] = (C(3,0)*b[0]+C(3,1)*b[1]+C(3,2)*b[2]+C(3,3)*b[3]); *val = (a[0]*bc[0] + a[1]*bc[1] + a[2]*bc[2] + a[3]*bc[3]); grad[0] = (da[0]*bc[0] + da[1]*bc[1] + da[2]*bc[2] + da[3]*bc[3]); grad[1] = (a[0]*(C(0,0)*db[0]+C(0,1)*db[1]+C(0,2)*db[2]+C(0,3)*db[3])+ a[1]*(C(1,0)*db[0]+C(1,1)*db[1]+C(1,2)*db[2]+C(1,3)*db[3])+ a[2]*(C(2,0)*db[0]+C(2,1)*db[1]+C(2,2)*db[2]+C(2,3)*db[3])+ a[3]*(C(3,0)*db[0]+C(3,1)*db[1]+C(3,2)*db[2]+C(3,3)*db[3])); hess[0] = (d2a[0]*bc[0] + d2a[1]*bc[1] + d2a[2]*bc[2] + d2a[3]*bc[3]); hess[1] = (da[0]*(C(0,0)*db[0]+C(0,1)*db[1]+C(0,2)*db[2]+C(0,3)*db[3])+ da[1]*(C(1,0)*db[0]+C(1,1)*db[1]+C(1,2)*db[2]+C(1,3)*db[3])+ da[2]*(C(2,0)*db[0]+C(2,1)*db[1]+C(2,2)*db[2]+C(2,3)*db[3])+ da[3]*(C(3,0)*db[0]+C(3,1)*db[1]+C(3,2)*db[2]+C(3,3)*db[3])); hess[3] = (a[0]*(C(0,0)*d2b[0]+C(0,1)*d2b[1]+C(0,2)*d2b[2]+C(0,3)*d2b[3])+ a[1]*(C(1,0)*d2b[0]+C(1,1)*d2b[1]+C(1,2)*d2b[2]+C(1,3)*d2b[3])+ a[2]*(C(2,0)*d2b[0]+C(2,1)*d2b[1]+C(2,2)*d2b[2]+C(2,3)*d2b[3])+ a[3]*(C(3,0)*d2b[0]+C(3,1)*d2b[1]+C(3,2)*d2b[2]+C(3,3)*d2b[3])); hess[2] = hess[1]; #undef C } /************************************************************/ /* 3D single-precision, real evaulation functions */ /************************************************************/ /* Value only */ inline void eval_NUBspline_3d_c (NUBspline_3d_c * restrict spline, double x, double y, double z, 
complex_float* restrict val) { float a[4], b[4], c[4]; int ix = get_NUBasis_funcs_s (spline->x_basis, x, a); int iy = get_NUBasis_funcs_s (spline->y_basis, y, b); int iz = get_NUBasis_funcs_s (spline->z_basis, z, c); complex_float* restrict coefs = spline->coefs; int xs = spline->x_stride; int ys = spline->y_stride; #define P(i,j,k) coefs[(ix+(i))*xs+(iy+(j))*ys+(iz+(k))] *val = (a[0]*(b[0]*(P(0,0,0)*c[0]+P(0,0,1)*c[1]+P(0,0,2)*c[2]+P(0,0,3)*c[3])+ b[1]*(P(0,1,0)*c[0]+P(0,1,1)*c[1]+P(0,1,2)*c[2]+P(0,1,3)*c[3])+ b[2]*(P(0,2,0)*c[0]+P(0,2,1)*c[1]+P(0,2,2)*c[2]+P(0,2,3)*c[3])+ b[3]*(P(0,3,0)*c[0]+P(0,3,1)*c[1]+P(0,3,2)*c[2]+P(0,3,3)*c[3]))+ a[1]*(b[0]*(P(1,0,0)*c[0]+P(1,0,1)*c[1]+P(1,0,2)*c[2]+P(1,0,3)*c[3])+ b[1]*(P(1,1,0)*c[0]+P(1,1,1)*c[1]+P(1,1,2)*c[2]+P(1,1,3)*c[3])+ b[2]*(P(1,2,0)*c[0]+P(1,2,1)*c[1]+P(1,2,2)*c[2]+P(1,2,3)*c[3])+ b[3]*(P(1,3,0)*c[0]+P(1,3,1)*c[1]+P(1,3,2)*c[2]+P(1,3,3)*c[3]))+ a[2]*(b[0]*(P(2,0,0)*c[0]+P(2,0,1)*c[1]+P(2,0,2)*c[2]+P(2,0,3)*c[3])+ b[1]*(P(2,1,0)*c[0]+P(2,1,1)*c[1]+P(2,1,2)*c[2]+P(2,1,3)*c[3])+ b[2]*(P(2,2,0)*c[0]+P(2,2,1)*c[1]+P(2,2,2)*c[2]+P(2,2,3)*c[3])+ b[3]*(P(2,3,0)*c[0]+P(2,3,1)*c[1]+P(2,3,2)*c[2]+P(2,3,3)*c[3]))+ a[3]*(b[0]*(P(3,0,0)*c[0]+P(3,0,1)*c[1]+P(3,0,2)*c[2]+P(3,0,3)*c[3])+ b[1]*(P(3,1,0)*c[0]+P(3,1,1)*c[1]+P(3,1,2)*c[2]+P(3,1,3)*c[3])+ b[2]*(P(3,2,0)*c[0]+P(3,2,1)*c[1]+P(3,2,2)*c[2]+P(3,2,3)*c[3])+ b[3]*(P(3,3,0)*c[0]+P(3,3,1)*c[1]+P(3,3,2)*c[2]+P(3,3,3)*c[3]))); #undef P } /* Value and gradient */ inline void eval_NUBspline_3d_c_vg (NUBspline_3d_c * restrict spline, double x, double y, double z, complex_float* restrict val, complex_float* restrict grad) { float a[4], b[4], c[4], da[4], db[4], dc[4]; complex_float cP[16], bcP[4], dbcP[4]; int ix = get_NUBasis_dfuncs_s (spline->x_basis, x, a, da); int iy = get_NUBasis_dfuncs_s (spline->y_basis, y, b, db); int iz = get_NUBasis_dfuncs_s (spline->z_basis, z, c, dc); complex_float* restrict coefs = spline->coefs; int xs = spline->x_stride; int ys = spline->y_stride; 
#define P(i,j,k) coefs[(ix+(i))*xs+(iy+(j))*ys+(iz+(k))] cP[ 0] = (P(0,0,0)*c[0]+P(0,0,1)*c[1]+P(0,0,2)*c[2]+P(0,0,3)*c[3]); cP[ 1] = (P(0,1,0)*c[0]+P(0,1,1)*c[1]+P(0,1,2)*c[2]+P(0,1,3)*c[3]); cP[ 2] = (P(0,2,0)*c[0]+P(0,2,1)*c[1]+P(0,2,2)*c[2]+P(0,2,3)*c[3]); cP[ 3] = (P(0,3,0)*c[0]+P(0,3,1)*c[1]+P(0,3,2)*c[2]+P(0,3,3)*c[3]); cP[ 4] = (P(1,0,0)*c[0]+P(1,0,1)*c[1]+P(1,0,2)*c[2]+P(1,0,3)*c[3]); cP[ 5] = (P(1,1,0)*c[0]+P(1,1,1)*c[1]+P(1,1,2)*c[2]+P(1,1,3)*c[3]); cP[ 6] = (P(1,2,0)*c[0]+P(1,2,1)*c[1]+P(1,2,2)*c[2]+P(1,2,3)*c[3]); cP[ 7] = (P(1,3,0)*c[0]+P(1,3,1)*c[1]+P(1,3,2)*c[2]+P(1,3,3)*c[3]); cP[ 8] = (P(2,0,0)*c[0]+P(2,0,1)*c[1]+P(2,0,2)*c[2]+P(2,0,3)*c[3]); cP[ 9] = (P(2,1,0)*c[0]+P(2,1,1)*c[1]+P(2,1,2)*c[2]+P(2,1,3)*c[3]); cP[10] = (P(2,2,0)*c[0]+P(2,2,1)*c[1]+P(2,2,2)*c[2]+P(2,2,3)*c[3]); cP[11] = (P(2,3,0)*c[0]+P(2,3,1)*c[1]+P(2,3,2)*c[2]+P(2,3,3)*c[3]); cP[12] = (P(3,0,0)*c[0]+P(3,0,1)*c[1]+P(3,0,2)*c[2]+P(3,0,3)*c[3]); cP[13] = (P(3,1,0)*c[0]+P(3,1,1)*c[1]+P(3,1,2)*c[2]+P(3,1,3)*c[3]); cP[14] = (P(3,2,0)*c[0]+P(3,2,1)*c[1]+P(3,2,2)*c[2]+P(3,2,3)*c[3]); cP[15] = (P(3,3,0)*c[0]+P(3,3,1)*c[1]+P(3,3,2)*c[2]+P(3,3,3)*c[3]); bcP[0] = ( b[0]*cP[ 0] + b[1]*cP[ 1] + b[2]*cP[ 2] + b[3]*cP[ 3]); bcP[1] = ( b[0]*cP[ 4] + b[1]*cP[ 5] + b[2]*cP[ 6] + b[3]*cP[ 7]); bcP[2] = ( b[0]*cP[ 8] + b[1]*cP[ 9] + b[2]*cP[10] + b[3]*cP[11]); bcP[3] = ( b[0]*cP[12] + b[1]*cP[13] + b[2]*cP[14] + b[3]*cP[15]); dbcP[0] = ( db[0]*cP[ 0] + db[1]*cP[ 1] + db[2]*cP[ 2] + db[3]*cP[ 3]); dbcP[1] = ( db[0]*cP[ 4] + db[1]*cP[ 5] + db[2]*cP[ 6] + db[3]*cP[ 7]); dbcP[2] = ( db[0]*cP[ 8] + db[1]*cP[ 9] + db[2]*cP[10] + db[3]*cP[11]); dbcP[3] = ( db[0]*cP[12] + db[1]*cP[13] + db[2]*cP[14] + db[3]*cP[15]); *val = ( a[0]*bcP[0] + a[1]*bcP[1] + a[2]*bcP[2] + a[3]*bcP[3]); grad[0] = (da[0]*bcP[0] + da[1]*bcP[1] + da[2]*bcP[2] + da[3]*bcP[3]); grad[1] = (a[0]*dbcP[0] + a[1]*dbcP[1] + a[2]*dbcP[2] + a[3]*dbcP[3]); grad[2] = (a[0]*(b[0]*(P(0,0,0)*dc[0]+P(0,0,1)*dc[1]+P(0,0,2)*dc[2]+P(0,0,3)*dc[3])+ 
b[1]*(P(0,1,0)*dc[0]+P(0,1,1)*dc[1]+P(0,1,2)*dc[2]+P(0,1,3)*dc[3])+ b[2]*(P(0,2,0)*dc[0]+P(0,2,1)*dc[1]+P(0,2,2)*dc[2]+P(0,2,3)*dc[3])+ b[3]*(P(0,3,0)*dc[0]+P(0,3,1)*dc[1]+P(0,3,2)*dc[2]+P(0,3,3)*dc[3]))+ a[1]*(b[0]*(P(1,0,0)*dc[0]+P(1,0,1)*dc[1]+P(1,0,2)*dc[2]+P(1,0,3)*dc[3])+ b[1]*(P(1,1,0)*dc[0]+P(1,1,1)*dc[1]+P(1,1,2)*dc[2]+P(1,1,3)*dc[3])+ b[2]*(P(1,2,0)*dc[0]+P(1,2,1)*dc[1]+P(1,2,2)*dc[2]+P(1,2,3)*dc[3])+ b[3]*(P(1,3,0)*dc[0]+P(1,3,1)*dc[1]+P(1,3,2)*dc[2]+P(1,3,3)*dc[3]))+ a[2]*(b[0]*(P(2,0,0)*dc[0]+P(2,0,1)*dc[1]+P(2,0,2)*dc[2]+P(2,0,3)*dc[3])+ b[1]*(P(2,1,0)*dc[0]+P(2,1,1)*dc[1]+P(2,1,2)*dc[2]+P(2,1,3)*dc[3])+ b[2]*(P(2,2,0)*dc[0]+P(2,2,1)*dc[1]+P(2,2,2)*dc[2]+P(2,2,3)*dc[3])+ b[3]*(P(2,3,0)*dc[0]+P(2,3,1)*dc[1]+P(2,3,2)*dc[2]+P(2,3,3)*dc[3]))+ a[3]*(b[0]*(P(3,0,0)*dc[0]+P(3,0,1)*dc[1]+P(3,0,2)*dc[2]+P(3,0,3)*dc[3])+ b[1]*(P(3,1,0)*dc[0]+P(3,1,1)*dc[1]+P(3,1,2)*dc[2]+P(3,1,3)*dc[3])+ b[2]*(P(3,2,0)*dc[0]+P(3,2,1)*dc[1]+P(3,2,2)*dc[2]+P(3,2,3)*dc[3])+ b[3]*(P(3,3,0)*dc[0]+P(3,3,1)*dc[1]+P(3,3,2)*dc[2]+P(3,3,3)*dc[3]))); #undef P } /* Value, gradient, and laplacian */ inline void eval_NUBspline_3d_c_vgl (NUBspline_3d_c * restrict spline, double x, double y, double z, complex_float* restrict val, complex_float* restrict grad, complex_float* restrict lapl) { float a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4]; complex_float cP[16], dcP[16], bcP[4], dbcP[4], d2bcP[4], bdcP[4]; int ix = get_NUBasis_d2funcs_s (spline->x_basis, x, a, da, d2a); int iy = get_NUBasis_d2funcs_s (spline->y_basis, y, b, db, d2b); int iz = get_NUBasis_d2funcs_s (spline->z_basis, z, c, dc, d2c); complex_float* restrict coefs = spline->coefs; int xs = spline->x_stride; int ys = spline->y_stride; #define P(i,j,k) coefs[(ix+(i))*xs+(iy+(j))*ys+(iz+(k))] cP[ 0] = (P(0,0,0)*c[0]+P(0,0,1)*c[1]+P(0,0,2)*c[2]+P(0,0,3)*c[3]); cP[ 1] = (P(0,1,0)*c[0]+P(0,1,1)*c[1]+P(0,1,2)*c[2]+P(0,1,3)*c[3]); cP[ 2] = (P(0,2,0)*c[0]+P(0,2,1)*c[1]+P(0,2,2)*c[2]+P(0,2,3)*c[3]); cP[ 3] = 
(P(0,3,0)*c[0]+P(0,3,1)*c[1]+P(0,3,2)*c[2]+P(0,3,3)*c[3]); cP[ 4] = (P(1,0,0)*c[0]+P(1,0,1)*c[1]+P(1,0,2)*c[2]+P(1,0,3)*c[3]); cP[ 5] = (P(1,1,0)*c[0]+P(1,1,1)*c[1]+P(1,1,2)*c[2]+P(1,1,3)*c[3]); cP[ 6] = (P(1,2,0)*c[0]+P(1,2,1)*c[1]+P(1,2,2)*c[2]+P(1,2,3)*c[3]); cP[ 7] = (P(1,3,0)*c[0]+P(1,3,1)*c[1]+P(1,3,2)*c[2]+P(1,3,3)*c[3]); cP[ 8] = (P(2,0,0)*c[0]+P(2,0,1)*c[1]+P(2,0,2)*c[2]+P(2,0,3)*c[3]); cP[ 9] = (P(2,1,0)*c[0]+P(2,1,1)*c[1]+P(2,1,2)*c[2]+P(2,1,3)*c[3]); cP[10] = (P(2,2,0)*c[0]+P(2,2,1)*c[1]+P(2,2,2)*c[2]+P(2,2,3)*c[3]); cP[11] = (P(2,3,0)*c[0]+P(2,3,1)*c[1]+P(2,3,2)*c[2]+P(2,3,3)*c[3]); cP[12] = (P(3,0,0)*c[0]+P(3,0,1)*c[1]+P(3,0,2)*c[2]+P(3,0,3)*c[3]); cP[13] = (P(3,1,0)*c[0]+P(3,1,1)*c[1]+P(3,1,2)*c[2]+P(3,1,3)*c[3]); cP[14] = (P(3,2,0)*c[0]+P(3,2,1)*c[1]+P(3,2,2)*c[2]+P(3,2,3)*c[3]); cP[15] = (P(3,3,0)*c[0]+P(3,3,1)*c[1]+P(3,3,2)*c[2]+P(3,3,3)*c[3]); dcP[ 0] = (P(0,0,0)*dc[0]+P(0,0,1)*dc[1]+P(0,0,2)*dc[2]+P(0,0,3)*dc[3]); dcP[ 1] = (P(0,1,0)*dc[0]+P(0,1,1)*dc[1]+P(0,1,2)*dc[2]+P(0,1,3)*dc[3]); dcP[ 2] = (P(0,2,0)*dc[0]+P(0,2,1)*dc[1]+P(0,2,2)*dc[2]+P(0,2,3)*dc[3]); dcP[ 3] = (P(0,3,0)*dc[0]+P(0,3,1)*dc[1]+P(0,3,2)*dc[2]+P(0,3,3)*dc[3]); dcP[ 4] = (P(1,0,0)*dc[0]+P(1,0,1)*dc[1]+P(1,0,2)*dc[2]+P(1,0,3)*dc[3]); dcP[ 5] = (P(1,1,0)*dc[0]+P(1,1,1)*dc[1]+P(1,1,2)*dc[2]+P(1,1,3)*dc[3]); dcP[ 6] = (P(1,2,0)*dc[0]+P(1,2,1)*dc[1]+P(1,2,2)*dc[2]+P(1,2,3)*dc[3]); dcP[ 7] = (P(1,3,0)*dc[0]+P(1,3,1)*dc[1]+P(1,3,2)*dc[2]+P(1,3,3)*dc[3]); dcP[ 8] = (P(2,0,0)*dc[0]+P(2,0,1)*dc[1]+P(2,0,2)*dc[2]+P(2,0,3)*dc[3]); dcP[ 9] = (P(2,1,0)*dc[0]+P(2,1,1)*dc[1]+P(2,1,2)*dc[2]+P(2,1,3)*dc[3]); dcP[10] = (P(2,2,0)*dc[0]+P(2,2,1)*dc[1]+P(2,2,2)*dc[2]+P(2,2,3)*dc[3]); dcP[11] = (P(2,3,0)*dc[0]+P(2,3,1)*dc[1]+P(2,3,2)*dc[2]+P(2,3,3)*dc[3]); dcP[12] = (P(3,0,0)*dc[0]+P(3,0,1)*dc[1]+P(3,0,2)*dc[2]+P(3,0,3)*dc[3]); dcP[13] = (P(3,1,0)*dc[0]+P(3,1,1)*dc[1]+P(3,1,2)*dc[2]+P(3,1,3)*dc[3]); dcP[14] = (P(3,2,0)*dc[0]+P(3,2,1)*dc[1]+P(3,2,2)*dc[2]+P(3,2,3)*dc[3]); dcP[15] = 
(P(3,3,0)*dc[0]+P(3,3,1)*dc[1]+P(3,3,2)*dc[2]+P(3,3,3)*dc[3]); bcP[0] = ( b[0]*cP[ 0] + b[1]*cP[ 1] + b[2]*cP[ 2] + b[3]*cP[ 3]); bcP[1] = ( b[0]*cP[ 4] + b[1]*cP[ 5] + b[2]*cP[ 6] + b[3]*cP[ 7]); bcP[2] = ( b[0]*cP[ 8] + b[1]*cP[ 9] + b[2]*cP[10] + b[3]*cP[11]); bcP[3] = ( b[0]*cP[12] + b[1]*cP[13] + b[2]*cP[14] + b[3]*cP[15]); dbcP[0] = ( db[0]*cP[ 0] + db[1]*cP[ 1] + db[2]*cP[ 2] + db[3]*cP[ 3]); dbcP[1] = ( db[0]*cP[ 4] + db[1]*cP[ 5] + db[2]*cP[ 6] + db[3]*cP[ 7]); dbcP[2] = ( db[0]*cP[ 8] + db[1]*cP[ 9] + db[2]*cP[10] + db[3]*cP[11]); dbcP[3] = ( db[0]*cP[12] + db[1]*cP[13] + db[2]*cP[14] + db[3]*cP[15]); bdcP[0] = ( b[0]*dcP[ 0] + b[1]*dcP[ 1] + b[2]*dcP[ 2] + b[3]*dcP[ 3]); bdcP[1] = ( b[0]*dcP[ 4] + b[1]*dcP[ 5] + b[2]*dcP[ 6] + b[3]*dcP[ 7]); bdcP[2] = ( b[0]*dcP[ 8] + b[1]*dcP[ 9] + b[2]*dcP[10] + b[3]*dcP[11]); bdcP[3] = ( b[0]*dcP[12] + b[1]*dcP[13] + b[2]*dcP[14] + b[3]*dcP[15]); d2bcP[0] = ( d2b[0]*cP[ 0] + d2b[1]*cP[ 1] + d2b[2]*cP[ 2] + d2b[3]*cP[ 3]); d2bcP[1] = ( d2b[0]*cP[ 4] + d2b[1]*cP[ 5] + d2b[2]*cP[ 6] + d2b[3]*cP[ 7]); d2bcP[2] = ( d2b[0]*cP[ 8] + d2b[1]*cP[ 9] + d2b[2]*cP[10] + d2b[3]*cP[11]); d2bcP[3] = ( d2b[0]*cP[12] + d2b[1]*cP[13] + d2b[2]*cP[14] + d2b[3]*cP[15]); *val = ( a[0]*bcP[0] + a[1]*bcP[1] + a[2]*bcP[2] + a[3]*bcP[3]); grad[0] = (da[0]*bcP[0] + da[1]*bcP[1] + da[2]*bcP[2] + da[3]*bcP[3]); grad[1] = (a[0]*dbcP[0] + a[1]*dbcP[1] + a[2]*dbcP[2] + a[3]*dbcP[3]); grad[2] = (a[0]*bdcP[0] + a[1]*bdcP[1] + a[2]*bdcP[2] + a[3]*bdcP[3]); *lapl = (d2a[0]*bcP[0] + d2a[1]*bcP[1] + d2a[2]*bcP[2] + d2a[3]*bcP[3]) + (a[0]*d2bcP[0] + a[1]*d2bcP[1] + a[2]*d2bcP[2] + a[3]*d2bcP[3]) + (a[0]*(b[0]*(P(0,0,0)*d2c[0]+P(0,0,1)*d2c[1]+P(0,0,2)*d2c[2]+P(0,0,3)*d2c[3])+ b[1]*(P(0,1,0)*d2c[0]+P(0,1,1)*d2c[1]+P(0,1,2)*d2c[2]+P(0,1,3)*d2c[3])+ b[2]*(P(0,2,0)*d2c[0]+P(0,2,1)*d2c[1]+P(0,2,2)*d2c[2]+P(0,2,3)*d2c[3])+ b[3]*(P(0,3,0)*d2c[0]+P(0,3,1)*d2c[1]+P(0,3,2)*d2c[2]+P(0,3,3)*d2c[3]))+ 
a[1]*(b[0]*(P(1,0,0)*d2c[0]+P(1,0,1)*d2c[1]+P(1,0,2)*d2c[2]+P(1,0,3)*d2c[3])+ b[1]*(P(1,1,0)*d2c[0]+P(1,1,1)*d2c[1]+P(1,1,2)*d2c[2]+P(1,1,3)*d2c[3])+ b[2]*(P(1,2,0)*d2c[0]+P(1,2,1)*d2c[1]+P(1,2,2)*d2c[2]+P(1,2,3)*d2c[3])+ b[3]*(P(1,3,0)*d2c[0]+P(1,3,1)*d2c[1]+P(1,3,2)*d2c[2]+P(1,3,3)*d2c[3]))+ a[2]*(b[0]*(P(2,0,0)*d2c[0]+P(2,0,1)*d2c[1]+P(2,0,2)*d2c[2]+P(2,0,3)*d2c[3])+ b[1]*(P(2,1,0)*d2c[0]+P(2,1,1)*d2c[1]+P(2,1,2)*d2c[2]+P(2,1,3)*d2c[3])+ b[2]*(P(2,2,0)*d2c[0]+P(2,2,1)*d2c[1]+P(2,2,2)*d2c[2]+P(2,2,3)*d2c[3])+ b[3]*(P(2,3,0)*d2c[0]+P(2,3,1)*d2c[1]+P(2,3,2)*d2c[2]+P(2,3,3)*d2c[3]))+ a[3]*(b[0]*(P(3,0,0)*d2c[0]+P(3,0,1)*d2c[1]+P(3,0,2)*d2c[2]+P(3,0,3)*d2c[3])+ b[1]*(P(3,1,0)*d2c[0]+P(3,1,1)*d2c[1]+P(3,1,2)*d2c[2]+P(3,1,3)*d2c[3])+ b[2]*(P(3,2,0)*d2c[0]+P(3,2,1)*d2c[1]+P(3,2,2)*d2c[2]+P(3,2,3)*d2c[3])+ b[3]*(P(3,3,0)*d2c[0]+P(3,3,1)*d2c[1]+P(3,3,2)*d2c[2]+P(3,3,3)*d2c[3]))); #undef P } /* Value, gradient, and Hessian */ inline void eval_NUBspline_3d_c_vgh (NUBspline_3d_c * restrict spline, double x, double y, double z, complex_float* restrict val, complex_float* restrict grad, complex_float* restrict hess) { float a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4]; complex_float cP[16], dcP[16], d2cP[16], bcP[4], dbcP[4], d2bcP[4], dbdcP[4], bd2cP[4], bdcP[4]; int ix = get_NUBasis_d2funcs_s (spline->x_basis, x, a, da, d2a); int iy = get_NUBasis_d2funcs_s (spline->y_basis, y, b, db, d2b); int iz = get_NUBasis_d2funcs_s (spline->z_basis, z, c, dc, d2c); int xs = spline->x_stride; int ys = spline->y_stride; complex_float* restrict coefs = spline->coefs; #define P(i,j,k) coefs[(ix+(i))*xs+(iy+(j))*ys+(iz+(k))] cP[ 0] = (P(0,0,0)*c[0]+P(0,0,1)*c[1]+P(0,0,2)*c[2]+P(0,0,3)*c[3]); cP[ 1] = (P(0,1,0)*c[0]+P(0,1,1)*c[1]+P(0,1,2)*c[2]+P(0,1,3)*c[3]); cP[ 2] = (P(0,2,0)*c[0]+P(0,2,1)*c[1]+P(0,2,2)*c[2]+P(0,2,3)*c[3]); cP[ 3] = (P(0,3,0)*c[0]+P(0,3,1)*c[1]+P(0,3,2)*c[2]+P(0,3,3)*c[3]); cP[ 4] = (P(1,0,0)*c[0]+P(1,0,1)*c[1]+P(1,0,2)*c[2]+P(1,0,3)*c[3]); cP[ 5] = 
(P(1,1,0)*c[0]+P(1,1,1)*c[1]+P(1,1,2)*c[2]+P(1,1,3)*c[3]); cP[ 6] = (P(1,2,0)*c[0]+P(1,2,1)*c[1]+P(1,2,2)*c[2]+P(1,2,3)*c[3]); cP[ 7] = (P(1,3,0)*c[0]+P(1,3,1)*c[1]+P(1,3,2)*c[2]+P(1,3,3)*c[3]); cP[ 8] = (P(2,0,0)*c[0]+P(2,0,1)*c[1]+P(2,0,2)*c[2]+P(2,0,3)*c[3]); cP[ 9] = (P(2,1,0)*c[0]+P(2,1,1)*c[1]+P(2,1,2)*c[2]+P(2,1,3)*c[3]); cP[10] = (P(2,2,0)*c[0]+P(2,2,1)*c[1]+P(2,2,2)*c[2]+P(2,2,3)*c[3]); cP[11] = (P(2,3,0)*c[0]+P(2,3,1)*c[1]+P(2,3,2)*c[2]+P(2,3,3)*c[3]); cP[12] = (P(3,0,0)*c[0]+P(3,0,1)*c[1]+P(3,0,2)*c[2]+P(3,0,3)*c[3]); cP[13] = (P(3,1,0)*c[0]+P(3,1,1)*c[1]+P(3,1,2)*c[2]+P(3,1,3)*c[3]); cP[14] = (P(3,2,0)*c[0]+P(3,2,1)*c[1]+P(3,2,2)*c[2]+P(3,2,3)*c[3]); cP[15] = (P(3,3,0)*c[0]+P(3,3,1)*c[1]+P(3,3,2)*c[2]+P(3,3,3)*c[3]); dcP[ 0] = (P(0,0,0)*dc[0]+P(0,0,1)*dc[1]+P(0,0,2)*dc[2]+P(0,0,3)*dc[3]); dcP[ 1] = (P(0,1,0)*dc[0]+P(0,1,1)*dc[1]+P(0,1,2)*dc[2]+P(0,1,3)*dc[3]); dcP[ 2] = (P(0,2,0)*dc[0]+P(0,2,1)*dc[1]+P(0,2,2)*dc[2]+P(0,2,3)*dc[3]); dcP[ 3] = (P(0,3,0)*dc[0]+P(0,3,1)*dc[1]+P(0,3,2)*dc[2]+P(0,3,3)*dc[3]); dcP[ 4] = (P(1,0,0)*dc[0]+P(1,0,1)*dc[1]+P(1,0,2)*dc[2]+P(1,0,3)*dc[3]); dcP[ 5] = (P(1,1,0)*dc[0]+P(1,1,1)*dc[1]+P(1,1,2)*dc[2]+P(1,1,3)*dc[3]); dcP[ 6] = (P(1,2,0)*dc[0]+P(1,2,1)*dc[1]+P(1,2,2)*dc[2]+P(1,2,3)*dc[3]); dcP[ 7] = (P(1,3,0)*dc[0]+P(1,3,1)*dc[1]+P(1,3,2)*dc[2]+P(1,3,3)*dc[3]); dcP[ 8] = (P(2,0,0)*dc[0]+P(2,0,1)*dc[1]+P(2,0,2)*dc[2]+P(2,0,3)*dc[3]); dcP[ 9] = (P(2,1,0)*dc[0]+P(2,1,1)*dc[1]+P(2,1,2)*dc[2]+P(2,1,3)*dc[3]); dcP[10] = (P(2,2,0)*dc[0]+P(2,2,1)*dc[1]+P(2,2,2)*dc[2]+P(2,2,3)*dc[3]); dcP[11] = (P(2,3,0)*dc[0]+P(2,3,1)*dc[1]+P(2,3,2)*dc[2]+P(2,3,3)*dc[3]); dcP[12] = (P(3,0,0)*dc[0]+P(3,0,1)*dc[1]+P(3,0,2)*dc[2]+P(3,0,3)*dc[3]); dcP[13] = (P(3,1,0)*dc[0]+P(3,1,1)*dc[1]+P(3,1,2)*dc[2]+P(3,1,3)*dc[3]); dcP[14] = (P(3,2,0)*dc[0]+P(3,2,1)*dc[1]+P(3,2,2)*dc[2]+P(3,2,3)*dc[3]); dcP[15] = (P(3,3,0)*dc[0]+P(3,3,1)*dc[1]+P(3,3,2)*dc[2]+P(3,3,3)*dc[3]); d2cP[ 0] = (P(0,0,0)*d2c[0]+P(0,0,1)*d2c[1]+P(0,0,2)*d2c[2]+P(0,0,3)*d2c[3]); d2cP[ 1] = 
(P(0,1,0)*d2c[0]+P(0,1,1)*d2c[1]+P(0,1,2)*d2c[2]+P(0,1,3)*d2c[3]); d2cP[ 2] = (P(0,2,0)*d2c[0]+P(0,2,1)*d2c[1]+P(0,2,2)*d2c[2]+P(0,2,3)*d2c[3]); d2cP[ 3] = (P(0,3,0)*d2c[0]+P(0,3,1)*d2c[1]+P(0,3,2)*d2c[2]+P(0,3,3)*d2c[3]); d2cP[ 4] = (P(1,0,0)*d2c[0]+P(1,0,1)*d2c[1]+P(1,0,2)*d2c[2]+P(1,0,3)*d2c[3]); d2cP[ 5] = (P(1,1,0)*d2c[0]+P(1,1,1)*d2c[1]+P(1,1,2)*d2c[2]+P(1,1,3)*d2c[3]); d2cP[ 6] = (P(1,2,0)*d2c[0]+P(1,2,1)*d2c[1]+P(1,2,2)*d2c[2]+P(1,2,3)*d2c[3]); d2cP[ 7] = (P(1,3,0)*d2c[0]+P(1,3,1)*d2c[1]+P(1,3,2)*d2c[2]+P(1,3,3)*d2c[3]); d2cP[ 8] = (P(2,0,0)*d2c[0]+P(2,0,1)*d2c[1]+P(2,0,2)*d2c[2]+P(2,0,3)*d2c[3]); d2cP[ 9] = (P(2,1,0)*d2c[0]+P(2,1,1)*d2c[1]+P(2,1,2)*d2c[2]+P(2,1,3)*d2c[3]); d2cP[10] = (P(2,2,0)*d2c[0]+P(2,2,1)*d2c[1]+P(2,2,2)*d2c[2]+P(2,2,3)*d2c[3]); d2cP[11] = (P(2,3,0)*d2c[0]+P(2,3,1)*d2c[1]+P(2,3,2)*d2c[2]+P(2,3,3)*d2c[3]); d2cP[12] = (P(3,0,0)*d2c[0]+P(3,0,1)*d2c[1]+P(3,0,2)*d2c[2]+P(3,0,3)*d2c[3]); d2cP[13] = (P(3,1,0)*d2c[0]+P(3,1,1)*d2c[1]+P(3,1,2)*d2c[2]+P(3,1,3)*d2c[3]); d2cP[14] = (P(3,2,0)*d2c[0]+P(3,2,1)*d2c[1]+P(3,2,2)*d2c[2]+P(3,2,3)*d2c[3]); d2cP[15] = (P(3,3,0)*d2c[0]+P(3,3,1)*d2c[1]+P(3,3,2)*d2c[2]+P(3,3,3)*d2c[3]); bcP[0] = ( b[0]*cP[ 0] + b[1]*cP[ 1] + b[2]*cP[ 2] + b[3]*cP[ 3]); bcP[1] = ( b[0]*cP[ 4] + b[1]*cP[ 5] + b[2]*cP[ 6] + b[3]*cP[ 7]); bcP[2] = ( b[0]*cP[ 8] + b[1]*cP[ 9] + b[2]*cP[10] + b[3]*cP[11]); bcP[3] = ( b[0]*cP[12] + b[1]*cP[13] + b[2]*cP[14] + b[3]*cP[15]); dbcP[0] = ( db[0]*cP[ 0] + db[1]*cP[ 1] + db[2]*cP[ 2] + db[3]*cP[ 3]); dbcP[1] = ( db[0]*cP[ 4] + db[1]*cP[ 5] + db[2]*cP[ 6] + db[3]*cP[ 7]); dbcP[2] = ( db[0]*cP[ 8] + db[1]*cP[ 9] + db[2]*cP[10] + db[3]*cP[11]); dbcP[3] = ( db[0]*cP[12] + db[1]*cP[13] + db[2]*cP[14] + db[3]*cP[15]); bdcP[0] = ( b[0]*dcP[ 0] + b[1]*dcP[ 1] + b[2]*dcP[ 2] + b[3]*dcP[ 3]); bdcP[1] = ( b[0]*dcP[ 4] + b[1]*dcP[ 5] + b[2]*dcP[ 6] + b[3]*dcP[ 7]); bdcP[2] = ( b[0]*dcP[ 8] + b[1]*dcP[ 9] + b[2]*dcP[10] + b[3]*dcP[11]); bdcP[3] = ( b[0]*dcP[12] + b[1]*dcP[13] + b[2]*dcP[14] + 
b[3]*dcP[15]); bd2cP[0] = ( b[0]*d2cP[ 0] + b[1]*d2cP[ 1] + b[2]*d2cP[ 2] + b[3]*d2cP[ 3]); bd2cP[1] = ( b[0]*d2cP[ 4] + b[1]*d2cP[ 5] + b[2]*d2cP[ 6] + b[3]*d2cP[ 7]); bd2cP[2] = ( b[0]*d2cP[ 8] + b[1]*d2cP[ 9] + b[2]*d2cP[10] + b[3]*d2cP[11]); bd2cP[3] = ( b[0]*d2cP[12] + b[1]*d2cP[13] + b[2]*d2cP[14] + b[3]*d2cP[15]); d2bcP[0] = ( d2b[0]*cP[ 0] + d2b[1]*cP[ 1] + d2b[2]*cP[ 2] + d2b[3]*cP[ 3]); d2bcP[1] = ( d2b[0]*cP[ 4] + d2b[1]*cP[ 5] + d2b[2]*cP[ 6] + d2b[3]*cP[ 7]); d2bcP[2] = ( d2b[0]*cP[ 8] + d2b[1]*cP[ 9] + d2b[2]*cP[10] + d2b[3]*cP[11]); d2bcP[3] = ( d2b[0]*cP[12] + d2b[1]*cP[13] + d2b[2]*cP[14] + d2b[3]*cP[15]); dbdcP[0] = ( db[0]*dcP[ 0] + db[1]*dcP[ 1] + db[2]*dcP[ 2] + db[3]*dcP[ 3]); dbdcP[1] = ( db[0]*dcP[ 4] + db[1]*dcP[ 5] + db[2]*dcP[ 6] + db[3]*dcP[ 7]); dbdcP[2] = ( db[0]*dcP[ 8] + db[1]*dcP[ 9] + db[2]*dcP[10] + db[3]*dcP[11]); dbdcP[3] = ( db[0]*dcP[12] + db[1]*dcP[13] + db[2]*dcP[14] + db[3]*dcP[15]); *val = a[0]*bcP[0] + a[1]*bcP[1] + a[2]*bcP[2] + a[3]*bcP[3]; grad[0] = (da[0] *bcP[0] + da[1]*bcP[1] + da[2]*bcP[2] + da[3]*bcP[3]); grad[1] = (a[0]*dbcP[0] + a[1]*dbcP[1] + a[2]*dbcP[2] + a[3]*dbcP[3]); grad[2] = (a[0]*bdcP[0] + a[1]*bdcP[1] + a[2]*bdcP[2] + a[3]*bdcP[3]); // d2x hess[0] = (d2a[0]*bcP[0] + d2a[1]*bcP[1] + d2a[2]*bcP[2] + d2a[3]*bcP[3]); // dx dy hess[1] = (da[0]*dbcP[0] + da[1]*dbcP[1] + da[1]*dbcP[1] + da[1]*dbcP[1]); hess[3] = hess[1]; // dx dz; hess[2] = (da[0]*bdcP[0] + da[1]*bdcP[1] + da[1]*bdcP[1] + da[1]*bdcP[1]); hess[6] = hess[2]; // d2y hess[4] = (a[0]*d2bcP[0] + a[1]*d2bcP[1] + a[2]*d2bcP[2] + a[3]*d2bcP[3]); // dy dz hess[5] = (a[0]*dbdcP[0] + a[1]*dbdcP[1] + a[2]*dbdcP[2] + a[3]*dbdcP[3]); hess[7] = hess[5]; // d2z hess[8] = (a[0]*bd2cP[0] + a[1]*bd2cP[1] + a[2]*bd2cP[2] + a[3]*bd2cP[3]); #undef P } #endif einspline-0.9.2/src/TestBspline.c0000664000113000011300000007303111115070414013600 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for 
creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. // // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. // // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #include "bspline.h" #include #include #include #include double drand48(); void Test_1d_s() { Ugrid grid; grid.start = 1.0; grid.end = 3.0; grid.num = 11; float data[] = { 3.0, -4.0, 2.0, 1.0, -2.0, 0.0, 3.0, 2.0, 0.5, 1.0, 3.0 }; BCtype_s bc; bc.lCode = DERIV2; bc.lVal = 10.0; bc.rCode = DERIV2; bc.rVal = -10.0; FILE *fout = fopen ("1dSpline.dat", "w"); UBspline_1d_s *spline = (UBspline_1d_s*) create_UBspline_1d_s (grid, bc, data); for (double x=1.0; x<=3.00001; x+=0.001) { float val, grad, lapl; eval_UBspline_1d_s_vgl (spline, x, &val, &grad, &lapl); fprintf (fout, "%1.5f %20.14f %20.14f %20.14f\n", x, val, grad, lapl); } fclose (fout); } void Test_1d_d() { Ugrid grid; grid.start = 1.0; grid.end = 3.0; grid.num = 1000; // double data[] = { 3.0, -4.0, 2.0, 1.0, -2.0, 0.0, 3.0, 2.0, 0.5, 1.0, 3.0 }; double data[10000]; for (int i=0; i<10000; i++) data[i] = -2.0 + 4.0*drand48(); BCtype_d bc; bc.lCode = DERIV1; bc.lVal = 10.0; bc.rCode = DERIV2; bc.rVal = -10.0; FILE *fout = fopen ("Spline_1d_d.dat", "w"); UBspline_1d_d *spline = (UBspline_1d_d*) 
create_UBspline_1d_d (grid, bc, data); for (double x=1.0; x<=3.00001; x+=0.001) { double val, grad, lapl; eval_UBspline_1d_d_vgl (spline, x, &val, &grad, &lapl); fprintf (fout, "%1.5f %20.14f %20.14f %20.14f\n", x, val, grad, lapl); } fclose (fout); } void Test_1d_d_antiperiodic() { Ugrid grid; grid.start = 1.0; grid.end = 3.0; grid.num = 10; // double data[] = { 3.0, -4.0, 2.0, 1.0, -2.0, 0.0, 3.0, 2.0, 0.5, 1.0, 3.0 }; double data[10]; for (int i=0; i<10; i++) data[i] = -2.0 + 4.0*drand48(); BCtype_d bc; bc.lCode = ANTIPERIODIC; FILE *fout = fopen ("Spline_1d_d_antiperiodic.dat", "w"); UBspline_1d_d *spline = (UBspline_1d_d*) create_UBspline_1d_d (grid, bc, data); for (double x=1.0; x<=5.00001; x+=0.001) { double val, grad, lapl; double xp = x; double sign = 1.0; while (xp >= grid.end) { xp -= (grid.end-grid.start); sign *= -1.0; } eval_UBspline_1d_d_vgl (spline, xp, &val, &grad, &lapl); fprintf (fout, "%1.5f %20.14f %20.14f %20.14f\n", x, sign*val, sign*grad, sign*lapl); } double val, grad, lapl; double x = grid.start + (grid.end-grid.start) * (double)1/(double)grid.num; eval_UBspline_1d_d_vgl (spline, x, &val, &grad, &lapl); fclose (fout); } void Speed_1d_s() { Ugrid grid; grid.start = 1.0; grid.end = 3.0; grid.num = 11; float data[] = { 3.0, -4.0, 2.0, 1.0, -2.0, 0.0, 3.0, 2.0, 0.5, 1.0, 3.0 }; BCtype_s bc; bc.lCode = DERIV2; bc.lVal = 10.0; bc.rCode = DERIV2; bc.rVal = -10.0; UBspline_1d_s *spline = (UBspline_1d_s*) create_UBspline_1d_s (grid, bc, data); float val, grad, lapl; clock_t start, end, rstart, rend; rstart = clock(); for (int i=0; i<100000000; i++) { double x = grid.start + 0.99999*drand48()*(grid.end-grid.start); } rend = clock(); start = clock(); for (int i=0; i<100000000; i++) { double x = grid.start + 0.99999*drand48()*(grid.end-grid.start); eval_UBspline_1d_s_vgl (spline, x, &val, &grad, &lapl); } end = clock(); fprintf (stderr, "100,000,000 evalations in %f seconds.\n", (double)(end-start-(rend-rstart))/(double)CLOCKS_PER_SEC); } void 
Test_2d_s() { Ugrid x_grid, y_grid; x_grid.start = 1.0; x_grid.end = 3.0; x_grid.num = 30; y_grid.start = 1.0; y_grid.end = 3.0; y_grid.num = 30; float *data = malloc (x_grid.num * y_grid.num * sizeof(float)); for (int ix=0; ixx_grid.delta; double y = y_grid.start + (double)iy * spline->y_grid.delta; float spval, grad[2], hess[4]; eval_UBspline_2d_s_vgh (spline, x, y, &spval, grad, hess); fprintf (stderr, "exval = %20.15f spval = %20.15f\n", exval, spval); } void Speed_2d_s() { Ugrid x_grid, y_grid; x_grid.start = 1.0; x_grid.end = 3.0; x_grid.num = 300; y_grid.start = 1.0; y_grid.end = 3.0; y_grid.num = 300; float *data = malloc (x_grid.num * y_grid.num * sizeof(float)); for (int ix=0; ixx_grid.delta; double y = y_grid.start + (double)iy * spline->y_grid.delta; complex_float spval, grad[2], hess[4]; eval_UBspline_2d_c_vgh (spline, x, y, &spval, grad, hess); fprintf (stderr, "exval = (%20.15f + %20.15fi) spval = (%20.15f + %20.15fi)\n", crealf(exval), cimagf(exval), creal(spval), cimagf(spval)); } void Speed_2d_c() { Ugrid x_grid, y_grid; x_grid.start = 1.0; x_grid.end = 3.0; x_grid.num = 300; y_grid.start = 1.0; y_grid.end = 3.0; y_grid.num = 300; complex_float *data = malloc (x_grid.num * y_grid.num * sizeof(complex_float)); for (int ix=0; ixx_grid.delta; double y = y_grid.start + (double)iy * spline->y_grid.delta; double spval, grad[2], hess[4]; eval_UBspline_2d_d_vgh (spline, x, y, &spval, grad, hess); fprintf (stderr, "exval = %20.15f spval = %20.15f\n", exval, spval); } void Speed_2d_d() { Ugrid x_grid, y_grid; x_grid.start = 1.0; x_grid.end = 3.0; x_grid.num = 300; y_grid.start = 1.0; y_grid.end = 3.0; y_grid.num = 300; double *data = malloc (x_grid.num * y_grid.num * sizeof(double)); for (int ix=0; ixx_grid.delta; double y = y_grid.start + (double)iy * spline->y_grid.delta; complex_double spval, grad[2], hess[4]; eval_UBspline_2d_z_vgh (spline, x, y, &spval, grad, hess); fprintf (stderr, "exval = (%20.15f + %20.15fi) spval = (%20.15f + %20.15fi)\n", 
creal(exval), cimag(exval), creal(spval), cimag(spval)); } void Speed_2d_z() { Ugrid x_grid, y_grid; x_grid.start = 1.0; x_grid.end = 3.0; x_grid.num = 300; y_grid.start = 1.0; y_grid.end = 3.0; y_grid.num = 300; complex_double *data = malloc (x_grid.num * y_grid.num * sizeof(complex_double)); for (int ix=0; ixx_grid.delta + 0.000001; double y = y_grid.start + (double)iy * spline->y_grid.delta + 0.000001; z = z_grid.start + (double)iz * spline->z_grid.delta + 0.000001; float spval, grad[3], hess[9], lapl; eval_UBspline_3d_s_vgh (spline, x, y, z, &spval, grad, hess); fprintf (stderr, "exval = %20.15f spval = %20.15f\n", exval, spval); } void Speed_3d_s() { Ugrid x_grid, y_grid, z_grid; x_grid.start = 1.0; x_grid.end = 3.0; x_grid.num = 200; y_grid.start = 1.0; y_grid.end = 5.0; y_grid.num = 200; z_grid.start = 1.0; z_grid.end = 7.0; z_grid.num = 200; float *data = malloc (x_grid.num * y_grid.num * z_grid.num * sizeof(float)); for (int ix=0; ixx_grid.delta; double y = y_grid.start + (double)iy * spline->y_grid.delta; z = z_grid.start + (double)iz * spline->z_grid.delta; double spval, grad[3], hess[9]; eval_UBspline_3d_d_vgh (spline, x, y, z, &spval, grad, hess); fprintf (stderr, "exval = %23.17f spval = %23.17f\n", exval, spval); } void Speed_3d_d() { Ugrid x_grid, y_grid, z_grid; x_grid.start = 1.0; x_grid.end = 3.0; x_grid.num = 200; y_grid.start = 1.0; y_grid.end = 5.0; y_grid.num = 200; z_grid.start = 1.0; z_grid.end = 7.0; z_grid.num = 200; double *data = malloc (x_grid.num * y_grid.num * z_grid.num * sizeof(double)); for (int ix=0; ixx_grid.delta; double y = y_grid.start + (double)iy * spline->y_grid.delta; z = z_grid.start + (double)iz * spline->z_grid.delta; complex_float spval, grad[3], hess[9]; eval_UBspline_3d_c_vgh (spline, x, y, z, &spval, grad, hess); fprintf (stderr, "exval = (%23.17f + %23.17fi)\nspval = (%23.17f + %23.17fi)\n", crealf(exval), cimagf(exval), crealf(spval), cimagf(spval)); } void Speed_3d_c() { Ugrid x_grid, y_grid, z_grid; 
x_grid.start = 1.0; x_grid.end = 3.0; x_grid.num = 200; y_grid.start = 1.0; y_grid.end = 5.0; y_grid.num = 200; z_grid.start = 1.0; z_grid.end = 7.0; z_grid.num = 200; complex_float *data = malloc (x_grid.num * y_grid.num * z_grid.num * sizeof(complex_float)); for (int ix=0; ixx_grid.delta; double y = y_grid.start + (double)iy * spline->y_grid.delta; z = z_grid.start + (double)iz * spline->z_grid.delta; complex_double spval, grad[3], hess[9]; eval_UBspline_3d_z_vgh (spline, x, y, z, &spval, grad, hess); fprintf (stderr, "exval = (%23.19f + %23.19fi)\nspval = (%23.17f + %23.17fi)\n", crealf(exval), cimagf(exval), crealf(spval), cimagf(spval)); } void Speed_3d_z() { Ugrid x_grid, y_grid, z_grid; x_grid.start = 1.0; x_grid.end = 3.0; x_grid.num = 200; y_grid.start = 1.0; y_grid.end = 5.0; y_grid.num = 200; z_grid.start = 1.0; z_grid.end = 7.0; z_grid.num = 200; complex_double *data = malloc (x_grid.num * y_grid.num * z_grid.num * sizeof(complex_double)); for (int ix=0; ix>4)&3; int j = (thr>>2)&3; int k = (thr & 3); if (thr < 64) abc[thr] = a[i]*b[j]*c[k]; __syncthreads(); if (off < 2*N) { double val = 0.0; for (int i=0; i<4; i++) { for (int j=0; j<4; j++) { double *base = coefs + (index.x+i)*strides.x + (index.y+j)*strides.y + index.z*strides.z; for (int k=0; k<4; k++) val += abc[16*i+4*j+k] * base[off+k*strides.z]; } } myval[off] = val; } } __global__ static void eval_multi_multi_UBspline_3d_z_vgh_kernel (double *pos, double3 drInv, double *coefs, double *vals[], double *grads[], double *hess[], uint3 dim, uint3 strides, int N) { int block = blockIdx.x; int thr = threadIdx.x; int ir = blockIdx.y; int off = block*SPLINE_BLOCK_SIZE+threadIdx.x; __shared__ double *myval, *mygrad, *myhess; __shared__ double3 r; if (thr == 0) { r.x = pos[3*ir+0]; r.y = pos[3*ir+1]; r.z = pos[3*ir+2]; myval = vals[ir]; mygrad = grads[ir]; myhess = hess[ir]; } __syncthreads(); int3 index; double3 t; double s, sf; double4 tp[3]; s = r.x * drInv.x; sf = floor(s); index.x = 
min(max(0,(int)sf), dim.x-1); t.x = s - sf; s = r.y * drInv.y; sf = floor(s); index.y = min(max(0,(int)sf), dim.y-1); t.y = s - sf; s = r.z * drInv.z; sf = floor(s); index.z = min(max(0,(int)sf), dim.z-1); t.z = s - sf; tp[0].x=t.x*t.x*t.x; tp[0].y=t.x*t.x; tp[0].z=t.x; tp[0].w=1.0; tp[1].x=t.y*t.y*t.y; tp[1].y=t.y*t.y; tp[1].z=t.y; tp[1].w=1.0; tp[2].x=t.z*t.z*t.z; tp[2].y=t.z*t.z; tp[2].z=t.z; tp[2].w=1.0; // First 4 of a are value, second 4 are derivative, last four are // second derivative. __shared__ double a[12], b[12], c[12]; if (thr < 12) { a[thr] = Bcuda[4*thr+0]*tp[0].x + Bcuda[4*thr+1]*tp[0].y + Bcuda[4*thr+2]*tp[0].z + Bcuda[4*thr+3]*tp[0].w; b[thr] = Bcuda[4*thr+0]*tp[1].x + Bcuda[4*thr+1]*tp[1].y + Bcuda[4*thr+2]*tp[1].z + Bcuda[4*thr+3]*tp[1].w; c[thr] = Bcuda[4*thr+0]*tp[2].x + Bcuda[4*thr+1]*tp[2].y + Bcuda[4*thr+2]*tp[2].z + Bcuda[4*thr+3]*tp[2].w; } __syncthreads(); __shared__ double abc[640]; int i = (thr>>4)&3; int j = (thr>>2)&3; int k = (thr & 3); abc[(16*i+4*j+k)+0] = a[i+0]*b[j+0]*c[k+0]; // val abc[(16*i+4*j+k)+64] = a[i+4]*b[j+0]*c[k+0]; // d/dx abc[(16*i+4*j+k)+128] = a[i+0]*b[j+4]*c[k+0]; // d/dy abc[(16*i+4*j+k)+192] = a[i+0]*b[j+0]*c[k+4]; // d/dz abc[(16*i+4*j+k)+256] = a[i+8]*b[j+0]*c[k+0]; // d2/dx2 abc[(16*i+4*j+k)+320] = a[i+4]*b[j+4]*c[k+0]; // d2/dxdy abc[(16*i+4*j+k)+384] = a[i+4]*b[j+0]*c[k+4]; // d2/dxdz abc[(16*i+4*j+k)+448] = a[i+0]*b[j+8]*c[k+0]; // d2/dy2 abc[(16*i+4*j+k)+512] = a[i+0]*b[j+4]*c[k+4]; // d2/dydz abc[(16*i+4*j+k)+576] = a[i+0]*b[j+0]*c[k+8]; // d2/dz2 __syncthreads(); double v = 0.0, g0=0.0, g1=0.0, g2=0.0, h00=0.0, h01=0.0, h02=0.0, h11=0.0, h12=0.0, h22=0.0; int n = 0; double *b0 = coefs + index.x*strides.x + index.y*strides.y + index.z*strides.z + off; if (off < 2*N) { for (int i=0; i<4; i++) { for (int j=0; j<4; j++) { double *base = b0 + i*strides.x + j*strides.y; for (int k=0; k<4; k++) { double c = base[k*strides.z]; v += abc[n+0] * c; g0 += abc[n+64] * c; g1 += abc[n+128] * c; g2 += abc[n+192] * c; 
h00 += abc[n+256] * c; h01 += abc[n+320] * c; h02 += abc[n+384] * c; h11 += abc[n+448] * c; h12 += abc[n+512] * c; h22 += abc[n+576] * c; n += 1; } } } g0 *= drInv.x; g1 *= drInv.y; g2 *= drInv.z; h00 *= drInv.x * drInv.x; h01 *= drInv.x * drInv.y; h02 *= drInv.x * drInv.z; h11 *= drInv.y * drInv.y; h12 *= drInv.y * drInv.z; h22 *= drInv.z * drInv.z; // __shared__ double buff[6*SPLINE_BLOCK_SIZE]; // Note, we can reuse abc, by replacing buff with abc. myval[off] = v; } abc[3*thr+0] = g0; abc[3*thr+1] = g1; abc[3*thr+2] = g2; __syncthreads(); for (int i=0; i<3; i++) { int myoff = (3*block+i)*SPLINE_BLOCK_SIZE + thr; if (myoff < 3*N) mygrad[myoff] = abc[i*SPLINE_BLOCK_SIZE+thr]; } __syncthreads(); // Write Hessians abc[6*thr+0] = h00; abc[6*thr+1] = h01; abc[6*thr+2] = h02; abc[6*thr+3] = h11; abc[6*thr+4] = h12; abc[6*thr+5] = h22; __syncthreads(); for (int i=0; i<6; i++) { int myoff = (6*block+i)*SPLINE_BLOCK_SIZE + thr; if (myoff < 12*N) myhess[myoff] = abc[i*SPLINE_BLOCK_SIZE+thr]; } } extern "C" void eval_multi_multi_UBspline_3d_z_cuda (multi_UBspline_3d_z_cuda *spline, double *pos_d, double *vals_d[], int num) { dim3 dimBlock(SPLINE_BLOCK_SIZE); dim3 dimGrid(2*spline->num_splines/SPLINE_BLOCK_SIZE, num); if (2*spline->num_splines % SPLINE_BLOCK_SIZE) dimGrid.x++; eval_multi_multi_UBspline_3d_z_kernel<<>> (pos_d, spline->gridInv, (double*)spline->coefs, (double**)vals_d, spline->dim, spline->stride, spline->num_splines); cudaThreadSynchronize(); cudaError_t err = cudaGetLastError(); if (err != cudaSuccess) { fprintf (stderr, "CUDA error in eval_multi_multi_UBspline_3d_z_cuda:\n %s\n", cudaGetErrorString(err)); abort(); } } extern "C" void eval_multi_multi_UBspline_3d_z_vgh_cuda (multi_UBspline_3d_z_cuda *spline, double *pos_d, complex_double *vals_d[], complex_double *grads_d[], complex_double *hess_d[], int num) { dim3 dimBlock(SPLINE_BLOCK_SIZE); dim3 dimGrid(2*spline->num_splines/SPLINE_BLOCK_SIZE, num); if (2*spline->num_splines % SPLINE_BLOCK_SIZE) 
dimGrid.x++; eval_multi_multi_UBspline_3d_z_vgh_kernel<<>> (pos_d, spline->gridInv, (double*)spline->coefs, (double**)vals_d, (double**)grads_d, (double**)hess_d, spline->dim, spline->stride, spline->num_splines); cudaThreadSynchronize(); cudaError_t err = cudaGetLastError(); if (err != cudaSuccess) { fprintf (stderr, "CUDA error in eval_multi_multi_UBspline_3d_z_vgh_cuda:\n %s\n", cudaGetErrorString(err)); abort(); } } __global__ static void eval_multi_multi_UBspline_3d_z_vgl_kernel (double *pos, double3 drInv, double *coefs, double Linv[], double *vals[], double *grad_lapl[], uint3 dim, uint3 strides, int N, int row_stride) { int block = blockIdx.x; int thr = threadIdx.x; int ir = blockIdx.y; int off = block*SPLINE_BLOCK_SIZE+threadIdx.x; __shared__ double *myval, *mygrad_lapl; __shared__ double3 r; if (thr == 0) { r.x = pos[3*ir+0]; r.y = pos[3*ir+1]; r.z = pos[3*ir+2]; myval = vals[ir]; mygrad_lapl = grad_lapl[ir]; } __syncthreads(); int3 index; double3 t; double s, sf; double4 tp[3]; s = r.x * drInv.x; sf = floor(s); index.x = min(max(0,(int)sf), dim.x-1); t.x = s - sf; s = r.y * drInv.y; sf = floor(s); index.y = min(max(0,(int)sf), dim.y-1); t.y = s - sf; s = r.z * drInv.z; sf = floor(s); index.z = min(max(0,(int)sf), dim.z-1); t.z = s - sf; tp[0].x=t.x*t.x*t.x; tp[0].y=t.x*t.x; tp[0].z=t.x; tp[0].w=1.0; tp[1].x=t.y*t.y*t.y; tp[1].y=t.y*t.y; tp[1].z=t.y; tp[1].w=1.0; tp[2].x=t.z*t.z*t.z; tp[2].y=t.z*t.z; tp[2].z=t.z; tp[2].w=1.0; // First 4 of a are value, second 4 are derivative, last four are // second derivative. 
__shared__ double a[12], b[12], c[12]; if (thr < 12) { a[thr] = Bcuda[4*thr+0]*tp[0].x + Bcuda[4*thr+1]*tp[0].y + Bcuda[4*thr+2]*tp[0].z + Bcuda[4*thr+3]*tp[0].w; b[thr] = Bcuda[4*thr+0]*tp[1].x + Bcuda[4*thr+1]*tp[1].y + Bcuda[4*thr+2]*tp[1].z + Bcuda[4*thr+3]*tp[1].w; c[thr] = Bcuda[4*thr+0]*tp[2].x + Bcuda[4*thr+1]*tp[2].y + Bcuda[4*thr+2]*tp[2].z + Bcuda[4*thr+3]*tp[2].w; } __syncthreads(); __shared__ double abc[640]; int i = (thr>>4)&3; int j = (thr>>2)&3; int k = (thr & 3); abc[(16*i+4*j+k)+0] = a[i+0]*b[j+0]*c[k+0]; // val abc[(16*i+4*j+k)+64] = a[i+4]*b[j+0]*c[k+0]; // d/dx abc[(16*i+4*j+k)+128] = a[i+0]*b[j+4]*c[k+0]; // d/dy abc[(16*i+4*j+k)+192] = a[i+0]*b[j+0]*c[k+4]; // d/dz abc[(16*i+4*j+k)+256] = a[i+8]*b[j+0]*c[k+0]; // d2/dx2 abc[(16*i+4*j+k)+320] = a[i+4]*b[j+4]*c[k+0]; // d2/dxdy abc[(16*i+4*j+k)+384] = a[i+4]*b[j+0]*c[k+4]; // d2/dxdz abc[(16*i+4*j+k)+448] = a[i+0]*b[j+8]*c[k+0]; // d2/dy2 abc[(16*i+4*j+k)+512] = a[i+0]*b[j+4]*c[k+4]; // d2/dydz abc[(16*i+4*j+k)+576] = a[i+0]*b[j+0]*c[k+8]; // d2/dz2 __syncthreads(); double v = 0.0, g0=0.0, g1=0.0, g2=0.0, h00=0.0, h01=0.0, h02=0.0, h11=0.0, h12=0.0, h22=0.0; int n = 0; double *b0 = coefs + index.x*strides.x + index.y*strides.y + index.z*strides.z + off; if (off < 2*N) { for (int i=0; i<4; i++) { for (int j=0; j<4; j++) { double *base = b0 + i*strides.x + j*strides.y; for (int k=0; k<4; k++) { double c = base[k*strides.z]; v += abc[n+ 0] * c; g0 += abc[n+ 64] * c; g1 += abc[n+128] * c; g2 += abc[n+192] * c; h00 += abc[n+256] * c; h01 += abc[n+320] * c; h02 += abc[n+384] * c; h11 += abc[n+448] * c; h12 += abc[n+512] * c; h22 += abc[n+576] * c; n += 1; } } } g0 *= drInv.x; g1 *= drInv.y; g2 *= drInv.z; h00 *= drInv.x * drInv.x; h01 *= drInv.x * drInv.y; h02 *= drInv.x * drInv.z; h11 *= drInv.y * drInv.y; h12 *= drInv.y * drInv.z; h22 *= drInv.z * drInv.z; // __shared__ double buff[6*SPLINE_BLOCK_SIZE]; // Note, we can reuse abc, by replacing buff with abc. 
myval[off] = v; } __shared__ double G[3][3], GGt[3][3]; int i0 = threadIdx.x/3; int i1 = threadIdx.x - 3*i0; if (threadIdx.x < 9) G[i0][i1] = Linv[threadIdx.x]; __syncthreads(); if (threadIdx.x < 9) GGt[i0][i1] = (G[0][i0]*G[0][i1] + G[1][i0]*G[1][i1] + G[2][i0]*G[2][i1]); __syncthreads(); if (off < 2*N) { // Store gradients back to global memory mygrad_lapl[off+0*row_stride] = G[0][0]*g0 + G[0][1]*g1 + G[0][2]*g2; mygrad_lapl[off+2*row_stride] = G[1][0]*g0 + G[1][1]*g1 + G[1][2]*g2; mygrad_lapl[off+4*row_stride] = G[2][0]*g0 + G[2][1]*g1 + G[2][2]*g2; // Store laplacians back to global memory // Hessian = H00 H01 H02 H11 H12 H22 // Matrix = [0 1 2] // [1 3 4] // [2 4 5] // laplacian = Trace(GGt*Hessian) mygrad_lapl[off+6*row_stride] = (GGt[0][0]*h00 + GGt[1][0]*h01 + GGt[2][0]*h02 + GGt[0][1]*h01 + GGt[1][1]*h11 + GGt[2][1]*h12 + GGt[0][2]*h02 + GGt[1][2]*h12 + GGt[2][2]*h22); } } extern "C" void eval_multi_multi_UBspline_3d_z_vgl_cuda (multi_UBspline_3d_z_cuda *spline, double *pos_d, double *Linv_d, double *vals_d[], double *grad_lapl_d[], int num, int row_stride) { dim3 dimBlock(SPLINE_BLOCK_SIZE); dim3 dimGrid(2*spline->num_splines/SPLINE_BLOCK_SIZE, num); if (2*spline->num_splines % SPLINE_BLOCK_SIZE) dimGrid.x++; eval_multi_multi_UBspline_3d_z_vgl_kernel<<>> (pos_d, spline->gridInv, (double*)spline->coefs, Linv_d, (double**)vals_d, (double**)grad_lapl_d, spline->dim, spline->stride, spline->num_splines, row_stride); cudaThreadSynchronize(); cudaError_t err = cudaGetLastError(); if (err != cudaSuccess) { fprintf (stderr, "CUDA error in eval_multi_multi_UBspline_3d_z_vgl_cuda:\n %s\n", cudaGetErrorString(err)); abort(); } } #endif einspline-0.9.2/src/multi_bspline_eval_std_z_impl.h0000664000113000011300000010675411147116544017475 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. 
// // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. // // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #ifndef MULTI_BSPLINE_EVAL_STD_Z_IMPL_H #define MULTI_BSPLINE_EVAL_STD_Z_IMPL_H #include #include #include "bspline_base.h" #include "multi_bspline_structs.h" extern const double* restrict Ad; extern const double* restrict dAd; extern const double* restrict d2Ad; /************************************************************/ /* 1D double-precision, complex evaulation functions */ /************************************************************/ void eval_multi_UBspline_1d_z (multi_UBspline_1d_z *spline, double x, complex_double* restrict vals) { x -= spline->x_grid.start; double ux = x*spline->x_grid.delta_inv; double ipartx, tx; tx = modf (ux, &ipartx); int ix = (int) ipartx; double tpx[4], a[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; complex_double* restrict coefs = spline->coefs; a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]); a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]); a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]); a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]); intptr_t xs = spline->x_stride; for (int n=0; 
nnum_splines; n++) vals[n] = 0.0; for (int i=0; i<4; i++) { complex_double* restrict coefs = spline->coefs + ((ix+i)*xs); for (int n=0; nnum_splines; n++) vals[n] += a[i] * coefs[n]; } } void eval_multi_UBspline_1d_z_vg (multi_UBspline_1d_z *spline, double x, complex_double* restrict vals, complex_double* restrict grads) { x -= spline->x_grid.start; double ux = x*spline->x_grid.delta_inv; double ipartx, tx; tx = modf (ux, &ipartx); int ix = (int) ipartx; double tpx[4], a[4], da[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; complex_double* restrict coefs = spline->coefs; a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]); a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]); a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]); a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]); da[0] = (dAd[ 0]*tpx[0] + dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]); da[1] = (dAd[ 4]*tpx[0] + dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]); da[2] = (dAd[ 8]*tpx[0] + dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]); da[3] = (dAd[12]*tpx[0] + dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]); intptr_t xs = spline->x_stride; for (int n=0; nnum_splines; n++) { vals[n] = 0.0; grads[n] = 0.0; } for (int i=0; i<4; i++) { complex_double* restrict coefs = spline->coefs + ((ix+i)*xs); for (int n=0; nnum_splines; n++) { vals[n] += a[i] * coefs[n]; grads[n] += da[i] * coefs[n]; } } double dxInv = spline->x_grid.delta_inv; for (int n=0; nnum_splines; n++) grads[n] *= dxInv; } void eval_multi_UBspline_1d_z_vgl (multi_UBspline_1d_z *spline, double x, complex_double* restrict vals, complex_double* restrict grads, complex_double* restrict lapl) { x -= spline->x_grid.start; double ux = x*spline->x_grid.delta_inv; double ipartx, tx; tx = modf (ux, &ipartx); int ix = (int) ipartx; double tpx[4], a[4], da[4], d2a[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; complex_double* restrict 
coefs = spline->coefs; a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]); a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]); a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]); a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]); da[0] = (dAd[ 0]*tpx[0] + dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]); da[1] = (dAd[ 4]*tpx[0] + dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]); da[2] = (dAd[ 8]*tpx[0] + dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]); da[3] = (dAd[12]*tpx[0] + dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]); d2a[0] = (d2Ad[ 0]*tpx[0] + d2Ad[ 1]*tpx[1] + d2Ad[ 2]*tpx[2] + d2Ad[ 3]*tpx[3]); d2a[1] = (d2Ad[ 4]*tpx[0] + d2Ad[ 5]*tpx[1] + d2Ad[ 6]*tpx[2] + d2Ad[ 7]*tpx[3]); d2a[2] = (d2Ad[ 8]*tpx[0] + d2Ad[ 9]*tpx[1] + d2Ad[10]*tpx[2] + d2Ad[11]*tpx[3]); d2a[3] = (d2Ad[12]*tpx[0] + d2Ad[13]*tpx[1] + d2Ad[14]*tpx[2] + d2Ad[15]*tpx[3]); intptr_t xs = spline->x_stride; for (int n=0; nnum_splines; n++) { vals[n] = 0.0; grads[n] = 0.0; lapl[n] = 0.0; } for (int i=0; i<4; i++) { complex_double* restrict coefs = spline->coefs + ((ix+i)*xs); for (int n=0; nnum_splines; n++) { vals[n] += a[i] * coefs[n]; grads[n] += da[i] * coefs[n]; lapl[n] += d2a[i] * coefs[n]; } } double dxInv = spline->x_grid.delta_inv; for (int n=0; nnum_splines; n++) { grads[n] *= dxInv; lapl [n] *= dxInv*dxInv; } } void eval_multi_UBspline_1d_z_vgh (multi_UBspline_1d_z *spline, double x, complex_double* restrict vals, complex_double* restrict grads, complex_double* restrict hess) { eval_multi_UBspline_1d_z_vgl (spline, x, vals, grads, hess); } /************************************************************/ /* 2D double-precision, complex evaulation functions */ /************************************************************/ void eval_multi_UBspline_2d_z (multi_UBspline_2d_z *spline, double x, double y, complex_double* restrict vals) { x -= spline->x_grid.start; y -= spline->y_grid.start; double ux = 
x*spline->x_grid.delta_inv; double uy = y*spline->y_grid.delta_inv; double ipartx, iparty, tx, ty; tx = modf (ux, &ipartx); int ix = (int) ipartx; ty = modf (uy, &iparty); int iy = (int) iparty; double tpx[4], tpy[4], a[4], b[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; complex_double* restrict coefs = spline->coefs; a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]); a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]); a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]); a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]); b[0] = (Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]); b[1] = (Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]); b[2] = (Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]); b[3] = (Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]); intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; for (int n=0; nnum_splines; n++) vals[n] = 0.0; for (int i=0; i<4; i++) for (int j=0; j<4; j++) { double prefactor = a[i]*b[j]; complex_double* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys); for (int n=0; nnum_splines; n++) vals[n] += prefactor*coefs[n]; } } void eval_multi_UBspline_2d_z_vg (multi_UBspline_2d_z *spline, double x, double y, complex_double* restrict vals, complex_double* restrict grads) { x -= spline->x_grid.start; y -= spline->y_grid.start; double ux = x*spline->x_grid.delta_inv; double uy = y*spline->y_grid.delta_inv; double ipartx, iparty, tx, ty; tx = modf (ux, &ipartx); int ix = (int) ipartx; ty = modf (uy, &iparty); int iy = (int) iparty; double tpx[4], tpy[4], a[4], b[4], da[4], db[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; complex_double* restrict coefs = spline->coefs; a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + 
Ad[ 3]*tpx[3]); a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]); a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]); a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]); da[0] = (dAd[ 0]*tpx[0] + dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]); da[1] = (dAd[ 4]*tpx[0] + dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]); da[2] = (dAd[ 8]*tpx[0] + dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]); da[3] = (dAd[12]*tpx[0] + dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]); b[0] = (Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]); b[1] = (Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]); b[2] = (Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]); b[3] = (Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]); db[0] = (dAd[ 0]*tpy[0] + dAd[ 1]*tpy[1] + dAd[ 2]*tpy[2] + dAd[ 3]*tpy[3]); db[1] = (dAd[ 4]*tpy[0] + dAd[ 5]*tpy[1] + dAd[ 6]*tpy[2] + dAd[ 7]*tpy[3]); db[2] = (dAd[ 8]*tpy[0] + dAd[ 9]*tpy[1] + dAd[10]*tpy[2] + dAd[11]*tpy[3]); db[3] = (dAd[12]*tpy[0] + dAd[13]*tpy[1] + dAd[14]*tpy[2] + dAd[15]*tpy[3]); intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; for (int n=0; nnum_splines; n++) { vals[n] = 0.0; grads[2*n+0] = grads[2*n+1] = grads[2*n+2] = 0.0; } for (int i=0; i<4; i++) for (int j=0; j<4; j++) { double ab = a[i]*b[j]; double dab[2]; dab[0] = da[i]* b[j]; dab[1] = a[i]*db[j]; complex_double* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys); for (int n=0; nnum_splines; n++) { vals [n] += ab *coefs[n]; grads[2*n+0] += dab[0]*coefs[n]; grads[2*n+1] += dab[1]*coefs[n]; } } double dxInv = spline->x_grid.delta_inv; double dyInv = spline->y_grid.delta_inv; for (int n=0; nnum_splines; n++) { grads[2*n+0] *= dxInv; grads[2*n+1] *= dyInv; } } void eval_multi_UBspline_2d_z_vgl (multi_UBspline_2d_z *spline, double x, double y, complex_double* restrict vals, complex_double* restrict grads, complex_double* restrict lapl) { x -= 
spline->x_grid.start; y -= spline->y_grid.start; double ux = x*spline->x_grid.delta_inv; double uy = y*spline->y_grid.delta_inv; double ipartx, iparty, tx, ty; tx = modf (ux, &ipartx); int ix = (int) ipartx; ty = modf (uy, &iparty); int iy = (int) iparty; double tpx[4], tpy[4], a[4], b[4], da[4], db[4], d2a[4], d2b[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; complex_double* restrict coefs = spline->coefs; a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]); a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]); a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]); a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]); da[0] = (dAd[ 0]*tpx[0] + dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]); da[1] = (dAd[ 4]*tpx[0] + dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]); da[2] = (dAd[ 8]*tpx[0] + dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]); da[3] = (dAd[12]*tpx[0] + dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]); d2a[0] = (d2Ad[ 0]*tpx[0] + d2Ad[ 1]*tpx[1] + d2Ad[ 2]*tpx[2] + d2Ad[ 3]*tpx[3]); d2a[1] = (d2Ad[ 4]*tpx[0] + d2Ad[ 5]*tpx[1] + d2Ad[ 6]*tpx[2] + d2Ad[ 7]*tpx[3]); d2a[2] = (d2Ad[ 8]*tpx[0] + d2Ad[ 9]*tpx[1] + d2Ad[10]*tpx[2] + d2Ad[11]*tpx[3]); d2a[3] = (d2Ad[12]*tpx[0] + d2Ad[13]*tpx[1] + d2Ad[14]*tpx[2] + d2Ad[15]*tpx[3]); b[0] = (Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]); b[1] = (Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]); b[2] = (Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]); b[3] = (Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]); db[0] = (dAd[ 0]*tpy[0] + dAd[ 1]*tpy[1] + dAd[ 2]*tpy[2] + dAd[ 3]*tpy[3]); db[1] = (dAd[ 4]*tpy[0] + dAd[ 5]*tpy[1] + dAd[ 6]*tpy[2] + dAd[ 7]*tpy[3]); db[2] = (dAd[ 8]*tpy[0] + dAd[ 9]*tpy[1] + dAd[10]*tpy[2] + dAd[11]*tpy[3]); db[3] = (dAd[12]*tpy[0] + dAd[13]*tpy[1] + dAd[14]*tpy[2] + 
dAd[15]*tpy[3]); d2b[0] = (d2Ad[ 0]*tpy[0] + d2Ad[ 1]*tpy[1] + d2Ad[ 2]*tpy[2] + d2Ad[ 3]*tpy[3]); d2b[1] = (d2Ad[ 4]*tpy[0] + d2Ad[ 5]*tpy[1] + d2Ad[ 6]*tpy[2] + d2Ad[ 7]*tpy[3]); d2b[2] = (d2Ad[ 8]*tpy[0] + d2Ad[ 9]*tpy[1] + d2Ad[10]*tpy[2] + d2Ad[11]*tpy[3]); d2b[3] = (d2Ad[12]*tpy[0] + d2Ad[13]*tpy[1] + d2Ad[14]*tpy[2] + d2Ad[15]*tpy[3]); intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; //complex_double lapl2[2*spline->num_splines]; complex_double* restrict lapl2 = spline->lapl2; for (int n=0; nnum_splines; n++) { vals[n] = 0.0; grads[2*n+0] = grads[2*n+1] = 0.0; lapl2[2*n+0] = lapl2[2*n+1] = 0.0; } for (int i=0; i<4; i++) for (int j=0; j<4; j++) { double ab = a[i]*b[j]; double dab[2], d2ab[2]; dab[0] = da[i]* b[j]; dab[1] = a[i]*db[j]; d2ab[0] = d2a[i]* b[j]; d2ab[1] = a[i]*d2b[j]; complex_double* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys); for (int n=0; nnum_splines; n++) { vals[n] += ab *coefs[n]; grads[2*n+0] += dab[0]*coefs[n]; grads[2*n+1] += dab[1]*coefs[n]; lapl2[2*n+0] += d2ab[0]*coefs[n]; lapl2[2*n+1] += d2ab[1]*coefs[n]; } } double dxInv = spline->x_grid.delta_inv; double dyInv = spline->y_grid.delta_inv; for (int n=0; nnum_splines; n++) { grads[2*n+0] *= dxInv; grads[2*n+1] *= dyInv; lapl2[2*n+0] *= dxInv*dxInv; lapl2[2*n+1] *= dyInv*dyInv; lapl[n] = lapl2[2*n+0] + lapl2[2*n+1]; } } void eval_multi_UBspline_2d_z_vgh (multi_UBspline_2d_z *spline, double x, double y, complex_double* restrict vals, complex_double* restrict grads, complex_double* restrict hess) { x -= spline->x_grid.start; y -= spline->y_grid.start; double ux = x*spline->x_grid.delta_inv; double uy = y*spline->y_grid.delta_inv; double ipartx, iparty, tx, ty; tx = modf (ux, &ipartx); int ix = (int) ipartx; ty = modf (uy, &iparty); int iy = (int) iparty; double tpx[4], tpy[4], a[4], b[4], da[4], db[4], d2a[4], d2b[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; complex_double* 
restrict coefs = spline->coefs; a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]); a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]); a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]); a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]); da[0] = (dAd[ 0]*tpx[0] + dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]); da[1] = (dAd[ 4]*tpx[0] + dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]); da[2] = (dAd[ 8]*tpx[0] + dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]); da[3] = (dAd[12]*tpx[0] + dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]); d2a[0] = (d2Ad[ 0]*tpx[0] + d2Ad[ 1]*tpx[1] + d2Ad[ 2]*tpx[2] + d2Ad[ 3]*tpx[3]); d2a[1] = (d2Ad[ 4]*tpx[0] + d2Ad[ 5]*tpx[1] + d2Ad[ 6]*tpx[2] + d2Ad[ 7]*tpx[3]); d2a[2] = (d2Ad[ 8]*tpx[0] + d2Ad[ 9]*tpx[1] + d2Ad[10]*tpx[2] + d2Ad[11]*tpx[3]); d2a[3] = (d2Ad[12]*tpx[0] + d2Ad[13]*tpx[1] + d2Ad[14]*tpx[2] + d2Ad[15]*tpx[3]); b[0] = (Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]); b[1] = (Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]); b[2] = (Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]); b[3] = (Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]); db[0] = (dAd[ 0]*tpy[0] + dAd[ 1]*tpy[1] + dAd[ 2]*tpy[2] + dAd[ 3]*tpy[3]); db[1] = (dAd[ 4]*tpy[0] + dAd[ 5]*tpy[1] + dAd[ 6]*tpy[2] + dAd[ 7]*tpy[3]); db[2] = (dAd[ 8]*tpy[0] + dAd[ 9]*tpy[1] + dAd[10]*tpy[2] + dAd[11]*tpy[3]); db[3] = (dAd[12]*tpy[0] + dAd[13]*tpy[1] + dAd[14]*tpy[2] + dAd[15]*tpy[3]); d2b[0] = (d2Ad[ 0]*tpy[0] + d2Ad[ 1]*tpy[1] + d2Ad[ 2]*tpy[2] + d2Ad[ 3]*tpy[3]); d2b[1] = (d2Ad[ 4]*tpy[0] + d2Ad[ 5]*tpy[1] + d2Ad[ 6]*tpy[2] + d2Ad[ 7]*tpy[3]); d2b[2] = (d2Ad[ 8]*tpy[0] + d2Ad[ 9]*tpy[1] + d2Ad[10]*tpy[2] + d2Ad[11]*tpy[3]); d2b[3] = (d2Ad[12]*tpy[0] + d2Ad[13]*tpy[1] + d2Ad[14]*tpy[2] + d2Ad[15]*tpy[3]); intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; for (int n=0; nnum_splines; n++) { vals[n] = 0.0; 
grads[2*n+0] = grads[2*n+1] = 0.0; for (int i=0; i<4; i++) hess[4*n+i] = 0.0; } for (int i=0; i<4; i++) for (int j=0; j<4; j++){ double ab = a[i]*b[j]; double dab[2], d2ab[3]; dab[0] = da[i]* b[j]; dab[1] = a[i]*db[j]; d2ab[0] = d2a[i] * b[j]; d2ab[1] = da[i] * db[j]; d2ab[2] = a[i] * d2b[j]; complex_double* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys); for (int n=0; nnum_splines; n++) { vals[n] += ab *coefs[n]; grads[2*n+0] += dab[0]*coefs[n]; grads[2*n+1] += dab[1]*coefs[n]; hess [4*n+0] += d2ab[0]*coefs[n]; hess [4*n+1] += d2ab[1]*coefs[n]; hess [4*n+3] += d2ab[2]*coefs[n]; } } double dxInv = spline->x_grid.delta_inv; double dyInv = spline->y_grid.delta_inv; for (int n=0; nnum_splines; n++) { grads[2*n+0] *= dxInv; grads[2*n+1] *= dyInv; hess[4*n+0] *= dxInv*dxInv; hess[4*n+1] *= dxInv*dyInv; hess[4*n+3] *= dyInv*dyInv; // Copy hessian elements into lower half of 3x3 matrix hess[4*n+2] = hess[4*n+1]; } } /************************************************************/ /* 3D double-precision, complex evaulation functions */ /************************************************************/ void eval_multi_UBspline_3d_z (multi_UBspline_3d_z *spline, double x, double y, double z, complex_double* restrict vals) { x -= spline->x_grid.start; y -= spline->y_grid.start; z -= spline->z_grid.start; double ux = x*spline->x_grid.delta_inv; double uy = y*spline->y_grid.delta_inv; double uz = z*spline->z_grid.delta_inv; double ipartx, iparty, ipartz, tx, ty, tz; tx = modf (ux, &ipartx); int ix = (int) ipartx; ty = modf (uy, &iparty); int iy = (int) iparty; tz = modf (uz, &ipartz); int iz = (int) ipartz; double tpx[4], tpy[4], tpz[4], a[4], b[4], c[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; tpz[0] = tz*tz*tz; tpz[1] = tz*tz; tpz[2] = tz; tpz[3] = 1.0; complex_double* restrict coefs = spline->coefs; a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]); a[1] = (Ad[ 
4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]); a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]); a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]); b[0] = (Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]); b[1] = (Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]); b[2] = (Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]); b[3] = (Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]); c[0] = (Ad[ 0]*tpz[0] + Ad[ 1]*tpz[1] + Ad[ 2]*tpz[2] + Ad[ 3]*tpz[3]); c[1] = (Ad[ 4]*tpz[0] + Ad[ 5]*tpz[1] + Ad[ 6]*tpz[2] + Ad[ 7]*tpz[3]); c[2] = (Ad[ 8]*tpz[0] + Ad[ 9]*tpz[1] + Ad[10]*tpz[2] + Ad[11]*tpz[3]); c[3] = (Ad[12]*tpz[0] + Ad[13]*tpz[1] + Ad[14]*tpz[2] + Ad[15]*tpz[3]); intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; intptr_t zs = spline->z_stride; for (int n=0; nnum_splines; n++) vals[n] = 0.0; for (int i=0; i<4; i++) for (int j=0; j<4; j++) for (int k=0; k<4; k++) { double prefactor = a[i]*b[j]*c[k]; complex_double* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys + (iz+k)*zs); for (int n=0; nnum_splines; n++) vals[n] += prefactor*coefs[n]; } } void eval_multi_UBspline_3d_z_vg (multi_UBspline_3d_z *spline, double x, double y, double z, complex_double* restrict vals, complex_double* restrict grads) { x -= spline->x_grid.start; y -= spline->y_grid.start; z -= spline->z_grid.start; double ux = x*spline->x_grid.delta_inv; double uy = y*spline->y_grid.delta_inv; double uz = z*spline->z_grid.delta_inv; double ipartx, iparty, ipartz, tx, ty, tz; tx = modf (ux, &ipartx); int ix = (int) ipartx; ty = modf (uy, &iparty); int iy = (int) iparty; tz = modf (uz, &ipartz); int iz = (int) ipartz; double tpx[4], tpy[4], tpz[4], a[4], b[4], c[4], da[4], db[4], dc[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; tpz[0] = tz*tz*tz; tpz[1] = tz*tz; tpz[2] = tz; tpz[3] = 1.0; 
complex_double* restrict coefs = spline->coefs; a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]); a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]); a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]); a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]); da[0] = (dAd[ 0]*tpx[0] + dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]); da[1] = (dAd[ 4]*tpx[0] + dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]); da[2] = (dAd[ 8]*tpx[0] + dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]); da[3] = (dAd[12]*tpx[0] + dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]); b[0] = (Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]); b[1] = (Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]); b[2] = (Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]); b[3] = (Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]); db[0] = (dAd[ 0]*tpy[0] + dAd[ 1]*tpy[1] + dAd[ 2]*tpy[2] + dAd[ 3]*tpy[3]); db[1] = (dAd[ 4]*tpy[0] + dAd[ 5]*tpy[1] + dAd[ 6]*tpy[2] + dAd[ 7]*tpy[3]); db[2] = (dAd[ 8]*tpy[0] + dAd[ 9]*tpy[1] + dAd[10]*tpy[2] + dAd[11]*tpy[3]); db[3] = (dAd[12]*tpy[0] + dAd[13]*tpy[1] + dAd[14]*tpy[2] + dAd[15]*tpy[3]); c[0] = (Ad[ 0]*tpz[0] + Ad[ 1]*tpz[1] + Ad[ 2]*tpz[2] + Ad[ 3]*tpz[3]); c[1] = (Ad[ 4]*tpz[0] + Ad[ 5]*tpz[1] + Ad[ 6]*tpz[2] + Ad[ 7]*tpz[3]); c[2] = (Ad[ 8]*tpz[0] + Ad[ 9]*tpz[1] + Ad[10]*tpz[2] + Ad[11]*tpz[3]); c[3] = (Ad[12]*tpz[0] + Ad[13]*tpz[1] + Ad[14]*tpz[2] + Ad[15]*tpz[3]); dc[0] = (dAd[ 0]*tpz[0] + dAd[ 1]*tpz[1] + dAd[ 2]*tpz[2] + dAd[ 3]*tpz[3]); dc[1] = (dAd[ 4]*tpz[0] + dAd[ 5]*tpz[1] + dAd[ 6]*tpz[2] + dAd[ 7]*tpz[3]); dc[2] = (dAd[ 8]*tpz[0] + dAd[ 9]*tpz[1] + dAd[10]*tpz[2] + dAd[11]*tpz[3]); dc[3] = (dAd[12]*tpz[0] + dAd[13]*tpz[1] + dAd[14]*tpz[2] + dAd[15]*tpz[3]); intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; intptr_t zs = spline->z_stride; for (int n=0; nnum_splines; n++) { vals[n] = 0.0; grads[3*n+0] = 
grads[3*n+1] = grads[3*n+2] = 0.0; } for (int i=0; i<4; i++) for (int j=0; j<4; j++) for (int k=0; k<4; k++) { double abc = a[i]*b[j]*c[k]; double dabc[3]; dabc[0] = da[i]* b[j]* c[k]; dabc[1] = a[i]*db[j]* c[k]; dabc[2] = a[i]* b[j]*dc[k]; complex_double* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys + (iz+k)*zs); for (int n=0; nnum_splines; n++) { vals[n] += abc *coefs[n]; grads[3*n+0] += dabc[0]*coefs[n]; grads[3*n+1] += dabc[1]*coefs[n]; grads[3*n+2] += dabc[2]*coefs[n]; } } double dxInv = spline->x_grid.delta_inv; double dyInv = spline->y_grid.delta_inv; double dzInv = spline->z_grid.delta_inv; for (int n=0; nnum_splines; n++) { grads[3*n+0] *= dxInv; grads[3*n+1] *= dyInv; grads[3*n+2] *= dzInv; } } void eval_multi_UBspline_3d_z_vgl (multi_UBspline_3d_z *spline, double x, double y, double z, complex_double* restrict vals, complex_double* restrict grads, complex_double* restrict lapl) { x -= spline->x_grid.start; y -= spline->y_grid.start; z -= spline->z_grid.start; double ux = x*spline->x_grid.delta_inv; double uy = y*spline->y_grid.delta_inv; double uz = z*spline->z_grid.delta_inv; double ipartx, iparty, ipartz, tx, ty, tz; tx = modf (ux, &ipartx); int ix = (int) ipartx; ty = modf (uy, &iparty); int iy = (int) iparty; tz = modf (uz, &ipartz); int iz = (int) ipartz; double tpx[4], tpy[4], tpz[4], a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; tpz[0] = tz*tz*tz; tpz[1] = tz*tz; tpz[2] = tz; tpz[3] = 1.0; complex_double* restrict coefs = spline->coefs; a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]); a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]); a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]); a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]); da[0] = (dAd[ 0]*tpx[0] + dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]); 
da[1] = (dAd[ 4]*tpx[0] + dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]); da[2] = (dAd[ 8]*tpx[0] + dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]); da[3] = (dAd[12]*tpx[0] + dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]); d2a[0] = (d2Ad[ 0]*tpx[0] + d2Ad[ 1]*tpx[1] + d2Ad[ 2]*tpx[2] + d2Ad[ 3]*tpx[3]); d2a[1] = (d2Ad[ 4]*tpx[0] + d2Ad[ 5]*tpx[1] + d2Ad[ 6]*tpx[2] + d2Ad[ 7]*tpx[3]); d2a[2] = (d2Ad[ 8]*tpx[0] + d2Ad[ 9]*tpx[1] + d2Ad[10]*tpx[2] + d2Ad[11]*tpx[3]); d2a[3] = (d2Ad[12]*tpx[0] + d2Ad[13]*tpx[1] + d2Ad[14]*tpx[2] + d2Ad[15]*tpx[3]); b[0] = (Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]); b[1] = (Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]); b[2] = (Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]); b[3] = (Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]); db[0] = (dAd[ 0]*tpy[0] + dAd[ 1]*tpy[1] + dAd[ 2]*tpy[2] + dAd[ 3]*tpy[3]); db[1] = (dAd[ 4]*tpy[0] + dAd[ 5]*tpy[1] + dAd[ 6]*tpy[2] + dAd[ 7]*tpy[3]); db[2] = (dAd[ 8]*tpy[0] + dAd[ 9]*tpy[1] + dAd[10]*tpy[2] + dAd[11]*tpy[3]); db[3] = (dAd[12]*tpy[0] + dAd[13]*tpy[1] + dAd[14]*tpy[2] + dAd[15]*tpy[3]); d2b[0] = (d2Ad[ 0]*tpy[0] + d2Ad[ 1]*tpy[1] + d2Ad[ 2]*tpy[2] + d2Ad[ 3]*tpy[3]); d2b[1] = (d2Ad[ 4]*tpy[0] + d2Ad[ 5]*tpy[1] + d2Ad[ 6]*tpy[2] + d2Ad[ 7]*tpy[3]); d2b[2] = (d2Ad[ 8]*tpy[0] + d2Ad[ 9]*tpy[1] + d2Ad[10]*tpy[2] + d2Ad[11]*tpy[3]); d2b[3] = (d2Ad[12]*tpy[0] + d2Ad[13]*tpy[1] + d2Ad[14]*tpy[2] + d2Ad[15]*tpy[3]); c[0] = (Ad[ 0]*tpz[0] + Ad[ 1]*tpz[1] + Ad[ 2]*tpz[2] + Ad[ 3]*tpz[3]); c[1] = (Ad[ 4]*tpz[0] + Ad[ 5]*tpz[1] + Ad[ 6]*tpz[2] + Ad[ 7]*tpz[3]); c[2] = (Ad[ 8]*tpz[0] + Ad[ 9]*tpz[1] + Ad[10]*tpz[2] + Ad[11]*tpz[3]); c[3] = (Ad[12]*tpz[0] + Ad[13]*tpz[1] + Ad[14]*tpz[2] + Ad[15]*tpz[3]); dc[0] = (dAd[ 0]*tpz[0] + dAd[ 1]*tpz[1] + dAd[ 2]*tpz[2] + dAd[ 3]*tpz[3]); dc[1] = (dAd[ 4]*tpz[0] + dAd[ 5]*tpz[1] + dAd[ 6]*tpz[2] + dAd[ 7]*tpz[3]); dc[2] = (dAd[ 8]*tpz[0] + dAd[ 9]*tpz[1] + dAd[10]*tpz[2] + 
dAd[11]*tpz[3]); dc[3] = (dAd[12]*tpz[0] + dAd[13]*tpz[1] + dAd[14]*tpz[2] + dAd[15]*tpz[3]); d2c[0] = (d2Ad[ 0]*tpz[0] + d2Ad[ 1]*tpz[1] + d2Ad[ 2]*tpz[2] + d2Ad[ 3]*tpz[3]); d2c[1] = (d2Ad[ 4]*tpz[0] + d2Ad[ 5]*tpz[1] + d2Ad[ 6]*tpz[2] + d2Ad[ 7]*tpz[3]); d2c[2] = (d2Ad[ 8]*tpz[0] + d2Ad[ 9]*tpz[1] + d2Ad[10]*tpz[2] + d2Ad[11]*tpz[3]); d2c[3] = (d2Ad[12]*tpz[0] + d2Ad[13]*tpz[1] + d2Ad[14]*tpz[2] + d2Ad[15]*tpz[3]); intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; intptr_t zs = spline->z_stride; //complex_double lapl3[3*spline->num_splines]; complex_double* restrict lapl3 = spline->lapl3; for (int n=0; nnum_splines; n++) { vals[n] = 0.0; grads[3*n+0] = grads[3*n+1] = grads[3*n+2] = 0.0; lapl3[3*n+0] = lapl3[3*n+1] = lapl3[3*n+2] = 0.0; } for (int i=0; i<4; i++) for (int j=0; j<4; j++) for (int k=0; k<4; k++) { double abc = a[i]*b[j]*c[k]; double dabc[3], d2abc[3]; dabc[0] = da[i]* b[j]* c[k]; dabc[1] = a[i]*db[j]* c[k]; dabc[2] = a[i]* b[j]*dc[k]; d2abc[0] = d2a[i]* b[j]* c[k]; d2abc[1] = a[i]*d2b[j]* c[k]; d2abc[2] = a[i]* b[j]*d2c[k]; complex_double* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys + (iz+k)*zs); for (int n=0; nnum_splines; n++) { vals[n] += abc *coefs[n]; grads[3*n+0] += dabc[0]*coefs[n]; grads[3*n+1] += dabc[1]*coefs[n]; grads[3*n+2] += dabc[2]*coefs[n]; lapl3[3*n+0] += d2abc[0]*coefs[n]; lapl3[3*n+1] += d2abc[1]*coefs[n]; lapl3[3*n+2] += d2abc[2]*coefs[n]; } } double dxInv = spline->x_grid.delta_inv; double dyInv = spline->y_grid.delta_inv; double dzInv = spline->z_grid.delta_inv; for (int n=0; nnum_splines; n++) { grads[3*n+0] *= dxInv; grads[3*n+1] *= dyInv; grads[3*n+2] *= dzInv; lapl3[3*n+0] *= dxInv*dxInv; lapl3[3*n+1] *= dyInv*dyInv; lapl3[3*n+2] *= dzInv*dzInv; lapl[n] = lapl3[3*n+0] + lapl3[3*n+1] + lapl3[3*n+2]; } } void eval_multi_UBspline_3d_z_vgh (multi_UBspline_3d_z *spline, double x, double y, double z, complex_double* restrict vals, complex_double* restrict grads, complex_double* restrict hess) { x -= 
spline->x_grid.start; y -= spline->y_grid.start; z -= spline->z_grid.start; double ux = x*spline->x_grid.delta_inv; double uy = y*spline->y_grid.delta_inv; double uz = z*spline->z_grid.delta_inv; double ipartx, iparty, ipartz, tx, ty, tz; tx = modf (ux, &ipartx); int ix = (int) ipartx; ty = modf (uy, &iparty); int iy = (int) iparty; tz = modf (uz, &ipartz); int iz = (int) ipartz; double tpx[4], tpy[4], tpz[4], a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; tpz[0] = tz*tz*tz; tpz[1] = tz*tz; tpz[2] = tz; tpz[3] = 1.0; complex_double* restrict coefs = spline->coefs; a[0] = (Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]); a[1] = (Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]); a[2] = (Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]); a[3] = (Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]); da[0] = (dAd[ 0]*tpx[0] + dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]); da[1] = (dAd[ 4]*tpx[0] + dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]); da[2] = (dAd[ 8]*tpx[0] + dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]); da[3] = (dAd[12]*tpx[0] + dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]); d2a[0] = (d2Ad[ 0]*tpx[0] + d2Ad[ 1]*tpx[1] + d2Ad[ 2]*tpx[2] + d2Ad[ 3]*tpx[3]); d2a[1] = (d2Ad[ 4]*tpx[0] + d2Ad[ 5]*tpx[1] + d2Ad[ 6]*tpx[2] + d2Ad[ 7]*tpx[3]); d2a[2] = (d2Ad[ 8]*tpx[0] + d2Ad[ 9]*tpx[1] + d2Ad[10]*tpx[2] + d2Ad[11]*tpx[3]); d2a[3] = (d2Ad[12]*tpx[0] + d2Ad[13]*tpx[1] + d2Ad[14]*tpx[2] + d2Ad[15]*tpx[3]); b[0] = (Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]); b[1] = (Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]); b[2] = (Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]); b[3] = (Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]); db[0] = (dAd[ 0]*tpy[0] + dAd[ 1]*tpy[1] + dAd[ 2]*tpy[2] + dAd[ 3]*tpy[3]); 
db[1] = (dAd[ 4]*tpy[0] + dAd[ 5]*tpy[1] + dAd[ 6]*tpy[2] + dAd[ 7]*tpy[3]); db[2] = (dAd[ 8]*tpy[0] + dAd[ 9]*tpy[1] + dAd[10]*tpy[2] + dAd[11]*tpy[3]); db[3] = (dAd[12]*tpy[0] + dAd[13]*tpy[1] + dAd[14]*tpy[2] + dAd[15]*tpy[3]); d2b[0] = (d2Ad[ 0]*tpy[0] + d2Ad[ 1]*tpy[1] + d2Ad[ 2]*tpy[2] + d2Ad[ 3]*tpy[3]); d2b[1] = (d2Ad[ 4]*tpy[0] + d2Ad[ 5]*tpy[1] + d2Ad[ 6]*tpy[2] + d2Ad[ 7]*tpy[3]); d2b[2] = (d2Ad[ 8]*tpy[0] + d2Ad[ 9]*tpy[1] + d2Ad[10]*tpy[2] + d2Ad[11]*tpy[3]); d2b[3] = (d2Ad[12]*tpy[0] + d2Ad[13]*tpy[1] + d2Ad[14]*tpy[2] + d2Ad[15]*tpy[3]); c[0] = (Ad[ 0]*tpz[0] + Ad[ 1]*tpz[1] + Ad[ 2]*tpz[2] + Ad[ 3]*tpz[3]); c[1] = (Ad[ 4]*tpz[0] + Ad[ 5]*tpz[1] + Ad[ 6]*tpz[2] + Ad[ 7]*tpz[3]); c[2] = (Ad[ 8]*tpz[0] + Ad[ 9]*tpz[1] + Ad[10]*tpz[2] + Ad[11]*tpz[3]); c[3] = (Ad[12]*tpz[0] + Ad[13]*tpz[1] + Ad[14]*tpz[2] + Ad[15]*tpz[3]); dc[0] = (dAd[ 0]*tpz[0] + dAd[ 1]*tpz[1] + dAd[ 2]*tpz[2] + dAd[ 3]*tpz[3]); dc[1] = (dAd[ 4]*tpz[0] + dAd[ 5]*tpz[1] + dAd[ 6]*tpz[2] + dAd[ 7]*tpz[3]); dc[2] = (dAd[ 8]*tpz[0] + dAd[ 9]*tpz[1] + dAd[10]*tpz[2] + dAd[11]*tpz[3]); dc[3] = (dAd[12]*tpz[0] + dAd[13]*tpz[1] + dAd[14]*tpz[2] + dAd[15]*tpz[3]); d2c[0] = (d2Ad[ 0]*tpz[0] + d2Ad[ 1]*tpz[1] + d2Ad[ 2]*tpz[2] + d2Ad[ 3]*tpz[3]); d2c[1] = (d2Ad[ 4]*tpz[0] + d2Ad[ 5]*tpz[1] + d2Ad[ 6]*tpz[2] + d2Ad[ 7]*tpz[3]); d2c[2] = (d2Ad[ 8]*tpz[0] + d2Ad[ 9]*tpz[1] + d2Ad[10]*tpz[2] + d2Ad[11]*tpz[3]); d2c[3] = (d2Ad[12]*tpz[0] + d2Ad[13]*tpz[1] + d2Ad[14]*tpz[2] + d2Ad[15]*tpz[3]); intptr_t xs = spline->x_stride; intptr_t ys = spline->y_stride; intptr_t zs = spline->z_stride; for (int n=0; nnum_splines; n++) { vals[n] = 0.0; grads[3*n+0] = grads[3*n+1] = grads[3*n+2] = 0.0; for (int i=0; i<9; i++) hess[9*n+i] = 0.0; } for (int i=0; i<4; i++) for (int j=0; j<4; j++) for (int k=0; k<4; k++) { double abc = a[i]*b[j]*c[k]; double dabc[3], d2abc[6]; dabc[0] = da[i]* b[j]* c[k]; dabc[1] = a[i]*db[j]* c[k]; dabc[2] = a[i]* b[j]*dc[k]; d2abc[0] = d2a[i]* b[j]* c[k]; d2abc[1] = da[i]* db[j]* c[k]; 
d2abc[2] = da[i]* b[j]* dc[k]; d2abc[3] = a[i]*d2b[j]* c[k]; d2abc[4] = a[i]* db[j]* dc[k]; d2abc[5] = a[i]* b[j]*d2c[k]; complex_double* restrict coefs = spline->coefs + ((ix+i)*xs + (iy+j)*ys + (iz+k)*zs); for (int n=0; nnum_splines; n++) { vals[n] += abc *coefs[n]; grads[3*n+0] += dabc[0]*coefs[n]; grads[3*n+1] += dabc[1]*coefs[n]; grads[3*n+2] += dabc[2]*coefs[n]; hess [9*n+0] += d2abc[0]*coefs[n]; hess [9*n+1] += d2abc[1]*coefs[n]; hess [9*n+2] += d2abc[2]*coefs[n]; hess [9*n+4] += d2abc[3]*coefs[n]; hess [9*n+5] += d2abc[4]*coefs[n]; hess [9*n+8] += d2abc[5]*coefs[n]; } } double dxInv = spline->x_grid.delta_inv; double dyInv = spline->y_grid.delta_inv; double dzInv = spline->z_grid.delta_inv; for (int n=0; nnum_splines; n++) { grads[3*n+0] *= dxInv; grads[3*n+1] *= dyInv; grads[3*n+2] *= dzInv; hess[9*n+0] *= dxInv*dxInv; hess[9*n+4] *= dyInv*dyInv; hess[9*n+8] *= dzInv*dzInv; hess[9*n+1] *= dxInv*dyInv; hess[9*n+2] *= dxInv*dzInv; hess[9*n+5] *= dyInv*dzInv; // Copy hessian elements into lower half of 3x3 matrix hess[9*n+3] = hess[9*n+1]; hess[9*n+6] = hess[9*n+2]; hess[9*n+7] = hess[9*n+5]; } } #endif einspline-0.9.2/src/test_multi_cuda.cu0000664000113000011300000005236211266200630014724 00000000000000#include "multi_bspline.h" #include "multi_bspline_create_cuda.h" #include "multi_bspline_structs_cuda.h" #include "multi_bspline_eval_cuda.h" void test_float_1d() { int numWalkers = 1000; float *vals[numWalkers], *grads[numWalkers], *hess[numWalkers]; float *coefs, __device__ **vals_d, **grads_d, **hess_d; float *r_d, *r_h; int xs, N; int Nx; N = 128*36; Nx = 100; xs = N; // Setup Bspline coefficients int size = Nx*N*sizeof(float); posix_memalign((void**)&coefs, 16, size); for (int ix=0; ixcoefs); cudaFree (valBlock_d); cudaFree (vals_d); cudaFree (grads_d); cudaFree (hess_d); cudaFree (r_d); } void test_float() { int numWalkers = 512; float *vals[numWalkers], *grads[numWalkers], *hess[numWalkers]; float *coefs, __device__ **vals_d, **grads_d, **hess_d; float 
*r_d, *r_h; int xs, ys, zs, N; int Nx, Ny, Nz; N = 256; Nx = Ny = Nz = 32; xs = Ny*Nz*N; ys = Nz*N; zs = N; // Setup Bspline coefficients int size = Nx*Ny*Nz*N*sizeof(float); posix_memalign((void**)&coefs, 16, size); for (int ix=0; ixcoefs); cudaFree (valBlock_d); cudaFree (vals_d); cudaFree (grads_d); cudaFree (hess_d); cudaFree (r_d); } void test_complex_float() { int numWalkers = 1000; complex_float *vals[numWalkers], *grads[numWalkers], *hess[numWalkers]; complex_float *coefs, __device__ **vals_d, **grads_d, **hess_d; float *Linv_d; float *r_d, *r_h; int xs, ys, zs, N; int Nx, Ny, Nz; N = 128; Nx = Ny = Nz = 32; xs = Ny*Nz*N; ys = Nz*N; zs = N; // Setup Bspline coefficients int size = Nx*Ny*Nz*N*sizeof(complex_float); posix_memalign((void**)&coefs, 16, size); for (int ix=0; ix(drand48(), drand48()); Ugrid x_grid, y_grid, z_grid; x_grid.start = 0.0; x_grid.end = 1.0; x_grid.num = Nx; y_grid.start = 0.0; y_grid.end = 1.0; y_grid.num = Ny; z_grid.start = 0.0; z_grid.end = 1.0; z_grid.num = Nz; BCtype_c xBC, yBC, zBC; xBC.lCode = xBC.rCode = PERIODIC; yBC.lCode = yBC.rCode = PERIODIC; zBC.lCode = zBC.rCode = PERIODIC; multi_UBspline_3d_c *spline = create_multi_UBspline_3d_c (x_grid, y_grid, z_grid, xBC, yBC, zBC, N); for (int i=0; icoefs); cudaFree (valBlock_d); cudaFree (vals_d); cudaFree (grads_d); cudaFree (hess_d); cudaFree (r_d); } void test_double() { int numWalkers = 1000; double *vals[numWalkers], *grads[numWalkers], *hess[numWalkers]; double *coefs, __device__ **vals_d, **grads_d, **hess_d; double *r_d, *r_h; int xs, ys, zs, N; int Nx, Ny, Nz; N = 128; Nx = Ny = Nz = 32; xs = Ny*Nz*N; ys = Nz*N; zs = N; // Setup Bspline coefficients int size = Nx*Ny*Nz*N*sizeof(double); posix_memalign((void**)&coefs, 16, size); for (int ix=0; ixcoefs); cudaFree (valBlock_d); cudaFree (vals_d); cudaFree (grads_d); cudaFree (hess_d); cudaFree (r_d); } void test_complex_double() { int numWalkers = 1000; complex_double *vals[numWalkers], *grads[numWalkers], *hess[numWalkers]; 
complex_double *coefs, __device__ **vals_d, **grads_d, **hess_d; double *r_d, *r_h; int xs, ys, zs, N; int Nx, Ny, Nz; N = 128; Nx = Ny = Nz = 32; xs = Ny*Nz*N; ys = Nz*N; zs = N; // Setup Bspline coefficients int size = Nx*Ny*Nz*N*sizeof(complex_double); posix_memalign((void**)&coefs, 16, size); for (int ix=0; ix(drand48(), drand48()); Ugrid x_grid, y_grid, z_grid; x_grid.start = 0.0; x_grid.end = 1.0; x_grid.num = Nx; y_grid.start = 0.0; y_grid.end = 1.0; y_grid.num = Ny; z_grid.start = 0.0; z_grid.end = 1.0; z_grid.num = Nz; BCtype_z xBC, yBC, zBC; xBC.lCode = xBC.rCode = PERIODIC; yBC.lCode = yBC.rCode = PERIODIC; zBC.lCode = zBC.rCode = PERIODIC; multi_UBspline_3d_z *spline = create_multi_UBspline_3d_z (x_grid, y_grid, z_grid, xBC, yBC, zBC, N); for (int i=0; icoefs); cudaFree (valBlock_d); cudaFree (vals_d); cudaFree (grads_d); cudaFree (hess_d); cudaFree (r_d); } main() { // fprintf(stderr, "Testing 1D single-precision real routines:\n"); // test_float_1d(); fprintf(stderr, "Testing 3D single-precision real routines:\n"); test_float(); // fprintf(stderr, "Testing 3D single-precision complex routines:\n"); // test_complex_float(); // fprintf(stderr, "Testing 3D double-precision real routines:\n"); // test_double(); // fprintf(stderr, "Testing 3D double-precision complex routines:\n"); // test_complex_double(); } einspline-0.9.2/src/multi_bspline_create_cuda.h0000664000113000011300000000401511262673727016555 00000000000000#ifndef MULTI_BSPLINE_CREATE_CUDA_H #define MULTI_BSPLINE_CREATE_CUDA_H #include "multi_bspline_structs_cuda.h" //////// // 1D // //////// extern "C" multi_UBspline_1d_s_cuda* create_multi_UBspline_1d_s_cuda (multi_UBspline_1d_s* spline); extern "C" multi_UBspline_1d_s_cuda* create_multi_UBspline_1d_s_cuda_conv (multi_UBspline_1d_d* spline); extern "C" multi_UBspline_1d_c_cuda* create_multi_UBspline_1d_c_cuda (multi_UBspline_1d_c* spline); extern "C" multi_UBspline_1d_c_cuda* create_multi_UBspline_1d_c_cuda_conv (multi_UBspline_1d_z* spline); 
extern "C" multi_UBspline_1d_d_cuda* create_multi_UBspline_1d_d_cuda (multi_UBspline_1d_d* spline); extern "C" multi_UBspline_1d_z_cuda* create_multi_UBspline_1d_z_cuda (multi_UBspline_1d_z* spline); //////// // 2D // //////// extern "C" multi_UBspline_2d_s_cuda* create_multi_UBspline_2d_s_cuda (multi_UBspline_2d_s* spline); extern "C" multi_UBspline_2d_s_cuda* create_multi_UBspline_2d_s_cuda_conv (multi_UBspline_2d_d* spline); extern "C" multi_UBspline_2d_c_cuda* create_multi_UBspline_2d_c_cuda (multi_UBspline_2d_c* spline); extern "C" multi_UBspline_2d_c_cuda* create_multi_UBspline_2d_c_cuda_conv (multi_UBspline_2d_z* spline); extern "C" multi_UBspline_2d_d_cuda* create_multi_UBspline_2d_d_cuda (multi_UBspline_2d_d* spline); extern "C" multi_UBspline_2d_z_cuda* create_multi_UBspline_2d_z_cuda (multi_UBspline_2d_z* spline); //////// // 3D // //////// extern "C" multi_UBspline_3d_s_cuda* create_multi_UBspline_3d_s_cuda (multi_UBspline_3d_s* spline); extern "C" multi_UBspline_3d_s_cuda* create_multi_UBspline_3d_s_cuda_conv (multi_UBspline_3d_d* spline); extern "C" multi_UBspline_3d_c_cuda* create_multi_UBspline_3d_c_cuda (multi_UBspline_3d_c* spline); extern "C" multi_UBspline_3d_c_cuda* create_multi_UBspline_3d_c_cuda_conv (multi_UBspline_3d_z* spline); extern "C" multi_UBspline_3d_d_cuda* create_multi_UBspline_3d_d_cuda (multi_UBspline_3d_d* spline); extern "C" multi_UBspline_3d_z_cuda* create_multi_UBspline_3d_z_cuda (multi_UBspline_3d_z* spline); #endif einspline-0.9.2/src/multi_bspline_eval_std_z_cpp.cc0000664000113000011300000000276611015560045017443 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. 
// // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. // // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #include "multi_bspline_eval_std_z_impl.h" einspline-0.9.2/src/bspline.h0000664000113000011300000000513711162451752013021 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. // // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. 
// // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #ifndef BSPLINE_H #define BSPLINE_H #include "bspline_base.h" //////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////// //// Bspline structure definitions //// //////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////// #include "bspline_structs.h" #include "multi_bspline_structs.h" // Currently, some of the single-precision routines use SSE2 instructions #ifdef HAVE_SSE2 #include "bspline_eval_sse_s.h" #include "bspline_eval_sse_c.h" #include "bspline_eval_sse_d.h" #include "bspline_eval_sse_z.h" #elif defined HAVE_SSE #include "bspline_eval_sse_s.h" #include "bspline_eval_sse_c.h" #include "bspline_eval_std_d.h" #include "bspline_eval_std_z.h" #elif defined USE_ALTIVEC #include "bspline_eval_altivec_s.h" #include "bspline_eval_std_c.h" #include "bspline_eval_std_d.h" #include "bspline_eval_std_z.h" #else #include "bspline_eval_std_s.h" #include "bspline_eval_std_c.h" #include "bspline_eval_std_d.h" #include "bspline_eval_std_z.h" #endif #include "bspline_create.h" #include "multi_bspline_create.h" #endif einspline-0.9.2/src/multi_bspline_eval_std_c_cpp.cc0000664000113000011300000000276611015565050017415 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. 
// // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. // // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #include "multi_bspline_eval_std_c_impl.h" einspline-0.9.2/src/bspline_eval_std_c.h0000664000113000011300000013264111012400563015171 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. // // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. 
// // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #ifndef BSPLINE_EVAL_STD_C_H #define BSPLINE_EVAL_STD_C_H #include #include extern const float* restrict Af; extern const float* restrict dAf; extern const float* restrict d2Af; /************************************************************/ /* 1D single-precision, real evaulation functions */ /************************************************************/ /* Value only */ inline void eval_UBspline_1d_c (UBspline_1d_c * restrict spline, double x, complex_float* restrict val) { x -= spline->x_grid.start; float u = x*spline->x_grid.delta_inv; float ipart, t; t = modff (u, &ipart); int i = (int) ipart; float tp[4]; tp[0] = t*t*t; tp[1] = t*t; tp[2] = t; tp[3] = 1.0; complex_float* restrict coefs = spline->coefs; *val = (coefs[i+0]*(Af[ 0]*tp[0] + Af[ 1]*tp[1] + Af[ 2]*tp[2] + Af[ 3]*tp[3])+ coefs[i+1]*(Af[ 4]*tp[0] + Af[ 5]*tp[1] + Af[ 6]*tp[2] + Af[ 7]*tp[3])+ coefs[i+2]*(Af[ 8]*tp[0] + Af[ 9]*tp[1] + Af[10]*tp[2] + Af[11]*tp[3])+ coefs[i+3]*(Af[12]*tp[0] + Af[13]*tp[1] + Af[14]*tp[2] + Af[15]*tp[3])); } /* Value and first derivative */ inline void eval_UBspline_1d_c_vg (UBspline_1d_c * restrict spline, double x, complex_float* restrict val, complex_float* restrict grad) { x -= spline->x_grid.start; float u = x*spline->x_grid.delta_inv; float ipart, t; t = modff (u, &ipart); int i = (int) ipart; float tp[4]; tp[0] = t*t*t; tp[1] = t*t; tp[2] = t; tp[3] = 1.0; complex_float* restrict coefs = spline->coefs; float dxInv = spline->x_grid.delta_inv; *val = (coefs[i+0]*(Af[ 0]*tp[0] + Af[ 1]*tp[1] + Af[ 2]*tp[2] + Af[ 3]*tp[3])+ coefs[i+1]*(Af[ 4]*tp[0] + Af[ 5]*tp[1] + Af[ 6]*tp[2] + Af[ 7]*tp[3])+ coefs[i+2]*(Af[ 8]*tp[0] + Af[ 9]*tp[1] + Af[10]*tp[2] + 
Af[11]*tp[3])+ coefs[i+3]*(Af[12]*tp[0] + Af[13]*tp[1] + Af[14]*tp[2] + Af[15]*tp[3])); *grad = dxInv * (coefs[i+0]*(dAf[ 1]*tp[1] + dAf[ 2]*tp[2] + dAf[ 3]*tp[3])+ coefs[i+1]*(dAf[ 5]*tp[1] + dAf[ 6]*tp[2] + dAf[ 7]*tp[3])+ coefs[i+2]*(dAf[ 9]*tp[1] + dAf[10]*tp[2] + dAf[11]*tp[3])+ coefs[i+3]*(dAf[13]*tp[1] + dAf[14]*tp[2] + dAf[15]*tp[3])); } /* Value, first derivative, and second derivative */ inline void eval_UBspline_1d_c_vgl (UBspline_1d_c * restrict spline, double x, complex_float* restrict val, complex_float* restrict grad, complex_float* restrict lapl) { x -= spline->x_grid.start; float u = x*spline->x_grid.delta_inv; float ipart, t; t = modff (u, &ipart); int i = (int) ipart; float tp[4]; tp[0] = t*t*t; tp[1] = t*t; tp[2] = t; tp[3] = 1.0; complex_float* restrict coefs = spline->coefs; float dxInv = spline->x_grid.delta_inv; *val = (coefs[i+0]*(Af[ 0]*tp[0] + Af[ 1]*tp[1] + Af[ 2]*tp[2] + Af[ 3]*tp[3])+ coefs[i+1]*(Af[ 4]*tp[0] + Af[ 5]*tp[1] + Af[ 6]*tp[2] + Af[ 7]*tp[3])+ coefs[i+2]*(Af[ 8]*tp[0] + Af[ 9]*tp[1] + Af[10]*tp[2] + Af[11]*tp[3])+ coefs[i+3]*(Af[12]*tp[0] + Af[13]*tp[1] + Af[14]*tp[2] + Af[15]*tp[3])); *grad = dxInv * (coefs[i+0]*(dAf[ 1]*tp[1] + dAf[ 2]*tp[2] + dAf[ 3]*tp[3])+ coefs[i+1]*(dAf[ 5]*tp[1] + dAf[ 6]*tp[2] + dAf[ 7]*tp[3])+ coefs[i+2]*(dAf[ 9]*tp[1] + dAf[10]*tp[2] + dAf[11]*tp[3])+ coefs[i+3]*(dAf[13]*tp[1] + dAf[14]*tp[2] + dAf[15]*tp[3])); *lapl = dxInv * dxInv * (coefs[i+0]*(d2Af[ 2]*tp[2] + d2Af[ 3]*tp[3])+ coefs[i+1]*(d2Af[ 6]*tp[2] + d2Af[ 7]*tp[3])+ coefs[i+2]*(d2Af[10]*tp[2] + d2Af[11]*tp[3])+ coefs[i+3]*(d2Af[14]*tp[2] + d2Af[15]*tp[3])); } inline void eval_UBspline_1d_c_vgh (UBspline_1d_c * restrict spline, double x, complex_float* restrict val, complex_float* restrict grad, complex_float* restrict hess) { eval_UBspline_1d_c_vgl (spline, x, val, grad, hess); } /************************************************************/ /* 2D single-precision, real evaulation functions */ 
/************************************************************/ /* Value only */ inline void eval_UBspline_2d_c (UBspline_2d_c * restrict spline, double x, double y, complex_float* restrict val) { x -= spline->x_grid.start; y -= spline->y_grid.start; float ux = x*spline->x_grid.delta_inv; float uy = y*spline->y_grid.delta_inv; float ipartx, iparty, tx, ty; tx = modff (ux, &ipartx); ty = modff (uy, &iparty); int ix = (int) ipartx; int iy = (int) iparty; float tpx[4], tpy[4], a[4], b[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; complex_float* restrict coefs = spline->coefs; a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]); a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]); b[0] = (Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]); b[1] = (Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]); b[2] = (Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]); b[3] = (Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]); int xs = spline->x_stride; #define C(i,j) coefs[(ix+(i))*xs+iy+(j)] *val = (a[0]*(C(0,0)*b[0]+C(0,1)*b[1]+C(0,2)*b[2]+C(0,3)*b[3])+ a[1]*(C(1,0)*b[0]+C(1,1)*b[1]+C(1,2)*b[2]+C(1,3)*b[3])+ a[2]*(C(2,0)*b[0]+C(2,1)*b[1]+C(2,2)*b[2]+C(2,3)*b[3])+ a[3]*(C(3,0)*b[0]+C(3,1)*b[1]+C(3,2)*b[2]+C(3,3)*b[3])); #undef C } /* Value and gradient */ inline void eval_UBspline_2d_c_vg (UBspline_2d_c * restrict spline, double x, double y, complex_float* restrict val, complex_float* restrict grad) { x -= spline->x_grid.start; y -= spline->y_grid.start; float ux = x*spline->x_grid.delta_inv; float uy = y*spline->y_grid.delta_inv; float ipartx, iparty, tx, ty; tx = modff (ux, &ipartx); ty = modff (uy, &iparty); int ix = (int) ipartx; int iy = (int) iparty; float tpx[4], 
tpy[4], a[4], b[4], da[4], db[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; complex_float* restrict coefs = spline->coefs; a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]); a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]); da[0] = (dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]); da[1] = (dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]); da[2] = (dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]); da[3] = (dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]); b[0] = (Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]); b[1] = (Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]); b[2] = (Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]); b[3] = (Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]); db[0] = (dAf[ 1]*tpy[1] + dAf[ 2]*tpy[2] + dAf[ 3]*tpy[3]); db[1] = (dAf[ 5]*tpy[1] + dAf[ 6]*tpy[2] + dAf[ 7]*tpy[3]); db[2] = (dAf[ 9]*tpy[1] + dAf[10]*tpy[2] + dAf[11]*tpy[3]); db[3] = (dAf[13]*tpy[1] + dAf[14]*tpy[2] + dAf[15]*tpy[3]); int xs = spline->x_stride; float dxInv = spline->x_grid.delta_inv; float dyInv = spline->y_grid.delta_inv; #define C(i,j) coefs[(ix+(i))*xs+iy+(j)] *val = (a[0]*(C(0,0)*b[0]+C(0,1)*b[1]+C(0,2)*b[2]+C(0,3)*b[3])+ a[1]*(C(1,0)*b[0]+C(1,1)*b[1]+C(1,2)*b[2]+C(1,3)*b[3])+ a[2]*(C(2,0)*b[0]+C(2,1)*b[1]+C(2,2)*b[2]+C(2,3)*b[3])+ a[3]*(C(3,0)*b[0]+C(3,1)*b[1]+C(3,2)*b[2]+C(3,3)*b[3])); grad[0] = dxInv * (da[0]*(C(0,0)*b[0]+C(0,1)*b[1]+C(0,2)*b[2]+C(0,3)*b[3])+ da[1]*(C(1,0)*b[0]+C(1,1)*b[1]+C(1,2)*b[2]+C(1,3)*b[3])+ da[2]*(C(2,0)*b[0]+C(2,1)*b[1]+C(2,2)*b[2]+C(2,3)*b[3])+ da[3]*(C(3,0)*b[0]+C(3,1)*b[1]+C(3,2)*b[2]+C(3,3)*b[3])); grad[1] = dyInv * (a[0]*(C(0,0)*db[0]+C(0,1)*db[1]+C(0,2)*db[2]+C(0,3)*db[3])+ 
a[1]*(C(1,0)*db[0]+C(1,1)*db[1]+C(1,2)*db[2]+C(1,3)*db[3])+ a[2]*(C(2,0)*db[0]+C(2,1)*db[1]+C(2,2)*db[2]+C(2,3)*db[3])+ a[3]*(C(3,0)*db[0]+C(3,1)*db[1]+C(3,2)*db[2]+C(3,3)*db[3])); #undef C } /* Value, gradient, and laplacian */ inline void eval_UBspline_2d_c_vgl (UBspline_2d_c * restrict spline, double x, double y, complex_float* restrict val, complex_float* restrict grad, complex_float* restrict lapl) { x -= spline->x_grid.start; y -= spline->y_grid.start; float ux = x*spline->x_grid.delta_inv; float uy = y*spline->y_grid.delta_inv; float ipartx, iparty, tx, ty; tx = modff (ux, &ipartx); ty = modff (uy, &iparty); int ix = (int) ipartx; int iy = (int) iparty; float tpx[4], tpy[4], a[4], b[4], da[4], db[4], d2a[4], d2b[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; complex_float* restrict coefs = spline->coefs; a[0] = ( Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); a[1] = ( Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); a[2] = ( Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]); a[3] = ( Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]); da[0] = ( dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]); da[1] = ( dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]); da[2] = ( dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]); da[3] = ( dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]); d2a[0] = (d2Af[ 2]*tpx[2] + d2Af[ 3]*tpx[3]); d2a[1] = (d2Af[ 6]*tpx[2] + d2Af[ 7]*tpx[3]); d2a[2] = (d2Af[10]*tpx[2] + d2Af[11]*tpx[3]); d2a[3] = (d2Af[14]*tpx[2] + d2Af[15]*tpx[3]); b[0] = ( Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]); b[1] = ( Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]); b[2] = ( Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]); b[3] = ( Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]); db[0] = (dAf[ 1]*tpy[1] + dAf[ 2]*tpy[2] + dAf[ 3]*tpy[3]); db[1] = (dAf[ 5]*tpy[1] 
+ dAf[ 6]*tpy[2] + dAf[ 7]*tpy[3]); db[2] = (dAf[ 9]*tpy[1] + dAf[10]*tpy[2] + dAf[11]*tpy[3]); db[3] = (dAf[13]*tpy[1] + dAf[14]*tpy[2] + dAf[15]*tpy[3]); d2b[0] = (d2Af[ 2]*tpy[2] + d2Af[ 3]*tpy[3]); d2b[1] = (d2Af[ 6]*tpy[2] + d2Af[ 7]*tpy[3]); d2b[2] = (d2Af[10]*tpy[2] + d2Af[11]*tpy[3]); d2b[3] = (d2Af[14]*tpy[2] + d2Af[15]*tpy[3]); int xs = spline->x_stride; float dxInv = spline->x_grid.delta_inv; float dyInv = spline->y_grid.delta_inv; #define C(i,j) coefs[(ix+(i))*xs+iy+(j)] *val = (a[0]*(C(0,0)*b[0]+C(0,1)*b[1]+C(0,2)*b[2]+C(0,3)*b[3])+ a[1]*(C(1,0)*b[0]+C(1,1)*b[1]+C(1,2)*b[2]+C(1,3)*b[3])+ a[2]*(C(2,0)*b[0]+C(2,1)*b[1]+C(2,2)*b[2]+C(2,3)*b[3])+ a[3]*(C(3,0)*b[0]+C(3,1)*b[1]+C(3,2)*b[2]+C(3,3)*b[3])); grad[0] = dxInv * (da[0]*(C(0,0)*b[0]+C(0,1)*b[1]+C(0,2)*b[2]+C(0,3)*b[3])+ da[1]*(C(1,0)*b[0]+C(1,1)*b[1]+C(1,2)*b[2]+C(1,3)*b[3])+ da[2]*(C(2,0)*b[0]+C(2,1)*b[1]+C(2,2)*b[2]+C(2,3)*b[3])+ da[3]*(C(3,0)*b[0]+C(3,1)*b[1]+C(3,2)*b[2]+C(3,3)*b[3])); grad[1] = dyInv* (a[0]*(C(0,0)*db[0]+C(0,1)*db[1]+C(0,2)*db[2]+C(0,3)*db[3])+ a[1]*(C(1,0)*db[0]+C(1,1)*db[1]+C(1,2)*db[2]+C(1,3)*db[3])+ a[2]*(C(2,0)*db[0]+C(2,1)*db[1]+C(2,2)*db[2]+C(2,3)*db[3])+ a[3]*(C(3,0)*db[0]+C(3,1)*db[1]+C(3,2)*db[2]+C(3,3)*db[3])); *lapl = dyInv * dyInv * (a[0]*(C(0,0)*d2b[0]+C(0,1)*d2b[1]+C(0,2)*d2b[2]+C(0,3)*d2b[3])+ a[1]*(C(1,0)*d2b[0]+C(1,1)*d2b[1]+C(1,2)*d2b[2]+C(1,3)*d2b[3])+ a[2]*(C(2,0)*d2b[0]+C(2,1)*d2b[1]+C(2,2)*d2b[2]+C(2,3)*d2b[3])+ a[3]*(C(3,0)*d2b[0]+C(3,1)*d2b[1]+C(3,2)*d2b[2]+C(3,3)*d2b[3])) + dxInv * dxInv * (d2a[0]*(C(0,0)*b[0]+C(0,1)*b[1]+C(0,2)*b[2]+C(0,3)*b[3])+ d2a[1]*(C(1,0)*b[0]+C(1,1)*b[1]+C(1,2)*b[2]+C(1,3)*b[3])+ d2a[2]*(C(2,0)*b[0]+C(2,1)*b[1]+C(2,2)*b[2]+C(2,3)*b[3])+ d2a[3]*(C(3,0)*b[0]+C(3,1)*b[1]+C(3,2)*b[2]+C(3,3)*b[3])); #undef C } /* Value, gradient, and Hessian */ inline void eval_UBspline_2d_c_vgh (UBspline_2d_c * restrict spline, double x, double y, complex_float* restrict val, complex_float* restrict grad, complex_float* restrict hess) { x -= 
spline->x_grid.start; y -= spline->y_grid.start; float ux = x*spline->x_grid.delta_inv; float uy = y*spline->y_grid.delta_inv; float ipartx, iparty, tx, ty; tx = modff (ux, &ipartx); ty = modff (uy, &iparty); int ix = (int) ipartx; int iy = (int) iparty; float tpx[4], tpy[4], a[4], b[4], da[4], db[4], d2a[4], d2b[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; complex_float* restrict coefs = spline->coefs; a[0] = ( Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); a[1] = ( Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); a[2] = ( Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]); a[3] = ( Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]); da[0] = ( dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]); da[1] = ( dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]); da[2] = ( dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]); da[3] = ( dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]); d2a[0] = (d2Af[ 2]*tpx[2] + d2Af[ 3]*tpx[3]); d2a[1] = (d2Af[ 6]*tpx[2] + d2Af[ 7]*tpx[3]); d2a[2] = (d2Af[10]*tpx[2] + d2Af[11]*tpx[3]); d2a[3] = (d2Af[14]*tpx[2] + d2Af[15]*tpx[3]); b[0] = ( Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]); b[1] = ( Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]); b[2] = ( Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]); b[3] = ( Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]); db[0] = ( dAf[ 1]*tpy[1] + dAf[ 2]*tpy[2] + dAf[ 3]*tpy[3]); db[1] = ( dAf[ 5]*tpy[1] + dAf[ 6]*tpy[2] + dAf[ 7]*tpy[3]); db[2] = ( dAf[ 9]*tpy[1] + dAf[10]*tpy[2] + dAf[11]*tpy[3]); db[3] = ( dAf[13]*tpy[1] + dAf[14]*tpy[2] + dAf[15]*tpy[3]); d2b[0] = (d2Af[ 2]*tpy[2] + d2Af[ 3]*tpy[3]); d2b[1] = (d2Af[ 6]*tpy[2] + d2Af[ 7]*tpy[3]); d2b[2] = (d2Af[10]*tpy[2] + d2Af[11]*tpy[3]); d2b[3] = (d2Af[14]*tpy[2] + d2Af[15]*tpy[3]); int xs = spline->x_stride; float dxInv = spline->x_grid.delta_inv; 
float dyInv = spline->y_grid.delta_inv; #define C(i,j) coefs[(ix+(i))*xs+iy+(j)] *val = ( a[0]*(C(0,0)* b[0]+C(0,1)* b[1]+C(0,2)* b[2]+C(0,3)* b[3])+ a[1]*(C(1,0)* b[0]+C(1,1)* b[1]+C(1,2)* b[2]+C(1,3)* b[3])+ a[2]*(C(2,0)* b[0]+C(2,1)* b[1]+C(2,2)* b[2]+C(2,3)* b[3])+ a[3]*(C(3,0)* b[0]+C(3,1)* b[1]+C(3,2)* b[2]+C(3,3)* b[3])); grad[0] = dxInv * ( da[0]*(C(0,0)* b[0]+C(0,1)* b[1]+C(0,2)* b[2]+C(0,3)* b[3])+ da[1]*(C(1,0)* b[0]+C(1,1)* b[1]+C(1,2)* b[2]+C(1,3)* b[3])+ da[2]*(C(2,0)* b[0]+C(2,1)* b[1]+C(2,2)* b[2]+C(2,3)* b[3])+ da[3]*(C(3,0)* b[0]+C(3,1)* b[1]+C(3,2)* b[2]+C(3,3)* b[3])); grad[1] = dyInv * ( a[0]*(C(0,0)* db[0]+C(0,1)* db[1]+C(0,2)* db[2]+C(0,3)* db[3])+ a[1]*(C(1,0)* db[0]+C(1,1)* db[1]+C(1,2)* db[2]+C(1,3)* db[3])+ a[2]*(C(2,0)* db[0]+C(2,1)* db[1]+C(2,2)* db[2]+C(2,3)* db[3])+ a[3]*(C(3,0)* db[0]+C(3,1)* db[1]+C(3,2)* db[2]+C(3,3)* db[3])); hess[0] = dxInv * dxInv * (d2a[0]*(C(0,0)* b[0]+C(0,1)* b[1]+C(0,2)* b[2]+C(0,3)* b[3])+ d2a[1]*(C(1,0)* b[0]+C(1,1)* b[1]+C(1,2)* b[2]+C(1,3)* b[3])+ d2a[2]*(C(2,0)* b[0]+C(2,1)* b[1]+C(2,2)* b[2]+C(2,3)* b[3])+ d2a[3]*(C(3,0)* b[0]+C(3,1)* b[1]+C(3,2)* b[2]+C(3,3)* b[3])); hess[1] = dxInv * dyInv * ( da[0]*(C(0,0)* db[0]+C(0,1)* db[1]+C(0,2)* db[2]+C(0,3)* db[3])+ da[1]*(C(1,0)* db[0]+C(1,1)* db[1]+C(1,2)* db[2]+C(1,3)* db[3])+ da[2]*(C(2,0)* db[0]+C(2,1)* db[1]+C(2,2)* db[2]+C(2,3)* db[3])+ da[3]*(C(3,0)* db[0]+C(3,1)* db[1]+C(3,2)* db[2]+C(3,3)* db[3])); hess[3] = dyInv * dyInv * ( a[0]*(C(0,0)*d2b[0]+C(0,1)*d2b[1]+C(0,2)*d2b[2]+C(0,3)*d2b[3])+ a[1]*(C(1,0)*d2b[0]+C(1,1)*d2b[1]+C(1,2)*d2b[2]+C(1,3)*d2b[3])+ a[2]*(C(2,0)*d2b[0]+C(2,1)*d2b[1]+C(2,2)*d2b[2]+C(2,3)*d2b[3])+ a[3]*(C(3,0)*d2b[0]+C(3,1)*d2b[1]+C(3,2)*d2b[2]+C(3,3)*d2b[3])); hess[2] = hess[1]; #undef C } /************************************************************/ /* 3D single-precision, real evaulation functions */ /************************************************************/ /* Value only */ inline void eval_UBspline_3d_c (UBspline_3d_c * 
restrict spline, double x, double y, double z, complex_float* restrict val) { x -= spline->x_grid.start; y -= spline->y_grid.start; z -= spline->z_grid.start; float ux = x*spline->x_grid.delta_inv; float uy = y*spline->y_grid.delta_inv; float uz = z*spline->z_grid.delta_inv; float ipartx, iparty, ipartz, tx, ty, tz; tx = modff (ux, &ipartx); int ix = (int) ipartx; ty = modff (uy, &iparty); int iy = (int) iparty; tz = modff (uz, &ipartz); int iz = (int) ipartz; float tpx[4], tpy[4], tpz[4], a[4], b[4], c[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; tpz[0] = tz*tz*tz; tpz[1] = tz*tz; tpz[2] = tz; tpz[3] = 1.0; complex_float* restrict coefs = spline->coefs; a[0] = (Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); a[1] = (Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); a[2] = (Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]); a[3] = (Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]); b[0] = (Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]); b[1] = (Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]); b[2] = (Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]); b[3] = (Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]); c[0] = (Af[ 0]*tpz[0] + Af[ 1]*tpz[1] + Af[ 2]*tpz[2] + Af[ 3]*tpz[3]); c[1] = (Af[ 4]*tpz[0] + Af[ 5]*tpz[1] + Af[ 6]*tpz[2] + Af[ 7]*tpz[3]); c[2] = (Af[ 8]*tpz[0] + Af[ 9]*tpz[1] + Af[10]*tpz[2] + Af[11]*tpz[3]); c[3] = (Af[12]*tpz[0] + Af[13]*tpz[1] + Af[14]*tpz[2] + Af[15]*tpz[3]); int xs = spline->x_stride; int ys = spline->y_stride; #define P(i,j,k) coefs[(ix+(i))*xs+(iy+(j))*ys+(iz+(k))] *val = (a[0]*(b[0]*(P(0,0,0)*c[0]+P(0,0,1)*c[1]+P(0,0,2)*c[2]+P(0,0,3)*c[3])+ b[1]*(P(0,1,0)*c[0]+P(0,1,1)*c[1]+P(0,1,2)*c[2]+P(0,1,3)*c[3])+ b[2]*(P(0,2,0)*c[0]+P(0,2,1)*c[1]+P(0,2,2)*c[2]+P(0,2,3)*c[3])+ b[3]*(P(0,3,0)*c[0]+P(0,3,1)*c[1]+P(0,3,2)*c[2]+P(0,3,3)*c[3]))+ 
a[1]*(b[0]*(P(1,0,0)*c[0]+P(1,0,1)*c[1]+P(1,0,2)*c[2]+P(1,0,3)*c[3])+ b[1]*(P(1,1,0)*c[0]+P(1,1,1)*c[1]+P(1,1,2)*c[2]+P(1,1,3)*c[3])+ b[2]*(P(1,2,0)*c[0]+P(1,2,1)*c[1]+P(1,2,2)*c[2]+P(1,2,3)*c[3])+ b[3]*(P(1,3,0)*c[0]+P(1,3,1)*c[1]+P(1,3,2)*c[2]+P(1,3,3)*c[3]))+ a[2]*(b[0]*(P(2,0,0)*c[0]+P(2,0,1)*c[1]+P(2,0,2)*c[2]+P(2,0,3)*c[3])+ b[1]*(P(2,1,0)*c[0]+P(2,1,1)*c[1]+P(2,1,2)*c[2]+P(2,1,3)*c[3])+ b[2]*(P(2,2,0)*c[0]+P(2,2,1)*c[1]+P(2,2,2)*c[2]+P(2,2,3)*c[3])+ b[3]*(P(2,3,0)*c[0]+P(2,3,1)*c[1]+P(2,3,2)*c[2]+P(2,3,3)*c[3]))+ a[3]*(b[0]*(P(3,0,0)*c[0]+P(3,0,1)*c[1]+P(3,0,2)*c[2]+P(3,0,3)*c[3])+ b[1]*(P(3,1,0)*c[0]+P(3,1,1)*c[1]+P(3,1,2)*c[2]+P(3,1,3)*c[3])+ b[2]*(P(3,2,0)*c[0]+P(3,2,1)*c[1]+P(3,2,2)*c[2]+P(3,2,3)*c[3])+ b[3]*(P(3,3,0)*c[0]+P(3,3,1)*c[1]+P(3,3,2)*c[2]+P(3,3,3)*c[3]))); #undef P } /* Value and gradient */ inline void eval_UBspline_3d_c_vg (UBspline_3d_c * restrict spline, double x, double y, double z, complex_float* restrict val, complex_float* restrict grad) { x -= spline->x_grid.start; y -= spline->y_grid.start; z -= spline->z_grid.start; float ux = x*spline->x_grid.delta_inv; float uy = y*spline->y_grid.delta_inv; float uz = z*spline->z_grid.delta_inv; float ipartx, iparty, ipartz, tx, ty, tz; tx = modff (ux, &ipartx); int ix = (int) ipartx; ty = modff (uy, &iparty); int iy = (int) iparty; tz = modff (uz, &ipartz); int iz = (int) ipartz; float tpx[4], tpy[4], tpz[4], a[4], b[4], c[4], da[4], db[4], dc[4]; complex_float cP[16], bcP[4], dbcP[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; tpz[0] = tz*tz*tz; tpz[1] = tz*tz; tpz[2] = tz; tpz[3] = 1.0; complex_float* restrict coefs = spline->coefs; a[0] = ( Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); a[1] = ( Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); a[2] = ( Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]); a[3] = ( Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + 
Af[15]*tpx[3]); da[0] = ( dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]); da[1] = ( dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]); da[2] = ( dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]); da[3] = ( dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]); b[0] = ( Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]); b[1] = ( Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]); b[2] = ( Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]); b[3] = ( Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]); db[0] = (dAf[ 1]*tpy[1] + dAf[ 2]*tpy[2] + dAf[ 3]*tpy[3]); db[1] = (dAf[ 5]*tpy[1] + dAf[ 6]*tpy[2] + dAf[ 7]*tpy[3]); db[2] = (dAf[ 9]*tpy[1] + dAf[10]*tpy[2] + dAf[11]*tpy[3]); db[3] = (dAf[13]*tpy[1] + dAf[14]*tpy[2] + dAf[15]*tpy[3]); c[0] = ( Af[ 0]*tpz[0] + Af[ 1]*tpz[1] + Af[ 2]*tpz[2] + Af[ 3]*tpz[3]); c[1] = ( Af[ 4]*tpz[0] + Af[ 5]*tpz[1] + Af[ 6]*tpz[2] + Af[ 7]*tpz[3]); c[2] = ( Af[ 8]*tpz[0] + Af[ 9]*tpz[1] + Af[10]*tpz[2] + Af[11]*tpz[3]); c[3] = ( Af[12]*tpz[0] + Af[13]*tpz[1] + Af[14]*tpz[2] + Af[15]*tpz[3]); dc[0] = (dAf[ 1]*tpz[1] + dAf[ 2]*tpz[2] + dAf[ 3]*tpz[3]); dc[1] = (dAf[ 5]*tpz[1] + dAf[ 6]*tpz[2] + dAf[ 7]*tpz[3]); dc[2] = (dAf[ 9]*tpz[1] + dAf[10]*tpz[2] + dAf[11]*tpz[3]); dc[3] = (dAf[13]*tpz[1] + dAf[14]*tpz[2] + dAf[15]*tpz[3]); int xs = spline->x_stride; int ys = spline->y_stride; float dxInv = spline->x_grid.delta_inv; float dyInv = spline->y_grid.delta_inv; float dzInv = spline->z_grid.delta_inv; #define P(i,j,k) coefs[(ix+(i))*xs+(iy+(j))*ys+(iz+(k))] cP[ 0] = (P(0,0,0)*c[0]+P(0,0,1)*c[1]+P(0,0,2)*c[2]+P(0,0,3)*c[3]); cP[ 1] = (P(0,1,0)*c[0]+P(0,1,1)*c[1]+P(0,1,2)*c[2]+P(0,1,3)*c[3]); cP[ 2] = (P(0,2,0)*c[0]+P(0,2,1)*c[1]+P(0,2,2)*c[2]+P(0,2,3)*c[3]); cP[ 3] = (P(0,3,0)*c[0]+P(0,3,1)*c[1]+P(0,3,2)*c[2]+P(0,3,3)*c[3]); cP[ 4] = (P(1,0,0)*c[0]+P(1,0,1)*c[1]+P(1,0,2)*c[2]+P(1,0,3)*c[3]); cP[ 5] = (P(1,1,0)*c[0]+P(1,1,1)*c[1]+P(1,1,2)*c[2]+P(1,1,3)*c[3]); cP[ 6] = 
(P(1,2,0)*c[0]+P(1,2,1)*c[1]+P(1,2,2)*c[2]+P(1,2,3)*c[3]); cP[ 7] = (P(1,3,0)*c[0]+P(1,3,1)*c[1]+P(1,3,2)*c[2]+P(1,3,3)*c[3]); cP[ 8] = (P(2,0,0)*c[0]+P(2,0,1)*c[1]+P(2,0,2)*c[2]+P(2,0,3)*c[3]); cP[ 9] = (P(2,1,0)*c[0]+P(2,1,1)*c[1]+P(2,1,2)*c[2]+P(2,1,3)*c[3]); cP[10] = (P(2,2,0)*c[0]+P(2,2,1)*c[1]+P(2,2,2)*c[2]+P(2,2,3)*c[3]); cP[11] = (P(2,3,0)*c[0]+P(2,3,1)*c[1]+P(2,3,2)*c[2]+P(2,3,3)*c[3]); cP[12] = (P(3,0,0)*c[0]+P(3,0,1)*c[1]+P(3,0,2)*c[2]+P(3,0,3)*c[3]); cP[13] = (P(3,1,0)*c[0]+P(3,1,1)*c[1]+P(3,1,2)*c[2]+P(3,1,3)*c[3]); cP[14] = (P(3,2,0)*c[0]+P(3,2,1)*c[1]+P(3,2,2)*c[2]+P(3,2,3)*c[3]); cP[15] = (P(3,3,0)*c[0]+P(3,3,1)*c[1]+P(3,3,2)*c[2]+P(3,3,3)*c[3]); bcP[0] = ( b[0]*cP[ 0] + b[1]*cP[ 1] + b[2]*cP[ 2] + b[3]*cP[ 3]); bcP[1] = ( b[0]*cP[ 4] + b[1]*cP[ 5] + b[2]*cP[ 6] + b[3]*cP[ 7]); bcP[2] = ( b[0]*cP[ 8] + b[1]*cP[ 9] + b[2]*cP[10] + b[3]*cP[11]); bcP[3] = ( b[0]*cP[12] + b[1]*cP[13] + b[2]*cP[14] + b[3]*cP[15]); dbcP[0] = ( db[0]*cP[ 0] + db[1]*cP[ 1] + db[2]*cP[ 2] + db[3]*cP[ 3]); dbcP[1] = ( db[0]*cP[ 4] + db[1]*cP[ 5] + db[2]*cP[ 6] + db[3]*cP[ 7]); dbcP[2] = ( db[0]*cP[ 8] + db[1]*cP[ 9] + db[2]*cP[10] + db[3]*cP[11]); dbcP[3] = ( db[0]*cP[12] + db[1]*cP[13] + db[2]*cP[14] + db[3]*cP[15]); *val = ( a[0]*bcP[0] + a[1]*bcP[1] + a[2]*bcP[2] + a[3]*bcP[3]); grad[0] = dxInv * (da[0]*bcP[0] + da[1]*bcP[1] + da[2]*bcP[2] + da[3]*bcP[3]); grad[1] = dyInv * (a[0]*dbcP[0] + a[1]*dbcP[1] + a[2]*dbcP[2] + a[3]*dbcP[3]); grad[2] = dzInv * (a[0]*(b[0]*(P(0,0,0)*dc[0]+P(0,0,1)*dc[1]+P(0,0,2)*dc[2]+P(0,0,3)*dc[3])+ b[1]*(P(0,1,0)*dc[0]+P(0,1,1)*dc[1]+P(0,1,2)*dc[2]+P(0,1,3)*dc[3])+ b[2]*(P(0,2,0)*dc[0]+P(0,2,1)*dc[1]+P(0,2,2)*dc[2]+P(0,2,3)*dc[3])+ b[3]*(P(0,3,0)*dc[0]+P(0,3,1)*dc[1]+P(0,3,2)*dc[2]+P(0,3,3)*dc[3]))+ a[1]*(b[0]*(P(1,0,0)*dc[0]+P(1,0,1)*dc[1]+P(1,0,2)*dc[2]+P(1,0,3)*dc[3])+ b[1]*(P(1,1,0)*dc[0]+P(1,1,1)*dc[1]+P(1,1,2)*dc[2]+P(1,1,3)*dc[3])+ b[2]*(P(1,2,0)*dc[0]+P(1,2,1)*dc[1]+P(1,2,2)*dc[2]+P(1,2,3)*dc[3])+ 
b[3]*(P(1,3,0)*dc[0]+P(1,3,1)*dc[1]+P(1,3,2)*dc[2]+P(1,3,3)*dc[3]))+ a[2]*(b[0]*(P(2,0,0)*dc[0]+P(2,0,1)*dc[1]+P(2,0,2)*dc[2]+P(2,0,3)*dc[3])+ b[1]*(P(2,1,0)*dc[0]+P(2,1,1)*dc[1]+P(2,1,2)*dc[2]+P(2,1,3)*dc[3])+ b[2]*(P(2,2,0)*dc[0]+P(2,2,1)*dc[1]+P(2,2,2)*dc[2]+P(2,2,3)*dc[3])+ b[3]*(P(2,3,0)*dc[0]+P(2,3,1)*dc[1]+P(2,3,2)*dc[2]+P(2,3,3)*dc[3]))+ a[3]*(b[0]*(P(3,0,0)*dc[0]+P(3,0,1)*dc[1]+P(3,0,2)*dc[2]+P(3,0,3)*dc[3])+ b[1]*(P(3,1,0)*dc[0]+P(3,1,1)*dc[1]+P(3,1,2)*dc[2]+P(3,1,3)*dc[3])+ b[2]*(P(3,2,0)*dc[0]+P(3,2,1)*dc[1]+P(3,2,2)*dc[2]+P(3,2,3)*dc[3])+ b[3]*(P(3,3,0)*dc[0]+P(3,3,1)*dc[1]+P(3,3,2)*dc[2]+P(3,3,3)*dc[3]))); #undef P } /* Value, gradient, and laplacian */ inline void eval_UBspline_3d_c_vgl (UBspline_3d_c * restrict spline, double x, double y, double z, complex_float* restrict val, complex_float* restrict grad, complex_float* restrict lapl) { x -= spline->x_grid.start; y -= spline->y_grid.start; z -= spline->z_grid.start; float ux = x*spline->x_grid.delta_inv; float uy = y*spline->y_grid.delta_inv; float uz = z*spline->z_grid.delta_inv; float ipartx, iparty, ipartz, tx, ty, tz; tx = modff (ux, &ipartx); int ix = (int) ipartx; ty = modff (uy, &iparty); int iy = (int) iparty; tz = modff (uz, &ipartz); int iz = (int) ipartz; float tpx[4], tpy[4], tpz[4], a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4]; complex_float cP[16], dcP[16], bcP[4], dbcP[4], d2bcP[4], bdcP[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; tpz[0] = tz*tz*tz; tpz[1] = tz*tz; tpz[2] = tz; tpz[3] = 1.0; complex_float* restrict coefs = spline->coefs; a[0] = ( Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); a[1] = ( Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); a[2] = ( Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]); a[3] = ( Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]); da[0] = ( dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]); 
da[1] = ( dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]); da[2] = ( dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]); da[3] = ( dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]); d2a[0] = (d2Af[ 2]*tpx[2] + d2Af[ 3]*tpx[3]); d2a[1] = (d2Af[ 6]*tpx[2] + d2Af[ 7]*tpx[3]); d2a[2] = (d2Af[10]*tpx[2] + d2Af[11]*tpx[3]); d2a[3] = (d2Af[14]*tpx[2] + d2Af[15]*tpx[3]); b[0] = ( Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]); b[1] = ( Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]); b[2] = ( Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]); b[3] = ( Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]); db[0] = (dAf[ 1]*tpy[1] + dAf[ 2]*tpy[2] + dAf[ 3]*tpy[3]); db[1] = (dAf[ 5]*tpy[1] + dAf[ 6]*tpy[2] + dAf[ 7]*tpy[3]); db[2] = (dAf[ 9]*tpy[1] + dAf[10]*tpy[2] + dAf[11]*tpy[3]); db[3] = (dAf[13]*tpy[1] + dAf[14]*tpy[2] + dAf[15]*tpy[3]); d2b[0] = (d2Af[ 2]*tpy[2] + d2Af[ 3]*tpy[3]); d2b[1] = (d2Af[ 6]*tpy[2] + d2Af[ 7]*tpy[3]); d2b[2] = (d2Af[10]*tpy[2] + d2Af[11]*tpy[3]); d2b[3] = (d2Af[14]*tpy[2] + d2Af[15]*tpy[3]); c[0] = ( Af[ 0]*tpz[0] + Af[ 1]*tpz[1] + Af[ 2]*tpz[2] + Af[ 3]*tpz[3]); c[1] = ( Af[ 4]*tpz[0] + Af[ 5]*tpz[1] + Af[ 6]*tpz[2] + Af[ 7]*tpz[3]); c[2] = ( Af[ 8]*tpz[0] + Af[ 9]*tpz[1] + Af[10]*tpz[2] + Af[11]*tpz[3]); c[3] = ( Af[12]*tpz[0] + Af[13]*tpz[1] + Af[14]*tpz[2] + Af[15]*tpz[3]); dc[0] = (dAf[ 1]*tpz[1] + dAf[ 2]*tpz[2] + dAf[ 3]*tpz[3]); dc[1] = (dAf[ 5]*tpz[1] + dAf[ 6]*tpz[2] + dAf[ 7]*tpz[3]); dc[2] = (dAf[ 9]*tpz[1] + dAf[10]*tpz[2] + dAf[11]*tpz[3]); dc[3] = (dAf[13]*tpz[1] + dAf[14]*tpz[2] + dAf[15]*tpz[3]); d2c[0] = (d2Af[ 2]*tpz[2] + d2Af[ 3]*tpz[3]); d2c[1] = (d2Af[ 6]*tpz[2] + d2Af[ 7]*tpz[3]); d2c[2] = (d2Af[10]*tpz[2] + d2Af[11]*tpz[3]); d2c[3] = (d2Af[14]*tpz[2] + d2Af[15]*tpz[3]); int xs = spline->x_stride; int ys = spline->y_stride; float dxInv = spline->x_grid.delta_inv; float dyInv = spline->y_grid.delta_inv; float dzInv = spline->z_grid.delta_inv; #define P(i,j,k) 
coefs[(ix+(i))*xs+(iy+(j))*ys+(iz+(k))] cP[ 0] = (P(0,0,0)*c[0]+P(0,0,1)*c[1]+P(0,0,2)*c[2]+P(0,0,3)*c[3]); cP[ 1] = (P(0,1,0)*c[0]+P(0,1,1)*c[1]+P(0,1,2)*c[2]+P(0,1,3)*c[3]); cP[ 2] = (P(0,2,0)*c[0]+P(0,2,1)*c[1]+P(0,2,2)*c[2]+P(0,2,3)*c[3]); cP[ 3] = (P(0,3,0)*c[0]+P(0,3,1)*c[1]+P(0,3,2)*c[2]+P(0,3,3)*c[3]); cP[ 4] = (P(1,0,0)*c[0]+P(1,0,1)*c[1]+P(1,0,2)*c[2]+P(1,0,3)*c[3]); cP[ 5] = (P(1,1,0)*c[0]+P(1,1,1)*c[1]+P(1,1,2)*c[2]+P(1,1,3)*c[3]); cP[ 6] = (P(1,2,0)*c[0]+P(1,2,1)*c[1]+P(1,2,2)*c[2]+P(1,2,3)*c[3]); cP[ 7] = (P(1,3,0)*c[0]+P(1,3,1)*c[1]+P(1,3,2)*c[2]+P(1,3,3)*c[3]); cP[ 8] = (P(2,0,0)*c[0]+P(2,0,1)*c[1]+P(2,0,2)*c[2]+P(2,0,3)*c[3]); cP[ 9] = (P(2,1,0)*c[0]+P(2,1,1)*c[1]+P(2,1,2)*c[2]+P(2,1,3)*c[3]); cP[10] = (P(2,2,0)*c[0]+P(2,2,1)*c[1]+P(2,2,2)*c[2]+P(2,2,3)*c[3]); cP[11] = (P(2,3,0)*c[0]+P(2,3,1)*c[1]+P(2,3,2)*c[2]+P(2,3,3)*c[3]); cP[12] = (P(3,0,0)*c[0]+P(3,0,1)*c[1]+P(3,0,2)*c[2]+P(3,0,3)*c[3]); cP[13] = (P(3,1,0)*c[0]+P(3,1,1)*c[1]+P(3,1,2)*c[2]+P(3,1,3)*c[3]); cP[14] = (P(3,2,0)*c[0]+P(3,2,1)*c[1]+P(3,2,2)*c[2]+P(3,2,3)*c[3]); cP[15] = (P(3,3,0)*c[0]+P(3,3,1)*c[1]+P(3,3,2)*c[2]+P(3,3,3)*c[3]); dcP[ 0] = (P(0,0,0)*dc[0]+P(0,0,1)*dc[1]+P(0,0,2)*dc[2]+P(0,0,3)*dc[3]); dcP[ 1] = (P(0,1,0)*dc[0]+P(0,1,1)*dc[1]+P(0,1,2)*dc[2]+P(0,1,3)*dc[3]); dcP[ 2] = (P(0,2,0)*dc[0]+P(0,2,1)*dc[1]+P(0,2,2)*dc[2]+P(0,2,3)*dc[3]); dcP[ 3] = (P(0,3,0)*dc[0]+P(0,3,1)*dc[1]+P(0,3,2)*dc[2]+P(0,3,3)*dc[3]); dcP[ 4] = (P(1,0,0)*dc[0]+P(1,0,1)*dc[1]+P(1,0,2)*dc[2]+P(1,0,3)*dc[3]); dcP[ 5] = (P(1,1,0)*dc[0]+P(1,1,1)*dc[1]+P(1,1,2)*dc[2]+P(1,1,3)*dc[3]); dcP[ 6] = (P(1,2,0)*dc[0]+P(1,2,1)*dc[1]+P(1,2,2)*dc[2]+P(1,2,3)*dc[3]); dcP[ 7] = (P(1,3,0)*dc[0]+P(1,3,1)*dc[1]+P(1,3,2)*dc[2]+P(1,3,3)*dc[3]); dcP[ 8] = (P(2,0,0)*dc[0]+P(2,0,1)*dc[1]+P(2,0,2)*dc[2]+P(2,0,3)*dc[3]); dcP[ 9] = (P(2,1,0)*dc[0]+P(2,1,1)*dc[1]+P(2,1,2)*dc[2]+P(2,1,3)*dc[3]); dcP[10] = (P(2,2,0)*dc[0]+P(2,2,1)*dc[1]+P(2,2,2)*dc[2]+P(2,2,3)*dc[3]); dcP[11] = 
(P(2,3,0)*dc[0]+P(2,3,1)*dc[1]+P(2,3,2)*dc[2]+P(2,3,3)*dc[3]); dcP[12] = (P(3,0,0)*dc[0]+P(3,0,1)*dc[1]+P(3,0,2)*dc[2]+P(3,0,3)*dc[3]); dcP[13] = (P(3,1,0)*dc[0]+P(3,1,1)*dc[1]+P(3,1,2)*dc[2]+P(3,1,3)*dc[3]); dcP[14] = (P(3,2,0)*dc[0]+P(3,2,1)*dc[1]+P(3,2,2)*dc[2]+P(3,2,3)*dc[3]); dcP[15] = (P(3,3,0)*dc[0]+P(3,3,1)*dc[1]+P(3,3,2)*dc[2]+P(3,3,3)*dc[3]); bcP[0] = ( b[0]*cP[ 0] + b[1]*cP[ 1] + b[2]*cP[ 2] + b[3]*cP[ 3]); bcP[1] = ( b[0]*cP[ 4] + b[1]*cP[ 5] + b[2]*cP[ 6] + b[3]*cP[ 7]); bcP[2] = ( b[0]*cP[ 8] + b[1]*cP[ 9] + b[2]*cP[10] + b[3]*cP[11]); bcP[3] = ( b[0]*cP[12] + b[1]*cP[13] + b[2]*cP[14] + b[3]*cP[15]); dbcP[0] = ( db[0]*cP[ 0] + db[1]*cP[ 1] + db[2]*cP[ 2] + db[3]*cP[ 3]); dbcP[1] = ( db[0]*cP[ 4] + db[1]*cP[ 5] + db[2]*cP[ 6] + db[3]*cP[ 7]); dbcP[2] = ( db[0]*cP[ 8] + db[1]*cP[ 9] + db[2]*cP[10] + db[3]*cP[11]); dbcP[3] = ( db[0]*cP[12] + db[1]*cP[13] + db[2]*cP[14] + db[3]*cP[15]); bdcP[0] = ( b[0]*dcP[ 0] + b[1]*dcP[ 1] + b[2]*dcP[ 2] + b[3]*dcP[ 3]); bdcP[1] = ( b[0]*dcP[ 4] + b[1]*dcP[ 5] + b[2]*dcP[ 6] + b[3]*dcP[ 7]); bdcP[2] = ( b[0]*dcP[ 8] + b[1]*dcP[ 9] + b[2]*dcP[10] + b[3]*dcP[11]); bdcP[3] = ( b[0]*dcP[12] + b[1]*dcP[13] + b[2]*dcP[14] + b[3]*dcP[15]); d2bcP[0] = ( d2b[0]*cP[ 0] + d2b[1]*cP[ 1] + d2b[2]*cP[ 2] + d2b[3]*cP[ 3]); d2bcP[1] = ( d2b[0]*cP[ 4] + d2b[1]*cP[ 5] + d2b[2]*cP[ 6] + d2b[3]*cP[ 7]); d2bcP[2] = ( d2b[0]*cP[ 8] + d2b[1]*cP[ 9] + d2b[2]*cP[10] + d2b[3]*cP[11]); d2bcP[3] = ( d2b[0]*cP[12] + d2b[1]*cP[13] + d2b[2]*cP[14] + d2b[3]*cP[15]); *val = ( a[0]*bcP[0] + a[1]*bcP[1] + a[2]*bcP[2] + a[3]*bcP[3]); grad[0] = dxInv * (da[0]*bcP[0] + da[1]*bcP[1] + da[2]*bcP[2] + da[3]*bcP[3]); grad[1] = dyInv * (a[0]*dbcP[0] + a[1]*dbcP[1] + a[2]*dbcP[2] + a[3]*dbcP[3]); grad[2] = dzInv * (a[0]*bdcP[0] + a[1]*bdcP[1] + a[2]*bdcP[2] + a[3]*bdcP[3]); *lapl = dxInv * dxInv * (d2a[0]*bcP[0] + d2a[1]*bcP[1] + d2a[2]*bcP[2] + d2a[3]*bcP[3]) + dyInv * dyInv * (a[0]*d2bcP[0] + a[1]*d2bcP[1] + a[2]*d2bcP[2] + a[3]*d2bcP[3]) + + dzInv * dzInv * 
(a[0]*(b[0]*(P(0,0,0)*d2c[0]+P(0,0,1)*d2c[1]+P(0,0,2)*d2c[2]+P(0,0,3)*d2c[3])+ b[1]*(P(0,1,0)*d2c[0]+P(0,1,1)*d2c[1]+P(0,1,2)*d2c[2]+P(0,1,3)*d2c[3])+ b[2]*(P(0,2,0)*d2c[0]+P(0,2,1)*d2c[1]+P(0,2,2)*d2c[2]+P(0,2,3)*d2c[3])+ b[3]*(P(0,3,0)*d2c[0]+P(0,3,1)*d2c[1]+P(0,3,2)*d2c[2]+P(0,3,3)*d2c[3]))+ a[1]*(b[0]*(P(1,0,0)*d2c[0]+P(1,0,1)*d2c[1]+P(1,0,2)*d2c[2]+P(1,0,3)*d2c[3])+ b[1]*(P(1,1,0)*d2c[0]+P(1,1,1)*d2c[1]+P(1,1,2)*d2c[2]+P(1,1,3)*d2c[3])+ b[2]*(P(1,2,0)*d2c[0]+P(1,2,1)*d2c[1]+P(1,2,2)*d2c[2]+P(1,2,3)*d2c[3])+ b[3]*(P(1,3,0)*d2c[0]+P(1,3,1)*d2c[1]+P(1,3,2)*d2c[2]+P(1,3,3)*d2c[3]))+ a[2]*(b[0]*(P(2,0,0)*d2c[0]+P(2,0,1)*d2c[1]+P(2,0,2)*d2c[2]+P(2,0,3)*d2c[3])+ b[1]*(P(2,1,0)*d2c[0]+P(2,1,1)*d2c[1]+P(2,1,2)*d2c[2]+P(2,1,3)*d2c[3])+ b[2]*(P(2,2,0)*d2c[0]+P(2,2,1)*d2c[1]+P(2,2,2)*d2c[2]+P(2,2,3)*d2c[3])+ b[3]*(P(2,3,0)*d2c[0]+P(2,3,1)*d2c[1]+P(2,3,2)*d2c[2]+P(2,3,3)*d2c[3]))+ a[3]*(b[0]*(P(3,0,0)*d2c[0]+P(3,0,1)*d2c[1]+P(3,0,2)*d2c[2]+P(3,0,3)*d2c[3])+ b[1]*(P(3,1,0)*d2c[0]+P(3,1,1)*d2c[1]+P(3,1,2)*d2c[2]+P(3,1,3)*d2c[3])+ b[2]*(P(3,2,0)*d2c[0]+P(3,2,1)*d2c[1]+P(3,2,2)*d2c[2]+P(3,2,3)*d2c[3])+ b[3]*(P(3,3,0)*d2c[0]+P(3,3,1)*d2c[1]+P(3,3,2)*d2c[2]+P(3,3,3)*d2c[3]))); #undef P } /* Value, gradient, and Hessian */ inline void eval_UBspline_3d_c_vgh (UBspline_3d_c * restrict spline, double x, double y, double z, complex_float* restrict val, complex_float* restrict grad, complex_float* restrict hess) { x -= spline->x_grid.start; y -= spline->y_grid.start; z -= spline->z_grid.start; float ux = x*spline->x_grid.delta_inv; float uy = y*spline->y_grid.delta_inv; float uz = z*spline->z_grid.delta_inv; ux = fmin (ux, (double)(spline->x_grid.num)-1.0e-5); uy = fmin (uy, (double)(spline->y_grid.num)-1.0e-5); uz = fmin (uz, (double)(spline->z_grid.num)-1.0e-5); float ipartx, iparty, ipartz, tx, ty, tz; tx = modff (ux, &ipartx); int ix = (int) ipartx; ty = modff (uy, &iparty); int iy = (int) iparty; tz = modff (uz, &ipartz); int iz = (int) ipartz; float tpx[4], tpy[4], tpz[4], a[4], 
b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4]; complex_float cP[16], dcP[16], d2cP[16], bcP[4], dbcP[4], d2bcP[4], dbdcP[4], bd2cP[4], bdcP[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; tpz[0] = tz*tz*tz; tpz[1] = tz*tz; tpz[2] = tz; tpz[3] = 1.0; complex_float* restrict coefs = spline->coefs; a[0] = ( Af[ 0]*tpx[0] + Af[ 1]*tpx[1] + Af[ 2]*tpx[2] + Af[ 3]*tpx[3]); a[1] = ( Af[ 4]*tpx[0] + Af[ 5]*tpx[1] + Af[ 6]*tpx[2] + Af[ 7]*tpx[3]); a[2] = ( Af[ 8]*tpx[0] + Af[ 9]*tpx[1] + Af[10]*tpx[2] + Af[11]*tpx[3]); a[3] = ( Af[12]*tpx[0] + Af[13]*tpx[1] + Af[14]*tpx[2] + Af[15]*tpx[3]); da[0] = ( dAf[ 1]*tpx[1] + dAf[ 2]*tpx[2] + dAf[ 3]*tpx[3]); da[1] = ( dAf[ 5]*tpx[1] + dAf[ 6]*tpx[2] + dAf[ 7]*tpx[3]); da[2] = ( dAf[ 9]*tpx[1] + dAf[10]*tpx[2] + dAf[11]*tpx[3]); da[3] = ( dAf[13]*tpx[1] + dAf[14]*tpx[2] + dAf[15]*tpx[3]); d2a[0] = (d2Af[ 2]*tpx[2] + d2Af[ 3]*tpx[3]); d2a[1] = (d2Af[ 6]*tpx[2] + d2Af[ 7]*tpx[3]); d2a[2] = (d2Af[10]*tpx[2] + d2Af[11]*tpx[3]); d2a[3] = (d2Af[14]*tpx[2] + d2Af[15]*tpx[3]); b[0] = ( Af[ 0]*tpy[0] + Af[ 1]*tpy[1] + Af[ 2]*tpy[2] + Af[ 3]*tpy[3]); b[1] = ( Af[ 4]*tpy[0] + Af[ 5]*tpy[1] + Af[ 6]*tpy[2] + Af[ 7]*tpy[3]); b[2] = ( Af[ 8]*tpy[0] + Af[ 9]*tpy[1] + Af[10]*tpy[2] + Af[11]*tpy[3]); b[3] = ( Af[12]*tpy[0] + Af[13]*tpy[1] + Af[14]*tpy[2] + Af[15]*tpy[3]); db[0] = (dAf[ 1]*tpy[1] + dAf[ 2]*tpy[2] + dAf[ 3]*tpy[3]); db[1] = (dAf[ 5]*tpy[1] + dAf[ 6]*tpy[2] + dAf[ 7]*tpy[3]); db[2] = (dAf[ 9]*tpy[1] + dAf[10]*tpy[2] + dAf[11]*tpy[3]); db[3] = (dAf[13]*tpy[1] + dAf[14]*tpy[2] + dAf[15]*tpy[3]); d2b[0] = (d2Af[ 2]*tpy[2] + d2Af[ 3]*tpy[3]); d2b[1] = (d2Af[ 6]*tpy[2] + d2Af[ 7]*tpy[3]); d2b[2] = (d2Af[10]*tpy[2] + d2Af[11]*tpy[3]); d2b[3] = (d2Af[14]*tpy[2] + d2Af[15]*tpy[3]); c[0] = ( Af[ 0]*tpz[0] + Af[ 1]*tpz[1] + Af[ 2]*tpz[2] + Af[ 3]*tpz[3]); c[1] = ( Af[ 4]*tpz[0] + Af[ 5]*tpz[1] + Af[ 6]*tpz[2] + Af[ 7]*tpz[3]); c[2] = ( Af[ 8]*tpz[0] + Af[ 
9]*tpz[1] + Af[10]*tpz[2] + Af[11]*tpz[3]); c[3] = ( Af[12]*tpz[0] + Af[13]*tpz[1] + Af[14]*tpz[2] + Af[15]*tpz[3]); dc[0] = (dAf[ 1]*tpz[1] + dAf[ 2]*tpz[2] + dAf[ 3]*tpz[3]); dc[1] = (dAf[ 5]*tpz[1] + dAf[ 6]*tpz[2] + dAf[ 7]*tpz[3]); dc[2] = (dAf[ 9]*tpz[1] + dAf[10]*tpz[2] + dAf[11]*tpz[3]); dc[3] = (dAf[13]*tpz[1] + dAf[14]*tpz[2] + dAf[15]*tpz[3]); d2c[0] = (d2Af[ 2]*tpz[2] + d2Af[ 3]*tpz[3]); d2c[1] = (d2Af[ 6]*tpz[2] + d2Af[ 7]*tpz[3]); d2c[2] = (d2Af[10]*tpz[2] + d2Af[11]*tpz[3]); d2c[3] = (d2Af[14]*tpz[2] + d2Af[15]*tpz[3]); int xs = spline->x_stride; int ys = spline->y_stride; int offmax = (ix+3)*xs + (iy+3)*ys + iz+3; float dxInv = spline->x_grid.delta_inv; float dyInv = spline->y_grid.delta_inv; float dzInv = spline->z_grid.delta_inv; #define P(i,j,k) coefs[(ix+(i))*xs+(iy+(j))*ys+(iz+(k))] cP[ 0] = (P(0,0,0)*c[0]+P(0,0,1)*c[1]+P(0,0,2)*c[2]+P(0,0,3)*c[3]); cP[ 1] = (P(0,1,0)*c[0]+P(0,1,1)*c[1]+P(0,1,2)*c[2]+P(0,1,3)*c[3]); cP[ 2] = (P(0,2,0)*c[0]+P(0,2,1)*c[1]+P(0,2,2)*c[2]+P(0,2,3)*c[3]); cP[ 3] = (P(0,3,0)*c[0]+P(0,3,1)*c[1]+P(0,3,2)*c[2]+P(0,3,3)*c[3]); cP[ 4] = (P(1,0,0)*c[0]+P(1,0,1)*c[1]+P(1,0,2)*c[2]+P(1,0,3)*c[3]); cP[ 5] = (P(1,1,0)*c[0]+P(1,1,1)*c[1]+P(1,1,2)*c[2]+P(1,1,3)*c[3]); cP[ 6] = (P(1,2,0)*c[0]+P(1,2,1)*c[1]+P(1,2,2)*c[2]+P(1,2,3)*c[3]); cP[ 7] = (P(1,3,0)*c[0]+P(1,3,1)*c[1]+P(1,3,2)*c[2]+P(1,3,3)*c[3]); cP[ 8] = (P(2,0,0)*c[0]+P(2,0,1)*c[1]+P(2,0,2)*c[2]+P(2,0,3)*c[3]); cP[ 9] = (P(2,1,0)*c[0]+P(2,1,1)*c[1]+P(2,1,2)*c[2]+P(2,1,3)*c[3]); cP[10] = (P(2,2,0)*c[0]+P(2,2,1)*c[1]+P(2,2,2)*c[2]+P(2,2,3)*c[3]); cP[11] = (P(2,3,0)*c[0]+P(2,3,1)*c[1]+P(2,3,2)*c[2]+P(2,3,3)*c[3]); cP[12] = (P(3,0,0)*c[0]+P(3,0,1)*c[1]+P(3,0,2)*c[2]+P(3,0,3)*c[3]); cP[13] = (P(3,1,0)*c[0]+P(3,1,1)*c[1]+P(3,1,2)*c[2]+P(3,1,3)*c[3]); cP[14] = (P(3,2,0)*c[0]+P(3,2,1)*c[1]+P(3,2,2)*c[2]+P(3,2,3)*c[3]); cP[15] = (P(3,3,0)*c[0]+P(3,3,1)*c[1]+P(3,3,2)*c[2]+P(3,3,3)*c[3]); dcP[ 0] = (P(0,0,0)*dc[0]+P(0,0,1)*dc[1]+P(0,0,2)*dc[2]+P(0,0,3)*dc[3]); dcP[ 1] = 
(P(0,1,0)*dc[0]+P(0,1,1)*dc[1]+P(0,1,2)*dc[2]+P(0,1,3)*dc[3]); dcP[ 2] = (P(0,2,0)*dc[0]+P(0,2,1)*dc[1]+P(0,2,2)*dc[2]+P(0,2,3)*dc[3]); dcP[ 3] = (P(0,3,0)*dc[0]+P(0,3,1)*dc[1]+P(0,3,2)*dc[2]+P(0,3,3)*dc[3]); dcP[ 4] = (P(1,0,0)*dc[0]+P(1,0,1)*dc[1]+P(1,0,2)*dc[2]+P(1,0,3)*dc[3]); dcP[ 5] = (P(1,1,0)*dc[0]+P(1,1,1)*dc[1]+P(1,1,2)*dc[2]+P(1,1,3)*dc[3]); dcP[ 6] = (P(1,2,0)*dc[0]+P(1,2,1)*dc[1]+P(1,2,2)*dc[2]+P(1,2,3)*dc[3]); dcP[ 7] = (P(1,3,0)*dc[0]+P(1,3,1)*dc[1]+P(1,3,2)*dc[2]+P(1,3,3)*dc[3]); dcP[ 8] = (P(2,0,0)*dc[0]+P(2,0,1)*dc[1]+P(2,0,2)*dc[2]+P(2,0,3)*dc[3]); dcP[ 9] = (P(2,1,0)*dc[0]+P(2,1,1)*dc[1]+P(2,1,2)*dc[2]+P(2,1,3)*dc[3]); dcP[10] = (P(2,2,0)*dc[0]+P(2,2,1)*dc[1]+P(2,2,2)*dc[2]+P(2,2,3)*dc[3]); dcP[11] = (P(2,3,0)*dc[0]+P(2,3,1)*dc[1]+P(2,3,2)*dc[2]+P(2,3,3)*dc[3]); dcP[12] = (P(3,0,0)*dc[0]+P(3,0,1)*dc[1]+P(3,0,2)*dc[2]+P(3,0,3)*dc[3]); dcP[13] = (P(3,1,0)*dc[0]+P(3,1,1)*dc[1]+P(3,1,2)*dc[2]+P(3,1,3)*dc[3]); dcP[14] = (P(3,2,0)*dc[0]+P(3,2,1)*dc[1]+P(3,2,2)*dc[2]+P(3,2,3)*dc[3]); dcP[15] = (P(3,3,0)*dc[0]+P(3,3,1)*dc[1]+P(3,3,2)*dc[2]+P(3,3,3)*dc[3]); d2cP[ 0] = (P(0,0,0)*d2c[0]+P(0,0,1)*d2c[1]+P(0,0,2)*d2c[2]+P(0,0,3)*d2c[3]); d2cP[ 1] = (P(0,1,0)*d2c[0]+P(0,1,1)*d2c[1]+P(0,1,2)*d2c[2]+P(0,1,3)*d2c[3]); d2cP[ 2] = (P(0,2,0)*d2c[0]+P(0,2,1)*d2c[1]+P(0,2,2)*d2c[2]+P(0,2,3)*d2c[3]); d2cP[ 3] = (P(0,3,0)*d2c[0]+P(0,3,1)*d2c[1]+P(0,3,2)*d2c[2]+P(0,3,3)*d2c[3]); d2cP[ 4] = (P(1,0,0)*d2c[0]+P(1,0,1)*d2c[1]+P(1,0,2)*d2c[2]+P(1,0,3)*d2c[3]); d2cP[ 5] = (P(1,1,0)*d2c[0]+P(1,1,1)*d2c[1]+P(1,1,2)*d2c[2]+P(1,1,3)*d2c[3]); d2cP[ 6] = (P(1,2,0)*d2c[0]+P(1,2,1)*d2c[1]+P(1,2,2)*d2c[2]+P(1,2,3)*d2c[3]); d2cP[ 7] = (P(1,3,0)*d2c[0]+P(1,3,1)*d2c[1]+P(1,3,2)*d2c[2]+P(1,3,3)*d2c[3]); d2cP[ 8] = (P(2,0,0)*d2c[0]+P(2,0,1)*d2c[1]+P(2,0,2)*d2c[2]+P(2,0,3)*d2c[3]); d2cP[ 9] = (P(2,1,0)*d2c[0]+P(2,1,1)*d2c[1]+P(2,1,2)*d2c[2]+P(2,1,3)*d2c[3]); d2cP[10] = (P(2,2,0)*d2c[0]+P(2,2,1)*d2c[1]+P(2,2,2)*d2c[2]+P(2,2,3)*d2c[3]); d2cP[11] = 
(P(2,3,0)*d2c[0]+P(2,3,1)*d2c[1]+P(2,3,2)*d2c[2]+P(2,3,3)*d2c[3]); d2cP[12] = (P(3,0,0)*d2c[0]+P(3,0,1)*d2c[1]+P(3,0,2)*d2c[2]+P(3,0,3)*d2c[3]); d2cP[13] = (P(3,1,0)*d2c[0]+P(3,1,1)*d2c[1]+P(3,1,2)*d2c[2]+P(3,1,3)*d2c[3]); d2cP[14] = (P(3,2,0)*d2c[0]+P(3,2,1)*d2c[1]+P(3,2,2)*d2c[2]+P(3,2,3)*d2c[3]); d2cP[15] = (P(3,3,0)*d2c[0]+P(3,3,1)*d2c[1]+P(3,3,2)*d2c[2]+P(3,3,3)*d2c[3]); bcP[0] = ( b[0]*cP[ 0] + b[1]*cP[ 1] + b[2]*cP[ 2] + b[3]*cP[ 3]); bcP[1] = ( b[0]*cP[ 4] + b[1]*cP[ 5] + b[2]*cP[ 6] + b[3]*cP[ 7]); bcP[2] = ( b[0]*cP[ 8] + b[1]*cP[ 9] + b[2]*cP[10] + b[3]*cP[11]); bcP[3] = ( b[0]*cP[12] + b[1]*cP[13] + b[2]*cP[14] + b[3]*cP[15]); dbcP[0] = ( db[0]*cP[ 0] + db[1]*cP[ 1] + db[2]*cP[ 2] + db[3]*cP[ 3]); dbcP[1] = ( db[0]*cP[ 4] + db[1]*cP[ 5] + db[2]*cP[ 6] + db[3]*cP[ 7]); dbcP[2] = ( db[0]*cP[ 8] + db[1]*cP[ 9] + db[2]*cP[10] + db[3]*cP[11]); dbcP[3] = ( db[0]*cP[12] + db[1]*cP[13] + db[2]*cP[14] + db[3]*cP[15]); bdcP[0] = ( b[0]*dcP[ 0] + b[1]*dcP[ 1] + b[2]*dcP[ 2] + b[3]*dcP[ 3]); bdcP[1] = ( b[0]*dcP[ 4] + b[1]*dcP[ 5] + b[2]*dcP[ 6] + b[3]*dcP[ 7]); bdcP[2] = ( b[0]*dcP[ 8] + b[1]*dcP[ 9] + b[2]*dcP[10] + b[3]*dcP[11]); bdcP[3] = ( b[0]*dcP[12] + b[1]*dcP[13] + b[2]*dcP[14] + b[3]*dcP[15]); bd2cP[0] = ( b[0]*d2cP[ 0] + b[1]*d2cP[ 1] + b[2]*d2cP[ 2] + b[3]*d2cP[ 3]); bd2cP[1] = ( b[0]*d2cP[ 4] + b[1]*d2cP[ 5] + b[2]*d2cP[ 6] + b[3]*d2cP[ 7]); bd2cP[2] = ( b[0]*d2cP[ 8] + b[1]*d2cP[ 9] + b[2]*d2cP[10] + b[3]*d2cP[11]); bd2cP[3] = ( b[0]*d2cP[12] + b[1]*d2cP[13] + b[2]*d2cP[14] + b[3]*d2cP[15]); d2bcP[0] = ( d2b[0]*cP[ 0] + d2b[1]*cP[ 1] + d2b[2]*cP[ 2] + d2b[3]*cP[ 3]); d2bcP[1] = ( d2b[0]*cP[ 4] + d2b[1]*cP[ 5] + d2b[2]*cP[ 6] + d2b[3]*cP[ 7]); d2bcP[2] = ( d2b[0]*cP[ 8] + d2b[1]*cP[ 9] + d2b[2]*cP[10] + d2b[3]*cP[11]); d2bcP[3] = ( d2b[0]*cP[12] + d2b[1]*cP[13] + d2b[2]*cP[14] + d2b[3]*cP[15]); dbdcP[0] = ( db[0]*dcP[ 0] + db[1]*dcP[ 1] + db[2]*dcP[ 2] + db[3]*dcP[ 3]); dbdcP[1] = ( db[0]*dcP[ 4] + db[1]*dcP[ 5] + db[2]*dcP[ 6] + db[3]*dcP[ 7]); 
dbdcP[2] = ( db[0]*dcP[ 8] + db[1]*dcP[ 9] + db[2]*dcP[10] + db[3]*dcP[11]); dbdcP[3] = ( db[0]*dcP[12] + db[1]*dcP[13] + db[2]*dcP[14] + db[3]*dcP[15]); *val = a[0]*bcP[0] + a[1]*bcP[1] + a[2]*bcP[2] + a[3]*bcP[3]; grad[0] = dxInv * (da[0] *bcP[0] + da[1]*bcP[1] + da[2]*bcP[2] + da[3]*bcP[3]); grad[1] = dyInv * (a[0]*dbcP[0] + a[1]*dbcP[1] + a[2]*dbcP[2] + a[3]*dbcP[3]); grad[2] = dzInv * (a[0]*bdcP[0] + a[1]*bdcP[1] + a[2]*bdcP[2] + a[3]*bdcP[3]); // d2x hess[0] = dxInv * dxInv * (d2a[0]*bcP[0] + d2a[1]*bcP[1] + d2a[2]*bcP[2] + d2a[3]*bcP[3]); // dx dy hess[1] = dxInv * dyInv * (da[0]*dbcP[0] + da[1]*dbcP[1] + da[2]*dbcP[2] + da[3]*dbcP[3]); hess[3] = hess[1]; // dx dz; hess[2] = dxInv * dzInv * (da[0]*bdcP[0] + da[1]*bdcP[1] + da[2]*bdcP[2] + da[3]*bdcP[3]); hess[6] = hess[2]; // d2y hess[4] = dyInv * dyInv * (a[0]*d2bcP[0] + a[1]*d2bcP[1] + a[2]*d2bcP[2] + a[3]*d2bcP[3]); // dy dz hess[5] = dyInv * dzInv * (a[0]*dbdcP[0] + a[1]*dbdcP[1] + a[2]*dbdcP[2] + a[3]*dbdcP[3]); hess[7] = hess[5]; // d2z hess[8] = dzInv * dzInv * (a[0]*bd2cP[0] + a[1]*bd2cP[1] + a[2]*bd2cP[2] + a[3]*bd2cP[3]); #undef P } #endif einspline-0.9.2/src/nubspline_eval_sse_s.h0000664000113000011300000006126611012400563015560 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. // // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. 
/*
 * Helper macros shared by the SSE evaluation routines below.
 *
 *   _MM_MATVEC4_PS(M0, M1, M2, M3, v, r)
 *       r = (M0.v, M1.v, M2.v, M3.v): the 4x4 matrix whose rows are
 *       M0..M3 multiplied by the vector v.  r must be an __m128 lvalue.
 *
 *   _MM_DOT4_PS(A, B, p)
 *       p = A.B, stored into the float lvalue p.
 *
 * Both expand to a single statement with no trailing semicolon, so the
 * caller supplies the ';' and the macros are safe inside unbraced
 * if/else.  Temporaries carry a macro-specific suffix so they cannot
 * capture an argument that happens to share a name with them.
 */
#ifdef HAVE_SSE3
#include <pmmintrin.h>
#define _MM_MATVEC4_PS(M0, M1, M2, M3, v, r)                                 \
do {                                                                         \
  __m128 r0_mv4_ = _mm_hadd_ps (_mm_mul_ps (M0, v), _mm_mul_ps (M1, v));     \
  __m128 r1_mv4_ = _mm_hadd_ps (_mm_mul_ps (M2, v), _mm_mul_ps (M3, v));     \
  r = _mm_hadd_ps (r0_mv4_, r1_mv4_);                                        \
} while (0)
#define _MM_DOT4_PS(A, B, p)                                                 \
do {                                                                         \
  __m128 t_dot4_  = _mm_mul_ps (A, B);                                       \
  __m128 t1_dot4_ = _mm_hadd_ps (t_dot4_, t_dot4_);                          \
  __m128 r_dot4_  = _mm_hadd_ps (t1_dot4_, t1_dot4_);                        \
  _mm_store_ss (&(p), r_dot4_);                                              \
} while (0)
#else
// Use plain-old SSE instructions
#define _MM_MATVEC4_PS(M0, M1, M2, M3, v, r)                                 \
do {                                                                         \
  __m128 r0_mv4_ = _mm_mul_ps (M0, v);                                       \
  __m128 r1_mv4_ = _mm_mul_ps (M1, v);                                       \
  __m128 r2_mv4_ = _mm_mul_ps (M2, v);                                       \
  __m128 r3_mv4_ = _mm_mul_ps (M3, v);                                       \
  _MM_TRANSPOSE4_PS (r0_mv4_, r1_mv4_, r2_mv4_, r3_mv4_);                    \
  r = _mm_add_ps (_mm_add_ps (r0_mv4_, r1_mv4_),                             \
                  _mm_add_ps (r2_mv4_, r3_mv4_));                            \
} while (0)
#define _MM_DOT4_PS(A, B, p)                                                 \
do {                                                                         \
  __m128 t_dot4_   = _mm_mul_ps (A, B);                                      \
  __m128 alo_dot4_ = _mm_shuffle_ps (t_dot4_, t_dot4_, _MM_SHUFFLE(0,1,0,1));\
  __m128 ahi_dot4_ = _mm_shuffle_ps (t_dot4_, t_dot4_, _MM_SHUFFLE(2,3,2,3));\
  __m128 a_dot4_   = _mm_add_ps (alo_dot4_, ahi_dot4_);                      \
  __m128 rlo_dot4_ = _mm_shuffle_ps (a_dot4_, a_dot4_, _MM_SHUFFLE(0,0,0,0));\
  __m128 rhi_dot4_ = _mm_shuffle_ps (a_dot4_, a_dot4_, _MM_SHUFFLE(1,1,1,1));\
  __m128 r_dot4_   = _mm_add_ps (rlo_dot4_, rhi_dot4_);                      \
  _mm_store_ss (&(p), r_dot4_);                                              \
} while (0)
#endif
bfuncs[4]; int i = get_NUBasis_funcs_s (spline->x_basis, x, bfuncs); float* restrict coefs = spline->coefs; *val = (coefs[i+0]*bfuncs[0] +coefs[i+1]*bfuncs[1] + coefs[i+2]*bfuncs[2] +coefs[i+3]*bfuncs[3]); } /* Value and first derivative */ inline void eval_NUBspline_1d_s_vg (NUBspline_1d_s * restrict spline, double x, float* restrict val, float* restrict grad) { float bfuncs[4], dbfuncs[4]; int i = get_NUBasis_dfuncs_s (spline->x_basis, x, bfuncs, dbfuncs); float* restrict coefs = spline->coefs; *val = (coefs[i+0]* bfuncs[0] + coefs[i+1]* bfuncs[1] + coefs[i+2]* bfuncs[2] + coefs[i+3]* bfuncs[3]); *grad = (coefs[i+0]*dbfuncs[0] + coefs[i+1]*dbfuncs[1] + coefs[i+2]*dbfuncs[2] + coefs[i+3]*dbfuncs[3]); } /* Value, first derivative, and second derivative */ inline void eval_NUBspline_1d_s_vgl (NUBspline_1d_s * restrict spline, double x, float* restrict val, float* restrict grad, float* restrict lapl) { float bfuncs[4], dbfuncs[4], d2bfuncs[4]; int i = get_NUBasis_d2funcs_s (spline->x_basis, x, bfuncs, dbfuncs, d2bfuncs); float* restrict coefs = spline->coefs; *val = (coefs[i+0]* bfuncs[0] + coefs[i+1]* bfuncs[1] + coefs[i+2]* bfuncs[2] + coefs[i+3]* bfuncs[3]); *grad = (coefs[i+0]* dbfuncs[0] + coefs[i+1]* dbfuncs[1] + coefs[i+2]* dbfuncs[2] + coefs[i+3]* dbfuncs[3]); *lapl = (coefs[i+0]*d2bfuncs[0] + coefs[i+1]*d2bfuncs[1] + coefs[i+2]*d2bfuncs[2] + coefs[i+3]*d2bfuncs[3]); } inline void eval_NUBspline_1d_s_vgh (NUBspline_1d_s * restrict spline, double x, float* restrict val, float* restrict grad, float* restrict hess) { eval_NUBspline_1d_s_vgl (spline, x, val, grad, hess); } /************************************************************/ /* 2D single-precision, real evaulation functions */ /************************************************************/ /* Value only */ inline void eval_NUBspline_2d_s (NUBspline_2d_s * restrict spline, double x, double y, float* restrict val) { __m128 a, b, bP, tmp0, tmp1, tmp2, tmp3; int ix = get_NUBasis_funcs_sse_s (spline->x_basis, 
x, &a); int iy = get_NUBasis_funcs_sse_s (spline->y_basis, y, &b); float* restrict coefs = spline->coefs; int xs = spline->x_stride; #define P(i) (spline->coefs+(ix+(i))*xs+iy) // Prefetch the data from main memory into cache so it's available // when we need to use it. _mm_prefetch ((const char*)P(0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1), _MM_HINT_T0); _mm_prefetch ((const char*)P(2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3), _MM_HINT_T0); tmp0 = _mm_loadu_ps (P(0)); tmp1 = _mm_loadu_ps (P(1)); tmp2 = _mm_loadu_ps (P(2)); tmp3 = _mm_loadu_ps (P(3)); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, b, bP); // Compute value _MM_DOT4_PS (a, bP, *val); #undef P } /* Value and gradient */ inline void eval_NUBspline_2d_s_vg (NUBspline_2d_s * restrict spline, double x, double y, float* restrict val, float* restrict grad) { __m128 a, b, da, db, bP, dbP, tmp0, tmp1, tmp2, tmp3; int ix = get_NUBasis_dfuncs_sse_s (spline->x_basis, x, &a, &da); int iy = get_NUBasis_dfuncs_sse_s (spline->y_basis, y, &b, &db); float* restrict coefs = spline->coefs; int xs = spline->x_stride; #define P(i) (spline->coefs+(ix+(i))*xs+iy) // Prefetch the data from main memory into cache so it's available // when we need to use it. 
_mm_prefetch ((const char*)P(0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1), _MM_HINT_T0); _mm_prefetch ((const char*)P(2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3), _MM_HINT_T0); tmp0 = _mm_loadu_ps (P(0)); tmp1 = _mm_loadu_ps (P(1)); tmp2 = _mm_loadu_ps (P(2)); tmp3 = _mm_loadu_ps (P(3)); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, b, bP); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, db, dbP); // Compute value _MM_DOT4_PS (a, bP, *val); // Compute gradient _MM_DOT4_PS (da, bP, grad[0]); _MM_DOT4_PS (a, dbP, grad[1]); #undef P } /* Value, gradient, and laplacian */ inline void eval_NUBspline_2d_s_vgl (NUBspline_2d_s * restrict spline, double x, double y, float* restrict val, float* restrict grad, float* restrict lapl) { __m128 a, b, da, db, d2a, d2b, bP, dbP, d2bP, tmp0, tmp1, tmp2, tmp3; int ix = get_NUBasis_d2funcs_sse_s (spline->x_basis, x, &a, &da, &d2a); int iy = get_NUBasis_d2funcs_sse_s (spline->y_basis, y, &b, &db, &d2b); float* restrict coefs = spline->coefs; int xs = spline->x_stride; #define P(i) (spline->coefs+(ix+(i))*xs+iy) // Prefetch the data from main memory into cache so it's available // when we need to use it. 
_mm_prefetch ((const char*)P(0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1), _MM_HINT_T0); _mm_prefetch ((const char*)P(2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3), _MM_HINT_T0); tmp0 = _mm_loadu_ps (P(0)); tmp1 = _mm_loadu_ps (P(1)); tmp2 = _mm_loadu_ps (P(2)); tmp3 = _mm_loadu_ps (P(3)); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, b, bP); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, db, dbP); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, d2b, d2bP); // Compute value _MM_DOT4_PS (a, bP, *val); // Compute gradient _MM_DOT4_PS (da, bP, grad[0]); _MM_DOT4_PS (a, dbP, grad[1]); float sec_derivs[2]; // Compute laplacian _MM_DOT4_PS (d2a, bP, sec_derivs[0]); _MM_DOT4_PS (a, d2bP, sec_derivs[1]); *lapl = sec_derivs[0] + sec_derivs[1]; #undef P } /* Value, gradient, and Hessian */ inline void eval_NUBspline_2d_s_vgh (NUBspline_2d_s * restrict spline, double x, double y, float* restrict val, float* restrict grad, float* restrict hess) { __m128 a, b, da, db, d2a, d2b, bP, dbP, d2bP, tmp0, tmp1, tmp2, tmp3; int ix = get_NUBasis_d2funcs_sse_s (spline->x_basis, x, &a, &da, &d2a); int iy = get_NUBasis_d2funcs_sse_s (spline->y_basis, y, &b, &db, &d2b); float* restrict coefs = spline->coefs; int xs = spline->x_stride; #define P(i) (spline->coefs+(ix+(i))*xs+iy) float *restrict p = P(0); // Prefetch the data from main memory into cache so it's available // when we need to use it. 
_mm_prefetch ((const char*)(p ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ xs), _MM_HINT_T0); _mm_prefetch ((const char*)(p+2*xs), _MM_HINT_T0); _mm_prefetch ((const char*)(p+3*xs), _MM_HINT_T0); tmp0 = _mm_loadu_ps (P(0)); tmp1 = _mm_loadu_ps (P(1)); tmp2 = _mm_loadu_ps (P(2)); tmp3 = _mm_loadu_ps (P(3)); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, b, bP); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, db, dbP); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, d2b, d2bP); // Compute value _MM_DOT4_PS (a, bP, *val); // Compute gradient _MM_DOT4_PS (da, bP, grad[0]); _MM_DOT4_PS (a, dbP, grad[1]); float sec_derivs[2]; // Compute hessian // Compute hessian _MM_DOT4_PS (d2a, bP, hess[0]); _MM_DOT4_PS (a, d2bP, hess[3]); _MM_DOT4_PS (da, dbP, hess[1]); // Copy hessian element into lower half of 2x2 matrix hess[2] = hess[1]; #undef P } /************************************************************/ /* 3D single-precision, real evaulation functions */ /************************************************************/ /* Value only */ inline void eval_NUBspline_3d_s (NUBspline_3d_s * restrict spline, double x, double y, double z, float* restrict val) { __m128 a, b, c, cP[4], bcP, tmp0, tmp1, tmp2, tmp3; int ix = get_NUBasis_funcs_sse_s (spline->x_basis, x, &a); int iy = get_NUBasis_funcs_sse_s (spline->y_basis, y, &b); int iz = get_NUBasis_funcs_sse_s (spline->z_basis, z, &c); int xs = spline->x_stride; int ys = spline->y_stride; int ys2 = 2*ys; int ys3 = 3*ys; float* restrict coefs = spline->coefs; #define P(i,j) (coefs+(ix+(i))*xs+(iy+(j))*ys+(iz)) float *restrict p = P(0,0); _mm_prefetch ((const char*)(p ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3), _MM_HINT_T0); p+= xs; _mm_prefetch ((const char*)(p ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3), _MM_HINT_T0); p+= xs; _mm_prefetch 
((const char*)(p ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3), _MM_HINT_T0); p+= xs; _mm_prefetch ((const char*)(p ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3), _MM_HINT_T0); // Compute cP, dcP, and d2cP products 1/4 at a time to maximize // register reuse and avoid rerereading from memory or cache. // 1st quarter p = P(0,0); tmp0 = _mm_loadu_ps (p ); tmp1 = _mm_loadu_ps (p+ys ); tmp2 = _mm_loadu_ps (p+ys2); tmp3 = _mm_loadu_ps (p+ys3); p += xs; _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, c, cP[0]); // 2nd quarter tmp0 = _mm_loadu_ps (p ); tmp1 = _mm_loadu_ps (p+ys ); tmp2 = _mm_loadu_ps (p+ys2); tmp3 = _mm_loadu_ps (p+ys3); p += xs; _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, c, cP[1]); // 3rd quarter tmp0 = _mm_loadu_ps (p ); tmp1 = _mm_loadu_ps (p+ys ); tmp2 = _mm_loadu_ps (p+ys2); tmp3 = _mm_loadu_ps (p+ys3); p += xs; _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, c, cP[2]); // 4th quarter tmp0 = _mm_loadu_ps (p ); tmp1 = _mm_loadu_ps (p+ys ); tmp2 = _mm_loadu_ps (p+ys2); tmp3 = _mm_loadu_ps (p+ys3); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, c, cP[3]); // Now compute bcP, dbcP, bdcP, d2bcP, bd2cP, and dbdc products _MM_MATVEC4_PS ( cP[0], cP[1], cP[2], cP[3], b, bcP); // Compute value _MM_DOT4_PS (a, bcP, *val); #undef P } /* Value and gradient */ inline void eval_NUBspline_3d_s_vg (NUBspline_3d_s * restrict spline, double x, double y, double z, float* restrict val, float* restrict grad) { __m128 a, b, c, da, db, dc, cP[4], dcP[4], bcP, dbcP, dbP, bdcP, tmp0, tmp1, tmp2, tmp3; int ix = get_NUBasis_dfuncs_sse_s (spline->x_basis, x, &a, &da); int iy = get_NUBasis_dfuncs_sse_s (spline->y_basis, y, &b, &db); int iz = get_NUBasis_dfuncs_sse_s (spline->z_basis, z, &c, &dc); int xs = spline->x_stride; int ys = spline->y_stride; int ys2 = 2*ys; int ys3 = 3*ys; float* restrict 
coefs = spline->coefs; #define P(i,j) (coefs+(ix+(i))*xs+(iy+(j))*ys+(iz)) float *restrict p = P(0,0); _mm_prefetch ((const char*)(p ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3), _MM_HINT_T0); p+= xs; _mm_prefetch ((const char*)(p ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3), _MM_HINT_T0); p+= xs; _mm_prefetch ((const char*)(p ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3), _MM_HINT_T0); p+= xs; _mm_prefetch ((const char*)(p ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3), _MM_HINT_T0); // Compute cP, dcP, and d2cP products 1/4 at a time to maximize // register reuse and avoid rerereading from memory or cache. 
// 1st quarter p = P(0,0); tmp0 = _mm_loadu_ps (p ); tmp1 = _mm_loadu_ps (p+ys ); tmp2 = _mm_loadu_ps (p+ys2); tmp3 = _mm_loadu_ps (p+ys3); p += xs; _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, c, cP[0]); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, dc, dcP[0]); // 2nd quarter tmp0 = _mm_loadu_ps (p ); tmp1 = _mm_loadu_ps (p+ys ); tmp2 = _mm_loadu_ps (p+ys2); tmp3 = _mm_loadu_ps (p+ys3); p += xs; _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, c, cP[1]); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, dc, dcP[1]); // 3rd quarter tmp0 = _mm_loadu_ps (p ); tmp1 = _mm_loadu_ps (p+ys ); tmp2 = _mm_loadu_ps (p+ys2); tmp3 = _mm_loadu_ps (p+ys3); p += xs; _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, c, cP[2]); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, dc, dcP[2]); // 4th quarter tmp0 = _mm_loadu_ps (p ); tmp1 = _mm_loadu_ps (p+ys ); tmp2 = _mm_loadu_ps (p+ys2); tmp3 = _mm_loadu_ps (p+ys3); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, c, cP[3]); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, dc, dcP[3]); // Now compute bcP, dbcP, bdcP, d2bcP, bd2cP, and dbdc products _MM_MATVEC4_PS ( cP[0], cP[1], cP[2], cP[3], b, bcP); _MM_MATVEC4_PS ( cP[0], cP[1], cP[2], cP[3], db, dbcP); _MM_MATVEC4_PS ( dcP[0], dcP[1], dcP[2], dcP[3], b, bdcP); // Compute value _MM_DOT4_PS (a, bcP, *val); // Compute gradient _MM_DOT4_PS (da, bcP, grad[0]); _MM_DOT4_PS (a, dbcP, grad[1]); _MM_DOT4_PS (a, bdcP, grad[2]); #undef P } /* Value, gradient, and laplacian */ inline void eval_NUBspline_3d_s_vgl (NUBspline_3d_s * restrict spline, double x, double y, double z, float* restrict val, float* restrict grad, float* restrict lapl) { __m128 a, b, c, da, db, dc, d2a, d2b, d2c, cP[4], dcP[4], d2cP[4], bcP, dbcP, d2bcP, dbdcP, bd2cP, bdcP, tmp0, tmp1, tmp2, tmp3; int ix = get_NUBasis_d2funcs_sse_s (spline->x_basis, x, &a, &da, &d2a); int iy = get_NUBasis_d2funcs_sse_s (spline->y_basis, y, &b, &db, &d2b); int iz = get_NUBasis_d2funcs_sse_s (spline->z_basis, z, &c, &dc, &d2c); int xs = spline->x_stride; int ys = spline->y_stride; int ys2 = 2*ys; int ys3 
= 3*ys; float* restrict coefs = spline->coefs; #define P(i,j) (coefs+(ix+(i))*xs+(iy+(j))*ys+(iz)) float *restrict p = P(0,0); _mm_prefetch ((const char*)(p ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3), _MM_HINT_T0); p+= xs; _mm_prefetch ((const char*)(p ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3), _MM_HINT_T0); p+= xs; _mm_prefetch ((const char*)(p ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3), _MM_HINT_T0); p+= xs; _mm_prefetch ((const char*)(p ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3), _MM_HINT_T0); // Compute cP, dcP, and d2cP products 1/4 at a time to maximize // register reuse and avoid rerereading from memory or cache. 
// 1st quarter p = P(0,0); tmp0 = _mm_loadu_ps (p ); tmp1 = _mm_loadu_ps (p+ys ); tmp2 = _mm_loadu_ps (p+ys2); tmp3 = _mm_loadu_ps (p+ys3); p += xs; _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, c, cP[0]); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, dc, dcP[0]); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, d2c, d2cP[0]); // 2nd quarter tmp0 = _mm_loadu_ps (p ); tmp1 = _mm_loadu_ps (p+ys ); tmp2 = _mm_loadu_ps (p+ys2); tmp3 = _mm_loadu_ps (p+ys3); p += xs; _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, c, cP[1]); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, dc, dcP[1]); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, d2c, d2cP[1]); // 3rd quarter tmp0 = _mm_loadu_ps (p ); tmp1 = _mm_loadu_ps (p+ys ); tmp2 = _mm_loadu_ps (p+ys2); tmp3 = _mm_loadu_ps (p+ys3); p += xs; _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, c, cP[2]); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, dc, dcP[2]); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, d2c, d2cP[2]); // 4th quarter tmp0 = _mm_loadu_ps (p ); tmp1 = _mm_loadu_ps (p+ys ); tmp2 = _mm_loadu_ps (p+ys2); tmp3 = _mm_loadu_ps (p+ys3); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, c, cP[3]); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, dc, dcP[3]); _MM_MATVEC4_PS (tmp0, tmp1, tmp2, tmp3, d2c, d2cP[3]); // Now compute bcP, dbcP, bdcP, d2bcP, bd2cP, and dbdc products _MM_MATVEC4_PS ( cP[0], cP[1], cP[2], cP[3], b, bcP); _MM_MATVEC4_PS ( cP[0], cP[1], cP[2], cP[3], db, dbcP); _MM_MATVEC4_PS ( dcP[0], dcP[1], dcP[2], dcP[3], b, bdcP); _MM_MATVEC4_PS ( cP[0], cP[1], cP[2], cP[3], d2b, d2bcP); _MM_MATVEC4_PS (d2cP[0], d2cP[1], d2cP[2], d2cP[3], b, bd2cP); _MM_MATVEC4_PS ( dcP[0], dcP[1], dcP[2], dcP[3], db, dbdcP); // Compute value _MM_DOT4_PS (a, bcP, *val); // Compute gradient _MM_DOT4_PS (da, bcP, grad[0]); _MM_DOT4_PS (a, dbcP, grad[1]); _MM_DOT4_PS (a, bdcP, grad[2]); // Compute laplacian float sec_derivs[3]; _MM_DOT4_PS (d2a, bcP, sec_derivs[0]); _MM_DOT4_PS (a, d2bcP, sec_derivs[1]); _MM_DOT4_PS (a, bd2cP, sec_derivs[2]); *lapl = sec_derivs[0] + sec_derivs[1] + sec_derivs[2]; #undef P } typedef 
/* Value, gradient, and Hessian */
/*
 * Evaluate a nonuniform 3D single-precision real B-spline at (x,y,z).
 *
 *   val  : receives the spline value.
 *   grad : receives 3 entries, d/dx, d/dy, d/dz.
 *   hess : receives 9 entries, a row-major symmetric 3x3 matrix of second
 *          derivatives (entries 3, 6 and 7 are copies of 1, 2 and 5).
 *
 * get_NUBasis_d2funcs_sse_s fills the basis weights and their first and
 * second derivatives and returns the index of the first coefficient they
 * apply to along that axis.  Coefficients are addressed as
 * coefs[i*x_stride + j*y_stride + k].
 */
inline void
eval_NUBspline_3d_s_vgh (NUBspline_3d_s * restrict spline,
			 double x, double y, double z,
			 float* restrict val, float* restrict grad,
			 float* restrict hess)
{
  __m128 a, b, c, da, db, dc, d2a, d2b, d2c;
  __m128 cP[4], dcP[4], d2cP[4];
  __m128 bcP, dbcP, bdcP, d2bcP, bd2cP, dbdcP;

  int ix = get_NUBasis_d2funcs_sse_s (spline->x_basis, x, &a, &da, &d2a);
  int iy = get_NUBasis_d2funcs_sse_s (spline->y_basis, y, &b, &db, &d2b);
  int iz = get_NUBasis_d2funcs_sse_s (spline->z_basis, z, &c, &dc, &d2c);
  int xs = spline->x_stride;
  int ys = spline->y_stride;

  /* First coefficient of the 4x4x4 block used by this evaluation. */
  const float *origin = spline->coefs + ix*xs + iy*ys + iz;

  /* Prefetch all 16 z-rows of the block before any of them is loaded. */
  for (int i = 0; i < 4; i++) {
    const float *slab = origin + i*xs;
    for (int j = 0; j < 4; j++)
      _mm_prefetch ((const char*)(slab + j*ys), _MM_HINT_T0);
  }

  // Compute cP, dcP, and d2cP products one x-slab at a time so each set
  // of four loaded rows is reused for all three contractions.
  for (int i = 0; i < 4; i++) {
    const float *slab = origin + i*xs;
    __m128 m0 = _mm_loadu_ps (slab       );
    __m128 m1 = _mm_loadu_ps (slab +   ys);
    __m128 m2 = _mm_loadu_ps (slab + 2*ys);
    __m128 m3 = _mm_loadu_ps (slab + 3*ys);
    _MM_MATVEC4_PS (m0, m1, m2, m3,   c,   cP[i]);
    _MM_MATVEC4_PS (m0, m1, m2, m3,  dc,  dcP[i]);
    _MM_MATVEC4_PS (m0, m1, m2, m3, d2c, d2cP[i]);
  }

  // Now compute bcP, dbcP, bdcP, d2bcP, bd2cP, and dbdc products
  _MM_MATVEC4_PS (  cP[0],   cP[1],   cP[2],   cP[3],   b,   bcP);
  _MM_MATVEC4_PS (  cP[0],   cP[1],   cP[2],   cP[3],  db,  dbcP);
  _MM_MATVEC4_PS ( dcP[0],  dcP[1],  dcP[2],  dcP[3],   b,  bdcP);
  _MM_MATVEC4_PS (  cP[0],   cP[1],   cP[2],   cP[3], d2b, d2bcP);
  _MM_MATVEC4_PS (d2cP[0], d2cP[1], d2cP[2], d2cP[3],   b, bd2cP);
  _MM_MATVEC4_PS ( dcP[0],  dcP[1],  dcP[2],  dcP[3],  db, dbdcP);

  // Compute value
  _MM_DOT4_PS (a, bcP, *val);
  // Compute gradient
  _MM_DOT4_PS (da, bcP, grad[0]);
  _MM_DOT4_PS (a, dbcP, grad[1]);
  _MM_DOT4_PS (a, bdcP, grad[2]);
  // Compute hessian
  _MM_DOT4_PS (d2a, bcP, hess[0]);
  _MM_DOT4_PS (a, d2bcP, hess[4]);
  _MM_DOT4_PS (a, bd2cP, hess[8]);
  _MM_DOT4_PS (da, dbcP, hess[1]);
  _MM_DOT4_PS (da, bdcP, hess[2]);
  _MM_DOT4_PS (a, dbdcP, hess[5]);
  // Copy hessian elements into lower half of 3x3 matrix
  hess[3] = hess[1];
  hess[6] = hess[2];
  hess[7] = hess[5];
}
//__m128d A0_01, A0_23, A1_01, A1_23, A2_01, A2_23, A3_01, A3_23, // dA0_01, dA0_23, dA1_01, dA1_23, dA2_01, dA2_23, dA3_01, dA3_23, // d2A0_01, d2A0_23, d2A1_01, d2A1_23, d2A2_01, d2A2_23, d2A3_01, d2A3_23; #endif void init_sse_data() { #ifdef HAVE_SSE if (A_s == 0) { posix_memalign ((void**)&A_s, 16, (sizeof(__m128)*12)); A_s[0] = _mm_setr_ps ( 1.0/6.0, -3.0/6.0, 3.0/6.0, -1.0/6.0 ); A_s[0] = _mm_setr_ps ( 1.0/6.0, -3.0/6.0, 3.0/6.0, -1.0/6.0 ); A_s[1] = _mm_setr_ps ( 4.0/6.0, 0.0/6.0, -6.0/6.0, 3.0/6.0 ); A_s[2] = _mm_setr_ps ( 1.0/6.0, 3.0/6.0, 3.0/6.0, -3.0/6.0 ); A_s[3] = _mm_setr_ps ( 0.0/6.0, 0.0/6.0, 0.0/6.0, 1.0/6.0 ); A_s[4] = _mm_setr_ps ( -0.5, 1.0, -0.5, 0.0 ); A_s[5] = _mm_setr_ps ( 0.0, -2.0, 1.5, 0.0 ); A_s[6] = _mm_setr_ps ( 0.5, 1.0, -1.5, 0.0 ); A_s[7] = _mm_setr_ps ( 0.0, 0.0, 0.5, 0.0 ); A_s[8] = _mm_setr_ps ( 1.0, -1.0, 0.0, 0.0 ); A_s[9] = _mm_setr_ps ( -2.0, 3.0, 0.0, 0.0 ); A_s[10] = _mm_setr_ps ( 1.0, -3.0, 0.0, 0.0 ); A_s[11] = _mm_setr_ps ( 0.0, 1.0, 0.0, 0.0 ); } #endif #ifdef HAVE_SSE2 if (A_d == 0) { posix_memalign ((void**)&A_d, 16, (sizeof(__m128d)*24)); A_d[ 0] = _mm_setr_pd ( 3.0/6.0, -1.0/6.0 ); A_d[ 1] = _mm_setr_pd ( 1.0/6.0, -3.0/6.0 ); A_d[ 2] = _mm_setr_pd ( -6.0/6.0, 3.0/6.0 ); A_d[ 3] = _mm_setr_pd ( 4.0/6.0, 0.0/6.0 ); A_d[ 4] = _mm_setr_pd ( 3.0/6.0, -3.0/6.0 ); A_d[ 5] = _mm_setr_pd ( 1.0/6.0, 3.0/6.0 ); A_d[ 6] = _mm_setr_pd ( 0.0/6.0, 1.0/6.0 ); A_d[ 7] = _mm_setr_pd ( 0.0/6.0, 0.0/6.0 ); A_d[ 8] = _mm_setr_pd ( -0.5, 0.0 ); A_d[ 9] = _mm_setr_pd ( -0.5, 1.0 ); A_d[10] = _mm_setr_pd ( 1.5, 0.0 ); A_d[11] = _mm_setr_pd ( 0.0, -2.0 ); A_d[12] = _mm_setr_pd ( -1.5, 0.0 ); A_d[13] = _mm_setr_pd ( 0.5, 1.0 ); A_d[14] = _mm_setr_pd ( 0.5, 0.0 ); A_d[15] = _mm_setr_pd ( 0.0, 0.0 ); A_d[16] = _mm_setr_pd ( 0.0, 0.0 ); A_d[17] = _mm_setr_pd ( 1.0, -1.0 ); A_d[18] = _mm_setr_pd ( 0.0, 0.0 ); A_d[19] = _mm_setr_pd ( -2.0, 3.0 ); A_d[20] = _mm_setr_pd ( 0.0, 0.0 ); A_d[21] = _mm_setr_pd ( 1.0, -3.0 ); A_d[22] = _mm_setr_pd ( 
0.0, 0.0 ); A_d[23] = _mm_setr_pd ( 0.0, 1.0 ); } #endif } #ifdef USE_ALTIVEC vector float A0 = (vector float) ( -1.0/6.0, 3.0/6.0, -3.0/6.0, 1.0/6.0); vector float A1 = (vector float) ( 3.0/6.0, -6.0/6.0, 0.0/6.0, 4.0/6.0); vector float A2 = (vector float) ( -3.0/6.0, 3.0/6.0, 3.0/6.0, 1.0/6.0); vector float A3 = (vector float) ( 1.0/6.0, 0.0/6.0, 0.0/6.0, 0.0/6.0); /* vector float A0 = (vector float) ( -1.0/6.0, 3.0/6.0, -3.0/6.0, 1.0/6.0); */ /* vector float A1 = (vector float) ( 3.0/6.0, -6.0/6.0, 3.0/6.0, 0.0/6.0); */ /* vector float A2 = (vector float) ( -3.0/6.0, 0.0/6.0, 3.0/6.0, 0.0/6.0); */ /* vector float A3 = (vector float) ( 1.0/6.0, 4.0/6.0, 1.0/6.0, 0.0/6.0); */ /* vector float A0 = (vector float) ( 1.0/6.0, -3.0/6.0, 3.0/6.0, -1.0/6.0); */ /* vector float A1 = (vector float) ( 4.0/6.0, 0.0/6.0, -6.0/6.0, 3.0/6.0); */ /* vector float A2 = (vector float) ( 1.0/6.0, 3.0/6.0, 3.0/6.0, -3.0/6.0); */ /* vector float A3 = (vector float) ( 0.0/6.0, 0.0/6.0, 0.0/6.0, 1.0/6.0); */ vector float dA0 = (vector float) ( 0.0, -0.5, 1.0, -0.5 ); vector float dA1 = (vector float) ( 0.0, 1.5, -2.0, 0.0 ); vector float dA2 = (vector float) ( 0.0, -1.5, 1.0, 0.5 ); vector float dA3 = (vector float) ( 0.0, 0.5, 0.0, 0.0 ); vector float d2A0 = (vector float) ( 0.0, 0.0, -1.0, 1.0 ); vector float d2A1 = (vector float) ( 0.0, 0.0, 3.0, -2.0 ); vector float d2A2 = (vector float) ( 0.0, 0.0, -3.0, 1.0 ); vector float d2A3 = (vector float) ( 0.0, 0.0, 1.0, 0.0 ); #endif /*****************/ /* Standard Data */ /*****************/ ////////////////////// // Single precision // ////////////////////// const float A44f[16] = { -1.0/6.0, 3.0/6.0, -3.0/6.0, 1.0/6.0, 3.0/6.0, -6.0/6.0, 0.0/6.0, 4.0/6.0, -3.0/6.0, 3.0/6.0, 3.0/6.0, 1.0/6.0, 1.0/6.0, 0.0/6.0, 0.0/6.0, 0.0/6.0 }; const float* restrict Af = A44f; const float dA44f[16] = { 0.0, -0.5, 1.0, -0.5, 0.0, 1.5, -2.0, 0.0, 0.0, -1.5, 1.0, 0.5, 0.0, 0.5, 0.0, 0.0 }; const float* restrict dAf = dA44f; const float d2A44f[16] = { 0.0, 
0.0, -1.0, 1.0, 0.0, 0.0, 3.0, -2.0, 0.0, 0.0, -3.0, 1.0, 0.0, 0.0, 1.0, 0.0 }; const float* restrict d2Af = d2A44f; ////////////////////// // Double precision // ////////////////////// const double A44d[16] = { -1.0/6.0, 3.0/6.0, -3.0/6.0, 1.0/6.0, 3.0/6.0, -6.0/6.0, 0.0/6.0, 4.0/6.0, -3.0/6.0, 3.0/6.0, 3.0/6.0, 1.0/6.0, 1.0/6.0, 0.0/6.0, 0.0/6.0, 0.0/6.0 }; const double* restrict Ad = A44d; const double dA44d[16] = { 0.0, -0.5, 1.0, -0.5, 0.0, 1.5, -2.0, 0.0, 0.0, -1.5, 1.0, 0.5, 0.0, 0.5, 0.0, 0.0 }; const double* restrict dAd = dA44d; const double d2A44d[16] = { 0.0, 0.0, -1.0, 1.0, 0.0, 0.0, 3.0, -2.0, 0.0, 0.0, -3.0, 1.0, 0.0, 0.0, 1.0, 0.0 }; const double* restrict d2Ad = d2A44d; einspline-0.9.2/src/bspline_eval_sse_c.h0000664000113000011300000021563611235572632015215 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. // // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. 
// // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #ifndef BSPLINE_EVAL_SSE_C_H #define BSPLINE_EVAL_SSE_C_H #include "bspline_structs.h" #include #include #ifdef HAVE_SSE3 #include #endif #include // extern __m128 A0, A1, A2, A3; // extern __m128 dA0, dA1, dA2, dA3; // extern __m128 d2A0, d2A1, d2A2, d2A3; extern __m128* restrict A_s; inline void print__m128 (__m128 val) { float v[4]; __m128 vshuf = _mm_shuffle_ps (val, val, _MM_SHUFFLE(0,0,0,0)); _mm_store_ss (&(v[0]), vshuf); vshuf = _mm_shuffle_ps (val, val, _MM_SHUFFLE(1,1,1,1)); _mm_store_ss (&(v[1]), vshuf); vshuf = _mm_shuffle_ps (val, val, _MM_SHUFFLE(2,2,2,2)); _mm_store_ss (&(v[2]), vshuf); vshuf = _mm_shuffle_ps (val, val, _MM_SHUFFLE(3,3,3,3)); _mm_store_ss (&(v[3]), vshuf); fprintf (stderr, "[ %8.5f, %8.5f, %8.5f, %8.5f ]", v[0], v[1], v[2], v[3]); } /// SSE3 adds "horizontal add" instructions, which makes things /// simpler and faster #ifdef HAVE_SSE3 #define _MM_MATVEC4_PS(M0, M1, M2, M3, v, r) \ do { \ __m128 _r0 = _mm_hadd_ps (_mm_mul_ps (M0, v), _mm_mul_ps (M1, v)); \ __m128 _r1 = _mm_hadd_ps (_mm_mul_ps (M2, v), _mm_mul_ps (M3, v)); \ r = _mm_hadd_ps (_r0, _r1); \ } while (0); #define _MM_DOT4_PS(_A, _B, _p) \ do { \ __m128 t = _mm_mul_ps (_A, _B); \ __m128 t1 = _mm_hadd_ps (t,t); \ __m128 r = _mm_hadd_ps (t1, t1); \ _mm_store_ss (&(_p), r); \ } while(0); #else // Use plain-old SSE instructions #define _MM_MATVEC4_PS(_M0, _M1, _M2, _M3, _v, _r) \ do { \ __m128 _r0 = _mm_mul_ps (_M0, _v); \ __m128 _r1 = _mm_mul_ps (_M1, _v); \ __m128 _r2 = _mm_mul_ps (_M2, _v); \ __m128 _r3 = _mm_mul_ps (_M3, _v); \ _MM_TRANSPOSE4_PS (_r0, _r1, _r2, _r3); \ _r = _mm_add_ps (_mm_add_ps (_r0, _r1), _mm_add_ps (_r2, _r3)); \ } while (0); #define 
_MM_DOT4_PS(_A, _B, _p) \ do { \ __m128 _t = _mm_mul_ps (_A, _B); \ __m128 alo = _mm_shuffle_ps (_t, _t, _MM_SHUFFLE(0,1,0,1)); \ __m128 ahi = _mm_shuffle_ps (_t, _t, _MM_SHUFFLE(2,3,2,3)); \ __m128 _a = _mm_add_ps (alo, ahi); \ __m128 rlo = _mm_shuffle_ps (_a, _a, _MM_SHUFFLE(0,0,0,0)); \ __m128 rhi = _mm_shuffle_ps (_a, _a, _MM_SHUFFLE(1,1,1,1)); \ __m128 r = _mm_add_ps (rlo, rhi); \ _mm_store_ss (&(_p), r); \ } while(0); #endif /************************************************************/ /* 1D single-precision, real evaulation functions */ /************************************************************/ /* Value only */ inline void eval_UBspline_1d_c (UBspline_1d_c * restrict spline, double x, complex_float* restrict val) { x -= spline->x_grid.start; float u = x*spline->x_grid.delta_inv; float ipart, t; t = modff (u, &ipart); int i = (int) ipart; float tp[4]; tp[0] = t*t*t; tp[1] = t*t; tp[2] = t; tp[3] = 1.0; complex_float* restrict coefs = spline->coefs; *val = (coefs[i+0]*(Af[ 0]*tp[0] + Af[ 1]*tp[1] + Af[ 2]*tp[2] + Af[ 3]*tp[3])+ coefs[i+1]*(Af[ 4]*tp[0] + Af[ 5]*tp[1] + Af[ 6]*tp[2] + Af[ 7]*tp[3])+ coefs[i+2]*(Af[ 8]*tp[0] + Af[ 9]*tp[1] + Af[10]*tp[2] + Af[11]*tp[3])+ coefs[i+3]*(Af[12]*tp[0] + Af[13]*tp[1] + Af[14]*tp[2] + Af[15]*tp[3])); } /* Value and first derivative */ inline void eval_UBspline_1d_c_vg (UBspline_1d_c * restrict spline, double x, complex_float* restrict val, complex_float* restrict grad) { x -= spline->x_grid.start; float u = x*spline->x_grid.delta_inv; float ipart, t; t = modff (u, &ipart); int i = (int) ipart; float tp[4]; tp[0] = t*t*t; tp[1] = t*t; tp[2] = t; tp[3] = 1.0; complex_float* restrict coefs = spline->coefs; *val = (coefs[i+0]*(Af[ 0]*tp[0] + Af[ 1]*tp[1] + Af[ 2]*tp[2] + Af[ 3]*tp[3])+ coefs[i+1]*(Af[ 4]*tp[0] + Af[ 5]*tp[1] + Af[ 6]*tp[2] + Af[ 7]*tp[3])+ coefs[i+2]*(Af[ 8]*tp[0] + Af[ 9]*tp[1] + Af[10]*tp[2] + Af[11]*tp[3])+ coefs[i+3]*(Af[12]*tp[0] + Af[13]*tp[1] + Af[14]*tp[2] + Af[15]*tp[3])); *grad = 
(float)spline->x_grid.delta_inv * (coefs[i+0]*(dAf[ 1]*tp[1] + dAf[ 2]*tp[2] + dAf[ 3]*tp[3])+ coefs[i+1]*(dAf[ 5]*tp[1] + dAf[ 6]*tp[2] + dAf[ 7]*tp[3])+ coefs[i+2]*(dAf[ 9]*tp[1] + dAf[10]*tp[2] + dAf[11]*tp[3])+ coefs[i+3]*(dAf[13]*tp[1] + dAf[14]*tp[2] + dAf[15]*tp[3])); } /* Value, first derivative, and second derivative */ inline void eval_UBspline_1d_c_vgl (UBspline_1d_c* restrict spline, double x, complex_float* restrict val, complex_float* restrict grad, complex_float* restrict lapl) { x -= spline->x_grid.start; float u = x*spline->x_grid.delta_inv; float ipart, t; t = modff (u, &ipart); int i = (int) ipart; float tp[4]; tp[0] = t*t*t; tp[1] = t*t; tp[2] = t; tp[3] = 1.0; complex_float* restrict coefs = spline->coefs; *val = (coefs[i+0]*(Af[ 0]*tp[0] + Af[ 1]*tp[1] + Af[ 2]*tp[2] + Af[ 3]*tp[3])+ coefs[i+1]*(Af[ 4]*tp[0] + Af[ 5]*tp[1] + Af[ 6]*tp[2] + Af[ 7]*tp[3])+ coefs[i+2]*(Af[ 8]*tp[0] + Af[ 9]*tp[1] + Af[10]*tp[2] + Af[11]*tp[3])+ coefs[i+3]*(Af[12]*tp[0] + Af[13]*tp[1] + Af[14]*tp[2] + Af[15]*tp[3])); *grad = (float)spline->x_grid.delta_inv * (coefs[i+0]*(dAf[ 1]*tp[1] + dAf[ 2]*tp[2] + dAf[ 3]*tp[3])+ coefs[i+1]*(dAf[ 5]*tp[1] + dAf[ 6]*tp[2] + dAf[ 7]*tp[3])+ coefs[i+2]*(dAf[ 9]*tp[1] + dAf[10]*tp[2] + dAf[11]*tp[3])+ coefs[i+3]*(dAf[13]*tp[1] + dAf[14]*tp[2] + dAf[15]*tp[3])); *lapl = (float)(spline->x_grid.delta_inv * spline->x_grid.delta_inv) * (coefs[i+0]*(d2Af[ 2]*tp[2] + d2Af[ 3]*tp[3])+ coefs[i+1]*(d2Af[ 6]*tp[2] + d2Af[ 7]*tp[3])+ coefs[i+2]*(d2Af[10]*tp[2] + d2Af[11]*tp[3])+ coefs[i+3]*(d2Af[14]*tp[2] + d2Af[15]*tp[3])); } inline void eval_UBspline_1d_c_vgh (UBspline_1d_c* restrict spline, double x, complex_float* restrict val, complex_float* restrict grad, complex_float* restrict hess) { eval_UBspline_1d_c_vgl (spline, x, val, grad, hess); } /************************************************************/ /* 2D single-precision, real evaulation functions */ /************************************************************/ /* Value only */ 
inline void eval_UBspline_2d_c (UBspline_2d_c * restrict spline, double x, double y, complex_float* restrict val) { _mm_prefetch ((const char*) &A_s[0],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[1],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[2],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[3],_MM_HINT_T0); x -= spline->x_grid.start; y -= spline->y_grid.start; float ux = x*spline->x_grid.delta_inv; float uy = y*spline->y_grid.delta_inv; float ipartx, iparty, tx, ty; tx = modff (ux, &ipartx); ty = modff (uy, &iparty); int ix = (int) ipartx; int iy = (int) iparty; // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] __m128 tpx = _mm_set_ps (tx*tx*tx, tx*tx, tx, 1.0); __m128 tpy = _mm_set_ps (ty*ty*ty, ty*ty, ty, 1.0); // /// SSE mesh point determination // __m128 xy = _mm_set_ps (x, y, 0.0, 0.0); // __m128 x0y0 = _mm_set_ps (spline->x_grid.start, spline->y_grid.start, 0.0, 0.0); // __m128 delta_inv = _mm_set_ps (spline->x_grid.delta_inv,spline->y_grid.delta_inv, 0.0, 0.0); // xy = _mm_sub_ps (xy, x0y0); // // ux = (x - x0)/delta_x and same for y // __m128 uxuy = _mm_mul_ps (xy, delta_inv); // // intpart = trunc (ux, uy) // __m128i intpart = _mm_cvttps_epi32(uxuy); // __m128i ixiy; // _mm_storeu_si128 (&ixiy, intpart); // // Store to memory for use in C expressions // // xmm registers are stored to memory in reverse order // int ix = ((int *)&ixiy)[3]; // int iy = ((int *)&ixiy)[2]; int xs = spline->x_stride; // This macro is used to give the pointer to coefficient data. // i and j should be in the range [0,3]. Coefficients are read four // at a time, so no j value is needed. #define P(i,j) (const float*)(spline->coefs+(ix+(i))*xs+iy+j) // Prefetch the data from main memory into cache so it's available // when we need to use it. 
_mm_prefetch ((const char*)P(0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,2), _MM_HINT_T0); // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // tpz = [t_z^3 t_z^2 t_z 1] // __m128 ipart = _mm_cvtepi32_ps (intpart); // __m128 txty = _mm_sub_ps (uxuy, ipart); // __m128 one = _mm_set_ps (1.0, 1.0, 1.0, 1.0); // __m128 t2 = _mm_mul_ps (txty, txty); // __m128 t3 = _mm_mul_ps (t2, txty); // __m128 tpx = t3; // __m128 tpy = t2; // __m128 tpz = txty; // __m128 zero = one; // _MM_TRANSPOSE4_PS(zero, tpz, tpy, tpx); // a = A * tpx, b = A * tpy, c = A * tpz // da = dA * tpx, db = dA * tpy, dc = dA * tpz, etc. // A is 4x4 matrix given by the rows A_s[0], A_s[1], A_s[2], A_s[3] __m128 a, b, bPr, bPi, r0, r1, r2, r3, i0, i1, i2, i3, tmp0, tmp1; // x-dependent vectors _MM_MATVEC4_PS (A_s[0], A_s[1], A_s[2], A_s[3], tpx, a); // y-dependent vectors _MM_MATVEC4_PS (A_s[0], A_s[1], A_s[2], A_s[3], tpy, b); tmp0 = _mm_loadu_ps (P(0,0)); tmp1 = _mm_loadu_ps (P(0,2)); r0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (P(1,0)); tmp1 = _mm_loadu_ps (P(1,2)); r1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (P(2,0)); tmp1 = _mm_loadu_ps (P(2,2)); r2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (P(3,0)); tmp1 = _mm_loadu_ps (P(3,2)); r3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); _MM_MATVEC4_PS (r0, r1, r2, r3, b, bPr); 
_MM_MATVEC4_PS (i0, i1, i2, i3, b, bPi); float *valr = ((float*)val) +0; float *vali = ((float*)val) +1; // Compute value _MM_DOT4_PS (a, bPr, *valr); _MM_DOT4_PS (a, bPi, *vali); #undef P } /* Value and gradient */ inline void eval_UBspline_2d_c_vg (UBspline_2d_c * restrict spline, double x, double y, complex_float* restrict val, complex_float* restrict grad) { _mm_prefetch ((const char*) &A_s[0],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[1],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[2],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[3],_MM_HINT_T0); x -= spline->x_grid.start; y -= spline->y_grid.start; float ux = x*spline->x_grid.delta_inv; float uy = y*spline->y_grid.delta_inv; float ipartx, iparty, tx, ty; tx = modff (ux, &ipartx); ty = modff (uy, &iparty); int ix = (int) ipartx; int iy = (int) iparty; // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] __m128 tpx = _mm_set_ps (tx*tx*tx, tx*tx, tx, 1.0); __m128 tpy = _mm_set_ps (ty*ty*ty, ty*ty, ty, 1.0); // /// SSE mesh point determination // __m128 xy = _mm_set_ps (x, y, 0.0, 0.0); // __m128 x0y0 = _mm_set_ps (spline->x_grid.start, spline->y_grid.start, 0.0, 0.0); // __m128 delta_inv = _mm_set_ps (spline->x_grid.delta_inv,spline->y_grid.delta_inv, 0.0, 0.0); // xy = _mm_sub_ps (xy, x0y0); // // ux = (x - x0)/delta_x and same for y // __m128 uxuy = _mm_mul_ps (xy, delta_inv); // // intpart = trunc (ux, uy) // __m128i intpart = _mm_cvttps_epi32(uxuy); // __m128i ixiy; // _mm_storeu_si128 (&ixiy, intpart); // // Store to memory for use in C expressions // // xmm registers are stored to memory in reverse order // int ix = ((int *)&ixiy)[3]; // int iy = ((int *)&ixiy)[2]; int xs = spline->x_stride; // This macro is used to give the pointer to coefficient data. // i and j should be in the range [0,3]. Coefficients are read four // at a time, so no j value is needed. 
#define P(i,j) (const float*)(spline->coefs+(ix+(i))*xs+iy+j) // Prefetch the data from main memory into cache so it's available // when we need to use it. _mm_prefetch ((const char*)P(0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,2), _MM_HINT_T0); // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // tpz = [t_z^3 t_z^2 t_z 1] // __m128 ipart = _mm_cvtepi32_ps (intpart); // __m128 txty = _mm_sub_ps (uxuy, ipart); // __m128 one = _mm_set_ps (1.0, 1.0, 1.0, 1.0); // __m128 t2 = _mm_mul_ps (txty, txty); // __m128 t3 = _mm_mul_ps (t2, txty); // __m128 tpx = t3; // __m128 tpy = t2; // __m128 tpz = txty; // __m128 zero = one; // _MM_TRANSPOSE4_PS(zero, tpz, tpy, tpx); // a = A * tpx, b = A * tpy, c = A * tpz // da = dA * tpx, db = dA * tpy, dc = dA * tpz, etc. 
// A is 4x4 matrix given by the rows A_s[0], A_s[1], A_s[2], A_s[3] __m128 a, b, da, db, bPr, dbPr, bPi, dbPi, r0, r1, r2, r3, i0, i1, i2, i3, tmp0, tmp1; // x-dependent vectors _MM_MATVEC4_PS (A_s[0], A_s[1], A_s[2], A_s[3], tpx, a); _MM_MATVEC4_PS (A_s[4], A_s[5], A_s[6], A_s[7], tpx, da); // y-dependent vectors _MM_MATVEC4_PS (A_s[0], A_s[1], A_s[2], A_s[3], tpy, b); _MM_MATVEC4_PS (A_s[4], A_s[5], A_s[6], A_s[7], tpy, db); tmp0 = _mm_loadu_ps (P(0,0)); tmp1 = _mm_loadu_ps (P(0,2)); r0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (P(1,0)); tmp1 = _mm_loadu_ps (P(1,2)); r1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (P(2,0)); tmp1 = _mm_loadu_ps (P(2,2)); r2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (P(3,0)); tmp1 = _mm_loadu_ps (P(3,2)); r3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); _MM_MATVEC4_PS (r0, r1, r2, r3, b, bPr); _MM_MATVEC4_PS (i0, i1, i2, i3, b, bPi); _MM_MATVEC4_PS (r0, r1, r2, r3, db, dbPr); _MM_MATVEC4_PS (i0, i1, i2, i3, db, dbPi); float *valr = ((float*)val) +0; float *vali = ((float*)val) +1; float *gradr0 = ((float *)grad)+0; float *gradi0 = ((float *)grad)+1; float *gradr1 = ((float *)grad)+2; float *gradi1 = ((float *)grad)+3; // Compute value _MM_DOT4_PS (a, bPr, *valr); _MM_DOT4_PS (a, bPi, *vali); // Compute gradient _MM_DOT4_PS (da, bPr, *gradr0); _MM_DOT4_PS (da, bPi, *gradi0); _MM_DOT4_PS (a, dbPr, *gradr1); _MM_DOT4_PS (a, dbPi, *gradi1); float dxInv = spline->x_grid.delta_inv; float dyInv = spline->y_grid.delta_inv; grad[0] *= dxInv; grad[1] *= dyInv; #undef P } /* Value, gradient, and laplacian */ inline void eval_UBspline_2d_c_vgl (UBspline_2d_c * restrict spline, double x, double y, 
complex_float* restrict val, complex_float* restrict grad, complex_float* restrict lapl) { _mm_prefetch ((const char*) &A_s[0],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[1],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[2],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[3],_MM_HINT_T0); x -= spline->x_grid.start; y -= spline->y_grid.start; float ux = x*spline->x_grid.delta_inv; float uy = y*spline->y_grid.delta_inv; float ipartx, iparty, tx, ty; tx = modff (ux, &ipartx); ty = modff (uy, &iparty); int ix = (int) ipartx; int iy = (int) iparty; // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] __m128 tpx = _mm_set_ps (tx*tx*tx, tx*tx, tx, 1.0); __m128 tpy = _mm_set_ps (ty*ty*ty, ty*ty, ty, 1.0); // /// SSE mesh point determination // __m128 xy = _mm_set_ps (x, y, 0.0, 0.0); // __m128 x0y0 = _mm_set_ps (spline->x_grid.start, spline->y_grid.start, 0.0, 0.0); // __m128 delta_inv = _mm_set_ps (spline->x_grid.delta_inv,spline->y_grid.delta_inv, 0.0, 0.0); // xy = _mm_sub_ps (xy, x0y0); // // ux = (x - x0)/delta_x and same for y // __m128 uxuy = _mm_mul_ps (xy, delta_inv); // // intpart = trunc (ux, uy) // __m128i intpart = _mm_cvttps_epi32(uxuy); // __m128i ixiy; // _mm_storeu_si128 (&ixiy, intpart); // // Store to memory for use in C expressions // // xmm registers are stored to memory in reverse order // int ix = ((int *)&ixiy)[3]; // int iy = ((int *)&ixiy)[2]; int xs = spline->x_stride; // This macro is used to give the pointer to coefficient data. // i and j should be in the range [0,3]. Coefficients are read four // at a time, so no j value is needed. #define P(i,j) (const float*)(spline->coefs+(ix+(i))*xs+iy+j) // Prefetch the data from main memory into cache so it's available // when we need to use it. 
_mm_prefetch ((const char*)P(0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,2), _MM_HINT_T0); // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // tpz = [t_z^3 t_z^2 t_z 1] // __m128 ipart = _mm_cvtepi32_ps (intpart); // __m128 txty = _mm_sub_ps (uxuy, ipart); // __m128 one = _mm_set_ps (1.0, 1.0, 1.0, 1.0); // __m128 t2 = _mm_mul_ps (txty, txty); // __m128 t3 = _mm_mul_ps (t2, txty); // __m128 tpx = t3; // __m128 tpy = t2; // __m128 tpz = txty; // __m128 zero = one; // _MM_TRANSPOSE4_PS(zero, tpz, tpy, tpx); // a = A * tpx, b = A * tpy, c = A * tpz // da = dA * tpx, db = dA * tpy, dc = dA * tpz, etc. // A is 4x4 matrix given by the rows A_s[0], A_s[1], A_s[2], A_s[3] __m128 a, b, da, db, d2a, d2b, bPr, dbPr, d2bPr, bPi, dbPi, d2bPi, r0, r1, r2, r3, i0, i1, i2, i3, tmp0, tmp1; // x-dependent vectors _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpx, a); _MM_MATVEC4_PS (A_s[ 4], A_s[ 5], A_s[ 6], A_s[ 7], tpx, da); _MM_MATVEC4_PS (A_s[ 8], A_s[ 9], A_s[10], A_s[11], tpx, d2a); // y-dependent vectors _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpy, b); _MM_MATVEC4_PS (A_s[ 4], A_s[ 5], A_s[ 6], A_s[ 7], tpy, db); _MM_MATVEC4_PS (A_s[ 8], A_s[ 9], A_s[10], A_s[11], tpy, d2b); tmp0 = _mm_loadu_ps (P(0,0)); tmp1 = _mm_loadu_ps (P(0,2)); r0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (P(1,0)); tmp1 = _mm_loadu_ps (P(1,2)); r1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (P(2,0)); tmp1 = _mm_loadu_ps (P(2,2)); r2 = _mm_shuffle_ps (tmp0, tmp1, 
_MM_SHUFFLE (2, 0, 2, 0)); i2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (P(3,0)); tmp1 = _mm_loadu_ps (P(3,2)); r3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); _MM_MATVEC4_PS (r0, r1, r2, r3, b, bPr); _MM_MATVEC4_PS (i0, i1, i2, i3, b, bPi); _MM_MATVEC4_PS (r0, r1, r2, r3, db, dbPr); _MM_MATVEC4_PS (i0, i1, i2, i3, db, dbPi); _MM_MATVEC4_PS (r0, r1, r2, r3, d2b, d2bPr); _MM_MATVEC4_PS (i0, i1, i2, i3, d2b, d2bPi); float *valr = ((float*)val) +0; float *vali = ((float*)val) +1; float *gradr0 = ((float *)grad)+0; float *gradi0 = ((float *)grad)+1; float *gradr1 = ((float *)grad)+2; float *gradi1 = ((float *)grad)+3; float hess_d2x_r, hess_d2x_i, hess_d2y_r, hess_d2y_i; // Compute value _MM_DOT4_PS (a, bPr, *valr); _MM_DOT4_PS (a, bPi, *vali); // Compute gradient _MM_DOT4_PS (da, bPr, *gradr0); _MM_DOT4_PS (da, bPi, *gradi0); _MM_DOT4_PS (a, dbPr, *gradr1); _MM_DOT4_PS (a, dbPi, *gradi1); // Compute Hessian _MM_DOT4_PS (d2a, bPr, hess_d2x_r); _MM_DOT4_PS (d2a, bPi, hess_d2x_i); _MM_DOT4_PS (a, d2bPr, hess_d2y_r); _MM_DOT4_PS (a, d2bPi, hess_d2y_i); float dxInv = spline->x_grid.delta_inv; float dyInv = spline->y_grid.delta_inv; grad[0] *= dxInv; grad[1] *= dyInv; hess_d2x_r *= dxInv*dxInv; hess_d2x_i *= dxInv*dxInv; hess_d2y_r *= dyInv*dyInv; hess_d2y_i *= dyInv*dyInv; #ifdef __cplusplus *lapl = std::complex(hess_d2x_r + hess_d2y_r, hess_d2x_i + hess_d2y_i); #else *lapl = (hess_d2x_r + hess_d2y_r) + 1.0fI* (hess_d2x_i + hess_d2y_i); #endif #undef P } /* Value, gradient, and Hessian */ inline void eval_UBspline_2d_c_vgh (UBspline_2d_c * restrict spline, double x, double y, complex_float* restrict val, complex_float* restrict grad, complex_float* restrict hess) { _mm_prefetch ((const char*) &A_s[0],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[1],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[2],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[3],_MM_HINT_T0); // /// 
SSE mesh point determination // __m128 xy = _mm_set_ps (x, y, 0.0, 0.0); // __m128 x0y0 = _mm_set_ps (spline->x_grid.start, spline->y_grid.start, 0.0, 0.0); // __m128 delta_inv = _mm_set_ps (spline->x_grid.delta_inv,spline->y_grid.delta_inv, 0.0, 0.0); // xy = _mm_sub_ps (xy, x0y0); // // ux = (x - x0)/delta_x and same for y // __m128 uxuy = _mm_mul_ps (xy, delta_inv); // // intpart = trunc (ux, uy) // __m128i intpart = _mm_cvttps_epi32(uxuy); // __m128i ixiy; // _mm_storeu_si128 (&ixiy, intpart); // // Store to memory for use in C expressions // // xmm registers are stored to memory in reverse order // int ix = ((int *)&ixiy)[3]; // int iy = ((int *)&ixiy)[2]; // __m128 ipart = _mm_cvtepi32_ps (intpart); // __m128 txty = _mm_sub_ps (uxuy, ipart); // __m128 one = _mm_set_ps (1.0, 1.0, 1.0, 1.0); // __m128 t2 = _mm_mul_ps (txty, txty); // __m128 t3 = _mm_mul_ps (t2, txty); // __m128 tpx = t3; // __m128 tpy = t2; // __m128 tpz = txty; // __m128 zero = one; // _MM_TRANSPOSE4_PS(zero, tpz, tpy, tpx); x -= spline->x_grid.start; y -= spline->y_grid.start; float ux = x*spline->x_grid.delta_inv; float uy = y*spline->y_grid.delta_inv; float ipartx, iparty, tx, ty; tx = modff (ux, &ipartx); ty = modff (uy, &iparty); int ix = (int) ipartx; int iy = (int) iparty; int xs = spline->x_stride; // This macro is used to give the pointer to coefficient data. // i and j should be in the range [0,3]. Coefficients are read four // at a time, so no j value is needed. #define P(i,j) (const float*)(spline->coefs+(ix+(i))*xs+iy+j) // Prefetch the data from main memory into cache so it's available // when we need to use it. 
_mm_prefetch ((const char*)P(0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,2), _MM_HINT_T0); // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // tpz = [t_z^3 t_z^2 t_z 1] __m128 tpx = _mm_set_ps (tx*tx*tx, tx*tx, tx, 1.0); __m128 tpy = _mm_set_ps (ty*ty*ty, ty*ty, ty, 1.0); // a = A * tpx, b = A * tpy, c = A * tpz // da = dA * tpx, db = dA * tpy, dc = dA * tpz, etc. // A is 4x4 matrix given by the rows A_s[0], A_s[1], A_s[2], A_s[3] __m128 a, b, da, db, d2a, d2b, bPr, dbPr, d2bPr, bPi, dbPi, d2bPi, r0, r1, r2, r3, i0, i1, i2, i3, tmp0, tmp1; // x-dependent vectors _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpx, a); _MM_MATVEC4_PS (A_s[ 4], A_s[ 5], A_s[ 6], A_s[ 7], tpx, da); _MM_MATVEC4_PS (A_s[ 8], A_s[ 9], A_s[10], A_s[11], tpx, d2a); // y-dependent vectors _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpy, b); _MM_MATVEC4_PS (A_s[ 4], A_s[ 5], A_s[ 6], A_s[ 7], tpy, db); _MM_MATVEC4_PS (A_s[ 8], A_s[ 9], A_s[10], A_s[11], tpy, d2b); tmp0 = _mm_loadu_ps (P(0,0)); tmp1 = _mm_loadu_ps (P(0,2)); r0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (P(1,0)); tmp1 = _mm_loadu_ps (P(1,2)); r1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (P(2,0)); tmp1 = _mm_loadu_ps (P(2,2)); r2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (P(3,0)); tmp1 = _mm_loadu_ps (P(3,2)); r3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i3 = _mm_shuffle_ps (tmp0, tmp1, 
_MM_SHUFFLE (3, 1, 3, 1)); _MM_MATVEC4_PS (r0, r1, r2, r3, b, bPr); _MM_MATVEC4_PS (i0, i1, i2, i3, b, bPi); _MM_MATVEC4_PS (r0, r1, r2, r3, db, dbPr); _MM_MATVEC4_PS (i0, i1, i2, i3, db, dbPi); _MM_MATVEC4_PS (r0, r1, r2, r3, d2b, d2bPr); _MM_MATVEC4_PS (i0, i1, i2, i3, d2b, d2bPi); float *valr = ((float*)val) +0; float *vali = ((float*)val) +1; float *gradr0 = ((float *)grad)+0; float *gradi0 = ((float *)grad)+1; float *gradr1 = ((float *)grad)+2; float *gradi1 = ((float *)grad)+3; float *hess_d2x_r = ((float*)hess)+0; float *hess_d2x_i = ((float*)hess)+1; float *hess_d2y_r = ((float*)hess)+6; float *hess_d2y_i = ((float*)hess)+7; float *hess_dxdy_r = ((float*)hess)+2; float *hess_dxdy_i = ((float*)hess)+3; float *hess_dydx_r = ((float*)hess)+4; float *hess_dydx_i = ((float*)hess)+5; // Compute value _MM_DOT4_PS (a, bPr, *valr); _MM_DOT4_PS (a, bPi, *vali); // Compute gradient _MM_DOT4_PS (da, bPr, *gradr0); _MM_DOT4_PS (da, bPi, *gradi0); _MM_DOT4_PS (a, dbPr, *gradr1); _MM_DOT4_PS (a, dbPi, *gradi1); // Compute Hessian _MM_DOT4_PS (d2a, bPr, *hess_d2x_r); _MM_DOT4_PS (d2a, bPi, *hess_d2x_i); _MM_DOT4_PS (a, d2bPr, *hess_d2y_r); _MM_DOT4_PS (a, d2bPi, *hess_d2y_i); _MM_DOT4_PS (da, dbPr, *hess_dxdy_r); _MM_DOT4_PS (da, dbPi, *hess_dxdy_i); _MM_DOT4_PS (da, dbPr, *hess_dydx_r); _MM_DOT4_PS (da, dbPi, *hess_dydx_i); float dxInv = spline->x_grid.delta_inv; float dyInv = spline->y_grid.delta_inv; grad[0] *= dxInv; grad[1] *= dyInv; hess[0] *= dxInv*dxInv; hess[1] *= dxInv*dyInv; hess[2] *= dxInv*dyInv; hess[3] *= dyInv*dyInv; #undef P } /************************************************************/ /* 3D single-precision, real evaulation functions */ /************************************************************/ /* Value only */ inline void eval_UBspline_3d_c (UBspline_3d_c * restrict spline, double x, double y, double z, complex_float* restrict val) { _mm_prefetch ((const char*) &A_s[0],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[1],_MM_HINT_T0); _mm_prefetch 
((const char*) &A_s[2],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[3],_MM_HINT_T0); /// SSE mesh point determination __m128 xyz = _mm_set_ps (x, y, z, 0.0); __m128 x0y0z0 = _mm_set_ps (spline->x_grid.start, spline->y_grid.start, spline->z_grid.start, 0.0); __m128 delta_inv = _mm_set_ps (spline->x_grid.delta_inv,spline->y_grid.delta_inv, spline->z_grid.delta_inv, 0.0); xyz = _mm_sub_ps (xyz, x0y0z0); // ux = (x - x0)/delta_x and same for y and z __m128 uxuyuz = _mm_mul_ps (xyz, delta_inv); // intpart = trunc (ux, uy, uz) __m128i intpart = _mm_cvttps_epi32(uxuyuz); __m128i ixiyiz; _mm_storeu_si128 (&ixiyiz, intpart); // Store to memory for use in C expressions // xmm registers are stored to memory in reverse order int ix = ((int *)&ixiyiz)[3]; int iy = ((int *)&ixiyiz)[2]; int iz = ((int *)&ixiyiz)[1]; int xs = spline->x_stride; int ys = spline->y_stride; // This macro is used to give the pointer to coefficient data. // i and j should be in the range [0,3]. Coefficients are read four // at a time, so no k value is needed. #define P(i,j,k) ((const float*)(spline->coefs+(ix+(i))*xs+(iy+(j))*ys+(iz)+k)) // Prefetch the data from main memory into cache so it's available // when we need to use it. 
_mm_prefetch ((const char*)P(0,0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,1,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,2,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,3,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,1,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,2,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,3,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,1,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,2,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,3,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,1,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,2,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,3,2), _MM_HINT_T0); // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // tpz = [t_z^3 t_z^2 t_z 1] __m128 ipart = _mm_cvtepi32_ps (intpart); __m128 txtytz = _mm_sub_ps (uxuyuz, ipart); __m128 one = _mm_set_ps (1.0, 1.0, 1.0, 1.0); __m128 t2 = _mm_mul_ps (txtytz, txtytz); __m128 t3 = _mm_mul_ps (t2, txtytz); __m128 tpx = t3; __m128 tpy = t2; __m128 
tpz = txtytz; __m128 zero = one; _MM_TRANSPOSE4_PS(zero, tpz, tpy, tpx); // a = A * tpx, b = A * tpy, c = A * tpz // da = dA * tpx, db = dA * tpy, dc = dA * tpz, etc. // A is 4x4 matrix given by the rows A_s[0], A_s[1], A_s[2], A_s[3] __m128 a, b, c, cPr[4], cPi[4], bcPr, bcPi, tmp0, tmp1, r0, r1, r2, r3, i0, i1, i2, i3; // x-dependent vectors _MM_MATVEC4_PS (A_s[0], A_s[1], A_s[2], A_s[3], tpx, a); // y-dependent vectors _MM_MATVEC4_PS (A_s[0], A_s[1], A_s[2], A_s[3], tpy, b); // z-dependent vectors _MM_MATVEC4_PS (A_s[0], A_s[1], A_s[2], A_s[3], tpz, c); // Compute cP, dcP, and d2cP products 1/4 at a time to maximize // register reuse and avoid rerereading from memory or cache. // 1st quarter tmp0 = _mm_loadu_ps (P(0,0,0)); tmp1 = _mm_loadu_ps (P(0,0,2)); r0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (P(0,1,0)); tmp1 = _mm_loadu_ps (P(0,1,2)); r1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (P(0,2,0)); tmp1 = _mm_loadu_ps (P(0,2,2)); r2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (P(0,3,0)); tmp1 = _mm_loadu_ps (P(0,3,2)); r3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); _MM_MATVEC4_PS (r0, r1, r2, r3, c, cPr[0]); _MM_MATVEC4_PS (i0, i1, i2, i3, c, cPi[0]); // 2nd quarter tmp0 = _mm_loadu_ps (P(1,0,0)); tmp1 = _mm_loadu_ps (P(1,0,2)); r0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (P(1,1,0)); tmp1 = _mm_loadu_ps (P(1,1,2)); r1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (P(1,2,0)); tmp1 = _mm_loadu_ps (P(1,2,2)); r2 = _mm_shuffle_ps 
(tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (P(1,3,0)); tmp1 = _mm_loadu_ps (P(1,3,2)); r3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); _MM_MATVEC4_PS (r0, r1, r2, r3, c, cPr[1]); _MM_MATVEC4_PS (i0, i1, i2, i3, c, cPi[1]); // 3rd quarter tmp0 = _mm_loadu_ps (P(2,0,0)); tmp1 = _mm_loadu_ps (P(2,0,2)); r0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (P(2,1,0)); tmp1 = _mm_loadu_ps (P(2,1,2)); r1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (P(2,2,0)); tmp1 = _mm_loadu_ps (P(2,2,2)); r2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (P(2,3,0)); tmp1 = _mm_loadu_ps (P(2,3,2)); r3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); _MM_MATVEC4_PS (r0, r1, r2, r3, c, cPr[2]); _MM_MATVEC4_PS (i0, i1, i2, i3, c, cPi[2]); // 4th quarter tmp0 = _mm_loadu_ps (P(3,0,0)); tmp1 = _mm_loadu_ps (P(3,0,2)); r0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (P(3,1,0)); tmp1 = _mm_loadu_ps (P(3,1,2)); r1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (P(3,2,0)); tmp1 = _mm_loadu_ps (P(3,2,2)); r2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (P(3,3,0)); tmp1 = _mm_loadu_ps (P(3,3,2)); r3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); _MM_MATVEC4_PS (r0, r1, r2, r3, c, 
cPr[3]); _MM_MATVEC4_PS (i0, i1, i2, i3, c, cPi[3]); // Now compute bcP, dbcP, bdcP, d2bcP, bd2cP, and dbdc products _MM_MATVEC4_PS ( cPr[0], cPr[1], cPr[2], cPr[3], b, bcPr); _MM_MATVEC4_PS ( cPi[0], cPi[1], cPi[2], cPi[3], b, bcPi); float *valr = ((float*)val) +0; float *vali = ((float*)val) +1; // Compute value _MM_DOT4_PS (a, bcPr, *valr); _MM_DOT4_PS (a, bcPi, *vali); #undef P } /* Value and gradient */ inline void eval_UBspline_3d_c_vg (UBspline_3d_c * restrict spline, double x, double y, double z, complex_float* restrict val, complex_float* restrict grad) { _mm_prefetch ((const char*) &A_s[0],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[1],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[2],_MM_HINT_T0); _mm_prefetch ((const char*) &A_s[3],_MM_HINT_T0); /// SSE mesh point determination __m128 xyz = _mm_set_ps (x, y, z, 0.0); __m128 x0y0z0 = _mm_set_ps (spline->x_grid.start, spline->y_grid.start, spline->z_grid.start, 0.0); __m128 delta_inv = _mm_set_ps (spline->x_grid.delta_inv,spline->y_grid.delta_inv, spline->z_grid.delta_inv, 0.0); xyz = _mm_sub_ps (xyz, x0y0z0); // ux = (x - x0)/delta_x and same for y and z __m128 uxuyuz = _mm_mul_ps (xyz, delta_inv); // intpart = trunc (ux, uy, uz) __m128i intpart = _mm_cvttps_epi32(uxuyuz); __m128i ixiyiz; _mm_storeu_si128 (&ixiyiz, intpart); // Store to memory for use in C expressions // xmm registers are stored to memory in reverse order int ix = ((int *)&ixiyiz)[3]; int iy = ((int *)&ixiyiz)[2]; int iz = ((int *)&ixiyiz)[1]; int xs = spline->x_stride; int ys = spline->y_stride; // This macro is used to give the pointer to coefficient data. // i and j should be in the range [0,3]. Coefficients are read four // at a time, so no k value is needed. #define P(i,j,k) ((const float*)(spline->coefs+(ix+(i))*xs+(iy+(j))*ys+(iz)+k)) // Prefetch the data from main memory into cache so it's available // when we need to use it. 
_mm_prefetch ((const char*)P(0,0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,1,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,2,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,3,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,1,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,2,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,3,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,1,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,2,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,3,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,1,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,2,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,3,2), _MM_HINT_T0); // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // tpz = [t_z^3 t_z^2 t_z 1] __m128 ipart = _mm_cvtepi32_ps (intpart); __m128 txtytz = _mm_sub_ps (uxuyuz, ipart); __m128 one = _mm_set_ps (1.0, 1.0, 1.0, 1.0); __m128 t2 = _mm_mul_ps (txtytz, txtytz); __m128 t3 = _mm_mul_ps (t2, txtytz); __m128 tpx = t3; __m128 tpy = t2; __m128 
tpz = txtytz; __m128 zero = one; _MM_TRANSPOSE4_PS(zero, tpz, tpy, tpx); // a = A * tpx, b = A * tpy, c = A * tpz // da = dA * tpx, db = dA * tpy, dc = dA * tpz, etc. // A is 4x4 matrix given by the rows A_s[0], A_s[1], A_s[2], A_s[3] __m128 a, b, c, da, db, dc, cPr[4], dcPr[4], bcPr, dbcPr, bdcPr, cPi[4], dcPi[4], bcPi, dbcPi, bdcPi, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, r0, r1, r2, r3, i0, i1, i2, i3; // x-dependent vectors _MM_MATVEC4_PS (A_s[0], A_s[1], A_s[2], A_s[3], tpx, a); _MM_MATVEC4_PS (A_s[4], A_s[5], A_s[6], A_s[7], tpx, da); // y-dependent vectors _MM_MATVEC4_PS (A_s[0], A_s[1], A_s[2], A_s[3], tpy, b); _MM_MATVEC4_PS (A_s[4], A_s[5], A_s[6], A_s[7], tpy, db); // z-dependent vectors _MM_MATVEC4_PS (A_s[0], A_s[1], A_s[2], A_s[3], tpz, c); _MM_MATVEC4_PS (A_s[4], A_s[5], A_s[6], A_s[7], tpz, dc); // Compute cP, dcP, and d2cP products 1/4 at a time to maximize // register reuse and avoid rerereading from memory or cache. // 1st quarter tmp0 = _mm_loadu_ps (P(0,0,0)); tmp1 = _mm_loadu_ps (P(0,0,2)); r0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (P(0,1,0)); tmp1 = _mm_loadu_ps (P(0,1,2)); r1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (P(0,2,0)); tmp1 = _mm_loadu_ps (P(0,2,2)); r2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (P(0,3,0)); tmp1 = _mm_loadu_ps (P(0,3,2)); r3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); _MM_MATVEC4_PS (r0, r1, r2, r3, c, cPr[0]); _MM_MATVEC4_PS (r0, r1, r2, r3, dc, dcPr[0]); _MM_MATVEC4_PS (i0, i1, i2, i3, c, cPi[0]); _MM_MATVEC4_PS (i0, i1, i2, i3, dc, dcPi[0]); // 2nd quarter tmp0 = _mm_loadu_ps (P(1,0,0)); tmp1 = _mm_loadu_ps (P(1,0,2)); r0 = _mm_shuffle_ps (tmp0, tmp1, 
_MM_SHUFFLE (2, 0, 2, 0)); i0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (P(1,1,0)); tmp1 = _mm_loadu_ps (P(1,1,2)); r1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (P(1,2,0)); tmp1 = _mm_loadu_ps (P(1,2,2)); r2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (P(1,3,0)); tmp1 = _mm_loadu_ps (P(1,3,2)); r3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); _MM_MATVEC4_PS (r0, r1, r2, r3, c, cPr[1]); _MM_MATVEC4_PS (r0, r1, r2, r3, dc, dcPr[1]); _MM_MATVEC4_PS (i0, i1, i2, i3, c, cPi[1]); _MM_MATVEC4_PS (i0, i1, i2, i3, dc, dcPi[1]); // 3rd quarter tmp0 = _mm_loadu_ps (P(2,0,0)); tmp1 = _mm_loadu_ps (P(2,0,2)); r0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (P(2,1,0)); tmp1 = _mm_loadu_ps (P(2,1,2)); r1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (P(2,2,0)); tmp1 = _mm_loadu_ps (P(2,2,2)); r2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (P(2,3,0)); tmp1 = _mm_loadu_ps (P(2,3,2)); r3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); _MM_MATVEC4_PS (r0, r1, r2, r3, c, cPr[2]); _MM_MATVEC4_PS (r0, r1, r2, r3, dc, dcPr[2]); _MM_MATVEC4_PS (i0, i1, i2, i3, c, cPi[2]); _MM_MATVEC4_PS (i0, i1, i2, i3, dc, dcPi[2]); // 4th quarter tmp0 = _mm_loadu_ps (P(3,0,0)); tmp1 = _mm_loadu_ps (P(3,0,2)); r0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (P(3,1,0)); tmp1 = 
_mm_loadu_ps (P(3,1,2)); r1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (P(3,2,0)); tmp1 = _mm_loadu_ps (P(3,2,2)); r2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (P(3,3,0)); tmp1 = _mm_loadu_ps (P(3,3,2)); r3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); _MM_MATVEC4_PS (r0, r1, r2, r3, c, cPr[3]); _MM_MATVEC4_PS (r0, r1, r2, r3, dc, dcPr[3]); _MM_MATVEC4_PS (i0, i1, i2, i3, c, cPi[3]); _MM_MATVEC4_PS (i0, i1, i2, i3, dc, dcPi[3]); // Now compute bcP, dbcP, bdcP, d2bcP, bd2cP, and dbdc products _MM_MATVEC4_PS ( cPr[0], cPr[1], cPr[2], cPr[3], b, bcPr); _MM_MATVEC4_PS ( cPr[0], cPr[1], cPr[2], cPr[3], db, dbcPr); _MM_MATVEC4_PS ( dcPr[0], dcPr[1], dcPr[2], dcPr[3], b, bdcPr); _MM_MATVEC4_PS ( cPi[0], cPi[1], cPi[2], cPi[3], b, bcPi); _MM_MATVEC4_PS ( cPi[0], cPi[1], cPi[2], cPi[3], db, dbcPi); _MM_MATVEC4_PS ( dcPi[0], dcPi[1], dcPi[2], dcPi[3], b, bdcPi); float *valr = ((float*)val) +0; float *vali = ((float*)val) +1; float *gradr0 = ((float *)grad)+0; float *gradi0 = ((float *)grad)+1; float *gradr1 = ((float *)grad)+2; float *gradi1 = ((float *)grad)+3; float *gradr2 = ((float *)grad)+4; float *gradi2 = ((float *)grad)+5; // Compute value _MM_DOT4_PS (a, bcPr, *valr); _MM_DOT4_PS (a, bcPi, *vali); // Compute gradient _MM_DOT4_PS (da, bcPr, *gradr0); _MM_DOT4_PS (a, dbcPr, *gradr1); _MM_DOT4_PS (a, bdcPr, *gradr2); _MM_DOT4_PS (da, bcPi, *gradi0); _MM_DOT4_PS (a, dbcPi, *gradi1); _MM_DOT4_PS (a, bdcPi, *gradi2); // Multiply gradients and hessians by appropriate grid inverses float dxInv = spline->x_grid.delta_inv; float dyInv = spline->y_grid.delta_inv; float dzInv = spline->z_grid.delta_inv; grad[0] *= dxInv; grad[1] *= dyInv; grad[2] *= dzInv; #undef P } /* Value, gradient, and laplacian */ inline void 
eval_UBspline_3d_c_vgl (UBspline_3d_c * restrict spline,
                        double x, double y, double z,
                        complex_float* restrict val,
                        complex_float* restrict grad,
                        complex_float* restrict lapl)
{
  /* Evaluates the 3D complex single-precision spline at (x, y, z).
   *
   *   val   receives the complex value,
   *   grad  receives three complex numbers: d/dx, d/dy, d/dz,
   *   lapl  receives the complex sum of the three pure second
   *         derivatives.
   *
   * The cell indices are obtained by truncating (r - start) * delta_inv;
   * this routine does not range-check them before addressing
   * spline->coefs.
   */

  // Ask for the first four rows of the basis matrix ahead of their use.
  _mm_prefetch ((const char*) &A_s[0], _MM_HINT_T0);
  _mm_prefetch ((const char*) &A_s[1], _MM_HINT_T0);
  _mm_prefetch ((const char*) &A_s[2], _MM_HINT_T0);
  _mm_prefetch ((const char*) &A_s[3], _MM_HINT_T0);

  /// SSE mesh point determination
  // Reduced coordinates u = (r - r0) * delta_inv for x, y and z at once.
  // The fourth slot is padding and is set to zero.
  __m128 origin = _mm_set_ps (spline->x_grid.start, spline->y_grid.start,
                              spline->z_grid.start, 0.0);
  __m128 scale  = _mm_set_ps (spline->x_grid.delta_inv,
                              spline->y_grid.delta_inv,
                              spline->z_grid.delta_inv, 0.0);
  __m128 uxuyuz = _mm_mul_ps (_mm_sub_ps (_mm_set_ps (x, y, z, 0.0), origin),
                              scale);

  // Truncate toward zero to get the integer cell index in each direction.
  __m128i intpart = _mm_cvttps_epi32 (uxuyuz);
  int cell[4];
  _mm_storeu_si128 ((__m128i*)cell, intpart);
  // _mm_set_ps takes its arguments highest element first, so in memory
  // x is element 3, y is element 2 and z is element 1.
  int ix = cell[3];
  int iy = cell[2];
  int iz = cell[1];
  int xs = spline->x_stride;
  int ys = spline->y_stride;

  // P(i,j,k) points at the coefficient (ix+i, iy+j, iz+k), viewed as
  // floats.  i and j run over [0,3]; k is only ever 0 or 2 because each
  // 4-float load picks up two consecutive complex coefficients along z.
#define P(i,j,k) ((const float*)(spline->coefs+(ix+(i))*xs+(iy+(j))*ys+(iz)+(k)))

  // Prefetch the data from main memory into cache so it's available
  // when we need to use it.
  for (int q = 0; q < 4; q++)
    for (int j = 0; j < 4; j++) {
      _mm_prefetch ((const char*)P(q,j,0), _MM_HINT_T0);
      _mm_prefetch ((const char*)P(q,j,2), _MM_HINT_T0);
    }

  // Now compute the vectors:
  // tpx = [t_x^3 t_x^2 t_x 1]
  // tpy = [t_y^3 t_y^2 t_y 1]
  // tpz = [t_z^3 t_z^2 t_z 1]
  // Before the transpose the four registers hold, per direction, 1, t,
  // t^2 and t^3; the transpose regroups them per direction.  "pad"
  // ends up holding the unused padding lane.
  __m128 txtytz = _mm_sub_ps (uxuyuz, _mm_cvtepi32_ps (intpart));
  __m128 t2     = _mm_mul_ps (txtytz, txtytz);
  __m128 tpx    = _mm_mul_ps (t2, txtytz);
  __m128 tpy    = t2;
  __m128 tpz    = txtytz;
  __m128 pad    = _mm_set_ps (1.0, 1.0, 1.0, 1.0);
  _MM_TRANSPOSE4_PS(pad, tpz, tpy, tpx);

  // a   = A   * tpx,  b   = A   * tpy,  c   = A   * tpz
  // da  = dA  * tpx,  db  = dA  * tpy,  dc  = dA  * tpz
  // d2a = d2A * tpx,  d2b = d2A * tpy,  d2c = d2A * tpz
  // A uses rows A_s[0..3], dA rows A_s[4..7], d2A rows A_s[8..11].
  __m128 a, b, c, da, db, dc, d2a, d2b, d2c,
    cPr[4], dcPr[4], d2cPr[4], bcPr, dbcPr, bdcPr, d2bcPr, bd2cPr,
    cPi[4], dcPi[4], d2cPi[4], bcPi, dbcPi, bdcPi, d2bcPi, bd2cPi;

  // x-dependent vectors
  _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpx,   a);
  _MM_MATVEC4_PS (A_s[ 4], A_s[ 5], A_s[ 6], A_s[ 7], tpx,  da);
  _MM_MATVEC4_PS (A_s[ 8], A_s[ 9], A_s[10], A_s[11], tpx, d2a);
  // y-dependent vectors
  _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpy,   b);
  _MM_MATVEC4_PS (A_s[ 4], A_s[ 5], A_s[ 6], A_s[ 7], tpy,  db);
  _MM_MATVEC4_PS (A_s[ 8], A_s[ 9], A_s[10], A_s[11], tpy, d2b);
  // z-dependent vectors
  _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpz,   c);
  _MM_MATVEC4_PS (A_s[ 4], A_s[ 5], A_s[ 6], A_s[ 7], tpz,  dc);
  _MM_MATVEC4_PS (A_s[ 8], A_s[ 9], A_s[10], A_s[11], tpz, d2c);

  // Compute the cP, dcP and d2cP products one x-slab ("quarter") at a
  // time so that the loaded coefficients are reused for all three
  // contractions.  For each y row, two unaligned loads fetch four
  // complex numbers and the shuffles split them into real (r*) and
  // imaginary (i*) parts.
  for (int q = 0; q < 4; q++) {
    __m128 lo, hi, r0, r1, r2, r3, i0, i1, i2, i3;

    lo = _mm_loadu_ps (P(q,0,0));
    hi = _mm_loadu_ps (P(q,0,2));
    r0 = _mm_shuffle_ps (lo, hi, _MM_SHUFFLE (2, 0, 2, 0));
    i0 = _mm_shuffle_ps (lo, hi, _MM_SHUFFLE (3, 1, 3, 1));

    lo = _mm_loadu_ps (P(q,1,0));
    hi = _mm_loadu_ps (P(q,1,2));
    r1 = _mm_shuffle_ps (lo, hi, _MM_SHUFFLE (2, 0, 2, 0));
    i1 = _mm_shuffle_ps (lo, hi, _MM_SHUFFLE (3, 1, 3, 1));

    lo = _mm_loadu_ps (P(q,2,0));
    hi = _mm_loadu_ps (P(q,2,2));
    r2 = _mm_shuffle_ps (lo, hi, _MM_SHUFFLE (2, 0, 2, 0));
    i2 = _mm_shuffle_ps (lo, hi, _MM_SHUFFLE (3, 1, 3, 1));

    lo = _mm_loadu_ps (P(q,3,0));
    hi = _mm_loadu_ps (P(q,3,2));
    r3 = _mm_shuffle_ps (lo, hi, _MM_SHUFFLE (2, 0, 2, 0));
    i3 = _mm_shuffle_ps (lo, hi, _MM_SHUFFLE (3, 1, 3, 1));

    _MM_MATVEC4_PS (r0, r1, r2, r3,   c,   cPr[q]);
    _MM_MATVEC4_PS (r0, r1, r2, r3,  dc,  dcPr[q]);
    _MM_MATVEC4_PS (r0, r1, r2, r3, d2c, d2cPr[q]);
    _MM_MATVEC4_PS (i0, i1, i2, i3,   c,   cPi[q]);
    _MM_MATVEC4_PS (i0, i1, i2, i3,  dc,  dcPi[q]);
    _MM_MATVEC4_PS (i0, i1, i2, i3, d2c, d2cPi[q]);
  }

  // Now compute the bcP, dbcP, bdcP, d2bcP and bd2cP products.  The
  // mixed db*dc product is only needed for a full Hessian, so it is not
  // formed here.
  _MM_MATVEC4_PS (  cPr[0],   cPr[1],   cPr[2],   cPr[3],   b,   bcPr);
  _MM_MATVEC4_PS (  cPr[0],   cPr[1],   cPr[2],   cPr[3],  db,  dbcPr);
  _MM_MATVEC4_PS ( dcPr[0],  dcPr[1],  dcPr[2],  dcPr[3],   b,  bdcPr);
  _MM_MATVEC4_PS (  cPr[0],   cPr[1],   cPr[2],   cPr[3], d2b, d2bcPr);
  _MM_MATVEC4_PS (d2cPr[0], d2cPr[1], d2cPr[2], d2cPr[3],   b, bd2cPr);

  _MM_MATVEC4_PS (  cPi[0],   cPi[1],   cPi[2],   cPi[3],   b,   bcPi);
  _MM_MATVEC4_PS (  cPi[0],   cPi[1],   cPi[2],   cPi[3],  db,  dbcPi);
  _MM_MATVEC4_PS ( dcPi[0],  dcPi[1],  dcPi[2],  dcPi[3],   b,  bdcPi);
  _MM_MATVEC4_PS (  cPi[0],   cPi[1],   cPi[2],   cPi[3], d2b, d2bcPi);
  _MM_MATVEC4_PS (d2cPi[0], d2cPi[1], d2cPi[2], d2cPi[3],   b, bd2cPi);

  // The outputs are written as interleaved floats: val is (re, im) and
  // grad is (re, im) for d/dx, d/dy, d/dz in that order.
  float *valf  = (float*)val;
  float *gradf = (float*)grad;

  // Compute value
  _MM_DOT4_PS (a, bcPr, valf[0]);
  _MM_DOT4_PS (a, bcPi, valf[1]);
  // Compute gradient
  _MM_DOT4_PS (da,  bcPr, gradf[0]);
  _MM_DOT4_PS (da,  bcPi, gradf[1]);
  _MM_DOT4_PS (a,  dbcPr, gradf[2]);
  _MM_DOT4_PS (a,  dbcPi, gradf[3]);
  _MM_DOT4_PS (a,  bdcPr, gradf[4]);
  _MM_DOT4_PS (a,  bdcPi, gradf[5]);
  // Compute the pure second derivatives: (re, im) for xx, yy, zz.
  float sec_deriv[6];
  _MM_DOT4_PS (d2a,  bcPr, sec_deriv[0]);
  _MM_DOT4_PS (d2a,  bcPi, sec_deriv[1]);
  _MM_DOT4_PS (a,  d2bcPr, sec_deriv[2]);
  _MM_DOT4_PS (a,  d2bcPi, sec_deriv[3]);
  _MM_DOT4_PS (a,  bd2cPr, sec_deriv[4]);
  _MM_DOT4_PS (a,  bd2cPi, sec_deriv[5]);

  // Derivatives were taken with respect to the reduced coordinate, so
  // multiply by the appropriate powers of the inverse grid spacings.
  float dxInv = spline->x_grid.delta_inv;
  float dyInv = spline->y_grid.delta_inv;
  float dzInv = spline->z_grid.delta_inv;
  grad[0] *= dxInv;
  grad[1] *= dyInv;
  grad[2] *= dzInv;
  sec_deriv[0] *= dxInv*dxInv;
  sec_deriv[1] *= dxInv*dxInv;
  sec_deriv[2] *= dyInv*dyInv;
  sec_deriv[3] *= dyInv*dyInv;
  sec_deriv[4] *= dzInv*dzInv;
  sec_deriv[5] *= dzInv*dzInv;

  // Laplacian = d2/dx2 + d2/dy2 + d2/dz2, assembled as a complex number.
#ifdef __cplusplus
  *lapl = std::complex<float>(sec_deriv[0] + sec_deriv[2] + sec_deriv[4],
                              sec_deriv[1] + sec_deriv[3] + sec_deriv[5]);
#else
  *lapl = (sec_deriv[0] + sec_deriv[2] + sec_deriv[4]) + 1.0fi*(sec_deriv[1] + sec_deriv[3] + sec_deriv[5]);
#endif
#undef P
}

/* Value, gradient, and Hessian */
inline void
eval_UBspline_3d_c_vgh (UBspline_3d_c * restrict spline,
                        double x, double y, double z,
                        complex_float* restrict val,
                        complex_float* restrict grad,
                        complex_float* restrict hess)
{
  _mm_prefetch ((const char*) &A_s[0],_MM_HINT_T0);
  _mm_prefetch ((const char*) &A_s[1],_MM_HINT_T0);
  _mm_prefetch ((const char*) &A_s[2],_MM_HINT_T0);
  _mm_prefetch ((const char*) &A_s[3],_MM_HINT_T0);
  /// SSE mesh point determination
  __m128 xyz = _mm_set_ps (x, y, z, 0.0);
  __m128 x0y0z0 = _mm_set_ps (spline->x_grid.start, spline->y_grid.start, spline->z_grid.start, 0.0);
  __m128 delta_inv = _mm_set_ps (spline->x_grid.delta_inv,spline->y_grid.delta_inv, spline->z_grid.delta_inv, 0.0);
  xyz = _mm_sub_ps (xyz, x0y0z0);
  // ux = (x - x0)/delta_x and same for y and z
  __m128 uxuyuz = _mm_mul_ps (xyz, delta_inv);
  // intpart = trunc (ux, uy, uz)
  __m128i intpart = _mm_cvttps_epi32(uxuyuz);
  __m128i ixiyiz;
  _mm_storeu_si128 (&ixiyiz, intpart);
  // Store to memory for use in C expressions
  // xmm registers are stored to memory in reverse order
  int ix = ((int *)&ixiyiz)[3];
  int iy = ((int *)&ixiyiz)[2];
  int iz = ((int *)&ixiyiz)[1];
  int xs = spline->x_stride;
  int ys = spline->y_stride;
  // This macro is used to give the pointer to coefficient data.
  // i and j should be in the range [0,3]. Coefficients are read four
  // at a time, so no k value is needed.
#define P(i,j,k) ((const float*)(spline->coefs+(ix+(i))*xs+(iy+(j))*ys+(iz)+k)) // Prefetch the data from main memory into cache so it's available // when we need to use it. _mm_prefetch ((const char*)P(0,0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,1,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,2,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,3,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,1,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,2,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,3,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,1,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,2,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,3,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,1,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,2,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,3,2), _MM_HINT_T0); // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // tpz = [t_z^3 t_z^2 t_z 1] __m128 ipart = _mm_cvtepi32_ps (intpart); __m128 txtytz = _mm_sub_ps (uxuyuz, 
ipart); __m128 one = _mm_set_ps (1.0, 1.0, 1.0, 1.0); __m128 t2 = _mm_mul_ps (txtytz, txtytz); __m128 t3 = _mm_mul_ps (t2, txtytz); __m128 tpx = t3; __m128 tpy = t2; __m128 tpz = txtytz; __m128 zero = one; _MM_TRANSPOSE4_PS(zero, tpz, tpy, tpx); // a = A * tpx, b = A * tpy, c = A * tpz // da = dA * tpx, db = dA * tpy, dc = dA * tpz, etc. // A is 4x4 matrix given by the rows A_s[0], A_s[1], A_s[2], A_s[3] __m128 a, b, c, da, db, dc, d2a, d2b, d2c, cPr[4], dcPr[4], d2cPr[4], bcPr, dbcPr, bdcPr, d2bcPr, dbdcPr, bd2cPr, cPi[4], dcPi[4], d2cPi[4], bcPi, dbcPi, bdcPi, d2bcPi, dbdcPi, bd2cPi, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, r0, r1, r2, r3, i0, i1, i2, i3; // x-dependent vectors _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpx, a); _MM_MATVEC4_PS (A_s[ 4], A_s[ 5], A_s[ 6], A_s[ 7], tpx, da); _MM_MATVEC4_PS (A_s[ 8], A_s[ 9], A_s[10], A_s[11], tpx, d2a); // y-dependent vectors _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpy, b); _MM_MATVEC4_PS (A_s[ 4], A_s[ 5], A_s[ 6], A_s[ 7], tpy, db); _MM_MATVEC4_PS (A_s[ 8], A_s[ 9], A_s[10], A_s[11], tpy, d2b); // z-dependent vectors _MM_MATVEC4_PS (A_s[ 0], A_s[ 1], A_s[ 2], A_s[ 3], tpz, c); _MM_MATVEC4_PS (A_s[ 4], A_s[ 5], A_s[ 6], A_s[ 7], tpz, dc); _MM_MATVEC4_PS (A_s[ 8], A_s[ 9], A_s[10], A_s[11], tpz, d2c); // Compute cP, dcP, and d2cP products 1/4 at a time to maximize // register reuse and avoid rerereading from memory or cache. 
// 1st quarter tmp0 = _mm_loadu_ps (P(0,0,0)); tmp1 = _mm_loadu_ps (P(0,0,2)); r0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (P(0,1,0)); tmp1 = _mm_loadu_ps (P(0,1,2)); r1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (P(0,2,0)); tmp1 = _mm_loadu_ps (P(0,2,2)); r2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (P(0,3,0)); tmp1 = _mm_loadu_ps (P(0,3,2)); r3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); _MM_MATVEC4_PS (r0, r1, r2, r3, c, cPr[0]); _MM_MATVEC4_PS (r0, r1, r2, r3, dc, dcPr[0]); _MM_MATVEC4_PS (r0, r1, r2, r3, d2c, d2cPr[0]); _MM_MATVEC4_PS (i0, i1, i2, i3, c, cPi[0]); _MM_MATVEC4_PS (i0, i1, i2, i3, dc, dcPi[0]); _MM_MATVEC4_PS (i0, i1, i2, i3, d2c, d2cPi[0]); // 2nd quarter tmp0 = _mm_loadu_ps (P(1,0,0)); tmp1 = _mm_loadu_ps (P(1,0,2)); r0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (P(1,1,0)); tmp1 = _mm_loadu_ps (P(1,1,2)); r1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (P(1,2,0)); tmp1 = _mm_loadu_ps (P(1,2,2)); r2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (P(1,3,0)); tmp1 = _mm_loadu_ps (P(1,3,2)); r3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); _MM_MATVEC4_PS (r0, r1, r2, r3, c, cPr[1]); _MM_MATVEC4_PS (r0, r1, r2, r3, dc, dcPr[1]); _MM_MATVEC4_PS (r0, r1, r2, r3, d2c, d2cPr[1]); _MM_MATVEC4_PS (i0, i1, i2, i3, c, cPi[1]); _MM_MATVEC4_PS (i0, i1, i2, i3, dc, 
dcPi[1]); _MM_MATVEC4_PS (i0, i1, i2, i3, d2c, d2cPi[1]); // 3rd quarter tmp0 = _mm_loadu_ps (P(2,0,0)); tmp1 = _mm_loadu_ps (P(2,0,2)); r0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (P(2,1,0)); tmp1 = _mm_loadu_ps (P(2,1,2)); r1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (P(2,2,0)); tmp1 = _mm_loadu_ps (P(2,2,2)); r2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (P(2,3,0)); tmp1 = _mm_loadu_ps (P(2,3,2)); r3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); _MM_MATVEC4_PS (r0, r1, r2, r3, c, cPr[2]); _MM_MATVEC4_PS (r0, r1, r2, r3, dc, dcPr[2]); _MM_MATVEC4_PS (r0, r1, r2, r3, d2c, d2cPr[2]); _MM_MATVEC4_PS (i0, i1, i2, i3, c, cPi[2]); _MM_MATVEC4_PS (i0, i1, i2, i3, dc, dcPi[2]); _MM_MATVEC4_PS (i0, i1, i2, i3, d2c, d2cPi[2]); // 4th quarter tmp0 = _mm_loadu_ps (P(3,0,0)); tmp1 = _mm_loadu_ps (P(3,0,2)); r0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (P(3,1,0)); tmp1 = _mm_loadu_ps (P(3,1,2)); r1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (P(3,2,0)); tmp1 = _mm_loadu_ps (P(3,2,2)); r2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (P(3,3,0)); tmp1 = _mm_loadu_ps (P(3,3,2)); r3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); _MM_MATVEC4_PS (r0, r1, r2, r3, c, cPr[3]); _MM_MATVEC4_PS (r0, r1, r2, r3, dc, dcPr[3]); _MM_MATVEC4_PS (r0, r1, r2, r3, d2c, d2cPr[3]); _MM_MATVEC4_PS (i0, i1, 
i2, i3, c, cPi[3]); _MM_MATVEC4_PS (i0, i1, i2, i3, dc, dcPi[3]); _MM_MATVEC4_PS (i0, i1, i2, i3, d2c, d2cPi[3]); // Now compute bcP, dbcP, bdcP, d2bcP, bd2cP, and dbdc products _MM_MATVEC4_PS ( cPr[0], cPr[1], cPr[2], cPr[3], b, bcPr); _MM_MATVEC4_PS ( cPr[0], cPr[1], cPr[2], cPr[3], db, dbcPr); _MM_MATVEC4_PS ( dcPr[0], dcPr[1], dcPr[2], dcPr[3], b, bdcPr); _MM_MATVEC4_PS ( cPr[0], cPr[1], cPr[2], cPr[3], d2b, d2bcPr); _MM_MATVEC4_PS (d2cPr[0], d2cPr[1], d2cPr[2], d2cPr[3], b, bd2cPr); _MM_MATVEC4_PS ( dcPr[0], dcPr[1], dcPr[2], dcPr[3], db, dbdcPr); _MM_MATVEC4_PS ( cPi[0], cPi[1], cPi[2], cPi[3], b, bcPi); _MM_MATVEC4_PS ( cPi[0], cPi[1], cPi[2], cPi[3], db, dbcPi); _MM_MATVEC4_PS ( dcPi[0], dcPi[1], dcPi[2], dcPi[3], b, bdcPi); _MM_MATVEC4_PS ( cPi[0], cPi[1], cPi[2], cPi[3], d2b, d2bcPi); _MM_MATVEC4_PS (d2cPi[0], d2cPi[1], d2cPi[2], d2cPi[3], b, bd2cPi); _MM_MATVEC4_PS ( dcPi[0], dcPi[1], dcPi[2], dcPi[3], db, dbdcPi); float *valr = ((float*)val) +0; float *vali = ((float*)val) +1; float *gradr0 = ((float *)grad)+0; float *gradi0 = ((float *)grad)+1; float *gradr1 = ((float *)grad)+2; float *gradi1 = ((float *)grad)+3; float *gradr2 = ((float *)grad)+4; float *gradi2 = ((float *)grad)+5; // Compute value _MM_DOT4_PS (a, bcPr, *valr); _MM_DOT4_PS (a, bcPi, *vali); // Compute gradient _MM_DOT4_PS (da, bcPr, *gradr0); _MM_DOT4_PS (a, dbcPr, *gradr1); _MM_DOT4_PS (a, bdcPr, *gradr2); _MM_DOT4_PS (da, bcPi, *gradi0); _MM_DOT4_PS (a, dbcPi, *gradi1); _MM_DOT4_PS (a, bdcPi, *gradi2); // Compute hessian _MM_DOT4_PS (d2a, bcPr, *(float*)(&hess[0])); _MM_DOT4_PS (a, d2bcPr, *(float*)(&hess[4])); _MM_DOT4_PS (a, bd2cPr, *(float*)(&hess[8])); _MM_DOT4_PS (da, dbcPr, *(float*)(&hess[1])); _MM_DOT4_PS (da, bdcPr, *(float*)(&hess[2])); _MM_DOT4_PS (a, dbdcPr, *(float*)(&hess[5])); _MM_DOT4_PS (d2a, bcPi, *((float*)(&hess[0])+1)); _MM_DOT4_PS (a, d2bcPi, *((float*)(&hess[4])+1)); _MM_DOT4_PS (a, bd2cPi, *((float*)(&hess[8])+1)); _MM_DOT4_PS (da, dbcPi, 
*((float*)(&hess[1])+1)); _MM_DOT4_PS (da, bdcPi, *((float*)(&hess[2])+1)); _MM_DOT4_PS (a, dbdcPi, *((float*)(&hess[5])+1)); // Multiply gradients and hessians by appropriate grid inverses float dxInv = spline->x_grid.delta_inv; float dyInv = spline->y_grid.delta_inv; float dzInv = spline->z_grid.delta_inv; grad[0] *= dxInv; grad[1] *= dyInv; grad[2] *= dzInv; hess[0] *= dxInv*dxInv; hess[4] *= dyInv*dyInv; hess[8] *= dzInv*dzInv; hess[1] *= dxInv*dyInv; hess[2] *= dxInv*dzInv; hess[5] *= dyInv*dzInv; // Copy hessian elements into lower half of 3x3 matrix hess[3] = hess[1]; hess[6] = hess[2]; hess[7] = hess[5]; #undef P } #undef _MM_MATVEC4_PS #undef _MM_DOT4_PS #endif einspline-0.9.2/src/test_fbspline.f0000664000113000011300000000142411012400563014203 00000000000000 PROGRAM test_fbspline c23456789 implicit none real*8 x0, x1 integer*8 spline integer num, x0code, x1code real x0val, x1val real a(11), val real*8 x x0 = 0.0D0 x1 = 1.0D0 num = 11 a(1) = 1.5 a(2) = -1.3 a(3) = 2.3 a(4) = 3.1 a(5) = 1.8 a(6) = 0.9 a(7) = -0.2 a(8) = -1.0 a(9) = -1.2 a(10) = -0.8 a(11) = 1.5 x0code = 0 x1code = 0 call fcreate_ubspline_1d_s (x0, x1, num, x0code, x0val, + x1code, x1val, a, spline) x = 0.0D0 do while (x .le. 1.00001D0) call feval_ubspline_1d_s (spline, x, val) write (*,*) x, val x = x + 0.001D0 enddo stop end einspline-0.9.2/src/multi_bspline_eval_sse_c_cpp.cc0000664000113000011300000000276611015565204017416 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. // // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. 
// // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. // // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #include "multi_bspline_eval_sse_c_impl.h" einspline-0.9.2/src/multi_bspline_eval_sse_s_cpp.cc0000664000113000011300000000276611015561334017436 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. // // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. 
//
//                                                                         //
// You should have received a copy of the GNU General Public License      //
// along with this program; if not, write to the Free Software            //
// Foundation, Inc., 51 Franklin Street, Fifth Floor,                     //
// Boston, MA 02110-1301 USA                                              //
/////////////////////////////////////////////////////////////////////////////

#include "multi_bspline_eval_sse_s_impl.h"
einspline-0.9.2/src/multi_bspline_cuda_s_impl.h0000664000113000011300000006435711302247653016602 00000000000000
#ifndef MULTI_BSPLINE_CUDA_S_IMPL_H
#define MULTI_BSPLINE_CUDA_S_IMPL_H

//#include
#include "multi_bspline.h"
#include "multi_bspline_create_cuda.h"

// Single-precision CUDA evaluation of many uniform cubic B-splines that
// share one grid ("multi" splines), for many positions at once
// ("multi_multi").
//
// Conventions shared by every kernel in this file:
//   * One position is handled per blockIdx.x (1D kernels) or per
//     blockIdx.y (3D kernels); thread 0 of the block loads the position and
//     the per-position output pointers into shared memory.
//   * Acuda[] is defined outside this file.  It is indexed as a row-major
//     table of 4-wide rows applied to (t^3, t^2, t, 1): rows 0-3 give the
//     basis values, rows 4-7 the first derivative and rows 8-11 the second
//     derivative.
//   * The cell index is clamped to [0, dim-1]; the fractional coordinate t
//     is taken relative to the unclamped floor.
//   * Consecutive splines are contiguous in the coefficient array, so
//     thread `tid` reads spline `tid` (+ a block offset).


// Evaluate the values of N 1D splines at each position pos[blockIdx.x].
//   pos     positions, one float per block
//   drInv   inverse grid spacing
//   coefs   coefficient array; grid point g starts at coefs + g*stride
//   vals    vals[ir] points at the N-float output row for position ir
//   dim     upper clamp (exclusive) for the cell index
//   stride  distance in floats between consecutive grid points
//   N       number of splines
__global__ static void
eval_multi_multi_UBspline_1d_s_kernel
(float *pos, float drInv, float *coefs, float **vals,
 uint dim, uint stride, int N)
{
  int tid = threadIdx.x;
  int ir  = blockIdx.x;

  __shared__ float *ourval;
  __shared__ float r;
  if (tid == 0) {
    r      = pos[ir];
    ourval = vals[ir];
  }
  __syncthreads();

  // Locate the grid cell and the fractional offset inside it.
  int index;
  float t;
  float s, sf;
  float4 tp;
  s     = r * drInv;
  sf    = floor(s);
  index = min(max(0,(int)sf), dim-1);
  t     = s - sf;
  tp    = make_float4(t*t*t, t*t, t, 1.0);

  // Four basis-function values, computed once per block.
  __shared__ float a[4];
  if (tid < 4)
    a[tid] = Acuda[4*tid+0]*tp.x + Acuda[4*tid+1]*tp.y +
             Acuda[4*tid+2]*tp.z + Acuda[4*tid+3]*tp.w;
  __syncthreads();

  int numBlocks = N / SPLINE_BLOCK_SIZE;
  float *c      = coefs + index*stride + tid;
  float *myval  = ourval + tid;
  int stride2   = 2*stride;
  int stride3   = 3*stride;

  // Full chunks of SPLINE_BLOCK_SIZE splines.
  for (int block=0; block < numBlocks; block++) {
    *myval = (a[0] * c[0]       + a[1] * c[stride] +
              a[2] * c[stride2] + a[3] * c[stride3]);
    myval += SPLINE_BLOCK_SIZE;
    c     += SPLINE_BLOCK_SIZE;
  }

  // Trailing partial chunk.
  int remainder = N - numBlocks*SPLINE_BLOCK_SIZE;
  if (tid < remainder) {
    *myval = (a[0] * c[0]       + a[1] * c[stride] +
              a[2] * c[stride2] + a[3] * c[stride3]);
  }
}


// Host wrapper: launches one block per position (num positions) and aborts
// the process with a message on stderr if CUDA reports an error.
extern "C" void
eval_multi_multi_UBspline_1d_s_cuda (multi_UBspline_1d_s_cuda *spline,
                                     float *pos_d, float *vals_d[], int num)
{
  dim3 dimBlock(SPLINE_BLOCK_SIZE);
  dim3 dimGrid(num);

  eval_multi_multi_UBspline_1d_s_kernel<<<dimGrid,dimBlock>>>
    (pos_d, spline->gridInv, spline->coefs, vals_d,
     spline->dim, spline->stride, spline->num_splines);

  cudaThreadSynchronize();
  cudaError_t err = cudaGetLastError();
  if (err != cudaSuccess) {
    fprintf (stderr, "CUDA error in eval_multi_multi_UBspline_1d_s_cuda:\n %s\n",
             cudaGetErrorString(err));
    abort();
  }
}


// Evaluate value, first derivative and second derivative of N 1D splines
// at each position pos[blockIdx.x].  Parameters are as for the value-only
// kernel; grads[ir] and lapl[ir] are N-float output rows like vals[ir].
//
// NOTE(review): unlike the 3D kernels below, the derivative outputs are
// not multiplied by drInv / drInv^2 here -- confirm whether callers expect
// grid-coordinate derivatives.
__global__ static void
eval_multi_multi_UBspline_1d_s_vgl_kernel
(float *pos, float drInv, float *coefs,
 float **vals, float **grads, float **lapl,
 uint dim, uint stride, int N)
{
  int tid = threadIdx.x;
  int ir  = blockIdx.x;

  __shared__ float *ourval, *ourgrad, *ourlapl;
  __shared__ float r;
  if (tid == 0) {
    r       = pos[ir];
    ourval  = vals[ir];
    ourgrad = grads[ir];
    ourlapl = lapl[ir];
  }
  __syncthreads();

  int index;
  float t;
  float s, sf;
  float4 tp;
  s     = r * drInv;
  sf    = floor(s);
  index = min(max(0,(int)sf), dim-1);
  t     = s - sf;
  tp    = make_float4(t*t*t, t*t, t, 1.0);

  // a[0..3] value weights, a[4..7] first-derivative weights,
  // a[8..11] second-derivative weights.
  __shared__ float a[12];
  if (tid < 12)
    a[tid] = Acuda[4*tid+0]*tp.x + Acuda[4*tid+1]*tp.y +
             Acuda[4*tid+2]*tp.z + Acuda[4*tid+3]*tp.w;
  __syncthreads();

  int numBlocks = N / SPLINE_BLOCK_SIZE;
  float *c      = coefs + index*stride + tid;
  float *myval  = ourval  + tid;
  float *mygrad = ourgrad + tid;
  float *mylapl = ourlapl + tid;
  int stride2   = 2*stride;
  int stride3   = 3*stride;

  // Per-thread staging of the four coefficients (row width 5, as in the
  // original code).
  __shared__ float coef[SPLINE_BLOCK_SIZE][5];

  for (int block=0; block < numBlocks; block++) {
    coef[tid][0] = c[0];
    coef[tid][1] = c[stride];
    coef[tid][2] = c[stride2];
    coef[tid][3] = c[stride3];

    *myval  = (a[0] * coef[tid][0] + a[1] * coef[tid][1] +
               a[2] * coef[tid][2] + a[3] * coef[tid][3]);
    *mygrad = (a[4] * coef[tid][0] + a[5] * coef[tid][1] +
               a[6] * coef[tid][2] + a[7] * coef[tid][3]);
    *mylapl = (a[8] * coef[tid][0] + a[9] * coef[tid][1] +
               a[10]* coef[tid][2] + a[11]* coef[tid][3]);

    myval  += SPLINE_BLOCK_SIZE;
    mygrad += SPLINE_BLOCK_SIZE;
    mylapl += SPLINE_BLOCK_SIZE;
    c      += SPLINE_BLOCK_SIZE;
  }

  // Trailing partial chunk.  The gradient and Laplacian must be stored
  // here as well; previously only the value was written, leaving the last
  // N % SPLINE_BLOCK_SIZE entries of grads/lapl untouched.
  int remainder = N - numBlocks*SPLINE_BLOCK_SIZE;
  if (tid < remainder) {
    float c0 = c[0];
    float c1 = c[stride];
    float c2 = c[stride2];
    float c3 = c[stride3];
    *myval  = (a[0] * c0 + a[1] * c1 + a[2] * c2 + a[3] * c3);
    *mygrad = (a[4] * c0 + a[5] * c1 + a[6] * c2 + a[7] * c3);
    *mylapl = (a[8] * c0 + a[9] * c1 + a[10]* c2 + a[11]* c3);
  }
}


// Host wrapper for the 1D value/gradient/Laplacian kernel: one block per
// position; aborts on any CUDA error.
extern "C" void
eval_multi_multi_UBspline_1d_s_vgl_cuda (multi_UBspline_1d_s_cuda *spline,
                                         float *pos_d, float *vals_d[],
                                         float *grads_d[], float *lapl_d[],
                                         int num)
{
  dim3 dimBlock(SPLINE_BLOCK_SIZE);
  dim3 dimGrid(num);

  eval_multi_multi_UBspline_1d_s_vgl_kernel<<<dimGrid,dimBlock>>>
    (pos_d, spline->gridInv, spline->coefs, vals_d, grads_d, lapl_d,
     spline->dim, spline->stride, spline->num_splines);

  cudaThreadSynchronize();
  cudaError_t err = cudaGetLastError();
  if (err != cudaSuccess) {
    fprintf (stderr, "CUDA error in eval_multi_multi_UBspline_1d_s_vgl_cuda:\n %s\n",
             cudaGetErrorString(err));
    abort();
  }
}


// Evaluate the values of N 3D splines at position blockIdx.y.
// blockIdx.x selects a chunk of SPLINE_BLOCK_SIZE splines; thread `thr`
// handles spline off = blockIdx.x*SPLINE_BLOCK_SIZE + thr.
//   pos      3 floats (x,y,z) per position
//   drInv    inverse grid spacing per dimension
//   coefs    coefficients, addressed as ix*strides.x + iy*strides.y +
//            iz*strides.z + spline
//   vals     vals[ir] points at the N-float output row for position ir
//   dim      upper clamps (exclusive) for the cell indices
//   N        number of splines
__global__ static void
eval_multi_multi_UBspline_3d_s_kernel
(float *pos, float3 drInv, float *coefs, float *vals[],
 uint3 dim, uint3 strides, int N)
{
  int block = blockIdx.x;
  int thr   = threadIdx.x;
  int ir    = blockIdx.y;
  int off   = block*SPLINE_BLOCK_SIZE+thr;

  __shared__ float *myval;
  __shared__ float abc[64];
  __shared__ float3 r;
  if (thr == 0) {
    r.x = pos[3*ir+0];
    r.y = pos[3*ir+1];
    r.z = pos[3*ir+2];
    myval = vals[ir];
  }
  __syncthreads();

  int3 index;
  float3 t;
  float s, sf;
  float4 tp[3];

  s = r.x * drInv.x;
  sf = floor(s);
  index.x = min(max(0,(int)sf), dim.x-1);
  t.x = s - sf;

  s = r.y * drInv.y;
  sf = floor(s);
  index.y = min(max(0,(int)sf), dim.y-1);
  t.y = s - sf;

  s = r.z * drInv.z;
  sf = floor(s);
  index.z = min(max(0,(int)sf), dim.z-1);
  t.z = s - sf;

  tp[0] = make_float4(t.x*t.x*t.x, t.x*t.x, t.x, 1.0);
  tp[1] = make_float4(t.y*t.y*t.y, t.y*t.y, t.y, 1.0);
  tp[2] = make_float4(t.z*t.z*t.z, t.z*t.z, t.z, 1.0);

  // Per-dimension basis values.
  __shared__ float a[4], b[4], c[4];
  if (thr < 4) {
    a[thr] = Acuda[4*thr+0]*tp[0].x + Acuda[4*thr+1]*tp[0].y +
             Acuda[4*thr+2]*tp[0].z + Acuda[4*thr+3]*tp[0].w;
    b[thr] = Acuda[4*thr+0]*tp[1].x + Acuda[4*thr+1]*tp[1].y +
             Acuda[4*thr+2]*tp[1].z + Acuda[4*thr+3]*tp[1].w;
    c[thr] = Acuda[4*thr+0]*tp[2].x + Acuda[4*thr+1]*tp[2].y +
             Acuda[4*thr+2]*tp[2].z + Acuda[4*thr+3]*tp[2].w;
  }
  __syncthreads();

  // Tensor-product weights: abc[16*i+4*j+k] = a[i]*b[j]*c[k].
  int i = (thr>>4)&3;
  int j = (thr>>2)&3;
  int k = (thr & 3);
  if (thr < 64)
    abc[thr] = a[i]*b[j]*c[k];
  __syncthreads();

  if (off < N) {
    float val = 0.0;
    for (int i=0; i<4; i++) {
      for (int j=0; j<4; j++) {
        float *base = coefs + (index.x+i)*strides.x +
                      (index.y+j)*strides.y + index.z*strides.z;
        for (int k=0; k<4; k++)
          val += abc[16*i+4*j+k] * base[off+k*strides.z];
      }
    }
    myval[off] = val;
  }
}


// Same as eval_multi_multi_UBspline_3d_s_kernel, but every value written
// for position ir is multiplied by sign[ir].
__global__ static void
eval_multi_multi_UBspline_3d_s_sign_kernel
(float *pos, float *sign, float3 drInv, float *coefs, float *vals[],
 uint3 dim, uint3 strides, int N)
{
  int block = blockIdx.x;
  int thr   = threadIdx.x;
  int ir    = blockIdx.y;
  int off   = block*SPLINE_BLOCK_SIZE+thr;

  __shared__ float *myval;
  __shared__ float abc[64];
  __shared__ float mysign;
  __shared__ float3 r;
  if (thr == 0) {
    r.x = pos[3*ir+0];
    r.y = pos[3*ir+1];
    r.z = pos[3*ir+2];
    myval  = vals[ir];
    mysign = sign[ir];
  }
  __syncthreads();

  int3 index;
  float3 t;
  float s, sf;
  float4 tp[3];

  s = r.x * drInv.x;
  sf = floor(s);
  index.x = min(max(0,(int)sf), dim.x-1);
  t.x = s - sf;

  s = r.y * drInv.y;
  sf = floor(s);
  index.y = min(max(0,(int)sf), dim.y-1);
  t.y = s - sf;

  s = r.z * drInv.z;
  sf = floor(s);
  index.z = min(max(0,(int)sf), dim.z-1);
  t.z = s - sf;

  tp[0] = make_float4(t.x*t.x*t.x, t.x*t.x, t.x, 1.0);
  tp[1] = make_float4(t.y*t.y*t.y, t.y*t.y, t.y, 1.0);
  tp[2] = make_float4(t.z*t.z*t.z, t.z*t.z, t.z, 1.0);

  __shared__ float a[4], b[4], c[4];
  if (thr < 4) {
    a[thr] = Acuda[4*thr+0]*tp[0].x + Acuda[4*thr+1]*tp[0].y +
             Acuda[4*thr+2]*tp[0].z + Acuda[4*thr+3]*tp[0].w;
    b[thr] = Acuda[4*thr+0]*tp[1].x + Acuda[4*thr+1]*tp[1].y +
             Acuda[4*thr+2]*tp[1].z + Acuda[4*thr+3]*tp[1].w;
    c[thr] = Acuda[4*thr+0]*tp[2].x + Acuda[4*thr+1]*tp[2].y +
             Acuda[4*thr+2]*tp[2].z + Acuda[4*thr+3]*tp[2].w;
  }
  __syncthreads();

  int i = (thr>>4)&3;
  int j = (thr>>2)&3;
  int k = (thr & 3);
  if (thr < 64)
    abc[thr] = a[i]*b[j]*c[k];
  __syncthreads();

  if (off < N) {
    float val = 0.0;
    for (int i=0; i<4; i++) {
      for (int j=0; j<4; j++) {
        float *base = coefs + (index.x+i)*strides.x +
                      (index.y+j)*strides.y + index.z*strides.z;
        for (int k=0; k<4; k++)
          val += abc[16*i+4*j+k] * base[off+k*strides.z];
      }
    }
    myval[off] = mysign*val;
  }
}


// Evaluate value, gradient and Hessian of N 3D splines at position
// blockIdx.y.  Outputs for position ir:
//   vals[ir][n]           value of spline n
//   grads[ir][3*n + d]    d/dx, d/dy, d/dz          (d = 0,1,2)
//   hess[ir][6*n + m]     h00,h01,h02,h11,h12,h22   (m = 0..5)
// Derivatives are scaled by the matching products of drInv components.
//
// NOTE(review): the abc[] weight table is written by every thread without
// a `thr < 64` guard (slot = thr & 63), so it is fully populated only if
// SPLINE_BLOCK_SIZE >= 64; its later reuse as a staging buffer needs
// 6*SPLINE_BLOCK_SIZE <= 640 -- confirm against SPLINE_BLOCK_SIZE.
__global__ static void
eval_multi_multi_UBspline_3d_s_vgh_kernel
(float *pos, float3 drInv, float *coefs,
 float *vals[], float *grads[], float *hess[],
 uint3 dim, uint3 strides, int N)
{
  int block = blockIdx.x;
  int thr   = threadIdx.x;
  int ir    = blockIdx.y;
  int off   = block*SPLINE_BLOCK_SIZE+threadIdx.x;

  __shared__ float *myval, *mygrad, *myhess;
  __shared__ float3 r;
  if (thr == 0) {
    r.x = pos[3*ir+0];
    r.y = pos[3*ir+1];
    r.z = pos[3*ir+2];
    myval  = vals[ir];
    mygrad = grads[ir];
    myhess = hess[ir];
  }
  __syncthreads();

  int3 index;
  float3 t;
  float s, sf;
  float4 tp[3];

  s = r.x * drInv.x;
  sf = floor(s);
  index.x = min(max(0,(int)sf), dim.x-1);
  t.x = s - sf;

  s = r.y * drInv.y;
  sf = floor(s);
  index.y = min(max(0,(int)sf), dim.y-1);
  t.y = s - sf;

  s = r.z * drInv.z;
  sf = floor(s);
  index.z = min(max(0,(int)sf), dim.z-1);
  t.z = s - sf;

  tp[0] = make_float4(t.x*t.x*t.x, t.x*t.x, t.x, 1.0);
  tp[1] = make_float4(t.y*t.y*t.y, t.y*t.y, t.y, 1.0);
  tp[2] = make_float4(t.z*t.z*t.z, t.z*t.z, t.z, 1.0);

  // First 4 of a are value, second 4 are derivative, last four are
  // second derivative.
  __shared__ float a[12], b[12], c[12];
  if (thr < 12) {
    a[thr] = Acuda[4*thr+0]*tp[0].x + Acuda[4*thr+1]*tp[0].y +
             Acuda[4*thr+2]*tp[0].z + Acuda[4*thr+3]*tp[0].w;
    b[thr] = Acuda[4*thr+0]*tp[1].x + Acuda[4*thr+1]*tp[1].y +
             Acuda[4*thr+2]*tp[1].z + Acuda[4*thr+3]*tp[1].w;
    c[thr] = Acuda[4*thr+0]*tp[2].x + Acuda[4*thr+1]*tp[2].y +
             Acuda[4*thr+2]*tp[2].z + Acuda[4*thr+3]*tp[2].w;
  }
  __syncthreads();

  // Ten 64-entry tensor-product weight tables, one per output quantity.
  __shared__ float abc[640];
  int i = (thr>>4)&3;
  int j = (thr>>2)&3;
  int k = (thr & 3);

  abc[(16*i+4*j+k)+0]   = a[i+0]*b[j+0]*c[k+0]; // val
  abc[(16*i+4*j+k)+64]  = a[i+4]*b[j+0]*c[k+0]; // d/dx
  abc[(16*i+4*j+k)+128] = a[i+0]*b[j+4]*c[k+0]; // d/dy
  abc[(16*i+4*j+k)+192] = a[i+0]*b[j+0]*c[k+4]; // d/dz
  abc[(16*i+4*j+k)+256] = a[i+8]*b[j+0]*c[k+0]; // d2/dx2
  abc[(16*i+4*j+k)+320] = a[i+4]*b[j+4]*c[k+0]; // d2/dxdy
  abc[(16*i+4*j+k)+384] = a[i+4]*b[j+0]*c[k+4]; // d2/dxdz
  abc[(16*i+4*j+k)+448] = a[i+0]*b[j+8]*c[k+0]; // d2/dy2
  abc[(16*i+4*j+k)+512] = a[i+0]*b[j+4]*c[k+4]; // d2/dydz
  abc[(16*i+4*j+k)+576] = a[i+0]*b[j+0]*c[k+8]; // d2/dz2
  __syncthreads();

  float v = 0.0, g0=0.0, g1=0.0, g2=0.0,
    h00=0.0, h01=0.0, h02=0.0, h11=0.0, h12=0.0, h22=0.0;

  int n = 0;
  float *b0 = coefs + index.x*strides.x + index.y*strides.y +
              index.z*strides.z + off;
  if (off < N) {
    for (int i=0; i<4; i++) {
      for (int j=0; j<4; j++) {
        float *base = b0 + i*strides.x + j*strides.y;
        float c0 = base[0*strides.z];
        float c1 = base[1*strides.z];
        float c2 = base[2*strides.z];
        float c3 = base[3*strides.z];
        v   += abc[n+  0]*c0 + abc[n+  1]*c1 + abc[n+  2]*c2 + abc[n+  3]*c3;
        g0  += abc[n+ 64]*c0 + abc[n+ 65]*c1 + abc[n+ 66]*c2 + abc[n+ 67]*c3;
        g1  += abc[n+128]*c0 + abc[n+129]*c1 + abc[n+130]*c2 + abc[n+131]*c3;
        g2  += abc[n+192]*c0 + abc[n+193]*c1 + abc[n+194]*c2 + abc[n+195]*c3;
        h00 += abc[n+256]*c0 + abc[n+257]*c1 + abc[n+258]*c2 + abc[n+259]*c3;
        h01 += abc[n+320]*c0 + abc[n+321]*c1 + abc[n+322]*c2 + abc[n+323]*c3;
        h02 += abc[n+384]*c0 + abc[n+385]*c1 + abc[n+386]*c2 + abc[n+387]*c3;
        h11 += abc[n+448]*c0 + abc[n+449]*c1 + abc[n+450]*c2 + abc[n+451]*c3;
        h12 += abc[n+512]*c0 + abc[n+513]*c1 + abc[n+514]*c2 + abc[n+515]*c3;
        h22 += abc[n+576]*c0 + abc[n+577]*c1 + abc[n+578]*c2 + abc[n+579]*c3;
        n += 4;
      }
    }
    g0 *= drInv.x;
    g1 *= drInv.y;
    g2 *= drInv.z;
    h00 *= drInv.x * drInv.x;
    h01 *= drInv.x * drInv.y;
    h02 *= drInv.x * drInv.z;
    h11 *= drInv.y * drInv.y;
    h12 *= drInv.y * drInv.z;
    h22 *= drInv.z * drInv.z;

    myval[off] = v;
  }

  // abc is now reused as a staging buffer so that the interleaved
  // gradient / Hessian rows are written with consecutive threads touching
  // consecutive addresses.
  // NOTE(review): there is no barrier between the accumulation loop above
  // (which reads abc) and these writes; this is unchanged from the original
  // code -- confirm it is safe for block sizes larger than one warp.
  abc[3*thr+0] = g0;
  abc[3*thr+1] = g1;
  abc[3*thr+2] = g2;
  __syncthreads();
  for (int m=0; m<3; m++) {
    int myoff = (3*block+m)*SPLINE_BLOCK_SIZE + thr;
    if (myoff < 3*N)
      mygrad[myoff] = abc[m*SPLINE_BLOCK_SIZE+thr];
  }
  __syncthreads();

  // Write Hessians
  abc[6*thr+0] = h00;
  abc[6*thr+1] = h01;
  abc[6*thr+2] = h02;
  abc[6*thr+3] = h11;
  abc[6*thr+4] = h12;
  abc[6*thr+5] = h22;
  __syncthreads();
  for (int m=0; m<6; m++) {
    int myoff = (6*block+m)*SPLINE_BLOCK_SIZE + thr;
    if (myoff < 6*N)
      myhess[myoff] = abc[m*SPLINE_BLOCK_SIZE+thr];
  }
}


// Host wrapper for the 3D value kernel.  The grid is
// (ceil(num_splines / SPLINE_BLOCK_SIZE), num); aborts on any CUDA error.
extern "C" void
eval_multi_multi_UBspline_3d_s_cuda (multi_UBspline_3d_s_cuda *spline,
                                     float *pos_d, float *vals_d[], int num)
{
  dim3 dimBlock(SPLINE_BLOCK_SIZE);
  dim3 dimGrid(spline->num_splines/SPLINE_BLOCK_SIZE, num);
  if (spline->num_splines % SPLINE_BLOCK_SIZE)
    dimGrid.x++;

  eval_multi_multi_UBspline_3d_s_kernel<<<dimGrid,dimBlock>>>
    (pos_d, spline->gridInv, spline->coefs, vals_d,
     spline->dim, spline->stride, spline->num_splines);

  cudaThreadSynchronize();
  cudaError_t err = cudaGetLastError();
  if (err != cudaSuccess) {
    fprintf (stderr, "CUDA error in eval_multi_multi_UBspline_3d_s_cuda:\n %s\n",
             cudaGetErrorString(err));
    abort();
  }
}


// Host wrapper for the signed 3D value kernel; sign_d holds one factor per
// position.  Aborts on any CUDA error.
extern "C" void
eval_multi_multi_UBspline_3d_s_sign_cuda (multi_UBspline_3d_s_cuda *spline,
                                          float *pos_d, float *sign_d,
                                          float *vals_d[], int num)
{
  dim3 dimBlock(SPLINE_BLOCK_SIZE);
  dim3 dimGrid(spline->num_splines/SPLINE_BLOCK_SIZE, num);
  if (spline->num_splines % SPLINE_BLOCK_SIZE)
    dimGrid.x++;

  eval_multi_multi_UBspline_3d_s_sign_kernel<<<dimGrid,dimBlock>>>
    (pos_d, sign_d, spline->gridInv, spline->coefs, vals_d,
     spline->dim, spline->stride, spline->num_splines);

  cudaThreadSynchronize();
  cudaError_t err = cudaGetLastError();
  if (err != cudaSuccess) {
    fprintf (stderr, "CUDA error in eval_multi_multi_UBspline_3d_s_sign_cuda:\n %s\n",
             cudaGetErrorString(err));
    abort();
  }
}


// Host wrapper for the 3D value/gradient/Hessian kernel.  Aborts on any
// CUDA error.
extern "C" void
eval_multi_multi_UBspline_3d_s_vgh_cuda (multi_UBspline_3d_s_cuda *spline,
                                         float *pos_d, float *vals_d[],
                                         float *grads_d[], float *hess_d[],
                                         int num)
{
  dim3 dimBlock(SPLINE_BLOCK_SIZE);
  dim3 dimGrid(spline->num_splines/SPLINE_BLOCK_SIZE, num);
  if (spline->num_splines % SPLINE_BLOCK_SIZE)
    dimGrid.x++;

  eval_multi_multi_UBspline_3d_s_vgh_kernel<<<dimGrid,dimBlock>>>
    (pos_d, spline->gridInv, spline->coefs, vals_d, grads_d, hess_d,
     spline->dim, spline->stride, spline->num_splines);

  cudaThreadSynchronize();
  cudaError_t err = cudaGetLastError();
  if (err != cudaSuccess) {
    fprintf (stderr, "CUDA error in eval_multi_multi_UBspline_3d_s_vgh_cuda:\n %s\n",
             cudaGetErrorString(err));
    abort();
  }
}


// Evaluate value, transformed gradient and Laplacian of N 3D splines at
// position blockIdx.y.
//   Linv        9 floats, read row-major into the 3x3 matrix G
//   vals[ir]    N-float value row
//   grad_lapl[ir] four rows of length row_stride:
//                 rows 0-2: (G * grid-scaled gradient) components,
//                 row  3  : sum_{p,q} GGt[p][q] * H[p][q],
//               where GGt[p][q] = sum_m G[m][p]*G[m][q] and H is the
//               grid-scaled symmetric Hessian.
// The same NOTE(review) about abc[] and SPLINE_BLOCK_SIZE >= 64 as in the
// vgh kernel applies.
__global__ static void
eval_multi_multi_UBspline_3d_s_vgl_kernel
(float *pos, float3 drInv, float *coefs, float Linv[],
 float *vals[], float *grad_lapl[],
 uint3 dim, uint3 strides, int N, int row_stride)
{
  int block = blockIdx.x;
  int thr   = threadIdx.x;
  int ir    = blockIdx.y;
  int off   = block*SPLINE_BLOCK_SIZE+threadIdx.x;

  __shared__ float *myval, *mygrad_lapl;
  __shared__ float3 r;
  if (thr == 0) {
    r.x = pos[3*ir+0];
    r.y = pos[3*ir+1];
    r.z = pos[3*ir+2];
    myval       = vals[ir];
    mygrad_lapl = grad_lapl[ir];
  }
  __syncthreads();

  int3 index;
  float3 t;
  float s, sf;
  float4 tp[3];

  s = r.x * drInv.x;
  sf = floor(s);
  index.x = min(max(0,(int)sf), dim.x-1);
  t.x = s - sf;

  s = r.y * drInv.y;
  sf = floor(s);
  index.y = min(max(0,(int)sf), dim.y-1);
  t.y = s - sf;

  s = r.z * drInv.z;
  sf = floor(s);
  index.z = min(max(0,(int)sf), dim.z-1);
  t.z = s - sf;

  tp[0] = make_float4(t.x*t.x*t.x, t.x*t.x, t.x, 1.0);
  tp[1] = make_float4(t.y*t.y*t.y, t.y*t.y, t.y, 1.0);
  tp[2] = make_float4(t.z*t.z*t.z, t.z*t.z, t.z, 1.0);

  // First 4 of a are value, second 4 are derivative, last four are
  // second derivative.
  __shared__ float a[12], b[12], c[12];
  if (thr < 12) {
    a[thr] = Acuda[4*thr+0]*tp[0].x + Acuda[4*thr+1]*tp[0].y +
             Acuda[4*thr+2]*tp[0].z + Acuda[4*thr+3]*tp[0].w;
    b[thr] = Acuda[4*thr+0]*tp[1].x + Acuda[4*thr+1]*tp[1].y +
             Acuda[4*thr+2]*tp[1].z + Acuda[4*thr+3]*tp[1].w;
    c[thr] = Acuda[4*thr+0]*tp[2].x + Acuda[4*thr+1]*tp[2].y +
             Acuda[4*thr+2]*tp[2].z + Acuda[4*thr+3]*tp[2].w;
  }
  __syncthreads();

  __shared__ float abc[640];
  int i = (thr>>4)&3;
  int j = (thr>>2)&3;
  int k = (thr & 3);

  abc[(16*i+4*j+k)+0]   = a[i+0]*b[j+0]*c[k+0]; // val
  abc[(16*i+4*j+k)+64]  = a[i+4]*b[j+0]*c[k+0]; // d/dx
  abc[(16*i+4*j+k)+128] = a[i+0]*b[j+4]*c[k+0]; // d/dy
  abc[(16*i+4*j+k)+192] = a[i+0]*b[j+0]*c[k+4]; // d/dz
  abc[(16*i+4*j+k)+256] = a[i+8]*b[j+0]*c[k+0]; // d2/dx2
  abc[(16*i+4*j+k)+320] = a[i+4]*b[j+4]*c[k+0]; // d2/dxdy
  abc[(16*i+4*j+k)+384] = a[i+4]*b[j+0]*c[k+4]; // d2/dxdz
  abc[(16*i+4*j+k)+448] = a[i+0]*b[j+8]*c[k+0]; // d2/dy2
  abc[(16*i+4*j+k)+512] = a[i+0]*b[j+4]*c[k+4]; // d2/dydz
  abc[(16*i+4*j+k)+576] = a[i+0]*b[j+0]*c[k+8]; // d2/dz2
  __syncthreads();

  float v = 0.0, g0=0.0, g1=0.0, g2=0.0,
    h00=0.0, h01=0.0, h02=0.0, h11=0.0, h12=0.0, h22=0.0;

  int n = 0;
  float *b0 = coefs + index.x*strides.x + index.y*strides.y +
              index.z*strides.z + off;
  if (off < N) {
    // The innermost (z) loop is unrolled by hand; the _sign variant of
    // this kernel uses the equivalent rolled form.
    for (int i=0; i<4; i++) {
      for (int j=0; j<4; j++) {
        float *base = b0 + i*strides.x + j*strides.y;
        float c0 = base[0*strides.z];
        float c1 = base[1*strides.z];
        float c2 = base[2*strides.z];
        float c3 = base[3*strides.z];
        v   += abc[n+  0]*c0 + abc[n+  1]*c1 + abc[n+  2]*c2 + abc[n+  3]*c3;
        g0  += abc[n+ 64]*c0 + abc[n+ 65]*c1 + abc[n+ 66]*c2 + abc[n+ 67]*c3;
        g1  += abc[n+128]*c0 + abc[n+129]*c1 + abc[n+130]*c2 + abc[n+131]*c3;
        g2  += abc[n+192]*c0 + abc[n+193]*c1 + abc[n+194]*c2 + abc[n+195]*c3;
        h00 += abc[n+256]*c0 + abc[n+257]*c1 + abc[n+258]*c2 + abc[n+259]*c3;
        h01 += abc[n+320]*c0 + abc[n+321]*c1 + abc[n+322]*c2 + abc[n+323]*c3;
        h02 += abc[n+384]*c0 + abc[n+385]*c1 + abc[n+386]*c2 + abc[n+387]*c3;
        h11 += abc[n+448]*c0 + abc[n+449]*c1 + abc[n+450]*c2 + abc[n+451]*c3;
        h12 += abc[n+512]*c0 + abc[n+513]*c1 + abc[n+514]*c2 + abc[n+515]*c3;
        h22 += abc[n+576]*c0 + abc[n+577]*c1 + abc[n+578]*c2 + abc[n+579]*c3;
        n += 4;
      }
    }
    g0 *= drInv.x;
    g1 *= drInv.y;
    g2 *= drInv.z;
    h00 *= drInv.x * drInv.x;
    h01 *= drInv.x * drInv.y;
    h02 *= drInv.x * drInv.z;
    h11 *= drInv.y * drInv.y;
    h12 *= drInv.y * drInv.z;
    h22 *= drInv.z * drInv.z;

    myval[off] = v;
  }

  // Load G from Linv and form GGt[p][q] = sum_m G[m][p]*G[m][q].
  __shared__ float G[3][3], GGt[3][3];
  int i0 = threadIdx.x/3;
  int i1 = threadIdx.x - 3*i0;
  if (threadIdx.x < 9)
    G[i0][i1] = Linv[threadIdx.x];
  __syncthreads();
  if (threadIdx.x < 9)
    GGt[i0][i1] = (G[0][i0]*G[0][i1] +
                   G[1][i0]*G[1][i1] +
                   G[2][i0]*G[2][i1]);
  __syncthreads();

  if (off < N) {
    // Store gradients back to global memory
    mygrad_lapl[off+0*row_stride] = G[0][0]*g0 + G[0][1]*g1 + G[0][2]*g2;
    mygrad_lapl[off+1*row_stride] = G[1][0]*g0 + G[1][1]*g1 + G[1][2]*g2;
    mygrad_lapl[off+2*row_stride] = G[2][0]*g0 + G[2][1]*g1 + G[2][2]*g2;

    // Store laplacians back to global memory
    // Hessian = H00 H01 H02 H11 H12 H22
    // Matrix = [0 1 2]
    //          [1 3 4]
    //          [2 4 5]
    // laplacian = Trace(GGt*Hessian)
    mygrad_lapl[off+3*row_stride] =
      (GGt[0][0]*h00 + GGt[1][0]*h01 + GGt[2][0]*h02 +
       GGt[0][1]*h01 + GGt[1][1]*h11 + GGt[2][1]*h12 +
       GGt[0][2]*h02 + GGt[1][2]*h12 + GGt[2][2]*h22);
  }
}


// Host wrapper for the 3D value/gradient/Laplacian kernel.  Aborts on any
// CUDA error.
extern "C" void
eval_multi_multi_UBspline_3d_s_vgl_cuda (multi_UBspline_3d_s_cuda *spline,
                                         float *pos_d, float *Linv_d,
                                         float *vals_d[], float *grad_lapl_d[],
                                         int num, int row_stride)
{
  dim3 dimBlock(SPLINE_BLOCK_SIZE);
  dim3 dimGrid(spline->num_splines/SPLINE_BLOCK_SIZE, num);
  if (spline->num_splines % SPLINE_BLOCK_SIZE)
    dimGrid.x++;

  eval_multi_multi_UBspline_3d_s_vgl_kernel<<<dimGrid,dimBlock>>>
    (pos_d, spline->gridInv, spline->coefs, Linv_d, vals_d, grad_lapl_d,
     spline->dim, spline->stride, spline->num_splines, row_stride);

  cudaThreadSynchronize();
  cudaError_t err = cudaGetLastError();
  if (err != cudaSuccess) {
    fprintf (stderr, "CUDA error in eval_multi_multi_UBspline_3d_s_vgl_cuda:\n %s\n",
             cudaGetErrorString(err));
    abort();
  }
}


// Same as eval_multi_multi_UBspline_3d_s_vgl_kernel, but the value, the
// three gradient rows and the Laplacian row written for position ir are
// all multiplied by sign[ir].
__global__ static void
eval_multi_multi_UBspline_3d_s_vgl_sign_kernel
(float *pos, float sign[], float3 drInv, float *coefs, float Linv[],
 float *vals[], float *grad_lapl[],
 uint3 dim, uint3 strides, int N, int row_stride)
{
  int block = blockIdx.x;
  int thr   = threadIdx.x;
  int ir    = blockIdx.y;
  int off   = block*SPLINE_BLOCK_SIZE+threadIdx.x;

  __shared__ float *myval, *mygrad_lapl, mysign;
  __shared__ float3 r;
  if (thr == 0) {
    r.x = pos[3*ir+0];
    r.y = pos[3*ir+1];
    r.z = pos[3*ir+2];
    myval       = vals[ir];
    mygrad_lapl = grad_lapl[ir];
    mysign      = sign[ir];
  }
  __syncthreads();

  int3 index;
  float3 t;
  float s, sf;
  float4 tp[3];

  s = r.x * drInv.x;
  sf = floor(s);
  index.x = min(max(0,(int)sf), dim.x-1);
  t.x = s - sf;

  s = r.y * drInv.y;
  sf = floor(s);
  index.y = min(max(0,(int)sf), dim.y-1);
  t.y = s - sf;

  s = r.z * drInv.z;
  sf = floor(s);
  index.z = min(max(0,(int)sf), dim.z-1);
  t.z = s - sf;

  tp[0] = make_float4(t.x*t.x*t.x, t.x*t.x, t.x, 1.0);
  tp[1] = make_float4(t.y*t.y*t.y, t.y*t.y, t.y, 1.0);
  tp[2] = make_float4(t.z*t.z*t.z, t.z*t.z, t.z, 1.0);

  // First 4 of a are value, second 4 are derivative, last four are
  // second derivative.
  __shared__ float a[12], b[12], c[12];
  if (thr < 12) {
    a[thr] = Acuda[4*thr+0]*tp[0].x + Acuda[4*thr+1]*tp[0].y +
             Acuda[4*thr+2]*tp[0].z + Acuda[4*thr+3]*tp[0].w;
    b[thr] = Acuda[4*thr+0]*tp[1].x + Acuda[4*thr+1]*tp[1].y +
             Acuda[4*thr+2]*tp[1].z + Acuda[4*thr+3]*tp[1].w;
    c[thr] = Acuda[4*thr+0]*tp[2].x + Acuda[4*thr+1]*tp[2].y +
             Acuda[4*thr+2]*tp[2].z + Acuda[4*thr+3]*tp[2].w;
  }
  __syncthreads();

  __shared__ float abc[640];
  int i = (thr>>4)&3;
  int j = (thr>>2)&3;
  int k = (thr & 3);

  abc[(16*i+4*j+k)+0]   = a[i+0]*b[j+0]*c[k+0]; // val
  abc[(16*i+4*j+k)+64]  = a[i+4]*b[j+0]*c[k+0]; // d/dx
  abc[(16*i+4*j+k)+128] = a[i+0]*b[j+4]*c[k+0]; // d/dy
  abc[(16*i+4*j+k)+192] = a[i+0]*b[j+0]*c[k+4]; // d/dz
  abc[(16*i+4*j+k)+256] = a[i+8]*b[j+0]*c[k+0]; // d2/dx2
  abc[(16*i+4*j+k)+320] = a[i+4]*b[j+4]*c[k+0]; // d2/dxdy
  abc[(16*i+4*j+k)+384] = a[i+4]*b[j+0]*c[k+4]; // d2/dxdz
  abc[(16*i+4*j+k)+448] = a[i+0]*b[j+8]*c[k+0]; // d2/dy2
  abc[(16*i+4*j+k)+512] = a[i+0]*b[j+4]*c[k+4]; // d2/dydz
  abc[(16*i+4*j+k)+576] = a[i+0]*b[j+0]*c[k+8]; // d2/dz2
  __syncthreads();

  float v = 0.0, g0=0.0, g1=0.0, g2=0.0,
    h00=0.0, h01=0.0, h02=0.0, h11=0.0, h12=0.0, h22=0.0;

  int n = 0;
  float *b0 = coefs + index.x*strides.x + index.y*strides.y +
              index.z*strides.z + off;
  if (off < N) {
    for (int i=0; i<4; i++) {
      for (int j=0; j<4; j++) {
        float *base = b0 + i*strides.x + j*strides.y;
        for (int k=0; k<4; k++) {
          // Named cf (not c) so the shared basis array c[] is not shadowed.
          float cf = base[k*strides.z];
          v   += abc[n+  0] * cf;
          g0  += abc[n+ 64] * cf;
          g1  += abc[n+128] * cf;
          g2  += abc[n+192] * cf;
          h00 += abc[n+256] * cf;
          h01 += abc[n+320] * cf;
          h02 += abc[n+384] * cf;
          h11 += abc[n+448] * cf;
          h12 += abc[n+512] * cf;
          h22 += abc[n+576] * cf;
          n += 1;
        }
      }
    }
    g0 *= drInv.x;
    g1 *= drInv.y;
    g2 *= drInv.z;
    h00 *= drInv.x * drInv.x;
    h01 *= drInv.x * drInv.y;
    h02 *= drInv.x * drInv.z;
    h11 *= drInv.y * drInv.y;
    h12 *= drInv.y * drInv.z;
    h22 *= drInv.z * drInv.z;

    myval[off] = mysign * v;
  }

  // Load G from Linv and form GGt[p][q] = sum_m G[m][p]*G[m][q].
  __shared__ float G[3][3], GGt[3][3];
  int i0 = threadIdx.x/3;
  int i1 = threadIdx.x - 3*i0;
  if (threadIdx.x < 9)
    G[i0][i1] = Linv[threadIdx.x];
  __syncthreads();
  if (threadIdx.x < 9)
    GGt[i0][i1] = (G[0][i0]*G[0][i1] +
                   G[1][i0]*G[1][i1] +
                   G[2][i0]*G[2][i1]);
  __syncthreads();

  if (off < N) {
    // Store gradients back to global memory
    mygrad_lapl[off+0*row_stride] = mysign*(G[0][0]*g0 + G[0][1]*g1 + G[0][2]*g2);
    mygrad_lapl[off+1*row_stride] = mysign*(G[1][0]*g0 + G[1][1]*g1 + G[1][2]*g2);
    mygrad_lapl[off+2*row_stride] = mysign*(G[2][0]*g0 + G[2][1]*g1 + G[2][2]*g2);

    // Store laplacians back to global memory
    // Hessian = H00 H01 H02 H11 H12 H22
    // Matrix = [0 1 2]
    //          [1 3 4]
    //          [2 4 5]
    // laplacian = Trace(GGt*Hessian)
    mygrad_lapl[off+3*row_stride] = mysign *
      (GGt[0][0]*h00 + GGt[1][0]*h01 + GGt[2][0]*h02 +
       GGt[0][1]*h01 + GGt[1][1]*h11 + GGt[2][1]*h12 +
       GGt[0][2]*h02 + GGt[1][2]*h12 + GGt[2][2]*h22);
  }
}


// Host wrapper for the signed 3D value/gradient/Laplacian kernel; sign_d
// holds one factor per position.  Aborts on any CUDA error.
extern "C" void
eval_multi_multi_UBspline_3d_s_vgl_sign_cuda (multi_UBspline_3d_s_cuda *spline,
                                              float *pos_d, float *sign_d,
                                              float *Linv_d, float *vals_d[],
                                              float *grad_lapl_d[],
                                              int num, int row_stride)
{
  dim3 dimBlock(SPLINE_BLOCK_SIZE);
  dim3 dimGrid(spline->num_splines/SPLINE_BLOCK_SIZE, num);
  if (spline->num_splines % SPLINE_BLOCK_SIZE)
    dimGrid.x++;

  eval_multi_multi_UBspline_3d_s_vgl_sign_kernel<<<dimGrid,dimBlock>>>
    (pos_d, sign_d, spline->gridInv, spline->coefs, Linv_d, vals_d,
     grad_lapl_d, spline->dim, spline->stride, spline->num_splines,
     row_stride);

  cudaThreadSynchronize();
  cudaError_t err = cudaGetLastError();
  if (err != cudaSuccess) {
    fprintf (stderr, "CUDA error in eval_multi_multi_UBspline_3d_s_vgl_sign_cuda:\n %s\n",
             cudaGetErrorString(err));
    abort();
  }
}

#endif
einspline-0.9.2/src/bspline_create_cuda.h0000664000113000011300000000114511162233046015325 00000000000000
#ifndef BSPLINE_CREATE_CUDA_H
#define BSPLINE_CREATE_CUDA_H

#include "bspline_structs_cuda.h"

// Constructors for the CUDA-side spline objects from host-side splines.
// The *_conv variants take a double-precision host spline and return a
// single-precision CUDA spline type.

extern "C" UBspline_3d_s_cuda*
create_UBspline_3d_s_cuda (UBspline_3d_s* spline);

extern "C" UBspline_3d_s_cuda*
create_UBspline_3d_s_cuda_conv (UBspline_3d_d* spline);

extern "C" UBspline_3d_c_cuda*
create_UBspline_3d_c_cuda (UBspline_3d_c* spline);

extern "C" UBspline_3d_c_cuda*
create_UBspline_3d_c_cuda_conv (UBspline_3d_z* spline);

extern "C" UBspline_3d_d_cuda*
create_UBspline_3d_d_cuda (UBspline_3d_d* spline);

extern "C" UBspline_3d_z_cuda*
create_UBspline_3d_z_cuda (UBspline_3d_z* spline);

#endif
einspline-0.9.2/src/multi_bspline_structs.h0000664000113000011300000001126111147115526016014 00000000000000
/////////////////////////////////////////////////////////////////////////////
// einspline: a library for creating and evaluating B-splines             //
// Copyright (C) 2007 Kenneth P. Esler, Jr.                               //
//                                                                         //
// This program is free software; you can redistribute it and/or modify   //
// it under the terms of the GNU General Public License as published by   //
// the Free Software Foundation; either version 2 of the License, or      //
// (at your option) any later version.                                    //
//                                                                         //
// This program is distributed in the hope that it will be useful,        //
// but WITHOUT ANY WARRANTY; without even the implied warranty of         //
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the           //
// GNU General Public License for more details.
// // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #ifndef MULTI_BSPLINE_STRUCTS_STD_H #define MULTI_BSPLINE_STRUCTS_STD_H #include /////////////////////////// // Single precision real // /////////////////////////// typedef struct { spline_code spcode; type_code tcode; float* restrict coefs; intptr_t x_stride; Ugrid x_grid; BCtype_s xBC; int num_splines; } multi_UBspline_1d_s; typedef struct { spline_code spcode; type_code tcode; float* restrict coefs; intptr_t x_stride, y_stride; Ugrid x_grid, y_grid; BCtype_s xBC, yBC; int num_splines; } multi_UBspline_2d_s; typedef struct { spline_code spcode; type_code tcode; float* restrict coefs; intptr_t x_stride, y_stride, z_stride; Ugrid x_grid, y_grid, z_grid; BCtype_s xBC, yBC, zBC; int num_splines; } multi_UBspline_3d_s; /////////////////////////// // Double precision real // /////////////////////////// typedef struct { spline_code spcode; type_code tcode; double* restrict coefs; intptr_t x_stride; Ugrid x_grid; BCtype_d xBC; int num_splines; } multi_UBspline_1d_d; typedef struct { spline_code spcode; type_code tcode; double* restrict coefs; intptr_t x_stride, y_stride; Ugrid x_grid, y_grid; BCtype_d xBC, yBC; int num_splines; } multi_UBspline_2d_d; typedef struct { spline_code spcode; type_code tcode; double* restrict coefs; intptr_t x_stride, y_stride, z_stride; Ugrid x_grid, y_grid, z_grid; BCtype_d xBC, yBC, zBC; int num_splines; } multi_UBspline_3d_d; ////////////////////////////// // Single precision complex // ////////////////////////////// typedef struct { spline_code spcode; type_code tcode; complex_float* restrict coefs; intptr_t x_stride; Ugrid x_grid; BCtype_c xBC; int num_splines; } multi_UBspline_1d_c; typedef struct { spline_code spcode; type_code 
tcode; complex_float* restrict coefs; intptr_t x_stride, y_stride; Ugrid x_grid, y_grid; BCtype_c xBC, yBC; int num_splines; // temporary storage for laplacian components complex_float* restrict lapl2; } multi_UBspline_2d_c; typedef struct { spline_code spcode; type_code tcode; complex_float* restrict coefs; intptr_t x_stride, y_stride, z_stride; Ugrid x_grid, y_grid, z_grid; BCtype_c xBC, yBC, zBC; int num_splines; // temporary storage for laplacian components complex_float* restrict lapl3; } multi_UBspline_3d_c; ////////////////////////////// // Double precision complex // ////////////////////////////// typedef struct { spline_code spcode; type_code tcode; complex_double* restrict coefs; intptr_t x_stride; Ugrid x_grid; BCtype_z xBC; int num_splines; } multi_UBspline_1d_z; typedef struct { spline_code spcode; type_code tcode; complex_double* restrict coefs; intptr_t x_stride, y_stride; Ugrid x_grid, y_grid; BCtype_z xBC, yBC; int num_splines; // temporary storage for laplacian components complex_double* restrict lapl2; } multi_UBspline_2d_z; typedef struct { spline_code spcode; type_code tcode; complex_double* restrict coefs; intptr_t x_stride, y_stride, z_stride; Ugrid x_grid, y_grid, z_grid; BCtype_z xBC, yBC, zBC; int num_splines; // temporary storage for laplacian components complex_double* restrict lapl3; } multi_UBspline_3d_z; #endif einspline-0.9.2/src/multi_nubspline_eval_sse_z_impl.h0000664000113000011300000016343611113036707020033 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. // // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. 
// // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. // // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #ifndef MULTI_BSPLINE_EVAL_SSE_Z_IMPL_H #define MULTI_BSPLINE_EVAL_SSE_Z_IMPL_H #include #include #ifdef HAVE_SSE3 #include #endif #include #include "bspline_base.h" #include "multi_nubspline_structs.h" extern __m128d *restrict A_d; extern double *restrict Ad, *restrict dAd, *restrict d2Ad; #ifndef _MM_DDOT4_PD #ifdef HAVE_SSE3 #define _MM_DDOT4_PD(a0, a1, a2, a3, b0, b1, b2, b3, r) \ do { \ __m128d t0 = _mm_add_pd(_mm_mul_pd (a0, b0),_mm_mul_pd (a1, b1)); \ __m128d t1 = _mm_add_pd(_mm_mul_pd (a2, b2),_mm_mul_pd (a3, b3)); \ r = _mm_hadd_pd (t0, t1); \ } while(0); #define _MM_DOT4_PD(a0, a1, b0, b1, p) \ do { \ __m128d t0 = _mm_add_pd(_mm_mul_pd (a0, b0),_mm_mul_pd (a1, b1)); \ __m128d t1 = _mm_hadd_pd (t0,t0); \ _mm_store_sd (&(p), t1); \ } while (0); #else #define _MM_DDOT4_PD(a0, a1, a2, a3, b0, b1, b2, b3, r) \ do { \ __m128d t0 = _mm_add_pd(_mm_mul_pd (a0, b0),_mm_mul_pd (a1, b1)); \ __m128d t1 = _mm_add_pd(_mm_mul_pd (a2, b2),_mm_mul_pd (a3, b3)); \ r = _mm_add_pd(_mm_unpacklo_pd(t0,t1),_mm_unpackhi_pd(t0,t1)); \ } while(0); #define _MM_DOT4_PD(a0, a1, b0, b1, p) \ do { \ __m128d t0 = _mm_add_pd(_mm_mul_pd (a0, b0),_mm_mul_pd (a1, b1)); \ __m128d t1 = \ _mm_add_pd (_mm_unpacklo_pd(t0,t0), _mm_unpackhi_pd(t0,t0)); \ _mm_store_d (&(p), t1); \ } while (0); #endif #endif /************************************************************/ /* 1D double-precision, complex evaulation functions */ 
/************************************************************/

/* Evaluate all splines of a 1D complex multi-spline at x.
 *
 *   spline  spline object; x_basis, x_stride, coefs and num_splines are read
 *   x       evaluation point
 *   vals    output, one value per spline (num_splines entries)
 *
 * get_NUBasis_funcs_d (defined elsewhere) fills a[0..3] with the four basis
 * weights and returns the index ix of the first coefficient row they apply
 * to.
 */
void
eval_multi_NUBspline_1d_z (multi_NUBspline_1d_z *spline,
                           double x,
                           complex_double* restrict vals)
{
  double a[4];
  int ix = get_NUBasis_funcs_d (spline->x_basis, x, a);
  const int N = spline->num_splines;

  /* The stride is used straight from the struct so that it is never
     narrowed through an int before the pointer arithmetic. */
  complex_double* restrict coefs0 = spline->coefs + (ix+0)*spline->x_stride;
  complex_double* restrict coefs1 = spline->coefs + (ix+1)*spline->x_stride;
  complex_double* restrict coefs2 = spline->coefs + (ix+2)*spline->x_stride;
  complex_double* restrict coefs3 = spline->coefs + (ix+3)*spline->x_stride;

  /* The loop bound had been lost ("nnum_splines"); restored. */
  for (int n=0; n<N; n++)
    vals[n] = (a[0]*coefs0[n] + a[1]*coefs1[n] +
               a[2]*coefs2[n] + a[3]*coefs3[n]);
}


/* Evaluate values and first derivatives of all splines at x.
 *
 *   vals   output, num_splines values
 *   grads  output, num_splines first derivatives
 *
 * get_NUBasis_dfuncs_d (defined elsewhere) supplies the basis weights a[]
 * and their derivatives da[].
 */
void
eval_multi_NUBspline_1d_z_vg (multi_NUBspline_1d_z *spline,
                              double x,
                              complex_double* restrict vals,
                              complex_double* restrict grads)
{
  double a[4], da[4];
  int ix = get_NUBasis_dfuncs_d (spline->x_basis, x, a, da);
  const int N = spline->num_splines;

  /* Outputs are accumulated below, so clear them first. */
  for (int n=0; n<N; n++) {
    vals[n]  = 0.0;
    grads[n] = 0.0;
  }

  for (int i=0; i<4; i++) {
    complex_double* restrict coefs = spline->coefs + ((ix+i)*spline->x_stride);
    for (int n=0; n<N; n++) {
      vals[n]  +=  a[i] * coefs[n];
      grads[n] += da[i] * coefs[n];
    }
  }
}


/* Evaluate values, first and second derivatives of all splines at x.
 *
 *   vals   output, num_splines values
 *   grads  output, num_splines first derivatives
 *   lapl   output, num_splines second derivatives
 *
 * get_NUBasis_d2funcs_d (defined elsewhere) supplies a[], da[] and d2a[].
 */
void
eval_multi_NUBspline_1d_z_vgl (multi_NUBspline_1d_z *spline,
                               double x,
                               complex_double* restrict vals,
                               complex_double* restrict grads,
                               complex_double* restrict lapl)
{
  double a[4], da[4], d2a[4];
  int ix = get_NUBasis_d2funcs_d (spline->x_basis, x, a, da, d2a);
  const int N = spline->num_splines;

  /* Outputs are accumulated below, so clear them first. */
  for (int n=0; n<N; n++) {
    vals[n]  = 0.0;
    grads[n] = 0.0;
    lapl[n]  = 0.0;
  }

  for (int i=0; i<4; i++) {
    complex_double* restrict coefs = spline->coefs + ((ix+i)*spline->x_stride);
    for (int n=0; n<N; n++) {
      vals[n]  +=   a[i] * coefs[n];
      grads[n] +=  da[i] * coefs[n];
      lapl[n]  += d2a[i] * coefs[n];
    }
  }
}


/* Values, gradients and Hessians at x.  This simply forwards to the _vgl
 * routine, passing hess where that routine takes lapl, so hess receives the
 * second derivatives.
 */
void
eval_multi_NUBspline_1d_z_vgh (multi_NUBspline_1d_z *spline,
                               double x,
                               complex_double* restrict vals,
                               complex_double* restrict grads,
                               complex_double* restrict hess)
{
  eval_multi_NUBspline_1d_z_vgl (spline, x, vals, grads, hess);
}

//
/************************************************************/ // /* 2D double-precision, complex evaulation functions */ // /************************************************************/ // void // eval_multi_NUBspline_2d_z (multi_NUBspline_2d_z *spline, // double x, double y, // complex_double* restrict vals) // { // _mm_prefetch ((const char*) &A_d[ 0],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 1],_MM_HINT_T0); // _mm_prefetch ((const char*) &A_d[ 2],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 3],_MM_HINT_T0); // _mm_prefetch ((const char*) &A_d[ 4],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 5],_MM_HINT_T0); // _mm_prefetch ((const char*) &A_d[ 6],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 7],_MM_HINT_T0); // x -= spline->x_grid.start; // y -= spline->y_grid.start; // double ux = x*spline->x_grid.delta_inv; // double uy = y*spline->y_grid.delta_inv; // ux = fmin (ux, (double)(spline->x_grid.num)-1.0e-5); // uy = fmin (uy, (double)(spline->y_grid.num)-1.0e-5); // double ipartx, iparty, tx, ty; // tx = modf (ux, &ipartx); int ix = (int) ipartx; // ty = modf (uy, &iparty); int iy = (int) iparty; // int xs = spline->x_stride; // int ys = spline->y_stride; // int N = spline->num_splines; // // Now compute the vectors: // // tpx = [t_x^3 t_x^2 t_x 1] // // tpy = [t_y^3 t_y^2 t_y 1] // // a = A * tpx, b = A * tpy, c = A * tpz // // A is 4x4 matrix given by the rows A0, A1, A2, A3 // __m128d tpx01, tpx23, tpy01, tpy23, // a01 , b01, a23, b23; // tpx01 = _mm_set_pd (tx*tx*tx, tx*tx); // tpx23 = _mm_set_pd (tx, 1.0); // tpy01 = _mm_set_pd (ty*ty*ty, ty*ty); // tpy23 = _mm_set_pd (ty, 1.0); // // x-dependent vectors // _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpx01, tpx23, tpx01, tpx23, a01); // _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpx01, tpx23, tpx01, tpx23, a23); // // y-dependent vectors // _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpy01, tpy23, tpy01, tpy23, b01); // _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], 
tpy01, tpy23, tpy01, tpy23, b23); // // Zero-out values // __m128d mvals[N]; // for (int n=0; ncoefs + (ix+i)*xs + (iy+j)*ys); // for (int n=0; nx_grid.start; // y -= spline->y_grid.start; // double ux = x*spline->x_grid.delta_inv; // double uy = y*spline->y_grid.delta_inv; // ux = fmin (ux, (double)(spline->x_grid.num)-1.0e-5); // uy = fmin (uy, (double)(spline->y_grid.num)-1.0e-5); // double ipartx, iparty, tx, ty; // tx = modf (ux, &ipartx); int ix = (int) ipartx; // ty = modf (uy, &iparty); int iy = (int) iparty; // int xs = spline->x_stride; // int ys = spline->y_stride; // int N = spline->num_splines; // // Now compute the vectors: // // tpx = [t_x^3 t_x^2 t_x 1] // // tpy = [t_y^3 t_y^2 t_y 1] // // a = A * tpx, b = A * tpy, c = A * tpz // // A is 4x4 matrix given by the rows A0, A1, A2, A3 // __m128d tpx01, tpx23, tpy01, tpy23, // a01 , b01, a23, b23, // da01 , db01, da23, db23; // tpx01 = _mm_set_pd (tx*tx*tx, tx*tx); // tpx23 = _mm_set_pd (tx, 1.0); // tpy01 = _mm_set_pd (ty*ty*ty, ty*ty); // tpy23 = _mm_set_pd (ty, 1.0); // // x-dependent vectors // _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpx01, tpx23, tpx01, tpx23, a01); // _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpx01, tpx23, tpx01, tpx23, a23); // _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpx01, tpx23, tpx01, tpx23, da01); // _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpx01, tpx23, tpx01, tpx23, da23); // // y-dependent vectors // _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpy01, tpy23, tpy01, tpy23, b01); // _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpy01, tpy23, tpy01, tpy23, b23); // _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpy01, tpy23, tpy01, tpy23, db01); // _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpy01, tpy23, tpy01, tpy23, db23); // // Zero-out values // __m128d mvals[N], mgrads[2*N]; // for (int n=0; ncoefs + (ix+i)*xs + (iy+j)*ys); // for (int n=0; nx_grid.delta_inv; // double dyInv = spline->y_grid.delta_inv; // 
complex_double lapl2[2*N]; // for (int n=0; nx_grid.start; // y -= spline->y_grid.start; // double ux = x*spline->x_grid.delta_inv; // double uy = y*spline->y_grid.delta_inv; // ux = fmin (ux, (double)(spline->x_grid.num)-1.0e-5); // uy = fmin (uy, (double)(spline->y_grid.num)-1.0e-5); // double ipartx, iparty, tx, ty; // tx = modf (ux, &ipartx); int ix = (int) ipartx; // ty = modf (uy, &iparty); int iy = (int) iparty; // int xs = spline->x_stride; // int ys = spline->y_stride; // int N = spline->num_splines; // // Now compute the vectors: // // tpx = [t_x^3 t_x^2 t_x 1] // // tpy = [t_y^3 t_y^2 t_y 1] // // a = A * tpx, b = A * tpy, c = A * tpz // // A is 4x4 matrix given by the rows A0, A1, A2, A3 // __m128d tpx01, tpx23, tpy01, tpy23, // a01 , b01, a23, b23, // da01 , db01, da23, db23, // d2a01, d2b01, d2a23, d2b23; // tpx01 = _mm_set_pd (tx*tx*tx, tx*tx); // tpx23 = _mm_set_pd (tx, 1.0); // tpy01 = _mm_set_pd (ty*ty*ty, ty*ty); // tpy23 = _mm_set_pd (ty, 1.0); // // x-dependent vectors // _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpx01, tpx23, tpx01, tpx23, a01); // _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpx01, tpx23, tpx01, tpx23, a23); // _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpx01, tpx23, tpx01, tpx23, da01); // _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpx01, tpx23, tpx01, tpx23, da23); // _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpx01, tpx23, tpx01, tpx23, d2a01); // _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpx01, tpx23, tpx01, tpx23, d2a23); // // y-dependent vectors // _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpy01, tpy23, tpy01, tpy23, b01); // _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpy01, tpy23, tpy01, tpy23, b23); // _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpy01, tpy23, tpy01, tpy23, db01); // _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpy01, tpy23, tpy01, tpy23, db23); // _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpy01, tpy23, tpy01, tpy23, d2b01); 
// _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpy01, tpy23, tpy01, tpy23, d2b23); // // Zero-out values // __m128d mvals[N], mgrads[2*N], mlapl[2*N]; // for (int n=0; ncoefs + (ix+i)*xs + (iy+j)*ys); // for (int n=0; nx_grid.delta_inv; // double dyInv = spline->y_grid.delta_inv; // complex_double lapl2[2*N]; // for (int n=0; nx_grid.start; // y -= spline->y_grid.start; // double ux = x*spline->x_grid.delta_inv; // double uy = y*spline->y_grid.delta_inv; // ux = fmin (ux, (double)(spline->x_grid.num)-1.0e-5); // uy = fmin (uy, (double)(spline->y_grid.num)-1.0e-5); // double ipartx, iparty, tx, ty; // tx = modf (ux, &ipartx); int ix = (int) ipartx; // ty = modf (uy, &iparty); int iy = (int) iparty; // int xs = spline->x_stride; // int ys = spline->y_stride; // int N = spline->num_splines; // // Now compute the vectors: // // tpx = [t_x^3 t_x^2 t_x 1] // // tpy = [t_y^3 t_y^2 t_y 1] // // a = A * tpx, b = A * tpy, c = A * tpz // // A is 4x4 matrix given by the rows A0, A1, A2, A3 // __m128d tpx01, tpx23, tpy01, tpy23, // a01 , b01, a23, b23, // da01 , db01, da23, db23, // d2a01, d2b01, d2a23, d2b23; // tpx01 = _mm_set_pd (tx*tx*tx, tx*tx); // tpx23 = _mm_set_pd (tx, 1.0); // tpy01 = _mm_set_pd (ty*ty*ty, ty*ty); // tpy23 = _mm_set_pd (ty, 1.0); // // x-dependent vectors // _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpx01, tpx23, tpx01, tpx23, a01); // _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpx01, tpx23, tpx01, tpx23, a23); // _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpx01, tpx23, tpx01, tpx23, da01); // _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpx01, tpx23, tpx01, tpx23, da23); // _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpx01, tpx23, tpx01, tpx23, d2a01); // _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpx01, tpx23, tpx01, tpx23, d2a23); // // y-dependent vectors // _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpy01, tpy23, tpy01, tpy23, b01); // _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpy01, 
tpy23, tpy01, tpy23, b23); // _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpy01, tpy23, tpy01, tpy23, db01); // _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpy01, tpy23, tpy01, tpy23, db23); // _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpy01, tpy23, tpy01, tpy23, d2b01); // _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpy01, tpy23, tpy01, tpy23, d2b23); // // Zero-out values // __m128d mvals[N], mgrads[2*N], mhess[3*N]; // for (int n=0; ncoefs + (ix+i)*xs + (iy+j)*ys); // for (int n=0; nx_grid.delta_inv; // double dyInv = spline->y_grid.delta_inv; // for (int n=0; nx_grid.start; // y -= spline->y_grid.start; // z -= spline->z_grid.start; // double ux = x*spline->x_grid.delta_inv; // double uy = y*spline->y_grid.delta_inv; // double uz = z*spline->z_grid.delta_inv; // ux = fmin (ux, (double)(spline->x_grid.num)-1.0e-5); // uy = fmin (uy, (double)(spline->y_grid.num)-1.0e-5); // uz = fmin (uz, (double)(spline->z_grid.num)-1.0e-5); // double ipartx, iparty, ipartz, tx, ty, tz; // tx = modf (ux, &ipartx); int ix = (int) ipartx; // ty = modf (uy, &iparty); int iy = (int) iparty; // tz = modf (uz, &ipartz); int iz = (int) ipartz; // int xs = spline->x_stride; // int ys = spline->y_stride; // int zs = spline->z_stride; // int N = spline->num_splines; // // Now compute the vectors: // // tpx = [t_x^3 t_x^2 t_x 1] // // tpy = [t_y^3 t_y^2 t_y 1] // // tpz = [t_z^3 t_z^2 t_z 1] // // a = A * tpx, b = A * tpy, c = A * tpz // // A is 4x4 matrix given by the rows A0, A1, A2, A3 // __m128d tpx01, tpx23, tpy01, tpy23, tpz01, tpz23, // a01, b01, c01, a23, b23, c23, // tmp0, tmp1, r0, r1, i0, i1, val_r, val_i; // tpx01 = _mm_set_pd (tx*tx*tx, tx*tx); // tpx23 = _mm_set_pd (tx, 1.0); // tpy01 = _mm_set_pd (ty*ty*ty, ty*ty); // tpy23 = _mm_set_pd (ty, 1.0); // tpz01 = _mm_set_pd (tz*tz*tz, tz*tz); // tpz23 = _mm_set_pd (tz, 1.0); // // x-dependent vectors // _MM_DDOT4_PD (A_d[0], A_d[1], A_d[2], A_d[3], tpx01, tpx23, tpx01, tpx23, a01); // _MM_DDOT4_PD 
(A_d[4], A_d[5], A_d[6], A_d[7], tpx01, tpx23, tpx01, tpx23, a23); // // y-dependent vectors // _MM_DDOT4_PD (A_d[0], A_d[1], A_d[2], A_d[3], tpy01, tpy23, tpy01, tpy23, b01); // _MM_DDOT4_PD (A_d[4], A_d[5], A_d[6], A_d[7], tpy01, tpy23, tpy01, tpy23, b23); // // z-dependent vectors // _MM_DDOT4_PD (A_d[0], A_d[1], A_d[2], A_d[3], tpz01, tpz23, tpz01, tpz23, c01); // _MM_DDOT4_PD (A_d[4], A_d[5], A_d[6], A_d[7], tpz01, tpz23, tpz01, tpz23, c23); // // Zero-out values // __m128d mvals[N]; // for (int n=0; ncoefs + (ix+i)*xs + (iy+j)*ys + (iz+0)*zs); // __m128d* restrict coefs1 = (__m128d*)(spline->coefs + (ix+i)*xs + (iy+j)*ys + (iz+1)*zs); // __m128d* restrict coefs2 = (__m128d*)(spline->coefs + (ix+i)*xs + (iy+j)*ys + (iz+2)*zs); // __m128d* restrict coefs3 = (__m128d*)(spline->coefs + (ix+i)*xs + (iy+j)*ys + (iz+3)*zs); // for (int n=0; ncoefs + (ix+i)*xs + (iy+j)*ys + (iz+k)*zs); // for (int n=0; nx_grid.start; // y -= spline->y_grid.start; // z -= spline->z_grid.start; // double ux = x*spline->x_grid.delta_inv; // double uy = y*spline->y_grid.delta_inv; // double uz = z*spline->z_grid.delta_inv; // ux = fmin (ux, (double)(spline->x_grid.num)-1.0e-5); // uy = fmin (uy, (double)(spline->y_grid.num)-1.0e-5); // uz = fmin (uz, (double)(spline->z_grid.num)-1.0e-5); // double ipartx, iparty, ipartz, tx, ty, tz; // tx = modf (ux, &ipartx); int ix = (int) ipartx; // ty = modf (uy, &iparty); int iy = (int) iparty; // tz = modf (uz, &ipartz); int iz = (int) ipartz; // int xs = spline->x_stride; // int ys = spline->y_stride; // int zs = spline->z_stride; // int N = spline->num_splines; // // Now compute the vectors: // // tpx = [t_x^3 t_x^2 t_x 1] // // tpy = [t_y^3 t_y^2 t_y 1] // // tpz = [t_z^3 t_z^2 t_z 1] // // a = A * tpx, b = A * tpy, c = A * tpz // // A is 4x4 matrix given by the rows A0, A1, A2, A3 // __m128d tpx01, tpx23, tpy01, tpy23, tpz01, tpz23, // a01 , b01, c01, a23, b23, c23, // da01 , db01, dc01, da23, db23, dc23; // tpx01 = _mm_set_pd (tx*tx*tx, 
tx*tx); // tpx23 = _mm_set_pd (tx, 1.0); // tpy01 = _mm_set_pd (ty*ty*ty, ty*ty); // tpy23 = _mm_set_pd (ty, 1.0); // tpz01 = _mm_set_pd (tz*tz*tz, tz*tz); // tpz23 = _mm_set_pd (tz, 1.0); // // x-dependent vectors // _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpx01, tpx23, tpx01, tpx23, a01); // _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpx01, tpx23, tpx01, tpx23, a23); // _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpx01, tpx23, tpx01, tpx23, da01); // _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpx01, tpx23, tpx01, tpx23, da23); // // y-dependent vectors // _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpy01, tpy23, tpy01, tpy23, b01); // _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpy01, tpy23, tpy01, tpy23, b23); // _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpy01, tpy23, tpy01, tpy23, db01); // _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpy01, tpy23, tpy01, tpy23, db23); // // z-dependent vectors // _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpz01, tpz23, tpz01, tpz23, c01); // _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpz01, tpz23, tpz01, tpz23, c23); // _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpz01, tpz23, tpz01, tpz23, dc01); // _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpz01, tpz23, tpz01, tpz23, dc23); // // Zero-out values // __m128d mvals[N], mgrads[3*N]; // for (int n=0; ncoefs + (ix+i)*xs + (iy+j)*ys + (iz+k)*zs); // for (int n=0; nx_grid.delta_inv; // double dyInv = spline->y_grid.delta_inv; // double dzInv = spline->z_grid.delta_inv; // for (int n=0; nx_grid.start; // y -= spline->y_grid.start; // z -= spline->z_grid.start; // double ux = x*spline->x_grid.delta_inv; // double uy = y*spline->y_grid.delta_inv; // double uz = z*spline->z_grid.delta_inv; // ux = fmin (ux, (double)(spline->x_grid.num)-1.0e-5); // uy = fmin (uy, (double)(spline->y_grid.num)-1.0e-5); // uz = fmin (uz, (double)(spline->z_grid.num)-1.0e-5); // double ipartx, iparty, ipartz, tx, ty, tz; // tx 
= modf (ux, &ipartx); int ix = (int) ipartx; // ty = modf (uy, &iparty); int iy = (int) iparty; // tz = modf (uz, &ipartz); int iz = (int) ipartz; // int xs = spline->x_stride; // int ys = spline->y_stride; // int zs = spline->z_stride; // int N = spline->num_splines; // // Now compute the vectors: // // tpx = [t_x^3 t_x^2 t_x 1] // // tpy = [t_y^3 t_y^2 t_y 1] // // tpz = [t_z^3 t_z^2 t_z 1] // // a = A * tpx, b = A * tpy, c = A * tpz // // A is 4x4 matrix given by the rows A0, A1, A2, A3 // __m128d tpx01, tpx23, tpy01, tpy23, tpz01, tpz23, // a01 , b01, c01, a23, b23, c23, // da01 , db01, dc01, da23, db23, dc23, // d2a01, d2b01, d2c01, d2a23, d2b23, d2c23; // tpx01 = _mm_set_pd (tx*tx*tx, tx*tx); // tpx23 = _mm_set_pd (tx, 1.0); // tpy01 = _mm_set_pd (ty*ty*ty, ty*ty); // tpy23 = _mm_set_pd (ty, 1.0); // tpz01 = _mm_set_pd (tz*tz*tz, tz*tz); // tpz23 = _mm_set_pd (tz, 1.0); // // x-dependent vectors // _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpx01, tpx23, tpx01, tpx23, a01); // _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpx01, tpx23, tpx01, tpx23, a23); // _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpx01, tpx23, tpx01, tpx23, da01); // _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpx01, tpx23, tpx01, tpx23, da23); // _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpx01, tpx23, tpx01, tpx23, d2a01); // _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpx01, tpx23, tpx01, tpx23, d2a23); // // y-dependent vectors // _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpy01, tpy23, tpy01, tpy23, b01); // _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpy01, tpy23, tpy01, tpy23, b23); // _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpy01, tpy23, tpy01, tpy23, db01); // _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpy01, tpy23, tpy01, tpy23, db23); // _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpy01, tpy23, tpy01, tpy23, d2b01); // _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpy01, tpy23, tpy01, tpy23, d2b23); // 
// z-dependent vectors // _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpz01, tpz23, tpz01, tpz23, c01); // _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpz01, tpz23, tpz01, tpz23, c23); // _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpz01, tpz23, tpz01, tpz23, dc01); // _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpz01, tpz23, tpz01, tpz23, dc23); // _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpz01, tpz23, tpz01, tpz23, d2c01); // _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpz01, tpz23, tpz01, tpz23, d2c23); // // Zero-out values // __m128d mvals[N], mgrads[3*N], mlapl[3*N]; // for (int n=0; ncoefs + (ix+i)*xs + (iy+j)*ys + (iz+k)*zs); // for (int n=0; nx_grid.delta_inv; // double dyInv = spline->y_grid.delta_inv; // double dzInv = spline->z_grid.delta_inv; // for (int n=0; nx_grid.start; // y -= spline->y_grid.start; // z -= spline->z_grid.start; // double ux = x*spline->x_grid.delta_inv; // double uy = y*spline->y_grid.delta_inv; // double uz = z*spline->z_grid.delta_inv; // ux = fmin (ux, (double)(spline->x_grid.num)-1.0e-5); // uy = fmin (uy, (double)(spline->y_grid.num)-1.0e-5); // uz = fmin (uz, (double)(spline->z_grid.num)-1.0e-5); // double ipartx, iparty, ipartz, tx, ty, tz; // tx = modf (ux, &ipartx); int ix = (int) ipartx; // ty = modf (uy, &iparty); int iy = (int) iparty; // tz = modf (uz, &ipartz); int iz = (int) ipartz; // int xs = spline->x_stride; // int ys = spline->y_stride; // int zs = spline->z_stride; // int N = spline->num_splines; // // Now compute the vectors: // // tpx = [t_x^3 t_x^2 t_x 1] // // tpy = [t_y^3 t_y^2 t_y 1] // // tpz = [t_z^3 t_z^2 t_z 1] // // a = A * tpx, b = A * tpy, c = A * tpz // // A is 4x4 matrix given by the rows A0, A1, A2, A3 // __m128d tpx01, tpx23, tpy01, tpy23, tpz01, tpz23, // a01 , b01, c01, a23, b23, c23, // da01 , db01, dc01, da23, db23, dc23, // d2a01, d2b01, d2c01, d2a23, d2b23, d2c23; // tpx01 = _mm_set_pd (tx*tx*tx, tx*tx); // tpx23 = _mm_set_pd (tx, 1.0); // tpy01 = 
_mm_set_pd (ty*ty*ty, ty*ty); // tpy23 = _mm_set_pd (ty, 1.0); // tpz01 = _mm_set_pd (tz*tz*tz, tz*tz); // tpz23 = _mm_set_pd (tz, 1.0); // // x-dependent vectors // _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpx01, tpx23, tpx01, tpx23, a01); // _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpx01, tpx23, tpx01, tpx23, a23); // _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpx01, tpx23, tpx01, tpx23, da01); // _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpx01, tpx23, tpx01, tpx23, da23); // _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpx01, tpx23, tpx01, tpx23, d2a01); // _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpx01, tpx23, tpx01, tpx23, d2a23); // // y-dependent vectors // _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpy01, tpy23, tpy01, tpy23, b01); // _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpy01, tpy23, tpy01, tpy23, b23); // _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpy01, tpy23, tpy01, tpy23, db01); // _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpy01, tpy23, tpy01, tpy23, db23); // _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpy01, tpy23, tpy01, tpy23, d2b01); // _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpy01, tpy23, tpy01, tpy23, d2b23); // // z-dependent vectors // _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpz01, tpz23, tpz01, tpz23, c01); // _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpz01, tpz23, tpz01, tpz23, c23); // _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpz01, tpz23, tpz01, tpz23, dc01); // _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpz01, tpz23, tpz01, tpz23, dc23); // _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpz01, tpz23, tpz01, tpz23, d2c01); // _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpz01, tpz23, tpz01, tpz23, d2c23); // // Zero-out values // //__m128d mvals[N], mgrads[3*N], mhess[6*N]; // __m128d mpack[10*N]; // for (int n=0; n<10*N; n++) // mpack[n] = _mm_setzero_pd(); // __m128d a[4], b[4], c[4], da[4], db[4], 
dc[4], d2a[4], d2b[4], d2c[4]; // a[0]=_mm_unpacklo_pd(a01,a01); da[0]=_mm_unpacklo_pd(da01,da01); d2a[0]=_mm_unpacklo_pd(d2a01,d2a01); // a[1]=_mm_unpackhi_pd(a01,a01); da[1]=_mm_unpackhi_pd(da01,da01); d2a[1]=_mm_unpackhi_pd(d2a01,d2a01); // a[2]=_mm_unpacklo_pd(a23,a23); da[2]=_mm_unpacklo_pd(da23,da23); d2a[2]=_mm_unpacklo_pd(d2a23,d2a23); // a[3]=_mm_unpackhi_pd(a23,a23); da[3]=_mm_unpackhi_pd(da23,da23); d2a[3]=_mm_unpackhi_pd(d2a23,d2a23); // b[0]=_mm_unpacklo_pd(b01,b01); db[0]=_mm_unpacklo_pd(db01,db01); d2b[0]=_mm_unpacklo_pd(d2b01,d2b01); // b[1]=_mm_unpackhi_pd(b01,b01); db[1]=_mm_unpackhi_pd(db01,db01); d2b[1]=_mm_unpackhi_pd(d2b01,d2b01); // b[2]=_mm_unpacklo_pd(b23,b23); db[2]=_mm_unpacklo_pd(db23,db23); d2b[2]=_mm_unpacklo_pd(d2b23,d2b23); // b[3]=_mm_unpackhi_pd(b23,b23); db[3]=_mm_unpackhi_pd(db23,db23); d2b[3]=_mm_unpackhi_pd(d2b23,d2b23); // c[0]=_mm_unpacklo_pd(c01,c01); dc[0]=_mm_unpacklo_pd(dc01,dc01); d2c[0]=_mm_unpacklo_pd(d2c01,d2c01); // c[1]=_mm_unpackhi_pd(c01,c01); dc[1]=_mm_unpackhi_pd(dc01,dc01); d2c[1]=_mm_unpackhi_pd(d2c01,d2c01); // c[2]=_mm_unpacklo_pd(c23,c23); dc[2]=_mm_unpacklo_pd(dc23,dc23); d2c[2]=_mm_unpacklo_pd(d2c23,d2c23); // c[3]=_mm_unpackhi_pd(c23,c23); dc[3]=_mm_unpackhi_pd(dc23,dc23); d2c[3]=_mm_unpackhi_pd(d2c23,d2c23); // // Main computation loop // const int bs = 32; // for (int nstart=0; nstartcoefs + (ix+i)*xs + (iy+j)*ys + (iz+0)*zs); // __m128d* restrict c1 = (__m128d*)(spline->coefs + (ix+i)*xs + (iy+j)*ys + (iz+1)*zs); // __m128d* restrict c2 = (__m128d*)(spline->coefs + (ix+i)*xs + (iy+j)*ys + (iz+2)*zs); // __m128d* restrict c3 = (__m128d*)(spline->coefs + (ix+i)*xs + (iy+j)*ys + (iz+3)*zs); // #ifdef USE_PREFETCH_VGH // int nextIndex = i<<4 + j<<2 + k + 1; // int iNext = nextIndex >> 4; // int jNext = (nextIndex >> 2) & 3; // int kNext = nextIndex & 3; // if (nextIndex < 64) { // __m128d* restrict nextCoefs = (__m128d*)(spline->coefs + (ix+iNext)*xs + (iy +jNext)*ys + (iz+kNext)*zs); // for (int 
i=0,n=nstart; (nx_grid.delta_inv; // double dyInv = spline->y_grid.delta_inv; // double dzInv = spline->z_grid.delta_inv; // for (int n=0; n #include #include "nubspline_structs.h" #ifdef HAVE_SSE #include #endif #ifdef HAVE_SSE2 #include #endif /// SSE3 adds "horizontal add" instructions, which makes things /// simpler and faster #ifdef HAVE_SSE3 #include #define _MM_MATVEC4_PS(M0, M1, M2, M3, v, r) \ do { \ __m128 _r0 = _mm_hadd_ps (_mm_mul_ps (M0, v), _mm_mul_ps (M1, v)); \ __m128 _r1 = _mm_hadd_ps (_mm_mul_ps (M2, v), _mm_mul_ps (M3, v)); \ r = _mm_hadd_ps (_r0, _r1); \ } while (0); #define _MM_DOT4_PS(_A, _B, _p) \ do { \ __m128 t = _mm_mul_ps (_A, _B); \ __m128 t1 = _mm_hadd_ps (t,t); \ __m128 r = _mm_hadd_ps (t1, t1); \ _mm_store_ss (&(_p), r); \ } while(0); #else // Use plain-old SSE instructions #define _MM_MATVEC4_PS(_M0, _M1, _M2, _M3, _v, _r) \ do { \ __m128 _r0 = _mm_mul_ps (_M0, _v); \ __m128 _r1 = _mm_mul_ps (_M1, _v); \ __m128 _r2 = _mm_mul_ps (_M2, _v); \ __m128 _r3 = _mm_mul_ps (_M3, _v); \ _MM_TRANSPOSE4_PS (_r0, _r1, _r2, _r3); \ _r = _mm_add_ps (_mm_add_ps (r0, r1), _mm_add_ps (r2, r3)); \ } while (0); #define _MM_DOT4_PS(_A, _B, _p) \ do { \ __m128 t = _mm_mul_ps (_A, _B); \ __m128 alo = _mm_shuffle_ps (t, t, _MM_SHUFFLE(0,1,0,1)); \ __m128 ahi = _mm_shuffle_ps (t, t, _MM_SHUFFLE(2,3,2,3)); \ __m128 a = _mm_add_ps (alo, ahi); \ __m128 rlo = _mm_shuffle_ps (a, a, _MM_SHUFFLE(0,0,0,0)); \ __m128 rhi = _mm_shuffle_ps (a, a, _MM_SHUFFLE(1,1,1,1)); \ __m128 r = _mm_add_ps (rlo, rhi); \ _mm_store_ss (&(_p), r); \ } while(0); #endif /************************************************************/ /* 1D single-precision, real evaulation functions */ /************************************************************/ /* Value only */ inline void eval_NUBspline_1d_c (NUBspline_1d_c * restrict spline, double x, complex_float* restrict val) { float bfuncs[4]; int i = get_NUBasis_funcs_s (spline->x_basis, x, bfuncs); complex_float* restrict coefs = spline->coefs; 
*val = (coefs[i+0]*bfuncs[0] +coefs[i+1]*bfuncs[1] + coefs[i+2]*bfuncs[2] +coefs[i+3]*bfuncs[3]); } /* Value and first derivative */ inline void eval_NUBspline_1d_c_vg (NUBspline_1d_c * restrict spline, double x, complex_float* restrict val, complex_float* restrict grad) { float bfuncs[4], dbfuncs[4]; int i = get_NUBasis_dfuncs_s (spline->x_basis, x, bfuncs, dbfuncs); complex_float* restrict coefs = spline->coefs; *val = (coefs[i+0]* bfuncs[0] + coefs[i+1]* bfuncs[1] + coefs[i+2]* bfuncs[2] + coefs[i+3]* bfuncs[3]); *grad = (coefs[i+0]*dbfuncs[0] + coefs[i+1]*dbfuncs[1] + coefs[i+2]*dbfuncs[2] + coefs[i+3]*dbfuncs[3]); } /* Value, first derivative, and second derivative */ inline void eval_NUBspline_1d_c_vgl (NUBspline_1d_c * restrict spline, double x, complex_float* restrict val, complex_float* restrict grad, complex_float* restrict lapl) { float bfuncs[4], dbfuncs[4], d2bfuncs[4]; int i = get_NUBasis_d2funcs_s (spline->x_basis, x, bfuncs, dbfuncs, d2bfuncs); complex_float* restrict coefs = spline->coefs; *val = (coefs[i+0]* bfuncs[0] + coefs[i+1]* bfuncs[1] + coefs[i+2]* bfuncs[2] + coefs[i+3]* bfuncs[3]); *grad = (coefs[i+0]* dbfuncs[0] + coefs[i+1]* dbfuncs[1] + coefs[i+2]* dbfuncs[2] + coefs[i+3]* dbfuncs[3]); *lapl = (coefs[i+0]*d2bfuncs[0] + coefs[i+1]*d2bfuncs[1] + coefs[i+2]*d2bfuncs[2] + coefs[i+3]*d2bfuncs[3]); } inline void eval_NUBspline_1d_c_vgh (NUBspline_1d_c * restrict spline, double x, complex_float* restrict val, complex_float* restrict grad, complex_float* restrict hess) { eval_NUBspline_1d_c_vgl (spline, x, val, grad, hess); } /************************************************************/ /* 2D single-precision, real evaulation functions */ /************************************************************/ /* Value only */ inline void eval_NUBspline_2d_c (NUBspline_2d_c * restrict spline, double x, double y, complex_float* restrict val) { __m128 a, b, bPr, bPi, r0, r1, r2, r3, i0, i1, i2, i3, tmp0, tmp1; complex_float* restrict coefs = 
spline->coefs; int ix = get_NUBasis_funcs_sse_s (spline->x_basis, x, &a); int iy = get_NUBasis_funcs_sse_s (spline->y_basis, y, &b); int xs = spline->x_stride; int xs2 = 2*xs; #define P(i,j) (const float*)(spline->coefs+(ix+(i))*xs+iy+j) // Prefetch the data from main memory into cache so it's available // when we need to use it. float* restrict p = (float*)P(0,0); _mm_prefetch ((const char*)p, _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); p += xs2; _mm_prefetch ((const char*)p, _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); p += xs2; _mm_prefetch ((const char*)p, _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); p += xs2; _mm_prefetch ((const char*)p, _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); p = (float *)P(0,0); tmp0 = _mm_loadu_ps (p); tmp1 = _mm_loadu_ps (p+4); p+= xs2; r0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (p); tmp1 = _mm_loadu_ps (p+4); p+= xs2; r1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (p); tmp1 = _mm_loadu_ps (p+4); p+= xs2; r2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (p); tmp1 = _mm_loadu_ps (p+4); p+= xs2; r3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); _MM_MATVEC4_PS (r0, r1, r2, r3, b, bPr); _MM_MATVEC4_PS (i0, i1, i2, i3, b, bPi); float *valr = ((float*)val) +0; float *vali = ((float*)val) +1; // Compute value _MM_DOT4_PS (a, bPr, *valr); _MM_DOT4_PS (a, bPi, *vali); #undef P } /* Value and gradient */ inline void eval_NUBspline_2d_c_vg (NUBspline_2d_c * restrict spline, double x, double y, complex_float* restrict val, complex_float* restrict grad) { __m128 a, b, da, db, bPr, dbPr, bPi, dbPi, r0, r1, r2, r3, i0, i1, i2, i3, tmp0, 
tmp1; int ix = get_NUBasis_dfuncs_sse_s (spline->x_basis, x, &a, &da); int iy = get_NUBasis_dfuncs_sse_s (spline->y_basis, y, &b, &db); complex_float* restrict coefs = spline->coefs; int xs = spline->x_stride; int xs2 = 2*xs; #define P(i,j) (const float*)(spline->coefs+(ix+(i))*xs+iy+j) // Prefetch the data from main memory into cache so it's available // when we need to use it. float* restrict p = (float*)P(0,0); _mm_prefetch ((const char*)p, _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); p += xs2; _mm_prefetch ((const char*)p, _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); p += xs2; _mm_prefetch ((const char*)p, _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); p += xs2; _mm_prefetch ((const char*)p, _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); p = (float *)P(0,0); tmp0 = _mm_loadu_ps (p); tmp1 = _mm_loadu_ps (p+4); p+= xs2; r0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (p); tmp1 = _mm_loadu_ps (p+4); p+= xs2; r1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (p); tmp1 = _mm_loadu_ps (p+4); p+= xs2; r2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (p); tmp1 = _mm_loadu_ps (p+4); p+= xs2; r3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); _MM_MATVEC4_PS (r0, r1, r2, r3, b, bPr); _MM_MATVEC4_PS (i0, i1, i2, i3, b, bPi); _MM_MATVEC4_PS (r0, r1, r2, r3, db, dbPr); _MM_MATVEC4_PS (i0, i1, i2, i3, db, dbPi); float *valr = ((float*)val) +0; float *vali = ((float*)val) +1; float *gradr0 = ((float *)grad)+0; float *gradi0 = ((float *)grad)+1; float *gradr1 = ((float *)grad)+2; float *gradi1 = ((float *)grad)+3; // Compute value _MM_DOT4_PS (a, bPr, *valr); _MM_DOT4_PS (a, bPi, 
*vali); // Compute gradient _MM_DOT4_PS (da, bPr, *gradr0); _MM_DOT4_PS (da, bPi, *gradr0); _MM_DOT4_PS (a, dbPr, *gradi1); _MM_DOT4_PS (a, dbPi, *gradi1); #undef P } /* Value, gradient, and laplacian */ inline void eval_NUBspline_2d_c_vgl (NUBspline_2d_c * restrict spline, double x, double y, complex_float* restrict val, complex_float* restrict grad, complex_float* restrict lapl) { __m128 a, b, da, db, d2a, d2b, bPr, dbPr, d2bPr, bPi, dbPi, d2bPi, r0, r1, r2, r3, i0, i1, i2, i3, tmp0, tmp1; int ix = get_NUBasis_d2funcs_sse_s (spline->x_basis, x, &a, &da, &d2a); int iy = get_NUBasis_d2funcs_sse_s (spline->y_basis, y, &b, &db, &d2b); complex_float* restrict coefs = spline->coefs; int xs = spline->x_stride; int xs2 = 2*xs; #define P(i,j) (const float*)(spline->coefs+(ix+(i))*xs+iy+j) // Prefetch the data from main memory into cache so it's available // when we need to use it. float* restrict p = (float*)P(0,0); _mm_prefetch ((const char*)p, _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); p += xs2; _mm_prefetch ((const char*)p, _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); p += xs2; _mm_prefetch ((const char*)p, _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); p += xs2; _mm_prefetch ((const char*)p, _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); p = (float *)P(0,0); tmp0 = _mm_loadu_ps (p); tmp1 = _mm_loadu_ps (p+4); p+= xs2; r0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (p); tmp1 = _mm_loadu_ps (p+4); p+= xs2; r1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (p); tmp1 = _mm_loadu_ps (p+4); p+= xs2; r2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (p); tmp1 = _mm_loadu_ps (p+4); p+= xs2; r3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 
2, 0)); i3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); _MM_MATVEC4_PS (r0, r1, r2, r3, b, bPr); _MM_MATVEC4_PS (i0, i1, i2, i3, b, bPi); _MM_MATVEC4_PS (r0, r1, r2, r3, db, dbPr); _MM_MATVEC4_PS (i0, i1, i2, i3, db, dbPi); _MM_MATVEC4_PS (r0, r1, r2, r3, d2b, d2bPr); _MM_MATVEC4_PS (i0, i1, i2, i3, d2b, d2bPi); float *valr = ((float*)val) +0; float *vali = ((float*)val) +1; float *gradr0 = ((float *)grad)+0; float *gradi0 = ((float *)grad)+1; float *gradr1 = ((float *)grad)+2; float *gradi1 = ((float *)grad)+3; // Compute value _MM_DOT4_PS (a, bPr, *valr); _MM_DOT4_PS (a, bPi, *vali); // Compute gradient _MM_DOT4_PS (da, bPr, *gradr0); _MM_DOT4_PS (da, bPi, *gradr0); _MM_DOT4_PS (a, dbPr, *gradi1); _MM_DOT4_PS (a, dbPi, *gradi1); // Compute laplacian float d2x_r, d2y_r, d2x_i, d2y_i; _MM_DOT4_PS (d2a, bPr, d2x_r); _MM_DOT4_PS (d2a, bPi, d2x_i); _MM_DOT4_PS (a, d2bPr, d2y_r); _MM_DOT4_PS (a, d2bPi, d2y_i); #ifdef __cplusplus *lapl = std::complex(d2x_r + d2y_r, d2x_i + d2y_i); #else *lapl = (d2x_r + d2y_r) + 1.0if*(d2x_i + d2y_i); #endif #undef P } /* Value, gradient, and Hessian */ inline void eval_NUBspline_2d_c_vgh (NUBspline_2d_c * restrict spline, double x, double y, complex_float* restrict val, complex_float* restrict grad, complex_float* restrict hess) { __m128 a, b, da, db, d2a, d2b, bPr, dbPr, d2bPr, bPi, dbPi, d2bPi, r0, r1, r2, r3, i0, i1, i2, i3, tmp0, tmp1; int ix = get_NUBasis_d2funcs_sse_s (spline->x_basis, x, &a, &da, &d2a); int iy = get_NUBasis_d2funcs_sse_s (spline->y_basis, y, &b, &db, &d2b); complex_float* restrict coefs = spline->coefs; int xs = spline->x_stride; int xs2 = 2*xs; #define P(i,j) (const float*)(spline->coefs+(ix+(i))*xs+iy+j) // Prefetch the data from main memory into cache so it's available // when we need to use it. 
float* restrict p = (float*)P(0,0); _mm_prefetch ((const char*)p, _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); p += xs2; _mm_prefetch ((const char*)p, _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); p += xs2; _mm_prefetch ((const char*)p, _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); p += xs2; _mm_prefetch ((const char*)p, _MM_HINT_T0); _mm_prefetch ((const char*)(p+4), _MM_HINT_T0); p = (float *)P(0,0); tmp0 = _mm_loadu_ps (p); tmp1 = _mm_loadu_ps (p+4); p+= xs2; r0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (p); tmp1 = _mm_loadu_ps (p+4); p+= xs2; r1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (p); tmp1 = _mm_loadu_ps (p+4); p+= xs2; r2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps (p); tmp1 = _mm_loadu_ps (p+4); p+= xs2; r3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); _MM_MATVEC4_PS (r0, r1, r2, r3, b, bPr); _MM_MATVEC4_PS (i0, i1, i2, i3, b, bPi); _MM_MATVEC4_PS (r0, r1, r2, r3, db, dbPr); _MM_MATVEC4_PS (i0, i1, i2, i3, db, dbPi); _MM_MATVEC4_PS (r0, r1, r2, r3, d2b, d2bPr); _MM_MATVEC4_PS (i0, i1, i2, i3, d2b, d2bPi); float *valr = ((float*)val) +0; float *vali = ((float*)val) +1; float *gradr0 = ((float *)grad)+0; float *gradi0 = ((float *)grad)+1; float *gradr1 = ((float *)grad)+2; float *gradi1 = ((float *)grad)+3; float *hess_d2x_r = ((float*)hess)+0; float *hess_d2x_i = ((float*)hess)+1; float *hess_d2y_r = ((float*)hess)+6; float *hess_d2y_i = ((float*)hess)+7; float *hess_dxdy_r = ((float*)hess)+2; float *hess_dxdy_i = ((float*)hess)+3; // Compute value _MM_DOT4_PS (a, bPr, *valr); _MM_DOT4_PS (a, bPi, *vali); // Compute gradient _MM_DOT4_PS (da, bPr, *gradr0); 
_MM_DOT4_PS (da, bPi, *gradr0); _MM_DOT4_PS (a, dbPr, *gradi1); _MM_DOT4_PS (a, dbPi, *gradi1); // Compute Hessian _MM_DOT4_PS (d2a, bPr, *hess_d2x_r); _MM_DOT4_PS (d2a, bPi, *hess_d2x_i); _MM_DOT4_PS (a, d2bPr, *hess_d2y_r); _MM_DOT4_PS (a, d2bPi, *hess_d2y_i); _MM_DOT4_PS (da, dbPr, *hess_dxdy_r); _MM_DOT4_PS (da, dbPi, *hess_dxdy_i); #undef P } /************************************************************/ /* 3D single-precision, real evaulation functions */ /************************************************************/ /* Value only */ inline void eval_NUBspline_3d_c (NUBspline_3d_c * restrict spline, double x, double y, double z, complex_float* restrict val) { __m128 a, b, c, cPr[4], bcPr, cPi[4], bcPi, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, r0, r1, r2, r3, i0, i1, i2, i3; int ix = get_NUBasis_funcs_sse_s (spline->x_basis, x, &a); int iy = get_NUBasis_funcs_sse_s (spline->y_basis, y, &b); int iz = get_NUBasis_funcs_sse_s (spline->z_basis, z, &c); int xs = spline->x_stride; int ys = spline->y_stride; int ys2 = 2*ys; int ys3 = 3*ys; complex_float* restrict coefs = spline->coefs; // This macro is used to give the pointer to coefficient data. // i and j should be in the range [0,3]. Coefficients are read four // at a time, so no k value is needed. #define P(i,j,k) ((const float*)(spline->coefs+(ix+(i))*xs+(iy+(j))*ys+(iz)+k)) complex_float *p = (complex_float*)P(0,0,0); // Prefetch the data from main memory into cache so it's available // when we need to use it. 
_mm_prefetch ((const char*)p , _MM_HINT_T0); _mm_prefetch ((const char*)(p +2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ ys), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys +2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3+2), _MM_HINT_T0); p += xs; _mm_prefetch ((const char*)(p ), _MM_HINT_T0); _mm_prefetch ((const char*)(p +2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys +2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3+2), _MM_HINT_T0); p += xs; _mm_prefetch ((const char*)(p ), _MM_HINT_T0); _mm_prefetch ((const char*)(p +2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys +2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3+2), _MM_HINT_T0); p += xs; _mm_prefetch ((const char*)(p ), _MM_HINT_T0); _mm_prefetch ((const char*)(p +2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys +2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3+2), _MM_HINT_T0); // Compute cP, dcP, and d2cP products 1/4 at a time to maximize // register reuse and avoid rerereading from memory or cache. 
// 1st quarter p = (complex_float*)P(0,0,0); tmp0 = _mm_loadu_ps ((float*)(p )); tmp1 = _mm_loadu_ps ((float*)(p+2)); r0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps ((float*)(p+ys )); tmp1 = _mm_loadu_ps ((float*)(p+ys+2)); r1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps ((float*)(p+ys2 )); tmp1 = _mm_loadu_ps ((float*)(p+ys2+2)); r2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps ((float*)(p+ys3 )); tmp1 = _mm_loadu_ps ((float*)(p+ys3+2)); r3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); _MM_MATVEC4_PS (r0, r1, r2, r3, c, cPr[0]); _MM_MATVEC4_PS (i0, i1, i2, i3, c, cPi[0]); p += xs; // 2nd quarter tmp0 = _mm_loadu_ps ((float*)(p )); tmp1 = _mm_loadu_ps ((float*)(p+2)); r0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps ((float*)(p+ys )); tmp1 = _mm_loadu_ps ((float*)(p+ys+2)); r1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps ((float*)(p+ys2 )); tmp1 = _mm_loadu_ps ((float*)(p+ys2+2)); r2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps ((float*)(p+ys3 )); tmp1 = _mm_loadu_ps ((float*)(p+ys3+2)); r3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); _MM_MATVEC4_PS (r0, r1, r2, r3, c, cPr[1]); _MM_MATVEC4_PS (i0, i1, i2, i3, c, cPi[1]); p += xs; // 3rd quarter tmp0 = _mm_loadu_ps ((float*)(p )); tmp1 = _mm_loadu_ps ((float*)(p+2)); r0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i0 = 
_mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps ((float*)(p+ys )); tmp1 = _mm_loadu_ps ((float*)(p+ys+2)); r1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps ((float*)(p+ys2 )); tmp1 = _mm_loadu_ps ((float*)(p+ys2+2)); r2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps ((float*)(p+ys3 )); tmp1 = _mm_loadu_ps ((float*)(p+ys3+2)); r3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); _MM_MATVEC4_PS (r0, r1, r2, r3, c, cPr[2]); _MM_MATVEC4_PS (i0, i1, i2, i3, c, cPi[2]); p += xs; // 4th quarter tmp0 = _mm_loadu_ps ((float*)(p )); tmp1 = _mm_loadu_ps ((float*)(p+2)); r0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps ((float*)(p+ys )); tmp1 = _mm_loadu_ps ((float*)(p+ys+2)); r1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps ((float*)(p+ys2 )); tmp1 = _mm_loadu_ps ((float*)(p+ys2+2)); r2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps ((float*)(p+ys3 )); tmp1 = _mm_loadu_ps ((float*)(p+ys3+2)); r3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); _MM_MATVEC4_PS (r0, r1, r2, r3, c, cPr[3]); _MM_MATVEC4_PS (i0, i1, i2, i3, c, cPi[3]); // Now compute bcP, dbcP, bdcP, d2bcP, bd2cP, and dbdc products _MM_MATVEC4_PS ( cPr[0], cPr[1], cPr[2], cPr[3], b, bcPr); _MM_MATVEC4_PS ( cPi[0], cPi[1], cPi[2], cPi[3], b, bcPi); float *valr = ((float*)val) +0; float *vali = ((float*)val) +1; // Compute value _MM_DOT4_PS (a, bcPr, *valr); _MM_DOT4_PS (a, bcPi, *vali); #undef P } /* Value and 
gradient */ inline void eval_NUBspline_3d_c_vg (NUBspline_3d_c * restrict spline, double x, double y, double z, complex_float* restrict val, complex_float* restrict grad) { __m128 a, b, c, da, db, dc, cPr[4], dcPr[4], bcPr, dbcPr, bdcPr, cPi[4], dcPi[4], bcPi, dbcPi, bdcPi, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, r0, r1, r2, r3, i0, i1, i2, i3; int ix = get_NUBasis_dfuncs_sse_s (spline->x_basis, x, &a, &da); int iy = get_NUBasis_dfuncs_sse_s (spline->y_basis, y, &b, &db); int iz = get_NUBasis_dfuncs_sse_s (spline->z_basis, z, &c, &dc); int xs = spline->x_stride; int ys = spline->y_stride; int ys2 = 2*ys; int ys3 = 3*ys; complex_float* restrict coefs = spline->coefs; // This macro is used to give the pointer to coefficient data. // i and j should be in the range [0,3]. Coefficients are read four // at a time, so no k value is needed. #define P(i,j,k) ((const float*)(spline->coefs+(ix+(i))*xs+(iy+(j))*ys+(iz)+k)) complex_float *p = (complex_float*)P(0,0,0); // Prefetch the data from main memory into cache so it's available // when we need to use it. 
_mm_prefetch ((const char*)p , _MM_HINT_T0); _mm_prefetch ((const char*)(p +2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ ys), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys +2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3+2), _MM_HINT_T0); p += xs; _mm_prefetch ((const char*)(p ), _MM_HINT_T0); _mm_prefetch ((const char*)(p +2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys +2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3+2), _MM_HINT_T0); p += xs; _mm_prefetch ((const char*)(p ), _MM_HINT_T0); _mm_prefetch ((const char*)(p +2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys +2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3+2), _MM_HINT_T0); p += xs; _mm_prefetch ((const char*)(p ), _MM_HINT_T0); _mm_prefetch ((const char*)(p +2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys +2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3+2), _MM_HINT_T0); // Compute cP, dcP, and d2cP products 1/4 at a time to maximize // register reuse and avoid rerereading from memory or cache. 
// 1st quarter p = (complex_float*)P(0,0,0); tmp0 = _mm_loadu_ps ((float*)(p )); tmp1 = _mm_loadu_ps ((float*)(p+2)); r0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps ((float*)(p+ys )); tmp1 = _mm_loadu_ps ((float*)(p+ys+2)); r1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps ((float*)(p+ys2 )); tmp1 = _mm_loadu_ps ((float*)(p+ys2+2)); r2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps ((float*)(p+ys3 )); tmp1 = _mm_loadu_ps ((float*)(p+ys3+2)); r3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); _MM_MATVEC4_PS (r0, r1, r2, r3, c, cPr[0]); _MM_MATVEC4_PS (r0, r1, r2, r3, dc, dcPr[0]); _MM_MATVEC4_PS (i0, i1, i2, i3, c, cPi[0]); _MM_MATVEC4_PS (i0, i1, i2, i3, dc, dcPi[0]); p += xs; // 2nd quarter tmp0 = _mm_loadu_ps ((float*)(p )); tmp1 = _mm_loadu_ps ((float*)(p+2)); r0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps ((float*)(p+ys )); tmp1 = _mm_loadu_ps ((float*)(p+ys+2)); r1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps ((float*)(p+ys2 )); tmp1 = _mm_loadu_ps ((float*)(p+ys2+2)); r2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps ((float*)(p+ys3 )); tmp1 = _mm_loadu_ps ((float*)(p+ys3+2)); r3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); _MM_MATVEC4_PS (r0, r1, r2, r3, c, cPr[1]); _MM_MATVEC4_PS (r0, r1, r2, r3, dc, dcPr[1]); _MM_MATVEC4_PS (i0, i1, i2, i3, c, cPi[1]); _MM_MATVEC4_PS (i0, i1, i2, 
i3, dc, dcPi[1]); p += xs; // 3rd quarter tmp0 = _mm_loadu_ps ((float*)(p )); tmp1 = _mm_loadu_ps ((float*)(p+2)); r0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps ((float*)(p+ys )); tmp1 = _mm_loadu_ps ((float*)(p+ys+2)); r1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps ((float*)(p+ys2 )); tmp1 = _mm_loadu_ps ((float*)(p+ys2+2)); r2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps ((float*)(p+ys3 )); tmp1 = _mm_loadu_ps ((float*)(p+ys3+2)); r3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); _MM_MATVEC4_PS (r0, r1, r2, r3, c, cPr[2]); _MM_MATVEC4_PS (r0, r1, r2, r3, dc, dcPr[2]); _MM_MATVEC4_PS (i0, i1, i2, i3, c, cPi[2]); _MM_MATVEC4_PS (i0, i1, i2, i3, dc, dcPi[2]); p += xs; // 4th quarter tmp0 = _mm_loadu_ps ((float*)(p )); tmp1 = _mm_loadu_ps ((float*)(p+2)); r0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps ((float*)(p+ys )); tmp1 = _mm_loadu_ps ((float*)(p+ys+2)); r1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps ((float*)(p+ys2 )); tmp1 = _mm_loadu_ps ((float*)(p+ys2+2)); r2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps ((float*)(p+ys3 )); tmp1 = _mm_loadu_ps ((float*)(p+ys3+2)); r3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); _MM_MATVEC4_PS (r0, r1, r2, r3, c, cPr[3]); _MM_MATVEC4_PS (r0, r1, r2, r3, dc, dcPr[3]); _MM_MATVEC4_PS (i0, i1, i2, i3, c, cPi[3]); _MM_MATVEC4_PS (i0, i1, i2, i3, 
dc, dcPi[3]); // Now compute bcP, dbcP, bdcP, d2bcP, bd2cP, and dbdc products _MM_MATVEC4_PS ( cPr[0], cPr[1], cPr[2], cPr[3], b, bcPr); _MM_MATVEC4_PS ( cPr[0], cPr[1], cPr[2], cPr[3], db, dbcPr); _MM_MATVEC4_PS ( dcPr[0], dcPr[1], dcPr[2], dcPr[3], b, bdcPr); _MM_MATVEC4_PS ( cPi[0], cPi[1], cPi[2], cPi[3], b, bcPi); _MM_MATVEC4_PS ( cPi[0], cPi[1], cPi[2], cPi[3], db, dbcPi); _MM_MATVEC4_PS ( dcPi[0], dcPi[1], dcPi[2], dcPi[3], b, bdcPi); float *valr = ((float*)val) +0; float *vali = ((float*)val) +1; float *gradr0 = ((float *)grad)+0; float *gradi0 = ((float *)grad)+1; float *gradr1 = ((float *)grad)+2; float *gradi1 = ((float *)grad)+3; float *gradr2 = ((float *)grad)+4; float *gradi2 = ((float *)grad)+5; // Compute value _MM_DOT4_PS (a, bcPr, *valr); _MM_DOT4_PS (a, bcPi, *vali); // Compute gradient _MM_DOT4_PS (da, bcPr, *gradr0); _MM_DOT4_PS (a, dbcPr, *gradr1); _MM_DOT4_PS (a, bdcPr, *gradr2); _MM_DOT4_PS (da, bcPi, *gradi0); _MM_DOT4_PS (a, dbcPi, *gradi1); _MM_DOT4_PS (a, bdcPi, *gradi2); #undef P } /* Value, gradient, and laplacian */ inline void eval_NUBspline_3d_c_vgl (NUBspline_3d_c * restrict spline, double x, double y, double z, complex_float* restrict val, complex_float* restrict grad, complex_float* restrict lapl) { __m128 a, b, c, da, db, dc, d2a, d2b, d2c, cPr[4], dcPr[4], d2cPr[4], bcPr, dbcPr, bdcPr, d2bcPr, bd2cPr, cPi[4], dcPi[4], d2cPi[4], bcPi, dbcPi, bdcPi, d2bcPi, bd2cPi, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, r0, r1, r2, r3, i0, i1, i2, i3; int ix = get_NUBasis_d2funcs_sse_s (spline->x_basis, x, &a, &da, &d2a); int iy = get_NUBasis_d2funcs_sse_s (spline->y_basis, y, &b, &db, &d2b); int iz = get_NUBasis_d2funcs_sse_s (spline->z_basis, z, &c, &dc, &d2c); int xs = spline->x_stride; int ys = spline->y_stride; int ys2 = 2*ys; int ys3 = 3*ys; complex_float* restrict coefs = spline->coefs; // This macro is used to give the pointer to coefficient data. // i and j should be in the range [0,3]. 
Coefficients are read four // at a time, so no k value is needed. #define P(i,j,k) ((const float*)(spline->coefs+(ix+(i))*xs+(iy+(j))*ys+(iz)+k)) complex_float *p = (complex_float*)P(0,0,0); // Prefetch the data from main memory into cache so it's available // when we need to use it. _mm_prefetch ((const char*)p , _MM_HINT_T0); _mm_prefetch ((const char*)(p +2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ ys), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys +2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3+2), _MM_HINT_T0); p += xs; _mm_prefetch ((const char*)(p ), _MM_HINT_T0); _mm_prefetch ((const char*)(p +2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys +2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3+2), _MM_HINT_T0); p += xs; _mm_prefetch ((const char*)(p ), _MM_HINT_T0); _mm_prefetch ((const char*)(p +2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys +2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3+2), _MM_HINT_T0); p += xs; _mm_prefetch ((const char*)(p ), _MM_HINT_T0); _mm_prefetch ((const char*)(p +2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys +2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3+2), _MM_HINT_T0); // Compute cP, dcP, and d2cP products 1/4 at a time to maximize // register reuse 
and avoid rerereading from memory or cache. // 1st quarter p = (complex_float*)P(0,0,0); tmp0 = _mm_loadu_ps ((float*)(p )); tmp1 = _mm_loadu_ps ((float*)(p+2)); r0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps ((float*)(p+ys )); tmp1 = _mm_loadu_ps ((float*)(p+ys+2)); r1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps ((float*)(p+ys2 )); tmp1 = _mm_loadu_ps ((float*)(p+ys2+2)); r2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps ((float*)(p+ys3 )); tmp1 = _mm_loadu_ps ((float*)(p+ys3+2)); r3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); _MM_MATVEC4_PS (r0, r1, r2, r3, c, cPr[0]); _MM_MATVEC4_PS (r0, r1, r2, r3, dc, dcPr[0]); _MM_MATVEC4_PS (r0, r1, r2, r3, d2c, d2cPr[0]); _MM_MATVEC4_PS (i0, i1, i2, i3, c, cPi[0]); _MM_MATVEC4_PS (i0, i1, i2, i3, dc, dcPi[0]); _MM_MATVEC4_PS (i0, i1, i2, i3, d2c, d2cPi[0]); p += xs; // 2nd quarter tmp0 = _mm_loadu_ps ((float*)(p )); tmp1 = _mm_loadu_ps ((float*)(p+2)); r0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps ((float*)(p+ys )); tmp1 = _mm_loadu_ps ((float*)(p+ys+2)); r1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps ((float*)(p+ys2 )); tmp1 = _mm_loadu_ps ((float*)(p+ys2+2)); r2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps ((float*)(p+ys3 )); tmp1 = _mm_loadu_ps ((float*)(p+ys3+2)); r3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); _MM_MATVEC4_PS (r0, 
r1, r2, r3, c, cPr[1]); _MM_MATVEC4_PS (r0, r1, r2, r3, dc, dcPr[1]); _MM_MATVEC4_PS (r0, r1, r2, r3, d2c, d2cPr[1]); _MM_MATVEC4_PS (i0, i1, i2, i3, c, cPi[1]); _MM_MATVEC4_PS (i0, i1, i2, i3, dc, dcPi[1]); _MM_MATVEC4_PS (i0, i1, i2, i3, d2c, d2cPi[1]); p += xs; // 3rd quarter tmp0 = _mm_loadu_ps ((float*)(p )); tmp1 = _mm_loadu_ps ((float*)(p+2)); r0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps ((float*)(p+ys )); tmp1 = _mm_loadu_ps ((float*)(p+ys+2)); r1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps ((float*)(p+ys2 )); tmp1 = _mm_loadu_ps ((float*)(p+ys2+2)); r2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps ((float*)(p+ys3 )); tmp1 = _mm_loadu_ps ((float*)(p+ys3+2)); r3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); _MM_MATVEC4_PS (r0, r1, r2, r3, c, cPr[2]); _MM_MATVEC4_PS (r0, r1, r2, r3, dc, dcPr[2]); _MM_MATVEC4_PS (r0, r1, r2, r3, d2c, d2cPr[2]); _MM_MATVEC4_PS (i0, i1, i2, i3, c, cPi[2]); _MM_MATVEC4_PS (i0, i1, i2, i3, dc, dcPi[2]); _MM_MATVEC4_PS (i0, i1, i2, i3, d2c, d2cPi[2]); p += xs; // 4th quarter tmp0 = _mm_loadu_ps ((float*)(p )); tmp1 = _mm_loadu_ps ((float*)(p+2)); r0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps ((float*)(p+ys )); tmp1 = _mm_loadu_ps ((float*)(p+ys+2)); r1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps ((float*)(p+ys2 )); tmp1 = _mm_loadu_ps ((float*)(p+ys2+2)); r2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps 
((float*)(p+ys3 )); tmp1 = _mm_loadu_ps ((float*)(p+ys3+2)); r3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); _MM_MATVEC4_PS (r0, r1, r2, r3, c, cPr[3]); _MM_MATVEC4_PS (r0, r1, r2, r3, dc, dcPr[3]); _MM_MATVEC4_PS (r0, r1, r2, r3, d2c, d2cPr[3]); _MM_MATVEC4_PS (i0, i1, i2, i3, c, cPi[3]); _MM_MATVEC4_PS (i0, i1, i2, i3, dc, dcPi[3]); _MM_MATVEC4_PS (i0, i1, i2, i3, d2c, d2cPi[3]); // Now compute bcP, dbcP, bdcP, d2bcP, bd2cP, and dbdc products _MM_MATVEC4_PS ( cPr[0], cPr[1], cPr[2], cPr[3], b, bcPr); _MM_MATVEC4_PS ( cPr[0], cPr[1], cPr[2], cPr[3], db, dbcPr); _MM_MATVEC4_PS ( dcPr[0], dcPr[1], dcPr[2], dcPr[3], b, bdcPr); _MM_MATVEC4_PS ( cPr[0], cPr[1], cPr[2], cPr[3], d2b, d2bcPr); _MM_MATVEC4_PS (d2cPr[0], d2cPr[1], d2cPr[2], d2cPr[3], b, bd2cPr); _MM_MATVEC4_PS ( cPi[0], cPi[1], cPi[2], cPi[3], b, bcPi); _MM_MATVEC4_PS ( cPi[0], cPi[1], cPi[2], cPi[3], db, dbcPi); _MM_MATVEC4_PS ( dcPi[0], dcPi[1], dcPi[2], dcPi[3], b, bdcPi); _MM_MATVEC4_PS ( cPi[0], cPi[1], cPi[2], cPi[3], d2b, d2bcPi); _MM_MATVEC4_PS (d2cPi[0], d2cPi[1], d2cPi[2], d2cPi[3], b, bd2cPi); float *valr = ((float*)val) +0; float *vali = ((float*)val) +1; float *gradr0 = ((float *)grad)+0; float *gradi0 = ((float *)grad)+1; float *gradr1 = ((float *)grad)+2; float *gradi1 = ((float *)grad)+3; float *gradr2 = ((float *)grad)+4; float *gradi2 = ((float *)grad)+5; float d2x_r, d2x_i, d2y_r, d2y_i, d2z_r, d2z_i; // Compute value _MM_DOT4_PS (a, bcPr, *valr); _MM_DOT4_PS (a, bcPi, *vali); // Compute gradient _MM_DOT4_PS (da, bcPr, *gradr0); _MM_DOT4_PS (a, dbcPr, *gradr1); _MM_DOT4_PS (a, bdcPr, *gradr2); _MM_DOT4_PS (da, bcPi, *gradi0); _MM_DOT4_PS (a, dbcPi, *gradi1); _MM_DOT4_PS (a, bdcPi, *gradi2); // Compute laplacian _MM_DOT4_PS (d2a, bcPr, d2x_r); _MM_DOT4_PS (a, d2bcPr, d2y_r); _MM_DOT4_PS (a, bd2cPr, d2z_r); _MM_DOT4_PS (d2a, bcPi, d2x_i); _MM_DOT4_PS (a, d2bcPi, d2y_i); _MM_DOT4_PS (a, bd2cPi, d2z_i); #ifdef __cplusplus 
*lapl = std::complex(d2x_r + d2y_r + d2z_r, d2x_i + d2y_i + d2z_i); #else *lapl = (d2x_r + d2y_r + d2z_r) + 1.0if * (d2x_i+d2y_i+d2z_i); #endif #undef P } /* Value, gradient, and Hessian */ inline void eval_NUBspline_3d_c_vgh (NUBspline_3d_c * restrict spline, double x, double y, double z, complex_float* restrict val, complex_float* restrict grad, complex_float* restrict hess) { __m128 a, b, c, da, db, dc, d2a, d2b, d2c, cPr[4], dcPr[4], d2cPr[4], bcPr, dbcPr, bdcPr, d2bcPr, dbdcPr, bd2cPr, cPi[4], dcPi[4], d2cPi[4], bcPi, dbcPi, bdcPi, d2bcPi, dbdcPi, bd2cPi, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, r0, r1, r2, r3, i0, i1, i2, i3; int ix = get_NUBasis_d2funcs_sse_s (spline->x_basis, x, &a, &da, &d2a); int iy = get_NUBasis_d2funcs_sse_s (spline->y_basis, y, &b, &db, &d2b); int iz = get_NUBasis_d2funcs_sse_s (spline->z_basis, z, &c, &dc, &d2c); int xs = spline->x_stride; int ys = spline->y_stride; int ys2 = 2*ys; int ys3 = 3*ys; complex_float* restrict coefs = spline->coefs; // This macro is used to give the pointer to coefficient data. // i and j should be in the range [0,3]. Coefficients are read four // at a time, so no k value is needed. #define P(i,j,k) ((const float*)(spline->coefs+(ix+(i))*xs+(iy+(j))*ys+(iz)+k)) complex_float *p = (complex_float*)P(0,0,0); // Prefetch the data from main memory into cache so it's available // when we need to use it. 
_mm_prefetch ((const char*)p , _MM_HINT_T0); _mm_prefetch ((const char*)(p +2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ ys), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys +2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3+2), _MM_HINT_T0); p += xs; _mm_prefetch ((const char*)(p ), _MM_HINT_T0); _mm_prefetch ((const char*)(p +2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys +2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3+2), _MM_HINT_T0); p += xs; _mm_prefetch ((const char*)(p ), _MM_HINT_T0); _mm_prefetch ((const char*)(p +2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys +2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3+2), _MM_HINT_T0); p += xs; _mm_prefetch ((const char*)(p ), _MM_HINT_T0); _mm_prefetch ((const char*)(p +2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys ), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys +2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys2+2), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3), _MM_HINT_T0); _mm_prefetch ((const char*)(p+ys3+2), _MM_HINT_T0); // Compute cP, dcP, and d2cP products 1/4 at a time to maximize // register reuse and avoid rerereading from memory or cache. 
// 1st quarter p = (complex_float*)P(0,0,0); tmp0 = _mm_loadu_ps ((float*)(p )); tmp1 = _mm_loadu_ps ((float*)(p+2)); r0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps ((float*)(p+ys )); tmp1 = _mm_loadu_ps ((float*)(p+ys+2)); r1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps ((float*)(p+ys2 )); tmp1 = _mm_loadu_ps ((float*)(p+ys2+2)); r2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps ((float*)(p+ys3 )); tmp1 = _mm_loadu_ps ((float*)(p+ys3+2)); r3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); _MM_MATVEC4_PS (r0, r1, r2, r3, c, cPr[0]); _MM_MATVEC4_PS (r0, r1, r2, r3, dc, dcPr[0]); _MM_MATVEC4_PS (r0, r1, r2, r3, d2c, d2cPr[0]); _MM_MATVEC4_PS (i0, i1, i2, i3, c, cPi[0]); _MM_MATVEC4_PS (i0, i1, i2, i3, dc, dcPi[0]); _MM_MATVEC4_PS (i0, i1, i2, i3, d2c, d2cPi[0]); p += xs; // 2nd quarter tmp0 = _mm_loadu_ps ((float*)(p )); tmp1 = _mm_loadu_ps ((float*)(p+2)); r0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps ((float*)(p+ys )); tmp1 = _mm_loadu_ps ((float*)(p+ys+2)); r1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps ((float*)(p+ys2 )); tmp1 = _mm_loadu_ps ((float*)(p+ys2+2)); r2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps ((float*)(p+ys3 )); tmp1 = _mm_loadu_ps ((float*)(p+ys3+2)); r3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); _MM_MATVEC4_PS (r0, r1, r2, r3, c, cPr[1]); _MM_MATVEC4_PS (r0, 
r1, r2, r3, dc, dcPr[1]); _MM_MATVEC4_PS (r0, r1, r2, r3, d2c, d2cPr[1]); _MM_MATVEC4_PS (i0, i1, i2, i3, c, cPi[1]); _MM_MATVEC4_PS (i0, i1, i2, i3, dc, dcPi[1]); _MM_MATVEC4_PS (i0, i1, i2, i3, d2c, d2cPi[1]); p += xs; // 3rd quarter tmp0 = _mm_loadu_ps ((float*)(p )); tmp1 = _mm_loadu_ps ((float*)(p+2)); r0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps ((float*)(p+ys )); tmp1 = _mm_loadu_ps ((float*)(p+ys+2)); r1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps ((float*)(p+ys2 )); tmp1 = _mm_loadu_ps ((float*)(p+ys2+2)); r2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps ((float*)(p+ys3 )); tmp1 = _mm_loadu_ps ((float*)(p+ys3+2)); r3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); _MM_MATVEC4_PS (r0, r1, r2, r3, c, cPr[2]); _MM_MATVEC4_PS (r0, r1, r2, r3, dc, dcPr[2]); _MM_MATVEC4_PS (r0, r1, r2, r3, d2c, d2cPr[2]); _MM_MATVEC4_PS (i0, i1, i2, i3, c, cPi[2]); _MM_MATVEC4_PS (i0, i1, i2, i3, dc, dcPi[2]); _MM_MATVEC4_PS (i0, i1, i2, i3, d2c, d2cPi[2]); p += xs; // 4th quarter tmp0 = _mm_loadu_ps ((float*)(p )); tmp1 = _mm_loadu_ps ((float*)(p+2)); r0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i0 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps ((float*)(p+ys )); tmp1 = _mm_loadu_ps ((float*)(p+ys+2)); r1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i1 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps ((float*)(p+ys2 )); tmp1 = _mm_loadu_ps ((float*)(p+ys2+2)); r2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i2 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); tmp0 = _mm_loadu_ps ((float*)(p+ys3 )); tmp1 = _mm_loadu_ps 
((float*)(p+ys3+2)); r3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (2, 0, 2, 0)); i3 = _mm_shuffle_ps (tmp0, tmp1, _MM_SHUFFLE (3, 1, 3, 1)); _MM_MATVEC4_PS (r0, r1, r2, r3, c, cPr[3]); _MM_MATVEC4_PS (r0, r1, r2, r3, dc, dcPr[3]); _MM_MATVEC4_PS (r0, r1, r2, r3, d2c, d2cPr[3]); _MM_MATVEC4_PS (i0, i1, i2, i3, c, cPi[3]); _MM_MATVEC4_PS (i0, i1, i2, i3, dc, dcPi[3]); _MM_MATVEC4_PS (i0, i1, i2, i3, d2c, d2cPi[3]); // Now compute bcP, dbcP, bdcP, d2bcP, bd2cP, and dbdc products _MM_MATVEC4_PS ( cPr[0], cPr[1], cPr[2], cPr[3], b, bcPr); _MM_MATVEC4_PS ( cPr[0], cPr[1], cPr[2], cPr[3], db, dbcPr); _MM_MATVEC4_PS ( dcPr[0], dcPr[1], dcPr[2], dcPr[3], b, bdcPr); _MM_MATVEC4_PS ( cPr[0], cPr[1], cPr[2], cPr[3], d2b, d2bcPr); _MM_MATVEC4_PS (d2cPr[0], d2cPr[1], d2cPr[2], d2cPr[3], b, bd2cPr); _MM_MATVEC4_PS ( dcPr[0], dcPr[1], dcPr[2], dcPr[3], db, dbdcPr); _MM_MATVEC4_PS ( cPi[0], cPi[1], cPi[2], cPi[3], b, bcPi); _MM_MATVEC4_PS ( cPi[0], cPi[1], cPi[2], cPi[3], db, dbcPi); _MM_MATVEC4_PS ( dcPi[0], dcPi[1], dcPi[2], dcPi[3], b, bdcPi); _MM_MATVEC4_PS ( cPi[0], cPi[1], cPi[2], cPi[3], d2b, d2bcPi); _MM_MATVEC4_PS (d2cPi[0], d2cPi[1], d2cPi[2], d2cPi[3], b, bd2cPi); _MM_MATVEC4_PS ( dcPi[0], dcPi[1], dcPi[2], dcPi[3], db, dbdcPi); float *valr = ((float*)val) +0; float *vali = ((float*)val) +1; float *gradr0 = ((float *)grad)+0; float *gradi0 = ((float *)grad)+1; float *gradr1 = ((float *)grad)+2; float *gradi1 = ((float *)grad)+3; float *gradr2 = ((float *)grad)+4; float *gradi2 = ((float *)grad)+5; // Compute value _MM_DOT4_PS (a, bcPr, *valr); _MM_DOT4_PS (a, bcPi, *vali); // Compute gradient _MM_DOT4_PS (da, bcPr, *gradr0); _MM_DOT4_PS (a, dbcPr, *gradr1); _MM_DOT4_PS (a, bdcPr, *gradr2); _MM_DOT4_PS (da, bcPi, *gradi0); _MM_DOT4_PS (a, dbcPi, *gradi1); _MM_DOT4_PS (a, bdcPi, *gradi2); // Compute hessian _MM_DOT4_PS (d2a, bcPr, *(float*)(&hess[0])); _MM_DOT4_PS (a, d2bcPr, *(float*)(&hess[4])); _MM_DOT4_PS (a, bd2cPr, *(float*)(&hess[8])); _MM_DOT4_PS (da, dbcPr, 
*(float*)(&hess[1])); _MM_DOT4_PS (da, bdcPr, *(float*)(&hess[2])); _MM_DOT4_PS (a, dbdcPr, *(float*)(&hess[5])); _MM_DOT4_PS (d2a, bcPi, *((float*)(&hess[0])+1)); _MM_DOT4_PS (a, d2bcPi, *((float*)(&hess[4])+1)); _MM_DOT4_PS (a, bd2cPi, *((float*)(&hess[8])+1)); _MM_DOT4_PS (da, dbcPi, *((float*)(&hess[1])+1)); _MM_DOT4_PS (da, bdcPi, *((float*)(&hess[2])+1)); _MM_DOT4_PS (a, dbdcPi, *((float*)(&hess[5])+1)); // Multiply gradients and hessians by appropriate grid inverses // Copy hessian elements into lower half of 3x3 matrix hess[3] = hess[1]; hess[6] = hess[2]; hess[7] = hess[5]; #undef P } #undef _MM_MATVEC4_PS #undef _MM_DOT4_PS #endif einspline-0.9.2/src/bspline_eval_sse_d.h0000664000113000011300000017534011235572631015212 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. // // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. 
// // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #ifndef BSPLINE_EVAL_SSE_D_H #define BSPLINE_EVAL_SSE_D_H #include #include #ifdef HAVE_SSE3 #include #endif #include // extern __m128d // A0_01, A0_23, A1_01, A1_23, A2_01, A2_23, A3_01, A3_23, // dA0_01, dA0_23, dA1_01, dA1_23, dA2_01, dA2_23, dA3_01, dA3_23, // d2A0_01, d2A0_23, d2A1_01, d2A1_23, d2A2_01, d2A2_23, d2A3_01, d2A3_23; extern __m128d *restrict A_d; extern double* restrict Ad; extern double* restrict dAd; extern double* restrict d2Ad; // This returns, pack in r, the two four-element dot products given // by, r = [dot([a0,a1],[b0,b1], dot([a2,a3],[b2,b3]). Specifically // r_l = a0_l*b0_l + a0_h+b0_h + a1_l*b1_l + a1_h*b1_h // r_h = a2_l*b2_l + a2_h+b2_h + a3_l*b1_l + a3_h*b1_h #ifdef HAVE_SSE3 #define _MM_DDOT4_PD(a0, a1, a2, a3, b0, b1, b2, b3, r) \ do { \ __m128d t0 = _mm_add_pd(_mm_mul_pd (a0, b0),_mm_mul_pd (a1, b1)); \ __m128d t1 = _mm_add_pd(_mm_mul_pd (a2, b2),_mm_mul_pd (a3, b3)); \ r = _mm_hadd_pd (t0, t1); \ } while(0); #define _MM_DOT4_PD(a0, a1, b0, b1, p) \ do { \ __m128d t0 = _mm_add_pd(_mm_mul_pd (a0, b0),_mm_mul_pd (a1, b1)); \ __m128d t1 = _mm_hadd_pd (t0,t0); \ _mm_store_sd (&(p), t1); \ } while (0); #else #define _MM_DDOT4_PD(a0, a1, a2, a3, b0, b1, b2, b3, r) \ do { \ __m128d t0 = _mm_add_pd(_mm_mul_pd (a0, b0),_mm_mul_pd (a1, b1)); \ __m128d t1 = _mm_add_pd(_mm_mul_pd (a2, b2),_mm_mul_pd (a3, b3)); \ r = _mm_add_pd(_mm_unpacklo_pd(t0,t1),_mm_unpackhi_pd(t0,t1)); \ } while(0); #define _MM_DOT4_PD(a0, a1, b0, b1, p) \ do { \ __m128d t0 = _mm_add_pd(_mm_mul_pd (a0, b0),_mm_mul_pd (a1, b1)); \ __m128d t1 = \ _mm_add_pd (_mm_unpacklo_pd(t0,t0), _mm_unpackhi_pd(t0,t0)); \ _mm_store_sd (&(p), t1); \ } while (0); #endif 
/************************************************************/ /* 1D single-precision, real evaulation functions */ /* NOTE: SSE does not seem to speed things up in 1D. */ /* Therefore, we simply copy the std routines. */ /************************************************************/ /* Value only */ inline void eval_UBspline_1d_d (UBspline_1d_d * restrict spline, double x, double* restrict val) { x -= spline->x_grid.start; double u = x*spline->x_grid.delta_inv; double ipart, t; t = modf (u, &ipart); int i = (int) ipart; double tp[4]; tp[0] = t*t*t; tp[1] = t*t; tp[2] = t; tp[3] = 1.0; double* restrict coefs = spline->coefs; *val = (coefs[i+0]*(Ad[ 0]*tp[0] + Ad[ 1]*tp[1] + Ad[ 2]*tp[2] + Ad[ 3]*tp[3])+ coefs[i+1]*(Ad[ 4]*tp[0] + Ad[ 5]*tp[1] + Ad[ 6]*tp[2] + Ad[ 7]*tp[3])+ coefs[i+2]*(Ad[ 8]*tp[0] + Ad[ 9]*tp[1] + Ad[10]*tp[2] + Ad[11]*tp[3])+ coefs[i+3]*(Ad[12]*tp[0] + Ad[13]*tp[1] + Ad[14]*tp[2] + Ad[15]*tp[3])); } /* Value and first derivative */ inline void eval_UBspline_1d_d_vg (UBspline_1d_d * restrict spline, double x, double* restrict val, double* restrict grad) { x -= spline->x_grid.start; double u = x*spline->x_grid.delta_inv; double ipart, t; t = modf (u, &ipart); int i = (int) ipart; double tp[4]; tp[0] = t*t*t; tp[1] = t*t; tp[2] = t; tp[3] = 1.0; double* restrict coefs = spline->coefs; *val = (coefs[i+0]*(Ad[ 0]*tp[0] + Ad[ 1]*tp[1] + Ad[ 2]*tp[2] + Ad[ 3]*tp[3])+ coefs[i+1]*(Ad[ 4]*tp[0] + Ad[ 5]*tp[1] + Ad[ 6]*tp[2] + Ad[ 7]*tp[3])+ coefs[i+2]*(Ad[ 8]*tp[0] + Ad[ 9]*tp[1] + Ad[10]*tp[2] + Ad[11]*tp[3])+ coefs[i+3]*(Ad[12]*tp[0] + Ad[13]*tp[1] + Ad[14]*tp[2] + Ad[15]*tp[3])); *grad = spline->x_grid.delta_inv * (coefs[i+0]*(dAd[ 1]*tp[1] + dAd[ 2]*tp[2] + dAd[ 3]*tp[3])+ coefs[i+1]*(dAd[ 5]*tp[1] + dAd[ 6]*tp[2] + dAd[ 7]*tp[3])+ coefs[i+2]*(dAd[ 9]*tp[1] + dAd[10]*tp[2] + dAd[11]*tp[3])+ coefs[i+3]*(dAd[13]*tp[1] + dAd[14]*tp[2] + dAd[15]*tp[3])); } /* Value, first derivative, and second derivative */ inline void eval_UBspline_1d_d_vgl 
(UBspline_1d_d * restrict spline, double x, double* restrict val, double* restrict grad, double* restrict lapl) { x -= spline->x_grid.start; double u = x*spline->x_grid.delta_inv; double ipart, t; t = modf (u, &ipart); int i = (int) ipart; double tp[4]; tp[0] = t*t*t; tp[1] = t*t; tp[2] = t; tp[3] = 1.0; double* restrict coefs = spline->coefs; *val = (coefs[i+0]*(Ad[ 0]*tp[0] + Ad[ 1]*tp[1] + Ad[ 2]*tp[2] + Ad[ 3]*tp[3])+ coefs[i+1]*(Ad[ 4]*tp[0] + Ad[ 5]*tp[1] + Ad[ 6]*tp[2] + Ad[ 7]*tp[3])+ coefs[i+2]*(Ad[ 8]*tp[0] + Ad[ 9]*tp[1] + Ad[10]*tp[2] + Ad[11]*tp[3])+ coefs[i+3]*(Ad[12]*tp[0] + Ad[13]*tp[1] + Ad[14]*tp[2] + Ad[15]*tp[3])); *grad = spline->x_grid.delta_inv * (coefs[i+0]*(dAd[ 1]*tp[1] + dAd[ 2]*tp[2] + dAd[ 3]*tp[3])+ coefs[i+1]*(dAd[ 5]*tp[1] + dAd[ 6]*tp[2] + dAd[ 7]*tp[3])+ coefs[i+2]*(dAd[ 9]*tp[1] + dAd[10]*tp[2] + dAd[11]*tp[3])+ coefs[i+3]*(dAd[13]*tp[1] + dAd[14]*tp[2] + dAd[15]*tp[3])); *lapl = spline->x_grid.delta_inv * spline->x_grid.delta_inv * (coefs[i+0]*(d2Ad[ 2]*tp[2] + d2Ad[ 3]*tp[3])+ coefs[i+1]*(d2Ad[ 6]*tp[2] + d2Ad[ 7]*tp[3])+ coefs[i+2]*(d2Ad[10]*tp[2] + d2Ad[11]*tp[3])+ coefs[i+3]*(d2Ad[14]*tp[2] + d2Ad[15]*tp[3])); } inline void eval_UBspline_1d_d_vgh (UBspline_1d_d * restrict spline, double x, double* restrict val, double* restrict grad, double* restrict hess) { eval_UBspline_1d_d_vgl (spline, x, val, grad, hess); } /************************************************************/ /* 2D single-precision, real evaulation functions */ /************************************************************/ /* Value only */ inline void eval_UBspline_2d_d (UBspline_2d_d * restrict spline, double x, double y, double* restrict val) { x -= spline->x_grid.start; y -= spline->y_grid.start; double ux = x*spline->x_grid.delta_inv; double uy = y*spline->y_grid.delta_inv; ux = fmin (ux, (double)(spline->x_grid.num)-1.0e-5); uy = fmin (uy, (double)(spline->y_grid.num)-1.0e-5); double ipartx, iparty, ipartz, tx, ty, tz; tx = modf (ux, &ipartx); int ix = 
(int) ipartx; ty = modf (uy, &iparty); int iy = (int) iparty; int xs = spline->x_stride; #define P(i,j) (spline->coefs+(ix+(i))*xs+(iy+(j))) // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // tpz = [t_z^3 t_z^2 t_z 1] // a = A * tpx, b = A * tpy, c = A * tpz // da = dA * tpx, db = dA * tpy, dc = dA * tpz, etc. // A is 4x4 matrix given by the rows A0, A1, A2, A3 __m128d tpx01, tpx23, tpy01, tpy23, a01, b01, bP01, a23, b23, bP23, tmp0, tmp1, tmp2, tmp3; tpx01 = _mm_set_pd (tx*tx*tx, tx*tx); tpx23 = _mm_set_pd (tx, 1.0); tpy01 = _mm_set_pd (ty*ty*ty, ty*ty); tpy23 = _mm_set_pd (ty, 1.0); // x-dependent vectors _MM_DDOT4_PD (A_d[0], A_d[1], A_d[2], A_d[3], tpx01, tpx23, tpx01, tpx23, a01); _MM_DDOT4_PD (A_d[4], A_d[5], A_d[6], A_d[7], tpx01, tpx23, tpx01, tpx23, a23); // y-dependent vectors _MM_DDOT4_PD (A_d[0], A_d[1], A_d[2], A_d[3], tpy01, tpy23, tpy01, tpy23, b01); _MM_DDOT4_PD (A_d[4], A_d[5], A_d[6], A_d[7], tpy01, tpy23, tpy01, tpy23, b23); // Now compute bP, dbP, d2bP products tmp0 = _mm_loadu_pd (P(0,0)); tmp1 = _mm_loadu_pd(P(0,2)); tmp2 = _mm_loadu_pd (P(1,0)); tmp3 = _mm_loadu_pd(P(1,2)); _MM_DDOT4_PD (tmp0, tmp1, tmp2, tmp3, b01, b23, b01, b23, bP01); tmp0 = _mm_loadu_pd (P(2,0)); tmp1 = _mm_loadu_pd(P(2,2)); tmp2 = _mm_loadu_pd (P(3,0)); tmp3 = _mm_loadu_pd(P(3,2)); _MM_DDOT4_PD (tmp0, tmp1, tmp2, tmp3, b01, b23, b01, b23, bP23); // Compute value _MM_DOT4_PD (a01, a23, bP01, bP23, *val); } /* Value and gradient */ inline void eval_UBspline_2d_d_vg (UBspline_2d_d * restrict spline, double x, double y, double* restrict val, double* restrict grad) { x -= spline->x_grid.start; y -= spline->y_grid.start; double ux = x*spline->x_grid.delta_inv; double uy = y*spline->y_grid.delta_inv; ux = fmin (ux, (double)(spline->x_grid.num)-1.0e-5); uy = fmin (uy, (double)(spline->y_grid.num)-1.0e-5); double ipartx, iparty, ipartz, tx, ty, tz; tx = modf (ux, &ipartx); int ix = (int) ipartx; ty = modf (uy, &iparty); int iy = (int) iparty; 
int xs = spline->x_stride; #define P(i,j) (spline->coefs+(ix+(i))*xs+(iy+(j))) // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // tpz = [t_z^3 t_z^2 t_z 1] // a = A * tpx, b = A * tpy, c = A * tpz // da = dA * tpx, db = dA * tpy, dc = dA * tpz, etc. // A is 4x4 matrix given by the rows A0, A1, A2, A3 __m128d tpx01, tpx23, tpy01, tpy23, a01, b01, da01, db01, bP01, dbP01, a23, b23, da23, db23, bP23, dbP23, tmp0, tmp1, tmp2, tmp3; tpx01 = _mm_set_pd (tx*tx*tx, tx*tx); tpx23 = _mm_set_pd (tx, 1.0); tpy01 = _mm_set_pd (ty*ty*ty, ty*ty); tpy23 = _mm_set_pd (ty, 1.0); // x-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpx01, tpx23, tpx01, tpx23, a01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpx01, tpx23, tpx01, tpx23, a23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpx01, tpx23, tpx01, tpx23, da01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpx01, tpx23, tpx01, tpx23, da23); // y-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpy01, tpy23, tpy01, tpy23, b01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpy01, tpy23, tpy01, tpy23, b23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpy01, tpy23, tpy01, tpy23, db01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpy01, tpy23, tpy01, tpy23, db23); // Now compute bP, dbP, d2bP products tmp0 = _mm_loadu_pd (P(0,0)); tmp1 = _mm_loadu_pd(P(0,2)); tmp2 = _mm_loadu_pd (P(1,0)); tmp3 = _mm_loadu_pd(P(1,2)); _MM_DDOT4_PD (tmp0, tmp1, tmp2, tmp3, b01, b23, b01, b23, bP01); _MM_DDOT4_PD (tmp0, tmp1, tmp2, tmp3, db01, db23, db01, db23, dbP01); tmp0 = _mm_loadu_pd (P(2,0)); tmp1 = _mm_loadu_pd(P(2,2)); tmp2 = _mm_loadu_pd (P(3,0)); tmp3 = _mm_loadu_pd(P(3,2)); _MM_DDOT4_PD (tmp0, tmp1, tmp2, tmp3, b01, b23, b01, b23, bP23); _MM_DDOT4_PD (tmp0, tmp1, tmp2, tmp3, db01, db23, db01, db23, dbP23); // Compute value _MM_DOT4_PD (a01, a23, bP01, bP23, *val); // Compute gradient _MM_DOT4_PD (da01, da23, bP01, bP23, grad[0]); 
_MM_DOT4_PD (a01, a23, dbP01, dbP23, grad[1]); double dxInv = spline->x_grid.delta_inv; double dyInv = spline->y_grid.delta_inv; grad[0] *= dxInv; grad[1] *= dyInv; #undef P } /* Value, gradient, and laplacian */ inline void eval_UBspline_2d_d_vgl (UBspline_2d_d * restrict spline, double x, double y, double* restrict val, double* restrict grad, double* restrict lapl) { x -= spline->x_grid.start; y -= spline->y_grid.start; double ux = x*spline->x_grid.delta_inv; double uy = y*spline->y_grid.delta_inv; ux = fmin (ux, (double)(spline->x_grid.num)-1.0e-5); uy = fmin (uy, (double)(spline->y_grid.num)-1.0e-5); double ipartx, iparty, ipartz, tx, ty, tz; tx = modf (ux, &ipartx); int ix = (int) ipartx; ty = modf (uy, &iparty); int iy = (int) iparty; int xs = spline->x_stride; #define P(i,j) (spline->coefs+(ix+(i))*xs+(iy+(j))) // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // tpz = [t_z^3 t_z^2 t_z 1] // a = A * tpx, b = A * tpy, c = A * tpz // da = dA * tpx, db = dA * tpy, dc = dA * tpz, etc. 
// A is 4x4 matrix given by the rows A0, A1, A2, A3 __m128d tpx01, tpx23, tpy01, tpy23, a01, b01, da01, db01, d2a01, d2b01, a23, b23, da23, db23, d2a23, d2b23, bP01, dbP01, d2bP01, bP23, dbP23, d2bP23, tmp0, tmp1, tmp2, tmp3; tpx01 = _mm_set_pd (tx*tx*tx, tx*tx); tpx23 = _mm_set_pd (tx, 1.0); tpy01 = _mm_set_pd (ty*ty*ty, ty*ty); tpy23 = _mm_set_pd (ty, 1.0); // x-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpx01, tpx23, tpx01, tpx23, a01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpx01, tpx23, tpx01, tpx23, a23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpx01, tpx23, tpx01, tpx23, da01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpx01, tpx23, tpx01, tpx23, da23); _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpx01, tpx23, tpx01, tpx23, d2a01); _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpx01, tpx23, tpx01, tpx23, d2a23); // y-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpy01, tpy23, tpy01, tpy23, b01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpy01, tpy23, tpy01, tpy23, b23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpy01, tpy23, tpy01, tpy23, db01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpy01, tpy23, tpy01, tpy23, db23); _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpy01, tpy23, tpy01, tpy23, d2b01); _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpy01, tpy23, tpy01, tpy23, d2b23); // Now compute bP, dbP, d2bP products tmp0 = _mm_loadu_pd (P(0,0)); tmp1 = _mm_loadu_pd(P(0,2)); tmp2 = _mm_loadu_pd (P(1,0)); tmp3 = _mm_loadu_pd(P(1,2)); _MM_DDOT4_PD (tmp0, tmp1, tmp2, tmp3, b01, b23, b01, b23, bP01); _MM_DDOT4_PD (tmp0, tmp1, tmp2, tmp3, db01, db23, db01, db23, dbP01); _MM_DDOT4_PD (tmp0, tmp1, tmp2, tmp3, d2b01, d2b23, d2b01, d2b23, d2bP01); tmp0 = _mm_loadu_pd (P(2,0)); tmp1 = _mm_loadu_pd(P(2,2)); tmp2 = _mm_loadu_pd (P(3,0)); tmp3 = _mm_loadu_pd(P(3,2)); _MM_DDOT4_PD (tmp0, tmp1, tmp2, tmp3, b01, b23, b01, b23, bP23); _MM_DDOT4_PD (tmp0, 
tmp1, tmp2, tmp3, db01, db23, db01, db23, dbP23); _MM_DDOT4_PD (tmp0, tmp1, tmp2, tmp3, d2b01, d2b23, d2b01, d2b23, d2bP23); // Compute value _MM_DOT4_PD (a01, a23, bP01, bP23, *val); // Compute gradient _MM_DOT4_PD (da01, da23, bP01, bP23, grad[0]); _MM_DOT4_PD (a01, a23, dbP01, dbP23, grad[1]); // Compute laplacian double sec_derivs[2]; _MM_DOT4_PD (d2a01, d2a23, bP01, bP23, sec_derivs[0]); _MM_DOT4_PD (a01, a23, d2bP01, d2bP23, sec_derivs[1]); double dxInv = spline->x_grid.delta_inv; double dyInv = spline->y_grid.delta_inv; grad[0] *= dxInv; grad[1] *= dyInv; sec_derivs[0] *= dxInv * dxInv; sec_derivs[1] *= dyInv * dyInv; *lapl = sec_derivs[0] + sec_derivs[1]; #undef P } /* Value, gradient, and Hessian */ inline void eval_UBspline_2d_d_vgh (UBspline_2d_d * restrict spline, double x, double y, double* restrict val, double* restrict grad, double* restrict hess) { x -= spline->x_grid.start; y -= spline->y_grid.start; double ux = x*spline->x_grid.delta_inv; double uy = y*spline->y_grid.delta_inv; ux = fmin (ux, (double)(spline->x_grid.num)-1.0e-5); uy = fmin (uy, (double)(spline->y_grid.num)-1.0e-5); double ipartx, iparty, ipartz, tx, ty, tz; tx = modf (ux, &ipartx); int ix = (int) ipartx; ty = modf (uy, &iparty); int iy = (int) iparty; int xs = spline->x_stride; #define P(i,j) (spline->coefs+(ix+(i))*xs+(iy+(j))) // _mm_prefetch ((const char*)P(0,0), _MM_HINT_T0); // _mm_prefetch ((const char*)P(0,2), _MM_HINT_T0); // _mm_prefetch ((const char*)P(1,0), _MM_HINT_T0); // _mm_prefetch ((const char*)P(1,2), _MM_HINT_T0); // _mm_prefetch ((const char*)P(2,0), _MM_HINT_T0); // _mm_prefetch ((const char*)P(2,2), _MM_HINT_T0); // _mm_prefetch ((const char*)P(3,0), _MM_HINT_T0); // _mm_prefetch ((const char*)P(3,2), _MM_HINT_T0); // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // tpz = [t_z^3 t_z^2 t_z 1] // a = A * tpx, b = A * tpy, c = A * tpz // da = dA * tpx, db = dA * tpy, dc = dA * tpz, etc. 
// A is 4x4 matrix given by the rows A0, A1, A2, A3 __m128d tpx01, tpx23, tpy01, tpy23, a01, b01, da01, db01, d2a01, d2b01, a23, b23, da23, db23, d2a23, d2b23, bP01, dbP01, d2bP01, bP23, dbP23, d2bP23, tmp0, tmp1, tmp2, tmp3; tpx01 = _mm_set_pd (tx*tx*tx, tx*tx); tpx23 = _mm_set_pd (tx, 1.0); tpy01 = _mm_set_pd (ty*ty*ty, ty*ty); tpy23 = _mm_set_pd (ty, 1.0); // x-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpx01, tpx23, tpx01, tpx23, a01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpx01, tpx23, tpx01, tpx23, a23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpx01, tpx23, tpx01, tpx23, da01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpx01, tpx23, tpx01, tpx23, da23); _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpx01, tpx23, tpx01, tpx23, d2a01); _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpx01, tpx23, tpx01, tpx23, d2a23); // y-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpy01, tpy23, tpy01, tpy23, b01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpy01, tpy23, tpy01, tpy23, b23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpy01, tpy23, tpy01, tpy23, db01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpy01, tpy23, tpy01, tpy23, db23); _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpy01, tpy23, tpy01, tpy23, d2b01); _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpy01, tpy23, tpy01, tpy23, d2b23); // Now compute bP, dbP, d2bP products tmp0 = _mm_loadu_pd (P(0,0)); tmp1 = _mm_loadu_pd(P(0,2)); tmp2 = _mm_loadu_pd (P(1,0)); tmp3 = _mm_loadu_pd(P(1,2)); _MM_DDOT4_PD (tmp0, tmp1, tmp2, tmp3, b01, b23, b01, b23, bP01); _MM_DDOT4_PD (tmp0, tmp1, tmp2, tmp3, db01, db23, db01, db23, dbP01); _MM_DDOT4_PD (tmp0, tmp1, tmp2, tmp3, d2b01, d2b23, d2b01, d2b23, d2bP01); tmp0 = _mm_loadu_pd (P(2,0)); tmp1 = _mm_loadu_pd(P(2,2)); tmp2 = _mm_loadu_pd (P(3,0)); tmp3 = _mm_loadu_pd(P(3,2)); _MM_DDOT4_PD (tmp0, tmp1, tmp2, tmp3, b01, b23, b01, b23, bP23); _MM_DDOT4_PD (tmp0, 
tmp1, tmp2, tmp3, db01, db23, db01, db23, dbP23); _MM_DDOT4_PD (tmp0, tmp1, tmp2, tmp3, d2b01, d2b23, d2b01, d2b23, d2bP23); // Compute value _MM_DOT4_PD (a01, a23, bP01, bP23, *val); // Compute gradient _MM_DOT4_PD (da01, da23, bP01, bP23, grad[0]); _MM_DOT4_PD (a01, a23, dbP01, dbP23, grad[1]); // Compute hessian _MM_DOT4_PD (d2a01, d2a23, bP01, bP23, hess[0]); _MM_DOT4_PD (a01, a23, d2bP01, d2bP23, hess[3]); _MM_DOT4_PD (da01, da23, dbP01, dbP23, hess[1]); double dxInv = spline->x_grid.delta_inv; double dyInv = spline->y_grid.delta_inv; grad[0] *= dxInv; grad[1] *= dyInv; hess[0] *= dxInv * dxInv; hess[1] *= dxInv * dyInv; hess[3] *= dyInv * dyInv; hess[2] = hess[1]; #undef P } /************************************************************/ /* 3D single-precision, real evaulation functions */ /************************************************************/ /* Value only */ inline void eval_UBspline_3d_d (UBspline_3d_d * restrict spline, double x, double y, double z, double* restrict val) { _mm_prefetch ((const char*) &A_d[0],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[1],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[2],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[3],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[4],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[5],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[6],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[7],_MM_HINT_T0); x -= spline->x_grid.start; y -= spline->y_grid.start; z -= spline->z_grid.start; double ux = x*spline->x_grid.delta_inv; double uy = y*spline->y_grid.delta_inv; double uz = z*spline->z_grid.delta_inv; ux = fmin (ux, (double)(spline->x_grid.num)-1.0e-5); uy = fmin (uy, (double)(spline->y_grid.num)-1.0e-5); uz = fmin (uz, (double)(spline->z_grid.num)-1.0e-5); double ipartx, iparty, ipartz, tx, ty, tz; tx = modf (ux, &ipartx); int ix = (int) ipartx; ty = modf (uy, &iparty); int iy = (int) iparty; tz = modf (uz, &ipartz); int iz = (int) ipartz; int xs = spline->x_stride; int ys = 
spline->y_stride; // This macro is used to give the pointer to coefficient data. // i and j should be in the range [0,3]. Coefficients are read four // at a time, so no k value is needed. #define P(i,j,k) (spline->coefs+(ix+(i))*xs+(iy+(j))*ys+(iz+k)) // Prefetch the data from main memory into cache so it's available // when we need to use it. _mm_prefetch ((const char*)P(0,0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,1,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,2,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,3,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,1,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,2,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,3,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,1,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,2,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,3,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,1,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,2,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,3,2), _MM_HINT_T0); // Now compute the 
vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // tpz = [t_z^3 t_z^2 t_z 1] // a = A * tpx, b = A * tpy, c = A * tpz // da = dA * tpx, db = dA * tpy, dc = dA * tpz, etc. // A is 4x4 matrix given by the rows A0, A1, A2, A3 __m128d tpx01, tpx23, tpy01, tpy23, tpz01, tpz23, a01, b01, c01, a23, b23, c23, cP[8], dcP[8], d2cP[8], bcP01, dbcP01, bdcP01, bcP23, dbcP23, bdcP23, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; tpx01 = _mm_set_pd (tx*tx*tx, tx*tx); tpx23 = _mm_set_pd (tx, 1.0); tpy01 = _mm_set_pd (ty*ty*ty, ty*ty); tpy23 = _mm_set_pd (ty, 1.0); tpz01 = _mm_set_pd (tz*tz*tz, tz*tz); tpz23 = _mm_set_pd (tz, 1.0); // x-dependent vectors _MM_DDOT4_PD (A_d[0], A_d[1], A_d[2], A_d[3], tpx01, tpx23, tpx01, tpx23, a01); _MM_DDOT4_PD (A_d[4], A_d[5], A_d[6], A_d[7], tpx01, tpx23, tpx01, tpx23, a23); // y-dependent vectors _MM_DDOT4_PD (A_d[0], A_d[1], A_d[2], A_d[3], tpy01, tpy23, tpy01, tpy23, b01); _MM_DDOT4_PD (A_d[4], A_d[5], A_d[6], A_d[7], tpy01, tpy23, tpy01, tpy23, b23); // z-dependent vectors _MM_DDOT4_PD (A_d[0], A_d[1], A_d[2], A_d[3], tpz01, tpz23, tpz01, tpz23, c01); _MM_DDOT4_PD (A_d[4], A_d[5], A_d[6], A_d[7], tpz01, tpz23, tpz01, tpz23, c23); // Compute cP product 1/8 at a time to maximize // register reuse and avoid rerereading from memory or cache. 
// 1st eighth tmp0 = _mm_loadu_pd (P(0,0,0)); tmp1 = _mm_loadu_pd (P(0,0,2)); tmp2 = _mm_loadu_pd (P(0,1,0)); tmp3 = _mm_loadu_pd (P(0,1,2)); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[0]); // 2nd eighth tmp0 = _mm_loadu_pd (P(0,2,0)); tmp1 = _mm_loadu_pd (P(0,2,2)); tmp2 = _mm_loadu_pd (P(0,3,0)); tmp3 = _mm_loadu_pd (P(0,3,2)); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[1]); // 3rd eighth tmp0 = _mm_loadu_pd (P(1,0,0)); tmp1 = _mm_loadu_pd (P(1,0,2)); tmp2 = _mm_loadu_pd (P(1,1,0)); tmp3 = _mm_loadu_pd (P(1,1,2)); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[2]); // 4th eighth tmp0 = _mm_loadu_pd (P(1,2,0)); tmp1 = _mm_loadu_pd (P(1,2,2)); tmp2 = _mm_loadu_pd (P(1,3,0)); tmp3 = _mm_loadu_pd (P(1,3,2)); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[3]); // 5th eighth tmp0 = _mm_loadu_pd (P(2,0,0)); tmp1 = _mm_loadu_pd (P(2,0,2)); tmp2 = _mm_loadu_pd (P(2,1,0)); tmp3 = _mm_loadu_pd (P(2,1,2)); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[4]); // 6th eighth tmp0 = _mm_loadu_pd (P(2,2,0)); tmp1 = _mm_loadu_pd (P(2,2,2)); tmp2 = _mm_loadu_pd (P(2,3,0)); tmp3 = _mm_loadu_pd (P(2,3,2)); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[5]); // 7th eighth tmp0 = _mm_loadu_pd (P(3,0,0)); tmp1 = _mm_loadu_pd (P(3,0,2)); tmp2 = _mm_loadu_pd (P(3,1,0)); tmp3 = _mm_loadu_pd (P(3,1,2)); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[6]); // 8th eighth tmp0 = _mm_loadu_pd (P(3,2,0)); tmp1 = _mm_loadu_pd (P(3,2,2)); tmp2 = _mm_loadu_pd (P(3,3,0)); tmp3 = _mm_loadu_pd (P(3,3,2)); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[7]); // Now compute bcP products _MM_DDOT4_PD ( b01, b23, b01, b23, cP[0], cP[1], cP[2], cP[3], bcP01); _MM_DDOT4_PD ( b01, b23, b01, b23, cP[4], cP[5], cP[6], cP[7], bcP23); // Compute value _MM_DOT4_PD (a01, a23, bcP01, bcP23, *val); #undef P } /* Value and gradient */ inline void eval_UBspline_3d_d_vg (UBspline_3d_d * restrict spline, double x, double y, double z, 
double* restrict val, double* restrict grad) { _mm_prefetch((const char*) &A_d[ 0],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 1],_MM_HINT_T0); _mm_prefetch((const char*) &A_d[ 2],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 3],_MM_HINT_T0); _mm_prefetch((const char*) &A_d[ 4],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 5],_MM_HINT_T0); _mm_prefetch((const char*) &A_d[ 6],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 7],_MM_HINT_T0); _mm_prefetch((const char*) &A_d[ 8],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 9],_MM_HINT_T0); _mm_prefetch((const char*) &A_d[10],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[11],_MM_HINT_T0); _mm_prefetch((const char*) &A_d[12],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[13],_MM_HINT_T0); _mm_prefetch((const char*) &A_d[14],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[15],_MM_HINT_T0); x -= spline->x_grid.start; y -= spline->y_grid.start; z -= spline->z_grid.start; double ux = x*spline->x_grid.delta_inv; double uy = y*spline->y_grid.delta_inv; double uz = z*spline->z_grid.delta_inv; ux = fmin (ux, (double)(spline->x_grid.num)-1.0e-5); uy = fmin (uy, (double)(spline->y_grid.num)-1.0e-5); uz = fmin (uz, (double)(spline->z_grid.num)-1.0e-5); double ipartx, iparty, ipartz, tx, ty, tz; tx = modf (ux, &ipartx); int ix = (int) ipartx; ty = modf (uy, &iparty); int iy = (int) iparty; tz = modf (uz, &ipartz); int iz = (int) ipartz; int xs = spline->x_stride; int ys = spline->y_stride; // This macro is used to give the pointer to coefficient data. // i and j should be in the range [0,3]. Coefficients are read four // at a time, so no k value is needed. #define P(i,j,k) (spline->coefs+(ix+(i))*xs+(iy+(j))*ys+(iz+k)) // Prefetch the data from main memory into cache so it's available // when we need to use it. 
_mm_prefetch ((const char*)P(0,0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,1,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,2,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,3,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,1,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,2,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,3,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,1,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,2,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,3,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,1,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,2,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,3,2), _MM_HINT_T0); // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // tpz = [t_z^3 t_z^2 t_z 1] // a = A * tpx, b = A * tpy, c = A * tpz // da = dA * tpx, db = dA * tpy, dc = dA * tpz, etc. 
// A is 4x4 matrix given by the rows A0, A1, A2, A3 __m128d tpx01, tpx23, tpy01, tpy23, tpz01, tpz23, a01, b01, c01, da01, db01, dc01, a23, b23, c23, da23, db23, dc23, cP[8], dcP[8], d2cP[8], bcP01, dbcP01, bdcP01, bcP23, dbcP23, bdcP23, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; tpx01 = _mm_set_pd (tx*tx*tx, tx*tx); tpx23 = _mm_set_pd (tx, 1.0); tpy01 = _mm_set_pd (ty*ty*ty, ty*ty); tpy23 = _mm_set_pd (ty, 1.0); tpz01 = _mm_set_pd (tz*tz*tz, tz*tz); tpz23 = _mm_set_pd (tz, 1.0); // x-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpx01, tpx23, tpx01, tpx23, a01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpx01, tpx23, tpx01, tpx23, a23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpx01, tpx23, tpx01, tpx23, da01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpx01, tpx23, tpx01, tpx23, da23); // y-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpy01, tpy23, tpy01, tpy23, b01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpy01, tpy23, tpy01, tpy23, b23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpy01, tpy23, tpy01, tpy23, db01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpy01, tpy23, tpy01, tpy23, db23); // z-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpz01, tpz23, tpz01, tpz23, c01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpz01, tpz23, tpz01, tpz23, c23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpz01, tpz23, tpz01, tpz23, dc01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpz01, tpz23, tpz01, tpz23, dc23); // Compute cP, dcP, and d2cP products 1/8 at a time to maximize // register reuse and avoid rerereading from memory or cache. 
// 1st eighth tmp0 = _mm_loadu_pd (P(0,0,0)); tmp1 = _mm_loadu_pd (P(0,0,2)); tmp2 = _mm_loadu_pd (P(0,1,0)); tmp3 = _mm_loadu_pd (P(0,1,2)); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[0]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, dc01, dc23, dc01, dc23, dcP[0]); // 2nd eighth tmp0 = _mm_loadu_pd (P(0,2,0)); tmp1 = _mm_loadu_pd (P(0,2,2)); tmp2 = _mm_loadu_pd (P(0,3,0)); tmp3 = _mm_loadu_pd (P(0,3,2)); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[1]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, dc01, dc23, dc01, dc23, dcP[1]); // 3rd eighth tmp0 = _mm_loadu_pd (P(1,0,0)); tmp1 = _mm_loadu_pd (P(1,0,2)); tmp2 = _mm_loadu_pd (P(1,1,0)); tmp3 = _mm_loadu_pd (P(1,1,2)); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[2]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, dc01, dc23, dc01, dc23, dcP[2]); // 4th eighth tmp0 = _mm_loadu_pd (P(1,2,0)); tmp1 = _mm_loadu_pd (P(1,2,2)); tmp2 = _mm_loadu_pd (P(1,3,0)); tmp3 = _mm_loadu_pd (P(1,3,2)); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[3]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, dc01, dc23, dc01, dc23, dcP[3]); // 5th eighth tmp0 = _mm_loadu_pd (P(2,0,0)); tmp1 = _mm_loadu_pd (P(2,0,2)); tmp2 = _mm_loadu_pd (P(2,1,0)); tmp3 = _mm_loadu_pd (P(2,1,2)); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[4]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, dc01, dc23, dc01, dc23, dcP[4]); // 6th eighth tmp0 = _mm_loadu_pd (P(2,2,0)); tmp1 = _mm_loadu_pd (P(2,2,2)); tmp2 = _mm_loadu_pd (P(2,3,0)); tmp3 = _mm_loadu_pd (P(2,3,2)); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[5]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, dc01, dc23, dc01, dc23, dcP[5]); // 7th eighth tmp0 = _mm_loadu_pd (P(3,0,0)); tmp1 = _mm_loadu_pd (P(3,0,2)); tmp2 = _mm_loadu_pd (P(3,1,0)); tmp3 = _mm_loadu_pd (P(3,1,2)); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[6]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, dc01, dc23, dc01, dc23, dcP[6]); // 8th eighth tmp0 = _mm_loadu_pd (P(3,2,0)); tmp1 = _mm_loadu_pd (P(3,2,2)); tmp2 = _mm_loadu_pd 
(P(3,3,0)); tmp3 = _mm_loadu_pd (P(3,3,2)); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[7]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, dc01, dc23, dc01, dc23, dcP[7]); // Now compute bcP, dbcP, bdcP, d2bcP, bd2cP, and dbdc products _MM_DDOT4_PD ( b01, b23, b01, b23, cP[0], cP[1], cP[2], cP[3], bcP01); _MM_DDOT4_PD ( b01, b23, b01, b23, cP[4], cP[5], cP[6], cP[7], bcP23); _MM_DDOT4_PD ( db01, db23, db01, db23, cP[0], cP[1], cP[2], cP[3], dbcP01); _MM_DDOT4_PD ( db01, db23, db01, db23, cP[4], cP[5], cP[6], cP[7], dbcP23); _MM_DDOT4_PD ( b01, b23, b01, b23, dcP[0], dcP[1], dcP[2], dcP[3], bdcP01); _MM_DDOT4_PD ( b01, b23, b01, b23, dcP[4], dcP[5], dcP[6], dcP[7], bdcP23); // Compute value _MM_DOT4_PD (a01, a23, bcP01, bcP23, *val); // Compute gradient _MM_DOT4_PD (da01, da23, bcP01, bcP23, grad[0]); _MM_DOT4_PD (a01, a23, dbcP01, dbcP23, grad[1]); _MM_DOT4_PD (a01, a23, bdcP01, bdcP23, grad[2]); double dxInv = spline->x_grid.delta_inv; double dyInv = spline->y_grid.delta_inv; double dzInv = spline->z_grid.delta_inv; grad[0] *= dxInv; grad[1] *= dyInv; grad[2] *= dzInv; #undef P } /* Value, gradient, and laplacian */ inline void eval_UBspline_3d_d_vgl (UBspline_3d_d * restrict spline, double x, double y, double z, double* restrict val, double* restrict grad, double* restrict lapl) { _mm_prefetch ((const char*) &A_d[ 0],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 1],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 2],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 3],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 4],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 5],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 6],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 7],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 8],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 9],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[10],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[11],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[12],_MM_HINT_T0); _mm_prefetch ((const char*) 
&A_d[13],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[14],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[15],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[16],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[17],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[18],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[19],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[20],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[21],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[22],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[23],_MM_HINT_T0); x -= spline->x_grid.start; y -= spline->y_grid.start; z -= spline->z_grid.start; double ux = x*spline->x_grid.delta_inv; double uy = y*spline->y_grid.delta_inv; double uz = z*spline->z_grid.delta_inv; ux = fmin (ux, (double)(spline->x_grid.num)-1.0e-5); uy = fmin (uy, (double)(spline->y_grid.num)-1.0e-5); uz = fmin (uz, (double)(spline->z_grid.num)-1.0e-5); double ipartx, iparty, ipartz, tx, ty, tz; tx = modf (ux, &ipartx); int ix = (int) ipartx; ty = modf (uy, &iparty); int iy = (int) iparty; tz = modf (uz, &ipartz); int iz = (int) ipartz; int xs = spline->x_stride; int ys = spline->y_stride; // This macro is used to give the pointer to coefficient data. // i and j should be in the range [0,3]. Coefficients are read four // at a time, so no k value is needed. #define P(i,j,k) (spline->coefs+(ix+(i))*xs+(iy+(j))*ys+(iz+k)) // Prefetch the data from main memory into cache so it's available // when we need to use it. 
_mm_prefetch ((const char*)P(0,0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,1,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,2,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,3,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,1,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,2,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,3,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,1,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,2,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,3,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,1,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,2,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,3,2), _MM_HINT_T0); // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // tpz = [t_z^3 t_z^2 t_z 1] // a = A * tpx, b = A * tpy, c = A * tpz // da = dA * tpx, db = dA * tpy, dc = dA * tpz, etc. 
// A is 4x4 matrix given by the rows A0, A1, A2, A3 __m128d tpx01, tpx23, tpy01, tpy23, tpz01, tpz23, a01, b01, c01, da01, db01, dc01, d2a01, d2b01, d2c01, a23, b23, c23, da23, db23, dc23, d2a23, d2b23, d2c23, cP[8], dcP[8], d2cP[8], bcP01, dbcP01, bdcP01, d2bcP01, dbdcP01, bd2cP01, bcP23, dbcP23, bdcP23, d2bcP23, dbdcP23, bd2cP23, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; tpx01 = _mm_set_pd (tx*tx*tx, tx*tx); tpx23 = _mm_set_pd (tx, 1.0); tpy01 = _mm_set_pd (ty*ty*ty, ty*ty); tpy23 = _mm_set_pd (ty, 1.0); tpz01 = _mm_set_pd (tz*tz*tz, tz*tz); tpz23 = _mm_set_pd (tz, 1.0); // x-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpx01, tpx23, tpx01, tpx23, a01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpx01, tpx23, tpx01, tpx23, a23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpx01, tpx23, tpx01, tpx23, da01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpx01, tpx23, tpx01, tpx23, da23); _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpx01, tpx23, tpx01, tpx23, d2a01); _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpx01, tpx23, tpx01, tpx23, d2a23); // y-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpy01, tpy23, tpy01, tpy23, b01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpy01, tpy23, tpy01, tpy23, b23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpy01, tpy23, tpy01, tpy23, db01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpy01, tpy23, tpy01, tpy23, db23); _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpy01, tpy23, tpy01, tpy23, d2b01); _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpy01, tpy23, tpy01, tpy23, d2b23); // z-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpz01, tpz23, tpz01, tpz23, c01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpz01, tpz23, tpz01, tpz23, c23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpz01, tpz23, tpz01, tpz23, dc01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpz01, tpz23, tpz01, 
tpz23, dc23); _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpz01, tpz23, tpz01, tpz23, d2c01); _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpz01, tpz23, tpz01, tpz23, d2c23); // Compute cP, dcP, and d2cP products 1/8 at a time to maximize // register reuse and avoid rerereading from memory or cache. // 1st eighth tmp0 = _mm_loadu_pd (P(0,0,0)); tmp1 = _mm_loadu_pd (P(0,0,2)); tmp2 = _mm_loadu_pd (P(0,1,0)); tmp3 = _mm_loadu_pd (P(0,1,2)); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[0]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, dc01, dc23, dc01, dc23, dcP[0]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3,d2c01,d2c23,d2c01,d2c23,d2cP[0]); // 2nd eighth tmp0 = _mm_loadu_pd (P(0,2,0)); tmp1 = _mm_loadu_pd (P(0,2,2)); tmp2 = _mm_loadu_pd (P(0,3,0)); tmp3 = _mm_loadu_pd (P(0,3,2)); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[1]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, dc01, dc23, dc01, dc23, dcP[1]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3,d2c01,d2c23,d2c01,d2c23,d2cP[1]); // 3rd eighth tmp0 = _mm_loadu_pd (P(1,0,0)); tmp1 = _mm_loadu_pd (P(1,0,2)); tmp2 = _mm_loadu_pd (P(1,1,0)); tmp3 = _mm_loadu_pd (P(1,1,2)); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[2]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, dc01, dc23, dc01, dc23, dcP[2]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3,d2c01,d2c23,d2c01,d2c23,d2cP[2]); // 4th eighth tmp0 = _mm_loadu_pd (P(1,2,0)); tmp1 = _mm_loadu_pd (P(1,2,2)); tmp2 = _mm_loadu_pd (P(1,3,0)); tmp3 = _mm_loadu_pd (P(1,3,2)); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[3]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, dc01, dc23, dc01, dc23, dcP[3]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3,d2c01,d2c23,d2c01,d2c23,d2cP[3]); // 5th eighth tmp0 = _mm_loadu_pd (P(2,0,0)); tmp1 = _mm_loadu_pd (P(2,0,2)); tmp2 = _mm_loadu_pd (P(2,1,0)); tmp3 = _mm_loadu_pd (P(2,1,2)); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[4]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, dc01, dc23, dc01, dc23, dcP[4]); 
_MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3,d2c01,d2c23,d2c01,d2c23,d2cP[4]); // 6th eighth tmp0 = _mm_loadu_pd (P(2,2,0)); tmp1 = _mm_loadu_pd (P(2,2,2)); tmp2 = _mm_loadu_pd (P(2,3,0)); tmp3 = _mm_loadu_pd (P(2,3,2)); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[5]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, dc01, dc23, dc01, dc23, dcP[5]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3,d2c01,d2c23,d2c01,d2c23,d2cP[5]); // 7th eighth tmp0 = _mm_loadu_pd (P(3,0,0)); tmp1 = _mm_loadu_pd (P(3,0,2)); tmp2 = _mm_loadu_pd (P(3,1,0)); tmp3 = _mm_loadu_pd (P(3,1,2)); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[6]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, dc01, dc23, dc01, dc23, dcP[6]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3,d2c01,d2c23,d2c01,d2c23,d2cP[6]); // 8th eighth tmp0 = _mm_loadu_pd (P(3,2,0)); tmp1 = _mm_loadu_pd (P(3,2,2)); tmp2 = _mm_loadu_pd (P(3,3,0)); tmp3 = _mm_loadu_pd (P(3,3,2)); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[7]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, dc01, dc23, dc01, dc23, dcP[7]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3,d2c01,d2c23,d2c01,d2c23,d2cP[7]); // Now compute bcP, dbcP, bdcP, d2bcP, bd2cP, and dbdc products _MM_DDOT4_PD ( b01, b23, b01, b23, cP[0], cP[1], cP[2], cP[3], bcP01); _MM_DDOT4_PD ( b01, b23, b01, b23, cP[4], cP[5], cP[6], cP[7], bcP23); _MM_DDOT4_PD ( db01, db23, db01, db23, cP[0], cP[1], cP[2], cP[3], dbcP01); _MM_DDOT4_PD ( db01, db23, db01, db23, cP[4], cP[5], cP[6], cP[7], dbcP23); _MM_DDOT4_PD ( b01, b23, b01, b23, dcP[0], dcP[1], dcP[2], dcP[3], bdcP01); _MM_DDOT4_PD ( b01, b23, b01, b23, dcP[4], dcP[5], dcP[6], dcP[7], bdcP23); _MM_DDOT4_PD (d2b01, d2b23, d2b01, d2b23, cP[0], cP[1], cP[2], cP[3], d2bcP01); _MM_DDOT4_PD (d2b01, d2b23, d2b01, d2b23, cP[4], cP[5], cP[6], cP[7], d2bcP23); _MM_DDOT4_PD ( b01, b23, b01, b23, d2cP[0], d2cP[1], d2cP[2], d2cP[3], bd2cP01); _MM_DDOT4_PD ( b01, b23, b01, b23, d2cP[4], d2cP[5], d2cP[6], d2cP[7], bd2cP23); _MM_DDOT4_PD ( db01, db23, db01, db23, dcP[0], dcP[1], dcP[2], dcP[3], dbdcP01); 
_MM_DDOT4_PD ( db01, db23, db01, db23, dcP[4], dcP[5], dcP[6], dcP[7], dbdcP23); // Compute value _MM_DOT4_PD (a01, a23, bcP01, bcP23, *val); // Compute gradient _MM_DOT4_PD (da01, da23, bcP01, bcP23, grad[0]); _MM_DOT4_PD (a01, a23, dbcP01, dbcP23, grad[1]); _MM_DOT4_PD (a01, a23, bdcP01, bdcP23, grad[2]); // Compute laplacian double lx, ly, lz; _MM_DOT4_PD (d2a01, d2a23, bcP01, bcP23, lx); // d2x _MM_DOT4_PD (a01, a23, d2bcP01, d2bcP23, ly); // d2y _MM_DOT4_PD (a01, a23, bd2cP01, bd2cP23, lz); // d2z double dxInv = spline->x_grid.delta_inv; double dyInv = spline->y_grid.delta_inv; double dzInv = spline->z_grid.delta_inv; grad[0] *= dxInv; grad[1] *= dyInv; grad[2] *= dzInv; lx *= dxInv*dxInv; ly *= dyInv*dyInv; lz *= dzInv*dzInv; *lapl = lx + ly + lz; #undef P } /* Value, gradient, and Hessian */ inline void eval_UBspline_3d_d_vgh (UBspline_3d_d * restrict spline, double x, double y, double z, double* restrict val, double* restrict grad, double* restrict hess) { _mm_prefetch ((const char*) &A_d[ 0],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 1],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 2],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 3],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 4],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 5],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 6],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 7],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 8],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 9],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[10],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[11],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[12],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[13],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[14],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[15],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[16],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[17],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[18],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[19],_MM_HINT_T0); 
_mm_prefetch ((const char*) &A_d[20],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[21],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[22],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[23],_MM_HINT_T0); x -= spline->x_grid.start; y -= spline->y_grid.start; z -= spline->z_grid.start; double ux = x*spline->x_grid.delta_inv; double uy = y*spline->y_grid.delta_inv; double uz = z*spline->z_grid.delta_inv; ux = fmin (ux, (double)(spline->x_grid.num)-1.0e-5); uy = fmin (uy, (double)(spline->y_grid.num)-1.0e-5); uz = fmin (uz, (double)(spline->z_grid.num)-1.0e-5); double ipartx, iparty, ipartz, tx, ty, tz; tx = modf (ux, &ipartx); int ix = (int) ipartx; ty = modf (uy, &iparty); int iy = (int) iparty; tz = modf (uz, &ipartz); int iz = (int) ipartz; int xs = spline->x_stride; int ys = spline->y_stride; // This macro is used to give the pointer to coefficient data. // i and j should be in the range [0,3]. Coefficients are read four // at a time, so no k value is needed. #define P(i,j,k) (spline->coefs+(ix+(i))*xs+(iy+(j))*ys+(iz+k)) // Prefetch the data from main memory into cache so it's available // when we need to use it. 
_mm_prefetch ((const char*)P(0,0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,1,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,2,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(0,3,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,1,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,2,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(1,3,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,1,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,2,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(2,3,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,0,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,0,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,1,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,1,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,2,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,2,2), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,3,0), _MM_HINT_T0); _mm_prefetch ((const char*)P(3,3,2), _MM_HINT_T0); // Now compute the vectors: // tpx = [t_x^3 t_x^2 t_x 1] // tpy = [t_y^3 t_y^2 t_y 1] // tpz = [t_z^3 t_z^2 t_z 1] // a = A * tpx, b = A * tpy, c = A * tpz // da = dA * tpx, db = dA * tpy, dc = dA * tpz, etc. 
// A is 4x4 matrix given by the rows A0, A1, A2, A3 __m128d tpx01, tpx23, tpy01, tpy23, tpz01, tpz23, a01, b01, c01, da01, db01, dc01, d2a01, d2b01, d2c01, a23, b23, c23, da23, db23, dc23, d2a23, d2b23, d2c23, cP[8], dcP[8], d2cP[8], bcP01, dbcP01, bdcP01, d2bcP01, dbdcP01, bd2cP01, bcP23, dbcP23, bdcP23, d2bcP23, dbdcP23, bd2cP23, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; tpx01 = _mm_set_pd (tx*tx*tx, tx*tx); tpx23 = _mm_set_pd (tx, 1.0); tpy01 = _mm_set_pd (ty*ty*ty, ty*ty); tpy23 = _mm_set_pd (ty, 1.0); tpz01 = _mm_set_pd (tz*tz*tz, tz*tz); tpz23 = _mm_set_pd (tz, 1.0); // x-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpx01, tpx23, tpx01, tpx23, a01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpx01, tpx23, tpx01, tpx23, a23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpx01, tpx23, tpx01, tpx23, da01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpx01, tpx23, tpx01, tpx23, da23); _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpx01, tpx23, tpx01, tpx23, d2a01); _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpx01, tpx23, tpx01, tpx23, d2a23); // y-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpy01, tpy23, tpy01, tpy23, b01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpy01, tpy23, tpy01, tpy23, b23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpy01, tpy23, tpy01, tpy23, db01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpy01, tpy23, tpy01, tpy23, db23); _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpy01, tpy23, tpy01, tpy23, d2b01); _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpy01, tpy23, tpy01, tpy23, d2b23); // z-dependent vectors _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpz01, tpz23, tpz01, tpz23, c01); _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpz01, tpz23, tpz01, tpz23, c23); _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpz01, tpz23, tpz01, tpz23, dc01); _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpz01, tpz23, tpz01, 
tpz23, dc23); _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpz01, tpz23, tpz01, tpz23, d2c01); _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpz01, tpz23, tpz01, tpz23, d2c23); // Compute cP, dcP, and d2cP products 1/8 at a time to maximize // register reuse and avoid rerereading from memory or cache. // 1st eighth tmp0 = _mm_loadu_pd (P(0,0,0)); tmp1 = _mm_loadu_pd (P(0,0,2)); tmp2 = _mm_loadu_pd (P(0,1,0)); tmp3 = _mm_loadu_pd (P(0,1,2)); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[0]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, dc01, dc23, dc01, dc23, dcP[0]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3,d2c01,d2c23,d2c01,d2c23,d2cP[0]); // 2nd eighth tmp0 = _mm_loadu_pd (P(0,2,0)); tmp1 = _mm_loadu_pd (P(0,2,2)); tmp2 = _mm_loadu_pd (P(0,3,0)); tmp3 = _mm_loadu_pd (P(0,3,2)); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[1]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, dc01, dc23, dc01, dc23, dcP[1]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3,d2c01,d2c23,d2c01,d2c23,d2cP[1]); // 3rd eighth tmp0 = _mm_loadu_pd (P(1,0,0)); tmp1 = _mm_loadu_pd (P(1,0,2)); tmp2 = _mm_loadu_pd (P(1,1,0)); tmp3 = _mm_loadu_pd (P(1,1,2)); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[2]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, dc01, dc23, dc01, dc23, dcP[2]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3,d2c01,d2c23,d2c01,d2c23,d2cP[2]); // 4th eighth tmp0 = _mm_loadu_pd (P(1,2,0)); tmp1 = _mm_loadu_pd (P(1,2,2)); tmp2 = _mm_loadu_pd (P(1,3,0)); tmp3 = _mm_loadu_pd (P(1,3,2)); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[3]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, dc01, dc23, dc01, dc23, dcP[3]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3,d2c01,d2c23,d2c01,d2c23,d2cP[3]); // 5th eighth tmp0 = _mm_loadu_pd (P(2,0,0)); tmp1 = _mm_loadu_pd (P(2,0,2)); tmp2 = _mm_loadu_pd (P(2,1,0)); tmp3 = _mm_loadu_pd (P(2,1,2)); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[4]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, dc01, dc23, dc01, dc23, dcP[4]); 
_MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3,d2c01,d2c23,d2c01,d2c23,d2cP[4]); // 6th eighth tmp0 = _mm_loadu_pd (P(2,2,0)); tmp1 = _mm_loadu_pd (P(2,2,2)); tmp2 = _mm_loadu_pd (P(2,3,0)); tmp3 = _mm_loadu_pd (P(2,3,2)); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[5]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, dc01, dc23, dc01, dc23, dcP[5]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3,d2c01,d2c23,d2c01,d2c23,d2cP[5]); // 7th eighth tmp0 = _mm_loadu_pd (P(3,0,0)); tmp1 = _mm_loadu_pd (P(3,0,2)); tmp2 = _mm_loadu_pd (P(3,1,0)); tmp3 = _mm_loadu_pd (P(3,1,2)); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[6]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, dc01, dc23, dc01, dc23, dcP[6]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3,d2c01,d2c23,d2c01,d2c23,d2cP[6]); // 8th eighth tmp0 = _mm_loadu_pd (P(3,2,0)); tmp1 = _mm_loadu_pd (P(3,2,2)); tmp2 = _mm_loadu_pd (P(3,3,0)); tmp3 = _mm_loadu_pd (P(3,3,2)); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, c01, c23, c01, c23, cP[7]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3, dc01, dc23, dc01, dc23, dcP[7]); _MM_DDOT4_PD(tmp0,tmp1,tmp2,tmp3,d2c01,d2c23,d2c01,d2c23,d2cP[7]); // Now compute bcP, dbcP, bdcP, d2bcP, bd2cP, and dbdc products _MM_DDOT4_PD (b01, b23, b01, b23, cP[0], cP[1], cP[2], cP[3], bcP01); _MM_DDOT4_PD (b01, b23, b01, b23, cP[4], cP[5], cP[6], cP[7], bcP23); _MM_DDOT4_PD (db01, db23, db01, db23, cP[0], cP[1], cP[2], cP[3], dbcP01); _MM_DDOT4_PD (db01, db23, db01, db23, cP[4], cP[5], cP[6], cP[7], dbcP23); _MM_DDOT4_PD (b01, b23, b01, b23, dcP[0], dcP[1], dcP[2], dcP[3], bdcP01); _MM_DDOT4_PD (b01, b23, b01, b23, dcP[4], dcP[5], dcP[6], dcP[7], bdcP23); _MM_DDOT4_PD (d2b01, d2b23, d2b01, d2b23, cP[0], cP[1], cP[2], cP[3], d2bcP01); _MM_DDOT4_PD (d2b01, d2b23, d2b01, d2b23, cP[4], cP[5], cP[6], cP[7], d2bcP23); _MM_DDOT4_PD (b01, b23, b01, b23, d2cP[0], d2cP[1], d2cP[2], d2cP[3], bd2cP01); _MM_DDOT4_PD (b01, b23, b01, b23, d2cP[4], d2cP[5], d2cP[6], d2cP[7], bd2cP23); _MM_DDOT4_PD (db01, db23, db01, db23, dcP[0], dcP[1], dcP[2], dcP[3], dbdcP01); _MM_DDOT4_PD 
(db01, db23, db01, db23, dcP[4], dcP[5], dcP[6], dcP[7], dbdcP23); // Compute value _MM_DOT4_PD (a01, a23, bcP01, bcP23, *val); // Compute gradient _MM_DOT4_PD (da01, da23, bcP01, bcP23, grad[0]); _MM_DOT4_PD (a01, a23, dbcP01, dbcP23, grad[1]); _MM_DOT4_PD (a01, a23, bdcP01, bdcP23, grad[2]); // Compute hessian // d2x _MM_DOT4_PD (d2a01, d2a23, bcP01, bcP23, hess[0]); // d2y _MM_DOT4_PD (a01, a23, d2bcP01, d2bcP23, hess[4]); // d2z _MM_DOT4_PD (a01, a23, bd2cP01, bd2cP23, hess[8]); // dx dy _MM_DOT4_PD (da01, da23, dbcP01, dbcP23, hess[1]); // dx dz _MM_DOT4_PD (da01, da23, bdcP01, bdcP23, hess[2]); // dy dz _MM_DOT4_PD (a01, a23, dbdcP01, dbdcP23, hess[5]); // Multiply gradients and hessians by appropriate grid inverses double dxInv = spline->x_grid.delta_inv; double dyInv = spline->y_grid.delta_inv; double dzInv = spline->z_grid.delta_inv; grad[0] *= dxInv; grad[1] *= dyInv; grad[2] *= dzInv; hess[0] *= dxInv*dxInv; hess[4] *= dyInv*dyInv; hess[8] *= dzInv*dzInv; hess[1] *= dxInv*dyInv; hess[2] *= dxInv*dzInv; hess[5] *= dyInv*dzInv; // Copy hessian elements into lower half of 3x3 matrix hess[3] = hess[1]; hess[6] = hess[2]; hess[7] = hess[5]; #undef P } // tmp0 = _mm_hadd_pd(_mm_mul_pd (A_d[0], tpx01), _mm_mul_pd (A_d[1], tpx23)); // tmp1 = _mm_hadd_pd(_mm_mul_pd (A_d[2], tpx01), _mm_mul_pd (A_d[3], tpx23)); // tmp2 = _mm_hadd_pd(_mm_mul_pd (A_d[4], tpx01), _mm_mul_pd (A_d[5], tpx23)); // tmp3 = _mm_hadd_pd(_mm_mul_pd (A_d[6], tpx01), _mm_mul_pd (A_d[7], tpx23)); // a01 = _mm_hadd_pd(tmp0, tmp1); // a23 = _mm_hadd_pd(tmp2, tmp3); // tmp0 = _mm_hadd_pd(_mm_mul_pd (A_d[8], tpx01), _mm_mul_pd (A_d[9], tpx23)); // tmp1 = _mm_hadd_pd(_mm_mul_pd (A_d[10], tpx01), _mm_mul_pd (A_d[11], tpx23)); // tmp2 = _mm_hadd_pd(_mm_mul_pd (A_d[12], tpx01), _mm_mul_pd (A_d[13], tpx23)); // tmp3 = _mm_hadd_pd(_mm_mul_pd (A_d[14], tpx01), _mm_mul_pd (A_d[15], tpx23)); // da01 = _mm_hadd_pd(tmp0, tmp1); // da23 = _mm_hadd_pd(tmp2, tmp3); // tmp0 = _mm_hadd_pd(_mm_mul_pd (A_d[16], 
tpx01), _mm_mul_pd (A_d[17], tpx23)); // tmp1 = _mm_hadd_pd(_mm_mul_pd (A_d[18], tpx01), _mm_mul_pd (A_d[19], tpx23)); // tmp2 = _mm_hadd_pd(_mm_mul_pd (A_d[20], tpx01), _mm_mul_pd (A_d[21], tpx23)); // tmp3 = _mm_hadd_pd(_mm_mul_pd (A_d[22], tpx01), _mm_mul_pd (A_d[23], tpx23)); // d2a01 = _mm_hadd_pd(tmp0, tmp1); // d2a23 = _mm_hadd_pd(tmp2, tmp3); // tmp0 = _mm_hadd_pd(_mm_mul_pd (A_d[0], tpy01), _mm_mul_pd (A_d[1], tpy23)); // tmp1 = _mm_hadd_pd(_mm_mul_pd (A_d[2], tpy01), _mm_mul_pd (A_d[3], tpy23)); // tmp2 = _mm_hadd_pd(_mm_mul_pd (A_d[4], tpy01), _mm_mul_pd (A_d[5], tpy23)); // tmp3 = _mm_hadd_pd(_mm_mul_pd (A_d[6], tpy01), _mm_mul_pd (A_d[7], tpy23)); // b01 = _mm_hadd_pd(tmp0, tmp1); // b23 = _mm_hadd_pd(tmp2, tmp3); // tmp0 = _mm_hadd_pd(_mm_mul_pd (A_d[8], tpy01), _mm_mul_pd (A_d[9], tpy23)); // tmp1 = _mm_hadd_pd(_mm_mul_pd (A_d[10], tpy01), _mm_mul_pd (A_d[11], tpy23)); // tmp2 = _mm_hadd_pd(_mm_mul_pd (A_d[12], tpy01), _mm_mul_pd (A_d[13], tpy23)); // tmp3 = _mm_hadd_pd(_mm_mul_pd (A_d[14], tpy01), _mm_mul_pd (A_d[15], tpy23)); // db01 = _mm_hadd_pd(tmp0, tmp1); // db23 = _mm_hadd_pd(tmp2, tmp3); // tmp0 = _mm_hadd_pd(_mm_mul_pd (A_d[16], tpy01), _mm_mul_pd (A_d[17], tpz23)); // tmp1 = _mm_hadd_pd(_mm_mul_pd (A_d[18], tpy01), _mm_mul_pd (A_d[19], tpz23)); // tmp2 = _mm_hadd_pd(_mm_mul_pd (A_d[20], tpy01), _mm_mul_pd (A_d[21], tpz23)); // tmp3 = _mm_hadd_pd(_mm_mul_pd (A_d[22], tpy01), _mm_mul_pd (A_d[23], tpz23)); // d2b01 = _mm_hadd_pd(tmp0, tmp1); // d2b23 = _mm_hadd_pd(tmp2, tmp3); // tmp0 = _mm_hadd_pd(_mm_mul_pd (A_d[0], tpz01), _mm_mul_pd (A_d[1], tpz23)); // tmp1 = _mm_hadd_pd(_mm_mul_pd (A_d[2], tpz01), _mm_mul_pd (A_d[3], tpz23)); // tmp2 = _mm_hadd_pd(_mm_mul_pd (A_d[4], tpz01), _mm_mul_pd (A_d[5], tpz23)); // tmp3 = _mm_hadd_pd(_mm_mul_pd (A_d[6], tpz01), _mm_mul_pd (A_d[7], tpz23)); // c01 = _mm_hadd_pd(tmp0, tmp1); // c23 = _mm_hadd_pd(tmp2, tmp3); // tmp0 = _mm_hadd_pd(_mm_mul_pd (A_d[8], tpz01), _mm_mul_pd (A_d[9], tpz23)); // tmp1 = 
_mm_hadd_pd(_mm_mul_pd (A_d[10], tpz01), _mm_mul_pd (A_d[11], tpz23)); // tmp2 = _mm_hadd_pd(_mm_mul_pd (A_d[12], tpz01), _mm_mul_pd (A_d[13], tpz23)); // tmp3 = _mm_hadd_pd(_mm_mul_pd (A_d[14], tpz01), _mm_mul_pd (A_d[15], tpz23)); // dc01 = _mm_hadd_pd(tmp0, tmp1); // dc23 = _mm_hadd_pd(tmp2, tmp3); // tmp0 = _mm_hadd_pd(_mm_mul_pd (A_d[16], tpz01), _mm_mul_pd (A_d[17], tpz23)); // tmp1 = _mm_hadd_pd(_mm_mul_pd (A_d[18], tpz01), _mm_mul_pd (A_d[19], tpz23)); // tmp2 = _mm_hadd_pd(_mm_mul_pd (A_d[20], tpz01), _mm_mul_pd (A_d[21], tpz23)); // tmp3 = _mm_hadd_pd(_mm_mul_pd (A_d[22], tpz01), _mm_mul_pd (A_d[23], tpz23)); // d2c01 = _mm_hadd_pd(tmp0, tmp1); // d2c23 = _mm_hadd_pd(tmp2, tmp3); #endif einspline-0.9.2/src/nugrid.h0000664000113000011300000000520011035750327012643 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. // // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. 
// // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #ifndef NUGRID_H #define NUGRID_H #include typedef enum { LINEAR, GENERAL, CENTER, LOG } grid_type; // Nonuniform grid base structure typedef struct { // public data grid_type code; double start, end; double* restrict points; int num_points; int (*reverse_map)(void *grid, double x); } NUgrid; #ifdef __cplusplus extern "C" #endif typedef struct { // public data grid_type code; double start, end; double* restrict points; int num_points; int (*reverse_map)(void *grid, double x); // private data double a, aInv, b, bInv, center, even_half; int half_points, odd_one; bool odd; } center_grid; typedef struct { // public data grid_type code; double start, end; double* restrict points; int num_points; int (*reverse_map)(void *grid, double x); // private data double a, ainv, startinv; } log_grid; #ifdef __cplusplus extern "C" { #endif NUgrid* create_center_grid (double start, double end, double ratio, int num_points); NUgrid* create_log_grid (double start, double end, int num_points); NUgrid* create_general_grid (double *points, int num_points); void destroy_grid (NUgrid *grid); #ifdef __cplusplus } #endif #endif einspline-0.9.2/src/multi_bspline_eval_sse_z_cpp.cc0000664000113000011300000000276611015556550017451 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. 
// // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. // // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #include "multi_bspline_eval_sse_z_impl.h" einspline-0.9.2/src/TestNUBspline.c0000664000113000011300000005403211037742734014061 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. // // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. 
// // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #include "nubspline.h" #include #include #include #include #include #include #ifndef M_PI #define M_PI 3.1415926535897932384626433 #endif double drand48(); void PrintPassFail(bool pass) { if (pass) // Print green "Passed" fprintf (stderr, "%c[32mPassed%c[0m\n", 0x1B, 0x1B); else // Print red "Failed" fprintf (stderr, "%c[31mFailed%c[0m\n", 0x1B, 0x1B); } void PrintTest (char *name, bool pass) { int n = strlen (name); fprintf (stderr, "%s:", name); for (int i=n; i<57; i++) fprintf (stderr, " "); PrintPassFail (pass); } bool TestCenterGrid() { fprintf (stderr, "Testing CenterGrid: "); bool passed = true; NUgrid* grid = create_center_grid (-5.0, 7.0, 6.0, 200); for (int i=0; i<10000; i++) { double x = -5.0+12.0*drand48(); int lo = (*grid->reverse_map)(grid, x); assert (x >= grid->points[lo]); assert (x <= grid->points[lo+1]); } PrintPassFail (passed); return passed; } bool TestGeneralGrid() { fprintf (stderr, "Testing GeneralGrid: "); bool passed = true; NUgrid* centgrid = create_center_grid (-5.0, 7.0, 6.0, 200); NUgrid* grid = create_general_grid (centgrid->points, 200); for (int i=0; i<10000; i++) { double x = -5.0+12.0*drand48(); int lo = (*grid->reverse_map)(grid, x); passed = passed && (x >= grid->points[lo]); passed = passed && (x <= grid->points[lo+1]); } PrintPassFail (passed); return passed; } bool close_float (float x, float y) { float max = fmaxf (x, y); return (fabs(x-y)/max < 1.0e-5); } bool TestNUB_1d_s() { double start = -5.0; double end = 7.0; int N = 200; NUgrid* grid = create_center_grid (start, end, 6.0, N); bool passed = true; float data[N]; for (int i=0; ipoints[26]; float val; eval_NUBspline_1d_s (periodic, x, &val); bool interp_passed = 
close_float (val, data[26]); PrintTest ("Interpolation", interp_passed); passed = passed && interp_passed; // Create spline with fixed first derivative: bc.lCode = DERIV1; bc.lVal = 1.5; bc.rCode = DERIV1; bc.rVal = -0.3; NUBspline_1d_s *fixed_first = create_NUBspline_1d_s (grid, bc, data); fprintf (stderr, "Testing 1D single-precsion fixed first derivative boundary conditions: \n"); eval_NUBspline_1d_s_vg (fixed_first, start, &sval, &sgrad); eval_NUBspline_1d_s_vg (fixed_first, end, &eval, &egrad); bool bc_passed = close_float (sgrad, 1.5) && close_float (egrad, -0.3); PrintTest ("Boundary conditions", bc_passed); x = grid->points[26]; eval_NUBspline_1d_s (periodic, x, &val); interp_passed = close_float (val, data[26]); PrintTest ("Interpolation", interp_passed); passed = passed && interp_passed && bc_passed; // Create spline with fixed second derivative: bc.lCode = DERIV2; bc.lVal = 1.5; bc.rCode = DERIV2; bc.rVal = -0.3; NUBspline_1d_s *fixed_second = create_NUBspline_1d_s (grid, bc, data); fprintf (stderr, "Testing 1d_s fixed second derivative boundary conditions: \n"); eval_NUBspline_1d_s_vgl (fixed_second, start, &sval, &sgrad, &slapl); eval_NUBspline_1d_s_vgl (fixed_second, end, &eval, &egrad, &elapl); bc_passed = close_float (slapl, 1.5) && close_float (elapl, -0.3); fprintf (stderr, "slapl = %1.8f elapl = %1.8f\n", slapl, elapl); PrintTest ("Boundary conditions", bc_passed); x = grid->points[26]; eval_NUBspline_1d_s (periodic, x, &val); interp_passed = close_float (val, data[26]); PrintTest ("Interpolation", interp_passed); passed = passed && interp_passed && bc_passed; return passed; } void GridSpeedTest() { NUgrid* centgrid = create_center_grid (-5.0, 7.0, 6.0, 2000); NUgrid* gengrid = create_general_grid (centgrid->points, 2000); int centsum=0, gensum=0; clock_t rstart, rend, cstart, cend, gstart, gend; rstart = clock(); for (int i=0; i<100000000; i++) { double x = -5.0 + 12.0*drand48(); } rend = clock(); cstart = clock(); for (int i=0; i<100000000; 
i++) { double x = -5.0 + 12.0*drand48(); centsum += (*centgrid->reverse_map)(centgrid, x); } cend = clock(); gstart = clock(); for (int i=0; i<100000000; i++) { double x = -5.0 + 12.0*drand48(); gensum += (*gengrid->reverse_map)(gengrid, x); } gend = clock(); double cent_time = (double)(cend-cstart+rstart-rend)/(double)CLOCKS_PER_SEC; double gen_time = (double)(gend-gstart+rstart-rend)/(double)CLOCKS_PER_SEC; fprintf (stderr, "%d %d\n", centsum, gensum); fprintf (stderr, "center_grid time = %1.3f s.\n", cent_time); fprintf (stderr, "general_grid time = %1.3f s.\n", gen_time); } void TestNUBasis() { NUgrid* centgrid = create_center_grid (-5.0, 7.0, 10.0, 20); NUBasis* basis = create_NUBasis (centgrid, true); double bfuncs[4]; for (double x=-5.0; x<=7.0; x+=0.001) { get_NUBasis_funcs_d (basis, x, bfuncs); fprintf (stderr, "%1.12f %1.12f %1.12f %1.12f %1.12f\n", x, bfuncs[0], bfuncs[1], bfuncs[2], bfuncs[3]); } } void TestNUBspline() { NUgrid* centgrid = create_center_grid (-5.0, 7.0, 10.0, 20); NUBasis* basis = create_NUBasis (centgrid, true); float data[20]; for (int i=0; i<20; i++) { double x = centgrid->points[i]; double angle = (x+5.0)/12.0 * 2.0*M_PI; data[i] = sin(angle); } BCtype_s bc; // bc.lCode = PERIODIC; bc.rCode = PERIODIC; bc.lCode = DERIV1; bc.lVal = 2.0*M_PI/12.0; bc.rCode = DERIV1; bc.rVal = 2.0*M_PI/12.0; //bc.lCode = NATURAL; bc.rCode = FLAT; NUBspline_1d_s *spline = create_NUBspline_1d_s (centgrid, bc, data); for (double x=-5.0; x<=7.0; x+=0.001) { float val, deriv; eval_NUBspline_1d_s_vg (spline, x, &val, &deriv); double angle = (x+5.0)/12.0 * 2.0*M_PI; fprintf (stderr, "%1.16e %1.16e %1.16e %1.16e\n", x, val, sin(angle), deriv); } } void TestNUBspline_d() { NUgrid* centgrid = create_center_grid (-5.0, 7.0, 10.0, 20); NUBasis* basis = create_NUBasis (centgrid, true); double data[20]; for (int i=0; i<20; i++) { double x = centgrid->points[i]; double angle = (x+5.0)/12.0 * 2.0*M_PI; data[i] = sin(angle); } BCtype_d bc; // bc.lCode = PERIODIC; 
bc.rCode = PERIODIC; bc.lCode = DERIV1; bc.lVal = 2.0*M_PI/12.0; bc.rCode = DERIV1; bc.rVal = 2.0*M_PI/12.0; //bc.lCode = NATURAL; bc.rCode = FLAT; NUBspline_1d_d *spline = create_NUBspline_1d_d (centgrid, bc, data); for (double x=-5.0; x<=7.0; x+=0.001) { double val, deriv; eval_NUBspline_1d_d_vg (spline, x, &val, &deriv); double angle = (x+5.0)/12.0 * 2.0*M_PI; fprintf (stderr, "%1.16e %1.16e %1.16e %1.16e\n", x, val, sin(angle), deriv); } } void TestNUB_2d_s() { int Mx=30, My=35; NUgrid *x_grid = create_center_grid (-3.0, 4.0, 7.5, Mx); NUgrid *y_grid = create_center_grid (-1.0, 9.0, 3.5, My); float data[Mx*My]; for (int ix=0; ixstart; double xf = x_grid->end;// + x_grid->points[1] - x_grid->points[0]; double yi = y_grid->start; double yf = y_grid->end;// + y_grid->points[1] - y_grid->points[0]; for (int ix=0; ixstart; double xf = x_grid->end;// + x_grid->points[1] - x_grid->points[0]; double yi = y_grid->start; double yf = y_grid->end;// + y_grid->points[1] - y_grid->points[0]; for (int ix=0; ixstart; double xf = x_grid->end; double yi = y_grid->start; double yf = y_grid->end; double zi = z_grid->start; double zf = z_grid->end; for (int ix=0; ixsp_code = %d\n", spline->sp_code); destroy_Bspline (spline); } void TestNUB_3d_d() { int Mx=20, My=27, Mz=23; NUgrid *x_grid = create_center_grid (-3.0, 4.0, 7.5, Mx); NUgrid *y_grid = create_center_grid (-1.0, 9.0, 3.5, My); NUgrid *z_grid = create_center_grid (-1.8, 2.0, 2.8, Mz); double data[Mx*My*Mz]; for (int ix=0; ixstart; double xf = x_grid->end; double yi = y_grid->start; double yf = y_grid->end; double zi = z_grid->start; double zf = z_grid->end; for (int ix=0; ixsp_code = %d\n", spline->sp_code); destroy_Bspline (spline); } void TestNUB_3d_c() { int Mx=20, My=27, Mz=23; NUgrid *x_grid = create_center_grid (-3.0, 4.0, 7.5, Mx); NUgrid *y_grid = create_center_grid (-1.0, 9.0, 3.5, My); NUgrid *z_grid = create_center_grid (-1.8, 2.0, 2.8, Mz); complex_float data[Mx*My*Mz]; for (int ix=0; ixstart; double xf = 
x_grid->end; double yi = y_grid->start; double yf = y_grid->end; double zi = z_grid->start; double zf = z_grid->end; for (int ix=0; ixstart; double xf = x_grid->end; double yi = y_grid->start; double yf = y_grid->end; double zi = z_grid->start; double zf = z_grid->end; for (int ix=0; ixstart+ 0.9999*drand48()*(x_grid->end - x_grid->start); double y = y_grid->start+ 0.9999*drand48()*(y_grid->end - y_grid->start); double z = z_grid->start+ 0.9999*drand48()*(z_grid->end - z_grid->start); } rend = clock(); start = clock(); for (int i=0; i<10000000; i++) { double x = x_grid->start+ 0.9999*drand48()*(x_grid->end - x_grid->start); double y = y_grid->start+ 0.9999*drand48()*(y_grid->end - y_grid->start); double z = z_grid->start+ 0.9999*drand48()*(z_grid->end - z_grid->start); eval_NUBspline_3d_s_vgh (spline, x, y, z, &val, grad, hess); } end = clock(); fprintf (stderr, "10,000,000 evalations in %f seconds.\n", (double)(end-start-(rend-rstart))/(double)CLOCKS_PER_SEC); } void SpeedNUB_3d_z() { int Mx=200, My=200, Mz=200; NUgrid *x_grid = create_center_grid (-3.0, 4.0, 7.5, Mx); NUgrid *y_grid = create_center_grid (-1.0, 9.0, 3.5, My); NUgrid *z_grid = create_center_grid (-1.8, 2.0, 2.8, Mz); complex_double *data = malloc (sizeof(complex_double)*Mx*My*Mz); for (int ix=0; ixstart+ 0.9999*drand48()*(x_grid->end - x_grid->start); double y = y_grid->start+ 0.9999*drand48()*(y_grid->end - y_grid->start); double z = z_grid->start+ 0.9999*drand48()*(z_grid->end - z_grid->start); } rend = clock(); start = clock(); for (int i=0; i<10000000; i++) { double x = x_grid->start+ 0.9999*drand48()*(x_grid->end - x_grid->start); double y = y_grid->start+ 0.9999*drand48()*(y_grid->end - y_grid->start); double z = z_grid->start+ 0.9999*drand48()*(z_grid->end - z_grid->start); eval_NUBspline_3d_z_vgh (spline, x, y, z, &val, grad, hess); } end = clock(); fprintf (stderr, "10,000,000 evalations in %f seconds.\n", (double)(end-start-(rend-rstart))/(double)CLOCKS_PER_SEC); } void TestNUB_2d_d() { 
int Mx=30, My=35; NUgrid *x_grid = create_center_grid (-3.0, 4.0, 7.5, Mx); NUgrid *y_grid = create_center_grid (-1.0, 9.0, 3.5, My); double data[Mx*My]; for (int ix=0; ixstart; double xf = x_grid->end;// + x_grid->points[1] - x_grid->points[0]; double yi = y_grid->start; double yf = y_grid->end;// + y_grid->points[1] - y_grid->points[0]; for (int ix=0; ix #include #include "bspline_base.h" #include "multi_nubspline_structs.h" /************************************************************/ /* 1D double-precision, complex evaulation functions */ /************************************************************/ void eval_multi_NUBspline_1d_z (multi_NUBspline_1d_z *spline, double x, complex_double* restrict vals) { double a[4]; int ix = get_NUBasis_funcs_d (spline->x_basis, x, a); int xs = spline->x_stride; complex_double* restrict coefs0 = spline->coefs +(ix+0)*xs; complex_double* restrict coefs1 = spline->coefs +(ix+1)*xs; complex_double* restrict coefs2 = spline->coefs +(ix+2)*xs; complex_double* restrict coefs3 = spline->coefs +(ix+3)*xs; for (int n=0; nnum_splines; n++) vals[n] = (a[0]*coefs0[n] + a[1]*coefs1[n] + a[2]*coefs2[n] + a[3]*coefs3[n]); } void eval_multi_NUBspline_1d_z_vg (multi_NUBspline_1d_z *spline, double x, complex_double* restrict vals, complex_double* restrict grads) { double a[4], da[4]; int ix = get_NUBasis_dfuncs_d (spline->x_basis, x, a, da); int xs = spline->x_stride; for (int n=0; nnum_splines; n++) { vals[n] = 0.0; grads[n] = 0.0; } for (int i=0; i<4; i++) { complex_double* restrict coefs = spline->coefs + ((ix+i)*xs); for (int n=0; nnum_splines; n++) { vals[n] += a[i] * coefs[n]; grads[n] += da[i] * coefs[n]; } } } void eval_multi_NUBspline_1d_z_vgl (multi_NUBspline_1d_z *spline, double x, complex_double* restrict vals, complex_double* restrict grads, complex_double* restrict lapl) { double a[4], da[4], d2a[4]; int ix = get_NUBasis_d2funcs_d (spline->x_basis, x, a, da, d2a); int xs = spline->x_stride; for (int n=0; nnum_splines; n++) { 
vals[n] = 0.0; grads[n] = 0.0; lapl[n] = 0.0; } for (int i=0; i<4; i++) { complex_double* restrict coefs = spline->coefs + ((ix+i)*xs); for (int n=0; nnum_splines; n++) { vals[n] += a[i] * coefs[n]; grads[n] += da[i] * coefs[n]; lapl[n] += d2a[i] * coefs[n]; } } } void eval_multi_NUBspline_1d_z_vgh (multi_NUBspline_1d_z *spline, double x, complex_double* restrict vals, complex_double* restrict grads, complex_double* restrict hess) { eval_multi_NUBspline_1d_z_vgl (spline, x, vals, grads, hess); } // /************************************************************/ // /* 2D double-precision, complex evaulation functions */ // /************************************************************/ // void // eval_multi_NUBspline_2d_z (multi_NUBspline_2d_z *spline, // double x, double y, // complex_double* restrict vals) // { // _mm_prefetch ((const char*) &A_d[ 0],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 1],_MM_HINT_T0); // _mm_prefetch ((const char*) &A_d[ 2],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 3],_MM_HINT_T0); // _mm_prefetch ((const char*) &A_d[ 4],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 5],_MM_HINT_T0); // _mm_prefetch ((const char*) &A_d[ 6],_MM_HINT_T0); _mm_prefetch ((const char*) &A_d[ 7],_MM_HINT_T0); // x -= spline->x_grid.start; // y -= spline->y_grid.start; // double ux = x*spline->x_grid.delta_inv; // double uy = y*spline->y_grid.delta_inv; // ux = fmin (ux, (double)(spline->x_grid.num)-1.0e-5); // uy = fmin (uy, (double)(spline->y_grid.num)-1.0e-5); // double ipartx, iparty, tx, ty; // tx = modf (ux, &ipartx); int ix = (int) ipartx; // ty = modf (uy, &iparty); int iy = (int) iparty; // int xs = spline->x_stride; // int ys = spline->y_stride; // int N = spline->num_splines; // // Now compute the vectors: // // tpx = [t_x^3 t_x^2 t_x 1] // // tpy = [t_y^3 t_y^2 t_y 1] // // a = A * tpx, b = A * tpy, c = A * tpz // // A is 4x4 matrix given by the rows A0, A1, A2, A3 // __m128d tpx01, tpx23, tpy01, tpy23, // a01 , b01, a23, b23; // tpx01 = 
_mm_set_pd (tx*tx*tx, tx*tx); // tpx23 = _mm_set_pd (tx, 1.0); // tpy01 = _mm_set_pd (ty*ty*ty, ty*ty); // tpy23 = _mm_set_pd (ty, 1.0); // // x-dependent vectors // _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpx01, tpx23, tpx01, tpx23, a01); // _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpx01, tpx23, tpx01, tpx23, a23); // // y-dependent vectors // _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpy01, tpy23, tpy01, tpy23, b01); // _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpy01, tpy23, tpy01, tpy23, b23); // // Zero-out values // __m128d mvals[N]; // for (int n=0; ncoefs + (ix+i)*xs + (iy+j)*ys); // for (int n=0; nx_grid.start; // y -= spline->y_grid.start; // double ux = x*spline->x_grid.delta_inv; // double uy = y*spline->y_grid.delta_inv; // ux = fmin (ux, (double)(spline->x_grid.num)-1.0e-5); // uy = fmin (uy, (double)(spline->y_grid.num)-1.0e-5); // double ipartx, iparty, tx, ty; // tx = modf (ux, &ipartx); int ix = (int) ipartx; // ty = modf (uy, &iparty); int iy = (int) iparty; // int xs = spline->x_stride; // int ys = spline->y_stride; // int N = spline->num_splines; // // Now compute the vectors: // // tpx = [t_x^3 t_x^2 t_x 1] // // tpy = [t_y^3 t_y^2 t_y 1] // // a = A * tpx, b = A * tpy, c = A * tpz // // A is 4x4 matrix given by the rows A0, A1, A2, A3 // __m128d tpx01, tpx23, tpy01, tpy23, // a01 , b01, a23, b23, // da01 , db01, da23, db23; // tpx01 = _mm_set_pd (tx*tx*tx, tx*tx); // tpx23 = _mm_set_pd (tx, 1.0); // tpy01 = _mm_set_pd (ty*ty*ty, ty*ty); // tpy23 = _mm_set_pd (ty, 1.0); // // x-dependent vectors // _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpx01, tpx23, tpx01, tpx23, a01); // _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpx01, tpx23, tpx01, tpx23, a23); // _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpx01, tpx23, tpx01, tpx23, da01); // _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpx01, tpx23, tpx01, tpx23, da23); // // y-dependent vectors // _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], 
A_d[ 3], tpy01, tpy23, tpy01, tpy23, b01); // _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpy01, tpy23, tpy01, tpy23, b23); // _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpy01, tpy23, tpy01, tpy23, db01); // _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpy01, tpy23, tpy01, tpy23, db23); // // Zero-out values // __m128d mvals[N], mgrads[2*N]; // for (int n=0; ncoefs + (ix+i)*xs + (iy+j)*ys); // for (int n=0; nx_grid.delta_inv; // double dyInv = spline->y_grid.delta_inv; // complex_double lapl2[2*N]; // for (int n=0; nx_grid.start; // y -= spline->y_grid.start; // double ux = x*spline->x_grid.delta_inv; // double uy = y*spline->y_grid.delta_inv; // ux = fmin (ux, (double)(spline->x_grid.num)-1.0e-5); // uy = fmin (uy, (double)(spline->y_grid.num)-1.0e-5); // double ipartx, iparty, tx, ty; // tx = modf (ux, &ipartx); int ix = (int) ipartx; // ty = modf (uy, &iparty); int iy = (int) iparty; // int xs = spline->x_stride; // int ys = spline->y_stride; // int N = spline->num_splines; // // Now compute the vectors: // // tpx = [t_x^3 t_x^2 t_x 1] // // tpy = [t_y^3 t_y^2 t_y 1] // // a = A * tpx, b = A * tpy, c = A * tpz // // A is 4x4 matrix given by the rows A0, A1, A2, A3 // __m128d tpx01, tpx23, tpy01, tpy23, // a01 , b01, a23, b23, // da01 , db01, da23, db23, // d2a01, d2b01, d2a23, d2b23; // tpx01 = _mm_set_pd (tx*tx*tx, tx*tx); // tpx23 = _mm_set_pd (tx, 1.0); // tpy01 = _mm_set_pd (ty*ty*ty, ty*ty); // tpy23 = _mm_set_pd (ty, 1.0); // // x-dependent vectors // _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpx01, tpx23, tpx01, tpx23, a01); // _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpx01, tpx23, tpx01, tpx23, a23); // _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpx01, tpx23, tpx01, tpx23, da01); // _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpx01, tpx23, tpx01, tpx23, da23); // _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpx01, tpx23, tpx01, tpx23, d2a01); // _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpx01, 
tpx23, tpx01, tpx23, d2a23); // // y-dependent vectors // _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpy01, tpy23, tpy01, tpy23, b01); // _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpy01, tpy23, tpy01, tpy23, b23); // _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpy01, tpy23, tpy01, tpy23, db01); // _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpy01, tpy23, tpy01, tpy23, db23); // _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpy01, tpy23, tpy01, tpy23, d2b01); // _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpy01, tpy23, tpy01, tpy23, d2b23); // // Zero-out values // __m128d mvals[N], mgrads[2*N], mlapl[2*N]; // for (int n=0; ncoefs + (ix+i)*xs + (iy+j)*ys); // for (int n=0; nx_grid.delta_inv; // double dyInv = spline->y_grid.delta_inv; // complex_double lapl2[2*N]; // for (int n=0; nx_grid.start; // y -= spline->y_grid.start; // double ux = x*spline->x_grid.delta_inv; // double uy = y*spline->y_grid.delta_inv; // ux = fmin (ux, (double)(spline->x_grid.num)-1.0e-5); // uy = fmin (uy, (double)(spline->y_grid.num)-1.0e-5); // double ipartx, iparty, tx, ty; // tx = modf (ux, &ipartx); int ix = (int) ipartx; // ty = modf (uy, &iparty); int iy = (int) iparty; // int xs = spline->x_stride; // int ys = spline->y_stride; // int N = spline->num_splines; // // Now compute the vectors: // // tpx = [t_x^3 t_x^2 t_x 1] // // tpy = [t_y^3 t_y^2 t_y 1] // // a = A * tpx, b = A * tpy, c = A * tpz // // A is 4x4 matrix given by the rows A0, A1, A2, A3 // __m128d tpx01, tpx23, tpy01, tpy23, // a01 , b01, a23, b23, // da01 , db01, da23, db23, // d2a01, d2b01, d2a23, d2b23; // tpx01 = _mm_set_pd (tx*tx*tx, tx*tx); // tpx23 = _mm_set_pd (tx, 1.0); // tpy01 = _mm_set_pd (ty*ty*ty, ty*ty); // tpy23 = _mm_set_pd (ty, 1.0); // // x-dependent vectors // _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpx01, tpx23, tpx01, tpx23, a01); // _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpx01, tpx23, tpx01, tpx23, a23); // _MM_DDOT4_PD (A_d[ 8], A_d[ 9], 
A_d[10], A_d[11], tpx01, tpx23, tpx01, tpx23, da01); // _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpx01, tpx23, tpx01, tpx23, da23); // _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpx01, tpx23, tpx01, tpx23, d2a01); // _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpx01, tpx23, tpx01, tpx23, d2a23); // // y-dependent vectors // _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpy01, tpy23, tpy01, tpy23, b01); // _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpy01, tpy23, tpy01, tpy23, b23); // _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpy01, tpy23, tpy01, tpy23, db01); // _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpy01, tpy23, tpy01, tpy23, db23); // _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpy01, tpy23, tpy01, tpy23, d2b01); // _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpy01, tpy23, tpy01, tpy23, d2b23); // // Zero-out values // __m128d mvals[N], mgrads[2*N], mhess[3*N]; // for (int n=0; ncoefs + (ix+i)*xs + (iy+j)*ys); // for (int n=0; nx_grid.delta_inv; // double dyInv = spline->y_grid.delta_inv; // for (int n=0; nx_grid.start; // y -= spline->y_grid.start; // z -= spline->z_grid.start; // double ux = x*spline->x_grid.delta_inv; // double uy = y*spline->y_grid.delta_inv; // double uz = z*spline->z_grid.delta_inv; // ux = fmin (ux, (double)(spline->x_grid.num)-1.0e-5); // uy = fmin (uy, (double)(spline->y_grid.num)-1.0e-5); // uz = fmin (uz, (double)(spline->z_grid.num)-1.0e-5); // double ipartx, iparty, ipartz, tx, ty, tz; // tx = modf (ux, &ipartx); int ix = (int) ipartx; // ty = modf (uy, &iparty); int iy = (int) iparty; // tz = modf (uz, &ipartz); int iz = (int) ipartz; // int xs = spline->x_stride; // int ys = spline->y_stride; // int zs = spline->z_stride; // int N = spline->num_splines; // // Now compute the vectors: // // tpx = [t_x^3 t_x^2 t_x 1] // // tpy = [t_y^3 t_y^2 t_y 1] // // tpz = [t_z^3 t_z^2 t_z 1] // // a = A * tpx, b = A * tpy, c = A * tpz // // A is 4x4 matrix given by the rows A0, A1, 
A2, A3 // __m128d tpx01, tpx23, tpy01, tpy23, tpz01, tpz23, // a01, b01, c01, a23, b23, c23, // tmp0, tmp1, r0, r1, i0, i1, val_r, val_i; // tpx01 = _mm_set_pd (tx*tx*tx, tx*tx); // tpx23 = _mm_set_pd (tx, 1.0); // tpy01 = _mm_set_pd (ty*ty*ty, ty*ty); // tpy23 = _mm_set_pd (ty, 1.0); // tpz01 = _mm_set_pd (tz*tz*tz, tz*tz); // tpz23 = _mm_set_pd (tz, 1.0); // // x-dependent vectors // _MM_DDOT4_PD (A_d[0], A_d[1], A_d[2], A_d[3], tpx01, tpx23, tpx01, tpx23, a01); // _MM_DDOT4_PD (A_d[4], A_d[5], A_d[6], A_d[7], tpx01, tpx23, tpx01, tpx23, a23); // // y-dependent vectors // _MM_DDOT4_PD (A_d[0], A_d[1], A_d[2], A_d[3], tpy01, tpy23, tpy01, tpy23, b01); // _MM_DDOT4_PD (A_d[4], A_d[5], A_d[6], A_d[7], tpy01, tpy23, tpy01, tpy23, b23); // // z-dependent vectors // _MM_DDOT4_PD (A_d[0], A_d[1], A_d[2], A_d[3], tpz01, tpz23, tpz01, tpz23, c01); // _MM_DDOT4_PD (A_d[4], A_d[5], A_d[6], A_d[7], tpz01, tpz23, tpz01, tpz23, c23); // // Zero-out values // __m128d mvals[N]; // for (int n=0; ncoefs + (ix+i)*xs + (iy+j)*ys + (iz+0)*zs); // __m128d* restrict coefs1 = (__m128d*)(spline->coefs + (ix+i)*xs + (iy+j)*ys + (iz+1)*zs); // __m128d* restrict coefs2 = (__m128d*)(spline->coefs + (ix+i)*xs + (iy+j)*ys + (iz+2)*zs); // __m128d* restrict coefs3 = (__m128d*)(spline->coefs + (ix+i)*xs + (iy+j)*ys + (iz+3)*zs); // for (int n=0; ncoefs + (ix+i)*xs + (iy+j)*ys + (iz+k)*zs); // for (int n=0; nx_grid.start; // y -= spline->y_grid.start; // z -= spline->z_grid.start; // double ux = x*spline->x_grid.delta_inv; // double uy = y*spline->y_grid.delta_inv; // double uz = z*spline->z_grid.delta_inv; // ux = fmin (ux, (double)(spline->x_grid.num)-1.0e-5); // uy = fmin (uy, (double)(spline->y_grid.num)-1.0e-5); // uz = fmin (uz, (double)(spline->z_grid.num)-1.0e-5); // double ipartx, iparty, ipartz, tx, ty, tz; // tx = modf (ux, &ipartx); int ix = (int) ipartx; // ty = modf (uy, &iparty); int iy = (int) iparty; // tz = modf (uz, &ipartz); int iz = (int) ipartz; // int xs = spline->x_stride; 
// int ys = spline->y_stride; // int zs = spline->z_stride; // int N = spline->num_splines; // // Now compute the vectors: // // tpx = [t_x^3 t_x^2 t_x 1] // // tpy = [t_y^3 t_y^2 t_y 1] // // tpz = [t_z^3 t_z^2 t_z 1] // // a = A * tpx, b = A * tpy, c = A * tpz // // A is 4x4 matrix given by the rows A0, A1, A2, A3 // __m128d tpx01, tpx23, tpy01, tpy23, tpz01, tpz23, // a01 , b01, c01, a23, b23, c23, // da01 , db01, dc01, da23, db23, dc23; // tpx01 = _mm_set_pd (tx*tx*tx, tx*tx); // tpx23 = _mm_set_pd (tx, 1.0); // tpy01 = _mm_set_pd (ty*ty*ty, ty*ty); // tpy23 = _mm_set_pd (ty, 1.0); // tpz01 = _mm_set_pd (tz*tz*tz, tz*tz); // tpz23 = _mm_set_pd (tz, 1.0); // // x-dependent vectors // _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpx01, tpx23, tpx01, tpx23, a01); // _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpx01, tpx23, tpx01, tpx23, a23); // _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpx01, tpx23, tpx01, tpx23, da01); // _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpx01, tpx23, tpx01, tpx23, da23); // // y-dependent vectors // _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpy01, tpy23, tpy01, tpy23, b01); // _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpy01, tpy23, tpy01, tpy23, b23); // _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpy01, tpy23, tpy01, tpy23, db01); // _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpy01, tpy23, tpy01, tpy23, db23); // // z-dependent vectors // _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpz01, tpz23, tpz01, tpz23, c01); // _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpz01, tpz23, tpz01, tpz23, c23); // _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpz01, tpz23, tpz01, tpz23, dc01); // _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpz01, tpz23, tpz01, tpz23, dc23); // // Zero-out values // __m128d mvals[N], mgrads[3*N]; // for (int n=0; ncoefs + (ix+i)*xs + (iy+j)*ys + (iz+k)*zs); // for (int n=0; nx_grid.delta_inv; // double dyInv = spline->y_grid.delta_inv; // 
double dzInv = spline->z_grid.delta_inv; // for (int n=0; nx_grid.start; // y -= spline->y_grid.start; // z -= spline->z_grid.start; // double ux = x*spline->x_grid.delta_inv; // double uy = y*spline->y_grid.delta_inv; // double uz = z*spline->z_grid.delta_inv; // ux = fmin (ux, (double)(spline->x_grid.num)-1.0e-5); // uy = fmin (uy, (double)(spline->y_grid.num)-1.0e-5); // uz = fmin (uz, (double)(spline->z_grid.num)-1.0e-5); // double ipartx, iparty, ipartz, tx, ty, tz; // tx = modf (ux, &ipartx); int ix = (int) ipartx; // ty = modf (uy, &iparty); int iy = (int) iparty; // tz = modf (uz, &ipartz); int iz = (int) ipartz; // int xs = spline->x_stride; // int ys = spline->y_stride; // int zs = spline->z_stride; // int N = spline->num_splines; // // Now compute the vectors: // // tpx = [t_x^3 t_x^2 t_x 1] // // tpy = [t_y^3 t_y^2 t_y 1] // // tpz = [t_z^3 t_z^2 t_z 1] // // a = A * tpx, b = A * tpy, c = A * tpz // // A is 4x4 matrix given by the rows A0, A1, A2, A3 // __m128d tpx01, tpx23, tpy01, tpy23, tpz01, tpz23, // a01 , b01, c01, a23, b23, c23, // da01 , db01, dc01, da23, db23, dc23, // d2a01, d2b01, d2c01, d2a23, d2b23, d2c23; // tpx01 = _mm_set_pd (tx*tx*tx, tx*tx); // tpx23 = _mm_set_pd (tx, 1.0); // tpy01 = _mm_set_pd (ty*ty*ty, ty*ty); // tpy23 = _mm_set_pd (ty, 1.0); // tpz01 = _mm_set_pd (tz*tz*tz, tz*tz); // tpz23 = _mm_set_pd (tz, 1.0); // // x-dependent vectors // _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpx01, tpx23, tpx01, tpx23, a01); // _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpx01, tpx23, tpx01, tpx23, a23); // _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpx01, tpx23, tpx01, tpx23, da01); // _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpx01, tpx23, tpx01, tpx23, da23); // _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpx01, tpx23, tpx01, tpx23, d2a01); // _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpx01, tpx23, tpx01, tpx23, d2a23); // // y-dependent vectors // _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 
3], tpy01, tpy23, tpy01, tpy23, b01); // _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpy01, tpy23, tpy01, tpy23, b23); // _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpy01, tpy23, tpy01, tpy23, db01); // _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpy01, tpy23, tpy01, tpy23, db23); // _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpy01, tpy23, tpy01, tpy23, d2b01); // _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpy01, tpy23, tpy01, tpy23, d2b23); // // z-dependent vectors // _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpz01, tpz23, tpz01, tpz23, c01); // _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpz01, tpz23, tpz01, tpz23, c23); // _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpz01, tpz23, tpz01, tpz23, dc01); // _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpz01, tpz23, tpz01, tpz23, dc23); // _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpz01, tpz23, tpz01, tpz23, d2c01); // _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpz01, tpz23, tpz01, tpz23, d2c23); // // Zero-out values // __m128d mvals[N], mgrads[3*N], mlapl[3*N]; // for (int n=0; ncoefs + (ix+i)*xs + (iy+j)*ys + (iz+k)*zs); // for (int n=0; nx_grid.delta_inv; // double dyInv = spline->y_grid.delta_inv; // double dzInv = spline->z_grid.delta_inv; // for (int n=0; nx_grid.start; // y -= spline->y_grid.start; // z -= spline->z_grid.start; // double ux = x*spline->x_grid.delta_inv; // double uy = y*spline->y_grid.delta_inv; // double uz = z*spline->z_grid.delta_inv; // ux = fmin (ux, (double)(spline->x_grid.num)-1.0e-5); // uy = fmin (uy, (double)(spline->y_grid.num)-1.0e-5); // uz = fmin (uz, (double)(spline->z_grid.num)-1.0e-5); // double ipartx, iparty, ipartz, tx, ty, tz; // tx = modf (ux, &ipartx); int ix = (int) ipartx; // ty = modf (uy, &iparty); int iy = (int) iparty; // tz = modf (uz, &ipartz); int iz = (int) ipartz; // int xs = spline->x_stride; // int ys = spline->y_stride; // int zs = spline->z_stride; // int N = spline->num_splines; // 
// Now compute the vectors: // // tpx = [t_x^3 t_x^2 t_x 1] // // tpy = [t_y^3 t_y^2 t_y 1] // // tpz = [t_z^3 t_z^2 t_z 1] // // a = A * tpx, b = A * tpy, c = A * tpz // // A is 4x4 matrix given by the rows A0, A1, A2, A3 // __m128d tpx01, tpx23, tpy01, tpy23, tpz01, tpz23, // a01 , b01, c01, a23, b23, c23, // da01 , db01, dc01, da23, db23, dc23, // d2a01, d2b01, d2c01, d2a23, d2b23, d2c23; // tpx01 = _mm_set_pd (tx*tx*tx, tx*tx); // tpx23 = _mm_set_pd (tx, 1.0); // tpy01 = _mm_set_pd (ty*ty*ty, ty*ty); // tpy23 = _mm_set_pd (ty, 1.0); // tpz01 = _mm_set_pd (tz*tz*tz, tz*tz); // tpz23 = _mm_set_pd (tz, 1.0); // // x-dependent vectors // _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpx01, tpx23, tpx01, tpx23, a01); // _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpx01, tpx23, tpx01, tpx23, a23); // _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpx01, tpx23, tpx01, tpx23, da01); // _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpx01, tpx23, tpx01, tpx23, da23); // _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpx01, tpx23, tpx01, tpx23, d2a01); // _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpx01, tpx23, tpx01, tpx23, d2a23); // // y-dependent vectors // _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpy01, tpy23, tpy01, tpy23, b01); // _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpy01, tpy23, tpy01, tpy23, b23); // _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpy01, tpy23, tpy01, tpy23, db01); // _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpy01, tpy23, tpy01, tpy23, db23); // _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpy01, tpy23, tpy01, tpy23, d2b01); // _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpy01, tpy23, tpy01, tpy23, d2b23); // // z-dependent vectors // _MM_DDOT4_PD (A_d[ 0], A_d[ 1], A_d[ 2], A_d[ 3], tpz01, tpz23, tpz01, tpz23, c01); // _MM_DDOT4_PD (A_d[ 4], A_d[ 5], A_d[ 6], A_d[ 7], tpz01, tpz23, tpz01, tpz23, c23); // _MM_DDOT4_PD (A_d[ 8], A_d[ 9], A_d[10], A_d[11], tpz01, tpz23, tpz01, 
tpz23, dc01); // _MM_DDOT4_PD (A_d[12], A_d[13], A_d[14], A_d[15], tpz01, tpz23, tpz01, tpz23, dc23); // _MM_DDOT4_PD (A_d[16], A_d[17], A_d[18], A_d[19], tpz01, tpz23, tpz01, tpz23, d2c01); // _MM_DDOT4_PD (A_d[20], A_d[21], A_d[22], A_d[23], tpz01, tpz23, tpz01, tpz23, d2c23); // // Zero-out values // //__m128d mvals[N], mgrads[3*N], mhess[6*N]; // __m128d mpack[10*N]; // for (int n=0; n<10*N; n++) // mpack[n] = _mm_setzero_pd(); // __m128d a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4]; // a[0]=_mm_unpacklo_pd(a01,a01); da[0]=_mm_unpacklo_pd(da01,da01); d2a[0]=_mm_unpacklo_pd(d2a01,d2a01); // a[1]=_mm_unpackhi_pd(a01,a01); da[1]=_mm_unpackhi_pd(da01,da01); d2a[1]=_mm_unpackhi_pd(d2a01,d2a01); // a[2]=_mm_unpacklo_pd(a23,a23); da[2]=_mm_unpacklo_pd(da23,da23); d2a[2]=_mm_unpacklo_pd(d2a23,d2a23); // a[3]=_mm_unpackhi_pd(a23,a23); da[3]=_mm_unpackhi_pd(da23,da23); d2a[3]=_mm_unpackhi_pd(d2a23,d2a23); // b[0]=_mm_unpacklo_pd(b01,b01); db[0]=_mm_unpacklo_pd(db01,db01); d2b[0]=_mm_unpacklo_pd(d2b01,d2b01); // b[1]=_mm_unpackhi_pd(b01,b01); db[1]=_mm_unpackhi_pd(db01,db01); d2b[1]=_mm_unpackhi_pd(d2b01,d2b01); // b[2]=_mm_unpacklo_pd(b23,b23); db[2]=_mm_unpacklo_pd(db23,db23); d2b[2]=_mm_unpacklo_pd(d2b23,d2b23); // b[3]=_mm_unpackhi_pd(b23,b23); db[3]=_mm_unpackhi_pd(db23,db23); d2b[3]=_mm_unpackhi_pd(d2b23,d2b23); // c[0]=_mm_unpacklo_pd(c01,c01); dc[0]=_mm_unpacklo_pd(dc01,dc01); d2c[0]=_mm_unpacklo_pd(d2c01,d2c01); // c[1]=_mm_unpackhi_pd(c01,c01); dc[1]=_mm_unpackhi_pd(dc01,dc01); d2c[1]=_mm_unpackhi_pd(d2c01,d2c01); // c[2]=_mm_unpacklo_pd(c23,c23); dc[2]=_mm_unpacklo_pd(dc23,dc23); d2c[2]=_mm_unpacklo_pd(d2c23,d2c23); // c[3]=_mm_unpackhi_pd(c23,c23); dc[3]=_mm_unpackhi_pd(dc23,dc23); d2c[3]=_mm_unpackhi_pd(d2c23,d2c23); // // Main computation loop // const int bs = 32; // for (int nstart=0; nstartcoefs + (ix+i)*xs + (iy+j)*ys + (iz+0)*zs); // __m128d* restrict c1 = (__m128d*)(spline->coefs + (ix+i)*xs + (iy+j)*ys + (iz+1)*zs); // __m128d* 
restrict c2 = (__m128d*)(spline->coefs + (ix+i)*xs + (iy+j)*ys + (iz+2)*zs); // __m128d* restrict c3 = (__m128d*)(spline->coefs + (ix+i)*xs + (iy+j)*ys + (iz+3)*zs); // #ifdef USE_PREFETCH_VGH // int nextIndex = i<<4 + j<<2 + k + 1; // int iNext = nextIndex >> 4; // int jNext = (nextIndex >> 2) & 3; // int kNext = nextIndex & 3; // if (nextIndex < 64) { // __m128d* restrict nextCoefs = (__m128d*)(spline->coefs + (ix+iNext)*xs + (iy +jNext)*ys + (iz+kNext)*zs); // for (int i=0,n=nstart; (nx_grid.delta_inv; // double dyInv = spline->y_grid.delta_inv; // double dzInv = spline->z_grid.delta_inv; // for (int n=0; n #include #include int posix_memalign(void **memptr, size_t alignment, size_t size); //////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////// //// Helper functions for spline creation //// //////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////// void init_sse_data(); void find_coefs_1d_d (Ugrid grid, BCtype_d bc, double *data, intptr_t dstride, double *coefs, intptr_t cstride); void solve_deriv_interp_1d_s (float bands[], float coefs[], int M, int cstride); // On input, bands should be filled with: // row 0 : abcdInitial from boundary conditions // rows 1:M: basis functions in first 3 cols, data in last // row M+1 : abcdFinal from boundary conditions // cstride gives the stride between values in coefs. // On exit, coefs with contain interpolating B-spline coefs void solve_periodic_interp_1d_s (float bands[], float coefs[], int M, int cstride); // On input, bands should be filled with: // row 0 : abcdInitial from boundary conditions // rows 1:M: basis functions in first 3 cols, data in last // row M+1 : abcdFinal from boundary conditions // cstride gives the stride between values in coefs. 
// On exit, coefs with contain interpolating B-spline coefs void solve_antiperiodic_interp_1d_s (float bands[], float coefs[], int M, int cstride); void find_coefs_1d_s (Ugrid grid, BCtype_s bc, float *data, intptr_t dstride, float *coefs, intptr_t cstride); //////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////// //// Single-Precision, Real Creation Routines //// //////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////// // On input, bands should be filled with: // row 0 : abcdInitial from boundary conditions // rows 1:M: basis functions in first 3 cols, data in last // row M+1 : abcdFinal from boundary conditions // cstride gives the stride between values in coefs. // On exit, coefs with contain interpolating B-spline coefs multi_UBspline_1d_s* create_multi_UBspline_1d_s (Ugrid x_grid, BCtype_s xBC, int num_splines) { // Create new spline multi_UBspline_1d_s* restrict spline = malloc (sizeof(multi_UBspline_1d_s)); spline->spcode = MULTI_U1D; spline->tcode = SINGLE_REAL; spline->xBC = xBC; spline->x_grid = x_grid; spline->num_splines = num_splines; // Setup internal variables int Mx = x_grid.num; int Nx; if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) { x_grid.delta = (x_grid.end-x_grid.start)/(double)(x_grid.num); Nx = Mx+3; } else { x_grid.delta = (x_grid.end-x_grid.start)/(double)(x_grid.num-1); Nx = Mx+2; } int N = num_splines; #ifdef HAVE_SSE if (N % 4) N += 4 - (N % 4); #endif spline->x_stride = N; x_grid.delta_inv = 1.0/x_grid.delta; spline->x_grid = x_grid; #ifndef HAVE_SSE spline->coefs = malloc (sizeof(float)*Nx*N); #else posix_memalign ((void**)&spline->coefs, 64, (sizeof(float)*Nx*N)); init_sse_data(); #endif return spline; } void set_multi_UBspline_1d_s (multi_UBspline_1d_s *spline, int num, float *data) { float *coefs = spline->coefs + num; int xs = spline->x_stride; find_coefs_1d_s (spline->x_grid, spline->xBC, 
data, 1, coefs, xs); } multi_UBspline_2d_s* create_multi_UBspline_2d_s (Ugrid x_grid, Ugrid y_grid, BCtype_s xBC, BCtype_s yBC, int num_splines) { // Create new spline multi_UBspline_2d_s* restrict spline = malloc (sizeof(multi_UBspline_2d_s)); spline->spcode = MULTI_U2D; spline->tcode = SINGLE_REAL; spline->xBC = xBC; spline->yBC = yBC; spline->num_splines = num_splines; // Setup internal variables int Mx = x_grid.num; int My = y_grid.num; int Nx, Ny; if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) Nx = Mx+3; else Nx = Mx+2; x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3); x_grid.delta_inv = 1.0/x_grid.delta; spline->x_grid = x_grid; if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC) Ny = My+3; else Ny = My+2; y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3); y_grid.delta_inv = 1.0/y_grid.delta; spline->y_grid = y_grid; int N = num_splines; #ifdef HAVE_SSE if (N % 4) N += 4 - (N % 4); #endif spline->x_stride = Ny*N; spline->y_stride = N; #ifndef HAVE_SSE spline->coefs = malloc ((size_t)sizeof(float)*Nx*Ny*N); #else posix_memalign ((void**)&spline->coefs, 64, sizeof(float)*Nx*Ny*N); init_sse_data(); #endif return spline; } void set_multi_UBspline_2d_s (multi_UBspline_2d_s* spline, int num, float *data) { int Mx = spline->x_grid.num; int My = spline->y_grid.num; int Nx, Ny; if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC) Nx = Mx+3; else Nx = Mx+2; if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC) Ny = My+3; else Ny = My+2; float *coefs = spline->coefs + num; int ys = spline->y_stride; // First, solve in the X-direction for (int iy=0; iyx_grid, spline->xBC, data+doffset, (intptr_t)My, coefs+coffset, (intptr_t)Ny*ys); } // Now, solve in the Y-direction for (int ix=0; ixy_grid, spline->yBC, coefs+doffset, (intptr_t)ys, coefs+coffset, (intptr_t)ys); } } multi_UBspline_3d_s* create_multi_UBspline_3d_s (Ugrid x_grid, Ugrid y_grid, Ugrid z_grid, BCtype_s xBC, BCtype_s yBC, BCtype_s zBC, int 
num_splines) { // Create new spline multi_UBspline_3d_s* restrict spline = malloc (sizeof(multi_UBspline_3d_s)); spline->spcode = MULTI_U3D; spline->tcode = SINGLE_REAL; spline->xBC = xBC; spline->yBC = yBC; spline->zBC = zBC; spline->num_splines = num_splines; // Setup internal variables int Mx = x_grid.num; int My = y_grid.num; int Mz = z_grid.num; int Nx, Ny, Nz; if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) Nx = Mx+3; else Nx = Mx+2; x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3); x_grid.delta_inv = 1.0/x_grid.delta; spline->x_grid = x_grid; if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC) Ny = My+3; else Ny = My+2; y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3); y_grid.delta_inv = 1.0/y_grid.delta; spline->y_grid = y_grid; if (zBC.lCode == PERIODIC || zBC.lCode == ANTIPERIODIC) Nz = Mz+3; else Nz = Mz+2; z_grid.delta = (z_grid.end - z_grid.start)/(double)(Nz-3); z_grid.delta_inv = 1.0/z_grid.delta; spline->z_grid = z_grid; int N = num_splines; #ifdef HAVE_SSE if (N % 4) N += 4 - (N % 4); #endif spline->x_stride = Ny*Nz*N; spline->y_stride = Nz*N; spline->z_stride = N; #ifndef HAVE_SSE spline->coefs = malloc (sizeof(float)*Nx*Ny*Nz*N); #else posix_memalign ((void**)&spline->coefs, 64, ((size_t)sizeof(float)*Nx*Ny*Nz*N)); init_sse_data(); #endif return spline; } void set_multi_UBspline_3d_s (multi_UBspline_3d_s* spline, int num, float *data) { int Mx = spline->x_grid.num; int My = spline->y_grid.num; int Mz = spline->z_grid.num; int Nx, Ny, Nz; if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC) Nx = Mx+3; else Nx = Mx+2; if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC) Ny = My+3; else Ny = My+2; if (spline->zBC.lCode == PERIODIC || spline->zBC.lCode == ANTIPERIODIC) Nz = Mz+3; else Nz = Mz+2; float *coefs = spline->coefs + num; int zs = spline->z_stride; // First, solve in the X-direction for (int iy=0; iyx_grid, spline->xBC, data+doffset, (intptr_t)My*Mz, coefs+coffset, 
(intptr_t)Ny*Nz*zs); } // Now, solve in the Y-direction for (int ix=0; ixy_grid, spline->yBC, coefs+doffset, (intptr_t)Nz*zs, coefs+coffset, (intptr_t)Nz*zs); } // Now, solve in the Z-direction for (int ix=0; ixz_grid, spline->zBC, coefs+doffset, (intptr_t)zs, coefs+coffset, (intptr_t)zs); } } //////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////// //// Single-Precision, Complex Creation Routines //// //////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////// // On input, bands should be filled with: // row 0 : abcdInitial from boundary conditions // rows 1:M: basis functions in first 3 cols, data in last // row M+1 : abcdFinal from boundary conditions // cstride gives the stride between values in coefs. // On exit, coefs with contain interpolating B-spline coefs multi_UBspline_1d_c* create_multi_UBspline_1d_c (Ugrid x_grid, BCtype_c xBC, int num_splines) { // Create new spline multi_UBspline_1d_c* restrict spline = malloc (sizeof(multi_UBspline_1d_c)); spline->spcode = MULTI_U1D; spline->tcode = SINGLE_COMPLEX; spline->xBC = xBC; spline->num_splines = num_splines; // Setup internal variables int M = x_grid.num; int N; if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) { x_grid.delta = (x_grid.end-x_grid.start)/(double)(x_grid.num); N = M+3; } else { x_grid.delta = (x_grid.end-x_grid.start)/(double)(x_grid.num-1); N = M+2; } x_grid.delta_inv = 1.0/x_grid.delta; spline->x_grid = x_grid; spline->x_stride = num_splines; #ifndef HAVE_SSE spline->coefs = malloc (2*sizeof(float)*N*num_splines); #else posix_memalign ((void**)&spline->coefs, 64, 2*sizeof(float)*N*num_splines); init_sse_data(); #endif return spline; } void set_multi_UBspline_1d_c (multi_UBspline_1d_c* spline, int num, complex_float *data) { complex_float *coefs = spline->coefs + num; BCtype_s xBC_r, xBC_i; xBC_r.lCode = spline->xBC.lCode; xBC_r.rCode = spline->xBC.rCode; 
xBC_r.lVal = spline->xBC.lVal_r; xBC_r.rVal = spline->xBC.rVal_r; xBC_i.lCode = spline->xBC.lCode; xBC_i.rCode = spline->xBC.rCode; xBC_i.lVal = spline->xBC.lVal_i; xBC_i.rVal = spline->xBC.rVal_i; int xs = spline->x_stride; // Real part find_coefs_1d_s (spline->x_grid, xBC_r, (float*)data, (intptr_t)2, (float*)coefs, (intptr_t)2*xs); // Imaginarty part find_coefs_1d_s (spline->x_grid, xBC_i, ((float*)data)+1, (intptr_t)2, ((float*)coefs+1), (intptr_t)2*xs); } multi_UBspline_2d_c* create_multi_UBspline_2d_c (Ugrid x_grid, Ugrid y_grid, BCtype_c xBC, BCtype_c yBC, int num_splines) { // Create new spline multi_UBspline_2d_c* restrict spline = malloc (sizeof(multi_UBspline_2d_c)); spline->spcode = MULTI_U2D; spline->tcode = SINGLE_COMPLEX; spline->xBC = xBC; spline->yBC = yBC; spline->num_splines = num_splines; // Setup internal variables int Mx = x_grid.num; int My = y_grid.num; int Nx, Ny; if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) Nx = Mx+3; else Nx = Mx+2; x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3); x_grid.delta_inv = 1.0/x_grid.delta; spline->x_grid = x_grid; if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC) Ny = My+3; else Ny = My+2; y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3); y_grid.delta_inv = 1.0/y_grid.delta; spline->y_grid = y_grid; int N = num_splines; #ifdef HAVE_SSE if (N % 2) N++; #endif spline->x_stride = Ny*N; spline->y_stride = N; #ifndef HAVE_SSE spline->coefs = malloc (2*sizeof(float)*Nx*Ny*N); spline->lapl2 = malloc (4*sizeof(float)*N); #else posix_memalign ((void**)&spline->coefs, 64, 2*sizeof(float)*Nx*Ny*N); posix_memalign ((void**)&spline->lapl2, 64, 4*sizeof(float)*N); #endif init_sse_data(); return spline; } void set_multi_UBspline_2d_c (multi_UBspline_2d_c* spline, int num, complex_float *data) { // Setup internal variables int Mx = spline->x_grid.num; int My = spline->y_grid.num; int Nx, Ny; complex_float* coefs = spline->coefs + num; if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == 
ANTIPERIODIC) Nx = Mx+3; else Nx = Mx+2; if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC) Ny = My+3; else Ny = My+2; BCtype_s xBC_r, xBC_i, yBC_r, yBC_i; xBC_r.lCode = spline->xBC.lCode; xBC_r.rCode = spline->xBC.rCode; xBC_r.lVal = spline->xBC.lVal_r; xBC_r.rVal = spline->xBC.rVal_r; xBC_i.lCode = spline->xBC.lCode; xBC_i.rCode = spline->xBC.rCode; xBC_i.lVal = spline->xBC.lVal_i; xBC_i.rVal = spline->xBC.rVal_i; yBC_r.lCode = spline->yBC.lCode; yBC_r.rCode = spline->yBC.rCode; yBC_r.lVal = spline->yBC.lVal_r; yBC_r.rVal = spline->yBC.rVal_r; yBC_i.lCode = spline->yBC.lCode; yBC_i.rCode = spline->yBC.rCode; yBC_i.lVal = spline->yBC.lVal_i; yBC_i.rVal = spline->yBC.rVal_i; int ys = spline->y_stride; // First, solve in the X-direction for (int iy=0; iyx_grid, xBC_r, ((float*)data)+doffset, (intptr_t)2*My, (float*)coefs+coffset, (intptr_t)2*Ny*ys); // Imag part find_coefs_1d_s (spline->x_grid, xBC_i, ((float*)data)+doffset+1, (intptr_t)2*My, ((float*)coefs)+coffset+1, (intptr_t)2*Ny*ys); } // Now, solve in the Y-direction for (int ix=0; ixy_grid, yBC_r, ((float*)coefs)+doffset, (intptr_t)2*ys, ((float*)coefs)+coffset, (intptr_t)2*ys); // Imag part find_coefs_1d_s (spline->y_grid, yBC_i, ((float*)coefs)+doffset+1, (intptr_t)2*ys, ((float*)coefs)+coffset+1, (intptr_t)2*ys); } } multi_UBspline_3d_c* create_multi_UBspline_3d_c (Ugrid x_grid, Ugrid y_grid, Ugrid z_grid, BCtype_c xBC, BCtype_c yBC, BCtype_c zBC, int num_splines) { // Create new spline multi_UBspline_3d_c* restrict spline = malloc (sizeof(multi_UBspline_3d_c)); spline->spcode = MULTI_U3D; spline->tcode = SINGLE_COMPLEX; spline->xBC = xBC; spline->yBC = yBC; spline->zBC = zBC; spline->num_splines = num_splines; // Setup internal variables int Mx = x_grid.num; int My = y_grid.num; int Mz = z_grid.num; int Nx, Ny, Nz; if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) Nx = Mx+3; else Nx = Mx+2; x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3); x_grid.delta_inv = 
1.0/x_grid.delta; spline->x_grid = x_grid; if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC) Ny = My+3; else Ny = My+2; y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3); y_grid.delta_inv = 1.0/y_grid.delta; spline->y_grid = y_grid; if (zBC.lCode == PERIODIC || zBC.lCode == ANTIPERIODIC) Nz = Mz+3; else Nz = Mz+2; z_grid.delta = (z_grid.end - z_grid.start)/(double)(Nz-3); z_grid.delta_inv = 1.0/z_grid.delta; spline->z_grid = z_grid; int N = spline->num_splines; #ifdef HAVE_SSE if (N % 2) N++; #endif spline->x_stride = Ny*Nz*N; spline->y_stride = Nz*N; spline->z_stride = N; #ifndef HAVE_SSE spline->coefs = malloc ((size_t)2*sizeof(float)*Nx*Ny*Nz*N); spline->lapl3 = malloc (6*sizeof(float)*N); #else posix_memalign ((void**)&spline->coefs, 64, (size_t)2*sizeof(float)*Nx*Ny*Nz*N); posix_memalign ((void**)&spline->lapl3, 64, 6*sizeof(float)*N); init_sse_data(); #endif return spline; } void set_multi_UBspline_3d_c (multi_UBspline_3d_c* spline, int num, complex_float *data) { // Setup internal variables int Mx = spline->x_grid.num; int My = spline->y_grid.num; int Mz = spline->z_grid.num; int Nx, Ny, Nz; if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC) Nx = Mx+3; else Nx = Mx+2; if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC) Ny = My+3; else Ny = My+2; if (spline->zBC.lCode == PERIODIC || spline->zBC.lCode == ANTIPERIODIC) Nz = Mz+3; else Nz = Mz+2; BCtype_s xBC_r, xBC_i, yBC_r, yBC_i, zBC_r, zBC_i; xBC_r.lCode = spline->xBC.lCode; xBC_r.rCode = spline->xBC.rCode; xBC_r.lVal = spline->xBC.lVal_r; xBC_r.rVal = spline->xBC.rVal_r; xBC_i.lCode = spline->xBC.lCode; xBC_i.rCode = spline->xBC.rCode; xBC_i.lVal = spline->xBC.lVal_i; xBC_i.rVal = spline->xBC.rVal_i; yBC_r.lCode = spline->yBC.lCode; yBC_r.rCode = spline->yBC.rCode; yBC_r.lVal = spline->yBC.lVal_r; yBC_r.rVal = spline->yBC.rVal_r; yBC_i.lCode = spline->yBC.lCode; yBC_i.rCode = spline->yBC.rCode; yBC_i.lVal = spline->yBC.lVal_i; yBC_i.rVal = 
spline->yBC.rVal_i; zBC_r.lCode = spline->zBC.lCode; zBC_r.rCode = spline->zBC.rCode; zBC_r.lVal = spline->zBC.lVal_r; zBC_r.rVal = spline->zBC.rVal_r; zBC_i.lCode = spline->zBC.lCode; zBC_i.rCode = spline->zBC.rCode; zBC_i.lVal = spline->zBC.lVal_i; zBC_i.rVal = spline->zBC.rVal_i; complex_float *coefs = spline->coefs + num; int zs = spline->z_stride; // First, solve in the X-direction for (int iy=0; iyx_grid, xBC_r, ((float*)data)+doffset, (intptr_t)2*My*Mz, ((float*)coefs)+coffset, (intptr_t)2*Ny*Nz*zs); // Imag part find_coefs_1d_s (spline->x_grid, xBC_i, ((float*)data)+doffset+1, (intptr_t)2*My*Mz, ((float*)coefs)+coffset+1, (intptr_t)2*Ny*Nz*zs); } // Now, solve in the Y-direction for (int ix=0; ixy_grid, yBC_r, ((float*)coefs)+doffset, (intptr_t)2*Nz*zs, ((float*)coefs)+coffset, (intptr_t)2*Nz*zs); // Imag part find_coefs_1d_s (spline->y_grid, yBC_i, ((float*)coefs)+doffset+1, (intptr_t)2*Nz*zs, ((float*)coefs)+coffset+1, (intptr_t)2*Nz*zs); } // Now, solve in the Z-direction for (int ix=0; ixz_grid, zBC_r, ((float*)coefs)+doffset, (intptr_t)2*zs, ((float*)coefs)+coffset, (intptr_t)2*zs); // Imag part find_coefs_1d_s (spline->z_grid, zBC_i, ((float*)coefs)+doffset+1, (intptr_t)2*zs, ((float*)coefs)+coffset+1, (intptr_t)2*zs); } } //////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////// //// Double-Precision, Real Creation Routines //// //////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////// // On input, bands should be filled with: // row 0 : abcdInitial from boundary conditions // rows 1:M: basis functions in first 3 cols, data in last // row M+1 : abcdFinal from boundary conditions // cstride gives the stride between values in coefs. 
// On exit, coefs with contain interpolating B-spline coefs void solve_deriv_interp_1d_d (double bands[], double coefs[], int M, int cstride); // On input, bands should be filled with: // row 0 : abcdInitial from boundary conditions // rows 1:M: basis functions in first 3 cols, data in last // row M+1 : abcdFinal from boundary conditions // cstride gives the stride between values in coefs. // On exit, coefs with contain interpolating B-spline coefs void solve_periodic_interp_1d_d (double bands[], double coefs[], int M, int cstride); void find_coefs_1d_d (Ugrid grid, BCtype_d bc, double *data, intptr_t dstride, double *coefs, intptr_t cstride); multi_UBspline_1d_d* create_multi_UBspline_1d_d (Ugrid x_grid, BCtype_d xBC, int num_splines) { // Create new spline multi_UBspline_1d_d* restrict spline = malloc (sizeof(multi_UBspline_1d_d)); spline->spcode = MULTI_U1D; spline->tcode = DOUBLE_REAL; spline->xBC = xBC; spline->num_splines = num_splines; // Setup internal variables int Mx = x_grid.num; int Nx; if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) { x_grid.delta = (x_grid.end-x_grid.start)/(double)(x_grid.num); Nx = Mx+3; } else { x_grid.delta = (x_grid.end-x_grid.start)/(double)(x_grid.num-1); Nx = Mx+2; } x_grid.delta_inv = 1.0/x_grid.delta; spline->x_grid = x_grid; int N = num_splines; #ifdef HAVE_SSE2 // We must pad to keep data aligned for SSE operations if (N & 1) N++; #endif spline->x_stride = N; #ifndef HAVE_SSE2 spline->coefs = malloc (sizeof(double)*Nx*N); #else posix_memalign ((void**)&spline->coefs, 64, sizeof(double)*Nx*N); init_sse_data(); #endif return spline; } void set_multi_UBspline_1d_d (multi_UBspline_1d_d* spline, int num, double *data) { double *coefs = spline->coefs + num; int xs = spline->x_stride; find_coefs_1d_d (spline->x_grid, spline->xBC, data, 1, coefs, xs); } void set_multi_UBspline_1d_d_BC (multi_UBspline_1d_d* spline, int num, double *data, BCtype_d xBC) { double *coefs = spline->coefs + num; int xs = spline->x_stride; 
find_coefs_1d_d (spline->x_grid, xBC, data, 1, coefs, xs); } multi_UBspline_2d_d* create_multi_UBspline_2d_d (Ugrid x_grid, Ugrid y_grid, BCtype_d xBC, BCtype_d yBC, int num_splines) { // Create new spline multi_UBspline_2d_d* restrict spline = malloc (sizeof(multi_UBspline_2d_d)); spline->spcode = MULTI_U2D; spline->tcode = DOUBLE_REAL; spline->xBC = xBC; spline->yBC = yBC; spline->num_splines = num_splines; // Setup internal variables int Mx = x_grid.num; int My = y_grid.num; int Nx, Ny; if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) Nx = Mx+3; else Nx = Mx+2; x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3); x_grid.delta_inv = 1.0/x_grid.delta; spline->x_grid = x_grid; if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC) Ny = My+3; else Ny = My+2; y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3); y_grid.delta_inv = 1.0/y_grid.delta; spline->y_grid = y_grid; int N = num_splines; #ifdef HAVE_SSE2 // We must pad to keep data align for SSE operations if (num_splines & 1) N++; #endif spline->x_stride = Ny*N; spline->y_stride = N; #ifndef HAVE_SSE2 spline->coefs = malloc (sizeof(double)*Nx*Ny*N); #else posix_memalign ((void**)&spline->coefs, 64, (sizeof(double)*Nx*Ny*N)); init_sse_data(); #endif return spline; } void set_multi_UBspline_2d_d (multi_UBspline_2d_d* spline, int num, double *data) { int Mx = spline->x_grid.num; int My = spline->y_grid.num; int Nx, Ny; double *coefs = spline->coefs + num; if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC) Nx = Mx+3; else Nx = Mx+2; if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC) Ny = My+3; else Ny = My+2; int ys = spline->y_stride; // First, solve in the X-direction for (int iy=0; iyx_grid, spline->xBC, data+doffset, (intptr_t)My, coefs+coffset, (intptr_t)Ny*ys); } // Now, solve in the Y-direction for (int ix=0; ixy_grid, spline->yBC, coefs+doffset, (intptr_t)ys, coefs+coffset, (intptr_t)ys); } } multi_UBspline_3d_d* create_multi_UBspline_3d_d (Ugrid 
x_grid, Ugrid y_grid, Ugrid z_grid, BCtype_d xBC, BCtype_d yBC, BCtype_d zBC, int num_splines) { // Create new spline multi_UBspline_3d_d* restrict spline = malloc (sizeof(multi_UBspline_3d_d)); spline->spcode = MULTI_U3D; spline->tcode = DOUBLE_REAL; spline->xBC = xBC; spline->yBC = yBC; spline->zBC = zBC; spline->num_splines = num_splines; // Setup internal variables int Mx = x_grid.num; int My = y_grid.num; int Mz = z_grid.num; int Nx, Ny, Nz; if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) Nx = Mx+3; else Nx = Mx+2; x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3); x_grid.delta_inv = 1.0/x_grid.delta; spline->x_grid = x_grid; if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC) Ny = My+3; else Ny = My+2; y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3); y_grid.delta_inv = 1.0/y_grid.delta; spline->y_grid = y_grid; if (zBC.lCode == PERIODIC || zBC.lCode == ANTIPERIODIC) Nz = Mz+3; else Nz = Mz+2; z_grid.delta = (z_grid.end - z_grid.start)/(double)(Nz-3); z_grid.delta_inv = 1.0/z_grid.delta; spline->z_grid = z_grid; int N = num_splines; #ifdef HAVE_SSE2 // We must pad to keep data align for SSE operations if (N & 1) N++; #endif spline->x_stride = Ny*Nz*N; spline->y_stride = Nz*N; spline->z_stride = N; #ifndef HAVE_SSE2 spline->coefs = malloc ((size_t)sizeof(double)*Nx*Ny*Nz*N); #else posix_memalign ((void**)&spline->coefs, 64, ((size_t)sizeof(double)*Nx*Ny*Nz*N)); init_sse_data(); #endif return spline; } void set_multi_UBspline_3d_d (multi_UBspline_3d_d* spline, int num, double *data) { int Mx = spline->x_grid.num; int My = spline->y_grid.num; int Mz = spline->z_grid.num; int Nx, Ny, Nz; if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC) Nx = Mx+3; else Nx = Mx+2; if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC) Ny = My+3; else Ny = My+2; if (spline->zBC.lCode == PERIODIC || spline->zBC.lCode == ANTIPERIODIC) Nz = Mz+3; else Nz = Mz+2; double *coefs = spline->coefs + num; intptr_t zs = 
spline->z_stride; // First, solve in the X-direction for (int iy=0; iyx_grid, spline->xBC, data+doffset, (intptr_t)My*Mz, coefs+coffset, (intptr_t)Ny*Nz*zs); } // Now, solve in the Y-direction for (int ix=0; ixy_grid, spline->yBC, coefs+doffset, (intptr_t)Nz*zs, coefs+coffset, (intptr_t)Nz*zs); } // Now, solve in the Z-direction for (int ix=0; ixz_grid, spline->zBC, coefs+doffset, (intptr_t)zs, coefs+coffset, (intptr_t)zs); } } //////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////// //// Double-Precision, Complex Creation Routines //// //////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////// // On input, bands should be filled with: // row 0 : abcdInitial from boundary conditions // rows 1:M: basis functions in first 3 cols, data in last // row M+1 : abcdFinal from boundary conditions // cstride gives the stride between values in coefs. // On exit, coefs with contain interpolating B-spline coefs multi_UBspline_1d_z* create_multi_UBspline_1d_z (Ugrid x_grid, BCtype_z xBC, int num_splines) { // Create new spline multi_UBspline_1d_z* restrict spline = malloc (sizeof(multi_UBspline_1d_z)); spline->spcode = MULTI_U1D; spline->tcode = DOUBLE_COMPLEX; spline->xBC = xBC; spline->num_splines = num_splines; // Setup internal variables int Mx = x_grid.num; int Nx; if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) { x_grid.delta = (x_grid.end-x_grid.start)/(double)(x_grid.num); Nx = Mx+3; } else { x_grid.delta = (x_grid.end-x_grid.start)/(double)(x_grid.num-1); Nx = Mx+2; } x_grid.delta_inv = 1.0/x_grid.delta; spline->x_grid = x_grid; spline->x_stride = num_splines; #ifndef HAVE_SSE2 spline->coefs = malloc (2*sizeof(double)*Nx*num_splines); #else posix_memalign ((void**)&spline->coefs, 64, 2*sizeof(double)*Nx*num_splines); init_sse_data(); #endif return spline; } void set_multi_UBspline_1d_z (multi_UBspline_1d_z* spline, int num, 
complex_double *data) { int Mx = spline->x_grid.num; int Nx; complex_double *coefs = spline->coefs + num; if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC) Nx = Mx+3; else Nx = Mx+2; BCtype_d xBC_r, xBC_i; xBC_r.lCode = spline->xBC.lCode; xBC_r.rCode = spline->xBC.rCode; xBC_r.lVal = spline->xBC.lVal_r; xBC_r.rVal = spline->xBC.rVal_r; xBC_i.lCode = spline->xBC.lCode; xBC_i.rCode = spline->xBC.rCode; xBC_i.lVal = spline->xBC.lVal_i; xBC_i.rVal = spline->xBC.rVal_i; int xs = spline->x_stride; // Real part find_coefs_1d_d (spline->x_grid, xBC_r, (double*)data, (intptr_t)2, ((double*)coefs), (intptr_t)2*xs); // Imaginary part find_coefs_1d_d (spline->x_grid, xBC_i, ((double*)data)+1, (intptr_t)2, ((double*)coefs)+1, (intptr_t)2*xs); } void set_multi_UBspline_1d_z_BC (multi_UBspline_1d_z *spline, int num, complex_double *data, BCtype_z xBC) { int Mx = spline->x_grid.num; int Nx; complex_double *coefs = spline->coefs + num; if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) Nx = Mx+3; else Nx = Mx+2; BCtype_d xBC_r, xBC_i; xBC_r.lCode = xBC.lCode; xBC_r.rCode = xBC.rCode; xBC_r.lVal = xBC.lVal_r; xBC_r.rVal = xBC.rVal_r; xBC_i.lCode = xBC.lCode; xBC_i.rCode = xBC.rCode; xBC_i.lVal = xBC.lVal_i; xBC_i.rVal = xBC.rVal_i; int xs = spline->x_stride; // Real part find_coefs_1d_d (spline->x_grid, xBC_r, (double*)data, (intptr_t)2, ((double*)coefs), (intptr_t)2*xs); // Imaginary part find_coefs_1d_d (spline->x_grid, xBC_i, ((double*)data)+1, (intptr_t)2, ((double*)coefs)+1, (intptr_t)2*xs); } multi_UBspline_2d_z* create_multi_UBspline_2d_z (Ugrid x_grid, Ugrid y_grid, BCtype_z xBC, BCtype_z yBC, int num_splines) { // Create new spline multi_UBspline_2d_z* restrict spline = malloc (sizeof(multi_UBspline_2d_z)); spline->spcode = MULTI_U2D; spline->tcode = DOUBLE_COMPLEX; spline->xBC = xBC; spline->yBC = yBC; spline->num_splines = num_splines; // Setup internal variables int Mx = x_grid.num; int My = y_grid.num; int Nx, Ny; if (xBC.lCode == PERIODIC || 
xBC.lCode == ANTIPERIODIC) Nx = Mx+3; else Nx = Mx+2; x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3); x_grid.delta_inv = 1.0/x_grid.delta; spline->x_grid = x_grid; if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC) Ny = My+3; else Ny = My+2; y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3); y_grid.delta_inv = 1.0/y_grid.delta; spline->y_grid = y_grid; spline->x_stride = Ny*num_splines; spline->y_stride = num_splines; #ifndef HAVE_SSE2 spline->coefs = malloc (2*sizeof(double)*Nx*Ny*num_splines); spline->lapl2 = malloc (4*sizeof(double)*num_splines); #else posix_memalign ((void**)&spline->coefs, 64, 2*sizeof(double)*Nx*Ny*num_splines); posix_memalign ((void**)&spline->lapl2, 64, 4*sizeof(double)*num_splines); init_sse_data(); #endif return spline; } void set_multi_UBspline_2d_z (multi_UBspline_2d_z* spline, int num, complex_double *data) { int Mx = spline->x_grid.num; int My = spline->y_grid.num; int Nx, Ny; if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC) Nx = Mx+3; else Nx = Mx+2; if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC) Ny = My+3; else Ny = My+2; BCtype_d xBC_r, xBC_i, yBC_r, yBC_i; xBC_r.lCode = spline->xBC.lCode; xBC_r.rCode = spline->xBC.rCode; xBC_r.lVal = spline->xBC.lVal_r; xBC_r.rVal = spline->xBC.rVal_r; xBC_i.lCode = spline->xBC.lCode; xBC_i.rCode = spline->xBC.rCode; xBC_i.lVal = spline->xBC.lVal_i; xBC_i.rVal = spline->xBC.rVal_i; yBC_r.lCode = spline->yBC.lCode; yBC_r.rCode = spline->yBC.rCode; yBC_r.lVal = spline->yBC.lVal_r; yBC_r.rVal = spline->yBC.rVal_r; yBC_i.lCode = spline->yBC.lCode; yBC_i.rCode = spline->yBC.rCode; yBC_i.lVal = spline->yBC.lVal_i; yBC_i.rVal = spline->yBC.rVal_i; complex_double *coefs = spline->coefs + num; int ys = spline->y_stride; // First, solve in the X-direction for (int iy=0; iyx_grid, xBC_r, ((double*)data+doffset), (intptr_t)2*My, (double*)coefs+coffset, (intptr_t)2*Ny*ys); // Imag part find_coefs_1d_d (spline->x_grid, xBC_i, 
((double*)data)+doffset+1, (intptr_t)2*My, ((double*)coefs)+coffset+1, (intptr_t)2*Ny*ys); } // Now, solve in the Y-direction for (int ix=0; ixy_grid, yBC_r, ((double*)coefs)+doffset, (intptr_t)2*ys, (double*)coefs+coffset, (intptr_t)2*ys); // Imag part find_coefs_1d_d (spline->y_grid, yBC_i, (double*)coefs+doffset+1, (intptr_t)2*ys, ((double*)coefs)+coffset+1, (intptr_t)2*ys); } } multi_UBspline_3d_z* create_multi_UBspline_3d_z (Ugrid x_grid, Ugrid y_grid, Ugrid z_grid, BCtype_z xBC, BCtype_z yBC, BCtype_z zBC, int num_splines) { // Create new spline multi_UBspline_3d_z* restrict spline = malloc (sizeof(multi_UBspline_3d_z)); spline->spcode = MULTI_U3D; spline->tcode = DOUBLE_COMPLEX; spline->xBC = xBC; spline->yBC = yBC; spline->zBC = zBC; spline->num_splines = num_splines; // Setup internal variables int Mx = x_grid.num; int My = y_grid.num; int Mz = z_grid.num; int Nx, Ny, Nz; if (xBC.lCode == PERIODIC || xBC.lCode == ANTIPERIODIC) Nx = Mx+3; else Nx = Mx+2; x_grid.delta = (x_grid.end - x_grid.start)/(double)(Nx-3); x_grid.delta_inv = 1.0/x_grid.delta; spline->x_grid = x_grid; if (yBC.lCode == PERIODIC || yBC.lCode == ANTIPERIODIC) Ny = My+3; else Ny = My+2; y_grid.delta = (y_grid.end - y_grid.start)/(double)(Ny-3); y_grid.delta_inv = 1.0/y_grid.delta; spline->y_grid = y_grid; if (zBC.lCode == PERIODIC || zBC.lCode == ANTIPERIODIC) Nz = Mz+3; else Nz = Mz+2; z_grid.delta = (z_grid.end - z_grid.start)/(double)(Nz-3); z_grid.delta_inv = 1.0/z_grid.delta; spline->z_grid = z_grid; int N = num_splines; #ifdef HAVE_SSE2 if (N & 3) N += 4-(N & 3); #endif spline->x_stride = (intptr_t)Ny*(intptr_t)Nz*N; spline->y_stride = Nz*N; spline->z_stride = N; #ifndef HAVE_SSE2 spline->coefs = malloc ((size_t)2*sizeof(double)*Nx*Ny*Nz*N); spline->lapl3 = malloc (6*sizeof(double)*N); #else posix_memalign ((void**)&spline->coefs, 64, (size_t)2*sizeof(double)*Nx*Ny*Nz*N); posix_memalign ((void**)&spline->lapl3, 64, 6*sizeof(double)*N); init_sse_data(); #endif return spline; } void 
set_multi_UBspline_3d_z (multi_UBspline_3d_z* spline, int num, complex_double *data) { // Setup internal variables int Mx = spline->x_grid.num; int My = spline->y_grid.num; int Mz = spline->z_grid.num; int Nx, Ny, Nz; if (spline->xBC.lCode == PERIODIC || spline->xBC.lCode == ANTIPERIODIC) Nx = Mx+3; else Nx = Mx+2; if (spline->yBC.lCode == PERIODIC || spline->yBC.lCode == ANTIPERIODIC) Ny = My+3; else Ny = My+2; if (spline->zBC.lCode == PERIODIC || spline->zBC.lCode == ANTIPERIODIC) Nz = Mz+3; else Nz = Mz+2; BCtype_d xBC_r, xBC_i, yBC_r, yBC_i, zBC_r, zBC_i; xBC_r.lCode = spline->xBC.lCode; xBC_r.rCode = spline->xBC.rCode; xBC_r.lVal = spline->xBC.lVal_r; xBC_r.rVal = spline->xBC.rVal_r; xBC_i.lCode = spline->xBC.lCode; xBC_i.rCode = spline->xBC.rCode; xBC_i.lVal = spline->xBC.lVal_i; xBC_i.rVal = spline->xBC.rVal_i; yBC_r.lCode = spline->yBC.lCode; yBC_r.rCode = spline->yBC.rCode; yBC_r.lVal = spline->yBC.lVal_r; yBC_r.rVal = spline->yBC.rVal_r; yBC_i.lCode = spline->yBC.lCode; yBC_i.rCode = spline->yBC.rCode; yBC_i.lVal = spline->yBC.lVal_i; yBC_i.rVal = spline->yBC.rVal_i; zBC_r.lCode = spline->zBC.lCode; zBC_r.rCode = spline->zBC.rCode; zBC_r.lVal = spline->zBC.lVal_r; zBC_r.rVal = spline->zBC.rVal_r; zBC_i.lCode = spline->zBC.lCode; zBC_i.rCode = spline->zBC.rCode; zBC_i.lVal = spline->zBC.lVal_i; zBC_i.rVal = spline->zBC.rVal_i; complex_double *coefs = spline->coefs + num; int N = spline->num_splines; int zs = spline->z_stride; // First, solve in the X-direction for (int iy=0; iyx_grid, xBC_r, ((double*)data)+doffset, (intptr_t)2*My*Mz, ((double*)coefs)+coffset, (intptr_t)2*Ny*Nz*zs); // Imag part find_coefs_1d_d (spline->x_grid, xBC_i, ((double*)data)+doffset+1, (intptr_t)2*My*Mz, ((double*)coefs)+coffset+1, (intptr_t)2*Ny*Nz*zs); } // Now, solve in the Y-direction for (int ix=0; ixy_grid, yBC_r, ((double*)coefs)+doffset, (intptr_t)2*Nz*zs, ((double*)coefs)+coffset, (intptr_t)2*Nz*zs); // Imag part find_coefs_1d_d (spline->y_grid, yBC_i, 
((double*)coefs)+doffset+1, (intptr_t)2*Nz*zs, ((double*)coefs)+coffset+1, (intptr_t)2*Nz*zs); } // Now, solve in the Z-direction for (int ix=0; ixz_grid, zBC_r, ((double*)coefs)+doffset, (intptr_t)2*zs, ((double*)coefs)+coffset, (intptr_t)2*zs); // Imag part find_coefs_1d_d (spline->z_grid, zBC_i, ((double*)coefs)+doffset+1, (intptr_t)2*zs, ((double*)coefs)+coffset+1, (intptr_t)2*zs); } } void destroy_multi_UBspline (Bspline *spline) { free (spline->coefs); free (spline); } einspline-0.9.2/src/bspline_eval_std_z.h0000664000113000011300000013446311012400563015224 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. // // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. 
// // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #ifndef BSPLINE_EVAL_STD_Z_H #define BSPLINE_EVAL_STD_Z_H #include #include extern const double* restrict Ad; extern const double* restrict dAd; extern const double* restrict d2Ad; /************************************************************/ /* 1D double-precision, complex evaulation functions */ /************************************************************/ /* Value only */ inline void eval_UBspline_1d_z (UBspline_1d_z * restrict spline, double x, complex_double* restrict val) { x -= spline->x_grid.start; double u = x*spline->x_grid.delta_inv; double ipart, t; t = modf (u, &ipart); int i = (int) ipart; double tp[4]; tp[0] = t*t*t; tp[1] = t*t; tp[2] = t; tp[3] = 1.0; complex_double* restrict coefs = spline->coefs; *val = (coefs[i+0]*(Ad[ 0]*tp[0] + Ad[ 1]*tp[1] + Ad[ 2]*tp[2] + Ad[ 3]*tp[3])+ coefs[i+1]*(Ad[ 4]*tp[0] + Ad[ 5]*tp[1] + Ad[ 6]*tp[2] + Ad[ 7]*tp[3])+ coefs[i+2]*(Ad[ 8]*tp[0] + Ad[ 9]*tp[1] + Ad[10]*tp[2] + Ad[11]*tp[3])+ coefs[i+3]*(Ad[12]*tp[0] + Ad[13]*tp[1] + Ad[14]*tp[2] + Ad[15]*tp[3])); } /* Value and first derivative */ inline void eval_UBspline_1d_z_vg (UBspline_1d_z * restrict spline, double x, complex_double* restrict val, complex_double* restrict grad) { x -= spline->x_grid.start; double u = x*spline->x_grid.delta_inv; double ipart, t; t = modf (u, &ipart); int i = (int) ipart; double tp[4]; tp[0] = t*t*t; tp[1] = t*t; tp[2] = t; tp[3] = 1.0; complex_double* restrict coefs = spline->coefs; *val = (coefs[i+0]*(Ad[ 0]*tp[0] + Ad[ 1]*tp[1] + Ad[ 2]*tp[2] + Ad[ 3]*tp[3])+ coefs[i+1]*(Ad[ 4]*tp[0] + Ad[ 5]*tp[1] + Ad[ 6]*tp[2] + Ad[ 7]*tp[3])+ coefs[i+2]*(Ad[ 8]*tp[0] + Ad[ 9]*tp[1] + Ad[10]*tp[2] + Ad[11]*tp[3])+ 
coefs[i+3]*(Ad[12]*tp[0] + Ad[13]*tp[1] + Ad[14]*tp[2] + Ad[15]*tp[3])); *grad = spline->x_grid.delta_inv * (coefs[i+0]*(dAd[ 1]*tp[1] + dAd[ 2]*tp[2] + dAd[ 3]*tp[3])+ coefs[i+1]*(dAd[ 5]*tp[1] + dAd[ 6]*tp[2] + dAd[ 7]*tp[3])+ coefs[i+2]*(dAd[ 9]*tp[1] + dAd[10]*tp[2] + dAd[11]*tp[3])+ coefs[i+3]*(dAd[13]*tp[1] + dAd[14]*tp[2] + dAd[15]*tp[3])); } /* Value, first derivative, and second derivative */ inline void eval_UBspline_1d_z_vgl (UBspline_1d_z * restrict spline, double x, complex_double* restrict val, complex_double* restrict grad, complex_double* restrict lapl) { x -= spline->x_grid.start; double u = x*spline->x_grid.delta_inv; double ipart, t; t = modf (u, &ipart); int i = (int) ipart; double tp[4]; tp[0] = t*t*t; tp[1] = t*t; tp[2] = t; tp[3] = 1.0; complex_double* restrict coefs = spline->coefs; *val = (coefs[i+0]*(Ad[ 0]*tp[0] + Ad[ 1]*tp[1] + Ad[ 2]*tp[2] + Ad[ 3]*tp[3])+ coefs[i+1]*(Ad[ 4]*tp[0] + Ad[ 5]*tp[1] + Ad[ 6]*tp[2] + Ad[ 7]*tp[3])+ coefs[i+2]*(Ad[ 8]*tp[0] + Ad[ 9]*tp[1] + Ad[10]*tp[2] + Ad[11]*tp[3])+ coefs[i+3]*(Ad[12]*tp[0] + Ad[13]*tp[1] + Ad[14]*tp[2] + Ad[15]*tp[3])); *grad = spline->x_grid.delta_inv * (coefs[i+0]*(dAd[ 1]*tp[1] + dAd[ 2]*tp[2] + dAd[ 3]*tp[3])+ coefs[i+1]*(dAd[ 5]*tp[1] + dAd[ 6]*tp[2] + dAd[ 7]*tp[3])+ coefs[i+2]*(dAd[ 9]*tp[1] + dAd[10]*tp[2] + dAd[11]*tp[3])+ coefs[i+3]*(dAd[13]*tp[1] + dAd[14]*tp[2] + dAd[15]*tp[3])); *lapl = spline->x_grid.delta_inv * spline->x_grid.delta_inv * (coefs[i+0]*(d2Ad[ 2]*tp[2] + d2Ad[ 3]*tp[3])+ coefs[i+1]*(d2Ad[ 6]*tp[2] + d2Ad[ 7]*tp[3])+ coefs[i+2]*(d2Ad[10]*tp[2] + d2Ad[11]*tp[3])+ coefs[i+3]*(d2Ad[14]*tp[2] + d2Ad[15]*tp[3])); } inline void eval_UBspline_1d_z_vgh (UBspline_1d_z * restrict spline, double x, complex_double* restrict val, complex_double* restrict grad, complex_double* restrict hess) { eval_UBspline_1d_z_vgh (spline, x, val, grad, hess); } /************************************************************/ /* 2D double-precision, complex evaulation functions */ 
/************************************************************/

/* Value only: evaluate the 2D complex spline at (x, y) into *val.
 * No range check is made on x or y. */
inline void
eval_UBspline_2d_z (UBspline_2d_z * restrict spline,
                    double x, double y,
                    complex_double* restrict val)
{
  /* Split each scaled offset into a cell index and a fractional part. */
  double cellx, celly;
  const double fx = modf ((x - spline->x_grid.start)*spline->x_grid.delta_inv, &cellx);
  const double fy = modf ((y - spline->y_grid.start)*spline->y_grid.delta_inv, &celly);
  const int ix = (int) cellx;
  const int iy = (int) celly;

  /* Powers of the fractional parts, highest power first. */
  const double px[4] = { fx*fx*fx, fx*fx, fx, 1.0 };
  const double py[4] = { fy*fy*fy, fy*fy, fy, 1.0 };

  /* Cubic weights along x (a) and along y (b). */
  double a[4], b[4];
  for (int k = 0; k < 4; k++) {
    a[k] = Ad[4*k+0]*px[0] + Ad[4*k+1]*px[1] + Ad[4*k+2]*px[2] + Ad[4*k+3]*px[3];
    b[k] = Ad[4*k+0]*py[0] + Ad[4*k+1]*py[1] + Ad[4*k+2]*py[2] + Ad[4*k+3]*py[3];
  }

  complex_double* restrict coefs = spline->coefs;
  const int xs = spline->x_stride;
#define C(i,j) coefs[(ix+(i))*xs+iy+(j)]
  /* Contract every coefficient row with the y weights, then with a. */
  complex_double rb[4];
  for (int i = 0; i < 4; i++)
    rb[i] = C(i,0)*b[0] + C(i,1)*b[1] + C(i,2)*b[2] + C(i,3)*b[3];

  *val = (a[0]*rb[0] + a[1]*rb[1] + a[2]*rb[2] + a[3]*rb[3]);
#undef C
}


/* Value and gradient: grad[0] is the x derivative, grad[1] the y
 * derivative, each scaled by the matching delta_inv. */
inline void
eval_UBspline_2d_z_vg (UBspline_2d_z * restrict spline,
                       double x, double y,
                       complex_double* restrict val,
                       complex_double* restrict grad)
{
  double cellx, celly;
  const double fx = modf ((x - spline->x_grid.start)*spline->x_grid.delta_inv, &cellx);
  const double fy = modf ((y - spline->y_grid.start)*spline->y_grid.delta_inv, &celly);
  const int ix = (int) cellx;
  const int iy = (int) celly;

  const double px[4] = { fx*fx*fx, fx*fx, fx, 1.0 };
  const double py[4] = { fy*fy*fy, fy*fy, fy, 1.0 };

  /* Value weights (a, b) and first-derivative weights (da, db). */
  double a[4], b[4], da[4], db[4];
  for (int k = 0; k < 4; k++) {
    a[k]  = Ad[4*k+0]*px[0] + Ad[4*k+1]*px[1] + Ad[4*k+2]*px[2] + Ad[4*k+3]*px[3];
    da[k] = dAd[4*k+1]*px[1] + dAd[4*k+2]*px[2] + dAd[4*k+3]*px[3];
    b[k]  = Ad[4*k+0]*py[0] + Ad[4*k+1]*py[1] + Ad[4*k+2]*py[2] + Ad[4*k+3]*py[3];
    db[k] = dAd[4*k+1]*py[1] + dAd[4*k+2]*py[2] + dAd[4*k+3]*py[3];
  }

  complex_double* restrict coefs = spline->coefs;
  const int xs = spline->x_stride;
#define C(i,j) coefs[(ix+(i))*xs+iy+(j)]
  /* Row contractions with b and with db are shared by the outputs. */
  complex_double rb[4], rdb[4];
  for (int i = 0; i < 4; i++) {
    rb[i]  = C(i,0)*b[0] + C(i,1)*b[1] + C(i,2)*b[2] + C(i,3)*b[3];
    rdb[i] = C(i,0)*db[0] + C(i,1)*db[1] + C(i,2)*db[2] + C(i,3)*db[3];
  }

  *val = (a[0]*rb[0] + a[1]*rb[1] + a[2]*rb[2] + a[3]*rb[3]);
  grad[0] = spline->x_grid.delta_inv *
    (da[0]*rb[0] + da[1]*rb[1] + da[2]*rb[2] + da[3]*rb[3]);
  grad[1] = spline->y_grid.delta_inv *
    (a[0]*rdb[0] + a[1]*rdb[1] + a[2]*rdb[2] + a[3]*rdb[3]);
#undef C
}

/* Value, gradient, and laplacian: *lapl is the sum of the yy and xx
 * second derivatives, each scaled by the matching delta_inv squared. */
inline void
eval_UBspline_2d_z_vgl (UBspline_2d_z * restrict spline,
                        double x, double y,
                        complex_double* restrict val,
                        complex_double* restrict grad,
                        complex_double* restrict lapl)
{
  double cellx, celly;
  const double fx = modf ((x - spline->x_grid.start)*spline->x_grid.delta_inv, &cellx);
  const double fy = modf ((y - spline->y_grid.start)*spline->y_grid.delta_inv, &celly);
  const int ix = (int) cellx;
  const int iy = (int) celly;

  const double px[4] = { fx*fx*fx, fx*fx, fx, 1.0 };
  const double py[4] = { fy*fy*fy, fy*fy, fy, 1.0 };

  /* Value, first-derivative and second-derivative weights per axis. */
  double a[4], b[4], da[4], db[4], d2a[4], d2b[4];
  for (int k = 0; k < 4; k++) {
    a[k]   = Ad[4*k+0]*px[0] + Ad[4*k+1]*px[1] + Ad[4*k+2]*px[2] + Ad[4*k+3]*px[3];
    da[k]  = dAd[4*k+1]*px[1] + dAd[4*k+2]*px[2] + dAd[4*k+3]*px[3];
    d2a[k] = d2Ad[4*k+2]*px[2] + d2Ad[4*k+3]*px[3];
    b[k]   = Ad[4*k+0]*py[0] + Ad[4*k+1]*py[1] + Ad[4*k+2]*py[2] + Ad[4*k+3]*py[3];
    db[k]  = dAd[4*k+1]*py[1] + dAd[4*k+2]*py[2] + dAd[4*k+3]*py[3];
    d2b[k] = d2Ad[4*k+2]*py[2] + d2Ad[4*k+3]*py[3];
  }

  complex_double* restrict coefs = spline->coefs;
  const int xs = spline->x_stride;
#define C(i,j) coefs[(ix+(i))*xs+iy+(j)]
  complex_double rb[4], rdb[4], rd2b[4];
  for (int i = 0; i < 4; i++) {
    rb[i]   = C(i,0)*b[0] + C(i,1)*b[1] + C(i,2)*b[2] + C(i,3)*b[3];
    rdb[i]  = C(i,0)*db[0] + C(i,1)*db[1] + C(i,2)*db[2] + C(i,3)*db[3];
    rd2b[i] = C(i,0)*d2b[0] + C(i,1)*d2b[1] + C(i,2)*d2b[2] + C(i,3)*d2b[3];
  }

  *val = (a[0]*rb[0] + a[1]*rb[1] + a[2]*rb[2] + a[3]*rb[3]);
  grad[0] = spline->x_grid.delta_inv *
    (da[0]*rb[0] + da[1]*rb[1] + da[2]*rb[2] + da[3]*rb[3]);
  grad[1] = spline->y_grid.delta_inv *
    (a[0]*rdb[0] + a[1]*rdb[1] + a[2]*rdb[2] + a[3]*rdb[3]);
  *lapl =
    spline->y_grid.delta_inv * spline->y_grid.delta_inv *
    (a[0]*rd2b[0] + a[1]*rd2b[1] + a[2]*rd2b[2] + a[3]*rd2b[3])
    +
    spline->x_grid.delta_inv * spline->x_grid.delta_inv *
    (d2a[0]*rb[0] + d2a[1]*rb[1] + d2a[2]*rb[2] + d2a[3]*rb[3]);
#undef C
}


/* Value, gradient, and Hessian: hess[0] = xx, hess[1] = hess[2] = xy,
 * hess[3] = yy, i.e. a 2x2 matrix stored row by row. */
inline void
eval_UBspline_2d_z_vgh (UBspline_2d_z * restrict spline,
                        double x, double y,
                        complex_double* restrict val,
                        complex_double* restrict grad,
                        complex_double* restrict hess)
{
  double cellx, celly;
  const double fx = modf ((x - spline->x_grid.start)*spline->x_grid.delta_inv, &cellx);
  const double fy = modf ((y - spline->y_grid.start)*spline->y_grid.delta_inv, &celly);
  const int ix = (int) cellx;
  const int iy = (int) celly;

  const double px[4] = { fx*fx*fx, fx*fx, fx, 1.0 };
  const double py[4] = { fy*fy*fy, fy*fy, fy, 1.0 };

  double a[4], b[4], da[4], db[4], d2a[4], d2b[4];
  for (int k = 0; k < 4; k++) {
    a[k]   = Ad[4*k+0]*px[0] + Ad[4*k+1]*px[1] + Ad[4*k+2]*px[2] + Ad[4*k+3]*px[3];
    da[k]  = dAd[4*k+1]*px[1] + dAd[4*k+2]*px[2] + dAd[4*k+3]*px[3];
    d2a[k] = d2Ad[4*k+2]*px[2] + d2Ad[4*k+3]*px[3];
    b[k]   = Ad[4*k+0]*py[0] + Ad[4*k+1]*py[1] + Ad[4*k+2]*py[2] + Ad[4*k+3]*py[3];
    db[k]  = dAd[4*k+1]*py[1] + dAd[4*k+2]*py[2] + dAd[4*k+3]*py[3];
    d2b[k] = d2Ad[4*k+2]*py[2] + d2Ad[4*k+3]*py[3];
  }

  complex_double* restrict coefs = spline->coefs;
  const int xs = spline->x_stride;
#define C(i,j) coefs[(ix+(i))*xs+iy+(j)]
  complex_double rb[4], rdb[4], rd2b[4];
  for (int i = 0; i < 4; i++) {
    rb[i]   = C(i,0)*b[0] + C(i,1)*b[1] + C(i,2)*b[2] + C(i,3)*b[3];
    rdb[i]  = C(i,0)*db[0] + C(i,1)*db[1] + C(i,2)*db[2] + C(i,3)*db[3];
    rd2b[i] = C(i,0)*d2b[0] + C(i,1)*d2b[1] + C(i,2)*d2b[2] + C(i,3)*d2b[3];
  }

  *val = (a[0]*rb[0] + a[1]*rb[1] + a[2]*rb[2] + a[3]*rb[3]);
  grad[0] = spline->x_grid.delta_inv *
    (da[0]*rb[0] + da[1]*rb[1] + da[2]*rb[2] + da[3]*rb[3]);
  grad[1] = spline->y_grid.delta_inv *
    (a[0]*rdb[0] + a[1]*rdb[1] + a[2]*rdb[2] + a[3]*rdb[3]);
  hess[0] = spline->x_grid.delta_inv * spline->x_grid.delta_inv *
    (d2a[0]*rb[0] + d2a[1]*rb[1] + d2a[2]*rb[2] + d2a[3]*rb[3]);
  hess[1] = spline->x_grid.delta_inv * spline->y_grid.delta_inv *
    (da[0]*rdb[0] + da[1]*rdb[1] + da[2]*rdb[2] + da[3]*rdb[3]);
  hess[3] = spline->y_grid.delta_inv * spline->y_grid.delta_inv *
    (a[0]*rd2b[0] + a[1]*rd2b[1] + a[2]*rd2b[2] + a[3]*rd2b[3]);
  /* The mixed derivative is symmetric. */
  hess[2] = hess[1];
#undef C
}


/************************************************************/
/* 3D double-precision, complex evaluation functions        */
/************************************************************/

/* Value only: evaluate the 3D complex spline at (x, y, z) into *val.
 * No range check is made on the coordinates. */
inline void
eval_UBspline_3d_z (UBspline_3d_z * restrict spline,
                    double x, double y, double z,
                    complex_double* restrict val)
{
  /* Split each scaled offset into a cell index and a fractional part. */
  double cellx, celly, cellz;
  const double fx = modf ((x - spline->x_grid.start)*spline->x_grid.delta_inv, &cellx);
  const int ix = (int) cellx;
  const double fy = modf ((y - spline->y_grid.start)*spline->y_grid.delta_inv, &celly);
  const int iy = (int) celly;
  const double fz = modf ((z - spline->z_grid.start)*spline->z_grid.delta_inv, &cellz);
  const int iz = (int) cellz;

  /* Powers of the fractional parts, highest power first. */
  const double px[4] = { fx*fx*fx, fx*fx, fx, 1.0 };
  const double py[4] = { fy*fy*fy, fy*fy, fy, 1.0 };
  const double pz[4] = { fz*fz*fz, fz*fz, fz, 1.0 };

  /* Cubic weights along x (a), y (b) and z (c). */
  double a[4], b[4], c[4];
  for (int k = 0; k < 4; k++) {
    a[k] = Ad[4*k+0]*px[0] + Ad[4*k+1]*px[1] + Ad[4*k+2]*px[2] + Ad[4*k+3]*px[3];
    b[k] = Ad[4*k+0]*py[0] + Ad[4*k+1]*py[1] + Ad[4*k+2]*py[2] + Ad[4*k+3]*py[3];
    c[k] = Ad[4*k+0]*pz[0] + Ad[4*k+1]*pz[1] + Ad[4*k+2]*pz[2] + Ad[4*k+3]*pz[3];
  }

  complex_double* restrict coefs = spline->coefs;
  const int xs = spline->x_stride;
  const int ys = spline->y_stride;
#define P(i,j,k) coefs[(ix+(i))*xs+(iy+(j))*ys+(iz+(k))]
  /* Contract along z, then y, then x. */
  complex_double bcP[4];
  for (int i = 0; i < 4; i++) {
    complex_double cP[4];
    for (int j = 0; j < 4; j++)
      cP[j] = (P(i,j,0)*c[0]+P(i,j,1)*c[1]+P(i,j,2)*c[2]+P(i,j,3)*c[3]);
    bcP[i] = b[0]*cP[0] + b[1]*cP[1] + b[2]*cP[2] + b[3]*cP[3];
  }

  *val = (a[0]*bcP[0] + a[1]*bcP[1] + a[2]*bcP[2] + a[3]*bcP[3]);
#undef P
}

/* Value and gradient: grad[0], grad[1], grad[2] are the x, y and z
 * derivatives, each scaled by the matching delta_inv. */
inline void
eval_UBspline_3d_z_vg (UBspline_3d_z * restrict spline,
                       double x, double y, double z,
                       complex_double* restrict val,
                       complex_double* restrict grad)
{
  double cellx, celly, cellz;
  const double fx = modf ((x - spline->x_grid.start)*spline->x_grid.delta_inv, &cellx);
  const int ix = (int) cellx;
  const double fy = modf ((y - spline->y_grid.start)*spline->y_grid.delta_inv, &celly);
  const int iy = (int) celly;
  const double fz = modf ((z - spline->z_grid.start)*spline->z_grid.delta_inv, &cellz);
  const int iz = (int) cellz;

  const double px[4] = { fx*fx*fx, fx*fx, fx, 1.0 };
  const double py[4] = { fy*fy*fy, fy*fy, fy, 1.0 };
  const double pz[4] = { fz*fz*fz, fz*fz, fz, 1.0 };

  /* Value weights and first-derivative weights per axis. */
  double a[4], b[4], c[4], da[4], db[4], dc[4];
  for (int k = 0; k < 4; k++) {
    a[k]  = Ad[4*k+0]*px[0] + Ad[4*k+1]*px[1] + Ad[4*k+2]*px[2] + Ad[4*k+3]*px[3];
    da[k] = dAd[4*k+1]*px[1] + dAd[4*k+2]*px[2] + dAd[4*k+3]*px[3];
    b[k]  = Ad[4*k+0]*py[0] + Ad[4*k+1]*py[1] + Ad[4*k+2]*py[2] + Ad[4*k+3]*py[3];
    db[k] = dAd[4*k+1]*py[1] + dAd[4*k+2]*py[2] + dAd[4*k+3]*py[3];
    c[k]  = Ad[4*k+0]*pz[0] + Ad[4*k+1]*pz[1] + Ad[4*k+2]*pz[2] + Ad[4*k+3]*pz[3];
    dc[k] = dAd[4*k+1]*pz[1] + dAd[4*k+2]*pz[2] + dAd[4*k+3]*pz[3];
  }

  complex_double* restrict coefs = spline->coefs;
  const int xs = spline->x_stride;
  const int ys = spline->y_stride;
#define P(i,j,k) coefs[(ix+(i))*xs+(iy+(j))*ys+(iz+(k))]
  /* For every x slab: contract along z with c and dc, then along y. */
  complex_double bcP[4], dbcP[4], bdcP[4];
  for (int i = 0; i < 4; i++) {
    complex_double cP[4], dcP[4];
    for (int j = 0; j < 4; j++) {
      cP[j]  = (P(i,j,0)*c[0]+P(i,j,1)*c[1]+P(i,j,2)*c[2]+P(i,j,3)*c[3]);
      dcP[j] = (P(i,j,0)*dc[0]+P(i,j,1)*dc[1]+P(i,j,2)*dc[2]+P(i,j,3)*dc[3]);
    }
    bcP[i]  = ( b[0]*cP[0] +  b[1]*cP[1] +  b[2]*cP[2] +  b[3]*cP[3]);
    dbcP[i] = (db[0]*cP[0] + db[1]*cP[1] + db[2]*cP[2] + db[3]*cP[3]);
    bdcP[i] = b[0]*dcP[0] + b[1]*dcP[1] + b[2]*dcP[2] + b[3]*dcP[3];
  }

  *val = ( a[0]*bcP[0] + a[1]*bcP[1] + a[2]*bcP[2] + a[3]*bcP[3]);
  grad[0] = spline->x_grid.delta_inv *
    (da[0]*bcP[0] + da[1]*bcP[1] + da[2]*bcP[2] + da[3]*bcP[3]);
  grad[1] = spline->y_grid.delta_inv *
    (a[0]*dbcP[0] + a[1]*dbcP[1] + a[2]*dbcP[2] + a[3]*dbcP[3]);
  grad[2] = spline->z_grid.delta_inv *
    (a[0]*bdcP[0] + a[1]*bdcP[1] + a[2]*bdcP[2] + a[3]*bdcP[3]);
#undef P
}



/* Value, gradient, and laplacian: *lapl is the sum of the xx, yy and zz
 * second derivatives, each scaled by the matching delta_inv squared. */
inline void
eval_UBspline_3d_z_vgl (UBspline_3d_z * restrict spline,
                        double x, double y, double z,
                        complex_double* restrict val,
                        complex_double* restrict grad,
                        complex_double* restrict lapl)
{
  double cellx, celly, cellz;
  const double fx = modf ((x - spline->x_grid.start)*spline->x_grid.delta_inv, &cellx);
  const int ix = (int) cellx;
  const double fy = modf ((y - spline->y_grid.start)*spline->y_grid.delta_inv, &celly);
  const int iy = (int) celly;
  const double fz = modf ((z - spline->z_grid.start)*spline->z_grid.delta_inv, &cellz);
  const int iz = (int) cellz;

  const double px[4] = { fx*fx*fx, fx*fx, fx, 1.0 };
  const double py[4] = { fy*fy*fy, fy*fy, fy, 1.0 };
  const double pz[4] = { fz*fz*fz, fz*fz, fz, 1.0 };

  /* Value, first-derivative and second-derivative weights per axis. */
  double a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4];
  for (int k = 0; k < 4; k++) {
    a[k]   = Ad[4*k+0]*px[0] + Ad[4*k+1]*px[1] + Ad[4*k+2]*px[2] + Ad[4*k+3]*px[3];
    da[k]  = dAd[4*k+1]*px[1] + dAd[4*k+2]*px[2] + dAd[4*k+3]*px[3];
    d2a[k] = d2Ad[4*k+2]*px[2] + d2Ad[4*k+3]*px[3];
    b[k]   = Ad[4*k+0]*py[0] + Ad[4*k+1]*py[1] + Ad[4*k+2]*py[2] + Ad[4*k+3]*py[3];
    db[k]  = dAd[4*k+1]*py[1] + dAd[4*k+2]*py[2] + dAd[4*k+3]*py[3];
    d2b[k] = d2Ad[4*k+2]*py[2] + d2Ad[4*k+3]*py[3];
    c[k]   = Ad[4*k+0]*pz[0] + Ad[4*k+1]*pz[1] + Ad[4*k+2]*pz[2] + Ad[4*k+3]*pz[3];
    dc[k]  = dAd[4*k+1]*pz[1] + dAd[4*k+2]*pz[2] + dAd[4*k+3]*pz[3];
    d2c[k] = d2Ad[4*k+2]*pz[2] + d2Ad[4*k+3]*pz[3];
  }

  complex_double* restrict coefs = spline->coefs;
  const int xs = spline->x_stride;
  const int ys = spline->y_stride;
#define P(i,j,k) coefs[(ix+(i))*xs+(iy+(j))*ys+(iz+(k))]
  /* For every x slab: contract along z with c, dc and d2c, then along y. */
  complex_double bcP[4], dbcP[4], d2bcP[4], bdcP[4], bd2cP[4];
  for (int i = 0; i < 4; i++) {
    complex_double cP[4], dcP[4], d2cP[4];
    for (int j = 0; j < 4; j++) {
      cP[j]   = (P(i,j,0)*c[0]+P(i,j,1)*c[1]+P(i,j,2)*c[2]+P(i,j,3)*c[3]);
      dcP[j]  = (P(i,j,0)*dc[0]+P(i,j,1)*dc[1]+P(i,j,2)*dc[2]+P(i,j,3)*dc[3]);
      d2cP[j] = (P(i,j,0)*d2c[0]+P(i,j,1)*d2c[1]+P(i,j,2)*d2c[2]+P(i,j,3)*d2c[3]);
    }
    bcP[i]   = (  b[0]*cP[0] +   b[1]*cP[1] +   b[2]*cP[2] +   b[3]*cP[3]);
    dbcP[i]  = ( db[0]*cP[0] +  db[1]*cP[1] +  db[2]*cP[2] +  db[3]*cP[3]);
    d2bcP[i] = (d2b[0]*cP[0] + d2b[1]*cP[1] + d2b[2]*cP[2] + d2b[3]*cP[3]);
    bdcP[i]  = (  b[0]*dcP[0] +  b[1]*dcP[1] +  b[2]*dcP[2] +  b[3]*dcP[3]);
    bd2cP[i] = b[0]*d2cP[0] + b[1]*d2cP[1] + b[2]*d2cP[2] + b[3]*d2cP[3];
  }

  *val = ( a[0]*bcP[0] + a[1]*bcP[1] + a[2]*bcP[2] + a[3]*bcP[3]);
  grad[0] = spline->x_grid.delta_inv *
    (da[0]*bcP[0] + da[1]*bcP[1] + da[2]*bcP[2] + da[3]*bcP[3]);
  grad[1] = spline->y_grid.delta_inv *
    (a[0]*dbcP[0] + a[1]*dbcP[1] + a[2]*dbcP[2] + a[3]*dbcP[3]);
  grad[2] = spline->z_grid.delta_inv *
    (a[0]*bdcP[0] + a[1]*bdcP[1] + a[2]*bdcP[2] + a[3]*bdcP[3]);
  *lapl =
    spline->x_grid.delta_inv * spline->x_grid.delta_inv *
    (d2a[0]*bcP[0] + d2a[1]*bcP[1] + d2a[2]*bcP[2] + d2a[3]*bcP[3])
    + spline->y_grid.delta_inv * spline->y_grid.delta_inv *
    (a[0]*d2bcP[0] + a[1]*d2bcP[1] + a[2]*d2bcP[2] + a[3]*d2bcP[3])
    + spline->z_grid.delta_inv * spline->z_grid.delta_inv *
    (a[0]*bd2cP[0] + a[1]*bd2cP[1] + a[2]*bd2cP[2] + a[3]*bd2cP[3]);
#undef P
}



/* Value, gradient, and Hessian */
inline void
eval_UBspline_3d_z_vgh (UBspline_3d_z * restrict spline, double x, double y, double z, complex_double* restrict val, complex_double* restrict grad, complex_double* restrict hess)
{
  x -= spline->x_grid.start;
  y -= spline->y_grid.start;
  z -= spline->z_grid.start;
  double ux = x*spline->x_grid.delta_inv;
  double uy = y*spline->y_grid.delta_inv;
  double uz = z*spline->z_grid.delta_inv;
  ux = fmin (ux, (double)(spline->x_grid.num)-1.0e-5);
  uy = fmin (uy, (double)(spline->y_grid.num)-1.0e-5);
  uz = fmin (uz, (double)(spline->z_grid.num)-1.0e-5);
  double ipartx, iparty, ipartz, tx, ty, tz;
  tx = modf (ux, &ipartx);  int ix = (int) ipartx;
  ty = modf (uy,
&iparty); int iy = (int) iparty; tz = modf (uz, &ipartz); int iz = (int) ipartz; // if ((ix >= spline->x_grid.num)) x = spline->x_grid.num; // if ((ix < 0)) x = 0; // if ((iy >= spline->y_grid.num)) y = spline->y_grid.num; // if ((iy < 0)) y = 0; // if ((iz >= spline->z_grid.num)) z = spline->z_grid.num; // if ((iz < 0)) z = 0; double tpx[4], tpy[4], tpz[4], a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4]; complex_double cP[16], dcP[16], d2cP[16], bcP[4], dbcP[4], d2bcP[4], dbdcP[4], bd2cP[4], bdcP[4]; tpx[0] = tx*tx*tx; tpx[1] = tx*tx; tpx[2] = tx; tpx[3] = 1.0; tpy[0] = ty*ty*ty; tpy[1] = ty*ty; tpy[2] = ty; tpy[3] = 1.0; tpz[0] = tz*tz*tz; tpz[1] = tz*tz; tpz[2] = tz; tpz[3] = 1.0; complex_double* restrict coefs = spline->coefs; a[0] = ( Ad[ 0]*tpx[0] + Ad[ 1]*tpx[1] + Ad[ 2]*tpx[2] + Ad[ 3]*tpx[3]); a[1] = ( Ad[ 4]*tpx[0] + Ad[ 5]*tpx[1] + Ad[ 6]*tpx[2] + Ad[ 7]*tpx[3]); a[2] = ( Ad[ 8]*tpx[0] + Ad[ 9]*tpx[1] + Ad[10]*tpx[2] + Ad[11]*tpx[3]); a[3] = ( Ad[12]*tpx[0] + Ad[13]*tpx[1] + Ad[14]*tpx[2] + Ad[15]*tpx[3]); da[0] = ( dAd[ 1]*tpx[1] + dAd[ 2]*tpx[2] + dAd[ 3]*tpx[3]); da[1] = ( dAd[ 5]*tpx[1] + dAd[ 6]*tpx[2] + dAd[ 7]*tpx[3]); da[2] = ( dAd[ 9]*tpx[1] + dAd[10]*tpx[2] + dAd[11]*tpx[3]); da[3] = ( dAd[13]*tpx[1] + dAd[14]*tpx[2] + dAd[15]*tpx[3]); d2a[0] = (d2Ad[ 2]*tpx[2] + d2Ad[ 3]*tpx[3]); d2a[1] = (d2Ad[ 6]*tpx[2] + d2Ad[ 7]*tpx[3]); d2a[2] = (d2Ad[10]*tpx[2] + d2Ad[11]*tpx[3]); d2a[3] = (d2Ad[14]*tpx[2] + d2Ad[15]*tpx[3]); b[0] = ( Ad[ 0]*tpy[0] + Ad[ 1]*tpy[1] + Ad[ 2]*tpy[2] + Ad[ 3]*tpy[3]); b[1] = ( Ad[ 4]*tpy[0] + Ad[ 5]*tpy[1] + Ad[ 6]*tpy[2] + Ad[ 7]*tpy[3]); b[2] = ( Ad[ 8]*tpy[0] + Ad[ 9]*tpy[1] + Ad[10]*tpy[2] + Ad[11]*tpy[3]); b[3] = ( Ad[12]*tpy[0] + Ad[13]*tpy[1] + Ad[14]*tpy[2] + Ad[15]*tpy[3]); db[0] = (dAd[ 1]*tpy[1] + dAd[ 2]*tpy[2] + dAd[ 3]*tpy[3]); db[1] = (dAd[ 5]*tpy[1] + dAd[ 6]*tpy[2] + dAd[ 7]*tpy[3]); db[2] = (dAd[ 9]*tpy[1] + dAd[10]*tpy[2] + dAd[11]*tpy[3]); db[3] = (dAd[13]*tpy[1] + dAd[14]*tpy[2] + 
dAd[15]*tpy[3]); d2b[0] = (d2Ad[ 2]*tpy[2] + d2Ad[ 3]*tpy[3]); d2b[1] = (d2Ad[ 6]*tpy[2] + d2Ad[ 7]*tpy[3]); d2b[2] = (d2Ad[10]*tpy[2] + d2Ad[11]*tpy[3]); d2b[3] = (d2Ad[14]*tpy[2] + d2Ad[15]*tpy[3]); c[0] = ( Ad[ 0]*tpz[0] + Ad[ 1]*tpz[1] + Ad[ 2]*tpz[2] + Ad[ 3]*tpz[3]); c[1] = ( Ad[ 4]*tpz[0] + Ad[ 5]*tpz[1] + Ad[ 6]*tpz[2] + Ad[ 7]*tpz[3]); c[2] = ( Ad[ 8]*tpz[0] + Ad[ 9]*tpz[1] + Ad[10]*tpz[2] + Ad[11]*tpz[3]); c[3] = ( Ad[12]*tpz[0] + Ad[13]*tpz[1] + Ad[14]*tpz[2] + Ad[15]*tpz[3]); dc[0] = (dAd[ 1]*tpz[1] + dAd[ 2]*tpz[2] + dAd[ 3]*tpz[3]); dc[1] = (dAd[ 5]*tpz[1] + dAd[ 6]*tpz[2] + dAd[ 7]*tpz[3]); dc[2] = (dAd[ 9]*tpz[1] + dAd[10]*tpz[2] + dAd[11]*tpz[3]); dc[3] = (dAd[13]*tpz[1] + dAd[14]*tpz[2] + dAd[15]*tpz[3]); d2c[0] = (d2Ad[ 2]*tpz[2] + d2Ad[ 3]*tpz[3]); d2c[1] = (d2Ad[ 6]*tpz[2] + d2Ad[ 7]*tpz[3]); d2c[2] = (d2Ad[10]*tpz[2] + d2Ad[11]*tpz[3]); d2c[3] = (d2Ad[14]*tpz[2] + d2Ad[15]*tpz[3]); int xs = spline->x_stride; int ys = spline->y_stride; int offmax = (ix+3)*xs + (iy+3)*ys + iz+3; // if (offmax > spline->coef_size) { // fprintf (stderr, "Outside bounds in spline evalutation.\n" // "offmax = %d csize = %d\n", offmax, spline->csize); // fprintf (stderr, "ix=%d iy=%d iz=%d\n", ix,iy,iz); // } #define P(i,j,k) coefs[(ix+(i))*xs+(iy+(j))*ys+(iz+(k))] cP[ 0] = (P(0,0,0)*c[0]+P(0,0,1)*c[1]+P(0,0,2)*c[2]+P(0,0,3)*c[3]); cP[ 1] = (P(0,1,0)*c[0]+P(0,1,1)*c[1]+P(0,1,2)*c[2]+P(0,1,3)*c[3]); cP[ 2] = (P(0,2,0)*c[0]+P(0,2,1)*c[1]+P(0,2,2)*c[2]+P(0,2,3)*c[3]); cP[ 3] = (P(0,3,0)*c[0]+P(0,3,1)*c[1]+P(0,3,2)*c[2]+P(0,3,3)*c[3]); cP[ 4] = (P(1,0,0)*c[0]+P(1,0,1)*c[1]+P(1,0,2)*c[2]+P(1,0,3)*c[3]); cP[ 5] = (P(1,1,0)*c[0]+P(1,1,1)*c[1]+P(1,1,2)*c[2]+P(1,1,3)*c[3]); cP[ 6] = (P(1,2,0)*c[0]+P(1,2,1)*c[1]+P(1,2,2)*c[2]+P(1,2,3)*c[3]); cP[ 7] = (P(1,3,0)*c[0]+P(1,3,1)*c[1]+P(1,3,2)*c[2]+P(1,3,3)*c[3]); cP[ 8] = (P(2,0,0)*c[0]+P(2,0,1)*c[1]+P(2,0,2)*c[2]+P(2,0,3)*c[3]); cP[ 9] = (P(2,1,0)*c[0]+P(2,1,1)*c[1]+P(2,1,2)*c[2]+P(2,1,3)*c[3]); cP[10] = 
(P(2,2,0)*c[0]+P(2,2,1)*c[1]+P(2,2,2)*c[2]+P(2,2,3)*c[3]); cP[11] = (P(2,3,0)*c[0]+P(2,3,1)*c[1]+P(2,3,2)*c[2]+P(2,3,3)*c[3]); cP[12] = (P(3,0,0)*c[0]+P(3,0,1)*c[1]+P(3,0,2)*c[2]+P(3,0,3)*c[3]); cP[13] = (P(3,1,0)*c[0]+P(3,1,1)*c[1]+P(3,1,2)*c[2]+P(3,1,3)*c[3]); cP[14] = (P(3,2,0)*c[0]+P(3,2,1)*c[1]+P(3,2,2)*c[2]+P(3,2,3)*c[3]); cP[15] = (P(3,3,0)*c[0]+P(3,3,1)*c[1]+P(3,3,2)*c[2]+P(3,3,3)*c[3]); dcP[ 0] = (P(0,0,0)*dc[0]+P(0,0,1)*dc[1]+P(0,0,2)*dc[2]+P(0,0,3)*dc[3]); dcP[ 1] = (P(0,1,0)*dc[0]+P(0,1,1)*dc[1]+P(0,1,2)*dc[2]+P(0,1,3)*dc[3]); dcP[ 2] = (P(0,2,0)*dc[0]+P(0,2,1)*dc[1]+P(0,2,2)*dc[2]+P(0,2,3)*dc[3]); dcP[ 3] = (P(0,3,0)*dc[0]+P(0,3,1)*dc[1]+P(0,3,2)*dc[2]+P(0,3,3)*dc[3]); dcP[ 4] = (P(1,0,0)*dc[0]+P(1,0,1)*dc[1]+P(1,0,2)*dc[2]+P(1,0,3)*dc[3]); dcP[ 5] = (P(1,1,0)*dc[0]+P(1,1,1)*dc[1]+P(1,1,2)*dc[2]+P(1,1,3)*dc[3]); dcP[ 6] = (P(1,2,0)*dc[0]+P(1,2,1)*dc[1]+P(1,2,2)*dc[2]+P(1,2,3)*dc[3]); dcP[ 7] = (P(1,3,0)*dc[0]+P(1,3,1)*dc[1]+P(1,3,2)*dc[2]+P(1,3,3)*dc[3]); dcP[ 8] = (P(2,0,0)*dc[0]+P(2,0,1)*dc[1]+P(2,0,2)*dc[2]+P(2,0,3)*dc[3]); dcP[ 9] = (P(2,1,0)*dc[0]+P(2,1,1)*dc[1]+P(2,1,2)*dc[2]+P(2,1,3)*dc[3]); dcP[10] = (P(2,2,0)*dc[0]+P(2,2,1)*dc[1]+P(2,2,2)*dc[2]+P(2,2,3)*dc[3]); dcP[11] = (P(2,3,0)*dc[0]+P(2,3,1)*dc[1]+P(2,3,2)*dc[2]+P(2,3,3)*dc[3]); dcP[12] = (P(3,0,0)*dc[0]+P(3,0,1)*dc[1]+P(3,0,2)*dc[2]+P(3,0,3)*dc[3]); dcP[13] = (P(3,1,0)*dc[0]+P(3,1,1)*dc[1]+P(3,1,2)*dc[2]+P(3,1,3)*dc[3]); dcP[14] = (P(3,2,0)*dc[0]+P(3,2,1)*dc[1]+P(3,2,2)*dc[2]+P(3,2,3)*dc[3]); dcP[15] = (P(3,3,0)*dc[0]+P(3,3,1)*dc[1]+P(3,3,2)*dc[2]+P(3,3,3)*dc[3]); d2cP[ 0] = (P(0,0,0)*d2c[0]+P(0,0,1)*d2c[1]+P(0,0,2)*d2c[2]+P(0,0,3)*d2c[3]); d2cP[ 1] = (P(0,1,0)*d2c[0]+P(0,1,1)*d2c[1]+P(0,1,2)*d2c[2]+P(0,1,3)*d2c[3]); d2cP[ 2] = (P(0,2,0)*d2c[0]+P(0,2,1)*d2c[1]+P(0,2,2)*d2c[2]+P(0,2,3)*d2c[3]); d2cP[ 3] = (P(0,3,0)*d2c[0]+P(0,3,1)*d2c[1]+P(0,3,2)*d2c[2]+P(0,3,3)*d2c[3]); d2cP[ 4] = (P(1,0,0)*d2c[0]+P(1,0,1)*d2c[1]+P(1,0,2)*d2c[2]+P(1,0,3)*d2c[3]); d2cP[ 5] = 
(P(1,1,0)*d2c[0]+P(1,1,1)*d2c[1]+P(1,1,2)*d2c[2]+P(1,1,3)*d2c[3]); d2cP[ 6] = (P(1,2,0)*d2c[0]+P(1,2,1)*d2c[1]+P(1,2,2)*d2c[2]+P(1,2,3)*d2c[3]); d2cP[ 7] = (P(1,3,0)*d2c[0]+P(1,3,1)*d2c[1]+P(1,3,2)*d2c[2]+P(1,3,3)*d2c[3]); d2cP[ 8] = (P(2,0,0)*d2c[0]+P(2,0,1)*d2c[1]+P(2,0,2)*d2c[2]+P(2,0,3)*d2c[3]); d2cP[ 9] = (P(2,1,0)*d2c[0]+P(2,1,1)*d2c[1]+P(2,1,2)*d2c[2]+P(2,1,3)*d2c[3]); d2cP[10] = (P(2,2,0)*d2c[0]+P(2,2,1)*d2c[1]+P(2,2,2)*d2c[2]+P(2,2,3)*d2c[3]); d2cP[11] = (P(2,3,0)*d2c[0]+P(2,3,1)*d2c[1]+P(2,3,2)*d2c[2]+P(2,3,3)*d2c[3]); d2cP[12] = (P(3,0,0)*d2c[0]+P(3,0,1)*d2c[1]+P(3,0,2)*d2c[2]+P(3,0,3)*d2c[3]); d2cP[13] = (P(3,1,0)*d2c[0]+P(3,1,1)*d2c[1]+P(3,1,2)*d2c[2]+P(3,1,3)*d2c[3]); d2cP[14] = (P(3,2,0)*d2c[0]+P(3,2,1)*d2c[1]+P(3,2,2)*d2c[2]+P(3,2,3)*d2c[3]); d2cP[15] = (P(3,3,0)*d2c[0]+P(3,3,1)*d2c[1]+P(3,3,2)*d2c[2]+P(3,3,3)*d2c[3]); bcP[0] = ( b[0]*cP[ 0] + b[1]*cP[ 1] + b[2]*cP[ 2] + b[3]*cP[ 3]); bcP[1] = ( b[0]*cP[ 4] + b[1]*cP[ 5] + b[2]*cP[ 6] + b[3]*cP[ 7]); bcP[2] = ( b[0]*cP[ 8] + b[1]*cP[ 9] + b[2]*cP[10] + b[3]*cP[11]); bcP[3] = ( b[0]*cP[12] + b[1]*cP[13] + b[2]*cP[14] + b[3]*cP[15]); dbcP[0] = ( db[0]*cP[ 0] + db[1]*cP[ 1] + db[2]*cP[ 2] + db[3]*cP[ 3]); dbcP[1] = ( db[0]*cP[ 4] + db[1]*cP[ 5] + db[2]*cP[ 6] + db[3]*cP[ 7]); dbcP[2] = ( db[0]*cP[ 8] + db[1]*cP[ 9] + db[2]*cP[10] + db[3]*cP[11]); dbcP[3] = ( db[0]*cP[12] + db[1]*cP[13] + db[2]*cP[14] + db[3]*cP[15]); bdcP[0] = ( b[0]*dcP[ 0] + b[1]*dcP[ 1] + b[2]*dcP[ 2] + b[3]*dcP[ 3]); bdcP[1] = ( b[0]*dcP[ 4] + b[1]*dcP[ 5] + b[2]*dcP[ 6] + b[3]*dcP[ 7]); bdcP[2] = ( b[0]*dcP[ 8] + b[1]*dcP[ 9] + b[2]*dcP[10] + b[3]*dcP[11]); bdcP[3] = ( b[0]*dcP[12] + b[1]*dcP[13] + b[2]*dcP[14] + b[3]*dcP[15]); bd2cP[0] = ( b[0]*d2cP[ 0] + b[1]*d2cP[ 1] + b[2]*d2cP[ 2] + b[3]*d2cP[ 3]); bd2cP[1] = ( b[0]*d2cP[ 4] + b[1]*d2cP[ 5] + b[2]*d2cP[ 6] + b[3]*d2cP[ 7]); bd2cP[2] = ( b[0]*d2cP[ 8] + b[1]*d2cP[ 9] + b[2]*d2cP[10] + b[3]*d2cP[11]); bd2cP[3] = ( b[0]*d2cP[12] + b[1]*d2cP[13] + b[2]*d2cP[14] + b[3]*d2cP[15]); 
d2bcP[0] = ( d2b[0]*cP[ 0] + d2b[1]*cP[ 1] + d2b[2]*cP[ 2] + d2b[3]*cP[ 3]); d2bcP[1] = ( d2b[0]*cP[ 4] + d2b[1]*cP[ 5] + d2b[2]*cP[ 6] + d2b[3]*cP[ 7]); d2bcP[2] = ( d2b[0]*cP[ 8] + d2b[1]*cP[ 9] + d2b[2]*cP[10] + d2b[3]*cP[11]); d2bcP[3] = ( d2b[0]*cP[12] + d2b[1]*cP[13] + d2b[2]*cP[14] + d2b[3]*cP[15]); dbdcP[0] = ( db[0]*dcP[ 0] + db[1]*dcP[ 1] + db[2]*dcP[ 2] + db[3]*dcP[ 3]); dbdcP[1] = ( db[0]*dcP[ 4] + db[1]*dcP[ 5] + db[2]*dcP[ 6] + db[3]*dcP[ 7]); dbdcP[2] = ( db[0]*dcP[ 8] + db[1]*dcP[ 9] + db[2]*dcP[10] + db[3]*dcP[11]); dbdcP[3] = ( db[0]*dcP[12] + db[1]*dcP[13] + db[2]*dcP[14] + db[3]*dcP[15]); *val = a[0]*bcP[0] + a[1]*bcP[1] + a[2]*bcP[2] + a[3]*bcP[3]; grad[0] = spline->x_grid.delta_inv * (da[0] *bcP[0] + da[1]*bcP[1] + da[2]*bcP[2] + da[3]*bcP[3]); grad[1] = spline->y_grid.delta_inv * (a[0]*dbcP[0] + a[1]*dbcP[1] + a[2]*dbcP[2] + a[3]*dbcP[3]); grad[2] = spline->z_grid.delta_inv * (a[0]*bdcP[0] + a[1]*bdcP[1] + a[2]*bdcP[2] + a[3]*bdcP[3]); // d2x hess[0] = spline->x_grid.delta_inv * spline->x_grid.delta_inv * (d2a[0]*bcP[0] + d2a[1]*bcP[1] + d2a[2]*bcP[2] + d2a[3]*bcP[3]); // dx dy hess[1] = spline->x_grid.delta_inv * spline->y_grid.delta_inv * (da[0]*dbcP[0] + da[1]*dbcP[1] + da[2]*dbcP[2] + da[3]*dbcP[3]); hess[3] = hess[1]; // dx dz; hess[2] = spline->x_grid.delta_inv * spline->z_grid.delta_inv * (da[0]*bdcP[0] + da[1]*bdcP[1] + da[2]*bdcP[2] + da[3]*bdcP[3]); hess[6] = hess[2]; // d2y hess[4] = spline->y_grid.delta_inv * spline->y_grid.delta_inv * (a[0]*d2bcP[0] + a[1]*d2bcP[1] + a[2]*d2bcP[2] + a[3]*d2bcP[3]); // dy dz hess[5] = spline->y_grid.delta_inv * spline->z_grid.delta_inv * (a[0]*dbdcP[0] + a[1]*dbdcP[1] + a[2]*dbdcP[2] + a[3]*dbdcP[3]); hess[7] = hess[5]; // d2z hess[8] = spline->z_grid.delta_inv * spline->z_grid.delta_inv * (a[0]*bd2cP[0] + a[1]*bd2cP[1] + a[2]*bd2cP[2] + a[3]*bd2cP[3]); #undef P } #endif einspline-0.9.2/src/Makefile.am0000664000113000011300000002023611262703273013244 00000000000000AM_LDFLAGS = $(ALL_STATIC) if 
WANT_FORTRAN
# Fortran wrapper sources; appended to libeinspline_la_SOURCES below.
MAYBE_FORTRAN = fbspline.c        \
                fbspline.h        \
                fmulti_bspline.c  \
                fmulti_bspline.h  \
                fnubspline.c      \
                fnubspline.h
endif

if WANT_BLIPS
MAYBE_BLIPS = blip_create.c blip_create.h
CHECK_BLIPS = test_blip
endif

if HAVE_CUDA
MAYBE_CUDA = multi_bspline_create_cuda.cu   \
             multi_bspline_cuda_s_impl.h    \
             multi_bspline_cuda_c_impl.h    \
             multi_bspline_cuda_d_impl.h    \
             multi_bspline_cuda_z_impl.h    \
             multi_bspline_eval_cuda.h      \
             bspline_structs_cuda.h         \
             bspline_create_cuda.cu
CHECK_CUDA = test_multi_cuda
test_multi_cuda_SOURCES = test_multi_cuda.cu
# Uses $(FFTW3F_LIBS), the same variable every other *_LDADD in this file
# links against; this line previously read $(FFTW3F_LIB).
test_multi_cuda_LDADD = libeinspline.la $(FLIBS) $(FFTW3_LIBS) $(FFTW3F_LIBS) $(CUDA_LIBS)
endif

# Single-precision multi-spline evaluators: SSE or plain C versions.
if HAVE_SSE
SINGLE_SRC = multi_bspline_eval_sse_s.c multi_bspline_eval_sse_s_cpp.cc \
             multi_bspline_eval_sse_c.c multi_bspline_eval_sse_c_cpp.cc \
             multi_bspline_eval_sse_s_impl.h multi_bspline_eval_sse_c_impl.h
else
SINGLE_SRC = multi_bspline_eval_std_s.c multi_bspline_eval_std_s_cpp.cc \
             multi_bspline_eval_std_c.c multi_bspline_eval_std_c_cpp.cc \
             multi_bspline_eval_std_s_impl.h multi_bspline_eval_std_c_impl.h
endif

# Double-precision multi-spline evaluators: SSE2 or plain C versions.
if HAVE_SSE2
DOUBLE_SRC = multi_bspline_eval_sse_d.c multi_bspline_eval_sse_d_cpp.cc \
             multi_bspline_eval_sse_z.c multi_bspline_eval_sse_z_cpp.cc \
             multi_bspline_eval_sse_d_impl.h multi_bspline_eval_sse_z_impl.h \
             multi_nubspline_eval_sse_z.c multi_nubspline_eval_sse_z_cpp.cc \
             multi_nubspline_eval_sse_z_impl.h
else
DOUBLE_SRC = multi_bspline_eval_std_d.c multi_bspline_eval_std_d_cpp.cc \
             multi_bspline_eval_std_z.c multi_bspline_eval_std_z_cpp.cc \
             multi_bspline_eval_std_d_impl.h multi_bspline_eval_std_z_impl.h \
             multi_nubspline_eval_std_z.c multi_nubspline_eval_std_z_cpp.cc \
             multi_nubspline_eval_std_z_impl.h
endif

check_PROGRAMS = TestBspline TestNUBspline test_fbspline \
                 test_bspline_s test_bspline_d test_multi \
                 test_fmulti_bspline test_multi_cpp $(CHECK_BLIPS)

bin_PROGRAMS = time_multi $(CHECK_CUDA)

TestBspline_SOURCES = TestBspline.c
TestNUBspline_SOURCES = TestNUBspline.c
test_fbspline_SOURCES = test_fbspline.f
test_fmulti_bspline_SOURCES = test_fmulti_bspline.f
if WANT_BLIPS
test_blip_SOURCES = test_blip.c
endif
test_bspline_s_SOURCES = test_bspline_s.c
test_bspline_d_SOURCES = test_bspline_d.c
test_multi_SOURCES = test_multi.c
test_multi_cpp_SOURCES = test_multi_cpp.cc
time_multi_SOURCES = time_multi.c

LDADD =
# NOTE(review): Automake's per-Makefile C compiler flags variable is
# AM_CFLAGS; AM_CCFLAGS does not appear to be consumed by anything here.
# Left unchanged because renaming it would add -g to every C compile --
# confirm the intent before touching it.
AM_CCFLAGS = -g
TestBspline_LDADD = libeinspline.la $(FLIBS) $(FFTW3_LIBS) $(FFTW3F_LIBS)
TestNUBspline_LDADD = libeinspline.la $(FLIBS) $(FFTW3_LIBS) $(FFTW3F_LIBS)
test_fbspline_LDADD = libeinspline.la $(FLIBS) $(FFTW3_LIBS) $(FFTW3F_LIBS)
test_fmulti_bspline_LDADD = libeinspline.la $(FLIBS) $(FFTW3_LIBS) $(FFTW3F_LIBS)
test_blip_LDADD = libeinspline.la $(FLIBS) $(FFTW3_LIBS) $(FFTW3F_LIBS)
test_bspline_s_LDADD = libeinspline.la $(FLIBS) $(FFTW3_LIBS) $(FFTW3F_LIBS)
test_bspline_d_LDADD = libeinspline.la $(FLIBS) $(FFTW3_LIBS) $(FFTW3F_LIBS)
test_multi_LDADD = libeinspline.la $(FLIBS) $(FFTW3_LIBS) $(FFTW3F_LIBS)
test_multi_cpp_LDADD = libeinspline.la $(FLIBS) $(FFTW3_LIBS) $(FFTW3F_LIBS)
time_multi_LDADD = libeinspline.la $(FLIBS) $(FFTW3_LIBS) $(FFTW3F_LIBS)

lib_LTLIBRARIES = libeinspline.la

libeinspline_la_SOURCES =          \
        aligned_alloc.h            \
        bspline_base.h             \
        bspline_create.c           \
        bspline_create.h           \
        bspline_structs.h          \
        bspline_data.c             \
        bspline_eval_std_s.h       \
        bspline_eval_sse_s.h       \
        bspline_eval_std_c.h       \
        bspline_eval_sse_c.h       \
        bspline_eval_std_d.h       \
        bspline_eval_sse_d.h       \
        bspline_eval_std_z.h       \
        bspline_eval_sse_z.h       \
        multi_bspline.h            \
        multi_bspline_create.c     \
        multi_bspline_create.h     \
        multi_bspline_eval_c.h     \
        multi_bspline_eval_d.h     \
        multi_bspline_eval_s.h     \
        multi_bspline_eval_z.h     \
        multi_bspline_structs.h    \
        multi_nubspline_create.c   \
        nubspline_base.h           \
        nubspline_create.c         \
        nubspline_create.h         \
        nubspline_eval_sse_s.h     \
        nubspline_eval_std_s.h     \
        nubspline_eval_sse_c.h     \
        nubspline_eval_std_c.h     \
        nubspline_eval_sse_d.h     \
        nubspline_eval_std_d.h     \
        nubspline_eval_sse_z.h     \
        nubspline_eval_std_z.h     \
        nubspline_structs.h        \
        nubasis.h                  \
        nubasis.c                  \
        nugrid.h                   \
        nugrid.c                   \
        $(SINGLE_SRC)              \
        $(DOUBLE_SRC)              \
        $(MAYBE_BLIPS)             \
        $(MAYBE_CUDA)              \
        $(MAYBE_FORTRAN)

libeinspline_la_LIBADD = $(FLIBS) $(FFTW3_LIBS) $(FFTW3F_LIBS)

includedir=$(prefix)/include/einspline

# Suffix rules for CUDA sources.
.cu.o:
	$(NVCC) -c $(NVCCFLAGS) $<

.cu.lo:
	$(top_builddir)/cudalt.py $@ $(NVCC) -c $(NVCCFLAGS) $<

include_HEADERS = bspline_base.h bspline_create.h bspline_eval_sse_c.h \
                  bspline_eval_sse_d.h bspline_eval_sse_s.h \
                  bspline_eval_sse_z.h bspline_eval_std_c.h \
                  bspline_eval_std_d.h bspline_eval_std_s.h \
                  bspline_eval_std_z.h bspline.h bspline_structs.h \
                  fbspline.h fmulti_bspline.h fnubspline.h \
                  multi_bspline.h \
                  multi_bspline_create.h multi_bspline_structs.h \
                  multi_bspline_eval_c.h multi_bspline_eval_d.h \
                  multi_bspline_eval_s.h multi_bspline_eval_z.h \
                  multi_nubspline.h \
                  multi_nubspline_create.h multi_nubspline_structs.h \
                  multi_nubspline_eval_c.h multi_nubspline_eval_d.h \
                  multi_nubspline_eval_s.h multi_nubspline_eval_z.h \
                  nubspline_base.h nubspline_create.h \
                  nubspline_eval_sse_s.h nubspline_eval_std_s.h \
                  nubspline_eval_sse_c.h nubspline_eval_std_c.h \
                  nubspline_eval_sse_d.h nubspline_eval_std_d.h \
                  nubspline_eval_sse_z.h nubspline_eval_std_z.h \
                  nubspline_structs.h nubasis.h nubspline.h nugrid.h \
                  multi_bspline_structs_cuda.h multi_bspline_create_cuda.h \
                  multi_bspline_eval_cuda.h bspline_eval_cuda.h \
                  bspline_structs_cuda.h bspline_create_cuda.h \
                  config.h

EXTRA_DIST = multi_bspline_eval_cuda_c.cu
einspline-0.9.2/src/nubspline_eval_std_d.h0000664000113000011300000006573411012400563015545 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. // // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version.
// // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. // // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #ifndef NUBSPLINE_EVAL_STD_D_H #define NUBSPLINE_EVAL_STD_D_H #include #include #include "nubspline_structs.h" /************************************************************/ /* 1D single-precision, real evaulation functions */ /************************************************************/ /* Value only */ inline void eval_NUBspline_1d_d (NUBspline_1d_d * restrict spline, double x, double* restrict val) { double bfuncs[4]; int i = get_NUBasis_funcs_d (spline->x_basis, x, bfuncs); double* restrict coefs = spline->coefs; *val = (coefs[i+0]*bfuncs[0] +coefs[i+1]*bfuncs[1] + coefs[i+2]*bfuncs[2] +coefs[i+3]*bfuncs[3]); } /* Value and first derivative */ inline void eval_NUBspline_1d_d_vg (NUBspline_1d_d * restrict spline, double x, double* restrict val, double* restrict grad) { double bfuncs[4], dbfuncs[4]; int i = get_NUBasis_dfuncs_d (spline->x_basis, x, bfuncs, dbfuncs); double* restrict coefs = spline->coefs; *val = (coefs[i+0]* bfuncs[0] + coefs[i+1]* bfuncs[1] + coefs[i+2]* bfuncs[2] + coefs[i+3]* bfuncs[3]); *grad = (coefs[i+0]*dbfuncs[0] + coefs[i+1]*dbfuncs[1] + coefs[i+2]*dbfuncs[2] + coefs[i+3]*dbfuncs[3]); } /* Value, first derivative, and second derivative */ inline void eval_NUBspline_1d_d_vgl (NUBspline_1d_d * restrict spline, double x, double* restrict val, double* restrict grad, double* restrict lapl) { double bfuncs[4], dbfuncs[4], d2bfuncs[4]; int i = get_NUBasis_d2funcs_d 
(spline->x_basis, x, bfuncs, dbfuncs, d2bfuncs); double* restrict coefs = spline->coefs; *val = (coefs[i+0]* bfuncs[0] + coefs[i+1]* bfuncs[1] + coefs[i+2]* bfuncs[2] + coefs[i+3]* bfuncs[3]); *grad = (coefs[i+0]* dbfuncs[0] + coefs[i+1]* dbfuncs[1] + coefs[i+2]* dbfuncs[2] + coefs[i+3]* dbfuncs[3]); *lapl = (coefs[i+0]*d2bfuncs[0] + coefs[i+1]*d2bfuncs[1] + coefs[i+2]*d2bfuncs[2] + coefs[i+3]*d2bfuncs[3]); } inline void eval_NUBspline_1d_d_vgh (NUBspline_1d_d * restrict spline, double x, double* restrict val, double* restrict grad, double* restrict hess) { eval_NUBspline_1d_d_vgl (spline, x, val, grad, hess); } /************************************************************/ /* 2D single-precision, real evaulation functions */ /************************************************************/ /* Value only */ inline void eval_NUBspline_2d_d (NUBspline_2d_d * restrict spline, double x, double y, double* restrict val) { double a[4], b[4]; int ix = get_NUBasis_funcs_d (spline->x_basis, x, a); int iy = get_NUBasis_funcs_d (spline->y_basis, y, b); double* restrict coefs = spline->coefs; int xs = spline->x_stride; #define C(i,j) coefs[(ix+(i))*xs+iy+(j)] *val = (a[0]*(C(0,0)*b[0]+C(0,1)*b[1]+C(0,2)*b[2]+C(0,3)*b[3])+ a[1]*(C(1,0)*b[0]+C(1,1)*b[1]+C(1,2)*b[2]+C(1,3)*b[3])+ a[2]*(C(2,0)*b[0]+C(2,1)*b[1]+C(2,2)*b[2]+C(2,3)*b[3])+ a[3]*(C(3,0)*b[0]+C(3,1)*b[1]+C(3,2)*b[2]+C(3,3)*b[3])); #undef C } /* Value and gradient */ inline void eval_NUBspline_2d_d_vg (NUBspline_2d_d * restrict spline, double x, double y, double* restrict val, double* restrict grad) { double a[4], b[4], da[4], db[4]; int ix = get_NUBasis_dfuncs_d (spline->x_basis, x, a, da); int iy = get_NUBasis_dfuncs_d (spline->y_basis, y, b, db); double* restrict coefs = spline->coefs; int xs = spline->x_stride; #define C(i,j) coefs[(ix+(i))*xs+iy+(j)] *val = (a[0]*(C(0,0)*b[0]+C(0,1)*b[1]+C(0,2)*b[2]+C(0,3)*b[3])+ a[1]*(C(1,0)*b[0]+C(1,1)*b[1]+C(1,2)*b[2]+C(1,3)*b[3])+ 
a[2]*(C(2,0)*b[0]+C(2,1)*b[1]+C(2,2)*b[2]+C(2,3)*b[3])+ a[3]*(C(3,0)*b[0]+C(3,1)*b[1]+C(3,2)*b[2]+C(3,3)*b[3])); grad[0] = (da[0]*(C(0,0)*b[0]+C(0,1)*b[1]+C(0,2)*b[2]+C(0,3)*b[3])+ da[1]*(C(1,0)*b[0]+C(1,1)*b[1]+C(1,2)*b[2]+C(1,3)*b[3])+ da[2]*(C(2,0)*b[0]+C(2,1)*b[1]+C(2,2)*b[2]+C(2,3)*b[3])+ da[3]*(C(3,0)*b[0]+C(3,1)*b[1]+C(3,2)*b[2]+C(3,3)*b[3])); grad[1] = (a[0]*(C(0,0)*db[0]+C(0,1)*db[1]+C(0,2)*db[2]+C(0,3)*db[3])+ a[1]*(C(1,0)*db[0]+C(1,1)*db[1]+C(1,2)*db[2]+C(1,3)*db[3])+ a[2]*(C(2,0)*db[0]+C(2,1)*db[1]+C(2,2)*db[2]+C(2,3)*db[3])+ a[3]*(C(3,0)*db[0]+C(3,1)*db[1]+C(3,2)*db[2]+C(3,3)*db[3])); #undef C } /* Value, gradient, and laplacian */ inline void eval_NUBspline_2d_d_vgl (NUBspline_2d_d * restrict spline, double x, double y, double* restrict val, double* restrict grad, double* restrict lapl) { double a[4], b[4], da[4], db[4], d2a[4], d2b[4], bc[4]; int ix = get_NUBasis_d2funcs_d (spline->x_basis, x, a, da, d2a); int iy = get_NUBasis_d2funcs_d (spline->y_basis, y, b, db, d2b); double* restrict coefs = spline->coefs; int xs = spline->x_stride; #define C(i,j) coefs[(ix+(i))*xs+iy+(j)] bc[0] = (C(0,0)*b[0]+C(0,1)*b[1]+C(0,2)*b[2]+C(0,3)*b[3]); bc[1] = (C(1,0)*b[0]+C(1,1)*b[1]+C(1,2)*b[2]+C(1,3)*b[3]); bc[2] = (C(2,0)*b[0]+C(2,1)*b[1]+C(2,2)*b[2]+C(2,3)*b[3]); bc[3] = (C(3,0)*b[0]+C(3,1)*b[1]+C(3,2)*b[2]+C(3,3)*b[3]); *val = (a[0]*bc[0] + a[1]*bc[1] + a[2]*bc[2] + a[3]*bc[3]); grad[0] = (da[0]*bc[0] + da[1]*bc[1] + da[2]*bc[2] + da[3]*bc[3]); grad[1] = (a[0]*(C(0,0)*db[0]+C(0,1)*db[1]+C(0,2)*db[2]+C(0,3)*db[3])+ a[1]*(C(1,0)*db[0]+C(1,1)*db[1]+C(1,2)*db[2]+C(1,3)*db[3])+ a[2]*(C(2,0)*db[0]+C(2,1)*db[1]+C(2,2)*db[2]+C(2,3)*db[3])+ a[3]*(C(3,0)*db[0]+C(3,1)*db[1]+C(3,2)*db[2]+C(3,3)*db[3])); *lapl = (d2a[0]*bc[0] + d2a[1]*bc[1] + d2a[2]*bc[2] + d2a[3]*bc[3]+ a[0]*(C(0,0)*d2b[0]+C(0,1)*d2b[1]+C(0,2)*d2b[2]+C(0,3)*d2b[3])+ a[1]*(C(1,0)*d2b[0]+C(1,1)*d2b[1]+C(1,2)*d2b[2]+C(1,3)*d2b[3])+ a[2]*(C(2,0)*d2b[0]+C(2,1)*d2b[1]+C(2,2)*d2b[2]+C(2,3)*d2b[3])+ 
a[3]*(C(3,0)*d2b[0]+C(3,1)*d2b[1]+C(3,2)*d2b[2]+C(3,3)*d2b[3])); #undef C } /* Value, gradient, and Hessian */ inline void eval_NUBspline_2d_d_vgh (NUBspline_2d_d * restrict spline, double x, double y, double* restrict val, double* restrict grad, double* restrict hess) { double a[4], b[4], da[4], db[4], d2a[4], d2b[4], bc[4]; int ix = get_NUBasis_d2funcs_d (spline->x_basis, x, a, da, d2a); int iy = get_NUBasis_d2funcs_d (spline->y_basis, y, b, db, d2b); double* restrict coefs = spline->coefs; int xs = spline->x_stride; #define C(i,j) coefs[(ix+(i))*xs+iy+(j)] bc[0] = (C(0,0)*b[0]+C(0,1)*b[1]+C(0,2)*b[2]+C(0,3)*b[3]); bc[1] = (C(1,0)*b[0]+C(1,1)*b[1]+C(1,2)*b[2]+C(1,3)*b[3]); bc[2] = (C(2,0)*b[0]+C(2,1)*b[1]+C(2,2)*b[2]+C(2,3)*b[3]); bc[3] = (C(3,0)*b[0]+C(3,1)*b[1]+C(3,2)*b[2]+C(3,3)*b[3]); *val = (a[0]*bc[0] + a[1]*bc[1] + a[2]*bc[2] + a[3]*bc[3]); grad[0] = (da[0]*bc[0] + da[1]*bc[1] + da[2]*bc[2] + da[3]*bc[3]); grad[1] = (a[0]*(C(0,0)*db[0]+C(0,1)*db[1]+C(0,2)*db[2]+C(0,3)*db[3])+ a[1]*(C(1,0)*db[0]+C(1,1)*db[1]+C(1,2)*db[2]+C(1,3)*db[3])+ a[2]*(C(2,0)*db[0]+C(2,1)*db[1]+C(2,2)*db[2]+C(2,3)*db[3])+ a[3]*(C(3,0)*db[0]+C(3,1)*db[1]+C(3,2)*db[2]+C(3,3)*db[3])); hess[0] = (d2a[0]*bc[0] + d2a[1]*bc[1] + d2a[2]*bc[2] + d2a[3]*bc[3]); hess[1] = (da[0]*(C(0,0)*db[0]+C(0,1)*db[1]+C(0,2)*db[2]+C(0,3)*db[3])+ da[1]*(C(1,0)*db[0]+C(1,1)*db[1]+C(1,2)*db[2]+C(1,3)*db[3])+ da[2]*(C(2,0)*db[0]+C(2,1)*db[1]+C(2,2)*db[2]+C(2,3)*db[3])+ da[3]*(C(3,0)*db[0]+C(3,1)*db[1]+C(3,2)*db[2]+C(3,3)*db[3])); hess[3] = (a[0]*(C(0,0)*d2b[0]+C(0,1)*d2b[1]+C(0,2)*d2b[2]+C(0,3)*d2b[3])+ a[1]*(C(1,0)*d2b[0]+C(1,1)*d2b[1]+C(1,2)*d2b[2]+C(1,3)*d2b[3])+ a[2]*(C(2,0)*d2b[0]+C(2,1)*d2b[1]+C(2,2)*d2b[2]+C(2,3)*d2b[3])+ a[3]*(C(3,0)*d2b[0]+C(3,1)*d2b[1]+C(3,2)*d2b[2]+C(3,3)*d2b[3])); hess[2] = hess[1]; #undef C } /************************************************************/ /* 3D single-precision, real evaulation functions */ /************************************************************/ /* Value only 
*/ inline void eval_NUBspline_3d_d (NUBspline_3d_d * restrict spline, double x, double y, double z, double* restrict val) { double a[4], b[4], c[4]; int ix = get_NUBasis_funcs_d (spline->x_basis, x, a); int iy = get_NUBasis_funcs_d (spline->y_basis, y, b); int iz = get_NUBasis_funcs_d (spline->z_basis, z, c); double* restrict coefs = spline->coefs; int xs = spline->x_stride; int ys = spline->y_stride; #define P(i,j,k) coefs[(ix+(i))*xs+(iy+(j))*ys+(iz+(k))] *val = (a[0]*(b[0]*(P(0,0,0)*c[0]+P(0,0,1)*c[1]+P(0,0,2)*c[2]+P(0,0,3)*c[3])+ b[1]*(P(0,1,0)*c[0]+P(0,1,1)*c[1]+P(0,1,2)*c[2]+P(0,1,3)*c[3])+ b[2]*(P(0,2,0)*c[0]+P(0,2,1)*c[1]+P(0,2,2)*c[2]+P(0,2,3)*c[3])+ b[3]*(P(0,3,0)*c[0]+P(0,3,1)*c[1]+P(0,3,2)*c[2]+P(0,3,3)*c[3]))+ a[1]*(b[0]*(P(1,0,0)*c[0]+P(1,0,1)*c[1]+P(1,0,2)*c[2]+P(1,0,3)*c[3])+ b[1]*(P(1,1,0)*c[0]+P(1,1,1)*c[1]+P(1,1,2)*c[2]+P(1,1,3)*c[3])+ b[2]*(P(1,2,0)*c[0]+P(1,2,1)*c[1]+P(1,2,2)*c[2]+P(1,2,3)*c[3])+ b[3]*(P(1,3,0)*c[0]+P(1,3,1)*c[1]+P(1,3,2)*c[2]+P(1,3,3)*c[3]))+ a[2]*(b[0]*(P(2,0,0)*c[0]+P(2,0,1)*c[1]+P(2,0,2)*c[2]+P(2,0,3)*c[3])+ b[1]*(P(2,1,0)*c[0]+P(2,1,1)*c[1]+P(2,1,2)*c[2]+P(2,1,3)*c[3])+ b[2]*(P(2,2,0)*c[0]+P(2,2,1)*c[1]+P(2,2,2)*c[2]+P(2,2,3)*c[3])+ b[3]*(P(2,3,0)*c[0]+P(2,3,1)*c[1]+P(2,3,2)*c[2]+P(2,3,3)*c[3]))+ a[3]*(b[0]*(P(3,0,0)*c[0]+P(3,0,1)*c[1]+P(3,0,2)*c[2]+P(3,0,3)*c[3])+ b[1]*(P(3,1,0)*c[0]+P(3,1,1)*c[1]+P(3,1,2)*c[2]+P(3,1,3)*c[3])+ b[2]*(P(3,2,0)*c[0]+P(3,2,1)*c[1]+P(3,2,2)*c[2]+P(3,2,3)*c[3])+ b[3]*(P(3,3,0)*c[0]+P(3,3,1)*c[1]+P(3,3,2)*c[2]+P(3,3,3)*c[3]))); #undef P } /* Value and gradient */ inline void eval_NUBspline_3d_d_vg (NUBspline_3d_d * restrict spline, double x, double y, double z, double* restrict val, double* restrict grad) { double a[4], b[4], c[4], da[4], db[4], dc[4], cP[16], bcP[4], dbcP[4]; int ix = get_NUBasis_dfuncs_d (spline->x_basis, x, a, da); int iy = get_NUBasis_dfuncs_d (spline->y_basis, y, b, db); int iz = get_NUBasis_dfuncs_d (spline->z_basis, z, c, dc); double* restrict coefs = spline->coefs; int 
xs = spline->x_stride; int ys = spline->y_stride; #define P(i,j,k) coefs[(ix+(i))*xs+(iy+(j))*ys+(iz+(k))] cP[ 0] = (P(0,0,0)*c[0]+P(0,0,1)*c[1]+P(0,0,2)*c[2]+P(0,0,3)*c[3]); cP[ 1] = (P(0,1,0)*c[0]+P(0,1,1)*c[1]+P(0,1,2)*c[2]+P(0,1,3)*c[3]); cP[ 2] = (P(0,2,0)*c[0]+P(0,2,1)*c[1]+P(0,2,2)*c[2]+P(0,2,3)*c[3]); cP[ 3] = (P(0,3,0)*c[0]+P(0,3,1)*c[1]+P(0,3,2)*c[2]+P(0,3,3)*c[3]); cP[ 4] = (P(1,0,0)*c[0]+P(1,0,1)*c[1]+P(1,0,2)*c[2]+P(1,0,3)*c[3]); cP[ 5] = (P(1,1,0)*c[0]+P(1,1,1)*c[1]+P(1,1,2)*c[2]+P(1,1,3)*c[3]); cP[ 6] = (P(1,2,0)*c[0]+P(1,2,1)*c[1]+P(1,2,2)*c[2]+P(1,2,3)*c[3]); cP[ 7] = (P(1,3,0)*c[0]+P(1,3,1)*c[1]+P(1,3,2)*c[2]+P(1,3,3)*c[3]); cP[ 8] = (P(2,0,0)*c[0]+P(2,0,1)*c[1]+P(2,0,2)*c[2]+P(2,0,3)*c[3]); cP[ 9] = (P(2,1,0)*c[0]+P(2,1,1)*c[1]+P(2,1,2)*c[2]+P(2,1,3)*c[3]); cP[10] = (P(2,2,0)*c[0]+P(2,2,1)*c[1]+P(2,2,2)*c[2]+P(2,2,3)*c[3]); cP[11] = (P(2,3,0)*c[0]+P(2,3,1)*c[1]+P(2,3,2)*c[2]+P(2,3,3)*c[3]); cP[12] = (P(3,0,0)*c[0]+P(3,0,1)*c[1]+P(3,0,2)*c[2]+P(3,0,3)*c[3]); cP[13] = (P(3,1,0)*c[0]+P(3,1,1)*c[1]+P(3,1,2)*c[2]+P(3,1,3)*c[3]); cP[14] = (P(3,2,0)*c[0]+P(3,2,1)*c[1]+P(3,2,2)*c[2]+P(3,2,3)*c[3]); cP[15] = (P(3,3,0)*c[0]+P(3,3,1)*c[1]+P(3,3,2)*c[2]+P(3,3,3)*c[3]); bcP[0] = ( b[0]*cP[ 0] + b[1]*cP[ 1] + b[2]*cP[ 2] + b[3]*cP[ 3]); bcP[1] = ( b[0]*cP[ 4] + b[1]*cP[ 5] + b[2]*cP[ 6] + b[3]*cP[ 7]); bcP[2] = ( b[0]*cP[ 8] + b[1]*cP[ 9] + b[2]*cP[10] + b[3]*cP[11]); bcP[3] = ( b[0]*cP[12] + b[1]*cP[13] + b[2]*cP[14] + b[3]*cP[15]); dbcP[0] = ( db[0]*cP[ 0] + db[1]*cP[ 1] + db[2]*cP[ 2] + db[3]*cP[ 3]); dbcP[1] = ( db[0]*cP[ 4] + db[1]*cP[ 5] + db[2]*cP[ 6] + db[3]*cP[ 7]); dbcP[2] = ( db[0]*cP[ 8] + db[1]*cP[ 9] + db[2]*cP[10] + db[3]*cP[11]); dbcP[3] = ( db[0]*cP[12] + db[1]*cP[13] + db[2]*cP[14] + db[3]*cP[15]); *val = ( a[0]*bcP[0] + a[1]*bcP[1] + a[2]*bcP[2] + a[3]*bcP[3]); grad[0] = (da[0]*bcP[0] + da[1]*bcP[1] + da[2]*bcP[2] + da[3]*bcP[3]); grad[1] = (a[0]*dbcP[0] + a[1]*dbcP[1] + a[2]*dbcP[2] + a[3]*dbcP[3]); grad[2] = 
(a[0]*(b[0]*(P(0,0,0)*dc[0]+P(0,0,1)*dc[1]+P(0,0,2)*dc[2]+P(0,0,3)*dc[3])+ b[1]*(P(0,1,0)*dc[0]+P(0,1,1)*dc[1]+P(0,1,2)*dc[2]+P(0,1,3)*dc[3])+ b[2]*(P(0,2,0)*dc[0]+P(0,2,1)*dc[1]+P(0,2,2)*dc[2]+P(0,2,3)*dc[3])+ b[3]*(P(0,3,0)*dc[0]+P(0,3,1)*dc[1]+P(0,3,2)*dc[2]+P(0,3,3)*dc[3]))+ a[1]*(b[0]*(P(1,0,0)*dc[0]+P(1,0,1)*dc[1]+P(1,0,2)*dc[2]+P(1,0,3)*dc[3])+ b[1]*(P(1,1,0)*dc[0]+P(1,1,1)*dc[1]+P(1,1,2)*dc[2]+P(1,1,3)*dc[3])+ b[2]*(P(1,2,0)*dc[0]+P(1,2,1)*dc[1]+P(1,2,2)*dc[2]+P(1,2,3)*dc[3])+ b[3]*(P(1,3,0)*dc[0]+P(1,3,1)*dc[1]+P(1,3,2)*dc[2]+P(1,3,3)*dc[3]))+ a[2]*(b[0]*(P(2,0,0)*dc[0]+P(2,0,1)*dc[1]+P(2,0,2)*dc[2]+P(2,0,3)*dc[3])+ b[1]*(P(2,1,0)*dc[0]+P(2,1,1)*dc[1]+P(2,1,2)*dc[2]+P(2,1,3)*dc[3])+ b[2]*(P(2,2,0)*dc[0]+P(2,2,1)*dc[1]+P(2,2,2)*dc[2]+P(2,2,3)*dc[3])+ b[3]*(P(2,3,0)*dc[0]+P(2,3,1)*dc[1]+P(2,3,2)*dc[2]+P(2,3,3)*dc[3]))+ a[3]*(b[0]*(P(3,0,0)*dc[0]+P(3,0,1)*dc[1]+P(3,0,2)*dc[2]+P(3,0,3)*dc[3])+ b[1]*(P(3,1,0)*dc[0]+P(3,1,1)*dc[1]+P(3,1,2)*dc[2]+P(3,1,3)*dc[3])+ b[2]*(P(3,2,0)*dc[0]+P(3,2,1)*dc[1]+P(3,2,2)*dc[2]+P(3,2,3)*dc[3])+ b[3]*(P(3,3,0)*dc[0]+P(3,3,1)*dc[1]+P(3,3,2)*dc[2]+P(3,3,3)*dc[3]))); #undef P } /* Value, gradient, and laplacian */ inline void eval_NUBspline_3d_d_vgl (NUBspline_3d_d * restrict spline, double x, double y, double z, double* restrict val, double* restrict grad, double* restrict lapl) { double a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4], cP[16], dcP[16], bcP[4], dbcP[4], d2bcP[4], bdcP[4]; int ix = get_NUBasis_d2funcs_d (spline->x_basis, x, a, da, d2a); int iy = get_NUBasis_d2funcs_d (spline->y_basis, y, b, db, d2b); int iz = get_NUBasis_d2funcs_d (spline->z_basis, z, c, dc, d2c); double* restrict coefs = spline->coefs; int xs = spline->x_stride; int ys = spline->y_stride; #define P(i,j,k) coefs[(ix+(i))*xs+(iy+(j))*ys+(iz+(k))] cP[ 0] = (P(0,0,0)*c[0]+P(0,0,1)*c[1]+P(0,0,2)*c[2]+P(0,0,3)*c[3]); cP[ 1] = (P(0,1,0)*c[0]+P(0,1,1)*c[1]+P(0,1,2)*c[2]+P(0,1,3)*c[3]); cP[ 2] = 
(P(0,2,0)*c[0]+P(0,2,1)*c[1]+P(0,2,2)*c[2]+P(0,2,3)*c[3]); cP[ 3] = (P(0,3,0)*c[0]+P(0,3,1)*c[1]+P(0,3,2)*c[2]+P(0,3,3)*c[3]); cP[ 4] = (P(1,0,0)*c[0]+P(1,0,1)*c[1]+P(1,0,2)*c[2]+P(1,0,3)*c[3]); cP[ 5] = (P(1,1,0)*c[0]+P(1,1,1)*c[1]+P(1,1,2)*c[2]+P(1,1,3)*c[3]); cP[ 6] = (P(1,2,0)*c[0]+P(1,2,1)*c[1]+P(1,2,2)*c[2]+P(1,2,3)*c[3]); cP[ 7] = (P(1,3,0)*c[0]+P(1,3,1)*c[1]+P(1,3,2)*c[2]+P(1,3,3)*c[3]); cP[ 8] = (P(2,0,0)*c[0]+P(2,0,1)*c[1]+P(2,0,2)*c[2]+P(2,0,3)*c[3]); cP[ 9] = (P(2,1,0)*c[0]+P(2,1,1)*c[1]+P(2,1,2)*c[2]+P(2,1,3)*c[3]); cP[10] = (P(2,2,0)*c[0]+P(2,2,1)*c[1]+P(2,2,2)*c[2]+P(2,2,3)*c[3]); cP[11] = (P(2,3,0)*c[0]+P(2,3,1)*c[1]+P(2,3,2)*c[2]+P(2,3,3)*c[3]); cP[12] = (P(3,0,0)*c[0]+P(3,0,1)*c[1]+P(3,0,2)*c[2]+P(3,0,3)*c[3]); cP[13] = (P(3,1,0)*c[0]+P(3,1,1)*c[1]+P(3,1,2)*c[2]+P(3,1,3)*c[3]); cP[14] = (P(3,2,0)*c[0]+P(3,2,1)*c[1]+P(3,2,2)*c[2]+P(3,2,3)*c[3]); cP[15] = (P(3,3,0)*c[0]+P(3,3,1)*c[1]+P(3,3,2)*c[2]+P(3,3,3)*c[3]); dcP[ 0] = (P(0,0,0)*dc[0]+P(0,0,1)*dc[1]+P(0,0,2)*dc[2]+P(0,0,3)*dc[3]); dcP[ 1] = (P(0,1,0)*dc[0]+P(0,1,1)*dc[1]+P(0,1,2)*dc[2]+P(0,1,3)*dc[3]); dcP[ 2] = (P(0,2,0)*dc[0]+P(0,2,1)*dc[1]+P(0,2,2)*dc[2]+P(0,2,3)*dc[3]); dcP[ 3] = (P(0,3,0)*dc[0]+P(0,3,1)*dc[1]+P(0,3,2)*dc[2]+P(0,3,3)*dc[3]); dcP[ 4] = (P(1,0,0)*dc[0]+P(1,0,1)*dc[1]+P(1,0,2)*dc[2]+P(1,0,3)*dc[3]); dcP[ 5] = (P(1,1,0)*dc[0]+P(1,1,1)*dc[1]+P(1,1,2)*dc[2]+P(1,1,3)*dc[3]); dcP[ 6] = (P(1,2,0)*dc[0]+P(1,2,1)*dc[1]+P(1,2,2)*dc[2]+P(1,2,3)*dc[3]); dcP[ 7] = (P(1,3,0)*dc[0]+P(1,3,1)*dc[1]+P(1,3,2)*dc[2]+P(1,3,3)*dc[3]); dcP[ 8] = (P(2,0,0)*dc[0]+P(2,0,1)*dc[1]+P(2,0,2)*dc[2]+P(2,0,3)*dc[3]); dcP[ 9] = (P(2,1,0)*dc[0]+P(2,1,1)*dc[1]+P(2,1,2)*dc[2]+P(2,1,3)*dc[3]); dcP[10] = (P(2,2,0)*dc[0]+P(2,2,1)*dc[1]+P(2,2,2)*dc[2]+P(2,2,3)*dc[3]); dcP[11] = (P(2,3,0)*dc[0]+P(2,3,1)*dc[1]+P(2,3,2)*dc[2]+P(2,3,3)*dc[3]); dcP[12] = (P(3,0,0)*dc[0]+P(3,0,1)*dc[1]+P(3,0,2)*dc[2]+P(3,0,3)*dc[3]); dcP[13] = (P(3,1,0)*dc[0]+P(3,1,1)*dc[1]+P(3,1,2)*dc[2]+P(3,1,3)*dc[3]); dcP[14] = 
(P(3,2,0)*dc[0]+P(3,2,1)*dc[1]+P(3,2,2)*dc[2]+P(3,2,3)*dc[3]); dcP[15] = (P(3,3,0)*dc[0]+P(3,3,1)*dc[1]+P(3,3,2)*dc[2]+P(3,3,3)*dc[3]); bcP[0] = ( b[0]*cP[ 0] + b[1]*cP[ 1] + b[2]*cP[ 2] + b[3]*cP[ 3]); bcP[1] = ( b[0]*cP[ 4] + b[1]*cP[ 5] + b[2]*cP[ 6] + b[3]*cP[ 7]); bcP[2] = ( b[0]*cP[ 8] + b[1]*cP[ 9] + b[2]*cP[10] + b[3]*cP[11]); bcP[3] = ( b[0]*cP[12] + b[1]*cP[13] + b[2]*cP[14] + b[3]*cP[15]); dbcP[0] = ( db[0]*cP[ 0] + db[1]*cP[ 1] + db[2]*cP[ 2] + db[3]*cP[ 3]); dbcP[1] = ( db[0]*cP[ 4] + db[1]*cP[ 5] + db[2]*cP[ 6] + db[3]*cP[ 7]); dbcP[2] = ( db[0]*cP[ 8] + db[1]*cP[ 9] + db[2]*cP[10] + db[3]*cP[11]); dbcP[3] = ( db[0]*cP[12] + db[1]*cP[13] + db[2]*cP[14] + db[3]*cP[15]); bdcP[0] = ( b[0]*dcP[ 0] + b[1]*dcP[ 1] + b[2]*dcP[ 2] + b[3]*dcP[ 3]); bdcP[1] = ( b[0]*dcP[ 4] + b[1]*dcP[ 5] + b[2]*dcP[ 6] + b[3]*dcP[ 7]); bdcP[2] = ( b[0]*dcP[ 8] + b[1]*dcP[ 9] + b[2]*dcP[10] + b[3]*dcP[11]); bdcP[3] = ( b[0]*dcP[12] + b[1]*dcP[13] + b[2]*dcP[14] + b[3]*dcP[15]); d2bcP[0] = ( d2b[0]*cP[ 0] + d2b[1]*cP[ 1] + d2b[2]*cP[ 2] + d2b[3]*cP[ 3]); d2bcP[1] = ( d2b[0]*cP[ 4] + d2b[1]*cP[ 5] + d2b[2]*cP[ 6] + d2b[3]*cP[ 7]); d2bcP[2] = ( d2b[0]*cP[ 8] + d2b[1]*cP[ 9] + d2b[2]*cP[10] + d2b[3]*cP[11]); d2bcP[3] = ( d2b[0]*cP[12] + d2b[1]*cP[13] + d2b[2]*cP[14] + d2b[3]*cP[15]); *val = ( a[0]*bcP[0] + a[1]*bcP[1] + a[2]*bcP[2] + a[3]*bcP[3]); grad[0] = (da[0]*bcP[0] + da[1]*bcP[1] + da[2]*bcP[2] + da[3]*bcP[3]); grad[1] = (a[0]*dbcP[0] + a[1]*dbcP[1] + a[2]*dbcP[2] + a[3]*dbcP[3]); grad[2] = (a[0]*bdcP[0] + a[1]*bdcP[1] + a[2]*bdcP[2] + a[3]*bdcP[3]); *lapl = (d2a[0]*bcP[0] + d2a[1]*bcP[1] + d2a[2]*bcP[2] + d2a[3]*bcP[3]) + (a[0]*d2bcP[0] + a[1]*d2bcP[1] + a[2]*d2bcP[2] + a[3]*d2bcP[3]) + (a[0]*(b[0]*(P(0,0,0)*d2c[0]+P(0,0,1)*d2c[1]+P(0,0,2)*d2c[2]+P(0,0,3)*d2c[3])+ b[1]*(P(0,1,0)*d2c[0]+P(0,1,1)*d2c[1]+P(0,1,2)*d2c[2]+P(0,1,3)*d2c[3])+ b[2]*(P(0,2,0)*d2c[0]+P(0,2,1)*d2c[1]+P(0,2,2)*d2c[2]+P(0,2,3)*d2c[3])+ 
b[3]*(P(0,3,0)*d2c[0]+P(0,3,1)*d2c[1]+P(0,3,2)*d2c[2]+P(0,3,3)*d2c[3]))+ a[1]*(b[0]*(P(1,0,0)*d2c[0]+P(1,0,1)*d2c[1]+P(1,0,2)*d2c[2]+P(1,0,3)*d2c[3])+ b[1]*(P(1,1,0)*d2c[0]+P(1,1,1)*d2c[1]+P(1,1,2)*d2c[2]+P(1,1,3)*d2c[3])+ b[2]*(P(1,2,0)*d2c[0]+P(1,2,1)*d2c[1]+P(1,2,2)*d2c[2]+P(1,2,3)*d2c[3])+ b[3]*(P(1,3,0)*d2c[0]+P(1,3,1)*d2c[1]+P(1,3,2)*d2c[2]+P(1,3,3)*d2c[3]))+ a[2]*(b[0]*(P(2,0,0)*d2c[0]+P(2,0,1)*d2c[1]+P(2,0,2)*d2c[2]+P(2,0,3)*d2c[3])+ b[1]*(P(2,1,0)*d2c[0]+P(2,1,1)*d2c[1]+P(2,1,2)*d2c[2]+P(2,1,3)*d2c[3])+ b[2]*(P(2,2,0)*d2c[0]+P(2,2,1)*d2c[1]+P(2,2,2)*d2c[2]+P(2,2,3)*d2c[3])+ b[3]*(P(2,3,0)*d2c[0]+P(2,3,1)*d2c[1]+P(2,3,2)*d2c[2]+P(2,3,3)*d2c[3]))+ a[3]*(b[0]*(P(3,0,0)*d2c[0]+P(3,0,1)*d2c[1]+P(3,0,2)*d2c[2]+P(3,0,3)*d2c[3])+ b[1]*(P(3,1,0)*d2c[0]+P(3,1,1)*d2c[1]+P(3,1,2)*d2c[2]+P(3,1,3)*d2c[3])+ b[2]*(P(3,2,0)*d2c[0]+P(3,2,1)*d2c[1]+P(3,2,2)*d2c[2]+P(3,2,3)*d2c[3])+ b[3]*(P(3,3,0)*d2c[0]+P(3,3,1)*d2c[1]+P(3,3,2)*d2c[2]+P(3,3,3)*d2c[3]))); #undef P } /* Value, gradient, and Hessian */ inline void eval_NUBspline_3d_d_vgh (NUBspline_3d_d * restrict spline, double x, double y, double z, double* restrict val, double* restrict grad, double* restrict hess) { double a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4], cP[16], dcP[16], d2cP[16], bcP[4], dbcP[4], d2bcP[4], dbdcP[4], bd2cP[4], bdcP[4]; int ix = get_NUBasis_d2funcs_d (spline->x_basis, x, a, da, d2a); int iy = get_NUBasis_d2funcs_d (spline->y_basis, y, b, db, d2b); int iz = get_NUBasis_d2funcs_d (spline->z_basis, z, c, dc, d2c); int xs = spline->x_stride; int ys = spline->y_stride; double* restrict coefs = spline->coefs; #define P(i,j,k) coefs[(ix+(i))*xs+(iy+(j))*ys+(iz+(k))] cP[ 0] = (P(0,0,0)*c[0]+P(0,0,1)*c[1]+P(0,0,2)*c[2]+P(0,0,3)*c[3]); cP[ 1] = (P(0,1,0)*c[0]+P(0,1,1)*c[1]+P(0,1,2)*c[2]+P(0,1,3)*c[3]); cP[ 2] = (P(0,2,0)*c[0]+P(0,2,1)*c[1]+P(0,2,2)*c[2]+P(0,2,3)*c[3]); cP[ 3] = (P(0,3,0)*c[0]+P(0,3,1)*c[1]+P(0,3,2)*c[2]+P(0,3,3)*c[3]); cP[ 4] = 
(P(1,0,0)*c[0]+P(1,0,1)*c[1]+P(1,0,2)*c[2]+P(1,0,3)*c[3]); cP[ 5] = (P(1,1,0)*c[0]+P(1,1,1)*c[1]+P(1,1,2)*c[2]+P(1,1,3)*c[3]); cP[ 6] = (P(1,2,0)*c[0]+P(1,2,1)*c[1]+P(1,2,2)*c[2]+P(1,2,3)*c[3]); cP[ 7] = (P(1,3,0)*c[0]+P(1,3,1)*c[1]+P(1,3,2)*c[2]+P(1,3,3)*c[3]); cP[ 8] = (P(2,0,0)*c[0]+P(2,0,1)*c[1]+P(2,0,2)*c[2]+P(2,0,3)*c[3]); cP[ 9] = (P(2,1,0)*c[0]+P(2,1,1)*c[1]+P(2,1,2)*c[2]+P(2,1,3)*c[3]); cP[10] = (P(2,2,0)*c[0]+P(2,2,1)*c[1]+P(2,2,2)*c[2]+P(2,2,3)*c[3]); cP[11] = (P(2,3,0)*c[0]+P(2,3,1)*c[1]+P(2,3,2)*c[2]+P(2,3,3)*c[3]); cP[12] = (P(3,0,0)*c[0]+P(3,0,1)*c[1]+P(3,0,2)*c[2]+P(3,0,3)*c[3]); cP[13] = (P(3,1,0)*c[0]+P(3,1,1)*c[1]+P(3,1,2)*c[2]+P(3,1,3)*c[3]); cP[14] = (P(3,2,0)*c[0]+P(3,2,1)*c[1]+P(3,2,2)*c[2]+P(3,2,3)*c[3]); cP[15] = (P(3,3,0)*c[0]+P(3,3,1)*c[1]+P(3,3,2)*c[2]+P(3,3,3)*c[3]); dcP[ 0] = (P(0,0,0)*dc[0]+P(0,0,1)*dc[1]+P(0,0,2)*dc[2]+P(0,0,3)*dc[3]); dcP[ 1] = (P(0,1,0)*dc[0]+P(0,1,1)*dc[1]+P(0,1,2)*dc[2]+P(0,1,3)*dc[3]); dcP[ 2] = (P(0,2,0)*dc[0]+P(0,2,1)*dc[1]+P(0,2,2)*dc[2]+P(0,2,3)*dc[3]); dcP[ 3] = (P(0,3,0)*dc[0]+P(0,3,1)*dc[1]+P(0,3,2)*dc[2]+P(0,3,3)*dc[3]); dcP[ 4] = (P(1,0,0)*dc[0]+P(1,0,1)*dc[1]+P(1,0,2)*dc[2]+P(1,0,3)*dc[3]); dcP[ 5] = (P(1,1,0)*dc[0]+P(1,1,1)*dc[1]+P(1,1,2)*dc[2]+P(1,1,3)*dc[3]); dcP[ 6] = (P(1,2,0)*dc[0]+P(1,2,1)*dc[1]+P(1,2,2)*dc[2]+P(1,2,3)*dc[3]); dcP[ 7] = (P(1,3,0)*dc[0]+P(1,3,1)*dc[1]+P(1,3,2)*dc[2]+P(1,3,3)*dc[3]); dcP[ 8] = (P(2,0,0)*dc[0]+P(2,0,1)*dc[1]+P(2,0,2)*dc[2]+P(2,0,3)*dc[3]); dcP[ 9] = (P(2,1,0)*dc[0]+P(2,1,1)*dc[1]+P(2,1,2)*dc[2]+P(2,1,3)*dc[3]); dcP[10] = (P(2,2,0)*dc[0]+P(2,2,1)*dc[1]+P(2,2,2)*dc[2]+P(2,2,3)*dc[3]); dcP[11] = (P(2,3,0)*dc[0]+P(2,3,1)*dc[1]+P(2,3,2)*dc[2]+P(2,3,3)*dc[3]); dcP[12] = (P(3,0,0)*dc[0]+P(3,0,1)*dc[1]+P(3,0,2)*dc[2]+P(3,0,3)*dc[3]); dcP[13] = (P(3,1,0)*dc[0]+P(3,1,1)*dc[1]+P(3,1,2)*dc[2]+P(3,1,3)*dc[3]); dcP[14] = (P(3,2,0)*dc[0]+P(3,2,1)*dc[1]+P(3,2,2)*dc[2]+P(3,2,3)*dc[3]); dcP[15] = (P(3,3,0)*dc[0]+P(3,3,1)*dc[1]+P(3,3,2)*dc[2]+P(3,3,3)*dc[3]); d2cP[ 0] = 
(P(0,0,0)*d2c[0]+P(0,0,1)*d2c[1]+P(0,0,2)*d2c[2]+P(0,0,3)*d2c[3]); d2cP[ 1] = (P(0,1,0)*d2c[0]+P(0,1,1)*d2c[1]+P(0,1,2)*d2c[2]+P(0,1,3)*d2c[3]); d2cP[ 2] = (P(0,2,0)*d2c[0]+P(0,2,1)*d2c[1]+P(0,2,2)*d2c[2]+P(0,2,3)*d2c[3]); d2cP[ 3] = (P(0,3,0)*d2c[0]+P(0,3,1)*d2c[1]+P(0,3,2)*d2c[2]+P(0,3,3)*d2c[3]); d2cP[ 4] = (P(1,0,0)*d2c[0]+P(1,0,1)*d2c[1]+P(1,0,2)*d2c[2]+P(1,0,3)*d2c[3]); d2cP[ 5] = (P(1,1,0)*d2c[0]+P(1,1,1)*d2c[1]+P(1,1,2)*d2c[2]+P(1,1,3)*d2c[3]); d2cP[ 6] = (P(1,2,0)*d2c[0]+P(1,2,1)*d2c[1]+P(1,2,2)*d2c[2]+P(1,2,3)*d2c[3]); d2cP[ 7] = (P(1,3,0)*d2c[0]+P(1,3,1)*d2c[1]+P(1,3,2)*d2c[2]+P(1,3,3)*d2c[3]); d2cP[ 8] = (P(2,0,0)*d2c[0]+P(2,0,1)*d2c[1]+P(2,0,2)*d2c[2]+P(2,0,3)*d2c[3]); d2cP[ 9] = (P(2,1,0)*d2c[0]+P(2,1,1)*d2c[1]+P(2,1,2)*d2c[2]+P(2,1,3)*d2c[3]); d2cP[10] = (P(2,2,0)*d2c[0]+P(2,2,1)*d2c[1]+P(2,2,2)*d2c[2]+P(2,2,3)*d2c[3]); d2cP[11] = (P(2,3,0)*d2c[0]+P(2,3,1)*d2c[1]+P(2,3,2)*d2c[2]+P(2,3,3)*d2c[3]); d2cP[12] = (P(3,0,0)*d2c[0]+P(3,0,1)*d2c[1]+P(3,0,2)*d2c[2]+P(3,0,3)*d2c[3]); d2cP[13] = (P(3,1,0)*d2c[0]+P(3,1,1)*d2c[1]+P(3,1,2)*d2c[2]+P(3,1,3)*d2c[3]); d2cP[14] = (P(3,2,0)*d2c[0]+P(3,2,1)*d2c[1]+P(3,2,2)*d2c[2]+P(3,2,3)*d2c[3]); d2cP[15] = (P(3,3,0)*d2c[0]+P(3,3,1)*d2c[1]+P(3,3,2)*d2c[2]+P(3,3,3)*d2c[3]); bcP[0] = ( b[0]*cP[ 0] + b[1]*cP[ 1] + b[2]*cP[ 2] + b[3]*cP[ 3]); bcP[1] = ( b[0]*cP[ 4] + b[1]*cP[ 5] + b[2]*cP[ 6] + b[3]*cP[ 7]); bcP[2] = ( b[0]*cP[ 8] + b[1]*cP[ 9] + b[2]*cP[10] + b[3]*cP[11]); bcP[3] = ( b[0]*cP[12] + b[1]*cP[13] + b[2]*cP[14] + b[3]*cP[15]); dbcP[0] = ( db[0]*cP[ 0] + db[1]*cP[ 1] + db[2]*cP[ 2] + db[3]*cP[ 3]); dbcP[1] = ( db[0]*cP[ 4] + db[1]*cP[ 5] + db[2]*cP[ 6] + db[3]*cP[ 7]); dbcP[2] = ( db[0]*cP[ 8] + db[1]*cP[ 9] + db[2]*cP[10] + db[3]*cP[11]); dbcP[3] = ( db[0]*cP[12] + db[1]*cP[13] + db[2]*cP[14] + db[3]*cP[15]); bdcP[0] = ( b[0]*dcP[ 0] + b[1]*dcP[ 1] + b[2]*dcP[ 2] + b[3]*dcP[ 3]); bdcP[1] = ( b[0]*dcP[ 4] + b[1]*dcP[ 5] + b[2]*dcP[ 6] + b[3]*dcP[ 7]); bdcP[2] = ( b[0]*dcP[ 8] + b[1]*dcP[ 9] + b[2]*dcP[10] + 
b[3]*dcP[11]); bdcP[3] = ( b[0]*dcP[12] + b[1]*dcP[13] + b[2]*dcP[14] + b[3]*dcP[15]); bd2cP[0] = ( b[0]*d2cP[ 0] + b[1]*d2cP[ 1] + b[2]*d2cP[ 2] + b[3]*d2cP[ 3]); bd2cP[1] = ( b[0]*d2cP[ 4] + b[1]*d2cP[ 5] + b[2]*d2cP[ 6] + b[3]*d2cP[ 7]); bd2cP[2] = ( b[0]*d2cP[ 8] + b[1]*d2cP[ 9] + b[2]*d2cP[10] + b[3]*d2cP[11]); bd2cP[3] = ( b[0]*d2cP[12] + b[1]*d2cP[13] + b[2]*d2cP[14] + b[3]*d2cP[15]); d2bcP[0] = ( d2b[0]*cP[ 0] + d2b[1]*cP[ 1] + d2b[2]*cP[ 2] + d2b[3]*cP[ 3]); d2bcP[1] = ( d2b[0]*cP[ 4] + d2b[1]*cP[ 5] + d2b[2]*cP[ 6] + d2b[3]*cP[ 7]); d2bcP[2] = ( d2b[0]*cP[ 8] + d2b[1]*cP[ 9] + d2b[2]*cP[10] + d2b[3]*cP[11]); d2bcP[3] = ( d2b[0]*cP[12] + d2b[1]*cP[13] + d2b[2]*cP[14] + d2b[3]*cP[15]); dbdcP[0] = ( db[0]*dcP[ 0] + db[1]*dcP[ 1] + db[2]*dcP[ 2] + db[3]*dcP[ 3]); dbdcP[1] = ( db[0]*dcP[ 4] + db[1]*dcP[ 5] + db[2]*dcP[ 6] + db[3]*dcP[ 7]); dbdcP[2] = ( db[0]*dcP[ 8] + db[1]*dcP[ 9] + db[2]*dcP[10] + db[3]*dcP[11]); dbdcP[3] = ( db[0]*dcP[12] + db[1]*dcP[13] + db[2]*dcP[14] + db[3]*dcP[15]); *val = a[0]*bcP[0] + a[1]*bcP[1] + a[2]*bcP[2] + a[3]*bcP[3]; grad[0] = (da[0] *bcP[0] + da[1]*bcP[1] + da[2]*bcP[2] + da[3]*bcP[3]); grad[1] = (a[0]*dbcP[0] + a[1]*dbcP[1] + a[2]*dbcP[2] + a[3]*dbcP[3]); grad[2] = (a[0]*bdcP[0] + a[1]*bdcP[1] + a[2]*bdcP[2] + a[3]*bdcP[3]); // d2x hess[0] = (d2a[0]*bcP[0] + d2a[1]*bcP[1] + d2a[2]*bcP[2] + d2a[3]*bcP[3]); // dx dy hess[1] = (da[0]*dbcP[0] + da[1]*dbcP[1] + da[1]*dbcP[1] + da[1]*dbcP[1]); hess[3] = hess[1]; // dx dz; hess[2] = (da[0]*bdcP[0] + da[1]*bdcP[1] + da[1]*bdcP[1] + da[1]*bdcP[1]); hess[6] = hess[2]; // d2y hess[4] = (a[0]*d2bcP[0] + a[1]*d2bcP[1] + a[2]*d2bcP[2] + a[3]*d2bcP[3]); // dy dz hess[5] = (a[0]*dbdcP[0] + a[1]*dbdcP[1] + a[2]*dbdcP[2] + a[3]*dbdcP[3]); hess[7] = hess[5]; // d2z hess[8] = (a[0]*bd2cP[0] + a[1]*bd2cP[1] + a[2]*bd2cP[2] + a[3]*bd2cP[3]); #undef P } #endif einspline-0.9.2/src/test_blip.c0000664000113000011300000001106311012400563013324 00000000000000#include "blip_create.h" #include 
"bspline.h" #include #include #include inline double dot (double a[3], double b[3]) { return a[0]*b[0] + a[1]*b[1] + a[2]*b[2]; } inline void cross (double a[3], double b[3], double axb[3]) { axb[0] = a[1]*b[2]-a[2]*b[1]; axb[1] = a[2]*b[0]-a[0]*b[2]; axb[2] = a[0]*b[1]-a[1]*b[0]; } inline void recip_lattice (double lattice[9], double recip[9]) { double *a0 = lattice+0; double *b0 = recip+0; double *a1 = lattice+3; double *b1 = recip+3; double *a2 = lattice+6; double *b2 = recip+6; double a1xa2[3]; cross (a1, a2, a1xa2); double vol = dot (a0, a1xa2); double prefactor = 2.0 * M_PI / vol; cross (a1, a2, b0); cross (a2, a0, b1); cross (a0, a1, b2); for (int i=0; i<3; i++) { b0[i] *= prefactor; b1[i] *= prefactor; b2[i] *= prefactor; } } void test_blip_1d_s (double a, double Gcut, double ratio) { double lattice[9] = { 0.0, a , a , a , 0.0, a , a , a , 0.0 }; double recip[9]; recip_lattice (lattice, recip); double *b0 = recip+0; double *b1 = recip+3; double *b2 = recip+6; int x_max = (int)ceil(Gcut / sqrt (dot (b0, b0))); int y_max = (int)ceil(Gcut / sqrt (dot (b1, b1))); int z_max = (int)ceil(Gcut / sqrt (dot (b2, b2))); int numG = 0; double G[3], G0[3], G1[3], G2[3]; // Count G-vectors for (int ix=-x_max; ix<=x_max; ix++) { G0[0] = (double)ix * b0[0]; G0[1] = (double)ix * b0[1]; G0[2] = (double)ix * b0[2]; for (int iy=-y_max; iy<=y_max; iy++) { G1[0] = (double)iy * b1[0]; G1[1] = (double)iy * b1[1]; G1[2] = (double)iy * b1[2]; for (int iz=-z_max; iz<=z_max; iz++) { G2[0] = (double)iz * b2[0]; G2[1] = (double)iz * b2[1]; G2[2] = (double)iz * b2[2]; G[0] = G0[0] + G1[0] + G2[0]; G[1] = G0[1] + G1[1] + G2[1]; G[2] = G0[2] + G1[2] + G2[2]; double gmag = dot (G, G); if (gmag < Gcut*Gcut) numG++; } } } fprintf (stderr, "There are %d G-vectors\n", numG); double *Gvecs = malloc (sizeof(double)*numG*3); complex_float *coefs = malloc (sizeof(complex_float)*numG); numG = 0; for (int ix=-x_max; ix<=x_max; ix++) { G0[0] = (double)ix * b0[0]; G0[1] = (double)ix * b0[1]; G0[2] = 
(double)ix * b0[2]; for (int iy=-y_max; iy<=y_max; iy++) { G1[0] = (double)iy * b1[0]; G1[1] = (double)iy * b1[1]; G1[2] = (double)iy * b1[2]; for (int iz=-z_max; iz<=z_max; iz++) { G2[0] = (double)iz * b2[0]; G2[1] = (double)iz * b2[1]; G2[2] = (double)iz * b2[2]; G[0] = G0[0] + G1[0] + G2[0]; G[1] = G0[1] + G1[1] + G2[1]; G[2] = G0[2] + G1[2] + G2[2]; double gmag = dot (G, G); if (gmag < Gcut*Gcut) { Gvecs[numG*3+0] = G[0]; Gvecs[numG*3+1] = G[1]; Gvecs[numG*3+2] = G[2]; coefs[numG] = (float)drand48() + (float)drand48()*1.0fi; numG++; } } } } UBspline_3d_s *blip, *interp; blip = create_blip_3d_s (lattice, Gvecs, coefs, numG, 4.0, true); // Now, evaluate spline on a line through the box FILE *fout = fopen ("blip_1d_s.dat", "w"); double y = 0.2; double z = 0.8; double u[3], r[3]; u[1] = y; u[2] = z; for (double x=0.0; x<1.0; x+=0.001) { u[0] = x; r[0] = u[0]*lattice[0] + u[1]*lattice[3] + u[2]*lattice[6]; r[1] = u[0]*lattice[1] + u[1]*lattice[4] + u[2]*lattice[7]; r[2] = u[0]*lattice[2] + u[1]*lattice[5] + u[2]*lattice[8]; float val, derivs[3]; eval_UBspline_3d_s_vg (blip, x, 0.2, 0.8, &val, derivs); double sum = 0.0; double gsum[3] = { 0.0, 0.0, 0.0}; double gex[3]; gsum[0] = 0.0; gsum[1] = 0.0; gsum[2] = 0.0; for (int i=0; i #include #include "nubspline_structs.h" /************************************************************/ /* 1D single-precision, real evaulation functions */ /************************************************************/ /* Value only */ inline void eval_NUBspline_1d_s (NUBspline_1d_s * restrict spline, double x, float* restrict val) { float bfuncs[4]; int i = get_NUBasis_funcs_s (spline->x_basis, x, bfuncs); float* restrict coefs = spline->coefs; *val = (coefs[i+0]*bfuncs[0] +coefs[i+1]*bfuncs[1] + coefs[i+2]*bfuncs[2] +coefs[i+3]*bfuncs[3]); } /* Value and first derivative */ inline void eval_NUBspline_1d_s_vg (NUBspline_1d_s * restrict spline, double x, float* restrict val, float* restrict grad) { float bfuncs[4], dbfuncs[4]; int i = 
get_NUBasis_dfuncs_s (spline->x_basis, x, bfuncs, dbfuncs); float* restrict coefs = spline->coefs; *val = (coefs[i+0]* bfuncs[0] + coefs[i+1]* bfuncs[1] + coefs[i+2]* bfuncs[2] + coefs[i+3]* bfuncs[3]); *grad = (coefs[i+0]*dbfuncs[0] + coefs[i+1]*dbfuncs[1] + coefs[i+2]*dbfuncs[2] + coefs[i+3]*dbfuncs[3]); } /* Value, first derivative, and second derivative */ inline void eval_NUBspline_1d_s_vgl (NUBspline_1d_s * restrict spline, double x, float* restrict val, float* restrict grad, float* restrict lapl) { float bfuncs[4], dbfuncs[4], d2bfuncs[4]; int i = get_NUBasis_d2funcs_s (spline->x_basis, x, bfuncs, dbfuncs, d2bfuncs); float* restrict coefs = spline->coefs; *val = (coefs[i+0]* bfuncs[0] + coefs[i+1]* bfuncs[1] + coefs[i+2]* bfuncs[2] + coefs[i+3]* bfuncs[3]); *grad = (coefs[i+0]* dbfuncs[0] + coefs[i+1]* dbfuncs[1] + coefs[i+2]* dbfuncs[2] + coefs[i+3]* dbfuncs[3]); *lapl = (coefs[i+0]*d2bfuncs[0] + coefs[i+1]*d2bfuncs[1] + coefs[i+2]*d2bfuncs[2] + coefs[i+3]*d2bfuncs[3]); } inline void eval_NUBspline_1d_s_vgh (NUBspline_1d_s * restrict spline, double x, float* restrict val, float* restrict grad, float* restrict hess) { eval_NUBspline_1d_s_vgl (spline, x, val, grad, hess); } /************************************************************/ /* 2D single-precision, real evaulation functions */ /************************************************************/ /* Value only */ inline void eval_NUBspline_2d_s (NUBspline_2d_s * restrict spline, double x, double y, float* restrict val) { float a[4], b[4]; int ix = get_NUBasis_funcs_s (spline->x_basis, x, a); int iy = get_NUBasis_funcs_s (spline->y_basis, y, b); float* restrict coefs = spline->coefs; int xs = spline->x_stride; #define C(i,j) coefs[(ix+(i))*xs+iy+(j)] *val = (a[0]*(C(0,0)*b[0]+C(0,1)*b[1]+C(0,2)*b[2]+C(0,3)*b[3])+ a[1]*(C(1,0)*b[0]+C(1,1)*b[1]+C(1,2)*b[2]+C(1,3)*b[3])+ a[2]*(C(2,0)*b[0]+C(2,1)*b[1]+C(2,2)*b[2]+C(2,3)*b[3])+ a[3]*(C(3,0)*b[0]+C(3,1)*b[1]+C(3,2)*b[2]+C(3,3)*b[3])); #undef C } /* Value and 
gradient */ inline void eval_NUBspline_2d_s_vg (NUBspline_2d_s * restrict spline, double x, double y, float* restrict val, float* restrict grad) { float a[4], b[4], da[4], db[4]; int ix = get_NUBasis_dfuncs_s (spline->x_basis, x, a, da); int iy = get_NUBasis_dfuncs_s (spline->y_basis, y, b, db); float* restrict coefs = spline->coefs; int xs = spline->x_stride; #define C(i,j) coefs[(ix+(i))*xs+iy+(j)] *val = (a[0]*(C(0,0)*b[0]+C(0,1)*b[1]+C(0,2)*b[2]+C(0,3)*b[3])+ a[1]*(C(1,0)*b[0]+C(1,1)*b[1]+C(1,2)*b[2]+C(1,3)*b[3])+ a[2]*(C(2,0)*b[0]+C(2,1)*b[1]+C(2,2)*b[2]+C(2,3)*b[3])+ a[3]*(C(3,0)*b[0]+C(3,1)*b[1]+C(3,2)*b[2]+C(3,3)*b[3])); grad[0] = (da[0]*(C(0,0)*b[0]+C(0,1)*b[1]+C(0,2)*b[2]+C(0,3)*b[3])+ da[1]*(C(1,0)*b[0]+C(1,1)*b[1]+C(1,2)*b[2]+C(1,3)*b[3])+ da[2]*(C(2,0)*b[0]+C(2,1)*b[1]+C(2,2)*b[2]+C(2,3)*b[3])+ da[3]*(C(3,0)*b[0]+C(3,1)*b[1]+C(3,2)*b[2]+C(3,3)*b[3])); grad[1] = (a[0]*(C(0,0)*db[0]+C(0,1)*db[1]+C(0,2)*db[2]+C(0,3)*db[3])+ a[1]*(C(1,0)*db[0]+C(1,1)*db[1]+C(1,2)*db[2]+C(1,3)*db[3])+ a[2]*(C(2,0)*db[0]+C(2,1)*db[1]+C(2,2)*db[2]+C(2,3)*db[3])+ a[3]*(C(3,0)*db[0]+C(3,1)*db[1]+C(3,2)*db[2]+C(3,3)*db[3])); #undef C } /* Value, gradient, and laplacian */ inline void eval_NUBspline_2d_s_vgl (NUBspline_2d_s * restrict spline, double x, double y, float* restrict val, float* restrict grad, float* restrict lapl) { float a[4], b[4], da[4], db[4], d2a[4], d2b[4], bc[4]; int ix = get_NUBasis_d2funcs_s (spline->x_basis, x, a, da, d2a); int iy = get_NUBasis_d2funcs_s (spline->y_basis, y, b, db, d2b); float* restrict coefs = spline->coefs; int xs = spline->x_stride; #define C(i,j) coefs[(ix+(i))*xs+iy+(j)] bc[0] = (C(0,0)*b[0]+C(0,1)*b[1]+C(0,2)*b[2]+C(0,3)*b[3]); bc[1] = (C(1,0)*b[0]+C(1,1)*b[1]+C(1,2)*b[2]+C(1,3)*b[3]); bc[2] = (C(2,0)*b[0]+C(2,1)*b[1]+C(2,2)*b[2]+C(2,3)*b[3]); bc[3] = (C(3,0)*b[0]+C(3,1)*b[1]+C(3,2)*b[2]+C(3,3)*b[3]); *val = (a[0]*bc[0] + a[1]*bc[1] + a[2]*bc[2] + a[3]*bc[3]); grad[0] = (da[0]*bc[0] + da[1]*bc[1] + da[2]*bc[2] + da[3]*bc[3]); grad[1] = 
(a[0]*(C(0,0)*db[0]+C(0,1)*db[1]+C(0,2)*db[2]+C(0,3)*db[3])+ a[1]*(C(1,0)*db[0]+C(1,1)*db[1]+C(1,2)*db[2]+C(1,3)*db[3])+ a[2]*(C(2,0)*db[0]+C(2,1)*db[1]+C(2,2)*db[2]+C(2,3)*db[3])+ a[3]*(C(3,0)*db[0]+C(3,1)*db[1]+C(3,2)*db[2]+C(3,3)*db[3])); *lapl = (d2a[0]*bc[0] + d2a[1]*bc[1] + d2a[2]*bc[2] + d2a[3]*bc[3]+ a[0]*(C(0,0)*d2b[0]+C(0,1)*d2b[1]+C(0,2)*d2b[2]+C(0,3)*d2b[3])+ a[1]*(C(1,0)*d2b[0]+C(1,1)*d2b[1]+C(1,2)*d2b[2]+C(1,3)*d2b[3])+ a[2]*(C(2,0)*d2b[0]+C(2,1)*d2b[1]+C(2,2)*d2b[2]+C(2,3)*d2b[3])+ a[3]*(C(3,0)*d2b[0]+C(3,1)*d2b[1]+C(3,2)*d2b[2]+C(3,3)*d2b[3])); #undef C } /* Value, gradient, and Hessian */ inline void eval_NUBspline_2d_s_vgh (NUBspline_2d_s * restrict spline, double x, double y, float* restrict val, float* restrict grad, float* restrict hess) { float a[4], b[4], da[4], db[4], d2a[4], d2b[4], bc[4]; int ix = get_NUBasis_d2funcs_s (spline->x_basis, x, a, da, d2a); int iy = get_NUBasis_d2funcs_s (spline->y_basis, y, b, db, d2b); float* restrict coefs = spline->coefs; int xs = spline->x_stride; #define C(i,j) coefs[(ix+(i))*xs+iy+(j)] bc[0] = (C(0,0)*b[0]+C(0,1)*b[1]+C(0,2)*b[2]+C(0,3)*b[3]); bc[1] = (C(1,0)*b[0]+C(1,1)*b[1]+C(1,2)*b[2]+C(1,3)*b[3]); bc[2] = (C(2,0)*b[0]+C(2,1)*b[1]+C(2,2)*b[2]+C(2,3)*b[3]); bc[3] = (C(3,0)*b[0]+C(3,1)*b[1]+C(3,2)*b[2]+C(3,3)*b[3]); *val = (a[0]*bc[0] + a[1]*bc[1] + a[2]*bc[2] + a[3]*bc[3]); grad[0] = (da[0]*bc[0] + da[1]*bc[1] + da[2]*bc[2] + da[3]*bc[3]); grad[1] = (a[0]*(C(0,0)*db[0]+C(0,1)*db[1]+C(0,2)*db[2]+C(0,3)*db[3])+ a[1]*(C(1,0)*db[0]+C(1,1)*db[1]+C(1,2)*db[2]+C(1,3)*db[3])+ a[2]*(C(2,0)*db[0]+C(2,1)*db[1]+C(2,2)*db[2]+C(2,3)*db[3])+ a[3]*(C(3,0)*db[0]+C(3,1)*db[1]+C(3,2)*db[2]+C(3,3)*db[3])); hess[0] = (d2a[0]*bc[0] + d2a[1]*bc[1] + d2a[2]*bc[2] + d2a[3]*bc[3]); hess[1] = (da[0]*(C(0,0)*db[0]+C(0,1)*db[1]+C(0,2)*db[2]+C(0,3)*db[3])+ da[1]*(C(1,0)*db[0]+C(1,1)*db[1]+C(1,2)*db[2]+C(1,3)*db[3])+ da[2]*(C(2,0)*db[0]+C(2,1)*db[1]+C(2,2)*db[2]+C(2,3)*db[3])+ 
da[3]*(C(3,0)*db[0]+C(3,1)*db[1]+C(3,2)*db[2]+C(3,3)*db[3])); hess[3] = (a[0]*(C(0,0)*d2b[0]+C(0,1)*d2b[1]+C(0,2)*d2b[2]+C(0,3)*d2b[3])+ a[1]*(C(1,0)*d2b[0]+C(1,1)*d2b[1]+C(1,2)*d2b[2]+C(1,3)*d2b[3])+ a[2]*(C(2,0)*d2b[0]+C(2,1)*d2b[1]+C(2,2)*d2b[2]+C(2,3)*d2b[3])+ a[3]*(C(3,0)*d2b[0]+C(3,1)*d2b[1]+C(3,2)*d2b[2]+C(3,3)*d2b[3])); hess[2] = hess[1]; #undef C } /************************************************************/ /* 3D single-precision, real evaulation functions */ /************************************************************/ /* Value only */ inline void eval_NUBspline_3d_s (NUBspline_3d_s * restrict spline, double x, double y, double z, float* restrict val) { float a[4], b[4], c[4]; int ix = get_NUBasis_funcs_s (spline->x_basis, x, a); int iy = get_NUBasis_funcs_s (spline->y_basis, y, b); int iz = get_NUBasis_funcs_s (spline->z_basis, z, c); float* restrict coefs = spline->coefs; int xs = spline->x_stride; int ys = spline->y_stride; #define P(i,j,k) coefs[(ix+(i))*xs+(iy+(j))*ys+(iz+(k))] *val = (a[0]*(b[0]*(P(0,0,0)*c[0]+P(0,0,1)*c[1]+P(0,0,2)*c[2]+P(0,0,3)*c[3])+ b[1]*(P(0,1,0)*c[0]+P(0,1,1)*c[1]+P(0,1,2)*c[2]+P(0,1,3)*c[3])+ b[2]*(P(0,2,0)*c[0]+P(0,2,1)*c[1]+P(0,2,2)*c[2]+P(0,2,3)*c[3])+ b[3]*(P(0,3,0)*c[0]+P(0,3,1)*c[1]+P(0,3,2)*c[2]+P(0,3,3)*c[3]))+ a[1]*(b[0]*(P(1,0,0)*c[0]+P(1,0,1)*c[1]+P(1,0,2)*c[2]+P(1,0,3)*c[3])+ b[1]*(P(1,1,0)*c[0]+P(1,1,1)*c[1]+P(1,1,2)*c[2]+P(1,1,3)*c[3])+ b[2]*(P(1,2,0)*c[0]+P(1,2,1)*c[1]+P(1,2,2)*c[2]+P(1,2,3)*c[3])+ b[3]*(P(1,3,0)*c[0]+P(1,3,1)*c[1]+P(1,3,2)*c[2]+P(1,3,3)*c[3]))+ a[2]*(b[0]*(P(2,0,0)*c[0]+P(2,0,1)*c[1]+P(2,0,2)*c[2]+P(2,0,3)*c[3])+ b[1]*(P(2,1,0)*c[0]+P(2,1,1)*c[1]+P(2,1,2)*c[2]+P(2,1,3)*c[3])+ b[2]*(P(2,2,0)*c[0]+P(2,2,1)*c[1]+P(2,2,2)*c[2]+P(2,2,3)*c[3])+ b[3]*(P(2,3,0)*c[0]+P(2,3,1)*c[1]+P(2,3,2)*c[2]+P(2,3,3)*c[3]))+ a[3]*(b[0]*(P(3,0,0)*c[0]+P(3,0,1)*c[1]+P(3,0,2)*c[2]+P(3,0,3)*c[3])+ b[1]*(P(3,1,0)*c[0]+P(3,1,1)*c[1]+P(3,1,2)*c[2]+P(3,1,3)*c[3])+ 
b[2]*(P(3,2,0)*c[0]+P(3,2,1)*c[1]+P(3,2,2)*c[2]+P(3,2,3)*c[3])+ b[3]*(P(3,3,0)*c[0]+P(3,3,1)*c[1]+P(3,3,2)*c[2]+P(3,3,3)*c[3]))); #undef P } /* Value and gradient */ inline void eval_NUBspline_3d_s_vg (NUBspline_3d_s * restrict spline, double x, double y, double z, float* restrict val, float* restrict grad) { float a[4], b[4], c[4], da[4], db[4], dc[4], cP[16], bcP[4], dbcP[4]; int ix = get_NUBasis_dfuncs_s (spline->x_basis, x, a, da); int iy = get_NUBasis_dfuncs_s (spline->y_basis, y, b, db); int iz = get_NUBasis_dfuncs_s (spline->z_basis, z, c, dc); float* restrict coefs = spline->coefs; int xs = spline->x_stride; int ys = spline->y_stride; #define P(i,j,k) coefs[(ix+(i))*xs+(iy+(j))*ys+(iz+(k))] cP[ 0] = (P(0,0,0)*c[0]+P(0,0,1)*c[1]+P(0,0,2)*c[2]+P(0,0,3)*c[3]); cP[ 1] = (P(0,1,0)*c[0]+P(0,1,1)*c[1]+P(0,1,2)*c[2]+P(0,1,3)*c[3]); cP[ 2] = (P(0,2,0)*c[0]+P(0,2,1)*c[1]+P(0,2,2)*c[2]+P(0,2,3)*c[3]); cP[ 3] = (P(0,3,0)*c[0]+P(0,3,1)*c[1]+P(0,3,2)*c[2]+P(0,3,3)*c[3]); cP[ 4] = (P(1,0,0)*c[0]+P(1,0,1)*c[1]+P(1,0,2)*c[2]+P(1,0,3)*c[3]); cP[ 5] = (P(1,1,0)*c[0]+P(1,1,1)*c[1]+P(1,1,2)*c[2]+P(1,1,3)*c[3]); cP[ 6] = (P(1,2,0)*c[0]+P(1,2,1)*c[1]+P(1,2,2)*c[2]+P(1,2,3)*c[3]); cP[ 7] = (P(1,3,0)*c[0]+P(1,3,1)*c[1]+P(1,3,2)*c[2]+P(1,3,3)*c[3]); cP[ 8] = (P(2,0,0)*c[0]+P(2,0,1)*c[1]+P(2,0,2)*c[2]+P(2,0,3)*c[3]); cP[ 9] = (P(2,1,0)*c[0]+P(2,1,1)*c[1]+P(2,1,2)*c[2]+P(2,1,3)*c[3]); cP[10] = (P(2,2,0)*c[0]+P(2,2,1)*c[1]+P(2,2,2)*c[2]+P(2,2,3)*c[3]); cP[11] = (P(2,3,0)*c[0]+P(2,3,1)*c[1]+P(2,3,2)*c[2]+P(2,3,3)*c[3]); cP[12] = (P(3,0,0)*c[0]+P(3,0,1)*c[1]+P(3,0,2)*c[2]+P(3,0,3)*c[3]); cP[13] = (P(3,1,0)*c[0]+P(3,1,1)*c[1]+P(3,1,2)*c[2]+P(3,1,3)*c[3]); cP[14] = (P(3,2,0)*c[0]+P(3,2,1)*c[1]+P(3,2,2)*c[2]+P(3,2,3)*c[3]); cP[15] = (P(3,3,0)*c[0]+P(3,3,1)*c[1]+P(3,3,2)*c[2]+P(3,3,3)*c[3]); bcP[0] = ( b[0]*cP[ 0] + b[1]*cP[ 1] + b[2]*cP[ 2] + b[3]*cP[ 3]); bcP[1] = ( b[0]*cP[ 4] + b[1]*cP[ 5] + b[2]*cP[ 6] + b[3]*cP[ 7]); bcP[2] = ( b[0]*cP[ 8] + b[1]*cP[ 9] + b[2]*cP[10] + b[3]*cP[11]); 
bcP[3] = ( b[0]*cP[12] + b[1]*cP[13] + b[2]*cP[14] + b[3]*cP[15]); dbcP[0] = ( db[0]*cP[ 0] + db[1]*cP[ 1] + db[2]*cP[ 2] + db[3]*cP[ 3]); dbcP[1] = ( db[0]*cP[ 4] + db[1]*cP[ 5] + db[2]*cP[ 6] + db[3]*cP[ 7]); dbcP[2] = ( db[0]*cP[ 8] + db[1]*cP[ 9] + db[2]*cP[10] + db[3]*cP[11]); dbcP[3] = ( db[0]*cP[12] + db[1]*cP[13] + db[2]*cP[14] + db[3]*cP[15]); *val = ( a[0]*bcP[0] + a[1]*bcP[1] + a[2]*bcP[2] + a[3]*bcP[3]); grad[0] = (da[0]*bcP[0] + da[1]*bcP[1] + da[2]*bcP[2] + da[3]*bcP[3]); grad[1] = (a[0]*dbcP[0] + a[1]*dbcP[1] + a[2]*dbcP[2] + a[3]*dbcP[3]); grad[2] = (a[0]*(b[0]*(P(0,0,0)*dc[0]+P(0,0,1)*dc[1]+P(0,0,2)*dc[2]+P(0,0,3)*dc[3])+ b[1]*(P(0,1,0)*dc[0]+P(0,1,1)*dc[1]+P(0,1,2)*dc[2]+P(0,1,3)*dc[3])+ b[2]*(P(0,2,0)*dc[0]+P(0,2,1)*dc[1]+P(0,2,2)*dc[2]+P(0,2,3)*dc[3])+ b[3]*(P(0,3,0)*dc[0]+P(0,3,1)*dc[1]+P(0,3,2)*dc[2]+P(0,3,3)*dc[3]))+ a[1]*(b[0]*(P(1,0,0)*dc[0]+P(1,0,1)*dc[1]+P(1,0,2)*dc[2]+P(1,0,3)*dc[3])+ b[1]*(P(1,1,0)*dc[0]+P(1,1,1)*dc[1]+P(1,1,2)*dc[2]+P(1,1,3)*dc[3])+ b[2]*(P(1,2,0)*dc[0]+P(1,2,1)*dc[1]+P(1,2,2)*dc[2]+P(1,2,3)*dc[3])+ b[3]*(P(1,3,0)*dc[0]+P(1,3,1)*dc[1]+P(1,3,2)*dc[2]+P(1,3,3)*dc[3]))+ a[2]*(b[0]*(P(2,0,0)*dc[0]+P(2,0,1)*dc[1]+P(2,0,2)*dc[2]+P(2,0,3)*dc[3])+ b[1]*(P(2,1,0)*dc[0]+P(2,1,1)*dc[1]+P(2,1,2)*dc[2]+P(2,1,3)*dc[3])+ b[2]*(P(2,2,0)*dc[0]+P(2,2,1)*dc[1]+P(2,2,2)*dc[2]+P(2,2,3)*dc[3])+ b[3]*(P(2,3,0)*dc[0]+P(2,3,1)*dc[1]+P(2,3,2)*dc[2]+P(2,3,3)*dc[3]))+ a[3]*(b[0]*(P(3,0,0)*dc[0]+P(3,0,1)*dc[1]+P(3,0,2)*dc[2]+P(3,0,3)*dc[3])+ b[1]*(P(3,1,0)*dc[0]+P(3,1,1)*dc[1]+P(3,1,2)*dc[2]+P(3,1,3)*dc[3])+ b[2]*(P(3,2,0)*dc[0]+P(3,2,1)*dc[1]+P(3,2,2)*dc[2]+P(3,2,3)*dc[3])+ b[3]*(P(3,3,0)*dc[0]+P(3,3,1)*dc[1]+P(3,3,2)*dc[2]+P(3,3,3)*dc[3]))); #undef P } /* Value, gradient, and laplacian */ inline void eval_NUBspline_3d_s_vgl (NUBspline_3d_s * restrict spline, double x, double y, double z, float* restrict val, float* restrict grad, float* restrict lapl) { float a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4], cP[16], dcP[16], bcP[4], 
dbcP[4], d2bcP[4], bdcP[4]; int ix = get_NUBasis_d2funcs_s (spline->x_basis, x, a, da, d2a); int iy = get_NUBasis_d2funcs_s (spline->y_basis, y, b, db, d2b); int iz = get_NUBasis_d2funcs_s (spline->z_basis, z, c, dc, d2c); float* restrict coefs = spline->coefs; int xs = spline->x_stride; int ys = spline->y_stride; #define P(i,j,k) coefs[(ix+(i))*xs+(iy+(j))*ys+(iz+(k))] cP[ 0] = (P(0,0,0)*c[0]+P(0,0,1)*c[1]+P(0,0,2)*c[2]+P(0,0,3)*c[3]); cP[ 1] = (P(0,1,0)*c[0]+P(0,1,1)*c[1]+P(0,1,2)*c[2]+P(0,1,3)*c[3]); cP[ 2] = (P(0,2,0)*c[0]+P(0,2,1)*c[1]+P(0,2,2)*c[2]+P(0,2,3)*c[3]); cP[ 3] = (P(0,3,0)*c[0]+P(0,3,1)*c[1]+P(0,3,2)*c[2]+P(0,3,3)*c[3]); cP[ 4] = (P(1,0,0)*c[0]+P(1,0,1)*c[1]+P(1,0,2)*c[2]+P(1,0,3)*c[3]); cP[ 5] = (P(1,1,0)*c[0]+P(1,1,1)*c[1]+P(1,1,2)*c[2]+P(1,1,3)*c[3]); cP[ 6] = (P(1,2,0)*c[0]+P(1,2,1)*c[1]+P(1,2,2)*c[2]+P(1,2,3)*c[3]); cP[ 7] = (P(1,3,0)*c[0]+P(1,3,1)*c[1]+P(1,3,2)*c[2]+P(1,3,3)*c[3]); cP[ 8] = (P(2,0,0)*c[0]+P(2,0,1)*c[1]+P(2,0,2)*c[2]+P(2,0,3)*c[3]); cP[ 9] = (P(2,1,0)*c[0]+P(2,1,1)*c[1]+P(2,1,2)*c[2]+P(2,1,3)*c[3]); cP[10] = (P(2,2,0)*c[0]+P(2,2,1)*c[1]+P(2,2,2)*c[2]+P(2,2,3)*c[3]); cP[11] = (P(2,3,0)*c[0]+P(2,3,1)*c[1]+P(2,3,2)*c[2]+P(2,3,3)*c[3]); cP[12] = (P(3,0,0)*c[0]+P(3,0,1)*c[1]+P(3,0,2)*c[2]+P(3,0,3)*c[3]); cP[13] = (P(3,1,0)*c[0]+P(3,1,1)*c[1]+P(3,1,2)*c[2]+P(3,1,3)*c[3]); cP[14] = (P(3,2,0)*c[0]+P(3,2,1)*c[1]+P(3,2,2)*c[2]+P(3,2,3)*c[3]); cP[15] = (P(3,3,0)*c[0]+P(3,3,1)*c[1]+P(3,3,2)*c[2]+P(3,3,3)*c[3]); dcP[ 0] = (P(0,0,0)*dc[0]+P(0,0,1)*dc[1]+P(0,0,2)*dc[2]+P(0,0,3)*dc[3]); dcP[ 1] = (P(0,1,0)*dc[0]+P(0,1,1)*dc[1]+P(0,1,2)*dc[2]+P(0,1,3)*dc[3]); dcP[ 2] = (P(0,2,0)*dc[0]+P(0,2,1)*dc[1]+P(0,2,2)*dc[2]+P(0,2,3)*dc[3]); dcP[ 3] = (P(0,3,0)*dc[0]+P(0,3,1)*dc[1]+P(0,3,2)*dc[2]+P(0,3,3)*dc[3]); dcP[ 4] = (P(1,0,0)*dc[0]+P(1,0,1)*dc[1]+P(1,0,2)*dc[2]+P(1,0,3)*dc[3]); dcP[ 5] = (P(1,1,0)*dc[0]+P(1,1,1)*dc[1]+P(1,1,2)*dc[2]+P(1,1,3)*dc[3]); dcP[ 6] = (P(1,2,0)*dc[0]+P(1,2,1)*dc[1]+P(1,2,2)*dc[2]+P(1,2,3)*dc[3]); dcP[ 7] = 
(P(1,3,0)*dc[0]+P(1,3,1)*dc[1]+P(1,3,2)*dc[2]+P(1,3,3)*dc[3]); dcP[ 8] = (P(2,0,0)*dc[0]+P(2,0,1)*dc[1]+P(2,0,2)*dc[2]+P(2,0,3)*dc[3]); dcP[ 9] = (P(2,1,0)*dc[0]+P(2,1,1)*dc[1]+P(2,1,2)*dc[2]+P(2,1,3)*dc[3]); dcP[10] = (P(2,2,0)*dc[0]+P(2,2,1)*dc[1]+P(2,2,2)*dc[2]+P(2,2,3)*dc[3]); dcP[11] = (P(2,3,0)*dc[0]+P(2,3,1)*dc[1]+P(2,3,2)*dc[2]+P(2,3,3)*dc[3]); dcP[12] = (P(3,0,0)*dc[0]+P(3,0,1)*dc[1]+P(3,0,2)*dc[2]+P(3,0,3)*dc[3]); dcP[13] = (P(3,1,0)*dc[0]+P(3,1,1)*dc[1]+P(3,1,2)*dc[2]+P(3,1,3)*dc[3]); dcP[14] = (P(3,2,0)*dc[0]+P(3,2,1)*dc[1]+P(3,2,2)*dc[2]+P(3,2,3)*dc[3]); dcP[15] = (P(3,3,0)*dc[0]+P(3,3,1)*dc[1]+P(3,3,2)*dc[2]+P(3,3,3)*dc[3]); bcP[0] = ( b[0]*cP[ 0] + b[1]*cP[ 1] + b[2]*cP[ 2] + b[3]*cP[ 3]); bcP[1] = ( b[0]*cP[ 4] + b[1]*cP[ 5] + b[2]*cP[ 6] + b[3]*cP[ 7]); bcP[2] = ( b[0]*cP[ 8] + b[1]*cP[ 9] + b[2]*cP[10] + b[3]*cP[11]); bcP[3] = ( b[0]*cP[12] + b[1]*cP[13] + b[2]*cP[14] + b[3]*cP[15]); dbcP[0] = ( db[0]*cP[ 0] + db[1]*cP[ 1] + db[2]*cP[ 2] + db[3]*cP[ 3]); dbcP[1] = ( db[0]*cP[ 4] + db[1]*cP[ 5] + db[2]*cP[ 6] + db[3]*cP[ 7]); dbcP[2] = ( db[0]*cP[ 8] + db[1]*cP[ 9] + db[2]*cP[10] + db[3]*cP[11]); dbcP[3] = ( db[0]*cP[12] + db[1]*cP[13] + db[2]*cP[14] + db[3]*cP[15]); bdcP[0] = ( b[0]*dcP[ 0] + b[1]*dcP[ 1] + b[2]*dcP[ 2] + b[3]*dcP[ 3]); bdcP[1] = ( b[0]*dcP[ 4] + b[1]*dcP[ 5] + b[2]*dcP[ 6] + b[3]*dcP[ 7]); bdcP[2] = ( b[0]*dcP[ 8] + b[1]*dcP[ 9] + b[2]*dcP[10] + b[3]*dcP[11]); bdcP[3] = ( b[0]*dcP[12] + b[1]*dcP[13] + b[2]*dcP[14] + b[3]*dcP[15]); d2bcP[0] = ( d2b[0]*cP[ 0] + d2b[1]*cP[ 1] + d2b[2]*cP[ 2] + d2b[3]*cP[ 3]); d2bcP[1] = ( d2b[0]*cP[ 4] + d2b[1]*cP[ 5] + d2b[2]*cP[ 6] + d2b[3]*cP[ 7]); d2bcP[2] = ( d2b[0]*cP[ 8] + d2b[1]*cP[ 9] + d2b[2]*cP[10] + d2b[3]*cP[11]); d2bcP[3] = ( d2b[0]*cP[12] + d2b[1]*cP[13] + d2b[2]*cP[14] + d2b[3]*cP[15]); *val = ( a[0]*bcP[0] + a[1]*bcP[1] + a[2]*bcP[2] + a[3]*bcP[3]); grad[0] = (da[0]*bcP[0] + da[1]*bcP[1] + da[2]*bcP[2] + da[3]*bcP[3]); grad[1] = (a[0]*dbcP[0] + a[1]*dbcP[1] + a[2]*dbcP[2] + 
a[3]*dbcP[3]); grad[2] = (a[0]*bdcP[0] + a[1]*bdcP[1] + a[2]*bdcP[2] + a[3]*bdcP[3]); *lapl = (d2a[0]*bcP[0] + d2a[1]*bcP[1] + d2a[2]*bcP[2] + d2a[3]*bcP[3]) + (a[0]*d2bcP[0] + a[1]*d2bcP[1] + a[2]*d2bcP[2] + a[3]*d2bcP[3]) + (a[0]*(b[0]*(P(0,0,0)*d2c[0]+P(0,0,1)*d2c[1]+P(0,0,2)*d2c[2]+P(0,0,3)*d2c[3])+ b[1]*(P(0,1,0)*d2c[0]+P(0,1,1)*d2c[1]+P(0,1,2)*d2c[2]+P(0,1,3)*d2c[3])+ b[2]*(P(0,2,0)*d2c[0]+P(0,2,1)*d2c[1]+P(0,2,2)*d2c[2]+P(0,2,3)*d2c[3])+ b[3]*(P(0,3,0)*d2c[0]+P(0,3,1)*d2c[1]+P(0,3,2)*d2c[2]+P(0,3,3)*d2c[3]))+ a[1]*(b[0]*(P(1,0,0)*d2c[0]+P(1,0,1)*d2c[1]+P(1,0,2)*d2c[2]+P(1,0,3)*d2c[3])+ b[1]*(P(1,1,0)*d2c[0]+P(1,1,1)*d2c[1]+P(1,1,2)*d2c[2]+P(1,1,3)*d2c[3])+ b[2]*(P(1,2,0)*d2c[0]+P(1,2,1)*d2c[1]+P(1,2,2)*d2c[2]+P(1,2,3)*d2c[3])+ b[3]*(P(1,3,0)*d2c[0]+P(1,3,1)*d2c[1]+P(1,3,2)*d2c[2]+P(1,3,3)*d2c[3]))+ a[2]*(b[0]*(P(2,0,0)*d2c[0]+P(2,0,1)*d2c[1]+P(2,0,2)*d2c[2]+P(2,0,3)*d2c[3])+ b[1]*(P(2,1,0)*d2c[0]+P(2,1,1)*d2c[1]+P(2,1,2)*d2c[2]+P(2,1,3)*d2c[3])+ b[2]*(P(2,2,0)*d2c[0]+P(2,2,1)*d2c[1]+P(2,2,2)*d2c[2]+P(2,2,3)*d2c[3])+ b[3]*(P(2,3,0)*d2c[0]+P(2,3,1)*d2c[1]+P(2,3,2)*d2c[2]+P(2,3,3)*d2c[3]))+ a[3]*(b[0]*(P(3,0,0)*d2c[0]+P(3,0,1)*d2c[1]+P(3,0,2)*d2c[2]+P(3,0,3)*d2c[3])+ b[1]*(P(3,1,0)*d2c[0]+P(3,1,1)*d2c[1]+P(3,1,2)*d2c[2]+P(3,1,3)*d2c[3])+ b[2]*(P(3,2,0)*d2c[0]+P(3,2,1)*d2c[1]+P(3,2,2)*d2c[2]+P(3,2,3)*d2c[3])+ b[3]*(P(3,3,0)*d2c[0]+P(3,3,1)*d2c[1]+P(3,3,2)*d2c[2]+P(3,3,3)*d2c[3]))); #undef P } /* Value, gradient, and Hessian */ inline void eval_NUBspline_3d_s_vgh (NUBspline_3d_s * restrict spline, double x, double y, double z, float* restrict val, float* restrict grad, float* restrict hess) { float a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4], cP[16], dcP[16], d2cP[16], bcP[4], dbcP[4], d2bcP[4], dbdcP[4], bd2cP[4], bdcP[4]; int ix = get_NUBasis_d2funcs_s (spline->x_basis, x, a, da, d2a); int iy = get_NUBasis_d2funcs_s (spline->y_basis, y, b, db, d2b); int iz = get_NUBasis_d2funcs_s (spline->z_basis, z, c, dc, d2c); int xs = spline->x_stride; int 
ys = spline->y_stride; float* restrict coefs = spline->coefs; #define P(i,j,k) coefs[(ix+(i))*xs+(iy+(j))*ys+(iz+(k))] cP[ 0] = (P(0,0,0)*c[0]+P(0,0,1)*c[1]+P(0,0,2)*c[2]+P(0,0,3)*c[3]); cP[ 1] = (P(0,1,0)*c[0]+P(0,1,1)*c[1]+P(0,1,2)*c[2]+P(0,1,3)*c[3]); cP[ 2] = (P(0,2,0)*c[0]+P(0,2,1)*c[1]+P(0,2,2)*c[2]+P(0,2,3)*c[3]); cP[ 3] = (P(0,3,0)*c[0]+P(0,3,1)*c[1]+P(0,3,2)*c[2]+P(0,3,3)*c[3]); cP[ 4] = (P(1,0,0)*c[0]+P(1,0,1)*c[1]+P(1,0,2)*c[2]+P(1,0,3)*c[3]); cP[ 5] = (P(1,1,0)*c[0]+P(1,1,1)*c[1]+P(1,1,2)*c[2]+P(1,1,3)*c[3]); cP[ 6] = (P(1,2,0)*c[0]+P(1,2,1)*c[1]+P(1,2,2)*c[2]+P(1,2,3)*c[3]); cP[ 7] = (P(1,3,0)*c[0]+P(1,3,1)*c[1]+P(1,3,2)*c[2]+P(1,3,3)*c[3]); cP[ 8] = (P(2,0,0)*c[0]+P(2,0,1)*c[1]+P(2,0,2)*c[2]+P(2,0,3)*c[3]); cP[ 9] = (P(2,1,0)*c[0]+P(2,1,1)*c[1]+P(2,1,2)*c[2]+P(2,1,3)*c[3]); cP[10] = (P(2,2,0)*c[0]+P(2,2,1)*c[1]+P(2,2,2)*c[2]+P(2,2,3)*c[3]); cP[11] = (P(2,3,0)*c[0]+P(2,3,1)*c[1]+P(2,3,2)*c[2]+P(2,3,3)*c[3]); cP[12] = (P(3,0,0)*c[0]+P(3,0,1)*c[1]+P(3,0,2)*c[2]+P(3,0,3)*c[3]); cP[13] = (P(3,1,0)*c[0]+P(3,1,1)*c[1]+P(3,1,2)*c[2]+P(3,1,3)*c[3]); cP[14] = (P(3,2,0)*c[0]+P(3,2,1)*c[1]+P(3,2,2)*c[2]+P(3,2,3)*c[3]); cP[15] = (P(3,3,0)*c[0]+P(3,3,1)*c[1]+P(3,3,2)*c[2]+P(3,3,3)*c[3]); dcP[ 0] = (P(0,0,0)*dc[0]+P(0,0,1)*dc[1]+P(0,0,2)*dc[2]+P(0,0,3)*dc[3]); dcP[ 1] = (P(0,1,0)*dc[0]+P(0,1,1)*dc[1]+P(0,1,2)*dc[2]+P(0,1,3)*dc[3]); dcP[ 2] = (P(0,2,0)*dc[0]+P(0,2,1)*dc[1]+P(0,2,2)*dc[2]+P(0,2,3)*dc[3]); dcP[ 3] = (P(0,3,0)*dc[0]+P(0,3,1)*dc[1]+P(0,3,2)*dc[2]+P(0,3,3)*dc[3]); dcP[ 4] = (P(1,0,0)*dc[0]+P(1,0,1)*dc[1]+P(1,0,2)*dc[2]+P(1,0,3)*dc[3]); dcP[ 5] = (P(1,1,0)*dc[0]+P(1,1,1)*dc[1]+P(1,1,2)*dc[2]+P(1,1,3)*dc[3]); dcP[ 6] = (P(1,2,0)*dc[0]+P(1,2,1)*dc[1]+P(1,2,2)*dc[2]+P(1,2,3)*dc[3]); dcP[ 7] = (P(1,3,0)*dc[0]+P(1,3,1)*dc[1]+P(1,3,2)*dc[2]+P(1,3,3)*dc[3]); dcP[ 8] = (P(2,0,0)*dc[0]+P(2,0,1)*dc[1]+P(2,0,2)*dc[2]+P(2,0,3)*dc[3]); dcP[ 9] = (P(2,1,0)*dc[0]+P(2,1,1)*dc[1]+P(2,1,2)*dc[2]+P(2,1,3)*dc[3]); dcP[10] = 
(P(2,2,0)*dc[0]+P(2,2,1)*dc[1]+P(2,2,2)*dc[2]+P(2,2,3)*dc[3]); dcP[11] = (P(2,3,0)*dc[0]+P(2,3,1)*dc[1]+P(2,3,2)*dc[2]+P(2,3,3)*dc[3]); dcP[12] = (P(3,0,0)*dc[0]+P(3,0,1)*dc[1]+P(3,0,2)*dc[2]+P(3,0,3)*dc[3]); dcP[13] = (P(3,1,0)*dc[0]+P(3,1,1)*dc[1]+P(3,1,2)*dc[2]+P(3,1,3)*dc[3]); dcP[14] = (P(3,2,0)*dc[0]+P(3,2,1)*dc[1]+P(3,2,2)*dc[2]+P(3,2,3)*dc[3]); dcP[15] = (P(3,3,0)*dc[0]+P(3,3,1)*dc[1]+P(3,3,2)*dc[2]+P(3,3,3)*dc[3]); d2cP[ 0] = (P(0,0,0)*d2c[0]+P(0,0,1)*d2c[1]+P(0,0,2)*d2c[2]+P(0,0,3)*d2c[3]); d2cP[ 1] = (P(0,1,0)*d2c[0]+P(0,1,1)*d2c[1]+P(0,1,2)*d2c[2]+P(0,1,3)*d2c[3]); d2cP[ 2] = (P(0,2,0)*d2c[0]+P(0,2,1)*d2c[1]+P(0,2,2)*d2c[2]+P(0,2,3)*d2c[3]); d2cP[ 3] = (P(0,3,0)*d2c[0]+P(0,3,1)*d2c[1]+P(0,3,2)*d2c[2]+P(0,3,3)*d2c[3]); d2cP[ 4] = (P(1,0,0)*d2c[0]+P(1,0,1)*d2c[1]+P(1,0,2)*d2c[2]+P(1,0,3)*d2c[3]); d2cP[ 5] = (P(1,1,0)*d2c[0]+P(1,1,1)*d2c[1]+P(1,1,2)*d2c[2]+P(1,1,3)*d2c[3]); d2cP[ 6] = (P(1,2,0)*d2c[0]+P(1,2,1)*d2c[1]+P(1,2,2)*d2c[2]+P(1,2,3)*d2c[3]); d2cP[ 7] = (P(1,3,0)*d2c[0]+P(1,3,1)*d2c[1]+P(1,3,2)*d2c[2]+P(1,3,3)*d2c[3]); d2cP[ 8] = (P(2,0,0)*d2c[0]+P(2,0,1)*d2c[1]+P(2,0,2)*d2c[2]+P(2,0,3)*d2c[3]); d2cP[ 9] = (P(2,1,0)*d2c[0]+P(2,1,1)*d2c[1]+P(2,1,2)*d2c[2]+P(2,1,3)*d2c[3]); d2cP[10] = (P(2,2,0)*d2c[0]+P(2,2,1)*d2c[1]+P(2,2,2)*d2c[2]+P(2,2,3)*d2c[3]); d2cP[11] = (P(2,3,0)*d2c[0]+P(2,3,1)*d2c[1]+P(2,3,2)*d2c[2]+P(2,3,3)*d2c[3]); d2cP[12] = (P(3,0,0)*d2c[0]+P(3,0,1)*d2c[1]+P(3,0,2)*d2c[2]+P(3,0,3)*d2c[3]); d2cP[13] = (P(3,1,0)*d2c[0]+P(3,1,1)*d2c[1]+P(3,1,2)*d2c[2]+P(3,1,3)*d2c[3]); d2cP[14] = (P(3,2,0)*d2c[0]+P(3,2,1)*d2c[1]+P(3,2,2)*d2c[2]+P(3,2,3)*d2c[3]); d2cP[15] = (P(3,3,0)*d2c[0]+P(3,3,1)*d2c[1]+P(3,3,2)*d2c[2]+P(3,3,3)*d2c[3]); bcP[0] = ( b[0]*cP[ 0] + b[1]*cP[ 1] + b[2]*cP[ 2] + b[3]*cP[ 3]); bcP[1] = ( b[0]*cP[ 4] + b[1]*cP[ 5] + b[2]*cP[ 6] + b[3]*cP[ 7]); bcP[2] = ( b[0]*cP[ 8] + b[1]*cP[ 9] + b[2]*cP[10] + b[3]*cP[11]); bcP[3] = ( b[0]*cP[12] + b[1]*cP[13] + b[2]*cP[14] + b[3]*cP[15]); dbcP[0] = ( db[0]*cP[ 0] + db[1]*cP[ 1] + db[2]*cP[ 2] 
+ db[3]*cP[ 3]); dbcP[1] = ( db[0]*cP[ 4] + db[1]*cP[ 5] + db[2]*cP[ 6] + db[3]*cP[ 7]); dbcP[2] = ( db[0]*cP[ 8] + db[1]*cP[ 9] + db[2]*cP[10] + db[3]*cP[11]); dbcP[3] = ( db[0]*cP[12] + db[1]*cP[13] + db[2]*cP[14] + db[3]*cP[15]); bdcP[0] = ( b[0]*dcP[ 0] + b[1]*dcP[ 1] + b[2]*dcP[ 2] + b[3]*dcP[ 3]); bdcP[1] = ( b[0]*dcP[ 4] + b[1]*dcP[ 5] + b[2]*dcP[ 6] + b[3]*dcP[ 7]); bdcP[2] = ( b[0]*dcP[ 8] + b[1]*dcP[ 9] + b[2]*dcP[10] + b[3]*dcP[11]); bdcP[3] = ( b[0]*dcP[12] + b[1]*dcP[13] + b[2]*dcP[14] + b[3]*dcP[15]); bd2cP[0] = ( b[0]*d2cP[ 0] + b[1]*d2cP[ 1] + b[2]*d2cP[ 2] + b[3]*d2cP[ 3]); bd2cP[1] = ( b[0]*d2cP[ 4] + b[1]*d2cP[ 5] + b[2]*d2cP[ 6] + b[3]*d2cP[ 7]); bd2cP[2] = ( b[0]*d2cP[ 8] + b[1]*d2cP[ 9] + b[2]*d2cP[10] + b[3]*d2cP[11]); bd2cP[3] = ( b[0]*d2cP[12] + b[1]*d2cP[13] + b[2]*d2cP[14] + b[3]*d2cP[15]); d2bcP[0] = ( d2b[0]*cP[ 0] + d2b[1]*cP[ 1] + d2b[2]*cP[ 2] + d2b[3]*cP[ 3]); d2bcP[1] = ( d2b[0]*cP[ 4] + d2b[1]*cP[ 5] + d2b[2]*cP[ 6] + d2b[3]*cP[ 7]); d2bcP[2] = ( d2b[0]*cP[ 8] + d2b[1]*cP[ 9] + d2b[2]*cP[10] + d2b[3]*cP[11]); d2bcP[3] = ( d2b[0]*cP[12] + d2b[1]*cP[13] + d2b[2]*cP[14] + d2b[3]*cP[15]); dbdcP[0] = ( db[0]*dcP[ 0] + db[1]*dcP[ 1] + db[2]*dcP[ 2] + db[3]*dcP[ 3]); dbdcP[1] = ( db[0]*dcP[ 4] + db[1]*dcP[ 5] + db[2]*dcP[ 6] + db[3]*dcP[ 7]); dbdcP[2] = ( db[0]*dcP[ 8] + db[1]*dcP[ 9] + db[2]*dcP[10] + db[3]*dcP[11]); dbdcP[3] = ( db[0]*dcP[12] + db[1]*dcP[13] + db[2]*dcP[14] + db[3]*dcP[15]); *val = a[0]*bcP[0] + a[1]*bcP[1] + a[2]*bcP[2] + a[3]*bcP[3]; grad[0] = (da[0] *bcP[0] + da[1]*bcP[1] + da[2]*bcP[2] + da[3]*bcP[3]); grad[1] = (a[0]*dbcP[0] + a[1]*dbcP[1] + a[2]*dbcP[2] + a[3]*dbcP[3]); grad[2] = (a[0]*bdcP[0] + a[1]*bdcP[1] + a[2]*bdcP[2] + a[3]*bdcP[3]); // d2x hess[0] = (d2a[0]*bcP[0] + d2a[1]*bcP[1] + d2a[2]*bcP[2] + d2a[3]*bcP[3]); // dx dy hess[1] = (da[0]*dbcP[0] + da[1]*dbcP[1] + da[1]*dbcP[1] + da[1]*dbcP[1]); hess[3] = hess[1]; // dx dz; hess[2] = (da[0]*bdcP[0] + da[1]*bdcP[1] + da[1]*bdcP[1] + da[1]*bdcP[1]); hess[6] 
= hess[2]; // d2y hess[4] = (a[0]*d2bcP[0] + a[1]*d2bcP[1] + a[2]*d2bcP[2] + a[3]*d2bcP[3]); // dy dz hess[5] = (a[0]*dbdcP[0] + a[1]*dbdcP[1] + a[2]*dbdcP[2] + a[3]*dbdcP[3]); hess[7] = hess[5]; // d2z hess[8] = (a[0]*bd2cP[0] + a[1]*bd2cP[1] + a[2]*bd2cP[2] + a[3]*bd2cP[3]); #undef P } #endif einspline-0.9.2/src/nubspline_structs.h0000664000113000011300000001255411012400563015140 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. // // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. 
// // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #ifndef NUBSPLINE_STRUCTS_H #define NUBSPLINE_STRUCTS_H #include "bspline_base.h" #include "nubasis.h" typedef struct { spline_code sp_code; type_code t_code; void * restrict coefs; NUgrid *restrict x_grid; NUBasis *restrict x_basis; } NUBspline_1d; typedef struct { spline_code sp_code; type_code t_code; void * restrict coefs; int x_stride; NUgrid *restrict x_grid, *restrict y_grid; NUBasis *restrict x_basis, *restrict y_basis; } NUBspline_2d; typedef struct { spline_code sp_code; type_code t_code; void * restrict coefs; int x_stride, y_stride; NUgrid *restrict x_grid, *restrict y_grid, *restrict z_grid; NUBasis *restrict x_basis, *restrict y_basis, *restrict z_basis; } NUBspline_3d; /////////////////////////// // Single precision real // /////////////////////////// typedef struct { spline_code sp_code; type_code t_code; float* restrict coefs; NUgrid *restrict x_grid; NUBasis *restrict x_basis; BCtype_s xBC; } NUBspline_1d_s; typedef struct { spline_code sp_code; type_code t_code; float* restrict coefs; int x_stride; NUgrid *restrict x_grid, *restrict y_grid; NUBasis *restrict x_basis, *restrict y_basis; BCtype_s xBC, yBC; } NUBspline_2d_s; typedef struct { spline_code sp_code; type_code t_code; float* restrict coefs; int x_stride, y_stride; NUgrid *restrict x_grid, *restrict y_grid, *restrict z_grid; NUBasis *restrict x_basis, *restrict y_basis, *restrict z_basis; BCtype_s xBC, yBC, zBC; } NUBspline_3d_s; /////////////////////////// // Double precision real // /////////////////////////// typedef struct { spline_code sp_code; type_code t_code; double* restrict coefs; NUgrid* restrict x_grid; NUBasis* restrict x_basis; BCtype_d xBC; } NUBspline_1d_d; typedef 
struct { spline_code sp_code; type_code t_code; double* restrict coefs; int x_stride; NUgrid * restrict x_grid, * restrict y_grid; NUBasis * restrict x_basis, * restrict y_basis; BCtype_d xBC, yBC; } NUBspline_2d_d; typedef struct { spline_code sp_code; type_code t_code; double* restrict coefs; int x_stride, y_stride; NUgrid *restrict x_grid, *restrict y_grid, *restrict z_grid; NUBasis *restrict x_basis, *restrict y_basis, *restrict z_basis; BCtype_d xBC, yBC, zBC; } NUBspline_3d_d; ////////////////////////////// // Single precision complex // ////////////////////////////// typedef struct { spline_code sp_code; type_code t_code; complex_float* restrict coefs; NUgrid* restrict x_grid; NUBasis* restrict x_basis; BCtype_c xBC; } NUBspline_1d_c; typedef struct { spline_code sp_code; type_code t_code; complex_float* restrict coefs; int x_stride; NUgrid* restrict x_grid, *restrict y_grid; NUBasis* restrict x_basis, *restrict y_basis; BCtype_c xBC, yBC; } NUBspline_2d_c; typedef struct { spline_code sp_code; type_code t_code; complex_float* restrict coefs; int x_stride, y_stride; NUgrid *restrict x_grid, *restrict y_grid, *restrict z_grid; NUBasis *restrict x_basis, *restrict y_basis, *restrict z_basis; BCtype_c xBC, yBC, zBC; } NUBspline_3d_c; ////////////////////////////// // Double precision complex // ////////////////////////////// typedef struct { spline_code sp_code; type_code t_code; complex_double* restrict coefs; NUgrid *restrict x_grid; NUBasis *restrict x_basis; BCtype_z xBC; } NUBspline_1d_z; typedef struct { spline_code sp_code; type_code t_code; complex_double* restrict coefs; int x_stride; NUgrid *restrict x_grid, *restrict y_grid; NUBasis *restrict x_basis, *restrict y_basis; BCtype_z xBC, yBC; } NUBspline_2d_z; typedef struct { spline_code sp_code; type_code t_code; complex_double* restrict coefs; int x_stride, y_stride; NUgrid *restrict x_grid, *restrict y_grid, *restrict z_grid; NUBasis *restrict x_basis, *restrict y_basis, *restrict z_basis; BCtype_z 
xBC, yBC, zBC; } NUBspline_3d_z; #endif einspline-0.9.2/src/blip_create.c0000664000113000011300000001315011012400563013607 00000000000000#include "blip_create.h" #include #include #include #include "config.h" #ifdef _XOPEN_SOURCE #undef _XOPEN_SOURCE #endif #define _XOPEN_SOURCE 600 #include #include #include void init_sse_data(); inline void* FFTAlign (void* ptr) { size_t offset = 16 - (size_t)((size_t)ptr)&0x0f; return (void*) ((size_t)ptr+offset); } inline double dot (double a[3], double b[3]) { return (a[0]*b[0] + a[1]*b[1] + a[2]*b[2]); } // This function creates a single-precision real blip function from a // set of plane-wave coefficients. lattice is a 3x3 array specifying // the lattice vectors. The first lattice vector is given // contiguously at latice[0], the second at lattice[3], and the third // at lattice[6]. The next is a list of 3D G-vectors in the format: // G_x[0] G_y[0] G_z[0], G_x[1], G_y[1], G_z[1],... // Next, complex plane-wave coefficents are given, one for each // G-vector. Next, the number of G-vectors is given, followed by // a factor which increases the density of the real-space grid. A // factor of 1.0 uses the minimum density to avoid aliasing. Finally, // the last parameter specifies whether to take the real or imaginary part. // The spline is constructed to have domain [0,1) for x, y, and z coordinates. UBspline_3d_s* create_blip_3d_s (double *lattice, double *Gvecs, complex_float *coefs, int numG, double factor, bool useReal) { int max_ix=0, max_iy=0, max_iz=0; int Mx, My, Mz; double twoPiInv = 1.0/(2.0*M_PI); for (int i=0; i max_ix) max_ix = ix; if (abs(iy) > max_iy) max_iy = iy; if (abs(iz) > max_iz) max_iz = iz; } Mx = 4*max_ix + 1; My = 4*max_iy + 1; Mz = 4*max_iz + 1; Mx = (int) ceil(factor*Mx); My = (int) ceil(factor*My); Mz = (int) ceil(factor*Mz); // FFTs are a little faster with even dimensions. 
if ((Mx%2)==1) Mx++; if ((My%2)==1) My++; if ((Mz%2)==1) Mz++; fprintf (stderr, "(Mx, My, Mz) = (%d, %d, %d)\n", Mx, My, Mz); // Now allocate space for FFT box complex_float *fft_box, *alloc_ptr; fft_box = aligned_alloc (sizeof(complex_float)*Mx*My*Mz, 16); // Create FFTW plan fftwf_plan plan = fftwf_plan_dft_3d (Mx, My, Mz, (fftwf_complex*)fft_box, (fftwf_complex*)fft_box, 1, FFTW_ESTIMATE); // Zero-out fft-box for (int i=0; i 1.0e-10) gamma *= (3.0/(G[0]*G[0]*G[0]*G[0])*(3.0 - 4.0*cos(G[0]) + cos(2.0*G[0]))); else gamma *= 1.5; if (fabs(G[1]) > 1.0e-10) gamma *= (3.0/(G[1]*G[1]*G[1]*G[1])*(3.0 - 4.0*cos(G[1]) + cos(2.0*G[1]))); else gamma *= 1.5; if (fabs(G[2]) > 1.0e-10) gamma *= (3.0/(G[2]*G[2]*G[2]*G[2])*(3.0 - 4.0*cos(G[2]) + cos(2.0*G[2]))); else gamma *= 1.5; gamma *= scale; fft_box[(ix*My+iy)*Mz+iz] = coefs[i]/gamma; } // Execute the FFTW plan fftwf_execute (plan); // Destroy plan fftwf_destroy_plan (plan); // Now we have the coefficients in the FFT box. We must allocate a // little bit larger box to hold the B-spline coefficients UBspline_3d_s* restrict spline = malloc (sizeof (UBspline_3d_s)); spline->spcode = U3D; spline->tcode = SINGLE_REAL; Ugrid x_grid, y_grid, z_grid; int Nx = Mx + 3; int Ny = My + 3; int Nz = Mz + 3; x_grid.start = 0.0; x_grid.end = 1.0; x_grid.num = Mx; x_grid.delta = 1.0/(double)Mx; x_grid.delta_inv = 1.0/x_grid.delta; y_grid.start = 0.0; y_grid.end = 1.0; y_grid.num = My; y_grid.delta = 1.0/(double)My; y_grid.delta_inv = 1.0/y_grid.delta; z_grid.start = 0.0; z_grid.end = 1.0; z_grid.num = Mz; z_grid.delta = 1.0/(double)Mz; z_grid.delta_inv = 1.0/z_grid.delta; spline->x_grid = x_grid; spline->y_grid = y_grid; spline->z_grid = z_grid; spline->x_stride = Ny*Nz; spline->y_stride = Nz; spline->xBC.lCode = PERIODIC; spline->xBC.rCode = PERIODIC; spline->yBC.lCode = PERIODIC; spline->yBC.rCode = PERIODIC; spline->zBC.lCode = PERIODIC; spline->zBC.rCode = PERIODIC; #ifndef HAVE_SSE2 spline->coefs = malloc (sizeof(float)*Nx*Ny*Nz); #else 
posix_memalign ((void**)&spline->coefs, 16, sizeof(float)*Nx*Ny*Nz); #endif // Now copy data into spline coefficients, observing periodic boundary conditions for (int ix=0; ixcoefs[(ix*Ny+iy)*Nz+iz] = crealf (fft_box[(jx*My+jy)*Mz+jz]); else spline->coefs[(ix*Ny+iy)*Nz+iz] = cimagf (fft_box[(jx*My+jy)*Mz+jz]); } } } //free (alloc_ptr); aligned_free (fft_box); init_sse_data(); return spline; } einspline-0.9.2/src/multi_nubspline_eval_c.h0000664000113000011300000001050011035721612016067 00000000000000///////////////////////////////////////////////////////////////////////////// // einspline: a library for creating and evaluating B-splines // // Copyright (C) 2007 Kenneth P. Esler, Jr. // // // // This program is free software; you can redistribute it and/or modify // // it under the terms of the GNU General Public License as published by // // the Free Software Foundation; either version 2 of the License, or // // (at your option) any later version. // // // // This program is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // // GNU General Public License for more details. 
// // // // You should have received a copy of the GNU General Public License // // along with this program; if not, write to the Free Software // // Foundation, Inc., 51 Franklin Street, Fifth Floor, // // Boston, MA 02110-1301 USA // ///////////////////////////////////////////////////////////////////////////// #ifndef MULTI_NUBSPLINE_EVAL_C_H #define MULTI_NUBSPLINE_EVAL_C_H #include #include #include "multi_nubspline_structs.h" /************************************************************/ /* 1D float-precision, complex evaulation functions */ /************************************************************/ void eval_multi_NUBspline_1d_c (multi_NUBspline_1d_c *spline, double x, complex_float* restrict vals); void eval_multi_NUBspline_1d_c_vg (multi_NUBspline_1d_c *spline, double x, complex_float* restrict vals, complex_float* restrict grads); void eval_multi_NUBspline_1d_c_vgl (multi_NUBspline_1d_c *spline, double x, complex_float* restrict vals, complex_float* restrict grads, complex_float* restrict lapl); void eval_multi_NUBspline_1d_c_vgh (multi_NUBspline_1d_c *spline, double x, complex_float* restrict vals, complex_float* restrict grads, complex_float* restrict hess); /************************************************************/ /* 2D float-precision, complex evaulation functions */ /************************************************************/ void eval_multi_NUBspline_2d_c (multi_NUBspline_2d_c *spline, double x, double y, complex_float* restrict vals); void eval_multi_NUBspline_2d_c_vg (multi_NUBspline_2d_c *spline, double x, double y, complex_float* restrict vals, complex_float* restrict grads); void eval_multi_NUBspline_2d_c_vgl (multi_NUBspline_2d_c *spline, double x, double y, complex_float* restrict vals, complex_float* restrict grads, complex_float* restrict lapl); void eval_multi_NUBspline_2d_c_vgh (multi_NUBspline_2d_c *spline, double x, double y, complex_float* restrict vals, complex_float* restrict grads, complex_float* restrict hess); 
/************************************************************/ /* 3D float-precision, complex evaulation functions */ /************************************************************/ void eval_multi_NUBspline_3d_c (multi_NUBspline_3d_c *spline, double x, double y, double z, complex_float* restrict vals); void eval_multi_NUBspline_3d_c_vg (multi_NUBspline_3d_c *spline, double x, double y, double z, complex_float* restrict vals, complex_float* restrict grads); void eval_multi_NUBspline_3d_c_vgl (multi_NUBspline_3d_c *spline, double x, double y, double z, complex_float* restrict vals, complex_float* restrict grads, complex_float* restrict lapl); void eval_multi_NUBspline_3d_c_vgh (multi_NUBspline_3d_c *spline, double x, double y, double z, complex_float* restrict vals, complex_float* restrict grads, complex_float* restrict hess); #endif einspline-0.9.2/COPYING0000644000113000011300000004312211012400654011437 00000000000000 GNU GENERAL PUBLIC LICENSE Version 2, June 1991 Copyright (C) 1989, 1991 Free Software Foundation, Inc. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Library General Public License instead.) You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. 
Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. The precise terms and conditions for copying, distribution and modification follow. GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. 
This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. 
b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. 
You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. 
If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. 4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. 6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License. 7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. 
If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 9. 
The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. 10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 12. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Also add information on how to contact you by electronic and paper mail. 
If the program is interactive, make it output a short notice like this when it starts in an interactive mode: Gnomovision version 69, Copyright (C) year name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (which makes passes at compilers) written by James Hacker. , 1 April 1989 Ty Coon, President of Vice This General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Library General Public License instead of this License. einspline-0.9.2/INSTALL0000644000113000011300000002231011012400654011431 00000000000000Installation Instructions ************************* Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004, 2005, 2006 Free Software Foundation, Inc. This file is free documentation; the Free Software Foundation gives unlimited permission to copy, distribute and modify it. Basic Installation ================== Briefly, the shell commands `./configure; make; make install' should configure, build, and install this package. The following more-detailed instructions are generic; see the `README' file for instructions specific to this package. 
The `configure' shell script attempts to guess correct values for various system-dependent variables used during compilation. It uses those values to create a `Makefile' in each directory of the package. It may also create one or more `.h' files containing system-dependent definitions. Finally, it creates a shell script `config.status' that you can run in the future to recreate the current configuration, and a file `config.log' containing compiler output (useful mainly for debugging `configure'). It can also use an optional file (typically called `config.cache' and enabled with `--cache-file=config.cache' or simply `-C') that saves the results of its tests to speed up reconfiguring. Caching is disabled by default to prevent problems with accidental use of stale cache files. If you need to do unusual things to compile the package, please try to figure out how `configure' could check whether to do them, and mail diffs or instructions to the address given in the `README' so they can be considered for the next release. If you are using the cache, and at some point `config.cache' contains results you don't want to keep, you may remove or edit it. The file `configure.ac' (or `configure.in') is used to create `configure' by a program called `autoconf'. You need `configure.ac' if you want to change it or regenerate `configure' using a newer version of `autoconf'. The simplest way to compile this package is: 1. `cd' to the directory containing the package's source code and type `./configure' to configure the package for your system. Running `configure' might take a while. While running, it prints some messages telling which features it is checking for. 2. Type `make' to compile the package. 3. Optionally, type `make check' to run any self-tests that come with the package. 4. Type `make install' to install the programs and any data files and documentation. 5. You can remove the program binaries and object files from the source code directory by typing `make clean'. 
To also remove the files that `configure' created (so you can compile the package for a different kind of computer), type `make distclean'. There is also a `make maintainer-clean' target, but that is intended mainly for the package's developers. If you use it, you may have to get all sorts of other programs in order to regenerate files that came with the distribution. Compilers and Options ===================== Some systems require unusual options for compilation or linking that the `configure' script does not know about. Run `./configure --help' for details on some of the pertinent environment variables. You can give `configure' initial values for configuration parameters by setting variables in the command line or in the environment. Here is an example: ./configure CC=c99 CFLAGS=-g LIBS=-lposix *Note Defining Variables::, for more details. Compiling For Multiple Architectures ==================================== You can compile the package for more than one kind of computer at the same time, by placing the object files for each architecture in their own directory. To do this, you can use GNU `make'. `cd' to the directory where you want the object files and executables to go and run the `configure' script. `configure' automatically checks for the source code in the directory that `configure' is in and in `..'. With a non-GNU `make', it is safer to compile the package for one architecture at a time in the source code directory. After you have installed the package for one architecture, use `make distclean' before reconfiguring for another architecture. Installation Names ================== By default, `make install' installs the package's commands under `/usr/local/bin', include files under `/usr/local/include', etc. You can specify an installation prefix other than `/usr/local' by giving `configure' the option `--prefix=PREFIX'. You can specify separate installation prefixes for architecture-specific files and architecture-independent files. 
If you pass the option `--exec-prefix=PREFIX' to `configure', the package uses PREFIX as the prefix for installing programs and libraries. Documentation and other data files still use the regular prefix. In addition, if you use an unusual directory layout you can give options like `--bindir=DIR' to specify different values for particular kinds of files. Run `configure --help' for a list of the directories you can set and what kinds of files go in them. If the package supports it, you can cause programs to be installed with an extra prefix or suffix on their names by giving `configure' the option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'. Optional Features ================= Some packages pay attention to `--enable-FEATURE' options to `configure', where FEATURE indicates an optional part of the package. They may also pay attention to `--with-PACKAGE' options, where PACKAGE is something like `gnu-as' or `x' (for the X Window System). The `README' should mention any `--enable-' and `--with-' options that the package recognizes. For packages that use the X Window System, `configure' can usually find the X include and library files automatically, but if it doesn't, you can use the `configure' options `--x-includes=DIR' and `--x-libraries=DIR' to specify their locations. Specifying the System Type ========================== There may be some features `configure' cannot figure out automatically, but needs to determine by the type of machine the package will run on. Usually, assuming the package is built to be run on the _same_ architectures, `configure' can figure that out, but if it prints a message saying it cannot guess the machine type, give it the `--build=TYPE' option. TYPE can either be a short name for the system type, such as `sun4', or a canonical name which has the form: CPU-COMPANY-SYSTEM where SYSTEM can have one of these forms: OS KERNEL-OS See the file `config.sub' for the possible values of each field. 
If `config.sub' isn't included in this package, then this package doesn't need to know the machine type. If you are _building_ compiler tools for cross-compiling, you should use the option `--target=TYPE' to select the type of system they will produce code for. If you want to _use_ a cross compiler, that generates code for a platform different from the build platform, you should specify the "host" platform (i.e., that on which the generated programs will eventually be run) with `--host=TYPE'. Sharing Defaults ================ If you want to set default values for `configure' scripts to share, you can create a site shell script called `config.site' that gives default values for variables like `CC', `cache_file', and `prefix'. `configure' looks for `PREFIX/share/config.site' if it exists, then `PREFIX/etc/config.site' if it exists. Or, you can set the `CONFIG_SITE' environment variable to the location of the site script. A warning: not all `configure' scripts look for a site script. Defining Variables ================== Variables not defined in a site shell script can be set in the environment passed to `configure'. However, some packages may run configure again during the build, and the customized values of these variables may be lost. In order to avoid this problem, you should set them in the `configure' command line, using `VAR=value'. For example: ./configure CC=/usr/local2/bin/gcc causes the specified `gcc' to be used as the C compiler (unless it is overridden in the site shell script). Unfortunately, this technique does not work for `CONFIG_SHELL' due to an Autoconf bug. Until the bug is fixed you can use this workaround: CONFIG_SHELL=/bin/bash /bin/bash ./configure CONFIG_SHELL=/bin/bash `configure' Invocation ====================== `configure' recognizes the following options to control how it operates. `--help' `-h' Print a summary of the options to `configure', and exit. 
`--version' `-V' Print the version of Autoconf used to generate the `configure' script, and exit. `--cache-file=FILE' Enable the cache: use and save the results of the tests in FILE, traditionally `config.cache'. FILE defaults to `/dev/null' to disable caching. `--config-cache' `-C' Alias for `--cache-file=config.cache'. `--quiet' `--silent' `-q' Do not print messages saying which checks are being made. To suppress all normal output, redirect it to `/dev/null' (any error messages will still be shown). `--srcdir=DIR' Look for the package's source code in directory DIR. Usually `configure' can determine that directory automatically. `configure' also accepts some other, not widely useful, options. Run `configure --help' for more details. einspline-0.9.2/cudalt.py0000775000113000011300000000160411061770255012251 00000000000000#!/usr/bin/python # libtoolish hack import sys import os out_lo = sys.argv[1] out_o = out_lo.replace(".lo", ".o") try: i = out_o.rindex("/") dir = out_o[0:i+1] + ".libs/" out_l = ".libs/" + out_o[i+1:] out_o = out_o[0:i+1] + ".libs/" + out_o[i+1:] except ValueError: dir = ".libs/" out_o = ".libs/" + out_o out_l = out_o #print out_o # Make lib dir try: os.mkdir(dir) except OSError: pass args = sys.argv[2:] args.extend(["-Xcompiler","-fPIC"]) # position indep code args.append("-o") args.append(out_o) rv = os.system(" ".join(args)) if rv != 0: #print "******** RV is ", rv sys.exit(1) f = open(out_lo, "w") f.write("# multi_bspline_cuda_c.lo - a libtool object file\n") f.write("# Generated by ltmain.sh - GNU libtool 1.5.22 Debian 1.5.22-4 (1.1220.2.365 2005/12/18 22:14:06)\n") f.write("pic_object='" + out_l + "'") f.close() sys.exit(0) einspline-0.9.2/install-sh0000755000113000011300000003160011012400653012405 00000000000000#!/bin/sh # install - install a program, script, or datafile scriptversion=2006-10-14.15 # This originates from X11R5 (mit/util/scripts/install.sh), which was # later released in X11R6 (xc/config/util/install.sh) with the # 
following copyright and license. # # Copyright (C) 1994 X Consortium # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to # deal in the Software without restriction, including without limitation the # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or # sell copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN # AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC- # TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. # # Except as contained in this notice, the name of the X Consortium shall not # be used in advertising or otherwise to promote the sale, use or other deal- # ings in this Software without prior written authorization from the X Consor- # tium. # # # FSF changes to this file are in the public domain. # # Calling this script install-sh is preferred over install.sh, to prevent # `make' implicit rules from creating a file called install from it # when there is no Makefile. # # This script is compatible with the BSD install script, but was written # from scratch. nl=' ' IFS=" "" $nl" # set DOITPROG to echo to test this script # Don't use :- since 4.3BSD and earlier shells don't like it. doit="${DOITPROG-}" if test -z "$doit"; then doit_exec=exec else doit_exec=$doit fi # Put in absolute file names if you don't have them in your path; # or use environment vars. 
mvprog="${MVPROG-mv}" cpprog="${CPPROG-cp}" chmodprog="${CHMODPROG-chmod}" chownprog="${CHOWNPROG-chown}" chgrpprog="${CHGRPPROG-chgrp}" stripprog="${STRIPPROG-strip}" rmprog="${RMPROG-rm}" mkdirprog="${MKDIRPROG-mkdir}" posix_glob= posix_mkdir= # Desired mode of installed file. mode=0755 chmodcmd=$chmodprog chowncmd= chgrpcmd= stripcmd= rmcmd="$rmprog -f" mvcmd="$mvprog" src= dst= dir_arg= dstarg= no_target_directory= usage="Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE or: $0 [OPTION]... SRCFILES... DIRECTORY or: $0 [OPTION]... -t DIRECTORY SRCFILES... or: $0 [OPTION]... -d DIRECTORIES... In the 1st form, copy SRCFILE to DSTFILE. In the 2nd and 3rd, copy all SRCFILES to DIRECTORY. In the 4th, create DIRECTORIES. Options: -c (ignored) -d create directories instead of installing files. -g GROUP $chgrpprog installed files to GROUP. -m MODE $chmodprog installed files to MODE. -o USER $chownprog installed files to USER. -s $stripprog installed files. -t DIRECTORY install into DIRECTORY. -T report an error if DSTFILE is a directory. --help display this help and exit. --version display version info and exit. Environment variables override the default commands: CHGRPPROG CHMODPROG CHOWNPROG CPPROG MKDIRPROG MVPROG RMPROG STRIPPROG " while test $# -ne 0; do case $1 in -c) shift continue;; -d) dir_arg=true shift continue;; -g) chgrpcmd="$chgrpprog $2" shift shift continue;; --help) echo "$usage"; exit $?;; -m) mode=$2 shift shift case $mode in *' '* | *' '* | *' '* | *'*'* | *'?'* | *'['*) echo "$0: invalid mode: $mode" >&2 exit 1;; esac continue;; -o) chowncmd="$chownprog $2" shift shift continue;; -s) stripcmd=$stripprog shift continue;; -t) dstarg=$2 shift shift continue;; -T) no_target_directory=true shift continue;; --version) echo "$0 $scriptversion"; exit $?;; --) shift break;; -*) echo "$0: invalid option: $1" >&2 exit 1;; *) break;; esac done if test $# -ne 0 && test -z "$dir_arg$dstarg"; then # When -d is used, all remaining arguments are directories to create. 
# When -t is used, the destination is already specified. # Otherwise, the last argument is the destination. Remove it from $@. for arg do if test -n "$dstarg"; then # $@ is not empty: it contains at least $arg. set fnord "$@" "$dstarg" shift # fnord fi shift # arg dstarg=$arg done fi if test $# -eq 0; then if test -z "$dir_arg"; then echo "$0: no input file specified." >&2 exit 1 fi # It's OK to call `install-sh -d' without argument. # This can happen when creating conditional directories. exit 0 fi if test -z "$dir_arg"; then trap '(exit $?); exit' 1 2 13 15 # Set umask so as not to create temps with too-generous modes. # However, 'strip' requires both read and write access to temps. case $mode in # Optimize common cases. *644) cp_umask=133;; *755) cp_umask=22;; *[0-7]) if test -z "$stripcmd"; then u_plus_rw= else u_plus_rw='% 200' fi cp_umask=`expr '(' 777 - $mode % 1000 ')' $u_plus_rw`;; *) if test -z "$stripcmd"; then u_plus_rw= else u_plus_rw=,u+rw fi cp_umask=$mode$u_plus_rw;; esac fi for src do # Protect names starting with `-'. case $src in -*) src=./$src ;; esac if test -n "$dir_arg"; then dst=$src dstdir=$dst test -d "$dstdir" dstdir_status=$? else # Waiting for this to be detected by the "$cpprog $src $dsttmp" command # might cause directories to be created, which would be especially bad # if $src (and thus $dsttmp) contains '*'. if test ! -f "$src" && test ! -d "$src"; then echo "$0: $src does not exist." >&2 exit 1 fi if test -z "$dstarg"; then echo "$0: no destination specified." >&2 exit 1 fi dst=$dstarg # Protect names starting with `-'. case $dst in -*) dst=./$dst ;; esac # If destination is a directory, append the input filename; won't work # if double slashes aren't ignored. if test -d "$dst"; then if test -n "$no_target_directory"; then echo "$0: $dstarg: Is a directory" >&2 exit 1 fi dstdir=$dst dst=$dstdir/`basename "$src"` dstdir_status=0 else # Prefer dirname, but fall back on a substitute if dirname fails. 
dstdir=` (dirname "$dst") 2>/dev/null || expr X"$dst" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$dst" : 'X\(//\)[^/]' \| \ X"$dst" : 'X\(//\)$' \| \ X"$dst" : 'X\(/\)' \| . 2>/dev/null || echo X"$dst" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q' ` test -d "$dstdir" dstdir_status=$? fi fi obsolete_mkdir_used=false if test $dstdir_status != 0; then case $posix_mkdir in '') # Create intermediate dirs using mode 755 as modified by the umask. # This is like FreeBSD 'install' as of 1997-10-28. umask=`umask` case $stripcmd.$umask in # Optimize common cases. *[2367][2367]) mkdir_umask=$umask;; .*0[02][02] | .[02][02] | .[02]) mkdir_umask=22;; *[0-7]) mkdir_umask=`expr $umask + 22 \ - $umask % 100 % 40 + $umask % 20 \ - $umask % 10 % 4 + $umask % 2 `;; *) mkdir_umask=$umask,go-w;; esac # With -d, create the new directory with the user-specified mode. # Otherwise, rely on $mkdir_umask. if test -n "$dir_arg"; then mkdir_mode=-m$mode else mkdir_mode= fi posix_mkdir=false case $umask in *[123567][0-7][0-7]) # POSIX mkdir -p sets u+wx bits regardless of umask, which # is incompatible with FreeBSD 'install' when (umask & 300) != 0. ;; *) tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$ trap 'ret=$?; rmdir "$tmpdir/d" "$tmpdir" 2>/dev/null; exit $ret' 0 if (umask $mkdir_umask && exec $mkdirprog $mkdir_mode -p -- "$tmpdir/d") >/dev/null 2>&1 then if test -z "$dir_arg" || { # Check for POSIX incompatibilities with -m. # HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or # other-writeable bit of parent directory when it shouldn't. # FreeBSD 6.1 mkdir -m -p sets mode of existing directory. 
ls_ld_tmpdir=`ls -ld "$tmpdir"` case $ls_ld_tmpdir in d????-?r-*) different_mode=700;; d????-?--*) different_mode=755;; *) false;; esac && $mkdirprog -m$different_mode -p -- "$tmpdir" && { ls_ld_tmpdir_1=`ls -ld "$tmpdir"` test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1" } } then posix_mkdir=: fi rmdir "$tmpdir/d" "$tmpdir" else # Remove any dirs left behind by ancient mkdir implementations. rmdir ./$mkdir_mode ./-p ./-- 2>/dev/null fi trap '' 0;; esac;; esac if $posix_mkdir && ( umask $mkdir_umask && $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir" ) then : else # The umask is ridiculous, or mkdir does not conform to POSIX, # or it failed possibly due to a race condition. Create the # directory the slow way, step by step, checking for races as we go. case $dstdir in /*) prefix=/ ;; -*) prefix=./ ;; *) prefix= ;; esac case $posix_glob in '') if (set -f) 2>/dev/null; then posix_glob=true else posix_glob=false fi ;; esac oIFS=$IFS IFS=/ $posix_glob && set -f set fnord $dstdir shift $posix_glob && set +f IFS=$oIFS prefixes= for d do test -z "$d" && continue prefix=$prefix$d if test -d "$prefix"; then prefixes= else if $posix_mkdir; then (umask=$mkdir_umask && $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break # Don't fail if two instances are running concurrently. test -d "$prefix" || exit 1 else case $prefix in *\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;; *) qprefix=$prefix;; esac prefixes="$prefixes '$qprefix'" fi fi prefix=$prefix/ done if test -n "$prefixes"; then # Don't fail if two instances are running concurrently. 
(umask $mkdir_umask && eval "\$doit_exec \$mkdirprog $prefixes") || test -d "$dstdir" || exit 1 obsolete_mkdir_used=true fi fi fi if test -n "$dir_arg"; then { test -z "$chowncmd" || $doit $chowncmd "$dst"; } && { test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } && { test "$obsolete_mkdir_used$chowncmd$chgrpcmd" = false || test -z "$chmodcmd" || $doit $chmodcmd $mode "$dst"; } || exit 1 else # Make a couple of temp file names in the proper directory. dsttmp=$dstdir/_inst.$$_ rmtmp=$dstdir/_rm.$$_ # Trap to clean up those temp files at exit. trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0 # Copy the file name to the temp name. (umask $cp_umask && $doit_exec $cpprog "$src" "$dsttmp") && # and set any options; do chmod last to preserve setuid bits. # # If any of these fail, we abort the whole thing. If we want to # ignore errors from any of these, just make sure not to ignore # errors from the above "$doit $cpprog $src $dsttmp" command. # { test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } \ && { test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } \ && { test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } \ && { test -z "$chmodcmd" || $doit $chmodcmd $mode "$dsttmp"; } && # Now rename the file to the real destination. { $doit $mvcmd -f "$dsttmp" "$dst" 2>/dev/null \ || { # The rename failed, perhaps because mv can't rename something else # to itself, or perhaps because mv is so ancient that it does not # support -f. # Now remove or move aside any old file at destination location. # We try this two ways since rm can't unlink itself on some # systems and the destination file might be busy for other # reasons. In this case, the final cleanup might fail but the new # file should still install successfully. 
{ if test -f "$dst"; then $doit $rmcmd -f "$dst" 2>/dev/null \ || { $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null \ && { $doit $rmcmd -f "$rmtmp" 2>/dev/null; :; }; }\ || { echo "$0: cannot unlink or rename $dst" >&2 (exit 1); exit 1 } else : fi } && # Now rename the file to the real destination. $doit $mvcmd "$dsttmp" "$dst" } } || exit 1 trap '' 0 fi done # Local variables: # eval: (add-hook 'write-file-hooks 'time-stamp) # time-stamp-start: "scriptversion=" # time-stamp-format: "%:y-%02m-%02d.%02H" # time-stamp-end: "$" # End: einspline-0.9.2/configure.ac0000664000113000011300000001512011273633714012707 00000000000000# -*- Autoconf -*- # Process this file with autoconf to produce a configure script. AC_PREREQ(2.61) AC_INIT(einspline, 0.9.2, esler@uiuc.edu, einspline) AM_INIT_AUTOMAKE(einspline, 0.9.2) AC_CONFIG_SRCDIR([src/bspline.h]) AC_CONFIG_HEADER([src/config.h]) # Checks for programs. AC_PROG_CC_STDC AC_PROG_CXX PKG_PROG_PKG_CONFIG # Optimal compiler flags AX_CC_MAXOPT AX_CXX_MAXOPT AC_CXX_RESTRICT AC_ARG_ENABLE(fortran, AC_HELP_STRING([--disable-fortran],[disable fortran bindings])) if test "x$enable_fortran" != "xno"; then AC_MSG_RESULT([*************** Enabling F77! ***************]) AX_F77_MAXOPT fi #if test "x$enable_fortran" = "xyes"; then m4_defun([_LT_AC_LANG_F77_CONFIG], [:]) #fi AC_PROG_LIBTOOL AC_MSG_RESULT([enable_foratran = $enable_fortran]) AM_CONDITIONAL([WANT_FORTRAN], [test "$enable_fortran" != "no"]) AC_CHECK_LIB([m], [sqrt]) AC_MSG_CHECKING([LIBS = $LIBS]) # Checks for header files. 
AC_HEADER_STDC
AC_CHECK_HEADERS([stdlib.h sys/time.h unistd.h])
AC_C_VARARRAYS

# Always build with debugging symbols, on top of whatever flags were
# already selected.
AC_MSG_CHECKING([optimal compiler flags])
CFLAGS="${CFLAGS} -g"
CXXFLAGS="${CXXFLAGS} -g"
AC_MSG_RESULT([${CFLAGS}])

# Compile CUDA versions
AC_ARG_ENABLE(cuda, AC_HELP_STRING([--enable-cuda], [compile CUDA routines]))
AM_CONDITIONAL([HAVE_CUDA], [test "$enable_cuda" = "yes"])
if test "x$enable_cuda" = "xyes"; then
  NVCC="nvcc"
  NVCCFLAGS="-DNO_CUDA_MAIN -g -O3 -arch sm_13 -Drestrict=__restrict__"
  CUDA_LIBS="-L/usr/local/cuda/lib -lcudart"
#  LIBS="$LIBS -L/usr/local/cuda/lib -lcudart"
fi

########################
# Check for CUDA
########################
# An explicit --with-cuda=PATH takes precedence; otherwise --enable-cuda
# falls back to the default /usr/local/cuda location.  The second branch
# must be a real "elif ...; then": the shell has no "elseif" keyword, so
# the old spelling ran as a (missing) command inside the first branch and
# the default paths below it always overwrote the user-supplied prefix.
AC_ARG_WITH([cuda], [ --with-cuda=PATH prefix where cuda is installed [default=auto]])
if test "x$with_cuda" != "x"; then
  CUDA_CFLAGS="-I$with_cuda/include"
  CUDA_LIBS="-L$with_cuda/lib -lcudart"
  NVCC="$with_cuda/bin/nvcc"
elif test "x$enable_cuda" = "xyes"; then
  CUDA_CFLAGS="-I/usr/local/cuda/include"
  CUDA_LIBS="-L/usr/local/cuda/lib -lcudart"
  NVCC="nvcc"
fi
# CUDA_LIBS is appended to LIBS exactly once, here, for either branch.
LIBS="$LIBS $CUDA_LIBS"
# These flags replace any NVCCFLAGS value assigned above.
NVCCFLAGS="-DNO_CUDA_MAIN -O3 -use_fast_math -g -arch sm_13 -Drestrict=__restrict__"
AC_SUBST(CUDA_CFLAGS)
AC_SUBST(CUDA_LIBS)
AC_SUBST(NVCC)
AC_SUBST(NVCCFLAGS)

# Compile with -pthread
AC_ARG_ENABLE(pthread, AC_HELP_STRING([--enable-pthread], [compile with -pthread]))
if test "x$enable_pthread" = "xyes"; then
  CFLAGS="$CFLAGS -pthread"
  CXXFLAGS="$CXXFLAGS -pthread"
  PTHREAD_FLAG="-pthread"
else
  PTHREAD_FLAG=""
fi
AC_SUBST(PTHREAD_FLAG)

# Compile with -openmp
AC_ARG_ENABLE(openmp, AC_HELP_STRING([--enable-openmp], [compile with -openmp]))
if test "x$enable_openmp" = "xyes"; then
  CFLAGS="$CFLAGS -openmp"
  CXXFLAGS="$CXXFLAGS -openmp"
  OPENMP_FLAG="-openmp"
else
  OPENMP_FLAG=""
fi
AC_SUBST(OPENMP_FLAG)

# Software prefetch: --enable-prefetch, or --enable-prefetch=N to choose
# the loop lead distance.
AC_ARG_ENABLE(prefetch, AC_HELP_STRING([--enable-prefetch[=N]],[use software prefetch instructions (default=no)]))
if test "$enable_prefetch" = "yes"; then
AC_DEFINE([USE_PREFETCH],[],[Use SSE prefetch]) AC_DEFINE([PREFETCH_AHEAD],[12],[Prefetch loop lead distance]) elif test "x$enable_prefetch" != "x"; then AC_DEFINE([USE_PREFETCH],[],[Use SSE prefetch]) AC_DEFINE_UNQUOTED([PREFETCH_AHEAD],$enable_prefetch,[Prefetch loop lead distance]) fi AC_ARG_ENABLE(all-static, AC_HELP_STRING([--enable-all-static], [build static binaries (default=no)])) if test "$enable_all_static" = "yes"; then ALL_STATIC="-all-static" else ALL_STATIC="" fi AC_SUBST(ALL_STATIC) AC_ARG_ENABLE(altivec, AC_HELP_STRING([--enable-altivec],[enable PowerPC SIMD extensions (default=no)])) if test "x$enable_altivec" = "xyes"; then CFLAGS="$CFLAGS -faltivec -DUSE_ALTIVEC" FFLAGS="$FFLAGS -DUSE_ALTIVEC" fi AC_ARG_ENABLE(sse, AC_HELP_STRING([--enable-sse],[enable SSE SIMD instructions])) if test "x$enable_sse" = "xyes"; then AC_CHECK_HEADER([xmmintrin.h], [AC_DEFINE([HAVE_XMMINTRIN_H],[1], [xmmintrin.h is available.])], [AC_MSG_ERROR([Cannot utilize SSE2 extensions without xmmintrin.h])]) AC_DEFINE([HAVE_SSE], [1], [SSE instructions are available]) fi if test "x$enable_sse" = "xyes"; then AX_EXT fi AM_CONDITIONAL([HAVE_SSE], [test "$ax_have_sse_ext" = "yes"]) AM_CONDITIONAL([HAVE_SSE2], [test "$ax_have_sse2_ext" = "yes"]) AM_CONDITIONAL([HAVE_SSE3], [test "$ax_have_sse3_ext" = "yes"]) AM_CONDITIONAL([HAVE_SSSE3], [test "$ax_have_ssse3_ext" = "yes"]) AM_CONDITIONAL([HAVE_SSE4_1], [test "$ax_have_sse4_1_ext" = "yes"]) AM_CONDITIONAL([HAVE_SSE4_2], [test "$ax_have_sse4_2_ext" = "yes"]) AC_ARG_ENABLE(blips, AC_HELP_STRING([--enable-blips],[enable routines for creating BLIPS (default=no)])) AM_CONDITIONAL([WANT_BLIPS], [test "$enable_blips" = "yes"]) if test "x$enable_blips" = "xyes"; then PKG_CHECK_MODULES(FFTW3, fftw3, fftw3_ok=yes, fftw3_ok=no) PKG_CHECK_MODULES(FFTW3F, fftw3f, fftw3f_ok=yes, fftw3f_ok=no) if test "x$fftw3_ok" = "xyes"; then AC_DEFINE([HAVE_FFTW3], [1], [FFTW3 library is available]) else AC_MSG_ERROR([Blip construction routines require that 
FFTW3 be installed.]) fi if test "x$fftw3f_ok" = "xyes"; then AC_DEFINE([HAVE_FFTW3F], [1], [FFTW3F library is available]) else AC_MSG_ERROR([Blip construction routines require that FFTW3 be installed.]) fi fi AC_ARG_ENABLE(precision, AC_HELP_STRING([--enable-precision],[use double-precision solve for single-precision splines])) if test "x$enable_precision" = "xyes"; then AC_DEFINE([HIGH_PRECISION], [1], [Use double-precision to solve for single-precision splines]) fi AC_ARG_ENABLE(profile, AC_HELP_STRING([--enable-profile],[instrument code with profiling information])) if test "x$enable_profile" = "xyes"; then CFLAGS="-pg $CFLAGS" FFLAGS="-pg $FFLAGS" LDFLAGS="-pg $LDFLAGS" fi AC_ARG_ENABLE(debug, AC_HELP_STRING([--enable-debug],[enable code for debugging checks])) if test "x$enable_debug" = "xyes"; then CFLAGS="-DDEBUG $CFLAGS" FFLAGS="-DDEBUG $FFLAGS" fi if test "x$enable_fortran" != "xno"; then AC_MSG_RESULT([*************** Enabling F77! ***************]) AC_F77_WRAPPERS AC_F77_LIBRARY_LDFLAGS AC_F77_DUMMY_MAIN fi # Checks for typedefs, structures, and compiler characteristics. AC_HEADER_STDBOOL AC_C_CONST AC_C_INLINE AC_TYPE_SIZE_T # Checks for library functions. AC_FUNC_ERROR_AT_LINE AC_FUNC_STRTOD AC_CHECK_FUNCS([clock_gettime floor pow sqrt strtol posix_memalign]) AC_SUBST(PKGDATADEF) AC_CONFIG_FILES([ Makefile \ src/Makefile \ doc/Makefile \ www/Makefile \ einspline.pc ]) AC_OUTPUT einspline-0.9.2/www/0000777000113000011300000000000011311505425011316 500000000000000einspline-0.9.2/www/F77uniform.shtml0000664000113000011300000011321711115073142014253 00000000000000 einspline

Fortran 77 interface

The Fortran 77 wrappers provide an interface to the einspline library that is very similar to the C interface, with a few exceptions:
  • The Fortran routine names are the C names, prefixed by an "f".
  • The spline object pointers are passed as INTEGER*8 arguments, to ensure proper functioning on 64-bit platforms.
  • Since F77 has no concept of structures, uniform grid objects have been replaced by extra parameters to the create_UBspline_x routines.
  • For 2D and 3D splines, data is assumed to be stored in contiguous, row-major (C-style) order. If the data is statically allocated in F77, care must be taken to ensure all element data is contiguous in memory.
Boundary condition type Code Meaning
PERIODIC 0 Use periodic boundary conditions. The value, first derivative and second derivative at the left boundary match those at the right boundary.
DERIV1 1 The value of the first derivative is specified in lVal or rVal.
DERIV2 2 The value of the second derivative is specified in lVal or rVal.
FLAT 3 The value of the first derivative is set to zero at the boundary.
NATURAL 4 The value of the second derivative is set to zero at the boundary.
ANTIPERIODIC 5 Use anti-periodic boundary conditions. The value, first derivative and second derivative at the left boundary are the negative of those at the right boundary.
Please see the documentation for the C routines for more information. The subroutine names and parameters are given below:

Uniform spline creation routines

One-dimensional:

Single-precision real

SUBROUTINE FCREATE_UBSPLINE_1D_S (x0, x1, num_x, x0_code, x0_val, x1_code, x1_val, data, spline)
Argument Intent Type Description
x0 IN REAL*8 First grid point
x1 IN REAL*8 Last grid point
num_x IN INTEGER # of grid points
x0_code IN INTEGER Left BC type
x0_val IN REAL*4 Left BC value
x1_code IN INTEGER Right BC type
x1_val IN REAL*4 Right BC value
data IN REAL*4 Data to interpolate
spline OUT INTEGER*8 Handle for spline object

Double-precision real

SUBROUTINE FCREATE_UBSPLINE_1D_D (x0, x1, num_x, x0_code, x0_val, x1_code, x1_val, data, spline)
Argument Intent Type Description
x0 IN REAL*8 First grid point
x1 IN REAL*8 Last grid point
num_x IN INTEGER # of grid points
x0_code IN INTEGER Left BC type
x0_val IN REAL*8 Left BC value
x1_code IN INTEGER Right BC type
x1_val IN REAL*8 Right BC value
data IN REAL*8 Data to interpolate
spline OUT INTEGER*8 Handle for spline object

Single-precision complex

SUBROUTINE FCREATE_UBSPLINE_1D_C (x0, x1, num_x, x0_code, x0_val, x1_code, x1_val, data, spline)
Argument Intent Type Description
x0 IN REAL*8 First grid point
x1 IN REAL*8 Last grid point
num_x IN INTEGER # of grid points
x0_code IN INTEGER Left BC type
x0_val IN COMPLEX*8 Left BC value
x1_code IN INTEGER Right BC type
x1_val IN COMPLEX*8 Right BC value
data IN COMPLEX*8 Data to interpolate
spline OUT INTEGER*8 Handle for spline object

Double-precision complex

SUBROUTINE FCREATE_UBSPLINE_1D_Z (x0, x1, num_x, x0_code, x0_val, x1_code, x1_val, data, spline)
Argument Intent Type Description
x0 IN REAL*8 First grid point
x1 IN REAL*8 Last grid point
num_x IN INTEGER # of grid points
x0_code IN INTEGER Left BC type
x0_val IN COMPLEX*16 Left BC value
x1_code IN INTEGER Right BC type
x1_val IN COMPLEX*16 Right BC value
data IN COMPLEX*16 Data to interpolate
spline OUT INTEGER*8 Handle for spline object

Two-dimensional:

Single-precision real

SUBROUTINE FCREATE_UBSPLINE_2D_S (x0, x1, num_x, y0, y1, num_y, 
                                  x0_code, x0_val, x1_code, x1_val, 
				  y0_code, y0_val, y1_code, y1_val, 
                                  data, spline)
Argument Intent Type Description
x0 IN REAL*8 First x grid point
x1 IN REAL*8 Last x grid point
num_x IN INTEGER # of x grid points
y0 IN REAL*8 First y grid point
y1 IN REAL*8 Last y grid point
num_y IN INTEGER # of y grid points
x0_code IN INTEGER Left x BC type
x0_val IN REAL*4 Left x BC value
x1_code IN INTEGER Right x BC type
x1_val IN REAL*4 Right x BC value
y0_code IN INTEGER Left y BC type
y0_val IN REAL*4 Left y BC value
y1_code IN INTEGER Right y BC type
y1_val IN REAL*4 Right y BC value
data IN REAL*4 Data to interpolate
spline OUT INTEGER*8 Handle for spline object

Double-precision real

SUBROUTINE FCREATE_UBSPLINE_2D_D (x0, x1, num_x, y0, y1, num_y, 
                                  x0_code, x0_val, x1_code, x1_val, 
				  y0_code, y0_val, y1_code, y1_val, 
                                  data, spline)
Argument Intent Type Description
x0 IN REAL*8 First x grid point
x1 IN REAL*8 Last x grid point
num_x IN INTEGER # of x grid points
y0 IN REAL*8 First y grid point
y1 IN REAL*8 Last y grid point
num_y IN INTEGER # of y grid points
x0_code IN INTEGER Left x BC type
x0_val IN REAL*8 Left x BC value
x1_code IN INTEGER Right x BC type
x1_val IN REAL*8 Right x BC value
y0_code IN INTEGER Left y BC type
y0_val IN REAL*8 Left y BC value
y1_code IN INTEGER Right y BC type
y1_val IN REAL*8 Right y BC value
data IN REAL*8 Data to interpolate
spline OUT INTEGER*8 Handle for spline object

Single-precision complex

SUBROUTINE FCREATE_UBSPLINE_2D_C (x0, x1, num_x, y0, y1, num_y, 
                                  x0_code, x0_val, x1_code, x1_val, 
				  y0_code, y0_val, y1_code, y1_val, 
                                  data, spline)
Argument Intent Type Description
x0 IN REAL*8 First x grid point
x1 IN REAL*8 Last x grid point
num_x IN INTEGER # of x grid points
y0 IN REAL*8 First y grid point
y1 IN REAL*8 Last y grid point
num_y IN INTEGER # of y grid points
x0_code IN INTEGER Left x BC type
x0_val IN COMPLEX*8 Left x BC value
x1_code IN INTEGER Right x BC type
x1_val IN COMPLEX*8 Right x BC value
y0_code IN INTEGER Left y BC type
y0_val IN COMPLEX*8 Left y BC value
y1_code IN INTEGER Right y BC type
y1_val IN COMPLEX*8 Right y BC value
data IN COMPLEX*8 Data to interpolate
spline OUT INTEGER*8 Handle for spline object

Double-precision complex

SUBROUTINE FCREATE_UBSPLINE_2D_Z (x0, x1, num_x, y0, y1, num_y, 
                                  x0_code, x0_val, x1_code, x1_val, 
				  y0_code, y0_val, y1_code, y1_val, 
                                  data, spline)
Argument Intent Type Description
x0 IN REAL*8 First x grid point
x1 IN REAL*8 Last x grid point
num_x IN INTEGER # of x grid points
y0 IN REAL*8 First y grid point
y1 IN REAL*8 Last y grid point
num_y IN INTEGER # of y grid points
x0_code IN INTEGER Left x BC type
x0_val IN COMPLEX*16 Left x BC value
x1_code IN INTEGER Right x BC type
x1_val IN COMPLEX*16 Right x BC value
y0_code IN INTEGER Left y BC type
y0_val IN COMPLEX*16 Left y BC value
y1_code IN INTEGER Right y BC type
y1_val IN COMPLEX*16 Right y BC value
data IN COMPLEX*16 Data to interpolate
spline OUT INTEGER*8 Handle for spline object

Three-dimensional:

Single-precision real

SUBROUTINE FCREATE_UBSPLINE_3D_S (x0, x1, num_x, y0, y1, num_y, z0, z1, num_z,
                                  x0_code, x0_val, x1_code, x1_val, 
				  y0_code, y0_val, y1_code, y1_val, 
				  z0_code, z0_val, z1_code, z1_val, 
                                  data, spline)
Argument Intent Type Description
x0 IN REAL*8 First x grid point
x1 IN REAL*8 Last x grid point
num_x IN INTEGER # of x grid points
y0 IN REAL*8 First y grid point
y1 IN REAL*8 Last y grid point
num_y IN INTEGER # of y grid points
z0 IN REAL*8 First z grid point
z1 IN REAL*8 Last z grid point
num_z IN INTEGER # of z grid points
x0_code IN INTEGER Left x BC type
x0_val IN REAL*4 Left x BC value
x1_code IN INTEGER Right x BC type
x1_val IN REAL*4 Right x BC value
y0_code IN INTEGER Left y BC type
y0_val IN REAL*4 Left y BC value
y1_code IN INTEGER Right y BC type
y1_val IN REAL*4 Right y BC value
z0_code IN INTEGER Left z BC type
z0_val IN REAL*4 Left z BC value
z1_code IN INTEGER Right z BC type
z1_val IN REAL*4 Right z BC value
data IN REAL*4 Data to interpolate
spline OUT INTEGER*8 Handle for spline object

Double-precision real

SUBROUTINE FCREATE_UBSPLINE_3D_D (x0, x1, num_x, y0, y1, num_y, z0, z1, num_z,
                                  x0_code, x0_val, x1_code, x1_val, 
				  y0_code, y0_val, y1_code, y1_val, 
				  z0_code, z0_val, z1_code, z1_val, 
                                  data, spline)
Argument Intent Type Description
x0 IN REAL*8 First x grid point
x1 IN REAL*8 Last x grid point
num_x IN INTEGER # of x grid points
y0 IN REAL*8 First y grid point
y1 IN REAL*8 Last y grid point
num_y IN INTEGER # of y grid points
z0 IN REAL*8 First z grid point
z1 IN REAL*8 Last z grid point
num_z IN INTEGER # of z grid points
x0_code IN INTEGER Left x BC type
x0_val IN REAL*8 Left x BC value
x1_code IN INTEGER Right x BC type
x1_val IN REAL*8 Right x BC value
y0_code IN INTEGER Left y BC type
y0_val IN REAL*8 Left y BC value
y1_code IN INTEGER Right y BC type
y1_val IN REAL*8 Right y BC value
z0_code IN INTEGER Left z BC type
z0_val IN REAL*8 Left z BC value
z1_code IN INTEGER Right z BC type
z1_val IN REAL*8 Right z BC value
data IN REAL*8 Data to interpolate
spline OUT INTEGER*8 Handle for spline object

Single-precision complex

SUBROUTINE FCREATE_UBSPLINE_3D_C (x0, x1, num_x, y0, y1, num_y, z0, z1, num_z,
                                  x0_code, x0_val, x1_code, x1_val, 
				  y0_code, y0_val, y1_code, y1_val, 
				  z0_code, z0_val, z1_code, z1_val, 
                                  data, spline)
Argument Intent Type Description
x0 IN REAL*8 First x grid point
x1 IN REAL*8 Last x grid point
num_x IN INTEGER # of x grid points
y0 IN REAL*8 First y grid point
y1 IN REAL*8 Last y grid point
num_y IN INTEGER # of y grid points
z0 IN REAL*8 First z grid point
z1 IN REAL*8 Last z grid point
num_z IN INTEGER # of z grid points
x0_code IN INTEGER Left x BC type
x0_val IN COMPLEX*8 Left x BC value
x1_code IN INTEGER Right x BC type
x1_val IN COMPLEX*8 Right x BC value
y0_code IN INTEGER Left y BC type
y0_val IN COMPLEX*8 Left y BC value
y1_code IN INTEGER Right y BC type
y1_val IN COMPLEX*8 Right y BC value
z0_code IN INTEGER Left z BC type
z0_val IN COMPLEX*8 Left z BC value
z1_code IN INTEGER Right z BC type
z1_val IN COMPLEX*8 Right z BC value
data IN COMPLEX*8 Data to interpolate
spline OUT INTEGER*8 Handle for spline object

Double-precision complex

SUBROUTINE FCREATE_UBSPLINE_3D_Z (x0, x1, num_x, y0, y1, num_y, z0, z1, num_z,
                                  x0_code, x0_val, x1_code, x1_val, 
				  y0_code, y0_val, y1_code, y1_val, 
				  z0_code, z0_val, z1_code, z1_val, 
                                  data, spline)
Argument Intent Type Description
x0 IN REAL*8 First x grid point
x1 IN REAL*8 Last x grid point
num_x IN INTEGER # of x grid points
y0 IN REAL*8 First y grid point
y1 IN REAL*8 Last y grid point
num_y IN INTEGER # of y grid points
z0 IN REAL*8 First z grid point
z1 IN REAL*8 Last z grid point
num_z IN INTEGER # of z grid points
x0_code IN INTEGER Left x BC type
x0_val IN COMPLEX*16 Left x BC value
x1_code IN INTEGER Right x BC type
x1_val IN COMPLEX*16 Right x BC value
y0_code IN INTEGER Left y BC type
y0_val IN COMPLEX*16 Left y BC value
y1_code IN INTEGER Right y BC type
y1_val IN COMPLEX*16 Right y BC value
z0_code IN INTEGER Left z BC type
z0_val IN COMPLEX*16 Left z BC value
z1_code IN INTEGER Right z BC type
z1_val IN COMPLEX*16 Right z BC value
data IN COMPLEX*16 Data to interpolate
spline OUT INTEGER*8 Handle for spline object

Spline destruction routine

The following subroutine can be used to deallocate the memory for any B-spline object. Note that in the nonuniform case, the grid objects must be destroyed only after the splines that refer to them have been destroyed.

SUBROUTINE FDESTROY_BSPLINE (spline) 
Argument Intent Type Description
spline IN INTEGER*8 Spline object handle

Uniform spline evaluation routines

One-dimensional

SUBROUTINE FEVAL_UBSPLINE_1D_S     (spline, x, val)
SUBROUTINE FEVAL_UBSPLINE_1D_D     (spline, x, val)
SUBROUTINE FEVAL_UBSPLINE_1D_C     (spline, x, val)
SUBROUTINE FEVAL_UBSPLINE_1D_Z     (spline, x, val)

SUBROUTINE FEVAL_UBSPLINE_1D_S_VG  (spline, x, val, grad)
SUBROUTINE FEVAL_UBSPLINE_1D_D_VG  (spline, x, val, grad)
SUBROUTINE FEVAL_UBSPLINE_1D_C_VG  (spline, x, val, grad)
SUBROUTINE FEVAL_UBSPLINE_1D_Z_VG  (spline, x, val, grad)

SUBROUTINE FEVAL_UBSPLINE_1D_S_VGL (spline, x, val, grad, lapl)
SUBROUTINE FEVAL_UBSPLINE_1D_D_VGL (spline, x, val, grad, lapl)
SUBROUTINE FEVAL_UBSPLINE_1D_C_VGL (spline, x, val, grad, lapl)
SUBROUTINE FEVAL_UBSPLINE_1D_Z_VGL (spline, x, val, grad, lapl)

SUBROUTINE FEVAL_UBSPLINE_1D_S_VGH (spline, x, val, grad, hess)
SUBROUTINE FEVAL_UBSPLINE_1D_D_VGH (spline, x, val, grad, hess)
SUBROUTINE FEVAL_UBSPLINE_1D_C_VGH (spline, x, val, grad, hess)
SUBROUTINE FEVAL_UBSPLINE_1D_Z_VGH (spline, x, val, grad, hess)
Argument Intent _S Type _D Type _C Type _Z Type Description
spline IN INTEGER*8 INTEGER*8 INTEGER*8 INTEGER*8 Spline handle
x IN REAL*8 REAL*8 REAL*8 REAL*8 Interpolation position
val OUT REAL*4 REAL*8 COMPLEX*8 COMPLEX*16 Interpolated value
grad OUT REAL*4 REAL*8 COMPLEX*8 COMPLEX*16 Interpolated first derivative
lapl OUT REAL*4 REAL*8 COMPLEX*8 COMPLEX*16 Interpolated second derivative
hess OUT REAL*4 REAL*8 COMPLEX*8 COMPLEX*16 Interpolated second derivative

Two-dimensional

SUBROUTINE FEVAL_UBSPLINE_2D_S     (spline, x, y, val)
SUBROUTINE FEVAL_UBSPLINE_2D_D     (spline, x, y, val)
SUBROUTINE FEVAL_UBSPLINE_2D_C     (spline, x, y, val)
SUBROUTINE FEVAL_UBSPLINE_2D_Z     (spline, x, y, val)

SUBROUTINE FEVAL_UBSPLINE_2D_S_VG  (spline, x, y, val, grad)
SUBROUTINE FEVAL_UBSPLINE_2D_D_VG  (spline, x, y, val, grad)
SUBROUTINE FEVAL_UBSPLINE_2D_C_VG  (spline, x, y, val, grad)
SUBROUTINE FEVAL_UBSPLINE_2D_Z_VG  (spline, x, y, val, grad)

SUBROUTINE FEVAL_UBSPLINE_2D_S_VGL (spline, x, y, val, grad, lapl)
SUBROUTINE FEVAL_UBSPLINE_2D_D_VGL (spline, x, y, val, grad, lapl)
SUBROUTINE FEVAL_UBSPLINE_2D_C_VGL (spline, x, y, val, grad, lapl)
SUBROUTINE FEVAL_UBSPLINE_2D_Z_VGL (spline, x, y, val, grad, lapl)

SUBROUTINE FEVAL_UBSPLINE_2D_S_VGH (spline, x, y, val, grad, hess)
SUBROUTINE FEVAL_UBSPLINE_2D_D_VGH (spline, x, y, val, grad, hess)
SUBROUTINE FEVAL_UBSPLINE_2D_C_VGH (spline, x, y, val, grad, hess)
SUBROUTINE FEVAL_UBSPLINE_2D_Z_VGH (spline, x, y, val, grad, hess)
Argument Intent _S Type _D Type _C Type _Z Type Description
spline IN INTEGER*8 INTEGER*8 INTEGER*8 INTEGER*8 Spline handle
x IN REAL*8 REAL*8 REAL*8 REAL*8 x coordinate for interpolation
y IN REAL*8 REAL*8 REAL*8 REAL*8 y coordinate for interpolation
val OUT REAL*4 REAL*8 COMPLEX*8 COMPLEX*16 Interpolated value
grad OUT REAL*4 REAL*8 COMPLEX*8 COMPLEX*16 Interpolated gradient (2 elements)
lapl OUT REAL*4 REAL*8 COMPLEX*8 COMPLEX*16 Interpolated Laplacian
hess OUT REAL*4 REAL*8 COMPLEX*8 COMPLEX*16 Interpolated Hessian (4 elements)

Three-dimensional

SUBROUTINE FEVAL_UBSPLINE_3D_S     (spline, x, y, z, val)
SUBROUTINE FEVAL_UBSPLINE_3D_D     (spline, x, y, z, val)
SUBROUTINE FEVAL_UBSPLINE_3D_C     (spline, x, y, z, val)
SUBROUTINE FEVAL_UBSPLINE_3D_Z     (spline, x, y, z, val)

SUBROUTINE FEVAL_UBSPLINE_3D_S_VG  (spline, x, y, z, val, grad)
SUBROUTINE FEVAL_UBSPLINE_3D_D_VG  (spline, x, y, z, val, grad)
SUBROUTINE FEVAL_UBSPLINE_3D_C_VG  (spline, x, y, z, val, grad)
SUBROUTINE FEVAL_UBSPLINE_3D_Z_VG  (spline, x, y, z, val, grad)

SUBROUTINE FEVAL_UBSPLINE_3D_S_VGL (spline, x, y, z, val, grad, lapl)
SUBROUTINE FEVAL_UBSPLINE_3D_D_VGL (spline, x, y, z, val, grad, lapl)
SUBROUTINE FEVAL_UBSPLINE_3D_C_VGL (spline, x, y, z, val, grad, lapl)
SUBROUTINE FEVAL_UBSPLINE_3D_Z_VGL (spline, x, y, z, val, grad, lapl)

SUBROUTINE FEVAL_UBSPLINE_3D_S_VGH (spline, x, y, z, val, grad, hess)
SUBROUTINE FEVAL_UBSPLINE_3D_D_VGH (spline, x, y, z, val, grad, hess)
SUBROUTINE FEVAL_UBSPLINE_3D_C_VGH (spline, x, y, z, val, grad, hess)
SUBROUTINE FEVAL_UBSPLINE_3D_Z_VGH (spline, x, y, z, val, grad, hess)
Argument Intent _S Type _D Type _C Type _Z Type Description
spline IN INTEGER*8 INTEGER*8 INTEGER*8 INTEGER*8 Spline handle
x IN REAL*8 REAL*8 REAL*8 REAL*8 x coordinate for interpolation
y IN REAL*8 REAL*8 REAL*8 REAL*8 y coordinate for interpolation
z IN REAL*8 REAL*8 REAL*8 REAL*8 z coordinate for interpolation
val OUT REAL*4 REAL*8 COMPLEX*8 COMPLEX*16 Interpolated value
grad OUT REAL*4 REAL*8 COMPLEX*8 COMPLEX*16 Interpolated gradient (3 elements)
lapl OUT REAL*4 REAL*8 COMPLEX*8 COMPLEX*16 Interpolated Laplacian
hess OUT REAL*4 REAL*8 COMPLEX*8 COMPLEX*16 Interpolated Hessian (9 elements)
einspline-0.9.2/www/Makefile.in0000664000113000011300000002402311273633721013312 00000000000000# Makefile.in generated by automake 1.10 from Makefile.am. # @configure_input@ # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, # 2003, 2004, 2005, 2006 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. @SET_MAKE@ VPATH = @srcdir@ pkgdatadir = $(datadir)/@PACKAGE@ pkglibdir = $(libdir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ subdir = www DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \ $(top_srcdir)/m4/acx_pthread.m4 \ $(top_srcdir)/m4/ax_cc_maxopt.m4 \ $(top_srcdir)/m4/ax_cxx_maxopt.m4 \ $(top_srcdir)/m4/ax_f77_maxopt.m4 \ $(top_srcdir)/m4/ax_check_compiler_flags.m4 \ $(top_srcdir)/m4/ax_compiler_vendor.m4 \ $(top_srcdir)/m4/ax_cxx_compiler_vendor.m4 \ $(top_srcdir)/m4/ax_c_compiler_vendor.m4 \ $(top_srcdir)/m4/ax_f77_compiler_vendor.m4 \ $(top_srcdir)/m4/ax_gcc_aligns_stack.m4 \ $(top_srcdir)/m4/ax_gcc_archflag.m4 \ $(top_srcdir)/m4/ax_gxx_archflag.m4 \ $(top_srcdir)/m4/ax_gcc_version.m4 \ $(top_srcdir)/m4/ax_gcc_x86_cpuid.m4 \ $(top_srcdir)/m4/ax_ext.m4 
$(top_srcdir)/m4/ac_cxx_restrict.m4 \ $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/config.h CONFIG_CLEAN_FILES = SOURCES = DIST_SOURCES = DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) ACLOCAL = @ACLOCAL@ ALL_STATIC = @ALL_STATIC@ AMTAR = @AMTAR@ AR = @AR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CUDA_CFLAGS = @CUDA_CFLAGS@ CUDA_LIBS = @CUDA_LIBS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ ECHO = @ECHO@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ F77 = @F77@ FFLAGS = @FFLAGS@ FFTW3F_CFLAGS = @FFTW3F_CFLAGS@ FFTW3F_LIBS = @FFTW3F_LIBS@ FFTW3_CFLAGS = @FFTW3_CFLAGS@ FFTW3_LIBS = @FFTW3_LIBS@ FLIBS = @FLIBS@ GREP = @GREP@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LIBTOOL = @LIBTOOL@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ MAKEINFO = @MAKEINFO@ MKDIR_P = @MKDIR_P@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ OBJEXT = @OBJEXT@ OPENMP_FLAG = @OPENMP_FLAG@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_VERSION = @PACKAGE_VERSION@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKGDATADEF = @PKGDATADEF@ PKG_CONFIG = @PKG_CONFIG@ POW_LIB = @POW_LIB@ PRTDIAG = @PRTDIAG@ PTHREAD_FLAG = @PTHREAD_FLAG@ RANLIB = @RANLIB@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMD_FLAGS = @SIMD_FLAGS@ STRIP = @STRIP@ VERSION = @VERSION@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ 
abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_F77 = @ac_ct_F77@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ dvidir = @dvidir@ exec_prefix = @exec_prefix@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target_alias = @target_alias@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ EXTRA_DIST = background.shtml \ benchmark.shtml \ doc.shtml \ download.shtml \ faq.shtml \ header.shtml \ footer.shtml \ index.shtml \ NUBinterface.shtml \ UBinterface.shtml \ bspline_logo.png \ c-BN300y.png \ NUBsplineBasis.png \ UBsplineBasis.png \ F77nonuniform.shtml \ F77uniform.shtml \ links.shtml \ news.shtml all: all-am .SUFFIXES: $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh \ && exit 0; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu www/Makefile'; \ cd $(top_srcdir) && \ $(AUTOMAKE) --gnu www/Makefile .PRECIOUS: Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' 
in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ esac; $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs tags: TAGS TAGS: ctags: CTAGS CTAGS: distdir: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \ fi; \ cp -pR $$d/$$file $(distdir)$$dir || exit 1; \ else \ test -f $(distdir)/$$file \ || cp -p $$d/$$file $(distdir)/$$file \ || exit 1; \ fi; \ done check-am: all-am check: check-am all-am: Makefile installdirs: install: install-am install-exec: install-exec-am install-data: install-data-am uninstall: uninstall-am install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-am install-strip: $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ 
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ `test -z '$(STRIP)' || \ echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install mostlyclean-generic: clean-generic: distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." clean: clean-am clean-am: clean-generic clean-libtool mostlyclean-am distclean: distclean-am -rm -f Makefile distclean-am: clean-am distclean-generic dvi: dvi-am dvi-am: html: html-am info: info-am info-am: install-data-am: install-dvi: install-dvi-am install-exec-am: install-html: install-html-am install-info: install-info-am install-man: install-pdf: install-pdf-am install-ps: install-ps-am installcheck-am: maintainer-clean: maintainer-clean-am -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-am mostlyclean-am: mostlyclean-generic mostlyclean-libtool pdf: pdf-am pdf-am: ps: ps-am ps-am: uninstall-am: .MAKE: install-am install-strip .PHONY: all all-am check check-am clean clean-generic clean-libtool \ distclean distclean-generic distclean-libtool distdir dvi \ dvi-am html html-am info info-am install install-am \ install-data install-data-am install-dvi install-dvi-am \ install-exec install-exec-am install-html install-html-am \ install-info install-info-am install-man install-pdf \ install-pdf-am install-ps install-ps-am install-strip \ installcheck installcheck-am installdirs maintainer-clean \ maintainer-clean-generic mostlyclean mostlyclean-generic \ mostlyclean-libtool pdf pdf-am ps ps-am uninstall uninstall-am # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. 
.NOEXPORT: einspline-0.9.2/www/UBsplineBasis.png0000664000113000011300000017272211012400560014450 00000000000000PNG  IHDRwSsBIT|d IDATxydWy'geVf֒UUR7_Y_~7- /| É'fqlݺwߚna|hO}S8z=lw!d@* (W^y^\\Ğ={p%tww=\ kɓ'Qt.^!״'I@$@D"(˘o}[' pt+xvqСz(еppo~g>{Irϯp8ɕ^c~w+wZBR@Qe wk@;wdppdY8nlǶm{1~qz6k;XXJ>c^[m\-wۍ|O>$:}W\wߍR{/{1lw״#<{ø;Cr9;wU²N$;8f w{=y=so\k&'N#Galjཇ[|_ŗ%ͽCkBظA콇{@ lA. i^ƽ}uǽ,=^r1gM+ B6 lAhz<@"4a}>VЉW|J[#_>qk5'Ըx#<'/S[WZBLB>ϕZݿQ,rBa}çӨb06}>+_ Ed'Jj"x{Z/<p\?of;_xmȥ|W"vR(p&ѧ~?>R!dU'F=S,7}9n?9!N'r4J87j苏6m}۷n#J>Fۿs|&N ?O!BBMޙ{H_w&"[o!ht2!O~ww\.7K<_ |?O# }Brb)jB:Ϟ}ʯ<_[wO!7 ) LRe!]]d#8~7ʹϜ?Oq="reBٻ~]bL5re?_?Sqi;@o߀}s~8P*Zq&c|Q\Y$??u{<4Om{~x#Gہ2?g2\oFjxva2~q08H>嗁d25~tG$IZ$ :!{{_U.e$eV4 x8!?d,s?(ПﻏᱵmϏ?<(wZЃ zu>՟oNgDT_𑾾k|++_yXYYE{4pcUKAɟ>!*}m [0N{5޽n8$5˗i66W޵.<75YY஻4(t:~xKK;#&^//.Z~Y`^`˖k?AZ%j:>c nͺ?twӎ¦twlWbr1_\.nnqI#{xUSEuR/;ּ^x<ŋܽc-7npHLSÁCkahQd^z@`5Iöbkxyeܾu9Ŷ64yjxN'v; YX.jkaY7ܰ;&_m[nmx/:zN``:NtHz[:Ey.rtB·'^F_Goc4no lzAen:e$}Uছ=JUԯ,/X(P-/^Հ_t;q,gcG׼?M[ j_8N&J%$J%ܯlc`Z'gN}xuڂ_>M#rQ*4l2".͙B!ҙW_q(t_zF:cڔ.7촵$Yv_LG^ {xբq*ӊn1MS㕩Wpt(5SR6!96\>''͓8A7H&'c[PJ&7k?dþV:d3'p?{˭81c |$-u3`ؿR Up'LV2 d{J%q&}jGt:t+NX_D]:wmM ww{n Uɓʶ_3+2jo'ƑN۶܆K ozMК&qbeq^on j}eȈdIܺG戗iT:Ǧt84tϘ:_;8x. j8 v@.I/YSoA7 ]w]ИqMp:e2aV] uⰂN2m1XTM7`hrx}uaCC0ۚ=]]k:L%OccQMLGǰ_|O8$G`4S/SAo`_fdkMDI1 <qN>|wj:Z1}s.QA߹9"svwa֕z \ak9h$k=gTe!ߣCԲ# i5`ʯ\>{~}zZv a˱a_M]^vs[ڱo]K{8ϙHX\tbq\R^;zvlऑ')6ۮ;׿3{&'鎏6~޽pK[~?. l]F׿gco^1޾X*-;NE^mn3T,Jvnyv0nss"6fg}i'S;Btۄ ۸\z|=vEU] # E_∞uuaT2tW.>zo5. w{llZAoǞ=H!cxB;~uFf߅ Ci`f2 z;i&[ONz̅|;8o0y:xRn. sTCB<7fk]!dXD^[qXD=.HmV{y[mjrI҃HB! ˴A"{ۚ\a]ҭp&9֋!BK';sw*g{ =4Tة&"Ayyk4viw#y@.O֋٩ |n  ,[0jk^}=}{zOCC43hkdBC7!^@L]-Ȋυ ~ v<=)$vrZM7y%Iȅ tH;vhon4rIՅ ݻ[7yݻXY:lǮ]45pE~P`^+;]dzm"Iv2.^JˣMu+=f@d. P)6pEv64dTj/3)[w8{ 8Bq:djd]fE#/bWǠyQI O\>!AxvNʐYF|Ew74nлxQfCr`G\7՘5Zl\ip2NK*OO3;;o ilpJNl %j֫x)yïqE;bݻikxapiGB#p;ܸENNRQ['XX^T~-܂/˚w~۱m6<Q;ٝ^ αF;v0Gˍ$2܃|!ADx1I+.c{וw`5<ښ1TuLJءpYVܣtY.!#Y{I֥Z l/f[| )腌,~YEFSپo/ǒy<蠳Y櫍/ C}b5"q֋E;U%nÉmm]U A. 
_p %lpW8ҹvj+l- :d:U@bnkntg|_밷o7AOȚn?7?I$OJcXDa;zFmeɉ.w}2݃A@+ oӟ4B~| G'?z鳶GczymKP#WE {u}>FG0HO,',d\^k#G`!2W=z_T1*ߋnO7Ȩt!' 7e*j5iχ+>euUa֑FDƈ2 ]hկ|`93Cl}xNɉwd;{7AS\>66%o8y$jm{||.\+Wz+eKl҄\kQogvmXvƠǍmlî:.^Ŷ6&--p9\_d 7: 8ǩ1Rq 03$ }8qDNXFC `#b`IƋEx$ mjoIT6|/3hrښ:ppZ\mTڬq$rUF[p K1Kl>Nuk^Ѝp߅D(g>|FXWկc踶Xo6Ţ'}ʚRP3EF`JQ{kd+{44ԱqHZ%Z;=9+\|u, s"j0$HոYdXĨB=-I"c:ۅ'Zɓ"wx+.2zn>e͈+t?G\g, IQe>/ckDTud+J$ix "W...vgC)mٲBs ;C'N`Pd802&U QVeBYZba$Iet^@;myè%!  1>[i|ok}mzHRhdx:P>3iHdv3yJj՘4;Q,bx#Akū 2oh{o48"ոC/e2(Q[u!pnPth s~yݷ]-+F-%2ݭ"l5RS ե[ի36\0֧YML0X0$@ ^K86rLiȑ#8r!x vēO>ze|{>9tFTB >)<j|;'? hxTB'\cdH:ԩd&&MFNmAR59o{E$e $^sst)IMU*tYЇCʧYTm/´º퀪GPWwMNLިڗ19h=Y* C%YJȁ*Qֹ1R IBTj^o3PV՚LIK!90|h>M[ jycSSt=\RfIͽ f jk}4l]#ӥjek@{çj׉1IDkΓY5''2PYRngrL fԑ5\9jf.wzځO/A~'AO&׋rY)f"<-,Y顗,i z7.o>Yk; }jn5O˨XJ{>3 O.Ou!m)9Ĭs^=1if>=AbW['M*Ȯkáa䩫1*`1R [7KbEI3:cjy?بHAu V]˷zx>V)"Vd5;p[eA6Z8$ [tOSSF>'C2A&A53<3AoPLOWF@WdT^/2J*WX^lVdrd bhKjaAd&OrV0gke'oZH].NͪlF˷VU8Ɔ }T|ntTMںNIۘ&Oh>MPo^ ijAHYAzK=AyCy?hKh=@N# ^֢VR)AkKq wkil6s'!26%Oker)"SPTauB$C̆>&n blaDqtNӉ 6:"X"_k\MF#Ն:fy ShrwOD@$iAWv;xTE&U-Q֫ AP"= MZ*5DfZBimQjY f|[825E{KZIlWk@7֭ո#.ia |<Ǭshx̴&\.x85E 6Ͷʀ-$Oj\3kDJ׵lv\: ѩtCjkXđƖjizz&![qN`\6C`F X[r2lnF ICztlElI4'X-s(rLBq#IgfldRӁOqp:iD`+N&UOpU.#^A_^\>{EƦ iv@m=S*)˵ar㭥@Mq'AWnZf{%tKR]jge"~̬XX6zB74 7~=mkv8>Q9Q0_ڒ<aU[Cd$#'hpHc9ɊFy*BbLO5P4dַ嘭Ln B%jQyCf#FRN%c֧sZI+$tD/iL [3+%Xn@p@=Yqy>( $WU$sI-ohQ 嗮ŁOJh\a{׫jkŭ[MԷP0kXLu_h0`zǓV=j$O3+3ɓ&ҝ}jN\!_kZ܃_W*tC;X'MF$*AW'n^P.7*t-!Qoؚc$II]-IEff.FƭњѸ{㡧-n+"=Urң5OJ:XдG{RJX'qJPZY`\B"ˋfm mVau8v+vff2tPk@0/z%L**͍.wްdL$e{"3].73dSԍ ݩk5ڽ:lʼnb[Z333Ng,3զҹOSZ|a^Wd%/2@IfͮEGn8%IUd:?:- ~TȋVD%fR{<~m5\%ߞY`y{ uR W*oz&: /aa ^u,U]!~`fe$ăU.Z%c5Zk,TZ+ 5">uY-%OjəIm$!vۖ<&pT.y)t"hrc-`kZa`(GuQ`{P]d8G B!HV*+tSd6\Odbp9?W5yJ$OA֤ioR.E[Mǣ2=\˶6YESA BPQt+UaW:8T?f"JpI aC 0-ohAN:]O憦XuɘTP#Z훇 AZ1./}=MX z |m`p*aqyC$l-MD6Å.[Az ^Vm<fےh*nůefPv<ӷyzpƣKZRu }0J<[is-fύM+zPL^vb5 #n^̵!2V0@Em#hvI$o+ IE:5]`HVy9I&~@Yddر*XI! 
u;݈Zjy_ϖ˨o 3I;LHn@1 Iu1ss+X-]dfL Ӷ *"{ B8eId_TXmkTLٺ[B%2m2+*rIfmA"3?lvr T= }s@X52_6 -5T=A ]oغcFdHM7qKZ?H'[ `LXl=_ԲBVfD6x@(/'5!o9-ohН~ ܃ "ê,n0*2J:|k7Dt"r) fgXbk͸l#华eR!z_h{mJkEd|ZwX |2dQ |LovBc]>*4mfIXl (W6UUsij$,H\FCgqZ{pyY7{r t HImGX=Qbk`o&Os&8BRZrV*djS:k5"mqY9n.[xe]mSZ1`Ad08HVsi>n<4;K#u,f[Idx 8sE*fg-~n$OͪQZ2rnIB٫[XCAzMaatx |@:gAcBwufᶸ U#Z!Hb$JY˶!S4j5䝶ۙD_ߗ53+^ON fxކd $!©ҺQ.m"/2tOd&ŅS9%@A,=|w7$_=%SUDtMtL_=}<3k`m.*+q |OwTH&Tq3^ݐwߖ;@E&KXkHjlvC8 Ub5'zYVQ 6l=[.ے_BOpno[ AT*g?C[ne=S$ ǎ>>h{Y`5q^CjZsFլ8&U[|i2I9.oߛQanT3ˍNx&ƺ!".IZS51ecὔttCB<ηR6uWL'tUyƑuk^ư7_;x N1:tdwq2 /Y1N#"f_T5 |'RHqjXfrܿ`_`Ej&!a wC۩2Mx\5^Lн!vTJ!9QhV.Oع+j} + LЯ ݋#:x衇O*? {hH-7yL 2^ FݜW3|'"R5Q&yۍJcxA6xچnHRb[R\=̖i7w!"ۖ<%N6l½"IbWV?hx Y\\lۍݿE.:'NPX L6@O/ddvIznLe+}eZZ k_@7ֱ"}4> W.觞z Gq̙{:]Ӳt,C-8r»BŐ*!aqXJcA`z$dG71iyn@h8Yp4&S-ݬezAHZvJ 0n\5NC U*::O s8y~w_q-q2bjYnȑ#GpBs{kBwxO^cyy}p}(5O}Stx )ݸ_AVvq3 @E&]-sok<ݩg0#Ga1]-3wC 2ёJ9zǃLqDAO/%4Yu3XOV7 <@Ŗ;Gt-onGH*̢Te )*)"џo;Őz!1J`аu9]d=hO舱YsL*pnH,,VmIn)/۱ 4O=gCQpͷ'P155׾}k>;#.8XJ2X ə-`NWi/-y'*+.ѽ&)ă7/ 4䩿;*t";*$1`r /2b䆇s85_.{'|ط}]Tvf,C,7`1E*@Xc^ yzQ*̭2VnI?0FqbH xN7Onc@bN|}%<4H0wC o-8w<*tqb^,km/%rw9'}(lArg ln!:"$#lWmlTqGXcn7*̚yqcG{pQ{"ӃMtoT<"c ft#ܩaVf{4D+8j7&2רzα_h##38^i,#Xq#0\fb[܁5T{7 6!90Za9w# jÑ*苲W?+cJ[Hb ܩe W/ whdúLj`qlp88okYduڑjS N{CWt?Ȟ<@ mJ˝Zt \Xڰ  wjw 8_dtS@݉gCڑG,+]}m );w"A*E4+I\Y6u?)J6غ@}ΝFћm`\mpPvlدCl|C:by Hjn720bKKܹ" $2J4A}by il D[.(W zWR^ //!1l Xt=m ,2HsAs&Ӄ 1 ~ R0)ϿBp ?O!}+5?E N' wZfAR@.  0Ԑ𙻦Wnf2F&[(/_֐誣\݈`k/䩾̿B]fF<uܵ9{lMA͛Bu䝧ʹ 1ZǦ:r ޒ{`0xqܹH'lI%W ŐNwx)[ AG[=(w>V3䣘˰MUNL-4BL~\(Grf(:]gVR .ybI7J~;[΢,4Ϟ<1;E đM+)U1 Re:Q33ܹ)֘Ϯs"*F\aSDηJ_VQs8֩7rnn&<U!㉔M1Ad2z:KDB.~Yx1tM=ş;7A?ri($L!aACC.9;GRVCAOނ4Rm. 
ejdE3y 8y0_'y#/!Ϥ BN'bss]+t€{̧닢^$f[R }3Y՘TB_.z;vAg/̃ d&eTV^8pCv{XXn0s҈E B)or=^/w[R@kLdǝO#"a?fR[ɓ͜<kˁ=r'`- ٺ?jƵM+Hqzg|rQr}T _C i FO8nK8`A?:#`(lk`Ж卨G--w;l i0E| 2g|_#nvC8ښ, |̶B@Zt]"#epמUjiVCӕnR˶$[\VAb.?:ԕ nz _ 2n7p]lMZȴ/opNhdK%" s>-jw{]LgzzhI&M'ue:o%9~%9!ؾVlJ>Ȉ@@p 8 ]#N VZ$#R4=~TOUuιvz>#tds{VY_k4 .o}'\9hSdHvݹJk<gGC&d}=(lD䭍t㑍ѳF#=u'rDl c{/:DܩQ&c+xP1uJd>c[`Ck`(Yj_ِB2LLm ݀nޭ060F>gG:l}1("Q&# "y#= cYՓ4Ș32R!1{S<͆£z*Bn;FddkVSOj_en>ݫIh=ݞфy2ѻ8T6Kںc_ETI D{ <ڛ< 2aQݠ6өv ƾt̚mӔj[M֝YjҚ\6C.`A[w2O$b3zF)[wO I*axJY|{]$,qg+TY++zQ#ЮɃdBRQ\jt 6u7Ru'ȸ]׍5J'6<9̀ Lg?ODwO ^mͦzN,;zĠw_gOuh$]٫2?#d-i ˆLM^b^,*F8א[%2:Pm66L3R(%m>LFqD+Y@7dFzr]SFcNw+L&fGډOF|am[Վo4%g8/@#ٺŧM2ug) F1 3e2a=m<Z@vgN2xkij ]b_C7~dcC84d],IJ4pn Yc/Kg`glo+?xhy<ԁF&K ]ijƞ@N3U/=6Cz'}[ֹ*@+adžXq};)w"=ί/Sʕ3ciIJ*Ҩ5\V尐 epzT =Lz'&+g]!fo=]q֫3BUqT9:noyAF#fCFRRfbRz5}~DЏE񩛛ݎOhsuخ=YbR0kg&^ʽUpfCq~ lf=Sak;i!ckszpaB g^?3!R̓w =Q}Ħn6< .\Gs{ꩧ,7oFPhOjPA]]-B*.@d fm3Z?6D֭w ":; F/coH[a0L1WlM,k챵eYb;$D\׻!1c+x\]UvlK{A @?˗yWx饗](9'Iqòưx.wpW=H ̲FӑY6vCu뻺=l$=Cd4uTf{0"QG0{dd+k $&Q֝ZȨ]VN60ӽ^[X=ijwԆn6_xꩧd2lmm++|3P@H TUvU؈¾$T8ȽUJA&TKZ! lbk$džH#eC6[95/xЅ:UilHXmbk|ElԮȔYOf_(Q! 75!3V(J tOql-.w=_|M|z!olzwO_%;45GK2^Lek (M`a%Vmln :&^D^6I/>j2nJ`I[Ќ}-VCDh;F7& i% I4=+$k}yy|llzZ{wO'`>Uoİ;awԮ^[SsI d==O8WerG׾l_𔴙+C ]]QzBM⁋bR5!b" ׿fd g, ʽcm{9/$>g},uդ2C=i8JOEyf1O&y! 56,g O^DwmZ 6D"xt[!Ƀ 푡ٿ ; ukN' IDATVfffx_occ|>x'twַc|Ӄ)d Gg2:obg؋ȆuWd(w>Nn[PTQ_sWYg).T<PB"}5]-B4p゙H'C٘wvԽFO62t/$N2>5tuبn0Q+׾5կRV~!w}222˗_wã>Ja?cww__CP^qxb]`:ӏrןSƆu }d? 
k:TIB:&Ycn ]fAؐu*ڲT̂'[k0DLlݻńȄm@^hAe[,!/6$@tQ&~*N<ثcqEw200(?8>,GW_h?S?g?Y>E I@fQ+QڇMiM޺WφK{muWw2FsDO/Zc"E+X4)we?'븸LU(=Mqzv=)SW O63^'ceLc]_C( {衇<' 1?ǗYO} dÞ޲gIY (5aqeu~u }m!V*l|wHsO;UbϞG9_^B<-wq]krDblTYf>ˆ躌mWf^T>׷ xv@/2L&LWࢻS}_sI[y2޲Z)e0MVV&=@r6v L&!ZdQyd|UHv#rOAe?8Æ@C9J{Ȍ?6dPH74ݦ/'5{:,rd=Z֖7Y4 NW=qMF6e|cmj Yt& GH!`F{uU=1Y*}k<^c@>դy(hDm30؛FK0ѯ3^Y:dLja3H+UuuВ2F@ q]s|D>W @&Pw+x&--޳zmvZM3~Sq -ؐ@84zih8Mv]K@MQe@SYU;M闸@&f+ Ѷ9-W'5z  C{ JI*oH+q"BȤY/22̘ ׿"RS٭`aןl`.,0u!8 _ D@f*N^wu$r|:dm,Fؐ)UVގ;  ]8dbuCqtt.i6żfu B 㗡']W0HXyߖY[e4A6U0{brT*>գܬbTtL qi=8 u^y @&(xJzm-(CEj5Q nW/bLX) ⃌ H)O&' Ɔ_oD3Bؐc(KYc`>=5uIH-J5zL\x  7)d2 oM2Wm2Wb&6f1(P4pXBLj4p`"lddjb&p6(CbC q~Skk] gpG2@ 2YczA ҫO㋣U5XvXfkI:(C.(] ܃b8Hۺ0[OdMCRJ1PO'(35I,FC1TހĶ{V@uȭAZ~݁Mqiv;Tc^{Y@o4k|SK0ZUoAe2wl'Fԝ 5tE[ Cjx4Vm ۱)占v$}'HD-!LL&Y IK Г:&$PyclL5&SbU%9t/ $w|i!l*Ӆ9Tv+d,q'xS$@G8NQ[Z[],SRI_W_)ɲD RCtyv3Wm\|}0N.] EeZ+2\N_M>Oo/?%n۰216N.|uPeYb" k4'a4px̐xgO{q9Np&At7 06Xa-w)@K +oĴښ$u]3 YMBq0L![CmCwg5`Ck ЃJw^]UEq8WW\p~a4M7OcIWרD dXd G"!ty۱٬mFS+$/ɟ _(G#&^ܺms$v9ОS5 a5FdVWU#]ݶYQy#fXt>Oqi6G{Dd5Vlr&C9)1ưcI.^fjJՖuE8YdŃkƲ"Z:KgDP <u[qYAesssJ3wbee__t sE Ñi` =F]]<N]RPP51@& CMM͟ 2LKIFl印}=ck3*1S5 X76^Ҿ?F]q;Ųb֚,;Ψ]=[Q?\5aC 5Z\[amb,06$} }]*3[C|^!qLF?;aW`"Jt]٭0/\+'& "lj j([ucY] S7ch_Cld"o:M׌iTl/4$R@&h]w =_ UPk]٭P*_LQZ %@&ḫ1 a)bYYWW=2DPM3$le_˝;~$!tLġvdS]]:y=|0F &Pf)g2뺦T^4\F#9>C6UXklH2hQlHD[;il("eoGbC RiuhFuӈ)=IU=܏ه )w/ gz!zIt }gl`,iMQCL{|ŪSGG"D{Q*:L4Z~Ĭ B((߲"8>2wFbCbzlL=&q!KNQX01rX٫0Y o.#F!Ž5FjMR|ƴߚo"g?ЏDoh8f&rA fo|Z5WsĆh[^J3x2eC &5i`C!q!Bb 2]&c_ۺSY122Έ6[b| ɇΪދrds!wu \;5и| 3C"53?x<:ybĔ m6nC6HQZF/ݑ=szhUO4T=εn@Q2tKz@7^LyVU (]]~xYq =Bؾ+$er2t!1#fQA8Vag'f2ؐH3!`~?j !kj6]܏DHwzԡIQi 2Qxt۶ygp<|yy#<6}PăY(r†oG(F}W7jRAweYjY Dd6 l&@&J1h*z{R[ l%P1_!bU&2n#OiT* 1g,O&h<(3)^>Ose^y^z%~w??|_[=O>$拀#| 2(W{ "軺ŢYq39@&ʴ8u##z Z6D!}5kjfSXV?!10fLkpB>_c?5Ct}zam _O=L>{w~緽mkFeEӻiW뻺Q25ۦinDn1@[Ćr 6H{٨ntqh78"R`C({*YcD[)oLD#)ORJSG2~˗l&͐k_5ƺzW#j?s:x2ۑf,T*/d(x( k_eڸaD[x 
"ÅC233FGG|HSOzvַX0uJ̗e|Io]srض~wd'NdڣHS(5L:#D4wtlpC,6Lϫ^>L&2&v@#ȣAϬn/SʕHŶ9cÁ/T=3KI?XQc'Ou]/X,w=yҗ8lmm/sO:R419cWZ2]QJw'Nx>zP( /x~f944ޯ8r IDATdzpdxɿ)ݓ:ˎ< 2նEz/ 2eBL1r\(ȬLd!  7Ұun[4+{|~У|1:;Ny/kWd^ɞG!@<5 V 91kYZcd V )fm۱c4]gdjێt#dؐB;8>#~ara7_}ݹ._ݻJIН5JMbRC<с\YQ.f)X蔸!`GRc4AʘL;f }% NCѓL^S9xJ<:u54'he:䲶r3 F ֡|,bʼn )Vr  q'@YYli6kԛfkrdoY͑2/[Cѭ'EmC!twڙd CFl'x*C1c8?ԽSߡ֬P}'*xP^1'x:ק;f;Nw!"M-}g=FLH㨶֓z ?aҏ]K# W|tk654]FS/2Ɠں=@odc 3u dFnhӝ$xg눓ں u/[G"~ ܥ[ ;֣LDq %5x7|Jo2_nlj>{U,I\@irL.uyh(w?|AS*&K˴ݍiԤ t\dFYX׍sW {ta<Z! ,2[e9x cֿ4+`'æqD9FuUH^c@גlѤqcGD֠NR7šC=fo9<=5OVP54@F)Q=3^(p;@$ $)^81rL9_#4p ?T*gmrdC湯Dvz[)AXn2=O`2-6$6h2_qmc޴AFըl z_GdCL 'NAN#q!|`RoS߉GBY$81J'SX7L,&c5N4pD.{VIue!YcqِBA]5 $0ƈbvɉ3; dbOArA @$qB㖒 xZYQIv-9C(]_F{AcWux9mSwTcN odcH amSJ9m_u[CT tGۥ'u} tNݮ벒Qǐ% '۶^ V!lxI|:$o }o' d|Fuӈը!/ѵF?Wxӹ#ߗ5vNXZFu#֫vZzoy"gf|uIc_C2i0tO+{ 㹷NҺ{c{[ywkl7>dlL9AN<69% wud&$if2Yr\9mAZI5db &OMِuZMQ=y\O' Z4hq]7Ve% UۢbL_k+%9sS*L&OؐeC~ ^IRC/$}ؘ5& 33 $x_W;kd(dZL.5yr)1  O~63.ȌqOIryQ ҝ,[DOpgUh`qǶq V}ܴSj:Twjh =dГ!e1lH)>>Z PݬmRk7BD s#i)Ig_ jMs!>뺪y-T;eu{͡k_id2˪WP6K)񼋞jd [7[ڥ<%i/ g> IXCғ5jIZmIsEF}!80h&$YL+xr{2w LRʌVG'VS0{.<}~dyg91< H$^c@_^V41[_֘Z>d|@-6d( 23zj_ָd<@oxh!q}D )dIu`BِD~IJ<7Tyk%ؐ'>%\ɼ52xOΰ_" |F TUj[" RQ [^N1֘ I Q&@FgDYK *& ЗH!aoM|ˑkAB1i6۵ƶna]M:#>uK!^v+kl`C:A ZklZ Y2O3=Yc[Z&ŋ"23>!еI:m @ֲ#RZS48*ǀ.p{;TăNoW_tFCM뺮{f}Gd#N@Ӻ>/)CC]F[;-ҥK!]L{z FI[Ofk0l[(Cx5@m$SwZ-n;{w# mHh~2xu5Ѡ<Y# Iˀ徼 'O& -"lHO-ެdEZrg֨/aڼku},;YcnC'Uh:^g0MvSR5x$2tAR!Lz] {@u]՝*5J]Z(j`/L&g .ņfqW7-fCڪ%'P",uzg-ү>.2pg kKDz77ezCu9t0.,hPwTFi@_jmrA@7􀌞I/5E_YJLİdl}r$q) H.䂧BA՝l'gC<@F,*6d`~oK:Af~~^l_5>Ż,.v4j봵HN J0W(ԃIudiw)!'ݶmy.\#<>9ϟs]/~GGG?ʲ$v)ד5.3F6@_J U~jrigI NO] ]2.ne*##!j ] >G4Cq]٦zg!zzp:1)dۏO^x'IO;&{Z2$fCŪ =ȧ?i._+?#???s|//pF'||hew*uƸowɓo,(ս #pTTف= @F_#9Y(ݽ2tp| /3RiZAS7@F8k.e[_ҝX(m#KLrgyW},M&Cm/| ͆-3/%A{,-)&XP](1uigS2諡 n )uSku$2 fǙuFfC~e677y_{衇7IgVP_&^)J +_ZD(Wq5@7TU6k逌 060} B-0YdygK.TEY,-3X]^@/]tHdDח.]b^ --ߣp d>ةb:UзZ,x[^\ybVS@mK 꾝O @U$JwRo.-u@qv;"}666_ 3}_eoxk_5y^yk=5|;-.'Iʽ7,,2Ǿuw(jk&%AFq^ j;;&&KKj&訌Zb&J.ݤ/?OO)J=X*q/c%^"J%圗6Xȉ",pY#(@OdU5#] 
dFY2v' 2 0)غCz Ub/ۋ!++lJvs4[D8x 0>0Vm+'3Gu]}Y>m۸{^*gΜ V*|a|, ø"'k5BNAftqX-:C;-_Kr,F3wێO8PE.Pc#vvT@:C^gSa ,,vXjǁOںXd^gm ۱E3vL*ۺyx /ַͧ>z!n޼n.€O> 0; |Sw.:5o]{KqǷO<٬?Nϲ']*CVWyQ4xjO)JGtmQ[ySwCy6GOؐ70;<+r?sjP =~j[c@%_ED.GQ~rAMrPl .\zlVݾMU_c dNSZv8ON4&F* Ӭ~&9*o4(jo#Zm4-<%nRO-b; Q5ERSr?>™LY:@F:C- gb*q.\4 EOmq(i6gC2VC' nP():+MiEm ıO-g# 7lrfTW 7i4GAۃ-h.˻ˢ0dl`b:( f_zL֋2;Y`5m4k:]SS8ó4&{iyn}ȩS꪿ېfg٬m|-hӢ:ۀ5tRMfm4,F[_nX74'! t蒷,&}2H'xhx>;GF>tF$(}xXMޕL\.mޔǔ!o >9At>*(Lw*0jo, fm=1?J"T@lw|ǣߥ|="<(?TER([߭xq>N:(_*SIšn39nEi IDATYZT@BvHԲh> Gᴴӳ,ߑ9q@?}W١Yܟ|| 0;ˇ8-Eriq8, {xR/v)n)l޷R \~Jt: _N f˃͡fL|V6>}n 2%AM ࡓduq[K37m@OLǵ2/`ol%zhg]U-SdTӜBd,b,$Sa<)i8-Գ3FY&x )ސ\]-F Sj2;hu3 A:Ix%JR6Ag,nKfɻuds2C sb40 l 4̐KML-6AeBӜrnU(NB~e[ivXRkY8?,zԲqS`^?HA,n6^< #;%%i拷[)dv-@^e^+6D @E$o˂-*㩡SP&ߐS2:M.4 eplȗf)4仯Ylw>?RhlAa󅛬 6/| \*NG?JW\mN[^$eM>m*)G*u'>V Gjz^)La~dN뚴*8O}*b}-]F*_Xuy[W23֙4l ?dQ}-N!9lrdnX.F(Mw)>)]pZVc.2vnpٕwN } uqv\d.ߤAWU<rdV Uܺ)ȸM5JPv6صRhy@4/;-P){پ ,\vW7[/l_׽pg+[\Vk2{ ۑja7Aun`[4@Tv_^k$GSq2:ڒ?0ھn5'7qۊܨհp)mZ 7뺬m|U[CB! ]eXTƫ@ hP\A-a&XOߕU2C-3sk&N*Sy]eI~^ k%ǀ.,ju,\nl- u\;MML7Ӣ_-\]TWVi쫀LdZkY}ԍZ n*͛drDR7E " 'GX\YS'ol C:z ķȭ48P0ouQ1͛Pa2Yqwc՝hqwhz> _ZKdM>11+oP_|͛<[d9p Z?5(ta֨ޗZT{FЇ>zU xĻR nlwnTmocmn#ˑtʊoF}=&27nKgϦ2Qq񭬭*|')eզFxo߸?J8US TxsqZTRϿ$4H 2Zi_N NNBT$L&W_t%8,Y6otSC830 oɟ?C>/ݨV\gAbz /gb7n}qf.#lnj_R.uTWٳyL#xB; 6d\X,[VZܨVFNrϞEّ3z o3?V 4[-6rOnPÕ~X7sf gT@:{ӧe.k5Qs $U8uBę3QrTIo"G#&7v~UFffOk5Ǝ0zbFM M" 2pVkZ OqXl;oi9Iq^rX?ɡ\tNU4|AHm\YZM\^ZOrvu47"M\ۼ?7?K&ߙu9V<̍qmC)@v J%8qxuj;>](YWusԹx\u]d6Ս 160TK:I]__]{սO{6*jm} tX~*̨Cd<b%W{;*M(F%Zg=ੵpIbj|%?# eqzsS s"/^ܨ,,gYpE2W97v/d830Ti˃\xQ4k\msJ9vطUeg[̓,ŋ 3?ŋ)mVw۷n)FܹϝS/Pq|&l )OzoK$lnD}-,,@ өZOE<YVt\ۼ&;#K.)  ̏+FJ ,[[OCزj.]b^dZQ=J!;&feYp\M =K6m: ҥKZ >cXH|mM%-[9"WMɆ\tsT%O:K.I>/dk(GAlVdZyͺ]Zļ\YD@d]S ZesyJr * ' ѬQΎ'nOZ pE֝}Nf__)%e scs" p6B{_Zm-s @њGl .]t![ PΗg/G:'PyYTz}a# ؞V&0^w|a~^5$lc^Uy) <jվJ'N'<& ΗJܨVv .-A <|?9:" sJ l}}LϟWxZu]PnvBȕ+]Ӊ|r&#gwAD\=(R se N&. 
d^P]OoZ;^Ƿu<-2T$tbMRmT}8܏F ӕj3b׋T Pi"YJnm)'kTDz3t [_X*)ڲ,K%HOJUfهA<ȴF-e!pW_Ynr{òk ?l 2 j/t-;Q%&LYt˙$v|WLO>>76 TΏaC} "4q d gE[Cȕ}&yF;Q?{v26} *XM<몟c2<|>LUo:s=\ϸ{0;b6|4><<5|c,(w"[7LβHx6h;7 _So8pSgFbuQ㢜|$ qd5zv9spLJu2ޔK0HsZhɵٳg#*  4Lh?3Y0!?j"//?Az5ň˃Lr@|YXTy ..7fM__t:"FKLqxsbyg*72f^Qi3^ֿ>;QﮇN=b[PuG)):tQPրgɌrtlxSW6OcFE᫯gk#Ceѡz c뺕pr{dl =HNu ??ަhTM&k9{\LF@ ECw 5^2f 4fUzMj&~ZI4n˚繓I TgĞNC]2u#!6(WW7͆EZ`g6GdGu&h4wsNbѢEXz5z!l߾`0P[[ ˘/xꩧw/%Z $-P:EPkjE;"qHSjV<&gb55N/ͅ +:"#EZEZX~O'NFD8dАIH(r2vZ;G躮:$& 7Xړzm3'S"xjHJV40`%i]#60H D |Z5'V QRRG}t^VV.]kx߾}(((СCطoKDt+QZSe5-F\YS~(j"BmW-ңũ:&y.F2Jd]32U6]DԈ6!=VTU@6hxZ~dP?#Q%MO8î ^UVSrꚛLfG]84v+p#mvPzD֝5 CDp.=VndtT2Pk4DƄp1kF:IL/rS>2TaXndd'G{^AQljL遁6d55kĭك'͘i,w.gSt+kM '"t:;AAh^nt阆{K5F#48rǤ+uu5kxwX(q r+u2<3w:g5L d؆;1n+uF0v5B0V~z"t$]G1:#z-7?D(xr&@.f8CFPODndL,׎4L0#CAf|0lh6$9#{[23]<XY>LU[kܥjn"댘 S?+R{@;t??JeAf&Kk.P̘LTwV"C]Ȍ!#0UJ's8Vo2N;2b20dRU%QeTwV5etm9q$>>UȵU7)CEf& , ɌDeGTV2_TW|,85RwUgێ8A>>%X,6b8h4$9*YNn<(x2Ut`N&3(H $(T r#,-FYNf` v:e8{<3(H~TYɒ\ddvGTJTӢӠ4d+@.c nHfP|bOC&2&R%N`@fØ*Y*+Y;WVWW`n\h8:ʢ"71999ͮ1nܸ0X wA2we%Yv,B@VH.نjٔkEt!GVJ)3冴IԐ ۪0AA%! +KC,oIId=8,ܨ5/v \5#g:<YW ;Iہء;f0'S^.bdqWZعЛF2bڑsr2JۤɃ1I eOŀ)n6Ɇ!x<|V q`@"H< rGYYZ*< KWoCE{ǹ'0UeHo,"< rC}w@URA4 H{Q&5x2Y$ASEjF<#ْ"ftX,ȖS%;V&%IҶRG! )jJJ% ;u,D.8>eKl6{Dңmit.atY2!!x6TF=%Лݷ篍%a|& ,|vpb\AwK3`yrYZ-qڕy˖^ \z V+t2g %mYG 8}#/F qԡe )XL֑iLY/MX }b{S?;kk>Nt~. 
*%Kd u9N ˔uBhq8[%"V2 ۆ,APT 0ZN:}7N8@uNXf?Sߏkrl0'S00  %zx^CFrd2 .:&$lhͅw,TӖ P4Lrt^ zlYse \ʺ%VS􂋆j=8 )zyh(l#( e:eXqW&``@,Du"5f?{+D m@] / nn1nX* ޮhެ+!, 9|%rK\:BCy_bȞ'Źz"D"F%m%XB6 aaj6XFUq1S7~tx䞾'/ˢ,s$'/Ċe$$?ϦW W+V+W?wv`6"Q;LAɓ,P.7n 57̸A~q"1 'd 퐮dM$uͨ3҅͞4 ^jE'DV+iݎE߻t '.\P@l?b%eS \*~lKD  O=sE;hٟ #8QXAC6%<jlM;lQWxie2JBB~BE̙D"j'ZZV蓦OQ/,,76 #$==yF"__ݻOKDt&Bt]^|(*fs'CY8N+`ݬu8riQ`4ɇvh4c#-Gfh#"}z~f3 d=:tl|k= 12R1gM϶nؠ՛wpa$& 5JZk!lĀ͆3%Q ܉F#s6:ɺ-lqPKqa9E!!jVIc,7}bCDİapbi`<Ϛ푬1>/_D]w6QNbIh(;1<2[{pHč2xUk>dodo6FDcEF&ac Oz{(^"hnPױؼ%avڑgW+8ʝkm8xcdzeqk43<8ƃ)&<883Z,Zmެ`fc6ԎMVIQ^+ҍbK_rQؼ%l<%Ӗ *0J1!"<豬SSY C]yb1Fqk,ڴGɺh u=ЭDF݃Yx+n??~{z<<HPoGz&Vo΂3Ϡw<ӿ͑850Ki~71rov2}8Ӄ5g&[KDv@w7ք{\%e :e8l] w{Mȿo:2:O?e?o;"NGYJ>J +gD_(_#?pI>E 7SGo$"?-ɞ-8{vVCYs+^xmqqX>LAc[xvwcktIIHNǾZF^[fncj1x~tz@|tOhHjԊmxýf`_WFG{\6\CyLV~mDIpzN%* C<mq~6?֭,ȾGz_W"Na8ԭ@W@T7qnK, T__ .,d"$ʶ屿FVPdkV|zStYc ^ںc^""ɺ @ eT?-աwL.X_V%#S|YyN;ؔIqXuv;`|0 ';I'LX8tc 5bN̋[KXwSÆO+0 e}]]#&c^8{,Y/#:tQG6ܒrǴ11x.weީsN6.Nl(8Wf3 *F55i^>}˿q$>>lPw:; 8g& T6Ao;:2, S=Hd;3DSoÞ=׾;>@_%j<;3 c^f avtt JZf-)@bo^^mA]wfzbc^g'Y¶m^n: %̘LڃݻYN66?vWƍ7";:rgL z4 =y waÝ^u.ފ=U{p0 WB2̒ݻj7f͔u+ #n[^:;Y٦;;q:uХ]ۆw:;qwB!.K+`n\dfGJ஻$QKWl(yq4:IӒxA6x-vl4/}-c#ڽh+/={4r)_!f$ˑɓMXF1ލVÂۥk";<|We2, 3BV_b½};;'a9q9>r ֯wT+〻&,;>l__\ҽ(k)Slׯ3~ulj ٫mx6Y{<2IAEVؾRB;ǫ+c+0o{x4گAN v*/ pŋ 0zi ؎X`×|%9^c'"iޚEoGs4<ۋh^j4_mH¼;IM@mx YY^y|d{wu:\ }Gq+M*VEw\sd~BrJ5¸-"Qӏր[3 Â.,n0]1ɠ:t7tCq-Sft5@\5IDATO14$bޛXl6|g. u:0.Cy79zD?@s?'x=e{uvwnC˅Qz/ѽxw?Ay{W?lXpJD^W_텅} P0RWy=ユӈgu`yYSIOow//- &Wg+K7UHP@xW~[3NjFy55xm7mAdE/^İ=rVbp,v-; ?ϝyk܃ÃÙ?{S\\Co~׮^Eؑ vYq_~qE<Ʈ],Ţ_zN\{+|׸  A Q80C=?tGkx5`4lOO~$l>=O>5^H . o~~KD?odvu=B*YY\mۼKF#D]'ًBh4o^e')yc|%%D?C! 
"Ǔ;(H3}kDsY'FlR͚yhjSzjme׾WHel8HxTZI UWNnz8"SNQY<*.%%d24s&ysqqD?1QsO3vOrBCoU:yW$Xs3E0r3ovGDDc̈s:N&G<Ӎ,#½v&o7nf HnbηtM,t;uJ+BS{<17͌3DDKܜ-=>HQ'ORH㏳%lYCl?t7ѼѰu;^S/?OI4؋ph TZJ  䛗GKq~*irst$k=y6's#Om#nJVu< ކ D1CI;:bchbt_mI>O\}`qëUfGyVXѳ:>yd`i4D/ͮd"ZhR"ϼH::yuOw}"E=Ex`o}(>eEDDt%FمtCq1 lxtyyT4&e;<yb?<EzN-,Κj(--%T4f+uGCykn&<:t.̿xoh[ΟwmooPY[]q QyTpv6eѹslw=N})YY9:}Yz4>bo73ۣ OI:__6l޸<;.9,9lDƆNd%KŒABgТѵd^":O!'NwnjΞ=KDDD!!D+_gtf/ 33b6F; _Q|~36+bJz=sӦ1isYM;u M&SP@ݞD6{Hj3}` ; o@S"Eo7/_|QV+HǏQ3Pb *.Z,D͜I(̫:#>c@c޽{ u88'9 9n&Mn.yi4D;w:\"Vց#"Q:,s\;%n:&>v"Qj*QRjK6ԁ,Q{l6?'.7onDDd5C),m@#Y!顏"_ЮOw h;i)71TrD("#Y$G:zhT,ѵO3gL$^ӟ\!鎊q1aх -m0 `bxs!Wݷ/}io^ab~^.Mʼn,ZY\LQ'ON.wx562'If}}oDDI'T m~c3]w^G O>01['~6!._&V?H:2+w8=* ]:8HYgR|~hÖ ɡ+?zPe0`fCɄ>!>>8-zD*><ر PQJ1ձ{P j۾sƶmXSނrtPQ{w>w߁׶0sOgw;lzVJ33+gXs,cm-,W3W…^]ԳgqGL 6ED 90Z-,D6`y loGw_^KK7DP}۬ӽzw^N fͮbsaT=*~D*qx~HLO/ލe 0-dtTvTԋ:|GN|wg }kݵդ\cjZ,P.,9',w갴"#qkT2BC ^{5<ٳ?06lw|Ah4\twZ,|GNN.\gyryp˗c͚5vol٫z}H-$#tعs'~a,ZDz۷o bm۶/ ,Y"yT㏣̙3qTDo6#{U2Uý+L~@_,wOFzOM9{U\yJ}O*㳪BTr|D_)wO WWWײPTTT=֭[˗c׮]@U>T{C`BL;m-<<f===Nr噜{ ~;rrrގUV >쿵rd!G10XHMMœO>9K!..=^yX%nRbA{Yfyyy8}}}I 8osƌ0 AllW;!G{T?&z?T#J1v/b{U8ȕ};;;7nL{ȑއzDʡ+}*A<ߎݻw`{B_u|߄@.F UCPߏIho[#"hٲe6mDv۸4m49oxx~Z`eggӮ]>fOXC=D3gΤZl=#drw0 A=_|d_Z*TPB PUPBIաPB CWB *&TB *TL] *TP qM7aݺuя~r_s4*TxաPqh4oرc7xwTP!}*T\ "DGGʕ+?P PG*T\jŲe>令PKP !N einspline

Uniform splines

Spline creation

Each B-spline object type has an associated creation routine, to which the user must pass the dimensions of the data, the boundary conditions to use, and a pointer to the mesh data.

Base data types

Each spline creation and evaluation routine has four versions, corresponding to the four supported base data types. Following the LAPACK naming convention, these types are specified by single-letter codes:
  • s: single-precision real (float)
  • d: double-precision real (double)
  • c: single-precision complex (complex_float)
  • z: double-precision complex (complex_double)

Grid dimensions

The uniform grid structure has the following elements, which must be specified:
  • double start : the first grid point
  • double end : the last grid point
  • int num : the number of grid points

Boundary conditions

The boundary conditions at the first and last grid point must be specified. They are specified with BCtype_x structures, where x is one of {s, d, c, z}, as described above. For the real types (s and d), the structure contains the following elements:
Type Name Description
bc_code lCode "left" boundary condition code
bc_code rCode "right" boundary condition code
d_type lVal "left" boundary condition value
d_type rVal "right" boundary condition value
BCtype_s and BCtype_d data structure elements
For the complex types, we must specify both the real and imaginary parts:
Type Name Description
bc_code lCode "left" boundary condition code
bc_code rCode "right" boundary condition code
d_type lVal_r "left" boundary condition real part
d_type lVal_i "left" boundary condition imag part
d_type rVal_r "right" boundary condition real part
d_type rVal_i "right" boundary condition imag part
BCtype_c and BCtype_z data structure elements

lCode and lVal specify the boundary conditions at the first grid point (the "left" boundary), while rCode and rVal specify the boundary conditions on the last grid point (the "right" boundary).
bc_code is one of the enumerated values {PERIODIC, DERIV1, DERIV2, FLAT, NATURAL, ANTIPERIODIC}.
d_type is the C type corresponding to {s, d, c, z}, i.e. {float, double, complex_float, complex_double}.

The codes have the following meaning
Code Meaning
PERIODIC Use periodic boundary conditions. The value, first derivative and second derivative at the left boundary match those at the right boundary.
DERIV1 The value of the first derivative is specified in lVal or rVal.
DERIV2 The value of the second derivative is specified in lVal or rVal.
FLAT The value of the first derivative is set to zero at the boundary.
NATURAL The value of the second derivative is set to zero at the boundary.
ANTIPERIODIC Use anti-periodic boundary conditions. The value, first derivative and second derivative at the left boundary are the negative of those at the right boundary.

Data to be interpolated

The data to be interpolated should have Nx*Ny*Nz contiguous elements, arranged in row-major (C-style) order. That is, the offset of the (ix,iy,iz) element is ((ix*Ny+iy)*Nz+iz). Complex numbers are stored in the standard format of (real,imaginary) pairs, with the real element first.

Function prototypes:

Single-precision real:
UBspline_1d_s * create_UBspline_1d_s (Ugrid x_grid, BCtype_s xBC, float *data);
UBspline_2d_s * create_UBspline_2d_s (Ugrid x_grid, Ugrid y_grid,
                                      BCtype_s xBC, BCtype_s yBC, float *data);
UBspline_3d_s * create_UBspline_3d_s (Ugrid x_grid,   Ugrid y_grid, Ugrid z_grid,
                                      BCtype_s  xBC,  BCtype_s   yBC, BCtype_s   zBC, float *data);
Single-precision complex:
UBspline_1d_c * create_UBspline_1d_c (Ugrid x_grid, BCtype_c xBC, complex_float *data);
UBspline_2d_c * create_UBspline_2d_c (Ugrid x_grid, Ugrid y_grid,
                                      BCtype_c xBC, BCtype_c yBC, complex_float *data);
UBspline_3d_c * create_UBspline_3d_c (Ugrid x_grid,   Ugrid y_grid, Ugrid z_grid,
                                      BCtype_c  xBC,  BCtype_c   yBC, BCtype_c   zBC, 
                                      complex_float *data);
Double-precision real:
UBspline_1d_d * create_UBspline_1d_d (Ugrid x_grid, BCtype_d xBC, double *data);
UBspline_2d_d * create_UBspline_2d_d (Ugrid x_grid, Ugrid y_grid,
                                      BCtype_d xBC, BCtype_d yBC, double *data);
UBspline_3d_d * create_UBspline_3d_d (Ugrid x_grid,   Ugrid y_grid, Ugrid z_grid,
                                      BCtype_d  xBC,  BCtype_d   yBC, BCtype_d   zBC, double *data);
Double-precision complex:
UBspline_1d_z * create_UBspline_1d_z (Ugrid x_grid, BCtype_z xBC, complex_double *data);
UBspline_2d_z * create_UBspline_2d_z (Ugrid x_grid, Ugrid y_grid,
                                      BCtype_z xBC, BCtype_z yBC, complex_double *data);
UBspline_3d_z * create_UBspline_3d_z (Ugrid x_grid,   Ugrid y_grid, Ugrid z_grid,
                                      BCtype_z  xBC,  BCtype_z   yBC, BCtype_z   zBC, 
                                      complex_double *data);

Spline destruction

The memory used for spline storage can be freed simply by a call to
void
destroy_Bspline (void *spline);
The spline parameter can be a spline of any type and dimension, uniform or nonuniform.

Spline evaluation

For each of the four datatypes, there are four evaluation routines, depending on which quantities need to be computed:
  • Value only
  • Value and gradient
  • Value, gradient, and Laplacian
  • Value, gradient, and Hessian (matrix of 2nd derivatives)
For consistency, all results are returned through pointers passed to the evaluation routines. Currently, no bounds checking is done for the sake of speed. The user is responsible for ensuring that the points passed to the evaluation functions fall within the grids specified at the time of spline creation.

Function prototypes:

Quick Jump Table 1D 2D 3D
Single-precision real 1ds 2ds 3ds
Single-precision complex 1dc 2dc 3dc
Double-precision real 1dd 2dd 3dd
Double-precision complex 1dz 2dz 3dz

Single-precision real:

1D
inline void
eval_UBspline_1d_s     (UBspline_1d_s * restrict spline, 
		        double x, float* restrict val);

inline void
eval_UBspline_1d_s_vg  (UBspline_1d_s * restrict spline, double x, 
  		        float* restrict val, float* restrict grad);

inline void
eval_UBspline_1d_s_vgl (UBspline_1d_s * restrict spline, double x, 
			float* restrict val, float* restrict grad, float* restrict lapl);

inline void /* identical to above routine in 1D */
eval_UBspline_1d_s_vgh (UBspline_1d_s * restrict spline, double x, 
			float* restrict val, float* restrict grad, float* restrict hess);
2D
inline void
eval_UBspline_2d_s     (UBspline_2d_s * restrict spline, double x, double y, 
                        float* restrict val);

inline void
eval_UBspline_2d_s_vg  (UBspline_2d_s * restrict spline, double x, double y, 
  		        float* restrict val, float* restrict grad);

inline void
eval_UBspline_2d_s_vgl (UBspline_2d_s * restrict spline, double x, double y,
			float* restrict val, float* restrict grad, float* restrict lapl);

inline void 
eval_UBspline_2d_s_vgh (UBspline_2d_s * restrict spline, double x, double y,
			float* restrict val, float* restrict grad, float* restrict hess);
3D
inline void
eval_UBspline_3d_s     (UBspline_3d_s * restrict spline, double x, double y, double z,
                        float* restrict val);

inline void
eval_UBspline_3d_s_vg  (UBspline_3d_s * restrict spline, double x, double y, double z,
  		        float* restrict val, float* restrict grad);

inline void
eval_UBspline_3d_s_vgl (UBspline_3d_s * restrict spline, double x, double y, double z,
			float* restrict val, float* restrict grad, float* restrict lapl);

inline void 
eval_UBspline_3d_s_vgh (UBspline_3d_s * restrict spline, double x, double y, double z,
			float* restrict val, float* restrict grad, float* restrict hess);

Single-precision complex:

1D
inline void
eval_UBspline_1d_c     (UBspline_1d_c * restrict spline, 
		        double x, complex_float* restrict val);

inline void
eval_UBspline_1d_c_vg  (UBspline_1d_c * restrict spline, double x, 
  		        complex_float* restrict val, complex_float* restrict grad);

inline void
eval_UBspline_1d_c_vgl (UBspline_1d_c * restrict spline, double x, 
			complex_float* restrict val, complex_float* restrict grad, complex_float* restrict lapl);

inline void /* identical to above routine in 1D */
eval_UBspline_1d_c_vgh (UBspline_1d_c * restrict spline, double x, 
			complex_float* restrict val, complex_float* restrict grad, complex_float* restrict hess);
2D
inline void
eval_UBspline_2d_c     (UBspline_2d_c * restrict spline, double x, double y, 
                        complex_float* restrict val);

inline void
eval_UBspline_2d_c_vg  (UBspline_2d_c * restrict spline, double x, double y, 
  		        complex_float* restrict val, complex_float* restrict grad);

inline void
eval_UBspline_2d_c_vgl (UBspline_2d_c * restrict spline, double x, double y,
			complex_float* restrict val, complex_float* restrict grad, complex_float* restrict lapl);

inline void 
eval_UBspline_2d_c_vgh (UBspline_2d_c * restrict spline, double x, double y,
			complex_float* restrict val, complex_float* restrict grad, complex_float* restrict hess);
3D
inline void
eval_UBspline_3d_c     (UBspline_3d_c * restrict spline, double x, double y, double z,
                        complex_float* restrict val);

inline void
eval_UBspline_3d_c_vg  (UBspline_3d_c * restrict spline, double x, double y, double z,
  		        complex_float* restrict val, complex_float* restrict grad);

inline void
eval_UBspline_3d_c_vgl (UBspline_3d_c * restrict spline, double x, double y, double z,
			complex_float* restrict val, complex_float* restrict grad, complex_float* restrict lapl);

inline void 
eval_UBspline_3d_c_vgh (UBspline_3d_c * restrict spline, double x, double y, double z,
			complex_float* restrict val, complex_float* restrict grad, complex_float* restrict hess);

Double-precision real:

1D
inline void
eval_UBspline_1d_d     (UBspline_1d_d * restrict spline, 
		        double x, double* restrict val);

inline void
eval_UBspline_1d_d_vg  (UBspline_1d_d * restrict spline, double x, 
  		        double* restrict val, double* restrict grad);

inline void
eval_UBspline_1d_d_vgl (UBspline_1d_d * restrict spline, double x, 
			double* restrict val, double* restrict grad, double* restrict lapl);

inline void /* identical to above routine in 1D */
eval_UBspline_1d_d_vgh (UBspline_1d_d * restrict spline, double x, 
			double* restrict val, double* restrict grad, double* restrict hess);
2D
inline void
eval_UBspline_2d_d     (UBspline_2d_d * restrict spline, double x, double y, 
                        double* restrict val);

inline void
eval_UBspline_2d_d_vg  (UBspline_2d_d * restrict spline, double x, double y, 
  		        double* restrict val, double* restrict grad);

inline void
eval_UBspline_2d_d_vgl (UBspline_2d_d * restrict spline, double x, double y,
			double* restrict val, double* restrict grad, double* restrict lapl);

inline void 
eval_UBspline_2d_d_vgh (UBspline_2d_d * restrict spline, double x, double y,
			double* restrict val, double* restrict grad, double* restrict hess);
3D
inline void
eval_UBspline_3d_d     (UBspline_3d_d * restrict spline, double x, double y, double z,
                        double* restrict val);

inline void
eval_UBspline_3d_d_vg  (UBspline_3d_d * restrict spline, double x, double y, double z,
  		        double* restrict val, double* restrict grad);

inline void
eval_UBspline_3d_d_vgl (UBspline_3d_d * restrict spline, double x, double y, double z,
			double* restrict val, double* restrict grad, double* restrict lapl);

inline void 
eval_UBspline_3d_d_vgh (UBspline_3d_d * restrict spline, double x, double y, double z,
			double* restrict val, double* restrict grad, double* restrict hess);

Double-precision complex:

1D
inline void
eval_UBspline_1d_z     (UBspline_1d_z * restrict spline, 
		        double x, complex_double* restrict val);

inline void
eval_UBspline_1d_z_vg  (UBspline_1d_z * restrict spline, double x, 
  		        complex_double* restrict val, complex_double* restrict grad);

inline void
eval_UBspline_1d_z_vgl (UBspline_1d_z * restrict spline, double x, 
			complex_double* restrict val, complex_double* restrict grad, complex_double* restrict lapl);

inline void /* identical to above routine in 1D */
eval_UBspline_1d_z_vgh (UBspline_1d_z * restrict spline, double x, 
			complex_double* restrict val, complex_double* restrict grad, complex_double* restrict hess);
2D
inline void
eval_UBspline_2d_z     (UBspline_2d_z * restrict spline, double x, double y, 
                        complex_double* restrict val);

inline void
eval_UBspline_2d_z_vg  (UBspline_2d_z * restrict spline, double x, double y, 
  		        complex_double* restrict val, complex_double* restrict grad);

inline void
eval_UBspline_2d_z_vgl (UBspline_2d_z * restrict spline, double x, double y,
			complex_double* restrict val, complex_double* restrict grad, complex_double* restrict lapl);

inline void 
eval_UBspline_2d_z_vgh (UBspline_2d_z * restrict spline, double x, double y,
			complex_double* restrict val, complex_double* restrict grad, complex_double* restrict hess);
3D
inline void
eval_UBspline_3d_z     (UBspline_3d_z * restrict spline, double x, double y, double z,
                        complex_double* restrict val);

inline void
eval_UBspline_3d_z_vg  (UBspline_3d_z * restrict spline, double x, double y, double z,
  		        complex_double* restrict val, complex_double* restrict grad);

inline void
eval_UBspline_3d_z_vgl (UBspline_3d_z * restrict spline, double x, double y, double z,
			complex_double* restrict val, complex_double* restrict grad, complex_double* restrict lapl);

inline void 
eval_UBspline_3d_z_vgh (UBspline_3d_z * restrict spline, double x, double y, double z,
			complex_double* restrict val, complex_double* restrict grad, complex_double* restrict hess);
einspline-0.9.2/www/NUBinterface.shtml0000664000113000011300000004470611012400560014615 00000000000000 einspline

Nonuniform Splines

Nonuniform grids

In order to create nonuniform spline objects, we must first create the nonuniform grids on which the spline will be defined. For multidimensional grids, a grid object is needed for each dimension (though a single grid object pointer may be used for more than one dimension).

Grid creation

Currently, there are two nonuniform grids implemented:

Center grid

The center grid is a set of points which are more closely packed near the center of the interval than near its ends. It is created by specifying four quantities:
  1. The first point
  2. The last point
  3. The ratio of the largest to smallest grid spacing
  4. The number of points
A center grid is created with the call
NUgrid*
create_center_grid (double start, double end, double ratio, 
		    int num_points);
The center grid function has an analytic inverse, which can be used to find the nearest grid point to a given point very quickly.

General grid

The general grid is a catch-all for all other types of grids. It is created with a call to
NUgrid*
create_general_grid (double *points, int num_points);
points is an array of points, which must be sorted in ascending order. num_points is self-explanatory. While general_grid can be used for any type of grid spacing, it has the disadvantage that a bisection search must be performed to find the nearest grid point.

Grid destruction

Grids created with create_x_grid can be destroyed with a call to
void
destroy_grid (NUgrid* grid);


Nonuniform spline creation

After the necessary grids have been created, a nonuniform spline can be created using routines very similar to the uniform ones. The most apparent difference is that the nonuniform grids are passed through pointers. Note that the nonuniform grids should not be destroyed until after the splines which use them.

Function prototypes:

Single-precision real:
NUBspline_1d_s * create_NUBspline_1d_s (NUgrid* x_grid, BCtype_s xBC, float *data);
NUBspline_2d_s * create_NUBspline_2d_s (NUgrid* x_grid, NUgrid* y_grid,
                                        BCtype_s xBC, BCtype_s yBC, float *data);
NUBspline_3d_s * create_NUBspline_3d_s (NUgrid* x_grid,   NUgrid* y_grid, NUgrid* z_grid,
                                        BCtype_s  xBC,  BCtype_s   yBC, BCtype_s   zBC, float *data);
Single-precision complex:
NUBspline_1d_c * create_NUBspline_1d_c (NUgrid* x_grid, BCtype_c xBC, complex_float *data);
NUBspline_2d_c * create_NUBspline_2d_c (NUgrid* x_grid, NUgrid* y_grid,
                                        BCtype_c xBC, BCtype_c yBC, complex_float *data);
NUBspline_3d_c * create_NUBspline_3d_c (NUgrid* x_grid,   NUgrid* y_grid, NUgrid* z_grid,
                                        BCtype_c  xBC,  BCtype_c   yBC, BCtype_c   zBC, 
                                        complex_float *data);
Double-precision real:
NUBspline_1d_d * create_NUBspline_1d_d (NUgrid* x_grid, BCtype_d xBC, double *data);
NUBspline_2d_d * create_NUBspline_2d_d (NUgrid* x_grid, NUgrid* y_grid,
                                        BCtype_d xBC, BCtype_d yBC, double *data);
NUBspline_3d_d * create_NUBspline_3d_d (NUgrid* x_grid,   NUgrid* y_grid, NUgrid* z_grid,
                                        BCtype_d  xBC,  BCtype_d   yBC, BCtype_d   zBC, double *data);
Double-precision complex:
NUBspline_1d_z * create_NUBspline_1d_z (NUgrid* x_grid, BCtype_z xBC, complex_double *data);
NUBspline_2d_z * create_NUBspline_2d_z (NUgrid* x_grid, NUgrid* y_grid,
                                        BCtype_z xBC, BCtype_z yBC, complex_double *data);
NUBspline_3d_z * create_NUBspline_3d_z (NUgrid* x_grid,   NUgrid* y_grid, NUgrid* z_grid,
                                        BCtype_z  xBC,  BCtype_z   yBC, BCtype_z   zBC, 
                                        complex_double *data);

Nonuniform spline destruction

The memory used for spline storage can be freed simply by a call to
void
destroy_Bspline (void *spline);
The spline parameter can be a spline of any type and dimension, uniform or nonuniform.

Nonuniform spline evaluation

For each of the four datatypes, there are four evaluation routines, depending on which quantities need to be computed:
  • Value only
  • Value and gradient
  • Value, gradient, and Laplacian
  • Value, gradient, and Hessian (matrix of 2nd derivatives)
For consistency, all results are returned through pointers passed to the evaluation routines. Currently, no bounds checking is done for the sake of speed. The user is responsible for ensuring that the points passed to the evaluation functions fall within the grids specified at the time of spline creation.

Function prototypes:

Quick Jump Table 1D 2D 3D
Single-precision real 1ds 2ds 3ds
Single-precision complex 1dc 2dc 3dc
Double-precision real 1dd 2dd 3dd
Double-precision complex 1dz 2dz 3dz

Single-precision real:

1D
inline void
eval_NUBspline_1d_s     (NUBspline_1d_s * restrict spline, 
		         double x, float* restrict val);

inline void
eval_NUBspline_1d_s_vg  (NUBspline_1d_s * restrict spline, double x, 
  		         float* restrict val, float* restrict grad);

inline void
eval_NUBspline_1d_s_vgl (NUBspline_1d_s * restrict spline, double x, 
		 	 float* restrict val, float* restrict grad, float* restrict lapl);

inline void /* identical to above routine in 1D */
eval_NUBspline_1d_s_vgh (NUBspline_1d_s * restrict spline, double x, 
			 float* restrict val, float* restrict grad, float* restrict hess);
2D
inline void
eval_NUBspline_2d_s     (NUBspline_2d_s * restrict spline, double x, double y, 
                        float* restrict val);

inline void
eval_NUBspline_2d_s_vg  (NUBspline_2d_s * restrict spline, double x, double y, 
  		         float* restrict val, float* restrict grad);

inline void
eval_NUBspline_2d_s_vgl (NUBspline_2d_s * restrict spline, double x, double y,
			 float* restrict val, float* restrict grad, float* restrict lapl);

inline void 
eval_NUBspline_2d_s_vgh (NUBspline_2d_s * restrict spline, double x, double y,
			 float* restrict val, float* restrict grad, float* restrict hess);
3D
inline void
eval_NUBspline_3d_s     (NUBspline_3d_s * restrict spline, double x, double y, double z,
                         float* restrict val);

inline void
eval_NUBspline_3d_s_vg  (NUBspline_3d_s * restrict spline, double x, double y, double z,
  		         float* restrict val, float* restrict grad);

inline void
eval_NUBspline_3d_s_vgl (NUBspline_3d_s * restrict spline, double x, double y, double z,
			 float* restrict val, float* restrict grad, float* restrict lapl);

inline void 
eval_NUBspline_3d_s_vgh (NUBspline_3d_s * restrict spline, double x, double y, double z,
			 float* restrict val, float* restrict grad, float* restrict hess);

Single-precision complex:

1D
inline void
eval_NUBspline_1d_c     (NUBspline_1d_c * restrict spline, 
		         double x, complex_float* restrict val);

inline void
eval_NUBspline_1d_c_vg  (NUBspline_1d_c * restrict spline, double x, 
  		         complex_float* restrict val, complex_float* restrict grad);

inline void
eval_NUBspline_1d_c_vgl (NUBspline_1d_c * restrict spline, double x, 
			 complex_float* restrict val, complex_float* restrict grad, complex_float* restrict lapl);

inline void /* identical to above routine in 1D */
eval_NUBspline_1d_c_vgh (NUBspline_1d_c * restrict spline, double x, 
			 complex_float* restrict val, complex_float* restrict grad, complex_float* restrict hess);
2D
inline void
eval_NUBspline_2d_c     (NUBspline_2d_c * restrict spline, double x, double y, 
                         complex_float* restrict val);

inline void
eval_NUBspline_2d_c_vg  (NUBspline_2d_c * restrict spline, double x, double y, 
  		         complex_float* restrict val, complex_float* restrict grad);

inline void
eval_NUBspline_2d_c_vgl (NUBspline_2d_c * restrict spline, double x, double y,
			 complex_float* restrict val, complex_float* restrict grad, complex_float* restrict lapl);

inline void 
eval_NUBspline_2d_c_vgh (NUBspline_2d_c * restrict spline, double x, double y,
			 complex_float* restrict val, complex_float* restrict grad, complex_float* restrict hess);
3D
inline void
eval_NUBspline_3d_c     (NUBspline_3d_c * restrict spline, double x, double y, double z,
                         complex_float* restrict val);

inline void
eval_NUBspline_3d_c_vg  (NUBspline_3d_c * restrict spline, double x, double y, double z,
  		         complex_float* restrict val, complex_float* restrict grad);

inline void
eval_NUBspline_3d_c_vgl (NUBspline_3d_c * restrict spline, double x, double y, double z,
			 complex_float* restrict val, complex_float* restrict grad, complex_float* restrict lapl);

inline void 
eval_NUBspline_3d_c_vgh (NUBspline_3d_c * restrict spline, double x, double y, double z,
		 	 complex_float* restrict val, complex_float* restrict grad, complex_float* restrict hess);

Double-precision real:

1D
inline void
eval_NUBspline_1d_d     (NUBspline_1d_d * restrict spline, 
		         double x, double* restrict val);

inline void
eval_NUBspline_1d_d_vg  (NUBspline_1d_d * restrict spline, double x, 
  		         double* restrict val, double* restrict grad);

inline void
eval_NUBspline_1d_d_vgl (NUBspline_1d_d * restrict spline, double x, 
			 double* restrict val, double* restrict grad, double* restrict lapl);

inline void /* identical to above routine in 1D */
eval_NUBspline_1d_d_vgh (NUBspline_1d_d * restrict spline, double x, 
			 double* restrict val, double* restrict grad, double* restrict hess);
2D
inline void
eval_NUBspline_2d_d     (NUBspline_2d_d * restrict spline, double x, double y, 
                         double* restrict val);

inline void
eval_NUBspline_2d_d_vg  (NUBspline_2d_d * restrict spline, double x, double y, 
  		         double* restrict val, double* restrict grad);

inline void
eval_NUBspline_2d_d_vgl (NUBspline_2d_d * restrict spline, double x, double y,
			 double* restrict val, double* restrict grad, double* restrict lapl);

inline void 
eval_NUBspline_2d_d_vgh (NUBspline_2d_d * restrict spline, double x, double y,
			 double* restrict val, double* restrict grad, double* restrict hess);
3D
inline void
eval_NUBspline_3d_d     (NUBspline_3d_d * restrict spline, double x, double y, double z,
                         double* restrict val);

inline void
eval_NUBspline_3d_d_vg  (NUBspline_3d_d * restrict spline, double x, double y, double z,
  		         double* restrict val, double* restrict grad);

inline void
eval_NUBspline_3d_d_vgl (NUBspline_3d_d * restrict spline, double x, double y, double z,
			 double* restrict val, double* restrict grad, double* restrict lapl);

inline void 
eval_NUBspline_3d_d_vgh (NUBspline_3d_d * restrict spline, double x, double y, double z,
			 double* restrict val, double* restrict grad, double* restrict hess);

Double-precision complex:

1D
inline void
eval_NUBspline_1d_z     (NUBspline_1d_z * restrict spline, 
		         double x, complex_double* restrict val);

inline void
eval_NUBspline_1d_z_vg  (NUBspline_1d_z * restrict spline, double x, 
  		         complex_double* restrict val, complex_double* restrict grad);

inline void
eval_NUBspline_1d_z_vgl (NUBspline_1d_z * restrict spline, double x, 
			 complex_double* restrict val, complex_double* restrict grad, complex_double* restrict lapl);

inline void /* identical to above routine in 1D */
eval_NUBspline_1d_z_vgh (NUBspline_1d_z * restrict spline, double x, 
			 complex_double* restrict val, complex_double* restrict grad, complex_double* restrict hess);
2D
inline void
eval_NUBspline_2d_z     (NUBspline_2d_z * restrict spline, double x, double y, 
                         complex_double* restrict val);

inline void
eval_NUBspline_2d_z_vg  (NUBspline_2d_z * restrict spline, double x, double y, 
  		         complex_double* restrict val, complex_double* restrict grad);

inline void
eval_NUBspline_2d_z_vgl (NUBspline_2d_z * restrict spline, double x, double y,
			 complex_double* restrict val, complex_double* restrict grad, complex_double* restrict lapl);

inline void 
eval_NUBspline_2d_z_vgh (NUBspline_2d_z * restrict spline, double x, double y,
			 complex_double* restrict val, complex_double* restrict grad, complex_double* restrict hess);
3D
inline void
eval_NUBspline_3d_z     (NUBspline_3d_z * restrict spline, double x, double y, double z,
                         complex_double* restrict val);

inline void
eval_NUBspline_3d_z_vg  (NUBspline_3d_z * restrict spline, double x, double y, double z,
  		         complex_double* restrict val, complex_double* restrict grad);

inline void
eval_NUBspline_3d_z_vgl (NUBspline_3d_z * restrict spline, double x, double y, double z,
			complex_double* restrict val, complex_double* restrict grad, complex_double* restrict lapl);

inline void 
eval_NUBspline_3d_z_vgh (NUBspline_3d_z * restrict spline, double x, double y, double z,
			 complex_double* restrict val, complex_double* restrict grad, complex_double* restrict hess);
einspline-0.9.2/www/index.shtml0000664000113000011300000000404011012400560013402 00000000000000 einspline

What is it?

einspline is a C library for the creation and evaluation of interpolating cubic basis splines (B-splines) in 1, 2, and 3 dimensions. Fortran 77 wrappers are also included.

Features

  • Basis-splines require storage of only a single coefficient per mesh point. Standard tricubic spline representation requires 8 coefficients per mesh point.
  • Uses a simple interface to create B-spline objects.
  • Supports both uniform and nonuniform grids.
  • Uses periodic, fixed first-derivative, or fixed second-derivative boundary conditions.
  • Works with four types:
    • Single-precision real
    • Double-precision real
    • Single-precision complex
    • Double-precision complex
  • Four evaluation functions per type:
    • Value only
    • Value and gradient
    • Value, gradient, and Laplacian
    • Value, gradient, and Hessian matrix
  • Hand-coded SSE versions of evaluation routines for high speed:
    • SIMD operations to increase FLOPS
    • Data-prefetch to reduce memory latency
  • Easy installation with autoconf/automake/libtool
For more information and downloading, please go to the SourceForge project page.

Author

Kenneth P. Esler, Jr., Ph.D.
Carnegie Institution of Washington
Geophysical Laboratory
kesler AT ciw DOT edu einspline-0.9.2/www/F77nonuniform.shtml0000664000113000011300000010722111115073400014761 00000000000000 einspline

Fortran 77 interface

The Fortran 77 wrappers provide an interface to the einspline library that is very similar to the C interface, with a few exceptions:
  • The Fortran routine names are the C names, prefixed by an "f".
  • The spline object pointers are passed as INTEGER*8 arguments, to ensure proper functioning on 64-bit platforms.
  • Since F77 has no concept of structures, uniform grid objects have been replaced by extra parameters to the create_UBspline_x routines.
  • For 2D and 3D splines, data is assumed to be stored in contiguous, row-order format (C-style). If the data is statically allocated in F77, care must be taken to ensure all element data is contiguous in memory.
Boundary condition type Code Meaning
PERIODIC 0 Use periodic boundary conditions. The value, first derivative and second derivative at the left boundary match those at the right boundary.
DERIV1 1 The value of the first derivative is specified in lVal or rVal.
DERIV2 2 The value of the second derivative is specified in lVal or rVal.
FLAT 3 The value of the first derivative is set to zero at the boundary.
NATURAL 4 The value of the second derivative is set to zero at the boundary.
Please see the documentation for the C routines for more information. The subroutine names and parameters are given below:

Nonuniform grid creation routines

The general grid is a catch-all grid type which can be constructed from any monotonic sequence of grid points. To create it, we pass an array containing the grid points and the number of points to FCREATE_GENERAL_GRID. A handle to the grid object is returned in the parameter grid.

SUBROUTINE FCREATE_GENERAL_GRID (points, num, grid) 
Argument Intent Type Description
points IN REAL*8 Monotonic set of points for grid with num elements
num IN INTEGER Number of elements in points array
grid OUT INTEGER*8 Grid handle

The center grid is a grid in which the points are clustered more closely around the center than at the outer edges. It is useful for splines representing functions which have rapid oscillations near the origin and are smoother farther out. Atomic wave functions have this property. The FCREATE_CENTER_GRID subroutine takes 5 arguments, as described below. The ratio argument is the ratio of the largest spacing to the smallest spacing. Thus with a ratio of 10, the points at the center would be spaced ten times closer together than the points at the left and right extremes of the grid.
SUBROUTINE FCREATE_CENTER_GRID (start, end, ratio, num, grid) 
Argument Intent Type Description
start IN REAL*8 First grid point
end IN REAL*8 Last grid point
ratio IN REAL*8 Largest spacing divided by smallest spacing
num IN INTEGER Number of points in the grid
grid OUT INTEGER*8 Grid handle

Once the grids are created, their handles are passed to the nonuniform spline create routines documented below.


Nonuniform grid destruction routine

The grid objects must be destroyed once they are no longer needed. If they have been passed to a spline creation routine, they should not be destroyed until after the spline has been destroyed. The following routine can be used to destroy either of the nonuniform grids.
SUBROUTINE FDESTROY_GRID (grid)
Argument Intent Type Description
grid IN INTEGER*8 Handle of grid object

Nonuniform spline creation routines

One-dimensional:

Single-precision real

SUBROUTINE FCREATE_NUBSPLINE_1D_S (x_grid, x0_code, x0_val, x1_code, x1_val, data, spline)
Argument Intent Type Description
x_grid IN INTEGER*8 Handle of x grid object
x0_code IN INTEGER Left BC type
x0_val IN REAL*4 Left BC value
x1_code IN INTEGER Right BC type
x1_val IN REAL*4 Right BC value
data IN REAL*4 Data to interpolate
spline OUT INTEGER*8 Handle for spline object

Double-precision real

SUBROUTINE FCREATE_NUBSPLINE_1D_D (x_grid, x0_code, x0_val, x1_code, x1_val, data, spline)
Argument Intent Type Description
x_grid IN INTEGER*8 Handle of x grid object
x0_code IN INTEGER Left BC type
x0_val IN REAL*8 Left BC value
x1_code IN INTEGER Right BC type
x1_val IN REAL*8 Right BC value
data IN REAL*8 Data to interpolate
spline OUT INTEGER*8 Handle for spline object

Single-precision complex

SUBROUTINE FCREATE_NUBSPLINE_1D_C (x_grid, x0_code, x0_val, x1_code, x1_val, data, spline)
Argument Intent Type Description
x_grid IN INTEGER*8 Handle of x grid object
x0_code IN INTEGER Left BC type
x0_val IN COMPLEX*8 Left BC value
x1_code IN INTEGER Right BC type
x1_val IN COMPLEX*8 Right BC value
data IN COMPLEX*8 Data to interpolate
spline OUT INTEGER*8 Handle for spline object

Double-precision complex

SUBROUTINE FCREATE_NUBSPLINE_1D_Z (x_grid, x0_code, x0_val, x1_code, x1_val, data, spline)
Argument Intent Type Description
x_grid IN INTEGER*8 Handle of x grid object
x0_code IN INTEGER Left BC type
x0_val IN COMPLEX*16 Left BC value
x1_code IN INTEGER Right BC type
x1_val IN COMPLEX*16 Right BC value
data IN COMPLEX*16 Data to interpolate
spline OUT INTEGER*8 Handle for spline object

Two-dimensional:

Single-precision real

SUBROUTINE FCREATE_NUBSPLINE_2D_S (x_grid, y_grid, 
                                  x0_code, x0_val, x1_code, x1_val, 
				  y0_code, y0_val, y1_code, y1_val, 
                                  data, spline)
Argument Intent Type Description
x_grid IN INTEGER*8 Handle of x grid object
y_grid IN INTEGER*8 Handle of y grid object
x0_code IN INTEGER Left x BC type
x0_val IN REAL*4 Left x BC value
x1_code IN INTEGER Right x BC type
x1_val IN REAL*4 Right x BC value
y0_code IN INTEGER Left y BC type
y0_val IN REAL*4 Left y BC value
y1_code IN INTEGER Right y BC type
y1_val IN REAL*4 Right y BC value
data IN REAL*4 Data to interpolate
spline OUT INTEGER*8 Handle for spline object

Double-precision real

SUBROUTINE FCREATE_NUBSPLINE_2D_D (x_grid, y_grid, 
                                  x0_code, x0_val, x1_code, x1_val, 
				  y0_code, y0_val, y1_code, y1_val, 
                                  data, spline)
Argument Intent Type Description
x_grid IN INTEGER*8 Handle of x grid object
y_grid IN INTEGER*8 Handle of y grid object
x0_code IN INTEGER Left x BC type
x0_val IN REAL*8 Left x BC value
x1_code IN INTEGER Right x BC type
x1_val IN REAL*8 Right x BC value
y0_code IN INTEGER Left y BC type
y0_val IN REAL*8 Left y BC value
y1_code IN INTEGER Right y BC type
y1_val IN REAL*8 Right y BC value
data IN REAL*8 Data to interpolate
spline OUT INTEGER*8 Handle for spline object

Single-precision complex

SUBROUTINE FCREATE_NUBSPLINE_2D_C (x_grid, y_grid, 
                                  x0_code, x0_val, x1_code, x1_val, 
				  y0_code, y0_val, y1_code, y1_val, 
                                  data, spline)
Argument Intent Type Description
x_grid IN INTEGER*8 Handle of x grid object
y_grid IN INTEGER*8 Handle of y grid object
x0_code IN INTEGER Left x BC type
x0_val IN COMPLEX*8 Left x BC value
x1_code IN INTEGER Right x BC type
x1_val IN COMPLEX*8 Right x BC value
y0_code IN INTEGER Left y BC type
y0_val IN COMPLEX*8 Left y BC value
y1_code IN INTEGER Right y BC type
y1_val IN COMPLEX*8 Right y BC value
data IN COMPLEX*8 Data to interpolate
spline OUT INTEGER*8 Handle for spline object

Double-precision complex

SUBROUTINE FCREATE_NUBSPLINE_2D_Z (x_grid, y_grid, 
                                  x0_code, x0_val, x1_code, x1_val, 
				  y0_code, y0_val, y1_code, y1_val, 
                                  data, spline)
Argument Intent Type Description
x_grid IN INTEGER*8 Handle of x grid object
y_grid IN INTEGER*8 Handle of y grid object
x0_code IN INTEGER Left x BC type
x0_val IN COMPLEX*16 Left x BC value
x1_code IN INTEGER Right x BC type
x1_val IN COMPLEX*16 Right x BC value
y0_code IN INTEGER Left y BC type
y0_val IN COMPLEX*16 Left y BC value
y1_code IN INTEGER Right y BC type
y1_val IN COMPLEX*16 Right y BC value
data IN COMPLEX*16 Data to interpolate
spline OUT INTEGER*8 Handle for spline object

Three-dimensional:

Single-precision real

SUBROUTINE FCREATE_NUBSPLINE_3D_S (x_grid, y_grid, z_grid,
                                  x0_code, x0_val, x1_code, x1_val, 
				  y0_code, y0_val, y1_code, y1_val, 
				  z0_code, z0_val, z1_code, z1_val, 
                                  data, spline)
Argument Intent Type Description
x_grid IN INTEGER*8 Handle of x grid object
y_grid IN INTEGER*8 Handle of y grid object
z_grid IN INTEGER*8 Handle of z grid object
num_z IN INTEGER # of z grid points
x0_code IN INTEGER Left x BC type
x0_val IN REAL*4 Left x BC value
x1_code IN INTEGER Right x BC type
x1_val IN REAL*4 Right x BC value
y0_code IN INTEGER Left y BC type
y0_val IN REAL*4 Left y BC value
y1_code IN INTEGER Right y BC type
y1_val IN REAL*4 Right y BC value
z0_code IN INTEGER Left z BC type
z0_val IN REAL*4 Left z BC value
z1_code IN INTEGER Right z BC type
z1_val IN REAL*4 Right z BC value
data IN REAL*4 Data to interpolate
spline OUT INTEGER*8 Handle for spline object

Double-precision real

SUBROUTINE FCREATE_NUBSPLINE_3D_D (x_grid, y_grid, z_grid,
                                  x0_code, x0_val, x1_code, x1_val, 
				  y0_code, y0_val, y1_code, y1_val, 
				  z0_code, z0_val, z1_code, z1_val, 
                                  data, spline)
Argument Intent Type Description
x_grid IN INTEGER*8 Handle of x grid object
y_grid IN INTEGER*8 Handle of y grid object
z_grid IN INTEGER*8 Handle of z grid object
x0_code IN INTEGER Left x BC type
x0_val IN REAL*8 Left x BC value
x1_code IN INTEGER Right x BC type
x1_val IN REAL*8 Right x BC value
y0_code IN INTEGER Left y BC type
y0_val IN REAL*8 Left y BC value
y1_code IN INTEGER Right y BC type
y1_val IN REAL*8 Right y BC value
z0_code IN INTEGER Left z BC type
z0_val IN REAL*8 Left z BC value
z1_code IN INTEGER Right z BC type
z1_val IN REAL*8 Right z BC value
data IN REAL*8 Data to interpolate
spline OUT INTEGER*8 Handle for spline object

Single-precision complex

SUBROUTINE FCREATE_NUBSPLINE_3D_C (x_grid, y_grid, z_grid,
                                  x0_code, x0_val, x1_code, x1_val, 
				  y0_code, y0_val, y1_code, y1_val, 
				  z0_code, z0_val, z1_code, z1_val, 
                                  data, spline)
Argument Intent Type Description
x_grid IN INTEGER*8 Handle of x grid object
y_grid IN INTEGER*8 Handle of y grid object
z_grid IN INTEGER*8 Handle of z grid object
x0_code IN INTEGER Left x BC type
x0_val IN COMPLEX*8 Left x BC value
x1_code IN INTEGER Right x BC type
x1_val IN COMPLEX*8 Right x BC value
y0_code IN INTEGER Left y BC type
y0_val IN COMPLEX*8 Left y BC value
y1_code IN INTEGER Right y BC type
y1_val IN COMPLEX*8 Right y BC value
z0_code IN INTEGER Left z BC type
z0_val IN COMPLEX*8 Left z BC value
z1_code IN INTEGER Right z BC type
z1_val IN COMPLEX*8 Right z BC value
data IN COMPLEX*8 Data to interpolate
spline OUT INTEGER*8 Handle for spline object

Double-precision complex

SUBROUTINE FCREATE_NUBSPLINE_3D_Z (x_grid, y_grid, z_grid,
                                  x0_code, x0_val, x1_code, x1_val, 
				  y0_code, y0_val, y1_code, y1_val, 
				  z0_code, z0_val, z1_code, z1_val, 
                                  data, spline)
Argument Intent Type Description
x_grid IN INTEGER*8 Handle of x grid object
y_grid IN INTEGER*8 Handle of y grid object
z_grid IN INTEGER*8 Handle of z grid object
x0_code IN INTEGER Left x BC type
x0_val IN COMPLEX*16 Left x BC value
x1_code IN INTEGER Right x BC type
x1_val IN COMPLEX*16 Right x BC value
y0_code IN INTEGER Left y BC type
y0_val IN COMPLEX*16 Left y BC value
y1_code IN INTEGER Right y BC type
y1_val IN COMPLEX*16 Right y BC value
z0_code IN INTEGER Left z BC type
z0_val IN COMPLEX*16 Left z BC value
z1_code IN INTEGER Right z BC type
z1_val IN COMPLEX*16 Right z BC value
data IN COMPLEX*16 Data to interpolate
spline OUT INTEGER*8 Handle for spline object

Spline destruction routine

The following subroutine can be used to deallocate the memory for any Bspline object. Note that in the nonuniform case, the grid objects must be destroyed after the splines that refer to them.

SUBROUTINE FDESTROY_BSPLINE (spline) 
Argument Intent Type Description
spline IN INTEGER*8 Spline object handle

Nonuniform spline evaluation routines

One-dimensional

SUBROUTINE FEVAL_NUBSPLINE_1D_S     (spline, x, val)
SUBROUTINE FEVAL_NUBSPLINE_1D_D     (spline, x, val)
SUBROUTINE FEVAL_NUBSPLINE_1D_C     (spline, x, val)
SUBROUTINE FEVAL_NUBSPLINE_1D_Z     (spline, x, val)

SUBROUTINE FEVAL_NUBSPLINE_1D_S_VG  (spline, x, val, grad)
SUBROUTINE FEVAL_NUBSPLINE_1D_D_VG  (spline, x, val, grad)
SUBROUTINE FEVAL_NUBSPLINE_1D_C_VG  (spline, x, val, grad)
SUBROUTINE FEVAL_NUBSPLINE_1D_Z_VG  (spline, x, val, grad)

SUBROUTINE FEVAL_NUBSPLINE_1D_S_VGL (spline, x, val, grad, lapl)
SUBROUTINE FEVAL_NUBSPLINE_1D_D_VGL (spline, x, val, grad, lapl)
SUBROUTINE FEVAL_NUBSPLINE_1D_C_VGL (spline, x, val, grad, lapl)
SUBROUTINE FEVAL_NUBSPLINE_1D_Z_VGL (spline, x, val, grad, lapl)

SUBROUTINE FEVAL_NUBSPLINE_1D_S_VGH (spline, x, val, grad, hess)
SUBROUTINE FEVAL_NUBSPLINE_1D_D_VGH (spline, x, val, grad, hess)
SUBROUTINE FEVAL_NUBSPLINE_1D_C_VGH (spline, x, val, grad, hess)
SUBROUTINE FEVAL_NUBSPLINE_1D_Z_VGH (spline, x, val, grad, hess)
Argument Intent _S Type _D Type _C Type _Z Type Description
spline IN INTEGER*8 INTEGER*8 INTEGER*8 INTEGER*8 Spline handle
x IN REAL*8 REAL*8 REAL*8 REAL*8 Interpolation position
val OUT REAL*4 REAL*8 COMPLEX*8 COMPLEX*16 Interpolated value
grad OUT REAL*4 REAL*8 COMPLEX*8 COMPLEX*16 Interpolated first derivative
lapl OUT REAL*4 REAL*8 COMPLEX*8 COMPLEX*16 Interpolated second derivative
hess OUT REAL*4 REAL*8 COMPLEX*8 COMPLEX*16 Interpolated second derivative

Two-dimensional

SUBROUTINE FEVAL_NUBSPLINE_2D_S     (spline, x, y, val)
SUBROUTINE FEVAL_NUBSPLINE_2D_D     (spline, x, y, val)
SUBROUTINE FEVAL_NUBSPLINE_2D_C     (spline, x, y, val)
SUBROUTINE FEVAL_NUBSPLINE_2D_Z     (spline, x, y, val)

SUBROUTINE FEVAL_NUBSPLINE_2D_S_VG  (spline, x, y, val, grad)
SUBROUTINE FEVAL_NUBSPLINE_2D_D_VG  (spline, x, y, val, grad)
SUBROUTINE FEVAL_NUBSPLINE_2D_C_VG  (spline, x, y, val, grad)
SUBROUTINE FEVAL_NUBSPLINE_2D_Z_VG  (spline, x, y, val, grad)

SUBROUTINE FEVAL_NUBSPLINE_2D_S_VGL (spline, x, y, val, grad, lapl)
SUBROUTINE FEVAL_NUBSPLINE_2D_D_VGL (spline, x, y, val, grad, lapl)
SUBROUTINE FEVAL_NUBSPLINE_2D_C_VGL (spline, x, y, val, grad, lapl)
SUBROUTINE FEVAL_NUBSPLINE_2D_Z_VGL (spline, x, y, val, grad, lapl)

SUBROUTINE FEVAL_NUBSPLINE_2D_S_VGH (spline, x, y, val, grad, hess)
SUBROUTINE FEVAL_NUBSPLINE_2D_D_VGH (spline, x, y, val, grad, hess)
SUBROUTINE FEVAL_NUBSPLINE_2D_C_VGH (spline, x, y, val, grad, hess)
SUBROUTINE FEVAL_NUBSPLINE_2D_Z_VGH (spline, x, y, val, grad, hess)
Argument Intent _S Type _D Type _C Type _Z Type Description
spline IN INTEGER*8 INTEGER*8 INTEGER*8 INTEGER*8 Spline handle
x IN REAL*8 REAL*8 REAL*8 REAL*8 x coordinate for interpolation
y IN REAL*8 REAL*8 REAL*8 REAL*8 y coordinate for interpolation
val OUT REAL*4 REAL*8 COMPLEX*8 COMPLEX*16 Interpolated value
grad OUT REAL*4 REAL*8 COMPLEX*8 COMPLEX*16 Interpolated gradient (2 elements)
lapl OUT REAL*4 REAL*8 COMPLEX*8 COMPLEX*16 Interpolated Laplacian
hess OUT REAL*4 REAL*8 COMPLEX*8 COMPLEX*16 Interpolated Hessian (4 elements)

Three-dimensional

SUBROUTINE FEVAL_NUBSPLINE_3D_S     (spline, x, y, z, val)
SUBROUTINE FEVAL_NUBSPLINE_3D_D     (spline, x, y, z, val)
SUBROUTINE FEVAL_NUBSPLINE_3D_C     (spline, x, y, z, val)
SUBROUTINE FEVAL_NUBSPLINE_3D_Z     (spline, x, y, z, val)

SUBROUTINE FEVAL_NUBSPLINE_3D_S_VG  (spline, x, y, z, val, grad)
SUBROUTINE FEVAL_NUBSPLINE_3D_D_VG  (spline, x, y, z, val, grad)
SUBROUTINE FEVAL_NUBSPLINE_3D_C_VG  (spline, x, y, z, val, grad)
SUBROUTINE FEVAL_NUBSPLINE_3D_Z_VG  (spline, x, y, z, val, grad)

SUBROUTINE FEVAL_NUBSPLINE_3D_S_VGL (spline, x, y, z, val, grad, lapl)
SUBROUTINE FEVAL_NUBSPLINE_3D_D_VGL (spline, x, y, z, val, grad, lapl)
SUBROUTINE FEVAL_NUBSPLINE_3D_C_VGL (spline, x, y, z, val, grad, lapl)
SUBROUTINE FEVAL_NUBSPLINE_3D_Z_VGL (spline, x, y, z, val, grad, lapl)

SUBROUTINE FEVAL_NUBSPLINE_3D_S_VGH (spline, x, y, z, val, grad, hess)
SUBROUTINE FEVAL_NUBSPLINE_3D_D_VGH (spline, x, y, z, val, grad, hess)
SUBROUTINE FEVAL_NUBSPLINE_3D_C_VGH (spline, x, y, z, val, grad, hess)
SUBROUTINE FEVAL_NUBSPLINE_3D_Z_VGH (spline, x, y, z, val, grad, hess)
Argument Intent _S Type _D Type _C Type _Z Type Description
spline IN INTEGER*8 INTEGER*8 INTEGER*8 INTEGER*8 Spline handle
x IN REAL*8 REAL*8 REAL*8 REAL*8 x coordinate for interpolation
y IN REAL*8 REAL*8 REAL*8 REAL*8 y coordinate for interpolation
z IN REAL*8 REAL*8 REAL*8 REAL*8 z coordinate for interpolation
val OUT REAL*4 REAL*8 COMPLEX*8 COMPLEX*16 Interpolated value
grad OUT REAL*4 REAL*8 COMPLEX*8 COMPLEX*16 Interpolated gradient (3 elements)
lapl OUT REAL*4 REAL*8 COMPLEX*8 COMPLEX*16 Interpolated Laplacian
hess OUT REAL*4 REAL*8 COMPLEX*8 COMPLEX*16 Interpolated Hessian (9 elements)
einspline-0.9.2/www/c-BN300y.png0000664000113000011300000025244611012400560013102 00000000000000PNG  IHDR,+dxbKGDC pHYs  tIME ,ih IDATxidys+kꭺzJ$(QRhjl¶dL8lG#3$h%J$ v4XKw++?YYU HNDFnY7y}GX{ڲ??C?>`?ǻ</`~D?ǭ!>`C>Pͻ\?ht4?ǚA dx?ǭʻFm!Į^*`@Z{ȼSgWc|Y֗Y \?(n39c̶?`%Vq_JyR-̫n XUVvp{ ^kNT<&AJ98m}l^{C=15(#D. ~ScG-Ƥh=ҴR]3+1`Reqk5 %ii0K*!1ieSOkq2rgT#saxNFk+H93*o=6u$I6R8u@8}k;XejǙEJ?YZ:8n^p9E4JʃH9Vc )]&'oC׮iT.W\%.'O~Kzhmŷ\&kqmܛvq=2z=Koѩ):BT*:pָJ՚QN ` H\c>SO}4Uhɲ&&9@Ig2_GpXkIӔ$I8}4w3Ç)jH! I MIFF_NFI1ff0??ϽK^gdd#G YZ[{yLNddd [g/R) ƈ-o]I9o J)z .]橧bii C̿kL5L6T*Jtet\Z2ƌ#eO#CRVc~~ FGG9|0APT^366CGF\ȑ;z<^xC82DM<*?OcqyΝ;pA|e#8("NSFUD;t#+ykL 2')%J%B0䥗^^S܆jJρ}1Q.yj311c,)KzClGJZ76 ľf PV5z0 )t55R$FJ\ž:ZJڣ(C[o ,р!]Blybn%I[oz0q4K_q LOOss=P՘#:jyH) `?8Ӭ^'΁L ̹v=CB`t~!ÐӧOkkqi`vvv J9t6g9ƈܚ"hcPP#j.DHO4ϿPY6l<;Rn./}y]|2w$I00;;KTncbb9vzPיfllR4d\!df/6ʜd`i2[2o;$,lQ5GK<<}1j5fgg9tI u!kBk &Fc'ĩbHeݰl;8Nc-.j'6K}q]w=2I8[iZ"H-|ccu* F{jSՈSTU&''۟XzX8Kƃ <HsQ d,,+fYvXkz,,,p~iRCJ֚4MlmX!}1uYH$ @iCĄiv~06,8R][T:̶v\ۜc#" Ih2!D Ybcc ??H$|o~<##ǎ~r2?cll8NB8N ISE*|_%֚??i277 ?tǏ^S.i6H)xO05!ĀImvv Y;>,?0.]}>|__Gnlvq\Fndk$)d KkscSy?j0vS;[7N8kk12?F>z sKy'`&N5JR&J4S)e/} kc&z +s}!ZmI"W\Gk=2Fc LNNrQ2Gȑ#>|ꏎ(.j+ 1NXoiJ)ַuN>O2Mi*ZhcḨd AP*0&v˘^b"ōԳOr)LÑ//qr}IHg-!11YVn#Rf 蘆T.ju>1z }ׁVFYѨT{xH&?Z/mTQrVnV7WT|O5NV5`Z TXk''O299y|dzz;u ~(lZ@mU*@~NZ;-;@ܹs,^J^<{J}҃BDžkh!R٥ƭE[ ]FVVi )t=`'S(|_)6 X 8eqyk&61;6Ʃ98>T#%JRy %C mthM(U|neV䏥@u Y8w\R%zey#sJ^ݬŕ5Z}uu;~NjG*K)p q|c Q122‰'p]cǎq):thUk4JMm2plFr7[%D_~{zrѣ&8l$g=-Xg va kk4_xwj[ xRQZbd;bTk7;%&Uu)!L4D0\* C|Wj8vb~{}8.S.O`Kys;e?eD ^[`fA\޲D)R1Bz}o 9 ۽{ V}B睶3PT(˃tJ)^}Uzi4LLLQ:t)N8(NcǎqQ_<o^ fhVxRLqt0(B)R)e,E)B BFAk:x] E|zIU|4My}a!%'&&>&Jҋ"ʾO%]e 76xei ׂ ¡@q" S8&MU,^TsW.`z ]&ga!ə&43T40V Zxn'U&a߰;@. 
)xq{g$I*$I+7eɃd 8Q >baj FF`~S<>{^Ca㙷s |RueycsGlWEiJ' Yij6hT*Ԃ_\kQ>Wnlj8㐃v "\F 09(̞9õnCmc7ťt~'QJZMs&5)qkM+ F`D0B:N汗#%:gF5B \:~ưkڔd4-J)],:JY\w5gK}qcB@sہY&=(ѣNM!Tt| *]Ƶ+WzwS2̰m۟n{ ċyST ]y'iz\XX๋?yc|mF#r8~O硇7$<Jq߉j[\D:~5f%)!דףAu3IPZPrq,,ؕ1tra- F_'9x=T~&j⼇y(؈"^_YQ*J;`TDQNfɀJ˥XEÇ;?`-oӧ?S^vD^w{G}0<1eV60~H/.2Z_+L4G?egg3;یT3a;~C裏Op9qgi6tΞE)qAʳau3X_H]KIux y]Oܠ8~r5t)B1c 8T*l Cc Rk׮i4HաΎy&|(˄aVÐV '3iʤRTy prg~hcPwVƠɛ3N' "s|*:Bq#E58I 񑎏E5$/vY&r=Y*J8@>B=IFGkWgqg&Vzk=r=of8 RJ4 BJTkNf;К~!Bo,*?",c ia+I:VPaDHS$̼:}a4vmkxS"p]xHS&ҔWJ|#}2j1G>NT)qJM򙴵Y":I4]#UXJR #*bHԉ8ְiX Gz$QPC29s(7ffޮֵ`i]f}Q4Cwr1S0 T1&R38y~80>N?Os&n92;ͨAi/տ{2%o|sQVqi|AFFF򭵽-,FJ72Bno.3t$K1Ip&zѻ95HC`EniM1)ƐigBYHJ6jd=߶OvM4.}\!XtS51 YFM8uc?xG221A9g6gj~?ӽvNMr*gyW/^$:;,2'DzI jQN oGY)SQ9C- 2gV9H8NF98{, .Č>c:Rɒ*ht$RR++\tK-voU`dc(Q4P%v$7f IL%Dz4MIBMYeff懢{ִmVWW9{,_y饗{ZR.q]w DQ1M/Bb iZ8A"LyjL>Lcj #%\2JaYjg  1 0z=`9"b5'|gycgiqLTbj|<2Z3RR+hJƃ"g%K++*Z059B*NRRi ЖP0M1Jxq"^WHie`A Hȴ"!5^ǔ}8fIk)@%@}j IDATcc>LOc=GxJ##|㏣XYʕ{,^jq<_zT2jL!UjiO{W#(Xʘ4R:Az[FKr"NSRV:J&Ivczz^NVQ.ӫTJ+%⋼9T[طSatRɵ*;V3'EJ!Qtkx^crqu0I8<6Dŋpn37|7mC?6#qy/IO\)qB9r0/~i*ދh. 625ӿK^^`YR+,[YaרKL}FR(#05WiVrrOr"D mS,P.>c^7wӉ6^O8kG$,)(rr>qW,Es4x^$I:}{nwV$6\cAFOK,ެWJ? 
L~?̓O>ū38f;Z6 gX80YJ)v>)%i9Bcb"ֽZ޻bO fI׮q3P7_]))."Θ1h`!fr'w;Gt}:‡΄%*ح֨]rR p>1: ˗ysao} m 2lr Fryi8;]_DyeIJ5Fm =H3-J ,QCƪNW<RJe9Uv.,v:zc;/c-0$L=/ӯ\tA~<ǽp t&VGrfU.jϒ$y<v0C<Nr6 Z]aV+b^Dž җij>FwI&'KZhz̲mZeCkk3Gg>~V{gin*##=9¥K3brE BB{tt~Q#Ӟe1U !XY 0Tb)w*1'ȴ1!HEkM8Қv0 v"ӣ4k5FUryPwXfbEJZ1fVS=t=&eH͈a8QѬODH7ʵqKj/)ӕ ^gnz $ a{>qH͟c:IJRhMT-u|Ӏtn)SOWG5?搓ERy-F]\"SRiM$%RU"rLזMNn5'vOR^эcj k:zxF9MiWXqQ#K$Oga1(I3JX59hi<'b>*q`O vc\V}֢JRrl@=h%y=dʹ FM %UK>H9hqS3aHK H,BvW:;_i:Yu5 FGG+c|Iz!.^+RJ:v):y|!]|#G8|j9<݋ {=J VJ_iwή}"vUuE D~c =eڇVm]%HStDh炊&=]/@=Zkb3XFlQ-mml瀥tU҅ br8f"LqL?J'"~pe6J@p _R$Jq l?#5 B䀕9pӡ纴4ͽ݌;ueBxiJu 9$E6tM*Z3U!喒BK)q^-_xtGBK&a=| TlއI.RڼiA/yq|7OLlAC,`c:/>o:ϝ8\}ƶvJR%)xVK17x4蕷H [n`YA}z4& sຼo~yZ wb\H-5"2z51bv:]HM<ڲ,O%^AF+Hwk]{g`D#D!:.p_& DF"Pi%Peˀ-a$%!D} c(lAV#"a)r_bumS0Iއ^P TA >!3 kc^6`{vcX{]Dk&x2Ԭ8Aa(Қ~$ RpƆjm/RbTztZV9?Kn+8וxRzPi{mAV3R#y[!ZJQ^ׯsEΝ=˘s}yFO)Ԁ1ۿ[<{vuA*Z׉Y Rx78uGnpl{Wp p21t_ur".]ӧI"}&FFe`Vc^Ϻ6f\)Z՚4 yu#Gc~ԘL𶖞pzuxw.zCˌv-RpTޛܕc9Ikr)V8`M֖:&ψڡlЙ[.gſ۬5.-.wU*)IIUC(G\kZbM6)Hi. M2qV.dʰӮr>_~Qā^RY&uRJyȭ#~яN "@2z|%ѸN01 k#EJ(Rd6i"7f:WG\~G;{c4? m^}$ FFxf*uǘICxC ^zlȱch,ytxaq(5zV$0X0qj!s588%'S݅}ᕵրČb''q̙H|B噔^+"]@XCoC!dHEHI/NPڰሔZ%db]nj!Ih j0eDJYM8X({e&Ô띘HaRQ)XzfC9kiYB^ c/f`=֒~$!(ĆxOTT.gI! FM#V2!'1ٕI3,c:/$1qMB6dӢ#zUeuh~rp<9*Ǐ3y' &yfVH*q']j3Xx8zJV.S<܆aǜtXY0XZP.c Z (gzdpez$i"%W9>St[m("IS5xi)JI8YV0bE5!pq,$v]\Tnu)).qHaxT}BM/NdJ!Zg[ 9bZá-R2tHgnek:mJ13EN"o.zG=zG?z}1p26Ez_e 7Ved;.Ƥ1iKEϚ-:EWϻ]C&` !yu~_dv|7<}kkkTMm 8Zb(!>uNVbk%uygQa{X/́'u+$++7֚N~Ӌc9X Ib9 $C$A١p˛Rhn]Nc:V!~$I}SxL%@|)N@c.A,Do)QRϥ_x˞NVke׷E[`!JIoE /☱(5 !RJS>?O~Ǚ{Q)VVX\X_鱸?MrJP*" qVH[+ʶm/h@r_tyco) 'ЇxGp0cRhuTJD) c1QuzG o,IϜO hjcur#m2Vr[C^lZ2 Z'%0бk] Hc]1X!lR݁TyvR)I }+!!Uץ$t[>l6o7 t_ˌM-$ {? 
_ٟ]UF+AzHƴ2Q޲sMz;6dB붩V/yݼ)"O~S׿BҚaiyQQHUtVy \J͑Sj~ϫ#ehl#uhj<}ueWJkybI_yw}S\x:Ym&~#:APx㸥 `5y9)r^I:9BK0]sZdمDi {}DaA.kIQ*%7ic?N;>a[beSUĀ(Rs=ؠ7&۰5wKץ^&IJ!ϕkQJ!-D0^BK.s n`vى}9ZSvGek9X99iJS OʧNMi'qi䁍0iH_q6̪8Amv63&ӥRY\^3l~_~kjjV;ޱjnw/F"DhA~ҴMtt4gt^ET*3j3jGWK9PZmPpv:Xꓓ|Z \Dge4IsVi42"7PnbSn}k9\[!]x)!(((h!H=^H$XC2!$q⒤ 17!g`{X@]d9-q ,۾W*$ $A)Ry"vX~Q?nxrhv%L|i67C7MvT/gr8t$3-kQ1&Nt Ɋמx/YX߬CV鵳zhVm4eZFk3US/)DLJ(6fżajwARAi;$J!PZsaa3WpʕA,juz )-+}R+,,<(ASa|yk%%.^L$|cUhWn֙A4IQtrC,HjtLPkgPAY_-$ÄXZ1exIQV(44If_1 ȴ$m L6afN9̵&[! *EbX!b1 .r Ǐgjr܃͘\L4}2˧OortrX}~4ʭL9Ҥ1%0zF: \ݳX!n7?>wR()鬮rgyhra' C_ݮZ1~80&Z1 J6B) ժ1F4U,o,oFf=E ǩP3}DaNH)D-` hb(*&wfUZ$EC[f+,-OpC;dnQ3>ȃrg_}g. `u@'ўޘG+?!KDt?1cs0z߳gM8gw{looo&+ӌ8t`{g..Ĺ ;mwv.,GR"c\p^Am"DDq¹Gf[[0L>{123$V`/nI˷k6D"(eŃW@/ϏOj=OXNT]NYw_~o$O=2 Yztz,ꃗ@sI.KTQVlxjb] VwUs\,@J3mF<8ӉFƘR9"2(j=uP|g:|ri_b"˒ L]4zy"׉GylP@^S2Q8y> ֊c3OG&=_i9>P`mllpyt:q|;z%eѪpcRāӑ2:U((J7Є%{ı>y9\YYaee~;lm wɲFxt3$0=6 ԇ/}"_sms"'0Y'$R(oQw)t! oϕgp$WyaՎBpXRNߡ2^BK kh^7/8nW4'MoZшiEb}RsBe+/g9Q/.`@(%qٷw۷ٺ{-nI#- .Tvs'yk[6Xz: C4ogQ(#ukD R^^75bq`d\a QoLL$&Z׎-=O~RZt4=,Ɣ |+:.3|@> w3a??Χ"/\ߝEgvJӣG(L*]a#E@PQ9o}YY%1!жcM+^Ns(cPUE* /I bΪ])[8#AZv.7wv>˽Nn~M/j$2~'N~GKy)A1"}c41fLwpO9ŘA&,g糌%o$YVJq j)5_L(Gz=.10DCQ׳ LykYP;;zuכsy4blo7`uGu|Oi{Kk37,// Xqs>eAPש4BX|euʯq(T~zpFR;Ug9Ǚ+1q09f~Q8V{Z8:j%P*&Iz־}liZܺG0g?q>Q--'{3#!6W7\%25ԵGeMSǀ@]RS ]Xd6e8&HV1 4BL%_7_G=>tL=YWW16WمVSWEk Zs_D2 Te4f7m Cctj*㴛dcr Ip?q <_,C2ƹ2#8C;Y%E D`R:K #&7|ix xmxE8y7ynu]礈}5ql"Eְ1:6|mO`ks6G? 
t׷B f׻>J #\c!km@Uq cj*Q-HdWy<=hj%#n :|jO~O[`"}ע(IQЍc8$ ?7.qf" *r'e#1opd%ُꞢfٕq2ϩʲ٩ XKTyK~:MYMS8k5ω9kֈ"TзDJ"z~fy4g 8n~a^|E:Ω>3_Oi+0rN{1,P7%5$=.CCHS،0,>խ[ yze C&ߕ‹7ݵ%Qn-qf5jh d%W."e"e01 "weU49tLk@J\zҏJp,fx-s;:o-Vvfn֢G#lYb !>RģϳDQUҬ~uMQ E6CN%JFiKnn Ie8̱vBYgS$iÞDnEusk-{a=h o1s[Cc؎"nwXuXV_fr}~>h+`}#_=)#I sVV 2,c:,#/ı!%KHJtq:\Q!HW-<"V }w{|X_''7Ъ%,;;]M*;l?M>̈́]@ηzf(J !g X7eNɫ) Ign0q0Ŷ3߰O(5!G"#JSQ4؂H?͜joyAeI昦}J%~(INς~i׳Qb4ak/⏿Zۀ?bKE+#gI!,N銈Q]7+Dիw 9ԑNEc#qt .ݙeBkzJ6-琢p&)H(WHˀƘ)f8W1^!+?Gbs$@ UСT:dkPNI(FDj[ksX`3]\l5Zݖo%GsvU%Cbϣ>yJ 0iC!I.;psUCG"4%enuix,tCA?G3ms,=.~UiK9E)KIg*65ؤWl3Jp bp5g۬}z*'%{uMNJ:GMl2r8ǭ[Pp+5srTxavGt:Gu]1OOAI?vADADY|=dдAϳYdMvos8w@Y.F!5|"HlEm$Ӕa8]2D?Moc^Y3Y\gA X?9zR M5N4(j0˨"٩TEvy}c؜}7 79<"!Q}n'f-1>>wRY9! ZbރBf!0d++e[V^b2I}y0i ((8wܱ6/sQiyb41~btRd4.@Wyba !sq-ȍiܸܜLgm,;9*Gw PdWȻ]\!l1t%ZkN^ (+Ьb\%DTu,{YEeRJPZ7|Vk"i_4w{èB8t7oĀ &Z|K P3e:Ayd971ֱraq;I(yxNL<%J~vBr@V<#ܼys6m_~ijDBO#OYaz.劳)sI­d)ag:njq[ "zIXE& Z&-| %mLų' ʁG"vp~WNbd3ή~4ɦr6IipL##[T&eD1A5pg])Ÿc}{1 @m[̓sD %:iN=4=7)$0e$ .\ƍq<{꧵4^ -VD661E%QB*tu@8盳Md%3'. 
Dw\827e ?+{.b2HiQ;Fj͸-8VRMώHHh+,*R86Uv܄}1a +a7_6aR=-u dT&Kg&iF;G\(mz%X(R2D]?` BeK%S3јZmwQU0Ќ+fBPK$ 9Hb=Ͻႀ|0;O?G{}?`^p6W_~yO)ڌ .]Luz<úƄ!0dSxO1q H( ,,`t&,!3<ꑟ߀g>-adu},sElAKk}l$`y=َE^4ׂ1Q H) @9ިy#gwL5"j; 7ө8u 5"%ks0̲1Zk_U>M4#PN iR^.08[S{68 :bRXpt(R DQFLggkkkh)˒,뚭-￯Ð~R!iA]oPgcH B!cW!5 A HTecj=7v)WoUt՟ceLp"f2l=e4uK/K)!o性3uq1O1wB51æ d:aȿ> "pJQ45_<*%S V VQ(0l'*ٛ;|KֈC爭m4+XÐ@l9[V2G LPU1M:Tp]1NYǹt~zn֍bLMYNg`!\v,lU!q,ѺÓPԝqGDȰ V R}z,hKVf6ҹ cR&Dѫ3זHdvڛK]׌ckC]ooǫ"Ir8^#wA%%:T(ǖ'Y]$ZHK݀&w%ӬF`|gw;/1a%Hp8#X;0zt) <9@,o2s6צ!(ŴA+EeMM cV)qJ}R8jGϞO=<% px6ߏfy衇~8ŋжq9$Z'L$1;s17KG,%DJ5Q{#9mmJW).&=zlvvunqo#w ;K(F=*#KmA 5BJJ?r;[׍hv(!kIJ|Q08 *Lq"Ŭ2}c?ƙjvB@7dc篪9A]/2\d0~0 Nâ_e)mbX[ iz7)}`!`gs_6~Y/:z˻iU6:BqSCJ#UMYnIw ngz8<TJ1ë́ɡDZt&.& ` #ۉc3O=ř `u%c=.x?@UU}` .hvZI|;ض=U׬YyQfc@)p4)0 xO=R¹sMeL@}RkI "Xe% 4eJS#vd, kPGQ#zK ׇiܜ;WBL(wG*~׸^J1-9(avNK#% 'qNcLIUc]ﲿ%_\ty#!>Evw~y;S?rs1\ϙ*\Dz#5b2:q#O89VVJnG1:M~nΦ|acq9aSY6ӟ4m47'N|'`A+ʼn+<($ ιw6~7ՈErjk ֆw 羿5Y6NO: z= ǝ;lGut7ΩlVGN.GpQ GXL 1[ &Xlaiy^W?,\#|yn׾:Oeam@]q!ǥ-υ(H)Cͳg^g9||_!|1.}Q _".(\~Ap\^Ju8j-EJC-KQ&ǹ LFm!&wcY4ȭjwMKv1Wq)1aHD+)S |{FYܤ? 
F%iY[;X;d$pZZhgCHӚY< /z/F ǎ I!\;:_H?э 4Y{{ /d[uͧ~_'ǏPb}Qr&pkTz8阺`Uzd3(ޠ&2q8fӿ:SH:(:E^έ5*z87rsURtU:{s?۷-c+zdrEݞJ(_v7Xt&R  o-79=JOp" AVKOi|W^pvE PcSBovaT乘ܑ p9'1.S&۬8vXb &QO2ˈ~l jXv{_E'F"7Ʊ6z1j,|7A#eϘ[KH8(<׿1?{5~ _'?c_?pP2&(YM];oœ] qƺJOqIl2B3W_7Kք&ÛoȺQ蟙 .2/K\ Q(&7 5(5ߓRͱ6!O c(BB_Y1l*nˋ~i?qFcMp!;2% k)Ь&3=a* z=KK)@~WuMUUSeI5YYˍ$oh /]vϯ/pzx<3; @<ϝZш0IZ%j:vQEE)˙8 C7dUOO Cͤ'y@kO) |-JE956o.4Tt@28C s(Nќ2B1e+N`L@QBH!S5 'McgPYTu٬/u>ri{!*C%Ӛ%R/P &h=K)1o2w8RIdq5kN?:YM Ӣ9dvxH>W0xG~ `ii}666[1vLvbV9r8Dv7JH=Xk) AE<*MyOWq>i [E% /Ӻ]BּMN&R;NY[ wb# MahQc81[_cO?}m?}k2)C#'Sz×#"d`5:@؊n}+ao] 5\ ?o’ h .Q׎<`S'=p8~uшIQkew{ qCȐ3-K7\lmd<CsX,ufx"RǬtN'˳j6Һa{|ncC |~{oj VmsX{' >WVOBPv:!4 fmgjE,Cni p*X&_4; S3 pC6K# W = ٭E&ucZG=;7` 심g)C, #HA -Dpw  #t1/!Gw^Qe.Z'-s"e:-p.YtEN,z igygWF!Yj]{ 皬IzQz-2= af; N0 z Sc{ rN|:Rqܽŋ@N*淿Ͷ{/j9ꄐ14'"NNkA;O[Icjl"=صE#c!x- e$auZ)jJ|u5]+ (AƷڰ,Tr:EDŽTx^V~lQt+2ـuM).Hk>vS:gfClufr1T΂38,R:dCcWHL%oo?JZ$Z^u4D!PᜡY _mɝŝޱ XJ!fos$9>c^v{Zr?`0@)EUU>~ӣӓ7bSڸuvlۖt vQ壔"vi- x<28y޸X^F<2 ªBkt\c%%2!bz*c1tyj- PCyxs)V5n3HgXeEh9)p@W GhT#Om/?G-TH])Y=bqrH ,V)]&7Z)5E20F8&IUiRjϝ0ϝc!|K!!4Xk:h,͸%+?04X4 ѡB"B`FDX1UZ? OuH8Yaz8YX˫[[<6r!տWߓZKE{Xn0 )==prϥ5BkD4R6\+Y*E$c!RiV|zRR9,r-m< h𣣗2 NxF>Yx4}xt-hMw(ȳ씛uccGiC!/]DO=IMҜ.Ja5AwYyb6+Wx9dٌo&7e-;}󪝭p&~sS̾]bO$Ed\. !eY S %~uT1I.^Ԭ{[ ӚW }{iZͯkbMU\M , l]so~՝!Aa/b^UxQY3MAm ʒKGG١\yeb1 <Ֆt>Jm3/Do0'Qa!CeZ$@KsE7pN$㓫jry;>~3(,0VY9;<$+KDRÇ\{F/?ou$I~Ҟ?zcSf,)RT,\HRRjÇǷ{O]O"_En޾[˦/K\dd?t؀ǏY01*IBpk~6F`U!/pj=#6;n=B8jBx664W! lJaKJ'f %eYբ*Gy}dj"QbJ eWޡ牕; )^4{E;yeKs-4喌[ĝmu\8TUM Upр"ᳯ>b_PhrosDZNﱮ&')5,HCej1o hA re0?F}8^Z=5!u; 8_;;_oճwk-W\H$ ϟΝ;g~p `5P] Ry0SBpU`\_w8ܫF?My 7YKN|+h*ٌm6ҥq~7N/vY@Ze8:lB.ln}daȵÈ%8[򫭋Ə5`)r _$+i7l![^'y,MwJJkՑi]>Ɵ&t UVn.7^DpUEqxXrPZ5Ö﫚r" !B\W{crZB+!1?~_3 -[!Dڧ ^98nj둗%Ç'mHQI,J!Pt5(܋1?umowKbA2f#Z0p 6).p4ŹuK"y^PuЪ,d U-}`6fEu[j:df8GwH4z?? ļcRr3Ujn /@O|M ZO9!(`)j<B8n YmOp KpP(l.s>96w!5kC/|zD Dq*z-}ΡW`x7wv׮!$ؠ8:j;$Tއ5}k4֒?}HX4U~mnRj խ=͕e 3q yRO>9ڗ29/ZDR8z,ԓ2 OvJV=9T0@U8>F}|֟ y>$lǪ ysikk/(ܹsV>ƯjƘ<e})s=? 
i?2].5mecy,OP%p\v{²knsqs]Ҽ sTq>^Fose;9cN6Fv^V "YDw6<+ڷ{Dfu~"c@YK嘩p5hk(F1"̈NdXSxa Jc`pN!X(텵D$I+Td«"E@Tr\ `+ҋSy/[p=[q(B'Ǜz+UUN͚>q,K| 1h-XfI)LYR U\JDXq/?-ev+B[z9ZtS /]bʧ,Qz{*X+X. n#G{Cvww i}2A pz],#+ r!M-eY]Y*iDZUrqu2~8=Be$6̈5 ސk~t-zyWKtUWZT 稄 KшZmj81Ӄ1a=2(HDĆLG%$ xϑt}@Ӥbw}(\$!OA)(u18[@&jeμ@>YRq$kk1_1D2&<><8΃) @Ľ.}a{TS ^'QUfީk˛cv=F=YWS墨ui?. *`|_,˖jZAx$KS}ZPK⽧Z1?\8c2ZEx׼7uke׮a:mԁcZ!Wma{8!v>缵$A Ab>v ϩ"r#=V##P8.nNQ*6Ny%.p.C{>zaiܭĊ:1Yk)>?g?qkfJx͇oƛ`56D1 u؞;09T+!Ο_xBkB.ЋmSqDydEAQE=`]z/;;;O21{XdqLPͿH*k9EPJq }g(:iàWljnEԱ9ܽKyB? !m;WTU-AUMwj(;?@8G|nҷQRB)T[FࣈEǬ:֌!q#upm]=ZhY)2ÚڶE8NZ0Bp/==sb0]/FDP*G)mv}>y~?G|~ Sp., /?/ћDx ߅?j5֮<=I {&I}ߏjҊb c(u|zzi=H"/ E"Y`=y\v TSEAytW~CZ[;? )d>'* Pf 6xL Ms7ެ'4%;wd6LUJJ\sR\آ䪊-|y]_Gj ?[;jݐXyN\*,bbA\bD7ҝ R;ehY [ 5HC]ՋETLi< ϑDm2>]f2vZRPj ʉ#lXyo>w=Z!1p~$xo>*0>eB] dUlc6T3pm>gEZ>99<$.R/4Hp.g-8^,)/]y'?5ak4*,)%{4enF.!~:Z}MN{_fO^}Ҿ U)zomQ1(X?>k\eaYukS|i@A+%łI,E(`\2 f1ZX,D$h)%e% qֆzBG3JX)t N $ }iF="JnjCB5s,#l8`eL7ב rGHog] Ֆe.,RD!*hA͈8NgK^?_OQᐇO#Bya;OK'_o37/O&"yn> ;q',Nǘa$vSJ8)k$e.Yw g)۟{YFf3RcNv1(hc)LSDMhW5 E5T\h",OU;|N,Jխ`;OF:P9׶aĚNzǭͳ @/;iUTOPW1<* ! Yc +^ަH6ߗ#J?hdGV Ũ>U!箒 Z(PʒszI7!MOA@ ۣ/e_+WjΪ(/c PVk=p&weޱGI~Ni4Sff3Zsk08#*^N/a:PiJck{!P1s&^rvJF?Ԗpsswy8ZY ))3/%5t\?1(EDc)jf0zOay ÄaF )J+!1mtZruө Ѻyg2PǧIv?mN[TW,ap5Y XL+E?X10ϙ-Hk,g+j"REB0I 4 ϶-D;ݞKP B#Ts̆)ZDtZ8n:<6J_P,^ove; +1]0^KĻT[-u h)ݩe}ezXP }|:𠌼锃$a֡?kZ5rZPKQĺs{3 i+"^W' '*(FZ'AKC{}8f9fS[]MUS&Zך(p8XNwb "'eMZ0vUFL_ƭQMTuM4u^j܉`J/9*//`;ڒ"-NK"Ivvw3z)ܻǭw/}7npRX`lFiR(i\,nqf*\EZ/˸<7Y:^:~6KH*3KWAkW^ۿ?AKĞTrqY0'$)%JtW^}cxu2[ň+87NTM;9>FʪLK.o[C ؽV}oS b@)+0bZdy_k`ybAӭX¨ueh"˒"0g^6N.Hul31*qo! 
'w [DŜx:߰Rni%7k<0WqjlQQy9Rdgxzx^M{{>[Occ]w?_*I"\cB8O}7I bSeLo{@B5ټSIT: OK(UAJq\.9::IX&0Nʂ9)RDB0Y~&XfAΠ<&r@*=:ЩNet[Kc'˸e8:1#Hqni+kאrJfd9cn,cssGiC .tleU\)^'!,PQԑ>hUVh|>DŽ=?\BQ9ړ,VKܑϣH3D4BE^ vSm a+c<}ܣ1ekDE#֊ԱHfc6:Fݢx9rs4Ͼe͍ ϟg}g߮[c)am ;P:u'+pU!=(ͰM5|#CW[ t0 6[+!b]"%^N X!:lU2{Y~ B0LSFFTܾѣ:|GOz!RtBXh:´$bk؈Ѯ;d ώz*^xt$!ssw?t&bf3T(t)Ixz c4e0Ił$ ~ĊZX,jQk甋P]Ƶ3jQ72ylSuXY ^<nIFzn %> [FrTS6ygsje;bQhT>NÇƿW[[$Q?i>ݷ0ǘD f깛#g%L;.">U}#˒]v)0`z|o~bYIVUݿQ%YUQed~Jo &Z6g-Z"A\du8Jwږ_a{pJ*vC(kyɄxn{o߸>YkUXw*h[*4naO"L)&z#hۯF{=1bQlu4V G.O;j[=<|IګxsKJe2ѥ-P.jwDf,k]x {fN2#2]p%PosW)5!r;&1ӟfgXMPeǥ]Ed:aٻA"WDd9nX 6MtΉ|QsfB)4{)X;GT𪏯TX$Xuʼn}qy/~O${43 oF52EAϭy G"q ŋzeLGT2b1` z((G+XiYݻpcƣ)UY^$/KJc(Y"󢨵kJ$ =qUE<Sթ+X,bȅbθ4'!5xտj!!J :8r+kٟG_)G8xG^`gH9KS·NI&IR2:(Czx_+J[.y^ GqE?:H೬-8RTGk zƅUXkTǘ 3%3#򼎙"ƒsIz 7! yxŸܚcʃ:rq-S7Oͽ=y?K㘵m;;k{(uIx~ vKq96d,') _tS J!B^R'E"dp{[[C-YG{s\aC֠Y2xmʢ,KjӈDZ׏` 5JamD-DJd>PYIhleKנ:iU9?}ya[׻]9%'G]eZSwA9d~( iQֈC(RIS1HcIpUN0>RUc\TRv)G]]>_P΀'b ^;gd,{{kxUZǯl-n  #z %+s|-.llpasHJ_ѣG R֦B H<NjY8M)e%fs`Q8GpYv{fBF;eXySoc}m)phᱡ%Rړ^QA΍()gQr8-=`if6Sp(R+JU|gooki%{:V\]f]g9誂7Jd0[%~+/ @ 7尕x }AU.ۏ1$nmdYҏ"ŝ!_=޾[aݒ(ӏ". 
")O~~ce|=6CD{o&hdʲ6 d mJka2gz0E;3n,9tuXge㭽=~:' N<|!DUE,#*pc L΃}TCQ!]BkFqb  k X|F!嬕S>)haŠ֚.7{Y\o}3K?M6zvP3+EݧR(N)E5R%)'h,0U.qf7 Q cƦ4%tx ܹ3IVOO<8,)L-p^`d)S )tpK浵˟Ȋ,T n:~/r ;mJֲ6Cn&> u~LM&T.w孛7!yvDMGi-e1$lEF#Aµ!Pk"B֙,B`Gw5?#*`I)e^uOk1qqc>ÿ׈u\'~y»^(t+F}Y`ֽnBٱָ*򲤨J.9R \"^JG45 굕C4)({KfeO;q0(+Gpg"eE1qs]O"kr"x":eX7c(s ~,C :BFlw:ICs$;GOYҚɄ.?O+Rg͛ P#5-77L sm!'*IV썛j Tjr(4w*WkL"B H VOUsUX1G9*aQ"dNJj`-&rY ,`c:d%˽zgO.͵=AႷTDN# xNSR!sp[VN&V,?hέe=_w/^p޼}礳ڗ]tVa5kiJ"%2I0eɼfU%.Mm IAs˥%רoc@W-Ur2l +(p; U_˗ X#.tw?;wkZGz'.[_نtdpx3ns>\XpRy NY+w\8 a޴Z#PT{(|Vٷ^,!>B'BNz*zQP(" #$8&kIYYo~|[ߪ!wtxXoLhlRjgfRgwwsGIxBb-,fPTgX[)nyXO KZ$G CZ8wBclnn~XX__gkwꀱ&[QBSϯI,>Kv&2uɪk=ƍ!ejx5g:w9+YVZme^r8QVAѰR NBP8$ژG= x#Chs=bKozMxx| ^CR,oD,HcPM(7*xwùDʹJ(j_띴XY%K\_ƨsT|L| aY]V[α(֛産Q][Z ik;뮥uУ%!뵀6ϹI/gYY!#+* yc(ɗY3 4۾&5$9f08 MCaEeX?k{oAڙT̲ᳯf1翜nRMqHhY%486Pu(2ۛYQukf;_sUhč7 (b{s+wS?K22e|)pl*(ہUS,ɲ)JU܂&677֧9[99[đe:pۋZ֢XfQcT)exi\>=*9y=r4Ik!pc &qT5^?mE#H\3R C"Ƴy ;o^e`uim Ҳqx>y`iH^xq&#`c-k;xY31nEQxL)3S6Ѯ1Qu`0e29lE5L&/2&Uŝ^q#H^E%Bd9;"gCvHNoew/oQuF[_wr;!7(ДmG*Gm,J k)cGl IYq>I{}YGD`Og^ytWcvJXXmE࠴6X ]G_#K\ ϭ{Ƙu(%.Q ^`c€GX*]IoZ ma}mAl :l@Ἃ/kO"N/݀aJ)/ޣkǵᢦ,Q Dhڟv^c?좆~v-'{v Iub\EƐzlL{6w #Vy {ѺD)Msn}6Bg)k''X-t ֚''̊"IUUz2EJJ6ym0uIUf_TkD9~pF.Z̫!F-D Q Eiƚ}͛4ꋃ矞zF)S[ $JF!4 r'<BR)IM:Y&Ow)H2,-?@Hln=  3d:UId Fྀ'2%V붛uBvQSHN3LvQ0I 8YK!',>+e6cˉ"]O2{ugrcŔz8 X#F,oZꃖl>gA-Iꚤ( dBpelmltkm1&ĉ]TZXx! qP\Xm1Eʲdl֧ORcEo8[`kYY5kLu^$wi]tvٜϹ:1h m7h :gc !0$kLUؒJxQT6Z~덯|i_:ǚVsZu`#Z~fN3Z5!Pr;wB\$^F${*B)ӽﵯ.T5'!a<˃`:nlHu|I* c! DXo%H{ODū( LRR%IYH1h r{(BiPHk6CQ.F [_Fh& -VG|eCʐ'5֪,) -S,f!Nuش6GL) 3+"QD6ދ\Ï<hP#z?F{EVh@b} e-,ҝqy.f3$%=Z]k"ڈԵ9s悃C*HSCyv[8}RP] 0Z&TR#Cs<;T= Wo@)cVED nbYƗ HSDJۧhQC /\3p뫿3~k23+dR1`BeC*+gg\Y.ɀHP]EYj^t$ IDATe-5lJ,(Z˲Bh/@QcVkK2m?7. 
9ʲ/<f;vOOy2c2=x@,yA.#q֗clhNn?i #67%IRMP(ec$h6۾[Ab!)˖_@ÿ#09)+WD{+IJxgR&<|#3*yư3,O9 QJDb6[ݰ\:_CCl [[<8I y&>HyK/ܫ/U ܆SWx|1)֜1~ PV7rWdњa.J$e,XOb$j8e _k֔utdeLK֞Ps>xХ$UIӺk5/>x9_L&ݺmƃZE`,87_eK7&DQ;WK8g:B^KQ1uhiOKhٖn~ll1sC~`]c{lySx\ DYV5Q$a`0AJm 8[ʜU[srD2W#Ncp"q~S$!,5(b3pޓWk_qoV_֤[hD5y w+lWto0o&w~0_&+{sK,WHpsՊS1u(iߥv遺ֽaV+EirlF.|{XYk~zw~?Oy$IB4'_b6quƄ+Pڪ\Mmh'{HZd 1`]8_v|.HVw(X[݊L@$A(fc?s\kE“30 ήE9[8h ] 6* b91$sMCbQuA-) U𺒽ls5[Al$ֺ4vBTlDiCvA6֠;؝0$j)ə4|u*SYN(1I2eRV$}IQD1V/^JԿyYgÏIZ~h2*% E㪮gW^!$>+cMoI#Z]ᐲndl^\HN wׄ*4+bJCdn'|xr|6F>r[bw( A]@++Ѥ0D}ijкfH(K12 fsK%E;J 8"`슕L8Lv- Ɓɡ)Y6 ׂ91)5 G!G,K$H0pS).HIL[*u{("p >Xk᪦pAP^B&W9HXL%?{VKH%K8~Sυm #j6Ubq\I 656]Uα7|j޲ij)f{<*OK*.*MЪu\=~ªsBSnbssY7uQegX"jQwRtorT>BjtE:A+ѝxRLÇ<\,.sEBZWƪ^%&;OXAѲه֖D)˃1Uvm8?o6M>Fyx 84ǚ9 =%WR>!-gB}Yy K9D]ev"B«79bl|"8pVl[Rc!-p)`FJ2J)ňCj*`9飒AAQΥ8vDѲy-k(56l``M55o݀Pu&`;wn7YTD=CLs,6RJO>-l%˲EِR@ P87h iDʇĶ7awq\ ÷9-h5;n(\ ȣsivߗQn.sslll1)Sl]e){+f*:=\K7Xr#t,)B!i(|s;ᐿ{D,>O %[s+%,=cMy!\d\dQԎ?.-PSU}LڽcryOw_k񂓼šor`~&rql8bJp]SԶ0uϒ{)RDtSQv3n{zx-n@, Q ]kLY"v1Lq@"ޕ)fV[ǘ!ZW8Cf]'O#쨸OjfdB4`;7 %VE_UC`oE'p{8j-VRh._&Xis׍ )JӔ/JS=tʏ=uYbye.8^Z63Mg=^uo۪^ ض25B͘""5c0(M `iW0NT$@QjI&rmxU9lTtXrtyZbo|d1ͯo?}{GҽY@h>D[o:.&69Yl{øIŽ/nZzejO] 󌭭:wtܬeQ0.`6A ތ9{s?4Tŕ8W!J [YTW^v4t^3ݴNϘ*ݡr911p->r*J^z@ԥO;:Q>yF>=Z^J.}s;UXx4(DkJt/oDS] ~c"oDfYpH(_}l;W66xus$ZK/=UOrxKhIXjW#!qIVM v$ʠzmG?C:!A1CL3lvMV{ER ՗ܾ\iI $1diypP*m8x-`)Hɖ/cON-ql14Ϩk(2ÿRKNrm8d $OWDRT)Z͝(`s&TeѰ("j&-4Zf($.Y,lb95}ȷVJQ)ŪK$f[s{2 kgI=P1Bpw8 MX0pk4]C(ZI*(93}vb%䥗^x|&7޸DS۾>bV벨B\cs1hI# %9\͐Df<`sϱ\.Q=E`u^_m>u.٭VQk*3פ8Y-EWJEmёǪ}*c8mp,NN /dc .9կ:փP /̪R%씍+ŮrQppƘZQ,Bzp555YŬ=a fQ oݢ8:Nx+sTƐڄٙZR-4$Wk9%3H0LJ*jMD6P׎RL)咸݁a]S- F%A6lNU*eL淩?"v:"{f:^hm WXqB#{GuRJlSֶٱeƐW%yiĬu hL""ң?#:3b R*At0 0 P=dO4 / Π2eY`UlXq\r0~"` !~X? 
?qWIxb 伪BV(`%$mє̦9Qh8D4 XH_,`e,y|g}*)݄lpS&Dsd&QC {ntU]?ZPJ@Ǟ8jHscpmN5$acX=N!+^v'DfgTjxV%nܼFnma"T +:aمu QJX[wyIDf(pxPD:bwR1yrt%7Г+x ^wglxoJՈڟ yqFx~YC, 6ΫAsWb8, 6.+{989 -kX?c0O~s‹:J),JJ6TVK̀ZMEv ns3qyl1,Tbs)[C!X56"Ak*'2A} EHE :l-P[Zjqs˰s%Nʭ _I&1߽[52KF dxg0%A~^).)9Ja:[R D1iqN')8ԦV;S2Xܙ/Cp4,Miaw8FEj R'TuywVK_Bꚺv'iqtO,ӆxT~O#3]';;;|^,ڵk . IާQjӿGqkZ1~j@ VфL IDAT(0pB%sאH~ %PSOoi!=Y FI i*%q:-85&}72qdY,)*opi>Q8/ّwIh!M$ 禩d3$\JE]k"%E7G:HC}ci,K1qtVv@x9(xaIR͕+[1A9~׸1="*EmIB* $ ϱQƜY 4M9 /;i*ȫٌ,IBsk~xN#s&y_}֭[, vww/\ctcm鴱 $H/)O{̷aޗRR..XqmCʇa<R1/)>LW޹Be4TA5^rIh"ME4Xk0]w`M"j9FaqJsV+NO1e]32܏/]69)a5=q*uj+xPvvYch kӽ "AFaLb͎u8,C$,3قdzgX$L q 2 pˌW'n A]6& ٰbcbUc <-1Ja1R2KSYvKN_L?+ dY4{q,p.Q`풢x;.VXi]>Τ{1 e n"4u*a^ZːK5wxH{:<$,6  iRB7oset m'W98Ѹ1uwP:;[vk趝VT}YM7(>1ll Oup| M,;/&S(Aea(*l%BxEנ[JONX5Me1c:L}LSJ2H@5ƫޗ@u!,Z6S8y4[x_cpԐ&grxthMY 䘥Mgו"$ԋZb&lrEUaU2 {Ϭ,#jιV]]j1K>VKRx!/3y;;;窣~rM>n/AvzڥGJf&8\׫i7.%L=~Zs E 'e#8^d|_ǼsQBϭxuQlAGKކdg#Fo%H-;Ֆmv A%Tl 8F+Ie rŢ\QV%B:A)-|Uq` j؏(kC"< !u- nk9ó3Vͬpi =F⼋R Yc8oQҲ1Bx6kKz%3*jcH=`ޖ, n=n/+#67+T q#VIBv:]zu{LyvS;]~^NYnat%M5={/g>1=b<c٨g}hu[ł8(E&e˳#))mXk1E}@~p?I v Rxo.9HmAF8c(*;}^޻8ItSŨ%aX63 ոaLZn;ȸ63 H疓B0-jQ9RݣྪTMs *a:y9Ekr0h?qܼy7|l۷yUUu*i$ d%QQtz9|fd׋=b\imp+ь?J5:qFj(˦00Nld&)`*4a|?5qqVacY{ Gap*q$, eEcM"'*FI kCr#lUsh=`㘢'O2M"G)eV !%D czشc0Άhxm![w2&5 DB2BJv=f1& x0bk5/;_}}k,K9=='O'In=^K|=6FqW_QW տb<>cwwM;lA%m=;}gTHmLlU/l.59s -'9:Lv7ϋUoް5b•)40D07;)Oܴ? 
pޛX}sZ^-ˬ g̈"PXvۈ; $0C )'(RC%YIdMRTD")əitյz]='soݪQ}oUu{=~) eh]Rp'c1G3R\/W3V 1 I{ (jJ)4eX}Ҽr =rmԨOc 0k:1h/1KM@ʝcJef9ۊV1%eSA5XI'`L83yUY] "ah2 2A@)%6N[ݰqjzQNóݬ7GGCD^j7 ZKM5Xȣ1^h֧(+HӝOCU<肩G=T2JPʠI&<*e5-o$ZzcD"dkjQZ7oeyyg7I*frF黮ojiEQXL#tJ4# <ʲڌ4O18&}Sv4+C9mтqRJlY6CE^j1{{|k_cs2GEBO)|Mn4I(xU`k~6X7}T 0vۊSbP.iGC}ZG L9ash"-M|)6L!!𵦻XTN-?zP1SY1YԐی:Kh߫HY4`%bܓw<{*a1?֔W.l?Wxk@r'W+xX_b(FAș1˃N@yՆTh6Cv .puL R0gڠ@(4GYzcH`l~3Y,Xs``U3+ݪ^h㸨P^PwV|mkXqJJN?8tʍW_m8Rf\A0NAbm fNvj u,S6ۿ,KR M<2KK1/]%e FcJ(}=ixrpʝ^ e?IP]Zj:v1gNj{+eCXEy!([ 76\ML1$*8:%OvХׯsN?C?#"J5:D C1NH ||E7c,IP/9rN*:Q~[knڏ1K( 5rUEuQI5/I[ ^uM1q1~W) MҔѵk*ə3-/,[<6Hwy_!a6M;}1$JQֻ|XvwyUr5W&=zM}:&0^"z=l(}!XNY":B= ZMSc| CfI4_,H7UwWMl1Tt:[sJTY؝Y!fqlBCV,W::<3n5ebk"I]DɒWX^gUh^k$@;N\ , ] OZӥE'>qjw WD׮a&/ &Zx#~i9>)hdt!NՑϣ DQTu -B zcHi#>xi:H^%GI.Jfn3<}? 7Y|Wi Z2Ǣo/]H=Z[,C*2ВĉFaE/9_E68>}|{1f# ޛޤy8M:{@oq_M]2Ϲ|]> h26Hױ#:B*E"[f4W_籓'zRr'H^tȍAWHN 'Zb,cp" d%(4-嶍b Cem(FܺP/,Xu:,YKN["pvX =Fp-+* ]tҔdwro,EG J˔)ʺfA *,f0F`>~q+0ܺ^ʢRNoxD#jj5w/M!oSOQԟAWhWhm=ca:GWcS7aZ,erdR1łnⱓ'ts*kh;kّ,R$`FU5x`U*{m+[[C +;Q;;qʠ*Æ`1^}C9Dw:2 |s9"X!IMU5X:գz(^ 732q?lɵ?sS+TRB[b=7P AI%(%JlQk"6leXDΈg^ܺ@<&u&ca}Sm?F,cT3tpW*J|>'~g~Vg/G=VlJP.R><UHjXZ^d}i OFE حw*T*ZT-†5N_5Kq IDATRXY<"`XyMg6רN'}H0I]mhNC:AT R |`pb'[ݥnصfSPai Kۗ_B؊ `-{Us☦J&b`>WL&4h2 f{ou_ N(zDr\tuD0&@J't;1iKӪb-f@o-<e1[)+IW;zUסj3"Mikq:rkEr' VUDZ=,^G^ᣏIvwYܹq EQKnKa1Ԛb6CiE[vEX[,C9\]Ϥf[ *@KH# __E)0HRonYg(%=VVCATa4:3|n{~UGXei),uǵFhQPj3T mf:EFk^$ ( cq|SG=dMsF ͛.Z[_5@ݕ4Nkp}:H^{bg-ߧEt8t[YF>S0DvHϫ1G-bg#|yG?U ;f6ƠՈ|\.)i0.*WeTVoihmsyq7(夔uU vyn;qsw1rb EBkY] x}''/cm)&,;X[04}| Kh;Қ (v^W߯6MKsc-OO#8=`o糟,+FeIlHhU Hx`,A6|$J7o,}.h8cfy- ɄAT1& 7f3nS;2|F#g9$TwkDW'dBrӍ&jVi-U;^(>r3SGUi'ǭB˪iVӦ%ڂMv\kf]c.*R1!T25Iuse-K`bRgxaltBUcR<[RÈUևwc8[ݤ6_U ϫ~,5pstzz-N;ҭ-0yW)-y5WY`x1U\+]ξHJB[Ѻ 6#U?_1Z|=EXX=TBy`@6WwKk{Gk]n'kEGʌbt@6s^T%]IaRY9C>"9&|%ݼIePaHEK հ}9+P\Vv_G9^+ 2NyWz)}ѸA ;a1F US.0)^Yb@"|ZVh\JJ7NHճ(*3"M˒볒WgRw)9"|f:E0^5J\,}s,1*NWU~LpyLE K.r=SsRHcH]):zZ;{W2??z_k[xxmK/DQ/ou_h4ٳzUX)ģOɯF|ey^ j1qhXhŘSnL):Zjg+(yUđSm.%)83`/]ha_t)UUxe B]$X&Ѝ94qrH ~:ai<Ŀɜc~^ 
^V:"ӧ)CvgRpzZu4EK.55% &ٳ,gA.YZ2hm'mO05 ,/OPwW:NhTuuȚ!lvڑe=^s)(ȳ 5(:j nJޫǶtP X{{{ܺuɟ} `I._;!%O:|O,ooRq=l^)4W;pyL&] J)r!A@QTDz^68I{S]xNf|N|q+EIɘB3+uS TDYxH+w@h2g'Iɵv7})>wۮnu_O?͗տB|W|3H)t??}EYV7RVNu1wrє*˒YJUہf1;{{C Z#2u}b!8\!.B`1H#"7:ŬЛI˲˲,7nx#N(Emueu׳j]v*Ƹi[NYt019AJDvJp4kkU^mNewRAGcJAr;n9 ݄Re-wbQZZcnd`4+BwdTͬN?H)ZSbs~}X< ;9kYQ2sϱ1ce 8=s]ÃZbֺbI5ivf΁l8;;!m{GoeHS$<"y,2\a"P"C}ɰ8aejvg<o 6G1"I+K'a,[Sn5*  (ÐENf3d֮uQVyiMp #xipXID6E@yӲekKQ Tk=.:,xZCbS(b:' as`j{籹ŋ_%XRJIH$swg3.mnٳ\>-O\O*v 3 | ~ P}ԠUw{D@NO[tYP }EsvUUeqCqd X՜k-x72TR):K!gVWFQCSm^y5.^ʾ -_PifBN hJX>+L4,Ǥ|p8̙]gp؞Z7n(' q27Of3l* )1KKnjws0`@S !١X,ydo_e XO>4_ԧ8ܰ/ܸ'NTȲ0XfW 0uVX@(5INKwm,[ "*IGَY䡺4M3Ie ,+BM~u@&ݠq?{R2v9{y,/-ȩS\xu>gƞ1tQy^5&\ 6琮%[ۄi(C)`-%&7VWc>{{CǬؿ sio6,Ҕy0Owl+ByuXG=@)"G;J@>1{ϟC(kn7x (L}{ ((Sövۖ3YQ"xFQDL k}0FTB6T'۬[[)ax0[\$<@`)[q|?޹k< (!<347"Fܸ~5HyZ./| S`<0ZλjBh8}:gmm CũS(e޴1L㘽ł^1e;;Ǖ:kNF &P|3 }q]Iq?~~B}+x<'~8!nϞ?ϭ@I+۝E UGu/`;}֒$ }0 {UK*k+0] !YZ"$,6 :˲IP([;2-Ҕ/?뛛;;xIɓ|?)ne֥6ݟ IDATik\w赵zh3Z]1h^kcY|ΉѨ2JYnlVVzaXY!ѓOY[C A)c 'Y]['h{=X)`kmmc?O}?~UjqtKtn,C TmEvr.>uo[o՚JYHSQ,<ʲNBTQV hkF \5EôXZJN"ZP"N@ drm{(!Ð]wPb>D80䐭avUZ[x&ى,VD|L< Q:lHJpNZao6#Z.ZEqY[CT׌SWr2aN!(*SL}D_~eO__|Vao@Jz!| |'yTtS-"q`u4j_GkG3tm?_I0LJ(ͺ*WZ۞^6c&v \ؘcLE^,ZX[AZKQOi dZ(oӔϽ[,-☷<0:UfzǡnʬS{ ^_$y2=&m6{OVq=p8,kًc^Ny5S*3G_\=fo0}N-hvw wwYۣc W\~G?ӧ ӷ*`W +w X{Ngz> oT;.5( h@6o)tkTq~(u<)FB|_t߾*_ty9$ṧEȥj6QJJ]7Mn+{5@-t׾Fҥ 덚"p)%%-k-+ \櫯 Ax$ґLa%1ߌ(*__Cз&:f};-LjESqR,G/7qc\GZ|0l)ƪjӤU]6߫-ͪ9hVÎ*ʊ({ eߖ U__}w$Il6k)tu3;q7T^Q,lEXQꀬjiԷ m~Yz=ddP:m$ ɍ!/KD6/99zEfq4x4" шm]5H뵆| ^xP'B<^BWQ_KX:Ry{CǁnI%U5|;_*u~2?#?n^t=<| _8'XYYaee]8к}+s}$%ƴ_YGhGx+k-Yt䞇T5jGP=; ξf.v>GKT~Ut^vIy\~\tuP}ᇑ>Y:7cx}oϟ'v E%%qs&Z#~XU\dj*ʒO_Gmb֛y{˹s>ָIx?De]<h- ;6oRC)XR0Kh8<  ~F/q^۬ DBz^ZZ\˲Qޡ^E~}=#7Һ?Ǚ'7p9fIJ`tgΝ#训5{{{s }J!ȴ, ,53~wo~Y__V`xXo(^^K_{EnݺZ5;wָُ֭#=X{P@wP~aiLH~-s6 ]Q^i-EfuZXK$3pˬ|lưۭ+N| Ӽ\9im껙5j9FRmƣ~ ׀Z, ׮ooWDV>yfYU7t&H˒8˪T[۫j,$faEoINjI@#dσ(ya9%=zֲ:QD i22N&I7^NwEǂq" X(k9+Wʲ z?Hm+$ưHHzR^|eKKgzPtn3<~loos4A\G"G:jWW%Y&H T CJ VUi( utUi.yܸ1 ʦ^w $++hǪXK&iJ$Ir `'pX 
OkҷA>kKK|/M"&xUѽJoqG"DutۨQ44#8;-[/}%Ă,^D]X< _Wb[[r3m4jZrisiLX+47éۣX>BhGf!D7ZgPosW 뫄RF-x^r`d$e!H>Ts9rRGytզt jaP)ɹ.=ƾ~4iM #n@vU}6R+e7kSJN/CX0Jkc".g,~o"ΟK61Ϻ;|~Ju!):[oc޽n2LBq!>r>*m>RdQmXQAӆVgyT$VFe2~4 08z|B{=@SK4ljѸL!"L`39Ml-2SvT'&=rR_#L3T錚J81xjoW{q^,q=zt^$+ƒq:;;$U\)_F@EYqT>@4]'ϓ+XK@nE]<] 7__qD2Wz}RxQoLUpT {b4 %]>icXQ9IC+kΙd }׶g bғ0b??яՅ\.˹k?c^lقeY +WcyB(9Lp]J.-⢙3i-aplG6;hOt6p>4=|pd' fLtuk ,iH%6t."D&]q (J% }*TTPm2)y79x4OO! pv;þ}0Z.kx9;jÏu=&'/X\̞9m[h &%F6Ɵ9 Α#GXz5_Y)z{CJɕW^}}Y|I{9FFFrǢ/Vtu13ݻyAftu1% Pd-kYٵ.;5r4(U@ԛ%(>M_1jFw(wtd2GGFQ&ISSPI"'Y.zF-x2js`LjCx&nb.cBeWap uf0ҥKپ};W'Djd65^v7}ٶs0{=0/ g #FG./h|w6/M"qh46 G;F66KF%%uccX>AJdOչ rihL C$RD/`Ժ?螇Hl6-j[zٷ/4mo9ӡW`Y"鉋I;-XSz^qiàc2==LGȇ1֬Ymyr9u!gߚ*O0rէu 0M9s+V >H.3P-0Q\czGyڨ.iXLS4<J UHuB0]yqT\or{eJa`{AR}3RyqTEPQ NbnBCp xed 2>~>&>R ZfJi[>LOnῴw/[^~|;y/u 4dL,$fea詑xΜ".*ٵkg2(npxd ttCkz3LTYT,tW? ,kM['}cmYz :N=ZKX@i>vDX G>3MÏrZ݀+yFxyH-ZPVc>î0::##&A \e:*ǿ-\w7V{;زc_.{ϟgTJxpvm<3|z~j5Nҥج".B'RR5 MbyVRRi*LXݲ@Ӑt Gm,twә&@B9,3.M)q+Cd} H%-ٹSC<.QլT곭 )cצM~|V'a ?x]oKy(&]U++Бq~__XSbd<*ˈY"!L18B$ I%bE]ױ>D"<yrӦQ-)S<ȰB g ض5~Cy]5KMNoEZǛɭ\.lrn&ub+zwCJɬYwlr~i]73R&ilyus9ڲYn:ڪ o 1{42IV|t9z1Lv\VgSNV:Ay^Z&p"{86bTKˣkY $K2f iZdaZleS4҈:1G+1m5>l6?6\.EAЦ # ] 1 )u4M'XV;iE2>^<k .m t'|+b||uqwrUWUB:>}:ӧOꫯf۶m^=/*]p(ۖ`6f.㭞5/;7;$J,l6J)rIh0>^u5r TPJ !4 I,GW4P1,|_sg9s0#A_~XBi,Y{z7l_ݵJ%Ҵ*0287hҟJZY+VUZ}"~-0f״;i߯1~>B?y٬:)a)H+D f~R*fAEd5aԎ+|xi4YS+ bl6,DFx<if躅dL)3x5 %ׅ3+I5bB>;ػw/Oa)l|odڵ<:t)',|~PStZתNv qo^sRTT*%bNTzjy":jIG8N% ~H.&I.e a``۷o[a)U,^^`ݺuLT'",ә*?s}Qn6:;;R$& ݛհZ j.5RB|qv)j}Y? 
/>h:&LjN[ 67A@6%Q i~tVkxŊ !3{jxWm~"A6ik;iRP.Td`ӦMu]|T'",sٳgOkg}z^z ۛx,kS>VӜ5D.t;2ضr1*:DҦŭ q] $jHm/VZ!$]u}}_~[o[",Yb+VHvEOOsΥP(\9IͺZh-nRbvҐNcrYO")TC(K1F&Fa~b§#hW?>V"ϫD-W<7of,^nNⶄ"tW|s[Cz0 vtPl+sBͮRq]4a#jx^~y׭֡C8z(( PvG[[]w\s g͚5<\z̟?$=LĄF:&tfҷ _7В1 c]IYu]lql'@GNw)CpSD+a:??h"u"(Rx y=ƍٰa7nd̙\r%tuu%1QńL\(+RkX3H 5"V`1%l =J֏#u{+_ ˗/W'98a)6,[}Cܹ{Z Ø$vIӤ^KWضJ zynRuGjԃ%*P0J89N6 p۶u#ZUeݻwpWO|B5K˲Xd _~9_ڵk>}:/N|r,hsJvIf6Jibmf)q1N$0m}Qwh5qq`\~<躺a)ӧj*j6l 0 ŋ3k,4Mu)Ҫq=icc0dJy<"j zmR7~b3<\f]",w3|`ɒ%tM<g I+1Y57 2b:F6;6AMX[x^|ߡmFZ]BJI:F|N:GVi]t7o~>򑏰m6nݻqQVBוj&\),kaV=pѴ*9N.7Hgv,k )KìZk 'c W-܂m>N1s{ ѣl޳}"'S#iqʤ%,<8%u$8.BdYM2/%wWOp\r`"dr:Q9Q,UxW80 ,^}s?g?1EfZ>O&lN# ,8=?~,U g+ۚR@&EYRIs"dv)euDt]gƌ|+_a#[od#bskCl~E-K泞{k-,bӐu+SޓJ 5l{\!K_3_"̙k1^n=v9WT+*ǎII Mul$8JۛDƣ\umuTΝnWwEgxRrO;dΜ9|q166ʯ~RM ;.=(EC7K 4%L矶=B:9L&êUx'_53.q)_sU9x&T[m6ɠNT'P&hY5wƌ##z"kpsB7ҥKTc7]'?YKZƲ̄-N 4M\~3MT* 闠iF@@C\G?7vẮaqG\1y:锰U;?2TPPxu6n|p"A s CO64K{ޤp+MRBDYBAEZ aڼyL8DR1IYiam:MW EȪGp&D={~K 0 SAPqS<(& 7L&Xc1aգzJ؜ pd YP7 Z޽[aAUMKAۮ?g6GZNVL$ϒe2WcZױwq) k8I:oֲL\|ȑ:)3tvNgydڴ^r<d!B[bqÇ9p`/;vlcϞ8vHaYfj`e2de%ibsѽZ8N*jL ERsO4"oTTر^~9*ۮQpZu]nyxp:\Qw4{۩B[&_׍CU:el-T"}Zu8m 8%$N8i4܈It[&VoBBFj`5QdpHџq :m*JW&)ŤPEV oҪqSS:u"o,Mfds#X ]ڞ*j:S$uD+RQ™N$wըL5y=AJAK0xR@AAT,ma)(((n1NXk&IENDB`einspline-0.9.2/www/news.shtml0000664000113000011300000000220211012400560013245 00000000000000 einspline

1/18/08

Releasing einspline-0.8.0. This release contains new functions which, for a given evaluation point, evaluate a whole set of splines with a single evaluation call. For large numbers of splines, this can result in a very significant speedup -- in some cases 4x! The functions are very similar to the single-spline version. Only uniform multi-splines have been implemented so far.

10/31/07

einspline-0.7.7 was released today. It fixes an off-by-one error in the construction of nonuniform B-spline bases.

5/11/07

einspline 0.6 will be released today. This release includes Fortran 77 wrappers to the C library.

5/8/07

einspline-0.5 was released today. This is the initial public release of the library. I have tested it, but not extensively. Please report any bugs to kpesler AT ciw DOT edu. einspline-0.9.2/www/doc.shtml0000664000113000011300000000532511012400560013047 00000000000000 einspline

B-spline Background

Some introductory background on interpolating B-splines and their construction can be found here.

Installation

einspline is written in standard C conforming to the C99 standard. To install it, you will need a compiler which supports this standard. It uses autoconf/automake/libtool for installation, but these are not required to build the library. The following is a quick set of instructions for installing einspline.

  1. Download the source from here.
  2. Unpack the tarball with a command such as
     tar xvpzf einspline-0.5.tar.gz 
  3. cd to the einspline root directory.
  4. Automatically configure for your machine by running the configure script.
      ./configure
  5. For more configuration options run:
      ./configure --help
    For example, to configure the library to be installed in your home directory (such as on a supercomputer where you do not have root access), run
      ./configure --prefix=$HOME 
  6. To take maximum advantage of SSE instructions, configure with "--enable-sse".
  7. To utilize software prefetch (in some routines) use "--enable-prefetch". This improves speed significantly on some processors and hurts on others.
  8. Build the library with
      make 
  9. Finally, install the library with
      make install 

Library API

C interface

einspline organizes spline routines into two groups: those which are specified on a uniform grid (or multidimensional mesh), and those on nonuniform grids. The uniform versions of the routines are documented below, followed by the nonuniform versions.

Single uniform Splines

Multiple uniform Splines

Single nonuniform Splines

FORTRAN 77 interface

Single uniform splines

Multiple uniform splines

Single nonuniform splines

Python interface

This interface has not yet been coded. einspline-0.9.2/www/faq.shtml0000664000113000011300000000375211012400560013053 00000000000000 einspline
  1. Where did the name come from?
    The rather whimsical name, which rhymes with Einstein, is easy to remember and suggests its origin in the physics, rather than computer graphics, community.

  2. What was the motivation for writing einspline?
    There don't appear to be any generally-available libraries for the creation of 2D and 3D interpolating B-splines. These turn out to be very useful for representing orbitals in Quantum Monte Carlo computations. They are also useful for many other things, but QMC was my main motivation.

  3. Why stop at three-dimensional splines?
    I didn't have a use for higher than 3D. The ideas are quite easy to generalize to higher dimensionality, but the coding can get a bit tedious. If there were a good application for it, and I had time, I might code a set of 4D routines.

  4. Are there bindings for languages other than C?
    There are currently Fortran bindings. I would like to add C++ and (possibly) Python bindings when time permits.

  5. What are the licensing terms of einspline?
    einspline is licensed under the GNU General Public License.

  6. How do I compile the library on Windows?
    I have no idea. I almost never use Windows and have no compilers for that platform. One user has reported that he was able to get the library to compile, but I cannot help you with that.

  7. Where are all the other frequently asked questions in this FAQ?
    Besides this one, they haven't been written yet.

einspline-0.9.2/www/benchmark.shtml0000664000113000011300000000044711012400560014234 00000000000000 einspline einspline-0.9.2/www/bspline_logo.png0000664000113000011300000004604011012400560014412 00000000000000PNG  IHDR<sBIT|d pHYsٍ|tEXtSoftwarewww.inkscape.org< IDATxwߺ\ aoPB,2T6- te-*RV)( ;yw5l=K/dY-}Fh4aZ*XYi4ЂGSO p1g3Shj)vVՙFS==TL]x 883hjk; uT-x41k{՚FS˘0M)NG U5rGP6/Qֿt4A_04*xBm.̬.@=kNHuVMтGSOt)daZVl |Ye^J CSOh FSr?8[ҘFS+h'G))ʶY(~^yn䧤'tвFSr( |ZOy~ur$;3v M=ZxKKl Z_C;_65k%?56hm{=`N`'#ٶ}!pEmۃEVF$xG)@;{&e"<M ucH`E{OK9Q]; pmyW)1l^:OF[x4"sY q<:pYoHEض=!*KkljNVXm{Mj mh mm3#RSض'pT/pEs8>~B[x4hg\UfTXJdNǏ ^s 1=]/zct&sUFS*<2K0N=6n.ym{y}mc8*+-k4FYׂGoyp+p,u(vl^]7u"}-v*E[x4hWևo48.π7@s*+e)534-<MQ? ,h,  F0v[ƥ0F/B[x4;lN#"g|ڑ0FK݁T>~y^-Tc&ugi;I\kǤh2hS۶?u)xl l .3m۾ ~ڴܑ nG0p&KL}$ϧqpo_邇Tтm{`\6tbö `@L8ζc<{5JHUG[xz%<`nl\M]欻x;dz`נo]mh\6:kso `2jF׀d`i|gVi#fϤ5SM{Lm{elUĎm.'LmRGE<[xLm@4WZYrN-8w0tC0ysЏq}p!耆.Bi)etv%jOX:Uha 4 ~9dQÎ{y{`6B{@k~nCH1QGS:⼾)"ay/{ ,Budy I`Z;-Ndo9WGÛ Fliㄆp wrDgHkke<0]hZBg?2|Sie=E6C$vtʥC.oŽ7L87LDR.Ջ/xl9C;A7<&Smy k?-rG g7р{Y L}R՞ENn 7_Gg qȏYs<Kڸ Ōik @|#o <B;cjf1-w+X`f;5n9;ooJL}x[;I"cZN$c!puO 4𝪗蓂G\|<u ۶G!fUtO'r?-r .G;Wr~?XlZ#0xE1- S}skKHӅi;16mo*`Z.;=?~o(Bn5{MxPcZ}zf}e$]:Ғs*pVX дze \d`OrJ B1N$RTOdXxҶ=41*ۄnso,%oݎp͌-XC 4-vw>+s!/귑/v~JN;pi. bZwj\OuNEd<NZi'%v$'Ӏ'dCK7mQhF_5-wu~qaZkbOHӁLkk,B|VB2Qek [ .6-w* ݱ굫1mg=ߜ,F]Dݕ Xpo51~>! 
ع77Co|q=U@ZʶAX6PiPo!60L>ʶ7oT<㭊.Y({ȶՀG\6F]ڟy=9G#5eEw.yuZS;rUj=ϛ {-FGY RyiG[pmgAUæNDd@;<)̮ri7Is|i#vƈSrgT|^kAA^Ǵw νI [.Dˎl[3J*MdaDa6+ÁY/v^6 |燕DZʶTk#y*{0j}~ |4<lFJg*[#{'B(y^/l7"R,sQP[_ZΟ!5LN+`8w߹ Q㏈;l1f8] b}Ӯ $-9+TW]xHwfyAlSw |TKE$Q(ɵ}lPEHuljZ:\<wGG;>< `bZwlwrpdRR>?qK\LS)?G J}̩V;8|gN;'"\.s2sB!y1~ @ *uyϓ R&܏֧kWZ%,-3;uL;_\w)X.웤oߙ<|g"q.Đ1Lדk ߩGC_RoBĚ&N<RPB,v2"ȭFXE-<"?sF`Zkp)"|l<]O0p!z;录䉸Zr`tNO`w,iu!⻛fe9{'#]& mNƚ u%ŭ.| U!Du*\Ál1-׎{,~9R,S޳y~-x$)vv P4N 㠠G^8o)_q*7n6CsO5Y7٨i2Ϩ&DDݞX1-DrsAKMYGO+O}|XԢzw3-𝏲6 U.ɭ< ǒ 7674DX7|k,HCckSqyfTYxvUC;Ǎ ܑn@CL=ׇ紿1.]8MuTCy;OoŸ0N{S`Z JvP6Wcԋ;]D`\՘[D l6I`u/B9Q3]L=4q_z))ScCǴ,1'&{i/2.0RpcfEEo lQ."`Z,_y!Q 2bdm բ7<оerbZ;+jf7΋p:z?V6_Nm~"KؕZ?<W ڶGZji~Kwy60PuZFEKn sp[o;9m@&v-r3U9~g9|H~ۗk$LpXݬ$[&!NTXD3_N3|K r~r7kZDФ1C&ʙZ7}º4Rh@Tz:JW#<d ijwOT̲A\@CZ ȇ*+yb#lX~_I`Df1NR֟|'ؑAG:C֪IYHT$⼙]$:Duwz_)d?/?kSkh)f"wlto$jjTySۮ[f6}3nnJ4u-V߹;ãD3-wED C6+bv?S6f,-xUXV%`ƪ尗^8ZNJajr7fmZJ 4IMw3-w&RV,sl78kMulJnYZpϾCqԂ8{fѳ(^IZfi+37ib<[B4bw7fc-ZaJ<<|qitcVV+{iQNR_,E^gVu#+~gGRQ+r-;kf\҂GrOV_DOXE|HnѦ}>𝏀m@st eZLm0 b '(5VWb7Zoݙpux U*qk"Dۙ~ڥ`2S A2r$?2XYdZ낥l= #cV??Qa7Ĝ-? r?jAmqE *x*jbrլw7YKW==Y7{q 5"g4ЂGmI5`pzAQrGlr5]{fok =![M;S IDAT.ZxI UN5 ObD^PSkgT@ MT<5rrAno}_QC۰3*Q}>}c̭Fqɨilj@DB4G+&`P*xjPҗt(7%<ꅲ?[x zE 9!Mmwq)!J2X \#𝮼AMǭf4Q?%%n(֎*w'&S=<=&*bO=IWÝR-ԋ[$V6>6`WX=4è? pp=BEV94^dXi) oqgw8lY%MmMXAY﯂TR+1zZdD/R+3;3ی6g7ͅ54.1XXw˴fcY*u 6+KyK1?MYkO2 Ǔa796MC/EO¢'0F_puDľv&|NZԎQGv}2 jN~$]jӥZxK)QjVY۶n/ijQ!wfWe&p6=AaH8lgŪ<zW S)RXd3Lˍ5Imqse}hSz RC&s Y7 E4eC՘K\Hm8DZ#GWHYv +-xj O+DrUa]aZnb!-S%MrŞׂǶq{a;".nUKq#?VZ_ JseR(5#+RmbZ>rY  @ l+%byv^uǫ&6-UMr75咖Y]r(m)Cw&%Fk uW 9 L´\R<YרLmYDZNW㕃zC\zXK ۶)n.F"ln4Xoiav:jgV wI2?%N;[ʺ3jS11PIOՀSc_-0TrTi[x`ZZ ۆ0in>3cV%V NqP-Ao"(@gQ&)]g%VTR@۶H/8ѶmMel۶kvM4mQo"v<8-wfW(m詻erwR6?Me`D{L˭Uk̺%k0.𬥽R[![dZ13UP;W?ҳ@b<NW6oI#ӪNwc%*xl>Lf3aCx^! 
d)D@jd۶#m{m.Y1qyɅʶ's?yrP~eZnWH옖Ef}٪o9?紽VϴK6Bnx].D-<{%|t8!kIeL]OgrK-P4ckԱҮ,] U2/wV"PYjͼ3wxĥ*l8Z0U=S}%cJ=c0+aE$Z*"j|¸e3z?<y*KȞSϒ@EE1-wqRfz̝Kd pX;bcRܝA KxAl6%awz/{Z9jrOCQܩ\웠1u'7wpZ[b%*a&lN1cZƂNlM=!Ny%(Uc\;My;*vAR>yw{*<R69wy%{öo# *# %cZƈ̭SCI7\)R|u{T lD<́w"4_]:g̉͟kS i"7 wz~82ҷk)_^  |'֚BIn#]ynT68+E`ˬMIrM`2S9Ǵ-,xYUL˽84kӉYVQǸ N`J5bǴܽ)Ij~̩nSo4dDjƶm:qW2m`/Du`^ ~;VVk皖4,Ѷ[4}3+\Ncx |rh[hUo!1 8|R |瞄mQPHqzVGELNn9ISk亡vq*xrgZn+1-Qaw ɚYOMƙ_;_Wr쨘'"1G^${Xx#n<۱sH>AiIo8y(y(mhCed\:qɶ lz+´Ư>[Xk<ÿL@X*+)%y{{)O L3-i|LmB =)᳁}QT r6F_V'e}u΢ܼ mZ/+R,<prS~QA؀MK>nD] |G5ik"Ue>ʲXټyVΠO'AW080g"rQ*𝒃M` JDX e.URA#a\L%}qQ]yq神ZQ/AMe}Y3A>w= 6 |(c8!Z#k`TVBFOi!A!nz@Ԁ!KI i;uт'fZ cIRRD!|5 cs+"ͱ^Q.q#3r#QEt?yκ4 N'𝏢WXg221b:qI"nĜRJ洗 Z7/1"Q87DXv(\CX2t"b'=w:fs{\Dffv_ߩ5SocO@ѕBDP9l)0wB&b* 5 #J2#^;7'2np,n~s֡q |ga ܾAhi:37_#j-݇JDbZ16>A?@)فCQQ>`e5z*.l[&իpl%~9l vGXg5DeوOF#8)kٌ\&}B![ Qe; tp ۴X/k[ð22YZtjC%Y*AdSYߩ:F=!G#\yKhna3b i̅^ |ҸouuYߙeV\1IcZuTw~)de.D&h`;eR%]s)nU.JR6bKfɬ8̮ʄ =2k_~)xZ Ԯ6Z3a&YLo Ú+IqA⋷Jف覞-^$LZ)^K3tm7%9iET3pf;%9f_Ŵ#bZv@d]5( pR;ץ= 垄TŨ1h1u7%JoOalH ' f"!#fǠwJ0-~֤YfY9WXz5 KJb\fFgu>R%^pd8#˓SSRG!ܓOVP;vL?D ߯5h=b Z<9blX}|,'d"{#j/ 1!Zp __L_|!YrˤRZΌ+^n Úi6Y6qmw> 1Nn"Jv>Bd1~S~Ga EX~+Z0ϵg,<-1Q4cYėf&22SK2lLf"*NET}5 #g46mBSdz/DD{-B65MmbM•Gg[Ym8cQٱr-n0 xJ.AOCU @WaL[Ml Sn:|Aϝ~KŸzk4~\O{a)ueJ{aOO]q$T7X\-r;%-AS.X5͇e^wNsz^@ ²bkռGfm9!2~" _駴Lj[ c]bQ3#,;Ld 莵zuX0ʺ /Va£3!t~ D<-9BBn7lf!ZӔSZkik0mh%T>X4Y0L[#V"?GS?fjFSfM_YS06B? տ5}Y e0VDi-u) zIr+ku-]2e 2"{cGOA\^" "5 ;R}wak1'c ڔac!?fET vii4}iq? 8Q 8jH@OaGہ[q IDAT5 \X.' [#0!,1%^[x4>Fa D hq3pak$ub!w"D+iK)y\U>3hkaш@xnR'mh쏀%@:[B'VG*'mDVLۀi /咩W6C xLGsZ ccHD|d'z~Nj)0Bxth Y?rH7Gm鹞To  ȔfLkh FS^#4rsp+BSqi1Ggp-pSk~ĸM#O/hDe,@!WD㡰< ,cwM-<MbMRօc5 Nc )tphrE^ĦOFEIdh*:-h45Na Cx !t 6NXO 5 _c ? 
o 2'Z λ"Dа |[.gK[ ?t W$GWZhjY4Llw}ha8s$x Y[ˈsh=RLU2hkDO!KcQ/Z !Da$D[x40F#9)OF.mJEC W;eFIl'к1z P-5B5-otp߹Zh#2|qˋ,<- {F< \=1F;7rtψ]*<``o-2ݾ5 󎈶h4U0l8Á.W,%YxZ c|[dh*ElFƊGv/O]д{6M |4 }{EvBA 5 7QȔ;E-t4:EaQ=*GK#ݴ< pX;w&=Fh1c#n Dd`W/1EOal\HBGcƷgD1Ĩt ,>(CH&5M鴆BǷ-"3s*O{--oS?/XF -2Z jEv^@0|3qS hb-~$_|K "\_>Sp@;GFSȚ\; LacH=TBφuSGѤ, ? !v%X)/MG.‚3 qO7  cqcRҲFd#cwEX~VЯ#DE4;m3-QDe}y040l 4ND>*-x4MȂ[ҝIL^r3x9J9M4h4ui#f9'Z c]DDfDeen_0&-1\Q9fvMZA8MDh4UP֗ߵ{ QgfVco,FXݵ^6*rjIENDB`einspline-0.9.2/www/download.shtml0000664000113000011300000000155111012400560014106 00000000000000 einspline

Requirements

For the standard C library, a compiler which supports the C99 standard and a working install of pkg-config is all that is required.

For the optional blip-generation facilities, a working install of FFTW 3 is required, with both the single and double-precision libraries compiled.

Source code

To download the einspline source code, please visit the SourceForge project page. einspline-0.9.2/www/NUBsplineBasis.png0000664000113000011300000021216411012400560014561 00000000000000PNG  IHDRwSsBIT|d IDATxy|}?,%[mlK>06ؘ$@!RBi&4іk 9IIHBHHNm|'ۺeZ}gfwvvfvvwVZ~;̳<~)㩜ʔcphjjœ9sp뭷&QXXw=~]waڵS=m)iz@tODP(r<Ӹ;ݻw˲-Zsnz******F^;::099cؿ?"HWM7ݤEO^ (@(**B0x?׿OEEEEE%_kAφ{BcժU=IqH,p@ףX?Op}>OQt~iFm旫*1nc=^82yM&s5J~!)p:Ph=4MOyJ0 ^x!7 zݺus:tuu5vEh43_;wtOA~Lyitǐ{qbA4*9fyd򚟪'A>2'KJJ{K-^?<(N'^z%<Nlٲ@ u/"n&X阶J^B4ׄ<}xvB***ׂ?8҂+> n6ɓ'\%r(ٜ鞂$5\WqCB ٷ_p4FFϛgkb^ 禒s^8b&3vN4.un6cNtm=g?iz'zL/?//dM[*/MljtOv$wlűc1K@E)TAWQDp ?J|rphz g\" {l1g8ɑ`b iJr0ƢE D@Cs OUTTfG鞂O$Jr:n7j[5(x @߶-LΛgkb^ r ʥƸo%h( &O:^('IUTTUUTfajϋ?7@Jgj Hzmu@tEGxcl N'iȖy'7c.诳7p==4p!$ MBpm K㝉 t|i]#zWfBq1wp(V 7 ;sÞ=x'?llo}V|> w%z\GGUW%P`,,'ĩlSkW~`h;塇wY\#o?_؇9ƼUjkk# @k+puw TlkkCqGGFkmEN2\OiraN4? Ӛm^/M4}餱mؘ" tc#Mxc5_cGNOIv[SQ9YC^k pOu5OW"9h'̣kk&p;Gq;6݊Ͼ@8<H t`qA6n7VXnDї➭Mӻĭn7a3raǴ=gg`$_$' |d.rJc݅+Qn-嵗c[׶/䍯Z̚lLYɅ lYv;+)/o~C.& ,x##]%UUi<ݍ/UT`ݞuu86>ρ.#W+feo{e2pͼk ypڷd\I&KdՑ-4Z8&fgUU, #;kLjAF[ ̙3…@S3)\8ŭ>,X,B!;1VV&>|>`$ DBiÆcW_ Ɠ 2S-)Ixu:a$ˬ|:oj"M?srz**YÁl,,2M{q ٻl>ii;iΞP!5;v X,%$-]`4@L& )蕕@YYu D<bSqJu:Q~")0Řo,V_~iRaL8qwc@I sqxg| aj7j4j|1~K`re㡇cԩLAer z͞b2NBxedΚ%ygJKahи`sxMSdZ[Ndt90k_Y| bòp!{N%hsL&tn}">2BR`` =n4:|ߍOzAA 0/( ߵ55s^H? è5Ĉ _=CMhQA޽ūbn(-G*-3}.}ܦM/=}J.ү^ocfC4<Ӏ=٭⧔'Iт@K A L^-ee5\_Z{ԧ끵kIL9p=U.Kx|Y2Da""e/?6o^L&P-Eႀ:M'ާA7l(*BgB>CħzAO M9k:'( U*Vł85 4ϊ$[2Cz~Dh h;.ooE8_|1T893QKVЩup/gߞ'~ӧ{lݴ))9U42ϕ&$hTQQw:Iqfc.Ȧ \ Θxc9ex/\l\"H}Z- X+v8ȍ(*"q4&1j%#z=,t*ͨ!եvtC%ԄXXg( ƳXAVJ2m6|pd4oADo'a={l~{XZt;@hՊo \up\'$~{(%+w,V`ԛo_ًoo?p)_B+*d ܮEkŏim%i#CGpYeI)pz촼Ν#?͋?&J$We0DOKQ2O,.0Vt㖔ϗ(k"(z x$<6!tUݓR% pnB^.#nq.  L[䕅خKɢEҥD>ĝmk۰/w~x}83~7$ҏ>~f(k( ~x\ь;^#?]E==~;44C੧tZo@QHOCJcs'1ΤB~&ѡXZ4q N$jg%?D_6 b1$s,t dq 3+Bzxct:[?&H-}it +fBq2gق'1E5 EEr)V QIknӟvO_y+V?dK"=4n7^./(@q S;G_^NΞ%hkk?ψg[xg'wLL.N')}LE%/0d>~>'zs%#^}{0ŏ{[sD/-Mul݊C. 
$E"$~fMc/|+Il5/s܄LpWWr[++I9ATr]T-hƭ yb.# Xflݺ5AѝaĭX17z$\݉9E3LQa+ؘ7<"y,F# ԾτI*5+WVd )[̾g7_D6|z[!JOc' ^~wg.YAu߀`;"R;K a4#s0>+ا@ߩS$XKG?ѓl>__TΉYغ|%}(D^],)ሻbsYo=FnY;WaOLN ?r(.&]b0A:93ꞔQE9jUwCoMR e 벻G燕ѯ}mg3TApQh*3Ki?+_![`0_:I&&Ef2-ApqO 75k>~NNJZ)0ٳ0Mc<JK3͖QB%|qAg2أ4 w$"h4AxrrbE{V&=W[X^ Az442:rXeGsTkצ>W5ˮ]YM!%/~"4&u+ũSxk9 ?M Gt& ~϶?^mۀ#G^w=[w$}.PreO!?kŧ}=3~ K7nG d[^Q!@lc)f,e8nJz[X!c7r/9D Xu!ym f%c'D}=I)*׋Fv OPa}NIEe%YnˠQ]:pDz;`3y泠dn^|3s4H /rzc.YA۾wpy*!A /hbIi3AףcTk.˗ף:ehXX".MPj.MmX|O**l[9) ә(֋ZM Yf3)ts06+FxDwY{Qh,1{ g?` |N4NzhXd9-1h,$U2+iSn1XH1X" 1u)A".DA?=8:2~Z.Pp rXnGf ` 3eF(UYM~lۀ&Or03.ɷקS*ܻ^݅/f~s#+R% crE[`ՊCYX4|BģQx7 ye+P[P:k`Si4Dy2B!h)*fX4$ }r2Q5D/Yq\:E3q3tf.:_[xGD}}VjR$g͟'^$3E'Y\e#6:;Yi|etkF( W|9t 6`Aϑ4|{2R Ia(\" Av;fqbXGC!t*m,Bz} zs-t@PLF ]\,Q(,M}Je}޼,3>ZmB"b*lz0//7'D*R_'ˎ93g/d<;ƫ^Upf2'ϋ7zXQ%3iE2K>3'Tpf2xe~MS{iT_^egM shkcJF^=PvN6Qsv~m6 B<K/_ HDWt90hnqeEm=D#`B s8 IDAT:]˛o[זOZmmm~x(=pl6!w?XtY =vNl;U6^~`,'P_O&'Q9If( 39B`$~=)tO2ܱ1Y_$dfB!<XjV\}{W¦Ma̝;O<\-/5cjFj2bVddu5߽B#q:io)iz${(D:Ȧ#[nU}ttH:x֭e!]\FBo@y<F1Bߺu+ 7MAvܲek,) }֭qAg붛L'Vpʠyg _$,e G(3Ɂ s\pI_[AJ nAk_'M_1jn}jΗAсv|/w^~e'?[o;v߰3ҭ'p4Z}}\w 8Xtc2h7gItUU$UݎX)fIl-" i!A6ɳ#DAXH4Yo&ΆeYAFI x.w`]ޘo ^ze2i mI[F)Y~hD)( z,YB%{]\5Dz-a x'&?\W_=5^C}Ys=h4(((%<%}#Q~_f| bGbv ̄k%K^d/ Vz&~ɃZ(=\zy ϑh}ξX)ҥ۬QR.2"ZAA$t[mk4n ݨ5AgY| /Zm:`k= w, Z)Hb,Pً.CН0FC!ԧeeŒwu)ݜ!Z}߾by;Xcl’/W =qg`<{A$9&Yss3ߏ/ѥ OFgg'>Oc`9Esz^9JV!%4MoE1׬!wSdv ZJ*QR4[&p}h}< }=P4$B/5Bы1]${l"UT!AX q@BZ&SL(,3V+8 za!S7RxJ[.LAds}^Dt?~-Y)YZk4Mb˼- ຅ACi7S!㏕^Ї*ZEdEEE;oÛoU7O'uRIN >ܘ6 R4KdǃhɓRoueE(BJ<3**"r'l*weD(PXF'BF".wE%jMs$.*N@][,GIcdS@ի _3gϢ٧ 6l%^{\khZ/~ yYb_|nő j>l6Bdͤ.Z&`pd]ԩyt ?5 ^__O\f^__Ne :27/^__ֈ@cCh;V fON",.&[ڸ^l*-sZiĈWa @>*yeRHt1g<`e ќJsZGGr>H&_YZ[C:?@0aZ6scqэx[E*ƫq#!^+sWp8(5akjj0&`=So*oܢdY{_eqU$xlV4.Zt.-P@ZqGLY!T[{-Qen4&-${DP$.d+rO\M)-t־ cpmpKUl'4;:n IM X~?9(cpbgO]%kooSO=۷ßEo|"舗a=vZZZ욛a418:: $kmmM~.oQ. 
ۚ2OP~KI)&EqqV t:xXM…5'n ~,t APT$r"c1JseaЧ{,X-x9(#蝎Nae&.17IR""q444W^y%|A`x<8y$BL'|pk׮Ekk+.\gy&_5*uFsK3p.lRTe6dI{wϜ!Q*K%Dܑ}ξ-aϰrHu`P MΊǓn ܓbRY9XX6 }@9cƥRpUar ztW,-E Τq=`کʲr%ɳͰ-`GXʽJq݂p|8): MAOa_A7 я~СCַ{}}}1w4ł{Cs*`YS٬79W-y$o(`Y+ %ۍ ,8[lbj=~W?fS :;.|)z8!rokkKt;Yx̍tF J2D`h>CvA= _q۝ cH>,$`$B-BOB=|2ƟdpF͎ )( Xtq˄I$:F:pdAWk]Ͽ-.ˤնjw^koޜd[vAXpB(pJFܑϟ9gϖt6N ]ҚLa :BٲtgPY˱Q(TFBt˝ +@~ -x)B,]6),@Z-K`h0h9~t }o^Рfe'1QH I <3UeBQۂw+ \dYfPۍfsV7:1("2}pX dwZr_+4Ң",5 pI6E|0 D>2oBB@B;.$Fa :;WR, VCnzХ(a+Ъ- }֬.Z.7eExtr)Gn=XT(Nli,oD t~̀$Ev3 Ugj'p0MYCnG_ x<vE,%T%[ kڏ ^mNVE$Y''5UZ>[#㛞 3@KQ0k41+,Cxܥ,t&%{a!P 1Ԑm"@Uf, 3.wD^y͈ߋ*?)EaMK!A}`޼xKKV~岹~3!/+h6 Zq<"*tm8vcBv-w9b^:/qS,:DMgrkyrw]BccwM(㹙Bz}}},YItà5$cs1ɪ's߫f~ :;Ph*L<e0Mϕt٥Hg" /gR4A7H&%ʫ.^L(-K4YWbn\6݄; L\ _<:e?؇ҬJ{HHy[1|N{XC }!sLդ9.㪮&cy_t.XAcMw9#AAgqťr YFxAa  :ۑ-JG1韔r\L&.w@tf_ ,O7 `8s\ϹItu90aAW^$fgNUUaS&lR#`NJ*zuڍOx"7 q, .U~hΟ'7lIP) &-'2KY(eM2Ybp :9Yht@$ 1t 'NIqw tBRCOgąt\B.yf3"4˚Es,[CcYLǂe>n{骫֌E4jUݷ+3O}n"|\ Ο>~"<@]5rKy>6FZNB!o3M;y<$B7jʠ%hM,$+-t E'^T:+l/t1$ M~HWˉ9,GG to3^t%`b}~,Z+"WPs6b{Ov&$!:ń*ir+oN\/ѹ4ul]'t:)㔲ЁԂ^mO C_g;Eb,VN ],).Cq@T8H$>ЍFa;M{Lm `]BѐkFAt(q>|l.%Su`?ZflNBlۈ;ۏƍir z"5`6,V;v6(?,X]Mѵǃ E"Kqd?~L'dܹ㖚K= z$"rgOeŲ@أZЙx_uIJm6[`2:rfK- L&%NZH-~Aϴi$Y-!|1RU.%?wCV o4tUMtB=tw}i"`B δgΐ8W(?TZ<(бPA[ 1l9 Xzl?lے|E^f);Ṋi"X!@ڇ.XQ2|/~R{~.up*IX^,f%9[FBPc4*Z7%q==KB[֔f1ypMM ѓaẂ| zl ehNzd&b(2qN=ev]$ֆ!Nd,#ctBCiҪvKz9ZީbH$,w8wZ rch,t1 crqHLSJW 1AQ5zI2)y?rfCs| \L%k㞏3{c !gVbGw+]Hhr%I;BG7:1pG"]׺2 `݃VU%%fMrg|\K;AOm-+Iٛ6 $g.wc -(' s`3'|ϔOu5IZJQo2a4Jh1ET45ɷ YT[n; {jB3U3([{3XQ_0{6xAX@|X"QWٝ-8gPvO̚= B!81R.w Zfs1lIvA ]Z.wF.w ')h^AF"SQ")0jR'J[6O1smLESYÌ B46A_U zzlAmuO3.YAϤ+뱳gXq<7m Se(m:^]l~ULh{?oI ,! =9w\FrO!>1 .WϹPR{<Cg]+PW$ψYl. COQRTghjcϞ[.wgrOu͖K-tUXtɋf X虖93@A\ϟ8)Y^ r1eUItWތ+/ͮv9LlY_=-E{Q]۶ (ɭ>J[E5d٘J%ARGΖTs:eY˛]eC_ F#u S\qxl,J.(l5T #{\_Galթ'嵗ܵu )dȊ0LݛƊrnd.r 2*B >I ]2Yz3wLR=6Z)N]@"h\rt>t CG(CG"+\ =]A1h:>sHHDQA](1*;@TXPvH)`u8ti+]r񜉨!-Z/Ϟ=d59d:-"  f JwDJxBxJ2] }rlY$Q^;Bit@ $(aY.n3LK4M}N'g.f3YLs L@Iqt!;[S!GkAS[G`y򩙐j! y[k12BC,X7{| =! 
[RUzQ>E,bE\J :MӊX1;kt;#rL ]*w`6f>mw&Nlq E$^#CG ׍']-sYe .%ONs՛L %vw~+t^O&QioMpS$݄$6^O,H&.?gg;Mʖ~eJ`=n'd IDATBgйIqb߯T =5js0RUU.R%.9H z*\򪍍$41!/X^ZsQ˚5v&m&7TAς5  r{YW eSUzјꙶ`wwscCnrOB珛 >Ni-a r:'AAߺu+L& " '1:#4pBOш@4 ~e,tR~v!~Nu&,K,`U 2sL&BrU$x*]^u ;F:XZ4X(eMiC3ϸd] fg{>x^ʇle$˜ȑ.]%]jb1!XwEcUt@Bw`hKYAk&$CiYYr\qPWl.NfTV&XAW9"]4UUQ$& }Est^B==GQ"]ty.3bO A߷X:Wrp\Ћt:tIq\ zcڐgAVn1\ :=pXAa;:Fе" K hRB+a¾T AZkfHÏ'BvA(b XZ4m岪z4&uqҶn:!gɚ5/>_̗(PjnYc ɭq!*YJ%ObCZ̶5+5XY*qRix|tJ VQ: gݖ5{e[Z\ } 9,^BeۧfKgq;,\85!%kj֠/]!"ʗlj}- '^شZLa; KU @JU^N,>{8;& wFT !q dn GD R#t|APM('ccdB:{o/J?.i叠׿O{3By%id[eE h)Nzv]Ar첌l_E8I-R͏K~f+BI.͸eƐCg7P4a 贀ΎaDXۯSuff~p|A!UF(6p?!HiEQUm9&SE~\W]x?gͲUW.Z>.]V;By%+Ja[rtf}c,]*j/9 VO ;} eMb3G|23r(*V].a[@+EIM&Ҽe̕`,L|8"4%;7)\ӱjb9}:c WЧ5;pv.K3P]V׬矠77祠1ޓ^.CUUaO. *)&.'.fXX"1EIV_D,lz:IqV:#ub>t:?cT4=A㲪|-/FTAWYp]8=v:Lљ|t~izqq6wSBg;erwE"-t?ij܉NDKBDsd`.w? `7(6ǓⲵG#蹴C4[8j \-|rj/ؕ$ {c{ hooFSS̙[oU_~gϖSIAjXY/ǝL|r).Ԕ 觼^ԛ͢VYEQٺ\uU˖ HJиŦb_'a xF %liDiyL'ZI_ص1W1~YA'ɹfMLB%zYn0ƶ( &J Rl;Jk:K1)ҦslH)v{< @jH|5䷳*-VT 8$0你1iEeQQ K .w<o}[9U}U/xBʕ9?F45D#3>5[X2 0:tT 6{ ySfQnjJgL "謅.V)N MBE\lQd*p_sR,PI}\wvu2%B?p@uK Wb1xp8ūcaB `oSx(PYY{O/\lᬋʰAM8HOt?=-Ah4)]k@+"(/B tz.=R2'=X억+qxpbb$D^+3ЙzX$YhO=T_{{f)P^":xpJ2̰'po4!TV JW#gf{Ѕ-6Ct^KfPo :ެ / :cZ('ү\A,K'ܥ_ rsMt1}."|> p+Qrc#U-"NtF>_YVU7o㩧ĝ}{cc# ;x[{^ux8~x,>22јgQz ƪYq=0qS^/ZmNPǴ<1@mmRQAYϯMo˚b\^*3.w+MڅaS5M5E@zx,sR^(Ea$ }(Dwuuo?54|xG0k f.%IQڊV4>H1RqG4K/~ى-[ ɐ/~ )UիpdIΟ9 W׋fD6(ws?gƃWc)6$k2m.Ք.:)MBvsh B<kD宋h@" 6d^ T誢v;@L&9ӚDzh=2@r}- I谼j9 "8@V$Q ?8҂+> n6ɓ' ڊ|/ZZZqF]vʶ^O&*1tg3 E)EᴋʈQl.FdO**] }XR1@1B!^n8."kDt zo RV!f5:S@MŢE@4 8y.8x 7w`~#皛lxOOb,,]b١q]Vd 08.nMjFLFPaQB/Y-6 ڰ&Sn!,tŅ&"oa [Iq~ PiIKL@"ǂ^Di;yC@vZ?y4M:xJu3a\5&wr s,ApǢ;:Nryiu#YB&eۊaeť:^#!q]3&q0h#zڐsBm67t-kٓ?L`kkaFQ(@PI s.˫ $.&SUeE L&uTR H ¬rw{Fe7o@ηɝ_YԄh_1y ]bBs~plsAׄ1t8J1 >g!m`,WVЅ+@c lmH˹& M(Kr;RCf<(U(d z˫.Z Od?ү,MMk5@0KVsʪ8sS3(^rnNϢC1WbR&n[Q\ȅp[I:*LQItIi][I^ ]ct N$9 Ql<)n#E`*A} ^]?-Ĩ3 };IMB$+̊ Ap  D:|M5qPDi<}+#E8@aX`'6@|%EUJAb rɲ#T( ?g#%%p:O ͘*y6P w.U]ѓ3~:0c"$M[Nϝldž< 2HhM 4:oE%JZ$n8 
,%qkǎ]$5KhvҠZ8yMIlK$J"iw{gq\Ν93sfܹs;o~(nThɪX{J&ѐpm+HD^+QB]-,ymqHqEdU,'x0܌gLJ{u Єm=]΁mBQ1N_{}91ىCg7ldnB[)l;MdS%^j@*Dg(X͑oN&U5Z GTB$Gr% ]{h=QWP|C۟25H#NّjIcЃsXQNdؓH&*+❞rLBJưX#e=(Q 1;n y(.#oTۼŴET!s6 DFؔXcex r0mf" ]bB-^c|gÁ|Cݹ8ReWlYBmŋS8{ ݋-6e tPrgFs~-?ړ=펻YDg7* XͭGZ#U|u^ ,0:NFĶ~e"92WE!.q{zmAӎowSR 1ɤйUH Xɚ:^yX\.ӻw;=I~ۺaep۹WiW"v/BK*{ ۄ44B?{Ŗq"E}=!u`v63#9?JǺX*lg]H/3bs#r| ]%l)eD"v2$lHQ!9t`w55zTy:hޏ][C ky5r/ hDPbЃ9"vqjחcc{aJ['BG]v[C,ĩSvj-&Mq)2@Si)l1]ŭAiiFE)7e\,لNQD<(.R!E, i9O.Е&9הyIBW,tuU${7;Q9K19;;UE`p(bwj.& Ag5E(N$0#I\a"Q h[pJBO:^dB b,(4+ !t{!Ei|+BoJxTkkw!&Ƃ8qmz|ݳv㘮gV~H) $ pam =(\ȲI.\Yc2A=Ѻ IDAT:D^ÍꄴibMCaBf]DMN][Z[њluCO+ "R~QG:^"j9&tPVQWUz6 (@_;!tEqB[Z-KX5T|;QVxカU '@ۄnmB @So_G!P7Q8bTrU@ x"Bol.K9Bȡ .SU$T^C@R/ʣHI)4yT5mIr벼I@h*}j %QcUS/Pǡ==V8]Av= =('O@'zN$wrrH{VmN ]З (/XH/f!b´5!@ r9 Chݶ(NmkfIhQ!w^ʡі5]x( j$tںV ܺeR'{N@"5oGlYB)ntt]Qe͖QXt~SS˚AU[ؾz't~P!wk# [(t,ل:449tŐC/C Ǣ֯qIZ]S"ɭi ksJ[0)xe°WqBTNb֯1Cyg& #좸2} PbQ"ZB֯Y5BYzM-nH`X, ˪06&ӳ {% b֯##$_]4l,{HE轃X,b&5SƤֲ CUN^CˣoW&5CWU#Eqńe] cȽ.rOI;plz[Z ԉ"#r \FB* VX.?!c|i]]hMT '@ۄ6:qg4Q33\UA!JRrv;;h,ArDQ:JXX&N@6K'"zPBln)t/u FZ(yPL:PscAm] {'{NVMA`tX66~Քh5W9i*{ oW`Zrcu\c*wB َЭr% PcQ*wAm[K&:%pq }Pɥ+SqQnE5U'̈'@ܥm X =؄rZH^ "" ^to[|*5R'$24kf'@]$ޠ Krk]T 95 &Cxp7݋BoN(* *8q/sz<q$t^#s ]wMi4O;S/.޷eؠ]׉ iރ;"=tpK: <8{,Μ9o|\O@LQ.]'g 'jЙq av G-.Tw}^kK貪"(Nqz|,!I `Dε;qpRȽ`&5ж.+t}Czx-UK:E^:$Ɩ5]xCeCY'C+xRVX]ܱxZ~Tp п/cll ϟǫ_|m?!9q&ʄxmMCYi Me(b1ה*q pЙ֯D>tiDz>졣R05<QB&WBVUܣQKWBzSR qB~5^SBwcE\(@ `؜Z)] mA۫ZUS5{֯4L=}*T\gyB/ 'x(' /X:~7_W- m*)*"eA%R*`B;?+BPAzc :*tӐ;tllN_Y!bRQݷ @H]ZR>t;W+ejzInHI7ޏ)4g҃WX`VSAg<Э_GGI5ՈͻњlP>665=zǏ_l__޽{=Eϧ1 PP2NY3T0fE@8aJT C i|oZ#RM6Ҵ0-/C%0Щz+tss讋4PC_fho'_J[_'nxV LÝ;.drrM<=(A.c }nnںA IH˿ &''3τr4<ġkv>YU Ѻ^@G]G)lAm]؄N=$=۴%2V!wGI^ Dt@9@"!ˣXm1OWWWЦYBRoz)8pk<ܹsN Z-&YljC?ahm1SQM3+Rȝ%_PT{Rtkje"`?ik &'u⑸v9%kܼIy;@rv3l?(%ԼB?z(16ire}>( J\9*<_c88B- XU4FL݉I畸FWN]FH2"@mrzWs{___BG\nJCq9E֯VEqnq+c[WS8 'B^u|i6NOkkP_/]"}sm~8isahh}{2ٺP3.E:^~e<7oGA>͛7*^}U|__u'(+D -16+r9;oR v9G<󊂥H/d,fxk,BwSNb\9 @;/nY)HάеS:1ȇ?)P 
8k1nB[]BEqsFvNrmJ45Oկ~x'?I@:ƵkP0`|׾5O>Iݸd2C,a|I)&2tviuSQ2 (LIzG(݌P-NpH6S,SЙNzAĤ ,foD2ZdTIkR*d0zCDX:nyJfJIMM$ZpmʱX(]_tvڶ65eߕH^>UUC-2Z5E^]ֵQ]ɞ܇/zM BvQ>VJ <-ktw"(+ T5 %,5_eRG  _ =G<7J!wzo[(tk9W9/.n:pC 2)[ Un]d]D<ƼxiZ–%t+cz;dH[\zKvsz*],0 pT_ڃx֯ Ɛ;֯znTK֯brϛ!m[K"$Y*S誺i]k{=2Ee X/60?L耵LPQm3i] rcx j1k ۄ=Bd8͢'GS3B =>O!t?SeS`*ۯь]u>S;Xuk@ ~e_c1#mkty#KRkUUs}NQ/uϡk{sViYFVQGRP?wQәr}xRHV ԠU'HZ,ž+n=4KH45s460 y ='qg簡8NY3AZgf;)8Z- BoI+:2:GQˢ8Ctc8U%:^_[ҦaQBh߄"i#E71bNv2qQDw,IJ.U޶@AK8ut5ly *@U&tںbbMCI ]k!`2l}rSZZ SrCga>İrڶX9)t[z*Tݍ;]`SL\s)RJ^/P sIlwpp3u-pxx䙺ݺFM^p}Ѓ܀cSho' )W*BA*:g o&zGQ+CЍEYUt;OZ@)Tv$TwTDw"UvU{P8U\l{M^p钧@r'z*ʪ* 9PJ$R,"`3֤5 L Iy;k|*BEtWQA@T, *t}Q]f`P@c$R(Ak]7IKi۸]BUZHtZ:jvĖ%ta-| lyHnZ($tMOp QRs{*R 9tH+ WQݞV>>%X,~M!:m5zl.(X1NM KK6IV.qЌSz>[+Z˚ 8OMN<~ӟW3Ykز T{R)A[(BE&oXQ[Er8 -$7.qũ*XM ԃF''])I*=oQG]GPBg?[O>ӧO㥗^­[o}.]" dzl?X w[xC@0E:G+|/7qMUUW :BlX)狛׵T0 *Rݢ(B5P zxrgP0qmy/9t-/&']ρ*V@*ƬETпx!">*q &xvYGFΖq"jD{;Y@U`q3(o&Rl/7dINjrhhYE 74ҕG!SWڇlK֯ @еxr͡T! SBg(ZV9tFݭ)o4G"hD U''\SKݭ>[NN_sULoLtb'*rG|+_SO= .W^)A>=aT[2*xPTեBXH?BT`≾kC>N׆KyTTa{JvUCCCeSM]~Շ¦^k=`,>XZ2FRV:GhQNṵ9rqe~UK-kz4 Yz֯.!pv8MR[0.4Bx"Ξ=d2'NOO 2w//:le__^ S<-֋HH!̂eA(;B4B7OifQeU ,^vtUz%ڇ0܍nW.9tǐ5urWTK{W.ZW5Z܄(,*}̑qpmȇGM2: 3m*nZN;3pVȝKG"AzSx,za hڲFd.($='Cdy 0Bߏ{/^{5Z^Br9Rx ?}Q|;,~駟ƿ| IDAT0:::piNP<0@ЃZl[Ekz QF>-3΄%٢ !m,5j4$>!wSx`ЭB W=[M{|m"ɈIBG,SgOWjc̋݉f޽$z}tnVA68~HbK#{7 0DQ{^|[͛7qub~~:deB XOOa%>S|)t Ě%/L\!wC=,&eU.e4i f sFYG)$Nq֯U~r״5يl1!؃Ns92m?@ڲG;S+Yъ1kC?tN8kW'| N S|zCZ f73vr!CNEq;;dr癉rS BO&7LFltUEqU^76onD'qhqqaJK,Pc1D!<)kFaGxە^t<`2 jJPzY2;&;L,ߵBk[" 9T[yEqCA|sw  ݍK'`xߖ5[T50M=M}`kQ2%E,[Ɖl=8Qк? 
`}]ruFTz=d3~5>QD" _A@<$Ke֯ p{g kUOQd Be"m_mrzXr nlNڧO1^of:3C&='qg=ol:/FG(YAρ@Z75EVq2 hN4#MnO;z8BVjZ2 rf*doVkJNA CT7AQV^,cc)'qiRhiZƖ%tW֯!Qge-kj75-REBIa7Z2 Qg"T ?>?Js\h_͐{ǰ~eUٟ/'DH4`aa%qM5'+m1ymN_(7jTE?p0 2qrr3Yز !zX,v'g8uͫB.k@ȝ*B'\*E":(D ^Z( I_DCB9pDAܒa }Y={gΜ7 m?0̙3xߍ'~\B_G tLpv`umfn-mQdž+mBOpqq r7qmM!S*nA%"j;qf-EK+9gBuȎk@vTT;E?}Lect8r#X V/ccc8<^}U[{1kx7q9<(xQBUWl68`=& !w/nqb8WQG;`nc)%FQ\^BW-+nLb\/͒9: ^Wn`~{p ݉/''):J (ja\zPm<E'>^x˗wcyyi}GGI|?}m}h7~5Nא:͵}W1(*Q6 ''ʡsfUjY(ΆK ]O0̐m1}u[2iseƒB&7#ֵ`Z(z{CaS6,ưG~vqsss?3|K_Ba72=ZxDAı.S9ew`:kH =?NH8 0&M]U d*cs?ّ'3NC:T) ГO}hhyme֯}yt+=6Okt* >0DZր͢8'+RnwB9ZG^5-1 tBmܾj >=ϝ?H7Hr*tXϯ~gK٬9eo$Լlv+44GZBn\P3ܝm~:/Pe}L,M`Z(h뚏{9h< 8Z?/~#@Qb- ѣhnnXg/_ƃ>!+@m( ЇJΝ;g>+@'I*(~=@sgݽEANkn ]}=BwZX}E)ӇnG]TIkC{z5 @[֩rz[N~ }m͕_G~ p7!߽+PNq$Fs044H`OyBbxg~`}}/2y͛7# 0_Ru/|'*H}׮khk݁ f!Z:u͍KW dѩq~ ضfnr/)tS<`OMM6 }m{d׋*Ep ϏB#"Z&'ϟjtUH;` FFl:|_|At:>5or~N =kr>ȴ~uʡ3S!w:]V[W9t0ΏK57o`.= DGGcǻCn⺏p_(X\H2űr ׵n=>*i.Ÿneiqu ;C(hN43C ,y3 Tgr7Xѝ_uUA@X q7q}$biBSC5V(um '\zKfv+H&qc}b|ugGXBÀx߰MV'!Ē ZQtyAw~B&s\#TWVCtRMvE.+nrSœCٚY:Q4](t&8:@N}A hwk hN[NGO,M B# yz ##dD :$|߀4m5G_T3!Dw;9GZL!ƀ tlf 8 ÀN9t^hǍ^e| tBҖ}MM//p}ГX(#suo;$x% h;TE&[%T& NV[d72Br=!Z8u2^dp8p;@f@nܙ|GEU`~!< XۿgCӟ&ղmmR(~e  a~=_)}B`%A.8" Ǻ6:iP'L>W V~d0 b|iBǭ ##ZGw#j:&: qi7G?Iֹ# [ h98>^cz6X[>HYU%aeeMÏ<< >.gYPmw(:d+|fZp;BcsD iIrm:4Z%>_\!ї ?S{ְݲS*~X;F.^;47Fw_E=yYOϝBr(CBk+J)EqT6UUCRAJ"MytޖlC{zs~])VI^ ƗCpHf!5;% . 
iރd.^qmBga~B~k3 oeU5\xtJc'RspmMjՇegBׅ3ZUe,J;z}$"-svRιHvϓL =E(.[ɮ`)BH-_'{N6o]\ {Bw ث "aVBM9ty_s[8zccY> ]C e26%jFT%<8rWU8zM&rPv2BrBwղFCݡ"S A<і0:9WA ZaҲM,q;.h2Z&uul \fnpI!w(t9@x́9$`]!mkn6 =W̙BWr׮s.g}|Bw דz;2zn/9rV 0=2x p]>4X=v6 $,p0ɡ{RaW#NܨmG[m_?;ׇk nBqp1!YbB zR}Z猀D |/ʡ}S(M!wIE}C?N.=J UK__bzZK:>ҩ)r-`ao2۹+{U?EUÑ#d1w׻wZ%>3 i7VnTĖ%t[TN^ !҇~&Gzq {Ȯ})l{ŵB7 >t -)tYFH9{ٌUϡB<2SR MM6E[\ Pe{pYqAqA-kԎӄבq9z9(:qۄnD6KEU$A\jI"^,uT6`w",chdUE%R4*L躸Xf)L2Y 8z}$RXII)4lcS#4fE>'Bok#ǁ .{=.H)޽>}VhGn)S{3YUz]agytJAa&;"{ʡ- e?$ad}l&ЭBPUS:@,ܝ]]JJ)d`C.B{w A a$NH&efzUU8:; =j*Q=M[.z^-`%aZ;@p.}ٵHEUA*t|FnCNEq" ۲Bfȝ[5;!wov.qAܗ˦-hnBR)$,RR* w#G|(mB,[izD(ww}W{ {$]Mmhi  Z|zx1g/t|ǎ 1 5&<.бcLB'gU'?Bǐ$ݤ5&a5m7B|Hdzh1^ 9bI }W" O>ɵ=U".CN|Hk 4|+ ݈ ӧ+{2 .ԧp!(p8OMB( }X܇u Y{Y8p`Xĵ\1:><0Pr=I:U ؆{L[ fhjaXrA@uX=Q`_Hf=QAx'kq׵##@6;Ytx۫-KL2闬B?>Dt%Jw 1LdV!w@Ew"tW{&x ,/szT!Pi)N -U>t',9 )BS94q^n=QŽ#@;ore (%_\qhnZkÛ3oV<6q:ybb65d*i/㦷nA+t^,:;x{&Jf 1r[WC4nYD62NEqBw*knsfB$tHwA[K&q+TNJx9'5¤84.^imBEF*S5js JVEB߷GmbW {͟6~t;"`&P=  !w`ӭͮ(DF,Y)[95U nq'QPBHOMkBoDR/V=Nqwз||nd _BUGȣ߼ILЗvO2|\f65M;|1V+ {{ZCw(kn6z*E^. <ܽ /"Z~ ǎPLrUΟS= lFDolYBgZHAV+ "4Z*R7ol#\f65Oq?2GGlz)ǀ(l~asjߏ~CCa%"IQA>:G}zZwOPt{,eL?"33_SBI~fIVo*rpIҩB?,T]8lYB7AUB?sgRNdUnЋ>EDQs9M*t`,CtzqrSrguqeWZUr; e9Ƙ}Ƚ9N! 5UU1D&Mz-p}:AIbS}vpŃ^h.&t[T)\_l [? a~f8d.3q=fu2N\[/tFjCUce2r$t!w:] :<(R\Sw_ \{t=@RHKxBl/sl|"yƙgymBx rUi zOAQ\dS GX =!Ř*f6f+tP[^6%w@[ͽBܭ_QQ1 Q]J2<}K++D(tѬSy+r ~mZYÙG&@{oT4B6S!s݃Ӽu0ccCßɣ/.Bd9}C@ $n]5Z;^,}5?wR^RЭ!ԇN TJJ]\WQ-fˢXk;`Ir n8ϟS8Uz)81-K&ٳ9 SCDi2'GG!t:a駇,ֵ 0~ ?3]؇.89BC\9Ο4Psiᚈ$OUN(N%H5*'tv' :| fOMa(tsK8Q'붘%Sgwʮd/&t&xwO[Tb_sC-NrlW@rsAsp"T [ &t+B? 
-HI$ϗC$ׄCeyyIBG,K82xmCṳfЇq Jp<Nع`wntw⍙g}zn&!p4FFmgR8Qs#8}߾'p&5X=,PB/[Zr*Қl*ؐHx] fnE-^5n)npH{HB&B<>,|e"u=0*tvآ1BwSd7s9(g9t3ą N5ApvmB[7H։>N8l1k;R:Pq#8_{q Ntw\[piɕ 9B׊E455:_ۚ%ϛBH,^QZHiE))t ^BFĘI*ع*0Ν9쯫CNQ0k1|u걻92{>L F5XG߶qۄB?q% t .Zg;W\doІmqxߓH@RU,5۸MB0ܗ6m8VGȊ,مBnv!l!$Ҍ'" er*w:4BRID!tAQB߱Ts  ޲ȣ__2J;z 0B/- ^ ("b~-AQU1XM>0@l-V ]{3_KkB\L Uzr7(t*wrRRN!zV#tfB!Bw( ++Ptwb1X>Pa+X Me괬q U| oQ[K֯JܹꞐ FR)4D"؟L/xkU.8@R{>ÓzqDT7>]M|K }m e=4% "Vr+H2T,$*wB֯ZD(f ԨУ A}Yp?Q 1>((_CaݻO c8PWgЯ-^J;!Qh}W.4>q_zP>g̙37NUU|; >Oa¾YIdAř30s9dD+2( 8_jխ[df="؝HHrEW7f?D V+Xet *^ (pl"(7E˪(W"Pʡ7jDQr@Yqן\]]Y]UI m %ˊ ɓ$f ={\*A׫}* /ꫯ_}vt_я~7x>fí (8nJG)]30  qu ]UULo(tH}%5mP kc,S"tFȝv!Y9[.RZݩ DBu%B/*ʡ]e }u̡d Bz y9_;G͚UvV@Pm<E'>^x.˜h?8?:US~F>3ZT , Iym삀-n5\1B/i]}xmIP詔u|v>hijУ WJ^ @)SP .;iRFu 5̐;ck T%bJCנI\_jưG~vq됵%Ecc#^{I:4XKJ#D S$aPZPGB7&'-x B ])'% h =Al ]d2S' ѶUU ]:;1侐!sAپRY!]V8XWb4FUk݇MGHS)т8sAk_yBV$Yn~⩧B(IZ Qqt]L pUp0I*''=`0Z';vrܞ/,]]Kj(C{ iZoOzVQDPy o&C!lRDJ>O=`=UyЇrt0K>44DzEAmdTU5_O&iex,k\ww?iO%P$}qGWUWHOOn7ި鎡wUiwfFfٲ-Y.MPG A"ld&l% i]Boa&"ے{{?є{g4I>s=3s_(-/|'jz444_;y$.] qw}jwGŬVEe&}Av?bMBh=( 2ҵ.KWZ1(\Z2`9 XQ`q=@.B sP!q$tGCd!UI—]EAG//7q`7ޞ8Uj5B}>˄gf͢2ćC5&HW$nV/1cኅPM@d0 ].c˖-x饗,, ^{5<hڊ+euV?3^}Uh4lݺ]veؿXb,NPʔ_4{iE˅A75ERV*EL,]8z AE]#IXй0:#D2%llBg\8B-t{8) 4B?%kh[>քB觇okk9H41.GB$2|`:|GO<\NNZLBUS C"ģAղQ3@xݼ4J)-t,a`h;w9vȔo003:@eL;;0>+2O0sjs@ [߄#t0Y &\$ ʖ #-P46?>M YP_oZ"I ]B& :w+HU*@X:#0PΘIy0T-85(*D_ 8сKA>^vG`ͻm6LZB1 0ox/#a,/] q8KrXgҒ`7-PK`FEhR[\ ZkP$5/0Q LlYĊn=^@Cz(RI-G ĩ*|uњP#˽*Rh{mm- @aKB>BM!4=.T\$ /2NSd @q!i*XY=]() Rs{jD\rͰ?$,H_-s!Q#Qǵ@A4@R\:51Ť8 ̈́ӭR)}L\VCÎyХ}(%==\]H[`O{)`y{*X>T&fЋ"æMЕ*!HCV˃/2~'UFSʔIt(\ 4pHrJ2@)Xb@ -$s ]%`R55c{qط~ o^2+βjS+X0MN6|D8$K bL˙9BQ4B ),xCJ] ,H2! 
]$jQ.w DQ,uBS|hXgIY1AEhMX R'ՈzcIKgx"qXp!,aq`رq~ CW8U6SL2%Q =]ч!IQJJ9 ac}ΜFWQT2)-#dBwB"_.XYY8)|77*b-ms.hCfʍ'xX>_DN?1i o$?!8`bTTb_׾^/NbO q.YvPA,tt >KBX@]n@ R*TOENR=Ne18p !aDXZ7QB**0>Mg'Y,'O2,r80RKjF !FG8B7[D6pdd-BJdA~, IDATKV իO}|"Ia5(80 ۺuW"{,ɦEŋs4o"HgQ.X_%2 =mSm@G2ymD x-0:[b{3rBnmzQ"W Elu쉱hqXX0y8l^$  `<4;X tTe˰k'ɓǵa|Gȃ=c 6LWbpJݝڼ(z뮹&8HFA͸p 'v' }Iz]]\.^WW,9 z<Jli Cqs77 t]ܙXOB_hCP˗?}dR~a`F1z_Nt|i6C0XRb X'h5҅ ;7fwFf3` K02BZ!g]r >hW+Q ^K"NHtX? !Ƙ>XEBxPך#s3L>&z(ǂʂeq.!/K.AХg8K$>7'"&5^I|2qivxm)vBգ?̟J2Fh1@aQ6BOïi%t' ƨ 2iBL;#t>IIi &tWKF\*B8}>dZR\ۍ굥Л>cc:qx bF9p$A >V΋~+O>إVGV 5J|<|3IKl"*8uj{, B˦^$~=u-\xaѢ0 =knƌ]8 =-eYLyJK=̼+r lXm6*}68g F^/ďHy, y'~~mq }挙Y˽CRT!++AAǃ2RXZ^/-Ú9~JwO;UNBǙЃeڵ_'ø`HIK,m1?`ge!ea_>xǎz=F;R-Ztu!'u87|3 3жiuVZ-t Rq"%P dgǭCp |, ry e2 :NmzT=@ʲoߏ2e2t[g%VU!`hjJp(/ Z-M]tSɰv-P_7:3n,L*7wvLzBgjxD;FF.'esPϡ1^U4w"1r>JRr<%Bz'P/nx/''&BX!o<f:? |*m̧4G92xBgY@&nJN.$jR2}Y,t`Hc Z-PT4fGz`QߋZ\  &J2tY'$rϜ9&q~֏cpIhEZږvܭtu y&nQѠBXρ 41e'fڗf3|Zn:h1vR }IO&#uVV0zfJHн^vs%@C6K[聒3D칤8WKj[F\21{~>iRe2\v#G&CTv{ccΡjL\MMpxa/\đ<_On6/oLZBgB!  
l3HEOi뱻c7طpehiqK.8xqBϤkpT2eYLu&=X >ևo>͸p(AmJCХR 4C, fMpj.;7"[ L.B?((@AwOjkkvcJF}>3kkk4^[[ý0`nرEZ-cb\mm-s'PQ, hx="rݎk.&"k)O^[GF!uPBylKhU8|J?VWGX9VkF_M;ҥ 4 \ʅ qGz(P(PUفQ(u",tT= `gu, -IxKo^lU6rd2ф^WVF&աFyE-6uuuhlʇATUkKp-L" ;DÑq$:K_>uteh4"[&Ú \ä%H\=Uh l#~PS)):~>C"ג%pqF àJAce3L 'vxP  ]}ddy9 "@r]91MrT9ȑDap(*B~>s`@iܙDpi/0B5 Z P!,)]bvA q_sXn.O0F# $ ]%9sh˻JFmd28:0PڶʢqOY~1`Z b*aݙgp1\u.$FFh; Jz\/p0 FZNaF=F22$dc`((BAc2jd@!UӒ:;GgS_,aq0D,0~3\=ZB+>l $6[M&pہ7xd c^m~6Tn]p;3 B(/[&0`Ҍ:!FΛlgAPR) '/B׋"FB3EX&SйmL"^&Y;, Bt,tL+9&Ch\X0b= dTO&hΞMy~!4aX4UPX NoJkN;+hd .r5 =6Q-1 B>5pțocF8}quonVҥ-2B%h4ht:k23kD-t7ˢ0XU{|d9Ћ@ B L&h* |>9 ;C%'t;乱c& 5=K i0·f?r tXRWs;syT>`P]LPo2IW\!j5_Dºb$8/G%Kxb<31[,lٲ]Naz5PPvZN\ՌeP]l܌Jdpf>\gGe15UWYE⋉;`Ѣ@AaV!~q/J ) *Civ7Qɲ2⊊ͤ5iJ̍T oaI钨8t:CE ;w&t}qf%F#n5.P//hhh@}}=v؁7;ٳkV-7V:V>F\1ʔ"Džj_twg&pz4yS(- TQxstt/BA'K:@hd$`:ȍrg K ]F(.sx=Q n#-vsIk^/(̝QBwܼvz4:.e}ѧ\}5Aq|t^/~H$z~xVIENN>lH -PG&qUn.$ 4PԨﮇёB۪$pbEuSfܓYR)* yȎe]0 ^~"ssA=$NI 7`de!tǹMJ/uTEE:wDzQQr^P@\@HjN%t^ }\P!}nx,_)8fY`cW?V'2b_*mظxkpz)B/$LxBohhlFuHsMM <?϶YV2ذxZ;FFpM* r]w$tX2c0؀|O^:+ TڢN!De<MZ7 ЇyY=VzJ|0ZۈLVwu9#eVzYpV:KXdd` Bg硸K|GEHxN͙ m.Jꂄ I/}2T\*ƃ%PUE I|!ʉ\ز焔n`X r'l5eل102|tn:;FyzAAb .ThrPL$ӽAIH~QV|$?<<T9Ln![vL@=ιys) G&p滭^ZM{s}>+{|^VoN8" mm@Db}hm?MLNVP \l邮^RT rFZv;$: Z^Fy 6>Vy\\}} >bjn=BϦ7c* =Lvɀk/]?/ VegcŒzX|`0Mke|m` K$&<k#!:###0QPa۰>@Jg F#OBT]~{?ݞ=: nuZm{*VҼBaBwmȕZ[v, PTT6FXb`%bʔ=9k/PQRLrKt> @]c}T.6BXٳbZFX`bbLc, \_\0 z !'Obҥ:Ģ6߲UVoKipjEǃƜ7 Ai^Y4֛ƅTJwu 'N@SRļ@>d[ND)}DzqJ6a λ C]I6$:-TG& EZK8,tBv҅G-tc<{o/PR3w-Db1FYCnI^Pe9N3w;eš)P󲲠J$`6:]q!0>n!e˖k׮'KX?:,JEs3wO1yI^/DJcmKGEFZQl G~6: Y,X=fr}!BwoܝN*abźܛNTTffy9d͛`4_`ԵǯFӉ ߧ?) 
;ظx4K>0 ͓ yA虀{eIǨ+P*76>*q^Tf]w&I;wk?PYSCe`wLF`Xxaq]ٙ57SS?a%fA9 C4w'm6I)sIuuT%"MsBC0`ܹ0 òo-] B˛sBr_Gr9xr"zӉo=60vj= TWgǑ]Nhlڊq<,롔HS@?-سބ7Ƈ[nuj/ᖂQͭ.LZB3.^zilZXdE 6ٌ3EK_;ve GB& qP0 |!sTgܹt`XT"EU0 g 3)ԴCDKbIx,.UhbfeO!Hg\X8֎xؖ'sN'"ǁF|oR4o(7mn֭r %(tj:AJk 8sK_`Og2kM܏N;#D $H ۅ";;B7\{\P ] ssa.w`r_TŚN@R~8ǴhӦCF$,]8k3ks_t pJ\s74o$>rġw啴I 2Ҹutplb3gHEº:̙C =>5 a&ro|#C,u\VqzXݣl;!m\>}k2A\XzEE=K8 8R\p̜9h269ϡr4{ҥI'X?m=RJLӅmU }pHtm|D6vO~`ziKxD͐{w/jJ@4>r@>R|,^LA4jd889v!F)Q-%tinr:^p7LaW(wsx+ +҆/H@чa z8Ng<7.~hMwBL\&y&YgS\5㪤rsazq(&DtLI3_X&Y2.7Hsx{.98!Gk8|(ӕce>V]sM H$UW$y_׾R  i9y4-H.x>6LE9F"t0B7d0B4tCHгC4.=+rYv;}s-BNr?phʛ uEkoE X~IIwe5UjPt٨r&%Rz󒆼^Od8 ]$d2hr\:7¿ J D{ރ[^/putgD)+[cXB=R-[dYujp ?s淀7mZ7,!hp8w10]4G 0D%q()}8i4~9s0ݐ|7067Q8_4fGii$)/twC/kH5 =< Q陯n.f8sߢ6҆>jUJW]EIO>D ؽvQ]BJ#rBk+ϋ-ZЮ\]-P`&D*:#\ fo4"Ȯik~?&x],f{ck;@ =V[v?.Aqee{Ґ8Od$6VJ-Y'un(A=鐁hnsм.=9~Ww7-+*XIHFBy/Ӣ%zz0WI,cU^O[Nò* #l}ՍM◝_@S nB]׃G~$uQǰйykjhU-.ռnJꓧHo/#Ҧ^*ŅXF1غ^?]71X|>ddD*f9Xo4[vr!`%ڒD&R@gO.=aAZ+EZSX=hS`S N o{=yu99Kx/U+)9yAéHFᇩnzǾ>8X6L,Nmχ𭲲16vov*ĉnK/ً Vm}mfbU$.Dkv -IH 7aVe<\Z ~jV>H3Fs7à6%/iqMu@ pWPb2"` I6;ӳ ml .JhBWCaΤ|У=IrB5F#|nn|e腖*p,,;( ?faa"-١蜁&(MЛMsn M md$~z!XRyn@BA/> z.PdEBOO.KASđ# ̡c9ЧdO\"j$=a(*က Kc(,.f:sTtIKӆ"!`腖="1:t`0olX$Ы򪠒p/v;(/'".N lL;۵TߒqMn. wT|CPv;lorX?ioǖҨG = > 5g-ˣ2^7O.%Z->U1sBwbR&0Kй9fBp8wvFzABtPk@S60'yֆI^d2"[  LNՒ alL.e\5R) 2Y8wtPjFggI@, 41#Et  ׉.7AB27R]rzU66$TU*,V }}nb(*ʇ~JkAb}><ގ˃&=!&2 !Rg?#D'_9 fvrfKdI'A>jH9.!WZWB-{Ie_Hr?w8 < dԄ!e_~ر"D>'{i_XǴ͇tםx1C:pSZ P1Q Of@9x폱s>onKȿ&ڑ#Mbn\L˙dgC'BeeZD 5t#`Y!'&HSWm(* CqJm.M?ˣe煔i`t vs;5TR[)CbcsBa_WR~5)q54=+^7.FU/. 
b$- |ش)nKiDM Gc<Ԅ,<[9yqS7n=/--s:簸d16لmQ]|K s73 fw1;B"U"<^:9e2*)vnhV]@5O l[2N6\AaHpJm IWˣ/ =4fGQ\Lc~?ZGZQ#,6K"f͢Ɇ!m.$($H|tF:D?Sβ1:.3]LKt:,bKI}STo=!cx]U5pSԓ_ {n7NR*?xፘ)w[q6M&<g')SMd._۽ PYCOfYu:1_r0E?'\ssLz:KBg,垟O,|M>AagfI %ZbOWv>,K{Ϗa9㣯6`,K\^Q?gNSF#'꙾7GuK7)- n;WCU-L^/mj–Rbaz2үB1O[ OTT`xX)Js+}D$k{]^D7عƋ"]Z6S%tE p|Wp}dGaeJ( lomPtnKjJ~lLYy0 Vª)KƃZ*#Mw7\|0/;qB~>ӟ?ytx=;pHiTJΝG_dŸ|U4"bKt,n9u 3j=&.zqs{:?J[7C&^®]x駁o;jBؼOG_ܟ2T^StԪ?1  2}-s¼y 4 \oJ6/й?;mF:MoYh š.SAݞp9^O?{F\i0@B2ǵUע҅c4!#7SWo'.*m!#x&/pfh"\}gOp뾳hu:ƼyPN^b0i?L,ρi: c/YYƅ OyO<1HhF{aq[p%e͆=W~x oޅ 8s{:w\,r|M C(t0Q:7RFT![[[b%8wNΉ;i^O55%̬,X@lrxĚ5}~|2<Dk kPSwμ瞣_uI˜j4T,@~|`ՔU+4)ZPSv y<4>2!*~(A;>p/kDLjүiرS&dh(cJ|~/WIyA]GE{޾'Z{0G儘R9<8!o%K GC| BRBv$绺o'{ɦME$E׋DÁM'ObS~>S^#lGñT"_6Tw߆Lr`ϜƆ{K}o+hn~8Aq0P>K6Vn[:%  (V1?WrE &q`9]09ZUR)h8t 7nEn<]w̻-Өx+3gsuPWزa,-]FTl}^vmOԄ_ϜS?F\㘣9sPZ`v]>|}l<7g i  j3F;2E^`gfipCcJ,Tkc('>{xM<F{?{jii`^|xK.Nh p;訪$HWa~j-R,RZ(D[(PDa)6 jR"EDL"I @JyN2I8ɘ$I#{=g}s&|>N9|XN']%/;Y3%֐}Cp1JzVhq(K"2Zlnrb~ OLDtVXuZHzWS <yJk#H[#_ NqQa'LƇLKIXe^J N4W/,bd`fkRt4 :x ⮻ڵklSaG/jA:tPFFP]Zk߫1n;ߘaJdea 1oSh} Q9eJ261];>uy 7 3l{ R/{8(} p:˦aO+,*nIIAl8lvl6rjoXjo@\ZEC3 ĔЍѡnGŋc`g}o‘#._wUy{f:&36ll\X2|j*@/Q+&j_ڐ>'Φ/\hPsObCh3 ;3fVlj$@~ XZtDž#% p^nhm܀C1Ϫ^~=y˭ԖksjOvw$߽w\WNd ٩YRŸJ^C2~QQGz:Ir*>>껾2d^t*|[ai!ҕ䏵^ntt+̜{F%نA> ١KQN3%=Z)kÆZw|AH>77C7 egߑ#⪳gYgΐwM/L6\qאx`llؖ-[C[;eOX,}r2:zKM ϏUf˖AÔ)dQQuU߬ ~;Ķ99rni;w: 5[9?vy S 꽼N-"ΟCzI ##ٽ[3GVai!;{;-oi]'۪I//3""yysyNSDoo?}jcǒ=oC?>rNxw'oޢƍNg-W^!I+#>PRRI2O9 &⨗뿴 !O]HKS{pϩG7;i>\ҹX,'$T9UnΞUMgrr`L \jfK8 &ZD&% rg:Kd˖OJvYFW~ ٷoӎcv"fUs;Lq1.\8""bb3+*g(CNNۗ\t8lWFvFnBZnN{+]eŘ7[~=I8bOR>#]jL).ϞUS$ݻ;FpMuBAT7}8HǓSEjfӦm;XnO^Ԗk<{V?^9`LJX>8{^aJ`&&*G.ڿ=ѓvhv'5;`WsAASΟ~Q]$yjj^ٻW:uwԿ-[ ? :),s`4߿F2Kޟ|R-Tt۠ %ڶmȑUCN72#l$I'wfBo$`"e?<Ц ڭHJBOX KKÔe k#M]Cj @HL:,r!eZwn#U==Z}(Y,jZyhYɄn%$dŒN8VXR| ѵ_B-9S}mfSQMϝSfwTz XBEbuƒK_ cZii:RS:9?DDF|F#C__ Vf3ZyyiI{l$H(qESV+,d:ZL{7i[oTw>ظQݠAޟکW:o21BA# 4ǏP@xx8f͚bxW2 wj+qIhlV4k `d 4M+5͞ rYf;5@&$zd4ͥ '@uU\i),$hu! 
iZgcx(uiUtC¯{h4|}//ܚ Z0;dѴ"hJێv hzv >mWJC0 Z iŎzW}u{Z jN(4 7K{-ux^ali9_`ZM ?BӧZndNLL8hwR62  l6cΜ9ؼy3 @AAoߎyp=ֲ3gĜ9s`2p…zۼ{=wC4 6 cǎuYJ91<Wu*vu)((p4~pÏ`Ŋ7o ;w..zTTTT__eNM6vyTtR$''#&&}EC]XϏ>]*e;ozթ}sI̘1eЬoU%##w&k w >]}NB\kS;>~ʽt-[t|rr2ѧODZ~!66vKV\i>/^cذaXf{!:ozͽ4)kIff& R@!''۷^5KgϞ=xbL<ѣqe[qGbM;:1hGBW^x:v숹s7߄N(A^h vX cyJi4`6n ?gnPTTtСIdѽ}X_{{?zdd㧡T^Am{مTbN2 &ԏ;ozѩAs+{+ >+뿂m۶P{B1}t\)b87w_ǣ=|IN4C%w}4iR'N`ΝYXXH,--ٳ9h 8k֬b{ 鳲3228w\vޝ:t(,X@k u^.uO7|,wAAGA. 8tAAġ  ]A<qp'bXp!0aL6z&B#.7 Xln݊)))xuV$$$\oAh$]nBH]vO1r- M&$996 CޢDChL8ёp",,:K$Bc.7 0`t]ݻ1j(Cжm, E $ $$?ѣGcΝxGx 4Y' A. 8tAAġ  ]A<q CA@  xA. 8tAAġ ; RIENDB`einspline-0.9.2/www/footer.shtml0000664000113000011300000000053311012400560013574 00000000000000

SourceForge.net Logo
einspline-0.9.2/www/background.shtml0000664000113000011300000001250511012400560014417 00000000000000 libbspline

Background

Basis splines were developed by I.J. Schoenberg and made numerically stable by Carl de Boor for the parametric representation of curves, surfaces, volumetric data, etc. They are more commonly known as B-splines.

One dimension

The basis

As the name suggests, B-splines are constructed as a linear combination of basis functions, which are hat-like functions of compact support, i.e. each function is nonzero only in a bounded range. The basis is constructed in such a way as to guarantee the continuity of the value, and perhaps derivatives of the B-spline, depending on the B-spline degree, N. The B-spline basis is determined by two pieces of information:
  1. an ascending sequence of points, or knot values, which are abscissas for the spline function
  2. the B-spline degree, N.
The degree determines how smooth the spline will be. For degree N, the value and first N-1 derivatives of the spline function will be continuous. Cubic B-splines (N=3) are perhaps the most common form, and have continuous value, first, and second derivatives.

Uniform case:

In the case of a grid with uniform spacing, the basis functions for a cubic B-spline look like:
The vertical lines show the locations of the knots and the colored lines show the basis functions. Note that at any value of x, only four basis functions are nonzero.

Nonuniform case:

We can also construct a grid with nonuniform spacing. This is useful if the function to be represented has detailed structure concentrated over one part of its range. For example, the radial wave functions for an atom have rapid oscillations near the nucleus, but are much more slowly varying at large distance. For a nonuniform grid spacing, the basis looks like:
The basis function of degree n centered around grid point i can be given recursively as:
For a specific degree, these can be evaluated quickly without the explicit use of function recursion, as is done in einspline.

Interpolating equations

Once the basis is established, we must solve for the B-spline coefficients. The einspline library chooses the coefficients such that the B-spline interpolates the data, i.e. the B-spline curve passes through the data values given at the knots.
In periodic boundary conditions, these equations may be written in matrix form as
where .
For fixed first or second-derivative boundary conditions, they take the form
The einspline library solves these equations efficiently using row-reduction and back substitution.

Multi-dimensional B-splines

Tensor-product bases

The one-dimensional B-spline can be generalized to two or more dimensions. To do this, we can construct a two-dimensional basis consisting of the tensor product of one-dimensional basis functions in each direction. For example, for a 1D cubic B-spline, there are four non-zero basis functions at each point, x. In 2D, we construct a 1D basis for x and y separately, then construct the 2D basis as the tensor product of the x and y basis functions. Thus, for each point in the 2D space, there are 16 nonzero product basis functions. Similarly, in 3D, there are 64 nonzero basis functions which contribute to a tricubic B-spline. 4D B-splines could also be constructed, but are not implemented in the einspline library at this time.

The great advantage of B-splines is that the number of floating point values which need to be stored per mesh point does not increase with dimensionality. In contrast, the more commonly used splines require 2^d floating point values per mesh point, where d is the number of dimensions. Thus, we save a factor of 8 in storage in 3D. Furthermore, the B-splines can be made to give exactly the same result as standard splines, within numerical round-off error.

A multi-dimensional interpolating B-spline can be constructed by solving the interpolating equations for each direction in sequence. That is, for a 2D spline, we first solve the interpolating equations in the x direction for each value of y, using the data to be interpolated, F(xi, yj), as the right-hand-sides (RHS) of the equations. This yields a set of coefficients, Fx(xi, yj). We then solve the interpolating equations in the y direction, using these Fx coefficients as the RHS, yielding the final 2D B-spline coefficients.

einspline-0.9.2/www/links.shtml0000664000113000011300000000134111012400560013414 00000000000000 einspline

Useful links:

  1. Wikipedia article on B-splines:
  2. iBiblio page with instructive Java applets
  3. Mathworld's B-spline page
  4. A brief history of B-splines
einspline-0.9.2/www/header.shtml0000664000113000011300000000103411012400560013523 00000000000000
einspline-0.9.2/www/Makefile.am0000664000113000011300000000115411012400560013261 00000000000000EXTRA_DIST = background.shtml \ benchmark.shtml \ doc.shtml \ download.shtml \ faq.shtml \ header.shtml \ footer.shtml \ index.shtml \ NUBinterface.shtml \ UBinterface.shtml \ bspline_logo.png \ c-BN300y.png \ NUBsplineBasis.png \ UBsplineBasis.png \ F77nonuniform.shtml \ F77uniform.shtml \ links.shtml \ news.shtml einspline-0.9.2/acinclude.m40000664000113000011300000000222611015557175012615 00000000000000AC_DEFUN([AX_CC_OPTION], [ AC_REQUIRE([AC_PROG_CC]) AC_MSG_CHECKING([if ${CC-cc} accepts $2 option]) echo 'void f(){}' > conftest.c if test -z "`${CC-cc} $2 -c conftest.c 2>&1`"; then $1=$3 AC_MSG_RESULT([yes]) else $1=$4 AC_MSG_RESULT([no]) fi rm -f conftest* ]) AC_DEFUN([AX_F77_OPTION], [ AC_REQUIRE([AC_PROG_F77]) AC_MSG_CHECKING([if ${F77-f77} accepts $2 option]) echo 'void f(){}' > conftest.c if test -z "`${F77-f77} $2 -c conftest.c 2>&1`"; then $1=$3 AC_MSG_RESULT([yes]) else $1=$4 AC_MSG_RESULT([no]) fi rm -f conftest* ]) m4_include([m4/acx_pthread.m4]) m4_include([m4/ax_cc_maxopt.m4]) m4_include([m4/ax_cxx_maxopt.m4]) m4_include([m4/ax_f77_maxopt.m4]) m4_include([m4/ax_check_compiler_flags.m4]) m4_include([m4/ax_compiler_vendor.m4]) m4_include([m4/ax_cxx_compiler_vendor.m4]) m4_include([m4/ax_c_compiler_vendor.m4]) m4_include([m4/ax_f77_compiler_vendor.m4]) m4_include([m4/ax_gcc_aligns_stack.m4]) m4_include([m4/ax_gcc_archflag.m4]) m4_include([m4/ax_gxx_archflag.m4]) m4_include([m4/ax_gcc_version.m4]) m4_include([m4/ax_gcc_x86_cpuid.m4]) m4_include([m4/ax_ext.m4]) m4_include([m4/ac_cxx_restrict.m4]) einspline-0.9.2/configure0000775000113000011300000304721111273633722012340 00000000000000#! /bin/sh # Guess values for system-dependent variables and create Makefiles. # Generated by GNU Autoconf 2.61 for einspline 0.9.2. # # Report bugs to . 
# # Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, # 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc. # This configure script is free software; the Free Software Foundation # gives unlimited permission to copy, distribute and modify it. ## --------------------- ## ## M4sh Initialization. ## ## --------------------- ## # Be more Bourne compatible DUALCASE=1; export DUALCASE # for MKS sh if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then emulate sh NULLCMD=: # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which # is contrary to our usage. Disable this feature. alias -g '${1+"$@"}'='"$@"' setopt NO_GLOB_SUBST else case `(set -o) 2>/dev/null` in *posix*) set -o posix ;; esac fi # PATH needs CR # Avoid depending upon Character Ranges. as_cr_letters='abcdefghijklmnopqrstuvwxyz' as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' as_cr_Letters=$as_cr_letters$as_cr_LETTERS as_cr_digits='0123456789' as_cr_alnum=$as_cr_Letters$as_cr_digits # The user is always right. if test "${PATH_SEPARATOR+set}" != set; then echo "#! /bin/sh" >conf$$.sh echo "exit 0" >>conf$$.sh chmod +x conf$$.sh if (PATH="/nonexistent;."; conf$$.sh) >/dev/null 2>&1; then PATH_SEPARATOR=';' else PATH_SEPARATOR=: fi rm -f conf$$.sh fi # Support unset when possible. if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then as_unset=unset else as_unset=false fi # IFS # We need space, tab and new line, in precisely that order. Quoting is # there to prevent editors from complaining about space-tab. # (If _AS_PATH_WALK were called with IFS unset, it would disable word # splitting by setting IFS to empty value.) as_nl=' ' IFS=" "" $as_nl" # Find who we are. Look in the path if we contain no directory separator. case $0 in *[\\/]* ) as_myself=$0 ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break done IFS=$as_save_IFS ;; esac # We did not find ourselves, most probably we were run as `sh COMMAND' # in which case we are not to be found in the path. if test "x$as_myself" = x; then as_myself=$0 fi if test ! -f "$as_myself"; then echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 { (exit 1); exit 1; } fi # Work around bugs in pre-3.0 UWIN ksh. for as_var in ENV MAIL MAILPATH do ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var done PS1='$ ' PS2='> ' PS4='+ ' # NLS nuisances. for as_var in \ LANG LANGUAGE LC_ADDRESS LC_ALL LC_COLLATE LC_CTYPE LC_IDENTIFICATION \ LC_MEASUREMENT LC_MESSAGES LC_MONETARY LC_NAME LC_NUMERIC LC_PAPER \ LC_TELEPHONE LC_TIME do if (set +x; test -z "`(eval $as_var=C; export $as_var) 2>&1`"); then eval $as_var=C; export $as_var else ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var fi done # Required to use basename. if expr a : '\(a\)' >/dev/null 2>&1 && test "X`expr 00001 : '.*\(...\)'`" = X001; then as_expr=expr else as_expr=false fi if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then as_basename=basename else as_basename=false fi # Name of the executable. as_me=`$as_basename -- "$0" || $as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ X"$0" : 'X\(//\)$' \| \ X"$0" : 'X\(/\)' \| . 2>/dev/null || echo X/"$0" | sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/ q } /^X\/\(\/\/\)$/{ s//\1/ q } /^X\/\(\/\).*/{ s//\1/ q } s/.*/./; q'` # CDPATH. $as_unset CDPATH if test "x$CONFIG_SHELL" = x; then if (eval ":") 2>/dev/null; then as_have_required=yes else as_have_required=no fi if test $as_have_required = yes && (eval ": (as_func_return () { (exit \$1) } as_func_success () { as_func_return 0 } as_func_failure () { as_func_return 1 } as_func_ret_success () { return 0 } as_func_ret_failure () { return 1 } exitcode=0 if as_func_success; then : else exitcode=1 echo as_func_success failed. 
fi if as_func_failure; then exitcode=1 echo as_func_failure succeeded. fi if as_func_ret_success; then : else exitcode=1 echo as_func_ret_success failed. fi if as_func_ret_failure; then exitcode=1 echo as_func_ret_failure succeeded. fi if ( set x; as_func_ret_success y && test x = \"\$1\" ); then : else exitcode=1 echo positional parameters were not saved. fi test \$exitcode = 0) || { (exit 1); exit 1; } ( as_lineno_1=\$LINENO as_lineno_2=\$LINENO test \"x\$as_lineno_1\" != \"x\$as_lineno_2\" && test \"x\`expr \$as_lineno_1 + 1\`\" = \"x\$as_lineno_2\") || { (exit 1); exit 1; } ") 2> /dev/null; then : else as_candidate_shells= as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. case $as_dir in /*) for as_base in sh bash ksh sh5; do as_candidate_shells="$as_candidate_shells $as_dir/$as_base" done;; esac done IFS=$as_save_IFS for as_shell in $as_candidate_shells $SHELL; do # Try only shells that exist, to save several forks. if { test -f "$as_shell" || test -f "$as_shell.exe"; } && { ("$as_shell") 2> /dev/null <<\_ASEOF if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then emulate sh NULLCMD=: # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which # is contrary to our usage. Disable this feature. alias -g '${1+"$@"}'='"$@"' setopt NO_GLOB_SUBST else case `(set -o) 2>/dev/null` in *posix*) set -o posix ;; esac fi : _ASEOF }; then CONFIG_SHELL=$as_shell as_have_required=yes if { "$as_shell" 2> /dev/null <<\_ASEOF if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then emulate sh NULLCMD=: # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which # is contrary to our usage. Disable this feature. 
alias -g '${1+"$@"}'='"$@"' setopt NO_GLOB_SUBST else case `(set -o) 2>/dev/null` in *posix*) set -o posix ;; esac fi : (as_func_return () { (exit $1) } as_func_success () { as_func_return 0 } as_func_failure () { as_func_return 1 } as_func_ret_success () { return 0 } as_func_ret_failure () { return 1 } exitcode=0 if as_func_success; then : else exitcode=1 echo as_func_success failed. fi if as_func_failure; then exitcode=1 echo as_func_failure succeeded. fi if as_func_ret_success; then : else exitcode=1 echo as_func_ret_success failed. fi if as_func_ret_failure; then exitcode=1 echo as_func_ret_failure succeeded. fi if ( set x; as_func_ret_success y && test x = "$1" ); then : else exitcode=1 echo positional parameters were not saved. fi test $exitcode = 0) || { (exit 1); exit 1; } ( as_lineno_1=$LINENO as_lineno_2=$LINENO test "x$as_lineno_1" != "x$as_lineno_2" && test "x`expr $as_lineno_1 + 1`" = "x$as_lineno_2") || { (exit 1); exit 1; } _ASEOF }; then break fi fi done if test "x$CONFIG_SHELL" != x; then for as_var in BASH_ENV ENV do ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var done export CONFIG_SHELL exec "$CONFIG_SHELL" "$as_myself" ${1+"$@"} fi if test $as_have_required = no; then echo This script requires a shell more modern than all the echo shells that I found on your system. Please install a echo modern shell, or manually run the script under such a echo shell if you do have one. { (exit 1); exit 1; } fi fi fi (eval "as_func_return () { (exit \$1) } as_func_success () { as_func_return 0 } as_func_failure () { as_func_return 1 } as_func_ret_success () { return 0 } as_func_ret_failure () { return 1 } exitcode=0 if as_func_success; then : else exitcode=1 echo as_func_success failed. fi if as_func_failure; then exitcode=1 echo as_func_failure succeeded. fi if as_func_ret_success; then : else exitcode=1 echo as_func_ret_success failed. fi if as_func_ret_failure; then exitcode=1 echo as_func_ret_failure succeeded. 
fi if ( set x; as_func_ret_success y && test x = \"\$1\" ); then : else exitcode=1 echo positional parameters were not saved. fi test \$exitcode = 0") || { echo No shell found that supports shell functions. echo Please tell autoconf@gnu.org about your system, echo including any error possibly output before this echo message } as_lineno_1=$LINENO as_lineno_2=$LINENO test "x$as_lineno_1" != "x$as_lineno_2" && test "x`expr $as_lineno_1 + 1`" = "x$as_lineno_2" || { # Create $as_me.lineno as a copy of $as_myself, but with $LINENO # uniformly replaced by the line number. The first 'sed' inserts a # line-number line after each line using $LINENO; the second 'sed' # does the real work. The second script uses 'N' to pair each # line-number line with the line containing $LINENO, and appends # trailing '-' during substitution so that $LINENO is not a special # case at line end. # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the # scripts with optimization help from Paolo Bonzini. Blame Lee # E. McMahon (1931-1989) for sed's syntax. :-) sed -n ' p /[$]LINENO/= ' <$as_myself | sed ' s/[$]LINENO.*/&-/ t lineno b :lineno N :loop s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/ t loop s/-\n.*// ' >$as_me.lineno && chmod +x "$as_me.lineno" || { echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2 { (exit 1); exit 1; }; } # Don't try to exec as it changes $[0], causing all sort of problems # (the dirname of $[0] is not the place where we might find the # original and so on. Autoconf is especially sensitive to this). . "./$as_me.lineno" # Exit status is that of the last command. exit } if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then as_dirname=dirname else as_dirname=false fi ECHO_C= ECHO_N= ECHO_T= case `echo -n x` in -n*) case `echo 'x\c'` in *c*) ECHO_T=' ';; # ECHO_T is single tab character. 
*) ECHO_C='\c';; esac;; *) ECHO_N='-n';; esac if expr a : '\(a\)' >/dev/null 2>&1 && test "X`expr 00001 : '.*\(...\)'`" = X001; then as_expr=expr else as_expr=false fi rm -f conf$$ conf$$.exe conf$$.file if test -d conf$$.dir; then rm -f conf$$.dir/conf$$.file else rm -f conf$$.dir mkdir conf$$.dir fi echo >conf$$.file if ln -s conf$$.file conf$$ 2>/dev/null; then as_ln_s='ln -s' # ... but there are two gotchas: # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. # In both cases, we have to default to `cp -p'. ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || as_ln_s='cp -p' elif ln conf$$.file conf$$ 2>/dev/null; then as_ln_s=ln else as_ln_s='cp -p' fi rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file rmdir conf$$.dir 2>/dev/null if mkdir -p . 2>/dev/null; then as_mkdir_p=: else test -d ./-p && rmdir ./-p as_mkdir_p=false fi if test -x / >/dev/null 2>&1; then as_test_x='test -x' else if ls -dL / >/dev/null 2>&1; then as_ls_L_option=L else as_ls_L_option= fi as_test_x=' eval sh -c '\'' if test -d "$1"; then test -d "$1/."; else case $1 in -*)set "./$1";; esac; case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in ???[sx]*):;;*)false;;esac;fi '\'' sh ' fi as_executable_p=$as_test_x # Sed expression to map a string onto a valid CPP name. as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" # Sed expression to map a string onto a valid variable name. as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" # Check that we are running under the correct shell. SHELL=${CONFIG_SHELL-/bin/sh} case X$ECHO in X*--fallback-echo) # Remove one level of quotation (which was required for Make). ECHO=`echo "$ECHO" | sed 's,\\\\\$\\$0,'$0','` ;; esac echo=${ECHO-echo} if test "X$1" = X--no-reexec; then # Discard the --no-reexec flag, and continue. 
shift elif test "X$1" = X--fallback-echo; then # Avoid inline document here, it may be left over : elif test "X`($echo '\t') 2>/dev/null`" = 'X\t' ; then # Yippee, $echo works! : else # Restart under the correct shell. exec $SHELL "$0" --no-reexec ${1+"$@"} fi if test "X$1" = X--fallback-echo; then # used as fallback echo shift cat </dev/null 2>&1 && unset CDPATH if test -z "$ECHO"; then if test "X${echo_test_string+set}" != Xset; then # find a string as large as possible, as long as the shell can cope with it for cmd in 'sed 50q "$0"' 'sed 20q "$0"' 'sed 10q "$0"' 'sed 2q "$0"' 'echo test'; do # expected sizes: less than 2Kb, 1Kb, 512 bytes, 16 bytes, ... if (echo_test_string=`eval $cmd`) 2>/dev/null && echo_test_string=`eval $cmd` && (test "X$echo_test_string" = "X$echo_test_string") 2>/dev/null then break fi done fi if test "X`($echo '\t') 2>/dev/null`" = 'X\t' && echo_testing_string=`($echo "$echo_test_string") 2>/dev/null` && test "X$echo_testing_string" = "X$echo_test_string"; then : else # The Solaris, AIX, and Digital Unix default echo programs unquote # backslashes. This makes it impossible to quote backslashes using # echo "$something" | sed 's/\\/\\\\/g' # # So, first we look for a working echo in the user's PATH. lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR for dir in $PATH /usr/ucb; do IFS="$lt_save_ifs" if (test -f $dir/echo || test -f $dir/echo$ac_exeext) && test "X`($dir/echo '\t') 2>/dev/null`" = 'X\t' && echo_testing_string=`($dir/echo "$echo_test_string") 2>/dev/null` && test "X$echo_testing_string" = "X$echo_test_string"; then echo="$dir/echo" break fi done IFS="$lt_save_ifs" if test "X$echo" = Xecho; then # We didn't find a better echo, so look for alternatives. if test "X`(print -r '\t') 2>/dev/null`" = 'X\t' && echo_testing_string=`(print -r "$echo_test_string") 2>/dev/null` && test "X$echo_testing_string" = "X$echo_test_string"; then # This shell has a builtin print -r that does the trick. 
echo='print -r' elif (test -f /bin/ksh || test -f /bin/ksh$ac_exeext) && test "X$CONFIG_SHELL" != X/bin/ksh; then # If we have ksh, try running configure again with it. ORIGINAL_CONFIG_SHELL=${CONFIG_SHELL-/bin/sh} export ORIGINAL_CONFIG_SHELL CONFIG_SHELL=/bin/ksh export CONFIG_SHELL exec $CONFIG_SHELL "$0" --no-reexec ${1+"$@"} else # Try using printf. echo='printf %s\n' if test "X`($echo '\t') 2>/dev/null`" = 'X\t' && echo_testing_string=`($echo "$echo_test_string") 2>/dev/null` && test "X$echo_testing_string" = "X$echo_test_string"; then # Cool, printf works : elif echo_testing_string=`($ORIGINAL_CONFIG_SHELL "$0" --fallback-echo '\t') 2>/dev/null` && test "X$echo_testing_string" = 'X\t' && echo_testing_string=`($ORIGINAL_CONFIG_SHELL "$0" --fallback-echo "$echo_test_string") 2>/dev/null` && test "X$echo_testing_string" = "X$echo_test_string"; then CONFIG_SHELL=$ORIGINAL_CONFIG_SHELL export CONFIG_SHELL SHELL="$CONFIG_SHELL" export SHELL echo="$CONFIG_SHELL $0 --fallback-echo" elif echo_testing_string=`($CONFIG_SHELL "$0" --fallback-echo '\t') 2>/dev/null` && test "X$echo_testing_string" = 'X\t' && echo_testing_string=`($CONFIG_SHELL "$0" --fallback-echo "$echo_test_string") 2>/dev/null` && test "X$echo_testing_string" = "X$echo_test_string"; then echo="$CONFIG_SHELL $0 --fallback-echo" else # maybe with a smaller string... prev=: for cmd in 'echo test' 'sed 2q "$0"' 'sed 10q "$0"' 'sed 20q "$0"' 'sed 50q "$0"'; do if (test "X$echo_test_string" = "X`eval $cmd`") 2>/dev/null then break fi prev="$cmd" done if test "$prev" != 'sed 50q "$0"'; then echo_test_string=`eval $prev` export echo_test_string exec ${ORIGINAL_CONFIG_SHELL-${CONFIG_SHELL-/bin/sh}} "$0" ${1+"$@"} else # Oops. We lost completely, so just stick with echo. echo=echo fi fi fi fi fi fi # Copy echo and quote the copy suitably for passing to libtool from # the Makefile, instead of quoting the original, which is used later. 
ECHO=$echo if test "X$ECHO" = "X$CONFIG_SHELL $0 --fallback-echo"; then ECHO="$CONFIG_SHELL \\\$\$0 --fallback-echo" fi tagnames=${tagnames+${tagnames},}CXX tagnames=${tagnames+${tagnames},}F77 exec 7<&0 &1 # Name of the host. # hostname on some systems (SVR3.2, Linux) returns a bogus exit status, # so uname gets run too. ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q` # # Initializations. # ac_default_prefix=/usr/local ac_clean_files= ac_config_libobj_dir=. LIBOBJS= cross_compiling=no subdirs= MFLAGS= MAKEFLAGS= SHELL=${CONFIG_SHELL-/bin/sh} # Identity of this package. PACKAGE_NAME='einspline' PACKAGE_TARNAME='einspline' PACKAGE_VERSION='0.9.2' PACKAGE_STRING='einspline 0.9.2' PACKAGE_BUGREPORT='esler@uiuc.edu' ac_unique_file="src/bspline.h" # Factoring default headers for most tests. ac_includes_default="\ #include #ifdef HAVE_SYS_TYPES_H # include #endif #ifdef HAVE_SYS_STAT_H # include #endif #ifdef STDC_HEADERS # include # include #else # ifdef HAVE_STDLIB_H # include # endif #endif #ifdef HAVE_STRING_H # if !defined STDC_HEADERS && defined HAVE_MEMORY_H # include # endif # include #endif #ifdef HAVE_STRINGS_H # include #endif #ifdef HAVE_INTTYPES_H # include #endif #ifdef HAVE_STDINT_H # include #endif #ifdef HAVE_UNISTD_H # include #endif" ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datarootdir datadir sysconfdir sharedstatedir localstatedir includedir oldincludedir docdir infodir htmldir dvidir pdfdir psdir libdir localedir mandir DEFS ECHO_C ECHO_N ECHO_T LIBS build_alias host_alias target_alias INSTALL_PROGRAM INSTALL_SCRIPT INSTALL_DATA am__isrc CYGPATH_W PACKAGE VERSION ACLOCAL AUTOCONF AUTOMAKE AUTOHEADER MAKEINFO install_sh STRIP INSTALL_STRIP_PROGRAM mkdir_p AWK SET_MAKE am__leading_dot AMTAR am__tar am__untar CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC EXEEXT OBJEXT DEPDIR am__include am__quote AMDEP_TRUE 
AMDEP_FALSE AMDEPBACKSLASH CCDEPMODE am__fastdepCC_TRUE am__fastdepCC_FALSE CXX CXXFLAGS ac_ct_CXX CXXDEPMODE am__fastdepCXX_TRUE am__fastdepCXX_FALSE PKG_CONFIG build build_cpu build_vendor build_os host host_cpu host_vendor host_os PRTDIAG F77 FFLAGS ac_ct_F77 SED GREP EGREP LN_S ECHO AR RANLIB CPP CXXCPP LIBTOOL WANT_FORTRAN_TRUE WANT_FORTRAN_FALSE HAVE_CUDA_TRUE HAVE_CUDA_FALSE CUDA_CFLAGS CUDA_LIBS NVCC NVCCFLAGS PTHREAD_FLAG OPENMP_FLAG ALL_STATIC SIMD_FLAGS HAVE_SSE_TRUE HAVE_SSE_FALSE HAVE_SSE2_TRUE HAVE_SSE2_FALSE HAVE_SSE3_TRUE HAVE_SSE3_FALSE HAVE_SSSE3_TRUE HAVE_SSSE3_FALSE HAVE_SSE4_1_TRUE HAVE_SSE4_1_FALSE HAVE_SSE4_2_TRUE HAVE_SSE4_2_FALSE WANT_BLIPS_TRUE WANT_BLIPS_FALSE FFTW3_CFLAGS FFTW3_LIBS FFTW3F_CFLAGS FFTW3F_LIBS FLIBS LIBOBJS POW_LIB PKGDATADEF LTLIBOBJS' ac_subst_files='' ac_precious_vars='build_alias host_alias target_alias CC CFLAGS LDFLAGS LIBS CPPFLAGS CXX CXXFLAGS CCC PKG_CONFIG F77 FFLAGS CPP CXXCPP FFTW3_CFLAGS FFTW3_LIBS FFTW3F_CFLAGS FFTW3F_LIBS' # Initialize some variables set by options. ac_init_help= ac_init_version=false # The variables have the same names as the options, with # dashes changed to underlines. cache_file=/dev/null exec_prefix=NONE no_create= no_recursion= prefix=NONE program_prefix=NONE program_suffix=NONE program_transform_name=s,x,x, silent= site= srcdir= verbose= x_includes=NONE x_libraries=NONE # Installation directory options. # These are left unexpanded so users can "make install exec_prefix=/foo" # and all the variables that are supposed to be based on exec_prefix # by default will actually change. # Use braces instead of parens because sh, perl, etc. also accept them. # (The list follows the same order as the GNU Coding Standards.) 
bindir='${exec_prefix}/bin' sbindir='${exec_prefix}/sbin' libexecdir='${exec_prefix}/libexec' datarootdir='${prefix}/share' datadir='${datarootdir}' sysconfdir='${prefix}/etc' sharedstatedir='${prefix}/com' localstatedir='${prefix}/var' includedir='${prefix}/include' oldincludedir='/usr/include' docdir='${datarootdir}/doc/${PACKAGE_TARNAME}' infodir='${datarootdir}/info' htmldir='${docdir}' dvidir='${docdir}' pdfdir='${docdir}' psdir='${docdir}' libdir='${exec_prefix}/lib' localedir='${datarootdir}/locale' mandir='${datarootdir}/man' ac_prev= ac_dashdash= for ac_option do # If the previous option needs an argument, assign it. if test -n "$ac_prev"; then eval $ac_prev=\$ac_option ac_prev= continue fi case $ac_option in *=*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;; *) ac_optarg=yes ;; esac # Accept the important Cygnus configure options, so we can diagnose typos. case $ac_dashdash$ac_option in --) ac_dashdash=yes ;; -bindir | --bindir | --bindi | --bind | --bin | --bi) ac_prev=bindir ;; -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) bindir=$ac_optarg ;; -build | --build | --buil | --bui | --bu) ac_prev=build_alias ;; -build=* | --build=* | --buil=* | --bui=* | --bu=*) build_alias=$ac_optarg ;; -cache-file | --cache-file | --cache-fil | --cache-fi \ | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) ac_prev=cache_file ;; -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) cache_file=$ac_optarg ;; --config-cache | -C) cache_file=config.cache ;; -datadir | --datadir | --datadi | --datad) ac_prev=datadir ;; -datadir=* | --datadir=* | --datadi=* | --datad=*) datadir=$ac_optarg ;; -datarootdir | --datarootdir | --datarootdi | --datarootd | --dataroot \ | --dataroo | --dataro | --datar) ac_prev=datarootdir ;; -datarootdir=* | --datarootdir=* | --datarootdi=* | --datarootd=* \ | --dataroot=* | --dataroo=* | --dataro=* | --datar=*) 
datarootdir=$ac_optarg ;; -disable-* | --disable-*) ac_feature=`expr "x$ac_option" : 'x-*disable-\(.*\)'` # Reject names that are not valid shell variable names. expr "x$ac_feature" : ".*[^-._$as_cr_alnum]" >/dev/null && { echo "$as_me: error: invalid feature name: $ac_feature" >&2 { (exit 1); exit 1; }; } ac_feature=`echo $ac_feature | sed 's/[-.]/_/g'` eval enable_$ac_feature=no ;; -docdir | --docdir | --docdi | --doc | --do) ac_prev=docdir ;; -docdir=* | --docdir=* | --docdi=* | --doc=* | --do=*) docdir=$ac_optarg ;; -dvidir | --dvidir | --dvidi | --dvid | --dvi | --dv) ac_prev=dvidir ;; -dvidir=* | --dvidir=* | --dvidi=* | --dvid=* | --dvi=* | --dv=*) dvidir=$ac_optarg ;; -enable-* | --enable-*) ac_feature=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'` # Reject names that are not valid shell variable names. expr "x$ac_feature" : ".*[^-._$as_cr_alnum]" >/dev/null && { echo "$as_me: error: invalid feature name: $ac_feature" >&2 { (exit 1); exit 1; }; } ac_feature=`echo $ac_feature | sed 's/[-.]/_/g'` eval enable_$ac_feature=\$ac_optarg ;; -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ | --exec | --exe | --ex) ac_prev=exec_prefix ;; -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ | --exec=* | --exe=* | --ex=*) exec_prefix=$ac_optarg ;; -gas | --gas | --ga | --g) # Obsolete; use --with-gas. 
with_gas=yes ;; -help | --help | --hel | --he | -h) ac_init_help=long ;; -help=r* | --help=r* | --hel=r* | --he=r* | -hr*) ac_init_help=recursive ;; -help=s* | --help=s* | --hel=s* | --he=s* | -hs*) ac_init_help=short ;; -host | --host | --hos | --ho) ac_prev=host_alias ;; -host=* | --host=* | --hos=* | --ho=*) host_alias=$ac_optarg ;; -htmldir | --htmldir | --htmldi | --htmld | --html | --htm | --ht) ac_prev=htmldir ;; -htmldir=* | --htmldir=* | --htmldi=* | --htmld=* | --html=* | --htm=* \ | --ht=*) htmldir=$ac_optarg ;; -includedir | --includedir | --includedi | --included | --include \ | --includ | --inclu | --incl | --inc) ac_prev=includedir ;; -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ | --includ=* | --inclu=* | --incl=* | --inc=*) includedir=$ac_optarg ;; -infodir | --infodir | --infodi | --infod | --info | --inf) ac_prev=infodir ;; -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) infodir=$ac_optarg ;; -libdir | --libdir | --libdi | --libd) ac_prev=libdir ;; -libdir=* | --libdir=* | --libdi=* | --libd=*) libdir=$ac_optarg ;; -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \ | --libexe | --libex | --libe) ac_prev=libexecdir ;; -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ | --libexe=* | --libex=* | --libe=*) libexecdir=$ac_optarg ;; -localedir | --localedir | --localedi | --localed | --locale) ac_prev=localedir ;; -localedir=* | --localedir=* | --localedi=* | --localed=* | --locale=*) localedir=$ac_optarg ;; -localstatedir | --localstatedir | --localstatedi | --localstated \ | --localstate | --localstat | --localsta | --localst | --locals) ac_prev=localstatedir ;; -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ | --localstate=* | --localstat=* | --localsta=* | --localst=* | --locals=*) localstatedir=$ac_optarg ;; -mandir | --mandir | --mandi | --mand | --man | --ma | --m) ac_prev=mandir ;; -mandir=* | --mandir=* | --mandi=* | 
--mand=* | --man=* | --ma=* | --m=*) mandir=$ac_optarg ;; -nfp | --nfp | --nf) # Obsolete; use --without-fp. with_fp=no ;; -no-create | --no-create | --no-creat | --no-crea | --no-cre \ | --no-cr | --no-c | -n) no_create=yes ;; -no-recursion | --no-recursion | --no-recursio | --no-recursi \ | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) no_recursion=yes ;; -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ | --oldin | --oldi | --old | --ol | --o) ac_prev=oldincludedir ;; -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) oldincludedir=$ac_optarg ;; -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) ac_prev=prefix ;; -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) prefix=$ac_optarg ;; -program-prefix | --program-prefix | --program-prefi | --program-pref \ | --program-pre | --program-pr | --program-p) ac_prev=program_prefix ;; -program-prefix=* | --program-prefix=* | --program-prefi=* \ | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) program_prefix=$ac_optarg ;; -program-suffix | --program-suffix | --program-suffi | --program-suff \ | --program-suf | --program-su | --program-s) ac_prev=program_suffix ;; -program-suffix=* | --program-suffix=* | --program-suffi=* \ | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) program_suffix=$ac_optarg ;; -program-transform-name | --program-transform-name \ | --program-transform-nam | --program-transform-na \ | --program-transform-n | --program-transform- \ | --program-transform | --program-transfor \ | --program-transfo | --program-transf \ | --program-trans | --program-tran \ | --progr-tra | --program-tr | --program-t) ac_prev=program_transform_name ;; -program-transform-name=* | 
--program-transform-name=* \ | --program-transform-nam=* | --program-transform-na=* \ | --program-transform-n=* | --program-transform-=* \ | --program-transform=* | --program-transfor=* \ | --program-transfo=* | --program-transf=* \ | --program-trans=* | --program-tran=* \ | --progr-tra=* | --program-tr=* | --program-t=*) program_transform_name=$ac_optarg ;; -pdfdir | --pdfdir | --pdfdi | --pdfd | --pdf | --pd) ac_prev=pdfdir ;; -pdfdir=* | --pdfdir=* | --pdfdi=* | --pdfd=* | --pdf=* | --pd=*) pdfdir=$ac_optarg ;; -psdir | --psdir | --psdi | --psd | --ps) ac_prev=psdir ;; -psdir=* | --psdir=* | --psdi=* | --psd=* | --ps=*) psdir=$ac_optarg ;; -q | -quiet | --quiet | --quie | --qui | --qu | --q \ | -silent | --silent | --silen | --sile | --sil) silent=yes ;; -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) ac_prev=sbindir ;; -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ | --sbi=* | --sb=*) sbindir=$ac_optarg ;; -sharedstatedir | --sharedstatedir | --sharedstatedi \ | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ | --sharedst | --shareds | --shared | --share | --shar \ | --sha | --sh) ac_prev=sharedstatedir ;; -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \ | --sha=* | --sh=*) sharedstatedir=$ac_optarg ;; -site | --site | --sit) ac_prev=site ;; -site=* | --site=* | --sit=*) site=$ac_optarg ;; -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) ac_prev=srcdir ;; -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) srcdir=$ac_optarg ;; -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ | --syscon | --sysco | --sysc | --sys | --sy) ac_prev=sysconfdir ;; -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*) sysconfdir=$ac_optarg ;; -target | --target | --targe | 
--targ | --tar | --ta | --t) ac_prev=target_alias ;; -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) target_alias=$ac_optarg ;; -v | -verbose | --verbose | --verbos | --verbo | --verb) verbose=yes ;; -version | --version | --versio | --versi | --vers | -V) ac_init_version=: ;; -with-* | --with-*) ac_package=`expr "x$ac_option" : 'x-*with-\([^=]*\)'` # Reject names that are not valid shell variable names. expr "x$ac_package" : ".*[^-._$as_cr_alnum]" >/dev/null && { echo "$as_me: error: invalid package name: $ac_package" >&2 { (exit 1); exit 1; }; } ac_package=`echo $ac_package | sed 's/[-.]/_/g'` eval with_$ac_package=\$ac_optarg ;; -without-* | --without-*) ac_package=`expr "x$ac_option" : 'x-*without-\(.*\)'` # Reject names that are not valid shell variable names. expr "x$ac_package" : ".*[^-._$as_cr_alnum]" >/dev/null && { echo "$as_me: error: invalid package name: $ac_package" >&2 { (exit 1); exit 1; }; } ac_package=`echo $ac_package | sed 's/[-.]/_/g'` eval with_$ac_package=no ;; --x) # Obsolete; use --with-x. with_x=yes ;; -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ | --x-incl | --x-inc | --x-in | --x-i) ac_prev=x_includes ;; -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) x_includes=$ac_optarg ;; -x-libraries | --x-libraries | --x-librarie | --x-librari \ | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) ac_prev=x_libraries ;; -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) x_libraries=$ac_optarg ;; -*) { echo "$as_me: error: unrecognized option: $ac_option Try \`$0 --help' for more information." >&2 { (exit 1); exit 1; }; } ;; *=*) ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='` # Reject names that are not valid shell variable names. 
expr "x$ac_envvar" : ".*[^_$as_cr_alnum]" >/dev/null && { echo "$as_me: error: invalid variable name: $ac_envvar" >&2 { (exit 1); exit 1; }; } eval $ac_envvar=\$ac_optarg export $ac_envvar ;; *) # FIXME: should be removed in autoconf 3.0. echo "$as_me: WARNING: you should use --build, --host, --target" >&2 expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null && echo "$as_me: WARNING: invalid host type: $ac_option" >&2 : ${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option} ;; esac done if test -n "$ac_prev"; then ac_option=--`echo $ac_prev | sed 's/_/-/g'` { echo "$as_me: error: missing argument to $ac_option" >&2 { (exit 1); exit 1; }; } fi # Be sure to have absolute directory names. for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \ datadir sysconfdir sharedstatedir localstatedir includedir \ oldincludedir docdir infodir htmldir dvidir pdfdir psdir \ libdir localedir mandir do eval ac_val=\$$ac_var case $ac_val in [\\/$]* | ?:[\\/]* ) continue;; NONE | '' ) case $ac_var in *prefix ) continue;; esac;; esac { echo "$as_me: error: expected an absolute directory name for --$ac_var: $ac_val" >&2 { (exit 1); exit 1; }; } done # There might be people who depend on the old broken behavior: `$host' # used to hold the argument of --host etc. # FIXME: To remove some day. build=$build_alias host=$host_alias target=$target_alias # FIXME: To remove some day. if test "x$host_alias" != x; then if test "x$build_alias" = x; then cross_compiling=maybe echo "$as_me: WARNING: If you wanted to set the --build type, don't use --host. If a cross compiler is detected then cross compile mode will be used." 
>&2 elif test "x$build_alias" != "x$host_alias"; then cross_compiling=yes fi fi ac_tool_prefix= test -n "$host_alias" && ac_tool_prefix=$host_alias- test "$silent" = yes && exec 6>/dev/null ac_pwd=`pwd` && test -n "$ac_pwd" && ac_ls_di=`ls -di .` && ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` || { echo "$as_me: error: Working directory cannot be determined" >&2 { (exit 1); exit 1; }; } test "X$ac_ls_di" = "X$ac_pwd_ls_di" || { echo "$as_me: error: pwd does not report name of working directory" >&2 { (exit 1); exit 1; }; } # Find the source files, if location was not specified. if test -z "$srcdir"; then ac_srcdir_defaulted=yes # Try the directory containing this script, then the parent directory. ac_confdir=`$as_dirname -- "$0" || $as_expr X"$0" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$0" : 'X\(//\)[^/]' \| \ X"$0" : 'X\(//\)$' \| \ X"$0" : 'X\(/\)' \| . 2>/dev/null || echo X"$0" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q'` srcdir=$ac_confdir if test ! -r "$srcdir/$ac_unique_file"; then srcdir=.. fi else ac_srcdir_defaulted=no fi if test ! -r "$srcdir/$ac_unique_file"; then test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .." { echo "$as_me: error: cannot find sources ($ac_unique_file) in $srcdir" >&2 { (exit 1); exit 1; }; } fi ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work" ac_abs_confdir=`( cd "$srcdir" && test -r "./$ac_unique_file" || { echo "$as_me: error: $ac_msg" >&2 { (exit 1); exit 1; }; } pwd)` # When building in place, set srcdir=. if test "$ac_abs_confdir" = "$ac_pwd"; then srcdir=. fi # Remove unnecessary trailing slashes from srcdir. # Double slashes in file names in object file debugging info # mess up M-x gdb in Emacs. 
case $srcdir in */) srcdir=`expr "X$srcdir" : 'X\(.*[^/]\)' \| "X$srcdir" : 'X\(.*\)'`;; esac for ac_var in $ac_precious_vars; do eval ac_env_${ac_var}_set=\${${ac_var}+set} eval ac_env_${ac_var}_value=\$${ac_var} eval ac_cv_env_${ac_var}_set=\${${ac_var}+set} eval ac_cv_env_${ac_var}_value=\$${ac_var} done # # Report the --help message. # if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF \`configure' configures einspline 0.9.2 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... To assign environment variables (e.g., CC, CFLAGS...), specify them as VAR=VALUE. See below for descriptions of some of the useful variables. Defaults for the options are specified in brackets. Configuration: -h, --help display this help and exit --help=short display options specific to this package --help=recursive display the short help of all the included packages -V, --version display version information and exit -q, --quiet, --silent do not print \`checking...' messages --cache-file=FILE cache test results in FILE [disabled] -C, --config-cache alias for \`--cache-file=config.cache' -n, --no-create do not create output files --srcdir=DIR find the sources in DIR [configure dir or \`..'] Installation directories: --prefix=PREFIX install architecture-independent files in PREFIX [$ac_default_prefix] --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX [PREFIX] By default, \`make install' will install all the files in \`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc. You can specify an installation prefix other than \`$ac_default_prefix' using \`--prefix', for instance \`--prefix=\$HOME'. For better control, use the options below. 
Fine tuning of the installation directories: --bindir=DIR user executables [EPREFIX/bin] --sbindir=DIR system admin executables [EPREFIX/sbin] --libexecdir=DIR program executables [EPREFIX/libexec] --sysconfdir=DIR read-only single-machine data [PREFIX/etc] --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com] --localstatedir=DIR modifiable single-machine data [PREFIX/var] --libdir=DIR object code libraries [EPREFIX/lib] --includedir=DIR C header files [PREFIX/include] --oldincludedir=DIR C header files for non-gcc [/usr/include] --datarootdir=DIR read-only arch.-independent data root [PREFIX/share] --datadir=DIR read-only architecture-independent data [DATAROOTDIR] --infodir=DIR info documentation [DATAROOTDIR/info] --localedir=DIR locale-dependent data [DATAROOTDIR/locale] --mandir=DIR man documentation [DATAROOTDIR/man] --docdir=DIR documentation root [DATAROOTDIR/doc/einspline] --htmldir=DIR html documentation [DOCDIR] --dvidir=DIR dvi documentation [DOCDIR] --pdfdir=DIR pdf documentation [DOCDIR] --psdir=DIR ps documentation [DOCDIR] _ACEOF cat <<\_ACEOF Program names: --program-prefix=PREFIX prepend PREFIX to installed program names --program-suffix=SUFFIX append SUFFIX to installed program names --program-transform-name=PROGRAM run sed PROGRAM on installed program names System types: --build=BUILD configure for building on BUILD [guessed] --host=HOST cross-compile to build programs to run on HOST [BUILD] _ACEOF fi if test -n "$ac_init_help"; then case $ac_init_help in short | recursive ) echo "Configuration of einspline 0.9.2:";; esac cat <<\_ACEOF Optional Features: --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no) --enable-FEATURE[=ARG] include FEATURE [ARG=yes] --disable-dependency-tracking speeds up one-time build --enable-dependency-tracking do not reject slow dependency extractors --enable-portable-binary disable compiler optimizations that would produce unportable binaries --disable-fortran disable fortran 
bindings --enable-shared[=PKGS] build shared libraries [default=yes] --enable-static[=PKGS] build static libraries [default=yes] --enable-fast-install[=PKGS] optimize for fast installation [default=yes] --disable-libtool-lock avoid locking (might break parallel builds) --enable-cuda compile CUDA routines --enable-pthread compile with -pthread --enable-openmp compile with -openmp --enable-prefetch=N use software prefetch instructions (default=no) --enable-all-static build static binaries (default=no) --enable-altivec enable PowerPC SIMD extensions (default=no) --enable-sse enable SSE SIMD instructions --enable-blips enable routines for creating BLIPS (default=no) --enable-precision use double-precision solve for single-precision splines --enable-profile instrument code with profiling information --enable-debug enable code for debugging checks Optional Packages: --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) --with-gcc-arch= use architecture for gcc -march/-mtune, instead of guessing --with-gnu-ld assume the C compiler uses GNU ld [default=no] --with-pic try to use only PIC/non-PIC objects [default=use both] --with-tags[=TAGS] include additional configurations [automatic] --with-cuda=PATH prefix where cuda is installed default=auto Some influential environment variables: CC C compiler command CFLAGS C compiler flags LDFLAGS linker flags, e.g. -L if you have libraries in a nonstandard directory LIBS libraries to pass to the linker, e.g. -l CPPFLAGS C/C++/Objective C preprocessor flags, e.g. 
-I if you have headers in a nonstandard directory CXX C++ compiler command CXXFLAGS C++ compiler flags PKG_CONFIG path to pkg-config utility F77 Fortran 77 compiler command FFLAGS Fortran 77 compiler flags CPP C preprocessor CXXCPP C++ preprocessor FFTW3_CFLAGS C compiler flags for FFTW3, overriding pkg-config FFTW3_LIBS linker flags for FFTW3, overriding pkg-config FFTW3F_CFLAGS C compiler flags for FFTW3F, overriding pkg-config FFTW3F_LIBS linker flags for FFTW3F, overriding pkg-config Use these variables to override the choices made by `configure' or to help it to find libraries and programs with nonstandard names/locations. Report bugs to . _ACEOF ac_status=$? fi if test "$ac_init_help" = "recursive"; then # If there are subdirs, report their specific --help. for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue test -d "$ac_dir" || continue ac_builddir=. case "$ac_dir" in .) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; *) ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'` # A ".." for each directory in $ac_dir_suffix. ac_top_builddir_sub=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,/..,g;s,/,,'` case $ac_top_builddir_sub in "") ac_top_builddir_sub=. ac_top_build_prefix= ;; *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; esac ;; esac ac_abs_top_builddir=$ac_pwd ac_abs_builddir=$ac_pwd$ac_dir_suffix # for backward compatibility: ac_top_builddir=$ac_top_build_prefix case $srcdir in .) # We are building in place. ac_srcdir=. ac_top_srcdir=$ac_top_builddir_sub ac_abs_top_srcdir=$ac_pwd ;; [\\/]* | ?:[\\/]* ) # Absolute name. ac_srcdir=$srcdir$ac_dir_suffix; ac_top_srcdir=$srcdir ac_abs_top_srcdir=$srcdir ;; *) # Relative name. ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix ac_top_srcdir=$ac_top_build_prefix$srcdir ac_abs_top_srcdir=$ac_pwd/$srcdir ;; esac ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix cd "$ac_dir" || { ac_status=$?; continue; } # Check for guested configure. 
if test -f "$ac_srcdir/configure.gnu"; then echo && $SHELL "$ac_srcdir/configure.gnu" --help=recursive elif test -f "$ac_srcdir/configure"; then echo && $SHELL "$ac_srcdir/configure" --help=recursive else echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2 fi || ac_status=$? cd "$ac_pwd" || { ac_status=$?; break; } done fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF einspline configure 0.9.2 generated by GNU Autoconf 2.61 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc. This configure script is free software; the Free Software Foundation gives unlimited permission to copy, distribute and modify it. _ACEOF exit fi cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. It was created by einspline $as_me 0.9.2, which was generated by GNU Autoconf 2.61. Invocation command line was $ $0 $@ _ACEOF exec 5>>config.log { cat <<_ASUNAME ## --------- ## ## Platform. 
## ## --------- ## hostname = `(hostname || uname -n) 2>/dev/null | sed 1q` uname -m = `(uname -m) 2>/dev/null || echo unknown` uname -r = `(uname -r) 2>/dev/null || echo unknown` uname -s = `(uname -s) 2>/dev/null || echo unknown` uname -v = `(uname -v) 2>/dev/null || echo unknown` /usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown` /bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown` /bin/arch = `(/bin/arch) 2>/dev/null || echo unknown` /usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown` /usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown` /usr/bin/hostinfo = `(/usr/bin/hostinfo) 2>/dev/null || echo unknown` /bin/machine = `(/bin/machine) 2>/dev/null || echo unknown` /usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown` /bin/universe = `(/bin/universe) 2>/dev/null || echo unknown` _ASUNAME as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. echo "PATH: $as_dir" done IFS=$as_save_IFS } >&5 cat >&5 <<_ACEOF ## ----------- ## ## Core tests. ## ## ----------- ## _ACEOF # Keep a trace of the command line. # Strip out --no-create and --no-recursion so they do not pile up. # Strip out --silent because we don't want to record it for future runs. # Also quote any args containing shell meta-characters. # Make two passes to allow for proper duplicate-argument suppression. 
ac_configure_args= ac_configure_args0= ac_configure_args1= ac_must_keep_next=false for ac_pass in 1 2 do for ac_arg do case $ac_arg in -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;; -q | -quiet | --quiet | --quie | --qui | --qu | --q \ | -silent | --silent | --silen | --sile | --sil) continue ;; *\'*) ac_arg=`echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; esac case $ac_pass in 1) ac_configure_args0="$ac_configure_args0 '$ac_arg'" ;; 2) ac_configure_args1="$ac_configure_args1 '$ac_arg'" if test $ac_must_keep_next = true; then ac_must_keep_next=false # Got value, back to normal. else case $ac_arg in *=* | --config-cache | -C | -disable-* | --disable-* \ | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \ | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \ | -with-* | --with-* | -without-* | --without-* | --x) case "$ac_configure_args0 " in "$ac_configure_args1"*" '$ac_arg' "* ) continue ;; esac ;; -* ) ac_must_keep_next=true ;; esac fi ac_configure_args="$ac_configure_args '$ac_arg'" ;; esac done done $as_unset ac_configure_args0 || test "${ac_configure_args0+set}" != set || { ac_configure_args0=; export ac_configure_args0; } $as_unset ac_configure_args1 || test "${ac_configure_args1+set}" != set || { ac_configure_args1=; export ac_configure_args1; } # When interrupted or exit'd, cleanup temporary files, and complete # config.log. We remove comments because anyway the quotes in there # would cause problems or look ugly. # WARNING: Use '\'' to represent an apostrophe within the trap. # WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug. trap 'exit_status=$? # Save into config.log some information that might help in debugging. { echo cat <<\_ASBOX ## ---------------- ## ## Cache variables. 
## ## ---------------- ## _ASBOX echo # The following way of writing the cache mishandles newlines in values, ( for ac_var in `(set) 2>&1 | sed -n '\''s/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'\''`; do eval ac_val=\$$ac_var case $ac_val in #( *${as_nl}*) case $ac_var in #( *_cv_*) { echo "$as_me:$LINENO: WARNING: Cache variable $ac_var contains a newline." >&5 echo "$as_me: WARNING: Cache variable $ac_var contains a newline." >&2;} ;; esac case $ac_var in #( _ | IFS | as_nl) ;; #( *) $as_unset $ac_var ;; esac ;; esac done (set) 2>&1 | case $as_nl`(ac_space='\'' '\''; set) 2>&1` in #( *${as_nl}ac_space=\ *) sed -n \ "s/'\''/'\''\\\\'\'''\''/g; s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\''\\2'\''/p" ;; #( *) sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" ;; esac | sort ) echo cat <<\_ASBOX ## ----------------- ## ## Output variables. ## ## ----------------- ## _ASBOX echo for ac_var in $ac_subst_vars do eval ac_val=\$$ac_var case $ac_val in *\'\''*) ac_val=`echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; esac echo "$ac_var='\''$ac_val'\''" done | sort echo if test -n "$ac_subst_files"; then cat <<\_ASBOX ## ------------------- ## ## File substitutions. ## ## ------------------- ## _ASBOX echo for ac_var in $ac_subst_files do eval ac_val=\$$ac_var case $ac_val in *\'\''*) ac_val=`echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; esac echo "$ac_var='\''$ac_val'\''" done | sort echo fi if test -s confdefs.h; then cat <<\_ASBOX ## ----------- ## ## confdefs.h. ## ## ----------- ## _ASBOX echo cat confdefs.h echo fi test "$ac_signal" != 0 && echo "$as_me: caught signal $ac_signal" echo "$as_me: exit $exit_status" } >&5 rm -f core *.core core.conftest.* && rm -f -r conftest* confdefs* conf$$* $ac_clean_files && exit $exit_status ' 0 for ac_signal in 1 2 13 15; do trap 'ac_signal='$ac_signal'; { (exit 1); exit 1; }' $ac_signal done ac_signal=0 # confdefs.h avoids OS command line length limits that DEFS can exceed. 
rm -f -r conftest* confdefs.h # Predefined preprocessor variables. cat >>confdefs.h <<_ACEOF #define PACKAGE_NAME "$PACKAGE_NAME" _ACEOF cat >>confdefs.h <<_ACEOF #define PACKAGE_TARNAME "$PACKAGE_TARNAME" _ACEOF cat >>confdefs.h <<_ACEOF #define PACKAGE_VERSION "$PACKAGE_VERSION" _ACEOF cat >>confdefs.h <<_ACEOF #define PACKAGE_STRING "$PACKAGE_STRING" _ACEOF cat >>confdefs.h <<_ACEOF #define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT" _ACEOF # Let the site file select an alternate cache file if it wants to. # Prefer explicitly selected file to automatically selected ones. if test -n "$CONFIG_SITE"; then set x "$CONFIG_SITE" elif test "x$prefix" != xNONE; then set x "$prefix/share/config.site" "$prefix/etc/config.site" else set x "$ac_default_prefix/share/config.site" \ "$ac_default_prefix/etc/config.site" fi shift for ac_site_file do if test -r "$ac_site_file"; then { echo "$as_me:$LINENO: loading site script $ac_site_file" >&5 echo "$as_me: loading site script $ac_site_file" >&6;} sed 's/^/| /' "$ac_site_file" >&5 . "$ac_site_file" fi done if test -r "$cache_file"; then # Some versions of bash will fail to source /dev/null (special # files actually), so we avoid doing that. if test -f "$cache_file"; then { echo "$as_me:$LINENO: loading cache $cache_file" >&5 echo "$as_me: loading cache $cache_file" >&6;} case $cache_file in [\\/]* | ?:[\\/]* ) . "$cache_file";; *) . "./$cache_file";; esac fi else { echo "$as_me:$LINENO: creating cache $cache_file" >&5 echo "$as_me: creating cache $cache_file" >&6;} >$cache_file fi # Check that the precious variables saved in the cache have kept the same # value. 
# ----------------------------------------------------------------------
# Compare every precious variable recorded in the cache with its current
# value, report any difference, and append the current settings to
# $ac_configure_args (see "Pass precious variables to config.status").
# ----------------------------------------------------------------------
ac_cache_corrupted=false
for ac_var in $ac_precious_vars; do
  eval ac_old_set=\$ac_cv_env_${ac_var}_set
  eval ac_new_set=\$ac_env_${ac_var}_set
  eval ac_old_val=\$ac_cv_env_${ac_var}_value
  eval ac_new_val=\$ac_env_${ac_var}_value
  case $ac_old_set,$ac_new_set in
    set,)
      { echo "$as_me:$LINENO: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5
        echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;}
      ac_cache_corrupted=: ;;
    ,set)
      { echo "$as_me:$LINENO: error: \`$ac_var' was not set in the previous run" >&5
        echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;}
      ac_cache_corrupted=: ;;
    ,);;
    *)
      if test "x$ac_old_val" != "x$ac_new_val"; then
        { echo "$as_me:$LINENO: error: \`$ac_var' has changed since the previous run:" >&5
          echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;}
        { echo "$as_me:$LINENO: former value: $ac_old_val" >&5
          echo "$as_me: former value: $ac_old_val" >&2;}
        { echo "$as_me:$LINENO: current value: $ac_new_val" >&5
          echo "$as_me: current value: $ac_new_val" >&2;}
        ac_cache_corrupted=:
      fi;;
  esac
  # Pass precious variables to config.status.
  if test "$ac_new_set" = set; then
    case $ac_new_val in
      *\'*) ac_arg=$ac_var=`echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;;
      *) ac_arg=$ac_var=$ac_new_val ;;
    esac
    case " $ac_configure_args " in
      *" '$ac_arg' "*) ;; # Avoid dups.  Use of quotes ensures accuracy.
      *) ac_configure_args="$ac_configure_args '$ac_arg'" ;;
    esac
  fi
done
if $ac_cache_corrupted; then
  { echo "$as_me:$LINENO: error: changes in the environment can compromise the build" >&5
    echo "$as_me: error: changes in the environment can compromise the build" >&2;}
  { { echo "$as_me:$LINENO: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&5
      echo "$as_me: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&2;}
    { (exit 1); exit 1; }; }
fi

# Command templates for the C language tests.  They are single-quoted on
# purpose: the variables inside are expanded later, when the templates
# are passed to eval.
ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu

am__api_version='1.10'

# Locate the auxiliary directory: the first of $srcdir, $srcdir/.. and
# $srcdir/../.. that holds install-sh, install.sh or shtool.
ac_aux_dir=
for ac_dir in "$srcdir" "$srcdir/.." "$srcdir/../.."; do
  if test -f "$ac_dir/install-sh"; then
    ac_aux_dir=$ac_dir
    ac_install_sh="$ac_aux_dir/install-sh -c"
    break
  elif test -f "$ac_dir/install.sh"; then
    ac_aux_dir=$ac_dir
    ac_install_sh="$ac_aux_dir/install.sh -c"
    break
  elif test -f "$ac_dir/shtool"; then
    ac_aux_dir=$ac_dir
    ac_install_sh="$ac_aux_dir/shtool install -c"
    break
  fi
done
if test -z "$ac_aux_dir"; then
  { { echo "$as_me:$LINENO: error: cannot find install-sh or install.sh in \"$srcdir\" \"$srcdir/..\" \"$srcdir/../..\"" >&5
      echo "$as_me: error: cannot find install-sh or install.sh in \"$srcdir\" \"$srcdir/..\" \"$srcdir/../..\"" >&2;}
    { (exit 1); exit 1; }; }
fi

# These three variables are undocumented and unsupported,
# and are intended to be withdrawn in a future Autoconf release.
# They can cause serious problems if a builder's source tree is in a directory
# whose full name contains unusual characters.
ac_config_guess="$SHELL $ac_aux_dir/config.guess"  # Please don't use this var.
ac_config_sub="$SHELL $ac_aux_dir/config.sub"  # Please don't use this var.
ac_configure="$SHELL $ac_aux_dir/configure"  # Please don't use this var.

# Find a good install program.  We prefer a C program (faster),
# so one script is as good as another.  But avoid the broken or
# incompatible versions:
# SysV /etc/install, /usr/sbin/install
# SunOS /usr/etc/install
# IRIX /sbin/install
# AIX /bin/install
# AmigaOS /C/install, which installs bootblocks on floppy discs
# AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag
# AFS /usr/afsws/bin/install, which mishandles nonexistent args
# SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff"
# OS/2's system install, which has a completely different semantic
# ./install, which can be erroneously created by make from ./install.sh.
{ echo "$as_me:$LINENO: checking for a BSD-compatible install" >&5
  echo $ECHO_N "checking for a BSD-compatible install... $ECHO_C" >&6; }
if test -z "$INSTALL"; then
  if test "${ac_cv_path_install+set}" = set; then
    echo $ECHO_N "(cached) $ECHO_C" >&6
  else
    as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
    for as_dir in $PATH
    do
      IFS=$as_save_IFS
      test -z "$as_dir" && as_dir=.
      # Account for people who put trailing slashes in PATH elements.
      case $as_dir/ in
        ./ | .// | /cC/* | \
        /etc/* | /usr/sbin/* | /usr/etc/* | /sbin/* | /usr/afsws/bin/* | \
        ?:\\/os2\\/install\\/* | ?:\\/OS2\\/INSTALL\\/* | \
        /usr/ucb/* ) ;;
        *)
          # OSF1 and SCO ODT 3.0 have their own names for install.
          # Don't use installbsd from OSF since it installs stuff as root
          # by default.
          for ac_prog in ginstall scoinst install; do
            for ac_exec_ext in '' $ac_executable_extensions; do
              if { test -f "$as_dir/$ac_prog$ac_exec_ext" && $as_test_x "$as_dir/$ac_prog$ac_exec_ext"; }; then
                if test $ac_prog = install &&
                  grep dspmsg "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then
                  # AIX install.  It has an incompatible calling convention.
                  :
                elif test $ac_prog = install &&
                  grep pwplus "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then
                  # program-specific install script used by HP pwplus--don't use.
                  :
                else
                  ac_cv_path_install="$as_dir/$ac_prog$ac_exec_ext -c"
                  # Leave the extension, program and PATH loops at once.
                  break 3
                fi
              fi
            done
          done
          ;;
      esac
    done
    IFS=$as_save_IFS
  fi
  if test "${ac_cv_path_install+set}" = set; then
    INSTALL=$ac_cv_path_install
  else
    # As a last resort, use the slow shell script.  Don't cache a
    # value for INSTALL within a source directory, because that will
    # break other packages using the cache if that directory is
    # removed, or if the value is a relative name.
    INSTALL=$ac_install_sh
  fi
fi
{ echo "$as_me:$LINENO: result: $INSTALL" >&5
  echo "${ECHO_T}$INSTALL" >&6; }

# Use test -z because SunOS4 sh mishandles braces in ${var-val}.
# It thinks the first close brace ends the variable substitution.
test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}'

test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL}'

test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644'

{ echo "$as_me:$LINENO: checking whether build environment is sane" >&5
  echo $ECHO_N "checking whether build environment is sane... $ECHO_C" >&6; }
# Just in case
sleep 1
echo timestamp > conftest.file
# Do `set' in a subshell so we don't clobber the current shell's
# arguments.  Must try -L first in case configure is actually a
# symlink; some systems play weird games with the mod time of symlinks
# (eg FreeBSD returns the mod time of the symlink's containing
# directory).
if (
   set X `ls -Lt $srcdir/configure conftest.file 2> /dev/null`
   if test "$*" = "X"; then
      # -L didn't work.
      set X `ls -t $srcdir/configure conftest.file`
   fi
   rm -f conftest.file
   if test "$*" != "X $srcdir/configure conftest.file" \
      && test "$*" != "X conftest.file $srcdir/configure"; then

      # If neither matched, then we have a broken ls.  This can happen
      # if, for instance, CONFIG_SHELL is bash and it inherits a
      # broken ls alias from the environment.  This has actually
      # happened.  Such a system could not be considered "sane".
      { { echo "$as_me:$LINENO: error: ls -t appears to fail. Make sure there is not a broken alias in your environment" >&5
          echo "$as_me: error: ls -t appears to fail. Make sure there is not a broken alias in your environment" >&2;}
        { (exit 1); exit 1; }; }
   fi

   # `ls -t' lists the newest file first, so conftest.file must be $2.
   test "$2" = conftest.file
   )
then
   # Ok.
   :
else
   { { echo "$as_me:$LINENO: error: newly created file is older than distributed files! Check your system clock" >&5
       echo "$as_me: error: newly created file is older than distributed files! Check your system clock" >&2;}
     { (exit 1); exit 1; }; }
fi
{ echo "$as_me:$LINENO: result: yes" >&5
  echo "${ECHO_T}yes" >&6; }
test "$program_prefix" != NONE &&
  program_transform_name="s&^&$program_prefix&;$program_transform_name"
# Use a double $ so make ignores it.
test "$program_suffix" != NONE &&
  program_transform_name="s&\$&$program_suffix&;$program_transform_name"
# Double any \ or $.  echo might interpret backslashes.
# By default was `s,x,x', remove it if useless.
cat <<\_ACEOF >conftest.sed
s/[\\$]/&&/g;s/;s,x,x,$//
_ACEOF
program_transform_name=`echo $program_transform_name | sed -f conftest.sed`
rm -f conftest.sed

# expand $ac_aux_dir to an absolute path
am_aux_dir=`cd "$ac_aux_dir" && pwd`

test x"${MISSING+set}" = xset || MISSING="\${SHELL} $am_aux_dir/missing"
# Use eval to expand $SHELL
if eval "$MISSING --run true"; then
  am_missing_run="$MISSING --run "
else
  am_missing_run=
  { echo "$as_me:$LINENO: WARNING: \`missing' script is too old or missing" >&5
    echo "$as_me: WARNING: \`missing' script is too old or missing" >&2;}
fi

{ echo "$as_me:$LINENO: checking for a thread-safe mkdir -p" >&5
  echo $ECHO_N "checking for a thread-safe mkdir -p... $ECHO_C" >&6; }
if test -z "$MKDIR_P"; then
  if test "${ac_cv_path_mkdir+set}" = set; then
    echo $ECHO_N "(cached) $ECHO_C" >&6
  else
    as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
    for as_dir in $PATH$PATH_SEPARATOR/opt/sfw/bin
    do
      IFS=$as_save_IFS
      test -z "$as_dir" && as_dir=.
      for ac_prog in mkdir gmkdir; do
        for ac_exec_ext in '' $ac_executable_extensions; do
          { test -f "$as_dir/$ac_prog$ac_exec_ext" && $as_test_x "$as_dir/$ac_prog$ac_exec_ext"; } || continue
          # Accept only implementations whose --version banner matches
          # one of the patterns below.
          case `"$as_dir/$ac_prog$ac_exec_ext" --version 2>&1` in #(
            'mkdir (GNU coreutils) '* | \
            'mkdir (coreutils) '* | \
            'mkdir (fileutils) '4.1*)
              ac_cv_path_mkdir=$as_dir/$ac_prog$ac_exec_ext
              break 3;;
          esac
        done
      done
    done
    IFS=$as_save_IFS
  fi

  if test "${ac_cv_path_mkdir+set}" = set; then
    MKDIR_P="$ac_cv_path_mkdir -p"
  else
    # As a last resort, use the slow shell script.  Don't cache a
    # value for MKDIR_P within a source directory, because that will
    # break other packages using the cache if that directory is
    # removed, or if the value is a relative name.
    # A mkdir that does not understand --version may have created a
    # directory of that name during the probe above; remove it.
    test -d ./--version && rmdir ./--version
    MKDIR_P="$ac_install_sh -d"
  fi
fi
{ echo "$as_me:$LINENO: result: $MKDIR_P" >&5
  echo "${ECHO_T}$MKDIR_P" >&6; }

mkdir_p="$MKDIR_P"
case $mkdir_p in
  [\\/$]* | ?:[\\/]*) ;;
  */*) mkdir_p="\$(top_builddir)/$mkdir_p" ;;
esac

# Pick the first awk implementation found in $PATH.
for ac_prog in gawk mawk nawk awk
do
  # Extract the first word of "$ac_prog", so it can be a program name with args.
  set dummy $ac_prog; ac_word=$2
  { echo "$as_me:$LINENO: checking for $ac_word" >&5
    echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
  if test "${ac_cv_prog_AWK+set}" = set; then
    echo $ECHO_N "(cached) $ECHO_C" >&6
  else
    if test -n "$AWK"; then
      ac_cv_prog_AWK="$AWK" # Let the user override the test.
    else
      as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
      for as_dir in $PATH
      do
        IFS=$as_save_IFS
        test -z "$as_dir" && as_dir=.
        for ac_exec_ext in '' $ac_executable_extensions; do
          if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
            ac_cv_prog_AWK="$ac_prog"
            echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
            break 2
          fi
        done
      done
      IFS=$as_save_IFS
    fi
  fi
  AWK=$ac_cv_prog_AWK
  if test -n "$AWK"; then
    { echo "$as_me:$LINENO: result: $AWK" >&5
      echo "${ECHO_T}$AWK" >&6; }
  else
    { echo "$as_me:$LINENO: result: no" >&5
      echo "${ECHO_T}no" >&6; }
  fi

  test -n "$AWK" && break
done

{ echo "$as_me:$LINENO: checking whether ${MAKE-make} sets \$(MAKE)" >&5
  echo $ECHO_N "checking whether ${MAKE-make} sets \$(MAKE)... $ECHO_C" >&6; }
# Turn the make program name into something usable inside a variable name.
set x ${MAKE-make}; ac_make=`echo "$2" | sed 's/+/p/g; s/[^a-zA-Z0-9_]/_/g'`
if { as_var=ac_cv_prog_make_${ac_make}_set; eval "test \"\${$as_var+set}\" = set"; }; then
  echo $ECHO_N "(cached) $ECHO_C" >&6
else
  # The recipe line below must start with a TAB.
  cat >conftest.make <<\_ACEOF
SHELL = /bin/sh
all:
	@echo '@@@%%%=$(MAKE)=@@@%%%'
_ACEOF
  # GNU make sometimes prints "make[1]: Entering...", which would confuse us.
  case `${MAKE-make} -f conftest.make 2>/dev/null` in
    *@@@%%%=?*=@@@%%%*)
      eval ac_cv_prog_make_${ac_make}_set=yes;;
    *)
      eval ac_cv_prog_make_${ac_make}_set=no;;
  esac
  rm -f conftest.make
fi
if eval test \$ac_cv_prog_make_${ac_make}_set = yes; then
  { echo "$as_me:$LINENO: result: yes" >&5
    echo "${ECHO_T}yes" >&6; }
  SET_MAKE=
else
  { echo "$as_me:$LINENO: result: no" >&5
    echo "${ECHO_T}no" >&6; }
  SET_MAKE="MAKE=${MAKE-make}"
fi

# Find out whether a directory whose name starts with a dot can be created.
rm -rf .tst 2>/dev/null
mkdir .tst 2>/dev/null
if test -d .tst; then
  am__leading_dot=.
else
  am__leading_dot=_
fi
rmdir .tst 2>/dev/null

if test "`cd "$srcdir" && pwd`" != "`pwd`"; then
  # Use -I$(srcdir) only when $(srcdir) != ., so that make's output
  # is not polluted with repeated "-I."
  am__isrc=' -I$(srcdir)'
  # test to see if srcdir already configured
  if test -f "$srcdir/config.status"; then
    { { echo "$as_me:$LINENO: error: source directory already configured; run \"make distclean\" there first" >&5
        echo "$as_me: error: source directory already configured; run \"make distclean\" there first" >&2;}
      { (exit 1); exit 1; }; }
  fi
fi

# test whether we have cygpath
if test -z "$CYGPATH_W"; then
  if (cygpath --version) >/dev/null 2>/dev/null; then
    CYGPATH_W='cygpath -w'
  else
    CYGPATH_W=echo
  fi
fi

# Define the identity of the package.
PACKAGE=einspline
VERSION=0.9.2

cat >>confdefs.h <<_ACEOF
#define PACKAGE "$PACKAGE"
_ACEOF

cat >>confdefs.h <<_ACEOF
#define VERSION "$VERSION"
_ACEOF

# Some tools Automake needs.

ACLOCAL=${ACLOCAL-"${am_missing_run}aclocal-${am__api_version}"}

AUTOCONF=${AUTOCONF-"${am_missing_run}autoconf"}

AUTOMAKE=${AUTOMAKE-"${am_missing_run}automake-${am__api_version}"}

AUTOHEADER=${AUTOHEADER-"${am_missing_run}autoheader"}

MAKEINFO=${MAKEINFO-"${am_missing_run}makeinfo"}

install_sh=${install_sh-"\$(SHELL) $am_aux_dir/install-sh"}

# Installed binaries are usually stripped using `strip' when the user
# run `make install-strip'.  However `strip' might not be the right
# tool to use in cross-compilation environments, therefore Automake
# will honor the `STRIP' environment variable to overrule this program.
if test "$cross_compiling" != no; then
  if test -n "$ac_tool_prefix"; then
    # Extract the first word of "${ac_tool_prefix}strip", so it can be a program name with args.
    set dummy ${ac_tool_prefix}strip; ac_word=$2
    { echo "$as_me:$LINENO: checking for $ac_word" >&5
      echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
    if test "${ac_cv_prog_STRIP+set}" = set; then
      echo $ECHO_N "(cached) $ECHO_C" >&6
    else
      if test -n "$STRIP"; then
        ac_cv_prog_STRIP="$STRIP" # Let the user override the test.
      else
        as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
        for as_dir in $PATH
        do
          IFS=$as_save_IFS
          test -z "$as_dir" && as_dir=.
          for ac_exec_ext in '' $ac_executable_extensions; do
            if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
              ac_cv_prog_STRIP="${ac_tool_prefix}strip"
              echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
              break 2
            fi
          done
        done
        IFS=$as_save_IFS
      fi
    fi
    STRIP=$ac_cv_prog_STRIP
    if test -n "$STRIP"; then
      { echo "$as_me:$LINENO: result: $STRIP" >&5
        echo "${ECHO_T}$STRIP" >&6; }
    else
      { echo "$as_me:$LINENO: result: no" >&5
        echo "${ECHO_T}no" >&6; }
    fi
  fi
  # No prefixed strip was found: fall back to an unprefixed one.
  if test -z "$ac_cv_prog_STRIP"; then
    ac_ct_STRIP=$STRIP
    # Extract the first word of "strip", so it can be a program name with args.
    set dummy strip; ac_word=$2
    { echo "$as_me:$LINENO: checking for $ac_word" >&5
      echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
    if test "${ac_cv_prog_ac_ct_STRIP+set}" = set; then
      echo $ECHO_N "(cached) $ECHO_C" >&6
    else
      if test -n "$ac_ct_STRIP"; then
        ac_cv_prog_ac_ct_STRIP="$ac_ct_STRIP" # Let the user override the test.
      else
        as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
        for as_dir in $PATH
        do
          IFS=$as_save_IFS
          test -z "$as_dir" && as_dir=.
          for ac_exec_ext in '' $ac_executable_extensions; do
            if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
              ac_cv_prog_ac_ct_STRIP="strip"
              echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
              break 2
            fi
          done
        done
        IFS=$as_save_IFS
      fi
    fi
    ac_ct_STRIP=$ac_cv_prog_ac_ct_STRIP
    if test -n "$ac_ct_STRIP"; then
      { echo "$as_me:$LINENO: result: $ac_ct_STRIP" >&5
        echo "${ECHO_T}$ac_ct_STRIP" >&6; }
    else
      { echo "$as_me:$LINENO: result: no" >&5
        echo "${ECHO_T}no" >&6; }
    fi

    if test "x$ac_ct_STRIP" = x; then
      STRIP=":"
    else
      case $cross_compiling:$ac_tool_warned in
        yes:)
          { echo "$as_me:$LINENO: WARNING: In the future, Autoconf will not detect cross-tools whose name does not start with the host triplet. If you think this configuration is useful to you, please write to autoconf@gnu.org." >&5
            echo "$as_me: WARNING: In the future, Autoconf will not detect cross-tools whose name does not start with the host triplet. If you think this configuration is useful to you, please write to autoconf@gnu.org." >&2;}
          ac_tool_warned=yes ;;
      esac
      STRIP=$ac_ct_STRIP
    fi
  else
    STRIP="$ac_cv_prog_STRIP"
  fi

fi
INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s"

# We need awk for the "check" target.  The system "awk" is bad on
# some platforms.
# Always define AMTAR for backward compatibility.

AMTAR=${AMTAR-"${am_missing_run}tar"}

am__tar='${AMTAR} chof - "$$tardir"'; am__untar='${AMTAR} xf -'

ac_config_headers="$ac_config_headers src/config.h"

# Checks for programs.
DEPDIR="${am__leading_dot}deps"

ac_config_commands="$ac_config_commands depfiles"

am_make=${MAKE-make}
# The recipe line below must start with a TAB.
cat > confinc << 'END'
am__doit:
	@echo done
.PHONY: am__doit
END
# If we don't find an include directive, just comment out the code.
{ echo "$as_me:$LINENO: checking for style of include used by $am_make" >&5
  echo $ECHO_N "checking for style of include used by $am_make... $ECHO_C" >&6; }
am__include="#"
am__quote=
_am_result=none
# First try GNU make style include.
echo "include confinc" > confmf
# We grep out `Entering directory' and `Leaving directory'
# messages which can occur if `w' ends up in MAKEFLAGS.
# In particular we don't look at `^make:' because GNU make might
# be invoked under some other name (usually "gmake"), in which
# case it prints its new name instead of `make'.
if test "`$am_make -s -f confmf 2> /dev/null | grep -v 'ing directory'`" = "done"; then
  am__include=include
  am__quote=
  _am_result=GNU
fi
# Now try BSD make style include.
if test "$am__include" = "#"; then
  echo '.include "confinc"' > confmf
  if test "`$am_make -s -f confmf 2> /dev/null`" = "done"; then
    am__include=.include
    am__quote="\""
    _am_result=BSD
  fi
fi

{ echo "$as_me:$LINENO: result: $_am_result" >&5
  echo "${ECHO_T}$_am_result" >&6; }
rm -f confinc confmf

# Check whether --enable-dependency-tracking was given.
# ----------------------------------------------------------------------
# Dependency-tracking switches.  Unless tracking was explicitly turned
# off, point am_depcomp at the depcomp script in the auxiliary directory
# and enable the AMDEP branch of the AMDEP_TRUE / AMDEP_FALSE pair.
# ----------------------------------------------------------------------
if test "${enable_dependency_tracking+set}" = set; then
  enableval=$enable_dependency_tracking;
fi

if test "x$enable_dependency_tracking" != xno; then
  am_depcomp="$ac_aux_dir/depcomp"
  AMDEPBACKSLASH='\'
fi
if test "x$enable_dependency_tracking" != xno; then
  AMDEP_TRUE=
  AMDEP_FALSE='#'
else
  AMDEP_TRUE='#'
  AMDEP_FALSE=
fi

# Command templates for the C language tests (expanded later via eval).
ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu

# ----------------------------------------------------------------------
# Locate a C compiler.  Candidates are tried in this order:
# ${ac_tool_prefix}gcc, gcc, ${ac_tool_prefix}cc, cc (never /usr/ucb/cc),
# ${ac_tool_prefix}cl.exe, cl.exe.  A preset $CC overrides each search.
# ----------------------------------------------------------------------
if test -n "$ac_tool_prefix"; then
  # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args.
  set dummy ${ac_tool_prefix}gcc; ac_word=$2
  { echo "$as_me:$LINENO: checking for $ac_word" >&5
    echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
  if test "${ac_cv_prog_CC+set}" = set; then
    echo $ECHO_N "(cached) $ECHO_C" >&6
  else
    if test -n "$CC"; then
      ac_cv_prog_CC="$CC" # Let the user override the test.
    else
      as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
      for as_dir in $PATH
      do
        IFS=$as_save_IFS
        test -z "$as_dir" && as_dir=.
        for ac_exec_ext in '' $ac_executable_extensions; do
          if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
            ac_cv_prog_CC="${ac_tool_prefix}gcc"
            echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
            break 2
          fi
        done
      done
      IFS=$as_save_IFS
    fi
  fi
  CC=$ac_cv_prog_CC
  if test -n "$CC"; then
    { echo "$as_me:$LINENO: result: $CC" >&5
      echo "${ECHO_T}$CC" >&6; }
  else
    { echo "$as_me:$LINENO: result: no" >&5
      echo "${ECHO_T}no" >&6; }
  fi
fi
if test -z "$ac_cv_prog_CC"; then
  ac_ct_CC=$CC
  # Extract the first word of "gcc", so it can be a program name with args.
  set dummy gcc; ac_word=$2
  { echo "$as_me:$LINENO: checking for $ac_word" >&5
    echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
  if test "${ac_cv_prog_ac_ct_CC+set}" = set; then
    echo $ECHO_N "(cached) $ECHO_C" >&6
  else
    if test -n "$ac_ct_CC"; then
      ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test.
    else
      as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
      for as_dir in $PATH
      do
        IFS=$as_save_IFS
        test -z "$as_dir" && as_dir=.
        for ac_exec_ext in '' $ac_executable_extensions; do
          if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
            ac_cv_prog_ac_ct_CC="gcc"
            echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
            break 2
          fi
        done
      done
      IFS=$as_save_IFS
    fi
  fi
  ac_ct_CC=$ac_cv_prog_ac_ct_CC
  if test -n "$ac_ct_CC"; then
    { echo "$as_me:$LINENO: result: $ac_ct_CC" >&5
      echo "${ECHO_T}$ac_ct_CC" >&6; }
  else
    { echo "$as_me:$LINENO: result: no" >&5
      echo "${ECHO_T}no" >&6; }
  fi

  if test "x$ac_ct_CC" = x; then
    CC=""
  else
    case $cross_compiling:$ac_tool_warned in
      yes:)
        { echo "$as_me:$LINENO: WARNING: In the future, Autoconf will not detect cross-tools whose name does not start with the host triplet. If you think this configuration is useful to you, please write to autoconf@gnu.org." >&5
          echo "$as_me: WARNING: In the future, Autoconf will not detect cross-tools whose name does not start with the host triplet. If you think this configuration is useful to you, please write to autoconf@gnu.org." >&2;}
        ac_tool_warned=yes ;;
    esac
    CC=$ac_ct_CC
  fi
else
  CC="$ac_cv_prog_CC"
fi

if test -z "$CC"; then
  if test -n "$ac_tool_prefix"; then
    # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args.
    set dummy ${ac_tool_prefix}cc; ac_word=$2
    { echo "$as_me:$LINENO: checking for $ac_word" >&5
      echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
    if test "${ac_cv_prog_CC+set}" = set; then
      echo $ECHO_N "(cached) $ECHO_C" >&6
    else
      if test -n "$CC"; then
        ac_cv_prog_CC="$CC" # Let the user override the test.
      else
        as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
        for as_dir in $PATH
        do
          IFS=$as_save_IFS
          test -z "$as_dir" && as_dir=.
          for ac_exec_ext in '' $ac_executable_extensions; do
            if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
              ac_cv_prog_CC="${ac_tool_prefix}cc"
              echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
              break 2
            fi
          done
        done
        IFS=$as_save_IFS
      fi
    fi
    CC=$ac_cv_prog_CC
    if test -n "$CC"; then
      { echo "$as_me:$LINENO: result: $CC" >&5
        echo "${ECHO_T}$CC" >&6; }
    else
      { echo "$as_me:$LINENO: result: no" >&5
        echo "${ECHO_T}no" >&6; }
    fi
  fi
fi
if test -z "$CC"; then
  # Extract the first word of "cc", so it can be a program name with args.
  set dummy cc; ac_word=$2
  { echo "$as_me:$LINENO: checking for $ac_word" >&5
    echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
  if test "${ac_cv_prog_CC+set}" = set; then
    echo $ECHO_N "(cached) $ECHO_C" >&6
  else
    if test -n "$CC"; then
      ac_cv_prog_CC="$CC" # Let the user override the test.
    else
      ac_prog_rejected=no
      as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
      for as_dir in $PATH
      do
        IFS=$as_save_IFS
        test -z "$as_dir" && as_dir=.
        for ac_exec_ext in '' $ac_executable_extensions; do
          if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
            # /usr/ucb/cc is never accepted; remember that it was seen
            # and keep searching.
            if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then
              ac_prog_rejected=yes
              continue
            fi
            ac_cv_prog_CC="cc"
            echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
            break 2
          fi
        done
      done
      IFS=$as_save_IFS

      if test $ac_prog_rejected = yes; then
        # We found a bogon in the path, so make sure we never use it.
        set dummy $ac_cv_prog_CC
        shift
        if test $# != 0; then
          # We chose a different compiler from the bogus one.
          # However, it has the same basename, so the bogon will be chosen
          # first if we set CC to just the basename; use the full file name.
          shift
          ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@"
        fi
      fi
    fi
  fi
  CC=$ac_cv_prog_CC
  if test -n "$CC"; then
    { echo "$as_me:$LINENO: result: $CC" >&5
      echo "${ECHO_T}$CC" >&6; }
  else
    { echo "$as_me:$LINENO: result: no" >&5
      echo "${ECHO_T}no" >&6; }
  fi
fi
if test -z "$CC"; then
  if test -n "$ac_tool_prefix"; then
    for ac_prog in cl.exe
    do
      # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args.
      set dummy $ac_tool_prefix$ac_prog; ac_word=$2
      { echo "$as_me:$LINENO: checking for $ac_word" >&5
        echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
      if test "${ac_cv_prog_CC+set}" = set; then
        echo $ECHO_N "(cached) $ECHO_C" >&6
      else
        if test -n "$CC"; then
          ac_cv_prog_CC="$CC" # Let the user override the test.
        else
          as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
          for as_dir in $PATH
          do
            IFS=$as_save_IFS
            test -z "$as_dir" && as_dir=.
            for ac_exec_ext in '' $ac_executable_extensions; do
              if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
                ac_cv_prog_CC="$ac_tool_prefix$ac_prog"
                echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
                break 2
              fi
            done
          done
          IFS=$as_save_IFS
        fi
      fi
      CC=$ac_cv_prog_CC
      if test -n "$CC"; then
        { echo "$as_me:$LINENO: result: $CC" >&5
          echo "${ECHO_T}$CC" >&6; }
      else
        { echo "$as_me:$LINENO: result: no" >&5
          echo "${ECHO_T}no" >&6; }
      fi

      test -n "$CC" && break
    done
  fi
  if test -z "$CC"; then
    ac_ct_CC=$CC
    for ac_prog in cl.exe
    do
      # Extract the first word of "$ac_prog", so it can be a program name with args.
      set dummy $ac_prog; ac_word=$2
      { echo "$as_me:$LINENO: checking for $ac_word" >&5
        echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
      if test "${ac_cv_prog_ac_ct_CC+set}" = set; then
        echo $ECHO_N "(cached) $ECHO_C" >&6
      else
        if test -n "$ac_ct_CC"; then
          ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test.
        else
          as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
          for as_dir in $PATH
          do
            IFS=$as_save_IFS
            test -z "$as_dir" && as_dir=.
            for ac_exec_ext in '' $ac_executable_extensions; do
              if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
                ac_cv_prog_ac_ct_CC="$ac_prog"
                echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
                break 2
              fi
            done
          done
          IFS=$as_save_IFS
        fi
      fi
      ac_ct_CC=$ac_cv_prog_ac_ct_CC
      if test -n "$ac_ct_CC"; then
        { echo "$as_me:$LINENO: result: $ac_ct_CC" >&5
          echo "${ECHO_T}$ac_ct_CC" >&6; }
      else
        { echo "$as_me:$LINENO: result: no" >&5
          echo "${ECHO_T}no" >&6; }
      fi

      test -n "$ac_ct_CC" && break
    done

    if test "x$ac_ct_CC" = x; then
      CC=""
    else
      case $cross_compiling:$ac_tool_warned in
        yes:)
          { echo "$as_me:$LINENO: WARNING: In the future, Autoconf will not detect cross-tools whose name does not start with the host triplet. If you think this configuration is useful to you, please write to autoconf@gnu.org." >&5
            echo "$as_me: WARNING: In the future, Autoconf will not detect cross-tools whose name does not start with the host triplet. If you think this configuration is useful to you, please write to autoconf@gnu.org." >&2;}
          ac_tool_warned=yes ;;
      esac
      CC=$ac_ct_CC
    fi
  fi

fi

test -z "$CC" && { { echo "$as_me:$LINENO: error: no acceptable C compiler found in \$PATH See \`config.log' for more details." >&5
    echo "$as_me: error: no acceptable C compiler found in \$PATH See \`config.log' for more details." >&2;}
  { (exit 1); exit 1; }; }

# Provide some information about the compiler.
# Each of the three groups below echoes the command to fd 5, runs the
# compiler with one version-style flag, and records the exit status.
echo "$as_me:$LINENO: checking for C compiler version" >&5
ac_compiler=`set X $ac_compile; echo $2`
{ (ac_try="$ac_compiler --version >&5"
   case "(($ac_try" in
     *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
     *) ac_try_echo=$ac_try;;
   esac
   eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
  (eval "$ac_compiler --version >&5") 2>&5
  ac_status=$?
  echo "$as_me:$LINENO: \$? = $ac_status" >&5
  (exit $ac_status); }
{ (ac_try="$ac_compiler -v >&5"
   case "(($ac_try" in
     *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
     *) ac_try_echo=$ac_try;;
   esac
   eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
  (eval "$ac_compiler -v >&5") 2>&5
  ac_status=$?
  echo "$as_me:$LINENO: \$? = $ac_status" >&5
  (exit $ac_status); }
{ (ac_try="$ac_compiler -V >&5"
   case "(($ac_try" in
     *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
     *) ac_try_echo=$ac_try;;
   esac
   eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
  (eval "$ac_compiler -V >&5") 2>&5
  ac_status=$?
  echo "$as_me:$LINENO: \$? = $ac_status" >&5
  (exit $ac_status); }

# Build the minimal test program: the accumulated confdefs.h followed by
# an empty main().
cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h. */
_ACEOF
cat confdefs.h >>conftest.$ac_ext
cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */

int
main ()
{

  ;
  return 0;
}
_ACEOF
ac_clean_files_save=$ac_clean_files
ac_clean_files="$ac_clean_files a.out a.exe b.out"
# Try to create an executable without -o first, disregard a.out.
# It will help us diagnose broken compilers, and finding out an intuition
# of exeext.
{ echo "$as_me:$LINENO: checking for C compiler default output file name" >&5
  echo $ECHO_N "checking for C compiler default output file name... $ECHO_C" >&6; }
# Same as $ac_link, minus the "-o conftest..." argument.
ac_link_default=`echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'`
#
# List of possible output files, starting from the most likely.
# The algorithm is not robust to junk in `.', hence go to wildcards (a.*)
# only as a last resort.  b.out is created by i960 compilers.
ac_files='a_out.exe a.exe conftest.exe a.out conftest a.* conftest.* b.out'
#
# The IRIX 6 linker writes into existing files which may not be
# executable, retaining their permissions.  Remove them first so a
# subsequent execution test works.
ac_rmfiles= for ac_file in $ac_files do case $ac_file in *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.o | *.obj ) ;; * ) ac_rmfiles="$ac_rmfiles $ac_file";; esac done rm -f $ac_rmfiles if { (ac_try="$ac_link_default" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_link_default") 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; then # Autoconf-2.13 could set the ac_cv_exeext variable to `no'. # So ignore a value of `no', otherwise this would lead to `EXEEXT = no' # in a Makefile. We should not override ac_cv_exeext if it was cached, # so that the user can short-circuit this test for compilers unknown to # Autoconf. for ac_file in $ac_files '' do test -f "$ac_file" || continue case $ac_file in *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.o | *.obj ) ;; [ab].out ) # We found the default executable, but exeext='' is most # certainly right. break;; *.* ) if test "${ac_cv_exeext+set}" = set && test "$ac_cv_exeext" != no; then :; else ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` fi # We set ac_cv_exeext here because the later test for it is not # safe: cross compilers may not add the suffix if given an `-o' # argument, so we may need to know it at that point already. # Even if this section looks crufty: it has the advantage of # actually working. break;; * ) break;; esac done test "$ac_cv_exeext" = no && ac_cv_exeext= else ac_file='' fi { echo "$as_me:$LINENO: result: $ac_file" >&5 echo "${ECHO_T}$ac_file" >&6; } if test -z "$ac_file"; then echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 { { echo "$as_me:$LINENO: error: C compiler cannot create executables See \`config.log' for more details." >&5 echo "$as_me: error: C compiler cannot create executables See \`config.log' for more details." 
>&2;} { (exit 77); exit 77; }; } fi ac_exeext=$ac_cv_exeext # Check that the compiler produces executables we can run. If not, either # the compiler is broken, or we cross compile. { echo "$as_me:$LINENO: checking whether the C compiler works" >&5 echo $ECHO_N "checking whether the C compiler works... $ECHO_C" >&6; } # FIXME: These cross compiler hacks should be removed for Autoconf 3.0 # If not cross compiling, check that we can run a simple program. if test "$cross_compiling" != yes; then if { ac_try='./$ac_file' { (case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_try") 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; }; then cross_compiling=no else if test "$cross_compiling" = maybe; then cross_compiling=yes else { { echo "$as_me:$LINENO: error: cannot run C compiled programs. If you meant to cross compile, use \`--host'. See \`config.log' for more details." >&5 echo "$as_me: error: cannot run C compiled programs. If you meant to cross compile, use \`--host'. See \`config.log' for more details." >&2;} { (exit 1); exit 1; }; } fi fi fi { echo "$as_me:$LINENO: result: yes" >&5 echo "${ECHO_T}yes" >&6; } rm -f a.out a.exe conftest$ac_cv_exeext b.out ac_clean_files=$ac_clean_files_save # Check that the compiler produces executables we can run. If not, either # the compiler is broken, or we cross compile. { echo "$as_me:$LINENO: checking whether we are cross compiling" >&5 echo $ECHO_N "checking whether we are cross compiling... $ECHO_C" >&6; } { echo "$as_me:$LINENO: result: $cross_compiling" >&5 echo "${ECHO_T}$cross_compiling" >&6; } { echo "$as_me:$LINENO: checking for suffix of executables" >&5 echo $ECHO_N "checking for suffix of executables... 
$ECHO_C" >&6; } if { (ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_link") 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; then # If both `conftest.exe' and `conftest' are `present' (well, observable) # catch `conftest.exe'. For instance with Cygwin, `ls conftest' will # work properly (i.e., refer to `conftest.exe'), while it won't with # `rm'. for ac_file in conftest.exe conftest conftest.*; do test -f "$ac_file" || continue case $ac_file in *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.o | *.obj ) ;; *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` break;; * ) break;; esac done else { { echo "$as_me:$LINENO: error: cannot compute suffix of executables: cannot compile and link See \`config.log' for more details." >&5 echo "$as_me: error: cannot compute suffix of executables: cannot compile and link See \`config.log' for more details." >&2;} { (exit 1); exit 1; }; } fi rm -f conftest$ac_cv_exeext { echo "$as_me:$LINENO: result: $ac_cv_exeext" >&5 echo "${ECHO_T}$ac_cv_exeext" >&6; } rm -f conftest.$ac_ext EXEEXT=$ac_cv_exeext ac_exeext=$EXEEXT { echo "$as_me:$LINENO: checking for suffix of object files" >&5 echo $ECHO_N "checking for suffix of object files... $ECHO_C" >&6; } if test "${ac_cv_objext+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ int main () { ; return 0; } _ACEOF rm -f conftest.o conftest.obj if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compile") 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 (exit $ac_status); }; then for ac_file in conftest.o conftest.obj conftest.*; do test -f "$ac_file" || continue; case $ac_file in *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf ) ;; *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'` break;; esac done else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 { { echo "$as_me:$LINENO: error: cannot compute suffix of object files: cannot compile See \`config.log' for more details." >&5 echo "$as_me: error: cannot compute suffix of object files: cannot compile See \`config.log' for more details." >&2;} { (exit 1); exit 1; }; } fi rm -f conftest.$ac_cv_objext conftest.$ac_ext fi { echo "$as_me:$LINENO: result: $ac_cv_objext" >&5 echo "${ECHO_T}$ac_cv_objext" >&6; } OBJEXT=$ac_cv_objext ac_objext=$OBJEXT { echo "$as_me:$LINENO: checking whether we are using the GNU C compiler" >&5 echo $ECHO_N "checking whether we are using the GNU C compiler... $ECHO_C" >&6; } if test "${ac_cv_c_compiler_gnu+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ int main () { #ifndef __GNUC__ choke me #endif ; return 0; } _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! 
-s conftest.err } && test -s conftest.$ac_objext; then ac_compiler_gnu=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_compiler_gnu=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext ac_cv_c_compiler_gnu=$ac_compiler_gnu fi { echo "$as_me:$LINENO: result: $ac_cv_c_compiler_gnu" >&5 echo "${ECHO_T}$ac_cv_c_compiler_gnu" >&6; } GCC=`test $ac_compiler_gnu = yes && echo yes` ac_test_CFLAGS=${CFLAGS+set} ac_save_CFLAGS=$CFLAGS { echo "$as_me:$LINENO: checking whether $CC accepts -g" >&5 echo $ECHO_N "checking whether $CC accepts -g... $ECHO_C" >&6; } if test "${ac_cv_prog_cc_g+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else ac_save_c_werror_flag=$ac_c_werror_flag ac_c_werror_flag=yes ac_cv_prog_cc_g=no CFLAGS="-g" cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ int main () { ; return 0; } _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest.$ac_objext; then ac_cv_prog_cc_g=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 CFLAGS="" cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ int main () { ; return 0; } _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? 
grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest.$ac_objext; then : else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_c_werror_flag=$ac_save_c_werror_flag CFLAGS="-g" cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ int main () { ; return 0; } _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest.$ac_objext; then ac_cv_prog_cc_g=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext ac_c_werror_flag=$ac_save_c_werror_flag fi { echo "$as_me:$LINENO: result: $ac_cv_prog_cc_g" >&5 echo "${ECHO_T}$ac_cv_prog_cc_g" >&6; } if test "$ac_test_CFLAGS" = set; then CFLAGS=$ac_save_CFLAGS elif test $ac_cv_prog_cc_g = yes; then if test "$GCC" = yes; then CFLAGS="-g -O2" else CFLAGS="-g" fi else if test "$GCC" = yes; then CFLAGS="-O2" else CFLAGS= fi fi { echo "$as_me:$LINENO: checking for $CC option to accept ISO C89" >&5 echo $ECHO_N "checking for $CC option to accept ISO C89... 
$ECHO_C" >&6; }
# Find the option (if any) that makes $CC accept ISO C89.
# Result is left in ac_cv_prog_cc_c89:
#   ''     - the probe compiles without any extra option
#   'no'   - none of the candidate options worked
#   other  - the option that worked; it is appended to $CC below
# A value already present in the cache variable is used as-is.
if test "${ac_cv_prog_cc_c89+set}" = set; then
  echo $ECHO_N "(cached) $ECHO_C" >&6
else
  ac_cv_prog_cc_c89=no
  ac_save_CC=$CC
  # Write the probe program: the confdefs.h contents followed by code that
  # uses prototypes, varargs, FILE and struct stat.  The here-document
  # terminators must stay in column 0.
  cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h.  */
_ACEOF
  cat confdefs.h >>conftest.$ac_ext
  cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h.  */
#include <stdarg.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
/* Most of the following tests are stolen from RCS 5.7's src/conf.sh.  */
struct buf { int x; };
FILE * (*rcsopen) (struct buf *, struct stat *, int);
static char *e (p, i)
     char **p;
     int i;
{
  return p[i];
}
static char *f (char * (*g) (char **, int), char **p, ...)
{
  char *s;
  va_list v;
  va_start (v,p);
  s = g (p, va_arg (v,int));
  va_end (v);
  return s;
}

/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default.  It has
   function prototypes and stuff, but not '\xHH' hex character constants.
   These don't provoke an error unfortunately, instead are silently treated
   as 'x'.  The following induces an error, until -std is added to get
   proper ANSI mode.  Curiously '\x00'!='x' always comes out true, for an
   array size at least.  It's necessary to write '\x00'==0 to get something
   that's true only with -std.  */
int osf4_cc_array ['\x00' == 0 ? 1 : -1];

/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters
   inside strings and character constants.  */
#define FOO(x) 'x'
int xlc6_cc_array[FOO(a) == 'x' ? 1 : -1];

int test (int i, double x);
struct s1 {int (*f) (int a);};
struct s2 {int (*f) (double a);};
int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int);
int argc;
char **argv;
int
main ()
{
return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1];
  ;
  return 0;
}
_ACEOF
  # Try each candidate option in turn; the first one that yields an object
  # file (and, when ac_c_werror_flag is set, no diagnostics) wins.
  for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \
      -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__"
  do
    CC="$ac_save_CC $ac_arg"
    rm -f conftest.$ac_objext
    if { (ac_try="$ac_compile"
          case "(($ac_try" in
            *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
            *) ac_try_echo=$ac_try;;
          esac
          eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
         (eval "$ac_compile") 2>conftest.er1
         ac_status=$?
         # Drop shell trace lines ("+ ...") from the captured stderr.
         grep -v '^ *+' conftest.er1 >conftest.err
         rm -f conftest.er1
         cat conftest.err >&5
         echo "$as_me:$LINENO: \$? = $ac_status" >&5
         (exit $ac_status); } && {
         test -z "$ac_c_werror_flag" ||
         test ! -s conftest.err
       } && test -s conftest.$ac_objext; then
      ac_cv_prog_cc_c89=$ac_arg
    else
      echo "$as_me: failed program was:" >&5
      sed 's/^/| /' conftest.$ac_ext >&5
    fi
    rm -f core conftest.err conftest.$ac_objext
    test "x$ac_cv_prog_cc_c89" != "xno" && break
  done
  rm -f conftest.$ac_ext
  CC=$ac_save_CC
fi
# AC_CACHE_VAL
# Report the outcome and, when an option was needed, make it part of $CC.
case "x$ac_cv_prog_cc_c89" in
  x)
    { echo "$as_me:$LINENO: result: none needed" >&5
      echo "${ECHO_T}none needed" >&6; } ;;
  xno)
    { echo "$as_me:$LINENO: result: unsupported" >&5
      echo "${ECHO_T}unsupported" >&6; } ;;
  *)
    CC="$CC $ac_cv_prog_cc_c89"
    { echo "$as_me:$LINENO: result: $ac_cv_prog_cc_c89" >&5
      echo "${ECHO_T}$ac_cv_prog_cc_c89" >&6; } ;;
esac

# Use the C compiler command templates for the checks that follow.
ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu

# Inputs of the dependency-style check that starts here: the compiler to
# examine, and an empty mode list (the check fills it in when empty).
depcc="$CC"
am_compiler_list=

{ echo "$as_me:$LINENO: checking dependency style of $depcc" >&5
echo $ECHO_N "checking dependency style of $depcc... 
$ECHO_C" >&6; } if test "${am_cv_CC_dependencies_compiler_type+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then # We make a subdir and do the tests there. Otherwise we can end up # making bogus files that we don't know about and never remove. For # instance it was reported that on HP-UX the gcc test will end up # making a dummy file named `D' -- because `-MD' means `put the output # in D'. mkdir conftest.dir # Copy depcomp to subdir because otherwise we won't find it if we're # using a relative directory. cp "$am_depcomp" conftest.dir cd conftest.dir # We will build objects and dependencies in a subdirectory because # it helps to detect inapplicable dependency modes. For instance # both Tru64's cc and ICC support -MD to output dependencies as a # side effect of compilation, but ICC will put the dependencies in # the current directory while Tru64 will put them in the object # directory. mkdir sub am_cv_CC_dependencies_compiler_type=none if test "$am_compiler_list" = ""; then am_compiler_list=`sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < ./depcomp` fi for depmode in $am_compiler_list; do # Setup a source with many dependencies, because some compilers # like to wrap large dependency lists on column 80 (with \), and # we should not choose a depcomp mode which is confused by this. # # We need to recreate these files for each test, as the compiler may # overwrite some of them when testing with obscure command lines. # This happens at least with the AIX C compiler. : > sub/conftest.c for i in 1 2 3 4 5 6; do echo '#include "conftst'$i'.h"' >> sub/conftest.c # Using `: > sub/conftst$i.h' creates only sub/conftst1.h with # Solaris 8's {/usr,}/bin/sh. 
touch sub/conftst$i.h done echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf case $depmode in nosideeffect) # after this tag, mechanisms are not by side-effect, so they'll # only be used when explicitly requested if test "x$enable_dependency_tracking" = xyes; then continue else break fi ;; none) break ;; esac # We check with `-c' and `-o' for the sake of the "dashmstdout" # mode. It turns out that the SunPro C++ compiler does not properly # handle `-M -o', and we need to detect this. if depmode=$depmode \ source=sub/conftest.c object=sub/conftest.${OBJEXT-o} \ depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ $SHELL ./depcomp $depcc -c -o sub/conftest.${OBJEXT-o} sub/conftest.c \ >/dev/null 2>conftest.err && grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && grep sub/conftest.${OBJEXT-o} sub/conftest.Po > /dev/null 2>&1 && ${MAKE-make} -s -f confmf > /dev/null 2>&1; then # icc doesn't choke on unknown options, it will just issue warnings # or remarks (even with -Werror). So we grep stderr for any message # that says an option was ignored or not supported. # When given -MP, icc 7.0 and 7.1 complain thusly: # icc: Command line warning: ignoring option '-M'; no argument required # The diagnosis changed in icc 8.0: # icc: Command line remark: option '-MP' not supported if (grep 'ignoring option' conftest.err || grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else am_cv_CC_dependencies_compiler_type=$depmode break fi fi done cd .. 
rm -rf conftest.dir else am_cv_CC_dependencies_compiler_type=none fi fi { echo "$as_me:$LINENO: result: $am_cv_CC_dependencies_compiler_type" >&5 echo "${ECHO_T}$am_cv_CC_dependencies_compiler_type" >&6; } CCDEPMODE=depmode=$am_cv_CC_dependencies_compiler_type if test "x$enable_dependency_tracking" != xno \ && test "$am_cv_CC_dependencies_compiler_type" = gcc3; then am__fastdepCC_TRUE= am__fastdepCC_FALSE='#' else am__fastdepCC_TRUE='#' am__fastdepCC_FALSE= fi case $ac_cv_prog_cc_stdc in no) ac_cv_prog_cc_c99=no; ac_cv_prog_cc_c89=no ;; *) { echo "$as_me:$LINENO: checking for $CC option to accept ISO C99" >&5 echo $ECHO_N "checking for $CC option to accept ISO C99... $ECHO_C" >&6; } if test "${ac_cv_prog_cc_c99+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else ac_cv_prog_cc_c99=no ac_save_CC=$CC cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include #include #include #include #include // Check varargs macros. These examples are taken from C99 6.10.3.5. #define debug(...) fprintf (stderr, __VA_ARGS__) #define showlist(...) puts (#__VA_ARGS__) #define report(test,...) ((test) ? puts (#test) : printf (__VA_ARGS__)) static void test_varargs_macros (void) { int x = 1234; int y = 5678; debug ("Flag"); debug ("X = %d\n", x); showlist (The first, second, and third items.); report (x>y, "x is %d but y is %d", x, y); } // Check long long types. 
#define BIG64 18446744073709551615ull #define BIG32 4294967295ul #define BIG_OK (BIG64 / BIG32 == 4294967297ull && BIG64 % BIG32 == 0) #if !BIG_OK your preprocessor is broken; #endif #if BIG_OK #else your preprocessor is broken; #endif static long long int bignum = -9223372036854775807LL; static unsigned long long int ubignum = BIG64; struct incomplete_array { int datasize; double data[]; }; struct named_init { int number; const wchar_t *name; double average; }; typedef const char *ccp; static inline int test_restrict (ccp restrict text) { // See if C++-style comments work. // Iterate through items via the restricted pointer. // Also check for declarations in for loops. for (unsigned int i = 0; *(text+i) != '\0'; ++i) continue; return 0; } // Check varargs and va_copy. static void test_varargs (const char *format, ...) { va_list args; va_start (args, format); va_list args_copy; va_copy (args_copy, args); const char *str; int number; float fnumber; while (*format) { switch (*format++) { case 's': // string str = va_arg (args_copy, const char *); break; case 'd': // int number = va_arg (args_copy, int); break; case 'f': // float fnumber = va_arg (args_copy, double); break; default: break; } } va_end (args_copy); va_end (args); } int main () { // Check bool. _Bool success = false; // Check restrict. if (test_restrict ("String literal") == 0) success = true; char *restrict newvar = "Another string"; // Check varargs. test_varargs ("s, d' f .", "string", 65, 34.234); test_varargs_macros (); // Check flexible array members. struct incomplete_array *ia = malloc (sizeof (struct incomplete_array) + (sizeof (double) * 10)); ia->datasize = 10; for (int i = 0; i < ia->datasize; ++i) ia->data[i] = i * 1.234; // Check named initializers. 
struct named_init ni = { .number = 34, .name = L"Test wide string", .average = 543.34343, }; ni.number = 58; int dynamic_array[ni.number]; dynamic_array[ni.number - 1] = 543; // work around unused variable warnings return (!success || bignum == 0LL || ubignum == 0uLL || newvar[0] == 'x' || dynamic_array[ni.number - 1] != 543); ; return 0; } _ACEOF for ac_arg in '' -std=gnu99 -c99 -qlanglvl=extc99 do CC="$ac_save_CC $ac_arg" rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest.$ac_objext; then ac_cv_prog_cc_c99=$ac_arg else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 fi rm -f core conftest.err conftest.$ac_objext test "x$ac_cv_prog_cc_c99" != "xno" && break done rm -f conftest.$ac_ext CC=$ac_save_CC fi # AC_CACHE_VAL case "x$ac_cv_prog_cc_c99" in x) { echo "$as_me:$LINENO: result: none needed" >&5 echo "${ECHO_T}none needed" >&6; } ;; xno) { echo "$as_me:$LINENO: result: unsupported" >&5 echo "${ECHO_T}unsupported" >&6; } ;; *) CC="$CC $ac_cv_prog_cc_c99" { echo "$as_me:$LINENO: result: $ac_cv_prog_cc_c99" >&5 echo "${ECHO_T}$ac_cv_prog_cc_c99" >&6; } ;; esac if test "x$ac_cv_prog_cc_c99" != xno; then ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c99 else { echo "$as_me:$LINENO: checking for $CC option to accept ISO C89" >&5 echo $ECHO_N "checking for $CC option to accept ISO C89... $ECHO_C" >&6; } if test "${ac_cv_prog_cc_c89+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else ac_cv_prog_cc_c89=no ac_save_CC=$CC cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. 
*/ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include #include #include #include /* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */ struct buf { int x; }; FILE * (*rcsopen) (struct buf *, struct stat *, int); static char *e (p, i) char **p; int i; { return p[i]; } static char *f (char * (*g) (char **, int), char **p, ...) { char *s; va_list v; va_start (v,p); s = g (p, va_arg (v,int)); va_end (v); return s; } /* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has function prototypes and stuff, but not '\xHH' hex character constants. These don't provoke an error unfortunately, instead are silently treated as 'x'. The following induces an error, until -std is added to get proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an array size at least. It's necessary to write '\x00'==0 to get something that's true only with -std. */ int osf4_cc_array ['\x00' == 0 ? 1 : -1]; /* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters inside strings and character constants. */ #define FOO(x) 'x' int xlc6_cc_array[FOO(a) == 'x' ? 1 : -1]; int test (int i, double x); struct s1 {int (*f) (int a);}; struct s2 {int (*f) (double a);}; int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int); int argc; char **argv; int main () { return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]; ; return 0; } _ACEOF for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \ -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" do CC="$ac_save_CC $ac_arg" rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest.$ac_objext; then ac_cv_prog_cc_c89=$ac_arg else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 fi rm -f core conftest.err conftest.$ac_objext test "x$ac_cv_prog_cc_c89" != "xno" && break done rm -f conftest.$ac_ext CC=$ac_save_CC fi # AC_CACHE_VAL case "x$ac_cv_prog_cc_c89" in x) { echo "$as_me:$LINENO: result: none needed" >&5 echo "${ECHO_T}none needed" >&6; } ;; xno) { echo "$as_me:$LINENO: result: unsupported" >&5 echo "${ECHO_T}unsupported" >&6; } ;; *) CC="$CC $ac_cv_prog_cc_c89" { echo "$as_me:$LINENO: result: $ac_cv_prog_cc_c89" >&5 echo "${ECHO_T}$ac_cv_prog_cc_c89" >&6; } ;; esac if test "x$ac_cv_prog_cc_c89" != xno; then ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c89 else ac_cv_prog_cc_stdc=no fi fi ;; esac { echo "$as_me:$LINENO: checking for $CC option to accept ISO Standard C" >&5 echo $ECHO_N "checking for $CC option to accept ISO Standard C... $ECHO_C" >&6; } if test "${ac_cv_prog_cc_stdc+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 fi case $ac_cv_prog_cc_stdc in no) { echo "$as_me:$LINENO: result: unsupported" >&5 echo "${ECHO_T}unsupported" >&6; } ;; '') { echo "$as_me:$LINENO: result: none needed" >&5 echo "${ECHO_T}none needed" >&6; } ;; *) { echo "$as_me:$LINENO: result: $ac_cv_prog_cc_stdc" >&5 echo "${ECHO_T}$ac_cv_prog_cc_stdc" >&6; } ;; esac ac_ext=cpp ac_cpp='$CXXCPP $CPPFLAGS' ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_cxx_compiler_gnu if test -z "$CXX"; then if test -n "$CCC"; then CXX=$CCC else if test -n "$ac_tool_prefix"; then for ac_prog in g++ c++ gpp aCC CC cxx cc++ cl.exe FCC KCC RCC xlC_r xlC do # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. 
set dummy $ac_tool_prefix$ac_prog; ac_word=$2 { echo "$as_me:$LINENO: checking for $ac_word" >&5 echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } if test "${ac_cv_prog_CXX+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else if test -n "$CXX"; then ac_cv_prog_CXX="$CXX" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then ac_cv_prog_CXX="$ac_tool_prefix$ac_prog" echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi CXX=$ac_cv_prog_CXX if test -n "$CXX"; then { echo "$as_me:$LINENO: result: $CXX" >&5 echo "${ECHO_T}$CXX" >&6; } else { echo "$as_me:$LINENO: result: no" >&5 echo "${ECHO_T}no" >&6; } fi test -n "$CXX" && break done fi if test -z "$CXX"; then ac_ct_CXX=$CXX for ac_prog in g++ c++ gpp aCC CC cxx cc++ cl.exe FCC KCC RCC xlC_r xlC do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 { echo "$as_me:$LINENO: checking for $ac_word" >&5 echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } if test "${ac_cv_prog_ac_ct_CXX+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else if test -n "$ac_ct_CXX"; then ac_cv_prog_ac_ct_CXX="$ac_ct_CXX" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then ac_cv_prog_ac_ct_CXX="$ac_prog" echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_CXX=$ac_cv_prog_ac_ct_CXX if test -n "$ac_ct_CXX"; then { echo "$as_me:$LINENO: result: $ac_ct_CXX" >&5 echo "${ECHO_T}$ac_ct_CXX" >&6; } else { echo "$as_me:$LINENO: result: no" >&5 echo "${ECHO_T}no" >&6; } fi test -n "$ac_ct_CXX" && break done if test "x$ac_ct_CXX" = x; then CXX="g++" else case $cross_compiling:$ac_tool_warned in yes:) { echo "$as_me:$LINENO: WARNING: In the future, Autoconf will not detect cross-tools whose name does not start with the host triplet. If you think this configuration is useful to you, please write to autoconf@gnu.org." >&5 echo "$as_me: WARNING: In the future, Autoconf will not detect cross-tools whose name does not start with the host triplet. If you think this configuration is useful to you, please write to autoconf@gnu.org." >&2;} ac_tool_warned=yes ;; esac CXX=$ac_ct_CXX fi fi fi fi # Provide some information about the compiler. echo "$as_me:$LINENO: checking for C++ compiler version" >&5 ac_compiler=`set X $ac_compile; echo $2` { (ac_try="$ac_compiler --version >&5" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compiler --version >&5") 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } { (ac_try="$ac_compiler -v >&5" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compiler -v >&5") 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 (exit $ac_status); } { (ac_try="$ac_compiler -V >&5" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compiler -V >&5") 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } { echo "$as_me:$LINENO: checking whether we are using the GNU C++ compiler" >&5 echo $ECHO_N "checking whether we are using the GNU C++ compiler... $ECHO_C" >&6; } if test "${ac_cv_cxx_compiler_gnu+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ int main () { #ifndef __GNUC__ choke me #endif ; return 0; } _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_cxx_werror_flag" || test ! -s conftest.err } && test -s conftest.$ac_objext; then ac_compiler_gnu=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_compiler_gnu=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext ac_cv_cxx_compiler_gnu=$ac_compiler_gnu fi { echo "$as_me:$LINENO: result: $ac_cv_cxx_compiler_gnu" >&5 echo "${ECHO_T}$ac_cv_cxx_compiler_gnu" >&6; } GXX=`test $ac_compiler_gnu = yes && echo yes` ac_test_CXXFLAGS=${CXXFLAGS+set} ac_save_CXXFLAGS=$CXXFLAGS { echo "$as_me:$LINENO: checking whether $CXX accepts -g" >&5 echo $ECHO_N "checking whether $CXX accepts -g... 
$ECHO_C" >&6; } if test "${ac_cv_prog_cxx_g+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else ac_save_cxx_werror_flag=$ac_cxx_werror_flag ac_cxx_werror_flag=yes ac_cv_prog_cxx_g=no CXXFLAGS="-g" cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ int main () { ; return 0; } _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_cxx_werror_flag" || test ! -s conftest.err } && test -s conftest.$ac_objext; then ac_cv_prog_cxx_g=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 CXXFLAGS="" cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ int main () { ; return 0; } _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_cxx_werror_flag" || test ! -s conftest.err } && test -s conftest.$ac_objext; then : else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_cxx_werror_flag=$ac_save_cxx_werror_flag CXXFLAGS="-g" cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ int main () { ; return 0; } _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_cxx_werror_flag" || test ! -s conftest.err } && test -s conftest.$ac_objext; then ac_cv_prog_cxx_g=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext ac_cxx_werror_flag=$ac_save_cxx_werror_flag fi { echo "$as_me:$LINENO: result: $ac_cv_prog_cxx_g" >&5 echo "${ECHO_T}$ac_cv_prog_cxx_g" >&6; } if test "$ac_test_CXXFLAGS" = set; then CXXFLAGS=$ac_save_CXXFLAGS elif test $ac_cv_prog_cxx_g = yes; then if test "$GXX" = yes; then CXXFLAGS="-g -O2" else CXXFLAGS="-g" fi else if test "$GXX" = yes; then CXXFLAGS="-O2" else CXXFLAGS= fi fi ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu depcc="$CXX" am_compiler_list= { echo "$as_me:$LINENO: checking dependency style of $depcc" >&5 echo $ECHO_N "checking dependency style of $depcc... $ECHO_C" >&6; } if test "${am_cv_CXX_dependencies_compiler_type+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then # We make a subdir and do the tests there. Otherwise we can end up # making bogus files that we don't know about and never remove. 
For # instance it was reported that on HP-UX the gcc test will end up # making a dummy file named `D' -- because `-MD' means `put the output # in D'. mkdir conftest.dir # Copy depcomp to subdir because otherwise we won't find it if we're # using a relative directory. cp "$am_depcomp" conftest.dir cd conftest.dir # We will build objects and dependencies in a subdirectory because # it helps to detect inapplicable dependency modes. For instance # both Tru64's cc and ICC support -MD to output dependencies as a # side effect of compilation, but ICC will put the dependencies in # the current directory while Tru64 will put them in the object # directory. mkdir sub am_cv_CXX_dependencies_compiler_type=none if test "$am_compiler_list" = ""; then am_compiler_list=`sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < ./depcomp` fi for depmode in $am_compiler_list; do # Setup a source with many dependencies, because some compilers # like to wrap large dependency lists on column 80 (with \), and # we should not choose a depcomp mode which is confused by this. # # We need to recreate these files for each test, as the compiler may # overwrite some of them when testing with obscure command lines. # This happens at least with the AIX C compiler. : > sub/conftest.c for i in 1 2 3 4 5 6; do echo '#include "conftst'$i'.h"' >> sub/conftest.c # Using `: > sub/conftst$i.h' creates only sub/conftst1.h with # Solaris 8's {/usr,}/bin/sh. touch sub/conftst$i.h done echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf case $depmode in nosideeffect) # after this tag, mechanisms are not by side-effect, so they'll # only be used when explicitly requested if test "x$enable_dependency_tracking" = xyes; then continue else break fi ;; none) break ;; esac # We check with `-c' and `-o' for the sake of the "dashmstdout" # mode. It turns out that the SunPro C++ compiler does not properly # handle `-M -o', and we need to detect this. 
if depmode=$depmode \ source=sub/conftest.c object=sub/conftest.${OBJEXT-o} \ depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \ $SHELL ./depcomp $depcc -c -o sub/conftest.${OBJEXT-o} sub/conftest.c \ >/dev/null 2>conftest.err && grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 && grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 && grep sub/conftest.${OBJEXT-o} sub/conftest.Po > /dev/null 2>&1 && ${MAKE-make} -s -f confmf > /dev/null 2>&1; then # icc doesn't choke on unknown options, it will just issue warnings # or remarks (even with -Werror). So we grep stderr for any message # that says an option was ignored or not supported. # When given -MP, icc 7.0 and 7.1 complain thusly: # icc: Command line warning: ignoring option '-M'; no argument required # The diagnosis changed in icc 8.0: # icc: Command line remark: option '-MP' not supported if (grep 'ignoring option' conftest.err || grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else am_cv_CXX_dependencies_compiler_type=$depmode break fi fi done cd .. rm -rf conftest.dir else am_cv_CXX_dependencies_compiler_type=none fi fi { echo "$as_me:$LINENO: result: $am_cv_CXX_dependencies_compiler_type" >&5 echo "${ECHO_T}$am_cv_CXX_dependencies_compiler_type" >&6; } CXXDEPMODE=depmode=$am_cv_CXX_dependencies_compiler_type if test "x$enable_dependency_tracking" != xno \ && test "$am_cv_CXX_dependencies_compiler_type" = gcc3; then am__fastdepCXX_TRUE= am__fastdepCXX_FALSE='#' else am__fastdepCXX_TRUE='#' am__fastdepCXX_FALSE= fi if test "x$ac_cv_env_PKG_CONFIG_set" != "xset"; then if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}pkg-config", so it can be a program name with args. set dummy ${ac_tool_prefix}pkg-config; ac_word=$2 { echo "$as_me:$LINENO: checking for $ac_word" >&5 echo $ECHO_N "checking for $ac_word... 
$ECHO_C" >&6; } if test "${ac_cv_path_PKG_CONFIG+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else case $PKG_CONFIG in [\\/]* | ?:[\\/]*) ac_cv_path_PKG_CONFIG="$PKG_CONFIG" # Let the user override the test with a path. ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then ac_cv_path_PKG_CONFIG="$as_dir/$ac_word$ac_exec_ext" echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS ;; esac fi PKG_CONFIG=$ac_cv_path_PKG_CONFIG if test -n "$PKG_CONFIG"; then { echo "$as_me:$LINENO: result: $PKG_CONFIG" >&5 echo "${ECHO_T}$PKG_CONFIG" >&6; } else { echo "$as_me:$LINENO: result: no" >&5 echo "${ECHO_T}no" >&6; } fi fi if test -z "$ac_cv_path_PKG_CONFIG"; then ac_pt_PKG_CONFIG=$PKG_CONFIG # Extract the first word of "pkg-config", so it can be a program name with args. set dummy pkg-config; ac_word=$2 { echo "$as_me:$LINENO: checking for $ac_word" >&5 echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } if test "${ac_cv_path_ac_pt_PKG_CONFIG+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else case $ac_pt_PKG_CONFIG in [\\/]* | ?:[\\/]*) ac_cv_path_ac_pt_PKG_CONFIG="$ac_pt_PKG_CONFIG" # Let the user override the test with a path. ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then ac_cv_path_ac_pt_PKG_CONFIG="$as_dir/$ac_word$ac_exec_ext" echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS ;; esac fi ac_pt_PKG_CONFIG=$ac_cv_path_ac_pt_PKG_CONFIG if test -n "$ac_pt_PKG_CONFIG"; then { echo "$as_me:$LINENO: result: $ac_pt_PKG_CONFIG" >&5 echo "${ECHO_T}$ac_pt_PKG_CONFIG" >&6; } else { echo "$as_me:$LINENO: result: no" >&5 echo "${ECHO_T}no" >&6; } fi if test "x$ac_pt_PKG_CONFIG" = x; then PKG_CONFIG="" else case $cross_compiling:$ac_tool_warned in yes:) { echo "$as_me:$LINENO: WARNING: In the future, Autoconf will not detect cross-tools whose name does not start with the host triplet. If you think this configuration is useful to you, please write to autoconf@gnu.org." >&5 echo "$as_me: WARNING: In the future, Autoconf will not detect cross-tools whose name does not start with the host triplet. If you think this configuration is useful to you, please write to autoconf@gnu.org." >&2;} ac_tool_warned=yes ;; esac PKG_CONFIG=$ac_pt_PKG_CONFIG fi else PKG_CONFIG="$ac_cv_path_PKG_CONFIG" fi fi if test -n "$PKG_CONFIG"; then _pkg_min_version=0.9.0 { echo "$as_me:$LINENO: checking pkg-config is at least version $_pkg_min_version" >&5 echo $ECHO_N "checking pkg-config is at least version $_pkg_min_version... $ECHO_C" >&6; } if $PKG_CONFIG --atleast-pkgconfig-version $_pkg_min_version; then { echo "$as_me:$LINENO: result: yes" >&5 echo "${ECHO_T}yes" >&6; } else { echo "$as_me:$LINENO: result: no" >&5 echo "${ECHO_T}no" >&6; } PKG_CONFIG="" fi fi # Optimal compiler flags { echo "$as_me:$LINENO: checking for C compiler vendor" >&5 echo $ECHO_N "checking for C compiler vendor... 
$ECHO_C" >&6; } if test "${ax_cv_c_compiler_vendor+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else ax_cv_c_compiler_vendor=unknown # note: don't check for gcc first since some other compilers define __GNUC__ for ventest in intel:__ICC,__ECC,__INTEL_COMPILER ibm:__xlc__,__xlC__,__IBMC__,__IBMCPP__ gnu:__GNUC__ sun:__SUNPRO_C,__SUNPRO_CC hp:__HP_cc,__HP_aCC dec:__DECC,__DECCXX,__DECC_VER,__DECCXX_VER borland:__BORLANDC__,__TURBOC__ comeau:__COMO__ cray:_CRAYC kai:__KCC lcc:__LCC__ metrowerks:__MWERKS__ sgi:__sgi,sgi microsoft:_MSC_VER watcom:__WATCOMC__ portland:__PGI; do vencpp="defined("`echo $ventest | cut -d: -f2 | sed 's/,/) || defined(/g'`")" cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ int main () { #if !($vencpp) thisisanerror; #endif ; return 0; } _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest.$ac_objext; then ax_cv_c_compiler_vendor=`echo $ventest | cut -d: -f1`; break else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext done fi { echo "$as_me:$LINENO: result: $ax_cv_c_compiler_vendor" >&5 echo "${ECHO_T}$ax_cv_c_compiler_vendor" >&6; } # Make sure we can run config.sub. 
$SHELL "$ac_aux_dir/config.sub" sun4 >/dev/null 2>&1 || { { echo "$as_me:$LINENO: error: cannot run $SHELL $ac_aux_dir/config.sub" >&5 echo "$as_me: error: cannot run $SHELL $ac_aux_dir/config.sub" >&2;} { (exit 1); exit 1; }; } { echo "$as_me:$LINENO: checking build system type" >&5 echo $ECHO_N "checking build system type... $ECHO_C" >&6; } if test "${ac_cv_build+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else ac_build_alias=$build_alias test "x$ac_build_alias" = x && ac_build_alias=`$SHELL "$ac_aux_dir/config.guess"` test "x$ac_build_alias" = x && { { echo "$as_me:$LINENO: error: cannot guess build type; you must specify one" >&5 echo "$as_me: error: cannot guess build type; you must specify one" >&2;} { (exit 1); exit 1; }; } ac_cv_build=`$SHELL "$ac_aux_dir/config.sub" $ac_build_alias` || { { echo "$as_me:$LINENO: error: $SHELL $ac_aux_dir/config.sub $ac_build_alias failed" >&5 echo "$as_me: error: $SHELL $ac_aux_dir/config.sub $ac_build_alias failed" >&2;} { (exit 1); exit 1; }; } fi { echo "$as_me:$LINENO: result: $ac_cv_build" >&5 echo "${ECHO_T}$ac_cv_build" >&6; } case $ac_cv_build in *-*-*) ;; *) { { echo "$as_me:$LINENO: error: invalid value of canonical build" >&5 echo "$as_me: error: invalid value of canonical build" >&2;} { (exit 1); exit 1; }; };; esac build=$ac_cv_build ac_save_IFS=$IFS; IFS='-' set x $ac_cv_build shift build_cpu=$1 build_vendor=$2 shift; shift # Remember, the first character of IFS is used to create $*, # except with old shells: build_os=$* IFS=$ac_save_IFS case $build_os in *\ *) build_os=`echo "$build_os" | sed 's/ /-/g'`;; esac { echo "$as_me:$LINENO: checking host system type" >&5 echo $ECHO_N "checking host system type... 
$ECHO_C" >&6; } if test "${ac_cv_host+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else if test "x$host_alias" = x; then ac_cv_host=$ac_cv_build else ac_cv_host=`$SHELL "$ac_aux_dir/config.sub" $host_alias` || { { echo "$as_me:$LINENO: error: $SHELL $ac_aux_dir/config.sub $host_alias failed" >&5 echo "$as_me: error: $SHELL $ac_aux_dir/config.sub $host_alias failed" >&2;} { (exit 1); exit 1; }; } fi fi { echo "$as_me:$LINENO: result: $ac_cv_host" >&5 echo "${ECHO_T}$ac_cv_host" >&6; } case $ac_cv_host in *-*-*) ;; *) { { echo "$as_me:$LINENO: error: invalid value of canonical host" >&5 echo "$as_me: error: invalid value of canonical host" >&2;} { (exit 1); exit 1; }; };; esac host=$ac_cv_host ac_save_IFS=$IFS; IFS='-' set x $ac_cv_host shift host_cpu=$1 host_vendor=$2 shift; shift # Remember, the first character of IFS is used to create $*, # except with old shells: host_os=$* IFS=$ac_save_IFS case $host_os in *\ *) host_os=`echo "$host_os" | sed 's/ /-/g'`;; esac # Check whether --enable-portable-binary was given. if test "${enable_portable_binary+set}" = set; then enableval=$enable_portable_binary; acx_maxopt_portable=$withval else acx_maxopt_portable=no fi # Try to determine "good" native compiler flags if none specified via CFLAGS if test "$ac_test_CFLAGS" != "set"; then CFLAGS="" case $ax_cv_c_compiler_vendor in dec) CFLAGS="-newc -w0 -O5 -ansi_alias -ansi_args -fp_reorder -tune host" if test "x$acx_maxopt_portable" = xno; then CFLAGS="$CFLAGS -arch host" fi;; sun) CFLAGS="-native -fast -xO5 -dalign" if test "x$acx_maxopt_portable" = xyes; then CFLAGS="$CFLAGS -xarch=generic" fi;; hp) CFLAGS="+Oall +Optrs_ansi +DSnative" if test "x$acx_maxopt_portable" = xyes; then CFLAGS="$CFLAGS +DAportable" fi;; ibm) if test "x$acx_maxopt_portable" = xno; then xlc_opt="-qarch=auto -qtune=auto" else xlc_opt="-qtune=auto" fi { echo "$as_me:$LINENO: checking whether C compiler accepts $xlc_opt" >&5 echo $ECHO_N "checking whether C compiler accepts $xlc_opt... 
$ECHO_C" >&6; } ax_save_FLAGS=$CFLAGS CFLAGS="$xlc_opt" cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ int main () { ; return 0; } _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest.$ac_objext; then eval `echo "ax_cv_c_flags_$xlc_opt" | $as_tr_sh`=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 eval `echo "ax_cv_c_flags_$xlc_opt" | $as_tr_sh`=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext CFLAGS=$ax_save_FLAGS eval ax_check_compiler_flags=$`echo "ax_cv_c_flags_$xlc_opt" | $as_tr_sh` { echo "$as_me:$LINENO: result: $ax_check_compiler_flags" >&5 echo "${ECHO_T}$ax_check_compiler_flags" >&6; } if test "x$ax_check_compiler_flags" = xyes; then CFLAGS="-O3 -qansialias -w $xlc_opt" else CFLAGS="-O3 -qansialias -w" echo "******************************************************" echo "* You seem to have the IBM C compiler. It is *" echo "* recommended for best performance that you use: *" echo "* *" echo "* CFLAGS=-O3 -qarch=xxx -qtune=xxx -qansialias -w *" echo "* ^^^ ^^^ *" echo "* where xxx is pwr2, pwr3, 604, or whatever kind of *" echo "* CPU you have. (Set the CFLAGS environment var. *" echo "* and re-run configure.) For more info, man cc. 
*"
         echo "******************************************************"
	 fi
         ;;

    # Intel compiler: start from -O3 and, unless a portable binary was
    # requested, probe the CPU with cpuid to choose an -x<arch> flag.
    intel) CFLAGS="-O3"
	if test "x$acx_maxopt_portable" = xno; then
	  icc_archflag=unknown
	  icc_flags=""
	  case $host_cpu in
	    i686*|x86_64*)
              # icc accepts gcc assembly syntax, so these should work:
	      ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu

{ echo "$as_me:$LINENO: checking for x86 cpuid 0 output" >&5
echo $ECHO_N "checking for x86 cpuid 0 output... $ECHO_C" >&6; }
if test "${ax_cv_gcc_x86_cpuid_0+set}" = set; then
  echo $ECHO_N "(cached) $ECHO_C" >&6
else
  # The probe has to execute on the host, so it is skipped when
  # cross-compiling.
  if test "$cross_compiling" = yes; then
  ax_cv_gcc_x86_cpuid_0=unknown
else
  # The probe program uses FILE/fopen/fprintf/fclose and therefore needs
  # <stdio.h>; the header name was missing from the #include directive,
  # which made the program uncompilable and the result always "unknown".
  cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h. */
_ACEOF
cat confdefs.h >>conftest.$ac_ext
cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
#include <stdio.h>
int
main ()
{

     int op = 0, eax, ebx, ecx, edx;
     FILE *f;
      __asm__("cpuid"
        : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
        : "a" (op));
     f = fopen("conftest_cpuid", "w"); if (!f) return 1;
     fprintf(f, "%x:%x:%x:%x\n", eax, ebx, ecx, edx);
     fclose(f);
     return 0;

  ;
  return 0;
}
_ACEOF
rm -f conftest$ac_exeext
# Link the probe, then run it; both commands are logged to fd 5.
if { (ac_try="$ac_link"
case "(($ac_try" in
  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
  *) ac_try_echo=$ac_try;;
esac
eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
  (eval "$ac_link") 2>&5
  ac_status=$?
  echo "$as_me:$LINENO: \$? = $ac_status" >&5
  (exit $ac_status); } && { ac_try='./conftest$ac_exeext'
  { (case "(($ac_try" in
  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
  *) ac_try_echo=$ac_try;;
esac
eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
  (eval "$ac_try") 2>&5
  ac_status=$?
  echo "$as_me:$LINENO: \$? 
= $ac_status" >&5
  (exit $ac_status); }; }; then
  # The fragment above finishes the run step of the cpuid-0 probe; on
  # success the program has written eax:ebx:ecx:edx to conftest_cpuid.
  ax_cv_gcc_x86_cpuid_0=`cat conftest_cpuid`; rm -f conftest_cpuid
else
  echo "$as_me: program exited with status $ac_status" >&5
echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5

( exit $ac_status )
ax_cv_gcc_x86_cpuid_0=unknown; rm -f conftest_cpuid
fi
rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext
fi


fi
{ echo "$as_me:$LINENO: result: $ax_cv_gcc_x86_cpuid_0" >&5
echo "${ECHO_T}$ax_cv_gcc_x86_cpuid_0" >&6; }
# Reset the compile/link command templates for C.  The block appears twice
# in the generated script; the assignments are idempotent.
ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu

	      ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu

{ echo "$as_me:$LINENO: checking for x86 cpuid 1 output" >&5
echo $ECHO_N "checking for x86 cpuid 1 output... $ECHO_C" >&6; }
if test "${ax_cv_gcc_x86_cpuid_1+set}" = set; then
  echo $ECHO_N "(cached) $ECHO_C" >&6
else
  # The probe has to execute on the host, so it is skipped when
  # cross-compiling.
  if test "$cross_compiling" = yes; then
  ax_cv_gcc_x86_cpuid_1=unknown
else
  # The probe program uses FILE/fopen/fprintf/fclose and therefore needs
  # <stdio.h>; the header name was missing from the #include directive,
  # which made the program uncompilable and the result always "unknown".
  cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h. */
_ACEOF
cat confdefs.h >>conftest.$ac_ext
cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
#include <stdio.h>
int
main ()
{

     int op = 1, eax, ebx, ecx, edx;
     FILE *f;
      __asm__("cpuid"
        : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
        : "a" (op));
     f = fopen("conftest_cpuid", "w"); if (!f) return 1;
     fprintf(f, "%x:%x:%x:%x\n", eax, ebx, ecx, edx);
     fclose(f);
     return 0;

  ;
  return 0;
}
_ACEOF
rm -f conftest$ac_exeext
# Link the probe, then run it; both commands are logged to fd 5.
if { (ac_try="$ac_link"
case "(($ac_try" in
  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
  *) ac_try_echo=$ac_try;;
esac
eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
  (eval "$ac_link") 2>&5
  ac_status=$?
  echo "$as_me:$LINENO: \$? = $ac_status" >&5
  (exit $ac_status); } && { ac_try='./conftest$ac_exeext'
  { (case "(($ac_try" in
  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
  *) ac_try_echo=$ac_try;;
esac
eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
  (eval "$ac_try") 2>&5
  ac_status=$?
  echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 (exit $ac_status); } && { ac_try='./conftest$ac_exeext' { (case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_try") 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; }; then ax_cv_gcc_x86_cpuid_1=`cat conftest_cpuid`; rm -f conftest_cpuid else echo "$as_me: program exited with status $ac_status" >&5 echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ( exit $ac_status ) ax_cv_gcc_x86_cpuid_1=unknown; rm -f conftest_cpuid fi rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext fi fi { echo "$as_me:$LINENO: result: $ax_cv_gcc_x86_cpuid_1" >&5 echo "${ECHO_T}$ax_cv_gcc_x86_cpuid_1" >&6; } ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu case $ax_cv_gcc_x86_cpuid_0 in # see AX_GCC_ARCHFLAG *:756e6547:*:*) # Intel case $ax_cv_gcc_x86_cpuid_1 in 1067?:*:*:*) icc_flags="-xS";; *6a?:*[234]:*:*|*6[789b]?:*:*:*) icc_flags="-xK";; *6f?:*:*:*) icc_flags="-xT";; *f3[347]:*:*:*|*f4[1347a]:*:*:*) icc_flags="-xP -xN -xW -xK";; *f??:*:*:*) icc_flags="-xN -xW -xK";; esac ;; esac ;; esac if test "x$icc_flags" != x; then for flag in $icc_flags; do { echo "$as_me:$LINENO: checking whether C compiler accepts $flag" >&5 echo $ECHO_N "checking whether C compiler accepts $flag... $ECHO_C" >&6; } ax_save_FLAGS=$CFLAGS CFLAGS="$flag" cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ int main () { ; return 0; } _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest.$ac_objext; then eval `echo "ax_cv_c_flags_$flag" | $as_tr_sh`=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 eval `echo "ax_cv_c_flags_$flag" | $as_tr_sh`=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext CFLAGS=$ax_save_FLAGS eval ax_check_compiler_flags=$`echo "ax_cv_c_flags_$flag" | $as_tr_sh` { echo "$as_me:$LINENO: result: $ax_check_compiler_flags" >&5 echo "${ECHO_T}$ax_check_compiler_flags" >&6; } if test "x$ax_check_compiler_flags" = xyes; then icc_archflag=$flag; break else : fi done fi { echo "$as_me:$LINENO: checking for icc architecture flag" >&5 echo $ECHO_N "checking for icc architecture flag... $ECHO_C" >&6; } { echo "$as_me:$LINENO: result: $icc_archflag" >&5 echo "${ECHO_T}$icc_archflag" >&6; } if test "x$icc_archflag" != xunknown; then CFLAGS="$CFLAGS $icc_archflag" fi fi ;; gnu) # default optimization flags for gcc on all systems CFLAGS="-O3 -fomit-frame-pointer" # -malign-double for x86 systems { echo "$as_me:$LINENO: checking whether C compiler accepts -malign-double" >&5 echo $ECHO_N "checking whether C compiler accepts -malign-double... $ECHO_C" >&6; } if test "${ax_cv_c_flags__malign_double+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else ax_save_FLAGS=$CFLAGS CFLAGS="-malign-double" cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ int main () { ; return 0; } _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest.$ac_objext; then ax_cv_c_flags__malign_double=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ax_cv_c_flags__malign_double=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext CFLAGS=$ax_save_FLAGS fi eval ax_check_compiler_flags=$ax_cv_c_flags__malign_double { echo "$as_me:$LINENO: result: $ax_check_compiler_flags" >&5 echo "${ECHO_T}$ax_check_compiler_flags" >&6; } if test "x$ax_check_compiler_flags" = xyes; then CFLAGS="$CFLAGS -malign-double" else : fi # -fstrict-aliasing for gcc-2.95+ { echo "$as_me:$LINENO: checking whether C compiler accepts -fstrict-aliasing" >&5 echo $ECHO_N "checking whether C compiler accepts -fstrict-aliasing... $ECHO_C" >&6; } if test "${ax_cv_c_flags__fstrict_aliasing+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else ax_save_FLAGS=$CFLAGS CFLAGS="-fstrict-aliasing" cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ int main () { ; return 0; } _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! 
-s conftest.err } && test -s conftest.$ac_objext; then ax_cv_c_flags__fstrict_aliasing=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ax_cv_c_flags__fstrict_aliasing=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext CFLAGS=$ax_save_FLAGS fi eval ax_check_compiler_flags=$ax_cv_c_flags__fstrict_aliasing { echo "$as_me:$LINENO: result: $ax_check_compiler_flags" >&5 echo "${ECHO_T}$ax_check_compiler_flags" >&6; } if test "x$ax_check_compiler_flags" = xyes; then CFLAGS="$CFLAGS -fstrict-aliasing" else : fi # note that we enable "unsafe" fp optimization with other compilers, too { echo "$as_me:$LINENO: checking whether C compiler accepts -ffast-math" >&5 echo $ECHO_N "checking whether C compiler accepts -ffast-math... $ECHO_C" >&6; } if test "${ax_cv_c_flags__ffast_math+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else ax_save_FLAGS=$CFLAGS CFLAGS="-ffast-math" cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ int main () { ; return 0; } _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! 
-s conftest.err } && test -s conftest.$ac_objext; then ax_cv_c_flags__ffast_math=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ax_cv_c_flags__ffast_math=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext CFLAGS=$ax_save_FLAGS fi eval ax_check_compiler_flags=$ax_cv_c_flags__ffast_math { echo "$as_me:$LINENO: result: $ax_check_compiler_flags" >&5 echo "${ECHO_T}$ax_check_compiler_flags" >&6; } if test "x$ax_check_compiler_flags" = xyes; then CFLAGS="$CFLAGS -ffast-math" else : fi # Check whether --with-gcc-arch was given. if test "${with_gcc_arch+set}" = set; then withval=$with_gcc_arch; ax_gcc_arch=$withval else ax_gcc_arch=yes fi { echo "$as_me:$LINENO: checking for gcc architecture flag" >&5 echo $ECHO_N "checking for gcc architecture flag... $ECHO_C" >&6; } { echo "$as_me:$LINENO: result: " >&5 echo "${ECHO_T}" >&6; } if test "${ax_cv_gcc_archflag+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else ax_cv_gcc_archflag="unknown" if test "$GCC" = yes; then if test "x$ax_gcc_arch" = xyes; then ax_gcc_arch="" if test "$cross_compiling" = no; then case $host_cpu in i[3456]86*|x86_64*) # use cpuid codes, in part from x86info-1.7 by D. Jones ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu { echo "$as_me:$LINENO: checking for x86 cpuid 0 output" >&5 echo $ECHO_N "checking for x86 cpuid 0 output... $ECHO_C" >&6; } if test "${ax_cv_gcc_x86_cpuid_0+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else if test "$cross_compiling" = yes; then ax_cv_gcc_x86_cpuid_0=unknown else cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ #include int main () { int op = 0, eax, ebx, ecx, edx; FILE *f; __asm__("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (op)); f = fopen("conftest_cpuid", "w"); if (!f) return 1; fprintf(f, "%x:%x:%x:%x\n", eax, ebx, ecx, edx); fclose(f); return 0; ; return 0; } _ACEOF rm -f conftest$ac_exeext if { (ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_link") 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { ac_try='./conftest$ac_exeext' { (case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_try") 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; }; then ax_cv_gcc_x86_cpuid_0=`cat conftest_cpuid`; rm -f conftest_cpuid else echo "$as_me: program exited with status $ac_status" >&5 echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ( exit $ac_status ) ax_cv_gcc_x86_cpuid_0=unknown; rm -f conftest_cpuid fi rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext fi fi { echo "$as_me:$LINENO: result: $ax_cv_gcc_x86_cpuid_0" >&5 echo "${ECHO_T}$ax_cv_gcc_x86_cpuid_0" >&6; } ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu { echo "$as_me:$LINENO: checking for x86 cpuid 1 output" >&5 echo $ECHO_N "checking for x86 cpuid 1 output... 
$ECHO_C" >&6; } if test "${ax_cv_gcc_x86_cpuid_1+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else if test "$cross_compiling" = yes; then ax_cv_gcc_x86_cpuid_1=unknown else cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include int main () { int op = 1, eax, ebx, ecx, edx; FILE *f; __asm__("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (op)); f = fopen("conftest_cpuid", "w"); if (!f) return 1; fprintf(f, "%x:%x:%x:%x\n", eax, ebx, ecx, edx); fclose(f); return 0; ; return 0; } _ACEOF rm -f conftest$ac_exeext if { (ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_link") 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { ac_try='./conftest$ac_exeext' { (case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_try") 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? 
= $ac_status" >&5
  (exit $ac_status); }; }; then
  # The fragment above finishes the run step of the cpuid-1 probe; on
  # success the program has written eax:ebx:ecx:edx to conftest_cpuid.
  ax_cv_gcc_x86_cpuid_1=`cat conftest_cpuid`; rm -f conftest_cpuid
else
  echo "$as_me: program exited with status $ac_status" >&5
echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5

( exit $ac_status )
ax_cv_gcc_x86_cpuid_1=unknown; rm -f conftest_cpuid
fi
rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext
fi


fi
{ echo "$as_me:$LINENO: result: $ax_cv_gcc_x86_cpuid_1" >&5
echo "${ECHO_T}$ax_cv_gcc_x86_cpuid_1" >&6; }
ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu

# Map the cpuid output to a preference-ordered list of gcc architecture
# names in ax_gcc_arch.  The cpuid-0 value is matched on its second field
# (ebx, the first four bytes of the vendor string); the cpuid-1 value is
# matched on the trailing hex digits of eax (family/model/stepping).
# Arms are tried in order, so specific patterns must precede generic ones.
	    case $ax_cv_gcc_x86_cpuid_0 in
	      *:756e6547:*:*) # Intel
                 case $ax_cv_gcc_x86_cpuid_1 in
		   *6f?:*:*:*) ax_gcc_arch="core2 nocona pentium3";;
		   1067?:*:*:*) ax_gcc_arch="core2 nocona pentium3";;
		   *5[48]?:*:*:*) ax_gcc_arch="pentium-mmx pentium" ;;
		   *5??:*:*:*) ax_gcc_arch=pentium ;;
		   *6[3456]?:*:*:*) ax_gcc_arch="pentium2 pentiumpro" ;;
		   *6a?:*[01]:*:*) ax_gcc_arch="pentium2 pentiumpro" ;;
		   *6a?:*[234]:*:*) ax_gcc_arch="pentium3 pentiumpro" ;;
		   *6[9d]?:*:*:*) ax_gcc_arch="pentium-m pentium3 pentiumpro" ;;
		   *6[78b]?:*:*:*) ax_gcc_arch="pentium3 pentiumpro" ;;
		   *6??:*:*:*) ax_gcc_arch=pentiumpro ;;
		   # The second alternative must be a bracket expression
		   # (f41, f43, f44, f47).  It had lost its brackets and read
		   # "*f41347", which no cpuid value can match, so those CPUs
		   # fell through to the generic pentium4 arm below.
                   *f3[347]:*:*:*|*f4[1347]:*:*:*)
			case $host_cpu in
                          x86_64*) ax_gcc_arch="nocona pentium4 pentiumpro" ;;
                          *) ax_gcc_arch="prescott pentium4 pentiumpro" ;;
                        esac ;;
		   *f4a:*:*:*) ax_gcc_arch="nocona" ;;
                   *f??:*:*:*) ax_gcc_arch="pentium4 pentiumpro";;
                 esac ;;
	      *:68747541:*:*) # AMD
                 case $ax_cv_gcc_x86_cpuid_1 in
		   *5[67]?:*:*:*) ax_gcc_arch=k6 ;;
		   *5[8d]?:*:*:*) ax_gcc_arch="k6-2 k6" ;;
		   *5[9]?:*:*:*) ax_gcc_arch="k6-3 k6" ;;
		   *60?:*:*:*) ax_gcc_arch=k7 ;;
		   *6[12]?:*:*:*) ax_gcc_arch="athlon k7" ;;
		   *6[34]?:*:*:*) ax_gcc_arch="athlon-tbird k7" ;;
		   *67?:*:*:*) ax_gcc_arch="athlon-4 athlon k7" ;;
		   *6[68a]?:*:*:*)
		      ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC 
-c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu

# Build and run a tiny program that executes cpuid with eax=0x80000006 and
# writes eax:ebx:ecx:edx (hex) to conftest_cpuid; the result is cached in
# ax_cv_gcc_x86_cpuid_0x80000006.
{ echo "$as_me:$LINENO: checking for x86 cpuid 0x80000006 output" >&5
echo $ECHO_N "checking for x86 cpuid 0x80000006 output... $ECHO_C" >&6; }
if test "${ax_cv_gcc_x86_cpuid_0x80000006+set}" = set; then
  echo $ECHO_N "(cached) $ECHO_C" >&6
else
  # The probe has to execute on the host, so it is skipped when
  # cross-compiling.
  if test "$cross_compiling" = yes; then
  ax_cv_gcc_x86_cpuid_0x80000006=unknown
else
  # The probe program uses FILE/fopen/fprintf/fclose and therefore needs
  # <stdio.h>; the header name was missing from the #include directive,
  # which made the program uncompilable and the result always "unknown".
  cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h. */
_ACEOF
cat confdefs.h >>conftest.$ac_ext
cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
#include <stdio.h>
int
main ()
{

     int op = 0x80000006, eax, ebx, ecx, edx;
     FILE *f;
      __asm__("cpuid"
        : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
        : "a" (op));
     f = fopen("conftest_cpuid", "w"); if (!f) return 1;
     fprintf(f, "%x:%x:%x:%x\n", eax, ebx, ecx, edx);
     fclose(f);
     return 0;

  ;
  return 0;
}
_ACEOF
rm -f conftest$ac_exeext
# Link the probe, then run it; both commands are logged to fd 5.
if { (ac_try="$ac_link"
case "(($ac_try" in
  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
  *) ac_try_echo=$ac_try;;
esac
eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
  (eval "$ac_link") 2>&5
  ac_status=$?
  echo "$as_me:$LINENO: \$? = $ac_status" >&5
  (exit $ac_status); } && { ac_try='./conftest$ac_exeext'
  { (case "(($ac_try" in
  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
  *) ac_try_echo=$ac_try;;
esac
eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
  (eval "$ac_try") 2>&5
  ac_status=$?
  echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 (exit $ac_status); }; }; then ax_cv_gcc_x86_cpuid_0x80000006=`cat conftest_cpuid`; rm -f conftest_cpuid else echo "$as_me: program exited with status $ac_status" >&5 echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ( exit $ac_status ) ax_cv_gcc_x86_cpuid_0x80000006=unknown; rm -f conftest_cpuid fi rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext fi fi { echo "$as_me:$LINENO: result: $ax_cv_gcc_x86_cpuid_0x80000006" >&5 echo "${ECHO_T}$ax_cv_gcc_x86_cpuid_0x80000006" >&6; } ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu # L2 cache size case $ax_cv_gcc_x86_cpuid_0x80000006 in *:*:*[1-9a-f]??????:*) # (L2 = ecx >> 16) >= 256 ax_gcc_arch="athlon-xp athlon-4 athlon k7" ;; *) ax_gcc_arch="athlon-4 athlon k7" ;; esac ;; *f[4cef8b]?:*:*:*) ax_gcc_arch="athlon64 k8" ;; *f5?:*:*:*) ax_gcc_arch="opteron k8" ;; *f7?:*:*:*) ax_gcc_arch="athlon-fx opteron k8" ;; *f??:*:*:*) ax_gcc_arch="k8" ;; esac ;; *:746e6543:*:*) # IDT case $ax_cv_gcc_x86_cpuid_1 in *54?:*:*:*) ax_gcc_arch=winchip-c6 ;; *58?:*:*:*) ax_gcc_arch=winchip2 ;; *6[78]?:*:*:*) ax_gcc_arch=c3 ;; *69?:*:*:*) ax_gcc_arch="c3-2 c3" ;; esac ;; esac if test x"$ax_gcc_arch" = x; then # fallback case $host_cpu in i586*) ax_gcc_arch=pentium ;; i686*) ax_gcc_arch=pentiumpro ;; esac fi ;; sparc*) # Extract the first word of "prtdiag", so it can be a program name with args. set dummy prtdiag; ac_word=$2 { echo "$as_me:$LINENO: checking for $ac_word" >&5 echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } if test "${ac_cv_path_PRTDIAG+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else case $PRTDIAG in [\\/]* | ?:[\\/]*) ac_cv_path_PRTDIAG="$PRTDIAG" # Let the user override the test with a path. 
;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR as_dummy="$PATH:/usr/platform/`uname -i`/sbin/:/usr/platform/`uname -m`/sbin/" for as_dir in $as_dummy do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then ac_cv_path_PRTDIAG="$as_dir/$ac_word$ac_exec_ext" echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS test -z "$ac_cv_path_PRTDIAG" && ac_cv_path_PRTDIAG="prtdiag" ;; esac fi PRTDIAG=$ac_cv_path_PRTDIAG if test -n "$PRTDIAG"; then { echo "$as_me:$LINENO: result: $PRTDIAG" >&5 echo "${ECHO_T}$PRTDIAG" >&6; } else { echo "$as_me:$LINENO: result: no" >&5 echo "${ECHO_T}no" >&6; } fi cputype=`(((grep cpu /proc/cpuinfo | cut -d: -f2) ; ($PRTDIAG -v |grep -i sparc) ; grep -i cpu /var/run/dmesg.boot ) | head -n 1) 2> /dev/null` cputype=`echo "$cputype" | tr -d ' -' |tr $as_cr_LETTERS $as_cr_letters` case $cputype in *ultrasparciv*) ax_gcc_arch="ultrasparc4 ultrasparc3 ultrasparc v9" ;; *ultrasparciii*) ax_gcc_arch="ultrasparc3 ultrasparc v9" ;; *ultrasparc*) ax_gcc_arch="ultrasparc v9" ;; *supersparc*|*tms390z5[05]*) ax_gcc_arch="supersparc v8" ;; *hypersparc*|*rt62[056]*) ax_gcc_arch="hypersparc v8" ;; *cypress*) ax_gcc_arch=cypress ;; esac ;; alphaev5) ax_gcc_arch=ev5 ;; alphaev56) ax_gcc_arch=ev56 ;; alphapca56) ax_gcc_arch="pca56 ev56" ;; alphapca57) ax_gcc_arch="pca57 pca56 ev56" ;; alphaev6) ax_gcc_arch=ev6 ;; alphaev67) ax_gcc_arch=ev67 ;; alphaev68) ax_gcc_arch="ev68 ev67" ;; alphaev69) ax_gcc_arch="ev69 ev68 ev67" ;; alphaev7) ax_gcc_arch="ev7 ev69 ev68 ev67" ;; alphaev79) ax_gcc_arch="ev79 ev7 ev69 ev68 ev67" ;; powerpc*) cputype=`((grep cpu /proc/cpuinfo | head -n 1 | cut -d: -f2 | cut -d, -f1 | sed 's/ //g') ; /usr/bin/machine ; /bin/machine; grep CPU /var/run/dmesg.boot | head -n 1 | cut -d" " -f2) 2> /dev/null` cputype=`echo $cputype | sed -e 's/ppc//g;s/ *//g'` case 
$cputype in *750*) ax_gcc_arch="750 G3" ;; *740[0-9]*) ax_gcc_arch="$cputype 7400 G4" ;; *74[4-5][0-9]*) ax_gcc_arch="$cputype 7450 G4" ;; *74[0-9][0-9]*) ax_gcc_arch="$cputype G4" ;; *970*) ax_gcc_arch="970 G5 power4";; *POWER4*|*power4*|*gq*) ax_gcc_arch="power4 970";; *POWER5*|*power5*|*gr*|*gs*) ax_gcc_arch="power5 power4 970";; 603ev|8240) ax_gcc_arch="$cputype 603e 603";; *) ax_gcc_arch=$cputype ;; esac ax_gcc_arch="$ax_gcc_arch powerpc" ;; esac fi # not cross-compiling fi # guess arch if test "x$ax_gcc_arch" != x -a "x$ax_gcc_arch" != xno; then for arch in $ax_gcc_arch; do if test "x$acx_maxopt_portable" = xyes; then # if we require portable code flags="-mtune=$arch" # -mcpu=$arch and m$arch generate nonportable code on every arch except # x86. And some other arches (e.g. Alpha) don't accept -mtune. Grrr. case $host_cpu in i*86|x86_64*) flags="$flags -mcpu=$arch -m$arch";; esac else flags="-march=$arch -mcpu=$arch -m$arch" fi for flag in $flags; do { echo "$as_me:$LINENO: checking whether C compiler accepts $flag" >&5 echo $ECHO_N "checking whether C compiler accepts $flag... $ECHO_C" >&6; } ax_save_FLAGS=$CFLAGS CFLAGS="$flag" cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ int main () { ; return 0; } _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! 
-s conftest.err } && test -s conftest.$ac_objext; then eval `echo "ax_cv_c_flags_$flag" | $as_tr_sh`=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 eval `echo "ax_cv_c_flags_$flag" | $as_tr_sh`=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext CFLAGS=$ax_save_FLAGS eval ax_check_compiler_flags=$`echo "ax_cv_c_flags_$flag" | $as_tr_sh` { echo "$as_me:$LINENO: result: $ax_check_compiler_flags" >&5 echo "${ECHO_T}$ax_check_compiler_flags" >&6; } if test "x$ax_check_compiler_flags" = xyes; then ax_cv_gcc_archflag=$flag; break else : fi done test "x$ax_cv_gcc_archflag" = xunknown || break done fi fi # $GCC=yes fi { echo "$as_me:$LINENO: checking for gcc architecture flag" >&5 echo $ECHO_N "checking for gcc architecture flag... $ECHO_C" >&6; } { echo "$as_me:$LINENO: result: $ax_cv_gcc_archflag" >&5 echo "${ECHO_T}$ax_cv_gcc_archflag" >&6; } if test "x$ax_cv_gcc_archflag" = xunknown; then : else CFLAGS="$CFLAGS $ax_cv_gcc_archflag" CXXFLAGS="$CFLAGS $ax_cv_gcc_archflag" fi ;; esac if test -z "$CFLAGS"; then echo "" echo "********************************************************" echo "* WARNING: Don't know the best CFLAGS for this system *" echo "* Use ./configure CFLAGS=... to specify your own flags *" echo "* (otherwise, a default of CFLAGS=-O3 will be used) *" echo "********************************************************" echo "" CFLAGS="-O3" fi { echo "$as_me:$LINENO: checking whether C compiler accepts $CFLAGS" >&5 echo $ECHO_N "checking whether C compiler accepts $CFLAGS... $ECHO_C" >&6; } ax_save_FLAGS=$CFLAGS CFLAGS="$CFLAGS" cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ int main () { ; return 0; } _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest.$ac_objext; then eval `echo "ax_cv_c_flags_$CFLAGS" | $as_tr_sh`=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 eval `echo "ax_cv_c_flags_$CFLAGS" | $as_tr_sh`=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext CFLAGS=$ax_save_FLAGS eval ax_check_compiler_flags=$`echo "ax_cv_c_flags_$CFLAGS" | $as_tr_sh` { echo "$as_me:$LINENO: result: $ax_check_compiler_flags" >&5 echo "${ECHO_T}$ax_check_compiler_flags" >&6; } if test "x$ax_check_compiler_flags" = xyes; then : else echo "" echo "********************************************************" echo "* WARNING: The guessed CFLAGS don't seem to work with *" echo "* your compiler. *" echo "* Use ./configure CFLAGS=... to specify your own flags *" echo "********************************************************" echo "" CFLAGS="" fi fi { echo "$as_me:$LINENO: checking for C++ compiler vendor" >&5 echo $ECHO_N "checking for C++ compiler vendor... 
$ECHO_C" >&6; } if test "${ax_cv_cxx_compiler_vendor+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else ax_cv_cxx_compiler_vendor=unknown # note: don't check for gcc first since some other compilers define __GNUC__ for ventest in intel:__ICC,__ECC,__INTEL_COMPILER ibm:__xlc__,__xlC__,__IBMC__,__IBMCPP__ gnu:__GNUC__ sun:__SUNPRO_C,__SUNPRO_CC hp:__HP_cc,__HP_aCC dec:__DECC,__DECCXX,__DECC_VER,__DECCXX_VER borland:__BORLANDC__,__TURBOC__ comeau:__COMO__ cray:_CRAYC kai:__KCC lcc:__LCC__ metrowerks:__MWERKS__ sgi:__sgi,sgi microsoft:_MSC_VER watcom:__WATCOMC__ portland:__PGI; do vencpp="defined("`echo $ventest | cut -d: -f2 | sed 's/,/) || defined(/g'`")" cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ int main () { #if !($vencpp) thisisanerror; #endif ; return 0; } _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_cxx_werror_flag" || test ! -s conftest.err } && test -s conftest.$ac_objext; then ax_cv_cxx_compiler_vendor=`echo $ventest | cut -d: -f1`; break else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext done fi { echo "$as_me:$LINENO: result: $ax_cv_cxx_compiler_vendor" >&5 echo "${ECHO_T}$ax_cv_cxx_compiler_vendor" >&6; } ac_ext=cpp ac_cpp='$CXXCPP $CPPFLAGS' ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_cxx_compiler_gnu # Check whether --enable-portable-binary was given. 
if test "${enable_portable_binary+set}" = set; then
  # An --enable-* option delivers its value in $enableval.  The previous code
  # read $withval, which is only assigned by --with-* handling and therefore
  # held an unrelated (or empty) value here, so --enable-portable-binary did
  # not reliably turn on the "portable" flag choices below.
  enableval=$enable_portable_binary; acx_maxopt_portable=$enableval
else
  acx_maxopt_portable=no
fi

# Try to determine "good" native compiler flags if none specified via CXXFLAGS
if test "$ac_test_CXXFLAGS" != "set"; then
  CXXFLAGS=""
  # One arm per detected C++ compiler vendor.  Each arm starts from that
  # vendor's optimisation flags and then adds or omits host-specific flags
  # depending on $acx_maxopt_portable.
  case $ax_cv_cxx_compiler_vendor in
    dec) CXXFLAGS="-newc -w0 -O5 -ansi_alias -ansi_args -fp_reorder -tune host"
         if test "x$acx_maxopt_portable" = xno; then
           CXXFLAGS="$CXXFLAGS -arch host"
         fi;;

    sun) CXXFLAGS="-native -fast -xO5 -dalign"
         if test "x$acx_maxopt_portable" = xyes; then
           CXXFLAGS="$CXXFLAGS -xarch=generic"
         fi;;

    hp)  CXXFLAGS="+Oall +Optrs_ansi +DSnative"
         if test "x$acx_maxopt_portable" = xyes; then
           CXXFLAGS="$CXXFLAGS +DAportable"
         fi;;

    ibm) if test "x$acx_maxopt_portable" = xno; then
           xlc_opt="-qarch=auto -qtune=auto"
         else
           xlc_opt="-qtune=auto"
         fi
         { echo "$as_me:$LINENO: checking whether C++ compiler accepts $xlc_opt" >&5
echo $ECHO_N "checking whether C++ compiler accepts $xlc_opt... $ECHO_C" >&6; }
         # Probe the flags in isolation: CXXFLAGS is temporarily replaced by
         # $xlc_opt and an empty main() is compiled with it.
         ax_save_FLAGS=$CXXFLAGS
         CXXFLAGS="$xlc_opt"
         cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h. */
_ACEOF
cat confdefs.h >>conftest.$ac_ext
cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
int
main ()
{
  ;
  return 0;
}
_ACEOF
rm -f conftest.$ac_objext
# Run the compile command, logging it and its exit status to fd 5.  Compiler
# stderr goes to conftest.er1 and is filtered into conftest.err.
if { (ac_try="$ac_compile"
case "(($ac_try" in
  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
  *) ac_try_echo=$ac_try;;
esac
eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
  (eval "$ac_compile") 2>conftest.er1
  ac_status=$?
  grep -v '^ *+' conftest.er1 >conftest.err
  rm -f conftest.er1
  cat conftest.err >&5
  echo "$as_me:$LINENO: \$? = $ac_status" >&5
  (exit $ac_status); } && {
         test -z "$ac_cxx_werror_flag" ||
         test ! \
-s conftest.err } && test -s conftest.$ac_objext; then eval `echo "ax_cv_cxx_flags_$xlc_opt" | $as_tr_sh`=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 eval `echo "ax_cv_cxx_flags_$xlc_opt" | $as_tr_sh`=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext CXXFLAGS=$ax_save_FLAGS eval ax_check_compiler_flags=$`echo "ax_cv_cxx_flags_$xlc_opt" | $as_tr_sh` { echo "$as_me:$LINENO: result: $ax_check_compiler_flags" >&5 echo "${ECHO_T}$ax_check_compiler_flags" >&6; } if test "x$ax_check_compiler_flags" = xyes; then CXXFLAGS="-O3 -qansialias -w $xlc_opt" else CXXFLAGS="-O3 -qansialias -w" echo "******************************************************" echo "* You seem to have the IBM C compiler. It is *" echo "* recommended for best performance that you use: *" echo "* *" echo "* CXXFLAGS=-O3 -qarch=xxx -qtune=xxx -qansialias -w *" echo "* ^^^ ^^^ *" echo "* where xxx is pwr2, pwr3, 604, or whatever kind of *" echo "* CPU you have. (Set the CXXFLAGS environment var. *" echo "* and re-run configure.) For more info, man cxx. *" echo "******************************************************" fi ;; intel) CXXFLAGS="-O3 -restrict" if test "x$acx_maxopt_portable" = xno; then icc_archflag=unknown icc_flags="" case $host_cpu in i686*|x86_64*) # icc accepts gcc assembly syntax, so these should work: ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu { echo "$as_me:$LINENO: checking for x86 cpuid 0 output" >&5 echo $ECHO_N "checking for x86 cpuid 0 output... $ECHO_C" >&6; } if test "${ax_cv_gcc_x86_cpuid_0+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else if test "$cross_compiling" = yes; then ax_cv_gcc_x86_cpuid_0=unknown else cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. 
*/ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include int main () { int op = 0, eax, ebx, ecx, edx; FILE *f; __asm__("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (op)); f = fopen("conftest_cpuid", "w"); if (!f) return 1; fprintf(f, "%x:%x:%x:%x\n", eax, ebx, ecx, edx); fclose(f); return 0; ; return 0; } _ACEOF rm -f conftest$ac_exeext if { (ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_link") 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { ac_try='./conftest$ac_exeext' { (case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_try") 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; }; then ax_cv_gcc_x86_cpuid_0=`cat conftest_cpuid`; rm -f conftest_cpuid else echo "$as_me: program exited with status $ac_status" >&5 echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ( exit $ac_status ) ax_cv_gcc_x86_cpuid_0=unknown; rm -f conftest_cpuid fi rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext fi fi { echo "$as_me:$LINENO: result: $ax_cv_gcc_x86_cpuid_0" >&5 echo "${ECHO_T}$ax_cv_gcc_x86_cpuid_0" >&6; } ac_ext=cpp ac_cpp='$CXXCPP $CPPFLAGS' ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_cxx_compiler_gnu ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu { echo "$as_me:$LINENO: checking for x86 cpuid 1 output" >&5 echo $ECHO_N "checking 
for x86 cpuid 1 output... $ECHO_C" >&6; } if test "${ax_cv_gcc_x86_cpuid_1+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else if test "$cross_compiling" = yes; then ax_cv_gcc_x86_cpuid_1=unknown else cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include int main () { int op = 1, eax, ebx, ecx, edx; FILE *f; __asm__("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (op)); f = fopen("conftest_cpuid", "w"); if (!f) return 1; fprintf(f, "%x:%x:%x:%x\n", eax, ebx, ecx, edx); fclose(f); return 0; ; return 0; } _ACEOF rm -f conftest$ac_exeext if { (ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_link") 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { ac_try='./conftest$ac_exeext' { (case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_try") 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 (exit $ac_status); }; }; then ax_cv_gcc_x86_cpuid_1=`cat conftest_cpuid`; rm -f conftest_cpuid else echo "$as_me: program exited with status $ac_status" >&5 echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ( exit $ac_status ) ax_cv_gcc_x86_cpuid_1=unknown; rm -f conftest_cpuid fi rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext fi fi { echo "$as_me:$LINENO: result: $ax_cv_gcc_x86_cpuid_1" >&5 echo "${ECHO_T}$ax_cv_gcc_x86_cpuid_1" >&6; } ac_ext=cpp ac_cpp='$CXXCPP $CPPFLAGS' ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_cxx_compiler_gnu case $ax_cv_gcc_x86_cpuid_0 in # see AX_GCC_ARCHFLAG *:756e6547:*:*) # Intel case $ax_cv_gcc_x86_cpuid_1 in 1067?:*:*:*) icc_flags="-xS";; *6a?:*[234]:*:*|*6[789b]?:*:*:*) icc_flags="-xK";; *6f?:*:*:*) icc_flags="-xT";; *f3[347]:*:*:*|*f4[1347a]:*:*:*) icc_flags="-xP -xN -xW -xK";; *f??:*:*:*) icc_flags="-xN -xW -xK";; esac ;; esac ;; esac if test "x$icc_flags" != x; then for flag in $icc_flags; do { echo "$as_me:$LINENO: checking whether C++ compiler accepts $flag" >&5 echo $ECHO_N "checking whether C++ compiler accepts $flag... $ECHO_C" >&6; } ax_save_FLAGS=$CXXFLAGS CXXFLAGS="$flag" cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ int main () { ; return 0; } _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_cxx_werror_flag" || test ! 
-s conftest.err } && test -s conftest.$ac_objext; then eval `echo "ax_cv_cxx_flags_$flag" | $as_tr_sh`=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 eval `echo "ax_cv_cxx_flags_$flag" | $as_tr_sh`=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext CXXFLAGS=$ax_save_FLAGS eval ax_check_compiler_flags=$`echo "ax_cv_cxx_flags_$flag" | $as_tr_sh` { echo "$as_me:$LINENO: result: $ax_check_compiler_flags" >&5 echo "${ECHO_T}$ax_check_compiler_flags" >&6; } if test "x$ax_check_compiler_flags" = xyes; then icc_archflag=$flag; break else : fi done fi { echo "$as_me:$LINENO: checking for icc architecture flag" >&5 echo $ECHO_N "checking for icc architecture flag... $ECHO_C" >&6; } { echo "$as_me:$LINENO: result: $icc_archflag" >&5 echo "${ECHO_T}$icc_archflag" >&6; } if test "x$icc_archflag" != xunknown; then CXXFLAGS="$CXXFLAGS $icc_archflag" fi fi ;; gnu) # default optimization flags for gcc on all systems CXXFLAGS="-O3 -fomit-frame-pointer" # -malign-double for x86 systems { echo "$as_me:$LINENO: checking whether C++ compiler accepts -malign-double" >&5 echo $ECHO_N "checking whether C++ compiler accepts -malign-double... $ECHO_C" >&6; } if test "${ax_cv_cxx_flags__malign_double+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else ax_save_FLAGS=$CXXFLAGS CXXFLAGS="-malign-double" cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ int main () { ; return 0; } _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_cxx_werror_flag" || test ! 
-s conftest.err } && test -s conftest.$ac_objext; then ax_cv_cxx_flags__malign_double=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ax_cv_cxx_flags__malign_double=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext CXXFLAGS=$ax_save_FLAGS fi eval ax_check_compiler_flags=$ax_cv_cxx_flags__malign_double { echo "$as_me:$LINENO: result: $ax_check_compiler_flags" >&5 echo "${ECHO_T}$ax_check_compiler_flags" >&6; } if test "x$ax_check_compiler_flags" = xyes; then CXXFLAGS="$CXXFLAGS -malign-double" else : fi # -fstrict-aliasing for gcc-2.95+ { echo "$as_me:$LINENO: checking whether C++ compiler accepts -fstrict-aliasing" >&5 echo $ECHO_N "checking whether C++ compiler accepts -fstrict-aliasing... $ECHO_C" >&6; } if test "${ax_cv_cxx_flags__fstrict_aliasing+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else ax_save_FLAGS=$CXXFLAGS CXXFLAGS="-fstrict-aliasing" cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ int main () { ; return 0; } _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_cxx_werror_flag" || test ! 
-s conftest.err } && test -s conftest.$ac_objext; then ax_cv_cxx_flags__fstrict_aliasing=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ax_cv_cxx_flags__fstrict_aliasing=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext CXXFLAGS=$ax_save_FLAGS fi eval ax_check_compiler_flags=$ax_cv_cxx_flags__fstrict_aliasing { echo "$as_me:$LINENO: result: $ax_check_compiler_flags" >&5 echo "${ECHO_T}$ax_check_compiler_flags" >&6; } if test "x$ax_check_compiler_flags" = xyes; then CXXFLAGS="$CXXFLAGS -fstrict-aliasing" else : fi # note that we enable "unsafe" fp optimization with other compilers, too { echo "$as_me:$LINENO: checking whether C++ compiler accepts -ffast-math" >&5 echo $ECHO_N "checking whether C++ compiler accepts -ffast-math... $ECHO_C" >&6; } if test "${ax_cv_cxx_flags__ffast_math+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else ax_save_FLAGS=$CXXFLAGS CXXFLAGS="-ffast-math" cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ int main () { ; return 0; } _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_cxx_werror_flag" || test ! 
-s conftest.err } && test -s conftest.$ac_objext; then ax_cv_cxx_flags__ffast_math=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ax_cv_cxx_flags__ffast_math=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext CXXFLAGS=$ax_save_FLAGS fi eval ax_check_compiler_flags=$ax_cv_cxx_flags__ffast_math { echo "$as_me:$LINENO: result: $ax_check_compiler_flags" >&5 echo "${ECHO_T}$ax_check_compiler_flags" >&6; } if test "x$ax_check_compiler_flags" = xyes; then CXXFLAGS="$CXXFLAGS -ffast-math" else : fi # Check whether --with-gcc-arch was given. if test "${with_gcc_arch+set}" = set; then withval=$with_gcc_arch; ax_gcc_arch=$withval else ax_gcc_arch=yes fi { echo "$as_me:$LINENO: checking for gcc architecture flag" >&5 echo $ECHO_N "checking for gcc architecture flag... $ECHO_C" >&6; } { echo "$as_me:$LINENO: result: " >&5 echo "${ECHO_T}" >&6; } if test "${ax_cv_gcc_archflag+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else ax_cv_gcc_archflag="unknown" if test "$GCC" = yes; then if test "x$ax_gcc_arch" = xyes; then ax_gcc_arch="" if test "$cross_compiling" = no; then case $host_cpu in i[3456]86*|x86_64*) # use cpuid codes, in part from x86info-1.7 by D. Jones ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu { echo "$as_me:$LINENO: checking for x86 cpuid 0 output" >&5 echo $ECHO_N "checking for x86 cpuid 0 output... $ECHO_C" >&6; } if test "${ax_cv_gcc_x86_cpuid_0+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else if test "$cross_compiling" = yes; then ax_cv_gcc_x86_cpuid_0=unknown else cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ #include int main () { int op = 0, eax, ebx, ecx, edx; FILE *f; __asm__("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (op)); f = fopen("conftest_cpuid", "w"); if (!f) return 1; fprintf(f, "%x:%x:%x:%x\n", eax, ebx, ecx, edx); fclose(f); return 0; ; return 0; } _ACEOF rm -f conftest$ac_exeext if { (ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_link") 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { ac_try='./conftest$ac_exeext' { (case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_try") 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; }; then ax_cv_gcc_x86_cpuid_0=`cat conftest_cpuid`; rm -f conftest_cpuid else echo "$as_me: program exited with status $ac_status" >&5 echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ( exit $ac_status ) ax_cv_gcc_x86_cpuid_0=unknown; rm -f conftest_cpuid fi rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext fi fi { echo "$as_me:$LINENO: result: $ax_cv_gcc_x86_cpuid_0" >&5 echo "${ECHO_T}$ax_cv_gcc_x86_cpuid_0" >&6; } ac_ext=cpp ac_cpp='$CXXCPP $CPPFLAGS' ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_cxx_compiler_gnu ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu { echo "$as_me:$LINENO: checking for x86 cpuid 1 output" >&5 echo $ECHO_N "checking for x86 cpuid 1 output... 
$ECHO_C" >&6; } if test "${ax_cv_gcc_x86_cpuid_1+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else if test "$cross_compiling" = yes; then ax_cv_gcc_x86_cpuid_1=unknown else cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include int main () { int op = 1, eax, ebx, ecx, edx; FILE *f; __asm__("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (op)); f = fopen("conftest_cpuid", "w"); if (!f) return 1; fprintf(f, "%x:%x:%x:%x\n", eax, ebx, ecx, edx); fclose(f); return 0; ; return 0; } _ACEOF rm -f conftest$ac_exeext if { (ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_link") 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { ac_try='./conftest$ac_exeext' { (case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_try") 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? 
= $ac_status" >&5
  (exit $ac_status); }; }; then
  # The probe program linked and ran: its single output line
  # (eax:ebx:ecx:edx in hex) becomes the cached cpuid 1 result.
  ax_cv_gcc_x86_cpuid_1=`cat conftest_cpuid`; rm -f conftest_cpuid
else
  echo "$as_me: program exited with status $ac_status" >&5
echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5

( exit $ac_status )
ax_cv_gcc_x86_cpuid_1=unknown; rm -f conftest_cpuid
fi
rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext
fi

fi
{ echo "$as_me:$LINENO: result: $ax_cv_gcc_x86_cpuid_1" >&5
echo "${ECHO_T}$ax_cv_gcc_x86_cpuid_1" >&6; }
# Switch the test-compile settings back to the C++ compiler.
ac_ext=cpp
ac_cpp='$CXXCPP $CPPFLAGS'
ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_cxx_compiler_gnu

# Map the cpuid results to a list of candidate gcc architecture names, most
# specific first.  The outer case matches the second (ebx) field of the
# cpuid 0 output to pick the vendor; the inner cases match the first (eax)
# field of the cpuid 1 output.  Arm order matters: specific patterns must
# precede the catch-all ones such as *5??, *6?? and *f??.
	 case $ax_cv_gcc_x86_cpuid_0 in
	   *:756e6547:*:*) # Intel
	      case $ax_cv_gcc_x86_cpuid_1 in
		*5[48]?:*:*:*) ax_gcc_arch="pentium-mmx pentium" ;;
		*5??:*:*:*) ax_gcc_arch=pentium ;;
		*6[3456]?:*:*:*) ax_gcc_arch="pentium2 pentiumpro" ;;
		*6a?:*[01]:*:*) ax_gcc_arch="pentium2 pentiumpro" ;;
		*6a?:*[234]:*:*) ax_gcc_arch="pentium3 pentiumpro" ;;
		*6[9d]?:*:*:*) ax_gcc_arch="pentium-m pentium3 pentiumpro" ;;
		*6[78b]?:*:*:*) ax_gcc_arch="pentium3 pentiumpro" ;;
		*6??:*:*:*) ax_gcc_arch=pentiumpro ;;
		# f4[1347] is a bracket set (signatures f41, f43, f44, f47).
		# It used to read "f41347", a literal that none of those
		# signatures can match, so they fell through to the generic
		# *f?? arm below and never got prescott/nocona.
		*f3[347]:*:*:*|*f4[1347]:*:*:*)
		  case $host_cpu in
		    x86_64*) ax_gcc_arch="nocona pentium4 pentiumpro" ;;
		    *) ax_gcc_arch="prescott pentium4 pentiumpro" ;;
		  esac ;;
		*f??:*:*:*) ax_gcc_arch="pentium4 pentiumpro";;
	      esac ;;
	   *:68747541:*:*) # AMD
	      case $ax_cv_gcc_x86_cpuid_1 in
		*5[67]?:*:*:*) ax_gcc_arch=k6 ;;
		*5[8d]?:*:*:*) ax_gcc_arch="k6-2 k6" ;;
		*5[9]?:*:*:*) ax_gcc_arch="k6-3 k6" ;;
		*60?:*:*:*) ax_gcc_arch=k7 ;;
		*6[12]?:*:*:*) ax_gcc_arch="athlon k7" ;;
		*6[34]?:*:*:*) ax_gcc_arch="athlon-tbird k7" ;;
		*67?:*:*:*) ax_gcc_arch="athlon-4 athlon k7" ;;
		*6[68a]?:*:*:*)
		   # These models need one more probe, so the test-compile
		   # settings are switched to the C compiler again.
		   ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext 
$LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu { echo "$as_me:$LINENO: checking for x86 cpuid 0x80000006 output" >&5 echo $ECHO_N "checking for x86 cpuid 0x80000006 output... $ECHO_C" >&6; } if test "${ax_cv_gcc_x86_cpuid_0x80000006+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else if test "$cross_compiling" = yes; then ax_cv_gcc_x86_cpuid_0x80000006=unknown else cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include int main () { int op = 0x80000006, eax, ebx, ecx, edx; FILE *f; __asm__("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (op)); f = fopen("conftest_cpuid", "w"); if (!f) return 1; fprintf(f, "%x:%x:%x:%x\n", eax, ebx, ecx, edx); fclose(f); return 0; ; return 0; } _ACEOF rm -f conftest$ac_exeext if { (ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_link") 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { ac_try='./conftest$ac_exeext' { (case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_try") 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 (exit $ac_status); }; }; then ax_cv_gcc_x86_cpuid_0x80000006=`cat conftest_cpuid`; rm -f conftest_cpuid else echo "$as_me: program exited with status $ac_status" >&5 echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ( exit $ac_status ) ax_cv_gcc_x86_cpuid_0x80000006=unknown; rm -f conftest_cpuid fi rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext fi fi { echo "$as_me:$LINENO: result: $ax_cv_gcc_x86_cpuid_0x80000006" >&5 echo "${ECHO_T}$ax_cv_gcc_x86_cpuid_0x80000006" >&6; } ac_ext=cpp ac_cpp='$CXXCPP $CPPFLAGS' ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_cxx_compiler_gnu # L2 cache size case $ax_cv_gcc_x86_cpuid_0x80000006 in *:*:*[1-9a-f]??????:*) # (L2 = ecx >> 16) >= 256 ax_gcc_arch="athlon-xp athlon-4 athlon k7" ;; *) ax_gcc_arch="athlon-4 athlon k7" ;; esac ;; *f[4cef8b]?:*:*:*) ax_gcc_arch="athlon64 k8" ;; *f5?:*:*:*) ax_gcc_arch="opteron k8" ;; *f7?:*:*:*) ax_gcc_arch="athlon-fx opteron k8" ;; *f??:*:*:*) ax_gcc_arch="k8" ;; esac ;; *:746e6543:*:*) # IDT case $ax_cv_gcc_x86_cpuid_1 in *54?:*:*:*) ax_gcc_arch=winchip-c6 ;; *58?:*:*:*) ax_gcc_arch=winchip2 ;; *6[78]?:*:*:*) ax_gcc_arch=c3 ;; *69?:*:*:*) ax_gcc_arch="c3-2 c3" ;; esac ;; esac if test x"$ax_gcc_arch" = x; then # fallback case $host_cpu in i586*) ax_gcc_arch=pentium ;; i686*) ax_gcc_arch=pentiumpro ;; esac fi ;; sparc*) # Extract the first word of "prtdiag", so it can be a program name with args. set dummy prtdiag; ac_word=$2 { echo "$as_me:$LINENO: checking for $ac_word" >&5 echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } if test "${ac_cv_path_PRTDIAG+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else case $PRTDIAG in [\\/]* | ?:[\\/]*) ac_cv_path_PRTDIAG="$PRTDIAG" # Let the user override the test with a path. 
;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR as_dummy="$PATH:/usr/platform/`uname -i`/sbin/:/usr/platform/`uname -m`/sbin/" for as_dir in $as_dummy do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then ac_cv_path_PRTDIAG="$as_dir/$ac_word$ac_exec_ext" echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS test -z "$ac_cv_path_PRTDIAG" && ac_cv_path_PRTDIAG="prtdiag" ;; esac fi PRTDIAG=$ac_cv_path_PRTDIAG if test -n "$PRTDIAG"; then { echo "$as_me:$LINENO: result: $PRTDIAG" >&5 echo "${ECHO_T}$PRTDIAG" >&6; } else { echo "$as_me:$LINENO: result: no" >&5 echo "${ECHO_T}no" >&6; } fi cputype=`(((grep cpu /proc/cpuinfo | cut -d: -f2) ; ($PRTDIAG -v |grep -i sparc) ; grep -i cpu /var/run/dmesg.boot ) | head -n 1) 2> /dev/null` cputype=`echo "$cputype" | tr -d ' -' |tr $as_cr_LETTERS $as_cr_letters` case $cputype in *ultrasparciv*) ax_gcc_arch="ultrasparc4 ultrasparc3 ultrasparc v9" ;; *ultrasparciii*) ax_gcc_arch="ultrasparc3 ultrasparc v9" ;; *ultrasparc*) ax_gcc_arch="ultrasparc v9" ;; *supersparc*|*tms390z5[05]*) ax_gcc_arch="supersparc v8" ;; *hypersparc*|*rt62[056]*) ax_gcc_arch="hypersparc v8" ;; *cypress*) ax_gcc_arch=cypress ;; esac ;; alphaev5) ax_gcc_arch=ev5 ;; alphaev56) ax_gcc_arch=ev56 ;; alphapca56) ax_gcc_arch="pca56 ev56" ;; alphapca57) ax_gcc_arch="pca57 pca56 ev56" ;; alphaev6) ax_gcc_arch=ev6 ;; alphaev67) ax_gcc_arch=ev67 ;; alphaev68) ax_gcc_arch="ev68 ev67" ;; alphaev69) ax_gcc_arch="ev69 ev68 ev67" ;; alphaev7) ax_gcc_arch="ev7 ev69 ev68 ev67" ;; alphaev79) ax_gcc_arch="ev79 ev7 ev69 ev68 ev67" ;; powerpc*) cputype=`((grep cpu /proc/cpuinfo | head -n 1 | cut -d: -f2 | cut -d, -f1 | sed 's/ //g') ; /usr/bin/machine ; /bin/machine; grep CPU /var/run/dmesg.boot | head -n 1 | cut -d" " -f2) 2> /dev/null` cputype=`echo $cputype | sed -e 's/ppc//g;s/ *//g'` case 
$cputype in *750*) ax_gcc_arch="750 G3" ;; *740[0-9]*) ax_gcc_arch="$cputype 7400 G4" ;; *74[4-5][0-9]*) ax_gcc_arch="$cputype 7450 G4" ;; *74[0-9][0-9]*) ax_gcc_arch="$cputype G4" ;; *970*) ax_gcc_arch="970 G5 power4";; *POWER4*|*power4*|*gq*) ax_gcc_arch="power4 970";; *POWER5*|*power5*|*gr*|*gs*) ax_gcc_arch="power5 power4 970";; 603ev|8240) ax_gcc_arch="$cputype 603e 603";; *) ax_gcc_arch=$cputype ;; esac ax_gcc_arch="$ax_gcc_arch powerpc" ;; esac fi # not cross-compiling fi # guess arch if test "x$ax_gcc_arch" != x -a "x$ax_gcc_arch" != xno; then for arch in $ax_gcc_arch; do if test "x$acx_maxopt_portable" = xyes; then # if we require portable code flags="-mtune=$arch" # -mcpu=$arch and m$arch generate nonportable code on every arch except # x86. And some other arches (e.g. Alpha) don't accept -mtune. Grrr. case $host_cpu in i*86|x86_64*) flags="$flags -mcpu=$arch -m$arch";; esac else flags="-march=$arch -mcpu=$arch -m$arch" fi for flag in $flags; do { echo "$as_me:$LINENO: checking whether C++ compiler accepts $flag" >&5 echo $ECHO_N "checking whether C++ compiler accepts $flag... $ECHO_C" >&6; } ax_save_FLAGS=$CXXFLAGS CXXFLAGS="$flag" cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ int main () { ; return 0; } _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_cxx_werror_flag" || test ! 
-s conftest.err } && test -s conftest.$ac_objext; then eval `echo "ax_cv_cxx_flags_$flag" | $as_tr_sh`=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 eval `echo "ax_cv_cxx_flags_$flag" | $as_tr_sh`=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext CXXFLAGS=$ax_save_FLAGS eval ax_check_compiler_flags=$`echo "ax_cv_cxx_flags_$flag" | $as_tr_sh` { echo "$as_me:$LINENO: result: $ax_check_compiler_flags" >&5 echo "${ECHO_T}$ax_check_compiler_flags" >&6; } if test "x$ax_check_compiler_flags" = xyes; then ax_cv_gcc_archflag=$flag; break else : fi done test "x$ax_cv_gcc_archflag" = xunknown || break done fi fi # $GCC=yes fi { echo "$as_me:$LINENO: checking for gcc architecture flag" >&5 echo $ECHO_N "checking for gcc architecture flag... $ECHO_C" >&6; } { echo "$as_me:$LINENO: result: $ax_cv_gcc_archflag" >&5 echo "${ECHO_T}$ax_cv_gcc_archflag" >&6; } if test "x$ax_cv_gcc_archflag" = xunknown; then : else CXXFLAGS="$CXXFLAGS $ax_cv_gcc_archflag" fi ;; esac if test -z "$CXXFLAGS"; then echo "" echo "**********************************************************" echo "* WARNING: Don't know the best CXXFLAGS for this system *" echo "* Use ./configure CXXFLAGS=... to specify your own flags *" echo "* (otherwise, a default of CXXFLAGS=-O3 will be used) *" echo "**********************************************************" echo "" CXXFLAGS="-O3" fi { echo "$as_me:$LINENO: checking whether C++ compiler accepts $CXXFLAGS" >&5 echo $ECHO_N "checking whether C++ compiler accepts $CXXFLAGS... $ECHO_C" >&6; } ax_save_FLAGS=$CXXFLAGS CXXFLAGS="$CXXFLAGS" cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ int main () { ; return 0; } _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_cxx_werror_flag" || test ! -s conftest.err } && test -s conftest.$ac_objext; then eval `echo "ax_cv_cxx_flags_$CXXFLAGS" | $as_tr_sh`=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 eval `echo "ax_cv_cxx_flags_$CXXFLAGS" | $as_tr_sh`=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext CXXFLAGS=$ax_save_FLAGS eval ax_check_compiler_flags=$`echo "ax_cv_cxx_flags_$CXXFLAGS" | $as_tr_sh` { echo "$as_me:$LINENO: result: $ax_check_compiler_flags" >&5 echo "${ECHO_T}$ax_check_compiler_flags" >&6; } if test "x$ax_check_compiler_flags" = xyes; then : else echo "" echo "**********************************************************" echo "* WARNING: The guessed CXXFLAGS don't seem to work with *" echo "* your compiler. *" echo "* Use ./configure CXXFLAGS=... to specify your own flags *" echo "**********************************************************" echo "" CXXFLAGS="" fi fi ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu { echo "$as_me:$LINENO: checking for restrict" >&5 echo $ECHO_N "checking for restrict... 
$ECHO_C" >&6; } if test "${ac_cxx_restrict+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else ac_cxx_restrict=no ac_ext=cpp ac_cpp='$CXXCPP $CPPFLAGS' ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_cxx_compiler_gnu for ac_kw in restrict __restrict__ __restrict; do cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ int main () { void* $ac_kw bar ; return 0; } _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_cxx_werror_flag" || test ! -s conftest.err } && test -s conftest.$ac_objext; then ac_cxx_restrict=$ac_kw; break else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext done ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu fi { echo "$as_me:$LINENO: result: $ac_cxx_restrict" >&5 echo "${ECHO_T}$ac_cxx_restrict" >&6; } if test "$ac_cxx_restrict" != "restrict"; then ac_kw="$ac_cxx_restrict" if test "$ac_kw" = unsupported; then ac_kw=""; fi cat >>confdefs.h <<_ACEOF #define restrict $ac_cxx_restrict _ACEOF fi # Check whether --enable-fortran was given. 
if test "${enable_fortran+set}" = set; then enableval=$enable_fortran; fi if test "x$enable_fortran" != "xno"; then { echo "$as_me:$LINENO: result: *************** Enabling F77! ***************" >&5 echo "${ECHO_T}*************** Enabling F77! ***************" >&6; } ac_ext=f ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_f77_compiler_gnu if test -n "$ac_tool_prefix"; then for ac_prog in g77 xlf f77 frt pgf77 cf77 fort77 fl32 af77 xlf90 f90 pgf90 pghpf epcf90 gfortran g95 xlf95 f95 fort ifort ifc efc pgf95 lf95 ftn do # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. set dummy $ac_tool_prefix$ac_prog; ac_word=$2 { echo "$as_me:$LINENO: checking for $ac_word" >&5 echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } if test "${ac_cv_prog_F77+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else if test -n "$F77"; then ac_cv_prog_F77="$F77" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then ac_cv_prog_F77="$ac_tool_prefix$ac_prog" echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi F77=$ac_cv_prog_F77 if test -n "$F77"; then { echo "$as_me:$LINENO: result: $F77" >&5 echo "${ECHO_T}$F77" >&6; } else { echo "$as_me:$LINENO: result: no" >&5 echo "${ECHO_T}no" >&6; } fi test -n "$F77" && break done fi if test -z "$F77"; then ac_ct_F77=$F77 for ac_prog in g77 xlf f77 frt pgf77 cf77 fort77 fl32 af77 xlf90 f90 pgf90 pghpf epcf90 gfortran g95 xlf95 f95 fort ifort ifc efc pgf95 lf95 ftn do # Extract the first word of "$ac_prog", so it can be a program name with args. 
set dummy $ac_prog; ac_word=$2 { echo "$as_me:$LINENO: checking for $ac_word" >&5 echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } if test "${ac_cv_prog_ac_ct_F77+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else if test -n "$ac_ct_F77"; then ac_cv_prog_ac_ct_F77="$ac_ct_F77" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then ac_cv_prog_ac_ct_F77="$ac_prog" echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_F77=$ac_cv_prog_ac_ct_F77 if test -n "$ac_ct_F77"; then { echo "$as_me:$LINENO: result: $ac_ct_F77" >&5 echo "${ECHO_T}$ac_ct_F77" >&6; } else { echo "$as_me:$LINENO: result: no" >&5 echo "${ECHO_T}no" >&6; } fi test -n "$ac_ct_F77" && break done if test "x$ac_ct_F77" = x; then F77="" else case $cross_compiling:$ac_tool_warned in yes:) { echo "$as_me:$LINENO: WARNING: In the future, Autoconf will not detect cross-tools whose name does not start with the host triplet. If you think this configuration is useful to you, please write to autoconf@gnu.org." >&5 echo "$as_me: WARNING: In the future, Autoconf will not detect cross-tools whose name does not start with the host triplet. If you think this configuration is useful to you, please write to autoconf@gnu.org." >&2;} ac_tool_warned=yes ;; esac F77=$ac_ct_F77 fi fi # Provide some information about the compiler. echo "$as_me:$LINENO: checking for Fortran 77 compiler version" >&5 ac_compiler=`set X $ac_compile; echo $2` { (ac_try="$ac_compiler --version >&5" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compiler --version >&5") 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 (exit $ac_status); } { (ac_try="$ac_compiler -v >&5" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compiler -v >&5") 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } { (ac_try="$ac_compiler -V >&5" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compiler -V >&5") 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } rm -f a.out # If we don't use `.F' as extension, the preprocessor is not run on the # input file. (Note that this only needs to work for GNU compilers.) ac_save_ext=$ac_ext ac_ext=F { echo "$as_me:$LINENO: checking whether we are using the GNU Fortran 77 compiler" >&5 echo $ECHO_N "checking whether we are using the GNU Fortran 77 compiler... $ECHO_C" >&6; } if test "${ac_cv_f77_compiler_gnu+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else cat >conftest.$ac_ext <<_ACEOF program main #ifndef __GNUC__ choke me #endif end _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_f77_werror_flag" || test ! 
-s conftest.err } && test -s conftest.$ac_objext; then ac_compiler_gnu=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_compiler_gnu=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext ac_cv_f77_compiler_gnu=$ac_compiler_gnu fi { echo "$as_me:$LINENO: result: $ac_cv_f77_compiler_gnu" >&5 echo "${ECHO_T}$ac_cv_f77_compiler_gnu" >&6; } ac_ext=$ac_save_ext ac_test_FFLAGS=${FFLAGS+set} ac_save_FFLAGS=$FFLAGS FFLAGS= { echo "$as_me:$LINENO: checking whether $F77 accepts -g" >&5 echo $ECHO_N "checking whether $F77 accepts -g... $ECHO_C" >&6; } if test "${ac_cv_prog_f77_g+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else FFLAGS=-g cat >conftest.$ac_ext <<_ACEOF program main end _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_f77_werror_flag" || test ! 
-s conftest.err } && test -s conftest.$ac_objext; then ac_cv_prog_f77_g=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_cv_prog_f77_g=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi { echo "$as_me:$LINENO: result: $ac_cv_prog_f77_g" >&5 echo "${ECHO_T}$ac_cv_prog_f77_g" >&6; } if test "$ac_test_FFLAGS" = set; then FFLAGS=$ac_save_FFLAGS elif test $ac_cv_prog_f77_g = yes; then if test "x$ac_cv_f77_compiler_gnu" = xyes; then FFLAGS="-g -O2" else FFLAGS="-g" fi else if test "x$ac_cv_f77_compiler_gnu" = xyes; then FFLAGS="-O2" else FFLAGS= fi fi G77=`test $ac_compiler_gnu = yes && echo yes` ac_ext=f ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_f77_compiler_gnu FSAVE="$FFLAGS" { echo "$as_me:$LINENO: checking for Fortran 77 compiler vendor" >&5 echo $ECHO_N "checking for Fortran 77 compiler vendor... $ECHO_C" >&6; } if test "${ax_cv_f77_compiler_vendor+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else ax_cv_f77_compiler_vendor=unknown # note: don't check for gcc first since some other compilers define __GNUC__ for ventest in intel:__ICC,__ECC,__INTEL_COMPILER,__IFC,__IFORT ibm:__xlc__,__xlC__,__IBMC__,__IBMCPP__ gnu:__GNUC__ sun:__SUNPRO_C,__SUNPRO_CC hp:__HP_cc,__HP_aCC dec:__DECC,__DECCXX,__DECC_VER,__DECCXX_VER borland:__BORLANDC__,__TURBOC__ comeau:__COMO__ cray:_CRAYC kai:__KCC lcc:__LCC__ metrowerks:__MWERKS__ sgi:__sgi,sgi microsoft:_MSC_VER watcom:__WATCOMC__ portland:__PGI; do vencpp="defined("`echo $ventest | cut -d: -f2 | sed 's/,/) || defined(/g'`")" cat >conftest.$ac_ext <<_ACEOF program main #if !($vencpp) thisisanerror; #endif end _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? 
grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_f77_werror_flag" || test ! -s conftest.err } && test -s conftest.$ac_objext; then ax_cv_f77_compiler_vendor=`echo $ventest | cut -d: -f1`; break else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext done fi { echo "$as_me:$LINENO: result: $ax_cv_f77_compiler_vendor" >&5 echo "${ECHO_T}$ax_cv_f77_compiler_vendor" >&6; } ac_ext=f ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_f77_compiler_gnu # Check whether --enable-portable-binary was given. if test "${enable_portable_binary+set}" = set; then enableval=$enable_portable_binary; acx_maxopt_portable=$withval else acx_maxopt_portable=no fi # Try to determine "good" native compiler flags if none specified via FFLAGS if test "$ac_test_FFLAGS" != "set"; then FFLAGS="" case $ax_cv_f77_compiler_vendor in dec) FFLAGS="-newc -w0 -O5 -ansi_alias -ansi_args -fp_reorder -tune host" if test "x$acx_maxopt_portable" = xno; then FFLAGS="$FFLAGS -arch host" fi;; sun) FFLAGS="-native -fast -xO5 -dalign" if test "x$acx_maxopt_portable" = xyes; then FFLAGS="$FFLAGS -xarch=generic" fi;; hp) FFLAGS="+Oall +Optrs_ansi +DSnative" if test "x$acx_maxopt_portable" = xyes; then FFLAGS="$FFLAGS +DAportable" fi;; ibm) if test "x$acx_maxopt_portable" = xno; then xlc_opt="-qarch=auto -qtune=auto" else xlc_opt="-qtune=auto" fi { echo "$as_me:$LINENO: checking whether Fortran 77 compiler accepts $xlc_opt" >&5 echo $ECHO_N "checking whether Fortran 77 compiler accepts $xlc_opt... 
$ECHO_C" >&6; } ax_save_FLAGS=$FFLAGS FFLAGS="$xlc_opt" cat >conftest.$ac_ext <<_ACEOF program main end _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_f77_werror_flag" || test ! -s conftest.err } && test -s conftest.$ac_objext; then eval `echo "ax_cv_f77_flags_$xlc_opt" | $as_tr_sh`=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 eval `echo "ax_cv_f77_flags_$xlc_opt" | $as_tr_sh`=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext FFLAGS=$ax_save_FLAGS eval ax_check_compiler_flags=$`echo "ax_cv_f77_flags_$xlc_opt" | $as_tr_sh` { echo "$as_me:$LINENO: result: $ax_check_compiler_flags" >&5 echo "${ECHO_T}$ax_check_compiler_flags" >&6; } if test "x$ax_check_compiler_flags" = xyes; then FFLAGS="-O3 -qansialias -w $xlc_opt" else FFLAGS="-O3 -qansialias -w" echo "******************************************************" echo "* You seem to have the IBM C compiler. It is *" echo "* recommended for best performance that you use: *" echo "* *" echo "* FFLAGS=-O3 -qarch=xxx -qtune=xxx -qansialias -w *" echo "* ^^^ ^^^ *" echo "* where xxx is pwr2, pwr3, 604, or whatever kind of *" echo "* CPU you have. (Set the FFLAGS environment var. *" echo "* and re-run configure.) For more info, man cxx. 
*" echo "******************************************************" fi ;; intel) FFLAGS="-O3 -ansi_alias" if test "x$acx_maxopt_portable" = xno; then icc_archflag=unknown icc_flags="" case $host_cpu in i686*|x86_64*) # icc accepts gcc assembly syntax, so these should work: ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu { echo "$as_me:$LINENO: checking for x86 cpuid 0 output" >&5 echo $ECHO_N "checking for x86 cpuid 0 output... $ECHO_C" >&6; } if test "${ax_cv_gcc_x86_cpuid_0+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else if test "$cross_compiling" = yes; then ax_cv_gcc_x86_cpuid_0=unknown else cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include int main () { int op = 0, eax, ebx, ecx, edx; FILE *f; __asm__("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (op)); f = fopen("conftest_cpuid", "w"); if (!f) return 1; fprintf(f, "%x:%x:%x:%x\n", eax, ebx, ecx, edx); fclose(f); return 0; ; return 0; } _ACEOF rm -f conftest$ac_exeext if { (ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_link") 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { ac_try='./conftest$ac_exeext' { (case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_try") 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 (exit $ac_status); }; }; then ax_cv_gcc_x86_cpuid_0=`cat conftest_cpuid`; rm -f conftest_cpuid else echo "$as_me: program exited with status $ac_status" >&5 echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ( exit $ac_status ) ax_cv_gcc_x86_cpuid_0=unknown; rm -f conftest_cpuid fi rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext fi fi { echo "$as_me:$LINENO: result: $ax_cv_gcc_x86_cpuid_0" >&5 echo "${ECHO_T}$ax_cv_gcc_x86_cpuid_0" >&6; } ac_ext=f ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_f77_compiler_gnu ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu { echo "$as_me:$LINENO: checking for x86 cpuid 1 output" >&5 echo $ECHO_N "checking for x86 cpuid 1 output... $ECHO_C" >&6; } if test "${ax_cv_gcc_x86_cpuid_1+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else if test "$cross_compiling" = yes; then ax_cv_gcc_x86_cpuid_1=unknown else cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include int main () { int op = 1, eax, ebx, ecx, edx; FILE *f; __asm__("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (op)); f = fopen("conftest_cpuid", "w"); if (!f) return 1; fprintf(f, "%x:%x:%x:%x\n", eax, ebx, ecx, edx); fclose(f); return 0; ; return 0; } _ACEOF rm -f conftest$ac_exeext if { (ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_link") 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 (exit $ac_status); } && { ac_try='./conftest$ac_exeext' { (case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_try") 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; }; then ax_cv_gcc_x86_cpuid_1=`cat conftest_cpuid`; rm -f conftest_cpuid else echo "$as_me: program exited with status $ac_status" >&5 echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ( exit $ac_status ) ax_cv_gcc_x86_cpuid_1=unknown; rm -f conftest_cpuid fi rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext fi fi { echo "$as_me:$LINENO: result: $ax_cv_gcc_x86_cpuid_1" >&5 echo "${ECHO_T}$ax_cv_gcc_x86_cpuid_1" >&6; } ac_ext=f ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_f77_compiler_gnu case $ax_cv_gcc_x86_cpuid_0 in # see AX_GCC_ARCHFLAG *:756e6547:*:*) # Intel case $ax_cv_gcc_x86_cpuid_1 in 1067?:*:*:*) icc_flags="-xS";; *6a?:*[234]:*:*|*6[789b]?:*:*:*) icc_flags="-xK";; *6f?:*:*:*) icc_flags="-xT";; *f3[347]:*:*:*|*f4[1347a]:*:*:*) icc_flags="-xP -xN -xW -xK";; *f??:*:*:*) icc_flags="-xN -xW -xK";; esac ;; esac ;; esac if test "x$icc_flags" != x; then for flag in $icc_flags; do { echo "$as_me:$LINENO: checking whether Fortran 77 compiler accepts $flag" >&5 echo $ECHO_N "checking whether Fortran 77 compiler accepts $flag... $ECHO_C" >&6; } ax_save_FLAGS=$FFLAGS FFLAGS="$flag" cat >conftest.$ac_ext <<_ACEOF program main end _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? 
grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_f77_werror_flag" || test ! -s conftest.err } && test -s conftest.$ac_objext; then eval `echo "ax_cv_f77_flags_$flag" | $as_tr_sh`=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 eval `echo "ax_cv_f77_flags_$flag" | $as_tr_sh`=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext FFLAGS=$ax_save_FLAGS eval ax_check_compiler_flags=$`echo "ax_cv_f77_flags_$flag" | $as_tr_sh` { echo "$as_me:$LINENO: result: $ax_check_compiler_flags" >&5 echo "${ECHO_T}$ax_check_compiler_flags" >&6; } if test "x$ax_check_compiler_flags" = xyes; then icc_archflag=$flag; break else : fi done fi { echo "$as_me:$LINENO: checking for icc architecture flag" >&5 echo $ECHO_N "checking for icc architecture flag... $ECHO_C" >&6; } { echo "$as_me:$LINENO: result: $icc_archflag" >&5 echo "${ECHO_T}$icc_archflag" >&6; } if test "x$icc_archflag" != xunknown; then FFLAGS="$FFLAGS $icc_archflag" fi fi ;; gnu) # default optimization flags for gcc on all systems FFLAGS="-O3 -fomit-frame-pointer" # -malign-double for x86 systems { echo "$as_me:$LINENO: checking whether Fortran 77 compiler accepts -malign-double" >&5 echo $ECHO_N "checking whether Fortran 77 compiler accepts -malign-double... $ECHO_C" >&6; } if test "${ax_cv_f77_flags__malign_double+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else ax_save_FLAGS=$FFLAGS FFLAGS="-malign-double" cat >conftest.$ac_ext <<_ACEOF program main end _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_f77_werror_flag" || test ! -s conftest.err } && test -s conftest.$ac_objext; then ax_cv_f77_flags__malign_double=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ax_cv_f77_flags__malign_double=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext FFLAGS=$ax_save_FLAGS fi eval ax_check_compiler_flags=$ax_cv_f77_flags__malign_double { echo "$as_me:$LINENO: result: $ax_check_compiler_flags" >&5 echo "${ECHO_T}$ax_check_compiler_flags" >&6; } if test "x$ax_check_compiler_flags" = xyes; then FFLAGS="$FFLAGS -malign-double" else : fi # -fstrict-aliasing for gcc-2.95+ { echo "$as_me:$LINENO: checking whether Fortran 77 compiler accepts -fstrict-aliasing" >&5 echo $ECHO_N "checking whether Fortran 77 compiler accepts -fstrict-aliasing... $ECHO_C" >&6; } if test "${ax_cv_f77_flags__fstrict_aliasing+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else ax_save_FLAGS=$FFLAGS FFLAGS="-fstrict-aliasing" cat >conftest.$ac_ext <<_ACEOF program main end _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_f77_werror_flag" || test ! 
-s conftest.err } && test -s conftest.$ac_objext; then ax_cv_f77_flags__fstrict_aliasing=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ax_cv_f77_flags__fstrict_aliasing=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext FFLAGS=$ax_save_FLAGS fi eval ax_check_compiler_flags=$ax_cv_f77_flags__fstrict_aliasing { echo "$as_me:$LINENO: result: $ax_check_compiler_flags" >&5 echo "${ECHO_T}$ax_check_compiler_flags" >&6; } if test "x$ax_check_compiler_flags" = xyes; then FFLAGS="$FFLAGS -fstrict-aliasing" else : fi # note that we enable "unsafe" fp optimization with other compilers, too { echo "$as_me:$LINENO: checking whether Fortran 77 compiler accepts -ffast-math" >&5 echo $ECHO_N "checking whether Fortran 77 compiler accepts -ffast-math... $ECHO_C" >&6; } if test "${ax_cv_f77_flags__ffast_math+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else ax_save_FLAGS=$FFLAGS FFLAGS="-ffast-math" cat >conftest.$ac_ext <<_ACEOF program main end _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_f77_werror_flag" || test ! 
-s conftest.err } && test -s conftest.$ac_objext; then ax_cv_f77_flags__ffast_math=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ax_cv_f77_flags__ffast_math=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext FFLAGS=$ax_save_FLAGS fi eval ax_check_compiler_flags=$ax_cv_f77_flags__ffast_math { echo "$as_me:$LINENO: result: $ax_check_compiler_flags" >&5 echo "${ECHO_T}$ax_check_compiler_flags" >&6; } if test "x$ax_check_compiler_flags" = xyes; then FFLAGS="$FFLAGS -ffast-math" else : fi # Check whether --with-gcc-arch was given. if test "${with_gcc_arch+set}" = set; then withval=$with_gcc_arch; ax_gcc_arch=$withval else ax_gcc_arch=yes fi { echo "$as_me:$LINENO: checking for gcc architecture flag" >&5 echo $ECHO_N "checking for gcc architecture flag... $ECHO_C" >&6; } { echo "$as_me:$LINENO: result: " >&5 echo "${ECHO_T}" >&6; } if test "${ax_cv_gcc_archflag+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else ax_cv_gcc_archflag="unknown" if test "$GCC" = yes; then if test "x$ax_gcc_arch" = xyes; then ax_gcc_arch="" if test "$cross_compiling" = no; then case $host_cpu in i[3456]86*|x86_64*) # use cpuid codes, in part from x86info-1.7 by D. Jones ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu { echo "$as_me:$LINENO: checking for x86 cpuid 0 output" >&5 echo $ECHO_N "checking for x86 cpuid 0 output... $ECHO_C" >&6; } if test "${ax_cv_gcc_x86_cpuid_0+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else if test "$cross_compiling" = yes; then ax_cv_gcc_x86_cpuid_0=unknown else cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ #include int main () { int op = 0, eax, ebx, ecx, edx; FILE *f; __asm__("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (op)); f = fopen("conftest_cpuid", "w"); if (!f) return 1; fprintf(f, "%x:%x:%x:%x\n", eax, ebx, ecx, edx); fclose(f); return 0; ; return 0; } _ACEOF rm -f conftest$ac_exeext if { (ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_link") 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { ac_try='./conftest$ac_exeext' { (case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_try") 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; }; then ax_cv_gcc_x86_cpuid_0=`cat conftest_cpuid`; rm -f conftest_cpuid else echo "$as_me: program exited with status $ac_status" >&5 echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ( exit $ac_status ) ax_cv_gcc_x86_cpuid_0=unknown; rm -f conftest_cpuid fi rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext fi fi { echo "$as_me:$LINENO: result: $ax_cv_gcc_x86_cpuid_0" >&5 echo "${ECHO_T}$ax_cv_gcc_x86_cpuid_0" >&6; } ac_ext=f ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_f77_compiler_gnu ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu { echo "$as_me:$LINENO: checking for x86 cpuid 1 output" >&5 echo $ECHO_N "checking for x86 cpuid 1 output... 
$ECHO_C" >&6; } if test "${ax_cv_gcc_x86_cpuid_1+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else if test "$cross_compiling" = yes; then ax_cv_gcc_x86_cpuid_1=unknown else cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include int main () { int op = 1, eax, ebx, ecx, edx; FILE *f; __asm__("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (op)); f = fopen("conftest_cpuid", "w"); if (!f) return 1; fprintf(f, "%x:%x:%x:%x\n", eax, ebx, ecx, edx); fclose(f); return 0; ; return 0; } _ACEOF rm -f conftest$ac_exeext if { (ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_link") 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { ac_try='./conftest$ac_exeext' { (case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_try") 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 (exit $ac_status); }; }; then ax_cv_gcc_x86_cpuid_1=`cat conftest_cpuid`; rm -f conftest_cpuid else echo "$as_me: program exited with status $ac_status" >&5 echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ( exit $ac_status ) ax_cv_gcc_x86_cpuid_1=unknown; rm -f conftest_cpuid fi rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext fi fi { echo "$as_me:$LINENO: result: $ax_cv_gcc_x86_cpuid_1" >&5 echo "${ECHO_T}$ax_cv_gcc_x86_cpuid_1" >&6; } ac_ext=f ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_f77_compiler_gnu case $ax_cv_gcc_x86_cpuid_0 in *:756e6547:*:*) # Intel case $ax_cv_gcc_x86_cpuid_1 in *5[48]?:*:*:*) ax_gcc_arch="pentium-mmx pentium" ;; *5??:*:*:*) ax_gcc_arch=pentium ;; *6[3456]?:*:*:*) ax_gcc_arch="pentium2 pentiumpro" ;; *6a?:*[01]:*:*) ax_gcc_arch="pentium2 pentiumpro" ;; *6a?:*[234]:*:*) ax_gcc_arch="pentium3 pentiumpro" ;; *6[9d]?:*:*:*) ax_gcc_arch="pentium-m pentium3 pentiumpro" ;; *6[78b]?:*:*:*) ax_gcc_arch="pentium3 pentiumpro" ;; *6??:*:*:*) ax_gcc_arch=pentiumpro ;; *f3[347]:*:*:*|*f41347:*:*:*) case $host_cpu in x86_64*) ax_gcc_arch="nocona pentium4 pentiumpro" ;; *) ax_gcc_arch="prescott pentium4 pentiumpro" ;; esac ;; *f??:*:*:*) ax_gcc_arch="pentium4 pentiumpro";; esac ;; *:68747541:*:*) # AMD case $ax_cv_gcc_x86_cpuid_1 in *5[67]?:*:*:*) ax_gcc_arch=k6 ;; *5[8d]?:*:*:*) ax_gcc_arch="k6-2 k6" ;; *5[9]?:*:*:*) ax_gcc_arch="k6-3 k6" ;; *60?:*:*:*) ax_gcc_arch=k7 ;; *6[12]?:*:*:*) ax_gcc_arch="athlon k7" ;; *6[34]?:*:*:*) ax_gcc_arch="athlon-tbird k7" ;; *67?:*:*:*) ax_gcc_arch="athlon-4 athlon k7" ;; *6[68a]?:*:*:*) ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu { echo 
"$as_me:$LINENO: checking for x86 cpuid 0x80000006 output" >&5 echo $ECHO_N "checking for x86 cpuid 0x80000006 output... $ECHO_C" >&6; } if test "${ax_cv_gcc_x86_cpuid_0x80000006+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else if test "$cross_compiling" = yes; then ax_cv_gcc_x86_cpuid_0x80000006=unknown else cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include int main () { int op = 0x80000006, eax, ebx, ecx, edx; FILE *f; __asm__("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (op)); f = fopen("conftest_cpuid", "w"); if (!f) return 1; fprintf(f, "%x:%x:%x:%x\n", eax, ebx, ecx, edx); fclose(f); return 0; ; return 0; } _ACEOF rm -f conftest$ac_exeext if { (ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_link") 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { ac_try='./conftest$ac_exeext' { (case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_try") 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 (exit $ac_status); }; }; then ax_cv_gcc_x86_cpuid_0x80000006=`cat conftest_cpuid`; rm -f conftest_cpuid else echo "$as_me: program exited with status $ac_status" >&5 echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ( exit $ac_status ) ax_cv_gcc_x86_cpuid_0x80000006=unknown; rm -f conftest_cpuid fi rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext fi fi { echo "$as_me:$LINENO: result: $ax_cv_gcc_x86_cpuid_0x80000006" >&5 echo "${ECHO_T}$ax_cv_gcc_x86_cpuid_0x80000006" >&6; } ac_ext=f ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_f77_compiler_gnu # L2 cache size case $ax_cv_gcc_x86_cpuid_0x80000006 in *:*:*[1-9a-f]??????:*) # (L2 = ecx >> 16) >= 256 ax_gcc_arch="athlon-xp athlon-4 athlon k7" ;; *) ax_gcc_arch="athlon-4 athlon k7" ;; esac ;; *f[4cef8b]?:*:*:*) ax_gcc_arch="athlon64 k8" ;; *f5?:*:*:*) ax_gcc_arch="opteron k8" ;; *f7?:*:*:*) ax_gcc_arch="athlon-fx opteron k8" ;; *f??:*:*:*) ax_gcc_arch="k8" ;; esac ;; *:746e6543:*:*) # IDT case $ax_cv_gcc_x86_cpuid_1 in *54?:*:*:*) ax_gcc_arch=winchip-c6 ;; *58?:*:*:*) ax_gcc_arch=winchip2 ;; *6[78]?:*:*:*) ax_gcc_arch=c3 ;; *69?:*:*:*) ax_gcc_arch="c3-2 c3" ;; esac ;; esac if test x"$ax_gcc_arch" = x; then # fallback case $host_cpu in i586*) ax_gcc_arch=pentium ;; i686*) ax_gcc_arch=pentiumpro ;; esac fi ;; sparc*) # Extract the first word of "prtdiag", so it can be a program name with args. set dummy prtdiag; ac_word=$2 { echo "$as_me:$LINENO: checking for $ac_word" >&5 echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } if test "${ac_cv_path_PRTDIAG+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else case $PRTDIAG in [\\/]* | ?:[\\/]*) ac_cv_path_PRTDIAG="$PRTDIAG" # Let the user override the test with a path. 
;;
  *)
    # $PRTDIAG is not an absolute path: look for $ac_word in $PATH and in
    # the /usr/platform/<uname -i|-m>/sbin directories.
    as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
    as_dummy="$PATH:/usr/platform/`uname -i`/sbin/:/usr/platform/`uname -m`/sbin/"
    for as_dir in $as_dummy
    do
      IFS=$as_save_IFS
      test -z "$as_dir" && as_dir=.
      for ac_exec_ext in '' $ac_executable_extensions; do
        if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
          ac_cv_path_PRTDIAG="$as_dir/$ac_word$ac_exec_ext"
          echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
          break 2
        fi
      done
    done
    IFS=$as_save_IFS

    # Fall back to the bare program name when nothing was found.
    test -z "$ac_cv_path_PRTDIAG" && ac_cv_path_PRTDIAG="prtdiag"
    ;;
esac
fi
PRTDIAG=$ac_cv_path_PRTDIAG
if test -n "$PRTDIAG"; then
  { echo "$as_me:$LINENO: result: $PRTDIAG" >&5
    echo "${ECHO_T}$PRTDIAG" >&6; }
else
  { echo "$as_me:$LINENO: result: no" >&5
    echo "${ECHO_T}no" >&6; }
fi

    # Use the first CPU description printed by /proc/cpuinfo, `$PRTDIAG -v`
    # or /var/run/dmesg.boot; errors from missing sources are discarded.
    # The backticks are kept on purpose: "$(((" would be read as arithmetic.
    cputype=`(((grep cpu /proc/cpuinfo | cut -d: -f2) ; ($PRTDIAG -v |grep -i sparc) ; grep -i cpu /var/run/dmesg.boot ) | head -n 1) 2> /dev/null`
    # Drop blanks and dashes, then map $as_cr_LETTERS to $as_cr_letters
    # (presumably upper- to lower-case; both are defined outside this block).
    cputype=`echo "$cputype" | tr -d ' -' |tr $as_cr_LETTERS $as_cr_letters`
    case $cputype in
      *ultrasparciv*) ax_gcc_arch="ultrasparc4 ultrasparc3 ultrasparc v9" ;;
      *ultrasparciii*) ax_gcc_arch="ultrasparc3 ultrasparc v9" ;;
      *ultrasparc*) ax_gcc_arch="ultrasparc v9" ;;
      *supersparc*|*tms390z5[05]*) ax_gcc_arch="supersparc v8" ;;
      *hypersparc*|*rt62[056]*) ax_gcc_arch="hypersparc v8" ;;
      *cypress*) ax_gcc_arch=cypress ;;
    esac ;;
  alphaev5) ax_gcc_arch=ev5 ;;
  alphaev56) ax_gcc_arch=ev56 ;;
  alphapca56) ax_gcc_arch="pca56 ev56" ;;
  alphapca57) ax_gcc_arch="pca57 pca56 ev56" ;;
  alphaev6) ax_gcc_arch=ev6 ;;
  alphaev67) ax_gcc_arch=ev67 ;;
  alphaev68) ax_gcc_arch="ev68 ev67" ;;
  alphaev69) ax_gcc_arch="ev69 ev68 ev67" ;;
  alphaev7) ax_gcc_arch="ev7 ev69 ev68 ev67" ;;
  alphaev79) ax_gcc_arch="ev79 ev7 ev69 ev68 ev67" ;;
  powerpc*)
    cputype=`((grep cpu /proc/cpuinfo | head -n 1 | cut -d: -f2 | cut -d, -f1 | sed 's/ //g') ; /usr/bin/machine ; /bin/machine; grep CPU /var/run/dmesg.boot | head -n 1 | cut -d" " -f2) 2> /dev/null`
    # $cputype is deliberately unquoted here: word splitting folds the
    # possibly multi-line output into one line before sed removes blanks.
    cputype=`echo $cputype | sed -e 's/ppc//g;s/ *//g'`
    case $cputype in
      *750*) ax_gcc_arch="750 G3" ;;
      *740[0-9]*) ax_gcc_arch="$cputype 7400 G4" ;;
      *74[4-5][0-9]*) ax_gcc_arch="$cputype 7450 G4" ;;
      *74[0-9][0-9]*) ax_gcc_arch="$cputype G4" ;;
      *970*) ax_gcc_arch="970 G5 power4";;
      *POWER4*|*power4*|*gq*) ax_gcc_arch="power4 970";;
      *POWER5*|*power5*|*gr*|*gs*) ax_gcc_arch="power5 power4 970";;
      603ev|8240) ax_gcc_arch="$cputype 603e 603";;
      *) ax_gcc_arch=$cputype ;;
    esac
    ax_gcc_arch="$ax_gcc_arch powerpc"
    ;;
esac
fi # not cross-compiling
fi # guess arch

# Probe the candidate architectures in list order; the first flag the
# Fortran 77 compiler accepts becomes $ax_cv_gcc_archflag.
# (Two separate tests joined by && replace the obsolescent `test ... -a ...'.)
if test "x$ax_gcc_arch" != x && test "x$ax_gcc_arch" != xno; then
for arch in $ax_gcc_arch; do
  if test "x$acx_maxopt_portable" = xyes; then # if we require portable code
    flags="-mtune=$arch"
    # -mcpu=$arch and m$arch generate nonportable code on every arch except
    # x86. And some other arches (e.g. Alpha) don't accept -mtune. Grrr.
    case $host_cpu in i*86|x86_64*) flags="$flags -mcpu=$arch -m$arch";; esac
  else
    flags="-march=$arch -mcpu=$arch -m$arch"
  fi
  for flag in $flags; do
    { echo "$as_me:$LINENO: checking whether Fortran 77 compiler accepts $flag" >&5
      echo $ECHO_N "checking whether Fortran 77 compiler accepts $flag... $ECHO_C" >&6; }
    # Compile a trivial program with FFLAGS set to just the flag under test.
    ax_save_FLAGS=$FFLAGS
    FFLAGS="$flag"
    # Fixed-form Fortran: statements must start in column 7, and the
    # here-document terminator must sit alone at the start of its line.
    cat >conftest.$ac_ext <<_ACEOF
      program main
      end
_ACEOF
    rm -f conftest.$ac_objext
    if { (ac_try="$ac_compile"
          case "(($ac_try" in
            *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
            *) ac_try_echo=$ac_try;;
          esac
          eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
         (eval "$ac_compile") 2>conftest.er1
         ac_status=$?
         grep -v '^ *+' conftest.er1 >conftest.err
         rm -f conftest.er1
         cat conftest.err >&5
         echo "$as_me:$LINENO: \$? = $ac_status" >&5
         (exit $ac_status); } && {
           test -z "$ac_f77_werror_flag" ||
           test ! -s conftest.err
         } && test -s conftest.$ac_objext; then
      eval `echo "ax_cv_f77_flags_$flag" | $as_tr_sh`=yes
    else
      echo "$as_me: failed program was:" >&5
      sed 's/^/| /' conftest.$ac_ext >&5

      eval `echo "ax_cv_f77_flags_$flag" | $as_tr_sh`=no
    fi

    rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
    FFLAGS=$ax_save_FLAGS

    eval ax_check_compiler_flags=$`echo "ax_cv_f77_flags_$flag" | $as_tr_sh`
    { echo "$as_me:$LINENO: result: $ax_check_compiler_flags" >&5
      echo "${ECHO_T}$ax_check_compiler_flags" >&6; }
    if test "x$ax_check_compiler_flags" = xyes; then
      ax_cv_gcc_archflag=$flag; break
    else
      :
    fi

  done
  # Stop at the first architecture for which some flag was accepted.
  test "x$ax_cv_gcc_archflag" = xunknown || break
done
fi

fi # $GCC=yes

fi
{ echo "$as_me:$LINENO: checking for gcc architecture flag" >&5
  echo $ECHO_N "checking for gcc architecture flag... $ECHO_C" >&6; }
{ echo "$as_me:$LINENO: result: $ax_cv_gcc_archflag" >&5
  echo "${ECHO_T}$ax_cv_gcc_archflag" >&6; }
if test "x$ax_cv_gcc_archflag" = xunknown; then
  :
else
  # The flag was validated with the Fortran 77 compiler, so it belongs in
  # FFLAGS; the original appended it to CXXFLAGS, leaving FFLAGS without it.
  FFLAGS="$FFLAGS $ax_cv_gcc_archflag"
fi

    ;;
esac
# NOTE(review): the `;; esac' above closes a case statement whose head lies
# before this section.

if test -z "$FFLAGS"; then
  echo ""
  echo "**********************************************************"
  echo "* WARNING: Don't know the best FFLAGS for this system *"
  echo "* Use ./configure FFLAGS=... to specify your own flags *"
  echo "* (otherwise, a default of FFLAGS=-O3 will be used) *"
  echo "**********************************************************"
  echo ""
  FFLAGS="-O3"
fi

# Final sanity check: the complete FFLAGS string must compile a trivial
# program; otherwise FFLAGS is emptied and the user is warned.
{ echo "$as_me:$LINENO: checking whether Fortran 77 compiler accepts $FFLAGS" >&5
  echo $ECHO_N "checking whether Fortran 77 compiler accepts $FFLAGS... $ECHO_C" >&6; }
ax_save_FLAGS=$FFLAGS
FFLAGS="$FFLAGS"
cat >conftest.$ac_ext <<_ACEOF
      program main
      end
_ACEOF
rm -f conftest.$ac_objext
if { (ac_try="$ac_compile"
      case "(($ac_try" in
        *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
        *) ac_try_echo=$ac_try;;
      esac
      eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
     (eval "$ac_compile") 2>conftest.er1
     ac_status=$?
     grep -v '^ *+' conftest.er1 >conftest.err
     rm -f conftest.er1
     cat conftest.err >&5
     echo "$as_me:$LINENO: \$? = $ac_status" >&5
     (exit $ac_status); } && {
       test -z "$ac_f77_werror_flag" ||
       test ! -s conftest.err
     } && test -s conftest.$ac_objext; then
  eval `echo "ax_cv_f77_flags_$FFLAGS" | $as_tr_sh`=yes
else
  echo "$as_me: failed program was:" >&5
  sed 's/^/| /' conftest.$ac_ext >&5

  eval `echo "ax_cv_f77_flags_$FFLAGS" | $as_tr_sh`=no
fi

rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
FFLAGS=$ax_save_FLAGS

eval ax_check_compiler_flags=$`echo "ax_cv_f77_flags_$FFLAGS" | $as_tr_sh`
{ echo "$as_me:$LINENO: result: $ax_check_compiler_flags" >&5
  echo "${ECHO_T}$ax_check_compiler_flags" >&6; }
if test "x$ax_check_compiler_flags" = xyes; then
  :
else
  echo ""
  echo "**********************************************************"
  echo "* WARNING: The guessed FFLAGS don't seem to work with *"
  echo "* your compiler. *"
  echo "* Use ./configure FFLAGS=... to specify your own flags *"
  echo "**********************************************************"
  echo ""
  FFLAGS=""
fi

fi

# Switch the current test language back to C.
ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu

fi
#if test "x$enable_fortran" = "xyes"; then
#fi

# Check whether --enable-shared was given.
# Accepts yes, no, or a list of package names; shared libraries are enabled
# when the list contains this package ($PACKAGE, or "default" if unset).
if test "${enable_shared+set}" = set; then
  enableval=$enable_shared; p=${PACKAGE-default}
  case $enableval in
    yes) enable_shared=yes ;;
    no) enable_shared=no ;;
    *)
      enable_shared=no
      # Look at the argument we got. We use all the common list separators.
      lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
      for pkg in $enableval; do
        IFS="$lt_save_ifs"
        if test "X$pkg" = "X$p"; then
          enable_shared=yes
        fi
      done
      IFS="$lt_save_ifs"
      ;;
  esac
else
  enable_shared=yes
fi

# Check whether --enable-static was given.
if test "${enable_static+set}" = set; then enableval=$enable_static; p=${PACKAGE-default} case $enableval in yes) enable_static=yes ;; no) enable_static=no ;; *) enable_static=no # Look at the argument we got. We use all the common list separators. lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR," for pkg in $enableval; do IFS="$lt_save_ifs" if test "X$pkg" = "X$p"; then enable_static=yes fi done IFS="$lt_save_ifs" ;; esac else enable_static=yes fi # Check whether --enable-fast-install was given. if test "${enable_fast_install+set}" = set; then enableval=$enable_fast_install; p=${PACKAGE-default} case $enableval in yes) enable_fast_install=yes ;; no) enable_fast_install=no ;; *) enable_fast_install=no # Look at the argument we got. We use all the common list separators. lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR," for pkg in $enableval; do IFS="$lt_save_ifs" if test "X$pkg" = "X$p"; then enable_fast_install=yes fi done IFS="$lt_save_ifs" ;; esac else enable_fast_install=yes fi { echo "$as_me:$LINENO: checking for a sed that does not truncate output" >&5 echo $ECHO_N "checking for a sed that does not truncate output... $ECHO_C" >&6; } if test "${lt_cv_path_SED+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else # Loop through the user's path and test for sed and gsed. # Then use that list of sed's as ones to test for truncation. as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for lt_ac_prog in sed gsed; do for ac_exec_ext in '' $ac_executable_extensions; do if { test -f "$as_dir/$lt_ac_prog$ac_exec_ext" && $as_test_x "$as_dir/$lt_ac_prog$ac_exec_ext"; }; then lt_ac_sed_list="$lt_ac_sed_list $as_dir/$lt_ac_prog$ac_exec_ext" fi done done done IFS=$as_save_IFS lt_ac_max=0 lt_ac_count=0 # Add /usr/xpg4/bin/sed as it is typically found on Solaris # along with /bin/sed that truncates output. for lt_ac_sed in $lt_ac_sed_list /usr/xpg4/bin/sed; do test ! 
-f $lt_ac_sed && continue cat /dev/null > conftest.in lt_ac_count=0 echo $ECHO_N "0123456789$ECHO_C" >conftest.in # Check for GNU sed and select it if it is found. if "$lt_ac_sed" --version 2>&1 < /dev/null | grep 'GNU' > /dev/null; then lt_cv_path_SED=$lt_ac_sed break fi while true; do cat conftest.in conftest.in >conftest.tmp mv conftest.tmp conftest.in cp conftest.in conftest.nl echo >>conftest.nl $lt_ac_sed -e 's/a$//' < conftest.nl >conftest.out || break cmp -s conftest.out conftest.nl || break # 10000 chars as input seems more than enough test $lt_ac_count -gt 10 && break lt_ac_count=`expr $lt_ac_count + 1` if test $lt_ac_count -gt $lt_ac_max; then lt_ac_max=$lt_ac_count lt_cv_path_SED=$lt_ac_sed fi done done fi SED=$lt_cv_path_SED { echo "$as_me:$LINENO: result: $SED" >&5 echo "${ECHO_T}$SED" >&6; } { echo "$as_me:$LINENO: checking for grep that handles long lines and -e" >&5 echo $ECHO_N "checking for grep that handles long lines and -e... $ECHO_C" >&6; } if test "${ac_cv_path_GREP+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else # Extract the first word of "grep ggrep" to use in msg output if test -z "$GREP"; then set dummy grep ggrep; ac_prog_name=$2 if test "${ac_cv_path_GREP+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else ac_path_GREP_found=false # Loop through the user's path and test for each of PROGNAME-LIST as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_prog in grep ggrep; do for ac_exec_ext in '' $ac_executable_extensions; do ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext" { test -f "$ac_path_GREP" && $as_test_x "$ac_path_GREP"; } || continue # Check for GNU ac_path_GREP and select it if it is found. 
# Check for GNU $ac_path_GREP case `"$ac_path_GREP" --version 2>&1` in *GNU*) ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;; *) ac_count=0 echo $ECHO_N "0123456789$ECHO_C" >"conftest.in" while : do cat "conftest.in" "conftest.in" >"conftest.tmp" mv "conftest.tmp" "conftest.in" cp "conftest.in" "conftest.nl" echo 'GREP' >> "conftest.nl" "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break ac_count=`expr $ac_count + 1` if test $ac_count -gt ${ac_path_GREP_max-0}; then # Best one so far, save it but keep looking for a better one ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_max=$ac_count fi # 10*(2^10) chars as input seems more than enough test $ac_count -gt 10 && break done rm -f conftest.in conftest.tmp conftest.nl conftest.out;; esac $ac_path_GREP_found && break 3 done done done IFS=$as_save_IFS fi GREP="$ac_cv_path_GREP" if test -z "$GREP"; then { { echo "$as_me:$LINENO: error: no acceptable $ac_prog_name could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&5 echo "$as_me: error: no acceptable $ac_prog_name could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&2;} { (exit 1); exit 1; }; } fi else ac_cv_path_GREP=$GREP fi fi { echo "$as_me:$LINENO: result: $ac_cv_path_GREP" >&5 echo "${ECHO_T}$ac_cv_path_GREP" >&6; } GREP="$ac_cv_path_GREP" { echo "$as_me:$LINENO: checking for egrep" >&5 echo $ECHO_N "checking for egrep... 
$ECHO_C" >&6; } if test "${ac_cv_path_EGREP+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else if echo a | $GREP -E '(a|b)' >/dev/null 2>&1 then ac_cv_path_EGREP="$GREP -E" else # Extract the first word of "egrep" to use in msg output if test -z "$EGREP"; then set dummy egrep; ac_prog_name=$2 if test "${ac_cv_path_EGREP+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else ac_path_EGREP_found=false # Loop through the user's path and test for each of PROGNAME-LIST as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_prog in egrep; do for ac_exec_ext in '' $ac_executable_extensions; do ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext" { test -f "$ac_path_EGREP" && $as_test_x "$ac_path_EGREP"; } || continue # Check for GNU ac_path_EGREP and select it if it is found. # Check for GNU $ac_path_EGREP case `"$ac_path_EGREP" --version 2>&1` in *GNU*) ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;; *) ac_count=0 echo $ECHO_N "0123456789$ECHO_C" >"conftest.in" while : do cat "conftest.in" "conftest.in" >"conftest.tmp" mv "conftest.tmp" "conftest.in" cp "conftest.in" "conftest.nl" echo 'EGREP' >> "conftest.nl" "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break ac_count=`expr $ac_count + 1` if test $ac_count -gt ${ac_path_EGREP_max-0}; then # Best one so far, save it but keep looking for a better one ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_max=$ac_count fi # 10*(2^10) chars as input seems more than enough test $ac_count -gt 10 && break done rm -f conftest.in conftest.tmp conftest.nl conftest.out;; esac $ac_path_EGREP_found && break 3 done done done IFS=$as_save_IFS fi EGREP="$ac_cv_path_EGREP" if test -z "$EGREP"; then { { echo "$as_me:$LINENO: error: no acceptable $ac_prog_name could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&5 echo "$as_me: error: no acceptable 
$ac_prog_name could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&2;} { (exit 1); exit 1; }; } fi else ac_cv_path_EGREP=$EGREP fi fi fi { echo "$as_me:$LINENO: result: $ac_cv_path_EGREP" >&5 echo "${ECHO_T}$ac_cv_path_EGREP" >&6; } EGREP="$ac_cv_path_EGREP" # Check whether --with-gnu-ld was given. if test "${with_gnu_ld+set}" = set; then withval=$with_gnu_ld; test "$withval" = no || with_gnu_ld=yes else with_gnu_ld=no fi ac_prog=ld if test "$GCC" = yes; then # Check if gcc -print-prog-name=ld gives a path. { echo "$as_me:$LINENO: checking for ld used by $CC" >&5 echo $ECHO_N "checking for ld used by $CC... $ECHO_C" >&6; } case $host in *-*-mingw*) # gcc leaves a trailing carriage return which upsets mingw ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;; *) ac_prog=`($CC -print-prog-name=ld) 2>&5` ;; esac case $ac_prog in # Accept absolute paths. [\\/]* | ?:[\\/]*) re_direlt='/[^/][^/]*/\.\./' # Canonicalize the pathname of ld ac_prog=`echo $ac_prog| $SED 's%\\\\%/%g'` while echo $ac_prog | grep "$re_direlt" > /dev/null 2>&1; do ac_prog=`echo $ac_prog| $SED "s%$re_direlt%/%"` done test -z "$LD" && LD="$ac_prog" ;; "") # If it fails, then pretend we aren't using GCC. ac_prog=ld ;; *) # If it is relative, then search for the first ld in PATH. with_gnu_ld=unknown ;; esac elif test "$with_gnu_ld" = yes; then { echo "$as_me:$LINENO: checking for GNU ld" >&5 echo $ECHO_N "checking for GNU ld... $ECHO_C" >&6; } else { echo "$as_me:$LINENO: checking for non-GNU ld" >&5 echo $ECHO_N "checking for non-GNU ld... $ECHO_C" >&6; } fi if test "${lt_cv_path_LD+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else if test -z "$LD"; then lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR for ac_dir in $PATH; do IFS="$lt_save_ifs" test -z "$ac_dir" && ac_dir=. if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then lt_cv_path_LD="$ac_dir/$ac_prog" # Check to see if the program is GNU ld. 
I'd rather use --version, # but apparently some variants of GNU ld only accept -v. # Break only if it was the GNU/non-GNU ld that we prefer. case `"$lt_cv_path_LD" -v 2>&1 &5 echo "${ECHO_T}$LD" >&6; } else { echo "$as_me:$LINENO: result: no" >&5 echo "${ECHO_T}no" >&6; } fi test -z "$LD" && { { echo "$as_me:$LINENO: error: no acceptable ld found in \$PATH" >&5 echo "$as_me: error: no acceptable ld found in \$PATH" >&2;} { (exit 1); exit 1; }; } { echo "$as_me:$LINENO: checking if the linker ($LD) is GNU ld" >&5 echo $ECHO_N "checking if the linker ($LD) is GNU ld... $ECHO_C" >&6; } if test "${lt_cv_prog_gnu_ld+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else # I'd rather use --version here, but apparently some GNU lds only accept -v. case `$LD -v 2>&1 &5 echo "${ECHO_T}$lt_cv_prog_gnu_ld" >&6; } with_gnu_ld=$lt_cv_prog_gnu_ld { echo "$as_me:$LINENO: checking for $LD option to reload object files" >&5 echo $ECHO_N "checking for $LD option to reload object files... $ECHO_C" >&6; } if test "${lt_cv_ld_reload_flag+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else lt_cv_ld_reload_flag='-r' fi { echo "$as_me:$LINENO: result: $lt_cv_ld_reload_flag" >&5 echo "${ECHO_T}$lt_cv_ld_reload_flag" >&6; } reload_flag=$lt_cv_ld_reload_flag case $reload_flag in "" | " "*) ;; *) reload_flag=" $reload_flag" ;; esac reload_cmds='$LD$reload_flag -o $output$reload_objs' case $host_os in darwin*) if test "$GCC" = yes; then reload_cmds='$LTCC $LTCFLAGS -nostdlib ${wl}-r -o $output$reload_objs' else reload_cmds='$LD$reload_flag -o $output$reload_objs' fi ;; esac { echo "$as_me:$LINENO: checking for BSD-compatible nm" >&5 echo $ECHO_N "checking for BSD-compatible nm... $ECHO_C" >&6; } if test "${lt_cv_path_NM+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else if test -n "$NM"; then # Let the user override the test. 
lt_cv_path_NM="$NM" else lt_nm_to_check="${ac_tool_prefix}nm" if test -n "$ac_tool_prefix" && test "$build" = "$host"; then lt_nm_to_check="$lt_nm_to_check nm" fi for lt_tmp_nm in $lt_nm_to_check; do lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR for ac_dir in $PATH /usr/ccs/bin/elf /usr/ccs/bin /usr/ucb /bin; do IFS="$lt_save_ifs" test -z "$ac_dir" && ac_dir=. tmp_nm="$ac_dir/$lt_tmp_nm" if test -f "$tmp_nm" || test -f "$tmp_nm$ac_exeext" ; then # Check to see if the nm accepts a BSD-compat flag. # Adding the `sed 1q' prevents false positives on HP-UX, which says: # nm: unknown option "B" ignored # Tru64's nm complains that /dev/null is an invalid object file case `"$tmp_nm" -B /dev/null 2>&1 | sed '1q'` in */dev/null* | *'Invalid file or object type'*) lt_cv_path_NM="$tmp_nm -B" break ;; *) case `"$tmp_nm" -p /dev/null 2>&1 | sed '1q'` in */dev/null*) lt_cv_path_NM="$tmp_nm -p" break ;; *) lt_cv_path_NM=${lt_cv_path_NM="$tmp_nm"} # keep the first match, but continue # so that we can try to find one that supports BSD flags ;; esac ;; esac fi done IFS="$lt_save_ifs" done test -z "$lt_cv_path_NM" && lt_cv_path_NM=nm fi fi { echo "$as_me:$LINENO: result: $lt_cv_path_NM" >&5 echo "${ECHO_T}$lt_cv_path_NM" >&6; } NM="$lt_cv_path_NM" { echo "$as_me:$LINENO: checking whether ln -s works" >&5 echo $ECHO_N "checking whether ln -s works... $ECHO_C" >&6; } LN_S=$as_ln_s if test "$LN_S" = "ln -s"; then { echo "$as_me:$LINENO: result: yes" >&5 echo "${ECHO_T}yes" >&6; } else { echo "$as_me:$LINENO: result: no, using $LN_S" >&5 echo "${ECHO_T}no, using $LN_S" >&6; } fi { echo "$as_me:$LINENO: checking how to recognize dependent libraries" >&5 echo $ECHO_N "checking how to recognize dependent libraries... 
$ECHO_C" >&6; } if test "${lt_cv_deplibs_check_method+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else lt_cv_file_magic_cmd='$MAGIC_CMD' lt_cv_file_magic_test_file= lt_cv_deplibs_check_method='unknown' # Need to set the preceding variable on all platforms that support # interlibrary dependencies. # 'none' -- dependencies not supported. # `unknown' -- same as none, but documents that we really don't know. # 'pass_all' -- all dependencies passed with no checks. # 'test_compile' -- check by making test program. # 'file_magic [[regex]]' -- check by looking for files in library path # which responds to the $file_magic_cmd with a given extended regex. # If you have `file' or equivalent on your system and you're not sure # whether `pass_all' will *always* work, you probably want this one. case $host_os in aix4* | aix5*) lt_cv_deplibs_check_method=pass_all ;; beos*) lt_cv_deplibs_check_method=pass_all ;; bsdi[45]*) lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (shared object|dynamic lib)' lt_cv_file_magic_cmd='/usr/bin/file -L' lt_cv_file_magic_test_file=/shlib/libc.so ;; cygwin*) # func_win32_libid is a shell function defined in ltmain.sh lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL' lt_cv_file_magic_cmd='func_win32_libid' ;; mingw* | pw32*) # Base MSYS/MinGW do not provide the 'file' command needed by # func_win32_libid shell function, so use a weaker test based on 'objdump', # unless we find 'file', for example because we are cross-compiling. if ( file / ) >/dev/null 2>&1; then lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL' lt_cv_file_magic_cmd='func_win32_libid' else lt_cv_deplibs_check_method='file_magic file format pei*-i386(.*architecture: i386)?' 
lt_cv_file_magic_cmd='$OBJDUMP -f' fi ;; darwin* | rhapsody*) lt_cv_deplibs_check_method=pass_all ;; freebsd* | dragonfly*) if echo __ELF__ | $CC -E - | grep __ELF__ > /dev/null; then case $host_cpu in i*86 ) # Not sure whether the presence of OpenBSD here was a mistake. # Let's accept both of them until this is cleared up. lt_cv_deplibs_check_method='file_magic (FreeBSD|OpenBSD|DragonFly)/i[3-9]86 (compact )?demand paged shared library' lt_cv_file_magic_cmd=/usr/bin/file lt_cv_file_magic_test_file=`echo /usr/lib/libc.so.*` ;; esac else lt_cv_deplibs_check_method=pass_all fi ;; gnu*) lt_cv_deplibs_check_method=pass_all ;; hpux10.20* | hpux11*) lt_cv_file_magic_cmd=/usr/bin/file case $host_cpu in ia64*) lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|ELF-[0-9][0-9]) shared object file - IA64' lt_cv_file_magic_test_file=/usr/lib/hpux32/libc.so ;; hppa*64*) lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|ELF-[0-9][0-9]) shared object file - PA-RISC [0-9].[0-9]' lt_cv_file_magic_test_file=/usr/lib/pa20_64/libc.sl ;; *) lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|PA-RISC[0-9].[0-9]) shared library' lt_cv_file_magic_test_file=/usr/lib/libc.sl ;; esac ;; interix[3-9]*) # PIC code is broken on Interix 3.x, that's why |\.a not |_pic\.a here lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so|\.a)$' ;; irix5* | irix6* | nonstopux*) case $LD in *-32|*"-32 ") libmagic=32-bit;; *-n32|*"-n32 ") libmagic=N32;; *-64|*"-64 ") libmagic=64-bit;; *) libmagic=never-match;; esac lt_cv_deplibs_check_method=pass_all ;; # This must be Linux ELF. 
linux* | k*bsd*-gnu) lt_cv_deplibs_check_method=pass_all ;; netbsd*) if echo __ELF__ | $CC -E - | grep __ELF__ > /dev/null; then lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so\.[0-9]+\.[0-9]+|_pic\.a)$' else lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so|_pic\.a)$' fi ;; newos6*) lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (executable|dynamic lib)' lt_cv_file_magic_cmd=/usr/bin/file lt_cv_file_magic_test_file=/usr/lib/libnls.so ;; nto-qnx*) lt_cv_deplibs_check_method=unknown ;; openbsd*) if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so\.[0-9]+\.[0-9]+|\.so|_pic\.a)$' else lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so\.[0-9]+\.[0-9]+|_pic\.a)$' fi ;; osf3* | osf4* | osf5*) lt_cv_deplibs_check_method=pass_all ;; rdos*) lt_cv_deplibs_check_method=pass_all ;; solaris*) lt_cv_deplibs_check_method=pass_all ;; sysv4 | sysv4.3*) case $host_vendor in motorola) lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (shared object|dynamic lib) M[0-9][0-9]* Version [0-9]' lt_cv_file_magic_test_file=`echo /usr/lib/libc.so*` ;; ncr) lt_cv_deplibs_check_method=pass_all ;; sequent) lt_cv_file_magic_cmd='/bin/file' lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [LM]SB (shared object|dynamic lib )' ;; sni) lt_cv_file_magic_cmd='/bin/file' lt_cv_deplibs_check_method="file_magic ELF [0-9][0-9]*-bit [LM]SB dynamic lib" lt_cv_file_magic_test_file=/lib/libc.so ;; siemens) lt_cv_deplibs_check_method=pass_all ;; pc) lt_cv_deplibs_check_method=pass_all ;; esac ;; sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) lt_cv_deplibs_check_method=pass_all ;; esac fi { echo "$as_me:$LINENO: result: $lt_cv_deplibs_check_method" >&5 echo "${ECHO_T}$lt_cv_deplibs_check_method" >&6; } file_magic_cmd=$lt_cv_file_magic_cmd deplibs_check_method=$lt_cv_deplibs_check_method test -z 
"$deplibs_check_method" && deplibs_check_method=unknown # If no C compiler was specified, use CC. LTCC=${LTCC-"$CC"} # If no C compiler flags were specified, use CFLAGS. LTCFLAGS=${LTCFLAGS-"$CFLAGS"} # Allow CC to be a program name with arguments. compiler=$CC # Check whether --enable-libtool-lock was given. if test "${enable_libtool_lock+set}" = set; then enableval=$enable_libtool_lock; fi test "x$enable_libtool_lock" != xno && enable_libtool_lock=yes # Some flags need to be propagated to the compiler or linker for good # libtool support. case $host in ia64-*-hpux*) # Find out which ABI we are using. echo 'int i;' > conftest.$ac_ext if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 (eval $ac_compile) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; then case `/usr/bin/file conftest.$ac_objext` in *ELF-32*) HPUX_IA64_MODE="32" ;; *ELF-64*) HPUX_IA64_MODE="64" ;; esac fi rm -rf conftest* ;; *-*-irix6*) # Find out which ABI we are using. echo '#line 9204 "configure"' > conftest.$ac_ext if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 (eval $ac_compile) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; then if test "$lt_cv_prog_gnu_ld" = yes; then case `/usr/bin/file conftest.$ac_objext` in *32-bit*) LD="${LD-ld} -melf32bsmip" ;; *N32*) LD="${LD-ld} -melf32bmipn32" ;; *64-bit*) LD="${LD-ld} -melf64bmip" ;; esac else case `/usr/bin/file conftest.$ac_objext` in *32-bit*) LD="${LD-ld} -32" ;; *N32*) LD="${LD-ld} -n32" ;; *64-bit*) LD="${LD-ld} -64" ;; esac fi fi rm -rf conftest* ;; x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \ s390*-*linux*|sparc*-*linux*) # Find out which ABI we are using. echo 'int i;' > conftest.$ac_ext if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 (eval $ac_compile) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 (exit $ac_status); }; then case `/usr/bin/file conftest.o` in *32-bit*) case $host in x86_64-*kfreebsd*-gnu) LD="${LD-ld} -m elf_i386_fbsd" ;; x86_64-*linux*) LD="${LD-ld} -m elf_i386" ;; ppc64-*linux*|powerpc64-*linux*) LD="${LD-ld} -m elf32ppclinux" ;; s390x-*linux*) LD="${LD-ld} -m elf_s390" ;; sparc64-*linux*) LD="${LD-ld} -m elf32_sparc" ;; esac ;; *64-bit*) libsuff=64 case $host in x86_64-*kfreebsd*-gnu) LD="${LD-ld} -m elf_x86_64_fbsd" ;; x86_64-*linux*) LD="${LD-ld} -m elf_x86_64" ;; ppc*-*linux*|powerpc*-*linux*) LD="${LD-ld} -m elf64ppc" ;; s390*-*linux*) LD="${LD-ld} -m elf64_s390" ;; sparc*-*linux*) LD="${LD-ld} -m elf64_sparc" ;; esac ;; esac fi rm -rf conftest* ;; *-*-sco3.2v5*) # On SCO OpenServer 5, we need -belf to get full-featured binaries. SAVE_CFLAGS="$CFLAGS" CFLAGS="$CFLAGS -belf" { echo "$as_me:$LINENO: checking whether the C compiler needs -belf" >&5 echo $ECHO_N "checking whether the C compiler needs -belf... $ECHO_C" >&6; } if test "${lt_cv_cc_needs_belf+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ int main () { ; return 0; } _ACEOF rm -f conftest.$ac_objext conftest$ac_exeext if { (ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_link") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! 
-s conftest.err } && test -s conftest$ac_exeext && $as_test_x conftest$ac_exeext; then lt_cv_cc_needs_belf=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 lt_cv_cc_needs_belf=no fi rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ conftest$ac_exeext conftest.$ac_ext ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu fi { echo "$as_me:$LINENO: result: $lt_cv_cc_needs_belf" >&5 echo "${ECHO_T}$lt_cv_cc_needs_belf" >&6; } if test x"$lt_cv_cc_needs_belf" != x"yes"; then # this is probably gcc 2.8.0, egcs 1.0 or newer; no need for -belf CFLAGS="$SAVE_CFLAGS" fi ;; sparc*-*solaris*) # Find out which ABI we are using. echo 'int i;' > conftest.$ac_ext if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 (eval $ac_compile) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; then case `/usr/bin/file conftest.o` in *64-bit*) case $lt_cv_prog_gnu_ld in yes*) LD="${LD-ld} -m elf64_sparc" ;; *) LD="${LD-ld} -64" ;; esac ;; esac fi rm -rf conftest* ;; esac need_locks="$enable_libtool_lock" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu { echo "$as_me:$LINENO: checking how to run the C preprocessor" >&5 echo $ECHO_N "checking how to run the C preprocessor... $ECHO_C" >&6; } # On Suns, sometimes $CPP names a directory. 
# ---------------------------------------------------------------------------
# C preprocessor detection followed by the ANSI C header check.
#
# Reads:  CC, CPP, CPPFLAGS, ac_cpp, ac_compile, ac_link, ac_ext, ac_objext,
#         ac_exeext, EGREP, cross_compiling, confdefs.h, cache variables.
# Writes: CPP, ac_cv_prog_CPP, ac_cv_header_stdc; appends STDC_HEADERS to
#         confdefs.h when all header probes succeed.
# Exits with status 1 when no preprocessor passes the sanity probes.
#
# The header names inside the conftest programs had been stripped from this
# copy of the script; they are restored here so the probes are valid C again.
# ---------------------------------------------------------------------------

# A directory can never be a preprocessor command; discard such a value.
if test -n "$CPP" && test -d "$CPP"; then
  CPP=
fi
if test -z "$CPP"; then
  if test "${ac_cv_prog_CPP+set}" = set; then
    echo $ECHO_N "(cached) $ECHO_C" >&6
  else
    # Double quotes because CPP needs to be expanded
    for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp"
    do
      ac_preproc_ok=false
      for ac_c_preproc_warn_flag in '' yes
      do
        # Use a header file that comes with gcc, so configuring glibc
        # with a fresh cross-compiler works.
        # Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
        # <limits.h> exists even on freestanding compilers.
        # On the NeXT, cc -E runs the code through the compiler's parser,
        # not just through cpp. "Syntax error" is here to catch this case.
        cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h.  */
_ACEOF
        cat confdefs.h >>conftest.$ac_ext
        cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h.  */
#ifdef __STDC__
# include <limits.h>
#else
# include <assert.h>
#endif
		     Syntax error
_ACEOF
        if { (ac_try="$ac_cpp conftest.$ac_ext"
              case "(($ac_try" in
                *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
                *) ac_try_echo=$ac_try;;
              esac
              eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
             (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
             ac_status=$?
             grep -v '^ *+' conftest.er1 >conftest.err
             rm -f conftest.er1
             cat conftest.err >&5
             echo "$as_me:$LINENO: \$? = $ac_status" >&5
             (exit $ac_status); } >/dev/null && {
             test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
             test ! -s conftest.err
           }; then
          :
        else
          echo "$as_me: failed program was:" >&5
          sed 's/^/| /' conftest.$ac_ext >&5

          # Broken: fails on valid input.
          continue
        fi

        rm -f conftest.err conftest.$ac_ext

        # OK, works on sane cases.  Now check whether nonexistent headers
        # can be detected and how.
        cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h.  */
_ACEOF
        cat confdefs.h >>conftest.$ac_ext
        cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h.  */
#include <ac_nonexistent.h>
_ACEOF
        if { (ac_try="$ac_cpp conftest.$ac_ext"
              case "(($ac_try" in
                *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
                *) ac_try_echo=$ac_try;;
              esac
              eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
             (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
             ac_status=$?
             grep -v '^ *+' conftest.er1 >conftest.err
             rm -f conftest.er1
             cat conftest.err >&5
             echo "$as_me:$LINENO: \$? = $ac_status" >&5
             (exit $ac_status); } >/dev/null && {
             test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
             test ! -s conftest.err
           }; then
          # Broken: success on invalid input.
          continue
        else
          echo "$as_me: failed program was:" >&5
          sed 's/^/| /' conftest.$ac_ext >&5

          # Passes both tests.
          ac_preproc_ok=:
          break
        fi

        rm -f conftest.err conftest.$ac_ext

      done
      # Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
      rm -f conftest.err conftest.$ac_ext
      if $ac_preproc_ok; then
        break
      fi

    done
    ac_cv_prog_CPP=$CPP

  fi
  CPP=$ac_cv_prog_CPP
else
  ac_cv_prog_CPP=$CPP
fi
{ echo "$as_me:$LINENO: result: $CPP" >&5
echo "${ECHO_T}$CPP" >&6; }

# Re-run both probes against the final $CPP (it may have come from the
# environment or the cache and was then never exercised above).
ac_preproc_ok=false
for ac_c_preproc_warn_flag in '' yes
do
  # Use a header file that comes with gcc, so configuring glibc
  # with a fresh cross-compiler works.
  # Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
  # <limits.h> exists even on freestanding compilers.
  # On the NeXT, cc -E runs the code through the compiler's parser,
  # not just through cpp. "Syntax error" is here to catch this case.
  cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h.  */
_ACEOF
  cat confdefs.h >>conftest.$ac_ext
  cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h.  */
#ifdef __STDC__
# include <limits.h>
#else
# include <assert.h>
#endif
		     Syntax error
_ACEOF
  if { (ac_try="$ac_cpp conftest.$ac_ext"
        case "(($ac_try" in
          *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
          *) ac_try_echo=$ac_try;;
        esac
        eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
       (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
       ac_status=$?
       grep -v '^ *+' conftest.er1 >conftest.err
       rm -f conftest.er1
       cat conftest.err >&5
       echo "$as_me:$LINENO: \$? = $ac_status" >&5
       (exit $ac_status); } >/dev/null && {
       test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
       test ! -s conftest.err
     }; then
    :
  else
    echo "$as_me: failed program was:" >&5
    sed 's/^/| /' conftest.$ac_ext >&5

    # Broken: fails on valid input.
    continue
  fi

  rm -f conftest.err conftest.$ac_ext

  # OK, works on sane cases.  Now check whether nonexistent headers
  # can be detected and how.
  cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h.  */
_ACEOF
  cat confdefs.h >>conftest.$ac_ext
  cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h.  */
#include <ac_nonexistent.h>
_ACEOF
  if { (ac_try="$ac_cpp conftest.$ac_ext"
        case "(($ac_try" in
          *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
          *) ac_try_echo=$ac_try;;
        esac
        eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
       (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
       ac_status=$?
       grep -v '^ *+' conftest.er1 >conftest.err
       rm -f conftest.er1
       cat conftest.err >&5
       echo "$as_me:$LINENO: \$? = $ac_status" >&5
       (exit $ac_status); } >/dev/null && {
       test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
       test ! -s conftest.err
     }; then
    # Broken: success on invalid input.
    continue
  else
    echo "$as_me: failed program was:" >&5
    sed 's/^/| /' conftest.$ac_ext >&5

    # Passes both tests.
    ac_preproc_ok=:
    break
  fi

  rm -f conftest.err conftest.$ac_ext

done
# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
rm -f conftest.err conftest.$ac_ext
if $ac_preproc_ok; then
  :
else
  { { echo "$as_me:$LINENO: error: C preprocessor \"$CPP\" fails sanity check See \`config.log' for more details." >&5
echo "$as_me: error: C preprocessor \"$CPP\" fails sanity check See \`config.log' for more details." >&2;}
   { (exit 1); exit 1; }; }
fi

# Restore the C language test settings.
ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu


# --- ANSI C header files -----------------------------------------------------
{ echo "$as_me:$LINENO: checking for ANSI C header files" >&5
echo $ECHO_N "checking for ANSI C header files... $ECHO_C" >&6; }
if test "${ac_cv_header_stdc+set}" = set; then
  echo $ECHO_N "(cached) $ECHO_C" >&6
else
  # Step 1: the four core headers must compile together.
  cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h.  */
_ACEOF
  cat confdefs.h >>conftest.$ac_ext
  cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h.  */
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <float.h>

int
main ()
{

  ;
  return 0;
}
_ACEOF
  rm -f conftest.$ac_objext
  if { (ac_try="$ac_compile"
        case "(($ac_try" in
          *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
          *) ac_try_echo=$ac_try;;
        esac
        eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
       (eval "$ac_compile") 2>conftest.er1
       ac_status=$?
       grep -v '^ *+' conftest.er1 >conftest.err
       rm -f conftest.er1
       cat conftest.err >&5
       echo "$as_me:$LINENO: \$? = $ac_status" >&5
       (exit $ac_status); } && {
       test -z "$ac_c_werror_flag" ||
       test ! -s conftest.err
     } && test -s conftest.$ac_objext; then
    ac_cv_header_stdc=yes
  else
    echo "$as_me: failed program was:" >&5
    sed 's/^/| /' conftest.$ac_ext >&5

    ac_cv_header_stdc=no
  fi

  rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext

  if test "$ac_cv_header_stdc" = yes; then
    # SunOS 4.x string.h does not declare mem*, contrary to ANSI.
    cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h.  */
_ACEOF
    cat confdefs.h >>conftest.$ac_ext
    cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h.  */
#include <string.h>

_ACEOF
    if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
      $EGREP "memchr" >/dev/null 2>&1; then
      :
    else
      ac_cv_header_stdc=no
    fi
    rm -f conftest*

  fi

  if test "$ac_cv_header_stdc" = yes; then
    # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI.
    cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h.  */
_ACEOF
    cat confdefs.h >>conftest.$ac_ext
    cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h.  */
#include <stdlib.h>

_ACEOF
    if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
      $EGREP "free" >/dev/null 2>&1; then
      :
    else
      ac_cv_header_stdc=no
    fi
    rm -f conftest*

  fi

  if test "$ac_cv_header_stdc" = yes; then
    # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi.
    if test "$cross_compiling" = yes; then
      # The probe below has to be executed, which a cross build cannot do.
      :
    else
      cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h.  */
_ACEOF
      cat confdefs.h >>conftest.$ac_ext
      cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h.  */
#include <ctype.h>
#include <stdlib.h>
#if ((' ' & 0x0FF) == 0x020)
# define ISLOWER(c) ('a' <= (c) && (c) <= 'z')
# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c))
#else
# define ISLOWER(c) \
		   (('a' <= (c) && (c) <= 'i') \
		     || ('j' <= (c) && (c) <= 'r') \
		     || ('s' <= (c) && (c) <= 'z'))
# define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c))
#endif

#define XOR(e, f) (((e) && !(f)) || (!(e) && (f)))
int
main ()
{
  int i;
  for (i = 0; i < 256; i++)
    if (XOR (islower (i), ISLOWER (i))
	|| toupper (i) != TOUPPER (i))
      return 2;
  return 0;
}
_ACEOF
      rm -f conftest$ac_exeext
      if { (ac_try="$ac_link"
            case "(($ac_try" in
              *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
              *) ac_try_echo=$ac_try;;
            esac
            eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
           (eval "$ac_link") 2>&5
           ac_status=$?
           echo "$as_me:$LINENO: \$? = $ac_status" >&5
           (exit $ac_status); } && { ac_try='./conftest$ac_exeext'
           { (case "(($ac_try" in
                *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
                *) ac_try_echo=$ac_try;;
              esac
              eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
             (eval "$ac_try") 2>&5
             ac_status=$?
             echo "$as_me:$LINENO: \$? = $ac_status" >&5
             (exit $ac_status); }; }; then
        :
      else
        echo "$as_me: program exited with status $ac_status" >&5
        echo "$as_me: failed program was:" >&5
        sed 's/^/| /' conftest.$ac_ext >&5

        ( exit $ac_status )
        ac_cv_header_stdc=no
      fi
      rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext
    fi


  fi
fi
{ echo "$as_me:$LINENO: result: $ac_cv_header_stdc" >&5
echo "${ECHO_T}$ac_cv_header_stdc" >&6; }
if test "$ac_cv_header_stdc" = yes; then

cat >>confdefs.h <<\_ACEOF
#define STDC_HEADERS 1
_ACEOF

fi

# On IRIX 5.3, sys/types and inttypes.h are conflicting.
for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \ inttypes.h stdint.h unistd.h do as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` { echo "$as_me:$LINENO: checking for $ac_header" >&5 echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then echo $ECHO_N "(cached) $ECHO_C" >&6 else cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ $ac_includes_default #include <$ac_header> _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest.$ac_objext; then eval "$as_ac_Header=yes" else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 eval "$as_ac_Header=no" fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi ac_res=`eval echo '${'$as_ac_Header'}'` { echo "$as_me:$LINENO: result: $ac_res" >&5 echo "${ECHO_T}$ac_res" >&6; } if test `eval echo '${'$as_ac_Header'}'` = yes; then cat >>confdefs.h <<_ACEOF #define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 _ACEOF fi done for ac_header in dlfcn.h do as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then { echo "$as_me:$LINENO: checking for $ac_header" >&5 echo $ECHO_N "checking for $ac_header... 
$ECHO_C" >&6; } if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then echo $ECHO_N "(cached) $ECHO_C" >&6 fi ac_res=`eval echo '${'$as_ac_Header'}'` { echo "$as_me:$LINENO: result: $ac_res" >&5 echo "${ECHO_T}$ac_res" >&6; } else # Is the header compilable? { echo "$as_me:$LINENO: checking $ac_header usability" >&5 echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; } cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ $ac_includes_default #include <$ac_header> _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest.$ac_objext; then ac_header_compiler=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_header_compiler=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext { echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 echo "${ECHO_T}$ac_header_compiler" >&6; } # Is the header present? { echo "$as_me:$LINENO: checking $ac_header presence" >&5 echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; } cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include <$ac_header> _ACEOF if { (ac_try="$ac_cpp conftest.$ac_ext" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 ac_status=$? 
grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } >/dev/null && { test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || test ! -s conftest.err }; then ac_header_preproc=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_header_preproc=no fi rm -f conftest.err conftest.$ac_ext { echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 echo "${ECHO_T}$ac_header_preproc" >&6; } # So? What about this header? case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in yes:no: ) { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5 echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;} { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5 echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;} ac_header_preproc=yes ;; no:yes:* ) { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5 echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;} { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5 echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" 
>&2;} { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5 echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;} { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5 echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;} { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5 echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;} { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5 echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;} ( cat <<\_ASBOX ## ----------------------------- ## ## Report this to esler@uiuc.edu ## ## ----------------------------- ## _ASBOX ) | sed "s/^/$as_me: WARNING: /" >&2 ;; esac { echo "$as_me:$LINENO: checking for $ac_header" >&5 echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then echo $ECHO_N "(cached) $ECHO_C" >&6 else eval "$as_ac_Header=\$ac_header_preproc" fi ac_res=`eval echo '${'$as_ac_Header'}'` { echo "$as_me:$LINENO: result: $ac_res" >&5 echo "${ECHO_T}$ac_res" >&6; } fi if test `eval echo '${'$as_ac_Header'}'` = yes; then cat >>confdefs.h <<_ACEOF #define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 _ACEOF fi done if test -n "$CXX" && ( test "X$CXX" != "Xno" && ( (test "X$CXX" = "Xg++" && `g++ -v >/dev/null 2>&1` ) || (test "X$CXX" != "Xg++"))) ; then ac_ext=cpp ac_cpp='$CXXCPP $CPPFLAGS' ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_cxx_compiler_gnu { echo "$as_me:$LINENO: checking how to run the C++ preprocessor" >&5 echo $ECHO_N "checking how to run the C++ preprocessor... 
$ECHO_C" >&6; } if test -z "$CXXCPP"; then if test "${ac_cv_prog_CXXCPP+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else # Double quotes because CXXCPP needs to be expanded for CXXCPP in "$CXX -E" "/lib/cpp" do ac_preproc_ok=false for ac_cxx_preproc_warn_flag in '' yes do # Use a header file that comes with gcc, so configuring glibc # with a fresh cross-compiler works. # Prefer to if __STDC__ is defined, since # exists even on freestanding compilers. # On the NeXT, cc -E runs the code through the compiler's parser, # not just through cpp. "Syntax error" is here to catch this case. cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #ifdef __STDC__ # include #else # include #endif Syntax error _ACEOF if { (ac_try="$ac_cpp conftest.$ac_ext" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } >/dev/null && { test -z "$ac_cxx_preproc_warn_flag$ac_cxx_werror_flag" || test ! -s conftest.err }; then : else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 # Broken: fails on valid input. continue fi rm -f conftest.err conftest.$ac_ext # OK, works on sane cases. Now check whether nonexistent headers # can be detected and how. cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include _ACEOF if { (ac_try="$ac_cpp conftest.$ac_ext" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 ac_status=$? 
grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } >/dev/null && { test -z "$ac_cxx_preproc_warn_flag$ac_cxx_werror_flag" || test ! -s conftest.err }; then # Broken: success on invalid input. continue else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 # Passes both tests. ac_preproc_ok=: break fi rm -f conftest.err conftest.$ac_ext done # Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. rm -f conftest.err conftest.$ac_ext if $ac_preproc_ok; then break fi done ac_cv_prog_CXXCPP=$CXXCPP fi CXXCPP=$ac_cv_prog_CXXCPP else ac_cv_prog_CXXCPP=$CXXCPP fi { echo "$as_me:$LINENO: result: $CXXCPP" >&5 echo "${ECHO_T}$CXXCPP" >&6; } ac_preproc_ok=false for ac_cxx_preproc_warn_flag in '' yes do # Use a header file that comes with gcc, so configuring glibc # with a fresh cross-compiler works. # Prefer to if __STDC__ is defined, since # exists even on freestanding compilers. # On the NeXT, cc -E runs the code through the compiler's parser, # not just through cpp. "Syntax error" is here to catch this case. cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #ifdef __STDC__ # include #else # include #endif Syntax error _ACEOF if { (ac_try="$ac_cpp conftest.$ac_ext" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } >/dev/null && { test -z "$ac_cxx_preproc_warn_flag$ac_cxx_werror_flag" || test ! -s conftest.err }; then : else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 # Broken: fails on valid input. 
continue fi rm -f conftest.err conftest.$ac_ext # OK, works on sane cases. Now check whether nonexistent headers # can be detected and how. cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include _ACEOF if { (ac_try="$ac_cpp conftest.$ac_ext" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } >/dev/null && { test -z "$ac_cxx_preproc_warn_flag$ac_cxx_werror_flag" || test ! -s conftest.err }; then # Broken: success on invalid input. continue else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 # Passes both tests. ac_preproc_ok=: break fi rm -f conftest.err conftest.$ac_ext done # Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. rm -f conftest.err conftest.$ac_ext if $ac_preproc_ok; then : else { { echo "$as_me:$LINENO: error: C++ preprocessor \"$CXXCPP\" fails sanity check See \`config.log' for more details." >&5 echo "$as_me: error: C++ preprocessor \"$CXXCPP\" fails sanity check See \`config.log' for more details." >&2;} { (exit 1); exit 1; }; } fi ac_ext=cpp ac_cpp='$CXXCPP $CPPFLAGS' ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_cxx_compiler_gnu fi # Autoconf 2.13's AC_OBJEXT and AC_EXEEXT macros only works for C compilers! # find the maximum length of command line arguments { echo "$as_me:$LINENO: checking the maximum length of command line arguments" >&5 echo $ECHO_N "checking the maximum length of command line arguments... 
$ECHO_C" >&6; } if test "${lt_cv_sys_max_cmd_len+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else i=0 teststring="ABCD" case $build_os in msdosdjgpp*) # On DJGPP, this test can blow up pretty badly due to problems in libc # (any single argument exceeding 2000 bytes causes a buffer overrun # during glob expansion). Even if it were fixed, the result of this # check would be larger than it should be. lt_cv_sys_max_cmd_len=12288; # 12K is about right ;; gnu*) # Under GNU Hurd, this test is not required because there is # no limit to the length of command line arguments. # Libtool will interpret -1 as no limit whatsoever lt_cv_sys_max_cmd_len=-1; ;; cygwin* | mingw*) # On Win9x/ME, this test blows up -- it succeeds, but takes # about 5 minutes as the teststring grows exponentially. # Worse, since 9x/ME are not pre-emptively multitasking, # you end up with a "frozen" computer, even though with patience # the test eventually succeeds (with a max line length of 256k). # Instead, let's just punt: use the minimum linelength reported by # all of the supported platforms: 8192 (on NT/2K/XP). lt_cv_sys_max_cmd_len=8192; ;; amigaos*) # On AmigaOS with pdksh, this test takes hours, literally. # So we just punt and use a minimum line length of 8192. lt_cv_sys_max_cmd_len=8192; ;; netbsd* | freebsd* | openbsd* | darwin* | dragonfly*) # This has been around since 386BSD, at least. Likely further. if test -x /sbin/sysctl; then lt_cv_sys_max_cmd_len=`/sbin/sysctl -n kern.argmax` elif test -x /usr/sbin/sysctl; then lt_cv_sys_max_cmd_len=`/usr/sbin/sysctl -n kern.argmax` else lt_cv_sys_max_cmd_len=65536 # usable default for all BSDs fi # And add a safety zone lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4` lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3` ;; interix*) # We know the value 262144 and hardcode it with a safety zone (like BSD) lt_cv_sys_max_cmd_len=196608 ;; osf*) # Dr. 
Hans Ekkehard Plesser reports seeing a kernel panic running configure # due to this test when exec_disable_arg_limit is 1 on Tru64. It is not # nice to cause kernel panics so lets avoid the loop below. # First set a reasonable default. lt_cv_sys_max_cmd_len=16384 # if test -x /sbin/sysconfig; then case `/sbin/sysconfig -q proc exec_disable_arg_limit` in *1*) lt_cv_sys_max_cmd_len=-1 ;; esac fi ;; sco3.2v5*) lt_cv_sys_max_cmd_len=102400 ;; sysv5* | sco5v6* | sysv4.2uw2*) kargmax=`grep ARG_MAX /etc/conf/cf.d/stune 2>/dev/null` if test -n "$kargmax"; then lt_cv_sys_max_cmd_len=`echo $kargmax | sed 's/.*[ ]//'` else lt_cv_sys_max_cmd_len=32768 fi ;; *) lt_cv_sys_max_cmd_len=`(getconf ARG_MAX) 2> /dev/null` if test -n "$lt_cv_sys_max_cmd_len"; then lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4` lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3` else SHELL=${SHELL-${CONFIG_SHELL-/bin/sh}} while (test "X"`$SHELL $0 --fallback-echo "X$teststring" 2>/dev/null` \ = "XX$teststring") >/dev/null 2>&1 && new_result=`expr "X$teststring" : ".*" 2>&1` && lt_cv_sys_max_cmd_len=$new_result && test $i != 17 # 1/2 MB should be enough do i=`expr $i + 1` teststring=$teststring$teststring done teststring= # Add a significant safety factor because C++ compilers can tack on massive # amounts of additional arguments before passing them to the linker. # It appears as though 1/2 is a usable value. lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 2` fi ;; esac fi if test -n $lt_cv_sys_max_cmd_len ; then { echo "$as_me:$LINENO: result: $lt_cv_sys_max_cmd_len" >&5 echo "${ECHO_T}$lt_cv_sys_max_cmd_len" >&6; } else { echo "$as_me:$LINENO: result: none" >&5 echo "${ECHO_T}none" >&6; } fi # Check for command to grab the raw symbol name followed by C symbol from nm. { echo "$as_me:$LINENO: checking command to parse $NM output from $compiler object" >&5 echo $ECHO_N "checking command to parse $NM output from $compiler object... 
$ECHO_C" >&6; }
# ---------------------------------------------------------------------------
# Build lt_cv_sys_global_symbol_pipe: a sed command that turns `$NM' output
# into "<code> <raw symbol> <C symbol>" lines, plus the companion commands
# lt_cv_sys_global_symbol_to_cdecl and ..._to_c_name_address.
# The pipe is validated by compiling a small program, extracting its symbols
# with the candidate pipe, generating a symbol table from them and linking
# the two together.  It is tried without and then with a "_" symbol prefix;
# if neither works the pipe is left empty.
#
# The three `<<EOF' here-document openers (and the nm test program itself)
# had been stripped from this copy of the script; they are restored here.
# ---------------------------------------------------------------------------
if test "${lt_cv_sys_global_symbol_pipe+set}" = set; then
  echo $ECHO_N "(cached) $ECHO_C" >&6
else

# These are sane defaults that work on at least a few old systems.
# [They come from Ultrix.  What could be older than Ultrix?!! ;)]

# Character class describing NM global symbol codes.
symcode='[BCDEGRST]'

# Regexp to match symbols that can be accessed directly from C.
sympat='\([_A-Za-z][_A-Za-z0-9]*\)'

# Transform an extracted symbol line into a proper C declaration
lt_cv_sys_global_symbol_to_cdecl="sed -n -e 's/^. .* \(.*\)$/extern int \1;/p'"

# Transform an extracted symbol line into symbol name and symbol address
lt_cv_sys_global_symbol_to_c_name_address="sed -n -e 's/^: \([^ ]*\) $/ {\\\"\1\\\", (lt_ptr) 0},/p' -e 's/^$symcode \([^ ]*\) \([^ ]*\)$/ {\"\2\", (lt_ptr) \&\2},/p'"

# Define system-specific variables.
case $host_os in
aix*)
  symcode='[BCDT]'
  ;;
cygwin* | mingw* | pw32*)
  symcode='[ABCDGISTW]'
  ;;
hpux*) # Its linker distinguishes data from code symbols
  if test "$host_cpu" = ia64; then
    symcode='[ABCDEGRST]'
  fi
  lt_cv_sys_global_symbol_to_cdecl="sed -n -e 's/^T .* \(.*\)$/extern int \1();/p' -e 's/^$symcode* .* \(.*\)$/extern char \1;/p'"
  lt_cv_sys_global_symbol_to_c_name_address="sed -n -e 's/^: \([^ ]*\) $/ {\\\"\1\\\", (lt_ptr) 0},/p' -e 's/^$symcode* \([^ ]*\) \([^ ]*\)$/ {\"\2\", (lt_ptr) \&\2},/p'"
  ;;
linux* | k*bsd*-gnu)
  if test "$host_cpu" = ia64; then
    symcode='[ABCDGIRSTW]'
    lt_cv_sys_global_symbol_to_cdecl="sed -n -e 's/^T .* \(.*\)$/extern int \1();/p' -e 's/^$symcode* .* \(.*\)$/extern char \1;/p'"
    lt_cv_sys_global_symbol_to_c_name_address="sed -n -e 's/^: \([^ ]*\) $/ {\\\"\1\\\", (lt_ptr) 0},/p' -e 's/^$symcode* \([^ ]*\) \([^ ]*\)$/ {\"\2\", (lt_ptr) \&\2},/p'"
  fi
  ;;
irix* | nonstopux*)
  symcode='[BCDEGRST]'
  ;;
osf*)
  symcode='[BCDEGQRST]'
  ;;
solaris*)
  symcode='[BDRT]'
  ;;
sco3.2v5*)
  symcode='[DT]'
  ;;
sysv4.2uw2*)
  symcode='[DT]'
  ;;
sysv5* | sco5v6* | unixware* | OpenUNIX*)
  symcode='[ABDT]'
  ;;
sysv4)
  symcode='[DFNSTU]'
  ;;
esac

# Handle CRLF in mingw tool chain
opt_cr=
case $build_os in
mingw*)
  opt_cr=`echo 'x\{0,1\}' | tr x '\015'` # option cr in regexp
  ;;
esac

# If we're using GNU nm, then use its standard symbol codes.
case `$NM -V 2>&1` in
*GNU* | *'with BFD'*)
  symcode='[ABCDGIRSTW]' ;;
esac

# Try without a prefix underscore, then with it.
for ac_symprfx in "" "_"; do

  # Transform symcode, sympat, and symprfx into a raw symbol and a C symbol.
  symxfrm="\\1 $ac_symprfx\\2 \\2"

  # Write the raw and C identifiers.
  lt_cv_sys_global_symbol_pipe="sed -n -e 's/^.*[ ]\($symcode$symcode*\)[ ][ ]*$ac_symprfx$sympat$opt_cr$/$symxfrm/p'"

  # Check to see that the pipe works correctly.
  pipe_works=no

  rm -f conftest*
  # Test program exporting one data symbol and one function symbol.
  cat > conftest.$ac_ext <<EOF
#ifdef __cplusplus
extern "C" {
#endif
char nm_test_var;
void nm_test_func(){}
#ifdef __cplusplus
}
#endif
int main(){nm_test_var='a';nm_test_func();return(0);}
EOF

  if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
  (eval $ac_compile) 2>&5
  ac_status=$?
  echo "$as_me:$LINENO: \$? = $ac_status" >&5
  (exit $ac_status); }; then
    # Now try to grab the symbols.
    nlist=conftest.nm
    if { (eval echo "$as_me:$LINENO: \"$NM conftest.$ac_objext \| $lt_cv_sys_global_symbol_pipe \> $nlist\"") >&5
  (eval $NM conftest.$ac_objext \| $lt_cv_sys_global_symbol_pipe \> $nlist) 2>&5
  ac_status=$?
  echo "$as_me:$LINENO: \$? = $ac_status" >&5
  (exit $ac_status); } && test -s "$nlist"; then
      # Try sorting and uniquifying the output.
      if sort "$nlist" | uniq > "$nlist"T; then
        mv -f "$nlist"T "$nlist"
      else
        rm -f "$nlist"T
      fi

      # Make sure that we snagged all the symbols we need.
      if grep ' nm_test_var$' "$nlist" >/dev/null; then
        if grep ' nm_test_func$' "$nlist" >/dev/null; then
          cat <<EOF > conftest.$ac_ext
#ifdef __cplusplus
extern "C" {
#endif

EOF
          # Now generate the symbol file.
          eval "$lt_cv_sys_global_symbol_to_cdecl"' < "$nlist" | grep -v main >> conftest.$ac_ext'

          cat <<EOF >> conftest.$ac_ext
#if defined (__STDC__) && __STDC__
# define lt_ptr_t void *
#else
# define lt_ptr_t char *
# define const
#endif

/* The mapping between symbol names and symbols. */
const struct {
  const char *name;
  lt_ptr_t address;
}
lt_preloaded_symbols[] =
{
EOF
          $SED "s/^$symcode$symcode* \(.*\) \(.*\)$/ {\"\2\", (lt_ptr_t) \&\2},/" < "$nlist" | grep -v main >> conftest.$ac_ext
          cat <<\EOF >> conftest.$ac_ext
  {0, (lt_ptr_t) 0}
};

#ifdef __cplusplus
}
#endif
EOF
          # Now try linking the two files.
          mv conftest.$ac_objext conftstm.$ac_objext
          lt_save_LIBS="$LIBS"
          lt_save_CFLAGS="$CFLAGS"
          LIBS="conftstm.$ac_objext"
          CFLAGS="$CFLAGS$lt_prog_compiler_no_builtin_flag"
          if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
  (eval $ac_link) 2>&5
  ac_status=$?
  echo "$as_me:$LINENO: \$? = $ac_status" >&5
  (exit $ac_status); } && test -s conftest${ac_exeext}; then
            pipe_works=yes
          fi
          LIBS="$lt_save_LIBS"
          CFLAGS="$lt_save_CFLAGS"
        else
          echo "cannot find nm_test_func in $nlist" >&5
        fi
      else
        echo "cannot find nm_test_var in $nlist" >&5
      fi
    else
      echo "cannot run $lt_cv_sys_global_symbol_pipe" >&5
    fi
  else
    echo "$progname: failed program was:" >&5
    cat conftest.$ac_ext >&5
  fi
  rm -f conftest* conftst*

  # Do not use the global_symbol_pipe unless it works.
  if test "$pipe_works" = yes; then
    break
  else
    lt_cv_sys_global_symbol_pipe=
  fi
done

fi

# Without a working pipe the C-declaration transform is useless as well.
if test -z "$lt_cv_sys_global_symbol_pipe"; then
  lt_cv_sys_global_symbol_to_cdecl=
fi
if test -z "$lt_cv_sys_global_symbol_pipe$lt_cv_sys_global_symbol_to_cdecl"; then
  { echo "$as_me:$LINENO: result: failed" >&5
echo "${ECHO_T}failed" >&6; }
else
  { echo "$as_me:$LINENO: result: ok" >&5
echo "${ECHO_T}ok" >&6; }
fi

{ echo "$as_me:$LINENO: checking for objdir" >&5
echo $ECHO_N "checking for objdir... $ECHO_C" >&6; }
if test "${lt_cv_objdir+set}" = set; then
  echo $ECHO_N "(cached) $ECHO_C" >&6
else
  rm -f .libs 2>/dev/null
mkdir .libs 2>/dev/null
if test -d .libs; then
  lt_cv_objdir=.libs
else
  # MS-DOS does not allow filenames that begin with a dot.
lt_cv_objdir=_libs fi rmdir .libs 2>/dev/null fi { echo "$as_me:$LINENO: result: $lt_cv_objdir" >&5 echo "${ECHO_T}$lt_cv_objdir" >&6; } objdir=$lt_cv_objdir case $host_os in aix3*) # AIX sometimes has problems with the GCC collect2 program. For some # reason, if we set the COLLECT_NAMES environment variable, the problems # vanish in a puff of smoke. if test "X${COLLECT_NAMES+set}" != Xset; then COLLECT_NAMES= export COLLECT_NAMES fi ;; esac # Sed substitution that helps us do robust quoting. It backslashifies # metacharacters that are still active within double-quoted strings. Xsed='sed -e 1s/^X//' sed_quote_subst='s/\([\\"\\`$\\\\]\)/\\\1/g' # Same as above, but do not quote variable references. double_quote_subst='s/\([\\"\\`\\\\]\)/\\\1/g' # Sed substitution to delay expansion of an escaped shell variable in a # double_quote_subst'ed string. delay_variable_subst='s/\\\\\\\\\\\$/\\\\\\$/g' # Sed substitution to avoid accidental globbing in evaled expressions no_glob_subst='s/\*/\\\*/g' # Constants: rm="rm -f" # Global variables: default_ofile=libtool can_build_shared=yes # All known linkers require a `.a' archive for static linking (except MSVC, # which needs '.lib'). libext=a ltmain="$ac_aux_dir/ltmain.sh" ofile="$default_ofile" with_gnu_ld="$lt_cv_prog_gnu_ld" if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}ar", so it can be a program name with args. set dummy ${ac_tool_prefix}ar; ac_word=$2 { echo "$as_me:$LINENO: checking for $ac_word" >&5 echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } if test "${ac_cv_prog_AR+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else if test -n "$AR"; then ac_cv_prog_AR="$AR" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then ac_cv_prog_AR="${ac_tool_prefix}ar" echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi AR=$ac_cv_prog_AR if test -n "$AR"; then { echo "$as_me:$LINENO: result: $AR" >&5 echo "${ECHO_T}$AR" >&6; } else { echo "$as_me:$LINENO: result: no" >&5 echo "${ECHO_T}no" >&6; } fi fi if test -z "$ac_cv_prog_AR"; then ac_ct_AR=$AR # Extract the first word of "ar", so it can be a program name with args. set dummy ar; ac_word=$2 { echo "$as_me:$LINENO: checking for $ac_word" >&5 echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } if test "${ac_cv_prog_ac_ct_AR+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else if test -n "$ac_ct_AR"; then ac_cv_prog_ac_ct_AR="$ac_ct_AR" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then ac_cv_prog_ac_ct_AR="ar" echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_AR=$ac_cv_prog_ac_ct_AR if test -n "$ac_ct_AR"; then { echo "$as_me:$LINENO: result: $ac_ct_AR" >&5 echo "${ECHO_T}$ac_ct_AR" >&6; } else { echo "$as_me:$LINENO: result: no" >&5 echo "${ECHO_T}no" >&6; } fi if test "x$ac_ct_AR" = x; then AR="false" else case $cross_compiling:$ac_tool_warned in yes:) { echo "$as_me:$LINENO: WARNING: In the future, Autoconf will not detect cross-tools whose name does not start with the host triplet. If you think this configuration is useful to you, please write to autoconf@gnu.org." >&5 echo "$as_me: WARNING: In the future, Autoconf will not detect cross-tools whose name does not start with the host triplet. 
If you think this configuration is useful to you, please write to autoconf@gnu.org." >&2;} ac_tool_warned=yes ;; esac AR=$ac_ct_AR fi else AR="$ac_cv_prog_AR" fi if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}ranlib", so it can be a program name with args. set dummy ${ac_tool_prefix}ranlib; ac_word=$2 { echo "$as_me:$LINENO: checking for $ac_word" >&5 echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } if test "${ac_cv_prog_RANLIB+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else if test -n "$RANLIB"; then ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then ac_cv_prog_RANLIB="${ac_tool_prefix}ranlib" echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi RANLIB=$ac_cv_prog_RANLIB if test -n "$RANLIB"; then { echo "$as_me:$LINENO: result: $RANLIB" >&5 echo "${ECHO_T}$RANLIB" >&6; } else { echo "$as_me:$LINENO: result: no" >&5 echo "${ECHO_T}no" >&6; } fi fi if test -z "$ac_cv_prog_RANLIB"; then ac_ct_RANLIB=$RANLIB # Extract the first word of "ranlib", so it can be a program name with args. set dummy ranlib; ac_word=$2 { echo "$as_me:$LINENO: checking for $ac_word" >&5 echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } if test "${ac_cv_prog_ac_ct_RANLIB+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else if test -n "$ac_ct_RANLIB"; then ac_cv_prog_ac_ct_RANLIB="$ac_ct_RANLIB" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then ac_cv_prog_ac_ct_RANLIB="ranlib" echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_RANLIB=$ac_cv_prog_ac_ct_RANLIB if test -n "$ac_ct_RANLIB"; then { echo "$as_me:$LINENO: result: $ac_ct_RANLIB" >&5 echo "${ECHO_T}$ac_ct_RANLIB" >&6; } else { echo "$as_me:$LINENO: result: no" >&5 echo "${ECHO_T}no" >&6; } fi if test "x$ac_ct_RANLIB" = x; then RANLIB=":" else case $cross_compiling:$ac_tool_warned in yes:) { echo "$as_me:$LINENO: WARNING: In the future, Autoconf will not detect cross-tools whose name does not start with the host triplet. If you think this configuration is useful to you, please write to autoconf@gnu.org." >&5 echo "$as_me: WARNING: In the future, Autoconf will not detect cross-tools whose name does not start with the host triplet. If you think this configuration is useful to you, please write to autoconf@gnu.org." >&2;} ac_tool_warned=yes ;; esac RANLIB=$ac_ct_RANLIB fi else RANLIB="$ac_cv_prog_RANLIB" fi if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}strip", so it can be a program name with args. set dummy ${ac_tool_prefix}strip; ac_word=$2 { echo "$as_me:$LINENO: checking for $ac_word" >&5 echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } if test "${ac_cv_prog_STRIP+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else if test -n "$STRIP"; then ac_cv_prog_STRIP="$STRIP" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. 
for ac_exec_ext in '' $ac_executable_extensions; do if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then ac_cv_prog_STRIP="${ac_tool_prefix}strip" echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi STRIP=$ac_cv_prog_STRIP if test -n "$STRIP"; then { echo "$as_me:$LINENO: result: $STRIP" >&5 echo "${ECHO_T}$STRIP" >&6; } else { echo "$as_me:$LINENO: result: no" >&5 echo "${ECHO_T}no" >&6; } fi fi if test -z "$ac_cv_prog_STRIP"; then ac_ct_STRIP=$STRIP # Extract the first word of "strip", so it can be a program name with args. set dummy strip; ac_word=$2 { echo "$as_me:$LINENO: checking for $ac_word" >&5 echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } if test "${ac_cv_prog_ac_ct_STRIP+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else if test -n "$ac_ct_STRIP"; then ac_cv_prog_ac_ct_STRIP="$ac_ct_STRIP" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then ac_cv_prog_ac_ct_STRIP="strip" echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_STRIP=$ac_cv_prog_ac_ct_STRIP if test -n "$ac_ct_STRIP"; then { echo "$as_me:$LINENO: result: $ac_ct_STRIP" >&5 echo "${ECHO_T}$ac_ct_STRIP" >&6; } else { echo "$as_me:$LINENO: result: no" >&5 echo "${ECHO_T}no" >&6; } fi if test "x$ac_ct_STRIP" = x; then STRIP=":" else case $cross_compiling:$ac_tool_warned in yes:) { echo "$as_me:$LINENO: WARNING: In the future, Autoconf will not detect cross-tools whose name does not start with the host triplet. If you think this configuration is useful to you, please write to autoconf@gnu.org." 
>&5 echo "$as_me: WARNING: In the future, Autoconf will not detect cross-tools whose name does not start with the host triplet. If you think this configuration is useful to you, please write to autoconf@gnu.org." >&2;} ac_tool_warned=yes ;; esac STRIP=$ac_ct_STRIP fi else STRIP="$ac_cv_prog_STRIP" fi old_CC="$CC" old_CFLAGS="$CFLAGS" # Set sane defaults for various variables test -z "$AR" && AR=ar test -z "$AR_FLAGS" && AR_FLAGS=cru test -z "$AS" && AS=as test -z "$CC" && CC=cc test -z "$LTCC" && LTCC=$CC test -z "$LTCFLAGS" && LTCFLAGS=$CFLAGS test -z "$DLLTOOL" && DLLTOOL=dlltool test -z "$LD" && LD=ld test -z "$LN_S" && LN_S="ln -s" test -z "$MAGIC_CMD" && MAGIC_CMD=file test -z "$NM" && NM=nm test -z "$SED" && SED=sed test -z "$OBJDUMP" && OBJDUMP=objdump test -z "$RANLIB" && RANLIB=: test -z "$STRIP" && STRIP=: test -z "$ac_objext" && ac_objext=o # Determine commands to create old-style static archives. old_archive_cmds='$AR $AR_FLAGS $oldlib$oldobjs' old_postinstall_cmds='chmod 644 $oldlib' old_postuninstall_cmds= if test -n "$RANLIB"; then case $host_os in openbsd*) old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB -t \$oldlib" ;; *) old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB \$oldlib" ;; esac old_archive_cmds="$old_archive_cmds~\$RANLIB \$oldlib" fi for cc_temp in $compiler""; do case $cc_temp in compile | *[\\/]compile | ccache | *[\\/]ccache ) ;; distcc | *[\\/]distcc | purify | *[\\/]purify ) ;; \-*) ;; *) break;; esac done cc_basename=`$echo "X$cc_temp" | $Xsed -e 's%.*/%%' -e "s%^$host_alias-%%"` # Only perform the check for file, if the check method requires it case $deplibs_check_method in file_magic*) if test "$file_magic_cmd" = '$MAGIC_CMD'; then { echo "$as_me:$LINENO: checking for ${ac_tool_prefix}file" >&5 echo $ECHO_N "checking for ${ac_tool_prefix}file... 
$ECHO_C" >&6; } if test "${lt_cv_path_MAGIC_CMD+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else case $MAGIC_CMD in [\\/*] | ?:[\\/]*) lt_cv_path_MAGIC_CMD="$MAGIC_CMD" # Let the user override the test with a path. ;; *) lt_save_MAGIC_CMD="$MAGIC_CMD" lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR ac_dummy="/usr/bin$PATH_SEPARATOR$PATH" for ac_dir in $ac_dummy; do IFS="$lt_save_ifs" test -z "$ac_dir" && ac_dir=. if test -f $ac_dir/${ac_tool_prefix}file; then lt_cv_path_MAGIC_CMD="$ac_dir/${ac_tool_prefix}file" if test -n "$file_magic_test_file"; then case $deplibs_check_method in "file_magic "*) file_magic_regex=`expr "$deplibs_check_method" : "file_magic \(.*\)"` MAGIC_CMD="$lt_cv_path_MAGIC_CMD" if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null | $EGREP "$file_magic_regex" > /dev/null; then : else cat <&2 *** Warning: the command libtool uses to detect shared libraries, *** $file_magic_cmd, produces output that libtool cannot recognize. *** The result is that libtool may fail to recognize shared libraries *** as such. This will affect the creation of libtool libraries that *** depend on shared libraries, but programs linked with such libtool *** libraries will work regardless of this problem. Nevertheless, you *** may want to report the problem to your system manager and/or to *** bug-libtool@gnu.org EOF fi ;; esac fi break fi done IFS="$lt_save_ifs" MAGIC_CMD="$lt_save_MAGIC_CMD" ;; esac fi MAGIC_CMD="$lt_cv_path_MAGIC_CMD" if test -n "$MAGIC_CMD"; then { echo "$as_me:$LINENO: result: $MAGIC_CMD" >&5 echo "${ECHO_T}$MAGIC_CMD" >&6; } else { echo "$as_me:$LINENO: result: no" >&5 echo "${ECHO_T}no" >&6; } fi if test -z "$lt_cv_path_MAGIC_CMD"; then if test -n "$ac_tool_prefix"; then { echo "$as_me:$LINENO: checking for file" >&5 echo $ECHO_N "checking for file... 
$ECHO_C" >&6; } if test "${lt_cv_path_MAGIC_CMD+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else case $MAGIC_CMD in [\\/*] | ?:[\\/]*) lt_cv_path_MAGIC_CMD="$MAGIC_CMD" # Let the user override the test with a path. ;; *) lt_save_MAGIC_CMD="$MAGIC_CMD" lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR ac_dummy="/usr/bin$PATH_SEPARATOR$PATH" for ac_dir in $ac_dummy; do IFS="$lt_save_ifs" test -z "$ac_dir" && ac_dir=. if test -f $ac_dir/file; then lt_cv_path_MAGIC_CMD="$ac_dir/file" if test -n "$file_magic_test_file"; then case $deplibs_check_method in "file_magic "*) file_magic_regex=`expr "$deplibs_check_method" : "file_magic \(.*\)"` MAGIC_CMD="$lt_cv_path_MAGIC_CMD" if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null | $EGREP "$file_magic_regex" > /dev/null; then : else cat <&2 *** Warning: the command libtool uses to detect shared libraries, *** $file_magic_cmd, produces output that libtool cannot recognize. *** The result is that libtool may fail to recognize shared libraries *** as such. This will affect the creation of libtool libraries that *** depend on shared libraries, but programs linked with such libtool *** libraries will work regardless of this problem. Nevertheless, you *** may want to report the problem to your system manager and/or to *** bug-libtool@gnu.org EOF fi ;; esac fi break fi done IFS="$lt_save_ifs" MAGIC_CMD="$lt_save_MAGIC_CMD" ;; esac fi MAGIC_CMD="$lt_cv_path_MAGIC_CMD" if test -n "$MAGIC_CMD"; then { echo "$as_me:$LINENO: result: $MAGIC_CMD" >&5 echo "${ECHO_T}$MAGIC_CMD" >&6; } else { echo "$as_me:$LINENO: result: no" >&5 echo "${ECHO_T}no" >&6; } fi else MAGIC_CMD=: fi fi fi ;; esac enable_dlopen=no enable_win32_dll=no # Check whether --enable-libtool-lock was given. if test "${enable_libtool_lock+set}" = set; then enableval=$enable_libtool_lock; fi test "x$enable_libtool_lock" != xno && enable_libtool_lock=yes # Check whether --with-pic was given. 
# Resolve the requested PIC mode.  An explicit --with-pic value is copied
# through $withval; when the option was not given, or its value is empty,
# the mode is "default".
pic_mode=default
if test "${with_pic+set}" = set; then
  withval=$with_pic
  pic_mode="$withval"
fi
test -z "$pic_mode" && pic_mode=default

# The default (untagged) configuration in the libtool script is the C one.
tagname=

lt_save_CC="$CC"
# C test sources use the .c extension.
ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu

# Object file extension for compiled C test sources.
objext=o

# Smallest program for compile-only probes ...
lt_simple_compile_test_code="int some_variable = 0;"

# ... and smallest program for link probes.
lt_simple_link_test_code='int main(){return(0);}'

# LTCC and LTCFLAGS fall back to CC and CFLAGS when they are unset.
LTCC=${LTCC-"$CC"}
LTCFLAGS=${LTCFLAGS-"$CFLAGS"}

# CC is allowed to be a program name followed by arguments.
compiler=$CC

# Capture what the compiler prints on stderr for the trivial compile test
# (empty lines and lines starting with optional blanks and a `+' removed).
# The probes below compare their own diagnostics against this boilerplate.
ac_outfile=conftest.$ac_objext
echo "$lt_simple_compile_test_code" >conftest.$ac_ext
eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err
_lt_compiler_boilerplate=`cat conftest.err`
$rm conftest*

# Same capture for the trivial link test.
ac_outfile=conftest.$ac_objext
echo "$lt_simple_link_test_code" >conftest.$ac_ext
eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err
_lt_linker_boilerplate=`cat conftest.err`
$rm conftest*


# With GCC, start from -fno-builtin and add -fno-rtti -fno-exceptions when
# the compiler accepts them without extra diagnostics.
lt_prog_compiler_no_builtin_flag=

if test "$GCC" = yes; then
  lt_prog_compiler_no_builtin_flag=' -fno-builtin'

  { echo "$as_me:$LINENO: checking if $compiler supports -fno-rtti -fno-exceptions" >&5
echo $ECHO_N "checking if $compiler supports -fno-rtti -fno-exceptions... $ECHO_C" >&6; }
  if test "${lt_cv_prog_compiler_rtti_exceptions+set}" = set; then
    echo $ECHO_N "(cached) $ECHO_C" >&6
  else
    lt_cv_prog_compiler_rtti_exceptions=no
    ac_outfile=conftest.$ac_objext
    echo "$lt_simple_compile_test_code" > conftest.$ac_ext
    lt_compiler_flag="-fno-rtti -fno-exceptions"
    # Build the compile command with the option spliced in.  The three sed
    # expressions try, in order: (1) right after the last *FLAGS variable,
    # (2) in front of a word containing "conftest.", (3) at the very end.
    # $ac_compile has no backslashes and begins with a dollar sign (not a
    # hyphen), so plain echo is adequate here.  The option is inserted as
    # the literal text $lt_compiler_flag so that its value cannot confuse
    # sed; the later eval expands it.
    lt_compile=`echo "$ac_compile" | $SED \
      -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
      -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
      -e 's:$: $lt_compiler_flag:'`
    (eval echo "\"\$as_me:11213: $lt_compile\"" >&5)
    (eval "$lt_compile" 2>conftest.err)
    ac_status=$?
    cat conftest.err >&5
    echo "$as_me:11217: \$? = $ac_status" >&5
    if (exit $ac_status) && test -s "$ac_outfile"; then
      # An unrecognized option may only produce a warning, so accept the
      # flag only if stderr is empty or equals the usual boilerplate.
      $echo "X$_lt_compiler_boilerplate" | $Xsed -e '/^$/d' >conftest.exp
      $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2
      if test ! -s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then
        lt_cv_prog_compiler_rtti_exceptions=yes
      fi
    fi
    $rm conftest*
  fi
  { echo "$as_me:$LINENO: result: $lt_cv_prog_compiler_rtti_exceptions" >&5
echo "${ECHO_T}$lt_cv_prog_compiler_rtti_exceptions" >&6; }

  if test x"$lt_cv_prog_compiler_rtti_exceptions" = xyes; then
    lt_prog_compiler_no_builtin_flag="$lt_prog_compiler_no_builtin_flag -fno-rtti -fno-exceptions"
  fi
fi

# Select the linker pass-through prefix (wl), the PIC option and the static
# link option for this compiler/host pair.
lt_prog_compiler_wl=
lt_prog_compiler_pic=
lt_prog_compiler_static=

{ echo "$as_me:$LINENO: checking for $compiler option to produce PIC" >&5
echo $ECHO_N "checking for $compiler option to produce PIC... $ECHO_C" >&6; }

if test "$GCC" = yes; then
  lt_prog_compiler_wl='-Wl,'
  lt_prog_compiler_static='-static'

  case $host_os in
    aix*)
      # All AIX code is PIC.  AIX 5 also supports the IA64 processor, where
      # the static flag differs.
      test "$host_cpu" = ia64 && lt_prog_compiler_static='-Bstatic'
      ;;

    amigaos*)
      # FIXME: we need at least 68020 code to build shared libraries, but
      # adding the `-m68020' flag to GCC prevents building anything better,
      # like `-m68040'.
      lt_prog_compiler_pic='-m68020 -resident32 -malways-restore-a4'
      ;;

    beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*)
      # PIC is the default for these OSes.
      ;;

    mingw* | cygwin* | pw32* | os2*)
      # Lets a source file tell whether it is being built for inclusion in
      # a dll (and should, for example, export symbols).  The cygwin gcc
      # ignores -fPIC, but this is still needed for old-style
      # (--disable-auto-import) libraries.
      lt_prog_compiler_pic='-DDLL_EXPORT'
      ;;

    darwin* | rhapsody*)
      # PIC is the default on this platform; common symbols are not allowed
      # in MH_DYLIB files.
      lt_prog_compiler_pic='-fno-common'
      ;;

    interix[3-9]*)
      # Interix 3.x gcc -fpic/-fPIC options generate broken code.
      # Shared libraries are relocated at runtime instead.
      ;;

    msdosdjgpp*)
      # Using GCC does not give us shared libraries on systems that do not
      # support them.
      lt_prog_compiler_can_build_shared=no
      enable_shared=no
      ;;

    sysv4*MP*)
      test -d /usr/nec && lt_prog_compiler_pic=-Kconform_pic
      ;;

    hpux*)
      # PIC is the default for IA64 HP-UX and 64-bit HP-UX, but not for
      # PA HP-UX.
      case $host_cpu in
        hppa*64*|ia64*)
          # +Z the default
          ;;
        *)
          lt_prog_compiler_pic='-fPIC'
          ;;
      esac
      ;;

    *)
      lt_prog_compiler_pic='-fPIC'
      ;;
  esac
else
  # PORTME Check for flag to pass linker flags through the system compiler.
  case $host_os in
    aix*)
      lt_prog_compiler_wl='-Wl,'
      if test "$host_cpu" = ia64; then
        # AIX 5 now supports IA64 processor
        lt_prog_compiler_static='-Bstatic'
      else
        lt_prog_compiler_static='-bnso -bI:/lib/syscalls.exp'
      fi
      ;;

    darwin*)
      # PIC is the default on this platform; common symbols are not allowed
      # in MH_DYLIB files.
      case $cc_basename in
        xlc*)
          lt_prog_compiler_wl='-Wl,'
          lt_prog_compiler_pic='-qnocommon'
          ;;
      esac
      ;;

    mingw* | cygwin* | pw32* | os2*)
      # Lets a source file tell whether it is being built for inclusion in
      # a dll (and should, for example, export symbols).
      lt_prog_compiler_pic='-DDLL_EXPORT'
      ;;

    hpux9* | hpux10* | hpux11*)
      lt_prog_compiler_wl='-Wl,'
      # PIC is the default for IA64 HP-UX and 64-bit HP-UX, but not for
      # PA HP-UX.
      case $host_cpu in
        hppa*64*|ia64*)
          # +Z the default
          ;;
        *)
          lt_prog_compiler_pic='+Z'
          ;;
      esac
      # Is there a better lt_prog_compiler_static that works with the
      # bundled CC?
      lt_prog_compiler_static='${wl}-a ${wl}archive'
      ;;

    irix5* | irix6* | nonstopux*)
      lt_prog_compiler_wl='-Wl,'
      # PIC (with -KPIC) is the default.
      lt_prog_compiler_static='-non_shared'
      ;;

    newsos6)
      lt_prog_compiler_pic='-KPIC'
      lt_prog_compiler_static='-Bstatic'
      ;;

    linux* | k*bsd*-gnu)
      case $cc_basename in
        icc* | ecc*)
          lt_prog_compiler_wl='-Wl,'
          lt_prog_compiler_pic='-KPIC'
          lt_prog_compiler_static='-static'
          ;;
        pgcc* | pgf77* | pgf90* | pgf95*)
          # Portland Group compilers (*not* the Pentium gcc compiler,
          # which looks to be a dead project)
          lt_prog_compiler_wl='-Wl,'
          lt_prog_compiler_pic='-fpic'
          lt_prog_compiler_static='-Bstatic'
          ;;
        ccc*)
          lt_prog_compiler_wl='-Wl,'
          # All Alpha code is PIC.
          lt_prog_compiler_static='-non_shared'
          ;;
        *)
          # Identify Sun compilers from the first five lines of `$CC -V'.
          case `$CC -V 2>&1 | sed 5q` in
            *Sun\ C*)
              # Sun C 5.9
              lt_prog_compiler_wl='-Wl,'
              lt_prog_compiler_pic='-KPIC'
              lt_prog_compiler_static='-Bstatic'
              ;;
            *Sun\ F*)
              # Sun Fortran 8.3 passes all unrecognized flags to the linker
              lt_prog_compiler_wl=''
              lt_prog_compiler_pic='-KPIC'
              lt_prog_compiler_static='-Bstatic'
              ;;
          esac
          ;;
      esac
      ;;

    osf3* | osf4* | osf5*)
      lt_prog_compiler_wl='-Wl,'
      # All OSF/1 code is PIC.
      lt_prog_compiler_static='-non_shared'
      ;;

    rdos*)
      lt_prog_compiler_static='-non_shared'
      ;;

    solaris*)
      lt_prog_compiler_pic='-KPIC'
      lt_prog_compiler_static='-Bstatic'
      case $cc_basename in
        f77* | f90* | f95*)
          lt_prog_compiler_wl='-Qoption ld '
          ;;
        *)
          lt_prog_compiler_wl='-Wl,'
          ;;
      esac
      ;;

    sunos4*)
      lt_prog_compiler_wl='-Qoption ld '
      lt_prog_compiler_pic='-PIC'
      lt_prog_compiler_static='-Bstatic'
      ;;

    sysv4 | sysv4.2uw2* | sysv4.3*)
      lt_prog_compiler_wl='-Wl,'
      lt_prog_compiler_pic='-KPIC'
      lt_prog_compiler_static='-Bstatic'
      ;;

    sysv4*MP*)
      if test -d /usr/nec ;then
        lt_prog_compiler_pic='-Kconform_pic'
        lt_prog_compiler_static='-Bstatic'
      fi
      ;;

    sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*)
      lt_prog_compiler_wl='-Wl,'
      lt_prog_compiler_pic='-KPIC'
      lt_prog_compiler_static='-Bstatic'
      ;;

    unicos*)
      lt_prog_compiler_wl='-Wl,'
      lt_prog_compiler_can_build_shared=no
      ;;

    uts4*)
      lt_prog_compiler_pic='-pic'
      lt_prog_compiler_static='-Bstatic'
      ;;

    *)
      lt_prog_compiler_can_build_shared=no
      ;;
  esac
fi

{ echo "$as_me:$LINENO: result: $lt_prog_compiler_pic" >&5
echo "${ECHO_T}$lt_prog_compiler_pic" >&6; }

#
# Verify that the selected PIC flag is really accepted by the compiler.
#
if test -n "$lt_prog_compiler_pic"; then

  { echo "$as_me:$LINENO: checking if $compiler PIC flag $lt_prog_compiler_pic works" >&5
echo $ECHO_N "checking if $compiler PIC flag $lt_prog_compiler_pic works... $ECHO_C" >&6; }
  if test "${lt_prog_compiler_pic_works+set}" = set; then
    echo $ECHO_N "(cached) $ECHO_C" >&6
  else
    lt_prog_compiler_pic_works=no
    ac_outfile=conftest.$ac_objext
    echo "$lt_simple_compile_test_code" > conftest.$ac_ext
    lt_compiler_flag="$lt_prog_compiler_pic -DPIC"
    # Build the compile command with the option spliced in.  The three sed
    # expressions try, in order: (1) right after the last *FLAGS variable,
    # (2) in front of a word containing "conftest.", (3) at the very end.
    # $ac_compile has no backslashes and begins with a dollar sign (not a
    # hyphen), so plain echo is adequate here.  The option is inserted as
    # the literal text $lt_compiler_flag so that its value cannot confuse
    # sed; the later eval expands it.
    lt_compile=`echo "$ac_compile" | $SED \
      -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
      -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
      -e 's:$: $lt_compiler_flag:'`
    (eval echo "\"\$as_me:11503: $lt_compile\"" >&5)
    (eval "$lt_compile" 2>conftest.err)
    ac_status=$?
    cat conftest.err >&5
    echo "$as_me:11507: \$? = $ac_status" >&5
    if (exit $ac_status) && test -s "$ac_outfile"; then
      # An unrecognized option may only produce a warning, so accept the
      # flag only if stderr is empty or equals the usual boilerplate.
      $echo "X$_lt_compiler_boilerplate" | $Xsed -e '/^$/d' >conftest.exp
      $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2
      if test ! -s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then
        lt_prog_compiler_pic_works=yes
      fi
    fi
    $rm conftest*
  fi
  { echo "$as_me:$LINENO: result: $lt_prog_compiler_pic_works" >&5
echo "${ECHO_T}$lt_prog_compiler_pic_works" >&6; }

  if test x"$lt_prog_compiler_pic_works" = xyes; then
    # Make sure a non-empty flag starts with a blank.
    case $lt_prog_compiler_pic in
      "" | " "*)
        ;;
      *)
        lt_prog_compiler_pic=" $lt_prog_compiler_pic"
        ;;
    esac
  else
    # The flag failed the probe: drop it and mark shared builds impossible.
    lt_prog_compiler_pic=
    lt_prog_compiler_can_build_shared=no
  fi

fi

case $host_os in
  *djgpp*)
    # For platforms which do not support PIC, -DPIC is meaningless:
    lt_prog_compiler_pic=
    ;;
  *)
    lt_prog_compiler_pic="$lt_prog_compiler_pic -DPIC"
    ;;
esac

#
# Check to make sure the static flag actually works.
#
# The static flag may reference ${wl}, so set wl first and expand the flag
# once through eval before probing it.
wl=$lt_prog_compiler_wl
eval lt_tmp_static_flag=\"$lt_prog_compiler_static\"

{ echo "$as_me:$LINENO: checking if $compiler static flag $lt_tmp_static_flag works" >&5
echo $ECHO_N "checking if $compiler static flag $lt_tmp_static_flag works... $ECHO_C" >&6; }
if test "${lt_prog_compiler_static_works+set}" = set; then
  echo $ECHO_N "(cached) $ECHO_C" >&6
else
  lt_prog_compiler_static_works=no
  save_LDFLAGS="$LDFLAGS"
  LDFLAGS="$LDFLAGS $lt_tmp_static_flag"
  echo "$lt_simple_link_test_code" > conftest.$ac_ext
  if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then
    # An unrecognized option may only produce a warning, so the flag is
    # accepted when stderr is empty or equals the usual link boilerplate.
    if test ! -s conftest.err; then
      lt_prog_compiler_static_works=yes
    else
      # Append any errors to the config.log.
      cat conftest.err 1>&5
      $echo "X$_lt_linker_boilerplate" | $Xsed -e '/^$/d' > conftest.exp
      $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2
      if diff conftest.exp conftest.er2 >/dev/null; then
        lt_prog_compiler_static_works=yes
      fi
    fi
  fi
  $rm conftest*
  LDFLAGS="$save_LDFLAGS"
fi
{ echo "$as_me:$LINENO: result: $lt_prog_compiler_static_works" >&5
echo "${ECHO_T}$lt_prog_compiler_static_works" >&6; }

# Forget the static flag unless the probe accepted it.
test x"$lt_prog_compiler_static_works" = xyes || lt_prog_compiler_static=


# Probe whether the compiler can write its object to a chosen path when
# -c and -o are combined.  The probe runs in a scratch directory
# conftest/ and writes the object into conftest/out/.
{ echo "$as_me:$LINENO: checking if $compiler supports -c -o file.$ac_objext" >&5
echo $ECHO_N "checking if $compiler supports -c -o file.$ac_objext... $ECHO_C" >&6; }
if test "${lt_cv_prog_compiler_c_o+set}" = set; then
  echo $ECHO_N "(cached) $ECHO_C" >&6
else
  lt_cv_prog_compiler_c_o=no
  $rm -r conftest 2>/dev/null
  mkdir conftest
  cd conftest
  mkdir out
  echo "$lt_simple_compile_test_code" > conftest.$ac_ext

  lt_compiler_flag="-o out/conftest2.$ac_objext"
  # Build the compile command with the option spliced in.  The three sed
  # expressions try, in order: (1) right after the last *FLAGS variable,
  # (2) in front of a word containing "conftest.", (3) at the very end.
  # $ac_compile has no backslashes and begins with a dollar sign (not a
  # hyphen), so plain echo is adequate here.
  lt_compile=`echo "$ac_compile" | $SED \
    -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
    -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
    -e 's:$: $lt_compiler_flag:'`
  (eval echo "\"\$as_me:11607: $lt_compile\"" >&5)
  (eval "$lt_compile" 2>out/conftest.err)
  ac_status=$?
  cat out/conftest.err >&5
  echo "$as_me:11611: \$? = $ac_status" >&5
  if (exit $ac_status) && test -s out/conftest2.$ac_objext
  then
    # An unrecognized option may only produce a warning, so say yes only
    # if stderr is empty or equals the usual compiler boilerplate.
    $echo "X$_lt_compiler_boilerplate" | $Xsed -e '/^$/d' > out/conftest.exp
    $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2
    if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then
      lt_cv_prog_compiler_c_o=yes
    fi
  fi
  chmod u+w . 2>&5
  $rm conftest*
  # SGI C++ compiler will create directory out/ii_files/ for
  # template instantiation
  test -d out/ii_files && $rm out/ii_files/* && rmdir out/ii_files
  $rm out/* && rmdir out
  cd ..
  rmdir conftest
  $rm conftest*
fi
{ echo "$as_me:$LINENO: result: $lt_cv_prog_compiler_c_o" >&5
echo "${ECHO_T}$lt_cv_prog_compiler_c_o" >&6; }


# Hard-link locking is only probed when -c -o is unsupported and locks
# have not been disabled; in every other case need_locks becomes "no".
hard_links="nottested"
if test "$lt_cv_prog_compiler_c_o" = no && test "$need_locks" != no; then
  # do not overwrite the value of need_locks provided by the user
  { echo "$as_me:$LINENO: checking if we can lock with hard links" >&5
echo $ECHO_N "checking if we can lock with hard links... $ECHO_C" >&6; }
  hard_links=yes
  $rm conftest*
  # Linking a missing file must fail, the first link of an existing file
  # must succeed, and a second link onto the same name must fail again.
  ln conftest.a conftest.b 2>/dev/null && hard_links=no
  touch conftest.a
  ln conftest.a conftest.b 2>&5 || hard_links=no
  ln conftest.a conftest.b 2>/dev/null && hard_links=no
  { echo "$as_me:$LINENO: result: $hard_links" >&5
echo "${ECHO_T}$hard_links" >&6; }
  if test "$hard_links" = no; then
    { echo "$as_me:$LINENO: WARNING: \`$CC' does not support \`-c -o', so \`make -j' may be unsafe" >&5
echo "$as_me: WARNING: \`$CC' does not support \`-c -o', so \`make -j' may be unsafe" >&2;}
    need_locks=warn
  fi
else
  need_locks=no
fi

{ echo "$as_me:$LINENO: checking whether the $compiler linker ($LD) supports shared libraries" >&5
echo $ECHO_N "checking whether the $compiler linker ($LD) supports shared libraries... $ECHO_C" >&6; }

# Reset every linker-related setting to its neutral value.
runpath_var=
allow_undefined_flag=
enable_shared_with_static_runtimes=no
archive_cmds=
archive_expsym_cmds=
# NOTE(review): the capital `F' in the next name is kept exactly as found;
# it is a variable name, so changing it here alone would alter behavior.
old_archive_From_new_cmds=
old_archive_from_expsyms_cmds=
export_dynamic_flag_spec=
whole_archive_flag_spec=
thread_safe_flag_spec=
hardcode_libdir_flag_spec=
hardcode_libdir_flag_spec_ld=
hardcode_libdir_separator=
hardcode_direct=no
hardcode_minus_L=no
hardcode_shlibpath_var=unsupported
link_all_deplibs=unknown
hardcode_automatic=no
module_cmds=
module_expsym_cmds=
always_export_symbols=no
export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols'
# include_expsyms should be a list of space-separated symbols to be *always*
# included in the symbol list
include_expsyms=
# exclude_expsyms can be an extended regexp of symbols to exclude
# it will be wrapped by ` (' and `)$', so one must not match beginning or
# end of line.  Example: `a|bc|.*d.*' will exclude the symbols `a' and `bc',
# as well as any symbol that contains `d'.
exclude_expsyms="_GLOBAL_OFFSET_TABLE_"
# Although _GLOBAL_OFFSET_TABLE_ is a valid symbol C name, most a.out
# platforms (ab)use it in PIC code, but their linkers get confused if
# the symbol is explicitly referenced.
Since portable code cannot # rely on this symbol name, it's probably fine to never include it in # preloaded symbol tables. extract_expsyms_cmds= # Just being paranoid about ensuring that cc_basename is set. for cc_temp in $compiler""; do case $cc_temp in compile | *[\\/]compile | ccache | *[\\/]ccache ) ;; distcc | *[\\/]distcc | purify | *[\\/]purify ) ;; \-*) ;; *) break;; esac done cc_basename=`$echo "X$cc_temp" | $Xsed -e 's%.*/%%' -e "s%^$host_alias-%%"` case $host_os in cygwin* | mingw* | pw32*) # FIXME: the MSVC++ port hasn't been tested in a loooong time # When not using gcc, we currently assume that we are using # Microsoft Visual C++. if test "$GCC" != yes; then with_gnu_ld=no fi ;; interix*) # we just hope/assume this is gcc and not c89 (= MSVC++) with_gnu_ld=yes ;; openbsd*) with_gnu_ld=no ;; esac ld_shlibs=yes if test "$with_gnu_ld" = yes; then # If archive_cmds runs LD, not CC, wlarc should be empty wlarc='${wl}' # Set some defaults for GNU ld with shared library support. These # are reset later if shared libraries are not supported. Putting them # here allows them to be overridden if necessary. runpath_var=LD_RUN_PATH hardcode_libdir_flag_spec='${wl}--rpath ${wl}$libdir' export_dynamic_flag_spec='${wl}--export-dynamic' # ancient GNU ld didn't support --whole-archive et. al. if $LD --help 2>&1 | grep 'no-whole-archive' > /dev/null; then whole_archive_flag_spec="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive' else whole_archive_flag_spec= fi supports_anon_versioning=no case `$LD -v 2>/dev/null` in *\ [01].* | *\ 2.[0-9].* | *\ 2.10.*) ;; # catch versions < 2.11 *\ 2.11.93.0.2\ *) supports_anon_versioning=yes ;; # RH7.3 ... *\ 2.11.92.0.12\ *) supports_anon_versioning=yes ;; # Mandrake 8.2 ... *\ 2.11.*) ;; # other 2.11 versions *) supports_anon_versioning=yes ;; esac # See if GNU ld supports shared libraries. 
case $host_os in aix3* | aix4* | aix5*) # On AIX/PPC, the GNU linker is very broken if test "$host_cpu" != ia64; then ld_shlibs=no cat <&2 *** Warning: the GNU linker, at least up to release 2.9.1, is reported *** to be unable to reliably create shared libraries on AIX. *** Therefore, libtool is disabling shared libraries support. If you *** really care for shared libraries, you may want to modify your PATH *** so that a non-GNU linker is found, and then restart. EOF fi ;; amigaos*) archive_cmds='$rm $output_objdir/a2ixlibrary.data~$echo "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$echo "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$echo "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$echo "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' hardcode_libdir_flag_spec='-L$libdir' hardcode_minus_L=yes # Samuel A. Falvo II reports # that the semantics of dynamic libraries on AmigaOS, at least up # to version 4, is to share data among multiple programs linked # with the same dynamic library. Since this doesn't match the # behavior of shared libraries on other platforms, we can't use # them. ld_shlibs=no ;; beos*) if $LD --help 2>&1 | grep ': supported targets:.* elf' > /dev/null; then allow_undefined_flag=unsupported # Joseph Beckenbach says some releases of gcc # support --undefined. This deserves some investigation. FIXME archive_cmds='$CC -nostart $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' else ld_shlibs=no fi ;; cygwin* | mingw* | pw32*) # _LT_AC_TAGVAR(hardcode_libdir_flag_spec, ) is actually meaningless, # as there is no search path for DLLs. 
hardcode_libdir_flag_spec='-L$libdir' allow_undefined_flag=unsupported always_export_symbols=no enable_shared_with_static_runtimes=yes export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1 DATA/'\'' -e '\''/^[AITW][ ]/s/.*[ ]//'\'' | sort | uniq > $export_symbols' if $LD --help 2>&1 | grep 'auto-import' > /dev/null; then archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' # If the export-symbols file already is a .def file (1st line # is EXPORTS), use it as is; otherwise, prepend... archive_expsym_cmds='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then cp $export_symbols $output_objdir/$soname.def; else echo EXPORTS > $output_objdir/$soname.def; cat $export_symbols >> $output_objdir/$soname.def; fi~ $CC -shared $output_objdir/$soname.def $libobjs $deplibs $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' else ld_shlibs=no fi ;; interix[3-9]*) hardcode_direct=no hardcode_shlibpath_var=no hardcode_libdir_flag_spec='${wl}-rpath,$libdir' export_dynamic_flag_spec='${wl}-E' # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc. # Instead, shared libraries are loaded at an image base (0x10000000 by # default) and relocated if they conflict, which is a slow very memory # consuming and fragmenting process. To avoid this, we pick a random, # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link # time. Moving up from 0x10000000 also allows more sbrk(2) space. 
archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' archive_expsym_cmds='sed "s,^,_," $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--retain-symbols-file,$output_objdir/$soname.expsym ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' ;; gnu* | linux* | k*bsd*-gnu) if $LD --help 2>&1 | grep ': supported targets:.* elf' > /dev/null; then tmp_addflag= case $cc_basename,$host_cpu in pgcc*) # Portland Group C compiler whole_archive_flag_spec='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; $echo \"$new_convenience\"` ${wl}--no-whole-archive' tmp_addflag=' $pic_flag' ;; pgf77* | pgf90* | pgf95*) # Portland Group f77 and f90 compilers whole_archive_flag_spec='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; $echo \"$new_convenience\"` ${wl}--no-whole-archive' tmp_addflag=' $pic_flag -Mnomain' ;; ecc*,ia64* | icc*,ia64*) # Intel C compiler on ia64 tmp_addflag=' -i_dynamic' ;; efc*,ia64* | ifort*,ia64*) # Intel Fortran compiler on ia64 tmp_addflag=' -i_dynamic -nofor_main' ;; ifc* | ifort*) # Intel Fortran compiler tmp_addflag=' -nofor_main' ;; esac case `$CC -V 2>&1 | sed 5q` in *Sun\ C*) # Sun C 5.9 whole_archive_flag_spec='${wl}--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; $echo \"$new_convenience\"` ${wl}--no-whole-archive' tmp_sharedflag='-G' ;; *Sun\ F*) # Sun Fortran 8.3 tmp_sharedflag='-G' ;; *) tmp_sharedflag='-shared' ;; esac archive_cmds='$CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' if test $supports_anon_versioning = yes; then 
archive_expsym_cmds='$echo "{ global:" > $output_objdir/$libname.ver~ cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ $echo "local: *; };" >> $output_objdir/$libname.ver~ $CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-version-script ${wl}$output_objdir/$libname.ver -o $lib' fi else ld_shlibs=no fi ;; netbsd*) if echo __ELF__ | $CC -E - | grep __ELF__ >/dev/null; then archive_cmds='$LD -Bshareable $libobjs $deplibs $linker_flags -o $lib' wlarc= else archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' archive_expsym_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' fi ;; solaris*) if $LD -v 2>&1 | grep 'BFD 2\.8' > /dev/null; then ld_shlibs=no cat <&2 *** Warning: The releases 2.8.* of the GNU linker cannot reliably *** create shared libraries on Solaris systems. Therefore, libtool *** is disabling shared libraries support. We urge you to upgrade GNU *** binutils to release 2.9.1 or newer. Another option is to modify *** your PATH or compiler configuration so that the native linker is *** used, and then restart. EOF elif $LD --help 2>&1 | grep ': supported targets:.* elf' > /dev/null; then archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' archive_expsym_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' else ld_shlibs=no fi ;; sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX*) case `$LD -v 2>&1` in *\ [01].* | *\ 2.[0-9].* | *\ 2.1[0-5].*) ld_shlibs=no cat <<_LT_EOF 1>&2 *** Warning: Releases of the GNU linker prior to 2.16.91.0.3 can not *** reliably create shared libraries on SCO systems. Therefore, libtool *** is disabling shared libraries support. We urge you to upgrade GNU *** binutils to release 2.16.91.0.3 or newer. 
Another option is to modify *** your PATH or compiler configuration so that the native linker is *** used, and then restart. _LT_EOF ;; *) if $LD --help 2>&1 | grep ': supported targets:.* elf' > /dev/null; then hardcode_libdir_flag_spec='`test -z "$SCOABSPATH" && echo ${wl}-rpath,$libdir`' archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname,\${SCOABSPATH:+${install_libdir}/}$soname -o $lib' archive_expsym_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname,\${SCOABSPATH:+${install_libdir}/}$soname,-retain-symbols-file,$export_symbols -o $lib' else ld_shlibs=no fi ;; esac ;; sunos4*) archive_cmds='$LD -assert pure-text -Bshareable -o $lib $libobjs $deplibs $linker_flags' wlarc= hardcode_direct=yes hardcode_shlibpath_var=no ;; *) if $LD --help 2>&1 | grep ': supported targets:.* elf' > /dev/null; then archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' archive_expsym_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' else ld_shlibs=no fi ;; esac if test "$ld_shlibs" = no; then runpath_var= hardcode_libdir_flag_spec= export_dynamic_flag_spec= whole_archive_flag_spec= fi else # PORTME fill in a description of your system's linker (not GNU ld) case $host_os in aix3*) allow_undefined_flag=unsupported always_export_symbols=yes archive_expsym_cmds='$LD -o $output_objdir/$soname $libobjs $deplibs $linker_flags -bE:$export_symbols -T512 -H512 -bM:SRE~$AR $AR_FLAGS $lib $output_objdir/$soname' # Note: this linker hardcodes the directories in LIBPATH if there # are no directories specified by -L. hardcode_minus_L=yes if test "$GCC" = yes && test -z "$lt_prog_compiler_static"; then # Neither direct hardcoding nor static linking is supported with a # broken collect2. 
hardcode_direct=unsupported fi ;; aix4* | aix5*) if test "$host_cpu" = ia64; then # On IA64, the linker does run time linking by default, so we don't # have to do anything special. aix_use_runtimelinking=no exp_sym_flag='-Bexport' no_entry_flag="" else # If we're using GNU nm, then we don't want the "-C" option. # -C means demangle to AIX nm, but means don't demangle with GNU nm if $NM -V 2>&1 | grep 'GNU' > /dev/null; then export_symbols_cmds='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$2 == "T") || (\$2 == "D") || (\$2 == "B")) && (substr(\$3,1,1) != ".")) { print \$3 } }'\'' | sort -u > $export_symbols' else export_symbols_cmds='$NM -BCpg $libobjs $convenience | awk '\''{ if (((\$2 == "T") || (\$2 == "D") || (\$2 == "B")) && (substr(\$3,1,1) != ".")) { print \$3 } }'\'' | sort -u > $export_symbols' fi aix_use_runtimelinking=no # Test if we are trying to use run time linking or normal # AIX style linking. If -brtl is somewhere in LDFLAGS, we # need to do runtime linking. case $host_os in aix4.[23]|aix4.[23].*|aix5*) for ld_flag in $LDFLAGS; do if (test $ld_flag = "-brtl" || test $ld_flag = "-Wl,-brtl"); then aix_use_runtimelinking=yes break fi done ;; esac exp_sym_flag='-bexport' no_entry_flag='-bnoentry' fi # When large executables or shared objects are built, AIX ld can # have problems creating the table of contents. If linking a library # or program results in "error TOC overflow" add -mminimal-toc to # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS. 
archive_cmds='' hardcode_direct=yes hardcode_libdir_separator=':' link_all_deplibs=yes if test "$GCC" = yes; then case $host_os in aix4.[012]|aix4.[012].*) # We only want to do this on AIX 4.2 and lower, the check # below for broken collect2 doesn't work under 4.3+ collect2name=`${CC} -print-prog-name=collect2` if test -f "$collect2name" && \ strings "$collect2name" | grep resolve_lib_name >/dev/null then # We have reworked collect2 : else # We have old collect2 hardcode_direct=unsupported # It fails to find uninstalled libraries when the uninstalled # path is not listed in the libpath. Setting hardcode_minus_L # to unsupported forces relinking hardcode_minus_L=yes hardcode_libdir_flag_spec='-L$libdir' hardcode_libdir_separator= fi ;; esac shared_flag='-shared' if test "$aix_use_runtimelinking" = yes; then shared_flag="$shared_flag "'${wl}-G' fi else # not using gcc if test "$host_cpu" = ia64; then # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release # chokes on -Wl,-G. The following line is correct: shared_flag='-G' else if test "$aix_use_runtimelinking" = yes; then shared_flag='${wl}-G' else shared_flag='${wl}-bM:SRE' fi fi fi # It seems that -bexpall does not export symbols beginning with # underscore (_), so it is better to generate a list of symbols to export. always_export_symbols=yes if test "$aix_use_runtimelinking" = yes; then # Warning - without using the other runtime loading flags (-brtl), # -berok will link without error, but may produce a broken library. allow_undefined_flag='-berok' # Determine the default libpath from the value encoded in an empty executable. cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ int main () { ; return 0; } _ACEOF rm -f conftest.$ac_objext conftest$ac_exeext if { (ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_link") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest$ac_exeext && $as_test_x conftest$ac_exeext; then lt_aix_libpath_sed=' /Import File Strings/,/^$/ { /^0/ { s/^0 *\(.*\)$/\1/ p } }' aix_libpath=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` # Check for a 64-bit object if we didn't find anything. if test -z "$aix_libpath"; then aix_libpath=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` fi else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 fi rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ conftest$ac_exeext conftest.$ac_ext if test -z "$aix_libpath"; then aix_libpath="/usr/lib:/lib"; fi hardcode_libdir_flag_spec='${wl}-blibpath:$libdir:'"$aix_libpath" archive_expsym_cmds="\$CC"' -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags `if test "x${allow_undefined_flag}" != "x"; then echo "${wl}${allow_undefined_flag}"; else :; fi` '"\${wl}$exp_sym_flag:\$export_symbols $shared_flag" else if test "$host_cpu" = ia64; then hardcode_libdir_flag_spec='${wl}-R $libdir:/usr/lib:/lib' allow_undefined_flag="-z nodefs" archive_expsym_cmds="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags ${wl}${allow_undefined_flag} '"\${wl}$exp_sym_flag:\$export_symbols" else # Determine the default libpath from the value encoded in an empty executable. cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. 
*/ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ int main () { ; return 0; } _ACEOF rm -f conftest.$ac_objext conftest$ac_exeext if { (ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_link") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest$ac_exeext && $as_test_x conftest$ac_exeext; then lt_aix_libpath_sed=' /Import File Strings/,/^$/ { /^0/ { s/^0 *\(.*\)$/\1/ p } }' aix_libpath=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` # Check for a 64-bit object if we didn't find anything. if test -z "$aix_libpath"; then aix_libpath=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` fi else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 fi rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ conftest$ac_exeext conftest.$ac_ext if test -z "$aix_libpath"; then aix_libpath="/usr/lib:/lib"; fi hardcode_libdir_flag_spec='${wl}-blibpath:$libdir:'"$aix_libpath" # Warning - without using the other run time loading flags, # -berok will link without error, but may produce a broken library. no_undefined_flag=' ${wl}-bernotok' allow_undefined_flag=' ${wl}-berok' # Exported symbols can be pulled into shared objects from archives whole_archive_flag_spec='$convenience' archive_cmds_need_lc=yes # This is similar to how AIX traditionally builds its shared libraries. 
archive_expsym_cmds="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs ${wl}-bnoentry $compiler_flags ${wl}-bE:$export_symbols${allow_undefined_flag}~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$soname' fi fi ;; amigaos*) archive_cmds='$rm $output_objdir/a2ixlibrary.data~$echo "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$echo "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$echo "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$echo "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' hardcode_libdir_flag_spec='-L$libdir' hardcode_minus_L=yes # see comment about different semantics on the GNU ld section ld_shlibs=no ;; bsdi[45]*) export_dynamic_flag_spec=-rdynamic ;; cygwin* | mingw* | pw32*) # When not using gcc, we currently assume that we are using # Microsoft Visual C++. # hardcode_libdir_flag_spec is actually meaningless, as there is # no search path for DLLs. hardcode_libdir_flag_spec=' ' allow_undefined_flag=unsupported # Tell ltmain to make .lib files, not .a files. libext=lib # Tell ltmain to make .dll files, not .so files. shrext_cmds=".dll" # FIXME: Setting linknames here is a bad hack. archive_cmds='$CC -o $lib $libobjs $compiler_flags `echo "$deplibs" | $SED -e '\''s/ -lc$//'\''` -link -dll~linknames=' # The linker will automatically build a .lib file if we build a DLL. old_archive_From_new_cmds='true' # FIXME: Should let the user specify the lib program. 
old_archive_cmds='lib -OUT:$oldlib$oldobjs$old_deplibs' fix_srcfile_path='`cygpath -w "$srcfile"`' enable_shared_with_static_runtimes=yes ;; darwin* | rhapsody*) case $host_os in rhapsody* | darwin1.[012]) allow_undefined_flag='${wl}-undefined ${wl}suppress' ;; *) # Darwin 1.3 on if test -z ${MACOSX_DEPLOYMENT_TARGET} ; then allow_undefined_flag='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' else case ${MACOSX_DEPLOYMENT_TARGET} in 10.[012]) allow_undefined_flag='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' ;; 10.*) allow_undefined_flag='${wl}-undefined ${wl}dynamic_lookup' ;; esac fi ;; esac archive_cmds_need_lc=no hardcode_direct=no hardcode_automatic=yes hardcode_shlibpath_var=unsupported whole_archive_flag_spec='' link_all_deplibs=yes if test "$GCC" = yes ; then output_verbose_link_cmd='echo' archive_cmds='$CC -dynamiclib $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags -install_name $rpath/$soname $verstring' module_cmds='$CC $allow_undefined_flag -o $lib -bundle $libobjs $deplibs$compiler_flags' # Don't fix this by using the ld -exported_symbols_list flag, it doesn't exist in older darwin lds archive_expsym_cmds='sed -e "s,#.*,," -e "s,^[ ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC -dynamiclib $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags -install_name $rpath/$soname $verstring~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}' module_expsym_cmds='sed -e "s,#.*,," -e "s,^[ ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC $allow_undefined_flag -o $lib -bundle $libobjs $deplibs$compiler_flags~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}' else case $cc_basename in xlc*) output_verbose_link_cmd='echo' archive_cmds='$CC -qmkshrobj $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-install_name ${wl}`echo $rpath/$soname` $xlcverstring' module_cmds='$CC $allow_undefined_flag -o $lib -bundle 
$libobjs $deplibs$compiler_flags' # Don't fix this by using the ld -exported_symbols_list flag, it doesn't exist in older darwin lds archive_expsym_cmds='sed -e "s,#.*,," -e "s,^[ ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC -qmkshrobj $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-install_name ${wl}$rpath/$soname $xlcverstring~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}' module_expsym_cmds='sed -e "s,#.*,," -e "s,^[ ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC $allow_undefined_flag -o $lib -bundle $libobjs $deplibs$compiler_flags~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}' ;; *) ld_shlibs=no ;; esac fi ;; dgux*) archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' hardcode_libdir_flag_spec='-L$libdir' hardcode_shlibpath_var=no ;; freebsd1*) ld_shlibs=no ;; # FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor # support. Future versions do this automatically, but an explicit c++rt0.o # does not break anything, and helps significantly (at the cost of a little # extra space). freebsd2.2*) archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags /usr/lib/c++rt0.o' hardcode_libdir_flag_spec='-R$libdir' hardcode_direct=yes hardcode_shlibpath_var=no ;; # Unfortunately, older versions of FreeBSD 2 do not have this feature. freebsd2*) archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' hardcode_direct=yes hardcode_minus_L=yes hardcode_shlibpath_var=no ;; # FreeBSD 3 and greater uses gcc -shared to do shared libraries. 
freebsd* | dragonfly*) archive_cmds='$CC -shared -o $lib $libobjs $deplibs $compiler_flags' hardcode_libdir_flag_spec='-R$libdir' hardcode_direct=yes hardcode_shlibpath_var=no ;; hpux9*) if test "$GCC" = yes; then archive_cmds='$rm $output_objdir/$soname~$CC -shared -fPIC ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $libobjs $deplibs $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' else archive_cmds='$rm $output_objdir/$soname~$LD -b +b $install_libdir -o $output_objdir/$soname $libobjs $deplibs $linker_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' fi hardcode_libdir_flag_spec='${wl}+b ${wl}$libdir' hardcode_libdir_separator=: hardcode_direct=yes # hardcode_minus_L: Not really in the search PATH, # but as the default location of the library. hardcode_minus_L=yes export_dynamic_flag_spec='${wl}-E' ;; hpux10*) if test "$GCC" = yes -a "$with_gnu_ld" = no; then archive_cmds='$CC -shared -fPIC ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags' else archive_cmds='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags' fi if test "$with_gnu_ld" = no; then hardcode_libdir_flag_spec='${wl}+b ${wl}$libdir' hardcode_libdir_separator=: hardcode_direct=yes export_dynamic_flag_spec='${wl}-E' # hardcode_minus_L: Not really in the search PATH, # but as the default location of the library. 
hardcode_minus_L=yes fi ;; hpux11*) if test "$GCC" = yes -a "$with_gnu_ld" = no; then case $host_cpu in hppa*64*) archive_cmds='$CC -shared ${wl}+h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' ;; ia64*) archive_cmds='$CC -shared ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' ;; *) archive_cmds='$CC -shared -fPIC ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags' ;; esac else case $host_cpu in hppa*64*) archive_cmds='$CC -b ${wl}+h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' ;; ia64*) archive_cmds='$CC -b ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' ;; *) archive_cmds='$CC -b ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags' ;; esac fi if test "$with_gnu_ld" = no; then hardcode_libdir_flag_spec='${wl}+b ${wl}$libdir' hardcode_libdir_separator=: case $host_cpu in hppa*64*|ia64*) hardcode_libdir_flag_spec_ld='+b $libdir' hardcode_direct=no hardcode_shlibpath_var=no ;; *) hardcode_direct=yes export_dynamic_flag_spec='${wl}-E' # hardcode_minus_L: Not really in the search PATH, # but as the default location of the library. 
hardcode_minus_L=yes ;; esac fi ;; irix5* | irix6* | nonstopux*) if test "$GCC" = yes; then archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' else archive_cmds='$LD -shared $libobjs $deplibs $linker_flags -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib' hardcode_libdir_flag_spec_ld='-rpath $libdir' fi hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' hardcode_libdir_separator=: link_all_deplibs=yes ;; netbsd*) if echo __ELF__ | $CC -E - | grep __ELF__ >/dev/null; then archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' # a.out else archive_cmds='$LD -shared -o $lib $libobjs $deplibs $linker_flags' # ELF fi hardcode_libdir_flag_spec='-R$libdir' hardcode_direct=yes hardcode_shlibpath_var=no ;; newsos6) archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' hardcode_direct=yes hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' hardcode_libdir_separator=: hardcode_shlibpath_var=no ;; openbsd*) if test -f /usr/libexec/ld.so; then hardcode_direct=yes hardcode_shlibpath_var=no if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-retain-symbols-file,$export_symbols' hardcode_libdir_flag_spec='${wl}-rpath,$libdir' export_dynamic_flag_spec='${wl}-E' else case $host_os in openbsd[01].* | openbsd2.[0-7] | openbsd2.[0-7].*) archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' hardcode_libdir_flag_spec='-R$libdir' ;; *) archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' hardcode_libdir_flag_spec='${wl}-rpath,$libdir' ;; esac fi 
else ld_shlibs=no fi ;; os2*) hardcode_libdir_flag_spec='-L$libdir' hardcode_minus_L=yes allow_undefined_flag=unsupported archive_cmds='$echo "LIBRARY $libname INITINSTANCE" > $output_objdir/$libname.def~$echo "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~$echo DATA >> $output_objdir/$libname.def~$echo " SINGLE NONSHARED" >> $output_objdir/$libname.def~$echo EXPORTS >> $output_objdir/$libname.def~emxexp $libobjs >> $output_objdir/$libname.def~$CC -Zdll -Zcrtdll -o $lib $libobjs $deplibs $compiler_flags $output_objdir/$libname.def' old_archive_From_new_cmds='emximp -o $output_objdir/$libname.a $output_objdir/$libname.def' ;; osf3*) if test "$GCC" = yes; then allow_undefined_flag=' ${wl}-expect_unresolved ${wl}\*' archive_cmds='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' else allow_undefined_flag=' -expect_unresolved \*' archive_cmds='$LD -shared${allow_undefined_flag} $libobjs $deplibs $linker_flags -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib' fi hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' hardcode_libdir_separator=: ;; osf4* | osf5*) # as osf3* with the addition of -msym flag if test "$GCC" = yes; then allow_undefined_flag=' ${wl}-expect_unresolved ${wl}\*' archive_cmds='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags ${wl}-msym ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' else allow_undefined_flag=' -expect_unresolved \*' archive_cmds='$LD -shared${allow_undefined_flag} $libobjs $deplibs $linker_flags -msym -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry 
${output_objdir}/so_locations -o $lib' archive_expsym_cmds='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done; echo "-hidden">> $lib.exp~ $LD -shared${allow_undefined_flag} -input $lib.exp $linker_flags $libobjs $deplibs -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib~$rm $lib.exp' # Both c and cxx compiler support -rpath directly hardcode_libdir_flag_spec='-rpath $libdir' fi hardcode_libdir_separator=: ;; solaris*) no_undefined_flag=' -z text' if test "$GCC" = yes; then wlarc='${wl}' archive_cmds='$CC -shared ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds='$echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~$echo "local: *; };" >> $lib.exp~ $CC -shared ${wl}-M ${wl}$lib.exp ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags~$rm $lib.exp' else wlarc='' archive_cmds='$LD -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $linker_flags' archive_expsym_cmds='$echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~$echo "local: *; };" >> $lib.exp~ $LD -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$rm $lib.exp' fi hardcode_libdir_flag_spec='-R$libdir' hardcode_shlibpath_var=no case $host_os in solaris2.[0-5] | solaris2.[0-5].*) ;; *) # The compiler driver will combine and reorder linker options, # but understands `-z linker_flag'. GCC discards it without `$wl', # but is careful enough not to reorder. # Supported since Solaris 2.6 (maybe 2.5.1?) 
if test "$GCC" = yes; then whole_archive_flag_spec='${wl}-z ${wl}allextract$convenience ${wl}-z ${wl}defaultextract' else whole_archive_flag_spec='-z allextract$convenience -z defaultextract' fi ;; esac link_all_deplibs=yes ;; sunos4*) if test "x$host_vendor" = xsequent; then # Use $CC to link under sequent, because it throws in some extra .o # files that make .init and .fini sections work. archive_cmds='$CC -G ${wl}-h $soname -o $lib $libobjs $deplibs $compiler_flags' else archive_cmds='$LD -assert pure-text -Bstatic -o $lib $libobjs $deplibs $linker_flags' fi hardcode_libdir_flag_spec='-L$libdir' hardcode_direct=yes hardcode_minus_L=yes hardcode_shlibpath_var=no ;; sysv4) case $host_vendor in sni) archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' hardcode_direct=yes # is this really true??? ;; siemens) ## LD is ld it makes a PLAMLIB ## CC just makes a GrossModule. archive_cmds='$LD -G -o $lib $libobjs $deplibs $linker_flags' reload_cmds='$CC -r -o $output$reload_objs' hardcode_direct=no ;; motorola) archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' hardcode_direct=no #Motorola manual says yes, but my tests say they lie ;; esac runpath_var='LD_RUN_PATH' hardcode_shlibpath_var=no ;; sysv4.3*) archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' hardcode_shlibpath_var=no export_dynamic_flag_spec='-Bexport' ;; sysv4*MP*) if test -d /usr/nec; then archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' hardcode_shlibpath_var=no runpath_var=LD_RUN_PATH hardcode_runpath_var=yes ld_shlibs=yes fi ;; sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[01].[10]* | unixware7* | sco3.2v5.0.[024]*) no_undefined_flag='${wl}-z,text' archive_cmds_need_lc=no hardcode_shlibpath_var=no runpath_var='LD_RUN_PATH' if test "$GCC" = yes; then archive_cmds='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib 
$libobjs $deplibs $compiler_flags' else archive_cmds='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' fi ;; sysv5* | sco3.2v5* | sco5v6*) # Note: We can NOT use -z defs as we might desire, because we do not # link with -lc, and that would cause any symbols used from libc to # always be unresolved, which means just about no library would # ever link correctly. If we're not using GNU ld we use -z text # though, which does catch some bad symbols but isn't as heavy-handed # as -z defs. no_undefined_flag='${wl}-z,text' allow_undefined_flag='${wl}-z,nodefs' archive_cmds_need_lc=no hardcode_shlibpath_var=no hardcode_libdir_flag_spec='`test -z "$SCOABSPATH" && echo ${wl}-R,$libdir`' hardcode_libdir_separator=':' link_all_deplibs=yes export_dynamic_flag_spec='${wl}-Bexport' runpath_var='LD_RUN_PATH' if test "$GCC" = yes; then archive_cmds='$CC -shared ${wl}-h,\${SCOABSPATH:+${install_libdir}/}$soname -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,\${SCOABSPATH:+${install_libdir}/}$soname -o $lib $libobjs $deplibs $compiler_flags' else archive_cmds='$CC -G ${wl}-h,\${SCOABSPATH:+${install_libdir}/}$soname -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,\${SCOABSPATH:+${install_libdir}/}$soname -o $lib $libobjs $deplibs $compiler_flags' fi ;; uts4*) archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' hardcode_libdir_flag_spec='-L$libdir' hardcode_shlibpath_var=no ;; *) ld_shlibs=no ;; esac fi { echo "$as_me:$LINENO: result: $ld_shlibs" >&5 echo "${ECHO_T}$ld_shlibs" >&6; } test "$ld_shlibs" = no && can_build_shared=no # # Do we need to explicitly link libc? 
# case "x$archive_cmds_need_lc" in x|xyes) # Assume -lc should be added archive_cmds_need_lc=yes if test "$enable_shared" = yes && test "$GCC" = yes; then case $archive_cmds in *'~'*) # FIXME: we may have to deal with multi-command sequences. ;; '$CC '*) # Test whether the compiler implicitly links with -lc since on some # systems, -lgcc has to come before -lc. If gcc already passes -lc # to ld, don't add -lc before -lgcc. { echo "$as_me:$LINENO: checking whether -lc should be explicitly linked in" >&5 echo $ECHO_N "checking whether -lc should be explicitly linked in... $ECHO_C" >&6; } $rm conftest* echo "$lt_simple_compile_test_code" > conftest.$ac_ext if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 (eval $ac_compile) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } 2>conftest.err; then soname=conftest lib=conftest libobjs=conftest.$ac_objext deplibs= wl=$lt_prog_compiler_wl pic_flag=$lt_prog_compiler_pic compiler_flags=-v linker_flags=-v verstring= output_objdir=. libname=conftest lt_save_allow_undefined_flag=$allow_undefined_flag allow_undefined_flag= if { (eval echo "$as_me:$LINENO: \"$archive_cmds 2\>\&1 \| grep \" -lc \" \>/dev/null 2\>\&1\"") >&5 (eval $archive_cmds 2\>\&1 \| grep \" -lc \" \>/dev/null 2\>\&1) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } then archive_cmds_need_lc=no else archive_cmds_need_lc=yes fi allow_undefined_flag=$lt_save_allow_undefined_flag else cat conftest.err 1>&5 fi $rm conftest* { echo "$as_me:$LINENO: result: $archive_cmds_need_lc" >&5 echo "${ECHO_T}$archive_cmds_need_lc" >&6; } ;; esac fi ;; esac { echo "$as_me:$LINENO: checking dynamic linker characteristics" >&5 echo $ECHO_N "checking dynamic linker characteristics... 
$ECHO_C" >&6; } library_names_spec= libname_spec='lib$name' soname_spec= shrext_cmds=".so" postinstall_cmds= postuninstall_cmds= finish_cmds= finish_eval= shlibpath_var= shlibpath_overrides_runpath=unknown version_type=none dynamic_linker="$host_os ld.so" sys_lib_dlsearch_path_spec="/lib /usr/lib" if test "$GCC" = yes; then case $host_os in darwin*) lt_awk_arg="/^libraries:/,/LR/" ;; *) lt_awk_arg="/^libraries:/" ;; esac lt_search_path_spec=`$CC -print-search-dirs | awk $lt_awk_arg | $SED -e "s/^libraries://" -e "s,=/,/,g"` if echo "$lt_search_path_spec" | grep ';' >/dev/null ; then # if the path contains ";" then we assume it to be the separator # otherwise default to the standard path separator (i.e. ":") - it is # assumed that no part of a normal pathname contains ";" but that should # okay in the real world where ";" in dirpaths is itself problematic. lt_search_path_spec=`echo "$lt_search_path_spec" | $SED -e 's/;/ /g'` else lt_search_path_spec=`echo "$lt_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` fi # Ok, now we have the path, separated by spaces, we can step through it # and add multilib dir if necessary. 
lt_tmp_lt_search_path_spec= lt_multi_os_dir=`$CC $CPPFLAGS $CFLAGS $LDFLAGS -print-multi-os-directory 2>/dev/null` for lt_sys_path in $lt_search_path_spec; do if test -d "$lt_sys_path/$lt_multi_os_dir"; then lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path/$lt_multi_os_dir" else test -d "$lt_sys_path" && \ lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path" fi done lt_search_path_spec=`echo $lt_tmp_lt_search_path_spec | awk ' BEGIN {RS=" "; FS="/|\n";} { lt_foo=""; lt_count=0; for (lt_i = NF; lt_i > 0; lt_i--) { if ($lt_i != "" && $lt_i != ".") { if ($lt_i == "..") { lt_count++; } else { if (lt_count == 0) { lt_foo="/" $lt_i lt_foo; } else { lt_count--; } } } } if (lt_foo != "") { lt_freq[lt_foo]++; } if (lt_freq[lt_foo] == 1) { print lt_foo; } }'` sys_lib_search_path_spec=`echo $lt_search_path_spec` else sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib" fi need_lib_prefix=unknown hardcode_into_libs=no # when you set need_version to no, make sure it does not cause -set_version # flags to be left without arguments need_version=unknown case $host_os in aix3*) version_type=linux library_names_spec='${libname}${release}${shared_ext}$versuffix $libname.a' shlibpath_var=LIBPATH # AIX 3 has no versioning support, so we append a major version to the name. soname_spec='${libname}${release}${shared_ext}$major' ;; aix4* | aix5*) version_type=linux need_lib_prefix=no need_version=no hardcode_into_libs=yes if test "$host_cpu" = ia64; then # AIX 5 supports IA64 library_names_spec='${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext}$versuffix $libname${shared_ext}' shlibpath_var=LD_LIBRARY_PATH else # With GCC up to 2.95.x, collect2 would create an import file # for dependence libraries. The import file would start with # the line `#! .'. This would cause the generated library to # depend on `.', always an invalid library. This was fixed in # development snapshots of GCC prior to 3.0. 
case $host_os in aix4 | aix4.[01] | aix4.[01].*) if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)' echo ' yes ' echo '#endif'; } | ${CC} -E - | grep yes > /dev/null; then : else can_build_shared=no fi ;; esac # AIX (on Power*) has no versioning support, so currently we can not hardcode correct # soname into executable. Probably we can add versioning support to # collect2, so additional links can be useful in future. if test "$aix_use_runtimelinking" = yes; then # If using run time linking (on AIX 4.2 or later) use lib.so # instead of lib.a to let people know that these are not # typical AIX shared libraries. library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' else # We preserve .a as extension for shared libraries through AIX4.2 # and later when we are not doing run time linking. library_names_spec='${libname}${release}.a $libname.a' soname_spec='${libname}${release}${shared_ext}$major' fi shlibpath_var=LIBPATH fi ;; amigaos*) library_names_spec='$libname.ixlibrary $libname.a' # Create ${libname}_ixlibrary.a entries in /sys/libs. 
finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`$echo "X$lib" | $Xsed -e '\''s%^.*/\([^/]*\)\.ixlibrary$%\1%'\''`; test $rm /sys/libs/${libname}_ixlibrary.a; $show "cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a"; cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a || exit 1; done' ;; beos*) library_names_spec='${libname}${shared_ext}' dynamic_linker="$host_os ld.so" shlibpath_var=LIBRARY_PATH ;; bsdi[45]*) version_type=linux need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir' shlibpath_var=LD_LIBRARY_PATH sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib" sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib" # the default ld.so.conf also contains /usr/contrib/lib and # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow # libtool to hard-code these into programs ;; cygwin* | mingw* | pw32*) version_type=windows shrext_cmds=".dll" need_version=no need_lib_prefix=no case $GCC,$host_os in yes,cygwin* | yes,mingw* | yes,pw32*) library_names_spec='$libname.dll.a' # DLL is installed to $(libdir)/../bin by postinstall_cmds postinstall_cmds='base_file=`basename \${file}`~ dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i;echo \$dlname'\''`~ dldir=$destdir/`dirname \$dlpath`~ test -d \$dldir || mkdir -p \$dldir~ $install_prog $dir/$dlname \$dldir/$dlname~ chmod a+x \$dldir/$dlname' postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. 
$file; echo \$dlname'\''`~ dlpath=$dir/\$dldll~ $rm \$dlpath' shlibpath_overrides_runpath=yes case $host_os in cygwin*) # Cygwin DLLs use 'cyg' prefix rather than 'lib' soname_spec='`echo ${libname} | sed -e 's/^lib/cyg/'``echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}' sys_lib_search_path_spec="/usr/lib /lib/w32api /lib /usr/local/lib" ;; mingw*) # MinGW DLLs use traditional 'lib' prefix soname_spec='${libname}`echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}' sys_lib_search_path_spec=`$CC -print-search-dirs | grep "^libraries:" | $SED -e "s/^libraries://" -e "s,=/,/,g"` if echo "$sys_lib_search_path_spec" | grep ';[c-zC-Z]:/' >/dev/null; then # It is most probably a Windows format PATH printed by # mingw gcc, but we are running on Cygwin. Gcc prints its search # path with ; separators, and with drive letters. We can handle the # drive letters (cygwin fileutils understands them), so leave them, # especially as we might pass files found there to a mingw objdump, # which wouldn't understand a cygwinified path. Ahh. sys_lib_search_path_spec=`echo "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'` else sys_lib_search_path_spec=`echo "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` fi ;; pw32*) # pw32 DLLs use 'pw' prefix rather than 'lib' library_names_spec='`echo ${libname} | sed -e 's/^lib/pw/'``echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}' ;; esac ;; *) library_names_spec='${libname}`echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext} $libname.lib' ;; esac dynamic_linker='Win32 ld.exe' # FIXME: first we should search . 
and the directory the executable is in shlibpath_var=PATH ;; darwin* | rhapsody*) dynamic_linker="$host_os dyld" version_type=darwin need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${versuffix}$shared_ext ${libname}${release}${major}$shared_ext ${libname}$shared_ext' soname_spec='${libname}${release}${major}$shared_ext' shlibpath_overrides_runpath=yes shlibpath_var=DYLD_LIBRARY_PATH shrext_cmds='`test .$module = .yes && echo .so || echo .dylib`' sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/local/lib" sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib' ;; dgux*) version_type=linux need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname$shared_ext' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH ;; freebsd1*) dynamic_linker=no ;; freebsd* | dragonfly*) # DragonFly does not have aout. When/if they implement a new # versioning mechanism, adjust this. 
if test -x /usr/bin/objformat; then objformat=`/usr/bin/objformat` else case $host_os in freebsd[123]*) objformat=aout ;; *) objformat=elf ;; esac fi version_type=freebsd-$objformat case $version_type in freebsd-elf*) library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}' need_version=no need_lib_prefix=no ;; freebsd-*) library_names_spec='${libname}${release}${shared_ext}$versuffix $libname${shared_ext}$versuffix' need_version=yes ;; esac shlibpath_var=LD_LIBRARY_PATH case $host_os in freebsd2*) shlibpath_overrides_runpath=yes ;; freebsd3.[01]* | freebsdelf3.[01]*) shlibpath_overrides_runpath=yes hardcode_into_libs=yes ;; freebsd3.[2-9]* | freebsdelf3.[2-9]* | \ freebsd4.[0-5] | freebsdelf4.[0-5] | freebsd4.1.1 | freebsdelf4.1.1) shlibpath_overrides_runpath=no hardcode_into_libs=yes ;; *) # from 4.6 on, and DragonFly shlibpath_overrides_runpath=yes hardcode_into_libs=yes ;; esac ;; gnu*) version_type=linux need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH hardcode_into_libs=yes ;; hpux9* | hpux10* | hpux11*) # Give a soname corresponding to the major version so that dld.sl refuses to # link against other versions. version_type=sunos need_lib_prefix=no need_version=no case $host_cpu in ia64*) shrext_cmds='.so' hardcode_into_libs=yes dynamic_linker="$host_os dld.so" shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. 
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' if test "X$HPUX_IA64_MODE" = X32; then sys_lib_search_path_spec="/usr/lib/hpux32 /usr/local/lib/hpux32 /usr/local/lib" else sys_lib_search_path_spec="/usr/lib/hpux64 /usr/local/lib/hpux64" fi sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec ;; hppa*64*) shrext_cmds='.sl' hardcode_into_libs=yes dynamic_linker="$host_os dld.sl" shlibpath_var=LD_LIBRARY_PATH # How should we handle SHLIB_PATH shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' sys_lib_search_path_spec="/usr/lib/pa20_64 /usr/ccs/lib/pa20_64" sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec ;; *) shrext_cmds='.sl' dynamic_linker="$host_os dld.sl" shlibpath_var=SHLIB_PATH shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' ;; esac # HP-UX runs *really* slowly unless shared libraries are mode 555. 
postinstall_cmds='chmod 555 $lib' ;; interix[3-9]*) version_type=linux need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' dynamic_linker='Interix 3.x ld.so.1 (PE, like ELF)' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no hardcode_into_libs=yes ;; irix5* | irix6* | nonstopux*) case $host_os in nonstopux*) version_type=nonstopux ;; *) if test "$lt_cv_prog_gnu_ld" = yes; then version_type=linux else version_type=irix fi ;; esac need_lib_prefix=no need_version=no soname_spec='${libname}${release}${shared_ext}$major' library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext} $libname${shared_ext}' case $host_os in irix5* | nonstopux*) libsuff= shlibsuff= ;; *) case $LD in # libtool.m4 will add one of these switches to LD *-32|*"-32 "|*-melf32bsmip|*"-melf32bsmip ") libsuff= shlibsuff= libmagic=32-bit;; *-n32|*"-n32 "|*-melf32bmipn32|*"-melf32bmipn32 ") libsuff=32 shlibsuff=N32 libmagic=N32;; *-64|*"-64 "|*-melf64bmip|*"-melf64bmip ") libsuff=64 shlibsuff=64 libmagic=64-bit;; *) libsuff= shlibsuff= libmagic=never-match;; esac ;; esac shlibpath_var=LD_LIBRARY${shlibsuff}_PATH shlibpath_overrides_runpath=no sys_lib_search_path_spec="/usr/lib${libsuff} /lib${libsuff} /usr/local/lib${libsuff}" sys_lib_dlsearch_path_spec="/usr/lib${libsuff} /lib${libsuff}" hardcode_into_libs=yes ;; # No shared lib support for Linux oldld, aout, or coff. linux*oldld* | linux*aout* | linux*coff*) dynamic_linker=no ;; # This must be Linux ELF. 
linux* | k*bsd*-gnu) version_type=linux need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no # This implies no fast_install, which is unacceptable. # Some rework will be needed to allow for fast_install # before this can be enabled. hardcode_into_libs=yes sys_lib_search_path_spec="/usr/lib${libsuff} /lib${libsuff} /usr/local/lib${libsuff}" sys_lib_dlsearch_path_spec="/usr/lib${libsuff} /lib${libsuff}" # Append ld.so.conf contents to the search path if test -f /etc/ld.so.conf; then lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s 2>/dev/null", \$2)); skip = 1; } { if (!skip) print \$0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;/^[ ]*hwcap[ ]/d;s/[:, ]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;/^$/d' | tr '\n' ' '` sys_lib_dlsearch_path_spec="$sys_lib_dlsearch_path_spec $lt_ld_extra" fi # We used to test for /lib/ld.so.1 and disable shared libraries on # powerpc, because MkLinux only supported shared libraries with the # GNU dynamic linker. Since this was broken with cross compilers, # most powerpc-linux boxes support dynamic linking these days and # people can always --disable-shared, the test was removed, and we # assume the GNU/Linux dynamic linker is in use. 
dynamic_linker='GNU/Linux ld.so' ;; netbsd*) version_type=sunos need_lib_prefix=no need_version=no if echo __ELF__ | $CC -E - | grep __ELF__ >/dev/null; then library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' dynamic_linker='NetBSD (a.out) ld.so' else library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' dynamic_linker='NetBSD ld.elf_so' fi shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes hardcode_into_libs=yes ;; newsos6) version_type=linux library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes ;; nto-qnx*) version_type=linux need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes ;; openbsd*) version_type=sunos sys_lib_dlsearch_path_spec="/usr/lib" need_lib_prefix=no # Some older versions of OpenBSD (3.3 at least) *do* need versioned libs. 
case $host_os in openbsd3.3 | openbsd3.3.*) need_version=yes ;; *) need_version=no ;; esac library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' shlibpath_var=LD_LIBRARY_PATH if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then case $host_os in openbsd2.[89] | openbsd2.[89].*) shlibpath_overrides_runpath=no ;; *) shlibpath_overrides_runpath=yes ;; esac else shlibpath_overrides_runpath=yes fi ;; os2*) libname_spec='$name' shrext_cmds=".dll" need_lib_prefix=no library_names_spec='$libname${shared_ext} $libname.a' dynamic_linker='OS/2 ld.exe' shlibpath_var=LIBPATH ;; osf3* | osf4* | osf5*) version_type=osf need_lib_prefix=no need_version=no soname_spec='${libname}${release}${shared_ext}$major' library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' shlibpath_var=LD_LIBRARY_PATH sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib" sys_lib_dlsearch_path_spec="$sys_lib_search_path_spec" ;; rdos*) dynamic_linker=no ;; solaris*) version_type=linux need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes hardcode_into_libs=yes # ldd complains unless libraries are executable postinstall_cmds='chmod +x $lib' ;; sunos4*) version_type=sunos library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes if test "$with_gnu_ld" = yes; then need_lib_prefix=no fi need_version=yes ;; sysv4 | sysv4.3*) version_type=linux 
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH case $host_vendor in sni) shlibpath_overrides_runpath=no need_lib_prefix=no export_dynamic_flag_spec='${wl}-Blargedynsym' runpath_var=LD_RUN_PATH ;; siemens) need_lib_prefix=no ;; motorola) need_lib_prefix=no need_version=no shlibpath_overrides_runpath=no sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib' ;; esac ;; sysv4*MP*) if test -d /usr/nec ;then version_type=linux library_names_spec='$libname${shared_ext}.$versuffix $libname${shared_ext}.$major $libname${shared_ext}' soname_spec='$libname${shared_ext}.$major' shlibpath_var=LD_LIBRARY_PATH fi ;; sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) version_type=freebsd-elf need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH hardcode_into_libs=yes if test "$with_gnu_ld" = yes; then sys_lib_search_path_spec='/usr/local/lib /usr/gnu/lib /usr/ccs/lib /usr/lib /lib' shlibpath_overrides_runpath=no else sys_lib_search_path_spec='/usr/ccs/lib /usr/lib' shlibpath_overrides_runpath=yes case $host_os in sco3.2v5*) sys_lib_search_path_spec="$sys_lib_search_path_spec /lib" ;; esac fi sys_lib_dlsearch_path_spec='/usr/lib' ;; uts4*) version_type=linux library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH ;; *) dynamic_linker=no ;; esac { echo "$as_me:$LINENO: result: $dynamic_linker" >&5 echo "${ECHO_T}$dynamic_linker" >&6; } test "$dynamic_linker" = no && can_build_shared=no variables_saved_for_relink="PATH $shlibpath_var $runpath_var" if test "$GCC" = yes; then 
variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH" fi { echo "$as_me:$LINENO: checking how to hardcode library paths into programs" >&5 echo $ECHO_N "checking how to hardcode library paths into programs... $ECHO_C" >&6; } hardcode_action= if test -n "$hardcode_libdir_flag_spec" || \ test -n "$runpath_var" || \ test "X$hardcode_automatic" = "Xyes" ; then # We can hardcode non-existant directories. if test "$hardcode_direct" != no && # If the only mechanism to avoid hardcoding is shlibpath_var, we # have to relink, otherwise we might link with an installed library # when we should be linking with a yet-to-be-installed one ## test "$_LT_AC_TAGVAR(hardcode_shlibpath_var, )" != no && test "$hardcode_minus_L" != no; then # Linking always hardcodes the temporary library directory. hardcode_action=relink else # We can link without hardcoding, and we can hardcode nonexisting dirs. hardcode_action=immediate fi else # We cannot hardcode anything, or else we can only hardcode existing # directories. hardcode_action=unsupported fi { echo "$as_me:$LINENO: result: $hardcode_action" >&5 echo "${ECHO_T}$hardcode_action" >&6; } if test "$hardcode_action" = relink; then # Fast installation is not supported enable_fast_install=no elif test "$shlibpath_overrides_runpath" = yes || test "$enable_shared" = no; then # Fast installation is not necessary enable_fast_install=needless fi striplib= old_striplib= { echo "$as_me:$LINENO: checking whether stripping libraries is possible" >&5 echo $ECHO_N "checking whether stripping libraries is possible... 
$ECHO_C" >&6; } if test -n "$STRIP" && $STRIP -V 2>&1 | grep "GNU strip" >/dev/null; then test -z "$old_striplib" && old_striplib="$STRIP --strip-debug" test -z "$striplib" && striplib="$STRIP --strip-unneeded" { echo "$as_me:$LINENO: result: yes" >&5 echo "${ECHO_T}yes" >&6; } else # FIXME - insert some real tests, host_os isn't really good enough case $host_os in darwin*) if test -n "$STRIP" ; then striplib="$STRIP -x" old_striplib="$STRIP -S" { echo "$as_me:$LINENO: result: yes" >&5 echo "${ECHO_T}yes" >&6; } else { echo "$as_me:$LINENO: result: no" >&5 echo "${ECHO_T}no" >&6; } fi ;; *) { echo "$as_me:$LINENO: result: no" >&5 echo "${ECHO_T}no" >&6; } ;; esac fi if test "x$enable_dlopen" != xyes; then enable_dlopen=unknown enable_dlopen_self=unknown enable_dlopen_self_static=unknown else lt_cv_dlopen=no lt_cv_dlopen_libs= case $host_os in beos*) lt_cv_dlopen="load_add_on" lt_cv_dlopen_libs= lt_cv_dlopen_self=yes ;; mingw* | pw32*) lt_cv_dlopen="LoadLibrary" lt_cv_dlopen_libs= ;; cygwin*) lt_cv_dlopen="dlopen" lt_cv_dlopen_libs= ;; darwin*) # if libdl is installed we need to link against it { echo "$as_me:$LINENO: checking for dlopen in -ldl" >&5 echo $ECHO_N "checking for dlopen in -ldl... $ECHO_C" >&6; } if test "${ac_cv_lib_dl_dlopen+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else ac_check_lib_save_LIBS=$LIBS LIBS="-ldl $LIBS" cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. 
*/ #ifdef __cplusplus extern "C" #endif char dlopen (); int main () { return dlopen (); ; return 0; } _ACEOF rm -f conftest.$ac_objext conftest$ac_exeext if { (ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_link") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest$ac_exeext && $as_test_x conftest$ac_exeext; then ac_cv_lib_dl_dlopen=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_cv_lib_dl_dlopen=no fi rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { echo "$as_me:$LINENO: result: $ac_cv_lib_dl_dlopen" >&5 echo "${ECHO_T}$ac_cv_lib_dl_dlopen" >&6; } if test $ac_cv_lib_dl_dlopen = yes; then lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl" else lt_cv_dlopen="dyld" lt_cv_dlopen_libs= lt_cv_dlopen_self=yes fi ;; *) { echo "$as_me:$LINENO: checking for shl_load" >&5 echo $ECHO_N "checking for shl_load... $ECHO_C" >&6; } if test "${ac_cv_func_shl_load+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ /* Define shl_load to an innocuous variant, in case declares shl_load. For example, HP-UX 11i declares gettimeofday. */ #define shl_load innocuous_shl_load /* System header to define __stub macros and hopefully few prototypes, which can conflict with char shl_load (); below. Prefer to if __STDC__ is defined, since exists even on freestanding compilers. */ #ifdef __STDC__ # include #else # include #endif #undef shl_load /* Override any GCC internal prototype to avoid an error. 
Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ #ifdef __cplusplus extern "C" #endif char shl_load (); /* The GNU C library defines this for functions which it implements to always fail with ENOSYS. Some functions are actually named something starting with __ and the normal name is an alias. */ #if defined __stub_shl_load || defined __stub___shl_load choke me #endif int main () { return shl_load (); ; return 0; } _ACEOF rm -f conftest.$ac_objext conftest$ac_exeext if { (ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_link") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest$ac_exeext && $as_test_x conftest$ac_exeext; then ac_cv_func_shl_load=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_cv_func_shl_load=no fi rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ conftest$ac_exeext conftest.$ac_ext fi { echo "$as_me:$LINENO: result: $ac_cv_func_shl_load" >&5 echo "${ECHO_T}$ac_cv_func_shl_load" >&6; } if test $ac_cv_func_shl_load = yes; then lt_cv_dlopen="shl_load" else { echo "$as_me:$LINENO: checking for shl_load in -ldld" >&5 echo $ECHO_N "checking for shl_load in -ldld... $ECHO_C" >&6; } if test "${ac_cv_lib_dld_shl_load+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else ac_check_lib_save_LIBS=$LIBS LIBS="-ldld $LIBS" cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. 
Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ #ifdef __cplusplus extern "C" #endif char shl_load (); int main () { return shl_load (); ; return 0; } _ACEOF rm -f conftest.$ac_objext conftest$ac_exeext if { (ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_link") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest$ac_exeext && $as_test_x conftest$ac_exeext; then ac_cv_lib_dld_shl_load=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_cv_lib_dld_shl_load=no fi rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { echo "$as_me:$LINENO: result: $ac_cv_lib_dld_shl_load" >&5 echo "${ECHO_T}$ac_cv_lib_dld_shl_load" >&6; } if test $ac_cv_lib_dld_shl_load = yes; then lt_cv_dlopen="shl_load" lt_cv_dlopen_libs="-dld" else { echo "$as_me:$LINENO: checking for dlopen" >&5 echo $ECHO_N "checking for dlopen... $ECHO_C" >&6; } if test "${ac_cv_func_dlopen+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ /* Define dlopen to an innocuous variant, in case declares dlopen. For example, HP-UX 11i declares gettimeofday. */ #define dlopen innocuous_dlopen /* System header to define __stub macros and hopefully few prototypes, which can conflict with char dlopen (); below. Prefer to if __STDC__ is defined, since exists even on freestanding compilers. 
*/ #ifdef __STDC__ # include #else # include #endif #undef dlopen /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ #ifdef __cplusplus extern "C" #endif char dlopen (); /* The GNU C library defines this for functions which it implements to always fail with ENOSYS. Some functions are actually named something starting with __ and the normal name is an alias. */ #if defined __stub_dlopen || defined __stub___dlopen choke me #endif int main () { return dlopen (); ; return 0; } _ACEOF rm -f conftest.$ac_objext conftest$ac_exeext if { (ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_link") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest$ac_exeext && $as_test_x conftest$ac_exeext; then ac_cv_func_dlopen=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_cv_func_dlopen=no fi rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ conftest$ac_exeext conftest.$ac_ext fi { echo "$as_me:$LINENO: result: $ac_cv_func_dlopen" >&5 echo "${ECHO_T}$ac_cv_func_dlopen" >&6; } if test $ac_cv_func_dlopen = yes; then lt_cv_dlopen="dlopen" else { echo "$as_me:$LINENO: checking for dlopen in -ldl" >&5 echo $ECHO_N "checking for dlopen in -ldl... $ECHO_C" >&6; } if test "${ac_cv_lib_dl_dlopen+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else ac_check_lib_save_LIBS=$LIBS LIBS="-ldl $LIBS" cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ #ifdef __cplusplus extern "C" #endif char dlopen (); int main () { return dlopen (); ; return 0; } _ACEOF rm -f conftest.$ac_objext conftest$ac_exeext if { (ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_link") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest$ac_exeext && $as_test_x conftest$ac_exeext; then ac_cv_lib_dl_dlopen=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_cv_lib_dl_dlopen=no fi rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { echo "$as_me:$LINENO: result: $ac_cv_lib_dl_dlopen" >&5 echo "${ECHO_T}$ac_cv_lib_dl_dlopen" >&6; } if test $ac_cv_lib_dl_dlopen = yes; then lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl" else { echo "$as_me:$LINENO: checking for dlopen in -lsvld" >&5 echo $ECHO_N "checking for dlopen in -lsvld... $ECHO_C" >&6; } if test "${ac_cv_lib_svld_dlopen+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else ac_check_lib_save_LIBS=$LIBS LIBS="-lsvld $LIBS" cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. 
*/ #ifdef __cplusplus extern "C" #endif char dlopen (); int main () { return dlopen (); ; return 0; } _ACEOF rm -f conftest.$ac_objext conftest$ac_exeext if { (ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_link") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest$ac_exeext && $as_test_x conftest$ac_exeext; then ac_cv_lib_svld_dlopen=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_cv_lib_svld_dlopen=no fi rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { echo "$as_me:$LINENO: result: $ac_cv_lib_svld_dlopen" >&5 echo "${ECHO_T}$ac_cv_lib_svld_dlopen" >&6; } if test $ac_cv_lib_svld_dlopen = yes; then lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-lsvld" else { echo "$as_me:$LINENO: checking for dld_link in -ldld" >&5 echo $ECHO_N "checking for dld_link in -ldld... $ECHO_C" >&6; } if test "${ac_cv_lib_dld_dld_link+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else ac_check_lib_save_LIBS=$LIBS LIBS="-ldld $LIBS" cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. 
*/ #ifdef __cplusplus extern "C" #endif char dld_link (); int main () { return dld_link (); ; return 0; } _ACEOF rm -f conftest.$ac_objext conftest$ac_exeext if { (ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_link") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest$ac_exeext && $as_test_x conftest$ac_exeext; then ac_cv_lib_dld_dld_link=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_cv_lib_dld_dld_link=no fi rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { echo "$as_me:$LINENO: result: $ac_cv_lib_dld_dld_link" >&5 echo "${ECHO_T}$ac_cv_lib_dld_dld_link" >&6; } if test $ac_cv_lib_dld_dld_link = yes; then lt_cv_dlopen="dld_link" lt_cv_dlopen_libs="-dld" fi fi fi fi fi fi ;; esac if test "x$lt_cv_dlopen" != xno; then enable_dlopen=yes else enable_dlopen=no fi case $lt_cv_dlopen in dlopen) save_CPPFLAGS="$CPPFLAGS" test "x$ac_cv_header_dlfcn_h" = xyes && CPPFLAGS="$CPPFLAGS -DHAVE_DLFCN_H" save_LDFLAGS="$LDFLAGS" wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $export_dynamic_flag_spec\" save_LIBS="$LIBS" LIBS="$lt_cv_dlopen_libs $LIBS" { echo "$as_me:$LINENO: checking whether a program can dlopen itself" >&5 echo $ECHO_N "checking whether a program can dlopen itself... 
$ECHO_C" >&6; } if test "${lt_cv_dlopen_self+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else if test "$cross_compiling" = yes; then : lt_cv_dlopen_self=cross else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext < #endif #include #ifdef RTLD_GLOBAL # define LT_DLGLOBAL RTLD_GLOBAL #else # ifdef DL_GLOBAL # define LT_DLGLOBAL DL_GLOBAL # else # define LT_DLGLOBAL 0 # endif #endif /* We may have to define LT_DLLAZY_OR_NOW in the command line if we find out it does not work in some platform. */ #ifndef LT_DLLAZY_OR_NOW # ifdef RTLD_LAZY # define LT_DLLAZY_OR_NOW RTLD_LAZY # else # ifdef DL_LAZY # define LT_DLLAZY_OR_NOW DL_LAZY # else # ifdef RTLD_NOW # define LT_DLLAZY_OR_NOW RTLD_NOW # else # ifdef DL_NOW # define LT_DLLAZY_OR_NOW DL_NOW # else # define LT_DLLAZY_OR_NOW 0 # endif # endif # endif # endif #endif #ifdef __cplusplus extern "C" void exit (int); #endif void fnord() { int i=42;} int main () { void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW); int status = $lt_dlunknown; if (self) { if (dlsym (self,"fnord")) status = $lt_dlno_uscore; else if (dlsym( self,"_fnord")) status = $lt_dlneed_uscore; /* dlclose (self); */ } else puts (dlerror ()); exit (status); } EOF if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 (eval $ac_link) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && test -s conftest${ac_exeext} 2>/dev/null; then (./conftest; exit; ) >&5 2>/dev/null lt_status=$? 
case x$lt_status in x$lt_dlno_uscore) lt_cv_dlopen_self=yes ;; x$lt_dlneed_uscore) lt_cv_dlopen_self=yes ;; x$lt_dlunknown|x*) lt_cv_dlopen_self=no ;; esac else : # compilation failed lt_cv_dlopen_self=no fi fi rm -fr conftest* fi { echo "$as_me:$LINENO: result: $lt_cv_dlopen_self" >&5 echo "${ECHO_T}$lt_cv_dlopen_self" >&6; } if test "x$lt_cv_dlopen_self" = xyes; then wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $lt_prog_compiler_static\" { echo "$as_me:$LINENO: checking whether a statically linked program can dlopen itself" >&5 echo $ECHO_N "checking whether a statically linked program can dlopen itself... $ECHO_C" >&6; } if test "${lt_cv_dlopen_self_static+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else if test "$cross_compiling" = yes; then : lt_cv_dlopen_self_static=cross else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext < #endif #include #ifdef RTLD_GLOBAL # define LT_DLGLOBAL RTLD_GLOBAL #else # ifdef DL_GLOBAL # define LT_DLGLOBAL DL_GLOBAL # else # define LT_DLGLOBAL 0 # endif #endif /* We may have to define LT_DLLAZY_OR_NOW in the command line if we find out it does not work in some platform. */ #ifndef LT_DLLAZY_OR_NOW # ifdef RTLD_LAZY # define LT_DLLAZY_OR_NOW RTLD_LAZY # else # ifdef DL_LAZY # define LT_DLLAZY_OR_NOW DL_LAZY # else # ifdef RTLD_NOW # define LT_DLLAZY_OR_NOW RTLD_NOW # else # ifdef DL_NOW # define LT_DLLAZY_OR_NOW DL_NOW # else # define LT_DLLAZY_OR_NOW 0 # endif # endif # endif # endif #endif #ifdef __cplusplus extern "C" void exit (int); #endif void fnord() { int i=42;} int main () { void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW); int status = $lt_dlunknown; if (self) { if (dlsym (self,"fnord")) status = $lt_dlno_uscore; else if (dlsym( self,"_fnord")) status = $lt_dlneed_uscore; /* dlclose (self); */ } else puts (dlerror ()); exit (status); } EOF if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 (eval $ac_link) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 (exit $ac_status); } && test -s conftest${ac_exeext} 2>/dev/null; then (./conftest; exit; ) >&5 2>/dev/null lt_status=$? case x$lt_status in x$lt_dlno_uscore) lt_cv_dlopen_self_static=yes ;; x$lt_dlneed_uscore) lt_cv_dlopen_self_static=yes ;; x$lt_dlunknown|x*) lt_cv_dlopen_self_static=no ;; esac else : # compilation failed lt_cv_dlopen_self_static=no fi fi rm -fr conftest* fi { echo "$as_me:$LINENO: result: $lt_cv_dlopen_self_static" >&5 echo "${ECHO_T}$lt_cv_dlopen_self_static" >&6; } fi CPPFLAGS="$save_CPPFLAGS" LDFLAGS="$save_LDFLAGS" LIBS="$save_LIBS" ;; esac case $lt_cv_dlopen_self in yes|no) enable_dlopen_self=$lt_cv_dlopen_self ;; *) enable_dlopen_self=unknown ;; esac case $lt_cv_dlopen_self_static in yes|no) enable_dlopen_self_static=$lt_cv_dlopen_self_static ;; *) enable_dlopen_self_static=unknown ;; esac fi # Report which library types will actually be built { echo "$as_me:$LINENO: checking if libtool supports shared libraries" >&5 echo $ECHO_N "checking if libtool supports shared libraries... $ECHO_C" >&6; } { echo "$as_me:$LINENO: result: $can_build_shared" >&5 echo "${ECHO_T}$can_build_shared" >&6; } { echo "$as_me:$LINENO: checking whether to build shared libraries" >&5 echo $ECHO_N "checking whether to build shared libraries... $ECHO_C" >&6; } test "$can_build_shared" = "no" && enable_shared=no # On AIX, shared libraries and static libraries use the same namespace, and # are all built from PIC. 
case $host_os in aix3*) test "$enable_shared" = yes && enable_static=no if test -n "$RANLIB"; then archive_cmds="$archive_cmds~\$RANLIB \$lib" postinstall_cmds='$RANLIB $lib' fi ;; aix4* | aix5*) if test "$host_cpu" != ia64 && test "$aix_use_runtimelinking" = no ; then test "$enable_shared" = yes && enable_static=no fi ;; esac { echo "$as_me:$LINENO: result: $enable_shared" >&5 echo "${ECHO_T}$enable_shared" >&6; } { echo "$as_me:$LINENO: checking whether to build static libraries" >&5 echo $ECHO_N "checking whether to build static libraries... $ECHO_C" >&6; } # Make sure either enable_shared or enable_static is yes. test "$enable_shared" = yes || enable_static=yes { echo "$as_me:$LINENO: result: $enable_static" >&5 echo "${ECHO_T}$enable_static" >&6; } # The else clause should only fire when bootstrapping the # libtool distribution, otherwise you forgot to ship ltmain.sh # with your package, and you will get complaints that there are # no rules to generate ltmain.sh. if test -f "$ltmain"; then # See if we are running on zsh, and set the options which allow our commands through # without removal of \ escapes. if test -n "${ZSH_VERSION+set}" ; then setopt NO_GLOB_SUBST fi # Now quote all the things that may contain metacharacters while being # careful not to overquote the AC_SUBSTed values. We take copies of the # variables and quote the copies for generation of the libtool script. 
for var in echo old_CC old_CFLAGS AR AR_FLAGS EGREP RANLIB LN_S LTCC LTCFLAGS NM \ SED SHELL STRIP \ libname_spec library_names_spec soname_spec extract_expsyms_cmds \ old_striplib striplib file_magic_cmd finish_cmds finish_eval \ deplibs_check_method reload_flag reload_cmds need_locks \ lt_cv_sys_global_symbol_pipe lt_cv_sys_global_symbol_to_cdecl \ lt_cv_sys_global_symbol_to_c_name_address \ sys_lib_search_path_spec sys_lib_dlsearch_path_spec \ old_postinstall_cmds old_postuninstall_cmds \ compiler \ CC \ LD \ lt_prog_compiler_wl \ lt_prog_compiler_pic \ lt_prog_compiler_static \ lt_prog_compiler_no_builtin_flag \ export_dynamic_flag_spec \ thread_safe_flag_spec \ whole_archive_flag_spec \ enable_shared_with_static_runtimes \ old_archive_cmds \ old_archive_from_new_cmds \ predep_objects \ postdep_objects \ predeps \ postdeps \ compiler_lib_search_path \ archive_cmds \ archive_expsym_cmds \ postinstall_cmds \ postuninstall_cmds \ old_archive_from_expsyms_cmds \ allow_undefined_flag \ no_undefined_flag \ export_symbols_cmds \ hardcode_libdir_flag_spec \ hardcode_libdir_flag_spec_ld \ hardcode_libdir_separator \ hardcode_automatic \ module_cmds \ module_expsym_cmds \ lt_cv_prog_compiler_c_o \ fix_srcfile_path \ exclude_expsyms \ include_expsyms; do case $var in old_archive_cmds | \ old_archive_from_new_cmds | \ archive_cmds | \ archive_expsym_cmds | \ module_cmds | \ module_expsym_cmds | \ old_archive_from_expsyms_cmds | \ export_symbols_cmds | \ extract_expsyms_cmds | reload_cmds | finish_cmds | \ postinstall_cmds | postuninstall_cmds | \ old_postinstall_cmds | old_postuninstall_cmds | \ sys_lib_search_path_spec | sys_lib_dlsearch_path_spec) # Double-quote double-evaled strings. 
eval "lt_$var=\\\"\`\$echo \"X\$$var\" | \$Xsed -e \"\$double_quote_subst\" -e \"\$sed_quote_subst\" -e \"\$delay_variable_subst\"\`\\\"" ;; *) eval "lt_$var=\\\"\`\$echo \"X\$$var\" | \$Xsed -e \"\$sed_quote_subst\"\`\\\"" ;; esac done case $lt_echo in *'\$0 --fallback-echo"') lt_echo=`$echo "X$lt_echo" | $Xsed -e 's/\\\\\\\$0 --fallback-echo"$/$0 --fallback-echo"/'` ;; esac cfgfile="${ofile}T" trap "$rm \"$cfgfile\"; exit 1" 1 2 15 $rm -f "$cfgfile" { echo "$as_me:$LINENO: creating $ofile" >&5 echo "$as_me: creating $ofile" >&6;} cat <<__EOF__ >> "$cfgfile" #! $SHELL # `$echo "$cfgfile" | sed 's%^.*/%%'` - Provide generalized library-building support services. # Generated automatically by $PROGRAM (GNU $PACKAGE $VERSION$TIMESTAMP) # NOTE: Changes made to this file will be lost: look at ltmain.sh. # # Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007 # Free Software Foundation, Inc. # # This file is part of GNU Libtool: # Originally by Gordon Matzigkeit , 1996 # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # # As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a # configuration script generated by Autoconf, you may include it under # the same distribution terms that you use for the rest of that program. 
# A sed program that does not truncate output. SED=$lt_SED # Sed that helps us avoid accidentally triggering echo(1) options like -n. Xsed="$SED -e 1s/^X//" # The HP-UX ksh and POSIX shell print the target directory to stdout # if CDPATH is set. (unset CDPATH) >/dev/null 2>&1 && unset CDPATH # The names of the tagged configurations supported by this script. available_tags= # ### BEGIN LIBTOOL CONFIG # Libtool was configured on host `(hostname || uname -n) 2>/dev/null | sed 1q`: # Shell to use when invoking shell scripts. SHELL=$lt_SHELL # Whether or not to build shared libraries. build_libtool_libs=$enable_shared # Whether or not to build static libraries. build_old_libs=$enable_static # Whether or not to add -lc for building shared libraries. build_libtool_need_lc=$archive_cmds_need_lc # Whether or not to disallow shared libs when runtime libs are static allow_libtool_libs_with_static_runtimes=$enable_shared_with_static_runtimes # Whether or not to optimize for fast installation. fast_install=$enable_fast_install # The host system. host_alias=$host_alias host=$host host_os=$host_os # The build system. build_alias=$build_alias build=$build build_os=$build_os # An echo program that does not interpret backslashes. echo=$lt_echo # The archiver. AR=$lt_AR AR_FLAGS=$lt_AR_FLAGS # A C compiler. LTCC=$lt_LTCC # LTCC compiler flags. LTCFLAGS=$lt_LTCFLAGS # A language-specific compiler. CC=$lt_compiler # Is the compiler the GNU C compiler? with_gcc=$GCC # An ERE matcher. EGREP=$lt_EGREP # The linker used to build libraries. LD=$lt_LD # Whether we need hard or soft links. LN_S=$lt_LN_S # A BSD-compatible nm program. NM=$lt_NM # A symbol stripping program STRIP=$lt_STRIP # Used to examine libraries when file_magic_cmd begins "file" MAGIC_CMD=$MAGIC_CMD # Used on cygwin: DLL creation program. DLLTOOL="$DLLTOOL" # Used on cygwin: object dumper. OBJDUMP="$OBJDUMP" # Used on cygwin: assembler. AS="$AS" # The name of the directory that contains temporary libtool files. 
objdir=$objdir # How to create reloadable object files. reload_flag=$lt_reload_flag reload_cmds=$lt_reload_cmds # How to pass a linker flag through the compiler. wl=$lt_lt_prog_compiler_wl # Object file suffix (normally "o"). objext="$ac_objext" # Old archive suffix (normally "a"). libext="$libext" # Shared library suffix (normally ".so"). shrext_cmds='$shrext_cmds' # Executable file suffix (normally ""). exeext="$exeext" # Additional compiler flags for building library objects. pic_flag=$lt_lt_prog_compiler_pic pic_mode=$pic_mode # What is the maximum length of a command? max_cmd_len=$lt_cv_sys_max_cmd_len # Does compiler simultaneously support -c and -o options? compiler_c_o=$lt_lt_cv_prog_compiler_c_o # Must we lock files when doing compilation? need_locks=$lt_need_locks # Do we need the lib prefix for modules? need_lib_prefix=$need_lib_prefix # Do we need a version for libraries? need_version=$need_version # Whether dlopen is supported. dlopen_support=$enable_dlopen # Whether dlopen of programs is supported. dlopen_self=$enable_dlopen_self # Whether dlopen of statically linked programs is supported. dlopen_self_static=$enable_dlopen_self_static # Compiler flag to prevent dynamic linking. link_static_flag=$lt_lt_prog_compiler_static # Compiler flag to turn off builtin functions. no_builtin_flag=$lt_lt_prog_compiler_no_builtin_flag # Compiler flag to allow reflexive dlopens. export_dynamic_flag_spec=$lt_export_dynamic_flag_spec # Compiler flag to generate shared objects directly from archives. whole_archive_flag_spec=$lt_whole_archive_flag_spec # Compiler flag to generate thread-safe objects. thread_safe_flag_spec=$lt_thread_safe_flag_spec # Library versioning type. version_type=$version_type # Format of library name prefix. libname_spec=$lt_libname_spec # List of archive names. First name is the real one, the rest are links. # The last name is the one that the linker finds with -lNAME. 
library_names_spec=$lt_library_names_spec # The coded name of the library, if different from the real name. soname_spec=$lt_soname_spec # Commands used to build and install an old-style archive. RANLIB=$lt_RANLIB old_archive_cmds=$lt_old_archive_cmds old_postinstall_cmds=$lt_old_postinstall_cmds old_postuninstall_cmds=$lt_old_postuninstall_cmds # Create an old-style archive from a shared archive. old_archive_from_new_cmds=$lt_old_archive_from_new_cmds # Create a temporary old-style archive to link instead of a shared archive. old_archive_from_expsyms_cmds=$lt_old_archive_from_expsyms_cmds # Commands used to build and install a shared archive. archive_cmds=$lt_archive_cmds archive_expsym_cmds=$lt_archive_expsym_cmds postinstall_cmds=$lt_postinstall_cmds postuninstall_cmds=$lt_postuninstall_cmds # Commands used to build a loadable module (assumed same as above if empty) module_cmds=$lt_module_cmds module_expsym_cmds=$lt_module_expsym_cmds # Commands to strip libraries. old_striplib=$lt_old_striplib striplib=$lt_striplib # Dependencies to place before the objects being linked to create a # shared library. predep_objects=$lt_predep_objects # Dependencies to place after the objects being linked to create a # shared library. postdep_objects=$lt_postdep_objects # Dependencies to place before the objects being linked to create a # shared library. predeps=$lt_predeps # Dependencies to place after the objects being linked to create a # shared library. postdeps=$lt_postdeps # The library search path used internally by the compiler when linking # a shared library. compiler_lib_search_path=$lt_compiler_lib_search_path # Method to check whether dependent libraries are shared objects. deplibs_check_method=$lt_deplibs_check_method # Command to use when deplibs_check_method == file_magic. file_magic_cmd=$lt_file_magic_cmd # Flag that allows shared libraries with undefined symbols to be built. allow_undefined_flag=$lt_allow_undefined_flag # Flag that forces no undefined symbols. 
no_undefined_flag=$lt_no_undefined_flag # Commands used to finish a libtool library installation in a directory. finish_cmds=$lt_finish_cmds # Same as above, but a single script fragment to be evaled but not shown. finish_eval=$lt_finish_eval # Take the output of nm and produce a listing of raw symbols and C names. global_symbol_pipe=$lt_lt_cv_sys_global_symbol_pipe # Transform the output of nm in a proper C declaration global_symbol_to_cdecl=$lt_lt_cv_sys_global_symbol_to_cdecl # Transform the output of nm in a C name address pair global_symbol_to_c_name_address=$lt_lt_cv_sys_global_symbol_to_c_name_address # This is the shared library runtime path variable. runpath_var=$runpath_var # This is the shared library path variable. shlibpath_var=$shlibpath_var # Is shlibpath searched before the hard-coded library search path? shlibpath_overrides_runpath=$shlibpath_overrides_runpath # How to hardcode a shared library path into an executable. hardcode_action=$hardcode_action # Whether we should hardcode library paths into libraries. hardcode_into_libs=$hardcode_into_libs # Flag to hardcode \$libdir into a binary during linking. # This must work even if \$libdir does not exist. hardcode_libdir_flag_spec=$lt_hardcode_libdir_flag_spec # If ld is used when linking, flag to hardcode \$libdir into # a binary during linking. This must work even if \$libdir does # not exist. hardcode_libdir_flag_spec_ld=$lt_hardcode_libdir_flag_spec_ld # Whether we need a single -rpath flag with a separated argument. hardcode_libdir_separator=$lt_hardcode_libdir_separator # Set to yes if using DIR/libNAME${shared_ext} during linking hardcodes DIR into the # resulting binary. hardcode_direct=$hardcode_direct # Set to yes if using the -LDIR flag during linking hardcodes DIR into the # resulting binary. hardcode_minus_L=$hardcode_minus_L # Set to yes if using SHLIBPATH_VAR=DIR during linking hardcodes DIR into # the resulting binary. 
hardcode_shlibpath_var=$hardcode_shlibpath_var # Set to yes if building a shared library automatically hardcodes DIR into the library # and all subsequent libraries and executables linked against it. hardcode_automatic=$hardcode_automatic # Variables whose values should be saved in libtool wrapper scripts and # restored at relink time. variables_saved_for_relink="$variables_saved_for_relink" # Whether libtool must link a program against all its dependency libraries. link_all_deplibs=$link_all_deplibs # Compile-time system search path for libraries sys_lib_search_path_spec=$lt_sys_lib_search_path_spec # Run-time system search path for libraries sys_lib_dlsearch_path_spec=$lt_sys_lib_dlsearch_path_spec # Fix the shell variable \$srcfile for the compiler. fix_srcfile_path=$lt_fix_srcfile_path # Set to yes if exported symbols are required. always_export_symbols=$always_export_symbols # The commands to list exported symbols. export_symbols_cmds=$lt_export_symbols_cmds # The commands to extract the exported symbol list from a shared archive. extract_expsyms_cmds=$lt_extract_expsyms_cmds # Symbols that should not be listed in the preloaded symbols. exclude_expsyms=$lt_exclude_expsyms # Symbols that must always be exported. include_expsyms=$lt_include_expsyms # ### END LIBTOOL CONFIG __EOF__ case $host_os in aix3*) cat <<\EOF >> "$cfgfile" # AIX sometimes has problems with the GCC collect2 program. For some # reason, if we set the COLLECT_NAMES environment variable, the problems # vanish in a puff of smoke. if test "X${COLLECT_NAMES+set}" != Xset; then COLLECT_NAMES= export COLLECT_NAMES fi EOF ;; esac # We use sed instead of cat because bash on DJGPP gets confused if # if finds mixed CR/LF and LF-only lines. Since sed operates in # text mode, it properly converts lines to CR/LF. This bash problem # is reportedly fixed, but why not run on old versions too? 
sed '$q' "$ltmain" >> "$cfgfile" || (rm -f "$cfgfile"; exit 1) mv -f "$cfgfile" "$ofile" || \ (rm -f "$ofile" && cp "$cfgfile" "$ofile" && rm -f "$cfgfile") chmod +x "$ofile" else # If there is no Makefile yet, we rely on a make rule to execute # `config.status --recheck' to rerun these tests and create the # libtool script then. ltmain_in=`echo $ltmain | sed -e 's/\.sh$/.in/'` if test -f "$ltmain_in"; then test -f Makefile && make "$ltmain" fi fi ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu CC="$lt_save_CC" # Check whether --with-tags was given. if test "${with_tags+set}" = set; then withval=$with_tags; tagnames="$withval" fi if test -f "$ltmain" && test -n "$tagnames"; then if test ! -f "${ofile}"; then { echo "$as_me:$LINENO: WARNING: output file \`$ofile' does not exist" >&5 echo "$as_me: WARNING: output file \`$ofile' does not exist" >&2;} fi if test -z "$LTCC"; then eval "`$SHELL ${ofile} --config | grep '^LTCC='`" if test -z "$LTCC"; then { echo "$as_me:$LINENO: WARNING: output file \`$ofile' does not look like a libtool script" >&5 echo "$as_me: WARNING: output file \`$ofile' does not look like a libtool script" >&2;} else { echo "$as_me:$LINENO: WARNING: using \`LTCC=$LTCC', extracted from \`$ofile'" >&5 echo "$as_me: WARNING: using \`LTCC=$LTCC', extracted from \`$ofile'" >&2;} fi fi if test -z "$LTCFLAGS"; then eval "`$SHELL ${ofile} --config | grep '^LTCFLAGS='`" fi # Extract list of available tagged configurations in $ofile. # Note that this assumes the entire list is on one line. 
available_tags=`grep "^available_tags=" "${ofile}" | $SED -e 's/available_tags=\(.*$\)/\1/' -e 's/\"//g'` lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR," for tagname in $tagnames; do IFS="$lt_save_ifs" # Check whether tagname contains only valid characters case `$echo "X$tagname" | $Xsed -e 's:[-_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890,/]::g'` in "") ;; *) { { echo "$as_me:$LINENO: error: invalid tag name: $tagname" >&5 echo "$as_me: error: invalid tag name: $tagname" >&2;} { (exit 1); exit 1; }; } ;; esac if grep "^# ### BEGIN LIBTOOL TAG CONFIG: $tagname$" < "${ofile}" > /dev/null then { { echo "$as_me:$LINENO: error: tag name \"$tagname\" already exists" >&5 echo "$as_me: error: tag name \"$tagname\" already exists" >&2;} { (exit 1); exit 1; }; } fi # Update the list of available tags. if test -n "$tagname"; then echo appending configuration tag \"$tagname\" to $ofile case $tagname in CXX) if test -n "$CXX" && ( test "X$CXX" != "Xno" && ( (test "X$CXX" = "Xg++" && `g++ -v >/dev/null 2>&1` ) || (test "X$CXX" != "Xg++"))) ; then ac_ext=cpp ac_cpp='$CXXCPP $CPPFLAGS' ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_cxx_compiler_gnu archive_cmds_need_lc_CXX=no allow_undefined_flag_CXX= always_export_symbols_CXX=no archive_expsym_cmds_CXX= export_dynamic_flag_spec_CXX= hardcode_direct_CXX=no hardcode_libdir_flag_spec_CXX= hardcode_libdir_flag_spec_ld_CXX= hardcode_libdir_separator_CXX= hardcode_minus_L_CXX=no hardcode_shlibpath_var_CXX=unsupported hardcode_automatic_CXX=no module_cmds_CXX= module_expsym_cmds_CXX= link_all_deplibs_CXX=unknown old_archive_cmds_CXX=$old_archive_cmds no_undefined_flag_CXX= whole_archive_flag_spec_CXX= enable_shared_with_static_runtimes_CXX=no # Dependencies to place before and after the object being linked: predep_objects_CXX= postdep_objects_CXX= predeps_CXX= postdeps_CXX= 
compiler_lib_search_path_CXX= # Source file extension for C++ test sources. ac_ext=cpp # Object file extension for compiled C++ test sources. objext=o objext_CXX=$objext # Code to be used in simple compile tests lt_simple_compile_test_code="int some_variable = 0;" # Code to be used in simple link tests lt_simple_link_test_code='int main(int, char *[]) { return(0); }' # ltmain only uses $CC for tagged configurations so make sure $CC is set. # If no C compiler was specified, use CC. LTCC=${LTCC-"$CC"} # If no C compiler flags were specified, use CFLAGS. LTCFLAGS=${LTCFLAGS-"$CFLAGS"} # Allow CC to be a program name with arguments. compiler=$CC # save warnings/boilerplate of simple test code ac_outfile=conftest.$ac_objext echo "$lt_simple_compile_test_code" >conftest.$ac_ext eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err _lt_compiler_boilerplate=`cat conftest.err` $rm conftest* ac_outfile=conftest.$ac_objext echo "$lt_simple_link_test_code" >conftest.$ac_ext eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err _lt_linker_boilerplate=`cat conftest.err` $rm conftest* # Allow CC to be a program name with arguments. 
lt_save_CC=$CC lt_save_LD=$LD lt_save_GCC=$GCC GCC=$GXX lt_save_with_gnu_ld=$with_gnu_ld lt_save_path_LD=$lt_cv_path_LD if test -n "${lt_cv_prog_gnu_ldcxx+set}"; then lt_cv_prog_gnu_ld=$lt_cv_prog_gnu_ldcxx else $as_unset lt_cv_prog_gnu_ld fi if test -n "${lt_cv_path_LDCXX+set}"; then lt_cv_path_LD=$lt_cv_path_LDCXX else $as_unset lt_cv_path_LD fi test -z "${LDCXX+set}" || LD=$LDCXX CC=${CXX-"c++"} compiler=$CC compiler_CXX=$CC for cc_temp in $compiler""; do case $cc_temp in compile | *[\\/]compile | ccache | *[\\/]ccache ) ;; distcc | *[\\/]distcc | purify | *[\\/]purify ) ;; \-*) ;; *) break;; esac done cc_basename=`$echo "X$cc_temp" | $Xsed -e 's%.*/%%' -e "s%^$host_alias-%%"` # We don't want -fno-exception wen compiling C++ code, so set the # no_builtin_flag separately if test "$GXX" = yes; then lt_prog_compiler_no_builtin_flag_CXX=' -fno-builtin' else lt_prog_compiler_no_builtin_flag_CXX= fi if test "$GXX" = yes; then # Set up default GNU C++ configuration # Check whether --with-gnu-ld was given. if test "${with_gnu_ld+set}" = set; then withval=$with_gnu_ld; test "$withval" = no || with_gnu_ld=yes else with_gnu_ld=no fi ac_prog=ld if test "$GCC" = yes; then # Check if gcc -print-prog-name=ld gives a path. { echo "$as_me:$LINENO: checking for ld used by $CC" >&5 echo $ECHO_N "checking for ld used by $CC... $ECHO_C" >&6; } case $host in *-*-mingw*) # gcc leaves a trailing carriage return which upsets mingw ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;; *) ac_prog=`($CC -print-prog-name=ld) 2>&5` ;; esac case $ac_prog in # Accept absolute paths. [\\/]* | ?:[\\/]*) re_direlt='/[^/][^/]*/\.\./' # Canonicalize the pathname of ld ac_prog=`echo $ac_prog| $SED 's%\\\\%/%g'` while echo $ac_prog | grep "$re_direlt" > /dev/null 2>&1; do ac_prog=`echo $ac_prog| $SED "s%$re_direlt%/%"` done test -z "$LD" && LD="$ac_prog" ;; "") # If it fails, then pretend we aren't using GCC. ac_prog=ld ;; *) # If it is relative, then search for the first ld in PATH. 
with_gnu_ld=unknown ;; esac elif test "$with_gnu_ld" = yes; then { echo "$as_me:$LINENO: checking for GNU ld" >&5 echo $ECHO_N "checking for GNU ld... $ECHO_C" >&6; } else { echo "$as_me:$LINENO: checking for non-GNU ld" >&5 echo $ECHO_N "checking for non-GNU ld... $ECHO_C" >&6; } fi if test "${lt_cv_path_LD+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else if test -z "$LD"; then lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR for ac_dir in $PATH; do IFS="$lt_save_ifs" test -z "$ac_dir" && ac_dir=. if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then lt_cv_path_LD="$ac_dir/$ac_prog" # Check to see if the program is GNU ld. I'd rather use --version, # but apparently some variants of GNU ld only accept -v. # Break only if it was the GNU/non-GNU ld that we prefer. case `"$lt_cv_path_LD" -v 2>&1 &5 echo "${ECHO_T}$LD" >&6; } else { echo "$as_me:$LINENO: result: no" >&5 echo "${ECHO_T}no" >&6; } fi test -z "$LD" && { { echo "$as_me:$LINENO: error: no acceptable ld found in \$PATH" >&5 echo "$as_me: error: no acceptable ld found in \$PATH" >&2;} { (exit 1); exit 1; }; } { echo "$as_me:$LINENO: checking if the linker ($LD) is GNU ld" >&5 echo $ECHO_N "checking if the linker ($LD) is GNU ld... $ECHO_C" >&6; } if test "${lt_cv_prog_gnu_ld+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else # I'd rather use --version here, but apparently some GNU lds only accept -v. case `$LD -v 2>&1 &5 echo "${ECHO_T}$lt_cv_prog_gnu_ld" >&6; } with_gnu_ld=$lt_cv_prog_gnu_ld # Check if GNU C++ uses GNU ld as the underlying linker, since the # archiving commands below assume that GNU ld is being used. 
if test "$with_gnu_ld" = yes; then archive_cmds_CXX='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib' archive_expsym_cmds_CXX='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' hardcode_libdir_flag_spec_CXX='${wl}--rpath ${wl}$libdir' export_dynamic_flag_spec_CXX='${wl}--export-dynamic' # If archive_cmds runs LD, not CC, wlarc should be empty # XXX I think wlarc can be eliminated in ltcf-cxx, but I need to # investigate it a little bit more. (MM) wlarc='${wl}' # ancient GNU ld didn't support --whole-archive et. al. if eval "`$CC -print-prog-name=ld` --help 2>&1" | \ grep 'no-whole-archive' > /dev/null; then whole_archive_flag_spec_CXX="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive' else whole_archive_flag_spec_CXX= fi else with_gnu_ld=no wlarc= # A generic and very simple default shared library creation # command for GNU C++ for the case where it uses the native # linker, instead of GNU ld. If possible, this setting should # overridden to take advantage of the native linker features on # the platform it is being used on. archive_cmds_CXX='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib' fi # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | grep "\-L"' else GXX=no with_gnu_ld=no wlarc= fi # PORTME: fill in a description of your system's C++ link characteristics { echo "$as_me:$LINENO: checking whether the $compiler linker ($LD) supports shared libraries" >&5 echo $ECHO_N "checking whether the $compiler linker ($LD) supports shared libraries... 
$ECHO_C" >&6; } ld_shlibs_CXX=yes case $host_os in aix3*) # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; aix4* | aix5*) if test "$host_cpu" = ia64; then # On IA64, the linker does run time linking by default, so we don't # have to do anything special. aix_use_runtimelinking=no exp_sym_flag='-Bexport' no_entry_flag="" else aix_use_runtimelinking=no # Test if we are trying to use run time linking or normal # AIX style linking. If -brtl is somewhere in LDFLAGS, we # need to do runtime linking. case $host_os in aix4.[23]|aix4.[23].*|aix5*) for ld_flag in $LDFLAGS; do case $ld_flag in *-brtl*) aix_use_runtimelinking=yes break ;; esac done ;; esac exp_sym_flag='-bexport' no_entry_flag='-bnoentry' fi # When large executables or shared objects are built, AIX ld can # have problems creating the table of contents. If linking a library # or program results in "error TOC overflow" add -mminimal-toc to # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS. archive_cmds_CXX='' hardcode_direct_CXX=yes hardcode_libdir_separator_CXX=':' link_all_deplibs_CXX=yes if test "$GXX" = yes; then case $host_os in aix4.[012]|aix4.[012].*) # We only want to do this on AIX 4.2 and lower, the check # below for broken collect2 doesn't work under 4.3+ collect2name=`${CC} -print-prog-name=collect2` if test -f "$collect2name" && \ strings "$collect2name" | grep resolve_lib_name >/dev/null then # We have reworked collect2 : else # We have old collect2 hardcode_direct_CXX=unsupported # It fails to find uninstalled libraries when the uninstalled # path is not listed in the libpath. 
Setting hardcode_minus_L # to unsupported forces relinking hardcode_minus_L_CXX=yes hardcode_libdir_flag_spec_CXX='-L$libdir' hardcode_libdir_separator_CXX= fi ;; esac shared_flag='-shared' if test "$aix_use_runtimelinking" = yes; then shared_flag="$shared_flag "'${wl}-G' fi else # not using gcc if test "$host_cpu" = ia64; then # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release # chokes on -Wl,-G. The following line is correct: shared_flag='-G' else if test "$aix_use_runtimelinking" = yes; then shared_flag='${wl}-G' else shared_flag='${wl}-bM:SRE' fi fi fi # It seems that -bexpall does not export symbols beginning with # underscore (_), so it is better to generate a list of symbols to export. always_export_symbols_CXX=yes if test "$aix_use_runtimelinking" = yes; then # Warning - without using the other runtime loading flags (-brtl), # -berok will link without error, but may produce a broken library. allow_undefined_flag_CXX='-berok' # Determine the default libpath from the value encoded in an empty executable. cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ int main () { ; return 0; } _ACEOF rm -f conftest.$ac_objext conftest$ac_exeext if { (ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_link") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_cxx_werror_flag" || test ! -s conftest.err } && test -s conftest$ac_exeext && $as_test_x conftest$ac_exeext; then lt_aix_libpath_sed=' /Import File Strings/,/^$/ { /^0/ { s/^0 *\(.*\)$/\1/ p } }' aix_libpath=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` # Check for a 64-bit object if we didn't find anything. 
if test -z "$aix_libpath"; then aix_libpath=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` fi else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 fi rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ conftest$ac_exeext conftest.$ac_ext if test -z "$aix_libpath"; then aix_libpath="/usr/lib:/lib"; fi hardcode_libdir_flag_spec_CXX='${wl}-blibpath:$libdir:'"$aix_libpath" archive_expsym_cmds_CXX="\$CC"' -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags `if test "x${allow_undefined_flag}" != "x"; then echo "${wl}${allow_undefined_flag}"; else :; fi` '"\${wl}$exp_sym_flag:\$export_symbols $shared_flag" else if test "$host_cpu" = ia64; then hardcode_libdir_flag_spec_CXX='${wl}-R $libdir:/usr/lib:/lib' allow_undefined_flag_CXX="-z nodefs" archive_expsym_cmds_CXX="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags ${wl}${allow_undefined_flag} '"\${wl}$exp_sym_flag:\$export_symbols" else # Determine the default libpath from the value encoded in an empty executable. cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ int main () { ; return 0; } _ACEOF rm -f conftest.$ac_objext conftest$ac_exeext if { (ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_link") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_cxx_werror_flag" || test ! 
-s conftest.err } && test -s conftest$ac_exeext && $as_test_x conftest$ac_exeext; then lt_aix_libpath_sed=' /Import File Strings/,/^$/ { /^0/ { s/^0 *\(.*\)$/\1/ p } }' aix_libpath=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` # Check for a 64-bit object if we didn't find anything. if test -z "$aix_libpath"; then aix_libpath=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` fi else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 fi rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ conftest$ac_exeext conftest.$ac_ext if test -z "$aix_libpath"; then aix_libpath="/usr/lib:/lib"; fi hardcode_libdir_flag_spec_CXX='${wl}-blibpath:$libdir:'"$aix_libpath" # Warning - without using the other run time loading flags, # -berok will link without error, but may produce a broken library. no_undefined_flag_CXX=' ${wl}-bernotok' allow_undefined_flag_CXX=' ${wl}-berok' # Exported symbols can be pulled into shared objects from archives whole_archive_flag_spec_CXX='$convenience' archive_cmds_need_lc_CXX=yes # This is similar to how AIX traditionally builds its shared libraries. archive_expsym_cmds_CXX="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs ${wl}-bnoentry $compiler_flags ${wl}-bE:$export_symbols${allow_undefined_flag}~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$soname' fi fi ;; beos*) if $LD --help 2>&1 | grep ': supported targets:.* elf' > /dev/null; then allow_undefined_flag_CXX=unsupported # Joseph Beckenbach says some releases of gcc # support --undefined. This deserves some investigation. 
FIXME archive_cmds_CXX='$CC -nostart $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' else ld_shlibs_CXX=no fi ;; chorus*) case $cc_basename in *) # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; esac ;; cygwin* | mingw* | pw32*) # _LT_AC_TAGVAR(hardcode_libdir_flag_spec, CXX) is actually meaningless, # as there is no search path for DLLs. hardcode_libdir_flag_spec_CXX='-L$libdir' allow_undefined_flag_CXX=unsupported always_export_symbols_CXX=no enable_shared_with_static_runtimes_CXX=yes if $LD --help 2>&1 | grep 'auto-import' > /dev/null; then archive_cmds_CXX='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' # If the export-symbols file already is a .def file (1st line # is EXPORTS), use it as is; otherwise, prepend... archive_expsym_cmds_CXX='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then cp $export_symbols $output_objdir/$soname.def; else echo EXPORTS > $output_objdir/$soname.def; cat $export_symbols >> $output_objdir/$soname.def; fi~ $CC -shared -nostdlib $output_objdir/$soname.def $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' else ld_shlibs_CXX=no fi ;; darwin* | rhapsody*) case $host_os in rhapsody* | darwin1.[012]) allow_undefined_flag_CXX='${wl}-undefined ${wl}suppress' ;; *) # Darwin 1.3 on if test -z ${MACOSX_DEPLOYMENT_TARGET} ; then allow_undefined_flag_CXX='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' else case ${MACOSX_DEPLOYMENT_TARGET} in 10.[012]) allow_undefined_flag_CXX='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' ;; 10.*) allow_undefined_flag_CXX='${wl}-undefined ${wl}dynamic_lookup' ;; esac fi ;; esac archive_cmds_need_lc_CXX=no hardcode_direct_CXX=no hardcode_automatic_CXX=yes hardcode_shlibpath_var_CXX=unsupported whole_archive_flag_spec_CXX='' 
link_all_deplibs_CXX=yes if test "$GXX" = yes ; then lt_int_apple_cc_single_mod=no output_verbose_link_cmd='echo' if $CC -dumpspecs 2>&1 | $EGREP 'single_module' >/dev/null ; then lt_int_apple_cc_single_mod=yes fi if test "X$lt_int_apple_cc_single_mod" = Xyes ; then archive_cmds_CXX='$CC -dynamiclib -single_module $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags -install_name $rpath/$soname $verstring' else archive_cmds_CXX='$CC -r -keep_private_externs -nostdlib -o ${lib}-master.o $libobjs~$CC -dynamiclib $allow_undefined_flag -o $lib ${lib}-master.o $deplibs $compiler_flags -install_name $rpath/$soname $verstring' fi module_cmds_CXX='$CC $allow_undefined_flag -o $lib -bundle $libobjs $deplibs$compiler_flags' # Don't fix this by using the ld -exported_symbols_list flag, it doesn't exist in older darwin lds if test "X$lt_int_apple_cc_single_mod" = Xyes ; then archive_expsym_cmds_CXX='sed -e "s,#.*,," -e "s,^[ ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC -dynamiclib -single_module $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags -install_name $rpath/$soname $verstring~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}' else archive_expsym_cmds_CXX='sed -e "s,#.*,," -e "s,^[ ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC -r -keep_private_externs -nostdlib -o ${lib}-master.o $libobjs~$CC -dynamiclib $allow_undefined_flag -o $lib ${lib}-master.o $deplibs $compiler_flags -install_name $rpath/$soname $verstring~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}' fi module_expsym_cmds_CXX='sed -e "s,#.*,," -e "s,^[ ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC $allow_undefined_flag -o $lib -bundle $libobjs $deplibs$compiler_flags~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}' else case $cc_basename in xlc*) output_verbose_link_cmd='echo' archive_cmds_CXX='$CC -qmkshrobj 
${wl}-single_module $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-install_name ${wl}`echo $rpath/$soname` $xlcverstring' module_cmds_CXX='$CC $allow_undefined_flag -o $lib -bundle $libobjs $deplibs$compiler_flags' # Don't fix this by using the ld -exported_symbols_list flag, it doesn't exist in older darwin lds archive_expsym_cmds_CXX='sed -e "s,#.*,," -e "s,^[ ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC -qmkshrobj ${wl}-single_module $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-install_name ${wl}$rpath/$soname $xlcverstring~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}' module_expsym_cmds_CXX='sed -e "s,#.*,," -e "s,^[ ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC $allow_undefined_flag -o $lib -bundle $libobjs $deplibs$compiler_flags~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}' ;; *) ld_shlibs_CXX=no ;; esac fi ;; dgux*) case $cc_basename in ec++*) # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; ghcx*) # Green Hills C++ Compiler # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; *) # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; esac ;; freebsd[12]*) # C++ shared libraries reported to be fairly broken before switch to ELF ld_shlibs_CXX=no ;; freebsd-elf*) archive_cmds_need_lc_CXX=no ;; freebsd* | dragonfly*) # FreeBSD 3 and later use GNU C++ and GNU ld with standard ELF # conventions ld_shlibs_CXX=yes ;; gnu*) ;; hpux9*) hardcode_libdir_flag_spec_CXX='${wl}+b ${wl}$libdir' hardcode_libdir_separator_CXX=: export_dynamic_flag_spec_CXX='${wl}-E' hardcode_direct_CXX=yes hardcode_minus_L_CXX=yes # Not in the search PATH, # but as the default # location of the library. 
case $cc_basename in CC*) # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; aCC*) archive_cmds_CXX='$rm $output_objdir/$soname~$CC -b ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. # # There doesn't appear to be a way to prevent this compiler from # explicitly linking system object files so we need to strip them # from the output so that they don't get included in the library # dependencies. output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | grep "[-]L"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; echo $list' ;; *) if test "$GXX" = yes; then archive_cmds_CXX='$rm $output_objdir/$soname~$CC -shared -nostdlib -fPIC ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' else # FIXME: insert proper C++ library support ld_shlibs_CXX=no fi ;; esac ;; hpux10*|hpux11*) if test $with_gnu_ld = no; then hardcode_libdir_flag_spec_CXX='${wl}+b ${wl}$libdir' hardcode_libdir_separator_CXX=: case $host_cpu in hppa*64*|ia64*) ;; *) export_dynamic_flag_spec_CXX='${wl}-E' ;; esac fi case $host_cpu in hppa*64*|ia64*) hardcode_direct_CXX=no hardcode_shlibpath_var_CXX=no ;; *) hardcode_direct_CXX=yes hardcode_minus_L_CXX=yes # Not in the search PATH, # but as the default # location of the library. 
;; esac case $cc_basename in CC*) # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; aCC*) case $host_cpu in hppa*64*) archive_cmds_CXX='$CC -b ${wl}+h ${wl}$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' ;; ia64*) archive_cmds_CXX='$CC -b ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' ;; *) archive_cmds_CXX='$CC -b ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' ;; esac # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. # # There doesn't appear to be a way to prevent this compiler from # explicitly linking system object files so we need to strip them # from the output so that they don't get included in the library # dependencies. output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | grep "\-L"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; echo $list' ;; *) if test "$GXX" = yes; then if test $with_gnu_ld = no; then case $host_cpu in hppa*64*) archive_cmds_CXX='$CC -shared -nostdlib -fPIC ${wl}+h ${wl}$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' ;; ia64*) archive_cmds_CXX='$CC -shared -nostdlib -fPIC ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' ;; *) archive_cmds_CXX='$CC -shared -nostdlib -fPIC ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' ;; esac fi else # FIXME: insert proper C++ library support ld_shlibs_CXX=no fi ;; esac ;; interix[3-9]*) hardcode_direct_CXX=no hardcode_shlibpath_var_CXX=no hardcode_libdir_flag_spec_CXX='${wl}-rpath,$libdir' 
export_dynamic_flag_spec_CXX='${wl}-E' # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc. # Instead, shared libraries are loaded at an image base (0x10000000 by # default) and relocated if they conflict, which is a slow very memory # consuming and fragmenting process. To avoid this, we pick a random, # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link # time. Moving up from 0x10000000 also allows more sbrk(2) space. archive_cmds_CXX='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' archive_expsym_cmds_CXX='sed "s,^,_," $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--retain-symbols-file,$output_objdir/$soname.expsym ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' ;; irix5* | irix6*) case $cc_basename in CC*) # SGI C++ archive_cmds_CXX='$CC -shared -all -multigot $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib' # Archives containing C++ object files must be created using # "CC -ar", where "CC" is the IRIX C++ compiler. This is # necessary to make sure instantiated templates are included # in the archive. 
old_archive_cmds_CXX='$CC -ar -WR,-u -o $oldlib $oldobjs' ;; *) if test "$GXX" = yes; then if test "$with_gnu_ld" = no; then archive_cmds_CXX='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' else archive_cmds_CXX='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` -o $lib' fi fi link_all_deplibs_CXX=yes ;; esac hardcode_libdir_flag_spec_CXX='${wl}-rpath ${wl}$libdir' hardcode_libdir_separator_CXX=: ;; linux* | k*bsd*-gnu) case $cc_basename in KCC*) # Kuck and Associates, Inc. (KAI) C++ Compiler # KCC will only create a shared library if the output file # ends with ".so" (or ".sl" for HP-UX), so rename the library # to its proper name (with version) after linking. archive_cmds_CXX='tempext=`echo $shared_ext | $SED -e '\''s/\([^()0-9A-Za-z{}]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib' archive_expsym_cmds_CXX='tempext=`echo $shared_ext | $SED -e '\''s/\([^()0-9A-Za-z{}]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib ${wl}-retain-symbols-file,$export_symbols; mv \$templib $lib' # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. # # There doesn't appear to be a way to prevent this compiler from # explicitly linking system object files so we need to strip them # from the output so that they don't get included in the library # dependencies. 
output_verbose_link_cmd='templist=`$CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1 | grep "ld"`; rm -f libconftest$shared_ext; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; echo $list' hardcode_libdir_flag_spec_CXX='${wl}--rpath,$libdir' export_dynamic_flag_spec_CXX='${wl}--export-dynamic' # Archives containing C++ object files must be created using # "CC -Bstatic", where "CC" is the KAI C++ compiler. old_archive_cmds_CXX='$CC -Bstatic -o $oldlib $oldobjs' ;; icpc*) # Intel C++ with_gnu_ld=yes # version 8.0 and above of icpc choke on multiply defined symbols # if we add $predep_objects and $postdep_objects, however 7.1 and # earlier do not add the objects themselves. case `$CC -V 2>&1` in *"Version 7."*) archive_cmds_CXX='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib' archive_expsym_cmds_CXX='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' ;; *) # Version 8.0 or newer tmp_idyn= case $host_cpu in ia64*) tmp_idyn=' -i_dynamic';; esac archive_cmds_CXX='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' archive_expsym_cmds_CXX='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' ;; esac archive_cmds_need_lc_CXX=no hardcode_libdir_flag_spec_CXX='${wl}-rpath,$libdir' export_dynamic_flag_spec_CXX='${wl}--export-dynamic' whole_archive_flag_spec_CXX='${wl}--whole-archive$convenience ${wl}--no-whole-archive' ;; pgCC*) # Portland Group C++ compiler archive_cmds_CXX='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname -o $lib' archive_expsym_cmds_CXX='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects 
$compiler_flags ${wl}-soname ${wl}$soname ${wl}-retain-symbols-file ${wl}$export_symbols -o $lib' hardcode_libdir_flag_spec_CXX='${wl}--rpath ${wl}$libdir' export_dynamic_flag_spec_CXX='${wl}--export-dynamic' whole_archive_flag_spec_CXX='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; $echo \"$new_convenience\"` ${wl}--no-whole-archive' ;; cxx*) # Compaq C++ archive_cmds_CXX='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib' archive_expsym_cmds_CXX='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib ${wl}-retain-symbols-file $wl$export_symbols' runpath_var=LD_RUN_PATH hardcode_libdir_flag_spec_CXX='-rpath $libdir' hardcode_libdir_separator_CXX=: # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. # # There doesn't appear to be a way to prevent this compiler from # explicitly linking system object files so we need to strip them # from the output so that they don't get included in the library # dependencies. 
output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | grep "ld"`; templist=`echo $templist | $SED "s/\(^.*ld.*\)\( .*ld .*$\)/\1/"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; echo $list' ;; *) case `$CC -V 2>&1 | sed 5q` in *Sun\ C*) # Sun C++ 5.9 no_undefined_flag_CXX=' -zdefs' archive_cmds_CXX='$CC -G${allow_undefined_flag} -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' archive_expsym_cmds_CXX='$CC -G${allow_undefined_flag} -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-retain-symbols-file ${wl}$export_symbols' hardcode_libdir_flag_spec_CXX='-R$libdir' whole_archive_flag_spec_CXX='${wl}--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; $echo \"$new_convenience\"` ${wl}--no-whole-archive' # Not sure whether something based on # $CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1 # would be better. output_verbose_link_cmd='echo' # Archives containing C++ object files must be created using # "CC -xar", where "CC" is the Sun C++ compiler. This is # necessary to make sure instantiated templates are included # in the archive. 
old_archive_cmds_CXX='$CC -xar -o $oldlib $oldobjs' ;; esac ;; esac ;; lynxos*) # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; m88k*) # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; mvs*) case $cc_basename in cxx*) # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; *) # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; esac ;; netbsd*) if echo __ELF__ | $CC -E - | grep __ELF__ >/dev/null; then archive_cmds_CXX='$LD -Bshareable -o $lib $predep_objects $libobjs $deplibs $postdep_objects $linker_flags' wlarc= hardcode_libdir_flag_spec_CXX='-R$libdir' hardcode_direct_CXX=yes hardcode_shlibpath_var_CXX=no fi # Workaround some broken pre-1.5 toolchains output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | grep conftest.$objext | $SED -e "s:-lgcc -lc -lgcc::"' ;; openbsd2*) # C++ shared libraries are fairly broken ld_shlibs_CXX=no ;; openbsd*) if test -f /usr/libexec/ld.so; then hardcode_direct_CXX=yes hardcode_shlibpath_var_CXX=no archive_cmds_CXX='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib' hardcode_libdir_flag_spec_CXX='${wl}-rpath,$libdir' if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then archive_expsym_cmds_CXX='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-retain-symbols-file,$export_symbols -o $lib' export_dynamic_flag_spec_CXX='${wl}-E' whole_archive_flag_spec_CXX="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive' fi output_verbose_link_cmd='echo' else ld_shlibs_CXX=no fi ;; osf3*) case $cc_basename in KCC*) # Kuck and Associates, Inc. (KAI) C++ Compiler # KCC will only create a shared library if the output file # ends with ".so" (or ".sl" for HP-UX), so rename the library # to its proper name (with version) after linking. 
archive_cmds_CXX='tempext=`echo $shared_ext | $SED -e '\''s/\([^()0-9A-Za-z{}]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib' hardcode_libdir_flag_spec_CXX='${wl}-rpath,$libdir' hardcode_libdir_separator_CXX=: # Archives containing C++ object files must be created using # "CC -Bstatic", where "CC" is the KAI C++ compiler. old_archive_cmds_CXX='$CC -Bstatic -o $oldlib $oldobjs' ;; RCC*) # Rational C++ 2.4.1 # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; cxx*) allow_undefined_flag_CXX=' ${wl}-expect_unresolved ${wl}\*' archive_cmds_CXX='$CC -shared${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $soname `test -n "$verstring" && echo ${wl}-set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib' hardcode_libdir_flag_spec_CXX='${wl}-rpath ${wl}$libdir' hardcode_libdir_separator_CXX=: # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. # # There doesn't appear to be a way to prevent this compiler from # explicitly linking system object files so we need to strip them # from the output so that they don't get included in the library # dependencies. 
output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | grep "ld" | grep -v "ld:"`; templist=`echo $templist | $SED "s/\(^.*ld.*\)\( .*ld.*$\)/\1/"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; echo $list' ;; *) if test "$GXX" = yes && test "$with_gnu_ld" = no; then allow_undefined_flag_CXX=' ${wl}-expect_unresolved ${wl}\*' archive_cmds_CXX='$CC -shared -nostdlib ${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' hardcode_libdir_flag_spec_CXX='${wl}-rpath ${wl}$libdir' hardcode_libdir_separator_CXX=: # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | grep "\-L"' else # FIXME: insert proper C++ library support ld_shlibs_CXX=no fi ;; esac ;; osf4* | osf5*) case $cc_basename in KCC*) # Kuck and Associates, Inc. (KAI) C++ Compiler # KCC will only create a shared library if the output file # ends with ".so" (or ".sl" for HP-UX), so rename the library # to its proper name (with version) after linking. archive_cmds_CXX='tempext=`echo $shared_ext | $SED -e '\''s/\([^()0-9A-Za-z{}]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib' hardcode_libdir_flag_spec_CXX='${wl}-rpath,$libdir' hardcode_libdir_separator_CXX=: # Archives containing C++ object files must be created using # the KAI C++ compiler. 
old_archive_cmds_CXX='$CC -o $oldlib $oldobjs' ;; RCC*) # Rational C++ 2.4.1 # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; cxx*) allow_undefined_flag_CXX=' -expect_unresolved \*' archive_cmds_CXX='$CC -shared${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib' archive_expsym_cmds_CXX='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done~ echo "-hidden">> $lib.exp~ $CC -shared$allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname -Wl,-input -Wl,$lib.exp `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib~ $rm $lib.exp' hardcode_libdir_flag_spec_CXX='-rpath $libdir' hardcode_libdir_separator_CXX=: # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. # # There doesn't appear to be a way to prevent this compiler from # explicitly linking system object files so we need to strip them # from the output so that they don't get included in the library # dependencies. 
output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | grep "ld" | grep -v "ld:"`; templist=`echo $templist | $SED "s/\(^.*ld.*\)\( .*ld.*$\)/\1/"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; echo $list' ;; *) if test "$GXX" = yes && test "$with_gnu_ld" = no; then allow_undefined_flag_CXX=' ${wl}-expect_unresolved ${wl}\*' archive_cmds_CXX='$CC -shared -nostdlib ${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-msym ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' hardcode_libdir_flag_spec_CXX='${wl}-rpath ${wl}$libdir' hardcode_libdir_separator_CXX=: # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | grep "\-L"' else # FIXME: insert proper C++ library support ld_shlibs_CXX=no fi ;; esac ;; psos*) # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; sunos4*) case $cc_basename in CC*) # Sun C++ 4.x # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; lcc*) # Lucid # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; *) # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; esac ;; solaris*) case $cc_basename in CC*) # Sun C++ 4.2, 5.x and Centerline C++ archive_cmds_need_lc_CXX=yes no_undefined_flag_CXX=' -zdefs' archive_cmds_CXX='$CC -G${allow_undefined_flag} -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' archive_expsym_cmds_CXX='$echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~$echo "local: *; };" >> $lib.exp~ $CC -G${allow_undefined_flag} ${wl}-M ${wl}$lib.exp -h$soname -o $lib $predep_objects $libobjs $deplibs 
$postdep_objects $compiler_flags~$rm $lib.exp' hardcode_libdir_flag_spec_CXX='-R$libdir' hardcode_shlibpath_var_CXX=no case $host_os in solaris2.[0-5] | solaris2.[0-5].*) ;; *) # The compiler driver will combine and reorder linker options, # but understands `-z linker_flag'. # Supported since Solaris 2.6 (maybe 2.5.1?) whole_archive_flag_spec_CXX='-z allextract$convenience -z defaultextract' ;; esac link_all_deplibs_CXX=yes output_verbose_link_cmd='echo' # Archives containing C++ object files must be created using # "CC -xar", where "CC" is the Sun C++ compiler. This is # necessary to make sure instantiated templates are included # in the archive. old_archive_cmds_CXX='$CC -xar -o $oldlib $oldobjs' ;; gcx*) # Green Hills C++ Compiler archive_cmds_CXX='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib' # The C++ compiler must be used to create the archive. old_archive_cmds_CXX='$CC $LDFLAGS -archive -o $oldlib $oldobjs' ;; *) # GNU C++ compiler with Solaris linker if test "$GXX" = yes && test "$with_gnu_ld" = no; then no_undefined_flag_CXX=' ${wl}-z ${wl}defs' if $CC --version | grep -v '^2\.7' > /dev/null; then archive_cmds_CXX='$CC -shared -nostdlib $LDFLAGS $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib' archive_expsym_cmds_CXX='$echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~$echo "local: *; };" >> $lib.exp~ $CC -shared -nostdlib ${wl}-M $wl$lib.exp -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$rm $lib.exp' # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. output_verbose_link_cmd="$CC -shared $CFLAGS -v conftest.$objext 2>&1 | grep \"\-L\"" else # g++ 2.7 appears to require `-G' NOT `-shared' on this # platform. 
archive_cmds_CXX='$CC -G -nostdlib $LDFLAGS $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib' archive_expsym_cmds_CXX='$echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~$echo "local: *; };" >> $lib.exp~ $CC -G -nostdlib ${wl}-M $wl$lib.exp -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$rm $lib.exp' # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. output_verbose_link_cmd="$CC -G $CFLAGS -v conftest.$objext 2>&1 | grep \"\-L\"" fi hardcode_libdir_flag_spec_CXX='${wl}-R $wl$libdir' case $host_os in solaris2.[0-5] | solaris2.[0-5].*) ;; *) whole_archive_flag_spec_CXX='${wl}-z ${wl}allextract$convenience ${wl}-z ${wl}defaultextract' ;; esac fi ;; esac ;; sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[01].[10]* | unixware7* | sco3.2v5.0.[024]*) no_undefined_flag_CXX='${wl}-z,text' archive_cmds_need_lc_CXX=no hardcode_shlibpath_var_CXX=no runpath_var='LD_RUN_PATH' case $cc_basename in CC*) archive_cmds_CXX='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds_CXX='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' ;; *) archive_cmds_CXX='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds_CXX='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' ;; esac ;; sysv5* | sco3.2v5* | sco5v6*) # Note: We can NOT use -z defs as we might desire, because we do not # link with -lc, and that would cause any symbols used from libc to # always be unresolved, which means just about no library would # ever link correctly. If we're not using GNU ld we use -z text # though, which does catch some bad symbols but isn't as heavy-handed # as -z defs. 
# For security reasons, it is highly recommended that you always # use absolute paths for naming shared libraries, and exclude the # DT_RUNPATH tag from executables and libraries. But doing so # requires that you compile everything twice, which is a pain. # So that behaviour is only enabled if SCOABSPATH is set to a # non-empty value in the environment. Most likely only useful for # creating official distributions of packages. # This is a hack until libtool officially supports absolute path # names for shared libraries. no_undefined_flag_CXX='${wl}-z,text' allow_undefined_flag_CXX='${wl}-z,nodefs' archive_cmds_need_lc_CXX=no hardcode_shlibpath_var_CXX=no hardcode_libdir_flag_spec_CXX='`test -z "$SCOABSPATH" && echo ${wl}-R,$libdir`' hardcode_libdir_separator_CXX=':' link_all_deplibs_CXX=yes export_dynamic_flag_spec_CXX='${wl}-Bexport' runpath_var='LD_RUN_PATH' case $cc_basename in CC*) archive_cmds_CXX='$CC -G ${wl}-h,\${SCOABSPATH:+${install_libdir}/}$soname -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds_CXX='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,\${SCOABSPATH:+${install_libdir}/}$soname -o $lib $libobjs $deplibs $compiler_flags' ;; *) archive_cmds_CXX='$CC -shared ${wl}-h,\${SCOABSPATH:+${install_libdir}/}$soname -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds_CXX='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,\${SCOABSPATH:+${install_libdir}/}$soname -o $lib $libobjs $deplibs $compiler_flags' ;; esac ;; tandem*) case $cc_basename in NCC*) # NonStop-UX NCC 3.20 # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; *) # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; esac ;; vxworks*) # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; *) # FIXME: insert proper C++ library support ld_shlibs_CXX=no ;; esac { echo "$as_me:$LINENO: result: $ld_shlibs_CXX" >&5 echo "${ECHO_T}$ld_shlibs_CXX" >&6; } test "$ld_shlibs_CXX" = no && can_build_shared=no GCC_CXX="$GXX" LD_CXX="$LD" cat > 
conftest.$ac_ext <&5 (eval $ac_compile) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; then # Parse the compiler output and extract the necessary # objects, libraries and library flags. # Sentinel used to keep track of whether or not we are before # the conftest object file. pre_test_object_deps_done=no # The `*' in the case matches for architectures that use `case' in # $output_verbose_cmd can trigger glob expansion during the loop # eval without this substitution. output_verbose_link_cmd=`$echo "X$output_verbose_link_cmd" | $Xsed -e "$no_glob_subst"` for p in `eval $output_verbose_link_cmd`; do case $p in -L* | -R* | -l*) # Some compilers place space between "-{L,R}" and the path. # Remove the space. if test $p = "-L" \ || test $p = "-R"; then prev=$p continue else prev= fi if test "$pre_test_object_deps_done" = no; then case $p in -L* | -R*) # Internal compiler library paths should come after those # provided the user. The postdeps already come after the # user supplied libs so there is no need to process them. if test -z "$compiler_lib_search_path_CXX"; then compiler_lib_search_path_CXX="${prev}${p}" else compiler_lib_search_path_CXX="${compiler_lib_search_path_CXX} ${prev}${p}" fi ;; # The "-l" case would never come before the object being # linked, so don't bother handling this case. esac else if test -z "$postdeps_CXX"; then postdeps_CXX="${prev}${p}" else postdeps_CXX="${postdeps_CXX} ${prev}${p}" fi fi ;; *.$objext) # This assumes that the test object file only shows up # once in the compiler output. if test "$p" = "conftest.$objext"; then pre_test_object_deps_done=yes continue fi if test "$pre_test_object_deps_done" = no; then if test -z "$predep_objects_CXX"; then predep_objects_CXX="$p" else predep_objects_CXX="$predep_objects_CXX $p" fi else if test -z "$postdep_objects_CXX"; then postdep_objects_CXX="$p" else postdep_objects_CXX="$postdep_objects_CXX $p" fi fi ;; *) ;; # Ignore the rest. esac done # Clean up. 
rm -f a.out a.exe else echo "libtool.m4: error: problem compiling CXX test program" fi $rm -f confest.$objext # PORTME: override above test on systems where it is broken case $host_os in interix[3-9]*) # Interix 3.5 installs completely hosed .la files for C++, so rather than # hack all around it, let's just trust "g++" to DTRT. predep_objects_CXX= postdep_objects_CXX= postdeps_CXX= ;; linux*) case `$CC -V 2>&1 | sed 5q` in *Sun\ C*) # Sun C++ 5.9 # # The more standards-conforming stlport4 library is # incompatible with the Cstd library. Avoid specifying # it if it's in CXXFLAGS. Ignore libCrun as # -library=stlport4 depends on it. case " $CXX $CXXFLAGS " in *" -library=stlport4 "*) solaris_use_stlport4=yes ;; esac if test "$solaris_use_stlport4" != yes; then postdeps_CXX='-library=Cstd -library=Crun' fi ;; esac ;; solaris*) case $cc_basename in CC*) # The more standards-conforming stlport4 library is # incompatible with the Cstd library. Avoid specifying # it if it's in CXXFLAGS. Ignore libCrun as # -library=stlport4 depends on it. case " $CXX $CXXFLAGS " in *" -library=stlport4 "*) solaris_use_stlport4=yes ;; esac # Adding this requires a known-good setup of shared libraries for # Sun compiler versions before 5.6, else PIC objects from an old # archive will be linked into the output, leading to subtle bugs. if test "$solaris_use_stlport4" != yes; then postdeps_CXX='-library=Cstd -library=Crun' fi ;; esac ;; esac case " $postdeps_CXX " in *" -lc "*) archive_cmds_need_lc_CXX=no ;; esac lt_prog_compiler_wl_CXX= lt_prog_compiler_pic_CXX= lt_prog_compiler_static_CXX= { echo "$as_me:$LINENO: checking for $compiler option to produce PIC" >&5 echo $ECHO_N "checking for $compiler option to produce PIC... $ECHO_C" >&6; } # C++ specific cases for pic, static, wl, etc. if test "$GXX" = yes; then lt_prog_compiler_wl_CXX='-Wl,' lt_prog_compiler_static_CXX='-static' case $host_os in aix*) # All AIX code is PIC. 
if test "$host_cpu" = ia64; then # AIX 5 now supports IA64 processor lt_prog_compiler_static_CXX='-Bstatic' fi ;; amigaos*) # FIXME: we need at least 68020 code to build shared libraries, but # adding the `-m68020' flag to GCC prevents building anything better, # like `-m68040'. lt_prog_compiler_pic_CXX='-m68020 -resident32 -malways-restore-a4' ;; beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) # PIC is the default for these OSes. ;; mingw* | cygwin* | os2* | pw32*) # This hack is so that the source file can tell whether it is being # built for inclusion in a dll (and should export symbols for example). # Although the cygwin gcc ignores -fPIC, still need this for old-style # (--disable-auto-import) libraries lt_prog_compiler_pic_CXX='-DDLL_EXPORT' ;; darwin* | rhapsody*) # PIC is the default on this platform # Common symbols not allowed in MH_DYLIB files lt_prog_compiler_pic_CXX='-fno-common' ;; *djgpp*) # DJGPP does not support shared libraries at all lt_prog_compiler_pic_CXX= ;; interix[3-9]*) # Interix 3.x gcc -fpic/-fPIC options generate broken code. # Instead, we relocate shared libraries at runtime. ;; sysv4*MP*) if test -d /usr/nec; then lt_prog_compiler_pic_CXX=-Kconform_pic fi ;; hpux*) # PIC is the default for IA64 HP-UX and 64-bit HP-UX, but # not for PA HP-UX. case $host_cpu in hppa*64*|ia64*) ;; *) lt_prog_compiler_pic_CXX='-fPIC' ;; esac ;; *) lt_prog_compiler_pic_CXX='-fPIC' ;; esac else case $host_os in aix4* | aix5*) # All AIX code is PIC. 
if test "$host_cpu" = ia64; then # AIX 5 now supports IA64 processor lt_prog_compiler_static_CXX='-Bstatic' else lt_prog_compiler_static_CXX='-bnso -bI:/lib/syscalls.exp' fi ;; chorus*) case $cc_basename in cxch68*) # Green Hills C++ Compiler # _LT_AC_TAGVAR(lt_prog_compiler_static, CXX)="--no_auto_instantiation -u __main -u __premain -u _abort -r $COOL_DIR/lib/libOrb.a $MVME_DIR/lib/CC/libC.a $MVME_DIR/lib/classix/libcx.s.a" ;; esac ;; darwin*) # PIC is the default on this platform # Common symbols not allowed in MH_DYLIB files case $cc_basename in xlc*) lt_prog_compiler_pic_CXX='-qnocommon' lt_prog_compiler_wl_CXX='-Wl,' ;; esac ;; dgux*) case $cc_basename in ec++*) lt_prog_compiler_pic_CXX='-KPIC' ;; ghcx*) # Green Hills C++ Compiler lt_prog_compiler_pic_CXX='-pic' ;; *) ;; esac ;; freebsd* | dragonfly*) # FreeBSD uses GNU C++ ;; hpux9* | hpux10* | hpux11*) case $cc_basename in CC*) lt_prog_compiler_wl_CXX='-Wl,' lt_prog_compiler_static_CXX='${wl}-a ${wl}archive' if test "$host_cpu" != ia64; then lt_prog_compiler_pic_CXX='+Z' fi ;; aCC*) lt_prog_compiler_wl_CXX='-Wl,' lt_prog_compiler_static_CXX='${wl}-a ${wl}archive' case $host_cpu in hppa*64*|ia64*) # +Z the default ;; *) lt_prog_compiler_pic_CXX='+Z' ;; esac ;; *) ;; esac ;; interix*) # This is c89, which is MS Visual C++ (no shared libs) # Anyone wants to do a port? ;; irix5* | irix6* | nonstopux*) case $cc_basename in CC*) lt_prog_compiler_wl_CXX='-Wl,' lt_prog_compiler_static_CXX='-non_shared' # CC pic flag -KPIC is the default. ;; *) ;; esac ;; linux* | k*bsd*-gnu) case $cc_basename in KCC*) # KAI C++ Compiler lt_prog_compiler_wl_CXX='--backend -Wl,' lt_prog_compiler_pic_CXX='-fPIC' ;; icpc* | ecpc*) # Intel C++ lt_prog_compiler_wl_CXX='-Wl,' lt_prog_compiler_pic_CXX='-KPIC' lt_prog_compiler_static_CXX='-static' ;; pgCC*) # Portland Group C++ compiler. 
lt_prog_compiler_wl_CXX='-Wl,'
lt_prog_compiler_pic_CXX='-fpic'
lt_prog_compiler_static_CXX='-Bstatic'
;;
cxx*)
  # Compaq C++
  # Make sure the PIC flag is empty.  It appears that all Alpha
  # Linux and Compaq Tru64 Unix objects are PIC.
  lt_prog_compiler_pic_CXX=
  lt_prog_compiler_static_CXX='-non_shared'
  ;;
*)
  # Unknown driver name: look at the first five lines of "$CC -V".
  case `$CC -V 2>&1 | sed 5q` in
  *Sun\ C*)
    # Sun C++ 5.9
    lt_prog_compiler_pic_CXX='-KPIC'
    lt_prog_compiler_static_CXX='-Bstatic'
    lt_prog_compiler_wl_CXX='-Qoption ld '
    ;;
  esac
  ;;
esac
;;

lynxos*)
  ;;

m88k*)
  ;;

mvs*)
  case $cc_basename in
  cxx*)
    lt_prog_compiler_pic_CXX='-W c,exportall'
    ;;
  *)
    ;;
  esac
  ;;

netbsd*)
  ;;

osf3* | osf4* | osf5*)
  case $cc_basename in
  KCC*)
    lt_prog_compiler_wl_CXX='--backend -Wl,'
    ;;
  RCC*)
    # Rational C++ 2.4.1
    lt_prog_compiler_pic_CXX='-pic'
    ;;
  cxx*)
    # Digital/Compaq C++
    lt_prog_compiler_wl_CXX='-Wl,'
    # Make sure the PIC flag is empty.  It appears that all Alpha
    # Linux and Compaq Tru64 Unix objects are PIC.
    lt_prog_compiler_pic_CXX=
    lt_prog_compiler_static_CXX='-non_shared'
    ;;
  *)
    ;;
  esac
  ;;

psos*)
  ;;

solaris*)
  case $cc_basename in
  CC*)
    # Sun C++ 4.2, 5.x and Centerline C++
    lt_prog_compiler_pic_CXX='-KPIC'
    lt_prog_compiler_static_CXX='-Bstatic'
    lt_prog_compiler_wl_CXX='-Qoption ld '
    ;;
  gcx*)
    # Green Hills C++ Compiler
    lt_prog_compiler_pic_CXX='-PIC'
    ;;
  *)
    ;;
  esac
  ;;

sunos4*)
  case $cc_basename in
  CC*)
    # Sun C++ 4.x
    lt_prog_compiler_pic_CXX='-pic'
    lt_prog_compiler_static_CXX='-Bstatic'
    ;;
  lcc*)
    # Lucid
    lt_prog_compiler_pic_CXX='-pic'
    ;;
  *)
    ;;
  esac
  ;;

tandem*)
  case $cc_basename in
  NCC*)
    # NonStop-UX NCC 3.20
    lt_prog_compiler_pic_CXX='-KPIC'
    ;;
  *)
    ;;
  esac
  ;;

sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*)
  case $cc_basename in
  CC*)
    lt_prog_compiler_wl_CXX='-Wl,'
    lt_prog_compiler_pic_CXX='-KPIC'
    lt_prog_compiler_static_CXX='-Bstatic'
    ;;
  esac
  ;;

vxworks*)
  ;;

*)
  # No entry for this host with a non-GNU compiler.
  lt_prog_compiler_can_build_shared_CXX=no
  ;;
esac
fi

{ echo "$as_me:$LINENO: result: $lt_prog_compiler_pic_CXX" >&5
echo "${ECHO_T}$lt_prog_compiler_pic_CXX" >&6; }

#
# Check to make sure the PIC flag actually works.
#
# The flag is accepted only when the test compile succeeds, produces an
# object file, and prints nothing beyond the compiler's usual boilerplate.
if test -n "$lt_prog_compiler_pic_CXX"; then

  { echo "$as_me:$LINENO: checking if $compiler PIC flag $lt_prog_compiler_pic_CXX works" >&5
echo $ECHO_N "checking if $compiler PIC flag $lt_prog_compiler_pic_CXX works... $ECHO_C" >&6; }
  if test "${lt_prog_compiler_pic_works_CXX+set}" = set; then
    echo $ECHO_N "(cached) $ECHO_C" >&6
  else
    lt_prog_compiler_pic_works_CXX=no
    ac_outfile=conftest.$ac_objext
    echo "$lt_simple_compile_test_code" > conftest.$ac_ext
    lt_compiler_flag="$lt_prog_compiler_pic_CXX -DPIC"
    # Insert the option either (1) after the last *FLAGS variable, or
    # (2) before a word containing "conftest.", or (3) at the end.
    # Note that $ac_compile itself does not contain backslashes and begins
    # with a dollar sign (not a hyphen), so the echo should work correctly.
    # The option is referenced via a variable to avoid confusing sed.
    lt_compile=`echo "$ac_compile" | $SED \
    -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
    -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
    -e 's:$: $lt_compiler_flag:'`
    (eval echo "\"\$as_me:16478: $lt_compile\"" >&5)
    (eval "$lt_compile" 2>conftest.err)
    ac_status=$?
    cat conftest.err >&5
    echo "$as_me:16482: \$? = $ac_status" >&5
    if (exit $ac_status) && test -s "$ac_outfile"; then
      # The compiler can only warn and ignore the option if not recognized
      # So say no if there are warnings other than the usual output.
      $echo "X$_lt_compiler_boilerplate" | $Xsed -e '/^$/d' >conftest.exp
      $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2
      if test ! -s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then
        lt_prog_compiler_pic_works_CXX=yes
      fi
    fi
    $rm conftest*
  fi
  { echo "$as_me:$LINENO: result: $lt_prog_compiler_pic_works_CXX" >&5
echo "${ECHO_T}$lt_prog_compiler_pic_works_CXX" >&6; }

  if test x"$lt_prog_compiler_pic_works_CXX" = xyes; then
    # Give a working flag exactly one leading blank.
    case $lt_prog_compiler_pic_CXX in
    "" | " "*)
      ;;
    *)
      lt_prog_compiler_pic_CXX=" $lt_prog_compiler_pic_CXX"
      ;;
    esac
  else
    lt_prog_compiler_pic_CXX=
    lt_prog_compiler_can_build_shared_CXX=no
  fi

fi
case $host_os in
  # For platforms which do not support PIC, -DPIC is meaningless:
  *djgpp*)
    lt_prog_compiler_pic_CXX=
    ;;
  *)
    lt_prog_compiler_pic_CXX="$lt_prog_compiler_pic_CXX -DPIC"
    ;;
esac

#
# Check to make sure the static flag actually works.
#
# $wl has to be set before the eval: the stored flag may contain ${wl}.
wl=$lt_prog_compiler_wl_CXX
eval lt_tmp_static_flag=\"$lt_prog_compiler_static_CXX\"
{ echo "$as_me:$LINENO: checking if $compiler static flag $lt_tmp_static_flag works" >&5
echo $ECHO_N "checking if $compiler static flag $lt_tmp_static_flag works... $ECHO_C" >&6; }
if test "${lt_prog_compiler_static_works_CXX+set}" = set; then
  echo $ECHO_N "(cached) $ECHO_C" >&6
else
  lt_prog_compiler_static_works_CXX=no
  save_LDFLAGS="$LDFLAGS"
  LDFLAGS="$LDFLAGS $lt_tmp_static_flag"
  echo "$lt_simple_link_test_code" > conftest.$ac_ext
  if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then
    # The linker can only warn and ignore the option if not recognized
    # So say no if there are warnings
    if test -s conftest.err; then
      # Append any errors to the config.log.
cat conftest.err 1>&5 $echo "X$_lt_linker_boilerplate" | $Xsed -e '/^$/d' > conftest.exp $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 if diff conftest.exp conftest.er2 >/dev/null; then lt_prog_compiler_static_works_CXX=yes fi else lt_prog_compiler_static_works_CXX=yes fi fi $rm conftest* LDFLAGS="$save_LDFLAGS" fi { echo "$as_me:$LINENO: result: $lt_prog_compiler_static_works_CXX" >&5 echo "${ECHO_T}$lt_prog_compiler_static_works_CXX" >&6; } if test x"$lt_prog_compiler_static_works_CXX" = xyes; then : else lt_prog_compiler_static_CXX= fi { echo "$as_me:$LINENO: checking if $compiler supports -c -o file.$ac_objext" >&5 echo $ECHO_N "checking if $compiler supports -c -o file.$ac_objext... $ECHO_C" >&6; } if test "${lt_cv_prog_compiler_c_o_CXX+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else lt_cv_prog_compiler_c_o_CXX=no $rm -r conftest 2>/dev/null mkdir conftest cd conftest mkdir out echo "$lt_simple_compile_test_code" > conftest.$ac_ext lt_compiler_flag="-o out/conftest2.$ac_objext" # Insert the option either (1) after the last *FLAGS variable, or # (2) before a word containing "conftest.", or (3) at the end. # Note that $ac_compile itself does not contain backslashes and begins # with a dollar sign (not a hyphen), so the echo should work correctly. lt_compile=`echo "$ac_compile" | $SED \ -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` (eval echo "\"\$as_me:16582: $lt_compile\"" >&5) (eval "$lt_compile" 2>out/conftest.err) ac_status=$? cat out/conftest.err >&5 echo "$as_me:16586: \$? = $ac_status" >&5 if (exit $ac_status) && test -s out/conftest2.$ac_objext then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings $echo "X$_lt_compiler_boilerplate" | $Xsed -e '/^$/d' > out/conftest.exp $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2 if test ! 
-s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then lt_cv_prog_compiler_c_o_CXX=yes fi fi chmod u+w . 2>&5 $rm conftest* # SGI C++ compiler will create directory out/ii_files/ for # template instantiation test -d out/ii_files && $rm out/ii_files/* && rmdir out/ii_files $rm out/* && rmdir out cd .. rmdir conftest $rm conftest* fi { echo "$as_me:$LINENO: result: $lt_cv_prog_compiler_c_o_CXX" >&5 echo "${ECHO_T}$lt_cv_prog_compiler_c_o_CXX" >&6; } hard_links="nottested" if test "$lt_cv_prog_compiler_c_o_CXX" = no && test "$need_locks" != no; then # do not overwrite the value of need_locks provided by the user { echo "$as_me:$LINENO: checking if we can lock with hard links" >&5 echo $ECHO_N "checking if we can lock with hard links... $ECHO_C" >&6; } hard_links=yes $rm conftest* ln conftest.a conftest.b 2>/dev/null && hard_links=no touch conftest.a ln conftest.a conftest.b 2>&5 || hard_links=no ln conftest.a conftest.b 2>/dev/null && hard_links=no { echo "$as_me:$LINENO: result: $hard_links" >&5 echo "${ECHO_T}$hard_links" >&6; } if test "$hard_links" = no; then { echo "$as_me:$LINENO: WARNING: \`$CC' does not support \`-c -o', so \`make -j' may be unsafe" >&5 echo "$as_me: WARNING: \`$CC' does not support \`-c -o', so \`make -j' may be unsafe" >&2;} need_locks=warn fi else need_locks=no fi { echo "$as_me:$LINENO: checking whether the $compiler linker ($LD) supports shared libraries" >&5 echo $ECHO_N "checking whether the $compiler linker ($LD) supports shared libraries... $ECHO_C" >&6; } export_symbols_cmds_CXX='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' case $host_os in aix4* | aix5*) # If we're using GNU nm, then we don't want the "-C" option. 
# -C means demangle to AIX nm, but means don't demangle with GNU nm if $NM -V 2>&1 | grep 'GNU' > /dev/null; then export_symbols_cmds_CXX='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$2 == "T") || (\$2 == "D") || (\$2 == "B")) && (substr(\$3,1,1) != ".")) { print \$3 } }'\'' | sort -u > $export_symbols' else export_symbols_cmds_CXX='$NM -BCpg $libobjs $convenience | awk '\''{ if (((\$2 == "T") || (\$2 == "D") || (\$2 == "B")) && (substr(\$3,1,1) != ".")) { print \$3 } }'\'' | sort -u > $export_symbols' fi ;; pw32*) export_symbols_cmds_CXX="$ltdll_cmds" ;; cygwin* | mingw*) export_symbols_cmds_CXX='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1 DATA/;/^.*[ ]__nm__/s/^.*[ ]__nm__\([^ ]*\)[ ][^ ]*/\1 DATA/;/^I[ ]/d;/^[AITW][ ]/s/.*[ ]//'\'' | sort | uniq > $export_symbols' ;; *) export_symbols_cmds_CXX='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' ;; esac { echo "$as_me:$LINENO: result: $ld_shlibs_CXX" >&5 echo "${ECHO_T}$ld_shlibs_CXX" >&6; } test "$ld_shlibs_CXX" = no && can_build_shared=no # # Do we need to explicitly link libc? # case "x$archive_cmds_need_lc_CXX" in x|xyes) # Assume -lc should be added archive_cmds_need_lc_CXX=yes if test "$enable_shared" = yes && test "$GCC" = yes; then case $archive_cmds_CXX in *'~'*) # FIXME: we may have to deal with multi-command sequences. ;; '$CC '*) # Test whether the compiler implicitly links with -lc since on some # systems, -lgcc has to come before -lc. If gcc already passes -lc # to ld, don't add -lc before -lgcc. { echo "$as_me:$LINENO: checking whether -lc should be explicitly linked in" >&5 echo $ECHO_N "checking whether -lc should be explicitly linked in... $ECHO_C" >&6; } $rm conftest* echo "$lt_simple_compile_test_code" > conftest.$ac_ext if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 (eval $ac_compile) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 (exit $ac_status); } 2>conftest.err; then soname=conftest lib=conftest libobjs=conftest.$ac_objext deplibs= wl=$lt_prog_compiler_wl_CXX pic_flag=$lt_prog_compiler_pic_CXX compiler_flags=-v linker_flags=-v verstring= output_objdir=. libname=conftest lt_save_allow_undefined_flag=$allow_undefined_flag_CXX allow_undefined_flag_CXX= if { (eval echo "$as_me:$LINENO: \"$archive_cmds_CXX 2\>\&1 \| grep \" -lc \" \>/dev/null 2\>\&1\"") >&5 (eval $archive_cmds_CXX 2\>\&1 \| grep \" -lc \" \>/dev/null 2\>\&1) 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } then archive_cmds_need_lc_CXX=no else archive_cmds_need_lc_CXX=yes fi allow_undefined_flag_CXX=$lt_save_allow_undefined_flag else cat conftest.err 1>&5 fi $rm conftest* { echo "$as_me:$LINENO: result: $archive_cmds_need_lc_CXX" >&5 echo "${ECHO_T}$archive_cmds_need_lc_CXX" >&6; } ;; esac fi ;; esac { echo "$as_me:$LINENO: checking dynamic linker characteristics" >&5 echo $ECHO_N "checking dynamic linker characteristics... $ECHO_C" >&6; } library_names_spec= libname_spec='lib$name' soname_spec= shrext_cmds=".so" postinstall_cmds= postuninstall_cmds= finish_cmds= finish_eval= shlibpath_var= shlibpath_overrides_runpath=unknown version_type=none dynamic_linker="$host_os ld.so" sys_lib_dlsearch_path_spec="/lib /usr/lib" need_lib_prefix=unknown hardcode_into_libs=no # when you set need_version to no, make sure it does not cause -set_version # flags to be left without arguments need_version=unknown case $host_os in aix3*) version_type=linux library_names_spec='${libname}${release}${shared_ext}$versuffix $libname.a' shlibpath_var=LIBPATH # AIX 3 has no versioning support, so we append a major version to the name. 
soname_spec='${libname}${release}${shared_ext}$major'
  ;;

# AIX 4/5: the naming scheme chosen below depends on $host_cpu and on
# whether run time linking ($aix_use_runtimelinking) is enabled.
aix4* | aix5*)
  version_type=linux
  need_lib_prefix=no
  need_version=no
  hardcode_into_libs=yes
  if test "$host_cpu" = ia64; then
    # AIX 5 supports IA64
    library_names_spec='${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext}$versuffix $libname${shared_ext}'
    shlibpath_var=LD_LIBRARY_PATH
  else
    # With GCC up to 2.95.x, collect2 would create an import file
    # for dependence libraries. The import file would start with
    # the line `#! .'. This would cause the generated library to
    # depend on `.', always an invalid library. This was fixed in
    # development snapshots of GCC prior to 3.0.
    case $host_os in
      aix4 | aix4.[01] | aix4.[01].*)
      # Pipe a three-line probe through the preprocessor: the word "yes"
      # only survives when __GNUC__/__GNUC_MINOR__ report 2.97 or newer.
      # If grep finds no "yes", shared libraries are disabled.
      if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)'
           echo ' yes '
           echo '#endif'; } | ${CC} -E - | grep yes > /dev/null; then
        :
      else
        can_build_shared=no
      fi
      ;;
    esac
    # AIX (on Power*) has no versioning support, so currently we can not hardcode correct
    # soname into executable. Probably we can add versioning support to
    # collect2, so additional links can be useful in future.
    if test "$aix_use_runtimelinking" = yes; then
      # If using run time linking (on AIX 4.2 or later) use lib.so
      # instead of lib.a to let people know that these are not
      # typical AIX shared libraries.
      library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
    else
      # We preserve .a as extension for shared libraries through AIX4.2
      # and later when we are not doing run time linking.
      library_names_spec='${libname}${release}.a $libname.a'
      soname_spec='${libname}${release}${shared_ext}$major'
    fi
    shlibpath_var=LIBPATH
  fi
  ;;

amigaos*)
  library_names_spec='$libname.ixlibrary $libname.a'
  # Create ${libname}_ixlibrary.a entries in /sys/libs.
# For every installed *.ixlibrary, drop any existing link in /sys/libs with
# $rm and then re-create it with $LN_S.  The removal must be a plain `$rm`
# call: wrapping it in `test` yields an invalid test expression, leaves the
# old link in place, and makes the following $LN_S (and the loop) fail.
finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`$echo "X$lib" | $Xsed -e '\''s%^.*/\([^/]*\)\.ixlibrary$%\1%'\''`; $rm /sys/libs/${libname}_ixlibrary.a; $show "cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a"; cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a || exit 1; done'
  ;;

beos*)
  library_names_spec='${libname}${shared_ext}'
  dynamic_linker="$host_os ld.so"
  shlibpath_var=LIBRARY_PATH
  ;;

bsdi[45]*)
  version_type=linux
  need_version=no
  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
  soname_spec='${libname}${release}${shared_ext}$major'
  finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir'
  shlibpath_var=LD_LIBRARY_PATH
  sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib"
  sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib"
  # the default ld.so.conf also contains /usr/contrib/lib and
  # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow
  # libtool to hard-code these into programs
  ;;

# Windows-style hosts: DLL naming differs between GCC and non-GCC builds,
# and (for GCC) between cygwin, mingw and pw32.
cygwin* | mingw* | pw32*)
  version_type=windows
  shrext_cmds=".dll"
  need_version=no
  need_lib_prefix=no

  case $GCC,$host_os in
  yes,cygwin* | yes,mingw* | yes,pw32*)
    library_names_spec='$libname.dll.a'
    # DLL is installed to $(libdir)/../bin by postinstall_cmds
    # (the `~' characters separate the individual commands of the list)
    postinstall_cmds='base_file=`basename \${file}`~ dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i;echo \$dlname'\''`~ dldir=$destdir/`dirname \$dlpath`~ test -d \$dldir || mkdir -p \$dldir~ $install_prog $dir/$dlname \$dldir/$dlname~ chmod a+x \$dldir/$dlname'
    postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~ dlpath=$dir/\$dldll~ $rm \$dlpath'
    shlibpath_overrides_runpath=yes

    case $host_os in
    cygwin*)
      # Cygwin DLLs use 'cyg' prefix rather than 'lib'
      soname_spec='`echo ${libname} | sed -e 's/^lib/cyg/'``echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}'
      sys_lib_search_path_spec="/usr/lib /lib/w32api /lib /usr/local/lib"
      ;;
    mingw*)
      # MinGW DLLs use traditional 'lib' prefix
      soname_spec='${libname}`echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}'
      # Take the search path from the "libraries:" line printed by the compiler.
      sys_lib_search_path_spec=`$CC -print-search-dirs | grep "^libraries:" | $SED -e "s/^libraries://" -e "s,=/,/,g"`
      if echo "$sys_lib_search_path_spec" | grep ';[c-zC-Z]:/' >/dev/null; then
        # It is most probably a Windows format PATH printed by
        # mingw gcc, but we are running on Cygwin. Gcc prints its search
        # path with ; separators, and with drive letters. We can handle the
        # drive letters (cygwin fileutils understands them), so leave them,
        # especially as we might pass files found there to a mingw objdump,
        # which wouldn't understand a cygwinified path. Ahh.
        sys_lib_search_path_spec=`echo "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'`
      else
        sys_lib_search_path_spec=`echo "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"`
      fi
      ;;
    pw32*)
      # pw32 DLLs use 'pw' prefix rather than 'lib'
      library_names_spec='`echo ${libname} | sed -e 's/^lib/pw/'``echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}'
      ;;
    esac
    ;;

  *)
    library_names_spec='${libname}`echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext} $libname.lib'
    ;;
  esac
  dynamic_linker='Win32 ld.exe'
  # FIXME: first we should search . and the directory the executable is in
  shlibpath_var=PATH
  ;;

darwin* | rhapsody*)
  dynamic_linker="$host_os dyld"
  version_type=darwin
  need_lib_prefix=no
  need_version=no
  library_names_spec='${libname}${release}${versuffix}$shared_ext ${libname}${release}${major}$shared_ext ${libname}$shared_ext'
  soname_spec='${libname}${release}${major}$shared_ext'
  shlibpath_overrides_runpath=yes
  shlibpath_var=DYLD_LIBRARY_PATH
  # The extension is decided when this string is evaluated: .so when
  # $module is "yes", .dylib otherwise.
  shrext_cmds='`test .$module = .yes && echo .so || echo .dylib`'

  sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib'
  ;;

dgux*)
  version_type=linux
  need_lib_prefix=no
  need_version=no
  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname$shared_ext'
  soname_spec='${libname}${release}${shared_ext}$major'
  shlibpath_var=LD_LIBRARY_PATH
  ;;

freebsd1*)
  dynamic_linker=no
  ;;

freebsd* | dragonfly*)
  # DragonFly does not have aout. When/if they implement a new
  # versioning mechanism, adjust this.
  # Ask /usr/bin/objformat when it exists; otherwise derive the object
  # format from the host_os version.
  if test -x /usr/bin/objformat; then
    objformat=`/usr/bin/objformat`
  else
    case $host_os in
    freebsd[123]*) objformat=aout ;;
    *) objformat=elf ;;
    esac
  fi
  version_type=freebsd-$objformat
  case $version_type in
    freebsd-elf*)
      library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}'
      need_version=no
      need_lib_prefix=no
      ;;
    freebsd-*)
      library_names_spec='${libname}${release}${shared_ext}$versuffix $libname${shared_ext}$versuffix'
      need_version=yes
      ;;
  esac
  shlibpath_var=LD_LIBRARY_PATH
  case $host_os in
  freebsd2*)
    shlibpath_overrides_runpath=yes
    ;;
  freebsd3.[01]* | freebsdelf3.[01]*)
    shlibpath_overrides_runpath=yes
    hardcode_into_libs=yes
    ;;
  freebsd3.[2-9]* | freebsdelf3.[2-9]* | \
  freebsd4.[0-5] | freebsdelf4.[0-5] | freebsd4.1.1 | freebsdelf4.1.1)
    shlibpath_overrides_runpath=no
    hardcode_into_libs=yes
    ;;
  *) # from 4.6 on, and DragonFly
    shlibpath_overrides_runpath=yes
    hardcode_into_libs=yes
    ;;
  esac
  ;;

gnu*)
  version_type=linux
  need_lib_prefix=no
  need_version=no
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}'
  soname_spec='${libname}${release}${shared_ext}$major'
  shlibpath_var=LD_LIBRARY_PATH
  hardcode_into_libs=yes
  ;;

hpux9* | hpux10* | hpux11*)
  # Give a soname corresponding to the major version so that dld.sl refuses to
  # link against other versions.
  version_type=sunos
  need_lib_prefix=no
  need_version=no
  # Extension, loader name and search paths are chosen per $host_cpu.
  case $host_cpu in
  ia64*)
    shrext_cmds='.so'
    hardcode_into_libs=yes
    dynamic_linker="$host_os dld.so"
    shlibpath_var=LD_LIBRARY_PATH
    shlibpath_overrides_runpath=yes # Unless +noenvvar is specified.
    library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
    soname_spec='${libname}${release}${shared_ext}$major'
    # $HPUX_IA64_MODE selects between the hpux32 and hpux64 directories.
    if test "X$HPUX_IA64_MODE" = X32; then
      sys_lib_search_path_spec="/usr/lib/hpux32 /usr/local/lib/hpux32 /usr/local/lib"
    else
      sys_lib_search_path_spec="/usr/lib/hpux64 /usr/local/lib/hpux64"
    fi
    sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec
    ;;
   hppa*64*)
     shrext_cmds='.sl'
     hardcode_into_libs=yes
     dynamic_linker="$host_os dld.sl"
     shlibpath_var=LD_LIBRARY_PATH # How should we handle SHLIB_PATH
     shlibpath_overrides_runpath=yes # Unless +noenvvar is specified.
     library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
     soname_spec='${libname}${release}${shared_ext}$major'
     sys_lib_search_path_spec="/usr/lib/pa20_64 /usr/ccs/lib/pa20_64"
     sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec
     ;;
   *)
    shrext_cmds='.sl'
    dynamic_linker="$host_os dld.sl"
    shlibpath_var=SHLIB_PATH
    shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH
    library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
    soname_spec='${libname}${release}${shared_ext}$major'
    ;;
  esac
  # HP-UX runs *really* slowly unless shared libraries are mode 555.
  postinstall_cmds='chmod 555 $lib'
  ;;

interix[3-9]*)
  version_type=linux
  need_lib_prefix=no
  need_version=no
  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
  soname_spec='${libname}${release}${shared_ext}$major'
  dynamic_linker='Interix 3.x ld.so.1 (PE, like ELF)'
  shlibpath_var=LD_LIBRARY_PATH
  shlibpath_overrides_runpath=no
  hardcode_into_libs=yes
  ;;

irix5* | irix6* | nonstopux*)
  # version_type: nonstopux on nonstopux hosts; otherwise linux when
  # $lt_cv_prog_gnu_ld is "yes", irix when it is not.
  case $host_os in
    nonstopux*) version_type=nonstopux ;;
    *)
	if test "$lt_cv_prog_gnu_ld" = yes; then
		version_type=linux
	else
		version_type=irix
	fi ;;
  esac
  need_lib_prefix=no
  need_version=no
  soname_spec='${libname}${release}${shared_ext}$major'
  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext} $libname${shared_ext}'
  # Derive the directory suffix (libsuff), the LD_LIBRARY*_PATH infix
  # (shlibsuff) and libmagic from the ABI switch found at the end of $LD.
  case $host_os in
  irix5* | nonstopux*)
    libsuff= shlibsuff=
    ;;
  *)
    case $LD in # libtool.m4 will add one of these switches to LD
    *-32|*"-32 "|*-melf32bsmip|*"-melf32bsmip ")
      libsuff= shlibsuff= libmagic=32-bit;;
    *-n32|*"-n32 "|*-melf32bmipn32|*"-melf32bmipn32 ")
      libsuff=32 shlibsuff=N32 libmagic=N32;;
    *-64|*"-64 "|*-melf64bmip|*"-melf64bmip ")
      libsuff=64 shlibsuff=64 libmagic=64-bit;;
    *) libsuff= shlibsuff= libmagic=never-match;;
    esac
    ;;
  esac
  shlibpath_var=LD_LIBRARY${shlibsuff}_PATH
  shlibpath_overrides_runpath=no
  sys_lib_search_path_spec="/usr/lib${libsuff} /lib${libsuff} /usr/local/lib${libsuff}"
  sys_lib_dlsearch_path_spec="/usr/lib${libsuff} /lib${libsuff}"
  hardcode_into_libs=yes
  ;;

# No shared lib support for Linux oldld, aout, or coff.
linux*oldld* | linux*aout* | linux*coff*)
  dynamic_linker=no
  ;;

# This must be Linux ELF.
linux* | k*bsd*-gnu)
  version_type=linux
  need_lib_prefix=no
  need_version=no
  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
  soname_spec='${libname}${release}${shared_ext}$major'
  finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir'
  shlibpath_var=LD_LIBRARY_PATH
  shlibpath_overrides_runpath=no
  # This implies no fast_install, which is unacceptable.
  # Some rework will be needed to allow for fast_install
  # before this can be enabled.
  hardcode_into_libs=yes
  # NOTE(review): ${libsuff} is not assigned anywhere in this arm; it is
  # presumably set earlier in the script (empty otherwise) -- confirm.
  sys_lib_search_path_spec="/usr/lib${libsuff} /lib${libsuff} /usr/local/lib${libsuff}"
  sys_lib_dlsearch_path_spec="/usr/lib${libsuff} /lib${libsuff}"

  # Append ld.so.conf contents to the search path
  if test -f /etc/ld.so.conf; then
    # awk: a line starting with "include " is replaced by the output of
    #      `cat` on its argument, run from /etc; other lines pass through.
    # sed: strip `#' comments, drop hwcap lines, turn the listed separator
    #      characters into spaces, strip `=...' suffixes, drop empty lines.
    # tr:  join everything onto one space-separated line.
    lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s 2>/dev/null", \$2)); skip = 1; } { if (!skip) print \$0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;/^[ ]*hwcap[ ]/d;s/[:, ]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;/^$/d' | tr '\n' ' '`
    sys_lib_dlsearch_path_spec="$sys_lib_dlsearch_path_spec $lt_ld_extra"
  fi

  # We used to test for /lib/ld.so.1 and disable shared libraries on
  # powerpc, because MkLinux only supported shared libraries with the
  # GNU dynamic linker. Since this was broken with cross compilers,
  # most powerpc-linux boxes support dynamic linking these days and
  # people can always --disable-shared, the test was removed, and we
  # assume the GNU/Linux dynamic linker is in use.
dynamic_linker='GNU/Linux ld.so'
  ;;

netbsd*)
  version_type=sunos
  need_lib_prefix=no
  need_version=no
  # If the literal token __ELF__ is still present after preprocessing, the
  # a.out settings are used; otherwise the ELF settings are used.
  if echo __ELF__ | $CC -E - | grep __ELF__ >/dev/null; then
    library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix'
    finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir'
    dynamic_linker='NetBSD (a.out) ld.so'
  else
    library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
    soname_spec='${libname}${release}${shared_ext}$major'
    dynamic_linker='NetBSD ld.elf_so'
  fi
  shlibpath_var=LD_LIBRARY_PATH
  shlibpath_overrides_runpath=yes
  hardcode_into_libs=yes
  ;;

newsos6)
  version_type=linux
  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
  shlibpath_var=LD_LIBRARY_PATH
  shlibpath_overrides_runpath=yes
  ;;

nto-qnx*)
  version_type=linux
  need_lib_prefix=no
  need_version=no
  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
  soname_spec='${libname}${release}${shared_ext}$major'
  shlibpath_var=LD_LIBRARY_PATH
  shlibpath_overrides_runpath=yes
  ;;

openbsd*)
  version_type=sunos
  sys_lib_dlsearch_path_spec="/usr/lib"
  need_lib_prefix=no
  # Some older versions of OpenBSD (3.3 at least) *do* need versioned libs.
case $host_os in
    openbsd3.3 | openbsd3.3.*) need_version=yes ;;
    *)                         need_version=no  ;;
  esac
  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix'
  finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir'
  shlibpath_var=LD_LIBRARY_PATH
  # First branch: the preprocessor replaced __ELF__ (grep prints nothing),
  # or the host is exactly openbsd2.8-powerpc.  In that branch only
  # OpenBSD 2.8/2.9 get shlibpath_overrides_runpath=no.
  if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
    case $host_os in
      openbsd2.[89] | openbsd2.[89].*)
	shlibpath_overrides_runpath=no
	;;
      *)
	shlibpath_overrides_runpath=yes
	;;
      esac
  else
    shlibpath_overrides_runpath=yes
  fi
  ;;

os2*)
  libname_spec='$name'
  shrext_cmds=".dll"
  need_lib_prefix=no
  library_names_spec='$libname${shared_ext} $libname.a'
  dynamic_linker='OS/2 ld.exe'
  shlibpath_var=LIBPATH
  ;;

osf3* | osf4* | osf5*)
  version_type=osf
  need_lib_prefix=no
  need_version=no
  soname_spec='${libname}${release}${shared_ext}$major'
  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
  shlibpath_var=LD_LIBRARY_PATH
  sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib"
  sys_lib_dlsearch_path_spec="$sys_lib_search_path_spec"
  ;;

rdos*)
  dynamic_linker=no
  ;;

solaris*)
  version_type=linux
  need_lib_prefix=no
  need_version=no
  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
  soname_spec='${libname}${release}${shared_ext}$major'
  shlibpath_var=LD_LIBRARY_PATH
  shlibpath_overrides_runpath=yes
  hardcode_into_libs=yes
  # ldd complains unless libraries are executable
  postinstall_cmds='chmod +x $lib'
  ;;

sunos4*)
  version_type=sunos
  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix'
  finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir'
  shlibpath_var=LD_LIBRARY_PATH
  shlibpath_overrides_runpath=yes
  if test "$with_gnu_ld" = yes; then
    need_lib_prefix=no
  fi
  need_version=yes
  ;;

sysv4 | sysv4.3*)
  version_type=linux
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
  soname_spec='${libname}${release}${shared_ext}$major'
  shlibpath_var=LD_LIBRARY_PATH
  # Vendor-specific adjustments; vendors not listed keep the values above.
  case $host_vendor in
    sni)
      shlibpath_overrides_runpath=no
      need_lib_prefix=no
      export_dynamic_flag_spec='${wl}-Blargedynsym'
      runpath_var=LD_RUN_PATH
      ;;
    siemens)
      need_lib_prefix=no
      ;;
    motorola)
      need_lib_prefix=no
      need_version=no
      shlibpath_overrides_runpath=no
      sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib'
      ;;
  esac
  ;;

sysv4*MP*)
  # Settings are applied only when the directory /usr/nec exists.
  if test -d /usr/nec ;then
    version_type=linux
    library_names_spec='$libname${shared_ext}.$versuffix $libname${shared_ext}.$major $libname${shared_ext}'
    soname_spec='$libname${shared_ext}.$major'
    shlibpath_var=LD_LIBRARY_PATH
  fi
  ;;

sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*)
  version_type=freebsd-elf
  need_lib_prefix=no
  need_version=no
  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}'
  soname_spec='${libname}${release}${shared_ext}$major'
  shlibpath_var=LD_LIBRARY_PATH
  hardcode_into_libs=yes
  # The compile-time search path depends on $with_gnu_ld; without GNU ld,
  # sco3.2v5* hosts additionally get /lib appended.
  if test "$with_gnu_ld" = yes; then
    sys_lib_search_path_spec='/usr/local/lib /usr/gnu/lib /usr/ccs/lib /usr/lib /lib'
    shlibpath_overrides_runpath=no
  else
    sys_lib_search_path_spec='/usr/ccs/lib /usr/lib'
    shlibpath_overrides_runpath=yes
    case $host_os in
      sco3.2v5*)
        sys_lib_search_path_spec="$sys_lib_search_path_spec /lib"
	;;
    esac
  fi
  sys_lib_dlsearch_path_spec='/usr/lib'
  ;;

uts4*)
  version_type=linux
  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
  soname_spec='${libname}${release}${shared_ext}$major'
  shlibpath_var=LD_LIBRARY_PATH
  ;;

# Any host_os not matched above: no dynamic linker.
*)
  dynamic_linker=no
  ;;
esac
# Report the result (log on fd 5, message on fd 6); a value of "no" also
# turns off shared-library builds.
{ echo "$as_me:$LINENO: result: $dynamic_linker" >&5
echo "${ECHO_T}$dynamic_linker" >&6; }
test "$dynamic_linker" = no && can_build_shared=no

variables_saved_for_relink="PATH $shlibpath_var $runpath_var"
if test "$GCC" = yes; then
variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH"
fi

# Decide how library paths get hardcoded for the CXX tag.  The result,
# hardcode_action_CXX, is one of: relink, immediate, unsupported.
{ echo "$as_me:$LINENO: checking how to hardcode library paths into programs" >&5
echo $ECHO_N "checking how to hardcode library paths into programs... $ECHO_C" >&6; }
hardcode_action_CXX=
if test -n "$hardcode_libdir_flag_spec_CXX" || \
   test -n "$runpath_var_CXX" || \
   test "X$hardcode_automatic_CXX" = "Xyes" ; then

  # We can hardcode nonexistent directories.
  if test "$hardcode_direct_CXX" != no &&
     # If the only mechanism to avoid hardcoding is shlibpath_var, we
     # have to relink, otherwise we might link with an installed library
     # when we should be linking with a yet-to-be-installed one
     ## test "$_LT_AC_TAGVAR(hardcode_shlibpath_var, CXX)" != no &&
     test "$hardcode_minus_L_CXX" != no; then
    # Linking always hardcodes the temporary library directory.
    hardcode_action_CXX=relink
  else
    # We can link without hardcoding, and we can hardcode nonexisting dirs.
    hardcode_action_CXX=immediate
  fi
else
  # We cannot hardcode anything, or else we can only hardcode existing
  # directories.
  hardcode_action_CXX=unsupported
fi
{ echo "$as_me:$LINENO: result: $hardcode_action_CXX" >&5
echo "${ECHO_T}$hardcode_action_CXX" >&6; }

# enable_fast_install is forced to "no" when relinking is required and set
# to "needless" when shlibpath overrides the runpath or shared libraries
# are disabled; in every other case it is left untouched.
if test "$hardcode_action_CXX" = relink; then
  # Fast installation is not supported
  enable_fast_install=no
elif test "$shlibpath_overrides_runpath" = yes ||
     test "$enable_shared" = no; then
  # Fast installation is not necessary
  enable_fast_install=needless
fi


# The else clause should only fire when bootstrapping the
# libtool distribution, otherwise you forgot to ship ltmain.sh
# with your package, and you will get complaints that there are
# no rules to generate ltmain.sh.
if test -f "$ltmain"; then
  # See if we are running on zsh, and set the options which allow our commands through
  # without removal of \ escapes.
if test -n "${ZSH_VERSION+set}" ; then setopt NO_GLOB_SUBST fi # Now quote all the things that may contain metacharacters while being # careful not to overquote the AC_SUBSTed values. We take copies of the # variables and quote the copies for generation of the libtool script. for var in echo old_CC old_CFLAGS AR AR_FLAGS EGREP RANLIB LN_S LTCC LTCFLAGS NM \ SED SHELL STRIP \ libname_spec library_names_spec soname_spec extract_expsyms_cmds \ old_striplib striplib file_magic_cmd finish_cmds finish_eval \ deplibs_check_method reload_flag reload_cmds need_locks \ lt_cv_sys_global_symbol_pipe lt_cv_sys_global_symbol_to_cdecl \ lt_cv_sys_global_symbol_to_c_name_address \ sys_lib_search_path_spec sys_lib_dlsearch_path_spec \ old_postinstall_cmds old_postuninstall_cmds \ compiler_CXX \ CC_CXX \ LD_CXX \ lt_prog_compiler_wl_CXX \ lt_prog_compiler_pic_CXX \ lt_prog_compiler_static_CXX \ lt_prog_compiler_no_builtin_flag_CXX \ export_dynamic_flag_spec_CXX \ thread_safe_flag_spec_CXX \ whole_archive_flag_spec_CXX \ enable_shared_with_static_runtimes_CXX \ old_archive_cmds_CXX \ old_archive_from_new_cmds_CXX \ predep_objects_CXX \ postdep_objects_CXX \ predeps_CXX \ postdeps_CXX \ compiler_lib_search_path_CXX \ archive_cmds_CXX \ archive_expsym_cmds_CXX \ postinstall_cmds_CXX \ postuninstall_cmds_CXX \ old_archive_from_expsyms_cmds_CXX \ allow_undefined_flag_CXX \ no_undefined_flag_CXX \ export_symbols_cmds_CXX \ hardcode_libdir_flag_spec_CXX \ hardcode_libdir_flag_spec_ld_CXX \ hardcode_libdir_separator_CXX \ hardcode_automatic_CXX \ module_cmds_CXX \ module_expsym_cmds_CXX \ lt_cv_prog_compiler_c_o_CXX \ fix_srcfile_path_CXX \ exclude_expsyms_CXX \ include_expsyms_CXX; do case $var in old_archive_cmds_CXX | \ old_archive_from_new_cmds_CXX | \ archive_cmds_CXX | \ archive_expsym_cmds_CXX | \ module_cmds_CXX | \ module_expsym_cmds_CXX | \ old_archive_from_expsyms_cmds_CXX | \ export_symbols_cmds_CXX | \ extract_expsyms_cmds | reload_cmds | finish_cmds | \ postinstall_cmds | 
postuninstall_cmds | \ old_postinstall_cmds | old_postuninstall_cmds | \ sys_lib_search_path_spec | sys_lib_dlsearch_path_spec) # Double-quote double-evaled strings. eval "lt_$var=\\\"\`\$echo \"X\$$var\" | \$Xsed -e \"\$double_quote_subst\" -e \"\$sed_quote_subst\" -e \"\$delay_variable_subst\"\`\\\"" ;; *) eval "lt_$var=\\\"\`\$echo \"X\$$var\" | \$Xsed -e \"\$sed_quote_subst\"\`\\\"" ;; esac done case $lt_echo in *'\$0 --fallback-echo"') lt_echo=`$echo "X$lt_echo" | $Xsed -e 's/\\\\\\\$0 --fallback-echo"$/$0 --fallback-echo"/'` ;; esac cfgfile="$ofile" cat <<__EOF__ >> "$cfgfile" # ### BEGIN LIBTOOL TAG CONFIG: $tagname # Libtool was configured on host `(hostname || uname -n) 2>/dev/null | sed 1q`: # Shell to use when invoking shell scripts. SHELL=$lt_SHELL # Whether or not to build shared libraries. build_libtool_libs=$enable_shared # Whether or not to build static libraries. build_old_libs=$enable_static # Whether or not to add -lc for building shared libraries. build_libtool_need_lc=$archive_cmds_need_lc_CXX # Whether or not to disallow shared libs when runtime libs are static allow_libtool_libs_with_static_runtimes=$enable_shared_with_static_runtimes_CXX # Whether or not to optimize for fast installation. fast_install=$enable_fast_install # The host system. host_alias=$host_alias host=$host host_os=$host_os # The build system. build_alias=$build_alias build=$build build_os=$build_os # An echo program that does not interpret backslashes. echo=$lt_echo # The archiver. AR=$lt_AR AR_FLAGS=$lt_AR_FLAGS # A C compiler. LTCC=$lt_LTCC # LTCC compiler flags. LTCFLAGS=$lt_LTCFLAGS # A language-specific compiler. CC=$lt_compiler_CXX # Is the compiler the GNU C compiler? with_gcc=$GCC_CXX # An ERE matcher. EGREP=$lt_EGREP # The linker used to build libraries. LD=$lt_LD_CXX # Whether we need hard or soft links. LN_S=$lt_LN_S # A BSD-compatible nm program. 
NM=$lt_NM # A symbol stripping program STRIP=$lt_STRIP # Used to examine libraries when file_magic_cmd begins "file" MAGIC_CMD=$MAGIC_CMD # Used on cygwin: DLL creation program. DLLTOOL="$DLLTOOL" # Used on cygwin: object dumper. OBJDUMP="$OBJDUMP" # Used on cygwin: assembler. AS="$AS" # The name of the directory that contains temporary libtool files. objdir=$objdir # How to create reloadable object files. reload_flag=$lt_reload_flag reload_cmds=$lt_reload_cmds # How to pass a linker flag through the compiler. wl=$lt_lt_prog_compiler_wl_CXX # Object file suffix (normally "o"). objext="$ac_objext" # Old archive suffix (normally "a"). libext="$libext" # Shared library suffix (normally ".so"). shrext_cmds='$shrext_cmds' # Executable file suffix (normally ""). exeext="$exeext" # Additional compiler flags for building library objects. pic_flag=$lt_lt_prog_compiler_pic_CXX pic_mode=$pic_mode # What is the maximum length of a command? max_cmd_len=$lt_cv_sys_max_cmd_len # Does compiler simultaneously support -c and -o options? compiler_c_o=$lt_lt_cv_prog_compiler_c_o_CXX # Must we lock files when doing compilation? need_locks=$lt_need_locks # Do we need the lib prefix for modules? need_lib_prefix=$need_lib_prefix # Do we need a version for libraries? need_version=$need_version # Whether dlopen is supported. dlopen_support=$enable_dlopen # Whether dlopen of programs is supported. dlopen_self=$enable_dlopen_self # Whether dlopen of statically linked programs is supported. dlopen_self_static=$enable_dlopen_self_static # Compiler flag to prevent dynamic linking. link_static_flag=$lt_lt_prog_compiler_static_CXX # Compiler flag to turn off builtin functions. no_builtin_flag=$lt_lt_prog_compiler_no_builtin_flag_CXX # Compiler flag to allow reflexive dlopens. export_dynamic_flag_spec=$lt_export_dynamic_flag_spec_CXX # Compiler flag to generate shared objects directly from archives. 
whole_archive_flag_spec=$lt_whole_archive_flag_spec_CXX # Compiler flag to generate thread-safe objects. thread_safe_flag_spec=$lt_thread_safe_flag_spec_CXX # Library versioning type. version_type=$version_type # Format of library name prefix. libname_spec=$lt_libname_spec # List of archive names. First name is the real one, the rest are links. # The last name is the one that the linker finds with -lNAME. library_names_spec=$lt_library_names_spec # The coded name of the library, if different from the real name. soname_spec=$lt_soname_spec # Commands used to build and install an old-style archive. RANLIB=$lt_RANLIB old_archive_cmds=$lt_old_archive_cmds_CXX old_postinstall_cmds=$lt_old_postinstall_cmds old_postuninstall_cmds=$lt_old_postuninstall_cmds # Create an old-style archive from a shared archive. old_archive_from_new_cmds=$lt_old_archive_from_new_cmds_CXX # Create a temporary old-style archive to link instead of a shared archive. old_archive_from_expsyms_cmds=$lt_old_archive_from_expsyms_cmds_CXX # Commands used to build and install a shared archive. archive_cmds=$lt_archive_cmds_CXX archive_expsym_cmds=$lt_archive_expsym_cmds_CXX postinstall_cmds=$lt_postinstall_cmds postuninstall_cmds=$lt_postuninstall_cmds # Commands used to build a loadable module (assumed same as above if empty) module_cmds=$lt_module_cmds_CXX module_expsym_cmds=$lt_module_expsym_cmds_CXX # Commands to strip libraries. old_striplib=$lt_old_striplib striplib=$lt_striplib # Dependencies to place before the objects being linked to create a # shared library. predep_objects=$lt_predep_objects_CXX # Dependencies to place after the objects being linked to create a # shared library. postdep_objects=$lt_postdep_objects_CXX # Dependencies to place before the objects being linked to create a # shared library. predeps=$lt_predeps_CXX # Dependencies to place after the objects being linked to create a # shared library. 
postdeps=$lt_postdeps_CXX # The library search path used internally by the compiler when linking # a shared library. compiler_lib_search_path=$lt_compiler_lib_search_path_CXX # Method to check whether dependent libraries are shared objects. deplibs_check_method=$lt_deplibs_check_method # Command to use when deplibs_check_method == file_magic. file_magic_cmd=$lt_file_magic_cmd # Flag that allows shared libraries with undefined symbols to be built. allow_undefined_flag=$lt_allow_undefined_flag_CXX # Flag that forces no undefined symbols. no_undefined_flag=$lt_no_undefined_flag_CXX # Commands used to finish a libtool library installation in a directory. finish_cmds=$lt_finish_cmds # Same as above, but a single script fragment to be evaled but not shown. finish_eval=$lt_finish_eval # Take the output of nm and produce a listing of raw symbols and C names. global_symbol_pipe=$lt_lt_cv_sys_global_symbol_pipe # Transform the output of nm in a proper C declaration global_symbol_to_cdecl=$lt_lt_cv_sys_global_symbol_to_cdecl # Transform the output of nm in a C name address pair global_symbol_to_c_name_address=$lt_lt_cv_sys_global_symbol_to_c_name_address # This is the shared library runtime path variable. runpath_var=$runpath_var # This is the shared library path variable. shlibpath_var=$shlibpath_var # Is shlibpath searched before the hard-coded library search path? shlibpath_overrides_runpath=$shlibpath_overrides_runpath # How to hardcode a shared library path into an executable. hardcode_action=$hardcode_action_CXX # Whether we should hardcode library paths into libraries. hardcode_into_libs=$hardcode_into_libs # Flag to hardcode \$libdir into a binary during linking. # This must work even if \$libdir does not exist. hardcode_libdir_flag_spec=$lt_hardcode_libdir_flag_spec_CXX # If ld is used when linking, flag to hardcode \$libdir into # a binary during linking. This must work even if \$libdir does # not exist. 
hardcode_libdir_flag_spec_ld=$lt_hardcode_libdir_flag_spec_ld_CXX # Whether we need a single -rpath flag with a separated argument. hardcode_libdir_separator=$lt_hardcode_libdir_separator_CXX # Set to yes if using DIR/libNAME${shared_ext} during linking hardcodes DIR into the # resulting binary. hardcode_direct=$hardcode_direct_CXX # Set to yes if using the -LDIR flag during linking hardcodes DIR into the # resulting binary. hardcode_minus_L=$hardcode_minus_L_CXX # Set to yes if using SHLIBPATH_VAR=DIR during linking hardcodes DIR into # the resulting binary. hardcode_shlibpath_var=$hardcode_shlibpath_var_CXX # Set to yes if building a shared library automatically hardcodes DIR into the library # and all subsequent libraries and executables linked against it. hardcode_automatic=$hardcode_automatic_CXX # Variables whose values should be saved in libtool wrapper scripts and # restored at relink time. variables_saved_for_relink="$variables_saved_for_relink" # Whether libtool must link a program against all its dependency libraries. link_all_deplibs=$link_all_deplibs_CXX # Compile-time system search path for libraries sys_lib_search_path_spec=$lt_sys_lib_search_path_spec # Run-time system search path for libraries sys_lib_dlsearch_path_spec=$lt_sys_lib_dlsearch_path_spec # Fix the shell variable \$srcfile for the compiler. fix_srcfile_path=$lt_fix_srcfile_path # Set to yes if exported symbols are required. always_export_symbols=$always_export_symbols_CXX # The commands to list exported symbols. export_symbols_cmds=$lt_export_symbols_cmds_CXX # The commands to extract the exported symbol list from a shared archive. extract_expsyms_cmds=$lt_extract_expsyms_cmds # Symbols that should not be listed in the preloaded symbols. exclude_expsyms=$lt_exclude_expsyms_CXX # Symbols that must always be exported. 
include_expsyms=$lt_include_expsyms_CXX # ### END LIBTOOL TAG CONFIG: $tagname __EOF__ else # If there is no Makefile yet, we rely on a make rule to execute # `config.status --recheck' to rerun these tests and create the # libtool script then. ltmain_in=`echo $ltmain | sed -e 's/\.sh$/.in/'` if test -f "$ltmain_in"; then test -f Makefile && make "$ltmain" fi fi ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu CC=$lt_save_CC LDCXX=$LD LD=$lt_save_LD GCC=$lt_save_GCC with_gnu_ldcxx=$with_gnu_ld with_gnu_ld=$lt_save_with_gnu_ld lt_cv_path_LDCXX=$lt_cv_path_LD lt_cv_path_LD=$lt_save_path_LD lt_cv_prog_gnu_ldcxx=$lt_cv_prog_gnu_ld lt_cv_prog_gnu_ld=$lt_save_with_gnu_ld else tagname="" fi ;; F77) if test -n "$F77" && test "X$F77" != "Xno"; then : else tagname="" fi ;; GCJ) if test -n "$GCJ" && test "X$GCJ" != "Xno"; then # Source file extension for Java test sources. ac_ext=java # Object file extension for compiled Java test sources. objext=o objext_GCJ=$objext # Code to be used in simple compile tests lt_simple_compile_test_code="class foo {}" # Code to be used in simple link tests lt_simple_link_test_code='public class conftest { public static void main(String[] argv) {}; }' # ltmain only uses $CC for tagged configurations so make sure $CC is set. # If no C compiler was specified, use CC. LTCC=${LTCC-"$CC"} # If no C compiler flags were specified, use CFLAGS. LTCFLAGS=${LTCFLAGS-"$CFLAGS"} # Allow CC to be a program name with arguments. 
compiler=$CC # save warnings/boilerplate of simple test code ac_outfile=conftest.$ac_objext echo "$lt_simple_compile_test_code" >conftest.$ac_ext eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err _lt_compiler_boilerplate=`cat conftest.err` $rm conftest* ac_outfile=conftest.$ac_objext echo "$lt_simple_link_test_code" >conftest.$ac_ext eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err _lt_linker_boilerplate=`cat conftest.err` $rm conftest* # Allow CC to be a program name with arguments. lt_save_CC="$CC" CC=${GCJ-"gcj"} compiler=$CC compiler_GCJ=$CC for cc_temp in $compiler""; do case $cc_temp in compile | *[\\/]compile | ccache | *[\\/]ccache ) ;; distcc | *[\\/]distcc | purify | *[\\/]purify ) ;; \-*) ;; *) break;; esac done cc_basename=`$echo "X$cc_temp" | $Xsed -e 's%.*/%%' -e "s%^$host_alias-%%"` # GCJ did not exist at the time GCC didn't implicitly link libc in. archive_cmds_need_lc_GCJ=no old_archive_cmds_GCJ=$old_archive_cmds lt_prog_compiler_no_builtin_flag_GCJ= if test "$GCC" = yes; then lt_prog_compiler_no_builtin_flag_GCJ=' -fno-builtin' { echo "$as_me:$LINENO: checking if $compiler supports -fno-rtti -fno-exceptions" >&5 echo $ECHO_N "checking if $compiler supports -fno-rtti -fno-exceptions... $ECHO_C" >&6; } if test "${lt_cv_prog_compiler_rtti_exceptions+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else lt_cv_prog_compiler_rtti_exceptions=no ac_outfile=conftest.$ac_objext echo "$lt_simple_compile_test_code" > conftest.$ac_ext lt_compiler_flag="-fno-rtti -fno-exceptions" # Insert the option either (1) after the last *FLAGS variable, or # (2) before a word containing "conftest.", or (3) at the end. # Note that $ac_compile itself does not contain backslashes and begins # with a dollar sign (not a hyphen), so the echo should work correctly. # The option is referenced via a variable to avoid confusing sed. 
lt_compile=`echo "$ac_compile" | $SED \ -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` (eval echo "\"\$as_me:17853: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&5 echo "$as_me:17857: \$? = $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. $echo "X$_lt_compiler_boilerplate" | $Xsed -e '/^$/d' >conftest.exp $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 if test ! -s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then lt_cv_prog_compiler_rtti_exceptions=yes fi fi $rm conftest* fi { echo "$as_me:$LINENO: result: $lt_cv_prog_compiler_rtti_exceptions" >&5 echo "${ECHO_T}$lt_cv_prog_compiler_rtti_exceptions" >&6; } if test x"$lt_cv_prog_compiler_rtti_exceptions" = xyes; then lt_prog_compiler_no_builtin_flag_GCJ="$lt_prog_compiler_no_builtin_flag_GCJ -fno-rtti -fno-exceptions" else : fi fi lt_prog_compiler_wl_GCJ= lt_prog_compiler_pic_GCJ= lt_prog_compiler_static_GCJ= { echo "$as_me:$LINENO: checking for $compiler option to produce PIC" >&5 echo $ECHO_N "checking for $compiler option to produce PIC... $ECHO_C" >&6; } if test "$GCC" = yes; then lt_prog_compiler_wl_GCJ='-Wl,' lt_prog_compiler_static_GCJ='-static' case $host_os in aix*) # All AIX code is PIC. if test "$host_cpu" = ia64; then # AIX 5 now supports IA64 processor lt_prog_compiler_static_GCJ='-Bstatic' fi ;; amigaos*) # FIXME: we need at least 68020 code to build shared libraries, but # adding the `-m68020' flag to GCC prevents building anything better, # like `-m68040'. lt_prog_compiler_pic_GCJ='-m68020 -resident32 -malways-restore-a4' ;; beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) # PIC is the default for these OSes. 
# Remaining per-OS arms of the PIC flag selection for a GNU compiler, followed
# by the first arms of the selection used when $GCC is not "yes".
;;

mingw* | cygwin* | pw32* | os2*)
  # This hack is so that the source file can tell whether it is being
  # built for inclusion in a dll (and should export symbols for example).
  # Although the cygwin gcc ignores -fPIC, still need this for old-style
  # (--disable-auto-import) libraries
  lt_prog_compiler_pic_GCJ='-DDLL_EXPORT'
  ;;

darwin* | rhapsody*)
  # PIC is the default on this platform
  # Common symbols not allowed in MH_DYLIB files
  lt_prog_compiler_pic_GCJ='-fno-common'
  ;;

interix[3-9]*)
  # Interix 3.x gcc -fpic/-fPIC options generate broken code.
  # Instead, we relocate shared libraries at runtime.
  ;;

msdosdjgpp*)
  # Just because we use GCC doesn't mean we suddenly get shared libraries
  # on systems that don't support them.
  lt_prog_compiler_can_build_shared_GCJ=no
  enable_shared=no
  ;;

sysv4*MP*)
  if test -d /usr/nec; then
    lt_prog_compiler_pic_GCJ=-Kconform_pic
  fi
  ;;

hpux*)
  # PIC is the default for IA64 HP-UX and 64-bit HP-UX, but
  # not for PA HP-UX.
  case $host_cpu in
  hppa*64*|ia64*)
    # +Z the default
    ;;
  *)
    lt_prog_compiler_pic_GCJ='-fPIC'
    ;;
  esac
  ;;

*)
  lt_prog_compiler_pic_GCJ='-fPIC'
  ;;
esac
else
# PORTME Check for flag to pass linker flags through the system compiler.
case $host_os in
aix*)
  lt_prog_compiler_wl_GCJ='-Wl,'
  if test "$host_cpu" = ia64; then
    # AIX 5 now supports IA64 processor
    lt_prog_compiler_static_GCJ='-Bstatic'
  else
    lt_prog_compiler_static_GCJ='-bnso -bI:/lib/syscalls.exp'
  fi
  ;;

darwin*)
  # PIC is the default on this platform
  # Common symbols not allowed in MH_DYLIB files
  case $cc_basename in
  xlc*)
    lt_prog_compiler_pic_GCJ='-qnocommon'
    lt_prog_compiler_wl_GCJ='-Wl,'
    ;;
  esac
  ;;

mingw* | cygwin* | pw32* | os2*)
  # This hack is so that the source file can tell whether it is being
  # built for inclusion in a dll (and should export symbols for example).
  lt_prog_compiler_pic_GCJ='-DDLL_EXPORT'
  ;;

hpux9* | hpux10* | hpux11*)
  lt_prog_compiler_wl_GCJ='-Wl,'
  # PIC is the default for IA64 HP-UX and 64-bit HP-UX, but
  # not for PA HP-UX.
case $host_cpu in hppa*64*|ia64*) # +Z the default ;; *) lt_prog_compiler_pic_GCJ='+Z' ;; esac # Is there a better lt_prog_compiler_static that works with the bundled CC? lt_prog_compiler_static_GCJ='${wl}-a ${wl}archive' ;; irix5* | irix6* | nonstopux*) lt_prog_compiler_wl_GCJ='-Wl,' # PIC (with -KPIC) is the default. lt_prog_compiler_static_GCJ='-non_shared' ;; newsos6) lt_prog_compiler_pic_GCJ='-KPIC' lt_prog_compiler_static_GCJ='-Bstatic' ;; linux* | k*bsd*-gnu) case $cc_basename in icc* | ecc*) lt_prog_compiler_wl_GCJ='-Wl,' lt_prog_compiler_pic_GCJ='-KPIC' lt_prog_compiler_static_GCJ='-static' ;; pgcc* | pgf77* | pgf90* | pgf95*) # Portland Group compilers (*not* the Pentium gcc compiler, # which looks to be a dead project) lt_prog_compiler_wl_GCJ='-Wl,' lt_prog_compiler_pic_GCJ='-fpic' lt_prog_compiler_static_GCJ='-Bstatic' ;; ccc*) lt_prog_compiler_wl_GCJ='-Wl,' # All Alpha code is PIC. lt_prog_compiler_static_GCJ='-non_shared' ;; *) case `$CC -V 2>&1 | sed 5q` in *Sun\ C*) # Sun C 5.9 lt_prog_compiler_pic_GCJ='-KPIC' lt_prog_compiler_static_GCJ='-Bstatic' lt_prog_compiler_wl_GCJ='-Wl,' ;; *Sun\ F*) # Sun Fortran 8.3 passes all unrecognized flags to the linker lt_prog_compiler_pic_GCJ='-KPIC' lt_prog_compiler_static_GCJ='-Bstatic' lt_prog_compiler_wl_GCJ='' ;; esac ;; esac ;; osf3* | osf4* | osf5*) lt_prog_compiler_wl_GCJ='-Wl,' # All OSF/1 code is PIC. 
# Last arms of the non-GNU compiler flag selection, the result report, and the
# start of the probe that verifies the chosen PIC flag actually compiles.
lt_prog_compiler_static_GCJ='-non_shared'
;;

rdos*)
  lt_prog_compiler_static_GCJ='-non_shared'
  ;;

solaris*)
  lt_prog_compiler_pic_GCJ='-KPIC'
  lt_prog_compiler_static_GCJ='-Bstatic'
  case $cc_basename in
  f77* | f90* | f95*)
    lt_prog_compiler_wl_GCJ='-Qoption ld ';;
  *)
    lt_prog_compiler_wl_GCJ='-Wl,';;
  esac
  ;;

sunos4*)
  lt_prog_compiler_wl_GCJ='-Qoption ld '
  lt_prog_compiler_pic_GCJ='-PIC'
  lt_prog_compiler_static_GCJ='-Bstatic'
  ;;

sysv4 | sysv4.2uw2* | sysv4.3*)
  lt_prog_compiler_wl_GCJ='-Wl,'
  lt_prog_compiler_pic_GCJ='-KPIC'
  lt_prog_compiler_static_GCJ='-Bstatic'
  ;;

sysv4*MP*)
  if test -d /usr/nec ;then
    lt_prog_compiler_pic_GCJ='-Kconform_pic'
    lt_prog_compiler_static_GCJ='-Bstatic'
  fi
  ;;

sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*)
  lt_prog_compiler_wl_GCJ='-Wl,'
  lt_prog_compiler_pic_GCJ='-KPIC'
  lt_prog_compiler_static_GCJ='-Bstatic'
  ;;

unicos*)
  lt_prog_compiler_wl_GCJ='-Wl,'
  lt_prog_compiler_can_build_shared_GCJ=no
  ;;

uts4*)
  lt_prog_compiler_pic_GCJ='-pic'
  lt_prog_compiler_static_GCJ='-Bstatic'
  ;;

*)
  lt_prog_compiler_can_build_shared_GCJ=no
  ;;
esac
fi

{ echo "$as_me:$LINENO: result: $lt_prog_compiler_pic_GCJ" >&5
echo "${ECHO_T}$lt_prog_compiler_pic_GCJ" >&6; }

#
# Check to make sure the PIC flag actually works.
#
if test -n "$lt_prog_compiler_pic_GCJ"; then

  { echo "$as_me:$LINENO: checking if $compiler PIC flag $lt_prog_compiler_pic_GCJ works" >&5
echo $ECHO_N "checking if $compiler PIC flag $lt_prog_compiler_pic_GCJ works... $ECHO_C" >&6; }
  if test "${lt_prog_compiler_pic_works_GCJ+set}" = set; then
    echo $ECHO_N "(cached) $ECHO_C" >&6
  else
    lt_prog_compiler_pic_works_GCJ=no
    ac_outfile=conftest.$ac_objext
    echo "$lt_simple_compile_test_code" > conftest.$ac_ext
    lt_compiler_flag="$lt_prog_compiler_pic_GCJ"
    # Insert the option either (1) after the last *FLAGS variable, or
    # (2) before a word containing "conftest.", or (3) at the end.
# Note that $ac_compile itself does not contain backslashes and begins # with a dollar sign (not a hyphen), so the echo should work correctly. # The option is referenced via a variable to avoid confusing sed. lt_compile=`echo "$ac_compile" | $SED \ -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` (eval echo "\"\$as_me:18143: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&5 echo "$as_me:18147: \$? = $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. $echo "X$_lt_compiler_boilerplate" | $Xsed -e '/^$/d' >conftest.exp $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 if test ! -s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then lt_prog_compiler_pic_works_GCJ=yes fi fi $rm conftest* fi { echo "$as_me:$LINENO: result: $lt_prog_compiler_pic_works_GCJ" >&5 echo "${ECHO_T}$lt_prog_compiler_pic_works_GCJ" >&6; } if test x"$lt_prog_compiler_pic_works_GCJ" = xyes; then case $lt_prog_compiler_pic_GCJ in "" | " "*) ;; *) lt_prog_compiler_pic_GCJ=" $lt_prog_compiler_pic_GCJ" ;; esac else lt_prog_compiler_pic_GCJ= lt_prog_compiler_can_build_shared_GCJ=no fi fi case $host_os in # For platforms which do not support PIC, -DPIC is meaningless: *djgpp*) lt_prog_compiler_pic_GCJ= ;; *) lt_prog_compiler_pic_GCJ="$lt_prog_compiler_pic_GCJ" ;; esac # # Check to make sure the static flag actually works. # wl=$lt_prog_compiler_wl_GCJ eval lt_tmp_static_flag=\"$lt_prog_compiler_static_GCJ\" { echo "$as_me:$LINENO: checking if $compiler static flag $lt_tmp_static_flag works" >&5 echo $ECHO_N "checking if $compiler static flag $lt_tmp_static_flag works... 
$ECHO_C" >&6; } if test "${lt_prog_compiler_static_works_GCJ+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else lt_prog_compiler_static_works_GCJ=no save_LDFLAGS="$LDFLAGS" LDFLAGS="$LDFLAGS $lt_tmp_static_flag" echo "$lt_simple_link_test_code" > conftest.$ac_ext if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then # The linker can only warn and ignore the option if not recognized # So say no if there are warnings if test -s conftest.err; then # Append any errors to the config.log. cat conftest.err 1>&5 $echo "X$_lt_linker_boilerplate" | $Xsed -e '/^$/d' > conftest.exp $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 if diff conftest.exp conftest.er2 >/dev/null; then lt_prog_compiler_static_works_GCJ=yes fi else lt_prog_compiler_static_works_GCJ=yes fi fi $rm conftest* LDFLAGS="$save_LDFLAGS" fi { echo "$as_me:$LINENO: result: $lt_prog_compiler_static_works_GCJ" >&5 echo "${ECHO_T}$lt_prog_compiler_static_works_GCJ" >&6; } if test x"$lt_prog_compiler_static_works_GCJ" = xyes; then : else lt_prog_compiler_static_GCJ= fi { echo "$as_me:$LINENO: checking if $compiler supports -c -o file.$ac_objext" >&5 echo $ECHO_N "checking if $compiler supports -c -o file.$ac_objext... $ECHO_C" >&6; } if test "${lt_cv_prog_compiler_c_o_GCJ+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else lt_cv_prog_compiler_c_o_GCJ=no $rm -r conftest 2>/dev/null mkdir conftest cd conftest mkdir out echo "$lt_simple_compile_test_code" > conftest.$ac_ext lt_compiler_flag="-o out/conftest2.$ac_objext" # Insert the option either (1) after the last *FLAGS variable, or # (2) before a word containing "conftest.", or (3) at the end. # Note that $ac_compile itself does not contain backslashes and begins # with a dollar sign (not a hyphen), so the echo should work correctly. 
lt_compile=`echo "$ac_compile" | $SED \ -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` (eval echo "\"\$as_me:18247: $lt_compile\"" >&5) (eval "$lt_compile" 2>out/conftest.err) ac_status=$? cat out/conftest.err >&5 echo "$as_me:18251: \$? = $ac_status" >&5 if (exit $ac_status) && test -s out/conftest2.$ac_objext then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings $echo "X$_lt_compiler_boilerplate" | $Xsed -e '/^$/d' > out/conftest.exp $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2 if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then lt_cv_prog_compiler_c_o_GCJ=yes fi fi chmod u+w . 2>&5 $rm conftest* # SGI C++ compiler will create directory out/ii_files/ for # template instantiation test -d out/ii_files && $rm out/ii_files/* && rmdir out/ii_files $rm out/* && rmdir out cd .. rmdir conftest $rm conftest* fi { echo "$as_me:$LINENO: result: $lt_cv_prog_compiler_c_o_GCJ" >&5 echo "${ECHO_T}$lt_cv_prog_compiler_c_o_GCJ" >&6; } hard_links="nottested" if test "$lt_cv_prog_compiler_c_o_GCJ" = no && test "$need_locks" != no; then # do not overwrite the value of need_locks provided by the user { echo "$as_me:$LINENO: checking if we can lock with hard links" >&5 echo $ECHO_N "checking if we can lock with hard links... 
$ECHO_C" >&6; } hard_links=yes $rm conftest* ln conftest.a conftest.b 2>/dev/null && hard_links=no touch conftest.a ln conftest.a conftest.b 2>&5 || hard_links=no ln conftest.a conftest.b 2>/dev/null && hard_links=no { echo "$as_me:$LINENO: result: $hard_links" >&5 echo "${ECHO_T}$hard_links" >&6; } if test "$hard_links" = no; then { echo "$as_me:$LINENO: WARNING: \`$CC' does not support \`-c -o', so \`make -j' may be unsafe" >&5 echo "$as_me: WARNING: \`$CC' does not support \`-c -o', so \`make -j' may be unsafe" >&2;} need_locks=warn fi else need_locks=no fi { echo "$as_me:$LINENO: checking whether the $compiler linker ($LD) supports shared libraries" >&5 echo $ECHO_N "checking whether the $compiler linker ($LD) supports shared libraries... $ECHO_C" >&6; } runpath_var= allow_undefined_flag_GCJ= enable_shared_with_static_runtimes_GCJ=no archive_cmds_GCJ= archive_expsym_cmds_GCJ= old_archive_From_new_cmds_GCJ= old_archive_from_expsyms_cmds_GCJ= export_dynamic_flag_spec_GCJ= whole_archive_flag_spec_GCJ= thread_safe_flag_spec_GCJ= hardcode_libdir_flag_spec_GCJ= hardcode_libdir_flag_spec_ld_GCJ= hardcode_libdir_separator_GCJ= hardcode_direct_GCJ=no hardcode_minus_L_GCJ=no hardcode_shlibpath_var_GCJ=unsupported link_all_deplibs_GCJ=unknown hardcode_automatic_GCJ=no module_cmds_GCJ= module_expsym_cmds_GCJ= always_export_symbols_GCJ=no export_symbols_cmds_GCJ='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' # include_expsyms should be a list of space-separated symbols to be *always* # included in the symbol list include_expsyms_GCJ= # exclude_expsyms can be an extended regexp of symbols to exclude # it will be wrapped by ` (' and `)$', so one must not match beginning or # end of line. Example: `a|bc|.*d.*' will exclude the symbols `a' and `bc', # as well as any symbol that contains `d'. 
exclude_expsyms_GCJ="_GLOBAL_OFFSET_TABLE_" # Although _GLOBAL_OFFSET_TABLE_ is a valid symbol C name, most a.out # platforms (ab)use it in PIC code, but their linkers get confused if # the symbol is explicitly referenced. Since portable code cannot # rely on this symbol name, it's probably fine to never include it in # preloaded symbol tables. extract_expsyms_cmds= # Just being paranoid about ensuring that cc_basename is set. for cc_temp in $compiler""; do case $cc_temp in compile | *[\\/]compile | ccache | *[\\/]ccache ) ;; distcc | *[\\/]distcc | purify | *[\\/]purify ) ;; \-*) ;; *) break;; esac done cc_basename=`$echo "X$cc_temp" | $Xsed -e 's%.*/%%' -e "s%^$host_alias-%%"` case $host_os in cygwin* | mingw* | pw32*) # FIXME: the MSVC++ port hasn't been tested in a loooong time # When not using gcc, we currently assume that we are using # Microsoft Visual C++. if test "$GCC" != yes; then with_gnu_ld=no fi ;; interix*) # we just hope/assume this is gcc and not c89 (= MSVC++) with_gnu_ld=yes ;; openbsd*) with_gnu_ld=no ;; esac ld_shlibs_GCJ=yes if test "$with_gnu_ld" = yes; then # If archive_cmds runs LD, not CC, wlarc should be empty wlarc='${wl}' # Set some defaults for GNU ld with shared library support. These # are reset later if shared libraries are not supported. Putting them # here allows them to be overridden if necessary. runpath_var=LD_RUN_PATH hardcode_libdir_flag_spec_GCJ='${wl}--rpath ${wl}$libdir' export_dynamic_flag_spec_GCJ='${wl}--export-dynamic' # ancient GNU ld didn't support --whole-archive et. al. if $LD --help 2>&1 | grep 'no-whole-archive' > /dev/null; then whole_archive_flag_spec_GCJ="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive' else whole_archive_flag_spec_GCJ= fi supports_anon_versioning=no case `$LD -v 2>/dev/null` in *\ [01].* | *\ 2.[0-9].* | *\ 2.10.*) ;; # catch versions < 2.11 *\ 2.11.93.0.2\ *) supports_anon_versioning=yes ;; # RH7.3 ... *\ 2.11.92.0.12\ *) supports_anon_versioning=yes ;; # Mandrake 8.2 ... 
# End of the GNU ld version test, then the per-OS GNU ld shared-library
# settings for AIX, AmigaOS, BeOS and the start of the cygwin/mingw arm.
# The AIX warning is emitted on stderr through a here-document.
*\ 2.11.*) ;; # other 2.11 versions
*) supports_anon_versioning=yes ;;
esac

# See if GNU ld supports shared libraries.
case $host_os in
aix3* | aix4* | aix5*)
  # On AIX/PPC, the GNU linker is very broken
  if test "$host_cpu" != ia64; then
    ld_shlibs_GCJ=no
    cat <<EOF 1>&2
*** Warning: the GNU linker, at least up to release 2.9.1, is reported
*** to be unable to reliably create shared libraries on AIX.
*** Therefore, libtool is disabling shared libraries support. If you
*** really care for shared libraries, you may want to modify your PATH
*** so that a non-GNU linker is found, and then restart.
EOF
  fi
  ;;

amigaos*)
  archive_cmds_GCJ='$rm $output_objdir/a2ixlibrary.data~$echo "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$echo "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$echo "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$echo "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)'
  hardcode_libdir_flag_spec_GCJ='-L$libdir'
  hardcode_minus_L_GCJ=yes

  # Samuel A. Falvo II reports
  # that the semantics of dynamic libraries on AmigaOS, at least up
  # to version 4, is to share data among multiple programs linked
  # with the same dynamic library. Since this doesn't match the
  # behavior of shared libraries on other platforms, we can't use
  # them.
  ld_shlibs_GCJ=no
  ;;

beos*)
  if $LD --help 2>&1 | grep ': supported targets:.* elf' > /dev/null; then
    allow_undefined_flag_GCJ=unsupported
    # Joseph Beckenbach says some releases of gcc
    # support --undefined. This deserves some investigation. FIXME
    archive_cmds_GCJ='$CC -nostart $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
  else
    ld_shlibs_GCJ=no
  fi
  ;;

cygwin* | mingw* | pw32*)
  # _LT_AC_TAGVAR(hardcode_libdir_flag_spec, GCJ) is actually meaningless,
  # as there is no search path for DLLs.
hardcode_libdir_flag_spec_GCJ='-L$libdir' allow_undefined_flag_GCJ=unsupported always_export_symbols_GCJ=no enable_shared_with_static_runtimes_GCJ=yes export_symbols_cmds_GCJ='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1 DATA/'\'' -e '\''/^[AITW][ ]/s/.*[ ]//'\'' | sort | uniq > $export_symbols' if $LD --help 2>&1 | grep 'auto-import' > /dev/null; then archive_cmds_GCJ='$CC -shared $libobjs $deplibs $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' # If the export-symbols file already is a .def file (1st line # is EXPORTS), use it as is; otherwise, prepend... archive_expsym_cmds_GCJ='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then cp $export_symbols $output_objdir/$soname.def; else echo EXPORTS > $output_objdir/$soname.def; cat $export_symbols >> $output_objdir/$soname.def; fi~ $CC -shared $output_objdir/$soname.def $libobjs $deplibs $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' else ld_shlibs_GCJ=no fi ;; interix[3-9]*) hardcode_direct_GCJ=no hardcode_shlibpath_var_GCJ=no hardcode_libdir_flag_spec_GCJ='${wl}-rpath,$libdir' export_dynamic_flag_spec_GCJ='${wl}-E' # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc. # Instead, shared libraries are loaded at an image base (0x10000000 by # default) and relocated if they conflict, which is a slow very memory # consuming and fragmenting process. To avoid this, we pick a random, # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link # time. Moving up from 0x10000000 also allows more sbrk(2) space. 
# Remaining GNU ld arms (Interix link commands, GNU/Linux, NetBSD, Solaris,
# SysV5/SCO, SunOS 4, default), the reset performed when GNU ld cannot build
# shared libraries, and the start of the non-GNU linker descriptions (AIX 3).
archive_cmds_GCJ='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib'
archive_expsym_cmds_GCJ='sed "s,^,_," $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--retain-symbols-file,$output_objdir/$soname.expsym ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib'
;;

gnu* | linux* | k*bsd*-gnu)
  if $LD --help 2>&1 | grep ': supported targets:.* elf' > /dev/null; then
    tmp_addflag=
    case $cc_basename,$host_cpu in
    pgcc*) # Portland Group C compiler
      whole_archive_flag_spec_GCJ='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; $echo \"$new_convenience\"` ${wl}--no-whole-archive'
      tmp_addflag=' $pic_flag'
      ;;
    pgf77* | pgf90* | pgf95*) # Portland Group f77 and f90 compilers
      whole_archive_flag_spec_GCJ='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; $echo \"$new_convenience\"` ${wl}--no-whole-archive'
      tmp_addflag=' $pic_flag -Mnomain'
      ;;
    ecc*,ia64* | icc*,ia64*) # Intel C compiler on ia64
      tmp_addflag=' -i_dynamic'
      ;;
    efc*,ia64* | ifort*,ia64*) # Intel Fortran compiler on ia64
      tmp_addflag=' -i_dynamic -nofor_main'
      ;;
    ifc* | ifort*) # Intel Fortran compiler
      tmp_addflag=' -nofor_main'
      ;;
    esac
    case `$CC -V 2>&1 | sed 5q` in
    *Sun\ C*) # Sun C 5.9
      whole_archive_flag_spec_GCJ='${wl}--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; $echo \"$new_convenience\"` ${wl}--no-whole-archive'
      tmp_sharedflag='-G'
      ;;
    *Sun\ F*) # Sun Fortran 8.3
      tmp_sharedflag='-G'
      ;;
    *)
      tmp_sharedflag='-shared'
      ;;
    esac
    archive_cmds_GCJ='$CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'

    if test $supports_anon_versioning = yes; then
      archive_expsym_cmds_GCJ='$echo "{ global:" > $output_objdir/$libname.ver~ cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ $echo "local: *; };" >> $output_objdir/$libname.ver~ $CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-version-script ${wl}$output_objdir/$libname.ver -o $lib'
    fi
  else
    ld_shlibs_GCJ=no
  fi
  ;;

netbsd*)
  if echo __ELF__ | $CC -E - | grep __ELF__ >/dev/null; then
    archive_cmds_GCJ='$LD -Bshareable $libobjs $deplibs $linker_flags -o $lib'
    wlarc=
  else
    archive_cmds_GCJ='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
    archive_expsym_cmds_GCJ='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
  fi
  ;;

solaris*)
  if $LD -v 2>&1 | grep 'BFD 2\.8' > /dev/null; then
    ld_shlibs_GCJ=no
    cat <<EOF 1>&2
*** Warning: The releases 2.8.* of the GNU linker cannot reliably
*** create shared libraries on Solaris systems. Therefore, libtool
*** is disabling shared libraries support. We urge you to upgrade GNU
*** binutils to release 2.9.1 or newer. Another option is to modify
*** your PATH or compiler configuration so that the native linker is
*** used, and then restart.
EOF
  elif $LD --help 2>&1 | grep ': supported targets:.* elf' > /dev/null; then
    archive_cmds_GCJ='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
    archive_expsym_cmds_GCJ='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
  else
    ld_shlibs_GCJ=no
  fi
  ;;

sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX*)
  case `$LD -v 2>&1` in
  *\ [01].* | *\ 2.[0-9].* | *\ 2.1[0-5].*)
    ld_shlibs_GCJ=no
    cat <<_LT_EOF 1>&2
*** Warning: Releases of the GNU linker prior to 2.16.91.0.3 can not
*** reliably create shared libraries on SCO systems. Therefore, libtool
*** is disabling shared libraries support. We urge you to upgrade GNU
*** binutils to release 2.16.91.0.3 or newer. Another option is to modify
*** your PATH or compiler configuration so that the native linker is
*** used, and then restart.
_LT_EOF
    ;;
  *)
    if $LD --help 2>&1 | grep ': supported targets:.* elf' > /dev/null; then
      hardcode_libdir_flag_spec_GCJ='`test -z "$SCOABSPATH" && echo ${wl}-rpath,$libdir`'
      archive_cmds_GCJ='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname,\${SCOABSPATH:+${install_libdir}/}$soname -o $lib'
      archive_expsym_cmds_GCJ='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname,\${SCOABSPATH:+${install_libdir}/}$soname,-retain-symbols-file,$export_symbols -o $lib'
    else
      ld_shlibs_GCJ=no
    fi
    ;;
  esac
  ;;

sunos4*)
  archive_cmds_GCJ='$LD -assert pure-text -Bshareable -o $lib $libobjs $deplibs $linker_flags'
  wlarc=
  hardcode_direct_GCJ=yes
  hardcode_shlibpath_var_GCJ=no
  ;;

*)
  if $LD --help 2>&1 | grep ': supported targets:.* elf' > /dev/null; then
    archive_cmds_GCJ='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
    archive_expsym_cmds_GCJ='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
  else
    ld_shlibs_GCJ=no
  fi
  ;;
esac

# Drop the GNU ld defaults again when shared libraries are unsupported.
if test "$ld_shlibs_GCJ" = no; then
  runpath_var=
  hardcode_libdir_flag_spec_GCJ=
  export_dynamic_flag_spec_GCJ=
  whole_archive_flag_spec_GCJ=
fi
else
# PORTME fill in a description of your system's linker (not GNU ld)
case $host_os in
aix3*)
  allow_undefined_flag_GCJ=unsupported
  always_export_symbols_GCJ=yes
  archive_expsym_cmds_GCJ='$LD -o $output_objdir/$soname $libobjs $deplibs $linker_flags -bE:$export_symbols -T512 -H512 -bM:SRE~$AR $AR_FLAGS $lib $output_objdir/$soname'
  # Note: this linker hardcodes the directories in LIBPATH if there
  # are no directories specified by -L.
  hardcode_minus_L_GCJ=yes
  # NOTE(review): tests the untagged $lt_prog_compiler_static rather than
  # $lt_prog_compiler_static_GCJ -- confirm this is intended.
  if test "$GCC" = yes && test -z "$lt_prog_compiler_static"; then
    # Neither direct hardcoding nor static linking is supported with a
    # broken collect2.
hardcode_direct_GCJ=unsupported fi ;; aix4* | aix5*) if test "$host_cpu" = ia64; then # On IA64, the linker does run time linking by default, so we don't # have to do anything special. aix_use_runtimelinking=no exp_sym_flag='-Bexport' no_entry_flag="" else # If we're using GNU nm, then we don't want the "-C" option. # -C means demangle to AIX nm, but means don't demangle with GNU nm if $NM -V 2>&1 | grep 'GNU' > /dev/null; then export_symbols_cmds_GCJ='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\$2 == "T") || (\$2 == "D") || (\$2 == "B")) && (substr(\$3,1,1) != ".")) { print \$3 } }'\'' | sort -u > $export_symbols' else export_symbols_cmds_GCJ='$NM -BCpg $libobjs $convenience | awk '\''{ if (((\$2 == "T") || (\$2 == "D") || (\$2 == "B")) && (substr(\$3,1,1) != ".")) { print \$3 } }'\'' | sort -u > $export_symbols' fi aix_use_runtimelinking=no # Test if we are trying to use run time linking or normal # AIX style linking. If -brtl is somewhere in LDFLAGS, we # need to do runtime linking. case $host_os in aix4.[23]|aix4.[23].*|aix5*) for ld_flag in $LDFLAGS; do if (test $ld_flag = "-brtl" || test $ld_flag = "-Wl,-brtl"); then aix_use_runtimelinking=yes break fi done ;; esac exp_sym_flag='-bexport' no_entry_flag='-bnoentry' fi # When large executables or shared objects are built, AIX ld can # have problems creating the table of contents. If linking a library # or program results in "error TOC overflow" add -mminimal-toc to # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS. 
archive_cmds_GCJ='' hardcode_direct_GCJ=yes hardcode_libdir_separator_GCJ=':' link_all_deplibs_GCJ=yes if test "$GCC" = yes; then case $host_os in aix4.[012]|aix4.[012].*) # We only want to do this on AIX 4.2 and lower, the check # below for broken collect2 doesn't work under 4.3+ collect2name=`${CC} -print-prog-name=collect2` if test -f "$collect2name" && \ strings "$collect2name" | grep resolve_lib_name >/dev/null then # We have reworked collect2 : else # We have old collect2 hardcode_direct_GCJ=unsupported # It fails to find uninstalled libraries when the uninstalled # path is not listed in the libpath. Setting hardcode_minus_L # to unsupported forces relinking hardcode_minus_L_GCJ=yes hardcode_libdir_flag_spec_GCJ='-L$libdir' hardcode_libdir_separator_GCJ= fi ;; esac shared_flag='-shared' if test "$aix_use_runtimelinking" = yes; then shared_flag="$shared_flag "'${wl}-G' fi else # not using gcc if test "$host_cpu" = ia64; then # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release # chokes on -Wl,-G. The following line is correct: shared_flag='-G' else if test "$aix_use_runtimelinking" = yes; then shared_flag='${wl}-G' else shared_flag='${wl}-bM:SRE' fi fi fi # It seems that -bexpall does not export symbols beginning with # underscore (_), so it is better to generate a list of symbols to export. always_export_symbols_GCJ=yes if test "$aix_use_runtimelinking" = yes; then # Warning - without using the other runtime loading flags (-brtl), # -berok will link without error, but may produce a broken library. allow_undefined_flag_GCJ='-berok' # Determine the default libpath from the value encoded in an empty executable. cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ int main () { ; return 0; } _ACEOF rm -f conftest.$ac_objext conftest$ac_exeext if { (ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_link") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest$ac_exeext && $as_test_x conftest$ac_exeext; then lt_aix_libpath_sed=' /Import File Strings/,/^$/ { /^0/ { s/^0 *\(.*\)$/\1/ p } }' aix_libpath=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` # Check for a 64-bit object if we didn't find anything. if test -z "$aix_libpath"; then aix_libpath=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` fi else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 fi rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ conftest$ac_exeext conftest.$ac_ext if test -z "$aix_libpath"; then aix_libpath="/usr/lib:/lib"; fi hardcode_libdir_flag_spec_GCJ='${wl}-blibpath:$libdir:'"$aix_libpath" archive_expsym_cmds_GCJ="\$CC"' -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags `if test "x${allow_undefined_flag}" != "x"; then echo "${wl}${allow_undefined_flag}"; else :; fi` '"\${wl}$exp_sym_flag:\$export_symbols $shared_flag" else if test "$host_cpu" = ia64; then hardcode_libdir_flag_spec_GCJ='${wl}-R $libdir:/usr/lib:/lib' allow_undefined_flag_GCJ="-z nodefs" archive_expsym_cmds_GCJ="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags ${wl}${allow_undefined_flag} '"\${wl}$exp_sym_flag:\$export_symbols" else # Determine the default libpath from the value encoded in an empty executable. cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. 
*/ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ int main () { ; return 0; } _ACEOF rm -f conftest.$ac_objext conftest$ac_exeext if { (ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_link") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest$ac_exeext && $as_test_x conftest$ac_exeext; then lt_aix_libpath_sed=' /Import File Strings/,/^$/ { /^0/ { s/^0 *\(.*\)$/\1/ p } }' aix_libpath=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` # Check for a 64-bit object if we didn't find anything. if test -z "$aix_libpath"; then aix_libpath=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` fi else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 fi rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ conftest$ac_exeext conftest.$ac_ext if test -z "$aix_libpath"; then aix_libpath="/usr/lib:/lib"; fi hardcode_libdir_flag_spec_GCJ='${wl}-blibpath:$libdir:'"$aix_libpath" # Warning - without using the other run time loading flags, # -berok will link without error, but may produce a broken library. no_undefined_flag_GCJ=' ${wl}-bernotok' allow_undefined_flag_GCJ=' ${wl}-berok' # Exported symbols can be pulled into shared objects from archives whole_archive_flag_spec_GCJ='$convenience' archive_cmds_need_lc_GCJ=yes # This is similar to how AIX traditionally builds its shared libraries. 
archive_expsym_cmds_GCJ="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs ${wl}-bnoentry $compiler_flags ${wl}-bE:$export_symbols${allow_undefined_flag}~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$soname' fi fi ;; amigaos*) archive_cmds_GCJ='$rm $output_objdir/a2ixlibrary.data~$echo "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$echo "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$echo "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$echo "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' hardcode_libdir_flag_spec_GCJ='-L$libdir' hardcode_minus_L_GCJ=yes # see comment about different semantics on the GNU ld section ld_shlibs_GCJ=no ;; bsdi[45]*) export_dynamic_flag_spec_GCJ=-rdynamic ;; cygwin* | mingw* | pw32*) # When not using gcc, we currently assume that we are using # Microsoft Visual C++. # hardcode_libdir_flag_spec is actually meaningless, as there is # no search path for DLLs. hardcode_libdir_flag_spec_GCJ=' ' allow_undefined_flag_GCJ=unsupported # Tell ltmain to make .lib files, not .a files. libext=lib # Tell ltmain to make .dll files, not .so files. shrext_cmds=".dll" # FIXME: Setting linknames here is a bad hack. archive_cmds_GCJ='$CC -o $lib $libobjs $compiler_flags `echo "$deplibs" | $SED -e '\''s/ -lc$//'\''` -link -dll~linknames=' # The linker will automatically build a .lib file if we build a DLL. old_archive_From_new_cmds_GCJ='true' # FIXME: Should let the user specify the lib program. 
old_archive_cmds_GCJ='lib -OUT:$oldlib$oldobjs$old_deplibs' fix_srcfile_path_GCJ='`cygpath -w "$srcfile"`' enable_shared_with_static_runtimes_GCJ=yes ;; darwin* | rhapsody*) case $host_os in rhapsody* | darwin1.[012]) allow_undefined_flag_GCJ='${wl}-undefined ${wl}suppress' ;; *) # Darwin 1.3 on if test -z ${MACOSX_DEPLOYMENT_TARGET} ; then allow_undefined_flag_GCJ='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' else case ${MACOSX_DEPLOYMENT_TARGET} in 10.[012]) allow_undefined_flag_GCJ='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' ;; 10.*) allow_undefined_flag_GCJ='${wl}-undefined ${wl}dynamic_lookup' ;; esac fi ;; esac archive_cmds_need_lc_GCJ=no hardcode_direct_GCJ=no hardcode_automatic_GCJ=yes hardcode_shlibpath_var_GCJ=unsupported whole_archive_flag_spec_GCJ='' link_all_deplibs_GCJ=yes if test "$GCC" = yes ; then output_verbose_link_cmd='echo' archive_cmds_GCJ='$CC -dynamiclib $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags -install_name $rpath/$soname $verstring' module_cmds_GCJ='$CC $allow_undefined_flag -o $lib -bundle $libobjs $deplibs$compiler_flags' # Don't fix this by using the ld -exported_symbols_list flag, it doesn't exist in older darwin lds archive_expsym_cmds_GCJ='sed -e "s,#.*,," -e "s,^[ ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC -dynamiclib $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags -install_name $rpath/$soname $verstring~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}' module_expsym_cmds_GCJ='sed -e "s,#.*,," -e "s,^[ ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC $allow_undefined_flag -o $lib -bundle $libobjs $deplibs$compiler_flags~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}' else case $cc_basename in xlc*) output_verbose_link_cmd='echo' archive_cmds_GCJ='$CC -qmkshrobj $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-install_name ${wl}`echo $rpath/$soname` 
$xlcverstring' module_cmds_GCJ='$CC $allow_undefined_flag -o $lib -bundle $libobjs $deplibs$compiler_flags' # Don't fix this by using the ld -exported_symbols_list flag, it doesn't exist in older darwin lds archive_expsym_cmds_GCJ='sed -e "s,#.*,," -e "s,^[ ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC -qmkshrobj $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-install_name ${wl}$rpath/$soname $xlcverstring~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}' module_expsym_cmds_GCJ='sed -e "s,#.*,," -e "s,^[ ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC $allow_undefined_flag -o $lib -bundle $libobjs $deplibs$compiler_flags~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}' ;; *) ld_shlibs_GCJ=no ;; esac fi ;; dgux*) archive_cmds_GCJ='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' hardcode_libdir_flag_spec_GCJ='-L$libdir' hardcode_shlibpath_var_GCJ=no ;; freebsd1*) ld_shlibs_GCJ=no ;; # FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor # support. Future versions do this automatically, but an explicit c++rt0.o # does not break anything, and helps significantly (at the cost of a little # extra space). freebsd2.2*) archive_cmds_GCJ='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags /usr/lib/c++rt0.o' hardcode_libdir_flag_spec_GCJ='-R$libdir' hardcode_direct_GCJ=yes hardcode_shlibpath_var_GCJ=no ;; # Unfortunately, older versions of FreeBSD 2 do not have this feature. freebsd2*) archive_cmds_GCJ='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' hardcode_direct_GCJ=yes hardcode_minus_L_GCJ=yes hardcode_shlibpath_var_GCJ=no ;; # FreeBSD 3 and greater uses gcc -shared to do shared libraries. 
freebsd* | dragonfly*) archive_cmds_GCJ='$CC -shared -o $lib $libobjs $deplibs $compiler_flags' hardcode_libdir_flag_spec_GCJ='-R$libdir' hardcode_direct_GCJ=yes hardcode_shlibpath_var_GCJ=no ;; hpux9*) if test "$GCC" = yes; then archive_cmds_GCJ='$rm $output_objdir/$soname~$CC -shared -fPIC ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $libobjs $deplibs $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' else archive_cmds_GCJ='$rm $output_objdir/$soname~$LD -b +b $install_libdir -o $output_objdir/$soname $libobjs $deplibs $linker_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' fi hardcode_libdir_flag_spec_GCJ='${wl}+b ${wl}$libdir' hardcode_libdir_separator_GCJ=: hardcode_direct_GCJ=yes # hardcode_minus_L: Not really in the search PATH, # but as the default location of the library. hardcode_minus_L_GCJ=yes export_dynamic_flag_spec_GCJ='${wl}-E' ;; hpux10*) if test "$GCC" = yes -a "$with_gnu_ld" = no; then archive_cmds_GCJ='$CC -shared -fPIC ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags' else archive_cmds_GCJ='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags' fi if test "$with_gnu_ld" = no; then hardcode_libdir_flag_spec_GCJ='${wl}+b ${wl}$libdir' hardcode_libdir_separator_GCJ=: hardcode_direct_GCJ=yes export_dynamic_flag_spec_GCJ='${wl}-E' # hardcode_minus_L: Not really in the search PATH, # but as the default location of the library. 
hardcode_minus_L_GCJ=yes fi ;; hpux11*) if test "$GCC" = yes -a "$with_gnu_ld" = no; then case $host_cpu in hppa*64*) archive_cmds_GCJ='$CC -shared ${wl}+h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' ;; ia64*) archive_cmds_GCJ='$CC -shared ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' ;; *) archive_cmds_GCJ='$CC -shared -fPIC ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags' ;; esac else case $host_cpu in hppa*64*) archive_cmds_GCJ='$CC -b ${wl}+h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' ;; ia64*) archive_cmds_GCJ='$CC -b ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' ;; *) archive_cmds_GCJ='$CC -b ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags' ;; esac fi if test "$with_gnu_ld" = no; then hardcode_libdir_flag_spec_GCJ='${wl}+b ${wl}$libdir' hardcode_libdir_separator_GCJ=: case $host_cpu in hppa*64*|ia64*) hardcode_libdir_flag_spec_ld_GCJ='+b $libdir' hardcode_direct_GCJ=no hardcode_shlibpath_var_GCJ=no ;; *) hardcode_direct_GCJ=yes export_dynamic_flag_spec_GCJ='${wl}-E' # hardcode_minus_L: Not really in the search PATH, # but as the default location of the library. 
hardcode_minus_L_GCJ=yes ;; esac fi ;; irix5* | irix6* | nonstopux*) if test "$GCC" = yes; then archive_cmds_GCJ='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' else archive_cmds_GCJ='$LD -shared $libobjs $deplibs $linker_flags -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib' hardcode_libdir_flag_spec_ld_GCJ='-rpath $libdir' fi hardcode_libdir_flag_spec_GCJ='${wl}-rpath ${wl}$libdir' hardcode_libdir_separator_GCJ=: link_all_deplibs_GCJ=yes ;; netbsd*) if echo __ELF__ | $CC -E - | grep __ELF__ >/dev/null; then archive_cmds_GCJ='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' # a.out else archive_cmds_GCJ='$LD -shared -o $lib $libobjs $deplibs $linker_flags' # ELF fi hardcode_libdir_flag_spec_GCJ='-R$libdir' hardcode_direct_GCJ=yes hardcode_shlibpath_var_GCJ=no ;; newsos6) archive_cmds_GCJ='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' hardcode_direct_GCJ=yes hardcode_libdir_flag_spec_GCJ='${wl}-rpath ${wl}$libdir' hardcode_libdir_separator_GCJ=: hardcode_shlibpath_var_GCJ=no ;; openbsd*) if test -f /usr/libexec/ld.so; then hardcode_direct_GCJ=yes hardcode_shlibpath_var_GCJ=no if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then archive_cmds_GCJ='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds_GCJ='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-retain-symbols-file,$export_symbols' hardcode_libdir_flag_spec_GCJ='${wl}-rpath,$libdir' export_dynamic_flag_spec_GCJ='${wl}-E' else case $host_os in openbsd[01].* | openbsd2.[0-7] | openbsd2.[0-7].*) archive_cmds_GCJ='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' hardcode_libdir_flag_spec_GCJ='-R$libdir' ;; *) archive_cmds_GCJ='$CC -shared 
$pic_flag -o $lib $libobjs $deplibs $compiler_flags' hardcode_libdir_flag_spec_GCJ='${wl}-rpath,$libdir' ;; esac fi else ld_shlibs_GCJ=no fi ;; os2*) hardcode_libdir_flag_spec_GCJ='-L$libdir' hardcode_minus_L_GCJ=yes allow_undefined_flag_GCJ=unsupported archive_cmds_GCJ='$echo "LIBRARY $libname INITINSTANCE" > $output_objdir/$libname.def~$echo "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~$echo DATA >> $output_objdir/$libname.def~$echo " SINGLE NONSHARED" >> $output_objdir/$libname.def~$echo EXPORTS >> $output_objdir/$libname.def~emxexp $libobjs >> $output_objdir/$libname.def~$CC -Zdll -Zcrtdll -o $lib $libobjs $deplibs $compiler_flags $output_objdir/$libname.def' old_archive_From_new_cmds_GCJ='emximp -o $output_objdir/$libname.a $output_objdir/$libname.def' ;; osf3*) if test "$GCC" = yes; then allow_undefined_flag_GCJ=' ${wl}-expect_unresolved ${wl}\*' archive_cmds_GCJ='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' else allow_undefined_flag_GCJ=' -expect_unresolved \*' archive_cmds_GCJ='$LD -shared${allow_undefined_flag} $libobjs $deplibs $linker_flags -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib' fi hardcode_libdir_flag_spec_GCJ='${wl}-rpath ${wl}$libdir' hardcode_libdir_separator_GCJ=: ;; osf4* | osf5*) # as osf3* with the addition of -msym flag if test "$GCC" = yes; then allow_undefined_flag_GCJ=' ${wl}-expect_unresolved ${wl}\*' archive_cmds_GCJ='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags ${wl}-msym ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' hardcode_libdir_flag_spec_GCJ='${wl}-rpath ${wl}$libdir' else allow_undefined_flag_GCJ=' -expect_unresolved \*' 
archive_cmds_GCJ='$LD -shared${allow_undefined_flag} $libobjs $deplibs $linker_flags -msym -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib' archive_expsym_cmds_GCJ='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done; echo "-hidden">> $lib.exp~ $LD -shared${allow_undefined_flag} -input $lib.exp $linker_flags $libobjs $deplibs -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib~$rm $lib.exp' # Both c and cxx compiler support -rpath directly hardcode_libdir_flag_spec_GCJ='-rpath $libdir' fi hardcode_libdir_separator_GCJ=: ;; solaris*) no_undefined_flag_GCJ=' -z text' if test "$GCC" = yes; then wlarc='${wl}' archive_cmds_GCJ='$CC -shared ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds_GCJ='$echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~$echo "local: *; };" >> $lib.exp~ $CC -shared ${wl}-M ${wl}$lib.exp ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags~$rm $lib.exp' else wlarc='' archive_cmds_GCJ='$LD -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $linker_flags' archive_expsym_cmds_GCJ='$echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~$echo "local: *; };" >> $lib.exp~ $LD -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$rm $lib.exp' fi hardcode_libdir_flag_spec_GCJ='-R$libdir' hardcode_shlibpath_var_GCJ=no case $host_os in solaris2.[0-5] | solaris2.[0-5].*) ;; *) # The compiler driver will combine and reorder linker options, # but understands `-z linker_flag'. GCC discards it without `$wl', # but is careful enough not to reorder. # Supported since Solaris 2.6 (maybe 2.5.1?) 
if test "$GCC" = yes; then whole_archive_flag_spec_GCJ='${wl}-z ${wl}allextract$convenience ${wl}-z ${wl}defaultextract' else whole_archive_flag_spec_GCJ='-z allextract$convenience -z defaultextract' fi ;; esac link_all_deplibs_GCJ=yes ;; sunos4*) if test "x$host_vendor" = xsequent; then # Use $CC to link under sequent, because it throws in some extra .o # files that make .init and .fini sections work. archive_cmds_GCJ='$CC -G ${wl}-h $soname -o $lib $libobjs $deplibs $compiler_flags' else archive_cmds_GCJ='$LD -assert pure-text -Bstatic -o $lib $libobjs $deplibs $linker_flags' fi hardcode_libdir_flag_spec_GCJ='-L$libdir' hardcode_direct_GCJ=yes hardcode_minus_L_GCJ=yes hardcode_shlibpath_var_GCJ=no ;; sysv4) case $host_vendor in sni) archive_cmds_GCJ='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' hardcode_direct_GCJ=yes # is this really true??? ;; siemens) ## LD is ld it makes a PLAMLIB ## CC just makes a GrossModule. archive_cmds_GCJ='$LD -G -o $lib $libobjs $deplibs $linker_flags' reload_cmds_GCJ='$CC -r -o $output$reload_objs' hardcode_direct_GCJ=no ;; motorola) archive_cmds_GCJ='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' hardcode_direct_GCJ=no #Motorola manual says yes, but my tests say they lie ;; esac runpath_var='LD_RUN_PATH' hardcode_shlibpath_var_GCJ=no ;; sysv4.3*) archive_cmds_GCJ='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' hardcode_shlibpath_var_GCJ=no export_dynamic_flag_spec_GCJ='-Bexport' ;; sysv4*MP*) if test -d /usr/nec; then archive_cmds_GCJ='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' hardcode_shlibpath_var_GCJ=no runpath_var=LD_RUN_PATH hardcode_runpath_var=yes ld_shlibs_GCJ=yes fi ;; sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[01].[10]* | unixware7* | sco3.2v5.0.[024]*) no_undefined_flag_GCJ='${wl}-z,text' archive_cmds_need_lc_GCJ=no hardcode_shlibpath_var_GCJ=no runpath_var='LD_RUN_PATH' if test "$GCC" = yes; then archive_cmds_GCJ='$CC -shared ${wl}-h,$soname -o $lib $libobjs 
$deplibs $compiler_flags' archive_expsym_cmds_GCJ='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' else archive_cmds_GCJ='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds_GCJ='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' fi ;; sysv5* | sco3.2v5* | sco5v6*) # Note: We can NOT use -z defs as we might desire, because we do not # link with -lc, and that would cause any symbols used from libc to # always be unresolved, which means just about no library would # ever link correctly. If we're not using GNU ld we use -z text # though, which does catch some bad symbols but isn't as heavy-handed # as -z defs. no_undefined_flag_GCJ='${wl}-z,text' allow_undefined_flag_GCJ='${wl}-z,nodefs' archive_cmds_need_lc_GCJ=no hardcode_shlibpath_var_GCJ=no hardcode_libdir_flag_spec_GCJ='`test -z "$SCOABSPATH" && echo ${wl}-R,$libdir`' hardcode_libdir_separator_GCJ=':' link_all_deplibs_GCJ=yes export_dynamic_flag_spec_GCJ='${wl}-Bexport' runpath_var='LD_RUN_PATH' if test "$GCC" = yes; then archive_cmds_GCJ='$CC -shared ${wl}-h,\${SCOABSPATH:+${install_libdir}/}$soname -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds_GCJ='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,\${SCOABSPATH:+${install_libdir}/}$soname -o $lib $libobjs $deplibs $compiler_flags' else archive_cmds_GCJ='$CC -G ${wl}-h,\${SCOABSPATH:+${install_libdir}/}$soname -o $lib $libobjs $deplibs $compiler_flags' archive_expsym_cmds_GCJ='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,\${SCOABSPATH:+${install_libdir}/}$soname -o $lib $libobjs $deplibs $compiler_flags' fi ;; uts4*) archive_cmds_GCJ='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' hardcode_libdir_flag_spec_GCJ='-L$libdir' hardcode_shlibpath_var_GCJ=no ;; *) ld_shlibs_GCJ=no ;; esac fi { echo "$as_me:$LINENO: result: $ld_shlibs_GCJ" >&5 echo "${ECHO_T}$ld_shlibs_GCJ" >&6; } test "$ld_shlibs_GCJ" = no && 
can_build_shared=no

#
# Do we need to explicitly link libc?
#
# Decide whether "-lc" has to be named explicitly when linking a shared
# library.  An empty or "yes" value of archive_cmds_need_lc_GCJ means the
# platform section above did not settle the question, so default to "yes"
# and, for a GCC build with shared libraries enabled, probe the compiler.
case "x$archive_cmds_need_lc_GCJ" in
x|xyes)
  # Assume -lc should be added
  archive_cmds_need_lc_GCJ=yes

  if test "$enable_shared" = yes && test "$GCC" = yes; then
    case $archive_cmds_GCJ in
    *'~'*)
      # FIXME: we may have to deal with multi-command sequences.
      # ('~' separates the individual commands of a sequence; such
      # archive commands are not probed and keep the default "yes".)
      ;;
    '$CC '*)
      # Test whether the compiler implicitly links with -lc since on some
      # systems, -lgcc has to come before -lc. If gcc already passes -lc
      # to ld, don't add -lc before -lgcc.
      { echo "$as_me:$LINENO: checking whether -lc should be explicitly linked in" >&5
echo $ECHO_N "checking whether -lc should be explicitly linked in... $ECHO_C" >&6; }
      $rm conftest*
      echo "$lt_simple_compile_test_code" > conftest.$ac_ext

      # Step 1: compile the trivial test source.  Anything the command
      # group writes to stderr is captured in conftest.err and replayed
      # on fd 5 if the compile fails.
      if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
  (eval $ac_compile) 2>&5
  ac_status=$?
  echo "$as_me:$LINENO: \$? = $ac_status" >&5
  (exit $ac_status); } 2>conftest.err; then
        # Step 2: provide the variables that $archive_cmds_GCJ expands.
        # -v is passed as the compiler/linker flags so that the driver
        # prints the link line it would use.
        soname=conftest
        lib=conftest
        libobjs=conftest.$ac_objext
        deplibs=
        wl=$lt_prog_compiler_wl_GCJ
        pic_flag=$lt_prog_compiler_pic_GCJ
        compiler_flags=-v
        linker_flags=-v
        verstring=
        output_objdir=.
        libname=conftest
        # The allow-undefined flag is blanked for the probe only and is
        # restored right after it.
        lt_save_allow_undefined_flag=$allow_undefined_flag_GCJ
        allow_undefined_flag_GCJ=
        # Step 3: run the archive command and look for " -lc " in its
        # combined output.  A match means the driver already adds libc.
        if { (eval echo "$as_me:$LINENO: \"$archive_cmds_GCJ 2\>\&1 \| grep \" -lc \" \>/dev/null 2\>\&1\"") >&5
  (eval $archive_cmds_GCJ 2\>\&1 \| grep \" -lc \" \>/dev/null 2\>\&1) 2>&5
  ac_status=$?
  echo "$as_me:$LINENO: \$? = $ac_status" >&5
  (exit $ac_status); }
        then
          archive_cmds_need_lc_GCJ=no
        else
          archive_cmds_need_lc_GCJ=yes
        fi
        allow_undefined_flag_GCJ=$lt_save_allow_undefined_flag
      else
        cat conftest.err 1>&5
      fi
      $rm conftest*
      { echo "$as_me:$LINENO: result: $archive_cmds_need_lc_GCJ" >&5
echo "${ECHO_T}$archive_cmds_need_lc_GCJ" >&6; }
      ;;
    esac
  fi
  ;;
esac

{ echo "$as_me:$LINENO: checking dynamic linker characteristics" >&5
echo $ECHO_N "checking dynamic linker characteristics... $ECHO_C" >&6; }

# Defaults for the dynamic linker description.  The per-platform arms of
# the case statement below override whichever of these they need to.
library_names_spec=
libname_spec='lib$name'
soname_spec=
shrext_cmds=".so"
postinstall_cmds=
postuninstall_cmds=
finish_cmds=
finish_eval=
shlibpath_var=
shlibpath_overrides_runpath=unknown
version_type=none
dynamic_linker="$host_os ld.so"
sys_lib_dlsearch_path_spec="/lib /usr/lib"

need_lib_prefix=unknown
hardcode_into_libs=no

# when you set need_version to no, make sure it does not cause -set_version
# flags to be left without arguments
need_version=unknown

case $host_os in
aix3*)
  version_type=linux
  library_names_spec='${libname}${release}${shared_ext}$versuffix $libname.a'
  shlibpath_var=LIBPATH

  # AIX 3 has no versioning support, so we append a major version to the name.
  soname_spec='${libname}${release}${shared_ext}$major'
  ;;

aix4* | aix5*)
  version_type=linux
  need_lib_prefix=no
  need_version=no
  hardcode_into_libs=yes
  if test "$host_cpu" = ia64; then
    # AIX 5 supports IA64
    library_names_spec='${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext}$versuffix $libname${shared_ext}'
    shlibpath_var=LD_LIBRARY_PATH
  else
    # With GCC up to 2.95.x, collect2 would create an import file
    # for dependence libraries. The import file would start with
    # the line `#! .'. This would cause the generated library to
    # depend on `.', always an invalid library. This was fixed in
    # development snapshots of GCC prior to 3.0.
    case $host_os in
      aix4 | aix4.[01] | aix4.[01].*)
      # Preprocess a snippet that only yields "yes" for GCC >= 2.97;
      # with any other result shared libraries are disabled.
      if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)'
           echo ' yes '
           echo '#endif'; } | ${CC} -E - | grep yes > /dev/null; then
        :
      else
        can_build_shared=no
      fi
      ;;
    esac
    # AIX (on Power*) has no versioning support, so currently we can not hardcode correct
    # soname into executable. Probably we can add versioning support to
    # collect2, so additional links can be useful in future.
    if test "$aix_use_runtimelinking" = yes; then
      # If using run time linking (on AIX 4.2 or later) use lib<name>.so
      # instead of lib<name>.a to let people know that these are not
      # typical AIX shared libraries.
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' else # We preserve .a as extension for shared libraries through AIX4.2 # and later when we are not doing run time linking. library_names_spec='${libname}${release}.a $libname.a' soname_spec='${libname}${release}${shared_ext}$major' fi shlibpath_var=LIBPATH fi ;; amigaos*) library_names_spec='$libname.ixlibrary $libname.a' # Create ${libname}_ixlibrary.a entries in /sys/libs. finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`$echo "X$lib" | $Xsed -e '\''s%^.*/\([^/]*\)\.ixlibrary$%\1%'\''`; test $rm /sys/libs/${libname}_ixlibrary.a; $show "cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a"; cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a || exit 1; done' ;; beos*) library_names_spec='${libname}${shared_ext}' dynamic_linker="$host_os ld.so" shlibpath_var=LIBRARY_PATH ;; bsdi[45]*) version_type=linux need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir' shlibpath_var=LD_LIBRARY_PATH sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib" sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib" # the default ld.so.conf also contains /usr/contrib/lib and # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow # libtool to hard-code these into programs ;; cygwin* | mingw* | pw32*) version_type=windows shrext_cmds=".dll" need_version=no need_lib_prefix=no case $GCC,$host_os in yes,cygwin* | yes,mingw* | yes,pw32*) library_names_spec='$libname.dll.a' # DLL is installed to $(libdir)/../bin by postinstall_cmds postinstall_cmds='base_file=`basename \${file}`~ dlpath=`$SHELL 2>&1 -c '\''. 
$dir/'\''\${base_file}'\''i;echo \$dlname'\''`~ dldir=$destdir/`dirname \$dlpath`~ test -d \$dldir || mkdir -p \$dldir~ $install_prog $dir/$dlname \$dldir/$dlname~ chmod a+x \$dldir/$dlname' postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~ dlpath=$dir/\$dldll~ $rm \$dlpath' shlibpath_overrides_runpath=yes case $host_os in cygwin*) # Cygwin DLLs use 'cyg' prefix rather than 'lib' soname_spec='`echo ${libname} | sed -e 's/^lib/cyg/'``echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}' sys_lib_search_path_spec="/usr/lib /lib/w32api /lib /usr/local/lib" ;; mingw*) # MinGW DLLs use traditional 'lib' prefix soname_spec='${libname}`echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}' sys_lib_search_path_spec=`$CC -print-search-dirs | grep "^libraries:" | $SED -e "s/^libraries://" -e "s,=/,/,g"` if echo "$sys_lib_search_path_spec" | grep ';[c-zC-Z]:/' >/dev/null; then # It is most probably a Windows format PATH printed by # mingw gcc, but we are running on Cygwin. Gcc prints its search # path with ; separators, and with drive letters. We can handle the # drive letters (cygwin fileutils understands them), so leave them, # especially as we might pass files found there to a mingw objdump, # which wouldn't understand a cygwinified path. Ahh. sys_lib_search_path_spec=`echo "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'` else sys_lib_search_path_spec=`echo "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` fi ;; pw32*) # pw32 DLLs use 'pw' prefix rather than 'lib' library_names_spec='`echo ${libname} | sed -e 's/^lib/pw/'``echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}' ;; esac ;; *) library_names_spec='${libname}`echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext} $libname.lib' ;; esac dynamic_linker='Win32 ld.exe' # FIXME: first we should search . 
and the directory the executable is in shlibpath_var=PATH ;; darwin* | rhapsody*) dynamic_linker="$host_os dyld" version_type=darwin need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${versuffix}$shared_ext ${libname}${release}${major}$shared_ext ${libname}$shared_ext' soname_spec='${libname}${release}${major}$shared_ext' shlibpath_overrides_runpath=yes shlibpath_var=DYLD_LIBRARY_PATH shrext_cmds='`test .$module = .yes && echo .so || echo .dylib`' sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib' ;; dgux*) version_type=linux need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname$shared_ext' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH ;; freebsd1*) dynamic_linker=no ;; freebsd* | dragonfly*) # DragonFly does not have aout. When/if they implement a new # versioning mechanism, adjust this. if test -x /usr/bin/objformat; then objformat=`/usr/bin/objformat` else case $host_os in freebsd[123]*) objformat=aout ;; *) objformat=elf ;; esac fi version_type=freebsd-$objformat case $version_type in freebsd-elf*) library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}' need_version=no need_lib_prefix=no ;; freebsd-*) library_names_spec='${libname}${release}${shared_ext}$versuffix $libname${shared_ext}$versuffix' need_version=yes ;; esac shlibpath_var=LD_LIBRARY_PATH case $host_os in freebsd2*) shlibpath_overrides_runpath=yes ;; freebsd3.[01]* | freebsdelf3.[01]*) shlibpath_overrides_runpath=yes hardcode_into_libs=yes ;; freebsd3.[2-9]* | freebsdelf3.[2-9]* | \ freebsd4.[0-5] | freebsdelf4.[0-5] | freebsd4.1.1 | freebsdelf4.1.1) shlibpath_overrides_runpath=no hardcode_into_libs=yes ;; *) # from 4.6 on, and DragonFly shlibpath_overrides_runpath=yes hardcode_into_libs=yes ;; esac ;; gnu*) version_type=linux need_lib_prefix=no need_version=no 
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH hardcode_into_libs=yes ;; hpux9* | hpux10* | hpux11*) # Give a soname corresponding to the major version so that dld.sl refuses to # link against other versions. version_type=sunos need_lib_prefix=no need_version=no case $host_cpu in ia64*) shrext_cmds='.so' hardcode_into_libs=yes dynamic_linker="$host_os dld.so" shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' if test "X$HPUX_IA64_MODE" = X32; then sys_lib_search_path_spec="/usr/lib/hpux32 /usr/local/lib/hpux32 /usr/local/lib" else sys_lib_search_path_spec="/usr/lib/hpux64 /usr/local/lib/hpux64" fi sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec ;; hppa*64*) shrext_cmds='.sl' hardcode_into_libs=yes dynamic_linker="$host_os dld.sl" shlibpath_var=LD_LIBRARY_PATH # How should we handle SHLIB_PATH shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' sys_lib_search_path_spec="/usr/lib/pa20_64 /usr/ccs/lib/pa20_64" sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec ;; *) shrext_cmds='.sl' dynamic_linker="$host_os dld.sl" shlibpath_var=SHLIB_PATH shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' ;; esac # HP-UX runs *really* slowly unless shared libraries are mode 555. 
postinstall_cmds='chmod 555 $lib' ;; interix[3-9]*) version_type=linux need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' dynamic_linker='Interix 3.x ld.so.1 (PE, like ELF)' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no hardcode_into_libs=yes ;; irix5* | irix6* | nonstopux*) case $host_os in nonstopux*) version_type=nonstopux ;; *) if test "$lt_cv_prog_gnu_ld" = yes; then version_type=linux else version_type=irix fi ;; esac need_lib_prefix=no need_version=no soname_spec='${libname}${release}${shared_ext}$major' library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext} $libname${shared_ext}' case $host_os in irix5* | nonstopux*) libsuff= shlibsuff= ;; *) case $LD in # libtool.m4 will add one of these switches to LD *-32|*"-32 "|*-melf32bsmip|*"-melf32bsmip ") libsuff= shlibsuff= libmagic=32-bit;; *-n32|*"-n32 "|*-melf32bmipn32|*"-melf32bmipn32 ") libsuff=32 shlibsuff=N32 libmagic=N32;; *-64|*"-64 "|*-melf64bmip|*"-melf64bmip ") libsuff=64 shlibsuff=64 libmagic=64-bit;; *) libsuff= shlibsuff= libmagic=never-match;; esac ;; esac shlibpath_var=LD_LIBRARY${shlibsuff}_PATH shlibpath_overrides_runpath=no sys_lib_search_path_spec="/usr/lib${libsuff} /lib${libsuff} /usr/local/lib${libsuff}" sys_lib_dlsearch_path_spec="/usr/lib${libsuff} /lib${libsuff}" hardcode_into_libs=yes ;; # No shared lib support for Linux oldld, aout, or coff. linux*oldld* | linux*aout* | linux*coff*) dynamic_linker=no ;; # This must be Linux ELF. 
linux* | k*bsd*-gnu) version_type=linux need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no # This implies no fast_install, which is unacceptable. # Some rework will be needed to allow for fast_install # before this can be enabled. hardcode_into_libs=yes sys_lib_search_path_spec="/usr/lib${libsuff} /lib${libsuff} /usr/local/lib${libsuff}" sys_lib_dlsearch_path_spec="/usr/lib${libsuff} /lib${libsuff}" # Append ld.so.conf contents to the search path if test -f /etc/ld.so.conf; then lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s 2>/dev/null", \$2)); skip = 1; } { if (!skip) print \$0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;/^[ ]*hwcap[ ]/d;s/[:, ]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;/^$/d' | tr '\n' ' '` sys_lib_dlsearch_path_spec="$sys_lib_dlsearch_path_spec $lt_ld_extra" fi # We used to test for /lib/ld.so.1 and disable shared libraries on # powerpc, because MkLinux only supported shared libraries with the # GNU dynamic linker. Since this was broken with cross compilers, # most powerpc-linux boxes support dynamic linking these days and # people can always --disable-shared, the test was removed, and we # assume the GNU/Linux dynamic linker is in use. 
dynamic_linker='GNU/Linux ld.so' ;; netbsd*) version_type=sunos need_lib_prefix=no need_version=no if echo __ELF__ | $CC -E - | grep __ELF__ >/dev/null; then library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' dynamic_linker='NetBSD (a.out) ld.so' else library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' dynamic_linker='NetBSD ld.elf_so' fi shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes hardcode_into_libs=yes ;; newsos6) version_type=linux library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes ;; nto-qnx*) version_type=linux need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes ;; openbsd*) version_type=sunos sys_lib_dlsearch_path_spec="/usr/lib" need_lib_prefix=no # Some older versions of OpenBSD (3.3 at least) *do* need versioned libs. 
case $host_os in openbsd3.3 | openbsd3.3.*) need_version=yes ;; *) need_version=no ;; esac library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' shlibpath_var=LD_LIBRARY_PATH if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then case $host_os in openbsd2.[89] | openbsd2.[89].*) shlibpath_overrides_runpath=no ;; *) shlibpath_overrides_runpath=yes ;; esac else shlibpath_overrides_runpath=yes fi ;; os2*) libname_spec='$name' shrext_cmds=".dll" need_lib_prefix=no library_names_spec='$libname${shared_ext} $libname.a' dynamic_linker='OS/2 ld.exe' shlibpath_var=LIBPATH ;; osf3* | osf4* | osf5*) version_type=osf need_lib_prefix=no need_version=no soname_spec='${libname}${release}${shared_ext}$major' library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' shlibpath_var=LD_LIBRARY_PATH sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib" sys_lib_dlsearch_path_spec="$sys_lib_search_path_spec" ;; rdos*) dynamic_linker=no ;; solaris*) version_type=linux need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes hardcode_into_libs=yes # ldd complains unless libraries are executable postinstall_cmds='chmod +x $lib' ;; sunos4*) version_type=sunos library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes if test "$with_gnu_ld" = yes; then need_lib_prefix=no fi need_version=yes ;; sysv4 | sysv4.3*) version_type=linux 
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH case $host_vendor in sni) shlibpath_overrides_runpath=no need_lib_prefix=no export_dynamic_flag_spec='${wl}-Blargedynsym' runpath_var=LD_RUN_PATH ;; siemens) need_lib_prefix=no ;; motorola) need_lib_prefix=no need_version=no shlibpath_overrides_runpath=no sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib' ;; esac ;; sysv4*MP*) if test -d /usr/nec ;then version_type=linux library_names_spec='$libname${shared_ext}.$versuffix $libname${shared_ext}.$major $libname${shared_ext}' soname_spec='$libname${shared_ext}.$major' shlibpath_var=LD_LIBRARY_PATH fi ;; sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) version_type=freebsd-elf need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH hardcode_into_libs=yes if test "$with_gnu_ld" = yes; then sys_lib_search_path_spec='/usr/local/lib /usr/gnu/lib /usr/ccs/lib /usr/lib /lib' shlibpath_overrides_runpath=no else sys_lib_search_path_spec='/usr/ccs/lib /usr/lib' shlibpath_overrides_runpath=yes case $host_os in sco3.2v5*) sys_lib_search_path_spec="$sys_lib_search_path_spec /lib" ;; esac fi sys_lib_dlsearch_path_spec='/usr/lib' ;; uts4*) version_type=linux library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH ;; *) dynamic_linker=no ;; esac { echo "$as_me:$LINENO: result: $dynamic_linker" >&5 echo "${ECHO_T}$dynamic_linker" >&6; } test "$dynamic_linker" = no && can_build_shared=no variables_saved_for_relink="PATH $shlibpath_var $runpath_var" if test "$GCC" = yes; then 
variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH"
fi

# Decide how library paths are hardcoded into programs for the GCJ tag.
# The result is stored in hardcode_action_GCJ and is one of:
#   relink      - linking always hardcodes the temporary library directory
#   immediate   - hardcoding is possible and works for nonexistent dirs
#   unsupported - nothing (or only existing directories) can be hardcoded
# The outcome is reported on fd 5 (log) and fd 6 (terminal).
{ echo "$as_me:$LINENO: checking how to hardcode library paths into programs" >&5
echo $ECHO_N "checking how to hardcode library paths into programs... $ECHO_C" >&6; }
hardcode_action_GCJ=
if test -n "$hardcode_libdir_flag_spec_GCJ" || \
   test -n "$runpath_var_GCJ" || \
   test "X$hardcode_automatic_GCJ" = "Xyes" ; then

  # We can hardcode nonexistent directories.
  if test "$hardcode_direct_GCJ" != no &&
     # If the only mechanism to avoid hardcoding is shlibpath_var, we
     # have to relink, otherwise we might link with an installed library
     # when we should be linking with a yet-to-be-installed one
     ## test "$_LT_AC_TAGVAR(hardcode_shlibpath_var, GCJ)" != no &&
     test "$hardcode_minus_L_GCJ" != no; then
    # Linking always hardcodes the temporary library directory.
    hardcode_action_GCJ=relink
  else
    # We can link without hardcoding, and we can hardcode nonexisting dirs.
    hardcode_action_GCJ=immediate
  fi
else
  # We cannot hardcode anything, or else we can only hardcode existing
  # directories.
  hardcode_action_GCJ=unsupported
fi
{ echo "$as_me:$LINENO: result: $hardcode_action_GCJ" >&5
echo "${ECHO_T}$hardcode_action_GCJ" >&6; }

# Derive enable_fast_install from the decision above.  Note that when
# neither branch matches, enable_fast_install keeps its previous value.
if test "$hardcode_action_GCJ" = relink; then
  # Fast installation is not supported
  enable_fast_install=no
elif test "$shlibpath_overrides_runpath" = yes ||
     test "$enable_shared" = no; then
  # Fast installation is not necessary
  enable_fast_install=needless
fi


# The else clause should only fire when bootstrapping the
# libtool distribution, otherwise you forgot to ship ltmain.sh
# with your package, and you will get complaints that there are
# no rules to generate ltmain.sh.
if test -f "$ltmain"; then
  # See if we are running on zsh, and set the options which allow our commands through
  # without removal of \ escapes.
if test -n "${ZSH_VERSION+set}" ; then setopt NO_GLOB_SUBST fi # Now quote all the things that may contain metacharacters while being # careful not to overquote the AC_SUBSTed values. We take copies of the # variables and quote the copies for generation of the libtool script. for var in echo old_CC old_CFLAGS AR AR_FLAGS EGREP RANLIB LN_S LTCC LTCFLAGS NM \ SED SHELL STRIP \ libname_spec library_names_spec soname_spec extract_expsyms_cmds \ old_striplib striplib file_magic_cmd finish_cmds finish_eval \ deplibs_check_method reload_flag reload_cmds need_locks \ lt_cv_sys_global_symbol_pipe lt_cv_sys_global_symbol_to_cdecl \ lt_cv_sys_global_symbol_to_c_name_address \ sys_lib_search_path_spec sys_lib_dlsearch_path_spec \ old_postinstall_cmds old_postuninstall_cmds \ compiler_GCJ \ CC_GCJ \ LD_GCJ \ lt_prog_compiler_wl_GCJ \ lt_prog_compiler_pic_GCJ \ lt_prog_compiler_static_GCJ \ lt_prog_compiler_no_builtin_flag_GCJ \ export_dynamic_flag_spec_GCJ \ thread_safe_flag_spec_GCJ \ whole_archive_flag_spec_GCJ \ enable_shared_with_static_runtimes_GCJ \ old_archive_cmds_GCJ \ old_archive_from_new_cmds_GCJ \ predep_objects_GCJ \ postdep_objects_GCJ \ predeps_GCJ \ postdeps_GCJ \ compiler_lib_search_path_GCJ \ archive_cmds_GCJ \ archive_expsym_cmds_GCJ \ postinstall_cmds_GCJ \ postuninstall_cmds_GCJ \ old_archive_from_expsyms_cmds_GCJ \ allow_undefined_flag_GCJ \ no_undefined_flag_GCJ \ export_symbols_cmds_GCJ \ hardcode_libdir_flag_spec_GCJ \ hardcode_libdir_flag_spec_ld_GCJ \ hardcode_libdir_separator_GCJ \ hardcode_automatic_GCJ \ module_cmds_GCJ \ module_expsym_cmds_GCJ \ lt_cv_prog_compiler_c_o_GCJ \ fix_srcfile_path_GCJ \ exclude_expsyms_GCJ \ include_expsyms_GCJ; do case $var in old_archive_cmds_GCJ | \ old_archive_from_new_cmds_GCJ | \ archive_cmds_GCJ | \ archive_expsym_cmds_GCJ | \ module_cmds_GCJ | \ module_expsym_cmds_GCJ | \ old_archive_from_expsyms_cmds_GCJ | \ export_symbols_cmds_GCJ | \ extract_expsyms_cmds | reload_cmds | finish_cmds | \ postinstall_cmds | 
postuninstall_cmds | \ old_postinstall_cmds | old_postuninstall_cmds | \ sys_lib_search_path_spec | sys_lib_dlsearch_path_spec) # Double-quote double-evaled strings. eval "lt_$var=\\\"\`\$echo \"X\$$var\" | \$Xsed -e \"\$double_quote_subst\" -e \"\$sed_quote_subst\" -e \"\$delay_variable_subst\"\`\\\"" ;; *) eval "lt_$var=\\\"\`\$echo \"X\$$var\" | \$Xsed -e \"\$sed_quote_subst\"\`\\\"" ;; esac done case $lt_echo in *'\$0 --fallback-echo"') lt_echo=`$echo "X$lt_echo" | $Xsed -e 's/\\\\\\\$0 --fallback-echo"$/$0 --fallback-echo"/'` ;; esac cfgfile="$ofile" cat <<__EOF__ >> "$cfgfile" # ### BEGIN LIBTOOL TAG CONFIG: $tagname # Libtool was configured on host `(hostname || uname -n) 2>/dev/null | sed 1q`: # Shell to use when invoking shell scripts. SHELL=$lt_SHELL # Whether or not to build shared libraries. build_libtool_libs=$enable_shared # Whether or not to build static libraries. build_old_libs=$enable_static # Whether or not to add -lc for building shared libraries. build_libtool_need_lc=$archive_cmds_need_lc_GCJ # Whether or not to disallow shared libs when runtime libs are static allow_libtool_libs_with_static_runtimes=$enable_shared_with_static_runtimes_GCJ # Whether or not to optimize for fast installation. fast_install=$enable_fast_install # The host system. host_alias=$host_alias host=$host host_os=$host_os # The build system. build_alias=$build_alias build=$build build_os=$build_os # An echo program that does not interpret backslashes. echo=$lt_echo # The archiver. AR=$lt_AR AR_FLAGS=$lt_AR_FLAGS # A C compiler. LTCC=$lt_LTCC # LTCC compiler flags. LTCFLAGS=$lt_LTCFLAGS # A language-specific compiler. CC=$lt_compiler_GCJ # Is the compiler the GNU C compiler? with_gcc=$GCC_GCJ # An ERE matcher. EGREP=$lt_EGREP # The linker used to build libraries. LD=$lt_LD_GCJ # Whether we need hard or soft links. LN_S=$lt_LN_S # A BSD-compatible nm program. 
NM=$lt_NM # A symbol stripping program STRIP=$lt_STRIP # Used to examine libraries when file_magic_cmd begins "file" MAGIC_CMD=$MAGIC_CMD # Used on cygwin: DLL creation program. DLLTOOL="$DLLTOOL" # Used on cygwin: object dumper. OBJDUMP="$OBJDUMP" # Used on cygwin: assembler. AS="$AS" # The name of the directory that contains temporary libtool files. objdir=$objdir # How to create reloadable object files. reload_flag=$lt_reload_flag reload_cmds=$lt_reload_cmds # How to pass a linker flag through the compiler. wl=$lt_lt_prog_compiler_wl_GCJ # Object file suffix (normally "o"). objext="$ac_objext" # Old archive suffix (normally "a"). libext="$libext" # Shared library suffix (normally ".so"). shrext_cmds='$shrext_cmds' # Executable file suffix (normally ""). exeext="$exeext" # Additional compiler flags for building library objects. pic_flag=$lt_lt_prog_compiler_pic_GCJ pic_mode=$pic_mode # What is the maximum length of a command? max_cmd_len=$lt_cv_sys_max_cmd_len # Does compiler simultaneously support -c and -o options? compiler_c_o=$lt_lt_cv_prog_compiler_c_o_GCJ # Must we lock files when doing compilation? need_locks=$lt_need_locks # Do we need the lib prefix for modules? need_lib_prefix=$need_lib_prefix # Do we need a version for libraries? need_version=$need_version # Whether dlopen is supported. dlopen_support=$enable_dlopen # Whether dlopen of programs is supported. dlopen_self=$enable_dlopen_self # Whether dlopen of statically linked programs is supported. dlopen_self_static=$enable_dlopen_self_static # Compiler flag to prevent dynamic linking. link_static_flag=$lt_lt_prog_compiler_static_GCJ # Compiler flag to turn off builtin functions. no_builtin_flag=$lt_lt_prog_compiler_no_builtin_flag_GCJ # Compiler flag to allow reflexive dlopens. export_dynamic_flag_spec=$lt_export_dynamic_flag_spec_GCJ # Compiler flag to generate shared objects directly from archives. 
whole_archive_flag_spec=$lt_whole_archive_flag_spec_GCJ # Compiler flag to generate thread-safe objects. thread_safe_flag_spec=$lt_thread_safe_flag_spec_GCJ # Library versioning type. version_type=$version_type # Format of library name prefix. libname_spec=$lt_libname_spec # List of archive names. First name is the real one, the rest are links. # The last name is the one that the linker finds with -lNAME. library_names_spec=$lt_library_names_spec # The coded name of the library, if different from the real name. soname_spec=$lt_soname_spec # Commands used to build and install an old-style archive. RANLIB=$lt_RANLIB old_archive_cmds=$lt_old_archive_cmds_GCJ old_postinstall_cmds=$lt_old_postinstall_cmds old_postuninstall_cmds=$lt_old_postuninstall_cmds # Create an old-style archive from a shared archive. old_archive_from_new_cmds=$lt_old_archive_from_new_cmds_GCJ # Create a temporary old-style archive to link instead of a shared archive. old_archive_from_expsyms_cmds=$lt_old_archive_from_expsyms_cmds_GCJ # Commands used to build and install a shared archive. archive_cmds=$lt_archive_cmds_GCJ archive_expsym_cmds=$lt_archive_expsym_cmds_GCJ postinstall_cmds=$lt_postinstall_cmds postuninstall_cmds=$lt_postuninstall_cmds # Commands used to build a loadable module (assumed same as above if empty) module_cmds=$lt_module_cmds_GCJ module_expsym_cmds=$lt_module_expsym_cmds_GCJ # Commands to strip libraries. old_striplib=$lt_old_striplib striplib=$lt_striplib # Dependencies to place before the objects being linked to create a # shared library. predep_objects=$lt_predep_objects_GCJ # Dependencies to place after the objects being linked to create a # shared library. postdep_objects=$lt_postdep_objects_GCJ # Dependencies to place before the objects being linked to create a # shared library. predeps=$lt_predeps_GCJ # Dependencies to place after the objects being linked to create a # shared library. 
postdeps=$lt_postdeps_GCJ # The library search path used internally by the compiler when linking # a shared library. compiler_lib_search_path=$lt_compiler_lib_search_path_GCJ # Method to check whether dependent libraries are shared objects. deplibs_check_method=$lt_deplibs_check_method # Command to use when deplibs_check_method == file_magic. file_magic_cmd=$lt_file_magic_cmd # Flag that allows shared libraries with undefined symbols to be built. allow_undefined_flag=$lt_allow_undefined_flag_GCJ # Flag that forces no undefined symbols. no_undefined_flag=$lt_no_undefined_flag_GCJ # Commands used to finish a libtool library installation in a directory. finish_cmds=$lt_finish_cmds # Same as above, but a single script fragment to be evaled but not shown. finish_eval=$lt_finish_eval # Take the output of nm and produce a listing of raw symbols and C names. global_symbol_pipe=$lt_lt_cv_sys_global_symbol_pipe # Transform the output of nm in a proper C declaration global_symbol_to_cdecl=$lt_lt_cv_sys_global_symbol_to_cdecl # Transform the output of nm in a C name address pair global_symbol_to_c_name_address=$lt_lt_cv_sys_global_symbol_to_c_name_address # This is the shared library runtime path variable. runpath_var=$runpath_var # This is the shared library path variable. shlibpath_var=$shlibpath_var # Is shlibpath searched before the hard-coded library search path? shlibpath_overrides_runpath=$shlibpath_overrides_runpath # How to hardcode a shared library path into an executable. hardcode_action=$hardcode_action_GCJ # Whether we should hardcode library paths into libraries. hardcode_into_libs=$hardcode_into_libs # Flag to hardcode \$libdir into a binary during linking. # This must work even if \$libdir does not exist. hardcode_libdir_flag_spec=$lt_hardcode_libdir_flag_spec_GCJ # If ld is used when linking, flag to hardcode \$libdir into # a binary during linking. This must work even if \$libdir does # not exist. 
hardcode_libdir_flag_spec_ld=$lt_hardcode_libdir_flag_spec_ld_GCJ # Whether we need a single -rpath flag with a separated argument. hardcode_libdir_separator=$lt_hardcode_libdir_separator_GCJ # Set to yes if using DIR/libNAME${shared_ext} during linking hardcodes DIR into the # resulting binary. hardcode_direct=$hardcode_direct_GCJ # Set to yes if using the -LDIR flag during linking hardcodes DIR into the # resulting binary. hardcode_minus_L=$hardcode_minus_L_GCJ # Set to yes if using SHLIBPATH_VAR=DIR during linking hardcodes DIR into # the resulting binary. hardcode_shlibpath_var=$hardcode_shlibpath_var_GCJ # Set to yes if building a shared library automatically hardcodes DIR into the library # and all subsequent libraries and executables linked against it. hardcode_automatic=$hardcode_automatic_GCJ # Variables whose values should be saved in libtool wrapper scripts and # restored at relink time. variables_saved_for_relink="$variables_saved_for_relink" # Whether libtool must link a program against all its dependency libraries. link_all_deplibs=$link_all_deplibs_GCJ # Compile-time system search path for libraries sys_lib_search_path_spec=$lt_sys_lib_search_path_spec # Run-time system search path for libraries sys_lib_dlsearch_path_spec=$lt_sys_lib_dlsearch_path_spec # Fix the shell variable \$srcfile for the compiler. fix_srcfile_path=$lt_fix_srcfile_path # Set to yes if exported symbols are required. always_export_symbols=$always_export_symbols_GCJ # The commands to list exported symbols. export_symbols_cmds=$lt_export_symbols_cmds_GCJ # The commands to extract the exported symbol list from a shared archive. extract_expsyms_cmds=$lt_extract_expsyms_cmds # Symbols that should not be listed in the preloaded symbols. exclude_expsyms=$lt_exclude_expsyms_GCJ # Symbols that must always be exported. 
include_expsyms=$lt_include_expsyms_GCJ # ### END LIBTOOL TAG CONFIG: $tagname __EOF__ else # If there is no Makefile yet, we rely on a make rule to execute # `config.status --recheck' to rerun these tests and create the # libtool script then. ltmain_in=`echo $ltmain | sed -e 's/\.sh$/.in/'` if test -f "$ltmain_in"; then test -f Makefile && make "$ltmain" fi fi ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu CC="$lt_save_CC" else tagname="" fi ;; RC) # Source file extension for RC test sources. ac_ext=rc # Object file extension for compiled RC test sources. objext=o objext_RC=$objext # Code to be used in simple compile tests lt_simple_compile_test_code='sample MENU { MENUITEM "&Soup", 100, CHECKED }' # Code to be used in simple link tests lt_simple_link_test_code="$lt_simple_compile_test_code" # ltmain only uses $CC for tagged configurations so make sure $CC is set. # If no C compiler was specified, use CC. LTCC=${LTCC-"$CC"} # If no C compiler flags were specified, use CFLAGS. LTCFLAGS=${LTCFLAGS-"$CFLAGS"} # Allow CC to be a program name with arguments. compiler=$CC # save warnings/boilerplate of simple test code ac_outfile=conftest.$ac_objext echo "$lt_simple_compile_test_code" >conftest.$ac_ext eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err _lt_compiler_boilerplate=`cat conftest.err` $rm conftest* ac_outfile=conftest.$ac_objext echo "$lt_simple_link_test_code" >conftest.$ac_ext eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err _lt_linker_boilerplate=`cat conftest.err` $rm conftest* # Allow CC to be a program name with arguments. 
lt_save_CC="$CC" CC=${RC-"windres"} compiler=$CC compiler_RC=$CC for cc_temp in $compiler""; do case $cc_temp in compile | *[\\/]compile | ccache | *[\\/]ccache ) ;; distcc | *[\\/]distcc | purify | *[\\/]purify ) ;; \-*) ;; *) break;; esac done cc_basename=`$echo "X$cc_temp" | $Xsed -e 's%.*/%%' -e "s%^$host_alias-%%"` lt_cv_prog_compiler_c_o_RC=yes # The else clause should only fire when bootstrapping the # libtool distribution, otherwise you forgot to ship ltmain.sh # with your package, and you will get complaints that there are # no rules to generate ltmain.sh. if test -f "$ltmain"; then # See if we are running on zsh, and set the options which allow our commands through # without removal of \ escapes. if test -n "${ZSH_VERSION+set}" ; then setopt NO_GLOB_SUBST fi # Now quote all the things that may contain metacharacters while being # careful not to overquote the AC_SUBSTed values. We take copies of the # variables and quote the copies for generation of the libtool script. for var in echo old_CC old_CFLAGS AR AR_FLAGS EGREP RANLIB LN_S LTCC LTCFLAGS NM \ SED SHELL STRIP \ libname_spec library_names_spec soname_spec extract_expsyms_cmds \ old_striplib striplib file_magic_cmd finish_cmds finish_eval \ deplibs_check_method reload_flag reload_cmds need_locks \ lt_cv_sys_global_symbol_pipe lt_cv_sys_global_symbol_to_cdecl \ lt_cv_sys_global_symbol_to_c_name_address \ sys_lib_search_path_spec sys_lib_dlsearch_path_spec \ old_postinstall_cmds old_postuninstall_cmds \ compiler_RC \ CC_RC \ LD_RC \ lt_prog_compiler_wl_RC \ lt_prog_compiler_pic_RC \ lt_prog_compiler_static_RC \ lt_prog_compiler_no_builtin_flag_RC \ export_dynamic_flag_spec_RC \ thread_safe_flag_spec_RC \ whole_archive_flag_spec_RC \ enable_shared_with_static_runtimes_RC \ old_archive_cmds_RC \ old_archive_from_new_cmds_RC \ predep_objects_RC \ postdep_objects_RC \ predeps_RC \ postdeps_RC \ compiler_lib_search_path_RC \ archive_cmds_RC \ archive_expsym_cmds_RC \ postinstall_cmds_RC \ 
postuninstall_cmds_RC \ old_archive_from_expsyms_cmds_RC \ allow_undefined_flag_RC \ no_undefined_flag_RC \ export_symbols_cmds_RC \ hardcode_libdir_flag_spec_RC \ hardcode_libdir_flag_spec_ld_RC \ hardcode_libdir_separator_RC \ hardcode_automatic_RC \ module_cmds_RC \ module_expsym_cmds_RC \ lt_cv_prog_compiler_c_o_RC \ fix_srcfile_path_RC \ exclude_expsyms_RC \ include_expsyms_RC; do case $var in old_archive_cmds_RC | \ old_archive_from_new_cmds_RC | \ archive_cmds_RC | \ archive_expsym_cmds_RC | \ module_cmds_RC | \ module_expsym_cmds_RC | \ old_archive_from_expsyms_cmds_RC | \ export_symbols_cmds_RC | \ extract_expsyms_cmds | reload_cmds | finish_cmds | \ postinstall_cmds | postuninstall_cmds | \ old_postinstall_cmds | old_postuninstall_cmds | \ sys_lib_search_path_spec | sys_lib_dlsearch_path_spec) # Double-quote double-evaled strings. eval "lt_$var=\\\"\`\$echo \"X\$$var\" | \$Xsed -e \"\$double_quote_subst\" -e \"\$sed_quote_subst\" -e \"\$delay_variable_subst\"\`\\\"" ;; *) eval "lt_$var=\\\"\`\$echo \"X\$$var\" | \$Xsed -e \"\$sed_quote_subst\"\`\\\"" ;; esac done case $lt_echo in *'\$0 --fallback-echo"') lt_echo=`$echo "X$lt_echo" | $Xsed -e 's/\\\\\\\$0 --fallback-echo"$/$0 --fallback-echo"/'` ;; esac cfgfile="$ofile" cat <<__EOF__ >> "$cfgfile" # ### BEGIN LIBTOOL TAG CONFIG: $tagname # Libtool was configured on host `(hostname || uname -n) 2>/dev/null | sed 1q`: # Shell to use when invoking shell scripts. SHELL=$lt_SHELL # Whether or not to build shared libraries. build_libtool_libs=$enable_shared # Whether or not to build static libraries. build_old_libs=$enable_static # Whether or not to add -lc for building shared libraries. build_libtool_need_lc=$archive_cmds_need_lc_RC # Whether or not to disallow shared libs when runtime libs are static allow_libtool_libs_with_static_runtimes=$enable_shared_with_static_runtimes_RC # Whether or not to optimize for fast installation. fast_install=$enable_fast_install # The host system. 
host_alias=$host_alias host=$host host_os=$host_os # The build system. build_alias=$build_alias build=$build build_os=$build_os # An echo program that does not interpret backslashes. echo=$lt_echo # The archiver. AR=$lt_AR AR_FLAGS=$lt_AR_FLAGS # A C compiler. LTCC=$lt_LTCC # LTCC compiler flags. LTCFLAGS=$lt_LTCFLAGS # A language-specific compiler. CC=$lt_compiler_RC # Is the compiler the GNU C compiler? with_gcc=$GCC_RC # An ERE matcher. EGREP=$lt_EGREP # The linker used to build libraries. LD=$lt_LD_RC # Whether we need hard or soft links. LN_S=$lt_LN_S # A BSD-compatible nm program. NM=$lt_NM # A symbol stripping program STRIP=$lt_STRIP # Used to examine libraries when file_magic_cmd begins "file" MAGIC_CMD=$MAGIC_CMD # Used on cygwin: DLL creation program. DLLTOOL="$DLLTOOL" # Used on cygwin: object dumper. OBJDUMP="$OBJDUMP" # Used on cygwin: assembler. AS="$AS" # The name of the directory that contains temporary libtool files. objdir=$objdir # How to create reloadable object files. reload_flag=$lt_reload_flag reload_cmds=$lt_reload_cmds # How to pass a linker flag through the compiler. wl=$lt_lt_prog_compiler_wl_RC # Object file suffix (normally "o"). objext="$ac_objext" # Old archive suffix (normally "a"). libext="$libext" # Shared library suffix (normally ".so"). shrext_cmds='$shrext_cmds' # Executable file suffix (normally ""). exeext="$exeext" # Additional compiler flags for building library objects. pic_flag=$lt_lt_prog_compiler_pic_RC pic_mode=$pic_mode # What is the maximum length of a command? max_cmd_len=$lt_cv_sys_max_cmd_len # Does compiler simultaneously support -c and -o options? compiler_c_o=$lt_lt_cv_prog_compiler_c_o_RC # Must we lock files when doing compilation? need_locks=$lt_need_locks # Do we need the lib prefix for modules? need_lib_prefix=$need_lib_prefix # Do we need a version for libraries? need_version=$need_version # Whether dlopen is supported. dlopen_support=$enable_dlopen # Whether dlopen of programs is supported. 
dlopen_self=$enable_dlopen_self # Whether dlopen of statically linked programs is supported. dlopen_self_static=$enable_dlopen_self_static # Compiler flag to prevent dynamic linking. link_static_flag=$lt_lt_prog_compiler_static_RC # Compiler flag to turn off builtin functions. no_builtin_flag=$lt_lt_prog_compiler_no_builtin_flag_RC # Compiler flag to allow reflexive dlopens. export_dynamic_flag_spec=$lt_export_dynamic_flag_spec_RC # Compiler flag to generate shared objects directly from archives. whole_archive_flag_spec=$lt_whole_archive_flag_spec_RC # Compiler flag to generate thread-safe objects. thread_safe_flag_spec=$lt_thread_safe_flag_spec_RC # Library versioning type. version_type=$version_type # Format of library name prefix. libname_spec=$lt_libname_spec # List of archive names. First name is the real one, the rest are links. # The last name is the one that the linker finds with -lNAME. library_names_spec=$lt_library_names_spec # The coded name of the library, if different from the real name. soname_spec=$lt_soname_spec # Commands used to build and install an old-style archive. RANLIB=$lt_RANLIB old_archive_cmds=$lt_old_archive_cmds_RC old_postinstall_cmds=$lt_old_postinstall_cmds old_postuninstall_cmds=$lt_old_postuninstall_cmds # Create an old-style archive from a shared archive. old_archive_from_new_cmds=$lt_old_archive_from_new_cmds_RC # Create a temporary old-style archive to link instead of a shared archive. old_archive_from_expsyms_cmds=$lt_old_archive_from_expsyms_cmds_RC # Commands used to build and install a shared archive. archive_cmds=$lt_archive_cmds_RC archive_expsym_cmds=$lt_archive_expsym_cmds_RC postinstall_cmds=$lt_postinstall_cmds postuninstall_cmds=$lt_postuninstall_cmds # Commands used to build a loadable module (assumed same as above if empty) module_cmds=$lt_module_cmds_RC module_expsym_cmds=$lt_module_expsym_cmds_RC # Commands to strip libraries. 
old_striplib=$lt_old_striplib striplib=$lt_striplib # Dependencies to place before the objects being linked to create a # shared library. predep_objects=$lt_predep_objects_RC # Dependencies to place after the objects being linked to create a # shared library. postdep_objects=$lt_postdep_objects_RC # Dependencies to place before the objects being linked to create a # shared library. predeps=$lt_predeps_RC # Dependencies to place after the objects being linked to create a # shared library. postdeps=$lt_postdeps_RC # The library search path used internally by the compiler when linking # a shared library. compiler_lib_search_path=$lt_compiler_lib_search_path_RC # Method to check whether dependent libraries are shared objects. deplibs_check_method=$lt_deplibs_check_method # Command to use when deplibs_check_method == file_magic. file_magic_cmd=$lt_file_magic_cmd # Flag that allows shared libraries with undefined symbols to be built. allow_undefined_flag=$lt_allow_undefined_flag_RC # Flag that forces no undefined symbols. no_undefined_flag=$lt_no_undefined_flag_RC # Commands used to finish a libtool library installation in a directory. finish_cmds=$lt_finish_cmds # Same as above, but a single script fragment to be evaled but not shown. finish_eval=$lt_finish_eval # Take the output of nm and produce a listing of raw symbols and C names. global_symbol_pipe=$lt_lt_cv_sys_global_symbol_pipe # Transform the output of nm in a proper C declaration global_symbol_to_cdecl=$lt_lt_cv_sys_global_symbol_to_cdecl # Transform the output of nm in a C name address pair global_symbol_to_c_name_address=$lt_lt_cv_sys_global_symbol_to_c_name_address # This is the shared library runtime path variable. runpath_var=$runpath_var # This is the shared library path variable. shlibpath_var=$shlibpath_var # Is shlibpath searched before the hard-coded library search path? shlibpath_overrides_runpath=$shlibpath_overrides_runpath # How to hardcode a shared library path into an executable. 
hardcode_action=$hardcode_action_RC # Whether we should hardcode library paths into libraries. hardcode_into_libs=$hardcode_into_libs # Flag to hardcode \$libdir into a binary during linking. # This must work even if \$libdir does not exist. hardcode_libdir_flag_spec=$lt_hardcode_libdir_flag_spec_RC # If ld is used when linking, flag to hardcode \$libdir into # a binary during linking. This must work even if \$libdir does # not exist. hardcode_libdir_flag_spec_ld=$lt_hardcode_libdir_flag_spec_ld_RC # Whether we need a single -rpath flag with a separated argument. hardcode_libdir_separator=$lt_hardcode_libdir_separator_RC # Set to yes if using DIR/libNAME${shared_ext} during linking hardcodes DIR into the # resulting binary. hardcode_direct=$hardcode_direct_RC # Set to yes if using the -LDIR flag during linking hardcodes DIR into the # resulting binary. hardcode_minus_L=$hardcode_minus_L_RC # Set to yes if using SHLIBPATH_VAR=DIR during linking hardcodes DIR into # the resulting binary. hardcode_shlibpath_var=$hardcode_shlibpath_var_RC # Set to yes if building a shared library automatically hardcodes DIR into the library # and all subsequent libraries and executables linked against it. hardcode_automatic=$hardcode_automatic_RC # Variables whose values should be saved in libtool wrapper scripts and # restored at relink time. variables_saved_for_relink="$variables_saved_for_relink" # Whether libtool must link a program against all its dependency libraries. link_all_deplibs=$link_all_deplibs_RC # Compile-time system search path for libraries sys_lib_search_path_spec=$lt_sys_lib_search_path_spec # Run-time system search path for libraries sys_lib_dlsearch_path_spec=$lt_sys_lib_dlsearch_path_spec # Fix the shell variable \$srcfile for the compiler. fix_srcfile_path=$lt_fix_srcfile_path # Set to yes if exported symbols are required. always_export_symbols=$always_export_symbols_RC # The commands to list exported symbols. 
export_symbols_cmds=$lt_export_symbols_cmds_RC # The commands to extract the exported symbol list from a shared archive. extract_expsyms_cmds=$lt_extract_expsyms_cmds # Symbols that should not be listed in the preloaded symbols. exclude_expsyms=$lt_exclude_expsyms_RC # Symbols that must always be exported. include_expsyms=$lt_include_expsyms_RC # ### END LIBTOOL TAG CONFIG: $tagname __EOF__ else # If there is no Makefile yet, we rely on a make rule to execute # `config.status --recheck' to rerun these tests and create the # libtool script then. ltmain_in=`echo $ltmain | sed -e 's/\.sh$/.in/'` if test -f "$ltmain_in"; then test -f Makefile && make "$ltmain" fi fi ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu CC="$lt_save_CC" ;; *) { { echo "$as_me:$LINENO: error: Unsupported tag name: $tagname" >&5 echo "$as_me: error: Unsupported tag name: $tagname" >&2;} { (exit 1); exit 1; }; } ;; esac # Append the new tag name to the list of available tags. if test -n "$tagname" ; then available_tags="$available_tags $tagname" fi fi done IFS="$lt_save_ifs" # Now substitute the updated list of available tags. if eval "sed -e 's/^available_tags=.*\$/available_tags=\"$available_tags\"/' \"$ofile\" > \"${ofile}T\""; then mv "${ofile}T" "$ofile" chmod +x "$ofile" else rm -f "${ofile}T" { { echo "$as_me:$LINENO: error: unable to update list of available tagged configurations." >&5 echo "$as_me: error: unable to update list of available tagged configurations." >&2;} { (exit 1); exit 1; }; } fi fi # This can be used to rebuild libtool when needed LIBTOOL_DEPS="$ac_aux_dir/ltmain.sh" # Always use our own libtool. 
LIBTOOL='$(SHELL) $(top_builddir)/libtool' # Prevent multiple expansion { echo "$as_me:$LINENO: result: enable_foratran = $enable_fortran" >&5 echo "${ECHO_T}enable_foratran = $enable_fortran" >&6; } if test "$enable_fortran" != "no"; then WANT_FORTRAN_TRUE= WANT_FORTRAN_FALSE='#' else WANT_FORTRAN_TRUE='#' WANT_FORTRAN_FALSE= fi { echo "$as_me:$LINENO: checking for sqrt in -lm" >&5 echo $ECHO_N "checking for sqrt in -lm... $ECHO_C" >&6; } if test "${ac_cv_lib_m_sqrt+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else ac_check_lib_save_LIBS=$LIBS LIBS="-lm $LIBS" cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ #ifdef __cplusplus extern "C" #endif char sqrt (); int main () { return sqrt (); ; return 0; } _ACEOF rm -f conftest.$ac_objext conftest$ac_exeext if { (ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_link") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! 
-s conftest.err } && test -s conftest$ac_exeext && $as_test_x conftest$ac_exeext; then ac_cv_lib_m_sqrt=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_cv_lib_m_sqrt=no fi rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { echo "$as_me:$LINENO: result: $ac_cv_lib_m_sqrt" >&5 echo "${ECHO_T}$ac_cv_lib_m_sqrt" >&6; } if test $ac_cv_lib_m_sqrt = yes; then cat >>confdefs.h <<_ACEOF #define HAVE_LIBM 1 _ACEOF LIBS="-lm $LIBS" fi { echo "$as_me:$LINENO: checking LIBS = $LIBS" >&5 echo $ECHO_N "checking LIBS = $LIBS... $ECHO_C" >&6; } # Checks for header files. { echo "$as_me:$LINENO: checking for ANSI C header files" >&5 echo $ECHO_N "checking for ANSI C header files... $ECHO_C" >&6; } if test "${ac_cv_header_stdc+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include #include #include #include int main () { ; return 0; } _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest.$ac_objext; then ac_cv_header_stdc=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_cv_header_stdc=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext if test $ac_cv_header_stdc = yes; then # SunOS 4.x string.h does not declare mem*, contrary to ANSI. cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. 
*/ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include _ACEOF if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | $EGREP "memchr" >/dev/null 2>&1; then : else ac_cv_header_stdc=no fi rm -f conftest* fi if test $ac_cv_header_stdc = yes; then # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI. cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include _ACEOF if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | $EGREP "free" >/dev/null 2>&1; then : else ac_cv_header_stdc=no fi rm -f conftest* fi if test $ac_cv_header_stdc = yes; then # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi. if test "$cross_compiling" = yes; then : else cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include #include #if ((' ' & 0x0FF) == 0x020) # define ISLOWER(c) ('a' <= (c) && (c) <= 'z') # define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c)) #else # define ISLOWER(c) \ (('a' <= (c) && (c) <= 'i') \ || ('j' <= (c) && (c) <= 'r') \ || ('s' <= (c) && (c) <= 'z')) # define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c)) #endif #define XOR(e, f) (((e) && !(f)) || (!(e) && (f))) int main () { int i; for (i = 0; i < 256; i++) if (XOR (islower (i), ISLOWER (i)) || toupper (i) != TOUPPER (i)) return 2; return 0; } _ACEOF rm -f conftest$ac_exeext if { (ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_link") 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { ac_try='./conftest$ac_exeext' { (case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_try") 2>&5 ac_status=$? 
echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; }; then : else echo "$as_me: program exited with status $ac_status" >&5 echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ( exit $ac_status ) ac_cv_header_stdc=no fi rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext fi fi fi { echo "$as_me:$LINENO: result: $ac_cv_header_stdc" >&5 echo "${ECHO_T}$ac_cv_header_stdc" >&6; } if test $ac_cv_header_stdc = yes; then cat >>confdefs.h <<\_ACEOF #define STDC_HEADERS 1 _ACEOF fi for ac_header in stdlib.h sys/time.h unistd.h do as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then { echo "$as_me:$LINENO: checking for $ac_header" >&5 echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then echo $ECHO_N "(cached) $ECHO_C" >&6 fi ac_res=`eval echo '${'$as_ac_Header'}'` { echo "$as_me:$LINENO: result: $ac_res" >&5 echo "${ECHO_T}$ac_res" >&6; } else # Is the header compilable? { echo "$as_me:$LINENO: checking $ac_header usability" >&5 echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; } cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ $ac_includes_default #include <$ac_header> _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! 
-s conftest.err } && test -s conftest.$ac_objext; then ac_header_compiler=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_header_compiler=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext { echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 echo "${ECHO_T}$ac_header_compiler" >&6; } # Is the header present? { echo "$as_me:$LINENO: checking $ac_header presence" >&5 echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; } cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include <$ac_header> _ACEOF if { (ac_try="$ac_cpp conftest.$ac_ext" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } >/dev/null && { test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || test ! -s conftest.err }; then ac_header_preproc=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_header_preproc=no fi rm -f conftest.err conftest.$ac_ext { echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 echo "${ECHO_T}$ac_header_preproc" >&6; } # So? What about this header? case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in yes:no: ) { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5 echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" 
>&2;} { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5 echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;} ac_header_preproc=yes ;; no:yes:* ) { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5 echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;} { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5 echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" >&2;} { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5 echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;} { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5 echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;} { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5 echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;} { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5 echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;} ( cat <<\_ASBOX ## ----------------------------- ## ## Report this to esler@uiuc.edu ## ## ----------------------------- ## _ASBOX ) | sed "s/^/$as_me: WARNING: /" >&2 ;; esac { echo "$as_me:$LINENO: checking for $ac_header" >&5 echo $ECHO_N "checking for $ac_header... 
$ECHO_C" >&6; } if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then echo $ECHO_N "(cached) $ECHO_C" >&6 else eval "$as_ac_Header=\$ac_header_preproc" fi ac_res=`eval echo '${'$as_ac_Header'}'` { echo "$as_me:$LINENO: result: $ac_res" >&5 echo "${ECHO_T}$ac_res" >&6; } fi if test `eval echo '${'$as_ac_Header'}'` = yes; then cat >>confdefs.h <<_ACEOF #define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 _ACEOF fi done { echo "$as_me:$LINENO: checking for variable-length arrays" >&5 echo $ECHO_N "checking for variable-length arrays... $ECHO_C" >&6; } if test "${ac_cv_c_vararrays+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ int main () { static int x; char a[++x]; a[sizeof a - 1] = 0; return a[0]; ; return 0; } _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest.$ac_objext; then ac_cv_c_vararrays=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_cv_c_vararrays=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi { echo "$as_me:$LINENO: result: $ac_cv_c_vararrays" >&5 echo "${ECHO_T}$ac_cv_c_vararrays" >&6; } if test $ac_cv_c_vararrays = yes; then cat >>confdefs.h <<\_ACEOF #define HAVE_C_VARARRAYS 1 _ACEOF fi { echo "$as_me:$LINENO: checking optimal compiler flags" >&5 echo $ECHO_N "checking optimal compiler flags... 
$ECHO_C" >&6; }
# Debugging symbols are added to both the C and C++ flags unconditionally,
# and the resulting CFLAGS is reported as the "optimal compiler flags" result.
CFLAGS="${CFLAGS} -g"
CXXFLAGS="${CXXFLAGS} -g"
{ echo "$as_me:$LINENO: result: ${CFLAGS}" >&5
echo "${ECHO_T}${CFLAGS}" >&6; }

# Compile CUDA versions
# Check whether --enable-cuda was given.
if test "${enable_cuda+set}" = set; then
  enableval=$enable_cuda;
fi

# HAVE_CUDA_TRUE/HAVE_CUDA_FALSE pair: the TRUE side is empty only when
# enable_cuda is exactly "yes".
if test "$enable_cuda" = "yes"; then
  HAVE_CUDA_TRUE=
  HAVE_CUDA_FALSE='#'
else
  HAVE_CUDA_TRUE='#'
  HAVE_CUDA_FALSE=
fi

if test "x$enable_cuda" = "xyes"; then
   NVCC="nvcc"
   # This NVCCFLAGS value is replaced by the unconditional assignment below.
   NVCCFLAGS="-DNO_CUDA_MAIN -g -O3 -arch sm_13 -Drestrict=__restrict__"
   CUDA_LIBS="-L/usr/local/cuda/lib -lcudart"
   # LIBS="$LIBS -L/usr/local/cuda/lib -lcudart"
fi

########################
# Check for CUDA
########################

# Check whether --with-cuda was given.
if test "${with_cuda+set}" = set; then
  withval=$with_cuda;
fi

# Select the CUDA installation.
#   * A --with-cuda value other than empty/"no" is taken as the install root.
#   * Otherwise, when --enable-cuda=yes, /usr/local/cuda is assumed.
# This used to read "elseif test ... = yes" with no "then".  "elseif" is not
# a shell keyword, so it was run as a command inside the first branch and the
# default-path assignments then overwrote the user-supplied root; with only
# --enable-cuda they were never reached, leaving CUDA_CFLAGS unset.
# with_cuda=no (--without-cuda) is now treated like "not given" instead of
# producing -Ino/include.
if test "x$with_cuda" != "x" && test "x$with_cuda" != "xno"; then
   CUDA_CFLAGS="-I$with_cuda/include"
   CUDA_LIBS="-L$with_cuda/lib -lcudart"
   NVCC="$with_cuda/bin/nvcc"
elif test "$enable_cuda" = "yes"; then
   CUDA_CFLAGS="-I/usr/local/cuda/include"
   CUDA_LIBS="-L/usr/local/cuda/lib -lcudart"
   NVCC="nvcc"
fi

# Append the CUDA link flags exactly once.  Previously the --with-cuda branch
# appended them and this statement appended them a second time; when CUDA was
# not requested at all it still added a stray blank to LIBS.
if test -n "$CUDA_LIBS"; then
   LIBS="$LIBS $CUDA_LIBS"
fi
NVCCFLAGS="-DNO_CUDA_MAIN -O3 -use_fast_math -g -arch sm_13 -Drestrict=__restrict__"

# Compile with -pthread
# Check whether --enable-pthread was given.
if test "${enable_pthread+set}" = set; then
  enableval=$enable_pthread;
fi

# -pthread goes into both compiler flag sets and is also kept separately in
# PTHREAD_FLAG; PTHREAD_FLAG is empty when the option is not "yes".
if test "x$enable_pthread" = "xyes"; then
   CFLAGS="$CFLAGS -pthread"
   CXXFLAGS="$CXXFLAGS -pthread"
   PTHREAD_FLAG="-pthread"
else
   PTHREAD_FLAG=""
fi

# Compile with -openmp
# Check whether --enable-openmp was given.
if test "${enable_openmp+set}" = set; then
  enableval=$enable_openmp;
fi

# Same scheme as pthread: flag added to CFLAGS/CXXFLAGS and mirrored in
# OPENMP_FLAG, which is empty when the option is not "yes".
if test "x$enable_openmp" = "xyes"; then
   CFLAGS="$CFLAGS -openmp"
   CXXFLAGS="$CXXFLAGS -openmp"
   OPENMP_FLAG="-openmp"
else
   OPENMP_FLAG=""
fi

# Check if we want static-only binaries
# Check whether --enable-prefetch was given.
if test "${enable_prefetch+set}" = set; then enableval=$enable_prefetch; fi if test "$enable_prefetch" = "yes"; then cat >>confdefs.h <<\_ACEOF #define USE_PREFETCH _ACEOF cat >>confdefs.h <<\_ACEOF #define PREFETCH_AHEAD 12 _ACEOF elif test "x$enable_prefetch" != "x"; then cat >>confdefs.h <<\_ACEOF #define USE_PREFETCH _ACEOF cat >>confdefs.h <<_ACEOF #define PREFETCH_AHEAD $enable_prefetch _ACEOF fi # Check whether --enable-all-static was given. if test "${enable_all_static+set}" = set; then enableval=$enable_all_static; fi if test "$enable_all_static" = "yes"; then ALL_STATIC="-all-static" else ALL_STATIC="" fi # Check whether --enable-altivec was given. if test "${enable_altivec+set}" = set; then enableval=$enable_altivec; fi if test "x$enable_altivec" = "xyes"; then CFLAGS="$CFLAGS -faltivec -DUSE_ALTIVEC" FFLAGS="$FFLAGS -DUSE_ALTIVEC" fi # Check whether --enable-sse was given. if test "${enable_sse+set}" = set; then enableval=$enable_sse; fi if test "x$enable_sse" = "xyes"; then if test "${ac_cv_header_xmmintrin_h+set}" = set; then { echo "$as_me:$LINENO: checking for xmmintrin.h" >&5 echo $ECHO_N "checking for xmmintrin.h... $ECHO_C" >&6; } if test "${ac_cv_header_xmmintrin_h+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 fi { echo "$as_me:$LINENO: result: $ac_cv_header_xmmintrin_h" >&5 echo "${ECHO_T}$ac_cv_header_xmmintrin_h" >&6; } else # Is the header compilable? { echo "$as_me:$LINENO: checking xmmintrin.h usability" >&5 echo $ECHO_N "checking xmmintrin.h usability... $ECHO_C" >&6; } cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ $ac_includes_default #include _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? 
grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest.$ac_objext; then ac_header_compiler=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_header_compiler=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext { echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 echo "${ECHO_T}$ac_header_compiler" >&6; } # Is the header present? { echo "$as_me:$LINENO: checking xmmintrin.h presence" >&5 echo $ECHO_N "checking xmmintrin.h presence... $ECHO_C" >&6; } cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include _ACEOF if { (ac_try="$ac_cpp conftest.$ac_ext" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } >/dev/null && { test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" || test ! -s conftest.err }; then ac_header_preproc=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_header_preproc=no fi rm -f conftest.err conftest.$ac_ext { echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 echo "${ECHO_T}$ac_header_preproc" >&6; } # So? What about this header? case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in yes:no: ) { echo "$as_me:$LINENO: WARNING: xmmintrin.h: accepted by the compiler, rejected by the preprocessor!" >&5 echo "$as_me: WARNING: xmmintrin.h: accepted by the compiler, rejected by the preprocessor!" 
>&2;} { echo "$as_me:$LINENO: WARNING: xmmintrin.h: proceeding with the compiler's result" >&5 echo "$as_me: WARNING: xmmintrin.h: proceeding with the compiler's result" >&2;} ac_header_preproc=yes ;; no:yes:* ) { echo "$as_me:$LINENO: WARNING: xmmintrin.h: present but cannot be compiled" >&5 echo "$as_me: WARNING: xmmintrin.h: present but cannot be compiled" >&2;} { echo "$as_me:$LINENO: WARNING: xmmintrin.h: check for missing prerequisite headers?" >&5 echo "$as_me: WARNING: xmmintrin.h: check for missing prerequisite headers?" >&2;} { echo "$as_me:$LINENO: WARNING: xmmintrin.h: see the Autoconf documentation" >&5 echo "$as_me: WARNING: xmmintrin.h: see the Autoconf documentation" >&2;} { echo "$as_me:$LINENO: WARNING: xmmintrin.h: section \"Present But Cannot Be Compiled\"" >&5 echo "$as_me: WARNING: xmmintrin.h: section \"Present But Cannot Be Compiled\"" >&2;} { echo "$as_me:$LINENO: WARNING: xmmintrin.h: proceeding with the preprocessor's result" >&5 echo "$as_me: WARNING: xmmintrin.h: proceeding with the preprocessor's result" >&2;} { echo "$as_me:$LINENO: WARNING: xmmintrin.h: in the future, the compiler will take precedence" >&5 echo "$as_me: WARNING: xmmintrin.h: in the future, the compiler will take precedence" >&2;} ( cat <<\_ASBOX ## ----------------------------- ## ## Report this to esler@uiuc.edu ## ## ----------------------------- ## _ASBOX ) | sed "s/^/$as_me: WARNING: /" >&2 ;; esac { echo "$as_me:$LINENO: checking for xmmintrin.h" >&5 echo $ECHO_N "checking for xmmintrin.h... 
$ECHO_C" >&6; } if test "${ac_cv_header_xmmintrin_h+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else ac_cv_header_xmmintrin_h=$ac_header_preproc fi { echo "$as_me:$LINENO: result: $ac_cv_header_xmmintrin_h" >&5 echo "${ECHO_T}$ac_cv_header_xmmintrin_h" >&6; } fi if test $ac_cv_header_xmmintrin_h = yes; then cat >>confdefs.h <<\_ACEOF #define HAVE_XMMINTRIN_H 1 _ACEOF else { { echo "$as_me:$LINENO: error: Cannot utilize SSE2 extensions without xmmintrin.h" >&5 echo "$as_me: error: Cannot utilize SSE2 extensions without xmmintrin.h" >&2;} { (exit 1); exit 1; }; } fi cat >>confdefs.h <<\_ACEOF #define HAVE_SSE 1 _ACEOF fi if test "x$enable_sse" = "xyes"; then ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu { echo "$as_me:$LINENO: checking for x86 cpuid 1 output" >&5 echo $ECHO_N "checking for x86 cpuid 1 output... $ECHO_C" >&6; } if test "${ax_cv_gcc_x86_cpuid_1+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else if test "$cross_compiling" = yes; then ax_cv_gcc_x86_cpuid_1=unknown else cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include int main () { int op = 1, eax, ebx, ecx, edx; FILE *f; __asm__("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (op)); f = fopen("conftest_cpuid", "w"); if (!f) return 1; fprintf(f, "%x:%x:%x:%x\n", eax, ebx, ecx, edx); fclose(f); return 0; ; return 0; } _ACEOF rm -f conftest$ac_exeext if { (ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_link") 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? 
= $ac_status" >&5
  (exit $ac_status); } && { ac_try='./conftest$ac_exeext'
  { (case "(($ac_try" in
  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
  *) ac_try_echo=$ac_try;;
esac
eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
  (eval "$ac_try") 2>&5
  ac_status=$?
  echo "$as_me:$LINENO: \$? = $ac_status" >&5
  (exit $ac_status); }; }; then
  # The probe program wrote "eax:ebx:ecx:edx" (hex) into conftest_cpuid.
  ax_cv_gcc_x86_cpuid_1=`cat conftest_cpuid`; rm -f conftest_cpuid
else
  echo "$as_me: program exited with status $ac_status" >&5
echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5

( exit $ac_status )
ax_cv_gcc_x86_cpuid_1=unknown; rm -f conftest_cpuid
fi
rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext
fi


fi
{ echo "$as_me:$LINENO: result: $ax_cv_gcc_x86_cpuid_1" >&5
echo "${ECHO_T}$ax_cv_gcc_x86_cpuid_1" >&6; }
ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu


# Split the cpuid result into the ecx (field 3) and edx (field 4) words.
ecx=`echo "$ax_cv_gcc_x86_cpuid_1" | cut -d ":" -f 3`
edx=`echo "$ax_cv_gcc_x86_cpuid_1" | cut -d ":" -f 4`

# The result is the literal "unknown" when cross-compiling or when the probe
# program failed.  cut passes a line without any ':' through unchanged, so
# both variables would then hold "unknown" and every "$((0x$edx ...))" /
# "$((0x$ecx ...))" feature test would be a shell arithmetic error.  Anything
# that is empty or not purely hexadecimal is replaced by 0, which makes all
# feature bits read as "not supported".
case $ecx in
  '' | *[!0-9a-fA-F]*) ecx=0 ;;
esac
case $edx in
  '' | *[!0-9a-fA-F]*) edx=0 ;;
esac

 { echo "$as_me:$LINENO: checking whether mmx is supported" >&5
echo $ECHO_N "checking whether mmx is supported... $ECHO_C" >&6; }
if test "${ax_have_mmx_ext+set}" = set; then
  echo $ECHO_N "(cached) $ECHO_C" >&6
else

    ax_have_mmx_ext=no
    # MMX is reported in bit 23 of edx.
    if test "$((0x$edx>>23&0x01))" = 1; then
      ax_have_mmx_ext=yes
    fi

fi
{ echo "$as_me:$LINENO: result: $ax_have_mmx_ext" >&5
echo "${ECHO_T}$ax_have_mmx_ext" >&6; }

 { echo "$as_me:$LINENO: checking whether sse is supported" >&5
echo $ECHO_N "checking whether sse is supported...
$ECHO_C" >&6; } if test "${ax_have_sse_ext+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else ax_have_sse_ext=no if test "$((0x$edx>>25&0x01))" = 1; then ax_have_sse_ext=yes fi fi { echo "$as_me:$LINENO: result: $ax_have_sse_ext" >&5 echo "${ECHO_T}$ax_have_sse_ext" >&6; } { echo "$as_me:$LINENO: checking whether sse2 is supported" >&5 echo $ECHO_N "checking whether sse2 is supported... $ECHO_C" >&6; } if test "${ax_have_sse2_ext+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else ax_have_sse2_ext=no if test "$((0x$edx>>26&0x01))" = 1; then ax_have_sse2_ext=yes fi fi { echo "$as_me:$LINENO: result: $ax_have_sse2_ext" >&5 echo "${ECHO_T}$ax_have_sse2_ext" >&6; } { echo "$as_me:$LINENO: checking whether sse3 is supported" >&5 echo $ECHO_N "checking whether sse3 is supported... $ECHO_C" >&6; } if test "${ax_have_sse3_ext+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else ax_have_sse3_ext=no if test "$((0x$ecx&0x01))" = 1; then ax_have_sse3_ext=yes fi fi { echo "$as_me:$LINENO: result: $ax_have_sse3_ext" >&5 echo "${ECHO_T}$ax_have_sse3_ext" >&6; } { echo "$as_me:$LINENO: checking whether ssse3 is supported" >&5 echo $ECHO_N "checking whether ssse3 is supported... $ECHO_C" >&6; } if test "${ax_have_ssse3_ext+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else ax_have_ssse3_ext=no if test "$((0x$ecx>>9&0x01))" = 1; then ax_have_ssse3_ext=yes fi fi { echo "$as_me:$LINENO: result: $ax_have_ssse3_ext" >&5 echo "${ECHO_T}$ax_have_ssse3_ext" >&6; } { echo "$as_me:$LINENO: checking whether sse4.1 is supported" >&5 echo $ECHO_N "checking whether sse4.1 is supported... 
$ECHO_C" >&6; } if test "${ax_have_sse4_1_ext+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else ax_have_sse4_1_ext=no if test "$((0x$ecx>>19&0x01))" = 1; then ax_have_sse4_1_ext=yes fi fi { echo "$as_me:$LINENO: result: $ax_have_sse4_1_ext" >&5 echo "${ECHO_T}$ax_have_sse4_1_ext" >&6; } { echo "$as_me:$LINENO: checking whether sse4.2 is supported" >&5 echo $ECHO_N "checking whether sse4.2 is supported... $ECHO_C" >&6; } if test "${ax_have_sse4_2_ext+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else ax_have_sse4_2_ext=no if test "$((0x$ecx>>20&0x01))" = 1; then ax_have_sse4_2_ext=yes fi fi { echo "$as_me:$LINENO: result: $ax_have_sse4_2_ext" >&5 echo "${ECHO_T}$ax_have_sse4_2_ext" >&6; } if test "$ax_have_mmx_ext" = yes; then cat >>confdefs.h <<\_ACEOF #define HAVE_MMX _ACEOF { echo "$as_me:$LINENO: checking whether C compiler accepts -mmmx" >&5 echo $ECHO_N "checking whether C compiler accepts -mmmx... $ECHO_C" >&6; } if test "${ax_cv_c_flags__mmmx+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else ax_save_FLAGS=$CFLAGS CFLAGS="-mmmx" cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ int main () { ; return 0; } _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! 
-s conftest.err } && test -s conftest.$ac_objext; then ax_cv_c_flags__mmmx=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ax_cv_c_flags__mmmx=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext CFLAGS=$ax_save_FLAGS fi eval ax_check_compiler_flags=$ax_cv_c_flags__mmmx { echo "$as_me:$LINENO: result: $ax_check_compiler_flags" >&5 echo "${ECHO_T}$ax_check_compiler_flags" >&6; } if test "x$ax_check_compiler_flags" = xyes; then SIMD_FLAGS="$SIMD_FLAGS -mmmx" else : fi fi if test "$ax_have_sse_ext" = yes; then cat >>confdefs.h <<\_ACEOF #define HAVE_SSE _ACEOF { echo "$as_me:$LINENO: checking whether C compiler accepts -msse" >&5 echo $ECHO_N "checking whether C compiler accepts -msse... $ECHO_C" >&6; } if test "${ax_cv_c_flags__msse+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else ax_save_FLAGS=$CFLAGS CFLAGS="-msse" cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ int main () { ; return 0; } _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! 
-s conftest.err } && test -s conftest.$ac_objext; then ax_cv_c_flags__msse=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ax_cv_c_flags__msse=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext CFLAGS=$ax_save_FLAGS fi eval ax_check_compiler_flags=$ax_cv_c_flags__msse { echo "$as_me:$LINENO: result: $ax_check_compiler_flags" >&5 echo "${ECHO_T}$ax_check_compiler_flags" >&6; } if test "x$ax_check_compiler_flags" = xyes; then SIMD_FLAGS="$SIMD_FLAGS -msse" else : fi fi if test "$ax_have_sse2_ext" = yes; then cat >>confdefs.h <<\_ACEOF #define HAVE_SSE2 _ACEOF { echo "$as_me:$LINENO: checking whether C compiler accepts -msse2" >&5 echo $ECHO_N "checking whether C compiler accepts -msse2... $ECHO_C" >&6; } if test "${ax_cv_c_flags__msse2+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else ax_save_FLAGS=$CFLAGS CFLAGS="-msse2" cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ int main () { ; return 0; } _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! 
-s conftest.err } && test -s conftest.$ac_objext; then ax_cv_c_flags__msse2=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ax_cv_c_flags__msse2=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext CFLAGS=$ax_save_FLAGS fi eval ax_check_compiler_flags=$ax_cv_c_flags__msse2 { echo "$as_me:$LINENO: result: $ax_check_compiler_flags" >&5 echo "${ECHO_T}$ax_check_compiler_flags" >&6; } if test "x$ax_check_compiler_flags" = xyes; then SIMD_FLAGS="$SIMD_FLAGS -msse2" else : fi fi if test "$ax_have_sse3_ext" = yes; then cat >>confdefs.h <<\_ACEOF #define HAVE_SSE3 _ACEOF { echo "$as_me:$LINENO: checking whether C compiler accepts -msse3" >&5 echo $ECHO_N "checking whether C compiler accepts -msse3... $ECHO_C" >&6; } if test "${ax_cv_c_flags__msse3+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else ax_save_FLAGS=$CFLAGS CFLAGS="-msse3" cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ int main () { ; return 0; } _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! 
-s conftest.err } && test -s conftest.$ac_objext; then ax_cv_c_flags__msse3=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ax_cv_c_flags__msse3=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext CFLAGS=$ax_save_FLAGS fi eval ax_check_compiler_flags=$ax_cv_c_flags__msse3 { echo "$as_me:$LINENO: result: $ax_check_compiler_flags" >&5 echo "${ECHO_T}$ax_check_compiler_flags" >&6; } if test "x$ax_check_compiler_flags" = xyes; then SIMD_FLAGS="$SIMD_FLAGS -msse3" else : fi fi if test "$ax_have_ssse3_ext" = yes; then cat >>confdefs.h <<\_ACEOF #define HAVE_SSSE3 _ACEOF fi if test "$ax_have_sse4_1_ext" = yes; then cat >>confdefs.h <<\_ACEOF #define HAVE_SSE4_1 _ACEOF fi if test "$ax_have_sse4_2_ext" = yes; then cat >>confdefs.h <<\_ACEOF #define HAVE_SSE4_2 _ACEOF fi fi if test "$ax_have_sse_ext" = "yes"; then HAVE_SSE_TRUE= HAVE_SSE_FALSE='#' else HAVE_SSE_TRUE='#' HAVE_SSE_FALSE= fi if test "$ax_have_sse2_ext" = "yes"; then HAVE_SSE2_TRUE= HAVE_SSE2_FALSE='#' else HAVE_SSE2_TRUE='#' HAVE_SSE2_FALSE= fi if test "$ax_have_sse3_ext" = "yes"; then HAVE_SSE3_TRUE= HAVE_SSE3_FALSE='#' else HAVE_SSE3_TRUE='#' HAVE_SSE3_FALSE= fi if test "$ax_have_ssse3_ext" = "yes"; then HAVE_SSSE3_TRUE= HAVE_SSSE3_FALSE='#' else HAVE_SSSE3_TRUE='#' HAVE_SSSE3_FALSE= fi if test "$ax_have_sse4_1_ext" = "yes"; then HAVE_SSE4_1_TRUE= HAVE_SSE4_1_FALSE='#' else HAVE_SSE4_1_TRUE='#' HAVE_SSE4_1_FALSE= fi if test "$ax_have_sse4_2_ext" = "yes"; then HAVE_SSE4_2_TRUE= HAVE_SSE4_2_FALSE='#' else HAVE_SSE4_2_TRUE='#' HAVE_SSE4_2_FALSE= fi # Check whether --enable-blips was given. if test "${enable_blips+set}" = set; then enableval=$enable_blips; fi if test "$enable_blips" = "yes"; then WANT_BLIPS_TRUE= WANT_BLIPS_FALSE='#' else WANT_BLIPS_TRUE='#' WANT_BLIPS_FALSE= fi if test "x$enable_blips" = "xyes"; then pkg_failed=no { echo "$as_me:$LINENO: checking for FFTW3" >&5 echo $ECHO_N "checking for FFTW3... 
$ECHO_C" >&6; } if test -n "$PKG_CONFIG"; then if test -n "$FFTW3_CFLAGS"; then pkg_cv_FFTW3_CFLAGS="$FFTW3_CFLAGS" else if test -n "$PKG_CONFIG" && \ { (echo "$as_me:$LINENO: \$PKG_CONFIG --exists --print-errors \"fftw3\"") >&5 ($PKG_CONFIG --exists --print-errors "fftw3") 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; then pkg_cv_FFTW3_CFLAGS=`$PKG_CONFIG --cflags "fftw3" 2>/dev/null` else pkg_failed=yes fi fi else pkg_failed=untried fi if test -n "$PKG_CONFIG"; then if test -n "$FFTW3_LIBS"; then pkg_cv_FFTW3_LIBS="$FFTW3_LIBS" else if test -n "$PKG_CONFIG" && \ { (echo "$as_me:$LINENO: \$PKG_CONFIG --exists --print-errors \"fftw3\"") >&5 ($PKG_CONFIG --exists --print-errors "fftw3") 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; then pkg_cv_FFTW3_LIBS=`$PKG_CONFIG --libs "fftw3" 2>/dev/null` else pkg_failed=yes fi fi else pkg_failed=untried fi if test $pkg_failed = yes; then if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then _pkg_short_errors_supported=yes else _pkg_short_errors_supported=no fi if test $_pkg_short_errors_supported = yes; then FFTW3_PKG_ERRORS=`$PKG_CONFIG --short-errors --errors-to-stdout --print-errors "fftw3"` else FFTW3_PKG_ERRORS=`$PKG_CONFIG --errors-to-stdout --print-errors "fftw3"` fi # Put the nasty error message in config.log where it belongs echo "$FFTW3_PKG_ERRORS" >&5 { echo "$as_me:$LINENO: result: no" >&5 echo "${ECHO_T}no" >&6; } fftw3_ok=no elif test $pkg_failed = untried; then fftw3_ok=no else FFTW3_CFLAGS=$pkg_cv_FFTW3_CFLAGS FFTW3_LIBS=$pkg_cv_FFTW3_LIBS { echo "$as_me:$LINENO: result: yes" >&5 echo "${ECHO_T}yes" >&6; } fftw3_ok=yes fi pkg_failed=no { echo "$as_me:$LINENO: checking for FFTW3F" >&5 echo $ECHO_N "checking for FFTW3F... 
$ECHO_C" >&6; } if test -n "$PKG_CONFIG"; then if test -n "$FFTW3F_CFLAGS"; then pkg_cv_FFTW3F_CFLAGS="$FFTW3F_CFLAGS" else if test -n "$PKG_CONFIG" && \ { (echo "$as_me:$LINENO: \$PKG_CONFIG --exists --print-errors \"fftw3f\"") >&5 ($PKG_CONFIG --exists --print-errors "fftw3f") 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; then pkg_cv_FFTW3F_CFLAGS=`$PKG_CONFIG --cflags "fftw3f" 2>/dev/null` else pkg_failed=yes fi fi else pkg_failed=untried fi if test -n "$PKG_CONFIG"; then if test -n "$FFTW3F_LIBS"; then pkg_cv_FFTW3F_LIBS="$FFTW3F_LIBS" else if test -n "$PKG_CONFIG" && \ { (echo "$as_me:$LINENO: \$PKG_CONFIG --exists --print-errors \"fftw3f\"") >&5 ($PKG_CONFIG --exists --print-errors "fftw3f") 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; then pkg_cv_FFTW3F_LIBS=`$PKG_CONFIG --libs "fftw3f" 2>/dev/null` else pkg_failed=yes fi fi else pkg_failed=untried fi if test $pkg_failed = yes; then if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then _pkg_short_errors_supported=yes else _pkg_short_errors_supported=no fi if test $_pkg_short_errors_supported = yes; then FFTW3F_PKG_ERRORS=`$PKG_CONFIG --short-errors --errors-to-stdout --print-errors "fftw3f"` else FFTW3F_PKG_ERRORS=`$PKG_CONFIG --errors-to-stdout --print-errors "fftw3f"` fi # Put the nasty error message in config.log where it belongs echo "$FFTW3F_PKG_ERRORS" >&5 { echo "$as_me:$LINENO: result: no" >&5 echo "${ECHO_T}no" >&6; } fftw3f_ok=no elif test $pkg_failed = untried; then fftw3f_ok=no else FFTW3F_CFLAGS=$pkg_cv_FFTW3F_CFLAGS FFTW3F_LIBS=$pkg_cv_FFTW3F_LIBS { echo "$as_me:$LINENO: result: yes" >&5 echo "${ECHO_T}yes" >&6; } fftw3f_ok=yes fi if test "x$fftw3_ok" = "xyes"; then cat >>confdefs.h <<\_ACEOF #define HAVE_FFTW3 1 _ACEOF else { { echo "$as_me:$LINENO: error: Blip construction routines require that FFTW3 be installed." >&5 echo "$as_me: error: Blip construction routines require that FFTW3 be installed." 
>&2;} { (exit 1); exit 1; }; } fi if test "x$fftw3f_ok" = "xyes"; then cat >>confdefs.h <<\_ACEOF #define HAVE_FFTW3F 1 _ACEOF else { { echo "$as_me:$LINENO: error: Blip construction routines require that FFTW3 be installed." >&5 echo "$as_me: error: Blip construction routines require that FFTW3 be installed." >&2;} { (exit 1); exit 1; }; } fi fi # Check whether --enable-precision was given. if test "${enable_precision+set}" = set; then enableval=$enable_precision; fi if test "x$enable_precision" = "xyes"; then cat >>confdefs.h <<\_ACEOF #define HIGH_PRECISION 1 _ACEOF fi # Check whether --enable-profile was given. if test "${enable_profile+set}" = set; then enableval=$enable_profile; fi if test "x$enable_profile" = "xyes"; then CFLAGS="-pg $CFLAGS" FFLAGS="-pg $FFLAGS" LDFLAGS="-pg $LDFLAGS" fi # Check whether --enable-debug was given. if test "${enable_debug+set}" = set; then enableval=$enable_debug; fi if test "x$enable_debug" = "xyes"; then CFLAGS="-DDEBUG $CFLAGS" FFLAGS="-DDEBUG $FFLAGS" fi if test "x$enable_fortran" != "xno"; then { echo "$as_me:$LINENO: result: *************** Enabling F77! ***************" >&5 echo "${ECHO_T}*************** Enabling F77! ***************" >&6; } ac_ext=f ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_f77_compiler_gnu { echo "$as_me:$LINENO: checking how to get verbose linking output from $F77" >&5 echo $ECHO_N "checking how to get verbose linking output from $F77... $ECHO_C" >&6; } if test "${ac_cv_prog_f77_v+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else cat >conftest.$ac_ext <<_ACEOF program main end _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? 
grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_f77_werror_flag" || test ! -s conftest.err } && test -s conftest.$ac_objext; then ac_cv_prog_f77_v= # Try some options frequently used verbose output for ac_verb in -v -verbose --verbose -V -\#\#\#; do cat >conftest.$ac_ext <<_ACEOF program main end _ACEOF # Compile and link our simple test program by passing a flag (argument # 1 to this macro) to the Fortran compiler in order to get # "verbose" output that we can then parse for the Fortran linker # flags. ac_save_FFLAGS=$FFLAGS FFLAGS="$FFLAGS $ac_verb" eval "set x $ac_link" shift echo "$as_me:$LINENO: $*" >&5 ac_f77_v_output=`eval $ac_link 5>&1 2>&1 | grep -v 'Driving:'` echo "$ac_f77_v_output" >&5 FFLAGS=$ac_save_FFLAGS rm -f conftest* # On HP/UX there is a line like: "LPATH is: /foo:/bar:/baz" where # /foo, /bar, and /baz are search directories for the Fortran linker. # Here, we change these into -L/foo -L/bar -L/baz (and put it first): ac_f77_v_output="`echo $ac_f77_v_output | grep 'LPATH is:' | sed 's,.*LPATH is\(: *[^ ]*\).*,\1,;s,: */, -L/,g'` $ac_f77_v_output" # FIXME: we keep getting bitten by quoted arguments; a more general fix # that detects unbalanced quotes in FLIBS should be implemented # and (ugh) tested at some point. case $ac_f77_v_output in # If we are using xlf then replace all the commas with spaces. *xlfentry*) ac_f77_v_output=`echo $ac_f77_v_output | sed 's/,/ /g'` ;; # With Intel ifc, ignore the quoted -mGLOB_options_string stuff (quoted # $LIBS confuse us, and the libraries appear later in the output anyway). *mGLOB_options_string*) ac_f77_v_output=`echo $ac_f77_v_output | sed 's/"-mGLOB[^"]*"/ /g'` ;; # Portland Group compiler has singly- or doubly-quoted -cmdline argument # Singly-quoted arguments were reported for versions 5.2-4 and 6.0-4. # Doubly-quoted arguments were reported for "PGF90/x86 Linux/x86 5.0-2". 
*-cmdline\ * | *-ignore\ * | *-def\ *) ac_f77_v_output=`echo $ac_f77_v_output | sed "\ s/-cmdline *'[^']*'/ /g; s/-cmdline *\"[^\"]*\"/ /g s/-ignore *'[^']*'/ /g; s/-ignore *\"[^\"]*\"/ /g s/-def *'[^']*'/ /g; s/-def *\"[^\"]*\"/ /g"` ;; # If we are using Cray Fortran then delete quotes. *cft90*) ac_f77_v_output=`echo $ac_f77_v_output | sed 's/"//g'` ;; esac # look for -l* and *.a constructs in the output for ac_arg in $ac_f77_v_output; do case $ac_arg in [\\/]*.a | ?:[\\/]*.a | -[lLRu]*) ac_cv_prog_f77_v=$ac_verb break 2 ;; esac done done if test -z "$ac_cv_prog_f77_v"; then { echo "$as_me:$LINENO: WARNING: cannot determine how to obtain linking information from $F77" >&5 echo "$as_me: WARNING: cannot determine how to obtain linking information from $F77" >&2;} fi else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 { echo "$as_me:$LINENO: WARNING: compilation failed" >&5 echo "$as_me: WARNING: compilation failed" >&2;} fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi { echo "$as_me:$LINENO: result: $ac_cv_prog_f77_v" >&5 echo "${ECHO_T}$ac_cv_prog_f77_v" >&6; } { echo "$as_me:$LINENO: checking for Fortran 77 libraries of $F77" >&5 echo $ECHO_N "checking for Fortran 77 libraries of $F77... $ECHO_C" >&6; } if test "${ac_cv_f77_libs+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else if test "x$FLIBS" != "x"; then ac_cv_f77_libs="$FLIBS" # Let the user override the test. else cat >conftest.$ac_ext <<_ACEOF program main end _ACEOF # Compile and link our simple test program by passing a flag (argument # 1 to this macro) to the Fortran compiler in order to get # "verbose" output that we can then parse for the Fortran linker # flags. 
ac_save_FFLAGS=$FFLAGS FFLAGS="$FFLAGS $ac_cv_prog_f77_v" eval "set x $ac_link" shift echo "$as_me:$LINENO: $*" >&5 ac_f77_v_output=`eval $ac_link 5>&1 2>&1 | grep -v 'Driving:'` echo "$ac_f77_v_output" >&5 FFLAGS=$ac_save_FFLAGS rm -f conftest* # On HP/UX there is a line like: "LPATH is: /foo:/bar:/baz" where # /foo, /bar, and /baz are search directories for the Fortran linker. # Here, we change these into -L/foo -L/bar -L/baz (and put it first): ac_f77_v_output="`echo $ac_f77_v_output | grep 'LPATH is:' | sed 's,.*LPATH is\(: *[^ ]*\).*,\1,;s,: */, -L/,g'` $ac_f77_v_output" # FIXME: we keep getting bitten by quoted arguments; a more general fix # that detects unbalanced quotes in FLIBS should be implemented # and (ugh) tested at some point. case $ac_f77_v_output in # If we are using xlf then replace all the commas with spaces. *xlfentry*) ac_f77_v_output=`echo $ac_f77_v_output | sed 's/,/ /g'` ;; # With Intel ifc, ignore the quoted -mGLOB_options_string stuff (quoted # $LIBS confuse us, and the libraries appear later in the output anyway). *mGLOB_options_string*) ac_f77_v_output=`echo $ac_f77_v_output | sed 's/"-mGLOB[^"]*"/ /g'` ;; # Portland Group compiler has singly- or doubly-quoted -cmdline argument # Singly-quoted arguments were reported for versions 5.2-4 and 6.0-4. # Doubly-quoted arguments were reported for "PGF90/x86 Linux/x86 5.0-2". *-cmdline\ * | *-ignore\ * | *-def\ *) ac_f77_v_output=`echo $ac_f77_v_output | sed "\ s/-cmdline *'[^']*'/ /g; s/-cmdline *\"[^\"]*\"/ /g s/-ignore *'[^']*'/ /g; s/-ignore *\"[^\"]*\"/ /g s/-def *'[^']*'/ /g; s/-def *\"[^\"]*\"/ /g"` ;; # If we are using Cray Fortran then delete quotes. 
*cft90*) ac_f77_v_output=`echo $ac_f77_v_output | sed 's/"//g'` ;; esac ac_cv_f77_libs= # Save positional arguments (if any) ac_save_positional="$@" set X $ac_f77_v_output while test $# != 1; do shift ac_arg=$1 case $ac_arg in [\\/]*.a | ?:[\\/]*.a) ac_exists=false for ac_i in $ac_cv_f77_libs; do if test x"$ac_arg" = x"$ac_i"; then ac_exists=true break fi done if test x"$ac_exists" = xtrue; then : else ac_cv_f77_libs="$ac_cv_f77_libs $ac_arg" fi ;; -bI:*) ac_exists=false for ac_i in $ac_cv_f77_libs; do if test x"$ac_arg" = x"$ac_i"; then ac_exists=true break fi done if test x"$ac_exists" = xtrue; then : else if test "$ac_compiler_gnu" = yes; then for ac_link_opt in $ac_arg; do ac_cv_f77_libs="$ac_cv_f77_libs -Xlinker $ac_link_opt" done else ac_cv_f77_libs="$ac_cv_f77_libs $ac_arg" fi fi ;; # Ignore these flags. -lang* | -lcrt*.o | -lc | -lgcc* | -lSystem | -libmil | -LANG:=* | -LIST:* | -LNO:*) ;; -lkernel32) test x"$CYGWIN" != xyes && ac_cv_f77_libs="$ac_cv_f77_libs $ac_arg" ;; -[LRuYz]) # These flags, when seen by themselves, take an argument. # We remove the space between option and argument and re-iterate # unless we find an empty arg or a new option (starting with -) case $2 in "" | -*);; *) ac_arg="$ac_arg$2" shift; shift set X $ac_arg "$@" ;; esac ;; -YP,*) for ac_j in `echo $ac_arg | sed -e 's/-YP,/-L/;s/:/ -L/g'`; do ac_exists=false for ac_i in $ac_cv_f77_libs; do if test x"$ac_j" = x"$ac_i"; then ac_exists=true break fi done if test x"$ac_exists" = xtrue; then : else ac_arg="$ac_arg $ac_j" ac_cv_f77_libs="$ac_cv_f77_libs $ac_j" fi done ;; -[lLR]*) ac_exists=false for ac_i in $ac_cv_f77_libs; do if test x"$ac_arg" = x"$ac_i"; then ac_exists=true break fi done if test x"$ac_exists" = xtrue; then : else ac_cv_f77_libs="$ac_cv_f77_libs $ac_arg" fi ;; -zallextract*| -zdefaultextract) ac_cv_f77_libs="$ac_cv_f77_libs $ac_arg" ;; # Ignore everything else. 
esac done # restore positional arguments set X $ac_save_positional; shift # We only consider "LD_RUN_PATH" on Solaris systems. If this is seen, # then we insist that the "run path" must be an absolute path (i.e. it # must begin with a "/"). case `(uname -sr) 2>/dev/null` in "SunOS 5"*) ac_ld_run_path=`echo $ac_f77_v_output | sed -n 's,^.*LD_RUN_PATH *= *\(/[^ ]*\).*$,-R\1,p'` test "x$ac_ld_run_path" != x && if test "$ac_compiler_gnu" = yes; then for ac_link_opt in $ac_ld_run_path; do ac_cv_f77_libs="$ac_cv_f77_libs -Xlinker $ac_link_opt" done else ac_cv_f77_libs="$ac_cv_f77_libs $ac_ld_run_path" fi ;; esac fi # test "x$[]_AC_LANG_PREFIX[]LIBS" = "x" fi { echo "$as_me:$LINENO: result: $ac_cv_f77_libs" >&5 echo "${ECHO_T}$ac_cv_f77_libs" >&6; } FLIBS="$ac_cv_f77_libs" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_ext=f ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_f77_compiler_gnu { echo "$as_me:$LINENO: checking for dummy main to link with Fortran 77 libraries" >&5 echo $ECHO_N "checking for dummy main to link with Fortran 77 libraries... $ECHO_C" >&6; } if test "${ac_cv_f77_dummy_main+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else ac_f77_dm_save_LIBS=$LIBS LIBS="$LIBS $FLIBS" ac_fortran_dm_var=F77_DUMMY_MAIN ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu # First, try linking without a dummy main: cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ #ifdef F77_DUMMY_MAIN # ifdef __cplusplus extern "C" # endif int F77_DUMMY_MAIN() { return 1; } #endif int main () { ; return 0; } _ACEOF rm -f conftest.$ac_objext conftest$ac_exeext if { (ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_link") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest$ac_exeext && $as_test_x conftest$ac_exeext; then ac_cv_fortran_dummy_main=none else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_cv_fortran_dummy_main=unknown fi rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ conftest$ac_exeext conftest.$ac_ext if test $ac_cv_fortran_dummy_main = unknown; then for ac_func in MAIN__ MAIN_ __main MAIN _MAIN __MAIN main_ main__ _main; do cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #define $ac_fortran_dm_var $ac_func #ifdef F77_DUMMY_MAIN # ifdef __cplusplus extern "C" # endif int F77_DUMMY_MAIN() { return 1; } #endif int main () { ; return 0; } _ACEOF rm -f conftest.$ac_objext conftest$ac_exeext if { (ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_link") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! 
-s conftest.err } && test -s conftest$ac_exeext && $as_test_x conftest$ac_exeext; then ac_cv_fortran_dummy_main=$ac_func; break else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 fi rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ conftest$ac_exeext conftest.$ac_ext done fi ac_ext=f ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_f77_compiler_gnu ac_cv_f77_dummy_main=$ac_cv_fortran_dummy_main rm -f conftest* LIBS=$ac_f77_dm_save_LIBS fi { echo "$as_me:$LINENO: result: $ac_cv_f77_dummy_main" >&5 echo "${ECHO_T}$ac_cv_f77_dummy_main" >&6; } F77_DUMMY_MAIN=$ac_cv_f77_dummy_main if test "$F77_DUMMY_MAIN" != unknown; then if test $F77_DUMMY_MAIN != none; then cat >>confdefs.h <<_ACEOF #define F77_DUMMY_MAIN $F77_DUMMY_MAIN _ACEOF if test "x$ac_cv_fc_dummy_main" = "x$ac_cv_f77_dummy_main"; then cat >>confdefs.h <<\_ACEOF #define FC_DUMMY_MAIN_EQ_F77 1 _ACEOF fi fi else { { echo "$as_me:$LINENO: error: linking to Fortran libraries from C fails See \`config.log' for more details." >&5 echo "$as_me: error: linking to Fortran libraries from C fails See \`config.log' for more details." >&2;} { (exit 1); exit 1; }; } fi ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_ext=f ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_f77_compiler_gnu { echo "$as_me:$LINENO: checking for Fortran 77 name-mangling scheme" >&5 echo $ECHO_N "checking for Fortran 77 name-mangling scheme... 
$ECHO_C" >&6; } if test "${ac_cv_f77_mangling+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else cat >conftest.$ac_ext <<_ACEOF subroutine foobar() return end subroutine foo_bar() return end _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_f77_werror_flag" || test ! -s conftest.err } && test -s conftest.$ac_objext; then mv conftest.$ac_objext cfortran_test.$ac_objext ac_save_LIBS=$LIBS LIBS="cfortran_test.$ac_objext $LIBS $FLIBS" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_success=no for ac_foobar in foobar FOOBAR; do for ac_underscore in "" "_"; do ac_func="$ac_foobar$ac_underscore" cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ #ifdef __cplusplus extern "C" #endif char $ac_func (); #ifdef F77_DUMMY_MAIN # ifdef __cplusplus extern "C" # endif int F77_DUMMY_MAIN() { return 1; } #endif int main () { return $ac_func (); ; return 0; } _ACEOF rm -f conftest.$ac_objext conftest$ac_exeext if { (ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_link") 2>conftest.er1 ac_status=$? 
grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest$ac_exeext && $as_test_x conftest$ac_exeext; then ac_success=yes; break 2 else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 fi rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ conftest$ac_exeext conftest.$ac_ext done done ac_ext=f ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_f77_compiler_gnu if test "$ac_success" = "yes"; then case $ac_foobar in foobar) ac_case=lower ac_foo_bar=foo_bar ;; FOOBAR) ac_case=upper ac_foo_bar=FOO_BAR ;; esac ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_success_extra=no for ac_extra in "" "_"; do ac_func="$ac_foo_bar$ac_underscore$ac_extra" cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ #ifdef __cplusplus extern "C" #endif char $ac_func (); #ifdef F77_DUMMY_MAIN # ifdef __cplusplus extern "C" # endif int F77_DUMMY_MAIN() { return 1; } #endif int main () { return $ac_func (); ; return 0; } _ACEOF rm -f conftest.$ac_objext conftest$ac_exeext if { (ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_link") 2>conftest.er1 ac_status=$? 
grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest$ac_exeext && $as_test_x conftest$ac_exeext; then ac_success_extra=yes; break else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 fi rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ conftest$ac_exeext conftest.$ac_ext done ac_ext=f ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_f77_compiler_gnu if test "$ac_success_extra" = "yes"; then ac_cv_f77_mangling="$ac_case case" if test -z "$ac_underscore"; then ac_cv_f77_mangling="$ac_cv_f77_mangling, no underscore" else ac_cv_f77_mangling="$ac_cv_f77_mangling, underscore" fi if test -z "$ac_extra"; then ac_cv_f77_mangling="$ac_cv_f77_mangling, no extra underscore" else ac_cv_f77_mangling="$ac_cv_f77_mangling, extra underscore" fi else ac_cv_f77_mangling="unknown" fi else ac_cv_f77_mangling="unknown" fi LIBS=$ac_save_LIBS rm -f cfortran_test* conftest* else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 { { echo "$as_me:$LINENO: error: cannot compile a simple Fortran program See \`config.log' for more details." >&5 echo "$as_me: error: cannot compile a simple Fortran program See \`config.log' for more details." 
>&2;} { (exit 1); exit 1; }; } fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi { echo "$as_me:$LINENO: result: $ac_cv_f77_mangling" >&5 echo "${ECHO_T}$ac_cv_f77_mangling" >&6; } ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_ext=f ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_f77_compiler_gnu case $ac_cv_f77_mangling in "lower case, no underscore, no extra underscore") cat >>confdefs.h <<\_ACEOF #define F77_FUNC(name,NAME) name _ACEOF cat >>confdefs.h <<\_ACEOF #define F77_FUNC_(name,NAME) name _ACEOF ;; "lower case, no underscore, extra underscore") cat >>confdefs.h <<\_ACEOF #define F77_FUNC(name,NAME) name _ACEOF cat >>confdefs.h <<\_ACEOF #define F77_FUNC_(name,NAME) name ## _ _ACEOF ;; "lower case, underscore, no extra underscore") cat >>confdefs.h <<\_ACEOF #define F77_FUNC(name,NAME) name ## _ _ACEOF cat >>confdefs.h <<\_ACEOF #define F77_FUNC_(name,NAME) name ## _ _ACEOF ;; "lower case, underscore, extra underscore") cat >>confdefs.h <<\_ACEOF #define F77_FUNC(name,NAME) name ## _ _ACEOF cat >>confdefs.h <<\_ACEOF #define F77_FUNC_(name,NAME) name ## __ _ACEOF ;; "upper case, no underscore, no extra underscore") cat >>confdefs.h <<\_ACEOF #define F77_FUNC(name,NAME) NAME _ACEOF cat >>confdefs.h <<\_ACEOF #define F77_FUNC_(name,NAME) NAME _ACEOF ;; "upper case, no underscore, extra underscore") cat >>confdefs.h <<\_ACEOF #define F77_FUNC(name,NAME) NAME _ACEOF cat >>confdefs.h <<\_ACEOF #define F77_FUNC_(name,NAME) NAME ## _ _ACEOF ;; "upper case, underscore, no extra underscore") cat >>confdefs.h <<\_ACEOF #define F77_FUNC(name,NAME) NAME ## _ _ACEOF cat >>confdefs.h <<\_ACEOF #define F77_FUNC_(name,NAME) NAME ## _ _ACEOF ;; "upper case, underscore, extra 
underscore") cat >>confdefs.h <<\_ACEOF #define F77_FUNC(name,NAME) NAME ## _ _ACEOF cat >>confdefs.h <<\_ACEOF #define F77_FUNC_(name,NAME) NAME ## __ _ACEOF ;; *) { echo "$as_me:$LINENO: WARNING: unknown Fortran name-mangling scheme" >&5 echo "$as_me: WARNING: unknown Fortran name-mangling scheme" >&2;} ;; esac ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_ext=f ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_f77_compiler_gnu { echo "$as_me:$LINENO: checking how to get verbose linking output from $F77" >&5 echo $ECHO_N "checking how to get verbose linking output from $F77... $ECHO_C" >&6; } if test "${ac_cv_prog_f77_v+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else cat >conftest.$ac_ext <<_ACEOF program main end _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_f77_werror_flag" || test ! -s conftest.err } && test -s conftest.$ac_objext; then ac_cv_prog_f77_v= # Try some options frequently used verbose output for ac_verb in -v -verbose --verbose -V -\#\#\#; do cat >conftest.$ac_ext <<_ACEOF program main end _ACEOF # Compile and link our simple test program by passing a flag (argument # 1 to this macro) to the Fortran compiler in order to get # "verbose" output that we can then parse for the Fortran linker # flags. 
ac_save_FFLAGS=$FFLAGS FFLAGS="$FFLAGS $ac_verb" eval "set x $ac_link" shift echo "$as_me:$LINENO: $*" >&5 ac_f77_v_output=`eval $ac_link 5>&1 2>&1 | grep -v 'Driving:'` echo "$ac_f77_v_output" >&5 FFLAGS=$ac_save_FFLAGS rm -f conftest* # On HP/UX there is a line like: "LPATH is: /foo:/bar:/baz" where # /foo, /bar, and /baz are search directories for the Fortran linker. # Here, we change these into -L/foo -L/bar -L/baz (and put it first): ac_f77_v_output="`echo $ac_f77_v_output | grep 'LPATH is:' | sed 's,.*LPATH is\(: *[^ ]*\).*,\1,;s,: */, -L/,g'` $ac_f77_v_output" # FIXME: we keep getting bitten by quoted arguments; a more general fix # that detects unbalanced quotes in FLIBS should be implemented # and (ugh) tested at some point. case $ac_f77_v_output in # If we are using xlf then replace all the commas with spaces. *xlfentry*) ac_f77_v_output=`echo $ac_f77_v_output | sed 's/,/ /g'` ;; # With Intel ifc, ignore the quoted -mGLOB_options_string stuff (quoted # $LIBS confuse us, and the libraries appear later in the output anyway). *mGLOB_options_string*) ac_f77_v_output=`echo $ac_f77_v_output | sed 's/"-mGLOB[^"]*"/ /g'` ;; # Portland Group compiler has singly- or doubly-quoted -cmdline argument # Singly-quoted arguments were reported for versions 5.2-4 and 6.0-4. # Doubly-quoted arguments were reported for "PGF90/x86 Linux/x86 5.0-2". *-cmdline\ * | *-ignore\ * | *-def\ *) ac_f77_v_output=`echo $ac_f77_v_output | sed "\ s/-cmdline *'[^']*'/ /g; s/-cmdline *\"[^\"]*\"/ /g s/-ignore *'[^']*'/ /g; s/-ignore *\"[^\"]*\"/ /g s/-def *'[^']*'/ /g; s/-def *\"[^\"]*\"/ /g"` ;; # If we are using Cray Fortran then delete quotes. 
*cft90*) ac_f77_v_output=`echo $ac_f77_v_output | sed 's/"//g'` ;; esac # look for -l* and *.a constructs in the output for ac_arg in $ac_f77_v_output; do case $ac_arg in [\\/]*.a | ?:[\\/]*.a | -[lLRu]*) ac_cv_prog_f77_v=$ac_verb break 2 ;; esac done done if test -z "$ac_cv_prog_f77_v"; then { echo "$as_me:$LINENO: WARNING: cannot determine how to obtain linking information from $F77" >&5 echo "$as_me: WARNING: cannot determine how to obtain linking information from $F77" >&2;} fi else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 { echo "$as_me:$LINENO: WARNING: compilation failed" >&5 echo "$as_me: WARNING: compilation failed" >&2;} fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi { echo "$as_me:$LINENO: result: $ac_cv_prog_f77_v" >&5 echo "${ECHO_T}$ac_cv_prog_f77_v" >&6; } { echo "$as_me:$LINENO: checking for Fortran 77 libraries of $F77" >&5 echo $ECHO_N "checking for Fortran 77 libraries of $F77... $ECHO_C" >&6; } if test "${ac_cv_f77_libs+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else if test "x$FLIBS" != "x"; then ac_cv_f77_libs="$FLIBS" # Let the user override the test. else cat >conftest.$ac_ext <<_ACEOF program main end _ACEOF # Compile and link our simple test program by passing a flag (argument # 1 to this macro) to the Fortran compiler in order to get # "verbose" output that we can then parse for the Fortran linker # flags. ac_save_FFLAGS=$FFLAGS FFLAGS="$FFLAGS $ac_cv_prog_f77_v" eval "set x $ac_link" shift echo "$as_me:$LINENO: $*" >&5 ac_f77_v_output=`eval $ac_link 5>&1 2>&1 | grep -v 'Driving:'` echo "$ac_f77_v_output" >&5 FFLAGS=$ac_save_FFLAGS rm -f conftest* # On HP/UX there is a line like: "LPATH is: /foo:/bar:/baz" where # /foo, /bar, and /baz are search directories for the Fortran linker. 
# Here, we change these into -L/foo -L/bar -L/baz (and put it first): ac_f77_v_output="`echo $ac_f77_v_output | grep 'LPATH is:' | sed 's,.*LPATH is\(: *[^ ]*\).*,\1,;s,: */, -L/,g'` $ac_f77_v_output" # FIXME: we keep getting bitten by quoted arguments; a more general fix # that detects unbalanced quotes in FLIBS should be implemented # and (ugh) tested at some point. case $ac_f77_v_output in # If we are using xlf then replace all the commas with spaces. *xlfentry*) ac_f77_v_output=`echo $ac_f77_v_output | sed 's/,/ /g'` ;; # With Intel ifc, ignore the quoted -mGLOB_options_string stuff (quoted # $LIBS confuse us, and the libraries appear later in the output anyway). *mGLOB_options_string*) ac_f77_v_output=`echo $ac_f77_v_output | sed 's/"-mGLOB[^"]*"/ /g'` ;; # Portland Group compiler has singly- or doubly-quoted -cmdline argument # Singly-quoted arguments were reported for versions 5.2-4 and 6.0-4. # Doubly-quoted arguments were reported for "PGF90/x86 Linux/x86 5.0-2". *-cmdline\ * | *-ignore\ * | *-def\ *) ac_f77_v_output=`echo $ac_f77_v_output | sed "\ s/-cmdline *'[^']*'/ /g; s/-cmdline *\"[^\"]*\"/ /g s/-ignore *'[^']*'/ /g; s/-ignore *\"[^\"]*\"/ /g s/-def *'[^']*'/ /g; s/-def *\"[^\"]*\"/ /g"` ;; # If we are using Cray Fortran then delete quotes. 
*cft90*) ac_f77_v_output=`echo $ac_f77_v_output | sed 's/"//g'` ;; esac ac_cv_f77_libs= # Save positional arguments (if any) ac_save_positional="$@" set X $ac_f77_v_output while test $# != 1; do shift ac_arg=$1 case $ac_arg in [\\/]*.a | ?:[\\/]*.a) ac_exists=false for ac_i in $ac_cv_f77_libs; do if test x"$ac_arg" = x"$ac_i"; then ac_exists=true break fi done if test x"$ac_exists" = xtrue; then : else ac_cv_f77_libs="$ac_cv_f77_libs $ac_arg" fi ;; -bI:*) ac_exists=false for ac_i in $ac_cv_f77_libs; do if test x"$ac_arg" = x"$ac_i"; then ac_exists=true break fi done if test x"$ac_exists" = xtrue; then : else if test "$ac_compiler_gnu" = yes; then for ac_link_opt in $ac_arg; do ac_cv_f77_libs="$ac_cv_f77_libs -Xlinker $ac_link_opt" done else ac_cv_f77_libs="$ac_cv_f77_libs $ac_arg" fi fi ;; # Ignore these flags. -lang* | -lcrt*.o | -lc | -lgcc* | -lSystem | -libmil | -LANG:=* | -LIST:* | -LNO:*) ;; -lkernel32) test x"$CYGWIN" != xyes && ac_cv_f77_libs="$ac_cv_f77_libs $ac_arg" ;; -[LRuYz]) # These flags, when seen by themselves, take an argument. # We remove the space between option and argument and re-iterate # unless we find an empty arg or a new option (starting with -) case $2 in "" | -*);; *) ac_arg="$ac_arg$2" shift; shift set X $ac_arg "$@" ;; esac ;; -YP,*) for ac_j in `echo $ac_arg | sed -e 's/-YP,/-L/;s/:/ -L/g'`; do ac_exists=false for ac_i in $ac_cv_f77_libs; do if test x"$ac_j" = x"$ac_i"; then ac_exists=true break fi done if test x"$ac_exists" = xtrue; then : else ac_arg="$ac_arg $ac_j" ac_cv_f77_libs="$ac_cv_f77_libs $ac_j" fi done ;; -[lLR]*) ac_exists=false for ac_i in $ac_cv_f77_libs; do if test x"$ac_arg" = x"$ac_i"; then ac_exists=true break fi done if test x"$ac_exists" = xtrue; then : else ac_cv_f77_libs="$ac_cv_f77_libs $ac_arg" fi ;; -zallextract*| -zdefaultextract) ac_cv_f77_libs="$ac_cv_f77_libs $ac_arg" ;; # Ignore everything else. 
esac done # restore positional arguments set X $ac_save_positional; shift # We only consider "LD_RUN_PATH" on Solaris systems. If this is seen, # then we insist that the "run path" must be an absolute path (i.e. it # must begin with a "/"). case `(uname -sr) 2>/dev/null` in "SunOS 5"*) ac_ld_run_path=`echo $ac_f77_v_output | sed -n 's,^.*LD_RUN_PATH *= *\(/[^ ]*\).*$,-R\1,p'` test "x$ac_ld_run_path" != x && if test "$ac_compiler_gnu" = yes; then for ac_link_opt in $ac_ld_run_path; do ac_cv_f77_libs="$ac_cv_f77_libs -Xlinker $ac_link_opt" done else ac_cv_f77_libs="$ac_cv_f77_libs $ac_ld_run_path" fi ;; esac fi # test "x$[]_AC_LANG_PREFIX[]LIBS" = "x" fi { echo "$as_me:$LINENO: result: $ac_cv_f77_libs" >&5 echo "${ECHO_T}$ac_cv_f77_libs" >&6; } FLIBS="$ac_cv_f77_libs" ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_ext=f ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_f77_compiler_gnu { echo "$as_me:$LINENO: checking for dummy main to link with Fortran 77 libraries" >&5 echo $ECHO_N "checking for dummy main to link with Fortran 77 libraries... $ECHO_C" >&6; } if test "${ac_cv_f77_dummy_main+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else ac_f77_dm_save_LIBS=$LIBS LIBS="$LIBS $FLIBS" ac_fortran_dm_var=F77_DUMMY_MAIN ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu # First, try linking without a dummy main: cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ #ifdef F77_DUMMY_MAIN # ifdef __cplusplus extern "C" # endif int F77_DUMMY_MAIN() { return 1; } #endif int main () { ; return 0; } _ACEOF rm -f conftest.$ac_objext conftest$ac_exeext if { (ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_link") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest$ac_exeext && $as_test_x conftest$ac_exeext; then ac_cv_fortran_dummy_main=none else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_cv_fortran_dummy_main=unknown fi rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ conftest$ac_exeext conftest.$ac_ext if test $ac_cv_fortran_dummy_main = unknown; then for ac_func in MAIN__ MAIN_ __main MAIN _MAIN __MAIN main_ main__ _main; do cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #define $ac_fortran_dm_var $ac_func #ifdef F77_DUMMY_MAIN # ifdef __cplusplus extern "C" # endif int F77_DUMMY_MAIN() { return 1; } #endif int main () { ; return 0; } _ACEOF rm -f conftest.$ac_objext conftest$ac_exeext if { (ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_link") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! 
-s conftest.err } && test -s conftest$ac_exeext && $as_test_x conftest$ac_exeext; then ac_cv_fortran_dummy_main=$ac_func; break else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 fi rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ conftest$ac_exeext conftest.$ac_ext done fi ac_ext=f ac_compile='$F77 -c $FFLAGS conftest.$ac_ext >&5' ac_link='$F77 -o conftest$ac_exeext $FFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_f77_compiler_gnu ac_cv_f77_dummy_main=$ac_cv_fortran_dummy_main rm -f conftest* LIBS=$ac_f77_dm_save_LIBS fi { echo "$as_me:$LINENO: result: $ac_cv_f77_dummy_main" >&5 echo "${ECHO_T}$ac_cv_f77_dummy_main" >&6; } F77_DUMMY_MAIN=$ac_cv_f77_dummy_main if test "$F77_DUMMY_MAIN" != unknown; then if test $F77_DUMMY_MAIN != none; then cat >>confdefs.h <<_ACEOF #define F77_DUMMY_MAIN $F77_DUMMY_MAIN _ACEOF if test "x$ac_cv_fc_dummy_main" = "x$ac_cv_f77_dummy_main"; then cat >>confdefs.h <<\_ACEOF #define FC_DUMMY_MAIN_EQ_F77 1 _ACEOF fi fi else { { echo "$as_me:$LINENO: error: linking to Fortran libraries from C fails See \`config.log' for more details." >&5 echo "$as_me: error: linking to Fortran libraries from C fails See \`config.log' for more details." >&2;} { (exit 1); exit 1; }; } fi ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu fi # Checks for typedefs, structures, and compiler characteristics. { echo "$as_me:$LINENO: checking for stdbool.h that conforms to C99" >&5 echo $ECHO_N "checking for stdbool.h that conforms to C99... $ECHO_C" >&6; } if test "${ac_cv_header_stdbool_h+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ #include <stdbool.h> #ifndef bool "error: bool is not defined" #endif #ifndef false "error: false is not defined" #endif #if false "error: false is not 0" #endif #ifndef true "error: true is not defined" #endif #if true != 1 "error: true is not 1" #endif #ifndef __bool_true_false_are_defined "error: __bool_true_false_are_defined is not defined" #endif struct s { _Bool s: 1; _Bool t; } s; char a[true == 1 ? 1 : -1]; char b[false == 0 ? 1 : -1]; char c[__bool_true_false_are_defined == 1 ? 1 : -1]; char d[(bool) 0.5 == true ? 1 : -1]; bool e = &s; char f[(_Bool) 0.0 == false ? 1 : -1]; char g[true]; char h[sizeof (_Bool)]; char i[sizeof s.t]; enum { j = false, k = true, l = false * true, m = true * 256 }; _Bool n[m]; char o[sizeof n == m * sizeof n[0] ? 1 : -1]; char p[-1 - (_Bool) 0 < 0 && -1 - (bool) 0 < 0 ? 1 : -1]; # if defined __xlc__ || defined __GNUC__ /* Catch a bug in IBM AIX xlc compiler version 6.0.0.0 reported by James Lemley on 2005-10-05; see http://lists.gnu.org/archive/html/bug-coreutils/2005-10/msg00086.html This test is not quite right, since xlc is allowed to reject this program, as the initializer for xlcbug is not one of the forms that C requires support for. However, doing the test right would require a runtime test, and that would make cross-compilation harder. Let us hope that IBM fixes the xlc bug, and also adds support for this kind of constant expression. In the meantime, this test will reject xlc, which is OK, since our stdbool.h substitute should suffice. We also test this with GCC, where it should work, to detect more quickly whether someone messes up the test in the future. */ char digs[] = "0123456789"; int xlcbug = 1 / (&(digs + 5)[-2 + (bool) 1] == &digs[4] ? 1 : -1); # endif /* Catch a bug in an HP-UX C compiler.
See http://gcc.gnu.org/ml/gcc-patches/2003-12/msg02303.html http://lists.gnu.org/archive/html/bug-coreutils/2005-11/msg00161.html */ _Bool q = true; _Bool *pq = &q; #ifdef F77_DUMMY_MAIN # ifdef __cplusplus extern "C" # endif int F77_DUMMY_MAIN() { return 1; } #endif int main () { *pq |= q; *pq |= ! q; /* Refer to every declared value, to avoid compiler optimizations. */ return (!a + !b + !c + !d + !e + !f + !g + !h + !i + !!j + !k + !!l + !m + !n + !o + !p + !q + !pq); ; return 0; } _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest.$ac_objext; then ac_cv_header_stdbool_h=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_cv_header_stdbool_h=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi { echo "$as_me:$LINENO: result: $ac_cv_header_stdbool_h" >&5 echo "${ECHO_T}$ac_cv_header_stdbool_h" >&6; } { echo "$as_me:$LINENO: checking for _Bool" >&5 echo $ECHO_N "checking for _Bool... $ECHO_C" >&6; } if test "${ac_cv_type__Bool+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. 
*/ $ac_includes_default typedef _Bool ac__type_new_; #ifdef F77_DUMMY_MAIN # ifdef __cplusplus extern "C" # endif int F77_DUMMY_MAIN() { return 1; } #endif int main () { if ((ac__type_new_ *) 0) return 0; if (sizeof (ac__type_new_)) return 0; ; return 0; } _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest.$ac_objext; then ac_cv_type__Bool=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_cv_type__Bool=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi { echo "$as_me:$LINENO: result: $ac_cv_type__Bool" >&5 echo "${ECHO_T}$ac_cv_type__Bool" >&6; } if test $ac_cv_type__Bool = yes; then cat >>confdefs.h <<_ACEOF #define HAVE__BOOL 1 _ACEOF fi if test $ac_cv_header_stdbool_h = yes; then cat >>confdefs.h <<\_ACEOF #define HAVE_STDBOOL_H 1 _ACEOF fi { echo "$as_me:$LINENO: checking for an ANSI C-conforming const" >&5 echo $ECHO_N "checking for an ANSI C-conforming const... $ECHO_C" >&6; } if test "${ac_cv_c_const+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #ifdef F77_DUMMY_MAIN # ifdef __cplusplus extern "C" # endif int F77_DUMMY_MAIN() { return 1; } #endif int main () { /* FIXME: Include the comments suggested by Paul. */ #ifndef __cplusplus /* Ultrix mips cc rejects this. */ typedef int charset[2]; const charset cs; /* SunOS 4.1.1 cc rejects this. */ char const *const *pcpcc; char **ppc; /* NEC SVR4.0.2 mips cc rejects this. 
*/ struct point {int x, y;}; static struct point const zero = {0,0}; /* AIX XL C 1.02.0.0 rejects this. It does not let you subtract one const X* pointer from another in an arm of an if-expression whose if-part is not a constant expression */ const char *g = "string"; pcpcc = &g + (g ? g-g : 0); /* HPUX 7.0 cc rejects these. */ ++pcpcc; ppc = (char**) pcpcc; pcpcc = (char const *const *) ppc; { /* SCO 3.2v4 cc rejects this. */ char *t; char const *s = 0 ? (char *) 0 : (char const *) 0; *t++ = 0; if (s) return 0; } { /* Someone thinks the Sun supposedly-ANSI compiler will reject this. */ int x[] = {25, 17}; const int *foo = &x[0]; ++foo; } { /* Sun SC1.0 ANSI compiler rejects this -- but not the above. */ typedef const int *iptr; iptr p = 0; ++p; } { /* AIX XL C 1.02.0.0 rejects this saying "k.c", line 2.27: 1506-025 (S) Operand must be a modifiable lvalue. */ struct s { int j; const int *ap[3]; }; struct s *b; b->j = 5; } { /* ULTRIX-32 V3.1 (Rev 9) vcc rejects this */ const int foo = 10; if (!foo) return 0; } return !cs[0] && !zero.x; #endif ; return 0; } _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! 
-s conftest.err } && test -s conftest.$ac_objext; then ac_cv_c_const=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_cv_c_const=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi { echo "$as_me:$LINENO: result: $ac_cv_c_const" >&5 echo "${ECHO_T}$ac_cv_c_const" >&6; } if test $ac_cv_c_const = no; then cat >>confdefs.h <<\_ACEOF #define const _ACEOF fi { echo "$as_me:$LINENO: checking for inline" >&5 echo $ECHO_N "checking for inline... $ECHO_C" >&6; } if test "${ac_cv_c_inline+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else ac_cv_c_inline=no for ac_kw in inline __inline__ __inline; do cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #ifndef __cplusplus typedef int foo_t; static $ac_kw foo_t static_foo () {return 0; } $ac_kw foo_t foo () {return 0; } #endif _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! 
-s conftest.err } && test -s conftest.$ac_objext; then ac_cv_c_inline=$ac_kw else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext test "$ac_cv_c_inline" != no && break done fi { echo "$as_me:$LINENO: result: $ac_cv_c_inline" >&5 echo "${ECHO_T}$ac_cv_c_inline" >&6; } case $ac_cv_c_inline in inline | yes) ;; *) case $ac_cv_c_inline in no) ac_val=;; *) ac_val=$ac_cv_c_inline;; esac cat >>confdefs.h <<_ACEOF #ifndef __cplusplus #define inline $ac_val #endif _ACEOF ;; esac { echo "$as_me:$LINENO: checking for size_t" >&5 echo $ECHO_N "checking for size_t... $ECHO_C" >&6; } if test "${ac_cv_type_size_t+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ $ac_includes_default typedef size_t ac__type_new_; #ifdef F77_DUMMY_MAIN # ifdef __cplusplus extern "C" # endif int F77_DUMMY_MAIN() { return 1; } #endif int main () { if ((ac__type_new_ *) 0) return 0; if (sizeof (ac__type_new_)) return 0; ; return 0; } _ACEOF rm -f conftest.$ac_objext if { (ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_compile") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! 
-s conftest.err } && test -s conftest.$ac_objext; then ac_cv_type_size_t=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_cv_type_size_t=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi { echo "$as_me:$LINENO: result: $ac_cv_type_size_t" >&5 echo "${ECHO_T}$ac_cv_type_size_t" >&6; } if test $ac_cv_type_size_t = yes; then : else cat >>confdefs.h <<_ACEOF #define size_t unsigned int _ACEOF fi # Checks for library functions. { echo "$as_me:$LINENO: checking for error_at_line" >&5 echo $ECHO_N "checking for error_at_line... $ECHO_C" >&6; } if test "${ac_cv_lib_error_at_line+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ #include #ifdef F77_DUMMY_MAIN # ifdef __cplusplus extern "C" # endif int F77_DUMMY_MAIN() { return 1; } #endif int main () { error_at_line (0, 0, "", 0, "an error occurred"); ; return 0; } _ACEOF rm -f conftest.$ac_objext conftest$ac_exeext if { (ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_link") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! 
-s conftest.err } && test -s conftest$ac_exeext && $as_test_x conftest$ac_exeext; then ac_cv_lib_error_at_line=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_cv_lib_error_at_line=no fi rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ conftest$ac_exeext conftest.$ac_ext fi { echo "$as_me:$LINENO: result: $ac_cv_lib_error_at_line" >&5 echo "${ECHO_T}$ac_cv_lib_error_at_line" >&6; } if test $ac_cv_lib_error_at_line = no; then case " $LIBOBJS " in *" error.$ac_objext "* ) ;; *) LIBOBJS="$LIBOBJS error.$ac_objext" ;; esac fi { echo "$as_me:$LINENO: checking for working strtod" >&5 echo $ECHO_N "checking for working strtod... $ECHO_C" >&6; } if test "${ac_cv_func_strtod+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else if test "$cross_compiling" = yes; then ac_cv_func_strtod=no else cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ $ac_includes_default #ifndef strtod double strtod (); #endif int main() { { /* Some versions of Linux strtod mis-parse strings with leading '+'. */ char *string = " +69"; char *term; double value; value = strtod (string, &term); if (value != 69 || term != (string + 4)) return 1; } { /* Under Solaris 2.4, strtod returns the wrong value for the terminating character under some conditions. */ char *string = "NaN"; char *term; strtod (string, &term); if (term != string && *(term - 1) == 0) return 1; } return 0; } _ACEOF rm -f conftest$ac_exeext if { (ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_link") 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 (exit $ac_status); } && { ac_try='./conftest$ac_exeext' { (case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_try") 2>&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; }; then ac_cv_func_strtod=yes else echo "$as_me: program exited with status $ac_status" >&5 echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ( exit $ac_status ) ac_cv_func_strtod=no fi rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext fi fi { echo "$as_me:$LINENO: result: $ac_cv_func_strtod" >&5 echo "${ECHO_T}$ac_cv_func_strtod" >&6; } if test $ac_cv_func_strtod = no; then case " $LIBOBJS " in *" strtod.$ac_objext "* ) ;; *) LIBOBJS="$LIBOBJS strtod.$ac_objext" ;; esac { echo "$as_me:$LINENO: checking for pow" >&5 echo $ECHO_N "checking for pow... $ECHO_C" >&6; } if test "${ac_cv_func_pow+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ /* Define pow to an innocuous variant, in case declares pow. For example, HP-UX 11i declares gettimeofday. */ #define pow innocuous_pow /* System header to define __stub macros and hopefully few prototypes, which can conflict with char pow (); below. Prefer to if __STDC__ is defined, since exists even on freestanding compilers. */ #ifdef __STDC__ # include #else # include #endif #undef pow /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ #ifdef __cplusplus extern "C" #endif char pow (); /* The GNU C library defines this for functions which it implements to always fail with ENOSYS. 
Some functions are actually named something starting with __ and the normal name is an alias. */ #if defined __stub_pow || defined __stub___pow choke me #endif #ifdef F77_DUMMY_MAIN # ifdef __cplusplus extern "C" # endif int F77_DUMMY_MAIN() { return 1; } #endif int main () { return pow (); ; return 0; } _ACEOF rm -f conftest.$ac_objext conftest$ac_exeext if { (ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_link") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest$ac_exeext && $as_test_x conftest$ac_exeext; then ac_cv_func_pow=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_cv_func_pow=no fi rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ conftest$ac_exeext conftest.$ac_ext fi { echo "$as_me:$LINENO: result: $ac_cv_func_pow" >&5 echo "${ECHO_T}$ac_cv_func_pow" >&6; } if test $ac_cv_func_pow = no; then { echo "$as_me:$LINENO: checking for pow in -lm" >&5 echo $ECHO_N "checking for pow in -lm... $ECHO_C" >&6; } if test "${ac_cv_lib_m_pow+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else ac_check_lib_save_LIBS=$LIBS LIBS="-lm $LIBS" cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. 
*/ #ifdef __cplusplus extern "C" #endif char pow (); #ifdef F77_DUMMY_MAIN # ifdef __cplusplus extern "C" # endif int F77_DUMMY_MAIN() { return 1; } #endif int main () { return pow (); ; return 0; } _ACEOF rm -f conftest.$ac_objext conftest$ac_exeext if { (ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_link") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest$ac_exeext && $as_test_x conftest$ac_exeext; then ac_cv_lib_m_pow=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_cv_lib_m_pow=no fi rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { echo "$as_me:$LINENO: result: $ac_cv_lib_m_pow" >&5 echo "${ECHO_T}$ac_cv_lib_m_pow" >&6; } if test $ac_cv_lib_m_pow = yes; then POW_LIB=-lm else { echo "$as_me:$LINENO: WARNING: cannot find library containing definition of pow" >&5 echo "$as_me: WARNING: cannot find library containing definition of pow" >&2;} fi fi fi for ac_func in clock_gettime floor pow sqrt strtol posix_memalign do as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` { echo "$as_me:$LINENO: checking for $ac_func" >&5 echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; } if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then echo $ECHO_N "(cached) $ECHO_C" >&6 else cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF cat confdefs.h >>conftest.$ac_ext cat >>conftest.$ac_ext <<_ACEOF /* end confdefs.h. */ /* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func. For example, HP-UX 11i <limits.h> declares gettimeofday.
*/ #define $ac_func innocuous_$ac_func /* System header to define __stub macros and hopefully few prototypes, which can conflict with char $ac_func (); below. Prefer <limits.h> to <assert.h> if __STDC__ is defined, since <limits.h> exists even on freestanding compilers. */ #ifdef __STDC__ # include <limits.h> #else # include <assert.h> #endif #undef $ac_func /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ #ifdef __cplusplus extern "C" #endif char $ac_func (); /* The GNU C library defines this for functions which it implements to always fail with ENOSYS. Some functions are actually named something starting with __ and the normal name is an alias. */ #if defined __stub_$ac_func || defined __stub___$ac_func choke me #endif #ifdef F77_DUMMY_MAIN # ifdef __cplusplus extern "C" # endif int F77_DUMMY_MAIN() { return 1; } #endif int main () { return $ac_func (); ; return 0; } _ACEOF rm -f conftest.$ac_objext conftest$ac_exeext if { (ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 (eval "$ac_link") 2>conftest.er1 ac_status=$? grep -v '^ *+' conftest.er1 >conftest.err rm -f conftest.er1 cat conftest.err >&5 echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); } && { test -z "$ac_c_werror_flag" || test !
-s conftest.err } && test -s conftest$ac_exeext && $as_test_x conftest$ac_exeext; then eval "$as_ac_var=yes" else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 eval "$as_ac_var=no" fi rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ conftest$ac_exeext conftest.$ac_ext fi ac_res=`eval echo '${'$as_ac_var'}'` { echo "$as_me:$LINENO: result: $ac_res" >&5 echo "${ECHO_T}$ac_res" >&6; } if test `eval echo '${'$as_ac_var'}'` = yes; then cat >>confdefs.h <<_ACEOF #define `echo "HAVE_$ac_func" | $as_tr_cpp` 1 _ACEOF fi done ac_config_files="$ac_config_files Makefile src/Makefile doc/Makefile www/Makefile einspline.pc" cat >confcache <<\_ACEOF # This file is a shell script that caches the results of configure # tests run on this system so they can be shared between configure # scripts and configure runs, see configure's option --config-cache. # It is not useful on other systems. If it contains results you don't # want to keep, you may remove or edit it. # # config.status only pays attention to the cache file if you give it # the --recheck option to rerun configure. # # `ac_cv_env_foo' variables (set or unset) will be overridden when # loading this file, other *unset* `ac_cv_foo' will be assigned the # following values. _ACEOF # The following way of writing the cache mishandles newlines in values, # but we know of no workaround that is simple, portable, and efficient. # So, we kill variables containing newlines. # Ultrix sh set writes to stderr and can't be redirected directly, # and sets the high bit in the cache file unless we assign to the vars. ( for ac_var in `(set) 2>&1 | sed -n 's/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'`; do eval ac_val=\$$ac_var case $ac_val in #( *${as_nl}*) case $ac_var in #( *_cv_*) { echo "$as_me:$LINENO: WARNING: Cache variable $ac_var contains a newline." >&5 echo "$as_me: WARNING: Cache variable $ac_var contains a newline." 
>&2;} ;; esac case $ac_var in #( _ | IFS | as_nl) ;; #( *) $as_unset $ac_var ;; esac ;; esac done (set) 2>&1 | case $as_nl`(ac_space=' '; set) 2>&1` in #( *${as_nl}ac_space=\ *) # `set' does not quote correctly, so add quotes (double-quote # substitution turns \\\\ into \\, and sed turns \\ into \). sed -n \ "s/'/'\\\\''/g; s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p" ;; #( *) # `set' quotes correctly as required by POSIX, so do not add quotes. sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" ;; esac | sort ) | sed ' /^ac_cv_env_/b end t clear :clear s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/ t end s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/ :end' >>confcache if diff "$cache_file" confcache >/dev/null 2>&1; then :; else if test -w "$cache_file"; then test "x$cache_file" != "x/dev/null" && { echo "$as_me:$LINENO: updating cache $cache_file" >&5 echo "$as_me: updating cache $cache_file" >&6;} cat confcache >$cache_file else { echo "$as_me:$LINENO: not updating unwritable cache $cache_file" >&5 echo "$as_me: not updating unwritable cache $cache_file" >&6;} fi fi rm -f confcache test "x$prefix" = xNONE && prefix=$ac_default_prefix # Let make expand exec_prefix. test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' DEFS=-DHAVE_CONFIG_H ac_libobjs= ac_ltlibobjs= for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue # 1. Remove the extension, and $U if already installed. ac_script='s/\$U\././;s/\.o$//;s/\.obj$//' ac_i=`echo "$ac_i" | sed "$ac_script"` # 2. Prepend LIBOBJDIR. When used with automake>=1.10 LIBOBJDIR # will be set to the directory where LIBOBJS objects are built. ac_libobjs="$ac_libobjs \${LIBOBJDIR}$ac_i\$U.$ac_objext" ac_ltlibobjs="$ac_ltlibobjs \${LIBOBJDIR}$ac_i"'$U.lo' done LIBOBJS=$ac_libobjs LTLIBOBJS=$ac_ltlibobjs if test -z "${AMDEP_TRUE}" && test -z "${AMDEP_FALSE}"; then { { echo "$as_me:$LINENO: error: conditional \"AMDEP\" was never defined. Usually this means the macro was only invoked conditionally." 
>&5 echo "$as_me: error: conditional \"AMDEP\" was never defined. Usually this means the macro was only invoked conditionally." >&2;} { (exit 1); exit 1; }; } fi if test -z "${am__fastdepCC_TRUE}" && test -z "${am__fastdepCC_FALSE}"; then { { echo "$as_me:$LINENO: error: conditional \"am__fastdepCC\" was never defined. Usually this means the macro was only invoked conditionally." >&5 echo "$as_me: error: conditional \"am__fastdepCC\" was never defined. Usually this means the macro was only invoked conditionally." >&2;} { (exit 1); exit 1; }; } fi if test -z "${am__fastdepCXX_TRUE}" && test -z "${am__fastdepCXX_FALSE}"; then { { echo "$as_me:$LINENO: error: conditional \"am__fastdepCXX\" was never defined. Usually this means the macro was only invoked conditionally." >&5 echo "$as_me: error: conditional \"am__fastdepCXX\" was never defined. Usually this means the macro was only invoked conditionally." >&2;} { (exit 1); exit 1; }; } fi if test -z "${WANT_FORTRAN_TRUE}" && test -z "${WANT_FORTRAN_FALSE}"; then { { echo "$as_me:$LINENO: error: conditional \"WANT_FORTRAN\" was never defined. Usually this means the macro was only invoked conditionally." >&5 echo "$as_me: error: conditional \"WANT_FORTRAN\" was never defined. Usually this means the macro was only invoked conditionally." >&2;} { (exit 1); exit 1; }; } fi if test -z "${HAVE_CUDA_TRUE}" && test -z "${HAVE_CUDA_FALSE}"; then { { echo "$as_me:$LINENO: error: conditional \"HAVE_CUDA\" was never defined. Usually this means the macro was only invoked conditionally." >&5 echo "$as_me: error: conditional \"HAVE_CUDA\" was never defined. Usually this means the macro was only invoked conditionally." >&2;} { (exit 1); exit 1; }; } fi if test -z "${HAVE_SSE_TRUE}" && test -z "${HAVE_SSE_FALSE}"; then { { echo "$as_me:$LINENO: error: conditional \"HAVE_SSE\" was never defined. Usually this means the macro was only invoked conditionally." >&5 echo "$as_me: error: conditional \"HAVE_SSE\" was never defined. 
Usually this means the macro was only invoked conditionally." >&2;} { (exit 1); exit 1; }; } fi if test -z "${HAVE_SSE2_TRUE}" && test -z "${HAVE_SSE2_FALSE}"; then { { echo "$as_me:$LINENO: error: conditional \"HAVE_SSE2\" was never defined. Usually this means the macro was only invoked conditionally." >&5 echo "$as_me: error: conditional \"HAVE_SSE2\" was never defined. Usually this means the macro was only invoked conditionally." >&2;} { (exit 1); exit 1; }; } fi if test -z "${HAVE_SSE3_TRUE}" && test -z "${HAVE_SSE3_FALSE}"; then { { echo "$as_me:$LINENO: error: conditional \"HAVE_SSE3\" was never defined. Usually this means the macro was only invoked conditionally." >&5 echo "$as_me: error: conditional \"HAVE_SSE3\" was never defined. Usually this means the macro was only invoked conditionally." >&2;} { (exit 1); exit 1; }; } fi if test -z "${HAVE_SSSE3_TRUE}" && test -z "${HAVE_SSSE3_FALSE}"; then { { echo "$as_me:$LINENO: error: conditional \"HAVE_SSSE3\" was never defined. Usually this means the macro was only invoked conditionally." >&5 echo "$as_me: error: conditional \"HAVE_SSSE3\" was never defined. Usually this means the macro was only invoked conditionally." >&2;} { (exit 1); exit 1; }; } fi if test -z "${HAVE_SSE4_1_TRUE}" && test -z "${HAVE_SSE4_1_FALSE}"; then { { echo "$as_me:$LINENO: error: conditional \"HAVE_SSE4_1\" was never defined. Usually this means the macro was only invoked conditionally." >&5 echo "$as_me: error: conditional \"HAVE_SSE4_1\" was never defined. Usually this means the macro was only invoked conditionally." >&2;} { (exit 1); exit 1; }; } fi if test -z "${HAVE_SSE4_2_TRUE}" && test -z "${HAVE_SSE4_2_FALSE}"; then { { echo "$as_me:$LINENO: error: conditional \"HAVE_SSE4_2\" was never defined. Usually this means the macro was only invoked conditionally." >&5 echo "$as_me: error: conditional \"HAVE_SSE4_2\" was never defined. Usually this means the macro was only invoked conditionally." 
>&2;} { (exit 1); exit 1; }; } fi if test -z "${WANT_BLIPS_TRUE}" && test -z "${WANT_BLIPS_FALSE}"; then { { echo "$as_me:$LINENO: error: conditional \"WANT_BLIPS\" was never defined. Usually this means the macro was only invoked conditionally." >&5 echo "$as_me: error: conditional \"WANT_BLIPS\" was never defined. Usually this means the macro was only invoked conditionally." >&2;} { (exit 1); exit 1; }; } fi : ${CONFIG_STATUS=./config.status} ac_clean_files_save=$ac_clean_files ac_clean_files="$ac_clean_files $CONFIG_STATUS" { echo "$as_me:$LINENO: creating $CONFIG_STATUS" >&5 echo "$as_me: creating $CONFIG_STATUS" >&6;} cat >$CONFIG_STATUS <<_ACEOF #! $SHELL # Generated by $as_me. # Run this file to recreate the current configuration. # Compiler output produced by configure, useful for debugging # configure, is in config.log if it exists. debug=false ac_cs_recheck=false ac_cs_silent=false SHELL=\${CONFIG_SHELL-$SHELL} _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF ## --------------------- ## ## M4sh Initialization. ## ## --------------------- ## # Be more Bourne compatible DUALCASE=1; export DUALCASE # for MKS sh if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then emulate sh NULLCMD=: # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which # is contrary to our usage. Disable this feature. alias -g '${1+"$@"}'='"$@"' setopt NO_GLOB_SUBST else case `(set -o) 2>/dev/null` in *posix*) set -o posix ;; esac fi # PATH needs CR # Avoid depending upon Character Ranges. as_cr_letters='abcdefghijklmnopqrstuvwxyz' as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' as_cr_Letters=$as_cr_letters$as_cr_LETTERS as_cr_digits='0123456789' as_cr_alnum=$as_cr_Letters$as_cr_digits # The user is always right. if test "${PATH_SEPARATOR+set}" != set; then echo "#! /bin/sh" >conf$$.sh echo "exit 0" >>conf$$.sh chmod +x conf$$.sh if (PATH="/nonexistent;."; conf$$.sh) >/dev/null 2>&1; then PATH_SEPARATOR=';' else PATH_SEPARATOR=: fi rm -f conf$$.sh fi # Support unset when possible. 
if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then as_unset=unset else as_unset=false fi # IFS # We need space, tab and new line, in precisely that order. Quoting is # there to prevent editors from complaining about space-tab. # (If _AS_PATH_WALK were called with IFS unset, it would disable word # splitting by setting IFS to empty value.) as_nl=' ' IFS=" "" $as_nl" # Find who we are. Look in the path if we contain no directory separator. case $0 in *[\\/]* ) as_myself=$0 ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break done IFS=$as_save_IFS ;; esac # We did not find ourselves, most probably we were run as `sh COMMAND' # in which case we are not to be found in the path. if test "x$as_myself" = x; then as_myself=$0 fi if test ! -f "$as_myself"; then echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 { (exit 1); exit 1; } fi # Work around bugs in pre-3.0 UWIN ksh. for as_var in ENV MAIL MAILPATH do ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var done PS1='$ ' PS2='> ' PS4='+ ' # NLS nuisances. for as_var in \ LANG LANGUAGE LC_ADDRESS LC_ALL LC_COLLATE LC_CTYPE LC_IDENTIFICATION \ LC_MEASUREMENT LC_MESSAGES LC_MONETARY LC_NAME LC_NUMERIC LC_PAPER \ LC_TELEPHONE LC_TIME do if (set +x; test -z "`(eval $as_var=C; export $as_var) 2>&1`"); then eval $as_var=C; export $as_var else ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var fi done # Required to use basename. if expr a : '\(a\)' >/dev/null 2>&1 && test "X`expr 00001 : '.*\(...\)'`" = X001; then as_expr=expr else as_expr=false fi if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then as_basename=basename else as_basename=false fi # Name of the executable. as_me=`$as_basename -- "$0" || $as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ X"$0" : 'X\(//\)$' \| \ X"$0" : 'X\(/\)' \| . 
2>/dev/null || echo X/"$0" | sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/ q } /^X\/\(\/\/\)$/{ s//\1/ q } /^X\/\(\/\).*/{ s//\1/ q } s/.*/./; q'` # CDPATH. $as_unset CDPATH as_lineno_1=$LINENO as_lineno_2=$LINENO test "x$as_lineno_1" != "x$as_lineno_2" && test "x`expr $as_lineno_1 + 1`" = "x$as_lineno_2" || { # Create $as_me.lineno as a copy of $as_myself, but with $LINENO # uniformly replaced by the line number. The first 'sed' inserts a # line-number line after each line using $LINENO; the second 'sed' # does the real work. The second script uses 'N' to pair each # line-number line with the line containing $LINENO, and appends # trailing '-' during substitution so that $LINENO is not a special # case at line end. # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the # scripts with optimization help from Paolo Bonzini. Blame Lee # E. McMahon (1931-1989) for sed's syntax. :-) sed -n ' p /[$]LINENO/= ' <$as_myself | sed ' s/[$]LINENO.*/&-/ t lineno b :lineno N :loop s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/ t loop s/-\n.*// ' >$as_me.lineno && chmod +x "$as_me.lineno" || { echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2 { (exit 1); exit 1; }; } # Don't try to exec as it changes $[0], causing all sort of problems # (the dirname of $[0] is not the place where we might find the # original and so on. Autoconf is especially sensitive to this). . "./$as_me.lineno" # Exit status is that of the last command. exit } if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then as_dirname=dirname else as_dirname=false fi ECHO_C= ECHO_N= ECHO_T= case `echo -n x` in -n*) case `echo 'x\c'` in *c*) ECHO_T=' ';; # ECHO_T is single tab character. 
*) ECHO_C='\c';; esac;; *) ECHO_N='-n';; esac if expr a : '\(a\)' >/dev/null 2>&1 && test "X`expr 00001 : '.*\(...\)'`" = X001; then as_expr=expr else as_expr=false fi rm -f conf$$ conf$$.exe conf$$.file if test -d conf$$.dir; then rm -f conf$$.dir/conf$$.file else rm -f conf$$.dir mkdir conf$$.dir fi echo >conf$$.file if ln -s conf$$.file conf$$ 2>/dev/null; then as_ln_s='ln -s' # ... but there are two gotchas: # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. # In both cases, we have to default to `cp -p'. ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || as_ln_s='cp -p' elif ln conf$$.file conf$$ 2>/dev/null; then as_ln_s=ln else as_ln_s='cp -p' fi rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file rmdir conf$$.dir 2>/dev/null if mkdir -p . 2>/dev/null; then as_mkdir_p=: else test -d ./-p && rmdir ./-p as_mkdir_p=false fi if test -x / >/dev/null 2>&1; then as_test_x='test -x' else if ls -dL / >/dev/null 2>&1; then as_ls_L_option=L else as_ls_L_option= fi as_test_x=' eval sh -c '\'' if test -d "$1"; then test -d "$1/."; else case $1 in -*)set "./$1";; esac; case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in ???[sx]*):;;*)false;;esac;fi '\'' sh ' fi as_executable_p=$as_test_x # Sed expression to map a string onto a valid CPP name. as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" # Sed expression to map a string onto a valid variable name. as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" exec 6>&1 # Save the log message, to keep $[0] and so on meaningful, and to # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" This file was extended by einspline $as_me 0.9.2, which was generated by GNU Autoconf 2.61. 
Invocation command line was CONFIG_FILES = $CONFIG_FILES CONFIG_HEADERS = $CONFIG_HEADERS CONFIG_LINKS = $CONFIG_LINKS CONFIG_COMMANDS = $CONFIG_COMMANDS $ $0 $@ on `(hostname || uname -n) 2>/dev/null | sed 1q` " _ACEOF cat >>$CONFIG_STATUS <<_ACEOF # Files that config.status was made for. config_files="$ac_config_files" config_headers="$ac_config_headers" config_commands="$ac_config_commands" _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF ac_cs_usage="\ \`$as_me' instantiates files from templates according to the current configuration. Usage: $0 [OPTIONS] [FILE]... -h, --help print this help, then exit -V, --version print version number and configuration settings, then exit -q, --quiet do not print progress messages -d, --debug don't remove temporary files --recheck update $as_me by reconfiguring in the same conditions --file=FILE[:TEMPLATE] instantiate the configuration file FILE --header=FILE[:TEMPLATE] instantiate the configuration header FILE Configuration files: $config_files Configuration headers: $config_headers Configuration commands: $config_commands Report bugs to ." _ACEOF cat >>$CONFIG_STATUS <<_ACEOF ac_cs_version="\\ einspline config.status 0.9.2 configured by $0, generated by GNU Autoconf 2.61, with options \\"`echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\" Copyright (C) 2006 Free Software Foundation, Inc. This config.status script is free software; the Free Software Foundation gives unlimited permission to copy, distribute and modify it." ac_pwd='$ac_pwd' srcdir='$srcdir' INSTALL='$INSTALL' MKDIR_P='$MKDIR_P' _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF # If no file are specified by the user, then we need to provide default # value. By we need to know if files were specified by the user. ac_need_defaults=: while test $# != 0 do case $1 in --*=*) ac_option=`expr "X$1" : 'X\([^=]*\)='` ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'` ac_shift=: ;; *) ac_option=$1 ac_optarg=$2 ac_shift=shift ;; esac case $ac_option in # Handling of the options. 
-recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) ac_cs_recheck=: ;; --version | --versio | --versi | --vers | --ver | --ve | --v | -V ) echo "$ac_cs_version"; exit ;; --debug | --debu | --deb | --de | --d | -d ) debug=: ;; --file | --fil | --fi | --f ) $ac_shift CONFIG_FILES="$CONFIG_FILES $ac_optarg" ac_need_defaults=false;; --header | --heade | --head | --hea ) $ac_shift CONFIG_HEADERS="$CONFIG_HEADERS $ac_optarg" ac_need_defaults=false;; --he | --h) # Conflict between --help and --header { echo "$as_me: error: ambiguous option: $1 Try \`$0 --help' for more information." >&2 { (exit 1); exit 1; }; };; --help | --hel | -h ) echo "$ac_cs_usage"; exit ;; -q | -quiet | --quiet | --quie | --qui | --qu | --q \ | -silent | --silent | --silen | --sile | --sil | --si | --s) ac_cs_silent=: ;; # This is an error. -*) { echo "$as_me: error: unrecognized option: $1 Try \`$0 --help' for more information." >&2 { (exit 1); exit 1; }; } ;; *) ac_config_targets="$ac_config_targets $1" ac_need_defaults=false ;; esac shift done ac_configure_extra_args= if $ac_cs_silent; then exec 6>/dev/null ac_configure_extra_args="$ac_configure_extra_args --silent" fi _ACEOF cat >>$CONFIG_STATUS <<_ACEOF if \$ac_cs_recheck; then echo "running CONFIG_SHELL=$SHELL $SHELL $0 "$ac_configure_args \$ac_configure_extra_args " --no-create --no-recursion" >&6 CONFIG_SHELL=$SHELL export CONFIG_SHELL exec $SHELL "$0"$ac_configure_args \$ac_configure_extra_args --no-create --no-recursion fi _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF exec 5>>config.log { echo sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX ## Running $as_me. ## _ASBOX echo "$ac_log" } >&5 _ACEOF cat >>$CONFIG_STATUS <<_ACEOF # # INIT-COMMANDS # AMDEP_TRUE="$AMDEP_TRUE" ac_aux_dir="$ac_aux_dir" _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF # Handling of arguments. 
for ac_config_target in $ac_config_targets do case $ac_config_target in "src/config.h") CONFIG_HEADERS="$CONFIG_HEADERS src/config.h" ;; "depfiles") CONFIG_COMMANDS="$CONFIG_COMMANDS depfiles" ;; "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;; "src/Makefile") CONFIG_FILES="$CONFIG_FILES src/Makefile" ;; "doc/Makefile") CONFIG_FILES="$CONFIG_FILES doc/Makefile" ;; "www/Makefile") CONFIG_FILES="$CONFIG_FILES www/Makefile" ;; "einspline.pc") CONFIG_FILES="$CONFIG_FILES einspline.pc" ;; *) { { echo "$as_me:$LINENO: error: invalid argument: $ac_config_target" >&5 echo "$as_me: error: invalid argument: $ac_config_target" >&2;} { (exit 1); exit 1; }; };; esac done # If the user did not use the arguments to specify the items to instantiate, # then the envvar interface is used. Set only those that are not. # We use the long form for the default assignment because of an extremely # bizarre bug on SunOS 4.1.3. if $ac_need_defaults; then test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files test "${CONFIG_HEADERS+set}" = set || CONFIG_HEADERS=$config_headers test "${CONFIG_COMMANDS+set}" = set || CONFIG_COMMANDS=$config_commands fi # Have a temporary directory for convenience. Make it in the build tree # simply because there is no reason against having it here, and in addition, # creating and moving files from /tmp can sometimes cause problems. # Hook for its removal unless debugging. # Note that there is a small window in which the directory will not be cleaned: # after its creation but before its name has been assigned to `$tmp'. $debug || { tmp= trap 'exit_status=$? { test -z "$tmp" || test ! -d "$tmp" || rm -fr "$tmp"; } && exit $exit_status ' 0 trap '{ (exit 1); exit 1; }' 1 2 13 15 } # Create a (secure) tmp directory for tmp files. { tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" } || { tmp=./conf$$-$RANDOM (umask 077 && mkdir "$tmp") } || { echo "$me: cannot create a temporary directory in ." 
>&2 { (exit 1); exit 1; } } # # Set up the sed scripts for CONFIG_FILES section. # # No need to generate the scripts if there are no CONFIG_FILES. # This happens for instance when ./config.status config.h if test -n "$CONFIG_FILES"; then _ACEOF ac_delim='%!_!# ' for ac_last_try in false false false false false :; do cat >conf$$subs.sed <<_ACEOF SHELL!$SHELL$ac_delim PATH_SEPARATOR!$PATH_SEPARATOR$ac_delim PACKAGE_NAME!$PACKAGE_NAME$ac_delim PACKAGE_TARNAME!$PACKAGE_TARNAME$ac_delim PACKAGE_VERSION!$PACKAGE_VERSION$ac_delim PACKAGE_STRING!$PACKAGE_STRING$ac_delim PACKAGE_BUGREPORT!$PACKAGE_BUGREPORT$ac_delim exec_prefix!$exec_prefix$ac_delim prefix!$prefix$ac_delim program_transform_name!$program_transform_name$ac_delim bindir!$bindir$ac_delim sbindir!$sbindir$ac_delim libexecdir!$libexecdir$ac_delim datarootdir!$datarootdir$ac_delim datadir!$datadir$ac_delim sysconfdir!$sysconfdir$ac_delim sharedstatedir!$sharedstatedir$ac_delim localstatedir!$localstatedir$ac_delim includedir!$includedir$ac_delim oldincludedir!$oldincludedir$ac_delim docdir!$docdir$ac_delim infodir!$infodir$ac_delim htmldir!$htmldir$ac_delim dvidir!$dvidir$ac_delim pdfdir!$pdfdir$ac_delim psdir!$psdir$ac_delim libdir!$libdir$ac_delim localedir!$localedir$ac_delim mandir!$mandir$ac_delim DEFS!$DEFS$ac_delim ECHO_C!$ECHO_C$ac_delim ECHO_N!$ECHO_N$ac_delim ECHO_T!$ECHO_T$ac_delim LIBS!$LIBS$ac_delim build_alias!$build_alias$ac_delim host_alias!$host_alias$ac_delim target_alias!$target_alias$ac_delim INSTALL_PROGRAM!$INSTALL_PROGRAM$ac_delim INSTALL_SCRIPT!$INSTALL_SCRIPT$ac_delim INSTALL_DATA!$INSTALL_DATA$ac_delim am__isrc!$am__isrc$ac_delim CYGPATH_W!$CYGPATH_W$ac_delim PACKAGE!$PACKAGE$ac_delim VERSION!$VERSION$ac_delim ACLOCAL!$ACLOCAL$ac_delim AUTOCONF!$AUTOCONF$ac_delim AUTOMAKE!$AUTOMAKE$ac_delim AUTOHEADER!$AUTOHEADER$ac_delim MAKEINFO!$MAKEINFO$ac_delim install_sh!$install_sh$ac_delim STRIP!$STRIP$ac_delim INSTALL_STRIP_PROGRAM!$INSTALL_STRIP_PROGRAM$ac_delim mkdir_p!$mkdir_p$ac_delim 
AWK!$AWK$ac_delim SET_MAKE!$SET_MAKE$ac_delim am__leading_dot!$am__leading_dot$ac_delim AMTAR!$AMTAR$ac_delim am__tar!$am__tar$ac_delim am__untar!$am__untar$ac_delim CC!$CC$ac_delim CFLAGS!$CFLAGS$ac_delim LDFLAGS!$LDFLAGS$ac_delim CPPFLAGS!$CPPFLAGS$ac_delim ac_ct_CC!$ac_ct_CC$ac_delim EXEEXT!$EXEEXT$ac_delim OBJEXT!$OBJEXT$ac_delim DEPDIR!$DEPDIR$ac_delim am__include!$am__include$ac_delim am__quote!$am__quote$ac_delim AMDEP_TRUE!$AMDEP_TRUE$ac_delim AMDEP_FALSE!$AMDEP_FALSE$ac_delim AMDEPBACKSLASH!$AMDEPBACKSLASH$ac_delim CCDEPMODE!$CCDEPMODE$ac_delim am__fastdepCC_TRUE!$am__fastdepCC_TRUE$ac_delim am__fastdepCC_FALSE!$am__fastdepCC_FALSE$ac_delim CXX!$CXX$ac_delim CXXFLAGS!$CXXFLAGS$ac_delim ac_ct_CXX!$ac_ct_CXX$ac_delim CXXDEPMODE!$CXXDEPMODE$ac_delim am__fastdepCXX_TRUE!$am__fastdepCXX_TRUE$ac_delim am__fastdepCXX_FALSE!$am__fastdepCXX_FALSE$ac_delim PKG_CONFIG!$PKG_CONFIG$ac_delim build!$build$ac_delim build_cpu!$build_cpu$ac_delim build_vendor!$build_vendor$ac_delim build_os!$build_os$ac_delim host!$host$ac_delim host_cpu!$host_cpu$ac_delim host_vendor!$host_vendor$ac_delim host_os!$host_os$ac_delim PRTDIAG!$PRTDIAG$ac_delim F77!$F77$ac_delim FFLAGS!$FFLAGS$ac_delim ac_ct_F77!$ac_ct_F77$ac_delim SED!$SED$ac_delim GREP!$GREP$ac_delim EGREP!$EGREP$ac_delim _ACEOF if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 97; then break elif $ac_last_try; then { { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5 echo "$as_me: error: could not make $CONFIG_STATUS" >&2;} { (exit 1); exit 1; }; } else ac_delim="$ac_delim!$ac_delim _$ac_delim!! 
" fi done ac_eof=`sed -n '/^CEOF[0-9]*$/s/CEOF/0/p' conf$$subs.sed` if test -n "$ac_eof"; then ac_eof=`echo "$ac_eof" | sort -nru | sed 1q` ac_eof=`expr $ac_eof + 1` fi cat >>$CONFIG_STATUS <<_ACEOF cat >"\$tmp/subs-1.sed" <<\CEOF$ac_eof /@[a-zA-Z_][a-zA-Z_0-9]*@/!b _ACEOF sed ' s/[,\\&]/\\&/g; s/@/@|#_!!_#|/g s/^/s,@/; s/!/@,|#_!!_#|/ :n t n s/'"$ac_delim"'$/,g/; t s/$/\\/; p N; s/^.*\n//; s/[,\\&]/\\&/g; s/@/@|#_!!_#|/g; b n ' >>$CONFIG_STATUS >$CONFIG_STATUS <<_ACEOF CEOF$ac_eof _ACEOF ac_delim='%!_!# ' for ac_last_try in false false false false false :; do cat >conf$$subs.sed <<_ACEOF LN_S!$LN_S$ac_delim ECHO!$ECHO$ac_delim AR!$AR$ac_delim RANLIB!$RANLIB$ac_delim CPP!$CPP$ac_delim CXXCPP!$CXXCPP$ac_delim LIBTOOL!$LIBTOOL$ac_delim WANT_FORTRAN_TRUE!$WANT_FORTRAN_TRUE$ac_delim WANT_FORTRAN_FALSE!$WANT_FORTRAN_FALSE$ac_delim HAVE_CUDA_TRUE!$HAVE_CUDA_TRUE$ac_delim HAVE_CUDA_FALSE!$HAVE_CUDA_FALSE$ac_delim CUDA_CFLAGS!$CUDA_CFLAGS$ac_delim CUDA_LIBS!$CUDA_LIBS$ac_delim NVCC!$NVCC$ac_delim NVCCFLAGS!$NVCCFLAGS$ac_delim PTHREAD_FLAG!$PTHREAD_FLAG$ac_delim OPENMP_FLAG!$OPENMP_FLAG$ac_delim ALL_STATIC!$ALL_STATIC$ac_delim SIMD_FLAGS!$SIMD_FLAGS$ac_delim HAVE_SSE_TRUE!$HAVE_SSE_TRUE$ac_delim HAVE_SSE_FALSE!$HAVE_SSE_FALSE$ac_delim HAVE_SSE2_TRUE!$HAVE_SSE2_TRUE$ac_delim HAVE_SSE2_FALSE!$HAVE_SSE2_FALSE$ac_delim HAVE_SSE3_TRUE!$HAVE_SSE3_TRUE$ac_delim HAVE_SSE3_FALSE!$HAVE_SSE3_FALSE$ac_delim HAVE_SSSE3_TRUE!$HAVE_SSSE3_TRUE$ac_delim HAVE_SSSE3_FALSE!$HAVE_SSSE3_FALSE$ac_delim HAVE_SSE4_1_TRUE!$HAVE_SSE4_1_TRUE$ac_delim HAVE_SSE4_1_FALSE!$HAVE_SSE4_1_FALSE$ac_delim HAVE_SSE4_2_TRUE!$HAVE_SSE4_2_TRUE$ac_delim HAVE_SSE4_2_FALSE!$HAVE_SSE4_2_FALSE$ac_delim WANT_BLIPS_TRUE!$WANT_BLIPS_TRUE$ac_delim WANT_BLIPS_FALSE!$WANT_BLIPS_FALSE$ac_delim FFTW3_CFLAGS!$FFTW3_CFLAGS$ac_delim FFTW3_LIBS!$FFTW3_LIBS$ac_delim FFTW3F_CFLAGS!$FFTW3F_CFLAGS$ac_delim FFTW3F_LIBS!$FFTW3F_LIBS$ac_delim FLIBS!$FLIBS$ac_delim LIBOBJS!$LIBOBJS$ac_delim POW_LIB!$POW_LIB$ac_delim 
PKGDATADEF!$PKGDATADEF$ac_delim LTLIBOBJS!$LTLIBOBJS$ac_delim _ACEOF if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 42; then break elif $ac_last_try; then { { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5 echo "$as_me: error: could not make $CONFIG_STATUS" >&2;} { (exit 1); exit 1; }; } else ac_delim="$ac_delim!$ac_delim _$ac_delim!! " fi done ac_eof=`sed -n '/^CEOF[0-9]*$/s/CEOF/0/p' conf$$subs.sed` if test -n "$ac_eof"; then ac_eof=`echo "$ac_eof" | sort -nru | sed 1q` ac_eof=`expr $ac_eof + 1` fi cat >>$CONFIG_STATUS <<_ACEOF cat >"\$tmp/subs-2.sed" <<\CEOF$ac_eof /@[a-zA-Z_][a-zA-Z_0-9]*@/!b end _ACEOF sed ' s/[,\\&]/\\&/g; s/@/@|#_!!_#|/g s/^/s,@/; s/!/@,|#_!!_#|/ :n t n s/'"$ac_delim"'$/,g/; t s/$/\\/; p N; s/^.*\n//; s/[,\\&]/\\&/g; s/@/@|#_!!_#|/g; b n ' >>$CONFIG_STATUS >$CONFIG_STATUS <<_ACEOF :end s/|#_!!_#|//g CEOF$ac_eof _ACEOF # VPATH may cause trouble with some makes, so we remove $(srcdir), # ${srcdir} and @srcdir@ from VPATH if srcdir is ".", strip leading and # trailing colons and then remove the whole line if VPATH becomes empty # (actually we leave an empty line to preserve line numbers). if test "x$srcdir" = x.; then ac_vpsub='/^[ ]*VPATH[ ]*=/{ s/:*\$(srcdir):*/:/ s/:*\${srcdir}:*/:/ s/:*@srcdir@:*/:/ s/^\([^=]*=[ ]*\):*/\1/ s/:*$// s/^[^=]*=[ ]*$// }' fi cat >>$CONFIG_STATUS <<\_ACEOF fi # test -n "$CONFIG_FILES" for ac_tag in :F $CONFIG_FILES :H $CONFIG_HEADERS :C $CONFIG_COMMANDS do case $ac_tag in :[FHLC]) ac_mode=$ac_tag; continue;; esac case $ac_mode$ac_tag in :[FHL]*:*);; :L* | :C*:*) { { echo "$as_me:$LINENO: error: Invalid tag $ac_tag." >&5 echo "$as_me: error: Invalid tag $ac_tag." 
>&2;} { (exit 1); exit 1; }; };; :[FH]-) ac_tag=-:-;; :[FH]*) ac_tag=$ac_tag:$ac_tag.in;; esac ac_save_IFS=$IFS IFS=: set x $ac_tag IFS=$ac_save_IFS shift ac_file=$1 shift case $ac_mode in :L) ac_source=$1;; :[FH]) ac_file_inputs= for ac_f do case $ac_f in -) ac_f="$tmp/stdin";; *) # Look for the file first in the build tree, then in the source tree # (if the path is not absolute). The absolute path cannot be DOS-style, # because $ac_f cannot contain `:'. test -f "$ac_f" || case $ac_f in [\\/$]*) false;; *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";; esac || { { echo "$as_me:$LINENO: error: cannot find input file: $ac_f" >&5 echo "$as_me: error: cannot find input file: $ac_f" >&2;} { (exit 1); exit 1; }; };; esac ac_file_inputs="$ac_file_inputs $ac_f" done # Let's still pretend it is `configure' which instantiates (i.e., don't # use $as_me), people would be surprised to read: # /* config.h. Generated by config.status. */ configure_input="Generated from "`IFS=: echo $* | sed 's|^[^:]*/||;s|:[^:]*/|, |g'`" by configure." if test x"$ac_file" != x-; then configure_input="$ac_file. $configure_input" { echo "$as_me:$LINENO: creating $ac_file" >&5 echo "$as_me: creating $ac_file" >&6;} fi case $ac_tag in *:-:* | *:-) cat >"$tmp/stdin";; esac ;; esac ac_dir=`$as_dirname -- "$ac_file" || $as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$ac_file" : 'X\(//\)[^/]' \| \ X"$ac_file" : 'X\(//\)$' \| \ X"$ac_file" : 'X\(/\)' \| . 
2>/dev/null || echo X"$ac_file" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q'` { as_dir="$ac_dir" case $as_dir in #( -*) as_dir=./$as_dir;; esac test -d "$as_dir" || { $as_mkdir_p && mkdir -p "$as_dir"; } || { as_dirs= while :; do case $as_dir in #( *\'*) as_qdir=`echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #( *) as_qdir=$as_dir;; esac as_dirs="'$as_qdir' $as_dirs" as_dir=`$as_dirname -- "$as_dir" || $as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$as_dir" : 'X\(//\)[^/]' \| \ X"$as_dir" : 'X\(//\)$' \| \ X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || echo X"$as_dir" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q'` test -d "$as_dir" && break done test -z "$as_dirs" || eval "mkdir $as_dirs" } || test -d "$as_dir" || { { echo "$as_me:$LINENO: error: cannot create directory $as_dir" >&5 echo "$as_me: error: cannot create directory $as_dir" >&2;} { (exit 1); exit 1; }; }; } ac_builddir=. case "$ac_dir" in .) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; *) ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'` # A ".." for each directory in $ac_dir_suffix. ac_top_builddir_sub=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,/..,g;s,/,,'` case $ac_top_builddir_sub in "") ac_top_builddir_sub=. ac_top_build_prefix= ;; *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; esac ;; esac ac_abs_top_builddir=$ac_pwd ac_abs_builddir=$ac_pwd$ac_dir_suffix # for backward compatibility: ac_top_builddir=$ac_top_build_prefix case $srcdir in .) # We are building in place. ac_srcdir=. ac_top_srcdir=$ac_top_builddir_sub ac_abs_top_srcdir=$ac_pwd ;; [\\/]* | ?:[\\/]* ) # Absolute name. ac_srcdir=$srcdir$ac_dir_suffix; ac_top_srcdir=$srcdir ac_abs_top_srcdir=$srcdir ;; *) # Relative name. 
ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix ac_top_srcdir=$ac_top_build_prefix$srcdir ac_abs_top_srcdir=$ac_pwd/$srcdir ;; esac ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix case $ac_mode in :F) # # CONFIG_FILE # case $INSTALL in [\\/$]* | ?:[\\/]* ) ac_INSTALL=$INSTALL ;; *) ac_INSTALL=$ac_top_build_prefix$INSTALL ;; esac ac_MKDIR_P=$MKDIR_P case $MKDIR_P in [\\/$]* | ?:[\\/]* ) ;; */*) ac_MKDIR_P=$ac_top_build_prefix$MKDIR_P ;; esac _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF # If the template does not know about datarootdir, expand it. # FIXME: This hack should be removed a few years after 2.60. ac_datarootdir_hack=; ac_datarootdir_seen= case `sed -n '/datarootdir/ { p q } /@datadir@/p /@docdir@/p /@infodir@/p /@localedir@/p /@mandir@/p ' $ac_file_inputs` in *datarootdir*) ac_datarootdir_seen=yes;; *@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*) { echo "$as_me:$LINENO: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5 echo "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;} _ACEOF cat >>$CONFIG_STATUS <<_ACEOF ac_datarootdir_hack=' s&@datadir@&$datadir&g s&@docdir@&$docdir&g s&@infodir@&$infodir&g s&@localedir@&$localedir&g s&@mandir@&$mandir&g s&\\\${datarootdir}&$datarootdir&g' ;; esac _ACEOF # Neutralize VPATH when `$srcdir' = `.'. # Shell code in configure.ac might set extrasub. # FIXME: do we really want to maintain this feature? 
# Append to config.status the sed pipeline that instantiates one
# CONFIG_FILES template.  The first here-document is unquoted so that
# ac_vpsub and extrasub are expanded now, at configure time; everything
# after it is copied literally and evaluated when config.status runs.
cat >>$CONFIG_STATUS <<_ACEOF
  sed "$ac_vpsub
$extrasub
_ACEOF
cat >>$CONFIG_STATUS <<\_ACEOF
:t
/@[a-zA-Z_][a-zA-Z_0-9]*@/!b
s&@configure_input@&$configure_input&;t t
s&@top_builddir@&$ac_top_builddir_sub&;t t
s&@srcdir@&$ac_srcdir&;t t
s&@abs_srcdir@&$ac_abs_srcdir&;t t
s&@top_srcdir@&$ac_top_srcdir&;t t
s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t
s&@builddir@&$ac_builddir&;t t
s&@abs_builddir@&$ac_abs_builddir&;t t
s&@abs_top_builddir@&$ac_abs_top_builddir&;t t
s&@INSTALL@&$ac_INSTALL&;t t
s&@MKDIR_P@&$ac_MKDIR_P&;t t
$ac_datarootdir_hack
" $ac_file_inputs | sed -f "$tmp/subs-1.sed" | sed -f "$tmp/subs-2.sed" >"$tmp/out"

# Warn when the result still mentions ${datarootdir} but neither the
# template nor the output defines it.
test -z "$ac_datarootdir_hack$ac_datarootdir_seen" &&
  { ac_out=`sed -n '/\${datarootdir}/p' "$tmp/out"`; test -n "$ac_out"; } &&
  { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' "$tmp/out"`; test -z "$ac_out"; } &&
  { echo "$as_me:$LINENO: WARNING: $ac_file contains a reference to the variable \`datarootdir' which seems to be undefined. Please make sure it is defined." >&5
echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir' which seems to be undefined. Please make sure it is defined." >&2;}

  rm -f "$tmp/stdin"
  # Deliver the result: to stdout for "-", otherwise replace the target.
  # The destination is quoted on both commands so they agree on the path.
  case $ac_file in
  -) cat "$tmp/out"; rm -f "$tmp/out";;
  *) rm -f "$ac_file"; mv "$tmp/out" "$ac_file";;
  esac
 ;;
  :H)
  #
  # CONFIG_HEADER
  #
_ACEOF

# Transform confdefs.h into a sed script `conftest.defines', that
# substitutes the proper values into config.h.in to produce config.h.
rm -f conftest.defines conftest.tail
# First, append a space to every undef/define line, to ease matching.
echo 's/$/ /' >conftest.defines
# Then, protect against being on the right side of a sed subst, or in
# an unquoted here document, in config.status. If some macros were
# called several times there might be several #defines for the same
# symbol, which is useless. But do not sort them, since the last
# AC_DEFINE must be honored.
# Build conftest.defines (one sed command per #define found in confdefs.h),
# then append it to config.status in chunks of ac_max_sed_lines, followed by
# the code that installs the resulting header and its automake stamp file.
# NOTE(review): the whitespace bracket expressions in this section hold a
# single space here; they presumably matched a tab as well before
# whitespace was collapsed -- confirm against a pristine configure.
ac_word_re=[_$as_cr_Letters][_$as_cr_alnum]*

# These sed commands are passed to sed as "A NAME B PARAMS C VALUE D", where
# NAME is the cpp macro being defined, VALUE is the value it is being given.
# PARAMS is the parameter list in the macro definition--in most cases, it's
# just an empty string.
ac_dA='s,^\\([ #]*\\)[^ ]*\\([ ]*'
ac_dB='\\)[ (].*,\\1define\\2'
ac_dC=' '
ac_dD=' ,'

uniq confdefs.h |
  sed -n '
    t rset
    :rset
    s/^[ ]*#[ ]*define[ ][ ]*//
    t ok
    d
    :ok
    s/[\\&,]/\\&/g
    s/^\('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/ '"$ac_dA"'\1'"$ac_dB"'\2'"${ac_dC}"'\3'"$ac_dD"'/p
    s/^\('"$ac_word_re"'\)[ ]*\(.*\)/'"$ac_dA"'\1'"$ac_dB$ac_dC"'\2'"$ac_dD"'/p
  ' >>conftest.defines

# Remove the space that was appended to ease matching.
# Then replace #undef with comments. This is necessary, for
# example, in the case of _POSIX_SOURCE, which is predefined and required
# on some systems where configure will not decide to define it.
# (The regexp can be short, since the line contains either #define or #undef.)
echo 's/ $//
s,^[ #]*u.*,/* & */,' >>conftest.defines

# Break up conftest.defines:
ac_max_sed_lines=50

# First sed command is: sed -f defines.sed $ac_file_inputs >"$tmp/out1"
# Second one is: sed -f defines.sed "$tmp/out1" >"$tmp/out2"
# Third one will be: sed -f defines.sed "$tmp/out2" >"$tmp/out1"
# et cetera.
ac_in='$ac_file_inputs'
ac_out='"$tmp/out1"'
ac_nxt='"$tmp/out2"'

while :
do
  # Write a here document:
    cat >>$CONFIG_STATUS <<_ACEOF
    # First, check the format of the line:
    cat >"\$tmp/defines.sed" <<\\CEOF
/^[ ]*#[ ]*undef[ ][ ]*$ac_word_re[ ]*\$/b def
/^[ ]*#[ ]*define[ ][ ]*$ac_word_re[( ]/b def
b
:def
_ACEOF
  sed ${ac_max_sed_lines}q conftest.defines >>$CONFIG_STATUS
  echo 'CEOF
    sed -f "$tmp/defines.sed"' "$ac_in >$ac_out" >>$CONFIG_STATUS
  # Swap the ping-pong buffers for the next chunk.
  ac_in=$ac_out; ac_out=$ac_nxt; ac_nxt=$ac_in
  sed 1,${ac_max_sed_lines}d conftest.defines >conftest.tail
  grep . conftest.tail >/dev/null || break
  rm -f conftest.defines
  mv conftest.tail conftest.defines
done
rm -f conftest.defines conftest.tail

echo "ac_result=$ac_in" >>$CONFIG_STATUS
cat >>$CONFIG_STATUS <<\_ACEOF
  if test x"$ac_file" != x-; then
    echo "/* $configure_input */" >"$tmp/config.h"
    cat "$ac_result" >>"$tmp/config.h"
    # Leave an up-to-date header alone so its timestamp does not change.
    if diff "$ac_file" "$tmp/config.h" >/dev/null 2>&1; then
      { echo "$as_me:$LINENO: $ac_file is unchanged" >&5
echo "$as_me: $ac_file is unchanged" >&6;}
    else
      rm -f "$ac_file"
      mv "$tmp/config.h" "$ac_file"
    fi
  else
    echo "/* $configure_input */"
    cat "$ac_result"
  fi
  # The intermediate buffers are out1 and out2; a file called out12 is
  # never created, so name both explicitly.
  rm -f "$tmp/out1" "$tmp/out2"
# Compute $ac_file's index in $config_headers.
_am_stamp_count=1
for _am_header in $config_headers :; do
  case $_am_header in
    $ac_file | $ac_file:* )
      break ;;
    * )
      _am_stamp_count=`expr $_am_stamp_count + 1` ;;
  esac
done
echo "timestamp for $ac_file" >`$as_dirname -- "$ac_file" ||
$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
         X"$ac_file" : 'X\(//\)[^/]' \| \
         X"$ac_file" : 'X\(//\)$' \| \
         X"$ac_file" : 'X\(/\)' \| . 2>/dev/null ||
echo X"$ac_file" |
    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
            s//\1/
            q
          }
          /^X\(\/\/\)[^/].*/{
            s//\1/
            q
          }
          /^X\(\/\/\)$/{
            s//\1/
            q
          }
          /^X\(\/\).*/{
            s//\1/
            q
          }
          s/.*/./; q'`/stamp-h$_am_stamp_count
 ;;

  :C) { echo "$as_me:$LINENO: executing $ac_file commands" >&5
echo "$as_me: executing $ac_file commands" >&6;}
 ;;
  esac


  case $ac_file$ac_mode in
    "depfiles":C) test x"$AMDEP_TRUE" != x"" || for mf in $CONFIG_FILES; do
  # Strip MF so we end up with the name of the file.
  mf=`echo "$mf" | sed -e 's/:.*$//'`
  # Check whether this is an Automake generated Makefile or not.
  # We used to match only the files named `Makefile.in', but
  # some people rename them; so instead we look at the file content.
  # Grep'ing the first line is not enough: some people post-process
  # each Makefile.in and add a new line on top of each file to say so.
  # Grep'ing the whole file is not good either: AIX grep has a line
  # limit of 2048, but all sed's we know have understand at least 4000.
if sed 10q "$mf" | grep '^#.*generated by automake' > /dev/null 2>&1; then dirpart=`$as_dirname -- "$mf" || $as_expr X"$mf" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$mf" : 'X\(//\)[^/]' \| \ X"$mf" : 'X\(//\)$' \| \ X"$mf" : 'X\(/\)' \| . 2>/dev/null || echo X"$mf" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q'` else continue fi # Extract the definition of DEPDIR, am__include, and am__quote # from the Makefile without running `make'. DEPDIR=`sed -n 's/^DEPDIR = //p' < "$mf"` test -z "$DEPDIR" && continue am__include=`sed -n 's/^am__include = //p' < "$mf"` test -z "am__include" && continue am__quote=`sed -n 's/^am__quote = //p' < "$mf"` # When using ansi2knr, U may be empty or an underscore; expand it U=`sed -n 's/^U = //p' < "$mf"` # Find all dependency output files, they are included files with # $(DEPDIR) in their names. We invoke sed twice because it is the # simplest approach to changing $(DEPDIR) to its actual value in the # expansion. for file in `sed -n " s/^$am__include $am__quote\(.*(DEPDIR).*\)$am__quote"'$/\1/p' <"$mf" | \ sed -e 's/\$(DEPDIR)/'"$DEPDIR"'/g' -e 's/\$U/'"$U"'/g'`; do # Make sure the directory exists. test -f "$dirpart/$file" && continue fdir=`$as_dirname -- "$file" || $as_expr X"$file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$file" : 'X\(//\)[^/]' \| \ X"$file" : 'X\(//\)$' \| \ X"$file" : 'X\(/\)' \| . 
2>/dev/null || echo X"$file" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q'` { as_dir=$dirpart/$fdir case $as_dir in #( -*) as_dir=./$as_dir;; esac test -d "$as_dir" || { $as_mkdir_p && mkdir -p "$as_dir"; } || { as_dirs= while :; do case $as_dir in #( *\'*) as_qdir=`echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #( *) as_qdir=$as_dir;; esac as_dirs="'$as_qdir' $as_dirs" as_dir=`$as_dirname -- "$as_dir" || $as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$as_dir" : 'X\(//\)[^/]' \| \ X"$as_dir" : 'X\(//\)$' \| \ X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || echo X"$as_dir" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q'` test -d "$as_dir" && break done test -z "$as_dirs" || eval "mkdir $as_dirs" } || test -d "$as_dir" || { { echo "$as_me:$LINENO: error: cannot create directory $as_dir" >&5 echo "$as_me: error: cannot create directory $as_dir" >&2;} { (exit 1); exit 1; }; }; } # echo "creating $dirpart/$file" echo '# dummy' > "$dirpart/$file" done done ;; esac done # for ac_tag { (exit 0); exit 0; } _ACEOF chmod +x $CONFIG_STATUS ac_clean_files=$ac_clean_files_save # configure is writing to config.log, and then calls config.status. # config.status does its own redirection, appending to config.log. # Unfortunately, on DOS this fails, as config.log is still kept open # by configure, so config.status won't be able to write to it; its # output is simply discarded. So we exec the FD to /dev/null, # effectively closing config.log, so it can be properly (re)opened and # appended to by config.status. When coming back to configure, we # need to make the FD available again. 
if test "$no_create" != yes; then ac_cs_success=: ac_config_status_args= test "$silent" = yes && ac_config_status_args="$ac_config_status_args --quiet" exec 5>/dev/null $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false exec 5>>config.log # Use ||, not &&, to avoid exiting from the if with $? = 1, which # would make configure fail if this is the last instruction. $ac_cs_success || { (exit 1); exit 1; } fi einspline-0.9.2/doc/0000777000113000011300000000000011311505425011237 500000000000000einspline-0.9.2/doc/Makefile.in0000664000113000011300000002266511273633721013245 00000000000000# Makefile.in generated by automake 1.10 from Makefile.am. # @configure_input@ # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, # 2003, 2004, 2005, 2006 Free Software Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY, to the extent permitted by law; without # even the implied warranty of MERCHANTABILITY or FITNESS FOR A # PARTICULAR PURPOSE. 
@SET_MAKE@ VPATH = @srcdir@ pkgdatadir = $(datadir)/@PACKAGE@ pkglibdir = $(libdir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ subdir = doc DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \ $(top_srcdir)/m4/acx_pthread.m4 \ $(top_srcdir)/m4/ax_cc_maxopt.m4 \ $(top_srcdir)/m4/ax_cxx_maxopt.m4 \ $(top_srcdir)/m4/ax_f77_maxopt.m4 \ $(top_srcdir)/m4/ax_check_compiler_flags.m4 \ $(top_srcdir)/m4/ax_compiler_vendor.m4 \ $(top_srcdir)/m4/ax_cxx_compiler_vendor.m4 \ $(top_srcdir)/m4/ax_c_compiler_vendor.m4 \ $(top_srcdir)/m4/ax_f77_compiler_vendor.m4 \ $(top_srcdir)/m4/ax_gcc_aligns_stack.m4 \ $(top_srcdir)/m4/ax_gcc_archflag.m4 \ $(top_srcdir)/m4/ax_gxx_archflag.m4 \ $(top_srcdir)/m4/ax_gcc_version.m4 \ $(top_srcdir)/m4/ax_gcc_x86_cpuid.m4 \ $(top_srcdir)/m4/ax_ext.m4 $(top_srcdir)/m4/ac_cxx_restrict.m4 \ $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/src/config.h CONFIG_CLEAN_FILES = SOURCES = DIST_SOURCES = DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) ACLOCAL = @ACLOCAL@ ALL_STATIC = @ALL_STATIC@ AMTAR = @AMTAR@ AR = @AR@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CUDA_CFLAGS = @CUDA_CFLAGS@ CUDA_LIBS = @CUDA_LIBS@ CXX = @CXX@ CXXCPP = @CXXCPP@ CXXDEPMODE = @CXXDEPMODE@ CXXFLAGS = @CXXFLAGS@ 
CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ ECHO = @ECHO@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ F77 = @F77@ FFLAGS = @FFLAGS@ FFTW3F_CFLAGS = @FFTW3F_CFLAGS@ FFTW3F_LIBS = @FFTW3F_LIBS@ FFTW3_CFLAGS = @FFTW3_CFLAGS@ FFTW3_LIBS = @FFTW3_LIBS@ FLIBS = @FLIBS@ GREP = @GREP@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LIBTOOL = @LIBTOOL@ LN_S = @LN_S@ LTLIBOBJS = @LTLIBOBJS@ MAKEINFO = @MAKEINFO@ MKDIR_P = @MKDIR_P@ NVCC = @NVCC@ NVCCFLAGS = @NVCCFLAGS@ OBJEXT = @OBJEXT@ OPENMP_FLAG = @OPENMP_FLAG@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_VERSION = @PACKAGE_VERSION@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKGDATADEF = @PKGDATADEF@ PKG_CONFIG = @PKG_CONFIG@ POW_LIB = @POW_LIB@ PRTDIAG = @PRTDIAG@ PTHREAD_FLAG = @PTHREAD_FLAG@ RANLIB = @RANLIB@ SED = @SED@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMD_FLAGS = @SIMD_FLAGS@ STRIP = @STRIP@ VERSION = @VERSION@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_CC = @ac_ct_CC@ ac_ct_CXX = @ac_ct_CXX@ ac_ct_F77 = @ac_ct_F77@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ dvidir = @dvidir@ exec_prefix = @exec_prefix@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ includedir = @includedir@ infodir = @infodir@ 
install_sh = @install_sh@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target_alias = @target_alias@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ EXTRA_DIST = all: all-am .SUFFIXES: $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh \ && exit 0; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu doc/Makefile'; \ cd $(top_srcdir) && \ $(AUTOMAKE) --gnu doc/Makefile .PRECIOUS: Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ esac; $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh mostlyclean-libtool: -rm -f *.lo clean-libtool: -rm -rf .libs _libs tags: TAGS TAGS: ctags: CTAGS CTAGS: distdir: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e 
"s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \ fi; \ cp -pR $$d/$$file $(distdir)$$dir || exit 1; \ else \ test -f $(distdir)/$$file \ || cp -p $$d/$$file $(distdir)/$$file \ || exit 1; \ fi; \ done check-am: all-am check: check-am all-am: Makefile installdirs: install: install-am install-exec: install-exec-am install-data: install-data-am uninstall: uninstall-am install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-am install-strip: $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ `test -z '$(STRIP)' || \ echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install mostlyclean-generic: clean-generic: distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." 
clean: clean-am clean-am: clean-generic clean-libtool mostlyclean-am distclean: distclean-am -rm -f Makefile distclean-am: clean-am distclean-generic dvi: dvi-am dvi-am: html: html-am info: info-am info-am: install-data-am: install-dvi: install-dvi-am install-exec-am: install-html: install-html-am install-info: install-info-am install-man: install-pdf: install-pdf-am install-ps: install-ps-am installcheck-am: maintainer-clean: maintainer-clean-am -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-am mostlyclean-am: mostlyclean-generic mostlyclean-libtool pdf: pdf-am pdf-am: ps: ps-am ps-am: uninstall-am: .MAKE: install-am install-strip .PHONY: all all-am check check-am clean clean-generic clean-libtool \ distclean distclean-generic distclean-libtool distdir dvi \ dvi-am html html-am info info-am install install-am \ install-data install-data-am install-dvi install-dvi-am \ install-exec install-exec-am install-html install-html-am \ install-info install-info-am install-man install-pdf \ install-pdf-am install-ps install-ps-am install-strip \ installcheck installcheck-am installdirs maintainer-clean \ maintainer-clean-generic mostlyclean mostlyclean-generic \ mostlyclean-libtool pdf pdf-am ps ps-am uninstall uninstall-am # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: einspline-0.9.2/doc/Makefile.am0000664000113000011300000000001511012400560013175 00000000000000EXTRA_DIST = einspline-0.9.2/depcomp0000755000113000011300000004224611012400653011766 00000000000000#! /bin/sh # depcomp - compile a program generating dependencies as side-effects scriptversion=2006-10-15.18 # Copyright (C) 1999, 2000, 2003, 2004, 2005, 2006 Free Software # Foundation, Inc. 
# This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2, or (at your option) # any later version. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. # As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a # configuration script generated by Autoconf, you may include it under # the same distribution terms that you use for the rest of that program. # Originally written by Alexandre Oliva . case $1 in '') echo "$0: No command. Try \`$0 --help' for more information." 1>&2 exit 1; ;; -h | --h*) cat <<\EOF Usage: depcomp [--help] [--version] PROGRAM [ARGS] Run PROGRAMS ARGS to compile a file, generating dependencies as side-effects. Environment variables: depmode Dependency tracking mode. source Source file read by `PROGRAMS ARGS'. object Object file output by `PROGRAMS ARGS'. DEPDIR directory where to store dependencies. depfile Dependency file to output. tmpdepfile Temporary file to use when outputing dependencies. libtool Whether libtool is used (yes/no). Report bugs to . EOF exit $? ;; -v | --v*) echo "depcomp $scriptversion" exit $? ;; esac if test -z "$depmode" || test -z "$source" || test -z "$object"; then echo "depcomp: Variables source, object and depmode must be set" 1>&2 exit 1 fi # Dependencies for sub/bar.o or sub/bar.obj go into sub/.deps/bar.Po. 
depfile=${depfile-`echo "$object" | sed 's|[^\\/]*$|'${DEPDIR-.deps}'/&|;s|\.\([^.]*\)$|.P\1|;s|Pobj$|Po|'`} tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`} rm -f "$tmpdepfile" # Some modes work just like other modes, but use different flags. We # parameterize here, but still list the modes in the big case below, # to make depend.m4 easier to write. Note that we *cannot* use a case # here, because this file can only contain one case statement. if test "$depmode" = hp; then # HP compiler uses -M and no extra arg. gccflag=-M depmode=gcc fi if test "$depmode" = dashXmstdout; then # This is just like dashmstdout with a different argument. dashmflag=-xM depmode=dashmstdout fi case "$depmode" in gcc3) ## gcc 3 implements dependency tracking that does exactly what ## we want. Yay! Note: for some reason libtool 1.4 doesn't like ## it if -MD -MP comes after the -MF stuff. Hmm. ## Unfortunately, FreeBSD c89 acceptance of flags depends upon ## the command line argument order; so add the flags where they ## appear in depend2.am. Note that the slowdown incurred here ## affects only configure: in makefiles, %FASTDEP% shortcuts this. for arg do case $arg in -c) set fnord "$@" -MT "$object" -MD -MP -MF "$tmpdepfile" "$arg" ;; *) set fnord "$@" "$arg" ;; esac shift # fnord shift # $arg done "$@" stat=$? if test $stat -eq 0; then : else rm -f "$tmpdepfile" exit $stat fi mv "$tmpdepfile" "$depfile" ;; gcc) ## There are various ways to get dependency output from gcc. Here's ## why we pick this rather obscure method: ## - Don't want to use -MD because we'd like the dependencies to end ## up in a subdir. Having to rename by hand is ugly. ## (We might end up doing this anyway to support other compilers.) ## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like ## -MM, not -M (despite what the docs say). ## - Using -M directly means running the compiler twice (even worse ## than renaming). 
if test -z "$gccflag"; then gccflag=-MD, fi "$@" -Wp,"$gccflag$tmpdepfile" stat=$? if test $stat -eq 0; then : else rm -f "$tmpdepfile" exit $stat fi rm -f "$depfile" echo "$object : \\" > "$depfile" alpha=ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz ## The second -e expression handles DOS-style file names with drive letters. sed -e 's/^[^:]*: / /' \ -e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile" ## This next piece of magic avoids the `deleted header file' problem. ## The problem is that when a header file which appears in a .P file ## is deleted, the dependency causes make to die (because there is ## typically no way to rebuild the header). We avoid this by adding ## dummy dependencies for each header file. Too bad gcc doesn't do ## this for us directly. tr ' ' ' ' < "$tmpdepfile" | ## Some versions of gcc put a space before the `:'. On the theory ## that the space means something, we add a space to the output as ## well. ## Some versions of the HPUX 10.20 sed can't process this invocation ## correctly. Breaking it into two sed invocations is a workaround. sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile" rm -f "$tmpdepfile" ;; hp) # This case exists only to let depend.m4 do its work. It works by # looking at the text of this script. This case will never be run, # since it is checked for above. exit 1 ;; sgi) if test "$libtool" = yes; then "$@" "-Wp,-MDupdate,$tmpdepfile" else "$@" -MDupdate "$tmpdepfile" fi stat=$? if test $stat -eq 0; then : else rm -f "$tmpdepfile" exit $stat fi rm -f "$depfile" if test -f "$tmpdepfile"; then # yes, the sourcefile depend on other files echo "$object : \\" > "$depfile" # Clip off the initial element (the dependent). Don't try to be # clever and replace this with sed code, as IRIX sed won't handle # lines with more than a fixed number of characters (4096 in # IRIX 6.2 sed, 8192 in IRIX 6.5). 
We also remove comment lines; # the IRIX cc adds comments like `#:fec' to the end of the # dependency line. tr ' ' ' ' < "$tmpdepfile" \ | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' | \ tr ' ' ' ' >> $depfile echo >> $depfile # The second pass generates a dummy entry for each header file. tr ' ' ' ' < "$tmpdepfile" \ | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \ >> $depfile else # The sourcefile does not contain any dependencies, so just # store a dummy comment line, to avoid errors with the Makefile # "include basename.Plo" scheme. echo "#dummy" > "$depfile" fi rm -f "$tmpdepfile" ;; aix) # The C for AIX Compiler uses -M and outputs the dependencies # in a .u file. In older versions, this file always lives in the # current directory. Also, the AIX compiler puts `$object:' at the # start of each line; $object doesn't have directory information. # Version 6 uses the directory in both cases. stripped=`echo "$object" | sed 's/\(.*\)\..*$/\1/'` tmpdepfile="$stripped.u" if test "$libtool" = yes; then "$@" -Wc,-M else "$@" -M fi stat=$? if test -f "$tmpdepfile"; then : else stripped=`echo "$stripped" | sed 's,^.*/,,'` tmpdepfile="$stripped.u" fi if test $stat -eq 0; then : else rm -f "$tmpdepfile" exit $stat fi if test -f "$tmpdepfile"; then outname="$stripped.o" # Each line is of the form `foo.o: dependent.h'. # Do two passes, one to just change these to # `$object: dependent.h' and one to simply `dependent.h:'. sed -e "s,^$outname:,$object :," < "$tmpdepfile" > "$depfile" sed -e "s,^$outname: \(.*\)$,\1:," < "$tmpdepfile" >> "$depfile" else # The sourcefile does not contain any dependencies, so just # store a dummy comment line, to avoid errors with the Makefile # "include basename.Plo" scheme. echo "#dummy" > "$depfile" fi rm -f "$tmpdepfile" ;; icc) # Intel's C compiler understands `-MD -MF file'. 
However on # icc -MD -MF foo.d -c -o sub/foo.o sub/foo.c # ICC 7.0 will fill foo.d with something like # foo.o: sub/foo.c # foo.o: sub/foo.h # which is wrong. We want: # sub/foo.o: sub/foo.c # sub/foo.o: sub/foo.h # sub/foo.c: # sub/foo.h: # ICC 7.1 will output # foo.o: sub/foo.c sub/foo.h # and will wrap long lines using \ : # foo.o: sub/foo.c ... \ # sub/foo.h ... \ # ... "$@" -MD -MF "$tmpdepfile" stat=$? if test $stat -eq 0; then : else rm -f "$tmpdepfile" exit $stat fi rm -f "$depfile" # Each line is of the form `foo.o: dependent.h', # or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'. # Do two passes, one to just change these to # `$object: dependent.h' and one to simply `dependent.h:'. sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile" # Some versions of the HPUX 10.20 sed can't process this invocation # correctly. Breaking it into two sed invocations is a workaround. sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" | sed -e 's/$/ :/' >> "$depfile" rm -f "$tmpdepfile" ;; hp2) # The "hp" stanza above does not work with aCC (C++) and HP's ia64 # compilers, which have integrated preprocessors. The correct option # to use with these is +Maked; it writes dependencies to a file named # 'foo.d', which lands next to the object file, wherever that # happens to be. # Much of this is similar to the tru64 case; see comments there. dir=`echo "$object" | sed -e 's|/[^/]*$|/|'` test "x$dir" = "x$object" && dir= base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'` if test "$libtool" = yes; then tmpdepfile1=$dir$base.d tmpdepfile2=$dir.libs/$base.d "$@" -Wc,+Maked else tmpdepfile1=$dir$base.d tmpdepfile2=$dir$base.d "$@" +Maked fi stat=$? if test $stat -eq 0; then : else rm -f "$tmpdepfile1" "$tmpdepfile2" exit $stat fi for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" do test -f "$tmpdepfile" && break done if test -f "$tmpdepfile"; then sed -e "s,^.*\.[a-z]*:,$object:," "$tmpdepfile" > "$depfile" # Add `dependent.h:' lines. 
sed -ne '2,${; s/^ *//; s/ \\*$//; s/$/:/; p;}' "$tmpdepfile" >> "$depfile" else echo "#dummy" > "$depfile" fi rm -f "$tmpdepfile" "$tmpdepfile2" ;; tru64) # The Tru64 compiler uses -MD to generate dependencies as a side # effect. `cc -MD -o foo.o ...' puts the dependencies into `foo.o.d'. # At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put # dependencies in `foo.d' instead, so we check for that too. # Subdirectories are respected. dir=`echo "$object" | sed -e 's|/[^/]*$|/|'` test "x$dir" = "x$object" && dir= base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'` if test "$libtool" = yes; then # With Tru64 cc, shared objects can also be used to make a # static library. This mechanism is used in libtool 1.4 series to # handle both shared and static libraries in a single compilation. # With libtool 1.4, dependencies were output in $dir.libs/$base.lo.d. # # With libtool 1.5 this exception was removed, and libtool now # generates 2 separate objects for the 2 libraries. These two # compilations output dependencies in $dir.libs/$base.o.d and # in $dir$base.o.d. We have to check for both files, because # one of the two compilations can be disabled. We should prefer # $dir$base.o.d over $dir.libs/$base.o.d because the latter is # automatically cleaned when .libs/ is deleted, while ignoring # the former would cause a distcleancheck panic. tmpdepfile1=$dir.libs/$base.lo.d # libtool 1.4 tmpdepfile2=$dir$base.o.d # libtool 1.5 tmpdepfile3=$dir.libs/$base.o.d # libtool 1.5 tmpdepfile4=$dir.libs/$base.d # Compaq CCC V6.2-504 "$@" -Wc,-MD else tmpdepfile1=$dir$base.o.d tmpdepfile2=$dir$base.d tmpdepfile3=$dir$base.d tmpdepfile4=$dir$base.d "$@" -MD fi stat=$? 
if test $stat -eq 0; then : else rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" "$tmpdepfile4" exit $stat fi for tmpdepfile in "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" "$tmpdepfile4" do test -f "$tmpdepfile" && break done if test -f "$tmpdepfile"; then sed -e "s,^.*\.[a-z]*:,$object:," < "$tmpdepfile" > "$depfile" # That's a tab and a space in the []. sed -e 's,^.*\.[a-z]*:[ ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile" else echo "#dummy" > "$depfile" fi rm -f "$tmpdepfile" ;; #nosideeffect) # This comment above is used by automake to tell side-effect # dependency tracking mechanisms from slower ones. dashmstdout) # Important note: in order to support this mode, a compiler *must* # always write the preprocessed file to stdout, regardless of -o. "$@" || exit $? # Remove the call to Libtool. if test "$libtool" = yes; then while test $1 != '--mode=compile'; do shift done shift fi # Remove `-o $object'. IFS=" " for arg do case $arg in -o) shift ;; $object) shift ;; *) set fnord "$@" "$arg" shift # fnord shift # $arg ;; esac done test -z "$dashmflag" && dashmflag=-M # Require at least two characters before searching for `:' # in the target name. This is to cope with DOS-style filenames: # a dependency such as `c:/foo/bar' could be seen as target `c' otherwise. "$@" $dashmflag | sed 's:^[ ]*[^: ][^:][^:]*\:[ ]*:'"$object"'\: :' > "$tmpdepfile" rm -f "$depfile" cat < "$tmpdepfile" > "$depfile" tr ' ' ' ' < "$tmpdepfile" | \ ## Some versions of the HPUX 10.20 sed can't process this invocation ## correctly. Breaking it into two sed invocations is a workaround. sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile" rm -f "$tmpdepfile" ;; dashXmstdout) # This case only exists to satisfy depend.m4. It is never actually # run, as this mode is specially recognized in the preamble. exit 1 ;; makedepend) "$@" || exit $? 
# Remove any Libtool call if test "$libtool" = yes; then while test $1 != '--mode=compile'; do shift done shift fi # X makedepend shift cleared=no for arg in "$@"; do case $cleared in no) set ""; shift cleared=yes ;; esac case "$arg" in -D*|-I*) set fnord "$@" "$arg"; shift ;; # Strip any option that makedepend may not understand. Remove # the object too, otherwise makedepend will parse it as a source file. -*|$object) ;; *) set fnord "$@" "$arg"; shift ;; esac done obj_suffix="`echo $object | sed 's/^.*\././'`" touch "$tmpdepfile" ${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@" rm -f "$depfile" cat < "$tmpdepfile" > "$depfile" sed '1,2d' "$tmpdepfile" | tr ' ' ' ' | \ ## Some versions of the HPUX 10.20 sed can't process this invocation ## correctly. Breaking it into two sed invocations is a workaround. sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile" rm -f "$tmpdepfile" "$tmpdepfile".bak ;; cpp) # Important note: in order to support this mode, a compiler *must* # always write the preprocessed file to stdout. "$@" || exit $? # Remove the call to Libtool. if test "$libtool" = yes; then while test $1 != '--mode=compile'; do shift done shift fi # Remove `-o $object'. IFS=" " for arg do case $arg in -o) shift ;; $object) shift ;; *) set fnord "$@" "$arg" shift # fnord shift # $arg ;; esac done "$@" -E | sed -n -e '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' \ -e '/^#line [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' | sed '$ s: \\$::' > "$tmpdepfile" rm -f "$depfile" echo "$object : \\" > "$depfile" cat < "$tmpdepfile" >> "$depfile" sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile" rm -f "$tmpdepfile" ;; msvisualcpp) # Important note: in order to support this mode, a compiler *must* # always write the preprocessed file to stdout, regardless of -o, # because we must use -o when running libtool. "$@" || exit $? 
IFS=" " for arg do case "$arg" in "-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI") set fnord "$@" shift shift ;; *) set fnord "$@" "$arg" shift shift ;; esac done "$@" -E | sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::echo "`cygpath -u \\"\1\\"`":p' | sort | uniq > "$tmpdepfile" rm -f "$depfile" echo "$object : \\" > "$depfile" . "$tmpdepfile" | sed 's% %\\ %g' | sed -n '/^\(.*\)$/ s:: \1 \\:p' >> "$depfile" echo " " >> "$depfile" . "$tmpdepfile" | sed 's% %\\ %g' | sed -n '/^\(.*\)$/ s::\1\::p' >> "$depfile" rm -f "$tmpdepfile" ;; none) exec "$@" ;; *) echo "Unknown depmode $depmode" 1>&2 exit 1 ;; esac exit 0 # Local Variables: # mode: shell-script # sh-indentation: 2 # eval: (add-hook 'write-file-hooks 'time-stamp) # time-stamp-start: "scriptversion=" # time-stamp-format: "%:y-%02m-%02d.%02H" # time-stamp-end: "$" # End: einspline-0.9.2/NEWS0000664000113000011300000000000011012400563011070 00000000000000einspline-0.9.2/m4/0000777000113000011300000000000011311505423011010 500000000000000einspline-0.9.2/m4/ax_compiler_vendor.m40000664000113000011300000000265711012400556015060 00000000000000dnl @synopsis AX_COMPILER_VENDOR dnl @summary find the vendor (gnu, intel, etc.) of the C/C++ compiler dnl @category C dnl @category C++ dnl dnl Determine the vendor of the C/C++ compiler, e.g., gnu, intel, ibm, dnl sun, hp, borland, comeau, dec, cray, kai, lcc, metrowerks, sgi, dnl microsoft, watcom, etc. The vendor is returned in the cache variable dnl $ax_cv_c_compiler_vendor for C and $ax_cv_cxx_compiler_vendor for C++. dnl dnl @version 2005-05-30 dnl @license GPLWithACException dnl @author Steven G. 
Johnson with Matteo Frigo AC_DEFUN([AX_COMPILER_VENDOR], [ AC_CACHE_CHECK([for _AC_LANG compiler vendor], ax_cv_[]_AC_LANG_ABBREV[]_compiler_vendor, [ax_cv_[]_AC_LANG_ABBREV[]_compiler_vendor=unknown # note: don't check for gcc first since some other compilers define __GNUC__ for ventest in intel:__ICC,__ECC,__INTEL_COMPILER ibm:__xlc__,__xlC__,__IBMC__,__IBMCPP__ gnu:__GNUC__ sun:__SUNPRO_C,__SUNPRO_CC hp:__HP_cc,__HP_aCC dec:__DECC,__DECCXX,__DECC_VER,__DECCXX_VER borland:__BORLANDC__,__TURBOC__ comeau:__COMO__ cray:_CRAYC kai:__KCC lcc:__LCC__ metrowerks:__MWERKS__ sgi:__sgi,sgi microsoft:_MSC_VER watcom:__WATCOMC__ portland:__PGI; do vencpp="defined("`echo $ventest | cut -d: -f2 | sed 's/,/) || defined(/g'`")" AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,[ #if !($vencpp) thisisanerror; #endif ])], [ax_cv_]_AC_LANG_ABBREV[_compiler_vendor=`echo $ventest | cut -d: -f1`; break]) done ]) ]) einspline-0.9.2/m4/ax_gxx_archflag.m40000664000113000011300000001633711012400556014326 00000000000000dnl @synopsis AX_GCC_ARCHFLAG([PORTABLE?], [ACTION-SUCCESS], [ACTION-FAILURE]) dnl @summary find target architecture name for gcc -march/-mtune flags dnl @category Misc dnl dnl This macro tries to guess the "native" arch corresponding to dnl the target architecture for use with gcc's -march=arch or -mtune=arch dnl flags. If found, the cache variable $ax_cv_gcc_archflag is set to this dnl flag and ACTION-SUCCESS is executed; otherwise $ax_cv_gcc_archflag is dnl is set to "unknown" and ACTION-FAILURE is executed. The default dnl ACTION-SUCCESS is to add $ax_cv_gcc_archflag to the end of $CFLAGS. dnl dnl PORTABLE? should be either [yes] (default) or [no]. In the former case, dnl the flag is set to -mtune (or equivalent) so that the architecture dnl is only used for tuning, but the instruction set used is still dnl portable. In the latter case, the flag is set to -march (or equivalent) dnl so that architecture-specific instructions are enabled. 
dnl dnl The user can specify --with-gcc-arch= in order to override dnl the macro's choice of architecture, or --without-gcc-arch to dnl disable this. dnl dnl When cross-compiling, or if $CC is not gcc, then ACTION-FAILURE is dnl called unless the user specified --with-gcc-arch manually. dnl dnl Requires macros: AX_CHECK_COMPILER_FLAGS, AX_GCC_X86_CPUID dnl dnl (The main emphasis here is on recent CPUs, on the principle that dnl doing high-performance computing on old hardware is uncommon.) dnl dnl @version 2006-01-04 dnl @license GPLWithACException dnl @author Steven G. Johnson and Matteo Frigo. AC_DEFUN([AX_GXX_ARCHFLAG], [AC_REQUIRE([AC_PROG_CXX]) AC_REQUIRE([AC_CANONICAL_HOST]) AC_ARG_WITH(gcc-arch, [AC_HELP_STRING([--with-gcc-arch=], [use architecture for gcc -march/-mtune, instead of guessing])], ax_gcc_arch=$withval, ax_gcc_arch=yes) AC_MSG_CHECKING([for gcc architecture flag]) AC_MSG_RESULT([]) AC_CACHE_VAL(ax_cv_gcc_archflag, [ ax_cv_gcc_archflag="unknown" if test "$GCC" = yes; then if test "x$ax_gcc_arch" = xyes; then ax_gcc_arch="" if test "$cross_compiling" = no; then case $host_cpu in i[[3456]]86*|x86_64*) # use cpuid codes, in part from x86info-1.7 by D. 
Jones AX_GCC_X86_CPUID(0) AX_GCC_X86_CPUID(1) case $ax_cv_gcc_x86_cpuid_0 in *:756e6547:*:*) # Intel case $ax_cv_gcc_x86_cpuid_1 in *5[[48]]?:*:*:*) ax_gcc_arch="pentium-mmx pentium" ;; *5??:*:*:*) ax_gcc_arch=pentium ;; *6[[3456]]?:*:*:*) ax_gcc_arch="pentium2 pentiumpro" ;; *6a?:*[[01]]:*:*) ax_gcc_arch="pentium2 pentiumpro" ;; *6a?:*[[234]]:*:*) ax_gcc_arch="pentium3 pentiumpro" ;; *6[[9d]]?:*:*:*) ax_gcc_arch="pentium-m pentium3 pentiumpro" ;; *6[[78b]]?:*:*:*) ax_gcc_arch="pentium3 pentiumpro" ;; *6??:*:*:*) ax_gcc_arch=pentiumpro ;; *f3[[347]]:*:*:*|*f4[1347]:*:*:*) case $host_cpu in x86_64*) ax_gcc_arch="nocona pentium4 pentiumpro" ;; *) ax_gcc_arch="prescott pentium4 pentiumpro" ;; esac ;; *f??:*:*:*) ax_gcc_arch="pentium4 pentiumpro";; esac ;; *:68747541:*:*) # AMD case $ax_cv_gcc_x86_cpuid_1 in *5[[67]]?:*:*:*) ax_gcc_arch=k6 ;; *5[[8d]]?:*:*:*) ax_gcc_arch="k6-2 k6" ;; *5[[9]]?:*:*:*) ax_gcc_arch="k6-3 k6" ;; *60?:*:*:*) ax_gcc_arch=k7 ;; *6[[12]]?:*:*:*) ax_gcc_arch="athlon k7" ;; *6[[34]]?:*:*:*) ax_gcc_arch="athlon-tbird k7" ;; *67?:*:*:*) ax_gcc_arch="athlon-4 athlon k7" ;; *6[[68a]]?:*:*:*) AX_GCC_X86_CPUID(0x80000006) # L2 cache size case $ax_cv_gcc_x86_cpuid_0x80000006 in *:*:*[[1-9a-f]]??????:*) # (L2 = ecx >> 16) >= 256 ax_gcc_arch="athlon-xp athlon-4 athlon k7" ;; *) ax_gcc_arch="athlon-4 athlon k7" ;; esac ;; *f[[4cef8b]]?:*:*:*) ax_gcc_arch="athlon64 k8" ;; *f5?:*:*:*) ax_gcc_arch="opteron k8" ;; *f7?:*:*:*) ax_gcc_arch="athlon-fx opteron k8" ;; *f??:*:*:*) ax_gcc_arch="k8" ;; esac ;; *:746e6543:*:*) # IDT case $ax_cv_gcc_x86_cpuid_1 in *54?:*:*:*) ax_gcc_arch=winchip-c6 ;; *58?:*:*:*) ax_gcc_arch=winchip2 ;; *6[[78]]?:*:*:*) ax_gcc_arch=c3 ;; *69?:*:*:*) ax_gcc_arch="c3-2 c3" ;; esac ;; esac if test x"$ax_gcc_arch" = x; then # fallback case $host_cpu in i586*) ax_gcc_arch=pentium ;; i686*) ax_gcc_arch=pentiumpro ;; esac fi ;; sparc*) AC_PATH_PROG([PRTDIAG], [prtdiag], [prtdiag], [$PATH:/usr/platform/`uname -i`/sbin/:/usr/platform/`uname 
-m`/sbin/]) cputype=`(((grep cpu /proc/cpuinfo | cut -d: -f2) ; ($PRTDIAG -v |grep -i sparc) ; grep -i cpu /var/run/dmesg.boot ) | head -n 1) 2> /dev/null` cputype=`echo "$cputype" | tr -d ' -' |tr $as_cr_LETTERS $as_cr_letters` case $cputype in *ultrasparciv*) ax_gcc_arch="ultrasparc4 ultrasparc3 ultrasparc v9" ;; *ultrasparciii*) ax_gcc_arch="ultrasparc3 ultrasparc v9" ;; *ultrasparc*) ax_gcc_arch="ultrasparc v9" ;; *supersparc*|*tms390z5[[05]]*) ax_gcc_arch="supersparc v8" ;; *hypersparc*|*rt62[[056]]*) ax_gcc_arch="hypersparc v8" ;; *cypress*) ax_gcc_arch=cypress ;; esac ;; alphaev5) ax_gcc_arch=ev5 ;; alphaev56) ax_gcc_arch=ev56 ;; alphapca56) ax_gcc_arch="pca56 ev56" ;; alphapca57) ax_gcc_arch="pca57 pca56 ev56" ;; alphaev6) ax_gcc_arch=ev6 ;; alphaev67) ax_gcc_arch=ev67 ;; alphaev68) ax_gcc_arch="ev68 ev67" ;; alphaev69) ax_gcc_arch="ev69 ev68 ev67" ;; alphaev7) ax_gcc_arch="ev7 ev69 ev68 ev67" ;; alphaev79) ax_gcc_arch="ev79 ev7 ev69 ev68 ev67" ;; powerpc*) cputype=`((grep cpu /proc/cpuinfo | head -n 1 | cut -d: -f2 | cut -d, -f1 | sed 's/ //g') ; /usr/bin/machine ; /bin/machine; grep CPU /var/run/dmesg.boot | head -n 1 | cut -d" " -f2) 2> /dev/null` cputype=`echo $cputype | sed -e 's/ppc//g;s/ *//g'` case $cputype in *750*) ax_gcc_arch="750 G3" ;; *740[[0-9]]*) ax_gcc_arch="$cputype 7400 G4" ;; *74[[4-5]][[0-9]]*) ax_gcc_arch="$cputype 7450 G4" ;; *74[[0-9]][[0-9]]*) ax_gcc_arch="$cputype G4" ;; *970*) ax_gcc_arch="970 G5 power4";; *POWER4*|*power4*|*gq*) ax_gcc_arch="power4 970";; *POWER5*|*power5*|*gr*|*gs*) ax_gcc_arch="power5 power4 970";; 603ev|8240) ax_gcc_arch="$cputype 603e 603";; *) ax_gcc_arch=$cputype ;; esac ax_gcc_arch="$ax_gcc_arch powerpc" ;; esac fi # not cross-compiling fi # guess arch if test "x$ax_gcc_arch" != x -a "x$ax_gcc_arch" != xno; then for arch in $ax_gcc_arch; do if test "x[]m4_default([$1],yes)" = xyes; then # if we require portable code flags="-mtune=$arch" # -mcpu=$arch and m$arch generate nonportable code on every arch 
except # x86. And some other arches (e.g. Alpha) don't accept -mtune. Grrr. case $host_cpu in i*86|x86_64*) flags="$flags -mcpu=$arch -m$arch";; esac else flags="-march=$arch -mcpu=$arch -m$arch" fi for flag in $flags; do AX_CHECK_COMPILER_FLAGS($flag, [ax_cv_gcc_archflag=$flag; break]) done test "x$ax_cv_gcc_archflag" = xunknown || break done fi fi # $GCC=yes ]) AC_MSG_CHECKING([for gcc architecture flag]) AC_MSG_RESULT($ax_cv_gcc_archflag) if test "x$ax_cv_gcc_archflag" = xunknown; then m4_default([$3],:) else m4_default([$2], [CXXFLAGS="$CXXFLAGS $ax_cv_gcc_archflag"]) fi ]) einspline-0.9.2/m4/ax_check_compiler_flags.m40000664000113000011300000000316711012400556016011 00000000000000dnl @synopsis AX_CHECK_COMPILER_FLAGS(FLAGS, [ACTION-SUCCESS], [ACTION-FAILURE]) dnl @summary check whether FLAGS are accepted by the compiler dnl @category Misc dnl dnl Check whether the given compiler FLAGS work with the current language's dnl compiler, or whether they give an error. (Warnings, however, are dnl ignored.) dnl dnl ACTION-SUCCESS/ACTION-FAILURE are shell commands to execute on dnl success/failure. dnl dnl @version 2005-05-30 dnl @license GPLWithACException dnl @author Steven G. Johnson and Matteo Frigo. 
AC_DEFUN([AX_CHECK_COMPILER_FLAGS], [AC_PREREQ(2.59) dnl for _AC_LANG_PREFIX AC_MSG_CHECKING([whether _AC_LANG compiler accepts $1]) dnl Some hackery here since AC_CACHE_VAL can't handle a non-literal varname: AS_LITERAL_IF([$1], [AC_CACHE_VAL(AS_TR_SH(ax_cv_[]_AC_LANG_ABBREV[]_flags_$1), [ ax_save_FLAGS=$[]_AC_LANG_PREFIX[]FLAGS _AC_LANG_PREFIX[]FLAGS="$1" AC_COMPILE_IFELSE([AC_LANG_PROGRAM()], AS_TR_SH(ax_cv_[]_AC_LANG_ABBREV[]_flags_$1)=yes, AS_TR_SH(ax_cv_[]_AC_LANG_ABBREV[]_flags_$1)=no) _AC_LANG_PREFIX[]FLAGS=$ax_save_FLAGS])], [ax_save_FLAGS=$[]_AC_LANG_PREFIX[]FLAGS _AC_LANG_PREFIX[]FLAGS="$1" AC_COMPILE_IFELSE([AC_LANG_PROGRAM()], eval AS_TR_SH(ax_cv_[]_AC_LANG_ABBREV[]_flags_$1)=yes, eval AS_TR_SH(ax_cv_[]_AC_LANG_ABBREV[]_flags_$1)=no) _AC_LANG_PREFIX[]FLAGS=$ax_save_FLAGS]) eval ax_check_compiler_flags=$AS_TR_SH(ax_cv_[]_AC_LANG_ABBREV[]_flags_$1) AC_MSG_RESULT($ax_check_compiler_flags) if test "x$ax_check_compiler_flags" = xyes; then m4_default([$2], :) else m4_default([$3], :) fi ])dnl AX_CHECK_COMPILER_FLAGS einspline-0.9.2/m4/ax_f77_maxopt.m40000664000113000011300000001324311012400556013655 00000000000000dnl @synopsis AX_CXX_MAXOPT dnl @summary turn on optimization flags for the C compiler dnl @category C dnl dnl Try to turn on "good" C optimization flags for various compilers dnl and architectures, for some definition of "good". (In our case, dnl good for FFTW and hopefully for other scientific codes. Modify dnl as needed.) dnl dnl The user can override the flags by setting the CXXFLAGS environment dnl variable. The user can also specify --enable-portable-binary in dnl order to disable any optimization flags that might result in dnl a binary that only runs on the host architecture. dnl dnl Note also that the flags assume that ANSI C aliasing rules are dnl followed by the code (e.g. for gcxx's -fstrict-aliasing), and that dnl floating-point computations can be re-ordered as needed. 
dnl dnl Requires macros: AX_CHECK_COMPILER_FLAGS, AX_COMPILER_VENDOR, dnl AX_GCC_ARCHFLAG, AX_GCC_X86_CPUID dnl dnl @version 2005-05-30 dnl @license GPLWithACException dnl @author Steven G. Johnson and Matteo Frigo. AC_DEFUN([AX_F77_MAXOPT], [ AC_LANG_SAVE AC_LANG_FORTRAN77 AC_REQUIRE([AC_PROG_F77]) AC_REQUIRE([AX_F77_COMPILER_VENDOR]) AC_REQUIRE([AC_CANONICAL_HOST]) AC_ARG_ENABLE(portable-binary, [AC_HELP_STRING([--enable-portable-binary], [disable compiler optimizations that would produce unportable binaries])], acx_maxopt_portable=$withval, acx_maxopt_portable=no) # Try to determine "good" native compiler flags if none specified via FFLAGS if test "$ac_test_FFLAGS" != "set"; then FFLAGS="" case $ax_cv_f77_compiler_vendor in dec) FFLAGS="-newc -w0 -O5 -ansi_alias -ansi_args -fp_reorder -tune host" if test "x$acx_maxopt_portable" = xno; then FFLAGS="$FFLAGS -arch host" fi;; sun) FFLAGS="-native -fast -xO5 -dalign" if test "x$acx_maxopt_portable" = xyes; then FFLAGS="$FFLAGS -xarch=generic" fi;; hp) FFLAGS="+Oall +Optrs_ansi +DSnative" if test "x$acx_maxopt_portable" = xyes; then FFLAGS="$FFLAGS +DAportable" fi;; ibm) if test "x$acx_maxopt_portable" = xno; then xlc_opt="-qarch=auto -qtune=auto" else xlc_opt="-qtune=auto" fi AX_CHECK_COMPILER_FLAGS($xlc_opt, FFLAGS="-O3 -qansialias -w $xlc_opt", [FFLAGS="-O3 -qansialias -w" echo "******************************************************" echo "* You seem to have the IBM C compiler. It is *" echo "* recommended for best performance that you use: *" echo "* *" echo "* FFLAGS=-O3 -qarch=xxx -qtune=xxx -qansialias -w *" echo "* ^^^ ^^^ *" echo "* where xxx is pwr2, pwr3, 604, or whatever kind of *" echo "* CPU you have. (Set the FFLAGS environment var. *" echo "* and re-run configure.) For more info, man cxx. 
*" echo "******************************************************"]) ;; intel) FFLAGS="-O3 -ansi_alias" if test "x$acx_maxopt_portable" = xno; then icc_archflag=unknown icc_flags="" case $host_cpu in i686*|x86_64*) # icc accepts gcc assembly syntax, so these should work: AX_GCC_X86_CPUID(0) AX_GCC_X86_CPUID(1) case $ax_cv_gcc_x86_cpuid_0 in # see AX_GCC_ARCHFLAG *:756e6547:*:*) # Intel case $ax_cv_gcc_x86_cpuid_1 in 1067?:*:*:*) icc_flags="-xS";; *6a?:*[[234]]:*:*|*6[[789b]]?:*:*:*) icc_flags="-xK";; *6f?:*:*:*) icc_flags="-xT";; *f3[[347]]:*:*:*|*f4[[1347a]]:*:*:*) icc_flags="-xP -xN -xW -xK";; *f??:*:*:*) icc_flags="-xN -xW -xK";; esac ;; esac ;; esac if test "x$icc_flags" != x; then for flag in $icc_flags; do AX_CHECK_COMPILER_FLAGS($flag, [icc_archflag=$flag; break]) done fi AC_MSG_CHECKING([for icc architecture flag]) AC_MSG_RESULT($icc_archflag) if test "x$icc_archflag" != xunknown; then FFLAGS="$FFLAGS $icc_archflag" fi fi ;; gnu) # default optimization flags for gcc on all systems FFLAGS="-O3 -fomit-frame-pointer" # -malign-double for x86 systems AX_CHECK_COMPILER_FLAGS(-malign-double, FFLAGS="$FFLAGS -malign-double") # -fstrict-aliasing for gcc-2.95+ AX_CHECK_COMPILER_FLAGS(-fstrict-aliasing, FFLAGS="$FFLAGS -fstrict-aliasing") # note that we enable "unsafe" fp optimization with other compilers, too AX_CHECK_COMPILER_FLAGS(-ffast-math, FFLAGS="$FFLAGS -ffast-math") AX_GXX_ARCHFLAG($acx_maxopt_portable) ;; esac if test -z "$FFLAGS"; then echo "" echo "**********************************************************" echo "* WARNING: Don't know the best FFLAGS for this system *" echo "* Use ./configure FFLAGS=... 
to specify your own flags *" echo "* (otherwise, a default of FFLAGS=-O3 will be used) *" echo "**********************************************************" echo "" FFLAGS="-O3" fi AX_CHECK_COMPILER_FLAGS($FFLAGS, [], [ echo "" echo "**********************************************************" echo "* WARNING: The guessed FFLAGS don't seem to work with *" echo "* your compiler. *" echo "* Use ./configure FFLAGS=... to specify your own flags *" echo "**********************************************************" echo "" FFLAGS="" ]) fi AC_LANG_RESTORE ]) einspline-0.9.2/m4/ax_cc_maxopt.m40000664000113000011300000001312311012400556013634 00000000000000dnl @synopsis AX_CC_MAXOPT dnl @summary turn on optimization flags for the C compiler dnl @category C dnl dnl Try to turn on "good" C optimization flags for various compilers dnl and architectures, for some definition of "good". (In our case, dnl good for FFTW and hopefully for other scientific codes. Modify dnl as needed.) dnl dnl The user can override the flags by setting the CFLAGS environment dnl variable. The user can also specify --enable-portable-binary in dnl order to disable any optimization flags that might result in dnl a binary that only runs on the host architecture. dnl dnl Note also that the flags assume that ANSI C aliasing rules are dnl followed by the code (e.g. for gcc's -fstrict-aliasing), and that dnl floating-point computations can be re-ordered as needed. dnl dnl Requires macros: AX_CHECK_COMPILER_FLAGS, AX_COMPILER_VENDOR, dnl AX_GCC_ARCHFLAG, AX_GCC_X86_CPUID dnl dnl @version 2005-05-30 dnl @license GPLWithACException dnl @author Steven G. Johnson and Matteo Frigo. 
AC_DEFUN([AX_CC_MAXOPT], [ AC_REQUIRE([AC_PROG_CC]) AC_REQUIRE([AX_C_COMPILER_VENDOR]) AC_REQUIRE([AC_CANONICAL_HOST]) AC_ARG_ENABLE(portable-binary, [AC_HELP_STRING([--enable-portable-binary], [disable compiler optimizations that would produce unportable binaries])], acx_maxopt_portable=$withval, acx_maxopt_portable=no) # Try to determine "good" native compiler flags if none specified via CFLAGS if test "$ac_test_CFLAGS" != "set"; then CFLAGS="" case $ax_cv_c_compiler_vendor in dec) CFLAGS="-newc -w0 -O5 -ansi_alias -ansi_args -fp_reorder -tune host" if test "x$acx_maxopt_portable" = xno; then CFLAGS="$CFLAGS -arch host" fi;; sun) CFLAGS="-native -fast -xO5 -dalign" if test "x$acx_maxopt_portable" = xyes; then CFLAGS="$CFLAGS -xarch=generic" fi;; hp) CFLAGS="+Oall +Optrs_ansi +DSnative" if test "x$acx_maxopt_portable" = xyes; then CFLAGS="$CFLAGS +DAportable" fi;; ibm) if test "x$acx_maxopt_portable" = xno; then xlc_opt="-qarch=auto -qtune=auto" else xlc_opt="-qtune=auto" fi AX_CHECK_COMPILER_FLAGS($xlc_opt, CFLAGS="-O3 -qansialias -w $xlc_opt", [CFLAGS="-O3 -qansialias -w" echo "******************************************************" echo "* You seem to have the IBM C compiler. It is *" echo "* recommended for best performance that you use: *" echo "* *" echo "* CFLAGS=-O3 -qarch=xxx -qtune=xxx -qansialias -w *" echo "* ^^^ ^^^ *" echo "* where xxx is pwr2, pwr3, 604, or whatever kind of *" echo "* CPU you have. (Set the CFLAGS environment var. *" echo "* and re-run configure.) For more info, man cc. 
*" echo "******************************************************"]) ;; intel) CFLAGS="-O3" if test "x$acx_maxopt_portable" = xno; then icc_archflag=unknown icc_flags="" case $host_cpu in i686*|x86_64*) # icc accepts gcc assembly syntax, so these should work: AX_GCC_X86_CPUID(0) AX_GCC_X86_CPUID(1) case $ax_cv_gcc_x86_cpuid_0 in # see AX_GCC_ARCHFLAG *:756e6547:*:*) # Intel case $ax_cv_gcc_x86_cpuid_1 in 1067?:*:*:*) icc_flags="-xS";; *6a?:*[[234]]:*:*|*6[[789b]]?:*:*:*) icc_flags="-xK";; *6f?:*:*:*) icc_flags="-xT";; *f3[[347]]:*:*:*|*f4[[1347a]]:*:*:*) icc_flags="-xP -xN -xW -xK";; *f??:*:*:*) icc_flags="-xN -xW -xK";; esac ;; esac ;; esac if test "x$icc_flags" != x; then for flag in $icc_flags; do AX_CHECK_COMPILER_FLAGS($flag, [icc_archflag=$flag; break]) done fi AC_MSG_CHECKING([for icc architecture flag]) AC_MSG_RESULT($icc_archflag) if test "x$icc_archflag" != xunknown; then CFLAGS="$CFLAGS $icc_archflag" fi fi ;; gnu) # default optimization flags for gcc on all systems CFLAGS="-O3 -fomit-frame-pointer" # -malign-double for x86 systems AX_CHECK_COMPILER_FLAGS(-malign-double, CFLAGS="$CFLAGS -malign-double") # -fstrict-aliasing for gcc-2.95+ AX_CHECK_COMPILER_FLAGS(-fstrict-aliasing, CFLAGS="$CFLAGS -fstrict-aliasing") # note that we enable "unsafe" fp optimization with other compilers, too AX_CHECK_COMPILER_FLAGS(-ffast-math, CFLAGS="$CFLAGS -ffast-math") AX_GCC_ARCHFLAG($acx_maxopt_portable) ;; esac if test -z "$CFLAGS"; then echo "" echo "********************************************************" echo "* WARNING: Don't know the best CFLAGS for this system *" echo "* Use ./configure CFLAGS=... 
to specify your own flags *" echo "* (otherwise, a default of CFLAGS=-O3 will be used) *" echo "********************************************************" echo "" CFLAGS="-O3" fi AX_CHECK_COMPILER_FLAGS($CFLAGS, [], [ echo "" echo "********************************************************" echo "* WARNING: The guessed CFLAGS don't seem to work with *" echo "* your compiler. *" echo "* Use ./configure CFLAGS=... to specify your own flags *" echo "********************************************************" echo "" CFLAGS="" ]) fi ]) einspline-0.9.2/m4/ax_ext.m40000664000113000011300000000556711117544104012500 00000000000000dnl Copyright © 2007 Christophe Tournayre dnl Copying and distribution of this file, with or without modification, dnl are permitted in any medium without royalty provided the copyright dnl notice and this notice are preserved. AC_DEFUN([AX_EXT], [ AC_REQUIRE([AX_GCC_X86_CPUID]) AX_GCC_X86_CPUID(1) ecx=`echo $ax_cv_gcc_x86_cpuid_1 | cut -d ":" -f 3` edx=`echo $ax_cv_gcc_x86_cpuid_1 | cut -d ":" -f 4` AC_CACHE_CHECK([whether mmx is supported], [ax_have_mmx_ext], [ ax_have_mmx_ext=no if test "$((0x$edx>>23&0x01))" = 1; then ax_have_mmx_ext=yes fi ]) AC_CACHE_CHECK([whether sse is supported], [ax_have_sse_ext], [ ax_have_sse_ext=no if test "$((0x$edx>>25&0x01))" = 1; then ax_have_sse_ext=yes fi ]) AC_CACHE_CHECK([whether sse2 is supported], [ax_have_sse2_ext], [ ax_have_sse2_ext=no if test "$((0x$edx>>26&0x01))" = 1; then ax_have_sse2_ext=yes fi ]) AC_CACHE_CHECK([whether sse3 is supported], [ax_have_sse3_ext], [ ax_have_sse3_ext=no if test "$((0x$ecx&0x01))" = 1; then ax_have_sse3_ext=yes fi ]) AC_CACHE_CHECK([whether ssse3 is supported], [ax_have_ssse3_ext], [ ax_have_ssse3_ext=no if test "$((0x$ecx>>9&0x01))" = 1; then ax_have_ssse3_ext=yes fi ]) AC_CACHE_CHECK([whether sse4.1 is supported], [ax_have_sse4_1_ext], [ ax_have_sse4_1_ext=no if test "$((0x$ecx>>19&0x01))" = 1; then ax_have_sse4_1_ext=yes fi ]) AC_CACHE_CHECK([whether sse4.2 is supported], 
[ax_have_sse4_2_ext], [ ax_have_sse4_2_ext=no if test "$((0x$ecx>>20&0x01))" = 1; then ax_have_sse4_2_ext=yes fi ]) if test "$ax_have_mmx_ext" = yes; then AC_DEFINE(HAVE_MMX,,[Support mmx instructions]) AX_CHECK_COMPILER_FLAGS(-mmmx, SIMD_FLAGS="$SIMD_FLAGS -mmmx", []) fi if test "$ax_have_sse_ext" = yes; then AC_DEFINE(HAVE_SSE,,[Support SSE (Streaming SIMD Extensions) instructions]) AX_CHECK_COMPILER_FLAGS(-msse, SIMD_FLAGS="$SIMD_FLAGS -msse", []) fi if test "$ax_have_sse2_ext" = yes; then AC_DEFINE(HAVE_SSE2,,[Support SSE2 (Streaming SIMD Extensions 2) instructions]) AX_CHECK_COMPILER_FLAGS(-msse2, SIMD_FLAGS="$SIMD_FLAGS -msse2", []) fi if test "$ax_have_sse3_ext" = yes; then AC_DEFINE(HAVE_SSE3,,[Support SSE3 (Streaming SIMD Extensions 3) instructions]) AX_CHECK_COMPILER_FLAGS(-msse3, SIMD_FLAGS="$SIMD_FLAGS -msse3", []) fi if test "$ax_have_ssse3_ext" = yes; then AC_DEFINE(HAVE_SSSE3,,[Support SSSE3 (Supplemental Streaming SIMD Extensions 3) instructions]) fi if test "$ax_have_sse4_1_ext" = yes; then AC_DEFINE(HAVE_SSE4_1,,[Support SSE4.1 (Streaming SIMD Extensions 4.1) instructions]) fi if test "$ax_have_sse4_2_ext" = yes; then AC_DEFINE(HAVE_SSE4_2,,[Support SSE4.2 (Streaming SIMD Extensions 4.2) instructions]) fi AC_SUBST(SIMD_FLAGS) ]) einspline-0.9.2/m4/ax_gcc_version.m40000664000113000011300000000215011012400556014156 00000000000000dnl @synopsis AX_GCC_VERSION(MAJOR, MINOR, PATCHLEVEL, [ACTION-SUCCESS], [ACTION-FAILURE]) dnl @summary check wither gcc is at least version MAJOR.MINOR.PATCHLEVEL dnl @category InstalledPackages dnl dnl Check whether we are using gcc and, if so, whether its version dnl is at least MAJOR.MINOR.PATCHLEVEL dnl dnl ACTION-SUCCESS/ACTION-FAILURE are shell commands to execute on dnl success/failure. dnl dnl @version 2005-05-30 dnl @license GPLWithACException dnl @author Steven G. Johnson and Matteo Frigo. 
AC_DEFUN([AX_GCC_VERSION], [ AC_REQUIRE([AC_PROG_CC]) AC_CACHE_CHECK(whether we are using gcc $1.$2.$3 or later, ax_cv_gcc_$1_$2_$3, [ ax_cv_gcc_$1_$2_$3=no if test "$GCC" = "yes"; then dnl The semicolon after "yes" below is to pacify NeXT's syntax-checking cpp. AC_EGREP_CPP(yes, [ #ifdef __GNUC__ # if (__GNUC__ > $1) || (__GNUC__ == $1 && __GNUC_MINOR__ > $2) \ || (__GNUC__ == $1 && __GNUC_MINOR__ == $2 && __GNUC_PATCHLEVEL__ >= $3) yes; # endif #endif ], [ax_cv_gcc_$1_$2_$3=yes]) fi ]) if test "$ax_cv_gcc_$1_$2_$3" = yes; then m4_default([$4], :) else m4_default([$5], :) fi ]) einspline-0.9.2/m4/ax_c_compiler_vendor.m40000664000113000011300000000266111012400556015355 00000000000000dnl @synopsis AX_COMPILER_VENDOR dnl @summary find the vendor (gnu, intel, etc.) of the C/C++ compiler dnl @category C dnl @category C++ dnl dnl Determine the vendor of the C/C++ compiler, e.g., gnu, intel, ibm, dnl sun, hp, borland, comeau, dec, cray, kai, lcc, metrowerks, sgi, dnl microsoft, watcom, etc. The vendor is returned in the cache variable dnl $ax_cv_c_compiler_vendor for C and $ax_cv_cxx_compiler_vendor for C++. dnl dnl @version 2005-05-30 dnl @license GPLWithACException dnl @author Steven G. 
Johnson with Matteo Frigo AC_DEFUN([AX_C_COMPILER_VENDOR], [ AC_CACHE_CHECK([for _AC_LANG compiler vendor], ax_cv_[]_AC_LANG_ABBREV[]_compiler_vendor, [ax_cv_[]_AC_LANG_ABBREV[]_compiler_vendor=unknown # note: don't check for gcc first since some other compilers define __GNUC__ for ventest in intel:__ICC,__ECC,__INTEL_COMPILER ibm:__xlc__,__xlC__,__IBMC__,__IBMCPP__ gnu:__GNUC__ sun:__SUNPRO_C,__SUNPRO_CC hp:__HP_cc,__HP_aCC dec:__DECC,__DECCXX,__DECC_VER,__DECCXX_VER borland:__BORLANDC__,__TURBOC__ comeau:__COMO__ cray:_CRAYC kai:__KCC lcc:__LCC__ metrowerks:__MWERKS__ sgi:__sgi,sgi microsoft:_MSC_VER watcom:__WATCOMC__ portland:__PGI; do vencpp="defined("`echo $ventest | cut -d: -f2 | sed 's/,/) || defined(/g'`")" AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,[ #if !($vencpp) thisisanerror; #endif ])], [ax_cv_]_AC_LANG_ABBREV[_compiler_vendor=`echo $ventest | cut -d: -f1`; break]) done ]) ]) einspline-0.9.2/m4/ax_cxx_maxopt.m40000664000113000011300000001334711021344427014065 00000000000000dnl @synopsis AX_CXX_MAXOPT dnl @summary turn on optimization flags for the C compiler dnl @category C dnl dnl Try to turn on "good" C optimization flags for various compilers dnl and architectures, for some definition of "good". (In our case, dnl good for FFTW and hopefully for other scientific codes. Modify dnl as needed.) dnl dnl The user can override the flags by setting the CXXFLAGS environment dnl variable. The user can also specify --enable-portable-binary in dnl order to disable any optimization flags that might result in dnl a binary that only runs on the host architecture. dnl dnl Note also that the flags assume that ANSI C aliasing rules are dnl followed by the code (e.g. for gcxx's -fstrict-aliasing), and that dnl floating-point computations can be re-ordered as needed. dnl dnl Requires macros: AX_CHECK_COMPILER_FLAGS, AX_COMPILER_VENDOR, dnl AX_GCC_ARCHFLAG, AX_GCC_X86_CPUID dnl dnl @version 2005-05-30 dnl @license GPLWithACException dnl @author Steven G. 
Johnson and Matteo Frigo. AC_DEFUN([AX_CXX_MAXOPT], [ AC_LANG_SAVE AC_LANG_CPLUSPLUS AC_REQUIRE([AC_PROG_CXX]) AC_REQUIRE([AX_CXX_COMPILER_VENDOR]) AC_REQUIRE([AC_CANONICAL_HOST]) AC_ARG_ENABLE(portable-binary, [AC_HELP_STRING([--enable-portable-binary], [disable compiler optimizations that would produce unportable binaries])], acx_maxopt_portable=$withval, acx_maxopt_portable=no) # Try to determine "good" native compiler flags if none specified via CXXFLAGS if test "$ac_test_CXXFLAGS" != "set"; then CXXFLAGS="" case $ax_cv_cxx_compiler_vendor in dec) CXXFLAGS="-newc -w0 -O5 -ansi_alias -ansi_args -fp_reorder -tune host" if test "x$acx_maxopt_portable" = xno; then CXXFLAGS="$CXXFLAGS -arch host" fi;; sun) CXXFLAGS="-native -fast -xO5 -dalign" if test "x$acx_maxopt_portable" = xyes; then CXXFLAGS="$CXXFLAGS -xarch=generic" fi;; hp) CXXFLAGS="+Oall +Optrs_ansi +DSnative" if test "x$acx_maxopt_portable" = xyes; then CXXFLAGS="$CXXFLAGS +DAportable" fi;; ibm) if test "x$acx_maxopt_portable" = xno; then xlc_opt="-qarch=auto -qtune=auto" else xlc_opt="-qtune=auto" fi AX_CHECK_COMPILER_FLAGS($xlc_opt, CXXFLAGS="-O3 -qansialias -w $xlc_opt", [CXXFLAGS="-O3 -qansialias -w" echo "******************************************************" echo "* You seem to have the IBM C compiler. It is *" echo "* recommended for best performance that you use: *" echo "* *" echo "* CXXFLAGS=-O3 -qarch=xxx -qtune=xxx -qansialias -w *" echo "* ^^^ ^^^ *" echo "* where xxx is pwr2, pwr3, 604, or whatever kind of *" echo "* CPU you have. (Set the CXXFLAGS environment var. *" echo "* and re-run configure.) For more info, man cxx. 
*" echo "******************************************************"]) ;; intel) CXXFLAGS="-O3 -restrict" if test "x$acx_maxopt_portable" = xno; then icc_archflag=unknown icc_flags="" case $host_cpu in i686*|x86_64*) # icc accepts gcc assembly syntax, so these should work: AX_GCC_X86_CPUID(0) AX_GCC_X86_CPUID(1) case $ax_cv_gcc_x86_cpuid_0 in # see AX_GCC_ARCHFLAG *:756e6547:*:*) # Intel case $ax_cv_gcc_x86_cpuid_1 in 1067?:*:*:*) icc_flags="-xS";; *6a?:*[[234]]:*:*|*6[[789b]]?:*:*:*) icc_flags="-xK";; *6f?:*:*:*) icc_flags="-xT";; *f3[[347]]:*:*:*|*f4[[1347a]]:*:*:*) icc_flags="-xP -xN -xW -xK";; *f??:*:*:*) icc_flags="-xN -xW -xK";; esac ;; esac ;; esac if test "x$icc_flags" != x; then for flag in $icc_flags; do AX_CHECK_COMPILER_FLAGS($flag, [icc_archflag=$flag; break]) done fi AC_MSG_CHECKING([for icc architecture flag]) AC_MSG_RESULT($icc_archflag) if test "x$icc_archflag" != xunknown; then CXXFLAGS="$CXXFLAGS $icc_archflag" fi fi ;; gnu) # default optimization flags for gcc on all systems CXXFLAGS="-O3 -fomit-frame-pointer" # -malign-double for x86 systems AX_CHECK_COMPILER_FLAGS(-malign-double, CXXFLAGS="$CXXFLAGS -malign-double") # -fstrict-aliasing for gcc-2.95+ AX_CHECK_COMPILER_FLAGS(-fstrict-aliasing, CXXFLAGS="$CXXFLAGS -fstrict-aliasing") # note that we enable "unsafe" fp optimization with other compilers, too AX_CHECK_COMPILER_FLAGS(-ffast-math, CXXFLAGS="$CXXFLAGS -ffast-math") AX_GXX_ARCHFLAG($acx_maxopt_portable) ;; esac if test -z "$CXXFLAGS"; then echo "" echo "**********************************************************" echo "* WARNING: Don't know the best CXXFLAGS for this system *" echo "* Use ./configure CXXFLAGS=... 
to specify your own flags *" echo "* (otherwise, a default of CXXFLAGS=-O3 will be used) *" echo "**********************************************************" echo "" CXXFLAGS="-O3" fi AX_CHECK_COMPILER_FLAGS($CXXFLAGS, [], [ echo "" echo "**********************************************************" echo "* WARNING: The guessed CXXFLAGS don't seem to work with *" echo "* your compiler. *" echo "* Use ./configure CXXFLAGS=... to specify your own flags *" echo "**********************************************************" echo "" CXXFLAGS="" ]) fi AC_LANG_RESTORE ]) einspline-0.9.2/m4/acx_pthread.m40000664000113000011300000002225111012400556013453 00000000000000dnl @synopsis ACX_PTHREAD([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]]) dnl @summary figure out how to build C programs using POSIX threads dnl @category InstalledPackages dnl dnl This macro figures out how to build C programs using POSIX dnl threads. It sets the PTHREAD_LIBS output variable to the threads dnl library and linker flags, and the PTHREAD_CFLAGS output variable dnl to any special C compiler flags that are needed. (The user can also dnl force certain compiler flags/libs to be tested by setting these dnl environment variables.) dnl dnl Also sets PTHREAD_CC to any special C compiler that is needed for dnl multi-threaded programs (defaults to the value of CC otherwise). dnl (This is necessary on AIX to use the special cc_r compiler alias.) dnl dnl NOTE: You are assumed to not only compile your program with these dnl flags, but also link it with them as well. e.g. you should link dnl with $PTHREAD_CC $CFLAGS $PTHREAD_CFLAGS $LDFLAGS ... 
dnl $PTHREAD_LIBS $LIBS
dnl
dnl If you are only building threads programs, you may wish to
dnl use these variables in your default LIBS, CFLAGS, and CC:
dnl
dnl        LIBS="$PTHREAD_LIBS $LIBS"
dnl        CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
dnl        CC="$PTHREAD_CC"
dnl
dnl In addition, if the PTHREAD_CREATE_JOINABLE thread-attribute
dnl constant has a nonstandard name, defines PTHREAD_CREATE_JOINABLE
dnl to that name (e.g. PTHREAD_CREATE_UNDETACHED on AIX).
dnl
dnl ACTION-IF-FOUND is a list of shell commands to run if a threads
dnl library is found, and ACTION-IF-NOT-FOUND is a list of commands
dnl to run it if it is not found.  If ACTION-IF-FOUND is not specified,
dnl the default action will define HAVE_PTHREAD.
dnl
dnl Please let the authors know if this macro fails on any platform,
dnl or if you have any other suggestions or comments.  This macro was
dnl based on work by SGJ on autoconf scripts for FFTW (www.fftw.org)
dnl (with help from M. Frigo), as well as ac_pthread and hb_pthread
dnl macros posted by Alejandro Forero Cuervo to the autoconf macro
dnl repository.  We are also grateful for the helpful feedback of
dnl numerous users.
dnl
dnl @version 2005-06-15
dnl @license GPLWithACException
dnl @author Steven G. Johnson

AC_DEFUN([ACX_PTHREAD], [
AC_REQUIRE([AC_CANONICAL_HOST])
AC_LANG_SAVE
AC_LANG_C
acx_pthread_ok=no

# We used to check for pthread.h first, but this fails if pthread.h
# requires special compiler flags (e.g. on Tru64 or Sequent).
# It gets checked for in the link test anyway.

# First of all, check if the user has set any of the PTHREAD_LIBS,
# etcetera environment variables, and if threads linking works using
# them:
if test x"$PTHREAD_LIBS$PTHREAD_CFLAGS" != x; then
        save_CFLAGS="$CFLAGS"
        CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
        save_LIBS="$LIBS"
        LIBS="$PTHREAD_LIBS $LIBS"
        AC_MSG_CHECKING([for pthread_join in LIBS=$PTHREAD_LIBS with CFLAGS=$PTHREAD_CFLAGS])
        AC_TRY_LINK_FUNC(pthread_join, acx_pthread_ok=yes)
        AC_MSG_RESULT($acx_pthread_ok)
        if test x"$acx_pthread_ok" = xno; then
                PTHREAD_LIBS=""
                PTHREAD_CFLAGS=""
        fi
        LIBS="$save_LIBS"
        CFLAGS="$save_CFLAGS"
fi

# We must check for the threads library under a number of different
# names; the ordering is very important because some systems
# (e.g. DEC) have both -lpthread and -lpthreads, where one of the
# libraries is broken (non-POSIX).

# Create a list of thread flags to try.  Items starting with a "-" are
# C compiler flags, and other items are library names, except for "none"
# which indicates that we try without any flags at all, and "pthread-config"
# which is a program returning the flags for the Pth emulation library.

acx_pthread_flags="pthreads none -Kthread -kthread lthread -pthread -pthreads -mthreads pthread --thread-safe -mt pthread-config"

# The ordering *is* (sometimes) important.  Some notes on the
# individual items follow:

# pthreads: AIX (must check this before -lpthread)
# none: in case threads are in libc; should be tried before -Kthread and
#       other compiler flags to prevent continual compiler warnings
# -Kthread: Sequent (threads in libc, but -Kthread needed for pthread.h)
# -kthread: FreeBSD kernel threads (preferred to -pthread since SMP-able)
# lthread: LinuxThreads port on FreeBSD (also preferred to -pthread)
# -pthread: Linux/gcc (kernel threads), BSD/gcc (userland threads)
# -pthreads: Solaris/gcc
# -mthreads: Mingw32/gcc, Lynx/gcc
# -mt: Sun Workshop C (may only link SunOS threads [-lthread], but it
#      doesn't hurt to check since this sometimes defines pthreads too;
#      also defines -D_REENTRANT)
#      ... -mt is also the pthreads flag for HP/aCC
# pthread: Linux, etcetera
# --thread-safe: KAI C++
# pthread-config: use pthread-config program (for GNU Pth library)

case "${host_cpu}-${host_os}" in
        *solaris*)

        # On Solaris (at least, for some versions), libc contains stubbed
        # (non-functional) versions of the pthreads routines, so link-based
        # tests will erroneously succeed.  (We need to link with -pthreads/-mt/
        # -lpthread.)  (The stubs are missing pthread_cleanup_push, or rather
        # a function called by this macro, so we could check for that, but
        # who knows whether they'll stub that too in a future libc.)  So,
        # we'll just look for -pthreads and -lpthread first:

        acx_pthread_flags="-pthreads pthread -mt -pthread $acx_pthread_flags"
        ;;
esac

if test x"$acx_pthread_ok" = xno; then
for flag in $acx_pthread_flags; do

        case $flag in
                none)
                AC_MSG_CHECKING([whether pthreads work without any flags])
                ;;

                -*)
                AC_MSG_CHECKING([whether pthreads work with $flag])
                PTHREAD_CFLAGS="$flag"
                ;;

                pthread-config)
                AC_CHECK_PROG(acx_pthread_config, pthread-config, yes, no)
                if test x"$acx_pthread_config" = xno; then continue; fi
                PTHREAD_CFLAGS="`pthread-config --cflags`"
                PTHREAD_LIBS="`pthread-config --ldflags` `pthread-config --libs`"
                ;;

                *)
                AC_MSG_CHECKING([for the pthreads library -l$flag])
                PTHREAD_LIBS="-l$flag"
                ;;
        esac

        save_LIBS="$LIBS"
        save_CFLAGS="$CFLAGS"
        LIBS="$PTHREAD_LIBS $LIBS"
        CFLAGS="$CFLAGS $PTHREAD_CFLAGS"

        # Check for various functions.  We must include pthread.h,
        # since some functions may be macros.  (On the Sequent, we
        # need a special flag -Kthread to make this header compile.)
        # We check for pthread_join because it is in -lpthread on IRIX
        # while pthread_create is in libc.  We check for pthread_attr_init
        # due to DEC craziness with -lpthreads.  We check for
        # pthread_cleanup_push because it is one of the few pthread
        # functions on Solaris that doesn't have a non-functional libc stub.
        # We try pthread_create on general principles.
        AC_TRY_LINK([#include <pthread.h>],
                    [pthread_t th; pthread_join(th, 0);
                     pthread_attr_init(0); pthread_cleanup_push(0, 0);
                     pthread_create(0,0,0,0); pthread_cleanup_pop(0); ],
                    [acx_pthread_ok=yes])

        LIBS="$save_LIBS"
        CFLAGS="$save_CFLAGS"

        AC_MSG_RESULT($acx_pthread_ok)
        if test "x$acx_pthread_ok" = xyes; then
                break;
        fi

        # This candidate failed: clear it before trying the next one.
        PTHREAD_LIBS=""
        PTHREAD_CFLAGS=""
done
fi

# Various other checks:
if test "x$acx_pthread_ok" = xyes; then
        save_LIBS="$LIBS"
        LIBS="$PTHREAD_LIBS $LIBS"
        save_CFLAGS="$CFLAGS"
        CFLAGS="$CFLAGS $PTHREAD_CFLAGS"

        # Detect AIX lossage: JOINABLE attribute is called UNDETACHED.
        AC_MSG_CHECKING([for joinable pthread attribute])
        attr_name=unknown
        for attr in PTHREAD_CREATE_JOINABLE PTHREAD_CREATE_UNDETACHED; do
            AC_TRY_LINK([#include <pthread.h>], [int attr=$attr; return attr;],
                        [attr_name=$attr; break])
        done
        AC_MSG_RESULT($attr_name)
        if test "$attr_name" != PTHREAD_CREATE_JOINABLE; then
            AC_DEFINE_UNQUOTED(PTHREAD_CREATE_JOINABLE, $attr_name,
                               [Define to necessary symbol if this constant
                                uses a non-standard name on your system.])
        fi

        AC_MSG_CHECKING([if more special flags are required for pthreads])
        flag=no
        case "${host_cpu}-${host_os}" in
            *-aix* | *-freebsd* | *-darwin*) flag="-D_THREAD_SAFE";;
            *solaris* | *-osf* | *-hpux*) flag="-D_REENTRANT";;
        esac
        AC_MSG_RESULT(${flag})
        if test "x$flag" != xno; then
            PTHREAD_CFLAGS="$flag $PTHREAD_CFLAGS"
        fi

        LIBS="$save_LIBS"
        CFLAGS="$save_CFLAGS"

        # More AIX lossage: must compile with cc_r
        AC_CHECK_PROG(PTHREAD_CC, cc_r, cc_r, ${CC})
else
        PTHREAD_CC="$CC"
fi

AC_SUBST(PTHREAD_LIBS)
AC_SUBST(PTHREAD_CFLAGS)
AC_SUBST(PTHREAD_CC)

# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND:
if test x"$acx_pthread_ok" = xyes; then
        ifelse([$1],,AC_DEFINE(HAVE_PTHREAD,1,[Define if you have POSIX threads libraries and header files.]),[$1])
        :
else
        acx_pthread_ok=no
        $2
fi
AC_LANG_RESTORE
])dnl ACX_PTHREAD
einspline-0.9.2/m4/ax_f77_compiler_vendor.m40000664000113000011300000000277311117544173015554 00000000000000
dnl @synopsis AX_COMPILER_VENDOR
dnl @summary find the vendor (gnu, intel, etc.) of the C/C++ compiler
dnl @category C
dnl @category C++
dnl
dnl Determine the vendor of the C/C++ compiler, e.g., gnu, intel, ibm,
dnl sun, hp, borland, comeau, dec, cray, kai, lcc, metrowerks, sgi,
dnl microsoft, watcom, etc.  The vendor is returned in the cache variable
dnl $ax_cv_c_compiler_vendor for C and $ax_cv_cxx_compiler_vendor for C++.
dnl
dnl @version 2005-05-30
dnl @license GPLWithACException
dnl @author Steven G.
dnl Johnson with Matteo Frigo

dnl AX_F77_COMPILER_VENDOR
dnl Probe the current-language compiler for vendor-specific predefined
dnl macros and store the first matching vendor name (or "unknown") in the
dnl cache variable ax_cv_<lang-abbrev>_compiler_vendor.
AC_DEFUN([AX_F77_COMPILER_VENDOR],
[
dnl Save FFLAGS so the probe cannot leak flag changes; it is restored at
dnl the end of this macro (the restore used to sit outside the AC_DEFUN,
dnl where it was not part of the macro expansion).
FSAVE="$FFLAGS"
dnl FFLAGS="$FFLAGS -fpp"
AC_CACHE_CHECK([for _AC_LANG compiler vendor], ax_cv_[]_AC_LANG_ABBREV[]_compiler_vendor,
 [ax_cv_[]_AC_LANG_ABBREV[]_compiler_vendor=unknown
  # note: don't check for gcc first since some other compilers define __GNUC__
  for ventest in intel:__ICC,__ECC,__INTEL_COMPILER,__IFC,__IFORT ibm:__xlc__,__xlC__,__IBMC__,__IBMCPP__ gnu:__GNUC__ sun:__SUNPRO_C,__SUNPRO_CC hp:__HP_cc,__HP_aCC dec:__DECC,__DECCXX,__DECC_VER,__DECCXX_VER borland:__BORLANDC__,__TURBOC__ comeau:__COMO__ cray:_CRAYC kai:__KCC lcc:__LCC__ metrowerks:__MWERKS__ sgi:__sgi,sgi microsoft:_MSC_VER watcom:__WATCOMC__ portland:__PGI; do
    # Turn "vendor:A,B" into the preprocessor test "defined(A) || defined(B)".
    vencpp="defined("`echo $ventest | cut -d: -f2 | sed 's/,/) || defined(/g'`")"
    AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,[
#if !($vencpp)
      thisisanerror;
#endif
])], [ax_cv_]_AC_LANG_ABBREV[_compiler_vendor=`echo $ventest | cut -d: -f1`; break])
  done
 ])
FFLAGS="$FSAVE"
])
einspline-0.9.2/m4/ac_cxx_restrict.m40000664000113000011300000000163411015600776014370 00000000000000
dnl Check routine for "restrict" keyword which was introduced
dnl in ANSI C99.  Some C++ compiler like g++ or kcc does support
dnl the keyword inside C++ as well.
dnl Does nothing if the compiler accepts the keyword.  Otherwise, if
dnl the compiler supports an equivalent, like gcc's __restrict__ or
dnl SGI's __restrict, define "restrict" to be that.
dnl Otherwise, define "restrict" to be empty.
dnl AC_CXX_RESTRICT
dnl Tries "restrict", "__restrict__" and "__restrict" with the C++ compiler
dnl and caches the first spelling that compiles in ac_cxx_restrict ("no" if
dnl none does).  Unless plain "restrict" works, defines the preprocessor
dnl symbol "restrict" to the working spelling, or to nothing when no
dnl spelling is accepted.
AC_DEFUN([AC_CXX_RESTRICT],
[AC_CACHE_CHECK([for restrict], ac_cxx_restrict,
[ac_cxx_restrict=no
 AC_LANG_SAVE
 AC_LANG_CPLUSPLUS
 for ac_kw in restrict __restrict__ __restrict; do
   AC_TRY_COMPILE(, [void* $ac_kw bar], [ac_cxx_restrict=$ac_kw; break])
 done
 AC_LANG_RESTORE
])
 if test "$ac_cxx_restrict" != "restrict"; then
   ac_kw="$ac_cxx_restrict"
   # "no" is the sentinel set above when no spelling compiled; map it to an
   # empty definition instead of emitting "#define restrict no".
   if test "$ac_kw" = no; then ac_kw=""; fi
   AC_DEFINE_UNQUOTED(restrict, $ac_kw, [Define to empty if the C99 keyword for C++ does not work.])
 fi
])
einspline-0.9.2/m4/ax_cxx_compiler_vendor.m40000664000113000011300000000266311012400556015737 00000000000000
dnl @synopsis AX_COMPILER_VENDOR
dnl @summary find the vendor (gnu, intel, etc.) of the C/C++ compiler
dnl @category C
dnl @category C++
dnl
dnl Determine the vendor of the C/C++ compiler, e.g., gnu, intel, ibm,
dnl sun, hp, borland, comeau, dec, cray, kai, lcc, metrowerks, sgi,
dnl microsoft, watcom, etc.  The vendor is returned in the cache variable
dnl $ax_cv_c_compiler_vendor for C and $ax_cv_cxx_compiler_vendor for C++.
dnl
dnl @version 2005-05-30
dnl @license GPLWithACException
dnl @author Steven G.
dnl Johnson with Matteo Frigo

dnl AX_CXX_COMPILER_VENDOR
dnl Probe the current-language compiler for vendor-specific predefined
dnl macros and store the first matching vendor name (or "unknown") in the
dnl cache variable ax_cv_<lang-abbrev>_compiler_vendor.
AC_DEFUN([AX_CXX_COMPILER_VENDOR],
[
AC_CACHE_CHECK([for _AC_LANG compiler vendor], ax_cv_[]_AC_LANG_ABBREV[]_compiler_vendor,
 [ax_cv_[]_AC_LANG_ABBREV[]_compiler_vendor=unknown
  # note: don't check for gcc first since some other compilers define __GNUC__
  for ventest in intel:__ICC,__ECC,__INTEL_COMPILER ibm:__xlc__,__xlC__,__IBMC__,__IBMCPP__ gnu:__GNUC__ sun:__SUNPRO_C,__SUNPRO_CC hp:__HP_cc,__HP_aCC dec:__DECC,__DECCXX,__DECC_VER,__DECCXX_VER borland:__BORLANDC__,__TURBOC__ comeau:__COMO__ cray:_CRAYC kai:__KCC lcc:__LCC__ metrowerks:__MWERKS__ sgi:__sgi,sgi microsoft:_MSC_VER watcom:__WATCOMC__ portland:__PGI; do
    # Turn "vendor:A,B" into the preprocessor test "defined(A) || defined(B)".
    vencpp="defined("`echo $ventest | cut -d: -f2 | sed 's/,/) || defined(/g'`")"
    AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,[
#if !($vencpp)
      thisisanerror;
#endif
])], [ax_cv_]_AC_LANG_ABBREV[_compiler_vendor=`echo $ventest | cut -d: -f1`; break])
  done
 ])
])
einspline-0.9.2/m4/ax_gcc_aligns_stack.m40000664000113000011300000000407211012400556015140 00000000000000
dnl @synopsis AX_GCC_ALIGNS_STACK([ACTION-IF-YES], [ACTION-IF-NO])
dnl @summary check whether gcc can align stack to 8-byte boundary
dnl @category Misc
dnl
dnl Check to see if we are using a version of gcc that aligns the stack
dnl (true in gcc-2.95+, which have the -mpreferred-stack-boundary flag).
dnl Also, however, checks whether main() is correctly aligned by the
dnl OS/libc/..., as well as for a bug in the stack alignment of gcc-2.95.x
dnl (see http://gcc.gnu.org/ml/gcc-bugs/1999-11/msg00259.html).
dnl
dnl ACTION-IF-YES/ACTION-IF-NO are shell commands to execute if we are
dnl using gcc and the stack is/isn't aligned, respectively.
dnl
dnl Requires macro: AX_CHECK_COMPILER_FLAGS, AX_GCC_VERSION
dnl
dnl @version 2005-05-30
dnl @license GPLWithACException
dnl @author Steven G.
dnl Johnson

AC_DEFUN([AX_GCC_ALIGNS_STACK],
[
AC_REQUIRE([AC_PROG_CC])
ax_gcc_aligns_stack=no
if test "$GCC" = "yes"; then
AX_CHECK_COMPILER_FLAGS(-mpreferred-stack-boundary=4, [
        AC_MSG_CHECKING([whether the stack is at least 8-byte aligned by gcc])
        save_CFLAGS="$CFLAGS"
        CFLAGS="-O"
        AX_CHECK_COMPILER_FLAGS(-malign-double, CFLAGS="$CFLAGS -malign-double")
        dnl The test program needs exit() from stdlib.h and fprintf() from
        dnl stdio.h; each preprocessor directive must start its own line.
        AC_TRY_RUN([#include <stdlib.h>
#       include <stdio.h>
        struct yuck { int blechh; };
        int one(void) { return 1; }
        struct yuck ick(void) { struct yuck y; y.blechh = 3; return y; }
#       define CHK_ALIGN(x) if ((((long) &(x)) & 0x7)) { fprintf(stderr, "bad alignment of " #x "\n"); exit(1); }
        void blah(int foo) { double foobar; CHK_ALIGN(foobar); }
        int main2(void) {double ok1; struct yuck y; double ok2; CHK_ALIGN(ok1);
                         CHK_ALIGN(ok2); y = ick(); blah(one()); return 0;}
        int main(void) { if ((((long) (__builtin_alloca(0))) & 0x7)) __builtin_alloca(4); return main2(); }
        ], [ax_gcc_aligns_stack=yes; ax_gcc_stack_align_bug=no],
        ax_gcc_stack_align_bug=yes,
        [AX_GCC_VERSION(3,0,0, ax_gcc_stack_align_bug=no, ax_gcc_stack_align_bug=yes)])
        CFLAGS="$save_CFLAGS"
        AC_MSG_RESULT($ax_gcc_aligns_stack)
])
fi
if test "$ax_gcc_aligns_stack" = yes; then
        m4_default([$1], :)
else
        m4_default([$2], :)
fi
])
einspline-0.9.2/m4/ax_gcc_x86_cpuid.m40000664000113000011300000000324111012400556014304 00000000000000
dnl @synopsis AX_GCC_X86_CPUID(OP)
dnl @summary run x86 cpuid instruction OP using gcc inline assembler
dnl @category Misc
dnl
dnl On Pentium and later x86 processors, with gcc or a compiler that
dnl has a compatible syntax for inline assembly instructions, run
dnl a small program that executes the cpuid instruction with
dnl input OP.  This can be used to detect the CPU type.
dnl
dnl On output, the values of the eax, ebx, ecx, and edx registers
dnl are stored as hexadecimal strings as "eax:ebx:ecx:edx" in
dnl the cache variable ax_cv_gcc_x86_cpuid_OP.
dnl
dnl If the cpuid instruction fails (because you are running a cross-compiler,
dnl or because you are not using gcc, or because you are on a processor
dnl that doesn't have this instruction), ax_cv_gcc_x86_cpuid_OP is set
dnl to the string "unknown".
dnl
dnl This macro mainly exists to be used in AX_GCC_ARCHFLAG.
dnl
dnl @version 2005-05-30
dnl @license GPLWithACException
dnl @author Steven G. Johnson and Matteo Frigo.

AC_DEFUN([AX_GCC_X86_CPUID],
[AC_REQUIRE([AC_PROG_CC])
AC_LANG_PUSH([C])
AC_CACHE_CHECK(for x86 cpuid $1 output, ax_cv_gcc_x86_cpuid_$1,
 [AC_RUN_IFELSE([AC_LANG_PROGRAM([#include <stdio.h>], [
     int op = $1, eax, ebx, ecx, edx;
     FILE *f;
      __asm__("cpuid"
        : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
        : "a" (op));
     f = fopen("conftest_cpuid", "w"); if (!f) return 1;
     fprintf(f, "%x:%x:%x:%x\n", eax, ebx, ecx, edx);
     fclose(f);
     return 0;
])],
     [ax_cv_gcc_x86_cpuid_$1=`cat conftest_cpuid`; rm -f conftest_cpuid],
     [ax_cv_gcc_x86_cpuid_$1=unknown; rm -f conftest_cpuid],
     [ax_cv_gcc_x86_cpuid_$1=unknown])])
AC_LANG_POP([C])
])
einspline-0.9.2/m4/ax_gcc_archflag.m40000664000113000011300000001670411035230373014255 00000000000000
dnl @synopsis AX_GCC_ARCHFLAG([PORTABLE?], [ACTION-SUCCESS], [ACTION-FAILURE])
dnl @summary find target architecture name for gcc -march/-mtune flags
dnl @category Misc
dnl
dnl This macro tries to guess the "native" arch corresponding to
dnl the target architecture for use with gcc's -march=arch or -mtune=arch
dnl flags.  If found, the cache variable $ax_cv_gcc_archflag is set to this
dnl flag and ACTION-SUCCESS is executed; otherwise $ax_cv_gcc_archflag
dnl is set to "unknown" and ACTION-FAILURE is executed.  The default
dnl ACTION-SUCCESS is to add $ax_cv_gcc_archflag to the end of $CFLAGS.
dnl
dnl PORTABLE? should be either [yes] (default) or [no].  In the former case,
dnl the flag is set to -mtune (or equivalent) so that the architecture
dnl is only used for tuning, but the instruction set used is still
dnl portable.
dnl In the latter case, the flag is set to -march (or equivalent)
dnl so that architecture-specific instructions are enabled.
dnl
dnl The user can specify --with-gcc-arch=<arch> in order to override
dnl the macro's choice of architecture, or --without-gcc-arch to
dnl disable this.
dnl
dnl When cross-compiling, or if $CC is not gcc, then ACTION-FAILURE is
dnl called unless the user specified --with-gcc-arch manually.
dnl
dnl Requires macros: AX_CHECK_COMPILER_FLAGS, AX_GCC_X86_CPUID
dnl
dnl (The main emphasis here is on recent CPUs, on the principle that
dnl  doing high-performance computing on old hardware is uncommon.)
dnl
dnl @version 2006-01-04
dnl @license GPLWithACException
dnl @author Steven G. Johnson and Matteo Frigo.

AC_DEFUN([AX_GCC_ARCHFLAG],
[AC_REQUIRE([AC_PROG_CC])
AC_REQUIRE([AC_CANONICAL_HOST])

AC_ARG_WITH(gcc-arch, [AC_HELP_STRING([--with-gcc-arch=<arch>], [use architecture <arch> for gcc -march/-mtune, instead of guessing])],
        ax_gcc_arch=$withval, ax_gcc_arch=yes)

AC_MSG_CHECKING([for gcc architecture flag])
AC_MSG_RESULT([])
AC_CACHE_VAL(ax_cv_gcc_archflag,
[
ax_cv_gcc_archflag="unknown"

if test "$GCC" = yes; then

if test "x$ax_gcc_arch" = xyes; then
ax_gcc_arch=""
if test "$cross_compiling" = no; then
case $host_cpu in
  i[[3456]]86*|x86_64*) # use cpuid codes, in part from x86info-1.7 by D. Jones
     AX_GCC_X86_CPUID(0)
     AX_GCC_X86_CPUID(1)
     case $ax_cv_gcc_x86_cpuid_0 in
       *:756e6547:*:*) # Intel
          case $ax_cv_gcc_x86_cpuid_1 in
            *6f?:*:*:*) ax_gcc_arch="core2 nocona pentium3";;
            1067?:*:*:*) ax_gcc_arch="core2 nocona pentium3";;
            *5[[48]]?:*:*:*) ax_gcc_arch="pentium-mmx pentium" ;;
            *5??:*:*:*) ax_gcc_arch=pentium ;;
            *6[[3456]]?:*:*:*) ax_gcc_arch="pentium2 pentiumpro" ;;
            *6a?:*[[01]]:*:*) ax_gcc_arch="pentium2 pentiumpro" ;;
            *6a?:*[[234]]:*:*) ax_gcc_arch="pentium3 pentiumpro" ;;
            *6[[9d]]?:*:*:*) ax_gcc_arch="pentium-m pentium3 pentiumpro" ;;
            *6[[78b]]?:*:*:*) ax_gcc_arch="pentium3 pentiumpro" ;;
            *6??:*:*:*) ax_gcc_arch=pentiumpro ;;
            # Bracket expressions must be doubled so one pair survives m4
            # quote removal and reaches the generated shell pattern.
            *f3[[347]]:*:*:*|*f4[[1347]]:*:*:*)
                case $host_cpu in
                  x86_64*) ax_gcc_arch="nocona pentium4 pentiumpro" ;;
                  *) ax_gcc_arch="prescott pentium4 pentiumpro" ;;
                esac ;;
            *f4a:*:*:*) ax_gcc_arch="nocona" ;;
            *f??:*:*:*) ax_gcc_arch="pentium4 pentiumpro";;
          esac ;;
       *:68747541:*:*) # AMD
          case $ax_cv_gcc_x86_cpuid_1 in
            *5[[67]]?:*:*:*) ax_gcc_arch=k6 ;;
            *5[[8d]]?:*:*:*) ax_gcc_arch="k6-2 k6" ;;
            *5[[9]]?:*:*:*) ax_gcc_arch="k6-3 k6" ;;
            *60?:*:*:*) ax_gcc_arch=k7 ;;
            *6[[12]]?:*:*:*) ax_gcc_arch="athlon k7" ;;
            *6[[34]]?:*:*:*) ax_gcc_arch="athlon-tbird k7" ;;
            *67?:*:*:*) ax_gcc_arch="athlon-4 athlon k7" ;;
            *6[[68a]]?:*:*:*)
               AX_GCC_X86_CPUID(0x80000006) # L2 cache size
               case $ax_cv_gcc_x86_cpuid_0x80000006 in
                 *:*:*[[1-9a-f]]??????:*) # (L2 = ecx >> 16) >= 256
                        ax_gcc_arch="athlon-xp athlon-4 athlon k7" ;;
                 *) ax_gcc_arch="athlon-4 athlon k7" ;;
               esac ;;
            *f[[4cef8b]]?:*:*:*) ax_gcc_arch="athlon64 k8" ;;
            *f5?:*:*:*) ax_gcc_arch="opteron k8" ;;
            *f7?:*:*:*) ax_gcc_arch="athlon-fx opteron k8" ;;
            *f??:*:*:*) ax_gcc_arch="k8" ;;
          esac ;;
        *:746e6543:*:*) # IDT
           case $ax_cv_gcc_x86_cpuid_1 in
             *54?:*:*:*) ax_gcc_arch=winchip-c6 ;;
             *58?:*:*:*) ax_gcc_arch=winchip2 ;;
             *6[[78]]?:*:*:*) ax_gcc_arch=c3 ;;
             *69?:*:*:*) ax_gcc_arch="c3-2 c3" ;;
           esac ;;
     esac
     if test x"$ax_gcc_arch" = x; then # fallback
        case $host_cpu in
          i586*) ax_gcc_arch=pentium ;;
          i686*) ax_gcc_arch=pentiumpro ;;
        esac
     fi
     ;;

  sparc*)
     AC_PATH_PROG([PRTDIAG], [prtdiag], [prtdiag], [$PATH:/usr/platform/`uname -i`/sbin/:/usr/platform/`uname -m`/sbin/])
     cputype=`(((grep cpu /proc/cpuinfo | cut -d: -f2) ; ($PRTDIAG -v |grep -i sparc) ; grep -i cpu /var/run/dmesg.boot ) | head -n 1) 2> /dev/null`
     cputype=`echo "$cputype" | tr -d ' -' |tr $as_cr_LETTERS $as_cr_letters`
     case $cputype in
         *ultrasparciv*) ax_gcc_arch="ultrasparc4 ultrasparc3 ultrasparc v9" ;;
         *ultrasparciii*) ax_gcc_arch="ultrasparc3 ultrasparc v9" ;;
         *ultrasparc*) ax_gcc_arch="ultrasparc v9" ;;
         *supersparc*|*tms390z5[[05]]*) ax_gcc_arch="supersparc v8" ;;
         *hypersparc*|*rt62[[056]]*) ax_gcc_arch="hypersparc v8" ;;
         *cypress*) ax_gcc_arch=cypress ;;
     esac ;;

  alphaev5) ax_gcc_arch=ev5 ;;
  alphaev56) ax_gcc_arch=ev56 ;;
  alphapca56) ax_gcc_arch="pca56 ev56" ;;
  alphapca57) ax_gcc_arch="pca57 pca56 ev56" ;;
  alphaev6) ax_gcc_arch=ev6 ;;
  alphaev67) ax_gcc_arch=ev67 ;;
  alphaev68) ax_gcc_arch="ev68 ev67" ;;
  alphaev69) ax_gcc_arch="ev69 ev68 ev67" ;;
  alphaev7) ax_gcc_arch="ev7 ev69 ev68 ev67" ;;
  alphaev79) ax_gcc_arch="ev79 ev7 ev69 ev68 ev67" ;;

  powerpc*)
     cputype=`((grep cpu /proc/cpuinfo | head -n 1 | cut -d: -f2 | cut -d, -f1 | sed 's/ //g') ; /usr/bin/machine ; /bin/machine; grep CPU /var/run/dmesg.boot | head -n 1 | cut -d" " -f2) 2> /dev/null`
     cputype=`echo $cputype | sed -e 's/ppc//g;s/ *//g'`
     case $cputype in
       *750*) ax_gcc_arch="750 G3" ;;
       *740[[0-9]]*) ax_gcc_arch="$cputype 7400 G4" ;;
       *74[[4-5]][[0-9]]*) ax_gcc_arch="$cputype 7450 G4" ;;
       *74[[0-9]][[0-9]]*) ax_gcc_arch="$cputype G4" ;;
       *970*) ax_gcc_arch="970 G5 power4";;
       *POWER4*|*power4*|*gq*) ax_gcc_arch="power4 970";;
       *POWER5*|*power5*|*gr*|*gs*) ax_gcc_arch="power5 power4 970";;
       603ev|8240) ax_gcc_arch="$cputype 603e 603";;
       *) ax_gcc_arch=$cputype ;;
     esac
     ax_gcc_arch="$ax_gcc_arch powerpc"
     ;;
esac
fi # not cross-compiling
fi # guess arch

if test "x$ax_gcc_arch" != x -a "x$ax_gcc_arch" != xno; then
for arch in $ax_gcc_arch; do
  if test "x[]m4_default([$1],yes)" = xyes; then # if we require portable code
    flags="-mtune=$arch"
    # -mcpu=$arch and m$arch generate nonportable code on every arch except
    # x86.  And some other arches (e.g. Alpha) don't accept -mtune.  Grrr.
    case $host_cpu in i*86|x86_64*) flags="$flags -mcpu=$arch -m$arch";; esac
  else
    flags="-march=$arch -mcpu=$arch -m$arch"
  fi
  for flag in $flags; do
    AX_CHECK_COMPILER_FLAGS($flag, [ax_cv_gcc_archflag=$flag; break])
  done
  test "x$ax_cv_gcc_archflag" = xunknown || break
done
fi

fi # $GCC=yes
])
AC_MSG_CHECKING([for gcc architecture flag])
AC_MSG_RESULT($ax_cv_gcc_archflag)
if test "x$ax_cv_gcc_archflag" = xunknown; then
  m4_default([$3],:)
else
  # Expand the caller's ACTION-SUCCESS exactly once.  The default action
  # appends the flag to CFLAGS and to CXXFLAGS, each built from its own
  # previous value.
  m4_default([$2], [CFLAGS="$CFLAGS $ax_cv_gcc_archflag"
  CXXFLAGS="$CXXFLAGS $ax_cv_gcc_archflag"])
fi
])
einspline-0.9.2/config.guess0000755000113000011300000012706310665263176012756 00000000000000
#! /bin/sh
# Attempt to guess a canonical system name.
#   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
#   2000, 2001, 2002, 2003, 2004, 2005, 2006 Free Software Foundation,
#   Inc.

timestamp='2007-05-17'

# This file is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
# 02110-1301, USA.
#
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
# configuration script generated by Autoconf, you may include it under
# the same distribution terms that you use for the rest of that program.


# Originally written by Per Bothner <per@bothner.com>.
# Please send patches to <config-patches@gnu.org>.  Submit a context
# diff and a properly formatted ChangeLog entry.
#
# This script attempts to guess a canonical system name similar to
# config.sub.  If it succeeds, it prints the system name on stdout, and
# exits with 0.  Otherwise, it exits with 1.
#
# The plan is that this can be called by configure scripts if you
# don't specify an explicit build system type.

# Basename of this script, used in diagnostics.
me=`echo "$0" | sed -e 's,.*/,,'`

usage="\
Usage: $0 [OPTION]

Output the configuration name of the system \`$me' is run on.

Operation modes:
  -h, --help         print this help, then exit
  -t, --time-stamp   print date of last modification, then exit
  -v, --version      print version number, then exit

Report bugs and patches to <config-patches@gnu.org>."

version="\
GNU config.guess ($timestamp)

Originally written by Per Bothner.
Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005
Free Software Foundation, Inc.

This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."

help="
Try \`$me --help' for more information."

# Parse command line
# Every recognised option either prints and exits or ends option
# processing, so the loop body never needs to iterate a second time.
while test $# -gt 0 ; do
  case $1 in
    --time-stamp | --time* | -t )
       echo "$timestamp" ; exit ;;
    --version | -v )
       echo "$version" ; exit ;;
    --help | --h* | -h )
       echo "$usage"; exit ;;
    -- )     # Stop option processing
       shift; break ;;
    - )	# Use stdin as input.
       break ;;
    -* )
       echo "$me: invalid option $1$help" >&2
       exit 1 ;;
    * )
       break ;;
  esac
done

if test $# != 0; then
  echo "$me: too many arguments$help" >&2
  exit 1
fi

trap 'exit 1' 1 2 15

# CC_FOR_BUILD -- compiler used by this script. Note that the use of a
# compiler to aid in system detection is discouraged as it requires
# temporary files to be created and, as you can see below, it is a
# headache to deal with in a portable fashion.

# Historically, `CC_FOR_BUILD' used to be named `HOST_CC'. We still
# use `HOST_CC' if defined, but it is deprecated.

# Portable tmp directory creation inspired by the Autoconf team.

# The snippet below is run on demand with `eval $set_cc_for_build'; its
# last statement empties the variable so a later eval is a no-op.
set_cc_for_build='
trap "exitcode=\$?; (rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null) && exit \$exitcode" 0 ;
trap "rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null; exit 1" 1 2 13 15 ;
: ${TMPDIR=/tmp} ;
 { tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } ||
 { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir $tmp) ; } ||
 { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir $tmp) && echo "Warning: creating insecure temp directory" >&2 ; } ||
 { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } ;
dummy=$tmp/dummy ;
tmpfiles="$dummy.c $dummy.o $dummy.rel $dummy" ;
case $CC_FOR_BUILD,$HOST_CC,$CC in
 ,,)    echo "int x;" > $dummy.c ;
	for c in cc gcc c89 c99 ; do
	  if ($c -c -o $dummy.o $dummy.c) >/dev/null 2>&1 ; then
	     CC_FOR_BUILD="$c"; break ;
	  fi ;
	done ;
	if test x"$CC_FOR_BUILD" = x ; then
	  CC_FOR_BUILD=no_compiler_found ;
	fi
	;;
 ,,*)   CC_FOR_BUILD=$CC ;;
 ,*,*)  CC_FOR_BUILD=$HOST_CC ;;
esac ; set_cc_for_build= ;'

# This is needed to find uname on a Pyramid OSx when run in the BSD universe.
# (ghazi@noc.rutgers.edu 1994-08-24)
if (test -f /.attbin/uname) >/dev/null 2>&1 ; then
	PATH=$PATH:/.attbin ; export PATH
fi

# Each uname field falls back to "unknown" when uname cannot supply it.
UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown
UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown
UNAME_SYSTEM=`(uname -s) 2>/dev/null`  || UNAME_SYSTEM=unknown
UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown

# Note: order is significant - the case branches are not exclusive.
case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in *:NetBSD:*:*) # NetBSD (nbsd) targets should (where applicable) match one or # more of the tupples: *-*-netbsdelf*, *-*-netbsdaout*, # *-*-netbsdecoff* and *-*-netbsd*. For targets that recently # switched to ELF, *-*-netbsd* would select the old # object file format. This provides both forward # compatibility and a consistent mechanism for selecting the # object file format. # # Note: NetBSD doesn't particularly care about the vendor # portion of the name. We always set it to "unknown". sysctl="sysctl -n hw.machine_arch" UNAME_MACHINE_ARCH=`(/sbin/$sysctl 2>/dev/null || \ /usr/sbin/$sysctl 2>/dev/null || echo unknown)` case "${UNAME_MACHINE_ARCH}" in armeb) machine=armeb-unknown ;; arm*) machine=arm-unknown ;; sh3el) machine=shl-unknown ;; sh3eb) machine=sh-unknown ;; sh5el) machine=sh5le-unknown ;; *) machine=${UNAME_MACHINE_ARCH}-unknown ;; esac # The Operating System including object format, if it has switched # to ELF recently, or will in the future. case "${UNAME_MACHINE_ARCH}" in arm*|i386|m68k|ns32k|sh3*|sparc|vax) eval $set_cc_for_build if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \ | grep __ELF__ >/dev/null then # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout). # Return netbsd for either. FIX? os=netbsd else os=netbsdelf fi ;; *) os=netbsd ;; esac # The OS release # Debian GNU/NetBSD machines have a different userland, and # thus, need a distinct triplet. However, they do not need # kernel version information, so it can be replaced with a # suitable tag, in the style of linux-gnu. case "${UNAME_VERSION}" in Debian*) release='-gnu' ;; *) release=`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'` ;; esac # Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM: # contains redundant information, the shorter form: # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used. 
echo "${machine}-${os}${release}" exit ;; *:OpenBSD:*:*) UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'` echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE} exit ;; *:ekkoBSD:*:*) echo ${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE} exit ;; *:SolidBSD:*:*) echo ${UNAME_MACHINE}-unknown-solidbsd${UNAME_RELEASE} exit ;; macppc:MirBSD:*:*) echo powerpc-unknown-mirbsd${UNAME_RELEASE} exit ;; *:MirBSD:*:*) echo ${UNAME_MACHINE}-unknown-mirbsd${UNAME_RELEASE} exit ;; alpha:OSF1:*:*) case $UNAME_RELEASE in *4.0) UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'` ;; *5.*) UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'` ;; esac # According to Compaq, /usr/sbin/psrinfo has been available on # OSF/1 and Tru64 systems produced since 1995. I hope that # covers most systems running today. This code pipes the CPU # types through head -n 1, so we only detect the type of CPU 0. ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1` case "$ALPHA_CPU_TYPE" in "EV4 (21064)") UNAME_MACHINE="alpha" ;; "EV4.5 (21064)") UNAME_MACHINE="alpha" ;; "LCA4 (21066/21068)") UNAME_MACHINE="alpha" ;; "EV5 (21164)") UNAME_MACHINE="alphaev5" ;; "EV5.6 (21164A)") UNAME_MACHINE="alphaev56" ;; "EV5.6 (21164PC)") UNAME_MACHINE="alphapca56" ;; "EV5.7 (21164PC)") UNAME_MACHINE="alphapca57" ;; "EV6 (21264)") UNAME_MACHINE="alphaev6" ;; "EV6.7 (21264A)") UNAME_MACHINE="alphaev67" ;; "EV6.8CB (21264C)") UNAME_MACHINE="alphaev68" ;; "EV6.8AL (21264B)") UNAME_MACHINE="alphaev68" ;; "EV6.8CX (21264D)") UNAME_MACHINE="alphaev68" ;; "EV6.9A (21264/EV69A)") UNAME_MACHINE="alphaev69" ;; "EV7 (21364)") UNAME_MACHINE="alphaev7" ;; "EV7.9 (21364A)") UNAME_MACHINE="alphaev79" ;; esac # A Pn.n version is a patched version. # A Vn.n version is a released version. # A Tn.n version is a released field test version. # A Xn.n version is an unreleased experimental baselevel. # 1.2 uses "1.2" for uname -r. 
echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` exit ;; Alpha\ *:Windows_NT*:*) # How do we know it's Interix rather than the generic POSIX subsystem? # Should we change UNAME_MACHINE based on the output of uname instead # of the specific Alpha model? echo alpha-pc-interix exit ;; 21064:Windows_NT:50:3) echo alpha-dec-winnt3.5 exit ;; Amiga*:UNIX_System_V:4.0:*) echo m68k-unknown-sysv4 exit ;; *:[Aa]miga[Oo][Ss]:*:*) echo ${UNAME_MACHINE}-unknown-amigaos exit ;; *:[Mm]orph[Oo][Ss]:*:*) echo ${UNAME_MACHINE}-unknown-morphos exit ;; *:OS/390:*:*) echo i370-ibm-openedition exit ;; *:z/VM:*:*) echo s390-ibm-zvmoe exit ;; *:OS400:*:*) echo powerpc-ibm-os400 exit ;; arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*) echo arm-acorn-riscix${UNAME_RELEASE} exit ;; arm:riscos:*:*|arm:RISCOS:*:*) echo arm-unknown-riscos exit ;; SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*) echo hppa1.1-hitachi-hiuxmpp exit ;; Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*) # akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE. if test "`(/bin/universe) 2>/dev/null`" = att ; then echo pyramid-pyramid-sysv3 else echo pyramid-pyramid-bsd fi exit ;; NILE*:*:*:dcosx) echo pyramid-pyramid-svr4 exit ;; DRS?6000:unix:4.0:6*) echo sparc-icl-nx6 exit ;; DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*) case `/usr/bin/uname -p` in sparc) echo sparc-icl-nx7; exit ;; esac ;; sun4H:SunOS:5.*:*) echo sparc-hal-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` exit ;; sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*) echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` exit ;; i86pc:SunOS:5.*:* | ix86xen:SunOS:5.*:*) echo i386-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` exit ;; sun4*:SunOS:6*:*) # According to config.sub, this is the proper way to canonicalize # SunOS6. Hard to guess exactly what SunOS6 will be like, but # it's likely to be more like Solaris than SunOS4. 
echo sparc-sun-solaris3`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` exit ;; sun4*:SunOS:*:*) case "`/usr/bin/arch -k`" in Series*|S4*) UNAME_RELEASE=`uname -v` ;; esac # Japanese Language versions have a version number like `4.1.3-JL'. echo sparc-sun-sunos`echo ${UNAME_RELEASE}|sed -e 's/-/_/'` exit ;; sun3*:SunOS:*:*) echo m68k-sun-sunos${UNAME_RELEASE} exit ;; sun*:*:4.2BSD:*) UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null` test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3 case "`/bin/arch`" in sun3) echo m68k-sun-sunos${UNAME_RELEASE} ;; sun4) echo sparc-sun-sunos${UNAME_RELEASE} ;; esac exit ;; aushp:SunOS:*:*) echo sparc-auspex-sunos${UNAME_RELEASE} exit ;; # The situation for MiNT is a little confusing. The machine name # can be virtually everything (everything which is not # "atarist" or "atariste" at least should have a processor # > m68000). The system name ranges from "MiNT" over "FreeMiNT" # to the lowercase version "mint" (or "freemint"). Finally # the system name "TOS" denotes a system which is actually not # MiNT. But MiNT is downward compatible to TOS, so this should # be no problem. 
atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*) echo m68k-atari-mint${UNAME_RELEASE} exit ;; atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*) echo m68k-atari-mint${UNAME_RELEASE} exit ;; *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*) echo m68k-atari-mint${UNAME_RELEASE} exit ;; milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*) echo m68k-milan-mint${UNAME_RELEASE} exit ;; hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*) echo m68k-hades-mint${UNAME_RELEASE} exit ;; *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*) echo m68k-unknown-mint${UNAME_RELEASE} exit ;; m68k:machten:*:*) echo m68k-apple-machten${UNAME_RELEASE} exit ;; powerpc:machten:*:*) echo powerpc-apple-machten${UNAME_RELEASE} exit ;; RISC*:Mach:*:*) echo mips-dec-mach_bsd4.3 exit ;; RISC*:ULTRIX:*:*) echo mips-dec-ultrix${UNAME_RELEASE} exit ;; VAX*:ULTRIX*:*:*) echo vax-dec-ultrix${UNAME_RELEASE} exit ;; 2020:CLIX:*:* | 2430:CLIX:*:*) echo clipper-intergraph-clix${UNAME_RELEASE} exit ;; mips:*:*:UMIPS | mips:*:*:RISCos) eval $set_cc_for_build sed 's/^ //' << EOF >$dummy.c #ifdef __cplusplus #include /* for printf() prototype */ int main (int argc, char *argv[]) { #else int main (argc, argv) int argc; char *argv[]; { #endif #if defined (host_mips) && defined (MIPSEB) #if defined (SYSTYPE_SYSV) printf ("mips-mips-riscos%ssysv\n", argv[1]); exit (0); #endif #if defined (SYSTYPE_SVR4) printf ("mips-mips-riscos%ssvr4\n", argv[1]); exit (0); #endif #if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD) printf ("mips-mips-riscos%sbsd\n", argv[1]); exit (0); #endif #endif exit (-1); } EOF $CC_FOR_BUILD -o $dummy $dummy.c && dummyarg=`echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` && SYSTEM_NAME=`$dummy $dummyarg` && { echo "$SYSTEM_NAME"; exit; } echo mips-mips-riscos${UNAME_RELEASE} exit ;; Motorola:PowerMAX_OS:*:*) echo powerpc-motorola-powermax exit ;; Motorola:*:4.3:PL8-*) echo powerpc-harris-powermax exit ;; Night_Hawk:*:*:PowerMAX_OS | 
Synergy:PowerMAX_OS:*:*) echo powerpc-harris-powermax exit ;; Night_Hawk:Power_UNIX:*:*) echo powerpc-harris-powerunix exit ;; m88k:CX/UX:7*:*) echo m88k-harris-cxux7 exit ;; m88k:*:4*:R4*) echo m88k-motorola-sysv4 exit ;; m88k:*:3*:R3*) echo m88k-motorola-sysv3 exit ;; AViiON:dgux:*:*) # DG/UX returns AViiON for all architectures UNAME_PROCESSOR=`/usr/bin/uname -p` if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ] then if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \ [ ${TARGET_BINARY_INTERFACE}x = x ] then echo m88k-dg-dgux${UNAME_RELEASE} else echo m88k-dg-dguxbcs${UNAME_RELEASE} fi else echo i586-dg-dgux${UNAME_RELEASE} fi exit ;; M88*:DolphinOS:*:*) # DolphinOS (SVR3) echo m88k-dolphin-sysv3 exit ;; M88*:*:R3*:*) # Delta 88k system running SVR3 echo m88k-motorola-sysv3 exit ;; XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3) echo m88k-tektronix-sysv3 exit ;; Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD) echo m68k-tektronix-bsd exit ;; *:IRIX*:*:*) echo mips-sgi-irix`echo ${UNAME_RELEASE}|sed -e 's/-/_/g'` exit ;; ????????:AIX?:[12].1:2) # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX. 
echo romp-ibm-aix # uname -m gives an 8 hex-code CPU id exit ;; # Note that: echo "'`uname -s`'" gives 'AIX ' i*86:AIX:*:*) echo i386-ibm-aix exit ;; ia64:AIX:*:*) if [ -x /usr/bin/oslevel ] ; then IBM_REV=`/usr/bin/oslevel` else IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} fi echo ${UNAME_MACHINE}-ibm-aix${IBM_REV} exit ;; *:AIX:2:3) if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then eval $set_cc_for_build sed 's/^ //' << EOF >$dummy.c #include main() { if (!__power_pc()) exit(1); puts("powerpc-ibm-aix3.2.5"); exit(0); } EOF if $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` then echo "$SYSTEM_NAME" else echo rs6000-ibm-aix3.2.5 fi elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then echo rs6000-ibm-aix3.2.4 else echo rs6000-ibm-aix3.2 fi exit ;; *:AIX:*:[45]) IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'` if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then IBM_ARCH=rs6000 else IBM_ARCH=powerpc fi if [ -x /usr/bin/oslevel ] ; then IBM_REV=`/usr/bin/oslevel` else IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} fi echo ${IBM_ARCH}-ibm-aix${IBM_REV} exit ;; *:AIX:*:*) echo rs6000-ibm-aix exit ;; ibmrt:4.4BSD:*|romp-ibm:BSD:*) echo romp-ibm-bsd4.4 exit ;; ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC BSD and echo romp-ibm-bsd${UNAME_RELEASE} # 4.3 with uname added to exit ;; # report: romp-ibm BSD 4.3 *:BOSX:*:*) echo rs6000-bull-bosx exit ;; DPX/2?00:B.O.S.:*:*) echo m68k-bull-sysv3 exit ;; 9000/[34]??:4.3bsd:1.*:*) echo m68k-hp-bsd exit ;; hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*) echo m68k-hp-bsd4.4 exit ;; 9000/[34678]??:HP-UX:*:*) HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` case "${UNAME_MACHINE}" in 9000/31? ) HP_ARCH=m68000 ;; 9000/[34]?? 
) HP_ARCH=m68k ;; 9000/[678][0-9][0-9]) if [ -x /usr/bin/getconf ]; then sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null` sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` case "${sc_cpu_version}" in 523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0 528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1 532) # CPU_PA_RISC2_0 case "${sc_kernel_bits}" in 32) HP_ARCH="hppa2.0n" ;; 64) HP_ARCH="hppa2.0w" ;; '') HP_ARCH="hppa2.0" ;; # HP-UX 10.20 esac ;; esac fi if [ "${HP_ARCH}" = "" ]; then eval $set_cc_for_build sed 's/^ //' << EOF >$dummy.c #define _HPUX_SOURCE #include #include int main () { #if defined(_SC_KERNEL_BITS) long bits = sysconf(_SC_KERNEL_BITS); #endif long cpu = sysconf (_SC_CPU_VERSION); switch (cpu) { case CPU_PA_RISC1_0: puts ("hppa1.0"); break; case CPU_PA_RISC1_1: puts ("hppa1.1"); break; case CPU_PA_RISC2_0: #if defined(_SC_KERNEL_BITS) switch (bits) { case 64: puts ("hppa2.0w"); break; case 32: puts ("hppa2.0n"); break; default: puts ("hppa2.0"); break; } break; #else /* !defined(_SC_KERNEL_BITS) */ puts ("hppa2.0"); break; #endif default: puts ("hppa1.0"); break; } exit (0); } EOF (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy` test -z "$HP_ARCH" && HP_ARCH=hppa fi ;; esac if [ ${HP_ARCH} = "hppa2.0w" ] then eval $set_cc_for_build # hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating # 32-bit code. hppa64-hp-hpux* has the same kernel and a compiler # generating 64-bit code. 
GNU and HP use different nomenclature: # # $ CC_FOR_BUILD=cc ./config.guess # => hppa2.0w-hp-hpux11.23 # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess # => hppa64-hp-hpux11.23 if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | grep __LP64__ >/dev/null then HP_ARCH="hppa2.0w" else HP_ARCH="hppa64" fi fi echo ${HP_ARCH}-hp-hpux${HPUX_REV} exit ;; ia64:HP-UX:*:*) HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` echo ia64-hp-hpux${HPUX_REV} exit ;; 3050*:HI-UX:*:*) eval $set_cc_for_build sed 's/^ //' << EOF >$dummy.c #include int main () { long cpu = sysconf (_SC_CPU_VERSION); /* The order matters, because CPU_IS_HP_MC68K erroneously returns true for CPU_PA_RISC1_0. CPU_IS_PA_RISC returns correct results, however. */ if (CPU_IS_PA_RISC (cpu)) { switch (cpu) { case CPU_PA_RISC1_0: puts ("hppa1.0-hitachi-hiuxwe2"); break; case CPU_PA_RISC1_1: puts ("hppa1.1-hitachi-hiuxwe2"); break; case CPU_PA_RISC2_0: puts ("hppa2.0-hitachi-hiuxwe2"); break; default: puts ("hppa-hitachi-hiuxwe2"); break; } } else if (CPU_IS_HP_MC68K (cpu)) puts ("m68k-hitachi-hiuxwe2"); else puts ("unknown-hitachi-hiuxwe2"); exit (0); } EOF $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` && { echo "$SYSTEM_NAME"; exit; } echo unknown-hitachi-hiuxwe2 exit ;; 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:* ) echo hppa1.1-hp-bsd exit ;; 9000/8??:4.3bsd:*:*) echo hppa1.0-hp-bsd exit ;; *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*) echo hppa1.0-hp-mpeix exit ;; hp7??:OSF1:*:* | hp8?[79]:OSF1:*:* ) echo hppa1.1-hp-osf exit ;; hp8??:OSF1:*:*) echo hppa1.0-hp-osf exit ;; i*86:OSF1:*:*) if [ -x /usr/sbin/sysversion ] ; then echo ${UNAME_MACHINE}-unknown-osf1mk else echo ${UNAME_MACHINE}-unknown-osf1 fi exit ;; parisc*:Lites*:*:*) echo hppa1.1-hp-lites exit ;; C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*) echo c1-convex-bsd exit ;; C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*) if getsysinfo -f scalar_acc then echo c32-convex-bsd else echo c2-convex-bsd fi exit ;; C34*:ConvexOS:*:* | 
convex:ConvexOS:C34*:*) echo c34-convex-bsd exit ;; C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*) echo c38-convex-bsd exit ;; C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*) echo c4-convex-bsd exit ;; CRAY*Y-MP:*:*:*) echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' exit ;; CRAY*[A-Z]90:*:*:*) echo ${UNAME_MACHINE}-cray-unicos${UNAME_RELEASE} \ | sed -e 's/CRAY.*\([A-Z]90\)/\1/' \ -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \ -e 's/\.[^.]*$/.X/' exit ;; CRAY*TS:*:*:*) echo t90-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' exit ;; CRAY*T3E:*:*:*) echo alphaev5-cray-unicosmk${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' exit ;; CRAY*SV1:*:*:*) echo sv1-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' exit ;; *:UNICOS/mp:*:*) echo craynv-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' exit ;; F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'` echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" exit ;; 5000:UNIX_System_V:4.*:*) FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'` echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" exit ;; i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE} exit ;; sparc*:BSD/OS:*:*) echo sparc-unknown-bsdi${UNAME_RELEASE} exit ;; *:BSD/OS:*:*) echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE} exit ;; *:FreeBSD:*:*) case ${UNAME_MACHINE} in pc98) echo i386-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; amd64) echo x86_64-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; *) echo 
${UNAME_MACHINE}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; esac exit ;; i*:CYGWIN*:*) echo ${UNAME_MACHINE}-pc-cygwin exit ;; *:MINGW*:*) echo ${UNAME_MACHINE}-pc-mingw32 exit ;; i*:windows32*:*) # uname -m includes "-pc" on this system. echo ${UNAME_MACHINE}-mingw32 exit ;; i*:PW*:*) echo ${UNAME_MACHINE}-pc-pw32 exit ;; *:Interix*:[3456]*) case ${UNAME_MACHINE} in x86) echo i586-pc-interix${UNAME_RELEASE} exit ;; EM64T | authenticamd) echo x86_64-unknown-interix${UNAME_RELEASE} exit ;; esac ;; [345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*) echo i${UNAME_MACHINE}-pc-mks exit ;; i*:Windows_NT*:* | Pentium*:Windows_NT*:*) # How do we know it's Interix rather than the generic POSIX subsystem? # It also conflicts with pre-2.0 versions of AT&T UWIN. Should we # UNAME_MACHINE based on the output of uname instead of i386? echo i586-pc-interix exit ;; i*:UWIN*:*) echo ${UNAME_MACHINE}-pc-uwin exit ;; amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*) echo x86_64-unknown-cygwin exit ;; p*:CYGWIN*:*) echo powerpcle-unknown-cygwin exit ;; prep*:SunOS:5.*:*) echo powerpcle-unknown-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` exit ;; *:GNU:*:*) # the GNU system echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-gnu`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'` exit ;; *:GNU/*:*:*) # other systems with GNU libc and userland echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-gnu exit ;; i*86:Minix:*:*) echo ${UNAME_MACHINE}-pc-minix exit ;; arm*:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; avr32*:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; cris:Linux:*:*) echo cris-axis-linux-gnu exit ;; crisv32:Linux:*:*) echo crisv32-axis-linux-gnu exit ;; frv:Linux:*:*) echo frv-unknown-linux-gnu exit ;; ia64:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; m32r*:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; 
m68*:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; mips:Linux:*:*) eval $set_cc_for_build sed 's/^ //' << EOF >$dummy.c #undef CPU #undef mips #undef mipsel #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) CPU=mipsel #else #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) CPU=mips #else CPU= #endif #endif EOF eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n ' /^CPU/{ s: ::g p }'`" test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; } ;; mips64:Linux:*:*) eval $set_cc_for_build sed 's/^ //' << EOF >$dummy.c #undef CPU #undef mips64 #undef mips64el #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) CPU=mips64el #else #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) CPU=mips64 #else CPU= #endif #endif EOF eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n ' /^CPU/{ s: ::g p }'`" test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; } ;; or32:Linux:*:*) echo or32-unknown-linux-gnu exit ;; ppc:Linux:*:*) echo powerpc-unknown-linux-gnu exit ;; ppc64:Linux:*:*) echo powerpc64-unknown-linux-gnu exit ;; alpha:Linux:*:*) case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in EV5) UNAME_MACHINE=alphaev5 ;; EV56) UNAME_MACHINE=alphaev56 ;; PCA56) UNAME_MACHINE=alphapca56 ;; PCA57) UNAME_MACHINE=alphapca56 ;; EV6) UNAME_MACHINE=alphaev6 ;; EV67) UNAME_MACHINE=alphaev67 ;; EV68*) UNAME_MACHINE=alphaev68 ;; esac objdump --private-headers /bin/sh | grep ld.so.1 >/dev/null if test "$?" 
= 0 ; then LIBC="libc1" ; else LIBC="" ; fi echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC} exit ;; parisc:Linux:*:* | hppa:Linux:*:*) # Look for CPU level case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in PA7*) echo hppa1.1-unknown-linux-gnu ;; PA8*) echo hppa2.0-unknown-linux-gnu ;; *) echo hppa-unknown-linux-gnu ;; esac exit ;; parisc64:Linux:*:* | hppa64:Linux:*:*) echo hppa64-unknown-linux-gnu exit ;; s390:Linux:*:* | s390x:Linux:*:*) echo ${UNAME_MACHINE}-ibm-linux exit ;; sh64*:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; sh*:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; sparc:Linux:*:* | sparc64:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; vax:Linux:*:*) echo ${UNAME_MACHINE}-dec-linux-gnu exit ;; x86_64:Linux:*:*) echo x86_64-unknown-linux-gnu exit ;; xtensa:Linux:*:*) echo xtensa-unknown-linux-gnu exit ;; i*86:Linux:*:*) # The BFD linker knows what the default object file format is, so # first see if it will tell us. cd to the root directory to prevent # problems with other programs or directories called `ld' in the path. # Set LC_ALL=C to ensure ld outputs messages in English. ld_supported_targets=`cd /; LC_ALL=C ld --help 2>&1 \ | sed -ne '/supported targets:/!d s/[ ][ ]*/ /g s/.*supported targets: *// s/ .*// p'` case "$ld_supported_targets" in elf32-i386) TENTATIVE="${UNAME_MACHINE}-pc-linux-gnu" ;; a.out-i386-linux) echo "${UNAME_MACHINE}-pc-linux-gnuaout" exit ;; coff-i386) echo "${UNAME_MACHINE}-pc-linux-gnucoff" exit ;; "") # Either a pre-BFD a.out linker (linux-gnuoldld) or # one that does not give us useful --help. 
echo "${UNAME_MACHINE}-pc-linux-gnuoldld" exit ;; esac # Determine whether the default compiler is a.out or elf eval $set_cc_for_build sed 's/^ //' << EOF >$dummy.c #include #ifdef __ELF__ # ifdef __GLIBC__ # if __GLIBC__ >= 2 LIBC=gnu # else LIBC=gnulibc1 # endif # else LIBC=gnulibc1 # endif #else #if defined(__INTEL_COMPILER) || defined(__PGI) || defined(__SUNPRO_C) || defined(__SUNPRO_CC) LIBC=gnu #else LIBC=gnuaout #endif #endif #ifdef __dietlibc__ LIBC=dietlibc #endif EOF eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n ' /^LIBC/{ s: ::g p }'`" test x"${LIBC}" != x && { echo "${UNAME_MACHINE}-pc-linux-${LIBC}" exit } test x"${TENTATIVE}" != x && { echo "${TENTATIVE}"; exit; } ;; i*86:DYNIX/ptx:4*:*) # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. # earlier versions are messed up and put the nodename in both # sysname and nodename. echo i386-sequent-sysv4 exit ;; i*86:UNIX_SV:4.2MP:2.*) # Unixware is an offshoot of SVR4, but it has its own version # number series starting with 2... # I am not positive that other SVR4 systems won't match this, # I just have to hope. -- rms. # Use sysv4.2uw... so that sysv4* matches it. echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION} exit ;; i*86:OS/2:*:*) # If we were able to find `uname', then EMX Unix compatibility # is probably installed. 
echo ${UNAME_MACHINE}-pc-os2-emx exit ;; i*86:XTS-300:*:STOP) echo ${UNAME_MACHINE}-unknown-stop exit ;; i*86:atheos:*:*) echo ${UNAME_MACHINE}-unknown-atheos exit ;; i*86:syllable:*:*) echo ${UNAME_MACHINE}-pc-syllable exit ;; i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.0*:*) echo i386-unknown-lynxos${UNAME_RELEASE} exit ;; i*86:*DOS:*:*) echo ${UNAME_MACHINE}-pc-msdosdjgpp exit ;; i*86:*:4.*:* | i*86:SYSTEM_V:4.*:*) UNAME_REL=`echo ${UNAME_RELEASE} | sed 's/\/MP$//'` if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then echo ${UNAME_MACHINE}-univel-sysv${UNAME_REL} else echo ${UNAME_MACHINE}-pc-sysv${UNAME_REL} fi exit ;; i*86:*:5:[678]*) # UnixWare 7.x, OpenUNIX and OpenServer 6. case `/bin/uname -X | grep "^Machine"` in *486*) UNAME_MACHINE=i486 ;; *Pentium) UNAME_MACHINE=i586 ;; *Pent*|*Celeron) UNAME_MACHINE=i686 ;; esac echo ${UNAME_MACHINE}-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION} exit ;; i*86:*:3.2:*) if test -f /usr/options/cb.name; then UNAME_REL=`sed -n 's/.*Version //p' /dev/null >/dev/null ; then UNAME_REL=`(/bin/uname -X|grep Release|sed -e 's/.*= //')` (/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486 (/bin/uname -X|grep '^Machine.*Pentium' >/dev/null) \ && UNAME_MACHINE=i586 (/bin/uname -X|grep '^Machine.*Pent *II' >/dev/null) \ && UNAME_MACHINE=i686 (/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \ && UNAME_MACHINE=i686 echo ${UNAME_MACHINE}-pc-sco$UNAME_REL else echo ${UNAME_MACHINE}-pc-sysv32 fi exit ;; pc:*:*:*) # Left here for compatibility: # uname -m prints for DJGPP always 'pc', but it prints nothing about # the processor, so we play safe by assuming i386. 
echo i386-pc-msdosdjgpp exit ;; Intel:Mach:3*:*) echo i386-pc-mach3 exit ;; paragon:*:*:*) echo i860-intel-osf1 exit ;; i860:*:4.*:*) # i860-SVR4 if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then echo i860-stardent-sysv${UNAME_RELEASE} # Stardent Vistra i860-SVR4 else # Add other i860-SVR4 vendors below as they are discovered. echo i860-unknown-sysv${UNAME_RELEASE} # Unknown i860-SVR4 fi exit ;; mini*:CTIX:SYS*5:*) # "miniframe" echo m68010-convergent-sysv exit ;; mc68k:UNIX:SYSTEM5:3.51m) echo m68k-convergent-sysv exit ;; M680?0:D-NIX:5.3:*) echo m68k-diab-dnix exit ;; M68*:*:R3V[5678]*:*) test -r /sysV68 && { echo 'm68k-motorola-sysv'; exit; } ;; 3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0) OS_REL='' test -r /etc/.relid \ && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ && { echo i486-ncr-sysv4.3${OS_REL}; exit; } /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;; 3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*) /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ && { echo i486-ncr-sysv4; exit; } ;; m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*) echo m68k-unknown-lynxos${UNAME_RELEASE} exit ;; mc68030:UNIX_System_V:4.*:*) echo m68k-atari-sysv4 exit ;; TSUNAMI:LynxOS:2.*:*) echo sparc-unknown-lynxos${UNAME_RELEASE} exit ;; rs6000:LynxOS:2.*:*) echo rs6000-unknown-lynxos${UNAME_RELEASE} exit ;; PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.0*:*) echo powerpc-unknown-lynxos${UNAME_RELEASE} exit ;; SM[BE]S:UNIX_SV:*:*) echo mips-dde-sysv${UNAME_RELEASE} exit ;; RM*:ReliantUNIX-*:*:*) echo mips-sni-sysv4 exit ;; RM*:SINIX-*:*:*) echo mips-sni-sysv4 exit ;; *:SINIX-*:*:*) if uname -p 2>/dev/null >/dev/null ; then UNAME_MACHINE=`(uname -p) 2>/dev/null` echo ${UNAME_MACHINE}-sni-sysv4 else 
echo ns32k-sni-sysv fi exit ;; PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort # says echo i586-unisys-sysv4 exit ;; *:UNIX_System_V:4*:FTX*) # From Gerald Hewes . # How about differentiating between stratus architectures? -djm echo hppa1.1-stratus-sysv4 exit ;; *:*:*:FTX*) # From seanf@swdc.stratus.com. echo i860-stratus-sysv4 exit ;; i*86:VOS:*:*) # From Paul.Green@stratus.com. echo ${UNAME_MACHINE}-stratus-vos exit ;; *:VOS:*:*) # From Paul.Green@stratus.com. echo hppa1.1-stratus-vos exit ;; mc68*:A/UX:*:*) echo m68k-apple-aux${UNAME_RELEASE} exit ;; news*:NEWS-OS:6*:*) echo mips-sony-newsos6 exit ;; R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*) if [ -d /usr/nec ]; then echo mips-nec-sysv${UNAME_RELEASE} else echo mips-unknown-sysv${UNAME_RELEASE} fi exit ;; BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only. echo powerpc-be-beos exit ;; BeMac:BeOS:*:*) # BeOS running on Mac or Mac clone, PPC only. echo powerpc-apple-beos exit ;; BePC:BeOS:*:*) # BeOS running on Intel PC compatible. 
echo i586-pc-beos exit ;; SX-4:SUPER-UX:*:*) echo sx4-nec-superux${UNAME_RELEASE} exit ;; SX-5:SUPER-UX:*:*) echo sx5-nec-superux${UNAME_RELEASE} exit ;; SX-6:SUPER-UX:*:*) echo sx6-nec-superux${UNAME_RELEASE} exit ;; SX-7:SUPER-UX:*:*) echo sx7-nec-superux${UNAME_RELEASE} exit ;; SX-8:SUPER-UX:*:*) echo sx8-nec-superux${UNAME_RELEASE} exit ;; SX-8R:SUPER-UX:*:*) echo sx8r-nec-superux${UNAME_RELEASE} exit ;; Power*:Rhapsody:*:*) echo powerpc-apple-rhapsody${UNAME_RELEASE} exit ;; *:Rhapsody:*:*) echo ${UNAME_MACHINE}-apple-rhapsody${UNAME_RELEASE} exit ;; *:Darwin:*:*) UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown case $UNAME_PROCESSOR in unknown) UNAME_PROCESSOR=powerpc ;; esac echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE} exit ;; *:procnto*:*:* | *:QNX:[0123456789]*:*) UNAME_PROCESSOR=`uname -p` if test "$UNAME_PROCESSOR" = "x86"; then UNAME_PROCESSOR=i386 UNAME_MACHINE=pc fi echo ${UNAME_PROCESSOR}-${UNAME_MACHINE}-nto-qnx${UNAME_RELEASE} exit ;; *:QNX:*:4*) echo i386-pc-qnx exit ;; NSE-?:NONSTOP_KERNEL:*:*) echo nse-tandem-nsk${UNAME_RELEASE} exit ;; NSR-?:NONSTOP_KERNEL:*:*) echo nsr-tandem-nsk${UNAME_RELEASE} exit ;; *:NonStop-UX:*:*) echo mips-compaq-nonstopux exit ;; BS2000:POSIX*:*:*) echo bs2000-siemens-sysv exit ;; DS/*:UNIX_System_V:*:*) echo ${UNAME_MACHINE}-${UNAME_SYSTEM}-${UNAME_RELEASE} exit ;; *:Plan9:*:*) # "uname -m" is not consistent, so use $cputype instead. 386 # is converted to i386 for consistency with other x86 # operating systems. 
if test "$cputype" = "386"; then UNAME_MACHINE=i386 else UNAME_MACHINE="$cputype" fi echo ${UNAME_MACHINE}-unknown-plan9 exit ;; *:TOPS-10:*:*) echo pdp10-unknown-tops10 exit ;; *:TENEX:*:*) echo pdp10-unknown-tenex exit ;; KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*) echo pdp10-dec-tops20 exit ;; XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*) echo pdp10-xkl-tops20 exit ;; *:TOPS-20:*:*) echo pdp10-unknown-tops20 exit ;; *:ITS:*:*) echo pdp10-unknown-its exit ;; SEI:*:*:SEIUX) echo mips-sei-seiux${UNAME_RELEASE} exit ;; *:DragonFly:*:*) echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` exit ;; *:*VMS:*:*) UNAME_MACHINE=`(uname -p) 2>/dev/null` case "${UNAME_MACHINE}" in A*) echo alpha-dec-vms ; exit ;; I*) echo ia64-dec-vms ; exit ;; V*) echo vax-dec-vms ; exit ;; esac ;; *:XENIX:*:SysV) echo i386-pc-xenix exit ;; i*86:skyos:*:*) echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE}` | sed -e 's/ .*$//' exit ;; i*86:rdos:*:*) echo ${UNAME_MACHINE}-pc-rdos exit ;; esac #echo '(No uname command or uname output not recognized.)' 1>&2 #echo "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" 1>&2 eval $set_cc_for_build cat >$dummy.c < # include #endif main () { #if defined (sony) #if defined (MIPSEB) /* BFD wants "bsd" instead of "newsos". Perhaps BFD should be changed, I don't know.... 
*/ printf ("mips-sony-bsd\n"); exit (0); #else #include printf ("m68k-sony-newsos%s\n", #ifdef NEWSOS4 "4" #else "" #endif ); exit (0); #endif #endif #if defined (__arm) && defined (__acorn) && defined (__unix) printf ("arm-acorn-riscix\n"); exit (0); #endif #if defined (hp300) && !defined (hpux) printf ("m68k-hp-bsd\n"); exit (0); #endif #if defined (NeXT) #if !defined (__ARCHITECTURE__) #define __ARCHITECTURE__ "m68k" #endif int version; version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`; if (version < 4) printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version); else printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version); exit (0); #endif #if defined (MULTIMAX) || defined (n16) #if defined (UMAXV) printf ("ns32k-encore-sysv\n"); exit (0); #else #if defined (CMU) printf ("ns32k-encore-mach\n"); exit (0); #else printf ("ns32k-encore-bsd\n"); exit (0); #endif #endif #endif #if defined (__386BSD__) printf ("i386-pc-bsd\n"); exit (0); #endif #if defined (sequent) #if defined (i386) printf ("i386-sequent-dynix\n"); exit (0); #endif #if defined (ns32000) printf ("ns32k-sequent-dynix\n"); exit (0); #endif #endif #if defined (_SEQUENT_) struct utsname un; uname(&un); if (strncmp(un.version, "V2", 2) == 0) { printf ("i386-sequent-ptx2\n"); exit (0); } if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct? 
*/ printf ("i386-sequent-ptx1\n"); exit (0); } printf ("i386-sequent-ptx\n"); exit (0); #endif #if defined (vax) # if !defined (ultrix) # include # if defined (BSD) # if BSD == 43 printf ("vax-dec-bsd4.3\n"); exit (0); # else # if BSD == 199006 printf ("vax-dec-bsd4.3reno\n"); exit (0); # else printf ("vax-dec-bsd\n"); exit (0); # endif # endif # else printf ("vax-dec-bsd\n"); exit (0); # endif # else printf ("vax-dec-ultrix\n"); exit (0); # endif #endif #if defined (alliant) && defined (i860) printf ("i860-alliant-bsd\n"); exit (0); #endif exit (1); } EOF $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null && SYSTEM_NAME=`$dummy` && { echo "$SYSTEM_NAME"; exit; } # Apollos put the system type in the environment. test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit; } # Convex versions that predate uname can use getsysinfo(1) if [ -x /usr/convex/getsysinfo ] then case `getsysinfo -f cpu_type` in c1*) echo c1-convex-bsd exit ;; c2*) if getsysinfo -f scalar_acc then echo c32-convex-bsd else echo c2-convex-bsd fi exit ;; c34*) echo c34-convex-bsd exit ;; c38*) echo c38-convex-bsd exit ;; c4*) echo c4-convex-bsd exit ;; esac fi cat >&2 < in order to provide the needed information to handle your system. 
config.guess timestamp = $timestamp uname -m = `(uname -m) 2>/dev/null || echo unknown` uname -r = `(uname -r) 2>/dev/null || echo unknown` uname -s = `(uname -s) 2>/dev/null || echo unknown` uname -v = `(uname -v) 2>/dev/null || echo unknown` /usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null` /bin/uname -X = `(/bin/uname -X) 2>/dev/null` hostinfo = `(hostinfo) 2>/dev/null` /bin/universe = `(/bin/universe) 2>/dev/null` /usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null` /bin/arch = `(/bin/arch) 2>/dev/null` /usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null` /usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null` UNAME_MACHINE = ${UNAME_MACHINE} UNAME_RELEASE = ${UNAME_RELEASE} UNAME_SYSTEM = ${UNAME_SYSTEM} UNAME_VERSION = ${UNAME_VERSION} EOF exit 1 # Local variables: # eval: (add-hook 'write-file-hooks 'time-stamp) # time-stamp-start: "timestamp='" # time-stamp-format: "%:y-%02m-%02d" # time-stamp-end: "'" # End: einspline-0.9.2/Makefile.am0000664000113000011300000000026011222435630012443 00000000000000SUBDIRS = src www doc pkgconfig_DATA = einspline.pc EXTRA_DIST = einspline.pc.in m4/*.m4 cudalt.py pkgconfigdir = $(libdir)/pkgconfig/ includedir=$(prefix)/include/einspline einspline-0.9.2/einspline.pc.in0000664000113000011300000000041311061770677013344 00000000000000prefix=@prefix@ exec_prefix=@exec_prefix@ libdir=@libdir@ includedir=@includedir@ Name: einspline Description: library for creation and evaluation of cubic B-splines Version: @VERSION@ Libs: -L${libdir} -leinspline @CUDA_LIBS@ Cflags: -I${includedir} @PTHREAD_FLAG@ einspline-0.9.2/aclocal.m40000664000113000011300000102320011273633717012263 00000000000000# generated automatically by aclocal 1.10 -*- Autoconf -*- # Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, # 2005, 2006 Free Software Foundation, Inc. 
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.

# Abort early when this file is processed by an Autoconf other than the
# one it was generated for.
m4_if(m4_PACKAGE_VERSION, [2.61],,
[m4_fatal([this file was generated for autoconf 2.61. You have another version of autoconf. If you want to use that, you should regenerate the build system entirely.], [63])])

# libtool.m4 - Configure libtool for the host system. -*-Autoconf-*-

# serial 51 AC_PROG_LIBTOOL


# AC_PROVIDE_IFELSE(MACRO-NAME, IF-PROVIDED, IF-NOT-PROVIDED)
# -----------------------------------------------------------
# If this macro is not defined by Autoconf, define it here.
# The fallback expands IF-PROVIDED when AC_PROVIDE_<MACRO-NAME> is defined
# and IF-NOT-PROVIDED otherwise.
m4_ifdef([AC_PROVIDE_IFELSE],
         [],
         [m4_define([AC_PROVIDE_IFELSE],
                    [m4_ifdef([AC_PROVIDE_$1],
                              [$2], [$3])])])


# AC_PROG_LIBTOOL
# ---------------
# Public entry point.  Requires _AC_PROG_LIBTOOL, then arranges for the
# per-language tag macros to run: AC_LIBTOOL_CXX, AC_LIBTOOL_F77 and
# AC_LIBTOOL_GCJ are expanded immediately when the matching compiler
# macro has already been provided; otherwise that compiler macro is
# redefined so the tag macro is appended to its expansion.
AC_DEFUN([AC_PROG_LIBTOOL],
[AC_REQUIRE([_AC_PROG_LIBTOOL])dnl
dnl If AC_PROG_CXX has already been expanded, run AC_LIBTOOL_CXX
dnl immediately, otherwise, hook it in at the end of AC_PROG_CXX.
  AC_PROVIDE_IFELSE([AC_PROG_CXX],
    [AC_LIBTOOL_CXX],
    [define([AC_PROG_CXX], defn([AC_PROG_CXX])[AC_LIBTOOL_CXX
  ])])
dnl And a similar setup for Fortran 77 support
  AC_PROVIDE_IFELSE([AC_PROG_F77],
    [AC_LIBTOOL_F77],
    [define([AC_PROG_F77], defn([AC_PROG_F77])[AC_LIBTOOL_F77
])])

dnl Quote A][M_PROG_GCJ so that aclocal doesn't bring it in needlessly.
dnl If either AC_PROG_GCJ or A][M_PROG_GCJ have already been expanded, run
dnl AC_LIBTOOL_GCJ immediately, otherwise, hook it in at the end of both.
  AC_PROVIDE_IFELSE([AC_PROG_GCJ],
    [AC_LIBTOOL_GCJ],
    [AC_PROVIDE_IFELSE([A][M_PROG_GCJ],
      [AC_LIBTOOL_GCJ],
      [AC_PROVIDE_IFELSE([LT_AC_PROG_GCJ],
        [AC_LIBTOOL_GCJ],
      [ifdef([AC_PROG_GCJ],
             [define([AC_PROG_GCJ], defn([AC_PROG_GCJ])[AC_LIBTOOL_GCJ])])
       ifdef([A][M_PROG_GCJ],
             [define([A][M_PROG_GCJ], defn([A][M_PROG_GCJ])[AC_LIBTOOL_GCJ])])
       ifdef([LT_AC_PROG_GCJ],
             [define([LT_AC_PROG_GCJ],
                defn([LT_AC_PROG_GCJ])[AC_LIBTOOL_GCJ])])])])
])])# AC_PROG_LIBTOOL


# _AC_PROG_LIBTOOL
# ----------------
# Requires AC_LIBTOOL_SETUP, sets LIBTOOL_DEPS and LIBTOOL (the latter is
# substituted with AC_SUBST), and finally redefines AC_PROG_LIBTOOL to
# nothing so that it is expanded only once.
AC_DEFUN([_AC_PROG_LIBTOOL],
[AC_REQUIRE([AC_LIBTOOL_SETUP])dnl
AC_BEFORE([$0],[AC_LIBTOOL_CXX])dnl
AC_BEFORE([$0],[AC_LIBTOOL_F77])dnl
AC_BEFORE([$0],[AC_LIBTOOL_GCJ])dnl

# This can be used to rebuild libtool when needed
LIBTOOL_DEPS="$ac_aux_dir/ltmain.sh"

# Always use our own libtool.
LIBTOOL='$(SHELL) $(top_builddir)/libtool'
AC_SUBST(LIBTOOL)dnl

# Prevent multiple expansion
define([AC_PROG_LIBTOOL], [])
])# _AC_PROG_LIBTOOL


# AC_LIBTOOL_SETUP
# ----------------
# Requires the enable/host/compiler/linker checks listed at the top of the
# body, then sets the shell variables used by the rest of libtool.m4: the
# sed quoting scripts, defaults for the tool variables (AR, RANLIB, STRIP,
# ...), the old-style archive commands, and the --disable-libtool-lock and
# --with-pic options.  Ends by running the C tag configuration
# (AC_LIBTOOL_LANG_C_CONFIG) followed by _LT_AC_TAGCONFIG.
AC_DEFUN([AC_LIBTOOL_SETUP],
[AC_PREREQ(2.50)dnl
AC_REQUIRE([AC_ENABLE_SHARED])dnl
AC_REQUIRE([AC_ENABLE_STATIC])dnl
AC_REQUIRE([AC_ENABLE_FAST_INSTALL])dnl
AC_REQUIRE([AC_CANONICAL_HOST])dnl
AC_REQUIRE([AC_CANONICAL_BUILD])dnl
AC_REQUIRE([AC_PROG_CC])dnl
AC_REQUIRE([AC_PROG_LD])dnl
AC_REQUIRE([AC_PROG_LD_RELOAD_FLAG])dnl
AC_REQUIRE([AC_PROG_NM])dnl

AC_REQUIRE([AC_PROG_LN_S])dnl
AC_REQUIRE([AC_DEPLIBS_CHECK_METHOD])dnl
# Autoconf 2.13's AC_OBJEXT and AC_EXEEXT macros only works for C compilers!
AC_REQUIRE([AC_OBJEXT])dnl
AC_REQUIRE([AC_EXEEXT])dnl
dnl
AC_LIBTOOL_SYS_MAX_CMD_LEN
AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE
AC_LIBTOOL_OBJDIR

AC_REQUIRE([_LT_AC_SYS_COMPILER])dnl
_LT_AC_PROG_ECHO_BACKSLASH

case $host_os in
aix3*)
  # AIX sometimes has problems with the GCC collect2 program. For some
  # reason, if we set the COLLECT_NAMES environment variable, the problems
  # vanish in a puff of smoke.
  if test "X${COLLECT_NAMES+set}" != Xset; then
    COLLECT_NAMES=
    export COLLECT_NAMES
  fi
  ;;
esac

# Sed substitution that helps us do robust quoting. It backslashifies
# metacharacters that are still active within double-quoted strings.
Xsed='sed -e 1s/^X//'
[sed_quote_subst='s/\([\\"\\`$\\\\]\)/\\\1/g']

# Same as above, but do not quote variable references.
[double_quote_subst='s/\([\\"\\`\\\\]\)/\\\1/g']

# Sed substitution to delay expansion of an escaped shell variable in a
# double_quote_subst'ed string.
delay_variable_subst='s/\\\\\\\\\\\$/\\\\\\$/g'

# Sed substitution to avoid accidental globbing in evaled expressions
no_glob_subst='s/\*/\\\*/g'

# Constants:
rm="rm -f"

# Global variables:
default_ofile=libtool
can_build_shared=yes

# All known linkers require a `.a' archive for static linking (except MSVC,
# which needs '.lib').
libext=a
ltmain="$ac_aux_dir/ltmain.sh"
ofile="$default_ofile"
with_gnu_ld="$lt_cv_prog_gnu_ld"

AC_CHECK_TOOL(AR, ar, false)
AC_CHECK_TOOL(RANLIB, ranlib, :)
AC_CHECK_TOOL(STRIP, strip, :)

old_CC="$CC"
old_CFLAGS="$CFLAGS"

# Set sane defaults for various variables
test -z "$AR" && AR=ar
test -z "$AR_FLAGS" && AR_FLAGS=cru
test -z "$AS" && AS=as
test -z "$CC" && CC=cc
test -z "$LTCC" && LTCC=$CC
test -z "$LTCFLAGS" && LTCFLAGS=$CFLAGS
test -z "$DLLTOOL" && DLLTOOL=dlltool
test -z "$LD" && LD=ld
test -z "$LN_S" && LN_S="ln -s"
test -z "$MAGIC_CMD" && MAGIC_CMD=file
test -z "$NM" && NM=nm
test -z "$SED" && SED=sed
test -z "$OBJDUMP" && OBJDUMP=objdump
test -z "$RANLIB" && RANLIB=:
test -z "$STRIP" && STRIP=:
test -z "$ac_objext" && ac_objext=o

# Determine commands to create old-style static archives.
old_archive_cmds='$AR $AR_FLAGS $oldlib$oldobjs'
old_postinstall_cmds='chmod 644 $oldlib'
old_postuninstall_cmds=

if test -n "$RANLIB"; then
  case $host_os in
  openbsd*)
    old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB -t \$oldlib"
    ;;
  *)
    old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB \$oldlib"
    ;;
  esac
  old_archive_cmds="$old_archive_cmds~\$RANLIB \$oldlib"
fi

_LT_CC_BASENAME([$compiler])

# Only perform the check for file, if the check method requires it
case $deplibs_check_method in
file_magic*)
  if test "$file_magic_cmd" = '$MAGIC_CMD'; then
    AC_PATH_MAGIC
  fi
  ;;
esac

AC_PROVIDE_IFELSE([AC_LIBTOOL_DLOPEN], enable_dlopen=yes, enable_dlopen=no)
AC_PROVIDE_IFELSE([AC_LIBTOOL_WIN32_DLL],
enable_win32_dll=yes, enable_win32_dll=no)

AC_ARG_ENABLE([libtool-lock],
    [AC_HELP_STRING([--disable-libtool-lock],
        [avoid locking (might break parallel builds)])])
test "x$enable_libtool_lock" != xno && enable_libtool_lock=yes

AC_ARG_WITH([pic],
    [AC_HELP_STRING([--with-pic],
        [try to use only PIC/non-PIC objects @<:@default=use both@:>@])],
    [pic_mode="$withval"],
    [pic_mode=default])
test -z "$pic_mode" && pic_mode=default

# Use C for the default configuration in the libtool script
tagname=
AC_LIBTOOL_LANG_C_CONFIG
_LT_AC_TAGCONFIG
])# AC_LIBTOOL_SETUP


# _LT_AC_SYS_COMPILER
# -------------------
# Requires AC_PROG_CC, defaults LTCC to CC and LTCFLAGS to CFLAGS when they
# are unset, and copies CC into the shell variable compiler.
AC_DEFUN([_LT_AC_SYS_COMPILER],
[AC_REQUIRE([AC_PROG_CC])dnl

# If no C compiler was specified, use CC.
LTCC=${LTCC-"$CC"}

# If no C compiler flags were specified, use CFLAGS.
LTCFLAGS=${LTCFLAGS-"$CFLAGS"}

# Allow CC to be a program name with arguments.
compiler=$CC
])# _LT_AC_SYS_COMPILER


# _LT_CC_BASENAME(CC)
# -------------------
# Calculate cc_basename. Skip known compiler wrappers and cross-prefix.
AC_DEFUN([_LT_CC_BASENAME], [for cc_temp in $1""; do case $cc_temp in compile | *[[\\/]]compile | ccache | *[[\\/]]ccache ) ;; distcc | *[[\\/]]distcc | purify | *[[\\/]]purify ) ;; \-*) ;; *) break;; esac done cc_basename=`$echo "X$cc_temp" | $Xsed -e 's%.*/%%' -e "s%^$host_alias-%%"` ]) # _LT_COMPILER_BOILERPLATE # ------------------------ # Check for compiler boilerplate output or warnings with # the simple compiler test code. AC_DEFUN([_LT_COMPILER_BOILERPLATE], [AC_REQUIRE([LT_AC_PROG_SED])dnl ac_outfile=conftest.$ac_objext echo "$lt_simple_compile_test_code" >conftest.$ac_ext eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err _lt_compiler_boilerplate=`cat conftest.err` $rm conftest* ])# _LT_COMPILER_BOILERPLATE # _LT_LINKER_BOILERPLATE # ---------------------- # Check for linker boilerplate output or warnings with # the simple link test code. AC_DEFUN([_LT_LINKER_BOILERPLATE], [AC_REQUIRE([LT_AC_PROG_SED])dnl ac_outfile=conftest.$ac_objext echo "$lt_simple_link_test_code" >conftest.$ac_ext eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err _lt_linker_boilerplate=`cat conftest.err` $rm conftest* ])# _LT_LINKER_BOILERPLATE # _LT_AC_SYS_LIBPATH_AIX # ---------------------- # Links a minimal program and checks the executable # for the system default hardcoded library path. In most cases, # this is /usr/lib:/lib, but when the MPI compilers are used # the location of the communication and MPI libs are included too. # If we don't find anything, use the default library path according # to the aix ld manual. AC_DEFUN([_LT_AC_SYS_LIBPATH_AIX], [AC_REQUIRE([LT_AC_PROG_SED])dnl AC_LINK_IFELSE(AC_LANG_PROGRAM,[ lt_aix_libpath_sed=' /Import File Strings/,/^$/ { /^0/ { s/^0 *\(.*\)$/\1/ p } }' aix_libpath=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` # Check for a 64-bit object if we didn't find anything. 
if test -z "$aix_libpath"; then aix_libpath=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e "$lt_aix_libpath_sed"` fi],[]) if test -z "$aix_libpath"; then aix_libpath="/usr/lib:/lib"; fi ])# _LT_AC_SYS_LIBPATH_AIX # _LT_AC_SHELL_INIT(ARG) # ---------------------- AC_DEFUN([_LT_AC_SHELL_INIT], [ifdef([AC_DIVERSION_NOTICE], [AC_DIVERT_PUSH(AC_DIVERSION_NOTICE)], [AC_DIVERT_PUSH(NOTICE)]) $1 AC_DIVERT_POP ])# _LT_AC_SHELL_INIT # _LT_AC_PROG_ECHO_BACKSLASH # -------------------------- # Add some code to the start of the generated configure script which # will find an echo command which doesn't interpret backslashes. AC_DEFUN([_LT_AC_PROG_ECHO_BACKSLASH], [_LT_AC_SHELL_INIT([ # Check that we are running under the correct shell. SHELL=${CONFIG_SHELL-/bin/sh} case X$ECHO in X*--fallback-echo) # Remove one level of quotation (which was required for Make). ECHO=`echo "$ECHO" | sed 's,\\\\\[$]\\[$]0,'[$]0','` ;; esac echo=${ECHO-echo} if test "X[$]1" = X--no-reexec; then # Discard the --no-reexec flag, and continue. shift elif test "X[$]1" = X--fallback-echo; then # Avoid inline document here, it may be left over : elif test "X`($echo '\t') 2>/dev/null`" = 'X\t' ; then # Yippee, $echo works! : else # Restart under the correct shell. exec $SHELL "[$]0" --no-reexec ${1+"[$]@"} fi if test "X[$]1" = X--fallback-echo; then # used as fallback echo shift cat </dev/null 2>&1 && unset CDPATH if test -z "$ECHO"; then if test "X${echo_test_string+set}" != Xset; then # find a string as large as possible, as long as the shell can cope with it for cmd in 'sed 50q "[$]0"' 'sed 20q "[$]0"' 'sed 10q "[$]0"' 'sed 2q "[$]0"' 'echo test'; do # expected sizes: less than 2Kb, 1Kb, 512 bytes, 16 bytes, ... 
if (echo_test_string=`eval $cmd`) 2>/dev/null && echo_test_string=`eval $cmd` && (test "X$echo_test_string" = "X$echo_test_string") 2>/dev/null then break fi done fi if test "X`($echo '\t') 2>/dev/null`" = 'X\t' && echo_testing_string=`($echo "$echo_test_string") 2>/dev/null` && test "X$echo_testing_string" = "X$echo_test_string"; then : else # The Solaris, AIX, and Digital Unix default echo programs unquote # backslashes. This makes it impossible to quote backslashes using # echo "$something" | sed 's/\\/\\\\/g' # # So, first we look for a working echo in the user's PATH. lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR for dir in $PATH /usr/ucb; do IFS="$lt_save_ifs" if (test -f $dir/echo || test -f $dir/echo$ac_exeext) && test "X`($dir/echo '\t') 2>/dev/null`" = 'X\t' && echo_testing_string=`($dir/echo "$echo_test_string") 2>/dev/null` && test "X$echo_testing_string" = "X$echo_test_string"; then echo="$dir/echo" break fi done IFS="$lt_save_ifs" if test "X$echo" = Xecho; then # We didn't find a better echo, so look for alternatives. if test "X`(print -r '\t') 2>/dev/null`" = 'X\t' && echo_testing_string=`(print -r "$echo_test_string") 2>/dev/null` && test "X$echo_testing_string" = "X$echo_test_string"; then # This shell has a builtin print -r that does the trick. echo='print -r' elif (test -f /bin/ksh || test -f /bin/ksh$ac_exeext) && test "X$CONFIG_SHELL" != X/bin/ksh; then # If we have ksh, try running configure again with it. ORIGINAL_CONFIG_SHELL=${CONFIG_SHELL-/bin/sh} export ORIGINAL_CONFIG_SHELL CONFIG_SHELL=/bin/ksh export CONFIG_SHELL exec $CONFIG_SHELL "[$]0" --no-reexec ${1+"[$]@"} else # Try using printf. 
echo='printf %s\n' if test "X`($echo '\t') 2>/dev/null`" = 'X\t' && echo_testing_string=`($echo "$echo_test_string") 2>/dev/null` && test "X$echo_testing_string" = "X$echo_test_string"; then # Cool, printf works : elif echo_testing_string=`($ORIGINAL_CONFIG_SHELL "[$]0" --fallback-echo '\t') 2>/dev/null` && test "X$echo_testing_string" = 'X\t' && echo_testing_string=`($ORIGINAL_CONFIG_SHELL "[$]0" --fallback-echo "$echo_test_string") 2>/dev/null` && test "X$echo_testing_string" = "X$echo_test_string"; then CONFIG_SHELL=$ORIGINAL_CONFIG_SHELL export CONFIG_SHELL SHELL="$CONFIG_SHELL" export SHELL echo="$CONFIG_SHELL [$]0 --fallback-echo" elif echo_testing_string=`($CONFIG_SHELL "[$]0" --fallback-echo '\t') 2>/dev/null` && test "X$echo_testing_string" = 'X\t' && echo_testing_string=`($CONFIG_SHELL "[$]0" --fallback-echo "$echo_test_string") 2>/dev/null` && test "X$echo_testing_string" = "X$echo_test_string"; then echo="$CONFIG_SHELL [$]0 --fallback-echo" else # maybe with a smaller string... prev=: for cmd in 'echo test' 'sed 2q "[$]0"' 'sed 10q "[$]0"' 'sed 20q "[$]0"' 'sed 50q "[$]0"'; do if (test "X$echo_test_string" = "X`eval $cmd`") 2>/dev/null then break fi prev="$cmd" done if test "$prev" != 'sed 50q "[$]0"'; then echo_test_string=`eval $prev` export echo_test_string exec ${ORIGINAL_CONFIG_SHELL-${CONFIG_SHELL-/bin/sh}} "[$]0" ${1+"[$]@"} else # Oops. We lost completely, so just stick with echo. echo=echo fi fi fi fi fi fi # Copy echo and quote the copy suitably for passing to libtool from # the Makefile, instead of quoting the original, which is used later. 
ECHO=$echo if test "X$ECHO" = "X$CONFIG_SHELL [$]0 --fallback-echo"; then ECHO="$CONFIG_SHELL \\\$\[$]0 --fallback-echo" fi AC_SUBST(ECHO) ])])# _LT_AC_PROG_ECHO_BACKSLASH # _LT_AC_LOCK # ----------- AC_DEFUN([_LT_AC_LOCK], [AC_ARG_ENABLE([libtool-lock], [AC_HELP_STRING([--disable-libtool-lock], [avoid locking (might break parallel builds)])]) test "x$enable_libtool_lock" != xno && enable_libtool_lock=yes # Some flags need to be propagated to the compiler or linker for good # libtool support. case $host in ia64-*-hpux*) # Find out which ABI we are using. echo 'int i;' > conftest.$ac_ext if AC_TRY_EVAL(ac_compile); then case `/usr/bin/file conftest.$ac_objext` in *ELF-32*) HPUX_IA64_MODE="32" ;; *ELF-64*) HPUX_IA64_MODE="64" ;; esac fi rm -rf conftest* ;; *-*-irix6*) # Find out which ABI we are using. echo '[#]line __oline__ "configure"' > conftest.$ac_ext if AC_TRY_EVAL(ac_compile); then if test "$lt_cv_prog_gnu_ld" = yes; then case `/usr/bin/file conftest.$ac_objext` in *32-bit*) LD="${LD-ld} -melf32bsmip" ;; *N32*) LD="${LD-ld} -melf32bmipn32" ;; *64-bit*) LD="${LD-ld} -melf64bmip" ;; esac else case `/usr/bin/file conftest.$ac_objext` in *32-bit*) LD="${LD-ld} -32" ;; *N32*) LD="${LD-ld} -n32" ;; *64-bit*) LD="${LD-ld} -64" ;; esac fi fi rm -rf conftest* ;; x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \ s390*-*linux*|sparc*-*linux*) # Find out which ABI we are using. 
echo 'int i;' > conftest.$ac_ext if AC_TRY_EVAL(ac_compile); then case `/usr/bin/file conftest.o` in *32-bit*) case $host in x86_64-*kfreebsd*-gnu) LD="${LD-ld} -m elf_i386_fbsd" ;; x86_64-*linux*) LD="${LD-ld} -m elf_i386" ;; ppc64-*linux*|powerpc64-*linux*) LD="${LD-ld} -m elf32ppclinux" ;; s390x-*linux*) LD="${LD-ld} -m elf_s390" ;; sparc64-*linux*) LD="${LD-ld} -m elf32_sparc" ;; esac ;; *64-bit*) libsuff=64 case $host in x86_64-*kfreebsd*-gnu) LD="${LD-ld} -m elf_x86_64_fbsd" ;; x86_64-*linux*) LD="${LD-ld} -m elf_x86_64" ;; ppc*-*linux*|powerpc*-*linux*) LD="${LD-ld} -m elf64ppc" ;; s390*-*linux*) LD="${LD-ld} -m elf64_s390" ;; sparc*-*linux*) LD="${LD-ld} -m elf64_sparc" ;; esac ;; esac fi rm -rf conftest* ;; *-*-sco3.2v5*) # On SCO OpenServer 5, we need -belf to get full-featured binaries. SAVE_CFLAGS="$CFLAGS" CFLAGS="$CFLAGS -belf" AC_CACHE_CHECK([whether the C compiler needs -belf], lt_cv_cc_needs_belf, [AC_LANG_PUSH(C) AC_TRY_LINK([],[],[lt_cv_cc_needs_belf=yes],[lt_cv_cc_needs_belf=no]) AC_LANG_POP]) if test x"$lt_cv_cc_needs_belf" != x"yes"; then # this is probably gcc 2.8.0, egcs 1.0 or newer; no need for -belf CFLAGS="$SAVE_CFLAGS" fi ;; sparc*-*solaris*) # Find out which ABI we are using. 
echo 'int i;' > conftest.$ac_ext if AC_TRY_EVAL(ac_compile); then case `/usr/bin/file conftest.o` in *64-bit*) case $lt_cv_prog_gnu_ld in yes*) LD="${LD-ld} -m elf64_sparc" ;; *) LD="${LD-ld} -64" ;; esac ;; esac fi rm -rf conftest* ;; AC_PROVIDE_IFELSE([AC_LIBTOOL_WIN32_DLL], [*-*-cygwin* | *-*-mingw* | *-*-pw32*) AC_CHECK_TOOL(DLLTOOL, dlltool, false) AC_CHECK_TOOL(AS, as, false) AC_CHECK_TOOL(OBJDUMP, objdump, false) ;; ]) esac need_locks="$enable_libtool_lock" ])# _LT_AC_LOCK # AC_LIBTOOL_COMPILER_OPTION(MESSAGE, VARIABLE-NAME, FLAGS, # [OUTPUT-FILE], [ACTION-SUCCESS], [ACTION-FAILURE]) # ---------------------------------------------------------------- # Check whether the given compiler option works AC_DEFUN([AC_LIBTOOL_COMPILER_OPTION], [AC_REQUIRE([LT_AC_PROG_SED]) AC_CACHE_CHECK([$1], [$2], [$2=no ifelse([$4], , [ac_outfile=conftest.$ac_objext], [ac_outfile=$4]) echo "$lt_simple_compile_test_code" > conftest.$ac_ext lt_compiler_flag="$3" # Insert the option either (1) after the last *FLAGS variable, or # (2) before a word containing "conftest.", or (3) at the end. # Note that $ac_compile itself does not contain backslashes and begins # with a dollar sign (not a hyphen), so the echo should work correctly. # The option is referenced via a variable to avoid confusing sed. lt_compile=`echo "$ac_compile" | $SED \ -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [[^ ]]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` (eval echo "\"\$as_me:__oline__: $lt_compile\"" >&AS_MESSAGE_LOG_FD) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&AS_MESSAGE_LOG_FD echo "$as_me:__oline__: \$? = $ac_status" >&AS_MESSAGE_LOG_FD if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. $echo "X$_lt_compiler_boilerplate" | $Xsed -e '/^$/d' >conftest.exp $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 if test ! 
-s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then $2=yes fi fi $rm conftest* ]) if test x"[$]$2" = xyes; then ifelse([$5], , :, [$5]) else ifelse([$6], , :, [$6]) fi ])# AC_LIBTOOL_COMPILER_OPTION # AC_LIBTOOL_LINKER_OPTION(MESSAGE, VARIABLE-NAME, FLAGS, # [ACTION-SUCCESS], [ACTION-FAILURE]) # ------------------------------------------------------------ # Check whether the given compiler option works AC_DEFUN([AC_LIBTOOL_LINKER_OPTION], [AC_REQUIRE([LT_AC_PROG_SED])dnl AC_CACHE_CHECK([$1], [$2], [$2=no save_LDFLAGS="$LDFLAGS" LDFLAGS="$LDFLAGS $3" echo "$lt_simple_link_test_code" > conftest.$ac_ext if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then # The linker can only warn and ignore the option if not recognized # So say no if there are warnings if test -s conftest.err; then # Append any errors to the config.log. cat conftest.err 1>&AS_MESSAGE_LOG_FD $echo "X$_lt_linker_boilerplate" | $Xsed -e '/^$/d' > conftest.exp $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 if diff conftest.exp conftest.er2 >/dev/null; then $2=yes fi else $2=yes fi fi $rm conftest* LDFLAGS="$save_LDFLAGS" ]) if test x"[$]$2" = xyes; then ifelse([$4], , :, [$4]) else ifelse([$5], , :, [$5]) fi ])# AC_LIBTOOL_LINKER_OPTION # AC_LIBTOOL_SYS_MAX_CMD_LEN # -------------------------- AC_DEFUN([AC_LIBTOOL_SYS_MAX_CMD_LEN], [# find the maximum length of command line arguments AC_MSG_CHECKING([the maximum length of command line arguments]) AC_CACHE_VAL([lt_cv_sys_max_cmd_len], [dnl i=0 teststring="ABCD" case $build_os in msdosdjgpp*) # On DJGPP, this test can blow up pretty badly due to problems in libc # (any single argument exceeding 2000 bytes causes a buffer overrun # during glob expansion). Even if it were fixed, the result of this # check would be larger than it should be. lt_cv_sys_max_cmd_len=12288; # 12K is about right ;; gnu*) # Under GNU Hurd, this test is not required because there is # no limit to the length of command line arguments. 
# Libtool will interpret -1 as no limit whatsoever lt_cv_sys_max_cmd_len=-1; ;; cygwin* | mingw*) # On Win9x/ME, this test blows up -- it succeeds, but takes # about 5 minutes as the teststring grows exponentially. # Worse, since 9x/ME are not pre-emptively multitasking, # you end up with a "frozen" computer, even though with patience # the test eventually succeeds (with a max line length of 256k). # Instead, let's just punt: use the minimum linelength reported by # all of the supported platforms: 8192 (on NT/2K/XP). lt_cv_sys_max_cmd_len=8192; ;; amigaos*) # On AmigaOS with pdksh, this test takes hours, literally. # So we just punt and use a minimum line length of 8192. lt_cv_sys_max_cmd_len=8192; ;; netbsd* | freebsd* | openbsd* | darwin* | dragonfly*) # This has been around since 386BSD, at least. Likely further. if test -x /sbin/sysctl; then lt_cv_sys_max_cmd_len=`/sbin/sysctl -n kern.argmax` elif test -x /usr/sbin/sysctl; then lt_cv_sys_max_cmd_len=`/usr/sbin/sysctl -n kern.argmax` else lt_cv_sys_max_cmd_len=65536 # usable default for all BSDs fi # And add a safety zone lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4` lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3` ;; interix*) # We know the value 262144 and hardcode it with a safety zone (like BSD) lt_cv_sys_max_cmd_len=196608 ;; osf*) # Dr. Hans Ekkehard Plesser reports seeing a kernel panic running configure # due to this test when exec_disable_arg_limit is 1 on Tru64. It is not # nice to cause kernel panics so lets avoid the loop below. # First set a reasonable default. 
lt_cv_sys_max_cmd_len=16384 # if test -x /sbin/sysconfig; then case `/sbin/sysconfig -q proc exec_disable_arg_limit` in *1*) lt_cv_sys_max_cmd_len=-1 ;; esac fi ;; sco3.2v5*) lt_cv_sys_max_cmd_len=102400 ;; sysv5* | sco5v6* | sysv4.2uw2*) kargmax=`grep ARG_MAX /etc/conf/cf.d/stune 2>/dev/null` if test -n "$kargmax"; then lt_cv_sys_max_cmd_len=`echo $kargmax | sed 's/.*[[ ]]//'` else lt_cv_sys_max_cmd_len=32768 fi ;; *) lt_cv_sys_max_cmd_len=`(getconf ARG_MAX) 2> /dev/null` if test -n "$lt_cv_sys_max_cmd_len"; then lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4` lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3` else SHELL=${SHELL-${CONFIG_SHELL-/bin/sh}} while (test "X"`$SHELL [$]0 --fallback-echo "X$teststring" 2>/dev/null` \ = "XX$teststring") >/dev/null 2>&1 && new_result=`expr "X$teststring" : ".*" 2>&1` && lt_cv_sys_max_cmd_len=$new_result && test $i != 17 # 1/2 MB should be enough do i=`expr $i + 1` teststring=$teststring$teststring done teststring= # Add a significant safety factor because C++ compilers can tack on massive # amounts of additional arguments before passing them to the linker. # It appears as though 1/2 is a usable value. 
lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 2` fi ;; esac ]) if test -n $lt_cv_sys_max_cmd_len ; then AC_MSG_RESULT($lt_cv_sys_max_cmd_len) else AC_MSG_RESULT(none) fi ])# AC_LIBTOOL_SYS_MAX_CMD_LEN # _LT_AC_CHECK_DLFCN # ------------------ AC_DEFUN([_LT_AC_CHECK_DLFCN], [AC_CHECK_HEADERS(dlfcn.h)dnl ])# _LT_AC_CHECK_DLFCN # _LT_AC_TRY_DLOPEN_SELF (ACTION-IF-TRUE, ACTION-IF-TRUE-W-USCORE, # ACTION-IF-FALSE, ACTION-IF-CROSS-COMPILING) # --------------------------------------------------------------------- AC_DEFUN([_LT_AC_TRY_DLOPEN_SELF], [AC_REQUIRE([_LT_AC_CHECK_DLFCN])dnl if test "$cross_compiling" = yes; then : [$4] else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext < #endif #include #ifdef RTLD_GLOBAL # define LT_DLGLOBAL RTLD_GLOBAL #else # ifdef DL_GLOBAL # define LT_DLGLOBAL DL_GLOBAL # else # define LT_DLGLOBAL 0 # endif #endif /* We may have to define LT_DLLAZY_OR_NOW in the command line if we find out it does not work in some platform. */ #ifndef LT_DLLAZY_OR_NOW # ifdef RTLD_LAZY # define LT_DLLAZY_OR_NOW RTLD_LAZY # else # ifdef DL_LAZY # define LT_DLLAZY_OR_NOW DL_LAZY # else # ifdef RTLD_NOW # define LT_DLLAZY_OR_NOW RTLD_NOW # else # ifdef DL_NOW # define LT_DLLAZY_OR_NOW DL_NOW # else # define LT_DLLAZY_OR_NOW 0 # endif # endif # endif # endif #endif #ifdef __cplusplus extern "C" void exit (int); #endif void fnord() { int i=42;} int main () { void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW); int status = $lt_dlunknown; if (self) { if (dlsym (self,"fnord")) status = $lt_dlno_uscore; else if (dlsym( self,"_fnord")) status = $lt_dlneed_uscore; /* dlclose (self); */ } else puts (dlerror ()); exit (status); }] EOF if AC_TRY_EVAL(ac_link) && test -s conftest${ac_exeext} 2>/dev/null; then (./conftest; exit; ) >&AS_MESSAGE_LOG_FD 2>/dev/null lt_status=$? 
case x$lt_status in x$lt_dlno_uscore) $1 ;; x$lt_dlneed_uscore) $2 ;; x$lt_dlunknown|x*) $3 ;; esac else : # compilation failed $3 fi fi rm -fr conftest* ])# _LT_AC_TRY_DLOPEN_SELF # AC_LIBTOOL_DLOPEN_SELF # ---------------------- AC_DEFUN([AC_LIBTOOL_DLOPEN_SELF], [AC_REQUIRE([_LT_AC_CHECK_DLFCN])dnl if test "x$enable_dlopen" != xyes; then enable_dlopen=unknown enable_dlopen_self=unknown enable_dlopen_self_static=unknown else lt_cv_dlopen=no lt_cv_dlopen_libs= case $host_os in beos*) lt_cv_dlopen="load_add_on" lt_cv_dlopen_libs= lt_cv_dlopen_self=yes ;; mingw* | pw32*) lt_cv_dlopen="LoadLibrary" lt_cv_dlopen_libs= ;; cygwin*) lt_cv_dlopen="dlopen" lt_cv_dlopen_libs= ;; darwin*) # if libdl is installed we need to link against it AC_CHECK_LIB([dl], [dlopen], [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl"],[ lt_cv_dlopen="dyld" lt_cv_dlopen_libs= lt_cv_dlopen_self=yes ]) ;; *) AC_CHECK_FUNC([shl_load], [lt_cv_dlopen="shl_load"], [AC_CHECK_LIB([dld], [shl_load], [lt_cv_dlopen="shl_load" lt_cv_dlopen_libs="-dld"], [AC_CHECK_FUNC([dlopen], [lt_cv_dlopen="dlopen"], [AC_CHECK_LIB([dl], [dlopen], [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl"], [AC_CHECK_LIB([svld], [dlopen], [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-lsvld"], [AC_CHECK_LIB([dld], [dld_link], [lt_cv_dlopen="dld_link" lt_cv_dlopen_libs="-dld"]) ]) ]) ]) ]) ]) ;; esac if test "x$lt_cv_dlopen" != xno; then enable_dlopen=yes else enable_dlopen=no fi case $lt_cv_dlopen in dlopen) save_CPPFLAGS="$CPPFLAGS" test "x$ac_cv_header_dlfcn_h" = xyes && CPPFLAGS="$CPPFLAGS -DHAVE_DLFCN_H" save_LDFLAGS="$LDFLAGS" wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $export_dynamic_flag_spec\" save_LIBS="$LIBS" LIBS="$lt_cv_dlopen_libs $LIBS" AC_CACHE_CHECK([whether a program can dlopen itself], lt_cv_dlopen_self, [dnl _LT_AC_TRY_DLOPEN_SELF( lt_cv_dlopen_self=yes, lt_cv_dlopen_self=yes, lt_cv_dlopen_self=no, lt_cv_dlopen_self=cross) ]) if test "x$lt_cv_dlopen_self" = xyes; then wl=$lt_prog_compiler_wl eval 
LDFLAGS=\"\$LDFLAGS $lt_prog_compiler_static\" AC_CACHE_CHECK([whether a statically linked program can dlopen itself], lt_cv_dlopen_self_static, [dnl _LT_AC_TRY_DLOPEN_SELF( lt_cv_dlopen_self_static=yes, lt_cv_dlopen_self_static=yes, lt_cv_dlopen_self_static=no, lt_cv_dlopen_self_static=cross) ]) fi CPPFLAGS="$save_CPPFLAGS" LDFLAGS="$save_LDFLAGS" LIBS="$save_LIBS" ;; esac case $lt_cv_dlopen_self in yes|no) enable_dlopen_self=$lt_cv_dlopen_self ;; *) enable_dlopen_self=unknown ;; esac case $lt_cv_dlopen_self_static in yes|no) enable_dlopen_self_static=$lt_cv_dlopen_self_static ;; *) enable_dlopen_self_static=unknown ;; esac fi ])# AC_LIBTOOL_DLOPEN_SELF # AC_LIBTOOL_PROG_CC_C_O([TAGNAME]) # --------------------------------- # Check to see if options -c and -o are simultaneously supported by compiler AC_DEFUN([AC_LIBTOOL_PROG_CC_C_O], [AC_REQUIRE([LT_AC_PROG_SED])dnl AC_REQUIRE([_LT_AC_SYS_COMPILER])dnl AC_CACHE_CHECK([if $compiler supports -c -o file.$ac_objext], [_LT_AC_TAGVAR(lt_cv_prog_compiler_c_o, $1)], [_LT_AC_TAGVAR(lt_cv_prog_compiler_c_o, $1)=no $rm -r conftest 2>/dev/null mkdir conftest cd conftest mkdir out echo "$lt_simple_compile_test_code" > conftest.$ac_ext lt_compiler_flag="-o out/conftest2.$ac_objext" # Insert the option either (1) after the last *FLAGS variable, or # (2) before a word containing "conftest.", or (3) at the end. # Note that $ac_compile itself does not contain backslashes and begins # with a dollar sign (not a hyphen), so the echo should work correctly. lt_compile=`echo "$ac_compile" | $SED \ -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [[^ ]]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` (eval echo "\"\$as_me:__oline__: $lt_compile\"" >&AS_MESSAGE_LOG_FD) (eval "$lt_compile" 2>out/conftest.err) ac_status=$? cat out/conftest.err >&AS_MESSAGE_LOG_FD echo "$as_me:__oline__: \$? 
= $ac_status" >&AS_MESSAGE_LOG_FD if (exit $ac_status) && test -s out/conftest2.$ac_objext then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings $echo "X$_lt_compiler_boilerplate" | $Xsed -e '/^$/d' > out/conftest.exp $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2 if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then _LT_AC_TAGVAR(lt_cv_prog_compiler_c_o, $1)=yes fi fi chmod u+w . 2>&AS_MESSAGE_LOG_FD $rm conftest* # SGI C++ compiler will create directory out/ii_files/ for # template instantiation test -d out/ii_files && $rm out/ii_files/* && rmdir out/ii_files $rm out/* && rmdir out cd .. rmdir conftest $rm conftest* ]) ])# AC_LIBTOOL_PROG_CC_C_O # AC_LIBTOOL_SYS_HARD_LINK_LOCKS([TAGNAME]) # ----------------------------------------- # Check to see if we can do hard links to lock some files if needed AC_DEFUN([AC_LIBTOOL_SYS_HARD_LINK_LOCKS], [AC_REQUIRE([_LT_AC_LOCK])dnl hard_links="nottested" if test "$_LT_AC_TAGVAR(lt_cv_prog_compiler_c_o, $1)" = no && test "$need_locks" != no; then # do not overwrite the value of need_locks provided by the user AC_MSG_CHECKING([if we can lock with hard links]) hard_links=yes $rm conftest* ln conftest.a conftest.b 2>/dev/null && hard_links=no touch conftest.a ln conftest.a conftest.b 2>&5 || hard_links=no ln conftest.a conftest.b 2>/dev/null && hard_links=no AC_MSG_RESULT([$hard_links]) if test "$hard_links" = no; then AC_MSG_WARN([`$CC' does not support `-c -o', so `make -j' may be unsafe]) need_locks=warn fi else need_locks=no fi ])# AC_LIBTOOL_SYS_HARD_LINK_LOCKS # AC_LIBTOOL_OBJDIR # ----------------- AC_DEFUN([AC_LIBTOOL_OBJDIR], [AC_CACHE_CHECK([for objdir], [lt_cv_objdir], [rm -f .libs 2>/dev/null mkdir .libs 2>/dev/null if test -d .libs; then lt_cv_objdir=.libs else # MS-DOS does not allow filenames that begin with a dot. 
lt_cv_objdir=_libs fi rmdir .libs 2>/dev/null]) objdir=$lt_cv_objdir ])# AC_LIBTOOL_OBJDIR # AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH([TAGNAME]) # ---------------------------------------------- # Check hardcoding attributes. AC_DEFUN([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH], [AC_MSG_CHECKING([how to hardcode library paths into programs]) _LT_AC_TAGVAR(hardcode_action, $1)= if test -n "$_LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)" || \ test -n "$_LT_AC_TAGVAR(runpath_var, $1)" || \ test "X$_LT_AC_TAGVAR(hardcode_automatic, $1)" = "Xyes" ; then # We can hardcode non-existant directories. if test "$_LT_AC_TAGVAR(hardcode_direct, $1)" != no && # If the only mechanism to avoid hardcoding is shlibpath_var, we # have to relink, otherwise we might link with an installed library # when we should be linking with a yet-to-be-installed one ## test "$_LT_AC_TAGVAR(hardcode_shlibpath_var, $1)" != no && test "$_LT_AC_TAGVAR(hardcode_minus_L, $1)" != no; then # Linking always hardcodes the temporary library directory. _LT_AC_TAGVAR(hardcode_action, $1)=relink else # We can link without hardcoding, and we can hardcode nonexisting dirs. _LT_AC_TAGVAR(hardcode_action, $1)=immediate fi else # We cannot hardcode anything, or else we can only hardcode existing # directories. 
_LT_AC_TAGVAR(hardcode_action, $1)=unsupported fi AC_MSG_RESULT([$_LT_AC_TAGVAR(hardcode_action, $1)]) if test "$_LT_AC_TAGVAR(hardcode_action, $1)" = relink; then # Fast installation is not supported enable_fast_install=no elif test "$shlibpath_overrides_runpath" = yes || test "$enable_shared" = no; then # Fast installation is not necessary enable_fast_install=needless fi ])# AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH # AC_LIBTOOL_SYS_LIB_STRIP # ------------------------ AC_DEFUN([AC_LIBTOOL_SYS_LIB_STRIP], [striplib= old_striplib= AC_MSG_CHECKING([whether stripping libraries is possible]) if test -n "$STRIP" && $STRIP -V 2>&1 | grep "GNU strip" >/dev/null; then test -z "$old_striplib" && old_striplib="$STRIP --strip-debug" test -z "$striplib" && striplib="$STRIP --strip-unneeded" AC_MSG_RESULT([yes]) else # FIXME - insert some real tests, host_os isn't really good enough case $host_os in darwin*) if test -n "$STRIP" ; then striplib="$STRIP -x" old_striplib="$STRIP -S" AC_MSG_RESULT([yes]) else AC_MSG_RESULT([no]) fi ;; *) AC_MSG_RESULT([no]) ;; esac fi ])# AC_LIBTOOL_SYS_LIB_STRIP # AC_LIBTOOL_SYS_DYNAMIC_LINKER # ----------------------------- # PORTME Fill in your ld.so characteristics AC_DEFUN([AC_LIBTOOL_SYS_DYNAMIC_LINKER], [AC_REQUIRE([LT_AC_PROG_SED])dnl AC_MSG_CHECKING([dynamic linker characteristics]) library_names_spec= libname_spec='lib$name' soname_spec= shrext_cmds=".so" postinstall_cmds= postuninstall_cmds= finish_cmds= finish_eval= shlibpath_var= shlibpath_overrides_runpath=unknown version_type=none dynamic_linker="$host_os ld.so" sys_lib_dlsearch_path_spec="/lib /usr/lib" m4_if($1,[],[ if test "$GCC" = yes; then case $host_os in darwin*) lt_awk_arg="/^libraries:/,/LR/" ;; *) lt_awk_arg="/^libraries:/" ;; esac lt_search_path_spec=`$CC -print-search-dirs | awk $lt_awk_arg | $SED -e "s/^libraries://" -e "s,=/,/,g"` if echo "$lt_search_path_spec" | grep ';' >/dev/null ; then # if the path contains ";" then we assume it to be the separator # otherwise default 
to the standard path separator (i.e. ":") - it is # assumed that no part of a normal pathname contains ";" but that should # okay in the real world where ";" in dirpaths is itself problematic. lt_search_path_spec=`echo "$lt_search_path_spec" | $SED -e 's/;/ /g'` else lt_search_path_spec=`echo "$lt_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` fi # Ok, now we have the path, separated by spaces, we can step through it # and add multilib dir if necessary. lt_tmp_lt_search_path_spec= lt_multi_os_dir=`$CC $CPPFLAGS $CFLAGS $LDFLAGS -print-multi-os-directory 2>/dev/null` for lt_sys_path in $lt_search_path_spec; do if test -d "$lt_sys_path/$lt_multi_os_dir"; then lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path/$lt_multi_os_dir" else test -d "$lt_sys_path" && \ lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path" fi done lt_search_path_spec=`echo $lt_tmp_lt_search_path_spec | awk ' BEGIN {RS=" "; FS="/|\n";} { lt_foo=""; lt_count=0; for (lt_i = NF; lt_i > 0; lt_i--) { if ($lt_i != "" && $lt_i != ".") { if ($lt_i == "..") { lt_count++; } else { if (lt_count == 0) { lt_foo="/" $lt_i lt_foo; } else { lt_count--; } } } } if (lt_foo != "") { lt_freq[[lt_foo]]++; } if (lt_freq[[lt_foo]] == 1) { print lt_foo; } }'` sys_lib_search_path_spec=`echo $lt_search_path_spec` else sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib" fi]) need_lib_prefix=unknown hardcode_into_libs=no # when you set need_version to no, make sure it does not cause -set_version # flags to be left without arguments need_version=unknown case $host_os in aix3*) version_type=linux library_names_spec='${libname}${release}${shared_ext}$versuffix $libname.a' shlibpath_var=LIBPATH # AIX 3 has no versioning support, so we append a major version to the name. 
soname_spec='${libname}${release}${shared_ext}$major' ;; aix4* | aix5*) version_type=linux need_lib_prefix=no need_version=no hardcode_into_libs=yes if test "$host_cpu" = ia64; then # AIX 5 supports IA64 library_names_spec='${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext}$versuffix $libname${shared_ext}' shlibpath_var=LD_LIBRARY_PATH else # With GCC up to 2.95.x, collect2 would create an import file # for dependence libraries. The import file would start with # the line `#! .'. This would cause the generated library to # depend on `.', always an invalid library. This was fixed in # development snapshots of GCC prior to 3.0. case $host_os in aix4 | aix4.[[01]] | aix4.[[01]].*) if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)' echo ' yes ' echo '#endif'; } | ${CC} -E - | grep yes > /dev/null; then : else can_build_shared=no fi ;; esac # AIX (on Power*) has no versioning support, so currently we can not hardcode correct # soname into executable. Probably we can add versioning support to # collect2, so additional links can be useful in future. if test "$aix_use_runtimelinking" = yes; then # If using run time linking (on AIX 4.2 or later) use lib.so # instead of lib.a to let people know that these are not # typical AIX shared libraries. library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' else # We preserve .a as extension for shared libraries through AIX4.2 # and later when we are not doing run time linking. library_names_spec='${libname}${release}.a $libname.a' soname_spec='${libname}${release}${shared_ext}$major' fi shlibpath_var=LIBPATH fi ;; amigaos*) library_names_spec='$libname.ixlibrary $libname.a' # Create ${libname}_ixlibrary.a entries in /sys/libs. 
finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`$echo "X$lib" | $Xsed -e '\''s%^.*/\([[^/]]*\)\.ixlibrary$%\1%'\''`; test $rm /sys/libs/${libname}_ixlibrary.a; $show "cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a"; cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a || exit 1; done' ;; beos*) library_names_spec='${libname}${shared_ext}' dynamic_linker="$host_os ld.so" shlibpath_var=LIBRARY_PATH ;; bsdi[[45]]*) version_type=linux need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir' shlibpath_var=LD_LIBRARY_PATH sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib" sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib" # the default ld.so.conf also contains /usr/contrib/lib and # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow # libtool to hard-code these into programs ;; cygwin* | mingw* | pw32*) version_type=windows shrext_cmds=".dll" need_version=no need_lib_prefix=no case $GCC,$host_os in yes,cygwin* | yes,mingw* | yes,pw32*) library_names_spec='$libname.dll.a' # DLL is installed to $(libdir)/../bin by postinstall_cmds postinstall_cmds='base_file=`basename \${file}`~ dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i;echo \$dlname'\''`~ dldir=$destdir/`dirname \$dlpath`~ test -d \$dldir || mkdir -p \$dldir~ $install_prog $dir/$dlname \$dldir/$dlname~ chmod a+x \$dldir/$dlname' postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. 
$file; echo \$dlname'\''`~ dlpath=$dir/\$dldll~ $rm \$dlpath' shlibpath_overrides_runpath=yes case $host_os in cygwin*) # Cygwin DLLs use 'cyg' prefix rather than 'lib' soname_spec='`echo ${libname} | sed -e 's/^lib/cyg/'``echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext}' sys_lib_search_path_spec="/usr/lib /lib/w32api /lib /usr/local/lib" ;; mingw*) # MinGW DLLs use traditional 'lib' prefix soname_spec='${libname}`echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext}' sys_lib_search_path_spec=`$CC -print-search-dirs | grep "^libraries:" | $SED -e "s/^libraries://" -e "s,=/,/,g"` if echo "$sys_lib_search_path_spec" | [grep ';[c-zC-Z]:/' >/dev/null]; then # It is most probably a Windows format PATH printed by # mingw gcc, but we are running on Cygwin. Gcc prints its search # path with ; separators, and with drive letters. We can handle the # drive letters (cygwin fileutils understands them), so leave them, # especially as we might pass files found there to a mingw objdump, # which wouldn't understand a cygwinified path. Ahh. sys_lib_search_path_spec=`echo "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'` else sys_lib_search_path_spec=`echo "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` fi ;; pw32*) # pw32 DLLs use 'pw' prefix rather than 'lib' library_names_spec='`echo ${libname} | sed -e 's/^lib/pw/'``echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext}' ;; esac ;; *) library_names_spec='${libname}`echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext} $libname.lib' ;; esac dynamic_linker='Win32 ld.exe' # FIXME: first we should search . 
and the directory the executable is in shlibpath_var=PATH ;; darwin* | rhapsody*) dynamic_linker="$host_os dyld" version_type=darwin need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${versuffix}$shared_ext ${libname}${release}${major}$shared_ext ${libname}$shared_ext' soname_spec='${libname}${release}${major}$shared_ext' shlibpath_overrides_runpath=yes shlibpath_var=DYLD_LIBRARY_PATH shrext_cmds='`test .$module = .yes && echo .so || echo .dylib`' m4_if([$1], [],[ sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/local/lib"]) sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib' ;; dgux*) version_type=linux need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname$shared_ext' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH ;; freebsd1*) dynamic_linker=no ;; freebsd* | dragonfly*) # DragonFly does not have aout. When/if they implement a new # versioning mechanism, adjust this. 
if test -x /usr/bin/objformat; then objformat=`/usr/bin/objformat` else case $host_os in freebsd[[123]]*) objformat=aout ;; *) objformat=elf ;; esac fi version_type=freebsd-$objformat case $version_type in freebsd-elf*) library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}' need_version=no need_lib_prefix=no ;; freebsd-*) library_names_spec='${libname}${release}${shared_ext}$versuffix $libname${shared_ext}$versuffix' need_version=yes ;; esac shlibpath_var=LD_LIBRARY_PATH case $host_os in freebsd2*) shlibpath_overrides_runpath=yes ;; freebsd3.[[01]]* | freebsdelf3.[[01]]*) shlibpath_overrides_runpath=yes hardcode_into_libs=yes ;; freebsd3.[[2-9]]* | freebsdelf3.[[2-9]]* | \ freebsd4.[[0-5]] | freebsdelf4.[[0-5]] | freebsd4.1.1 | freebsdelf4.1.1) shlibpath_overrides_runpath=no hardcode_into_libs=yes ;; *) # from 4.6 on, and DragonFly shlibpath_overrides_runpath=yes hardcode_into_libs=yes ;; esac ;; gnu*) version_type=linux need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH hardcode_into_libs=yes ;; hpux9* | hpux10* | hpux11*) # Give a soname corresponding to the major version so that dld.sl refuses to # link against other versions. version_type=sunos need_lib_prefix=no need_version=no case $host_cpu in ia64*) shrext_cmds='.so' hardcode_into_libs=yes dynamic_linker="$host_os dld.so" shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. 
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' if test "X$HPUX_IA64_MODE" = X32; then sys_lib_search_path_spec="/usr/lib/hpux32 /usr/local/lib/hpux32 /usr/local/lib" else sys_lib_search_path_spec="/usr/lib/hpux64 /usr/local/lib/hpux64" fi sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec ;; hppa*64*) shrext_cmds='.sl' hardcode_into_libs=yes dynamic_linker="$host_os dld.sl" shlibpath_var=LD_LIBRARY_PATH # How should we handle SHLIB_PATH shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' sys_lib_search_path_spec="/usr/lib/pa20_64 /usr/ccs/lib/pa20_64" sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec ;; *) shrext_cmds='.sl' dynamic_linker="$host_os dld.sl" shlibpath_var=SHLIB_PATH shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' ;; esac # HP-UX runs *really* slowly unless shared libraries are mode 555. 
postinstall_cmds='chmod 555 $lib' ;; interix[[3-9]]*) version_type=linux need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' dynamic_linker='Interix 3.x ld.so.1 (PE, like ELF)' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no hardcode_into_libs=yes ;; irix5* | irix6* | nonstopux*) case $host_os in nonstopux*) version_type=nonstopux ;; *) if test "$lt_cv_prog_gnu_ld" = yes; then version_type=linux else version_type=irix fi ;; esac need_lib_prefix=no need_version=no soname_spec='${libname}${release}${shared_ext}$major' library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext} $libname${shared_ext}' case $host_os in irix5* | nonstopux*) libsuff= shlibsuff= ;; *) case $LD in # libtool.m4 will add one of these switches to LD *-32|*"-32 "|*-melf32bsmip|*"-melf32bsmip ") libsuff= shlibsuff= libmagic=32-bit;; *-n32|*"-n32 "|*-melf32bmipn32|*"-melf32bmipn32 ") libsuff=32 shlibsuff=N32 libmagic=N32;; *-64|*"-64 "|*-melf64bmip|*"-melf64bmip ") libsuff=64 shlibsuff=64 libmagic=64-bit;; *) libsuff= shlibsuff= libmagic=never-match;; esac ;; esac shlibpath_var=LD_LIBRARY${shlibsuff}_PATH shlibpath_overrides_runpath=no sys_lib_search_path_spec="/usr/lib${libsuff} /lib${libsuff} /usr/local/lib${libsuff}" sys_lib_dlsearch_path_spec="/usr/lib${libsuff} /lib${libsuff}" hardcode_into_libs=yes ;; # No shared lib support for Linux oldld, aout, or coff. linux*oldld* | linux*aout* | linux*coff*) dynamic_linker=no ;; # This must be Linux ELF. 
linux* | k*bsd*-gnu) version_type=linux need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=no # This implies no fast_install, which is unacceptable. # Some rework will be needed to allow for fast_install # before this can be enabled. hardcode_into_libs=yes sys_lib_search_path_spec="/usr/lib${libsuff} /lib${libsuff} /usr/local/lib${libsuff}" sys_lib_dlsearch_path_spec="/usr/lib${libsuff} /lib${libsuff}" # Append ld.so.conf contents to the search path if test -f /etc/ld.so.conf; then lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s 2>/dev/null", \[$]2)); skip = 1; } { if (!skip) print \[$]0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;/^[ ]*hwcap[ ]/d;s/[:, ]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;/^$/d' | tr '\n' ' '` sys_lib_dlsearch_path_spec="$sys_lib_dlsearch_path_spec $lt_ld_extra" fi # We used to test for /lib/ld.so.1 and disable shared libraries on # powerpc, because MkLinux only supported shared libraries with the # GNU dynamic linker. Since this was broken with cross compilers, # most powerpc-linux boxes support dynamic linking these days and # people can always --disable-shared, the test was removed, and we # assume the GNU/Linux dynamic linker is in use. 
dynamic_linker='GNU/Linux ld.so' ;; netbsd*) version_type=sunos need_lib_prefix=no need_version=no if echo __ELF__ | $CC -E - | grep __ELF__ >/dev/null; then library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' dynamic_linker='NetBSD (a.out) ld.so' else library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' dynamic_linker='NetBSD ld.elf_so' fi shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes hardcode_into_libs=yes ;; newsos6) version_type=linux library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes ;; nto-qnx*) version_type=linux need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes ;; openbsd*) version_type=sunos sys_lib_dlsearch_path_spec="/usr/lib" need_lib_prefix=no # Some older versions of OpenBSD (3.3 at least) *do* need versioned libs. 
case $host_os in openbsd3.3 | openbsd3.3.*) need_version=yes ;; *) need_version=no ;; esac library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' shlibpath_var=LD_LIBRARY_PATH if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then case $host_os in openbsd2.[[89]] | openbsd2.[[89]].*) shlibpath_overrides_runpath=no ;; *) shlibpath_overrides_runpath=yes ;; esac else shlibpath_overrides_runpath=yes fi ;; os2*) libname_spec='$name' shrext_cmds=".dll" need_lib_prefix=no library_names_spec='$libname${shared_ext} $libname.a' dynamic_linker='OS/2 ld.exe' shlibpath_var=LIBPATH ;; osf3* | osf4* | osf5*) version_type=osf need_lib_prefix=no need_version=no soname_spec='${libname}${release}${shared_ext}$major' library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' shlibpath_var=LD_LIBRARY_PATH sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib" sys_lib_dlsearch_path_spec="$sys_lib_search_path_spec" ;; rdos*) dynamic_linker=no ;; solaris*) version_type=linux need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes hardcode_into_libs=yes # ldd complains unless libraries are executable postinstall_cmds='chmod +x $lib' ;; sunos4*) version_type=sunos library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir' shlibpath_var=LD_LIBRARY_PATH shlibpath_overrides_runpath=yes if test "$with_gnu_ld" = yes; then need_lib_prefix=no fi need_version=yes ;; sysv4 | sysv4.3*) version_type=linux 
library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH case $host_vendor in sni) shlibpath_overrides_runpath=no need_lib_prefix=no export_dynamic_flag_spec='${wl}-Blargedynsym' runpath_var=LD_RUN_PATH ;; siemens) need_lib_prefix=no ;; motorola) need_lib_prefix=no need_version=no shlibpath_overrides_runpath=no sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib' ;; esac ;; sysv4*MP*) if test -d /usr/nec ;then version_type=linux library_names_spec='$libname${shared_ext}.$versuffix $libname${shared_ext}.$major $libname${shared_ext}' soname_spec='$libname${shared_ext}.$major' shlibpath_var=LD_LIBRARY_PATH fi ;; sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) version_type=freebsd-elf need_lib_prefix=no need_version=no library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH hardcode_into_libs=yes if test "$with_gnu_ld" = yes; then sys_lib_search_path_spec='/usr/local/lib /usr/gnu/lib /usr/ccs/lib /usr/lib /lib' shlibpath_overrides_runpath=no else sys_lib_search_path_spec='/usr/ccs/lib /usr/lib' shlibpath_overrides_runpath=yes case $host_os in sco3.2v5*) sys_lib_search_path_spec="$sys_lib_search_path_spec /lib" ;; esac fi sys_lib_dlsearch_path_spec='/usr/lib' ;; uts4*) version_type=linux library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' soname_spec='${libname}${release}${shared_ext}$major' shlibpath_var=LD_LIBRARY_PATH ;; *) dynamic_linker=no ;; esac AC_MSG_RESULT([$dynamic_linker]) test "$dynamic_linker" = no && can_build_shared=no variables_saved_for_relink="PATH $shlibpath_var $runpath_var" if test "$GCC" = yes; then variables_saved_for_relink="$variables_saved_for_relink 
GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH" fi ])# AC_LIBTOOL_SYS_DYNAMIC_LINKER # _LT_AC_TAGCONFIG # ---------------- AC_DEFUN([_LT_AC_TAGCONFIG], [AC_REQUIRE([LT_AC_PROG_SED])dnl AC_ARG_WITH([tags], [AC_HELP_STRING([--with-tags@<:@=TAGS@:>@], [include additional configurations @<:@automatic@:>@])], [tagnames="$withval"]) if test -f "$ltmain" && test -n "$tagnames"; then if test ! -f "${ofile}"; then AC_MSG_WARN([output file `$ofile' does not exist]) fi if test -z "$LTCC"; then eval "`$SHELL ${ofile} --config | grep '^LTCC='`" if test -z "$LTCC"; then AC_MSG_WARN([output file `$ofile' does not look like a libtool script]) else AC_MSG_WARN([using `LTCC=$LTCC', extracted from `$ofile']) fi fi if test -z "$LTCFLAGS"; then eval "`$SHELL ${ofile} --config | grep '^LTCFLAGS='`" fi # Extract list of available tagged configurations in $ofile. # Note that this assumes the entire list is on one line. available_tags=`grep "^available_tags=" "${ofile}" | $SED -e 's/available_tags=\(.*$\)/\1/' -e 's/\"//g'` lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR," for tagname in $tagnames; do IFS="$lt_save_ifs" # Check whether tagname contains only valid characters case `$echo "X$tagname" | $Xsed -e 's:[[-_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890,/]]::g'` in "") ;; *) AC_MSG_ERROR([invalid tag name: $tagname]) ;; esac if grep "^# ### BEGIN LIBTOOL TAG CONFIG: $tagname$" < "${ofile}" > /dev/null then AC_MSG_ERROR([tag name \"$tagname\" already exists]) fi # Update the list of available tags. 
if test -n "$tagname"; then echo appending configuration tag \"$tagname\" to $ofile case $tagname in CXX) if test -n "$CXX" && ( test "X$CXX" != "Xno" && ( (test "X$CXX" = "Xg++" && `g++ -v >/dev/null 2>&1` ) || (test "X$CXX" != "Xg++"))) ; then AC_LIBTOOL_LANG_CXX_CONFIG else tagname="" fi ;; F77) if test -n "$F77" && test "X$F77" != "Xno"; then AC_LIBTOOL_LANG_F77_CONFIG else tagname="" fi ;; GCJ) if test -n "$GCJ" && test "X$GCJ" != "Xno"; then AC_LIBTOOL_LANG_GCJ_CONFIG else tagname="" fi ;; RC) AC_LIBTOOL_LANG_RC_CONFIG ;; *) AC_MSG_ERROR([Unsupported tag name: $tagname]) ;; esac # Append the new tag name to the list of available tags. if test -n "$tagname" ; then available_tags="$available_tags $tagname" fi fi done IFS="$lt_save_ifs" # Now substitute the updated list of available tags. if eval "sed -e 's/^available_tags=.*\$/available_tags=\"$available_tags\"/' \"$ofile\" > \"${ofile}T\""; then mv "${ofile}T" "$ofile" chmod +x "$ofile" else rm -f "${ofile}T" AC_MSG_ERROR([unable to update list of available tagged configurations.]) fi fi ])# _LT_AC_TAGCONFIG # AC_LIBTOOL_DLOPEN # ----------------- # enable checks for dlopen support AC_DEFUN([AC_LIBTOOL_DLOPEN], [AC_BEFORE([$0],[AC_LIBTOOL_SETUP]) ])# AC_LIBTOOL_DLOPEN # AC_LIBTOOL_WIN32_DLL # -------------------- # declare package support for building win32 DLLs AC_DEFUN([AC_LIBTOOL_WIN32_DLL], [AC_BEFORE([$0], [AC_LIBTOOL_SETUP]) ])# AC_LIBTOOL_WIN32_DLL # AC_ENABLE_SHARED([DEFAULT]) # --------------------------- # implement the --enable-shared flag # DEFAULT is either `yes' or `no'. If omitted, it defaults to `yes'. AC_DEFUN([AC_ENABLE_SHARED], [define([AC_ENABLE_SHARED_DEFAULT], ifelse($1, no, no, yes))dnl AC_ARG_ENABLE([shared], [AC_HELP_STRING([--enable-shared@<:@=PKGS@:>@], [build shared libraries @<:@default=]AC_ENABLE_SHARED_DEFAULT[@:>@])], [p=${PACKAGE-default} case $enableval in yes) enable_shared=yes ;; no) enable_shared=no ;; *) enable_shared=no # Look at the argument we got. 
We use all the common list separators. lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR," for pkg in $enableval; do IFS="$lt_save_ifs" if test "X$pkg" = "X$p"; then enable_shared=yes fi done IFS="$lt_save_ifs" ;; esac], [enable_shared=]AC_ENABLE_SHARED_DEFAULT) ])# AC_ENABLE_SHARED # AC_DISABLE_SHARED # ----------------- # set the default shared flag to --disable-shared AC_DEFUN([AC_DISABLE_SHARED], [AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl AC_ENABLE_SHARED(no) ])# AC_DISABLE_SHARED # AC_ENABLE_STATIC([DEFAULT]) # --------------------------- # implement the --enable-static flag # DEFAULT is either `yes' or `no'. If omitted, it defaults to `yes'. AC_DEFUN([AC_ENABLE_STATIC], [define([AC_ENABLE_STATIC_DEFAULT], ifelse($1, no, no, yes))dnl AC_ARG_ENABLE([static], [AC_HELP_STRING([--enable-static@<:@=PKGS@:>@], [build static libraries @<:@default=]AC_ENABLE_STATIC_DEFAULT[@:>@])], [p=${PACKAGE-default} case $enableval in yes) enable_static=yes ;; no) enable_static=no ;; *) enable_static=no # Look at the argument we got. We use all the common list separators. lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR," for pkg in $enableval; do IFS="$lt_save_ifs" if test "X$pkg" = "X$p"; then enable_static=yes fi done IFS="$lt_save_ifs" ;; esac], [enable_static=]AC_ENABLE_STATIC_DEFAULT) ])# AC_ENABLE_STATIC # AC_DISABLE_STATIC # ----------------- # set the default static flag to --disable-static AC_DEFUN([AC_DISABLE_STATIC], [AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl AC_ENABLE_STATIC(no) ])# AC_DISABLE_STATIC # AC_ENABLE_FAST_INSTALL([DEFAULT]) # --------------------------------- # implement the --enable-fast-install flag # DEFAULT is either `yes' or `no'. If omitted, it defaults to `yes'. 
AC_DEFUN([AC_ENABLE_FAST_INSTALL],
[define([AC_ENABLE_FAST_INSTALL_DEFAULT], ifelse($1, no, no, yes))dnl
AC_ARG_ENABLE([fast-install],
    [AC_HELP_STRING([--enable-fast-install@<:@=PKGS@:>@],
    [optimize for fast installation @<:@default=]AC_ENABLE_FAST_INSTALL_DEFAULT[@:>@])],
    [p=${PACKAGE-default}
    case $enableval in
    yes) enable_fast_install=yes ;;
    no) enable_fast_install=no ;;
    *)
      enable_fast_install=no
      # Look at the argument we got.  We use all the common list separators.
      # The option is turned on only when one of the listed package names
      # equals $p (the value of PACKAGE, or the word "default" if unset).
      lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
      for pkg in $enableval; do
        IFS="$lt_save_ifs"
        if test "X$pkg" = "X$p"; then
          enable_fast_install=yes
        fi
      done
      IFS="$lt_save_ifs"
      ;;
    esac],
    [enable_fast_install=]AC_ENABLE_FAST_INSTALL_DEFAULT)
])# AC_ENABLE_FAST_INSTALL


# AC_DISABLE_FAST_INSTALL
# -----------------------
# set the default to --disable-fast-install
# (expands AC_ENABLE_FAST_INSTALL with DEFAULT set to `no')
AC_DEFUN([AC_DISABLE_FAST_INSTALL],
[AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl
AC_ENABLE_FAST_INSTALL(no)
])# AC_DISABLE_FAST_INSTALL


# AC_LIBTOOL_PICMODE([MODE])
# --------------------------
# implement the --with-pic flag
# MODE is either `yes' or `no'.  When exactly one argument is given,
# pic_mode is set to it; otherwise pic_mode is set to the word `default'.
AC_DEFUN([AC_LIBTOOL_PICMODE],
[AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl
pic_mode=ifelse($#,1,$1,default)
])# AC_LIBTOOL_PICMODE


# AC_PROG_EGREP
# -------------
# This is predefined starting with Autoconf 2.54, so this conditional
# definition can be removed once we require Autoconf 2.54 or later.
m4_ifndef([AC_PROG_EGREP], [AC_DEFUN([AC_PROG_EGREP], [AC_CACHE_CHECK([for egrep], [ac_cv_prog_egrep], [if echo a | (grep -E '(a|b)') >/dev/null 2>&1 then ac_cv_prog_egrep='grep -E' else ac_cv_prog_egrep='egrep' fi]) EGREP=$ac_cv_prog_egrep AC_SUBST([EGREP]) ])]) # AC_PATH_TOOL_PREFIX # ------------------- # find a file program which can recognize shared library AC_DEFUN([AC_PATH_TOOL_PREFIX], [AC_REQUIRE([AC_PROG_EGREP])dnl AC_MSG_CHECKING([for $1]) AC_CACHE_VAL(lt_cv_path_MAGIC_CMD, [case $MAGIC_CMD in [[\\/*] | ?:[\\/]*]) lt_cv_path_MAGIC_CMD="$MAGIC_CMD" # Let the user override the test with a path. ;; *) lt_save_MAGIC_CMD="$MAGIC_CMD" lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR dnl $ac_dummy forces splitting on constant user-supplied paths. dnl POSIX.2 word splitting is done only on the output of word expansions, dnl not every word. This closes a longstanding sh security hole. ac_dummy="ifelse([$2], , $PATH, [$2])" for ac_dir in $ac_dummy; do IFS="$lt_save_ifs" test -z "$ac_dir" && ac_dir=. if test -f $ac_dir/$1; then lt_cv_path_MAGIC_CMD="$ac_dir/$1" if test -n "$file_magic_test_file"; then case $deplibs_check_method in "file_magic "*) file_magic_regex=`expr "$deplibs_check_method" : "file_magic \(.*\)"` MAGIC_CMD="$lt_cv_path_MAGIC_CMD" if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null | $EGREP "$file_magic_regex" > /dev/null; then : else cat <&2 *** Warning: the command libtool uses to detect shared libraries, *** $file_magic_cmd, produces output that libtool cannot recognize. *** The result is that libtool may fail to recognize shared libraries *** as such. This will affect the creation of libtool libraries that *** depend on shared libraries, but programs linked with such libtool *** libraries will work regardless of this problem. 
Nevertheless, you *** may want to report the problem to your system manager and/or to *** bug-libtool@gnu.org EOF fi ;; esac fi break fi done IFS="$lt_save_ifs" MAGIC_CMD="$lt_save_MAGIC_CMD" ;; esac]) MAGIC_CMD="$lt_cv_path_MAGIC_CMD" if test -n "$MAGIC_CMD"; then AC_MSG_RESULT($MAGIC_CMD) else AC_MSG_RESULT(no) fi ])# AC_PATH_TOOL_PREFIX # AC_PATH_MAGIC # ------------- # find a file program which can recognize a shared library AC_DEFUN([AC_PATH_MAGIC], [AC_PATH_TOOL_PREFIX(${ac_tool_prefix}file, /usr/bin$PATH_SEPARATOR$PATH) if test -z "$lt_cv_path_MAGIC_CMD"; then if test -n "$ac_tool_prefix"; then AC_PATH_TOOL_PREFIX(file, /usr/bin$PATH_SEPARATOR$PATH) else MAGIC_CMD=: fi fi ])# AC_PATH_MAGIC # AC_PROG_LD # ---------- # find the pathname to the GNU or non-GNU linker AC_DEFUN([AC_PROG_LD], [AC_ARG_WITH([gnu-ld], [AC_HELP_STRING([--with-gnu-ld], [assume the C compiler uses GNU ld @<:@default=no@:>@])], [test "$withval" = no || with_gnu_ld=yes], [with_gnu_ld=no]) AC_REQUIRE([LT_AC_PROG_SED])dnl AC_REQUIRE([AC_PROG_CC])dnl AC_REQUIRE([AC_CANONICAL_HOST])dnl AC_REQUIRE([AC_CANONICAL_BUILD])dnl ac_prog=ld if test "$GCC" = yes; then # Check if gcc -print-prog-name=ld gives a path. AC_MSG_CHECKING([for ld used by $CC]) case $host in *-*-mingw*) # gcc leaves a trailing carriage return which upsets mingw ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;; *) ac_prog=`($CC -print-prog-name=ld) 2>&5` ;; esac case $ac_prog in # Accept absolute paths. [[\\/]]* | ?:[[\\/]]*) re_direlt='/[[^/]][[^/]]*/\.\./' # Canonicalize the pathname of ld ac_prog=`echo $ac_prog| $SED 's%\\\\%/%g'` while echo $ac_prog | grep "$re_direlt" > /dev/null 2>&1; do ac_prog=`echo $ac_prog| $SED "s%$re_direlt%/%"` done test -z "$LD" && LD="$ac_prog" ;; "") # If it fails, then pretend we aren't using GCC. ac_prog=ld ;; *) # If it is relative, then search for the first ld in PATH. 
with_gnu_ld=unknown ;; esac elif test "$with_gnu_ld" = yes; then AC_MSG_CHECKING([for GNU ld]) else AC_MSG_CHECKING([for non-GNU ld]) fi AC_CACHE_VAL(lt_cv_path_LD, [if test -z "$LD"; then lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR for ac_dir in $PATH; do IFS="$lt_save_ifs" test -z "$ac_dir" && ac_dir=. if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then lt_cv_path_LD="$ac_dir/$ac_prog" # Check to see if the program is GNU ld. I'd rather use --version, # but apparently some variants of GNU ld only accept -v. # Break only if it was the GNU/non-GNU ld that we prefer. case `"$lt_cv_path_LD" -v 2>&1 &1 /dev/null 2>&1; then lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL' lt_cv_file_magic_cmd='func_win32_libid' else lt_cv_deplibs_check_method='file_magic file format pei*-i386(.*architecture: i386)?' lt_cv_file_magic_cmd='$OBJDUMP -f' fi ;; darwin* | rhapsody*) lt_cv_deplibs_check_method=pass_all ;; freebsd* | dragonfly*) if echo __ELF__ | $CC -E - | grep __ELF__ > /dev/null; then case $host_cpu in i*86 ) # Not sure whether the presence of OpenBSD here was a mistake. # Let's accept both of them until this is cleared up. 
lt_cv_deplibs_check_method='file_magic (FreeBSD|OpenBSD|DragonFly)/i[[3-9]]86 (compact )?demand paged shared library' lt_cv_file_magic_cmd=/usr/bin/file lt_cv_file_magic_test_file=`echo /usr/lib/libc.so.*` ;; esac else lt_cv_deplibs_check_method=pass_all fi ;; gnu*) lt_cv_deplibs_check_method=pass_all ;; hpux10.20* | hpux11*) lt_cv_file_magic_cmd=/usr/bin/file case $host_cpu in ia64*) lt_cv_deplibs_check_method='file_magic (s[[0-9]][[0-9]][[0-9]]|ELF-[[0-9]][[0-9]]) shared object file - IA64' lt_cv_file_magic_test_file=/usr/lib/hpux32/libc.so ;; hppa*64*) [lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|ELF-[0-9][0-9]) shared object file - PA-RISC [0-9].[0-9]'] lt_cv_file_magic_test_file=/usr/lib/pa20_64/libc.sl ;; *) lt_cv_deplibs_check_method='file_magic (s[[0-9]][[0-9]][[0-9]]|PA-RISC[[0-9]].[[0-9]]) shared library' lt_cv_file_magic_test_file=/usr/lib/libc.sl ;; esac ;; interix[[3-9]]*) # PIC code is broken on Interix 3.x, that's why |\.a not |_pic\.a here lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so|\.a)$' ;; irix5* | irix6* | nonstopux*) case $LD in *-32|*"-32 ") libmagic=32-bit;; *-n32|*"-n32 ") libmagic=N32;; *-64|*"-64 ") libmagic=64-bit;; *) libmagic=never-match;; esac lt_cv_deplibs_check_method=pass_all ;; # This must be Linux ELF. 
linux* | k*bsd*-gnu) lt_cv_deplibs_check_method=pass_all ;; netbsd*) if echo __ELF__ | $CC -E - | grep __ELF__ > /dev/null; then lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|_pic\.a)$' else lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so|_pic\.a)$' fi ;; newos6*) lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (executable|dynamic lib)' lt_cv_file_magic_cmd=/usr/bin/file lt_cv_file_magic_test_file=/usr/lib/libnls.so ;; nto-qnx*) lt_cv_deplibs_check_method=unknown ;; openbsd*) if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|\.so|_pic\.a)$' else lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|_pic\.a)$' fi ;; osf3* | osf4* | osf5*) lt_cv_deplibs_check_method=pass_all ;; rdos*) lt_cv_deplibs_check_method=pass_all ;; solaris*) lt_cv_deplibs_check_method=pass_all ;; sysv4 | sysv4.3*) case $host_vendor in motorola) lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (shared object|dynamic lib) M[[0-9]][[0-9]]* Version [[0-9]]' lt_cv_file_magic_test_file=`echo /usr/lib/libc.so*` ;; ncr) lt_cv_deplibs_check_method=pass_all ;; sequent) lt_cv_file_magic_cmd='/bin/file' lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[LM]]SB (shared object|dynamic lib )' ;; sni) lt_cv_file_magic_cmd='/bin/file' lt_cv_deplibs_check_method="file_magic ELF [[0-9]][[0-9]]*-bit [[LM]]SB dynamic lib" lt_cv_file_magic_test_file=/lib/libc.so ;; siemens) lt_cv_deplibs_check_method=pass_all ;; pc) lt_cv_deplibs_check_method=pass_all ;; esac ;; sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) lt_cv_deplibs_check_method=pass_all ;; esac ]) file_magic_cmd=$lt_cv_file_magic_cmd deplibs_check_method=$lt_cv_deplibs_check_method test -z "$deplibs_check_method" && deplibs_check_method=unknown ])# AC_DEPLIBS_CHECK_METHOD 
# AC_PROG_NM
# ----------
# find the pathname to a BSD-compatible name lister
#
# A non-empty $NM supplied by the user is taken as-is.  Otherwise the
# candidates are `${ac_tool_prefix}nm' and, when a tool prefix is set and
# $build equals $host, plain `nm'.  Each candidate is looked up in $PATH
# followed by /usr/ccs/bin/elf, /usr/ccs/bin, /usr/ucb and /bin.  The first
# candidate that accepts `-B' (or, failing that, `-p') is chosen together
# with that flag.  If none does, the first candidate found at all is used,
# and if nothing was found the result is plain `nm'.  The result is cached
# in lt_cv_path_NM and stored in NM.
AC_DEFUN([AC_PROG_NM],
[AC_CACHE_CHECK([for BSD-compatible nm], lt_cv_path_NM,
[if test -n "$NM"; then
  # Let the user override the test.
  lt_cv_path_NM="$NM"
else
  lt_nm_to_check="${ac_tool_prefix}nm"
  if test -n "$ac_tool_prefix" && test "$build" = "$host"; then
    lt_nm_to_check="$lt_nm_to_check nm"
  fi
  for lt_tmp_nm in $lt_nm_to_check; do
    lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR
    for ac_dir in $PATH /usr/ccs/bin/elf /usr/ccs/bin /usr/ucb /bin; do
      IFS="$lt_save_ifs"
      test -z "$ac_dir" && ac_dir=.
      tmp_nm="$ac_dir/$lt_tmp_nm"
      if test -f "$tmp_nm" || test -f "$tmp_nm$ac_exeext" ; then
        # Check to see if the nm accepts a BSD-compat flag.
        # Adding the `sed 1q' prevents false positives on HP-UX, which says:
        #   nm: unknown option "B" ignored
        # Tru64's nm complains that /dev/null is an invalid object file
        #
        # Once a BSD-compatible nm is found, leave both loops so that a
        # later candidate cannot replace the earlier, preferred one.
        # IFS has already been restored at the top of this loop body.
        case `"$tmp_nm" -B /dev/null 2>&1 | sed '1q'` in
        */dev/null* | *'Invalid file or object type'*)
          lt_cv_path_NM="$tmp_nm -B"
          break 2
          ;;
        *)
          case `"$tmp_nm" -p /dev/null 2>&1 | sed '1q'` in
          */dev/null*)
            lt_cv_path_NM="$tmp_nm -p"
            break 2
            ;;
          *)
            lt_cv_path_NM=${lt_cv_path_NM="$tmp_nm"} # keep the first match, but
            continue # so that we can try to find one that supports BSD flags
            ;;
          esac
          ;;
        esac
      fi
    done
    IFS="$lt_save_ifs"
  done
  test -z "$lt_cv_path_NM" && lt_cv_path_NM=nm
fi])
NM="$lt_cv_path_NM"
])# AC_PROG_NM


# AC_CHECK_LIBM
# -------------
# check for math library
#
# Sets LIBM to the flags needed to link the math library: empty on hosts
# matched by the first case arm, `-lmw' and/or `-lm' on *-ncr-sysv4.3*
# depending on which libraries are found, and `-lm' elsewhere when libm
# provides cos.
AC_DEFUN([AC_CHECK_LIBM],
[AC_REQUIRE([AC_CANONICAL_HOST])dnl
LIBM=
case $host in
*-*-beos* | *-*-cygwin* | *-*-pw32* | *-*-darwin*)
  # These systems don't have libm, or don't need it
  ;;
*-ncr-sysv4.3*)
  AC_CHECK_LIB(mw, _mwvalidcheckl, LIBM="-lmw")
  AC_CHECK_LIB(m, cos, LIBM="$LIBM -lm")
  ;;
*)
  AC_CHECK_LIB(m, cos, LIBM="-lm")
  ;;
esac
])# AC_CHECK_LIBM


# AC_LIBLTDL_CONVENIENCE([DIRECTORY])
# -----------------------------------
# sets LIBLTDL to the link flags for the libltdl convenience library and
# LTDLINCL to the include flags for the libltdl header and adds
# --enable-ltdl-convenience to the configure arguments.  Note that
# AC_CONFIG_SUBDIRS is not called here.  If DIRECTORY is not provided,
# it is assumed to be `libltdl'.  LIBLTDL will be prefixed with
# '${top_builddir}/' and LTDLINCL will be prefixed with '${top_srcdir}/'
# (note the single quotes!).  If your package is not flat and you're not
# using automake, define top_builddir and top_srcdir appropriately in
# the Makefiles.
# Configuration is aborted with an error if enable_ltdl_convenience has
# already been set to `no'.
AC_DEFUN([AC_LIBLTDL_CONVENIENCE],
[AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl
  case $enable_ltdl_convenience in
  no) AC_MSG_ERROR([this package needs a convenience libltdl]) ;;
  "") enable_ltdl_convenience=yes
      ac_configure_args="$ac_configure_args --enable-ltdl-convenience" ;;
  esac
  LIBLTDL='${top_builddir}/'ifelse($#,1,[$1],['libltdl'])/libltdlc.la
  LTDLINCL='-I${top_srcdir}/'ifelse($#,1,[$1],['libltdl'])
  # For backwards non-gettext consistent compatibility...
  INCLTDL="$LTDLINCL"
])# AC_LIBLTDL_CONVENIENCE


# AC_LIBLTDL_INSTALLABLE([DIRECTORY])
# -----------------------------------
# sets LIBLTDL to the link flags for the libltdl installable library and
# LTDLINCL to the include flags for the libltdl header and adds
# --enable-ltdl-install to the configure arguments.  Note that
# AC_CONFIG_SUBDIRS is not called here.  If DIRECTORY is not provided,
# and an installed libltdl is not found, it is assumed to be `libltdl'.
# LIBLTDL will be prefixed with '${top_builddir}/' and LTDLINCL with
# '${top_srcdir}/' (note the single quotes!).  If your package is not
# flat and you're not using automake, define top_builddir and top_srcdir
# appropriately in the Makefiles.
# In the future, this macro may have to be called after AC_PROG_LIBTOOL.
AC_DEFUN([AC_LIBLTDL_INSTALLABLE],
[AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl
  AC_CHECK_LIB(ltdl, lt_dlinit,
  [test x"$enable_ltdl_install" != xyes && enable_ltdl_install=no],
  [if test x"$enable_ltdl_install" = xno; then
     AC_MSG_WARN([libltdl not installed, but installation disabled])
   else
     enable_ltdl_install=yes
   fi
  ])
  if test x"$enable_ltdl_install" = x"yes"; then
    # Build and install the bundled copy.
    ac_configure_args="$ac_configure_args --enable-ltdl-install"
    LIBLTDL='${top_builddir}/'ifelse($#,1,[$1],['libltdl'])/libltdl.la
    LTDLINCL='-I${top_srcdir}/'ifelse($#,1,[$1],['libltdl'])
  else
    # Link against the libltdl found on the system.
    ac_configure_args="$ac_configure_args --enable-ltdl-install=no"
    LIBLTDL="-lltdl"
    LTDLINCL=
  fi
  # For backwards non-gettext consistent compatibility...
  INCLTDL="$LTDLINCL"
])# AC_LIBLTDL_INSTALLABLE


# AC_LIBTOOL_CXX
# --------------
# enable support for C++ libraries
AC_DEFUN([AC_LIBTOOL_CXX],
[AC_REQUIRE([_LT_AC_LANG_CXX])
])# AC_LIBTOOL_CXX


# _LT_AC_LANG_CXX
# ---------------
# Require the C++ compiler and C++ preprocessor checks and append CXX to
# the comma-separated tagnames list.
AC_DEFUN([_LT_AC_LANG_CXX],
[AC_REQUIRE([AC_PROG_CXX])
AC_REQUIRE([_LT_AC_PROG_CXXCPP])
_LT_AC_SHELL_INIT([tagnames=${tagnames+${tagnames},}CXX])
])# _LT_AC_LANG_CXX

# _LT_AC_PROG_CXXCPP
# ------------------
# Run the C++ preprocessor check only when a C++ compiler looks usable:
# CXX is non-empty, is not "no", and, when it is exactly g++, running
# "g++ -v" succeeds.
AC_DEFUN([_LT_AC_PROG_CXXCPP],
[
AC_REQUIRE([AC_PROG_CXX])
# g++ is run directly so that its own exit status decides the test.
if test -n "$CXX" && ( test "X$CXX" != "Xno" &&
    ( (test "X$CXX" = "Xg++" && g++ -v >/dev/null 2>&1 ) ||
    (test "X$CXX" != "Xg++"))) ; then
  AC_PROG_CXXCPP
fi
])# _LT_AC_PROG_CXXCPP

# AC_LIBTOOL_F77
# --------------
# enable support for Fortran 77 libraries
AC_DEFUN([AC_LIBTOOL_F77],
[AC_REQUIRE([_LT_AC_LANG_F77])
])# AC_LIBTOOL_F77


# _LT_AC_LANG_F77
# ---------------
# Require the Fortran 77 compiler check and append F77 to the
# comma-separated tagnames list.
AC_DEFUN([_LT_AC_LANG_F77],
[AC_REQUIRE([AC_PROG_F77])
_LT_AC_SHELL_INIT([tagnames=${tagnames+${tagnames},}F77])
])# _LT_AC_LANG_F77


# AC_LIBTOOL_GCJ
# --------------
# enable support for GCJ libraries
AC_DEFUN([AC_LIBTOOL_GCJ],
[AC_REQUIRE([_LT_AC_LANG_GCJ])
])# AC_LIBTOOL_GCJ


# _LT_AC_LANG_GCJ
# ---------------
# Make sure a GCJ compiler check is run, then append GCJ to the
# comma-separated tagnames list.  Nothing is required when one of the
# three known GCJ checks has already been provided; otherwise the first
# of them that is defined gets required.
# NOTE(review): the macro names are written in split-quote form,
# presumably so that tools scanning this file for macro names do not
# match them -- confirm before rewriting them.
AC_DEFUN([_LT_AC_LANG_GCJ],
[AC_PROVIDE_IFELSE([AC_PROG_GCJ],[],
  [AC_PROVIDE_IFELSE([A][M_PROG_GCJ],[],
    [AC_PROVIDE_IFELSE([LT_AC_PROG_GCJ],[],
      [ifdef([AC_PROG_GCJ],[AC_REQUIRE([AC_PROG_GCJ])],
         [ifdef([A][M_PROG_GCJ],[AC_REQUIRE([A][M_PROG_GCJ])],
           [AC_REQUIRE([A][C_PROG_GCJ_OR_A][M_PROG_GCJ])])])])])])
_LT_AC_SHELL_INIT([tagnames=${tagnames+${tagnames},}GCJ])
])# _LT_AC_LANG_GCJ


# AC_LIBTOOL_RC
# -------------
# enable support for Windows resource files
AC_DEFUN([AC_LIBTOOL_RC],
[AC_REQUIRE([LT_AC_PROG_RC])
_LT_AC_SHELL_INIT([tagnames=${tagnames+${tagnames},}RC])
])# AC_LIBTOOL_RC


# AC_LIBTOOL_LANG_C_CONFIG
# ------------------------
# Ensure that the configuration vars for the C compiler are
# suitably defined.  Those variables are subsequently used by
# AC_LIBTOOL_CONFIG to write the compiler configuration to `libtool'.
#
# The public macro expands the internal one without a tag argument.
# The internal macro saves CC on entry and restores it on exit, runs the
# per-language compiler and linker probes, and reports whether shared
# and static libraries will be built.
AC_DEFUN([AC_LIBTOOL_LANG_C_CONFIG], [_LT_AC_LANG_C_CONFIG])
AC_DEFUN([_LT_AC_LANG_C_CONFIG],
[lt_save_CC="$CC"
AC_LANG_PUSH(C)

# Source file extension for C test sources.
ac_ext=c

# Object file extension for compiled C test sources.
objext=o
_LT_AC_TAGVAR(objext, $1)=$objext

# Code to be used in simple compile tests
lt_simple_compile_test_code="int some_variable = 0;"

# Code to be used in simple link tests
lt_simple_link_test_code='int main(){return(0);}'

_LT_AC_SYS_COMPILER

# save warnings/boilerplate of simple test code
_LT_COMPILER_BOILERPLATE
_LT_LINKER_BOILERPLATE

AC_LIBTOOL_PROG_COMPILER_NO_RTTI($1)
AC_LIBTOOL_PROG_COMPILER_PIC($1)
AC_LIBTOOL_PROG_CC_C_O($1)
AC_LIBTOOL_SYS_HARD_LINK_LOCKS($1)
AC_LIBTOOL_PROG_LD_SHLIBS($1)
AC_LIBTOOL_SYS_DYNAMIC_LINKER($1)
AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH($1)
AC_LIBTOOL_SYS_LIB_STRIP
AC_LIBTOOL_DLOPEN_SELF

# Report which library types will actually be built
AC_MSG_CHECKING([if libtool supports shared libraries])
AC_MSG_RESULT([$can_build_shared])

AC_MSG_CHECKING([whether to build shared libraries])
test "$can_build_shared" = "no" && enable_shared=no

# On AIX, shared libraries and static libraries use the same namespace, and
# are all built from PIC.
case $host_os in
aix3*)
  test "$enable_shared" = yes && enable_static=no
  if test -n "$RANLIB"; then
    archive_cmds="$archive_cmds~\$RANLIB \$lib"
    postinstall_cmds='$RANLIB $lib'
  fi
  ;;

aix4* | aix5*)
  if test "$host_cpu" != ia64 && test "$aix_use_runtimelinking" = no ; then
    test "$enable_shared" = yes && enable_static=no
  fi
  ;;
esac
AC_MSG_RESULT([$enable_shared])

AC_MSG_CHECKING([whether to build static libraries])
# Make sure either enable_shared or enable_static is yes.
test "$enable_shared" = yes || enable_static=yes
AC_MSG_RESULT([$enable_static])

AC_LIBTOOL_CONFIG($1)

AC_LANG_POP
# Restore the compiler that was saved on entry.
CC="$lt_save_CC"
])# AC_LIBTOOL_LANG_C_CONFIG


# AC_LIBTOOL_LANG_CXX_CONFIG
# --------------------------
# Ensure that the configuration vars for the C++ compiler are
# suitably defined.  Those variables are subsequently used by
# AC_LIBTOOL_CONFIG to write the compiler configuration to `libtool'.
AC_DEFUN([AC_LIBTOOL_LANG_CXX_CONFIG], [_LT_AC_LANG_CXX_CONFIG(CXX)])
AC_DEFUN([_LT_AC_LANG_CXX_CONFIG],
[AC_LANG_PUSH(C++)
AC_REQUIRE([AC_PROG_CXX])
AC_REQUIRE([_LT_AC_PROG_CXXCPP])

_LT_AC_TAGVAR(archive_cmds_need_lc, $1)=no
_LT_AC_TAGVAR(allow_undefined_flag, $1)=
_LT_AC_TAGVAR(always_export_symbols, $1)=no
_LT_AC_TAGVAR(archive_expsym_cmds, $1)=
_LT_AC_TAGVAR(export_dynamic_flag_spec, $1)=
_LT_AC_TAGVAR(hardcode_direct, $1)=no
_LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)=
_LT_AC_TAGVAR(hardcode_libdir_flag_spec_ld, $1)=
_LT_AC_TAGVAR(hardcode_libdir_separator, $1)=
_LT_AC_TAGVAR(hardcode_minus_L, $1)=no
_LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=unsupported
_LT_AC_TAGVAR(hardcode_automatic, $1)=no
_LT_AC_TAGVAR(module_cmds, $1)=
_LT_AC_TAGVAR(module_expsym_cmds, $1)=
_LT_AC_TAGVAR(link_all_deplibs, $1)=unknown
_LT_AC_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds
_LT_AC_TAGVAR(no_undefined_flag, $1)=
_LT_AC_TAGVAR(whole_archive_flag_spec, $1)=
_LT_AC_TAGVAR(enable_shared_with_static_runtimes, $1)=no

# Dependencies to place before and after the object being linked:
_LT_AC_TAGVAR(predep_objects, $1)=
_LT_AC_TAGVAR(postdep_objects, $1)= _LT_AC_TAGVAR(predeps, $1)= _LT_AC_TAGVAR(postdeps, $1)= _LT_AC_TAGVAR(compiler_lib_search_path, $1)= # Source file extension for C++ test sources. ac_ext=cpp # Object file extension for compiled C++ test sources. objext=o _LT_AC_TAGVAR(objext, $1)=$objext # Code to be used in simple compile tests lt_simple_compile_test_code="int some_variable = 0;" # Code to be used in simple link tests lt_simple_link_test_code='int main(int, char *[[]]) { return(0); }' # ltmain only uses $CC for tagged configurations so make sure $CC is set. _LT_AC_SYS_COMPILER # save warnings/boilerplate of simple test code _LT_COMPILER_BOILERPLATE _LT_LINKER_BOILERPLATE # Allow CC to be a program name with arguments. lt_save_CC=$CC lt_save_LD=$LD lt_save_GCC=$GCC GCC=$GXX lt_save_with_gnu_ld=$with_gnu_ld lt_save_path_LD=$lt_cv_path_LD if test -n "${lt_cv_prog_gnu_ldcxx+set}"; then lt_cv_prog_gnu_ld=$lt_cv_prog_gnu_ldcxx else $as_unset lt_cv_prog_gnu_ld fi if test -n "${lt_cv_path_LDCXX+set}"; then lt_cv_path_LD=$lt_cv_path_LDCXX else $as_unset lt_cv_path_LD fi test -z "${LDCXX+set}" || LD=$LDCXX CC=${CXX-"c++"} compiler=$CC _LT_AC_TAGVAR(compiler, $1)=$CC _LT_CC_BASENAME([$compiler]) # We don't want -fno-exception wen compiling C++ code, so set the # no_builtin_flag separately if test "$GXX" = yes; then _LT_AC_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=' -fno-builtin' else _LT_AC_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)= fi if test "$GXX" = yes; then # Set up default GNU C++ configuration AC_PROG_LD # Check if GNU C++ uses GNU ld as the underlying linker, since the # archiving commands below assume that GNU ld is being used. 
if test "$with_gnu_ld" = yes; then _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib' _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}--rpath ${wl}$libdir' _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic' # If archive_cmds runs LD, not CC, wlarc should be empty # XXX I think wlarc can be eliminated in ltcf-cxx, but I need to # investigate it a little bit more. (MM) wlarc='${wl}' # ancient GNU ld didn't support --whole-archive et. al. if eval "`$CC -print-prog-name=ld` --help 2>&1" | \ grep 'no-whole-archive' > /dev/null; then _LT_AC_TAGVAR(whole_archive_flag_spec, $1)="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive' else _LT_AC_TAGVAR(whole_archive_flag_spec, $1)= fi else with_gnu_ld=no wlarc= # A generic and very simple default shared library creation # command for GNU C++ for the case where it uses the native # linker, instead of GNU ld. If possible, this setting should # overridden to take advantage of the native linker features on # the platform it is being used on. _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib' fi # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. 
output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | grep "\-L"' else GXX=no with_gnu_ld=no wlarc= fi # PORTME: fill in a description of your system's C++ link characteristics AC_MSG_CHECKING([whether the $compiler linker ($LD) supports shared libraries]) _LT_AC_TAGVAR(ld_shlibs, $1)=yes case $host_os in aix3*) # FIXME: insert proper C++ library support _LT_AC_TAGVAR(ld_shlibs, $1)=no ;; aix4* | aix5*) if test "$host_cpu" = ia64; then # On IA64, the linker does run time linking by default, so we don't # have to do anything special. aix_use_runtimelinking=no exp_sym_flag='-Bexport' no_entry_flag="" else aix_use_runtimelinking=no # Test if we are trying to use run time linking or normal # AIX style linking. If -brtl is somewhere in LDFLAGS, we # need to do runtime linking. case $host_os in aix4.[[23]]|aix4.[[23]].*|aix5*) for ld_flag in $LDFLAGS; do case $ld_flag in *-brtl*) aix_use_runtimelinking=yes break ;; esac done ;; esac exp_sym_flag='-bexport' no_entry_flag='-bnoentry' fi # When large executables or shared objects are built, AIX ld can # have problems creating the table of contents. If linking a library # or program results in "error TOC overflow" add -mminimal-toc to # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS. 
_LT_AC_TAGVAR(archive_cmds, $1)='' _LT_AC_TAGVAR(hardcode_direct, $1)=yes _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=':' _LT_AC_TAGVAR(link_all_deplibs, $1)=yes if test "$GXX" = yes; then case $host_os in aix4.[[012]]|aix4.[[012]].*) # We only want to do this on AIX 4.2 and lower, the check # below for broken collect2 doesn't work under 4.3+ collect2name=`${CC} -print-prog-name=collect2` if test -f "$collect2name" && \ strings "$collect2name" | grep resolve_lib_name >/dev/null then # We have reworked collect2 : else # We have old collect2 _LT_AC_TAGVAR(hardcode_direct, $1)=unsupported # It fails to find uninstalled libraries when the uninstalled # path is not listed in the libpath. Setting hardcode_minus_L # to unsupported forces relinking _LT_AC_TAGVAR(hardcode_minus_L, $1)=yes _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' _LT_AC_TAGVAR(hardcode_libdir_separator, $1)= fi ;; esac shared_flag='-shared' if test "$aix_use_runtimelinking" = yes; then shared_flag="$shared_flag "'${wl}-G' fi else # not using gcc if test "$host_cpu" = ia64; then # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release # chokes on -Wl,-G. The following line is correct: shared_flag='-G' else if test "$aix_use_runtimelinking" = yes; then shared_flag='${wl}-G' else shared_flag='${wl}-bM:SRE' fi fi fi # It seems that -bexpall does not export symbols beginning with # underscore (_), so it is better to generate a list of symbols to export. _LT_AC_TAGVAR(always_export_symbols, $1)=yes if test "$aix_use_runtimelinking" = yes; then # Warning - without using the other runtime loading flags (-brtl), # -berok will link without error, but may produce a broken library. _LT_AC_TAGVAR(allow_undefined_flag, $1)='-berok' # Determine the default libpath from the value encoded in an empty executable. 
_LT_AC_SYS_LIBPATH_AIX _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-blibpath:$libdir:'"$aix_libpath" _LT_AC_TAGVAR(archive_expsym_cmds, $1)="\$CC"' -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags `if test "x${allow_undefined_flag}" != "x"; then echo "${wl}${allow_undefined_flag}"; else :; fi` '"\${wl}$exp_sym_flag:\$export_symbols $shared_flag" else if test "$host_cpu" = ia64; then _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R $libdir:/usr/lib:/lib' _LT_AC_TAGVAR(allow_undefined_flag, $1)="-z nodefs" _LT_AC_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags ${wl}${allow_undefined_flag} '"\${wl}$exp_sym_flag:\$export_symbols" else # Determine the default libpath from the value encoded in an empty executable. _LT_AC_SYS_LIBPATH_AIX _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-blibpath:$libdir:'"$aix_libpath" # Warning - without using the other run time loading flags, # -berok will link without error, but may produce a broken library. _LT_AC_TAGVAR(no_undefined_flag, $1)=' ${wl}-bernotok' _LT_AC_TAGVAR(allow_undefined_flag, $1)=' ${wl}-berok' # Exported symbols can be pulled into shared objects from archives _LT_AC_TAGVAR(whole_archive_flag_spec, $1)='$convenience' _LT_AC_TAGVAR(archive_cmds_need_lc, $1)=yes # This is similar to how AIX traditionally builds its shared libraries. _LT_AC_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs ${wl}-bnoentry $compiler_flags ${wl}-bE:$export_symbols${allow_undefined_flag}~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$soname' fi fi ;; beos*) if $LD --help 2>&1 | grep ': supported targets:.* elf' > /dev/null; then _LT_AC_TAGVAR(allow_undefined_flag, $1)=unsupported # Joseph Beckenbach says some releases of gcc # support --undefined. This deserves some investigation. 
FIXME _LT_AC_TAGVAR(archive_cmds, $1)='$CC -nostart $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' else _LT_AC_TAGVAR(ld_shlibs, $1)=no fi ;; chorus*) case $cc_basename in *) # FIXME: insert proper C++ library support _LT_AC_TAGVAR(ld_shlibs, $1)=no ;; esac ;; cygwin* | mingw* | pw32*) # _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1) is actually meaningless, # as there is no search path for DLLs. _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' _LT_AC_TAGVAR(allow_undefined_flag, $1)=unsupported _LT_AC_TAGVAR(always_export_symbols, $1)=no _LT_AC_TAGVAR(enable_shared_with_static_runtimes, $1)=yes if $LD --help 2>&1 | grep 'auto-import' > /dev/null; then _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' # If the export-symbols file already is a .def file (1st line # is EXPORTS), use it as is; otherwise, prepend... 
_LT_AC_TAGVAR(archive_expsym_cmds, $1)='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then cp $export_symbols $output_objdir/$soname.def; else echo EXPORTS > $output_objdir/$soname.def; cat $export_symbols >> $output_objdir/$soname.def; fi~ $CC -shared -nostdlib $output_objdir/$soname.def $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' else _LT_AC_TAGVAR(ld_shlibs, $1)=no fi ;; darwin* | rhapsody*) case $host_os in rhapsody* | darwin1.[[012]]) _LT_AC_TAGVAR(allow_undefined_flag, $1)='${wl}-undefined ${wl}suppress' ;; *) # Darwin 1.3 on if test -z ${MACOSX_DEPLOYMENT_TARGET} ; then _LT_AC_TAGVAR(allow_undefined_flag, $1)='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' else case ${MACOSX_DEPLOYMENT_TARGET} in 10.[[012]]) _LT_AC_TAGVAR(allow_undefined_flag, $1)='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' ;; 10.*) _LT_AC_TAGVAR(allow_undefined_flag, $1)='${wl}-undefined ${wl}dynamic_lookup' ;; esac fi ;; esac _LT_AC_TAGVAR(archive_cmds_need_lc, $1)=no _LT_AC_TAGVAR(hardcode_direct, $1)=no _LT_AC_TAGVAR(hardcode_automatic, $1)=yes _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=unsupported _LT_AC_TAGVAR(whole_archive_flag_spec, $1)='' _LT_AC_TAGVAR(link_all_deplibs, $1)=yes if test "$GXX" = yes ; then lt_int_apple_cc_single_mod=no output_verbose_link_cmd='echo' if $CC -dumpspecs 2>&1 | $EGREP 'single_module' >/dev/null ; then lt_int_apple_cc_single_mod=yes fi if test "X$lt_int_apple_cc_single_mod" = Xyes ; then _LT_AC_TAGVAR(archive_cmds, $1)='$CC -dynamiclib -single_module $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags -install_name $rpath/$soname $verstring' else _LT_AC_TAGVAR(archive_cmds, $1)='$CC -r -keep_private_externs -nostdlib -o ${lib}-master.o $libobjs~$CC -dynamiclib $allow_undefined_flag -o $lib ${lib}-master.o $deplibs $compiler_flags -install_name $rpath/$soname $verstring' fi _LT_AC_TAGVAR(module_cmds, $1)='$CC 
$allow_undefined_flag -o $lib -bundle $libobjs $deplibs$compiler_flags' # Don't fix this by using the ld -exported_symbols_list flag, it doesn't exist in older darwin lds if test "X$lt_int_apple_cc_single_mod" = Xyes ; then _LT_AC_TAGVAR(archive_expsym_cmds, $1)='sed -e "s,#.*,," -e "s,^[ ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC -dynamiclib -single_module $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags -install_name $rpath/$soname $verstring~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}' else _LT_AC_TAGVAR(archive_expsym_cmds, $1)='sed -e "s,#.*,," -e "s,^[ ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC -r -keep_private_externs -nostdlib -o ${lib}-master.o $libobjs~$CC -dynamiclib $allow_undefined_flag -o $lib ${lib}-master.o $deplibs $compiler_flags -install_name $rpath/$soname $verstring~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}' fi _LT_AC_TAGVAR(module_expsym_cmds, $1)='sed -e "s,#.*,," -e "s,^[ ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC $allow_undefined_flag -o $lib -bundle $libobjs $deplibs$compiler_flags~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}' else case $cc_basename in xlc*) output_verbose_link_cmd='echo' _LT_AC_TAGVAR(archive_cmds, $1)='$CC -qmkshrobj ${wl}-single_module $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-install_name ${wl}`echo $rpath/$soname` $xlcverstring' _LT_AC_TAGVAR(module_cmds, $1)='$CC $allow_undefined_flag -o $lib -bundle $libobjs $deplibs$compiler_flags' # Don't fix this by using the ld -exported_symbols_list flag, it doesn't exist in older darwin lds _LT_AC_TAGVAR(archive_expsym_cmds, $1)='sed -e "s,#.*,," -e "s,^[ ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC -qmkshrobj ${wl}-single_module $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags 
${wl}-install_name ${wl}$rpath/$soname $xlcverstring~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}' _LT_AC_TAGVAR(module_expsym_cmds, $1)='sed -e "s,#.*,," -e "s,^[ ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC $allow_undefined_flag -o $lib -bundle $libobjs $deplibs$compiler_flags~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}' ;; *) _LT_AC_TAGVAR(ld_shlibs, $1)=no ;; esac fi ;; dgux*) case $cc_basename in ec++*) # FIXME: insert proper C++ library support _LT_AC_TAGVAR(ld_shlibs, $1)=no ;; ghcx*) # Green Hills C++ Compiler # FIXME: insert proper C++ library support _LT_AC_TAGVAR(ld_shlibs, $1)=no ;; *) # FIXME: insert proper C++ library support _LT_AC_TAGVAR(ld_shlibs, $1)=no ;; esac ;; freebsd[[12]]*) # C++ shared libraries reported to be fairly broken before switch to ELF _LT_AC_TAGVAR(ld_shlibs, $1)=no ;; freebsd-elf*) _LT_AC_TAGVAR(archive_cmds_need_lc, $1)=no ;; freebsd* | dragonfly*) # FreeBSD 3 and later use GNU C++ and GNU ld with standard ELF # conventions _LT_AC_TAGVAR(ld_shlibs, $1)=yes ;; gnu*) ;; hpux9*) _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir' _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=: _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' _LT_AC_TAGVAR(hardcode_direct, $1)=yes _LT_AC_TAGVAR(hardcode_minus_L, $1)=yes # Not in the search PATH, # but as the default # location of the library. case $cc_basename in CC*) # FIXME: insert proper C++ library support _LT_AC_TAGVAR(ld_shlibs, $1)=no ;; aCC*) _LT_AC_TAGVAR(archive_cmds, $1)='$rm $output_objdir/$soname~$CC -b ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. 
# # There doesn't appear to be a way to prevent this compiler from # explicitly linking system object files so we need to strip them # from the output so that they don't get included in the library # dependencies. output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | grep "[[-]]L"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; echo $list' ;; *) if test "$GXX" = yes; then _LT_AC_TAGVAR(archive_cmds, $1)='$rm $output_objdir/$soname~$CC -shared -nostdlib -fPIC ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' else # FIXME: insert proper C++ library support _LT_AC_TAGVAR(ld_shlibs, $1)=no fi ;; esac ;; hpux10*|hpux11*) if test $with_gnu_ld = no; then _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir' _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=: case $host_cpu in hppa*64*|ia64*) ;; *) _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' ;; esac fi case $host_cpu in hppa*64*|ia64*) _LT_AC_TAGVAR(hardcode_direct, $1)=no _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no ;; *) _LT_AC_TAGVAR(hardcode_direct, $1)=yes _LT_AC_TAGVAR(hardcode_minus_L, $1)=yes # Not in the search PATH, # but as the default # location of the library. 
;; esac case $cc_basename in CC*) # FIXME: insert proper C++ library support _LT_AC_TAGVAR(ld_shlibs, $1)=no ;; aCC*) case $host_cpu in hppa*64*) _LT_AC_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' ;; ia64*) _LT_AC_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' ;; *) _LT_AC_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' ;; esac # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. # # There doesn't appear to be a way to prevent this compiler from # explicitly linking system object files so we need to strip them # from the output so that they don't get included in the library # dependencies. output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | grep "\-L"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; echo $list' ;; *) if test "$GXX" = yes; then if test $with_gnu_ld = no; then case $host_cpu in hppa*64*) _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib -fPIC ${wl}+h ${wl}$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' ;; ia64*) _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib -fPIC ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' ;; *) _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib -fPIC ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' ;; esac fi else # FIXME: insert proper C++ library support _LT_AC_TAGVAR(ld_shlibs, $1)=no fi ;; esac ;; interix[[3-9]]*) 
_LT_AC_TAGVAR(hardcode_direct, $1)=no _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc. # Instead, shared libraries are loaded at an image base (0x10000000 by # default) and relocated if they conflict, which is a slow very memory # consuming and fragmenting process. To avoid this, we pick a random, # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link # time. Moving up from 0x10000000 also allows more sbrk(2) space. _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' _LT_AC_TAGVAR(archive_expsym_cmds, $1)='sed "s,^,_," $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--retain-symbols-file,$output_objdir/$soname.expsym ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' ;; irix5* | irix6*) case $cc_basename in CC*) # SGI C++ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared -all -multigot $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib' # Archives containing C++ object files must be created using # "CC -ar", where "CC" is the IRIX C++ compiler. This is # necessary to make sure instantiated templates are included # in the archive. 
_LT_AC_TAGVAR(old_archive_cmds, $1)='$CC -ar -WR,-u -o $oldlib $oldobjs' ;; *) if test "$GXX" = yes; then if test "$with_gnu_ld" = no; then _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' else _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` -o $lib' fi fi _LT_AC_TAGVAR(link_all_deplibs, $1)=yes ;; esac _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=: ;; linux* | k*bsd*-gnu) case $cc_basename in KCC*) # Kuck and Associates, Inc. (KAI) C++ Compiler # KCC will only create a shared library if the output file # ends with ".so" (or ".sl" for HP-UX), so rename the library # to its proper name (with version) after linking. _LT_AC_TAGVAR(archive_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib' _LT_AC_TAGVAR(archive_expsym_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib ${wl}-retain-symbols-file,$export_symbols; mv \$templib $lib' # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. 
# # There doesn't appear to be a way to prevent this compiler from # explicitly linking system object files so we need to strip them # from the output so that they don't get included in the library # dependencies. output_verbose_link_cmd='templist=`$CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1 | grep "ld"`; rm -f libconftest$shared_ext; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; echo $list' _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}--rpath,$libdir' _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic' # Archives containing C++ object files must be created using # "CC -Bstatic", where "CC" is the KAI C++ compiler. _LT_AC_TAGVAR(old_archive_cmds, $1)='$CC -Bstatic -o $oldlib $oldobjs' ;; icpc*) # Intel C++ with_gnu_ld=yes # version 8.0 and above of icpc choke on multiply defined symbols # if we add $predep_objects and $postdep_objects, however 7.1 and # earlier do not add the objects themselves. 
case `$CC -V 2>&1` in *"Version 7."*) _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib' _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' ;; *) # Version 8.0 or newer tmp_idyn= case $host_cpu in ia64*) tmp_idyn=' -i_dynamic';; esac _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' ;; esac _LT_AC_TAGVAR(archive_cmds_need_lc, $1)=no _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic' _LT_AC_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive$convenience ${wl}--no-whole-archive' ;; pgCC*) # Portland Group C++ compiler _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname -o $lib' _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname ${wl}-retain-symbols-file ${wl}$export_symbols -o $lib' _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}--rpath ${wl}$libdir' _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic' _LT_AC_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; $echo \"$new_convenience\"` ${wl}--no-whole-archive' ;; cxx*) # Compaq C++ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o 
$lib' _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib ${wl}-retain-symbols-file $wl$export_symbols' runpath_var=LD_RUN_PATH _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir' _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=: # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. # # There doesn't appear to be a way to prevent this compiler from # explicitly linking system object files so we need to strip them # from the output so that they don't get included in the library # dependencies. output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | grep "ld"`; templist=`echo $templist | $SED "s/\(^.*ld.*\)\( .*ld .*$\)/\1/"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; echo $list' ;; *) case `$CC -V 2>&1 | sed 5q` in *Sun\ C*) # Sun C++ 5.9 _LT_AC_TAGVAR(no_undefined_flag, $1)=' -zdefs' _LT_AC_TAGVAR(archive_cmds, $1)='$CC -G${allow_undefined_flag} -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -G${allow_undefined_flag} -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-retain-symbols-file ${wl}$export_symbols' _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' _LT_AC_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; $echo \"$new_convenience\"` ${wl}--no-whole-archive' # Not sure whether something based on # $CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1 # would be better. 
output_verbose_link_cmd='echo' # Archives containing C++ object files must be created using # "CC -xar", where "CC" is the Sun C++ compiler. This is # necessary to make sure instantiated templates are included # in the archive. _LT_AC_TAGVAR(old_archive_cmds, $1)='$CC -xar -o $oldlib $oldobjs' ;; esac ;; esac ;; lynxos*) # FIXME: insert proper C++ library support _LT_AC_TAGVAR(ld_shlibs, $1)=no ;; m88k*) # FIXME: insert proper C++ library support _LT_AC_TAGVAR(ld_shlibs, $1)=no ;; mvs*) case $cc_basename in cxx*) # FIXME: insert proper C++ library support _LT_AC_TAGVAR(ld_shlibs, $1)=no ;; *) # FIXME: insert proper C++ library support _LT_AC_TAGVAR(ld_shlibs, $1)=no ;; esac ;; netbsd*) if echo __ELF__ | $CC -E - | grep __ELF__ >/dev/null; then _LT_AC_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $predep_objects $libobjs $deplibs $postdep_objects $linker_flags' wlarc= _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' _LT_AC_TAGVAR(hardcode_direct, $1)=yes _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no fi # Workaround some broken pre-1.5 toolchains output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | grep conftest.$objext | $SED -e "s:-lgcc -lc -lgcc::"' ;; openbsd2*) # C++ shared libraries are fairly broken _LT_AC_TAGVAR(ld_shlibs, $1)=no ;; openbsd*) if test -f /usr/libexec/ld.so; then _LT_AC_TAGVAR(hardcode_direct, $1)=yes _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib' _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-retain-symbols-file,$export_symbols -o $lib' _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' 
_LT_AC_TAGVAR(whole_archive_flag_spec, $1)="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive' fi output_verbose_link_cmd='echo' else _LT_AC_TAGVAR(ld_shlibs, $1)=no fi ;; osf3*) case $cc_basename in KCC*) # Kuck and Associates, Inc. (KAI) C++ Compiler # KCC will only create a shared library if the output file # ends with ".so" (or ".sl" for HP-UX), so rename the library # to its proper name (with version) after linking. _LT_AC_TAGVAR(archive_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib' _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=: # Archives containing C++ object files must be created using # "CC -Bstatic", where "CC" is the KAI C++ compiler. _LT_AC_TAGVAR(old_archive_cmds, $1)='$CC -Bstatic -o $oldlib $oldobjs' ;; RCC*) # Rational C++ 2.4.1 # FIXME: insert proper C++ library support _LT_AC_TAGVAR(ld_shlibs, $1)=no ;; cxx*) _LT_AC_TAGVAR(allow_undefined_flag, $1)=' ${wl}-expect_unresolved ${wl}\*' _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $soname `test -n "$verstring" && echo ${wl}-set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib' _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=: # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. # # There doesn't appear to be a way to prevent this compiler from # explicitly linking system object files so we need to strip them # from the output so that they don't get included in the library # dependencies. 
output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | grep "ld" | grep -v "ld:"`; templist=`echo $templist | $SED "s/\(^.*ld.*\)\( .*ld.*$\)/\1/"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; echo $list' ;; *) if test "$GXX" = yes && test "$with_gnu_ld" = no; then _LT_AC_TAGVAR(allow_undefined_flag, $1)=' ${wl}-expect_unresolved ${wl}\*' _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib ${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=: # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | grep "\-L"' else # FIXME: insert proper C++ library support _LT_AC_TAGVAR(ld_shlibs, $1)=no fi ;; esac ;; osf4* | osf5*) case $cc_basename in KCC*) # Kuck and Associates, Inc. (KAI) C++ Compiler # KCC will only create a shared library if the output file # ends with ".so" (or ".sl" for HP-UX), so rename the library # to its proper name (with version) after linking. _LT_AC_TAGVAR(archive_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib' _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=: # Archives containing C++ object files must be created using # the KAI C++ compiler. 
_LT_AC_TAGVAR(old_archive_cmds, $1)='$CC -o $oldlib $oldobjs' ;; RCC*) # Rational C++ 2.4.1 # FIXME: insert proper C++ library support _LT_AC_TAGVAR(ld_shlibs, $1)=no ;; cxx*) _LT_AC_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*' _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib' _LT_AC_TAGVAR(archive_expsym_cmds, $1)='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done~ echo "-hidden">> $lib.exp~ $CC -shared$allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname -Wl,-input -Wl,$lib.exp `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib~ $rm $lib.exp' _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir' _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=: # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. # # There doesn't appear to be a way to prevent this compiler from # explicitly linking system object files so we need to strip them # from the output so that they don't get included in the library # dependencies. 
output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | grep "ld" | grep -v "ld:"`; templist=`echo $templist | $SED "s/\(^.*ld.*\)\( .*ld.*$\)/\1/"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; echo $list' ;; *) if test "$GXX" = yes && test "$with_gnu_ld" = no; then _LT_AC_TAGVAR(allow_undefined_flag, $1)=' ${wl}-expect_unresolved ${wl}\*' _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib ${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-msym ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=: # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. 
output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | grep "\-L"' else # FIXME: insert proper C++ library support _LT_AC_TAGVAR(ld_shlibs, $1)=no fi ;; esac ;; psos*) # FIXME: insert proper C++ library support _LT_AC_TAGVAR(ld_shlibs, $1)=no ;; sunos4*) case $cc_basename in CC*) # Sun C++ 4.x # FIXME: insert proper C++ library support _LT_AC_TAGVAR(ld_shlibs, $1)=no ;; lcc*) # Lucid # FIXME: insert proper C++ library support _LT_AC_TAGVAR(ld_shlibs, $1)=no ;; *) # FIXME: insert proper C++ library support _LT_AC_TAGVAR(ld_shlibs, $1)=no ;; esac ;; solaris*) case $cc_basename in CC*) # Sun C++ 4.2, 5.x and Centerline C++ _LT_AC_TAGVAR(archive_cmds_need_lc,$1)=yes _LT_AC_TAGVAR(no_undefined_flag, $1)=' -zdefs' _LT_AC_TAGVAR(archive_cmds, $1)='$CC -G${allow_undefined_flag} -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~$echo "local: *; };" >> $lib.exp~ $CC -G${allow_undefined_flag} ${wl}-M ${wl}$lib.exp -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$rm $lib.exp' _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no case $host_os in solaris2.[[0-5]] | solaris2.[[0-5]].*) ;; *) # The compiler driver will combine and reorder linker options, # but understands `-z linker_flag'. # Supported since Solaris 2.6 (maybe 2.5.1?) _LT_AC_TAGVAR(whole_archive_flag_spec, $1)='-z allextract$convenience -z defaultextract' ;; esac _LT_AC_TAGVAR(link_all_deplibs, $1)=yes output_verbose_link_cmd='echo' # Archives containing C++ object files must be created using # "CC -xar", where "CC" is the Sun C++ compiler. This is # necessary to make sure instantiated templates are included # in the archive. 
_LT_AC_TAGVAR(old_archive_cmds, $1)='$CC -xar -o $oldlib $oldobjs' ;; gcx*) # Green Hills C++ Compiler _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib' # The C++ compiler must be used to create the archive. _LT_AC_TAGVAR(old_archive_cmds, $1)='$CC $LDFLAGS -archive -o $oldlib $oldobjs' ;; *) # GNU C++ compiler with Solaris linker if test "$GXX" = yes && test "$with_gnu_ld" = no; then _LT_AC_TAGVAR(no_undefined_flag, $1)=' ${wl}-z ${wl}defs' if $CC --version | grep -v '^2\.7' > /dev/null; then _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $LDFLAGS $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib' _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~$echo "local: *; };" >> $lib.exp~ $CC -shared -nostdlib ${wl}-M $wl$lib.exp -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$rm $lib.exp' # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. output_verbose_link_cmd="$CC -shared $CFLAGS -v conftest.$objext 2>&1 | grep \"\-L\"" else # g++ 2.7 appears to require `-G' NOT `-shared' on this # platform. _LT_AC_TAGVAR(archive_cmds, $1)='$CC -G -nostdlib $LDFLAGS $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib' _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~$echo "local: *; };" >> $lib.exp~ $CC -G -nostdlib ${wl}-M $wl$lib.exp -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$rm $lib.exp' # Commands to make compiler produce verbose output that lists # what "hidden" libraries, object files and flags are used when # linking a shared library. 
output_verbose_link_cmd="$CC -G $CFLAGS -v conftest.$objext 2>&1 | grep \"\-L\"" fi _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R $wl$libdir' case $host_os in solaris2.[[0-5]] | solaris2.[[0-5]].*) ;; *) _LT_AC_TAGVAR(whole_archive_flag_spec, $1)='${wl}-z ${wl}allextract$convenience ${wl}-z ${wl}defaultextract' ;; esac fi ;; esac ;; sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[[01]].[[10]]* | unixware7* | sco3.2v5.0.[[024]]*) _LT_AC_TAGVAR(no_undefined_flag, $1)='${wl}-z,text' _LT_AC_TAGVAR(archive_cmds_need_lc, $1)=no _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no runpath_var='LD_RUN_PATH' case $cc_basename in CC*) _LT_AC_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' ;; *) _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' ;; esac ;; sysv5* | sco3.2v5* | sco5v6*) # Note: We can NOT use -z defs as we might desire, because we do not # link with -lc, and that would cause any symbols used from libc to # always be unresolved, which means just about no library would # ever link correctly. If we're not using GNU ld we use -z text # though, which does catch some bad symbols but isn't as heavy-handed # as -z defs. # For security reasons, it is highly recommended that you always # use absolute paths for naming shared libraries, and exclude the # DT_RUNPATH tag from executables and libraries. But doing so # requires that you compile everything twice, which is a pain. # So that behaviour is only enabled if SCOABSPATH is set to a # non-empty value in the environment. Most likely only useful for # creating official distributions of packages. 
# This is a hack until libtool officially supports absolute path # names for shared libraries. _LT_AC_TAGVAR(no_undefined_flag, $1)='${wl}-z,text' _LT_AC_TAGVAR(allow_undefined_flag, $1)='${wl}-z,nodefs' _LT_AC_TAGVAR(archive_cmds_need_lc, $1)=no _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='`test -z "$SCOABSPATH" && echo ${wl}-R,$libdir`' _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=':' _LT_AC_TAGVAR(link_all_deplibs, $1)=yes _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-Bexport' runpath_var='LD_RUN_PATH' case $cc_basename in CC*) _LT_AC_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h,\${SCOABSPATH:+${install_libdir}/}$soname -o $lib $libobjs $deplibs $compiler_flags' _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,\${SCOABSPATH:+${install_libdir}/}$soname -o $lib $libobjs $deplibs $compiler_flags' ;; *) _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}-h,\${SCOABSPATH:+${install_libdir}/}$soname -o $lib $libobjs $deplibs $compiler_flags' _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,\${SCOABSPATH:+${install_libdir}/}$soname -o $lib $libobjs $deplibs $compiler_flags' ;; esac ;; tandem*) case $cc_basename in NCC*) # NonStop-UX NCC 3.20 # FIXME: insert proper C++ library support _LT_AC_TAGVAR(ld_shlibs, $1)=no ;; *) # FIXME: insert proper C++ library support _LT_AC_TAGVAR(ld_shlibs, $1)=no ;; esac ;; vxworks*) # FIXME: insert proper C++ library support _LT_AC_TAGVAR(ld_shlibs, $1)=no ;; *) # FIXME: insert proper C++ library support _LT_AC_TAGVAR(ld_shlibs, $1)=no ;; esac AC_MSG_RESULT([$_LT_AC_TAGVAR(ld_shlibs, $1)]) test "$_LT_AC_TAGVAR(ld_shlibs, $1)" = no && can_build_shared=no _LT_AC_TAGVAR(GCC, $1)="$GXX" _LT_AC_TAGVAR(LD, $1)="$LD" AC_LIBTOOL_POSTDEP_PREDEP($1) AC_LIBTOOL_PROG_COMPILER_PIC($1) AC_LIBTOOL_PROG_CC_C_O($1) AC_LIBTOOL_SYS_HARD_LINK_LOCKS($1) AC_LIBTOOL_PROG_LD_SHLIBS($1) AC_LIBTOOL_SYS_DYNAMIC_LINKER($1) 
AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH($1) AC_LIBTOOL_CONFIG($1) AC_LANG_POP CC=$lt_save_CC LDCXX=$LD LD=$lt_save_LD GCC=$lt_save_GCC with_gnu_ldcxx=$with_gnu_ld with_gnu_ld=$lt_save_with_gnu_ld lt_cv_path_LDCXX=$lt_cv_path_LD lt_cv_path_LD=$lt_save_path_LD lt_cv_prog_gnu_ldcxx=$lt_cv_prog_gnu_ld lt_cv_prog_gnu_ld=$lt_save_with_gnu_ld ])# AC_LIBTOOL_LANG_CXX_CONFIG # AC_LIBTOOL_POSTDEP_PREDEP([TAGNAME]) # ------------------------------------ # Figure out "hidden" library dependencies from verbose # compiler output when linking a shared library. # Parse the compiler output and extract the necessary # objects, libraries and library flags. AC_DEFUN([AC_LIBTOOL_POSTDEP_PREDEP],[ dnl we can't use the lt_simple_compile_test_code here, dnl because it contains code intended for an executable, dnl not a library. It's possible we should let each dnl tag define a new lt_????_link_test_code variable, dnl but it's only used here... ifelse([$1],[],[cat > conftest.$ac_ext < conftest.$ac_ext < conftest.$ac_ext < conftest.$ac_ext <&1 | sed 5q` in *Sun\ C*) # Sun C++ 5.9 # # The more standards-conforming stlport4 library is # incompatible with the Cstd library. Avoid specifying # it if it's in CXXFLAGS. Ignore libCrun as # -library=stlport4 depends on it. case " $CXX $CXXFLAGS " in *" -library=stlport4 "*) solaris_use_stlport4=yes ;; esac if test "$solaris_use_stlport4" != yes; then _LT_AC_TAGVAR(postdeps,$1)='-library=Cstd -library=Crun' fi ;; esac ;; solaris*) case $cc_basename in CC*) # The more standards-conforming stlport4 library is # incompatible with the Cstd library. Avoid specifying # it if it's in CXXFLAGS. Ignore libCrun as # -library=stlport4 depends on it. case " $CXX $CXXFLAGS " in *" -library=stlport4 "*) solaris_use_stlport4=yes ;; esac # Adding this requires a known-good setup of shared libraries for # Sun compiler versions before 5.6, else PIC objects from an old # archive will be linked into the output, leading to subtle bugs. 
if test "$solaris_use_stlport4" != yes; then
  _LT_AC_TAGVAR(postdeps,$1)='-library=Cstd -library=Crun'
fi
;;
esac
;;
esac
])
# When postdeps for this tag contains -lc, set archive_cmds_need_lc to no.
case " $_LT_AC_TAGVAR(postdeps, $1) " in
*" -lc "*) _LT_AC_TAGVAR(archive_cmds_need_lc, $1)=no ;;
esac
])# AC_LIBTOOL_POSTDEP_PREDEP


# AC_LIBTOOL_LANG_F77_CONFIG
# --------------------------
# Ensure that the configuration vars for the Fortran 77 compiler are
# suitably defined.  Those variables are subsequently used by
# AC_LIBTOOL_CONFIG to write the compiler configuration to `libtool'.
# The public macro simply expands the worker macro below with the tag
# name F77; the worker receives that tag name as its first argument.
AC_DEFUN([AC_LIBTOOL_LANG_F77_CONFIG], [_LT_AC_LANG_F77_CONFIG(F77)])
AC_DEFUN([_LT_AC_LANG_F77_CONFIG],
[AC_REQUIRE([AC_PROG_F77])
AC_LANG_PUSH(Fortran 77)

# Give every tagged link/archive setting a fixed starting value before
# the probes further down run.
_LT_AC_TAGVAR(archive_cmds_need_lc, $1)=no
_LT_AC_TAGVAR(allow_undefined_flag, $1)=
_LT_AC_TAGVAR(always_export_symbols, $1)=no
_LT_AC_TAGVAR(archive_expsym_cmds, $1)=
_LT_AC_TAGVAR(export_dynamic_flag_spec, $1)=
_LT_AC_TAGVAR(hardcode_direct, $1)=no
_LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)=
_LT_AC_TAGVAR(hardcode_libdir_flag_spec_ld, $1)=
_LT_AC_TAGVAR(hardcode_libdir_separator, $1)=
_LT_AC_TAGVAR(hardcode_minus_L, $1)=no
_LT_AC_TAGVAR(hardcode_automatic, $1)=no
_LT_AC_TAGVAR(module_cmds, $1)=
_LT_AC_TAGVAR(module_expsym_cmds, $1)=
_LT_AC_TAGVAR(link_all_deplibs, $1)=unknown
# The old-style archive commands start as a copy of the untagged value.
_LT_AC_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds
_LT_AC_TAGVAR(no_undefined_flag, $1)=
_LT_AC_TAGVAR(whole_archive_flag_spec, $1)=
_LT_AC_TAGVAR(enable_shared_with_static_runtimes, $1)=no

# Source file extension for f77 test sources.
ac_ext=f

# Object file extension for compiled f77 test sources.
objext=o
_LT_AC_TAGVAR(objext, $1)=$objext

# NOTE(review): the line breaks and the six-column indent inside the two
# literals below are reconstructed; the collapsed text of this file shows
# only single spaces there.  Fixed-form Fortran 77 wants statements to
# begin in column 7 -- confirm against the upstream libtool.m4.

# Code to be used in simple compile tests
lt_simple_compile_test_code="\
      subroutine t
      return
      end
"

# Code to be used in simple link tests
lt_simple_link_test_code="\
      program t
      end
"

# ltmain only uses $CC for tagged configurations so make sure $CC is set.
_LT_AC_SYS_COMPILER

# save warnings/boilerplate of simple test code
_LT_COMPILER_BOILERPLATE
_LT_LINKER_BOILERPLATE

# Allow CC to be a program name with arguments.
# CC is saved in lt_save_CC and then replaced by F77, or by the literal
# f77 when F77 is unset.
lt_save_CC="$CC"
CC=${F77-"f77"}
compiler=$CC
_LT_AC_TAGVAR(compiler, $1)=$CC
_LT_CC_BASENAME([$compiler])

AC_MSG_CHECKING([if libtool supports shared libraries])
AC_MSG_RESULT([$can_build_shared])

AC_MSG_CHECKING([whether to build shared libraries])
# Shared libraries are switched off when can_build_shared is no.
test "$can_build_shared" = "no" && enable_shared=no

# On AIX, shared libraries and static libraries use the same namespace, and
# are all built from PIC.
# NOTE(review): the aix3 branch assigns the untagged archive_cmds and
# postinstall_cmds rather than the tagged variables -- confirm intended.
case $host_os in
aix3*)
  test "$enable_shared" = yes && enable_static=no
  if test -n "$RANLIB"; then
    archive_cmds="$archive_cmds~\$RANLIB \$lib"
    postinstall_cmds='$RANLIB $lib'
  fi
  ;;
aix4* | aix5*)
  if test "$host_cpu" != ia64 && test "$aix_use_runtimelinking" = no ; then
    test "$enable_shared" = yes && enable_static=no
  fi
  ;;
esac
AC_MSG_RESULT([$enable_shared])

AC_MSG_CHECKING([whether to build static libraries])
# Make sure either enable_shared or enable_static is yes.
test "$enable_shared" = yes || enable_static=yes
AC_MSG_RESULT([$enable_static])

# Copy the values of G77 and LD into the tagged GCC and LD variables.
_LT_AC_TAGVAR(GCC, $1)="$G77"
_LT_AC_TAGVAR(LD, $1)="$LD"

# Run the per-tag probes, then emit the tagged configuration.
AC_LIBTOOL_PROG_COMPILER_PIC($1)
AC_LIBTOOL_PROG_CC_C_O($1)
AC_LIBTOOL_SYS_HARD_LINK_LOCKS($1)
AC_LIBTOOL_PROG_LD_SHLIBS($1)
AC_LIBTOOL_SYS_DYNAMIC_LINKER($1)
AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH($1)

AC_LIBTOOL_CONFIG($1)

AC_LANG_POP
# Put back the CC value saved in lt_save_CC above.
CC="$lt_save_CC"
])# AC_LIBTOOL_LANG_F77_CONFIG


# AC_LIBTOOL_LANG_GCJ_CONFIG
# --------------------------
# Ensure that the configuration vars for the Java compiler (GCJ) are
# suitably defined.  Those variables are subsequently used by
# AC_LIBTOOL_CONFIG to write the compiler configuration to `libtool'.
# The public macro simply expands the worker macro below with the tag
# name GCJ; the worker receives that tag name as its first argument.
AC_DEFUN([AC_LIBTOOL_LANG_GCJ_CONFIG], [_LT_AC_LANG_GCJ_CONFIG(GCJ)])
AC_DEFUN([_LT_AC_LANG_GCJ_CONFIG],
[AC_LANG_SAVE

# Source file extension for Java test sources.
ac_ext=java

# Object file extension for compiled Java test sources.
objext=o
_LT_AC_TAGVAR(objext, $1)=$objext

# Code to be used in simple compile tests
lt_simple_compile_test_code="class foo {}"

# Code to be used in simple link tests
lt_simple_link_test_code='public class conftest { public static void main(String[[]] argv) {}; }'

# ltmain only uses $CC for tagged configurations so make sure $CC is set.
_LT_AC_SYS_COMPILER

# save warnings/boilerplate of simple test code
_LT_COMPILER_BOILERPLATE
_LT_LINKER_BOILERPLATE

# Allow CC to be a program name with arguments.
# CC is saved in lt_save_CC and then replaced by GCJ, or by the literal
# gcj when GCJ is unset.
lt_save_CC="$CC"
CC=${GCJ-"gcj"}
compiler=$CC
_LT_AC_TAGVAR(compiler, $1)=$CC
_LT_CC_BASENAME([$compiler])

# GCJ did not exist at the time GCC didn't implicitly link libc in.
# NOTE(review): presumably this means every GCC that ships GCJ already
# links libc implicitly, which is why no explicit -lc is requested here.
_LT_AC_TAGVAR(archive_cmds_need_lc, $1)=no

# The old-style archive commands start as a copy of the untagged value.
_LT_AC_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds

# Run the per-tag probes, then emit the tagged configuration.
AC_LIBTOOL_PROG_COMPILER_NO_RTTI($1)
AC_LIBTOOL_PROG_COMPILER_PIC($1)
AC_LIBTOOL_PROG_CC_C_O($1)
AC_LIBTOOL_SYS_HARD_LINK_LOCKS($1)
AC_LIBTOOL_PROG_LD_SHLIBS($1)
AC_LIBTOOL_SYS_DYNAMIC_LINKER($1)
AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH($1)

AC_LIBTOOL_CONFIG($1)

AC_LANG_RESTORE
# Put back the CC value saved in lt_save_CC above.
CC="$lt_save_CC"
])# AC_LIBTOOL_LANG_GCJ_CONFIG


# AC_LIBTOOL_LANG_RC_CONFIG
# -------------------------
# Ensure that the configuration vars for the Windows resource compiler are
# suitably defined.  Those variables are subsequently used by
# AC_LIBTOOL_CONFIG to write the compiler configuration to `libtool'.
# The public macro simply expands the worker macro below with the tag
# name RC; the worker receives that tag name as its first argument.
AC_DEFUN([AC_LIBTOOL_LANG_RC_CONFIG], [_LT_AC_LANG_RC_CONFIG(RC)])
AC_DEFUN([_LT_AC_LANG_RC_CONFIG],
[AC_LANG_SAVE

# Source file extension for RC test sources.
ac_ext=rc

# Object file extension for compiled RC test sources.
objext=o
_LT_AC_TAGVAR(objext, $1)=$objext

# Code to be used in simple compile tests
lt_simple_compile_test_code='sample MENU { MENUITEM "&Soup", 100, CHECKED }'

# Code to be used in simple link tests
# (the link test reuses the compile test source unchanged)
lt_simple_link_test_code="$lt_simple_compile_test_code"

# ltmain only uses $CC for tagged configurations so make sure $CC is set.
_LT_AC_SYS_COMPILER

# save warnings/boilerplate of simple test code
_LT_COMPILER_BOILERPLATE
_LT_LINKER_BOILERPLATE

# Allow CC to be a program name with arguments.
# CC is saved in lt_save_CC and then replaced by RC, or by the literal
# windres when RC is unset.
lt_save_CC="$CC"
CC=${RC-"windres"}
compiler=$CC
_LT_AC_TAGVAR(compiler, $1)=$CC
_LT_CC_BASENAME([$compiler])
# Set directly to yes; unlike the F77 and GCJ configurations above, no
# AC_LIBTOOL_PROG_* or AC_LIBTOOL_SYS_* probe is run for this tag.
_LT_AC_TAGVAR(lt_cv_prog_compiler_c_o, $1)=yes

AC_LIBTOOL_CONFIG($1)

AC_LANG_RESTORE
# Put back the CC value saved in lt_save_CC above.
CC="$lt_save_CC"
])# AC_LIBTOOL_LANG_RC_CONFIG


# AC_LIBTOOL_CONFIG([TAGNAME])
# ----------------------------
# If TAGNAME is not passed, then create an initial libtool script
# with a default configuration from the untagged config vars.  Otherwise
# add code to config.status for appending the configuration named by
# TAGNAME from the matching tagged config vars.
AC_DEFUN([AC_LIBTOOL_CONFIG],
[# The else clause should only fire when bootstrapping the
# libtool distribution, otherwise you forgot to ship ltmain.sh
# with your package, and you will get complaints that there are
# no rules to generate ltmain.sh.
if test -f "$ltmain"; then
  # See if we are running on zsh, and set the options which allow our commands through
  # without removal of \ escapes.
  if test -n "${ZSH_VERSION+set}" ; then
    setopt NO_GLOB_SUBST
  fi
  # Now quote all the things that may contain metacharacters while being
  # careful not to overquote the AC_SUBSTed values.  We take copies of the
  # variables and quote the copies for generation of the libtool script.
for var in echo old_CC old_CFLAGS AR AR_FLAGS EGREP RANLIB LN_S LTCC LTCFLAGS NM \ SED SHELL STRIP \ libname_spec library_names_spec soname_spec extract_expsyms_cmds \ old_striplib striplib file_magic_cmd finish_cmds finish_eval \ deplibs_check_method reload_flag reload_cmds need_locks \ lt_cv_sys_global_symbol_pipe lt_cv_sys_global_symbol_to_cdecl \ lt_cv_sys_global_symbol_to_c_name_address \ sys_lib_search_path_spec sys_lib_dlsearch_path_spec \ old_postinstall_cmds old_postuninstall_cmds \ _LT_AC_TAGVAR(compiler, $1) \ _LT_AC_TAGVAR(CC, $1) \ _LT_AC_TAGVAR(LD, $1) \ _LT_AC_TAGVAR(lt_prog_compiler_wl, $1) \ _LT_AC_TAGVAR(lt_prog_compiler_pic, $1) \ _LT_AC_TAGVAR(lt_prog_compiler_static, $1) \ _LT_AC_TAGVAR(lt_prog_compiler_no_builtin_flag, $1) \ _LT_AC_TAGVAR(export_dynamic_flag_spec, $1) \ _LT_AC_TAGVAR(thread_safe_flag_spec, $1) \ _LT_AC_TAGVAR(whole_archive_flag_spec, $1) \ _LT_AC_TAGVAR(enable_shared_with_static_runtimes, $1) \ _LT_AC_TAGVAR(old_archive_cmds, $1) \ _LT_AC_TAGVAR(old_archive_from_new_cmds, $1) \ _LT_AC_TAGVAR(predep_objects, $1) \ _LT_AC_TAGVAR(postdep_objects, $1) \ _LT_AC_TAGVAR(predeps, $1) \ _LT_AC_TAGVAR(postdeps, $1) \ _LT_AC_TAGVAR(compiler_lib_search_path, $1) \ _LT_AC_TAGVAR(archive_cmds, $1) \ _LT_AC_TAGVAR(archive_expsym_cmds, $1) \ _LT_AC_TAGVAR(postinstall_cmds, $1) \ _LT_AC_TAGVAR(postuninstall_cmds, $1) \ _LT_AC_TAGVAR(old_archive_from_expsyms_cmds, $1) \ _LT_AC_TAGVAR(allow_undefined_flag, $1) \ _LT_AC_TAGVAR(no_undefined_flag, $1) \ _LT_AC_TAGVAR(export_symbols_cmds, $1) \ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1) \ _LT_AC_TAGVAR(hardcode_libdir_flag_spec_ld, $1) \ _LT_AC_TAGVAR(hardcode_libdir_separator, $1) \ _LT_AC_TAGVAR(hardcode_automatic, $1) \ _LT_AC_TAGVAR(module_cmds, $1) \ _LT_AC_TAGVAR(module_expsym_cmds, $1) \ _LT_AC_TAGVAR(lt_cv_prog_compiler_c_o, $1) \ _LT_AC_TAGVAR(fix_srcfile_path, $1) \ _LT_AC_TAGVAR(exclude_expsyms, $1) \ _LT_AC_TAGVAR(include_expsyms, $1); do case $var in _LT_AC_TAGVAR(old_archive_cmds, 
$1) | \ _LT_AC_TAGVAR(old_archive_from_new_cmds, $1) | \ _LT_AC_TAGVAR(archive_cmds, $1) | \ _LT_AC_TAGVAR(archive_expsym_cmds, $1) | \ _LT_AC_TAGVAR(module_cmds, $1) | \ _LT_AC_TAGVAR(module_expsym_cmds, $1) | \ _LT_AC_TAGVAR(old_archive_from_expsyms_cmds, $1) | \ _LT_AC_TAGVAR(export_symbols_cmds, $1) | \ extract_expsyms_cmds | reload_cmds | finish_cmds | \ postinstall_cmds | postuninstall_cmds | \ old_postinstall_cmds | old_postuninstall_cmds | \ sys_lib_search_path_spec | sys_lib_dlsearch_path_spec) # Double-quote double-evaled strings. eval "lt_$var=\\\"\`\$echo \"X\$$var\" | \$Xsed -e \"\$double_quote_subst\" -e \"\$sed_quote_subst\" -e \"\$delay_variable_subst\"\`\\\"" ;; *) eval "lt_$var=\\\"\`\$echo \"X\$$var\" | \$Xsed -e \"\$sed_quote_subst\"\`\\\"" ;; esac done case $lt_echo in *'\[$]0 --fallback-echo"') lt_echo=`$echo "X$lt_echo" | $Xsed -e 's/\\\\\\\[$]0 --fallback-echo"[$]/[$]0 --fallback-echo"/'` ;; esac ifelse([$1], [], [cfgfile="${ofile}T" trap "$rm \"$cfgfile\"; exit 1" 1 2 15 $rm -f "$cfgfile" AC_MSG_NOTICE([creating $ofile])], [cfgfile="$ofile"]) cat <<__EOF__ >> "$cfgfile" ifelse([$1], [], [#! $SHELL # `$echo "$cfgfile" | sed 's%^.*/%%'` - Provide generalized library-building support services. # Generated automatically by $PROGRAM (GNU $PACKAGE $VERSION$TIMESTAMP) # NOTE: Changes made to this file will be lost: look at ltmain.sh. # # Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007 # Free Software Foundation, Inc. # # This file is part of GNU Libtool: # Originally by Gordon Matzigkeit , 1996 # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. 
# # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # # As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a # configuration script generated by Autoconf, you may include it under # the same distribution terms that you use for the rest of that program. # A sed program that does not truncate output. SED=$lt_SED # Sed that helps us avoid accidentally triggering echo(1) options like -n. Xsed="$SED -e 1s/^X//" # The HP-UX ksh and POSIX shell print the target directory to stdout # if CDPATH is set. (unset CDPATH) >/dev/null 2>&1 && unset CDPATH # The names of the tagged configurations supported by this script. available_tags= # ### BEGIN LIBTOOL CONFIG], [# ### BEGIN LIBTOOL TAG CONFIG: $tagname]) # Libtool was configured on host `(hostname || uname -n) 2>/dev/null | sed 1q`: # Shell to use when invoking shell scripts. SHELL=$lt_SHELL # Whether or not to build shared libraries. build_libtool_libs=$enable_shared # Whether or not to build static libraries. build_old_libs=$enable_static # Whether or not to add -lc for building shared libraries. build_libtool_need_lc=$_LT_AC_TAGVAR(archive_cmds_need_lc, $1) # Whether or not to disallow shared libs when runtime libs are static allow_libtool_libs_with_static_runtimes=$_LT_AC_TAGVAR(enable_shared_with_static_runtimes, $1) # Whether or not to optimize for fast installation. fast_install=$enable_fast_install # The host system. host_alias=$host_alias host=$host host_os=$host_os # The build system. 
build_alias=$build_alias build=$build build_os=$build_os # An echo program that does not interpret backslashes. echo=$lt_echo # The archiver. AR=$lt_AR AR_FLAGS=$lt_AR_FLAGS # A C compiler. LTCC=$lt_LTCC # LTCC compiler flags. LTCFLAGS=$lt_LTCFLAGS # A language-specific compiler. CC=$lt_[]_LT_AC_TAGVAR(compiler, $1) # Is the compiler the GNU C compiler? with_gcc=$_LT_AC_TAGVAR(GCC, $1) # An ERE matcher. EGREP=$lt_EGREP # The linker used to build libraries. LD=$lt_[]_LT_AC_TAGVAR(LD, $1) # Whether we need hard or soft links. LN_S=$lt_LN_S # A BSD-compatible nm program. NM=$lt_NM # A symbol stripping program STRIP=$lt_STRIP # Used to examine libraries when file_magic_cmd begins "file" MAGIC_CMD=$MAGIC_CMD # Used on cygwin: DLL creation program. DLLTOOL="$DLLTOOL" # Used on cygwin: object dumper. OBJDUMP="$OBJDUMP" # Used on cygwin: assembler. AS="$AS" # The name of the directory that contains temporary libtool files. objdir=$objdir # How to create reloadable object files. reload_flag=$lt_reload_flag reload_cmds=$lt_reload_cmds # How to pass a linker flag through the compiler. wl=$lt_[]_LT_AC_TAGVAR(lt_prog_compiler_wl, $1) # Object file suffix (normally "o"). objext="$ac_objext" # Old archive suffix (normally "a"). libext="$libext" # Shared library suffix (normally ".so"). shrext_cmds='$shrext_cmds' # Executable file suffix (normally ""). exeext="$exeext" # Additional compiler flags for building library objects. pic_flag=$lt_[]_LT_AC_TAGVAR(lt_prog_compiler_pic, $1) pic_mode=$pic_mode # What is the maximum length of a command? max_cmd_len=$lt_cv_sys_max_cmd_len # Does compiler simultaneously support -c and -o options? compiler_c_o=$lt_[]_LT_AC_TAGVAR(lt_cv_prog_compiler_c_o, $1) # Must we lock files when doing compilation? need_locks=$lt_need_locks # Do we need the lib prefix for modules? need_lib_prefix=$need_lib_prefix # Do we need a version for libraries? need_version=$need_version # Whether dlopen is supported. 
dlopen_support=$enable_dlopen # Whether dlopen of programs is supported. dlopen_self=$enable_dlopen_self # Whether dlopen of statically linked programs is supported. dlopen_self_static=$enable_dlopen_self_static # Compiler flag to prevent dynamic linking. link_static_flag=$lt_[]_LT_AC_TAGVAR(lt_prog_compiler_static, $1) # Compiler flag to turn off builtin functions. no_builtin_flag=$lt_[]_LT_AC_TAGVAR(lt_prog_compiler_no_builtin_flag, $1) # Compiler flag to allow reflexive dlopens. export_dynamic_flag_spec=$lt_[]_LT_AC_TAGVAR(export_dynamic_flag_spec, $1) # Compiler flag to generate shared objects directly from archives. whole_archive_flag_spec=$lt_[]_LT_AC_TAGVAR(whole_archive_flag_spec, $1) # Compiler flag to generate thread-safe objects. thread_safe_flag_spec=$lt_[]_LT_AC_TAGVAR(thread_safe_flag_spec, $1) # Library versioning type. version_type=$version_type # Format of library name prefix. libname_spec=$lt_libname_spec # List of archive names. First name is the real one, the rest are links. # The last name is the one that the linker finds with -lNAME. library_names_spec=$lt_library_names_spec # The coded name of the library, if different from the real name. soname_spec=$lt_soname_spec # Commands used to build and install an old-style archive. RANLIB=$lt_RANLIB old_archive_cmds=$lt_[]_LT_AC_TAGVAR(old_archive_cmds, $1) old_postinstall_cmds=$lt_old_postinstall_cmds old_postuninstall_cmds=$lt_old_postuninstall_cmds # Create an old-style archive from a shared archive. old_archive_from_new_cmds=$lt_[]_LT_AC_TAGVAR(old_archive_from_new_cmds, $1) # Create a temporary old-style archive to link instead of a shared archive. old_archive_from_expsyms_cmds=$lt_[]_LT_AC_TAGVAR(old_archive_from_expsyms_cmds, $1) # Commands used to build and install a shared archive. 
archive_cmds=$lt_[]_LT_AC_TAGVAR(archive_cmds, $1) archive_expsym_cmds=$lt_[]_LT_AC_TAGVAR(archive_expsym_cmds, $1) postinstall_cmds=$lt_postinstall_cmds postuninstall_cmds=$lt_postuninstall_cmds # Commands used to build a loadable module (assumed same as above if empty) module_cmds=$lt_[]_LT_AC_TAGVAR(module_cmds, $1) module_expsym_cmds=$lt_[]_LT_AC_TAGVAR(module_expsym_cmds, $1) # Commands to strip libraries. old_striplib=$lt_old_striplib striplib=$lt_striplib # Dependencies to place before the objects being linked to create a # shared library. predep_objects=$lt_[]_LT_AC_TAGVAR(predep_objects, $1) # Dependencies to place after the objects being linked to create a # shared library. postdep_objects=$lt_[]_LT_AC_TAGVAR(postdep_objects, $1) # Dependencies to place before the objects being linked to create a # shared library. predeps=$lt_[]_LT_AC_TAGVAR(predeps, $1) # Dependencies to place after the objects being linked to create a # shared library. postdeps=$lt_[]_LT_AC_TAGVAR(postdeps, $1) # The library search path used internally by the compiler when linking # a shared library. compiler_lib_search_path=$lt_[]_LT_AC_TAGVAR(compiler_lib_search_path, $1) # Method to check whether dependent libraries are shared objects. deplibs_check_method=$lt_deplibs_check_method # Command to use when deplibs_check_method == file_magic. file_magic_cmd=$lt_file_magic_cmd # Flag that allows shared libraries with undefined symbols to be built. allow_undefined_flag=$lt_[]_LT_AC_TAGVAR(allow_undefined_flag, $1) # Flag that forces no undefined symbols. no_undefined_flag=$lt_[]_LT_AC_TAGVAR(no_undefined_flag, $1) # Commands used to finish a libtool library installation in a directory. finish_cmds=$lt_finish_cmds # Same as above, but a single script fragment to be evaled but not shown. finish_eval=$lt_finish_eval # Take the output of nm and produce a listing of raw symbols and C names. 
global_symbol_pipe=$lt_lt_cv_sys_global_symbol_pipe # Transform the output of nm in a proper C declaration global_symbol_to_cdecl=$lt_lt_cv_sys_global_symbol_to_cdecl # Transform the output of nm in a C name address pair global_symbol_to_c_name_address=$lt_lt_cv_sys_global_symbol_to_c_name_address # This is the shared library runtime path variable. runpath_var=$runpath_var # This is the shared library path variable. shlibpath_var=$shlibpath_var # Is shlibpath searched before the hard-coded library search path? shlibpath_overrides_runpath=$shlibpath_overrides_runpath # How to hardcode a shared library path into an executable. hardcode_action=$_LT_AC_TAGVAR(hardcode_action, $1) # Whether we should hardcode library paths into libraries. hardcode_into_libs=$hardcode_into_libs # Flag to hardcode \$libdir into a binary during linking. # This must work even if \$libdir does not exist. hardcode_libdir_flag_spec=$lt_[]_LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1) # If ld is used when linking, flag to hardcode \$libdir into # a binary during linking. This must work even if \$libdir does # not exist. hardcode_libdir_flag_spec_ld=$lt_[]_LT_AC_TAGVAR(hardcode_libdir_flag_spec_ld, $1) # Whether we need a single -rpath flag with a separated argument. hardcode_libdir_separator=$lt_[]_LT_AC_TAGVAR(hardcode_libdir_separator, $1) # Set to yes if using DIR/libNAME${shared_ext} during linking hardcodes DIR into the # resulting binary. hardcode_direct=$_LT_AC_TAGVAR(hardcode_direct, $1) # Set to yes if using the -LDIR flag during linking hardcodes DIR into the # resulting binary. hardcode_minus_L=$_LT_AC_TAGVAR(hardcode_minus_L, $1) # Set to yes if using SHLIBPATH_VAR=DIR during linking hardcodes DIR into # the resulting binary. hardcode_shlibpath_var=$_LT_AC_TAGVAR(hardcode_shlibpath_var, $1) # Set to yes if building a shared library automatically hardcodes DIR into the library # and all subsequent libraries and executables linked against it. 
hardcode_automatic=$_LT_AC_TAGVAR(hardcode_automatic, $1) # Variables whose values should be saved in libtool wrapper scripts and # restored at relink time. variables_saved_for_relink="$variables_saved_for_relink" # Whether libtool must link a program against all its dependency libraries. link_all_deplibs=$_LT_AC_TAGVAR(link_all_deplibs, $1) # Compile-time system search path for libraries sys_lib_search_path_spec=$lt_sys_lib_search_path_spec # Run-time system search path for libraries sys_lib_dlsearch_path_spec=$lt_sys_lib_dlsearch_path_spec # Fix the shell variable \$srcfile for the compiler. fix_srcfile_path=$lt_fix_srcfile_path # Set to yes if exported symbols are required. always_export_symbols=$_LT_AC_TAGVAR(always_export_symbols, $1) # The commands to list exported symbols. export_symbols_cmds=$lt_[]_LT_AC_TAGVAR(export_symbols_cmds, $1) # The commands to extract the exported symbol list from a shared archive. extract_expsyms_cmds=$lt_extract_expsyms_cmds # Symbols that should not be listed in the preloaded symbols. exclude_expsyms=$lt_[]_LT_AC_TAGVAR(exclude_expsyms, $1) # Symbols that must always be exported. include_expsyms=$lt_[]_LT_AC_TAGVAR(include_expsyms, $1) ifelse([$1],[], [# ### END LIBTOOL CONFIG], [# ### END LIBTOOL TAG CONFIG: $tagname]) __EOF__ ifelse([$1],[], [ case $host_os in aix3*) cat <<\EOF >> "$cfgfile" # AIX sometimes has problems with the GCC collect2 program. For some # reason, if we set the COLLECT_NAMES environment variable, the problems # vanish in a puff of smoke. if test "X${COLLECT_NAMES+set}" != Xset; then COLLECT_NAMES= export COLLECT_NAMES fi EOF ;; esac # We use sed instead of cat because bash on DJGPP gets confused if # if finds mixed CR/LF and LF-only lines. Since sed operates in # text mode, it properly converts lines to CR/LF. This bash problem # is reportedly fixed, but why not run on old versions too? 
sed '$q' "$ltmain" >> "$cfgfile" || (rm -f "$cfgfile"; exit 1) mv -f "$cfgfile" "$ofile" || \ (rm -f "$ofile" && cp "$cfgfile" "$ofile" && rm -f "$cfgfile") chmod +x "$ofile" ]) else # If there is no Makefile yet, we rely on a make rule to execute # `config.status --recheck' to rerun these tests and create the # libtool script then. ltmain_in=`echo $ltmain | sed -e 's/\.sh$/.in/'` if test -f "$ltmain_in"; then test -f Makefile && make "$ltmain" fi fi ])# AC_LIBTOOL_CONFIG


# AC_LIBTOOL_PROG_COMPILER_NO_RTTI([TAGNAME])
# -------------------------------------------
# Compute lt_prog_compiler_no_builtin_flag for TAGNAME.  The flag starts
# out empty.  When GCC is in use it becomes ' -fno-builtin', and the
# string ' -fno-rtti -fno-exceptions' is appended by the action handed to
# AC_LIBTOOL_COMPILER_OPTION (presumably its on-success action; confirm
# against that macro's definition).
AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_NO_RTTI],
[AC_REQUIRE([_LT_AC_SYS_COMPILER])dnl

_LT_AC_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=

if test "$GCC" = yes; then
  _LT_AC_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=' -fno-builtin'

  AC_LIBTOOL_COMPILER_OPTION([if $compiler supports -fno-rtti -fno-exceptions],
    lt_cv_prog_compiler_rtti_exceptions,
    [-fno-rtti -fno-exceptions], [],
    [_LT_AC_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)="$_LT_AC_TAGVAR(lt_prog_compiler_no_builtin_flag, $1) -fno-rtti -fno-exceptions"])
fi
])# AC_LIBTOOL_PROG_COMPILER_NO_RTTI


# AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE
# ---------------------------------
# Find a sed command that turns the output of nm into lines of the form
# "code symbol C-name", and cache it in lt_cv_sys_global_symbol_pipe.
# Two companion sed commands are set as well:
#   lt_cv_sys_global_symbol_to_cdecl          - symbol line to C declaration
#   lt_cv_sys_global_symbol_to_c_name_address - symbol line to name/address pair
# Each candidate pipe (without, then with, a leading underscore on the
# symbol) is validated by compiling a small program, listing its symbols,
# generating a symbol table source from that list and linking it against
# the first object.  If no candidate works, the pipe and the cdecl command
# are left empty and "failed" is reported.
AC_DEFUN([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE],
[AC_REQUIRE([AC_CANONICAL_HOST])
AC_REQUIRE([LT_AC_PROG_SED])
AC_REQUIRE([AC_PROG_NM])
AC_REQUIRE([AC_OBJEXT])
# Check for command to grab the raw symbol name followed by C symbol from nm.
AC_MSG_CHECKING([command to parse $NM output from $compiler object])
AC_CACHE_VAL([lt_cv_sys_global_symbol_pipe],
[
# These are sane defaults that work on at least a few old systems.
# [They come from Ultrix. What could be older than Ultrix?!! ;)]

# Character class describing NM global symbol codes.
symcode='[[BCDEGRST]]'

# Regexp to match symbols that can be accessed directly from C.
sympat='\([[_A-Za-z]][[_A-Za-z0-9]]*\)'

# Transform an extracted symbol line into a proper C declaration
lt_cv_sys_global_symbol_to_cdecl="sed -n -e 's/^. .* \(.*\)$/extern int \1;/p'"

# Transform an extracted symbol line into symbol name and symbol address
lt_cv_sys_global_symbol_to_c_name_address="sed -n -e 's/^: \([[^ ]]*\) $/ {\\\"\1\\\", (lt_ptr) 0},/p' -e 's/^$symcode \([[^ ]]*\) \([[^ ]]*\)$/ {\"\2\", (lt_ptr) \&\2},/p'"

# Define system-specific variables.
case $host_os in
aix*)
  symcode='[[BCDT]]'
  ;;
cygwin* | mingw* | pw32*)
  symcode='[[ABCDGISTW]]'
  ;;
hpux*) # Its linker distinguishes data from code symbols
  if test "$host_cpu" = ia64; then
    symcode='[[ABCDEGRST]]'
  fi
  lt_cv_sys_global_symbol_to_cdecl="sed -n -e 's/^T .* \(.*\)$/extern int \1();/p' -e 's/^$symcode* .* \(.*\)$/extern char \1;/p'"
  lt_cv_sys_global_symbol_to_c_name_address="sed -n -e 's/^: \([[^ ]]*\) $/ {\\\"\1\\\", (lt_ptr) 0},/p' -e 's/^$symcode* \([[^ ]]*\) \([[^ ]]*\)$/ {\"\2\", (lt_ptr) \&\2},/p'"
  ;;
linux* | k*bsd*-gnu)
  if test "$host_cpu" = ia64; then
    symcode='[[ABCDGIRSTW]]'
    lt_cv_sys_global_symbol_to_cdecl="sed -n -e 's/^T .* \(.*\)$/extern int \1();/p' -e 's/^$symcode* .* \(.*\)$/extern char \1;/p'"
    lt_cv_sys_global_symbol_to_c_name_address="sed -n -e 's/^: \([[^ ]]*\) $/ {\\\"\1\\\", (lt_ptr) 0},/p' -e 's/^$symcode* \([[^ ]]*\) \([[^ ]]*\)$/ {\"\2\", (lt_ptr) \&\2},/p'"
  fi
  ;;
irix* | nonstopux*)
  symcode='[[BCDEGRST]]'
  ;;
osf*)
  symcode='[[BCDEGQRST]]'
  ;;
solaris*)
  symcode='[[BDRT]]'
  ;;
sco3.2v5*)
  symcode='[[DT]]'
  ;;
sysv4.2uw2*)
  symcode='[[DT]]'
  ;;
sysv5* | sco5v6* | unixware* | OpenUNIX*)
  symcode='[[ABDT]]'
  ;;
sysv4)
  symcode='[[DFNSTU]]'
  ;;
esac

# Handle CRLF in mingw tool chain
opt_cr=
case $build_os in
mingw*)
  opt_cr=`echo 'x\{0,1\}' | tr x '\015'` # option cr in regexp
  ;;
esac

# If we're using GNU nm, then use its standard symbol codes.
case `$NM -V 2>&1` in
*GNU* | *'with BFD'*)
  symcode='[[ABCDGIRSTW]]' ;;
esac

# Try without a prefix underscore, then with it.
for ac_symprfx in "" "_"; do

  # Transform symcode, sympat, and symprfx into a raw symbol and a C symbol.
  symxfrm="\\1 $ac_symprfx\\2 \\2"

  # Write the raw and C identifiers.
  lt_cv_sys_global_symbol_pipe="sed -n -e 's/^.*[[ ]]\($symcode$symcode*\)[[ ]][[ ]]*$ac_symprfx$sympat$opt_cr$/$symxfrm/p'"

  # Check to see that the pipe works correctly.
  pipe_works=no

  rm -f conftest*
  cat > conftest.$ac_ext <<EOF
#ifdef __cplusplus
extern "C" {
#endif
char nm_test_var;
void nm_test_func(){}
#ifdef __cplusplus
}
#endif
int main(){nm_test_var='a';nm_test_func();return(0);}
EOF

  if AC_TRY_EVAL(ac_compile); then
    # Now try to grab the symbols.
    nlist=conftest.nm
    if AC_TRY_EVAL(NM conftest.$ac_objext \| $lt_cv_sys_global_symbol_pipe \> $nlist) && test -s "$nlist"; then
      # Try sorting and uniquifying the output.
      if sort "$nlist" | uniq > "$nlist"T; then
        mv -f "$nlist"T "$nlist"
      else
        rm -f "$nlist"T
      fi

      # Make sure that we snagged all the symbols we need.
      if grep ' nm_test_var$' "$nlist" >/dev/null; then
        if grep ' nm_test_func$' "$nlist" >/dev/null; then
          cat <<EOF > conftest.$ac_ext
#ifdef __cplusplus
extern "C" {
#endif

EOF
          # Now generate the symbol file.
          eval "$lt_cv_sys_global_symbol_to_cdecl"' < "$nlist" | grep -v main >> conftest.$ac_ext'

          cat <<EOF >> conftest.$ac_ext
#if defined (__STDC__) && __STDC__
# define lt_ptr_t void *
#else
# define lt_ptr_t char *
# define const
#endif

/* The mapping between symbol names and symbols. */
const struct {
  const char *name;
  lt_ptr_t address;
}
lt_preloaded_symbols[[]] =
{
EOF
          $SED "s/^$symcode$symcode* \(.*\) \(.*\)$/ {\"\2\", (lt_ptr_t) \&\2},/" < "$nlist" | grep -v main >> conftest.$ac_ext
          cat <<\EOF >> conftest.$ac_ext
  {0, (lt_ptr_t) 0}
};

#ifdef __cplusplus
}
#endif
EOF
          # Now try linking the two files.
          mv conftest.$ac_objext conftstm.$ac_objext
          lt_save_LIBS="$LIBS"
          lt_save_CFLAGS="$CFLAGS"
          LIBS="conftstm.$ac_objext"
          CFLAGS="$CFLAGS$_LT_AC_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)"
          if AC_TRY_EVAL(ac_link) && test -s conftest${ac_exeext}; then
            pipe_works=yes
          fi
          LIBS="$lt_save_LIBS"
          CFLAGS="$lt_save_CFLAGS"
        else
          echo "cannot find nm_test_func in $nlist" >&AS_MESSAGE_LOG_FD
        fi
      else
        echo "cannot find nm_test_var in $nlist" >&AS_MESSAGE_LOG_FD
      fi
    else
      echo "cannot run $lt_cv_sys_global_symbol_pipe" >&AS_MESSAGE_LOG_FD
    fi
  else
    echo "$progname: failed program was:" >&AS_MESSAGE_LOG_FD
    cat conftest.$ac_ext >&AS_MESSAGE_LOG_FD
  fi
  rm -f conftest* conftst*

  # Do not use the global_symbol_pipe unless it works.
  if test "$pipe_works" = yes; then
    break
  else
    lt_cv_sys_global_symbol_pipe=
  fi
done
])
if test -z "$lt_cv_sys_global_symbol_pipe"; then
  lt_cv_sys_global_symbol_to_cdecl=
fi
if test -z "$lt_cv_sys_global_symbol_pipe$lt_cv_sys_global_symbol_to_cdecl"; then
  AC_MSG_RESULT(failed)
else
  AC_MSG_RESULT(ok)
fi
]) # AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE


# AC_LIBTOOL_PROG_COMPILER_PIC([TAGNAME])
# ---------------------------------------
# Choose, for TAGNAME, the compiler flags stored in
#   lt_prog_compiler_wl      - prefix used to hand an option to the linker
#   lt_prog_compiler_pic     - flag that requests position independent code
#   lt_prog_compiler_static  - flag that prevents dynamic linking
# The choice depends on host_os, host_cpu and cc_basename, with one table
# for the CXX tag and one for every other tag, each split into a GNU
# compiler half and a vendor compiler half.  Afterwards the PIC flag is
# checked with AC_LIBTOOL_COMPILER_OPTION and the static flag with
# AC_LIBTOOL_LINKER_OPTION.  Hosts with no matching entry in the vendor
# tables get lt_prog_compiler_can_build_shared=no.
AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_PIC],
[_LT_AC_TAGVAR(lt_prog_compiler_wl, $1)=
_LT_AC_TAGVAR(lt_prog_compiler_pic, $1)=
_LT_AC_TAGVAR(lt_prog_compiler_static, $1)=

AC_MSG_CHECKING([for $compiler option to produce PIC])
ifelse([$1],[CXX],[
  # C++ specific cases for pic, static, wl, etc.
  if test "$GXX" = yes; then
    _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
    _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-static'

    case $host_os in
    aix*)
      # All AIX code is PIC.
      if test "$host_cpu" = ia64; then
        # AIX 5 now supports IA64 processor
        _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
      fi
      ;;
    amigaos*)
      # FIXME: we need at least 68020 code to build shared libraries, but
      # adding the `-m68020' flag to GCC prevents building anything better,
      # like `-m68040'.
      _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-m68020 -resident32 -malways-restore-a4'
      ;;
    beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*)
      # PIC is the default for these OSes.
      ;;
    mingw* | cygwin* | os2* | pw32*)
      # This hack is so that the source file can tell whether it is being
      # built for inclusion in a dll (and should export symbols for example).
      # Although the cygwin gcc ignores -fPIC, still need this for old-style
      # (--disable-auto-import) libraries
      _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT'
      ;;
    darwin* | rhapsody*)
      # PIC is the default on this platform
      # Common symbols not allowed in MH_DYLIB files
      _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-fno-common'
      ;;
    *djgpp*)
      # DJGPP does not support shared libraries at all
      _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)=
      ;;
    interix[[3-9]]*)
      # Interix 3.x gcc -fpic/-fPIC options generate broken code.
      # Instead, we relocate shared libraries at runtime.
      ;;
    sysv4*MP*)
      if test -d /usr/nec; then
        _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)=-Kconform_pic
      fi
      ;;
    hpux*)
      # PIC is the default for IA64 HP-UX and 64-bit HP-UX, but
      # not for PA HP-UX.
      case $host_cpu in
      hppa*64*|ia64*)
        ;;
      *)
        _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
        ;;
      esac
      ;;
    *)
      _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
      ;;
    esac
  else
    case $host_os in
    aix4* | aix5*)
      # All AIX code is PIC.
      if test "$host_cpu" = ia64; then
        # AIX 5 now supports IA64 processor
        _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
      else
        _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-bnso -bI:/lib/syscalls.exp'
      fi
      ;;
    chorus*)
      case $cc_basename in
      cxch68*)
        # Green Hills C++ Compiler
        # _LT_AC_TAGVAR(lt_prog_compiler_static, $1)="--no_auto_instantiation -u __main -u __premain -u _abort -r $COOL_DIR/lib/libOrb.a $MVME_DIR/lib/CC/libC.a $MVME_DIR/lib/classix/libcx.s.a"
        ;;
      esac
      ;;
    darwin*)
      # PIC is the default on this platform
      # Common symbols not allowed in MH_DYLIB files
      case $cc_basename in
      xlc*)
        _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-qnocommon'
        _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
        ;;
      esac
      ;;
    dgux*)
      case $cc_basename in
      ec++*)
        _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
        ;;
      ghcx*)
        # Green Hills C++ Compiler
        _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-pic'
        ;;
      *)
        ;;
      esac
      ;;
    freebsd* | dragonfly*)
      # FreeBSD uses GNU C++
      ;;
    hpux9* | hpux10* | hpux11*)
      case $cc_basename in
      CC*)
        _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
        _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='${wl}-a ${wl}archive'
        if test "$host_cpu" != ia64; then
          _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='+Z'
        fi
        ;;
      aCC*)
        _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
        _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='${wl}-a ${wl}archive'
        case $host_cpu in
        hppa*64*|ia64*)
          # +Z the default
          ;;
        *)
          _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='+Z'
          ;;
        esac
        ;;
      *)
        ;;
      esac
      ;;
    interix*)
      # This is c89, which is MS Visual C++ (no shared libs)
      # Anyone wants to do a port?
      ;;
    irix5* | irix6* | nonstopux*)
      case $cc_basename in
      CC*)
        _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
        _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-non_shared'
        # CC pic flag -KPIC is the default.
        ;;
      *)
        ;;
      esac
      ;;
    linux* | k*bsd*-gnu)
      case $cc_basename in
      KCC*)
        # KAI C++ Compiler
        _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='--backend -Wl,'
        _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
        ;;
      icpc* | ecpc*)
        # Intel C++
        _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
        _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
        _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-static'
        ;;
      pgCC*)
        # Portland Group C++ compiler.
        _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
        _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-fpic'
        _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
        ;;
      cxx*)
        # Compaq C++
        # Make sure the PIC flag is empty. It appears that all Alpha
        # Linux and Compaq Tru64 Unix objects are PIC.
        _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)=
        _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-non_shared'
        ;;
      *)
        case `$CC -V 2>&1 | sed 5q` in
        *Sun\ C*)
          # Sun C++ 5.9
          _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
          _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
          _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld '
          ;;
        esac
        ;;
      esac
      ;;
    lynxos*)
      ;;
    m88k*)
      ;;
    mvs*)
      case $cc_basename in
      cxx*)
        _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-W c,exportall'
        ;;
      *)
        ;;
      esac
      ;;
    netbsd*)
      ;;
    osf3* | osf4* | osf5*)
      case $cc_basename in
      KCC*)
        _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='--backend -Wl,'
        ;;
      RCC*)
        # Rational C++ 2.4.1
        _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-pic'
        ;;
      cxx*)
        # Digital/Compaq C++
        _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
        # Make sure the PIC flag is empty. It appears that all Alpha
        # Linux and Compaq Tru64 Unix objects are PIC.
        _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)=
        _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-non_shared'
        ;;
      *)
        ;;
      esac
      ;;
    psos*)
      ;;
    solaris*)
      case $cc_basename in
      CC*)
        # Sun C++ 4.2, 5.x and Centerline C++
        _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
        _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
        _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld '
        ;;
      gcx*)
        # Green Hills C++ Compiler
        _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-PIC'
        ;;
      *)
        ;;
      esac
      ;;
    sunos4*)
      case $cc_basename in
      CC*)
        # Sun C++ 4.x
        _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-pic'
        _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
        ;;
      lcc*)
        # Lucid
        _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-pic'
        ;;
      *)
        ;;
      esac
      ;;
    tandem*)
      case $cc_basename in
      NCC*)
        # NonStop-UX NCC 3.20
        _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
        ;;
      *)
        ;;
      esac
      ;;
    sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*)
      case $cc_basename in
      CC*)
        _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
        _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
        _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
        ;;
      esac
      ;;
    vxworks*)
      ;;
    *)
      _LT_AC_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no
      ;;
    esac
  fi
],
[
  if test "$GCC" = yes; then
    _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
    _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-static'

    case $host_os in
    aix*)
      # All AIX code is PIC.
      if test "$host_cpu" = ia64; then
        # AIX 5 now supports IA64 processor
        _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
      fi
      ;;

    amigaos*)
      # FIXME: we need at least 68020 code to build shared libraries, but
      # adding the `-m68020' flag to GCC prevents building anything better,
      # like `-m68040'.
      _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-m68020 -resident32 -malways-restore-a4'
      ;;

    beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*)
      # PIC is the default for these OSes.
      ;;

    mingw* | cygwin* | pw32* | os2*)
      # This hack is so that the source file can tell whether it is being
      # built for inclusion in a dll (and should export symbols for example).
      # Although the cygwin gcc ignores -fPIC, still need this for old-style
      # (--disable-auto-import) libraries
      _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT'
      ;;

    darwin* | rhapsody*)
      # PIC is the default on this platform
      # Common symbols not allowed in MH_DYLIB files
      _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-fno-common'
      ;;

    interix[[3-9]]*)
      # Interix 3.x gcc -fpic/-fPIC options generate broken code.
      # Instead, we relocate shared libraries at runtime.
      ;;

    msdosdjgpp*)
      # Just because we use GCC doesn't mean we suddenly get shared libraries
      # on systems that don't support them.
      _LT_AC_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no
      enable_shared=no
      ;;

    sysv4*MP*)
      if test -d /usr/nec; then
        _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)=-Kconform_pic
      fi
      ;;

    hpux*)
      # PIC is the default for IA64 HP-UX and 64-bit HP-UX, but
      # not for PA HP-UX.
      case $host_cpu in
      hppa*64*|ia64*)
        # +Z the default
        ;;
      *)
        _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
        ;;
      esac
      ;;

    *)
      _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
      ;;
    esac
  else
    # PORTME Check for flag to pass linker flags through the system compiler.
    case $host_os in
    aix*)
      _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
      if test "$host_cpu" = ia64; then
        # AIX 5 now supports IA64 processor
        _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
      else
        _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-bnso -bI:/lib/syscalls.exp'
      fi
      ;;
    darwin*)
      # PIC is the default on this platform
      # Common symbols not allowed in MH_DYLIB files
      case $cc_basename in
      xlc*)
        _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-qnocommon'
        _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
        ;;
      esac
      ;;

    mingw* | cygwin* | pw32* | os2*)
      # This hack is so that the source file can tell whether it is being
      # built for inclusion in a dll (and should export symbols for example).
      _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT'
      ;;

    hpux9* | hpux10* | hpux11*)
      _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
      # PIC is the default for IA64 HP-UX and 64-bit HP-UX, but
      # not for PA HP-UX.
      case $host_cpu in
      hppa*64*|ia64*)
        # +Z the default
        ;;
      *)
        _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='+Z'
        ;;
      esac
      # Is there a better lt_prog_compiler_static that works with the bundled CC?
      _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='${wl}-a ${wl}archive'
      ;;

    irix5* | irix6* | nonstopux*)
      _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
      # PIC (with -KPIC) is the default.
      _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-non_shared'
      ;;

    newsos6)
      _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
      _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
      ;;

    linux* | k*bsd*-gnu)
      case $cc_basename in
      icc* | ecc*)
        _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
        _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
        _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-static'
        ;;
      pgcc* | pgf77* | pgf90* | pgf95*)
        # Portland Group compilers (*not* the Pentium gcc compiler,
        # which looks to be a dead project)
        _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
        _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-fpic'
        _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
        ;;
      ccc*)
        _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
        # All Alpha code is PIC.
        _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-non_shared'
        ;;
      *)
        case `$CC -V 2>&1 | sed 5q` in
        *Sun\ C*)
          # Sun C 5.9
          _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
          _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
          _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
          ;;
        *Sun\ F*)
          # Sun Fortran 8.3 passes all unrecognized flags to the linker
          _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
          _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
          _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)=''
          ;;
        esac
        ;;
      esac
      ;;

    osf3* | osf4* | osf5*)
      _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
      # All OSF/1 code is PIC.
      _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-non_shared'
      ;;

    rdos*)
      _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-non_shared'
      ;;

    solaris*)
      _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
      _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
      case $cc_basename in
      f77* | f90* | f95*)
        _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ';;
      *)
        _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,';;
      esac
      ;;

    sunos4*)
      _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld '
      _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-PIC'
      _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
      ;;

    sysv4 | sysv4.2uw2* | sysv4.3*)
      _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
      _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
      _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
      ;;

    sysv4*MP*)
      if test -d /usr/nec ;then
        _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-Kconform_pic'
        _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
      fi
      ;;

    sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*)
      _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
      _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
      _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
      ;;

    unicos*)
      _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
      _LT_AC_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no
      ;;

    uts4*)
      _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-pic'
      _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
      ;;

    *)
      _LT_AC_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no
      ;;
    esac
  fi
])
AC_MSG_RESULT([$_LT_AC_TAGVAR(lt_prog_compiler_pic, $1)])

#
# Check to make sure the PIC flag actually works.
#
if test -n "$_LT_AC_TAGVAR(lt_prog_compiler_pic, $1)"; then
  AC_LIBTOOL_COMPILER_OPTION([if $compiler PIC flag $_LT_AC_TAGVAR(lt_prog_compiler_pic, $1) works],
    _LT_AC_TAGVAR(lt_prog_compiler_pic_works, $1),
    [$_LT_AC_TAGVAR(lt_prog_compiler_pic, $1)ifelse([$1],[],[ -DPIC],[ifelse([$1],[CXX],[ -DPIC],[])])], [],
    [case $_LT_AC_TAGVAR(lt_prog_compiler_pic, $1) in
     "" | " "*) ;;
     *) _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)=" $_LT_AC_TAGVAR(lt_prog_compiler_pic, $1)" ;;
     esac],
    [_LT_AC_TAGVAR(lt_prog_compiler_pic, $1)=
     _LT_AC_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no])
fi
case $host_os in
  # For platforms which do not support PIC, -DPIC is meaningless:
  *djgpp*)
    _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)=
    ;;
  *)
    _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)="$_LT_AC_TAGVAR(lt_prog_compiler_pic, $1)ifelse([$1],[],[ -DPIC],[ifelse([$1],[CXX],[ -DPIC],[])])"
    ;;
esac

#
# Check to make sure the static flag actually works.
#
wl=$_LT_AC_TAGVAR(lt_prog_compiler_wl, $1) eval lt_tmp_static_flag=\"$_LT_AC_TAGVAR(lt_prog_compiler_static, $1)\"
AC_LIBTOOL_LINKER_OPTION([if $compiler static flag $lt_tmp_static_flag works],
  _LT_AC_TAGVAR(lt_prog_compiler_static_works, $1),
  $lt_tmp_static_flag,
  [],
  [_LT_AC_TAGVAR(lt_prog_compiler_static, $1)=])
])


# AC_LIBTOOL_PROG_LD_SHLIBS([TAGNAME]) # ------------------------------------ # See if the linker supports building shared libraries. AC_DEFUN([AC_LIBTOOL_PROG_LD_SHLIBS], [AC_REQUIRE([LT_AC_PROG_SED])dnl AC_MSG_CHECKING([whether the $compiler linker ($LD) supports shared libraries]) ifelse([$1],[CXX],[ _LT_AC_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' case $host_os in aix4* | aix5*) # If we're using GNU nm, then we don't want the "-C" option.
# -C means demangle to AIX nm, but means don't demangle with GNU nm if $NM -V 2>&1 | grep 'GNU' > /dev/null; then _LT_AC_TAGVAR(export_symbols_cmds, $1)='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\[$]2 == "T") || (\[$]2 == "D") || (\[$]2 == "B")) && ([substr](\[$]3,1,1) != ".")) { print \[$]3 } }'\'' | sort -u > $export_symbols' else _LT_AC_TAGVAR(export_symbols_cmds, $1)='$NM -BCpg $libobjs $convenience | awk '\''{ if (((\[$]2 == "T") || (\[$]2 == "D") || (\[$]2 == "B")) && ([substr](\[$]3,1,1) != ".")) { print \[$]3 } }'\'' | sort -u > $export_symbols' fi ;; pw32*) _LT_AC_TAGVAR(export_symbols_cmds, $1)="$ltdll_cmds" ;; cygwin* | mingw*) _LT_AC_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1 DATA/;/^.*[[ ]]__nm__/s/^.*[[ ]]__nm__\([[^ ]]*\)[[ ]][[^ ]]*/\1 DATA/;/^I[[ ]]/d;/^[[AITW]][[ ]]/s/.*[[ ]]//'\'' | sort | uniq > $export_symbols' ;; *) _LT_AC_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' ;; esac ],[ runpath_var= _LT_AC_TAGVAR(allow_undefined_flag, $1)= _LT_AC_TAGVAR(enable_shared_with_static_runtimes, $1)=no _LT_AC_TAGVAR(archive_cmds, $1)= _LT_AC_TAGVAR(archive_expsym_cmds, $1)= _LT_AC_TAGVAR(old_archive_From_new_cmds, $1)= _LT_AC_TAGVAR(old_archive_from_expsyms_cmds, $1)= _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)= _LT_AC_TAGVAR(whole_archive_flag_spec, $1)= _LT_AC_TAGVAR(thread_safe_flag_spec, $1)= _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)= _LT_AC_TAGVAR(hardcode_libdir_flag_spec_ld, $1)= _LT_AC_TAGVAR(hardcode_libdir_separator, $1)= _LT_AC_TAGVAR(hardcode_direct, $1)=no _LT_AC_TAGVAR(hardcode_minus_L, $1)=no _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=unsupported _LT_AC_TAGVAR(link_all_deplibs, $1)=unknown _LT_AC_TAGVAR(hardcode_automatic, $1)=no _LT_AC_TAGVAR(module_cmds, $1)= _LT_AC_TAGVAR(module_expsym_cmds, $1)= _LT_AC_TAGVAR(always_export_symbols, $1)=no 
_LT_AC_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' # include_expsyms should be a list of space-separated symbols to be *always* # included in the symbol list _LT_AC_TAGVAR(include_expsyms, $1)= # exclude_expsyms can be an extended regexp of symbols to exclude # it will be wrapped by ` (' and `)$', so one must not match beginning or # end of line. Example: `a|bc|.*d.*' will exclude the symbols `a' and `bc', # as well as any symbol that contains `d'. _LT_AC_TAGVAR(exclude_expsyms, $1)="_GLOBAL_OFFSET_TABLE_" # Although _GLOBAL_OFFSET_TABLE_ is a valid symbol C name, most a.out # platforms (ab)use it in PIC code, but their linkers get confused if # the symbol is explicitly referenced. Since portable code cannot # rely on this symbol name, it's probably fine to never include it in # preloaded symbol tables. extract_expsyms_cmds= # Just being paranoid about ensuring that cc_basename is set. _LT_CC_BASENAME([$compiler]) case $host_os in cygwin* | mingw* | pw32*) # FIXME: the MSVC++ port hasn't been tested in a loooong time # When not using gcc, we currently assume that we are using # Microsoft Visual C++. if test "$GCC" != yes; then with_gnu_ld=no fi ;; interix*) # we just hope/assume this is gcc and not c89 (= MSVC++) with_gnu_ld=yes ;; openbsd*) with_gnu_ld=no ;; esac _LT_AC_TAGVAR(ld_shlibs, $1)=yes if test "$with_gnu_ld" = yes; then # If archive_cmds runs LD, not CC, wlarc should be empty wlarc='${wl}' # Set some defaults for GNU ld with shared library support. These # are reset later if shared libraries are not supported. Putting them # here allows them to be overridden if necessary. runpath_var=LD_RUN_PATH _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}--rpath ${wl}$libdir' _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic' # ancient GNU ld didn't support --whole-archive et. al. 
if $LD --help 2>&1 | grep 'no-whole-archive' > /dev/null; then _LT_AC_TAGVAR(whole_archive_flag_spec, $1)="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive' else _LT_AC_TAGVAR(whole_archive_flag_spec, $1)= fi supports_anon_versioning=no case `$LD -v 2>/dev/null` in *\ [[01]].* | *\ 2.[[0-9]].* | *\ 2.10.*) ;; # catch versions < 2.11 *\ 2.11.93.0.2\ *) supports_anon_versioning=yes ;; # RH7.3 ... *\ 2.11.92.0.12\ *) supports_anon_versioning=yes ;; # Mandrake 8.2 ... *\ 2.11.*) ;; # other 2.11 versions *) supports_anon_versioning=yes ;; esac # See if GNU ld supports shared libraries. case $host_os in aix3* | aix4* | aix5*) # On AIX/PPC, the GNU linker is very broken if test "$host_cpu" != ia64; then _LT_AC_TAGVAR(ld_shlibs, $1)=no cat <&2 *** Warning: the GNU linker, at least up to release 2.9.1, is reported *** to be unable to reliably create shared libraries on AIX. *** Therefore, libtool is disabling shared libraries support. If you *** really care for shared libraries, you may want to modify your PATH *** so that a non-GNU linker is found, and then restart. EOF fi ;; amigaos*) _LT_AC_TAGVAR(archive_cmds, $1)='$rm $output_objdir/a2ixlibrary.data~$echo "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$echo "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$echo "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$echo "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' _LT_AC_TAGVAR(hardcode_minus_L, $1)=yes # Samuel A. Falvo II reports # that the semantics of dynamic libraries on AmigaOS, at least up # to version 4, is to share data among multiple programs linked # with the same dynamic library. Since this doesn't match the # behavior of shared libraries on other platforms, we can't use # them. 
_LT_AC_TAGVAR(ld_shlibs, $1)=no ;; beos*) if $LD --help 2>&1 | grep ': supported targets:.* elf' > /dev/null; then _LT_AC_TAGVAR(allow_undefined_flag, $1)=unsupported # Joseph Beckenbach says some releases of gcc # support --undefined. This deserves some investigation. FIXME _LT_AC_TAGVAR(archive_cmds, $1)='$CC -nostart $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' else _LT_AC_TAGVAR(ld_shlibs, $1)=no fi ;; cygwin* | mingw* | pw32*) # _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1) is actually meaningless, # as there is no search path for DLLs. _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' _LT_AC_TAGVAR(allow_undefined_flag, $1)=unsupported _LT_AC_TAGVAR(always_export_symbols, $1)=no _LT_AC_TAGVAR(enable_shared_with_static_runtimes, $1)=yes _LT_AC_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]][[ ]]/s/.*[[ ]]\([[^ ]]*\)/\1 DATA/'\'' -e '\''/^[[AITW]][[ ]]/s/.*[[ ]]//'\'' | sort | uniq > $export_symbols' if $LD --help 2>&1 | grep 'auto-import' > /dev/null; then _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' # If the export-symbols file already is a .def file (1st line # is EXPORTS), use it as is; otherwise, prepend... 
_LT_AC_TAGVAR(archive_expsym_cmds, $1)='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then cp $export_symbols $output_objdir/$soname.def; else echo EXPORTS > $output_objdir/$soname.def; cat $export_symbols >> $output_objdir/$soname.def; fi~ $CC -shared $output_objdir/$soname.def $libobjs $deplibs $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' else _LT_AC_TAGVAR(ld_shlibs, $1)=no fi ;; interix[[3-9]]*) _LT_AC_TAGVAR(hardcode_direct, $1)=no _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc. # Instead, shared libraries are loaded at an image base (0x10000000 by # default) and relocated if they conflict, which is a slow very memory # consuming and fragmenting process. To avoid this, we pick a random, # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link # time. Moving up from 0x10000000 also allows more sbrk(2) space. 
_LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' _LT_AC_TAGVAR(archive_expsym_cmds, $1)='sed "s,^,_," $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--retain-symbols-file,$output_objdir/$soname.expsym ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' ;; gnu* | linux* | k*bsd*-gnu) if $LD --help 2>&1 | grep ': supported targets:.* elf' > /dev/null; then tmp_addflag= case $cc_basename,$host_cpu in pgcc*) # Portland Group C compiler _LT_AC_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; $echo \"$new_convenience\"` ${wl}--no-whole-archive' tmp_addflag=' $pic_flag' ;; pgf77* | pgf90* | pgf95*) # Portland Group f77 and f90 compilers _LT_AC_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; $echo \"$new_convenience\"` ${wl}--no-whole-archive' tmp_addflag=' $pic_flag -Mnomain' ;; ecc*,ia64* | icc*,ia64*) # Intel C compiler on ia64 tmp_addflag=' -i_dynamic' ;; efc*,ia64* | ifort*,ia64*) # Intel Fortran compiler on ia64 tmp_addflag=' -i_dynamic -nofor_main' ;; ifc* | ifort*) # Intel Fortran compiler tmp_addflag=' -nofor_main' ;; esac case `$CC -V 2>&1 | sed 5q` in *Sun\ C*) # Sun C 5.9 _LT_AC_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; $echo \"$new_convenience\"` ${wl}--no-whole-archive' tmp_sharedflag='-G' ;; *Sun\ F*) # Sun Fortran 8.3 tmp_sharedflag='-G' ;; *) tmp_sharedflag='-shared' ;; esac _LT_AC_TAGVAR(archive_cmds, $1)='$CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs 
$deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' if test $supports_anon_versioning = yes; then _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$echo "{ global:" > $output_objdir/$libname.ver~ cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~ $echo "local: *; };" >> $output_objdir/$libname.ver~ $CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-version-script ${wl}$output_objdir/$libname.ver -o $lib' fi else _LT_AC_TAGVAR(ld_shlibs, $1)=no fi ;; netbsd*) if echo __ELF__ | $CC -E - | grep __ELF__ >/dev/null; then _LT_AC_TAGVAR(archive_cmds, $1)='$LD -Bshareable $libobjs $deplibs $linker_flags -o $lib' wlarc= else _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' fi ;; solaris*) if $LD -v 2>&1 | grep 'BFD 2\.8' > /dev/null; then _LT_AC_TAGVAR(ld_shlibs, $1)=no cat <&2 *** Warning: The releases 2.8.* of the GNU linker cannot reliably *** create shared libraries on Solaris systems. Therefore, libtool *** is disabling shared libraries support. We urge you to upgrade GNU *** binutils to release 2.9.1 or newer. Another option is to modify *** your PATH or compiler configuration so that the native linker is *** used, and then restart. 
EOF elif $LD --help 2>&1 | grep ': supported targets:.* elf' > /dev/null; then _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' else _LT_AC_TAGVAR(ld_shlibs, $1)=no fi ;; sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX*) case `$LD -v 2>&1` in *\ [[01]].* | *\ 2.[[0-9]].* | *\ 2.1[[0-5]].*) _LT_AC_TAGVAR(ld_shlibs, $1)=no cat <<_LT_EOF 1>&2 *** Warning: Releases of the GNU linker prior to 2.16.91.0.3 can not *** reliably create shared libraries on SCO systems. Therefore, libtool *** is disabling shared libraries support. We urge you to upgrade GNU *** binutils to release 2.16.91.0.3 or newer. Another option is to modify *** your PATH or compiler configuration so that the native linker is *** used, and then restart. _LT_EOF ;; *) if $LD --help 2>&1 | grep ': supported targets:.* elf' > /dev/null; then _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='`test -z "$SCOABSPATH" && echo ${wl}-rpath,$libdir`' _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname,\${SCOABSPATH:+${install_libdir}/}$soname -o $lib' _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname,\${SCOABSPATH:+${install_libdir}/}$soname,-retain-symbols-file,$export_symbols -o $lib' else _LT_AC_TAGVAR(ld_shlibs, $1)=no fi ;; esac ;; sunos4*) _LT_AC_TAGVAR(archive_cmds, $1)='$LD -assert pure-text -Bshareable -o $lib $libobjs $deplibs $linker_flags' wlarc= _LT_AC_TAGVAR(hardcode_direct, $1)=yes _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no ;; *) if $LD --help 2>&1 | grep ': supported targets:.* elf' > /dev/null; then _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $libobjs $deplibs 
$compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' else _LT_AC_TAGVAR(ld_shlibs, $1)=no fi ;; esac if test "$_LT_AC_TAGVAR(ld_shlibs, $1)" = no; then runpath_var= _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)= _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)= _LT_AC_TAGVAR(whole_archive_flag_spec, $1)= fi else # PORTME fill in a description of your system's linker (not GNU ld) case $host_os in aix3*) _LT_AC_TAGVAR(allow_undefined_flag, $1)=unsupported _LT_AC_TAGVAR(always_export_symbols, $1)=yes _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$LD -o $output_objdir/$soname $libobjs $deplibs $linker_flags -bE:$export_symbols -T512 -H512 -bM:SRE~$AR $AR_FLAGS $lib $output_objdir/$soname' # Note: this linker hardcodes the directories in LIBPATH if there # are no directories specified by -L. _LT_AC_TAGVAR(hardcode_minus_L, $1)=yes if test "$GCC" = yes && test -z "$lt_prog_compiler_static"; then # Neither direct hardcoding nor static linking is supported with a # broken collect2. _LT_AC_TAGVAR(hardcode_direct, $1)=unsupported fi ;; aix4* | aix5*) if test "$host_cpu" = ia64; then # On IA64, the linker does run time linking by default, so we don't # have to do anything special. aix_use_runtimelinking=no exp_sym_flag='-Bexport' no_entry_flag="" else # If we're using GNU nm, then we don't want the "-C" option. 
# -C means demangle to AIX nm, but means don't demangle with GNU nm if $NM -V 2>&1 | grep 'GNU' > /dev/null; then _LT_AC_TAGVAR(export_symbols_cmds, $1)='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\[$]2 == "T") || (\[$]2 == "D") || (\[$]2 == "B")) && ([substr](\[$]3,1,1) != ".")) { print \[$]3 } }'\'' | sort -u > $export_symbols' else _LT_AC_TAGVAR(export_symbols_cmds, $1)='$NM -BCpg $libobjs $convenience | awk '\''{ if (((\[$]2 == "T") || (\[$]2 == "D") || (\[$]2 == "B")) && ([substr](\[$]3,1,1) != ".")) { print \[$]3 } }'\'' | sort -u > $export_symbols' fi aix_use_runtimelinking=no # Test if we are trying to use run time linking or normal # AIX style linking. If -brtl is somewhere in LDFLAGS, we # need to do runtime linking. case $host_os in aix4.[[23]]|aix4.[[23]].*|aix5*) for ld_flag in $LDFLAGS; do if (test $ld_flag = "-brtl" || test $ld_flag = "-Wl,-brtl"); then aix_use_runtimelinking=yes break fi done ;; esac exp_sym_flag='-bexport' no_entry_flag='-bnoentry' fi # When large executables or shared objects are built, AIX ld can # have problems creating the table of contents. If linking a library # or program results in "error TOC overflow" add -mminimal-toc to # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS. 
_LT_AC_TAGVAR(archive_cmds, $1)='' _LT_AC_TAGVAR(hardcode_direct, $1)=yes _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=':' _LT_AC_TAGVAR(link_all_deplibs, $1)=yes if test "$GCC" = yes; then case $host_os in aix4.[[012]]|aix4.[[012]].*) # We only want to do this on AIX 4.2 and lower, the check # below for broken collect2 doesn't work under 4.3+ collect2name=`${CC} -print-prog-name=collect2` if test -f "$collect2name" && \ strings "$collect2name" | grep resolve_lib_name >/dev/null then # We have reworked collect2 : else # We have old collect2 _LT_AC_TAGVAR(hardcode_direct, $1)=unsupported # It fails to find uninstalled libraries when the uninstalled # path is not listed in the libpath. Setting hardcode_minus_L # to unsupported forces relinking _LT_AC_TAGVAR(hardcode_minus_L, $1)=yes _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' _LT_AC_TAGVAR(hardcode_libdir_separator, $1)= fi ;; esac shared_flag='-shared' if test "$aix_use_runtimelinking" = yes; then shared_flag="$shared_flag "'${wl}-G' fi else # not using gcc if test "$host_cpu" = ia64; then # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release # chokes on -Wl,-G. The following line is correct: shared_flag='-G' else if test "$aix_use_runtimelinking" = yes; then shared_flag='${wl}-G' else shared_flag='${wl}-bM:SRE' fi fi fi # It seems that -bexpall does not export symbols beginning with # underscore (_), so it is better to generate a list of symbols to export. _LT_AC_TAGVAR(always_export_symbols, $1)=yes if test "$aix_use_runtimelinking" = yes; then # Warning - without using the other runtime loading flags (-brtl), # -berok will link without error, but may produce a broken library. _LT_AC_TAGVAR(allow_undefined_flag, $1)='-berok' # Determine the default libpath from the value encoded in an empty executable. 
_LT_AC_SYS_LIBPATH_AIX _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-blibpath:$libdir:'"$aix_libpath" _LT_AC_TAGVAR(archive_expsym_cmds, $1)="\$CC"' -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags `if test "x${allow_undefined_flag}" != "x"; then echo "${wl}${allow_undefined_flag}"; else :; fi` '"\${wl}$exp_sym_flag:\$export_symbols $shared_flag" else if test "$host_cpu" = ia64; then _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R $libdir:/usr/lib:/lib' _LT_AC_TAGVAR(allow_undefined_flag, $1)="-z nodefs" _LT_AC_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags ${wl}${allow_undefined_flag} '"\${wl}$exp_sym_flag:\$export_symbols" else # Determine the default libpath from the value encoded in an empty executable. _LT_AC_SYS_LIBPATH_AIX _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-blibpath:$libdir:'"$aix_libpath" # Warning - without using the other run time loading flags, # -berok will link without error, but may produce a broken library. _LT_AC_TAGVAR(no_undefined_flag, $1)=' ${wl}-bernotok' _LT_AC_TAGVAR(allow_undefined_flag, $1)=' ${wl}-berok' # Exported symbols can be pulled into shared objects from archives _LT_AC_TAGVAR(whole_archive_flag_spec, $1)='$convenience' _LT_AC_TAGVAR(archive_cmds_need_lc, $1)=yes # This is similar to how AIX traditionally builds its shared libraries. 
_LT_AC_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs ${wl}-bnoentry $compiler_flags ${wl}-bE:$export_symbols${allow_undefined_flag}~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$soname' fi fi ;; amigaos*) _LT_AC_TAGVAR(archive_cmds, $1)='$rm $output_objdir/a2ixlibrary.data~$echo "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$echo "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$echo "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$echo "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' _LT_AC_TAGVAR(hardcode_minus_L, $1)=yes # see comment about different semantics on the GNU ld section _LT_AC_TAGVAR(ld_shlibs, $1)=no ;; bsdi[[45]]*) _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)=-rdynamic ;; cygwin* | mingw* | pw32*) # When not using gcc, we currently assume that we are using # Microsoft Visual C++. # hardcode_libdir_flag_spec is actually meaningless, as there is # no search path for DLLs. _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)=' ' _LT_AC_TAGVAR(allow_undefined_flag, $1)=unsupported # Tell ltmain to make .lib files, not .a files. libext=lib # Tell ltmain to make .dll files, not .so files. shrext_cmds=".dll" # FIXME: Setting linknames here is a bad hack. _LT_AC_TAGVAR(archive_cmds, $1)='$CC -o $lib $libobjs $compiler_flags `echo "$deplibs" | $SED -e '\''s/ -lc$//'\''` -link -dll~linknames=' # The linker will automatically build a .lib file if we build a DLL. _LT_AC_TAGVAR(old_archive_From_new_cmds, $1)='true' # FIXME: Should let the user specify the lib program. 
_LT_AC_TAGVAR(old_archive_cmds, $1)='lib -OUT:$oldlib$oldobjs$old_deplibs' _LT_AC_TAGVAR(fix_srcfile_path, $1)='`cygpath -w "$srcfile"`' _LT_AC_TAGVAR(enable_shared_with_static_runtimes, $1)=yes ;; darwin* | rhapsody*) case $host_os in rhapsody* | darwin1.[[012]]) _LT_AC_TAGVAR(allow_undefined_flag, $1)='${wl}-undefined ${wl}suppress' ;; *) # Darwin 1.3 on if test -z ${MACOSX_DEPLOYMENT_TARGET} ; then _LT_AC_TAGVAR(allow_undefined_flag, $1)='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' else case ${MACOSX_DEPLOYMENT_TARGET} in 10.[[012]]) _LT_AC_TAGVAR(allow_undefined_flag, $1)='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' ;; 10.*) _LT_AC_TAGVAR(allow_undefined_flag, $1)='${wl}-undefined ${wl}dynamic_lookup' ;; esac fi ;; esac _LT_AC_TAGVAR(archive_cmds_need_lc, $1)=no _LT_AC_TAGVAR(hardcode_direct, $1)=no _LT_AC_TAGVAR(hardcode_automatic, $1)=yes _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=unsupported _LT_AC_TAGVAR(whole_archive_flag_spec, $1)='' _LT_AC_TAGVAR(link_all_deplibs, $1)=yes if test "$GCC" = yes ; then output_verbose_link_cmd='echo' _LT_AC_TAGVAR(archive_cmds, $1)='$CC -dynamiclib $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags -install_name $rpath/$soname $verstring' _LT_AC_TAGVAR(module_cmds, $1)='$CC $allow_undefined_flag -o $lib -bundle $libobjs $deplibs$compiler_flags' # Don't fix this by using the ld -exported_symbols_list flag, it doesn't exist in older darwin lds _LT_AC_TAGVAR(archive_expsym_cmds, $1)='sed -e "s,#.*,," -e "s,^[ ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC -dynamiclib $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags -install_name $rpath/$soname $verstring~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}' _LT_AC_TAGVAR(module_expsym_cmds, $1)='sed -e "s,#.*,," -e "s,^[ ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC $allow_undefined_flag -o $lib -bundle $libobjs $deplibs$compiler_flags~nmedit 
-s $output_objdir/${libname}-symbols.expsym ${lib}' else case $cc_basename in xlc*) output_verbose_link_cmd='echo' _LT_AC_TAGVAR(archive_cmds, $1)='$CC -qmkshrobj $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-install_name ${wl}`echo $rpath/$soname` $xlcverstring' _LT_AC_TAGVAR(module_cmds, $1)='$CC $allow_undefined_flag -o $lib -bundle $libobjs $deplibs$compiler_flags' # Don't fix this by using the ld -exported_symbols_list flag, it doesn't exist in older darwin lds _LT_AC_TAGVAR(archive_expsym_cmds, $1)='sed -e "s,#.*,," -e "s,^[ ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC -qmkshrobj $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-install_name ${wl}$rpath/$soname $xlcverstring~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}' _LT_AC_TAGVAR(module_expsym_cmds, $1)='sed -e "s,#.*,," -e "s,^[ ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC $allow_undefined_flag -o $lib -bundle $libobjs $deplibs$compiler_flags~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}' ;; *) _LT_AC_TAGVAR(ld_shlibs, $1)=no ;; esac fi ;; dgux*) _LT_AC_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no ;; freebsd1*) _LT_AC_TAGVAR(ld_shlibs, $1)=no ;; # FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor # support. Future versions do this automatically, but an explicit c++rt0.o # does not break anything, and helps significantly (at the cost of a little # extra space). 
freebsd2.2*) _LT_AC_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags /usr/lib/c++rt0.o' _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' _LT_AC_TAGVAR(hardcode_direct, $1)=yes _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no ;; # Unfortunately, older versions of FreeBSD 2 do not have this feature. freebsd2*) _LT_AC_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' _LT_AC_TAGVAR(hardcode_direct, $1)=yes _LT_AC_TAGVAR(hardcode_minus_L, $1)=yes _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no ;; # FreeBSD 3 and greater uses gcc -shared to do shared libraries. freebsd* | dragonfly*) _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared -o $lib $libobjs $deplibs $compiler_flags' _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' _LT_AC_TAGVAR(hardcode_direct, $1)=yes _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no ;; hpux9*) if test "$GCC" = yes; then _LT_AC_TAGVAR(archive_cmds, $1)='$rm $output_objdir/$soname~$CC -shared -fPIC ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $libobjs $deplibs $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' else _LT_AC_TAGVAR(archive_cmds, $1)='$rm $output_objdir/$soname~$LD -b +b $install_libdir -o $output_objdir/$soname $libobjs $deplibs $linker_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' fi _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir' _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=: _LT_AC_TAGVAR(hardcode_direct, $1)=yes # hardcode_minus_L: Not really in the search PATH, # but as the default location of the library. 
_LT_AC_TAGVAR(hardcode_minus_L, $1)=yes _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' ;; hpux10*) if test "$GCC" = yes -a "$with_gnu_ld" = no; then _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared -fPIC ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags' else _LT_AC_TAGVAR(archive_cmds, $1)='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags' fi if test "$with_gnu_ld" = no; then _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir' _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=: _LT_AC_TAGVAR(hardcode_direct, $1)=yes _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' # hardcode_minus_L: Not really in the search PATH, # but as the default location of the library. _LT_AC_TAGVAR(hardcode_minus_L, $1)=yes fi ;; hpux11*) if test "$GCC" = yes -a "$with_gnu_ld" = no; then case $host_cpu in hppa*64*) _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}+h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' ;; ia64*) _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' ;; *) _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared -fPIC ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags' ;; esac else case $host_cpu in hppa*64*) _LT_AC_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' ;; ia64*) _LT_AC_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' ;; *) _LT_AC_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags' ;; esac fi if test "$with_gnu_ld" = no; then _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir' _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=: case $host_cpu in hppa*64*|ia64*) _LT_AC_TAGVAR(hardcode_libdir_flag_spec_ld, $1)='+b $libdir' _LT_AC_TAGVAR(hardcode_direct, 
$1)=no _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no ;; *) _LT_AC_TAGVAR(hardcode_direct, $1)=yes _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' # hardcode_minus_L: Not really in the search PATH, # but as the default location of the library. _LT_AC_TAGVAR(hardcode_minus_L, $1)=yes ;; esac fi ;; irix5* | irix6* | nonstopux*) if test "$GCC" = yes; then _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' else _LT_AC_TAGVAR(archive_cmds, $1)='$LD -shared $libobjs $deplibs $linker_flags -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib' _LT_AC_TAGVAR(hardcode_libdir_flag_spec_ld, $1)='-rpath $libdir' fi _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=: _LT_AC_TAGVAR(link_all_deplibs, $1)=yes ;; netbsd*) if echo __ELF__ | $CC -E - | grep __ELF__ >/dev/null; then _LT_AC_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' # a.out else _LT_AC_TAGVAR(archive_cmds, $1)='$LD -shared -o $lib $libobjs $deplibs $linker_flags' # ELF fi _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' _LT_AC_TAGVAR(hardcode_direct, $1)=yes _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no ;; newsos6) _LT_AC_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' _LT_AC_TAGVAR(hardcode_direct, $1)=yes _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=: _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no ;; openbsd*) if test -f /usr/libexec/ld.so; then _LT_AC_TAGVAR(hardcode_direct, $1)=yes _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then 
_LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-retain-symbols-file,$export_symbols' _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' else case $host_os in openbsd[[01]].* | openbsd2.[[0-7]] | openbsd2.[[0-7]].*) _LT_AC_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' ;; *) _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' ;; esac fi else _LT_AC_TAGVAR(ld_shlibs, $1)=no fi ;; os2*) _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' _LT_AC_TAGVAR(hardcode_minus_L, $1)=yes _LT_AC_TAGVAR(allow_undefined_flag, $1)=unsupported _LT_AC_TAGVAR(archive_cmds, $1)='$echo "LIBRARY $libname INITINSTANCE" > $output_objdir/$libname.def~$echo "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~$echo DATA >> $output_objdir/$libname.def~$echo " SINGLE NONSHARED" >> $output_objdir/$libname.def~$echo EXPORTS >> $output_objdir/$libname.def~emxexp $libobjs >> $output_objdir/$libname.def~$CC -Zdll -Zcrtdll -o $lib $libobjs $deplibs $compiler_flags $output_objdir/$libname.def' _LT_AC_TAGVAR(old_archive_From_new_cmds, $1)='emximp -o $output_objdir/$libname.a $output_objdir/$libname.def' ;; osf3*) if test "$GCC" = yes; then _LT_AC_TAGVAR(allow_undefined_flag, $1)=' ${wl}-expect_unresolved ${wl}\*' _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' else _LT_AC_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*' _LT_AC_TAGVAR(archive_cmds, 
$1)='$LD -shared${allow_undefined_flag} $libobjs $deplibs $linker_flags -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib' fi _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=: ;; osf4* | osf5*) # as osf3* with the addition of -msym flag if test "$GCC" = yes; then _LT_AC_TAGVAR(allow_undefined_flag, $1)=' ${wl}-expect_unresolved ${wl}\*' _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags ${wl}-msym ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' else _LT_AC_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*' _LT_AC_TAGVAR(archive_cmds, $1)='$LD -shared${allow_undefined_flag} $libobjs $deplibs $linker_flags -msym -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib' _LT_AC_TAGVAR(archive_expsym_cmds, $1)='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done; echo "-hidden">> $lib.exp~ $LD -shared${allow_undefined_flag} -input $lib.exp $linker_flags $libobjs $deplibs -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib~$rm $lib.exp' # Both c and cxx compiler support -rpath directly _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir' fi _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=: ;; solaris*) _LT_AC_TAGVAR(no_undefined_flag, $1)=' -z text' if test "$GCC" = yes; then wlarc='${wl}' _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e 
"s/\(.*\)/\1;/" >> $lib.exp~$echo "local: *; };" >> $lib.exp~ $CC -shared ${wl}-M ${wl}$lib.exp ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags~$rm $lib.exp' else wlarc='' _LT_AC_TAGVAR(archive_cmds, $1)='$LD -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $linker_flags' _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~$echo "local: *; };" >> $lib.exp~ $LD -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$rm $lib.exp' fi _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no case $host_os in solaris2.[[0-5]] | solaris2.[[0-5]].*) ;; *) # The compiler driver will combine and reorder linker options, # but understands `-z linker_flag'. GCC discards it without `$wl', # but is careful enough not to reorder. # Supported since Solaris 2.6 (maybe 2.5.1?) if test "$GCC" = yes; then _LT_AC_TAGVAR(whole_archive_flag_spec, $1)='${wl}-z ${wl}allextract$convenience ${wl}-z ${wl}defaultextract' else _LT_AC_TAGVAR(whole_archive_flag_spec, $1)='-z allextract$convenience -z defaultextract' fi ;; esac _LT_AC_TAGVAR(link_all_deplibs, $1)=yes ;; sunos4*) if test "x$host_vendor" = xsequent; then # Use $CC to link under sequent, because it throws in some extra .o # files that make .init and .fini sections work. _LT_AC_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h $soname -o $lib $libobjs $deplibs $compiler_flags' else _LT_AC_TAGVAR(archive_cmds, $1)='$LD -assert pure-text -Bstatic -o $lib $libobjs $deplibs $linker_flags' fi _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' _LT_AC_TAGVAR(hardcode_direct, $1)=yes _LT_AC_TAGVAR(hardcode_minus_L, $1)=yes _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no ;; sysv4) case $host_vendor in sni) _LT_AC_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' _LT_AC_TAGVAR(hardcode_direct, $1)=yes # is this really true??? 
;; siemens) ## LD is ld it makes a PLAMLIB ## CC just makes a GrossModule. _LT_AC_TAGVAR(archive_cmds, $1)='$LD -G -o $lib $libobjs $deplibs $linker_flags' _LT_AC_TAGVAR(reload_cmds, $1)='$CC -r -o $output$reload_objs' _LT_AC_TAGVAR(hardcode_direct, $1)=no ;; motorola) _LT_AC_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' _LT_AC_TAGVAR(hardcode_direct, $1)=no #Motorola manual says yes, but my tests say they lie ;; esac runpath_var='LD_RUN_PATH' _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no ;; sysv4.3*) _LT_AC_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)='-Bexport' ;; sysv4*MP*) if test -d /usr/nec; then _LT_AC_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no runpath_var=LD_RUN_PATH hardcode_runpath_var=yes _LT_AC_TAGVAR(ld_shlibs, $1)=yes fi ;; sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[[01]].[[10]]* | unixware7* | sco3.2v5.0.[[024]]*) _LT_AC_TAGVAR(no_undefined_flag, $1)='${wl}-z,text' _LT_AC_TAGVAR(archive_cmds_need_lc, $1)=no _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no runpath_var='LD_RUN_PATH' if test "$GCC" = yes; then _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' else _LT_AC_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' fi ;; sysv5* | sco3.2v5* | sco5v6*) # Note: We can NOT use -z defs as we might desire, because we do not # link with -lc, and that would cause any symbols used from libc to # always be unresolved, which means just about no 
library would # ever link correctly. If we're not using GNU ld we use -z text # though, which does catch some bad symbols but isn't as heavy-handed # as -z defs. _LT_AC_TAGVAR(no_undefined_flag, $1)='${wl}-z,text' _LT_AC_TAGVAR(allow_undefined_flag, $1)='${wl}-z,nodefs' _LT_AC_TAGVAR(archive_cmds_need_lc, $1)=no _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='`test -z "$SCOABSPATH" && echo ${wl}-R,$libdir`' _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=':' _LT_AC_TAGVAR(link_all_deplibs, $1)=yes _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-Bexport' runpath_var='LD_RUN_PATH' if test "$GCC" = yes; then _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}-h,\${SCOABSPATH:+${install_libdir}/}$soname -o $lib $libobjs $deplibs $compiler_flags' _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,\${SCOABSPATH:+${install_libdir}/}$soname -o $lib $libobjs $deplibs $compiler_flags' else _LT_AC_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h,\${SCOABSPATH:+${install_libdir}/}$soname -o $lib $libobjs $deplibs $compiler_flags' _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,\${SCOABSPATH:+${install_libdir}/}$soname -o $lib $libobjs $deplibs $compiler_flags' fi ;; uts4*) _LT_AC_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no ;; *) _LT_AC_TAGVAR(ld_shlibs, $1)=no ;; esac fi ]) AC_MSG_RESULT([$_LT_AC_TAGVAR(ld_shlibs, $1)]) test "$_LT_AC_TAGVAR(ld_shlibs, $1)" = no && can_build_shared=no # # Do we need to explicitly link libc? # case "x$_LT_AC_TAGVAR(archive_cmds_need_lc, $1)" in x|xyes) # Assume -lc should be added _LT_AC_TAGVAR(archive_cmds_need_lc, $1)=yes if test "$enable_shared" = yes && test "$GCC" = yes; then case $_LT_AC_TAGVAR(archive_cmds, $1) in *'~'*) # FIXME: we may have to deal with multi-command sequences. 
;; '$CC '*) # Test whether the compiler implicitly links with -lc since on some # systems, -lgcc has to come before -lc. If gcc already passes -lc # to ld, don't add -lc before -lgcc. AC_MSG_CHECKING([whether -lc should be explicitly linked in]) $rm conftest* echo "$lt_simple_compile_test_code" > conftest.$ac_ext if AC_TRY_EVAL(ac_compile) 2>conftest.err; then soname=conftest lib=conftest libobjs=conftest.$ac_objext deplibs= wl=$_LT_AC_TAGVAR(lt_prog_compiler_wl, $1) pic_flag=$_LT_AC_TAGVAR(lt_prog_compiler_pic, $1) compiler_flags=-v linker_flags=-v verstring= output_objdir=. libname=conftest lt_save_allow_undefined_flag=$_LT_AC_TAGVAR(allow_undefined_flag, $1) _LT_AC_TAGVAR(allow_undefined_flag, $1)= if AC_TRY_EVAL(_LT_AC_TAGVAR(archive_cmds, $1) 2\>\&1 \| grep \" -lc \" \>/dev/null 2\>\&1) then _LT_AC_TAGVAR(archive_cmds_need_lc, $1)=no else _LT_AC_TAGVAR(archive_cmds_need_lc, $1)=yes fi _LT_AC_TAGVAR(allow_undefined_flag, $1)=$lt_save_allow_undefined_flag else cat conftest.err 1>&5 fi $rm conftest* AC_MSG_RESULT([$_LT_AC_TAGVAR(archive_cmds_need_lc, $1)]) ;; esac fi ;; esac ])# AC_LIBTOOL_PROG_LD_SHLIBS # _LT_AC_FILE_LTDLL_C # ------------------- # Be careful that the start marker always follows a newline. 
# The macro body carries the C source of ltdll.c as shell comment lines
# between the "starts here" and "ends here" markers.  Every #include
# names its header explicitly so that the embedded source is valid C.
AC_DEFUN([_LT_AC_FILE_LTDLL_C], [
# /* ltdll.c starts here */
# #define WIN32_LEAN_AND_MEAN
# #include <windows.h>
# #undef WIN32_LEAN_AND_MEAN
# #include <stdio.h>
#
# #ifndef __CYGWIN__
# #  ifdef __CYGWIN32__
# #    define __CYGWIN__ __CYGWIN32__
# #  endif
# #endif
#
# #ifdef __cplusplus
# extern "C" {
# #endif
# BOOL APIENTRY DllMain (HINSTANCE hInst, DWORD reason, LPVOID reserved);
# #ifdef __cplusplus
# }
# #endif
#
# #ifdef __CYGWIN__
# #include <cygwin/cygwin_dll.h>
# DECLARE_CYGWIN_DLL( DllMain );
# #endif
# HINSTANCE __hDllInstance_base;
#
# BOOL APIENTRY
# DllMain (HINSTANCE hInst, DWORD reason, LPVOID reserved)
# {
#   __hDllInstance_base = hInst;
#   return TRUE;
# }
# /* ltdll.c ends here */
])# _LT_AC_FILE_LTDLL_C


# _LT_AC_TAGVAR(VARNAME, [TAGNAME])
# ---------------------------------
# Expands to VARNAME when TAGNAME is empty, otherwise to VARNAME_TAGNAME.
AC_DEFUN([_LT_AC_TAGVAR], [ifelse([$2], [], [$1], [$1_$2])])


# old names
# Each AM_* name below simply expands to the corresponding AC_* macro,
# forwarding its arguments where the macro takes any.
AC_DEFUN([AM_PROG_LIBTOOL],   [AC_PROG_LIBTOOL])
AC_DEFUN([AM_ENABLE_SHARED],  [AC_ENABLE_SHARED($@)])
AC_DEFUN([AM_ENABLE_STATIC],  [AC_ENABLE_STATIC($@)])
AC_DEFUN([AM_DISABLE_SHARED], [AC_DISABLE_SHARED($@)])
AC_DEFUN([AM_DISABLE_STATIC], [AC_DISABLE_STATIC($@)])
AC_DEFUN([AM_PROG_LD],        [AC_PROG_LD])
AC_DEFUN([AM_PROG_NM],        [AC_PROG_NM])

# This is just to silence aclocal about the macro not being used
ifelse([AC_DISABLE_FAST_INSTALL])

# LT_AC_PROG_GCJ
# --------------
# Look for gcj with AC_CHECK_TOOL, default GCJFLAGS to "-g -O2" when the
# variable is unset (a value set to the empty string is kept), and
# substitute GCJFLAGS.
AC_DEFUN([LT_AC_PROG_GCJ],
[AC_CHECK_TOOL(GCJ, gcj, no)
  test "x${GCJFLAGS+set}" = xset || GCJFLAGS="-g -O2"
  AC_SUBST(GCJFLAGS)
])

# LT_AC_PROG_RC
# -------------
# Look for windres with AC_CHECK_TOOL and store the result in RC.
AC_DEFUN([LT_AC_PROG_RC],
[AC_CHECK_TOOL(RC, windres, no)
])


# Cheap backport of AS_EXECUTABLE_P and required macros
# from Autoconf 2.59; we should not use $as_executable_p directly.

# _AS_TEST_PREPARE
# ----------------
# Defined only when not already provided.  Sets as_executable_p to
# 'test -x' when `test -x /' succeeds, and to 'test -f' otherwise.
m4_ifndef([_AS_TEST_PREPARE],
[m4_defun([_AS_TEST_PREPARE],
[if test -x / >/dev/null 2>&1; then
  as_executable_p='test -x'
else
  as_executable_p='test -f'
fi
])])# _AS_TEST_PREPARE

# AS_EXECUTABLE_P
# ---------------
# Check whether a file is executable.
# Defined only when not already provided.  Expands to the shell test
# held in $as_executable_p applied to the macro's first argument.
m4_ifndef([AS_EXECUTABLE_P],
[m4_defun([AS_EXECUTABLE_P],
[AS_REQUIRE([_AS_TEST_PREPARE])dnl
$as_executable_p $1[]dnl
])])# AS_EXECUTABLE_P

# NOTE: This macro has been submitted for inclusion into   #
#  GNU Autoconf as AC_PROG_SED.  When it is available in   #
#  a released version of Autoconf we should remove this    #
#  macro and use it instead.                               #
# LT_AC_PROG_SED
# --------------
# Check for a fully-functional sed program, that truncates
# as few characters as possible.  Prefer GNU sed if found.
#
# Candidates are every executable `sed' or `gsed' found in $PATH, plus
# /usr/xpg4/bin/sed.  A candidate that reports `GNU' for --version is
# selected immediately.  Otherwise each candidate is fed an input that
# doubles in size every round, and the one that survives the most rounds
# is kept.  The choice is cached in lt_cv_path_SED and substituted as SED.
AC_DEFUN([LT_AC_PROG_SED],
[AC_MSG_CHECKING([for a sed that does not truncate output])
AC_CACHE_VAL(lt_cv_path_SED,
[# Loop through the user's path and test for sed and gsed.
# Then use that list of sed's as ones to test for truncation.
as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
for as_dir in $PATH
do
  IFS=$as_save_IFS
  test -z "$as_dir" && as_dir=.
  for lt_ac_prog in sed gsed; do
    for ac_exec_ext in '' $ac_executable_extensions; do
      if AS_EXECUTABLE_P(["$as_dir/$lt_ac_prog$ac_exec_ext"]); then
        lt_ac_sed_list="$lt_ac_sed_list $as_dir/$lt_ac_prog$ac_exec_ext"
      fi
    done
  done
done
IFS=$as_save_IFS
lt_ac_max=0
lt_ac_count=0
# Add /usr/xpg4/bin/sed as it is typically found on Solaris
# along with /bin/sed that truncates output.
for lt_ac_sed in $lt_ac_sed_list /usr/xpg4/bin/sed; do
  test ! -f $lt_ac_sed && continue
  cat /dev/null > conftest.in
  lt_ac_count=0
  echo $ECHO_N "0123456789$ECHO_C" >conftest.in
  # Check for GNU sed and select it if it is found.
  if "$lt_ac_sed" --version 2>&1 < /dev/null | grep 'GNU' > /dev/null; then
    lt_cv_path_SED=$lt_ac_sed
    break
  fi
  # Double the test input on every round.  The sed script has nothing to
  # match in it, so any difference between input and output, or a sed
  # failure, ends the trial for this candidate.
  while true; do
    cat conftest.in conftest.in >conftest.tmp
    mv conftest.tmp conftest.in
    cp conftest.in conftest.nl
    echo >>conftest.nl
    $lt_ac_sed -e 's/a$//' < conftest.nl >conftest.out || break
    cmp -s conftest.out conftest.nl || break
    # 10000 chars as input seems more than enough
    test $lt_ac_count -gt 10 && break
    lt_ac_count=`expr $lt_ac_count + 1`
    # Remember the candidate that has survived the most rounds so far.
    if test $lt_ac_count -gt $lt_ac_max; then
      lt_ac_max=$lt_ac_count
      lt_cv_path_SED=$lt_ac_sed
    fi
  done
done
])
SED=$lt_cv_path_SED
AC_SUBST([SED])
AC_MSG_RESULT([$SED])
])

# pkg.m4 - Macros to locate and utilise pkg-config.            -*- Autoconf -*-
#
# Copyright © 2004 Scott James Remnant .
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
# configuration script generated by Autoconf, you may include it under
# the same distribution terms that you use for the rest of that program.
# PKG_PROG_PKG_CONFIG([MIN-VERSION])
# ----------------------------------
# Locate pkg-config (unless PKG_CONFIG was given in the environment) and
# check that it is at least MIN-VERSION, which defaults to 0.9.0.
# PKG_CONFIG is reset to the empty string when the version check fails.
AC_DEFUN([PKG_PROG_PKG_CONFIG],
[m4_pattern_forbid([^_?PKG_[A-Z_]+$])
m4_pattern_allow([^PKG_CONFIG(_PATH)?$])
AC_ARG_VAR([PKG_CONFIG], [path to pkg-config utility])dnl
if test "x$ac_cv_env_PKG_CONFIG_set" != "xset"; then
	AC_PATH_TOOL([PKG_CONFIG], [pkg-config])
fi
if test -n "$PKG_CONFIG"; then
	_pkg_min_version=m4_default([$1], [0.9.0])
	AC_MSG_CHECKING([pkg-config is at least version $_pkg_min_version])
	if $PKG_CONFIG --atleast-pkgconfig-version $_pkg_min_version; then
		AC_MSG_RESULT([yes])
	else
		AC_MSG_RESULT([no])
		PKG_CONFIG=""
	fi

fi[]dnl
])# PKG_PROG_PKG_CONFIG

# PKG_CHECK_EXISTS(MODULES, [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND])
#
# Check to see whether a particular set of modules exists.  Similar
# to PKG_CHECK_MODULES(), but does not set variables or print errors.
#
#
# Similar to PKG_CHECK_MODULES, make sure that the first instance of
# this or PKG_CHECK_MODULES is called, or make sure to call
# PKG_CHECK_EXISTS manually
# --------------------------------------------------------------
AC_DEFUN([PKG_CHECK_EXISTS],
[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl
if test -n "$PKG_CONFIG" && \
    AC_RUN_LOG([$PKG_CONFIG --exists --print-errors "$1"]); then
  m4_ifval([$2], [$2], [:])
m4_ifvaln([$3], [else
  $3])dnl
fi])


# _PKG_CONFIG([VARIABLE], [COMMAND], [MODULES])
# ---------------------------------------------
# Set pkg_cv_VARIABLE.  A non-empty shell variable VARIABLE takes
# precedence; otherwise the value is the output of
# `pkg-config --COMMAND MODULES'.  pkg_failed becomes `yes' when the
# modules do not exist and `untried' when PKG_CONFIG is empty.
m4_define([_PKG_CONFIG],
[if test -n "$PKG_CONFIG"; then
    if test -n "$$1"; then
        pkg_cv_[]$1="$$1"
    else
        PKG_CHECK_EXISTS([$3],
                         [pkg_cv_[]$1=`$PKG_CONFIG --[]$2 "$3" 2>/dev/null`],
                         [pkg_failed=yes])
    fi
else
	pkg_failed=untried
fi[]dnl
])# _PKG_CONFIG

# _PKG_SHORT_ERRORS_SUPPORTED
# -----------------------------
# Set _pkg_short_errors_supported to `yes' when pkg-config is at least
# version 0.20, and to `no' otherwise.
AC_DEFUN([_PKG_SHORT_ERRORS_SUPPORTED],
[AC_REQUIRE([PKG_PROG_PKG_CONFIG])
if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then
        _pkg_short_errors_supported=yes
else
        _pkg_short_errors_supported=no
fi[]dnl
])# _PKG_SHORT_ERRORS_SUPPORTED


# PKG_CHECK_MODULES(VARIABLE-PREFIX, MODULES, [ACTION-IF-FOUND],
# [ACTION-IF-NOT-FOUND])
#
#
# Note that if there is a possibility the first call to
# PKG_CHECK_MODULES might not happen, you should be sure to include an
# explicit call to PKG_PROG_PKG_CONFIG in your configure.ac
#
# On success VARIABLE-PREFIX_CFLAGS and VARIABLE-PREFIX_LIBS are set and
# ACTION-IF-FOUND is run.  Without ACTION-IF-NOT-FOUND a failure aborts
# configure with a message that tells the user where to obtain
# pkg-config.
#
# --------------------------------------------------------------
AC_DEFUN([PKG_CHECK_MODULES],
[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl
AC_ARG_VAR([$1][_CFLAGS], [C compiler flags for $1, overriding pkg-config])dnl
AC_ARG_VAR([$1][_LIBS], [linker flags for $1, overriding pkg-config])dnl

pkg_failed=no
AC_MSG_CHECKING([for $1])

_PKG_CONFIG([$1][_CFLAGS], [cflags], [$2])
_PKG_CONFIG([$1][_LIBS], [libs], [$2])

m4_define([_PKG_TEXT], [Alternatively, you may set the environment variables $1[]_CFLAGS
and $1[]_LIBS to avoid the need to call pkg-config.
See the pkg-config man page for more details.])

if test $pkg_failed = yes; then
        _PKG_SHORT_ERRORS_SUPPORTED
        if test $_pkg_short_errors_supported = yes; then
	        $1[]_PKG_ERRORS=`$PKG_CONFIG --short-errors --errors-to-stdout --print-errors "$2"`
        else
	        $1[]_PKG_ERRORS=`$PKG_CONFIG --errors-to-stdout --print-errors "$2"`
        fi
	# Put the nasty error message in config.log where it belongs
	echo "$$1[]_PKG_ERRORS" >&AS_MESSAGE_LOG_FD

	ifelse([$4], , [AC_MSG_ERROR(dnl
[Package requirements ($2) were not met:

$$1_PKG_ERRORS

Consider adjusting the PKG_CONFIG_PATH environment variable if you
installed software in a non-standard prefix.

_PKG_TEXT
])],
		[AC_MSG_RESULT([no])
                $4])
elif test $pkg_failed = untried; then
	ifelse([$4], , [AC_MSG_FAILURE(dnl
[The pkg-config script could not be found or is too old.  Make sure it
is in your PATH or set the PKG_CONFIG environment variable to the full
path to pkg-config.

_PKG_TEXT

To get pkg-config, see <http://pkg-config.freedesktop.org/>.])],
		[$4])
else
	$1[]_CFLAGS=$pkg_cv_[]$1[]_CFLAGS
	$1[]_LIBS=$pkg_cv_[]$1[]_LIBS
        AC_MSG_RESULT([yes])
	ifelse([$3], , :, [$3])
fi[]dnl
])# PKG_CHECK_MODULES

# Copyright (C) 2002, 2003, 2005, 2006  Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.

# AM_AUTOMAKE_VERSION(VERSION)
# ----------------------------
# Automake X.Y traces this macro to ensure aclocal.m4 has been
# generated from the m4 files accompanying Automake X.Y.
# (This private macro should not be called outside this file.)
# Sets am__api_version and aborts when VERSION is not 1.10.
AC_DEFUN([AM_AUTOMAKE_VERSION],
[am__api_version='1.10'
dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to
dnl require some minimum version.  Point them to the right macro.
m4_if([$1], [1.10], [],
      [AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl
])

# _AM_AUTOCONF_VERSION(VERSION)
# -----------------------------
# aclocal traces this macro to find the Autoconf version.
# This is a private macro too.  Using m4_define simplifies
# the logic in aclocal, which can simply ignore this definition.
m4_define([_AM_AUTOCONF_VERSION], [])

# AM_SET_CURRENT_AUTOMAKE_VERSION
# -------------------------------
# Call AM_AUTOMAKE_VERSION and _AM_AUTOCONF_VERSION so they can be traced.
# This function is AC_REQUIREd by AM_INIT_AUTOMAKE.
AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION],
[AM_AUTOMAKE_VERSION([1.10])dnl
_AM_AUTOCONF_VERSION(m4_PACKAGE_VERSION)])

# AM_AUX_DIR_EXPAND                                         -*- Autoconf -*-

# Copyright (C) 2001, 2003, 2005  Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.

# For projects using AC_CONFIG_AUX_DIR([foo]), Autoconf sets
# $ac_aux_dir to `$srcdir/foo'.  In other projects, it is set to
# `$srcdir', `$srcdir/..', or `$srcdir/../..'.
#
# Of course, Automake must honor this variable whenever it calls a
# tool from the auxiliary directory.  The problem is that $srcdir (and
# therefore $ac_aux_dir as well) can be either absolute or relative,
# depending on how configure is run.  This is pretty annoying, since
# it makes $ac_aux_dir quite unusable in subdirectories: in the top
# source directory, any form will work fine, but in subdirectories a
# relative path needs to be adjusted first.
#
# $ac_aux_dir/missing
#    fails when called from a subdirectory if $ac_aux_dir is relative
# $top_srcdir/$ac_aux_dir/missing
#    fails if $ac_aux_dir is absolute,
#    fails when called from a subdirectory in a VPATH build with
#          a relative $ac_aux_dir
#
# The reason for the latter failure is that $top_srcdir and $ac_aux_dir
# are both prefixed by $srcdir.  In an in-source build this is usually
# harmless because $srcdir is `.', but things will break when you
# start a VPATH build or use an absolute $srcdir.
#
# So we could use something similar to $top_srcdir/$ac_aux_dir/missing,
# iff we strip the leading $srcdir from $ac_aux_dir.  That would be:
#   am_aux_dir='\$(top_srcdir)/'`expr "$ac_aux_dir" : "$srcdir//*\(.*\)"`
# and then we would define $MISSING as
#   MISSING="\${SHELL} $am_aux_dir/missing"
# This will work as long as MISSING is not called from configure, because
# unfortunately $(top_srcdir) has no meaning in configure.
# However there are other variables, like CC, which are often used in
# configure, and could therefore not use this "fixed" $ac_aux_dir.
#
# Another solution, used here, is to always expand $ac_aux_dir to an
# absolute PATH.  The drawback is that using absolute paths prevents a
# configured tree from being moved without reconfiguration.
#
# $ac_aux_dir is quoted in the `cd' below so that a directory name
# containing whitespace is passed to `cd' as a single word.

AC_DEFUN([AM_AUX_DIR_EXPAND],
[dnl Rely on autoconf to set up CDPATH properly.
AC_PREREQ([2.50])dnl
# expand $ac_aux_dir to an absolute path
am_aux_dir=`cd "$ac_aux_dir" && pwd`
])

# AM_CONDITIONAL                                            -*- Autoconf -*-

# Copyright (C) 1997, 2000, 2001, 2003, 2004, 2005, 2006
# Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.

# serial 8

# AM_CONDITIONAL(NAME, SHELL-CONDITION)
# -------------------------------------
# Define a conditional.
#
# NAME must not be TRUE or FALSE.  SHELL-CONDITION is run by configure:
# when it succeeds NAME_TRUE is set empty and NAME_FALSE to `#',
# otherwise the two values are swapped.  Both variables are substituted.
# A check registered with AC_CONFIG_COMMANDS_PRE reports an error when
# both variables are still empty, i.e. the conditional was never defined.
AC_DEFUN([AM_CONDITIONAL],
[AC_PREREQ(2.52)dnl
 ifelse([$1], [TRUE],  [AC_FATAL([$0: invalid condition: $1])],
	[$1], [FALSE], [AC_FATAL([$0: invalid condition: $1])])dnl
AC_SUBST([$1_TRUE])dnl
AC_SUBST([$1_FALSE])dnl
_AM_SUBST_NOTMAKE([$1_TRUE])dnl
_AM_SUBST_NOTMAKE([$1_FALSE])dnl
if $2; then
  $1_TRUE=
  $1_FALSE='#'
else
  $1_TRUE='#'
  $1_FALSE=
fi
AC_CONFIG_COMMANDS_PRE(
[if test -z "${$1_TRUE}" && test -z "${$1_FALSE}"; then
  AC_MSG_ERROR([[conditional "$1" was never defined.
Usually this means the macro was only invoked conditionally.]])
fi])])

# Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006
# Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.

# serial 9

# There are a few dirty hacks below to avoid letting `AC_PROG_CC' be
# written in clear, in which case automake, when reading aclocal.m4,
# will think it sees a *use*, and therefore will trigger all its
# C support machinery.  Also note that it means that autoscan, seeing
# CC etc. in the Makefile, will ask for an AC_PROG_CC use...


# _AM_DEPENDENCIES(NAME)
# ----------------------
# See how the compiler implements dependency checking.
# NAME is "CC", "CXX", "GCJ", or "OBJC".
# We try a few techniques and use that to set a single cache variable.
#
# We don't AC_REQUIRE the corresponding AC_PROG_CC since the latter was
# modified to invoke _AM_DEPENDENCIES(CC); we would have a circular
# dependency, and given that the user is not expected to run this macro,
# just rely on AC_PROG_CC.
# The detected mode is cached in am_cv_NAME_dependencies_compiler_type,
# substituted as NAMEDEPMODE, and the conditional am__fastdepNAME is
# true when dependency tracking is not disabled and the mode is gcc3.
AC_DEFUN([_AM_DEPENDENCIES],
[AC_REQUIRE([AM_SET_DEPDIR])dnl
AC_REQUIRE([AM_OUTPUT_DEPENDENCY_COMMANDS])dnl
AC_REQUIRE([AM_MAKE_INCLUDE])dnl
AC_REQUIRE([AM_DEP_TRACK])dnl

ifelse([$1], CC,   [depcc="$CC"   am_compiler_list=],
       [$1], CXX,  [depcc="$CXX"  am_compiler_list=],
       [$1], OBJC, [depcc="$OBJC" am_compiler_list='gcc3 gcc'],
       [$1], UPC,  [depcc="$UPC"  am_compiler_list=],
       [$1], GCJ,  [depcc="$GCJ"  am_compiler_list='gcc3 gcc'],
                   [depcc="$$1"   am_compiler_list=])

AC_CACHE_CHECK([dependency style of $depcc],
               [am_cv_$1_dependencies_compiler_type],
[if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then
  # We make a subdir and do the tests there.  Otherwise we can end up
  # making bogus files that we don't know about and never remove.  For
  # instance it was reported that on HP-UX the gcc test will end up
  # making a dummy file named `D' -- because `-MD' means `put the output
  # in D'.
  mkdir conftest.dir
  # Copy depcomp to subdir because otherwise we won't find it if we're
  # using a relative directory.
  cp "$am_depcomp" conftest.dir
  cd conftest.dir
  # We will build objects and dependencies in a subdirectory because
  # it helps to detect inapplicable dependency modes.  For instance
  # both Tru64's cc and ICC support -MD to output dependencies as a
  # side effect of compilation, but ICC will put the dependencies in
  # the current directory while Tru64 will put them in the object
  # directory.
  mkdir sub

  am_cv_$1_dependencies_compiler_type=none
  if test "$am_compiler_list" = ""; then
     am_compiler_list=`sed -n ['s/^#*\([a-zA-Z0-9]*\))$/\1/p'] < ./depcomp`
  fi
  for depmode in $am_compiler_list; do
    # Setup a source with many dependencies, because some compilers
    # like to wrap large dependency lists on column 80 (with \), and
    # we should not choose a depcomp mode which is confused by this.
    #
    # We need to recreate these files for each test, as the compiler may
    # overwrite some of them when testing with obscure command lines.
    # This happens at least with the AIX C compiler.
    : > sub/conftest.c
    for i in 1 2 3 4 5 6; do
      echo '#include "conftst'$i'.h"' >> sub/conftest.c
      # Using `: > sub/conftst$i.h' creates only sub/conftst1.h with
      # Solaris 8's {/usr,}/bin/sh.
      touch sub/conftst$i.h
    done
    echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf

    case $depmode in
    nosideeffect)
      # after this tag, mechanisms are not by side-effect, so they'll
      # only be used when explicitly requested
      if test "x$enable_dependency_tracking" = xyes; then
	continue
      else
	break
      fi
      ;;
    none) break ;;
    esac
    # We check with `-c' and `-o' for the sake of the "dashmstdout"
    # mode.  It turns out that the SunPro C++ compiler does not properly
    # handle `-M -o', and we need to detect this.
    if depmode=$depmode \
       source=sub/conftest.c object=sub/conftest.${OBJEXT-o} \
       depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \
       $SHELL ./depcomp $depcc -c -o sub/conftest.${OBJEXT-o} sub/conftest.c \
         >/dev/null 2>conftest.err &&
       grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 &&
       grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 &&
       grep sub/conftest.${OBJEXT-o} sub/conftest.Po > /dev/null 2>&1 &&
       ${MAKE-make} -s -f confmf > /dev/null 2>&1; then
      # icc doesn't choke on unknown options, it will just issue warnings
      # or remarks (even with -Werror).  So we grep stderr for any message
      # that says an option was ignored or not supported.
      # When given -MP, icc 7.0 and 7.1 complain thusly:
      #   icc: Command line warning: ignoring option '-M'; no argument required
      # The diagnosis changed in icc 8.0:
      #   icc: Command line remark: option '-MP' not supported
      if (grep 'ignoring option' conftest.err ||
          grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else
        am_cv_$1_dependencies_compiler_type=$depmode
        break
      fi
    fi
  done

  cd ..
  rm -rf conftest.dir
else
  am_cv_$1_dependencies_compiler_type=none
fi
])
AC_SUBST([$1DEPMODE], [depmode=$am_cv_$1_dependencies_compiler_type])
AM_CONDITIONAL([am__fastdep$1], [
  test "x$enable_dependency_tracking" != xno \
  && test "$am_cv_$1_dependencies_compiler_type" = gcc3])
])


# AM_SET_DEPDIR
# -------------
# Choose a directory name for dependency files.
# This macro is AC_REQUIREd in _AM_DEPENDENCIES
AC_DEFUN([AM_SET_DEPDIR],
[AC_REQUIRE([AM_SET_LEADING_DOT])dnl
AC_SUBST([DEPDIR], ["${am__leading_dot}deps"])dnl
])


# AM_DEP_TRACK
# ------------
# Declare --enable/--disable-dependency-tracking.  Unless tracking is
# disabled, am_depcomp points at depcomp in $ac_aux_dir and
# AMDEPBACKSLASH is a backslash.  Also defines the AMDEP conditional.
AC_DEFUN([AM_DEP_TRACK],
[AC_ARG_ENABLE(dependency-tracking,
[  --disable-dependency-tracking  speeds up one-time build
  --enable-dependency-tracking   do not reject slow dependency extractors])
if test "x$enable_dependency_tracking" != xno; then
  am_depcomp="$ac_aux_dir/depcomp"
  AMDEPBACKSLASH='\'
fi
AM_CONDITIONAL([AMDEP], [test "x$enable_dependency_tracking" != xno])
AC_SUBST([AMDEPBACKSLASH])dnl
_AM_SUBST_NOTMAKE([AMDEPBACKSLASH])dnl
])

# Generate code to set up dependency tracking.              -*- Autoconf -*-

# Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005
# Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.

#serial 3

# _AM_OUTPUT_DEPENDENCY_COMMANDS
# ------------------------------
# For every Automake-generated Makefile listed in $CONFIG_FILES, create
# a dummy file for each dependency file the Makefile includes, so that
# the include directives can be resolved.  Makefiles that define no
# DEPDIR or no am__include value are skipped.
AC_DEFUN([_AM_OUTPUT_DEPENDENCY_COMMANDS],
[for mf in $CONFIG_FILES; do
  # Strip MF so we end up with the name of the file.
  mf=`echo "$mf" | sed -e 's/:.*$//'`
  # Check whether this is an Automake generated Makefile or not.
  # We used to match only the files named `Makefile.in', but
  # some people rename them; so instead we look at the file content.
  # Grep'ing the first line is not enough: some people post-process
  # each Makefile.in and add a new line on top of each file to say so.
  # Grep'ing the whole file is not good either: AIX grep has a line
  # limit of 2048, but all sed's we know understand at least 4000.
  if sed 10q "$mf" | grep '^#.*generated by automake' > /dev/null 2>&1; then
    dirpart=`AS_DIRNAME("$mf")`
  else
    continue
  fi
  # Extract the definition of DEPDIR, am__include, and am__quote
  # from the Makefile without running `make'.
  DEPDIR=`sed -n 's/^DEPDIR = //p' < "$mf"`
  test -z "$DEPDIR" && continue
  am__include=`sed -n 's/^am__include = //p' < "$mf"`
  # Test the value of the variable, not a literal string, so that a
  # Makefile without an include directive is really skipped.
  test -z "$am__include" && continue
  am__quote=`sed -n 's/^am__quote = //p' < "$mf"`
  # When using ansi2knr, U may be empty or an underscore; expand it
  U=`sed -n 's/^U = //p' < "$mf"`
  # Find all dependency output files, they are included files with
  # $(DEPDIR) in their names.  We invoke sed twice because it is the
  # simplest approach to changing $(DEPDIR) to its actual value in the
  # expansion.
  for file in `sed -n "
    s/^$am__include $am__quote\(.*(DEPDIR).*\)$am__quote"'$/\1/p' <"$mf" | \
       sed -e 's/\$(DEPDIR)/'"$DEPDIR"'/g' -e 's/\$U/'"$U"'/g'`; do
    # Make sure the directory exists.
    test -f "$dirpart/$file" && continue
    fdir=`AS_DIRNAME(["$file"])`
    AS_MKDIR_P([$dirpart/$fdir])
    # echo "creating $dirpart/$file"
    echo '# dummy' > "$dirpart/$file"
  done
done
])# _AM_OUTPUT_DEPENDENCY_COMMANDS


# AM_OUTPUT_DEPENDENCY_COMMANDS
# -----------------------------
# This macro should only be invoked once -- use via AC_REQUIRE.
#
# This code is only required when automatic dependency tracking
# is enabled.  FIXME.  This creates each `.P' file that we will
# need in order to bootstrap the dependency handling code.
AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS],
[AC_CONFIG_COMMANDS([depfiles],
     [test x"$AMDEP_TRUE" != x"" || _AM_OUTPUT_DEPENDENCY_COMMANDS],
     [AMDEP_TRUE="$AMDEP_TRUE" ac_aux_dir="$ac_aux_dir"])
])

# Do all the work for Automake.                             -*- Autoconf -*-

# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
# 2005, 2006 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.

# serial 12

# This macro actually does too much.  Some checks are only needed if
# your package does certain things.  But this isn't really a big deal.

# AM_INIT_AUTOMAKE(PACKAGE, VERSION, [NO-DEFINE])
# AM_INIT_AUTOMAKE([OPTIONS])
# -----------------------------------------------
# The call with PACKAGE and VERSION arguments is the old style
# call (pre autoconf-2.50), which is being phased out.  PACKAGE
# and VERSION should now be passed to AC_INIT and removed from
# the call to AM_INIT_AUTOMAKE.
# We support both call styles for the transition.  After
# the next Automake release, Autoconf can make the AC_INIT
# arguments mandatory, and then we can depend on a new Autoconf
# release and drop the old call support.
#
# A non-empty second argument selects the old style; a non-empty third
# argument then sets the `no-define' option.  In the new style the
# first argument is handed to _AM_SET_OPTIONS, and PACKAGE and VERSION
# are taken from AC_PACKAGE_TARNAME and AC_PACKAGE_VERSION.
AC_DEFUN([AM_INIT_AUTOMAKE],
[AC_PREREQ([2.60])dnl
dnl Autoconf wants to disallow AM_ names.  We explicitly allow
dnl the ones we care about.
m4_pattern_allow([^AM_[A-Z]+FLAGS$])dnl
AC_REQUIRE([AM_SET_CURRENT_AUTOMAKE_VERSION])dnl
AC_REQUIRE([AC_PROG_INSTALL])dnl
if test "`cd $srcdir && pwd`" != "`pwd`"; then
  # Use -I$(srcdir) only when $(srcdir) != ., so that make's output
  # is not polluted with repeated "-I."
  AC_SUBST([am__isrc], [' -I$(srcdir)'])_AM_SUBST_NOTMAKE([am__isrc])dnl
  # test to see if srcdir already configured
  if test -f $srcdir/config.status; then
    AC_MSG_ERROR([source directory already configured; run "make distclean" there first])
  fi
fi

# test whether we have cygpath
if test -z "$CYGPATH_W"; then
  if (cygpath --version) >/dev/null 2>/dev/null; then
    CYGPATH_W='cygpath -w'
  else
    CYGPATH_W=echo
  fi
fi
AC_SUBST([CYGPATH_W])

# Define the identity of the package.
dnl Distinguish between old-style and new-style calls.
m4_ifval([$2],
[m4_ifval([$3], [_AM_SET_OPTION([no-define])])dnl
 AC_SUBST([PACKAGE], [$1])dnl
 AC_SUBST([VERSION], [$2])],
[_AM_SET_OPTIONS([$1])dnl
dnl Diagnose old-style AC_INIT with new-style AM_INIT_AUTOMAKE.
m4_if(m4_ifdef([AC_PACKAGE_NAME], 1)m4_ifdef([AC_PACKAGE_VERSION], 1), 11,,
  [m4_fatal([AC_INIT should be called with package and version arguments])])dnl
 AC_SUBST([PACKAGE], ['AC_PACKAGE_TARNAME'])dnl
 AC_SUBST([VERSION], ['AC_PACKAGE_VERSION'])])dnl

_AM_IF_OPTION([no-define],,
[AC_DEFINE_UNQUOTED(PACKAGE, "$PACKAGE", [Name of package])
 AC_DEFINE_UNQUOTED(VERSION, "$VERSION", [Version number of package])])dnl

# Some tools Automake needs.
AC_REQUIRE([AM_SANITY_CHECK])dnl
AC_REQUIRE([AC_ARG_PROGRAM])dnl
AM_MISSING_PROG(ACLOCAL, aclocal-${am__api_version})
AM_MISSING_PROG(AUTOCONF, autoconf)
AM_MISSING_PROG(AUTOMAKE, automake-${am__api_version})
AM_MISSING_PROG(AUTOHEADER, autoheader)
AM_MISSING_PROG(MAKEINFO, makeinfo)
AM_PROG_INSTALL_SH
AM_PROG_INSTALL_STRIP
AC_REQUIRE([AM_PROG_MKDIR_P])dnl
# We need awk for the "check" target.  The system "awk" is bad on
# some platforms.
AC_REQUIRE([AC_PROG_AWK])dnl
AC_REQUIRE([AC_PROG_MAKE_SET])dnl
AC_REQUIRE([AM_SET_LEADING_DOT])dnl
_AM_IF_OPTION([tar-ustar], [_AM_PROG_TAR([ustar])],
              [_AM_IF_OPTION([tar-pax], [_AM_PROG_TAR([pax])],
                             [_AM_PROG_TAR([v7])])])
_AM_IF_OPTION([no-dependencies],,
[AC_PROVIDE_IFELSE([AC_PROG_CC],
                  [_AM_DEPENDENCIES(CC)],
                  [define([AC_PROG_CC],
                          defn([AC_PROG_CC])[_AM_DEPENDENCIES(CC)])])dnl
AC_PROVIDE_IFELSE([AC_PROG_CXX],
                  [_AM_DEPENDENCIES(CXX)],
                  [define([AC_PROG_CXX],
                          defn([AC_PROG_CXX])[_AM_DEPENDENCIES(CXX)])])dnl
AC_PROVIDE_IFELSE([AC_PROG_OBJC],
                  [_AM_DEPENDENCIES(OBJC)],
                  [define([AC_PROG_OBJC],
                          defn([AC_PROG_OBJC])[_AM_DEPENDENCIES(OBJC)])])dnl
])
])


# When config.status generates a header, we must update the stamp-h file.
# This file resides in the same directory as the config header
# that is generated.  The stamp files are numbered to have different names.
# Autoconf calls _AC_AM_CONFIG_HEADER_HOOK (when defined) in the
# loop where config.status creates the headers, so we can generate
# our stamp files there.
#
# The single argument is the header being generated. The stamp file is
# written as stamp-hN in that header's directory, where N is the 1-based
# position of the header in $config_headers (an entry matches either
# exactly or as the part before a colon). The extra ":" word in the loop
# list is presumably there so the list is never empty -- TODO confirm.
AC_DEFUN([_AC_AM_CONFIG_HEADER_HOOK],
[# Compute $1's index in $config_headers.
_am_stamp_count=1
for _am_header in $config_headers :; do
  case $_am_header in
    $1 | $1:* )
      break ;;
    * )
      _am_stamp_count=`expr $_am_stamp_count + 1` ;;
  esac
done
echo "timestamp for $1" >`AS_DIRNAME([$1])`/stamp-h[]$_am_stamp_count])

# Copyright (C) 2001, 2003, 2005 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.

# AM_PROG_INSTALL_SH
# ------------------
# Define $install_sh.
# A value already present in install_sh is kept; otherwise it defaults
# to "$(SHELL) $am_aux_dir/install-sh" (the $(SHELL) part is left
# unexpanded). The result is passed to AC_SUBST.
AC_DEFUN([AM_PROG_INSTALL_SH],
[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl
install_sh=${install_sh-"\$(SHELL) $am_aux_dir/install-sh"}
AC_SUBST(install_sh)])

# Copyright (C) 2003, 2005 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.

# serial 2

# Check whether the underlying file-system supports filenames
# with a leading dot. For instance MS-DOS doesn't.
# Sets am__leading_dot to "." when a directory named .tst can be
# created in the current directory, and to "_" otherwise.
AC_DEFUN([AM_SET_LEADING_DOT],
[rm -rf .tst 2>/dev/null
mkdir .tst 2>/dev/null
if test -d .tst; then
  am__leading_dot=.
else
  am__leading_dot=_
fi
rmdir .tst 2>/dev/null
AC_SUBST([am__leading_dot])])

# Check to see how 'make' treats includes. -*- Autoconf -*-

# Copyright (C) 2001, 2002, 2003, 2005 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.

# serial 3

# AM_MAKE_INCLUDE()
# -----------------
# Check to see how make treats includes.
# Probes $MAKE (default: make) with two tiny makefiles and records the
# include syntax that works:
#   GNU style: am__include=include,  am__quote empty
#   BSD style: am__include=.include, am__quote is a double quote
#   neither:   am__include=#,        am__quote empty
# The recipe line inside the here-document must start with a TAB.
AC_DEFUN([AM_MAKE_INCLUDE],
[am_make=${MAKE-make}
cat > confinc << 'END'
am__doit:
	@echo done
.PHONY: am__doit
END
# If we don't find an include directive, just comment out the code.
AC_MSG_CHECKING([for style of include used by $am_make])
am__include="#"
am__quote=
_am_result=none
# First try GNU make style include.
echo "include confinc" > confmf
# We grep out `Entering directory' and `Leaving directory'
# messages which can occur if `w' ends up in MAKEFLAGS.
# In particular we don't look at `^make:' because GNU make might
# be invoked under some other name (usually "gmake"), in which
# case it prints its new name instead of `make'.
if test "`$am_make -s -f confmf 2> /dev/null | grep -v 'ing directory'`" = "done"; then
   am__include=include
   am__quote=
   _am_result=GNU
fi
# Now try BSD make style include.
if test "$am__include" = "#"; then
   echo '.include "confinc"' > confmf
   if test "`$am_make -s -f confmf 2> /dev/null`" = "done"; then
      am__include=.include
      am__quote="\""
      _am_result=BSD
   fi
fi
AC_SUBST([am__include])
AC_SUBST([am__quote])
AC_MSG_RESULT([$_am_result])
rm -f confinc confmf
])

# Fake the existence of programs that GNU maintainers use. -*- Autoconf -*-

# Copyright (C) 1997, 1999, 2000, 2001, 2003, 2004, 2005
# Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.

# serial 5

# AM_MISSING_PROG(NAME, PROGRAM)
# ------------------------------
# Keep a value already present in the shell variable NAME; otherwise set
# it to ${am_missing_run} followed by PROGRAM. NAME is then substituted.
AC_DEFUN([AM_MISSING_PROG],
[AC_REQUIRE([AM_MISSING_HAS_RUN])
$1=${$1-"${am_missing_run}$2"}
AC_SUBST($1)])


# AM_MISSING_HAS_RUN
# ------------------
# Define MISSING if not defined so far and test if it supports --run.
# If it does, set am_missing_run to use it, otherwise, to nothing.
# MISSING defaults to "${SHELL} $am_aux_dir/missing" with ${SHELL} left
# unexpanded, which is why the probe below goes through eval. A failed
# probe only produces a warning and leaves am_missing_run empty.
AC_DEFUN([AM_MISSING_HAS_RUN],
[AC_REQUIRE([AM_AUX_DIR_EXPAND])dnl
AC_REQUIRE_AUX_FILE([missing])dnl
test x"${MISSING+set}" = xset || MISSING="\${SHELL} $am_aux_dir/missing"
# Use eval to expand $SHELL
if eval "$MISSING --run true"; then
  am_missing_run="$MISSING --run "
else
  am_missing_run=
  AC_MSG_WARN([`missing' script is too old or missing])
fi
])

# Copyright (C) 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.

# AM_PROG_MKDIR_P
# ---------------
# Check for `mkdir -p'.
# mkdir_p starts out as a copy of $MKDIR_P. Values that begin with a
# slash, a backslash or a dollar sign, or that look like a drive-letter
# path, are kept as they are; any other value containing a slash gets
# $(top_builddir)/ prepended; values without a slash are left alone.
AC_DEFUN([AM_PROG_MKDIR_P],
[AC_PREREQ([2.60])dnl
AC_REQUIRE([AC_PROG_MKDIR_P])dnl
dnl Automake 1.8 to 1.9.6 used to define mkdir_p. We now use MKDIR_P,
dnl while keeping a definition of mkdir_p for backward compatibility.
dnl @MKDIR_P@ is magic: AC_OUTPUT adjusts its value for each Makefile.
dnl However we cannot define mkdir_p as $(MKDIR_P) for the sake of
dnl Makefile.ins that do not define MKDIR_P, so we do our own
dnl adjustment using top_builddir (which is defined more often than
dnl MKDIR_P).
AC_SUBST([mkdir_p], ["$MKDIR_P"])dnl
case $mkdir_p in
  [[\\/$]]* | ?:[[\\/]]*) ;;
  */*) mkdir_p="\$(top_builddir)/$mkdir_p" ;;
esac
])

# Helper functions for option handling. -*- Autoconf -*-

# Copyright (C) 2001, 2002, 2003, 2005 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.

# serial 3

# _AM_MANGLE_OPTION(NAME)
# -----------------------
# Expand to _AM_OPTION_ followed by NAME, with every character of NAME
# that is not a letter, a digit or an underscore replaced by an
# underscore.
AC_DEFUN([_AM_MANGLE_OPTION],
[[_AM_OPTION_]m4_bpatsubst($1, [[^a-zA-Z0-9_]], [_])])

# _AM_SET_OPTION(NAME)
# ------------------------------
# Set option NAME. Presently that only means defining a flag for this option.
# The flag is the m4 macro named by _AM_MANGLE_OPTION(NAME), defined to 1.
AC_DEFUN([_AM_SET_OPTION],
[m4_define(_AM_MANGLE_OPTION([$1]), 1)])

# _AM_SET_OPTIONS(OPTIONS)
# ----------------------------------
# OPTIONS is a space-separated list of Automake options.
# Each element is passed to _AM_SET_OPTION in turn.
AC_DEFUN([_AM_SET_OPTIONS],
[AC_FOREACH([_AM_Option], [$1], [_AM_SET_OPTION(_AM_Option)])])

# _AM_IF_OPTION(OPTION, IF-SET, [IF-NOT-SET])
# -------------------------------------------
# Execute IF-SET if OPTION is set, IF-NOT-SET otherwise.
AC_DEFUN([_AM_IF_OPTION],
[m4_ifset(_AM_MANGLE_OPTION([$1]), [$2], [$3])])

# Check to make sure that the build environment is sane. -*- Autoconf -*-

# Copyright (C) 1996, 1997, 2000, 2001, 2003, 2005
# Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.

# serial 4

# AM_SANITY_CHECK
# ---------------
# Creates conftest.file after a one second sleep and lists it together
# with $srcdir/configure, newest first. The check passes only when
# conftest.file is listed first; otherwise configure stops with an
# error about the system clock. A listing that matches neither expected
# order is reported as a broken ls.
AC_DEFUN([AM_SANITY_CHECK],
[AC_MSG_CHECKING([whether build environment is sane])
# Just in case
sleep 1
echo timestamp > conftest.file
# Do `set' in a subshell so we don't clobber the current shell's
# arguments. Must try -L first in case configure is actually a
# symlink; some systems play weird games with the mod time of symlinks
# (eg FreeBSD returns the mod time of the symlink's containing
# directory).
if (
   set X `ls -Lt $srcdir/configure conftest.file 2> /dev/null`
   if test "$[*]" = "X"; then
      # -L didn't work.
      set X `ls -t $srcdir/configure conftest.file`
   fi
   rm -f conftest.file
   if test "$[*]" != "X $srcdir/configure conftest.file" \
      && test "$[*]" != "X conftest.file $srcdir/configure"; then

      # If neither matched, then we have a broken ls. This can happen
      # if, for instance, CONFIG_SHELL is bash and it inherits a
      # broken ls alias from the environment. This has actually
      # happened. Such a system could not be considered "sane".
      AC_MSG_ERROR([ls -t appears to fail. Make sure there is not a broken alias in your environment])
   fi

   test "$[2]" = conftest.file
   )
then
   # Ok.
   :
else
   AC_MSG_ERROR([newly created file is older than distributed files! Check your system clock])
fi
AC_MSG_RESULT(yes)])

# Copyright (C) 2001, 2003, 2005 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.

# AM_PROG_INSTALL_STRIP
# ---------------------
# One issue with vendor `install' (even GNU) is that you can't
# specify the program used to strip binaries. This is especially
# annoying in cross-compiling environments, where the build's strip
# is unlikely to handle the host's binaries.
# Fortunately install-sh will honor a STRIPPROG variable, so we
# always use install-sh in `make install-strip', and initialize
# STRIPPROG with the value of the STRIP variable (set by the user).
AC_DEFUN([AM_PROG_INSTALL_STRIP],
[AC_REQUIRE([AM_PROG_INSTALL_SH])dnl
# Installed binaries are usually stripped using `strip' when the user
# runs `make install-strip'. However `strip' might not be the right
# tool to use in cross-compilation environments, therefore Automake
# will honor the `STRIP' environment variable to overrule this program.
dnl Don't test for $cross_compiling = yes, because it might be `maybe'.
if test "$cross_compiling" != no; then
  AC_CHECK_TOOL([STRIP], [strip], :)
fi
INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s"
AC_SUBST([INSTALL_STRIP_PROGRAM])])

# Copyright (C) 2006 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.

# _AM_SUBST_NOTMAKE(VARIABLE)
# ---------------------------
# Prevent Automake from outputting VARIABLE = @VARIABLE@ in Makefile.in.
# This macro is traced by Automake.
# The definition is deliberately empty: it expands to nothing.
AC_DEFUN([_AM_SUBST_NOTMAKE])

# Check how to create a tarball. -*- Autoconf -*-

# Copyright (C) 2004, 2005 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.

# serial 2

# _AM_PROG_TAR(FORMAT)
# --------------------
# Check how to create a tarball in format FORMAT.
# FORMAT should be one of `v7', `ustar', or `pax'.
#
# Substitute a variable $(am__tar) that is a command
# writing to stdout a FORMAT-tarball containing the directory
# $tardir.
#     tardir=directory && $(am__tar) > result.tar
#
# Substitute a variable $(am__untar) that extracts such
# a tarball read from stdin.
#     $(am__untar) < result.tar
#
# For v7 both commands are fixed and no probing is done. For ustar and
# pax, each candidate tool (GNU tar, plain tar for ustar only, pax,
# cpio, and finally the placeholder "none" which sets both commands to
# false) is tried in turn: a scratch directory holding one marker file
# is archived, removed, and extracted again, and the first tool whose
# extraction brings the marker back is kept and cached in
# am_cv_prog_tar_FORMAT. Any other FORMAT is a fatal m4 error.
#
# am__tar_ is the variant executed by configure itself: it refers to
# $tardir, whereas am__tar uses $$tardir for use inside a Makefile.
AC_DEFUN([_AM_PROG_TAR],
[# Always define AMTAR for backward compatibility.
AM_MISSING_PROG([AMTAR], [tar])
m4_if([$1], [v7],
     [am__tar='${AMTAR} chof - "$$tardir"'; am__untar='${AMTAR} xf -'],
     [m4_case([$1], [ustar],, [pax],,
              [m4_fatal([Unknown tar format])])
AC_MSG_CHECKING([how to create a $1 tar archive])
# Loop over all known methods to create a tar archive until one works.
_am_tools='gnutar m4_if([$1], [ustar], [plaintar]) pax cpio none'
_am_tools=${am_cv_prog_tar_$1-$_am_tools}
# Do not fold the above two lines into one, because Tru64 sh and
# Solaris sh will not grok spaces in the rhs of `-'.
for _am_tool in $_am_tools
do
  case $_am_tool in
  gnutar)
    for _am_tar in tar gnutar gtar;
    do
      AM_RUN_LOG([$_am_tar --version]) && break
    done
    am__tar="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$$tardir"'
    am__tar_="$_am_tar --format=m4_if([$1], [pax], [posix], [$1]) -chf - "'"$tardir"'
    am__untar="$_am_tar -xf -"
    ;;
  plaintar)
    # Must skip GNU tar: if it does not support --format= it doesn't create
    # ustar tarball either.
    (tar --version) >/dev/null 2>&1 && continue
    am__tar='tar chf - "$$tardir"'
    am__tar_='tar chf - "$tardir"'
    am__untar='tar xf -'
    ;;
  pax)
    am__tar='pax -L -x $1 -w "$$tardir"'
    am__tar_='pax -L -x $1 -w "$tardir"'
    am__untar='pax -r'
    ;;
  cpio)
    am__tar='find "$$tardir" -print | cpio -o -H $1 -L'
    am__tar_='find "$tardir" -print | cpio -o -H $1 -L'
    am__untar='cpio -i -H $1 -d'
    ;;
  none)
    am__tar=false
    am__tar_=false
    am__untar=false
    ;;
  esac

  # If the value was cached, stop now. We just wanted to have am__tar
  # and am__untar set.
  test -n "${am_cv_prog_tar_$1}" && break

  # tar/untar a dummy directory, and stop if the command works
  rm -rf conftest.dir
  mkdir conftest.dir
  echo GrepMe > conftest.dir/file
  AM_RUN_LOG([tardir=conftest.dir && eval $am__tar_ >conftest.tar])
  rm -rf conftest.dir
  if test -s conftest.tar; then
    # Extract the archive again and accept this tool only when the
    # marker file came back with its content intact.
    AM_RUN_LOG([$am__untar <conftest.tar])
    grep GrepMe conftest.dir/file >/dev/null 2>&1 && break
  fi
done
rm -rf conftest.dir

AC_CACHE_VAL([am_cv_prog_tar_$1], [am_cv_prog_tar_$1=$_am_tool])
AC_MSG_RESULT([$am_cv_prog_tar_$1])])
AC_SUBST([am__tar])
AC_SUBST([am__untar])
]) # _AM_PROG_TAR

m4_include([acinclude.m4])