guile-lib-0.2.6.1/0000775000076400007640000000000013320656106010544 500000000000000guile-lib-0.2.6.1/src/0000775000076400007640000000000013320656103011330 500000000000000guile-lib-0.2.6.1/src/texinfo/0000775000076400007640000000000013320656103013004 500000000000000guile-lib-0.2.6.1/src/texinfo/nodal-tree.scm0000664000076400007640000001040313314016560015460 00000000000000;; (texinfo nodal-tree) -- rendering stexinfo to a nodal tree ;; Copyright (C) 2003,2004,2011 Andy Wingo ;; This program is free software: you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published by ;; the Free Software Foundation, either version 3 of the License, or ;; (at your option) any later version. ;; ;; This program is distributed in the hope that it will be useful, ;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;; GNU General Public License for more details. ;; ;; You should have received a copy of the GNU General Public License ;; along with this program. If not, see . ;;; Commentary: ;; ;;This module exports a procedure to chunk a stexi doument into pieces, ;;delimited by sectioning commands (@code{@@chapter}, ;;@code{@@appendixsec}, etc.). Note that the sectioning commands must be ;;preceded by a @code{@@node}, a condition that the output of ;;@code{(sxml texinfo)} respects. ;; ;;The output is a nodal tree (see (container nodal-tree)), with the ;;following fields defined for each node: ;; ;;; Code: (define-module (texinfo nodal-tree) #:use-module (container nodal-tree) #:use-module (sxml simple) #:use-module (scheme kwargs) #:use-module (texinfo) ;; texi-command-depth #:export (stexi->nodal-tree)) (define (node? elt) (and (pair? elt) (eq? (car elt) 'node))) (define (chunking-section? elt max-depth) (and (pair? elt) (texi-command-depth (car elt) max-depth))) (define (append-child! parent kid) (if parent (node-set! parent 'children (append! (node-ref parent 'children) (list kid))))) (define (find-parent node) (or (and=> (node-ref node 'parent) find-parent) node)) ;; There has to be a more functional way to do this! Probably involves ;; building up from the leaves, instead of building down from the root. ;; Thankfully, the ugliness of this code isn't exported. (define/kwargs (stexi->nodal-tree (stexi #f) (max-depth 4) (initial-depth 0)) "Break @var{stexi} into a nodal tree. Only break until sectioning identifiers of depth @var{max-depth}. The following fields are defined for each node: @table @code @item name The name of the section. @item value The content of the section, as @code{stexi}. The containing element is @code{texinfo}. @item parent A reference to the parent node. @item children A list of subnodes, corresponding to the subsections of the current section. @end table" (define (make-node* parent tree-title) (let ((node (make-node 'name (sxml->string tree-title) 'value #f 'parent parent 'children '()))) (append-child! parent node) node)) (or (eq? (car stexi) 'texinfo) (error "Invalid stexi")) (let lp ((in stexi) (val '()) (node (make-node* #f (cadr stexi))) (parent #f) (depth initial-depth)) ;; (pk (or (null? in) (car in)) val node parent depth) (cond ((null? in) (node-set! node 'value (reverse val)) (find-parent node)) ((or (chunking-section? (car in) max-depth) (and (node? (car in)) (pair? in) (pair? (cdr in)) (chunking-section? (cadr in) max-depth))) (node-set! node 'value (reverse val)) (let* ((node-statement (if (node? (car in)) (car in) #f)) (in (if node-statement (cdr in) in)) (new-depth (texi-command-depth (caar in) max-depth))) (let new-parent ((parent node) (diff (- new-depth depth))) (cond ((not parent) (error "invalid stexi")) ((positive? diff) (or (eq? diff 1) (error "can only descend by one depth level at a time" (car in))) (lp (cdr in) `(,(car in) ,@(if node-statement (list node-statement) '()) (% (title ,(sxml->string (car in)))) texinfo) (make-node* parent (car in)) parent new-depth)) (else (new-parent (node-ref parent 'parent) (1+ diff))))))) (else (lp (cdr in) (cons (car in) val) node parent depth))))) ;;; arch-tag: aff19153-493d-4755-ba6f-22cc7fb43c60 guile-lib-0.2.6.1/src/apicheck.scm0000664000076400007640000002346013314016560013530 00000000000000;; (apicheck) -- check for API incompatibilities ;; Copyright (C) 2007, 2013 Andy Wingo ;; This program is free software: you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published by ;; the Free Software Foundation, either version 3 of the License, or ;; (at your option) any later version. ;; ;; This program is distributed in the hope that it will be useful, ;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;; GNU General Public License for more details. ;; ;; You should have received a copy of the GNU General Public License ;; along with this program. If not, see . ;;; Commentary: ;; ;; @code{(apicheck)} exports two routines. @code{apicheck-generate} ;; produces a description of the Scheme API exported by a set of modules ;; as an S-expression. @code{apicheck-validate} verifies that the API ;; exported by a set of modules is compatible with an API description ;; generated by @code{apicheck-generate}. ;; ;; It would be nice to have Makefile.am fragments here, but for now, see ;; the Guile-Library source distribution for information on how to ;; integrate apicheck with your module's unit test suite. ;; ;;; Code: (define-module (apicheck) #:use-module (unit-test) #:use-module (oop goops) #:use-module (ice-9 pretty-print) #:use-module ((ice-9 common-list) #:select (uniq)) #:use-module ((srfi srfi-1) #:select (append-map lset-difference)) #:export (apicheck-generate apicheck-validate)) (define *path* (make-fluid)) (fluid-set! *path* '()) (define-macro (with-path k v body) `(with-fluids ((*path* (acons ,k ,v (fluid-ref *path*)))) ,body)) (define (write-current-path port) (for-each (lambda (p) (format port "In ~a ~a:\n" (car p) (cdr p))) (fluid-ref *path*))) (define (fail message . args) (write-current-path (current-error-port)) (display message (current-error-port)) (for-each (lambda (x) (display #\space (current-error-port)) (write x (current-error-port))) args) (newline (current-error-port)) (error "API changed incompatibly") (apply error message args)) (define (interface module) (case (module-kind module) ((interface) module) (else (fail "Invalid API: imported module ~a not an interface" module)))) (define (module-all-uses module) (let ((direct (module-uses module))) (map interface (append direct (apply append (map module-all-uses direct)))))) (define (module-exports module) (module-map (lambda (k v) k) module)) (define (symbolcomp pred) (lambda (a b) (pred (symbol->string a) (symbol->string b)))) (define symbol? (symbolcomp string>?)) (define (symlist? (car a) (car b)) #f) (else (symlist) (list 'class)) ((is-a? val ) (cons 'generic (sort (map method-specializer-names (generic-function-methods val)) symlist #### This file is part of Guile-Lib. #### Guile-Lib is free software: you can redistribute it, as a whole, #### and/or modify it under the terms of the GNU General Public #### License as published by the Free Software Foundation, either #### version 3 of the License, or (at your option) any later version. #### Each Guile-Lib module contained in Guile-Lib has its own copying #### conditions, specified in the comments at the beginning of the #### module's source file. #### Guile-Lib is distributed in the hope that it will be useful, but #### WITHOUT ANY WARRANTY; without even the implied warranty of #### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #### General Public License for more details. #### You should have received a copy of the GNU General Public License #### along with Guile-Lib. If not, see #### . #### #### #### Copyright (C) 2018 David Pirotte #### David Pirotte #### This file is part of Guile-Lib. #### Guile-Lib is free software: you can redistribute it, as a whole, #### and/or modify it under the terms of the GNU General Public #### License as published by the Free Software Foundation, either #### version 3 of the License, or (at your option) any later version. #### Each Guile-Lib module contained in Guile-Lib has its own copying #### conditions, specified in the comments at the beginning of the #### module's source file. #### Guile-Lib is distributed in the hope that it will be useful, but #### WITHOUT ANY WARRANTY; without even the implied warranty of #### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU #### General Public License for more details. #### You should have received a copy of the GNU General Public License #### along with Guile-Lib. If not, see #### . #### VPATH = @srcdir@ am__is_gnu_make = { \ if test -z '$(MAKELEVEL)'; then \ false; \ elif test -n '$(MAKE_HOST)'; then \ true; \ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ true; \ else \ false; \ fi; \ } am__make_running_with_option = \ case $${target_option-} in \ ?) ;; \ *) echo "am__make_running_with_option: internal error: invalid" \ "target option '$${target_option-}' specified" >&2; \ exit 1;; \ esac; \ has_opt=no; \ sane_makeflags=$$MAKEFLAGS; \ if $(am__is_gnu_make); then \ sane_makeflags=$$MFLAGS; \ else \ case $$MAKEFLAGS in \ *\\[\ \ ]*) \ bs=\\; \ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ esac; \ fi; \ skip_next=no; \ strip_trailopt () \ { \ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ }; \ for flg in $$sane_makeflags; do \ test $$skip_next = yes && { skip_next=no; continue; }; \ case $$flg in \ *=*|--*) continue;; \ -*I) strip_trailopt 'I'; skip_next=yes;; \ -*I?*) strip_trailopt 'I';; \ -*O) strip_trailopt 'O'; skip_next=yes;; \ -*O?*) strip_trailopt 'O';; \ -*l) strip_trailopt 'l'; skip_next=yes;; \ -*l?*) strip_trailopt 'l';; \ -[dEDm]) skip_next=yes;; \ -[JT]) skip_next=yes;; \ esac; \ case $$flg in \ *$$target_option*) has_opt=yes; break;; \ esac; \ done; \ test $$has_opt = yes am__make_dryrun = (target_option=n; $(am__make_running_with_option)) am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibdir = $(libdir)/@PACKAGE@ pkglibexecdir = $(libexecdir)/@PACKAGE@ am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd install_sh_DATA = $(install_sh) -c -m 644 install_sh_PROGRAM = $(install_sh) -c install_sh_SCRIPT = $(install_sh) -c INSTALL_HEADER = $(INSTALL_DATA) transform = $(program_transform_name) NORMAL_INSTALL = : PRE_INSTALL = : POST_INSTALL = : NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ subdir = src ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/guile-ext.m4 \ $(top_srcdir)/m4/guile.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = AM_V_P = $(am__v_P_@AM_V@) am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) am__v_P_0 = false am__v_P_1 = : AM_V_GEN = $(am__v_GEN_@AM_V@) am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) am__v_GEN_0 = @echo " GEN " $@; am__v_GEN_1 = AM_V_at = $(am__v_at_@AM_V@) am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = DIST_SOURCES = am__can_run_installinfo = \ case $$AM_UPDATE_INFO_DIR in \ n|no|NO) false;; \ *) (install-info --version) >/dev/null 2>&1;; \ esac am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ *) f=$$p;; \ esac; am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; am__install_max = 40 am__nobase_strip_setup = \ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` am__nobase_strip = \ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" am__nobase_list = $(am__nobase_strip_setup); \ for p in $$list; do echo "$$p $$p"; done | \ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ if (++n[$$2] == $(am__install_max)) \ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ END { for (dir in files) print dir, files[dir] }' am__base_list = \ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' am__uninstall_files_from_dir = { \ test -z "$$files" \ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ $(am__cd) "$$dir" && rm -f $$files; }; \ } am__installdirs = "$(DESTDIR)$(godir)" "$(DESTDIR)$(moddir)" DATA = $(nobase_go_DATA) $(nobase_mod_DATA) am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) # Read a list of newline-separated strings from the standard input, # and print each of them once, without duplicates. Input order is # *not* preserved. am__uniquify_input = $(AWK) '\ BEGIN { nonempty = 0; } \ { items[$$0] = 1; nonempty = 1; } \ END { if (nonempty) { for (i in items) print i; }; } \ ' # Make sure the list of sources is unique. This is necessary because, # e.g., the same source file might be shared among _SOURCES variables # for different programs/libraries. am__define_uniq_tagged_files = \ list='$(am__tagged_files)'; \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | $(am__uniquify_input)` ETAGS = etags CTAGS = ctags am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/am/guile.mk DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) ACLOCAL = @ACLOCAL@ ACLOCAL_FLAGS = @ACLOCAL_FLAGS@ AMTAR = @AMTAR@ AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ AWK = @AWK@ CC = @CC@ CCDEPMODE = @CCDEPMODE@ CFLAGS = @CFLAGS@ CPP = @CPP@ CPPFLAGS = @CPPFLAGS@ CYGPATH_W = @CYGPATH_W@ DEFS = @DEFS@ DEPDIR = @DEPDIR@ ECHO_C = @ECHO_C@ ECHO_N = @ECHO_N@ ECHO_T = @ECHO_T@ EGREP = @EGREP@ EXEEXT = @EXEEXT@ GREP = @GREP@ GUILD = @GUILD@ GUILE = @GUILE@ GUILE_CFLAGS = @GUILE_CFLAGS@ GUILE_CONFIG = @GUILE_CONFIG@ GUILE_EFFECTIVE_VERSION = @GUILE_EFFECTIVE_VERSION@ GUILE_EXTENSION = @GUILE_EXTENSION@ GUILE_GLOBAL_SITE = @GUILE_GLOBAL_SITE@ GUILE_LDFLAGS = @GUILE_LDFLAGS@ GUILE_LIBS = @GUILE_LIBS@ GUILE_LTLIBS = @GUILE_LTLIBS@ GUILE_SITE = @GUILE_SITE@ GUILE_SITE_CCACHE = @GUILE_SITE_CCACHE@ GUILE_TOOLS = @GUILE_TOOLS@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ LDFLAGS = @LDFLAGS@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LTLIBOBJS = @LTLIBOBJS@ MAINT = @MAINT@ MAKEINFO = @MAKEINFO@ MKDIR_P = @MKDIR_P@ OBJEXT = @OBJEXT@ PACKAGE = @PACKAGE@ PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ PACKAGE_NAME = @PACKAGE_NAME@ PACKAGE_STRING = @PACKAGE_STRING@ PACKAGE_TARNAME = @PACKAGE_TARNAME@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PATH_SEPARATOR = @PATH_SEPARATOR@ PKG_CONFIG = @PKG_CONFIG@ PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SITECCACHEDIR = @SITECCACHEDIR@ SITEDIR = @SITEDIR@ STRIP = @STRIP@ VERSION = @VERSION@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ abs_top_srcdir = @abs_top_srcdir@ ac_ct_CC = @ac_ct_CC@ am__include = @am__include@ am__leading_dot = @am__leading_dot@ am__quote = @am__quote@ am__tar = @am__tar@ am__untar = @am__untar@ bindir = @bindir@ build = @build@ build_alias = @build_alias@ build_cpu = @build_cpu@ build_os = @build_os@ build_vendor = @build_vendor@ builddir = @builddir@ datadir = @datadir@ datarootdir = @datarootdir@ docdir = @docdir@ dvidir = @dvidir@ exec_prefix = @exec_prefix@ guile_site = @guile_site@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ host_os = @host_os@ host_vendor = @host_vendor@ htmldir = @htmldir@ includedir = @includedir@ infodir = @infodir@ install_sh = @install_sh@ libdir = @libdir@ libexecdir = @libexecdir@ localedir = @localedir@ localstatedir = @localstatedir@ mandir = @mandir@ mkdir_p = @mkdir_p@ oldincludedir = @oldincludedir@ pdfdir = @pdfdir@ prefix = @prefix@ program_transform_name = @program_transform_name@ psdir = @psdir@ runstatedir = @runstatedir@ sbindir = @sbindir@ sharedstatedir = @sharedstatedir@ srcdir = @srcdir@ sysconfdir = @sysconfdir@ target_alias = @target_alias@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ moddir = @SITEDIR@ godir = @SITECCACHEDIR@ GOBJECTS = $(SOURCES:%.scm=%.go) nobase_mod_DATA = $(SOURCES) $(NOCOMP_SOURCES) nobase_go_DATA = $(GOBJECTS) # Make sure source files are installed first, so that the mtime of # installed compiled files is greater than that of installed source # files. See # # for details. guile_install_go_files = install-nobase_goDATA GUILE_WARNINGS = -Wunbound-variable -Warity-mismatch -Wformat SUFFIXES = .scm .go SXML_FILES = \ sxml/apply-templates.scm \ sxml/fold.scm \ sxml/simple.scm \ sxml/ssax/input-parse.scm \ sxml/ssax.scm \ sxml/transform.scm \ sxml/unicode.scm \ sxml/upstream/SSAX-expanded.scm \ sxml/upstream/SSAX.scm \ sxml/upstream/SXML-tree-trans.scm \ sxml/upstream/SXPath-old.scm \ sxml/upstream/input-parse.scm \ sxml/upstream/assert.scm \ sxml/xpath.scm \ sxml/ssax-simple.scm # Note, texinfo/nodal-tree.scm is not in guile 2.0, so it is in SOURCES # below. TEXINFO_FILES = \ texinfo/docbook.scm \ texinfo/html.scm \ texinfo/indexing.scm \ texinfo/plain-text.scm \ texinfo/reflection.scm \ texinfo/serialize.scm \ texinfo.scm STATPROF_FILES = \ statprof.scm # Guile-Lib depends on Guile 2.0, and Guile 2.0 already comes with the # `(sxml ...)' and `(texinfo ...)' modules unmodified. We don't need # the following variables anymore, also removed from the SOURCES # definition below. # SXML_SOURCES = # TEXINFO_SOURCES = # STATPROF_SOURCES = SOURCES = \ apicheck.scm \ compat/guile-2.scm \ config/load.scm \ container/delay-tree.scm \ container/nodal-tree.scm \ container/async-queue.scm \ debugging/assert.scm \ debugging/time.scm \ graph/topological-sort.scm \ htmlprag.scm \ io/string.scm \ logging/logger.scm \ logging/port-log.scm \ logging/rotating-log.scm \ math/minima.scm \ math/primes.scm \ match-bind.scm \ md5.scm \ os/process.scm \ scheme/documentation.scm \ scheme/kwargs.scm \ search/basic.scm \ string/completion.scm \ string/soundex.scm \ string/transform.scm \ string/wrap.scm \ term/ansi-color.scm \ texinfo/nodal-tree.scm \ text/parse-lalr.scm \ unit-test.scm EXTRA_DIST = \ $(SOURCES) \ $(NOCOMP_SOURCES) CLEANFILES = $(GOBJECTS) all: all-am .SUFFIXES: .SUFFIXES: .scm .go $(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(top_srcdir)/am/guile.mk $(am__configure_deps) @for dep in $?; do \ case '$(am__configure_deps)' in \ *$$dep*) \ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ && { if test -f $@; then exit 0; else break; fi; }; \ exit 1;; \ esac; \ done; \ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --foreign src/Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ *) \ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ esac; $(top_srcdir)/am/guile.mk $(am__empty): $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): install-nobase_goDATA: $(nobase_go_DATA) @$(NORMAL_INSTALL) @list='$(nobase_go_DATA)'; test -n "$(godir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(godir)'"; \ $(MKDIR_P) "$(DESTDIR)$(godir)" || exit 1; \ fi; \ $(am__nobase_list) | while read dir files; do \ xfiles=; for file in $$files; do \ if test -f "$$file"; then xfiles="$$xfiles $$file"; \ else xfiles="$$xfiles $(srcdir)/$$file"; fi; done; \ test -z "$$xfiles" || { \ test "x$$dir" = x. || { \ echo " $(MKDIR_P) '$(DESTDIR)$(godir)/$$dir'"; \ $(MKDIR_P) "$(DESTDIR)$(godir)/$$dir"; }; \ echo " $(INSTALL_DATA) $$xfiles '$(DESTDIR)$(godir)/$$dir'"; \ $(INSTALL_DATA) $$xfiles "$(DESTDIR)$(godir)/$$dir" || exit $$?; }; \ done uninstall-nobase_goDATA: @$(NORMAL_UNINSTALL) @list='$(nobase_go_DATA)'; test -n "$(godir)" || list=; \ $(am__nobase_strip_setup); files=`$(am__nobase_strip)`; \ dir='$(DESTDIR)$(godir)'; $(am__uninstall_files_from_dir) install-nobase_modDATA: $(nobase_mod_DATA) @$(NORMAL_INSTALL) @list='$(nobase_mod_DATA)'; test -n "$(moddir)" || list=; \ if test -n "$$list"; then \ echo " $(MKDIR_P) '$(DESTDIR)$(moddir)'"; \ $(MKDIR_P) "$(DESTDIR)$(moddir)" || exit 1; \ fi; \ $(am__nobase_list) | while read dir files; do \ xfiles=; for file in $$files; do \ if test -f "$$file"; then xfiles="$$xfiles $$file"; \ else xfiles="$$xfiles $(srcdir)/$$file"; fi; done; \ test -z "$$xfiles" || { \ test "x$$dir" = x. || { \ echo " $(MKDIR_P) '$(DESTDIR)$(moddir)/$$dir'"; \ $(MKDIR_P) "$(DESTDIR)$(moddir)/$$dir"; }; \ echo " $(INSTALL_DATA) $$xfiles '$(DESTDIR)$(moddir)/$$dir'"; \ $(INSTALL_DATA) $$xfiles "$(DESTDIR)$(moddir)/$$dir" || exit $$?; }; \ done uninstall-nobase_modDATA: @$(NORMAL_UNINSTALL) @list='$(nobase_mod_DATA)'; test -n "$(moddir)" || list=; \ $(am__nobase_strip_setup); files=`$(am__nobase_strip)`; \ dir='$(DESTDIR)$(moddir)'; $(am__uninstall_files_from_dir) ID: $(am__tagged_files) $(am__define_uniq_tagged_files); mkid -fID $$unique tags: tags-am TAGS: tags tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) set x; \ here=`pwd`; \ $(am__define_uniq_tagged_files); \ shift; \ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ test -n "$$unique" || unique=$$empty_fix; \ if test $$# -gt 0; then \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ "$$@" $$unique; \ else \ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ $$unique; \ fi; \ fi ctags: ctags-am CTAGS: ctags ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) $(am__define_uniq_tagged_files); \ test -z "$(CTAGS_ARGS)$$unique" \ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ $$unique GTAGS: here=`$(am__cd) $(top_builddir) && pwd` \ && $(am__cd) $(top_srcdir) \ && gtags -i $(GTAGS_ARGS) "$$here" cscopelist: cscopelist-am cscopelist-am: $(am__tagged_files) list='$(am__tagged_files)'; \ case "$(srcdir)" in \ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ *) sdir=$(subdir)/$(srcdir) ;; \ esac; \ for i in $$list; do \ if test -f "$$i"; then \ echo "$(subdir)/$$i"; \ else \ echo "$$sdir/$$i"; \ fi; \ done >> $(top_builddir)/cscope.files distclean-tags: -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags distdir: $(DISTFILES) @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ list='$(DISTFILES)'; \ dist_files=`for file in $$list; do echo $$file; done | \ sed -e "s|^$$srcdirstrip/||;t" \ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ case $$dist_files in \ */*) $(MKDIR_P) `echo "$$dist_files" | \ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ sort -u` ;; \ esac; \ for file in $$dist_files; do \ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ if test -d $$d/$$file; then \ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ if test -d "$(distdir)/$$file"; then \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ fi; \ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ else \ test -f "$(distdir)/$$file" \ || cp -p $$d/$$file "$(distdir)/$$file" \ || exit 1; \ fi; \ done check-am: all-am check: check-am all-am: Makefile $(DATA) installdirs: for dir in "$(DESTDIR)$(godir)" "$(DESTDIR)$(moddir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ done install: install-am install-exec: install-exec-am install-data: install-data-am uninstall: uninstall-am install-am: all-am @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am installcheck: installcheck-am install-strip: if test -z '$(STRIP)'; then \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ install; \ else \ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ fi mostlyclean-generic: clean-generic: -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) maintainer-clean-generic: @echo "This command is intended for maintainers to use" @echo "it deletes files that may require special tools to rebuild." clean: clean-am clean-am: clean-generic mostlyclean-am distclean: distclean-am -rm -f Makefile distclean-am: clean-am distclean-generic distclean-tags dvi: dvi-am dvi-am: html: html-am html-am: info: info-am info-am: install-data-am: install-nobase_goDATA install-nobase_modDATA install-dvi: install-dvi-am install-dvi-am: install-exec-am: install-html: install-html-am install-html-am: install-info: install-info-am install-info-am: install-man: install-pdf: install-pdf-am install-pdf-am: install-ps: install-ps-am install-ps-am: installcheck-am: maintainer-clean: maintainer-clean-am -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-generic mostlyclean: mostlyclean-am mostlyclean-am: mostlyclean-generic pdf: pdf-am pdf-am: ps: ps-am ps-am: uninstall-am: uninstall-nobase_goDATA uninstall-nobase_modDATA .MAKE: install-am install-strip .PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \ cscopelist-am ctags ctags-am distclean distclean-generic \ distclean-tags distdir dvi dvi-am html html-am info info-am \ install install-am install-data install-data-am install-dvi \ install-dvi-am install-exec install-exec-am install-html \ install-html-am install-info install-info-am install-man \ install-nobase_goDATA install-nobase_modDATA install-pdf \ install-pdf-am install-ps install-ps-am install-strip \ installcheck installcheck-am installdirs maintainer-clean \ maintainer-clean-generic mostlyclean mostlyclean-generic pdf \ pdf-am ps ps-am tags tags-am uninstall uninstall-am \ uninstall-nobase_goDATA uninstall-nobase_modDATA .PRECIOUS: Makefile $(guile_install_go_files): install-nobase_modDATA .scm.go: $(AM_V_GEN)$(top_builddir)/pre-inst-env \ guild compile $(GUILE_WARNINGS) -o "$@" "$<" printenv: printf '$(moddir)\n$(godir)\n' # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: guile-lib-0.2.6.1/src/io/0000775000076400007640000000000013320656103011737 500000000000000guile-lib-0.2.6.1/src/io/string.scm0000664000076400007640000000754613314016560013705 00000000000000;; (io string) -- input and output with strings ;; Written 1995, 1996 by Oleg Kiselyov (oleg@acm.org) ;; Modified 1996, 1997, 1998, 2001 by A. Jaffer (agj@alum.mit.edu) ;; Modified 2003 by Steve VanDevender (stevev@hexadecimal.uoregon.edu) ;; Modified 2004 Andy Wingo ;; This file is based SLIB's strsrch.scm, and is in the public domain. ;;; Commentary: ;; ;;@c texinfo, really ;; Procedures that do io with strings. ;; ;;; Code: (define-module (io string) #:use-module (scheme documentation) #:export (find-string-from-port?)) (define-with-docs find-string-from-port? "Looks for @var{str} in @var{}, optionally within the first @var{max-no-char} characters." (lambda (str . max-no-char) (set! max-no-char (if (null? max-no-char) #f (car max-no-char))) (letrec ((no-chars-read 0) (peeked? #f) (my-peek-char ; Return a peeked char or #f (lambda () (and (or (not (number? max-no-char)) (< no-chars-read max-no-char)) (let ((c (peek-char ))) (cond (peeked? c) ((eof-object? c) #f) ((procedure? max-no-char) (set! peeked? #t) (if (max-no-char c) #f c)) ((eqv? max-no-char c) #f) (else c)))))) (next-char (lambda () (set! peeked? #f) (read-char ) (set! no-chars-read (+ 1 no-chars-read)))) (match-1st-char ; of the string str (lambda () (let ((c (my-peek-char))) (and c (begin (next-char) (if (char=? c (string-ref str 0)) (match-other-chars 1) (match-1st-char))))))) ;; There has been a partial match, up to the point pos-to-match ;; (for example, str[0] has been found in the stream) ;; Now look to see if str[pos-to-match] for would be found, too (match-other-chars (lambda (pos-to-match) (if (>= pos-to-match (string-length str)) no-chars-read ; the entire string has matched (let ((c (my-peek-char))) (and c (if (not (char=? c (string-ref str pos-to-match))) (backtrack 1 pos-to-match) (begin (next-char) (match-other-chars (+ 1 pos-to-match))))))))) ;; There had been a partial match, but then a wrong char showed up. ;; Before discarding previously read (and matched) characters, we check ;; to see if there was some smaller partial match. Note, characters read ;; so far (which matter) are those of str[0..matched-substr-len - 1] ;; In other words, we will check to see if there is such i>0 that ;; substr(str,0,j) = substr(str,i,matched-substr-len) ;; where j=matched-substr-len - i (backtrack (lambda (i matched-substr-len) (let ((j (- matched-substr-len i))) (if (<= j 0) ;; backed off completely to the begining of str (match-1st-char) (let loop ((k 0)) (if (>= k j) (match-other-chars j) ; there was indeed a shorter match (if (char=? (string-ref str k) (string-ref str (+ i k))) (loop (+ 1 k)) (backtrack (+ 1 i) matched-substr-len)))))))) ) (match-1st-char)))) ;;; arch-tag: 99289f4f-5fdb-4c6e-924a-1c510a61a03e ;;; string.scm ends here guile-lib-0.2.6.1/src/htmlprag.scm0000664000076400007640000030401713314016560013577 00000000000000;; (htmlprag) -- pragmatic parsing of real-world HTML ;; Copyright (C) 2003-2004 Neil W. Van Dyke ;; Modified 2004 by Andy Wingo to fit in with guile-lib. ;; This program is free software: you can redistribute it and/or modify ;; it under the terms of the GNU Lesser General Public License as ;; published by the Free Software Foundation, either version 3 of the ;; License, or (at your option) any later version. ;; ;; This program is distributed in the hope that it will be useful, ;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;; GNU Lesser General Public License for more details. ;; ;; You should have received a copy of the GNU Lesser General Public ;; License along with this program. If not, see ;; . ;; The license of the code that this is based on, when it came from Neil ;; W. Van Dyke, was the LGPL version 2.1. Neil notes that other ;; licensing options for his code are available; interested parties ;; should contact him directly. ;;; Commentary: ;; ;;; HtmlPrag provides permissive HTML parsing capability to Scheme programs, ;;; which is useful for software agent extraction of information from Web ;;; pages, for programmatically transforming HTML files, and for implementing ;;; interactive Web browsers. HtmlPrag emits ``SHTML,'' which is an encoding ;;; of HTML in [SXML], so that conventional HTML may be processed with XML ;;; tools such as [SXPath] and [SXML-Tools]. Like [SSAX-HTML], HtmlPrag ;;; provides a permissive tokenizer, but also attempts to recover structure. ;;; HtmlPrag also includes procedures for encoding SHTML in HTML syntax. ;;; ;;; The HtmlPrag parsing behavior is permissive in that it accepts erroneous ;;; HTML, handling several classes of HTML syntax errors gracefully, without ;;; yielding a parse error. This is crucial for parsing arbitrary real-world ;;; Web pages, since many pages actually contain syntax errors that would ;;; defeat a strict or validating parser. HtmlPrag's handling of errors is ;;; intended to generally emulate popular Web browsers' interpretation of the ;;; structure of erroneous HTML. We euphemistically term this kind of parse ;;; ``pragmatic.'' ;;; ;;; HtmlPrag also has some support for [XHTML], although XML namespace ;;; qualifiers [XML-Names] are currently accepted but stripped from the ;;; resulting SHTML. Note that valid XHTML input is of course better handled ;;; by a validating XML parser like [SSAX]. ;;; ;;; To receive notification of new versions of HtmlPrag, and to be polled for ;;; input on changes to HtmlPrag being considered, ask the author to add you to ;;; the moderated, announce-only email list, @code{htmlprag-announce}. ;;; ;;; Thanks to Oleg Kiselyov and Kirill Lisovsky for their help with SXML. ;; ;;; Code: (define-module (htmlprag)) ;; Exports defined at the end of the file ;; THIS FILE GENERATED Thu May 13 21:41:40 EDT 2004 -- DO NOT EDIT MANUALLY ;; ############# BEGIN CANONICAL htmlprag.scm ############# ;;; @Package HtmlPrag ;;; @Subtitle Pragmatic Parsing of HTML to SHTML and SXML ;;; @HomePage http://www.neilvandyke.org/htmlprag/ ;;; @Author Neil W. Van Dyke ;;; @AuthorEmail neil@@neilvandyke.org ;;; @Version 0.11 ;;; @Date 13 May 2004 ;; $Id: htmlprag.scm,v 1.304 2004/05/14 01:28:51 neil Exp $ ;;; @legal ;;; Copyright @copyright{} 2003-2004 Neil W. Van Dyke. This program is Free ;;; Software; you can redistribute it and/or modify it under the terms of the ;;; GNU Lesser General Public License as published by the Free Software ;;; Foundation; either version 2.1 of the License, or (at your option) any ;;; later version. This program is distributed in the hope that it will be ;;; useful, but without any warranty; without even the implied warranty of ;;; merchantability or fitness for a particular purpose. See the GNU Lesser ;;; General Public License [LGPL] for more details. For other license options ;;; and commercial consulting, contact the author. ;;; @end legal ;;; @section Introduction ;;; HtmlPrag provides permissive HTML parsing capability to Scheme programs, ;;; which is useful for software agent extraction of information from Web ;;; pages, for programmatically transforming HTML files, and for implementing ;;; interactive Web browsers. HtmlPrag emits ``SHTML,'' which is an encoding ;;; of HTML in [SXML], so that conventional HTML may be processed with XML ;;; tools such as [SXPath] and [SXML-Tools]. Like [SSAX-HTML], HtmlPrag ;;; provides a permissive tokenizer, but also attempts to recover structure. ;;; HtmlPrag also includes procedures for encoding SHTML in HTML syntax. ;;; ;;; The HtmlPrag parsing behavior is permissive in that it accepts erroneous ;;; HTML, handling several classes of HTML syntax errors gracefully, without ;;; yielding a parse error. This is crucial for parsing arbitrary real-world ;;; Web pages, since many pages actually contain syntax errors that would ;;; defeat a strict or validating parser. HtmlPrag's handling of errors is ;;; intended to generally emulate popular Web browsers' interpretation of the ;;; structure of erroneous HTML. We euphemistically term this kind of parse ;;; ``pragmatic.'' ;;; ;;; HtmlPrag also has some support for [XHTML], although XML namespace ;;; qualifiers [XML-Names] are currently accepted but stripped from the ;;; resulting SHTML. Note that valid XHTML input is of course better handled ;;; by a validating XML parser like [SSAX]. ;;; ;;; To receive notification of new versions of HtmlPrag, and to be polled for ;;; input on changes to HtmlPrag being considered, ask the author to add you to ;;; the moderated, announce-only email list, @code{htmlprag-announce}. ;;; ;;; Thanks to Oleg Kiselyov and Kirill Lisovsky for their help with SXML. ;;; @section Portability ;;; HtmlPrag officially requires R5RS, [SRFI-6], and [SRFI-23], but is known to ;;; also work on some non-R5RS implementations. The current version tests ;;; successfully under Bigloo 2.6d, Chicken 1.22, Gauche 0.7.4.2, Guile 1.6.4, ;;; MIT Scheme 7.7.90, PLT MzScheme 206p1, RScheme 0.7.3.3-b20, SISC 1.8.7 ;;; (using Kaffe 1.1.4), and STklos 0.55. With a minor change to the source ;;; code, HtmlPrag also tests successfully under Scheme 48 0.57 and Scsh 0.6.3. ;;; ;;; Kawa has been removed temporarily from the test list, but should run if ;;; Sun's Java implementation can be used. SXM has removed temporarily from ;;; the test list, until the test suite code can be adjusted to not exceed ;;; SXM's limit on literals. ;; TODO: Note about packagings. ;; ;; Some packagings of HtmlPrag for particular Scheme implementations are ;; available from the HtmlPrag Web page and elsewhere. ;; TODO: Note conventional prefix option with module systems that support it. ;; ;; @lisp ;; (require (prefix htmlprag: (lib "htmlprag.ss" "htmlprag"))) ;; @end lisp ;;; In addition to the documented public bindings, the HtmlPrag source code ;;; includes some internal-use-only toplevel bindings. The names of these ;;; begin with the ``@code{htmlprag-internal:}'' prefix. Packagings of ;;; HtmlPrag for particular Scheme implementations should suppress these ;;; bindings from export when possible. ;; The following bindings are used internally by HtmlPrag for portability, ;; with the intention that packagings of HtmlPrag use faster or more ;; appropriate bindings for the particular Scheme implementation. ;; @defproc htmlprag-internal:a2c num ;; ;; Returns the character with ASCII value @var{num}. In most Scheme ;; implementations, this is the same as @code{integer->char}. Two exceptions ;; are Scheme 48 0.57 and Scsh 0.6.3, for which the user must manually edit ;; file @code{htmlprag.scm} to bind this variable to @code{ascii->char}. A ;; future version of HtmlPrag will automatically use @code{ascii->char} where ;; available. (define htmlprag-internal:a2c integer->char) ;; @defproc htmlprag-internal:append! a b ;; ;; Returns a concatenation of lists @var{a} and @var{b}, modifying the tail of ;; @var{a} to point to the head of @var{b} if both lists are non-null. A ;; future version should use the more general @code{append!} where available. (define (htmlprag-internal:append! a b) (cond ((null? a) b) ((null? b) a) (else (let loop ((sub a)) (if (null? (cdr sub)) (begin (set-cdr! sub b) a) (loop (cdr sub))))))) ;; @defproc htmlprag-internal:reverse!ok lst ;; ;; Returns a reversed list @var{lst}, possibly destructive. A future version ;; will use @code{reverse!} where available, and @code{reverse} elsewhere. (define htmlprag-internal:reverse!ok reverse) ;; @defproc htmlprag-internal:down str ;; ;; Returns a string that is equivalent to @var{str} with all characters mapped ;; to lowercase, as if by @code{char-downcase}, without mutating @var{str}. A ;; future version should use the Scheme implementation's native nondestructive ;; procedure where available. (define (htmlprag-internal:down s) (list->string (map char-downcase (string->list s)))) ;; @defproc htmlprag-internal:error proc-str msg obj ;; ;; For Bigloo, this is changed to: ;; ;; @lisp ;; (define htmlprag-internal:error error) ;; @end lisp (define (htmlprag-internal:error p m o) (error (string-append p " - " m) o)) ;; TODO: Make htmlprag-internal:error be syntax. ;; @defproc htmlprag-internal:down!ok str ;; ;; Returns a string that is equivalent to @var{str} with all characters mapped ;; to lowercase, as if by @code{char-downcase}, possibly mutating @var{str}. ;; A future version should use the Scheme implementation's native destructive ;; or nondestructive procedure where available. (define htmlprag-internal:down!ok htmlprag-internal:down) ;; @defproc htmlprag-internal:gosc os ;; ;; One-shot version of the conventional @code{get-output-string}. The result ;; of any subsequent attempt to write to the port or get the output string is ;; undefined. This may or may not free up resources. (define (htmlprag-internal:gosc os) (let ((str (get-output-string os))) ;; Note: By default, we don't call close-output-port, since at least one ;; tested Scheme implementation barfs on that. ;; ;; (close-output-port os) str)) ;; @defvar htmlprag-internal:at ;; ;; Constant bound to the symbol @code{@@}. This is to make code portable to ;; Scheme implementations with readers that cannot read @code{@@} as a symbol. ;; (Actually, RScheme can now read @code{@@}, which leaves Stalin as the only ;; one the author knows of, so we'll probably go back to just using literal ;; @code{@@} symbols. (define htmlprag-internal:at (string->symbol "@")) ;;; @section SHTML and SXML ;; TODO: Introduce SHTML. ;;; Some constants and a procedure are defined for convenience and portability ;;; when examining the SHTML produced by the tokenizer and parser. ;;; @defvar shtml-comment-symbol ;;; @defvarx shtml-decl-symbol ;;; @defvarx shtml-empty-symbol ;;; @defvarx shtml-end-symbol ;;; @defvarx shtml-entity-symbol ;;; @defvarx shtml-pi-symbol ;;; @defvarx shtml-start-symbol ;;; @defvarx shtml-text-symbol ;;; @defvarx shtml-top-symbol ;;; ;;; These variables are bound to the following case-sensitive symbols used in ;;; SHTML, respectively: @code{*COMMENT*}, @code{*DECL*}, @code{*EMPTY*}, ;;; @code{*END*}, @code{*ENTITY*}, @code{*PI*}, @code{*START*}, @code{*TEXT*}, ;;; and @code{*TOP*}. These can be used in lieu of the literal symbols in ;;; programs read by a case-insensitive Scheme reader. (define shtml-comment-symbol (string->symbol "*COMMENT*")) (define shtml-decl-symbol (string->symbol "*DECL*")) (define shtml-empty-symbol (string->symbol "*EMPTY*")) (define shtml-end-symbol (string->symbol "*END*")) (define shtml-entity-symbol (string->symbol "*ENTITY*")) (define shtml-pi-symbol (string->symbol "*PI*")) (define shtml-start-symbol (string->symbol "*START*")) (define shtml-text-symbol (string->symbol "*TEXT*")) (define shtml-top-symbol (string->symbol "*TOP*")) ;;; @defvar shtml-named-char-id ;;; @defvarx shtml-numeric-char-id ;;; ;;; These variables are bound to the SHTML entity public identifier strings ;;; for symbolic and numeric character entities. These strings are currently ;;; @code{"additional"} and @code{"additional-char"}, respectively, but are ;;; likely to change in a future version of HtmlPrag, so programs should use ;;; the bindings rather than the literal strings directly. (define shtml-named-char-id "additional") (define shtml-numeric-char-id "additional-char") ;; TODO: Make public procedures for creating character entities, since the ;; current SHTML syntax for them is pretty nasty. ;;; @defproc shtml-entity-value entity ;;; ;;; Yields the value for the SHTML entity. Values of named entities are ;;; symbols, and values of numeric entities are numbers. For example: ;;; ;;; @lisp ;;; (define (f s) (shtml-entity-value (car (cdr (html->shtml s))))) ;;; (f " ") @result{} nbsp ;;; (f "ߐ") @result{} 2000 ;;; @end lisp (define (shtml-entity-value entity) (if (and (list? entity) (= (length entity) 3) (eqv? (car entity) shtml-entity-symbol)) (let ((public-id (list-ref entity 1)) (system-id (list-ref entity 2))) (cond ((equal? public-id shtml-named-char-id) (string->symbol system-id)) ((equal? public-id shtml-numeric-char-id) (string->number system-id)) (else (htmlprag-internal:error "shtml-entity-value" "invalid entity public id" public-id)))) (htmlprag-internal:error "shtml-entity-value" "not an entity" entity))) ;;; @section Tokenizing ;;; The tokenizer is used by the higher-level structural parser, but can also ;;; be called directly for debugging purposes or unusual applications. Some of ;;; the list structure of tokens, such as for start tag tokens, is mutated and ;;; incorporated into the SHTML list structure emitted by the parser. ;; TODO: Document the token format. ;;; @defproc make-html-tokenizer in normalized? ;;; ;;; Constructs an HTML tokenizer procedure on input port @var{in}. If boolean ;;; @var{normalized?} is true, then tokens will be in a format conducive to use ;;; with a parser emitting normalized SXML. Each call to the resulting ;;; procedure yields a successive token from the input. When the tokens have ;;; been exhausted, the procedure returns the null list. For example: ;;; ;;; @lisp ;;; (define input (open-input-string "bar")) ;;; (define next (make-html-tokenizer input #f)) ;;; (next) @result{} (a (@@ (href "foo"))) ;;; (next) @result{} "bar" ;;; (next) @result{} (*END* a) ;;; (next) @result{} () ;;; (next) @result{} () ;;; @end lisp (define make-html-tokenizer ;; TODO: Have the tokenizer replace contiguous whitespace within individual ;; text tokens with single space characters (except for when in `pre' ;; and verbatim elements). The parser will introduce new contiguous ;; whitespace (e.g., when text tokens are concatenated, invalid end ;; tags are removed, whitespace is irrelevant between certain ;; elements), but then the parser only has to worry about the first and ;; last character of each string. Perhaps the text tokens should have ;; both leading and trailing whitespace stripped, and contain flags for ;; whether or not leading and trailing whitespace occurred. (letrec ((no-token '()) ;; TODO: Maybe make this an option. (verbatim-to-eof-elems '(plaintext)) ;; TODO: Implement proper parsing of `verbatim-pair-elems' elements. ;; Note that we must support invalid termination like this: (verbatim-pair-elems '(script server style xmp)) (ws-chars (list #\space (htmlprag-internal:a2c 9) (htmlprag-internal:a2c 10) (htmlprag-internal:a2c 11) (htmlprag-internal:a2c 12) (htmlprag-internal:a2c 13))) (output-string->string-or-false (lambda (os) (let ((s (htmlprag-internal:gosc os))) (if (string=? s "") #f s)))) (output-string->symbol-or-false (lambda (os) (let ((s (output-string->string-or-false os))) (if s (string->symbol s) #f)))) ) (lambda (in normalized?) ;; TODO: Make a tokenizer option that causes XML namespace qualifiers to ;; be ignored. (letrec ( ;; Port buffer with inexpensive unread of one character and slightly ;; more expensive pushback of second character to unread. The ;; procedures themselves do no consing. The tokenizer currently ;; needs two-symbol lookahead, due to ambiguous "/" while parsing ;; element and attribute names, which could be either empty-tag ;; syntax or XML qualified names. (c #f) (next-c #f) (c-consumed? #t) (read-c (lambda () (if c-consumed? (if next-c (begin (set! c next-c) (set! next-c #f)) (set! c (read-char in))) (set! c-consumed? #t)))) (unread-c (lambda () (if c-consumed? (set! c-consumed? #f) ;; TODO: Procedure name in error message really ;; isn't "make-html-tokenizer"... (htmlprag-internal:error "make-html-tokenizer" "already unread" c)))) (push-c (lambda (new-c) (if c-consumed? (begin (set! c new-c) (set! c-consumed? #f)) (if next-c (htmlprag-internal:error "make-html-tokenizer" "pushback full" c) (begin (set! next-c c) (set! c new-c) (set! c-consumed? #f)))))) ;; TODO: These procedures are a temporary convenience for ;; enumerating the pertinent character classes, with an eye ;; towards removing redundant tests of character class. These ;; procedures should be eliminated in a future version. (c-eof? (lambda () (eof-object? c))) (c-amp? (lambda () (eqv? c #\&))) (c-apos? (lambda () (eqv? c #\'))) (c-bang? (lambda () (eqv? c #\!))) (c-colon? (lambda () (eqv? c #\:))) (c-quot? (lambda () (eqv? c #\"))) (c-equals? (lambda () (eqv? c #\=))) (c-gt? (lambda () (eqv? c #\>))) (c-lt? (lambda () (eqv? c #\<))) (c-minus? (lambda () (eqv? c #\-))) (c-pound? (lambda () (eqv? c #\#))) (c-ques? (lambda () (eqv? c #\?))) (c-semi? (lambda () (eqv? c #\;))) (c-slash? (lambda () (eqv? c #\/))) (c-splat? (lambda () (eqv? c #\*))) (c-lf? (lambda () (eqv? c #\newline))) (c-angle? (lambda () (memv c '(#\< #\>)))) (c-ws? (lambda () (memv c ws-chars))) (c-alpha? (lambda () (char-alphabetic? c))) (c-digit? (lambda () (char-numeric? c))) (c-alphanum? (lambda () (or (c-alpha?) (c-digit?)))) (c-hexlet? (lambda () (memv c '(#\a #\b #\c #\d #\e #\f #\A #\B #\C #\D #\E #\F)))) (skip-ws (lambda () (read-c) (if (c-ws?) (skip-ws) (unread-c)))) (make-start-token (if normalized? (lambda (name ns attrs) (list name (cons htmlprag-internal:at attrs))) (lambda (name ns attrs) (if (null? attrs) (list name) (list name (cons htmlprag-internal:at attrs)))))) (make-empty-token (lambda (name ns attrs) (cons shtml-empty-symbol (make-start-token name ns attrs)))) (make-end-token (if normalized? (lambda (name ns attrs) (list shtml-end-symbol name (cons htmlprag-internal:at attrs))) (lambda (name ns attrs) (if (null? attrs) (list shtml-end-symbol name) (list shtml-end-symbol name (cons htmlprag-internal:at attrs)))))) (make-named-char-token (lambda (name-str) (list shtml-entity-symbol shtml-named-char-id name-str))) (make-numeric-char-token (lambda (number) (list shtml-entity-symbol shtml-numeric-char-id (number->string number)))) (make-comment-token (lambda (str) (list shtml-comment-symbol str))) (make-decl-token (lambda (parts) (cons shtml-decl-symbol parts))) (scan-qname ;; TODO: Make sure we don't accept local names that have "*", since ;; this can break SXML tools. Have to validate this ;; afterwards if "verbatim-safe?". Also check for "@" and ;; maybe "@@". Check qname parsing code, especially for ;; verbatim mode. This is important! (lambda (verbatim-safe?) ;; Note: If we accept some invalid local names, we only need two ;; symbols of lookahead to determine the end of a qname. (letrec ((os #f) (ns '()) (vcolons 0) (good-os (lambda () (or os (begin (set! os (open-output-string)) os))))) (let loop () (read-c) (cond ((c-eof?) #f) ((or (c-ws?) (c-splat?)) (if verbatim-safe? (unread-c))) ((or (c-angle?) (c-equals?) (c-quot?) (c-apos?)) (unread-c)) ((c-colon?) (or (null? ns) (set! ns (cons ":" ns))) (if os (begin (set! ns (cons (htmlprag-internal:gosc os) ns)) (set! os #f))) (loop)) ((c-slash?) (read-c) (cond ((or (c-eof?) (c-ws?) (c-equals?) (c-apos?) (c-quot?) (c-angle?) (c-splat?)) (unread-c) (push-c #\/)) (else (write-char #\/ (good-os)) (write-char c os) (loop)))) (else (write-char c (good-os)) (loop)))) (let ((ns (if (null? ns) #f (apply string-append (htmlprag-internal:reverse!ok ns)))) (local (if os (htmlprag-internal:gosc os) #f))) (if verbatim-safe? ;; TODO: Make sure we don't have ambiguous ":" or drop ;; any characters! (cons ns local) ;; Note: We represent "xmlns:" syntax as normal qnames, ;; for lack of something better to do with them when we ;; don't support XML namespaces. ;; ;; TODO: Local names are currently forced to lowercase, ;; since HTML is usually case-insensitive. If XML ;; namespaces are used, we might wish to keep local ;; names case-sensitive. (if local (if ns (if (string=? ns "xmlns") (string->symbol (string-append ns ":" local)) (cons ns (string->symbol (htmlprag-internal:down!ok local)))) (string->symbol (htmlprag-internal:down!ok local))) (if ns (string->symbol (htmlprag-internal:down!ok ns)) ;; TODO: Ensure that it's OK to return #f as a ;; name. #f))))))) (scan-tag (lambda (start?) (skip-ws) (let ((tag-name (scan-qname #f)) (tag-ns #f) (tag-attrs #f) (tag-empty? #f)) ;; Scan element name. (if (pair? tag-name) (begin (set! tag-ns (car tag-name)) (set! tag-name (cdr tag-name)))) ;; TODO: Ensure there's no case in which a #f tag-name isn't ;; compensated for later. ;; ;; Scan element attributes. (set! tag-attrs (let scan-attr-list () (read-c) (cond ((c-eof?) '()) ((c-angle?) (unread-c) '()) ((c-slash?) (set! tag-empty? #t) (scan-attr-list)) ((c-alpha?) (unread-c) (let ((attr (scan-attr))) (cons attr (scan-attr-list)))) (else (scan-attr-list))))) ;; Find ">" or unnatural end. (let loop () (read-c) (cond ((c-eof?) no-token) ((c-slash?) (set! tag-empty? #t) (loop)) ((c-gt?) #f) ((c-ws?) (loop)) (else (unread-c)))) ;; Change the tokenizer mode if necessary. (cond ((not start?) #f) (tag-empty? #f) ;; TODO: Maybe make one alist lookup here, instead of ;; two. ((memq tag-name verbatim-to-eof-elems) (set! nexttok verbeof-nexttok)) ((memq tag-name verbatim-pair-elems) (set! nexttok (make-verbpair-nexttok tag-name)))) ;; Return a token object. (if start? (if tag-empty? (make-empty-token tag-name tag-ns tag-attrs) (make-start-token tag-name tag-ns tag-attrs)) (make-end-token tag-name tag-ns tag-attrs))))) (scan-attr (lambda () (let ((name (scan-qname #f)) (val #f)) (if (pair? name) (set! name (cdr name))) (let loop-equals-or-end () (read-c) (cond ((c-eof?) no-token) ((c-ws?) (loop-equals-or-end)) ((c-equals?) (let loop-quote-or-unquoted () (read-c) (cond ((c-eof?) no-token) ((c-ws?) (loop-quote-or-unquoted)) ((or (c-apos?) (c-quot?)) (let ((term c)) (set! val (open-output-string)) (let loop-quoted-val () (read-c) (cond ((c-eof?) #f) ((eqv? c term) #f) (else (write-char c val) (loop-quoted-val)))))) ((c-angle?) (unread-c)) (else (set! val (open-output-string)) (write-char c val) (let loop-unquoted-val () (read-c) (cond ((c-eof?) no-token) ((c-apos?) #f) ((c-quot?) #f) ((or (c-ws?) (c-angle?) (c-slash?)) (unread-c)) (else (write-char c val) (loop-unquoted-val)))))))) (else (unread-c)))) (if normalized? (list name (if val (htmlprag-internal:gosc val) (symbol->string name))) (if val (list name (htmlprag-internal:gosc val)) (list name)))))) (scan-comment ;; TODO: Rewrite this to use tail recursion rather than a state ;; variable. (lambda () (let ((os (open-output-string)) (state 'start-minus)) (let loop () (read-c) (cond ((c-eof?) #f) ((c-minus?) (set! state (case state ((start-minus) 'start-minus-minus) ((start-minus-minus body) 'end-minus) ((end-minus) 'end-minus-minus) ((end-minus-minus) (write-char #\- os) state) (else (htmlprag-internal:error "make-html-tokenizer" "invalid state" state)))) (loop)) ((and (c-gt?) (eq? state 'end-minus-minus)) #f) (else (case state ((end-minus) (write-char #\- os)) ((end-minus-minus) (display "--" os))) (set! state 'body) (write-char c os) (loop)))) (make-comment-token (htmlprag-internal:gosc os))))) (scan-pi (lambda () (skip-ws) (let ((name (open-output-string)) (val (open-output-string))) (let scan-name () (read-c) (cond ((c-eof?) #f) ((c-ws?) #f) ((c-alpha?) (write-char c name) (scan-name)) (else (unread-c)))) ;; TODO: Do we really want to emit #f for PI name? (set! name (output-string->symbol-or-false name)) (let scan-val () (read-c) (cond ((c-eof?) #f) ;; ((c-amp?) (display (scan-entity) val) ;; (scan-val)) ((c-ques?) (read-c) (cond ((c-eof?) (write-char #\? val)) ((c-gt?) #f) (else (write-char #\? val) (unread-c) (scan-val)))) (else (write-char c val) (scan-val)))) (list shtml-pi-symbol name (htmlprag-internal:gosc val))))) (scan-decl ;; TODO: Find if SXML includes declaration forms, and if so, ;; use whatever format SXML wants. ;; ;; TODO: Rewrite to eliminate state variables. (letrec ((scan-parts (lambda () (let ((part (open-output-string)) (nonsymbol? #f) (state 'before) (last? #f)) (let loop () (read-c) (cond ((c-eof?) #f) ((c-ws?) (case state ((before) (loop)) ((quoted) (write-char c part) (loop)))) ((and (c-gt?) (not (eq? state 'quoted))) (set! last? #t)) ((and (c-lt?) (not (eq? state 'quoted))) (unread-c)) ((c-quot?) (case state ((before) (set! state 'quoted) (loop)) ((unquoted) (unread-c)) ((quoted) #f))) (else (if (eq? state 'before) (set! state 'unquoted)) (set! nonsymbol? (or nonsymbol? (not (c-alphanum?)))) (write-char c part) (loop)))) (set! part (htmlprag-internal:gosc part)) (if (string=? part "") '() (cons (if (or (eq? state 'quoted) nonsymbol?) part ;; TODO: Normalize case of things we make ;; into symbols here. (string->symbol part)) (if last? '() (scan-parts)))))))) (lambda () (make-decl-token (scan-parts))))) (scan-entity (lambda () (read-c) (cond ((c-eof?) "&") ((c-alpha?) ;; TODO: Do entity names have a maximum length? (let ((name (open-output-string))) (write-char c name) (let loop () (read-c) (cond ((c-eof?) #f) ((c-alpha?) (write-char c name) (loop)) ((c-semi?) #f) (else (unread-c)))) (set! name (htmlprag-internal:gosc name)) ;; TODO: Make the entity map an option. (let ((pair (assoc name '(("amp" . "&") ("apos" . "'") ("gt" . ">") ("lt" . "<") ("quot" . "\""))))) (if pair (cdr pair) (make-named-char-token name))))) ((c-pound?) (let ((num (open-output-string)) (hex? #f)) (read-c) (cond ((c-eof?) #f) ((memv c '(#\x #\X)) (set! hex? #t) (read-c))) (let loop () (cond ((c-eof?) #f) ((c-semi?) #f) ((or (c-digit?) (and hex? (c-hexlet?))) (write-char c num) (read-c) (loop)) (else (unread-c)))) (set! num (htmlprag-internal:gosc num)) (if (string=? num "") "&#;" (let ((n (string->number num (if hex? 16 10)))) (if (and (<= 32 n 255) (not (= n 127))) (string (htmlprag-internal:a2c n)) (make-numeric-char-token n)))))) (else (unread-c) "&")))) (normal-nexttok (lambda () (read-c) (cond ((c-eof?) no-token) ((c-lt?) (let loop () (read-c) (cond ((c-eof?) "<") ((c-ws?) (loop)) ((c-slash?) (scan-tag #f)) ((c-ques?) (scan-pi)) ((c-bang?) (let loop () (read-c) (cond ((c-eof?) no-token) ((c-ws?) (loop)) ((c-minus?) (scan-comment)) (else (unread-c) (scan-decl))))) ((c-alpha?) (unread-c) (scan-tag #t)) (else (unread-c) "<")))) ((c-gt?) ">") (else (let ((os (open-output-string))) (let loop () (cond ((c-eof?) #f) ((c-angle?) (unread-c)) ((c-amp?) (let ((entity (scan-entity))) (if (string? entity) (begin (display entity os) (read-c) (loop)) (let ((saved-nexttok nexttok)) (set! nexttok (lambda () (set! nexttok saved-nexttok) entity)))))) (else (write-char c os) (or (c-lf?) (begin (read-c) (loop)))))) (let ((text (htmlprag-internal:gosc os))) (if (equal? text "") (nexttok) text))))))) (verbeof-nexttok (lambda () (read-c) (if (c-eof?) no-token (let ((os (open-output-string))) (let loop () (or (c-eof?) (begin (write-char c os) (or (c-lf?) (begin (read-c) (loop)))))) (htmlprag-internal:gosc os))))) (make-verbpair-nexttok (lambda (elem-name) (lambda () (let ((os (open-output-string))) ;; Accumulate up to a newline-terminated line. (let loop () (read-c) (cond ((c-eof?) ;; Got EOF in verbatim context, so set the normal ;; nextok procedure, then fall out of loop. (set! nexttok normal-nexttok)) ((c-lt?) ;; Got "<" in verbatim context, so get next ;; character. (read-c) (cond ((c-eof?) ;; Got "<" then EOF, so set to the normal ;; nexttok procedure, add the "<" to the ;; verbatim string, and fall out of loop. (set! nexttok normal-nexttok) (write-char #\< os)) ((c-slash?) ;; Got "symbol (htmlprag-internal:down local)) elem-name)) ;; This is the terminator tag, so ;; scan to the end of it, set the ;; nexttok, and fall out of the loop. (begin (let scan-to-end () (read-c) (cond ((c-eof?) #f) ((c-gt?) #f) ((c-lt?) (unread-c)) ((c-alpha?) (unread-c) ;; Note: This is an ;; expensive way to skip ;; over an attribute, but ;; in practice more ;; verbatim end tags will ;; not have attributes. (scan-attr) (scan-to-end)) (else (scan-to-end)))) (set! nexttok (lambda () (set! nexttok normal-nexttok) (make-end-token elem-name #f '())))) ;; This isn't the terminator tag, so ;; add to the verbatim string the ;; "string-or-false os) (nexttok)))))) (nexttok #f)) (set! nexttok normal-nexttok) (lambda () (nexttok)))))) ;;; @defproc tokenize-html in normalized? ;;; ;;; Returns a list of tokens from input port @var{in}, normalizing according to ;;; boolean @var{normalized?}. This is probably most useful as a debugging ;;; convenience. For example: ;;; ;;; @lisp ;;; (tokenize-html (open-input-string "bar") #f) ;;; @result{} ((a (@@ (href "foo"))) "bar" (*END* a)) ;;; @end lisp (define (tokenize-html in normalized?) (let ((next-tok (make-html-tokenizer in normalized?))) (let loop ((tok (next-tok))) (if (null? tok) '() (cons tok (loop (next-tok))))))) ;;; @defproc shtml-token-kind token ;;; ;;; Returns a symbol indicating the kind of tokenizer @var{token}: ;;; @code{*COMMENT*}, @code{*DECL*}, @code{*EMPTY*}, @code{*END*}, ;;; @code{*ENTITY*}, @code{*PI*}, @code{*START*}, @code{*TEXT*}. ;;; This is used by higher-level parsing code. For example: ;;; ;;; @lisp ;;; (map shtml-token-kind ;;; (tokenize-html (open-input-string ">shtml} rather than calling the tokenizer directly. ;; @defvar htmlprag-internal:empty-elements ;; ;; List of names of HTML element types that have no content, represented as a ;; list of symbols. This is used internally by the parser and encoder. The ;; effect of mutating this list is undefined. ;; TODO: Document exactly which elements these are, after we make the new ;; parameterized parser constructor. (define htmlprag-internal:empty-elements '(area base br frame hr img input isindex keygen link meta object param spacer wbr)) ;;; @defproc parse-html/tokenizer tokenizer normalized? ;;; ;;; Emits a parse tree like @code{html->shtml} and related procedures, except ;;; using @var{tokenizer} as a source of tokens, rather than tokenizing from an ;;; input port. This procedure is used internally, and generally should not be ;;; called directly. (define parse-html/tokenizer ;; TODO: Document the algorithm, then see if rewriting as idiomatic Scheme ;; can make it more clear. (letrec ((empty-elements ;; TODO: Maybe make this an option. This might also be an ;; acceptable way to parse old HTML that uses the `p' element ;; as a paragraph terminator. htmlprag-internal:empty-elements) (parent-constraints ;; TODO: Maybe make this an option. '((area . (map)) (body . (html)) (caption . (table)) (colgroup . (table)) (dd . (dl)) (dt . (dl)) (frame . (frameset)) (head . (html)) (isindex . (head)) (li . (dir menu ol ul)) (meta . (head)) (noframes . (frameset)) (option . (select)) (p . (body td th)) (param . (applet)) (tbody . (table)) (td . (tr)) (th . (tr)) (thead . (table)) (title . (head)) (tr . (table tbody thead)))) (start-tag-name (lambda (tag-token) (car tag-token))) (end-tag-name (lambda (tag-token) (list-ref tag-token 1)))) (lambda (tokenizer normalized?) ;; Example `begs' value: ;; ;; ( ((head ...) . ( (title ...) )) ;; ((html ...) . ( (head ...) (*COMMENT* ...) )) ;; (#f . ( (html ...) (*DECL* doctype ...) )) ) (let ((begs (list (cons #f '())))) (letrec ((add-to-current-beg (lambda (tok) (set-cdr! (car begs) (cons tok (cdr (car begs)))))) (finish-all-begs (lambda () (let ((toplist #f)) (map (lambda (beg) (set! toplist (finish-beg beg))) begs) toplist))) (finish-beg (lambda (beg) (let ((start-tok (car beg))) (if start-tok (htmlprag-internal:append! (car beg) (htmlprag-internal:reverse!ok (cdr beg))) (htmlprag-internal:reverse!ok (cdr beg)))))) (finish-begs-to (lambda (name lst) (let* ((top (car lst)) (starttag (car top))) (cond ((not starttag) #f) ((eqv? name (start-tag-name starttag)) (set! begs (cdr lst)) (finish-beg top) #t) (else (if (finish-begs-to name (cdr lst)) (begin (finish-beg top) #t) #f)))))) (finish-begs-upto (lambda (parents lst) (let* ((top (car lst)) (starttag (car top))) (cond ((not starttag) #f) ((memq (start-tag-name starttag) parents) (set! begs lst) #t) (else (if (finish-begs-upto parents (cdr lst)) (begin (finish-beg top) #t) #f))))))) (let loop () (let ((tok (tokenizer))) (if (null? tok) (finish-all-begs) (let ((kind (shtml-token-kind tok))) (cond ((memv kind `(,shtml-comment-symbol ,shtml-decl-symbol ,shtml-entity-symbol ,shtml-pi-symbol ,shtml-text-symbol)) (add-to-current-beg tok)) ((eqv? kind shtml-start-symbol) (let* ((name (start-tag-name tok)) (cell (assq name parent-constraints))) (and cell (finish-begs-upto (cdr cell) begs)) (add-to-current-beg tok) (or (memq name empty-elements) (set! begs (cons (cons tok '()) begs))))) ((eqv? kind shtml-empty-symbol) ;; Empty tag token, so just add it to current ;; beginning while stripping off leading `*EMPTY*' ;; symbol so that the token becomes normal SXML ;; element syntax. (add-to-current-beg (cdr tok))) ((eqv? kind shtml-end-symbol) (let ((name (end-tag-name tok))) (if name ;; Try to finish to a start tag matching this ;; end tag. If none, just drop the token, ;; though we used to add it to the current ;; beginning. (finish-begs-to name begs) ;; We have an anonymous end tag, so match it ;; with the most recent beginning. If no ;; beginning to match, then just drop the ;; token, though we used to add it to the ;; current beginning. (and (car (car begs)) (begin (finish-beg (car begs)) (set! begs (cdr begs))))))) (else (htmlprag-internal:error "parse-html/tokenizer" "unknown tag kind" kind))) (loop)))))))))) ;; @defproc htmlprag-internal:parse-html input normalized? top? ;; ;; This procedure is now used internally by @code{html->shtml} and its ;; variants, and should not be used directly by programs. The interface is ;; likely to change in future versions of HtmlPrag. (define (htmlprag-internal:parse-html input normalized? top?) (let ((parse (lambda () (parse-html/tokenizer (make-html-tokenizer (cond ((input-port? input) input) ((string? input) (open-input-string input)) (else (htmlprag-internal:error "htmlprag-internal:parse-html" "invalid input type" input))) normalized?) normalized?)))) (if top? (cons shtml-top-symbol (parse)) (parse)))) ;;; @defproc html->sxml-0nf input ;;; @defprocx html->sxml-1nf input ;;; @defprocx html->sxml-2nf input ;;; @defprocx html->sxml input ;;; ;;; Permissively parse HTML from @var{input}, which is either an input port or ;;; a string, and emit an SHTML equivalent or approximation. To borrow and ;;; slightly modify an example from [SSAX-HTML]: ;;; ;;; @lisp ;;; (html->shtml ;;; "whatever ;;; link