pax_global_header00006660000000000000000000000064131117726370014522gustar00rootroot0000000000000052 comment=b561cdfc53fd97d0317ccb6790952debb2d138ca esrap-20170630-git/000077500000000000000000000000001311177263700136615ustar00rootroot00000000000000esrap-20170630-git/.travis.yml000066400000000000000000000013151311177263700157720ustar00rootroot00000000000000language: lisp env: PREFIX="$(pwd)/sbcl" SBCL_HOME="$(pwd)/sbcl/lib/sbcl" SBCL="$(pwd)/sbcl/bin/sbcl" SBCL_OPTIONS="--noinform --no-userinit" install: - curl -L sbcl.tar.bz2 "${SBCL_DOWNLOAD_URL}" | tar -xj - ( cd sbcl-* && INSTALL_ROOT="${PREFIX}" sh install.sh ) - curl -o cl "${CL_LAUNCH_DOWNLOAD_URL}" - chmod +x cl - curl -o quicklisp.lisp "${QUICKLISP_DOWNLOAD_URL}" - ./cl -L quicklisp.lisp '(quicklisp-quickstart:install :path "quicklisp/")' script: - ./cl -S '(:source-registry (:directory "'$(pwd)'") :ignore-inherited-configuration)' -e '(load "quicklisp/setup.lisp")' -e '(ql:quickload :esrap/tests)' '(or (esrap-tests:run-tests) (uiop:quit -1))' esrap-20170630-git/Makefile000066400000000000000000000005571311177263700153300ustar00rootroot00000000000000.PHONY: doc web wc clean all test all: echo "Targets: clean, wc, doc, test, web" clean: rm -f *.fasl *~ make -C doc clean make -C web clean wc: wc -l *.lisp doc: make -C doc web: doc make -C web gh-pages: web rm -rf web-tmp mv web web-tmp git checkout gh-pages cp web-tmp/index.html . git commit -a -c master mv web-tmp web git checkout -f master esrap-20170630-git/README.org000066400000000000000000000132211311177263700153260ustar00rootroot00000000000000#+TITLE: ESRAP -- a packrat parser for Common Lisp * Introduction In addition to regular Packrat / Parsing Grammar / TDPL features ESRAP supports: + dynamic redefinition of nonterminals + inline grammars + semantic predicates + introspective facilities (describing grammars, tracing, setting breaks) + left-recursive grammars + functions as terminals + accurate, customizable parse error reports Homepage & Documentation https://scymtym.github.io/esrap/ #+ATTR_HTML: :alt "build status image" :title Build Status :align right [[https://travis-ci.org/scymtym/esrap][https://travis-ci.org/scymtym/esrap.svg]] References + Bryan Ford, 2002, "Packrat Parsing: a Practical Linear Time Algorithm with Backtracking". http://pdos.csail.mit.edu/~baford/packrat/thesis/ + A. Warth et al, 2008, "Packrat Parsers Can Support Left Recursion". http://www.vpri.org/pdf/tr2007002_packrat.pdf License #+begin_example Copyright (c) 2007-2013 Nikodemus Siivola Copyright (c) 2012-2017 Jan Moringen Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #+end_example * Syntax Overview #+begin_example -- case-sensitive terminal (~ ) -- case-insensitive terminal character -- any single character (string ) -- any string of length (character-ranges ) -- character ranges ( ) -- semantic parsing (function ) -- call to parse some text (not ) -- complement of expression (and &rest ) -- sequence (or &rest ) -- ordered-choices (* ) -- greedy-repetition (+ ) -- greedy-positive-repetition (? ) -- optional (& ) -- followed-by; does not consume (! ) -- not-followed-by; does not consume (< ) -- lookbehind characters; does not consume (> ) -- lookahead characters; does not consume #+end_example * Trivial Examples #+begin_src lisp :results none :exports none :session "doc" (ql:quickload :esrap) #+end_src The =parse= function takes an expression: #+begin_src lisp :results value code :exports both :session "doc" (multiple-value-list (esrap:parse '(or "foo" "bar") "foo")) #+end_src #+RESULTS: #+BEGIN_SRC lisp ("foo" NIL T) #+END_SRC New rules can be added. Normally you'd use the declarative =defrule= interface to define new rules, but everything it does can be done directly by building instances of the =rule= class and using =add-rule= to activate them. #+begin_src lisp :results value code :exports both :session "doc" (progn (esrap:add-rule 'foo+ (make-instance 'esrap:rule :expression '(+ "foo"))) (multiple-value-list (esrap:parse 'foo+ "foofoofoo"))) #+end_src #+RESULTS: #+BEGIN_SRC lisp (("foo" "foo" "foo") NIL T) #+END_SRC The equivalent =defrule= form is #+begin_src lisp :results value code :exports code :session "doc" (esrap:defrule foo+ '(+ "foo")) #+end_src Note that rules can be redefined, i.e. this =defrule= form replaces the previous definition of the =foo+= rule. Rules can transform their matches: #+begin_src lisp :results silent :exports code :session "doc" (esrap:add-rule 'decimal (make-instance 'esrap:rule :expression '(+ (or "0" "1" "2" "3" "4" "5" "6" "7" "8" "9")) :transform (lambda (list start end) (declare (ignore start end)) (parse-integer (format nil "~{~A~}" list))))) #+end_src or using =defrule= #+begin_src lisp :results value code :exports code :session "doc" (esrap:defrule decimal (+ (or "0" "1" "2" "3" "4" "5" "6" "7" "8" "9")) (:lambda (list) (parse-integer (format nil "~{~A~}" list)))) #+end_src Any lisp function can be used as a semantic predicate: #+begin_src lisp :results value code :exports both :session "doc" (list (multiple-value-list (esrap:parse '(oddp decimal) "123")) (multiple-value-list (esrap:parse '(evenp decimal) "123" :junk-allowed t))) #+end_src #+RESULTS: #+BEGIN_SRC lisp ((123 NIL T) (NIL 0)) #+END_SRC * Example Files More complete examples can be found in the following self-contained example files: + [[file:examples/sexp.lisp]]: complete sample grammar and usage + [[file:examples/symbol-table.lisp]]: grammar with lexical scope + [[file:examples/left-recursion.lisp]]: multiple grammars with left recursion + [[file:examples/function-terminals.lisp]]: grammars with functions as terminals esrap-20170630-git/TODO.org000066400000000000000000000132001311177263700151330ustar00rootroot00000000000000Esrap TODO * Optimizations ** DONE Common special variable for cache and heads As access to special variables can be slow, it may make sense to store =*cache*= and =*head*= in slots of a structure =context= and this structure in a new special variable =*context*=. ** TODO Error vs Success results We're interested in the production/failure, and the position. The vast majority of parses result in failures, and we cons up a =failed-parse= for each. Unless the whole parse fails, the only thing we are interested in is the position. ...and even if the whole parse fails, we use the additional information only to display a fancy error message. So: unless =parse= has been called with =:debug t=, use the =fixnum= indicating the position to indicate a failed parse. ** TODO Results of sequence expressions In the case of a success and matching a sequence we don't really need the whole result object for each submatch. Maybe return result as multiple values from each rule, and have =with-cached-result= cons up the object to store it when? ** TODO Character ranges =(or "0" "1" "2" "3" "4" "5" "6" "7" "8" "9")= can be implemented with a range-check for =char-code=. Similarly (or "foobar" "foo" "bar") can be implemented using tricks from pkhuong's STRING-CASE. ** STARTED Cache The cache is a big bottleneck. Try out a few different designs. Early experiments show that while it's easy to make something that conses less, it's not trivial to make it much faster than the current simple hash-table based version. Some statistics: - 5-10% of positions in a given text have only failure results. If we can efficiently record the rules these are for... - 40-50% of positions in a given text end up with exactly one successful result, irrespective of number of failures. Not sure if we can use this. - 75% of positions in a given text end up with results. This should make a good estimate for the size of the cache needed. GC is another related bottleneck. Not because we cons so much, but because we have this massive cache that keeps being written to, so we have boxed objects on dirty pages. To reduce the GC pressure first optimize the result handling. If the issue still exists, see the first option below. Maybe: Map rule to a position cache. In the position cache, need to be able to differentiate between 3 states: no result, success, failure. Need to also be able to store the result. If we store results in a single global result vector, and use N bits per position in the position cache: 0 no result, 1 failure, anything else is the position of the result object in the global vector. Maybe: PCL-style multikey cache. Maybe: Basic two-level cache. (Version of this on a branch.) * Features ** TODO Thread safety Parsing is currently thread-safe if =parse= has been compiler-macroexanded. =*RULES*= needs locking, but isn't used during actual parsing. ** TODO Add =define-expression-syntax= ** STARTED Grammar objects Rules should be contained in grammars, so that symbols like =cl:if= can refer to different rules in different contexts. Grammars can also enforce rule numbering, making caching results easier. It should be possible to inherit from other grammars. *** TODO Optimizations leading to old rules being used are in principle fine but I would like to have that behind a flag so it can be turned off for debugging. No need to export or document the flag. *** DONE Accepting designators is good *** DONE No circular dependencies sounds good to me *** DONE Overall I value backwards compatibility highly. If keeping it doesn't make things clearly worse or implementation much harder, I would keep it. Therefore I would prefer ADD-RULE and &co to default to grammar at runtime, and have that as either a keyword or an optional argument. Analogously to how INTERN &co work. *** DONE DEFRULE on the other hand should IMO choose the grammar (if not explicitly given) at compile-time if at all possible. Earlier is better there, I suspect. *** DONE Naming things with string designators instead of symbols. I see the attraction, but this means that all rules in a grammar are public and prone to conflict, no? If I have a grammar G1 in package BAR that specifies a rule called BAR:WHITESPACE and a G2 in FOO that specified FOO:WHITESPACE, then G3 in QUUX can use both without problems. If both rules are really called "WHITESPACE", things can get confusing pretty quickly... *** DONE Grammar names in tests ** STARTED Character classes Have =standard-grammar= instances that define things like digit, whitespace, ascii, etc. This will probably be done in a different system. ** CANCELED Transform Subseq #+BEGIN_SRC lisp (defrule decimal (+ (or "0" "1" ...)) (:subseq-function parse-integer)) #+END_SRC ** DONE Character ranges Make it easy to specify character ranges, eg. =(char #\0 #\9)=. * Improvements ** TODO Run all tests in evaluated mode ** TODO Get rid of =*current-cell*= ** TODO Documentation strings Structures and classes have a mixture of documentation strings and documentation comments. Which do we want? After deciding, make this consistent. ** STARTED Reference example files from manual ** DONE Better error reports For parse errors, in particular "incomplete parse" errors, provide a better description of where the parse actually failed, which rule or rules were involved and what input was expected. ** DONE Tests for rule tracing functionality ** DONE Remove =concat= * Bugs ** NEW Tracing does not work for interpreted rules esrap-20170630-git/coverage.lisp000066400000000000000000000030061311177263700163440ustar00rootroot00000000000000;;;; coverage.lisp --- Helper script for coverage report generation. ;;;; ;;;; Copyright (C) 2014 Jan Moringen ;;;; ;;;; Author: Jan Moringen (require :sb-cover) (defun compute-coverage-for-system (system &key (output-directory (merge-pathnames (concatenate 'string (string system) "/") "coverage-report/"))) (flet ((set-store-coverage (storep) (eval `(declaim (optimize (sb-cover:store-coverage-data ,(if storep 3 0)))))) (load-system-silently (system &rest args) (let* ((*standard-output* (make-broadcast-stream)) (*trace-output* *standard-output*)) (handler-bind ((style-warning #'muffle-warning)) (apply #'asdf:load-system system args))))) (load-system-silently system) ; load dependencies (unwind-protect (progn (set-store-coverage t) (load-system-silently system :force t) (set-store-coverage nil) (let ((*compile-print* nil) (*compile-progress* nil) (*compile-verbose* nil)) (asdf:test-system system)) (sb-cover:report output-directory)) (set-store-coverage nil) (load-system-silently system :force t) (sb-cover:clear-coverage)))) (mapcar #'compute-coverage-for-system '(:esrap)) esrap-20170630-git/coverage.sh000077500000000000000000000007621311177263700160200ustar00rootroot00000000000000#!/bin/sh # Helper script for coverage report generation. # # Copyright (C) 2014 Jan Moringen # # Author: Jan Moringen SBCL="${HOME}/opt/sbcl/bin/sbcl" QUICKLISP="${HOME}/.local/share/common-lisp/quicklisp" "${SBCL}" --noinform --disable-ldb --lose-on-corruption \ --no-userinit --disable-debugger \ --load "${QUICKLISP}/setup.lisp" \ --load "coverage.lisp" \ --quit esrap-20170630-git/doc/000077500000000000000000000000001311177263700144265ustar00rootroot00000000000000esrap-20170630-git/doc/.gitignore000066400000000000000000000000171311177263700164140ustar00rootroot00000000000000*.html include esrap-20170630-git/doc/Makefile000066400000000000000000000021121311177263700160620ustar00rootroot00000000000000.PHONY: clean html include doc doc: html clean: rm -rf include rm -f *.pdf *.html *.info rm -f *.aux *.cp *.fn *.fns *.ky *.log *.pg *.toc *.tp *.tps *.vr include: sbcl \ --noinform --disable-ldb --lose-on-corruption \ --no-userinit --disable-debugger \ --eval '(require :asdf)' \ --eval '(let ((asdf:*central-registry* (cons #p"../" asdf:*central-registry*))) (require :esrap))' \ --load docstrings.lisp \ --eval '(sb-texinfo:generate-includes "include/" (list :esrap) :base-package :esrap)' \ --quit esrap.html: esrap.texinfo style.css docstrings.lisp ../*.lisp ../*.asd make include makeinfo --html --no-split --css-include=style.css esrap.texinfo html: esrap.html esrap-20170630-git/doc/docstrings.lisp000066400000000000000000001071311311177263700175010ustar00rootroot00000000000000;;; -*- lisp -*- ;;;; A docstring extractor for the sbcl manual. Creates ;;;; @include-ready documentation from the docstrings of exported ;;;; symbols of specified packages. ;;;; This software is part of the SBCL software system. SBCL is in the ;;;; public domain and is provided with absolutely no warranty. See ;;;; the COPYING file for more information. ;;;; ;;;; Written by Rudi Schlatte , mangled ;;;; by Nikodemus Siivola. ;;;; TODO ;;;; * Verbatim text ;;;; * Quotations ;;;; * Method documentation untested ;;;; * Method sorting, somehow ;;;; * Index for macros & constants? ;;;; * This is getting complicated enough that tests would be good ;;;; * Nesting (currently only nested itemizations work) ;;;; * doc -> internal form -> texinfo (so that non-texinfo format are also ;;;; easily generated) ;;;; FIXME: The description below is no longer complete. This ;;;; should possibly be turned into a contrib with proper documentation. ;;;; Formatting heuristics (tweaked to format SAVE-LISP-AND-DIE sanely): ;;;; ;;;; Formats SYMBOL as @code{symbol}, or @var{symbol} if symbol is in ;;;; the argument list of the defun / defmacro. ;;;; ;;;; Lines starting with * or - that are followed by intented lines ;;;; are marked up with @itemize. ;;;; ;;;; Lines containing only a SYMBOL that are followed by indented ;;;; lines are marked up as @table @code, with the SYMBOL as the item. (eval-when (:compile-toplevel :load-toplevel :execute) (require 'sb-introspect)) (defpackage :sb-texinfo (:use :cl :sb-mop) (:shadow #:documentation) (:export #:generate-includes #:document-package) (:documentation "Tools to generate TexInfo documentation from docstrings.")) (in-package :sb-texinfo) ;;;; various specials and parameters (defvar *texinfo-output*) (defvar *texinfo-variables*) (defvar *documentation-package*) (defvar *base-package*) (defparameter *undocumented-packages* '(sb-pcl sb-int sb-kernel sb-sys sb-c)) (defparameter *documentation-types* '(compiler-macro function method-combination setf ;;structure ; also handled by `type' type variable) "A list of symbols accepted as second argument of `documentation'") (defparameter *character-replacements* '((#\* . "star") (#\/ . "slash") (#\+ . "plus") (#\< . "lt") (#\> . "gt") (#\= . "equals")) "Characters and their replacement names that `alphanumize' uses. If the replacements contain any of the chars they're supposed to replace, you deserve to lose.") (defparameter *characters-to-drop* '(#\\ #\` #\') "Characters that should be removed by `alphanumize'.") (defparameter *texinfo-escaped-chars* "@{}" "Characters that must be escaped with #\@ for Texinfo.") (defparameter *itemize-start-characters* '(#\* #\-) "Characters that might start an itemization in docstrings when at the start of a line.") (defparameter *symbol-characters* "ABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890*=<>:-+&#'!?/" "List of characters that make up symbols in a docstring.") (defparameter *symbol-delimiters* " ,.!?;[]") (defparameter *ordered-documentation-kinds* '(package type structure condition class macro)) ;;;; utilities (defun flatten (list) (cond ((null list) nil) ((consp (car list)) (nconc (flatten (car list)) (flatten (cdr list)))) ((null (cdr list)) (cons (car list) nil)) (t (cons (car list) (flatten (cdr list)))))) (defun whitespacep (char) (find char #(#\tab #\space #\page))) (defun setf-name-p (name) (or (symbolp name) (and (listp name) (= 2 (length name)) (eq (car name) 'setf)))) (defgeneric specializer-name (specializer)) (defmethod specializer-name ((specializer eql-specializer)) (list 'eql (eql-specializer-object specializer))) (defmethod specializer-name ((specializer class)) (class-name specializer)) (defun ensure-class-precedence-list (class) (unless (class-finalized-p class) (finalize-inheritance class)) (class-precedence-list class)) (defun specialized-lambda-list (method) ;; courtecy of AMOP p. 61 (let* ((specializers (method-specializers method)) (lambda-list (method-lambda-list method)) (n-required (length specializers))) (append (mapcar (lambda (arg specializer) (if (eq specializer (find-class 't)) arg `(,arg ,(specializer-name specializer)))) (subseq lambda-list 0 n-required) specializers) (subseq lambda-list n-required)))) (defun string-lines (string) "Lines in STRING as a vector." (coerce (with-input-from-string (s string) (loop for line = (read-line s nil nil) while line collect line)) 'vector)) (defun indentation (line) "Position of first non-SPACE character in LINE." (position-if-not (lambda (c) (char= c #\Space)) line)) (defun docstring (x doc-type) (cl:documentation x doc-type)) (defun flatten-to-string (list) (format nil "~{~A~^-~}" (flatten list))) (defun alphanumize (original) "Construct a string without characters like *`' that will f-star-ck up filename handling. See `*character-replacements*' and `*characters-to-drop*' for customization." (let ((name (remove-if (lambda (x) (member x *characters-to-drop*)) (if (listp original) (flatten-to-string original) (string original)))) (chars-to-replace (mapcar #'car *character-replacements*))) (flet ((replacement-delimiter (index) (cond ((or (< index 0) (>= index (length name))) "") ((alphanumericp (char name index)) "-") (t "")))) (loop for index = (position-if #'(lambda (x) (member x chars-to-replace)) name) while index do (setf name (concatenate 'string (subseq name 0 index) (replacement-delimiter (1- index)) (cdr (assoc (aref name index) *character-replacements*)) (replacement-delimiter (1+ index)) (subseq name (1+ index)))))) name)) ;;;; generating various names (defgeneric name (thing) (:documentation "Name for a documented thing. Names are either symbols or lists of symbols.")) (defmethod name ((symbol symbol)) symbol) (defmethod name ((cons cons)) cons) (defmethod name ((package package)) (short-package-name package)) (defmethod name ((method method)) (list (generic-function-name (method-generic-function method)) (method-qualifiers method) (specialized-lambda-list method))) ;;; Node names for DOCUMENTATION instances (defgeneric name-using-kind/name (kind name doc)) (defmethod name-using-kind/name (kind (name string) doc) (declare (ignore kind doc)) name) (defmethod name-using-kind/name (kind (name symbol) doc) (declare (ignore kind)) (format nil "~@[~A:~]~A" (short-package-name (get-package doc)) name)) (defmethod name-using-kind/name (kind (name list) doc) (declare (ignore kind)) (assert (setf-name-p name)) (format nil "(setf ~@[~A:~]~A)" (short-package-name (get-package doc)) (second name))) (defmethod name-using-kind/name ((kind (eql 'method)) name doc) (format nil "~A~{ ~A~} ~A" (name-using-kind/name nil (first name) doc) (second name) (third name))) (defun node-name (doc) "Returns TexInfo node name as a string for a DOCUMENTATION instance." (let ((kind (get-kind doc))) (format nil "~:(~A~) ~(~A~)" kind (name-using-kind/name kind (get-name doc) doc)))) (defun short-package-name (package) (unless (eq package *base-package*) (car (sort (copy-list (cons (package-name package) (package-nicknames package))) #'< :key #'length)))) ;;; Definition titles for DOCUMENTATION instances (defgeneric title-using-kind/name (kind name doc)) (defmethod title-using-kind/name (kind (name string) doc) (declare (ignore kind doc)) name) (defmethod title-using-kind/name (kind (name symbol) doc) (declare (ignore kind)) (format nil "~@[~A:~]~A" (short-package-name (get-package doc)) name)) (defmethod title-using-kind/name (kind (name list) doc) (declare (ignore kind)) (assert (setf-name-p name)) (format nil "(setf ~@[~A:~]~A)" (short-package-name (get-package doc)) (second name))) (defmethod title-using-kind/name ((kind (eql 'method)) name doc) (format nil "~{~A ~}~A" (second name) (title-using-kind/name nil (first name) doc))) (defun title-name (doc) "Returns a string to be used as name of the definition." (string-downcase (title-using-kind/name (get-kind doc) (get-name doc) doc))) (defun include-pathname (doc) (let* ((kind (get-kind doc)) (name (nstring-downcase (if (eq 'package kind) (format nil "package-~A" (alphanumize (get-name doc))) (format nil "~A-~A-~A" (case (get-kind doc) ((function generic-function) "fun") (structure "struct") (variable "var") (otherwise (symbol-name (get-kind doc)))) (alphanumize (let ((*base-package* nil)) (short-package-name (get-package doc)))) (alphanumize (get-name doc))))))) (make-pathname :name name :type "texinfo"))) ;;;; documentation class and related methods (defclass documentation () ((name :initarg :name :reader get-name) (kind :initarg :kind :reader get-kind) (string :initarg :string :reader get-string) (children :initarg :children :initform nil :reader get-children) (package :initform *documentation-package* :reader get-package))) (defmethod print-object ((documentation documentation) stream) (print-unreadable-object (documentation stream :type t) (princ (list (get-kind documentation) (get-name documentation)) stream))) (defgeneric make-documentation (x doc-type string)) (defmethod make-documentation ((x package) doc-type string) (declare (ignore doc-type)) (make-instance 'documentation :name (name x) :kind 'package :string string)) (defmethod make-documentation (x (doc-type (eql 'function)) string) (declare (ignore doc-type)) (let* ((fdef (and (fboundp x) (fdefinition x))) (name x) (kind (cond ((and (symbolp x) (special-operator-p x)) 'special-operator) ((and (symbolp x) (macro-function x)) 'macro) ((typep fdef 'generic-function) (assert (or (symbolp name) (setf-name-p name))) 'generic-function) (fdef (assert (or (symbolp name) (setf-name-p name))) 'function))) (children (when (eq kind 'generic-function) (collect-gf-documentation fdef)))) (make-instance 'documentation :name (name x) :string string :kind kind :children children))) (defmethod make-documentation ((x method) doc-type string) (declare (ignore doc-type)) (make-instance 'documentation :name (name x) :kind 'method :string string)) (defmethod make-documentation (x (doc-type (eql 'type)) string) (make-instance 'documentation :name (name x) :string string :kind (etypecase (find-class x nil) (structure-class 'structure) (standard-class 'class) (sb-pcl::condition-class 'condition) ((or built-in-class null) 'type)))) (defmethod make-documentation (x (doc-type (eql 'variable)) string) (make-instance 'documentation :name (name x) :string string :kind (if (constantp x) 'constant 'variable))) (defmethod make-documentation (x (doc-type (eql 'setf)) string) (declare (ignore doc-type)) (make-instance 'documentation :name (name x) :kind 'setf-expander :string string)) (defmethod make-documentation (x doc-type string) (make-instance 'documentation :name (name x) :kind doc-type :string string)) (defun maybe-documentation (x doc-type) "Returns a DOCUMENTATION instance for X and DOC-TYPE, or NIL if there is no corresponding docstring." (let ((docstring (docstring x doc-type))) (when docstring (make-documentation x doc-type docstring)))) (defun lambda-list (doc) (case (get-kind doc) ((package constant variable type structure class condition nil) nil) (method (third (get-name doc))) (t ;; KLUDGE: Eugh. ;; ;; believe it or not, the above comment was written before CSR ;; came along and obfuscated this. (2005-07-04) (let ((name (get-name doc))) (when (or (symbolp name) (and (consp name) (eq 'setf (car name)))) (labels ((clean (x &key optional key) (typecase x (atom x) ((cons (member &optional)) (cons (car x) (clean (cdr x) :optional t))) ((cons (member &key)) (cons (car x) (clean (cdr x) :key t))) ((cons (member &whole &environment)) ;; Skip these (clean (cddr x) :optional optional :key key)) ((cons (member &aux)) ;; Drop everything after &AUX. nil) ((cons cons) (cons (cond (key (if (consp (caar x)) (caaar x) (caar x))) (optional (caar x)) (t (clean (car x)))) (clean (cdr x) :key key :optional optional))) (cons (cons (cond ((or key optional) (car x)) (t (clean (car x)))) (clean (cdr x) :key key :optional optional)))))) (clean (sb-introspect:function-lambda-list name)))))))) (defun get-string-name (x) (let ((name (get-name x))) (cond ((symbolp name) (symbol-name name)) ((and (consp name) (eq 'setf (car name))) (symbol-name (second name))) ((stringp name) name) (t (error "Don't know which symbol to use for name ~S" name))))) (defun documentation< (x y) (let ((p1 (position (get-kind x) *ordered-documentation-kinds*)) (p2 (position (get-kind y) *ordered-documentation-kinds*))) (if (or (not (and p1 p2)) (= p1 p2)) (string< (get-string-name x) (get-string-name y)) (< p1 p2)))) ;;;; turning text into texinfo (defun escape-for-texinfo (string &optional downcasep) "Return STRING with characters in *TEXINFO-ESCAPED-CHARS* escaped with #\@. Optionally downcase the result." (let ((result (with-output-to-string (s) (loop for char across string when (find char *texinfo-escaped-chars*) do (write-char #\@ s) do (write-char char s))))) (if downcasep (nstring-downcase result) result))) (defun empty-p (line-number lines) (and (< -1 line-number (length lines)) (not (indentation (svref lines line-number))))) ;;; line markups (defvar *not-symbols* '("ANSI" "CLHS")) (defun frob-ellipsis (line) (let ((p (search "..." line))) (if p (frob-ellipsis (replace (copy-seq line) "+++" :start1 p)) line))) (defun locate-symbols (line) "Return a list of index pairs of symbol-like parts of LINE." ;; This would be a good application for a regex ... (let (result) (flet ((grab (start end) (unless (member (subseq line start end) *not-symbols*) (push (list start end) result))) (got-symbol-p (start) (let ((end (when (< start (length line)) (position-if (lambda (char) (find char " )")) line :start start)))) (when end (every (lambda (char) (find char *symbol-characters*)) (subseq line start end)))))) (do ((begin nil) (maybe-begin t) (i 0 (1+ i))) ((>= i (length line)) ;; symbol at end of line (when (and begin (or (> i (1+ begin)) (not (member (char line begin) '(#\A #\I))))) (grab begin i)) (nreverse result)) (cond ((and begin (find (char line i) *symbol-delimiters*)) ;; symbol end; remember it if it's not "A" or "I" (when (or (> i (1+ begin)) (not (member (char line begin) '(#\A #\I)))) (grab begin i)) (setf begin nil maybe-begin t)) ((and begin (not (find (char line i) *symbol-characters*))) ;; Not a symbol: abort (setf begin nil)) ((and maybe-begin (not begin) (find (char line i) *symbol-characters*)) ;; potential symbol begin at this position (setf begin i maybe-begin nil)) ((find (char line i) *symbol-delimiters*) ;; potential symbol begin after this position (setf maybe-begin t)) ((and (eql #\( (char line i)) (got-symbol-p (1+ i))) ;; a type designator, or a function call as part of the text? (multiple-value-bind (exp end) (let ((*package* (find-package :cl-user))) (ignore-errors (read-from-string (frob-ellipsis line) nil nil :start i))) (when exp (grab i end) (setf begin nil maybe-begin nil i end)))) (t ;; Not reading a symbol, not at potential start of symbol (setf maybe-begin nil))))))) (defun texinfo-line (line) "Format symbols in LINE texinfo-style: either as code or as variables if the symbol in question is contained in symbols *TEXINFO-VARIABLES*." (with-output-to-string (result) (let ((last 0)) (dolist (symbol/index (locate-symbols line)) (write-string (subseq line last (first symbol/index)) result) (let ((symbol-name (apply #'subseq line symbol/index))) (format result (if (member symbol-name *texinfo-variables* :test #'string=) "@var{~A}" "@code{~A}") (string-downcase symbol-name))) (setf last (second symbol/index))) (write-string (subseq line last) result)))) ;;; lisp sections (defun lisp-section-p (line line-number lines) "Returns T if the given LINE looks like start of lisp code -- ie. if it starts with whitespace followed by a paren or semicolon, and the previous line is empty" (let ((offset (indentation line))) (and offset (plusp offset) (find (find-if-not #'whitespacep line) "(;") (empty-p (1- line-number) lines)))) (defun collect-lisp-section (lines line-number) (let ((lisp (loop for index = line-number then (1+ index) for line = (and (< index (length lines)) (svref lines index)) while (indentation line) collect line))) ;; KLUDGE: makeinfo likes to stick an newline after @lisp sections ;; we generate, so balance it out by adding one before. Grr. (values (length lisp) `("@lisp" "" ,@lisp "@end lisp")))) ;;; itemized sections (defun maybe-itemize-offset (line) "Return NIL or the indentation offset if LINE looks like it starts an item in an itemization." (let* ((offset (indentation line)) (char (when offset (char line offset)))) (and offset (member char *itemize-start-characters* :test #'char=) (char= #\Space (find-if-not (lambda (c) (char= c char)) line :start offset)) offset))) (defun collect-maybe-itemized-section (lines starting-line) ;; Return index of next line to be processed outside (let ((this-offset (maybe-itemize-offset (svref lines starting-line))) (result nil) (lines-consumed 0)) (loop for line-number from starting-line below (length lines) for line = (svref lines line-number) for indentation = (indentation line) for offset = (maybe-itemize-offset line) do (cond ((not indentation) ;; empty line -- inserts paragraph. (push "" result) (incf lines-consumed)) ((and offset (> indentation this-offset)) ;; nested itemization -- handle recursively ;; FIXME: tables in itemizations go wrong (multiple-value-bind (sub-lines-consumed sub-itemization) (collect-maybe-itemized-section lines line-number) (when sub-lines-consumed (incf line-number (1- sub-lines-consumed)) ; +1 on next loop (incf lines-consumed sub-lines-consumed) (setf result (append (reverse sub-itemization) result))))) ((and offset (= indentation this-offset)) ;; start of new item (push (format nil "@item ~A" (texinfo-line (subseq line (1+ offset)))) result) (incf lines-consumed)) ((and (not offset) (> indentation this-offset)) ;; continued item from previous line (push (texinfo-line line) result) (incf lines-consumed)) (t ;; end of itemization (loop-finish)))) ;; a single-line itemization isn't. (if (> (count-if (lambda (line) (> (length line) 0)) result) 1) (values lines-consumed `("@itemize" ,@(reverse result) "@end itemize")) nil))) ;;; table sections (defun tabulation-body-p (offset line-number lines) (when (< line-number (length lines)) (let ((offset2 (indentation (svref lines line-number)))) (and offset2 (< offset offset2))))) (defun tabulation-p (offset line-number lines direction) (let ((step (ecase direction (:backwards (1- line-number)) (:forwards (1+ line-number))))) (when (and (plusp line-number) (< line-number (length lines))) (and (eql offset (indentation (svref lines line-number))) (or (when (eq direction :backwards) (empty-p step lines)) (tabulation-p offset step lines direction) (tabulation-body-p offset step lines)))))) (defun maybe-table-offset (line-number lines) "Return NIL or the indentation offset if LINE looks like it starts an item in a tabulation. Ie, if it is (1) indented, (2) preceded by an empty line, another tabulation label, or a tabulation body, (3) and followed another tabulation label or a tabulation body." (let* ((line (svref lines line-number)) (offset (indentation line)) (prev (1- line-number)) (next (1+ line-number))) (when (and offset (plusp offset)) (and (or (empty-p prev lines) (tabulation-body-p offset prev lines) (tabulation-p offset prev lines :backwards)) (or (tabulation-body-p offset next lines) (tabulation-p offset next lines :forwards)) offset)))) ;;; FIXME: This and itemization are very similar: could they share ;;; some code, mayhap? (defun collect-maybe-table-section (lines starting-line) ;; Return index of next line to be processed outside (let ((this-offset (maybe-table-offset starting-line lines)) (result nil) (lines-consumed 0)) (loop for line-number from starting-line below (length lines) for line = (svref lines line-number) for indentation = (indentation line) for offset = (maybe-table-offset line-number lines) do (cond ((not indentation) ;; empty line -- inserts paragraph. (push "" result) (incf lines-consumed)) ((and offset (= indentation this-offset)) ;; start of new item, or continuation of previous item (if (and result (search "@item" (car result) :test #'char=)) (push (format nil "@itemx ~A" (texinfo-line line)) result) (progn (push "" result) (push (format nil "@item ~A" (texinfo-line line)) result))) (incf lines-consumed)) ((> indentation this-offset) ;; continued item from previous line (push (texinfo-line line) result) (incf lines-consumed)) (t ;; end of itemization (loop-finish)))) ;; a single-line table isn't. (if (> (count-if (lambda (line) (> (length line) 0)) result) 1) (values lines-consumed `("" "@table @emph" ,@(reverse result) "@end table" "")) nil))) ;;; section markup (defmacro with-maybe-section (index &rest forms) `(multiple-value-bind (count collected) (progn ,@forms) (when count (dolist (line collected) (write-line line *texinfo-output*)) (incf ,index (1- count))))) (defun write-texinfo-string (string &optional lambda-list) "Try to guess as much formatting for a raw docstring as possible." (let ((*texinfo-variables* (flatten lambda-list)) (lines (string-lines (escape-for-texinfo string nil)))) (loop for line-number from 0 below (length lines) for line = (svref lines line-number) do (cond ((with-maybe-section line-number (and (lisp-section-p line line-number lines) (collect-lisp-section lines line-number)))) ((with-maybe-section line-number (and (maybe-itemize-offset line) (collect-maybe-itemized-section lines line-number)))) ((with-maybe-section line-number (and (maybe-table-offset line-number lines) (collect-maybe-table-section lines line-number)))) (t (write-line (texinfo-line line) *texinfo-output*)))))) ;;;; texinfo formatting tools (defun hide-superclass-p (class-name super-name) (let ((super-package (symbol-package super-name))) (or ;; KLUDGE: We assume that we don't want to advertise internal ;; classes in CP-lists, unless the symbol we're documenting is ;; internal as well. (and (member super-package #.'(mapcar #'find-package *undocumented-packages*)) (not (eq super-package (symbol-package class-name)))) ;; KLUDGE: We don't generally want to advertise SIMPLE-ERROR or ;; SIMPLE-CONDITION in the CPLs of conditions that inherit them ;; simply as a matter of convenience. The assumption here is that ;; the inheritance is incidental unless the name of the condition ;; begins with SIMPLE-. (and (member super-name '(simple-error simple-condition)) (let ((prefix "SIMPLE-")) (mismatch prefix (string class-name) :end2 (length prefix))) t ; don't return number from MISMATCH )))) (defun hide-slot-p (symbol slot) ;; FIXME: There is no pricipal reason to avoid the slot docs fo ;; structures and conditions, but their DOCUMENTATION T doesn't ;; currently work with them the way we'd like. (not (and (typep (find-class symbol nil) 'standard-class) (docstring slot t)))) (defun texinfo-anchor (doc) (format *texinfo-output* "@anchor{~A}~%" (node-name doc))) ;;; KLUDGE: &AUX *PRINT-PRETTY* here means "no linebreaks please" (defun texinfo-begin (doc &aux *print-pretty*) (let ((kind (get-kind doc))) (format *texinfo-output* "@~A {~:(~A~)} ~({~A}~@[ ~{~A~^ ~}~]~)~%" (case kind ((package constant variable) "defvr") ((structure class condition type) "deftp") (t "deffn")) (map 'string (lambda (char) (if (eql char #\-) #\Space char)) (string kind)) (title-name doc) ;; &foo would be amusingly bold in the pdf thanks to TeX/Texinfo ;; interactions,so we escape the ampersand -- amusingly for TeX. ;; sbcl.texinfo defines macros that expand @&key and friends to &key. (mapcar (lambda (name) (cond ((and (member name lambda-list-keywords) (char= #\& (aref (string name) 0))) (format nil "@amp-~A" (subseq (string name) 1))) ((member name lambda-list-keywords) (format nil "@~A" name)) (t name))) (lambda-list doc))))) (defun texinfo-index (doc) (let ((title (title-name doc))) (case (get-kind doc) ((structure type class condition) (format *texinfo-output* "@tindex ~A~%" title)) ((variable constant) (format *texinfo-output* "@vindex ~A~%" title)) ((compiler-macro function method-combination macro generic-function) (format *texinfo-output* "@findex ~A~%" title))))) (defun texinfo-inferred-body (doc) (when (member (get-kind doc) '(class structure condition)) (let ((name (get-name doc))) ;; class precedence list (format *texinfo-output* "Class precedence list: @code{~(~{~(~A~)~^, ~}~)}~%~%" (remove-if (lambda (class) (hide-superclass-p name class)) (mapcar #'class-name (ensure-class-precedence-list (find-class name))))) ;; slots (let ((slots (remove-if (lambda (slot) (hide-slot-p name slot)) (class-direct-slots (find-class name))))) (when slots (format *texinfo-output* "Slots:~%@itemize~%") (dolist (slot slots) (format *texinfo-output* "@item ~(@code{~A}~#[~:; --- ~]~ ~:{~2*~@[~2:*~A~P: ~{@code{@w{~S}}~^, ~}~]~:^; ~}~)~%~%" (slot-definition-name slot) (remove nil (mapcar (lambda (name things) (if things (list name (length things) things))) '("initarg" "reader" "writer") (list (slot-definition-initargs slot) (slot-definition-readers slot) (slot-definition-writers slot))))) ;; FIXME: Would be neater to handler as children (write-texinfo-string (docstring slot t))) (format *texinfo-output* "@end itemize~%~%")))))) (defun texinfo-body (doc) (write-texinfo-string (get-string doc))) (defun texinfo-end (doc) (write-line (case (get-kind doc) ((package variable constant) "@end defvr") ((structure type class condition) "@end deftp") (t "@end deffn")) *texinfo-output*)) (defun write-texinfo (doc) "Writes TexInfo for a DOCUMENTATION instance to *TEXINFO-OUTPUT*." (texinfo-anchor doc) (texinfo-begin doc) (texinfo-index doc) (texinfo-inferred-body doc) (texinfo-body doc) (texinfo-end doc) ;; FIXME: Children should be sorted one way or another (mapc #'write-texinfo (get-children doc))) ;;;; main logic (defun collect-gf-documentation (gf) "Collects method documentation for the generic function GF" (loop for method in (generic-function-methods gf) for doc = (maybe-documentation method t) when doc collect doc)) (defun collect-name-documentation (name) (loop for type in *documentation-types* for doc = (maybe-documentation name type) when doc collect doc)) (defun collect-symbol-documentation (symbol) "Collects all docs for a SYMBOL and (SETF SYMBOL), returns a list of the form DOC instances. See `*documentation-types*' for the possible values of doc-type." (nconc (collect-name-documentation symbol) (collect-name-documentation (list 'setf symbol)))) (defun collect-documentation (package) "Collects all documentation for all external symbols of the given package, as well as for the package itself." (let* ((*documentation-package* (find-package package)) (docs nil)) (check-type package package) (do-external-symbols (symbol package) (setf docs (nconc (collect-symbol-documentation symbol) docs))) (let ((doc (maybe-documentation *documentation-package* t))) (when doc (push doc docs))) docs)) (defmacro with-texinfo-file (pathname &body forms) `(with-open-file (*texinfo-output* ,pathname :direction :output :if-does-not-exist :create :if-exists :supersede) ,@forms)) (defun write-ifnottex () ;; We use @amp-key, etc to escape & from TeX in lambda lists -- so ;; we need to define them for info as well. (flet ((macro (name) (let ((string (string-downcase name))) (format *texinfo-output* "@macro amp-~A~%~A~%@end macro~%" (subseq string 1) string)))) (macro '&allow-other-keys) (macro '&optional) (macro '&rest) (macro '&key) (macro '&body))) (defun generate-includes (directory packages &key (base-package :cl-user)) "Create files in `directory' containing Texinfo markup of all docstrings of each exported symbol in `packages'. `directory' is created if necessary. If you supply a namestring that doesn't end in a slash, you lose. The generated files are of the form \"__.texinfo\" and can be included via @include statements. Texinfo syntax-significant characters are escaped in symbol names, but if a docstring contains invalid Texinfo markup, you lose." (handler-bind ((warning #'muffle-warning)) (let ((directory (merge-pathnames (pathname directory))) (*base-package* (find-package base-package))) (ensure-directories-exist directory) (dolist (package packages) (dolist (doc (collect-documentation (find-package package))) (with-texinfo-file (merge-pathnames (include-pathname doc) directory) (write-texinfo doc)))) (with-texinfo-file (merge-pathnames "ifnottex.texinfo" directory) (write-ifnottex)) directory))) (defun document-package (package &optional filename) "Create a file containing all available documentation for the exported symbols of `package' in Texinfo format. If `filename' is not supplied, a file \".texinfo\" is generated. The definitions can be referenced using Texinfo statements like @ref{__.texinfo}. Texinfo syntax-significant characters are escaped in symbol names, but if a docstring contains invalid Texinfo markup, you lose." (handler-bind ((warning #'muffle-warning)) (let* ((package (find-package package)) (filename (or filename (make-pathname :name (string-downcase (short-package-name package)) :type "texinfo"))) (docs (sort (collect-documentation package) #'documentation<))) (with-texinfo-file filename (dolist (doc docs) (write-texinfo doc))) filename))) esrap-20170630-git/doc/esrap.texinfo000066400000000000000000000256711311177263700171510ustar00rootroot00000000000000\input texinfo @c -*-texinfo-*- @c %**start of header @setfilename esrap.info @c %**end of header @settitle Esrap @c for install-info @dircategory Software development @direntry * Esrap: a packrat parser for Common Lisp @end direntry @titlepage @title Esrap @subtitle a packrat parser for Common Lisp @c The following two commands start the copyright page. @page @vskip 0pt plus 1filll @insertcopying @end titlepage In addition to regular Packrat / Parsing Grammar / TDPL features Esrap supports @itemize @item dynamic redefinition of nonterminals @item inline grammars @item semantic predicates @item introspective facilities for development @item support for left-recursive rules @item functions as terminals @item accurate, customizable parse error reports @end itemize Esrap was originally written by @email{nikodemus@@random-state.net, Nikodemus Siivola}. It is now developed and maintained by @email{jmoringe@@uni-bielefeld.de, Jan Moringen}. Esrap is maintained in Git: @example git clone -b stable git://github.com/scymtym/esrap.git @end example will get you a local copy (omit @code{-b stable} to get the latest development version). @example @url{http://github.com/scymtym/esrap} @end example is the GitHub project page. Esrap is licenced under an MIT-style licence. For more on packrat parsing, see @url{http://pdos.csail.mit.edu/~baford/packrat/thesis/} for Bryan Ford's 2002 thesis: ``Packrat Parsing: a Practical Linear Time Algorithm with Backtracking''. For left recursion support in packrat parsers, see @url{http://www.vpri.org/pdf/tr2007002_packrat.pdf} for A. Warth et al's 2008 paper: ``Packrat Parsers Can Support Left Recursion''. @contents @ifnottex @include include/ifnottex.texinfo @end ifnottex @chapter Parsing Expressions Parsing proceeds by matching text against parsing expressions. Matching has three components: success vs failure, consumption of input, and associated production. Parsing expressions that fail never consume input. Parsing expressions that succeed may or may not consume input. A parsing expressions can be: @heading Terminal A terminal is a character or a string of length one, which succeeds and consumes a single character if that character matches the terminal. Additionally, Esrap supports some pseudoterminals. @itemize @item The wild terminal symbol @code{character} always succeeds, consuming and producing a single character. @item Expressions of the form @code{(character-ranges range ...)} match a single character from the given range(s), consuming and producing that character. A range can be either a list of the form @code{(#\start_char #\stop_char)} or a single character. @item Multicharacter strings can be used to specify sequences of terminals: @code{"foo"} succeeds and consumes input as if @code{(and #\f #\o #\o)}. Produces the consumed string. @item Expressions of the form @code{(string length)} can be used to specify sequences of arbitrary characters: @code{(string 2)} succeeds and consumes input as if @code{(and character character)}. Produces the consumed string. @end itemize @heading Nonterminal Nonterminals are specified using symbols. A nonterminal symbol succeeds if the parsing expression associated with it succeeds, and consumes whatever the input that expression consumes. The production of a nonterminal depends on the associated expression and an optional transformation rule. Nonterminals are defined using @code{defrule}. @emph{Note: Currently all rules share the same namespace, so you should not use symbols in the COMMON-LISP package or other shared packages to name your rules unless you are certain there are no other Esrap using components in your Lisp image. In a future version of Esrap grammar objects will be introduced to allow multiple definitions of nonterminals. Symbols in the COMMON-LISP package are specifically reserved for use by Esrap.} @heading Sequence @lisp (and subexpression ...) @end lisp A sequence succeeds if all subexpressions succeed, and consumes all input consumed by the subexpressions. A sequence produces the productions of its subexpressions as a list. @heading Ordered Choice @lisp (or subexpression ...) @end lisp An ordered choice succeeds if any of the subexpressions succeeds, and consumes all the input consumed by the successful subexpression. An ordered choice produces whatever the successful subexpression produces. Subexpressions are checked strictly in the specified order, and once a subexpression succeeds no further ones will be tried. @heading Negation @lisp (not subexpression) @end lisp A negation succeeds if the subexpression fails, and consumes one character of input. A negation produces the character it consumes. @heading Greedy Repetition @lisp (* subexpresssion) @end lisp A greedy repetition always succeeds, consuming all input consumed by applying subexpression repeatedly as long as it succeeds. A greedy repetition produces the productions of the subexpression as a list. @heading Greedy Positive Repetition @lisp (+ subexpresssion) @end lisp A greedy repetition succeeds if subexpression succeeds at least once, and consumes all input consumed by applying subexpression repeatedly as long as it succeeds. A greedy positive repetition produces the productions of the subexpression as a list. @heading Optional @lisp (? subexpression) @end lisp Optionals always succeed, and consume whatever input the subexpression consumes. An optional produces whatever the subexpression produces, or @code{nil} if the subexpression does not succeed. @heading Followed-By Predicate @lisp (& subexpression) @end lisp A followed-by predicate succeeds if the subexpression succeeds, and @emph{consumes no input}. A followed-by predicate produces whatever the subexpression produces. @heading Not-Followed-By Predicate @lisp (! subexpression) @end lisp A not-followed-by predicate succeeds if the subexpression does not succeed, and @emph{consumes no input}. A not-followed-by predicate produces @code{nil}. @heading Lookbehind @lisp (< amount subexpression) @end lisp A lookbehind succeeds if @code{subexpression} succeeds at the input position reached by moving backward @code{amount}, a positive integer, characters from the current position and @emph{consumes no input}. A lookbehind produces whatever @code{subexpression} produces. @heading Lookahead @lisp (> amount subexpression) @end lisp A lookahead succeeds if @code{subexpression} succeeds at the input position reached by moving forward @code{amount}, a positive integer, characters from the current position and @emph{consumes no input}. A lookahead produces whatever @code{subexpression} produces. @heading Semantic Predicates @lisp (predicate-name subexpression) @end lisp The @code{predicate-name} is a symbol naming a global function. A semantic predicate succeeds if subexpression succeeds @emph{and} the named function returns true for the production of the subexpression. A semantic predicate produces whatever the subexpression produces. @emph{Note: semantic predicates may change in the future to produce whatever the predicate function returns.} @heading Functions as Terminals @lisp (function function-name) @end lisp @code{function-name} is a symbol naming a global function. @code{function-name}'s lambda-list has to be compatible to @code{(text position end)} where @code{text} is the whole input and @code{position} and @code{end} indicate the maximal subsequence @code{function-name} should attempt to parse. A function terminal succeeds if either @enumerate @item @code{function-name} returns @code{T} as its third value. @item @code{function-name} returns @code{nil} as its third value (or returns only two values) and @code{nil} as its second value. This indicates that the entire remaining input has been consumed. @item @code{function-name} returns @code{nil} as its third value (or returns only two values) and an integer @code{> position} as its second value indicating the position up to which @code{text} has been consumed. @item @code{function-name} returns a value of type @code{successful-parse} as its first value. @end enumerate When a function terminal succeeds, the first return value is an arbitrary production. A function terminal fails if either @enumerate @item @code{function-name} returns two values: an ignored value and @code{position}. Returning @code{position} indicates that no progress has been made. @item @code{function-name} returns three values: an ignored value, @code{nil} or an integer @code{>= position} and a string or a condition explaining the failure. In this case, when the second value is not @code{nil}, it indicates the exact position of the failure. @item @code{function-name} returns a value of type @code{error-result} as its first value. @end enumerate Note that rules which use functions as terminals do not automatically pick up redefinitions of the used functions. For that to happen, the rules have to be redefined as well. See @file{example-function-terminals.lisp} for examples. @heading Left Recursion One aspect of designing Esrap rules is @emph{left recursion}. A @emph{direct left recursive} rule is of the form @lisp (defrule left-recursion (or (and left-recursion STUFF) ALTERNATIVES)) @end lisp The simplest @emph{indirect left recursive} rule is of the form @lisp (defrule left-recursion.1 left-recursion.2) (defrule left-recursion.2 (or (and left-recursion.1 STUFF) ALTERNATIVES)) @end lisp Esrap can handle both kinds of left recursive rules, but the linear-time guarantee generally no longer holds in such cases. The special variable @code{*on-left-recursion*} can be set to either @code{nil} or @code{:error} to control Esrap's behavior with respect to allowing left recursion. See @file{example-left-recursion.lisp} for examples. @chapter Dictionary @section Primary Interface @include include/macro-esrap-defrule.texinfo @include include/fun-esrap-parse.texinfo @include include/fun-esrap-describe-grammar.texinfo @section Utilities @include include/fun-esrap-text.texinfo @section Introspection and Intercession @include include/fun-esrap-add-rule.texinfo @include include/fun-esrap-change-rule.texinfo @include include/fun-esrap-find-rule.texinfo @include include/fun-esrap-remove-rule.texinfo @include include/fun-esrap-rule-dependencies.texinfo @include include/fun-esrap-rule-expression.texinfo @include include/fun-esrap-setf-rule-expression.texinfo @include include/fun-esrap-rule-symbol.texinfo @include include/fun-esrap-trace-rule.texinfo @include include/fun-esrap-untrace-rule.texinfo @include include/fun-esrap-expression-start-terminals.texinfo @include include/fun-esrap-describe-terminal.texinfo @section Error Conditions @include include/var-esrap-star-on-left-recursion-star.texinfo @include include/fun-esrap-esrap-error-position.texinfo @include include/fun-esrap-esrap-parse-error-result.texinfo @include include/fun-esrap-esrap-parse-error-context.texinfo @include include/condition-esrap-esrap-error.texinfo @include include/condition-esrap-left-recursion.texinfo @include include/condition-esrap-esrap-parse-error.texinfo @include include/condition-esrap-undefined-rule-error.texinfo @bye esrap-20170630-git/doc/style.css000066400000000000000000000010021311177263700162710ustar00rootroot00000000000000 .node { visibility:hidden; height: 0px; } .menu { visibility:hidden; height: 0px; } .chapter { background-color:#e47700; padding: 0.2em; } .section { background-color:#e47700; padding: 0.2em; } .settitle { background-color:#e47700; } .contents { border: 2px solid black; margin: 1cm 1cm 1cm 1cm; padding-left: 3mm; } .lisp { padding: 0; margin: 0em; } body { padding: 2em 8em; font-family: sans-serif; } h1 { padding: 1em; text-align: center; } li { margin: 1em; } esrap-20170630-git/esrap.asd000066400000000000000000000122171311177263700154670ustar00rootroot00000000000000;;;; Copyright (c) 2007-2013 Nikodemus Siivola ;;;; Copyright (c) 2012-2017 Jan Moringen ;;;; ;;;; Permission is hereby granted, free of charge, to any person ;;;; obtaining a copy of this software and associated documentation files ;;;; (the "Software"), to deal in the Software without restriction, ;;;; including without limitation the rights to use, copy, modify, merge, ;;;; publish, distribute, sublicense, and/or sell copies of the Software, ;;;; and to permit persons to whom the Software is furnished to do so, ;;;; subject to the following conditions: ;;;; ;;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ;;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ;;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. ;;;; IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY ;;;; CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, ;;;; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE ;;;; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. (defsystem :esrap :version "0.15" :description "A Packrat / Parsing Grammar / TDPL parser for Common Lisp." :long-description "A Packrat / Parsing Grammar / TDPL parser for Common Lisp. Notable features include * dynamic redefinition of nonterminals * inline grammars * semantic predicates * introspective facilities (describing grammars, tracing, setting breaks) * left-recursive grammars * functions as terminals * accurate, customizable parse error reports See README.org and :homepage for more information." :author ("Nikodemus Siivola " "Jan Moringen ") :maintainer "Jan Moringen " :homepage "https://scymtym.github.io/esrap" :bug-tracker "https://github.com/scymtym/esrap/issues" :source-control (:git "https://github.com/scymtym/esrap.git") :licence "MIT" :depends-on (:alexandria) :components ((:module "src" :serial t :components ((:file "package") (:file "types") (:file "protocol") (:file "variables") (:file "conditions") (:file "expressions") (:file "rule") (:file "results") (:file "cache") (:file "evaluator") (:file "macros") (:file "interface") (:file "editor-support"))) (:module "examples" :components ((:static-file "sexp.lisp") (:static-file "symbol-table.lisp") (:static-file "left-recursion.lisp") (:static-file "function-terminals.lisp"))) (:static-file "README.org")) :in-order-to ((test-op (test-op :esrap/tests)))) (defmethod perform :after ((op load-op) (sys (eql (find-system :esrap)))) ;; Since version 0.16 ;; * DEFRULE accepts an :ERROR-REPORT option ;; Since version 0.15 ;; * All transforms that support it, can access bounds via &BOUNDS. ;; Since version 0.14 (pushnew :esrap.lookahead *features*) (pushnew :esrap.lookbehind *features*) ;; Since version 0.13 (pushnew :esrap.expression-start-terminals *features*) ;; Since version 0.12 (pushnew :esrap.function-terminals *features*) ;; Since version 0.11 (pushnew :esrap.multiple-transforms *features*) ;; Since version 0.10 (pushnew :esrap.can-handle-left-recursion *features*) ;; For consistency with examples which contain (require :esrap). (provide :esrap)) (defsystem :esrap/tests :description "Tests for ESRAP." :author ("Nikodemus Siivola " "Jan Moringen ") :maintainer "Jan Moringen " :licence "MIT" :depends-on (:esrap (:version :fiveam "1.3")) :serial t :components ((:module "examples" :components ((:file "left-recursion") (:file "function-terminals"))) (:module "test" :serial t :components ((:file "package") (:file "util") (:file "tests") (:file "examples") (:file "readme"))))) (defmethod perform ((operation test-op) (system (eql (find-system :esrap/tests)))) (funcall (intern "RUN-TESTS" :esrap-tests))) esrap-20170630-git/examples/000077500000000000000000000000001311177263700154775ustar00rootroot00000000000000esrap-20170630-git/examples/function-terminals.lisp000066400000000000000000000103231311177263700222100ustar00rootroot00000000000000;;;; Esrap example: some grammars with function-based terminals. (cl:require :esrap) (cl:defpackage #:esrap-example.function-terminals (:use #:cl #:esrap) (:export #:indented-block #:common-lisp)) (cl:in-package #:esrap-example.function-terminals) ;;; Ex. 1. Using a custom terminal for context sensitive parsing. ;;; ;;; Among many other things, this can be used to implement ;;; indentation-based grammars such as Python's. (defrule whitespace (+ #\space) (:constant nil)) ;; *CURRENT-INDENT* tracks the current indentation and CURRENT-INDENT ;; *succeeds when it can consume exactly CURRENT-INDENT* units of ;; *indentation. (defvar *current-indent* 0) (defun current-indent-p (indent) (= indent *current-indent*)) (defrule indent (* #\space) (:function length)) (defrule current-indent (current-indent-p indent)) ;; Just a dummy rule for the statement-like elements of the ;; grammar. This is not the focus of this example. For simplicity, ;; each statement is on one line. (defrule statement (+ (character-ranges (#\a #\z))) (:text t)) (defrule line (and statement #\newline) (:function first)) (defrule block-content (or if line)) (defrule indented-block-content (and current-indent block-content) (:function second)) ;; PARSE-INDENTED-BLOCK is the real meat. It determines the new ;; indentation depth via a nested (PARSE INDENT ...) call which does ;; not consume input. The block's content can then be parsed with a ;; suitably increased current indent. ;; ;; The result of the second PARSE call is returned "raw" in case of ;; success. This allows the associated result tree to be attached to ;; the global result tree and permits lazy computation of rule ;; productions within the sub-tree (beneficial if e.g. the result of ;; the parse, despite successful, is not used in the global result). (defun parse-indented-block (text position end) (multiple-value-bind (new-indent new-position) (parse 'indent text :start position :end end :junk-allowed t) (if (> new-indent *current-indent*) (let ((*current-indent* new-indent)) (parse '(+ indented-block-content) text :start position :end end :raw t)) (values nil new-position "Expected indent")))) (defrule indented-block #'parse-indented-block) (defrule if (and (and "if" whitespace) statement (and #\: #\Newline) indented-block (? (and (and current-indent "else" #\: #\Newline) indented-block))) (:destructure (if-keyword condition colon then (&optional else-keyword else)) (declare (ignore if-keyword colon else-keyword)) (list* 'if condition then (when else (list else))))) (defun test-indentation () (parse 'indented-block " foo bar quux if foo: bla if baz: bli blo else: whoop blu ")) ;;; Ex. 2. Using CL:READ to parse lisp. (defun parse-using-read (text position end) (handler-case ;; When successful, READ-FROM-STRING returns the read object and ;; the position up to which TEXT has been consumed. (read-from-string text t nil :start position :end end) ;; When READ-FROM-STRING fails, indicate the parse failure, ;; including CONDITION as explanation. (stream-error (condition) ;; For STREAM-ERRORs, we can try to determine and return the ;; exact position of the failure. (let ((position (ignore-errors (file-position (stream-error-stream condition))))) (values nil position condition))) (error (condition) ;; For general ERRORs, we cannot determine the exact position of ;; the failure. (values nil nil condition)))) (defrule common-lisp #'parse-using-read) ;; When parsing anything by using CL:READ, it is probably a good idea ;; to disable *READ-EVAL*. The package in which symbols will be ;; interned has to be kept in mind as well. (defun test-read () (with-standard-io-syntax (let (; (*package* (find-package :my-package-for-symbols)) (*read-eval* nil)) ;; This contains deliberate syntax errors to highlight the error ;; position and error message reporting implemented in ;; PARSE-USING-READ. (parse 'common-lisp "(list 'i :::love 'lisp")))) esrap-20170630-git/examples/left-recursion.lisp000066400000000000000000000074041311177263700213360ustar00rootroot00000000000000;;;; Esrap example: some grammars with left-recursive rules. (cl:require :esrap) (cl:defpackage #:left-recursive-grammars (:use #:cl #:alexandria #:esrap) (:export #:la-expr #:ra-expr #:primary)) (cl:in-package :left-recursive-grammars) ;;; Left associative expressions (defrule la-expr la-term) (defrule la-literal (digit-char-p character) (:lambda (x) (parse-integer (text x)))) (defrule la-term (and la-factor (? (and (or #\+ #\-) la-term))) (:destructure (left (&optional op right)) (if op (list (find-symbol op :cl) left right) left))) (defrule la-factor (and (or la-literal la-expr) (? (and (or #\* #\/) la-factor))) (:destructure (left (&optional op right)) (if op (list (find-symbol op :cl) left right) left))) (defun test-la () (let ((*on-left-recursion* :error)) (assert (equal (parse 'la-expr "1*2+3*4+5") '(+ (* 1 2) (+ (* 3 4) 5)))))) ;;; Right associative expressions (defrule ra-expr ra-term) (defrule ra-literal (digit-char-p character) (:lambda (x) (parse-integer (text x)))) (defrule ra-term (and (? (and ra-term (or #\+ #\-))) ra-factor) (:destructure ((&optional left op) right) (if op (list (find-symbol op :cl) left right) right))) (defrule ra-factor (and (? (and ra-factor (or #\* #\/))) (or ra-literal ra-expr)) (:destructure ((&optional left op) right) (if op (list (find-symbol op :cl) left right) right))) (defun test-ra () (let ((*on-left-recursion* :error)) (parse 'ra-expr "1*2+3*4+5")) ; |- Error (assert (equal (parse 'ra-expr "1*2+3*4+5") '(+ (+ (* 1 2) (* 3 4)) 5)))) ;;; The following example is given in ;;; ;;; Alessandro Warth, James R. Douglass, Todd Millstein, 2008, ;;; "Packrat Parsers Can Support Left Recursion". ;;; http://www.vpri.org/pdf/tr2007002_packrat.pdf (defrule primary primary-no-new-array) (defrule primary-no-new-array (or class-instance-creation-expression method-invocation field-access array-access "this")) (defrule class-instance-creation-expression (or (and "new" class-or-interface-type "()") (and primary ".new" identifier "()"))) ;; Note: in the paper, the first case is ;; ;; (and primary "." identifier "()") ;; ;; but that seems to be an error. (defrule method-invocation (or (and primary "." method-name "()") (and (and) (and) method-name "()")) (:destructure (structure dot name parens) (declare (ignore dot parens)) (list :method-invocation structure name))) (defrule field-access (or (and primary "." identifier) (and "super." identifier)) (:destructure (structure dot field) (declare (ignore dot)) (list :field-access structure field))) (defrule array-access (or (and primary "[" expression "]") (and expression-name "[" expression "]")) (:destructure (structure open index close) (declare (ignore open close)) (list :array-access structure index))) (defrule class-or-interface-type (or class-name interface-type-name)) (defrule class-name (or "C" "D")) (defrule interface-type-name (or "I" "J")) (defrule identifier (or "x" "y" class-or-interface-type)) (defrule method-name (or "m" "n")) (defrule expression-name identifier) (defrule expression (or "i" "j")) (defun test-warth () (mapc (curry #'apply (lambda (input expected) (assert (equal (parse 'primary input) expected)))) '(("this" "this") ("this.x" (:field-access "this" "x")) ("this.x.y" (:field-access (:field-access "this" "x") "y")) ("this.x.m()" (:method-invocation (:field-access "this" "x") "m")) ("x[i][j].y" (:field-access (:array-access (:array-access "x" "i") "j") "y"))))) esrap-20170630-git/examples/sexp.lisp000066400000000000000000000042531311177263700173530ustar00rootroot00000000000000;;;; Esrap example: a simple S-expression grammar (cl:require :esrap) (cl:defpackage #:sexp-grammar (:use #:cl #:esrap)) (cl:in-package #:sexp-grammar) ;;; A semantic predicate for filtering out double quotes. (defun not-doublequote (char) (not (eql #\" char))) (defun not-integer (string) (when (find-if-not #'digit-char-p string) t)) ;;; Utility rules. (defrule whitespace (+ (or #\space #\tab #\newline)) (:constant nil)) (defrule alphanumeric (alphanumericp character)) (defrule string-char (or (not-doublequote character) (and #\\ #\"))) ;;; Here we go: an S-expression is either a list or an atom, with possibly leading whitespace. (defrule sexp (and (? whitespace) (or magic list atom)) (:function second) (:lambda (s &bounds start end) (list s (cons start end)))) (defrule magic "foobar" (:constant :magic) (:when (eq * :use-magic))) (defrule list (and #\( sexp (* sexp) (? whitespace) #\)) (:destructure (p1 car cdr w p2) (declare (ignore p1 p2 w)) (cons car cdr))) (defrule atom (or string integer symbol)) (defrule string (and #\" (* string-char) #\") (:destructure (q1 string q2) (declare (ignore q1 q2)) (text string))) (defrule integer (+ (or "0" "1" "2" "3" "4" "5" "6" "7" "8" "9")) (:lambda (list) (parse-integer (text list) :radix 10))) (defrule symbol (not-integer (+ alphanumeric)) ;; NOT-INTEGER is not strictly needed because ATOM considers INTEGER before ;; a STRING, we know can accept all sequences of alphanumerics -- we already ;; know it isn't an integer. (:lambda (list) (intern (text list)))) ;;;; Try these (parse 'sexp "FOO123") (parse 'sexp "123") (parse 'sexp "\"foo\"") (parse 'sexp " ( 1 2 3 (FOO\"foo\"123 ) )") (parse 'sexp "foobar") (let ((* :use-magic)) (parse 'sexp "foobar")) (describe-grammar 'sexp) (trace-rule 'sexp :recursive t) (parse 'sexp "(foo bar 1 quux)") (untrace-rule 'sexp :recursive t) (defparameter *orig* (rule-expression (find-rule 'sexp))) (change-rule 'sexp '(and (? whitespace) (or list symbol))) (parse 'sexp "(foo bar quux)") (parse 'sexp "(foo bar 1 quux)" :junk-allowed t) (change-rule 'sexp *orig*) (parse 'sexp "(foo bar 1 quux)" :junk-allowed t) esrap-20170630-git/examples/symbol-table.lisp000066400000000000000000000046161311177263700207710ustar00rootroot00000000000000;;;; Esrap example: a simple grammar with scopes and symbol tables. (cl:require :esrap) (cl:defpackage #:symbol-table (:use #:cl #:esrap)) (cl:in-package #:symbol-table) ;;; Use the :AROUND construction to maintain a stack of symbol tables ;;; during parsing. ;;; ;;; It is important to note that the bodies of :AROUND options are ;;; executed during result construction, not parsing. Therefore, ;;; :AROUND cannot be used to introduce context sensitivity into ;;; parsing. However, this can be done when using functions as ;;; terminals, see example-function-terminals.lisp. (declaim (special *symbol-table*)) (defvar *symbol-table* nil) (defstruct (symbol-table (:constructor make-symbol-table (&optional %parent))) (%table (make-hash-table :test #'equal)) %parent) (defun lookup/direct (name &optional (table *symbol-table*)) (values (gethash name (symbol-table-%table table)))) (defun lookup (name &optional (table *symbol-table*)) (or (lookup/direct name table) (alexandria:when-let ((parent (symbol-table-%parent table))) (lookup name parent)))) (defun (setf lookup) (new-value name &optional (table *symbol-table*)) (when (lookup/direct name table) (error "~@" name)) (setf (gethash name (symbol-table-%table table)) new-value)) (defrule whitespace (+ (or #\Space #\Tab #\Newline)) (:constant nil)) (defrule name (+ (alphanumericp character)) (:text t)) (defrule type (+ (alphanumericp character)) (:text t)) (defrule declaration (and name #\: type) (:destructure (name colon type) (declare (ignore colon)) (setf (lookup name) (list name :type type)) (values))) (defrule use name (:lambda (name) (list :use (or (lookup name) (error "~@" name))))) (defrule statement (+ (or scope declaration use)) (:lambda (items) (remove nil items))) (defrule statement/ws (and statement (? whitespace)) (:function first)) (defrule scope (and (and #\{ (? whitespace)) (* statement/ws) (and #\} (? whitespace))) (:function second) (:around () (let ((*symbol-table* (make-symbol-table *symbol-table*))) (list* :scope (apply #'append (call-transform)))))) (parse 'scope "{ a:int a { a b:double a b { a:string a b } a b } a }") esrap-20170630-git/src/000077500000000000000000000000001311177263700144505ustar00rootroot00000000000000esrap-20170630-git/src/cache.lisp000066400000000000000000000201461311177263700164070ustar00rootroot00000000000000;;;; Copyright (c) 2007-2013 Nikodemus Siivola ;;;; Copyright (c) 2012-2017 Jan Moringen ;;;; ;;;; Permission is hereby granted, free of charge, to any person ;;;; obtaining a copy of this software and associated documentation files ;;;; (the "Software"), to deal in the Software without restriction, ;;;; including without limitation the rights to use, copy, modify, merge, ;;;; publish, distribute, sublicense, and/or sell copies of the Software, ;;;; and to permit persons to whom the Software is furnished to do so, ;;;; subject to the following conditions: ;;;; ;;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ;;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ;;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. ;;;; IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY ;;;; CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, ;;;; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE ;;;; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. (cl:in-package #:esrap) ;;; MEMOIZATION CACHE ;;; ;;; Because each [rule, position] tuple has an unambiguous ;;; result per source text, we can cache this result -- this is what ;;; makes packrat parsing O(N). ;;; ;;; For now we just use EQUAL hash-tables, but a specialized ;;; representation would probably pay off. (declaim (inline make-cache get-cached (setf get-cached))) (defun make-cache () (make-hash-table :test #'equal)) (defun get-cached (symbol position cache) (gethash (cons symbol position) cache)) (defun (setf get-cached) (result symbol position cache) (setf (gethash (cons symbol position) cache) result)) ;; In case of left recursion, this stores (defstruct (head (:predicate nil) (:copier nil)) ;; The rule at which the left recursion started. (rule (required-argument :rule) :type symbol) ;; The set of involved rules. (involved-set '() :type list) ;; The set of rules which can still be applied in the current round ;; of "seed parse" growing. (eval-set '() :type list)) ;;; Left-recursion support (declaim (inline make-heads get-head (setf get-head))) (defun make-heads () (make-hash-table :test #'equal)) (defun get-head (position heads) (gethash position heads)) (defun (setf get-head) (head position heads) (setf (gethash position heads) head)) (defun recall (rule position cache heads thunk) (let ((result (get-cached rule position cache)) (head (get-head position heads))) (cond ;; If not growing a seed parse, just return what is stored in ;; the cache. ((not head) result) ;; Do not evaluate any rule that is not involved in this left ;; recursion. ((and (not result) (not (or (eq rule (head-rule head)) (member rule (head-involved-set head))))) (make-failed-parse nil position nil)) ;; Allow involved rules to be evaluated, but only once, during a ;; seed-growing iteration. Subsequent requests just return what ;; is stored in the cache. (t (when (member rule (head-eval-set head)) (removef (head-eval-set head) rule :count 1) (setf result (funcall thunk position) (get-cached rule position cache) result)) result)))) ;;; Context (declaim (inline make-context context-cache context-heads context-nonterminal-stack (setf context-nonterminal-stack))) (defstruct (context (:constructor make-context ())) (cache (make-cache) :type hash-table :read-only t) (heads (make-heads) :type hash-table :read-only t) (nonterminal-stack '() :type list)) (declaim (type context *context*)) (defvar *context* (make-context)) (defmacro with-pushed-nonterminal ((symbol context) &body body) (with-gensyms (previous cell) (once-only (context) `(let* ((,previous (context-nonterminal-stack ,context)) (,cell (list* ,symbol ,previous))) (declare (dynamic-extent ,cell)) (setf (context-nonterminal-stack ,context) ,cell) (prog1 (progn ,@body) (setf (context-nonterminal-stack ,context) ,previous)))))) ;;; SYMBOL and POSITION must all lexical variables! (defmacro with-cached-result ((symbol position &optional (text nil)) &body forms) (with-gensyms (context cache heads result) `(flet ((do-it (position) ,@forms)) (let* ((,context *context*) (,cache (context-cache ,context)) (,heads (context-heads ,context)) (,result (recall ,symbol ,position ,cache ,heads #'do-it))) (cond ;; Found left-recursion marker in the cache. Depending on ;; *ERROR-ON-LEFT-RECURSION*, we either signal an error or ;; prepare recovery from this situation (which is performed ;; by one of the "cache miss" cases (see below) up the ;; call-stack). ((left-recursion-result-p ,result) ;; If error on left-recursion has been requested, do that. (when (eq *on-left-recursion* :error) (left-recursion ,text ,position ,symbol (reverse (mapcar #'left-recursion-result-rule (context-nonterminal-stack ,context))))) ;; Otherwise, mark left recursion and fail this partial ;; parse. (let ((head (or (left-recursion-result-head ,result) (setf (left-recursion-result-head ,result) (make-head :rule ,symbol))))) ;; Put this head into left recursion markers on the ;; stack. Add rules on the stack to the "involved set". (dolist (item (context-nonterminal-stack ,context)) (when (eq (left-recursion-result-head item) head) (return)) (setf (left-recursion-result-head item) head) (pushnew (left-recursion-result-rule item) (head-involved-set head)))) (make-failed-parse ,symbol ,position nil)) ;; Cache hit without left-recursion. (,result ,result) ;; Cache miss. (t ;; First add a left recursion marker for this pair, then ;; compute the result, potentially recovering from left ;; recursion and cache that. (let* ((result (make-left-recursion-result ,symbol)) (result1 (with-pushed-nonterminal (result ,context) (setf (get-cached ,symbol ,position ,cache) result (get-cached ,symbol ,position ,cache) (do-it position))))) ;; If we detect left recursion, handle it. (when (and (not (error-result-p result1)) (left-recursion-result-head result)) (let ((head (left-recursion-result-head result))) ;; Grow "seed parse" (grow-lr in the paper): ;; repeatedly apply rules involved in left-recursion ;; until no progress can be made. (setf (get-head ,position ,heads) head) (loop (setf (head-eval-set head) (copy-list (head-involved-set head))) (let ((result2 (do-it ,position))) (when (or (error-result-p result2) (<= (result-position result2) (result-position result1))) ; no progress (return)) (setf (get-cached ,symbol ,position ,cache) (%make-successful-parse ,symbol (result-position result2) result2 #'successful-parse-production) result1 result2))) (setf (get-head ,position ,heads) nil))) result1))))))) esrap-20170630-git/src/conditions.lisp000066400000000000000000000166701311177263700175240ustar00rootroot00000000000000;;;; Copyright (c) 2007-2013 Nikodemus Siivola ;;;; Copyright (c) 2012-2016 Jan Moringen ;;;; ;;;; Permission is hereby granted, free of charge, to any person ;;;; obtaining a copy of this software and associated documentation files ;;;; (the "Software"), to deal in the Software without restriction, ;;;; including without limitation the rights to use, copy, modify, merge, ;;;; publish, distribute, sublicense, and/or sell copies of the Software, ;;;; and to permit persons to whom the Software is furnished to do so, ;;;; subject to the following conditions: ;;;; ;;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ;;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ;;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. ;;;; IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY ;;;; CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, ;;;; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE ;;;; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. (cl:in-package #:esrap) (define-condition invalid-expression-error (error) ((expression :initarg :expression :reader invalid-expression-error-expression)) (:default-initargs :expression (required-argument :expression)) (:documentation "Signaled when an invalid expression is encountered.")) (defmethod print-object ((condition invalid-expression-error) stream) (format stream "Invalid expression: ~S" (invalid-expression-error-expression condition))) (defun invalid-expression-error (expression) (error 'invalid-expression-error :expression expression)) (define-condition esrap-error (parse-error) ((text :initarg :text :initform nil :reader esrap-error-text)) (:documentation "Signaled when an Esrap parse fails. Use ESRAP-ERROR-TEXT to obtain the string that was being parsed, and ESRAP-ERROR-POSITION the position at which the error occurred.")) (defmethod print-object :before ((condition esrap-error) stream) (when (or *print-escape* *print-readably* (and *print-lines* (<= *print-lines* 5))) (return-from print-object)) ;; FIXME: this looks like it won't do the right thing when used as ;; part of a logical block. (if-let ((text (esrap-error-text condition)) (position (esrap-error-position condition))) (labels ((safe-index (index) (min (max index 0) (length text))) (find-newline (&key (start 0) (end (length text)) (from-end t)) (let ((start (safe-index start)) (end (safe-index end))) (cond ((when-let ((position (position #\Newline text :start start :end end :from-end from-end))) (1+ position))) ((and from-end (zerop start)) start) ((and (not from-end) (= end (length text))) end))))) ;; FIXME: magic numbers (let* ((line (count #\Newline text :end position)) (column (- position (or (find-newline :end position) 0) 1)) (min-start (- position 160)) (max-end (+ position 24)) (line-start (or (find-newline :start min-start :end position) (safe-index min-start))) (start (cond ((= (safe-index min-start) line-start) line-start) ((find-newline :start min-start :end (1- line-start))) (t line-start))) (end (or (find-newline :start position :end max-end :from-end nil) (safe-index max-end))) (*print-circle* nil)) (format stream "At~:[~; end of input~]~2%~ ~2@T~<~@;~A~:>~%~ ~2@T~V@T^ (Line ~D, Column ~D, Position ~D)~2%" (= position (length text)) (list (subseq text start end)) (- position line-start) (1+ line) (1+ column) position))) (format stream "~2&~2%"))) (define-condition esrap-parse-error (esrap-error) ((result :initarg :result :type result :reader esrap-parse-error-result) (%context :accessor esrap-parse-error-%context :initform nil)) (:default-initargs :result (required-argument :result)) (:documentation "This error is signaled when a parse attempt fails in a way that .")) (defmethod esrap-error-position ((condition esrap-parse-error)) (result-position (esrap-parse-error-context condition))) (defmethod esrap-parse-error-context ((condition esrap-parse-error)) (or (esrap-parse-error-%context condition) (setf (esrap-parse-error-%context condition) (let ((result (esrap-parse-error-result condition))) (or (result-context result) result))))) (defmethod print-object ((object esrap-parse-error) stream) (cond (*print-readably* (call-next-method)) (*print-escape* (print-unreadable-object (object stream :type t :identity t) (format stream "~@[~S~]~@[ @~D~]" (esrap-parse-error-context object) (esrap-error-position object)))) (t (error-report (esrap-parse-error-context object) stream)))) (declaim (ftype (function (string result) (values &optional nil)) esrap-parse-error)) (defun esrap-parse-error (text result) (error 'esrap-parse-error :text text :result result)) (define-condition left-recursion (esrap-error) ((position :initarg :position :initform nil :reader esrap-error-position) (nonterminal :initarg :nonterminal :initform nil :reader left-recursion-nonterminal) (path :initarg :path :initform nil :reader left-recursion-path)) (:documentation "May be signaled when left recursion is detected during Esrap parsing. LEFT-RECURSION-NONTERMINAL names the symbol for which left recursion was detected, and LEFT-RECURSION-PATH lists nonterminals of which the left recursion cycle consists. Note: This error is only signaled if *ON-LEFT-RECURSION* is bound to :ERROR.")) (defmethod print-object ((condition left-recursion) stream) (format stream "Left recursion in nonterminal ~S. ~_Path: ~ ~{~S~^ -> ~}" (left-recursion-nonterminal condition) (left-recursion-path condition))) (defun left-recursion (text position nonterminal path-butlast) (error 'left-recursion :text text :position position :nonterminal nonterminal :path (append path-butlast (list nonterminal)))) (define-condition undefined-rule (condition) ((symbol :initarg :symbol :type symbol :reader undefined-rule-symbol))) (defmethod print-object ((condition undefined-rule) stream) (format stream "~@" (undefined-rule-symbol condition))) (define-condition undefined-rule-error (undefined-rule error) () (:documentation "Signaled when an undefined rule is encountered.")) (defun undefined-rule (symbol) (error 'undefined-rule-error :symbol symbol)) esrap-20170630-git/src/editor-support.lisp000066400000000000000000000034241311177263700203440ustar00rootroot00000000000000;;;; Copyright (c) 2007-2013 Nikodemus Siivola ;;;; Copyright (c) 2012-2016 Jan Moringen ;;;; ;;;; Permission is hereby granted, free of charge, to any person ;;;; obtaining a copy of this software and associated documentation files ;;;; (the "Software"), to deal in the Software without restriction, ;;;; including without limitation the rights to use, copy, modify, merge, ;;;; publish, distribute, sublicense, and/or sell copies of the Software, ;;;; and to permit persons to whom the Software is furnished to do so, ;;;; subject to the following conditions: ;;;; ;;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ;;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ;;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. ;;;; IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY ;;;; CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, ;;;; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE ;;;; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. (cl:in-package #:esrap) (defvar *indentation-hint-table* nil) (defun hint-slime-indentation () ;; See https://github.com/nikodemus/esrap/issues/24. (unless (member "SWANK-INDENTATION" *modules* :test #'string=) (return-from hint-slime-indentation)) (when-let* ((swank (find-package :swank)) (tables (find-symbol (string '#:*application-hints-tables*) swank)) (table (make-hash-table :test #'eq))) (setf (gethash 'defrule table) '(4 4 &rest (&whole 2 &lambda &body))) (set tables (cons table (remove *indentation-hint-table* (symbol-value tables)))) (setf *indentation-hint-table* table) t)) (hint-slime-indentation) esrap-20170630-git/src/evaluator.lisp000066400000000000000000000775751311177263700173700ustar00rootroot00000000000000;;;; Copyright (c) 2007-2013 Nikodemus Siivola ;;;; Copyright (c) 2012-2016 Jan Moringen ;;;; ;;;; Permission is hereby granted, free of charge, to any person ;;;; obtaining a copy of this software and associated documentation files ;;;; (the "Software"), to deal in the Software without restriction, ;;;; including without limitation the rights to use, copy, modify, merge, ;;;; publish, distribute, sublicense, and/or sell copies of the Software, ;;;; and to permit persons to whom the Software is furnished to do so, ;;;; subject to the following conditions: ;;;; ;;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ;;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ;;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. ;;;; IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY ;;;; CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, ;;;; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE ;;;; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. (cl:in-package #:esrap) ;;; Utilities (declaim (ftype (function * (values function &optional)) resolve-function)) (defun resolve-function (name arguments expression) (check-function-reference name expression) (cond ((member (symbol-package name) (load-time-value (mapcar #'find-package '(#:cl #:esrap)))) (symbol-function name)) (t ;; KLUDGE: Calling via a variable symbol can be slow, but if we ;; grab the SYMBOL-FUNCTION here we will not see redefinitions. (handler-bind ((style-warning #'muffle-warning)) (compile nil `(lambda ,arguments (,name ,@arguments))))))) ;;; COMPILING RULES (defvar *current-rule* nil) (defun compile-rule (symbol expression condition transform around) (declare (type (or boolean function) condition transform around)) (let* ((*current-rule* symbol) ;; Must bind *CURRENT-RULE* before compiling the expression! (function (compile-expression expression)) ;; We use a single static INACTIVE-RULE instance to represent ;; (error) results produced by inactive rules. The actual ;; error position has to be added in a post-processing step. (rule-not-active (make-inactive-rule symbol 0))) (cond ((not condition) (named-lambda inactive-rule (text position end) (declare (ignore text position end)) rule-not-active)) (transform (locally (declare (type function transform)) (flet ((exec-rule/transform (text position end) (let ((result (funcall function text position end))) (if (error-result-p result) (make-failed-parse/no-position symbol result) (if around (locally (declare (type function around)) (make-successful-parse symbol (result-position result) result (flet ((call-rule () (funcall transform (successful-parse-production result) position (result-position result)))) (funcall around position (result-position result) #'call-rule)))) (make-successful-parse symbol (result-position result) result (funcall transform (successful-parse-production result) position (result-position result)))))))) (if (eq t condition) (named-lambda rule/transform (text position end) (with-cached-result (symbol position text) (exec-rule/transform text position end))) (locally (declare (type function condition)) (named-lambda condition-rule/transform (text position end) (with-cached-result (symbol position text) (if (funcall condition) (exec-rule/transform text position end) rule-not-active)))))))) (t (if (eq t condition) (named-lambda rule (text position end) (with-cached-result (symbol position text) (funcall function text position end))) (locally (declare (type function condition)) (named-lambda conditional-rule (text position end) (with-cached-result (symbol position text) (if (funcall condition) (funcall function text position end) rule-not-active))))))))) ;;; EXPRESSION COMPILER & EVALUATOR (defun eval-expression (expression text position end) (expression-case expression (character (eval-character text position end)) (terminal (if (consp expression) (eval-terminal (string (second expression)) text position end nil) (eval-terminal (string expression) text position end t))) (nonterminal (eval-nonterminal expression text position end)) (string (eval-string expression text position end)) (and (eval-sequence expression text position end)) (or (eval-ordered-choise expression text position end)) (not (eval-negation expression text position end)) (* (eval-greedy-repetition expression text position end)) (+ (eval-greedy-positive-repetition expression text position end)) (? (eval-optional expression text position end)) (& (eval-followed-by expression text position end)) (! (eval-not-followed-by expression text position end)) (< (eval-look-behind expression text position end)) (> (eval-look-ahead expression text position end)) (character-ranges (eval-character-ranges expression text position end)) (function (eval-terminal-function expression text position end)) (predicate (eval-semantic-predicate expression text position end)))) (declaim (ftype (function (*) (values function &optional)) compile-expression)) (defun compile-expression (expression) (expression-case expression (character (compile-character)) (terminal (if (consp expression) (compile-terminal (string (second expression)) nil) (compile-terminal (string expression) t))) (nonterminal (compile-nonterminal expression)) (string (compile-string expression)) (and (compile-sequence expression)) (or (compile-ordered-choise expression)) (not (compile-negation expression)) (* (compile-greedy-repetition expression)) (+ (compile-greedy-positive-repetition expression)) (? (compile-optional expression)) (& (compile-followed-by expression)) (! (compile-not-followed-by expression)) (< (compile-look-behind expression)) (> (compile-look-ahead expression)) (character-ranges (compile-character-ranges expression)) (function (compile-terminal-function expression)) (predicate (compile-semantic-predicate expression)))) (defmacro expression-lambda (name args &body body) (unless (length= 3 (parse-ordinary-lambda-list args)) (error "~@")) (let ((name (symbolicate '#:compiled- name))) (destructuring-bind (text-var position-var end-var) args `(named-lambda ,name ,args (declare (type string ,text-var) (type input-position ,position-var) (type input-length ,end-var)) ,@body)))) ;;; Characters and strings (declaim (ftype (function (string input-position input-length) (values result &optional)) eval-character)) (defun eval-character (text position end) (if (< position end) (%make-successful-parse 'character (1+ position) nil (list (char text position))) (make-failed-parse 'character end nil))) (defun compile-character () #'eval-character) (declaim (inline exec-string)) (defun exec-string (expression length text position end) (let ((limit (+ length position))) (if (<= limit end) (make-successful-parse expression limit nil (subseq text position limit)) (make-failed-parse expression end nil)))) (declaim (ftype (function (* string input-position input-length) (values result &optional)) eval-string)) (defun eval-string (expression text position end) (with-expression (expression (string length)) (declare (type input-position length)) (exec-string expression length text position end))) (defun compile-string (expression) (with-expression (expression (string length)) (declare (type input-position length)) (expression-lambda #:string (text position end) (exec-string expression length text position end)))) ;;; Terminals (declaim (inline match-terminal/case-sensitive-p match-terminal/case-insensitive-p match-terminal/1/case-sensitive-p match-terminal/1/case-insensitive-p)) (defun match-terminal/case-sensitive-p (string length text position end) (and (<= (+ length position) end) (string= string text :start2 position :end2 (+ position length)))) (defun match-terminal/case-insensitive-p (string length text position end) (and (<= (+ length position) end) (string-equal string text :start2 position :end2 (+ position length)))) (defun match-terminal/1/case-sensitive-p (char text position end) (and (< position end) (char= (char text position) char))) (defun match-terminal/1/case-insensitive-p (char text position end) (and (< position end) (char-equal (char text position) char))) (defun eval-terminal (string text position end case-sensitive-p) (let ((length (length string))) (if (if case-sensitive-p (match-terminal/case-sensitive-p string length text position end) (match-terminal/case-insensitive-p string length text position end)) (make-successful-parse string (the input-position (+ length position)) nil string) (make-failed-parse string position nil)))) (defun compile-terminal (string case-sensitive-p) (macrolet ((with-results ((expression length result) form) `(if ,form (%make-successful-parse ,expression (the input-position (+ ,length position)) nil ,result) (make-failed-parse ,expression position nil)))) (let ((length (length string)) (result (list string))) (cond ((and (= 1 length) case-sensitive-p) (let ((char (char string 0))) (expression-lambda #:terminal/1/case-sensitive (text position end) (with-results (string 1 result) (match-terminal/1/case-sensitive-p char text position end))))) ((= 1 length) (let ((char (char string 0))) (expression-lambda #:terminal/1/case-insensitive (text position end) (with-results (string 1 result) (match-terminal/1/case-insensitive-p char text position end))))) (case-sensitive-p (expression-lambda #:terminal/case-sensitive (text position end) (with-results (string length result) (match-terminal/case-sensitive-p string length text position end)))) (t (expression-lambda #:terminal/case-insensitive (text position end) (with-results (string length result) (match-terminal/case-insensitive-p string length text position end)))))))) (declaim (ftype (function (* function string input-position input-length) (values result &optional)) exec-terminal-function)) (defun exec-terminal-function (expression function text position end) ;; The protocol is as follows: ;; ;; FUNCTION succeeded if one of ;; 1) returns three values and RESULT is T ;; 2) returns two values and END-POSITION is NIL ;; 3) returns two values and (> END-POSITION POSITION) ;; 4) returns one value of type SUCCESSFUL-PARSE ;; ;; FUNCTION failed if one of ;; 1) returns at least two values and (= END-POSITION POSITION) ;; (since no progress has been made), but only if RESULT is not T ;; 2) returns three values and RESULT is a string or a condition ;; 3) returns one value of type ERROR-RESULT ;; ;; When RESULT is a string or a condition, END-POSITION can indicate ;; the exact position of the failure but is also allowed to be NIL. ;; ;; RESULT can be T to indicate success even if (= END-POSITION ;; POSITION). (multiple-value-bind (production end-position result) (funcall function text position end) (declare (type (or null input-position) end-position) (type (or null string condition (eql t)) result)) (cond ((result-p production) production) ((or (eq result t) (and (null result) (or (null end-position) (> end-position position)))) (make-successful-parse expression (or end-position end) nil production)) (t (make-failed-parse expression (or end-position position) result))))) (defun eval-terminal-function (expression text position end) (with-expression (expression (function function)) (let ((function (ensure-function function))) (exec-terminal-function expression function text position end)))) (defun compile-terminal-function (expression) (with-expression (expression (function function-name)) (let ((function (resolve-function function-name '(text position end) expression))) (expression-lambda #:terminal-function (text position end) (exec-terminal-function expression function text position end))))) ;;; Nonterminals (defun exec-nonterminal (symbol text position end) (let* ((rule (or (find-rule symbol) (undefined-rule symbol))) (expression (rule-expression rule)) (condition (rule-condition rule)) (around (rule-around rule)) (transform (rule-transform rule))) (with-cached-result (symbol position text) (labels ((call-transform (result) (declare (type function transform)) (funcall transform (successful-parse-production result) position (result-position result))) (exec-rule/transform (result) (cond ((error-result-p result) (make-failed-parse/no-position symbol result)) (around (locally (declare (type function around)) (make-successful-parse symbol (result-position result) result (funcall around position (result-position result) (curry #'call-transform result))))) (t (make-successful-parse symbol (result-position result) result (call-transform result))))) (exec-expression () (let ((result (eval-expression expression text position end))) (if transform (exec-rule/transform result) result))) (process-condition () (cond ((not condition) (make-inactive-rule symbol 0)) ((or (eq t condition) (funcall (the function condition))) (exec-expression)) (t (make-inactive-rule symbol 0))))) (declare (dynamic-extent #'exec-rule/transform #'exec-expression #'process-condition)) (process-condition))))) (defun eval-nonterminal (symbol text position end) (if *eval-nonterminals* (exec-nonterminal symbol text position end) (funcall (cell-function (ensure-rule-cell symbol)) text position end))) (defun compile-nonterminal (symbol) (let ((cell (reference-rule-cell symbol *current-rule*))) (declare (type rule-cell cell)) (expression-lambda #:nonterminal (text position end) (funcall (cell-function cell) text position end)))) ;;; Sequences ;;; ;;; FIXME: It might be better if we actually chained the closures ;;; here, instead of looping over them -- benchmark first, though. (defun eval-sequence (expression text position end) (with-expression (expression (and &rest subexprs)) (let ((results '())) (dolist (expr subexprs (make-successful-parse expression position (nreverse results) #'list-of-result-productions)) (let ((result (eval-expression expr text position end))) (if (error-result-p result) (return (make-failed-parse expression position (nreverse (list* result results)))) (setf position (result-position result))) (push result results)))))) (defun compile-sequence (expression) (with-expression (expression (and &rest subexprs)) (let ((functions (mapcar #'compile-expression subexprs))) (expression-lambda #:sequence (text position end) (let ((results '())) (dolist (fun functions (make-successful-parse expression position (nreverse results) #'list-of-result-productions)) (let ((result (funcall fun text position end))) (if (error-result-p result) (return (make-failed-parse expression position (nreverse (list* result results)))) (setf position (result-position result))) (push result results)))))))) ;;; Ordered choises (declaim (inline make-ordered-choise-result)) (defun make-ordered-choise-result (expression result errors) (if errors (make-successful-parse expression (result-position result) (nreverse (list* result errors)) (successful-parse-production result)) result)) (defun eval-ordered-choise (expression text position end) (with-expression (expression (or &rest subexprs)) (let ((errors '())) (dolist (expr subexprs (make-failed-parse/no-position expression (nreverse errors))) (let ((result (eval-expression expr text position end))) (if (error-result-p result) (push result errors) (return (make-ordered-choise-result expression result errors)))))))) (defun check-ordered-choise-prefix (string previous-strings) ;; Check for "FOO" followed by "FOOBAR" -- the latter would never ;; match, but it's an easy mistake to make. (not (some (lambda (previous) (let ((end (min (length previous) (length string)))) (not (or (mismatch string previous :end1 end) (warn "~@" previous string 'or))))) previous-strings))) (defun analyze-ordered-choise (sub-expressions) (let ((type :characters) (canonized '())) (dolist (sub sub-expressions) (when (and (typep sub '(or character string))) (let ((string (string sub))) (when (check-ordered-choise-prefix string canonized) (push string canonized)))) (case type (:general) (:strings (unless (typep sub '(or character string)) (setf type :general))) (:characters (unless (typep sub '(or character (string 1))) (setf type (if (typep sub 'string) :strings :general)))))) (values type (nreverse canonized)))) (defun compile-ordered-choise (expression) (with-expression (expression (or &rest subexprs)) (multiple-value-bind (type canonized) (analyze-ordered-choise subexprs) ;; FIXME: Optimize case-insensitive terminals as well. (ecase type (:characters ;; If every subexpression is a length 1 string, we can represent the whole ;; choise with a single string. (let ((choises (apply #'concatenate 'string canonized)) (productions (map 'vector #'list canonized))) (declare (type string choises)) (expression-lambda #:character-choise/characters (text position end) (if-let ((index (and (< position end) (position (char text position) choises)))) (%make-successful-parse expression (+ 1 position) nil (aref productions index)) (make-failed-parse expression position nil))))) (:strings ;; If every subexpression is a string, we can represent the whole choise ;; with a list of strings. (let ((choises (mapcar #'list canonized))) (expression-lambda #:character-choise/strings (text position end) (dolist (choise choises (make-failed-parse expression position nil)) (let* ((string (car choise)) (len (length string))) (declare (type string string)) (when (match-terminal/case-sensitive-p string len text position end) (return (%make-successful-parse expression (the input-position (+ len position)) nil choise)))))))) (:general ;; In the general case, compile subexpressions and call. (let ((functions (mapcar #'compile-expression subexprs))) (expression-lambda #:ordered-choise/general (text position end) (let ((errors '())) (dolist (fun functions (make-failed-parse/no-position expression (nreverse errors))) (declare (type function fun)) (let ((result (funcall fun text position end))) (if (error-result-p result) (push result errors) (return (make-ordered-choise-result expression result errors))))))))))))) ;;; Negations (declaim (ftype (function (function * string input-position input-position) (values result &optional)) exec-negation)) (defun exec-negation (fun expr text position end) (let ((result)) (if (and (< position end) (error-result-p (setf result (funcall fun text position end)))) (%make-successful-parse expr (1+ position) result (list (char text position))) (make-failed-parse expr position result)))) (defun eval-negation (expression text position end) (with-expression (expression (not subexpr)) (flet ((eval-sub (text position end) (eval-expression subexpr text position end))) (declare (dynamic-extent #'eval-sub)) (exec-negation #'eval-sub expression text position end)))) (defun compile-negation (expression) (with-expression (expression (not subexpr)) (let ((sub (compile-expression subexpr))) (expression-lambda #:negation (text position end) (exec-negation sub expression text position end))))) ;;; Greedy repetitions (defun eval-greedy-repetition (expression text position end) (funcall (compile-greedy-repetition expression) text position end)) (defun compile-greedy-repetition (expression) (with-expression (expression (* subexpr)) (let ((function (compile-expression subexpr))) (expression-lambda #:greedy-repetition (text position end) (let ((last) (results '())) (loop for result = (funcall function text position end) until (error-result-p (setf last result)) do (setf position (result-position result)) (push result results)) (make-successful-parse expression position (nreverse (list* last results)) #'list-of-result-productions/butlast)))))) ;;; Greedy positive repetitions (defun eval-greedy-positive-repetition (expression text position end) (funcall (compile-greedy-positive-repetition expression) text position end)) (defun compile-greedy-positive-repetition (expression) (with-expression (expression (+ subexpr)) (let ((function (compile-expression subexpr))) (expression-lambda #:greedy-positive-repetition (text position end) (let* ((last nil) (results)) (loop for result = (funcall function text position end) until (error-result-p (setf last result)) do (setf position (result-position result)) (push result results)) (if results (make-successful-parse expression position (nreverse (list* last results)) #'list-of-result-productions/butlast) (make-failed-parse expression position last))))))) ;;; Optionals (defun eval-optional (expression text position end) (with-expression (expression (? subexpr)) (let ((result (eval-expression subexpr text position end))) (if (error-result-p result) (%make-successful-parse expression position result '(nil)) result)))) (defun compile-optional (expression) (with-expression (expression (? subexpr)) (let ((function (compile-expression subexpr))) (expression-lambda #:optional (text position end) (let ((result (funcall function text position end))) (if (error-result-p result) (%make-successful-parse expression position result '(nil)) result)))))) ;;; Followed-by's (defun eval-followed-by (expression text position end) (with-expression (expression (& subexpr)) (let ((result (eval-expression subexpr text position end))) (if (error-result-p result) (make-failed-parse expression position result) (make-successful-parse expression position result #'successful-parse-production))))) (defun compile-followed-by (expression) (with-expression (expression (& subexpr)) (let ((function (compile-expression subexpr))) (expression-lambda #:followed-by (text position end) (let ((result (funcall function text position end))) (if (error-result-p result) (make-failed-parse expression position result) (make-successful-parse expression position result #'successful-parse-production))))))) ;;; Not followed-by's (defun eval-not-followed-by (expression text position end) (with-expression (expression (! subexpr)) (let ((result (eval-expression subexpr text position end))) (if (error-result-p result) (%make-successful-parse expression position result '(nil)) (make-failed-parse expression position result))))) (defun compile-not-followed-by (expression) (with-expression (expression (! subexpr)) (let ((function (compile-expression subexpr))) (expression-lambda #:not-followed-by (text position end) (let ((result (funcall function text position end))) (if (error-result-p result) (%make-successful-parse expression position result '(nil)) (make-failed-parse expression position result))))))) ;;; Look{ahead,behind} (macrolet ((define-look (direction operator look-position test) (let ((eval-name (symbolicate '#:eval-look- direction)) (compile-name (symbolicate '#:compile-look- direction)) (lambda-name (symbolicate '#:compiled-look- direction))) `(progn (defun ,eval-name (expression text position end) (with-expression (expression (,operator n subexpr)) (declare (type input-position n)) (let* ((look-position ,look-position) (result (when ,test (eval-expression subexpr text look-position end)))) (if (or (not result) (error-result-p result)) (make-failed-parse expression position result) (make-successful-parse expression position result #'successful-parse-production))))) (defun ,compile-name (expression) (with-expression (expression (,operator n subexpr)) (declare (type input-position n)) (let ((function (compile-expression subexpr))) (expression-lambda ,lambda-name (text position end) (let* ((look-position ,look-position) (result (when ,test (funcall function text look-position end)))) (if (or (not result) (error-result-p result)) (make-failed-parse expression position result) (make-successful-parse expression position result #'successful-parse-production))))))))))) (define-look :behind < (- position n) (>= look-position 0)) (define-look :ahead > (+ position n) (<= look-position end))) ;;; Semantic predicates (defun eval-semantic-predicate (expression text position end) (with-expression (expression ((t predicate-name) subexpr)) (let ((result (eval-expression subexpr text position end))) (if (error-result-p result) (make-failed-parse expression position result) (let ((production (successful-parse-production result))) (if (funcall (symbol-function predicate-name) production) result (make-failed-parse expression position result))))))) (defun compile-semantic-predicate (expression) (with-expression (expression ((t predicate-name) subexpr)) (let* ((function (compile-expression subexpr)) (predicate (resolve-function predicate-name '(production) expression))) (expression-lambda #:semantic-predicate (text position end) (let ((result (funcall function text position end))) (if (error-result-p result) (make-failed-parse expression position result) (let ((production (successful-parse-production result))) (if (funcall predicate production) result (make-failed-parse expression position result))))))))) ;;; Character ranges (declaim (ftype (function (* * string input-position input-length) (values result &optional)) exec-character-ranges)) (defun exec-character-ranges (expression ranges text position end) (flet ((oops () (make-failed-parse expression position nil))) (if (< position end) (let ((char (char text position))) (if (loop for range in ranges do (if (characterp range) (when (char= range char) (return t)) (when (char<= (first range) char (second range)) (return t)))) (make-successful-parse expression (1+ position) nil char) (oops))) (oops)))) (defun eval-character-ranges (expression text position end) (with-expression (expression (character-ranges &rest ranges)) (exec-character-ranges expression ranges text position end))) (defun compile-character-ranges (expression) (with-expression (expression (character-ranges &rest ranges)) (expression-lambda #:character-ranges (text position end) (exec-character-ranges expression ranges text position end)))) esrap-20170630-git/src/expressions.lisp000066400000000000000000000365411311177263700177340ustar00rootroot00000000000000;;;; Copyright (c) 2007-2013 Nikodemus Siivola ;;;; Copyright (c) 2012-2017 Jan Moringen ;;;; ;;;; Permission is hereby granted, free of charge, to any person ;;;; obtaining a copy of this software and associated documentation files ;;;; (the "Software"), to deal in the Software without restriction, ;;;; including without limitation the rights to use, copy, modify, merge, ;;;; publish, distribute, sublicense, and/or sell copies of the Software, ;;;; and to permit persons to whom the Software is furnished to do so, ;;;; subject to the following conditions: ;;;; ;;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ;;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ;;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. ;;;; IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY ;;;; CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, ;;;; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE ;;;; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. (cl:in-package #:esrap) ;;; (eval-when (:compile-toplevel :load-toplevel :execute) (defvar *expression-kinds* `((character . (eql character)) (character-ranges . (cons (eql character-ranges))) (string . (cons (eql string) (cons array-length null))) (and . (cons (eql and))) (or . (cons (eql or))) ,@(mapcar (lambda (symbol) `(,symbol . (cons (eql ,symbol) (cons t null)))) '(not * + ? & !)) ,@(mapcar (lambda (symbol) `(,symbol . (cons (eql ,symbol) (cons (and positive-integer input-position) (cons t null))))) '(< >)) (terminal . terminal) (nonterminal . nonterminal) (predicate . predicate) (function . (cons (eql function) (cons symbol null))) (t . t)) "Names and corresponding types of acceptable expression constructors.")) (eval-when (:compile-toplevel :load-toplevel :execute) (defmacro expression-case (expression &body clauses) "Similar to (cl:typecase EXPRESSION CLAUSES) but clause heads designate kinds of expressions instead of types. See *EXPRESSION-KINDS*." (let ((available (copy-list *expression-kinds*))) (labels ((type-for-expression-kind (kind) (if-let ((cell (assoc kind available))) (progn (removef available cell) (cdr cell)) (error "Invalid or duplicate clause: ~S" kind))) (process-clause (clause) (destructuring-bind (kind &body body) clause (etypecase kind (cons `((or ,@(mapcar #'type-for-expression-kind kind)) ,@body)) (symbol `(,(type-for-expression-kind kind) ,@body)))))) (let ((clauses (mapcar #'process-clause clauses))) ;; We did not provide clauses for all expression ;; constructors and did not specify a catch-all clause => ;; error. (when (and (assoc t available) (> (length available) 1)) (error "Unhandled expression kinds: ~{~S~^, ~}" (remove t (mapcar #'car available)))) ;; If we did not specify a catch-all clause, insert one ;; which signals INVALID-EXPRESSION-ERROR. (once-only (expression) `(typecase ,expression ,@clauses ,@(when (assoc t available) `((t (invalid-expression-error ,expression))))))))))) (defmacro with-expression ((expr spec) &body body) (destructuring-bind (type &optional (first-var (gensym))) (etypecase (first spec) ((cons symbol (cons symbol null)) (first spec)) (symbol (list (first spec)))) (let ((lambda-list (list* first-var (rest spec)))) (once-only (expr) `(destructuring-bind ,lambda-list ,expr ,@(unless (eq t type) `((unless (eq ',type ,first-var) (error "~S-expression expected, got: ~S" ',type ,expr)))) (locally ,@body)))))) ;;; (defun check-function-reference (name expression) (cond ((not (fboundp name)) (warn 'simple-style-warning :format-control "~@" :format-arguments (list name expression)) nil) ((or (macro-function name) (special-operator-p name)) (warn 'simple-style-warning :format-control "~@<~S in expression ~S is not a ~ function (but a macro or special ~ operator).~@:>" :format-arguments (list name expression)) nil) (t t))) (defun check-expression (expression) (labels ((rec (expression) (expression-case expression ((character string function terminal nonterminal)) (character-ranges (unless (every (of-type 'character-range) (rest expression)) (invalid-expression-error expression))) ((and or) (mapc #'rec (rest expression))) ((not * + ? & ! predicate) (rec (second expression))) ((< >) (rec (third expression)))))) (rec expression))) (defun %expression-dependencies (expression) (labels ((rec (expression result) (expression-case expression ((character string character-ranges function terminal) result) (nonterminal (if (member expression result :test #'eq) result (let ((rule (find-rule expression)) (result (list* expression result))) (if rule (rec (rule-expression rule) result) result)))) ((and or) (reduce #'rec (rest expression) :initial-value result :from-end t)) ((not * + ? & ! predicate) (rec (second expression) result)) ((< >) (rec (third expression) result))))) (rec expression '()))) (defun %expression-direct-dependencies (expression) (labels ((rec (expression result) (expression-case expression ((character string character-ranges function terminal) result) (nonterminal (list* expression result)) ((and or) (reduce #'rec (rest expression) :initial-value result :from-end t)) ((not * + ? & ! predicate) (rec (second expression) result)) ((< >) (rec (third expression) result))))) (rec expression '()))) (defun expression-start-terminals (expression &key (when-rule-error-report nil when-rule-error-report-p)) "Return a list of terminals or tree of expressions with which a text parsable by EXPRESSION can start. A tree instead of a list is returned when EXPRESSION contains semantic predicates, NOT or !. Elements in the returned list or tree are * case (in)sensitive characters, character ranges, case (in)sensitive strings, function terminals * semantic predicates represented as (PREDICATE-NAME NESTED-ELEMENTS) where NESTED-ELEMENTS is the list of start terminals of the expression to which PREDICATE-NAME is applied. * NOT and ! expressions are represented as ({not,!} NESTED-ELEMENTS) where NESTED-ELEMENTS is the list of start terminals of the negated expression. * < and > expressions are represented as ({<,>} OFFSET NESTED-ELEMENTS) where OFFSET is a positive integer and NESTED-ELEMENTS is the list of start terminals of the expression that should match OFFSET characters backward/forward from the current position. The (outermost) list is sorted likes this: 1. string terminals 2. character terminals 3. the CHARACTER wildcard terminal 4. semantic predicates 5. everything else If supplied, WHEN-RULE-ERROR-REPORT restricts processing of nonterminals to rules whose :ERROR-REPORT option is compatible with the value of WHEN-RULE-ERROR-REPORT." (labels ((rec (expression seen) (expression-case expression ((character string character-ranges function terminal) (list expression)) (predicate (when-let ((result (rec/sorted (second expression) seen))) (list (list (first expression) result)))) (nonterminal (unless (member expression seen :test #'equal) (when-let ((rule (find-rule expression))) (when (or (not when-rule-error-report-p) (error-report-behavior-suitable-for-report-part-p (rule-error-report rule) when-rule-error-report)) (rec (rule-expression rule) (list* expression seen)))))) ((not !) (when-let ((result (rec/sorted (second expression) seen))) (list (list (first expression) result)))) ((+ &) (rec (second expression) seen)) ((? *) (values (rec (second expression) seen) t)) ((< >) (with-expression (expression ((t direction) amount subexpr)) (list (list direction amount (rec subexpr seen))))) (and (let ((result '())) (dolist (sub-expression (rest expression) result) (multiple-value-bind (sub-start-terminals optionalp) (rec sub-expression seen) (when sub-start-terminals (appendf result sub-start-terminals) (unless optionalp (return result))))))) (or (mapcan (rcurry #'rec seen) (rest expression))))) (rec/without-duplicates (expression seen) (remove-duplicates (rec expression seen) :test #'equal)) (rec/sorted (expression seen) (stable-sort (rec/without-duplicates expression seen) #'expression<))) (rec/sorted expression '()))) (defun expression< (left right) (or (and (typep left 'string) (typep right '(not string))) (and (typep left 'string) (string-lessp left right)) (and (typep left 'character) (typep right '(not (or string character)))) (and (typep left 'character) (typep right 'character) (char-lessp left right)) (and (typep left '(eql character)) (typep left '(not (eql character)))) (and (typep left '(cons predicate-name)) (typep right '(not (or string character (eql character) (cons predicate-name))))) (typep right '(not (or string character (eql character) (cons predicate-name)))))) (defun expression-equal-p (left right) (labels ((rec (left right) (cond ((and (typep left '(or string character)) (typep right '(or string character))) (string= left right)) ((and (consp left) (consp right)) (and (rec (car left) (car right)) (rec (cdr left) (cdr right)))) (t (equalp left right))))) (declare (dynamic-extent #'rec)) (rec left right))) (defun describe-terminal (terminal &optional (stream *standard-output*)) "Print a description of TERMINAL onto STREAM. In additional to actual terminals, TERMINAL can be of the forms (PREDICATE-NAME TERMINALS) ({not,!} TERMINALS) ({<,>} OFFSET TERMINALS) (i.e. as produced by EXPRESSION-START-TERMINALS)." (labels ((output (format-control &rest format-arguments) (apply #'format stream format-control format-arguments)) (rec/sub-expression (sub-expression prefix separator) (output prefix (length sub-expression)) (rec (first sub-expression)) (loop :for terminal :in (rest sub-expression) :do (output separator) (rec terminal))) (rec (terminal) (expression-case terminal (character (output "any character")) (string (output "a string of length ~D" (second terminal))) (character-ranges (output "a character in ~{[~{~C~^-~C~}]~^ or ~}" (mapcar #'ensure-list (rest terminal)))) (function (output "a string that can be parsed by the function ~S" (second terminal))) (terminal (labels ((rec (thing) (etypecase thing (character ;; For non-graphic or whitespace characters, ;; just print the name. (output "the character ~:[~*~A~:;~A (~A)~]" (and (graphic-char-p thing) (not (member thing '(#\Space #\Tab #\Newline)))) thing (char-name thing))) (string (if (length= 1 thing) (rec (char thing 0)) (output "the string ~S" thing))) ((cons (eql ~)) (rec (second thing)) (output ", disregarding case"))))) (rec terminal))) ((not !) (let ((sub-expression (second terminal))) (typecase sub-expression ((cons (eql character) null) (output "")) (t (output "anything but") (pprint-logical-block (stream sub-expression) (rec/sub-expression sub-expression "~[~; ~:; ~5:T~]" "~@:_ and ")))))) ((< >) (with-expression (terminal ((t direction) amount sub-expression)) (pprint-logical-block (stream sub-expression) (rec/sub-expression sub-expression "~[~;~:; ~4:T~]" "~@:_ or ") (output "~[~; ~:;~@:_~]~ ~D character~:P ~[before~;after~] the ~ current position" (length sub-expression) amount (case direction (< 0) (> 1)))))) (predicate (let ((sub-expression (second terminal))) (pprint-logical-block (stream sub-expression) (rec/sub-expression sub-expression "~[~;~:; ~4:T~]" "~@:_ or ") (output "~[~; ~:;~@:_~]satisfying ~A" (length sub-expression) (first terminal))))) (t (error "~@" terminal))))) (rec terminal))) ;; For use as ~/esrap:print-terminal/ in format control. (defun print-terminal (stream terminal &optional colonp atp) (declare (ignore colonp atp)) (describe-terminal terminal stream)) esrap-20170630-git/src/interface.lisp000066400000000000000000000474101311177263700173070ustar00rootroot00000000000000;;;; Copyright (c) 2007-2013 Nikodemus Siivola ;;;; Copyright (c) 2012-2017 Jan Moringen ;;;; ;;;; Permission is hereby granted, free of charge, to any person ;;;; obtaining a copy of this software and associated documentation files ;;;; (the "Software"), to deal in the Software without restriction, ;;;; including without limitation the rights to use, copy, modify, merge, ;;;; publish, distribute, sublicense, and/or sell copies of the Software, ;;;; and to permit persons to whom the Software is furnished to do so, ;;;; subject to the following conditions: ;;;; ;;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ;;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ;;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. ;;;; IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY ;;;; CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, ;;;; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE ;;;; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. (cl:in-package #:esrap) (defun parse (expression text &key (start 0) end junk-allowed raw) "Parses TEXT using EXPRESSION from START to END. Incomplete parses, that is not consuming the entirety of TEXT, are allowed only if JUNK-ALLOWED is true. Returns three values: 1) A production, if the parse succeeded, NIL otherwise. 2) The position up to which TEXT has been consumed or NIL if the entirety of TEXT has been consumed. 3) If the parse succeeded, even if it did not consume any input, T is returned as a third value. The third return value is necessary to distinguish successful and failed parses for cases like (parse '(! #\\a) \"a\" :junk-allowed t) (parse '(! #\\a) \"b\" :junk-allowed t) in which the first two return values cannot indicate failures. RAW controls whether the parse result is interpreted and translated into the return values described above. If RAW is true, a parse result of type RESULT or ERROR-RESULT is returned as a single value. Note that the combination of arguments :junk-allowed t :raw t does not make sense since the JUNK-ALLOWED parameter is used when parse results are interpreted and translated into return values which does not happen when :raw t." ;; There is no backtracking in the toplevel expression -- so there's ;; no point in compiling it as it will be executed only once -- unless ;; it's a constant, for which we have a compiler-macro. (when (and junk-allowed raw) (error "~@" (list :junk-allowed junk-allowed :raw raw))) (let* ((end (or end (length text))) (*context* (make-context)) (result (eval-expression expression text start end))) (declare (dynamic-extent *context*)) (if raw result (process-parse-result result text start end junk-allowed)))) (define-compiler-macro parse (&whole form expression text &rest arguments &key &allow-other-keys) (flet ((make-expansion (result-var rawp junk-allowed-p body) ;; This inline-lambda provides keyword defaults and ;; parsing, so the compiler-macro doesn't have to worry ;; about evaluation order. (with-gensyms (expr-fun) `(let ((,expr-fun (load-time-value (compile-expression ,expression)))) ((lambda (text &key (start 0) end ,@(if rawp '(raw)) ,@(if junk-allowed-p '(junk-allowed))) (let* ((end (or end (length text))) (*context* (make-context)) (,result-var (funcall ,expr-fun text start end))) (declare (dynamic-extent *context*)) ,body)) ,text ,@(remove-from-plist arguments :raw)))))) (cond ((not (constantp expression)) ; cannot use ENV due to LOAD-TIME-VALUE form) ((let ((raw (getf arguments :raw 'missing))) (when (and (not (eq raw 'missing)) (constantp raw)) ; cannot used ENV due to following EVAL (let ((rawp (eval raw))) (make-expansion 'result nil (not rawp) (if rawp 'result '(process-parse-result result text start end junk-allowed))))))) (t (make-expansion 'result t t '(if raw result (process-parse-result result text start end junk-allowed))))))) (defun process-parse-result (result text start end junk-allowed) (cond ;; Successfully parsed something. ((successful-parse-p result) (with-accessors ((position result-position) (production successful-parse-production)) result (cond ((= position end) ; Consumed all input. (values production nil t)) (junk-allowed ; Did not consume all input; junk (values production position t)) ; is OK. (t ; Junk is not OK. (esrap-parse-error text result))))) ;; Did not parse anything, but junk is allowed. (junk-allowed (values nil start)) ;; Did not parse anything and junk is not allowed. (t (esrap-parse-error text result)))) (defmacro defrule (&whole form symbol expression &body options) "Define SYMBOL as a nonterminal, using EXPRESSION as associated the parsing expression. Multiple OPTIONS specifying transforms are composed in the order of appearance: (:text t) (:function parse-integer) => (alexandria:compose #'parse-integer #'text) Following OPTIONS can be specified: * (:WHEN TEST) The rule is active only when TEST evaluates to true. This can be used to specify optional extensions to a grammar. This option can only be supplied once. * (:CONSTANT CONSTANT) No matter what input is consumed or what EXPRESSION produces, the production of the rule is always CONSTANT. * (:FUNCTION FUNCTION) If provided the production of the expression is transformed using FUNCTION. FUNCTION can be a function name or a lambda-expression. * (:IDENTITY BOOLEAN) If true, the production of expression is used as-is, as if (:FUNCTION IDENTITY) has been specified. If no production option is specified, this is the default. * (:TEXT BOOLEAN) If true, the production of expression is flattened and concatenated into a string as if by (:FUNCTION TEXT) has been specified. * (:LAMBDA LAMBDA-LIST &BODY BODY) If provided, same as using the corresponding lambda-expression with :FUNCTION. As an extension of the standard lambda list syntax, LAMBDA-LIST accepts the optional pseudo lambda-list keyword ESRAP:&BOUNDS, which (1) must appear after all standard lambda list keywords. (2) can be followed by one or two variables to which bounding indexes of the matching substring are bound. Therefore: LAMBDA-LIST ::= (STANDARD-LAMBDA-LIST-ELEMENTS [&BOUNDS START [END]]) * (:DESTRUCTURE DESTRUCTURING-LAMBDA-LIST &BODY BODY) If provided, same as using a lambda-expression that destructures its argument using DESTRUCTURING-BIND and the provided lambda-list with :FUNCTION. DESTRUCTURING-LAMBDA-LIST can use ESRAP:&BOUNDS in the same way as described for :LAMBDA. * (:AROUND ([&BOUNDS START [END]]) &BODY BODY) If provided, execute BODY around the construction of the production of the rule. BODY has to call ESRAP:CALL-TRANSFORM to trigger the computation of the production. Any transformation provided via :LAMBDA, :FUNCTION or :DESTRUCTURE is executed inside the call to ESRAP:CALL-TRANSFORM. As a result, modification to the dynamic state are visible within the transform. ESRAP:&BOUNDS can be used in the same way as described for :LAMBDA and :DESTRUCTURE. This option can be used to safely track nesting depth, manage symbol tables or for other stack-like operations. * (:ERROR-REPORT ( T | NIL | :CONTEXT | :DETAIL )) Defaults to T if not provided. Controls whether and how the rule is used in parse error reports: * T The rule is used in parse error reports without restriction (i.e. when describing the context of a failure as well as listing failed rules and expected inputs). * NIL The rule is not used in parse error reports in any capacity. In particular, inputs expected by the rule are not mentioned. This value is useful for things like whitespace rules since something like \"expected space, tab or newline\", even if it would have allowed the parser to continue for one character, is rarely helpful. * :CONTEXT The rule is used in the \"context\" part of parse error reports. The rule is neither mentioned in the list of failed rules nor are inputs expected by it. * :DETAIL The rule is not used in the \"context\" part of parse error reports, but can appear in the list of failed rules. Inputs expected by the rule are mentioned as well. " (multiple-value-bind (transforms around when error-report) (parse-defrule-options options form) (let ((transform (expand-transforms transforms))) `(eval-when (:load-toplevel :execute) (add-rule ',symbol (make-instance 'rule :expression ',expression :guard-expression ',(cdr when) :condition ,(car when) :transform ,transform :around ,around :error-report ,error-report)))))) (defun add-rule (symbol rule) "Associates RULE with the nonterminal SYMBOL. Signals an error if the rule is already associated with a nonterminal. If the symbol is already associated with a rule, the old rule is removed first." ;; FIXME: This needs locking and WITHOUT-INTERRUPTS. (check-type symbol nonterminal) (when (rule-symbol rule) (error "~S is already associated with the nonterminal ~S -- remove it first." rule (rule-symbol rule))) (let* ((cell (ensure-rule-cell symbol)) (function (compile-rule symbol (rule-expression rule) (rule-condition rule) (rule-transform rule) (rule-around rule))) (trace-info (cell-trace-info cell))) (set-cell-info cell function rule) (setf (cell-trace-info cell) nil (slot-value rule '%symbol) symbol) (when trace-info (destructuring-bind (break condition) (rest trace-info) (trace-rule symbol :break break :condition condition))) symbol)) (defun find-rule (symbol) "Returns rule designated by SYMBOL, if any. Symbol must be a nonterminal symbol." (check-type symbol nonterminal) (when-let ((cell (find-rule-cell symbol))) (cell-rule cell))) (defun remove-rule (symbol &key force) "Makes the nonterminal SYMBOL undefined. If the nonterminal is defined an already referred to by other rules, an error is signalled unless :FORCE is true." (check-type symbol nonterminal) ;; FIXME: Lock and WITHOUT-INTERRUPTS. (let* ((cell (find-rule-cell symbol)) (rule (cell-rule cell)) (trace-info (cell-trace-info cell))) (when cell (flet ((frob () (set-cell-info cell (undefined-rule-function symbol) nil) ; TODO update trace-info as part of this function? (when trace-info (setf (cell-trace-info cell) (list* (cell-%info cell) (rest trace-info)))) (when rule (detach-rule rule)))) (cond ((and rule (cell-referents cell)) (unless force (error "Nonterminal ~S is used by other nonterminal~P:~% ~{~S~^, ~}" symbol (length (cell-referents cell)) (cell-referents cell))) (frob)) ((not (cell-referents cell)) (frob) ;; There are no references to the rule at all, so ;; we can remove the cell. (unless trace-info (delete-rule-cell symbol))))) rule))) (defvar *trace-level* 0) (defun trace-rule (symbol &key recursive break condition) "Turn on tracing of nonterminal SYMBOL. If RECURSIVE is true, turn on tracing for the whole grammar rooted at SYMBOL. If RECURSIVE is a positive integer, turn on tracing for all rules reachable from the nonterminal SYMBOL in that number of steps. If BREAK is true, break is entered when the rule is invoked. If supplied, CONDITION has to be a function whose lambda-list is compatible to (symbol text position end). This function is called to determine whether trace actions should be executed for the traced rule. SYMBOL is the name of the rule being executed. TEXT is the whole text being parsed. POSITION is the position within TEXT at which the rule is executed. END is the end position of the portion of TEXT being parsed." (let ((seen (make-hash-table :test #'eq))) (labels ((traced (symbol break fun text position end) (when break (break "rule ~S" symbol)) (format *trace-output* "~&~V@T~D: ~S ~S?~%" *trace-level* (1+ *trace-level*) symbol position) (finish-output *trace-output*) (let* ((*trace-level* (1+ *trace-level*)) (result (funcall fun text position end))) (format *trace-output* "~&~V@T~D: ~S " (1- *trace-level*) *trace-level* symbol) (if (error-result-p result) (format *trace-output* "-|~%") (format *trace-output* "~S-~S -> ~S~%" position (result-position result) (successful-parse-production result))) (finish-output *trace-output*) result)) (traced/condition (condition symbol break fun text position end) (if (funcall condition symbol text position end) (traced symbol break fun text position end) (funcall fun text position end))) (trace-one (symbol cell depth) ;; Avoid infinite recursion and processing sub-trees ;; multiple times. (if (gethash cell seen) (return-from trace-one) (setf (gethash cell seen) t)) ;; If there is old trace information, removed it first. (when (cell-trace-info cell) (untrace-rule symbol)) ;; Wrap the cell function in a tracing function. Store ;; old info in trace-info slot of CELL. (let ((fun (cell-function cell)) (rule (cell-rule cell)) (info (cell-%info cell))) (set-cell-info cell (if condition (curry #'traced/condition condition symbol break fun) (curry #'traced symbol break fun)) rule) (setf (cell-trace-info cell) (list info break condition)) ;; If requested, trace dependencies ;; recursively. Checking RULE avoids recursing into ;; referenced but undefined rules. (when (and rule (if (integerp depth) (plusp depth) depth)) (dolist (dep (%rule-direct-dependencies rule)) (trace-one dep (find-rule-cell dep) (if (integerp depth) (1- depth) depth))))) t)) (trace-one symbol (or (find-rule-cell symbol) (undefined-rule symbol)) recursive)))) (defun untrace-rule (symbol &key recursive break condition) "Turn off tracing of nonterminal SYMBOL. If RECURSIVE is true, turn off tracing for the whole grammar rooted at SYMBOL. If RECURSIVE is a positive integer, turn off tracing for all rules reachable from the nonterminal SYMBOL in that number of steps. BREAK and CONDITION are ignored, and are provided only for symmetry with TRACE-RULE." (declare (ignore break condition)) (let ((seen (make-hash-table :test #'eq))) (labels ((untrace-one (cell depth) ;; Avoid infinite recursion and processing sub-trees ;; multiple times. (if (gethash cell seen) (return-from untrace-one) (setf (gethash cell seen) t)) ;; Restore info from trace-info slot of CELL. (let ((rule (cell-rule cell)) (trace-info (cell-trace-info cell))) (when trace-info (setf (cell-%info cell) (first trace-info) (cell-trace-info cell) nil)) ;; If requested, trace dependencies ;; recursively. Checking RULE avoids recursing into ;; referenced but undefined rules. (when (and rule (if (integerp depth) (plusp depth) depth)) (dolist (dep (%rule-direct-dependencies rule)) (untrace-one (find-rule-cell dep) (if (integerp depth) (1- depth) depth))))) nil)) (untrace-one (or (find-rule-cell symbol) (undefined-rule symbol)) recursive)))) (defun rule-expression (rule) "Return the parsing expression associated with the RULE." (slot-value rule '%expression)) (defun (setf rule-expression) (expression rule) "Modify RULE to use EXPRESSION as the parsing expression. The rule must be detached beforehand." (let ((name (rule-symbol rule))) (when name (error "~@" name)) (setf (slot-value rule '%expression) expression))) (defun change-rule (symbol expression) "Modifies the nonterminal SYMBOL to use EXPRESSION instead. Temporarily removes the rule while it is being modified." (let ((rule (remove-rule symbol :force t))) (unless rule (undefined-rule symbol)) (setf (rule-expression rule) expression) (add-rule symbol rule))) (defun describe-grammar (symbol &optional (stream *standard-output*)) "Prints the grammar tree rooted at nonterminal SYMBOL to STREAM for human inspection." (check-type symbol nonterminal) (flet ((max-symbol-length (symbols) (reduce #'max symbols :key (compose #'length #'prin1-to-string) :initial-value 0)) (output-rule (length rule) (format stream "~3T~S~VT<- ~S~@[ : ~S~]~%" (rule-symbol rule) length (rule-expression rule) (when (rule-condition rule) (rule-guard-expression rule))))) (if-let ((rule (find-rule symbol))) (progn (format stream "~&Grammar ~S:~%" symbol) (multiple-value-bind (defined undefined) (rule-dependencies rule) (let ((length (+ 4 (max (max-symbol-length defined) (max-symbol-length undefined))))) (output-rule length rule) (mapc (compose (curry #'output-rule length) #'find-rule) defined) (when undefined (format stream "~%Undefined nonterminal~P:~%~{~3T~S~%~}" (length undefined) undefined))))) (format stream "Symbol ~S is not a defined nonterminal." symbol)))) esrap-20170630-git/src/macros.lisp000066400000000000000000000302671311177263700166350ustar00rootroot00000000000000;;;; Copyright (c) 2007-2013 Nikodemus Siivola ;;;; Copyright (c) 2012-2017 Jan Moringen ;;;; ;;;; Permission is hereby granted, free of charge, to any person ;;;; obtaining a copy of this software and associated documentation files ;;;; (the "Software"), to deal in the Software without restriction, ;;;; including without limitation the rights to use, copy, modify, merge, ;;;; publish, distribute, sublicense, and/or sell copies of the Software, ;;;; and to permit persons to whom the Software is furnished to do so, ;;;; subject to the following conditions: ;;;; ;;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ;;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ;;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. ;;;; IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY ;;;; CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, ;;;; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE ;;;; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. (cl:in-package #:esrap) ;;; Miscellany (defun text (&rest arguments) "Arguments must be strings, or lists whose leaves are strings. Catenates all the strings in arguments into a single string." (with-output-to-string (s) (labels ((cat-list (list) (dolist (elt list) (etypecase elt (string (write-string elt s)) (character (write-char elt s)) (list (cat-list elt)))))) (cat-list arguments)))) (defun singleton-option (context form keyword type &key default) (let ((value default) (value-seen nil)) (lambda (&optional (new-value nil new-value-p)) (cond ((not new-value-p) value) ((not (typep new-value type)) (error 'simple-type-error :datum new-value :expected-type type :format-control "~@" :format-arguments (list new-value keyword context))) (value-seen (error "~@" keyword context form)) (t (setf value-seen t value new-value)))))) ;;; DEFRULE support functions (defun parse-lambda-list-maybe-containing-&bounds (lambda-list) "Parse &BOUNDS section in LAMBDA-LIST and return three values: 1. The standard lambda list sublist of LAMBDA-LIST 2. A symbol that should be bound to the start of a matching substring 3. A symbol that should be bound to the end of a matching substring 4. A list containing symbols that were GENSYM'ed. The second and/or third values are GENSYMS if LAMBDA-LIST contains a partial or no &BOUNDS section, in which case fourth value contains them for use with IGNORE." (let ((length (length lambda-list)) (index (position '&bounds lambda-list))) (multiple-value-bind (lambda-list start end gensyms) (cond ;; Look for &BOUNDS START END. ((eql index (- length 3)) (values (subseq lambda-list 0 index) (nth (+ index 1) lambda-list) (nth (+ index 2) lambda-list) '())) ;; Look for &BOUNDS START. ((eql index (- length 2)) (let ((end (gensym "END"))) (values (subseq lambda-list 0 index) (nth (+ index 1) lambda-list) end (list end)))) ;; &BOUNDS is present but not followed by either one or two ;; names. (index (error "~@" '&bounds (subseq lambda-list index))) ;; No &BOUNDS section. (t (let ((start (gensym "START")) (end (gensym "END"))) (values lambda-list start end (list start end))))) (check-type start symbol) (check-type end symbol) (values lambda-list start end gensyms)))) (defun check-lambda-list (lambda-list spec &key (report-lambda-list lambda-list)) (multiple-value-bind (required* optional* rest* keyword* allow-other-keys-p auxp keyp) (parse-ordinary-lambda-list lambda-list) (labels ((fail (expected actual) (let ((expected (ensure-list expected)) (actual (ensure-list actual))) (error "~@" (first expected) (rest expected) report-lambda-list (first actual) (rest actual)))) (check-section (section expected actual) (typecase expected ((eql nil) (when actual (fail (list "without ~A parameters" section) (list "has ~A parameters" section)))) ((eql t) (unless actual (fail (list "with ~A parameters" section) (list "has no ~A parameters" section)))) (integer (unless (length= expected actual) (fail (list "with ~D ~A parameter~:*~:P" expected section) (list "has ~D ~A parameter~:*~:P" (length actual) section)))))) (check-binary (name expected actual) (when (member expected '(t nil)) (unless (eq expected (when actual t)) (fail (list "~:[without~;with~] ~A" expected name) (list "~:[has no~;has~] ~A" actual name))))) (check-simple-spec (&key required optional rest keyword allow-other-keys aux key) (check-section "required" required required*) (check-section "optional" optional optional*) (check-binary '&rest rest rest*) (check-section "keyword" keyword keyword*) (check-binary '&allow-other-keys allow-other-keys allow-other-keys-p) (check-section "aux" aux auxp) (check-binary '&key key keyp)) (check-spec (spec) (typecase spec ((cons (eql or)) (loop :with errors = () :for sub-spec :in (rest spec) :do (handler-case (progn (check-spec sub-spec) (return)) (error (condition) (push condition errors))) :finally (error "~@<~{~A~^~@:_~}~@:>" errors))) (list (apply #'check-simple-spec spec))))) (check-spec spec)))) (defun parse-defrule-options (options form) (let ((when (singleton-option 'defrule form :when t :default '(t . t))) (transform nil) (around nil) (error-report (singleton-option 'defrule form :error-report 'rule-error-report :default t))) (dolist (option options) (destructuring-ecase option ((:when expr &rest rest) (when rest (error "~@" :when form)) (funcall when (cons (cond ((not (constantp expr)) `(lambda () ,expr)) ((eval expr) t)) expr))) ((:constant value) (declare (ignore value)) (push option transform)) ((:text value) (when value (push option transform))) ((:identity value) (when value (push option transform))) ((:lambda lambda-list &body forms) (multiple-value-bind (lambda-list* start-var end-var ignore) (parse-lambda-list-maybe-containing-&bounds lambda-list) (check-lambda-list lambda-list* '(or (:required 1) (:optional 1)) :report-lambda-list lambda-list) (push (list :lambda lambda-list* start-var end-var ignore forms) transform))) ((:function designator) (declare (ignore designator)) (push option transform)) ((:destructure lambda-list &body forms) (multiple-value-bind (lambda-list* start-var end-var ignore) (parse-lambda-list-maybe-containing-&bounds lambda-list) (push (list :destructure lambda-list* start-var end-var ignore forms) transform))) ((:around lambda-list &body forms) (multiple-value-bind (lambda-list* start end ignore) (parse-lambda-list-maybe-containing-&bounds lambda-list) (check-lambda-list lambda-list* '() :report-lambda-list lambda-list) (setf around `(lambda (,start ,end transform) (declare (ignore ,@ignore) (function transform)) (flet ((call-transform () (funcall transform))) ,@forms))))) ((:error-report behavior) (funcall error-report behavior)))) (values transform around (funcall when) (funcall error-report)))) (defun expand-transforms (transforms) (labels ((make-transform-body (start end start-var end-var ignore body) (let* ((start-end-vars (list start-var end-var)) (other-ignore (set-difference ignore start-end-vars))) (multiple-value-bind (forms declarations) (parse-body body) `(,@(when other-ignore `((declare (ignore ,@other-ignore)))) ,@declarations (let (,@(unless (member start-var ignore :test #'eq) `((,start-var ,start))) ,@(unless (member end-var ignore :test #'eq) `((,end-var ,end)))) ,@forms))))) (process-option (options start end production) (destructuring-bind (&optional option &rest rest) options (unless option (return-from process-option (values production t))) (destructuring-ecase option ((:constant value) (process-option rest start end `(progn ,production ,value))) ((:identity value) (declare (ignore value)) (process-option rest start end production)) ((:text value) (declare (ignore value)) (process-option rest start end `(text ,production))) ((:function designator) ; TODO resolve-function? (values (process-option rest start end `(,designator ,production)) t)) ((:lambda lambda-list start-var end-var ignore forms) (values (process-option rest start end `((lambda ,lambda-list ,@(make-transform-body start end start-var end-var ignore forms)) ,production)) t)) ((:destructure lambda-list start-var end-var ignore forms) (values (process-option rest start end `(destructuring-bind ,lambda-list ,production ,@(make-transform-body start end start-var end-var ignore forms))) t)))))) (with-gensyms (production start end) (multiple-value-bind (form production-used-p) (process-option (reverse transforms) start end production) `(lambda (,production ,start ,end) (declare ,@(unless production-used-p `((ignore ,production))) (ignorable ,start ,end)) ,form))))) esrap-20170630-git/src/package.lisp000066400000000000000000000036441311177263700167430ustar00rootroot00000000000000;;;; Copyright (c) 2007-2013 Nikodemus Siivola ;;;; Copyright (c) 2012-2016 Jan Moringen ;;;; ;;;; Permission is hereby granted, free of charge, to any person ;;;; obtaining a copy of this software and associated documentation files ;;;; (the "Software"), to deal in the Software without restriction, ;;;; including without limitation the rights to use, copy, modify, merge, ;;;; publish, distribute, sublicense, and/or sell copies of the Software, ;;;; and to permit persons to whom the Software is furnished to do so, ;;;; subject to the following conditions: ;;;; ;;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ;;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ;;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. ;;;; IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY ;;;; CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, ;;;; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE ;;;; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. (cl:defpackage #:esrap (:use #:cl #:alexandria) #+sbcl (:lock t) (:export #:&bounds #:! #:? #:+ #:* #:& #:~ #:character-ranges #:*on-left-recursion* #:add-rule #:call-transform #:change-rule #:defrule #:describe-grammar #:describe-terminal #:esrap-error #:esrap-error-position #:esrap-error-text #:esrap-parse-error #:esrap-parse-error-result #:esrap-parse-error-context #:expression-start-terminals #:find-rule #:invalid-expression-error #:invalid-expression-error-expression #:left-recursion #:left-recursion-nonterminal #:left-recursion-path #:parse #:remove-rule #:rule #:rule-dependencies #:rule-expression #:rule-symbol #:text #:trace-rule #:untrace-rule #:undefined-rule-error #:undefined-rule-symbol )) esrap-20170630-git/src/protocol.lisp000066400000000000000000000031501311177263700172010ustar00rootroot00000000000000;;;; Copyright (c) 2007-2013 Nikodemus Siivola ;;;; Copyright (c) 2012-2016 Jan Moringen ;;;; ;;;; Permission is hereby granted, free of charge, to any person ;;;; obtaining a copy of this software and associated documentation files ;;;; (the "Software"), to deal in the Software without restriction, ;;;; including without limitation the rights to use, copy, modify, merge, ;;;; publish, distribute, sublicense, and/or sell copies of the Software, ;;;; and to permit persons to whom the Software is furnished to do so, ;;;; subject to the following conditions: ;;;; ;;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ;;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ;;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. ;;;; IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY ;;;; CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, ;;;; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE ;;;; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. (cl:in-package #:esrap) ;;; Error protocolx (defgeneric esrap-error-position (condition) (:documentation "Return the input position at which the parse failure represented by CONDITION occurred.")) (defgeneric esrap-parse-error-result (condition) (:documentation "Return the result associated to the parse error represented by CONDITION.")) (defgeneric esrap-parse-error-context (condition) (:documentation "Return the context result associated to the parse error represented by CONDITION.")) esrap-20170630-git/src/results.lisp000066400000000000000000000537171311177263700170570ustar00rootroot00000000000000;;;; Copyright (c) 2007-2013 Nikodemus Siivola ;;;; Copyright (c) 2012-2017 Jan Moringen ;;;; ;;;; Permission is hereby granted, free of charge, to any person ;;;; obtaining a copy of this software and associated documentation files ;;;; (the "Software"), to deal in the Software without restriction, ;;;; including without limitation the rights to use, copy, modify, merge, ;;;; publish, distribute, sublicense, and/or sell copies of the Software, ;;;; and to permit persons to whom the Software is furnished to do so, ;;;; subject to the following conditions: ;;;; ;;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ;;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ;;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. ;;;; IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY ;;;; CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, ;;;; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE ;;;; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ;;;; We always return a RESULT -- ERROR-RESULT for failed parses, and ;;;; SUCCESSFUL-PARSE for successes. ;;;; ;;;; We implement a simple lazy evaluation for the productions. This ;;;; is used to perform semantic actions only when necessary -- either ;;;; when we call a semantic predicate or once parse has finished. (cl:in-package #:esrap) (defstruct (result (:constructor nil) (:copier nil)) ;; Expression that succeeded/failed to match. (expression nil :read-only t) ;; Position at which match was attempted. ;; Either ;; * the position at which the parse failed ;; * or function returning that position when called with the ;; FAILED-PARSE instance and optionally a minimum position as its ;; arguments. (%position #'max-of-result-positions :type (or function input-position)) ;; One of the following things: ;; * nested error, closer to actual failure site ;; * a (possibly empty) list thereof ;; * a string describing the failure ;; * a condition instance describing the failure (detail nil :type (or result list string condition) :read-only t)) ;; The following function is only called from slow paths. (declaim (ftype (function (result) (values input-position &optional)) result-position)) (defun result-position (result) (let ((position (result-%position result))) (if (functionp position) (setf (result-%position result) (funcall position (ensure-list (result-detail result)))) position))) (defmethod print-object ((object result) stream) (print-unreadable-object (object stream :type t) (let ((*print-level* (min 2 (or *print-level* 2))) (*print-length* (min 3 (or *print-length* 3)))) (format stream "~S~@[ @~D~]" (result-expression object) (result-position object))))) (defstruct (error-result (:include result) (:constructor nil) (:copier nil))) (defstruct (inactive-rule (:include error-result) (:constructor make-inactive-rule (expression %position)) (:copier nil))) (declaim (ftype (function (inactive-rule) (values nonterminal &optional)) inactive-rule-rule)) (defun inactive-rule-rule (result) (result-expression result)) (defstruct (failed-parse (:include error-result) (:constructor make-failed-parse (expression %position detail)) (:constructor make-failed-parse/no-position (expression detail)) (:copier nil))) ;; This is placed in the cache as a place in which information ;; regarding left recursion can be stored temporarily. (declaim (inline make-left-recursion-result left-recursion-result-p)) (defstruct (left-recursion-result (:include error-result) (:constructor make-left-recursion-result (expression)) (:copier nil)) (head nil :type (or null head))) (declaim (ftype (function (left-recursion-result) (values nonterminal &optional)) left-recursion-result-rule)) (defun left-recursion-result-rule (result) (result-expression result)) (defstruct (successful-parse (:include result) (:constructor %make-successful-parse (expression %position detail %production)) (:copier nil)) ;; Either a list of results, whose first element is the production, ;; or a function to call that will return the production. (%production nil :type (or list function))) (defun successful-parse-production (result) (let ((thunk (successful-parse-%production result))) (if (functionp thunk) (let ((value (funcall thunk (result-detail result)))) (setf (successful-parse-%production result) (list value)) value) (first thunk)))) ;; Result helper functions (defmacro make-successful-parse (expression position detail production) `(%make-successful-parse ,expression ,position ,detail ,(typecase production (symbol `(list ,production)) ((cons (eql function)) production) (t `(lambda (detail) (declare (ignore detail)) ,production))))) (defun result-nonterminal-p (result) (typep (result-expression result) 'nonterminal)) (defun result-unsatisfied-predicate-p (result) (and (failed-parse-p result) (typep (result-expression result) 'predicate) (successful-parse-p (result-detail result)))) (defun result-trivial-predicate-p (result) (and (typep (result-expression result) 'predicate) (expression-case (second (result-expression result)) ((character character-ranges string terminal) t) (t nil)))) (declaim (ftype (function (result rule-error-report-pattern) (values boolean &optional)) result-suitable-for-report-part-p)) (defun result-suitable-for-report-part-p (result part) (when (result-nonterminal-p result) (rule-suitable-for-report-part-p (result-expression result) part))) (declaim (ftype (function (list &optional input-position) (values input-position &optional)) max-of-result-positions)) (defun max-of-result-positions (results &optional (start 0)) (reduce #'max results :key #'result-position :initial-value start)) (declaim (ftype (function (list) (values list &optional)) list-of-result-productions list-of-result-productions/butlast)) (defun list-of-result-productions (results) (mapcar #'successful-parse-production results)) (defun list-of-result-productions/butlast (results) (loop :for rest :on results :while (rest rest) :collect (successful-parse-production (first rest)))) ;;; For technical reasons, INACTIVE-RULE instances cannot be directly ;;; created with the correct value in the POSITION slot. Fix this by ;;; copying the position from adjacent results, if possible. (defun maybe-augment-inactive-rules (results) (unless (some #'inactive-rule-p results) (return-from maybe-augment-inactive-rules results)) (loop :for previous = nil :then (if (result-p current) current previous) :for current :in results :collect (if (and (inactive-rule-p current) (result-p previous)) (make-inactive-rule (result-expression current) (result-position previous)) current))) (declaim (ftype (function (function result &key (:augment-inactive-rules t))) map-results) (ftype (function (function result &key (:when-error-report rule-error-report-pattern))) map-max-results map-max-leaf-results)) ;;; Apply FUNCTION to RESULT and potentially all its ancestor results ;;; (by providing a RECURSE function to FUNCTION) and return whatever ;;; FUNCTION returns. ;;; ;;; More concretely, the lambda-list of FUNCTION has to be compatible ;;; to ;;; ;;; (result recurse) ;;; ;;; where RESULT is the result object currently being visited and ;;; RECURSE is a function of no arguments that, when called, continues ;;; the traversal into children of RESULT and returns whatever ;;; FUNCTION returns for the sub-tree of ancestor results. (defun map-results (function result &key (augment-inactive-rules t)) (let ((function (ensure-function function)) (augment (if augment-inactive-rules #'maybe-augment-inactive-rules #'identity))) (labels ((do-result (result) (flet ((recurse () (let ((detail (result-detail result))) (typecase detail (cons (mapcar #'do-result (funcall augment detail))) (result (do-result detail)))))) (declare (dynamic-extent #'recurse)) (funcall function result #'recurse)))) (declare (dynamic-extent #'do-result)) (do-result result)))) ;;; Like MAP-RESULTS but only process results the position of which ;;; (computed as the recursive maximum over ancestors for inner result ;;; nodes) is maximal within the result tree RESULT. ;;; ;;; Furthermore, stop the traversal at results corresponding to !, NOT ;;; and PREDICATE expressions since failed parses among their ;;; respective ancestors are not causes of a failed (or successful) ;;; parse in the usual sense. ;;; ;;; Also restrict processing of nonterminals according to their ;;; :ERROR-REPORT option and WHEN-ERROR-REPORT. (defun map-max-results (function result &key (when-error-report nil when-error-report-p)) ;; Process result tree in two passes: ;; ;; 1. Use MAP-RESULTS to visit results, processing each with either ;; PROCESS-{LEAF or INNER}-RESULT, and collecting results into a ;; tree with nodes of the form ;; ;; (RECURSIVE-MAX-POSITION RESULT LIST-OF-CHILDREN) ;; ;; 2. Use local function MAP-MAX-RESULTS to traverse the tree ;; calling FUNCTION on the RESULT of each node. (let ((function (ensure-function function))) (labels ((process-leaf-result (result) (list (result-position result) result '())) (process-inner-result (result recurse) (declare (type function recurse)) (let ((children (remove nil (typecase (result-detail result) (result (list (funcall recurse))) (cons (funcall recurse)))))) (cond (children (let* ((max (reduce #'max children :key #'first)) (max-children (remove max children :test-not #'= :key #'first))) (list max result max-children))) ((not (successful-parse-p result)) (process-leaf-result result))))) (process-result (result recurse) ;; Treat results produced by inactive rules as if the ;; rule was not part of the grammar. (unless (inactive-rule-p result) (let ((expression (result-expression result))) (expression-case expression ;; Do not recurse into results for negation-ish ;; expressions. ((! not < >) (process-leaf-result result)) ;; If the associated rule is a nonterminal, maybe ;; suppress the result depending on the error-report ;; slot of the rule. (nonterminal (when (or (not when-error-report-p) (rule-suitable-for-report-part-p expression when-error-report)) (process-inner-result result recurse))) (t (process-inner-result result recurse)))))) (map-max-results (node) (destructuring-bind (position result children) node (declare (ignore position)) (flet ((recurse () (mapcar #'map-max-results children))) (declare (dynamic-extent #'recurse)) (funcall function result #'recurse))))) (declare (dynamic-extent #'process-leaf-result #'process-inner-result #'process-result #'map-max-results)) (if-let ((max-result-root (map-results #'process-result result))) (map-max-results max-result-root) (funcall function result (constantly '())))))) (defun map-max-leaf-results (function result &rest args &key when-error-report) (declare (ignore when-error-report)) (let ((function (ensure-function function))) (apply #'map-max-results (lambda (result recurse) (declare (type function recurse)) ;; In addition to actual leafs, treat unsatisfied ;; predicate results or trivial predicates as leafs (the ;; latter are one level above leafs anyway and allow for ;; better "expected" messages). (when (or (result-unsatisfied-predicate-p result) (result-trivial-predicate-p result) (not (funcall recurse))) (funcall function result))) result args))) (declaim (inline flattened-children)) (defun flattened-children (recurse) (let ((all-children (funcall (the function recurse)))) (remove-duplicates (reduce #'append all-children) :test #'eq))) ;;; Return a "context"-providing child result of RESULT, i.e. the most ;;; specific ancestor result of RESULT the path to which contains no ;;; forks: ;;; ;;; RESULT ;;; | ;;; `-child1 ;;; | ;;; `-child2 ;;; | ;;; `-nonterminal <- context ;;; | ;;; +-child4 ;;; | | ;;; | ... ;;; `-child5 ;;; | ;;; ... ;;; (defun result-context (result) (first (map-max-results (lambda (result recurse) (declare (type function recurse)) (let ((children (flattened-children recurse))) (cond ;; unsatisfied predicate result => collect into the result. ;; ;; This suppresses children of RESULT. The actual context ;; will normally be a nonterminal result above RESULT. ((result-unsatisfied-predicate-p result) (list result)) ;; nonterminal with a single child => return the child. ((and (length= 1 children) (or (result-suitable-for-report-part-p (first children) :context) (not (result-suitable-for-report-part-p result :context)))) children) ;; nonterminal with multiple children, i.e. common ;; derivation ends here => return RESULT. (t (list result))))) result :when-error-report '(:context :detail)))) ;;; Return an explicit description (i.e. a STRING or CONDITION) of the ;;; cause of the parse failure if such a thing can be found in the ;;; result tree rooted at RESULT. (defun result-root-cause (result) (first (map-max-results (lambda (result recurse) (cond ((typep result 'inactive-rule) (list (let ((*package* (load-time-value (find-package :keyword)))) (format nil "Rule ~S is not active" (result-expression result))))) ((typep (result-detail result) '(or string condition)) (list (result-detail result))) ((result-unsatisfied-predicate-p result) (list (format nil "The production~ ~2%~ ~2@T~<~S~:>~ ~2%~ does not satisfy the predicate ~S." (list (successful-parse-production (result-detail result))) (first (result-expression result))))) (t (flattened-children recurse)))) result))) ;;; Return a list of terminals that would have allowed the failed ;;; parsed represented by RESULT to succeed. (defun result-expected-input (result) (let ((expected '())) (map-max-leaf-results (lambda (leaf) (mapc (lambda (start-terminal) (pushnew start-terminal expected :test #'expression-equal-p)) (typecase leaf (failed-parse (expression-start-terminals (result-expression leaf) :when-rule-error-report :detail)) (successful-parse '((not (character))))))) result :when-error-report :detail) (sort expected #'expression<))) ;;; Return a list of children of RESULT that are the roots of disjoint ;;; result sub-trees. ;;; ;;; Precondition: RESULT is a nonterminal with multiple children ;;; (I.e. RESULT is typically the return value of RESULT-CONTEXT). (defun partition-results (result) (flet ((child-closure (result) (let ((results (list result))) (map-max-results (lambda (result recurse) (pushnew result results :test #'eq) (funcall recurse)) result) results))) (declare (dynamic-extent #'child-closure)) (map-max-results (lambda (result recurse) (let ((children (flattened-children recurse))) (cond ;; Unsatisfied predicate result => return RESULT. ((result-unsatisfied-predicate-p result) (list result)) ;; No children => certainly no fork in ancestors => return ;; RESULT. ((null children) (list result)) ;; Only a single child, i.e. children have not been ;; partitioned => return RESULT. ((length= 1 children) (if (result-suitable-for-report-part-p (first children) :detail) children (list result))) ;; Multiple children, but not all of them are nonterminals ;; and RESULT is a nonterminal => do not use the partition ;; into CHILDREN and instead return RESULT. ((and (result-suitable-for-report-part-p result :detail) (notevery #'result-nonterminal-p children)) (list result)) ;; Multiple children, all of which are nonterminals. If the ;; child-closures of all children are disjoint => use the ;; partition into children. Otherwise => do not use the ;; partition and instead return RESULT. (t (let ((closures (mapcar #'child-closure children))) (loop :named outer :for (closure1 . rest) :on closures :do (loop :for closure2 :in rest :do (when (intersection closure1 closure2 :test #'eq) (return-from outer (list result)))) :finally (return-from outer children))))))) result :when-error-report :detail))) ;;; Given the "context" result (see RESULT-CONTEXT) CONTEXT, determine ;;; the set of failed ancestor results (see PARTITION-RESULTS). ;;; Display the context and all failed ancestor results optionally ;;; printing the reason for the failure and listing the respective ;;; expected inputs that would have allowed the failed results to ;;; succeed. (defun error-report (context stream) (let* ((partitioned (partition-results context)) (expected (mapcar (lambda (root) (let ((reason (result-root-cause root)) (expected (result-expected-input root))) (list root (when reason (list reason)) (length expected) expected))) partitioned)) (expected (sort expected #'expression< :key #'first))) ;; Print context (if any), then print each failure result from the ;; PARTITIONED set with its name and the set of expected inputs, ;; if any. (format stream "~@<~@[In context ~/esrap:print-result/:~ ~@:_~@:_~ ~]~ ~{~{~ While parsing ~/esrap:print-result/. ~ ~@[Problem:~@:_~@:_~ ~2@T~<~@;~A~:>~ ~[~:;~@:_~@:_~]~:*~ ~]~ ~[~ ~*~ ~:;~ ~:*Expected:~@:_~@:_~ ~[~ ~2@T~{~/esrap::print-terminal/~}~ ~:;~ ~5@T~{~/esrap::print-terminal/~^~@:_ or ~}~ ~]~ ~]~ ~}~^~@:_~@:_~}~ ~:>" context expected))) (defvar *result-pprint-dispatch* (let ((dispatch (copy-pprint-dispatch))) (set-pprint-dispatch 'string (lambda (stream x) (write x :stream stream :escape t :pretty nil)) 0 dispatch) (set-pprint-dispatch 'character (lambda (stream x) (if (or (not (graphic-char-p x)) (member x '(#\Space #\Tab #\Newline))) (write-string (char-name x) stream) (write (string x) :stream stream :escape t :pretty nil))) 0 dispatch) dispatch)) ;; For use as ~/esrap::print-result/ in format control. (defun print-result (stream result &optional colon? at?) (declare (ignore colon? at?)) (let ((*print-pprint-dispatch* *result-pprint-dispatch*)) (princ (result-expression result) stream))) esrap-20170630-git/src/rule.lisp000066400000000000000000000154621311177263700163200ustar00rootroot00000000000000;;;; Copyright (c) 2007-2013 Nikodemus Siivola ;;;; Copyright (c) 2012-2017 Jan Moringen ;;;; ;;;; Permission is hereby granted, free of charge, to any person ;;;; obtaining a copy of this software and associated documentation files ;;;; (the "Software"), to deal in the Software without restriction, ;;;; including without limitation the rights to use, copy, modify, merge, ;;;; publish, distribute, sublicense, and/or sell copies of the Software, ;;;; and to permit persons to whom the Software is furnished to do so, ;;;; subject to the following conditions: ;;;; ;;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ;;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ;;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. ;;;; IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY ;;;; CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, ;;;; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE ;;;; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. (cl:in-package #:esrap) ;;; RULE REPRESENTATION AND STORAGE ;;; ;;; For each rule, there is a RULE-CELL in *RULES*, whose %INFO slot has the ;;; function that implements the rule in car, and the rule object in CDR. A ;;; RULE object can be attached to only one non-terminal at a time, which is ;;; accessible via RULE-SYMBOL. (defvar *rules* (make-hash-table)) (defun clear-rules () (clrhash *rules*) nil) (defstruct (rule-cell (:conc-name cell-) (:constructor make-rule-cell (symbol &aux (%info (cons (undefined-rule-function symbol) nil)))) (:copier nil) (:predicate nil)) ;; A cons ;; ;; (FUNCTION . RULE) ;; ;; where ;; ;; FUNCTION is a function with lambda-list (text position end) which ;; is called to do the actual parsing work (or immediately signal an ;; error in case of referenced but undefined rules). ;; ;; RULE is a RULE instance associated to the cell or nil for ;; referenced but undefined rules. (%info (required-argument :%info) :type (cons function t)) ;; Either NIL if the corresponding rule is not currently traced or a ;; list ;; ;; (INFO BREAK CONDITION) ;; ;; where ;; ;; INFO is the original value (i.e. before the rule was traced) of ;; the %INFO slot of the cell. ;; ;; BREAK is a Boolean indicating whether to CL:BREAK when the traced ;; rule is executed. ;; ;; CONDITION is NIL or a function that is called when the traced ;; rule is executed to determine whether the trace action should be ;; performed. (trace-info nil) (referents nil :type list)) (declaim (inline cell-function)) (defun cell-function (cell) (car (cell-%info cell))) (defun cell-rule (cell) (cdr (cell-%info cell))) (defun set-cell-info (cell function rule) ;; Atomic update (setf (cell-%info cell) (cons function rule)) cell) (defun undefined-rule-function (symbol) (lambda (&rest args) (declare (ignore args)) (undefined-rule symbol))) (defun ensure-rule-cell (symbol) (check-type symbol nonterminal) ;; FIXME: Need to lock *RULES*. (ensure-gethash symbol *rules* (make-rule-cell symbol))) (defun delete-rule-cell (symbol) (remhash symbol *rules*)) (defun reference-rule-cell (symbol referent) (let ((cell (ensure-rule-cell symbol))) (when referent (pushnew referent (cell-referents cell))) cell)) (defun dereference-rule-cell (symbol referent) (let ((cell (ensure-rule-cell symbol))) (setf (cell-referents cell) (delete referent (cell-referents cell))) cell)) (defun find-rule-cell (symbol) (check-type symbol nonterminal) (gethash symbol *rules*)) (defclass rule () ((%symbol :initform nil :reader rule-symbol) (%expression :initarg :expression :initform (required-argument :expression)) ;; Only for DESCRIBE-GRAMMAR. The %CONDITION slot stores the actual ;; condition. (%guard-expression :initarg :guard-expression :initform t :reader rule-guard-expression) ;; Either T for rules that are always active (the common case), ;; NIL for rules that are never active, or a function to call ;; to find out if the rule is active or not. (%condition :initarg :condition :initform t :reader rule-condition) (%transform :initarg :transform :initform nil :reader rule-transform) (%around :initarg :around :initform nil :reader rule-around) ;; Describes in which parts of an error report this rule, its ;; children and the input (transitively) expected by the rule may ;; be mentioned. This allows preventing "utility" rules from ;; cluttering up error reports. (%error-report :initarg :error-report :type rule-error-report :reader rule-error-report :initform t))) (setf (documentation 'rule-symbol 'function) "Returns the nonterminal associated with the RULE, or NIL if the rule is not attached to any nonterminal.") (declaim (ftype (function (symbol rule-error-report-pattern) (values boolean &optional)) rule-suitable-for-report-part-p)) (defun rule-suitable-for-report-part-p (symbol part-or-parts) (when-let ((rule (find-rule symbol))) (error-report-behavior-suitable-for-report-part-p (rule-error-report rule) part-or-parts))) (defun detach-rule (rule) (dolist (dep (%rule-direct-dependencies rule)) (dereference-rule-cell dep (rule-symbol rule))) (setf (slot-value rule '%symbol) nil)) (defmethod shared-initialize :after ((rule rule) slots &key) (declare (ignore slots)) (check-expression (rule-expression rule))) (defmethod print-object ((rule rule) stream) (print-unreadable-object (rule stream :type t :identity nil) (format stream "~:[(detached)~;~:*~S <- ~S~]" (rule-symbol rule) (rule-expression rule)))) (defun sort-dependencies (symbol dependencies) (let ((symbols (delete symbol dependencies)) (defined nil) (undefined nil)) (dolist (sym symbols) (if (find-rule sym) (push sym defined) (push sym undefined))) (values defined undefined))) (defun rule-dependencies (rule) "Returns the dependencies of the RULE: primary value is a list of defined nonterminal symbols, and secondary value is a list of undefined nonterminal symbols." (sort-dependencies (rule-symbol rule) (%expression-dependencies (rule-expression rule)))) (defun rule-direct-dependencies (rule) (sort-dependencies (rule-symbol rule) (%expression-direct-dependencies (rule-expression rule)))) (defun %rule-direct-dependencies (rule) (delete (rule-symbol rule) (%expression-direct-dependencies (rule-expression rule)))) esrap-20170630-git/src/types.lisp000066400000000000000000000062711311177263700165130ustar00rootroot00000000000000;;;; Copyright (c) 2007-2013 Nikodemus Siivola ;;;; Copyright (c) 2012-2017 Jan Moringen ;;;; ;;;; Permission is hereby granted, free of charge, to any person ;;;; obtaining a copy of this software and associated documentation files ;;;; (the "Software"), to deal in the Software without restriction, ;;;; including without limitation the rights to use, copy, modify, merge, ;;;; publish, distribute, sublicense, and/or sell copies of the Software, ;;;; and to permit persons to whom the Software is furnished to do so, ;;;; subject to the following conditions: ;;;; ;;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ;;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ;;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. ;;;; IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY ;;;; CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, ;;;; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE ;;;; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. (cl:in-package #:esrap) ;;; Input types (deftype input-position () 'array-index) (deftype input-length () 'array-length) ;;; Parser behavior types (deftype left-recursion-policy () '(or null (eql :error))) ;;; Expression types (deftype nonterminal () "Any symbol except CHARACTER and NIL can be used as a nonterminal symbol." '(and symbol (not (member character nil)))) (deftype terminal () "Literal strings and characters are used as case-sensitive terminal symbols, and expressions of the form \(~ ) denote case-insensitive terminals." '(or string character (cons (eql ~) (cons (or string character) null)))) (deftype character-range () "A character range is either a single character or a list of two characters." '(or character (cons character (cons character null)))) (deftype predicate-name () '(and symbol (not (member character-ranges string and or not * + ? & ! ~ < > function)))) (deftype predicate () '(cons predicate-name (cons (not null) null))) ;;; Rule-related types (deftype error-report-part () "Named part of a parse error report." `(member :context :detail)) (deftype rule-error-report () "Suitability of a rule for error report parts. In addition to the ERROR-REPORT-PART values, NIL indicates unsuitability for all error report parts, while T indicates suitability for all parts." '(or (member t nil) error-report-part)) (deftype rule-error-report-pattern () "ERROR-REPORT-PART or a list thereof." '(or (member t nil) error-report-part (cons error-report-part))) (declaim (ftype (function (rule-error-report rule-error-report-pattern) (values boolean &optional)) error-report-behavior-suitable-for-report-part-p)) (defun error-report-behavior-suitable-for-report-part-p (query part-or-parts) "Return true if QUERY is suitable for PART-OR-PARTS." (when (or (eq query part-or-parts) (eq query t) (and (consp part-or-parts) (member query part-or-parts :test #'eq))) t)) esrap-20170630-git/src/variables.lisp000066400000000000000000000031001311177263700173030ustar00rootroot00000000000000;;;; Copyright (c) 2007-2013 Nikodemus Siivola ;;;; Copyright (c) 2012-2016 Jan Moringen ;;;; ;;;; Permission is hereby granted, free of charge, to any person ;;;; obtaining a copy of this software and associated documentation files ;;;; (the "Software"), to deal in the Software without restriction, ;;;; including without limitation the rights to use, copy, modify, merge, ;;;; publish, distribute, sublicense, and/or sell copies of the Software, ;;;; and to permit persons to whom the Software is furnished to do so, ;;;; subject to the following conditions: ;;;; ;;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ;;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ;;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. ;;;; IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY ;;;; CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, ;;;; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE ;;;; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. (cl:in-package #:esrap) (declaim (type left-recursion-policy *on-left-recursion*)) (defvar *on-left-recursion* nil "This special variable controls Esrap's behavior with respect to allowing left recursion. When :ERROR, PARSE signals a LEFT-RECURSION error when it encounters a left recursive rule. Otherwise the rule is processed. Note: when processing left recursive rules, linear-time guarantees generally no longer hold.") (defparameter *eval-nonterminals* nil) esrap-20170630-git/test/000077500000000000000000000000001311177263700146405ustar00rootroot00000000000000esrap-20170630-git/test/examples.lisp000066400000000000000000000111661311177263700173540ustar00rootroot00000000000000;;;; Copyright (c) 2007-2013 Nikodemus Siivola ;;;; Copyright (c) 2012-2016 Jan Moringen ;;;; ;;;; Permission is hereby granted, free of charge, to any person ;;;; obtaining a copy of this software and associated documentation files ;;;; (the "Software"), to deal in the Software without restriction, ;;;; including without limitation the rights to use, copy, modify, merge, ;;;; publish, distribute, sublicense, and/or sell copies of the Software, ;;;; and to permit persons to whom the Software is furnished to do so, ;;;; subject to the following conditions: ;;;; ;;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ;;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ;;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. ;;;; IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY ;;;; CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, ;;;; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE ;;;; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. (cl:in-package #:esrap-tests) (in-suite esrap) (test-both-modes example-left-recursion.left-associative "Left associate grammar from example-left-recursion.lisp." ;; This grammar should work without left recursion. (let ((*on-left-recursion* :error)) (is (equal '(+ (* 1 2) (+ (* 3 4) 5)) (parse 'left-recursive-grammars:la-expr "1*2+3*4+5"))))) (test-both-modes example-left-recursion.right-associative "Right associate grammar from example-left-recursion.lisp." ;; This grammar combination of grammar and input would require left ;; recursion. (let ((*on-left-recursion* :error)) (signals left-recursion (parse 'left-recursive-grammars:ra-expr "1*2+3*4+5"))) (is (equal '(+ (+ (* 1 2) (* 3 4)) 5) (parse 'left-recursive-grammars:ra-expr "1*2+3*4+5")))) (test-both-modes example-left-recursion.warth.smoke "Warth's Java expression example from example-left-recursion.lisp." (mapc (curry #'apply (lambda (input expected) (is (equal expected (parse 'left-recursive-grammars:primary input))))) '(("this" "this") ("this.x" (:field-access "this" "x")) ("this.x.y" (:field-access (:field-access "this" "x") "y")) ("this.x.m()" (:method-invocation (:field-access "this" "x") "m")) ("x[i][j].y" (:field-access (:array-access (:array-access "x" "i") "j") "y"))))) (test-both-modes example-function-terminals.indented-block.smoke "Context-sensitive parsing via function terminals." (is (equal '("foo" "bar" "quux" (if "foo" ("bla" (if "baz" ("bli" "blo") ("whoop")))) "blu") (parse 'esrap-example.function-terminals:indented-block " foo bar quux if foo: bla if baz: bli blo else: whoop blu ")))) (test-both-modes example-function-terminals.indented-block.condition "Context-sensitive parsing via function terminals." (let ((input "if foo: bla ")) (signals-esrap-error (input esrap-parse-error 0 ("In context INDENTED-BLOCK:" "While parsing INDENTED-BLOCK." "Problem:" "Expected indent" "Expected:" "a string that can be parsed by the function")) (parse 'esrap-example.function-terminals:indented-block input)))) (test-both-modes example-function-terminals.read.smoke "Using CL:READ as a terminal." (macrolet ((test-case (input expected) `(is (equal ,expected (with-standard-io-syntax (parse 'esrap-example.function-terminals:common-lisp ,input)))))) (test-case "(1 2 3)" '(1 2 3)) (test-case "foo" 'cl-user::foo) (test-case "#C(1 3/4)" #C(1 3/4)))) (test-both-modes example-function-terminals.read.condition "Test error reporting in the CL:READ-based rule" (handler-case (with-standard-io-syntax (parse 'esrap-example.function-terminals:common-lisp "(list 'i :::love 'lisp")) (esrap-parse-error (condition) #-sbcl (declare (ignore condition)) ;; Different readers may report this differently. #+sbcl (is (<= 9 (esrap-error-position condition) 16)) ;; Not sure how other lisps report this. #+sbcl (is (search "too many colons" (princ-to-string condition)))))) esrap-20170630-git/test/package.lisp000066400000000000000000000025121311177263700171240ustar00rootroot00000000000000;;;; Copyright (c) 2007-2013 Nikodemus Siivola ;;;; Copyright (c) 2012-2016 Jan Moringen ;;;; ;;;; Permission is hereby granted, free of charge, to any person ;;;; obtaining a copy of this software and associated documentation files ;;;; (the "Software"), to deal in the Software without restriction, ;;;; including without limitation the rights to use, copy, modify, merge, ;;;; publish, distribute, sublicense, and/or sell copies of the Software, ;;;; and to permit persons to whom the Software is furnished to do so, ;;;; subject to the following conditions: ;;;; ;;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ;;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ;;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. ;;;; IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY ;;;; CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, ;;;; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE ;;;; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. (cl:defpackage #:esrap-tests (:use #:alexandria #:cl #:esrap #:fiveam) (:shadowing-import-from #:esrap #:!) (:export #:run-tests)) (cl:in-package #:esrap-tests) (def-suite esrap) (defun run-tests () (run! 'esrap)) esrap-20170630-git/test/readme.lisp000066400000000000000000000042131311177263700167660ustar00rootroot00000000000000;;;; Copyright (c) 2007-2013 Nikodemus Siivola ;;;; Copyright (c) 2012-2016 Jan Moringen ;;;; ;;;; Permission is hereby granted, free of charge, to any person ;;;; obtaining a copy of this software and associated documentation files ;;;; (the "Software"), to deal in the Software without restriction, ;;;; including without limitation the rights to use, copy, modify, merge, ;;;; publish, distribute, sublicense, and/or sell copies of the Software, ;;;; and to permit persons to whom the Software is furnished to do so, ;;;; subject to the following conditions: ;;;; ;;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ;;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ;;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. ;;;; IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY ;;;; CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, ;;;; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE ;;;; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. (cl:in-package #:esrap-tests) (in-suite esrap) (test-both-modes examples-from-readme.foo "README examples related to \"foo+\" rule." (is (equal '("foo" nil t) (multiple-value-list (parse '(or "foo" "bar") "foo")))) (is (eq 'foo+ (add-rule 'foo+ (make-instance 'rule :expression '(+ "foo"))))) (is (equal '(("foo" "foo" "foo") nil t) (multiple-value-list (parse 'foo+ "foofoofoo"))))) (test-both-modes examples-from-readme.decimal "README examples related to \"decimal\" rule." (is (eq 'decimal (add-rule 'decimal (make-instance 'rule :expression `(+ (or "0" "1" "2" "3" "4" "5" "6" "7" "8" "9")) :transform (lambda (list start end) (declare (ignore start end)) (parse-integer (format nil "~{~A~}" list))))))) (is (eql 123 (parse '(oddp decimal) "123"))) (is (equal '(nil 0) (multiple-value-list (parse '(evenp decimal) "123" :junk-allowed t))))) esrap-20170630-git/test/tests.lisp000066400000000000000000001311001311177263700166670ustar00rootroot00000000000000;;;; Copyright (c) 2007-2013 Nikodemus Siivola ;;;; Copyright (c) 2012-2017 Jan Moringen ;;;; ;;;; Permission is hereby granted, free of charge, to any person ;;;; obtaining a copy of this software and associated documentation files ;;;; (the "Software"), to deal in the Software without restriction, ;;;; including without limitation the rights to use, copy, modify, merge, ;;;; publish, distribute, sublicense, and/or sell copies of the Software, ;;;; and to permit persons to whom the Software is furnished to do so, ;;;; subject to the following conditions: ;;;; ;;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ;;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ;;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. ;;;; IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY ;;;; CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, ;;;; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE ;;;; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. (cl:in-package #:esrap-tests) (in-suite esrap) ;;; defrule tests (test defrule.check-expression "Test expression checking in DEFRULE." (macrolet ((is-invalid-expr (expression) `(signals invalid-expression-error (defrule foo ,expression)))) (is-invalid-expr (~ 1)) (is-invalid-expr (string)) (is-invalid-expr (character-ranges 1)) (is-invalid-expr (character-ranges (#\a))) (is-invalid-expr (character-ranges (#\a #\b #\c))) (is-invalid-expr (and (string))) (is-invalid-expr (not)) (is-invalid-expr (foo)) (is-invalid-expr (function)) (is-invalid-expr (function foo bar)) (is-invalid-expr (function 1)) (is-invalid-expr (function (lambda (x) x))) (is-invalid-expr (< #\a)) (is-invalid-expr (< 0 #\a)) (is-invalid-expr (< -1 #\a)) (is-invalid-expr (< 1 (string))) (is-invalid-expr (> #\a)) (is-invalid-expr (> 0 #\a)) (is-invalid-expr (> -1 #\a)) (is-invalid-expr (> 1 (string))))) (test defrule.ignore-declarations "Test ignore declarations generated by DEFRULE." (macrolet ((does-not-warn (condition-class &body body) `(finishes (handler-case (compile nil '(lambda () ,@body)) (,condition-class (condition) (fail "Signaled an unexpected warning: ~A." condition)))))) (does-not-warn style-warning (defrule foo (and) (:function second) (:lambda (x) (declare (ignore x))))))) (test defrule.style-warnings "Test signaling of style-warnings from DEFRULE." (macrolet ((signals-style-warning (expression) `(signals style-warning (defrule foo ,expression)))) ;; Function terminal. (signals-style-warning (function no-such-function)) (signals-style-warning (function cond)) ;; Semantic predicate (signals-style-warning (no-such-function #\a)) (signals-style-warning (cond #\a)))) (test defrule.errors "Test signaling of errors for DEFRULE syntax errors." (flet ((test-case (form &optional expected-report) (signals error (macroexpand form)) (when expected-report (handler-case (macroexpand form) (error (condition) (is (search expected-report (princ-to-string condition)))))))) (test-case '(defrule multiple-guards "foo" (:when foo) (:when bar))) (test-case '(defrule multiple-expressions-in-when "foo" (:when foo bar))) (test-case '(defrule lambda-lambda-list "foo" (:lambda (a b)))) (test-case '(defrule lambda-lambda-list "foo" (:lambda (&optional a b)))) (test-case '(defrule lambda-lambda-list "foo" (:lambda (&rest a)))) (test-case '(defrule lambda-lambda-list "foo" (:lambda (&key a)))) (test-case '(defrule lambda-lambda-list "foo" (:lambda (&key &allow-other-keys)))) (test-case '(defrule lambda-lambda-list "foo" (:lambda (&bounds))) "Expected &BOUNDS START END") (test-case '(defrule lambda-lambda-list "foo" (:lambda (&bounds a b c))) "Expected &BOUNDS START END") (test-case '(defrule destructure-lambda-list "foo" (:destructure (&bounds))) "Expected &BOUNDS START END") (test-case '(defrule destructure-lambda-list "foo" (:destructure (&bounds a b c))) "Expected &BOUNDS START END") (test-case '(defrule around-lambda-list "foo" (:around (a)))) (test-case '(defrule around-lambda-list "foo" (:around (&optional a)))) (test-case '(defrule around-lambda-list "foo" (:around (&rest a)))) (test-case '(defrule around-lambda-list "foo" (:around (&key a)))) (test-case '(defrule around-lambda-list "foo" (:around (&key &allow-other-keys)))) (test-case '(defrule around-lambda-list "foo" (:around (&bounds))) "Expected &BOUNDS START END") (test-case '(defrule around-lambda-list "foo" (:around (&bounds a b c))) "Expected &BOUNDS START END") (test-case '(defrule error-report.invalid "foo" (:error-report "invalid"))) (test-case '(defrule error-report.repeated "foo" (:error-report nil) (:error-report t))))) ;;; A few semantic predicates (defun not-doublequote (char) (not (eql #\" char))) (defun not-digit (char) (when (find-if-not #'digit-char-p char) t)) (defun not-newline (char) (not (eql #\newline char))) (defun not-space (char) (not (eql #\space char))) (defun even-length-p (productions) (evenp (length productions))) (defrule oddity (and #\a (or #\c (even-length-p (+ oddity))) #\b)) ;;; Utility rules (defrule beginning-of-input (! (< 1 character)) (:constant :beginning-of-input)) (defrule whitespace (+ (or #\space #\tab #\newline)) (:text t) (:error-report nil)) (defrule empty-line #\newline (:constant "")) (defrule non-empty-line (and (+ (not-newline character)) (? #\newline)) (:destructure (text newline) (declare (ignore newline)) (text text))) (defrule line (or empty-line non-empty-line) (:identity t)) (defrule trimmed-line line (:lambda (line) (string-trim '(#\space #\tab) line))) (defrule trimmed-lines (* trimmed-line) (:identity t)) (defrule digits (+ (digit-char-p character)) (:text t)) (defrule integer (and (? whitespace) digits (and (? whitespace) (or (& #\,) (! character)))) (:function second) (:function parse-integer)) (defrule list-of-integers (+ (or (and integer #\, list-of-integers) integer)) (:destructure (match) (if (integerp match) (list match) (destructuring-bind (int comma list) match (declare (ignore comma)) (cons int list))))) (defrule detail.1 (and "d1" " " "detail" whitespace) (:error-report :detail)) (defrule detail.2 (and "d2" " " "detail" whitespace) (:error-report nil)) (defrule context.1 (and "context" " " (or (and "d3" " " "detail") detail.1)) (:error-report :context)) (defrule error-report (or detail.1 detail.2 (and "d4" " " "detail") context.1)) (test-both-modes parse.smoke (macrolet ((test-case (expected rule text &rest args) `(is (equal '(,@expected) (multiple-value-list (parse ,rule ,text ,@args)))))) (test-case (("1," "2," "" "3," "4.") nil t) 'trimmed-lines "1, 2, 3, 4.") (test-case (123 nil t) 'integer " 123") (test-case (123 nil t) 'integer " 123 ") (test-case (123 nil t) 'integer "123 ") (test-case ((123 45 6789 0) nil t) 'list-of-integers "123, 45 , 6789, 0") (test-case ((123 45 6789 0) nil t) 'list-of-integers " 123 ,45,6789, 0 ") ;; Ensure that parsing with :junk-allowed returns the correct ;; position. (test-case (nil 1) 'list-of-integers " a" :start 1 :junk-allowed t) ;; Test successful parse that does not consume input. This case ;; can only be detected by examining the third return value. (test-case (nil 1 t) '(? list-of-integers) " a" :start 1 :junk-allowed t) ;; Handling of :raw (by the compiler-macro). (test-case (123 nil t) 'integer "123" :raw nil) (is (typep (parse 'integer "123" :raw t) 'esrap::successful-parse)) (is (typep (parse 'integer "12a" :raw t) 'esrap::error-result)))) (defrule single-token/bounds.1 (+ (not-space character)) (:lambda (result &bounds start end) (format nil "~A[~S-~S]" (text result) start end))) (defrule single-token/bounds.2 (and (not-space character) (* (not-space character))) (:destructure (first &rest rest &bounds start end) (format nil "~C~A(~S-~S)" first (text rest) start end))) (defrule tokens/bounds.1 (and (? whitespace) (or (and single-token/bounds.1 whitespace tokens/bounds.1) single-token/bounds.1)) (:destructure (whitespace match) (declare (ignore whitespace)) (if (stringp match) (list match) (destructuring-bind (token whitespace list) match (declare (ignore whitespace)) (cons token list))))) (defrule tokens/bounds.2 (and (? whitespace) (or (and single-token/bounds.2 whitespace tokens/bounds.2) single-token/bounds.2)) (:destructure (whitespace match) (declare (ignore whitespace)) (if (stringp match) (list match) (destructuring-bind (token whitespace list) match (declare (ignore whitespace)) (cons token list))))) (test-both-modes bounds.1 (is (equal '("foo[0-3]") (parse 'tokens/bounds.1 "foo"))) (is (equal '("foo[0-3]" "bar[4-7]" "quux[11-15]") (parse 'tokens/bounds.1 "foo bar quux")))) (test-both-modes bounds.2 (is (equal '("foo(0-3)") (parse 'tokens/bounds.2 "foo"))) (is (equal '("foo(0-3)" "bar(4-7)" "quux(11-15)") (parse 'tokens/bounds.2 "foo bar quux")))) ;;; Look{ahead, behind} (eval-when (:compile-toplevel :load-toplevel :execute) (defun make-look*-test (expression input expected-production expected-position successp) `(is (equal '(,expected-production ,expected-position ,@(when successp '(t))) (multiple-value-list (parse ',expression ,input :junk-allowed t)))))) (test-both-modes lookahead.smoke "Smoke test for lookahead expressions." (macrolet ((test-case (expression input expected-production expected-position &optional (successp t)) (make-look*-test expression input expected-production expected-position successp))) ;; End of input. (test-case (! (> 3 character)) "abc" nil 0) ;; Some simple cases. (test-case (> 1 #\b) "abc" "b" 0) (test-case (> 2 #\c) "abc" "c" 0) (test-case (> 3 #\d) "abc" nil 0 nil) (test-case (> 3 (! character)) "abc" nil 0) (test-case (> 4 (! character)) "abc" nil 0 nil) ;; Make sure we can parse something before and after the ;; lookahead. (test-case (and #\a (> 1 #\c) #\b) "abc" ("a" "c" "b") 2) (test-case (and #\a (> 2 #\d) #\b) "abc" nil 0 nil) (test-case (and #\a (> 2 (! character)) #\b) "abc" ("a" nil "b") 2))) (test-both-modes lookbehind.smoke "Smoke test for lookbehind expressions." (macrolet ((test-case (expression input expected-production expected-position &optional (successp t)) (make-look*-test expression input expected-production expected-position successp))) ;; Beginning of input (test-case (! (< 1 character)) "abc" nil 0) ;; Parse some stuff, so we can go back with the lookbehind. (test-case (and "abc" (< 1 #\d)) "abc" nil 0 nil) (test-case (and "abc" (< 1 #\c)) "abc" ("abc" "c") nil) (test-case (and "abc" (< 2 #\b)) "abc" ("abc" "b") nil) (test-case (and "abc" (< 3 #\a)) "abc" ("abc" "a") nil) (test-case (and "abc" (< 4 #\a)) "abc" nil 0 nil) ;; Lookbehind at the beginning of the input. (test-case (< 1 #\a) "abc" nil 0 nil))) ;;; Function terminals (defun parse-integer1 (text position end) (parse-integer text :start position :end end :junk-allowed t)) (defrule function-terminals.integer #'parse-integer1) (test-both-modes function-terminals.parse-integer "Test using the function PARSE-INTEGER1 as a terminal." (macrolet ((test-case (input expected &optional (expression ''function-terminals.integer)) `(is (equal ,expected (parse ,expression ,input))))) (test-case "1" 1) (test-case " 1" 1) (test-case "-1" -1) (test-case "-1" '(-1 nil) '(and (? function-terminals.integer) (* character))) (test-case "a" '(nil (#\a)) '(and (? function-terminals.integer) (* character))))) (defun parse-5-as (text position end) (let ((chars '()) (amount 0)) (dotimes (i 5) (let ((char (when (< (+ position i) end) (aref text (+ position i))))) (unless (eql char #\a) (return-from parse-5-as (values nil (+ position i) "Expected \"a\"."))) (push char chars) (incf amount))) (values (nreverse chars) (+ position amount)))) (defrule function-terminals.parse-5-as #'parse-5-as) (test-both-modes function-terminals.parse-5-as.smoke "Test using PARSE-A as a terminal." (macrolet ((test-case (input expected &optional (expression ''function-terminals.parse-5-as)) `(is (equal ,expected (parse ,expression ,input))))) (test-case "aaaaa" '(#\a #\a #\a #\a #\a)) (test-case "b" '(nil "b") '(and (? function-terminals.parse-5-as) #\b)) (test-case "aaaaab" '((#\a #\a #\a #\a #\a) "b") '(and (? function-terminals.parse-5-as) #\b)))) (test-both-modes function-terminals.parse-5-as.condition "Test using PARSE-A as a terminal." (handler-case (parse 'function-terminals.parse-5-as "aaaab") (esrap-parse-error (condition) (is (eql 4 (esrap-error-position condition))) (is (search "Expected \"a\"." (princ-to-string condition)))))) (defun function-terminals.nested-parse (text position end) (parse '(and #\d function-terminals.nested-parse) text :start position :end end :junk-allowed t)) (defrule function-terminals.nested-parse (or (and #'function-terminals.nested-parse #\a) (and #\b #'function-terminals.nested-parse) #\c)) (test-both-modes function-terminals.nested-parse "Test a function terminal which itself calls PARSE." (is (equal '("b" ("d" (("d" "c") "a"))) (parse 'function-terminals.nested-parse "bddca")))) (test-both-modes function-terminals.nested-parse.condition "Test propagation of failure information through function terminals." (signals esrap-parse-error (parse 'function-terminals.nested-parse "bddxa"))) (defun function-terminals.without-consuming (text position end) (declare (ignore end)) (if (char= (aref text position) #\a) (values :ok position t) (values nil position "\"a\" expected"))) (test-both-modes function-terminals.without-consuming "Test that function terminals can succeed without consuming input." (is (equal '((:ok "a") nil t) (multiple-value-list (parse '(and #'function-terminals.without-consuming #\a) "a")))) (is (equal '(((:ok "a" :ok) (:ok "a" :ok)) 2 t) (multiple-value-list (parse '(+ (and #'function-terminals.without-consuming #\a #'function-terminals.without-consuming)) "aaab" :junk-allowed t))))) ;;; Left recursion tests (defun make-input-and-expected-result (size) (labels ((make-expected (size) (if (plusp size) (list (make-expected (1- size)) "l") "r"))) (let ((expected (make-expected size))) (values (apply #'concatenate 'string (flatten expected)) expected)))) (defrule left-recursion.direct (or (and left-recursion.direct #\l) #\r)) (test-both-modes left-recursion.direct.success "Test parsing with one left recursive rule for different inputs." (dotimes (i 20) (multiple-value-bind (input expected) (make-input-and-expected-result i) (is (equal expected (parse 'left-recursion.direct input)))))) (test-both-modes left-recursion.direct.condition "Test signaling of `left-recursion' condition if requested." (let ((*on-left-recursion* :error)) (signals (left-recursion) (parse 'left-recursion.direct "l")) (handler-case (parse 'left-recursion.direct "l") (left-recursion (condition) (is (string= "l" (esrap-error-text condition))) (is (= 0 (esrap-error-position condition))) (is (eq 'left-recursion.direct (left-recursion-nonterminal condition))) (is (equal '(left-recursion.direct left-recursion.direct) (left-recursion-path condition))))))) (defrule left-recursion.indirect.1 left-recursion.indirect.2) (defrule left-recursion.indirect.2 (or (and left-recursion.indirect.1 "l") "r")) (test-both-modes left-recursion.indirect.success "Test parsing with mutually left recursive rules for different inputs." (dotimes (i 20) (multiple-value-bind (input expected) (make-input-and-expected-result i) (is (equal expected (parse 'left-recursion.indirect.1 input))) (is (equal expected (parse 'left-recursion.indirect.2 input)))))) (test-both-modes left-recursion.indirect.condition "Test signaling of `left-recursion' condition if requested." (let ((*on-left-recursion* :error)) (signals (left-recursion) (parse 'left-recursion.indirect.1 "l")) (handler-case (parse 'left-recursion.indirect.1 "l") (left-recursion (condition) (is (string= "l" (esrap-error-text condition))) (is (= 0 (esrap-error-position condition))) (is (eq 'left-recursion.indirect.1 (left-recursion-nonterminal condition))) (is (equal '(left-recursion.indirect.1 left-recursion.indirect.2 left-recursion.indirect.1) (left-recursion-path condition))))))) ;;; Test conditions (declaim (special *active*)) (defvar *active* nil) (defrule condition.maybe-active "foo" (:when *active*)) (defrule condition.always-active "foo" (:when t)) (defrule condition.never-active "foo" (:when nil)) (test-both-modes condition.maybe-active "Rule not active at toplevel." (flet ((do-it () (parse 'condition.maybe-active "foo"))) ; TODO avoid redundancy (signals esrap-parse-error (do-it)) (handler-case (do-it) (esrap-parse-error (condition) (search "Rule CONDITION.MAYBE-ACTIVE not active" (princ-to-string condition))))) (finishes (let ((*active* t)) (parse 'condition.maybe-active "foo"))) (finishes (parse 'condition.always-active "foo")) (flet ((do-it () (parse 'condition.never-active "foo"))) (signals esrap-parse-error (do-it)) (handler-case (do-it) (esrap-parse-error (condition) (search "Rule CONDITION.NEVER-ACTIVE not active" (princ-to-string condition)))))) (defrule condition.undefined-dependency (and "foo" no-such-rule)) (test-both-modes condition.undefined-rules "Test handling of undefined rules." (signals undefined-rule-error (parse 'no-such-rule "foo")) (signals undefined-rule-error (parse 'condition.undefined-dependency "foo"))) (test condition.invalid-argument-combinations "Test handling of invalid PARSE argument combinations." ;; Prevent the compiler-macro form recognizing the invalid argument ;; combination at compile-time. (locally (declare (notinline parse)) (signals error (parse 'integer "1" :junk-allowed t :raw t))) ;; Compiler-macro should recognize the invalid argument combination ;; at compile-time. Relies on the implementation detecting invalid ;; keyword arguments at compile-time. (signals warning (with-silent-compilation-unit () (compile nil '(lambda () (parse 'integer "1" :junk-allowed t :raw t)))))) (test-both-modes condition.misc "Test signaling of `esrap-parse-error' conditions for failed parses." ;; Rule does not allow empty string. (signals-esrap-error ("" esrap-parse-error 0 ("At end of input" "^ (Line 1, Column 0, Position 0)" "In context DIGITS:" "While parsing DIGITS." "Expected" "any character satisfying DIGIT-CHAR-P")) (parse 'integer "")) ;; Junk at end of input. (signals-esrap-error ("123foo" esrap-parse-error 3 ("At" "^ (Line 1, Column 3, Position 3)" "In context INTEGER:" "While parsing INTEGER." "The production" "#\\f" "does not satisfy the predicate DIGIT-CHAR-P" "Expected" "any character satisfying DIGIT-CHAR-P" "or ")) (parse 'integer "123foo")) (signals-esrap-error ("a" esrap-parse-error 0 ("At" "^ (Line 1, Column 0, Position 0)" "In context (STRING 0):" "While parsing (STRING 0)." "Expected" "")) (parse '(string 0) "a")) ;; Whitespace not allowed. (signals-esrap-error ("1, " esrap-parse-error 3 ("At" "^ (Line 1, Column 3, Position 3)" "In context DIGITS:" "While parsing DIGITS." "Expected" "any character satisfying DIGIT-CHAR-P")) (parse 'list-of-integers "1, ")) ;; Multi-line input. (signals-esrap-error ("1, 2, " esrap-parse-error 6 ("At" "1," "^ (Line 2, Column 3, Position 6)" "In context DIGITS:" "While parsing DIGITS." "Expected" "any character satisfying DIGIT-CHAR-P")) (parse 'list-of-integers "1, 2, ")) ;; Rule not active at toplevel. (signals-esrap-error ("foo" esrap-parse-error 0 ("At" "^ (Line 1, Column 0, Position 0)" "In context CONDITION.NEVER-ACTIVE:" "While parsing CONDITION.NEVER-ACTIVE." "Problem" "Rule ESRAP-TESTS::CONDITION.NEVER-ACTIVE is not active")) (parse 'condition.never-active "foo")) ;; Rule not active at subexpression-level. (signals-esrap-error ("ffoo" esrap-parse-error 1 ("At" "^ (Line 1, Column 1, Position 1)" "In context (AND \"f\" CONDITION.NEVER-ACTIVE):" "While parsing (AND \"f\" CONDITION.NEVER-ACTIVE). Expected:")) (parse '(and "f" condition.never-active) "ffoo")) ;; Failing function terminal. (signals-esrap-error ("(1 2" esrap-parse-error 0 ("At" "^ (Line 1, Column 0, Position 0)" "In context FUNCTION-TERMINALS.INTEGER:" "While parsing FUNCTION-TERMINALS.INTEGER." "Expected:" "a string that can be parsed by the function")) (parse 'function-terminals.integer "(1 2")) ;; Failing nested semantic predicates. (signals-esrap-error ("aacbb" esrap-parse-error 1 ("At" "^ (Line 1, Column 1, Position 1)" "In context ODDITY:" "While parsing ODDITY." "The production" "((\"a\" \"c\" \"b\"))" "does not satisfy the predicate" "EVEN-LENGTH-P" "Expected:" "the character a")) (parse 'oddity "aacbb")) (signals-esrap-error ("aaacbbb" esrap-parse-error 2 ("At" "^ (Line 1, Column 2, Position 2)" "In context ODDITY:" "While parsing ODDITY." "The production" "((\"a\" \"c\" \"b\"))" "does not satisfy the predicate" "EVEN-LENGTH-P" "Expected:" "the character a")) (parse 'oddity "aaacbbb")) ;; Failing look{ahead,behind}. (signals-esrap-error ("abc" esrap-parse-error 2 ("At" "^ (Line 1, Column 2, Position 2)" "In context (< 2 \"bbc\"):" "While parsing (< 2 \"bbc\")" "Expected:" "the string \"bbc\"" "2 characters before the current position")) (parse '(and "ab" (< 2 "bbc") "c") "abc")) (signals-esrap-error ("abcd" esrap-parse-error 1 ("At" "^ (Line 1, Column 1, Position 1)" "In context (> 1 \"bb\"):" "While parsing (> 1 \"bb\")" "Expected:" "the string \"bb\"" "1 character after the current position")) (parse '(and "a" (> 1 "bb") "bcd") "abcd"))) (test-both-modes condition.error-report "Test effect of the :ERROR-REPORT option on parse error reports." (signals-esrap-error ("foo" esrap-parse-error 0 ("At" "^ (Line 1, Column 0, Position 0)" "In context ERROR-REPORT" "While parsing ERROR-REPORT" "Expected:" "the string \"d1\"" "or the string \"d4\"")) (parse 'error-report "foo")) (signals-esrap-error ("d1" esrap-parse-error 2 ("At" "^ (Line 1, Column 2, Position 2)" "In context ERROR-REPORT" "While parsing DETAIL.1" "Expected:" "the character Space")) (parse 'error-report "d1")) (signals-esrap-error ("context" esrap-parse-error 7 ("At" "^ (Line 1, Column 7, Position 7)" "In context CONTEXT.1" "While parsing CONTEXT.1")) (parse 'error-report "context")) (signals-esrap-error ("context d1 foo" esrap-parse-error 8 ("At" "^ (Line 1, Column 8, Position 8)" "In context CONTEXT.1" "While parsing CONTEXT.1")) (parse 'error-report "context d1 foo")) (signals-esrap-error ("context d3 foo" esrap-parse-error 8 ("At" "^ (Line 1, Column 8, Position 8)" "In context CONTEXT.1" "While parsing CONTEXT.1")) (parse 'error-report "context d3 foo"))) (test-both-modes parse.string "Test parsing an arbitrary string of a given length." (is (equal "" (parse '(string 0) ""))) (is (equal "aa" (parse '(string 2) "aa"))) (signals esrap-parse-error (parse '(string 0) "a")) (signals esrap-parse-error (parse '(string 2) "a")) (signals esrap-parse-error (parse '(string 2) "aaa"))) (defmacro times-2 (form) (destructuring-bind (first second third) form (let ((expr (second second))) `(,first '(and ,expr ,expr) ,third)))) (defmacro times-4 (form) (destructuring-bind (first second third) form (let ((expr (second second))) `(,first '(and ,expr ,expr ,expr ,expr) ,third)))) (test-both-modes parse.case-insensitive "Test parsing character and string constants in case-insensitive mode." ;; The TIMES-{2,4} macros and (and #\c) variants prevent the ;; ordered-choise and greedy-repetition optimizations from being ;; used. ;; ;; To further complicate things, TIMES-{2,4} cannot be lexical ;; macros as that would prevent application of the `parse' ;; compiler-macro. (dolist (input '("aabb" "AABB" "aAbB" "aaBB" "AAbb")) (unless (every #'lower-case-p input) (signals esrap-parse-error (times-4 (parse '(or #\a #\b) input))) (signals esrap-parse-error (times-4 (parse '(or #\a #\b (and #\c)) input))) (signals esrap-parse-error (times-2 (parse '(or "aa" "bb") input))) (signals esrap-parse-error (times-2 (parse '(or "aa" "bb" (and #\c)) input)))) (is (equal "aabb" (text (times-2 (parse '(or (~ "aa") (~ "bb")) input))))) (is (equal "aabb" (text (times-2 (parse '(or (~ "aa") (~ "bb") (and #\c)) input))))) (is (equal "aabb" (text (times-2 (parse '(or (~ "aa") (~ "bb")) input))))) (is (equal "aabb" (text (times-2 (parse '(or (~ "aa") (~ "bb") (and #\c)) input))))) (is (equal "aabb" (text (times-4 (parse '(or (~ #\a) (~ #\b)) input))))) (is (equal "aabb" (text (times-4 (parse '(or (~ #\a) (~ #\b) (and #\c)) input))))) (is (equal "AABB" (text (times-4 (parse '(or (~ #\A) (~ #\B)) input))))) (is (equal "AABB" (text (times-4 (parse '(or (~ #\A) (~ #\B) (and #\c)) input))))) (is (equal "aaBB" (text (times-4 (parse '(or (~ #\a) (~ #\B)) input))))) (is (equal "aaBB" (text (times-4 (parse '(or (~ #\a) (~ #\B) (and #\c)) input))))))) (test-both-modes parse.negation "Test negation in rules." (macrolet ((test-case (expected expression input) `(is (equal ,expected (text (parse ,expression ,input :junk-allowed t)))))) (test-case "F" '(not "Baz") "FooBazBar") (test-case "Foo" '(+ (not "Baz")) "FooBazBar") (test-case "FooBaz" '(+ (not "Bar")) "FooBazBar") (test-case "Foo" '(+ (not (or "Bar" "Baz"))) "FooBazBar"))) (test-both-modes parse.compiler-macro "Test `parse' compiler-macro." ;; Make sure the `parse' compiler-macro applies `constantp' ;; correctly. (macrolet ((times-2 (expr) `'(and ,expr ,expr))) (is (equal '("a" "a") (parse (times-2 #\a) "aa"))))) ;;; Test around (defvar *around.depth* nil) (defrule around/inner (+ (alpha-char-p character)) (:text t)) (defrule around.1 (or around/inner (and #\{ around.1 #\})) (:lambda (thing) (if (stringp thing) (cons *around.depth* thing) (second thing))) (:around () (let ((*around.depth* (if *around.depth* (cons (1+ (first *around.depth*)) *around.depth*) (list 0)))) (call-transform)))) (defrule around.2 (or around/inner (and #\{ around.2 #\})) (:lambda (thing) (if (stringp thing) (cons *around.depth* thing) (second thing))) (:around (&bounds start end) (let ((*around.depth* (if *around.depth* (cons (cons (1+ (car (first *around.depth*))) (cons start end)) *around.depth*) (list (cons 0 (cons start end)))))) (call-transform)))) (test-both-modes around.1 "Test executing code around the transform of a rule." (macrolet ((test-case (input expected) `(is (equal ,expected (parse 'around.1 ,input))))) (test-case "foo" '((0) . "foo")) (test-case "{bar}" '((1 0) . "bar")) (test-case "{{baz}}" '((2 1 0) . "baz")))) (test-both-modes around.2 "Test executing code around the transform of a rule." (macrolet ((test-case (input expected) `(is (equal ,expected (parse 'around.2 ,input))))) (test-case "foo" '(((0 . (0 . 3))) . "foo")) (test-case "{bar}" '(((1 . (1 . 4)) (0 . (0 . 5))) . "bar")) (test-case "{{baz}}" '(((2 . (2 . 5)) (1 . (1 . 6)) (0 . (0 . 7))) . "baz")))) ;;; Test character ranges (defrule character-range (character-ranges (#\a #\b) #\-)) (test-both-modes character-range (macrolet ((test-case (expected expression input) `(is (equal ,expected (parse ,expression ,input :junk-allowed t))))) (test-case #\a '(character-ranges (#\a #\z) #\-) "a") (test-case '(#\a #\b) '(* (character-ranges (#\a #\z) #\-)) "ab") (test-case '(#\a #\b) '(* (character-ranges (#\a #\z) #\-)) "ab1") (test-case '(#\a #\b #\-) '(* (character-ranges (#\a #\z) #\-)) "ab-") (test-case nil '(* (character-ranges (#\a #\z) #\-)) "AB-") (test-case nil '(* (character-ranges (#\a #\z) #\-)) "ZY-") (test-case '(#\a #\b #\-) '(* character-range) "ab-cd"))) ;;; Test multiple transforms (defrule multiple-transforms.1 (and #\a #\1 #\c) (:function second) (:text t) (:function parse-integer)) (defrule multiple-transforms.2 #\1 (:text t) (:function parse-integer) (:function 1+) (:lambda (x &bounds start end) (list x start end))) (defrule multiple-transforms.3 #\1 (:text t) (:lambda (x &bounds start) (list (parse-integer x) start))) (test-both-modes multiple-transforms "Apply composed transforms to parse result." (is (eql 1 (parse 'multiple-transforms.1 "a1c"))) (is (equal '(2 0 1) (parse 'multiple-transforms.2 "1"))) (is (equal '(1 0) (parse 'multiple-transforms.3 "1")))) ;;; Test rule introspection (defrule expression-start-terminals.1 (or expression-start-terminals.2 #\a)) (defrule expression-start-terminals.2 (or #\c (and (? #\b) expression-start-terminals.1))) (test expression-start-terminals.smoke (macrolet ((test-case (expression expected) `(is (equal ',expected (expression-start-terminals ,expression))))) (test-case '(and) ()) (test-case '(or) ()) (test-case 'character (character)) (test-case '(string 5) ((string 5))) (test-case #\A (#\A)) (test-case '(or #\B #\A) (#\A #\B)) (test-case '(or character #\A) (#\A character)) (test-case '(or #\A "foo") ("foo" #\A)) (test-case "foo" ("foo")) (test-case '(or "foo" "bar") ("bar" "foo")) (test-case '(character-ranges (#\a #\z)) ((character-ranges (#\a #\z)))) (test-case '(~ "foo") ((~ "foo"))) (test-case '#'parse-integer (#'parse-integer)) (test-case '(digit-char-p (and)) ()) (test-case '(digit-char-p character) ((digit-char-p (character)))) (test-case '(or (digit-char-p character) #\a) (#\a (digit-char-p (character)))) (test-case 'expression-start-terminals.1 (#\a #\b #\c)) (test-case 'expression-start-terminals.2 (#\a #\b #\c)) (test-case 'left-recursion.direct (#\l #\r)) (test-case '(or #\b #\a) (#\a #\b)) (test-case '(and #\a #\b) (#\a)) (test-case '(and (or #\a #\b) #\c) (#\a #\b)) (test-case '(and (? #\a) #\b) (#\a #\b)) (test-case '(and (? #\a) (? #\b) (or #\d #\c)) (#\a #\b #\c #\d)) (test-case '(and (and) #\a) (#\a)) (test-case '(not (or #\a #\b)) ((not (#\a #\b)))) (test-case '(not character) ((not (character)))) (test-case '(! (or #\a #\b)) ((! (#\a #\b)))) (test-case '(! character) ((! (character)))) (test-case '(& #\a) (#\a)) (test-case '(* #\a) (#\a)) (test-case '(+ #\a) (#\a)) (test-case '(< 1 #\a) ((< 1 (#\a)))) (test-case '(> 1 #\a) ((> 1 (#\a)))) (test-case '(> 2 (or #\a #\b)) ((> 2 (#\a #\b)))))) (test describe-terminal.smoke (flet ((describe-it (terminal) (with-output-to-string (stream) (with-standard-io-syntax (let ((*print-pretty* t)) (pprint-logical-block (stream nil) (describe-terminal terminal stream))))))) (macrolet ((test-case (terminal expected) `(is (string= ,expected (describe-it ,terminal))))) (test-case 'character "any character") (test-case '(string 5) "a string of length 5") (test-case #\a (format nil "the character a (~A)" (char-name #\a))) (test-case #\Space "the character Space") (test-case '(~ #\a) (format nil "the character a (~A), disregarding case" (char-name #\a))) (test-case "f" (format nil "the character f (~A)" (char-name #\f))) (test-case "foo" "the string \"foo\"") (test-case '(~ "foo") "the string \"foo\", disregarding case") (test-case '(character-ranges #\a) "a character in [a]") (test-case '(character-ranges (#\a #\z)) "a character in [a-z]") (test-case '#'parse-integer "a string that can be parsed by the function PARSE-INTEGER") (test-case '(digit-char-p (character)) "any character satisfying DIGIT-CHAR-P") (test-case '(digit-char-p ((~ "foo"))) "the string \"foo\", disregarding case satisfying DIGIT-CHAR-P") (test-case '(digit-char-p ("aa" "bb")) " the string \"aa\" or the string \"bb\" satisfying DIGIT-CHAR-P") (test-case '(digit-char-p ("aa" "bb" "cc")) " the string \"aa\" or the string \"bb\" or the string \"cc\" satisfying DIGIT-CHAR-P") (test-case '(not (#\a #\b)) (format nil "anything but the character a (~A) and the character b (~A)" (char-name #\a) (char-name #\b))) (test-case '(not (character)) "") (test-case '(! (#\a #\b)) (format nil "anything but the character a (~A) and the character b (~A)" (char-name #\a) (char-name #\b))) (test-case '(! ((keyword? (#\_ (alpha-char-p (character)))))) (format nil "anything but the character _ (~A) or any character satisfying ALPHA-CHAR-P satisfying ~A" (char-name #\_) 'keyword?))))) (test describe-terminal.condition (signals error (describe-terminal '(and #\a #\b)))) (defrule describe-grammar.undefined-dependency describe-grammar.no-such-rule.1) (test describe-grammar.smoke "Smoke test for DESCRIBE-GRAMMAR." (mapc (lambda (spec) (destructuring-bind (rule &rest expected) spec (let ((output (with-output-to-string (stream) (describe-grammar rule stream)))) (mapc (lambda (expected) (is (search expected output))) (ensure-list expected))))) '((condition.maybe-active "Grammar CONDITION.MAYBE-ACTIVE" "MAYBE-ACTIVE" "<-" "\"foo\" : *ACTIVE*") (describe-grammar.undefined-dependency "Grammar DESCRIBE-GRAMMAR.UNDEFINED-DEPENDENCY" "Undefined nonterminal" "DESCRIBE-GRAMMAR.NO-SUCH-RULE.1") (describe-grammar.no-such-rule.1 "Symbol DESCRIBE-GRAMMAR.NO-SUCH-RULE.1 is not a defined nonterminal.") (describe-grammar.no-such-rule.2 "Symbol DESCRIBE-GRAMMAR.NO-SUCH-RULE.2 is not a defined nonterminal.") (around.1 "Grammar AROUND.1" "AROUND.1" "<-" ": T" "AROUND/INNER" "<-" ": T") (around.2 "Grammar AROUND.2" "AROUND.2" "<-" ": T" "AROUND/INNER" "<-" ": T") (character-range "Grammar CHARACTER-RANGE" "CHARACTER-RANGE" "<-" "(CHARACTER-RANGES (#\\a" ": T") (multiple-transforms.1 "Grammar MULTIPLE-TRANSFORMS.1" "MULTIPLE-TRANSFORMS.1" "<-" "(AND #\\a #\\1" ": T") (beginning-of-input "Grammar BEGINNING-OF-INPUT" "BEGINNING-OF-INPUT" "<-" "(! (< 1 CHARACTER))" ": T")))) ;;; Test tracing (defun parse-with-trace (rule text) (with-output-to-string (*trace-output*) (parse rule text))) (test #+TODO -both-modes trace-rule.smoke "Smoke test for the rule (un)tracing functionality." (labels ((test-case (trace-rule trace-args parse-rule text expected) ;; No trace output before tracing. (is (emptyp (parse-with-trace parse-rule text))) ;; Trace output. (apply #'trace-rule trace-rule trace-args) (is (string= expected (parse-with-trace parse-rule text))) ;; Back to no output. (apply #'untrace-rule trace-rule trace-args) (is (emptyp (parse-with-trace parse-rule text))))) ;; Smoke test 1. (test-case 'integer '() 'integer "123" "1: INTEGER 0? 1: INTEGER 0-3 -> 123 ") ;; Smoke test 2. (test-case 'beginning-of-input '(:recursive t) 'beginning-of-input "" "1: BEGINNING-OF-INPUT 0? 1: BEGINNING-OF-INPUT 0-0 -> :BEGINNING-OF-INPUT ") ;; Smoke test 3. (test-case 'integer '(:recursive t) 'integer "12" "1: INTEGER 0? 2: WHITESPACE 0? 2: WHITESPACE -| 2: DIGITS 0? 2: DIGITS 0-2 -> \"12\" 2: WHITESPACE 2? 2: WHITESPACE -| 1: INTEGER 0-2 -> 12 ") ;; Depth-limited recursive tracing. (test-case 'list-of-integers '(:recursive 1) 'list-of-integers "12, 13" "1: LIST-OF-INTEGERS 0? 2: INTEGER 0? 2: INTEGER 0-2 -> 12 2: LIST-OF-INTEGERS 3? 3: INTEGER 3? 3: INTEGER 3-6 -> 13 3: INTEGER 3? 3: INTEGER 3-6 -> 13 3: INTEGER 6? 3: INTEGER -| 3: INTEGER 6? 3: INTEGER -| 2: LIST-OF-INTEGERS 3-6 -> (13) 2: INTEGER 6? 2: INTEGER -| 2: INTEGER 6? 2: INTEGER -| 1: LIST-OF-INTEGERS 0-6 -> (12 13) ") ;; Left-recursive rule - non-recursive tracing. (test-case 'left-recursion.direct '() 'left-recursion.direct "rl" "1: LEFT-RECURSION.DIRECT 0? 2: LEFT-RECURSION.DIRECT 0? 2: LEFT-RECURSION.DIRECT -| 2: LEFT-RECURSION.DIRECT 0? 2: LEFT-RECURSION.DIRECT 0-1 -> \"r\" 2: LEFT-RECURSION.DIRECT 0? 2: LEFT-RECURSION.DIRECT 0-2 -> (\"r\" \"l\") 1: LEFT-RECURSION.DIRECT 0-2 -> (\"r\" \"l\") ") ;; Left-recursive rule - recursive tracing. (test-case 'left-recursion.direct '(:recursive t) 'left-recursion.direct "rl" "1: LEFT-RECURSION.DIRECT 0? 2: LEFT-RECURSION.DIRECT 0? 2: LEFT-RECURSION.DIRECT -| 2: LEFT-RECURSION.DIRECT 0? 2: LEFT-RECURSION.DIRECT 0-1 -> \"r\" 2: LEFT-RECURSION.DIRECT 0? 2: LEFT-RECURSION.DIRECT 0-2 -> (\"r\" \"l\") 1: LEFT-RECURSION.DIRECT 0-2 -> (\"r\" \"l\") ") ;; Conditional tracing. (test-case 'digits `(:condition ,(lambda (symbol text position end) (declare (ignore symbol text end)) (= position 0))) 'list-of-integers "123, 123" "1: DIGITS 0? 1: DIGITS 0-3 -> \"123\" "))) (test trace-rule.condition "Test conditions signaled by the rule (un)tracing functionality." ;; It is important for this test that no rule of the given name ;; exists - including as undefined dependency of another rule. (signals error (trace-rule 'trace-rule.condition.no-such-rule.1)) (signals error (untrace-rule 'trace-rule.condition.no-such-rule.1))) (defrule trace-rule.condition.recursive (and trace-rule.condition.no-such-rule.2)) (test trace-rule.condition.recursive+undefined-rule "Recursively tracing a rule with undefined dependencies should not signal an error." (finishes (trace-rule 'trace-rule.condition.recursive :recursive t))) (defrule trace-rule.redefinition (and)) (test trace-rule.redefinition "Make sure that a traced rule can be redefined. This used to signal an error." (finishes (trace-rule 'trace-rule.redefinition) (change-rule 'trace-rule.redefinition '(and)))) (test trace-rule.twice "Test tracing an already-traced rule." (trace-rule 'integer) (trace-rule 'integer) (is (string= "1: INTEGER 0? 1: INTEGER 0-3 -> 123 " (parse-with-trace 'integer "123"))) (untrace-rule 'integer)) esrap-20170630-git/test/util.lisp000066400000000000000000000067071311177263700165200ustar00rootroot00000000000000;;;; Copyright (c) 2007-2013 Nikodemus Siivola ;;;; Copyright (c) 2012-2016 Jan Moringen ;;;; ;;;; Permission is hereby granted, free of charge, to any person ;;;; obtaining a copy of this software and associated documentation files ;;;; (the "Software"), to deal in the Software without restriction, ;;;; including without limitation the rights to use, copy, modify, merge, ;;;; publish, distribute, sublicense, and/or sell copies of the Software, ;;;; and to permit persons to whom the Software is furnished to do so, ;;;; subject to the following conditions: ;;;; ;;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ;;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ;;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. ;;;; IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY ;;;; CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, ;;;; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE ;;;; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. (cl:in-package #:esrap-tests) (defmacro with-silent-compilation-unit (() &body body) `(let ((*error-output* (make-broadcast-stream))) (with-compilation-unit (:override t) ,@body))) (defun call-expecting-signals-esrap-error (thunk input condition position &optional messages) (ecase condition (esrap-parse-error (signals (esrap-parse-error) (funcall thunk)))) (handler-case (funcall thunk) (esrap-error (condition) (is (string= (esrap-error-text condition) input)) (when position (is (= (esrap-error-position condition) position))) (let ((report (with-standard-io-syntax (let ((*print-pretty* t)) (with-output-to-string (stream) (pprint-logical-block (stream nil) (princ condition stream)))))) (start 0)) (mapc (lambda (message) (let ((position (search message report :start2 start))) (is (integerp position) "~@" message report start) (when position (setf start position)))) messages))))) (defmacro signals-esrap-error ((input condition position &optional messages) &body body) `(call-expecting-signals-esrap-error (lambda () ,@body) ,input ',condition ,position (list ,@(ensure-list messages)))) (defmacro test-both-modes (name &body body) (multiple-value-bind (body declarations documentation) (parse-body body :documentation t) (declare (ignore declarations)) (let ((name/interpreted (symbolicate name '#:.interpreted)) (name/compiled (symbolicate name '#:.compiled))) `(progn (test ,name/interpreted ,@(when documentation `(,documentation)) (let ((esrap::*eval-nonterminals* t)) (#-sbcl progn #+sbcl locally #+sbcl (declare (sb-ext:disable-package-locks esrap:parse)) (flet ((parse (&rest args) (apply #'parse args))) ,@body)))) (test ,name/compiled ,@(when documentation `(,documentation)) ,@body))))) esrap-20170630-git/web/000077500000000000000000000000001311177263700144365ustar00rootroot00000000000000esrap-20170630-git/web/Makefile000066400000000000000000000002101311177263700160670ustar00rootroot00000000000000.PHONY: clean all: index.html index.html: ../doc/esrap.html cp ../doc/esrap.html index.html clean: rm -f *~ *.lisp *.lisp.html \#* esrap-20170630-git/web/style.css000066400000000000000000000016601311177263700163130ustar00rootroot00000000000000body { color: #000000; background-color: #ffffff; } .builtin { /* font-lock-builtin-face */ color: #7a378b; } .comment { /* font-lock-comment-face */ color: #b22222; } .comment-delimiter { /* font-lock-comment-delimiter-face */ color: #b22222; } .constant { /* font-lock-constant-face */ color: #008b8b; } .function-name { /* font-lock-function-name-face */ color: #0000ff; } .keyword { /* font-lock-keyword-face */ color: #7f007f; } .slime-reader-conditional { /* slime-reader-conditional-face */ color: #b22222; } .string { /* font-lock-string-face */ color: #996633; } .type { /* font-lock-type-face */ color: #228b22; } .warning { /* font-lock-warning-face */ color: #ff0000; font-weight: bold; } a { color: inherit; background-color: inherit; font: inherit; text-decoration: inherit; } a:hover { text-decoration: underline; }