galax-1.1/0000775000076400007640000000000010772255452010574 5ustar mffmffgalax-1.1/stdlib/0000775000076400007640000000000010772255370012054 5ustar mffmffgalax-1.1/stdlib/fn_error.ml0000664000076400007640000000656010560462366014231 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: fn_error.ml,v 1.15 2007/02/01 22:08:54 simeon Exp $ *) (* Module: Fn_error Description: This module implements support for the user-level fn:error() function. *) open Error open Dm_atomic open Physical_value open Physical_item type xquery_error_msg = Physical_value.item * Physical_value.item option * Physical_value.item list exception Xquery_error of xquery_error_msg let default_error_item = Item_Atomic (new atomicQName (Namespace_symbols.anon_symbol Namespace_builtin.err_default_error) :> atomicValue) (********************************************************************) (* fn:error function *) (* If an invocation provides $description and $error-object, then *) (* these values may also be returned to the external processing *) (* environment. The means by which these values are provided to a host *) (* environment is ·implementation dependent·. *) (* An error xs:QName with namespace URI NS and local part LP will be *) (* returned as the URI NS#LP. *) let raise_error proc_ctxt v = (* If fn:error is invoked with no arguments, then its behavior is the same as the invocation of the following expression: *) (* fn:error(fn:expanded-QName('http://www.w3.org/2004/07/xqt-errors', 'FOER0000')) *) let arity = List.length v in let qname = if (arity < 1) then default_error_item (* fn:error($error as xs:QName) as none *) else List.hd v in let description = (* fn:error($error as xs:QName, $description as xs:string) as none *) if (arity <= 1) then None else Some (List.hd (List.tl v)) in let error_objs = (* fn:error($error as xs:QName, $description as xs:string, $error-object as item()* *) if (arity <= 1) then [] else List.tl (List.tl v) in (* let fn_error_printer (qname, desc, error_objs) = (Serialization.bserialize_datamodel proc_ctxt (Cursor.cursor_of_singleton qname))^":"^ (Serialization.bserialize_datamodel proc_ctxt (Cursor.cursor_of_singleton desc))^"\n"^ (Serialization.bserialize_datamodel proc_ctxt (Cursor.cursor_of_list error_objs)) in raise (Boxed_error ((qname, description, error_objs), fn_error_printer))*) raise (Xquery_error (qname, description, error_objs)) let downgrade_error proc_ctxt msg = let errstr = match msg with | (qnameitem, None, _) -> (getAtomicValue qnameitem)#erase_atomic_value() | (qnameitem, Some stritem, objs) -> (getAtomicValue qnameitem)#erase_atomic_value() ^ (getAtomicValue stritem)#erase_atomic_value() ^ (if objs = [] then "" else (Serialization.bserialize_datamodel proc_ctxt (Cursor.cursor_of_list objs))) in raise (Query (Error (errstr))) galax-1.1/stdlib/fn_doc.mli0000664000076400007640000000607710560462366014021 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: fn_doc.mli,v 1.17 2007/02/01 22:08:54 simeon Exp $ *) (* Module: Fn_doc Description: This module implements the fn:doc() function. 
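     Besides plain files and HTTP, fn:doc() can be served by external
     back-end stores that register themselves through register_back_end
     (declared below).  A minimal, purely illustrative registration,
     where both the back-end name and the lookup function are
     hypothetical and not part of Galax, could look like this from
     client code:

       let my_store_lookup _proc_ctxt (_host, _port_opt, _local) =
         (* placeholder: look the requested document up in the store *)
         ([] : Physical_value.item list)

       let () = Fn_doc.register_back_end "mystore" my_store_lookup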
*) (*************************) (* Back-end registration *) (*************************) (* Note: The fn:doc() function supports files and HTTP natively, but can also be used to retrieve data in back-end stores (e.g., in Jungle). Such back-end stores must register themselves to the system using the following function. - Jerome *) type back_end_call = Processing_context.processing_context -> string * string option * string -> Physical_value.item list val register_back_end : string -> back_end_call -> unit (* Alive documents *) type alive_documents val build_alive_documents_table : unit -> alive_documents val build_dummy_alive_documents_table : unit -> alive_documents val alive_documents_table_mem : alive_documents -> Datatypes.xs_string -> bool val alive_documents_table_get : alive_documents -> Datatypes.xs_string -> Physical_value.item list val alive_documents_table_put : alive_documents -> Datatypes.xs_string -> Physical_value.item list -> unit val merge_alive_documents : alive_documents -> alive_documents -> alive_documents (*********) (* doc() *) (*********) (* Note: The main doc() function below takes an input URI and returns a function which evaluates to a data model value, which is the result of calling the fn:doc() function. - Jerome *) val lookup_document_from_io : Galax_io.input_spec -> alive_documents option -> Processing_context.processing_context -> Physical_value.item list val lookup_doc_function : Datatypes.xs_string -> alive_documents option -> Processing_context.processing_context -> Physical_value.item list val lookup_doc_function_no_table : Datatypes.xs_string -> Processing_context.processing_context -> Physical_value.item list val lookup_doc_function_with_index : Datatypes.xs_string -> Physical_name_index.name_indices_handler -> alive_documents option -> Processing_context.processing_context -> Physical_value.item list (* Note: The main collection() function below takes an input URI and returns a function which evaluates to a data model value, which is the result of calling the fn:collection() function. - Jerome *) val lookup_collection_function : Datatypes.xs_string -> alive_documents option -> Processing_context.processing_context -> Physical_value.item list galax-1.1/stdlib/fn_error.mli0000664000076400007640000000230110560462366014367 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: fn_error.mli,v 1.11 2007/02/01 22:08:54 simeon Exp $ *) (* Module: Fn_error Description: This module implements support for the user-level fn:error() function. *) (* QName, Description, Error objects *) type xquery_error_msg = Physical_value.item * Physical_value.item option * Physical_value.item list exception Xquery_error of xquery_error_msg val raise_error : Processing_context.processing_context -> Physical_value.item list -> 'a val downgrade_error : Processing_context.processing_context -> xquery_error_msg -> 'a galax-1.1/stdlib/pervasive.xq0000664000076400007640000013373310771003110014422 0ustar mffmffmodule namespace glx = "http://www.galaxquery.org"; (: ::::::::::::::::: F&O functions ::::::::::::::::: :) (: F&O Section 2. Accessors :) declare function fn:node-name($arg1 as node()?) as xs:QName? external; declare function fn:nilled($arg1 as node()?) as xs:boolean? external; declare function fn:string($arg1 as item()?) 
as xs:string external; declare function fn:data($arg1 as item()*) as xs:anyAtomicType* external; declare function fn:base-uri($arg1 as node()?) as xs:string? external; declare function fn:document-uri($arg1 as node()?) as xs:string? external; (: F&O Section 4. Trace function :) declare updating function fn:trace($arg1 as item()*, $arg2 as xs:string) as item()* external; (: F&O Section 5. Constructor functions :) (: Galax supports constructor functions for xs:string, xs:boolean, xs:decimal, xs:float, xs:double, xs:integer, and xs:int. Galax does not support any user-declared simple types. :) (: F&O Section 6. Functions and Operators on Numerics :) declare function op:numeric-add($arg1 as numeric()?, $arg2 as numeric()?) as numeric()? external; declare function op:numeric-subtract($arg1 as numeric()?, $arg2 as numeric()?) as numeric()? external; declare function op:numeric-multiply($arg1 as numeric()?, $arg2 as numeric()?) as numeric()? external; declare function op:numeric-divide($arg1 as numeric()?, $arg2 as numeric()?) as numeric()? external; declare function op:numeric-mod($arg1 as numeric()?, $arg2 as numeric()?) as numeric()? external; declare function op:numeric-idivide($arg1 as numeric()?, $arg2 as numeric()?) as numeric()? external; declare function op:numeric-unary-plus($arg1 as numeric()?) as numeric()? external; declare function op:numeric-unary-minus($arg1 as numeric()?) as numeric()? external; (: xs:int :) declare function op:int-add($arg1 as xs:int?, $arg2 as xs:int?) as xs:int? external; declare function op:int-subtract($arg1 as xs:int?, $arg2 as xs:int?) as xs:int? external; declare function op:int-multiply($arg1 as xs:int?, $arg2 as xs:int?) as xs:int? external; declare function op:int-divide($arg1 as xs:int?, $arg2 as xs:int?) as xs:decimal? external; declare function op:int-idivide($arg1 as xs:int?, $arg2 as xs:int?) as xs:int? external; declare function op:int-mod($arg1 as xs:int?, $arg2 as xs:int?) as xs:int? external; declare function op:int-unary-plus($arg1 as xs:int) as xs:int external; declare function op:int-unary-minus($arg1 as xs:int) as xs:int external; declare function op:int-equal($arg1 as xs:int, $arg2 as xs:int) as xs:boolean external; declare function op:int-nequal($arg1 as xs:int, $arg2 as xs:int) as xs:boolean external; declare function op:int-lt($arg1 as xs:int, $arg2 as xs:int) as xs:boolean external; declare function op:int-gt($arg1 as xs:int, $arg2 as xs:int) as xs:boolean external; declare function op:int-le($arg1 as xs:int, $arg2 as xs:int) as xs:boolean external; declare function op:int-ge($arg1 as xs:int, $arg2 as xs:int) as xs:boolean external; (: xs:integer :) declare function op:integer-add($arg1 as xs:integer?, $arg2 as xs:integer?) as xs:integer? external; declare function op:integer-subtract($arg1 as xs:integer?, $arg2 as xs:integer?) as xs:integer? external; declare function op:integer-multiply($arg1 as xs:integer?, $arg2 as xs:integer?) as xs:integer? external; declare function op:integer-divide($arg1 as xs:integer?, $arg2 as xs:integer?) as xs:decimal? external; declare function op:integer-idivide($arg1 as xs:integer?, $arg2 as xs:integer?) as xs:integer? external; declare function op:integer-mod($arg1 as xs:integer?, $arg2 as xs:integer?) as xs:integer? 
external; declare function op:integer-unary-plus($arg1 as xs:integer) as xs:integer external; declare function op:integer-unary-minus($arg1 as xs:integer) as xs:integer external; declare function op:integer-equal($arg1 as xs:integer, $arg2 as xs:integer) as xs:boolean external; declare function op:integer-nequal($arg1 as xs:integer, $arg2 as xs:integer) as xs:boolean external; declare function op:integer-lt($arg1 as xs:integer, $arg2 as xs:integer) as xs:boolean external; declare function op:integer-gt($arg1 as xs:integer, $arg2 as xs:integer) as xs:boolean external; declare function op:integer-le($arg1 as xs:integer, $arg2 as xs:integer) as xs:boolean external; declare function op:integer-ge($arg1 as xs:integer, $arg2 as xs:integer) as xs:boolean external; (: xs:decimal :) declare function op:decimal-add($arg1 as xs:decimal?, $arg2 as xs:decimal?) as xs:decimal? external; declare function op:decimal-subtract($arg1 as xs:decimal?, $arg2 as xs:decimal?) as xs:decimal? external; declare function op:decimal-multiply($arg1 as xs:decimal?, $arg2 as xs:decimal?) as xs:decimal? external; declare function op:decimal-divide($arg1 as xs:decimal?, $arg2 as xs:decimal?) as xs:decimal? external; declare function op:decimal-idivide($arg1 as xs:decimal?, $arg2 as xs:decimal?) as xs:integer? external; declare function op:decimal-mod($arg1 as xs:decimal?, $arg2 as xs:decimal?) as xs:decimal? external; declare function op:decimal-unary-plus($arg1 as xs:decimal) as xs:decimal external; declare function op:decimal-unary-minus($arg1 as xs:decimal) as xs:decimal external; declare function op:decimal-equal($arg1 as xs:decimal, $arg2 as xs:decimal) as xs:boolean external; declare function op:decimal-nequal($arg1 as xs:decimal, $arg2 as xs:decimal) as xs:boolean external; declare function op:decimal-lt($arg1 as xs:decimal, $arg2 as xs:decimal) as xs:boolean external; declare function op:decimal-gt($arg1 as xs:decimal, $arg2 as xs:decimal) as xs:boolean external; declare function op:decimal-le($arg1 as xs:decimal, $arg2 as xs:decimal) as xs:boolean external; declare function op:decimal-ge($arg1 as xs:decimal, $arg2 as xs:decimal) as xs:boolean external; (: xs:float :) declare function op:float-add($arg1 as xs:float?, $arg2 as xs:float?) as xs:float? external; declare function op:float-subtract($arg1 as xs:float?, $arg2 as xs:float?) as xs:float? external; declare function op:float-multiply($arg1 as xs:float?, $arg2 as xs:float?) as xs:float? external; declare function op:float-divide($arg1 as xs:float?, $arg2 as xs:float?) as xs:float? external; declare function op:float-idivide($arg1 as xs:float?, $arg2 as xs:float?) as xs:integer? external; declare function op:float-mod($arg1 as xs:float?, $arg2 as xs:float?) as xs:float? 
external; declare function op:float-unary-plus($arg1 as xs:float) as xs:float external; declare function op:float-unary-minus($arg1 as xs:float) as xs:float external; declare function op:float-equal($arg1 as xs:float, $arg2 as xs:float) as xs:boolean external; declare function op:float-nequal($arg1 as xs:float, $arg2 as xs:float) as xs:boolean external; declare function op:float-lt($arg1 as xs:float, $arg2 as xs:float) as xs:boolean external; declare function op:float-gt($arg1 as xs:float, $arg2 as xs:float) as xs:boolean external; declare function op:float-le($arg1 as xs:float, $arg2 as xs:float) as xs:boolean external; declare function op:float-ge($arg1 as xs:float, $arg2 as xs:float) as xs:boolean external; (: xs:double :) declare function op:double-add($arg1 as xs:double?, $arg2 as xs:double?) as xs:double? external; declare function op:double-subtract($arg1 as xs:double?, $arg2 as xs:double?) as xs:double? external; declare function op:double-multiply($arg1 as xs:double?, $arg2 as xs:double?) as xs:double? external; declare function op:double-divide($arg1 as xs:double?, $arg2 as xs:double?) as xs:double? external; declare function op:double-idivide($arg1 as xs:double?, $arg2 as xs:double?) as xs:integer? external; declare function op:double-mod($arg1 as xs:double?, $arg2 as xs:double?) as xs:double? external; declare function op:double-unary-plus($arg1 as xs:double) as xs:double external; declare function op:double-unary-minus($arg1 as xs:double) as xs:double external; declare function op:double-equal($arg1 as xs:double, $arg2 as xs:double) as xs:boolean external; declare function op:double-nequal($arg1 as xs:double, $arg2 as xs:double) as xs:boolean external; declare function op:double-lt($arg1 as xs:double, $arg2 as xs:double) as xs:boolean external; declare function op:double-gt($arg1 as xs:double, $arg2 as xs:double) as xs:boolean external; declare function op:double-le($arg1 as xs:double, $arg2 as xs:double) as xs:boolean external; declare function op:double-ge($arg1 as xs:double, $arg2 as xs:double) as xs:boolean external; declare function fn:abs($arg1 as numeric()?) as numeric()? external; declare function fn:abs-double($arg1 as xs:double?) as xs:double? external; declare function fn:abs-float($arg1 as xs:float?) as xs:float? external; declare function fn:abs-decimal($arg1 as xs:decimal?) as xs:decimal? external; declare function fn:abs-integer($arg1 as xs:integer?) as xs:integer? external; declare function fn:floor($arg1 as numeric()?) as numeric()? external; declare function fn:floor-double($arg1 as xs:double?) as xs:double? external; declare function fn:floor-float($arg1 as xs:float?) as xs:float? external; declare function fn:floor-decimal($arg1 as xs:decimal?) as xs:decimal? external; declare function fn:floor-integer($arg1 as xs:integer?) as xs:integer? external; declare function fn:ceiling($arg1 as numeric()?) as numeric()? external; declare function fn:ceiling-double($arg1 as xs:double?) as xs:double? external; declare function fn:ceiling-float($arg1 as xs:float?) as xs:float? external; declare function fn:ceiling-decimal($arg1 as xs:decimal?) as xs:decimal? external; declare function fn:ceiling-integer($arg1 as xs:integer?) as xs:integer? external; declare function fn:round($arg1 as numeric()?) as numeric()? external; declare function fn:round-double($arg1 as xs:double?) as xs:double? external; declare function fn:round-float($arg1 as xs:float?) as xs:float? external; declare function fn:round-decimal($arg1 as xs:decimal?) as xs:decimal? 
external; declare function fn:round-integer($arg1 as xs:integer?) as xs:integer? external; declare function fn:round-half-to-even($arg1 as numeric()?, $arg2 as xs:integer) as numeric()? external; declare function fn:round-half-to-even-double($arg1 as xs:double?, $arg2 as xs:integer) as xs:double? external; declare function fn:round-half-to-even-float($arg1 as xs:float?, $arg2 as xs:integer) as xs:float? external; declare function fn:round-half-to-even-decimal($arg1 as xs:decimal?, $arg2 as xs:integer) as xs:decimal? external; declare function fn:round-half-to-even-integer($arg1 as xs:integer?, $arg2 as xs:integer) as xs:integer? external; (: F&O Section 7. Functions on Strings :) declare function fn:codepoints-to-string($arg1 as xs:integer*) as xs:string external; declare function fn:string-to-codepoints($arg1 as xs:string?) as xs:integer* external; declare function fn:codepoint-equal($arg1 as xs:string?, $arg2 as xs:string?) as xs:boolean? external; declare function fn:compare($arg1 as xs:string?, $arg2 as xs:string?, $arg3 as xs:string) as xs:integer? external; (: fn:concat is variadic :) declare function fn:concat($arg1 as xs:anyAtomicType?, $arg2 as xs:anyAtomicType?) as xs:string external; declare function fn:string-join($arg1 as xs:string*, $arg2 as xs:string?) as xs:string external; (: Galax does not support any of the string functions that take the name of a collation sequence. :) declare function fn:substring($arg1 as xs:string?, $arg2 as xs:double, $arg3 as xs:double) as xs:string external; declare function fn:string-length($arg1 as xs:string?) as xs:integer external; declare function fn:normalize-space($arg1 as xs:string?) as xs:string external; declare function fn:normalize-unicode($arg1 as xs:string?, $arg2 as xs:string) as xs:string? external; declare function fn:upper-case($arg1 as xs:string?) as xs:string external; declare function fn:lower-case($arg1 as xs:string?) as xs:string external; declare function fn:translate($arg1 as xs:string?, $arg2 as xs:string, $arg3 as xs:string) as xs:string external; declare function fn:encode-for-uri($uri-part as xs:string?) as xs:string external; declare function fn:iri-to-uri($uri-part as xs:string?) as xs:string external; declare function fn:escape-html-uri($uri as xs:string?) as xs:string external; declare function fn:contains($arg1 as xs:string?, $arg2 as xs:string?, $arg3 as xs:string) as xs:boolean? external; declare function fn:starts-with($arg1 as xs:string?, $arg2 as xs:string?, $arg3 as xs:string) as xs:boolean? external; declare function fn:ends-with($arg1 as xs:string?, $arg2 as xs:string?, $arg3 as xs:string) as xs:boolean? external; declare function fn:substring-before($arg1 as xs:string?, $arg2 as xs:string?, $arg3 as xs:string) as xs:string? external; declare function fn:substring-after($arg1 as xs:string?, $arg2 as xs:string?, $arg3 as xs:string) as xs:string? external; declare function fn:matches($arg1 as xs:string?, $arg2 as xs:string, $arg3 as xs:string) as xs:boolean external; declare function fn:tokenize($arg1 as xs:string?, $arg2 as xs:string, $arg3 as xs:string?) as xs:string* external; declare function fn:replace($arg1 as xs:string?, $arg2 as xs:string, $arg3 as xs:string, $arg4 as xs:string?) 
as xs:string external; declare function op:string-equal($arg1 as xs:string, $arg2 as xs:string) as xs:boolean external; declare function op:string-nequal($arg1 as xs:string, $arg2 as xs:string) as xs:boolean external; declare function op:string-lt($arg1 as xs:string, $arg2 as xs:string) as xs:boolean external; declare function op:string-gt($arg1 as xs:string, $arg2 as xs:string) as xs:boolean external; declare function op:string-le($arg1 as xs:string, $arg2 as xs:string) as xs:boolean external; declare function op:string-ge($arg1 as xs:string, $arg2 as xs:string) as xs:boolean external; (: F&O Section 11. Functions and Operators for anyURI :) (: fn:resolve-uri with one argument is normalized into fn:resolve-uri with two arguments :) declare function fn:resolve-uri($arg1 as xs:string?, $arg2 as xs:string) as xs:string external; declare function op:anyURI-equal($arg1 as xs:anyURI, $arg2 as xs:anyURI) as xs:boolean external; declare function op:anyURI-nequal($arg1 as xs:anyURI, $arg2 as xs:anyURI) as xs:boolean external; (: F&O Section 9. Functions and Operators on Booleans :) declare function fn:true() as xs:boolean external; declare function fn:false() as xs:boolean external; declare function fn:not($arg1 as xs:boolean) as xs:boolean external; (: F&O writes signature as: declare function fn:not($arg1 as item()*) as xs:boolean external; :) declare function op:boolean-equal($arg1 as xs:boolean, $arg2 as xs:boolean) as xs:boolean external; declare function op:boolean-nequal($arg1 as xs:boolean, $arg2 as xs:boolean) as xs:boolean external; declare function op:boolean-lt($arg1 as xs:boolean, $arg2 as xs:boolean) as xs:boolean external; declare function op:boolean-gt($arg1 as xs:boolean, $arg2 as xs:boolean) as xs:boolean external; declare function op:boolean-le($arg1 as xs:boolean, $arg2 as xs:boolean) as xs:boolean external; declare function op:boolean-ge($arg1 as xs:boolean, $arg2 as xs:boolean) as xs:boolean external; (: F&O Section 10. 
Functions and Operators on Durations, Dates, and Times :) (: 10.3 Comparisons on Duration, Date, and time :) declare function op:yearMonthDuration-equal($arg1 as xs:yearMonthDuration,$arg2 as xs:yearMonthDuration) as xs:boolean external; declare function op:yearMonthDuration-nequal($arg1 as xs:yearMonthDuration,$arg2 as xs:yearMonthDuration) as xs:boolean external; declare function op:yearMonthDuration-lt($arg1 as xs:yearMonthDuration,$arg2 as xs:yearMonthDuration) as xs:boolean external; declare function op:yearMonthDuration-le($arg1 as xs:yearMonthDuration,$arg2 as xs:yearMonthDuration) as xs:boolean external; declare function op:yearMonthDuration-gt($arg1 as xs:yearMonthDuration,$arg2 as xs:yearMonthDuration) as xs:boolean external; declare function op:yearMonthDuration-ge($arg1 as xs:yearMonthDuration,$arg2 as xs:yearMonthDuration) as xs:boolean external; declare function op:dayTimeDuration-equal($arg1 as xs:dayTimeDuration,$arg2 as xs:dayTimeDuration) as xs:boolean external; declare function op:dayTimeDuration-nequal($arg1 as xs:dayTimeDuration,$arg2 as xs:dayTimeDuration) as xs:boolean external; declare function op:dayTimeDuration-lt($arg1 as xs:dayTimeDuration,$arg2 as xs:dayTimeDuration) as xs:boolean external; declare function op:dayTimeDuration-le($arg1 as xs:dayTimeDuration,$arg2 as xs:dayTimeDuration) as xs:boolean external; declare function op:dayTimeDuration-gt($arg1 as xs:dayTimeDuration,$arg2 as xs:dayTimeDuration) as xs:boolean external; declare function op:dayTimeDuration-ge($arg1 as xs:dayTimeDuration,$arg2 as xs:dayTimeDuration) as xs:boolean external; declare function op:duration-equal($arg1 as xs:duration,$arg2 as xs:duration) as xs:boolean external; declare function op:duration-nequal($arg1 as xs:duration,$arg2 as xs:duration) as xs:boolean external; declare function op:dateTime-equal($arg1 as xs:dateTime,$arg2 as xs:dateTime) as xs:boolean external; declare function op:dateTime-nequal($arg1 as xs:dateTime,$arg2 as xs:dateTime) as xs:boolean external; declare function op:dateTime-lt($arg1 as xs:dateTime,$arg2 as xs:dateTime) as xs:boolean external; declare function op:dateTime-le($arg1 as xs:dateTime,$arg2 as xs:dateTime) as xs:boolean external; declare function op:dateTime-gt($arg1 as xs:dateTime,$arg2 as xs:dateTime) as xs:boolean external; declare function op:dateTime-ge($arg1 as xs:dateTime,$arg2 as xs:dateTime) as xs:boolean external; declare function op:date-equal($arg1 as xs:date,$arg2 as xs:date) as xs:boolean external; declare function op:date-nequal($arg1 as xs:date,$arg2 as xs:date) as xs:boolean external; declare function op:date-lt($arg1 as xs:date,$arg2 as xs:date) as xs:boolean external; declare function op:date-le($arg1 as xs:date,$arg2 as xs:date) as xs:boolean external; declare function op:date-gt($arg1 as xs:date,$arg2 as xs:date) as xs:boolean external; declare function op:date-ge($arg1 as xs:date,$arg2 as xs:date) as xs:boolean external; declare function op:gYearMonth-equal($arg1 as xs:gYearMonth,$arg2 as xs:gYearMonth) as xs:boolean external; declare function op:gYearMonth-nequal($arg1 as xs:gYearMonth,$arg2 as xs:gYearMonth) as xs:boolean external; declare function op:gYear-equal($arg1 as xs:gYear,$arg2 as xs:gYear) as xs:boolean external; declare function op:gYear-nequal($arg1 as xs:gYear,$arg2 as xs:gYear) as xs:boolean external; declare function op:gMonthDay-equal($arg1 as xs:gMonthDay,$arg2 as xs:gMonthDay) as xs:boolean external; declare function op:gMonthDay-nequal($arg1 as xs:gMonthDay,$arg2 as xs:gMonthDay) as xs:boolean external; 
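(: Informative note: the op: functions in this section are the typed operator
   functions that back value comparisons on durations, dates and times;
   op:date-lt, for instance, is the lt operator specialized to two xs:date
   operands. :)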
declare function op:gDay-equal($arg1 as xs:gDay,$arg2 as xs:gDay) as xs:boolean external; declare function op:gDay-nequal($arg1 as xs:gDay,$arg2 as xs:gDay) as xs:boolean external; declare function op:gMonth-equal($arg1 as xs:gMonth,$arg2 as xs:gMonth) as xs:boolean external; declare function op:gMonth-nequal($arg1 as xs:gMonth,$arg2 as xs:gMonth) as xs:boolean external; declare function op:time-equal($arg1 as xs:time,$arg2 as xs:time) as xs:boolean external; declare function op:time-nequal($arg1 as xs:time,$arg2 as xs:time) as xs:boolean external; declare function op:time-lt($arg1 as xs:time,$arg2 as xs:time) as xs:boolean external; declare function op:time-le($arg1 as xs:time,$arg2 as xs:time) as xs:boolean external; declare function op:time-gt($arg1 as xs:time,$arg2 as xs:time) as xs:boolean external; declare function op:time-ge($arg1 as xs:time,$arg2 as xs:time) as xs:boolean external; (: 10.5 Component Extraction Functions on Duration, Date and Time Values :) declare function fn:years-from-duration($arg1 as xs:duration?) as xs:integer? external; declare function fn:months-from-duration($arg1 as xs:duration?) as xs:integer? external; declare function fn:days-from-duration($arg1 as xs:duration?) as xs:integer? external; declare function fn:hours-from-duration($arg1 as xs:duration?) as xs:integer? external; declare function fn:minutes-from-duration($arg1 as xs:duration?) as xs:integer? external; declare function fn:seconds-from-duration($arg1 as xs:duration?) as xs:decimal? external; declare function fn:year-from-dateTime($arg1 as xs:dateTime?) as xs:integer? external; declare function fn:month-from-dateTime($arg1 as xs:dateTime?) as xs:integer? external; declare function fn:day-from-dateTime($arg1 as xs:dateTime?) as xs:integer? external; declare function fn:hours-from-dateTime($arg1 as xs:dateTime?) as xs:integer? external; declare function fn:minutes-from-dateTime($arg1 as xs:dateTime?) as xs:integer? external; declare function fn:seconds-from-dateTime($arg1 as xs:dateTime?) as xs:decimal? external; declare function fn:timezone-from-dateTime($arg1 as xs:dateTime?) as xs:dayTimeDuration? external; declare function fn:year-from-date($arg1 as xs:date?) as xs:integer? external; declare function fn:month-from-date($arg1 as xs:date?) as xs:integer? external; declare function fn:day-from-date($arg1 as xs:date?) as xs:integer? external; declare function fn:timezone-from-date($arg1 as xs:date?) as xs:dayTimeDuration? external; declare function fn:hours-from-time($arg1 as xs:time?) as xs:integer? external; declare function fn:minutes-from-time($arg1 as xs:time?) as xs:integer? external; declare function fn:seconds-from-time($arg1 as xs:time?) as xs:decimal? external; declare function fn:timezone-from-time($arg1 as xs:time?) as xs:dayTimeDuration? external; (: 10.5 Arithmetic Functions on Durations :) declare function op:add-yearMonthDurations($arg1 as xs:yearMonthDuration?, $arg2 as xs:yearMonthDuration?) as xs:yearMonthDuration? external; declare function op:subtract-yearMonthDurations($arg1 as xs:yearMonthDuration?, $arg2 as xs:yearMonthDuration?) as xs:yearMonthDuration? external; declare function op:multiply-yearMonthDuration($arg1 as xs:yearMonthDuration?, $arg2 as xs:double?) as xs:yearMonthDuration? external; declare function op:divide-yearMonthDuration($arg1 as xs:yearMonthDuration?, $arg2 as xs:double?) as xs:yearMonthDuration? external; declare function op:multiply-yearMonthDuration2($arg1 as xs:double?, $arg2 as xs:yearMonthDuration?) as xs:yearMonthDuration? 
external; declare function op:divide-yearMonthDuration-by-yearMonthDuration($arg1 as xs:yearMonthDuration?, $arg2 as xs:yearMonthDuration?) as xs:decimal? external; declare function op:add-dayTimeDurations($arg1 as xs:dayTimeDuration?,$arg2 as xs:dayTimeDuration?) as xs:dayTimeDuration? external; declare function op:subtract-dayTimeDurations($arg1 as xs:dayTimeDuration?, $arg2 as xs:dayTimeDuration?) as xs:dayTimeDuration? external; declare function op:multiply-dayTimeDuration($arg1 as xs:dayTimeDuration?, $arg2 as xs:double?) as xs:dayTimeDuration? external; declare function op:multiply-dayTimeDuration2($arg1 as xs:double?, $arg2 as xs:dayTimeDuration?) as xs:dayTimeDuration? external; declare function op:divide-dayTimeDuration($arg1 as xs:dayTimeDuration?, $arg2 as xs:double?) as xs:dayTimeDuration? external; declare function op:divide-dayTimeDuration-by-dayTimeDuration($arg1 as xs:dayTimeDuration?, $arg2 as xs:dayTimeDuration?) as xs:decimal? external; (: 10.6 Timezone Adjustment :) declare function fn:adjust-time-to-timezone($arg1 as xs:time?, $arg2 as xs:dayTimeDuration?) as xs:time? external; declare function fn:adjust-date-to-timezone($arg1 as xs:date?, $arg2 as xs:dayTimeDuration?) as xs:date? external; declare function fn:adjust-dateTime-to-timezone($arg1 as xs:dateTime?, $arg2 as xs:dayTimeDuration?) as xs:dateTime? external; declare function fn:adjust-time-to-timezone-unary($arg1 as xs:time?) as xs:time? external; declare function fn:adjust-date-to-timezone-unary($arg1 as xs:date?) as xs:date? external; declare function fn:adjust-dateTime-to-timezone-unary($arg1 as xs:dateTime?) as xs:dateTime? external; (: 10.7 Arithmetic Functions on Durations, Dates, Times :) declare function op:subtract-dateTimes($arg1 as xs:dateTime?,$arg2 as xs:dateTime?) as xs:dayTimeDuration? external; declare function op:subtract-dates($arg1 as xs:date?,$arg2 as xs:date?) as xs:dayTimeDuration? external; declare function op:subtract-times($arg1 as xs:time?,$arg2 as xs:time?) as xs:dayTimeDuration? external; declare function op:add-yearMonthDuration-to-dateTime($arg1 as xs:dateTime?,$arg2 as xs:yearMonthDuration?) as xs:dateTime? external; declare function op:add-yearMonthDuration-to-dateTime2($arg1 as xs:yearMonthDuration?,$arg2 as xs:dateTime?) as xs:dateTime? external; declare function op:add-dayTimeDuration-to-dateTime($arg1 as xs:dateTime?,$arg2 as xs:dayTimeDuration?) as xs:dateTime? external; declare function op:add-dayTimeDuration-to-dateTime2($arg1 as xs:dayTimeDuration?,$arg2 as xs:dateTime?) as xs:dateTime? external; declare function op:subtract-yearMonthDuration-from-dateTime($arg1 as xs:dateTime?, $arg2 as xs:yearMonthDuration?) as xs:dateTime? external; declare function op:subtract-dayTimeDuration-from-dateTime($arg1 as xs:dateTime?, $arg2 as xs:dayTimeDuration?) as xs:dateTime? external; declare function op:add-yearMonthDuration-to-date($arg1 as xs:date?,$arg2 as xs:yearMonthDuration?) as xs:date? external; declare function op:add-yearMonthDuration-to-date2($arg1 as xs:yearMonthDuration?,$arg2 as xs:date?) as xs:date? external; declare function op:add-dayTimeDuration-to-date($arg1 as xs:date?,$arg2 as xs:dayTimeDuration?) as xs:date? external; declare function op:add-dayTimeDuration-to-date2($arg1 as xs:dayTimeDuration?,$arg2 as xs:date?) as xs:date? external; declare function op:subtract-yearMonthDuration-from-date($arg1 as xs:date?, $arg2 as xs:yearMonthDuration?) as xs:date? external; declare function op:subtract-dayTimeDuration-from-date($arg1 as xs:date?, $arg2 as xs:dayTimeDuration?) 
as xs:date? external; declare function op:add-dayTimeDuration-to-time($arg1 as xs:time?,$arg2 as xs:dayTimeDuration?) as xs:time? external; declare function op:add-dayTimeDuration-to-time2($arg1 as xs:dayTimeDuration?,$arg2 as xs:time?) as xs:time? external; declare function op:subtract-dayTimeDuration-from-time($arg1 as xs:time?, $arg2 as xs:dayTimeDuration?) as xs:time? external; (: F&O Section 11. Functions on QNames :) declare function fn:resolve-QName($arg1 as xs:string?, $arg2 as element()) as xs:QName external; declare function fn:QName($uri as xs:string?,$qname as xs:string) as xs:QName external; declare function fn:local-name-from-QName($arg1 as xs:QName?) as xs:NCName? external; declare function fn:namespace-uri-from-QName($arg1 as xs:QName?) as xs:anyURI? external; declare function fn:prefix-from-QName($arg1 as xs:QName?) as xs:NCName? external; declare function fn:namespace-uri-for-prefix($prefix as xs:string?, $element as element()) as xs:anyURI? external; declare function fn:in-scope-prefixes($arg1 as element()) as xs:string* external; declare function op:QName-equal($arg1 as xs:QName, $arg2 as xs:QName) as xs:boolean external; declare function op:QName-nequal($arg1 as xs:QName, $arg2 as xs:QName) as xs:boolean external; (: F&O Section 12. Functions and Operators on base64Binary and hexBinary :) declare function op:hexBinary-equal($arg1 as xs:hexBinary, $arg2 as xs:hexBinary) as xs:boolean external; declare function op:base64Binary-equal($arg1 as xs:base64Binary, $arg2 as xs:base64Binary) as xs:boolean external; declare function op:hexBinary-nequal($arg1 as xs:hexBinary, $arg2 as xs:hexBinary) as xs:boolean external; declare function op:base64Binary-nequal($arg1 as xs:base64Binary, $arg2 as xs:base64Binary) as xs:boolean external; (: F&O Section 13. Functions and Operators on NOTATION :) (: declare function op:NOTATION-equal($arg1 as xs:NOTATION,$arg2 as xs:NOTATION) as xs:boolean external; :) (: F&O Section 14. Functions and Operators on Nodes :) declare function fn:name($arg1 as node()?) as xs:string external; declare function fn:local-name($arg1 as node()?) as xs:string external; declare function fn:namespace-uri($arg1 as node()?) as xs:string external; declare function fn:number($arg1 as xs:anyAtomicType?) as xs:double external; (: fn:lang is only defined over context nodes, not arbitrary nodes: :) declare function fn:lang($arg1 as xs:string?,$arg2 as node()) as xs:boolean external; declare function op:is-same-node($arg1 as node()?, $arg2 as node()?) as xs:boolean? external; declare function op:node-before($arg1 as node()?, $arg2 as node()?) as xs:boolean? external; declare function op:node-after($arg1 as node()?, $arg2 as node()?) as xs:boolean? external; declare function fn:root($arg1 as node()?) as node()? external; (: F&O Section 15. 
Functions and Operators on Sequences :) (: 15.1 General Functions :) (: The function signature for fn:boolean() in the dynamic semantics is: fn:boolean(item()*) but in the static semantics it is: fn:boolean(empty|NodeType+|xs:boolean|xs:string|xs:untypedAtomic|fs:numeric) :) declare function fn:boolean($arg1 as item()*) as xs:boolean external; declare function fn:index-of($arg1 as xs:anyAtomicType*, $arg2 as xs:anyAtomicType, $arg3 as xs:string) as xs:integer* external; declare function fn:empty($arg1 as item()*) as xs:boolean external; declare function fn:exists($arg1 as item()*) as xs:boolean external; declare function fn:distinct-values($arg1 as xs:anyAtomicType*, $arg2 as xs:string) as xs:anyAtomicType* external; declare function fn:insert-before($arg1 as item()*, $arg2 as xs:integer, $arg3 as item()*) as item()* external; declare function fn:remove($arg1 as item()*, $arg2 as xs:integer) as item()* external; declare function fn:reverse($arg1 as item()*) as item()* external; declare function fn:subsequence($arg1 as item()*, $arg2 as xs:integer) as item()* external; declare function fn:subsequence($arg1 as item()*, $arg2 as xs:integer, $arg3 as xs:integer) as item()* external; declare function fn:unordered($arg1 as item()*) as item()* external; (: 15.2 Functions that test cardinality of sequences :) declare function fn:zero-or-one($arg1 as item()*) as item()? external; declare function fn:exactly-one($arg1 as item()*) as item() external; declare function fn:one-or-more($arg1 as item()*) as item()+ external; (: 15.3 Equals, Union, Intersection, Except :) declare function fn:deep-equal($arg1 as item()*, $arg2 as item()*, $arg3 as xs:string) as xs:boolean external; declare function op:union($arg1 as node()*, $arg2 as node()*) as node()* external; declare function op:intersect($arg1 as node()*, $arg2 as node()*) as node()* external; declare function op:except($arg1 as node()*, $arg2 as node()*) as node()* external; (: 15.4 Aggregate Functions :) declare function fn:count($arg1 as item()*) as xs:integer external; declare function fn:avg($arg1 as xs:anyAtomicType*) as xs:anyAtomicType? external; declare function fn:max($arg1 as xs:anyAtomicType*) as xs:anyAtomicType? external; declare function fn:min($arg1 as xs:anyAtomicType*) as xs:anyAtomicType? external; declare function fn:sum($arg1 as xs:anyAtomicType*, $arg2 as xs:anyAtomicType?) as xs:anyAtomicType? external; declare function fn:sum($arg1 as xs:anyAtomicType*) as xs:anyAtomicType external; (: xs:dayTimeDuration :) declare function fn:avg-dayTimeDuration($arg1 as xs:dayTimeDuration*) as xs:dayTimeDuration? external; declare function fn:max-dayTimeDuration($arg1 as xs:dayTimeDuration*) as xs:dayTimeDuration? external; declare function fn:min-dayTimeDuration($arg1 as xs:dayTimeDuration*) as xs:dayTimeDuration? external; declare function fn:sum-dayTimeDuration($arg1 as xs:dayTimeDuration*, $arg2 as xs:dayTimeDuration?) as xs:dayTimeDuration? external; declare function fn:sum-dayTimeDuration($arg1 as xs:dayTimeDuration*) as xs:dayTimeDuration external; (: xs:yearMonthDuration :) declare function fn:avg-yearMonthDuration($arg1 as xs:yearMonthDuration*) as xs:yearMonthDuration? external; declare function fn:max-yearMonthDuration($arg1 as xs:yearMonthDuration*) as xs:yearMonthDuration? external; declare function fn:min-yearMonthDuration($arg1 as xs:yearMonthDuration*) as xs:yearMonthDuration? external; declare function fn:sum-yearMonthDuration($arg1 as xs:yearMonthDuration*, $arg2 as xs:yearMonthDuration?) as xs:yearMonthDuration? 
external; declare function fn:sum-yearMonthDuration($arg1 as xs:yearMonthDuration*) as xs:yearMonthDuration external; (: xs:string :) declare function fn:max-string($arg1 as xs:string*) as xs:string? external; declare function fn:min-string($arg1 as xs:string*) as xs:string? external; (: xs:date :) declare function fn:max-date($arg1 as xs:date*) as xs:date? external; declare function fn:min-date($arg1 as xs:date*) as xs:date? external; (: xs:time :) declare function fn:max-time($arg1 as xs:time*) as xs:time? external; declare function fn:min-time($arg1 as xs:time*) as xs:time? external; (: xs:dateTime :) declare function fn:max-dateTime($arg1 as xs:dateTime*) as xs:dateTime? external; declare function fn:min-dateTime($arg1 as xs:dateTime*) as xs:dateTime? external; (: xs:integer :) declare function fn:avg-integer($arg1 as xs:integer*) as xs:decimal? external; declare function fn:max-integer($arg1 as xs:integer*) as xs:integer? external; declare function fn:min-integer($arg1 as xs:integer*) as xs:integer? external; declare function fn:sum-integer($arg1 as xs:integer*) as xs:integer external; declare function fn:sum-integer($arg1 as xs:integer*, $arg2 as xs:integer?) as xs:integer? external; (: xs:decimal :) declare function fn:avg-decimal($arg1 as xs:decimal*) as xs:decimal? external; declare function fn:max-decimal($arg1 as xs:decimal*) as xs:decimal? external; declare function fn:min-decimal($arg1 as xs:decimal*) as xs:decimal? external; declare function fn:sum-decimal($arg1 as xs:decimal*) as xs:decimal external; declare function fn:sum-decimal($arg1 as xs:decimal*, $arg2 as xs:decimal?) as xs:decimal? external; (: xs:float :) declare function fn:avg-float($arg1 as xs:float*) as xs:float? external; declare function fn:max-float($arg1 as xs:float*) as xs:float? external; declare function fn:min-float($arg1 as xs:float*) as xs:float? external; declare function fn:sum-float($arg1 as xs:float*) as xs:float external; declare function fn:sum-float($arg1 as xs:float*, $arg2 as xs:float?) as xs:float? external; (: xs:double :) declare function fn:avg-double($arg1 as xs:double*) as xs:double? external; declare function fn:max-double($arg1 as xs:double*) as xs:double? external; declare function fn:min-double($arg1 as xs:double*) as xs:double? external; declare function fn:sum-double($arg1 as xs:double*) as xs:double external; declare function fn:sum-double($arg1 as xs:double*, $arg2 as xs:double?) as xs:double? external; (: 15.5 Functions that generate sequences :) declare function op:to($arg1 as xs:integer?, $arg2 as xs:integer?) as xs:integer+ external; declare function fn:doc($arg1 as xs:string?) as document-node()? external; declare function fn:collection($arg1 as xs:string?) as node()* external; declare function fn:id($arg as xs:string*, $node as node()) as element()* external; declare function fn:idref($arg as xs:string*, $node as node()) as node()* external; (: :::::::::::::::::::::::: Comparison functions :::::::::::::::::::::::: :) (: Note: Those functions are done in a polymorphic fashion in Galax. This differs from the F&O document where comparison functions are specialized by type. :) (: comparators :) declare function op:equal($arg1 as xs:anyAtomicType?, $arg2 as xs:anyAtomicType?) as xs:boolean external; declare function op:nequal($arg1 as xs:anyAtomicType?, $arg2 as xs:anyAtomicType?) as xs:boolean external; declare function op:ge($arg1 as xs:anyAtomicType?, $arg2 as xs:anyAtomicType?) 
as xs:boolean external; declare function op:le($arg1 as xs:anyAtomicType?, $arg2 as xs:anyAtomicType?) as xs:boolean external; declare function op:gt($arg1 as xs:anyAtomicType?, $arg2 as xs:anyAtomicType?) as xs:boolean external; declare function op:lt($arg1 as xs:anyAtomicType?, $arg2 as xs:anyAtomicType?) as xs:boolean external; (: following functions handle cases when one argument to value comparison is empty :) declare function op:equal-left-empty($arg1 as empty-sequence(), $arg2 as xs:anyAtomicType?) as empty-sequence() external; declare function op:nequal-left-empty($arg1 as empty-sequence(), $arg2 as xs:anyAtomicType?) as empty-sequence() external; declare function op:ge-left-empty($arg1 as empty-sequence(), $arg2 as xs:anyAtomicType?) as empty-sequence() external; declare function op:le-left-empty($arg1 as empty-sequence(), $arg2 as xs:anyAtomicType?) as empty-sequence() external; declare function op:gt-left-empty($arg1 as empty-sequence(), $arg2 as xs:anyAtomicType?) as empty-sequence() external; declare function op:lt-left-empty($arg1 as empty-sequence(), $arg2 as xs:anyAtomicType?) as empty-sequence() external; declare function op:equal-right-empty($arg1 as xs:anyAtomicType?, $arg2 as empty-sequence()) as empty-sequence() external; declare function op:nequal-right-empty($arg1 as xs:anyAtomicType?, $arg2 as empty-sequence()) as empty-sequence() external; declare function op:ge-right-empty($arg1 as xs:anyAtomicType?, $arg2 as empty-sequence()) as empty-sequence() external; declare function op:le-right-empty($arg1 as xs:anyAtomicType?, $arg2 as empty-sequence()) as empty-sequence() external; declare function op:gt-right-empty($arg1 as xs:anyAtomicType?, $arg2 as empty-sequence()) as empty-sequence() external; declare function op:lt-right-empty($arg1 as xs:anyAtomicType?, $arg2 as empty-sequence()) as empty-sequence() external; (: F&O Section 16 : Context functions :) declare function fn:static-base-uri() as xs:anyURI? external; declare function fn:current-dateTime() as xs:dateTime external; declare function fn:current-date() as xs:date external; declare function fn:current-time() as xs:time external; declare function fn:default-collation() as xs:string external; declare function fn:implicit-timezone() as xs:dayTimeDuration? external; declare function fn:dateTime($arg1 as xs:date?, $arg2 as xs:time?) as xs:dateTime external; (: :::::::::::::::::::::::::::::: Formal Semantics functions :::::::::::::::::::::::::::::: :) (: Used in semantics of path expressions :) (: Sort by document order and duplicate removal :) declare function fs:distinct-docorder-or-atomic-sequence($arg1 as item()*) as item()* external; declare function fs:distinct-docorder($arg1 as node()*) as node()* external; (: distinct-docorder can be optimized to either docorder or distinct: :) declare function fs:docorder($arg1 as node()*) as node()* external; declare function fs:distinct($arg1 as node()*) as node()* external; (: docorder and distinct can be optimized to node-sequence: :) declare function fs:node-sequence($arg1 as node()*) as node()* external; declare function fs:node-sequence-or-atomic-sequence($arg1 as item()*) as item()* external; (: Used in semantics of constructors :) (: Converts a sequence of items into an untyped atomic value :) declare function fs:item-sequence-to-untypedAtomic($arg1 as item()*) as xs:untypedAtomic external; declare function fs:item-sequence-to-untypedAtomic-optional($arg1 as item()*) as xs:untypedAtomic? 
external; (: These functions are only needed for typing constructors:) declare function fs:item-sequence-to-node-sequence($arg1 as item()*) as node()* external; (: Used in semantics of arithmetic, value, and general comparisions. Converts untypedAtomic argument to a target type. Returns all other arguments unchanged. :) declare function fs:untyped-to-integer($arg1 as xs:anyAtomicType?) as xs:anyAtomicType? external; declare function fs:untyped-to-double($arg1 as xs:anyAtomicType?) as xs:anyAtomicType? external; declare function fs:untyped-to-string($arg1 as xs:anyAtomicType?) as xs:anyAtomicType? external; declare function fs:untyped-to-any($arg1 as xs:anyAtomicType, $arg2 as xs:anyAtomicType) as xs:anyAtomicType external; declare function fs:promote-to-numeric($arg1 as numeric()*, $arg2 as numeric()) as xs:anyAtomicType* external; declare function fs:promote-to-anystring($arg1 as anystring()*) as xs:anyAtomicType* external; declare function fs:unsafe-promote-to-numeric($arg1 as numeric()*, $arg2 as numeric()) as xs:anyAtomicType* external; declare function fs:convert-simple-operand($arg1 as xs:anyAtomicType*, $arg2 as xs:anyAtomicType) as xs:anyAtomicType* external; (: Used in semantics of XPath predicate expressions: :) declare function fs:first-item($arg1 as item()*) as item()? external; declare function fs:last-item($arg1 as item()*) as item()? external; (: ::::::::::::::::::: Galax functions ::::::::::::::::::: :) (: Note: The following functions are not part of the F&O spec. They are added in Galax for user's convenience. :) (: Union, intersection and exception on values :) declare function glx:union-values($arg1 as xs:anyAtomicType*, $arg2 as xs:anyAtomicType*) as xs:anyAtomicType* external; declare function glx:intersect-values($arg1 as xs:anyAtomicType*, $arg2 as xs:anyAtomicType*) as xs:anyAtomicType* external; declare function glx:except-values($arg1 as xs:anyAtomicType*, $arg2 as xs:anyAtomicType*) as xs:anyAtomicType* external; (: Output functions :) declare function glx:print-string($arg1 as xs:string) as empty-sequence() external; declare function glx:print-item($arg1 as item()) as empty-sequence() external; declare function glx:string-of-item($arg1 as item()) as xs:string external; declare function glx:doc-of-string($arg1 as xs:string) as document-node() external; declare function glx:print-string-err($arg1 as xs:string) as empty-sequence() external; declare function glx:print-item-err($arg1 as item()) as empty-sequence() external; declare function glx:save-document($arg1 as xs:string,$arg2 as document-node()) as empty-sequence() external; (: xml:lang functions :) declare function glx:get-lang($arg1 as node()) as xs:string? 
external; (: all the traditional float operations :) declare function glx:exponent($arg1 as xs:double, $arg2 as xs:double) as xs:double external; declare function glx:sqrt($arg1 as xs:double) as xs:double external; declare function glx:exp($arg1 as xs:double) as xs:double external; declare function glx:log($arg1 as xs:double) as xs:double external; declare function glx:log10($arg1 as xs:double) as xs:double external; declare function glx:cos($arg1 as xs:double) as xs:double external; declare function glx:sin($arg1 as xs:double) as xs:double external; declare function glx:tan($arg1 as xs:double) as xs:double external; declare function glx:acos($arg1 as xs:double) as xs:double external; declare function glx:asin($arg1 as xs:double) as xs:double external; declare function glx:atan($arg1 as xs:double) as xs:double external; declare function glx:atan2($arg1 as xs:double, $arg2 as xs:double) as xs:double external; declare function glx:cosh($arg1 as xs:double) as xs:double external; declare function glx:sinh($arg1 as xs:double) as xs:double external; declare function glx:tanh($arg1 as xs:double) as xs:double external; (: Previously was fn:string-pad :) declare function glx:string-pad($arg1 as xs:string?, $arg2 as xs:integer) as xs:string? external; (: Jabber :) (: - jid - Jabber ID: username@server/source :) (: - password - string of password of username on server :) (: - timeout - in seconds :) (: - verbose mode flag - if true, print on stderr messages sent to/rcv'd by jabber server :) declare function glx:jabber-buddies($arg1 as xs:string,$arg2 as xs:string,$arg3 as xs:int,$arg4 as xs:boolean) as document-node() external; (: glx:file-exists If argument is empty, returns empty. Otherwise, checks if local file exists and returns true, otherwise false. :) declare function glx:file-exists($arg1 as xs:string?) as xs:boolean? external; (: glx:stem If first argument is empty, returns empty. Otherwise, returns the stem of the given word. The second argument specifies a case sensitive/insensitive stemming. If the second argument is not specified then case insensitve is default. $arg2 = "i" => case insensitive stemming $arg2 = "s" => case sensitive stemming :) declare function glx:stem($arg1 as xs:string?, $arg2 as xs:string?) as xs:string? external; declare function glx:deep-distinct($arg1 as item()*) as item()* external; (: Added accessor function for pre-order -- Philippe :) declare function glx:get-order($arg as node()) as xs:integer external; declare function glx:get-docid($arg as node()) as xs:integer external; (: Delay for the specified amount of time (in seconds; accepts non-integer sleep times) :) declare function glx:sleep($arg as xs:float) external; (: Similar to fn:doc, but re-reads the document each time it is called :) declare function glx:getdoc($arg1 as xs:string?) as document-node()? external; (: SOAP calls :) declare function glx:soap-call($arg1 as xs:anyURI,$arg2 as xs:string,$arg3 as xs:string,$arg4 as item()*) as document-node() external; (: Generic function for HTTP requests :) declare function glx:http-request( $method as xs:string, $url as xs:string, $content as element()? ) as xs:string? external; (: Implementation of GET and POST using the camlnet package :) declare function glx:http-get-request( $url as xs:string ) as item()? external; (: Returns the current time, in seconds since Jan.1, 1970 :) declare updating function glx:gettime() as xs:double external; (: Perform major O'Caml collection, and return number of live words. 
:) declare updating function glx:livewords() as xs:int external; (: O'Caml Random module :) declare updating function glx:random_int($bound as xs:integer) as xs:integer external; declare updating function glx:keyref($keyname as xs:string,$keyval as xs:anyAtomicType) as xs:integer external; galax-1.1/stdlib/fn_doc.ml0000664000076400007640000001343610670320214013630 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: fn_doc.ml,v 1.32 2007/09/07 19:03:40 mff Exp $ *) (* Module: Fn_doc Description: This module implements the fn:doc() function. *) open Error open Galax_io open Datatypes open Dm open Monitoring_context open Processing_context (* Note: This module performs two main tasks: (1) it dispatches URL's to the appropriate source (HTTP, file system, back-end store, etc.), and (2) if necessary it 'stores' the resulting document locally in order to preserve the original node identity of the document. - Jerome *) (*************************) (* Back-end registration *) (*************************) type back_end_call = Processing_context.processing_context -> string * string option * string -> Physical_value.item list let back_ends = ref [] let is_registered_back_end me = List.mem_assoc me !back_ends let lookup_back_end_fun me = try List.assoc me !back_ends with | _ -> raise (Query (Internal_Error ("Accessing unregistered backend: " ^ me ^ ", how come?"))) let register_back_end me f = if is_registered_back_end me then raise (Query (Internal_Error ("Backend : " ^ me ^ " already registered"))) else begin Galax_url.register_method me; (* Do not forget to register the backend as a URI method *) back_ends := (me,f) :: !back_ends (* Add the function to the list of registered back-ends *) end (* Alive documents *) type alive_documents = (Datatypes.xs_string, Physical_value.item list) Hashtbl.t let build_alive_documents_table () = Hashtbl.create 59 let build_dummy_alive_documents_table () = Hashtbl.create 1 (* Nicola: used only when we want an empty documents table *) let alive_documents_table_mem alive_documents file = Hashtbl.mem alive_documents file let alive_documents_table_get alive_documents file = Hashtbl.find alive_documents file let alive_documents_table_put alive_documents file doc = Hashtbl.add alive_documents file doc let merge_alive_documents ad1 ad2 = Gmisc.merge_hashtable ad1 ad2 (*********************) (* Http/file sources *) (*********************) let lookup_document_from_io_with_index gio name_indices entity_kind alive_ctxt_opt proc_ctxt = let apply_load_document () = (* 1. Set the document id and nodeid context *) let uri_string = match gio with | File_Input uri_string | Http_Input uri_string -> uri_string | _ -> raise (Query (Internal_Error "Cannot call the fn:doc() function on something else than a file or URI")) in let get_data () = let result_node = (* 1. Open a SAX cursor on the input document *) let (dtd_opt, xml_stream) = Streaming_parse.open_xml_stream_from_io gio (* entity kind *) in (* 2. Resolve namespaces *) let resolved_xml_stream = Streaming_ops.resolve_xml_stream xml_stream in (* 3. 
Apply type annotations *) let typed_xml_stream = Streaming_ops.typed_of_resolved_xml_stream resolved_xml_stream in (* If the doc id is not found, then build a new one and register it *) let docid = Galax_nodeid.new_docid () in let nodeid_context = Nodeid_context.default_nodeid_context () in (* 4. Load in the data model *) Physical_index_load.load_xml_document_from_typed_stream_for_docid nodeid_context docid name_indices typed_xml_stream in begin begin match alive_ctxt_opt with | None -> () | Some alive_ctxt -> Hashtbl.add alive_ctxt uri_string result_node; end; result_node end in begin match alive_ctxt_opt with | None -> get_data () | Some alive_ctxt -> try Hashtbl.find alive_ctxt uri_string with | Not_found -> get_data () end in Monitor.wrap_monitor proc_ctxt (Document_ParsingLoading_Phase (Parse_io.name_of_input_spec gio)) apply_load_document () let lookup_document_from_io gio = let name_indices = Physical_name_index.no_name_indices in lookup_document_from_io_with_index gio name_indices Document_entity let lookup_entity_with_index uri name_indices entity_kind = let http_method = Galax_url.glx_decode_url uri in match http_method with | Galax_url.File _ | Galax_url.Http _ -> lookup_document_from_io_with_index (Galax_io.Http_Input uri) name_indices entity_kind | Galax_url.ExternalSource (me,host,port,local) -> let back_end_fun = lookup_back_end_fun me in (fun alive_ctxt -> fun proc_ctxt -> back_end_fun proc_ctxt (host,port,local)) let lookup_doc_function_with_index uri name_indices ad = lookup_entity_with_index uri name_indices Document_entity ad let lookup_doc_function uri = let name_indices = Physical_name_index.no_name_indices in lookup_entity_with_index uri name_indices Document_entity let lookup_doc_function_no_table uri = let ad = build_alive_documents_table () in let name_indices = Physical_name_index.no_name_indices in lookup_entity_with_index uri name_indices Document_entity (Some ad) (* Collection functions *) let lookup_collection_function uri = let name_indices = Physical_name_index.no_name_indices in lookup_entity_with_index uri name_indices Document_fragment galax-1.1/base/0000775000076400007640000000000010772255367011513 5ustar mffmffgalax-1.1/base/gmisc.mli0000664000076400007640000001310710571347146013314 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: gmisc.mli,v 1.36 2007/02/28 18:48:06 mff Exp $ *) (* Module: Gmisc Description: This module implements some additions to the Caml standard library that appeared to be useful in the process of developing Galax. *) (* Note: All these functions raise standard Caml exceptions, rather than Galax exceptions in the rest of the system. - Jerome *) (*********************************) (* I/O and File System functions *) (*********************************) (* Load the content of a file into a string *) val load_file_in_buffer : Netbuffer.t -> string -> unit val load_string_in_buffer : Netbuffer.t -> string -> unit val load_file : string -> string (* Converts Win95 carriage returns and newlines *) val string_of_file : string -> string val get_files_in_directory: string -> string list val ls : string -> string -> string list (* Convert a shell-style regular expression, using the special characters, ?*[], to a Caml-style regular expression. 
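   Here ? stands for any single character, * for any (possibly empty)
   sequence of characters, and [...] for a character set; for instance,
   the shell pattern "*.xq" (any name ending in ".xq") is rewritten into
   the corresponding Caml-style regular expression.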
*) val convert_regexp : string -> string (******************) (* List functions *) (******************) (* an additional function to partition list, but only getting the first element that satisfy the given predicate *) val partition_first : ('a -> bool * 'b) -> 'a list -> 'b * 'a list (* Partitions a list based on a predicate over the index of the element in the list *) val partition_index : (int -> bool) -> 'a list -> 'a list * 'a list (* Partitions a list by pairs of elements, raises Invalid_argument in case the list contains an odd number of elements *) val partition_pairs : 'a list -> ('a * 'a) list (* Filter non existing elements from a list *) val filter_non_exists : 'a list -> 'a list -> 'a list (* Map concat *) val map_concat : ('a -> 'b list) -> 'a list -> 'b list (* N first items *) val list_npeek : int -> 'a list -> 'a list (* Triple split *) val triple_split : ('a * 'b * 'c) list -> 'a list * 'b list * 'c list (* Remove duplicates *) val remove_duplicates : 'a list -> 'a list (* Sort and remove duplicates : takes a _reverse_ comparator that returns -1 if a1 > a2 *) val sort_and_remove_duplicates_revcompare : ('a -> 'a -> int) -> 'a list -> 'a list (* Unwrap a list of optional values *) val unwrap_option_list : 'a option list -> 'a list val some_list : 'a option list -> 'a list val is_some : 'a option -> bool val is_subset : 'a list -> 'a list -> bool (* X subset Y *) val intersect_list : 'a list -> 'a list -> 'a list (* X intersect Y *) val difference_list : 'a list -> 'a list -> 'a list (* X - Y *) (*********************) (* Hashtbl functions *) (*********************) val create_hashtable : int -> ('a * 'b) list -> ('a, 'b) Hashtbl.t (* Create a hashtable of the given size and fills it with the given bindings. *) val all_of_hashtable : (('a, 'b) Hashtbl.t) -> ('a * 'b) list (* Returns all entries in hash table *) val keys_of_hashtable : (('a, 'b) Hashtbl.t) -> 'a list (* Returns all keys in hash table *) val cond_add : ('a, 'b) Hashtbl.t -> 'a -> 'b -> unit val merge_hashtable : ('a, 'b) Hashtbl.t -> ('a, 'b) Hashtbl.t -> ('a, 'b) Hashtbl.t (********************) (* String functions *) (********************) val split_right_on_char : string -> char -> string * string val split_left_on_char : string -> char -> string * string val split_on_char : string -> char -> string list val remove_leading : string -> char -> string val remove_trailing : string -> char -> string val quote_quotes : string -> string (*********************) (* Parsing functions *) (*********************) val wrap_lexer : (Lexing.lexbuf -> 'a) -> string -> 'a (* [wrap_lexer f s] applies the lexing function [f] on the string [s]. Raises [Failure] in case of failure. 
*) (**********************) (* Printing functions *) (**********************) (* Print to stdout *) val printf_stub : string -> (Format.formatter -> 'a -> unit) -> 'a -> unit (* Print to stderr *) val eprintf_stub : string -> (Format.formatter -> 'a -> unit) -> 'a -> unit (* Print to output channel *) val fprintf_stub : Format.formatter -> string -> (Format.formatter -> 'a -> unit) -> 'a -> unit (* Print to a string buffer *) val bprintf_stub : string -> (Format.formatter -> 'a -> unit) -> 'a -> string (**********************) (* Filename functions *) (**********************) (* Rename a DOS dir to a UNIX dir *) val rename_dir : string -> string val string_hash : string -> int (*********************) (* Integer functions *) (*********************) (* Some missing conversions *) val big_int_of_int32 : int32 -> Big_int.big_int val int32_of_big_int : Big_int.big_int -> int32 val big_int_of_int64 : int64 -> Big_int.big_int val int64_of_big_int : Big_int.big_int -> int64 val compose : ('a -> 'b) -> ('c -> 'a) -> 'c -> 'b val binary_of_hexString : string -> string val string_of_hexBinary : string -> string (* val comment_blit : string -> unit *) galax-1.1/base/dynamic_stack.ml0000775000076400007640000000417410560462355014657 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: dynamic_stack.ml,v 1.3 2007/02/01 22:08:45 simeon Exp $ *) (* Module: Dynamic_stack Implements a dynamic stack with direct access to the elements needed for e.g. twig joins *) open Error let initial_stack_size = 16 (* in twigjoin, the size of the stack is bounded by the depth of the tree *) type 'a dynamic_stack = { mutable capacity: int; mutable size : int; mutable stack : 'a array; default : 'a; } let make c x = {capacity = c; size = 0; stack = Array.make initial_stack_size x; default = x; } let grow_stack s = if s.capacity = 0 then begin s.capacity <- initial_stack_size; s.stack <- Array.make initial_stack_size s.default; end else begin s.capacity <- s.capacity * 2; let new_stack = Array.make s.capacity s.default in Array.blit s.stack 0 new_stack 0 s.size; s.stack <- new_stack; end let push stack item = if stack.size = stack.capacity then grow_stack stack; stack.stack.(stack.size) <- item; stack.size <- stack.size +1 let empty stack = (stack.size = 0) let pop stack = if empty stack then raise (Query(Internal_Error("Empty stack exception (dynamic_stack)"))) else begin let return_item = stack.stack.(stack.size -1) in stack.size <- stack.size -1; return_item end let top stack = if empty stack then raise (Query(Internal_Error("Empty stack exception (dynamic_stack)"))) else stack.stack.(stack.size -1) galax-1.1/base/debug.mli0000664000076400007640000000316310672301472013273 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: debug.mli,v 1.7 2007/09/13 18:36:42 simeon Exp $ *) (* Module: Debug Description: This module implements basic operations used for debugging. 
*) type debug_flag = | JoinDebug | TypeDebug | DefaultDebug | CompileDebug | StaticDebug | DxqDebug | MaterializationDebug val print_join_debug : string -> unit val print_dxq_debug : string -> unit val print_materialization_debug : string -> unit val print_typing_debug : string -> unit val print_compile_debug : string -> unit val print_static_debug : string -> unit val print_default_debug : string -> unit val sprintf_default_debug : ('a -> string, unit, string) format -> 'a -> unit val set_debug : debug_flag list -> unit val join_debug : unit -> bool val typing_debug : unit -> bool val dxq_debug : unit -> bool val compile_debug : unit -> bool val static_debug : unit -> bool val materialization_debug : unit -> bool val default_debug : unit -> bool val debug_flag_of_string : string -> debug_flag galax-1.1/base/pool.ml0000664000076400007640000000744210560462355013015 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: pool.ml,v 1.10 2007/02/01 22:08:45 simeon Exp $ *) (* Module: Pool Description: This module implements string pools. Those are bidirectional mappings from strings to integers, used to save space in the representation of XML documents. *) (* Note: Name pools are higher order modules parameterized by a Hashable type. - Jerome and Byron *) (* Signature of the NamePool modules *) module type NamePool = sig type name type symbol = int type namepool val create_pool : unit -> namepool val init_pool : namepool -> unit val get_name : namepool -> symbol -> name val add_name : namepool -> name -> symbol val exists_name : namepool -> name -> bool val symbol_equals : namepool -> symbol -> symbol -> bool val pool_size : namepool -> (int * int * int) end (* Functor to create new name pool modules for an given hashed type *) (* Note: The input of the function is just a hashble type and it returns a new name pool whose name type is the original hashable type. 
- J&B *) module MakeNamePool (H: Hashtbl.HashedType) : (NamePool with type name = H.t) = struct type name = H.t (* Type of names *) type name_o = name option (* Type of optional names *) type symbol = int (* Type of symbols in the name pool *) type namepool = { name_table : (name, symbol) Hashtbl.t; (* Table from names to symbols *) mutable inverse_table : name_o array; (* Table from symbols to names *) sym_counter : Id.id_gen } (* Symbol counter *) let create_pool () = { name_table = Hashtbl.create 1439; inverse_table = (Array.create 100 None); sym_counter = Id.create 0 } let init_pool np = Hashtbl.clear np.name_table; np.inverse_table <- (Array.create 100 None); Id.init np.sym_counter 0 let put_inverse_mapping np v = (* Adds a new symbol in the inverse table *) let sym = Id.next np.sym_counter in if sym >= Array.length np.inverse_table then np.inverse_table <- Array.append np.inverse_table (Array.create 100 None) else (); (np.inverse_table).(sym) <- Some v; sym let get_name np sym = (* Returns a name from a symbol *) match (np.inverse_table).(sym) with | None -> raise Not_found | Some nm -> nm let add_name np v = (* Adds a new name,symbol binding *) try Hashtbl.find np.name_table v with | _ -> begin let sym = put_inverse_mapping np v in Hashtbl.add np.name_table v sym; sym end let exists_name np v = (* Returns true is a name exists in the name pool *) Hashtbl.mem np.name_table v let symbol_equals np s1 s2 = (s1 = s2) (* Compares two symbols *) let pool_size np = let hashtbl_size ht = let size = ref 0 in Hashtbl.iter (fun x y -> size := !size+1) ht; !size in (Id.top np.sym_counter, hashtbl_size np.name_table, Array.length np.inverse_table) end galax-1.1/base/occurrence.mli0000664000076400007640000000455410560462355014346 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: occurrence.mli,v 1.8 2007/02/01 22:08:45 simeon Exp $ *) (* Module: Occurrence Description: Manipulations of occurrences, i.e., an integer or 'unbounded'. Used for XML Schema minOccurs, maxOccurs, and XQuery occurrence indicators. *) (* the type 'occurs' corresponds to minOccur and maxOccur *) type occurs = | UP_INT of int | UNBOUNDED type occurrence_indicator= (occurs * occurs) (* operations on bounds *) val occurs : int -> occurs val unbounded : occurs val occurs_zero : occurs val occurs_one : occurs val ub_max : occurs -> occurs -> occurs val ub_min : occurs -> occurs -> occurs val ub_add : occurs -> occurs -> occurs val ub_mult : occurs -> occurs -> occurs val mult : occurs -> occurs -> occurs val minus : int -> occurs -> occurs val equal : occurs -> occurs -> bool val le : occurs -> occurs -> bool (* prints bounds *) val string_of_occurs : occurs -> string (* Approximate occurrence indicators as used in XQuery *) val one : occurrence_indicator (* Exactly one *) val optional : occurrence_indicator (* Zero or one '?' 
*) val star : occurrence_indicator (* Zero or more '*' *) val plus : occurrence_indicator (* One or more '+' *) val is_one : occurrence_indicator -> bool val is_optional : occurrence_indicator -> bool val is_star : occurrence_indicator -> bool val is_plus : occurrence_indicator -> bool val mult_occurrences : occurrence_indicator -> occurrence_indicator -> occurrence_indicator val seq_occurrences : occurrence_indicator -> occurrence_indicator -> occurrence_indicator (* Computes an approximate occurrence indicator *) val approximate_occurrences : occurrence_indicator -> occurrence_indicator galax-1.1/base/id.ml0000664000076400007640000000177110560462355012437 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: id.ml,v 1.5 2007/02/01 22:08:45 simeon Exp $ *) (* Module: Id Description: Generation of unique id's, which, in this version are Caml integers. *) (* id generators *) type id_gen = int ref let create i = ref i let init ig i = ig := i let next ig = let current = !ig in ig := current + 1; current let top ig= !ig galax-1.1/base/encoding.ml0000664000076400007640000001302410560462355013623 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: encoding.ml,v 1.13 2007/02/01 22:08:45 simeon Exp $ *) (* Module: Encoding Description: Configuration of character encoding operations. *) open Error let utf8_string = "UTF-8" (* Internal stuff used to set-up Pxp resolvers *) class warner = object method warn w = print_endline ("WARNING: " ^ w) end let global_warner = new warner (**********************) (* Character encoding *) (**********************) type encoding = Pxp_types.encoding (* All possible external encodings *) type rep_encoding = Pxp_types.rep_encoding (* All possible internal encodings *) (* Converting a string to an encoding *) let encoding_of_string s = try let s' = if (s = "ISO-8859") then (s^"-1") else s in Netconversion.encoding_of_string s' with | _ -> raise (Query (Undefined ("Undefined character encoding: " ^ s ^". 
See http://www.w3.org/TR/REC-xml/#charencoding"))) (* Converting an encoding to a string *) let string_of_encoding = try Netconversion.string_of_encoding with | _ -> raise (Query (Undefined "Undefined character encoding")) (* Converting an internal encoding to an encoding *) let encoding_of_rep_encoding enc = match enc with | `Enc_utf8 -> `Enc_utf8 | `Enc_iso88591 -> `Enc_iso88591 | _ -> raise (Query (Undefined "Internal encoding can only be UTF8 or ISO-8859-1")) (* Converting an encoding to an internal encoding *) let rep_encoding_of_encoding enc = match enc with | `Enc_utf8 -> `Enc_utf8 | `Enc_iso88591 -> `Enc_iso88591 | _ -> raise (Query (Undefined "Internal encoding can only be UTF8 or ISO-8859-1")) (* Character encoding *) let internal_encoding = ref `Enc_utf8 let output_encoding = ref `Enc_utf8 let set_internal_encoding enc = internal_encoding := enc; output_encoding := encoding_of_rep_encoding enc let set_output_encoding enc = output_encoding := enc let set_default_output_encoding () = output_encoding := encoding_of_rep_encoding !internal_encoding let get_internal_encoding () = !internal_encoding let get_output_encoding () = !output_encoding (**********************************************) (* String / character conversion capabilities *) (**********************************************) (* Converting a string from one encoding to another *) (* This is a FIX to PXP so that it does not encode '%' as a character reference. - Jerome *) let pxp_fix_write_data_string ~(from_enc:rep_encoding) ~to_enc os content = (* Write the 'from_enc'-encoded string 's' as 'to_enc'-encoded string to * 'os'. The characters '&', '<', '>', '"', '%' and every character that * cannot be represented in 'to_enc' are paraphrased as entity reference * "&...;". *) let convert_ascii s = (* Convert the ASCII-encoded string 's'. 
Note that 'from_enc' is * always ASCII-compatible *) if to_enc = (from_enc :> encoding) then s else Netconversion.recode_string ~in_enc:(from_enc :> encoding) ~out_enc:to_enc ~subst:(fun n -> assert false) s in let write_ascii s = (* Write the ASCII-encoded string 's' *) let s' = convert_ascii s in Pxp_core_types.write os s' 0 (String.length s') in let write_part j l = (* Writes the substring of 'content' beginning at pos 'j' with length 'l' *) if to_enc = (from_enc :> encoding) then Pxp_core_types.write os content j l else begin let s' = Netconversion.recode_string ~in_enc:(from_enc :> encoding) ~out_enc:to_enc ~subst:(fun n -> convert_ascii ("&#" ^ string_of_int n ^ ";")) (String.sub content j l) in Pxp_core_types.write os s' 0 (String.length s') end in let i = ref 0 in for k = 0 to String.length content - 1 do match content.[k] with ('&' | '<' | '>' | '"' | '\'') as c -> if !i < k then write_part !i (k - !i); begin match c with '&' -> write_ascii "&" | '<' -> write_ascii "<" | '>' -> write_ascii ">" | '"' -> write_ascii """ | '\'' -> write_ascii "'" | _ -> assert false end; i := k+1 | _ -> () done; if !i < String.length content then write_part !i (String.length content - !i) (* END OF FIX *) let write_data_string from_enc to_enc s = try let b = Buffer.create 80 in pxp_fix_write_data_string from_enc to_enc (`Out_buffer b) s; Buffer.contents b with | _ -> raise (Query (Undefined ("Cannot convert string \"" ^ s ^ "\" to output encoding"))) let write_markup_string from_enc to_enc s = try let b = Buffer.create 80 in Pxp_aux.write_markup_string from_enc to_enc (`Out_buffer b) s; Buffer.contents b with | _ -> raise (Query (Undefined ("Cannot convert markup name \"" ^ s ^ "\" to output encoding"))) let character enc k = try Pxp_aux.character enc global_warner k with | Pxp_core_types.WF_error msg -> raise (Query (Undefined msg)) galax-1.1/base/register_handlers.mli0000664000076400007640000000225210560462355015713 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: register_handlers.mli,v 1.2 2007/02/01 22:08:45 simeon Exp $ *) (* Module: Register_handlers Description: This module handles registration of close handlers for Galax. These handlers allow persistant stores to do any final cleanup before exiting. We could also allow things like failure handlers, sync handlers but right now we just have proper close handlers. *) (* Close (end of run) handlers *) val register_close_handler : (unit -> unit) -> unit val call_close_handlers : unit -> unit galax-1.1/base/galax_camomile_0.7.ml0000664000076400007640000000516410707660605015372 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: galax_camomile_0.7.ml,v 1.2 2007/10/24 15:15:17 simeon Exp $ *) (* Module: Glx_camomile Description: This module is a wrapper over Camomile operations. 
*) open Error let makedatadir d = Filename.concat d "database" let makelocaledir d = Filename.concat d "locales" let makecharmapdir d = Filename.concat d "charmaps" let makeunimapdir d = Filename.concat d "mappings" module Camomileconfig = struct (* configuration for tools *) let datadir = try makedatadir (Sys.getenv "UNICODE_MAPS") with Not_found -> CamomileLibrary.CamomileDefaultConfig.datadir let localedir = try makelocaledir (Sys.getenv "UNICODE_MAPS") with Not_found -> CamomileLibrary.CamomileDefaultConfig.localedir let charmapdir = try makecharmapdir (Sys.getenv "UNICODE_MAPS") with Not_found -> CamomileLibrary.CamomileDefaultConfig.charmapdir let unimapdir = try makeunimapdir (Sys.getenv "UNICODE_MAPS") with Not_found -> CamomileLibrary.CamomileDefaultConfig.unimapdir end module TextUTF8 = CamomileLibrary.UNF.Make(Camomileconfig)(CamomileLibrary.UTF8) let nfc x = TextUTF8.nfc x let nfd x = TextUTF8.nfd x let nfkc x = TextUTF8.nfkc x let nfkd x = TextUTF8.nfkd x let utf8_string_of_code_point i = Encoding.character (Encoding.get_internal_encoding ()) i let utf8_add_point_to_buffer b i = let c = Encoding.character (Encoding.get_internal_encoding ()) i in Buffer.add_string b c let utf8_string_of_code_points c = let b = Buffer.create 10 in List.iter (utf8_add_point_to_buffer b) c; Buffer.contents b let utf8_codepoint_compare s1 s2 = CamomileLibrary.UTF8.compare s1 s2 let utf8_code_points_of_string s = try let codept_list = ref [] in CamomileLibrary.UTF8.iter (fun uc -> let ic = CamomileLibrary.UChar.code uc in codept_list := (!codept_list) @ [ic]) s; !codept_list with | CamomileLibrary.UChar.Out_of_range -> raise(Query(Unicode_Error("Unicode character cannot be represented by positive integer."))) galax-1.1/base/args.ml0000664000076400007640000000451610560462355012777 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: args.ml,v 1.10 2007/02/01 22:08:45 simeon Exp $ *) (* Module: Args Description: This module implements extraction of function arguments for various arities. 
*) open Error (************************) (* Arguments extraction *) (************************) let get_param0 = function | [] -> () | _ -> raise (Query (Parameter_Mismatch("Expected zero arguments"))) let get_param1 = function | [x1] -> x1 | _ -> raise (Query (Parameter_Mismatch("Expected one argument"))) let get_param2 = function | [x1;x2] -> (x1,x2) | _ -> raise (Query (Parameter_Mismatch("Expected two arguments"))) let get_param3 = function | [x1;x2;x3] -> (x1,x2,x3) | _ -> raise (Query (Parameter_Mismatch("Expected three arguments"))) let get_param4 = function | [x1;x2;x3;x4] -> (x1,x2,x3,x4) | _ -> raise (Query (Parameter_Mismatch("Expected four arguments"))) let get_array_param0 x = if (Array.length x) != 0 then raise (Query (Parameter_Mismatch("Expected array argument of length zero"))) let get_array_param1 x = if (Array.length x != 1) then raise (Query (Parameter_Mismatch("Expected array argument of length one"))) else x.(0) let get_array_param2 x = if (Array.length x != 2) then raise (Query (Parameter_Mismatch("Expected array argument of length two"))) else x.(0), x.(1) let get_array_param3 x = if (Array.length x != 3) then raise (Query (Parameter_Mismatch("Expected array argument of length three"))) else x.(0), x.(1), x.(2) let get_array_param4 x = if (Array.length x != 4) then raise (Query (Parameter_Mismatch("Expected array argument of length four"))) else x.(0), x.(1), x.(2), x.(3) galax-1.1/base/whitespace.mli0000664000076400007640000000300210560462355014335 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: whitespace.mli,v 1.14 2007/02/01 22:08:45 simeon Exp $ *) (* Module: Whitespace Description: Manipulation of whitespace within strings. Used during whitespace handling in XML/XQuery. *) (* Whitespace operations *) val remove_newlines : string -> string (* Remove leading and trailing whitespace characters *) val remove_whitespace : string -> string val remove_leading_whitespace : string -> string val remove_all_whitespace : string -> string val whitespace_only : string -> bool (* Whitespace or not whitespace ? That is the question... *) val white : char -> bool type mode = | Preserve | Default (* Split whitespace-separated strings *) val whitespace_separate : string -> string list val whitespace_normalize : string -> string val whitespace_id_normalize : string -> string val remove_trailing_spaces : string -> string galax-1.1/base/finfo.ml0000664000076400007640000001143210560462355013137 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: finfo.ml,v 1.11 2007/02/01 22:08:45 simeon Exp $ *) (* Module: Finfo Description: This module is used to keep track of the original location of expressions in files during query processing. This is essentially used to provide accurate error messages. 
*) open Format open Pool (******************************************************) (* This module implements location within source file *) (******************************************************) (***************************************) (* Handle on the files being processed *) (***************************************) type file_name = string module FileName = struct type t = file_name let equal = (=) let hash = Hashtbl.hash end module FilesPool = MakeNamePool (FileName) type file_handle = FilesPool.symbol let input_files = FilesPool.create_pool () (* Name of the file currently being processed *) let bogus_file = "" let bogus_file_handle = FilesPool.add_name input_files bogus_file let current_file = ref bogus_file let current_file_id = ref bogus_file_handle (* file info *) type start_end_pos = { file_handle : file_handle; (* which file it comes from *) start_pos : int; (* offset for the begining of a parsed token *) end_pos : int } (* offset for the end of a parsed token *) type line_col_pos = { fh : file_handle; (* which file it comes from *) line : int; (* line for the begining of a parsed token *) col : int } (* column for the begining of a parsed token *) type finfo = | StartEndPos of start_end_pos | LineColPos of line_col_pos (* creates a new location info *) let make_finfo start_pos end_pos = StartEndPos ({ file_handle = !current_file_id; start_pos = start_pos; end_pos = end_pos }) let make_finfo_line_col file line col = LineColPos ({ fh = FilesPool.add_name input_files file; line = line; col = col }) let make_finfo_line_col_id id line col = LineColPos ({ fh = id; line = line; col = col }) let get_file_id file = FilesPool.add_name input_files file let get_current_file () = !current_file let get_current_file_id () = !current_file_id let set_current_file file = current_file := file; current_file_id := FilesPool.add_name input_files file (* Bogus location *) let bogus = StartEndPos { file_handle = bogus_file_handle; start_pos = -1; end_pos = -1 } (* Prints the location within a file *) let loc_in_file file_name pos = let inch = open_in file_name in let lineno = ref 1 in let linepos = ref 0 in for i=0 to pos do try let ch = input_char inch in if ch = '\012' or ch = '\n' then begin incr lineno; linepos := 0 end else incr linepos with | End_of_file -> incr linepos done; close_in inch; !lineno,(!linepos-1) (* Prints the location within a string *) let finfo_to_string finfo = match finfo with | StartEndPos { file_handle = hand ; start_pos = start_pos ; end_pos = end_pos } -> let fname = FilesPool.get_name input_files hand in if fname = "" then sprintf "characters %d-%d" start_pos end_pos else let lineno,linepos = loc_in_file fname start_pos in sprintf "File \"%s\", line %d, characters %d-%d" fname lineno linepos (linepos + end_pos - start_pos) | LineColPos { fh = hand ; line = line ; col = col } -> let fname = FilesPool.get_name input_files hand in sprintf "File \"%s\", line %d, col %d" fname line col (* Create a location from parsing token *) let parsing_locinfo () = try make_finfo (Parsing.symbol_start()) (Parsing.symbol_end()) with | _ -> bogus (* Create a location from lexing token *) let lexing_locinfo lexbuf = make_finfo (Lexing.lexeme_start lexbuf) (Lexing.lexeme_end lexbuf) (* Extract the part of a string corresponding to a given finfo *) let extract s fi = match fi with | StartEndPos fi -> let starting = fi.start_pos in let len = fi.end_pos - starting in String.sub s starting len | LineColPos fi -> raise(Invalid_argument("Finfo: Cannot extract substring using line/col position")) 
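(* Usage sketch (illustrative only, not part of the original module): one
   hypothetical way the location helpers defined above can be combined when
   reporting a problem in a query file. The file name and character offsets
   below are made up for the example; in the real pipeline the parser sets
   the current file and token offsets itself. *)
let example_report_location () =
  (* Record which file is currently being processed. *)
  set_current_file "query.xq";
  (* Build a location covering characters 10-25 of that file... *)
  let fi = make_finfo 10 25 in
  (* ...and render it the same way error messages do. *)
  prerr_endline ("Problem located at: " ^ finfo_to_string fi)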
galax-1.1/base/galax_io.mli0000664000076400007640000000342010654145646013775 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: galax_io.mli,v 1.12 2007/08/01 18:06:30 simeon Exp $ *) (* Module: Galax_io Description: This module is used to represent input and output methods for the Galax engine. *) (****************************) (* Galax I/O specifications *) (****************************) (* What kind of input ? *) type input_spec = | File_Input of string | String_Input of string | Buffer_Input of Netbuffer.t | Http_Input of string | Channel_Input of in_channel (* What kind of output ? *) type output_spec = | File_Output of string | Buffer_Output of Buffer.t | Channel_Output of out_channel | Formatter_Output of Format.formatter (*********************) (* PXP Parsing hooks *) (*********************) type pull_handle type entity_kind = Document_entity | Document_fragment type pxp_stream = (unit -> Pxp_types.event option) * pull_handle val pull_parser_from_input_spec : input_spec -> entity_kind -> pxp_stream val close_pull_parser : pull_handle -> unit val dtd_from_input_spec : input_spec -> Pxp_dtd.dtd val uri_string_of_gio : input_spec -> string option val string_of_gio : input_spec -> string galax-1.1/base/conf.mlp0000664000076400007640000002347610710406215013144 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: conf.mlp,v 1.108 2007/10/26 15:49:33 simeon Exp $ *) (* Module: Conf Description: This module contains global variables and parameters changing the general behavior of the Galax interpretor. 
*) (***********************) (* Version information *) (***********************) let system = "Galax" let version = "%%RELEASE%%" let copyright = "Copyright "^system^" Contributors 2001-2006" let status = "%%STATUS%%" let motd = "Thank you for using "^system^"!\n" let xquery_version = "1.0" (****************) (* Galax stdlib *) (****************) (* Where is the Galax library installed *) let galax_library = try Sys.getenv "GALAXLIB" with Not_found -> "%%LIBDIR%%" let pervasive_content = ref Pervasive.pervasive let unicode_maps = ref None (***********************) (* Behavior parameters *) (***********************) (* Output behavior *) let print_global = ref true let print_prolog = ref false let print_annotations = ref false let verbose = ref false let verbose_error = ref true let warning = ref true (* Default escape function is identity function *) let xml_charescape_fn = ref (fun s -> s) let print_xml = ref false (* Default is false as in the API, the user controls printing *) let xml_output = ref stdout let xml_formatter = ref Format.std_formatter let xml_header = ref ("Dynamic Evaluation:\n" ^"-------------------\n\n") let xml_footer = ref "\n\n------------------------------------------------------------------------------\n\n" let print_expr = ref false let expr_output = ref stdout let expr_formatter = ref Format.std_formatter let expr_header = ref ("XQuery Expression:\n" ^"------------------\n\n") let expr_footer = ref "\n\n------------------------------------------------------------------------------\n\n" let print_type = ref false let type_output = ref stdout let type_formatter = ref Format.std_formatter let type_header = ref ("Static Type Analysis:\n" ^"---------------------\n\n") let type_footer = ref "\n\n------------------------------------------------------------------------------\n\n" let print_core_expr = ref false let core_expr_output = ref stdout let core_expr_formatter = ref Format.std_formatter let core_expr_header = ref ("Normalized Expression (XQuery Core):\n" ^"------------------------------------\n\n") let core_expr_footer = ref "\n\n------------------------------------------------------------------------------\n\n" let print_optimized_expr = ref false let optimized_expr_output = ref stdout let optimized_expr_formatter = ref Format.std_formatter let optimized_expr_header = ref ("Rewritten Normalized Expression (XQuery Core):\n" ^"----------------------------------------------\n\n") let optimized_expr_footer = ref "\n\n------------------------------------------------------------------------------\n\n" let print_factorized_expr = ref false let factorized_expr_output = ref stdout let factorized_expr_formatter = ref Format.std_formatter let factorized_expr_header = ref ("Factorized Normalized Expression (XQuery Core):\n" ^"-----------------------------------------------\n\n") let factorized_expr_footer = ref "\n\n------------------------------------------------------------------------------\n\n" let print_projection = ref false let projection_output = ref stdout let projection_formatter = ref Format.std_formatter let print_projected_file = ref false let projected_file_output = ref stdout let projected_file_formatter = ref Format.std_formatter let print_algebra_optimization_rewrite = ref false let algebra_optimization_rewrite_output = ref stdout let algebra_optimization_rewrite_formatter = ref Format.std_formatter let algebra_optimization_rewrite_header = ref ("Algebra Rerwite (XQuery Algebra):\n" ^"------------------------------\n\n") let 
algebra_optimization_rewrite_footer = ref "\n\n------------------------------------------------------------------------------\n\n" let print_logical_algebra = ref false let logical_algebra_output = ref stdout let logical_algebra_formatter = ref Format.std_formatter let logical_algebra_header = ref ("Logical Plan (XQuery Algebra):\n" ^"------------------------------\n\n") let logical_algebra_footer = ref "\n\n------------------------------------------------------------------------------\n\n" let serialize_logical_algebra = ref false let print_optimized_algebra = ref false let optimized_algebra_output = ref stdout let optimized_algebra_formatter = ref Format.std_formatter let optimized_algebra_header = ref ("Optimized Logical Plan (XQuery Algebra):\n" ^"----------------------------------------\n\n") let optimized_algebra_footer = ref "\n\n------------------------------------------------------------------------------\n\n" let print_physical_algebra = ref false let physical_algebra_output = ref stdout let physical_algebra_formatter = ref Format.std_formatter let physical_algebra_header = ref ("Physical Plan (XQuery Algebra):" ^"\n-----------------------------\n\n") let physical_algebra_footer = ref "\n\n------------------------------------------------------------------------------\n\n" let print_dfgraph = ref false let dfgraph_output = ref stdout let dfgraph_formatter = ref Format.std_formatter let genresults = ref false (********************) (* XML Plan Loading *) (********************) let load_xml_plans = ref false let execute_logical_plan = ref false let execute_optimized_plan = ref false let error_blocking = ref true let glx_stderr = ref stderr let glx_err_formatter = ref Format.err_formatter (*********************) (* Global parameters *) (*********************) (* Default XML Schema namespaces *) (* Note: The following namespace URIs are populated at configuration time! They are used in pervasive.xq, and namespace resolution. - Jerome *) let emptyns = "" let xmlns = "%%XMLURI%%" let xmlnsns = "%%XMLNSURI%%" let xsns = "%%XSURI%%" let xsdns = "%%XSDURI%%" let xsins = "%%XSIURI%%" let fnns = "%%FNURI%%" let xqxns = "%%XQUERYXURI%%" let opns = "%%OPURI%%" let fsns = "%%FSURI%%" let localns = "%%LOCALURI%%" let collns = "%%COLLURI%%" let glxns = "%%GLXURI%%" let errns = "%%ERRURI%%" let bPrinting_comp_annotations = ref false (* Materialization flag *) let print_materialize = ref false (********************************************) (* Experimental parameters for optimization *) (********************************************) (* Note: ALL experimental flags for new optimizations should go here. ALL should be false by default. 
- Jerome 10/25/2004 *) (* New descendant style *) let new_descendant_style = ref false (* Aggressive sbdo remove *) let aggressive_sbdo_remove = ref false (* Physical optimization flags *) let nested_loop_join = ref false (* Variable materialization flag *) let force_materialized_variables = ref false let allow_streamed_tuple_fields = ref false let code_selection_by_physical_type = ref true (* Jungle flags *) let old_children_method = ref false let jungle_buffsize : int option ref = ref None (* SAX materialization buffers *) let buffer_chunks = ref 0 let buffer_csize = ref 10000 let buffer_inc = ref 2 (* Statistics *) let countload = ref 0 let countnext = ref 0 let countexpo = ref 0 (* Language *) type language_kind = | Language_XQuery10 | Language_XQueryUpdates | Language_XQueryBang | Language_XQueryP | Language_DXQ let language = ref Language_XQuery10 let set_language l = language := l let is_xquery () = match !language with | Language_XQuery10 -> true | Language_XQueryUpdates -> false | Language_XQueryBang -> false | Language_XQueryP -> false | Language_DXQ -> false let is_ultf () = match !language with | Language_XQuery10 -> false | Language_XQueryUpdates -> true | Language_XQueryBang -> false | Language_XQueryP -> false | Language_DXQ -> false let is_xquerybang () = match !language with | Language_XQuery10 -> false | Language_XQueryUpdates -> false | Language_XQueryBang -> true | Language_XQueryP -> false | Language_DXQ -> false let is_xqueryp () = match !language with | Language_XQuery10 -> false | Language_XQueryUpdates -> false | Language_XQueryBang -> false | Language_XQueryP -> true | Language_DXQ -> true (* DXQ contains XQueryP *) let is_dxq () = match !language with | Language_XQuery10 -> false | Language_XQueryUpdates -> false | Language_XQueryBang -> false | Language_XQueryP -> false | Language_DXQ -> true type syntax_kind = | XQuery_Syntax | XQueryX_Syntax let syntax = ref XQuery_Syntax let set_syntax s = syntax := s let is_xquery_syntax () = match !syntax with | XQuery_Syntax -> true | XQueryX_Syntax -> false let is_xqueryx_syntax () = match !syntax with | XQuery_Syntax -> false | XQueryX_Syntax -> true let batch_xqueryx = ref false let embed_xqueryx = ref false type materialize_tables_kind = | Always | Analysis | Never let materialize_tables = ref Analysis let set_materialize_tables mat_kind = materialize_tables := mat_kind let get_materialize_tables() = !materialize_tables galax-1.1/base/galax_camomile_0.6.ml0000664000076400007640000000337110677001100015350 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: galax_camomile_0.6.ml,v 1.1 2007/09/27 19:19:28 simeon Exp $ *) (* Module: Glx_camomile Description: This module is a wrapper over Camomile operations. 
*) open Error module TextUTF8 = Camomile.UNF.Make(Camomile.UTF8) let nfc x = TextUTF8.nfc x let nfd x = TextUTF8.nfd x let nfkc x = TextUTF8.nfkc x let nfkd x = TextUTF8.nfkd x let utf8_string_of_code_point i = Encoding.character (Encoding.get_internal_encoding ()) i let utf8_add_point_to_buffer b i = let c = Encoding.character (Encoding.get_internal_encoding ()) i in Buffer.add_string b c let utf8_string_of_code_points c = let b = Buffer.create 10 in List.iter (utf8_add_point_to_buffer b) c; Buffer.contents b let utf8_codepoint_compare s1 s2 = Camomile.UTF8.compare s1 s2 let utf8_code_points_of_string s = try let codept_list = ref [] in Camomile.UTF8.iter (fun uc -> let ic = Camomile.UChar.code uc in codept_list := (!codept_list) @ [ic]) s; !codept_list with | Camomile.UChar.Out_of_range -> raise(Query(Unicode_Error("Unicode character cannot be represented by positive integer."))) galax-1.1/base/debug.ml0000664000076400007640000000564410672301472013130 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: debug.ml,v 1.10 2007/09/13 18:36:42 simeon Exp $ *) (* Module: Debug Description: This module implements basic operations used for debugging. *) type debug_flag = | JoinDebug | TypeDebug | DefaultDebug | CompileDebug | StaticDebug | DxqDebug | MaterializationDebug let debug_flag_of_string s = match s with | "join" -> JoinDebug | "typing" -> TypeDebug | "default" -> DefaultDebug | "compile" -> CompileDebug | "static" -> StaticDebug | "dxq" -> DxqDebug | "materialization" -> MaterializationDebug | _ -> raise Not_found let string_of_flag f = match f with | JoinDebug -> " JOIN" | TypeDebug -> " TYPING" | DxqDebug -> " DXQ" | StaticDebug -> " STATIC" | CompileDebug -> " COMPILE" | MaterializationDebug -> "MATERIALIZATION" | DefaultDebug -> "" let debug_flags = ref [] let print_debug flag msg = let fs = string_of_flag flag in Format.fprintf (!Conf.glx_err_formatter) "[DEBUG%s] %s@.%!" 
fs msg; Format.pp_print_flush (!Conf.glx_err_formatter) () let join_debug () = List.exists (fun x -> x = JoinDebug) !debug_flags let dxq_debug () = List.exists (fun x -> x = DxqDebug) !debug_flags let typing_debug () = List.exists (fun x -> x = TypeDebug) !debug_flags let compile_debug () = List.exists (fun x -> x = CompileDebug) !debug_flags let static_debug () = List.exists (fun x -> x = StaticDebug) !debug_flags let materialization_debug () = List.exists (fun x -> x = MaterializationDebug) !debug_flags let default_debug () = List.exists (fun x -> x = DefaultDebug) !debug_flags let print_join_debug msg = if join_debug() then print_debug JoinDebug msg let print_typing_debug msg = if typing_debug() then print_debug TypeDebug msg let print_compile_debug msg = if compile_debug() then print_debug CompileDebug msg let print_default_debug msg = if default_debug() then print_debug DefaultDebug msg let print_dxq_debug msg = if dxq_debug() then print_debug DxqDebug msg let print_static_debug msg = if static_debug() then print_debug StaticDebug msg let print_materialization_debug msg = if materialization_debug() then print_debug MaterializationDebug msg let sprintf_default_debug ff x = let s = Format.sprintf ff x in print_default_debug s let set_debug df = debug_flags := df galax-1.1/base/pervasive.mli0000664000076400007640000000165610560462355014222 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: pervasive.mli,v 1.5 2007/02/01 22:08:45 simeon Exp $ *) (* Module: Pervasive Description: Contains built-functions signatures and types in XQuery syntax. Generated automatically from the file ./stdlib/pervasive.xq file. *) val pervasive : string galax-1.1/base/finfo.mli0000664000076400007640000000352010560462355013307 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: finfo.mli,v 1.9 2007/02/01 22:08:45 simeon Exp $ *) (* Module: Finfo Description: This module is used to keep track of the original location of expressions in files during query processing. This is essentially used to provide accurate error messages. 
*) type finfo (* A file location *) val get_file_id : string -> int val set_current_file : string -> unit (* The file currently being parsed *) val make_finfo : int -> int -> finfo (* Creates a file location *) val make_finfo_line_col : string -> int -> int -> finfo (* Creates a file location (line, column) in a particular file *) val make_finfo_line_col_id : int -> int -> int -> finfo (* Creates a file location (line, column) in a particular file *) val bogus : finfo (* An empty file location *) val finfo_to_string : finfo -> string (* Prints a location within a string *) val parsing_locinfo : unit -> finfo (* Generates a file location during parsing *) val lexing_locinfo : Lexing.lexbuf -> finfo (* Generates a file location during lexing *) val extract : string -> finfo -> string (* Extracts the part of a string corresponding to a given file location *) galax-1.1/base/args.mli0000664000076400007640000000244010560462355013142 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: args.mli,v 1.11 2007/02/01 22:08:45 simeon Exp $ *) (* Module: Args Description: This module implements extraction of function arguments for various arities. *) (* Arguments extraction *) val get_param0 : 'a list -> unit val get_param1 : 'a list -> 'a val get_param2 : 'a list -> ('a * 'a) val get_param3 : 'a list -> ('a * 'a * 'a) val get_param4 : 'a list -> ('a * 'a * 'a * 'a) val get_array_param0 : 'a array -> unit val get_array_param1 : 'a array -> 'a val get_array_param2 : 'a array -> ('a * 'a) val get_array_param3 : 'a array -> ('a * 'a * 'a) val get_array_param4 : 'a array -> ('a * 'a * 'a * 'a) galax-1.1/base/galax_pxp.mli0000664000076400007640000000211510560462355014170 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: galax_pxp.mli,v 1.2 2007/02/01 22:08:45 simeon Exp $ *) (* Module: Galax_pxp Description: This module contains useful hooks to the PXP parser. *) val glx_default_config : Pxp_types.config val glx_default_entry : Pxp_types.entry val empty_dtd : unit -> Pxp_dtd.dtd val default_dtd : Pxp_dtd.dtd val default_lfactory : Pxp_lexer_types.lexer_factory val default_lexobj : Pxp_lexer_types.lexer_obj galax-1.1/base/error.ml0000664000076400007640000005414510654145646013204 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: error.ml,v 1.64 2007/08/01 18:06:30 simeon Exp $ *) (* Module: Error Description: This module deals with error handling in Galax. 
*) open Format type error = (* Lexing *) | Lexing of Finfo.finfo * string (* Parsing *) | Parsing of Finfo.finfo * string | Algebra_Parsing_Error of string | Namespace_Internal of string | Namespace_Error of string (* Normalization *) | Static_Error of string | Static_Internal of string | Module_Import of string | Annotation_Error of string (* Types *) | Malformed_Type of string | Malformed_Tuple of string | Malformed_Expr of string | Malformed_Core_Expr of string | Malformed_Algebra_Expr of string (* Static Typing *) | Static_Type_Error of string | Automata of string | Undefined_Variable of Finfo.finfo * string * string (* Rewriting *) | Rewriting of string | Streaming_XPath of string (* Factorization *) | Factorization of string (* Compilation *) | Compilation of string | Symbol_Already_Defined of (string * string) (* Optimization *) | Optimization of string (* Code Selection *) | Code_Selection of string | Expr_Error of string | Key_Error of (string * string) | KeyRef_Error of (string * string) | Physical_Type_Error of string (* Evaluation *) | Constructor_Error of string | Type_Error of string | Unicode_Error of string | Validation of string (* Schema Normalization *) | Schema of string | Schema_Internal of string | Schema_Import of string (* Serialization *) | Serialization of string (* Data Model / Loading *) | Datamodel of string | URI_Error of string | Load_Error of string | Cast_Error of string | Protocol_Error of string | Stream_Error of string | Cursor_Error of string | Physical_DM_Error of string | Malformed_DateTimeValue of string | Jungle_Error of string | Shredded_Error of string (* Projection *) | Projection of string (* WSDL *) | Root (* Toplevel tools *) | Toplevel_Error of string | Monitor_Error of string (* Multiple Modules *) | Parameter_Mismatch of string (* Norm, Eval *) | Unknown of string | Internal_Error of string | Wrong_Args of string (* PhysicalDM, Code Selection *) | Prototype of string | Undefined of string (* Namespace, Parsing, Code Selection *) | Mapping_Failure of string (* Normalization, Namespaces *) | Update_Error of string (* DM, Code Selection *) (* Distributed XQuery *) | DXQ_Error of string (* Testing *) | Testing_Error of string (* Generic error with file location *) | Error of string (* XQueryX errors *) | XQueryX_Error of Finfo.finfo * string (* Generic error with file location -- used to wrap internal error with a file location *) | Wrapped_Error of Finfo.finfo * string exception Query of error let fprint_finfo ff finfo = fprintf ff " [%s:]" (Finfo.finfo_to_string finfo) let print_error_verbose ff exn = match exn with (* Lexing *) | Query (Lexing (fi,msg)) -> fprintf ff "Lexing Error: %s" msg; fprint_finfo ff fi (* Parsing *) | Query (Parsing (fi,msg)) -> fprintf ff "Parsing Error: %s" msg; fprint_finfo ff fi | Query (Algebra_Parsing_Error msg) -> fprintf ff "Algebra Parsing Error: %s" msg | Query (Namespace_Error msg) -> fprintf ff "Namespace Error: %s" msg (* Prints Caml errors *) | Query (Namespace_Internal msg) -> fprintf ff "Namespace Internal Error: %s" msg (* Normalization *) | Query (Static_Error (msg)) -> fprintf ff "Static Error: %s" msg | Query (Static_Internal (msg)) -> fprintf ff "Static Internal Error: %s" msg | Query (Module_Import(msg)) -> fprintf ff "Module Import Error: %s" msg | Query (Annotation_Error (msg)) -> fprintf ff "Annotation Error: %s" msg (* Types *) | Query (Malformed_Type (msg)) -> fprintf ff "Malformed Type: %s" msg | Query (Malformed_Tuple (msg)) -> fprintf ff "Malformed Tuple: %s" msg | Query (Malformed_Expr 
(msg)) -> fprintf ff "Malformed Expr: %s" msg | Query (Malformed_Core_Expr (msg)) -> fprintf ff "Malformed Core Expr: %s" msg | Query (Malformed_Algebra_Expr (msg)) -> fprintf ff "Malformed Algebra Expr: %s" msg (* Static Typing *) | Query (Static_Type_Error (msg)) -> fprintf ff "Static_Type Error: %s" msg | Query (Automata msg) -> fprintf ff "Automata Error: %s" msg | Query (Undefined_Variable(fi, vn, msg)) -> fprintf ff "Undefined variable $%s: %s" vn msg; fprint_finfo ff fi (* Rewriting *) | Query (Rewriting msg) -> fprintf ff "Rewriting Internal Error: %s" msg | Query (Streaming_XPath msg) -> fprintf ff "Rewriting Internal Error: %s" msg (* Factorization *) | Query (Factorization msg) -> fprintf ff "Factorization Internal Error: %s" msg (* Compilation *) | Query (Compilation msg) -> fprintf ff "Compilation Internal Error: %s" msg | Query (Symbol_Already_Defined(kind, name)) -> fprintf ff "Symbol_Already_Defined Error: %s %s multiply defined" kind name (* Code Selection *) | Query (Code_Selection msg) -> fprintf ff "Code Selection Internal Error: %s" msg | Query (Expr_Error (msg)) -> fprintf ff "Expr Error: %s" msg | Query (Key_Error (key,msg)) -> fprintf ff "Key Error: %s" msg | Query (KeyRef_Error (key,msg)) -> fprintf ff "KeyRef Error: %s" msg | Query (Physical_Type_Error (msg)) -> fprintf ff "Physical Type Error: %s" msg (* Evaluation *) | Query (Constructor_Error (msg)) -> fprintf ff "Constructor Error: %s" msg | Query (Type_Error (msg)) -> fprintf ff "Type Error: %s" msg | Query (Unicode_Error (msg)) -> fprintf ff "Unicode Error: %s" msg | Query (Validation msg) -> fprintf ff "Validation Error: %s" msg (* Schema Normalization *) | Query (Schema msg) -> fprintf ff "Schema_Error: %s" msg | Query (Schema_Internal msg) -> fprintf ff "Schema Internal Error: %s" msg | Query (Schema_Import(msg)) -> fprintf ff "Schema mapping Error: %s" msg (* Serialization *) | Query (Serialization msg) -> fprintf ff "Serialization Internal Error: %s" msg (* Datamodel *) | Query (Datamodel msg) -> fprintf ff "Datamodel Error: %s" msg | Query (URI_Error msg) -> fprintf ff "URI Error: %s" msg | Query (Load_Error msg) -> fprintf ff "Load Error: %s" msg | Query (Cast_Error msg) -> fprintf ff "Cast Error: %s" msg | Query (Protocol_Error msg) -> fprintf ff "Protocol Error: %s" msg | Query (Stream_Error msg) -> fprintf ff "Stream Error: %s" msg | Query (Cursor_Error msg) -> fprintf ff "Cursor Error: %s" msg | Query (Physical_DM_Error msg) -> fprintf ff "Physical Data Model Error: %s" msg | Query (Malformed_DateTimeValue (msg)) -> fprintf ff "Malformed DateTime Value: %s" msg | Query (Jungle_Error msg) -> fprintf ff "Jungle Error: %s" msg | Query (Shredded_Error msg) -> fprintf ff "Shredded Error: %s" msg (* Projection *) | Query (Projection msg) -> fprintf ff "Projection Internal Error: %s" msg (* WSDL *) | Query Root -> fprintf ff "Root Error: Root node" (* Toplevel *) | Query (Toplevel_Error msg) -> fprintf ff "Toplevel Error: %s" msg | Query (Monitor_Error msg) -> fprintf ff "Monitor Error: %s" msg (* Multiple Modules *) | Query (Parameter_Mismatch (msg)) -> fprintf ff "Parameter_Mismatch Error: %s" msg | Query (Unknown msg) -> fprintf ff "Unknown Error: %s" msg | Query (Internal_Error msg) -> fprintf ff "Internal Error: %s" msg | Query (Wrong_Args s) -> fprintf ff "Wrong_Args Error: %s" s | Query (Prototype msg) -> fprintf ff "Prototype Error: %s" msg | Query (Undefined msg) -> fprintf ff "Undefined Error: %s" msg | Query (Mapping_Failure msg) -> fprintf ff "Mapping_Failure Error: %s" msg | Query 
(Testing_Error (msg)) -> fprintf ff "Testing Error: %s" msg | Query (XQueryX_Error (fi, msg)) -> fprintf ff "XQueryX Error: %s" msg; fprint_finfo ff fi | Query (Update_Error (msg)) -> fprintf ff "Update Error: %s" msg | Query (DXQ_Error (msg)) -> fprintf ff "Distributed XQuery Error: %s" msg (* Top-level error resulting from downgrade_error *) | Query (Error(msg)) -> fprintf ff "Error: %s" msg (* This error is used to re-wrap errors with a file location *) | Query (Wrapped_Error(fi, msg)) -> fprintf ff "%s" msg; fprint_finfo ff fi (* | Boxed_Error (v, printer) -> fprintf ff "%s" (printer v) *) (* Prints Caml errors *) (* Prints Caml errors -- we have to handle all the predefined Caml errors here *) | Match_failure (file, startpos, endpos) -> fprintf ff "Match Failure at %s %d %d" file startpos endpos | Assert_failure(file, startpos, endpos) -> fprintf ff "Assert Failure at %s %d %d" file startpos endpos | Invalid_argument msg -> fprintf ff "Invalid argument: %s" msg | Failure msg -> fprintf ff "Failure exception: %s" msg | Not_found -> fprintf ff "Not_found" | Out_of_memory -> fprintf ff "Out_of_memory" | Stack_overflow -> fprintf ff "Stack_overflow" | End_of_file -> fprintf ff "End_of_file" | Division_by_zero -> fprintf ff "Division_by_zero" | Sys_blocked_io -> fprintf ff "Sys_blocked_io" | Sys_error msg -> fprintf ff "System Error: %s" msg | Unix.Unix_error(unixerr,fname,arg) -> fprintf ff "%s: %s" fname (Unix.error_message unixerr) (* If we end up here, we need to ask PXP to print the error *) | e -> fprintf ff "%s" (Pxp_types.string_of_exn e) let print_error_code ff exn = match exn with (* Lexing *) | Query (Lexing (fi,msg)) -> fprintf ff "Lexing Error"; fprint_finfo ff fi (* Parsing *) | Query (Parsing (fi,msg)) -> fprintf ff "Parsing Error"; fprint_finfo ff fi | Query (Algebra_Parsing_Error msg) -> fprintf ff "Algebra Parsing Error" | Query (Namespace_Error msg) -> fprintf ff "Namespace Error" (* Prints Caml errors *) | Query (Namespace_Internal msg) -> fprintf ff "Namespace Internal Error" (* Normalization *) | Query (Static_Error (msg)) -> fprintf ff "Static Error" | Query (Static_Internal (msg)) -> fprintf ff "Static Internal Error" | Query (Module_Import(msg)) -> fprintf ff "Module Import Error" | Query (Annotation_Error (msg)) -> fprintf ff "Annotation Error" (* Types *) | Query (Malformed_Type (msg)) -> fprintf ff "Malformed Type" | Query (Malformed_Tuple (msg)) -> fprintf ff "Malformed Tuple" | Query (Malformed_Expr (msg)) -> fprintf ff "Malformed Expr" | Query (Malformed_Core_Expr (msg)) -> fprintf ff "Malformed Core Expr" | Query (Malformed_Algebra_Expr (msg)) -> fprintf ff "Malformed Algebra Expr" (* Static Typing *) | Query (Static_Type_Error (msg)) -> fprintf ff "Static_Type Error" | Query (Automata msg) -> fprintf ff "Automata Error" | Query (Undefined_Variable(fi, vn, msg)) -> fprintf ff "Error: Undefined Variable"; fprint_finfo ff fi (* Rewriting *) | Query (Rewriting msg) -> fprintf ff "Rewriting Internal Error" | Query (Streaming_XPath msg) -> fprintf ff "Rewriting Internal Error" (* Factorization *) | Query (Factorization msg) -> fprintf ff "Factorization Internal Error" (* Compilation *) | Query (Compilation msg) -> fprintf ff "Compilation Internal Error" | Query (Symbol_Already_Defined(kind, name)) -> fprintf ff "Symbol_Already_Defined Error:" (* Code Selection *) | Query (Code_Selection msg) -> fprintf ff "Code Selection Internal Error" | Query (Expr_Error (msg)) -> fprintf ff "Expr Error" | Query (Key_Error (key,msg)) -> fprintf ff "Key Error" | Query 
(KeyRef_Error (key,msg)) -> fprintf ff "KeyRef Error" | Query (Physical_Type_Error (msg)) -> fprintf ff "Physical Type Error" (* Evaluation *) | Query (Constructor_Error (msg)) -> fprintf ff "Constructor Error" | Query (Type_Error (msg)) -> fprintf ff "Type Error" | Query (Unicode_Error (msg)) -> fprintf ff "Unicode Error" | Query (Validation msg) -> fprintf ff "Validation Error" (* Schema Normalization *) | Query (Schema msg) -> fprintf ff "Schema_Error" | Query (Schema_Internal msg) -> fprintf ff "Schema Internal Error" | Query (Schema_Import(msg)) -> fprintf ff "Schema mapping Error" (* Serialization *) | Query (Serialization msg) -> fprintf ff "Serialization Internal Error" (* Datamodel *) | Query (Datamodel msg) -> fprintf ff "Datamodel Error" | Query (URI_Error msg) -> fprintf ff "URI Error" | Query (Load_Error msg) -> fprintf ff "Load Error" | Query (Cast_Error msg) -> fprintf ff "Cast Error" | Query (Protocol_Error msg) -> fprintf ff "Protocol Error" | Query (Stream_Error msg) -> fprintf ff "Stream Error" | Query (Cursor_Error msg) -> fprintf ff "Cursor Error" | Query (Physical_DM_Error msg) -> fprintf ff "Physical Data Model Error" | Query (Malformed_DateTimeValue (msg)) -> fprintf ff "Malformed DateTime Value" | Query (Jungle_Error msg) -> fprintf ff "Jungle Error" | Query (Shredded_Error msg) -> fprintf ff "Shredded Error" (* Projection *) | Query (Projection msg) -> fprintf ff "Projection Internal Error" (* WSDL *) | Query Root -> fprintf ff "Root Error: Root node" (* Toplevel *) | Query (Toplevel_Error msg) -> fprintf ff "Toplevel Error" | Query (Monitor_Error msg) -> fprintf ff "Monitor Error" (* Multiple Modules *) | Query (Parameter_Mismatch (msg)) -> fprintf ff "Parameter_Mismatch Error" | Query (Unknown msg) -> fprintf ff "Unknown Error" | Query (Internal_Error msg) -> fprintf ff "Internal Error" | Query (Wrong_Args s) -> fprintf ff "Wrong_Args Error" | Query (Prototype msg) -> fprintf ff "Prototype Error" | Query (Undefined msg) -> fprintf ff "Undefined Error" | Query (Mapping_Failure msg) -> fprintf ff "Mapping_Failure Error" | Query (Testing_Error (msg)) -> fprintf ff "Testing Error" | Query (XQueryX_Error (fi,msg)) -> fprintf ff "XQueryX Error" | Query (Update_Error (msg)) -> fprintf ff "Update Error" | Query (DXQ_Error (msg)) -> fprintf ff "Distributed XQuery Error" | Query (Error (msg)) -> fprintf ff "Error" | Query (Wrapped_Error (fi, msg)) -> fprintf ff "Error" (* Prints Caml errors -- we have to handle all the predefined Caml errors here *) (* | Boxed_Error (v, printer) -> fprintf ff "Error" *) | Match_failure (file, startpos, endpos) -> fprintf ff "Match Failure Error at %s %d %d" file startpos endpos | Assert_failure(file, startpos, endpos) -> fprintf ff "Assert Failure Errorat %s %d %d" file startpos endpos | Invalid_argument msg -> fprintf ff "Invalid argument Error: %s" msg | Failure msg -> fprintf ff "Failure exception Error: %s" msg | Not_found -> fprintf ff "Not_found Error" | Out_of_memory -> fprintf ff "Out_of_memory Error" | Stack_overflow -> fprintf ff "Stack_overflow Error" | Sys_error _ -> fprintf ff "System Error" | End_of_file -> fprintf ff "End_of_file Error" | Division_by_zero -> fprintf ff "Division_by_zero Error" | Sys_blocked_io -> fprintf ff "Sys_blocked_io Error" (* If we end up here, we need to ask PXP to print the error *) | e -> fprintf ff "%s" (Pxp_types.string_of_exn e) let print_error_dispatch ff exn = if !Conf.verbose_error then print_error_verbose ff exn else print_error_code ff exn let print_error_safe_dispatch ff exn = try 
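(* Added note (illustrative, not part of the original source): the two
   printers above are reached through the stubs defined further below; a
   caller typically reports an exception with something like

     eprintf_error "galax: " exn

   which writes to !Conf.glx_err_formatter, using print_error_verbose when
   !Conf.verbose_error is set and print_error_code otherwise. *)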
print_error_dispatch ff exn with | _ -> fprintf ff "Unknown Error Occurred" let print_error ff exn = begin print_error_dispatch ff exn; pp_print_flush ff () end let print_error_safe ff exn = begin print_error_safe_dispatch ff exn; pp_print_flush ff () end let printf_error s exn = Gmisc.printf_stub s print_error exn let eprintf_error s error = Gmisc.eprintf_stub s print_error error let bprintf_error s error = Gmisc.bprintf_stub s print_error error let printf_error_safe s error = Gmisc.printf_stub s print_error_safe error let eprintf_error_safe s error = Gmisc.eprintf_stub s print_error_safe error let bprintf_error_safe s error = Gmisc.bprintf_stub s print_error_safe error let print_warning ff s = if !Conf.warning then fprintf ff "[WARNING] %s\n" s let printf_warning s = Gmisc.printf_stub "" print_warning s let eprintf_warning s = Gmisc.eprintf_stub "" print_warning s let bprintf_warning s = Gmisc.bprintf_stub "" print_warning s let error_with_file_location fi exn = let fi_string = "\nAt "^(Finfo.finfo_to_string fi) in match exn with (* Lexing *) | Query (Lexing (_,msg)) -> Query(Lexing(fi,msg)) (* Parsing *) | Query (Parsing (_,msg)) -> Query(Parsing(fi,msg)) | Query (Algebra_Parsing_Error msg) -> Query(Algebra_Parsing_Error(msg^fi_string)) | Query (Namespace_Error msg) -> Query(Namespace_Error(msg^fi_string)) | Query (Namespace_Internal msg) -> Query(Namespace_Internal(msg^fi_string)) (* Normalization *) | Query (Static_Error (msg)) -> Query(Static_Error(msg^fi_string)) | Query (Static_Internal (msg)) -> Query(Static_Internal(msg^fi_string)) | Query (Module_Import(msg)) -> Query(Module_Import(msg^fi_string)) | Query (Annotation_Error (msg)) -> Query(Annotation_Error(msg^fi_string)) (* Types *) | Query (Malformed_Type (msg)) -> Query(Malformed_Type(msg^fi_string)) | Query (Malformed_Tuple (msg)) -> Query(Malformed_Tuple(msg^fi_string)) | Query (Malformed_Expr (msg)) -> Query(Malformed_Expr(msg^fi_string)) | Query (Malformed_Core_Expr (msg)) -> Query(Malformed_Core_Expr(msg^fi_string)) | Query (Malformed_Algebra_Expr (msg)) -> Query(Malformed_Algebra_Expr(msg^fi_string)) (* Static Typing *) | Query (Static_Type_Error (msg)) -> Query(Static_Type_Error(msg^fi_string)) | Query (Automata msg) -> Query(Automata(msg^fi_string)) | Query (Undefined_Variable(_, vn, msg)) -> Query(Undefined_Variable(fi, vn, msg^fi_string)) (* Rewriting *) | Query (Rewriting msg) -> Query(Rewriting(msg^fi_string)) | Query (Streaming_XPath msg) -> Query(Streaming_XPath(msg^fi_string)) (* Factorization *) | Query (Factorization msg) -> Query(Factorization(msg^fi_string)) (* Compilation *) | Query (Compilation msg) -> Query(Compilation(msg^fi_string)) | Query (Symbol_Already_Defined(kind, name)) -> Query(Symbol_Already_Defined(kind, name^fi_string)) (* Code Selection *) | Query (Code_Selection msg) -> Query(Code_Selection(msg^fi_string)) | Query (Expr_Error (msg)) -> Query(Expr_Error(msg^fi_string)) | Query (Key_Error (key,msg)) -> Query(Key_Error(key, msg^fi_string)) | Query (KeyRef_Error (key,msg)) -> Query(KeyRef_Error(key, msg^fi_string)) | Query (Physical_Type_Error msg) -> Query(Physical_Type_Error(msg^fi_string)) (* Evaluation *) | Query (Constructor_Error (msg)) -> Query(Constructor_Error(msg^fi_string)) | Query (Type_Error (msg)) -> Query(Type_Error(msg^fi_string)) | Query (Unicode_Error (msg)) -> Query(Unicode_Error(msg^fi_string)) | Query (Validation msg) -> Query(Validation(msg^fi_string)) (* Schema Normalization *) | Query (Schema msg) -> Query(Schema(msg^fi_string)) | Query (Schema_Internal msg) -> 
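(* Added usage sketch (illustrative only): this function is meant to be used
   when re-raising an exception with a source location attached, e.g.

     try process_expr e
     with exn -> raise (error_with_file_location fi exn)

   where fi is the Finfo.finfo of the expression at hand and process_expr is
   a placeholder name. *)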
Query(Schema_Internal(msg^fi_string)) | Query (Schema_Import(msg)) -> Query(Schema_Import(msg^fi_string)) (* Serialization *) | Query (Serialization msg) -> Query(Serialization(msg^fi_string)) (* Datamodel *) | Query (Datamodel msg) -> Query(Datamodel(msg^fi_string)) | Query (URI_Error msg) -> Query(URI_Error(msg^fi_string)) | Query (Load_Error msg) -> Query(Load_Error(msg^fi_string)) | Query (Cast_Error msg) -> Query(Cast_Error(msg^fi_string)) | Query (Protocol_Error msg) -> Query(Protocol_Error(msg^fi_string)) | Query (Stream_Error msg) -> Query(Stream_Error(msg^fi_string)) | Query (Cursor_Error msg) -> Query(Cursor_Error(msg^fi_string)) | Query (Physical_DM_Error msg) -> Query(Physical_DM_Error(msg^fi_string)) | Query (Malformed_DateTimeValue (msg)) -> Query(Malformed_DateTimeValue(msg^fi_string)) | Query (Jungle_Error msg) -> Query(Jungle_Error(msg^fi_string)) | Query (Shredded_Error msg) -> Query(Shredded_Error(msg^fi_string)) (* Projection *) | Query (Projection msg) -> Query(Projection(msg^fi_string)) (* Toplevel *) | Query (Toplevel_Error msg) -> Query(Toplevel_Error(msg^fi_string)) | Query (Monitor_Error msg) -> Query(Monitor_Error(msg^fi_string)) (* Multiple Modules *) | Query (Parameter_Mismatch (msg)) -> Query(Parameter_Mismatch(msg^fi_string)) | Query (Unknown msg) -> Query(Unknown(msg^fi_string)) | Query (Internal_Error msg) -> Query(Internal_Error(msg^fi_string)) | Query (Wrong_Args msg) -> Query(Wrong_Args(msg^fi_string)) | Query (Prototype msg) -> Query(Prototype(msg^fi_string)) | Query (Undefined msg) -> Query(Undefined(msg^fi_string)) | Query (Mapping_Failure msg) -> Query(Mapping_Failure(msg^fi_string)) | Query (Testing_Error (msg)) -> Query(Testing_Error(msg^fi_string)) | Query (XQueryX_Error (_,msg)) -> Query(XQueryX_Error(fi,msg)) | Query (Update_Error (msg)) -> Query(Update_Error(msg^fi_string)) | Query (DXQ_Error (msg)) -> Query(DXQ_Error(msg^fi_string)) (* Top-level error resulting from downgrade_error *) | Query (Error(msg)) -> Query(Error(msg^fi_string)) (* Prints Caml errors -- we have to handle all the predefined Caml errors here *) | Invalid_argument msg -> Invalid_argument(msg^fi_string) | Failure msg -> Failure(msg^fi_string) | Sys_error msg -> Sys_error(msg^fi_string) (* If we end up here, we need to ask PXP to print the error *) | e -> e galax-1.1/base/dynamic_buffer.mli0000664000076400007640000000450010560462355015162 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: dynamic_buffer.mli,v 1.2 2007/02/01 22:08:45 simeon Exp $ *) (* Module: Dynamic_buffer Description: This module implements a buffer that automatically grows (if need be) when adding new elements. The buffer itself is composed of an array of arrays (chunks) the index space of which is exposed to the outside world as being linear. The buffer maintains a single internal cursor for keeping track of the current position; usage should adhere to the following protocol: - create the buffer - add values to the buffer - position the cursor, read values etc. - reset the buffer, then start again The allocated buffer space will never shrink (even in case of a reset). This behaviour is intentional. - Michael *) (* The structure holding the actual data. *) type 'a t (* Signals that the end of the buffer has been reached. 
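(* Added usage sketch (illustrative, not part of the original interface):

     let buf = Dynamic_buffer.make 4 1024 2 0 in
     List.iter (Dynamic_buffer.add buf) [1; 2; 3];
     Dynamic_buffer.position buf 0;
     let first = Dynamic_buffer.next buf in    (* first = 1 *)
     Dynamic_buffer.reset buf

   following the create / add / position / read / reset protocol described
   above; the numeric parameters to make are arbitrary example values. *)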
*) exception Exhausted (* Creates a new buffer, using the specified (int that order) - number of chunks - chunk size - increment - initial value. *) val make : int -> int -> int -> 'a -> 'a t (* Sets the internal cursor to the specified index position. *) val position : 'a t -> int -> unit (* Returns the value currently pointed at by the internal cursor, then increments that cursor. *) val next : 'a t -> 'a (* Adds a value at the end of the buffer. *) val add : 'a t -> 'a -> unit (* Resets the internal cursor to position 0. *) val reset : 'a t -> unit (* True just after creation or after reset has been applied. *) val is_empty : 'a t -> bool (* Returns the index position currently pointed at by the internal cursor. *) val get_position : 'a t -> int galax-1.1/base/register_handlers.ml0000664000076400007640000000266110560462355015546 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: register_handlers.ml,v 1.2 2007/02/01 22:08:45 simeon Exp $ *) (* Module: Register_handlers Description: This module handles registration of close handlers for Galax. These handlers allow persistant stores to do any final cleanup before exiting. We could also allow things like failure handlers, sync handlers but right now we just have proper close handlers. *) (*****************************************) (* A list of close handlers to be called *) (* Order is the reverse of registration *) (*****************************************) let (close_handlers: (unit -> unit) list ref) = ref [] let register_close_handler (h: unit -> unit) = close_handlers := h :: !close_handlers let call_close_handlers () = List.iter (fun x -> x ()) !close_handlers galax-1.1/base/whitespace.ml0000664000076400007640000001035310560462355014173 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: whitespace.ml,v 1.15 2007/02/01 22:08:45 simeon Exp $ *) (* Module: Whitespace Description: Manipulation of whitespace within strings. Used during whitespace handling in XML/XQuery. 
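(* Added examples (illustrative), assuming the functions below behave as
   described in this file:

     whitespace_separate "a  b\tc"      (* = ["a"; "b"; "c"] *)
     remove_leading_whitespace "  x"    (* = "x" *)
     whitespace_only " \n\t "           (* = true *)
*)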
*) (**************************) (* Processing white space *) (**************************) let rec get_whitespace_gen isspace s b = try let c = Stream.next s in if (isspace c) then begin Buffer.add_char b c; get_whitespace_gen isspace s b end else let newb = Buffer.create 50 in begin Buffer.add_char newb c; let result = ((Buffer.contents b), (get_non_whitespace_gen isspace s newb)) in begin Buffer.reset newb; Buffer.clear newb; result end end with | Stream.Failure -> ("","") and get_non_whitespace_gen isspace s b = try let c = Stream.next s in if not(isspace c) then begin Buffer.add_char b c; get_non_whitespace_gen isspace s b end else let newb = Buffer.create 50 in begin Buffer.add_char newb c; let (lead,trail) = (get_whitespace_gen isspace s newb) in let result = (Buffer.contents b) ^ lead ^ trail in begin Buffer.reset newb; Buffer.clear newb; result end end with | Stream.Failure -> (Buffer.contents b) let get_whitespace s b = let isspace c = (c = ' ' || c = '\n' || c = '\t' || c = '\r') in get_whitespace_gen isspace s b let whitespace_only text = let b = Buffer.create 50 in let s = Stream.of_string text in match (get_whitespace s b) with | (_,"") -> true | (_,trail) -> false let remove_whitespace_and_empty text = let b = Buffer.create 50 in let s = Stream.of_string text in let result = match (get_whitespace s b) with | (_,"") -> None | (_,trail) -> Some trail in begin Buffer.reset b; Buffer.clear b; result end let remove_whitespace text = let b = Buffer.create 50 in let s = Stream.of_string text in match (get_whitespace s b) with | (_,trail) -> trail let re = Netstring_str.regexp "[ \n\t\r]*" let remove_leading_whitespace text = Netstring_str.replace_first re "" text let get_newlines s b = let isspace c = (c = '\n' || c = '\r') in get_whitespace_gen isspace s b let remove_newlines text = let b = Buffer.create 50 in let s = Stream.of_string text in match (get_newlines s b) with | (_,trail) -> trail let remove_all_whitespace text = Netstring_str.global_replace re "" text (* Whitespace or not whitespace ? That is the question... *) let white c = (c = ' ') || (c = '\n') || (c = '\t') || (c = '\r') (* Whitespace preserving mode *) type mode = | Preserve | Default (* Split whitespace-separated strings *) let whitespace_regexp = Str.regexp "[ \t\n]+" let whitespace_pair = Str.regexp "\013\n" let whitespace_single = Str.regexp "\013" let whitespace_onespace = Str.regexp "[ \t\n]" let whitespace_separate str = Str.split whitespace_regexp str let whitespace_normalize str = let str = Str.global_replace whitespace_pair "\n" str in let str = Str.global_replace whitespace_single "\n" str in let str = Str.global_replace whitespace_onespace " " str in str let whitespace_id_normalize str = let str = Str.global_replace whitespace_regexp " " str in remove_whitespace str let remove_trailing_spaces str = let str = Str.global_replace whitespace_pair "\n" str in let str = Str.global_replace whitespace_single "\n" str in let i = ref ((String.length str) - 1) in while if !i >= 0 then ((String.get str !i) = '\n' || (String.get str !i) = ' ') else false do decr i done; Str.string_before str (!i+1) galax-1.1/base/gmisc.ml0000664000076400007640000003611610571347146013150 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. 
*) (* *) (***********************************************************************) (* $Id: gmisc.ml,v 1.47 2007/02/28 18:48:06 mff Exp $ *) (* Module: Gmisc Description: This module implements some additions to the Caml standard library that appeared to be useful in the process of developing Galax. *) (* Note: All these functions raise standard Caml exceptions, rather than Galax exceptions in the rest of the system. - Jerome *) (*****************) (* I/O functions *) (*****************) (* Load the content of a file into a string *) (* Grab a file and stuff it into a string *) let string_of_file file = try let inchan = open_in_bin file in let len = in_channel_length inchan in let buf = String.create len in really_input inchan buf 0 len; close_in inchan; (* Windows 95 terminates lines with carriage return as well as newline; this screws up the Unix version when it reads in files created under Windows. So, we get rid of the carriage returns, not the ideal solution. *) if Sys.os_type = "Win32" then buf else Str.global_replace (Str.regexp "\r\n") "\n" buf with Sys_error err -> Printf.eprintf "galaxd: could not read the file %s, got error Sys_error %s\n@?" file err; raise(Sys_error err) let load_file_in_buffer nb f = let ic = open_in f in while Netbuffer.add_inplace nb (input ic) > 0 do () done; close_in ic let load_string_in_buffer nb s = Netbuffer.add_string nb s let load_file f = let nb = Netbuffer.create 100 in begin load_file_in_buffer nb f; Netbuffer.contents nb end let get_files_in_directory dir = let dirh = Unix.opendir dir in let rec get_them l = match try Some(Unix.readdir dirh) with _ -> None with | None -> Unix.closedir dirh; l | Some x -> get_them (x::l) in Sort.list (<=) (get_them []) (* Convert a shell-style regular expression, using the special characters, ?*[], to a caml-style regular expression. *) let convert_regexp s = let s = Str.global_replace (Str.regexp "\\+") "\\+" s in let s = Str.global_replace (Str.regexp "\\^") "\\^" s in let s = Str.global_replace (Str.regexp "\\$") "\\$" s in let s = Str.global_replace (Str.regexp "\\.") "\\." s in let s = Str.global_replace (Str.regexp "\\?") "." 
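(* Added example (illustrative): a shell-style pattern such as "*.xml" is
   rewritten by this function to the Str pattern ".*\\.xml$", so that
   ls "." "*.xml" below returns the names ending in ".xml". *)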
s in let s = Str.global_replace (Str.regexp "\\*") ".*" s in s ^ "$" let ls dir pattern = let files = get_files_in_directory dir in let re = Str.regexp (convert_regexp pattern) in let rec filter l = match l with [] -> [] | hd::tl -> if Str.string_match re hd 0 then hd::(filter tl) else filter tl in filter files (******************) (* List functions *) (******************) (* an additional function to partition list, but only getting the first element that satisfy the given predicate *) let partition_first p l = let rec part_first remain = function | [] -> raise Not_found | x :: l -> let (t,v) = (p x) in if t then (v,(List.rev remain)@ l) else part_first (x :: remain) l in part_first [] l let partition_index f l = let index = ref 0 in let rec part_aux f l = match l with | [] -> ([],[]) | e :: r -> if (f !index) then begin incr index; let (m1,m2) = part_aux f r in (e :: m1,m2) end else begin incr index; let (m1,m2) = part_aux f r in (m1,e :: m2) end in part_aux f l let rec partition_pairs l = match l with | [] -> [] | x :: [] -> raise (Invalid_argument "[Gmisc.partition_pairs]: not a pair") | x1 :: x2 :: r -> (x1,x2) :: (partition_pairs r) (* Filter non existing elements from a list *) let filter_non_exists g1 g2 = let partfun y = not(List.exists (fun x -> x = y) g2) in List.filter partfun g1 (* Map concat *) let map_concat f l = List.concat (List.map f l) (* N first items *) let rec list_npeek n list = if n <= 0 then [] else match list with | [] -> [] | x :: rest -> x :: (list_npeek (n-1) rest) (* Triple split *) let rec triple_split l = match l with | [] -> ([],[],[]) | (a,b,c)::l' -> let (l1,l2,l3)=triple_split l' in (a::l1,b::l2,c::l3) (* Remove duplicates *) let rec remove_duplicates ps = match ps with | [] -> [] | a::ps' -> if List.mem a ps' then remove_duplicates ps' else a::remove_duplicates ps' let sort_and_remove_duplicates_revcompare rev_compare d = if d = [] then [] else (* has at least one element, so List.hd call is safe *) begin (* Sort them in opposite order so we can use tail recursive fold_left without reverse or [] *) let sorted_list = List.fast_sort rev_compare d in let head = List.hd sorted_list in (* now remove duplicates *) (* fst because last_seen is already in the list *) fst (List.fold_left (fun (cur_list, last_seen) new_value -> if ((rev_compare new_value last_seen) = 0) then (cur_list, last_seen) else (new_value :: cur_list, new_value)) ([head], (head)) (List.tl sorted_list)) end let rec unwrap_option_list l = match l with | [] -> [] | None::l' -> unwrap_option_list l' | (Some v)::l' -> v :: (unwrap_option_list l') let some_list l = let len = List.length l in let unwrap_l = (unwrap_option_list l) in if (List.length unwrap_l) = len then unwrap_l else raise Not_found let is_some opt = match opt with | None -> false | Some _ -> true (* X subset Y *) let is_subset x_set y_set = List.fold_left (fun is_subset x -> is_subset && (List.mem x y_set)) true x_set (* X intersect Y *) let intersect_list l1 l2 = List.filter (fun x -> List.mem x l1) l2 (* X - Y *) let difference_list l1 l2 = List.filter (fun x -> not (List.mem x l2)) l1 (*********************) (* Hashtbl functions *) (*********************) (* Creates and load a hash table *) let create_hashtable size init = let tbl = Hashtbl.create size in List.iter (fun (key, data) -> Hashtbl.add tbl key data) init; tbl let all_of_hashtable ht = let f a b c = (a, b) :: c in Hashtbl.fold f ht [] let keys_of_hashtable ht = let f a b c = (a) :: c in Hashtbl.fold f ht [] let cond_add nh x y = if Hashtbl.mem nh x then () else 
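(* Added examples (illustrative) for the list helpers defined above:

     list_npeek 2 [1; 2; 3; 4]          (* = [1; 2] *)
     remove_duplicates [1; 2; 1; 3]     (* = [2; 1; 3] *)
     intersect_list [1; 2; 3] [2; 3; 4] (* = [2; 3] *)
     difference_list [1; 2; 3] [2]      (* = [1; 3] *)
*)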
Hashtbl.add nh x y let merge_hashtable h1 h2 = Hashtbl.iter (cond_add h1) h2; h1 (********************) (* String functions *) (********************) let split_string_on_char s c = let mid = (String.index s c) in let sta1 = 0 in let len1 = mid in let sta2 = mid + 1 in let len2 = (String.length s) - sta2 in (String.sub s sta1 len1, String.sub s sta2 len2) let split_right_on_char s c = try split_string_on_char s c with Not_found -> ("", s) let split_left_on_char s c = try split_string_on_char s c with Not_found -> (s, "") let rec split_on_char s c = try let (s1, s2) = split_string_on_char s c in s1 :: split_on_char s2 c with Not_found -> s :: [] let remove_leading s c = let l = String.length s in let rest_first = let rec loop s i = if i >= l then l-1 else if s.[i] = c then loop s (i+1) else i in loop s 0 in String.sub s rest_first (l-rest_first) let remove_trailing s c = if (String.length s > 0) then let rest_last = let rec loop s i = if i = 0 then 1 else if s.[i] = c then loop s (i-1) else i+1 in loop s ((String.length s) - 1) in String.sub s 0 rest_last else s let quote1 = Str.regexp "\"" let quote2 = Str.regexp "'" let quote_quotes str = let str = Str.global_replace quote1 """ str in let str = Str.global_replace quote2 "'" str in str (*********************) (* Parsing functions *) (*********************) let wrap_lexer f s = let lexbuf = Lexing.from_string s in f lexbuf (**********************) (* Printing functions *) (**********************) (* Print to stdout *) let printf_stub s f x = Format.printf "%s%a@?" s f x (* Print to stderr *) let eprintf_stub s f x = Format.fprintf (!Conf.glx_err_formatter) "%s%a@?" s f x (* Print to output channel *) let fprintf_stub c s f x = Format.fprintf c "%s%a@?" s f x (* Print to a string buffer *) let bprintf_stub s f x = let buff = Buffer.create 50 in Format.bprintf buff "%s%a@?" 
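(* Added examples (illustrative) for the string helpers defined above:

     split_string_on_char "a=b" '='     (* = ("a", "b") *)
     split_on_char "a,b,c" ','          (* = ["a"; "b"; "c"] *)
     remove_trailing "abc   " ' '       (* = "abc" *)
*)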
s f x; let result = Buffer.contents buff in Buffer.reset buff; result (**********************) (* Filename functions *) (**********************) (* Rename a DOS dir to a UNIX dir *) let rename_dir wd = Str.global_replace (Str.regexp (Str.quote "\\")) "/" wd (******************) (* Hash Functions *) (******************) let prime_seed = 23 let string_hash str = let hash_code = ref 393 in let cur_prime = ref prime_seed in for i = 0 to (String.length str) - 1 do cur_prime := !cur_prime * prime_seed; hash_code := !hash_code + ((!cur_prime) * (Char.code str.[i])) done; !hash_code (*********************) (* Integer functions *) (*********************) (* Some missing conversions *) (* Some auxiliary conversion operations between Caml numeric types *) let max_int32 = Int32.max_int let min_int32 = Int32.min_int let max_int31 = Int32.of_int max_int let min_int31 = Int32.of_int min_int let bmax_int31 = Big_int.big_int_of_int max_int let bmin_int31 = Big_int.big_int_of_int min_int let bmax_int32 = Big_int.big_int_of_int max_int let bmin_int32 = Big_int.big_int_of_int min_int let big_int_of_int32 i = let div = Big_int.big_int_of_int (Int32.to_int (Int32.div i max_int31)) in let rem = Big_int.big_int_of_int (Int32.to_int (Int32.rem i max_int31)) in Big_int.add_big_int (Big_int.mult_big_int div bmax_int31) rem let bmax_int32 = big_int_of_int32 Int32.max_int let bmin_int32 = big_int_of_int32 Int32.min_int let int32_of_big_int bi = if (Big_int.lt_big_int bmax_int32 bi) || (Big_int.lt_big_int bi bmin_int32) then raise (Failure "Big_int out of bound for int32 conversion") else let (bdiv,brem) = Big_int.quomod_big_int bi bmax_int31 in let div = Int32.of_int (Big_int.int_of_big_int bdiv) in let rem = Int32.of_int (Big_int.int_of_big_int brem) in Int32.add (Int32.mul div max_int31) rem let dmax_int64 = Int64.max_int let dmin_int64 = Int64.min_int let dmax_int32 = Int64.of_int32 Int32.max_int let dmin_int32 = Int64.of_int32 Int32.min_int let big_int_of_int64 i = let div1 = Int64.div i dmax_int32 in let rem1 = big_int_of_int32 (Int64.to_int32 (Int64.rem i dmax_int32)) in let div2 = big_int_of_int32 (Int64.to_int32 (Int64.div div1 dmax_int32)) in let rem2 = big_int_of_int32 (Int64.to_int32 (Int64.rem div1 dmax_int32)) in let result1 = Big_int.add_big_int (Big_int.mult_big_int div2 bmax_int32) rem2 in Big_int.add_big_int (Big_int.mult_big_int result1 bmax_int32) rem1 let bmax_int64 = big_int_of_int64 Int64.max_int let bmin_int64 = big_int_of_int64 Int64.min_int let int64_of_big_int bi = if (Big_int.lt_big_int bmax_int64 bi) || (Big_int.lt_big_int bi bmin_int64) then raise (Failure "Big_int out of bound for int64 conversion") else let (bdiv1,brem1) = Big_int.quomod_big_int bi bmax_int32 in let (bdiv2,brem2) = Big_int.quomod_big_int bdiv1 bmax_int32 in let div2 = Int64.of_int32 (int32_of_big_int bdiv2) in let rem2 = Int64.of_int32 (int32_of_big_int brem2) in let result1 = Int64.add (Int64.mul div2 dmax_int32) rem2 in let rem1 = Int64.of_int32 (int32_of_big_int brem1) in Int64.add (Int64.mul result1 dmax_int32) rem1 let compose f g = function x -> f(g(x)) let int_of_hex_char c = match c with | '0' -> 0 | '1' -> 1 | '2' -> 2 | '3' -> 3 | '4' -> 4 | '5' -> 5 | '6' -> 6 | '7' -> 7 | '8' -> 8 | '9' -> 9 | 'a' | 'A' -> 10 | 'b' | 'B' -> 11 | 'c' | 'C' -> 12 | 'd' | 'D' -> 13 | 'e' | 'E' -> 14 | 'f' | 'F' -> 15 | _ -> raise (Invalid_argument "Invalid hex character") let hex_char_of_int i = match i with | 0 -> '0' | 1 -> '1' | 2 -> '2' | 3 -> '3' | 4 -> '4' | 5 -> '5' | 6 -> '6' | 7 -> '7' | 8 -> '8' | 9 -> '9' | 10 -> 'A' 
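(* Added note (illustrative): with these two tables and the pair functions
   just below, one octet maps to two hexadecimal digits and back, e.g.
   hex_char_pair_of_int 171 = ('A', 'B') and
   int_of_hex_char_pair 'A' 'B' = 171; this is the encoding used by
   binary_of_hexString / string_of_hexBinary further below. *)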
| 11 -> 'B' | 12 -> 'C' | 13 -> 'D' | 14 -> 'E' | 15 -> 'F' | _ -> raise (Invalid_argument "Invalid integer range in hex") let hex_char_pair_of_int i = let i1 = i / 16 in let i2 = i mod 16 in (hex_char_of_int i1, hex_char_of_int i2) let int_of_hex_char_pair c1 c2 = let i1 = int_of_hex_char c1 in let i2 = int_of_hex_char c2 in 16 * i1 + i2 let char_of_hex_char_pair c1 c2 = Char.chr (int_of_hex_char_pair c1 c2) (* XML Schema Datatypes 3.2.15 hexBinary [Definition:] hexBinary represents arbitrary hex-encoded binary data. The ·value space· of hexBinary is the set of finite-length sequences of binary octets. 3.2.15.1 Lexical Representation hexBinary has a lexical representation where each binary octet is encoded as a character tuple, consisting of **two(2)** hexadecimal digits ([0-9a-fA-F]) representing the octet code. For example, "0FB7" is a hex encoding for the 16-bit integer 4023 (whose binary representation is 111110110111). *) let binary_of_hexString s = if s = "" then "" else begin let l = String.length s in let dv = l / 2 in let md = l mod 2 in let newl = if md = 0 then dv else raise (Invalid_argument("Text: \"" ^ s ^ "\" not a hexBinary value (hex digit pairs)")) in let news = String.make newl 'x' in let (current,c1,c2) = if md = 0 then (ref 2, ref (String.get s 0), ref (String.get s 1)) else (ref 1, ref '0', ref (String.get s 0)) in String.set news ((!current-1) / 2) (char_of_hex_char_pair !c1 !c2); while (!current < l - 1) do c1 := (String.get s !current); c2 := (String.get s (!current+1)); current := !current + 2; String.set news ((!current-1) / 2) (char_of_hex_char_pair !c1 !c2) done; news end let string_of_hexBinary s = if s = "" then "" else begin let l = String.length s in let newl = l * 2 in let news = String.make newl 'x' in let current = ref 0 in while (!current < newl) do let i = Char.code (String.get s (!current / 2)) in let (c1,c2) = hex_char_pair_of_int i in String.set news !current c1; String.set news (!current+1) c2; current := !current+2 done; news end (* type ct = | OpenComment | CloseComment | Constructor let ct_of_chars c1 c2 = match (c1,c2) with | '(',':' -> OpenComment | ':',')' -> CloseComment | '<',_ -> if let comment_blit start s = let c1 = ref String.get s 0 in let c2 = ref String.get s 1 in let current = ref start in let stop = ref false while not(stop) do match done *) galax-1.1/base/dynamic_stack.mli0000775000076400007640000000224510560462355015025 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: dynamic_stack.mli,v 1.2 2007/02/01 22:08:45 simeon Exp $ *) (* Module: Dynamic_stack Implements a dynamic stack with direct access to the elements needed for e.g. twig joins *) type 'a dynamic_stack = { mutable capacity : int; mutable size : int; mutable stack : 'a array; default : 'a; } val make : int -> 'a -> 'a dynamic_stack val push : 'a dynamic_stack -> 'a -> unit val empty : 'a dynamic_stack -> bool val pop : 'a dynamic_stack -> 'a val top : 'a dynamic_stack -> 'a galax-1.1/base/encoding.mli0000664000076400007640000000334210560462355013776 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. 
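(* Added sketch (illustrative, not part of the original interface): a host
   application would typically fix the character encodings once at start-up,
   e.g.

     let e = Encoding.encoding_of_string Encoding.utf8_string in
     Encoding.set_internal_encoding (Encoding.rep_encoding_of_encoding e);
     Encoding.set_default_output_encoding ()

   using only the values declared below. *)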
*) (* *) (***********************************************************************) (* $Id: encoding.mli,v 1.6 2007/02/01 22:08:45 simeon Exp $ *) (* Module: Encoding Description: Configuration of character encoding operations. *) (* Character encoding *) val utf8_string : string type encoding = Pxp_types.encoding (* All possible external encodings *) type rep_encoding = Pxp_types.rep_encoding (* All possible internal encodings *) val encoding_of_string : string -> encoding val string_of_encoding : encoding -> string val encoding_of_rep_encoding : rep_encoding -> encoding val rep_encoding_of_encoding : encoding -> rep_encoding (* Character encoding *) val set_internal_encoding : rep_encoding -> unit val get_internal_encoding : unit -> rep_encoding val set_output_encoding : encoding -> unit val set_default_output_encoding : unit -> unit val get_output_encoding : unit -> encoding (* String / character conversions *) val write_data_string : rep_encoding -> encoding -> string -> string val write_markup_string : rep_encoding -> encoding -> string -> string val character : rep_encoding -> int -> string galax-1.1/base/id.mli0000664000076400007640000000173010560462355012603 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: id.mli,v 1.7 2007/02/01 22:08:45 simeon Exp $ *) (* Module: Id Description: Generation of unique id's, which, in this version are Caml integers. *) (* id generators *) type id_gen val create : int -> id_gen val init : id_gen -> int -> unit val next : id_gen -> int val top : id_gen -> int galax-1.1/base/pool.mli0000664000076400007640000000307210560462355013161 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: pool.mli,v 1.7 2007/02/01 22:08:45 simeon Exp $ *) (* Module: Pool Description: This module implements string pools. Those are bidirectional mappings from strings to integers, used to save space in the representation of XML documents. *) (* Signature of the NamePool modules *) module type NamePool = sig type name type symbol = int type namepool val create_pool : unit -> namepool val init_pool : namepool -> unit val get_name : namepool -> symbol -> name val add_name : namepool -> name -> symbol val exists_name : namepool -> name -> bool val symbol_equals : namepool -> symbol -> symbol -> bool val pool_size : namepool -> (int * int * int) end (* Functor to create new name pool modules for an given hashed type *) module MakeNamePool (H: Hashtbl.HashedType) : (NamePool with type name = H.t) galax-1.1/base/error.mli0000664000076400007640000000744710654145646013360 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: error.mli,v 1.40 2007/08/01 18:06:30 simeon Exp $ *) (* Module: Error Description: This module deals with error handling in Galax. 
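(* Added sketch (illustrative): errors are raised through the Query
   exception declared below, e.g.

     raise (Query (Toplevel_Error "something went wrong"))

   and reported at top level with one of the printers declared here, e.g.

     try run () with exn -> eprintf_error "galax: " exn

   where run is a placeholder for the caller's code. *)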
*) type error = (* Lexing *) | Lexing of Finfo.finfo * string (* Parsing *) | Parsing of Finfo.finfo * string | Algebra_Parsing_Error of string | Namespace_Internal of string | Namespace_Error of string (* Normalization *) | Static_Error of string | Static_Internal of string | Module_Import of string | Annotation_Error of string (* Types *) | Malformed_Type of string | Malformed_Tuple of string | Malformed_Expr of string | Malformed_Core_Expr of string | Malformed_Algebra_Expr of string (* Static Typing *) | Static_Type_Error of string | Automata of string | Undefined_Variable of Finfo.finfo * string * string (* Rewriting *) | Rewriting of string | Streaming_XPath of string (* Factorization *) | Factorization of string (* Compilation *) | Compilation of string | Symbol_Already_Defined of (string * string) (* Optimization *) | Optimization of string (* Code Selection *) | Code_Selection of string | Expr_Error of string | Key_Error of (string * string) | KeyRef_Error of (string * string) | Physical_Type_Error of string (* Evaluation *) | Constructor_Error of string | Type_Error of string | Unicode_Error of string | Validation of string (* Schema Normalization *) | Schema of string | Schema_Internal of string | Schema_Import of string (* Serialization *) | Serialization of string (* Data Model / Loading *) | Datamodel of string | URI_Error of string | Load_Error of string | Cast_Error of string | Protocol_Error of string | Stream_Error of string | Cursor_Error of string | Physical_DM_Error of string | Malformed_DateTimeValue of string | Jungle_Error of string | Shredded_Error of string (* Projection *) | Projection of string (* WSDL *) | Root (* Toplevel tools *) | Toplevel_Error of string | Monitor_Error of string (* Multiple Modules *) | Parameter_Mismatch of string (* Norm, Eval *) | Unknown of string | Internal_Error of string | Wrong_Args of string (* PhysicalDM, Code Selection *) | Prototype of string | Undefined of string (* Namespace, Parsing, Code Selection *) | Mapping_Failure of string (* Normalization, Namespaces *) | Update_Error of string (* DM, Code Selection *) | DXQ_Error of string (* Distributed XQuery *) (* Testing *) | Testing_Error of string (* Top-level error resulting from downgrade_error *) | Error of string (* XQueryX errors *) | XQueryX_Error of Finfo.finfo * string (* Generic error with file location -- used to wrap internal error with a file location *) | Wrapped_Error of Finfo.finfo * string exception Query of error val printf_warning : string -> unit val eprintf_warning : string -> unit val bprintf_warning : string -> string val printf_error : string -> exn -> unit val eprintf_error : string -> exn -> unit val bprintf_error : string -> exn -> string val printf_error_safe : string -> exn -> unit val eprintf_error_safe : string -> exn -> unit val bprintf_error_safe : string -> exn -> string val error_with_file_location : Finfo.finfo -> exn -> exn galax-1.1/base/dynamic_buffer.ml0000664000076400007640000001417110560462355015016 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: dynamic_buffer.ml,v 1.6 2007/02/01 22:08:45 simeon Exp $ *) (* Module: Dynamic_buffer Description: This module implements a buffer that automatically grows (if need be) when adding new elements. 
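(* Added note (illustrative): the linear index i exposed by this buffer maps
   to the pair (i / chunk_size, i mod chunk_size); for example, with
   chunk_size = 1024, index 2050 denotes slot 2 of chunk 2 (see
   get_physical_index below). *)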
The buffer itself is composed of an array of arrays (chunks) the index space of which is exposed to the outside world as being linear. The buffer maintains a single internal cursor for keeping track of the current position; usage should adhere to the following protocol: - create the buffer - add values to the buffer - position the cursor, read values etc. - reset the buffer, then start again The allocated buffer space will never shrink (even in case of a reset). This behaviour is intentional. - Michael *) (* The structure holding the actual data. *) type 'a t = { mutable buffer : 'a array array; mutable chunk_size : int; mutable num_chunks : int; mutable increment : int; mutable buffer_index : int; mutable chunk_index : int; mutable buffer_last_index : int; mutable chunk_last_index : int; } (* Signals that the end of the buffer has been reached. *) exception Exhausted (* Creates a new buffer, using the specified (in that order) - initial number of chunks - chunk size - increment - initial value. *) let make n s inc x = (* let chunk = Array.make s x in let buff = Array.make n chunk in for i = 0 to n - 1 do let chunk = Array.make s x in buff.(i) <- chunk done; *) (* Michael 05|14|2006 *) let chunk = [||] in let buff = Array.make n chunk in for i = 0 to n - 1 do let chunk = Array.make s x in buff.(i) <- chunk done; { buffer = buff; chunk_size = s; num_chunks = n; increment = inc; buffer_index = 0; chunk_index = 0; buffer_last_index = -1; chunk_last_index = -1; } (* Resets the internal cursor to position 0. *) let reset t = t.buffer_index <- 0; t.chunk_index <- 0; t.buffer_last_index <- -1; t.chunk_last_index <- -1 (* Initializes the buffer with one blank chunk. *) let init t x = let s = t.chunk_size in (* let _ = print_string "init buffer with nchunks, csize, inc:\n " in let _ = print_int t.num_chunks in let _ = print_string " " in let _ = print_int s in let _ = print_string " " in let _ = print_int t.increment in let _ = print_string "\n" in *) let chunk = Array.make s x in let buff = Array.make 1 chunk in t.buffer <- buff; t.num_chunks <- 1; reset t let is_physical_index_valid t bi ci = ((bi < t.buffer_last_index) && (ci < t.chunk_size)) || ((bi = t.buffer_last_index) && (ci <= t.chunk_last_index)) let get_physical_index t i = let bi = i / t.chunk_size in let ci = i mod t.chunk_size in if is_physical_index_valid t bi ci then (bi, ci) else raise (Invalid_argument ("In Dynamic_buffer.get_physical_index: index "^(string_of_int i)^" out of bounds")) (* Sets the internal cursor to the specified index position. *) let position t i = let (bi, ci) = get_physical_index t i in t.buffer_index <- bi; t.chunk_index <- ci let increment_physical_index t = if t.chunk_index < (t.chunk_size - 1) then t.chunk_index <- (t.chunk_index + 1) else (* unsafe *) begin t.buffer_index <- (t.buffer_index + 1); t.chunk_index <- 0 end (* Returns the value currently pointed at by the internal cursor, then increments that cursor. *) let next t = let bi = t.buffer_index in let ci = t.chunk_index in if is_physical_index_valid t bi ci then begin let result = t.buffer.(bi).(ci) in let _ = increment_physical_index t in result end else raise Exhausted let resize t x = (* Remember to initialize every single element of the array with a distinct value! 
*) let chunk = Array.make t.chunk_size x in (* Michael 05|14|2006 *) let num_chunks = t.num_chunks * t.increment in (*let num_chunks = t.num_chunks + t.increment in*) (* let _ = print_string "resize buffer with nchunks, csize, inc:\n " in let _ = print_int num_chunks in let _ = print_string " " in let _ = print_int t.chunk_size in let _ = print_string " " in let _ = print_int t.increment in let _ = print_string "\n" in *) let buff = Array.make num_chunks chunk in begin for i = t.num_chunks to (num_chunks - 1) do let chunk = Array.make t.chunk_size x in buff.(i) <- chunk done; Array.blit t.buffer 0 buff 0 t.num_chunks; t.num_chunks <- num_chunks; t.buffer <- buff end (* Adds a value at the end of the buffer. *) let add t a = (* Michael 05|14|2006 *) if (t.num_chunks = 0) then init t a; let bi = t.buffer_index in let ci = t.chunk_index in t.buffer.(bi).(ci) <- a; t.buffer_last_index <- bi; t.chunk_last_index <- ci; begin if t.chunk_index < (t.chunk_size - 1) then t.chunk_index <- (ci + 1) else begin if not (t.buffer_index < (t.num_chunks - 1)) then resize t a; t.buffer_index <- (bi + 1); t.chunk_index <- 0 end end (* Resets the internal cursor to position 0. *) let reset t = t.buffer_index <- 0; t.chunk_index <- 0; t.buffer_last_index <- -1; t.chunk_last_index <- -1 (* True just after creation or after reset has been applied. *) let is_empty t = (t.buffer_last_index < 0) && (t.chunk_last_index < 0) (* Returns the index position currently pointed at by the internal cursor. *) let get_position t = (t.buffer_index * t.chunk_size) + t.chunk_index galax-1.1/base/galax_io.ml0000664000076400007640000000776310670320214013622 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: galax_io.ml,v 1.12 2007/09/07 19:03:40 mff Exp $ *) (* Module: Galax_io Description: This module contains some basic I/O structures and operations. *) open Error open Pxp_ev_parser open Pxp_types (****************************) (* Galax I/O specifications *) (****************************) (* What kind of input ? *) type input_spec = | File_Input of string | String_Input of string | Buffer_Input of Netbuffer.t | Http_Input of string | Channel_Input of in_channel (* What kind of output ? 
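(* Added examples (illustrative): typical specifications are built with the
   constructors of this file, e.g.

     let in_spec  = File_Input "doc.xml"
     let out_spec = Channel_Output stdout

   where "doc.xml" is only a placeholder name; input specifications are
   turned into PXP sources by source_from_input_spec below. *)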
*) type output_spec = | File_Output of string | Buffer_Output of Buffer.t | Channel_Output of out_channel | Formatter_Output of Format.formatter (*********************) (* PXP Parsing hooks *) (*********************) let source_from_input_spec gio = match gio with | File_Input s -> from_file s | String_Input s -> from_string s | Buffer_Input nb -> let (in_channel,close) = Netchannels.create_input_netbuffer nb in close (); from_obj_channel in_channel | Http_Input uri -> begin match Galax_url.glx_decode_url uri with | Galax_url.Http (host,port,local) -> from_string (Http.HTTP.get uri) | Galax_url.File f -> from_file f | Galax_url.ExternalSource _ -> raise (Query (Prototype "Cannot parse external source")) end | Channel_Input ic -> from_channel ic let close_source s = let resolver = match s with | Entity (_,resolver) | ExtID (_,resolver) | XExtID (_,_,resolver) -> resolver in resolver#close_in type pull_handle = source type pxp_stream = (unit -> Pxp_types.event option) * pull_handle type entity_kind = Document_entity | Document_fragment (* Mary : The default entity manager requires that the entity read be a complete XML document. See Pxp_ev_parser for how to create an entity manager for document fragments. *) let pull_parser_from_input_spec gio entity_kind = let config = Galax_pxp.glx_default_config in let entry = Galax_pxp.glx_default_entry in let source = source_from_input_spec gio in let entity_manager = match entity_kind with | Document_entity -> create_entity_manager config source (* Mary: The following line needs to be changed, somehow, to create an entity manager that can parse individual XML entities instead of complete documents. *) | Document_fragment -> create_entity_manager ~is_document:false config source in (create_pull_parser config entry entity_manager,source) let close_pull_parser source = close_source source let dtd_from_input_spec gio = let config = Galax_pxp.glx_default_config in let source = source_from_input_spec gio in Pxp_dtd_parser.parse_dtd_entity config source let ff = "file://" let uri_string_of_gio gio = match gio with | File_Input s -> if Filename.is_relative s then Some (Gmisc.rename_dir (Filename.concat ff (Filename.concat (Sys.getcwd ()) s))) else Some (ff ^ s) | String_Input s -> None | Buffer_Input _ -> None | Http_Input uri -> Some uri | Channel_Input _ -> None let string_of_gio gio = match gio with | File_Input s -> if Filename.is_relative s then (Gmisc.rename_dir (Filename.concat ff (Filename.concat (Sys.getcwd ()) s))) else (ff ^ s) | String_Input s -> "[StringInput]" | Buffer_Input _ -> "[BufferInput]" | Http_Input uri -> uri | Channel_Input _ -> "[ChannelInput]" galax-1.1/base/galax_pxp.ml0000664000076400007640000000337310560462355014026 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: galax_pxp.ml,v 1.3 2007/02/01 22:08:45 simeon Exp $ *) (* Module: Galax_pxp Description: This module contains useful hooks to the PXP parser. *) open Error open Pxp_ev_parser open Pxp_types (*********************) (* PXP configuration *) (*********************) (* Internal stuff used to set-up Pxp *) let glx_default_config = let _ = new drop_warnings in { default_config with encoding = Encoding.get_internal_encoding (); (* We want PI's and comments enabled, don't we? 
*) enable_comment_nodes = true; enable_pinstr_nodes = true; enable_super_root_node = true; (* If we store element positions, then positions are stored for elements and wrapped processing instructions. See Pxp_types.mli *) store_element_positions = true } (* default entry *) let glx_default_entry = `Entry_document[`Extend_dtd_fully] let empty_dtd () = let _ = glx_default_config in Pxp_dtd_parser.create_empty_dtd glx_default_config let default_dtd = empty_dtd () let default_lfactory = default_dtd # lexer_factory let default_lexobj = default_lfactory # open_string "" galax-1.1/base/conf.mli0000664000076400007640000001631710707660605013144 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: conf.mli,v 1.93 2007/10/24 15:15:17 simeon Exp $ *) (* Module: Conf Description: This module contains global variables and parameters changing the general behavior of the Galax interpretor. *) (***********************) (* Version information *) (***********************) val system : string (* System name *) val version : string (* Current version number *) val copyright : string (* Copyright notice *) val status : string (* This release's status *) val motd : string (* Any additional notice *) val xquery_version : string (**************************) (* Galax library location *) (**************************) (* Where is the Galax library installed *) val galax_library : string (* Where is the standard library file *) val pervasive_content : string ref (* Where are Camomile Unicode maps *) val unicode_maps : string option ref (***********************) (* Behavior parameters *) (***********************) (* Output behavior *) val print_global : bool ref val print_prolog : bool ref val verbose : bool ref (* true if verbose output emitted *) val verbose_error : bool ref (* true is full error message is emitted *) val warning : bool ref val xml_charescape_fn : (string -> string) ref (* function for escaping characters in XML output and expressions *) val print_xml : bool ref (* true if core expression emitted *) val xml_output : out_channel ref val xml_formatter : Format.formatter ref val xml_header : string ref val xml_footer : string ref val print_expr : bool ref (* true if expression emitted *) val expr_output : out_channel ref val expr_formatter : Format.formatter ref val expr_header : string ref val expr_footer : string ref val print_type : bool ref (* true if type emitted *) val type_output : out_channel ref val type_formatter : Format.formatter ref val type_header : string ref val type_footer : string ref val print_core_expr : bool ref (* true if core expression emitted *) val core_expr_output : out_channel ref val core_expr_formatter : Format.formatter ref val core_expr_header : string ref val core_expr_footer : string ref val print_annotations : bool ref (* true if expression annotations are printed *) val print_optimized_expr : bool ref (* true if optimized core expression emitted *) val optimized_expr_output : out_channel ref val optimized_expr_formatter : Format.formatter ref val optimized_expr_header : string ref val optimized_expr_footer : string ref val print_factorized_expr : bool ref (* true if factorized core expression emitted *) val factorized_expr_output : out_channel ref val factorized_expr_formatter : Format.formatter ref val factorized_expr_header : string ref val 
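(* Added sketch (illustrative): these parameters are plain references, so a
   driver can for instance enable verbose error reporting and core-expression
   printing with

     Conf.verbose_error := true;
     Conf.print_core_expr := true
*)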
factorized_expr_footer : string ref val print_projection : bool ref val projection_output : out_channel ref val projection_formatter : Format.formatter ref (* true when printing projection of document is on. off by default *) val print_projected_file : bool ref val projected_file_output : out_channel ref val projected_file_formatter : Format.formatter ref val glx_stderr : out_channel ref val glx_err_formatter : Format.formatter ref val print_algebra_optimization_rewrite : bool ref val algebra_optimization_rewrite_output : out_channel ref val algebra_optimization_rewrite_formatter : Format.formatter ref val algebra_optimization_rewrite_header : string ref val algebra_optimization_rewrite_footer : string ref val print_logical_algebra : bool ref val logical_algebra_output : out_channel ref val logical_algebra_formatter : Format.formatter ref val logical_algebra_header : string ref val logical_algebra_footer : string ref val serialize_logical_algebra : bool ref val print_optimized_algebra : bool ref val optimized_algebra_output : out_channel ref val optimized_algebra_formatter : Format.formatter ref val optimized_algebra_header : string ref val optimized_algebra_footer : string ref val print_physical_algebra : bool ref val physical_algebra_output : out_channel ref val physical_algebra_formatter : Format.formatter ref val physical_algebra_header : string ref val physical_algebra_footer : string ref val print_dfgraph : bool ref val dfgraph_output : out_channel ref val dfgraph_formatter : Format.formatter ref val genresults : bool ref (********************) (* XML Plan Loading *) (********************) val load_xml_plans : bool ref val execute_logical_plan : bool ref val execute_optimized_plan : bool ref (*********************) (* Global parameters *) (*********************) (* XML & XML Schema namespaces *) val emptyns : string val xmlns : string val xmlnsns : string val xsns : string val xsdns : string val xsins : string val fnns : string val xqxns : string val opns : string val fsns : string val collns : string val errns : string val localns : string val glxns : string val bPrinting_comp_annotations : bool ref (* Materialization flag *) val print_materialize : bool ref (********************************************) (* Experimental parameters for optimization *) (********************************************) val new_descendant_style : bool ref (* Aggressive sbdo remove *) val aggressive_sbdo_remove : bool ref (* Physical optimization flags *) val nested_loop_join : bool ref (* Variable materialization flag *) val force_materialized_variables : bool ref val allow_streamed_tuple_fields : bool ref (* Jungle flags *) val old_children_method : bool ref val jungle_buffsize : int option ref (* SAX materialization buffers *) val buffer_chunks : int ref val buffer_csize : int ref val buffer_inc : int ref (* Statistics *) val countload : int ref val countnext : int ref val countexpo : int ref (* Language *) type language_kind = | Language_XQuery10 | Language_XQueryUpdates | Language_XQueryBang | Language_XQueryP | Language_DXQ val language : language_kind ref val set_language : language_kind -> unit val is_xquery : unit -> bool val is_ultf : unit -> bool val is_xquerybang : unit -> bool val is_xqueryp : unit -> bool val is_dxq : unit -> bool type syntax_kind = | XQuery_Syntax | XQueryX_Syntax val syntax : syntax_kind ref val set_syntax : syntax_kind -> unit val is_xquery_syntax : unit -> bool val is_xqueryx_syntax : unit -> bool val batch_xqueryx : bool ref val embed_xqueryx : bool ref type 
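(* Added sketch (illustrative): the language and surface syntax in use are
   selected through the setters declared above, e.g.

     Conf.set_language Conf.Language_XQuery10;
     Conf.set_syntax Conf.XQuery_Syntax
*)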
materialize_tables_kind = | Always | Analysis | Never val set_materialize_tables : materialize_tables_kind -> unit val get_materialize_tables : unit -> materialize_tables_kind galax-1.1/base/galax_url.ml0000664000076400007640000000675310670320214014013 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* Module: Galax_url Description: This module contains functions for decoding URLs. *) open Neturl open Error type decoded_url = | File of string | Http of (string * int * string) | ExternalSource of (string * string * string option * string) let registered_methods = ref [] let is_registered b = List.exists (fun x -> x=b) !registered_methods let register_method b = if (is_registered b) then raise (Query (Toplevel_Error ("URL method : " ^ b ^ " already registered"))) else registered_methods := b :: !registered_methods (* fix_local worked with Http.decode_url, and fix_local2 works with decode_url2 below *) let fix_local r = match r with | None -> "/" | Some x -> "/" ^ x let fix_local2 r = match r with | None -> "/" | Some x -> x let decodeaux s = try let scheme = extract_url_scheme s in let newurl = if is_registered scheme then url_of_string ip_url_syntax s else url_of_string (Hashtbl.find common_url_syntax scheme) s in let urlhost = try url_host newurl with Not_found -> "" in let urlportopt = try Some(string_of_int (url_port newurl)) with Not_found -> None in let urlpath = try join_path (norm_path (url_path newurl)) ^ if is_registered scheme then (* If it is an external registered source then the url fragment needs to be appended on to the path *) try "#" ^ url_fragment newurl with | Not_found -> "" else "" with Not_found -> "" in Some((url_scheme newurl), urlhost, urlportopt, Some urlpath) with _ -> None let decode_url2 s = try if((String.compare (String.sub s 0 6) ("file:\\")) = 0) then ( (* Indicating a windows file uri *) (* Netstring does not parse them correctly so the old parsing*) (* routine has to be used *) Http.decode_url s ) else decodeaux s with | Invalid_argument _ -> decodeaux s | x -> raise x let glx_decode_url s = match decode_url2 s with | None -> (File s) | Some ("file", host, port, s) -> if host = "" && port = None then let local = fix_local2 s in (File local) else raise (Query (URI_Error "File URL should not have a host or port number")) | (Some ("http", host, port, s)) -> let local = fix_local2 s in let new_port = match port with | None -> 80 (* Default port *) | Some portnum -> begin try (int_of_string portnum) with | _ -> raise (Query (URI_Error (portnum ^ " is not a valid port number"))) end in (Http (host, new_port, local)) | (Some (me, host, port, s)) -> try if (is_registered me) then let local = fix_local2 s in ExternalSource (me,host,port,local) else raise Not_found with | _ -> raise (Query (URI_Error ("Method: " ^ me ^ " not supported in URL"))) galax-1.1/base/occurrence.ml0000664000076400007640000001051010560462355014162 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. 
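(* Added examples (illustrative), using the combinators defined below:

     is_star star                        (* = true *)
     ub_add (occurs 2) unbounded         (* = UNBOUNDED *)
     seq_occurrences optional plus       (* = plus, i.e. (1, unbounded) *)
*)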
*) (* *) (***********************************************************************) (* $Id: occurrence.ml,v 1.7 2007/02/01 22:08:45 simeon Exp $ *) (* Module: Occurrence Description: Manipulations of occurrences, i.e., an integer or 'unbounded'. Used for XML Schema minOccurs, maxOccurs, and XQuery occurrence indicators. *) (* The type 'occurs' corresponds to minOccurs and maxOccurs *) type occurs = | UP_INT of int | UNBOUNDED (* An occurrence indicator is a pair of minOccurs, maxOccurs *) type occurrence_indicator = (occurs * occurs) (* operations on occurs *) let occurs i = UP_INT i let unbounded = UNBOUNDED let occurs_zero = UP_INT 0 let occurs_one = UP_INT 1 let unbounded_operation op b1 b2 = match (b1,b2) with | (UNBOUNDED,_) -> UNBOUNDED | (_,UNBOUNDED) -> UNBOUNDED | (UP_INT(i1),UP_INT(i2)) -> UP_INT(op i1 i2) let ub_lt b1 b2 = match (b1,b2) with | (UNBOUNDED,UNBOUNDED) -> false | (_,UNBOUNDED) -> true | (UNBOUNDED,_) -> false | (UP_INT i1, UP_INT i2) -> i1 < i2 let ub_gt b1 b2 = match (b1,b2) with | (UNBOUNDED,UNBOUNDED) -> false | (UNBOUNDED,_) -> true | (_,UNBOUNDED) -> false | (UP_INT i1, UP_INT i2) -> i1 > i2 let ub_max ma1 ma2 = unbounded_operation max ma1 ma2 let ub_min ma1 ma2 = match (ma1,ma2) with | (UNBOUNDED,UP_INT(i)) -> UP_INT(i) | (UP_INT(i),UNBOUNDED) -> UP_INT(i) | (UP_INT(i1),UP_INT(i2)) -> UP_INT(min i1 i2) | _ -> UNBOUNDED let ub_add ma1 ma2 = unbounded_operation (+) ma1 ma2 let ub_mult ma1 ma2 = match (ma1,ma2) with | (_,UP_INT 0) -> UP_INT 0 | (UP_INT 0,_) -> UP_INT 0 | (UNBOUNDED,_) -> UNBOUNDED | (_,UNBOUNDED) -> UNBOUNDED | (UP_INT(i1),UP_INT(i2)) -> UP_INT(i1*i2) let minus i = function | UNBOUNDED -> UNBOUNDED | (UP_INT i') -> let i'' = i'-i in if i'' < 0 then (UP_INT 0) else (UP_INT i'') let b_equal mi ma = match (mi,ma) with | (UNBOUNDED,UNBOUNDED) -> true | (_,UNBOUNDED) -> false | (UNBOUNDED,_) -> false | (UP_INT i1,UP_INT i2) -> i1=i2 let equal mi ma = match (mi,ma) with | (UNBOUNDED,UNBOUNDED) -> true | (_,UNBOUNDED) -> false | (UNBOUNDED,_) -> false | (UP_INT i1,UP_INT i2) -> i1=i2 let le mi ma = match (mi,ma) with | (_,UNBOUNDED) -> true | (UNBOUNDED,_) -> false | (UP_INT i1,UP_INT i2) -> i1 <= i2 (* Prints bounds *) let string_of_occurs ub = match ub with | UP_INT i -> (string_of_int i) | UNBOUNDED -> "unbounded" (* Approximate occurrence indicators as used in XQuery *) let one = ((UP_INT 1), (UP_INT 1)) (* Exactly one *) let optional = ((UP_INT 0), (UP_INT 1)) (* Zero or one '?' *) let star = ((UP_INT 0), UNBOUNDED) (* Zero or more '*' *) let plus = ((UP_INT 1), UNBOUNDED) (* One or more '+' *) let is_one oi = oi = one let is_optional oi = oi = optional let is_star oi = oi = star let is_plus oi = oi = plus (* Computes an approximate occurrence indicator *) (* · | 1 ? + * --------------------------------- 1 | 1 ? + * ? | ? ? * * + | + * + * * | * * * * *) let mult mi ma = match (mi,ma) with | (UNBOUNDED,_) -> UNBOUNDED | (_,UNBOUNDED) -> UNBOUNDED | (UP_INT(i1),UP_INT(i2)) -> UP_INT(i1 * i2) let mult_occurrences (l1,u1) (l2,u2) = (mult l1 l2, mult u1 u2) (* , | 1 ? + * --------------------------------- 1 | + + + + ? | + * + * + | + + + + * | + * + * *) let seq_occurrences (l1,u1) (l2,u2) = (ub_max l1 l2, UNBOUNDED) (* | | 1 ? + * --------------------------------- 1 | 1 ? + * ? | ? ? 
* * + | + * + * * | * * * * *) let approximate_occurrences (b1,b2) = let b1' = ub_min b1 (UP_INT 1) and b2' = if le b2 (UP_INT 1) then b2 else unbounded in (b1',b2') galax-1.1/base/galax_url.mli0000664000076400007640000000172510670320214014156 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* Module: Galax_url Description: This module contains functions for decoding URLs. *) type decoded_url = | File of string | Http of (string * int * string) | ExternalSource of (string * string * string option * string) val register_method : string -> unit val glx_decode_url : string -> decoded_url galax-1.1/base/galax_camomile.mli0000664000076400007640000000220110677001100015125 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: galax_camomile.mli,v 1.5 2007/09/27 19:19:28 simeon Exp $ *) (* Module: Glx_camomile Description: This module is a wrapper over Camomile operations. *) val nfc : string -> string val nfd : string -> string val nfkc : string -> string val nfkd : string -> string val utf8_string_of_code_point : int -> string val utf8_code_points_of_string : string -> int list val utf8_string_of_code_points : int list -> string val utf8_codepoint_compare : string -> string -> int galax-1.1/factorization/0000775000076400007640000000000010772255370013447 5ustar mffmffgalax-1.1/factorization/factorize_flwor.ml0000664000076400007640000003444610705011416017176 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: factorize_flwor.ml,v 1.35 2007/10/16 01:25:34 mff Exp $ *) (* Module: Factorize_flwor Description: This module factorizes out code from FLWOR expression. This is to help the optimizer pick up logical optimizations. *) (* Example: Currently the only component is pulling up FLWOR blocks nested in return statements. Ex: for $x in ... return { for $y in ... where $x = $y } ----- for $x in ... let $FLWOR_norm := for $y in ... where $x = $y return { $FLWOR_norm } Added Element construction rewrite and function calls. 
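   The same rewrite applies when the nested FLWOR shows up inside an element
   constructor or as a function-call argument. Illustrative sketch only; the
   fresh variable name is made up here, but follows get_return_name below,
   which generates glx:FLWOR_norm0, glx:FLWOR_norm1, ...

      for $x in ...
      return fn:count(for $y in ... where $x = $y return $y)
      -----
      for $x in ...
      let $glx:FLWOR_norm0 := for $y in ... where $x = $y return $y
      return fn:count($glx:FLWOR_norm0)

   The nested FLWOR is only extracted when it is side-effect free, as checked
   by Rewriting_judgments.side_effect_free_judge in put_in_normal_form below.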
*) open Conf open Error open Xquery_common_ast open Xquery_core_ast open Xquery_core_ast_util (******************) (* Misc utilities *) (******************) (* Jerome: Probably those should be moved *) type env = { static_context : Typing_context.static_context; mutable flwor_bindings : (crname * acexpr) list option } let mk_env sc pl = { static_context = sc; flwor_bindings= pl } (* These are independent bindings, order does not matter but we preserve it *) let normed_count = ref 0 let get_return_name () = let nc = !normed_count in incr normed_count; (Namespace_builtin.glx_prefix, Namespace_builtin.glx_uri, ("FLWOR_norm" ^ (string_of_int nc))) let get_bindings_unsafe env = match env.flwor_bindings with | None -> raise (Error.Query (Error.Factorization ("Factorization is attempting to extract bindings, but there is no place holder!"))) | Some binding -> binding let update_parent env vn binding = match env.flwor_bindings with | None -> raise (Error.Query (Error.Factorization ("Factorization is attempting to update parent of FLWOR, but parent is None"))) | Some flwor_bindings -> env.flwor_bindings <- Some (flwor_bindings @ ((vn, binding) :: [])) let make_initial_env stat_ctxt = mk_env stat_ctxt None let make_place_holder env = mk_env env.static_context (Some []) let make_null_place_holder env = mk_env env.static_context None let make_CELET_binding (vn, binding) = CELET (None, vn, binding) let add_new_CELET_bindings fl_clauses env = let bindings = get_bindings_unsafe env in let new_lets = List.map make_CELET_binding bindings in (* Order does matter here *) fl_clauses @ new_lets (****************************) (* Rewrite to a normal form *) (****************************) let rec put_in_normal_form (env: env) (ce: acexpr) = let ma = ce.pcexpr_annot in let eh = ce.pcexpr_origin in let loc = ce.pcexpr_loc in match ce.pcexpr_desc with | CEFLWOR (fl_clauses, where_clause, order_by_clause, return_clause) -> (* 1. Apply update normal-form first. *) (* Note: this never effects the where and orderby clauses. *) (* let (fl_clauses,return_clause) = update_in_normal_form fl_clauses return_clause in *) (* 2. Then go on with general normal-form. 
- Jerome *) begin match env.flwor_bindings with | None -> let fl_clauses = List.concat (List.map (cfl_put_in_normal_form env) fl_clauses) in let where_clause = match where_clause with None -> None | Some wc -> Some (put_in_normal_form env wc) in let (order_by_clause: acorder_by option) = match order_by_clause with None -> None | Some (stable, co_specs, osig) -> let (co_specs: acorder_spec list) = List.map (fun (ce, sk, esk) -> (put_in_normal_form env ce), sk,esk) co_specs in Some (stable, co_specs, osig) in (* We are the parent of our return clause *) (* Make a place holder for us, so we can get our new let clauses *) let env = make_place_holder env in let return_clause = put_in_normal_form env return_clause in (* Add the new let bindings *) let fl_clauses = add_new_CELET_bindings fl_clauses env in let expr_name = CEFLWOR(fl_clauses, where_clause, order_by_clause, return_clause) in fmkacexpr expr_name ma eh loc | Some parent -> begin (* if we are side-effect free, Replace ce with a stub variable else just return ce (don't move it) *) if Rewriting_judgments.side_effect_free_judge ce then begin let stub_name = get_return_name () in let replacement_var = fmkacexpr (CEVar stub_name) ma eh loc in (* Update our parent expression with the body of our expression *) update_parent env stub_name (put_in_normal_form (make_null_place_holder env) ce); replacement_var end else ce end end (*************************************************) (* We stop factorization in Ifs and typeswitches *) (* This stopping is accomplished by replacing *) (* parent_flwor with None *) (*************************************************) | CEIf (cexpr1, cexpr2, cexpr3) -> let cexpr1 = put_in_normal_form env cexpr1 in (* Do not factor inside branches, this is accomplished by not allowing parent expressions *) let cexpr2 = put_in_normal_form (make_null_place_holder env) cexpr2 in let cexpr3 = put_in_normal_form (make_null_place_holder env) cexpr3 in fmkacexpr (CEIf (cexpr1, cexpr2, cexpr3)) ma eh loc | CEWhile (cexpr1, cexpr2) -> let cexpr1 = put_in_normal_form env cexpr1 in (* Do not factor inside branches, this is accomplished by not allowing parent expressions *) let cexpr2 = put_in_normal_form (make_null_place_holder env) cexpr2 in fmkacexpr (CEWhile (cexpr1, cexpr2)) ma eh loc | CETypeswitch (cexpr, cases) -> let cexpr' = put_in_normal_form env cexpr in let apply_one_case (pat,ovn,case_cexpr) = let new_case_cexpr = put_in_normal_form (make_null_place_holder env) case_cexpr in (pat,ovn,new_case_cexpr) in let cases = List.map apply_one_case cases in fmkacexpr (CETypeswitch (cexpr', cases)) ma eh loc (****************************************************) (* These operators contain variable bindings *) (* To be conservative we do not allow crossing *) (* expressions to cross these for flwor extraction. 
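   For instance - illustrative only - a FLWOR nested under a quantifier, as in
      some $y in $s satisfies (for $z in ... return ...),
   is normalized with a fresh, empty place holder (make_null_place_holder), so
   it stays in place and is never lifted into a let above the $y binding, even
   when it only depends on variables bound outside the quantifier.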
*) (****************************************************) | CESome (odt, vname, cexpr1, cexpr2) -> let cexpr1' = put_in_normal_form (make_null_place_holder env) cexpr1 in let cexpr2' = put_in_normal_form (make_null_place_holder env) cexpr2 in fmkacexpr (CESome (odt, vname, cexpr1', cexpr2')) ma eh loc | CEEvery (odt, vname, cexpr1, cexpr2) -> let cexpr1' = put_in_normal_form (make_null_place_holder env) cexpr1 in let cexpr2' = put_in_normal_form (make_null_place_holder env) cexpr2 in fmkacexpr (CEEvery (odt, vname, cexpr1', cexpr2')) ma eh loc (**************************************************) (* The rest of this code is just walking the tree *) (**************************************************) | CEUnordered cexpr -> let cexpr' = put_in_normal_form env cexpr in fmkacexpr (CEUnordered cexpr') ma eh loc | CEOrdered cexpr -> let cexpr' = put_in_normal_form env cexpr in fmkacexpr (CEOrdered cexpr') ma eh loc | CEVar vname -> ce | CEScalar _ -> ce | CEProtoValue _ -> ce | CEText _ -> ce | CECharRef _ -> ce | CETextComputed cexpr1 -> let cexpr1' = put_in_normal_form env cexpr1 in fmkacexpr (CETextComputed cexpr1') ma eh loc | CEPI _ -> ce | CEPIComputed (cexpr1,cexpr2) -> let cexpr1' = put_in_normal_form env cexpr1 in let cexpr2' = put_in_normal_form env cexpr2 in fmkacexpr (CEPIComputed (cexpr1', cexpr2')) ma eh loc | CEComment _ -> ce | CECommentComputed cexpr1 -> let cexpr1' = put_in_normal_form env cexpr1 in fmkacexpr (CECommentComputed cexpr1') ma eh loc | CEDocument cexpr1 -> let cexpr1' = put_in_normal_form env cexpr1 in fmkacexpr (CEDocument cexpr1') ma eh loc | CECall (fname, arguments, sign, upd, selfrecur) -> let arguments' = List.map (put_in_normal_form env) arguments in fmkacexpr (CECall (fname, arguments',sign, upd, selfrecur)) ma eh loc | CEOverloadedCall (fname, arguments, sigs) -> let arguments' = List.map (put_in_normal_form env) arguments in fmkacexpr (CEOverloadedCall (fname, arguments', sigs)) ma eh loc | CELetServerImplement (nc1, nc2, cexpr1, cexpr2) -> let cexpr1' = put_in_normal_form env cexpr1 in let cexpr2' = put_in_normal_form env cexpr2 in fmkacexpr (CELetServerImplement(nc1,nc2,cexpr1',cexpr2')) ma eh loc | CEExecute (async, ncname, uri, cexpr1, cexpr2) -> let cexpr1' = put_in_normal_form env cexpr1 in let cexpr2' = put_in_normal_form env cexpr2 in fmkacexpr (CEExecute(async, ncname, uri, cexpr1', cexpr2')) ma eh loc | CEForServerClose (nc1, uri, cexpr1) -> let cexpr1' = put_in_normal_form env cexpr1 in fmkacexpr (CEForServerClose(nc1,uri, cexpr1')) ma eh loc | CEEvalClosure (cexpr1) -> let cexpr1' = put_in_normal_form env cexpr1 in fmkacexpr (CEEvalClosure(cexpr1')) ma eh loc | CESeq (cexpr1, cexpr2) -> let cexpr1' = put_in_normal_form env cexpr1 in let cexpr2' = put_in_normal_form env cexpr2 in fmkacexpr (CESeq(cexpr1', cexpr2')) ma eh loc | CEImperativeSeq (cexpr1, cexpr2) -> let cexpr1' = put_in_normal_form env cexpr1 in let cexpr2' = put_in_normal_form env cexpr2 in fmkacexpr (CEImperativeSeq(cexpr1', cexpr2')) ma eh loc | CEEmpty -> ce | CEElem (relem_symbol, nsenv, cexprlist) -> let cexprlist' = List.map (put_in_normal_form env) cexprlist in fmkacexpr (CEElem (relem_symbol, nsenv, cexprlist')) ma eh loc | CEAnyElem (cexpr1, nsenv1, nsenv2, cexpr2) -> let cexpr1' = put_in_normal_form env cexpr1 in let cexpr2' = put_in_normal_form env cexpr2 in fmkacexpr (CEAnyElem (cexpr1', nsenv1, nsenv2, cexpr2')) ma eh loc | CEAttr (rattr_symbol, cexprlist)-> let cexprlist' = List.map (put_in_normal_form env) cexprlist in fmkacexpr (CEAttr (rattr_symbol, cexprlist')) 
ma eh loc | CEAnyAttr (cexpr1, nsenv, cexpr2) -> let cexpr1' = put_in_normal_form env cexpr1 in let cexpr2' = put_in_normal_form env cexpr2 in fmkacexpr (CEAnyAttr (cexpr1', nsenv, cexpr2')) ma eh loc | CEError cexpr_list -> (* Note: I don't see any reason why expressions inside the error function shouldn't be factorized as anything else - Jerome (* ce *) *) let cexpr_list' = List.map (put_in_normal_form env) cexpr_list in fmkacexpr (CEError cexpr_list') ma eh loc | CETreat (cexpr, model) -> let cexpr' = put_in_normal_form env cexpr in fmkacexpr (CETreat (cexpr', model)) ma eh loc | CECast (cexpr, nsenv, model) -> let cexpr' = put_in_normal_form env cexpr in fmkacexpr (CECast (cexpr', nsenv, model)) ma eh loc | CECastable (cexpr, nsenv, model) -> let cexpr' = put_in_normal_form env cexpr in fmkacexpr (CECastable (cexpr', nsenv, model)) ma eh loc | CEValidate (vmode,cexpr) -> let cexpr' = put_in_normal_form env cexpr in fmkacexpr (CEValidate (vmode,cexpr')) ma eh loc | CEForwardAxis (v,axis, cnode_test) | CEReverseAxis (v,axis, cnode_test) -> ce | CECopy ce1 -> let ce1' = put_in_normal_form env ce1 in fmkacexpr (CECopy ce1') ma eh loc | CEDelete ce1 -> let ce1' = put_in_normal_form env ce1 in fmkacexpr (CEDelete ce1') ma eh loc | CEInsert (cexpr, cinsert_location) -> let cexpr' = put_in_normal_form env cexpr in let cinsert_location' = match cinsert_location with | CUAsLastInto ce1 -> let ce1' = put_in_normal_form env ce1 in (CUAsLastInto ce1') | CUAsFirstInto ce1 -> let ce1' = put_in_normal_form env ce1 in (CUAsFirstInto ce1') | CUInto ce1 -> let ce1' = put_in_normal_form env ce1 in CUInto ce1' | CUAfter ce1 -> let ce1' = put_in_normal_form env ce1 in (CUAfter ce1') | CUBefore ce1 -> let ce1' = put_in_normal_form env ce1 in (CUBefore ce1') in fmkacexpr (CEInsert (cexpr', cinsert_location')) ma eh loc | CERename (nsenv, ce1, ce2) -> let ce1' = put_in_normal_form env ce1 in let ce2' = put_in_normal_form env ce2 in fmkacexpr (CERename (nsenv, ce1',ce2')) ma eh loc | CEReplace (vof, ce1, ce2) -> let ce1' = put_in_normal_form env ce1 in let ce2' = put_in_normal_form env ce2 in fmkacexpr (CEReplace (vof,ce1',ce2')) ma eh loc | CESnap (sm,ce) -> let ce = put_in_normal_form env ce in fmkacexpr (CESnap (sm,ce)) ma eh loc | CELetvar (odt, vname, cexpr1, cexpr2) -> let cexpr1' = put_in_normal_form (make_null_place_holder env) cexpr1 in let cexpr2' = put_in_normal_form (make_null_place_holder env) cexpr2 in fmkacexpr (CELetvar (odt, vname, cexpr1', cexpr2')) ma eh loc | CESet (vname, ce) -> let ce = put_in_normal_form env ce in fmkacexpr (CESet (vname,ce)) ma eh loc and cfl_put_in_normal_form env cfl = match cfl with | CELET(odt, vn, expr) -> begin match expr.pcexpr_desc with | CEFLWOR _ -> (* Already nested, rewrite to parent *) CELET(odt, vn, (put_in_normal_form env expr)) :: [] | _ -> (* we are this parents *) begin let env = make_place_holder env in let norm_expr = put_in_normal_form env expr in let bindings = get_bindings_unsafe env in let new_lets = List.map make_CELET_binding bindings in (* these lets should occur before the returned let, it could depend on them *) new_lets @ CELET(odt, vn, norm_expr) :: [] end end | CEFOR(odt, vn, vn2, expr) -> let expr = put_in_normal_form env expr in CEFOR(odt, vn, vn2, expr) :: [] let factorize_flwor static_context expr = put_in_normal_form (make_initial_env static_context) expr galax-1.1/factorization/factorize_free_var.ml0000664000076400007640000001363210705011416017630 0ustar mffmff(***********************************************************************) 
(* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: factorize_free_var.ml,v 1.29 2007/10/16 01:25:34 mff Exp $ *) (* Module: Factorize_free_var Description: Given a core expression, recursively fill out the free variable annotation. Additionally return the list of free variables for the expression. *) open Xquery_common_ast open Xquery_core_ast open Xquery_core_ast_util open Xquery_core_ast_annotation let rec remove_bindings vn vlist = List.filter (fun x -> not (Namespace_names.rqname_equal vn x)) vlist let rec compute_free_vars cexpr = let { pcexpr_desc = ce_desc; pcexpr_annot = annot; pcexpr_origin = eh; pcexpr_loc = loc;} = cexpr in let fv_list = match ce_desc with | CEOrdered ce | CEUnordered ce -> compute_free_vars ce | CEFLWOR(cfls, wc, ob, ret) -> compute_flwr_free cfls wc ob ret | CEIf(cond, ce1, ce2) -> (compute_free_vars cond) @ (compute_free_vars ce1) @ (compute_free_vars ce2) | CEWhile(cond, ce1) -> (compute_free_vars cond) @ (compute_free_vars ce1) | CETypeswitch (on, patt) -> let fv_on = compute_free_vars on in let handle_branch (_, ovn, cexpr) = match ovn with None -> compute_free_vars cexpr | Some vn -> remove_bindings vn (compute_free_vars cexpr) in fv_on @ (List.concat (List.map handle_branch patt)) | CEVar vn -> vn :: [] | CEOverloadedCall (_, cexpr_list, _) | CECall (_, cexpr_list, _, _, _) -> List.concat (List.map compute_free_vars cexpr_list) | CEScalar _ -> [] | CEProtoValue _ -> [] | CESeq (ce1,ce2) -> (compute_free_vars ce1) @ (compute_free_vars ce2) | CEImperativeSeq (ce1,ce2) -> (compute_free_vars ce1) @ (compute_free_vars ce2) | CEEmpty -> [] | CEDocument ce -> compute_free_vars ce | CEPI _ -> [] | CEPIComputed (ce1, ce2) -> (compute_free_vars ce1) @ (compute_free_vars ce2) | CEComment _ -> [] | CECommentComputed ce -> compute_free_vars ce | CEText _ -> [] | CECharRef _ -> [] | CETextComputed ce -> compute_free_vars ce | CEElem (_,_,cexpr_list) -> List.concat (List.map compute_free_vars cexpr_list) | CEAnyElem(ce1,_, _, ce2) -> (compute_free_vars ce1) @ (compute_free_vars ce2) | CEAttr (_, cexpr_list) -> List.concat (List.map compute_free_vars cexpr_list) | CEAnyAttr(ce1, nsenv, ce2) -> (compute_free_vars ce1) @ (compute_free_vars ce2) | CEError cexpr_list -> List.concat (List.map compute_free_vars cexpr_list) | CETreat (ce, _) -> compute_free_vars ce | CEValidate (_, ce) -> compute_free_vars ce | CECast (ce, _, _ ) -> compute_free_vars ce | CECastable (ce, _, _ ) -> compute_free_vars ce | CEForwardAxis _ -> [Xquery_common_ast.fs_dot] | CEReverseAxis _ -> [Xquery_common_ast.fs_dot] | CESome (_, vn, ce1, ce2) | CEEvery (_, vn, ce1, ce2) -> remove_bindings vn ((compute_free_vars ce1) @ (compute_free_vars ce2)) (* Updates *) | CECopy ce1 -> compute_free_vars ce1 | CEDelete ce1 -> compute_free_vars ce1 | CEInsert (cei, il) -> (compute_free_vars cei) @ (compute_free_vars_insert_location il) | CERename(_, ce1, ce2) -> (compute_free_vars ce1) @ (compute_free_vars ce2) | CEReplace(_,ce1, ce2) -> (compute_free_vars ce1) @ (compute_free_vars ce2) | CESnap (sm,cexpr) -> compute_free_vars cexpr | CELetvar (_, vn, ce1, ce2) -> remove_bindings vn ((compute_free_vars ce1) @ (compute_free_vars ce2)) | CESet (_,ce) -> compute_free_vars ce (* NOTE: The following are extension to the XQuery Formal Semantics Core XQuery - Jerome *) | CELetServerImplement(_,_, ce1,ce2) | CEExecute (_, _, _, ce1, ce2) -> (compute_free_vars 
ce1)@(compute_free_vars ce2) | CEForServerClose(_,_,ce1) | CEEvalClosure (ce1) -> (compute_free_vars ce1) in (* Remove duplicates *) let fv_list = Gmisc.remove_duplicates fv_list in set_free_var_annot annot fv_list; fv_list (************************************************) (* Compute Free Variabels for FLWOR Expressions *) (************************************************) and compute_flwr_free cfls wc ob ret = match cfls with [] -> compute_free_where wc ob ret | CELET(_, vn, bound) :: rest -> (compute_free_vars bound) @ (remove_bindings vn (compute_flwr_free rest wc ob ret)) | CEFOR(_, vn, pos, bound) :: rest -> let vn_free = remove_bindings vn (compute_flwr_free rest wc ob ret) in let binding = match pos with None -> vn_free | Some v -> remove_bindings v vn_free in (compute_free_vars bound) @ binding and compute_free_where wc ob ret = match wc with | None -> compute_free_orderby ob ret | Some wc -> (compute_free_vars wc) @ (compute_free_orderby ob ret) and compute_free_orderby ob ret = match ob with | None -> compute_free_vars ret | Some (_, ospec, osig) -> List.concat (List.map (fun (ce, _, _) -> compute_free_vars ce) ospec) @ compute_free_vars ret (***********************************************) (* Compute Free Variables for insert locations *) (***********************************************) and compute_free_vars_insert_location il = match il with | CUAsLastInto ce | CUAsFirstInto ce | CUInto ce | CUAfter ce | CUBefore ce -> compute_free_vars ce let annotate_free_vars stat_ctxt acexpr = ignore(compute_free_vars acexpr); stat_ctxt galax-1.1/factorization/factorize_unique.ml0000664000076400007640000002266110705011416017347 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: factorize_unique.ml,v 1.30 2007/10/16 01:25:34 mff Exp $ *) (* Module: Factorize_unique Description: This module assigns all variables a unique name in the core expression. o It assumes that names that are not bound are globals, or defined in another scope. These variable names remain untouched. o factorize_unique_with_context allows you to pass in initial bindings, from global variables/parameters if renaming is done there. o fs:dot is a special variable and is never rebound. The purpose is to simplify code later on. Currently this is not used. 
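   For example - an illustrative sketch, where the exact generated names depend
   on the counter in get_unique_name below, which appends _u plus an integer:

      for $x in $a return (for $x in $b return $x, $x)

   is rewritten so that the two bindings of $x get distinct names, roughly
   $x_u0 for the outer binding and $x_u1 for the inner one; the inner
   occurrence of $x then refers to the inner renamed binding and the trailing
   occurrence to the outer one. Unbound names such as $a and $b, and the
   context variable fs:dot, are left untouched.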
*) open Error open Xquery_core_ast (******************************) (* Environment Helper section *) (******************************) type unique_environment = { cur_id: int ref; rebind_list: (Xquery_common_ast.cvname * Xquery_common_ast.cvname) list } let mk_uenv idref l = { cur_id = idref; rebind_list = l } (* Identity map for fs_dot since it should never be rebound *) let empty_uenv () = mk_uenv (ref 0) [] let mk_default_uenv binding = mk_uenv (ref 0) binding (* MUST ENSURE IT IS NOT fs:dot *) let get_unique_name env ((prefix,uri,name) as vn) = if Namespace_names.rqname_equal Xquery_common_ast.fs_dot vn then vn else begin let name = name ^ "_u" ^ (string_of_int !(env.cur_id)) in incr env.cur_id; (prefix,uri,name) end (* Adding bindings *) let add_binding env pair = mk_uenv env.cur_id (pair :: env.rebind_list) let add_binding_name env vn = let new_name = get_unique_name env vn in let env = add_binding env (vn, new_name) in new_name, env let add_opt_binding_name env ovn = match ovn with | None -> None, env | Some vn -> let vn, env = add_binding_name env vn in (Some vn), env (***********************) (* Retrieving Bindings *) (***********************) let get_rebound_name env vn = if Namespace_names.rqname_equal Xquery_common_ast.fs_dot vn then vn else begin if List.mem_assoc vn env.rebind_list then List.assoc vn env.rebind_list else vn end (*****************) (* Main function *) (*****************) (* Mary : Why isn't this function written using the Ast_walker_rewrite module ? *) let rec unique_rename env cexpr = let { pcexpr_desc = ce_desc; pcexpr_annot = annot; pcexpr_origin = eh; pcexpr_loc = loc;} = cexpr in let ce_desc = match ce_desc with | CEOrdered ce -> CEOrdered (unique_rename env ce) | CEUnordered ce -> CEUnordered (unique_rename env ce) | CEFLWOR(cfls, wc, ob, ret) -> flwr_unique_rename env cfls wc ob ret | CEIf(cond, ce1, ce2) -> let cond = unique_rename env cond in let ce1 = unique_rename env ce1 in let ce2 = unique_rename env ce2 in CEIf(cond,ce1,ce2) | CEWhile(cond, ce1) -> let cond = unique_rename env cond in let ce1 = unique_rename env ce1 in CEWhile(cond,ce1) | CETypeswitch (on, patt) -> let on = unique_rename env on in let rename_branch (cpat, ovn, ce) = let ovn, env = add_opt_binding_name env ovn in (cpat, ovn, (unique_rename env ce)) in let patt = List.map rename_branch patt in CETypeswitch (on, patt) | CEVar vn -> CEVar (get_rebound_name env vn) | CEOverloadedCall (cfname, cexpr_list, sig_table) -> let cexpr_list = List.map (unique_rename env) cexpr_list in CEOverloadedCall (cfname, cexpr_list, sig_table) | CECall (cfname, cexpr_list, in_out_types, upd, selfrecur) -> let cexpr_list = List.map (unique_rename env) cexpr_list in CECall (cfname, cexpr_list, in_out_types, upd, selfrecur) | CEScalar av -> CEScalar av | CEProtoValue av -> CEProtoValue av | CESeq (ce1,ce2) -> let ce1 = unique_rename env ce1 in let ce2 = unique_rename env ce2 in CESeq (ce1,ce2) | CEImperativeSeq (ce1,ce2) -> let ce1 = unique_rename env ce1 in let ce2 = unique_rename env ce2 in CEImperativeSeq (ce1,ce2) | CEEmpty -> CEEmpty | CEDocument ce -> CEDocument (unique_rename env ce) | CEPI (name, pi) -> CEPI (name,pi) | CEPIComputed (ce1, ce2) -> let ce1 = unique_rename env ce1 in let ce2 = unique_rename env ce2 in CEPIComputed (ce1, ce2) | CEComment c -> CEComment c | CECommentComputed ce -> CECommentComputed (unique_rename env ce) | CEText t -> CEText t | CECharRef t -> CECharRef t | CETextComputed ce -> CETextComputed (unique_rename env ce) | CEElem (name,nsenv,cexpr_list) -> CEElem (name,nsenv, 
(List.map (unique_rename env) cexpr_list)) | CEAnyElem(ce1,nsenv1, nsenv2, ce2) -> CEAnyElem ((unique_rename env ce1), nsenv1, nsenv2, (unique_rename env ce2)) | CEAttr (ca, cexpr_list) -> CEAttr (ca, (List.map (unique_rename env) cexpr_list)) | CEAnyAttr(ce1, nsenv, ce2) -> CEAnyAttr ((unique_rename env ce1), nsenv, (unique_rename env ce2)) | CEError cexpr_list -> CEError (List.map (unique_rename env) cexpr_list) | CETreat (ce, dt) -> CETreat ((unique_rename env ce), dt) | CEValidate (vm, ce) -> CEValidate (vm, (unique_rename env ce)) | CECast (ce, nsenv, dt) -> CECast ((unique_rename env ce), nsenv, dt) | CECastable (ce, nsenv, dt) -> CECastable ((unique_rename env ce), nsenv, dt) | CEForwardAxis (v,a,nt) -> CEForwardAxis (v,a,nt) | CEReverseAxis (v,a,nt) -> CEReverseAxis (v,a,nt) | CESome (odt, vn, ce1, ce2) -> let ce1 = unique_rename env ce1 in let vn, env = add_binding_name env vn in let ce2 = unique_rename env ce2 in CESome (odt, vn, ce1, ce2) | CEEvery (odt, vn, ce1, ce2) -> let ce1 = unique_rename env ce1 in let vn, env = add_binding_name env vn in let ce2 = unique_rename env ce2 in CEEvery (odt, vn, ce1, ce2) (* Updates *) | CECopy ce1 -> CECopy (unique_rename env ce1) | CEDelete ce1 -> CEDelete (unique_rename env ce1) | CEInsert (cei, il) -> CEInsert ((unique_rename env cei), (unique_rename_insertloc env il)) | CERename(nsenv, ce1, ce2) -> CERename(nsenv, (unique_rename env ce1), (unique_rename env ce2)) | CEReplace(vof,ce1, ce2) -> CEReplace(vof, (unique_rename env ce1), (unique_rename env ce2)) | CESnap (sm,cexpr) -> CESnap (sm, (unique_rename env cexpr)) | CELetvar (odt, vn, ce1, ce2) -> let ce1 = unique_rename env ce1 in let vn, env = add_binding_name env vn in let ce2 = unique_rename env ce2 in CELetvar (odt, vn, ce1, ce2) | CESet (v, ce) -> CESet (v, unique_rename env ce) (* NOTE: The following are extension to the XQuery Formal Semantics Core XQuery - Jerome *) | CELetServerImplement (nc1, nc2, ce1, ce2) -> let ce1 = unique_rename env ce1 in let ce2 = unique_rename env ce2 in CELetServerImplement (nc1, nc2, ce1, ce2) | CEExecute (async, ncname, uri, ce1, ce2) -> let ce1 = unique_rename env ce1 in let ce2 = unique_rename env ce2 in CEExecute (async, ncname, uri, ce1, ce2) | CEForServerClose (nc1, uri, ce1) -> let ce1 = unique_rename env ce1 in CEForServerClose (nc1, uri, ce1) | CEEvalClosure (ce1) -> let ce1 = unique_rename env ce1 in CEEvalClosure (ce1) in Xquery_core_ast_util.fmkacexpr ce_desc annot eh loc and unique_rename_insertloc env il = match il with | CUAsLastInto ce -> CUAsLastInto (unique_rename env ce) | CUAsFirstInto ce -> CUAsFirstInto (unique_rename env ce) | CUInto ce -> CUInto (unique_rename env ce) | CUAfter ce -> CUAfter (unique_rename env ce) | CUBefore ce -> CUBefore (unique_rename env ce) and flwr_unique_rename env cfls wc ob ret = (* NOTE: THIS REVERSES THE ORDER FOLD LEFT AND CONS *) let rebind_fold (cfls, env) cfl = match cfl with | CELET(odt,vn,ce) -> let vn, env = add_binding_name env vn in let ce = unique_rename env ce in (CELET(odt, vn, ce) :: cfls, env) | CEFOR(odt,vn,ovn,ce) -> let vn, env = add_binding_name env vn in let ovn, env = add_opt_binding_name env ovn in let ce = unique_rename env ce in (CEFOR(odt, vn, ovn, ce) :: cfls, env) in let cfls_rev, env = List.fold_left rebind_fold ([], env) cfls in let cfls = List.rev cfls_rev in let wc = match wc with | None -> None | Some w -> Some (unique_rename env w) in let ob = unique_rename_orderby env ob in let ret = unique_rename env ret in CEFLWOR(cfls, wc, ob, ret) and unique_rename_orderby env 
ob = match ob with | None -> ob | Some (sk, aos, osig) -> let aos = List.map (fun (ce, sk,esk) -> ((unique_rename env ce), sk, esk)) aos in Some (sk, aos, osig) (***********************************) (* These are the exposed functions *) (***********************************) type binding_list = (Xquery_common_ast.cvname * Xquery_common_ast.cvname) list let factorize_unique_with_context bindings ce = let env = mk_default_uenv bindings in unique_rename env ce let factorize_unique ce = factorize_unique_with_context [] ce galax-1.1/factorization/factorize_top.mli0000664000076400007640000000232310560462356017021 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: factorize_top.mli,v 1.4 2007/02/01 22:08:46 simeon Exp $ *) (* Module: Factorize_top Description: This module will put statements into a normal form before their are compiled into the algbera. This is to help the optimizer pick up logical optimizations. *) open Xquery_core_ast open Typing_context val factorize_statement : static_context -> acstatement -> acstatement val factorize_prolog : static_context -> acprolog -> static_context * acprolog val factorize_xmodule : static_context -> acxmodule -> static_context * acxmodule galax-1.1/factorization/factorize_flwor.mli0000664000076400007640000000175710560462356017362 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: factorize_flwor.mli,v 1.4 2007/02/01 22:08:46 simeon Exp $ *) (* Module: Factorize_flwor Description: This module factorizes out code from FLWOR expression. This is to help the optimizer pick up logical optimizations. *) open Xquery_core_ast val factorize_flwor : Typing_context.static_context -> acexpr -> acexpr galax-1.1/factorization/factorize_iteration.mli0000664000076400007640000000210010560462356020206 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: factorize_iteration.mli,v 1.5 2007/02/01 22:08:46 simeon Exp $ *) (* Module: Factorize_iteration Description: This module attempts to put expressions in a normal form. For a fragment of the language it moves expressions close as close to the iteration they depend on as possible. *) val factorize_expression : Typing_context.static_context -> Xquery_core_ast.acexpr -> Xquery_core_ast.acexpr galax-1.1/factorization/factorize_globals.mli0000664000076400007640000000216710560462356017650 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: factorize_globals.mli,v 1.3 2007/02/01 22:08:46 simeon Exp $ *) (* Module: Factorize_globals Description: This module takes those expressions which are global variables (for expressions) and makes them let bindings. 
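   For example - illustrative only - in

      for $i in $input/item
      let $m := fn:max($prices)
      return ...

   where $prices is a global variable, the binding of $m depends only on
   globals, so it can be hoisted to a top-level let, making it explicit that it
   does not depend on the enclosing for loop.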
This lets the compiler know that the dependence is lest restrictive. *) (* Factor bindings into global expressions *) val factor_global_expression : Typing_context.static_context -> Xquery_core_ast.acexpr -> Xquery_core_ast.acexpr galax-1.1/factorization/factorize_tpnf_util.mli0000664000076400007640000000422210560462356020223 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: factorize_tpnf_util.mli,v 1.6 2007/02/01 22:08:46 simeon Exp $ *) (* Module: Factorize_tpnf_util Description: This module contains utilities for the TPNF. *) open Error open Namespace_names open Namespace_builtin open Norm_util open Xquery_common_ast open Xquery_common_ast_util open Xquery_core_ast open Xquery_type_core_ast open Xquery_core_ast_util open Xquery_core_ast_annotation open Ast_walker_rewrite_context open Ast_walker_rewrite open Processing_context open Typing_context val debug_apply: string -> acexpr -> acexpr -> unit val get_opt_sbdo_arg_desc: acexpr -> acexpr_desc option val get_one_arg_from_call: acexpr -> acexpr val get_opt_sbdo_arg: acexpr -> (cfname * acexpr * ((csequencetype * cxtype) option list * (csequencetype * cxtype)) * updating_modifier) option val wrap_in_sbdo: static_context rewrite_context -> acexpr -> acexpr val wrap_in_fn_boolean: static_context rewrite_context -> acexpr -> acexpr val var_name_equals: acexpr -> cvname -> bool val is_free_var_of: cvname -> acexpr -> bool val observes_doc_order: acexpr -> bool val is_step: acexpr -> bool val mk_fn_false: static_context rewrite_context -> Xquery_ast.expr_handle -> Finfo.finfo -> acexpr val get_properties: static_context rewrite_context -> acexpr -> (cvname, bool * bool * bool * bool) Hashtbl.t -> bool * bool * bool * bool val is_in_tpnf: acexpr -> bool val is_in_tpnf': acexpr -> bool val is_in_cxq_plus: static_context rewrite_context -> acexpr -> bool galax-1.1/factorization/factorize_tpnf_util.ml0000664000076400007640000004412710624377617020070 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: factorize_tpnf_util.ml,v 1.15 2007/05/21 20:22:39 mff Exp $ *) (* Module: Factorize_tpnf_util Description: This module contains utilities for the TPNF. *) open Error open Namespace_names open Namespace_builtin open Datatypes open Dm_atomic open Dm open Norm_util open Xquery_common_ast open Xquery_common_ast_util open Xquery_core_ast open Xquery_core_ast_util open Xquery_core_ast_annotation open Ast_walker_rewrite_context open Ast_walker_rewrite open Processing_context open Rewriting_judgments open Schema_builtin open Schema_util open Print_top (* *** Helper functions *** *) let debug_apply s ce ce' = let print_annot ff a = begin let (ta, da, fv, sc) = Xquery_core_ast_annotation.annot_components a in Format.fprintf ff "[sc:%s]%!" (Xquery_core_ast_annotation.print_scrambling_annot sc); Format.fprintf ff "[type: "; (match ta with | None -> Format.fprintf ff "None]" | Some m -> Format.fprintf ff "%a]%!" 
Print_type_core.print_cxtype m); end in if !Conf.print_algebra_optimization_rewrite then begin Format.fprintf !Conf.algebra_optimization_rewrite_formatter "@?**** TPNF RULE APPLIED: %s ****%!@.@\nBefore:@\n%a%!@.After:@\n%a%!@.\n" s (fun ff x -> Print_xquery_core.print_cexpr ff x print_annot) ce (fun ff x -> Print_xquery_core.print_cexpr ff x print_annot) ce' end let get_opt_sbdo_arg ce = match ce.pcexpr_desc with | CECall(fname, [arg], sign, x, selfrecur) when rqname_equal fname fs_distinct_docorder || rqname_equal fname fs_distinct_docorder_or_atomic_sequence -> Some (fname, arg, sign, x) | _ -> None let get_opt_sbdo_arg_desc ce = match get_opt_sbdo_arg ce with | Some (_,a,_,_) -> Some a.pcexpr_desc | None -> None let get_one_arg_from_call ce = match ce.pcexpr_desc with | CECall(fname, [arg], sign, x, selfrecur) -> arg | _ -> raise (Query (Internal_Error ("Argument count mismatch in get_one_arg_from_call"))) let wrap_in_sbdo ctxt ce = let norm_ctxt = Typing_context.norm_context_from_stat_context (get_context ctxt) in let (it, ot), opt_fun_kind, upd = Norm_context.one_sig_from_norm_context norm_ctxt (fs_distinct_docorder, 1) in let it' = List.map (fun t -> Some t) it in fmkacexpr (CECall(fs_distinct_docorder, [ce], (it',ot), upd, false)) (empty_ast_annot()) ce.pcexpr_origin ce.pcexpr_loc let wrap_in_fn_boolean ctxt ce = let norm_ctxt = Typing_context.norm_context_from_stat_context (get_context ctxt) in let (it, ot), opt_fun_kind, upd = Norm_context.one_sig_from_norm_context norm_ctxt (fn_boolean, 1) in let it' = List.map (fun t -> Some t) it in let fn_bool = fmkacexpr (CECall(fn_boolean, [ce], (it',ot), upd, false)) (empty_ast_annot()) ce.pcexpr_origin ce.pcexpr_loc in let _ = set_scrambling_annot fn_bool.pcexpr_annot List in fn_bool let var_name_equals ce name = match ce.pcexpr_desc with | CEVar name' -> name = name' | _ -> false let is_free_var_of vname cexpr = let free = Ast_walker_rewrite.free_variables cexpr in List.mem vname free let has_max_one_robust ctxt ce = try has_max_one ce with Query _ -> false (* let ctxt' = Ast_walker_rewrite_context.get_context ctxt in let t = Typing_expr.type_cexpr ctxt' ce in let _ = set_type_annot ce.pcexpr_annot t in has_max_one ce *) (*** OBSERVE DOC ORDER ***********************************************) (* An expression does not observe docorder if it is in the following *) (* fragment : *) (* EXPR := DOC | VAR | FLWOR | STEP *) (* STEP := AXIS::NT *) (* FLOWR := ("for" VAR "in" EXPR | "let" VAR ":=" EXPR)+ *) (* ("where" EXPR)? 
*) (* "return" EXPR *) (* DOC := doc-call | root-call *) (* *) (* maybe we can extend the fragment -- Ph *) (*********************************************************************) let rec observes_doc_order ce = match ce.pcexpr_desc with | CEFLWOR ([CEFOR(t_ce1,n_ce1,None,ce1)], _, None, ce2) -> observes_doc_order ce2 | CECall (fname,[arg],sign,u, selfrecur) -> if rqname_equal fname fn_doc then false else if rqname_equal fname fn_root then false else if rqname_equal fname fs_distinct_docorder then false else if rqname_equal fname fs_distinct_docorder_or_atomic_sequence then false else true | CEForwardAxis (v, axis, nt) | CEReverseAxis (v, axis, nt) -> false | CEVar v -> false | _ -> true let is_step op = match op.pcexpr_desc with | CECall (fname, [arg], sign,u, selfrecur) when rqname_equal fname fn_boolean -> begin match arg.pcexpr_desc with | CEForwardAxis _ | CEReverseAxis _ -> true | _ -> false end | _ -> false let mk_fn_false rewrite_ctxt eh fi = let stat_ctxt = get_context rewrite_ctxt in (*let proc_ctxt = Typing_context.processing_context_from_stat_context stat_ctxt in*) let norm_ctxt = Typing_context.norm_context_from_stat_context stat_ctxt in Norm_util.build_core_false norm_ctxt eh fi (* Helper function for getting the selection (or * root) variable for a subexpression *) let rec get_select_var ce = match ce.pcexpr_desc with | CEIf (e1,e2,e3) -> begin match e3.pcexpr_desc with | CEEmpty -> get_select_var e2 | _ -> None end | CEFLWOR ([CEFOR(t_x,n_x,None,e1)], None, None, e3) -> begin let s1 = get_select_var e1 in let s3 = get_select_var e3 in match s1, s3 with | Some s1', Some s3' -> if rqname_equal s3' n_x then s1 else s3 | _ -> None end | CEFLWOR ([CELET(t_x,n_x,e1)], None, None, e3) -> begin let s1 = get_select_var e1 in let s3 = get_select_var e3 in match s1, s3 with | Some s1', Some s3' -> if rqname_equal s3' n_x then s1 else s3 | _ -> None end | CECall (fname, [arg], sign,u, selfrecur) when rqname_equal fname fs_distinct_docorder || rqname_equal fname fs_distinct_docorder_or_atomic_sequence || rqname_equal fname fn_boolean -> begin get_select_var arg end | CEForwardAxis (vname, axis, nt) when axis = Child || axis = Descendant_or_self || axis = Descendant -> Some vname | CEVar vname -> Some vname | _ -> None (* ** Check wether expression is in the CXQ+ fragment ** *) let rec is_in_cxq_plus ctxt ce = match ce.pcexpr_desc with | CEFLWOR ([CEFOR(t_x,n_x,None,e1)], None, None, e2) | CEFLWOR ([CELET(t_x,n_x,e1)], None, None, e2) -> is_in_cxq_plus ctxt e1 && is_in_cxq_plus ctxt e2 | CEIf (e1,e2,e3) -> begin match e3.pcexpr_desc with | CEEmpty -> is_in_cxq_plus ctxt e1 && is_in_cxq_plus ctxt e2 | _ -> false end | CECall (fname, [arg], sign,u, selfrecur) when rqname_equal fname fs_distinct_docorder || rqname_equal fname fs_distinct_docorder_or_atomic_sequence || rqname_equal fname fn_boolean -> is_in_cxq_plus ctxt arg | CECall (fname, [arg], sign,u, selfrecur) when rqname_equal fname fn_doc || rqname_equal fname fn_root -> false (* inlining doc-calls = bad idea!;-) *) | CEForwardAxis (vname, axis, nt) | CEReverseAxis (vname, axis, nt) -> true (* Warning: need to double-check that allowing all axes here does not *) (* break any of the rewrites *) | CEVar vname -> true (* | CEDocument _ | CEElem _ | CEAnyElem _ | CEAttr _ | CEAnyAttr _ -> true *) | _ -> (* has_max_one_robust ctxt ce *) false (* Helper function for detemining wether the * no2d, gen, ord and nodup properties (in that order) * hold for a given expression. 
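   For example, matching the cases below (illustrative):
      a child axis step yields          (no2d=false, gen=true,  ord=true, nodup=true)
      a descendant axis step yields     (no2d=false, gen=false, ord=true, nodup=true)
   and wrapping an expression in fs:distinct-docorder forces ord and nodup to
   true while leaving no2d and gen unchanged; expressions outside the
   recognized fragment map to the sink value (false, false, false, false).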
*) let rec get_properties ctxt ce symtab = let sink = false, false, false, false in (* let ff = !Conf.core_expr_formatter in *) (* let _ = Format.fprintf ff "==============Getting props for: \n" in *) (* let _ = Print_xquery_core.print_cexpr ff ce (fun a -> fun x -> ()) in *) (* let _ = Format.fprintf ff "\n==============\n" in *) match ce.pcexpr_desc with | CEIf (e1, e2, e3) -> begin match e3.pcexpr_desc with | CEEmpty -> let no2d, gen, ord, nodup = get_properties ctxt e2 symtab in (* let _ = Format.fprintf ff "\t[%b;%b;%b;%b;]\n" no2d gen ord nodup in *) no2d, gen, ord, nodup | _ -> sink end | CEFLWOR ([CEFOR(t_x,n_x,None,e1)], None, None, e3) -> begin let no2d1, gen1, ord1, nodup1 = get_properties ctxt e1 symtab in let symtab' = Hashtbl.copy symtab in let _ = Hashtbl.add symtab' n_x (true,true,true,true) in let no2d3, gen3, ord3, nodup3 = get_properties ctxt e3 symtab' in (* make sure to remain in 'sink' if one if the subexprs returns 'sink' *) if not (no2d1 || gen1 || ord1 || nodup1) || not (no2d3 || gen3 || ord3 || nodup3) then sink else match get_select_var e3 with | Some sel_var -> let no2d = no2d3 && (( (*(rqname_equal sel_var n_x) &&*) no2d1) || not (rqname_equal sel_var n_x)) in let gen = gen3 && (( (*(rqname_equal sel_var n_x) &&*) gen1) || not (rqname_equal sel_var n_x)) in let ord = ((rqname_equal sel_var n_x) && ((ord1 && no2d3) || (ord1 && gen1 && nodup1 && ord3))) || (not (rqname_equal sel_var n_x) && (no2d3 || no2d1 && nodup1 && ord3)) in let nodup = ((rqname_equal sel_var n_x) && nodup1 && gen1 && nodup3) || (not (rqname_equal sel_var n_x) && no2d1 && nodup1 && nodup3) in (* let _ = Format.fprintf ff "\t[%b;%b;%b;%b;]\n" no2d gen ord nodup in *) no2d, gen, ord, nodup | _ -> (* let _ = Format.fprintf ff "No select var, return \t[SINK]\n" in *) sink end | CEFLWOR ([CELET(t_x,n_x,e1)], None, None, e3) -> begin let symtab' = Hashtbl.copy symtab in let props = get_properties ctxt e1 symtab in let _ = Hashtbl.add symtab' n_x props in get_properties ctxt e3 symtab' end | CECall (fname, [arg], sign,u, selfrecur) -> if rqname_equal fname fs_distinct_docorder || rqname_equal fname fs_distinct_docorder_or_atomic_sequence then begin let no2d, gen, ord, nodup = get_properties ctxt arg symtab in (* let _ = Format.fprintf ff "\t[%b;%b;%b;%b;]\n" no2d gen true true in *) no2d, gen, true, true end else if rqname_equal fname fn_boolean then let no2d, gen, ord, nodup = get_properties ctxt arg symtab in (* let _ = Format.fprintf ff "\t[%b;%b;%b;%b;]\n" no2d gen ord nodup in *) no2d, gen, ord, nodup else if rqname_equal fname fn_root || rqname_equal fname fn_doc then (* EXTENSION *) begin true, true, true, true end else if has_max_one_robust ctxt ce then (* let _ = Printf.printf "\t[MAXONE]\n" in *) true, true, true, true else (* let _ = Printf.printf "\t[SINK]\n" in *) sink | CEForwardAxis (vname, axis, nt) when axis = Child || axis = Descendant_or_self || axis = Descendant -> begin match axis with | Child -> (* let _ = Format.fprintf ff "\t[%b;%b;%b;%b;]\n" false true true true in *) false, true, true, true | Descendant_or_self | Descendant -> (* let _ = Format.fprintf ff "\t[%b;%b;%b;%b;]\n" false false true true in *) false, false, true, true | _ -> sink end | CEVar vname -> begin try let no2d, gen, ord, nodup = Hashtbl.find symtab vname in (* let _ = Format.fprintf ff "\t[%b;%b;%b;%b;]\n" no2d gen ord nodup in *) no2d, gen, ord, nodup with Not_found -> (* externally declared? 
*) if has_max_one_robust ctxt ce then true, true, true, true else sink end (* | CEDocument _ | CEElem _ | CEAnyElem _ | CEAttr _ | CEAnyAttr _ -> (* EXTENSION *) begin true, true, true, true end *) | _ -> (* Check type here for maxone? *) if has_max_one_robust ctxt ce && is_in_cxq_plus ctxt ce then (* let _ = Printf.printf "\t[MAXONE]\n" in *) true, true, true, true else (* let _ = Printf.printf "\t[SINK]\n" in *) sink let is_max_one_expr c = match c.pcexpr_desc with | CEDocument _ | CEElem _ | CEAnyElem _ | CEAttr _ | CEAnyAttr _ -> true | CECall (fname, [arg], sign,u, selfrecur) -> (rqname_equal fname fn_doc || rqname_equal fname fn_root) | _ -> false (* ** Check wether expression is in TPNF ** *) let is_in_tpnf ce = let rec is_fp c = begin match c.pcexpr_desc with | CEIf (c1, c2, c3) -> begin match c3.pcexpr_desc with | CEEmpty -> begin match c1.pcexpr_desc with | CECall (fname, [arg],_,_,_) when rqname_equal fname fn_boolean -> is_tp arg && is_fp c2 | _ -> false end | _ -> false end | _ -> is_otp c end and is_tp c = begin match c.pcexpr_desc with | CEFLWOR([CEFOR(t_x,n_x,None,c1)], None, None, c2) -> if rqname_equal n_x fs_dot then begin match c1.pcexpr_desc with | CEVar _ -> is_rc c2 | _ -> is_max_one_expr c1 && is_rc c2 end else false | CEVar _ -> true | _ -> is_atp c || is_max_one_expr c end and is_otp c = begin match c.pcexpr_desc with | CEFLWOR([CEFOR(t_x,n_x,None,c1)], None, None, c2) -> if rqname_equal n_x fs_dot then begin match c1.pcexpr_desc with | CEVar _ -> is_orc c2 | _ -> is_max_one_expr c1 && is_orc c2 end else false | CEVar _ -> true | _ -> is_aotp c || is_max_one_expr c end and is_atp c = begin match c.pcexpr_desc with | CEForwardAxis (v,a,nt) | CEReverseAxis(v,a,nt) -> true | CEFLWOR([CEFOR(t_x,n_x,None,c1)], None, None, c2) -> if rqname_equal n_x fs_dot then begin match c1.pcexpr_desc with | CEForwardAxis (v,a,nt) | CEReverseAxis(v,a,nt) -> is_rc c2 | _ -> false end else false | _ -> false end and is_aotp c = begin match c.pcexpr_desc with | CEForwardAxis (v,a,nt) | CEReverseAxis(v,a,nt) -> true | CEFLWOR([CEFOR(t_x,n_x,None,c1)], None, None, c2) -> if rqname_equal n_x fs_dot then begin match c1.pcexpr_desc with | CEForwardAxis (v,a,nt) | CEReverseAxis(v,a,nt) -> is_orc c2 | _ -> false end else false | _ -> false end and is_rc c = begin match c.pcexpr_desc with | CEIf (c1, c2, c3) -> begin match c3.pcexpr_desc with | CEEmpty -> begin match c1.pcexpr_desc with | CECall (fname, [arg],_,_,_) when rqname_equal fname fn_boolean -> is_atp arg && is_rc c2 | _ -> false end | _ -> false end | _ -> is_atp c end and is_orc c = begin match c.pcexpr_desc with | CEIf (c1, c2, c3) -> begin match c3.pcexpr_desc with | CEEmpty -> begin match c1.pcexpr_desc with | CECall (fname, [arg],_,_,_) when rqname_equal fname fn_boolean -> begin match c2.pcexpr_desc with | CEVar v when rqname_equal v fs_dot -> is_atp arg | _ -> is_atp arg && (is_max_one_expr c2 || is_orc c2) end | _ -> false end | _ -> false end | _ -> is_aotp c end in is_fp ce (* ** Check wether expression is in TPNF ** *) let is_in_tpnf' ce = let rec is_fp c = begin match c.pcexpr_desc with | CEIf (c1, c2, c3) -> begin match c3.pcexpr_desc with | CEEmpty -> begin match c1.pcexpr_desc with | CECall (fname, [arg],_,_,_) -> rqname_equal fname fn_boolean && is_tp arg && is_fp c2 | _ -> false end | _ -> false end | _ -> is_otp c end and is_tp c = begin match c.pcexpr_desc with | CEFLWOR([CEFOR(t_x,n_x,None,c1)], None, None, c2) -> rqname_equal n_x fs_dot && is_tp c1 && is_rc c2 | CEVar _ -> true | _ -> is_atp c || is_max_one_expr c 
end and is_otp c = begin match c.pcexpr_desc with | CEFLWOR([CEFOR(t_x,n_x,None,c1)], None, None, c2) -> rqname_equal n_x fs_dot && is_tp c1 && is_orc c2 | CEVar _ -> true | _ -> is_aotp c || is_max_one_expr c end and is_atp c = begin match c.pcexpr_desc with | CEForwardAxis (v,a,nt) | CEReverseAxis(v,a,nt) -> true | CEFLWOR([CEFOR(t_x,n_x,None,c1)], None, None, c2) -> rqname_equal n_x fs_dot && is_atp c1 && is_rc c2 | _ -> false end and is_aotp c = begin match c.pcexpr_desc with | CEForwardAxis (v,a,nt) | CEReverseAxis(v,a,nt) -> true | CEFLWOR([CEFOR(t_x,n_x,None,c1)], None, None, c2) -> rqname_equal n_x fs_dot && is_atp c1 && is_orc c2 | _ -> false end and is_rc c = begin match c.pcexpr_desc with | CEForwardAxis (v,a,nt) | CEReverseAxis(v,a,nt) -> true | CEIf (c1, c2, c3) -> begin match c3.pcexpr_desc, c1.pcexpr_desc with | CEEmpty, CECall (fname, [arg],_,_,_) -> rqname_equal fname fn_boolean && is_atp arg && is_rc c2 | _ -> false end | _ -> is_atp c end and is_orc c = begin match c.pcexpr_desc with | CEIf (c1, c2, c3) -> begin match c3.pcexpr_desc, c1.pcexpr_desc with | CEEmpty, CECall (fname, [arg],_,_,_) when rqname_equal fname fn_boolean -> begin match c2.pcexpr_desc with | CEVar v when rqname_equal v fs_dot -> is_atp arg | _ -> (is_atp arg) && (is_orc c2 || is_max_one_expr c2) end | _ -> false end | _ -> is_aotp c end in is_fp ce galax-1.1/factorization/factorize_globals.ml0000664000076400007640000004561010705011416017463 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: factorize_globals.ml,v 1.37 2007/10/16 01:25:34 mff Exp $ *) (* Module: Factorize_globals Description: This module takes those expressions which are global variables (for expressions) and makes them let bindings. This lets the compiler know that the dependence is lest restrictive. *) open Xquery_common_ast open Xquery_core_ast open Xquery_core_ast_util open Xquery_core_ast_annotation open Xquery_type_core_ast (* This module rewrites core expressions so that global variables are expressed as top level CElet bindings. Consider: let $x := expr1 return expr2 if free_variables(expr1) subset global_variables then we can rewrite $x as a top level global. Given an entire statement the free variables(statement) are assumed global. NOTE: If the query is not properly formed, this will result in strange looking queries. Specifically, if a query uses a variable that is not bound, it could be rewritten as a toplevel let binding. NOTE: for $y in .. let $z := expr($y) is not global. COND: We assume the variables have already been annotated with free variables *) (* Note on annotations: o The annotations are not correctly kept by this module. Change this so that free variables are correctly calculated. o We need a better way to deal with preserving annotations. *) exception FactorError of string let get_free_variables x = match get_free_var_annot x.pcexpr_annot with | Some fv -> fv | None -> let s = Print_top.bprintf_acexpr "" x in raise (FactorError ("Unannotated core expression: " ^ s)) let is_subset vl1 vl2 = List.for_all (fun x -> List.mem x vl2) vl1 (***********************************) (* The return environment. *) (* It contains a list of bindings *) (***********************************) type global_return_env = (* Order in this list is important. 
if i > j => b_i does not depend on b_j *) { bindings : (cvname * ((csequencetype * cxtype) option * acexpr)) list ; } let mk_global_return bindings = { bindings = bindings } let combine_global_envs b1 b2 = { bindings = Gmisc.remove_duplicates (b1.bindings @ b2.bindings) } let empty_renv = mk_global_return [] (*******************************) (* The passed down environment *) (*******************************) type global_passed_env = { stat_ctxt : Typing_context.static_context; (* Order enforces scoping as usual *) rebindings : (cvname * cvname) list; generator : Namespace_generate.name_gen ref; globals : cvname list} (* This is really a set *) let mk_global_passed_env sc rebindings globals = let proc_ctxt = Typing_context.processing_context_from_stat_context sc in let ng = Processing_context.get_name_generator proc_ctxt Namespace_builtin.fs_prefix Namespace_builtin.fs_uri "" in { stat_ctxt = sc; rebindings = rebindings; generator = ng; globals = globals; } let get_global_variables genv = genv.globals let get_new_factored_variable genv vn = let ng = !(genv.generator) in let (_, _, ncname) = vn in Namespace_generate.generate_name_with_prefix ng ncname (* Called to add a binding for a variable which is not global *) let add_global_binding genv vn ((odt,ce) as bind) = (* Make sure there are no scoping problems *) let vn' = get_new_factored_variable genv vn in let new_genv = mk_global_passed_env genv.stat_ctxt ((vn, vn') :: genv.rebindings) (vn :: genv.globals) in let new_renv = mk_global_return [(vn', bind)] in new_genv, new_renv let remove_name_binding vn l = List.filter (fun x -> not (Namespace_names.rqname_equal x vn)) l (* Called for each variable that is not global *) let add_non_global_binding genv vn = mk_global_passed_env genv.stat_ctxt ((vn, vn) :: genv.rebindings) (remove_name_binding vn genv.globals) let add_opt_non_global_binding genv ovn = match ovn with | None -> genv | Some vn -> add_non_global_binding genv vn let get_rebound_variable genv vn = if (List.mem_assoc vn genv.rebindings) then List.assoc vn genv.rebindings else vn (**************************************************************) (************************ IMPORTANT ***************************) (* In this section, the order of bindings matter *) (* Those bindings at the end, can depend on previous bindings *) (* So we need to reverse the binding list (just for tail *) (* recursion, we could use a fold_right *) (**************************************************************) (**************************************************************) let dump_let_bindings (ce, renv) = let at = ce.pcexpr_annot in let eh = ce.pcexpr_origin in let fi = ce.pcexpr_loc in let construct_CELET_binding (vn, (odt,fce)) = CELET (odt, vn, fce) in (* Now include all top level bindings as CELets *) let let_bindings = List.map construct_CELET_binding renv.bindings in make_let_flwor let_bindings ce at eh fi (************************************************) (* Our condition is that the binding should move ce: core expression 1. free variables in ce are a subset of the current globals (it depends only on globals) 2. ce is not a scalar or a variable value 3. ce not dependent on the context item fs:dot 4. 
ce is side-effect free *) (*************************************************) let is_forbidden_expr ce = match ce.pcexpr_desc with | CEComment _ | CEText _ | CEScalar _ | CEPI _ | CEEmpty | CEVar _ | CEForwardAxis _ | CEReverseAxis _ -> true | _ -> false let binding_should_move genv ce1 = let fv = get_free_variables ce1 in ((not (is_forbidden_expr ce1)) && (not (List.mem Xquery_common_ast.fs_dot fv))) && (is_subset fv (get_global_variables genv)) && (not (Rewriting_judgments.has_side_effect_judge ce1)) (***********************) (* Intermediate walker *) (***********************) let rec factor_globals_walker (genv:global_passed_env) cexpr = let eh = cexpr.pcexpr_origin in let fi = cexpr.pcexpr_loc in (* This should be used when we don't have a new operand, and then it should be able to calc the annotaitons efficiently.... *) let build_unmodified_return desc = (* SHOULD ASSERT THAT DESC = cexpr.desc *) (* let at = cexpr.pcexpr_annot in *) fmkcexpr desc eh fi in (* This is should be called when we can efficiently recalculate the annotations *) let build_changed_return desc = fmkcexpr desc eh fi in match cexpr.pcexpr_desc with | CEUnordered ce -> let fce, renv = factor_globals_walker genv ce in (build_unmodified_return (CEUnordered fce)), renv | CEOrdered ce -> let fce, renv = factor_globals_walker genv ce in (build_unmodified_return (CEOrdered fce)), renv | CEFLWOR (cfls, wc, ob, ret) -> let (fl,wc,ob,ret),renv = factor_global_flwor genv cfls wc ob ret in (* We must handle the case when all the bindings are gone *) if (fl = []) then begin (* NOTE: Order By can be ignored because only let bindings are there. This means there is only a singleton sequence to be ordered *) match wc with (* Just the return clause should remain *) | None -> ret, renv | (Some wc) -> (* This is nasty *) (* Normalize back to CEIf *) let eh = cexpr.pcexpr_origin in let fi = cexpr.pcexpr_loc in let empty = fmkcexpr CEEmpty eh fi in (build_changed_return (CEIf (wc, ret,empty))), renv end else (build_changed_return (CEFLWOR (fl,wc,ob,ret))), renv | CEIf (cond, ce1, ce2) -> let fcond,renv = factor_globals_walker genv cond in let fce1 = dump_let_bindings (factor_globals_walker genv ce1) in let fce2 = dump_let_bindings (factor_globals_walker genv ce2) in (build_unmodified_return (CEIf(fcond, fce1, fce2))), renv | CETypeswitch (on_cond, patterns) -> let fcond,renv = factor_globals_walker genv on_cond in let typeswitch_patterns (cp, ovn, ce) = let genv = add_opt_non_global_binding genv ovn in let fce = dump_let_bindings (factor_globals_walker genv ce) in (cp, ovn, fce) in let pats = List.map typeswitch_patterns patterns in (build_unmodified_return (CETypeswitch (fcond, pats))), renv | CECall(cfname, cexprs, ti, upd, selfrecur) -> let fconds, renv = factor_globals_walker_map genv cexprs in (build_unmodified_return (CECall(cfname, fconds, ti, upd, selfrecur))), renv | CEOverloadedCall(name, cexprs, sigtable) -> let fconds, renv = factor_globals_walker_map genv cexprs in (build_unmodified_return (CEOverloadedCall(name, fconds, sigtable))), renv | CESeq(ce1,ce2) -> let fce1,renv1 = factor_globals_walker genv ce1 in let fce2,renv2 = factor_globals_walker genv ce2 in let renv = combine_global_envs renv1 renv2 in (build_unmodified_return (CESeq (fce1, fce2))), renv | CEImperativeSeq(ce1,ce2) -> let fce1,renv1 = factor_globals_walker genv ce1 in let fce2,renv2 = factor_globals_walker genv ce2 in let renv = combine_global_envs renv1 renv2 in (build_unmodified_return (CEImperativeSeq (fce1, fce2))), renv | CEDocument ce -> let fce,renv 
= factor_globals_walker genv ce in (build_unmodified_return (CEDocument fce)), renv | CEPIComputed (ce1, ce2) -> let fce1,renv1 = factor_globals_walker genv ce1 in let fce2,renv2 = factor_globals_walker genv ce2 in let renv = combine_global_envs renv1 renv2 in (build_unmodified_return (CEPIComputed (fce1, fce2))), renv | CECommentComputed ce -> let fce,renv = factor_globals_walker genv ce in (build_unmodified_return (CECommentComputed fce)), renv | CETextComputed ce -> let fce,renv = factor_globals_walker genv ce in (build_unmodified_return (CETextComputed fce)), renv | CEElem (name,ns, cexprs) -> let fces,renv = factor_globals_walker_map genv cexprs in (build_unmodified_return (CEElem (name,ns, fces))),renv | CEAnyElem (ce1, ns, ce2) -> let fce1, renv1 = factor_globals_walker genv ce1 in let fce2, renv2 = factor_globals_walker genv ce2 in let renv = combine_global_envs renv1 renv2 in (build_unmodified_return (CEAnyElem (fce1, ns, fce2))), renv | CEAttr (name, cexprs) -> let fces,renv = factor_globals_walker_map genv cexprs in (build_unmodified_return (CEAttr (name,fces))),renv | CEAnyAttr (ce1, nsenv, ce2) -> let fce1, renv1 = factor_globals_walker genv ce1 in let fce2, renv2 = factor_globals_walker genv ce2 in let renv = combine_global_envs renv1 renv2 in (build_unmodified_return (CEAnyAttr (fce1, nsenv, fce2))), renv | CEError cexpr_list -> let fcexpr_list, renv = factor_globals_walker_map genv cexpr_list in (build_unmodified_return (CEError fcexpr_list)), renv | CETreat (ce, dt) -> let fce, renv = factor_globals_walker genv ce in (build_unmodified_return (CETreat (fce,dt))), renv | CEValidate (vm, ce) -> let fce, renv = factor_globals_walker genv ce in (build_unmodified_return (CEValidate (vm,fce))), renv | CECast (ce, nsenv, dt) -> let fce, renv = factor_globals_walker genv ce in (build_unmodified_return (CECast (fce, nsenv, dt))), renv | CECastable (ce, nsenv, dt) -> let fce, renv = factor_globals_walker genv ce in (build_unmodified_return (CECastable (fce, nsenv, dt))), renv | CESome (odt, vn, ce1, ce2) -> let fce1, renv1 = factor_globals_walker genv ce1 in let genv = add_non_global_binding genv vn in let fce2, renv2 = factor_globals_walker genv ce2 in let renv = combine_global_envs renv1 renv2 in (build_unmodified_return (CESome (odt,vn, fce1, fce2))), renv | CEEvery (odt, vn, ce1, ce2) -> let fce1, renv1 = factor_globals_walker genv ce1 in let genv = add_non_global_binding genv vn in let fce2, renv2 = factor_globals_walker genv ce2 in let renv = combine_global_envs renv1 renv2 in (build_unmodified_return (CEEvery (odt,vn, fce1, fce2))), renv (* NOTE: Are the sequence expressions independent or dependent? They are being treated here as independent. PLEASE MAKE SURE THIS IS CORRECT. 
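As written, independent means that each child expression is walked against the same passed-down environment and the returned binding lists are simply concatenated with combine_global_envs; a dependent treatment would instead thread the bindings returned by one child into the walk of the next.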
If they should be dependent just switch the result to a fold walker *) | CECopy ce -> let fce, renv = factor_globals_walker genv ce in (build_unmodified_return (CECopy fce)), renv | CEDelete ce -> let fce, renv = factor_globals_walker genv ce in (build_unmodified_return (CEDelete fce)), renv | CEInsert (ce, aci) -> let fce, renv1 = factor_globals_walker genv ce in let faci, renv2 = factor_globals_walker_insert genv aci in let renv = combine_global_envs renv1 renv2 in (build_unmodified_return (CEInsert (fce, faci))), renv | CEReplace (vof, ce1, ce2) -> let fce1, renv1 = factor_globals_walker genv ce1 in let fce2, renv2 = factor_globals_walker genv ce2 in let renv = combine_global_envs renv1 renv2 in (build_unmodified_return (CEReplace (vof, fce1, fce2))), renv | CERename (nsenv, ce1, ce2) -> let fce1, renv1 = factor_globals_walker genv ce1 in let fce2, renv2 = factor_globals_walker genv ce2 in let renv = combine_global_envs renv1 renv2 in (build_unmodified_return (CERename (nsenv, fce1, fce2))), renv | CESnap (sm,cexpr) -> let fce, renv = factor_globals_walker genv cexpr in (build_unmodified_return (CESnap (sm,fce))), renv | CELetvar (odt, vn, ce1, ce2) -> let fce1, renv1 = factor_globals_walker genv ce1 in let genv = add_non_global_binding genv vn in let fce2, renv2 = factor_globals_walker genv ce2 in let renv = combine_global_envs renv1 renv2 in (build_unmodified_return (CELetvar (odt,vn, fce1, fce2))), renv | CESet (v,cexpr) -> let fce, renv = factor_globals_walker genv cexpr in (build_unmodified_return (CESet (v,fce))), renv (* Mary: I'm assuming that CEExecute behaves like CESnap *) | CEExecute (async, ncname, uri, hostport,cexpr) -> let fhostport, renv1 = factor_globals_walker genv hostport in let fce, renv3 = factor_globals_walker genv cexpr in let renv' = combine_global_envs renv1 renv3 in (build_unmodified_return (CEExecute (async, ncname, uri, fhostport, fce))), renv' (* Should clean variables *) | CEVar vn -> (build_changed_return (CEVar (get_rebound_variable genv vn))), empty_renv | CEForwardAxis _ | CEReverseAxis _ (* fs:dot should never be renamed.. *) (* Bases *) | CEComment _ | CEText _ | CECharRef _ | CEScalar _ | CEProtoValue _ | CEPI _ | CEEmpty -> cexpr, empty_renv (* It is a map, so I am assuming the individual expressions are independent from one another *) and factor_globals_walker_map genv cexprs = let fconds, renvs = List.split (List.map (factor_globals_walker genv) cexprs) in let renv = match renvs with | [] -> (* Should this be an error? 
*) empty_renv | x :: rest -> (* Should just check non-empty with an if CHANGE THIS *) List.fold_left combine_global_envs x rest in fconds, renv and factor_global_flwor genv cfls wc ob ret = match cfls with [] -> let (wc,ob,ret),renv = factor_global_flwor_where genv wc ob ret in ([], wc, ob,ret), renv | CELET(odt, vn, ce1) :: rest -> let fce1, renv1 = factor_globals_walker genv ce1 in if binding_should_move genv ce1 then begin let genv, bind = add_global_binding genv vn (odt, fce1) in let op, renv2 = factor_global_flwor genv rest wc ob ret in let renv = combine_global_envs renv1 (combine_global_envs bind renv2) in op, renv end else begin let genv = add_non_global_binding genv vn in let this_op = CELET (odt, vn, fce1) in let (fls, wc, ob, ret),renv2 = factor_global_flwor genv rest wc ob ret in let renv = combine_global_envs renv1 renv2 in (this_op :: fls, wc, ob, ret), renv end | CEFOR(odt, vn, pos, ce1) :: rest -> let genv = add_non_global_binding genv vn in let genv = add_opt_non_global_binding genv pos in let fce1, renv1 = factor_globals_walker genv ce1 in let this_op = CEFOR(odt,vn,pos, fce1) in let (fls, wc, ob, ret),renv2 = factor_global_flwor genv rest wc ob ret in let renv = combine_global_envs renv1 renv2 in (this_op :: fls, wc, ob,ret), renv and factor_global_flwor_where genv wc ob ret = match wc with | None -> let (ob,ret), renv = factor_global_flwor_order_by genv ob ret in (None, ob, ret), renv | Some w -> let w,renv1 = factor_globals_walker genv w in let (ob,ret), renv2 = factor_global_flwor_order_by genv ob ret in let renv = combine_global_envs renv1 renv2 in ((Some w), ob,ret), renv and factor_global_flwor_order_by genv ob ret = match ob with | None -> let fret, renv = factor_globals_walker genv ret in (None, fret), renv | Some (sk, ospecs, osig) -> let fospecs, renvs = List.split (List.map (factor_acorderby genv) ospecs) in let fret, renv = factor_globals_walker genv ret in let renv = List.fold_left combine_global_envs renv renvs in ((Some (sk, fospecs, osig)),fret), renv and factor_acorderby genv (ce,sk,esk) = let fce, renv = factor_globals_walker genv ce in (fce, sk,esk), renv and factor_globals_walker_insert fenv il = match il with | CUAsLastInto ce -> let fce, renv = factor_globals_walker fenv ce in (CUAsLastInto fce), renv | CUAsFirstInto ce -> let fce, renv = factor_globals_walker fenv ce in (CUAsFirstInto fce), renv | CUInto ce -> let fce, renv = factor_globals_walker fenv ce in (CUInto fce), renv | CUAfter ce -> let fce, renv = factor_globals_walker fenv ce in (CUAfter fce), renv | CUBefore ce -> let fce, renv = factor_globals_walker fenv ce in (CUBefore fce), renv let factor_global_expression stat_ctxt ce = ignore ( Factorize_free_var.compute_free_vars ce ); let globals = get_free_variables ce in let genv = mk_global_passed_env stat_ctxt [] globals in dump_let_bindings (factor_globals_walker genv ce) galax-1.1/factorization/factorize_iteration.ml0000664000076400007640000005226710705011416020044 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: factorize_iteration.ml,v 1.36 2007/10/16 01:25:34 mff Exp $ *) (* Module: Factorize_iteration Description: This module attempts to put expressions in a normal form. For a fragment of the language it moves expressions close as close to the iteration they depend on as possible. 
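   A sketch of the intended effect on a hypothetical query (the variable and
   function names below are illustrative only, they do not come from the code
   or its test suite):

     for $x in $input//item
     return glx:expensive($y)

   Here glx:expensive($y) does not depend on $x, so, provided it is judged
   side-effect free, this pass binds it to a fresh $glx:itfact_N variable and
   moves it as far up as its own dependencies allow, roughly:

     let $glx:itfact_0 := glx:expensive($y)
     for $x in $input//item
     return $glx:itfact_0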
*) open Xquery_core_ast let unique_binding_id = ref 0 let get_new_factored_name () = let local_name = "itfact_" ^ (string_of_int !unique_binding_id) in incr unique_binding_id; (Namespace_builtin.glx_prefix, Namespace_builtin.glx_uri, local_name) (* Return environment, holds bindings of names to expressions *) type return_env = (Xquery_common_ast.cvname * Xquery_core_ast.acexpr ) list let empty_renv () = [] let make_renv l = l let combine_renvironments i1 i2 = i1 @ i2 let get_bindings renv = renv let add_binding renv vn ce = let renv = (vn,ce) :: renv in let desc = CEVar vn in (Xquery_core_ast_util.fmkacexpr desc ce.pcexpr_annot ce.pcexpr_origin ce.pcexpr_loc), renv let add_fresh_binding (renv:return_env) ce = add_binding renv (get_new_factored_name ()) ce let assert_empty_renv renv e_string = match renv with | [] -> () | _ -> raise (Error.Query (Error.Factorization ("Assert Empty Renv failed: " ^ e_string))) (**********************) (* INTIAL ENVIRONMENT *) (**********************) (* type env. Here we record the variables that are in scope and are iteration dependant. *) type current_scope = | TopLevel | InConditional | Iteration of Xquery_common_ast.cvname list type env = { current_scope : current_scope; stat_ctxt : Typing_context.static_context; } let mk_env scope sc = { current_scope=scope; stat_ctxt=sc} let initial_env sc = mk_env TopLevel sc let enter_conditional env = mk_env InConditional env.stat_ctxt let add_binding env vn = let scope = match env.current_scope with | TopLevel -> TopLevel | InConditional -> InConditional | Iteration il -> Iteration (vn :: il) in mk_env scope env.stat_ctxt let add_opt_binding env ovn = match ovn with | None -> env | Some vn -> add_binding env vn let non_trivial_intersect (l1: 'a list) (l2: 'a list) = List.exists (fun v -> List.mem v l1) l2 let new_iterate_variable env vn = mk_env (Iteration [vn]) env.stat_ctxt (* Judgments *) let side_effect_free_judge env ce = not (Rewriting_judgments.has_side_effect_judge ce) (* Main judgement for iterate factorization: If the expression has side-effects, it will not be moved. If an expression is toplevel (i.e. there is no iteration) or in a conditional, then it does not need to be pulled up. If an expression is not toplevel or in a conditional and depends on a variable that is in the most recent iteration scope, then it should not be pulled up. If an expression is not toplevel or in a conditional and only depends on those variables in the most recent iteration scope, then it should be pulled up. 
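   Concretely, on a hypothetical fragment: while walking the return clause of
   a for $x in ... iteration, a subexpression whose free variables are only
   $y and $z, neither of which is bound by that iteration, satisfies the
   judgment below and is a candidate for being pulled above the loop, whereas
   a subexpression that mentions $x stays where it is.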
*) let does_not_depend_judge env ce = match env.current_scope with | TopLevel | InConditional -> false | Iteration bindings -> match Xquery_core_ast_annotation.get_free_var_annot ce.pcexpr_annot with | None -> let s = Print_top.bprintf_acexpr "" ce in raise (Error.Query (Error.Factorization ("Unannotated core expression: " ^ s))) | Some fv -> not (non_trivial_intersect fv bindings) (* Holds if the core_expression depends on the variable *) let depends_on_judge vn ce = match Xquery_core_ast_annotation.get_free_var_annot ce.pcexpr_annot with | None -> let s = Print_top.bprintf_acexpr "" ce in raise (Error.Query (Error.Factorization ("Unannotated core expression: " ^ s))) | Some fv -> List.mem vn fv (* Pullup judge *) (* (i) does_not_depend fenv ce (ii) is not CEVar (iii) is not a constant (iv) does not contain side-effects *) let pullup_judge fenv ce = let rule_2_or_3 = match ce.pcexpr_desc with | CEVar _ | CEEmpty | CEScalar _ | CEComment _ | CEText _ | CEPI _ -> false | _ -> true in rule_2_or_3 && (does_not_depend_judge fenv ce) && (side_effect_free_judge fenv ce) (************************) (* Binding Construction *) (************************) let make_let_flwor let_bindings return_expression = if let_bindings = [] then return_expression else begin let at = return_expression.pcexpr_annot in let eh = return_expression.pcexpr_origin in let fi = return_expression.pcexpr_loc in let ce = Xquery_core_ast_util.fmkacexpr (CEFLWOR(let_bindings, None, None, return_expression)) at eh fi in (***********************************) (* REANNOTATE ONLY FREE VARIABLES! *) (***********************************) ignore (Factorize_free_var.compute_free_vars ce); ce end (******************************************) (* Here, we handle restoring the bindings *) (******************************************) let make_let_binding (vn,ce) = CELET(None, vn, ce) (* If no variable name is given, then ALL bindings stay here. *) (* returns (Bindings that stay here * bindings that continue up *) (* Partition the bindings into those that are going to stay here, and those that are going to move up. 
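   For instance (hypothetical names): with vn = Some $x, a pending binding for
   $glx:itfact_3 whose bound expression mentions $x is emitted as a let at
   this level, while a pending binding that does not mention $x keeps moving
   up towards the binder it actually depends on.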
*) (* A binding stays here, if it depends on vn *) let partition_bindings renv vn = match vn with | None -> (get_bindings renv), [] | Some vn -> List.partition (fun (bname, bexpr) -> depends_on_judge vn bexpr) (get_bindings renv) (* Handles partitioning the variabels and creating the return environment *) let handle_return_helper renv vn = let bindings_for_here, bindings_to_move = partition_bindings renv vn in let renv = make_renv bindings_to_move in let lets = List.map make_let_binding bindings_for_here in lets, renv let handle_return_exprs_flwor renv vn cur_fls = let let_bindings, renv = handle_return_helper renv (Some vn) in (cur_fls :: let_bindings), renv let handle_return_exprs renv vn return_expr = let let_bindings, renv = handle_return_helper renv vn in let expr = make_let_flwor let_bindings return_expr in expr, renv (******************) (* Main Recursion *) (******************) let rec factor_expression fenv ce = let { pcexpr_desc = ce_desc; pcexpr_annot = at; pcexpr_origin = eh; pcexpr_loc = fi;} = ce in if pullup_judge fenv ce then add_fresh_binding (empty_renv ()) ce else begin let ce_desc, renv = match ce_desc with (*****************************************) (* Core Expressions that cause iteration *) (*****************************************) | CEFLWOR(cfls, where, orderby, ret) -> let (cfls, where,orderby,ret),renv = factor_flwor fenv cfls where orderby ret in CEFLWOR (cfls, where, orderby, ret), renv (*************************************) (* Iteration implicitly happens for existential and universal quantification *) (*************************************) | CESome (odt, vn, in_expr, return_expr) -> let in_expr, indep_renv = factor_expression fenv in_expr in let env = new_iterate_variable fenv vn in (*******************************) (* Handle dependent expression *) (*******************************) let return_expr, dep_renv = factor_expression env return_expr in (* Add in all the necessary bindings *) let return_expr, dep_renv = handle_return_exprs dep_renv (Some vn) return_expr in let fcexpr = CESome (odt, vn, in_expr, return_expr) in let renv = combine_renvironments indep_renv dep_renv in fcexpr, renv | CEEvery (odt, vn, in_expr, return_expr) -> let in_expr, indep_renv = factor_expression fenv in_expr in let fenv = new_iterate_variable fenv vn in (*******************************) (* Handle dependent expression *) (*******************************) let return_expr, dep_renv = factor_expression fenv return_expr in (* Add in all the necessary bindings *) let return_expr, dep_renv = handle_return_exprs dep_renv (Some vn) return_expr in let fcexpr = CEEvery (odt, vn, in_expr, return_expr) in let renv = combine_renvironments indep_renv dep_renv in fcexpr, renv (****************) (* CONDITIONALS *) (****************) (**********************************************************) (* Type switch is more complicated, we do not pull out of *) (* branches *) (**********************************************************) | CETypeswitch(on_cond, pat_list) -> let fcond,renv = factor_expression fenv on_cond in let handle_branch (pat, name, ce) = let fenv = enter_conditional fenv in (* Factor the expression *) let fce, renv = factor_expression fenv ce in let fce, renv = handle_return_exprs renv name ce in let () = assert_empty_renv renv "CETypeswitch" in (* Tossing renv *) pat, name, fce in let patterns = List.map handle_branch pat_list in (CETypeswitch (fcond, patterns)), renv (****************************************************) (* Here we stop pulling up bindings on the branches *) 
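(* Presumably the point is to keep evaluation conditional: hoisting a
   pending binding out of an if-branch would evaluate its expression even
   when that branch is not taken, so each branch dumps its own bindings via
   handle_return_exprs with None and the environment returned from the
   branch is then asserted to be empty. *)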
(****************************************************) | CEIf(cond, ce1, ce2) -> (* Only the cond is factored *) let fcond, renv = factor_expression fenv cond in let fce1, renv_ce1 = factor_expression fenv ce1 in let fce1, renv_ce1 = handle_return_exprs renv_ce1 None fce1 in let () = assert_empty_renv renv_ce1 "CEIF [Cond1]" in let fce2, renv_ce2 = factor_expression fenv ce2 in let fce2, renv_ce2 = handle_return_exprs renv_ce2 None fce2 in let () = assert_empty_renv renv_ce2 "CEIF [Cond2]" in CEIf(fcond, fce1, fce2), renv (******************************************************************************) (* In this section, we are just walking the tree. Our factorization algorithm *) (* does not do anything to the children of these expressions *) (******************************************************************************) (****************************************) (* Beyond here we do not bind variables *) (****************************************) | CEUnordered ce -> let factored_expression, return_env = factor_expression fenv ce in (CEUnordered (factored_expression)), return_env | CEOrdered ce -> let factored_expression, return_env = factor_expression fenv ce in (CEOrdered (factored_expression)), return_env | CEWhile (ce1, ce2) -> let fce1, renv1 = factor_expression fenv ce1 in let fce2, renv2 = factor_expression fenv ce2 in let renv = combine_renvironments renv1 renv2 in CEWhile(fce1, fce2), renv | CESeq (ce1, ce2) -> let fce1, renv1 = factor_expression fenv ce1 in let fce2, renv2 = factor_expression fenv ce2 in let renv = combine_renvironments renv1 renv2 in CESeq(fce1, fce2), renv | CEImperativeSeq (ce1, ce2) -> let fce1, renv1 = factor_expression fenv ce1 in let fce2, renv2 = factor_expression fenv ce2 in let renv = combine_renvironments renv1 renv2 in CEImperativeSeq(fce1, fce2), renv | CEPIComputed (ce1, ce2) -> let fce1, renv1 = factor_expression fenv ce1 in let fce2, renv2 = factor_expression fenv ce2 in let renv = combine_renvironments renv1 renv2 in (CEPIComputed(fce1, fce2)), renv | CECommentComputed ( ce ) -> let fce, renv = factor_expression fenv ce in (CECommentComputed fce), renv | CETextComputed ce -> let tce, renv = factor_expression fenv ce in (CETextComputed tce), renv | CEElem(cename, nsenv, cexpr_l) -> let fces, renv = factor_expression_list fenv cexpr_l in (CEElem (cename, nsenv, fces)), renv | CEAnyElem(ce1, nsenv1, nsenv2, ce2) -> let fce1, renv1 = factor_expression fenv ce1 in let fce2, renv2 = factor_expression fenv ce2 in let renv = combine_renvironments renv1 renv2 in (CEAnyElem (fce1, nsenv1, nsenv2, fce2)), renv | CEAttr (can, cexpr_l) -> let fces, renv = factor_expression_list fenv cexpr_l in CEAttr (can, fces), renv | CEAnyAttr(ce1, nsenv, ce2) -> let fce1, renv1 = factor_expression fenv ce1 in let fce2, renv2 = factor_expression fenv ce2 in let renv = combine_renvironments renv1 renv2 in (CEAnyAttr (fce1, nsenv, fce2)), renv | CEError cexpr_list -> let fcexpr_list, renv = factor_expression_list fenv cexpr_list in (CEError fcexpr_list), renv | CETreat (ce, cst) -> let fce, renv = factor_expression fenv ce in (CETreat (fce, cst)), renv | CEValidate(vm, ce) -> let fce, renv = factor_expression fenv ce in (CEValidate (vm, fce)), renv | CECast(ce, nsenv, cst) -> let fce, renv = factor_expression fenv ce in (CECast (fce, nsenv, cst)), renv | CECastable(ce, nsenv, cst) -> let fce, renv = factor_expression fenv ce in (CECastable (fce, nsenv, cst)), renv | CELetServerImplement(nc1, nc2, ce1, ce2) -> let fce1, renv1 = factor_expression fenv ce1 in let fce2, renv2 
= factor_expression fenv ce2 in let renv = combine_renvironments renv1 renv2 in (CELetServerImplement(nc1, nc2, fce1, fce2)), renv | CEExecute(async, ncname, uri, ce1, ce2) -> let fce1, renv1 = factor_expression fenv ce1 in let fce2, renv2 = factor_expression fenv ce2 in let renv = combine_renvironments renv1 renv2 in (CEExecute(async, ncname, uri, fce1, fce2)), renv | CEForServerClose(nc1, uri, ce1) -> let fce1, renv1 = factor_expression fenv ce1 in (CEForServerClose(nc1, uri, fce1)), renv1 | CEEvalClosure(ce1) -> let fce1, renv1 = factor_expression fenv ce1 in (CEEvalClosure(fce1)), renv1 | CEDocument ce -> let fce, renv = factor_expression fenv ce in (CEDocument fce), renv | CECall(cfname, cexpr_l, types, upd, selfrecur) -> let fces, renv = factor_expression_list fenv cexpr_l in (CECall(cfname, fces, types, upd, selfrecur)), renv | CEOverloadedCall (cfname, cexpr_l, sigs) -> let fces, renv = factor_expression_list fenv cexpr_l in (CEOverloadedCall(cfname, fces, sigs)), renv | CECopy ce1 -> let fce1, renv = factor_expression fenv ce1 in (CECopy fce1), renv (******************************) (* These are the update cases *) (******************************) | CEDelete ce1 -> let fce1, renv = factor_expression fenv ce1 in (CEDelete fce1), renv | CEInsert (cei, il) -> let finsert, renv1 = factor_insert_location fenv il in let fces, renv2 = factor_expression fenv cei in let renv = combine_renvironments renv1 renv2 in (CEInsert (fces, finsert)), renv | CERename (nsenv, ce1, ce2) -> let fce1, renv1 = factor_expression fenv ce1 in let fce2, renv2 = factor_expression fenv ce2 in let renv = combine_renvironments renv1 renv2 in (CERename (nsenv, fce1, fce2)), renv | CEReplace (vof, ce1, ce2) -> let fce1, renv1 = factor_expression fenv ce1 in let fce2, renv2 = factor_expression fenv ce2 in let renv = combine_renvironments renv1 renv2 in (CEReplace (vof, fce1, fce2)), renv | CESnap (sm,cexpr) -> let fce, renv = factor_expression fenv cexpr in (CESnap (sm,fce)), renv | CELetvar (odt, vn, ce1, ce2) -> let ce1', indep_renv = factor_expression fenv ce1 in let env = new_iterate_variable fenv vn in let ce2', dep_renv = factor_expression env ce2 in let ce2'', dep_renv = handle_return_exprs dep_renv (Some vn) ce2' in let fcexpr = CELetvar (odt, vn, ce1', ce2'') in let renv = combine_renvironments indep_renv dep_renv in fcexpr, renv | CESet (v, cexpr) -> let fce, renv = factor_expression fenv cexpr in (CESet (v,fce)), renv (*********************************************************) (* Base case *********************************************) (* These expressions can have their children factorized *) (*********************************************************) | ((CEVar _) as v) | ((CEScalar _) as v) | ((CEProtoValue _) as v) | (CEEmpty as v) | ((CEComment _) as v) | ((CEPI _) as v) | ((CEText _) as v) | ((CECharRef _) as v) | ((CEForwardAxis _) as v) | ((CEReverseAxis _) as v) -> v, (empty_renv ()) in (Xquery_core_ast_util.fmkacexpr ce_desc at eh fi), renv end and factor_expression_list fenv cel = let exprs, renvs = List.split (List.map (factor_expression fenv) cel) in let renv = List.fold_left combine_renvironments (empty_renv()) renvs in exprs, renv and factor_flwor fenv cfls where orderby ret = (* Depencency is backward from the factorization steps *) (* Return is just an expression *) (* Order By *) let factor_orderby fenv ob ret = let factor_acorderby fenv (ce,sk,esk) = let fce, renv = factor_expression fenv ce in (fce, sk,esk), renv in match ob with | None -> let fret, renv = factor_expression fenv ret in 
(None, fret), renv | Some (sk, ospecs, osig) -> let fospecs, renvs = List.split (List.map (factor_acorderby fenv) ospecs) in let fret, renv0 = factor_expression fenv ret in let renv = List.fold_left combine_renvironments renv0 renvs in let ret_ospec = Some (sk, fospecs, osig) in (ret_ospec, fret), renv in (* Where *) let factor_where fenv where orderby ret = match where with | None -> let (fob, fret), renv = factor_orderby fenv orderby ret in (None, fob, fret), renv | Some wc -> (* We *are* pulling out of where clauses *) let fwc, ret_env = factor_expression fenv wc in let (fob, fret), renv = factor_orderby fenv orderby ret in let renv = combine_renvironments ret_env renv in ((Some fwc), fob, fret), renv in let prepend_fls stmts (fls, wc, ob, ret) = ((stmts@fls), wc, ob, ret) in (* Main match *) match cfls with | [] -> let (fw, fob, fret), renv = factor_where fenv where orderby ret in ([], fw, fob, fret), renv | CELET(odt, vn, ce1) :: rest -> let fbound, indep_renv = factor_expression fenv ce1 in let fenv = add_binding fenv vn in let cfl = CELET(odt,vn, fbound) in (* There are also returns coming up here *) let flwor, dep_renv = factor_flwor fenv rest where orderby ret in let stmts, dep_renv = handle_return_exprs_flwor dep_renv vn cfl in let renv = combine_renvironments indep_renv dep_renv in (prepend_fls stmts flwor), renv | CEFOR(odt, vn, pos, ce1) :: rest -> let fbound,indep_renv = factor_expression fenv ce1 in let cfl = CEFOR(odt, vn, pos, fbound) in (* Add in variable bindings *) let fenv = new_iterate_variable fenv vn in let fenv = add_opt_binding fenv pos in (* There are also returns coming up here *) let flwor,dep_renv = factor_flwor fenv rest where orderby ret in let stmts,dep_renv = handle_return_exprs_flwor dep_renv vn cfl in let stmts,dep_renv = match pos with | None -> stmts, dep_renv | Some pos -> handle_return_exprs_flwor dep_renv pos cfl in let renv = combine_renvironments indep_renv dep_renv in (prepend_fls stmts flwor), renv (* Insert location *) (************************************) (* Update Factorization Section *) (* Specifically Insert location now *) (************************************) and factor_insert_location fenv il = match il with | CUAsLastInto ce -> let fce, renv = factor_expression fenv ce in (CUAsLastInto fce), renv | CUAsFirstInto ce -> let fce, renv = factor_expression fenv ce in (CUAsFirstInto fce), renv | CUInto ce -> let fce, renv = factor_expression fenv ce in (CUInto fce), renv | CUAfter ce -> let fce, renv = factor_expression fenv ce in (CUAfter fce), renv | CUBefore ce -> let fce, renv = factor_expression fenv ce in (CUBefore fce), renv (*************) (* FRONT-END *) (*************) let factorize_expression stat_ctxt cexpr = (* annotate it *) ignore ( Factorize_free_var.compute_free_vars cexpr ); let ce, renv = factor_expression (initial_env stat_ctxt) cexpr in let ce, renv = handle_return_exprs renv None ce in let () = assert_empty_renv renv "TopLevel" in ce galax-1.1/factorization/factorize_update.mli0000664000076400007640000000212410560462356017500 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: factorize_update.mli,v 1.2 2007/02/01 22:08:46 simeon Exp $ *) (* Module: Factorize_update Description: This module factorizes out code from update expression. 
This is to help the optimizer pick up logical optimizations. *) open Xquery_core_ast (***************************) (* Normal form for updates *) (***************************) val update_in_normal_form : acfl_expr list -> acexpr -> acfl_expr list * acexpr galax-1.1/factorization/factorize_tpnf.ml0000664000076400007640000000715410560462356017024 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: factorize_tpnf.ml,v 1.16 2007/02/01 22:08:46 simeon Exp $ *) (* Module: Factorize_tpnf Description: This module is a factorization plugin for the TPNF approach for normalizing XQuery Core expressions. -- Philippe *) open Xquery_core_ast open Ast_walker_rewrite_context open Ast_walker_rewrite open Factorize_tpnf_rules let unique_binding_id = ref 0 let get_new_factored_name () = let local_name = "tpnf_" ^ (string_of_int !unique_binding_id) in incr unique_binding_id; (Namespace_builtin.glx_prefix, Namespace_builtin.glx_uri, local_name) (* Apply a ruleset -- fixpoint *) let rewriting_cexpr recurse_rule_set toplevel_rule_set static_ctxt acexpr = let recurse_rewrite_ctxt = build_rewrite_context static_ctxt recurse_rule_set [] in let toplevel_rewrite_ctxt = build_rewrite_context static_ctxt toplevel_rule_set [] in let new_cexpr = rewrite_cexpr (toplevel_rewrite_ctxt, recurse_rewrite_ctxt) acexpr in new_cexpr (* *** Phase one: convert where-clauses and introduce ddo-calls *** *) let phase_one_recurse = [ convert_where_clause; decompose_flwor; insert_sbdo_xpath; ] (* *** Phase two *** *) (* - introduce scrambling annotations *) (* - propagate scrambling annotations *) (* - structural rewritings *) let phase_two_recurse_tpnf = [ insert_scrambling_ddo; insert_scrambling_if; propagate_scrambling_bool; propagate_scrambling_ddo; propagate_scrambling_for_let; propagate_scrambling_for; propagate_scrambling_if; remove_scrambled_sbdo; substitution; loop_fusion; condition_detection; condition_shift; return_condition_lift; nested_return_condition_lift; return_result_lift; nested_return_result_lift; for_condition_lift; trivial_dot_condition; trivial_loop; dot_introduction; dot_loop; ] let phase_two_recurse_tpnf' = [ insert_scrambling_ddo; insert_scrambling_if; propagate_scrambling_bool; propagate_scrambling_ddo; propagate_scrambling_for_let; propagate_scrambling_for; propagate_scrambling_if; remove_scrambled_sbdo; substitution; condition_detection; condition_shift; return_condition_lift; nested_return_condition_lift; return_result_lift; nested_return_result_lift; for_condition_lift; trivial_dot_condition; trivial_loop; dot_introduction; dot_loop; (* instead of loop fusion: *) loop_split; nested_loop_split; filter_fusion; shortening_condition; ] (* *** Phase three: verification *** *) let phase_three_recurse = [ check_normal_form; remove_redundant_sbdo; ] (* *** Phase four: pack'n go *** *) let phase_four_recurse = [ reintroduce_where; join_support_hack; ] let factorize_tpnf stat_ctxt cexpr = let nexpr1 = rewriting_cexpr phase_one_recurse [] stat_ctxt cexpr in let nexpr2 = rewriting_cexpr phase_two_recurse_tpnf' [] stat_ctxt nexpr1 in let nexpr3 = rewriting_cexpr phase_three_recurse [] stat_ctxt nexpr2 in let nexpr4 = rewriting_cexpr phase_four_recurse [] stat_ctxt nexpr3 in nexpr4 galax-1.1/factorization/factorize_tpnf_rules.ml0000664000076400007640000011623210560462356020234 0ustar 
mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: factorize_tpnf_rules.ml,v 1.18 2007/02/01 22:08:46 simeon Exp $ *) (* Module: Factorize_tpnf_rules Description: This module contains the rewriting rules used for the TPNF. *) open Error open Namespace_names open Namespace_builtin open Norm_util open Xquery_common_ast open Xquery_common_ast_util open Xquery_core_ast open Xquery_core_ast_util open Xquery_core_ast_annotation open Processing_context open Rewriting_judgments open Schema_builtin open Schema_util open Print_top open Factorize_tpnf_util (* ** Utility functions -- move to factorize_tpnf_util ** *) let has_bag_sem ce = match get_scrambling_annot ce.pcexpr_annot with | Bag -> true | _ -> false let has_bag_annot ce = has_bag_sem ce let has_set_sem ce = match get_scrambling_annot ce.pcexpr_annot with | Set | Bag -> true | _ -> false let has_set_annot ce = match get_scrambling_annot ce.pcexpr_annot with | Set -> true | _ -> false let annot_lt ce1 ce2 = match get_scrambling_annot ce1.pcexpr_annot, get_scrambling_annot ce2.pcexpr_annot with | List, Set | List, Bag | Set, Bag -> true | _ -> false (* ** debugging stuff ** *) let print_c_list c = let ff = !Conf.core_expr_formatter in let print_annot ff a = let (ta, da, fv, sc) = Xquery_core_ast_annotation.annot_components a in Format.fprintf ff "[sc:%s]%!" (Xquery_core_ast_annotation.print_scrambling_annot sc) in begin Format.fprintf ff "********** LIST ***********\n"; let _ = List.map (fun (a,b,c,d) -> begin Print_xquery_core.print_cexpr ff a print_annot; Format.fprintf ff "\n---------\n"; end ) c in Format.fprintf ff "\n********** END ***********\n"; end (* *** utility functions *** *) let rec conjunction_to_list c = try begin match c.pcexpr_desc with | CEIf(c1,c2,c3) -> begin match c3.pcexpr_desc with | CEEmpty -> (c1, c.pcexpr_annot, c.pcexpr_origin, c.pcexpr_loc):: conjunction_to_list c2 | _ -> raise (Query (Internal_Error ("Not a pure conjunction."))) end | _ -> [(c, c.pcexpr_annot, c.pcexpr_origin, c.pcexpr_loc)] end with _ -> [] let rec list_to_conjunction l last_needs_bool = match l with | [tl] -> let r, annot, ori, loc = tl in if last_needs_bool then r else let r' = match r.pcexpr_desc with | CECall (fn, [arg], s,u, selfrecur) when rqname_equal fn fn_boolean -> arg | _ -> r in r' | hd :: tl -> begin let ce1, annot, ori, loc = hd in let empty = fmkacexpr CEEmpty annot ori loc in let r = fmkacexpr (CEIf (ce1, list_to_conjunction tl last_needs_bool, empty)) annot ori loc in let _ = set_scrambling_annot ce1.pcexpr_annot Bag in r end | _ -> raise (Query (Internal_Error ("This function should not be called with an empty list."))) (* *** *** PHASE ONE : Inserting DDO's and converting where clauses *** *** *) let convert_where_clause rewrite_ctxt cexpr = match cexpr.pcexpr_desc with | CEFLWOR ([CEFOR(t_x,n_x,None,e1)], Some e2, None, e3) -> begin let ce_empty = fmkacexpr CEEmpty e2.pcexpr_annot e2.pcexpr_origin e2.pcexpr_loc in let if_expr = fmkacexpr (CEIf (e2, e3, ce_empty)) (empty_ast_annot()) e2.pcexpr_origin e2.pcexpr_loc in let r = fmkacexpr (CEFLWOR ([CEFOR(t_x,n_x,None,e1)], None, None, if_expr)) cexpr.pcexpr_annot cexpr.pcexpr_origin cexpr.pcexpr_loc in let _ = debug_apply "convert where clause" cexpr r in r, true end | CEFLWOR ([CELET(t_x,n_x,e1)], Some e2, None, e3) -> begin let ce_empty = fmkacexpr CEEmpty 
e2.pcexpr_annot e2.pcexpr_origin e2.pcexpr_loc in let if_expr = fmkacexpr (CEIf (e2, e3, ce_empty)) e2.pcexpr_annot e2.pcexpr_origin e2.pcexpr_loc in let r = fmkacexpr (CEFLWOR ([CELET(t_x,n_x,e1)], None, None, if_expr)) cexpr.pcexpr_annot cexpr.pcexpr_origin cexpr.pcexpr_loc in let _ = debug_apply "convert where clause" cexpr r in r, true end | _ -> cexpr, false (* *** Decompose FLWOR blocks *** *) let decompose_flwor ctxt ce = let rec decompose_flwor_list lst wh ob ret = match lst with | [last] -> begin fmkacexpr (CEFLWOR ([last], wh, ob, ret)) ce.pcexpr_annot ce.pcexpr_origin ce.pcexpr_loc end | hd::tl -> begin let ret' = decompose_flwor_list tl wh ob ret in fmkacexpr (CEFLWOR ([hd], None, None, ret')) ce.pcexpr_annot ce.pcexpr_origin ce.pcexpr_loc end | _ -> raise (Query (Internal_Error ( "Empty for/let list found during TPNF FLWOR decomposition"))) in match ce.pcexpr_desc with | CEFLWOR (flwor_list, wh, ob, ret) when (List.length flwor_list) > 1 -> decompose_flwor_list flwor_list wh ob ret, true | _ -> ce, false (* *** Insert SBDO (ord/nodup) *** *) (* Insert SBDO: if for a given subexpression, the ord and nodup *) (* properties can be derived, then insert an sbdo operation *) (* only apply sbdo's on non-scrambled CELet/CEFor *) let is_flwor ce = match ce.pcexpr_desc with | CEFLWOR _ -> true | _ -> false let is_if ce = match ce.pcexpr_desc with | CEIf _ -> true | _ -> false let insert_sbdo_xpath rewrite_ctxt ce = (* testing scrambling to avoid recursive addition of SBDO's *) if not ( has_set_sem ce ) && (is_flwor ce || is_if ce) then let symtab = Hashtbl.create 10 in let no2d, gen, ord, nodup = get_properties rewrite_ctxt ce symtab in if ord && nodup then let r = wrap_in_sbdo rewrite_ctxt ce in let _ = set_scrambling_annot ce.pcexpr_annot Set in let _ = debug_apply "insert distinct-docorder (ord/nodup holds)" ce r in r, true else ce, false else ce, false (* *** *** PHASE TWO : Inserting and propagating annotations *** *** *) let insert_scrambling_ddo rewrite_ctxt cexpr = match cexpr.pcexpr_desc with | CECall (fname, [arg], sign,u, selfrecur) when rqname_equal fname fs_distinct_docorder || rqname_equal fname fs_distinct_docorder_or_atomic_sequence -> begin match get_scrambling_annot arg.pcexpr_annot with | List -> let _ = set_scrambling_annot arg.pcexpr_annot Set in let _ = debug_apply "insert scrambling (ddo)" cexpr cexpr in cexpr, true | _ -> (* already scrambled *) cexpr, false end | _ -> cexpr, false let insert_scrambling_if rewrite_ctxt cexpr = match cexpr.pcexpr_desc with | CEIf (ce1, ce2, cempty) -> begin match get_scrambling_annot ce1.pcexpr_annot with | List | Set -> let _ = set_scrambling_annot ce1.pcexpr_annot Bag in let _ = debug_apply "insert scrambling (if)" cexpr cexpr in cexpr, true | Bag -> (* already scrambled *) cexpr, false end | _ -> cexpr, false let propagate_scrambling_bool rewrite_ctxt ce = let changed = ref false in let _ = match ce.pcexpr_desc with | CECall (fname, [arg], sign,u, selfrecur) when rqname_equal fname fn_boolean -> begin match get_scrambling_annot arg.pcexpr_annot with | Bag -> () | _ when annot_lt arg ce -> (set_scrambling_annot arg.pcexpr_annot (get_scrambling_annot ce.pcexpr_annot); changed := true) | _ -> () end | _ -> () in if !changed then let _ = debug_apply "propagate scrambling (fn_boolean)" ce ce in ce, true else ce, false let propagate_scrambling_ddo rewrite_ctxt ce = let changed = ref false in let _ = match ce.pcexpr_desc with | CECall (fname, [arg], sign,u, selfrecur) when rqname_equal fname fs_distinct_docorder || rqname_equal fname 
fs_distinct_docorder_or_atomic_sequence -> begin if annot_lt arg ce then (set_scrambling_annot arg.pcexpr_annot (get_scrambling_annot ce.pcexpr_annot); changed := true) else () end | _ -> () in if !changed then let _ = debug_apply "propagate scrambling (ddo)" ce ce in ce, true else ce, false let propagate_scrambling_for_let rewrite_ctxt ce = let changed = ref false in let _ = match ce.pcexpr_desc with | CEFLWOR ([CEFOR(t_x,n_x,None,e1)], None, None, e3) | CEFLWOR ([CELET(t_x,n_x,e1)], None, None, e3) when annot_lt e3 ce -> (set_scrambling_annot e3.pcexpr_annot (get_scrambling_annot ce.pcexpr_annot); changed := true) | _ -> () in if !changed then let _ = debug_apply "propagate scrambling in return clause (for/let)" ce ce in ce, true else ce, false let propagate_scrambling_for rewrite_ctxt ce = let changed = ref false in let _ = match ce.pcexpr_desc with | CEFLWOR ([CEFOR(t_x,n_x,None,e1)], None, None, e3) -> begin match get_scrambling_annot e1.pcexpr_annot with | List -> begin match get_scrambling_annot ce.pcexpr_annot with | List -> () | _ -> (set_scrambling_annot e1.pcexpr_annot Set; changed := true) end | _ -> () end | _ -> () in if !changed then let _ = debug_apply "propagate scrambling in for clause" ce ce in ce, true else ce, false let propagate_scrambling_if rewrite_ctxt ce = let changed = ref false in let _ = match ce.pcexpr_desc with | CEIf (e1, e2, empty) when annot_lt e2 ce -> begin set_scrambling_annot e2.pcexpr_annot (get_scrambling_annot ce.pcexpr_annot); changed := true end; | _ -> () in if !changed then let _ = debug_apply "propagate scrambling if" ce ce in ce, true else ce, false (* *** *** PHASE 3 : Structural Manipulation *** *** *) let remove_scrambled_sbdo rewrite_ctxt cexpr = match cexpr.pcexpr_desc with | CECall (fname, [arg], sign,u, selfrecur) -> if rqname_equal fname fs_distinct_docorder || rqname_equal fname fs_distinct_docorder_or_atomic_sequence then begin match get_scrambling_annot cexpr.pcexpr_annot with | List -> cexpr, false | Set | Bag -> let _ = debug_apply "remove scrambled distinct-docorder" cexpr arg in arg, true end else cexpr, false | _ -> cexpr, false (* ** Substitution ** * * *let $x := e1 return e2 == e1[$x/e2] * * if e1 and e2 are in CXQ+ * *) (* Q: Should be apply substitution only when the use count is one? 
A: No, see paper *) (* Small Hack to avoid substitution of doc/root calls *) let is_doc_or_root_call ce = match ce.pcexpr_desc with | CECall (fname, _, _, _, _) -> begin rqname_equal fname fn_doc ||rqname_equal fname fn_root end | _ -> false let subst_count = ref 0 let new_subst_var () = let v = !subst_count in incr subst_count; (Namespace_builtin.glx_prefix, Namespace_builtin.glx_uri, ("subst" ^ (string_of_int v))) (* rename a variable into a new, unique var name *) (* it is the user's responsability to make sure *) (* that new variable is not used so far, by *) (* creating one with new_subst_var() *) let rec tpnf_rename ctxt e x y = if is_in_cxq_plus ctxt e then begin match e.pcexpr_desc with | CEVar v when rqname_equal x v -> let e' = fmkacexpr (CEVar y) (empty_ast_annot()) e.pcexpr_origin e.pcexpr_loc in e', true | CEForwardAxis (v, a, nt) when rqname_equal v x -> fmkacexpr (CEForwardAxis (y, a, nt)) e.pcexpr_annot e.pcexpr_origin e.pcexpr_loc, true | CEReverseAxis (v, a, nt) when rqname_equal v x -> fmkacexpr (CEReverseAxis (y, a, nt)) e.pcexpr_annot e.pcexpr_origin e.pcexpr_loc, true | CEIf (ce1, ce2, ce3) -> let ce1', ch1 = tpnf_rename ctxt ce1 x y in let ce2', ch2 = tpnf_rename ctxt ce2 x y in let ce3', ch3 = tpnf_rename ctxt ce3 x y in fmkacexpr (CEIf (ce1', ce2', ce3')) e.pcexpr_annot e.pcexpr_origin e.pcexpr_loc, (ch1 || ch2 || ch3) | CEFLWOR ([CEFOR(t,v,None,ce1)], None, None, ce2) -> begin let ce1', ch1 = tpnf_rename ctxt ce1 x y in if rqname_equal v x then begin if rqname_equal v y then fmkacexpr (CEFLWOR ([CEFOR(t,y,None,ce1')], None, None, ce2)) e.pcexpr_annot e.pcexpr_origin e.pcexpr_loc, true else let ce2', ch2 = tpnf_rename ctxt ce2 x y in fmkacexpr (CEFLWOR ([CEFOR(t,y,None,ce1')], None, None, ce2')) e.pcexpr_annot e.pcexpr_origin e.pcexpr_loc, true end else let ce2', ch2 = tpnf_rename ctxt ce2 x y in fmkacexpr (CEFLWOR ([CEFOR(t,v,None,ce1')], None, None, ce2')) e.pcexpr_annot e.pcexpr_origin e.pcexpr_loc, (ch1 || ch2) end | CEFLWOR ([CELET(t,v,ce1)], None, None, ce2) -> begin let ce1', ch1 = tpnf_rename ctxt ce1 x y in if rqname_equal v x then begin if rqname_equal v y then fmkacexpr (CEFLWOR ([CELET(t,y,ce1')], None, None, ce2)) e.pcexpr_annot e.pcexpr_origin e.pcexpr_loc, true else let ce2', ch2 = tpnf_rename ctxt ce2 x y in fmkacexpr (CEFLWOR ([CELET(t,y,ce1')], None, None, ce2')) e.pcexpr_annot e.pcexpr_origin e.pcexpr_loc, true end else let ce2', ch2 = tpnf_rename ctxt ce2 x y in fmkacexpr (CEFLWOR ([CELET(t,v,ce1')], None, None, ce2')) e.pcexpr_annot e.pcexpr_origin e.pcexpr_loc, (ch1 || ch2) end | CECall (fname, [arg], sign,u, selfrecur) when rqname_equal fname fs_distinct_docorder || rqname_equal fname fs_distinct_docorder_or_atomic_sequence || rqname_equal fname fn_boolean -> begin let se, ch = tpnf_rename ctxt arg x y in fmkacexpr (CECall (fname, [se], sign, u, selfrecur)) e.pcexpr_annot e.pcexpr_origin e.pcexpr_loc, ch end | _ -> e, false end else e, false let rec tpnf_subst ctxt e x e' = if is_in_cxq_plus ctxt e && is_in_cxq_plus ctxt e' then begin match e.pcexpr_desc with | CEVar v -> begin if rqname_equal x v then e', true else e, false end | CEForwardAxis (v, a, nt) -> begin match e'.pcexpr_desc with | CEVar z when rqname_equal v x -> (* distinction between variable renaming and substitution *) begin fmkacexpr (CEForwardAxis (z, a, nt)) e.pcexpr_annot e.pcexpr_origin e.pcexpr_loc, true end | _ when rqname_equal v x -> begin let axis = fmkacexpr (CEForwardAxis (fs_dot, a, nt)) e.pcexpr_annot e.pcexpr_origin e.pcexpr_loc in fmkacexpr (CEFLWOR 
([CEFOR(None,fs_dot,None,e')], None, None, axis)) e.pcexpr_annot e.pcexpr_origin e.pcexpr_loc, true end | _ -> e, false end | CEReverseAxis (v, a, nt) -> begin match e'.pcexpr_desc with | CEVar z when rqname_equal v x -> (* distinction between variable renaming and substitution *) begin fmkacexpr (CEReverseAxis (z, a, nt)) e.pcexpr_annot e.pcexpr_origin e.pcexpr_loc, true end | _ when rqname_equal v x -> begin let axis = fmkacexpr (CEReverseAxis (fs_dot, a, nt)) e.pcexpr_annot e.pcexpr_origin e.pcexpr_loc in fmkacexpr (CEFLWOR ([CEFOR(None,fs_dot,None,e')], None, None, axis)) e.pcexpr_annot e.pcexpr_origin e.pcexpr_loc, true end | _ -> e, false end | CEIf (b_ce1, ce2, ce3) -> begin match b_ce1.pcexpr_desc, ce3.pcexpr_desc with | CECall (fname, [ce1], sign, u, selfrecur), CEEmpty when rqname_equal fname fn_boolean -> begin let ce1', ch1 = tpnf_subst ctxt ce1 x e' in let ce2', ch2 = tpnf_subst ctxt ce2 x e' in let b_ce1' = fmkacexpr (CECall(fname, [ce1'], sign, u, selfrecur)) b_ce1.pcexpr_annot b_ce1.pcexpr_origin b_ce1.pcexpr_loc in fmkacexpr (CEIf (b_ce1', ce2', ce3)) e.pcexpr_annot e.pcexpr_origin e.pcexpr_loc, (ch1 || ch2) end | _ -> e, false end | CEFLWOR ([CEFOR(t,y,None,ce1)], None, None, ce2) -> begin let ce1', ch1 = tpnf_subst ctxt ce1 x e' in if rqname_equal x y then fmkacexpr (CEFLWOR ([CEFOR(t,y,None,ce1')], None, None, ce2)) e.pcexpr_annot e.pcexpr_origin e.pcexpr_loc, ch1 else let z = new_subst_var () in let ce2', ch2 = tpnf_rename ctxt ce2 y z in let ce2'', ch2' = tpnf_subst ctxt ce2' x e' in fmkacexpr (CEFLWOR ([CEFOR(t,z,None,ce1')], None, None, ce2'')) e.pcexpr_annot e.pcexpr_origin e.pcexpr_loc, (ch1 || ch2') end | CEFLWOR ([CELET(t,y,ce1)], None, None, ce2) -> begin let ce1', ch1 = tpnf_subst ctxt ce1 x e' in if rqname_equal x y then fmkacexpr (CEFLWOR ([CELET(t,y,ce1')], None, None, ce2) ) e.pcexpr_annot e.pcexpr_origin e.pcexpr_loc, ch1 else let z = new_subst_var () in let ce2', ch2 = tpnf_rename ctxt ce2 y z in let ce2'', ch2' = tpnf_subst ctxt ce2' x e' in fmkacexpr (CEFLWOR ([CELET(t,z,ce1')], None, None, ce2'') ) e.pcexpr_annot e.pcexpr_origin e.pcexpr_loc, (ch1 || ch2') end | CECall (fname, [arg], sign,u, selfrecur) when rqname_equal fname fs_distinct_docorder || rqname_equal fname fs_distinct_docorder_or_atomic_sequence || rqname_equal fname fn_boolean -> begin let se, ch = tpnf_subst ctxt arg x e' in fmkacexpr (CECall (fname, [se], sign,u, selfrecur)) e.pcexpr_annot e.pcexpr_origin e.pcexpr_loc, ch end | _ -> e, false end else e, false let substitution rewrite_ctxt ce = match ce.pcexpr_desc with | CEFLWOR ([CELET(t,x,ce1)], None, None, ce2) when has_set_sem ce (* && has_set_sem ce2 *) -> begin let r, c = tpnf_subst rewrite_ctxt ce2 x ce1 in if c then let _ = debug_apply "substitution" ce r in r, true else ce, false end | _ -> ce, false (* ** LOOP SPLIT ** * * *for $dot in e1 = *for $dot in * return = *for $dot in e1 * *for $dot in e2 = return e2 * return e3 = return e3 * *) let loop_split rewrite_ctxt flwor1 = let fail = flwor1, false in match flwor1.pcexpr_desc with | CEFLWOR ([CEFOR(t_ce1,x,None,ce1)], None, None, flwor2) when has_set_sem flwor1 && has_set_annot ce1 && rqname_equal x fs_dot -> begin match flwor2.pcexpr_desc with | CEFLWOR ([CEFOR(t_ce2,y,None,ce2)], None, None, ce3) when has_set_sem flwor2 && has_set_annot ce2 && has_set_sem ce3 && rqname_equal y fs_dot -> let flwor1' = fmkacexpr (CEFLWOR ([CEFOR(t_ce1,x,None,ce1)], None, None, ce2)) (empty_ast_annot ()) flwor1.pcexpr_origin flwor1.pcexpr_loc in let flwor2' = fmkacexpr (CEFLWOR 
([CEFOR(t_ce2,y,None,flwor1')], None, None, ce3)) (empty_ast_annot ()) flwor2.pcexpr_origin flwor2.pcexpr_loc in let annot = get_scrambling_annot flwor1.pcexpr_annot in let _ = set_scrambling_annot flwor1'.pcexpr_annot annot in let _ = set_scrambling_annot flwor2'.pcexpr_annot annot in let _ = debug_apply "loop split" flwor1 flwor2' in flwor2', true | _ -> fail end | _ -> fail (* ** NESTED LOOP SPLIT ** * * *for $dot in e1 = *for $dot in * return = *for $dot in e1 * *if e2 ^ ... ^ en = return * then = if e2 ^ ... ^ en * for $dot in en+1 = then en+1 * return en+2 = return en+2 *) let nested_loop_split rewrite_ctxt flwor1 = let fail = flwor1, false in match flwor1.pcexpr_desc with | CEFLWOR ([CEFOR(t_ce1,x,None,ce1)], None, None, cond) when has_set_sem flwor1 && has_set_annot ce1 && has_set_annot cond && rqname_equal x fs_dot -> begin let boolean_list = conjunction_to_list cond in if List.length boolean_list > 1 then let rev_list = List.rev boolean_list in let rev_tail = List.tl rev_list in let flwor2, _, _, _ = List.hd rev_list in match flwor2.pcexpr_desc with | CEFLWOR ([CEFOR(t_ce2,y,None,ce_n1)], None, None, ce_n2) when has_set_annot flwor2 && has_set_annot ce_n1 && rqname_equal y fs_dot -> let r = (ce_n1, ce_n1.pcexpr_annot, ce_n1.pcexpr_origin, ce_n1.pcexpr_loc) in let new_cond = List.rev (r :: rev_tail) in let ceif' = list_to_conjunction new_cond false in let flwor1' = fmkacexpr (CEFLWOR ([CEFOR(t_ce1,x,None,ce1)], None, None, ceif')) (empty_ast_annot()) flwor1.pcexpr_origin flwor1.pcexpr_loc in let flwor2' = fmkacexpr (CEFLWOR ([CEFOR(t_ce2,y,None,flwor1')], None, None, ce_n2)) (empty_ast_annot()) flwor2.pcexpr_origin flwor2.pcexpr_loc in let _ = set_scrambling_annot flwor1'.pcexpr_annot Set in let _ = set_scrambling_annot flwor2'.pcexpr_annot (get_scrambling_annot flwor1.pcexpr_annot) in let _ = debug_apply "nested loop split" flwor1 flwor2' in flwor2', true | _ -> fail else fail end | _ -> fail (* *** Loop Fusion *** * *for $y in = *for $x in e1 * *for $x in e1 = return * return e2 = *for $y in e2 * return e3 = return e3 * * $x not in FV(e3) || $x = $y *) let loop_fusion ctxt ce = let fail = ce, false in match ce.pcexpr_desc with | CEFLWOR ([CEFOR(t_ce,y,None,ce')], None, None, e3) when has_set_sem ce && rqname_equal y fs_dot && has_set_sem e3 -> begin match ce'.pcexpr_desc with | CEFLWOR ([CEFOR(t_e1,x,None,e1)], None, None, e2) when has_set_annot ce' && rqname_equal x fs_dot && has_set_annot e1 && has_set_annot e2 -> begin let flwor1 = fmkacexpr (CEFLWOR ([CEFOR(t_ce,y,None,e2)], None, None, e3)) (empty_ast_annot()) ce.pcexpr_origin ce.pcexpr_loc in let r = fmkacexpr (CEFLWOR ([CEFOR(t_e1,x,None,e1)], None, None, flwor1)) (empty_ast_annot()) ce'.pcexpr_origin ce'.pcexpr_loc in let _ = set_scrambling_annot flwor1.pcexpr_annot (get_scrambling_annot ce.pcexpr_annot) in let _ = set_scrambling_annot r.pcexpr_annot (get_scrambling_annot ce.pcexpr_annot) in let _ = debug_apply "loop fusion" ce r in r, true end | _ -> fail end | _ -> fail (* ** CONDITION DETECTION ** * * for $x in e1 = if e1 * return e2 = then e2 * * if $x not in FV(e2) * **) let condition_detection rewrite_ctxt flwor = let fail = flwor, false in match flwor.pcexpr_desc with | CEFLWOR ([CEFOR(t_ce1,x,None,ce1)], None, None, ce2) when has_set_annot ce1 && has_set_sem flwor -> if not(is_free_var_of x ce2) then let ce_empty = fmkacexpr CEEmpty (empty_ast_annot()) flwor.pcexpr_origin flwor.pcexpr_loc in let if_expr = fmkacexpr (CEIf ((wrap_in_fn_boolean rewrite_ctxt) ce1, ce2, ce_empty)) (empty_ast_annot()) 
flwor.pcexpr_origin flwor.pcexpr_loc in let _ = set_scrambling_annot ce1.pcexpr_annot Bag in let _ = set_scrambling_annot if_expr.pcexpr_annot (get_scrambling_annot flwor.pcexpr_annot) in let _ = debug_apply "condition detection" flwor if_expr in if_expr, true else fail | _ -> fail (* ** CONDITION SHIFT ** * * *if( *if e1 then e2) = *if e1 then * then e3 = *if e2 then e3 * **) let condition_shift rewrite_ctxt ce = let fail = ce, false in match ce.pcexpr_desc with | CEIf (b_cif,ce3,cempty) -> begin match b_cif.pcexpr_desc with | CECall (fname, [cif], sign,u, selfrecur) when rqname_equal fname fn_boolean -> begin match cif.pcexpr_desc, cempty.pcexpr_desc with | CEIf (ce1,ce2,cempty'), CEEmpty when has_bag_sem ce1 && has_bag_sem ce2 -> begin match cempty'.pcexpr_desc with | CEEmpty -> let b_cif' = fmkacexpr (CECall(fname, [ce2], sign,u, selfrecur)) b_cif.pcexpr_annot b_cif.pcexpr_origin b_cif.pcexpr_loc in let nest_if = fmkacexpr (CEIf (b_cif', ce3, cempty)) cif.pcexpr_annot cif.pcexpr_origin cif.pcexpr_loc in let r = fmkacexpr (CEIf (ce1, nest_if, cempty')) ce.pcexpr_annot ce.pcexpr_origin ce.pcexpr_loc in let _ = debug_apply "condition shift" ce r in r, true | _ -> fail end | _ -> fail end | _ -> fail end | _ -> fail (* ** RETURN CONDITION LIFT ** * * *for $x in e1 = *if e2 then * return ( *if e2 then e3 ) = *for $x in e1 return e3 * * if $x not in FV(e2) * **) let return_condition_lift rewrite_ctxt ce = let fail = ce, false in match ce.pcexpr_desc with | CEFLWOR ([CEFOR(t_ce1,x,None,ce1)], None, None, cif) when has_set_sem ce && has_set_sem cif -> begin match cif.pcexpr_desc with | CEIf (ce2,ce3,cempty) when has_set_sem ce3 -> begin match cempty.pcexpr_desc with | CEEmpty when not(is_free_var_of x ce2) -> let cefor = fmkacexpr (CEFLWOR ([CEFOR(t_ce1,x,None,ce1)], None, None, ce3)) (empty_ast_annot()) ce.pcexpr_origin ce.pcexpr_loc in let _ = set_scrambling_annot cefor.pcexpr_annot (get_scrambling_annot ce.pcexpr_annot) in let r = fmkacexpr (CEIf (ce2, cefor, cempty)) (empty_ast_annot()) cif.pcexpr_origin cif.pcexpr_loc in let _ = set_scrambling_annot r.pcexpr_annot (get_scrambling_annot cif.pcexpr_annot) in let _ = debug_apply "return condition lift" ce r in r, true | _ -> fail end | _ -> fail end | _ -> fail (* ** NESTED RETURN CONDITION LIFT ** * * *for $x in e1 = *if en then * return = *for $x in e1 * *if e2 ^ ... ^ en = return * then en+1 = *if e2 ^ ... 
^ en-1 then en+1 * * if n > 2 amd $x not in FV(en) * **) let nested_return_condition_lift rewrite_ctxt ce = let fail = ce, false in match ce.pcexpr_desc with | CEFLWOR ([CEFOR(t_ce1,x,None,ce1)], None, None, cond) when has_set_sem ce && has_set_sem cond -> begin let boolean_list = conjunction_to_list cond in if List.length boolean_list > 1 then let rev_list = List.rev boolean_list in let rev_tail = List.tl rev_list in (* find expressions e in rev_tail for which FV(e) =/= $x *) let lifted = List.filter (fun (e,_,_,_) -> not(is_free_var_of x e)) rev_tail in if List.length lifted > 0 then begin let non_lifted = List.filter (fun (e,_,_,_) -> is_free_var_of x e) rev_tail in let non_lifted' = List.rev ((List.hd rev_list) :: non_lifted) in let nested_if = list_to_conjunction non_lifted' false in let for_expr = fmkacexpr (CEFLWOR ([CEFOR(t_ce1,x,None,ce1)], None, None, nested_if)) (empty_ast_annot()) ce.pcexpr_origin ce.pcexpr_loc in let _ = set_scrambling_annot for_expr.pcexpr_annot (get_scrambling_annot ce.pcexpr_annot) in let new_res = (for_expr,ce.pcexpr_annot,ce.pcexpr_origin,ce.pcexpr_loc) in let lifted' = List.rev (new_res::lifted) in let r = list_to_conjunction lifted' false in let _ = debug_apply "nested return condition lift" ce r in r, true end else fail else fail end | _ -> fail (* ** RETURN RESULT LIFT ** * *for $x in e1 = *if ( *for $x in e1 return e2) * return = then e3 * *if e2 then e3 * * if $x not in FV(e3) * **) let return_result_lift rewrite_ctxt ce = let fail = ce, false in match ce.pcexpr_desc with | CEFLWOR ([CEFOR(t_ce1,x,None,ce1)], None, None, cif) when has_set_sem ce && has_set_sem cif -> begin match cif.pcexpr_desc with | CEIf (b_ce2,ce3,cempty) when has_set_sem ce3 -> begin match cempty.pcexpr_desc with | CEEmpty when not(is_free_var_of x ce3) -> let ce2 = match b_ce2.pcexpr_desc with | CECall (fname, [arg], sign, u, selfrecur) when rqname_equal fname fn_boolean -> arg | _ -> b_ce2 in let cefor = fmkacexpr (CEFLWOR ([CEFOR(t_ce1,x,None,ce1)], None, None, ce2)) (empty_ast_annot()) ce.pcexpr_origin ce.pcexpr_loc in let _ = set_scrambling_annot cefor.pcexpr_annot (get_scrambling_annot ce.pcexpr_annot) in let r = fmkacexpr (CEIf ((wrap_in_fn_boolean rewrite_ctxt cefor), ce3, cempty)) cif.pcexpr_annot cif.pcexpr_origin cif.pcexpr_loc in let _ = debug_apply "return result lift" ce r in r, true | _ -> fail end | _ -> fail end | _ -> fail (* ** NESTED RETURN RESULT LIFT ** * *for $x in e1 = *if ( * return = *for $x in e1 * *if e2 ^ ... ^ en = return * then en+1 = *if e2 ^ ... 
^ en-1 * then en * then en+1 * * if n > 2 and $x not in FV(en+1) * **) let nested_return_result_lift rewrite_ctxt ce = let fail = ce, false in match ce.pcexpr_desc with | CEFLWOR ([CEFOR(t_ce1,x,None,ce1)], None, None, cond) when has_set_sem ce && has_set_sem cond -> begin let boolean_list = conjunction_to_list cond in if List.length boolean_list > 1 then begin let rev_list = List.rev boolean_list in let rev_tail = List.tl rev_list in let (c, c_annot, c_fi, c_eh) = List.hd rev_list in if not(is_free_var_of x c) then let nested_if = list_to_conjunction (List.rev rev_tail) false in let for_expr = fmkacexpr (CEFLWOR ([CEFOR(t_ce1,x,None,ce1)], None, None, nested_if)) (empty_ast_annot()) ce.pcexpr_origin ce.pcexpr_loc in let _ = set_scrambling_annot for_expr.pcexpr_annot (get_scrambling_annot ce.pcexpr_annot) in let ce_empty = fmkacexpr CEEmpty c_annot c_fi c_eh in let r = fmkacexpr (CEIf ((wrap_in_fn_boolean rewrite_ctxt for_expr), c, ce_empty)) c_annot c_fi c_eh in let _ = debug_apply "nested return result lift" ce r in r, true else fail end else fail end | _ -> fail (* ** FOR CONDITION LIFT ** * * *for $x in = *if e1 then * ( *if e1 then e2) = *for $x in e2 * return e3 = return e3 * * **) let for_condition_lift rewrite_ctxt ce = let fail = ce, false in match ce.pcexpr_desc with | CEFLWOR ([CEFOR(t_x,x,None,cif)], None, None, ce3) when has_set_sem ce && has_set_sem ce3 -> begin match cif.pcexpr_desc with | CEIf (ce1, ce2, cempty) when has_set_annot cif && has_set_annot ce2 -> begin match cempty.pcexpr_desc with | CEEmpty -> let cefor = fmkacexpr (CEFLWOR ([CEFOR(t_x,x,None,ce2)], None, None, ce3)) ce.pcexpr_annot ce.pcexpr_origin ce.pcexpr_loc in let r = fmkacexpr (CEIf (ce1, cefor, cempty)) cif.pcexpr_annot cif.pcexpr_origin cif.pcexpr_loc in let _ = debug_apply "for condition lift" ce r in r, true | _ -> fail end | _ -> fail end | _ -> fail (* ** TRIVIAL DOT CONDITION ** * if $dot then e2 = e2 * **) let trivial_dot_condition rewrite_ctxt ce = match ce.pcexpr_desc with | CEIf (c1, c2, c3) -> begin match c1.pcexpr_desc, c3.pcexpr_desc with | CEVar dot, CEEmpty when rqname_equal dot fs_dot -> let _ = debug_apply "trivial dot condition" ce c2 in c2, true | _ -> ce, false end | _ -> ce, false (* ** TRIVIAL LOOP ** * *for $x in e return *$x = *e * **) let trivial_loop rewrite_ctxt ce = let fail = ce, false in match ce.pcexpr_desc with | CEFLWOR ([CEFOR(t_ce,x,None,ce_r)], None, None, cevar) when has_set_sem ce && has_set_annot ce_r -> begin match cevar.pcexpr_desc with | CEVar v when rqname_equal x v && has_set_sem cevar -> let _ = set_scrambling_annot ce_r.pcexpr_annot Set in let _ = debug_apply "trivial loop" ce ce_r in ce_r, true | _ -> fail end | _ -> fail (* ** DOT INTRODUCTION ** * for $x in e1 return e2 = for $dot in e1 return e2[$x/$dot] * * $dot not in FV(e2) and $x =/= $dot * **) let dot_introduction ctxt ce = match ce.pcexpr_desc with | CEFLWOR ([CEFOR(t,x,None,e1)], None, None, e2) (* dropped precondition 'has_set_sem e1', because it *) (* may break the rewrite and does not seem necessary *) when is_in_cxq_plus ctxt ce && not (is_free_var_of fs_dot e2) && not(rqname_equal x fs_dot)-> begin let e2', ch = tpnf_rename ctxt e2 x fs_dot in let r = fmkacexpr (CEFLWOR ([CEFOR(t,fs_dot,None,e1)], None, None, e2')) ce.pcexpr_annot ce.pcexpr_origin ce.pcexpr_loc in let _ = debug_apply "$dot introduction" ce r in r, true end | _ -> ce, false (* ** DOT LOOP ** * for $x in $x return e = e * * if has_max_one $x * **) let dot_loop rewrite_ctxt ce = let fail = ce, false in match ce.pcexpr_desc with 
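(* A minimal illustration of the DOT LOOP rule above: the guard below only
   fires when the bound variable is fs:dot, so an expression of the shape
   (for $dot in $dot return e) simply collapses to e. *)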
| CEFLWOR ([CEFOR(t_ce,x1,None,cevar)], None, None, ce') -> begin match cevar.pcexpr_desc with | CEVar x2 when rqname_equal x1 x2 && rqname_equal x1 fs_dot -> let _ = debug_apply "$dot loop" ce ce' in ce', true | _ -> fail end | _ -> fail (* ** SHORTENING CONDITION ** * * **) let shortening_condition rewrite_ctxt ce = let fail = ce, false in match ce.pcexpr_desc with | CEIf (e1, e2, empty) when (* is_in_cxq_plus rewrite_ctxt ce && *) has_bag_sem ce && has_bag_sem e1 && has_bag_sem e2 -> begin match e2.pcexpr_desc, empty.pcexpr_desc with | CEVar d, CEEmpty when rqname_equal d fs_dot -> let e1' = match e1.pcexpr_desc with | CECall (fn, [a], s, u, selfrecur) when rqname_equal fn fn_boolean -> a | _ -> e1 in let _ = debug_apply "condition shortening" ce e1' in e1', true | _ -> fail end | _ -> fail (* *** Filter Fusion *** * * for $dot in ( = for $dot in e1 * for $dot in e1 return = return * if e2 ^ ... ^ en = if e2 ^ ... ^ en * then $dot = then en+1 * return en+1) = * *) let filter_fusion ctxt ce = let fail = ce, false in match ce.pcexpr_desc with | CEFLWOR ([CEFOR(t,x,None,ce')], None, None, en1) when has_set_sem ce && rqname_equal x fs_dot -> begin match ce'.pcexpr_desc with | CEFLWOR ([CEFOR(t_e1,y,None,e1)], None, None, cond) when has_set_sem ce' && rqname_equal y fs_dot && has_set_sem cond -> begin let boolean_list = conjunction_to_list cond in let rev_list = List.rev boolean_list in (* wrap last condition in the list into an fn_boolean call *) let head,a,b,c = List.hd rev_list in match head.pcexpr_desc with | CEVar z when rqname_equal z fs_dot -> begin let rev_list' = (en1,a,b,c)::(List.tl rev_list) in let cond' = list_to_conjunction (List.rev rev_list') false in let r = fmkacexpr (CEFLWOR ([CEFOR(t_e1,y,None,e1)], None, None, cond')) (empty_ast_annot()) ce.pcexpr_origin ce.pcexpr_loc in let annot = get_scrambling_annot ce.pcexpr_annot in let _ = set_scrambling_annot cond'.pcexpr_annot annot in let _ = set_scrambling_annot r.pcexpr_annot annot in let _ = debug_apply "filter fusion" ce r in r, true end | _ -> fail end | _ -> fail end | _ -> fail (* ** ** PHASE 4 -- The sanity Check ** ** *) let check_normal_form ctxt ce = let rec skip_redundant_let ce = match ce.pcexpr_desc with | CEFLWOR ([CELET(t_ce1,n_ce1,ce1)], None, None, ce2) -> skip_redundant_let ce2 | _ -> ce in if not(!Conf.print_algebra_optimization_rewrite) then ce, false else begin match ce.pcexpr_desc with | CECall (fname, [arg], sign,u, selfrecur) when rqname_equal fname fs_distinct_docorder || rqname_equal fname fs_distinct_docorder_or_atomic_sequence -> begin let ff = !Conf.core_expr_formatter in let print_annot ff a = begin let (ta, da, fv, sc) = Xquery_core_ast_annotation.annot_components a in Format.fprintf ff "[sc:%s]%!" (Xquery_core_ast_annotation.print_scrambling_annot sc); Format.fprintf ff "[type: "; (match ta with | None -> Format.fprintf ff "None]" | Some m -> Format.fprintf ff "%a]%!" 
Print_type_core.print_cxtype m); end in let ce' = skip_redundant_let arg in if not(is_in_tpnf' ce') then begin Format.fprintf ff " **************** WARNING ******************\n"; Format.fprintf ff " * The following expression is not in TPNF *\n"; Format.fprintf ff " *******************************************\n"; Print_xquery_core.print_cexpr ff ce' print_annot; Format.fprintf ff "\n\n"; ce, false end else begin Format.fprintf ff " ******************* NOTE ***************\n"; Format.fprintf ff " * The following expression is in TPNF *\n"; Format.fprintf ff " ****************************************\n"; Print_xquery_core.print_cexpr ff ce' print_annot; Format.fprintf ff "\n\n"; ce, false end end | _ -> ce, false end (* ** ** PHASE 5 -- Fix whatever got messed up ** ** *) (* ** reintroduce where clauses ** * for $x in e1 = for $x in e1 * return = where if e2 ^ ... ^ en-1 then en * if e2 ^ ... ^ en = return en+1 * then en+1 *) let shorten_conjunction ctxt cond = let boolean_list = conjunction_to_list cond in if List.length boolean_list > 1 then begin let rev_list = List.rev boolean_list in let ret, _, _, _ = List.hd rev_list in let rev_tail = List.tl rev_list in let new_cond = list_to_conjunction (List.rev rev_tail) true in Some (new_cond, ret) end else None let reintroduce_where ctxt ce = let fail = ce, false in match ce.pcexpr_desc with | CEFLWOR ([CEFOR(t_e1,x,None,e1)], None, None, cond) -> begin match shorten_conjunction ctxt cond with | Some (new_cond, ret) -> let r = fmkacexpr (CEFLWOR ([CEFOR(t_e1,x,None,e1)], Some new_cond, None, ret)) ce.pcexpr_annot ce.pcexpr_origin ce.pcexpr_loc in let _ = debug_apply "reintroduce where clause" ce r in r, true | _ -> fail end | CEFLWOR ([CELET(t_e1,x,e1)], None, None, cond) -> begin match shorten_conjunction ctxt cond with | Some (new_cond, ret) -> let r = fmkacexpr (CEFLWOR ([CELET(t_e1,x,e1)], Some new_cond, None, ret)) ce.pcexpr_annot ce.pcexpr_origin ce.pcexpr_loc in let _ = debug_apply "reintroduce where clause" ce r in r, true | _ -> fail end | _ -> fail let remove_redundant_sbdo ctxt ce = match ce.pcexpr_desc with | CECall (fname, [arg], sign,u, selfrecur) when rqname_equal fname fs_distinct_docorder || rqname_equal fname fs_distinct_docorder_or_atomic_sequence -> let symtab = Hashtbl.create 10 in let no2d, gen, ord, nodup = get_properties ctxt arg symtab in if ord && nodup then let _ = debug_apply "remove distinct-docorder (ord/nodup holds)" ce arg in arg, true else ce, false | _ -> ce, false let group_flwor_block ctxt ce = match ce.pcexpr_desc with | CEFLWOR ([single_fl], None, None, e2) -> begin match e2.pcexpr_desc with | CEFLWOR (fl_list, w, ob, ret) -> let r = fmkacexpr (CEFLWOR (single_fl::fl_list, w, ob, ret)) ce.pcexpr_annot ce.pcexpr_origin ce.pcexpr_loc in let _ = debug_apply "group FLWOR block" ce r in r, true | _ -> ce, false end | _ -> ce, false let join_support_hack ctxt ce = let rec has_no_let fl_list = match fl_list with | [] -> true | (CELET _ )::tl -> false | (CEFOR _ )::tl -> has_no_let tl in match ce.pcexpr_desc with | CEFLWOR (fl_list, None, None, ret1) when has_no_let fl_list -> begin match ret1.pcexpr_desc with | (CEFLWOR ([CELET(t,v,expr)], None, None, ret2)) -> let r = fmkacexpr (CEFLWOR (fl_list@[CELET(t,v,expr)], None, None, ret2)) ce.pcexpr_annot ce.pcexpr_origin ce.pcexpr_loc in let _ = debug_apply "Join support hack" ce r in r, true | _ -> ce, false end | _ -> ce, false galax-1.1/factorization/factorize_util.mli0000664000076400007640000000174110560462356017177 0ustar 
mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: factorize_util.mli,v 1.3 2007/02/01 22:08:46 simeon Exp $ *) (* Module: Factorize_util Description: Utilities used during query factorization. *) open Xquery_core_ast val update_replace_fun : acexpr -> (acfl_expr * acexpr) val get_new_factored_variable : Xquery_common_ast.cvname -> Xquery_common_ast.cvname galax-1.1/factorization/factorize_util.ml0000664000076400007640000000233210560462356017023 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: factorize_util.ml,v 1.3 2007/02/01 22:08:46 simeon Exp $ *) (* Module: Factorize_util Description: Utilities used during query factorization. *) open Xquery_core_ast open Xquery_core_ast_util let update_replace_fun ce = let fi = ce.pcexpr_loc in let eh = ce.pcexpr_origin in let (cvname,cvar_expr) = gen_new_cvar eh fi in (CELET (None, cvname, ce),cvar_expr) let factored_count = ref 0 let get_new_factored_variable vn = let (prefix, uri, ncname) = vn in incr factored_count; (prefix, uri, (ncname ^ (string_of_int (!factored_count)))) galax-1.1/factorization/factorize_unique.mli0000664000076400007640000000300610560462356017524 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: factorize_unique.mli,v 1.3 2007/02/01 22:08:46 simeon Exp $ *) (* Module: Factorize_unique Description: This module assigns all variables a unique name in the core expression. o It assumes that names that are not bound are globals, or defined in another scope. These variable names remain untouched. o factorize_unique_with_context allows you to pass in initial bindings, from global variables/parameters if renaming is done there. o fs:dot is a special variable and is never rebound. The purpose is to simplify other factorization code. *) type binding_list = (Xquery_common_ast.cvname * Xquery_common_ast.cvname) list val factorize_unique_with_context : binding_list -> Xquery_core_ast.acexpr -> Xquery_core_ast.acexpr val factorize_unique : Xquery_core_ast.acexpr -> Xquery_core_ast.acexpr galax-1.1/factorization/factorize_tpnf.mli0000664000076400007640000000176410560462356017176 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: factorize_tpnf.mli,v 1.2 2007/02/01 22:08:46 simeon Exp $ *) (* Module: Factorize_tpnf Description: This module is a factorization plugin for the TPNF approach for normalizing XQuery Core expressions. 
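As a rough illustration (not exact core syntax): a nested block of the form
   for $dot in (for $dot in e1 return e2) return e3
is fused by the phase-three rules into
   for $dot in e1 return (for $dot in e2 return e3)
when the set-semantics annotations permit it, and a surrounding
fs:distinct-docorder call is dropped by the later phases once the
ordered / duplicate-free properties are known to hold.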
-- Philippe *) val factorize_tpnf : Typing_context.static_context -> Xquery_core_ast.acexpr -> Xquery_core_ast.acexpr galax-1.1/factorization/factorize_top.ml0000664000076400007640000001024010674300056016637 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: factorize_top.ml,v 1.25 2007/09/19 20:01:50 mff Exp $ *) (* Module: Factorize_top Description: This module will put statements into a normal form before their are compiled into the algbera. This is to help the optimizer pick up logical optimizations. *) open Conf open Error open Xquery_common_ast open Xquery_core_ast open Xquery_core_ast_util (**************************************************) (* Exposed *) (* Normalize the FLWOR Expressions in a statement *) (**************************************************) let factorize_expr stat_ctxt expr = (* Note: we may want to have several factorization "phases" called here. - Jerome *) begin (* *** Disabled for now * let nexpr1 = Factorize_unique.factorize_unique expr in * let nexpr2 = Factorize_globals.factor_global_expression stat_ctxt nexpr1 in * let nexpr3 = Factorize_flwor.factorize_flwor stat_ctxt nexpr2 in * let nexpr4 = Factorize_iteration.factorize_expression stat_ctxt nexpr3 in *) let nexpr3 = Factorize_tpnf.factorize_tpnf stat_ctxt expr in (* let nexpr3 = expr in let nexpr4 = Factorize_flwor.factorize_flwor stat_ctxt nexpr3 in *) nexpr3 end let factorize_statement stat_ctxt statement = factorize_expr stat_ctxt statement let factorize_function_body stat_ctxt cfunction_body = match cfunction_body with | CEFunctionInterface | CEFunctionImported | CEFunctionBltIn -> cfunction_body | CEFunctionUser ce -> CEFunctionUser (factorize_expr stat_ctxt ce) let factorize_function stat_ctxt cfunction_def = let new_desc = match cfunction_def.pcfunction_def_desc with | (rfname, cvars, cfunction_signature, cfunction_body, upd) -> (rfname, cvars, cfunction_signature, factorize_function_body stat_ctxt cfunction_body, upd) in fmkcfunction_def new_desc cfunction_def.pcfunction_def_loc let factorize_var stat_ctxt cvar_decl = let new_desc = match cvar_decl.pcvar_decl_desc with | (vname, model, CEVarUser ce) -> (vname, model, CEVarUser (factorize_expr stat_ctxt ce)) | (vname, model, cevar_body) -> (vname, model, cevar_body) in fmkcvar_decl new_desc cvar_decl.pcvar_decl_loc let factorize_index stat_ctxt cindex_def = let new_desc = match cindex_def.pcindex_def_desc with | CValueIndex (name, ce1, ce2) -> CValueIndex (name, factorize_expr stat_ctxt ce1, factorize_expr stat_ctxt ce2) | CNameIndex name -> CNameIndex name in fmkcindex_def new_desc cindex_def.pcindex_def_loc let factorize_prolog stat_ctxt cprolog = let prolog = { pcprolog_functions = List.map (factorize_function stat_ctxt) cprolog.pcprolog_functions; pcprolog_vars = List.map (factorize_var stat_ctxt) cprolog.pcprolog_vars; pcprolog_servers = raise (Query(Prototype("server declarations not implemented"))); pcprolog_indices = List.map (factorize_index stat_ctxt) cprolog.pcprolog_indices } in (stat_ctxt,prolog) let factorize_xmodule stat_ctxt cxmodule = let factorized_cxmodule = { pcmodule_prolog = snd (factorize_prolog stat_ctxt cxmodule.pcmodule_prolog); pcmodule_statements = List.map (factorize_statement stat_ctxt) cxmodule.pcmodule_statements } in (* Stream analysis for streaming XPath evaluation; adds core 
annotations. - Michael *) (* let _ = Stream_analysis.stream_analysis_of_xmodule factorized_cxmodule in *) (*****************************************) (* why is that called twice, by the way? *) (*****************************************) (stat_ctxt,factorized_cxmodule) galax-1.1/factorization/factorize_tpnf_rules.mli0000664000076400007640000000752210560462356020406 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: factorize_tpnf_rules.mli,v 1.13 2007/02/01 22:08:46 simeon Exp $ *) (* Module: Factorize_tpnf_rules Description: This module contains the rewriting rules used for the TPNF. *) open Ast_walker_rewrite_context open Xquery_core_ast open Typing_context (* *** PHASE ONE *** *) val insert_sbdo_xpath: static_context rewrite_context -> acexpr -> acexpr * bool val decompose_flwor: static_context rewrite_context -> acexpr -> acexpr * bool val convert_where_clause: static_context rewrite_context -> acexpr -> acexpr * bool (* *** PHASE TWO *** *) val insert_scrambling_ddo: static_context rewrite_context -> acexpr -> acexpr * bool val insert_scrambling_if: static_context rewrite_context -> acexpr -> acexpr * bool val propagate_scrambling_bool: static_context rewrite_context -> acexpr -> acexpr * bool val propagate_scrambling_ddo: static_context rewrite_context -> acexpr -> acexpr * bool val propagate_scrambling_for_let: static_context rewrite_context -> acexpr -> acexpr * bool val propagate_scrambling_for: static_context rewrite_context -> acexpr -> acexpr * bool val propagate_scrambling_if: static_context rewrite_context -> acexpr -> acexpr * bool (* *** PHASE THREE *** *) val remove_scrambled_sbdo: static_context rewrite_context -> acexpr -> acexpr * bool val substitution: static_context rewrite_context -> acexpr -> acexpr * bool val loop_fusion: static_context rewrite_context -> acexpr -> acexpr * bool val loop_split: static_context rewrite_context -> acexpr -> acexpr * bool val nested_loop_split: static_context rewrite_context -> acexpr -> acexpr * bool val condition_detection: static_context rewrite_context -> acexpr -> acexpr * bool val condition_shift: static_context rewrite_context -> acexpr -> acexpr * bool val return_condition_lift: static_context rewrite_context -> acexpr -> acexpr * bool val nested_return_condition_lift: static_context rewrite_context -> acexpr -> acexpr * bool val return_result_lift: static_context rewrite_context -> acexpr -> acexpr * bool val nested_return_result_lift: static_context rewrite_context -> acexpr -> acexpr * bool val for_condition_lift: static_context rewrite_context -> acexpr -> acexpr * bool val trivial_dot_condition: static_context rewrite_context -> acexpr -> acexpr * bool val trivial_loop: static_context rewrite_context -> acexpr -> acexpr * bool val dot_introduction: static_context rewrite_context -> acexpr -> acexpr * bool val dot_loop: static_context rewrite_context -> acexpr -> acexpr * bool val shortening_condition: static_context rewrite_context -> acexpr -> acexpr * bool val filter_fusion: static_context rewrite_context -> acexpr -> acexpr * bool (* *** PHASES FOUR-FIVE *** *) val remove_redundant_sbdo: static_context rewrite_context -> acexpr -> acexpr * bool val check_normal_form: static_context rewrite_context -> acexpr -> acexpr * bool val reintroduce_where: static_context rewrite_context -> acexpr -> 
acexpr * bool val group_flwor_block: static_context rewrite_context -> acexpr -> acexpr * bool val join_support_hack: static_context rewrite_context -> acexpr -> acexpr * bool galax-1.1/factorization/factorize_free_var.mli0000664000076400007640000000217210560462356020012 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: factorize_free_var.mli,v 1.3 2007/02/01 22:08:46 simeon Exp $ *) (* Module: Factorize_free_var Description: Given a core expression, recursively fill out the free variable annotation. Additionally return the list of free variables for the expression. *) val compute_free_vars : Xquery_core_ast.acexpr -> Xquery_common_ast.cvname list val annotate_free_vars : Typing_context.static_context -> Xquery_core_ast.acexpr -> Typing_context.static_context galax-1.1/factorization/factorize_update.ml0000664000076400007640000000742010560462356017333 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: factorize_update.ml,v 1.4 2007/02/01 22:08:46 simeon Exp $ *) (* Module: Factorize_update Description: This module factorizes out code from update expression. This is to help the optimizer pick up logical optimizations. *) open Xquery_core_ast open Xquery_core_ast_util open Factorize_util (****************************) (* Factorize simple updates *) (****************************) let factorize_insert_location il = match il with | CUAsLastInto ce -> let (l_clause, cvar_expr) = update_replace_fun ce in (l_clause,CUAsLastInto cvar_expr) | CUAsFirstInto ce -> let (l_clause, cvar_expr) = update_replace_fun ce in (l_clause, CUAsFirstInto cvar_expr) | CUInto ce -> let (l_clause, cvar_expr) = update_replace_fun ce in (l_clause, CUInto cvar_expr) | CUAfter ce -> let (l_clause, cvar_expr) = update_replace_fun ce in (l_clause, CUAfter cvar_expr) | CUBefore ce -> let (l_clause, cvar_expr) = update_replace_fun ce in (l_clause, CUBefore cvar_expr) let factorize_simple_update_clause csu = let annot = csu.pcexpr_annot in let eh = csu.pcexpr_origin in let fi = csu.pcexpr_loc in match csu.pcexpr_desc with | CEInsert (ce,cil) -> let (cl_clause1,ce) = update_replace_fun ce in let (cl_clause2,cil) = factorize_insert_location cil in let cl_clauses = cl_clause1 :: cl_clause2 :: [] in (cl_clauses, fmkacexpr (CEInsert (ce, cil)) annot eh fi) | CEReplace (vo,e1,e2) -> let (l_clause1,ce1) = update_replace_fun e1 in let (l_clause2,ce2) = update_replace_fun e2 in let l_clauses = l_clause1 :: l_clause2 :: [] in (l_clauses, fmkacexpr (CEReplace (vo,ce1,ce2)) annot eh fi) | CEDelete e -> let (l_clause1,ce) = update_replace_fun e in (l_clause1 :: [], fmkacexpr (CEDelete ce) annot eh fi) | _ -> ([],csu) let rec factorize_simple_update_clauses simple_update_clauses = match simple_update_clauses with | [] -> [],[] | su :: simple_update_clauses' -> let l_clauses1,csu = factorize_simple_update_clause su in let l_clauses2,csimple_update_clauses' = factorize_simple_update_clauses simple_update_clauses' in (l_clauses1 @ l_clauses2, csu :: csimple_update_clauses') let apply_update_normal_form cfl_clauses csimple_updates_clauses = let 
(cfl_additional_clauses,csimple_updates_clauses) = factorize_simple_update_clauses csimple_updates_clauses in let cfl_clauses = cfl_clauses @ cfl_additional_clauses in (cfl_clauses,csimple_updates_clauses) (***************************) (* Normal form for updates *) (***************************) let update_in_normal_form fl_clauses return_clause = match return_clause.pcexpr_desc with | CESnap csimple_update -> let (cfl_clauses,csimple_updates_clauses) = apply_update_normal_form fl_clauses [csimple_update] in let annot = return_clause.pcexpr_annot in let eh = return_clause.pcexpr_origin in let fi = return_clause.pcexpr_loc in let return_clause = fmkacexpr (CESnap csimple_updates_clauses) annot eh fi in (cfl_clauses,return_clause) | _ -> (fl_clauses, return_clause) galax-1.1/streaming/0000775000076400007640000000000010772255370012564 5ustar mffmffgalax-1.1/streaming/streaming_parse.ml0000664000076400007640000001434610560462366016311 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: streaming_parse.ml,v 1.13 2007/02/01 22:08:54 simeon Exp $ *) (* Module: Streaming_parse Description: This module implements a SAX parser for Galax. *) open Error open Finfo open Pxp_types open Pxp_lexer_types open Namespace_names open Streaming_types open Streaming_util type context = { mutable odtd : Pxp_dtd.dtd option; mutable reused_lexobj : lexer_obj option; io : string option; fileid : int } let fileid = get_file_id "Unknown File" let cur_finfo () = ref (make_finfo_line_col_id fileid 0 0) let make_file_id gio_string = match gio_string with | None -> fileid | Some f -> get_file_id f let default_context gio_string = { odtd = None; reused_lexobj = None; io = gio_string; fileid = make_file_id gio_string } let sax_raise_parsing_error pp msg = begin Galax_io.close_pull_parser (snd pp); raise (Query (Parsing (Finfo.bogus, msg))) end let set_dtd_in_context c dtd = begin let lfactory = dtd # lexer_factory in let reused_lexobj = lfactory # open_string "" in c.odtd <- Some dtd; c.reused_lexobj <- Some reused_lexobj end let glx_expand_attvalue c str = let dtd = match c.odtd with | None -> raise (Query (Internal_Error "Missing DTD in parsing")) | Some dtd -> dtd in let reused_lexobj = match c.reused_lexobj with | None -> raise (Query (Internal_Error "Missing DTD in parsing")) | Some reused_lexobj -> reused_lexobj in Pxp_aux.expand_attvalue reused_lexobj dtd str false let glx_normalized_public_id s = Pxp_aux.normalize_public_id s let make_document_desc c xmlversion dtd = let uri = match c.io with | None -> Dm_atomic_util.default_no_uri_dm | Some uri -> Dm_atomic_util.uri_dm_of_uri (Some (AnyURI._kinda_uri_of_string uri)) in match dtd#root with | None -> (Some (xmlversion,None,None), None, uri) | Some _ -> (Some (xmlversion,None,None), Some dtd, uri) let make_att c (name,content) = let qname = uqname_element_of_string name in (* let fixed_content = glx_expand_attvalue c content in *) let fixed_content = content in (qname,fixed_content) let make_element_desc c name attlist scope_opt entid = let qname = uqname_element_of_string name in let sax_xml_attribute_forest = List.rev (List.map (make_att c) attlist) in (qname,sax_xml_attribute_forest,false) let make_text_desc data = data let make_pi_desc target value = (target,value) let make_comment_desc content = content let rec map_event fi_ref c pp 
event = match event with | E_start_doc (xmlversion,dtd) -> begin if xmlversion = "1.0" then () else sax_raise_parsing_error pp ("XML version: " ^ xmlversion ^ " not supported"); set_dtd_in_context c dtd; Some (fmkse_event (SAX_startDocument (make_document_desc c xmlversion dtd)) (!fi_ref)) end | E_end_doc lit_name -> Some (fmkse_event (SAX_endDocument) !fi_ref) | E_start_tag (name, attlist, scope_opt, entid) -> Some (fmkse_event (SAX_startElement (make_element_desc c name attlist scope_opt entid)) (!fi_ref)) | E_end_tag (name, entid) -> Some (fmkse_event (SAX_endElement) (!fi_ref)) | E_char_data data -> Some (fmkse_event (SAX_characters (make_text_desc data)) (!fi_ref)) | E_pinstr (target,value,entid) -> Some (fmkse_event (SAX_processingInstruction (make_pi_desc target value)) (!fi_ref)) | E_pinstr_member (target,value,entid) -> Some (fmkse_event (SAX_processingInstruction (make_pi_desc target value)) (!fi_ref)) | E_comment content -> Some (fmkse_event (SAX_comment (make_comment_desc content)) (!fi_ref)) (* Silently ignoring super root event *) | E_start_super -> next_pxp_event fi_ref c pp | E_end_super -> next_pxp_event fi_ref c pp | E_position (entity,line,col) -> (* * These events are only created if the next event will be * E_start_tag, E_pinstr, or E_comment, and if the configuration option * store_element_position is true. *) fi_ref := make_finfo_line_col_id c.fileid line col; next_pxp_event fi_ref c pp | E_error exn -> begin Galax_io.close_pull_parser (snd pp); raise exn end | E_end_of_stream -> begin Galax_io.close_pull_parser (snd pp); None end and next_pxp_event fi_ref c pp = begin match (fst pp) () with | None -> begin Galax_io.close_pull_parser (snd pp); None end | Some event -> map_event fi_ref c pp event end (************************************) (* The top level stream constructor *) (************************************) let get_schema_of_stream stream = let eo = Cursor.cursor_peek stream in match eo with | None -> None | Some e -> match e.se_desc with | SAX_startDocument (_,schema,_) -> schema | _ -> None let open_pxp_stream_from_io gio entity_kind = let s = Galax_io.uri_string_of_gio gio in (Galax_io.pull_parser_from_input_spec gio Galax_io.Document_entity,s) let open_xml_stream_from_pxp_stream (pp,s) = let fi_ref = cur_finfo () in let c = default_context s in let glx_token_from_pxp_event x = next_pxp_event fi_ref c pp in let stream = Cursor.cursor_of_function glx_token_from_pxp_event in let schema = get_schema_of_stream stream in (schema, stream) let open_xml_stream_from_io gio = let pp,s = open_pxp_stream_from_io gio Galax_io.Document_entity in open_xml_stream_from_pxp_stream (pp,s) let open_xml_entity_stream_from_io gio entity_kind = let pp,s = open_pxp_stream_from_io gio entity_kind in open_xml_stream_from_pxp_stream (pp,s) (* Parse a stand-alone DTD *) let get_dtd gio = Galax_io.dtd_from_input_spec gio let parse_standalone_dtd gio = get_dtd gio galax-1.1/streaming/streaming_parse.mli0000664000076400007640000000325110560462366016453 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: streaming_parse.mli,v 1.5 2007/02/01 22:08:54 simeon Exp $ *) (* Module: Streaming_parse Description: This module implements a SAX parser for Galax. 
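A typical use (sketch; File_Input is assumed here to be one of the
Galax_io.input_spec constructors) is to open a document entity and then
consume or discard the resulting stream:

    let dtd_opt, stream = open_xml_stream_from_io (Galax_io.File_Input doc_uri) in
    Streaming_ops.discard_xml_stream stream

where doc_uri is a hypothetical string naming the document, and dtd_opt
carries the parsed DTD when the document declared one.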
*) val glx_normalized_public_id : string -> string (* Creates an XML stream from a string URI buffer and if a DTD exists and was parsed successfully, returns the XQuery type representing that DTD and the corresponding XML stream *) val open_pxp_stream_from_io : Galax_io.input_spec -> Galax_io.entity_kind -> Galax_io.pxp_stream * string option val open_xml_stream_from_pxp_stream : Galax_io.pxp_stream * string option -> (Pxp_dtd.dtd option * Streaming_types.xml_stream) (* Opens a document entity or a document fragment *) val open_xml_entity_stream_from_io : Galax_io.input_spec -> Galax_io.entity_kind -> (Pxp_dtd.dtd option * Streaming_types.xml_stream) (* Opens a document entity *) val open_xml_stream_from_io : Galax_io.input_spec -> (Pxp_dtd.dtd option * Streaming_types.xml_stream) val parse_standalone_dtd : Galax_io.input_spec -> Pxp_dtd.dtd galax-1.1/streaming/prefix_context.ml0000664000076400007640000000445010665643471016166 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: prefix_context.ml,v 1.5 2007/08/30 22:39:53 simeon Exp $ *) (* Module: Prefix_context Description: This module implements the context necessary to turn a resolved SAX stream back into an unresolved one with the proper xmlns attributes. *) (*********************************) (* A type for the prefix context *) (*********************************) type prefix_context = { prefix_context_nsenv : Namespace_context.nsenv Stack.t; current_bindings : Namespace_context.nsenv } (********************************) (* Creates a new prefix context *) (********************************) let build_prefix_context () = { prefix_context_nsenv = Stack.create (); current_bindings = Namespace_context.empty_nsenv } (*********************************) (* Operations on prefix contexts *) (*********************************) let push_nsenv_in_prefix_context prefix_context new_nsenv = let delta_bindings,new_nsenv = try let previous_nsenv = Stack.top prefix_context.prefix_context_nsenv in let delta_bindings = if Namespace_context.same_nsenv new_nsenv previous_nsenv then [] else Namespace_context.delta_bindings new_nsenv previous_nsenv in let new_nsenv,delta_bindings = Namespace_context.filter_nsenv_in_scope previous_nsenv delta_bindings in (delta_bindings,new_nsenv) with | Stack.Empty -> Namespace_context.flatten_bindings new_nsenv,new_nsenv in Stack.push new_nsenv prefix_context.prefix_context_nsenv; Namespace_context.cleanup_actual_out_bindings delta_bindings [] let pop_nsenv_from_prefix_context prefix_context = ignore(Stack.pop prefix_context.prefix_context_nsenv) galax-1.1/streaming/sax_annot.ml0000664000076400007640000000533210560462366015113 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: sax_annot.ml,v 1.4 2007/02/01 22:08:54 simeon Exp $ *) (* Module: Sax_annot Description: This *module* implements annotations for typed SAX events. Semantics: Parsing yields a SAX event stream, namespace resolution yields a resolved SAX event stream, and validation yields a typed SAX event stream in which all event annotations are None. 
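For example, the labeled-stream conversions in Streaming_conv mark an
event by calling set_stream_label_annot on its annotation with true, and
later test that mark with get_stream_label_annot.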
A stream-annotation phase (e.g., XPath stream analysis, node-id addition, etc.) takes a typed SAX event stream in which the corresponding annotation components are None, and yields a stream in which all the corresponding annotation components have been set. Setting an annotation component of an annotation destructively modifies the annotation. Accessing an annotation component in an annotation that is not set raises an error. *) open Error (** The XPath stream analysis label is a boolean **) type stream_label_annot = bool type sax_annot = { mutable stream_label_annot : bool option } (** Create an empty AST annotation. @return New AST annotation.*) let empty_sax_annot () = { stream_label_annot = None } (** Return all the annotation's components @return Tuple of annotation's components *) let annot_components sa = (sa.stream_label_annot) (** Copy an annotation @param sax_annotation @return copy of annotation *) let copy_annot sa = let sla = annot_components sa in { stream_label_annot = sla } (** XPath label annotations *) (** Set the stream-label annotation @param sax_annotation to update @param stream-label annotation to add *) let set_stream_label_annot sa sla = sa.stream_label_annot <- Some sla (** Get the stream-label annotation @param sax_annotation @return stream-label annotation *) let get_stream_label_annot sa = match sa.stream_label_annot with | None -> false (* raise (Query(Annotation_Error("In get_stream_label_annot: Annotation does not contain a stream label"))) *) | Some sla -> sla let set_annotation annot1 annot2 = annot1.stream_label_annot <- annot2.stream_label_annot; galax-1.1/streaming/resolve_stream_context.mli0000664000076400007640000000336410661364346020074 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: resolve_stream_context.mli,v 1.7 2007/08/17 18:28:54 simeon Exp $ *) (* Module: Type_stream_context Description: This module implements a the loading context. *) (*****************************) (* A type for the ts context *) (*****************************) type ts_context (****************************) (* Creates a new ts context *) (****************************) val build_ts_context : unit -> ts_context (***************************) (* Accesses the ts context *) (***************************) val get_nsenv : ts_context -> Namespace_context.nsenv * Namespace_context.nsenv val pop_nsenv : ts_context -> unit (*********************************************) (* Adds namespace bindings to the ts context *) (*********************************************) val push_ns_bindings : ts_context -> Namespace_context.binding_table -> unit val resolve_element_name : ts_context -> Namespace_context.nsenv -> Namespace_names.uqname -> Namespace_symbols.symbol * bool val resolve_attribute_name : ts_context -> Namespace_context.nsenv -> Namespace_names.uqname -> Namespace_symbols.symbol galax-1.1/streaming/small_stream_context.ml0000664000076400007640000001400110560462366017341 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. 
*) (* *) (***********************************************************************) (* $Id: small_stream_context.ml,v 1.12 2007/02/01 22:08:54 simeon Exp $ *) (* Module: Small_stream_context Description: This module implements the context used when building a small stream from an element-construction expression. *) open Error open Namespace_names open Namespace_symbols open Namespace_resolve open Namespace_context open Small_stream_ast open Streaming_types (****************************) (* The small stream context *) (****************************) type ss_context = { mutable current_sexpr_list : sexpr list; mutable remaining_sexpr_list : sexpr list; stacked_sexpr_list : (resolved_sax_event * sexpr list) Stack.t} (**************************************) (* Creates a new small stream context *) (**************************************) let build_ss_context cel = { current_sexpr_list = cel; remaining_sexpr_list = []; stacked_sexpr_list = Stack.create () } (******************************************) (* Operations on the small stream context *) (******************************************) (* Access the current expr list *) let get_current_sexpr_list ss_context = ss_context.current_sexpr_list (* Access the remaining expr list *) let get_remaining_sexpr_list ss_context = ss_context.remaining_sexpr_list (* Replace the current sequence *) let replace_current_sexpr_list ss_context new_sexpr_list = ss_context.current_sexpr_list <- new_sexpr_list (* Push a context (getting inside of an element/document node) *) let convert_sattribute (rattr_sym,content) = (rattr_sym,content) let convert_sattributes sattrs = (List.map convert_sattribute sattrs) let push_elem_to_ss_context ss_context se remaining_list = match se with | SDocument (base_uri,nse) -> begin Stack.push (Streaming_util.fmkrse_event RSAX_endDocument Finfo.bogus, remaining_list) ss_context.stacked_sexpr_list; (Streaming_util.fmkrse_event (RSAX_startDocument (None,None, base_uri)) Finfo.bogus, nse) end | SElem (rsym, nsenv, sattributes, base_uri, nse) -> begin let attributes = convert_sattributes sattributes in (* Has element content is tricky here, as part of the content may be computed. So, we are a bit conservative here and only return true if there is no text content and no holes in the content. 
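In particular, the flag stays false as soon as any child is an SText or
an SHole, since a hole may later be filled with text or atomic content.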
- Jerome *) let has_element_content = List.for_all (fun x -> match x with | SHole | SText _ -> false | _ -> true) nse in Stack.push (Streaming_util.fmkrse_event RSAX_endElement Finfo.bogus, remaining_list) ss_context.stacked_sexpr_list; (Streaming_util.fmkrse_event (RSAX_startElement (rsym,attributes,has_element_content,base_uri,nsenv)) Finfo.bogus, nse) end | SText content -> (Streaming_util.fmkrse_event (RSAX_characters content) Finfo.bogus, remaining_list) | SPI (target,content) -> (Streaming_util.fmkrse_event (RSAX_processingInstruction (target,content)) Finfo.bogus, remaining_list) | SComment content -> (Streaming_util.fmkrse_event (RSAX_comment content) Finfo.bogus, remaining_list) | SHole -> (Streaming_util.fmkrse_event RSAX_hole Finfo.bogus, remaining_list) (* Pop a context (getting outside of an element/document node) *) let pop_elem_from_ss_context ss_context = try Some (Stack.pop ss_context.stacked_sexpr_list) with | Stack.Empty -> None (******************************) (* Simple stream constructors *) (******************************) (* Builds a small XML stream with holes from an AST with element construction operations *) let next_event_of_sexpr ss_context = let (event,rest) = match get_current_sexpr_list ss_context with | [] -> begin match pop_elem_from_ss_context ss_context with | Some (event,rest) -> (Some event,rest) | None -> (None,[]) end | sexpr :: rest_of_sexpr_list -> let (event,nested_sexpr_list) = push_elem_to_ss_context ss_context sexpr rest_of_sexpr_list in (Some event, nested_sexpr_list) in begin replace_current_sexpr_list ss_context rest; event end let next_event_of_sexpr_aux ss_context n = next_event_of_sexpr ss_context let resolved_xml_stream_of_sexpr sexpr = let ss_context = build_ss_context [sexpr] in Cursor.cursor_of_function (next_event_of_sexpr_aux ss_context) (* Builds a sexpr out of an unresolved sexpr *) let sattribute_of_rsattribute nsenv (rqname,content) = let rattr_sym = rattr_symbol rqname in (rattr_sym,content) let rec sexpr_of_rsexpr nsenv rsexpr = match rsexpr with | Small_stream_ast.RSDocument (base_uri_option, rsexpr_list) -> let sexpr_list = List.map (sexpr_of_rsexpr nsenv) rsexpr_list in Small_stream_ast.SDocument (base_uri_option, sexpr_list) | Small_stream_ast.RSElem (rqname,binding_table,rsattribute_forest,base_uri,rsexpr_list) -> let new_nsenv = add_all_ns nsenv binding_table in let relem_sym = relem_symbol rqname in let sattribute_forest = List.map (sattribute_of_rsattribute new_nsenv) rsattribute_forest in let sexpr_list = List.map (sexpr_of_rsexpr new_nsenv) rsexpr_list in Small_stream_ast.SElem (relem_sym,new_nsenv,sattribute_forest,base_uri,sexpr_list) | Small_stream_ast.RSText text -> Small_stream_ast.SText text | Small_stream_ast.RSPI (target,content) -> Small_stream_ast.SPI (target,content) | Small_stream_ast.RSComment comment -> Small_stream_ast.SComment comment | Small_stream_ast.RSHole -> Small_stream_ast.SHole galax-1.1/streaming/streaming_ops.mli0000664000076400007640000001307510560462366016147 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: streaming_ops.mli,v 1.8 2007/02/01 22:08:54 simeon Exp $ *) (* Module: Streaming_ops Description: Some basic operations on XML streams. 
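A common pattern (sketch, using only functions declared in this
interface) is to chain the conversions:

    let resolved = resolve_xml_stream raw_stream in
    let typed = typed_of_resolved_xml_stream resolved in
    discard_typed_xml_stream typed

where raw_stream stands for any Streaming_types.xml_stream, for
instance one produced by Streaming_parse.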
*) (********************) (* The empty stream *) (********************) val empty_xml_stream : unit -> Streaming_types.xml_stream (* Discards the current XML subtree *) val empty_resolved_xml_stream : unit -> Streaming_types.resolved_xml_stream (* Discards the current resolved XML subtree *) val empty_typed_xml_stream : unit -> Streaming_types.typed_xml_stream (* Discards the current typed XML subtree *) (***************************************) (* Validity checks on stream contents *) (***************************************) val check_valid_processing_instruction : string -> string -> string * string val check_valid_comment : string -> bool (***********************) (* Discarding a stream *) (***********************) val discard_xml_stream : Streaming_types.xml_stream -> unit (* Discards the current XML subtree *) val discard_resolved_xml_stream : Streaming_types.resolved_xml_stream -> unit (* Discards the current resolved XML subtree *) val discard_typed_xml_stream : Streaming_types.typed_xml_stream -> unit (* Discards the current typed XML subtree *) (*******************************************************) (* Conversion between well-formed and resolved streams *) (*******************************************************) val resolve_xml_stream : Streaming_types.xml_stream -> Streaming_types.resolved_xml_stream (* Resolves namespaces in the original well-formed stream *) val prefix_xml_stream : Streaming_types.resolved_xml_stream -> Streaming_types.xml_stream (* Turns a resolved XML stream back into one with prefixes *) (*************************************************) (* Conversion between resolved and typed streams *) (*************************************************) val typed_of_resolved_xml_stream : Streaming_types.resolved_xml_stream -> Streaming_types.typed_xml_stream (* Treats a resolved XML stream as 'typed'. I.e., adds xs:untypedAtomic and xs:untyped at the right places. *) val erase_xml_stream : Streaming_types.typed_xml_stream -> Streaming_types.resolved_xml_stream (* Turns a typed XML stream into a resolved, non-typed one *) val erase_xml_stream_section_3_7_1 : Streaming_types.typed_xml_stream -> Streaming_types.resolved_xml_stream (* Turns a typed XML stream into a resolved, non-typed one, but also turns atomic values into text nodes according to the semantics in Section 3.7.1 of the XQuery 1.0 document, and rejects attribute events, since they should have been processed before-hand from the beginning of the stream. *) (**********************************************) (* Conversion between typed and ordered typed *) (**********************************************) val ordered_typed_of_typed_stream_for_docid : Nodeid.docid -> Nodeid_context.nodeid_context -> Streaming_types.typed_xml_stream -> Streaming_types.ordered_typed_xml_stream val ordered_typed_of_typed_stream : Nodeid.docid_gen -> Nodeid_context.nodeid_context -> Streaming_types.typed_xml_stream -> Streaming_types.ordered_typed_xml_stream (**********************) (* Stream composition *) (**********************) val compose_xml_streams : Streaming_types.xml_stream -> Streaming_types.xml_stream list -> Streaming_types.xml_stream (* Compose XML streams together. [compose_xml_streams s0 [s1;...;sk]] builds a stream where s1, ..., sk are inserted in the k "holes" found in stream s0. *) val compose_resolved_xml_streams : Streaming_types.resolved_xml_stream -> Streaming_types.resolved_xml_stream list -> Streaming_types.resolved_xml_stream (* Compose resolved XML streams together. 
[compose_resolved_xml_streams s0 [s1;...;sk]] builds a stream where s1, ..., sk are inserted in the k "holes" found in stream s0. *) val compose_typed_xml_streams : Streaming_types.typed_xml_stream -> Streaming_types.typed_xml_stream list -> Streaming_types.typed_xml_stream (* Compose typed XML streams together. [compose_typed_xml_streams s0 [s1;...;sk]] builds a stream where s1, ..., sk are inserted in the k "holes" found in stream s0. *) (***************************) (* Simple stream accessors *) (***************************) val is_empty_xml_stream : Streaming_types.xml_stream -> bool (* Returns true is the stream is empty *) val is_empty_resolved_xml_stream : Streaming_types.resolved_xml_stream -> bool (* Returns true is the resolved stream is empty *) val is_empty_typed_xml_stream : Streaming_types.typed_xml_stream -> bool (* Returns true is the typed stream is empty *) val consume_leading_attribute_events : Streaming_types.resolved_xml_stream -> Streaming_types.resolved_sax_xml_attribute_forest (* Return all of the leading attributes in the stream *) galax-1.1/streaming/sax_annot.mli0000664000076400007640000000464610560462366015273 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: sax_annot.mli,v 1.3 2007/02/01 22:08:54 simeon Exp $ *) (* Module: Sax_annot Description: This *interface* contains annotations for typed SAX events. Semantics: Parsing yields a SAX event stream, namespace resolution yields a resolved SAX event stream, and validation yields a typed SAX event stream in which all event annotations are None. A stream-annotation phase (e.g., XPath stream analysis, document-order addition, etc.) takes a typed SAX event stream in which the corresponding annotation components are None, and yields a stream in which all the corresponding annotation components have been set. Setting an annotation component of an annotation destructively modifies the annotation. Accessing an annotation component in an annotation that is not set raises an error. *) type sax_annot (** The XPath stream analysis label is a boolean **) type stream_label_annot = bool (** Create an empty AST annotation. @return New AST annotation.*) val empty_sax_annot : unit -> sax_annot (** Return all the annotation's components @return Tuple of annotation's components *) val annot_components : sax_annot -> stream_label_annot option (** Copy an annotation @param sax_annotation @return copy of annotation *) val copy_annot : sax_annot -> sax_annot (** XPath label annotations *) (** Set the stream-label annotation @param sax_annotation to update @param stream-label annotation to add *) val set_stream_label_annot : sax_annot -> stream_label_annot -> unit (** Get the stream-label annotation @param sax_annotation @return stream-label annotation *) val get_stream_label_annot : sax_annot -> stream_label_annot val set_annotation : sax_annot -> sax_annot -> unit galax-1.1/streaming/streaming_conv.ml0000664000076400007640000003465710560462366016153 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. 
*) (* *) (***********************************************************************) (* $Id: streaming_conv.ml,v 1.13 2007/02/01 22:08:54 simeon Exp $ *) (* Module: Stream_conv Description: This module provides data structures, accessor and conversion functions for an extended XML token stream model, labeled streams, namely. *) open Streaming_types open Namespace_names open Namespace_symbols open Error type typed_labeled_sax_event = typed_annotated_sax_event type typed_labeled_xml_stream = typed_xml_stream type depth_counter = int ref type typed_labeled_of_typed_context = depth_counter type typed_of_labeled_typed_context = { (* Buffer *) buffer : typed_labeled_sax_event Dynamic_buffer.t; (* Keeps track of relevant 'starting-points' (tree roots) inside the buffer *) position_queue : int Queue.t; (* An event in the input is top-level iff toplevel_depth_counter = 0 *) toplevel_depth_counter : depth_counter; (* Tracks the depth of buffered events relative to the buffered tree they are part of at the time they are added to the buffer; 1) the buffer is accessible iff b_i_d_c = 0 ( and ...) 2) an event has to be buffered iff b_i_d_c != 0 (or ...) *) buffer_input_depth_counter : depth_counter; (* Tracks the depth of buffered events relative to the buffered tree they are part of at the time they are taken out of the buffer; a buffered tree has been emitted entirely iff b_o_d_c = 0 *) buffer_output_depth_counter : depth_counter; } (*****************) (* Stream access *) (*****************) let get_flag labeled_event = Sax_annot.get_stream_label_annot labeled_event.tse_annot let set_flag labeled_event = Sax_annot.set_stream_label_annot labeled_event.tse_annot true let unset_flag labeled_event = Sax_annot.set_stream_label_annot labeled_event.tse_annot false (************************) (* Context initializers *) (************************) let default_typed_labeled_of_typed_context () = ref 0 let default_typed_of_typed_labeled_context chunks csize inc = let init = Streaming_util.fmktse_event TSAX_hole Finfo.bogus in unset_flag init; (* num_chunks, chunk_size, increment *) (* 20 500 5 *) (* 100 50000 5 *) (* 1 10000 1 *) let buff = Dynamic_buffer.make chunks csize inc init in { buffer = buff; position_queue = Queue.create (); toplevel_depth_counter = ref 0; buffer_output_depth_counter = ref 0; buffer_input_depth_counter = ref 0 } (***********************************) (* 'Generic' depth counter helpers *) (***********************************) let decrease_depth_counter depth_counter = depth_counter := (!depth_counter - 1) let increase_depth_counter depth_counter = depth_counter := (!depth_counter + 1) let is_depth_zero depth_counter = !depth_counter = 0 let is_depth_one depth_counter = !depth_counter = 1 let is_depth_greater_zero depth_counter = !depth_counter > 0 let is_depth_greater_one depth_counter = !depth_counter > 1 (*********************) (* Stream conversion *) (*********************) (***************) (* Event level *) (***************) (****************) (* Stream level *) (****************) (* Sets the flag iff the event is at the top-level *) let typed_labeled_of_typed_xml_stream_internal typed_xml_stream tlt_context = let next_event input_stream () = try let event = Cursor.cursor_next input_stream in match event.tse_desc with | TSAX_startDocument _ | TSAX_startElement _ -> begin increase_depth_counter tlt_context; if is_depth_one tlt_context then (set_flag event; Some event) else (unset_flag event; Some event) end | TSAX_endDocument | TSAX_endElement -> begin decrease_depth_counter tlt_context; 
if is_depth_zero tlt_context then (set_flag event; Some event) else (unset_flag event; Some event) end | _ -> begin if is_depth_zero tlt_context then (set_flag event; Some event) else (unset_flag event; Some event) end with | Stream.Failure -> None in Cursor.cursor_of_function (next_event typed_xml_stream) (* Returns the next event from the buffer, keeping track of the event's depth relative to the buffered fragment it is part of *) let next_buffered_event_track_depth ttl_context = let buffer = ttl_context.buffer in let depth_counter = ttl_context.buffer_output_depth_counter in let labeled_event = Dynamic_buffer.next buffer in begin match labeled_event.tse_desc with | TSAX_startElement _ -> increase_depth_counter depth_counter | TSAX_endElement -> decrease_depth_counter depth_counter | TSAX_startDocument _ | TSAX_endDocument -> raise (Query (Streaming_XPath "Should not have document events in the buffer.")) | _ -> () end; (* Added label-removal - Michael 05|14|2006 *) unset_flag labeled_event; labeled_event (* Returns the next event from the buffer, advancing to the next recorded buffer position if necessary *) let next_buffered_event ttl_context = let buffer = ttl_context.buffer in let depth_counter = ttl_context.buffer_output_depth_counter in begin if is_depth_zero depth_counter then let queue = ttl_context.position_queue in try let position = Queue.pop queue in Dynamic_buffer.position buffer position with Queue.Empty -> Dynamic_buffer.reset buffer end; next_buffered_event_track_depth ttl_context (* Records the current buffer position *) let record_buffer_position ttl_context = let position = Dynamic_buffer.get_position ttl_context.buffer in Queue.push position ttl_context.position_queue (* Stores an event in the buffer *) let buffer labeled_event ttl_context = (* print_string ("buffering: " ^ (string_of_typed_sax_event labeled_event.event) ^ "\n");*) Dynamic_buffer.add ttl_context.buffer labeled_event (* Buffers a (non-top-level) event iff its flag is set or it is part of a tree that is to be buffered; records the buffer position iff a start event's flag is set *) let consider_buffering labeled_event ttl_context = let depth_counter = ttl_context.buffer_input_depth_counter in match labeled_event.tse_desc with | TSAX_startElement _ -> let is_flag_set = get_flag labeled_event in if is_depth_zero depth_counter then begin if is_flag_set then begin increase_depth_counter depth_counter; record_buffer_position ttl_context; buffer labeled_event ttl_context end end else begin increase_depth_counter depth_counter; if is_flag_set then record_buffer_position ttl_context; buffer labeled_event ttl_context end | TSAX_endElement -> if is_depth_greater_zero depth_counter then begin decrease_depth_counter depth_counter; buffer labeled_event ttl_context end | TSAX_startDocument _ | TSAX_endDocument -> raise (Query (Streaming_XPath "Should never consider buffering document events.")) | _ -> let is_flag_set = get_flag labeled_event in if is_depth_zero depth_counter then begin if is_flag_set then begin record_buffer_position ttl_context; buffer labeled_event ttl_context end end else begin if is_flag_set then record_buffer_position ttl_context; buffer labeled_event ttl_context end (* Returns the next event from the input stream, considering buffering that event only in case it is not at the top-level *) let next_event_track_toplevel input_stream ttl_context = try let labeled_event = Cursor.cursor_next input_stream in let depth_counter = ttl_context.toplevel_depth_counter in match labeled_event.tse_desc with | 
TSAX_startDocument _ | TSAX_startElement _ -> increase_depth_counter depth_counter; if is_depth_greater_one depth_counter then begin consider_buffering labeled_event ttl_context; (* Added label-removal - Michael 06|12|2006 *) unset_flag labeled_event end; Some labeled_event | TSAX_endDocument | TSAX_endElement -> decrease_depth_counter depth_counter; if is_depth_greater_zero depth_counter then begin consider_buffering labeled_event ttl_context; (* Added label-removal - Michael 06|12|2006 *) unset_flag labeled_event end; Some labeled_event | _ -> if not (is_depth_zero depth_counter) then begin consider_buffering labeled_event ttl_context; (* Added label-removal - Michael 06|12|2006 *) unset_flag labeled_event end; Some labeled_event with | Stream.Failure -> None (* The buffer is accessible iff neither does the current event belong to a tree that is to be buffered, nor is the current event non-top-level (in case of the current event being non-top-level, the tree it is part of must be emitted in entirety, first, before starting to process a new tree from the buffer). *) let is_buffer_accesible ttl_context = (is_depth_zero ttl_context.buffer_input_depth_counter) && (is_depth_zero ttl_context.toplevel_depth_counter) (* If accessible, returns the next event from the buffer; if not, simply passes through the next outstanding event from the input stream (possibly involving buffering etc.) *) let next_event input_stream ttl_context () = if is_buffer_accesible ttl_context then begin (*print_string "from buffer\n";*) try Some (next_buffered_event ttl_context) with | Dynamic_buffer.Exhausted -> begin (*print_string "cancelled\n ";*) next_event_track_toplevel input_stream ttl_context end end else begin (*print_string "from input\n";*) next_event_track_toplevel input_stream ttl_context end (**********************) (* Context generation *) (**********************) let typed_of_typed_labeled_xml_stream_internal typed_labeled_xml_stream ttl_context = Cursor.cursor_of_function (next_event typed_labeled_xml_stream ttl_context) (***********) (* Exposed *) (***********) let typed_labeled_of_typed_xml_stream typed_xml_stream = let tlt_context = default_typed_labeled_of_typed_context () in typed_labeled_of_typed_xml_stream_internal typed_xml_stream tlt_context let typed_of_typed_labeled_xml_stream typed_labeled_xml_stream = let chunks = !Conf.buffer_chunks in let csize = !Conf.buffer_csize in let inc = !Conf.buffer_inc in let typed_of_typed_labeled_context = default_typed_of_typed_labeled_context chunks csize inc in let typed_xml_stream = typed_of_typed_labeled_xml_stream_internal typed_labeled_xml_stream typed_of_typed_labeled_context in typed_xml_stream (*typed_labeled_xml_stream*) let slice_typed_xml_stream typed_labeled_xml_stream = (* Must resolve the labels in order to properly reflect item boundaries. 
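Each slice then simply spans the events from one depth-zero start event to its matching end event, tracked with the single depth counter below.

     A minimal usage sketch; count_items is hypothetical and not part of this
     module, it only shows how the slice cursor is meant to be driven, with
     each slice fully consumed before the next one is requested:

       let count_items typed_labeled_stream =
         let slices = slice_typed_xml_stream typed_labeled_stream in
         let n = ref 0 in
         begin
           try
             while true do
               Streaming_ops.discard_typed_xml_stream (Cursor.cursor_next slices);
               incr n
             done
           with Stream.Failure -> ()
         end;
         !n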
*) let typed_xml_stream = typed_of_typed_labeled_xml_stream typed_labeled_xml_stream in let finished_slice = ref false in let depth = ref 0 in let next_slice () = match Cursor.cursor_peek typed_xml_stream with | None -> None | Some _ -> let next_event_current_slice () = if !finished_slice then begin finished_slice := false; None end else let next_event = Cursor.cursor_next typed_xml_stream in begin match next_event.tse_desc with | TSAX_startDocument _ | TSAX_startElement _ | TSAX_startEncl -> depth := !depth + 1 | TSAX_endDocument | TSAX_endElement | TSAX_endEncl -> depth := !depth - 1; | _ -> () end; if !depth = 0 then finished_slice := true; Some next_event in Some (Cursor.cursor_of_function next_event_current_slice) in Cursor.cursor_of_function next_slice let slice_discard_typed_xml_stream typed_xml_stream = Cursor.cursor_map Streaming_ops.discard_typed_xml_stream (slice_typed_xml_stream typed_xml_stream) let item_count_typed_labeled_xml_stream typed_labeled_xml_stream = let item_count = ref 0 in let rec count_items () = let labeled_event = Cursor.cursor_next typed_labeled_xml_stream in if get_flag labeled_event then incr item_count; count_items () in let _ = try count_items () with | Stream.Failure -> () in !item_count let item_range_typed_labeled_xml_stream typed_labeled_xml_stream from_index to_index = typed_labeled_xml_stream let nth_item_typed_labeled_xml_stream typed_labeled_xml_stream n = item_range_typed_labeled_xml_stream typed_labeled_xml_stream 0 n (* NOTE: This behaves like a filter in total analogy to streamed XPath evaluation. The rest of the stream following the first item is continously consumed, although it is guaranteed to not contribute to the result. This must be done in order to ensure that other cursors pending on the same stream refer to the correct positions. We may want another static analysis capable of determining wether this consumption is really needed. - Michael *) let first_item_typed_labeled_xml_stream typed_labeled_xml_stream = (* nth_item_typed_labeled_xml_stream typed_labeled_xml_stream 1 *) let finished_first_item = ref false in let depth = ref 0 in let rec next_event () = match Cursor.cursor_peek typed_labeled_xml_stream with | None -> None | Some event -> begin Cursor.cursor_junk typed_labeled_xml_stream; if !finished_first_item then next_event () else begin begin match event.tse_desc with | TSAX_startDocument _ | TSAX_startElement _ | TSAX_startEncl -> begin if !depth > 0 then unset_flag event; depth := !depth + 1 end | TSAX_endDocument | TSAX_endElement | TSAX_endEncl -> begin depth := !depth - 1; if !depth > 0 then unset_flag event end | _ -> if !depth > 0 then unset_flag event end; if !depth = 0 then finished_first_item := true; Some event end end in Cursor.cursor_of_function next_event galax-1.1/streaming/prefix_context.mli0000664000076400007640000000274310560462366016336 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: prefix_context.mli,v 1.3 2007/02/01 22:08:54 simeon Exp $ *) (* Module: Prefix_context Description: This module implements the context necessary to turn a resolved SAX stream back into an unresolved one with the proper xmlns attributes. 
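The context keeps track of the namespace environments seen so far: push_nsenv_in_prefix_context is called for each resolved start-element event with the in-scope environment carried by that event, and the binding table it returns is what has to be re-emitted as xmlns attributes on that element; pop_nsenv_from_prefix_context is called on the matching end-element event to restore the previous state.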
*) (*********************************) (* A type for the prefix context *) (*********************************) type prefix_context (********************************) (* Creates a new prefix context *) (********************************) val build_prefix_context : unit -> prefix_context (*********************************) (* Operations on prefix contexts *) (*********************************) val push_nsenv_in_prefix_context : prefix_context -> Namespace_context.nsenv -> Namespace_context.binding_table val pop_nsenv_from_prefix_context : prefix_context -> unit galax-1.1/streaming/streaming_types.mli0000664000076400007640000002421210560462366016505 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: streaming_types.mli,v 1.6 2007/02/01 22:08:54 simeon Exp $ *) (* Module: Streaming_types Description: This module contains data structures for XML streams. *) (**********************************) (* SAX events for well-formed XML *) (**********************************) (* XML declaration *) type sax_xml_decl = (Datatypes.xs_untyped * Encoding.encoding option * Datatypes.xs_untyped option) (* DTD declaration *) type sax_dtd_decl = Pxp_dtd.dtd option (* Base URI *) type sax_base_uri = Dm_atomic.atomicAnyURI option ref (* Attributes *) type sax_xml_attribute = Namespace_names.uqname * Datatypes.xs_untyped type sax_xml_attribute_forest = sax_xml_attribute list (* Elements, PIs, Comments, and Characters *) type has_element_content = bool type document_desc = sax_xml_decl option * sax_dtd_decl * sax_base_uri type element_desc = Namespace_names.uqname * sax_xml_attribute_forest * has_element_content type pi_desc = Namespace_names.ncname * Datatypes.xs_untyped type comment_desc = Datatypes.xs_untyped type text_desc = Datatypes.xs_untyped type atomic_desc = Dm_atomic.atomicValue (* SAX events *) type sax_event_desc = | SAX_startDocument of document_desc | SAX_endDocument | SAX_startElement of element_desc | SAX_endElement | SAX_processingInstruction of pi_desc | SAX_comment of comment_desc | SAX_characters of text_desc | SAX_attribute of sax_xml_attribute (* Addition to the standard SAX events *) | SAX_atomicValue of atomic_desc (* Addition to the standard SAX events *) | SAX_hole (* Addition to the standard SAX events *) type sax_event = { se_desc : sax_event_desc; se_loc : Finfo.finfo } (* Note: The two last events are added in order to be able to deal with top-level attributes and atomic values from the XQuery data model. - Jerome *) (* Well-formed XML streams *) (* Note: An XML stream is a Caml stream of SAX events. Please see the Stream Caml module for more on operations on streams. - Jerome *) type xml_stream = sax_event Cursor.cursor (*******************************) (* SAX events for resolved XML *) (*******************************) (* Note: The following structures are used to represent 'resolved' XML document in a streaming fashion. They are the same as above, but the QNames are turned into expanded-names (Symbols in Galax). 
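For instance, an element or attribute name that arrives from the parser as a (prefix, local name) pair is looked up in the in-scope namespace environment and replaced by the corresponding symbol, which carries the full namespace URI.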
- Jerome *) (* Resolved attributes *) type resolved_sax_xml_attribute = Namespace_symbols.rattr_symbol (* QNames are resolved *) * Datatypes.xs_untyped type resolved_sax_xml_attribute_forest = resolved_sax_xml_attribute list (* Typed elements *) type resolved_element_desc = Namespace_symbols.relem_symbol (* QNames are resolved *) * resolved_sax_xml_attribute_forest * has_element_content * sax_base_uri (* Addition to the standard SAX events *) * Namespace_context.nsenv (* Namespace environment for that element *) (* Resolved SAX events *) type resolved_sax_event_desc = | RSAX_startDocument of document_desc | RSAX_endDocument | RSAX_startElement of resolved_element_desc | RSAX_endElement | RSAX_processingInstruction of pi_desc | RSAX_comment of comment_desc | RSAX_characters of text_desc | RSAX_attribute of resolved_sax_xml_attribute (* Addition to the standard SAX events *) | RSAX_atomicValue of atomic_desc (* Addition to the standard SAX events *) | RSAX_hole (* Addition to the standard SAX events *) (* Resolved XML streams *) type resolved_sax_event = { rse_desc : resolved_sax_event_desc; rse_loc : Finfo.finfo; } (* Note: A resolved XML stream is a Caml stream of resolved SAX events. Please see the Stream Caml module for more on operations on streams. - Jerome *) type resolved_xml_stream = resolved_sax_event Cursor.cursor (****************************) (* SAX events for typed XML *) (****************************) (* Note: The following structures are used to represent 'typed' XML document in a streaming fashion. They support a number of additions from the standard SAX events, notably: type annotations on elements and attributes, as well as typed values resulting from XML Schema validation on top of the corresponding text content. - Jerome *) (* Typed attributes *) type typed_sax_xml_attribute = Namespace_symbols.rattr_symbol (* QNames are resolved *) * Datatypes.xs_untyped (* Addition to the standard SAX events *) * Namespace_symbols.rtype_symbol (* The XML Schema type annotation *) * Dm_atomic.atomicValue list (* The XML Schema simple value *) type typed_sax_xml_attribute_forest = typed_sax_xml_attribute list (* Typed elements *) type typed_element_desc = Namespace_symbols.relem_symbol (* QNames are resolved *) * typed_sax_xml_attribute_forest * has_element_content * sax_base_uri (* Addition to the standard SAX events *) * Namespace_context.nsenv (* Namespace environment for that element *) * Dm_types.nilled (* Has the element been nilled ? *) * Namespace_symbols.rtype_symbol (* The XML Schema type annotation *) * Dm_atomic.atomicValue list (* The XML Schema simple value *) (* SAX events *) type typed_sax_event_desc = | TSAX_startDocument of document_desc | TSAX_endDocument | TSAX_startElement of typed_element_desc | TSAX_endElement | TSAX_processingInstruction of pi_desc | TSAX_comment of comment_desc | TSAX_characters of text_desc (* Additions to the standard SAX events: *) | TSAX_attribute of typed_sax_xml_attribute | TSAX_atomicValue of atomic_desc | TSAX_hole (* Enclosed expressions only occur in typed streams and are eliminated during erasure *) | TSAX_startEncl | TSAX_endEncl (* Annotated typed SAX event As with the annotated Core AST, the annotation type contains mutable option fields. An alternative is to make the events polymorphic in the annotations, but this is kind of heavy weight. For the moment, we just add annotations to typed SAX events. 
Instead of having three kinds of SAX events (original, resolved, typed), we could have just one kind in which the annotations include the resolved QNames and the computed type, but this is a deeper change. - Mary *) type typed_annotated_sax_event = { tse_desc : typed_sax_event_desc; (* Typed SAX event *) tse_annot : Sax_annot.sax_annot; (* Annotation *) tse_loc : Finfo.finfo (* Location in stream *) } (* Typed XML streams *) (* Note: A typed XML stream is a Caml stream of typed SAX events. Please see the Stream Caml module for more on operations on streams. - Jerome *) type typed_xml_stream = typed_annotated_sax_event Cursor.cursor (* Ordered, typed XML streams An ordered, typed XML stream is a typed XML stream in which each typed SAX event is annotated with a Node identifier. *) type ordered_typed_sax_xml_attribute = typed_sax_xml_attribute (* The attribute event content *) * Nodeid.prepostint_docorder (* The attribute node id *) type ordered_typed_sax_xml_attribute_forest = ordered_typed_sax_xml_attribute list (* Ordered typed elements *) type ordered_typed_element_desc = Namespace_symbols.relem_symbol (* QNames are resolved *) * ordered_typed_sax_xml_attribute_forest * has_element_content * sax_base_uri (* Addition to the standard SAX events *) * Namespace_context.nsenv (* Namespace environment for that element *) * Dm_types.nilled (* Has the element been nilled ? *) * Namespace_symbols.rtype_symbol (* The XML Schema type annotation *) * Dm_atomic.atomicValue list (* The XML Schema simple value *) (* SAX events *) type ordered_typed_sax_event_desc = | OTSAX_startDocument of document_desc * Nodeid.preint_docorder | OTSAX_endDocument of Nodeid.postint_docorder | OTSAX_startElement of ordered_typed_element_desc * Nodeid.preint_docorder | OTSAX_endElement of Nodeid.postint_docorder | OTSAX_processingInstruction of pi_desc * Nodeid.prepostint_docorder | OTSAX_comment of comment_desc * Nodeid.prepostint_docorder | OTSAX_characters of text_desc * Nodeid.prepostint_docorder (* Additions to the standard SAX events: *) | OTSAX_attribute of ordered_typed_sax_xml_attribute | OTSAX_atomicValue of atomic_desc | OTSAX_hole (* Enclosed expressions only occur in typed streams and are eliminated during erasure *) | OTSAX_startEncl | OTSAX_endEncl (* Annotated typed SAX event As with the annotated Core AST, the annotation type contains mutable option fields. An alternative is to make the events polymorphic in the annotations, but this is kind of heavy weight. For the moment, we just add annotations to typed SAX events. Instead of having three kinds of SAX events (original, resolved, typed), we could have just one kind in which the annotations include the resolved QNames and the computed type, but this is a deeper change. - Mary *) type ordered_typed_annotated_sax_event = { otse_desc : ordered_typed_sax_event_desc; (* Ordered typed SAX event *) otse_annot : Sax_annot.sax_annot; (* Annotation *) otse_loc : Finfo.finfo (* Location in stream *) } type ordered_typed_xml_stream = ordered_typed_annotated_sax_event Cursor.cursor galax-1.1/streaming/small_stream_ast.mli0000664000076400007640000000364510560462366016631 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. 
*) (* *) (***********************************************************************) (* $Id: small_stream_ast.mli,v 1.9 2007/02/01 22:08:54 simeon Exp $ *) (* Module: Small_stream_ast Description: This *interface* contains type declarations for a fragment of XML document used to build a small stream. *) (* Describes small fragments of XML with holes in them *) type rsattribute = Namespace_names.rqname * Datatypes.xs_untyped type rsattribute_forest = rsattribute list type rsexpr = | RSDocument of (Dm_atomic.atomicAnyURI option ref * rsexpr list) | RSElem of Namespace_names.rqname * Namespace_context.binding_table * rsattribute_forest * Dm_atomic.atomicAnyURI option ref * rsexpr list | RSText of Datatypes.xs_untyped | RSPI of (Namespace_names.ncname * Datatypes.xs_untyped) | RSComment of Datatypes.xs_untyped | RSHole (* The same after namespace resolution *) type sattribute = Namespace_symbols.rattr_symbol * Datatypes.xs_untyped type sattribute_forest = sattribute list type sexpr = | SDocument of (Dm_atomic.atomicAnyURI option ref * sexpr list) | SElem of Namespace_symbols.relem_symbol * Namespace_context.nsenv * sattribute_forest * Dm_atomic.atomicAnyURI option ref * sexpr list | SText of Datatypes.xs_untyped | SPI of (Namespace_names.ncname * Datatypes.xs_untyped) | SComment of Datatypes.xs_untyped | SHole galax-1.1/streaming/streaming_ordered_context.ml0000664000076400007640000000454310560462366020365 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: streaming_ordered_context.ml,v 1.5 2007/02/01 22:08:54 simeon Exp $ *) (* Module: Streaming_ordered_context Description: This module implements the context necessary to add identity information to an input typed stream. 
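The context simply pairs the document id assigned to the stream with a node-id generator (a Nodeid_context); pre-order, post-order and leaf identifiers are drawn from that generator as events are consumed.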
*) open Nodeid open Nodeid_context (********************************************) (* A type for the streaming ordered context *) (********************************************) type streaming_ordered_context = { streaming_ordered_context_docid : docid; streaming_ordered_context_idgen : nodeid_context } (*******************************************) (* Creates a new streaming ordered context *) (*******************************************) let build_streaming_ordered_context docid nodeid_context = { streaming_ordered_context_docid = docid; streaming_ordered_context_idgen = nodeid_context } (*******************************) (* Operations on node identity *) (*******************************) let get_docid streaming_ordered_context = streaming_ordered_context.streaming_ordered_context_docid let new_preorderid streaming_ordered_context = let docid = get_docid streaming_ordered_context in let preorder = new_pre streaming_ordered_context.streaming_ordered_context_idgen in (docid,preorder) let new_postorderid streaming_ordered_context = let docid = get_docid streaming_ordered_context in let postorder = new_post streaming_ordered_context.streaming_ordered_context_idgen in (docid,postorder) let new_leaf_docorder streaming_ordered_context = let docid = get_docid streaming_ordered_context in let (preorder,postorder) = new_leaf_pre_post streaming_ordered_context.streaming_ordered_context_idgen in (docid,preorder,postorder) galax-1.1/streaming/streaming_constructors.ml0000664000076400007640000003006410661364346017743 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: streaming_constructors.ml,v 1.19 2007/08/17 18:28:54 simeon Exp $ *) (* Module: Stream_constructors Description: Construction operations over XML streams. *) open Error open Namespace_names open Namespace_symbols open Namespace_resolve open Namespace_context open Streaming_types open Streaming_util open Small_stream_ast (* untyped_value_of_xml_stream is the physical implementation of fs:item-sequence-to-untypedAtomic(). It constructs an untyped value from a stream of atomic values, adding one character whitespace between each atomic value. It is used in the implementation of attribute, comment, processing-instruction, and text nodes. *) let rec untyped_value_of_xml_stream input_stream = String.concat "" (untyped_value_of_xml_stream_aux input_stream) and untyped_value_of_xml_stream_aux input_stream = match Cursor.cursor_peek input_stream with | None -> [] | Some { tse_desc = TSAX_atomicValue av; tse_annot = _; tse_loc = _ } -> begin Cursor.cursor_junk input_stream; (av#string_value()) :: ((match Cursor.cursor_peek input_stream with | Some ({ tse_desc = TSAX_atomicValue _; tse_annot = _; tse_loc = _ }) -> " " | _ -> "") :: (untyped_value_of_xml_stream_aux input_stream)) end | Some { tse_desc = TSAX_startEncl ; tse_annot = _; tse_loc = _ } | Some { tse_desc = TSAX_endEncl ; tse_annot = _; tse_loc = _ } -> Cursor.cursor_junk input_stream; (untyped_value_of_xml_stream_aux input_stream) | _ -> raise (Query (Stream_Error "Not a stream of atomic values")) (* Attribute constructor The second step should do implicit validation if necessary. 
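Before that second step, the content stream is collapsed into a single untyped string (adjacent atomic values joined with single spaces) and whitespace-normalized; xml:id attributes get an additional id normalization step, and names in the xmlns namespace are rejected outright.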
*) let attribute_constructor sym input_stream = let resolved_stream = begin match sym with | (prefix,uri,ncname) -> if (uri = Namespace_symbols_builtin.xmlns_uri) || (ncname = Namespace_symbols_builtin.xmlns_local) then raise (Query (Constructor_Error "Attribute name is a reserved xmlns name")) else () end; let text = (untyped_value_of_xml_stream input_stream) in let text = Whitespace.whitespace_normalize text in let text = if symbol_equal sym Namespace_symbols_builtin.xml_id then Whitespace.whitespace_id_normalize text else text in Cursor.cursor_of_list [fmkrse_event(RSAX_attribute(sym,text)) Finfo.bogus] in Streaming_ops.typed_of_resolved_xml_stream resolved_stream (* Comment constructor *) let comment_constructor input_stream = let comment = (untyped_value_of_xml_stream input_stream) in let _ = Streaming_ops.check_valid_comment comment in Cursor.cursor_of_list [fmktse_event (TSAX_comment comment) Finfo.bogus] (* Processing instruction constructor *) let pi_constructor leading target input_stream = let content = (untyped_value_of_xml_stream input_stream) in let content = if leading then Whitespace.remove_leading_whitespace content else content in let (target', content') = Streaming_ops.check_valid_processing_instruction target content in Cursor.cursor_of_list [fmktse_event (TSAX_processingInstruction (target',content')) Finfo.bogus] (* XQuery 3.7.3.4 Text Node Constructors The content expression of a text node constructor is processed as follows: 1. Atomization is applied to the value of the content expression, converting it to a sequence of atomic values. (Handled by normalization) 2. If the result of atomization is an empty sequence, no text node is constructed. Otherwise, each atomic value in the atomized sequence is cast into a string. 3. The individual strings resulting from the previous step are merged into a single string by concatenating them with a single space character between each pair. The resulting string becomes the content property of the constructed text node. Note: It is possible for a text node constructor to construct a text node containing a zero-length string. However, if used in the content of a constructed element or document node, such a text node will be deleted or merged with another text node. 
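As a worked example, a content expression evaluating to the atomic values 1, 2 and 3 yields a single text node whose content is the three values cast to strings and joined with single spaces, i.e. 1 2 3.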
*) let text_constructor input_stream = match Cursor.cursor_peek input_stream with | None -> Cursor.cursor_of_list [] | _ -> let text = (untyped_value_of_xml_stream input_stream) in Cursor.cursor_of_list [fmktse_event (TSAX_characters text) Finfo.bogus] let charref_constructor i = let text = Galax_camomile.utf8_string_of_code_point i in Cursor.cursor_of_list [fmktse_event (TSAX_characters text) Finfo.bogus] (* Element constructor *) let element_content_stream xml_stream = let rec next_element_content_event xml_stream () = try let event = (Cursor.cursor_next xml_stream) in match event.rse_desc with | RSAX_startDocument _ | RSAX_endDocument -> next_element_content_event xml_stream () | RSAX_attribute a -> raise (Query (Stream_Error "Content of the constructor contains attributes not at the beginning")) | _ -> Some event with | Stream.Failure -> None in Cursor.cursor_of_function (next_element_content_event xml_stream) let prefix_attribute nsenv (rsym,content) (prev_atts,prev_bindings,required_bindings) = let (uqname,opt_binding,required_binding) = rattr_name_with_binding nsenv rsym in match opt_binding with | None -> let ns_attributes = prev_bindings in ((uqname,content) :: prev_atts, ns_attributes,required_binding :: required_bindings) | Some new_binding -> let ns_attributes = new_binding :: prev_bindings in ((uqname,content) :: prev_atts, ns_attributes,required_bindings) let one_prefix_attribute nsenv (rsym,content) = let (uqname,opt_binding,required_binding) = rattr_name_with_binding nsenv rsym in ((uqname,content), opt_binding,required_binding) let prefix_attributes nsenv resolved_attributes = List.fold_right (prefix_attribute nsenv) resolved_attributes ([],[],[]) let fix_in_scope_namespaces nsenv rsym atts = let (uqname,opt_binding,required_binding) = relem_name_with_binding nsenv rsym in let (_,new_bindings,required_attribute_prefixes) = prefix_attributes nsenv atts in let required_bindings = match opt_binding with | None -> let required_bindings = required_binding :: required_attribute_prefixes in cleanup_out_bindings new_bindings required_bindings | Some created_binding -> let rb = created_binding :: new_bindings in cleanup_out_bindings rb [] in Namespace_context.patch_bindings nsenv required_bindings let element_constructor_of_resolved base_uri sym nsenv resolved_input_stream = (* 2. Get the leading attributes in the resolved stream *) let leading_attributes = Streaming_ops.consume_leading_attribute_events resolved_input_stream in (* 2.b. Check that attributes are not duplicated *) let leading_attributes = Streaming_util.check_duplicate_attributes leading_attributes in (* 3. Make sure that the rest of the stream contains proper nodes (no attributes, and returns the children of document nodes) *) let resolved_element_content_stream = element_content_stream resolved_input_stream in (* 4. Compute the in-scope namespaces (see XQuery Section 3.7.4 In-scope Namespaces of a Constructed Element) *) let in_scope_nsenv = fix_in_scope_namespaces nsenv sym leading_attributes in (* 5. Builds a small stream to construct the element *) let small_expr = SElem(sym,in_scope_nsenv,leading_attributes,base_uri,[SHole]) in let small_stream = Small_stream_context.resolved_xml_stream_of_sexpr small_expr in (* 6. Compose the small stream with the resolved input stream *) let new_xml_stream = Streaming_ops.compose_resolved_xml_streams small_stream [resolved_element_content_stream] in new_xml_stream let element_constructor base_uri sym nsenv input_stream = try (* 0. 
Clean up the labels, possibly buffering subtrees with nested labels *) let input_stream = Streaming_conv.typed_of_typed_labeled_xml_stream input_stream (* input_stream *) in (* 1. Do the erasure *) let resolved_input_stream = Streaming_ops.erase_xml_stream_section_3_7_1 input_stream in let new_xml_stream = element_constructor_of_resolved base_uri sym nsenv resolved_input_stream in (* 7. Add type annotations *) (* 7.a NOTE THAT THIS MERGES TEXT NODES NOW - Philippe and Jerome *) Streaming_ops.typed_of_resolved_xml_stream new_xml_stream with | Query(Cursor_Error(msg)) -> raise(Query(Constructor_Error("In element_constructor "^msg))) (* Document constructor *) let document_constructor baseuri input_stream = try (* 0. Clean up the labels, possibly buffering subtrees with nested labels *) let input_stream = Streaming_conv.typed_of_typed_labeled_xml_stream input_stream in (* 1. Do the erasure *) let resolved_input_stream = Streaming_ops.erase_xml_stream_section_3_7_1 input_stream in (* 2. Get the leading attributes in the resolved stream *) let leading_attributes = Streaming_ops.consume_leading_attribute_events resolved_input_stream in (* 2.b. Check that there is no attribute *) let _ = if not(leading_attributes = []) then raise (Query (Stream_Error "Document nodes should not have attribute content")) else () in (* 3. Make sure that the rest of the stream contains proper nodes (no attributes, and returns the children of document nodes) *) let resolved_document_content_stream = try element_content_stream resolved_input_stream with | (Query (Stream_Error "Content of the constructor contains attributes not at the beginning")) -> raise (Query (Stream_Error "Document nodes should not have attribute content")) in (* 2. Builds a small stream to construct the element *) let small_expr = SDocument(baseuri,[SHole]) in let small_stream = Small_stream_context.resolved_xml_stream_of_sexpr small_expr in (* 3. Compose the small stream with the resolved input stream *) let new_xml_stream = Streaming_ops.compose_resolved_xml_streams small_stream [resolved_document_content_stream] in (* 4. Add type annotations *) Streaming_ops.typed_of_resolved_xml_stream new_xml_stream with | Query(Cursor_Error(msg)) -> raise(Query(Constructor_Error("In document_constructor "^msg))) let sequence_constructor t1 t2 = Cursor.cursor_append t1 t2 (**************************) (* Serialization wrapping *) (**************************) (* Note: Essentially, this calls element construction with the proper parameters to build a top-level element. 
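Wrapping the value in a single glx:result element, with the glx prefix added to the output namespace environment, guarantees that the serialized result has exactly one root element no matter how many items the result sequence contains.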
- Jerome *) let glx_result_serialization input_stream = let sym = Namespace_symbols.relem_symbol Namespace_builtin.glx_result in let nsenv = let export_orig_nsenv = Namespace_context.default_xml_out_nsenv () in let delta_bindings = [(Namespace_builtin.glx_prefix,Namespace_builtin.glx_uri)] in Namespace_context.add_all_ns export_orig_nsenv delta_bindings in element_constructor Dm_atomic_util.default_no_uri_dm sym nsenv input_stream let flatten_document_nodes input_stream = let no_doc () = match Cursor.cursor_peek input_stream with | Some event -> begin match event.tse_desc with | TSAX_startDocument _ -> Cursor.cursor_junk input_stream; Some (Cursor.cursor_next input_stream) | TSAX_endDocument -> Cursor.cursor_junk input_stream; Some (Cursor.cursor_next input_stream) | _ -> Some (Cursor.cursor_next input_stream) end | None -> None in Cursor.cursor_of_function no_doc let dburi = Dm_atomic_util.default_no_uri_dm let sequence_normalization input_stream = document_constructor dburi (flatten_document_nodes input_stream) galax-1.1/streaming/streaming_util.ml0000664000076400007640000001250310560462366016145 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: streaming_util.ml,v 1.7 2007/02/01 22:08:54 simeon Exp $ *) (* Module: Streaming_util Description: Some utilities over streaming types. *) open Error open Namespace_names open Namespace_symbols open Namespace_resolve open Namespace_context open Datatypes open Datatypes_util open Streaming_types (********************) (* The empty stream *) (********************) let empty_xml_stream () = Cursor.cursor_of_list [] let empty_resolved_xml_stream () = Cursor.cursor_of_list [] let empty_typed_xml_stream () = Cursor.cursor_of_list [] let fmkse_event desc fi = { se_desc = desc; se_loc = fi } let fmkrse_event desc fi = { rse_desc = desc; rse_loc = fi } let fmktse_event desc fi = { tse_desc = desc; tse_annot = Sax_annot.empty_sax_annot(); tse_loc = fi } let fmkatse_event desc annot fi = { tse_desc = desc; tse_annot = annot; tse_loc = fi } let fmkotse_event desc annot fi = { otse_desc = desc; otse_annot = annot; otse_loc = fi } let mktse_event desc = fmktse_event desc Finfo.bogus (*******************************) (* Extracts special attributes *) (*******************************) (* Process an xmlns attribute *) let build_uri att content_thing = try (* Hack for now! - Jerome *) ignore(anyURI_of_untyped content_thing); NSUri content_thing with | _ -> let ns_attribute = Namespace_names.string_of_uqname att in raise (Query (Mapping_Failure ("Namespace attribute " ^ ns_attribute ^ " does not contain a URI"))) let build_prefix_uri_pair att prefix_thing content_thing = let ns_prefix = match prefix_thing with | Some ncame -> NSPrefix ncame | None -> NSDefaultElementPrefix in let uri = build_uri att content_thing in (ns_prefix, uri) (* This function extracts special attributes after parsing *) let rec extract_special_attributes attributes = match attributes with | [] -> (Whitespace.Default, [], None, []) | att1 :: attributes' -> begin let (whitespace_mode, namespace_decls, base_uri, other_attributes') = extract_special_attributes attributes' in match att1 with (* Note: in the case of xmlns attributes, those do not get preserved in the final Infoset. 
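They are instead turned into prefix-to-URI bindings and returned separately from the ordinary attributes, so that callers can extend the in-scope namespace environment with them.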
- Jerome *) | ((NSPrefix "xmlns", ncname), att_content) -> let new_namespace_decl = build_prefix_uri_pair (fst att1) (Some ncname) att_content in (whitespace_mode, new_namespace_decl :: namespace_decls, base_uri, other_attributes') | ((NSDefaultElementPrefix, "xmlns"), att_content) -> let new_namespace_decl = build_prefix_uri_pair (fst att1) None att_content in (whitespace_mode, new_namespace_decl :: namespace_decls, base_uri, other_attributes') (* Note: Other special attributes are preserved in the XML Infoset. - Jerome *) | ((NSPrefix "xml", "space"), "preserve") -> (Whitespace.Preserve, namespace_decls, base_uri, att1 :: other_attributes') | ((NSPrefix "xml", "space"), "default") -> (Whitespace.Default, namespace_decls, base_uri, att1 :: other_attributes') | ((NSPrefix "xml", "base"), att_content) -> let base_uri = AnyURI._kinda_uri_of_string att_content in let base_uri_dm = new Dm_atomic.atomicAnyURI base_uri in (whitespace_mode, namespace_decls, Some base_uri_dm, other_attributes') (* For normal attributes, just add them to the final list of attributes *) | _ -> (whitespace_mode, namespace_decls, base_uri, att1 :: other_attributes') end (* Checks for duplicates in attributes -- Returns the original sequence of attributes or raises and error *) let local_attribute_hash = Hashtbl.create 17 let check_duplicate_attributes attributes = let _ = Hashtbl.clear local_attribute_hash in let add_function (asym,_) = let (_,uri,local) = asym in if (Hashtbl.mem local_attribute_hash (uri,local)) then let caname = Namespace_symbols.rattr_name asym in let aname = Namespace_names.prefixed_string_of_rqname caname in raise (Query (Datamodel ("Attribute " ^ aname ^ " is duplicated in element"))) else (Hashtbl.add local_attribute_hash (uri,local) ()) in List.iter add_function attributes; attributes let string_of_resolved_sax_event_desc rse = match rse with | RSAX_startDocument _ -> "RSAX_startDocument" | RSAX_endDocument -> "RSAX_endDocument" | RSAX_startElement _ -> "RSAX_startElement" | RSAX_endElement -> "RSAX_endElement" | RSAX_processingInstruction _ -> "RSAX_processingInstruction" | RSAX_comment _ -> "RSAX_comment" | RSAX_characters _ -> "RSAX_characters" | RSAX_attribute _ -> "RSAX_attribute" | RSAX_atomicValue _ -> "RSAX_atomicValue" | RSAX_hole -> "RSAX_hole" galax-1.1/streaming/streaming_diff.ml0000664000076400007640000001036710560462366016106 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. 
*) (* *) (***********************************************************************) (* $Id: streaming_diff.ml,v 1.4 2007/02/01 22:08:54 simeon Exp $ *) (* Module: Streaming_diff Description: Compares two XML streams *) open Error open Streaming_types (***************) (* Stream diff *) (***************) let diff_error () = raise (Query (Stream_Error "XML streams differ")) (* Return the first stream up to the point where it differs with the second stream, in which case it raises an error *) let rec peek_no_whitespace s = let e = Cursor.cursor_peek s in match e with | Some ed -> begin match ed.tse_desc with | (TSAX_characters c) -> if Whitespace.whitespace_only c then begin Cursor.cursor_junk s; peek_no_whitespace s end else e | _ -> e end | _ -> e let compare_start_docs (xml_decl1,dtd1,baseuri1) (xml_decl2,dtd2,baseuri2) = match xml_decl1,xml_decl2 with | None,_ -> true | _,None -> true | (Some (xmlversion1,_,_),Some (xmlversion2,_,_)) -> xmlversion1 = xmlversion2 let compare_attribute_names_aux rsym1 rsym2 = let (_,uri1,ncname1) = Namespace_symbols.rattr_name rsym1 in let (_,uri2,ncname2) = Namespace_symbols.rattr_name rsym2 in compare (uri1,ncname1) (uri2,ncname2) let compare_attribute_names (rsym1,_,_,_) (rsym2,_,_,_) = compare_attribute_names_aux rsym1 rsym2 let compare_characters c1 c2 = c1 = c2 let compare_attributes (rsym1,c1,_,_) (rsym2,c2,_,_) = ((compare_attribute_names_aux rsym1 rsym2) = 0) && (compare_characters c1 c2) let rec compare_attributes_lists satts1 satts2 = match (satts1,satts2) with | [],[] -> true | att1 :: satts1', att2 :: satts2' -> if (compare_attributes att1 att2) then compare_attributes_lists satts1' satts2' else false | _ -> false let compare_start_elems (rsym1,atts1,_,_,_,_,_,_) (rsym2,atts2,_,_,_,_,_,_) = (Namespace_symbols.relem_equal rsym1 rsym2) && let satts1 = List.sort compare_attribute_names atts1 in let satts2 = List.sort compare_attribute_names atts2 in (compare_attributes_lists satts1 satts2) let compare_pis pi1 pi2 = pi1 = pi2 let compare_comments c1 c2 = c1 = c2 let compare_atomic_values a1 a2 = a1#atomic_value_eq a2 let next_diff_event s1 s2 () = let e1 = peek_no_whitespace s1 in let e2 = peek_no_whitespace s2 in let event_compare = match e1,e2 with | (None,None) -> true | (Some ed1,Some ed2) -> begin match ed1.tse_desc,ed2.tse_desc with | (TSAX_startDocument d1,TSAX_startDocument d2) -> compare_start_docs d1 d2 | (TSAX_endDocument,TSAX_endDocument) -> true | (TSAX_startElement e1, TSAX_startElement e2) -> compare_start_elems e1 e2 | (TSAX_endElement,TSAX_endElement) -> true | (TSAX_processingInstruction pi1, TSAX_processingInstruction pi2) -> compare_pis pi1 pi2 | (TSAX_comment c1, TSAX_comment c2) -> compare_comments c1 c2 | (TSAX_characters c1, TSAX_characters c2) -> compare_characters c1 c2 | (TSAX_attribute a1, TSAX_attribute a2) -> compare_attributes a1 a2 | (TSAX_atomicValue a1, TSAX_atomicValue a2) -> compare_atomic_values a1 a2 | (TSAX_hole, TSAX_hole) -> true | (TSAX_startEncl, TSAX_startEncl) -> true | (TSAX_endEncl, TSAX_endEncl) -> true | _ -> false end | _ -> false in begin if event_compare then () else diff_error (); Cursor.cursor_junk s1; Cursor.cursor_junk s2; e1 end let stream_diff s1 s2 = Cursor.cursor_of_function (next_diff_event s1 s2) let stream_boolean_diff s1 s2 = try Streaming_ops.discard_typed_xml_stream (stream_diff s1 s2); true with | (Query (Stream_Error _)) -> false galax-1.1/streaming/streaming_conv.mli0000664000076400007640000000456110560462366016313 0ustar 
mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: streaming_conv.mli,v 1.7 2007/02/01 22:08:54 simeon Exp $ *) (* Module: Stream_conv Description: This module provides data structures, accessor and conversion functions for an extended XML token stream model, labeled streams, namely. *) open Streaming_types type typed_labeled_sax_event = typed_annotated_sax_event type typed_labeled_xml_stream = typed_xml_stream (*****************) (* Stream access *) (*****************) val get_flag : typed_labeled_sax_event -> bool val set_flag : typed_labeled_sax_event -> unit val unset_flag : typed_labeled_sax_event -> unit (*********************) (* Stream conversion *) (*********************) val typed_labeled_of_typed_xml_stream : typed_xml_stream -> typed_xml_stream val typed_of_typed_labeled_xml_stream : typed_xml_stream -> typed_xml_stream (******************) (* Stream slicing *) (******************) (* Chopping a stream into separate sub-streams is needed for operators that need to access a stream item-at-a-time, such as an item to tuple map. - Michael *) val slice_typed_xml_stream : typed_xml_stream -> typed_xml_stream Cursor.cursor (* Chops a stream into slices, discarding each of them individually. This is useful whenever the item-cardinality of a stream is needed without actually accessing these items. - Michael *) val slice_discard_typed_xml_stream : typed_xml_stream -> unit Cursor.cursor val item_count_typed_labeled_xml_stream : typed_xml_stream -> int val item_range_typed_labeled_xml_stream : typed_xml_stream -> int -> int -> typed_xml_stream val nth_item_typed_labeled_xml_stream : typed_xml_stream -> int -> typed_xml_stream val first_item_typed_labeled_xml_stream : typed_xml_stream -> typed_xml_stream galax-1.1/streaming/streaming_ordered_context.mli0000664000076400007640000000333510560462366020534 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: streaming_ordered_context.mli,v 1.5 2007/02/01 22:08:54 simeon Exp $ *) (* Module: Streaming_ordered_context Description: This module implements the context necessary to add identity information to an input typed stream. 
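Start events receive a pre-order identifier, end events the matching post-order identifier, and childless events such as text nodes, comments, processing instructions and attributes receive both at once through new_leaf_docorder.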
*) (********************************************) (* A type for the streaming ordered context *) (********************************************) type streaming_ordered_context (*******************************************) (* Creates a new streaming ordered context *) (*******************************************) val build_streaming_ordered_context : Nodeid.docid -> Nodeid_context.nodeid_context -> streaming_ordered_context (*******************************) (* Operations on node identity *) (*******************************) val get_docid : streaming_ordered_context -> Nodeid.docid val new_preorderid : streaming_ordered_context -> Nodeid.preint_docorder val new_postorderid : streaming_ordered_context -> Nodeid.postint_docorder val new_leaf_docorder : streaming_ordered_context -> Nodeid.prepostint_docorder galax-1.1/streaming/streaming_ops.ml0000664000076400007640000007233610705011417015767 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: streaming_ops.ml,v 1.26 2007/10/16 01:25:35 mff Exp $ *) (* Module: Streaming_ops Description: Some basic operations on XML streams. *) open Error open Namespace_names open Namespace_symbols open Namespace_resolve open Namespace_context open Xquery_common_ast open Streaming_types open Streaming_util (********************) (* The empty stream *) (********************) let empty_xml_stream () = Cursor.cursor_of_list [] let empty_resolved_xml_stream () = Cursor.cursor_of_list [] let empty_typed_xml_stream () = Cursor.cursor_of_list [] (***************************************) (* Validity checks on stream contents *) (***************************************) let check_valid_processing_instruction target pi_content = let target'= Datatypes_util.ncname_of_untyped target in if (String.lowercase(target') = "xml") then raise (Query(Constructor_Error("Invalid processing-instruction target: contains 'xml'"))) else if (Regularexp.matches pi_content "\\?>" "") then raise (Query(Constructor_Error("Invalid processing-instruction content: contains '?>'"))) else (target, pi_content) let check_valid_comment comment_content = if (Regularexp.matches comment_content "(--)|(-$)" "") then raise (Query(Constructor_Error("Invalid comment content: contains '--' or '-' at end of comment"))) else true (***********************) (* Discarding a stream *) (***********************) (* Discards the current XML subtree *) let rec discard_xml_stream xml_stream = try while true do ignore(Cursor.cursor_next xml_stream) done with | Stream.Failure -> () (* Discards the current resolved XML subtree *) let rec discard_resolved_xml_stream resolved_xml_stream = try while true do ignore(Cursor.cursor_next resolved_xml_stream) done with | Stream.Failure -> () (* Discards the current typed XML subtree *) let rec discard_typed_xml_stream typed_xml_stream = try while true do ignore(Cursor.cursor_next typed_xml_stream) done with | Stream.Failure -> () (***********************) (* Removing text nodes *) (***********************) (** * Code to merge text nodes in typed SAX streams **) let merge_xml_text_nodes_in_typed_stream typed_stream = let merge_text_nodes () = (* function for merging adjacent text nodes *) let rec next_event_discard previous tsa fi = match (Cursor.cursor_peek typed_stream) with | Some {tse_desc = (TSAX_characters (s))} -> begin ignore(Cursor.cursor_next 
typed_stream); next_event_discard (previous ^ s) tsa fi end | _ -> Some (fmkatse_event (TSAX_characters previous) tsa fi) in try let e = Cursor.cursor_next typed_stream in match e.tse_desc with | TSAX_characters s -> next_event_discard s e.tse_annot e.tse_loc | _ -> Some e with | Stream.Failure -> None in Cursor.cursor_of_function merge_text_nodes (*******************************************************) (* Conversion between well-formed and resolved streams *) (*******************************************************) (* Turns a well-formed XML stream into an resolved one *) let resolve_attribute ts_context attribute = let (nsenv,in_scope_nsenv) = Resolve_stream_context.get_nsenv ts_context in match attribute with | (attr_uqname, attr_content) -> let rattr_sym = Resolve_stream_context.resolve_attribute_name ts_context nsenv attr_uqname in (rattr_sym,attr_content) let resolve_attributes ts_context attributes = List.map (resolve_attribute ts_context) attributes let resolved_of_well_formed_event ts_context event = match event.se_desc with | SAX_startDocument (xmldecl,dtddecl,base_uri) -> fmkrse_event (RSAX_startDocument (xmldecl,dtddecl,base_uri)) event.se_loc | SAX_endDocument -> fmkrse_event RSAX_endDocument event.se_loc | SAX_startElement (elem_uqname,sax_attributes,has_element_content) -> (* Extract the namespace attributes and update the namespace environment *) let (ws_mode, new_nss, base_uri, other_sax_attributes) = Streaming_util.extract_special_attributes sax_attributes in let (nsenv,in_scope_nsenv) = begin Resolve_stream_context.push_ns_bindings ts_context new_nss; Resolve_stream_context.get_nsenv ts_context end in (* Resolve the element QName wrt namespaces *) let relem_sym,default = Resolve_stream_context.resolve_element_name ts_context nsenv elem_uqname in let in_scope_nsenv = if default then patch_bindings in_scope_nsenv Namespace_builtin.default_built_in_namespaces else in_scope_nsenv in let resolved_sax_xml_attributes = resolve_attributes ts_context other_sax_attributes in (* Checking for attribute duplicates here! 
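Duplicates are detected on the expanded (uri, local name) pair, so two attributes that reach the same URI through different prefixes still clash.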
*) let resolved_sax_xml_attributes = Streaming_util.check_duplicate_attributes resolved_sax_xml_attributes in fmkrse_event (RSAX_startElement (relem_sym,resolved_sax_xml_attributes,has_element_content,ref base_uri,in_scope_nsenv)) event.se_loc | SAX_endElement -> begin let _ = Resolve_stream_context.pop_nsenv ts_context in fmkrse_event RSAX_endElement event.se_loc end | SAX_processingInstruction pi_desc -> fmkrse_event (RSAX_processingInstruction pi_desc) event.se_loc | SAX_comment comment_desc -> fmkrse_event (RSAX_comment comment_desc) event.se_loc | SAX_characters text_desc -> fmkrse_event (RSAX_characters text_desc) event.se_loc | SAX_attribute sax_xml_attribute -> let typed_sax_xml_attribute = resolve_attribute ts_context sax_xml_attribute in fmkrse_event (RSAX_attribute typed_sax_xml_attribute) event.se_loc | SAX_atomicValue av -> fmkrse_event (RSAX_atomicValue av) event.se_loc | SAX_hole -> fmkrse_event RSAX_hole event.se_loc let resolve_event_wrap ts_context xml_stream = try let next_event = Cursor.cursor_next xml_stream in Some (resolved_of_well_formed_event ts_context next_event) with | Stream.Failure -> None let next_event_resolved_of_well_formed_xml_stream ts_context xml_stream n = resolve_event_wrap ts_context xml_stream let resolve_xml_stream xml_stream = let ts_context = Resolve_stream_context.build_ts_context () in Cursor.cursor_of_function (next_event_resolved_of_well_formed_xml_stream ts_context xml_stream) (* Turns a resolved XML stream back to one with prefixes *) let prefix_attribute nsenv (rsym,content) = let uqname = rattr_uname nsenv rsym in (uqname,content) let prefix_attributes nsenv resolved_attributes = List.map (prefix_attribute nsenv) resolved_attributes let recreate_ns_binding new_bindings x = begin match x with | (NSDefaultFunctionPrefix,_) | (NSWildcardPrefix,_) | (_,NSWildcardUri) | (NSInterfacePrefix _,_) | (NSServerPrefix _,_) -> () | (NSDefaultElementPrefix,NSUri uri) -> new_bindings := ((NSDefaultElementPrefix,"xmlns"),uri) :: !new_bindings | (NSPrefix ncname,NSUri uri) -> new_bindings := ((NSPrefix "xmlns",ncname),uri) :: !new_bindings end let recreate_ns_bindings delta_bindings = let new_bindings = ref [] in begin List.iter (recreate_ns_binding new_bindings) delta_bindings; !new_bindings end let prefix_event prefix_context resolved_event = match resolved_event.rse_desc with | RSAX_startDocument (xmldecl,dtddecl,base_uri) -> Some (fmkse_event (SAX_startDocument (xmldecl,dtddecl,base_uri)) resolved_event.rse_loc) | RSAX_endDocument -> Some (fmkse_event SAX_endDocument resolved_event.rse_loc) | RSAX_startElement (rsym,resolved_sax_xml_attributes,has_element_content,base_uri,nsenv) -> let ns_bindings = Prefix_context.push_nsenv_in_prefix_context prefix_context nsenv in let uqname = relem_uname nsenv rsym in let sax_xml_attributes = prefix_attributes nsenv resolved_sax_xml_attributes in let ns_attributes = recreate_ns_bindings ns_bindings in Some (fmkse_event (SAX_startElement (uqname,ns_attributes @ sax_xml_attributes,has_element_content)) resolved_event.rse_loc) | RSAX_endElement -> Prefix_context.pop_nsenv_from_prefix_context prefix_context; Some (fmkse_event SAX_endElement resolved_event.rse_loc) | RSAX_processingInstruction pi_desc -> Some (fmkse_event (SAX_processingInstruction pi_desc) resolved_event.rse_loc) | RSAX_comment comment_desc -> Some (fmkse_event (SAX_comment comment_desc) resolved_event.rse_loc) | RSAX_characters text_desc -> Some (fmkse_event (SAX_characters text_desc) resolved_event.rse_loc) | RSAX_attribute resolved_sax_xml_attribute 
-> (* At the top-level, assume an empty namespace environment *) let nsenv = empty_nsenv in let sax_xml_attribute = prefix_attribute nsenv resolved_sax_xml_attribute in Some (fmkse_event (SAX_attribute sax_xml_attribute) resolved_event.rse_loc) | RSAX_atomicValue av -> Some (fmkse_event (SAX_atomicValue av) resolved_event.rse_loc) | RSAX_hole -> Some (fmkse_event SAX_hole resolved_event.rse_loc) let rec prefix_xml_stream_next_event prefix_context resolved_xml_stream = try let next_event = Cursor.cursor_next resolved_xml_stream in match prefix_event prefix_context next_event with | None -> prefix_xml_stream_next_event prefix_context resolved_xml_stream | Some event -> Some event with | Stream.Failure -> None let next_event_for_prefix_xml_stream prefix_context resolved_xml_stream n = prefix_xml_stream_next_event prefix_context resolved_xml_stream let prefix_xml_stream resolved_xml_stream = let prefix_context = Prefix_context.build_prefix_context () in Cursor.cursor_of_function (next_event_for_prefix_xml_stream prefix_context resolved_xml_stream) (*************************************************) (* Conversion between resolved and typed streams *) (*************************************************) (* Turns an untyped XML stream into an typed one *) (* Note: The following adds xs:untyped as type annotations for elements, and xs:untypedAtomic for attributes. - Jerome *) let type_attribute attribute = match attribute with | (rattr_sym, attr_content) -> (rattr_sym,attr_content,untypedAtomicsym,[]) let type_attributes attributes = List.map type_attribute attributes let typed_of_resolved_event event = match event.rse_desc with | RSAX_startDocument (xmldecl,dtddecl,base_uri) -> fmktse_event (TSAX_startDocument (xmldecl,dtddecl,base_uri)) event.rse_loc | RSAX_endDocument -> fmktse_event TSAX_endDocument event.rse_loc | RSAX_startElement (relem_sym,sax_attributes,has_element_content,base_uri,nsenv) -> let typed_sax_xml_attributes = type_attributes sax_attributes in fmktse_event (TSAX_startElement (relem_sym,typed_sax_xml_attributes,has_element_content,base_uri,nsenv,false,untypedsym,[])) event.rse_loc | RSAX_endElement -> fmktse_event TSAX_endElement event.rse_loc | RSAX_processingInstruction pi_desc -> fmktse_event (TSAX_processingInstruction pi_desc) event.rse_loc | RSAX_comment comment_desc -> fmktse_event (TSAX_comment comment_desc) event.rse_loc | RSAX_characters text_desc -> fmktse_event (TSAX_characters text_desc) event.rse_loc | RSAX_attribute sax_xml_attribute -> let typed_sax_xml_attribute = type_attribute sax_xml_attribute in fmktse_event (TSAX_attribute typed_sax_xml_attribute) event.rse_loc | RSAX_atomicValue av -> fmktse_event (TSAX_atomicValue av) event.rse_loc | RSAX_hole -> fmktse_event TSAX_hole event.rse_loc let typed_event_wrap xml_stream = try let next_event = Cursor.cursor_next xml_stream in Some (typed_of_resolved_event next_event) with | Stream.Failure -> None let next_event_typed_of_resolved_xml_stream xml_stream n = typed_event_wrap xml_stream let typed_of_resolved_xml_stream xml_stream = let s1 = Cursor.cursor_of_function (next_event_typed_of_resolved_xml_stream xml_stream) in merge_xml_text_nodes_in_typed_stream s1 (* Turns a typed XML stream into a resolved, untyped one *) let erase_attribute (rsym,content,_,_) = (rsym,content) let erase_attributes typed_attributes = List.map erase_attribute typed_attributes let erase_event typed_event = match typed_event.tse_desc with | TSAX_startDocument (xmldecl,dtddecl,base_uri) -> Some (fmkrse_event (RSAX_startDocument 
(xmldecl,dtddecl,base_uri)) typed_event.tse_loc) | TSAX_endDocument -> Some (fmkrse_event (RSAX_endDocument) typed_event.tse_loc) | TSAX_startElement (rsym,typed_sax_xml_attributes,has_element_content,base_uri,nsenv,_,_,_) -> let sax_xml_attributes = erase_attributes typed_sax_xml_attributes in Some (fmkrse_event (RSAX_startElement (rsym,sax_xml_attributes,has_element_content,base_uri,nsenv)) typed_event.tse_loc) | TSAX_endElement -> Some (fmkrse_event (RSAX_endElement) typed_event.tse_loc) | TSAX_processingInstruction pi_desc -> Some (fmkrse_event (RSAX_processingInstruction pi_desc) typed_event.tse_loc) | TSAX_comment comment_desc -> Some (fmkrse_event (RSAX_comment comment_desc) typed_event.tse_loc) | TSAX_characters text_desc -> Some (fmkrse_event (RSAX_characters text_desc) typed_event.tse_loc) | TSAX_attribute typed_sax_xml_attribute -> let sax_xml_attribute = erase_attribute typed_sax_xml_attribute in Some (fmkrse_event (RSAX_attribute sax_xml_attribute) typed_event.tse_loc) | TSAX_atomicValue av -> Some (fmkrse_event (RSAX_atomicValue av) typed_event.tse_loc) | TSAX_hole -> Some (fmkrse_event (RSAX_hole) typed_event.tse_loc) | TSAX_startEncl | TSAX_endEncl -> None let rec erase_xml_stream_next_event typed_xml_stream = try let next_event = Cursor.cursor_next typed_xml_stream in match erase_event next_event with | None -> erase_xml_stream_next_event typed_xml_stream | Some event -> Some event with | Stream.Failure -> None let next_event_for_erase_xml_stream typed_xml_stream n = erase_xml_stream_next_event typed_xml_stream let erase_xml_stream typed_xml_stream = Cursor.cursor_of_function (next_event_for_erase_xml_stream typed_xml_stream) (* Turns a typed XML stream into a resolved, non-typed one, but also turns atomic values into text nodes according to the semantics in Section 3.7.1 of the XQuery 1.0 document, and rejects attribute events, since they should have been processed before-hand from the beginning of the stream. *) let erase_atomic_event_section_3_7_1 typed_xml_stream av0 = let rec erase_atomic_event_section_3_7_1_aux typed_xml_stream = let typed_event = (Cursor.cursor_peek typed_xml_stream) in match typed_event with Some annot_tse -> begin match annot_tse.tse_desc with (TSAX_atomicValue av1) -> begin ignore(Cursor.cursor_next typed_xml_stream); let avl = erase_atomic_event_section_3_7_1_aux typed_xml_stream in (av1 :: avl) end (* Start/end enclosed expr are sentinels to end concatenation of atomic values. They are always consumed. 
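For example, an enclosed expression evaluating to the atomic values 1, 2 and 3 produces one text node with content 1 2 3; consuming the TSAX_endEncl sentinel stops the space-separated concatenation, so that values produced outside the enclosed expression are not drawn into the same text node.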
*) | (TSAX_startEncl) -> raise (Query(Stream_Error("Start enclosed event cannot follow atomic value event\n"))) | (TSAX_endEncl) -> ignore(Cursor.cursor_next typed_xml_stream); [] | _ -> [] end | _ -> [] in let avl = erase_atomic_event_section_3_7_1_aux typed_xml_stream in fmkrse_event (RSAX_characters (Dm_atomic_util.erase_simple_value (av0 :: avl))) Finfo.bogus let rec erase_event_section_3_7_1 typed_xml_stream typed_event = match typed_event.tse_desc with | TSAX_startDocument (xmldecl,dtddecl,base_uri) -> fmkrse_event (RSAX_startDocument (xmldecl,dtddecl,base_uri)) typed_event.tse_loc | TSAX_endDocument -> fmkrse_event (RSAX_endDocument) typed_event.tse_loc | TSAX_startElement (rsym,typed_sax_xml_attributes,has_element_content,base_uri,nsenv,_,_,_) -> let sax_xml_attributes = erase_attributes typed_sax_xml_attributes in fmkrse_event (RSAX_startElement (rsym,sax_xml_attributes,has_element_content,base_uri,nsenv)) typed_event.tse_loc | TSAX_endElement -> fmkrse_event (RSAX_endElement) typed_event.tse_loc | TSAX_processingInstruction pi_desc -> fmkrse_event (RSAX_processingInstruction pi_desc) typed_event.tse_loc | TSAX_comment comment_desc -> fmkrse_event (RSAX_comment comment_desc) typed_event.tse_loc (* From 3.7.1 Adjacent text nodes in the content sequence are merged into a single text node by concatenating their contents, with no intervening blanks. After concatenation, any text node whose content is a zero-length string is deleted from the content sequence. *) | TSAX_characters text_desc -> if (text_desc = "") then let next_event = Cursor.cursor_next typed_xml_stream in (erase_event_section_3_7_1 typed_xml_stream next_event) else fmkrse_event (RSAX_characters text_desc) typed_event.tse_loc | TSAX_attribute typed_sax_xml_attribute -> let sax_xml_attribute = erase_attribute typed_sax_xml_attribute in fmkrse_event (RSAX_attribute sax_xml_attribute) typed_event.tse_loc | TSAX_atomicValue av -> erase_atomic_event_section_3_7_1 typed_xml_stream av | TSAX_hole -> fmkrse_event (RSAX_hole) typed_event.tse_loc | TSAX_startEncl -> let next_event = Cursor.cursor_next typed_xml_stream in (erase_event_section_3_7_1 typed_xml_stream next_event) | TSAX_endEncl -> let next_event = Cursor.cursor_next typed_xml_stream in (erase_event_section_3_7_1 typed_xml_stream next_event) let erase_xml_stream_next_event_section_3_7_1 typed_xml_stream = try let next_event = Cursor.cursor_next typed_xml_stream in (Some (erase_event_section_3_7_1 typed_xml_stream next_event)) with | Stream.Failure -> None let next_event_for_erase_xml_stream_section_3_7_1 typed_xml_stream n = erase_xml_stream_next_event_section_3_7_1 typed_xml_stream let erase_xml_stream_section_3_7_1 typed_xml_stream = Cursor.cursor_of_function (next_event_for_erase_xml_stream_section_3_7_1 typed_xml_stream) (**********************************************) (* Conversion between typed and ordered typed *) (**********************************************) let order_attribute streaming_ordered_context typed_sax_xml_attribute = let attributeid = Streaming_ordered_context.new_leaf_docorder streaming_ordered_context in (typed_sax_xml_attribute, attributeid) let order_start_element streaming_ordered_context typed_element_desc : ordered_typed_element_desc = match typed_element_desc with | (relem_symbol, typed_sax_xml_attribute_forest, has_element_content, base_uri, nsenv, nilled_flag, type_annotation, simple_type_value) -> let typed_ordered_sax_xml_attribute_forest = List.map (order_attribute streaming_ordered_context) typed_sax_xml_attribute_forest in 
(relem_symbol, typed_ordered_sax_xml_attribute_forest, has_element_content, base_uri, nsenv, nilled_flag, type_annotation, simple_type_value) let ordered_event_of_typed_event streaming_ordered_context next_event = let next_ordered_event_desc = match next_event.tse_desc with | TSAX_startDocument document_desc -> let startdocid = Streaming_ordered_context.new_preorderid streaming_ordered_context in OTSAX_startDocument (document_desc,startdocid) | TSAX_endDocument -> let enddocid = Streaming_ordered_context.new_postorderid streaming_ordered_context in OTSAX_endDocument enddocid | TSAX_startElement typed_element_desc -> let startelementid = Streaming_ordered_context.new_preorderid streaming_ordered_context in let typed_ordered_element_desc = order_start_element streaming_ordered_context typed_element_desc in OTSAX_startElement (typed_ordered_element_desc, startelementid) | TSAX_endElement -> let endelementid = Streaming_ordered_context.new_postorderid streaming_ordered_context in OTSAX_endElement endelementid | TSAX_processingInstruction pi_desc -> let piid = Streaming_ordered_context.new_leaf_docorder streaming_ordered_context in OTSAX_processingInstruction (pi_desc,piid) | TSAX_comment comment_desc -> let commentid = Streaming_ordered_context.new_leaf_docorder streaming_ordered_context in OTSAX_comment (comment_desc,commentid) | TSAX_characters text_desc -> let textid = Streaming_ordered_context.new_leaf_docorder streaming_ordered_context in OTSAX_characters (text_desc,textid) (* Additions to the standard SAX events: *) | TSAX_attribute typed_sax_xml_attribute -> let tsxa = order_attribute streaming_ordered_context typed_sax_xml_attribute in OTSAX_attribute tsxa | TSAX_atomicValue av -> OTSAX_atomicValue av | TSAX_hole -> raise (Query (Stream_Error "Cannot create node identifier in a stream with a [hole]")) | TSAX_startEncl | TSAX_endEncl -> raise (Query (Stream_Error "1: Cannot create a node identified in an XML stream with start/end enclosed expression")) in fmkotse_event next_ordered_event_desc next_event.tse_annot next_event.tse_loc let next_ordered_typed_event streaming_ordered_context typed_xml_stream = try let next_event = Cursor.cursor_next typed_xml_stream in (Some (ordered_event_of_typed_event streaming_ordered_context next_event)) with | Stream.Failure -> None let next_ordered_typed_event_token streaming_ordered_context typed_xml_stream n = next_ordered_typed_event streaming_ordered_context typed_xml_stream let ordered_typed_of_typed_stream_for_docid docid nodeid_context typed_xml_stream = let streaming_ordered_context = Streaming_ordered_context.build_streaming_ordered_context docid nodeid_context in Cursor.cursor_of_function (next_ordered_typed_event_token streaming_ordered_context typed_xml_stream) let ordered_typed_of_typed_stream docid_gen nodeid_context typed_xml_stream = let docid = Nodeid.new_docid docid_gen in ordered_typed_of_typed_stream_for_docid docid nodeid_context typed_xml_stream (**********************) (* Stream composition *) (**********************) (* Composes a well-formed stream with holes with a list of well-formed streams *) let rec next_event_of_compose_xml_streams s0 current_top additional_streams = match !current_top with | None -> begin try let event = (Cursor.cursor_next s0) in match event.se_desc with | SAX_hole -> begin match !additional_streams with | [] -> raise (Query (Stream_Error "Stream composition: less streams than holes in the main stream")) | s1 :: sl -> begin current_top := Some s1; additional_streams := sl; 
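(* The hole has been consumed and the next additional stream installed as the
   current one; the recursive call below returns its first event. When that
   stream is exhausted (Stream.Failure in the Some branch), current_top is
   reset to None and reading resumes from the main stream. *)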
next_event_of_compose_xml_streams s0 current_top additional_streams end end | _ -> Some event with | Stream.Failure -> begin match !additional_streams with | [] -> None | _ -> raise (Query (Stream_Error "Stream composition: more streams than holes in the main stream")) end end | Some s -> try Some (Cursor.cursor_next s) with | Stream.Failure -> begin (* Switch back to the top-level stream *) current_top := None; next_event_of_compose_xml_streams s0 current_top additional_streams end let next_event_of_compose_xml_streams_aux s0 current_top additional_streams n = next_event_of_compose_xml_streams s0 current_top additional_streams let compose_xml_streams s0 sl = let current_top = ref None in let additional_streams = ref sl in Cursor.cursor_of_function (next_event_of_compose_xml_streams_aux s0 current_top additional_streams) (* Composes a resolved stream with holes with a list of resolved streams *) let rec next_event_of_compose_resolved_xml_streams s0 current_top additional_streams = match !current_top with | None -> begin try let event = (Cursor.cursor_next s0) in match event.rse_desc with | RSAX_hole -> begin match !additional_streams with | [] -> raise (Query (Stream_Error "Stream composition: less streams than holes in the main stream")) | s1 :: sl -> begin current_top := Some s1; additional_streams := sl; next_event_of_compose_resolved_xml_streams s0 current_top additional_streams end end | _ -> Some event with | Stream.Failure -> begin match !additional_streams with | [] -> None | _ -> raise (Query (Stream_Error "Stream composition: more streams than holes in the main stream")) end end | Some s -> try Some (Cursor.cursor_next s) with | Stream.Failure -> begin (* Switch back to the top-level stream *) current_top := None; next_event_of_compose_resolved_xml_streams s0 current_top additional_streams end let next_event_of_compose_resolved_xml_streams_aux s0 current_top add_streams n = next_event_of_compose_resolved_xml_streams s0 current_top add_streams let compose_resolved_xml_streams s0 sl = let current_top = ref None in let additional_streams = ref sl in Cursor.cursor_of_function (next_event_of_compose_resolved_xml_streams_aux s0 current_top additional_streams) (* Composes a typed stream with holes with a list of typed streams *) let rec next_event_of_compose_typed_xml_streams s0 current_top additional_streams = match !current_top with | None -> begin try let event = (Cursor.cursor_next s0) in match event.tse_desc with | TSAX_hole -> begin match !additional_streams with | [] -> raise (Query (Stream_Error "Stream composition: less streams than holes in the main stream")) | s1 :: sl -> begin current_top := Some s1; additional_streams := sl; next_event_of_compose_typed_xml_streams s0 current_top additional_streams end end | _ -> Some event with | Stream.Failure -> begin match !additional_streams with | [] -> None | s::l -> raise (Query (Stream_Error "4: Stream composition: more streams than holes in the main stream")) end end | Some s -> try Some (Cursor.cursor_next s) with | Stream.Failure -> begin (* Switch back to the top-level stream *) current_top := None; next_event_of_compose_typed_xml_streams s0 current_top additional_streams end let next_event_of_compose_typed_xml_streams_aux s0 current_top additional_streams n = next_event_of_compose_typed_xml_streams s0 current_top additional_streams let compose_typed_xml_streams s0 sl = let current_top = ref None in let additional_streams = ref sl in Cursor.cursor_of_function (next_event_of_compose_typed_xml_streams_aux s0 current_top 
additional_streams) (* Builds a typed XML stream with just one SAX comment event. *) let rec simple_value_of_xml_stream input_stream = String.concat "" (simple_value_of_xml_stream_aux input_stream) and simple_value_of_xml_stream_aux input_stream = match Cursor.cursor_peek input_stream with | None -> [] | Some { tse_desc = TSAX_atomicValue av; tse_annot = _; tse_loc = _ } -> begin Cursor.cursor_junk input_stream; (av#string_value()) :: ((match Cursor.cursor_peek input_stream with | Some ({ tse_desc = TSAX_atomicValue _; tse_annot = _; tse_loc = _ }) -> " " | _ -> "") :: (simple_value_of_xml_stream_aux input_stream)) end | Some { tse_desc = TSAX_startEncl ; tse_annot = _; tse_loc = _ } | Some { tse_desc = TSAX_endEncl ; tse_annot = _; tse_loc = _ } -> Cursor.cursor_junk input_stream; (simple_value_of_xml_stream_aux input_stream) | _ -> raise (Query (Stream_Error "Not a stream of atomic values")) (* Builds an XML stream with just one TSAX_attribute event, from a stream of atomic values, adding one character whitespace between each atomic value. *) let resolved_xml_stream_of_attribute sym text = Cursor.cursor_of_list [fmkrse_event(RSAX_attribute(sym,text)) Finfo.bogus] (***************************) (* Simple stream accessors *) (***************************) (* Returns true is the stream is empty *) let is_empty_xml_stream xml_stream = match Cursor.cursor_peek xml_stream with | None -> true | Some _ -> false (* Returns true is the resolved stream is empty *) let is_empty_resolved_xml_stream xml_stream = match Cursor.cursor_peek xml_stream with | None -> true | Some _ -> false (* Returns true is the typed stream is empty *) let is_empty_typed_xml_stream xml_stream = match Cursor.cursor_peek xml_stream with | None -> true | Some _ -> false (* Return all of the leading attributes in the stream *) let rec consume_leading_attribute_events resolved_xml_stream = match (Cursor.cursor_peek resolved_xml_stream) with | Some { rse_desc = RSAX_attribute attribute_desc; rse_loc = _; } -> begin ignore(Cursor.cursor_next resolved_xml_stream); let next_attributes = consume_leading_attribute_events resolved_xml_stream in attribute_desc :: next_attributes end | _ -> [] galax-1.1/streaming/streaming_constructors.mli0000664000076400007640000000406610560462366020116 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: streaming_constructors.mli,v 1.10 2007/02/01 22:08:54 simeon Exp $ *) (* Module: Stream_constructors Description: Construction operations over XML streams. 
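   A minimal usage sketch (hypothetical; base_uri, relem_sym, nsenv and
   content_stream are assumed to be built elsewhere, with the signatures
   declared below):

     let elem_stream =
       element_constructor base_uri relem_sym nsenv content_stream in
     let doc_stream =
       document_constructor base_uri elem_stream

   The constructors presumably wrap the events of the given content stream in
   the corresponding start/end element (or document) events, yielding again a
   typed_xml_stream.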
*) open Finfo open Streaming_types (***********************) (* Stream constructors *) (***********************) val text_constructor : typed_xml_stream -> typed_xml_stream val charref_constructor : int -> typed_xml_stream val pi_constructor : bool -> Namespace_names.ncname -> typed_xml_stream -> typed_xml_stream val comment_constructor : typed_xml_stream -> typed_xml_stream val attribute_constructor : Namespace_symbols.rattr_symbol -> typed_xml_stream -> typed_xml_stream val element_constructor : Dm_atomic.atomicAnyURI option ref -> Namespace_symbols.relem_symbol -> Namespace_context.nsenv -> typed_xml_stream -> typed_xml_stream val document_constructor : Dm_atomic.atomicAnyURI option ref -> typed_xml_stream -> typed_xml_stream val sequence_constructor : typed_xml_stream -> typed_xml_stream -> typed_xml_stream val element_constructor_of_resolved : Dm_atomic.atomicAnyURI option ref -> Namespace_symbols.relem_symbol -> Namespace_context.nsenv -> resolved_xml_stream -> resolved_xml_stream (****************************) (* Serialization operations *) (****************************) val glx_result_serialization : typed_xml_stream -> typed_xml_stream val sequence_normalization : typed_xml_stream -> typed_xml_stream galax-1.1/streaming/streaming_diff.mli0000664000076400007640000000227010560462366016251 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: streaming_diff.mli,v 1.3 2007/02/01 22:08:54 simeon Exp $ *) (* Module: Streaming_diff Description: Compares two XML streams *) open Streaming_types (***************) (* Stream diff *) (***************) (* Return the first stream up to the point where it differs with the second stream, in which case it raises an error *) val stream_diff : typed_xml_stream -> typed_xml_stream -> typed_xml_stream (* Same, but just returns true or false *) val stream_boolean_diff : typed_xml_stream -> typed_xml_stream -> bool galax-1.1/streaming/streaming_util.mli0000664000076400007640000000402010560462366016311 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: streaming_util.mli,v 1.4 2007/02/01 22:08:54 simeon Exp $ *) (* Module: Streaming_util Description: Some utilities over streaming types. 
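   For instance, a resolved comment event carrying no useful location
   information could be built as follows (a sketch; comment_text is an
   assumed string value):

     let ev = fmkrse_event (RSAX_comment comment_text) Finfo.bogus

   The fmk*_event constructors below essentially pair an event description
   with its location info (and, for the annotated variants, a SAX annotation).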
*) open Streaming_types (**********************) (* Event constructors *) (**********************) val fmkse_event : sax_event_desc -> Finfo.finfo -> sax_event val fmkrse_event : resolved_sax_event_desc -> Finfo.finfo -> resolved_sax_event val fmktse_event : typed_sax_event_desc -> Finfo.finfo -> typed_annotated_sax_event val fmkatse_event : typed_sax_event_desc -> Sax_annot.sax_annot -> Finfo.finfo -> typed_annotated_sax_event val fmkotse_event : ordered_typed_sax_event_desc -> Sax_annot.sax_annot -> Finfo.finfo -> ordered_typed_annotated_sax_event val mktse_event : typed_sax_event_desc -> typed_annotated_sax_event (********************) (* Events accessors *) (********************) (* Extracts special attributes *) val extract_special_attributes : sax_xml_attribute_forest -> (Whitespace.mode * (Namespace_names.prefix * Namespace_names.uri) list * Dm_atomic.atomicAnyURI option * sax_xml_attribute_forest) (* Checks for duplicates in attributes -- Returns the original sequence of attributes or raises and error *) val check_duplicate_attributes : resolved_sax_xml_attribute_forest -> resolved_sax_xml_attribute_forest val string_of_resolved_sax_event_desc : resolved_sax_event_desc -> string galax-1.1/streaming/resolve_stream_context.ml0000664000076400007640000000615110665643471017723 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: resolve_stream_context.ml,v 1.11 2007/08/30 22:39:53 simeon Exp $ *) (* Module: Type_stream_context Description: This module implements a the context used when adding types to a stream. *) open Error (*****************************) (* A type for the ts context *) (*****************************) type ts_context = { ts_nsenv : (Namespace_context.nsenv * Namespace_context.nsenv) Stack.t; ts_attr_names : (Namespace_context.nsenv * Namespace_names.uqname, Namespace_symbols.symbol) Hashtbl.t; ts_elem_names : (Namespace_context.nsenv * Namespace_names.uqname, Namespace_symbols.symbol) Hashtbl.t } (****************************) (* Creates a new ts context *) (****************************) let build_ts_context () = let init_stack = Stack.create () in begin Stack.push (Namespace_context.default_xml_nsenv, Namespace_context.default_xml_out_nsenv ()) init_stack; { ts_nsenv = init_stack; ts_attr_names = Hashtbl.create 1439; ts_elem_names = Hashtbl.create 1439; } end (* Accesses the loading context *) let get_nsenv ts_context = Stack.top ts_context.ts_nsenv let pop_nsenv ts_context = try ignore(Stack.pop ts_context.ts_nsenv) with | _ -> raise (Query (Stream_Error "Empty stack during namespace resolution over a SAX stream")) (*********************************************) (* Adds namespace bindings to the ts context *) (*********************************************) let push_ns_bindings ts_context bindings = let (nsenv,in_scope_nsenv) = get_nsenv ts_context in let nsenv' = Namespace_context.add_all_ns_test nsenv bindings in let in_scope_nsenv' = Namespace_context.add_all_ns_test in_scope_nsenv bindings in Stack.push (nsenv',in_scope_nsenv') ts_context.ts_nsenv let resolve_element_name ts_context nsenv uqname = try Hashtbl.find ts_context.ts_elem_names (nsenv,uqname),false with | _ -> let rqname,default = Namespace_resolve.resolve_element_qname_default nsenv uqname in let s = (Namespace_symbols.relem_symbol rqname) in begin Hashtbl.add 
ts_context.ts_elem_names (nsenv,uqname) s; s,default end let resolve_attribute_name ts_context nsenv uqname = try Hashtbl.find ts_context.ts_attr_names (nsenv,uqname) with | _ -> let rqname = Namespace_resolve.resolve_attribute_qname nsenv uqname in let s = (Namespace_symbols.rattr_symbol rqname) in begin Hashtbl.add ts_context.ts_attr_names (nsenv,uqname) s; s end galax-1.1/streaming/small_stream_context.mli0000664000076400007640000000441010560462366017515 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: small_stream_context.mli,v 1.6 2007/02/01 22:08:54 simeon Exp $ *) (* Module: Small_stream_context Description: This module implements the context used when building a small stream from an element-construction expression. *) open Error (****************************) (* The small stream context *) (****************************) type ss_context (**************************************) (* Creates a new small stream context *) (**************************************) val build_ss_context : Small_stream_ast.sexpr list -> ss_context (******************************************) (* Operations on the small stream context *) (******************************************) val get_current_sexpr_list : ss_context -> Small_stream_ast.sexpr list val get_remaining_sexpr_list : ss_context -> Small_stream_ast.sexpr list val replace_current_sexpr_list : ss_context -> Small_stream_ast.sexpr list -> unit val push_elem_to_ss_context : ss_context -> Small_stream_ast.sexpr -> Small_stream_ast.sexpr list -> Streaming_types.resolved_sax_event * Small_stream_ast.sexpr list val pop_elem_from_ss_context : ss_context -> (Streaming_types.resolved_sax_event * Small_stream_ast.sexpr list) option (******************************) (* Simple stream constructors *) (******************************) val resolved_xml_stream_of_sexpr : Small_stream_ast.sexpr -> Streaming_types.resolved_xml_stream (* Builds an XML stream with holes out of a fragment of AST which contains element construction operations. *) val sexpr_of_rsexpr : Namespace_context.nsenv -> Small_stream_ast.rsexpr -> Small_stream_ast.sexpr galax-1.1/TODO0000664000076400007640000004772610276120605011272 0ustar mffmff====================================================================== ------- | TODO: | ------- March 29, 2005 - Jerome and Mary ALIGNMENT OF GALAX WITH VLDB PAPER: (1) Now normalizing path expressions using FLWORs instead of for/let. (2) Removed old for/let from the core. (3) Changed SBDO optimization to operate over FLWORs. (4) Removed for but *not* let from the algebra. (5) Renamed SepSequence to MapIndexStep. (6) Remove Enclosed expressions from the algebra, no-op. (7) Remove let in the algebra, still used to bind the context item before an axis call. STILL TO DO: (6) Implement the full GroupBy. Adapt the optimization rules. (8) Are enclosed expressions in the core still necessary? (9) Compile some/every using tuples (10) Compile typeswitches using tuples (11) Fix a number of bugs in factorization and optimization (see previous threads). March 28, 2005 - Mary - EBV Alignment See: http://lists.w3.org/Archives/Member/w3c-xsl-query/2004Nov/0291.html - API o We need to review and test memory management in the C/Java APIs. There are no rules right now about who should be freeing memory and which objects must be freed. 
In the C API, who is responsible for freeing compiled_modules? If it is the client, what is the API call to free a compiled_module? August 4, 2004 - Mary and Jerome - Net URL library o We need to test Net URL library more thoroughly and determine whether it conforms to the URL specification. July 26, 2004 - Mary o Galax \version does not permit document nodes in the content of a new element, but should -- document node's children should be extracted. o [DONE] Add dates/times/durations to Java API o Change C and Java APIs to use input specs like O'Caml API. July 23, 2004 - Chris o Review the way symbols are handled and used in the string pools. - Removing the lookups results in a large performance improvement each element construction wraps a hash table lookup July 21, 2004 - Mary For Release v0.4.0 / Alignment: o Check current definitions of typed-value() and string-value() are implemented correctly o Test SequenceType more thoroughly o KindTest in path expressions excludes all the item tests. o Re-check atomization rules o Re-check value comparisons July 14, 2004 - Mary, Jerome, Avinash, and Chris *** Galax Release Version 0.4 *** We are preparing for V0.4 to be released on July 31. Many people have contributed to this release, so we have decided to set some rules for how and when new features are incorporated into a new release. The regression tests in xqueryunit/ are being cleaned up and simplified. New subdirectories will correspond to non-terminals in grammar or function names to make finding tests easier. There will be new installation tests, which will be run from galax/Makefile. Run 'make tests'. This target will run all the XQuery usecases and all the API examples. Features ======== There are three classes of Galax features : o Built-in [builtin] : Built-in feature of Galax that _does not_ depend on any external tools (written only in Caml) o Required tool [reqtool]: Built-in feature of Galax that _does_ depend on an external tool (written in Caml and/or C) o Optional tool [opttool]: Optional feature of Galax (written in Caml and/or C) If a feature is built-in, appropriate tests should be added to xqueryunit/ directory. For example, the SBDO feature would have its own directory in xqueryunit/tests/sbdo, which would exercise that feature thoroughly. If the feature is an optional user tool, the tools should have a corresponding directory under examples/. The directory should contain user documentation, which explains how to use the tool, example programs that use the tool, and a tests: target that make sure it it installed correctly. *** NB *** : A feature _must_ pass all the regression and installation tests before being incorporated. I will let everyone know when all the testing infrastructure is in place. Here is each new feature and person responsible for the feature: v0.4 Release Plan ================= Feature Kind Person (Notes) ----------------------------------------- v0.4.0 (July 31) XML Schema [builtin] Vladimir F&O [builtin] Doug PCRE [reqtool] Mary Jungle [opttool] Avinash (Old node numbering, no updates) v0.4.1 (Oct 15) DTDs [builtin] Doug SBDO [builtin] Philippe Projection [builtin] Amelie WSDL/Apache [opttool] Nicola v0.4.x Jungle [opttool] Avinash (New node numbering, updates) - v0.4.0 Release Plan ------------------- o End-to-end testing plan 1. Move optional tools to config/Makefile.opttools 2. Include PCRE and Jungle in release 3. 
tests: target in Galax to make sure environment is set up correctly Galax/ [mff] usecases/ examples/ caml_api/ c_api/ java_api/ jungle/ [vyas] ACTION Items ============ Jerome 1. Talk to Vladimir about status of XML Schema Mary 1. end-to-end linking & testing for Linux platform base tests, apis, xqueryunit, and Web site Avinash 1. Talk to Rick about moving db.bell-labs.com/galax to www.galaxquery.org 2. Tests and documentation for Jungle in examples/jungle Doug 1. check whether namespaces of all working drafts have changed ====================================================================== July 9, 2004 - Mary, Jerome, and Chris Preparatory tasks related to implementing the algebra: ----------------------------------------------------- - SBDO optimization Code review, clean-up, and testing plan. Currently the SBDO opt is _not_ the default in Galax. To make it the default, we need to clean-up the code and add appropriate tests to xqueryunit. Who: Philippe, Mary - Core AST: clean-up and remove tuple expressions Who: Jerome, Chris - Rewriting module : review & clean-up (depends on Core AST) Who: Mary, Jerome, Chris - Projection module: integrate back into mainline Galax (depends on Core AST) Who: Jerome, Amelie, (and Mary?) Implementing the algebra: ------------------------- - Source annotations (depends on Core AST) - Consider two kinds of algebraic operators o Independent of data-model implementation (e.g., tree pattern operators, and join ops) o Dependent on data-model implementation (e.g., stair-case joins & indexed operators) May 4, 2004 - Mary - Bug tracking! http://www.mozilla.org/projects/bugzilla/ Jan 8, 2004 - Mary - Processing model o Serialization should be in Procmod module Dec 12, 2003 - Mary - Modules : (4) in a library module, all user function declarations must be in the target namespace of the module Dec 1, 2003 - Mary o Alignment with LastCall F&O working draft : remove all get- prefixes Nov 18, 2003 - Mary o cleaning_rules.ml: - Disabled the rewriting rules for eliminating functions that convert xdt:untypedAtomic values to a target type, because they depend on the soundness of static typing and validation, i.e., on validation having correctly annotated data-model values with their types, but validation is not yet implemented completely. These rules should be re-enabled when named typing and validation are implemented. November 14, 2003 - Mary - Changed typing.ml so that element constructors pass static typing. This is a temporary fix until element construction is implemented completely and correctly. See comments with label **Element Construction HACK** in eval_expr.ml and typing.ml. Nov 12, 2003 - Mary Review rules for union types in 'promotes_to' function in typing_call.ml. Right now, the assumption is that the target type is _not_ a union. This is too restrictive. Nov 5, 2003 - Mary Default type of context item is not initialized in static context. Where should this happen? In toplevel/galax-run.ml, the values of external global variables are defined, but their types are not. Currently, API has no way of conveying types of external variables. Oct 30, 2003 - Mary The optimization rules in cleaning/cleaning_rules.ml are very conservative. The do not eliminate expressions that might have a side effect or that might fail. Current dynamic semantics permits elimination of unused expressions that might fail. Need to split side-effects from possible failure. Optimization rules also need to be re-considered under weak typing. 
Most of the "type-dependent" rules are assuming full static typing, which provides more precise typing info than weak typing. [DONE] Oct 15, 2003 * fn:floor, fn:ceiling, fn:round et al should be overloaded, like all other arithmetic operators Oct 1, 2003 * Export_dm should be updated when named typing is implemented. See comment by Mary in next_datamodel_event() July 31, 2003 * We need to add Finfo to SAX events so that error reporting can be good. Such Finfo will come either from the original XML document, or from the XQuery expression file in case of a stream built at run-time when doing copy of element/constructors. June 25, 2003 * Allowing seemless datamodel extensions [DONE] fn:doc() still needs to be changed to support alternative protocols. * Extensible annotations of core AST/query plan [DONE] * Implementing the new architecture with a query plan. I would believe that the 'streaming' iterator you are talking about should fit into that framework. * XML Schema validation and named typing. Depends on data model Element constructor * Improvements to the API * Compiling the API under Windows * Align with May 2003 grammar Modules Global/external variables SequenceType/TypeTest Element constructor Namespaces ala Mike K - Implement type-based optimizations for: o arithmetic operators applied to empty. - Clean up datatypes & datamodel : remove all polymorphic operators now that we have monomorphic operators - Check all operators on () & with optimization enabled. - Implement optimization for sorting by document order. - Make the Galax code reentrant. This means getting rid of all global variables and pass any required global structure in a functional way. - Complete review of treatment of namespaces & namespace attributes. - Align with May 2003 working drafts. - Whitespace handling during parsing & serialization. Relationship with pretty printing. ====================================================================== August 24, 2002: 1. Merging XML and XQuery parsers in Galax ** DONE --> Sharing the lexers. ** DONE --> Fixing the lexers to be XML 1.0 compliant ** DONE 2. SAX Parser 3. SAX-based loading of the data model 4. 'Generic' XML Schema validator --> Done on the XML AST --> XQuery data model --> Done on the XML-event stream --> XQuery data model --> Done on the XQuery data model --> XQuery data model --> LegoDB to generate statistics --> Statistics --> LegoDB to generate 'bulk-loading' --> Insert statements into RDBMS files (insert statements). Take an XML stream of events as input XML AST --> stream of events SAX parser --> stream of events Data model --> stream of events 5. SAX-based validation Instantiation of 4. with input = SAX parser output = data model creation 6. Named typing --> Change the XQuery type system (add names) --> New type AST in Galax --> Normalization: --> Mapping from XML Schema to the type system --> Dynamic: --> VALIDATION (need to add type annotations) (need to deal with derivations) (need to check substitution groups) --> Type matching (With the optimization theorem!!) --> Static: --> Subtyping --> Type inference REFERENCES: [1] XQuery 1.0 and XPath 2.0 Formal Semantics, August 16 2002. [2] The Essence of XML (preliminary version), Jerome Simeon, Philip Wadler, FLOPS 2002 [3] XML Schema Part 1: Structures [4] MSL: Semantics of XML Schema (only structural based) [5] RELAX NG, James Clark, Murata Makoto (only structural based) [6] XDuce (only structural based) [7] Look for OO languages. Formal models for OO languages. Cardelli, et al. 
[8] Subsumption for XML types, Gabi Kuper, Jerome Simeon. [9] Inclusion of tree grammar references. [10] Murali Mani, ask him for his note on subtyping for XML. 7. Hook-up schema validation inside XQuery data model 8. Document order and improved query rewritings 9. New semantics of element constructors. 10. 'Algebraic' optimizations ========================================================================== August 20, 2001: Features for demo & use cases: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ To do: * Interface to include Query/Result/Inferred type on one page Window for loading XML Schema into type system define global $v { treat as t (document(...) } define global $v { e } * Current XML Parser : floats * implicit data() in eq/arith operators [ Mary ] - implement definition in email * implicit existential quantification e.g., A[b] * RANGE int TO int expression Also in predicates [1 TO 5] * functions: distinct-node, filter, date(), string(), namespace_uri(), newline()??, shallow()?? * operators: UNION, '//', AFTER, BEFORE * node tests : text() * Not sure how much use cases are sensitive to document order * Validation - in the meantime, escape non-string literals with {} in input documents Completed: X sequence index expressions, e.g., A[1], A[last()], A[position() = Expr ] X functions: contains, ends_with, distinct-value, count, Release Strategy for Source-code release ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Technical * Symbols in xs: xsd: namespaces X Schema import [ Byron & Mary ] * Validation [ Jerome ] - instance of data model * XML Parser - [ ??? ] 1. xerces-parser : document -> SAX interface : SAX -> data model instance (+ validation) 2. xquery-parser: document -> AST load : AST -> data model instance (+ validation) 3. pxp : document -> DOM-like interface not complete w.r.t. whitespace Unicode, etc. * XPath 2.0 semantics [ Mary & Jerome ] 1. Add '[]' predicates [ Mary ] 2. '//' [ Jerome ] '/' vs. FOR implicit conversion of element/attribute to its typed value in arithmetic exprs $v/price + 1 $v/price/data(.) + 1 --- explicit data() conditional expression --- strict strategy if ($v/k) ==/ineq operators (existential semantics) * Clean-up printing * Clean-up code - remove algebra * General error support - Sweep of all errors in system (Prototype, Internal_Errors) - Add error for non-trivial exprs with () type - Warnings, errors * Check 'C' interface Logistic * Binary : windows, linux, solaris Source code * Source forge for distribution * Documentation * Testing * Examples - schema-in;schema-out doc-in;doc-out Use cases * Bug/limitation list Advertisement * DBWORLD, xmlql-interest, Jerome's list, xml-dev, query-comments list, Caml list dbfans@research.att.com * 'Conformant' implementation not scalable one * Web site, Galax features the biggest TODO list in the world: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * Support for new XPath 2.0 and XQuery syntax. This includes: o A new new XQuery parser with the latest syntax is available. He parses the all set of use cases but has not been tested in details. Support for types is still based on the old XML Algebra. - syntax for types is the same as latest XML Algebra document syntax, except for minOccurs maxOccurs who are written: t{{m,n}} for lexing reasons. - such types can be written everywhere the current XQuery grammar expects a datatype. - in the prolog, the following syntax for type declaration can be used: schema s { type X = t ... } where t is a type. o Mapping from XQuery full AST to XQuery Core AST. 
There is nothing yet in the implementation. This is also on going work within the XQuery and XPath groups so it will likely be done on the fly as the spec evolves. * Support for new version of the XML Query datamodel, i.e., support for XPath 2.0 data model. This should include support for current missing features, notably: o Adding support for node identity, parents and references. o Adding support for text nodes and original lexical XML representation. o Removing support for unordered collections o Support for XML Schema datatypes o Support for XQuery/XPath 2.0 operators * Mapping from XQuery to the Core o TBDone * Support for typing o New subtyping that takes XML Schema types into account and fixing current bug. Final resolution for subtyping is still opened (see corresponding issue), but temporary fix would be to support subsumption between supertype schema with local element restriction and one-unambiguity of content model and arbitrary subtype schema. o Support for '&' o descendant/index/sort o Back to old typing for 'for' ? o Mapping XML Schema to the type system. Generating back XML Schema from types. * Dynamic semantics for the core o unordered operator o parent/descendant/document order o index operator o sort operator * Optimization o Architecture o Do we want to bypass XQuery mapping to the core ? o Do we want a bulk algebra ? o Rewrittings (logical/physical) o Pushing evaluation to underlying physical storage * Native support for XML documents. o Input XML: Complete support would rather be out-sourced to something like PXP for the native parsing. Support for XML Schema is a big opened issue right now. o XML Serialization: Still opened. Some basic support existing. * Support for physical data representation. This is a big mess right now. Here are a number of tasks which could be considered part of that: o new XPath 2.0 data model (see above) o common interface for non-created nodes. I.e., we need something like a module to cover nodes already existing in another physical representation and 'viewed as instance nodes in the data model. o interface for more 'physical operations at the data model level', e.g., scan * Actual physical modules: o Native: cf. data model above o DataBlitz: first queries have been run. Many more work is required. o Xerces-c: Nothing here yet, but I believe it is a must for users. Specific tasks -------------- o Improve the way the pervasive.xq file is loaded. More generaly try to clean the handling of the global environment of queries. o Catch Not_found errors in access to variables in the environment and raise specific errors. In general error handling and messages needs to be improved. o Jerome: think about module names and directory names: there is no algebra anymore. o Add complete support for nameclasses. o Update built-in functions to the operator's document. o Update built-in types to the XML Schema document. o The distinction from attribute and element symbols in the Sym module should probably be removed. galax-1.1/compile/0000775000076400007640000000000010772255367012231 5ustar mffmffgalax-1.1/compile/compile_top.mli0000664000076400007640000000277710560462355015253 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. 
*) (* *) (***********************************************************************) (* $Id: compile_top.mli,v 1.10 2007/02/01 22:08:45 simeon Exp $ *) (* Module: Compile_top Description: This module compiles an XQuery toplevel statement or module into the ALGEBRA. *) open Xquery_core_ast open Xquery_algebra_ast open Logical_algebra_types (* Note: During this stage of compilation, we do not put actual running code into the structure. This exception is generated if the structure is executed without replacement of the code. - Jerome & Chris *) (***************) (* Expressions *) (***************) val compile_statement : logical_compile_context -> acstatement -> logical_algop_expr val compile_prolog : logical_compile_context -> acprolog -> (logical_compile_context * logical_algop_prolog) val compile_xmodule : logical_compile_context -> acxmodule -> (logical_compile_context * logical_algop_xmodule) galax-1.1/compile/compile_top.ml0000664000076400007640000001656610707757211015104 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: compile_top.ml,v 1.28 2007/10/25 00:08:41 mff Exp $ *) (* Module: Compile_top Description: This module compiles an XQuery toplevel statement or module into the ALGEBRA. *) open Xquery_core_ast open Xquery_core_ast_util open Xquery_algebra_ast open Xquery_algebra_ast_util open Compile_context open Compile_util open Compile_annotate open Compile_expr open Error (*************) (* Statement *) (*************) let compile_cexpr_annotate compile_ctxt cexpr = let aop = compile_cexpr compile_ctxt cexpr in annotate_algebraic_expression aop; aop let compile_statement compile_ctxt cstatement = Debug.print_compile_debug ("In compile_statement\n"); Norm_context.dump_norm_context (norm_context_from_compile_context compile_ctxt); compile_cexpr_annotate compile_ctxt cstatement (***********************) (* Prolog declarations *) (***********************) (* Function definition *) let compile_cfunction_body compile_ctxt cfunction_body = (* Function is either defined locally, is built-in, is an interface, or is imported *) match cfunction_body with | CEFunctionBltIn -> raise (Query (Compilation "Built-in function declarations should have been filtered before compilation")) | CEFunctionInterface -> raise (Query (Compilation "Interface function declarations should have been filtered before compilation")) | CEFunctionImported -> AOEFunctionImported | CEFunctionUser cexpr1 -> let ao1 = AOEFunctionUser (compile_cexpr_annotate compile_ctxt cexpr1) in ao1 (* In here we add the function definition to the compilation context*) let compile_cfunction_signature compile_ctxt (input_types,output_type) = List.map (compile_ctype compile_ctxt) input_types, compile_ctype compile_ctxt output_type let compile_cfunction_def_desc compile_ctxt cfunction_def_desc = begin let cfname = match cfunction_def_desc with | ((cfname,arity),vname_list,cfunction_signature,cfunction_body,upd) -> cfname in Debug.print_compile_debug ("\t\tCompiling function " ^ (Namespace_names.prefixed_string_of_rqname cfname)); end; match cfunction_def_desc with | ((cfname,arity),vname_list,cfunction_signature,cfunction_body,upd) -> let vname_array = Array.of_list vname_list in let (input_types,output_type) = compile_cfunction_signature compile_ctxt cfunction_signature in let compiled_body = 
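      (* The compiled body packages the formal parameter names, the algebraic
         plan obtained from the core function body (or the marker for an
         imported function), and the declared return type. *)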
fmkalgop_function_body vname_array (compile_cfunction_body compile_ctxt cfunction_body) None (Some output_type) in add_function_to_compile_context compile_ctxt (cfname,arity) compiled_body; ((cfname,arity),(input_types,output_type), compiled_body, upd) let compile_cfunction_def compile_context cfunction_def = let cfunction_def_desc = cfunction_def.pcfunction_def_desc in let fi = cfunction_def.pcfunction_def_loc in let fn_desc = compile_cfunction_def_desc compile_context cfunction_def_desc in fmkalgop_function_decl fn_desc fi (* Global variable declaration *) let compile_cvar_decl compile_ctxt cvar_decl = let cvar_decl_desc = cvar_decl.pcvar_decl_desc in let fi = cvar_decl.pcvar_decl_loc in match cvar_decl_desc with (* Variable is either defined locally, externally, or is imported *) | (vname,ocdt,CEVarUser cexpr1) -> let ao1 = compile_cexpr_annotate compile_ctxt cexpr1 in let oadt = compile_opt_ctype compile_ctxt ocdt in let op_name = AOEVarDecl (oadt, vname) in let indep = OneSub ao1 in let dep = NoSub in logical_aalgop_decl_mkop op_name indep dep fi | (vname,ocdt,CEVarExternal) -> let oadt = compile_opt_ctype compile_ctxt ocdt in let op_name = AOEVarDeclExternal (oadt, vname) in let indep = NoSub in let dep = NoSub in logical_aalgop_decl_mkop op_name indep dep fi | (vname,ocdt,CEVarImported) -> let oadt = compile_opt_ctype compile_ctxt ocdt in let op_name = AOEVarDeclImported (oadt, vname) in let indep = NoSub in let dep = NoSub in logical_aalgop_decl_mkop op_name indep dep fi | (vname,ocdt,CEVarInterface) -> raise (Query (Compilation "Interface variable declarations should have been filtered before compilation")) (* Key definitions *) let compile_cindex_def compile_ctxt cindex_def = let fi = cindex_def.pcindex_def_loc in match cindex_def.pcindex_def_desc with | CValueIndex (str,cexpr1,cexpr2) -> let ao1 = compile_cexpr_annotate compile_ctxt cexpr1 in let ao2 = compile_cexpr_annotate compile_ctxt cexpr2 in let op_name = AOEValueIndexDecl str in let indep = OneSub ao1 in let dep = OneSub ao2 in logical_aalgop_decl_mkop op_name indep dep fi | CNameIndex cename -> let relem_sym = Namespace_symbols.relem_symbol cename in let op_name = AOENameIndexDecl relem_sym in let indep = NoSub in let dep = NoSub in logical_aalgop_decl_mkop op_name indep dep fi (**********) (* Module *) (**********) let is_built_in_or_interface_function f = match f.pcfunction_def_desc with | (_,_,_,CEFunctionBltIn, _) | (_,_,_,CEFunctionInterface, _) -> true | _ -> false let is_interface_var v = match v.pcvar_decl_desc with | (_,_,CEVarInterface) -> true | _ -> false (* Prolog compilation The bodies of the declarations for locally defined and imported functions and variables are compiled, but the declarations for interfaces and externally defined functions and variables are not, because their definitions are not available locally. 
*) let compile_prolog compile_context cprolog = Debug.print_compile_debug "Starting compilation of prolog"; let non_builtin_or_interface_functions = List.filter (fun f -> not(is_built_in_or_interface_function f)) cprolog.pcprolog_functions in let non_interface_vars = List.filter (fun v -> not(is_interface_var v)) cprolog.pcprolog_vars in Debug.print_compile_debug "\tCompiling functions"; let cfuns = List.map (compile_cfunction_def compile_context) non_builtin_or_interface_functions in Debug.print_compile_debug "\tCompiling variables"; let cvars = List.map (compile_cvar_decl compile_context) non_interface_vars in Debug.print_compile_debug "\tCompiling indices"; let cinds = List.map (compile_cindex_def compile_context) cprolog.pcprolog_indices in let prolog = { palgop_prolog_functions = cfuns; palgop_prolog_vars = cvars; palgop_prolog_indices = cinds } in Debug.print_compile_debug "Finished compilation of prolog"; (compile_context,prolog) let compile_xmodule compile_context cxmodule = let (compile_context',prolog') = compile_prolog compile_context cxmodule.pcmodule_prolog in let xmod = { palgop_module_prolog = prolog'; palgop_module_statements = List.map (compile_statement compile_context') cxmodule.pcmodule_statements } in (compile_context',xmod) galax-1.1/compile/compile_annotate.mli0000664000076400007640000000276310560462355016255 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* Module: Compile_annotate Description: This module implements the walker to calculate annotations on the algebra AST. *) (****************************) (* Main annotation function *) (****************************) (* This function has a side-effect of filing out the compile_annotations field for an operator. It should only be called once. This condition is asserted inside the code. It will raise an Internal_Error if this does not hold. It calls itself on the entire sub-tree. This means it should only be called once on the root of each sub-tree. *) val annotate_algebraic_expression : ('a, 'b) Xquery_algebra_ast.aalgop_expr -> unit val reannotate_algebraic_expression : ('a, 'b) Xquery_algebra_ast.aalgop_expr -> unit val annotate_algebraic_module : ('a, 'b, 'c) Xquery_algebra_ast.aalgop_xmodule -> unit galax-1.1/compile/compile_util.ml0000664000076400007640000001051610624377617015252 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: compile_util.ml,v 1.17 2007/05/21 20:22:39 mff Exp $ *) (* Module: Compile_util Description: This module contains some utilities used during the compilation phase. 
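   For example, a compilation pass would typically translate a core sequence
   type into its algebraic counterpart with (a sketch; comp_ctxt and cseqtype
   are assumed to be in scope):

     let aseqtype = compile_ctype comp_ctxt cseqtype in ...

   resolving the QNames mentioned in the type into symbols via
   Namespace_symbols (relem_symbol, rattr_symbol, rtype_symbol).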
*) open Xquery_core_ast open Xquery_algebra_ast open Xquery_algebra_ast_util open Compile_context (*******************) (* The Input tuple *) (*******************) let compile_inputtuple eh fi = logical_aalgop_mkop AOEInputTuple NoSub NoSub None eh fi (******************) (* Sequence types *) (******************) let compile_element_test element_test = match element_test with | CSchemaElementTest cename -> let relem_sym = Namespace_symbols.relem_symbol cename in ASchemaElementTest relem_sym | CElementTest None -> AElementTest None | CElementTest (Some (cename,None)) -> let relem_sym = Namespace_symbols.relem_symbol cename in AElementTest (Some (relem_sym,None)) | CElementTest (Some (cename,Some ctname)) -> let relem_sym = Namespace_symbols.relem_symbol cename in let rtype_sym = Namespace_symbols.rtype_symbol ctname in AElementTest (Some (relem_sym,Some rtype_sym)) let compile_attribute_test attribute_test = match attribute_test with | CSchemaAttributeTest caname -> let rattr_sym = Namespace_symbols.rattr_symbol caname in ASchemaAttributeTest rattr_sym | CAttributeTest None -> AAttributeTest None | CAttributeTest (Some (caname,None)) -> let rattr_sym = Namespace_symbols.rattr_symbol caname in AAttributeTest (Some (rattr_sym,None)) | CAttributeTest (Some (caname,Some ctname)) -> let rattr_sym = Namespace_symbols.rattr_symbol caname in let rtype_sym = Namespace_symbols.rtype_symbol ctname in AAttributeTest (Some (rattr_sym,Some rtype_sym)) let compile_kind_test ckind_test = match ckind_test with | CDocumentKind None -> ADocumentKind None | CDocumentKind (Some et) -> ADocumentKind (Some (compile_element_test et)) | CElementKind et -> AElementKind (compile_element_test et) | CAttributeKind at -> AAttributeKind (compile_attribute_test at) | CPIKind pik -> APIKind pik | CCommentKind -> ACommentKind | CTextKind -> ATextKind | CAnyKind -> AAnyKind let compile_citemtype comp_ctxt citemtype = match citemtype with | CITKindTest ckind_test -> AITKindTest (compile_kind_test ckind_test) | CITTypeRef ctname -> let rtype_sym = Namespace_symbols.rtype_symbol ctname in AITTypeRef rtype_sym | CITItem -> AITItem | CITNumeric -> AITNumeric | CITAnyString -> AITAnyString | CITEmpty -> AITEmpty | CITAtomic ctname -> let rtype_sym = Namespace_symbols.rtype_symbol ctname in AITAtomic rtype_sym let compile_ctype comp_ctxt (cdt, cty) = let (citemtype,occ) = cdt.pcsequencetype_desc in let aitemtype = compile_citemtype comp_ctxt citemtype in fmkasequencetype (aitemtype,occ) cdt.pcsequencetype_loc let compile_opt_ctype comp_ctxt ocdt = match ocdt with | None -> None | Some cdt -> Some (compile_ctype comp_ctxt cdt) let compile_cnode_test cnode_test = match cnode_test with | CPNameTest rqname -> let relem_sym = Namespace_symbols.relem_symbol rqname in (APNameTest relem_sym) | CPNodeKindTest (ckind_test, ckind_type) -> APNodeKindTest (compile_kind_test ckind_test) let compile_fun_sig comp_ctxt (fn,(sl,s), opt_fun_kind, upd) = (fn,(List.map (compile_ctype comp_ctxt) sl, compile_ctype comp_ctxt s), upd) let compile_overloaded_table_sigs comp_ctxt sigs = List.map (compile_fun_sig comp_ctxt) sigs let compile_cfunction_sig comp_ctxt (intypes, outtype) = (List.map (compile_ctype comp_ctxt) intypes, compile_ctype comp_ctxt outtype) galax-1.1/compile/compile_update.ml0000664000076400007640000004766310560462355015565 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. 
*) (* *) (***********************************************************************) (* $Id: compile_update.ml,v 1.10 2007/02/01 22:08:45 simeon Exp $ *) (* Module: Compile_update Description: This module compiles an XQuery core update into the XQuery algebra. *) open Xquery_core_ast open Xquery_core_ast_util open Xquery_algebra_ast open Xquery_algebra_ast_util open Compile_context open Compile_expr open Compile_util open Namespace_builtin (* Clean this out *) let get_fresh_var_name comp_ctxt = let v = Compile_context.next_var comp_ctxt in (Namespace_builtin.glx_prefix, Namespace_builtin.glx_uri, ("comp" ^ (string_of_int v))) (***********) (* Updates *) (***********) let compile_cinsert_location compile_ctxt cinsert_location = match cinsert_location with | CUAsLastInto cexpr1 -> let ao1 = compile_cexpr compile_ctxt cexpr1 in (ao1,AOUAsLastInto) | CUAsFirstInto cexpr1 -> let ao1 = compile_cexpr compile_ctxt cexpr1 in (ao1,AOUAsFirstInto) | CUInto cexpr1 -> let ao1 = compile_cexpr compile_ctxt cexpr1 in (ao1,AOUInto) | CUAfter cexpr1 -> let ao1 = compile_cexpr compile_ctxt cexpr1 in (ao1,AOUAfter) | CUBefore cexpr1 -> let ao1 = compile_cexpr compile_ctxt cexpr1 in (ao1,AOUBefore) let compile_csimple_update_desc compile_ctxt csimple_update_desc eh fi = match csimple_update_desc with | CUInsert (cexpr1,cinsert_location) -> let ao1 = compile_cexpr compile_ctxt cexpr1 in let (ao2,insert_flag) = compile_cinsert_location compile_ctxt cinsert_location in let indep = TwoSub (ao1,ao2) in let dep = NoSub in let op_name = AOEInsert insert_flag in annotate_algop op_name indep dep eh fi | CUReplace (value_of_flag,cexpr1,cexpr2) -> let ao1 = compile_cexpr compile_ctxt cexpr1 in let ao2 = compile_cexpr compile_ctxt cexpr2 in let indep = TwoSub(ao1,ao2) in let dep = NoSub in let op_name = AOEReplace value_of_flag in annotate_algop op_name indep dep eh fi | CUDelete cexpr1 -> let ao1 = compile_cexpr compile_ctxt cexpr1 in let indep = OneSub ao1 in let dep = NoSub in let op_name = AOEDelete in annotate_algop op_name indep dep eh fi | CUEmpty -> let indep = NoSub in let dep = NoSub in let op_name = AOEEmpty in annotate_algop op_name indep dep eh fi let compile_csimple_update compile_ctxt csimple_update eh fi = let csimple_update_desc = csimple_update.pcsimple_update_desc in let fi = csimple_update.pcsimple_update_loc in compile_csimple_update_desc compile_ctxt csimple_update_desc eh fi (************************************) (* Compilation of flwor expressions *) (************************************) (* HACK FOR NOW: WE DUPLICATE CODE FROM compile_expr this should be removed and factorized Avinash & Jerome *) type 'a compile_select = | Cs_Disjunction of 'a compile_select * 'a compile_select | Cs_ComplexConjunct of 'a compile_select * 'a compile_select | Cs_SimpleConjunct of ('a option, unit) aalgop_expr list let is_fn_true op = match op.pcexpr_desc with | CECall (x, [], _, _, _) when x = fn_true -> true | _ -> false let is_fn_false op = match op.pcexpr_desc with | CECall (x, [], _, _, _) when x = fn_false -> true | _ -> false let is_fn_boolean op = match op.pcexpr_desc with | CECall (x, _, _, _, _) when x = fn_boolean -> true | _ -> false let rec compile_csimple_update_list compile_ctxt op3 csimple_update_list eh fi = (* [[ return Expr1 ]](Op3) == Map_(i -> [[ Expr1 ]])(Op3) *) let oplist = List.map (fun x -> compile_csimple_update compile_ctxt x eh fi) csimple_update_list in let oparray = Array.of_list oplist in let apply_updates_op = annotate_algop AOESequencing (ManySub oparray) NoSub eh fi in let table_op = 
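    (* Materialize the incoming tuple table (Op3) first, so that the sequence
       of compiled simple updates can then be mapped over each of its tuples. *)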
annotate_algop AOEMaterializeTable (OneSub op3) NoSub eh fi in let map = annotate_algop AOEMapToItem (OneSub table_op) (OneSub apply_updates_op) eh fi in map and compile_order_by_clause_update compile_ctxt op3 (order_by_clause,csimple_update_list) eh fi = match order_by_clause with | None -> compile_csimple_update_list compile_ctxt op3 csimple_update_list eh fi | Some (stablekind,corder_spec_list) -> begin let rec split_fun corder_spec_list = match corder_spec_list with | [] -> ([],[]) | (cexpr,sortkind,emptysortkind) :: corder_spec_list' -> let (cexpr_list,rest_list) = split_fun corder_spec_list' in (cexpr :: cexpr_list, (sortkind,emptysortkind) :: rest_list) in let splited = split_fun corder_spec_list in let op_name = AOEOrderBy (stablekind,snd splited) in let op1_list = List.map (compile_cexpr compile_ctxt) (fst splited) in let op1_array = Array.of_list op1_list in let new_op3 = aalgop_mkop op_name (OneSub op3) (ManySub op1_array) None eh fi in compile_csimple_update_list compile_ctxt new_op3 csimple_update_list eh fi end and compile_where_clause_update compile_ctxt op3 (where_clause,order_by_clause,csimple_update_list) eh fi = match where_clause with | None -> compile_order_by_clause_update compile_ctxt op3 (order_by_clause,csimple_update_list) eh fi | Some where_expr -> let select_predicate = compile_cexpr compile_ctxt where_expr in let select = compile_select compile_ctxt where_expr op3 eh fi in compile_order_by_clause_update compile_ctxt select (order_by_clause,csimple_update_list) eh fi and compile_fl_clauses_update compile_ctxt op3 fl_clauses (where_clause,order_by_clause,csimple_update_list) eh fi = match fl_clauses with | [] -> (* Calls compilation for the rest of the FLWOR block *) begin compile_where_clause_update compile_ctxt op3 (where_clause,order_by_clause,csimple_update_list) eh fi end | (CEFOR (odt,vname,None,cexpr1)) :: fl_clauses' -> (* [[ for $v in Expr1 return Expr2 ]](Op3) == [ [[ Expr2 ]](Map_(i -> Map_(j -> [ v : j ] o i)([[ Expr1 ]]))(Op3)) / $v --> #v ] Changed to: [ [[Expr2]] (MapConcat( i -> MapItemTuple{[v : j]}([[Expr1]]) o i)(Op3) / $v --> #v ] *) (* 1. Compile Expr1 *) begin let op1 = compile_cexpr compile_ctxt cexpr1 in (* 2. Build the new Op3 *) let fresh_name = get_fresh_var_name compile_ctxt in let input_j = aalgop_mkop (AOEVar fresh_name) NoSub NoSub None eh fi in let compile_ctxt' = add_variable_field_to_compile_context compile_ctxt vname in let tname = get_tuple_field_name compile_ctxt' vname in let tuple_construct = aalgop_mkop (AOECreateTuple [|tname|]) (ManySub [|input_j|]) NoSub None eh fi in let map_inner = aalgop_mkop (AOEMapFromItem fresh_name) (OneSub op1) (OneSub tuple_construct) None eh fi in let map_outer = aalgop_mkop AOEMapConcat (OneSub op3) (OneSub map_inner) None eh fi in (* 3. Apply compilation to the rest of the FLWOR block, passing the new Op3 *) compile_fl_clauses_update compile_ctxt' map_outer fl_clauses' (where_clause,order_by_clause,csimple_update_list) eh fi end | (CEFOR (odt,vname,Some vname',cexpr1)) :: fl_clauses' -> (* [[ for $v at $i in Expr1 return Expr2 ]](Op3) == [ [[ Expr2 ]](Map_(i -> MapIndex(i)(Map_(j -> [ v : j ] o i)([[ Expr1 ]])))(Op3)) / $v --> #v ] ChangedTo: [ [[ Expr2 ]](MapConcat( t -> MapIndex(i)(MapItemTuple{[v: j]} ([[ Expr1 ]]))) (Op3)) / $v --> #v ] *) (* 1. Compile Expr1 *) begin let op1 = compile_cexpr compile_ctxt cexpr1 in (* 2. 
Build the new Op3 *) let fresh_name = get_fresh_var_name compile_ctxt in let input_j = aalgop_mkop (AOEVar fresh_name) NoSub NoSub None eh fi in (***************************) (* Compile the tuple names *) (***************************) let compile_ctxt' = add_variable_field_to_compile_context compile_ctxt vname in let compile_ctxt'' = add_variable_field_to_compile_context compile_ctxt' vname' in let tname = get_tuple_field_name compile_ctxt'' vname in let tname' = get_tuple_field_name compile_ctxt'' vname' in let tuple_construct = aalgop_mkop (AOECreateTuple [|tname|]) (ManySub [|input_j|]) NoSub None eh fi in let input_i = annotate_inputtuple eh fi in let tuple_append = aalgop_mkop AOEConcatTuples (TwoSub (tuple_construct,input_i)) NoSub None eh fi in let map_inner = aalgop_mkop (AOEMapFromItem fresh_name) (OneSub op1) (OneSub tuple_append) None eh fi in let map_index = aalgop_mkop (AOEMapIndex(tname')) (OneSub map_inner) NoSub None eh fi in let map_outer = aalgop_mkop AOEMapConcat (OneSub op3) (OneSub map_index) None eh fi in (* 3. Apply compilation to the rest of the FLWOR block, passing the new Op3 *) compile_fl_clauses_update compile_ctxt'' map_outer fl_clauses' (where_clause,order_by_clause,csimple_update_list) eh fi end | (CELET (odt,vname,cexpr1)) :: fl_clauses' -> (* [[ let $v in Expr1 return Expr2 ]](Op3) == [ [[ Expr2 ]](Map_(i -> [ v : [[ Expr1 ]] ] o i)(Op3)) / $v --> #v ] Changed To: [ [[ Expr2 ]](MapConcat(t -> [ v : [[ Expr1 ]] ]))(Op3) / $v --> #v *) (* 1. Compile Expr1 *) begin let op1 = compile_cexpr compile_ctxt cexpr1 in let compile_ctxt' = add_variable_field_to_compile_context compile_ctxt vname in let tname = get_tuple_field_name compile_ctxt' vname in (* 2. Build the new Op3 *) let tuple_construct = aalgop_mkop (AOECreateTuple [|tname|]) (ManySub [|op1|]) NoSub None eh fi in let map = aalgop_mkop AOEMapConcat (OneSub op3) (OneSub tuple_construct) None eh fi in compile_fl_clauses_update compile_ctxt' map fl_clauses' (where_clause,order_by_clause,csimple_update_list) eh fi end (* let compile_order_by_clause_update compile_ctxt op3 (order_by_clause,csimple_update_list) eh fi = match order_by_clause with | None -> compile_csimple_update_list compile_ctxt op3 csimple_update_list eh fi | Some (stablekind,corder_spec_list) -> begin let rec split_fun corder_spec_list = match corder_spec_list with | [] -> ([],[]) | (cexpr,sortkind,emptysortkind) :: corder_spec_list' -> let (cexpr_list,rest_list) = split_fun corder_spec_list' in (cexpr :: cexpr_list, (sortkind,emptysortkind) :: rest_list) in let splited = split_fun corder_spec_list in let op_name = AOEOrderBy (stablekind,snd splited) in let op1_list = List.map (compile_cexpr compile_ctxt) (fst splited) in let op1_array = Array.of_list op1_list in let new_op3 = annotate_algop op_name (OneSub op3) (ManySub op1_array) eh fi in compile_csimple_update_list compile_ctxt new_op3 csimple_update_list eh fi end let compile_where_clause_update compile_ctxt op3 (where_clause,order_by_clause,csimple_update_list) eh fi = match where_clause with | None -> compile_order_by_clause_update compile_ctxt op3 (order_by_clause,csimple_update_list) eh fi | Some where_expr -> raise (Error.Query (Error.Internal_Error ("Selects not yet compiled for updates"))) (* let select_predicate = compile_cexpr compile_ctxt where_expr in let select = annotate_algop AOESelect (OneSub op3) (OneSub select_predicate) eh fi in compile_order_by_clause_update compile_ctxt select (order_by_clause,csimple_update_list) eh fi *) let rec compile_fl_clauses_update compile_ctxt 
op3 fl_clauses (where_clause,order_by_clause,csimple_update_list) eh fi = match fl_clauses with | [] -> (* Calls compilation for the rest of the FLWOR block *) begin compile_where_clause_update compile_ctxt op3 (where_clause,order_by_clause,csimple_update_list) eh fi end | (CEFOR (odt,vname,None,cexpr1)) :: fl_clauses' -> (* [[ for $v in Expr1 return Expr2 ]](Op3) == [ [[ Expr2 ]](Map_(i -> Map_(j -> [ v : j ] o i)([[ Expr1 ]]))(Op3)) / $v --> #v ] *) (* 1. Compile Expr1 *) begin let op1 = compile_cexpr compile_ctxt cexpr1 in (* 2. Build the new Op3 *) let fresh_var_name = get_fresh_var_name compile_ctxt in let input_j = annotate_algop (AOEVar fresh_var_name) NoSub NoSub eh fi in let tuple_construct = annotate_algop (AOECreateTuple [|vname|]) (ManySub [|input_j|]) NoSub eh fi in let input_i = annotate_algop AOEInputTuple NoSub NoSub eh fi in let tuple_append = annotate_algop AOEConcatTuples (TwoSub (tuple_construct,input_i)) NoSub eh fi in let map_inner = annotate_algop (AOEMapFromItem fresh_var_name) (OneSub op1) (OneSub tuple_append) eh fi in let map_outer = annotate_algop AOEMap (OneSub op3) (OneSub map_inner) eh fi in (* 3. Apply compilation to the rest of the FLWOR block, passing the new Op3 *) let compile_ctxt' = add_variable_field_to_compile_context compile_ctxt vname in compile_fl_clauses_update compile_ctxt' map_outer fl_clauses' (where_clause,order_by_clause,csimple_update_list) eh fi end | (CEFOR (odt,vname,Some vname',cexpr1)) :: fl_clauses' -> (* [[ for $v at $i in Expr1 return Expr2 ]](Op3) == [ [[ Expr2 ]](Map_(i -> MapIndex(i)(Map_(j -> [ v : j ] o i)([[ Expr1 ]])))(Op3)) / $v --> #v ] *) (* 1. Compile Expr1 *) begin let op1 = compile_cexpr compile_ctxt cexpr1 in (* 2. Build the new Op3 *) let fresh_var_name = get_fresh_var_name compile_ctxt in let input_j = annotate_algop (AOEVar fresh_var_name) NoSub NoSub eh fi in let tuple_construct = annotate_algop (AOECreateTuple [|vname|]) (ManySub [|input_j|]) NoSub eh fi in let input_i = annotate_algop AOEInputTuple NoSub NoSub eh fi in let tuple_append = annotate_algop AOEConcatTuples (TwoSub (tuple_construct,input_i)) NoSub eh fi in let map_inner = annotate_algop (AOEMapFromItem fresh_var_name) (OneSub op1) (OneSub tuple_append) eh fi in let rname' = vname' in (* For now, the tuple field name is the variable name *) let map_index = annotate_algop (AOEMapIndex(rname')) (OneSub map_inner) NoSub eh fi in let map_outer = annotate_algop AOEMap (OneSub op3) (OneSub map_index) eh fi in (* 3. Apply compilation to the rest of the FLWOR block, passing the new Op3 *) let compile_ctxt' = add_variable_field_to_compile_context compile_ctxt vname in let compile_ctxt'' = add_variable_field_to_compile_context compile_ctxt' vname' in compile_fl_clauses_update compile_ctxt'' map_outer fl_clauses' (where_clause,order_by_clause,csimple_update_list) eh fi end | (CELET (odt,vname,cexpr1)) :: fl_clauses' -> (* [[ let $v in Expr1 return Expr2 ]](Op3) == [ [[ Expr2 ]](Map_(i -> [ v : [[ Expr1 ]] ] o i)(Op3)) / $v --> #v ] *) (* 1. Compile Expr1 *) begin let op1 = compile_cexpr compile_ctxt cexpr1 in let compile_ctxt' = add_variable_field_to_compile_context compile_ctxt vname in let tname = get_tuple_field_name compile_ctxt' vname in (* 2. 
Build the new Op3 *) let tuple_construct = annotate_algop (AOECreateTuple [|tname|]) (ManySub [|op1|]) NoSub eh fi in let input_i = annotate_algop AOEInputTuple NoSub NoSub eh fi in let tuple_append = annotate_algop AOEConcatTuples (TwoSub (tuple_construct,input_i)) NoSub eh fi in let map = annotate_algop AOEMap (OneSub op3) (OneSub tuple_append) eh fi in (* 3. Apply compilation to the rest of the FLWOR block, passing the new Op3 *) let compile_ctxt' = add_variable_field_to_compile_context compile_ctxt vname in compile_fl_clauses_update compile_ctxt' map fl_clauses' (where_clause,order_by_clause,csimple_update_list) eh fi end *) and compile_flwor_update compile_ctxt fl_clauses (where_clause,order_by_clause,csimple_update_list) eh fi = (* The initial input is an empty table *) let init_op3 = if has_input_set compile_ctxt then annotate_algop AOEInputTuple NoSub NoSub eh fi else annotate_algop (AOECreateTuple [||]) (ManySub [||]) NoSub eh fi in compile_fl_clauses_update compile_ctxt init_op3 fl_clauses (where_clause,order_by_clause,csimple_update_list) eh fi and compile_select compile_ctxt select_clause op3 eh fi = let rec compile_select_helper cur_expr = match cur_expr.pcexpr_desc with | CEIf(cexpr1, cexpr2, cexpr3) -> (***************************************) (* Restricted form *) (* Or -> if (e1) then true else (e2) *) (* And -> if (e1) then (e2) else false *) (***************************************) let ae1 = compile_select_helper cexpr1 in if (is_fn_true cexpr2) then begin Cs_Disjunction( ae1, (compile_select_helper cexpr3)) end else if (is_fn_false cexpr3) then begin let rhs = compile_select_helper cexpr2 in match (ae1, rhs) with | (Cs_SimpleConjunct( c1 ), Cs_SimpleConjunct (c2)) -> Cs_SimpleConjunct ( c1 @ c2 ) | _ -> Cs_ComplexConjunct( ae1, rhs ) end else begin (* An introduced if statement... 
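(* Worked example (hypothetical predicates a, b): after normalization a
   where-clause of the form  a and b  reaches compile_select as
       if (a) then (b) else fn:false()
   and  a or b  as
       if (a) then fn:true() else (b).
   compile_select_helper therefore recognizes the first shape as
       Cs_SimpleConjunct [ [[a]]; [[b]] ]
   and the second as
       Cs_Disjunction ([[a]], [[b]]),
   where [[.]] is the compiled predicate; compile_conjuncts then numbers the
   predicates left to right so that AOESelect can evaluate them out of its
   ManySub array. *)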
can not compile it *) let ae1 = compile_cexpr compile_ctxt cur_expr in Cs_SimpleConjunct ( ae1 :: [] ) end | CECall(_,[cexpr],_, _, _) when (is_fn_boolean cur_expr) -> compile_select_helper cexpr | _ -> let ae1 = compile_cexpr compile_ctxt cur_expr in Cs_SimpleConjunct ( ae1 :: [] ) in let rec compile_conjuncts conjunct index = match conjunct with | Cs_SimpleConjunct ( op_list ) -> let length = List.length op_list in let o_list = Array.of_list op_list in SimpleConjunct( index, (index + length -1)), o_list, (index + length) | Cs_ComplexConjunct(c1, c2) -> let op1, op_l1, index = compile_conjuncts c1 index in let op2, op_l2, index = compile_conjuncts c1 index in let ops = Array.append op_l1 op_l2 in ComplexConjunct(op1,op2), ops, index | Cs_Disjunction (d1,d2) -> let c1, op1, cur_index = compile_conjuncts d1 index in let c2, op2, cur_index = compile_conjuncts d2 cur_index in let ops = Array.append op1 op2 in Disjunct( c1,c2 ), ops, cur_index in let internal_conjunct_desc = compile_select_helper select_clause in let conjunct_desc,ops,_ = compile_conjuncts internal_conjunct_desc 0 in aalgop_mkop (AOESelect conjunct_desc) (OneSub op3) (ManySub ops) None eh fi let compile_update_desc compile_ctxt ccomplex_update_desc eh fi = match ccomplex_update_desc with | CUCond (cexpr1,csimple_update1,csimple_update2) -> (* Conditional updates compiled into an IF-THEN-ELSE *) let ao1 = compile_cexpr compile_ctxt cexpr1 in let ao2 = compile_csimple_update compile_ctxt csimple_update1 eh fi in let ao3 = compile_csimple_update compile_ctxt csimple_update2 eh fi in let indep = OneSub ao1 in let dep = TwoSub (ao2,ao3) in let op_name = AOEIf in annotate_algop op_name indep dep eh fi | CUFLWOR (fl_clauses,where_clause,order_by_clause,csimple_update_list) -> (* FLWOR update compiled as FLWOR expressions, except for the simple updates part. *) compile_flwor_update compile_ctxt fl_clauses (where_clause,order_by_clause,csimple_update_list) eh fi let compile_cupdate compile_ctxt cupdate = let cupdate_desc = cupdate.pcupdate_desc in let cupdate_orig = cupdate.pcupdate_origin in let fi = cupdate.pcupdate_loc in compile_update_desc compile_ctxt cupdate_desc None fi galax-1.1/compile/compile_expr.ml0000664000076400007640000010660510705011416015236 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: compile_expr.ml,v 1.98 2007/10/16 01:25:34 mff Exp $ *) (* Module: Compile_expr Description: This module compiles an XQuery core expression into the ALGEBRA. 
*) open Error open Namespace_builtin open Xquery_common_ast open Xquery_core_ast open Xquery_core_ast_util open Xquery_algebra_ast open Xquery_algebra_ast_util open Logical_algebra_types open Compile_context open Compile_util open Processing_context let decode_proto_value cexpr_list = match cexpr_list with | [cexpr1;cexpr2] -> begin match cexpr2.pcexpr_desc with | CEProtoValue atomic_value -> (cexpr1,atomic_value) | CEScalar (IntegerLiteral _ ) -> (cexpr1,Datatypes.ATInteger) | CEScalar (DecimalLiteral _ ) -> (cexpr1,Datatypes.ATDecimal) | CEScalar (DoubleLiteral _ ) -> (cexpr1,Datatypes.ATDouble) | CEScalar (StringLiteral _ ) -> (cexpr1,Datatypes.ATString) | CEScalar (BooleanLiteral _ ) -> (cexpr1,Datatypes.ATBoolean) | CEScalar (URILiteral _ ) -> (cexpr1,Datatypes.ATAnyURI) | _ -> raise (Query (Error ("Could not find proto value during compilation "^(Print_top.bprintf_acexpr "" cexpr2)))) end | _ -> raise (Query (Error "Could not find proto value during compilation")) let decode_noproto_value cexpr_list = match cexpr_list with | [cexpr1] -> cexpr1 | _ -> raise (Query (Error "Could not find proto value during compilation")) let get_fresh_var_name comp_ctxt = Compile_context.get_new_variable_name comp_ctxt "comp" type 'a compile_select = | Cs_Disjunction of 'a compile_select * 'a compile_select | Cs_ComplexConjunct of 'a compile_select * 'a compile_select | Cs_SimpleConjunct of logical_algop_expr list let is_fn_true op = match op.pcexpr_desc with | CECall (x, [], _, _, _) when Namespace_names.rqname_equal x fn_true -> true | _ -> false let is_fn_false op = match op.pcexpr_desc with | CECall (x, [], _, _, _) when Namespace_names.rqname_equal x fn_false -> true | _ -> false let is_ce_empty op = match op.pcexpr_desc with | CEEmpty -> true | _ -> false let is_fn_boolean_cexpr op = match op.pcexpr_desc with | CECall (x, _, _, _, _ ) when Namespace_names.rqname_equal x fn_boolean -> true | _ -> false (*********************************************************) (* Helper functions for tree join recognition. - Michael *) (*********************************************************) (* Indicates wether the semantics emposed by the expression is covered by the semantics of a tree join at its output. *) let is_tree_join_input_redundant cexpr = match cexpr.pcexpr_desc with | CECall (cfname, _, _, _, _) when cfname = fs_node_sequence -> true | _ -> false (* Indicates wether the semantics imposed by the expression is covered by the semantics of a tree join at its input. *) let is_tree_join_output_redundant cexpr = match cexpr.pcexpr_desc with | CECall (cfname, _, _, _, _) -> if cfname = fs_distinct_docorder || cfname = fs_distinct_docorder_or_atomic_sequence || cfname = fs_distinct || cfname = fs_distinct_or_atomic_sequence || cfname = fs_docorder || cfname = fs_docorder_or_atomic_sequence || cfname = fs_node_sequence || cfname = fs_node_sequence_or_atomic_sequence then true else false | _ -> false let is_fl_clauses_well_behaved fl_clauses = match fl_clauses with | fl_clause :: [] -> begin match fl_clause with | CEFOR (None, _, None, _) -> true | _ -> false end | _ -> false let is_where_clause_well_behaved where_clause = match where_clause with | None -> true | Some _ -> false let is_order_by_clause_well_behaved order_by_clause = match order_by_clause with | None -> true | Some _ -> false (* Actual pattern matching for identifying tree joins. 
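(* Example of the pattern matched just below (hypothetical expression E):
   a core FLWOR of the shape
       for $v in E return child::element(a)
   with a single for-clause, no positional variable, and no where- or
   order-by-clause, where the return clause is a plain axis step over the
   bound variable, qualifies for compilation into a tree join.  This is what
   lets surrounding distinct-docorder (sbdo) calls inserted by normalization
   be recognized as redundant and removed. *)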
*) let axis_nt_for_tree_join fl_clauses where_clause order_by_clause return_clause = let raise_tree_join_match_error () = raise (Query (Compilation "Tree join match error.")) in (*****************************************************) (* The pattern is: for $var in expr return axis::nt. *) (*****************************************************) match return_clause.pcexpr_desc with | CEForwardAxis (v,axis, cnode_test) | CEReverseAxis (v,axis, cnode_test) -> (* The following makes sure the other flwor clauses are structured as expected. *) if is_fl_clauses_well_behaved fl_clauses && is_where_clause_well_behaved where_clause && is_order_by_clause_well_behaved order_by_clause then Some (v, axis, cnode_test) else raise_tree_join_match_error () | _ -> None (* Helps assessing wether a sbdo-related function call can be removed. That is the case whenever its argument qualifies for being compiled into a tree join. *) let qualifies_for_tree_join cexpr = match cexpr.pcexpr_desc with | CEFLWOR (fl_clauses, where_clause, order_by_clause, return_clause) -> begin match axis_nt_for_tree_join fl_clauses where_clause order_by_clause return_clause with | Some _ -> true | None -> false end | _ -> false let access_arg1 cexpr = match cexpr.pcexpr_desc with | CECall (_, arg1 :: [], _, _, _) -> arg1 | _ -> raise (Query (Compilation "Was expecting exactly one function argument.")) let access_for_clause fl_clauses = match fl_clauses with | CEFOR (None, _, None, cexpr) :: [] -> cexpr | _ -> raise (Query (Compilation "Was expecting exactly one for-clause.")) (************************************************************) (* Helper functions for parse stream recognition. - Michael *) (************************************************************) let access_static_document_uri mod_proc_ctxt cexpr = let raise_fn_doc_match_error () = raise (Query (Compilation "fn:doc match error.")) in let access_arg1 cexpr cfname = match cexpr.pcexpr_desc with | CECall(cfn, cexprs, _, _, _) when (Namespace_names.rqname_equal cfname cfn) -> List.hd cexprs | CECall(cfn, cexprs, _, _, _) -> raise_fn_doc_match_error () | _ -> raise_fn_doc_match_error () in let fn_doc_arg1 = access_arg1 cexpr fn_doc in (* When evaluating normalized exprs, it may be that the fn_data, fs_convert_simple_operand and fs_promote_to_anystring are not there. 
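(* Example (hypothetical document URI): given a call written as
       fn:doc("report.xml")
   whose argument is a string literal, possibly wrapped in the fn:data,
   fs:convert-simple-operand and fs:promote-to-anystring coercions that
   normalization introduces, access_static_document_uri strips those
   wrappers, resolves "report.xml" against the module base URI when one is
   set (say http://example.org/data/, giving
   http://example.org/data/report.xml), and compile_cexpr can then emit a
   dedicated AOEParse operator for that absolute URI instead of a generic
   fn:doc call. *)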
The following deals with this *) let data_arg = match fn_doc_arg1.pcexpr_desc with | CEScalar _ -> fn_doc_arg1 | _ -> let fs_promote_arg1 = access_arg1 fn_doc_arg1 fs_promote_to_anystring in match fs_promote_arg1.pcexpr_desc with | CEScalar _ -> fs_promote_arg1 | _ -> let fs_cso_arg1 = access_arg1 fs_promote_arg1 fs_convert_simple_operand in match fs_cso_arg1.pcexpr_desc with | CEScalar _ -> fs_cso_arg1 | _ -> access_arg1 fs_cso_arg1 fn_data in match data_arg.pcexpr_desc with | CEScalar av -> begin try let uri_string = string_of_literal av in let base_uri = Processing_context.get_base_uri mod_proc_ctxt in match base_uri with | None -> uri_string | Some base_uri -> let uri = AnyURI._kinda_uri_of_string uri_string in let absolute_uri = AnyURI._uri_resolve base_uri uri in let absolute_uri_string = AnyURI._string_of_uri absolute_uri in absolute_uri_string with | Query (Datamodel _) -> raise_fn_doc_match_error () end | _ -> raise_fn_doc_match_error () let is_static_fn_doc_cexpr mod_proc_ctxt cexpr = try ignore(access_static_document_uri mod_proc_ctxt cexpr); true with | (Query (Compilation "fn:doc match error.")) -> false (***************) (* Expressions *) (***************) (******************************************* Compile_cexpr: Goal: Compile core XQuery expressions into the algebraic representation Template for each portion of the match statement: - Compile parameter expressions into the Algebra - Construct the operation name (keeping non-algebraic information for rewrites) - Build caml code capabable of implementing the functionality - Coerce this into algop_eval_code_dep - Construct the return structure (algop_mkop ... ) *) let rec compile_cexpr compile_ctxt cexpr = (* The Core expression contains the file location that should be correlated with errors, so we catch any exceptions here and rewrap the exceptions with the file location. *) let cexpr_desc = cexpr.pcexpr_desc in let eh = cexpr.pcexpr_origin in let fi = cexpr.pcexpr_loc in try match cexpr_desc with | CEUnordered cexpr1 -> (* Currently we ignore unordered expressions *) let ao1 = compile_cexpr compile_ctxt cexpr1 in ao1 | CEOrdered cexpr1 -> (* Currently we ignore ordered expressions *) let ao1 = compile_cexpr compile_ctxt cexpr1 in ao1 | CEFLWOR (fl_clauses,where_clause,order_by_clause,return_clause) -> compile_flwor compile_ctxt fl_clauses (where_clause,order_by_clause,return_clause) eh fi | CEIf(cexpr1,cexpr2,cexpr3) -> let ao1 = compile_cexpr compile_ctxt cexpr1 in let ao2 = compile_cexpr compile_ctxt cexpr2 in let ao3 = compile_cexpr compile_ctxt cexpr3 in let indep = OneSub ao1 in let dep = TwoSub (ao2,ao3) in let op_name = AOEIf in logical_aalgop_mkop op_name indep dep None eh fi | CEWhile(cexpr1,cexpr2) -> let ao1 = compile_cexpr compile_ctxt cexpr1 in let ao2 = compile_cexpr compile_ctxt cexpr2 in let indep = NoSub in let dep = TwoSub (ao1,ao2) in let op_name = AOEWhile in logical_aalgop_mkop op_name indep dep None eh fi | CETypeswitch(cexpr0,branch_list) -> let ao0 = compile_cexpr compile_ctxt cexpr0 in let pattern, dep_exprs = List.split (List.map (compile_typeswitch_branch compile_ctxt) branch_list) in let indep = OneSub ao0 in let dep = ManySub (Array.of_list dep_exprs) in let op_name = AOETypeswitch (Array.of_list pattern) in logical_aalgop_mkop op_name indep dep None eh fi | CEVar vname1 -> begin match get_variable_field_from_compile_context compile_ctxt vname1 with (* If the variable is not bound to a field access, then this is a standard XQuery variable or an imported variable. 
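(* Illustrative example (hypothetical variable names): inside a FLWOR, a
   variable such as $x that was bound by a for- or let-clause has been given
   a tuple field by the compile context, so the CEVar case here produces an
   AOEAccessTuple on that field; a global or imported variable such as
   $input has no such field and is compiled to a plain AOEVar lookup
   instead. *)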
*) | None -> let indep = NoSub in let dep = NoSub in let op_name = AOEVar vname1 in logical_aalgop_mkop op_name indep dep None eh fi (* Otherwise, we just need to access the proper field from the input tuple. *) | Some crname -> let indep = NoSub in let dep = NoSub in let op_name = AOEAccessTuple(crname) in logical_aalgop_mkop op_name indep dep None eh fi end | CECall (cfname,cexpr_list,(optintypes,outtype), upd, selfrecur) -> let mod_proc_ctxt = Norm_context.module_context_from_norm_context (norm_context_from_compile_context compile_ctxt) in if (is_static_fn_doc_cexpr mod_proc_ctxt cexpr) then (* In the case of a fn:doc call with a static URL, compile to dedicated Parse operator. *) let indep = NoSub in let dep = NoSub in let document_uri = access_static_document_uri mod_proc_ctxt cexpr in let op_name = AOEParse(document_uri) in logical_aalgop_mkop op_name indep dep None eh fi else if (Namespace_names.rqname_equal cfname Namespace_builtin.fs_promote_to_numeric) then (* In the case of protofunctions (fs:convert-simple-operand...), compile to dedicated algebraic operators. *) let (cexpr1,atomic_type) = decode_proto_value cexpr_list in let ao1 = compile_cexpr compile_ctxt cexpr1 in let indep = OneSub ao1 in let dep = NoSub in let op_name = AOEPromoteNumeric atomic_type in logical_aalgop_mkop op_name indep dep None eh fi else if (Namespace_names.rqname_equal cfname Namespace_builtin.fs_promote_to_anystring) then (* In the case of protofunctions (fs:convert-simple-operand...), compile to dedicated algebraic operators. *) let cexpr1 = decode_noproto_value cexpr_list in let ao1 = compile_cexpr compile_ctxt cexpr1 in let indep = OneSub ao1 in let dep = NoSub in let op_name = AOEPromoteAnyString in logical_aalgop_mkop op_name indep dep None eh fi else if (Namespace_names.rqname_equal cfname Namespace_builtin.fs_unsafe_promote_to_numeric) then let (cexpr1,atomic_type) = decode_proto_value cexpr_list in let ao1 = compile_cexpr compile_ctxt cexpr1 in let indep = OneSub ao1 in let dep = NoSub in let op_name = AOEUnsafePromoteNumeric atomic_type in logical_aalgop_mkop op_name indep dep None eh fi else if (Namespace_names.rqname_equal cfname Namespace_builtin.fs_convert_simple_operand) then let (cexpr1,atomic_type) = decode_proto_value cexpr_list in let ao1 = compile_cexpr compile_ctxt cexpr1 in let indep = OneSub ao1 in let dep = NoSub in let op_name = AOEConvertSimple atomic_type in logical_aalgop_mkop op_name indep dep None eh fi else (* Otherwise, compile the function call as usual. 
*) begin let ao_args = List.map (compile_cexpr compile_ctxt) cexpr_list in let arity = List.length cexpr_list in let indep = ManySub (Array.of_list ao_args) in let dep = NoSub in let op_name = if is_builtin (cfname,arity) then AOECallBuiltIn ( (cfname,arity), (Array.of_list (List.map (compile_opt_ctype compile_ctxt) optintypes)), compile_ctype compile_ctxt outtype, upd) else AOECallUserDefined ( (cfname,arity), (Array.of_list (List.map (compile_opt_ctype compile_ctxt) optintypes)), compile_ctype compile_ctxt outtype, upd, selfrecur) in logical_aalgop_mkop op_name indep dep None eh fi end | CEOverloadedCall (cfname,cexpr_list, osig) -> let ao_args = List.map (compile_cexpr compile_ctxt) cexpr_list in let arity = List.length cexpr_list in let indep = ManySub (Array.of_list ao_args) in let dep = NoSub in let asig = compile_overloaded_table_sigs compile_ctxt osig in let op_name = AOECallOverloaded ((cfname,arity),asig) in logical_aalgop_mkop op_name indep dep None eh fi | CEScalar dmv -> let indep = NoSub in let dep = NoSub in let op_name = AOEScalar dmv in logical_aalgop_mkop op_name indep dep None eh fi | CEProtoValue pv -> raise (Query (Internal_Error "Prototypical value occuring outside of proper fs:functions!")) | CESeq (cexpr1,cexpr2) -> let ao1 = compile_cexpr compile_ctxt cexpr1 in let ao2 = compile_cexpr compile_ctxt cexpr2 in let indep = TwoSub (ao1, ao2) in let dep = NoSub in let op_name = AOESeq in logical_aalgop_mkop op_name indep dep None eh fi | CEEmpty -> let indep = NoSub in let dep = NoSub in let op_name = AOEEmpty in logical_aalgop_mkop op_name indep dep None eh fi | CEDocument cexpr1 -> let ao1 = compile_cexpr compile_ctxt cexpr1 in let indep = OneSub ao1 in let dep = NoSub in let op_name = AOEDocument in logical_aalgop_mkop op_name indep dep None eh fi | CEPI (ncname,str) -> let indep = NoSub in let dep = NoSub in let op_name = AOEPI (ncname,str) in logical_aalgop_mkop op_name indep dep None eh fi | CEPIComputed (cexpr1, cexpr2) -> let ao1 = compile_cexpr compile_ctxt cexpr1 in let ao2 = compile_cexpr compile_ctxt cexpr2 in let indep = TwoSub (ao1, ao2) in let dep = NoSub in let op_name = AOEPIComputed in logical_aalgop_mkop op_name indep dep None eh fi | CEComment str -> let indep = NoSub in let dep = NoSub in let op_name = AOEComment str in logical_aalgop_mkop op_name indep dep None eh fi | CECommentComputed cexpr1 -> let ao1 = compile_cexpr compile_ctxt cexpr1 in let indep = OneSub ao1 in let dep = NoSub in let op_name = AOECommentComputed in logical_aalgop_mkop op_name indep dep None eh fi | CEText text -> let indep = NoSub in let dep = NoSub in let op_name = AOEText text in logical_aalgop_mkop op_name indep dep None eh fi | CECharRef i -> let indep = NoSub in let dep = NoSub in let op_name = AOECharRef i in logical_aalgop_mkop op_name indep dep None eh fi | CETextComputed cexpr1 -> let ao1 = compile_cexpr compile_ctxt cexpr1 in let indep = OneSub(ao1) in let dep = NoSub in let op_name = AOETextComputed in logical_aalgop_mkop op_name indep dep None eh fi | CEElem (cename,nsenv,cexpr_list) -> let ao_list = List.map (compile_cexpr compile_ctxt) cexpr_list in let indep = ManySub (Array.of_list ao_list) in let dep = NoSub in let relem_sym = Namespace_symbols.relem_symbol cename in let op_name = AOEElem (relem_sym,nsenv) in logical_aalgop_mkop op_name indep dep None eh fi | CEAnyElem (cexpr1,nsenv1, nsenv2, cexpr2) -> let ao1 = compile_cexpr compile_ctxt cexpr1 in let ao2 = compile_cexpr compile_ctxt cexpr2 in let indep = TwoSub(ao1,ao2) in let dep = NoSub in let op_name = 
AOEAnyElem (nsenv1, nsenv2) in logical_aalgop_mkop op_name indep dep None eh fi | CEAttr (caname,cexpr_list) -> let ao_list = List.map (compile_cexpr compile_ctxt) cexpr_list in let indep = ManySub (Array.of_list ao_list) in let dep = NoSub in let rattr_sym = Namespace_symbols.rattr_symbol caname in let op_name = AOEAttr rattr_sym in logical_aalgop_mkop op_name indep dep None eh fi | CEAnyAttr (cexpr1,nsenv,cexpr2) -> let ao1 = compile_cexpr compile_ctxt cexpr1 in let ao2 = compile_cexpr compile_ctxt cexpr2 in let indep = TwoSub(ao1,ao2) in let dep = NoSub in let op_name = AOEAnyAttr nsenv in logical_aalgop_mkop op_name indep dep None eh fi | CEError cexpr_list -> let ao_list = List.map (compile_cexpr compile_ctxt) cexpr_list in let indep = ManySub (Array.of_list ao_list) in let dep = NoSub in let op_name = AOEError in logical_aalgop_mkop op_name indep dep None eh fi | CETreat (cexpr1,cdt1) -> let ao1 = compile_cexpr compile_ctxt cexpr1 in let indep = OneSub ao1 in let dep = NoSub in let adt1 = compile_ctype compile_ctxt cdt1 in let op_name = AOETreat adt1 in logical_aalgop_mkop op_name indep dep None eh fi | CEValidate (vmode,cexpr1) -> let ao1 = compile_cexpr compile_ctxt cexpr1 in let indep = OneSub ao1 in let dep = NoSub in let op_name = AOEValidate vmode in logical_aalgop_mkop op_name indep dep None eh fi | CECast (cexpr1,nsenv,cdt1) -> let ao1 = compile_cexpr compile_ctxt cexpr1 in let indep = OneSub ao1 in let dep = NoSub in let adt1 = compile_ctype compile_ctxt cdt1 in let op_name = AOECast (nsenv, adt1) in logical_aalgop_mkop op_name indep dep None eh fi | CECastable (cexpr1,nsenv,cdt1) -> let ao1 = compile_cexpr compile_ctxt cexpr1 in let indep = OneSub ao1 in let dep = NoSub in let adt1 = compile_ctype compile_ctxt cdt1 in let op_name = AOECastable (nsenv, adt1) in logical_aalgop_mkop op_name indep dep None eh fi | CEForwardAxis (v,axis,cnode_test) | CEReverseAxis (v,axis,cnode_test) -> let dot_access = begin match get_variable_field_from_compile_context compile_ctxt v with | None -> let indep = NoSub in let dep = NoSub in let op_name = AOEVar v in logical_aalgop_mkop op_name indep dep None eh fi | Some crname -> let indep = NoSub in let dep = NoSub in let op_name = AOEAccessTuple(crname) in logical_aalgop_mkop op_name indep dep None eh fi end in let indep = OneSub dot_access in let dep = NoSub in let anode_test = compile_cnode_test cnode_test in let op_name = AOETreeJoin(axis,anode_test) in logical_aalgop_mkop op_name indep dep None eh fi | CESome (ocdt,vname1,cexpr1,cexpr2) -> let ao1 = compile_cexpr compile_ctxt cexpr1 in let ao2 = compile_cexpr compile_ctxt cexpr2 in let indep = OneSub ao1 in let dep = OneSub ao2 in let oadt = compile_opt_ctype compile_ctxt ocdt in let op_name = AOESome (oadt, vname1) in logical_aalgop_mkop op_name indep dep None eh fi | CEEvery (ocdt,vname1,cexpr1,cexpr2) -> let ao1 = compile_cexpr compile_ctxt cexpr1 in let ao2 = compile_cexpr compile_ctxt cexpr2 in let indep = OneSub ao1 in let dep = OneSub ao2 in let oadt = compile_opt_ctype compile_ctxt ocdt in let op_name = AOEEvery (oadt, vname1) in logical_aalgop_mkop op_name indep dep None eh fi | CELetServerImplement (nc1, uri, hostport, cexpr) -> let hostportao = compile_cexpr compile_ctxt hostport in let ao1 = compile_cexpr compile_ctxt cexpr in let indep = OneSub hostportao in let dep = OneSub ao1 in let op_name = AOEServerImplements (nc1, uri) in logical_aalgop_mkop op_name indep dep None eh fi | CEForServerClose (nc1, uri, cexpr) -> let ao1 = compile_cexpr compile_ctxt cexpr in let indep = 
OneSub ao1 in let dep = NoSub in let op_name = AOEForServerClose (nc1, uri) in logical_aalgop_mkop op_name indep dep None eh fi | CEEvalClosure (cexpr) -> let ao1 = compile_cexpr compile_ctxt cexpr in let indep = OneSub ao1 in let dep = NoSub in let op_name = AOEEvalClosure in logical_aalgop_mkop op_name indep dep None eh fi | CEExecute (async, nc, uri, hostport, cexpr) -> let hostportao = compile_cexpr compile_ctxt hostport in let ao1 = compile_cexpr compile_ctxt cexpr in let indep = TwoSub (hostportao, ao1) in let dep = NoSub in let op_name = if (async) then AOEASyncExecute (nc, uri) else AOEExecute (nc, uri) in logical_aalgop_mkop op_name indep dep None eh fi | CECopy cexpr1 -> let ao1 = compile_cexpr compile_ctxt cexpr1 in let indep = OneSub ao1 in let dep = NoSub in let op_name = AOECopy in logical_aalgop_mkop op_name indep dep None eh fi | CEDelete cexpr1 -> let ao1 = compile_cexpr compile_ctxt cexpr1 in let indep = OneSub ao1 in let dep = NoSub in let op_name = AOEDelete in logical_aalgop_mkop op_name indep dep None eh fi | CEInsert (cexpr1,cinsert_location) -> let ao1 = compile_cexpr compile_ctxt cexpr1 in let (ao2,insert_flag) = compile_cinsert_location compile_ctxt cinsert_location in let indep = TwoSub (ao1,ao2) in let dep = NoSub in let op_name = AOEInsert insert_flag in logical_aalgop_mkop op_name indep dep None eh fi | CERename (nsenv,cexpr1,cexpr2) -> let ao1 = compile_cexpr compile_ctxt cexpr1 in let ao2 = compile_cexpr compile_ctxt cexpr2 in let indep = TwoSub(ao1,ao2) in let dep = NoSub in let op_name = AOERename nsenv in logical_aalgop_mkop op_name indep dep None eh fi | CEReplace (value_of_flag,cexpr1,cexpr2) -> let ao1 = compile_cexpr compile_ctxt cexpr1 in let ao2 = compile_cexpr compile_ctxt cexpr2 in let indep = TwoSub(ao1,ao2) in let dep = NoSub in let op_name = AOEReplace value_of_flag in logical_aalgop_mkop op_name indep dep None eh fi | CESnap (sm,acexpr) -> let ao = compile_cexpr compile_ctxt acexpr in let indep = NoSub in let dep = OneSub ao in let op_name = AOESnap sm in logical_aalgop_mkop op_name indep dep None eh fi | CELetvar (codt,vn,cexpr1,cexpr2) -> let aodt = compile_opt_ctype compile_ctxt codt in let ao1 = compile_cexpr compile_ctxt cexpr1 in let compile_ctxt' = hide_variable_field_from_compile_context compile_ctxt vn in let ao2 = compile_cexpr compile_ctxt' cexpr2 in logical_aalgop_mkop (AOELetvar(aodt,vn)) (OneSub ao1) (OneSub ao2) None eh fi | CESet (vn,cexpr1) -> let ao1 = compile_cexpr compile_ctxt cexpr1 in logical_aalgop_mkop (AOESet vn) (OneSub ao1) NoSub None eh fi | CEImperativeSeq (cexpr1,cexpr2) -> let ao1 = compile_cexpr compile_ctxt cexpr1 in let ao2 = compile_cexpr compile_ctxt cexpr2 in let indep = TwoSub (ao1, ao2) in let dep = NoSub in let op_name = AOEImperativeSeq in logical_aalgop_mkop op_name indep dep None eh fi with | exn -> raise (Error.error_with_file_location fi exn) (*****************************************) (* Compilation of typeswitch expressions *) (*****************************************) and compile_cpattern compile_ctxt cpattern = let apattern_desc = match cpattern.pcpattern_desc with | CCase csequencetype -> ACase (compile_ctype compile_ctxt csequencetype) | CDefault -> ADefault in fmkapattern apattern_desc cpattern.pcpattern_loc and compile_typeswitch_branch compile_ctxt (cpattern,ovname,cexpr1) = let compile_ctxt' = match ovname with | None -> compile_ctxt | Some vname -> hide_variable_field_from_compile_context compile_ctxt vname in let apattern = compile_cpattern compile_ctxt cpattern in 
(apattern,ovname),(compile_cexpr compile_ctxt' cexpr1) (************************************) (* Compilation of flwor expressions *) (************************************) and compile_return_clause compile_ctxt op3 return_clause eh fi = (* [[ return Expr1 ]](Op3) == Map_(i -> [[ Expr1 ]])(Op3) *) let op1 = compile_cexpr compile_ctxt return_clause in (* let table_op = *) (* logical_aalgop_mkop AOEMaterializeTable (OneSub op3) NoSub None eh fi *) (* in *) let map = logical_aalgop_mkop AOEMapToItem (OneSub op3) (OneSub op1) None eh fi in map and compile_order_by_clause compile_ctxt op3 (order_by_clause,return_clause) eh fi = match order_by_clause with | None -> compile_return_clause compile_ctxt op3 return_clause eh fi | Some (stablekind,corder_spec_list,osig) -> begin let rec split_fun corder_spec_list = match corder_spec_list with | [] -> ([],[]) | (cexpr,sortkind,emptysortkind) :: corder_spec_list' -> let (cexpr_list,rest_list) = split_fun corder_spec_list' in (cexpr :: cexpr_list, (sortkind,emptysortkind) :: rest_list) in let splited = split_fun corder_spec_list in let asig = compile_overloaded_table_sigs compile_ctxt osig in let op_name = AOEOrderBy (stablekind,snd splited, asig) in let op1_list = List.map (compile_cexpr compile_ctxt) (fst splited) in let op1_array = Array.of_list op1_list in let new_op3 = logical_aalgop_mkop op_name (OneSub op3) (ManySub op1_array) None eh fi in compile_return_clause compile_ctxt new_op3 return_clause eh fi end and compile_where_clause compile_ctxt op3 (where_clause,order_by_clause,return_clause) eh fi = match where_clause with | None -> compile_order_by_clause compile_ctxt op3 (order_by_clause,return_clause) eh fi | Some where_expr -> let select = compile_select compile_ctxt where_expr op3 eh fi in compile_order_by_clause compile_ctxt select (order_by_clause,return_clause) eh fi and compile_fl_clauses compile_ctxt op3 fl_clauses (where_clause,order_by_clause,return_clause) eh fi = match fl_clauses with | [] -> (* Calls compilation for the rest of the FLWOR block *) begin compile_where_clause compile_ctxt op3 (where_clause,order_by_clause,return_clause) eh fi end | (CEFOR (odt,vname,None,cexpr1)) :: fl_clauses' -> (* [[ for $v in Expr1 return Expr2 ]](Op3) == [ [[ Expr2 ]](Map_(i -> Map_(j -> [ v : j ] o i)([[ Expr1 ]]))(Op3)) / $v --> #v ] Changed to: [ [[Expr2]] (MapConcat( i -> MapItemTuple{[v : j]}([[Expr1]]) o i)(Op3) / $v --> #v ] *) (* 1. Compile Expr1 *) begin let op1 = compile_cexpr compile_ctxt cexpr1 in (* 2. Build the new Op3 *) let fresh_name = get_fresh_var_name compile_ctxt in let input_j = logical_aalgop_mkop (AOEVar fresh_name) NoSub NoSub None eh fi in let compile_ctxt' = add_variable_field_to_compile_context compile_ctxt vname in let tname = get_tuple_field_name compile_ctxt' vname in let aodt = compile_opt_ctype compile_ctxt odt in let tuple_construct = logical_aalgop_mkop (AOECreateTuple [|(aodt,tname)|]) (ManySub [|input_j|]) NoSub None eh fi in let map_inner = logical_aalgop_mkop (AOEMapFromItem fresh_name) (OneSub op1) (OneSub tuple_construct) None eh fi in let map_outer = logical_aalgop_mkop AOEMapConcat (OneSub op3) (OneSub map_inner) None eh fi in (* 3. 
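(* Worked example (hypothetical query): for the clause
       for $x in $cat/book
   the operators built in steps 1 and 2 above first compile $cat/book to
   op1, then construct
       CreateTuple [ x ]   over a fresh item variable,
       MapFromItem         to turn each item of op1 into a one-field tuple,
       MapConcat           to concatenate those tuples with the incoming Op3,
   so that later clauses, and finally the return clause via MapToItem, see
   one tuple per binding of $x.  With a positional variable, as in
       for $x at $i in $cat/book
   essentially the same plan is threaded through ConcatTuples and wrapped in
   a MapIndex that fills the field for $i. *)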
Apply compilation to the rest of the FLWOR block, passing the new Op3 *) compile_fl_clauses compile_ctxt' map_outer fl_clauses' (where_clause,order_by_clause,return_clause) eh fi end | (CEFOR (odt,vname,Some vname',cexpr1)) :: fl_clauses' -> (* [[ for $v at $i in Expr1 return Expr2 ]](Op3) == [ [[ Expr2 ]](Map_(i -> MapIndex(i)(Map_(j -> [ v : j ] o i)([[ Expr1 ]])))(Op3)) / $v --> #v ] ChangedTo: [ [[ Expr2 ]](MapConcat( t -> MapIndex(i)(MapItemTuple{[v: j]} ([[ Expr1 ]]))) (Op3)) / $v --> #v ] *) (* 1. Compile Expr1 *) begin let op1 = compile_cexpr compile_ctxt cexpr1 in (* 2. Build the new Op3 *) let fresh_name = get_fresh_var_name compile_ctxt in let input_j = logical_aalgop_mkop (AOEVar fresh_name) NoSub NoSub None eh fi in (***************************) (* Compile the tuple names *) (***************************) let compile_ctxt' = add_variable_field_to_compile_context compile_ctxt vname in let compile_ctxt'' = add_variable_field_to_compile_context compile_ctxt' vname' in let tname = get_tuple_field_name compile_ctxt'' vname in let tname' = get_tuple_field_name compile_ctxt'' vname' in let aodt = compile_opt_ctype compile_ctxt odt in let tuple_construct = logical_aalgop_mkop (AOECreateTuple [|(aodt,tname)|]) (ManySub [|input_j|]) NoSub None eh fi in let input_i = compile_inputtuple eh fi in let tuple_append = logical_aalgop_mkop AOEConcatTuples (TwoSub (tuple_construct,input_i)) NoSub None eh fi in let map_inner = logical_aalgop_mkop (AOEMapFromItem fresh_name) (OneSub op1) (OneSub tuple_append) None eh fi in let map_index = logical_aalgop_mkop (AOEMapIndex(tname')) (OneSub map_inner) NoSub None eh fi in let map_outer = logical_aalgop_mkop AOEMapConcat (OneSub op3) (OneSub map_index) None eh fi in (* 3. Apply compilation to the rest of the FLWOR block, passing the new Op3 *) compile_fl_clauses compile_ctxt'' map_outer fl_clauses' (where_clause,order_by_clause,return_clause) eh fi end | (CELET (odt,vname,cexpr1)) :: fl_clauses' -> (* [[ let $v in Expr1 return Expr2 ]](Op3) == [ [[ Expr2 ]](Map_(i -> [ v : [[ Expr1 ]] ] o i)(Op3)) / $v --> #v ] Changed To: [ [[ Expr2 ]](MapConcat(t -> [ v : [[ Expr1 ]] ]))(Op3) / $v --> #v *) (* 1. Compile Expr1 *) begin let op1 = compile_cexpr compile_ctxt cexpr1 in let compile_ctxt' = add_variable_field_to_compile_context compile_ctxt vname in let tname = get_tuple_field_name compile_ctxt' vname in let aodt = compile_opt_ctype compile_ctxt odt in (* 2. Build the new Op3 *) let tuple_construct = logical_aalgop_mkop (AOECreateTuple [|(aodt,tname)|]) (ManySub [|op1|]) NoSub None eh fi in let map = logical_aalgop_mkop AOEMapConcat (OneSub op3) (OneSub tuple_construct) None eh fi in compile_fl_clauses compile_ctxt' map fl_clauses' (where_clause,order_by_clause,return_clause) eh fi end and compile_flwor compile_ctxt fl_clauses (where_clause,order_by_clause,return_clause) eh fi = (* A special case is handled first, i.e. 
only let bindings and no other conditions *)
  (* Order By can be used, but is meaningless in all let bindings *)
  begin
    (* The initial input is an empty table *)
    let init_op3 =
      if has_input_set compile_ctxt
      then compile_inputtuple eh fi
      else logical_aalgop_mkop (AOECreateTuple [||]) (ManySub [||]) NoSub None eh fi
    in
    compile_fl_clauses
      compile_ctxt init_op3 fl_clauses
      (where_clause,order_by_clause,return_clause) eh fi
  end

and compile_select compile_ctxt select_clause op3 eh fi =
  let rec compile_select_helper cur_expr replace_flag =
    match cur_expr.pcexpr_desc with
    | CEIf(cexpr1, cexpr2, cexpr3) ->
        (******************************************)
        (* Restricted form                        *)
        (* Or  -> if (e1) then true else (e2)     *)
        (* And -> if (e1) then (e2) else false|() *)
        (******************************************)
        let ae1 = compile_select_helper cexpr1 replace_flag in
        if (is_fn_true cexpr2)
        then
          Cs_Disjunction( ae1, (compile_select_helper cexpr3 replace_flag))
        else if (is_fn_false cexpr3 || is_ce_empty cexpr3)
        then
          begin
            let rhs = compile_select_helper cexpr2 replace_flag in
            match (ae1, rhs) with
            | (Cs_SimpleConjunct( c1 ), Cs_SimpleConjunct (c2)) ->
                Cs_SimpleConjunct ( c1 @ c2 )
            | _ ->
                Cs_ComplexConjunct( ae1, rhs )
          end
        else
          begin
            (* An introduced if statement... cannot compile it *)
            let ae1 = compile_cexpr compile_ctxt cur_expr in
            Cs_SimpleConjunct ( ae1 :: [] )
          end
    | CECall(_,[cexpr],_,_,_) when (is_fn_boolean_cexpr cur_expr) ->
        (* We strip these out as we walk, but they should be replaced below *)
        compile_select_helper cexpr true
    | _ ->
        (* Here we need to ensure that we compute the effective boolean value of these *)
        let norm_ctxt = norm_context_from_compile_context compile_ctxt in
        let cur_expr =
          if ((is_fn_boolean_cexpr cur_expr) || not(replace_flag)) (* no need to do it twice *)
          then cur_expr
          else
            begin
              let eh = cur_expr.pcexpr_origin in
              let fi = cur_expr.pcexpr_loc in
              Norm_util.normalize_effective_boolean_value norm_ctxt cur_expr eh fi
            end
        in
        let ae1 = compile_cexpr compile_ctxt cur_expr in
        Cs_SimpleConjunct ( ae1 :: [] )
  in
  let rec compile_conjuncts conjunct index =
    match conjunct with
    | Cs_SimpleConjunct ( op_list ) ->
        let length = List.length op_list in
        let o_list = Array.of_list op_list in
        SimpleConjunct( index, (index + length -1)), o_list, (index + length)
    | Cs_ComplexConjunct(c1, c2) ->
        let op1, op_l1, index = compile_conjuncts c1 index in
        let op2, op_l2, index = compile_conjuncts c2 index in
        let ops = Array.append op_l1 op_l2 in
        ComplexConjunct(op1,op2), ops, index
    | Cs_Disjunction (d1,d2) ->
        let c1, op1, cur_index = compile_conjuncts d1 index in
        let c2, op2, cur_index = compile_conjuncts d2 cur_index in
        let ops = Array.append op1 op2 in
        Disjunct( c1,c2 ), ops, cur_index
  in
  let internal_conjunct_desc = compile_select_helper select_clause false in
  let conjunct_desc,ops,_ = compile_conjuncts internal_conjunct_desc 0 in
  logical_aalgop_mkop (AOESelect conjunct_desc) (OneSub op3) (ManySub ops) None eh fi

(*************************************)
(* Compilation of update expressions *)
(*************************************)

and compile_cinsert_location compile_ctxt cinsert_location =
  match cinsert_location with
  | CUAsLastInto cexpr1 ->
      let ao1 = compile_cexpr compile_ctxt cexpr1 in
      (ao1,AOUAsLastInto)
  | CUAsFirstInto cexpr1 ->
      let ao1 = compile_cexpr compile_ctxt cexpr1 in
      (ao1,AOUAsFirstInto)
  | CUInto cexpr1 ->
      let ao1 = compile_cexpr compile_ctxt cexpr1 in
      (ao1,AOUInto)
  | CUAfter cexpr1 ->
      let ao1 = compile_cexpr compile_ctxt cexpr1 in
      (ao1,AOUAfter)
  | CUBefore cexpr1 ->
      let ao1 = compile_cexpr
compile_ctxt cexpr1 in (ao1,AOUBefore) galax-1.1/compile/compile_util.mli0000664000076400007640000000332110560462355015410 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: compile_util.mli,v 1.12 2007/02/01 22:08:45 simeon Exp $ *) (* Module: Compile_util Description: This module contains some utilities used during the compilation phase. *) open Xquery_core_ast open Xquery_algebra_ast open Logical_algebra_types open Compile_context val compile_inputtuple : Xquery_ast.expr_handle -> Finfo.finfo -> logical_algop_expr val compile_ctype : logical_compile_context -> (Xquery_core_ast.csequencetype * Xquery_type_core_ast.cxtype) -> Xquery_algebra_ast.asequencetype val compile_opt_ctype : logical_compile_context -> (Xquery_core_ast.csequencetype * Xquery_type_core_ast.cxtype) option -> Xquery_algebra_ast.asequencetype option val compile_cnode_test : Xquery_core_ast.cnode_test -> Xquery_algebra_ast.anode_test val compile_overloaded_table_sigs : logical_compile_context -> Xquery_core_ast.overloaded_signature_table -> Xquery_algebra_ast.aoverloaded_signature_table val compile_cfunction_sig : logical_compile_context -> Xquery_core_ast.cfunction_signature -> Xquery_algebra_ast.afunction_signature galax-1.1/compile/compile_expr.mli0000664000076400007640000000201210560462355015405 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: compile_expr.mli,v 1.9 2007/02/01 22:08:45 simeon Exp $ *) (* Module: Compile_expr Description: The main interface to compilation. Takes in typed core expressions and returns a compiled version of the algebra *) open Xquery_core_ast open Logical_algebra_types val compile_cexpr : logical_compile_context -> acexpr -> logical_algop_expr galax-1.1/compile/compile_context_util.ml0000664000076400007640000000304310645741552017007 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: compile_context_util.ml,v 1.5 2007/07/13 18:24:42 mff Exp $ *) (* This function has been excised from Compile_context in order to resolve the cyclic dependency Logical_algebra_types -> Compile_context -> Logical_algebra_types. - Michael *) open Compile_context open Namespace_util open Xquery_algebra_ast let copy_strip_functions comp_ctxt = let convert_binding ht key func_defn = match !(func_defn.palgop_func_optimized_logical_plan) with | AOEFunctionImported -> () | AOEFunctionUser op -> (* What does stripping do here? 
*) let body = Xquery_algebra_ast_annotation_util.strip_annotation op in let func_defn = Xquery_algebra_ast_util.fmkalgop_function_body func_defn.palgop_func_formal_args (AOEFunctionUser body) None func_defn.palgop_func_output_type in RQNameIntHashtbl.add ht key func_defn in map_function_bodies comp_ctxt convert_binding galax-1.1/compile/compile_context.mli0000664000076400007640000001020210622621771016111 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: compile_context.mli,v 1.22 2007/05/16 15:32:09 mff Exp $ *) (* Module: Compile_context Description: This module contains context information used during algebraic compilation. *) open Xquery_core_ast open Xquery_common_ast open Xquery_algebra_ast open Typing_context (***********************) (* Compilation context *) (***********************) type ('a,'b) compile_context (* Create a new compilation context *) val build_compile_context : static_context -> ('a,'b) compile_context (* Default compilation context *) val default_compile_context : Norm_context.norm_context -> ('a,'b) compile_context (* Replace the static context *) val replace_static_context_in_compile_context : static_context -> ('a,'b) compile_context -> ('a,'b) compile_context val replace_namespace_env_in_compile_context : Namespace_context.nsenv -> ('a,'b) compile_context -> ('a,'b) compile_context (* Accesses parts of the static context from the compilation context *) val static_context_from_compile_context : ('a,'b) compile_context -> static_context val norm_context_from_compile_context : ('a,'b) compile_context -> Norm_context.norm_context (***************************) (* Treatement of functions *) (***************************) val add_function_to_compile_context : ('a,'b) compile_context -> (cfname * int) -> ('a,'b) aalgop_function_body -> unit val get_function_from_compile_context : string -> ('a,'b) compile_context -> (cfname * int) -> ('a,'b) aalgop_function_body val mem_function_from_compile_context : ('a,'b) compile_context -> (cfname * int) -> bool val update_physical_plan_in_compile_context : ('a,'b) compile_context -> (cfname * int) -> ('a,'b) aalgop_expr -> unit val register_builtin : (cfname * int) -> unit val is_builtin : (cfname * int) -> bool (***************************) (* Treatement of variables *) (***************************) (* val set_input : ('a,'b) compile_context -> ('a,'b) compile_context val unset_input : ('a,'b) compile_context -> ('a,'b) compile_context *) val has_input_set : ('a,'b) compile_context -> bool val add_variable_field_to_compile_context : ('a,'b) compile_context -> cvname -> ('a,'b) compile_context val hide_variable_field_from_compile_context : ('a,'b) compile_context -> cvname -> ('a,'b) compile_context val get_variable_field_from_compile_context : ('a,'b) compile_context -> cvname -> crname option val get_tuple_field_name : ('a,'b) compile_context -> cvname -> crname (* Getting fresh variables *) val get_new_variable_name : ('a, 'b) compile_context -> string -> Namespace_names.rqname val get_new_group_name : ('a, 'b) compile_context -> Namespace_names.rqname val get_new_dot_name : ('a, 'b) compile_context -> Namespace_names.rqname val get_new_var_name : ('a, 'b) compile_context -> Namespace_names.rqname val no_more_input : ('a,'b) compile_context -> unit val update_compile_context_from_module : ('a, 'b) 
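(* Usage sketch (my_dist is a hypothetical function name): compiled function
   bodies are keyed by name and arity, so registering and later retrieving a
   two-argument function looks roughly like

     add_function_to_compile_context comp_ctxt (my_dist, 2) body;
     ...
     let body = get_function_from_compile_context "caller" comp_ctxt (my_dist, 2)

   and is_builtin (my_dist, 2) stays false unless register_builtin was
   called for that same (name, arity) pair. *)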
compile_context -> ('a, 'b, 'c) Xquery_algebra_ast.aalgop_xmodule -> ('a, 'b) compile_context val copy_compile_context : ('a,'b) compile_context ->('a,'b) compile_context val copy_without_functions : ('a,'b) compile_context ->('c,'d) compile_context val map_function_bodies : ('a,'b) compile_context -> ((('c,'d) aalgop_function_body) Namespace_util.RQNameIntHashtbl.t -> (cfname * int) -> ('a,'b) aalgop_function_body -> unit) -> ('c,'d) compile_context val update_compile_context : ('a,'b) compile_context -> ('a,'b) compile_context -> ('a,'b) compile_context galax-1.1/compile/compile_context.ml0000664000076400007640000002105110622621771015744 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: compile_context.ml,v 1.28 2007/05/16 15:32:09 mff Exp $ *) (* Module: Compile_context Description: This module contains context information used during algebraic compilation. *) open Namespace_util open Namespace_names open Norm_context open Typing_context open Xquery_algebra_ast open Xquery_common_ast open Error (***********************) (* Compilation context *) (***********************) type ('a,'b) compile_context = { compiled_static_context : static_context; compiled_functions : (('a,'b) aalgop_function_body) RQNameIntHashtbl.t; compiled_variables : (crname) RQNameHashtbl.t; mutable compiled_has_input : bool; next_variable : Namespace_generate.name_gen ref} (* Creates a new compilation context *) let build_compile_context stat_ctxt = let mod_ctxt = Norm_context.module_context_from_norm_context (Typing_context.norm_context_from_stat_context stat_ctxt) in let ng = Processing_context.get_name_generator mod_ctxt Namespace_builtin.glx_prefix Namespace_builtin.glx_uri "" in { compiled_static_context = stat_ctxt; compiled_functions = RQNameIntHashtbl.create 167; compiled_variables = RQNameHashtbl.create 167; compiled_has_input = false; next_variable = ng } (* Default compilation context *) let default_compile_context norm_ctxt = let default_stat_ctxt = default_static_context norm_ctxt in build_compile_context default_stat_ctxt (* Replace the static context *) let replace_static_context_in_compile_context stat_ctxt comp_ctxt = { compiled_static_context = stat_ctxt; compiled_functions = comp_ctxt.compiled_functions; compiled_variables = comp_ctxt.compiled_variables; compiled_has_input = comp_ctxt.compiled_has_input; next_variable = comp_ctxt.next_variable } (* Accesses parts of the static context from the compilation context *) let static_context_from_compile_context c = c.compiled_static_context (* Replace the namespace environment *) let replace_namespace_env_in_compile_context nsenv comp_ctxt = (* Mary : This is gross... 
*) let stat_ctxt = static_context_from_compile_context comp_ctxt in let norm_ctxt = norm_context_from_stat_context stat_ctxt in let norm_ctxt' = replace_namespace_env_in_norm_context nsenv norm_ctxt in let stat_ctxt' = replace_norm_context_in_static_context norm_ctxt' stat_ctxt in replace_static_context_in_compile_context stat_ctxt' comp_ctxt let norm_context_from_compile_context alg_ctxt = norm_context_from_stat_context (static_context_from_compile_context alg_ctxt) (***************************) (* Treatement of functions *) (***************************) let add_function_to_compile_context comp_ctxt (cfname,arity) fb = if (RQNameIntHashtbl.mem comp_ctxt.compiled_functions (cfname,arity)) then raise (Query (Symbol_Already_Defined ("Function ", (prefixed_string_of_rqname cfname)))) else RQNameIntHashtbl.add comp_ctxt.compiled_functions (cfname,arity) fb let get_function_from_compile_context msg comp_ctxt (cfname,arity) = try RQNameIntHashtbl.find (comp_ctxt.compiled_functions) (cfname,arity) with | Not_found -> raise (Query (Undefined(msg^"Function " ^ (curly_uri_string_of_rqname cfname) ^ " with arity " ^ (string_of_int arity) ^ " not found in compile context."))) let mem_function_from_compile_context comp_ctxt (cfname,arity) = RQNameIntHashtbl.mem comp_ctxt.compiled_functions (cfname,arity) let update_physical_plan_in_compile_context comp_ctxt (name,arity) body = let func_defn = get_function_from_compile_context "update_physical_plan" comp_ctxt (name,arity) in match !(func_defn.palgop_func_physical_plan) with | None -> func_defn.palgop_func_physical_plan := Some body | Some _ -> raise(Query(Code_Selection("Physical plan for "^(Namespace_names.prefixed_string_of_rqname name)^" already defined."))) (* Built in helper functions *) let builtin_fn_hash = RQNameIntHashtbl.create 167;; let register_builtin (cfname,arity) = RQNameIntHashtbl.add builtin_fn_hash (cfname,arity) ();; let is_builtin (cfname,arity) = RQNameIntHashtbl.mem builtin_fn_hash (cfname,arity) (***************************) (* Treatement of variables *) (***************************) let copy_compile_context comp_ctxt = { compiled_static_context = comp_ctxt.compiled_static_context; compiled_functions = RQNameIntHashtbl.copy comp_ctxt.compiled_functions; compiled_variables = RQNameHashtbl.copy comp_ctxt.compiled_variables; compiled_has_input = comp_ctxt.compiled_has_input; next_variable = comp_ctxt.next_variable} let has_input_set comp_ctxt = comp_ctxt.compiled_has_input let get_new_name comp_ctxt orig_name = Namespace_generate.generate_name_with_prefix !(comp_ctxt.next_variable) orig_name let get_new_group_name comp_ctxt = get_new_name comp_ctxt "group_created" let get_new_dot_name comp_ctxt = get_new_name comp_ctxt "dot_new" let get_new_var_name comp_ctxt = get_new_name comp_ctxt "var_new" let get_new_tuple_name comp_ctxt cvname = let (_,_, orig_name) = cvname in get_new_name comp_ctxt orig_name let get_new_variable_name comp_ctxt cvname = get_new_name comp_ctxt cvname (* Assumes we know it to be a tuple field name.. 
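(* Sketch of the intended use (hypothetical variable $x): when a FLWOR
   clause binds $x, the compiler calls
       add_variable_field_to_compile_context comp_ctxt x
   which generates a fresh field name derived from the local name of x and
   records the binding, so that
       get_variable_field_from_compile_context comp_ctxt' x
   answers Some field while the clause is in scope, and
       get_tuple_field_name comp_ctxt' x
   returns that same field (raising a Compilation error if x was never
   registered).  hide_variable_field_from_compile_context removes the
   binding again, as is done for typeswitch branches that rebind a
   variable. *)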
*) let get_tuple_field_name comp_ctxt cvname = try RQNameHashtbl.find comp_ctxt.compiled_variables cvname with Not_found -> raise (Query (Compilation ("Looking for tuple field " ^ (Namespace_names.prefixed_string_of_rqname cvname) ^ " but has not been compiled"))) let add_variable_field_to_compile_context comp_ctxt cvname = let comp_ctxt' = copy_compile_context comp_ctxt in begin let crname = get_new_tuple_name comp_ctxt' cvname in RQNameHashtbl.replace comp_ctxt'.compiled_variables cvname crname; comp_ctxt'.compiled_has_input <- true; comp_ctxt' end let hide_variable_field_from_compile_context comp_ctxt cvname = let comp_ctxt' = copy_compile_context comp_ctxt in begin RQNameHashtbl.remove comp_ctxt'.compiled_variables cvname; comp_ctxt' end let get_variable_field_from_compile_context comp_ctxt cvname = if (RQNameHashtbl.mem comp_ctxt.compiled_variables cvname) then Some (RQNameHashtbl.find comp_ctxt.compiled_variables cvname) else None let no_more_input comp_ctxt = comp_ctxt.compiled_has_input <- false (* External Build functions *) let update_compile_context_from_module comp_ctxt m = let fns = m.palgop_module_prolog.palgop_prolog_functions in List.iter (fun fdecl -> let (name, _, body, _) = fdecl.palgop_function_decl_desc in add_function_to_compile_context comp_ctxt name body) fns; comp_ctxt let copy_without_functions comp_ctxt = { compiled_static_context = comp_ctxt.compiled_static_context; compiled_functions = RQNameIntHashtbl.create 167; compiled_variables = RQNameHashtbl.copy comp_ctxt.compiled_variables; compiled_has_input = comp_ctxt.compiled_has_input; next_variable = comp_ctxt.next_variable} let map_function_bodies comp_ctxt fn = let ht = RQNameIntHashtbl.create 167 in RQNameIntHashtbl.iter (fn ht) comp_ctxt.compiled_functions; { compiled_static_context = comp_ctxt.compiled_static_context; compiled_functions = ht; compiled_variables = RQNameHashtbl.copy comp_ctxt.compiled_variables; compiled_has_input = comp_ctxt.compiled_has_input; next_variable = comp_ctxt.next_variable} (* If a binding from context_2 is in context_1, context_1's binding is replaced *) let update_compile_context context_1 context_2 = let check_fn ht key binding = if RQNameIntHashtbl.mem context_1.compiled_functions key then RQNameIntHashtbl.add ht key (RQNameIntHashtbl.find context_1.compiled_functions key) else RQNameIntHashtbl.add ht key binding in map_function_bodies context_2 check_fn (* let _ = print_string("Compile_context\n") *) galax-1.1/compile/compile_context_util.mli0000664000076400007640000000214210560462355017154 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: compile_context_util.mli,v 1.3 2007/02/01 22:08:45 simeon Exp $ *) (* This function has been excides from Compile_context in order to resolve the cyclic dependency Logical_algebra_types -> Compile_context -> Logical_algebra_types. - Michael *) open Compile_context open Logical_algebra_types (* More expensive, strip functions of their annotations *) val copy_strip_functions : ('a,'b) compile_context -> logical_compile_context galax-1.1/compile/compile_annotate.ml0000664000076400007640000015224410764247300016101 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. 
*) (* *) (***********************************************************************) (* Module: Compile_annotate Description: This module implements the walker to calculate annotations on the algebra AST. *) open Error open Xquery_algebra_ast open Xquery_algebra_ast_util open Xquery_algebra_ast_annotation_util (******************) (* Free Variables *) (******************) let empty_fields = [] (*************************************************************) (*** TO EASE PROCESSING THE LISTS ARE KEPT IN SORTED ORDER ***) (*************************************************************) let cv_compare i j = String.compare (Namespace_names.curly_uri_string_of_rqname i) (Namespace_names.curly_uri_string_of_rqname j) let use_count_compare (i,_) (j,_) = cv_compare i j (* Used to merge variable counts (which are sorted individually *) let variable_count_merge l = (* Merge all the lists so they are now in order by cv name *) let merged = List.fold_left (List.merge use_count_compare) [] l in (* Now, scan the list and merge it *) let rec merge_helper (completed_items, left_over) new_item = let new_v, (new_count,old_usage) = new_item in match left_over with | None -> (completed_items, (Some new_item)) | Some (v,(count, usage)) -> begin if v = new_v then begin let count' = count + new_count in (* Check to see if it is once used or never used *) if (((old_usage = Never) && (usage = Once)) || ((usage = Never) && (old_usage = Once))) && (count' = 1) then (completed_items, (Some (v, (count', Once)))) else (completed_items, (Some (v, (count', Many)))) end else (completed_items @ [v,(count,usage)]), (Some new_item) end in let cl, lo = List.fold_left merge_helper ([], None) merged in let cl = match lo with None -> cl | Some it -> cl @ [ it ] in cl (* Must list all bound variables here *) (* Return must be sorted by cv_compare *) let extract_bound_free_variables aname = match aname with | AOEMapFromItem(vn) | AOELetvar(_,vn) | AOESome (_,vn) | AOEEvery (_, vn) -> ((vn,(0,Never)) :: [], []) | AOESeq | AOEEmpty | AOEDocument | AOEPI _ | AOEPIComputed | AOEComment _ | AOECommentComputed | AOEText _ | AOECharRef _ | AOETextComputed | AOEElem _ | AOEAnyElem _| AOEAttr _ | AOEAnyAttr _ | AOEError | AOETreat _ | AOEValidate _ | AOECast _ | AOECastable _ | AOEIf | AOEWhile | AOEScalar _ | AOEInputTuple | AOECallBuiltIn _ | AOECallOverloaded _ | AOEConvertSimple _ | AOEPromoteNumeric _ | AOEPromoteAnyString | AOEUnsafePromoteNumeric _ | AOEServerImplements _ | AOEExecute _ | AOEForServerClose _ | AOEEvalClosure | AOEASyncExecute _ | AOECallUserDefined _ | AOEOuterMapConcat _ | AOEMapConcat | AOEGroupBy _ | AOENullMap _ | AOEParse _ | AOETreeJoin _ | AOETupleTreePattern (_,_) (* | AOEPrune _ | AOEDistinct _ *) | AOEImperativeSeq -> ([],[]) | AOEVar(vn) -> ([], (vn,(1,Once)) :: []) | AOESet(vn) -> ([], (vn,(1,Many)) :: []) (* Nicola: The variable is not actually "used", but "defined" *) | AOETypeswitch _ -> raise (Query (Internal_Error ("SHOULD NOT BE HERE - Typeswitch must be handled differently"))) (* Function Calls *) (* Galax extensions *) (* Tuple operations *) | AOEAccessTuple _ (* Tuple field access *) | AOECreateTuple _ (* Tuple creation *) | AOEConcatTuples (* Tuple concatenation *) | AOEProject _ | AOEMapToItem (* Tuple to item iteration *) | AOEMap (* Tuple iteration *) | AOEProduct (* Cartesian product *) | AOEJoin _ (* Join *) | AOELeftOuterJoin _ | AOESelect _ (* Selection *) | AOEOrderBy _ | AOEMapIndex _ | AOEMapIndexStep _ -> ([], []) (* Update operations *) | AOECopy | AOEDelete | AOEInsert _ | AOERename _ 
| AOEReplace _ | AOESnap _ -> ([], []) let is_typeswitch cur = match cur with AOETypeswitch __ -> true | _ -> false let handle_typeswitch cur dep = let ovn_list = match cur with AOETypeswitch l -> List.map (fun (_,ovn) -> ovn) (Array.to_list l) | _ -> raise (Query (Internal_Error ("Typeswitch handle called on non-typeswitch coreexp"))) in (* Must be removed individually from the scopes. case $v ... case $x ($v is still free) $v *) let scope_handle (ovn,dep) = let fvs = algop_get_use_counts dep in match ovn with None -> fvs, [] | Some v -> (* Odds are it will be used so, no .mem *) (* This is stable *) List.partition (fun (x,_) -> (not (v = x))) fvs in try List.split (List.map scope_handle (List.combine ovn_list dep)) with Invalid_argument a -> raise (Query (Malformed_Algebra_Expr("There are differing numbers of dependent expressions and typeswitch cases.."^ a ))) (* If you appear as a dependent expression of one of the below ops, and are not bound by it, then you are used many times. Think: let $y .. for $x in .. $y return .. *) let calculate_usage cur dep_free = match cur with | AOESome _ | AOEEvery _ | AOEMapFromItem _ | AOEMapToItem | AOEMap | AOEMapConcat | AOEOuterMapConcat _ -> List.map (fun (vn,(c,_)) -> (vn, (c, Many))) dep_free | _ -> dep_free (* returns lists in sorted order (by cv_compare) *) let compute_free_and_bound_vars cur indep dep = let bound_filter bv = List.partition (fun (x,_) -> not (List.mem_assoc x bv)) in (* Get all the variables bound by this one *) let dep_free, bound_variables = if is_typeswitch cur then handle_typeswitch cur dep else begin let bound_variables,free_variables = extract_bound_free_variables cur in let fv_lists = (List.map algop_get_use_counts dep) @ [free_variables] in let bound_removed,bound = List.split (List.map (bound_filter bound_variables) fv_lists) in (* Returns a list of lists, each list in sorted order *) (* If it is unused, it will not appear in bound, we want it to show up with useage 0 *) let bound_with_cur = List.map (List.merge use_count_compare bound_variables) bound in bound_removed, bound_with_cur end in (* Usage Count: If vn is bound outside a loop and we cross a loop, then it is a many time access for example: let $y := .. for $x in .. return $y/id *) let indep_free = List.map algop_get_use_counts indep in (* dep_free should be those variables which are free in the dep and not used in this expression *) let dep_usage = List.map (calculate_usage cur) dep_free in (* We couldn't have bound anything in the indeps and remove duplicates *) let free_variables = variable_count_merge (indep_free @ dep_usage) in let bound_variables = variable_count_merge bound_variables in free_variables, bound_variables (* Tuple Access Rules: NOTE: This function assumes the indep_expr is already properly annotated. 
TupleAccess if there is an access tuple TupleReturn if there is a tuple return If TupleConcat just put in an ordered list and remove with duplicates *) let rec handle_returned_fields_and_dep be cur_op input_tuple_fields input_candidate_fields = let indep = cur_op.psub_expression in let dep = cur_op.pdep_sub_expression in let indep_list = subexpr_to_list indep in let indep_returned = List.concat (List.map algop_get_returned_fields indep_list) in let returned_tuple_fields, bdep_annotated = match cur_op.palgop_expr_name with (* let-server-return and for-server-return propagates the tuple fields produced by its indep subexpr *) | AOEServerImplements _ | AOEExecute _ -> indep_returned, false | AOEASyncExecute _ | AOEForServerClose _ | AOEEvalClosure -> indep_returned, false | AOECreateTuple crname_array -> (* Just want to keep the names *) let crname_list = Array.to_list crname_array in (* Don't want to use fst in case it changes *) let crnames = List.map (fun (odt, name) -> name) crname_list in crnames, false | AOEConcatTuples -> indep_returned, false | AOEAccessTuple _ -> [], false (* Tuple field access *) | AOENullMap v -> (v :: indep_returned), false | AOETupleTreePattern (input_field, pt) -> let outputs = get_all_outputs_from_twig_pattern pt in (* let remove_field fieldname fields = *) (* List.filter (fun (x) -> not (Namespace_names.rqname_equal fieldname x)) fields *) (* in *) (* (outputs @ (remove_field input_field indep_returned)), false *) [input_field] @ outputs @ indep_returned, false | AOEProject f_arr -> let is_project_field pfields field = List.fold_left (fun b f -> (Namespace_names.rqname_equal field f) || b) false pfields in List.filter (fun f -> is_project_field (Array.to_list f_arr) f) indep_returned, false | AOEOuterMapConcat vn -> let dep_expression = access_onesub dep in let input_tuple = algop_get_returned_fields (access_onesub indep) in let (_, input_tuple_candidates, _) = algop_get_tuple_field_use_counts (access_onesub indep) (*cur_op*) in annotate_compilation be input_tuple input_tuple_candidates dep_expression; (* Dependent has precedent *) let return_fields = (algop_get_returned_fields dep_expression) @ input_tuple in (vn ::return_fields), true | AOEMapConcat -> let dep_expression = access_onesub dep in let input_tuple = algop_get_returned_fields (access_onesub indep) in let (_, input_tuple_candidates, _) = algop_get_tuple_field_use_counts (access_onesub indep) (*cur_op*) in annotate_compilation be input_tuple input_tuple_candidates dep_expression; (* Dependent has precedent *) let return_fields = (algop_get_returned_fields dep_expression) @ input_tuple in return_fields, true | AOEMapToItem -> (* unwraps tuples *) let dep_expression = access_onesub dep in let input_tuple = algop_get_returned_fields (access_onesub indep) in let (_, input_tuple_candidates, _) = algop_get_tuple_field_use_counts (access_onesub indep) (*cur_op*) in annotate_compilation be input_tuple input_tuple_candidates dep_expression; [], true | AOEMapFromItem _ -> (* Item to tuple iteration *) (* Does not introdcue a context tuple *) let dep_expression = access_onesub dep in annotate_compilation be input_tuple_fields input_candidate_fields dep_expression; algop_get_returned_fields dep_expression, true (* Actual Tuple maps *) | AOEMapIndex iname | AOEMapIndexStep iname -> (iname :: indep_returned), false | AOEMap -> let dep_expression = access_onesub dep in let input_tuple = algop_get_returned_fields (access_onesub indep) in let (_, input_tuple_candidates, _) = algop_get_tuple_field_use_counts 
(access_onesub indep) (*cur_op*) in annotate_compilation be input_tuple input_tuple_candidates dep_expression; algop_get_returned_fields dep_expression, true (* *** Join, Select etc. bind the INPUT tuple - must reflect that here for tuple field use count analysis!? - Michael *** *) | AOEJoin _ -> indep_returned, false | AOELeftOuterJoin (vn,pred_desc) -> (vn :: indep_returned), false | AOEProduct -> let i1, i2 = access_twosub indep in let returned_fields = (algop_get_returned_fields i1) @ (algop_get_returned_fields i2) in returned_fields, false | AOESelect _ (* Does not change the fields *) | AOEOrderBy _ -> indep_returned, false (* The index groups are stripped out, The returned_fields are those that are not stripped out *) (* This is not always the case... the scoping for groups needs work *) (* You can group on the same attribute in different group clauses *) | AOEGroupBy gd_list -> (* Should remove duplicates to speed the process *) (* let group_names = List.flatten (List.map Xquery_algebra_ast_util.get_group_names gd_list) in let returned_names = List.map Xquery_algebra_ast_util.get_aggregate_name gd_list in let returned_fields = (List.filter (fun x -> not (List.mem x group_names)) indep_returned) @ returned_names in (Gmisc.remove_duplicates returned_fields), false *) (* The first one is the last applied *) let returned_names = begin match gd_list with | [] -> raise (Query (Malformed_Algebra_Expr ("Empty group description"))) | gd :: rest -> (get_aggregate_name gd) (* The aggregate name and the induced names *) :: (get_induced_group gd) end in (* let returned_names = List.map Xquery_algebra_ast_util.get_aggregate_name gd_list in *) (* (indep_returned @ returned_names), false (* There should not be any duplicates *) *) returned_names, false | AOEInputTuple -> input_tuple_fields, false (* This should return the fields of the indep expression of the map... *) (* | AOEPrune _ | AOEDistinct _ -> [], false *) | _ -> (* Non tuple operations *) [], false in (* If the dependents were not annotated above, then do it now *) if (not bdep_annotated) then annotate_subexpr be input_tuple_fields input_candidate_fields dep ; (* On exit, annotated and returning return fields *) Gmisc.remove_duplicates returned_tuple_fields (*************************************************) (* This function assumes the indep's annotations *) (* are set correctly. *) (*************************************************) and annotate_op_and_dep be input_tuple_fields input_candidate_fields cur = let cur_name = cur.palgop_expr_name in let indep = cur.psub_expression in let dep = cur.pdep_sub_expression in let indep_list = subexpr_to_list indep in let indep_accessed = List.concat (List.map algop_get_accessed_fields indep_list) in (* This sets up the dep expressions too *) let returned_fields = handle_returned_fields_and_dep be cur input_tuple_fields input_candidate_fields in (* This needs the above to have setup the dep. expressions *) let dep_list = subexpr_to_list dep in let dep_accessed = List.concat (List.map algop_get_accessed_fields dep_list) in (* calculate tuples accessed *) let tuples_accessed = match cur_name with | AOEAccessTuple (vn) -> vn :: [] (* Should not have any indep expressions *) | AOETupleTreePattern (i, p) -> Gmisc.remove_duplicates (i :: indep_accessed @ dep_accessed) (* TupleTreePattern actually accesses a field ! 
*) | AOEGroupBy gd_list -> let group_accessed = List.concat (List.map get_group_names gd_list) in let group_accessed2 = List.concat (List.map get_valid_names gd_list) in let group_accessed3 = List.concat (List.map get_induced_group gd_list) in Gmisc.remove_duplicates (indep_accessed @ dep_accessed @ group_accessed @ group_accessed2 @ group_accessed3) | _ -> Gmisc.remove_duplicates (indep_accessed @ dep_accessed) in (************************************) (* Tuple field use counts - Michael *) (************************************) let (tfuc, cf, c) = if (!Conf.allow_streamed_tuple_fields) then (* Accessors *) let get_tuple_field_use_counts op = let (tfuc, _, _) = algop_get_tuple_field_use_counts op in tfuc in let get_candidate_fields op = let (_, cf, _) = algop_get_tuple_field_use_counts op in cf in let get_cardinality op = let (_, _, c) = algop_get_tuple_field_use_counts op in c in (* Cardinality checks *) let is_cardinality_many c = match c with | Table CMany -> true | Table COne -> false | _ -> raise (Query (Prototype "Expected Table cardinality!")) in (* Cardinality of cross product *) let cross_product_cardinality c1 c2 = let raise_no_table_cardinality () = raise (Query (Prototype "Expected Table cardinality!")) in match c1 with | Table COne -> begin match c2 with | Table COne -> Table COne | Table CMany -> Table CMany | _ -> raise_no_table_cardinality () end | Table CMany -> begin match c2 with | Table _ -> Table CMany | _ -> raise_no_table_cardinality () end | _ -> raise_no_table_cardinality () in (* Use count construction *) let set_use_count_many tuple_fields = List.map (fun f -> (f, (0, Many))) tuple_fields in let set_use_count_once tuple_fields = List.map (fun f -> (f, (1, Once))) tuple_fields in (* Independent use count information *) let indep_tfuc = List.fold_left (List.merge use_count_compare) [] (List.map get_tuple_field_use_counts indep_list) in let indep_cf = List.fold_left (List.merge cv_compare) [] (List.map get_candidate_fields indep_list) in let indep_rf = List.fold_left (List.merge cv_compare) [] (List.map algop_get_returned_fields indep_list) in (* Dependent use count information *) let dep_tfuc = List.fold_left (List.merge use_count_compare) [] (List.map get_tuple_field_use_counts dep_list) in let dep_cf = List.fold_left (List.merge cv_compare) [] (List.map get_candidate_fields dep_list) in let dep_af = List.fold_left (List.merge cv_compare) [] (List.map algop_get_accessed_fields dep_list) in (******************************************************************) (* Contribution of the current operator to tuple field use counts *) (******************************************************************) let (current_tfuc, current_cf, current_c) = match cur_name with (* STATUS ------ Aligned inference rules/code; several operators are not covered properly, marked '(* *** OPEN ISSUE! *** *)'. General observations and principles: ------------------------------------ 1) Tuple field use count analysis uses the existing infrastructure for determining the fields that are accessed and returned by algebraic expressions. These are assumed to be available as functions (s.b.), and are excluded from inference rules for the sake of simplicity. 2) Besides counting literal accesses (AccessTuple operator), the analysis takes 'iterating operators' (e.g., MapConcat) into account. 2a) Iterating operators may not access tuple fields themselves, but they can create output tuple streams that contain 'virtual' copies of input tuples, although these copies are never actually made. 
As a consequence, the notion of 'candidate tuple fields' will be used to memorize those field names that are known to exist 'virtually' many times. Subsequent access to such a field later on in the plan must be counted as multiple accesses. 2b) The notion of 'free' variables does also exist for tuple fields: whenever a field is accessed in an expression expr, but not created in expr (because it is created elsewhere and communicated via the INPUT tuple), it is free in expr. 3) Some operators need to know the cardinality of a tuple stream in order to drive iteration (a special case is to test wether a tuple stream is empty). No explicit access is counted for cardinality assessment. NOTE: With XML token streams, the cardinality of a tuple stream that results from mapping a streamed item sequence to tuple fields does not 'come for free'. In case these fields are explicitly accessed at some point, the cardinality information falls out as a by-product (without actually being an additional access), but in case there is no explicit access, item boundaries remain 'undiscovered'. As a consequence, zero use count tuple fields are associated dummy-consumers during code selection. 4) Some operators materialize input tuple fields (e.g., Product). This must be counted as a single access to each of those fields. 5) *** What about the Cursor.cursor_peek in NullMap etc.??? *** 6) *** Could get rid of dependency of [rf(indep)/]af(dep) completely (model everything by means of candidates, see Join rules; need then correct propagation of input tuple candidates above!)?! *** Assumptions ----------- 1) Tuple field names are unique throughout the query plan. 2) A tuple operator always processes all tuples in its input in the same fashion. In particular, tuple field accesses affect potentially all 'virtual' copies of a field. Implementation -------------- The following approach is set-based. '+' stands for set-union, '*' for intersection. 'm(s)' sets the use count of all the tuple fields in s to '>1', o(s) to '1'. rf(e) gets the returned fields of expression e, (af) the accessed fields, and cf(e) the candiate fields (s.b.). c(e) is its cardinality (1 for a single tuple, >1 for a table of multiple tuples, and _ in case e does not return a table). Cardinality is needed in order to identify non-iterating map concat operations resulting from let-bindings inside for-loops. The algorithm proceeds by first calculating the contribution of the current operator. This is then combined in a uniform fashion (same for each kind of operator) with the analysis results from indep./dep. subexpressions (component-wise set union, where use counts are merged implicitly). The environment 'env' consists of a single slot for communicating candidate fields in analogy to data flow through the INPUT tuple (icf, INPUT Candidate Fields). [Augmentation of the environment 'env' actually happens in handle_returned_fields_and_dep.] Analysis result triple: ----------------------- ('tuple field use count information (accessed fields)', -- tfuc Name of tuple field, (None|Once|Many) accesses 'candidates fields (names of fields that for which -- cf any subsequent access implies repeated access)', 'operator output cardinality') -- c Mary's notes: Operator output cardinality: conservative estimate of the number of tuples produced by the operator. Necessary for let bindings, which gets compiled into MapConcat. Analysis takes iteration into account. 
*) (*************************) (* Polymorphic operators *) (*************************) | AOEExecute _ -> (* Execute is a polymorphic operator, which inherits the annotation of its independent operator. *) let (indep1, indep2) = access_twosub indep in let _ = access_nosub dep in (get_tuple_field_use_counts indep2, get_candidate_fields indep2, get_cardinality indep2) (*******************) (* Tuple operators *) (*******************) (* InputTuple *) (* We just have to place the candidate fields of the INPUT tuple from the context to the corresponding field in the result. *) (* env : icf ------------------------------------- env |- InputTuple{}() -> ({}, icf, 1) *) (* FINAL *) (* UCC = (CF_Input, RF_Input) ------------------------------------------------------------- UCC |- InputTuple{}() uses ({}, CF_Input, 1) returns RF_Input *) | AOEInputTuple -> let _ = access_nosub indep in let _ = access_nosub dep in ([], input_candidate_fields, Table COne) (* MapFromItem *) (* MapFromItem does not bind the INPUT tuple. That is, INPUT tuple fields are 'free' - accesses to INPUT tuple fields in the dep. branch must be counted as repeated accesses. *) (* tfuc = m(rf(IN) * af(dep)) ------------------------------------------------- env' |- MapFromItem{dep}(indep) -> (tfuc, {}, >1) *) (* FINAL *) (* UCC = (CF_Input, RF_Input) UCC |- Indep uses (UF_Indep, CF_Indep, C_Indep) returns RF_Indep UCC' = (CF_Input + RF_Input, RF_Input) UCC' |- Dep uses (UF_Dep, CF_Dep, C_Dep) returns RF_Dep ---------------------------------------------------------------------------------------------- UCC' |- MapFromItem{Dep}(Indep) uses (UF_Indep + UF_Dep, CF_Indep + CF_Dep, >1) returns RF_Dep *) | AOEMapFromItem _ -> let _ = access_onesub indep in let _ = access_onesub dep in let tfuc = Gmisc.intersect_list input_tuple_fields dep_af in (set_use_count_many tfuc, [], Table CMany) (* AccessTuple *) (* We have to count at least a single access for q. In case it is included in the INPUT tuple candidates, we must count multiple accesses. +: adds the use counts for a particular tuple field. *) (* env : icf ------------------------------------------------------------ env |- AccessTuple[q]{}() -> ({(q,1)} + m({q} * icf), {}, _) *) (* FINAL *) (* UCC = (CF_Input, RF_Input) ----------------------------------------------------------------------------- UCC |- AccessTuple[q]{}() uses (o({q}) + m({q} * CF_Input), {}, _) returns {} *) | AOEAccessTuple crname -> let _ = access_nosub indep in let _ = access_nosub dep in (* AOEAccessTuple implicitly accesses the input tuple. *) let qualified_candidates = set_use_count_many (Gmisc.intersect_list [crname] input_candidate_fields) in ((crname, (1, Once)) :: qualified_candidates, [], NoTable) (* CreateTuple *) (* We must add q1,...,qn with zero accesses for the analysis to properly merge use counts, later. *) (* ----------------------------------------------------------------------- env |- CreateTuple[q1,...,qn]{}(e) -> ({(q,0)} + ... + {(qn,0)}, {}, 1) *) (* FINAL *) (* UCC |- Indep_1 uses (UF_Indep_1, CF_Indep_1, C_Indep_1) returns RF_Indep_1 ... 
UCC |- Indep_n uses (UF_Indep_n, CF_Indep_n, C_Indep_n) returns RF_Indep_n ------------------------------------------------------------------------------------------------------------- UCC |- CreateTuple[q_1,...q_n]{}(Indep_1,...,Indep_n) uses ({(q_1,0),...(q_n,0)}, {}, 1) returns {q_1,...q_n} *) | AOECreateTuple crname_array -> let _ = access_manysub indep in let _ = access_nosub dep in let crname_list = Array.to_list crname_array in let fresh_fields = List.map (fun (odt, name) -> (name, (0, Never))) crname_list in (fresh_fields, [], Table COne) (* MapConcat *) (* a 1 b 1 c 1 ... a'2 b'2 c'2 ... | MapConcat / \ dep. / \ indep. a 1 b 2 c 3 ... ... In case the dep. branch has cardinality >1, we know that, should any field returned by indep. ever be accessed later on (as part of the result tuple stream of the MapConcat, this access would have to be counted as multiple accesses (iteration). This is achieved by memorizing the corresponding field names as 'candidates'. What about the other direction? If the cardinality of the indep. branch is >1, then we know that the dep. expression is evaluated multiple times, once for each tuple of indep., that is, once for each possible state of the INPUT tuple. Subsequent accesses to tuple fields do not target one and the same, but distinct values, and do not have to be counted as multiple accesses, thus. Do we have to take into account that fields may have been injected somewhere in the dep. branch by means of the INPUT tuple from an operator above? No, because MapConcat binds INPUT itself. That is, there can't be multiple accesses on 'free' input tuple fields in the dep. branch. *) (* env' = cf(indep) cf = if c(dep) > 1 then rf(indep) else {} c = if c(dep) = c(indep) = 1 then 1 else >1 ---------------------------------------------- env' |- MapConcat{dep}(indep) -> ({}, cf, c) *) (* FINAL *) (* UCC = (CF_Input, RF_Input) UCC |- Indep uses (UF_Indep, CF_Indep, C_Indep) returns RF_Indep UCC' = (CF_Indep, RF_Indep) UCC' |- Dep uses (UF_Dep, CF_Dep, C_Dep) returns RF_Dep CF = if (C_Dep > 1) then RF_Indep else {} C = max(C_Indep, C_Dep) ----------------------------------------------------------------------------------------------------------- UCC' |- MapConcat{Dep}(Indep) uses (UF_Dep + UF_Indep, CF + CF_Dep + CF_Indep, C) returns RF_Indep + RF_Dep *) | AOEMapConcat -> let indep_c = get_cardinality (access_onesub indep) in let dep_c = get_cardinality (access_onesub dep) in let cf = if (is_cardinality_many dep_c) then indep_rf else [] in let c = cross_product_cardinality indep_c dep_c in ([], cf, c) (* OuterMapConcat *) (* What about the Cursor.cursor_peek in the null test? - Michael *) (* env' = cf(indep) cf = if c(dep) > 1 then rf(indep) else {} c = if c(dep) = c(indep) = 1 then 1 else >1 ---------------------------------------------------------- env' |- OuterMapConcat[q]{dep}(indep) -> ({(q,0)}, cf, >1) *) | AOEOuterMapConcat crname -> let indep_c = get_cardinality (access_onesub indep) in let dep_c = get_cardinality (access_onesub dep) in let cf = if (is_cardinality_many dep_c) then indep_rf else [] in let c = cross_product_cardinality indep_c dep_c in ([(crname, (0, Never))], cf, c) (* Product *) (* A Product is (semantically) fully symmetric. The implementation does only materialize the right side, though. This has to be taken care of during physical typing. 
*) (* OLD *) (* cf1 = if c(i1) > 1 then rf(i2) else {} cf2 = if c(i2) > 1 then rf(i1) else {} c = if c(i1) = c(i2) = 1 then 1 else >1 --------------------------------------------- env |- Product{}(i1,i2) -> ({}, cf1 + cf2, c) *) (* NEW *) (* This is tricky! The materialization of a streamed tuple field on the right hand side consumes the stream cursor, and must be counted as a single access. E.g. for $x in $xs, $y in $ys => Product(MapFromItem{...}(...), let $z := $y MapConcat{[z:IN#y]}(...)) return $z If $y was bound to a stream (which was the case if only one explicit access was counted), that stream would be consumed when $z is bound (and materialized, because it is obviously accessed repeatedly). Additional materialization by the Product would then access an exhausted stream cursor in field $y. *) (* cf1 = if c(i1) > 1 then rf(i2) else {} cf2 = if c(i2) > 1 then rf(i1) else {} tfuc = o(rf(i2)) c = if c(i1) = c(i2) = 1 then 1 else >1 ----------------------------------------------- env |- Product{}(i1,i2) -> (tfuc, cf1 + cf2, c) *) (* FINAL *) (* UCC |- Indep_1 uses (UF_Indep_1, CF_Indep_1, C_Indep_1) returns RF_Indep_1 UCC |- Indep_2 uses (UF_Indep_2, CF_Indep_2, C_Indep_2) returns RF_Indep_2 CF_1 = if (C_Indep1 > 1) then RF_Indep2 else {} CF_2 = if (C_Indep2 > 1) then RF_Indep1 else {} CF = CF_1 + CF_2 + CF_Indep_1 + CF_Indep_2 UF = o(RF_Indep2) C = max(C_Indep_1, C_Indep_2) ------------------------------------------------------------------------------------------------------------ UCC |- Product{}(Indep_1,Indep_2) uses (UF + UF_Indep_1 + UF_Indep_2, CF, C) returns RF_Indep_1 + RF_Indep_2 *) | AOEProduct -> let _ = access_nosub dep in let (indep1, indep2) = access_twosub indep in let (rf1, rf2) = (algop_get_returned_fields indep1, algop_get_returned_fields indep2) in let (c1, c2) = (get_cardinality indep1, get_cardinality indep2) in let cf1 = if (is_cardinality_many c1) then rf2 else [] in let cf2 = if (is_cardinality_many c2) then rf1 else [] in let tfuc = set_use_count_once rf2 in let c = cross_product_cardinality c1 c2 in (tfuc, cf1 @ cf2, c) (* Join *) (* For joins, it actually matters what physical instantiation is chosen. A Nested Loop Join is implemented as a Product, followed by a Select.The right hand side is materialized into an array, and the left side scanned just once. Each tuple field used in a predicate is potentially accessed multiple times (true for both left and right hand sides). This corresponds to memorizing the affected fields as candidates (Product) and counting multiple accesses later on (Select). For a Hash Join, the right side is materialized to a hash table, and the left side scanned just once. Probing for matches requires only one access per tuple field on the left side; right side tuple fields may be probed multiple times. In both cases, there are two 'components' that have to be considered: 1) multiple accesses inside the predicates 2) potential multiple accesses later (candidate generation in analogy to Products) *) (* OLD *) (* env' = cf(i1 + i2) tfuc1 = if c(i1) > 1 then rf(i2) * af(d1) + ... + af(dn) tfuc2 = if c(i2) > 1 then rf(i1) * af(d1) + ... + af(dn) cf1 = if c(i1) > 1 then rf(i2) else {} cf2 = if c(i2) > 1 then rf(i1) else {} c = if c(i1) = c(i2) = 1 then 1 else >1 ---------------------------------------------------------------------------- env' |- NestedLoopJoin{d1,...,dn}(i1,i2) -> (m(tfuc1 + tfuc2), cf1 + cf2, c) *) (* This assumes the hash table bucket size is 1. 
- Michael *) (* OLD *) (* env' = cf(i1 + i2) tfuc = if c(i1) > 1 then rf(i2) * af(d1) + ... + af(dn) cf1 = if c(i1) > 1 then rf(i2) else {} cf2 = if c(i2) > 1 then rf(i1) else {} c = if c(i1) = c(i2) = 1 then 1 else >1 ------------------------------------------------------------- env' |- HashJoin{d1,...,dn}(i1,i2) -> (m(tfuc), cf1 + cf2, c) *) (* This conservatively assumes a Nested Loop Join (= Select{...}(Product(...))). *) (* NEW *) (* We have to count a single access for each tuple field of the right hand side independent subexpression (that side is always materialized; see Product). *) (* env' = cf(i1) + cf(i2) // tfuc1, tfuc2 should be modeled via candidate fields cf1, cf2, as follows: //cf1 = if c(i1) > 1 then rf(i2) else {} //cf2 = if c(i2) > 1 then rf(i1) else {} //env' = cf(i1) + cf(i2) + cf1 + cf2 //Can the current candidate fields be uniformly bound to the input candidate //fields of indep. expressions whenever the current op. binds INPUT? NO (MapConcat)! tfuc2 = if c(i2) > 1 then rf(i1) * af(d1) + ... + af(dn) else {} tfuc3 = rf(i2) cf1 = if c(i1) > 1 then rf(i2) else {} cf2 = if c(i2) > 1 then rf(i1) else {} c = if c(i1) = c(i2) = 1 then 1 else >1 --------------------------------------------------------------------------------------- env' |- NestedLoopJoin{d1,...,dn}(i1,i2) -> (m(tfuc1 + tfuc2) + o(tfuc3), cf1 + cf2, c) *) (* FINAL *) (* For a HashJoin, don't have to propagate left branch returned fields as candidates to the dependent expressions. There is at least one access though?! But it's not table materialization as for the right branch. *) (* UCC = (CF_Input, RF_Input) UCC |- Indep_1 uses (UF_Indep_1, CF_Indep_1, C_Indep_1) returns RF_Indep_1 UCC |- Indep_2 uses (UF_Indep_2, CF_Indep_2, C_Indep_2) returns RF_Indep_2 // candidate generation as in a Product CF_1 = if (C_Indep1 > 1) then RF_Indep2 else {} CF_2 = if (C_Indep2 > 1) then RF_Indep1 else {} CF = CF_1 + CF_2 + CF_Indep_1 + CF_Indep_2 // candidates must be propagated to the dep. branches UCC' = (CF, RF_Indep1 + RF_Indep2) UCC' |- Dep_1 uses (UF_Dep_1, CF_Dep_1, C_Dep_1) returns RF_Dep_1 ... UCC' |- Dep_n uses (UF_Dep_n, CF_Dep_n, C_Dep_n) returns RF_Dep_n // materialization of the right hand side corresponds to one access UF = o(RF_Indep2) + UF_Dep_1 + ... + UF_Dep_n + UF_Indep_1 + UF_Indep_2 C = max(C_Indep_1, C_Indep_2) ------------------------------------------------------------------------------------------------------------------------------------- UCC' |- NestedLoopJoin{Dep_1,...,Dep_n}(Indep_1,Indep_2) uses (UF, CF + CF_Dep_1 + ... + CF_Dep_n, C) returns RF_Indep_1 + RF_Indep_2 *) | AOEJoin _ -> let _ = access_manysub dep in let (indep1, indep2) = access_twosub indep in let (c1, c2) = (get_cardinality indep1, get_cardinality indep2) in let (rf1, rf2) = (algop_get_returned_fields indep1, algop_get_returned_fields indep2) in let tfuc1 = if (is_cardinality_many c1) then (Gmisc.intersect_list rf2 dep_af) else [] in let tfuc2 = if (is_cardinality_many c2) then (Gmisc.intersect_list rf1 dep_af) else [] in let tfuc3 = rf2 in let cf1 = if (is_cardinality_many c1) then rf2 else [] in let cf2 = if (is_cardinality_many c2) then rf1 else [] in let c = cross_product_cardinality c1 c2 in ((set_use_count_many (tfuc1 @ tfuc2)) @ (set_use_count_once tfuc3), cf1 @ cf2, c) (* FINAL *) (* Does a boolean field exist virtually many times in the result stream? 
- Michael *) (* UCC = (CF_Input, RF_Input) UCC |- Indep_1 uses (UF_Indep_1, CF_Indep_1, C_Indep_1) returns RF_Indep_1 UCC |- Indep_2 uses (UF_Indep_2, CF_Indep_2, C_Indep_2) returns RF_Indep_2 // candidate generation as in a Product CF_1 = if (C_Indep1 > 1) then RF_Indep2 else {} CF_2 = if (C_Indep2 > 1) then RF_Indep1 else {} CF = CF_1 + CF_2 + CF_Indep_1 + CF_Indep_2 // candidates must be propagated to the dep. branches UCC' = (CF, RF_Indep1 + RF_Indep2) UCC' |- Dep_1 uses (UF_Dep_1, CF_Dep_1, C_Dep_1) returns RF_Dep_1 ... UCC' |- Dep_n uses (UF_Dep_n, CF_Dep_n, C_Dep_n) returns RF_Dep_n // materialization of the right hand side corresponds to one access UF = {(q,0)} + o(RF_Indep2) + UF_Dep_1 + ... + UF_Dep_n + UF_Indep_1 + UF_Indep_2 C = max(C_Indep_1, C_Indep_2) ------------------------------------------------------------------------------------------------------------------------------------------------- UCC' |- NestedLoopLeftOuterJoin[q]{Dep_1,...,Dep_n}(Indep_1,Indep_2) uses (UF, CF + CF_Dep_1 + ... + CF_Dep_n, C) returns RF_Indep_1 + RF_Indep_2 *) | AOELeftOuterJoin (crname, _) -> let dep = access_manysub dep in let _ = Array.to_list dep in let (indep1, indep2) = access_twosub indep in let (c1, c2) = (get_cardinality indep1, get_cardinality indep2) in let (rf1, rf2) = (algop_get_returned_fields indep1, algop_get_returned_fields indep2) in let tfuc1 = if (is_cardinality_many c1) then (Gmisc.intersect_list rf2 dep_af) else [] in let tfuc2 = if (is_cardinality_many c2) then (Gmisc.intersect_list rf1 dep_af) else [] in let tfuc3 = rf2 in let cf1 = if (is_cardinality_many c1) then rf2 else [] in let cf2 = if (is_cardinality_many c2) then rf1 else [] in let c = cross_product_cardinality c1 c2 in ((crname, (0, Never)) :: [] @ (set_use_count_many (tfuc1 @ tfuc2)) @ (set_use_count_once tfuc3), cf1 @ cf2, c) (* Map, Select *) (* Cardinalities could be refined! - Michael *) (* env' |- cf(indep) ----------------------------------------- env' |- Map{dep}(indep) -> ({}, {}, >1) env' |- Select{dep}(indep) -> ({}, {}, >1) *) (* FINAL *) (* UCC = (CF_Input, RF_Input) UCC |- Indep uses UF_Indep cand CF_Indep ret RF_Indep cand C_Indep UCC' = (CF_Indep, RF_Indep) UCC' |- Dep_1 uses UF_Dep_1 cand CF_Dep_1 ret RF_Dep_1 cand C_Dep_1 -------------------------------------------------------------------------------------- UCC' |- Map{Dep}(Indep) uses (UF_Dep + UF_Indep, CF_Dep + CF_Indep, >1) returns RF_Dep *) | AOEMap -> let _ = access_onesub indep in let _ = access_onesub dep in ([], [], Table CMany) (* FINAL *) (* UCC = (CF_Input, RF_Input) UCC |- Indep uses (UF_Indep, CF_Indep, C_Indep) returns RF_Indep UCC' = (CF_Indep, RF_Indep) UCC' |- Dep_1 uses (UF_Dep_1, CF_Dep_1, C_Dep_1) returns RF_Dep_1 ... UCC' |- Dep_n uses (UF_Dep_n, CF_Dep_n, C_Dep_n) returns RF_Dep_n ------------------------------------------------------------------ UCC' |- Select{Dep_1,...,Dep_n}(Indep) uses (UF_Dep_1 + ... + UF_Dep_n + UF_Indep, CF_Dep_1 + ... + CF_Dep_n + CF_Indep, >1) returns RF_Indep *) | AOESelect _ -> let _ = access_onesub indep in let _ = access_manysub dep in ([], [], Table CMany) (* NullMap *) (* *** OPEN ISSUE! *** *) (* Cursor.cursor_is_empty does a peek, implications? - Michael *) (* The null field is 'virtually' repeated for each tuple in the indep. tuple stream. 
*) (* cf = if (c(indep) > 1) then q else {} c = c(indep) --------------------------------------------------- env |- NullMap[q]{}(indep) -> ({(q,0)}, cf, c) *) (* FINAL *) (* UCC |- Indep uses (UF_Indep, CF_Indep, C_Indep) returns RF_Indep CF = if (C_Indep > 1) then {q} else {} ------------------------------------------------------------------------------------------------- UCC |- NullMap[q]{}(Indep) uses ({(q, 0)} + UF_Indep, CF + CF_Indep, >1) returns ({q} + RF_Indep) *) | AOENullMap crname -> let indep_c = get_cardinality (access_onesub indep) in let _ = access_nosub dep in let cf = if (is_cardinality_many indep_c) then [crname] else [] in ((crname, (0, Never)) :: [], cf, indep_c) (* MapIndex, MapIndexStep *) (* *** OPEN ISSUE! *** *) (* Cursor.cursor_is_empty does a peek, implications? - Michael *) (* c = c(indep) --------------------------------------------------- env |- MapIndex[q]{}(indep) -> ({(q,0)}, {}, c) env |- MapIndexStep[q]{}(indep) -> ({(q,0)}, {}, c) *) (* FINAL *) (* UCC |- Indep uses (UF_Indep, CF_Indep, C_Indep) returns RF_Indep ------------------------------------------------------------------------------------------------------ UCC |- MapIndexStep[q]{}(Indep) uses ({(q, 0)} + UF_Indep, CF_Indep, C_Indep) returns (RF_Indep + {q}) UCC |- MapIndex[q]{}(Indep) uses ({(q, 0)} + UF_Indep, CF_Indep, C_Indep) returns (RF_Indep + {q}) *) | AOEMapIndex crname | AOEMapIndexStep crname -> let indep_c = get_cardinality (access_onesub indep) in let _ = access_nosub dep in ((crname, (0, Never)) :: [], [], indep_c) (* TreeJoin *) (* Must return Table cardinality here in case of sort joins! - Michael *) (* ---------------------------------------------- env |- Treejoin[a::nt]{}(indep) -> ({}, {}, _) *) (* FINAL *) (* UCC |- Indep uses (UF_Indep, CF_Indep, C_Indep) returns RF_Indep ----------------------------------------------------------------------- UCC |- TreeJoin[a::nt]{}(Indep) uses (UF_Indep, CF_Indep, _) returns {} *) | AOETreeJoin _ -> let _ = access_onesub indep in let _ = access_nosub dep in ([], [], NoTable) (* FINAL *) (* Overly conservative? Here, the strategy is diffenent than, e.g., OrderBy where materialization is enforced by the operator itself and not by conservative use counts. - Michael *) (* UCC |- Indep uses (UF_Indep, CF_Indep, C_Indep) returns RF_Indep RF = {q_1,...,q_n} + RF_Indep UF = m(RF) -------------------------------------------------------------------------------- UCC |- TupleTreePattern[q_1,...,q_n]{}(Indep) uses (UF, CF_Indep, >1) returns RF *) | AOETupleTreePattern (input, pattern) -> let _ = access_onesub indep in let _ = access_nosub dep in let outputs = get_all_outputs_from_twig_pattern pattern in (set_use_count_many (input :: outputs), [], Table CMany) (* ConcatTuples *) (* ------------------------------------------- env |- ConcatTuples{}(i1,i2) -> ({}, {}, 1) *) (* FINAL *) (* UCC |- Indep_1 uses (UF_Indep_1, CF_Indep_1, C_Indep_1) returns RF_Indep_1 UCC |- Indep_2 uses (UF_Indep_2, CF_Indep_2, C_Indep_2) returns RF_Indep_2 --------------------------------------------------------------------------------------------------------------------------------- UCC |- ConcatTuples{}(Indep_1,Indep_2) uses (UF_Indep_1 + UF_Indep_2, CF_Indep_1 + CF_Indep_1, 1) returns RF_Indep_1 + FR_Indep_2 *) | AOEConcatTuples -> let _ = access_twosub indep in let _ = access_nosub dep in ([], [], Table COne) (* Order By *) (* This assumes repeated evalutation of repeatedly needed values. *) (* OLD *) (* tfuc = rf(indep) * (af(d1) + ... 
+ af(dn)) ---------------------------------------------------- env |- OrderBy{d1,...dn}(indep) -> (m(tfuc), {}, >1) *) (* NEW *) (* OrderBy materializes all input tuple fields. This has to be counted as a single access (see Product)! *) (* tfuc1 = o(rf(indep)) tfuc2 = m(rf(indep) * (af(d1) + ... + af(dn))) ---------------------------------------------------------- env |- OrderBy{d1,...dn}(indep) -> (tfuc1 + tfuc2, {}, >1) *) (* FINAL *) (* Can make use of C_Indep = 1 - Michael *) (* UCC |- Indep uses (UF_Indep, CF_Indep, C_Indep) returns RF_Indep // candidate generation equivalent to UF' = RF_Indep * (AF_Dep_1 + ... + AF_Ddep_n) UCC' = (CF_Indep + RF_Indep, RF_Indep) UCC' |- Dep_1 uses (UF_Dep_1, CF_Dep_1, C_Dep_1) returns RF_Dep_1 ... UCC' |- Dep_n uses (UF_Dep_n, CF_Dep_n, C_Dep_n) returns RF_Dep_n // materialization corresponds to one access UF = o(RF_Indep) + UF_Dep_1 + ... + UF_Dep_n + UF_Indep CF = CF_Dep_1 + ... + CF_Dep_n + CF_Indep ------------------------------------------------------------------------------------ UCC' |- OrderBy[osl]{Dep_1,...,Dep_n}(Indep) uses (UF, CF, C_Indep) returns RF_Indep *) | AOEOrderBy _ -> let _ = access_onesub indep in let _ = access_manysub dep in let tfuc1 = set_use_count_once indep_rf in let tfuc2 = set_use_count_many (Gmisc.intersect_list indep_rf dep_af) in (tfuc1 @ tfuc2, [], Table CMany) (* Group By *) (* *** OPEN ISSUE! *** *) (* This does internal sorting (and materialization?), finish this! - Michael *) (* cf = {a1,...,an} *) (* -------------------------------------------------- *) (* env |- GroupBy[[g1_1,...,g1_k][v1_1,...v1_l][a1] *) (* ... *) (* [gn_1,...,gn_k][vn_1,...,vn_l][an]] *) (* {d1,...,dm}(indep) -> ({}, cf, >1) *) | AOEGroupBy gds -> let _ = access_onesub indep in let _ = access_manysub dep in let cf = List.map (fun gd -> let (odt, an) = gd.aggregate_name in an) gds in (* Added 07|03|2006 - Michael *) let tfuc1 = set_use_count_once indep_rf in (*([], cf, Table CMany)*) (tfuc1, cf, Table CMany) (* *** OPEN ISSUE! *** *) (* handle these! *) | AOEProject _ -> (* this is a guess -- Ph *) let _ = access_onesub indep in let _ = access_nosub dep in let tfuc1 = set_use_count_once indep_rf in (tfuc1, indep_rf, Table CMany) (* | AOEDistinct _ | AOEPrune _ *) (* raise (Query (Prototype "Cannot handle this kind of operator in tuple field use count analysis!")) *) (***********************) (* Non-tuple operators *) (***********************) (* Some, Every *) (* The returned fields of the input tuple have been created elsewhere; as soon as they are accessed in the dep. branch of a quantifier, it is those fields that are accessed repeatedly ('free accessed fields'). *) (* tfuc = m(rf(IN) * af(dep)) -------------------------------------------- env |- Some[q]{dep}(indep) -> (tfuc, {}, _) env |- Every[q]{dep}(indep) -> (tfuc, {}, _) *) (* FINAL *) (* UCC = (CF_Input, RF_Input) UCC |- Indep uses (UF_Indep, CF_Indep, C_Indep) returns RF_Indep // candidate generation equivalent to UF' = RF_Indep * (AF_Dep) UCC' = (CF_Input + RF_Input, RF_Input) UCC' |- Dep uses (UF_Dep, CF_Dep, C_Dep) returns RF_Dep ------------------------------------------------------------------------------------ UCC' |- Some{Dep}(Indep) uses (UF_Dep + UF_Indep, CF_Dep + CF_Indep, >1) returns {} UCC' |- Every{Dep}(Indep) uses (UF_Dep + UF_Indep, CF_Dep + CF_Indep, >1) returns {} *) | AOESome _ | AOEEvery _ -> (* this will throw an exception for the matches above !! 
let _ = access_onesub indep in let _ = access_onesub dep in *) let free_accessed = Gmisc.intersect_list input_tuple_fields dep_af in (set_use_count_many free_accessed, [], NoTable) | _ -> ([], [], NoTable) in (* Need to sort everything for succesful merge. *) let current_tfuc = List.fold_left (List.merge use_count_compare) [] (List.map (fun uc -> [uc]) current_tfuc) in let current_cf = List.fold_left (List.merge cv_compare) [] (List.map (fun uc -> [uc]) current_cf) in ((variable_count_merge ([current_tfuc] @ [indep_tfuc] @ [dep_tfuc])), Gmisc.remove_duplicates (List.fold_left (List.merge cv_compare) [] ([current_cf] @ [indep_cf] @ [dep_cf])), current_c) else ([], [], NoTable) in (***********************************************************************************) let free_variables, bound_variables = compute_free_and_bound_vars cur_name indep_list dep_list in (* calculate returned tuple fields *) mk_annotation free_variables bound_variables tuples_accessed returned_fields (tfuc, cf, c) (*************************) (* Walks a Subexpression *) (*************************) and annotate_subexpr be input_tuple input_candidate_fields sub = match sub with | NoSub -> () | OneSub s -> annotate_compilation be input_tuple input_candidate_fields s | TwoSub (s0,s1) -> annotate_compilation be input_tuple input_candidate_fields s0; annotate_compilation be input_tuple input_candidate_fields s1 | ManySub s_array -> Array.iter (annotate_compilation be input_tuple input_candidate_fields) s_array (* cur is a cexpr_desc *) and annotate_compilation bshould_be_empty input_tuple_fields input_candidate_fields cur = let indep = cur.psub_expression in annotate_subexpr bshould_be_empty input_tuple_fields input_candidate_fields indep; if (bshould_be_empty && (not (cur.compile_annotations = None))) then raise (Query (Internal_Error ("Compile_annotation error: Algop '"^(Xquery_algebra_ast_util.string_of_algop_expr_name cur.palgop_expr_name)^"' is already annotated"))); begin let fvd = annotate_op_and_dep bshould_be_empty input_tuple_fields input_candidate_fields cur in cur.compile_annotations <- (Some fvd) end (* Called externally at the head of the tree *) let annotate_algebraic_expression root = annotate_compilation true empty_fields empty_fields root (* Called to reannotate the algebraic tree *) let reannotate_algebraic_expression root = annotate_compilation false empty_fields empty_fields root (****************************************************) (* These are all done by side-effects. This should probably be switched since it is not as nice. These are not for reannotation - they insist that the annotations be blank. 
*) (****************************************************) let annotate_algebraic_function fn = let (cfname, arity), _, fn_body, _ = fn.palgop_function_decl_desc in print_string ("annotate function "^(Namespace_names.prefixed_string_of_rqname cfname)^"\n"); match !(fn_body.palgop_func_optimized_logical_plan) with | AOEFunctionImported -> () | AOEFunctionUser userfn_body -> annotate_algebraic_expression userfn_body let annotate_algebraic_decl dcl = annotate_subexpr true empty_fields empty_fields dcl.alg_decl_indep; annotate_subexpr true empty_fields empty_fields dcl.alg_decl_dep let annotate_algebraic_prolog p = List.iter annotate_algebraic_function p.palgop_prolog_functions; List.iter annotate_algebraic_decl p.palgop_prolog_vars; List.iter annotate_algebraic_decl p.palgop_prolog_indices let annotate_algebraic_module m = annotate_algebraic_prolog m.palgop_module_prolog; List.iter annotate_algebraic_expression (m.palgop_module_statements) galax-1.1/compile/logical_algebra_types.mli0000664000076400007640000000354110560462355017242 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: logical_algebra_types.mli,v 1.5 2007/02/01 22:08:45 simeon Exp $ *) (* Module logical_algebra_types Description: This *interface* contains type declarations for logical code. *) open Xquery_algebra_ast open Alg_path_struct (* Additional 'semi-logical' types. Introduced in order to keep everything related to logical types in one place (here). - Michael *) type 'a semilogical_opt_algop_expr = ('a option, path_annotation) aalgop_expr type 'a semilogical_opt_algop_sub_exprs = ('a option, path_annotation) aalgop_sub_exprs type 'a semilogical_algop_sub_exprs = ('a, path_annotation) aalgop_sub_exprs type ('a, 'b) semilogical_algop_decl = ('a, path_annotation ,'b option) aalgop_decl type logical_algop_sub_exprs = (unit, path_annotation) aalgop_sub_exprs type logical_algop_expr = (unit, path_annotation) aalgop_expr type logical_algop_decl = (unit, path_annotation, unit) aalgop_decl type logical_algop_prolog = (unit, path_annotation, unit) aalgop_prolog type logical_algop_xmodule = (unit, path_annotation, unit) aalgop_xmodule type logical_algop_function_body = (unit, path_annotation) aalgop_expr type logical_compile_context = (unit, path_annotation) Compile_context.compile_context galax-1.1/compile/compile_update.mli0000664000076400007640000000205610560462355015721 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: compile_update.mli,v 1.6 2007/02/01 22:08:45 simeon Exp $ *) (* Module: Compile_update Description: This module compiles an XQuery core update into the XQuery algebra. *) open Xquery_core_ast open Xquery_algebra_ast val compile_cupdate : ('a option, unit) Compile_context.compile_context -> Xquery_core_ast.acupdate -> ('a option, unit) Xquery_algebra_ast.aalgop_expr galax-1.1/website/0000775000076400007640000000000010772255371012236 5ustar mffmffgalax-1.1/website/static.html0000664000076400007640000001320010324003546014373 0ustar mffmff Galax: Static Typing

[Galax]

Tech. Center                        
 XQuery Optimization
 Static Typing
 Testing
 Research Papers
 Galax Projects

About Static Typing

The purpose of this page is to help you get acquainted with XQuery's type system, and static typing.

Static typing is one of the most innovative features of XQuery. It can be used to check, at compile time, that your query will never raise a type error. This makes it possible to detect a large number of errors during the early phases of application development. Static typing has proved to be an invaluable tool in other programming languages, for instance Java or ML. This benefit comes at a cost, though: the developer must first understand the type system and how to work effectively with it. In the context of XQuery, the type system is based on XML Schema.
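
To make this concrete, here is a small, made-up query (it does not come from the Galax documentation) that a static type check rejects before it is ever run. The body of the function has static type xs:decimal? (zero or one decimal), which is not a subtype of the declared return type xs:decimal:

declare function local:first-price($prices as xs:decimal*) as xs:decimal
{ $prices[1] };    (: static type is xs:decimal?, not xs:decimal: rejected at compile time :)
local:first-price((10.0, 20.0))

Without static typing, this query happens to run without error on the given input; the static check reports the potential problem for all possible inputs, ahead of execution.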

Galax and Static Typing. Galax supports XQuery's static typing option. One way to learn about static typing is to give it a try.


XML Schema and the XQuery Type System

The first step in understanding static typing is to understand XML Schema, which is the basis of XQuery's type system. An introduction to the semantics of XML Schema and the corresponding type system can be found in the following paper:

  • The Essence of XML, Philip Wadler, and Jérôme Siméon. ACM Conference on Principles of Programming Languages (POPL'2003). January 2003.

The complete specification for XQuery's type system and many more examples can be found in the Formal Semantics.
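
As a rough illustration of the connection (the schema namespace and file name below are invented for the example), element declarations from an imported XML Schema can be used directly inside XQuery sequence types:

import schema namespace bib = "http://example.org/bib" at "bib.xsd";  (: hypothetical schema :)
declare variable $doc as document-node(schema-element(bib:book)) external;
$doc//bib:title

The type document-node(schema-element(bib:book)) is built from the schema's own element declaration for bib:book, which is what lets the static rules reason about the structure of $doc.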


Static Typing in XQuery

The way static typing works can be somewhat complex. Understanding it in detail is not necessary for a user of the language, but it is necessary for an XQuery implementor. The following papers and presentations explain the basics of static typing in XQuery.

The complete specification for XQuery's static typing feature can be found in the Formal Semantics.
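
To give a flavour of what the static rules compute (the fragment below is purely illustrative), consider a conditional whose branches return values of different types; the type inferred for the whole expression is, roughly, the union of the types of the two branches:

declare variable $cond as xs:boolean external;
if ($cond) then <yes/> else "no"
(: inferred static type: approximately element(yes) | xs:string :)

Any expression built on top of this one must be prepared to handle either an element or a string, and the static rules propagate that fact through the rest of the query.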


Subtyping

Dealing with subtyping on XML Schema types is one of the main technical difficulties in implementing XQuery's type system. The following paper presents a way to deal with subtyping in XQuery. Galax's subtyping algorithm is based on this paper.

  • Subsumption for XML Types. Gabriel M. Kuper and Jérôme Siméon, in International Conference on Database Theory (ICDT'2001), January 2001, London, UK.
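
As a simple example of the role subtyping plays (the function below is invented for illustration), a value of static type element(book)+ (one or more book elements) can be passed where element(book)* (zero or more) is expected, because element(book)+ is a subtype of element(book)*:

declare function local:count-books($books as element(book)*) as xs:integer
{ fn:count($books) };
declare variable $bs as element(book)+ external;
local:count-books($bs)    (: accepted by subsumption: element(book)+ is a subtype of element(book)* :)

Deciding such subtype relations between arbitrary XML Schema types is the hard part, and it is exactly what the algorithm described in the paper above addresses.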

galax-1.1/website/images/0000775000076400007640000000000010772255371013503 5ustar mffmff
galax-1.1/website/images/galax.gif [binary GIF data omitted]
galax-1.1/website/images/hot.gif [binary GIF data omitted]
galax-1.1/website/images/galax_small.gif [binary GIF data omitted]
galax-1.1/website/download.html0000664000076400007640000001070510555532170014743 0ustar mffmff Galax: Downloads

[Galax]

Galax Home
Download Galax
 Galax 0.6.8 Source
 Older Versions
 License

This page is likely to be the reason you came to the Galax Website, so enjoy!


Latest Version (0.6.8)

Galax 0.6.8 is a GODI package. GODI is an installation and configuration tool for O'Caml applications, which automatically installs Galax and all the libraries on which it depends.

We strongly recommend that you use the GODI tool to install Galax. See Installation instructions for how to run GODI.

If you must install Galax manually, here is the source package:


Galax-related tools

Trevor Jim has implemented a simple distributed XQuery protocol, "dxq:", for executing XQuery programs at remote nodes, as well as a distributed Galax daemon (galaxd). A network control and visualization application for distributed Galax is available at: http://homepage.mac.com/tjim/


Older Versions

You can download the previous version:

Galax 0.6.5 Source Code

Galax 0.5.0 Binaries

For your convenience, here are pre-compiled binary packages:

Galax 0.5.0 Source Code

The source version requires that you have the O'Caml compilers installed on your system. Then compiling the Galax source should not be difficult. For more information, see the User's Manual.

0.4.0

License

If you download and use Galax, you are agreeing to the terms of the Lucent Public License Version 1.0


galax-1.1/website/users.html0000664000076400007640000001341210432640354014256 0ustar mffmff Galax: Overview

[Galax]

Galax Community
 Galax Users
 Mailing List
 Report a bug

Who are YOU?

As XQuery and Galax become more mature, our user community is growing. The purpose of this page is to advertise what some of our users are doing, what projects they are working on, and what they use Galax for. If you are using Galax, we would love to hear from you: just send us a note with a short project description and a URL, and we will add you to the list.


LegoDB

What does it do? LegoDB is an XML storage engine that can figure out the best relational storage for your application. The application is characterized by an XML Schema, an XQuery workload, and some sample(s) of the document(s) that are going to be stored.
Who is involved? Bell Laboratories and the Database Systems Lab, Indian Institute of Science.
How is Galax used? To parse and represent XQueries, and to parse and reason about XML Schemas.
Where can I learn more about it? db.bell-labs.com/legodb

XQuery Examples for MusicXML

What does it do? Processing music represented as MusicXML documents.
Who is involved? Michael Good at Recordare LLC.
How is Galax used? To process XQueries on MusicXML documents.
Where can I learn more about it? www.recordare.com/good/max2002-update.html

PIX

What does it do? PIX is a full-text query engine for XML. PIX integrates full-text capabilities into XQuery.
Who is involved? AT&T Labs
How is Galax used? Galax is used to handle the pure XQuery part of the queries.
Where can I learn more about it? www.research.att.com/~sihem

galax-1.1/website/testing.html0000664000076400007640000000336110435355016014575 0ustar mffmff Galax: Testing

[Galax]

Galax Home
Tech. Center                        
 XQuery Optimization
 Static Typing
 Testing
 Research Papers
 Galax Projects

Testing Galax for Conformance

Galax has been tested with the W3C XQuery 1.0 Test Suite. See Galax's current Release Notes for conformance results.

galax-1.1/website/demo/0000775000076400007640000000000010772255371013162 5ustar mffmffgalax-1.1/website/demo/demo_conf.mli0000664000076400007640000000152410560462367015620 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: demo_conf.mli,v 1.5 2007/02/01 22:08:55 simeon Exp $ *) (* config parameters *) val document_dir : string ref val cgibin_prefix : string ref val demo_init : string -> unit galax-1.1/website/demo/.depend0000664000076400007640000000034607513647613014430 0ustar mffmffdemo_conf.cmo: demo_conf.cmi demo_conf.cmx: demo_conf.cmi querycgi.cmo: demo_conf.cmi querycgi.cmx: demo_conf.cmx usecase.cmo: demo_conf.cmi usecase.cmx: demo_conf.cmx viewxml.cmo: demo_conf.cmi viewxml.cmx: demo_conf.cmx galax-1.1/website/demo/viewxml.ml0000664000076400007640000000457210560462367015217 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: viewxml.ml,v 1.9 2007/02/01 22:08:55 simeon Exp $ *) open Demo_conf open Error open Top_util open Netcgi open Netcgi_types let main (cgiobj : cgi_activation) = demo_init "galax_demo_config"; (* Sys.chdir !document_dir; *) let file = cgiobj # argument_value "file" in (* cgiobj # output # output_string ("File parsed is: " ^ file ^"
\n"); *) let content = Gmisc.load_file file in (* cgiobj # output # output_string "
"; *)
(*  cgiobj # output # output_string (Netencoding.Html.encode_from_latin1 content) *)
  cgiobj # output # output_string content
(*  cgiobj # output # output_string "

\n"*) let beg_html = "Galax\n\n" let end_html = "\n\ \n" let exec func (cgiobj : cgi_activation) = try cgiobj # set_header ~status:`Ok ~content_type:"text/plain" (); (* cgiobj # output # output_string beg_html; *) func cgiobj; (* cgiobj # output # output_string end_html; *) cgiobj # output # commit_work() with e -> begin cgiobj # output # rollback_work(); cgiobj # set_header ~status:`Internal_server_error (); cgiobj # output # output_string beg_html; cgiobj # output # output_string "

An Error Was Raised:\n";
        cgiobj # output # output_string (bprintf_error_safe "  " e);
        cgiobj # output # output_string ("\n
\n"^(!separator)); cgiobj # output # output_string end_html; cgiobj # output # commit_work() end let _ = let operating_type = Netcgi.buffered_transactional_optype in let cgiobj = ((new Netcgi.std_activation ~operating_type ()) :> cgi_activation) in exec main cgiobj galax-1.1/website/demo/querycgi.ml0000664000076400007640000002173410766054642015354 0ustar mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: querycgi.ml,v 1.35 2008/03/12 22:30:58 simeon Exp $ *) (* Galax modules *) open Error open Conf open Xquery_ast open Print_top open Processing_context open Top_util open Top_config open Galax (* Caml modules *) open Format open Netcgi open Netcgi_types (* demo modules *) open Demo_conf (* Additional global parameters *) let showresult = ref true let showtype = ref true let html_setup () = separator := "
" (* Extract CGI arguments *) let language = ref "xquery10" let has_side_effects () = match !language with | "xquery10" -> false | _ -> true let the_documents = ref [] let set_documents doc = let docs = Gmisc.split_on_char doc ' ' in the_documents := docs let print_docs cgiobj prepared_prolog = let proc_ctxt = default_processing_context () in (* The default for the command-line are different than those for the API *) set_serialization_kind proc_ctxt Serialize_As_Standard; Top_config.set_language_kind_arg proc_ctxt !language; begin let buff = Buffer.create 512 in Conf.xml_formatter := Format.formatter_of_buffer buff; let verbose = !Conf.verbose in let pe = !Conf.print_expr in let pce = !Conf.print_core_expr in let pt = !Conf.print_type in let poe = !Conf.print_optimized_expr in let px = !Conf.print_xml in List.iter (fun x -> Conf.verbose := false; Conf.print_expr := false; Conf.print_core_expr := false; Conf.print_type := false; Conf.print_optimized_expr := false; Conf.print_xml := true; let v = eval_statement prepared_prolog (Galax_io.String_Input x) in Galax.serialize proc_ctxt (Galax_io.Formatter_Output (!Conf.xml_formatter)) v; let res = Buffer.contents buff in let escaped_res = !Conf.xml_charescape_fn res in Conf.verbose := verbose; Conf.print_expr := pe; Conf.print_core_expr := pce; Conf.print_type := pt; Conf.print_optimized_expr := poe; Conf.print_xml := px; cgiobj # output # output_string escaped_res) !the_documents; cgiobj # output # output_string ("\n\n"^(!separator)); end let print_docs_before cgiobj prepared_prolog = begin cgiobj # output # output_string "

Documents Before the Query:\n";
    print_docs cgiobj prepared_prolog
  end

let print_docs_after cgiobj prepared_prolog =
  begin
    cgiobj # output # output_string "

Documents After the Query:\n";
    print_docs cgiobj prepared_prolog
  end
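
(* Note: print_docs_before and print_docs_after above differ only in the
   banner printed around the same print_docs call.  The helper below is an
   illustrative sketch only -- print_docs_banner is a hypothetical name and
   is not used elsewhere in this module -- showing how the two wrappers
   could share their output boilerplate. *)
let print_docs_banner cgiobj prepared_prolog banner =
  begin
    cgiobj # output # output_string ("\n" ^ banner ^ "\n");
    print_docs cgiobj prepared_prolog
  end
(* Example use:
     print_docs_banner cgiobj prepared_prolog "Documents After the Query:" *)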

let process_args proc_ctxt (cgiobj : cgi_activation) =
(*  Conf.unsafe_join_hack := true ;  *)
  begin
    try
      language := (cgiobj # argument_value "language")
    with
    | _ -> ()
  end;
  begin
    try
      let the_documents = (cgiobj # argument_value "documents") in
      set_documents the_documents
    with
    | _ -> ()
  end;
  begin
    try
      if (bool_of_string (cgiobj # argument_value "debug")) then Debug.set_debug [Debug.DefaultDebug] else ()
    with
    | _ -> ()
  end;
  begin
    try
      Conf.print_expr := bool_of_string (cgiobj # argument_value "expr-print")
    with
    | _ -> ()
  end;
  begin
    try
      Conf.print_core_expr := bool_of_string (cgiobj # argument_value "core-print")
    with
    | _ -> ()
  end;
  begin
    try
      set_typing_phase_arg proc_ctxt (cgiobj # argument_value "typing")
    with
    | _ -> ()
  end;
  begin
    try
      Conf.print_type := bool_of_string (cgiobj # argument_value "typing-print");
    with
    | _ -> ()
  end;
  begin
    try
      set_rewriting_phase_arg proc_ctxt (cgiobj # argument_value "optim")
    with
    | _ -> ()
  end;
  begin
    try
      Conf.print_optimized_expr := bool_of_string (cgiobj # argument_value "optim-print")
    with
    | _ -> ()
  end;
  begin
    try
      set_evaluation_phase_arg proc_ctxt (cgiobj # argument_value "eval")
    with
    | _ -> ()
  end;
  begin
    try
      Conf.print_xml := bool_of_string (cgiobj # argument_value "eval-print")
    with
    | _ -> ()
  end;
  check_config proc_ctxt;
  let query_string = (cgiobj # argument_value "query")
  (* Load local context file *)
  and query_context = Gmisc.load_file (cgiobj # argument_value "context") in
  (query_context,query_string)
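
(* Illustrative sketch only: the repeated try/with blocks in process_args
   above could be routed through a small helper that applies a setter to a
   CGI argument when it is present and ignores it otherwise.  with_cgi_arg
   is a hypothetical name and is not used elsewhere in this module. *)
let with_cgi_arg (cgiobj : cgi_activation) name set =
  try set (cgiobj # argument_value name) with _ -> ()
(* Example use:
     with_cgi_arg cgiobj "expr-print"
       (fun s -> Conf.print_expr := bool_of_string s) *)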

let html_xquery_loop cgiobj prepared_prolog proc_ctxt t cs =
  let buff = Buffer.create 512 in
  Conf.xml_formatter := Format.formatter_of_buffer buff;
  let verbose = !Conf.verbose in
  let v = eval_compiled_statement prepared_prolog cs in
  Conf.verbose := false;
  Galax.serialize proc_ctxt (Galax_io.Formatter_Output (!Conf.xml_formatter)) v;
  let res = Buffer.contents buff in
  let escaped_res = !Conf.xml_charescape_fn res in
  Conf.verbose := verbose;
  cgiobj # output # output_string (!Conf.xml_header); 
  cgiobj # output # output_string escaped_res;
  cgiobj # output # output_string (!Conf.xml_footer) 
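
(* For orientation, the compile-and-evaluate pipeline that main drives below
   boils down to the following Galax API calls.  This is an illustrative
   sketch only: run_query_sketch is a hypothetical name, and it takes the
   query as a plain string instead of reading it from the CGI request. *)
let run_query_sketch proc_ctxt query_text =
  let compiled_prolog = Galax.load_standard_library proc_ctxt in
  let (compiled_module, stmts) =
    Galax.import_main_module false compiled_prolog
      (Galax_io.String_Input query_text) in
  let prepared_prolog = Galax.prepare_program compiled_module None in
  List.map (eval_compiled_statement prepared_prolog) stmts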

let main cgiobj =
  (* The defaults for the command line are different from those for the API *)
  html_setup ();
  demo_init "galax_demo_config";

  Conf.verbose := true; 

  (* Set up the output headers and footers *)
  Conf.expr_header := "\n"^(!separator)^"\nXQuery Expression:\n";
  Conf.expr_footer := "\n"^(!separator);
  Conf.core_expr_header := "\nNormalized Expression (XQuery Core):\n";
  Conf.core_expr_footer := "\n"^(!separator);
  Conf.type_header := "\nStatic Type Analysis:\n";
  Conf.type_footer := "\n"^(!separator);
  Conf.optimized_expr_header := "\nOptimized Normalized Expression (XQuery Core):\n";
  Conf.optimized_expr_footer := "\n"^(!separator);
  Conf.xml_header := "\nDynamic Evaluation:\n";
  Conf.xml_footer := "
\n"^(!separator); (* Why was that commented? - Jerome *) Sys.chdir !document_dir; (* First get the default processing context *) let proc_ctxt = default_processing_context () in (* The default for the command-line are different than those for the API *) set_serialization_kind proc_ctxt Serialize_As_XQuery; let (context,queries) = process_args proc_ctxt cgiobj in Top_config.set_language_kind_arg proc_ctxt !language; Top_config.set_serialization_kind_arg proc_ctxt "xquery"; let full_text = (context ^ queries) in let full_text = let b = Netbuffer.create 20 in Gmisc.load_string_in_buffer b full_text; b in if (Debug.default_debug()) then begin cgiobj # output # output_string "Query context is:\n\n"; cgiobj # output # output_string context; begin let fi = open_out "/tmp/content.txt" in output_string fi "Query is:\n\n"; output_string fi queries; close_out fi end end; begin Conf.xml_charescape_fn := Netencoding.Html.encode_from_latin1; let compiled_prolog = Galax.load_standard_library proc_ctxt in (* Load main module *) let (compiled_module, stmts) = Galax.import_main_module false compiled_prolog (Galax_io.Buffer_Input full_text) in (* Evaluate global variables *) let prepared_prolog = Galax.prepare_program compiled_module None in if has_side_effects () then print_docs_before cgiobj prepared_prolog; List.iter (html_xquery_loop cgiobj prepared_prolog proc_ctxt full_text) stmts; if has_side_effects () then print_docs_after cgiobj prepared_prolog end let beg_html = "Galax\n\n\n" let end_html = "\n\ \n" let exec func (cgiobj : cgi_activation) = try cgiobj # set_header (); cgiobj # output # output_string beg_html; cgiobj # output # commit_work(); func cgiobj; cgiobj # output # output_string end_html; cgiobj # output # commit_work() with e -> begin cgiobj # output # rollback_work(); cgiobj # set_header ~status:`Internal_server_error (); cgiobj # output # output_string beg_html; cgiobj # output # output_string "

An Error Was Raised:\n";
        cgiobj # output # output_string ("Document dir: " ^ !document_dir ^ "\n");
        cgiobj # output # output_string (bprintf_error_safe "  " e);
        cgiobj # output # output_string ("\n
\n"^(!separator)); cgiobj # output # output_string end_html; cgiobj # output # commit_work() end let _ = let operating_type = Netcgi.buffered_transactional_optype in let cgiobj = ((new Netcgi.std_activation ~operating_type ()) :> cgi_activation) in exec main cgiobj galax-1.1/website/demo/Makefile0000664000076400007640000000673510754650252014632 0ustar mffmff######################################################################### # # # GALAX # # XQuery Engine # # # # Copyright 2001-2007. # # Distributed only by permission. # # # ######################################################################### # $Id: Makefile,v 1.30 2008/02/13 20:18:50 mff Exp $ # # Web demo Makefile # Makefile pre-includes LOCALPREFIX=../.. # User configuration -include $(LOCALPREFIX)/config/Makefile.galax # Web site configuration include ../Makefile.config ########################################################################### # You should not need to modify below this line CGITARGET=galax.cgi OPTCGITARGET=galax$(OPT).cgi USECASETARGET=usecase$(EXE) OPTUSECASETARGET=usecase$(OPT) OBJS=demo_conf.cmo querycgi.cmo VIEWOBJS=demo_conf.cmo USECASEOBJS=demo_conf.cmo usecase.cmo OPTUSECASEOBJS=$(USECASEOBJS:.cmo=.cmx) all: $(USECASETARGET) $(CGITARGET) opt: $(OPTUSECASETARGET) $(OPTCGITARGET) $(USECASETARGET): $(USECASEOBJS) $(OCAMLC) -custom -linkall -o $@ $(OCAMLC_FLAGS) $(GALAX_ALL_INCLUDES) $(GALAX_ALL_LIBS) $(USECASEOBJS) $(OPTUSECASETARGET): $(USECASEOBJS:.cmo=.cmx) $(OCAMLOPT) -linkall -o $@ $(OCAMLOPT_FLAGS) $(GALAX_ALL_INCLUDES) $(GALAX_ALL_OPTLIBS) $(USECASEOBJS:.cmo=.cmx) $(CGITARGET): $(OBJS) $(OCAMLC) -custom -linkall -o $@ $(OCAMLC_FLAGS) $(GALAX_ALL_INCLUDES) -I $(CONF_OCAML_PKGLIB)/cgi $(GALAX_ALL_LIBS) $(CONF_OCAML_PKGLIB)/netcgi1/cgi.cma $(OBJS) $(OPTCGITARGET): $(OBJS:.cmo=.cmx) $(OCAMLOPT) -linkall -o $@ $(OCAMLOPT_FLAGS) $(GALAX_ALL_INCLUDES) -I $(CONF_OCAML_PKGLIB)/cgi $(GALAX_ALL_OPTLIBS) $(CONF_OCAML_PKGLIB)/netcgi1/cgi.cmxa $(OBJS:.cmo=.cmx) installdocs: if test -d $(DEMOSITE); then : ; else $(MKDIR) $(DEMOSITE); fi if test -d $(DEMOSITE)/docs; then rm -f $(DEMOSITE)/docs/*: ; else $(MKDIR) $(DEMOSITE)/docs; fi echo "DOCUMENT_DIR=$(DEMOSITE)" > galax_demo_config echo "CGIBIN_PREFIX=$(CGIBIN_PREFIX)" >> galax_demo_config cp galax_demo_config $(DEMOCGIBIN) cp maindocuments/*.* $(DEMOSITE) cp ../../usecases/*_context.xq $(DEMOSITE) cp -R ../../usecases/docs/* $(DEMOSITE)/docs (if test -f $(OPTUSECASETARGET); then ./$(OPTUSECASETARGET) ../../usecases $(DEMOSITE) galax_demo_config; else :; fi) cp -R ../../usecases/xqupdate/docs/* $(DEMOSITE)/docs cp ../../usecases/xqupdate/*_context.xq $(DEMOSITE) (if test -f $(OPTUSECASETARGET); then ./$(OPTUSECASETARGET) -language ultf ../../usecases/xqupdate $(DEMOSITE) galax_demo_config; else :; fi) install: installdocs $(CGITARGET) cp $(CGITARGET) $(DEMOCGIBIN) chmod a+x $(DEMOSITE) installopt: installdocs $(OPTCGITARGET) cp $(OPTCGITARGET) $(DEMOCGIBIN) chmod a+x $(DEMOSITE) .depend: *.mli *.ml $(OCAMLDEP) *.mli *.ml >.depend clean: rm -f *.cmx *.cmo *.cmi *.o rm -f $(CGITARGET) $(OPTCGITARGET) rm -f $(USECASETARGET) $(OPTUSECASETARGET) rm -f core *~ TAGS rm -f galax_demo_config tags: etags *.ml *.mli *.mll *.mly .ml.cmo: $(OCAMLC) $(OCAMLC_FLAGS) $(GALAX_ALL_INCLUDES) -I $(CONF_OCAML_PKGLIB)/netcgi1 -c $< .ml.cmx: $(OCAMLOPT) $(OCAMLOPT_FLAGS) $(GALAX_ALL_INCLUDES) -I $(CONF_OCAML_PKGLIB)/netcgi1 -c $< include .depend galax-1.1/website/demo/usecase.ml0000664000076400007640000002331610754650252015146 0ustar 
mffmff(***********************************************************************) (* *) (* GALAX *) (* XQuery Engine *) (* *) (* Copyright 2001-2007. *) (* Distributed only by permission. *) (* *) (***********************************************************************) (* $Id: usecase.ml,v 1.23 2008/02/13 20:18:50 mff Exp $ *) open Xquery_ast open Demo_conf (* Automatic generation of the use cases JavaScript files from the examples use cases *) let usage_msg = Format.sprintf "Usage: %s inputdir outputdir configfile" Sys.argv.(0) (* Process the input arguments *) let language = ref "xquery10" let the_documents = ref "" let process_args () = let args = ref [] in Arg.parse [ "-language", Arg.String (fun s -> language := s; ), "Language class [xquery10, ultf, xquerybang, xqueryp]" ] (fun arg -> args := arg :: !args) usage_msg; match !args with | configfile :: javascriptdir :: usecasedir :: [] -> (usecasedir, javascriptdir, configfile) | _ -> failwith "You must specify input and output usecase directories, and a configfile" (* Extract the comments from the original use case *) let rec process_comments sl = match sl with [] -> [] | Str.Text txt :: sl' -> (process_comments sl') | (Str.Delim smalltext) :: (Str.Text _) :: (Str.Delim longtext) :: (Str.Text _) :: sl' -> let smalltext' = Str.global_replace (Str.regexp "^(: \\| :)") "" smalltext in let longtext' = Str.global_replace (Str.regexp "^(: \\| :)") "" longtext in (smalltext',longtext') :: (process_comments sl') | Str.Delim x :: sl' -> failwith "Use case file not in appropriate format: two SINGLE-line comments, followed by query, then semi-colon." let extract_comment usecasefile = let querycontent = Gmisc.load_file usecasefile in (* let reg = Str.regexp "^(: \\| :)" in *) let reg = Str.regexp "^(: Q[0-9]+.* :)[ \t]*$" in let splited_list = Str.full_split reg querycontent in process_comments splited_list (* HTML templates *) let build_queries all_comments = let query = ref "" in let counter = ref 0 in let rec add_query all_comments = match all_comments with | [] -> () | (smallcomment,_) :: all_comments' -> begin counter := !counter + 1; let new_query = ("